1 /* 2 * Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package com.sun.xml.internal.dtdparser; 27 28 import org.xml.sax.EntityResolver; 29 import org.xml.sax.InputSource; 30 import org.xml.sax.Locator; 31 import org.xml.sax.SAXException; 32 import org.xml.sax.SAXParseException; 33 34 import java.io.IOException; 35 import java.util.ArrayList; 36 import java.util.Enumeration; 37 import java.util.Hashtable; 38 import java.util.Locale; 39 import java.util.Set; 40 import java.util.Vector; 41 42 /** 43 * This implements parsing of XML 1.0 DTDs. 44 * <p/> 45 * This conforms to the portion of the XML 1.0 specification related 46 * to the external DTD subset. 47 * <p/> 48 * For multi-language applications (such as web servers using XML 49 * processing to create dynamic content), a method supports choosing 50 * a locale for parser diagnostics which is both understood by the 51 * message recipient and supported by the parser. 52 * <p/> 53 * This parser produces a stream of parse events. It supports some 54 * features (exposing comments, CDATA sections, and entity references) 55 * which are not required to be reported by conformant XML processors. 56 * 57 * @author David Brownell 58 * @author Janet Koenig 59 * @author Kohsuke KAWAGUCHI 60 * @version $Id: DTDParser.java,v 1.2 2009/04/16 15:25:49 snajper Exp $ 61 */ 62 public class DTDParser { 63 public final static String TYPE_CDATA = "CDATA"; 64 public final static String TYPE_ID = "ID"; 65 public final static String TYPE_IDREF = "IDREF"; 66 public final static String TYPE_IDREFS = "IDREFS"; 67 public final static String TYPE_ENTITY = "ENTITY"; 68 public final static String TYPE_ENTITIES = "ENTITIES"; 69 public final static String TYPE_NMTOKEN = "NMTOKEN"; 70 public final static String TYPE_NMTOKENS = "NMTOKENS"; 71 public final static String TYPE_NOTATION = "NOTATION"; 72 public final static String TYPE_ENUMERATION = "ENUMERATION"; 73 74 75 // stack of input entities being merged 76 private InputEntity in; 77 78 // temporaries reused during parsing 79 private StringBuffer strTmp; 80 private char nameTmp []; 81 private NameCache nameCache; 82 private char charTmp [] = new char[2]; 83 84 // temporary DTD parsing state 85 private boolean doLexicalPE; 86 87 // DTD state, used during parsing 88 // private SimpleHashtable elements = new SimpleHashtable (47); 89 protected final Set declaredElements = new java.util.HashSet(); 90 private SimpleHashtable params = new SimpleHashtable(7); 91 92 // exposed to package-private subclass 93 Hashtable notations = new Hashtable(7); 94 SimpleHashtable entities = new SimpleHashtable(17); 95 96 private SimpleHashtable ids = new SimpleHashtable(); 97 98 // listeners for DTD parsing events 99 private DTDEventListener dtdHandler; 100 101 private EntityResolver resolver; 102 private Locale locale; 103 104 // string constants -- use these copies so "==" works 105 // package private 106 static final String strANY = "ANY"; 107 static final String strEMPTY = "EMPTY"; 108 109 /** 110 * Used by applications to request locale for diagnostics. 111 * 112 * @param l The locale to use, or null to use system defaults 113 * (which may include only message IDs). 114 */ setLocale(Locale l)115 public void setLocale(Locale l) throws SAXException { 116 117 if (l != null && !messages.isLocaleSupported(l.toString())) { 118 throw new SAXException(messages.getMessage(locale, 119 "P-078", new Object[]{l})); 120 } 121 locale = l; 122 } 123 124 /** 125 * Returns the diagnostic locale. 126 */ getLocale()127 public Locale getLocale() { 128 return locale; 129 } 130 131 /** 132 * Chooses a client locale to use for diagnostics, using the first 133 * language specified in the list that is supported by this parser. 134 * That locale is then set using <a href="#setLocale(java.util.Locale)"> 135 * setLocale()</a>. Such a list could be provided by a variety of user 136 * preference mechanisms, including the HTTP <em>Accept-Language</em> 137 * header field. 138 * 139 * @param languages Array of language specifiers, ordered with the most 140 * preferable one at the front. For example, "en-ca" then "fr-ca", 141 * followed by "zh_CN". Both RFC 1766 and Java styles are supported. 142 * @return The chosen locale, or null. 143 * @see MessageCatalog 144 */ chooseLocale(String languages [])145 public Locale chooseLocale(String languages []) 146 throws SAXException { 147 148 Locale l = messages.chooseLocale(languages); 149 150 if (l != null) { 151 setLocale(l); 152 } 153 return l; 154 } 155 156 /** 157 * Lets applications control entity resolution. 158 */ setEntityResolver(EntityResolver r)159 public void setEntityResolver(EntityResolver r) { 160 161 resolver = r; 162 } 163 164 /** 165 * Returns the object used to resolve entities 166 */ getEntityResolver()167 public EntityResolver getEntityResolver() { 168 169 return resolver; 170 } 171 172 /** 173 * Used by applications to set handling of DTD parsing events. 174 */ setDtdHandler(DTDEventListener handler)175 public void setDtdHandler(DTDEventListener handler) { 176 dtdHandler = handler; 177 if (handler != null) 178 handler.setDocumentLocator(new Locator() { 179 public String getPublicId() { 180 return DTDParser.this.getPublicId(); 181 } 182 183 public String getSystemId() { 184 return DTDParser.this.getSystemId(); 185 } 186 187 public int getLineNumber() { 188 return DTDParser.this.getLineNumber(); 189 } 190 191 public int getColumnNumber() { 192 return DTDParser.this.getColumnNumber(); 193 } 194 }); 195 } 196 197 /** 198 * Returns the handler used to for DTD parsing events. 199 */ getDtdHandler()200 public DTDEventListener getDtdHandler() { 201 return dtdHandler; 202 } 203 204 /** 205 * Parse a DTD. 206 */ parse(InputSource in)207 public void parse(InputSource in) 208 throws IOException, SAXException { 209 init(); 210 parseInternal(in); 211 } 212 213 /** 214 * Parse a DTD. 215 */ parse(String uri)216 public void parse(String uri) 217 throws IOException, SAXException { 218 InputSource in; 219 220 init(); 221 // System.out.println ("parse (\"" + uri + "\")"); 222 in = resolver.resolveEntity(null, uri); 223 224 // If custom resolver punts resolution to parser, handle it ... 225 if (in == null) { 226 in = Resolver.createInputSource(new java.net.URL(uri), false); 227 228 // ... or if custom resolver doesn't correctly construct the 229 // input entity, patch it up enough so relative URIs work, and 230 // issue a warning to minimize later confusion. 231 } else if (in.getSystemId() == null) { 232 warning("P-065", null); 233 in.setSystemId(uri); 234 } 235 236 parseInternal(in); 237 } 238 239 // makes sure the parser is reset to "before a document" init()240 private void init() { 241 in = null; 242 243 // alloc temporary data used in parsing 244 strTmp = new StringBuffer(); 245 nameTmp = new char[20]; 246 nameCache = new NameCache(); 247 248 // reset doc info 249 // isInAttribute = false; 250 251 doLexicalPE = false; 252 253 entities.clear(); 254 notations.clear(); 255 params.clear(); 256 // elements.clear (); 257 declaredElements.clear(); 258 259 // initialize predefined references ... re-interpreted later 260 builtin("amp", "&"); 261 builtin("lt", "<"); 262 builtin("gt", ">"); 263 builtin("quot", "\""); 264 builtin("apos", "'"); 265 266 if (locale == null) 267 locale = Locale.getDefault(); 268 if (resolver == null) 269 resolver = new Resolver(); 270 if (dtdHandler == null) 271 dtdHandler = new DTDHandlerBase(); 272 } 273 builtin(String entityName, String entityValue)274 private void builtin(String entityName, String entityValue) { 275 InternalEntity entity; 276 entity = new InternalEntity(entityName, entityValue.toCharArray()); 277 entities.put(entityName, entity); 278 } 279 280 281 //////////////////////////////////////////////////////////////// 282 // 283 // parsing is by recursive descent, code roughly 284 // following the BNF rules except tweaked for simple 285 // lookahead. rules are more or less in numeric order, 286 // except where code sharing suggests other structures. 287 // 288 // a classic benefit of recursive descent parsers: it's 289 // relatively easy to get diagnostics that make sense. 290 // 291 //////////////////////////////////////////////////////////////// 292 293 parseInternal(InputSource input)294 private void parseInternal(InputSource input) 295 throws IOException, SAXException { 296 297 if (input == null) 298 fatal("P-000"); 299 300 try { 301 in = InputEntity.getInputEntity(dtdHandler, locale); 302 in.init(input, null, null, false); 303 304 dtdHandler.startDTD(in); 305 306 // [30] extSubset ::= TextDecl? extSubsetDecl 307 // [31] extSubsetDecl ::= ( markupdecl | conditionalSect 308 // | PEReference | S )* 309 // ... same as [79] extPE, which is where the code is 310 311 ExternalEntity externalSubset = new ExternalEntity(in); 312 externalParameterEntity(externalSubset); 313 314 if (!in.isEOF()) { 315 fatal("P-001", new Object[] 316 {Integer.toHexString(((int) getc()))}); 317 } 318 afterRoot(); 319 dtdHandler.endDTD(); 320 321 } catch (EndOfInputException e) { 322 if (!in.isDocument()) { 323 String name = in.getName(); 324 do { // force a relevant URI and line number 325 in = in.pop(); 326 } while (in.isInternal()); 327 fatal("P-002", new Object[]{name}); 328 } else { 329 fatal("P-003", null); 330 } 331 } catch (RuntimeException e) { 332 // Don't discard location that triggered the exception 333 // ## Should properly wrap exception 334 System.err.print("Internal DTD parser error: "); // ## 335 e.printStackTrace(); 336 throw new SAXParseException(e.getMessage() != null 337 ? e.getMessage() : e.getClass().getName(), 338 getPublicId(), getSystemId(), 339 getLineNumber(), getColumnNumber()); 340 341 } finally { 342 // recycle temporary data used during parsing 343 strTmp = null; 344 nameTmp = null; 345 nameCache = null; 346 347 // ditto input sources etc 348 if (in != null) { 349 in.close(); 350 in = null; 351 } 352 353 // get rid of all DTD info ... some of it would be 354 // useful for editors etc, investigate later. 355 356 params.clear(); 357 entities.clear(); 358 notations.clear(); 359 declaredElements.clear(); 360 // elements.clear(); 361 ids.clear(); 362 } 363 } 364 afterRoot()365 void afterRoot() throws SAXException { 366 // Make sure all IDREFs match declared ID attributes. We scan 367 // after the document element is parsed, since XML allows forward 368 // references, and only now can we know if they're all resolved. 369 370 for (Enumeration e = ids.keys(); 371 e.hasMoreElements(); 372 ) { 373 String id = (String) e.nextElement(); 374 Boolean value = (Boolean) ids.get(id); 375 if (Boolean.FALSE == value) 376 error("V-024", new Object[]{id}); 377 } 378 } 379 380 381 // role is for diagnostics whitespace(String roleId)382 private void whitespace(String roleId) 383 throws IOException, SAXException { 384 385 // [3] S ::= (#x20 | #x9 | #xd | #xa)+ 386 if (!maybeWhitespace()) { 387 fatal("P-004", new Object[] 388 {messages.getMessage(locale, roleId)}); 389 } 390 } 391 392 // S? maybeWhitespace()393 private boolean maybeWhitespace() 394 throws IOException, SAXException { 395 396 if (!doLexicalPE) 397 return in.maybeWhitespace(); 398 399 // see getc() for the PE logic -- this lets us splice 400 // expansions of PEs in "anywhere". getc() has smarts, 401 // so for external PEs we don't bypass it. 402 403 // XXX we can marginally speed PE handling, and certainly 404 // be cleaner (hence potentially more correct), by using 405 // the observations that expanded PEs only start and stop 406 // where whitespace is allowed. getc wouldn't need any 407 // "lexical" PE expansion logic, and no other method needs 408 // to handle termination of PEs. (parsing of literals would 409 // still need to pop entities, but not parsing of references 410 // in content.) 411 412 char c = getc(); 413 boolean saw = false; 414 415 while (c == ' ' || c == '\t' || c == '\n' || c == '\r') { 416 saw = true; 417 418 // this gracefully ends things when we stop playing 419 // with internal parameters. caller should have a 420 // grammar rule allowing whitespace at end of entity. 421 if (in.isEOF() && !in.isInternal()) 422 return saw; 423 c = getc(); 424 } 425 ungetc(); 426 return saw; 427 } 428 maybeGetName()429 private String maybeGetName() 430 throws IOException, SAXException { 431 432 NameCacheEntry entry = maybeGetNameCacheEntry(); 433 return (entry == null) ? null : entry.name; 434 } 435 maybeGetNameCacheEntry()436 private NameCacheEntry maybeGetNameCacheEntry() 437 throws IOException, SAXException { 438 439 // [5] Name ::= (Letter|'_'|':') (Namechar)* 440 char c = getc(); 441 442 if (!XmlChars.isLetter(c) && c != ':' && c != '_') { 443 ungetc(); 444 return null; 445 } 446 return nameCharString(c); 447 } 448 449 // Used when parsing enumerations getNmtoken()450 private String getNmtoken() 451 throws IOException, SAXException { 452 453 // [7] Nmtoken ::= (Namechar)+ 454 char c = getc(); 455 if (!XmlChars.isNameChar(c)) 456 fatal("P-006", new Object[]{new Character(c)}); 457 return nameCharString(c).name; 458 } 459 460 // n.b. this gets used when parsing attribute values (for 461 // internal references) so we can't use strTmp; it's also 462 // a hotspot for CPU and memory in the parser (called at least 463 // once for each element) so this has been optimized a bit. 464 nameCharString(char c)465 private NameCacheEntry nameCharString(char c) 466 throws IOException, SAXException { 467 468 int i = 1; 469 470 nameTmp[0] = c; 471 for (; ;) { 472 if ((c = in.getNameChar()) == 0) 473 break; 474 if (i >= nameTmp.length) { 475 char tmp [] = new char[nameTmp.length + 10]; 476 System.arraycopy(nameTmp, 0, tmp, 0, nameTmp.length); 477 nameTmp = tmp; 478 } 479 nameTmp[i++] = c; 480 } 481 return nameCache.lookupEntry(nameTmp, i); 482 } 483 484 // 485 // much similarity between parsing entity values in DTD 486 // and attribute values (in DTD or content) ... both follow 487 // literal parsing rules, newline canonicalization, etc 488 // 489 // leaves value in 'strTmp' ... either a "replacement text" (4.5), 490 // or else partially normalized attribute value (the first bit 491 // of 3.3.3's spec, without the "if not CDATA" bits). 492 // parseLiteral(boolean isEntityValue)493 private void parseLiteral(boolean isEntityValue) 494 throws IOException, SAXException { 495 496 // [9] EntityValue ::= 497 // '"' ([^"&%] | Reference | PEReference)* '"' 498 // | "'" ([^'&%] | Reference | PEReference)* "'" 499 // [10] AttValue ::= 500 // '"' ([^"&] | Reference )* '"' 501 // | "'" ([^'&] | Reference )* "'" 502 char quote = getc(); 503 char c; 504 InputEntity source = in; 505 506 if (quote != '\'' && quote != '"') { 507 fatal("P-007"); 508 } 509 510 // don't report entity expansions within attributes, 511 // they're reported "fully expanded" via SAX 512 // isInAttribute = !isEntityValue; 513 514 // get value into strTmp 515 strTmp = new StringBuffer(); 516 517 // scan, allowing entity push/pop wherever ... 518 // expanded entities can't terminate the literal! 519 for (; ;) { 520 if (in != source && in.isEOF()) { 521 // we don't report end of parsed entities 522 // within attributes (no SAX hooks) 523 in = in.pop(); 524 continue; 525 } 526 if ((c = getc()) == quote && in == source) { 527 break; 528 } 529 530 // 531 // Basically the "reference in attribute value" 532 // row of the chart in section 4.4 of the spec 533 // 534 if (c == '&') { 535 String entityName = maybeGetName(); 536 537 if (entityName != null) { 538 nextChar(';', "F-020", entityName); 539 540 // 4.4 says: bypass these here ... we'll catch 541 // forbidden refs to unparsed entities on use 542 if (isEntityValue) { 543 strTmp.append('&'); 544 strTmp.append(entityName); 545 strTmp.append(';'); 546 continue; 547 } 548 expandEntityInLiteral(entityName, entities, isEntityValue); 549 550 551 // character references are always included immediately 552 } else if ((c = getc()) == '#') { 553 int tmp = parseCharNumber(); 554 555 if (tmp > 0xffff) { 556 tmp = surrogatesToCharTmp(tmp); 557 strTmp.append(charTmp[0]); 558 if (tmp == 2) 559 strTmp.append(charTmp[1]); 560 } else 561 strTmp.append((char) tmp); 562 } else 563 fatal("P-009"); 564 continue; 565 566 } 567 568 // expand parameter entities only within entity value literals 569 if (c == '%' && isEntityValue) { 570 String entityName = maybeGetName(); 571 572 if (entityName != null) { 573 nextChar(';', "F-021", entityName); 574 expandEntityInLiteral(entityName, params, isEntityValue); 575 continue; 576 } else 577 fatal("P-011"); 578 } 579 580 // For attribute values ... 581 if (!isEntityValue) { 582 // 3.3.3 says whitespace normalizes to space... 583 if (c == ' ' || c == '\t' || c == '\n' || c == '\r') { 584 strTmp.append(' '); 585 continue; 586 } 587 588 // "<" not legal in parsed literals ... 589 if (c == '<') 590 fatal("P-012"); 591 } 592 593 strTmp.append(c); 594 } 595 // isInAttribute = false; 596 } 597 598 // does a SINGLE expansion of the entity (often reparsed later) expandEntityInLiteral(String name, SimpleHashtable table, boolean isEntityValue)599 private void expandEntityInLiteral(String name, SimpleHashtable table, 600 boolean isEntityValue) 601 throws IOException, SAXException { 602 603 Object entity = table.get(name); 604 605 if (entity instanceof InternalEntity) { 606 InternalEntity value = (InternalEntity) entity; 607 pushReader(value.buf, name, !value.isPE); 608 609 } else if (entity instanceof ExternalEntity) { 610 if (!isEntityValue) // must be a PE ... 611 fatal("P-013", new Object[]{name}); 612 // XXX if this returns false ... 613 pushReader((ExternalEntity) entity); 614 615 } else if (entity == null) { 616 // 617 // Note: much confusion about whether spec requires such 618 // errors to be fatal in many cases, but none about whether 619 // it allows "normal" errors to be unrecoverable! 620 // 621 fatal((table == params) ? "V-022" : "P-014", 622 new Object[]{name}); 623 } 624 } 625 626 // [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") 627 // for PUBLIC and SYSTEM literals, also "<?xml ...type='literal'?>' 628 629 // NOTE: XML spec should explicitly say that PE ref syntax is 630 // ignored in PIs, comments, SystemLiterals, and Pubid Literal 631 // values ... can't process the XML spec's own DTD without doing 632 // that for comments. 633 getQuotedString(String type, String extra)634 private String getQuotedString(String type, String extra) 635 throws IOException, SAXException { 636 637 // use in.getc to bypass PE processing 638 char quote = in.getc(); 639 640 if (quote != '\'' && quote != '"') 641 fatal("P-015", new Object[]{ 642 messages.getMessage(locale, type, new Object[]{extra}) 643 }); 644 645 char c; 646 647 strTmp = new StringBuffer(); 648 while ((c = in.getc()) != quote) 649 strTmp.append((char) c); 650 return strTmp.toString(); 651 } 652 653 parsePublicId()654 private String parsePublicId() throws IOException, SAXException { 655 656 // [12] PubidLiteral ::= ('"' PubidChar* '"') | ("'" PubidChar* "'") 657 // [13] PubidChar ::= #x20|#xd|#xa|[a-zA-Z0-9]|[-'()+,./:=?;!*#@$_%] 658 String retval = getQuotedString("F-033", null); 659 for (int i = 0; i < retval.length(); i++) { 660 char c = retval.charAt(i); 661 if (" \r\n-'()+,./:=?;!*#@$_%0123456789".indexOf(c) == -1 662 && !(c >= 'A' && c <= 'Z') 663 && !(c >= 'a' && c <= 'z')) 664 fatal("P-016", new Object[]{new Character(c)}); 665 } 666 strTmp = new StringBuffer(); 667 strTmp.append(retval); 668 return normalize(false); 669 } 670 671 // [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) 672 // handled by: InputEntity.parsedContent() 673 maybeComment(boolean skipStart)674 private boolean maybeComment(boolean skipStart) 675 throws IOException, SAXException { 676 677 // [15] Comment ::= '<!--' 678 // ( (Char - '-') | ('-' (Char - '-'))* 679 // '-->' 680 if (!in.peek(skipStart ? "!--" : "<!--", null)) 681 return false; 682 683 boolean savedLexicalPE = doLexicalPE; 684 boolean saveCommentText; 685 686 doLexicalPE = false; 687 saveCommentText = false; 688 if (saveCommentText) 689 strTmp = new StringBuffer(); 690 691 oneComment: 692 for (; ;) { 693 try { 694 // bypass PE expansion, but permit PEs 695 // to complete ... valid docs won't care. 696 for (; ;) { 697 int c = getc(); 698 if (c == '-') { 699 c = getc(); 700 if (c != '-') { 701 if (saveCommentText) 702 strTmp.append('-'); 703 ungetc(); 704 continue; 705 } 706 nextChar('>', "F-022", null); 707 break oneComment; 708 } 709 if (saveCommentText) 710 strTmp.append((char) c); 711 } 712 } catch (EndOfInputException e) { 713 // 714 // This is fatal EXCEPT when we're processing a PE... 715 // in which case a validating processor reports an error. 716 // External PEs are easy to detect; internal ones we 717 // infer by being an internal entity outside an element. 718 // 719 if (in.isInternal()) { 720 error("V-021", null); 721 } 722 fatal("P-017"); 723 } 724 } 725 doLexicalPE = savedLexicalPE; 726 if (saveCommentText) 727 dtdHandler.comment(strTmp.toString()); 728 return true; 729 } 730 maybePI(boolean skipStart)731 private boolean maybePI(boolean skipStart) 732 throws IOException, SAXException { 733 734 // [16] PI ::= '<?' PITarget 735 // (S (Char* - (Char* '?>' Char*)))? 736 // '?>' 737 // [17] PITarget ::= Name - (('X'|'x')('M'|'m')('L'|'l') 738 boolean savedLexicalPE = doLexicalPE; 739 740 if (!in.peek(skipStart ? "?" : "<?", null)) 741 return false; 742 doLexicalPE = false; 743 744 String target = maybeGetName(); 745 746 if (target == null) { 747 fatal("P-018"); 748 } 749 if ("xml".equals(target)) { 750 fatal("P-019"); 751 } 752 if ("xml".equalsIgnoreCase(target)) { 753 fatal("P-020", new Object[]{target}); 754 } 755 756 if (maybeWhitespace()) { 757 strTmp = new StringBuffer(); 758 try { 759 for (; ;) { 760 // use in.getc to bypass PE processing 761 char c = in.getc(); 762 //Reached the end of PI. 763 if (c == '?' && in.peekc('>')) 764 break; 765 strTmp.append(c); 766 } 767 } catch (EndOfInputException e) { 768 fatal("P-021"); 769 } 770 dtdHandler.processingInstruction(target, strTmp.toString()); 771 } else { 772 if (!in.peek("?>", null)) { 773 fatal("P-022"); 774 } 775 dtdHandler.processingInstruction(target, ""); 776 } 777 778 doLexicalPE = savedLexicalPE; 779 return true; 780 } 781 782 // [18] CDSect ::= CDStart CData CDEnd 783 // [19] CDStart ::= '<![CDATA[' 784 // [20] CData ::= (Char* - (Char* ']]>' Char*)) 785 // [21] CDEnd ::= ']]>' 786 // 787 // ... handled by InputEntity.unparsedContent() 788 789 // collapsing several rules together ... 790 // simpler than attribute literals -- no reference parsing! maybeReadAttribute(String name, boolean must)791 private String maybeReadAttribute(String name, boolean must) 792 throws IOException, SAXException { 793 794 // [24] VersionInfo ::= S 'version' Eq \'|\" versionNum \'|\" 795 // [80] EncodingDecl ::= S 'encoding' Eq \'|\" EncName \'|\" 796 // [32] SDDecl ::= S 'standalone' Eq \'|\" ... \'|\" 797 if (!maybeWhitespace()) { 798 if (!must) { 799 return null; 800 } 801 fatal("P-024", new Object[]{name}); 802 // NOTREACHED 803 } 804 805 if (!peek(name)) { 806 if (must) { 807 fatal("P-024", new Object[]{name}); 808 } else { 809 // To ensure that the whitespace is there so that when we 810 // check for the next attribute we assure that the 811 // whitespace still exists. 812 ungetc(); 813 return null; 814 } 815 } 816 817 // [25] Eq ::= S? '=' S? 818 maybeWhitespace(); 819 nextChar('=', "F-023", null); 820 maybeWhitespace(); 821 822 return getQuotedString("F-035", name); 823 } 824 readVersion(boolean must, String versionNum)825 private void readVersion(boolean must, String versionNum) 826 throws IOException, SAXException { 827 828 String value = maybeReadAttribute("version", must); 829 830 // [26] versionNum ::= ([a-zA-Z0-9_.:]| '-')+ 831 832 if (must && value == null) 833 fatal("P-025", new Object[]{versionNum}); 834 if (value != null) { 835 int length = value.length(); 836 for (int i = 0; i < length; i++) { 837 char c = value.charAt(i); 838 if (!((c >= '0' && c <= '9') 839 || c == '_' || c == '.' 840 || (c >= 'a' && c <= 'z') 841 || (c >= 'A' && c <= 'Z') 842 || c == ':' || c == '-') 843 ) 844 fatal("P-026", new Object[]{value}); 845 } 846 } 847 if (value != null && !value.equals(versionNum)) 848 error("P-027", new Object[]{versionNum, value}); 849 } 850 851 // common code used by most markup declarations 852 // ... S (Q)Name ... getMarkupDeclname(String roleId, boolean qname)853 private String getMarkupDeclname(String roleId, boolean qname) 854 throws IOException, SAXException { 855 856 String name; 857 858 whitespace(roleId); 859 name = maybeGetName(); 860 if (name == null) 861 fatal("P-005", new Object[] 862 {messages.getMessage(locale, roleId)}); 863 return name; 864 } 865 maybeMarkupDecl()866 private boolean maybeMarkupDecl() 867 throws IOException, SAXException { 868 869 // [29] markupdecl ::= elementdecl | Attlistdecl 870 // | EntityDecl | NotationDecl | PI | Comment 871 return maybeElementDecl() 872 || maybeAttlistDecl() 873 || maybeEntityDecl() 874 || maybeNotationDecl() 875 || maybePI(false) 876 || maybeComment(false); 877 } 878 879 private static final String XmlLang = "xml:lang"; 880 isXmlLang(String value)881 private boolean isXmlLang(String value) { 882 883 // [33] LanguageId ::= Langcode ('-' Subcode)* 884 // [34] Langcode ::= ISO639Code | IanaCode | UserCode 885 // [35] ISO639Code ::= [a-zA-Z] [a-zA-Z] 886 // [36] IanaCode ::= [iI] '-' SubCode 887 // [37] UserCode ::= [xX] '-' SubCode 888 // [38] SubCode ::= [a-zA-Z]+ 889 890 // the ISO and IANA codes (and subcodes) are registered, 891 // but that's neither a WF nor a validity constraint. 892 893 int nextSuffix; 894 char c; 895 896 if (value.length() < 2) 897 return false; 898 c = value.charAt(1); 899 if (c == '-') { // IANA, or user, code 900 c = value.charAt(0); 901 if (!(c == 'i' || c == 'I' || c == 'x' || c == 'X')) 902 return false; 903 nextSuffix = 1; 904 } else if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) { 905 // 2 letter ISO code, or error 906 c = value.charAt(0); 907 if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))) 908 return false; 909 nextSuffix = 2; 910 } else 911 return false; 912 913 // here "suffix" ::= '-' [a-zA-Z]+ suffix* 914 while (nextSuffix < value.length()) { 915 c = value.charAt(nextSuffix); 916 if (c != '-') 917 break; 918 while (++nextSuffix < value.length()) { 919 c = value.charAt(nextSuffix); 920 if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))) 921 break; 922 } 923 } 924 return value.length() == nextSuffix && c != '-'; 925 } 926 927 928 // 929 // CHAPTER 3: Logical Structures 930 // 931 932 /** 933 * To validate, subclassers should at this time make sure that 934 * values are of the declared types:<UL> 935 * <LI> ID and IDREF(S) values are Names 936 * <LI> NMTOKEN(S) are Nmtokens 937 * <LI> ENUMERATION values match one of the tokens 938 * <LI> NOTATION values match a notation name 939 * <LI> ENTITIY(IES) values match an unparsed external entity 940 * </UL> 941 * <p/> 942 * <P> Separately, make sure IDREF values match some ID 943 * provided in the document (in the afterRoot method). 944 */ 945 /* void validateAttributeSyntax (Attribute attr, String value) 946 throws DTDParseException { 947 // ID, IDREF(S) ... values are Names 948 if (Attribute.ID == attr.type()) { 949 if (!XmlNames.isName (value)) 950 error ("V-025", new Object [] { value }); 951 952 Boolean b = (Boolean) ids.getNonInterned (value); 953 if (b == null || b.equals (Boolean.FALSE)) 954 ids.put (value.intern (), Boolean.TRUE); 955 else 956 error ("V-026", new Object [] { value }); 957 958 } else if (Attribute.IDREF == attr.type()) { 959 if (!XmlNames.isName (value)) 960 error ("V-027", new Object [] { value }); 961 962 Boolean b = (Boolean) ids.getNonInterned (value); 963 if (b == null) 964 ids.put (value.intern (), Boolean.FALSE); 965 966 } else if (Attribute.IDREFS == attr.type()) { 967 StringTokenizer tokenizer = new StringTokenizer (value); 968 Boolean b; 969 boolean sawValue = false; 970 971 while (tokenizer.hasMoreTokens ()) { 972 value = tokenizer.nextToken (); 973 if (!XmlNames.isName (value)) 974 error ("V-027", new Object [] { value }); 975 b = (Boolean) ids.getNonInterned (value); 976 if (b == null) 977 ids.put (value.intern (), Boolean.FALSE); 978 sawValue = true; 979 } 980 if (!sawValue) 981 error ("V-039", null); 982 983 984 // NMTOKEN(S) ... values are Nmtoken(s) 985 } else if (Attribute.NMTOKEN == attr.type()) { 986 if (!XmlNames.isNmtoken (value)) 987 error ("V-028", new Object [] { value }); 988 989 } else if (Attribute.NMTOKENS == attr.type()) { 990 StringTokenizer tokenizer = new StringTokenizer (value); 991 boolean sawValue = false; 992 993 while (tokenizer.hasMoreTokens ()) { 994 value = tokenizer.nextToken (); 995 if (!XmlNames.isNmtoken (value)) 996 error ("V-028", new Object [] { value }); 997 sawValue = true; 998 } 999 if (!sawValue) 1000 error ("V-032", null); 1001 1002 // ENUMERATION ... values match one of the tokens 1003 } else if (Attribute.ENUMERATION == attr.type()) { 1004 for (int i = 0; i < attr.values().length; i++) 1005 if (value.equals (attr.values()[i])) 1006 return; 1007 error ("V-029", new Object [] { value }); 1008 1009 // NOTATION values match a notation name 1010 } else if (Attribute.NOTATION == attr.type()) { 1011 // 1012 // XXX XML 1.0 spec should probably list references to 1013 // externally defined notations in standalone docs as 1014 // validity errors. Ditto externally defined unparsed 1015 // entities; neither should show up in attributes, else 1016 // one needs to read the external declarations in order 1017 // to make sense of the document (exactly what tagging 1018 // a doc as "standalone" intends you won't need to do). 1019 // 1020 for (int i = 0; i < attr.values().length; i++) 1021 if (value.equals (attr.values()[i])) 1022 return; 1023 error ("V-030", new Object [] { value }); 1024 1025 // ENTITY(IES) values match an unparsed entity(ies) 1026 } else if (Attribute.ENTITY == attr.type()) { 1027 // see note above re standalone 1028 if (!isUnparsedEntity (value)) 1029 error ("V-031", new Object [] { value }); 1030 1031 } else if (Attribute.ENTITIES == attr.type()) { 1032 StringTokenizer tokenizer = new StringTokenizer (value); 1033 boolean sawValue = false; 1034 1035 while (tokenizer.hasMoreTokens ()) { 1036 value = tokenizer.nextToken (); 1037 // see note above re standalone 1038 if (!isUnparsedEntity (value)) 1039 error ("V-031", new Object [] { value }); 1040 sawValue = true; 1041 } 1042 if (!sawValue) 1043 error ("V-040", null); 1044 1045 } else if (Attribute.CDATA != attr.type()) 1046 throw new InternalError (attr.type()); 1047 } 1048 */ 1049 /* 1050 private boolean isUnparsedEntity (String name) 1051 { 1052 Object e = entities.getNonInterned (name); 1053 if (e == null || !(e instanceof ExternalEntity)) 1054 return false; 1055 return ((ExternalEntity)e).notation != null; 1056 } 1057 */ maybeElementDecl()1058 private boolean maybeElementDecl() 1059 throws IOException, SAXException { 1060 1061 // [45] elementDecl ::= '<!ELEMENT' S Name S contentspec S? '>' 1062 // [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children 1063 InputEntity start = peekDeclaration("!ELEMENT"); 1064 1065 if (start == null) 1066 return false; 1067 1068 // n.b. for content models where inter-element whitespace is 1069 // ignorable, we mark that fact here. 1070 String name = getMarkupDeclname("F-015", true); 1071 // Element element = (Element) elements.get (name); 1072 // boolean declEffective = false; 1073 1074 /* 1075 if (element != null) { 1076 if (element.contentModel() != null) { 1077 error ("V-012", new Object [] { name }); 1078 } // else <!ATTLIST name ...> came first 1079 } else { 1080 element = new Element(name); 1081 elements.put (element.name(), element); 1082 declEffective = true; 1083 } 1084 */ 1085 if (declaredElements.contains(name)) 1086 error("V-012", new Object[]{name}); 1087 else { 1088 declaredElements.add(name); 1089 // declEffective = true; 1090 } 1091 1092 short modelType; 1093 whitespace("F-000"); 1094 if (peek(strEMPTY)) { 1095 /// // leave element.contentModel as null for this case. 1096 dtdHandler.startContentModel(name, modelType = DTDEventListener.CONTENT_MODEL_EMPTY); 1097 } else if (peek(strANY)) { 1098 /// element.setContentModel(new StringModel(StringModelType.ANY)); 1099 dtdHandler.startContentModel(name, modelType = DTDEventListener.CONTENT_MODEL_ANY); 1100 } else { 1101 modelType = getMixedOrChildren(name); 1102 } 1103 1104 dtdHandler.endContentModel(name, modelType); 1105 1106 maybeWhitespace(); 1107 char c = getc(); 1108 if (c != '>') 1109 fatal("P-036", new Object[]{name, new Character(c)}); 1110 if (start != in) 1111 error("V-013", null); 1112 1113 /// dtdHandler.elementDecl(element); 1114 1115 return true; 1116 } 1117 1118 // We're leaving the content model as a regular expression; 1119 // it's an efficient natural way to express such things, and 1120 // libraries often interpret them. No whitespace in the 1121 // model we store, though! 1122 1123 /** 1124 * returns content model type. 1125 */ getMixedOrChildren(String elementName )1126 private short getMixedOrChildren(String elementName/*Element element*/) 1127 throws IOException, SAXException { 1128 1129 InputEntity start; 1130 1131 // [47] children ::= (choice|seq) ('?'|'*'|'+')? 1132 strTmp = new StringBuffer(); 1133 1134 nextChar('(', "F-028", elementName); 1135 start = in; 1136 maybeWhitespace(); 1137 strTmp.append('('); 1138 1139 short modelType; 1140 if (peek("#PCDATA")) { 1141 strTmp.append("#PCDATA"); 1142 dtdHandler.startContentModel(elementName, modelType = DTDEventListener.CONTENT_MODEL_MIXED); 1143 getMixed(elementName, start); 1144 } else { 1145 dtdHandler.startContentModel(elementName, modelType = DTDEventListener.CONTENT_MODEL_CHILDREN); 1146 getcps(elementName, start); 1147 } 1148 1149 return modelType; 1150 } 1151 1152 // '(' S? already consumed 1153 // matching ')' must be in "start" entity if validating getcps( String elementName, InputEntity start)1154 private void getcps(/*Element element,*/String elementName, InputEntity start) 1155 throws IOException, SAXException { 1156 1157 // [48] cp ::= (Name|choice|seq) ('?'|'*'|'+')? 1158 // [49] choice ::= '(' S? cp (S? '|' S? cp)* S? ')' 1159 // [50] seq ::= '(' S? cp (S? ',' S? cp)* S? ')' 1160 boolean decided = false; 1161 char type = 0; 1162 // ContentModel retval, temp, current; 1163 1164 // retval = temp = current = null; 1165 1166 dtdHandler.startModelGroup(); 1167 1168 do { 1169 String tag; 1170 1171 tag = maybeGetName(); 1172 if (tag != null) { 1173 strTmp.append(tag); 1174 // temp = new ElementModel(tag); 1175 // getFrequency((RepeatableContent)temp); 1176 ///-> 1177 dtdHandler.childElement(tag, getFrequency()); 1178 ///<- 1179 } else if (peek("(")) { 1180 InputEntity next = in; 1181 strTmp.append('('); 1182 maybeWhitespace(); 1183 // temp = getcps(element, next); 1184 // getFrequency(temp); 1185 ///-> 1186 getcps(elementName, next); 1187 /// getFrequency(); <- this looks like a bug 1188 ///<- 1189 } else 1190 fatal((type == 0) ? "P-039" : 1191 ((type == ',') ? "P-037" : "P-038"), 1192 new Object[]{new Character(getc())}); 1193 1194 maybeWhitespace(); 1195 if (decided) { 1196 char c = getc(); 1197 1198 // if (current != null) { 1199 // current.addChild(temp); 1200 // } 1201 if (c == type) { 1202 strTmp.append(type); 1203 maybeWhitespace(); 1204 reportConnector(type); 1205 continue; 1206 } else if (c == '\u0029') { // rparen 1207 ungetc(); 1208 continue; 1209 } else { 1210 fatal((type == 0) ? "P-041" : "P-040", 1211 new Object[]{ 1212 new Character(c), 1213 new Character(type) 1214 }); 1215 } 1216 } else { 1217 type = getc(); 1218 switch (type) { 1219 case '|': 1220 case ',': 1221 reportConnector(type); 1222 break; 1223 default: 1224 // retval = temp; 1225 ungetc(); 1226 continue; 1227 } 1228 // retval = (ContentModel)current; 1229 decided = true; 1230 // current.addChild(temp); 1231 strTmp.append(type); 1232 } 1233 maybeWhitespace(); 1234 } while (!peek(")")); 1235 1236 if (in != start) 1237 error("V-014", new Object[]{elementName}); 1238 strTmp.append(')'); 1239 1240 dtdHandler.endModelGroup(getFrequency()); 1241 // return retval; 1242 } 1243 reportConnector(char type)1244 private void reportConnector(char type) throws SAXException { 1245 switch (type) { 1246 case '|': 1247 dtdHandler.connector(DTDEventListener.CHOICE); ///<- 1248 return; 1249 case ',': 1250 dtdHandler.connector(DTDEventListener.SEQUENCE); ///<- 1251 return; 1252 default: 1253 throw new Error(); //assertion failed. 1254 } 1255 } 1256 getFrequency()1257 private short getFrequency() 1258 throws IOException, SAXException { 1259 1260 final char c = getc(); 1261 1262 if (c == '?') { 1263 strTmp.append(c); 1264 return DTDEventListener.OCCURENCE_ZERO_OR_ONE; 1265 // original.setRepeat(Repeat.ZERO_OR_ONE); 1266 } else if (c == '+') { 1267 strTmp.append(c); 1268 return DTDEventListener.OCCURENCE_ONE_OR_MORE; 1269 // original.setRepeat(Repeat.ONE_OR_MORE); 1270 } else if (c == '*') { 1271 strTmp.append(c); 1272 return DTDEventListener.OCCURENCE_ZERO_OR_MORE; 1273 // original.setRepeat(Repeat.ZERO_OR_MORE); 1274 } else { 1275 ungetc(); 1276 return DTDEventListener.OCCURENCE_ONCE; 1277 } 1278 } 1279 1280 // '(' S? '#PCDATA' already consumed 1281 // matching ')' must be in "start" entity if validating getMixed(String elementName, InputEntity start)1282 private void getMixed(String elementName, /*Element element,*/ InputEntity start) 1283 throws IOException, SAXException { 1284 1285 // [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' 1286 // | '(' S? '#PCDATA' S? ')' 1287 maybeWhitespace(); 1288 if (peek("\u0029*") || peek("\u0029")) { 1289 if (in != start) 1290 error("V-014", new Object[]{elementName}); 1291 strTmp.append(')'); 1292 // element.setContentModel(new StringModel(StringModelType.PCDATA)); 1293 return; 1294 } 1295 1296 ArrayList l = new ArrayList(); 1297 // l.add(new StringModel(StringModelType.PCDATA)); 1298 1299 1300 while (peek("|")) { 1301 String name; 1302 1303 strTmp.append('|'); 1304 maybeWhitespace(); 1305 1306 doLexicalPE = true; 1307 name = maybeGetName(); 1308 if (name == null) 1309 fatal("P-042", new Object[] 1310 {elementName, Integer.toHexString(getc())}); 1311 if (l.contains(name)) { 1312 error("V-015", new Object[]{name}); 1313 } else { 1314 l.add(name); 1315 dtdHandler.mixedElement(name); 1316 } 1317 strTmp.append(name); 1318 maybeWhitespace(); 1319 } 1320 1321 if (!peek("\u0029*")) // right paren 1322 fatal("P-043", new Object[] 1323 {elementName, new Character(getc())}); 1324 if (in != start) 1325 error("V-014", new Object[]{elementName}); 1326 strTmp.append(')'); 1327 // ChoiceModel cm = new ChoiceModel((Collection)l); 1328 // cm.setRepeat(Repeat.ZERO_OR_MORE); 1329 // element.setContentModel(cm); 1330 } 1331 maybeAttlistDecl()1332 private boolean maybeAttlistDecl() 1333 throws IOException, SAXException { 1334 1335 // [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>' 1336 InputEntity start = peekDeclaration("!ATTLIST"); 1337 1338 if (start == null) 1339 return false; 1340 1341 String elementName = getMarkupDeclname("F-016", true); 1342 // Element element = (Element) elements.get (name); 1343 1344 // if (element == null) { 1345 // // not yet declared -- no problem. 1346 // element = new Element(name); 1347 // elements.put(name, element); 1348 // } 1349 1350 while (!peek(">")) { 1351 1352 // [53] AttDef ::= S Name S AttType S DefaultDecl 1353 // [54] AttType ::= StringType | TokenizedType | EnumeratedType 1354 1355 // look for global attribute definitions, don't expand for now... 1356 maybeWhitespace(); 1357 char c = getc(); 1358 if (c == '%') { 1359 String entityName = maybeGetName(); 1360 if (entityName != null) { 1361 nextChar(';', "F-021", entityName); 1362 whitespace("F-021"); 1363 continue; 1364 } else 1365 fatal("P-011"); 1366 } 1367 1368 ungetc(); 1369 // look for attribute name otherwise 1370 String attName = maybeGetName(); 1371 if (attName == null) { 1372 fatal("P-044", new Object[]{new Character(getc())}); 1373 } 1374 whitespace("F-001"); 1375 1376 /// Attribute a = new Attribute (name); 1377 1378 String typeName; 1379 Vector values = null; // notation/enumeration values 1380 1381 // Note: use the type constants from Attribute 1382 // so that "==" may be used (faster) 1383 1384 // [55] StringType ::= 'CDATA' 1385 if (peek(TYPE_CDATA)) 1386 /// a.setType(Attribute.CDATA); 1387 typeName = TYPE_CDATA; 1388 1389 // [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' 1390 // | 'ENTITY' | 'ENTITIES' 1391 // | 'NMTOKEN' | 'NMTOKENS' 1392 // n.b. if "IDREFS" is there, both "ID" and "IDREF" 1393 // match peekahead ... so this order matters! 1394 else if (peek(TYPE_IDREFS)) 1395 typeName = TYPE_IDREFS; 1396 else if (peek(TYPE_IDREF)) 1397 typeName = TYPE_IDREF; 1398 else if (peek(TYPE_ID)) { 1399 typeName = TYPE_ID; 1400 // TODO: should implement this error check? 1401 /// if (element.id() != null) { 1402 /// error ("V-016", new Object [] { element.id() }); 1403 /// } else 1404 /// element.setId(name); 1405 } else if (peek(TYPE_ENTITY)) 1406 typeName = TYPE_ENTITY; 1407 else if (peek(TYPE_ENTITIES)) 1408 typeName = TYPE_ENTITIES; 1409 else if (peek(TYPE_NMTOKENS)) 1410 typeName = TYPE_NMTOKENS; 1411 else if (peek(TYPE_NMTOKEN)) 1412 typeName = TYPE_NMTOKEN; 1413 1414 // [57] EnumeratedType ::= NotationType | Enumeration 1415 // [58] NotationType ::= 'NOTATION' S '(' S? Name 1416 // (S? '|' S? Name)* S? ')' 1417 else if (peek(TYPE_NOTATION)) { 1418 typeName = TYPE_NOTATION; 1419 whitespace("F-002"); 1420 nextChar('(', "F-029", null); 1421 maybeWhitespace(); 1422 1423 values = new Vector(); 1424 do { 1425 String name; 1426 if ((name = maybeGetName()) == null) 1427 fatal("P-068"); 1428 // permit deferred declarations 1429 if (notations.get(name) == null) 1430 notations.put(name, name); 1431 values.addElement(name); 1432 maybeWhitespace(); 1433 if (peek("|")) 1434 maybeWhitespace(); 1435 } while (!peek(")")); 1436 /// a.setValues(new String [v.size ()]); 1437 /// for (int i = 0; i < v.size (); i++) 1438 /// a.setValue(i, (String)v.elementAt(i)); 1439 1440 // [59] Enumeration ::= '(' S? Nmtoken (S? '|' Nmtoken)* S? ')' 1441 } else if (peek("(")) { 1442 /// a.setType(Attribute.ENUMERATION); 1443 typeName = TYPE_ENUMERATION; 1444 1445 maybeWhitespace(); 1446 1447 /// Vector v = new Vector (); 1448 values = new Vector(); 1449 do { 1450 String name = getNmtoken(); 1451 /// v.addElement (name); 1452 values.addElement(name); 1453 maybeWhitespace(); 1454 if (peek("|")) 1455 maybeWhitespace(); 1456 } while (!peek(")")); 1457 /// a.setValues(new String [v.size ()]); 1458 /// for (int i = 0; i < v.size (); i++) 1459 /// a.setValue(i, (String)v.elementAt(i)); 1460 } else { 1461 fatal("P-045", 1462 new Object[]{attName, new Character(getc())}); 1463 typeName = null; 1464 } 1465 1466 short attributeUse; 1467 String defaultValue = null; 1468 1469 // [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' 1470 // | (('#FIXED' S)? AttValue) 1471 whitespace("F-003"); 1472 if (peek("#REQUIRED")) 1473 attributeUse = DTDEventListener.USE_REQUIRED; 1474 /// a.setIsRequired(true); 1475 else if (peek("#FIXED")) { 1476 /// if (a.type() == Attribute.ID) 1477 if (typeName == TYPE_ID) 1478 error("V-017", new Object[]{attName}); 1479 /// a.setIsFixed(true); 1480 attributeUse = DTDEventListener.USE_FIXED; 1481 whitespace("F-004"); 1482 parseLiteral(false); 1483 /// if (a.type() != Attribute.CDATA) 1484 /// a.setDefaultValue(normalize(false)); 1485 /// else 1486 /// a.setDefaultValue(strTmp.toString()); 1487 1488 if (typeName == TYPE_CDATA) 1489 defaultValue = normalize(false); 1490 else 1491 defaultValue = strTmp.toString(); 1492 1493 // TODO: implement this check 1494 /// if (a.type() != Attribute.CDATA) 1495 /// validateAttributeSyntax (a, a.defaultValue()); 1496 } else if (!peek("#IMPLIED")) { 1497 attributeUse = DTDEventListener.USE_IMPLIED; 1498 1499 /// if (a.type() == Attribute.ID) 1500 if (typeName == TYPE_ID) 1501 error("V-018", new Object[]{attName}); 1502 parseLiteral(false); 1503 /// if (a.type() != Attribute.CDATA) 1504 /// a.setDefaultValue(normalize(false)); 1505 /// else 1506 /// a.setDefaultValue(strTmp.toString()); 1507 if (typeName == TYPE_CDATA) 1508 defaultValue = normalize(false); 1509 else 1510 defaultValue = strTmp.toString(); 1511 1512 // TODO: implement this check 1513 /// if (a.type() != Attribute.CDATA) 1514 /// validateAttributeSyntax (a, a.defaultValue()); 1515 } else { 1516 // TODO: this looks like an fatal error. 1517 attributeUse = DTDEventListener.USE_NORMAL; 1518 } 1519 1520 if (XmlLang.equals(attName) 1521 && defaultValue/* a.defaultValue()*/ != null 1522 && !isXmlLang(defaultValue/*a.defaultValue()*/)) 1523 error("P-033", new Object[]{defaultValue /*a.defaultValue()*/}); 1524 1525 // TODO: isn't it an error to specify the same attribute twice? 1526 /// if (!element.attributes().contains(a)) { 1527 /// element.addAttribute(a); 1528 /// dtdHandler.attributeDecl(a); 1529 /// } 1530 1531 String[] v = (values != null) ? (String[]) values.toArray(new String[0]) : null; 1532 dtdHandler.attributeDecl(elementName, attName, typeName, v, attributeUse, defaultValue); 1533 maybeWhitespace(); 1534 } 1535 if (start != in) 1536 error("V-013", null); 1537 return true; 1538 } 1539 1540 // used when parsing literal attribute values, 1541 // or public identifiers. 1542 // 1543 // input in strTmp normalize(boolean invalidIfNeeded)1544 private String normalize(boolean invalidIfNeeded) { 1545 1546 // this can allocate an extra string... 1547 1548 String s = strTmp.toString(); 1549 String s2 = s.trim(); 1550 boolean didStrip = false; 1551 1552 if (s != s2) { 1553 s = s2; 1554 s2 = null; 1555 didStrip = true; 1556 } 1557 strTmp = new StringBuffer(); 1558 for (int i = 0; i < s.length(); i++) { 1559 char c = s.charAt(i); 1560 if (!XmlChars.isSpace(c)) { 1561 strTmp.append(c); 1562 continue; 1563 } 1564 strTmp.append(' '); 1565 while (++i < s.length() && XmlChars.isSpace(s.charAt(i))) 1566 didStrip = true; 1567 i--; 1568 } 1569 if (didStrip) 1570 return strTmp.toString(); 1571 else 1572 return s; 1573 } 1574 maybeConditionalSect()1575 private boolean maybeConditionalSect() 1576 throws IOException, SAXException { 1577 1578 // [61] conditionalSect ::= includeSect | ignoreSect 1579 1580 if (!peek("<![")) 1581 return false; 1582 1583 String keyword; 1584 InputEntity start = in; 1585 1586 maybeWhitespace(); 1587 1588 if ((keyword = maybeGetName()) == null) 1589 fatal("P-046"); 1590 maybeWhitespace(); 1591 nextChar('[', "F-030", null); 1592 1593 // [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' 1594 // extSubsetDecl ']]>' 1595 if ("INCLUDE".equals(keyword)) { 1596 for (; ;) { 1597 while (in.isEOF() && in != start) 1598 in = in.pop(); 1599 if (in.isEOF()) { 1600 error("V-020", null); 1601 } 1602 if (peek("]]>")) 1603 break; 1604 1605 doLexicalPE = false; 1606 if (maybeWhitespace()) 1607 continue; 1608 if (maybePEReference()) 1609 continue; 1610 doLexicalPE = true; 1611 if (maybeMarkupDecl() || maybeConditionalSect()) 1612 continue; 1613 1614 fatal("P-047"); 1615 } 1616 1617 // [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' 1618 // ignoreSectcontents ']]>' 1619 // [64] ignoreSectcontents ::= Ignore ('<![' 1620 // ignoreSectcontents ']]>' Ignore)* 1621 // [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*) 1622 } else if ("IGNORE".equals(keyword)) { 1623 int nestlevel = 1; 1624 // ignoreSectcontents 1625 doLexicalPE = false; 1626 while (nestlevel > 0) { 1627 char c = getc(); // will pop input entities 1628 if (c == '<') { 1629 if (peek("![")) 1630 nestlevel++; 1631 } else if (c == ']') { 1632 if (peek("]>")) 1633 nestlevel--; 1634 } else 1635 continue; 1636 } 1637 } else 1638 fatal("P-048", new Object[]{keyword}); 1639 return true; 1640 } 1641 1642 1643 // 1644 // CHAPTER 4: Physical Structures 1645 // 1646 1647 // parse decimal or hex numeric character reference parseCharNumber()1648 private int parseCharNumber() 1649 throws IOException, SAXException { 1650 1651 char c; 1652 int retval = 0; 1653 1654 // n.b. we ignore overflow ... 1655 if (getc() != 'x') { 1656 ungetc(); 1657 for (; ;) { 1658 c = getc(); 1659 if (c >= '0' && c <= '9') { 1660 retval *= 10; 1661 retval += (c - '0'); 1662 continue; 1663 } 1664 if (c == ';') 1665 return retval; 1666 fatal("P-049"); 1667 } 1668 } else 1669 for (; ;) { 1670 c = getc(); 1671 if (c >= '0' && c <= '9') { 1672 retval <<= 4; 1673 retval += (c - '0'); 1674 continue; 1675 } 1676 if (c >= 'a' && c <= 'f') { 1677 retval <<= 4; 1678 retval += 10 + (c - 'a'); 1679 continue; 1680 } 1681 if (c >= 'A' && c <= 'F') { 1682 retval <<= 4; 1683 retval += 10 + (c - 'A'); 1684 continue; 1685 } 1686 if (c == ';') 1687 return retval; 1688 fatal("P-050"); 1689 } 1690 } 1691 1692 // parameter is a UCS-4 character ... i.e. not just 16 bit UNICODE, 1693 // though still subject to the 'Char' construct in XML surrogatesToCharTmp(int ucs4)1694 private int surrogatesToCharTmp(int ucs4) 1695 throws SAXException { 1696 1697 if (ucs4 <= 0xffff) { 1698 if (XmlChars.isChar(ucs4)) { 1699 charTmp[0] = (char) ucs4; 1700 return 1; 1701 } 1702 } else if (ucs4 <= 0x0010ffff) { 1703 // we represent these as UNICODE surrogate pairs 1704 ucs4 -= 0x10000; 1705 charTmp[0] = (char) (0xd800 | ((ucs4 >> 10) & 0x03ff)); 1706 charTmp[1] = (char) (0xdc00 | (ucs4 & 0x03ff)); 1707 return 2; 1708 } 1709 fatal("P-051", new Object[]{Integer.toHexString(ucs4)}); 1710 // NOTREACHED 1711 return -1; 1712 } 1713 maybePEReference()1714 private boolean maybePEReference() 1715 throws IOException, SAXException { 1716 1717 // This is the SYNTACTIC version of this construct. 1718 // When processing external entities, there is also 1719 // a LEXICAL version; see getc() and doLexicalPE. 1720 1721 // [69] PEReference ::= '%' Name ';' 1722 if (!in.peekc('%')) 1723 return false; 1724 1725 String name = maybeGetName(); 1726 Object entity; 1727 1728 if (name == null) 1729 fatal("P-011"); 1730 nextChar(';', "F-021", name); 1731 entity = params.get(name); 1732 1733 if (entity instanceof InternalEntity) { 1734 InternalEntity value = (InternalEntity) entity; 1735 pushReader(value.buf, name, false); 1736 1737 } else if (entity instanceof ExternalEntity) { 1738 pushReader((ExternalEntity) entity); 1739 externalParameterEntity((ExternalEntity) entity); 1740 1741 } else if (entity == null) { 1742 error("V-022", new Object[]{name}); 1743 } 1744 return true; 1745 } 1746 maybeEntityDecl()1747 private boolean maybeEntityDecl() 1748 throws IOException, SAXException { 1749 1750 // [70] EntityDecl ::= GEDecl | PEDecl 1751 // [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' 1752 // [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDEF S? '>' 1753 // [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) 1754 // [74] PEDef ::= EntityValue | ExternalID 1755 // 1756 InputEntity start = peekDeclaration("!ENTITY"); 1757 1758 if (start == null) 1759 return false; 1760 1761 String entityName; 1762 SimpleHashtable defns; 1763 ExternalEntity externalId; 1764 boolean doStore; 1765 1766 // PE expansion gets selectively turned off several places: 1767 // in ENTITY declarations (here), in comments, in PIs. 1768 1769 // Here, we allow PE entities to be declared, and allows 1770 // literals to include PE refs without the added spaces 1771 // required with their expansion in markup decls. 1772 1773 doLexicalPE = false; 1774 whitespace("F-005"); 1775 if (in.peekc('%')) { 1776 whitespace("F-006"); 1777 defns = params; 1778 } else 1779 defns = entities; 1780 1781 ungetc(); // leave some whitespace 1782 doLexicalPE = true; 1783 entityName = getMarkupDeclname("F-017", false); 1784 whitespace("F-007"); 1785 externalId = maybeExternalID(); 1786 1787 // 1788 // first definition sticks ... e.g. internal subset PEs are used 1789 // to override DTD defaults. It's also an "error" to incorrectly 1790 // redefine builtin internal entities, but since reporting such 1791 // errors is optional we only give warnings ("just in case") for 1792 // non-parameter entities. 1793 // 1794 doStore = (defns.get(entityName) == null); 1795 if (!doStore && defns == entities) 1796 warning("P-054", new Object[]{entityName}); 1797 1798 // internal entities 1799 if (externalId == null) { 1800 char value []; 1801 InternalEntity entity; 1802 1803 doLexicalPE = false; // "ab%bar;cd" -maybe-> "abcd" 1804 parseLiteral(true); 1805 doLexicalPE = true; 1806 if (doStore) { 1807 value = new char[strTmp.length()]; 1808 if (value.length != 0) 1809 strTmp.getChars(0, value.length, value, 0); 1810 entity = new InternalEntity(entityName, value); 1811 entity.isPE = (defns == params); 1812 entity.isFromInternalSubset = false; 1813 defns.put(entityName, entity); 1814 if (defns == entities) 1815 dtdHandler.internalGeneralEntityDecl(entityName, 1816 new String(value)); 1817 } 1818 1819 // external entities (including unparsed) 1820 } else { 1821 // [76] NDataDecl ::= S 'NDATA' S Name 1822 if (defns == entities && maybeWhitespace() 1823 && peek("NDATA")) { 1824 externalId.notation = getMarkupDeclname("F-018", false); 1825 1826 // flag undeclared notation for checking after 1827 // the DTD is fully processed 1828 if (notations.get(externalId.notation) == null) 1829 notations.put(externalId.notation, Boolean.TRUE); 1830 } 1831 externalId.name = entityName; 1832 externalId.isPE = (defns == params); 1833 externalId.isFromInternalSubset = false; 1834 if (doStore) { 1835 defns.put(entityName, externalId); 1836 if (externalId.notation != null) 1837 dtdHandler.unparsedEntityDecl(entityName, 1838 externalId.publicId, externalId.systemId, 1839 externalId.notation); 1840 else if (defns == entities) 1841 dtdHandler.externalGeneralEntityDecl(entityName, 1842 externalId.publicId, externalId.systemId); 1843 } 1844 } 1845 maybeWhitespace(); 1846 nextChar('>', "F-031", entityName); 1847 if (start != in) 1848 error("V-013", null); 1849 return true; 1850 } 1851 maybeExternalID()1852 private ExternalEntity maybeExternalID() 1853 throws IOException, SAXException { 1854 1855 // [75] ExternalID ::= 'SYSTEM' S SystemLiteral 1856 // | 'PUBLIC' S' PubidLiteral S Systemliteral 1857 String temp = null; 1858 ExternalEntity retval; 1859 1860 if (peek("PUBLIC")) { 1861 whitespace("F-009"); 1862 temp = parsePublicId(); 1863 } else if (!peek("SYSTEM")) 1864 return null; 1865 1866 retval = new ExternalEntity(in); 1867 retval.publicId = temp; 1868 whitespace("F-008"); 1869 retval.systemId = parseSystemId(); 1870 return retval; 1871 } 1872 parseSystemId()1873 private String parseSystemId() 1874 throws IOException, SAXException { 1875 1876 String uri = getQuotedString("F-034", null); 1877 int temp = uri.indexOf(':'); 1878 1879 // resolve relative URIs ... must do it here since 1880 // it's relative to the source file holding the URI! 1881 1882 // "new java.net.URL (URL, string)" conforms to RFC 1630, 1883 // but we can't use that except when the URI is a URL. 1884 // The entity resolver is allowed to handle URIs that are 1885 // not URLs, so we pass URIs through with scheme intact 1886 if (temp == -1 || uri.indexOf('/') < temp) { 1887 String baseURI; 1888 1889 baseURI = in.getSystemId(); 1890 if (baseURI == null) 1891 fatal("P-055", new Object[]{uri}); 1892 if (uri.length() == 0) 1893 uri = "."; 1894 baseURI = baseURI.substring(0, baseURI.lastIndexOf('/') + 1); 1895 if (uri.charAt(0) != '/') 1896 uri = baseURI + uri; 1897 else { 1898 // XXX slashes at the beginning of a relative URI are 1899 // a special case we don't handle. 1900 throw new InternalError(); 1901 } 1902 1903 // letting other code map any "/xxx/../" or "/./" to "/", 1904 // since all URIs must handle it the same. 1905 } 1906 // check for fragment ID in URI 1907 if (uri.indexOf('#') != -1) 1908 error("P-056", new Object[]{uri}); 1909 return uri; 1910 } 1911 maybeTextDecl()1912 private void maybeTextDecl() 1913 throws IOException, SAXException { 1914 1915 // [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' 1916 if (peek("<?xml")) { 1917 readVersion(false, "1.0"); 1918 readEncoding(true); 1919 maybeWhitespace(); 1920 if (!peek("?>")) 1921 fatal("P-057"); 1922 } 1923 } 1924 externalParameterEntity(ExternalEntity next)1925 private void externalParameterEntity(ExternalEntity next) 1926 throws IOException, SAXException { 1927 1928 // 1929 // Reap the intended benefits of standalone declarations: 1930 // don't deal with external parameter entities, except to 1931 // validate the standalone declaration. 1932 // 1933 1934 // n.b. "in external parameter entities" (and external 1935 // DTD subset, same grammar) parameter references can 1936 // occur "within" markup declarations ... expansions can 1937 // cross syntax rules. Flagged here; affects getc(). 1938 1939 // [79] ExtPE ::= TextDecl? extSubsetDecl 1940 // [31] extSubsetDecl ::= ( markupdecl | conditionalSect 1941 // | PEReference | S )* 1942 InputEntity pe; 1943 1944 // XXX if this returns false ... 1945 1946 pe = in; 1947 maybeTextDecl(); 1948 while (!pe.isEOF()) { 1949 // pop internal PEs (and whitespace before/after) 1950 if (in.isEOF()) { 1951 in = in.pop(); 1952 continue; 1953 } 1954 doLexicalPE = false; 1955 if (maybeWhitespace()) 1956 continue; 1957 if (maybePEReference()) 1958 continue; 1959 doLexicalPE = true; 1960 if (maybeMarkupDecl() || maybeConditionalSect()) 1961 continue; 1962 break; 1963 } 1964 // if (in != pe) throw new InternalError("who popped my PE?"); 1965 if (!pe.isEOF()) 1966 fatal("P-059", new Object[]{in.getName()}); 1967 } 1968 readEncoding(boolean must)1969 private void readEncoding(boolean must) 1970 throws IOException, SAXException { 1971 1972 // [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* 1973 String name = maybeReadAttribute("encoding", must); 1974 1975 if (name == null) 1976 return; 1977 for (int i = 0; i < name.length(); i++) { 1978 char c = name.charAt(i); 1979 if ((c >= 'A' && c <= 'Z') 1980 || (c >= 'a' && c <= 'z')) 1981 continue; 1982 if (i != 0 1983 && ((c >= '0' && c <= '9') 1984 || c == '-' 1985 || c == '_' 1986 || c == '.' 1987 )) 1988 continue; 1989 fatal("P-060", new Object[]{new Character(c)}); 1990 } 1991 1992 // 1993 // This should be the encoding in use, and it's even an error for 1994 // it to be anything else (in certain cases that are impractical to 1995 // to test, and may even be insufficient). So, we do the best we 1996 // can, and warn if things look suspicious. Note that Java doesn't 1997 // uniformly expose the encodings, and that the names it uses 1998 // internally are nonstandard. Also, that the XML spec allows 1999 // such "errors" not to be reported at all. 2000 // 2001 String currentEncoding = in.getEncoding(); 2002 2003 if (currentEncoding != null 2004 && !name.equalsIgnoreCase(currentEncoding)) 2005 warning("P-061", new Object[]{name, currentEncoding}); 2006 } 2007 maybeNotationDecl()2008 private boolean maybeNotationDecl() 2009 throws IOException, SAXException { 2010 2011 // [82] NotationDecl ::= '<!NOTATION' S Name S 2012 // (ExternalID | PublicID) S? '>' 2013 // [83] PublicID ::= 'PUBLIC' S PubidLiteral 2014 InputEntity start = peekDeclaration("!NOTATION"); 2015 2016 if (start == null) 2017 return false; 2018 2019 String name = getMarkupDeclname("F-019", false); 2020 ExternalEntity entity = new ExternalEntity(in); 2021 2022 whitespace("F-011"); 2023 if (peek("PUBLIC")) { 2024 whitespace("F-009"); 2025 entity.publicId = parsePublicId(); 2026 if (maybeWhitespace()) { 2027 if (!peek(">")) 2028 entity.systemId = parseSystemId(); 2029 else 2030 ungetc(); 2031 } 2032 } else if (peek("SYSTEM")) { 2033 whitespace("F-008"); 2034 entity.systemId = parseSystemId(); 2035 } else 2036 fatal("P-062"); 2037 maybeWhitespace(); 2038 nextChar('>', "F-032", name); 2039 if (start != in) 2040 error("V-013", null); 2041 if (entity.systemId != null && entity.systemId.indexOf('#') != -1) 2042 error("P-056", new Object[]{entity.systemId}); 2043 2044 Object value = notations.get(name); 2045 if (value != null && value instanceof ExternalEntity) 2046 warning("P-063", new Object[]{name}); 2047 2048 else { 2049 notations.put(name, entity); 2050 dtdHandler.notationDecl(name, entity.publicId, 2051 entity.systemId); 2052 } 2053 return true; 2054 } 2055 2056 2057 //////////////////////////////////////////////////////////////// 2058 // 2059 // UTILITIES 2060 // 2061 //////////////////////////////////////////////////////////////// 2062 getc()2063 private char getc() throws IOException, SAXException { 2064 2065 if (!doLexicalPE) { 2066 char c = in.getc(); 2067 return c; 2068 } 2069 2070 // 2071 // External parameter entities get funky processing of '%param;' 2072 // references. It's not clearly defined in the XML spec; but it 2073 // boils down to having those refs be _lexical_ in most cases to 2074 // include partial syntax productions. It also needs selective 2075 // enabling; "<!ENTITY % foo ...>" must work, for example, and 2076 // if "bar" is an empty string PE, "ab%bar;cd" becomes "abcd" 2077 // if it's expanded in a literal, else "ab cd". PEs also do 2078 // not expand within comments or PIs, and external PEs are only 2079 // allowed to have markup decls (and so aren't handled lexically). 2080 // 2081 // This PE handling should be merged into maybeWhitespace, where 2082 // it can be dealt with more consistently. 2083 // 2084 // Also, there are some validity constraints in this area. 2085 // 2086 char c; 2087 2088 while (in.isEOF()) { 2089 if (in.isInternal() || (doLexicalPE && !in.isDocument())) 2090 in = in.pop(); 2091 else { 2092 fatal("P-064", new Object[]{in.getName()}); 2093 } 2094 } 2095 if ((c = in.getc()) == '%' && doLexicalPE) { 2096 // PE ref ::= '%' name ';' 2097 String name = maybeGetName(); 2098 Object entity; 2099 2100 if (name == null) 2101 fatal("P-011"); 2102 nextChar(';', "F-021", name); 2103 entity = params.get(name); 2104 2105 // push a magic "entity" before and after the 2106 // real one, so ungetc() behaves uniformly 2107 pushReader(" ".toCharArray(), null, false); 2108 if (entity instanceof InternalEntity) 2109 pushReader(((InternalEntity) entity).buf, name, false); 2110 else if (entity instanceof ExternalEntity) 2111 // PEs can't be unparsed! 2112 // XXX if this returns false ... 2113 pushReader((ExternalEntity) entity); 2114 else if (entity == null) 2115 // see note in maybePEReference re making this be nonfatal. 2116 fatal("V-022"); 2117 else 2118 throw new InternalError(); 2119 pushReader(" ".toCharArray(), null, false); 2120 return in.getc(); 2121 } 2122 return c; 2123 } 2124 ungetc()2125 private void ungetc() { 2126 2127 in.ungetc(); 2128 } 2129 peek(String s)2130 private boolean peek(String s) 2131 throws IOException, SAXException { 2132 2133 return in.peek(s, null); 2134 } 2135 2136 // Return the entity starting the specified declaration 2137 // (for validating declaration nesting) else null. 2138 peekDeclaration(String s)2139 private InputEntity peekDeclaration(String s) 2140 throws IOException, SAXException { 2141 2142 InputEntity start; 2143 2144 if (!in.peekc('<')) 2145 return null; 2146 start = in; 2147 if (in.peek(s, null)) 2148 return start; 2149 in.ungetc(); 2150 return null; 2151 } 2152 nextChar(char c, String location, String near)2153 private void nextChar(char c, String location, String near) 2154 throws IOException, SAXException { 2155 2156 while (in.isEOF() && !in.isDocument()) 2157 in = in.pop(); 2158 if (!in.peekc(c)) 2159 fatal("P-008", new Object[] 2160 {new Character(c), 2161 messages.getMessage(locale, location), 2162 (near == null ? "" : ('"' + near + '"'))}); 2163 } 2164 2165 pushReader(char buf [], String name, boolean isGeneral)2166 private void pushReader(char buf [], String name, boolean isGeneral) 2167 throws SAXException { 2168 2169 InputEntity r = InputEntity.getInputEntity(dtdHandler, locale); 2170 r.init(buf, name, in, !isGeneral); 2171 in = r; 2172 } 2173 pushReader(ExternalEntity next)2174 private boolean pushReader(ExternalEntity next) 2175 throws IOException, SAXException { 2176 2177 InputEntity r = InputEntity.getInputEntity(dtdHandler, locale); 2178 InputSource s; 2179 try { 2180 s = next.getInputSource(resolver); 2181 } catch (IOException e) { 2182 String msg = 2183 "unable to open the external entity from :" + next.systemId; 2184 if (next.publicId != null) 2185 msg += " (public id:" + next.publicId + ")"; 2186 2187 SAXParseException spe = new SAXParseException(msg, 2188 getPublicId(), getSystemId(), getLineNumber(), getColumnNumber(), e); 2189 dtdHandler.fatalError(spe); 2190 throw e; 2191 } 2192 2193 r.init(s, next.name, in, next.isPE); 2194 in = r; 2195 return true; 2196 } 2197 getPublicId()2198 public String getPublicId() { 2199 2200 return (in == null) ? null : in.getPublicId(); 2201 } 2202 getSystemId()2203 public String getSystemId() { 2204 2205 return (in == null) ? null : in.getSystemId(); 2206 } 2207 getLineNumber()2208 public int getLineNumber() { 2209 2210 return (in == null) ? -1 : in.getLineNumber(); 2211 } 2212 getColumnNumber()2213 public int getColumnNumber() { 2214 2215 return (in == null) ? -1 : in.getColumnNumber(); 2216 } 2217 2218 // error handling convenience routines 2219 warning(String messageId, Object parameters [])2220 private void warning(String messageId, Object parameters []) 2221 throws SAXException { 2222 2223 SAXParseException e = new SAXParseException(messages.getMessage(locale, messageId, parameters), 2224 getPublicId(), getSystemId(), getLineNumber(), getColumnNumber()); 2225 2226 dtdHandler.warning(e); 2227 } 2228 error(String messageId, Object parameters [])2229 void error(String messageId, Object parameters []) 2230 throws SAXException { 2231 2232 SAXParseException e = new SAXParseException(messages.getMessage(locale, messageId, parameters), 2233 getPublicId(), getSystemId(), getLineNumber(), getColumnNumber()); 2234 2235 dtdHandler.error(e); 2236 } 2237 fatal(String messageId)2238 private void fatal(String messageId) throws SAXException { 2239 2240 fatal(messageId, null); 2241 } 2242 fatal(String messageId, Object parameters [])2243 private void fatal(String messageId, Object parameters []) 2244 throws SAXException { 2245 2246 SAXParseException e = new SAXParseException(messages.getMessage(locale, messageId, parameters), 2247 getPublicId(), getSystemId(), getLineNumber(), getColumnNumber()); 2248 2249 dtdHandler.fatalError(e); 2250 2251 throw e; 2252 } 2253 2254 // 2255 // Map char arrays to strings ... cuts down both on memory and 2256 // CPU usage for element/attribute/other names that are reused. 2257 // 2258 // Documents typically repeat names a lot, so we more or less 2259 // intern all the strings within the document; since some strings 2260 // are repeated in multiple documents (e.g. stylesheets) we go 2261 // a bit further, and intern globally. 2262 // 2263 static class NameCache { 2264 // 2265 // Unless we auto-grow this, the default size should be a 2266 // reasonable bit larger than needed for most XML files 2267 // we've yet seen (and be prime). If it's too small, the 2268 // penalty is just excess cache collisions. 2269 // 2270 NameCacheEntry hashtable [] = new NameCacheEntry[541]; 2271 2272 // 2273 // Usually we just want to get the 'symbol' for these chars 2274 // lookup(char value [], int len)2275 String lookup(char value [], int len) { 2276 2277 return lookupEntry(value, len).name; 2278 } 2279 2280 // 2281 // Sometimes we need to scan the chars in the resulting 2282 // string, so there's an accessor which exposes them. 2283 // (Mostly for element end tags.) 2284 // lookupEntry(char value [], int len)2285 NameCacheEntry lookupEntry(char value [], int len) { 2286 2287 int index = 0; 2288 NameCacheEntry entry; 2289 2290 // hashing to get index 2291 for (int i = 0; i < len; i++) 2292 index = index * 31 + value[i]; 2293 index &= 0x7fffffff; 2294 index %= hashtable.length; 2295 2296 // return entry if one's there ... 2297 for (entry = hashtable[index]; 2298 entry != null; 2299 entry = entry.next) { 2300 if (entry.matches(value, len)) 2301 return entry; 2302 } 2303 2304 // else create new one 2305 entry = new NameCacheEntry(); 2306 entry.chars = new char[len]; 2307 System.arraycopy(value, 0, entry.chars, 0, len); 2308 entry.name = new String(entry.chars); 2309 // 2310 // NOTE: JDK 1.1 has a fixed size string intern table, 2311 // with non-GC'd entries. It can panic here; that's a 2312 // JDK problem, use 1.2 or later with many identifiers. 2313 // 2314 entry.name = entry.name.intern(); // "global" intern 2315 entry.next = hashtable[index]; 2316 hashtable[index] = entry; 2317 return entry; 2318 } 2319 } 2320 2321 static class NameCacheEntry { 2322 2323 String name; 2324 char chars []; 2325 NameCacheEntry next; 2326 matches(char value [], int len)2327 boolean matches(char value [], int len) { 2328 2329 if (chars.length != len) 2330 return false; 2331 for (int i = 0; i < len; i++) 2332 if (value[i] != chars[i]) 2333 return false; 2334 return true; 2335 } 2336 } 2337 2338 // 2339 // Message catalog for diagnostics. 2340 // 2341 static final Catalog messages = new Catalog(); 2342 2343 static final class Catalog extends MessageCatalog { 2344 Catalog()2345 Catalog() { 2346 super(DTDParser.class); 2347 } 2348 } 2349 2350 } 2351