1 /* XMLElement.java 2 * 3 * $Revision: 1.2 $ 4 * $Date: 2002/08/03 04:36:34 $ 5 * $Name: $ 6 * 7 * This file is part of NanoXML 2 Lite. 8 * Copyright (C) 2000-2002 Marc De Scheemaecker, All Rights Reserved. 9 * 10 * This software is provided 'as-is', without any express or implied warranty. 11 * In no event will the authors be held liable for any damages arising from the 12 * use of this software. 13 * 14 * Permission is granted to anyone to use this software for any purpose, 15 * including commercial applications, and to alter it and redistribute it 16 * freely, subject to the following restrictions: 17 * 18 * 1. The origin of this software must not be misrepresented; you must not 19 * claim that you wrote the original software. If you use this software in 20 * a product, an acknowledgment in the product documentation would be 21 * appreciated but is not required. 22 * 23 * 2. Altered source versions must be plainly marked as such, and must not be 24 * misrepresented as being the original software. 25 * 26 * 3. This notice may not be removed or altered from any source distribution. 27 *****************************************************************************/ 28 29 /* JAM: hacked the source to remove unneeded methods and comments. */ 30 31 package net.sourceforge.nanoxml; 32 33 34 import java.io.IOException; 35 import java.io.OutputStream; 36 import java.io.PrintStream; 37 import java.io.Reader; 38 import java.util.Enumeration; 39 import java.util.HashMap; 40 import java.util.Map; 41 import java.util.Set; 42 import java.util.Vector; 43 import net.sourceforge.jnlp.util.logging.OutputController; 44 45 /** 46 * XMLElement is a representation of an XML object. The object is able to parse 47 * XML code. 
* <dl>
 * <dt><b>Parsing XML Data</b></dt>
 * <dd>
 * You can parse XML data using the following code:
 * <pre>{@code
 *XMLElement xml = new XMLElement();
 *FileReader reader = new FileReader("filename.xml");
 *xml.parseFromReader(reader);
 *}</pre></dd></dl>
 * <dl><dt><b>Retrieving Attributes</b></dt>
 * <dd>
 * You can enumerate the attributes of an element using the method
 * {@link #enumerateAttributeNames() enumerateAttributeNames}.
 * The attribute values can be retrieved using the method
 * {@link #getAttribute(java.lang.String) getAttribute}.
 * The following example shows how to list the attributes of an element:
 * <pre>{@code
 *XMLElement element = ...;
 *Enumeration<String> names = element.enumerateAttributeNames();
 *while (names.hasMoreElements()) {
 *    String key = names.nextElement();
 *    String value = (String) element.getAttribute(key);
 *    System.out.println(key + " = " + value);
 *}}</pre></dd></dl>
 * <dl><dt><b>Retrieving Child Elements</b></dt>
 * <dd>
 * You can enumerate the children of an element using
 * {@link #enumerateChildren() enumerateChildren}.
 * The number of child elements can be retrieved using
 * {@link #countChildren() countChildren}.
 * </dd></dl>
 * <dl><dt><b>Elements Containing Character Data</b></dt>
 * <dd>
 * If an element contains character data, like in the following example:
 * <pre>{@code <title>The Title</title>}</pre>
 * you can retrieve that data using the method
 * {@link #getContent() getContent}.
 * </dd></dl>
 * <dl><dt><b>Subclassing XMLElement</b></dt>
 * <dd>
 * When subclassing XMLElement, you need to override the method
 * {@link #createAnotherElement() createAnotherElement}
 * which has to return a new copy of the receiver.
91 * </dd></dl> 92 * 93 * @see net.sourceforge.nanoxml.XMLParseException 94 * 95 * @author Marc De Scheemaecker 96 * <<A href="mailto:cyberelf@mac.com">cyberelf@mac.com</A>> 97 * @version $Name: $, $Revision: 1.2 $ 98 */ 99 public class XMLElement { 100 101 /** 102 * The attributes given to the element. 103 * 104 * <dl><dt><b>Invariants:</b></dt><dd> 105 * <ul><li>The field can be empty.</li> 106 * <li>The field is never {@code null}.</li> 107 * <li>The keys and the values are strings.</li> 108 * </ul></dd></dl> 109 */ 110 private Map<String, Object> attributes; 111 112 /** 113 * Child elements of the element. 114 * 115 * <dl><dt><b>Invariants:</b></dt><dd> 116 * <ul><li>The field can be empty.</li> 117 * <li>The field is never {@code null}.</li> 118 * <li>The elements are instances of {@code XMLElement} 119 * or a subclass of {@code XMLElement}.</li> 120 * </ul></dd></dl> 121 */ 122 private Vector<XMLElement> children; 123 124 /** 125 * The name of the element. 126 * 127 * <dl><dt><b>Invariants:</b></dt><dd> 128 * <ul><li>The field is {@code null} iff the element is not 129 * initialized by either parse or {@link #setName setName()}.</li> 130 * <li>If the field is not {@code null}, it's not empty.</li> 131 * <li>If the field is not {@code null}, it contains a valid 132 * XML identifier.</li> 133 * </ul></dd></dl> 134 */ 135 private String name; 136 137 /** 138 * The {@code #PCDATA} content of the object. 139 * 140 * <dl><dt><b>Invariants:</b></dt><dd> 141 * <ul><li>The field is {@code null} iff the element is not a 142 * {@code #PCDATA} element.</li> 143 * <li>The field can be any string, including the empty string.</li> 144 * </ul></dd></dl> 145 */ 146 private String contents; 147 148 /** 149 * Conversion table for &...; entities. The keys are the entity names 150 * without the & and ; delimiters. 
151 * 152 * <dl><dt><b>Invariants:</b></dt><dd> 153 * <ul><li>The field is never {@code null}.</li> 154 * <li>The field always contains the following associations: 155 * "lt" => "<", "gt" => ">", 156 * "quot" => "\"", "apos" => "'", 157 * "amp" => "&"</li> 158 * <li>The keys are strings</li> 159 * <li>The values are char arrays</li> 160 * </ul></dd></dl> 161 */ 162 private Map<String, char[]> entities; 163 164 /** 165 * The line number where the element starts. 166 * 167 * <dl><dt><b>Invariants:</b></dt><dd> 168 * <ul><li>{@code lineNr >= 0}</li> 169 * </ul></dd></dl> 170 */ 171 private int lineNr; 172 173 /** 174 * {@code true} if the case of the element and attribute names are case 175 * insensitive. 176 */ 177 private boolean ignoreCase; 178 179 /** 180 * {@code true} if the leading and trailing whitespace of {@code #PCDATA} 181 * sections have to be ignored. 182 */ 183 private boolean ignoreWhitespace; 184 185 /** 186 * Character read too much. 187 * <p> 188 * This character provides push-back functionality to the input reader 189 * without having to use a PushbackReader. 190 * If there is no such character, this field is {@code '\0'}. 191 */ 192 private char charReadTooMuch; 193 194 /** 195 * Character read too much for the comment remover. 196 */ 197 private char sanitizeCharReadTooMuch; 198 199 /** 200 * Whether the BOM header appeared 201 */ 202 private boolean BOM = false; 203 204 /** 205 * The reader provided by the caller of the parse method. 206 * 207 * <dl><dt><b>Invariants:</b></dt><dd> 208 * <ul><li>The field is not {@code null} while the parse method is 209 * running.</li> 210 * </ul></dd></dl> 211 */ 212 private Reader reader; 213 214 /** 215 * The current line number in the source content. 216 * 217 * <dl><dt><b>Invariants:</b></dt><dd> 218 * <ul><li>parserLineNr > 0 while the parse method is running.</li> 219 * </ul></dd></dl> 220 */ 221 private int parserLineNr; 222 223 /** 224 * Creates and initializes a new XML element. 
225 * <p> 226 * Calling the construction is equivalent to: 227 * <ul><li>{@code new XMLElement(new HashMap(), false, true)}</li></ul> 228 * 229 * <dl><dt><b>Postconditions:</b></dt><dd> 230 * <ul><li>{@linkplain #countChildren} => 0</li> 231 * <li>{@linkplain #enumerateChildren} => empty enumeration</li> 232 * <li>enumeratePropertyNames() => empty enumeration</li> 233 * <li>getChildren() => empty vector</li> 234 * <li>{@linkplain #getContent} => ""</li> 235 * <li>{@linkplain #getLineNr} => 0</li> 236 * <li>{@linkplain #getName} => null</li> 237 * </ul></dd></dl> 238 */ XMLElement()239 public XMLElement() { 240 this(new HashMap<String, char[]>(), false, true, true); 241 } 242 243 /** 244 * Creates and initializes a new XML element. 245 * <p> 246 * This constructor should <i>only</i> be called from 247 * {@link #createAnotherElement} to create child elements. 248 * 249 * @param entities 250 * The entity conversion table. 251 * @param skipLeadingWhitespace 252 * {@code true} if leading and trailing whitespace in PCDATA 253 * content has to be removed. 254 * @param fillBasicConversionTable 255 * {@code true} if the basic entities need to be added to 256 * the entity list (client code calling this constructor). 257 * @param ignoreCase 258 * {@code true} if the case of element and attribute names have 259 * to be ignored. 
260 * 261 * <dl><dt><b>Preconditions:</b></dt><dd> 262 * <ul><li>{@code entities != null}</li> 263 * <li>if {@code fillBasicConversionTable == false} 264 * then {@code entities} contains at least the following 265 * entries: {@code amp}, {@code lt}, {@code gt}, {@code apos} and 266 * {@code quot}</li> 267 * </ul></dd></dl> 268 * 269 * <dl><dt><b>Postconditions:</b></dt><dd> 270 * <ul><li>{@linkplain #countChildren} => 0</li> 271 * <li>{@linkplain #enumerateChildren} => empty enumeration</li> 272 * <li>enumeratePropertyNames() => empty enumeration</li> 273 * <li>getChildren() => empty vector</li> 274 * <li>{@linkplain #getContent} => ""</li> 275 * <li>{@linkplain #getLineNr} => 0</li> 276 * <li>{@linkplain #getName} => null</li> 277 * </ul></dd></dl> 278 */ XMLElement(Map<String, char[]> entities, boolean skipLeadingWhitespace, boolean fillBasicConversionTable, boolean ignoreCase)279 protected XMLElement(Map<String, char[]> entities, 280 boolean skipLeadingWhitespace, 281 boolean fillBasicConversionTable, 282 boolean ignoreCase) { 283 this.ignoreWhitespace = skipLeadingWhitespace; 284 this.ignoreCase = ignoreCase; 285 this.name = null; 286 this.contents = ""; 287 this.attributes = new HashMap<>(); 288 this.children = new Vector<>(); 289 this.entities = entities; 290 this.lineNr = 0; 291 Set<String> e = this.entities.keySet(); 292 for(String key: e) { 293 Object value = this.entities.get(key); 294 if (value instanceof String) { 295 entities.put(key, ((String) value).toCharArray()); 296 } 297 } 298 if (fillBasicConversionTable) { 299 this.entities.put("amp", new char[] { '&' }); 300 this.entities.put("quot", new char[] { '"' }); 301 this.entities.put("apos", new char[] { '\'' }); 302 this.entities.put("lt", new char[] { '<' }); 303 this.entities.put("gt", new char[] { '>' }); 304 } 305 } 306 307 /** 308 * Adds a child element. 309 * 310 * @param child 311 * The child element to add. 
312 * 313 * <dl><dt><b>Preconditions:</b></dt><dd> 314 * <ul><li>{@code child != null}</li> 315 * <li>{@code child.getName() != null}</li> 316 * <li>{@code child} does not have a parent element</li> 317 * </ul></dd></dl> 318 * 319 * <dl><dt><b>Postconditions:</b></dt><dd> 320 * <ul><li>{@linkplain #countChildren} => old.countChildren() + 1</li> 321 * <li>{@linkplain #enumerateChildren} => old.enumerateChildren() 322 + child</li> 323 * <li>getChildren() => old.enumerateChildren() + child</li> 324 * </ul></dd></dl> 325 * 326 */ addChild(XMLElement child)327 public void addChild(XMLElement child) { 328 this.children.addElement(child); 329 } 330 331 /** 332 * Adds or modifies an attribute. 333 * 334 * @param name 335 * The name of the attribute. 336 * @param value 337 * The value of the attribute. 338 * 339 * <dl><dt><b>Preconditions:</b></dt><dd> 340 * <ul><li>{@code name != null}</li> 341 * <li>{@code name} is a valid XML identifier</li> 342 * <li>{@code value != null}</li> 343 * </ul></dd></dl> 344 * 345 * <dl><dt><b>Postconditions:</b></dt><dd> 346 * <ul><li>{@linkplain #enumerateAttributeNames} 347 * => old.enumerateAttributeNames() + name</li> 348 * <li>{@linkplain #getAttribute(java.lang.String) getAttribute(name)} 349 * => value</li> 350 * </ul></dd></dl> 351 */ setAttribute(String name, Object value)352 public void setAttribute(String name, 353 Object value) { 354 if (this.ignoreCase) { 355 name = name.toUpperCase(); 356 } 357 this.attributes.put(name, value.toString()); 358 } 359 360 /** 361 * @return the number of child elements of the element. 362 * 363 * <dl><dt><b>Postconditions:</b></dt><dd> 364 * <ul><li>{@code result >= 0}</li> 365 * </ul></dd></dl> 366 */ countChildren()367 public int countChildren() { 368 return this.children.size(); 369 } 370 371 /** 372 * @return Enumeration of the attribute names. 
373 * 374 * <dl><dt><b>Postconditions:</b></dt><dd> 375 * <ul><li>{@code result != null}</li> 376 * </ul></dd></dl> 377 */ enumerateAttributeNames()378 public Enumeration<String> enumerateAttributeNames() { 379 return new Vector(this.attributes.keySet()).elements(); 380 } 381 382 /** 383 * @return Enumeration the child elements. 384 * 385 * <dl><dt><b>Postconditions:</b></dt><dd> 386 * <ul><li>{@code result != null}</li> 387 * </ul></dd></dl> 388 */ enumerateChildren()389 public Enumeration<XMLElement> enumerateChildren() { 390 return this.children.elements(); 391 } 392 393 /** 394 * @return the PCDATA content of the object. If there is no such content, 395 * {@code null} is returned. 396 */ getContent()397 public String getContent() { 398 return this.contents; 399 } 400 401 /** 402 * @return the line nr in the source data on which the element is found. 403 * This method returns {@code 0} there is no associated source data. 404 * 405 * <dl><dt><b>Postconditions:</b></dt><dd> 406 * <ul><li>{@code result >= 0}</li> 407 * </ul></dd></dl> 408 */ getLineNr()409 public int getLineNr() { 410 return this.lineNr; 411 } 412 413 /** 414 * @return an attribute of the element. 415 * <p> 416 * If the attribute doesn't exist, {@code null} is returned. 417 * 418 * @param name The name of the attribute. 419 * 420 * <dl><dt><b>Preconditions:</b></dt><dd> 421 * <ul><li>{@code name != null}</li> 422 * <li>{@code name} is a valid XML identifier</li> 423 * </ul></dd></dl> 424 */ getAttribute(String name)425 public Object getAttribute(String name) { 426 if (this.ignoreCase) { 427 name = name.toUpperCase(); 428 } 429 Object value = this.attributes.get(name); 430 return value; 431 } 432 433 /** 434 * Returns the name of the element. 435 * @return this {@code XMLElement} object's name 436 */ getName()437 public String getName() { 438 return this.name; 439 } 440 441 /** 442 * Reads one XML element from a {@link java.io.Reader} and parses it. 
443 * 444 * @param reader 445 * The reader from which to retrieve the XML data. 446 * 447 * <dl><dt><b>Preconditions:</b></dt><dd> 448 * <ul><li>{@code reader != null}</li> 449 * <li>{@code reader} is not closed</li> 450 * </ul></dd></dl> 451 * 452 * <dl><dt><b>Postconditions:</b></dt><dd> 453 * <ul><li>the state of the receiver is updated to reflect the XML element 454 * parsed from the reader</li> 455 * <li>the reader points to the first character following the last 456 * {@code '>'} character of the XML element</li> 457 * </ul></dd></dl> 458 * 459 * @throws java.io.IOException 460 * If an error occured while reading the input. 461 * @throws net.sourceforge.nanoxml.XMLParseException 462 * If an error occured while parsing the read data. 463 */ parseFromReader(Reader reader)464 public void parseFromReader(Reader reader) 465 throws IOException, XMLParseException { 466 this.parseFromReader(reader, /*startingLineNr*/1); 467 } 468 469 /** 470 * Reads one XML element from a java.io.Reader and parses it. 471 * 472 * @param reader 473 * The reader from which to retrieve the XML data. 474 * @param startingLineNr 475 * The line number of the first line in the data. 476 * 477 * <dl><dt><b>Preconditions:</b></dt><dd> 478 * <ul><li>{@code reader != null}</li> 479 * <li>{@code reader} is not closed</li> 480 * </ul></dd></dl> 481 * 482 * <dl><dt><b>Postconditions:</b></dt><dd> 483 * <ul><li>the state of the receiver is updated to reflect the XML element 484 * parsed from the reader</li> 485 * <li>the reader points to the first character following the last 486 * {@code '>'} character of the XML element</li> 487 * </ul></dd></dl> 488 * 489 * @throws java.io.IOException 490 * If an error occured while reading the input. 491 * @throws net.sourceforge.nanoxml.XMLParseException 492 * If an error occured while parsing the read data. 
493 */ parseFromReader(Reader reader, int startingLineNr)494 public void parseFromReader(Reader reader, 495 int startingLineNr) 496 throws IOException, XMLParseException { 497 this.charReadTooMuch = '\0'; 498 this.reader = reader; 499 this.parserLineNr = startingLineNr; 500 501 for (;;) { 502 char ch = this.scanLeadingWhitespace(); 503 504 if (ch != '<') { 505 throw this.expectedInput("<", ch); 506 } 507 508 ch = this.readChar(); 509 510 if ((ch == '!') || (ch == '?')) { 511 this.skipSpecialTag(0); 512 } else { 513 this.unreadChar(ch); 514 this.scanElement(this); 515 return; 516 } 517 } 518 } 519 520 /** 521 * Creates a new similar XML element. 522 * <p> 523 * You should override this method when subclassing XMLElement. 524 * </p> 525 * @return next element in tree based on global settings 526 */ createAnotherElement()527 protected XMLElement createAnotherElement() { 528 return new XMLElement(this.entities, 529 this.ignoreWhitespace, 530 false, 531 this.ignoreCase); 532 } 533 534 /** 535 * Changes the content string. 536 * 537 * @param content 538 * The new content string. 539 */ setContent(String content)540 public void setContent(String content) { 541 this.contents = content; 542 } 543 544 /** 545 * Changes the name of the element. 546 * 547 * @param name 548 * The new name. 549 * 550 * <dl><dt><b>Preconditions:</b></dt><dd> 551 * <ul><li>{@code name != null}</li> 552 * <li>{@code name} is a valid XML identifier</li> 553 * </ul></dd></dl> 554 */ setName(String name)555 public void setName(String name) { 556 this.name = name; 557 } 558 559 /** 560 * Scans an identifier from the current reader. 561 * The scanned identifier is appended to <code>result</code>. 562 * 563 * @param result 564 * The buffer in which the scanned identifier will be put. 
565 * 566 * <dl><dt><b>Preconditions:</b></dt><dd> 567 * <ul><li>{@code result != null}</li> 568 * <li>The next character read from the reader is a valid first 569 * character of an XML identifier.</li> 570 * </ul></dd></dl> 571 * 572 * <dl><dt><b>Postconditions:</b></dt><dd> 573 * <ul><li>The next character read from the reader won't be an identifier 574 * character.</li> 575 * </ul></dd></dl> 576 * @throws java.io.IOException if something goes wrong 577 */ scanIdentifier(StringBuffer result)578 protected void scanIdentifier(StringBuffer result) 579 throws IOException { 580 for (;;) { 581 char ch = this.readChar(); 582 if (((ch < 'A') || (ch > 'Z')) && ((ch < 'a') || (ch > 'z')) 583 && ((ch < '0') || (ch > '9')) && (ch != '_') && (ch != '.') 584 && (ch != ':') && (ch != '-') && (ch <= '\u007E')) { 585 this.unreadChar(ch); 586 return; 587 } 588 result.append(ch); 589 } 590 } 591 isRegularWhiteSpace(char ch)592 private boolean isRegularWhiteSpace(char ch) { 593 switch (ch) { 594 case ' ': 595 case '\t': 596 case '\n': 597 case '\r': 598 return true; 599 default: 600 return false; 601 } 602 } 603 604 /** 605 * This method scans an identifier from the current reader. 606 * 607 * @return the next character following the whitespace. 608 * @throws java.io.IOException if something goes wrong 609 */ scanWhitespace()610 private char scanWhitespace() 611 throws IOException { 612 while(true) { 613 char ch = this.readChar(); 614 if (!isRegularWhiteSpace(ch)) { 615 return ch; 616 } 617 } 618 } 619 /** 620 * This method scans an leading identifier from the current reader. 621 * 622 * UNlike scanWhitespace, it skipps also BOM 623 * 624 * @return the next character following the whitespace. 
625 * @throws java.io.IOException if something goes wrong 626 */ scanLeadingWhitespace()627 private char scanLeadingWhitespace() 628 throws IOException { 629 while(true) { 630 char ch = this.readChar(); 631 //this is BOM , not space 632 if (ch == '') { 633 BOM = true; 634 } else if (!isRegularWhiteSpace(ch)) { 635 return ch; 636 } 637 } 638 } 639 640 /** 641 * This method scans an identifier from the current reader. 642 * <p> 643 * The scanned whitespace is appended to {@code result}. 644 * 645 * @param result where to append scanned text 646 * @return the next character following the whitespace. 647 * 648 * <dl><dt><b>Preconditions:</b></dt><dd> 649 * <ul><li>{@code result != null}</li> 650 * </ul></dd></dl> 651 * @throws java.io.IOException if something goes wrong 652 */ scanWhitespace(StringBuffer result)653 protected char scanWhitespace(StringBuffer result) 654 throws IOException { 655 while (true) { 656 char ch = this.readChar(); 657 if (!isRegularWhiteSpace(ch)) { 658 return ch; 659 } else { 660 switch (ch) { 661 case ' ': 662 case '\t': 663 case '\n': 664 result.append(ch); 665 } 666 } 667 } 668 } 669 670 /** 671 * This method scans a delimited string from the current reader. 672 * <p> 673 * The scanned string without delimiters is appended to {@code string}. 
674 * 675 * <dl><dt><b>Preconditions:</b></dt><dd> 676 * <ul><li>{@code string != null}</li> 677 * <li>the next char read is the string delimiter</li> 678 * </ul></dd></dl> 679 * @param string where to append the result 680 * @throws java.io.IOException if something goes wrong 681 */ scanString(StringBuffer string)682 protected void scanString(StringBuffer string) 683 throws IOException { 684 char delimiter = this.readChar(); 685 if ((delimiter != '\'') && (delimiter != '"')) { 686 throw this.expectedInput("' or \""); 687 } 688 for (;;) { 689 char ch = this.readChar(); 690 if (ch == delimiter) { 691 return; 692 } else if (ch == '&') { 693 this.resolveEntity(string); 694 } else { 695 string.append(ch); 696 } 697 } 698 } 699 700 /** 701 * Scans a {@code #PCDATA} element. CDATA sections and entities are 702 * resolved. 703 * <p> 704 * The next < char is skipped. 705 * <p> 706 * The scanned data is appended to {@code data}. 707 * 708 * <dl><dt><b>Preconditions:</b></dt><dd> 709 * <ul><li>{@code data != null}</li> 710 * </ul></dd></dl> 711 * @param data where to append data 712 * @throws java.io.IOException if something goes wrong 713 */ scanPCData(StringBuffer data)714 protected void scanPCData(StringBuffer data) 715 throws IOException { 716 for (;;) { 717 char ch = this.readChar(); 718 if (ch == '<') { 719 ch = this.readChar(); 720 if (ch == '!') { 721 this.checkCDATA(data); 722 } else { 723 this.unreadChar(ch); 724 return; 725 } 726 } else if (ch == '&') { 727 this.resolveEntity(data); 728 } else { 729 data.append(ch); 730 } 731 } 732 } 733 734 /** 735 * Scans a special tag and if the tag is a CDATA section, append its 736 * content to {@code buf}. 
737 * 738 * <dl><dt><b>Preconditions:</b></dt><dd> 739 * <ul><li>{@code buf != null}</li> 740 * <li>The first < has already been read.</li> 741 * </ul></dd></dl> 742 * @param buf buffer where to append data 743 * @return whether the CDATA were ok 744 * @throws java.io.IOException if something goes wrong 745 */ checkCDATA(StringBuffer buf)746 protected boolean checkCDATA(StringBuffer buf) 747 throws IOException { 748 char ch = this.readChar(); 749 if (ch != '[') { 750 this.unreadChar(ch); 751 this.skipSpecialTag(0); 752 return false; 753 } else if (!this.checkLiteral("CDATA[")) { 754 this.skipSpecialTag(1); // one [ has already been read 755 return false; 756 } else { 757 int delimiterCharsSkipped = 0; 758 while (delimiterCharsSkipped < 3) { 759 ch = this.readChar(); 760 switch (ch) { 761 case ']': 762 if (delimiterCharsSkipped < 2) { 763 delimiterCharsSkipped += 1; 764 } else { 765 buf.append(']'); 766 buf.append(']'); 767 delimiterCharsSkipped = 0; 768 } 769 break; 770 case '>': 771 if (delimiterCharsSkipped < 2) { 772 for (int i = 0; i < delimiterCharsSkipped; i++) { 773 buf.append(']'); 774 } 775 delimiterCharsSkipped = 0; 776 buf.append('>'); 777 } else { 778 delimiterCharsSkipped = 3; 779 } 780 break; 781 default: 782 for (int i = 0; i < delimiterCharsSkipped; i += 1) { 783 buf.append(']'); 784 } 785 buf.append(ch); 786 delimiterCharsSkipped = 0; 787 } 788 } 789 return true; 790 } 791 } 792 793 /** 794 * Skips a comment. 795 * 796 * <dl><dt><b>Preconditions:</b></dt><dd> 797 * <ul><li>The first <!-- has already been read.</li> 798 * </ul></dd></dl> 799 * @throws java.io.IOException if something goes wrong 800 */ skipComment()801 protected void skipComment() 802 throws IOException { 803 int dashesToRead = 2; 804 while (dashesToRead > 0) { 805 char ch = this.readChar(); 806 if (ch == '-') { 807 dashesToRead -= 1; 808 } else { 809 dashesToRead = 2; 810 } 811 812 // Be more tolerant of extra -- (double dashes) 813 // in comments. 
814 if (dashesToRead == 0) { 815 ch = this.readChar(); 816 if (ch == '>') { 817 return; 818 } else { 819 dashesToRead = 2; 820 this.unreadChar(ch); 821 } 822 } 823 } 824 /* 825 if (this.readChar() != '>') { 826 throw this.expectedInput(">"); 827 } 828 */ 829 } 830 831 /** 832 * Skips a special tag or comment. 833 * 834 * @param bracketLevel The number of open square brackets ([) that have 835 * already been read. 836 * 837 * <dl><dt><b>Preconditions:</b></dt><dd> 838 * <ul><li>The first <! has already been read.</li> 839 * <li>{@code bracketLevel >= 0}</li> 840 * </ul></dd></dl> 841 * @throws java.io.IOException if something goes wrong 842 */ skipSpecialTag(int bracketLevel)843 protected void skipSpecialTag(int bracketLevel) 844 throws IOException { 845 int tagLevel = 1; // < 846 char stringDelimiter = '\0'; 847 if (bracketLevel == 0) { 848 char ch = this.readChar(); 849 if (ch == '[') { 850 bracketLevel += 1; 851 } else if (ch == '-') { 852 ch = this.readChar(); 853 if (ch == '[') { 854 bracketLevel += 1; 855 } else if (ch == ']') { 856 bracketLevel -= 1; 857 } else if (ch == '-') { 858 this.skipComment(); 859 return; 860 } 861 } 862 } 863 while (tagLevel > 0) { 864 char ch = this.readChar(); 865 if (stringDelimiter == '\0') { 866 if ((ch == '"') || (ch == '\'')) { 867 stringDelimiter = ch; 868 } else if (bracketLevel <= 0) { 869 if (ch == '<') { 870 tagLevel += 1; 871 } else if (ch == '>') { 872 tagLevel -= 1; 873 } 874 } 875 if (ch == '[') { 876 bracketLevel += 1; 877 } else if (ch == ']') { 878 bracketLevel -= 1; 879 } 880 } else { 881 if (ch == stringDelimiter) { 882 stringDelimiter = '\0'; 883 } 884 } 885 } 886 } 887 888 /** 889 * Scans the data for literal text. 890 * <p> 891 * Scanning stops when a character does not match or after the complete 892 * text has been checked, whichever comes first. 893 * 894 * @param literal the literal to check. 
895 * 896 * <dl><dt><b>Preconditions:</b></dt><dd> 897 * <ul><li>{@code literal != null}</li> 898 * </ul></dd></dl> 899 * @return true if literal was ok 900 * @throws java.io.IOException if something goes wrong 901 */ checkLiteral(String literal)902 protected boolean checkLiteral(String literal) 903 throws IOException { 904 int length = literal.length(); 905 for (int i = 0; i < length; i += 1) { 906 if (this.readChar() != literal.charAt(i)) { 907 return false; 908 } 909 } 910 return true; 911 } 912 913 /** 914 * Reads a character from a reader. 915 * @return the read char 916 * @throws java.io.IOException if something goes wrong 917 */ readChar()918 protected char readChar() 919 throws IOException { 920 if (this.charReadTooMuch != '\0') { 921 char ch = this.charReadTooMuch; 922 this.charReadTooMuch = '\0'; 923 return ch; 924 } else { 925 int i = this.reader.read(); 926 if (i < 0) { 927 throw this.unexpectedEndOfData(); 928 } else if (i == 10) { 929 this.parserLineNr += 1; 930 return '\n'; 931 } else { 932 return (char) i; 933 } 934 } 935 } 936 937 /** 938 * Scans an XML element. 939 * 940 * @param elt The element that will contain the result. 
941 * 942 * <dl><dt><b>Preconditions:</b></dt><dd> 943 * <ul><li>The first < has already been read.</li> 944 * <li>{@code elt != null}</li> 945 * </ul></dd></dl> 946 * @throws java.io.IOException if something goes wrong 947 */ scanElement(XMLElement elt)948 protected void scanElement(XMLElement elt) 949 throws IOException { 950 StringBuffer buf = new StringBuffer(); 951 this.scanIdentifier(buf); 952 String lname = buf.toString(); 953 elt.setName(lname); 954 char ch = this.scanWhitespace(); 955 while ((ch != '>') && (ch != '/')) { 956 buf.setLength(0); 957 this.unreadChar(ch); 958 this.scanIdentifier(buf); 959 String key = buf.toString(); 960 ch = this.scanWhitespace(); 961 if (ch != '=') { 962 throw this.expectedInput("="); 963 } 964 this.unreadChar(this.scanWhitespace()); 965 buf.setLength(0); 966 this.scanString(buf); 967 elt.setAttribute(key, buf); 968 ch = this.scanWhitespace(); 969 } 970 if (ch == '/') { 971 ch = this.readChar(); 972 if (ch != '>') { 973 throw this.expectedInput(">"); 974 } 975 return; 976 } 977 buf.setLength(0); 978 ch = this.scanWhitespace(buf); 979 if (ch != '<') { 980 this.unreadChar(ch); 981 this.scanPCData(buf); 982 } else { 983 for (;;) { 984 ch = this.readChar(); 985 if (ch == '!') { 986 if (this.checkCDATA(buf)) { 987 this.scanPCData(buf); 988 break; 989 } else { 990 ch = this.scanWhitespace(buf); 991 if (ch != '<') { 992 this.unreadChar(ch); 993 this.scanPCData(buf); 994 break; 995 } 996 } 997 } else { 998 buf.setLength(0); 999 break; 1000 } 1001 } 1002 } 1003 if (buf.length() == 0) { 1004 while (ch != '/') { 1005 if (ch == '!') { 1006 ch = this.readChar(); 1007 if (ch != '-') { 1008 throw this.expectedInput("Comment or Element"); 1009 } 1010 ch = this.readChar(); 1011 if (ch != '-') { 1012 throw this.expectedInput("Comment or Element"); 1013 } 1014 this.skipComment(); 1015 } else { 1016 this.unreadChar(ch); 1017 XMLElement child = this.createAnotherElement(); 1018 this.scanElement(child); 1019 elt.addChild(child); 1020 } 1021 ch = 
this.scanWhitespace(); 1022 if (ch != '<') { 1023 throw this.expectedInput("<"); 1024 } 1025 ch = this.readChar(); 1026 } 1027 this.unreadChar(ch); 1028 } else { 1029 if (this.ignoreWhitespace) { 1030 elt.setContent(buf.toString().trim()); 1031 } else { 1032 elt.setContent(buf.toString()); 1033 } 1034 } 1035 ch = this.readChar(); 1036 if (ch != '/') { 1037 throw this.expectedInput("/"); 1038 } 1039 this.unreadChar(this.scanWhitespace()); 1040 if (!this.checkLiteral(lname)) { 1041 throw this.expectedInput(lname); 1042 } 1043 if (this.scanWhitespace() != '>') { 1044 throw this.expectedInput(">"); 1045 } 1046 } 1047 1048 /** 1049 * Resolves an entity. The name of the entity is read from the reader. 1050 * <p> 1051 * The value of the entity is appended to {@code buf}. 1052 * 1053 * @param buf Where to put the entity value. 1054 * 1055 * <dl><dt><b>Preconditions:</b></dt><dd> 1056 * <ul><li>The first & has already been read.</li> 1057 * <li>{@code buf != null}</li> 1058 * </ul></dd></dl> 1059 * @throws java.io.IOException if something goes wrong 1060 */ resolveEntity(StringBuffer buf)1061 protected void resolveEntity(StringBuffer buf) 1062 throws IOException { 1063 char ch = '\0'; 1064 StringBuffer keyBuf = new StringBuffer(); 1065 for (;;) { 1066 ch = this.readChar(); 1067 if (ch == ';') { 1068 break; 1069 } 1070 keyBuf.append(ch); 1071 } 1072 String key = keyBuf.toString(); 1073 if (key.charAt(0) == '#') { 1074 try { 1075 if (key.charAt(1) == 'x') { 1076 ch = (char) Integer.parseInt(key.substring(2), 16); 1077 } else { 1078 ch = (char) Integer.parseInt(key.substring(1), 10); 1079 } 1080 } catch (NumberFormatException e) { 1081 throw this.unknownEntity(key); 1082 } 1083 buf.append(ch); 1084 } else { 1085 char[] value = entities.get(key); 1086 if (value == null) { 1087 throw this.unknownEntity(key); 1088 } 1089 buf.append(value); 1090 } 1091 } 1092 1093 /** 1094 * Pushes a character back to the read-back buffer. 1095 * 1096 * @param ch The character to push back. 
1097 * 1098 * <dl><dt><b>Preconditions:</b></dt><dd> 1099 * <ul><li>The read-back buffer is empty.</li> 1100 * <li>{@code ch != '\0'}</li> 1101 * </ul></dd></dl> 1102 */ unreadChar(char ch)1103 protected void unreadChar(char ch) { 1104 this.charReadTooMuch = ch; 1105 } 1106 1107 /** 1108 * Creates a parse exception for when an invalid valueset is given to 1109 * a method. 1110 * 1111 * @param name The name of the entity. 1112 * 1113 * <dl><dt><b>Preconditions:</b></dt><dd> 1114 * <ul><li>{@code name != null}</li> 1115 * </ul></dd></dl> 1116 * @return exception to be thrown 1117 */ invalidValueSet(String name)1118 protected XMLParseException invalidValueSet(String name) { 1119 String msg = "Invalid value set (entity name = \"" + name + "\")"; 1120 return new XMLParseException(this.getName(), this.parserLineNr, msg); 1121 } 1122 1123 /** 1124 * Creates a parse exception for when an invalid value is given to a 1125 * method. 1126 * 1127 * @param name The name of the entity. 1128 * @param value The value of the entity. 1129 * 1130 * <dl><dt><b>Preconditions:</b></dt><dd> 1131 * <ul><li>{@code name != null}</li> 1132 * <li>{@code value != null}</li> 1133 * </ul></dd></dl> 1134 * @return exception to be used 1135 */ invalidValue(String name, String value)1136 protected XMLParseException invalidValue(String name, 1137 String value) { 1138 String msg = "Attribute \"" + name + "\" does not contain a valid " 1139 + "value (\"" + value + "\")"; 1140 return new XMLParseException(this.getName(), this.parserLineNr, msg); 1141 } 1142 1143 /** 1144 * Creates a parse exception for when the end of the data input has been 1145 * reached. 1146 * @return exception to be used 1147 */ unexpectedEndOfData()1148 protected XMLParseException unexpectedEndOfData() { 1149 String msg = "Unexpected end of data reached"; 1150 return new XMLParseException(this.getName(), this.parserLineNr, msg); 1151 } 1152 1153 /** 1154 * Creates a parse exception for when a syntax error occured. 
1155 * 1156 * @param context The context in which the error occured. 1157 * 1158 * <dl><dt><b>Preconditions:</b></dt><dd> 1159 * <ul><li>{@code context != null}</li> 1160 * <li>{@code context.length() > 0}</li> 1161 * </ul></dd></dl> 1162 * @return exception to be used 1163 */ syntaxError(String context)1164 protected XMLParseException syntaxError(String context) { 1165 String msg = "Syntax error while parsing " + context; 1166 return new XMLParseException(this.getName(), this.parserLineNr, msg); 1167 } 1168 1169 /** 1170 * Creates a parse exception for when the next character read is not 1171 * the character that was expected. 1172 * 1173 * @param charSet The set of characters (in human readable form) that was 1174 * expected. 1175 * 1176 * <dl><dt><b>Preconditions:</b></dt><dd> 1177 * <ul><li>{@code charSet != null}</li> 1178 * <li>{@code charSet.length() > 0}</li> 1179 * </ul></dd></dl> 1180 * @return exception to be used 1181 */ expectedInput(String charSet)1182 protected XMLParseException expectedInput(String charSet) { 1183 String msg = "Expected: " + charSet; 1184 return new XMLParseException(this.getName(), this.parserLineNr, msg); 1185 } 1186 1187 /** 1188 * Creates a parse exception for when the next character read is not 1189 * the character that was expected. 1190 * 1191 * @param charSet The set of characters (in human readable form) that was 1192 * expected. 1193 * @param ch The character that was received instead. 
1194 * <dl><dt><b>Preconditions:</b></dt><dd> 1195 * <ul><li>{@code charSet != null}</li> 1196 * <li>{@code charSet.length() > 0}</li> 1197 * </ul></dd></dl> 1198 * @return exception to be used 1199 */ expectedInput(String charSet, char ch)1200 protected XMLParseException expectedInput(String charSet, char ch) { 1201 String msg = "Expected: '" + charSet + "'" + " but got: '" + ch + "'"; 1202 return new XMLParseException(this.getName(), this.parserLineNr, msg); 1203 } 1204 1205 /** 1206 * Creates a parse exception for when an entity could not be resolved. 1207 * 1208 * @param name The name of the entity. 1209 * @return exception to be used 1210 * 1211 * <dl><dt><b>Preconditions:</b></dt><dd> 1212 * <ul><li>{@code name != null}</li> 1213 * <li>{@code name.length() > 0}</li> 1214 * </ul></dd></dl> 1215 */ unknownEntity(String name)1216 protected XMLParseException unknownEntity(String name) { 1217 String msg = "Unknown or invalid entity: &" + name + ";"; 1218 return new XMLParseException(this.getName(), this.parserLineNr, msg); 1219 } 1220 1221 /** 1222 * Reads an xml file and removes the comments, leaving only relevant 1223 * xml code. 1224 * 1225 * @param isr The reader of the {@link java.io.InputStream} containing the xml. 1226 * @param pout The {@link java.io.PipedOutputStream} that will be receiving the 1227 * filtered xml file. 
1228 */ sanitizeInput(Reader isr, OutputStream pout)1229 public void sanitizeInput(Reader isr, OutputStream pout) { 1230 StringBuilder line = new StringBuilder(); 1231 try (PrintStream out = new PrintStream(pout)) { 1232 this.sanitizeCharReadTooMuch = '\0'; 1233 this.reader = isr; 1234 this.parserLineNr = 0; 1235 int newline = 2; 1236 char prev = ' '; 1237 1238 while (true) { 1239 char ch; 1240 if (this.sanitizeCharReadTooMuch != '\0') { 1241 ch = this.sanitizeCharReadTooMuch; 1242 this.sanitizeCharReadTooMuch = '\0'; 1243 } else { 1244 1245 int i = this.reader.read(); 1246 if (i == -1) { 1247 // no character in buffer, and nothing read 1248 out.flush(); 1249 break; 1250 } else if (i == 10) { 1251 ch = '\n'; 1252 } else { 1253 ch = (char) i; 1254 } 1255 } 1256 1257 char next; 1258 int i = this.reader.read(); 1259 if (i == -1) { 1260 // character in buffer and nothing read. write out 1261 // what's in the buffer 1262 out.print(ch); 1263 out.flush(); 1264 if (ch == 10) { 1265 OutputController.getLogger().log(line.toString()); 1266 line = new StringBuilder("line: " + newline + " "); 1267 newline++; 1268 } else { 1269 line.append(ch); 1270 } 1271 break; 1272 } else if (i == 10) { 1273 next = '\n'; 1274 } else { 1275 next = (char) i; 1276 } 1277 1278 this.sanitizeCharReadTooMuch = next; 1279 1280 // If the next chars are !--, then we've hit a comment tag, 1281 // and should skip it. 
1282 if (ch == '<' && sanitizeCharReadTooMuch == '!') { 1283 ch = (char) this.reader.read(); 1284 if (ch == '-') { 1285 ch = (char) this.reader.read(); 1286 if (ch == '-') { 1287 this.skipComment(); 1288 this.sanitizeCharReadTooMuch = '\0'; 1289 } else { 1290 out.print('<'); 1291 out.print('!'); 1292 out.print('-'); 1293 this.sanitizeCharReadTooMuch = ch; 1294 line.append("<"); 1295 line.append("!"); 1296 line.append("-"); 1297 } 1298 } else { 1299 out.print('<'); 1300 out.print('!'); 1301 this.sanitizeCharReadTooMuch = ch; 1302 line.append("<"); 1303 line.append("!"); 1304 } 1305 } 1306 // Otherwise we haven't hit a comment, and we should write ch. 1307 else { 1308 out.print(ch); 1309 if (ch == 10) { 1310 OutputController.getLogger().log(line.toString()); 1311 line = new StringBuilder("line: " + newline + " "); 1312 newline++; 1313 } else { 1314 line.append(ch); 1315 } 1316 } 1317 prev = next; 1318 } 1319 isr.close(); 1320 } catch (Exception e) { 1321 // Print the stack trace here -- xml.parseFromReader() will 1322 // throw the ParseException if something goes wrong. 1323 OutputController.getLogger().log(OutputController.Level.ERROR_ALL, e); 1324 } finally { 1325 OutputController.getLogger().log("");//force new line in all cases 1326 OutputController.getLogger().log(line.toString()); //flush remaining line 1327 1328 } 1329 } 1330 isBOM()1331 public boolean isBOM() { 1332 return BOM; 1333 } 1334 1335 1336 1337 } 1338