1 /* XMLElement.java
2  *
3  * $Revision: 1.2 $
4  * $Date: 2002/08/03 04:36:34 $
5  * $Name:  $
6  *
7  * This file is part of NanoXML 2 Lite.
8  * Copyright (C) 2000-2002 Marc De Scheemaecker, All Rights Reserved.
9  *
10  * This software is provided 'as-is', without any express or implied warranty.
11  * In no event will the authors be held liable for any damages arising from the
12  * use of this software.
13  *
14  * Permission is granted to anyone to use this software for any purpose,
15  * including commercial applications, and to alter it and redistribute it
16  * freely, subject to the following restrictions:
17  *
18  *  1. The origin of this software must not be misrepresented; you must not
19  *     claim that you wrote the original software. If you use this software in
20  *     a product, an acknowledgment in the product documentation would be
21  *     appreciated but is not required.
22  *
23  *  2. Altered source versions must be plainly marked as such, and must not be
24  *     misrepresented as being the original software.
25  *
26  *  3. This notice may not be removed or altered from any source distribution.
27  *****************************************************************************/
28 
29 /* JAM: hacked the source to remove unneeded methods and comments. */
30 
31 package net.sourceforge.nanoxml;
32 
33 
34 import java.io.IOException;
35 import java.io.OutputStream;
36 import java.io.PrintStream;
37 import java.io.Reader;
38 import java.util.Enumeration;
39 import java.util.HashMap;
40 import java.util.Map;
41 import java.util.Set;
42 import java.util.Vector;
43 import net.sourceforge.jnlp.util.logging.OutputController;
44 
45 /**
46  * XMLElement is a representation of an XML object. The object is able to parse
47  * XML code.
48  * <dl>
49  * <dt><b>Parsing XML Data</b></dt>
50  * <dd>
51  * You can parse XML data using the following code:
52  * <pre>{@code
53  *XMLElement xml = new XMLElement();
54  *FileReader reader = new FileReader("filename.xml");
55  *xml.parseFromReader(reader);
56  *}</pre></dd></dl>
57  * <dl><dt><b>Retrieving Attributes</b></dt>
58  * <dd>
59  * You can enumerate the attributes of an element using the method
60  * {@link #enumerateAttributeNames() enumerateAttributeNames}.
61  * The attribute values can be retrieved using the method
62  * {@link #getAttribute(java.lang.String) getAttribute}.
63  * The following example shows how to list the attributes of an element:
64  * <pre>{@code
65  *XMLElement element = ...;
66  *Enumeration enum = element.enumerateAttributeNames();
67  *while (enum.hasMoreElements()) {
68  *    String key = (String) enum.nextElement();
69  *    String value = (String) element.getAttribute(key);
70  *    System.out.println(key + " = " + value);
71  *}}</pre></dd></dl>
72  * <dl><dt><b>Retrieving Child Elements</b></dt>
73  * <dd>
74  * You can enumerate the children of an element using
75  * {@link #enumerateChildren() enumerateChildren}.
76  * The number of child elements can be retrieved using
77  * {@link #countChildren() countChildren}.
78  * </dd></dl>
79  * <dl><dt><b>Elements Containing Character Data</b></dt>
80  * <dd>
 * If an element contains character data, like in the following example:
82  * <pre>{@code <title>The Title</title>}</pre>
83  * you can retrieve that data using the method
84  * {@link #getContent() getContent}.
85  * </dd></dl>
86  * <dl><dt><b>Subclassing XMLElement</b></dt>
87  * <dd>
88  * When subclassing XMLElement, you need to override the method
89  * {@link #createAnotherElement() createAnotherElement}
90  * which has to return a new copy of the receiver.
91  * </dd></dl>
92  *
93  * @see net.sourceforge.nanoxml.XMLParseException
94  *
95  * @author Marc De Scheemaecker
96  *         &lt;<A href="mailto:cyberelf@mac.com">cyberelf@mac.com</A>&gt;
97  * @version $Name:  $, $Revision: 1.2 $
98  */
99 public class XMLElement {
100 
101     /**
102      * The attributes given to the element.
103      *
104      * <dl><dt><b>Invariants:</b></dt><dd>
105      * <ul><li>The field can be empty.</li>
106      *     <li>The field is never {@code null}.</li>
107      *     <li>The keys and the values are strings.</li>
108      * </ul></dd></dl>
109      */
110     private Map<String, Object> attributes;
111 
112     /**
113      * Child elements of the element.
114      *
115      * <dl><dt><b>Invariants:</b></dt><dd>
116      * <ul><li>The field can be empty.</li>
117      *     <li>The field is never {@code null}.</li>
118      *     <li>The elements are instances of {@code XMLElement}
119      *         or a subclass of {@code XMLElement}.</li>
120      * </ul></dd></dl>
121      */
122     private Vector<XMLElement> children;
123 
124     /**
125      * The name of the element.
126      *
127      * <dl><dt><b>Invariants:</b></dt><dd>
128      * <ul><li>The field is {@code null} iff the element is not
129      *         initialized by either parse or {@link #setName setName()}.</li>
130      *     <li>If the field is not {@code null}, it's not empty.</li>
131      *     <li>If the field is not {@code null}, it contains a valid
132      *         XML identifier.</li>
133      * </ul></dd></dl>
134      */
135     private String name;
136 
137     /**
138      * The {@code #PCDATA} content of the object.
139      *
140      * <dl><dt><b>Invariants:</b></dt><dd>
141      * <ul><li>The field is {@code null} iff the element is not a
142      *         {@code #PCDATA} element.</li>
143      *     <li>The field can be any string, including the empty string.</li>
144      * </ul></dd></dl>
145      */
146     private String contents;
147 
148     /**
149      * Conversion table for &amp;...; entities. The keys are the entity names
150      * without the &amp; and ; delimiters.
151      *
152      * <dl><dt><b>Invariants:</b></dt><dd>
153      * <ul><li>The field is never {@code null}.</li>
154      *     <li>The field always contains the following associations:
155      *         "lt"&nbsp;=&gt;&nbsp;"&lt;", "gt"&nbsp;=&gt;&nbsp;"&gt;",
156      *         "quot"&nbsp;=&gt;&nbsp;"\"", "apos"&nbsp;=&gt;&nbsp;"'",
157      *         "amp"&nbsp;=&gt;&nbsp;"&amp;"</li>
158      *     <li>The keys are strings</li>
159      *     <li>The values are char arrays</li>
160      * </ul></dd></dl>
161      */
162     private Map<String, char[]> entities;
163 
164     /**
165      * The line number where the element starts.
166      *
167      * <dl><dt><b>Invariants:</b></dt><dd>
168      * <ul><li>{@code lineNr >= 0}</li>
169      * </ul></dd></dl>
170      */
171     private int lineNr;
172 
173     /**
     * {@code true} if element and attribute names are treated as case
     * insensitive.
176      */
177     private boolean ignoreCase;
178 
179     /**
180      * {@code true} if the leading and trailing whitespace of {@code #PCDATA}
181      * sections have to be ignored.
182      */
183     private boolean ignoreWhitespace;
184 
185     /**
186      * Character read too much.
187      * <p>
188      * This character provides push-back functionality to the input reader
189      * without having to use a PushbackReader.
190      * If there is no such character, this field is {@code '\0'}.
191      */
192     private char charReadTooMuch;
193 
194     /**
195      * Character read too much for the comment remover.
196      */
197     private char sanitizeCharReadTooMuch;
198 
199    /**
200      * Whether the BOM header appeared
201      */
202     private boolean BOM = false;
203 
204     /**
205      * The reader provided by the caller of the parse method.
206      *
207      * <dl><dt><b>Invariants:</b></dt><dd>
208      * <ul><li>The field is not {@code null} while the parse method is
209      *         running.</li>
210      * </ul></dd></dl>
211      */
212     private Reader reader;
213 
214     /**
215      * The current line number in the source content.
216      *
217      * <dl><dt><b>Invariants:</b></dt><dd>
218      * <ul><li>parserLineNr &gt; 0 while the parse method is running.</li>
219      * </ul></dd></dl>
220      */
221     private int parserLineNr;
222 
223     /**
224      * Creates and initializes a new XML element.
225      * <p>
226      * Calling the construction is equivalent to:
227      * <ul><li>{@code new XMLElement(new HashMap(), false, true)}</li></ul>
228      *
229      * <dl><dt><b>Postconditions:</b></dt><dd>
230      * <ul><li>{@linkplain #countChildren} =&gt; 0</li>
231      *     <li>{@linkplain #enumerateChildren} =&gt; empty enumeration</li>
232      *     <li>enumeratePropertyNames() =&gt; empty enumeration</li>
233      *     <li>getChildren() =&gt; empty vector</li>
234      *     <li>{@linkplain #getContent} =&gt; ""</li>
235      *     <li>{@linkplain #getLineNr} =&gt; 0</li>
236      *     <li>{@linkplain #getName} =&gt; null</li>
237      * </ul></dd></dl>
238      */
XMLElement()239     public XMLElement() {
240         this(new HashMap<String, char[]>(), false, true, true);
241     }
242 
243     /**
244      * Creates and initializes a new XML element.
245      * <p>
246      * This constructor should <i>only</i> be called from
247      * {@link #createAnotherElement} to create child elements.
248      *
249      * @param entities
250      *     The entity conversion table.
251      * @param skipLeadingWhitespace
252      *     {@code true} if leading and trailing whitespace in PCDATA
253      *     content has to be removed.
254      * @param fillBasicConversionTable
255      *     {@code true} if the basic entities need to be added to
256      *     the entity list (client code calling this constructor).
257      * @param ignoreCase
258      *     {@code true} if the case of element and attribute names have
259      *     to be ignored.
260      *
261      * <dl><dt><b>Preconditions:</b></dt><dd>
262      * <ul><li>{@code entities != null}</li>
263      *     <li>if {@code fillBasicConversionTable == false}
264      *         then {@code entities} contains at least the following
265      *         entries: {@code amp}, {@code lt}, {@code gt}, {@code apos} and
266      *         {@code quot}</li>
267      * </ul></dd></dl>
268      *
269      * <dl><dt><b>Postconditions:</b></dt><dd>
270      * <ul><li>{@linkplain #countChildren} =&gt; 0</li>
271      *     <li>{@linkplain #enumerateChildren} =&gt; empty enumeration</li>
272      *     <li>enumeratePropertyNames() =&gt; empty enumeration</li>
273      *     <li>getChildren() =&gt; empty vector</li>
274      *     <li>{@linkplain #getContent} =&gt; ""</li>
275      *     <li>{@linkplain #getLineNr} =&gt; 0</li>
276      *     <li>{@linkplain #getName} =&gt; null</li>
277      * </ul></dd></dl>
278      */
XMLElement(Map<String, char[]> entities, boolean skipLeadingWhitespace, boolean fillBasicConversionTable, boolean ignoreCase)279     protected XMLElement(Map<String, char[]> entities,
280                          boolean skipLeadingWhitespace,
281                          boolean fillBasicConversionTable,
282                          boolean ignoreCase) {
283         this.ignoreWhitespace = skipLeadingWhitespace;
284         this.ignoreCase = ignoreCase;
285         this.name = null;
286         this.contents = "";
287         this.attributes = new HashMap<>();
288         this.children = new Vector<>();
289         this.entities = entities;
290         this.lineNr = 0;
291         Set<String> e = this.entities.keySet();
292         for(String key: e) {
293             Object value = this.entities.get(key);
294             if (value instanceof String) {
295                 entities.put(key, ((String) value).toCharArray());
296             }
297         }
298         if (fillBasicConversionTable) {
299             this.entities.put("amp", new char[] { '&' });
300             this.entities.put("quot", new char[] { '"' });
301             this.entities.put("apos", new char[] { '\'' });
302             this.entities.put("lt", new char[] { '<' });
303             this.entities.put("gt", new char[] { '>' });
304         }
305     }
306 
307     /**
308      * Adds a child element.
309      *
310      * @param child
311      *     The child element to add.
312      *
313      * <dl><dt><b>Preconditions:</b></dt><dd>
314      * <ul><li>{@code child != null}</li>
315      *     <li>{@code child.getName() != null}</li>
316      *     <li>{@code child} does not have a parent element</li>
317      * </ul></dd></dl>
318      *
319      * <dl><dt><b>Postconditions:</b></dt><dd>
320      * <ul><li>{@linkplain #countChildren} =&gt; old.countChildren() + 1</li>
321      *     <li>{@linkplain #enumerateChildren} =&gt; old.enumerateChildren()
322                + child</li>
323      *     <li>getChildren() =&gt; old.enumerateChildren() + child</li>
324      * </ul></dd></dl>
325      *
326      */
addChild(XMLElement child)327     public void addChild(XMLElement child) {
328         this.children.addElement(child);
329     }
330 
331     /**
332      * Adds or modifies an attribute.
333      *
334      * @param name
335      *     The name of the attribute.
336      * @param value
337      *     The value of the attribute.
338      *
339      * <dl><dt><b>Preconditions:</b></dt><dd>
340      * <ul><li>{@code name != null}</li>
341      *     <li>{@code name} is a valid XML identifier</li>
342      *     <li>{@code value != null}</li>
343      * </ul></dd></dl>
344      *
345      * <dl><dt><b>Postconditions:</b></dt><dd>
346      * <ul><li>{@linkplain #enumerateAttributeNames}
347      *         =&gt; old.enumerateAttributeNames() + name</li>
348      *     <li>{@linkplain #getAttribute(java.lang.String) getAttribute(name)}
349      *         =&gt; value</li>
350      * </ul></dd></dl>
351      */
setAttribute(String name, Object value)352     public void setAttribute(String name,
353                              Object value) {
354         if (this.ignoreCase) {
355             name = name.toUpperCase();
356         }
357         this.attributes.put(name, value.toString());
358     }
359 
360     /**
361      * @return the number of child elements of the element.
362      *
363      * <dl><dt><b>Postconditions:</b></dt><dd>
364      * <ul><li>{@code result >= 0}</li>
365      * </ul></dd></dl>
366      */
countChildren()367     public int countChildren() {
368         return this.children.size();
369     }
370 
371     /**
372      * @return Enumeration of the attribute names.
373      *
374      * <dl><dt><b>Postconditions:</b></dt><dd>
375      * <ul><li>{@code result != null}</li>
376      * </ul></dd></dl>
377      */
enumerateAttributeNames()378     public Enumeration<String> enumerateAttributeNames() {
379         return new Vector(this.attributes.keySet()).elements();
380     }
381 
382     /**
383      * @return Enumeration the child elements.
384      *
385      * <dl><dt><b>Postconditions:</b></dt><dd>
386      * <ul><li>{@code result != null}</li>
387      * </ul></dd></dl>
388      */
enumerateChildren()389     public Enumeration<XMLElement> enumerateChildren() {
390         return this.children.elements();
391     }
392 
393     /**
394      * @return the PCDATA content of the object. If there is no such content,
395      * {@code null} is returned.
396      */
getContent()397     public String getContent() {
398         return this.contents;
399     }
400 
401     /**
402      * @return the line nr in the source data on which the element is found.
403      * This method returns {@code 0} there is no associated source data.
404      *
405      * <dl><dt><b>Postconditions:</b></dt><dd>
406      * <ul><li>{@code result >= 0}</li>
407      * </ul></dd></dl>
408      */
getLineNr()409     public int getLineNr() {
410         return this.lineNr;
411     }
412 
413     /**
414      * @return an attribute of the element.
415      * <p>
416      * If the attribute doesn't exist, {@code null} is returned.
417      *
418      * @param name The name of the attribute.
419      *
420      * <dl><dt><b>Preconditions:</b></dt><dd>
421      * <ul><li>{@code name != null}</li>
422      *     <li>{@code name} is a valid XML identifier</li>
423      * </ul></dd></dl>
424      */
getAttribute(String name)425     public Object getAttribute(String name) {
426         if (this.ignoreCase) {
427             name = name.toUpperCase();
428         }
429         Object value = this.attributes.get(name);
430         return value;
431     }
432 
433     /**
434      * Returns the name of the element.
435      * @return this {@code XMLElement} object's name
436      */
getName()437     public String getName() {
438         return this.name;
439     }
440 
441     /**
442      * Reads one XML element from a {@link java.io.Reader} and parses it.
443      *
444      * @param reader
445      *     The reader from which to retrieve the XML data.
446      *
447      * <dl><dt><b>Preconditions:</b></dt><dd>
448      * <ul><li>{@code reader != null}</li>
449      *     <li>{@code reader} is not closed</li>
450      * </ul></dd></dl>
451      *
452      * <dl><dt><b>Postconditions:</b></dt><dd>
453      * <ul><li>the state of the receiver is updated to reflect the XML element
454      *         parsed from the reader</li>
455      *     <li>the reader points to the first character following the last
456      *         {@code '&gt;'} character of the XML element</li>
457      * </ul></dd></dl>
458      *
459      * @throws java.io.IOException
460      *     If an error occured while reading the input.
461      * @throws net.sourceforge.nanoxml.XMLParseException
462      *     If an error occured while parsing the read data.
463      */
parseFromReader(Reader reader)464     public void parseFromReader(Reader reader)
465             throws IOException, XMLParseException {
466         this.parseFromReader(reader, /*startingLineNr*/1);
467     }
468 
469     /**
470      * Reads one XML element from a java.io.Reader and parses it.
471      *
472      * @param reader
473      *     The reader from which to retrieve the XML data.
474      * @param startingLineNr
475      *     The line number of the first line in the data.
476      *
477      * <dl><dt><b>Preconditions:</b></dt><dd>
478      * <ul><li>{@code reader != null}</li>
479      *     <li>{@code reader} is not closed</li>
480      * </ul></dd></dl>
481      *
482      * <dl><dt><b>Postconditions:</b></dt><dd>
483      * <ul><li>the state of the receiver is updated to reflect the XML element
484      *         parsed from the reader</li>
485      *     <li>the reader points to the first character following the last
486      *         {@code '&gt;'} character of the XML element</li>
487      * </ul></dd></dl>
488      *
489      * @throws java.io.IOException
490      *     If an error occured while reading the input.
491      * @throws net.sourceforge.nanoxml.XMLParseException
492      *     If an error occured while parsing the read data.
493      */
parseFromReader(Reader reader, int startingLineNr)494     public void parseFromReader(Reader reader,
495                                 int startingLineNr)
496             throws IOException, XMLParseException {
497         this.charReadTooMuch = '\0';
498         this.reader = reader;
499         this.parserLineNr = startingLineNr;
500 
501         for (;;) {
502             char ch = this.scanLeadingWhitespace();
503 
504             if (ch != '<') {
505                 throw this.expectedInput("<", ch);
506             }
507 
508             ch = this.readChar();
509 
510             if ((ch == '!') || (ch == '?')) {
511                 this.skipSpecialTag(0);
512             } else {
513                 this.unreadChar(ch);
514                 this.scanElement(this);
515                 return;
516             }
517         }
518     }
519 
520     /**
521      * Creates a new similar XML element.
522      * <p>
523      * You should override this method when subclassing XMLElement.
524      * </p>
525      * @return next element in tree based on global settings
526      */
createAnotherElement()527     protected XMLElement createAnotherElement() {
528         return new XMLElement(this.entities,
529                               this.ignoreWhitespace,
530                               false,
531                               this.ignoreCase);
532     }
533 
534     /**
535      * Changes the content string.
536      *
537      * @param content
538      *     The new content string.
539      */
setContent(String content)540     public void setContent(String content) {
541         this.contents = content;
542     }
543 
544     /**
545      * Changes the name of the element.
546      *
547      * @param name
548      *     The new name.
549      *
550      * <dl><dt><b>Preconditions:</b></dt><dd>
551      * <ul><li>{@code name != null}</li>
552      *     <li>{@code name} is a valid XML identifier</li>
553      * </ul></dd></dl>
554      */
setName(String name)555     public void setName(String name) {
556         this.name = name;
557     }
558 
559     /**
560      * Scans an identifier from the current reader.
561      * The scanned identifier is appended to <code>result</code>.
562      *
563      * @param result
564      *     The buffer in which the scanned identifier will be put.
565      *
566      * <dl><dt><b>Preconditions:</b></dt><dd>
567      * <ul><li>{@code result != null}</li>
568      *     <li>The next character read from the reader is a valid first
569      *         character of an XML identifier.</li>
570      * </ul></dd></dl>
571      *
572      * <dl><dt><b>Postconditions:</b></dt><dd>
573      * <ul><li>The next character read from the reader won't be an identifier
574      *         character.</li>
575      * </ul></dd></dl>
576      * @throws java.io.IOException if something goes wrong
577      */
scanIdentifier(StringBuffer result)578     protected void scanIdentifier(StringBuffer result)
579             throws IOException {
580         for (;;) {
581             char ch = this.readChar();
582             if (((ch < 'A') || (ch > 'Z')) && ((ch < 'a') || (ch > 'z'))
583                     && ((ch < '0') || (ch > '9')) && (ch != '_') && (ch != '.')
584                     && (ch != ':') && (ch != '-') && (ch <= '\u007E')) {
585                 this.unreadChar(ch);
586                 return;
587             }
588             result.append(ch);
589         }
590     }
591 
isRegularWhiteSpace(char ch)592     private boolean isRegularWhiteSpace(char ch) {
593         switch (ch) {
594             case ' ':
595             case '\t':
596             case '\n':
597             case '\r':
598                 return true;
599             default:
600                 return false;
601         }
602     }
603 
604     /**
605      * This method scans an identifier from the current reader.
606      *
607      * @return the next character following the whitespace.
608      * @throws java.io.IOException if something goes wrong
609      */
scanWhitespace()610     private char scanWhitespace()
611             throws IOException {
612         while(true) {
613             char ch = this.readChar();
614             if (!isRegularWhiteSpace(ch)) {
615                 return ch;
616             }
617         }
618     }
619      /**
620      * This method scans an leading identifier from the current reader.
621      *
622      * UNlike scanWhitespace, it skipps also BOM
623      *
624      * @return the next character following the whitespace.
625      * @throws java.io.IOException if something goes wrong
626      */
scanLeadingWhitespace()627     private char scanLeadingWhitespace()
628             throws IOException {
629         while(true) {
630             char ch = this.readChar();
631             //this is BOM , not space
632             if (ch == '') {
633                 BOM = true;
634             } else if (!isRegularWhiteSpace(ch)) {
635                 return ch;
636             }
637         }
638     }
639 
640     /**
641      * This method scans an identifier from the current reader.
642      * <p>
643      * The scanned whitespace is appended to {@code result}.
644      *
645      * @param result where to append scanned text
646      * @return the next character following the whitespace.
647      *
648      * <dl><dt><b>Preconditions:</b></dt><dd>
649      * <ul><li>{@code result != null}</li>
650      * </ul></dd></dl>
651      * @throws java.io.IOException if something goes wrong
652      */
scanWhitespace(StringBuffer result)653     protected char scanWhitespace(StringBuffer result)
654             throws IOException {
655         while (true) {
656             char ch = this.readChar();
657             if (!isRegularWhiteSpace(ch)) {
658                 return ch;
659             } else {
660                 switch (ch) {
661                     case ' ':
662                     case '\t':
663                     case '\n':
664                         result.append(ch);
665                 }
666             }
667         }
668     }
669 
670     /**
671      * This method scans a delimited string from the current reader.
672      * <p>
673      * The scanned string without delimiters is appended to {@code string}.
674      *
675      * <dl><dt><b>Preconditions:</b></dt><dd>
676      * <ul><li>{@code string != null}</li>
677      *     <li>the next char read is the string delimiter</li>
678      * </ul></dd></dl>
679      * @param string where to append the result
680      * @throws java.io.IOException if something goes wrong
681      */
scanString(StringBuffer string)682     protected void scanString(StringBuffer string)
683             throws IOException {
684         char delimiter = this.readChar();
685         if ((delimiter != '\'') && (delimiter != '"')) {
686             throw this.expectedInput("' or \"");
687         }
688         for (;;) {
689             char ch = this.readChar();
690             if (ch == delimiter) {
691                 return;
692             } else if (ch == '&') {
693                 this.resolveEntity(string);
694             } else {
695                 string.append(ch);
696             }
697         }
698     }
699 
700     /**
701      * Scans a {@code #PCDATA} element. CDATA sections and entities are
702      * resolved.
703      * <p>
704      * The next &lt; char is skipped.
705      * <p>
706      * The scanned data is appended to {@code data}.
707      *
708      * <dl><dt><b>Preconditions:</b></dt><dd>
709      * <ul><li>{@code data != null}</li>
710      * </ul></dd></dl>
711      * @param data where to append data
712      * @throws java.io.IOException if something goes wrong
713      */
scanPCData(StringBuffer data)714     protected void scanPCData(StringBuffer data)
715             throws IOException {
716         for (;;) {
717             char ch = this.readChar();
718             if (ch == '<') {
719                 ch = this.readChar();
720                 if (ch == '!') {
721                     this.checkCDATA(data);
722                 } else {
723                     this.unreadChar(ch);
724                     return;
725                 }
726             } else if (ch == '&') {
727                 this.resolveEntity(data);
728             } else {
729                 data.append(ch);
730             }
731         }
732     }
733 
734     /**
735      * Scans a special tag and if the tag is a CDATA section, append its
736      * content to {@code buf}.
737      *
738      * <dl><dt><b>Preconditions:</b></dt><dd>
739      * <ul><li>{@code buf != null}</li>
740      *     <li>The first &lt; has already been read.</li>
741      * </ul></dd></dl>
742      * @param buf buffer where to append data
743      * @return whether the CDATA were ok
744      * @throws java.io.IOException if something goes wrong
745      */
checkCDATA(StringBuffer buf)746     protected boolean checkCDATA(StringBuffer buf)
747             throws IOException {
748         char ch = this.readChar();
749         if (ch != '[') {
750             this.unreadChar(ch);
751             this.skipSpecialTag(0);
752             return false;
753         } else if (!this.checkLiteral("CDATA[")) {
754             this.skipSpecialTag(1); // one [ has already been read
755             return false;
756         } else {
757             int delimiterCharsSkipped = 0;
758             while (delimiterCharsSkipped < 3) {
759                 ch = this.readChar();
760                 switch (ch) {
761                     case ']':
762                         if (delimiterCharsSkipped < 2) {
763                             delimiterCharsSkipped += 1;
764                         } else {
765                             buf.append(']');
766                             buf.append(']');
767                             delimiterCharsSkipped = 0;
768                         }
769                         break;
770                     case '>':
771                         if (delimiterCharsSkipped < 2) {
772                             for (int i = 0; i < delimiterCharsSkipped; i++) {
773                                 buf.append(']');
774                             }
775                             delimiterCharsSkipped = 0;
776                             buf.append('>');
777                         } else {
778                             delimiterCharsSkipped = 3;
779                         }
780                         break;
781                     default:
782                         for (int i = 0; i < delimiterCharsSkipped; i += 1) {
783                             buf.append(']');
784                         }
785                         buf.append(ch);
786                         delimiterCharsSkipped = 0;
787                 }
788             }
789             return true;
790         }
791     }
792 
793     /**
794      * Skips a comment.
795      *
796      * <dl><dt><b>Preconditions:</b></dt><dd>
797      * <ul><li>The first &lt;!-- has already been read.</li>
798      * </ul></dd></dl>
799      * @throws java.io.IOException if something goes wrong
800      */
skipComment()801     protected void skipComment()
802             throws IOException {
803         int dashesToRead = 2;
804         while (dashesToRead > 0) {
805             char ch = this.readChar();
806             if (ch == '-') {
807                 dashesToRead -= 1;
808             } else {
809                 dashesToRead = 2;
810             }
811 
812             // Be more tolerant of extra -- (double dashes)
813             // in comments.
814             if (dashesToRead == 0) {
815                 ch = this.readChar();
816                 if (ch == '>') {
817                     return;
818                 } else {
819                     dashesToRead = 2;
820                     this.unreadChar(ch);
821                 }
822             }
823         }
824         /*
825         if (this.readChar() != '>') {
826             throw this.expectedInput(">");
827         }
828         */
829     }
830 
831     /**
832      * Skips a special tag or comment.
833      *
834      * @param bracketLevel The number of open square brackets ([) that have
835      *                     already been read.
836      *
837      * <dl><dt><b>Preconditions:</b></dt><dd>
838      * <ul><li>The first &lt;! has already been read.</li>
839      *     <li>{@code bracketLevel &gt;= 0}</li>
840      * </ul></dd></dl>
841      * @throws java.io.IOException if something goes wrong
842      */
skipSpecialTag(int bracketLevel)843     protected void skipSpecialTag(int bracketLevel)
844             throws IOException {
845         int tagLevel = 1; // <
846         char stringDelimiter = '\0';
847         if (bracketLevel == 0) {
848             char ch = this.readChar();
849             if (ch == '[') {
850                 bracketLevel += 1;
851             } else if (ch == '-') {
852                 ch = this.readChar();
853                 if (ch == '[') {
854                     bracketLevel += 1;
855                 } else if (ch == ']') {
856                     bracketLevel -= 1;
857                 } else if (ch == '-') {
858                     this.skipComment();
859                     return;
860                 }
861             }
862         }
863         while (tagLevel > 0) {
864             char ch = this.readChar();
865             if (stringDelimiter == '\0') {
866                 if ((ch == '"') || (ch == '\'')) {
867                     stringDelimiter = ch;
868                 } else if (bracketLevel <= 0) {
869                     if (ch == '<') {
870                         tagLevel += 1;
871                     } else if (ch == '>') {
872                         tagLevel -= 1;
873                     }
874                 }
875                 if (ch == '[') {
876                     bracketLevel += 1;
877                 } else if (ch == ']') {
878                     bracketLevel -= 1;
879                 }
880             } else {
881                 if (ch == stringDelimiter) {
882                     stringDelimiter = '\0';
883                 }
884             }
885         }
886     }
887 
888     /**
889      * Scans the data for literal text.
890      * <p>
891      * Scanning stops when a character does not match or after the complete
892      * text has been checked, whichever comes first.
893      *
894      * @param literal the literal to check.
895      *
896      * <dl><dt><b>Preconditions:</b></dt><dd>
897      * <ul><li>{@code literal != null}</li>
898      * </ul></dd></dl>
899      * @return true if literal was ok
900      * @throws java.io.IOException  if something goes wrong
901      */
checkLiteral(String literal)902     protected boolean checkLiteral(String literal)
903             throws IOException {
904         int length = literal.length();
905         for (int i = 0; i < length; i += 1) {
906             if (this.readChar() != literal.charAt(i)) {
907                 return false;
908             }
909         }
910         return true;
911     }
912 
913     /**
914      * Reads a character from a reader.
915      * @return the read char
916      * @throws java.io.IOException if something goes wrong
917      */
readChar()918     protected char readChar()
919             throws IOException {
920         if (this.charReadTooMuch != '\0') {
921             char ch = this.charReadTooMuch;
922             this.charReadTooMuch = '\0';
923             return ch;
924         } else {
925             int i = this.reader.read();
926             if (i < 0) {
927                 throw this.unexpectedEndOfData();
928             } else if (i == 10) {
929                 this.parserLineNr += 1;
930                 return '\n';
931             } else {
932                 return (char) i;
933             }
934         }
935     }
936 
937     /**
938      * Scans an XML element.
939      *
940      * @param elt The element that will contain the result.
941      *
942      * <dl><dt><b>Preconditions:</b></dt><dd>
943      * <ul><li>The first &lt; has already been read.</li>
944      *     <li>{@code elt != null}</li>
945      * </ul></dd></dl>
946      * @throws java.io.IOException if something goes wrong
947      */
    protected void scanElement(XMLElement elt)
            throws IOException {
        // Parses one element into elt: tag name, attribute list, then either
        // text content or child elements, and finally the matching end tag.
        // NOTE(review): scanIdentifier, scanWhitespace, scanString, scanPCData
        // and checkCDATA are defined outside this part of the file; the
        // comments below describe how they are used here, not their internals.
        StringBuffer buf = new StringBuffer();
        this.scanIdentifier(buf);
        String lname = buf.toString();
        elt.setName(lname);
        // ch is the character returned after the tag name; presumably the
        // first non-whitespace character -- confirm in scanWhitespace().
        char ch = this.scanWhitespace();
        // Attribute list: repeat "key = value" until the start tag is closed
        // by '>' or by the '/' of an empty-element tag.
        while ((ch != '>') && (ch != '/')) {
            buf.setLength(0);
            // ch belongs to the attribute name, so hand it back before
            // scanning the identifier.
            this.unreadChar(ch);
            this.scanIdentifier(buf);
            String key = buf.toString();
            ch = this.scanWhitespace();
            if (ch != '=') {
                throw this.expectedInput("=");
            }
            // Push back whatever scanWhitespace() returned after the '=' so
            // that scanString() starts on it.
            this.unreadChar(this.scanWhitespace());
            buf.setLength(0);
            this.scanString(buf);
            elt.setAttribute(key, buf);
            ch = this.scanWhitespace();
        }
        // "/>": element without content; nothing more to read for it.
        if (ch == '/') {
            ch = this.readChar();
            if (ch != '>') {
                throw this.expectedInput(">");
            }
            return;
        }
        // Content. buf is reused as the text accumulator from here on.
        buf.setLength(0);
        ch = this.scanWhitespace(buf);
        if (ch != '<') {
            // Content starts with text: give the character back and collect
            // the character data.
            this.unreadChar(ch);
            this.scanPCData(buf);
        } else {
            // Content starts with '<': decide between "<!..." constructs,
            // a child element and the end tag.
            for (;;) {
                ch = this.readChar();
                if (ch == '!') {
                    // NOTE(review): checkCDATA(buf) presumably returns true
                    // when a CDATA section was read into buf and otherwise
                    // skips the special tag -- confirm.
                    if (this.checkCDATA(buf)) {
                        this.scanPCData(buf);
                        break;
                    } else {
                        ch = this.scanWhitespace(buf);
                        if (ch != '<') {
                            // Text follows the skipped construct.
                            this.unreadChar(ch);
                            this.scanPCData(buf);
                            break;
                        }
                        // Another '<' follows: loop to classify it.
                    }
                } else {
                    // A child element or the end tag begins here. Clearing
                    // buf makes the length check below take the
                    // child-element branch.
                    buf.setLength(0);
                    break;
                }
            }
        }
        if (buf.length() == 0) {
            // No text was collected: read comments and child elements until
            // the "</" of the end tag. Inside the loop ch is always the
            // character that followed a '<'.
            while (ch != '/') {
                if (ch == '!') {
                    // Only comments ("<!--") are accepted between children.
                    ch = this.readChar();
                    if (ch != '-') {
                        throw this.expectedInput("Comment or Element");
                    }
                    ch = this.readChar();
                    if (ch != '-') {
                        throw this.expectedInput("Comment or Element");
                    }
                    this.skipComment();
                } else {
                    // ch is the first character of the child's name; push it
                    // back and parse the child recursively.
                    this.unreadChar(ch);
                    XMLElement child = this.createAnotherElement();
                    this.scanElement(child);
                    elt.addChild(child);
                }
                ch = this.scanWhitespace();
                if (ch != '<') {
                    throw this.expectedInput("<");
                }
                ch = this.readChar();
            }
            // Give the '/' back so the end-tag check below reads it again.
            this.unreadChar(ch);
        } else {
            // Text content: stored trimmed or verbatim depending on the
            // ignoreWhitespace setting.
            if (this.ignoreWhitespace) {
                elt.setContent(buf.toString().trim());
            } else {
                elt.setContent(buf.toString());
            }
        }
        // End tag: '/', then the same name as the start tag, then '>'.
        ch = this.readChar();
        if (ch != '/') {
            throw this.expectedInput("/");
        }
        this.unreadChar(this.scanWhitespace());
        if (!this.checkLiteral(lname)) {
            throw this.expectedInput(lname);
        }
        if (this.scanWhitespace() != '>') {
            throw this.expectedInput(">");
        }
    }
1047 
1048     /**
1049      * Resolves an entity. The name of the entity is read from the reader.
1050      * <p>
1051      * The value of the entity is appended to {@code buf}.
1052      *
1053      * @param buf Where to put the entity value.
1054      *
1055      * <dl><dt><b>Preconditions:</b></dt><dd>
1056      * <ul><li>The first &amp; has already been read.</li>
1057      *     <li>{@code buf != null}</li>
1058      * </ul></dd></dl>
1059      * @throws java.io.IOException if something goes wrong
1060      */
resolveEntity(StringBuffer buf)1061     protected void resolveEntity(StringBuffer buf)
1062             throws IOException {
1063         char ch = '\0';
1064         StringBuffer keyBuf = new StringBuffer();
1065         for (;;) {
1066             ch = this.readChar();
1067             if (ch == ';') {
1068                 break;
1069             }
1070             keyBuf.append(ch);
1071         }
1072         String key = keyBuf.toString();
1073         if (key.charAt(0) == '#') {
1074             try {
1075                 if (key.charAt(1) == 'x') {
1076                     ch = (char) Integer.parseInt(key.substring(2), 16);
1077                 } else {
1078                     ch = (char) Integer.parseInt(key.substring(1), 10);
1079                 }
1080             } catch (NumberFormatException e) {
1081                 throw this.unknownEntity(key);
1082             }
1083             buf.append(ch);
1084         } else {
1085             char[] value = entities.get(key);
1086             if (value == null) {
1087                 throw this.unknownEntity(key);
1088             }
1089             buf.append(value);
1090         }
1091     }
1092 
1093     /**
1094      * Pushes a character back to the read-back buffer.
1095      *
1096      * @param ch The character to push back.
1097      *
1098      * <dl><dt><b>Preconditions:</b></dt><dd>
1099      * <ul><li>The read-back buffer is empty.</li>
1100      *     <li>{@code ch != '\0'}</li>
1101      * </ul></dd></dl>
1102      */
unreadChar(char ch)1103     protected void unreadChar(char ch) {
1104         this.charReadTooMuch = ch;
1105     }
1106 
1107     /**
1108      * Creates a parse exception for when an invalid valueset is given to
1109      * a method.
1110      *
1111      * @param name The name of the entity.
1112      *
1113      * <dl><dt><b>Preconditions:</b></dt><dd>
1114      * <ul><li>{@code name != null}</li>
1115      * </ul></dd></dl>
1116      * @return exception to be thrown
1117      */
invalidValueSet(String name)1118     protected XMLParseException invalidValueSet(String name) {
1119         String msg = "Invalid value set (entity name = \"" + name + "\")";
1120         return new XMLParseException(this.getName(), this.parserLineNr, msg);
1121     }
1122 
1123     /**
1124      * Creates a parse exception for when an invalid value is given to a
1125      * method.
1126      *
1127      * @param name  The name of the entity.
1128      * @param value The value of the entity.
1129      *
1130      * <dl><dt><b>Preconditions:</b></dt><dd>
1131      * <ul><li>{@code name != null}</li>
1132      *     <li>{@code value != null}</li>
1133      * </ul></dd></dl>
1134      * @return exception to be used
1135      */
invalidValue(String name, String value)1136     protected XMLParseException invalidValue(String name,
1137                                              String value) {
1138         String msg = "Attribute \"" + name + "\" does not contain a valid "
1139                    + "value (\"" + value + "\")";
1140         return new XMLParseException(this.getName(), this.parserLineNr, msg);
1141     }
1142 
1143     /**
1144      * Creates a parse exception for when the end of the data input has been
1145      * reached.
1146      * @return  exception to be used
1147      */
unexpectedEndOfData()1148     protected XMLParseException unexpectedEndOfData() {
1149         String msg = "Unexpected end of data reached";
1150         return new XMLParseException(this.getName(), this.parserLineNr, msg);
1151     }
1152 
1153     /**
     * Creates a parse exception for when a syntax error occurred.
     *
     * @param context The context in which the error occurred.
1157      *
1158      * <dl><dt><b>Preconditions:</b></dt><dd>
1159      * <ul><li>{@code context != null}</li>
1160      *     <li>{@code context.length() &gt; 0}</li>
1161      * </ul></dd></dl>
1162      * @return exception to be used
1163      */
syntaxError(String context)1164     protected XMLParseException syntaxError(String context) {
1165         String msg = "Syntax error while parsing " + context;
1166         return new XMLParseException(this.getName(), this.parserLineNr, msg);
1167     }
1168 
1169     /**
1170      * Creates a parse exception for when the next character read is not
1171      * the character that was expected.
1172      *
1173      * @param charSet The set of characters (in human readable form) that was
1174      *                expected.
1175      *
1176      * <dl><dt><b>Preconditions:</b></dt><dd>
1177      * <ul><li>{@code charSet != null}</li>
1178      *     <li>{@code charSet.length() &gt; 0}</li>
1179      * </ul></dd></dl>
1180      * @return exception to be used
1181      */
expectedInput(String charSet)1182     protected XMLParseException expectedInput(String charSet) {
1183         String msg = "Expected: " + charSet;
1184         return new XMLParseException(this.getName(), this.parserLineNr, msg);
1185     }
1186 
1187     /**
1188      * Creates a parse exception for when the next character read is not
1189      * the character that was expected.
1190      *
1191      * @param charSet The set of characters (in human readable form) that was
1192      *                expected.
1193      * @param ch The character that was received instead.
1194      * <dl><dt><b>Preconditions:</b></dt><dd>
1195      * <ul><li>{@code charSet != null}</li>
1196      *     <li>{@code charSet.length() &gt; 0}</li>
1197      * </ul></dd></dl>
1198      * @return exception to be used
1199      */
expectedInput(String charSet, char ch)1200     protected XMLParseException expectedInput(String charSet, char ch) {
1201         String msg = "Expected: '" + charSet + "'" + " but got: '" + ch + "'";
1202         return new XMLParseException(this.getName(), this.parserLineNr, msg);
1203     }
1204 
1205     /**
1206      * Creates a parse exception for when an entity could not be resolved.
1207      *
1208      * @param name The name of the entity.
1209      * @return exception to be used
1210      *
1211      * <dl><dt><b>Preconditions:</b></dt><dd>
1212      * <ul><li>{@code name != null}</li>
1213      *     <li>{@code name.length() &gt; 0}</li>
1214      * </ul></dd></dl>
1215      */
unknownEntity(String name)1216     protected XMLParseException unknownEntity(String name) {
1217         String msg = "Unknown or invalid entity: &" + name + ";";
1218         return new XMLParseException(this.getName(), this.parserLineNr, msg);
1219     }
1220 
1221     /**
1222      * Reads an xml file and removes the comments, leaving only relevant
1223      * xml code.
1224      *
1225      * @param isr The reader of the {@link java.io.InputStream} containing the xml.
1226      * @param pout The {@link java.io.PipedOutputStream} that will be receiving the
1227      *             filtered xml file.
1228      */
sanitizeInput(Reader isr, OutputStream pout)1229     public void sanitizeInput(Reader isr, OutputStream pout) {
1230         StringBuilder line = new StringBuilder();
1231         try (PrintStream out = new PrintStream(pout)) {
1232             this.sanitizeCharReadTooMuch = '\0';
1233             this.reader = isr;
1234             this.parserLineNr = 0;
1235             int newline = 2;
1236             char prev = ' ';
1237 
1238             while (true) {
1239                 char ch;
1240                 if (this.sanitizeCharReadTooMuch != '\0') {
1241                     ch = this.sanitizeCharReadTooMuch;
1242                     this.sanitizeCharReadTooMuch = '\0';
1243                 } else {
1244 
1245                     int i = this.reader.read();
1246                     if (i == -1) {
1247                         // no character in buffer, and nothing read
1248                         out.flush();
1249                         break;
1250                     } else if (i == 10) {
1251                         ch = '\n';
1252                     } else {
1253                         ch = (char) i;
1254                     }
1255                 }
1256 
1257                 char next;
1258                 int i = this.reader.read();
1259                 if (i == -1) {
1260                     // character in buffer and nothing read. write out
1261                     // what's in the buffer
1262                     out.print(ch);
1263                     out.flush();
1264                     if (ch == 10) {
1265                         OutputController.getLogger().log(line.toString());
1266                         line = new StringBuilder("line: " + newline + " ");
1267                         newline++;
1268                     } else {
1269                         line.append(ch);
1270                     }
1271                     break;
1272                 } else if (i == 10) {
1273                     next = '\n';
1274                 } else {
1275                     next = (char) i;
1276                 }
1277 
1278                 this.sanitizeCharReadTooMuch = next;
1279 
1280                 // If the next chars are !--, then we've hit a comment tag,
1281                 // and should skip it.
1282                 if (ch == '<' && sanitizeCharReadTooMuch == '!') {
1283                     ch = (char) this.reader.read();
1284                     if (ch == '-') {
1285                         ch = (char) this.reader.read();
1286                         if (ch == '-') {
1287                             this.skipComment();
1288                             this.sanitizeCharReadTooMuch = '\0';
1289                         } else {
1290                             out.print('<');
1291                             out.print('!');
1292                             out.print('-');
1293                             this.sanitizeCharReadTooMuch = ch;
1294                             line.append("<");
1295                             line.append("!");
1296                             line.append("-");
1297                         }
1298                     } else {
1299                         out.print('<');
1300                         out.print('!');
1301                         this.sanitizeCharReadTooMuch = ch;
1302                         line.append("<");
1303                         line.append("!");
1304                     }
1305                 }
1306                 // Otherwise we haven't hit a comment, and we should write ch.
1307                 else {
1308                     out.print(ch);
1309                     if (ch == 10) {
1310                         OutputController.getLogger().log(line.toString());
1311                         line = new StringBuilder("line: " + newline + " ");
1312                         newline++;
1313                     } else {
1314                         line.append(ch);
1315                     }
1316                 }
1317                 prev = next;
1318             }
1319             isr.close();
1320         } catch (Exception e) {
1321             // Print the stack trace here -- xml.parseFromReader() will
1322             // throw the ParseException if something goes wrong.
1323             OutputController.getLogger().log(OutputController.Level.ERROR_ALL, e);
1324         } finally {
1325             OutputController.getLogger().log("");//force new line in all cases
1326             OutputController.getLogger().log(line.toString()); //flush remaining line
1327 
1328         }
1329     }
1330 
    public boolean isBOM() {
        // Plain accessor: returns the BOM field unchanged. The field is
        // declared and assigned outside this part of the file.
        // NOTE(review): presumably records whether a byte order mark was
        // detected in the input -- confirm where BOM is assigned.
        return BOM;
    }
1334 
1335 
1336 
1337 }
1338