1 /* Copyright (c) 2007-2008 The University of the West Indies
2  *
3  * Contact: robert.lancashire@uwimona.edu.jm
4  *
5  *  This library is free software; you can redistribute it and/or
6  *  modify it under the terms of the GNU Lesser General Public
7  *  License as published by the Free Software Foundation; either
8  *  version 2.1 of the License, or (at your option) any later version.
9  *
10  *  This library is distributed in the hope that it will be useful,
11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  *  Lesser General Public License for more details.
14  *
15  *  You should have received a copy of the GNU Lesser General Public
16  *  License along with this library; if not, write to the Free Software
17  *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18  */
19 
20 package jspecview.source;
21 
22 import java.io.BufferedReader;
23 import java.util.Hashtable;
24 
25 import javajs.util.SB;
26 
27 
28 
29 public class XMLParser {
30 
31   /*
32    * A simple very light-weight XML reader
33    * See AnIMLSource.java and CMLSource.java for implementation.
34    *
35    *  Bob Hanson hansonr@stolaf.edu  8/22/2008
36    *
37    *
38    */
39 
40   private XmlEvent thisEvent = new XmlEvent(TAG_NONE);
41   private DataBuffer buffer;
42 
43   public final static int TAG_NONE = 0;
44   public final static int START_ELEMENT = 1;
45   public final static int END_ELEMENT = 2;
46   public final static int START_END_ELEMENT = 3;
47   public final static int CHARACTERS = 4;
48   public final static int COMMENT = 6;
49   public final static int EOF = 8;
50 
51 
XMLParser(BufferedReader br)52   public XMLParser(BufferedReader br) {
53     buffer = new DataBuffer(br);
54   }
55 
getBufferData()56   public String getBufferData() {
57     return (buffer == null ? null : buffer.data.toString().substring(0, buffer.ptr));
58   }
59 
60   /**
61    * for value without surrounding tag
62    *
63    * @return value
64    * @throws Exception
65    */
thisValue()66   public String thisValue() throws Exception {
67     return buffer.nextEvent().toString().trim();
68   }
69 
70   /**
71    * for <xxxx> value </xxxx>
72    *
73    * @return value
74    * @throws Exception
75    */
qualifiedValue()76   public String qualifiedValue() throws Exception {
77     buffer.nextTag();
78     String value = buffer.nextEvent().toString().trim();
79     buffer.nextTag();
80     return value;
81   }
82 
peek()83   public int peek() throws Exception {
84     thisEvent = buffer.peek();
85     return thisEvent.getEventType();
86   }
87 
hasNext()88   public boolean hasNext() {
89     return buffer.hasNext();
90   }
91 
nextTag()92   public void nextTag() throws Exception {
93     while ((thisEvent = buffer.nextTag()).eventType == XMLParser.COMMENT) {
94     }
95   }
96 
nextEvent()97   public int nextEvent() throws Exception {
98     thisEvent = buffer.nextEvent();
99     return thisEvent.getEventType();
100   }
101 
nextStartTag()102   public void nextStartTag() throws Exception {
103     thisEvent = buffer.nextTag();
104     while (!thisEvent.isStartElement())
105       thisEvent = buffer.nextTag();
106   }
107 
getTagName()108   public String getTagName() {
109     return thisEvent.getTagName();
110   }
111 
getTagType()112   public int getTagType() {
113     return thisEvent.getTagType();
114   }
115 
getEndTag()116   public String getEndTag() {
117     return thisEvent.getTagName();
118   }
119 
nextValue()120   public String nextValue() throws Exception {
121     buffer.nextTag();
122     return buffer.nextEvent().toString().trim();
123   }
124 
getAttributeList()125   public String getAttributeList() {
126     return thisEvent.toString().toLowerCase();
127   }
128 
getAttrValueLC(String key)129   public String getAttrValueLC(String key) {
130     return getAttrValue(key).toLowerCase();
131   }
132 
getAttrValue(String name)133   public String getAttrValue(String name) {
134     String a = thisEvent.getAttributeByName(name);
135     return (a == null ? "" : a);
136   }
137 
getCharacters()138   public String getCharacters() throws Exception {
139     SB sb = new SB();
140     thisEvent = buffer.peek();
141     int eventType = thisEvent.getEventType();
142 
143     while (eventType != CHARACTERS)
144       thisEvent = buffer.nextEvent();
145     while (eventType == CHARACTERS) {
146       thisEvent = buffer.nextEvent();
147       eventType = thisEvent.getEventType();
148       if (eventType == CHARACTERS)
149         sb.append(thisEvent.toString());
150     }
151     return sb.toString();
152   }
153 
154   private class DataBuffer extends DataString {
155 
DataBuffer(BufferedReader br)156     DataBuffer(BufferedReader br) {
157       reader = br;
158     }
159 
hasNext()160     boolean hasNext() {
161       if (ptr == ptEnd)
162         try {
163           readLine();
164         } catch (Exception e) {
165           return false;
166         }
167       return ptr < ptEnd;
168     }
169 
170     @Override
readLine()171     public boolean readLine() throws Exception {
172       String s = reader.readLine();
173       if (s == null) {
174         return false;
175       }
176       data.append(s + "\n");
177       ptEnd = data.length();
178       return true;
179     }
180 
peek()181     XmlEvent peek() throws Exception {
182       if (ptEnd - ptr < 2)
183         try {
184           readLine();
185         } catch (Exception e) {
186           return new XmlEvent(EOF);
187         }
188       int pt0 = ptr;
189       XmlEvent e = new XmlEvent(this);
190       ptr = pt0;
191       return e;
192     }
193 
nextTag()194     XmlEvent nextTag() throws Exception {
195       flush();
196       skipTo('<', false);
197       XmlEvent e = new XmlEvent(this);
198       return e;
199     }
200 
nextEvent()201     XmlEvent nextEvent() throws Exception {
202       flush();
203       // cursor is always left after the last element
204       return new XmlEvent(this);
205     }
206 
207   }
208 
209   private class DataString {
210 
211     SB data;
212     protected BufferedReader reader;
213     int ptr;
214     int ptEnd;
215 
DataString()216     DataString() {
217       this.data = new SB();
218     }
219 
DataString(SB data)220     DataString(SB data) {
221       this.data = data;
222       ptEnd = data.length();
223     }
224 
getNCharactersRemaining()225     int getNCharactersRemaining() {
226       return ptEnd - ptr;
227     }
228 
flush()229     protected void flush() {
230       if (data.length() < 1000 || ptEnd - ptr > 100)
231         return;
232       data = new SB().append(data.substring(ptr));
233       //System.out.println(data);
234       ptr = 0;
235       ptEnd = data.length();
236       //System.out.println("flush " + ptEnd);
237     }
238 
substring(int i, int j)239     String substring(int i, int j) {
240       return data.toString().substring(i, j);
241     }
242 
skipOver(char c, boolean inQuotes)243     int skipOver(char c, boolean inQuotes) throws Exception {
244       if (skipTo(c, inQuotes) > 0 && ptr != ptEnd) {
245         ptr++;
246       }
247       return ptr;
248     }
249 
skipTo(char toWhat, boolean inQuotes)250     int skipTo(char toWhat, boolean inQuotes) throws Exception {
251       if (data == null)
252         return -1;
253       char ch;
254       if (ptr == ptEnd) {
255         if (reader == null)
256           return -1;
257         readLine();
258       }
259       int ptEnd1 = ptEnd - 1;
260       while (ptr < ptEnd && (ch = data.charAt(ptr)) != toWhat) {
261         if (inQuotes && ch == '\\' && ptr < ptEnd1) {
262           // must escape \" by skipping the quote and
263           // must escape \\" by skipping the second \
264           if ((ch = data.charAt(ptr + 1)) == '"' || ch == '\\')
265             ptr++;
266         } else if (ch == '"') {
267           ptr++;
268           if (skipTo('"', true) < 0)
269             return -1;
270         }
271         if (++ptr == ptEnd) {
272           if (reader == null)
273             return -1;
274           readLine();
275         }
276       }
277       return ptr;
278     }
279 
readLine()280     public boolean readLine() throws Exception {
281       return false;
282     }
283   }
284 
285   private class XmlEvent {
286 
287     int eventType = TAG_NONE;
288     private int ptr = 0;
289     private Tag tag;
290     private String data;
291 
292     @Override
toString()293     public String toString() {
294       return (data != null ? data : tag != null ? tag.text : null);
295     }
296 
XmlEvent(int eventType)297     XmlEvent(int eventType) {
298       this.eventType = eventType;
299     }
300 
XmlEvent(DataBuffer b)301     XmlEvent(DataBuffer b) throws Exception {
302       ptr = b.ptr;
303       int n = b.getNCharactersRemaining();
304       eventType = (n == 0 ? EOF : n == 1
305           || b.data.charAt(b.ptr) != '<' ? CHARACTERS
306           : b.data.charAt(b.ptr + 1) != '/' ? START_ELEMENT : END_ELEMENT);
307       if (eventType == EOF)
308         return;
309       if (eventType == CHARACTERS) {
310         b.skipTo('<', false);
311         data = b.data.toString().substring(ptr, b.ptr);
312       } else {
313         b.skipOver('>', false);
314         String s = b.data.toString().substring(ptr, b.ptr);
315         if (s.startsWith("<!--"))
316           eventType = COMMENT;
317         //System.out.println("new tag: " + s);
318         tag = new Tag(s);
319       }
320     }
321 
getEventType()322     public int getEventType() {
323       return eventType;
324     }
325 
isStartElement()326     boolean isStartElement() {
327       return (eventType & START_ELEMENT) != 0;
328     }
329 
getTagName()330     public String getTagName() {
331       return (tag == null ? null : tag.getName());
332     }
333 
getTagType()334     public int getTagType() {
335       return (tag == null ? TAG_NONE : tag.tagType);
336     }
337 
getAttributeByName(String name)338     public String getAttributeByName(String name) {
339       return (tag == null ? null : tag.getAttributeByName(name));
340     }
341 
342 }
343 
344   class Tag {
345     int tagType;
346     String name;
347     String text;
348     private Hashtable<String, String> attributes;
349 
Tag()350     Tag() {
351       //System.out.println("tag");
352     }
353 
Tag(String fulltag)354     Tag(String fulltag) {
355       text = fulltag;
356       tagType = (fulltag.startsWith("<!--") ? COMMENT
357           : fulltag.charAt(1) == '/' ? END_ELEMENT : fulltag
358           .charAt(fulltag.length() - 2) == '/' ? START_END_ELEMENT
359           : START_ELEMENT);
360     }
361 
getName()362     String getName() {
363       if (name != null)
364         return name;
365       int ptTemp = (tagType == END_ELEMENT ? 2 : 1);
366       int n = text.length() - (tagType == START_END_ELEMENT ? 2 : 1);
367       while (ptTemp < n && Character.isWhitespace(text.charAt(ptTemp)))
368         ptTemp++;
369       int pt0 = ptTemp;
370       while (ptTemp < n && !Character.isWhitespace(text.charAt(ptTemp)))
371         ptTemp++;
372       return name = text.substring(pt0, ptTemp).toLowerCase().trim();
373     }
374 
getAttributeByName(String attrName)375     String getAttributeByName(String attrName) {
376       if (attributes == null)
377         getAttributes();
378       return attributes.get(attrName.toLowerCase());
379     }
380 
getAttributes()381     private void getAttributes() {
382       attributes = new Hashtable<String, String>();
383       DataString d = new DataString(
384           new SB().append(text));
385       try {
386         if (d.skipTo(' ', false) < 0)
387           return;
388         int pt0;
389         while ((pt0 = ++d.ptr) >= 0) {
390           if (d.skipTo('=', false) < 0)
391             return;
392           String name = d.substring(pt0, d.ptr).trim().toLowerCase();
393           d.skipTo('"', false);
394           pt0 = ++d.ptr;
395           d.skipTo('"', true);
396           String attr = d.substring(pt0, d.ptr);
397           attributes.put(name, attr);
398           int pt1 = name.indexOf(":");
399           if (pt1 >= 0) {
400             name = name.substring(pt1).trim();
401             attributes.put(name, attr);
402           }
403 
404         }
405       } catch (Exception e) {
406         // not relavent
407       }
408     }
409 
410   }
411 
requiresEndTag()412   public boolean requiresEndTag() {
413     int tagType = thisEvent.getTagType();
414     return  tagType != START_END_ELEMENT && tagType != COMMENT;
415   }
416 }
417