1 /* WellFormednessFilter.java --
2    Copyright (C) 1999,2000,2001 Free Software Foundation, Inc.
3 
4 This file is part of GNU Classpath.
5 
6 GNU Classpath is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10 
11 GNU Classpath is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 General Public License for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GNU Classpath; see the file COPYING.  If not, write to the
18 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 02110-1301 USA.
20 
21 Linking this library statically or dynamically with other modules is
22 making a combined work based on this library.  Thus, the terms and
23 conditions of the GNU General Public License cover the whole
24 combination.
25 
26 As a special exception, the copyright holders of this library give you
27 permission to link this library with independent modules to produce an
28 executable, regardless of the license terms of these independent
29 modules, and to copy and distribute the resulting executable under
30 terms of your choice, provided that you also meet, for each linked
31 independent module, the terms and conditions of the license of that
32 module.  An independent module is a module which is not derived from
33 or based on this library.  If you modify this library, you may extend
34 this exception to your version of the library, but you are not
35 obligated to do so.  If you do not wish to do so, delete this
36 exception statement from your version. */
37 
38 package gnu.xml.pipeline;
39 
40 import java.util.EmptyStackException;
41 import java.util.Stack;
42 
43 import org.xml.sax.Attributes;
44 import org.xml.sax.ErrorHandler;
45 import org.xml.sax.Locator;
46 import org.xml.sax.SAXException;
47 import org.xml.sax.SAXParseException;
48 
/**
 * This filter reports fatal exceptions in the case of event streams that
 * are not well formed.  The rules currently tested include: <ul>
 *
 *      <li>setDocumentLocator ... may be called only before
 *      startDocument.</li>
 *
 *      <li>startDocument/endDocument ... must be paired, and all other
 *      calls (except setDocumentLocator) must be nested within these.</li>
 *
 *      <li>startElement/endElement ... must be correctly paired, and
 *      may never appear within CDATA sections.</li>
 *
 *      <li>comment ... can't contain "--" or CR, and can't end
 *      with "-".</li>
 *
 *      <li>character data ... can't contain "]]&gt;".</li>
 *
 *      <li>whitespace ... can't contain CR.</li>
 *
 *      <li>whitespace and character data ... must be within an
 *      element.</li>
 *
 *      <li>processing instruction ... can't contain "?&gt;" or CR.</li>
 *
 *      <li>startCDATA/endCDATA ... must be correctly paired.</li>
 *
 *      </ul>
 *
 * <p> Other checks for event stream correctness may be provided in
 * the future.  For example, insisting that
 * entity boundaries nest correctly,
 * namespace scopes nest correctly,
 * namespace values never contain relative URIs,
 * attributes don't have "&lt;" characters;
 * and more.
 *
 * @author David Brownell
 */
85 public final class WellFormednessFilter extends EventFilter
86 {
87     private boolean             startedDoc;
88     private Stack               elementStack = new Stack ();
89     private boolean             startedCDATA;
90     private String              dtdState = "before";
91 
92 
93     /**
94      * Swallows all events after performing well formedness checks.
95      */
96         // constructor used by PipelineFactory
WellFormednessFilter()97     public WellFormednessFilter ()
98         { this (null); }
99 
100 
101     /**
102      * Passes events through to the specified consumer, after first
103      * processing them.
104      */
105         // constructor used by PipelineFactory
WellFormednessFilter(EventConsumer consumer)106     public WellFormednessFilter (EventConsumer consumer)
107     {
108         super (consumer);
109 
110         setContentHandler (this);
111         setDTDHandler (this);
112 
113         try {
114             setProperty (LEXICAL_HANDLER, this);
115         } catch (SAXException e) { /* can't happen */ }
116     }
117 
118     /**
119      * Resets state as if any preceding event stream was well formed.
120      * Particularly useful if it ended through some sort of error,
121      * and the endDocument call wasn't made.
122      */
reset()123     public void reset ()
124     {
125         startedDoc = false;
126         startedCDATA = false;
127         elementStack.removeAllElements ();
128     }
129 
130 
getException(String message)131     private SAXParseException getException (String message)
132     {
133         SAXParseException       e;
134         Locator                 locator = getDocumentLocator ();
135 
136         if (locator == null)
137             return new SAXParseException (message, null, null, -1, -1);
138         else
139             return new SAXParseException (message, locator);
140     }
141 
fatalError(String message)142     private void fatalError (String message)
143     throws SAXException
144     {
145         SAXParseException       e = getException (message);
146         ErrorHandler            handler = getErrorHandler ();
147 
148         if (handler != null)
149             handler.fatalError (e);
150         throw e;
151     }
152 
153     /**
154      * Throws an exception when called after startDocument.
155      *
156      * @param locator the locator, to be used in error reporting or relative
157      *  URI resolution.
158      *
159      * @exception IllegalStateException when called after the document
160      *  has already been started
161      */
setDocumentLocator(Locator locator)162     public void setDocumentLocator (Locator locator)
163     {
164         if (startedDoc)
165             throw new IllegalStateException (
166                     "setDocumentLocator called after startDocument");
167         super.setDocumentLocator (locator);
168     }
169 
startDocument()170     public void startDocument () throws SAXException
171     {
172         if (startedDoc)
173             fatalError ("startDocument called more than once");
174         startedDoc = true;
175         startedCDATA = false;
176         elementStack.removeAllElements ();
177         super.startDocument ();
178     }
179 
startElement( String uri, String localName, String qName, Attributes atts )180     public void startElement (
181         String uri, String localName,
182         String qName, Attributes atts
183     ) throws SAXException
184     {
185         if (!startedDoc)
186             fatalError ("callback outside of document?");
187         if ("inside".equals (dtdState))
188             fatalError ("element inside DTD?");
189         else
190             dtdState = "after";
191         if (startedCDATA)
192             fatalError ("element inside CDATA section");
193         if (qName == null || "".equals (qName))
194             fatalError ("startElement name missing");
195         elementStack.push (qName);
196         super.startElement (uri, localName, qName, atts);
197     }
198 
endElement(String uri, String localName, String qName)199     public void endElement (String uri, String localName, String qName)
200     throws SAXException
201     {
202         if (!startedDoc)
203             fatalError ("callback outside of document?");
204         if (startedCDATA)
205             fatalError ("element inside CDATA section");
206         if (qName == null || "".equals (qName))
207             fatalError ("endElement name missing");
208 
209         try {
210             String      top = (String) elementStack.pop ();
211 
212             if (!qName.equals (top))
213                 fatalError ("<" + top + " ...>...</" + qName + ">");
214             // XXX could record/test namespace info
215         } catch (EmptyStackException e) {
216             fatalError ("endElement without startElement:  </" + qName + ">");
217         }
218         super.endElement (uri, localName, qName);
219     }
220 
endDocument()221     public void endDocument () throws SAXException
222     {
223         if (!startedDoc)
224             fatalError ("callback outside of document?");
225         dtdState = "before";
226         startedDoc = false;
227         super.endDocument ();
228     }
229 
230 
startDTD(String root, String publicId, String systemId)231     public void startDTD (String root, String publicId, String systemId)
232     throws SAXException
233     {
234         if (!startedDoc)
235             fatalError ("callback outside of document?");
236     if ("before" != dtdState)
237             fatalError ("two DTDs?");
238         if (!elementStack.empty ())
239             fatalError ("DTD must precede root element");
240         dtdState = "inside";
241         super.startDTD (root, publicId, systemId);
242     }
243 
notationDecl(String name, String publicId, String systemId)244     public void notationDecl (String name, String publicId, String systemId)
245     throws SAXException
246     {
247 // FIXME: not all parsers will report startDTD() ...
248 // we'd rather insist we're "inside".
249     if ("after" == dtdState)
250             fatalError ("not inside DTD");
251         super.notationDecl (name, publicId, systemId);
252     }
253 
unparsedEntityDecl(String name, String publicId, String systemId, String notationName)254     public void unparsedEntityDecl (String name,
255         String publicId, String systemId, String notationName)
256     throws SAXException
257     {
258 // FIXME: not all parsers will report startDTD() ...
259 // we'd rather insist we're "inside".
260     if ("after" == dtdState)
261             fatalError ("not inside DTD");
262         super.unparsedEntityDecl (name, publicId, systemId, notationName);
263     }
264 
265     // FIXME:  add the four DeclHandler calls too
266 
endDTD()267     public void endDTD ()
268     throws SAXException
269     {
270         if (!startedDoc)
271             fatalError ("callback outside of document?");
272         if ("inside" != dtdState)
273             fatalError ("DTD ends without start?");
274         dtdState = "after";
275         super.endDTD ();
276     }
277 
characters(char ch [], int start, int length)278     public void characters (char ch [], int start, int length)
279     throws SAXException
280     {
281         int here = start, end = start + length;
282         if (elementStack.empty ())
283             fatalError ("characters must be in an element");
284         while (here < end) {
285             if (ch [here++] != ']')
286                 continue;
287             if (here == end)    // potential problem ...
288                 continue;
289             if (ch [here++] != ']')
290                 continue;
291             if (here == end)    // potential problem ...
292                 continue;
293             if (ch [here++] == '>')
294                 fatalError ("character data can't contain \"]]>\"");
295         }
296         super.characters (ch, start, length);
297     }
298 
ignorableWhitespace(char ch [], int start, int length)299     public void ignorableWhitespace (char ch [], int start, int length)
300     throws SAXException
301     {
302         int here = start, end = start + length;
303         if (elementStack.empty ())
304             fatalError ("characters must be in an element");
305         while (here < end) {
306             if (ch [here++] == '\r')
307                 fatalError ("whitespace can't contain CR");
308         }
309         super.ignorableWhitespace (ch, start, length);
310     }
311 
processingInstruction(String target, String data)312     public void processingInstruction (String target, String data)
313     throws SAXException
314     {
315         if (data.indexOf ('\r') > 0)
316             fatalError ("PIs can't contain CR");
317         if (data.indexOf ("?>") > 0)
318             fatalError ("PIs can't contain \"?>\"");
319     }
320 
comment(char ch [], int start, int length)321     public void comment (char ch [], int start, int length)
322     throws SAXException
323     {
324         if (!startedDoc)
325             fatalError ("callback outside of document?");
326         if (startedCDATA)
327             fatalError ("comments can't nest in CDATA");
328         int here = start, end = start + length;
329         while (here < end) {
330             if (ch [here] == '\r')
331                 fatalError ("comments can't contain CR");
332             if (ch [here++] != '-')
333                 continue;
334             if (here == end)
335                 fatalError ("comments can't end with \"--->\"");
336             if (ch [here++] == '-')
337                 fatalError ("comments can't contain \"--\"");
338         }
339         super.comment (ch, start, length);
340     }
341 
startCDATA()342     public void startCDATA ()
343     throws SAXException
344     {
345         if (!startedDoc)
346             fatalError ("callback outside of document?");
347         if (startedCDATA)
348             fatalError ("CDATA starts can't nest");
349         startedCDATA = true;
350         super.startCDATA ();
351     }
352 
endCDATA()353     public void endCDATA ()
354     throws SAXException
355     {
356         if (!startedDoc)
357             fatalError ("callback outside of document?");
358         if (!startedCDATA)
359             fatalError ("CDATA end without start?");
360         startedCDATA = false;
361         super.endCDATA ();
362     }
363 }
364