1 /* WellFormednessFilter.java -- 2 Copyright (C) 1999,2000,2001 Free Software Foundation, Inc. 3 4 This file is part of GNU Classpath. 5 6 GNU Classpath is free software; you can redistribute it and/or modify 7 it under the terms of the GNU General Public License as published by 8 the Free Software Foundation; either version 2, or (at your option) 9 any later version. 10 11 GNU Classpath is distributed in the hope that it will be useful, but 12 WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 General Public License for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with GNU Classpath; see the file COPYING. If not, write to the 18 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 19 02110-1301 USA. 20 21 Linking this library statically or dynamically with other modules is 22 making a combined work based on this library. Thus, the terms and 23 conditions of the GNU General Public License cover the whole 24 combination. 25 26 As a special exception, the copyright holders of this library give you 27 permission to link this library with independent modules to produce an 28 executable, regardless of the license terms of these independent 29 modules, and to copy and distribute the resulting executable under 30 terms of your choice, provided that you also meet, for each linked 31 independent module, the terms and conditions of the license of that 32 module. An independent module is a module which is not derived from 33 or based on this library. If you modify this library, you may extend 34 this exception to your version of the library, but you are not 35 obligated to do so. If you do not wish to do so, delete this 36 exception statement from your version. */ 37 38 package gnu.xml.pipeline; 39 40 import java.util.EmptyStackException; 41 import java.util.Stack; 42 43 import org.xml.sax.Attributes; 44 import org.xml.sax.ErrorHandler; 45 import org.xml.sax.Locator; 46 import org.xml.sax.SAXException; 47 import org.xml.sax.SAXParseException; 48 49 /** 50 * This filter reports fatal exceptions in the case of event streams that 51 * are not well formed. The rules currently tested include: <ul> 52 * 53 * <li>setDocumentLocator ... may be called only before startDocument 54 * 55 * <li>startDocument/endDocument ... must be paired, and all other 56 * calls (except setDocumentLocator) must be nested within these. 57 * 58 * <li>startElement/endElement ... must be correctly paired, and 59 * may never appear within CDATA sections. 60 * 61 * <li>comment ... can't contain "--" 62 * 63 * <li>character data ... can't contain "]]>" 64 * 65 * <li>whitespace ... can't contain CR 66 * 67 * <li>whitespace and character data must be within an element 68 * 69 * <li>processing instruction ... can't contain "?>" or CR 70 * 71 * <li>startCDATA/endCDATA ... must be correctly paired. 72 * 73 * </ul> 74 * 75 * <p> Other checks for event stream correctness may be provided in 76 * the future. For example, insisting that 77 * entity boundaries nest correctly, 78 * namespace scopes nest correctly, 79 * namespace values never contain relative URIs, 80 * attributes don't have "<" characters; 81 * and more. 82 * 83 * @author David Brownell 84 */ 85 public final class WellFormednessFilter extends EventFilter 86 { 87 private boolean startedDoc; 88 private Stack elementStack = new Stack (); 89 private boolean startedCDATA; 90 private String dtdState = "before"; 91 92 93 /** 94 * Swallows all events after performing well formedness checks. 95 */ 96 // constructor used by PipelineFactory WellFormednessFilter()97 public WellFormednessFilter () 98 { this (null); } 99 100 101 /** 102 * Passes events through to the specified consumer, after first 103 * processing them. 104 */ 105 // constructor used by PipelineFactory WellFormednessFilter(EventConsumer consumer)106 public WellFormednessFilter (EventConsumer consumer) 107 { 108 super (consumer); 109 110 setContentHandler (this); 111 setDTDHandler (this); 112 113 try { 114 setProperty (LEXICAL_HANDLER, this); 115 } catch (SAXException e) { /* can't happen */ } 116 } 117 118 /** 119 * Resets state as if any preceding event stream was well formed. 120 * Particularly useful if it ended through some sort of error, 121 * and the endDocument call wasn't made. 122 */ reset()123 public void reset () 124 { 125 startedDoc = false; 126 startedCDATA = false; 127 elementStack.removeAllElements (); 128 } 129 130 getException(String message)131 private SAXParseException getException (String message) 132 { 133 SAXParseException e; 134 Locator locator = getDocumentLocator (); 135 136 if (locator == null) 137 return new SAXParseException (message, null, null, -1, -1); 138 else 139 return new SAXParseException (message, locator); 140 } 141 fatalError(String message)142 private void fatalError (String message) 143 throws SAXException 144 { 145 SAXParseException e = getException (message); 146 ErrorHandler handler = getErrorHandler (); 147 148 if (handler != null) 149 handler.fatalError (e); 150 throw e; 151 } 152 153 /** 154 * Throws an exception when called after startDocument. 155 * 156 * @param locator the locator, to be used in error reporting or relative 157 * URI resolution. 158 * 159 * @exception IllegalStateException when called after the document 160 * has already been started 161 */ setDocumentLocator(Locator locator)162 public void setDocumentLocator (Locator locator) 163 { 164 if (startedDoc) 165 throw new IllegalStateException ( 166 "setDocumentLocator called after startDocument"); 167 super.setDocumentLocator (locator); 168 } 169 startDocument()170 public void startDocument () throws SAXException 171 { 172 if (startedDoc) 173 fatalError ("startDocument called more than once"); 174 startedDoc = true; 175 startedCDATA = false; 176 elementStack.removeAllElements (); 177 super.startDocument (); 178 } 179 startElement( String uri, String localName, String qName, Attributes atts )180 public void startElement ( 181 String uri, String localName, 182 String qName, Attributes atts 183 ) throws SAXException 184 { 185 if (!startedDoc) 186 fatalError ("callback outside of document?"); 187 if ("inside".equals (dtdState)) 188 fatalError ("element inside DTD?"); 189 else 190 dtdState = "after"; 191 if (startedCDATA) 192 fatalError ("element inside CDATA section"); 193 if (qName == null || "".equals (qName)) 194 fatalError ("startElement name missing"); 195 elementStack.push (qName); 196 super.startElement (uri, localName, qName, atts); 197 } 198 endElement(String uri, String localName, String qName)199 public void endElement (String uri, String localName, String qName) 200 throws SAXException 201 { 202 if (!startedDoc) 203 fatalError ("callback outside of document?"); 204 if (startedCDATA) 205 fatalError ("element inside CDATA section"); 206 if (qName == null || "".equals (qName)) 207 fatalError ("endElement name missing"); 208 209 try { 210 String top = (String) elementStack.pop (); 211 212 if (!qName.equals (top)) 213 fatalError ("<" + top + " ...>...</" + qName + ">"); 214 // XXX could record/test namespace info 215 } catch (EmptyStackException e) { 216 fatalError ("endElement without startElement: </" + qName + ">"); 217 } 218 super.endElement (uri, localName, qName); 219 } 220 endDocument()221 public void endDocument () throws SAXException 222 { 223 if (!startedDoc) 224 fatalError ("callback outside of document?"); 225 dtdState = "before"; 226 startedDoc = false; 227 super.endDocument (); 228 } 229 230 startDTD(String root, String publicId, String systemId)231 public void startDTD (String root, String publicId, String systemId) 232 throws SAXException 233 { 234 if (!startedDoc) 235 fatalError ("callback outside of document?"); 236 if ("before" != dtdState) 237 fatalError ("two DTDs?"); 238 if (!elementStack.empty ()) 239 fatalError ("DTD must precede root element"); 240 dtdState = "inside"; 241 super.startDTD (root, publicId, systemId); 242 } 243 notationDecl(String name, String publicId, String systemId)244 public void notationDecl (String name, String publicId, String systemId) 245 throws SAXException 246 { 247 // FIXME: not all parsers will report startDTD() ... 248 // we'd rather insist we're "inside". 249 if ("after" == dtdState) 250 fatalError ("not inside DTD"); 251 super.notationDecl (name, publicId, systemId); 252 } 253 unparsedEntityDecl(String name, String publicId, String systemId, String notationName)254 public void unparsedEntityDecl (String name, 255 String publicId, String systemId, String notationName) 256 throws SAXException 257 { 258 // FIXME: not all parsers will report startDTD() ... 259 // we'd rather insist we're "inside". 260 if ("after" == dtdState) 261 fatalError ("not inside DTD"); 262 super.unparsedEntityDecl (name, publicId, systemId, notationName); 263 } 264 265 // FIXME: add the four DeclHandler calls too 266 endDTD()267 public void endDTD () 268 throws SAXException 269 { 270 if (!startedDoc) 271 fatalError ("callback outside of document?"); 272 if ("inside" != dtdState) 273 fatalError ("DTD ends without start?"); 274 dtdState = "after"; 275 super.endDTD (); 276 } 277 characters(char ch [], int start, int length)278 public void characters (char ch [], int start, int length) 279 throws SAXException 280 { 281 int here = start, end = start + length; 282 if (elementStack.empty ()) 283 fatalError ("characters must be in an element"); 284 while (here < end) { 285 if (ch [here++] != ']') 286 continue; 287 if (here == end) // potential problem ... 288 continue; 289 if (ch [here++] != ']') 290 continue; 291 if (here == end) // potential problem ... 292 continue; 293 if (ch [here++] == '>') 294 fatalError ("character data can't contain \"]]>\""); 295 } 296 super.characters (ch, start, length); 297 } 298 ignorableWhitespace(char ch [], int start, int length)299 public void ignorableWhitespace (char ch [], int start, int length) 300 throws SAXException 301 { 302 int here = start, end = start + length; 303 if (elementStack.empty ()) 304 fatalError ("characters must be in an element"); 305 while (here < end) { 306 if (ch [here++] == '\r') 307 fatalError ("whitespace can't contain CR"); 308 } 309 super.ignorableWhitespace (ch, start, length); 310 } 311 processingInstruction(String target, String data)312 public void processingInstruction (String target, String data) 313 throws SAXException 314 { 315 if (data.indexOf ('\r') > 0) 316 fatalError ("PIs can't contain CR"); 317 if (data.indexOf ("?>") > 0) 318 fatalError ("PIs can't contain \"?>\""); 319 } 320 comment(char ch [], int start, int length)321 public void comment (char ch [], int start, int length) 322 throws SAXException 323 { 324 if (!startedDoc) 325 fatalError ("callback outside of document?"); 326 if (startedCDATA) 327 fatalError ("comments can't nest in CDATA"); 328 int here = start, end = start + length; 329 while (here < end) { 330 if (ch [here] == '\r') 331 fatalError ("comments can't contain CR"); 332 if (ch [here++] != '-') 333 continue; 334 if (here == end) 335 fatalError ("comments can't end with \"--->\""); 336 if (ch [here++] == '-') 337 fatalError ("comments can't contain \"--\""); 338 } 339 super.comment (ch, start, length); 340 } 341 startCDATA()342 public void startCDATA () 343 throws SAXException 344 { 345 if (!startedDoc) 346 fatalError ("callback outside of document?"); 347 if (startedCDATA) 348 fatalError ("CDATA starts can't nest"); 349 startedCDATA = true; 350 super.startCDATA (); 351 } 352 endCDATA()353 public void endCDATA () 354 throws SAXException 355 { 356 if (!startedDoc) 357 fatalError ("callback outside of document?"); 358 if (!startedCDATA) 359 fatalError ("CDATA end without start?"); 360 startedCDATA = false; 361 super.endCDATA (); 362 } 363 } 364