1 /* tagStack.java -- The HTML tag stack. 2 Copyright (C) 2005 Free Software Foundation, Inc. 3 4 This file is part of GNU Classpath. 5 6 GNU Classpath is free software; you can redistribute it and/or modify 7 it under the terms of the GNU General Public License as published by 8 the Free Software Foundation; either version 2, or (at your option) 9 any later version. 10 11 GNU Classpath is distributed in the hope that it will be useful, but 12 WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 General Public License for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with GNU Classpath; see the file COPYING. If not, write to the 18 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 19 02110-1301 USA. 20 21 Linking this library statically or dynamically with other modules is 22 making a combined work based on this library. Thus, the terms and 23 conditions of the GNU General Public License cover the whole 24 combination. 25 26 As a special exception, the copyright holders of this library give you 27 permission to link this library with independent modules to produce an 28 executable, regardless of the license terms of these independent 29 modules, and to copy and distribute the resulting executable under 30 terms of your choice, provided that you also meet, for each linked 31 independent module, the terms and conditions of the license of that 32 module. An independent module is a module which is not derived from 33 or based on this library. If you modify this library, you may extend 34 this exception to your version of the library, but you are not 35 obligated to do so. If you do not wish to do so, delete this 36 exception statement from your version. */ 37 38 39 package gnu.javax.swing.text.html.parser; 40 41 import gnu.javax.swing.text.html.parser.models.node; 42 import gnu.javax.swing.text.html.parser.models.transformer; 43 44 import java.util.BitSet; 45 import java.util.Enumeration; 46 import java.util.LinkedList; 47 import java.util.ListIterator; 48 49 import javax.swing.text.SimpleAttributeSet; 50 import javax.swing.text.html.HTML; 51 import javax.swing.text.html.parser.*; 52 53 /** 54 * <p>The HTML content validator, is responsible for opening and 55 * closing elements with optional start/end tags, detecting 56 * the wrongly placed html tags and reporting errors. The working instance 57 * is the inner class inside the {@link javax.swing.text.html.parser.Parser } 58 * </p> 59 * <p>This class could potentially 60 * provide basis for automated closing and insertion of the html tags, 61 * correcting the found html errors. 62 * @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org) 63 */ 64 public abstract class htmlValidator 65 { 66 /** 67 * The tag reference, holding additional information that the tag 68 * has been forcibly closed. 69 */ 70 protected class hTag 71 { 72 protected final Element element; 73 protected final HTML.Tag tag; 74 protected final TagElement tgElement; 75 protected boolean forcibly_closed; 76 protected node validationTrace; 77 hTag(TagElement an_element)78 protected hTag(TagElement an_element) 79 { 80 element = an_element.getElement(); 81 tag = an_element.getHTMLTag(); 82 tgElement = an_element; 83 84 if (element.content != null) 85 validationTrace = transformer.transform(element.content, dtd); 86 } 87 88 /** 89 * This is called when the tag must be forcibly closed because 90 * it would make the newly appearing tag invalid. 91 * The parser is not notified about such event (just the error 92 * is reported). For such tags, the closing message does not 93 * appear when later reaching the end of stream. The exception is 94 * the <head> tag: the parser is notified about its silent closing 95 * when <body> or other html content appears. 96 */ forciblyCloseDueContext()97 protected void forciblyCloseDueContext() 98 { 99 forcibly_closed = true; 100 } 101 102 /** 103 * This is called when the tag must be forcibly closed after 104 * reaching the end of stream. The parser is notified as if 105 * closing the tag explicitly. 106 */ forciblyCloseDueEndOfStream()107 protected void forciblyCloseDueEndOfStream() 108 { 109 forcibly_closed = true; 110 handleSupposedEndTag(element); 111 } 112 } 113 114 /** 115 * The DTD, providing information about the valid document structure. 116 */ 117 protected final DTD dtd; 118 119 /** 120 * The stack, holding the current tag context. 121 */ 122 protected final LinkedList stack = new LinkedList(); 123 124 /** 125 * Creates a new tag stack, using the given DTD. 126 * @param a_dtd A DTD, providing the information about the valid 127 * tag content. 128 */ htmlValidator(DTD a_dtd)129 public htmlValidator(DTD a_dtd) 130 { 131 dtd = a_dtd; 132 } 133 134 /** 135 * Close all opened tags (called at the end of parsing). 136 */ closeAll()137 public void closeAll() 138 { 139 hTag h; 140 while (!stack.isEmpty()) 141 { 142 h = (hTag) stack.getLast(); 143 if (!h.forcibly_closed && !h.element.omitEnd()) 144 s_error("Unclosed <" + h.tag + ">, closing at the end of stream"); 145 146 handleSupposedEndTag(h.element); 147 148 closeTag(h.tgElement); 149 } 150 } 151 152 /** 153 * Remove the given tag from the stack or (if found) from the list 154 * of the forcibly closed tags. 155 */ closeTag(TagElement tElement)156 public void closeTag(TagElement tElement) 157 { 158 HTML.Tag tag = tElement.getHTMLTag(); 159 hTag x; 160 hTag close; 161 162 if (!stack.isEmpty()) 163 { 164 ListIterator iter = stack.listIterator(stack.size()); 165 166 while (iter.hasPrevious()) 167 { 168 x = (hTag) iter.previous(); 169 if (tag.equals(x.tag)) 170 { 171 if (x.forcibly_closed && !x.element.omitEnd()) 172 s_error("The tag <" + x.tag + 173 "> has already been forcibly closed" 174 ); 175 176 177 // If the tag has a content model defined, forcibly close all 178 // tags that were opened after the tag being currently closed. 179 closing: 180 if (x.element.content != null) 181 { 182 iter = stack.listIterator(stack.size()); 183 while (iter.hasPrevious()) 184 { 185 close = (hTag) iter.previous(); 186 if (close == x) 187 break closing; 188 handleSupposedEndTag(close.element); 189 iter.remove(); 190 } 191 } 192 193 stack.remove(x); 194 return; 195 } 196 } 197 } 198 s_error("Closing unopened <" + tag + ">"); 199 } 200 201 /** 202 * Add the given HTML tag to the stack of the opened tags. Forcibly closes 203 * all tags in the stack that does not allow this tag in they content (error 204 * is reported). 205 * @param element 206 */ openTag(TagElement tElement, htmlAttributeSet parameters)207 public void openTag(TagElement tElement, htmlAttributeSet parameters) 208 { 209 // If this is a fictional call, the message from the parser 210 // has recursively returned - ignore. 211 if (tElement.fictional()) 212 return; 213 214 validateParameters(tElement, parameters); 215 216 // If the stack is empty, start from HTML 217 if (stack.isEmpty() && tElement.getHTMLTag() != HTML.Tag.HTML) 218 { 219 Element html = dtd.getElement(HTML.Tag.HTML.toString()); 220 openFictionalTag(html); 221 } 222 223 Object v = tagIsValidForContext(tElement); 224 if (v != Boolean.TRUE) 225 { 226 // The tag is not valid for context, the content 227 // model suggest to open another tag. 228 if (v instanceof Element) 229 { 230 int n = 0; 231 while (v instanceof Element && (n++ < 100)) 232 { 233 Element fe = (Element) v; 234 235 // notify the content model that we add the proposed tag 236 getCurrentContentModel().show(fe); 237 openFictionalTag(fe); 238 239 Object vv = tagIsValidForContext(tElement); 240 if (vv instanceof Element) // One level of nesting is supported. 241 { 242 openFictionalTag((Element) vv); 243 244 Object vx = tagIsValidForContext(tElement); 245 if (vx instanceof Element) 246 openFictionalTag((Element) vx); 247 } 248 else if (vv == Boolean.FALSE) 249 { 250 // The tag is still not valid for the current 251 // content after opening a fictional element. 252 if (fe.omitEnd()) 253 { 254 // close the previously opened fictional tag. 255 closeLast(); 256 vv = tagIsValidForContext(tElement); 257 if (vv instanceof Element) 258 259 // another tag was suggested by the content model 260 openFictionalTag((Element) vv); 261 } 262 } 263 v = tagIsValidForContext(tElement); 264 } 265 } 266 else // If the current element has the optional end tag, close it. 267 { 268 if (!stack.isEmpty()) 269 { 270 closing: 271 do 272 { 273 hTag last = (hTag) stack.getLast(); 274 if (last.element.omitEnd()) 275 { 276 closeLast(); 277 v = tagIsValidForContext(tElement); 278 if (v instanceof Element) // another tag was suggested by the content model 279 { 280 openFictionalTag((Element) v); 281 break closing; 282 } 283 } 284 else 285 break closing; 286 } 287 while (v == Boolean.FALSE && !stack.isEmpty()); 288 } 289 } 290 } 291 292 stack.add(new hTag(tElement)); 293 } 294 295 /** 296 * Clear the stack. 297 */ restart()298 public void restart() 299 { 300 stack.clear(); 301 } 302 303 /** 304 * Check if this tag is valid for the current context. 305 * Return Boolean.True if it is OK, Boolean.False 306 * if it is surely not OK or the Element that the 307 * content model recommends to insert making the situation 308 * ok. If Boolean.True is returned, the content model current 309 * position is moved forward. Otherwise this position remains 310 * the same. 311 * @param tElement 312 * @return 313 */ tagIsValidForContext(TagElement tElement)314 public Object tagIsValidForContext(TagElement tElement) 315 { 316 // Check the current content model, if one is available. 317 node cv = getCurrentContentModel(); 318 319 if (cv != null) 320 return cv.show(tElement.getElement()); 321 322 // Check exclusions and inclusions. 323 ListIterator iter = stack.listIterator(stack.size()); 324 hTag t; 325 final int idx = tElement.getElement().index; 326 327 // Check only known tags. 328 if (idx >= 0) 329 { 330 BitSet inclusions = new BitSet(); 331 while (iter.hasPrevious()) 332 { 333 t = (hTag) iter.previous(); 334 if (!t.forcibly_closed) 335 { 336 if (t.element.exclusions != null && 337 t.element.exclusions.get(idx) 338 ) 339 return Boolean.FALSE; 340 341 if (t.element.inclusions != null) 342 inclusions.or(t.element.inclusions); 343 } 344 } 345 if (!inclusions.get(idx)) 346 return Boolean.FALSE; 347 } 348 return Boolean.TRUE; 349 } 350 351 /** 352 * Validate tag without storing in into the tag stack. This is called 353 * for the empty tags and results the subsequent calls to the openTag 354 * and closeTag. 355 */ validateTag(TagElement tElement, htmlAttributeSet parameters)356 public void validateTag(TagElement tElement, htmlAttributeSet parameters) 357 { 358 openTag(tElement, parameters); 359 closeTag(tElement); 360 } 361 362 /** 363 * Check for mandatory elements, subsequent to the last tag: 364 * @param tElement The element that will be inserted next. 365 */ checkContentModel(TagElement tElement, boolean first)366 protected void checkContentModel(TagElement tElement, boolean first) 367 { 368 if (stack.isEmpty()) 369 return; 370 371 hTag last = (hTag) stack.getLast(); 372 if (last.validationTrace == null) 373 return; 374 375 Object r = last.validationTrace.show(tElement.getElement()); 376 if (r == Boolean.FALSE) 377 s_error("The <" + last.element + "> does not match the content model " + 378 last.validationTrace 379 ); 380 else if (r instanceof Element) // The content model recommends insertion of this element 381 { 382 if (!first) 383 closeTag(last.tgElement); 384 handleSupposedStartTag((Element) r); 385 openTag(new TagElement((Element) r), null); 386 } 387 } 388 389 /** 390 * The method is called when the tag must be closed because 391 * it does not allow the subsequent elements inside its context 392 * or the end of stream has been reached. The parser is only 393 * informed if the element being closed does not require the 394 * end tag (the "omitEnd" flag is set). 395 * The closing message must be passed to the parser mechanism 396 * before passing message about the opening the next tag. 397 * 398 * @param element The tag being fictionally (forcibly) closed. 399 */ handleSupposedEndTag(Element element)400 protected abstract void handleSupposedEndTag(Element element); 401 402 /** 403 * The method is called when the validator decides to open the 404 * tag on its own initiative. This may happen if the content model 405 * includes the element with the optional (supposed) start tag. 406 * 407 * @param element The tag being opened. 408 */ handleSupposedStartTag(Element element)409 protected abstract void handleSupposedStartTag(Element element); 410 411 /** 412 * Handles the error message. This method must be overridden to pass 413 * the message where required. 414 * @param msg The message text. 415 */ s_error(String msg)416 protected abstract void s_error(String msg); 417 418 /** 419 * Validate the parameters, report the error if the given parameter is 420 * not in the parameter set, valid for the given attribute. The information 421 * about the valid parameter set is taken from the Element, enclosed 422 * inside the tag. The method does not validate the default parameters. 423 * @param tag The tag 424 * @param parameters The parameters of this tag. 425 */ validateParameters(TagElement tag, htmlAttributeSet parameters)426 protected void validateParameters(TagElement tag, htmlAttributeSet parameters) 427 { 428 if (parameters == null || 429 parameters == htmlAttributeSet.EMPTY_HTML_ATTRIBUTE_SET || 430 parameters == SimpleAttributeSet.EMPTY 431 ) 432 return; 433 434 Enumeration enumeration = parameters.getAttributeNames(); 435 436 while (enumeration.hasMoreElements()) 437 { 438 validateAttribute(tag, parameters, enumeration); 439 } 440 441 // Check for missing required values. 442 AttributeList a = tag.getElement().getAttributes(); 443 444 while (a != null) 445 { 446 if (a.getModifier() == DTDConstants.REQUIRED) 447 if (parameters.getAttribute(a.getName()) == null) 448 { 449 s_error("Missing required attribute '" + a.getName() + "' for <" + 450 tag.getHTMLTag() + ">" 451 ); 452 } 453 a = a.next; 454 } 455 } 456 getCurrentContentModel()457 private node getCurrentContentModel() 458 { 459 if (!stack.isEmpty()) 460 { 461 hTag last = (hTag) stack.getLast(); 462 return last.validationTrace; 463 } 464 else 465 return null; 466 } 467 closeLast()468 private void closeLast() 469 { 470 handleSupposedEndTag(((hTag) stack.getLast()).element); 471 stack.removeLast(); 472 } 473 openFictionalTag(Element e)474 private void openFictionalTag(Element e) 475 { 476 handleSupposedStartTag(e); 477 stack.add(new hTag(new TagElement(e, true))); 478 if (!e.omitStart()) 479 s_error("<" + e + "> is expected (supposing it)"); 480 } 481 validateAttribute(TagElement tag, htmlAttributeSet parameters, Enumeration enumeration )482 private void validateAttribute(TagElement tag, htmlAttributeSet parameters, 483 Enumeration enumeration 484 ) 485 { 486 Object foundAttribute; 487 AttributeList dtdAttribute; 488 foundAttribute = enumeration.nextElement(); 489 dtdAttribute = tag.getElement().getAttribute(foundAttribute.toString()); 490 if (dtdAttribute == null) 491 { 492 StringBuffer valid = 493 new StringBuffer("The tag <" + tag.getHTMLTag() + 494 "> cannot contain the attribute '" + foundAttribute + 495 "'. The valid attributes for this tag are: " 496 ); 497 498 AttributeList a = tag.getElement().getAttributes(); 499 500 while (a != null) 501 { 502 valid.append(a.name.toUpperCase()); 503 valid.append(' '); 504 a = a.next; 505 } 506 s_error(valid.toString()); 507 } 508 509 else 510 { 511 String value = parameters.getAttribute(foundAttribute).toString(); 512 513 if (dtdAttribute.type == DTDConstants.NUMBER) 514 validateNumberAttribute(tag, foundAttribute, value); 515 516 if (dtdAttribute.type == DTDConstants.NAME || 517 dtdAttribute.type == DTDConstants.ID 518 ) 519 validateNameOrIdAttribute(tag, foundAttribute, value); 520 521 if (dtdAttribute.values != null) 522 validateAttributeWithValueList(tag, foundAttribute, dtdAttribute, 523 value 524 ); 525 } 526 } 527 validateAttributeWithValueList(TagElement tag, Object foundAttribute, AttributeList dtdAttribute, String value )528 private void validateAttributeWithValueList(TagElement tag, 529 Object foundAttribute, 530 AttributeList dtdAttribute, 531 String value 532 ) 533 { 534 if (!dtdAttribute.values.contains(value.toLowerCase()) && 535 !dtdAttribute.values.contains(value.toUpperCase()) 536 ) 537 { 538 StringBuffer valid; 539 if (dtdAttribute.values.size() == 1) 540 valid = 541 new StringBuffer("The attribute '" + foundAttribute + 542 "' of the tag <" + tag.getHTMLTag() + 543 "> cannot have the value '" + value + 544 "'. The only valid value is " 545 ); 546 else 547 valid = 548 new StringBuffer("The attribute '" + foundAttribute + 549 "' of the tag <" + tag.getHTMLTag() + 550 "> cannot have the value '" + value + "'. The " + 551 dtdAttribute.values.size() + 552 " valid values are: " 553 ); 554 555 Enumeration vv = dtdAttribute.values.elements(); 556 while (vv.hasMoreElements()) 557 { 558 valid.append('"'); 559 valid.append(vv.nextElement()); 560 valid.append("\" "); 561 } 562 s_error(valid.toString()); 563 } 564 } 565 validateNameOrIdAttribute(TagElement tag, Object foundAttribute, String value )566 private void validateNameOrIdAttribute(TagElement tag, Object foundAttribute, 567 String value 568 ) 569 { 570 boolean ok = true; 571 572 if (!Character.isLetter(value.charAt(0))) 573 ok = false; 574 575 char c; 576 for (int i = 0; i < value.length(); i++) 577 { 578 c = value.charAt(i); 579 if (!( 580 Character.isLetter(c) || Character.isDigit(c) || 581 "".indexOf(c) >= 0 582 ) 583 ) 584 ok = false; 585 } 586 if (!ok) 587 s_error("The '" + foundAttribute + "' attribute of the tag <" + 588 tag.getHTMLTag() + "> must start from letter and consist of " + 589 "letters, digits, hypens, colons, underscores and periods. " + 590 "It cannot be '" + value + "'" 591 ); 592 } 593 validateNumberAttribute(TagElement tag, Object foundAttribute, String value )594 private void validateNumberAttribute(TagElement tag, Object foundAttribute, 595 String value 596 ) 597 { 598 try 599 { 600 Integer.parseInt(value); 601 } 602 catch (NumberFormatException ex) 603 { 604 s_error("The '" + foundAttribute + "' attribute of the tag <" + 605 tag.getHTMLTag() + "> must be a valid number and not '" + 606 value + "'" 607 ); 608 } 609 } 610 } 611