1 /* tagStack.java -- The HTML tag stack. 2 Copyright (C) 2005 Free Software Foundation, Inc. 3 4 This file is part of GNU Classpath. 5 6 GNU Classpath is free software; you can redistribute it and/or modify 7 it under the terms of the GNU General Public License as published by 8 the Free Software Foundation; either version 2, or (at your option) 9 any later version. 10 11 GNU Classpath is distributed in the hope that it will be useful, but 12 WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 General Public License for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with GNU Classpath; see the file COPYING. If not, write to the 18 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 19 02110-1301 USA. 20 21 Linking this library statically or dynamically with other modules is 22 making a combined work based on this library. Thus, the terms and 23 conditions of the GNU General Public License cover the whole 24 combination. 25 26 As a special exception, the copyright holders of this library give you 27 permission to link this library with independent modules to produce an 28 executable, regardless of the license terms of these independent 29 modules, and to copy and distribute the resulting executable under 30 terms of your choice, provided that you also meet, for each linked 31 independent module, the terms and conditions of the license of that 32 module. An independent module is a module which is not derived from 33 or based on this library. If you modify this library, you may extend 34 this exception to your version of the library, but you are not 35 obligated to do so. If you do not wish to do so, delete this 36 exception statement from your version. */ 37 38 39 package gnu.javax.swing.text.html.parser; 40 41 import gnu.java.lang.CPStringBuilder; 42 43 import gnu.javax.swing.text.html.parser.models.node; 44 import gnu.javax.swing.text.html.parser.models.transformer; 45 46 import java.util.BitSet; 47 import java.util.Enumeration; 48 import java.util.LinkedList; 49 import java.util.ListIterator; 50 51 import javax.swing.text.SimpleAttributeSet; 52 import javax.swing.text.html.HTML; 53 import javax.swing.text.html.parser.*; 54 55 /** 56 * <p>The HTML content validator, is responsible for opening and 57 * closing elements with optional start/end tags, detecting 58 * the wrongly placed html tags and reporting errors. The working instance 59 * is the inner class inside the {@link javax.swing.text.html.parser.Parser } 60 * </p> 61 * <p>This class could potentially 62 * provide basis for automated closing and insertion of the html tags, 63 * correcting the found html errors. 64 * @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org) 65 */ 66 public abstract class htmlValidator 67 { 68 /** 69 * The tag reference, holding additional information that the tag 70 * has been forcibly closed. 71 */ 72 protected class hTag 73 { 74 protected final Element element; 75 protected final HTML.Tag tag; 76 protected final TagElement tgElement; 77 protected boolean forcibly_closed; 78 protected node validationTrace; 79 hTag(TagElement an_element)80 protected hTag(TagElement an_element) 81 { 82 element = an_element.getElement(); 83 tag = an_element.getHTMLTag(); 84 tgElement = an_element; 85 86 if (element.content != null) 87 validationTrace = transformer.transform(element.content, dtd); 88 } 89 90 /** 91 * This is called when the tag must be forcibly closed because 92 * it would make the newly appearing tag invalid. 93 * The parser is not notified about such event (just the error 94 * is reported). For such tags, the closing message does not 95 * appear when later reaching the end of stream. The exception is 96 * the <head> tag: the parser is notified about its silent closing 97 * when <body> or other html content appears. 98 */ forciblyCloseDueContext()99 protected void forciblyCloseDueContext() 100 { 101 forcibly_closed = true; 102 } 103 104 /** 105 * This is called when the tag must be forcibly closed after 106 * reaching the end of stream. The parser is notified as if 107 * closing the tag explicitly. 108 */ forciblyCloseDueEndOfStream()109 protected void forciblyCloseDueEndOfStream() 110 { 111 forcibly_closed = true; 112 handleSupposedEndTag(element); 113 } 114 } 115 116 /** 117 * The DTD, providing information about the valid document structure. 118 */ 119 protected final DTD dtd; 120 121 /** 122 * The stack, holding the current tag context. 123 */ 124 protected final LinkedList stack = new LinkedList(); 125 126 /** 127 * Creates a new tag stack, using the given DTD. 128 * @param a_dtd A DTD, providing the information about the valid 129 * tag content. 130 */ htmlValidator(DTD a_dtd)131 public htmlValidator(DTD a_dtd) 132 { 133 dtd = a_dtd; 134 } 135 136 /** 137 * Close all opened tags (called at the end of parsing). 138 */ closeAll()139 public void closeAll() 140 { 141 hTag h; 142 while (!stack.isEmpty()) 143 { 144 h = (hTag) stack.getLast(); 145 if (!h.forcibly_closed && !h.element.omitEnd()) 146 s_error("Unclosed <" + h.tag + ">, closing at the end of stream"); 147 148 handleSupposedEndTag(h.element); 149 150 closeTag(h.tgElement); 151 } 152 } 153 154 /** 155 * Remove the given tag from the stack or (if found) from the list 156 * of the forcibly closed tags. 157 */ closeTag(TagElement tElement)158 public boolean closeTag(TagElement tElement) 159 { 160 HTML.Tag tag = tElement.getHTMLTag(); 161 hTag x; 162 hTag close; 163 164 if (!stack.isEmpty()) 165 { 166 ListIterator iter = stack.listIterator(stack.size()); 167 168 while (iter.hasPrevious()) 169 { 170 x = (hTag) iter.previous(); 171 if (tag.equals(x.tag)) 172 { 173 if (x.forcibly_closed && !x.element.omitEnd()) 174 s_error("The tag <" + x.tag + 175 "> has already been forcibly closed" 176 ); 177 178 179 // If the tag has a content model defined, forcibly close all 180 // tags that were opened after the tag being currently closed. 181 closing: 182 if (x.element.content != null) 183 { 184 iter = stack.listIterator(stack.size()); 185 while (iter.hasPrevious()) 186 { 187 close = (hTag) iter.previous(); 188 if (close == x) 189 break closing; 190 handleSupposedEndTag(close.element); 191 iter.remove(); 192 } 193 } 194 195 stack.remove(x); 196 return true; 197 } 198 } 199 } 200 s_error("Closing unopened <" + tag + ">"); 201 return false; 202 } 203 204 /** 205 * Add the given HTML tag to the stack of the opened tags. Forcibly closes 206 * all tags in the stack that does not allow this tag in they content (error 207 * is reported). 208 * @param element 209 */ openTag(TagElement tElement, htmlAttributeSet parameters)210 public void openTag(TagElement tElement, htmlAttributeSet parameters) 211 { 212 // If this is a fictional call, the message from the parser 213 // has recursively returned - ignore. 214 if (tElement.fictional()) 215 return; 216 217 validateParameters(tElement, parameters); 218 219 // If the stack is empty, start from HTML 220 if (stack.isEmpty() && tElement.getHTMLTag() != HTML.Tag.HTML) 221 { 222 Element html = dtd.getElement(HTML.Tag.HTML.toString()); 223 openFictionalTag(html); 224 } 225 226 Object v = tagIsValidForContext(tElement); 227 if (v != Boolean.TRUE) 228 { 229 // The tag is not valid for context, the content 230 // model suggest to open another tag. 231 if (v instanceof Element) 232 { 233 int n = 0; 234 while (v instanceof Element && (n++ < 100)) 235 { 236 Element fe = (Element) v; 237 238 // notify the content model that we add the proposed tag 239 node ccm = getCurrentContentModel(); 240 if (ccm != null) 241 ccm.show(fe); 242 openFictionalTag(fe); 243 244 Object vv = tagIsValidForContext(tElement); 245 if (vv instanceof Element) // One level of nesting is supported. 246 { 247 openFictionalTag((Element) vv); 248 249 Object vx = tagIsValidForContext(tElement); 250 if (vx instanceof Element) 251 openFictionalTag((Element) vx); 252 } 253 else if (vv == Boolean.FALSE) 254 { 255 // The tag is still not valid for the current 256 // content after opening a fictional element. 257 if (fe.omitEnd()) 258 { 259 // close the previously opened fictional tag. 260 closeLast(); 261 vv = tagIsValidForContext(tElement); 262 if (vv instanceof Element) 263 264 // another tag was suggested by the content model 265 openFictionalTag((Element) vv); 266 } 267 } 268 v = tagIsValidForContext(tElement); 269 } 270 } 271 else // If the current element has the optional end tag, close it. 272 { 273 if (!stack.isEmpty()) 274 { 275 closing: 276 do 277 { 278 hTag last = (hTag) stack.getLast(); 279 if (last.element.omitEnd()) 280 { 281 closeLast(); 282 v = tagIsValidForContext(tElement); 283 if (v instanceof Element) // another tag was suggested by the content model 284 { 285 openFictionalTag((Element) v); 286 break closing; 287 } 288 } 289 else 290 break closing; 291 } 292 while (v == Boolean.FALSE && !stack.isEmpty()); 293 } 294 } 295 } 296 297 stack.add(new hTag(tElement)); 298 } 299 300 /** 301 * Clear the stack. 302 */ restart()303 public void restart() 304 { 305 stack.clear(); 306 } 307 308 /** 309 * Check if this tag is valid for the current context. Return Boolean.True if 310 * it is OK, Boolean.False if it is surely not OK or the Element that the 311 * content model recommends to insert making the situation ok. If Boolean.True 312 * is returned, the content model current position is moved forward. Otherwise 313 * this position remains the same. 314 * 315 * @param tElement 316 * @return 317 */ tagIsValidForContext(TagElement tElement)318 public Object tagIsValidForContext(TagElement tElement) 319 { 320 // Check the current content model, if one is available. 321 node cv = getCurrentContentModel(); 322 323 if (cv != null) 324 return cv.show(tElement.getElement()); 325 326 // Check exclusions and inclusions. 327 ListIterator iter = stack.listIterator(stack.size()); 328 hTag t = null; 329 final int idx = tElement.getElement().index; 330 331 // Check only known tags. 332 if (idx >= 0) 333 { 334 BitSet inclusions = new BitSet(); 335 while (iter.hasPrevious()) 336 { 337 t = (hTag) iter.previous(); 338 if (! t.forcibly_closed) 339 { 340 if (t.element.exclusions != null 341 && t.element.exclusions.get(idx)) 342 return Boolean.FALSE; 343 344 if (t.element.inclusions != null) 345 inclusions.or(t.element.inclusions); 346 } 347 } 348 if (! inclusions.get(idx)) 349 { 350 // If we need to insert something, and cannot do this, but 351 // it is allowed to insert the paragraph here, insert the 352 // paragraph. 353 Element P = dtd.getElement(HTML_401F.P); 354 if (inclusions.get(P.index)) 355 return P; 356 else 357 return Boolean.FALSE; 358 } 359 } 360 return Boolean.TRUE; 361 } 362 363 /** 364 * Validate tag without storing in into the tag stack. This is called 365 * for the empty tags and results the subsequent calls to the openTag 366 * and closeTag. 367 */ validateTag(TagElement tElement, htmlAttributeSet parameters)368 public void validateTag(TagElement tElement, htmlAttributeSet parameters) 369 { 370 openTag(tElement, parameters); 371 closeTag(tElement); 372 } 373 374 /** 375 * Check for mandatory elements, subsequent to the last tag: 376 * @param tElement The element that will be inserted next. 377 */ checkContentModel(TagElement tElement, boolean first)378 protected void checkContentModel(TagElement tElement, boolean first) 379 { 380 if (stack.isEmpty()) 381 return; 382 383 hTag last = (hTag) stack.getLast(); 384 if (last.validationTrace == null) 385 return; 386 387 Object r = last.validationTrace.show(tElement.getElement()); 388 if (r == Boolean.FALSE) 389 s_error("The <" + last.element + "> does not match the content model " + 390 last.validationTrace 391 ); 392 else if (r instanceof Element) // The content model recommends insertion of this element 393 { 394 if (!first) 395 closeTag(last.tgElement); 396 handleSupposedStartTag((Element) r); 397 openTag(new TagElement((Element) r), null); 398 } 399 } 400 401 /** 402 * The method is called when the tag must be closed because 403 * it does not allow the subsequent elements inside its context 404 * or the end of stream has been reached. The parser is only 405 * informed if the element being closed does not require the 406 * end tag (the "omitEnd" flag is set). 407 * The closing message must be passed to the parser mechanism 408 * before passing message about the opening the next tag. 409 * 410 * @param element The tag being fictionally (forcibly) closed. 411 */ handleSupposedEndTag(Element element)412 protected abstract void handleSupposedEndTag(Element element); 413 414 /** 415 * The method is called when the validator decides to open the 416 * tag on its own initiative. This may happen if the content model 417 * includes the element with the optional (supposed) start tag. 418 * 419 * @param element The tag being opened. 420 */ handleSupposedStartTag(Element element)421 protected abstract void handleSupposedStartTag(Element element); 422 423 /** 424 * Handles the error message. This method must be overridden to pass 425 * the message where required. 426 * @param msg The message text. 427 */ s_error(String msg)428 protected abstract void s_error(String msg); 429 430 /** 431 * Validate the parameters, report the error if the given parameter is 432 * not in the parameter set, valid for the given attribute. The information 433 * about the valid parameter set is taken from the Element, enclosed 434 * inside the tag. The method does not validate the default parameters. 435 * @param tag The tag 436 * @param parameters The parameters of this tag. 437 */ validateParameters(TagElement tag, htmlAttributeSet parameters)438 protected void validateParameters(TagElement tag, htmlAttributeSet parameters) 439 { 440 if (parameters == null || 441 parameters == htmlAttributeSet.EMPTY_HTML_ATTRIBUTE_SET || 442 parameters == SimpleAttributeSet.EMPTY 443 ) 444 return; 445 446 Enumeration enumeration = parameters.getAttributeNames(); 447 448 while (enumeration.hasMoreElements()) 449 { 450 validateAttribute(tag, parameters, enumeration); 451 } 452 453 // Check for missing required values. 454 AttributeList a = tag.getElement().getAttributes(); 455 456 while (a != null) 457 { 458 if (a.getModifier() == DTDConstants.REQUIRED) 459 if (parameters.getAttribute(a.getName()) == null) 460 { 461 s_error("Missing required attribute '" + a.getName() + "' for <" + 462 tag.getHTMLTag() + ">" 463 ); 464 } 465 a = a.next; 466 } 467 } 468 getCurrentContentModel()469 private node getCurrentContentModel() 470 { 471 if (!stack.isEmpty()) 472 { 473 hTag last = (hTag) stack.getLast(); 474 return last.validationTrace; 475 } 476 else 477 return null; 478 } 479 closeLast()480 private void closeLast() 481 { 482 handleSupposedEndTag(((hTag) stack.getLast()).element); 483 stack.removeLast(); 484 } 485 openFictionalTag(Element e)486 private void openFictionalTag(Element e) 487 { 488 handleSupposedStartTag(e); 489 stack.add(new hTag(new TagElement(e, true))); 490 if (!e.omitStart()) 491 s_error("<" + e + "> is expected (supposing it)"); 492 } 493 validateAttribute(TagElement tag, htmlAttributeSet parameters, Enumeration enumeration )494 private void validateAttribute(TagElement tag, htmlAttributeSet parameters, 495 Enumeration enumeration 496 ) 497 { 498 Object foundAttribute; 499 AttributeList dtdAttribute; 500 foundAttribute = enumeration.nextElement(); 501 dtdAttribute = tag.getElement().getAttribute(foundAttribute.toString()); 502 if (dtdAttribute == null) 503 { 504 CPStringBuilder valid = 505 new CPStringBuilder("The tag <" + tag.getHTMLTag() + 506 "> cannot contain the attribute '" + foundAttribute + 507 "'. The valid attributes for this tag are: " 508 ); 509 510 AttributeList a = tag.getElement().getAttributes(); 511 512 while (a != null) 513 { 514 valid.append(a.name.toUpperCase()); 515 valid.append(' '); 516 a = a.next; 517 } 518 s_error(valid.toString()); 519 } 520 521 else 522 { 523 String value = parameters.getAttribute(foundAttribute).toString(); 524 525 if (dtdAttribute.type == DTDConstants.NUMBER) 526 validateNumberAttribute(tag, foundAttribute, value); 527 528 if (dtdAttribute.type == DTDConstants.NAME || 529 dtdAttribute.type == DTDConstants.ID 530 ) 531 validateNameOrIdAttribute(tag, foundAttribute, value); 532 533 if (dtdAttribute.values != null) 534 validateAttributeWithValueList(tag, foundAttribute, dtdAttribute, 535 value 536 ); 537 } 538 } 539 validateAttributeWithValueList(TagElement tag, Object foundAttribute, AttributeList dtdAttribute, String value )540 private void validateAttributeWithValueList(TagElement tag, 541 Object foundAttribute, 542 AttributeList dtdAttribute, 543 String value 544 ) 545 { 546 if (!dtdAttribute.values.contains(value.toLowerCase()) && 547 !dtdAttribute.values.contains(value.toUpperCase()) 548 ) 549 { 550 CPStringBuilder valid; 551 if (dtdAttribute.values.size() == 1) 552 valid = 553 new CPStringBuilder("The attribute '" + foundAttribute + 554 "' of the tag <" + tag.getHTMLTag() + 555 "> cannot have the value '" + value + 556 "'. The only valid value is " 557 ); 558 else 559 valid = 560 new CPStringBuilder("The attribute '" + foundAttribute + 561 "' of the tag <" + tag.getHTMLTag() + 562 "> cannot have the value '" + value + "'. The " + 563 dtdAttribute.values.size() + 564 " valid values are: " 565 ); 566 567 Enumeration vv = dtdAttribute.values.elements(); 568 while (vv.hasMoreElements()) 569 { 570 valid.append('"'); 571 valid.append(vv.nextElement()); 572 valid.append("\" "); 573 } 574 s_error(valid.toString()); 575 } 576 } 577 validateNameOrIdAttribute(TagElement tag, Object foundAttribute, String value )578 private void validateNameOrIdAttribute(TagElement tag, Object foundAttribute, 579 String value 580 ) 581 { 582 boolean ok = true; 583 584 if (!Character.isLetter(value.charAt(0))) 585 ok = false; 586 587 char c; 588 for (int i = 0; i < value.length(); i++) 589 { 590 c = value.charAt(i); 591 if (!( 592 Character.isLetter(c) || Character.isDigit(c) || 593 "".indexOf(c) >= 0 594 ) 595 ) 596 ok = false; 597 } 598 if (!ok) 599 s_error("The '" + foundAttribute + "' attribute of the tag <" + 600 tag.getHTMLTag() + "> must start from letter and consist of " + 601 "letters, digits, hypens, colons, underscores and periods. " + 602 "It cannot be '" + value + "'" 603 ); 604 } 605 validateNumberAttribute(TagElement tag, Object foundAttribute, String value )606 private void validateNumberAttribute(TagElement tag, Object foundAttribute, 607 String value 608 ) 609 { 610 try 611 { 612 Integer.parseInt(value); 613 } 614 catch (NumberFormatException ex) 615 { 616 s_error("The '" + foundAttribute + "' attribute of the tag <" + 617 tag.getHTMLTag() + "> must be a valid number and not '" + 618 value + "'" 619 ); 620 } 621 } 622 } 623