/* htmlValidator.java -- The HTML content validator.
2    Copyright (C) 2005 Free Software Foundation, Inc.
3 
4 This file is part of GNU Classpath.
5 
6 GNU Classpath is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10 
11 GNU Classpath is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 General Public License for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GNU Classpath; see the file COPYING.  If not, write to the
18 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 02110-1301 USA.
20 
21 Linking this library statically or dynamically with other modules is
22 making a combined work based on this library.  Thus, the terms and
23 conditions of the GNU General Public License cover the whole
24 combination.
25 
26 As a special exception, the copyright holders of this library give you
27 permission to link this library with independent modules to produce an
28 executable, regardless of the license terms of these independent
29 modules, and to copy and distribute the resulting executable under
30 terms of your choice, provided that you also meet, for each linked
31 independent module, the terms and conditions of the license of that
32 module.  An independent module is a module which is not derived from
33 or based on this library.  If you modify this library, you may extend
34 this exception to your version of the library, but you are not
35 obligated to do so.  If you do not wish to do so, delete this
36 exception statement from your version. */
37 
38 
39 package gnu.javax.swing.text.html.parser;
40 
41 import gnu.javax.swing.text.html.parser.models.node;
42 import gnu.javax.swing.text.html.parser.models.transformer;
43 
44 import java.util.BitSet;
45 import java.util.Enumeration;
46 import java.util.LinkedList;
47 import java.util.ListIterator;
48 
49 import javax.swing.text.SimpleAttributeSet;
50 import javax.swing.text.html.HTML;
51 import javax.swing.text.html.parser.*;
52 
/**
 * <p>The HTML content validator is responsible for opening and
 * closing elements with optional start/end tags, detecting
 * wrongly placed HTML tags and reporting errors. The working instance
 * is an inner class inside the {@link javax.swing.text.html.parser.Parser}.
 * </p>
 * <p>This class could potentially
 * provide a basis for automated closing and insertion of HTML tags,
 * correcting the HTML errors found.</p>
 * @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org)
 */
64 public abstract class htmlValidator
65 {
66   /**
67    * The tag reference, holding additional information that the tag
68    * has been forcibly closed.
69    */
70   protected class hTag
71   {
72     protected final Element element;
73     protected final HTML.Tag tag;
74     protected final TagElement tgElement;
75     protected boolean forcibly_closed;
76     protected node validationTrace;
77 
hTag(TagElement an_element)78     protected hTag(TagElement an_element)
79     {
80       element = an_element.getElement();
81       tag = an_element.getHTMLTag();
82       tgElement = an_element;
83 
84       if (element.content != null)
85         validationTrace = transformer.transform(element.content, dtd);
86     }
87 
88     /**
89      * This is called when the tag must be forcibly closed because
90      * it would make the newly appearing tag invalid.
91      * The parser is not notified about such event (just the error
92      * is reported). For such tags, the closing message does not
93      * appear when later reaching the end of stream. The exception is
94      * the &lt;head&gt; tag: the parser is notified about its silent closing
95      * when &lt;body&gt; or other html content appears.
96      */
forciblyCloseDueContext()97     protected void forciblyCloseDueContext()
98     {
99       forcibly_closed = true;
100     }
101 
102     /**
103      * This is called when the tag must be forcibly closed after
104      * reaching the end of stream. The parser is notified as if
105      * closing the tag explicitly.
106      */
forciblyCloseDueEndOfStream()107     protected void forciblyCloseDueEndOfStream()
108     {
109       forcibly_closed = true;
110       handleSupposedEndTag(element);
111     }
112   }
113 
114   /**
115    * The DTD, providing information about the valid document structure.
116    */
117   protected final DTD dtd;
118 
  /**
   * The stack, holding the current tag context. The most recently
   * opened tag is the last element.
   */
122   protected final LinkedList stack = new LinkedList();
123 
124   /**
125    * Creates a new tag stack, using the given DTD.
126    * @param a_dtd A DTD, providing the information about the valid
127    * tag content.
128    */
htmlValidator(DTD a_dtd)129   public htmlValidator(DTD a_dtd)
130   {
131     dtd = a_dtd;
132   }
133 
134   /**
135    * Close all opened tags (called at the end of parsing).
136    */
closeAll()137   public void closeAll()
138   {
139     hTag h;
140     while (!stack.isEmpty())
141       {
142         h = (hTag) stack.getLast();
143         if (!h.forcibly_closed && !h.element.omitEnd())
144           s_error("Unclosed <" + h.tag + ">, closing at the end of stream");
145 
146         handleSupposedEndTag(h.element);
147 
148         closeTag(h.tgElement);
149       }
150   }
151 
152   /**
153    * Remove the given tag from the stack or (if found) from the list
154    * of the forcibly closed tags.
155    */
closeTag(TagElement tElement)156   public void closeTag(TagElement tElement)
157   {
158     HTML.Tag tag = tElement.getHTMLTag();
159     hTag x;
160     hTag close;
161 
162     if (!stack.isEmpty())
163       {
164         ListIterator iter = stack.listIterator(stack.size());
165 
166         while (iter.hasPrevious())
167           {
168             x = (hTag) iter.previous();
169             if (tag.equals(x.tag))
170               {
171                 if (x.forcibly_closed && !x.element.omitEnd())
172                   s_error("The tag <" + x.tag +
173                           "> has already been forcibly closed"
174                          );
175 
176 
177                 // If the tag has a content model defined, forcibly close all
178                 // tags that were opened after the tag being currently closed.
179                 closing:
180                 if (x.element.content != null)
181                   {
182                     iter = stack.listIterator(stack.size());
183                     while (iter.hasPrevious())
184                       {
185                         close = (hTag) iter.previous();
186                         if (close == x)
187                           break closing;
188                         handleSupposedEndTag(close.element);
189                         iter.remove();
190                       }
191                   }
192 
193                 stack.remove(x);
194                 return;
195               }
196           }
197       }
198     s_error("Closing unopened <" + tag + ">");
199   }
200 
201   /**
202    * Add the given HTML tag to the stack of the opened tags. Forcibly closes
203    * all tags in the stack that does not allow this tag in they content (error
204    * is reported).
205    * @param element
206    */
openTag(TagElement tElement, htmlAttributeSet parameters)207   public void openTag(TagElement tElement, htmlAttributeSet parameters)
208   {
209     // If this is a fictional call, the message from the parser
210     // has recursively returned - ignore.
211     if (tElement.fictional())
212       return;
213 
214     validateParameters(tElement, parameters);
215 
216     // If the stack is empty, start from HTML
217     if (stack.isEmpty() && tElement.getHTMLTag() != HTML.Tag.HTML)
218       {
219         Element html = dtd.getElement(HTML.Tag.HTML.toString());
220         openFictionalTag(html);
221       }
222 
223     Object v = tagIsValidForContext(tElement);
224     if (v != Boolean.TRUE)
225       {
226         // The tag is not valid for context, the content
227         // model suggest to open another tag.
228         if (v instanceof Element)
229           {
230             int n = 0;
231             while (v instanceof Element && (n++ < 100))
232               {
233                 Element fe = (Element) v;
234 
235                 // notify the content model that we add the proposed tag
236                 getCurrentContentModel().show(fe);
237                 openFictionalTag(fe);
238 
239                 Object vv = tagIsValidForContext(tElement);
240                 if (vv instanceof Element) // One level of nesting is supported.
241                   {
242                     openFictionalTag((Element) vv);
243 
244                     Object vx = tagIsValidForContext(tElement);
245                     if (vx instanceof Element)
246                       openFictionalTag((Element) vx);
247                   }
248                 else if (vv == Boolean.FALSE)
249                   {
250                     // The tag is still not valid for the current
251                     // content after opening a fictional element.
252                     if (fe.omitEnd())
253                       {
254                         // close the previously opened fictional tag.
255                         closeLast();
256                         vv = tagIsValidForContext(tElement);
257                         if (vv instanceof Element)
258 
259                           // another tag was suggested by the content model
260                           openFictionalTag((Element) vv);
261                       }
262                   }
263                 v = tagIsValidForContext(tElement);
264               }
265           }
266         else // If the current element has the optional end tag, close it.
267           {
268             if (!stack.isEmpty())
269               {
270                 closing:
271                 do
272                   {
273                     hTag last = (hTag) stack.getLast();
274                     if (last.element.omitEnd())
275                       {
276                         closeLast();
277                         v = tagIsValidForContext(tElement);
278                         if (v instanceof Element) // another tag was suggested by the content model
279                           {
280                             openFictionalTag((Element) v);
281                             break closing;
282                           }
283                       }
284                     else
285                       break closing;
286                   }
287                 while (v == Boolean.FALSE && !stack.isEmpty());
288               }
289           }
290       }
291 
292     stack.add(new hTag(tElement));
293   }
294 
295   /**
296    * Clear the stack.
297    */
restart()298   public void restart()
299   {
300     stack.clear();
301   }
302 
303   /**
304    * Check if this tag is valid for the current context.
305    * Return Boolean.True if it is OK, Boolean.False
306    * if it is surely not OK or the Element that the
307    * content model recommends to insert making the situation
308    * ok. If Boolean.True is returned, the content model current
309    * position is moved forward. Otherwise this position remains
310    * the same.
311    * @param tElement
312    * @return
313    */
tagIsValidForContext(TagElement tElement)314   public Object tagIsValidForContext(TagElement tElement)
315   {
316     // Check the current content model, if one is available.
317     node cv = getCurrentContentModel();
318 
319     if (cv != null)
320       return cv.show(tElement.getElement());
321 
322     // Check exclusions and inclusions.
323     ListIterator iter = stack.listIterator(stack.size());
324     hTag t;
325     final int idx = tElement.getElement().index;
326 
327     // Check only known tags.
328     if (idx >= 0)
329       {
330         BitSet inclusions = new BitSet();
331         while (iter.hasPrevious())
332           {
333             t = (hTag) iter.previous();
334             if (!t.forcibly_closed)
335               {
336                 if (t.element.exclusions != null &&
337                     t.element.exclusions.get(idx)
338                    )
339                   return Boolean.FALSE;
340 
341                 if (t.element.inclusions != null)
342                   inclusions.or(t.element.inclusions);
343               }
344           }
345         if (!inclusions.get(idx))
346           return Boolean.FALSE;
347       }
348     return Boolean.TRUE;
349   }
350 
351   /**
352    * Validate tag without storing in into the tag stack. This is called
353    * for the empty tags and results the subsequent calls to the openTag
354    * and closeTag.
355    */
validateTag(TagElement tElement, htmlAttributeSet parameters)356   public void validateTag(TagElement tElement, htmlAttributeSet parameters)
357   {
358     openTag(tElement, parameters);
359     closeTag(tElement);
360   }
361 
362   /**
363    * Check for mandatory elements, subsequent to the last tag:
364    * @param tElement The element that will be inserted next.
365    */
checkContentModel(TagElement tElement, boolean first)366   protected void checkContentModel(TagElement tElement, boolean first)
367   {
368     if (stack.isEmpty())
369       return;
370 
371     hTag last = (hTag) stack.getLast();
372     if (last.validationTrace == null)
373       return;
374 
375     Object r = last.validationTrace.show(tElement.getElement());
376     if (r == Boolean.FALSE)
377       s_error("The <" + last.element + "> does not match the content model " +
378               last.validationTrace
379              );
380     else if (r instanceof Element) // The content model recommends insertion of this element
381       {
382         if (!first)
383           closeTag(last.tgElement);
384         handleSupposedStartTag((Element) r);
385         openTag(new TagElement((Element) r), null);
386       }
387   }
388 
  /**
   * The method is called when the tag must be closed because
   * it does not allow the subsequent elements inside its context
   * or the end of stream has been reached. The parser is only
   * informed if the element being closed does not require the
   * end tag (the "omitEnd" flag is set).
   * The closing message must be passed to the parser mechanism
   * before passing the message about opening the next tag.
   *
   * @param element The tag being fictionally (forcibly) closed.
   */
handleSupposedEndTag(Element element)400   protected abstract void handleSupposedEndTag(Element element);
401 
402   /**
403    * The method is called when the validator decides to open the
404    * tag on its own initiative. This may happen if the content model
405    * includes the element with the optional (supposed) start tag.
406    *
407    * @param element The tag being opened.
408    */
handleSupposedStartTag(Element element)409   protected abstract void handleSupposedStartTag(Element element);
410 
411   /**
412    * Handles the error message. This method must be overridden to pass
413    * the message where required.
414    * @param msg The message text.
415    */
s_error(String msg)416   protected abstract void s_error(String msg);
417 
418   /**
419    * Validate the parameters, report the error if the given parameter is
420    * not in the parameter set, valid for the given attribute. The information
421    * about the valid parameter set is taken from the Element, enclosed
422    * inside the tag. The method does not validate the default parameters.
423    * @param tag The tag
424    * @param parameters The parameters of this tag.
425    */
validateParameters(TagElement tag, htmlAttributeSet parameters)426   protected void validateParameters(TagElement tag, htmlAttributeSet parameters)
427   {
428     if (parameters == null ||
429         parameters == htmlAttributeSet.EMPTY_HTML_ATTRIBUTE_SET ||
430         parameters == SimpleAttributeSet.EMPTY
431        )
432       return;
433 
434     Enumeration enumeration = parameters.getAttributeNames();
435 
436     while (enumeration.hasMoreElements())
437       {
438         validateAttribute(tag, parameters, enumeration);
439       }
440 
441     // Check for missing required values.
442     AttributeList a = tag.getElement().getAttributes();
443 
444     while (a != null)
445       {
446         if (a.getModifier() == DTDConstants.REQUIRED)
447           if (parameters.getAttribute(a.getName()) == null)
448             {
449               s_error("Missing required attribute '" + a.getName() + "' for <" +
450                       tag.getHTMLTag() + ">"
451                      );
452             }
453         a = a.next;
454       }
455   }
456 
getCurrentContentModel()457   private node getCurrentContentModel()
458   {
459     if (!stack.isEmpty())
460       {
461         hTag last = (hTag) stack.getLast();
462         return last.validationTrace;
463       }
464     else
465       return null;
466   }
467 
closeLast()468   private void closeLast()
469   {
470     handleSupposedEndTag(((hTag) stack.getLast()).element);
471     stack.removeLast();
472   }
473 
openFictionalTag(Element e)474   private void openFictionalTag(Element e)
475   {
476     handleSupposedStartTag(e);
477     stack.add(new hTag(new TagElement(e, true)));
478     if (!e.omitStart())
479       s_error("<" + e + "> is expected (supposing it)");
480   }
481 
validateAttribute(TagElement tag, htmlAttributeSet parameters, Enumeration enumeration )482   private void validateAttribute(TagElement tag, htmlAttributeSet parameters,
483                                  Enumeration enumeration
484                                 )
485   {
486     Object foundAttribute;
487     AttributeList dtdAttribute;
488     foundAttribute = enumeration.nextElement();
489     dtdAttribute = tag.getElement().getAttribute(foundAttribute.toString());
490     if (dtdAttribute == null)
491       {
492         StringBuffer valid =
493           new StringBuffer("The tag <" + tag.getHTMLTag() +
494                            "> cannot contain the attribute '" + foundAttribute +
495                            "'. The valid attributes for this tag are: "
496                           );
497 
498         AttributeList a = tag.getElement().getAttributes();
499 
500         while (a != null)
501           {
502             valid.append(a.name.toUpperCase());
503             valid.append(' ');
504             a = a.next;
505           }
506         s_error(valid.toString());
507       }
508 
509     else
510       {
511         String value = parameters.getAttribute(foundAttribute).toString();
512 
513         if (dtdAttribute.type == DTDConstants.NUMBER)
514           validateNumberAttribute(tag, foundAttribute, value);
515 
516         if (dtdAttribute.type == DTDConstants.NAME ||
517             dtdAttribute.type == DTDConstants.ID
518            )
519           validateNameOrIdAttribute(tag, foundAttribute, value);
520 
521         if (dtdAttribute.values != null)
522           validateAttributeWithValueList(tag, foundAttribute, dtdAttribute,
523                                          value
524                                         );
525       }
526   }
527 
validateAttributeWithValueList(TagElement tag, Object foundAttribute, AttributeList dtdAttribute, String value )528   private void validateAttributeWithValueList(TagElement tag,
529                                               Object foundAttribute,
530                                               AttributeList dtdAttribute,
531                                               String value
532                                              )
533   {
534     if (!dtdAttribute.values.contains(value.toLowerCase()) &&
535         !dtdAttribute.values.contains(value.toUpperCase())
536        )
537       {
538         StringBuffer valid;
539         if (dtdAttribute.values.size() == 1)
540           valid =
541             new StringBuffer("The attribute '" + foundAttribute +
542                              "' of the tag <" + tag.getHTMLTag() +
543                              "> cannot have the value '" + value +
544                              "'. The only valid value is "
545                             );
546         else
547           valid =
548             new StringBuffer("The attribute '" + foundAttribute +
549                              "' of the tag <" + tag.getHTMLTag() +
550                              "> cannot have the value '" + value + "'. The " +
551                              dtdAttribute.values.size() +
552                              " valid values are: "
553                             );
554 
555         Enumeration vv = dtdAttribute.values.elements();
556         while (vv.hasMoreElements())
557           {
558             valid.append('"');
559             valid.append(vv.nextElement());
560             valid.append("\"  ");
561           }
562         s_error(valid.toString());
563       }
564   }
565 
validateNameOrIdAttribute(TagElement tag, Object foundAttribute, String value )566   private void validateNameOrIdAttribute(TagElement tag, Object foundAttribute,
567                                          String value
568                                         )
569   {
570     boolean ok = true;
571 
572     if (!Character.isLetter(value.charAt(0)))
573       ok = false;
574 
575     char c;
576     for (int i = 0; i < value.length(); i++)
577       {
578         c = value.charAt(i);
579         if (!(
580               Character.isLetter(c) || Character.isDigit(c) ||
581               "".indexOf(c) >= 0
582             )
583            )
584           ok = false;
585       }
586     if (!ok)
587       s_error("The '" + foundAttribute + "' attribute of the tag <" +
588               tag.getHTMLTag() + "> must start from letter and consist of " +
589               "letters, digits, hypens, colons, underscores and periods. " +
590               "It cannot be '" + value + "'"
591              );
592   }
593 
validateNumberAttribute(TagElement tag, Object foundAttribute, String value )594   private void validateNumberAttribute(TagElement tag, Object foundAttribute,
595                                        String value
596                                       )
597   {
598     try
599       {
600         Integer.parseInt(value);
601       }
602     catch (NumberFormatException ex)
603       {
604         s_error("The '" + foundAttribute + "' attribute of the tag <" +
605                 tag.getHTMLTag() + "> must be a valid number and not '" +
606                 value + "'"
607                );
608       }
609   }
610 }
611