/* htmlValidator.java -- The HTML content validator.
2    Copyright (C) 2005 Free Software Foundation, Inc.
3 
4 This file is part of GNU Classpath.
5 
6 GNU Classpath is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10 
11 GNU Classpath is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 General Public License for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GNU Classpath; see the file COPYING.  If not, write to the
18 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 02110-1301 USA.
20 
21 Linking this library statically or dynamically with other modules is
22 making a combined work based on this library.  Thus, the terms and
23 conditions of the GNU General Public License cover the whole
24 combination.
25 
26 As a special exception, the copyright holders of this library give you
27 permission to link this library with independent modules to produce an
28 executable, regardless of the license terms of these independent
29 modules, and to copy and distribute the resulting executable under
30 terms of your choice, provided that you also meet, for each linked
31 independent module, the terms and conditions of the license of that
32 module.  An independent module is a module which is not derived from
33 or based on this library.  If you modify this library, you may extend
34 this exception to your version of the library, but you are not
35 obligated to do so.  If you do not wish to do so, delete this
36 exception statement from your version. */
37 
38 
39 package gnu.javax.swing.text.html.parser;
40 
41 import gnu.java.lang.CPStringBuilder;
42 
43 import gnu.javax.swing.text.html.parser.models.node;
44 import gnu.javax.swing.text.html.parser.models.transformer;
45 
46 import java.util.BitSet;
47 import java.util.Enumeration;
48 import java.util.LinkedList;
49 import java.util.ListIterator;
50 
51 import javax.swing.text.SimpleAttributeSet;
52 import javax.swing.text.html.HTML;
53 import javax.swing.text.html.parser.*;
54 
/**
 * <p>The HTML content validator is responsible for opening and
 * closing elements with optional start/end tags, detecting
 * wrongly placed HTML tags and reporting errors. The working instance
 * is the inner class inside the {@link javax.swing.text.html.parser.Parser}.
 * </p>
 * <p>This class could potentially
 * provide a basis for automated closing and insertion of HTML tags,
 * correcting the HTML errors found.
 * </p>
 * @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org)
 */
66 public abstract class htmlValidator
67 {
68   /**
69    * The tag reference, holding additional information that the tag
70    * has been forcibly closed.
71    */
72   protected class hTag
73   {
74     protected final Element element;
75     protected final HTML.Tag tag;
76     protected final TagElement tgElement;
77     protected boolean forcibly_closed;
78     protected node validationTrace;
79 
hTag(TagElement an_element)80     protected hTag(TagElement an_element)
81     {
82       element = an_element.getElement();
83       tag = an_element.getHTMLTag();
84       tgElement = an_element;
85 
86       if (element.content != null)
87         validationTrace = transformer.transform(element.content, dtd);
88     }
89 
90     /**
91      * This is called when the tag must be forcibly closed because
92      * it would make the newly appearing tag invalid.
93      * The parser is not notified about such event (just the error
94      * is reported). For such tags, the closing message does not
95      * appear when later reaching the end of stream. The exception is
96      * the &lt;head&gt; tag: the parser is notified about its silent closing
97      * when &lt;body&gt; or other html content appears.
98      */
forciblyCloseDueContext()99     protected void forciblyCloseDueContext()
100     {
101       forcibly_closed = true;
102     }
103 
104     /**
105      * This is called when the tag must be forcibly closed after
106      * reaching the end of stream. The parser is notified as if
107      * closing the tag explicitly.
108      */
forciblyCloseDueEndOfStream()109     protected void forciblyCloseDueEndOfStream()
110     {
111       forcibly_closed = true;
112       handleSupposedEndTag(element);
113     }
114   }
115 
116   /**
117    * The DTD, providing information about the valid document structure.
118    */
119   protected final DTD dtd;
120 
  /**
   * The stack of the currently opened tags ({@link hTag} entries).
   * The last entry is the innermost opened tag.
   */
124   protected final LinkedList stack = new LinkedList();
125 
126   /**
127    * Creates a new tag stack, using the given DTD.
128    * @param a_dtd A DTD, providing the information about the valid
129    * tag content.
130    */
htmlValidator(DTD a_dtd)131   public htmlValidator(DTD a_dtd)
132   {
133     dtd = a_dtd;
134   }
135 
136   /**
137    * Close all opened tags (called at the end of parsing).
138    */
closeAll()139   public void closeAll()
140   {
141     hTag h;
142     while (!stack.isEmpty())
143       {
144         h = (hTag) stack.getLast();
145         if (!h.forcibly_closed && !h.element.omitEnd())
146           s_error("Unclosed <" + h.tag + ">, closing at the end of stream");
147 
148         handleSupposedEndTag(h.element);
149 
150         closeTag(h.tgElement);
151       }
152   }
153 
154   /**
155    * Remove the given tag from the stack or (if found) from the list
156    * of the forcibly closed tags.
157    */
closeTag(TagElement tElement)158   public boolean closeTag(TagElement tElement)
159   {
160     HTML.Tag tag = tElement.getHTMLTag();
161     hTag x;
162     hTag close;
163 
164     if (!stack.isEmpty())
165       {
166         ListIterator iter = stack.listIterator(stack.size());
167 
168         while (iter.hasPrevious())
169           {
170             x = (hTag) iter.previous();
171             if (tag.equals(x.tag))
172               {
173                 if (x.forcibly_closed && !x.element.omitEnd())
174                   s_error("The tag <" + x.tag +
175                           "> has already been forcibly closed"
176                          );
177 
178 
179                 // If the tag has a content model defined, forcibly close all
180                 // tags that were opened after the tag being currently closed.
181                 closing:
182                 if (x.element.content != null)
183                   {
184                     iter = stack.listIterator(stack.size());
185                     while (iter.hasPrevious())
186                       {
187                         close = (hTag) iter.previous();
188                         if (close == x)
189                           break closing;
190                         handleSupposedEndTag(close.element);
191                         iter.remove();
192                       }
193                   }
194 
195                 stack.remove(x);
196                 return true;
197               }
198           }
199       }
200     s_error("Closing unopened <" + tag + ">");
201     return false;
202   }
203 
204   /**
205    * Add the given HTML tag to the stack of the opened tags. Forcibly closes
206    * all tags in the stack that does not allow this tag in they content (error
207    * is reported).
208    * @param element
209    */
openTag(TagElement tElement, htmlAttributeSet parameters)210   public void openTag(TagElement tElement, htmlAttributeSet parameters)
211   {
212     // If this is a fictional call, the message from the parser
213     // has recursively returned - ignore.
214     if (tElement.fictional())
215       return;
216 
217     validateParameters(tElement, parameters);
218 
219     // If the stack is empty, start from HTML
220     if (stack.isEmpty() && tElement.getHTMLTag() != HTML.Tag.HTML)
221       {
222         Element html = dtd.getElement(HTML.Tag.HTML.toString());
223         openFictionalTag(html);
224       }
225 
226     Object v = tagIsValidForContext(tElement);
227     if (v != Boolean.TRUE)
228       {
229         // The tag is not valid for context, the content
230         // model suggest to open another tag.
231         if (v instanceof Element)
232           {
233             int n = 0;
234             while (v instanceof Element && (n++ < 100))
235               {
236                 Element fe = (Element) v;
237 
238                 // notify the content model that we add the proposed tag
239                 node ccm = getCurrentContentModel();
240                 if (ccm != null)
241                   ccm.show(fe);
242                 openFictionalTag(fe);
243 
244                 Object vv = tagIsValidForContext(tElement);
245                 if (vv instanceof Element) // One level of nesting is supported.
246                   {
247                     openFictionalTag((Element) vv);
248 
249                     Object vx = tagIsValidForContext(tElement);
250                     if (vx instanceof Element)
251                       openFictionalTag((Element) vx);
252                   }
253                 else if (vv == Boolean.FALSE)
254                   {
255                     // The tag is still not valid for the current
256                     // content after opening a fictional element.
257                     if (fe.omitEnd())
258                       {
259                         // close the previously opened fictional tag.
260                         closeLast();
261                         vv = tagIsValidForContext(tElement);
262                         if (vv instanceof Element)
263 
264                           // another tag was suggested by the content model
265                           openFictionalTag((Element) vv);
266                       }
267                   }
268                 v = tagIsValidForContext(tElement);
269               }
270           }
271         else // If the current element has the optional end tag, close it.
272           {
273             if (!stack.isEmpty())
274               {
275                 closing:
276                 do
277                   {
278                     hTag last = (hTag) stack.getLast();
279                     if (last.element.omitEnd())
280                       {
281                         closeLast();
282                         v = tagIsValidForContext(tElement);
283                         if (v instanceof Element) // another tag was suggested by the content model
284                           {
285                             openFictionalTag((Element) v);
286                             break closing;
287                           }
288                       }
289                     else
290                       break closing;
291                   }
292                 while (v == Boolean.FALSE && !stack.isEmpty());
293               }
294           }
295       }
296 
297     stack.add(new hTag(tElement));
298   }
299 
300   /**
301    * Clear the stack.
302    */
restart()303   public void restart()
304   {
305     stack.clear();
306   }
307 
308   /**
309    * Check if this tag is valid for the current context. Return Boolean.True if
310    * it is OK, Boolean.False if it is surely not OK or the Element that the
311    * content model recommends to insert making the situation ok. If Boolean.True
312    * is returned, the content model current position is moved forward. Otherwise
313    * this position remains the same.
314    *
315    * @param tElement
316    * @return
317    */
tagIsValidForContext(TagElement tElement)318   public Object tagIsValidForContext(TagElement tElement)
319   {
320     // Check the current content model, if one is available.
321     node cv = getCurrentContentModel();
322 
323     if (cv != null)
324       return cv.show(tElement.getElement());
325 
326     // Check exclusions and inclusions.
327     ListIterator iter = stack.listIterator(stack.size());
328     hTag t = null;
329     final int idx = tElement.getElement().index;
330 
331     // Check only known tags.
332     if (idx >= 0)
333       {
334         BitSet inclusions = new BitSet();
335         while (iter.hasPrevious())
336           {
337             t = (hTag) iter.previous();
338             if (! t.forcibly_closed)
339               {
340                 if (t.element.exclusions != null
341                     && t.element.exclusions.get(idx))
342                   return Boolean.FALSE;
343 
344                 if (t.element.inclusions != null)
345                   inclusions.or(t.element.inclusions);
346               }
347           }
348         if (! inclusions.get(idx))
349           {
350             // If we need to insert something, and cannot do this, but
351             // it is allowed to insert the paragraph here, insert the
352             // paragraph.
353             Element P = dtd.getElement(HTML_401F.P);
354             if (inclusions.get(P.index))
355               return P;
356             else
357               return Boolean.FALSE;
358           }
359       }
360     return Boolean.TRUE;
361   }
362 
363   /**
364    * Validate tag without storing in into the tag stack. This is called
365    * for the empty tags and results the subsequent calls to the openTag
366    * and closeTag.
367    */
validateTag(TagElement tElement, htmlAttributeSet parameters)368   public void validateTag(TagElement tElement, htmlAttributeSet parameters)
369   {
370     openTag(tElement, parameters);
371     closeTag(tElement);
372   }
373 
374   /**
375    * Check for mandatory elements, subsequent to the last tag:
376    * @param tElement The element that will be inserted next.
377    */
checkContentModel(TagElement tElement, boolean first)378   protected void checkContentModel(TagElement tElement, boolean first)
379   {
380     if (stack.isEmpty())
381       return;
382 
383     hTag last = (hTag) stack.getLast();
384     if (last.validationTrace == null)
385       return;
386 
387     Object r = last.validationTrace.show(tElement.getElement());
388     if (r == Boolean.FALSE)
389       s_error("The <" + last.element + "> does not match the content model " +
390               last.validationTrace
391              );
392     else if (r instanceof Element) // The content model recommends insertion of this element
393       {
394         if (!first)
395           closeTag(last.tgElement);
396         handleSupposedStartTag((Element) r);
397         openTag(new TagElement((Element) r), null);
398       }
399   }
400 
401   /**
402    * The method is called when the tag must be closed because
403    * it does not allow the subsequent elements inside its context
404    * or the end of stream has been reached. The parser is only
405    * informed if the element being closed does not require the
406    * end tag (the "omitEnd" flag is set).
407    * The closing message must be passed to the parser mechanism
408    * before passing message about the opening the next tag.
409    *
410    * @param element The tag being fictionally (forcibly) closed.
411    */
handleSupposedEndTag(Element element)412   protected abstract void handleSupposedEndTag(Element element);
413 
414   /**
415    * The method is called when the validator decides to open the
416    * tag on its own initiative. This may happen if the content model
417    * includes the element with the optional (supposed) start tag.
418    *
419    * @param element The tag being opened.
420    */
handleSupposedStartTag(Element element)421   protected abstract void handleSupposedStartTag(Element element);
422 
423   /**
424    * Handles the error message. This method must be overridden to pass
425    * the message where required.
426    * @param msg The message text.
427    */
s_error(String msg)428   protected abstract void s_error(String msg);
429 
430   /**
431    * Validate the parameters, report the error if the given parameter is
432    * not in the parameter set, valid for the given attribute. The information
433    * about the valid parameter set is taken from the Element, enclosed
434    * inside the tag. The method does not validate the default parameters.
435    * @param tag The tag
436    * @param parameters The parameters of this tag.
437    */
validateParameters(TagElement tag, htmlAttributeSet parameters)438   protected void validateParameters(TagElement tag, htmlAttributeSet parameters)
439   {
440     if (parameters == null ||
441         parameters == htmlAttributeSet.EMPTY_HTML_ATTRIBUTE_SET ||
442         parameters == SimpleAttributeSet.EMPTY
443        )
444       return;
445 
446     Enumeration enumeration = parameters.getAttributeNames();
447 
448     while (enumeration.hasMoreElements())
449       {
450         validateAttribute(tag, parameters, enumeration);
451       }
452 
453     // Check for missing required values.
454     AttributeList a = tag.getElement().getAttributes();
455 
456     while (a != null)
457       {
458         if (a.getModifier() == DTDConstants.REQUIRED)
459           if (parameters.getAttribute(a.getName()) == null)
460             {
461               s_error("Missing required attribute '" + a.getName() + "' for <" +
462                       tag.getHTMLTag() + ">"
463                      );
464             }
465         a = a.next;
466       }
467   }
468 
getCurrentContentModel()469   private node getCurrentContentModel()
470   {
471     if (!stack.isEmpty())
472       {
473         hTag last = (hTag) stack.getLast();
474         return last.validationTrace;
475       }
476     else
477       return null;
478   }
479 
closeLast()480   private void closeLast()
481   {
482     handleSupposedEndTag(((hTag) stack.getLast()).element);
483     stack.removeLast();
484   }
485 
openFictionalTag(Element e)486   private void openFictionalTag(Element e)
487   {
488     handleSupposedStartTag(e);
489     stack.add(new hTag(new TagElement(e, true)));
490     if (!e.omitStart())
491       s_error("<" + e + "> is expected (supposing it)");
492   }
493 
validateAttribute(TagElement tag, htmlAttributeSet parameters, Enumeration enumeration )494   private void validateAttribute(TagElement tag, htmlAttributeSet parameters,
495                                  Enumeration enumeration
496                                 )
497   {
498     Object foundAttribute;
499     AttributeList dtdAttribute;
500     foundAttribute = enumeration.nextElement();
501     dtdAttribute = tag.getElement().getAttribute(foundAttribute.toString());
502     if (dtdAttribute == null)
503       {
504         CPStringBuilder valid =
505           new CPStringBuilder("The tag <" + tag.getHTMLTag() +
506                               "> cannot contain the attribute '" + foundAttribute +
507                               "'. The valid attributes for this tag are: "
508                               );
509 
510         AttributeList a = tag.getElement().getAttributes();
511 
512         while (a != null)
513           {
514             valid.append(a.name.toUpperCase());
515             valid.append(' ');
516             a = a.next;
517           }
518         s_error(valid.toString());
519       }
520 
521     else
522       {
523         String value = parameters.getAttribute(foundAttribute).toString();
524 
525         if (dtdAttribute.type == DTDConstants.NUMBER)
526           validateNumberAttribute(tag, foundAttribute, value);
527 
528         if (dtdAttribute.type == DTDConstants.NAME ||
529             dtdAttribute.type == DTDConstants.ID
530            )
531           validateNameOrIdAttribute(tag, foundAttribute, value);
532 
533         if (dtdAttribute.values != null)
534           validateAttributeWithValueList(tag, foundAttribute, dtdAttribute,
535                                          value
536                                         );
537       }
538   }
539 
validateAttributeWithValueList(TagElement tag, Object foundAttribute, AttributeList dtdAttribute, String value )540   private void validateAttributeWithValueList(TagElement tag,
541                                               Object foundAttribute,
542                                               AttributeList dtdAttribute,
543                                               String value
544                                              )
545   {
546     if (!dtdAttribute.values.contains(value.toLowerCase()) &&
547         !dtdAttribute.values.contains(value.toUpperCase())
548        )
549       {
550         CPStringBuilder valid;
551         if (dtdAttribute.values.size() == 1)
552           valid =
553             new CPStringBuilder("The attribute '" + foundAttribute +
554                                 "' of the tag <" + tag.getHTMLTag() +
555                                 "> cannot have the value '" + value +
556                                 "'. The only valid value is "
557                                 );
558         else
559           valid =
560             new CPStringBuilder("The attribute '" + foundAttribute +
561                                 "' of the tag <" + tag.getHTMLTag() +
562                                 "> cannot have the value '" + value + "'. The " +
563                                 dtdAttribute.values.size() +
564                                 " valid values are: "
565                                 );
566 
567         Enumeration vv = dtdAttribute.values.elements();
568         while (vv.hasMoreElements())
569           {
570             valid.append('"');
571             valid.append(vv.nextElement());
572             valid.append("\"  ");
573           }
574         s_error(valid.toString());
575       }
576   }
577 
validateNameOrIdAttribute(TagElement tag, Object foundAttribute, String value )578   private void validateNameOrIdAttribute(TagElement tag, Object foundAttribute,
579                                          String value
580                                         )
581   {
582     boolean ok = true;
583 
584     if (!Character.isLetter(value.charAt(0)))
585       ok = false;
586 
587     char c;
588     for (int i = 0; i < value.length(); i++)
589       {
590         c = value.charAt(i);
591         if (!(
592               Character.isLetter(c) || Character.isDigit(c) ||
593               "".indexOf(c) >= 0
594             )
595            )
596           ok = false;
597       }
598     if (!ok)
599       s_error("The '" + foundAttribute + "' attribute of the tag <" +
600               tag.getHTMLTag() + "> must start from letter and consist of " +
601               "letters, digits, hypens, colons, underscores and periods. " +
602               "It cannot be '" + value + "'"
603              );
604   }
605 
validateNumberAttribute(TagElement tag, Object foundAttribute, String value )606   private void validateNumberAttribute(TagElement tag, Object foundAttribute,
607                                        String value
608                                       )
609   {
610     try
611       {
612         Integer.parseInt(value);
613       }
614     catch (NumberFormatException ex)
615       {
616         s_error("The '" + foundAttribute + "' attribute of the tag <" +
617                 tag.getHTMLTag() + "> must be a valid number and not '" +
618                 value + "'"
619                );
620       }
621   }
622 }
623