1 /* access.c -- carry out accessibility checks
2 
3   Copyright University of Toronto
4   Portions (c) 1998-2009 (W3C) MIT, ERCIM, Keio University
5   See tidy.h for the copyright notice.
6 
7 */
8 
9 
10 #include "tidy-int.h"
11 #include "access.h"
12 #include "message.h"
13 #include "tags.h"
14 #include "attrs.h"
15 #include "tmbstr.h"
16 
17 
18 /*
19     The accessibility checks to perform depending on user's desire.
20 
21     1. priority 1
22     2. priority 1 & 2
23     3. priority 1, 2, & 3
24 */
25 
26 /* List of possible image types */
27 static const ctmbstr imageExtensions[] =
28 {".jpg", ".gif", ".tif", ".pct", ".pic", ".iff", ".dib",
29  ".tga", ".pcx", ".png", ".jpeg", ".tiff", ".bmp"};
30 
31 #define N_IMAGE_EXTS (sizeof(imageExtensions)/sizeof(ctmbstr))
32 
33 /* List of possible sound file types */
34 static const ctmbstr soundExtensions[] =
35 {".wav", ".au", ".aiff", ".snd", ".ra", ".rm"};
36 
37 static const int soundExtErrCodes[] =
38 {
39     AUDIO_MISSING_TEXT_WAV,
40     AUDIO_MISSING_TEXT_AU,
41     AUDIO_MISSING_TEXT_AIFF,
42     AUDIO_MISSING_TEXT_SND,
43     AUDIO_MISSING_TEXT_RA,
44     AUDIO_MISSING_TEXT_RM
45 };
46 
47 #define N_AUDIO_EXTS (sizeof(soundExtensions)/sizeof(ctmbstr))
48 
49 /* List of possible media extensions */
50 static const ctmbstr mediaExtensions[] =
51 {".mpg", ".mov", ".asx", ".avi", ".ivf", ".m1v", ".mmm", ".mp2v",
52  ".mpa", ".mpe", ".mpeg", ".ram", ".smi", ".smil", ".swf",
53  ".wm", ".wma", ".wmv"};
54 
55 #define N_MEDIA_EXTS (sizeof(mediaExtensions)/sizeof(ctmbstr))
56 
57 /* List of possible frame sources */
58 static const ctmbstr frameExtensions[] =
59 {".htm", ".html", ".shtm", ".shtml", ".cfm", ".cfml",
60 ".asp", ".cgi", ".pl", ".smil"};
61 
62 #define N_FRAME_EXTS (sizeof(frameExtensions)/sizeof(ctmbstr))
63 
/*
    RGB components {red, green, blue} of the named colours.

    Row i holds the value of colorNames[i] (GetRgb indexes both tables
    with the same index), so the two tables must keep the same order
    and the same number of entries.
*/
static const int colorValues[][3] =
{
  {240, 248, 255},  /* aliceblue */
  {250, 235, 215},  /* antiquewhite */
  {0, 255, 255},    /* aqua */
  {127, 255, 212},  /* aquamarine */
  {240, 255, 255},  /* azure */
  {245, 245, 220},  /* beige */
  {255, 228, 196},  /* bisque */
  {0, 0, 0},        /* black */
  {255, 235, 205},  /* blanchedalmond */
  {0, 0, 255},      /* blue */
  {138, 43, 226},   /* blueviolet */
  {165, 42, 42},    /* brown */
  {222, 184, 135},  /* burlywood */
  {95, 158, 160},   /* cadetblue */
  {127, 255, 0},    /* chartreuse */
  {210, 105, 30},   /* chocolate */
  {255, 127, 80},   /* coral */
  {100, 149, 237},  /* cornflowerblue */
  {255, 248, 220},  /* cornsilk */
  {220, 20, 60},    /* crimson */
  {0, 255, 255},    /* cyan */
  {0, 0, 139},      /* darkblue */
  {0, 139, 139},    /* darkcyan */
  {184, 134, 11},   /* darkgoldenrod */
  {169, 169, 169},  /* darkgray */
  {0, 100, 0},      /* darkgreen */
  {169, 169, 169},  /* darkgrey */
  {189, 183, 107},  /* darkkhaki */
  {139, 0, 139},    /* darkmagenta */
  {85, 107, 47},    /* darkolivegreen */
  {255, 140, 0},    /* darkorange */
  {153, 50, 204},   /* darkorchid */
  {139, 0, 0},      /* darkred */
  {233, 150, 122},  /* darksalmon */
  {143, 188, 143},  /* darkseagreen */
  {72, 61, 139},    /* darkslateblue */
  {47, 79, 79},     /* darkslategray */
  {47, 79, 79},     /* darkslategrey */
  {0, 206, 209},    /* darkturquoise */
  {148, 0, 211},    /* darkviolet */
  {255, 20, 147},   /* deeppink */
  {0, 191, 255},    /* deepskyblue */
  {105, 105, 105},  /* dimgray */
  {105, 105, 105},  /* dimgrey */
  {30, 144, 255},   /* dodgerblue */
  {178, 34, 34},    /* firebrick */
  {255, 250, 240},  /* floralwhite */
  {34, 139, 34},    /* forestgreen */
  {255, 0, 255},    /* fuchsia */
  {220, 220, 220},  /* gainsboro */
  {248, 248, 255},  /* ghostwhite */
  {255, 215, 0},    /* gold */
  {218, 165, 32},   /* goldenrod */
  {128, 128, 128},  /* gray */
  {0, 128, 0},      /* green */
  {173, 255, 47},   /* greenyellow */
  {128, 128, 128},  /* grey */
  {240, 255, 240},  /* honeydew */
  {255, 105, 180},  /* hotpink */
  {205, 92, 92},    /* indianred */
  {75, 0, 130},     /* indigo */
  {255, 255, 240},  /* ivory */
  {240, 230, 140},  /* khaki */
  {230, 230, 250},  /* lavender */
  {255, 240, 245},  /* lavenderblush */
  {124, 252, 0},    /* lawngreen */
  {255, 250, 205},  /* lemonchiffon */
  {173, 216, 230},  /* lightblue */
  {240, 128, 128},  /* lightcoral */
  {224, 255, 255},  /* lightcyan */
  {250, 250, 210},  /* lightgoldenrodyellow */
  {211, 211, 211},  /* lightgray */
  {144, 238, 144},  /* lightgreen */
  {211, 211, 211},  /* lightgrey */
  {255, 182, 193},  /* lightpink */
  {255, 160, 122},  /* lightsalmon */
  {32, 178, 170},   /* lightseagreen */
  {135, 206, 250},  /* lightskyblue */
  {119, 136, 153},  /* lightslategray */
  {119, 136, 153},  /* lightslategrey */
  {176, 196, 222},  /* lightsteelblue */
  {255, 255, 224},  /* lightyellow */
  {0, 255, 0},      /* lime */
  {50, 205, 50},    /* limegreen */
  {250, 240, 230},  /* linen */
  {255, 0, 255},    /* magenta */
  {128, 0, 0},      /* maroon */
  {102, 205, 170},  /* mediumaquamarine */
  {0, 0, 205},      /* mediumblue */
  {186, 85, 211},   /* mediumorchid */
  {147, 112, 219},  /* mediumpurple */
  {60, 179, 113},   /* mediumseagreen */
  {123, 104, 238},  /* mediumslateblue */
  {0, 250, 154},    /* mediumspringgreen */
  {72, 209, 204},   /* mediumturquoise */
  {199, 21, 133},   /* mediumvioletred */
  {25, 25, 112},    /* midnightblue */
  {245, 255, 250},  /* mintcream */
  {255, 228, 225},  /* mistyrose */
  {255, 228, 181},  /* moccasin */
  {255, 222, 173},  /* navajowhite */
  {0, 0, 128},      /* navy */
  {253, 245, 230},  /* oldlace */
  {128, 128, 0},    /* olive */
  {107, 142, 35},   /* olivedrab */
  {255, 165, 0},    /* orange */
  {255, 69, 0},     /* orangered */
  {218, 112, 214},  /* orchid */
  {238, 232, 170},  /* palegoldenrod */
  {152, 251, 152},  /* palegreen */
  {175, 238, 238},  /* paleturquoise */
  {219, 112, 147},  /* palevioletred */
  {255, 239, 213},  /* papayawhip */
  {255, 218, 185},  /* peachpuff */
  {205, 133, 63},   /* peru */
  {255, 192, 203},  /* pink */
  {221, 160, 221},  /* plum */
  {176, 224, 230},  /* powderblue */
  {128, 0, 128},    /* purple */
  {102, 51, 153},   /* rebeccapurple */
  {255, 0, 0},      /* red */
  {188, 143, 143},  /* rosybrown */
  {65, 105, 225},   /* royalblue */
  {139, 69, 19},    /* saddlebrown */
  {250, 128, 114},  /* salmon */
  {244, 164, 96},   /* sandybrown */
  {46, 139, 87},    /* seagreen */
  {255, 245, 238},  /* seashell */
  {160, 82, 45},    /* sienna */
  {192, 192, 192},  /* silver */
  {135, 206, 235},  /* skyblue */
  {106, 90, 205},   /* slateblue */
  {112, 128, 144},  /* slategray */
  {112, 128, 144},  /* slategrey */
  {255, 250, 250},  /* snow */
  {0, 255, 127},    /* springgreen */
  {70, 130, 180},   /* steelblue */
  {210, 180, 140},  /* tan */
  {0, 128, 128},    /* teal */
  {216, 191, 216},  /* thistle */
  {255, 99, 71},    /* tomato */
  {64, 224, 208},   /* turquoise */
  {238, 130, 238},  /* violet */
  {245, 222, 179},  /* wheat */
  {255, 255, 255},  /* white */
  {245, 245, 245},  /* whitesmoke */
  {255, 255, 0},    /* yellow */
  {154, 205, 50}    /* yellowgreen */
};

/*
    Number of rows in colorValues.  The previous definition had an
    unbalanced parenthesis and could not be expanded without a
    compile error.
*/
#define N_COLOR_VALS (sizeof(colorValues) / sizeof(colorValues[0]))
218 
219 /* These arrays are used to convert color names to their RGB values */
220 static const ctmbstr colorNames[] =
221 {
222   "aliceblue",
223   "antiquewhite",
224   "aqua",
225   "aquamarine",
226   "azure",
227   "beige",
228   "bisque",
229   "black",
230   "blanchedalmond",
231   "blue",
232   "blueviolet",
233   "brown",
234   "burlywood",
235   "cadetblue",
236   "chartreuse",
237   "chocolate",
238   "coral",
239   "cornflowerblue",
240   "cornsilk",
241   "crimson",
242   "cyan",
243   "darkblue",
244   "darkcyan",
245   "darkgoldenrod",
246   "darkgray",
247   "darkgreen",
248   "darkgrey",
249   "darkkhaki",
250   "darkmagenta",
251   "darkolivegreen",
252   "darkorange",
253   "darkorchid",
254   "darkred",
255   "darksalmon",
256   "darkseagreen",
257   "darkslateblue",
258   "darkslategray",
259   "darkslategrey",
260   "darkturquoise",
261   "darkviolet",
262   "deeppink",
263   "deepskyblue",
264   "dimgray",
265   "dimgrey",
266   "dodgerblue",
267   "firebrick",
268   "floralwhite",
269   "forestgreen",
270   "fuchsia",
271   "gainsboro",
272   "ghostwhite",
273   "gold",
274   "goldenrod",
275   "gray",
276   "green",
277   "greenyellow",
278   "grey",
279   "honeydew",
280   "hotpink",
281   "indianred",
282   "indigo",
283   "ivory",
284   "khaki",
285   "lavender",
286   "lavenderblush",
287   "lawngreen",
288   "lemonchiffon",
289   "lightblue",
290   "lightcoral",
291   "lightcyan",
292   "lightgoldenrodyellow",
293   "lightgray",
294   "lightgreen",
295   "lightgrey",
296   "lightpink",
297   "lightsalmon",
298   "lightseagreen",
299   "lightskyblue",
300   "lightslategray",
301   "lightslategrey",
302   "lightsteelblue",
303   "lightyellow",
304   "lime",
305   "limegreen",
306   "linen",
307   "magenta",
308   "maroon",
309   "mediumaquamarine",
310   "mediumblue",
311   "mediumorchid",
312   "mediumpurple",
313   "mediumseagreen",
314   "mediumslateblue",
315   "mediumspringgreen",
316   "mediumturquoise",
317   "mediumvioletred",
318   "midnightblue",
319   "mintcream",
320   "mistyrose",
321   "moccasin",
322   "navajowhite",
323   "navy",
324   "oldlace",
325   "olive",
326   "olivedrab",
327   "orange",
328   "orangered",
329   "orchid",
330   "palegoldenrod",
331   "palegreen",
332   "paleturquoise",
333   "palevioletred",
334   "papayawhip",
335   "peachpuff",
336   "peru",
337   "pink",
338   "plum",
339   "powderblue",
340   "purple",
341   "rebeccapurple",
342   "red",
343   "rosybrown",
344   "royalblue",
345   "saddlebrown",
346   "salmon",
347   "sandybrown",
348   "seagreen",
349   "seashell",
350   "sienna",
351   "silver",
352   "skyblue",
353   "slateblue",
354   "slategray",
355   "slategrey",
356   "snow",
357   "springgreen",
358   "steelblue",
359   "tan",
360   "teal",
361   "thistle",
362   "tomato",
363   "turquoise",
364   "violet",
365   "wheat",
366   "white",
367   "whitesmoke",
368   "yellow",
369   "yellowgreen",
370 };
371 
372 #define N_COLOR_NAMES (sizeof(colorNames)/sizeof(ctmbstr))
373 #define N_COLORS N_COLOR_NAMES
374 
375 
/*
    Function prototypes: forward declarations of file-local (static)
    helpers so they can be referenced before their definitions.
*/
static void InitAccessibilityChecks( TidyDocImpl* doc, int level123 );
static void FreeAccessibilityChecks( TidyDocImpl* doc );

/* Colour parsing and comparison helpers used by the colour contrast check. */
static Bool GetRgb( ctmbstr color, int rgb[3] );
static Bool CompareColors( const int rgbBG[3], const int rgbFG[3] );
static int  ctox( tmbchar ch );

/*
Disabled prototypes; this whole group is commented out and not compiled.
static void CheckMapAccess( TidyDocImpl* doc, Node* node, Node* front);
static void GetMapLinks( TidyDocImpl* doc, Node* node, Node* front);
static void CompareAnchorLinks( TidyDocImpl* doc, Node* front, int counter);
static void FindMissingLinks( TidyDocImpl* doc, Node* node, int counter);
*/
/* Further checks; being static, they are defined later in this file. */
static void CheckFormControls( TidyDocImpl* doc, Node* node );
static void MetaDataPresent( TidyDocImpl* doc, Node* node );
static void CheckEmbed( TidyDocImpl* doc, Node* node );
static void CheckListUsage( TidyDocImpl* doc, Node* node );
394 
395 /*
396     IsFilePath attempts to determine whether or not the URI indicated
397     by path is a file rather than a TLD. For example, sample.com.au might
398     be confused with an audio file.
399 */
IsFilePath(ctmbstr path)400 static Bool IsFilePath( ctmbstr path )
401 {
402     const char *p = path;
403     char c;
404     typedef enum states { initial, protocol_found, slash_found, file_found } states;
405     states state = initial;
406 
407     while ( ( c = *p++ ) != 0 && state != file_found )
408     {
409         switch ( state )
410         {
411             case initial:
412                 if ( c == ':' )
413                     state = protocol_found;
414                 break;
415 
416             case protocol_found:
417                 if ( c =='/' )
418                     state = slash_found;
419                 break;
420 
421             case slash_found:
422                 if ( c =='/' )
423                     state = protocol_found;
424                 else
425                     state = file_found;
426                 break;
427 
428             default:
429                 break;
430         }
431 
432     }
433 
434     return state == file_found || state == initial;
435 }
436 
437 
438 /*
439     GetFileExtension takes a path and returns the extension
440     portion of the path (if any).
441 */
442 
GetFileExtension(ctmbstr path,tmbchar * ext,uint maxExt)443 static void GetFileExtension( ctmbstr path, tmbchar *ext, uint maxExt )
444 {
445     int i = TY_(tmbstrlen)(path) - 1;
446 
447     ext[0] = '\0';
448 
449     do {
450         if ( path[i] == '/' || path[i] == '\\' )
451             break;
452         else if ( path[i] == '.' )
453         {
454             TY_(tmbstrncpy)( ext, path+i, maxExt );
455             break;
456         }
457     } while ( --i > 0 );
458 }
459 
460 /************************************************************************
461 * IsImage
462 *
463 * Checks if the given filename is an image file.
464 * Returns 'yes' if it is, 'no' if it's not.
465 ************************************************************************/
466 
IsImage(ctmbstr iType)467 static Bool IsImage( ctmbstr iType )
468 {
469     uint i;
470     tmbchar ext[20];
471 
472     if ( !IsFilePath(iType) ) return 0;
473 
474     GetFileExtension( iType, ext, sizeof(ext) );
475 
476     /* Compare it to the array of known image file extensions */
477     for (i = 0; i < N_IMAGE_EXTS; i++)
478     {
479         if ( TY_(tmbstrcasecmp)(ext, imageExtensions[i]) == 0 )
480             return yes;
481     }
482 
483     return no;
484 }
485 
486 
487 /***********************************************************************
488 * IsSoundFile
489 *
490 * Checks if the given filename is a sound file.
491 * Returns 'yes' if it is, 'no' if it's not.
492 ***********************************************************************/
493 
IsSoundFile(ctmbstr sType)494 static int IsSoundFile( ctmbstr sType )
495 {
496     uint i;
497     tmbchar ext[ 20 ];
498 
499     if ( !IsFilePath(sType) ) return 0;
500 
501     GetFileExtension( sType, ext, sizeof(ext) );
502 
503     for (i = 0; i < N_AUDIO_EXTS; i++)
504     {
505         if ( TY_(tmbstrcasecmp)(ext, soundExtensions[i]) == 0 )
506             return soundExtErrCodes[i];
507     }
508     return 0;
509 }
510 
511 
512 /***********************************************************************
513 * IsValidSrcExtension
514 *
515 * Checks if the 'SRC' value within the FRAME element is valid
516 * The 'SRC' extension must end in ".htm", ".html", ".shtm", ".shtml",
517 * ".cfm", ".cfml", ".asp", ".cgi", ".pl", or ".smil"
518 *
519 * Returns yes if it is, returns no otherwise.
520 ***********************************************************************/
521 
IsValidSrcExtension(ctmbstr sType)522 static Bool IsValidSrcExtension( ctmbstr sType )
523 {
524     uint i;
525     tmbchar ext[20];
526 
527     if ( !IsFilePath(sType) ) return 0;
528 
529     GetFileExtension( sType, ext, sizeof(ext) );
530 
531     for (i = 0; i < N_FRAME_EXTS; i++)
532     {
533         if ( TY_(tmbstrcasecmp)(ext, frameExtensions[i]) == 0 )
534             return yes;
535     }
536     return no;
537 }
538 
539 
540 /*********************************************************************
541 * IsValidMediaExtension
542 *
543 * Checks to warn the user that syncronized text equivalents are
544 * required if multimedia is used.
545 *********************************************************************/
546 
IsValidMediaExtension(ctmbstr sType)547 static Bool IsValidMediaExtension( ctmbstr sType )
548 {
549     uint i;
550     tmbchar ext[20];
551 
552     if ( !IsFilePath(sType) ) return 0;
553 
554     GetFileExtension( sType, ext, sizeof(ext) );
555 
556     for (i = 0; i < N_MEDIA_EXTS; i++)
557     {
558         if ( TY_(tmbstrcasecmp)(ext, mediaExtensions[i]) == 0 )
559             return yes;
560     }
561     return no;
562 }
563 
564 
565 /************************************************************************
566 * IsWhitespace
567 *
568 * Checks if the given string is all whitespace.
569 * Returns 'yes' if it is, 'no' if it's not.
570 ************************************************************************/
571 
IsWhitespace(ctmbstr pString)572 static Bool IsWhitespace( ctmbstr pString )
573 {
574     Bool isWht = yes;
575     ctmbstr cp;
576 
577     for ( cp = pString; isWht && cp && *cp; ++cp )
578     {
579         isWht = TY_(IsWhite)( *cp );
580     }
581     return isWht;
582 }
583 
hasValue(AttVal * av)584 static Bool hasValue( AttVal* av )
585 {
586     return ( av && ! IsWhitespace(av->value) );
587 }
588 
589 /***********************************************************************
590 * IsPlaceholderAlt
591 *
592 * Checks to see if there is an image and photo place holder contained
593 * in the ALT text.
594 *
595 * Returns 'yes' if there is, 'no' if not.
596 ***********************************************************************/
597 
IsPlaceholderAlt(ctmbstr txt)598 static Bool IsPlaceholderAlt( ctmbstr txt )
599 {
600     return ( strstr(txt, "image") != NULL ||
601              strstr(txt, "photo") != NULL );
602 }
603 
604 
605 /***********************************************************************
606 * IsPlaceholderTitle
607 *
* Checks to see if there is a TITLE placeholder contained
609 * in the 'ALT' text.
610 *
611 * Returns 'yes' if there is, 'no' if not.
612 
613 static Bool IsPlaceHolderTitle( ctmbstr txt )
614 {
615     return ( strstr(txt, "title") != NULL );
616 }
617 ***********************************************************************/
618 
619 
620 /***********************************************************************
621 * IsPlaceHolderObject
622 *
623 * Checks to see if there is an OBJECT place holder contained
624 * in the 'ALT' text.
625 *
626 * Returns 'yes' if there is, 'no' if not.
627 ***********************************************************************/
628 
IsPlaceHolderObject(ctmbstr txt)629 static Bool IsPlaceHolderObject( ctmbstr txt )
630 {
631     return ( strstr(txt, "object") != NULL );
632 }
633 
634 
635 /**********************************************************
636 * EndsWithBytes
637 *
638 * Checks to see if the ALT text ends with 'bytes'
639 * Returns 'yes', if true, 'no' otherwise.
640 **********************************************************/
641 
EndsWithBytes(ctmbstr txt)642 static Bool EndsWithBytes( ctmbstr txt )
643 {
644     uint len = TY_(tmbstrlen)( txt );
645     return ( len >= 5 && TY_(tmbstrcmp)(txt+len-5, "bytes") == 0 );
646 }
647 
648 
649 /*******************************************************
650 * textFromOneNode
651 *
652 * Returns a list of characters contained within one
653 * text node.
654 *******************************************************/
655 
textFromOneNode(TidyDocImpl * doc,Node * node)656 static ctmbstr textFromOneNode( TidyDocImpl* doc, Node* node )
657 {
658     uint i;
659     uint x = 0;
660     tmbstr txt = doc->access.text;
661 
662     if ( node )
663     {
664         /* Copy contents of a text node */
665         for (i = node->start; i < node->end; ++i, ++x )
666         {
667             txt[x] = doc->lexer->lexbuf[i];
668 
669             /* Check buffer overflow */
670             if ( x >= sizeof(doc->access.text)-1 )
671                 break;
672         }
673     }
674 
675     txt[x] = '\0';
676     return txt;
677 }
678 
679 
680 /*********************************************************
681 * getTextNode
682 *
683 * Locates text nodes within a container element.
684 * Retrieves text that are found contained within
685 * text nodes, and concatenates the text.
686 *********************************************************/
687 
getTextNode(TidyDocImpl * doc,Node * node)688 static void getTextNode( TidyDocImpl* doc, Node* node )
689 {
690     tmbstr txtnod = doc->access.textNode;
691 
692     /*
693        Continues to traverse through container element until it no
694        longer contains any more contents
695     */
696 
697     /* If the tag of the node is NULL, then grab the text within the node */
698     if ( TY_(nodeIsText)(node) )
699     {
700         uint i;
701 
702         /* Retrieves each character found within the text node */
703         for (i = node->start; i < node->end; i++)
704         {
705             /* The text must not exceed buffer */
706             if ( doc->access.counter >= TEXTBUF_SIZE-1 )
707                 return;
708 
709             txtnod[ doc->access.counter++ ] = doc->lexer->lexbuf[i];
710         }
711 
712         /* Traverses through the contents within a container element */
713         for ( node = node->content; node != NULL; node = node->next )
714             getTextNode( doc, node );
715     }
716 }
717 
718 
719 /**********************************************************
720 * getTextNodeClear
721 *
722 * Clears the current 'textNode' and reloads it with new
723 * text.  The textNode must be cleared before use.
724 **********************************************************/
725 
getTextNodeClear(TidyDocImpl * doc,Node * node)726 static tmbstr getTextNodeClear( TidyDocImpl* doc, Node* node )
727 {
728     /* Clears list */
729     TidyClearMemory( doc->access.textNode, TEXTBUF_SIZE );
730     doc->access.counter = 0;
731 
732     getTextNode( doc, node->content );
733     return doc->access.textNode;
734 }
735 
736 /**********************************************************
737 * LevelX_Enabled
738 *
739 * Tell whether access "X" is enabled.
740 **********************************************************/
741 
Level1_Enabled(TidyDocImpl * doc)742 static Bool Level1_Enabled( TidyDocImpl* doc )
743 {
744    return doc->access.PRIORITYCHK == 1 ||
745           doc->access.PRIORITYCHK == 2 ||
746           doc->access.PRIORITYCHK == 3;
747 }
Level2_Enabled(TidyDocImpl * doc)748 static Bool Level2_Enabled( TidyDocImpl* doc )
749 {
750     return doc->access.PRIORITYCHK == 2 ||
751            doc->access.PRIORITYCHK == 3;
752 }
Level3_Enabled(TidyDocImpl * doc)753 static Bool Level3_Enabled( TidyDocImpl* doc )
754 {
755     return doc->access.PRIORITYCHK == 3;
756 }
757 
758 /********************************************************
759 * CheckColorAvailable
760 *
761 * Verify that information conveyed with color is
762 * available without color.
763 ********************************************************/
764 
CheckColorAvailable(TidyDocImpl * doc,Node * node)765 static void CheckColorAvailable( TidyDocImpl* doc, Node* node )
766 {
767     if (Level1_Enabled( doc ))
768     {
769         if ( nodeIsIMG(node) )
770             TY_(ReportAccessError)( doc, node, INFORMATION_NOT_CONVEYED_IMAGE );
771 
772         else if ( nodeIsAPPLET(node) )
773             TY_(ReportAccessError)( doc, node, INFORMATION_NOT_CONVEYED_APPLET );
774 
775         else if ( nodeIsOBJECT(node) )
776             TY_(ReportAccessError)( doc, node, INFORMATION_NOT_CONVEYED_OBJECT );
777 
778         else if ( nodeIsSCRIPT(node) )
779             TY_(ReportAccessError)( doc, node, INFORMATION_NOT_CONVEYED_SCRIPT );
780 
781         else if ( nodeIsINPUT(node) )
782             TY_(ReportAccessError)( doc, node, INFORMATION_NOT_CONVEYED_INPUT );
783     }
784 }
785 
786 /*********************************************************************
787 * CheckColorContrast
788 *
789 * Checks elements for color contrast.  Must have valid contrast for
790 * valid visibility.
791 *
792 * This logic is extremely fragile as it does not recognize
793 * the fact that color is inherited by many components and
794 * that BG and FG colors are often set separately.  E.g. the
795 * background color may be set by for the body or a table
796 * or a cell.  The foreground color may be set by any text
797 * element (p, h1, h2, input, textarea), either explicitly
798 * or by style.  Ergo, this test will not handle most real
799 * world cases.  It's a start, however.
800 *********************************************************************/
801 
CheckColorContrast(TidyDocImpl * doc,Node * node)802 static void CheckColorContrast( TidyDocImpl* doc, Node* node )
803 {
804     int rgbBG[3] = {255,255,255};   /* Black text on white BG */
805 
806     if (Level3_Enabled( doc ))
807     {
808         Bool gotBG = yes;
809         AttVal* av;
810 
811         /* Check for 'BGCOLOR' first to compare with other color attributes */
812         for ( av = node->attributes; av; av = av->next )
813         {
814             if ( attrIsBGCOLOR(av) )
815             {
816                 if ( hasValue(av) )
817                     gotBG = GetRgb( av->value, rgbBG );
818             }
819         }
820 
821         /*
822            Search for COLOR attributes to compare with background color
823            Must have valid colour contrast
824         */
825         for ( av = node->attributes; gotBG && av != NULL; av = av->next )
826         {
827             uint errcode = 0;
828             if ( attrIsTEXT(av) )
829                 errcode = COLOR_CONTRAST_TEXT;
830             else if ( attrIsLINK(av) )
831                 errcode = COLOR_CONTRAST_LINK;
832             else if ( attrIsALINK(av) )
833                 errcode = COLOR_CONTRAST_ACTIVE_LINK;
834             else if ( attrIsVLINK(av) )
835                 errcode = COLOR_CONTRAST_VISITED_LINK;
836 
837             if ( errcode && hasValue(av) )
838             {
839                 int rgbFG[3] = {0, 0, 0};  /* Black text */
840 
841                 if ( GetRgb(av->value, rgbFG) &&
842                      !CompareColors(rgbBG, rgbFG) )
843                 {
844                     TY_(ReportAccessError)( doc, node, errcode );
845                 }
846             }
847         }
848     }
849 }
850 
851 
852 /**************************************************************
853 * CompareColors
854 *
855 * Compares two RGB colors for good contrast.
856 **************************************************************/
/*
    Returns MAX(i1, i2) minus MIN(i1, i2), i.e. the distance between
    the two values (assuming MAX and MIN are the usual maximum and
    minimum macros).
*/
static int minmax( int i1, int i2 )
{
    const int larger  = MAX( i1, i2 );
    const int smaller = MIN( i1, i2 );

    return larger - smaller;
}
/*
    Returns the brightness of a colour: the weighted sum
    (299*red + 587*green + 114*blue) divided by 1000 using integer
    division.  rgb holds the red, green and blue components in that
    order.
*/
static int brightness( const int rgb[3] )
{
    static const int weight[3] = { 299, 587, 114 };
    int total = 0;
    int channel;

    for ( channel = 0; channel < 3; ++channel )
        total += rgb[channel] * weight[channel];

    return total / 1000;
}
865 
CompareColors(const int rgbBG[3],const int rgbFG[3])866 static Bool CompareColors( const int rgbBG[3], const int rgbFG[3] )
867 {
868     int brightBG = brightness( rgbBG );
869     int brightFG = brightness( rgbFG );
870 
871     int diffBright = minmax( brightBG, brightFG );
872 
873     int diffColor = minmax( rgbBG[0], rgbFG[0] )
874                   + minmax( rgbBG[1], rgbFG[1] )
875                   + minmax( rgbBG[2], rgbFG[2] );
876 
877     return ( diffBright > 180 &&
878              diffColor > 500 );
879 }
880 
881 
882 /*********************************************************************
883 * GetRgb
884 *
885 * Gets the red, green and blue values for this attribute for the
886 * background.
887 *
888 * Example: If attribute is BGCOLOR="#121005" then red = 18, green = 16,
889 * blue = 5.
890 *********************************************************************/
891 
GetRgb(ctmbstr color,int rgb[])892 static Bool GetRgb( ctmbstr color, int rgb[] )
893 {
894     uint x;
895 
896     /* Check if we have a color name */
897     for (x = 0; x < N_COLORS; x++)
898     {
899         if ( strstr(colorNames[x], color) != NULL )
900         {
901             rgb[0] = colorValues[x][0];
902             rgb[1] = colorValues[x][1];
903             rgb[2] = colorValues[x][2];
904             return yes;
905         }
906     }
907 
908     /*
909        No color name so must be hex values
910        Is this a number in hexadecimal format?
911     */
912 
913     /* Must be 7 characters in the RGB value (including '#') */
914     if ( TY_(tmbstrlen)(color) == 7 && color[0] == '#' )
915     {
916         rgb[0] = (ctox(color[1]) * 16) + ctox(color[2]);
917         rgb[1] = (ctox(color[3]) * 16) + ctox(color[4]);
918         rgb[2] = (ctox(color[5]) * 16) + ctox(color[6]);
919         return yes;
920     }
921     return no;
922 }
923 
924 
925 
926 /*******************************************************************
927 * ctox
928 *
929 * Converts a character to a number.
930 * Example: if given character is 'A' then returns 10.
931 *
932 * Returns the number that the character represents. Returns -1 if not a
933 * valid number.
934 *******************************************************************/
935 
ctox(tmbchar ch)936 static int ctox( tmbchar ch )
937 {
938     if ( ch >= '0' && ch <= '9' )
939     {
940          return ch - '0';
941     }
942     else if ( ch >= 'a' && ch <= 'f' )
943     {
944         return ch - 'a' + 10;
945     }
946     else if ( ch >= 'A' && ch <= 'F' )
947     {
948         return ch - 'A' + 10;
949     }
950     return -1;
951 }
952 
953 
954 /***********************************************************
955 * CheckImage
956 *
957 * Checks all image attributes for specific elements to
958 * check for validity of the values contained within
959 * the attributes.  An appropriate warning message is displayed
960 * to indicate the error.
961 ***********************************************************/
962 
CheckImage(TidyDocImpl * doc,Node * node)963 static void CheckImage( TidyDocImpl* doc, Node* node )
964 {
965     Bool HasAlt = no;
966     Bool HasIsMap = no;
967     Bool HasLongDesc = no;
968     Bool HasDLINK = no;
969     Bool HasValidHeight = no;
970     Bool HasValidWidthBullet = no;
971     Bool HasValidWidthHR = no;
972     Bool HasTriggeredMissingLongDesc = no;
973 
974     AttVal* av;
975 
976     if (Level1_Enabled( doc ))
977     {
978         /* Checks all image attributes for invalid values within attributes */
979         for (av = node->attributes; av != NULL; av = av->next)
980         {
981             /*
982                Checks for valid ALT attribute.
983                The length of the alt text must be less than 150 characters
984                long.
985             */
986             if ( attrIsALT(av) )
987             {
988                 if (av->value != NULL)
989                 {
990                     if ((TY_(tmbstrlen)(av->value) < 150) &&
991                         (IsPlaceholderAlt (av->value) == no) &&
992                         (IsPlaceHolderObject (av->value) == no) &&
993                         (EndsWithBytes (av->value) == no) &&
994                         (IsImage (av->value) == no))
995                     {
996                         HasAlt = yes;
997                     }
998 
999                     else if (TY_(tmbstrlen)(av->value) > 150)
1000                     {
1001                         HasAlt = yes;
1002                         TY_(ReportAccessError)( doc, node, IMG_ALT_SUSPICIOUS_TOO_LONG );
1003                     }
1004 
1005                     else if (IsImage (av->value) == yes)
1006                     {
1007                         HasAlt = yes;
1008                         TY_(ReportAccessError)( doc, node, IMG_ALT_SUSPICIOUS_FILENAME);
1009                     }
1010 
1011                     else if (IsPlaceholderAlt (av->value) == yes)
1012                     {
1013                         HasAlt = yes;
1014                         TY_(ReportAccessError)( doc, node, IMG_ALT_SUSPICIOUS_PLACEHOLDER);
1015                     }
1016 
1017                     else if (EndsWithBytes (av->value) == yes)
1018                     {
1019                         HasAlt = yes;
1020                         TY_(ReportAccessError)( doc, node, IMG_ALT_SUSPICIOUS_FILE_SIZE);
1021                     }
1022                 }
1023             }
1024 
1025             /*
1026                Checks for width values of 'bullets' and 'horizontal
1027                rules' for validity.
1028 
1029                Valid pixel width for 'bullets' must be < 30, and > 150 for
1030                horizontal rules.
1031             */
1032             else if ( attrIsWIDTH(av) )
1033             {
1034                 /* Longdesc attribute needed if width attribute is not present. */
1035                 if ( hasValue(av) )
1036                 {
1037                     int width = atoi( av->value );
1038                     if ( width < 30 )
1039                         HasValidWidthBullet = yes;
1040 
1041                     if ( width > 150 )
1042                         HasValidWidthHR = yes;
1043                 }
1044             }
1045 
1046             /*
1047                Checks for height values of 'bullets' and horizontal
1048                rules for validity.
1049 
1050                Valid pixel height for 'bullets' and horizontal rules
               must be < 30.
1052             */
1053             else if ( attrIsHEIGHT(av) )
1054             {
1055                 /* Longdesc attribute needed if height attribute not present. */
1056                 if ( hasValue(av) && atoi(av->value) < 30 )
1057                     HasValidHeight = yes;
1058             }
1059 
1060             /*
1061                Checks for longdesc and determines validity.
1062                The length of the 'longdesc' must be > 1
1063             */
1064             else if ( attrIsLONGDESC(av) )
1065             {
1066                 if ( hasValue(av) && TY_(tmbstrlen)(av->value) > 1 )
1067                     HasLongDesc = yes;
1068               }
1069 
1070             /*
1071                Checks for 'USEMAP' attribute.  Ensures that
1072                text links are provided for client-side image maps
1073             */
1074             else if ( attrIsUSEMAP(av) )
1075             {
1076                 if ( hasValue(av) )
1077                     doc->access.HasUseMap = yes;
1078             }
1079 
1080             else if ( attrIsISMAP(av) )
1081             {
1082                 HasIsMap = yes;
1083             }
1084         }
1085 
1086 
1087         /*
1088             Check to see if a dLINK is present.  The ANCHOR element must
1089             be present following the IMG element.  The text found between
1090             the ANCHOR tags must be < 6 characters long, and must contain
1091             the letter 'd'.
1092         */
1093         if ( nodeIsA(node->next) )
1094         {
1095             node = node->next;
1096 
1097             /*
1098                 Node following the anchor must be a text node
1099                 for dLINK to exist
1100             */
1101 
1102             if (node->content != NULL && (node->content)->tag == NULL)
1103             {
1104                 /* Number of characters found within the text node */
1105                 ctmbstr word = textFromOneNode( doc, node->content);
1106 
1107                 if ((TY_(tmbstrcmp)(word,"d") == 0)||
1108                     (TY_(tmbstrcmp)(word,"D") == 0))
1109                 {
1110                     HasDLINK = yes;
1111                 }
1112             }
1113         }
1114 
1115         /*
1116             Special case check for dLINK.  This will occur if there is
1117             whitespace between the <img> and <a> elements.  Ignores
1118             whitespace and continues check for dLINK.
1119         */
1120 
1121         if ( node->next && !node->next->tag )
1122         {
1123             node = node->next;
1124 
1125             if ( nodeIsA(node->next) )
1126             {
1127                 node = node->next;
1128 
1129                 /*
1130                     Node following the ANCHOR must be a text node
1131                     for dLINK to exist
1132                 */
1133                 if (node->content != NULL && node->content->tag == NULL)
1134                 {
1135                     /* Number of characters found within the text node */
1136                     ctmbstr word = textFromOneNode( doc, node->content );
1137 
1138                     if ((TY_(tmbstrcmp)(word, "d") == 0)||
1139                         (TY_(tmbstrcmp)(word, "D") == 0))
1140                     {
1141                         HasDLINK = yes;
1142                     }
1143                 }
1144             }
1145         }
1146 
1147         if ((HasAlt == no)&&
1148             (HasValidWidthBullet == yes)&&
1149             (HasValidHeight == yes))
1150         {
1151         }
1152 
1153         if ((HasAlt == no)&&
1154             (HasValidWidthHR == yes)&&
1155             (HasValidHeight == yes))
1156         {
1157         }
1158 
1159         if (HasAlt == no)
1160         {
1161             TY_(ReportAccessError)( doc, node, IMG_MISSING_ALT);
1162         }
1163 
1164         if ((HasLongDesc == no)&&
1165             (HasValidHeight ==yes)&&
1166             ((HasValidWidthHR == yes)||
1167              (HasValidWidthBullet == yes)))
1168         {
1169             HasTriggeredMissingLongDesc = yes;
1170         }
1171 
1172         if (HasTriggeredMissingLongDesc == no)
1173         {
1174             if ((HasDLINK == yes)&&
1175                 (HasLongDesc == no))
1176             {
1177                 TY_(ReportAccessError)( doc, node, IMG_MISSING_LONGDESC);
1178             }
1179 
1180             if ((HasLongDesc == yes)&&
1181                 (HasDLINK == no))
1182             {
1183                 TY_(ReportAccessError)( doc, node, IMG_MISSING_DLINK);
1184             }
1185 
1186             if ((HasLongDesc == no)&&
1187                 (HasDLINK == no))
1188             {
1189                 TY_(ReportAccessError)( doc, node, IMG_MISSING_LONGDESC_DLINK);
1190             }
1191         }
1192 
1193         if (HasIsMap == yes)
1194         {
1195             TY_(ReportAccessError)( doc, node, IMAGE_MAP_SERVER_SIDE_REQUIRES_CONVERSION);
1196 
1197             TY_(ReportAccessError)( doc, node, IMG_MAP_SERVER_REQUIRES_TEXT_LINKS);
1198         }
1199     }
1200 }
1201 
1202 
1203 /***********************************************************
1204 * CheckApplet
1205 *
1206 * Checks APPLET element to check for validity pertaining
1207 * the 'ALT' attribute.  An appropriate warning message is
1208 * displayed  to indicate the error. An appropriate warning
1209 * message is displayed to indicate the error.  If no 'ALT'
1210 * text is present, then there must be alternate content
1211 * within the APPLET element.
1212 ***********************************************************/
1213 
CheckApplet(TidyDocImpl * doc,Node * node)1214 static void CheckApplet( TidyDocImpl* doc, Node* node )
1215 {
1216     Bool HasAlt = no;
1217     Bool HasDescription = no;
1218 
1219     AttVal* av;
1220 
1221     if (Level1_Enabled( doc ))
1222     {
1223         /* Checks for attributes within the APPLET element */
1224         for (av = node->attributes; av != NULL; av = av->next)
1225         {
1226             /*
1227                Checks for valid ALT attribute.
1228                The length of the alt text must be > 4 characters in length
1229                but must be < 150 characters long.
1230             */
1231 
1232             if ( attrIsALT(av) )
1233             {
1234                 if (av->value != NULL)
1235                 {
1236                     HasAlt = yes;
1237                 }
1238             }
1239         }
1240 
1241         if (HasAlt == no)
1242         {
1243             /* Must have alternate text representation for that element */
1244             if (node->content != NULL)
1245             {
1246                 ctmbstr word = NULL;
1247 
1248                 if ( node->content->tag == NULL )
1249                     word = textFromOneNode( doc, node->content);
1250 
1251                 if ( node->content->content != NULL &&
1252                      node->content->content->tag == NULL )
1253                 {
1254                     word = textFromOneNode( doc, node->content->content);
1255                 }
1256 
1257                 if ( word != NULL && !IsWhitespace(word) )
1258                     HasDescription = yes;
1259             }
1260         }
1261 
1262         if ( !HasDescription && !HasAlt )
1263         {
1264             TY_(ReportAccessError)( doc, node, APPLET_MISSING_ALT );
1265         }
1266     }
1267 }
1268 
1269 
1270 /*******************************************************************
1271 * CheckObject
1272 *
1273 * Checks to verify whether the OBJECT element contains
1274 * 'ALT' text, and to see that the sound file selected is
1275 * of a valid sound file type.  OBJECT must have an alternate text
1276 * representation.
1277 *******************************************************************/
1278 
CheckObject(TidyDocImpl * doc,Node * node)1279 static void CheckObject( TidyDocImpl* doc, Node* node )
1280 {
1281     Bool HasAlt = no;
1282     Bool HasDescription = no;
1283 
1284     if (Level1_Enabled( doc ))
1285     {
1286         if ( node->content != NULL)
1287         {
1288             if ( node->content->type != TextNode )
1289             {
1290                 Node* tnode = node->content;
1291                 AttVal* av;
1292 
1293                 for ( av=tnode->attributes; av; av = av->next )
1294                 {
1295                     if ( attrIsALT(av) )
1296                     {
1297                         HasAlt = yes;
1298                         break;
1299                     }
1300                 }
1301             }
1302 
1303             /* Must have alternate text representation for that element */
1304             if ( !HasAlt )
1305             {
1306                 ctmbstr word = NULL;
1307 
1308                 if ( TY_(nodeIsText)(node->content) )
1309                     word = textFromOneNode( doc, node->content );
1310 
1311                 if ( word == NULL &&
1312                      TY_(nodeIsText)(node->content->content) )
1313                 {
1314                     word = textFromOneNode( doc, node->content->content );
1315                 }
1316 
1317                 if ( word != NULL && !IsWhitespace(word) )
1318                     HasDescription = yes;
1319             }
1320         }
1321 
1322         if ( !HasAlt && !HasDescription )
1323         {
1324             TY_(ReportAccessError)( doc, node, OBJECT_MISSING_ALT );
1325         }
1326     }
1327 }
1328 
1329 
1330 /***************************************************************
1331 * CheckMissingStyleSheets
1332 *
1333 * Ensures that stylesheets are used to control the presentation.
1334 ***************************************************************/
1335 
CheckMissingStyleSheets(TidyDocImpl * doc,Node * node)1336 static Bool CheckMissingStyleSheets( TidyDocImpl* doc, Node* node )
1337 {
1338     AttVal* av;
1339     Node* content;
1340     Bool sspresent = no;
1341 
1342     for ( content = node->content;
1343           !sspresent && content != NULL;
1344           content = content->next )
1345     {
1346         sspresent = ( nodeIsLINK(content)  ||
1347                       nodeIsSTYLE(content) ||
1348                       nodeIsFONT(content)  ||
1349                       nodeIsBASEFONT(content) );
1350 
1351         for ( av = content->attributes;
1352               !sspresent && av != NULL;
1353               av = av->next )
1354         {
1355             sspresent = ( attrIsSTYLE(av) || attrIsTEXT(av)  ||
1356                           attrIsVLINK(av) || attrIsALINK(av) ||
1357                           attrIsLINK(av) );
1358 
1359             if ( !sspresent && attrIsREL(av) )
1360             {
1361                 sspresent = AttrValueIs(av, "stylesheet");
1362             }
1363         }
1364 
1365         if ( ! sspresent )
1366             sspresent = CheckMissingStyleSheets( doc, content );
1367     }
1368     return sspresent;
1369 }
1370 
1371 
1372 /*******************************************************************
1373 * CheckFrame
1374 *
1375 * Checks if the URL is valid and to check if a 'LONGDESC' is needed
1376 * within the FRAME element.  If a 'LONGDESC' is needed, the value must
1377 * be valid. The URL must end with the file extension, htm, or html.
1378 * Also, checks to ensure that the 'SRC' and 'TITLE' values are valid.
1379 *******************************************************************/
1380 
CheckFrame(TidyDocImpl * doc,Node * node)1381 static void CheckFrame( TidyDocImpl* doc, Node* node )
1382 {
1383     Bool HasTitle = no;
1384     AttVal* av;
1385 
1386     doc->access.numFrames++;
1387 
1388     if (Level1_Enabled( doc ))
1389     {
1390         /* Checks for attributes within the FRAME element */
1391         for (av = node->attributes; av != NULL; av = av->next)
1392         {
1393             /* Checks if 'LONGDESC' value is valid only if present */
1394             if ( attrIsLONGDESC(av) )
1395             {
1396                 if ( hasValue(av) && TY_(tmbstrlen)(av->value) > 1 )
1397                 {
1398                     doc->access.HasCheckedLongDesc++;
1399                 }
1400             }
1401 
1402             /* Checks for valid 'SRC' value within the frame element */
1403             else if ( attrIsSRC(av) )
1404             {
1405                 if ( hasValue(av) && !IsValidSrcExtension(av->value) )
1406                 {
1407                     TY_(ReportAccessError)( doc, node, FRAME_SRC_INVALID );
1408                 }
1409             }
1410 
1411             /* Checks for valid 'TITLE' value within frame element */
1412             else if ( attrIsTITLE(av) )
1413             {
1414                 if ( hasValue(av) )
1415                     HasTitle = yes;
1416 
1417                 if ( !HasTitle )
1418                 {
1419                     if ( av->value == NULL || TY_(tmbstrlen)(av->value) == 0 )
1420                     {
1421                         HasTitle = yes;
1422                         TY_(ReportAccessError)( doc, node, FRAME_TITLE_INVALID_NULL);
1423                     }
1424                     else
1425                     {
1426                         if ( IsWhitespace(av->value) && TY_(tmbstrlen)(av->value) > 0 )
1427                         {
1428                             HasTitle = yes;
1429                             TY_(ReportAccessError)( doc, node, FRAME_TITLE_INVALID_SPACES );
1430                         }
1431                     }
1432                 }
1433             }
1434         }
1435 
1436         if ( !HasTitle )
1437         {
1438             TY_(ReportAccessError)( doc, node, FRAME_MISSING_TITLE);
1439         }
1440 
1441         if ( doc->access.numFrames==3 && doc->access.HasCheckedLongDesc<3 )
1442         {
1443             doc->access.numFrames = 0;
1444             TY_(ReportAccessError)( doc, node, FRAME_MISSING_LONGDESC );
1445         }
1446     }
1447 }
1448 
1449 
1450 /****************************************************************
1451 * CheckIFrame
1452 *
1453 * Checks if 'SRC' value is valid.  Must end in appropriate
1454 * file extension.
1455 ****************************************************************/
1456 
CheckIFrame(TidyDocImpl * doc,Node * node)1457 static void CheckIFrame( TidyDocImpl* doc, Node* node )
1458 {
1459     if (Level1_Enabled( doc ))
1460     {
1461         /* Checks for valid 'SRC' value within the IFRAME element */
1462         AttVal* av = attrGetSRC( node );
1463         if ( hasValue(av) )
1464         {
1465             if ( !IsValidSrcExtension(av->value) )
1466                 TY_(ReportAccessError)( doc, node, FRAME_SRC_INVALID );
1467         }
1468     }
1469 }
1470 
1471 
1472 /**********************************************************************
1473 * CheckAnchorAccess
1474 *
1475 * Checks that the sound file is valid, and to ensure that
1476 * text transcript is present describing the 'HREF' within the
1477 * ANCHOR element.  Also checks to see ensure that the 'TARGET' attribute
1478 * (if it exists) is not NULL and does not contain '_new' or '_blank'.
1479 **********************************************************************/
1480 
CheckAnchorAccess(TidyDocImpl * doc,Node * node)1481 static void CheckAnchorAccess( TidyDocImpl* doc, Node* node )
1482 {
1483     AttVal* av;
1484     Bool HasDescription = no;
1485     Bool HasTriggeredLink = no;
1486 
1487     /* Checks for attributes within the ANCHOR element */
1488     for ( av = node->attributes; av != NULL; av = av->next )
1489     {
1490         if (Level1_Enabled( doc ))
1491         {
1492             /* Must be of valid sound file type */
1493             if ( attrIsHREF(av) )
1494             {
1495                 if ( hasValue(av) )
1496                 {
1497                     tmbchar ext[ 20 ];
1498                     GetFileExtension (av->value, ext, sizeof(ext) );
1499 
1500                     /* Checks to see if multimedia is used */
1501                     if ( IsValidMediaExtension(av->value) )
1502                     {
1503                         TY_(ReportAccessError)( doc, node, MULTIMEDIA_REQUIRES_TEXT );
1504                     }
1505 
1506                     /*
1507                         Checks for validity of sound file, and checks to see if
1508                         the file is described within the document, or by a link
1509                         that is present which gives the description.
1510                     */
1511                     if ( TY_(tmbstrlen)(ext) < 6 && TY_(tmbstrlen)(ext) > 0 )
1512                     {
1513                         int errcode = IsSoundFile( av->value );
1514                         if ( errcode )
1515                         {
1516                             if (node->next != NULL)
1517                             {
1518                                 if (node->next->tag == NULL)
1519                                 {
1520                                     ctmbstr word = textFromOneNode( doc, node->next);
1521 
1522                                     /* Must contain at least one letter in the text */
1523                                     if (IsWhitespace (word) == no)
1524                                     {
1525                                         HasDescription = yes;
1526                                     }
1527                                 }
1528                             }
1529 
1530                             /* Must contain text description of sound file */
1531                             if ( !HasDescription )
1532                             {
1533                                 TY_(ReportAccessError)( doc, node, errcode );
1534                             }
1535                         }
1536                     }
1537                 }
1538             }
1539         }
1540 
1541         if (Level2_Enabled( doc ))
1542         {
1543             /* Checks 'TARGET' attribute for validity if it exists */
1544             if ( attrIsTARGET(av) )
1545             {
1546                 if (AttrValueIs(av, "_new"))
1547                 {
1548                     TY_(ReportAccessError)( doc, node, NEW_WINDOWS_REQUIRE_WARNING_NEW);
1549                 }
1550                 else if (AttrValueIs(av, "_blank"))
1551                 {
1552                     TY_(ReportAccessError)( doc, node, NEW_WINDOWS_REQUIRE_WARNING_BLANK);
1553                 }
1554             }
1555         }
1556     }
1557 
1558     if (Level2_Enabled( doc ))
1559     {
1560         if ((node->content != NULL)&&
1561             (node->content->tag == NULL))
1562         {
1563             ctmbstr word = textFromOneNode( doc, node->content);
1564 
1565             if ((word != NULL)&&
1566                 (IsWhitespace (word) == no))
1567             {
1568                 if (TY_(tmbstrcmp) (word, "more") == 0)
1569                 {
1570                     HasTriggeredLink = yes;
1571                 }
1572 
1573                 if (TY_(tmbstrcmp) (word, "click here") == 0)
1574                 {
1575                     TY_(ReportAccessError)( doc, node, LINK_TEXT_NOT_MEANINGFUL_CLICK_HERE);
1576                 }
1577 
1578                 if (HasTriggeredLink == no)
1579                 {
1580                     if (TY_(tmbstrlen)(word) < 6)
1581                     {
1582                         TY_(ReportAccessError)( doc, node, LINK_TEXT_NOT_MEANINGFUL);
1583                     }
1584                 }
1585 
1586                 if (TY_(tmbstrlen)(word) > 60)
1587                 {
1588                     TY_(ReportAccessError)( doc, node, LINK_TEXT_TOO_LONG);
1589                 }
1590 
1591             }
1592         }
1593 
1594         if (node->content == NULL)
1595         {
1596             TY_(ReportAccessError)( doc, node, LINK_TEXT_MISSING);
1597         }
1598     }
1599 }
1600 
1601 
1602 /************************************************************
1603 * CheckArea
1604 *
1605 * Checks attributes within the AREA element to
1606 * determine if the 'ALT' text and 'HREF' values are valid.
1607 * Also checks to see ensure that the 'TARGET' attribute
1608 * (if it exists) is not NULL and does not contain '_new'
1609 * or '_blank'.
1610 ************************************************************/
1611 
CheckArea(TidyDocImpl * doc,Node * node)1612 static void CheckArea( TidyDocImpl* doc, Node* node )
1613 {
1614     Bool HasAlt = no;
1615     AttVal* av;
1616 
1617     /* Checks all attributes within the AREA element */
1618     for (av = node->attributes; av != NULL; av = av->next)
1619     {
1620         if (Level1_Enabled( doc ))
1621         {
1622             /*
1623               Checks for valid ALT attribute.
1624               The length of the alt text must be > 4 characters long
1625               but must be less than 150 characters long.
1626             */
1627 
1628             if ( attrIsALT(av) )
1629             {
1630                 /* The check for validity */
1631                 if (av->value != NULL)
1632                 {
1633                     HasAlt = yes;
1634                 }
1635             }
1636         }
1637 
1638         if (Level2_Enabled( doc ))
1639         {
1640             if ( attrIsTARGET(av) )
1641             {
1642                 if (AttrValueIs(av, "_new"))
1643                 {
1644                     TY_(ReportAccessError)( doc, node, NEW_WINDOWS_REQUIRE_WARNING_NEW);
1645                 }
1646                 else if (AttrValueIs(av, "_blank"))
1647                 {
1648                     TY_(ReportAccessError)( doc, node, NEW_WINDOWS_REQUIRE_WARNING_BLANK);
1649                 }
1650             }
1651         }
1652     }
1653 
1654     if (Level1_Enabled( doc ))
1655     {
1656         /* AREA must contain alt text */
1657         if (HasAlt == no)
1658         {
1659             TY_(ReportAccessError)( doc, node, AREA_MISSING_ALT);
1660         }
1661     }
1662 }
1663 
1664 
1665 /***************************************************
1666 * CheckScript
1667 *
1668 * Checks the SCRIPT element to ensure that a
1669 * NOSCRIPT section follows the SCRIPT.
1670 ***************************************************/
1671 
CheckScriptAcc(TidyDocImpl * doc,Node * node)1672 static void CheckScriptAcc( TidyDocImpl* doc, Node* node )
1673 {
1674     if (Level1_Enabled( doc ))
1675     {
1676         /* NOSCRIPT element must appear immediately following SCRIPT element */
1677         if ( node->next == NULL || !nodeIsNOSCRIPT(node->next) )
1678         {
1679             TY_(ReportAccessError)( doc, node, SCRIPT_MISSING_NOSCRIPT);
1680         }
1681     }
1682 }
1683 
1684 
1685 /**********************************************************
1686 * CheckRows
1687 *
1688 * Check to see that each table has a row of headers if
1689 * a column of columns doesn't exist.
1690 **********************************************************/
1691 
CheckRows(TidyDocImpl * doc,Node * node)1692 static void CheckRows( TidyDocImpl* doc, Node* node )
1693 {
1694     int numTR = 0;
1695     int numValidTH = 0;
1696 
1697     doc->access.CheckedHeaders++;
1698 
1699     for (; node != NULL; node = node->next )
1700     {
1701         numTR++;
1702         if ( nodeIsTH(node->content) )
1703         {
1704             doc->access.HasTH = yes;
1705             if ( TY_(nodeIsText)(node->content->content) )
1706             {
1707                 ctmbstr word = textFromOneNode( doc, node->content->content);
1708                 if ( !IsWhitespace(word) )
1709                     numValidTH++;
1710             }
1711         }
1712     }
1713 
1714     if (numTR == numValidTH)
1715         doc->access.HasValidRowHeaders = yes;
1716 
1717     if ( numTR >= 2 &&
1718          numTR > numValidTH &&
1719          numValidTH >= 2 &&
1720          doc->access.HasTH == yes )
1721         doc->access.HasInvalidRowHeader = yes;
1722 }
1723 
1724 
1725 /**********************************************************
1726 * CheckColumns
1727 *
1728 * Check to see that each table has a column of headers if
1729 * a row of columns doesn't exist.
1730 **********************************************************/
1731 
CheckColumns(TidyDocImpl * doc,Node * node)1732 static void CheckColumns( TidyDocImpl* doc, Node* node )
1733 {
1734     Node* tnode;
1735     int numTH = 0;
1736     Bool isMissingHeader = no;
1737 
1738     doc->access.CheckedHeaders++;
1739 
1740     /* Table must have row of headers if headers for columns don't exist */
1741     if ( nodeIsTH(node->content) )
1742     {
1743         doc->access.HasTH = yes;
1744 
1745         for ( tnode = node->content; tnode; tnode = tnode->next )
1746         {
1747             if ( nodeIsTH(tnode) )
1748             {
1749                 if ( TY_(nodeIsText)(tnode->content) )
1750                 {
1751                     ctmbstr word = textFromOneNode( doc, tnode->content);
1752                     if ( !IsWhitespace(word) )
1753                         numTH++;
1754                 }
1755             }
1756             else
1757             {
1758                 isMissingHeader = yes;
1759             }
1760         }
1761     }
1762 
1763     if ( !isMissingHeader && numTH > 0 )
1764         doc->access.HasValidColumnHeaders = yes;
1765 
1766     if ( isMissingHeader && numTH >= 2 )
1767         doc->access.HasInvalidColumnHeader = yes;
1768 }
1769 
1770 
1771 /*****************************************************
1772 * CheckTH
1773 *
1774 * Checks to see if the header provided for a table
1775 * requires an abbreviation. (only required if the
1776 * length of the header is greater than 15 characters)
1777 *****************************************************/
1778 
CheckTH(TidyDocImpl * doc,Node * node)1779 static void CheckTH( TidyDocImpl* doc, Node* node )
1780 {
1781     Bool HasAbbr = no;
1782     ctmbstr word = NULL;
1783     AttVal* av;
1784 
1785     if (Level3_Enabled( doc ))
1786     {
1787         /* Checks TH element for 'ABBR' attribute */
1788         for (av = node->attributes; av != NULL; av = av->next)
1789         {
1790             if ( attrIsABBR(av) )
1791             {
1792                 /* Value must not be NULL and must be less than 15 characters */
1793                 if ((av->value != NULL)&&
1794                     (IsWhitespace (av->value) == no))
1795                 {
1796                     HasAbbr = yes;
1797                 }
1798 
1799                 if ((av->value == NULL)||
1800                     (TY_(tmbstrlen)(av->value) == 0))
1801                 {
1802                     HasAbbr = yes;
1803                     TY_(ReportAccessError)( doc, node, TABLE_MAY_REQUIRE_HEADER_ABBR_NULL);
1804                 }
1805 
1806                 if ((IsWhitespace (av->value) == yes)&&
1807                     (TY_(tmbstrlen)(av->value) > 0))
1808                 {
1809                     HasAbbr = yes;
1810                     TY_(ReportAccessError)( doc, node, TABLE_MAY_REQUIRE_HEADER_ABBR_SPACES);
1811                 }
1812             }
1813         }
1814 
1815         /* If the header is greater than 15 characters, an abbreviation is needed */
1816         word = textFromOneNode( doc, node->content);
1817 
1818         if ((word != NULL)&&
1819             (IsWhitespace (word) == no))
1820         {
1821             /* Must have 'ABBR' attribute if header is > 15 characters */
1822             if ((TY_(tmbstrlen)(word) > 15)&&
1823                 (HasAbbr == no))
1824             {
1825                 TY_(ReportAccessError)( doc, node, TABLE_MAY_REQUIRE_HEADER_ABBR);
1826             }
1827         }
1828     }
1829 }
1830 
1831 
1832 /*****************************************************************
1833 * CheckMultiHeaders
1834 *
1835 * Layout tables should make sense when linearized.
1836 * TABLE must contain at least one TH element.
1837 * This technique applies only to tables used for layout purposes,
1838 * not to data tables. Checks for column of multiple headers.
1839 *****************************************************************/
1840 
CheckMultiHeaders(TidyDocImpl * doc,Node * node)1841 static void CheckMultiHeaders( TidyDocImpl* doc, Node* node )
1842 {
1843     Node* TNode;
1844     Node* temp;
1845 
1846     Bool validColSpanRows = yes;
1847     Bool validColSpanColumns = yes;
1848 
1849     int flag = 0;
1850 
1851     if (Level1_Enabled( doc ))
1852     {
1853         if (node->content != NULL)
1854         {
1855             TNode = node->content;
1856 
1857             /*
1858                Checks for column of multiple headers found
1859                within a data table.
1860             */
1861             while (TNode != NULL)
1862             {
1863                 if ( nodeIsTR(TNode) )
1864                 {
1865                     flag = 0; /* Issue #168 - access test 5-2-1-2 */
1866                     if (TNode->content != NULL)
1867                     {
1868                         temp = TNode->content;
1869 
1870                         /* The number of TH elements found within TR element */
1871                         if (flag == 0)
1872                         {
1873                             while (temp != NULL)
1874                             {
1875                                 /*
1876                                    Must contain at least one TH element
1877                                    within in the TR element
1878                                 */
1879                                 if ( nodeIsTH(temp) )
1880                                 {
1881                                     AttVal* av;
1882                                     for (av = temp->attributes; av != NULL; av = av->next)
1883                                     {
1884                                         if ( attrIsCOLSPAN(av)
1885                                              && (atoi(av->value) > 1) )
1886                                             validColSpanColumns = no;
1887 
1888                                         if ( attrIsROWSPAN(av)
1889                                              && (atoi(av->value) > 1) )
1890                                             validColSpanRows = no;
1891                                     }
1892                                 }
1893 
1894                                 temp = temp->next;
1895                             }
1896 
1897                             flag = 1;
1898                         }
1899                     }
1900                 }
1901 
1902                 TNode = TNode->next;
1903             }
1904 
1905             /* Displays HTML 4 Table Algorithm when multiple column of headers used */
1906             if (validColSpanRows == no)
1907             {
1908                 TY_(ReportAccessError)( doc, node, DATA_TABLE_REQUIRE_MARKUP_ROW_HEADERS );
1909                 TY_(Dialogue)( doc, TEXT_HTML_T_ALGORITHM );
1910             }
1911 
1912             if (validColSpanColumns == no)
1913             {
1914                 TY_(ReportAccessError)( doc, node, DATA_TABLE_REQUIRE_MARKUP_COLUMN_HEADERS );
1915                 TY_(Dialogue)( doc, TEXT_HTML_T_ALGORITHM );
1916             }
1917         }
1918     }
1919 }
1920 
1921 
1922 /****************************************************
1923 * CheckTable
1924 *
1925 * Checks the TABLE element to ensure that the
1926 * table is not missing any headers.  Must have either
1927 * a row or column of headers.
1928 ****************************************************/
1929 
/* Picks the row CheckTable starts its header checks from: the TR that
   directly follows a leading CAPTION, otherwise the first child when it
   is itself a TR.  Returns NULL when neither shape is present. */
static Node* LeadingTableRow( Node* table )
{
    Node* first = table->content;

    if (first == NULL)
        return NULL;

    if ( nodeIsCAPTION(first) && nodeIsTR(first->next) )
        return first->next;

    if ( nodeIsTR(first) )
        return first;

    return NULL;
}

static void CheckTable( TidyDocImpl* doc, Node* node )
{
    Node* child;
    Node* leadRow;

    tmbstr captionText = NULL;
    int rowCount = 0;

    Bool sawSummary = no;
    Bool sawCaption = no;

    if (Level3_Enabled( doc ))
    {
        AttVal* attr = node->attributes;

        /* Look at every SUMMARY attribute on the table. */
        while (attr != NULL)
        {
            if ( attrIsSUMMARY(attr) )
            {
                if ( hasValue(attr) )
                {
                    sawSummary = yes;

                    /* A summary mentioning both "summary" and "table" is
                       treated as placeholder text. */
                    if ( AttrContains(attr, "summary") &&
                         AttrContains(attr, "table") )
                    {
                        TY_(ReportAccessError)( doc, node, TABLE_SUMMARY_INVALID_PLACEHOLDER );
                    }
                }

                /* A missing, empty or blank value still marks the summary
                   as seen, so TABLE_MISSING_SUMMARY is not reported as
                   well further down. */
                if ( attr->value == NULL || TY_(tmbstrlen)(attr->value) == 0 )
                {
                    sawSummary = yes;
                    TY_(ReportAccessError)( doc, node, TABLE_SUMMARY_INVALID_NULL );
                }
                else if ( IsWhitespace(attr->value) && TY_(tmbstrlen)(attr->value) > 0 )
                {
                    sawSummary = yes;
                    TY_(ReportAccessError)( doc, node, TABLE_SUMMARY_INVALID_SPACES );
                }
            }

            attr = attr->next;
        }

        /* A table without any content cannot have headers; nothing else
           is checked for it. */
        if (node->content == NULL)
        {
            TY_(ReportAccessError)( doc, node, DATA_TABLE_MISSING_HEADERS);
            return;
        }
    }

    if (Level1_Enabled( doc ))
    {
        /* Checks for multiple headers */
        CheckMultiHeaders( doc, node );
    }

    if (Level2_Enabled( doc ))
    {
        /* The first child must be a CAPTION holding non-blank text. */
        child = node->content;

        if ( nodeIsCAPTION(child) )
        {
            if (child->content && child->content->tag == NULL)
            {
                captionText = getTextNodeClear( doc, child );
            }

            if ( !IsWhitespace(captionText) )
            {
                sawCaption = yes;
            }
        }

        if (sawCaption == no)
        {
            TY_(ReportAccessError)( doc, node, TABLE_MISSING_CAPTION);
        }
    }

    /* Column headers are examined first ... */
    leadRow = LeadingTableRow( node );
    if (leadRow != NULL)
    {
        CheckColumns( doc, leadRow );
    }

    /* ... and row headers only when no valid column headers were found. */
    if ( ! doc->access.HasValidColumnHeaders )
    {
        leadRow = LeadingTableRow( node );
        if (leadRow != NULL)
        {
            CheckRows( doc, leadRow );
        }
    }

    if (Level3_Enabled( doc ))
    {
        if (sawSummary == no)
        {
            TY_(ReportAccessError)( doc, node, TABLE_MISSING_SUMMARY);
        }
    }

    if (Level2_Enabled( doc ))
    {
        /* Count the TR elements that are direct children of the table. */
        for (child = node->content; child != NULL; child = child->next)
        {
            if ( nodeIsTR(child) )
            {
                rowCount++;
            }
        }

        if (rowCount == 1)
        {
            TY_(ReportAccessError)( doc, node, LAYOUT_TABLES_LINEARIZE_PROPERLY);
        }

        if ( doc->access.HasTH )
        {
            TY_(ReportAccessError)( doc, node, LAYOUT_TABLE_INVALID_MARKUP);
        }
    }

    if ( (Level1_Enabled( doc )) && doc->access.CheckedHeaders == 2 )
    {
        /* Neither valid nor invalid headers were seen in either direction. */
        if ( !( doc->access.HasValidRowHeaders ||
                doc->access.HasValidColumnHeaders ||
                doc->access.HasInvalidRowHeader ||
                doc->access.HasInvalidColumnHeader ) )
        {
            TY_(ReportAccessError)( doc, node, DATA_TABLE_MISSING_HEADERS);
        }

        if ( doc->access.HasInvalidRowHeader &&
             !doc->access.HasValidRowHeaders )
        {
            TY_(ReportAccessError)( doc, node, DATA_TABLE_MISSING_HEADERS_ROW);
        }

        if ( doc->access.HasInvalidColumnHeader &&
             !doc->access.HasValidColumnHeaders )
        {
            TY_(ReportAccessError)( doc, node, DATA_TABLE_MISSING_HEADERS_COLUMN);
        }
    }
}
2105 
2106 
2107 /***************************************************
2108 * CheckASCII
2109 *
2110 * Checks for valid text equivalents for XMP and PRE
2111 * elements for ASCII art.  Ensures that there is
2112 * a skip over link to skip multi-lined ASCII art.
2113 ***************************************************/
2114 
static void CheckASCII( TidyDocImpl* doc, Node* node )
{
    Node* temp1;
    Node* temp2;

    tmbstr skipOver = NULL;
    Bool IsAscii = no;
    int HasSkipOverLink = 0;

    uint i, x;
    uint textEnd;
    int newLines = -1;
    tmbchar compareLetter;
    int matchingCount = 0;
    AttVal* av;

    if (Level1_Enabled( doc ) && node->content)
    {
        /*
           Scans the text of the first child (the text within the PRE or
           XMP element) to see if ascii art is present.  The scan is
           limited to the child's own [start, end) range in the lexer
           buffer.
        */
        textEnd = node->content->end;

        for (i = node->content->start + 1; i < textEnd; i++)
        {
            matchingCount = 0;

            /* Counts the number of lines of text */
            if (doc->lexer->lexbuf[i] == '\n')
            {
                newLines++;
            }

            compareLetter = doc->lexer->lexbuf[i];

            /*
               Counts consecutive character matches starting at i.  The
               run must lie entirely inside this text node, so the scan
               stops at textEnd instead of reading up to four characters
               past the end of the node.
            */
            for (x = i; x < i + 5 && x < textEnd; x++)
            {
                if (doc->lexer->lexbuf[x] == compareLetter)
                {
                    matchingCount++;
                }

                else
                {
                    break;
                }
            }

            /* Must have at least 5 consecutive character matches */
            if (matchingCount >= 5)
            {
                break;
            }
        }

        /*
           The text is treated as ascii art when the newline counter
           (which starts at -1) reaches 6, OR when 5 consecutive
           identical characters were found.
        */
        if (newLines >= 6 || matchingCount >= 5)
        {
            IsAscii = yes;
        }

        /* Checks for skip over link if ASCII art is present */
        if (IsAscii == yes)
        {
            /* The candidate link is the node two siblings back. */
            if (node->prev != NULL && node->prev->prev != NULL)
            {
                temp1 = node->prev->prev;

                /* Checks for 'HREF' attribute */
                for (av = temp1->attributes; av != NULL; av = av->next)
                {
                    if ( attrIsHREF(av) && hasValue(av) )
                    {
                        skipOver = av->value;
                        HasSkipOverLink++;
                    }
                }
            }
        }
    }

    if (Level2_Enabled( doc ))
    {
        /*
           Checks for A element following PRE to ensure proper skipover link
           only if there is an A element preceding PRE.
        */
        if (HasSkipOverLink == 1)
        {
            if ( nodeIsA(node->next) )
            {
                temp2 = node->next;

                /* Checks for 'NAME' attribute */
                for (av = temp2->attributes; av != NULL; av = av->next)
                {
                    if ( attrIsNAME(av) && hasValue(av) )
                    {
                        /*
                           The 'NAME' value must occur within the 'HREF'
                           value found above for a valid skipover.
                        */
                        if ( strstr(skipOver, av->value) != NULL )
                        {
                            HasSkipOverLink++;
                        }
                    }
                }
            }
        }

        if (IsAscii == yes)
        {
            TY_(ReportAccessError)( doc, node, ASCII_REQUIRES_DESCRIPTION);

            /* A count below 2 means no matching HREF/NAME pair was found. */
            if (Level3_Enabled( doc ) && (HasSkipOverLink < 2))
                TY_(ReportAccessError)( doc, node, SKIPOVER_ASCII_ART);
        }

    }
}
2238 
2239 
/***********************************************************
* CheckFormControls
*
* Reports a missing explicit label association, based on
* whether a valid 'FOR' attribute (recorded by CheckLabel)
* and a valid 'ID' attribute (recorded by CheckInputLabel)
* have been seen.
***********************************************************/
2246 
static void CheckFormControls( TidyDocImpl* doc, Node* node )
{
    /* At most one message is issued, chosen by which of the two flags
       (valid 'FOR', valid 'ID') is missing.  Nothing is reported when
       both are set. */
    if ( doc->access.HasValidFor )
    {
        if ( !doc->access.HasValidId )
        {
            TY_(ReportAccessError)( doc, node, ASSOCIATE_LABELS_EXPLICITLY_ID);
        }
    }
    else if ( doc->access.HasValidId )
    {
        TY_(ReportAccessError)( doc, node, ASSOCIATE_LABELS_EXPLICITLY_FOR);
    }
    else
    {
        TY_(ReportAccessError)( doc, node, ASSOCIATE_LABELS_EXPLICITLY);
    }
}
2267 
2268 
2269 /************************************************************
2270 * CheckLabel
2271 *
2272 * Check for valid 'FOR' attribute within the LABEL element
2273 ************************************************************/
2274 
static void CheckLabel( TidyDocImpl* doc, Node* node )
{
    AttVal* forAttr;

    if ( !(Level2_Enabled( doc )) )
        return;

    /* Record that a LABEL with a usable 'FOR' value has been seen. */
    forAttr = attrGetFOR( node );
    if ( hasValue(forAttr) )
    {
        doc->access.HasValidFor = yes;
    }

    /* ForID is a counter shared with CheckInputLabel: on every second
       increment it is reset and the form-control check is run. */
    doc->access.ForID++;
    if ( doc->access.ForID == 2 )
    {
        doc->access.ForID = 0;
        CheckFormControls( doc, node );
    }
}
2291 
2292 
2293 /************************************************************
2294 * CheckInputLabel
2295 *
2296 * Checks for valid 'ID' attribute within the INPUT element.
2297 * Checks to see if there is a LABEL directly before
2298 * or after the INPUT element determined by the 'TYPE'.
2299 * Each INPUT element must have a LABEL describing the form.
2300 ************************************************************/
2301 
static void CheckInputLabel( TidyDocImpl* doc, Node* node )
{
    AttVal* attr;

    if ( !(Level2_Enabled( doc )) )
        return;

    /* Any 'ID' attribute with a usable value marks the ID as valid. */
    attr = node->attributes;
    while (attr != NULL)
    {
        if ( attrIsID(attr) && hasValue(attr) )
        {
            doc->access.HasValidId = yes;
        }

        attr = attr->next;
    }

    /* ForID is a counter shared with CheckLabel: on every second
       increment it is reset and the form-control check is run. */
    doc->access.ForID++;
    if ( doc->access.ForID == 2 )
    {
        doc->access.ForID = 0;
        CheckFormControls( doc, node );
    }
}
2323 
2324 
/***************************************************************
* CheckInputAttributes
*
* INPUT element whose 'TYPE' is 'image' must have a valid
* 'ALT' attribute.
***************************************************************/
2331 
static void CheckInputAttributes( TidyDocImpl* doc, Node* node )
{
    Bool altPresent = no;
    Bool altRequired = no;
    AttVal* attr;

    /* Walk every attribute of the INPUT element. */
    for (attr = node->attributes; attr != NULL; attr = attr->next)
    {
        /* An image button needs alternate text (only flagged when
           level-1 checks are enabled). */
        if ( attrIsTYPE(attr) && hasValue(attr) )
        {
            if ( (Level1_Enabled( doc )) && (AttrValueIs(attr, "image")) )
            {
                altRequired = yes;
            }
        }

        /* Only an 'ALT' attribute with a usable value counts. */
        if ( attrIsALT(attr) && hasValue(attr) )
        {
            altPresent = yes;
        }
    }

    if ( altRequired && !altPresent )
    {
        TY_(ReportAccessError)( doc, node, IMG_BUTTON_MISSING_ALT );
    }
}
2366 
2367 
/***************************************************************
* CheckFrameSet
*
* Frameset must have a valid NOFRAMES section.  It must contain
* some text but must not contain information telling the user
* to update browsers.
***************************************************************/
2375 
static void CheckFrameSet( TidyDocImpl* doc, Node* node )
{
    Node* child;
    Node* body;
    Bool sawNoFrames = no;

    if ( !(Level1_Enabled( doc )) )
        return;

    if ( doc->badAccess & BA_INVALID_LINK_NOFRAMES )
    {
        TY_(ReportAccessError)( doc, node, NOFRAMES_INVALID_LINK);
        doc->badAccess &= ~BA_INVALID_LINK_NOFRAMES; /* emit only once */
    }

    for ( child = node->content; child != NULL; child = child->next )
    {
        if ( !(nodeIsNOFRAMES(child)) )
            continue;

        sawNoFrames = yes;
        body = child->content;

        /* An empty NOFRAMES has no value at all. */
        if ( body == NULL )
        {
            TY_(ReportAccessError)( doc, child, NOFRAMES_INVALID_NO_VALUE);
            continue;
        }

        /* NOFRAMES whose first child starts with a P: flag paragraph
           text that mentions "browser". */
        if ( nodeIsP(body->content) )
        {
            Node* para = body->content;

            if ( TY_(nodeIsText)(para->content) )
            {
                ctmbstr text = textFromOneNode( doc, para->content );

                if ( text && strstr(text, "browser") != NULL )
                    TY_(ReportAccessError)( doc, para, NOFRAMES_INVALID_CONTENT );
            }
            continue;
        }

        /* Otherwise the first child's text must not be blank. */
        if ( IsWhitespace(textFromOneNode(doc, body)) )
            TY_(ReportAccessError)( doc, child, NOFRAMES_INVALID_NO_VALUE);
    }

    if ( sawNoFrames == no )
        TY_(ReportAccessError)( doc, node, FRAME_MISSING_NOFRAMES);
}
2416 
2417 
/***********************************************************
* CheckHeaderNesting
*
* Checks for heading increases and decreases.  Headings must
* not increase by more than one header level, but may
* decrease from any level to any level.  Text within
* headers must not be more than 20 words in length.
***********************************************************/
2426 
static void CheckHeaderNesting( TidyDocImpl* doc, Node* node )
{
    Node* temp;
    uint i;
    uint wordLen;
    int numWords = 1;

    Bool IsValidIncrease = no;
    Bool NeedsDescription = no;

    if (Level2_Enabled( doc ))
    {
        /*
           Text within header element cannot contain more than 20 words without
           a separate description
        */
        if (node->content != NULL && node->content->tag == NULL)
        {
            ctmbstr word = textFromOneNode( doc, node->content);

            /* The text does not change while it is scanned, so its
               length is measured once rather than on every iteration. */
            wordLen = TY_(tmbstrlen)(word);

            /* Every space character counts as one more word. */
            for (i = 0; i < wordLen; i++)
            {
                if (word[i] == ' ')
                {
                    numWords++;
                }
            }

            if (numWords > 20)
            {
                NeedsDescription = yes;
            }
        }

        /* Header following must be same level or same plus 1 for
        ** valid heading increase size.  E.g. H1 -> H1, H2.  H3 -> H3, H4
        */
        if ( TY_(nodeIsHeader)(node) )
        {
            uint level = TY_(nodeHeaderLevel)( node );
            IsValidIncrease = yes;

            /* Only the first following sibling whose level is not below
               this header's level decides the outcome. */
            for ( temp = node->next; temp != NULL; temp = temp->next )
            {
                uint nested = TY_(nodeHeaderLevel)( temp );
                if ( nested >= level )
                {
                    IsValidIncrease = ( nested <= level + 1 );
                    break;
                }
            }
        }

        if ( !IsValidIncrease )
            TY_(ReportAccessError)( doc, node, HEADERS_IMPROPERLY_NESTED );

        if ( NeedsDescription )
            TY_(ReportAccessError)( doc, node, HEADER_USED_FORMAT_TEXT );
    }
}
2486 
2487 
2488 /*************************************************************
2489 * CheckParagraphHeader
2490 *
2491 * Checks to ensure that P elements are not headings.  Must be
2492 * greater than 10 words in length, and they must not be in bold,
2493 * or italics, or underlined, etc.
2494 *************************************************************/
2495 
static void CheckParagraphHeader( TidyDocImpl* doc, Node* node )
{
    Node* first;
    Node* sibling;

    if ( !(Level2_Enabled( doc )) )
        return;

    first = node->content;
    if (first == NULL)
        return;

    /* When the paragraph starts with an element, any untagged node
       among its children means it is not treated as a heading. */
    if (first->tag != NULL)
    {
        for (sibling = first; sibling != NULL; sibling = sibling->next)
        {
            if (sibling->tag == NULL)
                return;
        }
    }

    /* The paragraph is flagged according to the formatting element it
       starts with. */
    if ( nodeIsSTRONG(first) )
    {
        TY_(ReportAccessError)( doc, node, POTENTIAL_HEADER_BOLD);
    }

    if ( nodeIsU(first) )
    {
        TY_(ReportAccessError)( doc, node, POTENTIAL_HEADER_UNDERLINE);
    }

    if ( nodeIsEM(first) )
    {
        TY_(ReportAccessError)( doc, node, POTENTIAL_HEADER_ITALICS);
    }
}
2542 
2543 
/****************************************************************
* CheckEmbed
*
* Checks to see if 'SRC' is a multimedia type.  Must have
* synchronized captions if used.
****************************************************************/
2550 
static void CheckEmbed( TidyDocImpl* doc, Node* node )
{
    AttVal* src;

    if ( !(Level1_Enabled( doc )) )
        return;

    /* Flag the element when its 'SRC' value has a media extension. */
    src = attrGetSRC( node );
    if ( hasValue(src) && IsValidMediaExtension(src->value) )
    {
        TY_(ReportAccessError)( doc, node, MULTIMEDIA_REQUIRES_TEXT );
    }
}
2562 
2563 
2564 /*********************************************************************
2565 * CheckHTMLAccess
2566 *
2567 * Checks HTML element for valid 'LANG' attribute.  Must be a valid
2568 * language.  ie. 'fr' or 'en'
2569 ********************************************************************/
2570 
static void CheckHTMLAccess( TidyDocImpl* doc, Node* node )
{
    AttVal* lang;

    if ( !(Level3_Enabled( doc )) )
        return;

    lang = attrGetLANG( node );

    if ( lang == NULL )
    {
        /* No 'LANG' attribute at all. */
        TY_(ReportAccessError)( doc, node, LANGUAGE_NOT_IDENTIFIED );
    }
    else if ( !hasValue(lang) )
    {
        /* 'LANG' is present but carries no usable value. */
        TY_(ReportAccessError)( doc, node, LANGUAGE_INVALID );
    }
}
2588 
2589 
2590 /********************************************************
2591 * CheckBlink
2592 *
2593 * Document must not contain the BLINK element.
2594 * It is invalid HTML/XHTML.
2595 *********************************************************/
2596 
static void CheckBlink( TidyDocImpl* doc, Node* node )
{
    ctmbstr text;

    if ( !(Level2_Enabled( doc )) )
        return;

    /* Only a BLINK whose first child is a text node is examined. */
    if ( !TY_(nodeIsText)(node->content) )
        return;

    /* Blank text inside the element is not reported. */
    text = textFromOneNode( doc, node->content );
    if ( !IsWhitespace(text) )
    {
        TY_(ReportAccessError)( doc, node, REMOVE_BLINK_MARQUEE );
    }
}
2613 
2614 
2615 /********************************************************
2616 * CheckMarquee
2617 *
2618 * Document must not contain the MARQUEE element.
2619 * It is invalid HTML/XHTML.
2620 ********************************************************/
2621 
2622 
static void CheckMarquee( TidyDocImpl* doc, Node* node )
{
    if (Level2_Enabled( doc ))
    {
        /*
           Checks to see if there is text inside the MARQUEE element.
           The text test is applied to the first child, which is the
           node the text is then read from (same shape as CheckBlink).
        */
        if ( node->content != NULL && TY_(nodeIsText)(node->content) )
        {
            ctmbstr word = textFromOneNode( doc, node->content);

            /* Blank text inside the element is not reported. */
            if ( !IsWhitespace(word) )
            {
                TY_(ReportAccessError)( doc, node, REMOVE_BLINK_MARQUEE );
            }
        }
    }
}
2638 
2639 
2640 /**********************************************************
2641 * CheckLink
2642 *
2643 * 'REL' attribute within the LINK element must not contain
2644 * 'stylesheet'.  HTML/XHTML document is unreadable when
2645 * style sheets are applied.  -- CPR huh?
2646 **********************************************************/
2647 
static void CheckLink( TidyDocImpl* doc, Node* node )
{
    Bool relIsStylesheet = no;
    Bool typeGiven = no;
    AttVal* attr;

    if ( !(Level1_Enabled( doc )) )
        return;

    for (attr = node->attributes; attr != NULL; attr = attr->next)
    {
        /* 'REL' with a usable value that contains "stylesheet" */
        if ( attrIsREL(attr) && hasValue(attr) )
        {
            if ( AttrContains(attr, "stylesheet") )
            {
                relIsStylesheet = yes;
            }
        }

        /* 'TYPE' with any usable value */
        if ( attrIsTYPE(attr) && hasValue(attr) )
        {
            typeGiven = yes;
        }
    }

    /* Only a LINK carrying both attributes is reported. */
    if ( relIsStylesheet && typeGiven )
    {
        TY_(ReportAccessError)( doc, node, STYLESHEETS_REQUIRE_TESTING_LINK );
    }
}
2675 
2676 
2677 /*******************************************************
2678 * CheckStyle
2679 *
2680 * Document must not contain STYLE element.  HTML/XHTML
2681 * document is unreadable when style sheets are applied.
2682 *******************************************************/
2683 
static void CheckStyle( TidyDocImpl* doc, Node* node )
{
    /* Nothing about the node is inspected: with level-1 checks enabled
       the message is reported unconditionally. */
    if ( !(Level1_Enabled( doc )) )
        return;

    TY_(ReportAccessError)( doc, node, STYLESHEETS_REQUIRE_TESTING_STYLE_ELEMENT );
}
2691 
2692 
2693 /*************************************************************
2694 * DynamicContent
2695 *
2696 * Verify that equivalents of dynamic content are updated and
2697 * available as often as the dynamic content.
2698 *************************************************************/
2699 
2700 
static void DynamicContent( TidyDocImpl* doc, Node* node )
{
    uint code = 0;

    if ( !(Level1_Enabled( doc )) )
        return;

    /* Choose the message by element type; other elements get none. */
    if ( nodeIsAPPLET(node) )
    {
        code = TEXT_EQUIVALENTS_REQUIRE_UPDATING_APPLET;
    }
    else if ( nodeIsSCRIPT(node) )
    {
        code = TEXT_EQUIVALENTS_REQUIRE_UPDATING_SCRIPT;
    }
    else if ( nodeIsOBJECT(node) )
    {
        code = TEXT_EQUIVALENTS_REQUIRE_UPDATING_OBJECT;
    }

    if ( code != 0 )
    {
        TY_(ReportAccessError)( doc, node, code );
    }
}
2717 
2718 
2719 /*************************************************************
2720 * ProgrammaticObjects
2721 *
2722 * Verify that the page is usable when programmatic objects
2723 * are disabled.
2724 *************************************************************/
2725 
static void ProgrammaticObjects( TidyDocImpl* doc, Node* node )
{
    int code = 0;

    if ( !(Level1_Enabled( doc )) )
        return;

    /* Choose the message by element type; other elements get none. */
    if ( nodeIsSCRIPT(node) )
    {
        code = PROGRAMMATIC_OBJECTS_REQUIRE_TESTING_SCRIPT;
    }
    else if ( nodeIsOBJECT(node) )
    {
        code = PROGRAMMATIC_OBJECTS_REQUIRE_TESTING_OBJECT;
    }
    else if ( nodeIsEMBED(node) )
    {
        code = PROGRAMMATIC_OBJECTS_REQUIRE_TESTING_EMBED;
    }
    else if ( nodeIsAPPLET(node) )
    {
        code = PROGRAMMATIC_OBJECTS_REQUIRE_TESTING_APPLET;
    }

    if ( code != 0 )
    {
        TY_(ReportAccessError)( doc, node, code );
    }
}
2744 
2745 
2746 /*************************************************************
2747 * AccessibleCompatible
2748 *
2749 * Verify that programmatic objects are directly accessible.
2750 *************************************************************/
2751 
static void AccessibleCompatible( TidyDocImpl* doc, Node* node )
{
    int code = 0;

    if ( !(Level1_Enabled( doc )) )
        return;

    /* Choose the message by element type; other elements get none. */
    if ( nodeIsSCRIPT(node) )
    {
        code = ENSURE_PROGRAMMATIC_OBJECTS_ACCESSIBLE_SCRIPT;
    }
    else if ( nodeIsOBJECT(node) )
    {
        code = ENSURE_PROGRAMMATIC_OBJECTS_ACCESSIBLE_OBJECT;
    }
    else if ( nodeIsEMBED(node) )
    {
        code = ENSURE_PROGRAMMATIC_OBJECTS_ACCESSIBLE_EMBED;
    }
    else if ( nodeIsAPPLET(node) )
    {
        code = ENSURE_PROGRAMMATIC_OBJECTS_ACCESSIBLE_APPLET;
    }

    if ( code != 0 )
    {
        TY_(ReportAccessError)( doc, node, code );
    }
}
2770 
2771 
2772 /**************************************************
2773 * CheckFlicker
2774 *
2775 * Verify that the page does not cause flicker.
2776 **************************************************/
2777 
static void CheckFlicker( TidyDocImpl* doc, Node* node )
{
    int code = 0;

    if ( !(Level1_Enabled( doc )) )
        return;

    if ( nodeIsSCRIPT(node) )
    {
        code = REMOVE_FLICKER_SCRIPT;
    }
    else if ( nodeIsOBJECT(node) )
    {
        code = REMOVE_FLICKER_OBJECT;
    }
    else if ( nodeIsEMBED(node) )
    {
        code = REMOVE_FLICKER_EMBED;
    }
    else if ( nodeIsAPPLET(node) )
    {
        code = REMOVE_FLICKER_APPLET;
    }
    else if ( nodeIsIMG(node) )
    {
        /* An IMG is flagged when the extension of its 'SRC' value is
           ".gif", compared without regard to case. */
        AttVal* src = attrGetSRC( node );

        if ( hasValue(src) )
        {
            tmbchar suffix[20];

            GetFileExtension( src->value, suffix, sizeof(suffix) );
            if ( TY_(tmbstrcasecmp)(suffix, ".gif") == 0 )
            {
                code = REMOVE_FLICKER_ANIMATED_GIF;
            }
        }
    }

    if ( code != 0 )
    {
        TY_(ReportAccessError)( doc, node, code );
    }
}
2809 
2810 
2811 /**********************************************************
2812 * CheckDeprecated
2813 *
2814 * APPLET, BASEFONT, CENTER, FONT, ISINDEX,
2815 * S, STRIKE, and U should not be used.  Becomes deprecated
2816 * HTML if any of the above are used.
2817 **********************************************************/
2818 
static void CheckDeprecated( TidyDocImpl* doc, Node* node )
{
    int code = 0;

    if ( !(Level2_Enabled( doc )) )
        return;

    /* Each deprecated element has its own message; any other element
       leaves the code at 0 and nothing is reported. */
    if ( nodeIsAPPLET(node) )
    {
        code = REPLACE_DEPRECATED_HTML_APPLET;
    }
    else if ( nodeIsBASEFONT(node) )
    {
        code = REPLACE_DEPRECATED_HTML_BASEFONT;
    }
    else if ( nodeIsCENTER(node) )
    {
        code = REPLACE_DEPRECATED_HTML_CENTER;
    }
    else if ( nodeIsDIR(node) )
    {
        code = REPLACE_DEPRECATED_HTML_DIR;
    }
    else if ( nodeIsFONT(node) )
    {
        code = REPLACE_DEPRECATED_HTML_FONT;
    }
    else if ( nodeIsISINDEX(node) )
    {
        code = REPLACE_DEPRECATED_HTML_ISINDEX;
    }
    else if ( nodeIsMENU(node) )
    {
        code = REPLACE_DEPRECATED_HTML_MENU;
    }
    else if ( nodeIsS(node) )
    {
        code = REPLACE_DEPRECATED_HTML_S;
    }
    else if ( nodeIsSTRIKE(node) )
    {
        code = REPLACE_DEPRECATED_HTML_STRIKE;
    }
    else if ( nodeIsU(node) )
    {
        code = REPLACE_DEPRECATED_HTML_U;
    }

    if ( code != 0 )
    {
        TY_(ReportAccessError)( doc, node, code );
    }
}
2849 
2850 
2851 /************************************************************
2852 * CheckScriptKeyboardAccessible
2853 *
2854 * Elements must have a device independent event handler if
2855 * they have any of the following device dependent event
2856 * handlers.
2857 ************************************************************/
2858 
static void CheckScriptKeyboardAccessible( TidyDocImpl* doc, Node* node )
{
    Node* content;

    /* Device dependent (mouse) handlers found on this node */
    Bool HasOnMouseDown = no;
    Bool HasOnMouseUp = no;
    Bool HasOnClick = no;
    Bool HasOnMouseOut = no;
    Bool HasOnMouseOver = no;
    Bool HasOnMouseMove = no;

    /* Matching device independent handlers found on this node */
    Bool HasOnKeyDown = no;
    Bool HasOnKeyUp = no;
    Bool HasOnKeyPress = no;
    Bool HasOnBlur = no;

    if (Level2_Enabled( doc ))
    {
        AttVal* av;

        /*
           Mouse and keyboard handlers are tracked separately.  A node
           that carries only a keyboard handler must not be reported,
           and a repeated mouse handler must not hide the missing
           keyboard handler.
        */
        for (av = node->attributes; av != NULL; av = av->next)
        {
            if ( attrIsOnMOUSEDOWN(av) )
                HasOnMouseDown = yes;

            if ( attrIsOnMOUSEUP(av) )
                HasOnMouseUp = yes;

            if ( attrIsOnCLICK(av) )
                HasOnClick = yes;

            if ( attrIsOnMOUSEOUT(av) )
                HasOnMouseOut = yes;

            if ( attrIsOnMOUSEOVER(av) )
                HasOnMouseOver = yes;

            if ( attrIsOnMOUSEMOVE(av) )
                HasOnMouseMove = yes;

            if ( attrIsOnKEYDOWN(av) )
                HasOnKeyDown = yes;

            if ( attrIsOnKEYUP(av) )
                HasOnKeyUp = yes;

            if ( attrIsOnKEYPRESS(av) )
                HasOnKeyPress = yes;

            if ( attrIsOnBLUR(av) )
                HasOnBlur = yes;
        }

        /* Must also have 'ONKEYDOWN' attribute with 'ONMOUSEDOWN' */
        if ( HasOnMouseDown && !HasOnKeyDown )
            TY_(ReportAccessError)( doc, node, SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_MOUSE_DOWN);

        /* Must also have 'ONKEYUP' attribute with 'ONMOUSEUP' */
        if ( HasOnMouseUp && !HasOnKeyUp )
            TY_(ReportAccessError)( doc, node, SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_MOUSE_UP);

        /* Must also have 'ONKEYPRESS' attribute with 'ONCLICK' */
        if ( HasOnClick && !HasOnKeyPress )
            TY_(ReportAccessError)( doc, node, SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_CLICK);

        /* Must also have 'ONBLUR' attribute with 'ONMOUSEOUT' */
        if ( HasOnMouseOut && !HasOnBlur )
            TY_(ReportAccessError)( doc, node, SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_MOUSE_OUT);

        /* No keyboard counterpart is looked for with these two, so
           their presence alone is reported. */
        if ( HasOnMouseOver )
            TY_(ReportAccessError)( doc, node, SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_MOUSE_OVER);

        if ( HasOnMouseMove )
            TY_(ReportAccessError)( doc, node, SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_MOUSE_MOVE);

        /* Recursively check all child nodes.
         */
        for ( content = node->content; content != NULL; content = content->next )
            CheckScriptKeyboardAccessible( doc, content );
    }
}
2933 
2934 
2935 /**********************************************************
2936 * CheckMetaData
2937 *
2938 * Must have at least one of these elements in the document.
2939 * META, LINK, TITLE or ADDRESS.  <meta> must contain
2940 * a "content" attribute that doesn't contain a URL, and
2941 * an "http-Equiv" attribute that doesn't contain 'refresh'.
2942 **********************************************************/
2943 
2944 
CheckMetaData(TidyDocImpl * doc,Node * node,Bool HasMetaData)2945 static Bool CheckMetaData( TidyDocImpl* doc, Node* node, Bool HasMetaData )
2946 {
2947     Bool HasHttpEquiv = no;
2948     Bool HasContent = no;
2949     Bool ContainsAttr = no;
2950 
2951     if (Level2_Enabled( doc ))
2952     {
2953         if ( nodeIsMETA(node) )
2954         {
2955             AttVal* av;
2956             for (av = node->attributes; av != NULL; av = av->next)
2957             {
2958                 if ( attrIsHTTP_EQUIV(av) && hasValue(av) )
2959                 {
2960                     ContainsAttr = yes;
2961 
2962                     /* Must not have an auto-refresh */
2963                     if (AttrValueIs(av, "refresh"))
2964                     {
2965                         HasHttpEquiv = yes;
2966                         TY_(ReportAccessError)( doc, node, REMOVE_AUTO_REFRESH );
2967                     }
2968                 }
2969 
2970                 if ( attrIsCONTENT(av) && hasValue(av) )
2971                 {
2972                     ContainsAttr = yes;
2973 
2974                     /* If the value is not an integer, then it must not be a URL */
2975                     if ( TY_(tmbstrncmp)(av->value, "http:", 5) == 0)
2976                     {
2977                         HasContent = yes;
2978                         TY_(ReportAccessError)( doc, node, REMOVE_AUTO_REDIRECT);
2979                     }
2980                 }
2981                 if (TY_(IsHTML5Mode)(doc) && attrIsCHARSET(av) && hasValue(av))
2982                 {
2983                     ContainsAttr = yes;
2984                 }
2985             }
2986 
2987             if ( HasContent || HasHttpEquiv )
2988             {
2989                 HasMetaData = yes;
2990                 TY_(ReportAccessError)( doc, node, METADATA_MISSING_REDIRECT_AUTOREFRESH);
2991             }
2992             else
2993             {
2994                 if ( ContainsAttr && !HasContent && !HasHttpEquiv )
2995                     HasMetaData = yes;
2996             }
2997         }
2998 
2999         if ( !HasMetaData &&
3000              nodeIsADDRESS(node) &&
3001              nodeIsA(node->content) )
3002         {
3003             HasMetaData = yes;
3004         }
3005 
3006         if ( !HasMetaData &&
3007              !nodeIsTITLE(node) &&
3008              TY_(nodeIsText)(node->content) )
3009         {
3010             ctmbstr word = textFromOneNode( doc, node->content );
3011             if ( !IsWhitespace(word) )
3012                 HasMetaData = yes;
3013         }
3014 
3015         if( !HasMetaData && nodeIsLINK(node) )
3016         {
3017             AttVal* av = attrGetREL(node);
3018             if( !AttrContains(av, "stylesheet") )
3019                 HasMetaData = yes;
3020         }
3021 
3022         /* Check for MetaData */
3023         for ( node = node->content; node; node = node->next )
3024         {
3025             HasMetaData = CheckMetaData( doc, node, HasMetaData );
3026         }
3027     }
3028     return HasMetaData;
3029 }
3030 
3031 
3032 /*******************************************************
3033 * MetaDataPresent
3034 *
3035 * Determines if MetaData is present in document
3036 *******************************************************/
3037 
MetaDataPresent(TidyDocImpl * doc,Node * node)3038 static void MetaDataPresent( TidyDocImpl* doc, Node* node )
3039 {
3040     if (Level2_Enabled( doc ))
3041     {
3042         TY_(ReportAccessError)( doc, node, METADATA_MISSING );
3043     }
3044 }
3045 
3046 
3047 /*****************************************************
3048 * CheckDocType
3049 *
3050 * Checks that every HTML/XHTML document contains a
3051 * '!DOCTYPE' before the root node. ie.  <HTML>
3052 *****************************************************/
3053 
CheckDocType(TidyDocImpl * doc)3054 static void CheckDocType( TidyDocImpl* doc )
3055 {
3056     if (Level2_Enabled( doc ))
3057     {
3058         Node* DTnode = TY_(FindDocType)(doc);
3059 
3060         /* If the doctype has been added by tidy, DTnode->end will be 0. */
3061         if (DTnode && DTnode->end != 0)
3062         {
3063             ctmbstr word = textFromOneNode( doc, DTnode);
3064             if (TY_(IsHTML5Mode)(doc))
3065             {
3066                 if ((strstr(word, "HTML") == NULL) &&
3067                     (strstr(word, "html") == NULL))
3068                     DTnode = NULL;
3069             }
3070             else {
3071                 if ((strstr(word, "HTML PUBLIC") == NULL) &&
3072                     (strstr(word, "html PUBLIC") == NULL))
3073                     DTnode = NULL;
3074             }
3075         }
3076         if (!DTnode)
3077            TY_(ReportAccessError)( doc, &doc->root, DOCTYPE_MISSING);
3078     }
3079 }
3080 
3081 
3082 
3083 /********************************************************
3084 * CheckMapLinks
3085 *
3086 * Checks to see if an HREF for A element matches HREF
3087 * for AREA element.  There must be an HREF attribute
3088 * of an A element for every HREF of an AREA element.
3089 ********************************************************/
3090 
urlMatch(ctmbstr url1,ctmbstr url2)3091 static Bool urlMatch( ctmbstr url1, ctmbstr url2 )
3092 {
3093   /* TODO: Make host part case-insensitive and
3094   ** remainder case-sensitive.
3095   */
3096   return ( TY_(tmbstrcmp)( url1, url2 ) == 0 );
3097 }
3098 
FindLinkA(TidyDocImpl * doc,Node * node,ctmbstr url)3099 static Bool FindLinkA( TidyDocImpl* doc, Node* node, ctmbstr url )
3100 {
3101   Bool found = no;
3102   for ( node = node->content; !found && node; node = node->next )
3103   {
3104     if ( nodeIsA(node) )
3105     {
3106       AttVal* href = attrGetHREF( node );
3107       found = ( hasValue(href) && urlMatch(url, href->value) );
3108     }
3109     else
3110         found = FindLinkA( doc, node, url );
3111   }
3112   return found;
3113 }
3114 
/* For each AREA child of 'node' that has a valued href,
** looks through the whole document for an A element with
** the same href.  Every AREA without such a link yields one
** IMG_MAP_CLIENT_MISSING_TEXT_LINKS report against 'node'.
** Does nothing unless Level3_Enabled( doc ) holds.
*/
static void CheckMapLinks( TidyDocImpl* doc, Node* node )
{
    Node* area;

    if ( Level3_Enabled( doc ) )
    {
        for ( area = node->content; area != NULL; area = area->next )
        {
            AttVal* href;

            if ( !nodeIsAREA(area) )
                continue;

            /* Only an AREA with a usable 'HREF' needs a text link */
            href = attrGetHREF( area );
            if ( !hasValue(href) )
                continue;

            if ( !FindLinkA( doc, &doc->root, href->value ) )
                TY_(ReportAccessError)( doc, node, IMG_MAP_CLIENT_MISSING_TEXT_LINKS );
        }
    }
}
3137 
3138 
3139 /****************************************************
3140 * CheckForStyleAttribute
3141 *
3142 * Checks all elements within the document to check
3143 * for the use of 'STYLE' attribute.
3144 ****************************************************/
3145 
CheckForStyleAttribute(TidyDocImpl * doc,Node * node)3146 static void CheckForStyleAttribute( TidyDocImpl* doc, Node* node )
3147 {
3148     Node* content;
3149     if (Level1_Enabled( doc ))
3150     {
3151         /* Must not contain 'STYLE' attribute */
3152         AttVal* style = attrGetSTYLE( node );
3153         if ( hasValue(style) )
3154         {
3155             TY_(ReportAccessError)( doc, node, STYLESHEETS_REQUIRE_TESTING_STYLE_ATTR );
3156         }
3157     }
3158 
3159     /* Recursively check all child nodes.
3160     */
3161     for ( content = node->content; content != NULL; content = content->next )
3162         CheckForStyleAttribute( doc, content );
3163 }
3164 
3165 
3166 /*****************************************************
3167 * CheckForListElements
3168 *
3169 * Checks document for list elements (<ol>, <ul>, <li>)
3170 *****************************************************/
3171 
CheckForListElements(TidyDocImpl * doc,Node * node)3172 static void CheckForListElements( TidyDocImpl* doc, Node* node )
3173 {
3174     if ( nodeIsLI(node) )
3175     {
3176         doc->access.ListElements++;
3177     }
3178     else if ( nodeIsOL(node) || nodeIsUL(node) )
3179     {
3180         doc->access.OtherListElements++;
3181     }
3182 
3183     for ( node = node->content; node != NULL; node = node->next )
3184     {
3185         CheckForListElements( doc, node );
3186     }
3187 }
3188 
3189 
3190 /******************************************************
3191 * CheckListUsage
3192 *
3193 * Ensures that lists are properly used.  <ol> and <ul>
3194 * must contain <li> within itself, and <li> must not be
3195 * by itself.
3196 ******************************************************/
3197 
CheckListUsage(TidyDocImpl * doc,Node * node)3198 static void CheckListUsage( TidyDocImpl* doc, Node* node )
3199 {
3200     int msgcode = 0;
3201 
3202     if (!Level2_Enabled( doc ))
3203         return;
3204 
3205     if ( nodeIsOL(node) )
3206         msgcode = LIST_USAGE_INVALID_OL;
3207     else if ( nodeIsUL(node) )
3208         msgcode = LIST_USAGE_INVALID_UL;
3209 
3210     if ( msgcode )
3211     {
3212        /*
3213        ** Check that OL/UL
3214        ** a) has LI child,
3215        ** b) was not added by Tidy parser
3216        ** IFF OL/UL node is implicit
3217        */
3218        if ( !nodeIsLI(node->content) ) {
3219             TY_(ReportAccessError)( doc, node, msgcode );
3220        } else if ( node->implicit ) {  /* if a tidy added node */
3221             TY_(ReportAccessError)( doc, node, LIST_USAGE_INVALID_LI );
3222        }
3223     }
3224     else if ( nodeIsLI(node) )
3225     {
3226         /* Check that LI parent
3227         ** a) exists,
3228         ** b) is either OL or UL
3229         ** IFF the LI parent was added by Tidy
3230         ** ie, if it is marked 'implicit', then
3231         ** emit warnings LIST_USAGE_INVALID_UL or
3232         ** warning LIST_USAGE_INVALID_OL tests
3233         */
3234         if ( node->parent == NULL ||
3235              ( !nodeIsOL(node->parent) && !nodeIsUL(node->parent) ) )
3236         {
3237             TY_(ReportAccessError)( doc, node, LIST_USAGE_INVALID_LI );
3238         } else if ( node->implicit && node->parent &&
3239                     ( nodeIsOL(node->parent) || nodeIsUL(node->parent) ) ) {
3240             /* if tidy added LI node, then */
3241             msgcode = nodeIsUL(node->parent) ?
3242                 LIST_USAGE_INVALID_UL : LIST_USAGE_INVALID_OL;
3243             TY_(ReportAccessError)( doc, node, msgcode );
3244         }
3245     }
3246 }
3247 
3248 /************************************************************
3249 * InitAccessibilityChecks
3250 *
3251 * Initializes the AccessibilityChecks variables as necessary
3252 ************************************************************/
3253 
InitAccessibilityChecks(TidyDocImpl * doc,int level123)3254 static void InitAccessibilityChecks( TidyDocImpl* doc, int level123 )
3255 {
3256     TidyClearMemory( &doc->access, sizeof(doc->access) );
3257     doc->access.PRIORITYCHK = level123;
3258 }
3259 
3260 /************************************************************
3261 * CleanupAccessibilityChecks
3262 *
3263 * Cleans up the AccessibilityChecks variables as necessary
3264 ************************************************************/
3265 
3266 
FreeAccessibilityChecks(TidyDocImpl * ARG_UNUSED (doc))3267 static void FreeAccessibilityChecks( TidyDocImpl* ARG_UNUSED(doc) )
3268 {
3269     /* free any memory allocated for the lists
3270 
3271     Linked List of Links not used.  Just search document as
3272     AREA tags are encountered.  Same algorithm, but no
3273     data structures necessary.
3274 
3275     current = start;
3276     while (current)
3277     {
3278         void    *templink = (void *)current;
3279 
3280         current = current->next;
3281         TidyDocFree(doc, templink);
3282     }
3283     start = NULL;
3284     */
3285 }
3286 
3287 /************************************************************
3288 * AccessibilityChecks
3289 *
3290 * Traverses through the individual nodes of the tree
3291 * and checks attributes and elements for accessibility.
3292 * after the tree structure has been formed.
3293 ************************************************************/
3294 
AccessibilityCheckNode(TidyDocImpl * doc,Node * node)3295 static void AccessibilityCheckNode( TidyDocImpl* doc, Node* node )
3296 {
3297     Node* content;
3298 
3299     /* Check BODY for color contrast */
3300     if ( nodeIsBODY(node) )
3301     {
3302         CheckColorContrast( doc, node );
3303     }
3304 
3305     /* Checks document for MetaData */
3306     else if ( nodeIsHEAD(node) )
3307     {
3308         if ( !CheckMetaData( doc, node, no ) )
3309           MetaDataPresent( doc, node );
3310     }
3311 
3312     /* Check the ANCHOR tag */
3313     else if ( nodeIsA(node) )
3314     {
3315         CheckAnchorAccess( doc, node );
3316     }
3317 
3318     /* Check the IMAGE tag */
3319     else if ( nodeIsIMG(node) )
3320     {
3321         CheckFlicker( doc, node );
3322         CheckColorAvailable( doc, node );
3323         CheckImage( doc, node );
3324     }
3325 
3326         /* Checks MAP for client-side text links */
3327     else if ( nodeIsMAP(node) )
3328     {
3329         CheckMapLinks( doc, node );
3330     }
3331 
3332     /* Check the AREA tag */
3333     else if ( nodeIsAREA(node) )
3334     {
3335         CheckArea( doc, node );
3336     }
3337 
3338     /* Check the APPLET tag */
3339     else if ( nodeIsAPPLET(node) )
3340     {
3341         CheckDeprecated( doc, node );
3342         ProgrammaticObjects( doc, node );
3343         DynamicContent( doc, node );
3344         AccessibleCompatible( doc, node );
3345         CheckFlicker( doc, node );
3346         CheckColorAvailable( doc, node );
3347         CheckApplet(doc, node );
3348     }
3349 
3350     /* Check the OBJECT tag */
3351     else if ( nodeIsOBJECT(node) )
3352     {
3353         ProgrammaticObjects( doc, node );
3354         DynamicContent( doc, node );
3355         AccessibleCompatible( doc, node );
3356         CheckFlicker( doc, node );
3357         CheckColorAvailable( doc, node );
3358         CheckObject( doc, node );
3359     }
3360 
3361     /* Check the FRAME tag */
3362     else if ( nodeIsFRAME(node) )
3363     {
3364         CheckFrame( doc, node );
3365     }
3366 
3367     /* Check the IFRAME tag */
3368     else if ( nodeIsIFRAME(node) )
3369     {
3370         CheckIFrame( doc, node );
3371     }
3372 
3373     /* Check the SCRIPT tag */
3374     else if ( nodeIsSCRIPT(node) )
3375     {
3376         DynamicContent( doc, node );
3377         ProgrammaticObjects( doc, node );
3378         AccessibleCompatible( doc, node );
3379         CheckFlicker( doc, node );
3380         CheckColorAvailable( doc, node );
3381         CheckScriptAcc( doc, node );
3382     }
3383 
3384     /* Check the TABLE tag */
3385     else if ( nodeIsTABLE(node) )
3386     {
3387         CheckColorContrast( doc, node );
3388         CheckTable( doc, node );
3389     }
3390 
3391     /* Check the PRE for ASCII art */
3392     else if ( nodeIsPRE(node) || nodeIsXMP(node) )
3393     {
3394         CheckASCII( doc, node );
3395     }
3396 
3397     /* Check the LABEL tag */
3398     else if ( nodeIsLABEL(node) )
3399     {
3400         CheckLabel( doc, node );
3401     }
3402 
3403     /* Check INPUT tag for validity */
3404     else if ( nodeIsINPUT(node) )
3405     {
3406         CheckColorAvailable( doc, node );
3407         CheckInputLabel( doc, node );
3408         CheckInputAttributes( doc, node );
3409     }
3410 
3411     /* Checks FRAMESET element for NOFRAME section */
3412     else if ( nodeIsFRAMESET(node) )
3413     {
3414         CheckFrameSet( doc, node );
3415     }
3416 
3417     /* Checks for header elements for valid header increase */
3418     else if ( TY_(nodeIsHeader)(node) )
3419     {
3420         CheckHeaderNesting( doc, node );
3421     }
3422 
3423     /* Checks P element to ensure that it is not a header */
3424     else if ( nodeIsP(node) )
3425     {
3426         CheckParagraphHeader( doc, node );
3427     }
3428 
3429     /* Checks HTML elemnt for valid 'LANG' */
3430     else if ( nodeIsHTML(node) )
3431     {
3432         CheckHTMLAccess( doc, node );
3433     }
3434 
3435     /* Checks BLINK for any blinking text */
3436     else if ( nodeIsBLINK(node) )
3437     {
3438         CheckBlink( doc, node );
3439     }
3440 
3441     /* Checks MARQUEE for any MARQUEE text */
3442     else if ( nodeIsMARQUEE(node) )
3443     {
3444         CheckMarquee( doc, node );
3445     }
3446 
3447     /* Checks LINK for 'REL' attribute */
3448     else if ( nodeIsLINK(node) )
3449     {
3450         CheckLink( doc, node );
3451     }
3452 
3453     /* Checks to see if STYLE is used */
3454     else if ( nodeIsSTYLE(node) )
3455     {
3456         CheckColorContrast( doc, node );
3457         CheckStyle( doc, node );
3458     }
3459 
3460     /* Checks to see if EMBED is used */
3461     else if ( nodeIsEMBED(node) )
3462     {
3463         CheckEmbed( doc, node );
3464         ProgrammaticObjects( doc, node );
3465         AccessibleCompatible( doc, node );
3466         CheckFlicker( doc, node );
3467     }
3468 
3469     /* Deprecated HTML if the following tags are found in the document */
3470     else if ( nodeIsBASEFONT(node) ||
3471               nodeIsCENTER(node)   ||
3472               nodeIsISINDEX(node)  ||
3473               nodeIsU(node)        ||
3474               nodeIsFONT(node)     ||
3475               nodeIsDIR(node)      ||
3476               nodeIsS(node)        ||
3477               nodeIsSTRIKE(node)   ||
3478               nodeIsMENU(node) )
3479     {
3480         CheckDeprecated( doc, node );
3481     }
3482 
3483     /* Checks for 'ABBR' attribute if needed */
3484     else if ( nodeIsTH(node) )
3485     {
3486         CheckTH( doc, node );
3487     }
3488 
3489     /* Ensures that lists are properly used */
3490     else if ( nodeIsLI(node) || nodeIsOL(node) || nodeIsUL(node) )
3491     {
3492         CheckListUsage( doc, node );
3493     }
3494 
3495     /* Recursively check all child nodes.
3496     */
3497     for ( content = node->content; content != NULL; content = content->next )
3498     {
3499         AccessibilityCheckNode( doc, content );
3500     }
3501 }
3502 
3503 
TY_(AccessibilityChecks)3504 void TY_(AccessibilityChecks)( TidyDocImpl* doc )
3505 {
3506     /* Initialize */
3507     InitAccessibilityChecks( doc, cfg(doc, TidyAccessibilityCheckLevel) );
3508 
3509     /* Hello there, ladies and gentlemen... */
3510     TY_(Dialogue)( doc, STRING_HELLO_ACCESS );
3511 
3512     /* Checks all elements for script accessibility */
3513     CheckScriptKeyboardAccessible( doc, &doc->root );
3514 
3515     /* Checks entire document for the use of 'STYLE' attribute */
3516     CheckForStyleAttribute( doc, &doc->root );
3517 
3518     /* Checks for '!DOCTYPE' */
3519     CheckDocType( doc );
3520 
3521 
3522     /* Checks to see if stylesheets are used to control the layout */
3523     if ( Level2_Enabled( doc )
3524          && ! CheckMissingStyleSheets( doc, &doc->root ) )
3525     {
3526         TY_(ReportAccessError)( doc, &doc->root, STYLE_SHEET_CONTROL_PRESENTATION );
3527     }
3528 
3529     /* Check to see if any list elements are found within the document */
3530     CheckForListElements( doc, &doc->root );
3531 
3532     /* Recursively apply all remaining checks to
3533     ** each node in document.
3534     */
3535     AccessibilityCheckNode( doc, &doc->root );
3536 
3537     /* Cleanup */
3538     FreeAccessibilityChecks( doc );
3539 }
3540 
3541