1 /* tags.c -- recognize HTML tags
2 
3   (c) 1998-2008 (W3C) MIT, ERCIM, Keio University
4   See tidyp.h for the copyright notice.
5 
6   The HTML tags are stored as 8 bit ASCII strings.
7 
8 */
9 
10 #include "tidy-int.h"
11 #include "message.h"
12 #include "tmbstr.h"
13 
14 /* Attribute checking methods.
   Forward declarations, through the CheckAttribs type, of the
   element-specific attribute checkers that are named in the last
   column of the tag_defs table further down in this file. They are
   all static, i.e. private to this translation unit. */
15 static CheckAttribs CheckIMG;
16 static CheckAttribs CheckLINK;
17 static CheckAttribs CheckAREA;
18 static CheckAttribs CheckTABLE;
19 static CheckAttribs CheckCaption;
20 static CheckAttribs CheckSCRIPT;
21 static CheckAttribs CheckSTYLE;
22 static CheckAttribs CheckHTML;
23 static CheckAttribs CheckFORM;
24 static CheckAttribs CheckMETA;
25 
/* Per-element version masks.
**
** Each VERS_ELEM_<NAME> macro is the bitwise OR of thirteen column
** entries, always written in the same order:
**   HT20 HT32 H40T H41T X10T H40F H41F X10F H40S H41S X10S XH11 XB10
** A column holds either that flag or the placeholder xxxx, which keeps
** the columns aligned where the flag is left out.
**
** NOTE(review): the flags and xxxx are defined outside this file.
** Presumably xxxx is 0 and each flag stands for one HTML/XHTML document
** type version -- confirm against the header that declares them.
**
** The masks are used as the third field of the W3C entries in the
** tag_defs table below.
*/
26 #define VERS_ELEM_A          (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
27 #define VERS_ELEM_ABBR       (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
28 #define VERS_ELEM_ACRONYM    (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
29 #define VERS_ELEM_ADDRESS    (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
30 #define VERS_ELEM_APPLET     (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
31 #define VERS_ELEM_AREA       (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
32 #define VERS_ELEM_B          (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
33 #define VERS_ELEM_BASE       (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
34 #define VERS_ELEM_BASEFONT   (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
35 #define VERS_ELEM_BDO        (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
36 #define VERS_ELEM_BIG        (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
37 #define VERS_ELEM_BLOCKQUOTE (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
38 #define VERS_ELEM_BODY       (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
39 #define VERS_ELEM_BR         (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
40 #define VERS_ELEM_BUTTON     (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
41 #define VERS_ELEM_CAPTION    (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
42 #define VERS_ELEM_CENTER     (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
43 #define VERS_ELEM_CITE       (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
44 #define VERS_ELEM_CODE       (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
45 #define VERS_ELEM_COL        (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
46 #define VERS_ELEM_COLGROUP   (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
47 #define VERS_ELEM_DD         (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
48 #define VERS_ELEM_DEL        (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
49 #define VERS_ELEM_DFN        (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
50 #define VERS_ELEM_DIR        (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
51 #define VERS_ELEM_DIV        (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
52 #define VERS_ELEM_DL         (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
53 #define VERS_ELEM_DT         (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
54 #define VERS_ELEM_EM         (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
55 #define VERS_ELEM_FIELDSET   (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
56 #define VERS_ELEM_FONT       (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
57 #define VERS_ELEM_FORM       (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
58 #define VERS_ELEM_FRAME      (xxxx|xxxx|xxxx|xxxx|xxxx|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
59 #define VERS_ELEM_FRAMESET   (xxxx|xxxx|xxxx|xxxx|xxxx|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
60 #define VERS_ELEM_H1         (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
61 #define VERS_ELEM_H2         (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
62 #define VERS_ELEM_H3         (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
63 #define VERS_ELEM_H4         (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
64 #define VERS_ELEM_H5         (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
65 #define VERS_ELEM_H6         (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
66 #define VERS_ELEM_HEAD       (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
67 #define VERS_ELEM_HR         (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
68 #define VERS_ELEM_HTML       (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
69 #define VERS_ELEM_I          (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
70 #define VERS_ELEM_IFRAME     (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
71 #define VERS_ELEM_IMG        (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
72 #define VERS_ELEM_INPUT      (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
73 #define VERS_ELEM_INS        (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
74 #define VERS_ELEM_ISINDEX    (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
75 #define VERS_ELEM_KBD        (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
76 #define VERS_ELEM_LABEL      (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
77 #define VERS_ELEM_LEGEND     (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
78 #define VERS_ELEM_LI         (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
79 #define VERS_ELEM_LINK       (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
80 #define VERS_ELEM_LISTING    (HT20|HT32|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx)
81 #define VERS_ELEM_MAP        (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
82 #define VERS_ELEM_MENU       (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
83 #define VERS_ELEM_META       (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
84 #define VERS_ELEM_NEXTID     (HT20|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx)
85 #define VERS_ELEM_NOFRAMES   (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
86 #define VERS_ELEM_NOSCRIPT   (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
87 #define VERS_ELEM_OBJECT     (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
88 #define VERS_ELEM_OL         (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
89 #define VERS_ELEM_OPTGROUP   (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
90 #define VERS_ELEM_OPTION     (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
91 #define VERS_ELEM_P          (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
92 #define VERS_ELEM_PARAM      (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
93 #define VERS_ELEM_PLAINTEXT  (HT20|HT32|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx)
94 #define VERS_ELEM_PRE        (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
95 #define VERS_ELEM_Q          (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
96 #define VERS_ELEM_RB         (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|XH11|xxxx)
97 #define VERS_ELEM_RBC        (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|XH11|xxxx)
98 #define VERS_ELEM_RP         (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|XH11|xxxx)
99 #define VERS_ELEM_RT         (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|XH11|xxxx)
100 #define VERS_ELEM_RTC        (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|XH11|xxxx)
101 #define VERS_ELEM_RUBY       (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|XH11|xxxx)
102 #define VERS_ELEM_S          (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
103 #define VERS_ELEM_SAMP       (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
104 #define VERS_ELEM_SCRIPT     (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
105 #define VERS_ELEM_SELECT     (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
106 #define VERS_ELEM_SMALL      (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
107 #define VERS_ELEM_SPAN       (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
108 #define VERS_ELEM_STRIKE     (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
109 #define VERS_ELEM_STRONG     (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
110 #define VERS_ELEM_STYLE      (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
111 #define VERS_ELEM_SUB        (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
112 #define VERS_ELEM_SUP        (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
113 #define VERS_ELEM_TABLE      (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
114 #define VERS_ELEM_TBODY      (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
115 #define VERS_ELEM_TD         (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
116 #define VERS_ELEM_TEXTAREA   (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
117 #define VERS_ELEM_TFOOT      (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
118 #define VERS_ELEM_TH         (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
119 #define VERS_ELEM_THEAD      (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
120 #define VERS_ELEM_TITLE      (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
121 #define VERS_ELEM_TR         (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
122 #define VERS_ELEM_TT         (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
123 #define VERS_ELEM_U          (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
124 #define VERS_ELEM_UL         (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
125 #define VERS_ELEM_VAR        (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
126 #define VERS_ELEM_XMP        (HT20|HT32|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx)
127 
/* Static table of the element definitions known to this file.
**
** Columns of each entry, left to right as written below:
**   1. TidyTag_* identifier
**   2. element name as a lower-case string
**   3. version mask: VERS_ELEM_* for the W3C elements, a vendor
**      VERS_* value for the proprietary ones
**   4. address of the first item of the element's W3CAttrsFor_* array,
**      or NULL where the entry supplies none
**   5. CM_* content model flags
**   6. parser function, or NULL
**   7. attribute checking function (one of the Check* routines
**      declared at the top of the file), or NULL
**
** The first entry is the "unknown!" placeholder; the last entry has a
** NULL name and must stay last.
** NOTE(review): the field names of Dict are not visible here; the
** column descriptions above are read off the initializers -- confirm
** against the declaration of Dict.
*/
128 static const Dict tag_defs[] =
129 {
130   { TidyTag_UNKNOWN,    "unknown!",   VERS_UNKNOWN,         NULL,                       (0),                                           NULL,          NULL           },
131 
132   /* W3C defined elements */
133   { TidyTag_A,          "a",          VERS_ELEM_A,          &TY_(W3CAttrsFor_A)[0],          (CM_INLINE),                                   TY_(ParseInline),   NULL           },
134   { TidyTag_ABBR,       "abbr",       VERS_ELEM_ABBR,       &TY_(W3CAttrsFor_ABBR)[0],       (CM_INLINE),                                   TY_(ParseInline),   NULL           },
135   { TidyTag_ACRONYM,    "acronym",    VERS_ELEM_ACRONYM,    &TY_(W3CAttrsFor_ACRONYM)[0],    (CM_INLINE),                                   TY_(ParseInline),   NULL           },
136   { TidyTag_ADDRESS,    "address",    VERS_ELEM_ADDRESS,    &TY_(W3CAttrsFor_ADDRESS)[0],    (CM_BLOCK),                                    TY_(ParseInline),   NULL           },
137   { TidyTag_APPLET,     "applet",     VERS_ELEM_APPLET,     &TY_(W3CAttrsFor_APPLET)[0],     (CM_OBJECT|CM_IMG|CM_INLINE|CM_PARAM),         TY_(ParseBlock),    NULL           },
138   { TidyTag_AREA,       "area",       VERS_ELEM_AREA,       &TY_(W3CAttrsFor_AREA)[0],       (CM_BLOCK|CM_EMPTY),                           TY_(ParseEmpty),    CheckAREA      },
139   { TidyTag_B,          "b",          VERS_ELEM_B,          &TY_(W3CAttrsFor_B)[0],          (CM_INLINE),                                   TY_(ParseInline),   NULL           },
140   { TidyTag_BASE,       "base",       VERS_ELEM_BASE,       &TY_(W3CAttrsFor_BASE)[0],       (CM_HEAD|CM_EMPTY),                            TY_(ParseEmpty),    NULL           },
141   { TidyTag_BASEFONT,   "basefont",   VERS_ELEM_BASEFONT,   &TY_(W3CAttrsFor_BASEFONT)[0],   (CM_INLINE|CM_EMPTY),                          TY_(ParseEmpty),    NULL           },
142   { TidyTag_BDO,        "bdo",        VERS_ELEM_BDO,        &TY_(W3CAttrsFor_BDO)[0],        (CM_INLINE),                                   TY_(ParseInline),   NULL           },
143   { TidyTag_BIG,        "big",        VERS_ELEM_BIG,        &TY_(W3CAttrsFor_BIG)[0],        (CM_INLINE),                                   TY_(ParseInline),   NULL           },
144   { TidyTag_BLOCKQUOTE, "blockquote", VERS_ELEM_BLOCKQUOTE, &TY_(W3CAttrsFor_BLOCKQUOTE)[0], (CM_BLOCK),                                    TY_(ParseBlock),    NULL           },
145   { TidyTag_BODY,       "body",       VERS_ELEM_BODY,       &TY_(W3CAttrsFor_BODY)[0],       (CM_HTML|CM_OPT|CM_OMITST),                    TY_(ParseBody),     NULL           },
146   { TidyTag_BR,         "br",         VERS_ELEM_BR,         &TY_(W3CAttrsFor_BR)[0],         (CM_INLINE|CM_EMPTY),                          TY_(ParseEmpty),    NULL           },
147   { TidyTag_BUTTON,     "button",     VERS_ELEM_BUTTON,     &TY_(W3CAttrsFor_BUTTON)[0],     (CM_INLINE),                                   TY_(ParseBlock),    NULL           },
148   { TidyTag_CAPTION,    "caption",    VERS_ELEM_CAPTION,    &TY_(W3CAttrsFor_CAPTION)[0],    (CM_TABLE),                                    TY_(ParseInline),   CheckCaption   },
149   { TidyTag_CENTER,     "center",     VERS_ELEM_CENTER,     &TY_(W3CAttrsFor_CENTER)[0],     (CM_BLOCK),                                    TY_(ParseBlock),    NULL           },
150   { TidyTag_CITE,       "cite",       VERS_ELEM_CITE,       &TY_(W3CAttrsFor_CITE)[0],       (CM_INLINE),                                   TY_(ParseInline),   NULL           },
151   { TidyTag_CODE,       "code",       VERS_ELEM_CODE,       &TY_(W3CAttrsFor_CODE)[0],       (CM_INLINE),                                   TY_(ParseInline),   NULL           },
152   { TidyTag_COL,        "col",        VERS_ELEM_COL,        &TY_(W3CAttrsFor_COL)[0],        (CM_TABLE|CM_EMPTY),                           TY_(ParseEmpty),    NULL           },
153   { TidyTag_COLGROUP,   "colgroup",   VERS_ELEM_COLGROUP,   &TY_(W3CAttrsFor_COLGROUP)[0],   (CM_TABLE|CM_OPT),                             TY_(ParseColGroup), NULL           },
154   { TidyTag_DD,         "dd",         VERS_ELEM_DD,         &TY_(W3CAttrsFor_DD)[0],         (CM_DEFLIST|CM_OPT|CM_NO_INDENT),              TY_(ParseBlock),    NULL           },
155   { TidyTag_DEL,        "del",        VERS_ELEM_DEL,        &TY_(W3CAttrsFor_DEL)[0],        (CM_INLINE|CM_BLOCK|CM_MIXED),                 TY_(ParseInline),   NULL           },
156   { TidyTag_DFN,        "dfn",        VERS_ELEM_DFN,        &TY_(W3CAttrsFor_DFN)[0],        (CM_INLINE),                                   TY_(ParseInline),   NULL           },
157   { TidyTag_DIR,        "dir",        VERS_ELEM_DIR,        &TY_(W3CAttrsFor_DIR)[0],        (CM_BLOCK|CM_OBSOLETE),                        TY_(ParseList),     NULL           },
158   { TidyTag_DIV,        "div",        VERS_ELEM_DIV,        &TY_(W3CAttrsFor_DIV)[0],        (CM_BLOCK),                                    TY_(ParseBlock),    NULL           },
159   { TidyTag_DL,         "dl",         VERS_ELEM_DL,         &TY_(W3CAttrsFor_DL)[0],         (CM_BLOCK),                                    TY_(ParseDefList),  NULL           },
160   { TidyTag_DT,         "dt",         VERS_ELEM_DT,         &TY_(W3CAttrsFor_DT)[0],         (CM_DEFLIST|CM_OPT|CM_NO_INDENT),              TY_(ParseInline),   NULL           },
161   { TidyTag_EM,         "em",         VERS_ELEM_EM,         &TY_(W3CAttrsFor_EM)[0],         (CM_INLINE),                                   TY_(ParseInline),   NULL           },
162   { TidyTag_FIELDSET,   "fieldset",   VERS_ELEM_FIELDSET,   &TY_(W3CAttrsFor_FIELDSET)[0],   (CM_BLOCK),                                    TY_(ParseBlock),    NULL           },
163   { TidyTag_FONT,       "font",       VERS_ELEM_FONT,       &TY_(W3CAttrsFor_FONT)[0],       (CM_INLINE),                                   TY_(ParseInline),   NULL           },
164   { TidyTag_FORM,       "form",       VERS_ELEM_FORM,       &TY_(W3CAttrsFor_FORM)[0],       (CM_BLOCK),                                    TY_(ParseBlock),    CheckFORM      },
165   { TidyTag_FRAME,      "frame",      VERS_ELEM_FRAME,      &TY_(W3CAttrsFor_FRAME)[0],      (CM_FRAMES|CM_EMPTY),                          TY_(ParseEmpty),    NULL           },
166   { TidyTag_FRAMESET,   "frameset",   VERS_ELEM_FRAMESET,   &TY_(W3CAttrsFor_FRAMESET)[0],   (CM_HTML|CM_FRAMES),                           TY_(ParseFrameSet), NULL           },
167   { TidyTag_H1,         "h1",         VERS_ELEM_H1,         &TY_(W3CAttrsFor_H1)[0],         (CM_BLOCK|CM_HEADING),                         TY_(ParseInline),   NULL           },
168   { TidyTag_H2,         "h2",         VERS_ELEM_H2,         &TY_(W3CAttrsFor_H2)[0],         (CM_BLOCK|CM_HEADING),                         TY_(ParseInline),   NULL           },
169   { TidyTag_H3,         "h3",         VERS_ELEM_H3,         &TY_(W3CAttrsFor_H3)[0],         (CM_BLOCK|CM_HEADING),                         TY_(ParseInline),   NULL           },
170   { TidyTag_H4,         "h4",         VERS_ELEM_H4,         &TY_(W3CAttrsFor_H4)[0],         (CM_BLOCK|CM_HEADING),                         TY_(ParseInline),   NULL           },
171   { TidyTag_H5,         "h5",         VERS_ELEM_H5,         &TY_(W3CAttrsFor_H5)[0],         (CM_BLOCK|CM_HEADING),                         TY_(ParseInline),   NULL           },
172   { TidyTag_H6,         "h6",         VERS_ELEM_H6,         &TY_(W3CAttrsFor_H6)[0],         (CM_BLOCK|CM_HEADING),                         TY_(ParseInline),   NULL           },
173   { TidyTag_HEAD,       "head",       VERS_ELEM_HEAD,       &TY_(W3CAttrsFor_HEAD)[0],       (CM_HTML|CM_OPT|CM_OMITST),                    TY_(ParseHead),     NULL           },
174   { TidyTag_HR,         "hr",         VERS_ELEM_HR,         &TY_(W3CAttrsFor_HR)[0],         (CM_BLOCK|CM_EMPTY),                           TY_(ParseEmpty),    NULL           },
175   { TidyTag_HTML,       "html",       VERS_ELEM_HTML,       &TY_(W3CAttrsFor_HTML)[0],       (CM_HTML|CM_OPT|CM_OMITST),                    TY_(ParseHTML),     CheckHTML      },
176   { TidyTag_I,          "i",          VERS_ELEM_I,          &TY_(W3CAttrsFor_I)[0],          (CM_INLINE),                                   TY_(ParseInline),   NULL           },
177   { TidyTag_IFRAME,     "iframe",     VERS_ELEM_IFRAME,     &TY_(W3CAttrsFor_IFRAME)[0],     (CM_INLINE),                                   TY_(ParseBlock),    NULL           },
178   { TidyTag_IMG,        "img",        VERS_ELEM_IMG,        &TY_(W3CAttrsFor_IMG)[0],        (CM_INLINE|CM_IMG|CM_EMPTY),                   TY_(ParseEmpty),    CheckIMG       },
179   { TidyTag_INPUT,      "input",      VERS_ELEM_INPUT,      &TY_(W3CAttrsFor_INPUT)[0],      (CM_INLINE|CM_IMG|CM_EMPTY),                   TY_(ParseEmpty),    NULL           },
180   { TidyTag_INS,        "ins",        VERS_ELEM_INS,        &TY_(W3CAttrsFor_INS)[0],        (CM_INLINE|CM_BLOCK|CM_MIXED),                 TY_(ParseInline),   NULL           },
181   { TidyTag_ISINDEX,    "isindex",    VERS_ELEM_ISINDEX,    &TY_(W3CAttrsFor_ISINDEX)[0],    (CM_BLOCK|CM_EMPTY),                           TY_(ParseEmpty),    NULL           },
182   { TidyTag_KBD,        "kbd",        VERS_ELEM_KBD,        &TY_(W3CAttrsFor_KBD)[0],        (CM_INLINE),                                   TY_(ParseInline),   NULL           },
183   { TidyTag_LABEL,      "label",      VERS_ELEM_LABEL,      &TY_(W3CAttrsFor_LABEL)[0],      (CM_INLINE),                                   TY_(ParseInline),   NULL           },
184   { TidyTag_LEGEND,     "legend",     VERS_ELEM_LEGEND,     &TY_(W3CAttrsFor_LEGEND)[0],     (CM_INLINE),                                   TY_(ParseInline),   NULL           },
185   { TidyTag_LI,         "li",         VERS_ELEM_LI,         &TY_(W3CAttrsFor_LI)[0],         (CM_LIST|CM_OPT|CM_NO_INDENT),                 TY_(ParseBlock),    NULL           },
186   { TidyTag_LINK,       "link",       VERS_ELEM_LINK,       &TY_(W3CAttrsFor_LINK)[0],       (CM_HEAD|CM_EMPTY),                            TY_(ParseEmpty),    CheckLINK      },
187   { TidyTag_LISTING,    "listing",    VERS_ELEM_LISTING,    &TY_(W3CAttrsFor_LISTING)[0],    (CM_BLOCK|CM_OBSOLETE),                        TY_(ParsePre),      NULL           },
188   { TidyTag_MAP,        "map",        VERS_ELEM_MAP,        &TY_(W3CAttrsFor_MAP)[0],        (CM_INLINE),                                   TY_(ParseBlock),    NULL           },
189   { TidyTag_MENU,       "menu",       VERS_ELEM_MENU,       &TY_(W3CAttrsFor_MENU)[0],       (CM_BLOCK|CM_OBSOLETE),                        TY_(ParseList),     NULL           },
190   { TidyTag_META,       "meta",       VERS_ELEM_META,       &TY_(W3CAttrsFor_META)[0],       (CM_HEAD|CM_EMPTY),                            TY_(ParseEmpty),    CheckMETA      },
191   { TidyTag_NOFRAMES,   "noframes",   VERS_ELEM_NOFRAMES,   &TY_(W3CAttrsFor_NOFRAMES)[0],   (CM_BLOCK|CM_FRAMES),                          TY_(ParseNoFrames), NULL           },
192   { TidyTag_NOSCRIPT,   "noscript",   VERS_ELEM_NOSCRIPT,   &TY_(W3CAttrsFor_NOSCRIPT)[0],   (CM_BLOCK|CM_INLINE|CM_MIXED),                 TY_(ParseBlock),    NULL           },
193   { TidyTag_OBJECT,     "object",     VERS_ELEM_OBJECT,     &TY_(W3CAttrsFor_OBJECT)[0],     (CM_OBJECT|CM_HEAD|CM_IMG|CM_INLINE|CM_PARAM), TY_(ParseBlock),    NULL           },
194   { TidyTag_OL,         "ol",         VERS_ELEM_OL,         &TY_(W3CAttrsFor_OL)[0],         (CM_BLOCK),                                    TY_(ParseList),     NULL           },
195   { TidyTag_OPTGROUP,   "optgroup",   VERS_ELEM_OPTGROUP,   &TY_(W3CAttrsFor_OPTGROUP)[0],   (CM_FIELD|CM_OPT),                             TY_(ParseOptGroup), NULL           },
196   { TidyTag_OPTION,     "option",     VERS_ELEM_OPTION,     &TY_(W3CAttrsFor_OPTION)[0],     (CM_FIELD|CM_OPT),                             TY_(ParseText),     NULL           },
197   { TidyTag_P,          "p",          VERS_ELEM_P,          &TY_(W3CAttrsFor_P)[0],          (CM_BLOCK|CM_OPT),                             TY_(ParseInline),   NULL           },
198   { TidyTag_PARAM,      "param",      VERS_ELEM_PARAM,      &TY_(W3CAttrsFor_PARAM)[0],      (CM_INLINE|CM_EMPTY),                          TY_(ParseEmpty),    NULL           },
199   { TidyTag_PLAINTEXT,  "plaintext",  VERS_ELEM_PLAINTEXT,  &TY_(W3CAttrsFor_PLAINTEXT)[0],  (CM_BLOCK|CM_OBSOLETE),                        TY_(ParsePre),      NULL           },
200   { TidyTag_PRE,        "pre",        VERS_ELEM_PRE,        &TY_(W3CAttrsFor_PRE)[0],        (CM_BLOCK),                                    TY_(ParsePre),      NULL           },
201   { TidyTag_Q,          "q",          VERS_ELEM_Q,          &TY_(W3CAttrsFor_Q)[0],          (CM_INLINE),                                   TY_(ParseInline),   NULL           },
202   { TidyTag_RB,         "rb",         VERS_ELEM_RB,         &TY_(W3CAttrsFor_RB)[0],         (CM_INLINE),                                   TY_(ParseInline),   NULL           },
203   { TidyTag_RBC,        "rbc",        VERS_ELEM_RBC,        &TY_(W3CAttrsFor_RBC)[0],        (CM_INLINE),                                   TY_(ParseInline),   NULL           },
204   { TidyTag_RP,         "rp",         VERS_ELEM_RP,         &TY_(W3CAttrsFor_RP)[0],         (CM_INLINE),                                   TY_(ParseInline),   NULL           },
205   { TidyTag_RT,         "rt",         VERS_ELEM_RT,         &TY_(W3CAttrsFor_RT)[0],         (CM_INLINE),                                   TY_(ParseInline),   NULL           },
206   { TidyTag_RTC,        "rtc",        VERS_ELEM_RTC,        &TY_(W3CAttrsFor_RTC)[0],        (CM_INLINE),                                   TY_(ParseInline),   NULL           },
207   { TidyTag_RUBY,       "ruby",       VERS_ELEM_RUBY,       &TY_(W3CAttrsFor_RUBY)[0],       (CM_INLINE),                                   TY_(ParseInline),   NULL           },
208   { TidyTag_S,          "s",          VERS_ELEM_S,          &TY_(W3CAttrsFor_S)[0],          (CM_INLINE),                                   TY_(ParseInline),   NULL           },
209   { TidyTag_SAMP,       "samp",       VERS_ELEM_SAMP,       &TY_(W3CAttrsFor_SAMP)[0],       (CM_INLINE),                                   TY_(ParseInline),   NULL           },
210   { TidyTag_SCRIPT,     "script",     VERS_ELEM_SCRIPT,     &TY_(W3CAttrsFor_SCRIPT)[0],     (CM_HEAD|CM_MIXED|CM_BLOCK|CM_INLINE),         TY_(ParseScript),   CheckSCRIPT    },
211   { TidyTag_SELECT,     "select",     VERS_ELEM_SELECT,     &TY_(W3CAttrsFor_SELECT)[0],     (CM_INLINE|CM_FIELD),                          TY_(ParseSelect),   NULL           },
212   { TidyTag_SMALL,      "small",      VERS_ELEM_SMALL,      &TY_(W3CAttrsFor_SMALL)[0],      (CM_INLINE),                                   TY_(ParseInline),   NULL           },
213   { TidyTag_SPAN,       "span",       VERS_ELEM_SPAN,       &TY_(W3CAttrsFor_SPAN)[0],       (CM_INLINE),                                   TY_(ParseInline),   NULL           },
214   { TidyTag_STRIKE,     "strike",     VERS_ELEM_STRIKE,     &TY_(W3CAttrsFor_STRIKE)[0],     (CM_INLINE),                                   TY_(ParseInline),   NULL           },
215   { TidyTag_STRONG,     "strong",     VERS_ELEM_STRONG,     &TY_(W3CAttrsFor_STRONG)[0],     (CM_INLINE),                                   TY_(ParseInline),   NULL           },
216   { TidyTag_STYLE,      "style",      VERS_ELEM_STYLE,      &TY_(W3CAttrsFor_STYLE)[0],      (CM_HEAD),                                     TY_(ParseScript),   CheckSTYLE     },
217   { TidyTag_SUB,        "sub",        VERS_ELEM_SUB,        &TY_(W3CAttrsFor_SUB)[0],        (CM_INLINE),                                   TY_(ParseInline),   NULL           },
218   { TidyTag_SUP,        "sup",        VERS_ELEM_SUP,        &TY_(W3CAttrsFor_SUP)[0],        (CM_INLINE),                                   TY_(ParseInline),   NULL           },
219   { TidyTag_TABLE,      "table",      VERS_ELEM_TABLE,      &TY_(W3CAttrsFor_TABLE)[0],      (CM_BLOCK),                                    TY_(ParseTableTag), CheckTABLE     },
220   { TidyTag_TBODY,      "tbody",      VERS_ELEM_TBODY,      &TY_(W3CAttrsFor_TBODY)[0],      (CM_TABLE|CM_ROWGRP|CM_OPT),                   TY_(ParseRowGroup), NULL           },
221   { TidyTag_TD,         "td",         VERS_ELEM_TD,         &TY_(W3CAttrsFor_TD)[0],         (CM_ROW|CM_OPT|CM_NO_INDENT),                  TY_(ParseBlock),    NULL           },
222   { TidyTag_TEXTAREA,   "textarea",   VERS_ELEM_TEXTAREA,   &TY_(W3CAttrsFor_TEXTAREA)[0],   (CM_INLINE|CM_FIELD),                          TY_(ParseText),     NULL           },
223   { TidyTag_TFOOT,      "tfoot",      VERS_ELEM_TFOOT,      &TY_(W3CAttrsFor_TFOOT)[0],      (CM_TABLE|CM_ROWGRP|CM_OPT),                   TY_(ParseRowGroup), NULL           },
224   { TidyTag_TH,         "th",         VERS_ELEM_TH,         &TY_(W3CAttrsFor_TH)[0],         (CM_ROW|CM_OPT|CM_NO_INDENT),                  TY_(ParseBlock),    NULL           },
225   { TidyTag_THEAD,      "thead",      VERS_ELEM_THEAD,      &TY_(W3CAttrsFor_THEAD)[0],      (CM_TABLE|CM_ROWGRP|CM_OPT),                   TY_(ParseRowGroup), NULL           },
226   { TidyTag_TITLE,      "title",      VERS_ELEM_TITLE,      &TY_(W3CAttrsFor_TITLE)[0],      (CM_HEAD),                                     TY_(ParseTitle),    NULL           },
227   { TidyTag_TR,         "tr",         VERS_ELEM_TR,         &TY_(W3CAttrsFor_TR)[0],         (CM_TABLE|CM_OPT),                             TY_(ParseRow),      NULL           },
228   { TidyTag_TT,         "tt",         VERS_ELEM_TT,         &TY_(W3CAttrsFor_TT)[0],         (CM_INLINE),                                   TY_(ParseInline),   NULL           },
229   { TidyTag_U,          "u",          VERS_ELEM_U,          &TY_(W3CAttrsFor_U)[0],          (CM_INLINE),                                   TY_(ParseInline),   NULL           },
230   { TidyTag_UL,         "ul",         VERS_ELEM_UL,         &TY_(W3CAttrsFor_UL)[0],         (CM_BLOCK),                                    TY_(ParseList),     NULL           },
231   { TidyTag_VAR,        "var",        VERS_ELEM_VAR,        &TY_(W3CAttrsFor_VAR)[0],        (CM_INLINE),                                   TY_(ParseInline),   NULL           },
232   { TidyTag_XMP,        "xmp",        VERS_ELEM_XMP,        &TY_(W3CAttrsFor_XMP)[0],        (CM_BLOCK|CM_OBSOLETE),                        TY_(ParsePre),      NULL           },
233   { TidyTag_NEXTID,     "nextid",     VERS_ELEM_NEXTID,     &TY_(W3CAttrsFor_NEXTID)[0],     (CM_HEAD|CM_EMPTY),                            TY_(ParseEmpty),    NULL           },
234 
235   /* proprietary elements */
236   { TidyTag_ALIGN,      "align",      VERS_NETSCAPE,        NULL,                       (CM_BLOCK),                                    TY_(ParseBlock),    NULL           },
237   { TidyTag_BGSOUND,    "bgsound",    VERS_MICROSOFT,       NULL,                       (CM_HEAD|CM_EMPTY),                            TY_(ParseEmpty),    NULL           },
238   { TidyTag_BLINK,      "blink",      VERS_PROPRIETARY,     NULL,                       (CM_INLINE),                                   TY_(ParseInline),   NULL           },
239   { TidyTag_COMMENT,    "comment",    VERS_MICROSOFT,       NULL,                       (CM_INLINE),                                   TY_(ParseInline),   NULL           },
240   { TidyTag_EMBED,      "embed",      VERS_NETSCAPE,        NULL,                       (CM_INLINE|CM_IMG|CM_EMPTY),                   TY_(ParseEmpty),    NULL           },
241   { TidyTag_ILAYER,     "ilayer",     VERS_NETSCAPE,        NULL,                       (CM_INLINE),                                   TY_(ParseInline),   NULL           },
242   { TidyTag_KEYGEN,     "keygen",     VERS_NETSCAPE,        NULL,                       (CM_INLINE|CM_EMPTY),                          TY_(ParseEmpty),    NULL           },
243   { TidyTag_LAYER,      "layer",      VERS_NETSCAPE,        NULL,                       (CM_BLOCK),                                    TY_(ParseBlock),    NULL           },
244   { TidyTag_MARQUEE,    "marquee",    VERS_MICROSOFT,       NULL,                       (CM_INLINE|CM_OPT),                            TY_(ParseInline),   NULL           },
245   { TidyTag_MULTICOL,   "multicol",   VERS_NETSCAPE,        NULL,                       (CM_BLOCK),                                    TY_(ParseBlock),    NULL           },
246   { TidyTag_NOBR,       "nobr",       VERS_PROPRIETARY,     NULL,                       (CM_INLINE),                                   TY_(ParseInline),   NULL           },
247   { TidyTag_NOEMBED,    "noembed",    VERS_NETSCAPE,        NULL,                       (CM_INLINE),                                   TY_(ParseInline),   NULL           },
248   { TidyTag_NOLAYER,    "nolayer",    VERS_NETSCAPE,        NULL,                       (CM_BLOCK|CM_INLINE|CM_MIXED),                 TY_(ParseBlock),    NULL           },
249   { TidyTag_NOSAVE,     "nosave",     VERS_NETSCAPE,        NULL,                       (CM_BLOCK),                                    TY_(ParseBlock),    NULL           },
250   { TidyTag_SERVER,     "server",     VERS_NETSCAPE,        NULL,                       (CM_HEAD|CM_MIXED|CM_BLOCK|CM_INLINE),         TY_(ParseScript),   NULL           },
251   { TidyTag_SERVLET,    "servlet",    VERS_SUN,             NULL,                       (CM_OBJECT|CM_IMG|CM_INLINE|CM_PARAM),         TY_(ParseBlock),    NULL           },
252   { TidyTag_SPACER,     "spacer",     VERS_NETSCAPE,        NULL,                       (CM_INLINE|CM_EMPTY),                          TY_(ParseEmpty),    NULL           },
253   { TidyTag_WBR,        "wbr",        VERS_PROPRIETARY,     NULL,                       (CM_INLINE|CM_EMPTY),                          TY_(ParseEmpty),    NULL           },
254 
255   /* this must be the final entry */
256   { (TidyTagId)0,        NULL,         0,                    NULL,                       (0),                                           NULL,          NULL           }
257 };
258 
259 #if ELEMENT_HASH_LOOKUP
tagsHash(ctmbstr s)260 static uint tagsHash(ctmbstr s)
261 {
262     uint hashval;
263 
264     for (hashval = 0; *s != '\0'; s++)
265         hashval = *s + 31*hashval;
266 
267     return hashval % ELEMENT_HASH_SIZE;
268 }
269 
/* Record the dictionary entry `old` in the hash table of `tags`.
**
** A new DictHash node is allocated with TidyDocAlloc() and pushed onto
** the front of the bucket selected by tagsHash(old->name).
** Returns `old` unchanged; a NULL `old` is returned as-is and nothing
** is inserted.
*/
static const Dict *tagsInstall(TidyDocImpl* doc, TidyTagImpl* tags, const Dict* old)
{
    DictHash *entry;
    uint bucket;

    if (old == NULL)
        return NULL;

    entry = (DictHash *)TidyDocAlloc(doc, sizeof(DictHash));
    entry->tag = old;

    /* prepend to the bucket's chain */
    bucket = tagsHash(old->name);
    entry->next = tags->hashtab[bucket];
    tags->hashtab[bucket] = entry;

    return old;
}
287 
/* Remove the first hash entry whose tag name equals s and release the
** DictHash node with TidyDocFree().  The Dict it refers to is not
** freed here.  Does nothing when no entry in the bucket matches.
** The walk also stops at the first node whose tag pointer is NULL.
*/
static void tagsRemoveFromHash( TidyDocImpl* doc, TidyTagImpl* tags, ctmbstr s )
{
    /* link always addresses the pointer that leads to the current node,
       so unlinking needs no special case for the bucket head */
    DictHash **link = &tags->hashtab[tagsHash(s)];
    DictHash *entry;

    while ((entry = *link) != NULL && entry->tag != NULL)
    {
        if (TY_(tmbstrcmp)(s, entry->tag->name) == 0)
        {
            *link = entry->next;
            TidyDocFree(doc, entry);
            return;
        }
        link = &entry->next;
    }
}
307 
/* Release every DictHash node in all ELEMENT_HASH_SIZE buckets with
** TidyDocFree() and reset each bucket to NULL.  Only the hash nodes
** are freed, not the Dict entries they point to.
*/
static void tagsEmptyHash( TidyDocImpl* doc, TidyTagImpl* tags )
{
    uint bucket;

    for (bucket = 0; bucket < ELEMENT_HASH_SIZE; bucket++)
    {
        DictHash *entry = tags->hashtab[bucket];

        while (entry != NULL)
        {
            /* fetch the successor before the node is released */
            DictHash *following = entry->next;
            TidyDocFree(doc, entry);
            entry = following;
        }

        tags->hashtab[bucket] = NULL;
    }
}
328 #endif /* ELEMENT_HASH_LOOKUP */
329 
tagsLookup(TidyDocImpl * doc,TidyTagImpl * tags,ctmbstr s)330 static const Dict* tagsLookup( TidyDocImpl* doc, TidyTagImpl* tags, ctmbstr s )
331 {
332     const Dict *np;
333 #if ELEMENT_HASH_LOOKUP
334     const DictHash* p;
335 #endif
336 
337     if (!s)
338         return NULL;
339 
340 #if ELEMENT_HASH_LOOKUP
341     /* this breaks if declared elements get changed between two   */
342     /* parser runs since Tidy would use the cached version rather */
343     /* than the new one.                                          */
344     /* However, as FreeDeclaredTags() correctly cleans the hash   */
345     /* this should not be true anymore.                           */
346     for (p = tags->hashtab[tagsHash(s)]; p && p->tag; p = p->next)
347         if (TY_(tmbstrcmp)(s, p->tag->name) == 0)
348             return p->tag;
349 
350     for (np = tag_defs + 1; np < tag_defs + N_TIDY_TAGS; ++np)
351         if (TY_(tmbstrcmp)(s, np->name) == 0)
352             return tagsInstall(doc, tags, np);
353 
354     for (np = tags->declared_tag_list; np; np = np->next)
355         if (TY_(tmbstrcmp)(s, np->name) == 0)
356             return tagsInstall(doc, tags, np);
357 #else
358 
359     for (np = tag_defs + 1; np < tag_defs + N_TIDY_TAGS; ++np)
360         if (TY_(tmbstrcmp)(s, np->name) == 0)
361             return np;
362 
363     for (np = tags->declared_tag_list; np; np = np->next)
364         if (TY_(tmbstrcmp)(s, np->name) == 0)
365             return np;
366 
367 #endif /* ELEMENT_HASH_LOOKUP */
368 
369     return NULL;
370 }
371 
NewDict(TidyDocImpl * doc,ctmbstr name)372 static Dict* NewDict( TidyDocImpl* doc, ctmbstr name )
373 {
374     Dict * const np = (Dict*) TidyDocAlloc( doc, sizeof(Dict) );
375     np->id = TidyTag_UNKNOWN;
376     np->name = name ? TY_(tmbstrdup)( doc->allocator, name ) : NULL;
377     np->versions = VERS_UNKNOWN;
378     np->attrvers = NULL;
379     np->model = CM_UNKNOWN;
380     np->parser = 0;
381     np->chkattrs = 0;
382     np->next = NULL;
383     return np;
384 }
385 
/* Release a Dict: first its name string, then the entry itself, both
** through TidyDocFree().  The name is only touched when d is non-NULL;
** d is passed to TidyDocFree() in either case.
*/
static void FreeDict( TidyDocImpl* doc, Dict *d )
{
    if ( d != NULL )
    {
        TidyDocFree( doc, d->name );
    }

    TidyDocFree( doc, d );
}
392 
/* Declare (or re-declare) a tag called `name`.
**
** If tagsLookup() finds no entry, a fresh one is created with NewDict()
** and pushed onto the front of tags->declared_tag_list.  The supplied
** properties are applied only when the entry's id is TidyTag_UNKNOWN,
** so entries carrying any other id are left untouched.  Note that
** `model` is OR-ed into the existing model rather than replacing it.
** A NULL name is ignored.
*/
static void declare( TidyDocImpl* doc, TidyTagImpl* tags,
                     ctmbstr name, uint versions, uint model,
                     Parser *parser, CheckAttribs *chkattrs )
{
    Dict *entry;

    if ( name == NULL )
        return;

    entry = (Dict*) tagsLookup( doc, tags, name );
    if ( entry == NULL )
    {
        entry = NewDict( doc, name );
        entry->next = tags->declared_tag_list;
        tags->declared_tag_list = entry;
    }

    /* never overwrite a predefined tag */
    if ( entry->id != TidyTag_UNKNOWN )
        return;

    entry->versions = versions;
    entry->model   |= model;
    entry->parser   = parser;
    entry->chkattrs = chkattrs;
    entry->attrvers = NULL;
}
418 
419 /* public interface for finding tag by name */
TY_(FindTag)420 Bool TY_(FindTag)( TidyDocImpl* doc, Node *node )
421 {
422     const Dict *np = NULL;
423     if ( cfgBool(doc, TidyXmlTags) )
424     {
425         node->tag = doc->tags.xml_tags;
426         return yes;
427     }
428 
429     if ( node->element && (np = tagsLookup(doc, &doc->tags, node->element)) )
430     {
431         node->tag = np;
432         return yes;
433     }
434 
435     return no;
436 }
437 
TY_(LookupTagDef)438 const Dict* TY_(LookupTagDef)( TidyTagId tid )
439 {
440     const Dict *np;
441 
442     for (np = tag_defs + 1; np < tag_defs + N_TIDY_TAGS; ++np )
443         if (np->id == tid)
444             return np;
445 
446     return NULL;
447 }
448 
TY_(FindParser)449 Parser* TY_(FindParser)( TidyDocImpl* doc, Node *node )
450 {
451     const Dict* np = tagsLookup( doc, &doc->tags, node->element );
452     if ( np )
453         return np->parser;
454     return NULL;
455 }
456 
TY_(DefineTag)457 void TY_(DefineTag)( TidyDocImpl* doc, UserTagType tagType, ctmbstr name )
458 {
459     Parser* parser = 0;
460     uint cm = CM_UNKNOWN;
461     uint vers = VERS_PROPRIETARY;
462 
463     switch (tagType)
464     {
465     case tagtype_empty:
466         cm = CM_EMPTY|CM_NO_INDENT|CM_NEW;
467         parser = TY_(ParseBlock);
468         break;
469 
470     case tagtype_inline:
471         cm = CM_INLINE|CM_NO_INDENT|CM_NEW;
472         parser = TY_(ParseInline);
473         break;
474 
475     case tagtype_block:
476         cm = CM_BLOCK|CM_NO_INDENT|CM_NEW;
477         parser = TY_(ParseBlock);
478         break;
479 
480     case tagtype_pre:
481         cm = CM_BLOCK|CM_NO_INDENT|CM_NEW;
482         parser = TY_(ParsePre);
483         break;
484 
485     case tagtype_null:
486         break;
487     }
488     if ( cm && parser )
489         declare( doc, &doc->tags, name, vers, cm, parser, 0 );
490 }
491 
TY_(GetDeclaredTagList)492 TidyIterator   TY_(GetDeclaredTagList)( TidyDocImpl* doc )
493 {
494     return (TidyIterator) doc->tags.declared_tag_list;
495 }
496 
TY_(GetNextDeclaredTag)497 ctmbstr        TY_(GetNextDeclaredTag)( TidyDocImpl* ARG_UNUSED(doc),
498                                         UserTagType tagType, TidyIterator* iter )
499 {
500     ctmbstr name = NULL;
501     Dict* curr;
502     for ( curr = (Dict*) *iter; name == NULL && curr != NULL; curr = curr->next )
503     {
504         switch ( tagType )
505         {
506         case tagtype_empty:
507             if ( (curr->model & CM_EMPTY) != 0 )
508                 name = curr->name;
509             break;
510 
511         case tagtype_inline:
512             if ( (curr->model & CM_INLINE) != 0 )
513                 name = curr->name;
514             break;
515 
516         case tagtype_block:
517             if ( (curr->model & CM_BLOCK) != 0 &&
518                  curr->parser == TY_(ParseBlock) )
519                 name = curr->name;
520             break;
521 
522         case tagtype_pre:
523             if ( (curr->model & CM_BLOCK) != 0 &&
524                  curr->parser == TY_(ParsePre) )
525                 name = curr->name;
526             break;
527 
528         case tagtype_null:
529             break;
530         }
531     }
532     *iter = (TidyIterator) curr;
533     return name;
534 }
535 
TY_(InitTags)536 void TY_(InitTags)( TidyDocImpl* doc )
537 {
538     Dict* xml;
539     TidyTagImpl* tags = &doc->tags;
540 
541     TidyClearMemory( tags, sizeof(TidyTagImpl) );
542 
543     /* create dummy entry for all xml tags */
544     xml =  NewDict( doc, NULL );
545     xml->versions = VERS_XML;
546     xml->model = CM_BLOCK;
547     xml->parser = 0;
548     xml->chkattrs = 0;
549     xml->attrvers = NULL;
550     tags->xml_tags = xml;
551 }
552 
/* By default, zap all of them.  But allow
** a single type to be specified.
*/
TY_(FreeDeclaredTags)556 void TY_(FreeDeclaredTags)( TidyDocImpl* doc, UserTagType tagType )
557 {
558     TidyTagImpl* tags = &doc->tags;
559     Dict *curr, *next = NULL, *prev = NULL;
560 
561     for ( curr=tags->declared_tag_list; curr; curr = next )
562     {
563         Bool deleteIt = yes;
564         next = curr->next;
565         switch ( tagType )
566         {
567         case tagtype_empty:
568             deleteIt = ( curr->model & CM_EMPTY ) != 0;
569             break;
570 
571         case tagtype_inline:
572             deleteIt = ( curr->model & CM_INLINE ) != 0;
573             break;
574 
575         case tagtype_block:
576             deleteIt = ( (curr->model & CM_BLOCK) != 0 &&
577                          curr->parser == TY_(ParseBlock) );
578             break;
579 
580         case tagtype_pre:
581             deleteIt = ( (curr->model & CM_BLOCK) != 0 &&
582                          curr->parser == TY_(ParsePre) );
583             break;
584 
585         case tagtype_null:
586             break;
587         }
588 
589         if ( deleteIt )
590         {
591 #if ELEMENT_HASH_LOOKUP
592           tagsRemoveFromHash( doc, &doc->tags, curr->name );
593 #endif
594           FreeDict( doc, curr );
595           if ( prev )
596             prev->next = next;
597           else
598             tags->declared_tag_list = next;
599         }
600         else
601           prev = curr;
602     }
603 }
604 
TY_(FreeTags)605 void TY_(FreeTags)( TidyDocImpl* doc )
606 {
607     TidyTagImpl* tags = &doc->tags;
608 
609 #if ELEMENT_HASH_LOOKUP
610     tagsEmptyHash( doc, tags );
611 #endif
612     TY_(FreeDeclaredTags)( doc, tagtype_null );
613     FreeDict( doc, tags->xml_tags );
614 
615     /* get rid of dangling tag references */
616     TidyClearMemory( tags, sizeof(TidyTagImpl) );
617 }
618 
619 
620 /* default method for checking an element's attributes */
TY_(CheckAttributes)621 void TY_(CheckAttributes)( TidyDocImpl* doc, Node *node )
622 {
623     AttVal *next, *attval = node->attributes;
624     while (attval)
625     {
626         next = attval->next;
627         TY_(CheckAttribute)( doc, node, attval );
628         attval = next;
629     }
630 }
631 
632 /* methods for checking attributes for specific elements */
633 
/* Attribute check for IMG.
**
** Attribute presence is sampled before the generic attribute check
** runs.  Afterwards:
**  - without "alt": when TidyAccessibilityCheckLevel is 0, sets
**    BA_MISSING_IMAGE_ALT in doc->badAccess and calls
**    TY_(ReportMissingAttr); when the TidyAltText option holds a
**    string, adds it as the "alt" attribute.
**  - without both "src" and "datafld": calls TY_(ReportMissingAttr)
**    for "src".
**  - with "ismap" but no "usemap", at accessibility level 0: reports
**    MISSING_IMAGEMAP.
*/
void CheckIMG( TidyDocImpl* doc, Node *node )
{
    const Bool altPresent     = TY_(AttrGetById)(node, TidyAttr_ALT) != NULL;
    const Bool srcPresent     = TY_(AttrGetById)(node, TidyAttr_SRC) != NULL;
    const Bool useMapPresent  = TY_(AttrGetById)(node, TidyAttr_USEMAP) != NULL;
    const Bool isMapPresent   = TY_(AttrGetById)(node, TidyAttr_ISMAP) != NULL;
    const Bool dataFldPresent = TY_(AttrGetById)(node, TidyAttr_DATAFLD) != NULL;

    TY_(CheckAttributes)(doc, node);

    if ( !altPresent )
    {
        ctmbstr altText;

        if ( cfg(doc, TidyAccessibilityCheckLevel) == 0 )
        {
            doc->badAccess |= BA_MISSING_IMAGE_ALT;
            TY_(ReportMissingAttr)( doc, node, "alt" );
        }

        altText = cfgStr(doc, TidyAltText);
        if ( altText )
            TY_(AddAttribute)( doc, node, "alt", altText );
    }

    if ( !(srcPresent || dataFldPresent) )
        TY_(ReportMissingAttr)( doc, node, "src" );

    if ( cfg(doc, TidyAccessibilityCheckLevel) == 0
         && isMapPresent && !useMapPresent )
    {
        TY_(ReportAttrError)( doc, node, NULL, MISSING_IMAGEMAP);
    }
}
665 
/* Attribute check for CAPTION.
**
** After the generic check, the value of "align" (if it has one)
** decides what happens:
**   "left"/"right" - constrain the version to VERS_HTML40_LOOSE
**   "top"/"bottom" - constrain to ~(VERS_HTML20|VERS_HTML32)
**   anything else  - report BAD_ATTRIBUTE_VALUE
*/
void CheckCaption(TidyDocImpl* doc, Node *node)
{
    AttVal *align;

    TY_(CheckAttributes)(doc, node);

    align = TY_(AttrGetById)(node, TidyAttr_ALIGN);
    if (!AttrHasValue(align))
        return;

    if (AttrValueIs(align, "left") || AttrValueIs(align, "right"))
    {
        TY_(ConstrainVersion)(doc, VERS_HTML40_LOOSE);
        return;
    }

    if (AttrValueIs(align, "top") || AttrValueIs(align, "bottom"))
    {
        TY_(ConstrainVersion)(doc, ~(VERS_HTML20|VERS_HTML32));
        return;
    }

    TY_(ReportAttrError)(doc, node, align, BAD_ATTRIBUTE_VALUE);
}
684 
/* Attribute check for HTML: it only runs the generic
** TY_(CheckAttributes) pass and adds nothing element-specific.
*/
void CheckHTML( TidyDocImpl* doc, Node *node )
{
    TY_(CheckAttributes)( doc, node );
}
689 
/* Attribute check for AREA.
**
** Attribute presence is sampled before the generic check runs.
**  - without "alt", at accessibility level 0: sets BA_MISSING_LINK_ALT
**    in doc->badAccess and calls TY_(ReportMissingAttr) for "alt".
**  - without both "href" and "nohref": calls TY_(ReportMissingAttr)
**    for "href".
*/
void CheckAREA( TidyDocImpl* doc, Node *node )
{
    const Bool altPresent    = TY_(AttrGetById)(node, TidyAttr_ALT) != NULL;
    const Bool hrefPresent   = TY_(AttrGetById)(node, TidyAttr_HREF) != NULL;
    const Bool nohrefPresent = TY_(AttrGetById)(node, TidyAttr_NOHREF) != NULL;

    TY_(CheckAttributes)(doc, node);

    if ( !altPresent && cfg(doc, TidyAccessibilityCheckLevel) == 0 )
    {
        doc->badAccess |= BA_MISSING_LINK_ALT;
        TY_(ReportMissingAttr)( doc, node, "alt" );
    }

    if ( !(hrefPresent || nohrefPresent) )
        TY_(ReportMissingAttr)( doc, node, "href" );
}
710 
/* Attribute check for TABLE.
**
** Whether "summary" is present is sampled before the generic check.
**  - at accessibility level 0, a missing "summary" sets
**    BA_MISSING_SUMMARY in doc->badAccess and is reported; the
**    document is still valid without it.
**  - with the TidyXmlOut option set, a "border" attribute that has no
**    value is given the value "1".
*/
void CheckTABLE( TidyDocImpl* doc, Node *node )
{
    AttVal* border;
    const Bool summaryPresent = TY_(AttrGetById)(node, TidyAttr_SUMMARY) != NULL;

    TY_(CheckAttributes)(doc, node);

    if ( cfg(doc, TidyAccessibilityCheckLevel) == 0 && !summaryPresent )
    {
        doc->badAccess |= BA_MISSING_SUMMARY;
        TY_(ReportMissingAttr)( doc, node, "summary");
    }

    /* turn <table border> into <table border="1"> */
    if ( cfgBool(doc, TidyXmlOut) )
    {
        border = TY_(AttrGetById)(node, TidyAttr_BORDER);
        if ( border != NULL && border->value == NULL )
            border->value = TY_(tmbstrdup)(doc->allocator, "1");
    }
}
736 
737 /* add missing type attribute when appropriate */
/* Attribute check for SCRIPT.
**
** Nothing further is done when a "type" attribute exists after the
** generic check.  Otherwise a type is derived from "language":
**   no language attribute              - "text/javascript"
**   starts with "javascript"/"jscript" - "text/javascript"
**   equals "vbscript"                  - "text/vbscript"
**   any other value                    - nothing is added
** All comparisons ignore case.  If a "type" attribute exists afterwards
** INSERTING_ATTRIBUTE is reported; if not, the missing "type" is
** reported.
*/
void CheckSCRIPT( TidyDocImpl* doc, Node *node )
{
    AttVal *lang, *type;
    char buf[16];

    TY_(CheckAttributes)(doc, node);

    lang = TY_(AttrGetById)(node, TidyAttr_LANGUAGE);
    type = TY_(AttrGetById)(node, TidyAttr_TYPE);

    if (type)
        return;

    if (lang == NULL)
    {
        TY_(AddAttribute)(doc, node, "type", "text/javascript");
    }
    else
    {
        /* Test #696799. lang->value can be NULL, so start from an
           empty string.  Only the first 10 characters are kept,
           which is the length of "javascript". */
        buf[0] = '\0';
        TY_(tmbstrncpy)(buf, lang->value, sizeof(buf));
        buf[10] = '\0';

        if (TY_(tmbstrncasecmp)(buf, "javascript", 10) == 0 ||
            TY_(tmbstrncasecmp)(buf,   "jscript",  7) == 0)
        {
            TY_(AddAttribute)(doc, node, "type", "text/javascript");
        }
        else if (TY_(tmbstrcasecmp)(buf, "vbscript") == 0)
        {
            /* per Randy Waki 8/6/01 */
            TY_(AddAttribute)(doc, node, "type", "text/vbscript");
        }
    }

    /* look again: an attribute may or may not have been added above */
    type = TY_(AttrGetById)(node, TidyAttr_TYPE);

    if (type == NULL)
        TY_(ReportMissingAttr)(doc, node, "type");
    else
        TY_(ReportAttrError)(doc, node, type, INSERTING_ATTRIBUTE);
}
786 
787 
788 /* add missing type attribute when appropriate */
/* Attribute check for STYLE.
**
** The "type" attribute is fetched before the generic check runs.  If
** it is absent, has no value, or has an empty value, it is set to
** "text/css" through TY_(RepairAttrValue) and INSERTING_ATTRIBUTE is
** reported for the resulting attribute.
*/
void CheckSTYLE( TidyDocImpl* doc, Node *node )
{
    AttVal *type = TY_(AttrGetById)(node, TidyAttr_TYPE);

    TY_(CheckAttributes)( doc, node );

    /* a non-empty type needs no repair */
    if ( type != NULL && type->value != NULL && TY_(tmbstrlen)(type->value) != 0 )
        return;

    type = TY_(RepairAttrValue)(doc, node, "type", "text/css");
    TY_(ReportAttrError)( doc, node, type, INSERTING_ATTRIBUTE );
}
801 
802 /* add missing type attribute when appropriate */
/* Attribute check for LINK.
**
** The "rel" attribute is fetched before the generic check runs.  When
** its value is "stylesheet" and the element has no "type" attribute,
** type="text/css" is added and INSERTING_ATTRIBUTE is reported.
** todo: <link rel="alternate stylesheet">
*/
void CheckLINK( TidyDocImpl* doc, Node *node )
{
    AttVal *rel = TY_(AttrGetById)(node, TidyAttr_REL);
    AttVal *inserted;

    TY_(CheckAttributes)( doc, node );

    if ( AttrValueIs(rel, "stylesheet") &&
         TY_(AttrGetById)(node, TidyAttr_TYPE) == NULL )
    {
        TY_(AddAttribute)( doc, node, "type", "text/css" );
        inserted = TY_(AttrGetById)(node, TidyAttr_TYPE);
        TY_(ReportAttrError)( doc, node, inserted, INSERTING_ATTRIBUTE );
    }
}
821 
822 /* reports missing action attribute */
/* Attribute check for FORM: runs the generic check, then calls
** TY_(ReportMissingAttr) for "action" if the element had no such
** attribute when the check started.
*/
void CheckFORM( TidyDocImpl* doc, Node *node )
{
    const Bool actionPresent = TY_(AttrGetById)(node, TidyAttr_ACTION) != NULL;

    TY_(CheckAttributes)(doc, node);

    if (!actionPresent)
        TY_(ReportMissingAttr)(doc, node, "action");
}
832 
833 /* reports missing content attribute */
/* Attribute check for META: runs the generic check, then calls
** TY_(ReportMissingAttr) for "content" if the element had no such
** attribute when the check started.
** A name or http-equiv attribute must also be set, but that is not
** verified here.
*/
void CheckMETA( TidyDocImpl* doc, Node *node )
{
    const Bool contentPresent = TY_(AttrGetById)(node, TidyAttr_CONTENT) != NULL;

    TY_(CheckAttributes)(doc, node);

    if (!contentPresent)
        TY_(ReportMissingAttr)( doc, node, "content" );
}
844 
845 
TY_(nodeIsText)846 Bool TY_(nodeIsText)( Node* node )
847 {
848   return ( node && node->type == TextNode );
849 }
850 
TY_(nodeHasText)851 Bool TY_(nodeHasText)( TidyDocImpl* doc, Node* node )
852 {
853   if ( doc && node )
854   {
855     uint ix;
856     Lexer* lexer = doc->lexer;
857     for ( ix = node->start; ix < node->end; ++ix )
858     {
859         /* whitespace */
860         if ( !TY_(IsWhite)( lexer->lexbuf[ix] ) )
861             return yes;
862     }
863   }
864   return no;
865 }
866 
TY_(nodeIsElement)867 Bool TY_(nodeIsElement)( Node* node )
868 {
869   return ( node &&
870            (node->type == StartTag || node->type == StartEndTag) );
871 }
872 
873 /* True if any of the bits requested are set.
874 */
TY_(nodeHasCM)875 Bool TY_(nodeHasCM)( Node* node, uint contentModel )
876 {
877   return ( node && node->tag &&
878            (node->tag->model & contentModel) != 0 );
879 }
880 
TY_(nodeCMIsBlock)881 Bool TY_(nodeCMIsBlock)( Node* node )
882 {
883   return TY_(nodeHasCM)( node, CM_BLOCK );
884 }
TY_(nodeCMIsInline)885 Bool TY_(nodeCMIsInline)( Node* node )
886 {
887   return TY_(nodeHasCM)( node, CM_INLINE );
888 }
TY_(nodeCMIsEmpty)889 Bool TY_(nodeCMIsEmpty)( Node* node )
890 {
891   return TY_(nodeHasCM)( node, CM_EMPTY );
892 }
893 
TY_(nodeIsHeader)894 Bool TY_(nodeIsHeader)( Node* node )
895 {
896     TidyTagId tid = TagId( node  );
897     return ( tid && (
898              tid == TidyTag_H1 ||
899              tid == TidyTag_H2 ||
900              tid == TidyTag_H3 ||
901              tid == TidyTag_H4 ||
902              tid == TidyTag_H5 ||
903              tid == TidyTag_H6 ));
904 }
905 
TY_(nodeHeaderLevel)906 uint TY_(nodeHeaderLevel)( Node* node )
907 {
908     TidyTagId tid = TagId( node  );
909     switch ( tid )
910     {
911     case TidyTag_H1:
912         return 1;
913     case TidyTag_H2:
914         return 2;
915     case TidyTag_H3:
916         return 3;
917     case TidyTag_H4:
918         return 4;
919     case TidyTag_H5:
920         return 5;
921     case TidyTag_H6:
922         return 6;
923     default:
924     {
925         /* fall through */
926     }
927     }
928     return 0;
929 }
930 
931 /*
932  * local variables:
933  * mode: c
934  * indent-tabs-mode: nil
935  * c-basic-offset: 4
936  * eval: (c-set-offset 'substatement-open 0)
937  * end:
938  */
939