1 /* tags.c -- recognize HTML tags
2
3 (c) 1998-2008 (W3C) MIT, ERCIM, Keio University
4 See tidyp.h for the copyright notice.
5
6 The HTML tags are stored as 8 bit ASCII strings.
7
8 */
9
10 #include "tidy-int.h"
11 #include "message.h"
12 #include "tmbstr.h"
13
14 /* Attribute checking methods */
15 static CheckAttribs CheckIMG;
16 static CheckAttribs CheckLINK;
17 static CheckAttribs CheckAREA;
18 static CheckAttribs CheckTABLE;
19 static CheckAttribs CheckCaption;
20 static CheckAttribs CheckSCRIPT;
21 static CheckAttribs CheckSTYLE;
22 static CheckAttribs CheckHTML;
23 static CheckAttribs CheckFORM;
24 static CheckAttribs CheckMETA;
25
/* Per-element version masks used in the "versions" column of tag_defs.
**
** Every mask ORs together thirteen columns, always in the same order:
**
**   HT20 HT32 H40T H41T X10T H40F H41F X10F H40S H41S X10S XH11 XB10
**
** A column that reads xxxx is a placeholder for "flag not set" (xxxx and
** the flag names are defined elsewhere; presumably xxxx is 0 and each
** flag stands for one HTML/XHTML document type -- confirm in the header
** that defines them).  Keep the columns aligned when editing.
*/
#define VERS_ELEM_A          (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
#define VERS_ELEM_ABBR       (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
#define VERS_ELEM_ACRONYM    (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
#define VERS_ELEM_ADDRESS    (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
#define VERS_ELEM_APPLET     (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
#define VERS_ELEM_AREA       (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
#define VERS_ELEM_B          (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
#define VERS_ELEM_BASE       (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
#define VERS_ELEM_BASEFONT   (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
#define VERS_ELEM_BDO        (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
#define VERS_ELEM_BIG        (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
#define VERS_ELEM_BLOCKQUOTE (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
#define VERS_ELEM_BODY       (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
#define VERS_ELEM_BR         (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
#define VERS_ELEM_BUTTON     (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
#define VERS_ELEM_CAPTION    (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
#define VERS_ELEM_CENTER     (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
#define VERS_ELEM_CITE       (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
#define VERS_ELEM_CODE       (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
#define VERS_ELEM_COL        (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
#define VERS_ELEM_COLGROUP   (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
#define VERS_ELEM_DD         (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
#define VERS_ELEM_DEL        (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
#define VERS_ELEM_DFN        (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
#define VERS_ELEM_DIR        (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
#define VERS_ELEM_DIV        (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
#define VERS_ELEM_DL         (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
#define VERS_ELEM_DT         (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
#define VERS_ELEM_EM         (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
#define VERS_ELEM_FIELDSET   (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
#define VERS_ELEM_FONT       (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
#define VERS_ELEM_FORM       (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
#define VERS_ELEM_FRAME      (xxxx|xxxx|xxxx|xxxx|xxxx|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
#define VERS_ELEM_FRAMESET   (xxxx|xxxx|xxxx|xxxx|xxxx|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
#define VERS_ELEM_H1         (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
#define VERS_ELEM_H2         (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
#define VERS_ELEM_H3         (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
#define VERS_ELEM_H4         (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
#define VERS_ELEM_H5         (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
#define VERS_ELEM_H6         (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
#define VERS_ELEM_HEAD       (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
#define VERS_ELEM_HR         (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
#define VERS_ELEM_HTML       (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
#define VERS_ELEM_I          (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
#define VERS_ELEM_IFRAME     (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
#define VERS_ELEM_IMG        (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
#define VERS_ELEM_INPUT      (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
#define VERS_ELEM_INS        (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
#define VERS_ELEM_ISINDEX    (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
#define VERS_ELEM_KBD        (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
#define VERS_ELEM_LABEL      (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
#define VERS_ELEM_LEGEND     (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
#define VERS_ELEM_LI         (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
#define VERS_ELEM_LINK       (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
#define VERS_ELEM_LISTING    (HT20|HT32|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx)
#define VERS_ELEM_MAP        (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
#define VERS_ELEM_MENU       (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
#define VERS_ELEM_META       (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
#define VERS_ELEM_NEXTID     (HT20|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx)
#define VERS_ELEM_NOFRAMES   (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
#define VERS_ELEM_NOSCRIPT   (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
#define VERS_ELEM_OBJECT     (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
#define VERS_ELEM_OL         (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
#define VERS_ELEM_OPTGROUP   (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
#define VERS_ELEM_OPTION     (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
#define VERS_ELEM_P          (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
#define VERS_ELEM_PARAM      (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
#define VERS_ELEM_PLAINTEXT  (HT20|HT32|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx)
#define VERS_ELEM_PRE        (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
#define VERS_ELEM_Q          (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
#define VERS_ELEM_RB         (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|XH11|xxxx)
#define VERS_ELEM_RBC        (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|XH11|xxxx)
#define VERS_ELEM_RP         (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|XH11|xxxx)
#define VERS_ELEM_RT         (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|XH11|xxxx)
#define VERS_ELEM_RTC        (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|XH11|xxxx)
#define VERS_ELEM_RUBY       (xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|XH11|xxxx)
#define VERS_ELEM_S          (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
#define VERS_ELEM_SAMP       (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
#define VERS_ELEM_SCRIPT     (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
#define VERS_ELEM_SELECT     (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
#define VERS_ELEM_SMALL      (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
#define VERS_ELEM_SPAN       (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
#define VERS_ELEM_STRIKE     (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
#define VERS_ELEM_STRONG     (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
#define VERS_ELEM_STYLE      (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
#define VERS_ELEM_SUB        (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
#define VERS_ELEM_SUP        (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
#define VERS_ELEM_TABLE      (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
#define VERS_ELEM_TBODY      (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
#define VERS_ELEM_TD         (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
#define VERS_ELEM_TEXTAREA   (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
#define VERS_ELEM_TFOOT      (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
#define VERS_ELEM_TH         (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
#define VERS_ELEM_THEAD      (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
#define VERS_ELEM_TITLE      (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
#define VERS_ELEM_TR         (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
#define VERS_ELEM_TT         (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx)
#define VERS_ELEM_U          (xxxx|HT32|H40T|H41T|X10T|H40F|H41F|X10F|xxxx|xxxx|xxxx|xxxx|xxxx)
#define VERS_ELEM_UL         (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
#define VERS_ELEM_VAR        (HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
#define VERS_ELEM_XMP        (HT20|HT32|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx)
127
128 static const Dict tag_defs[] =
129 {
130 { TidyTag_UNKNOWN, "unknown!", VERS_UNKNOWN, NULL, (0), NULL, NULL },
131
132 /* W3C defined elements */
133 { TidyTag_A, "a", VERS_ELEM_A, &TY_(W3CAttrsFor_A)[0], (CM_INLINE), TY_(ParseInline), NULL },
134 { TidyTag_ABBR, "abbr", VERS_ELEM_ABBR, &TY_(W3CAttrsFor_ABBR)[0], (CM_INLINE), TY_(ParseInline), NULL },
135 { TidyTag_ACRONYM, "acronym", VERS_ELEM_ACRONYM, &TY_(W3CAttrsFor_ACRONYM)[0], (CM_INLINE), TY_(ParseInline), NULL },
136 { TidyTag_ADDRESS, "address", VERS_ELEM_ADDRESS, &TY_(W3CAttrsFor_ADDRESS)[0], (CM_BLOCK), TY_(ParseInline), NULL },
137 { TidyTag_APPLET, "applet", VERS_ELEM_APPLET, &TY_(W3CAttrsFor_APPLET)[0], (CM_OBJECT|CM_IMG|CM_INLINE|CM_PARAM), TY_(ParseBlock), NULL },
138 { TidyTag_AREA, "area", VERS_ELEM_AREA, &TY_(W3CAttrsFor_AREA)[0], (CM_BLOCK|CM_EMPTY), TY_(ParseEmpty), CheckAREA },
139 { TidyTag_B, "b", VERS_ELEM_B, &TY_(W3CAttrsFor_B)[0], (CM_INLINE), TY_(ParseInline), NULL },
140 { TidyTag_BASE, "base", VERS_ELEM_BASE, &TY_(W3CAttrsFor_BASE)[0], (CM_HEAD|CM_EMPTY), TY_(ParseEmpty), NULL },
141 { TidyTag_BASEFONT, "basefont", VERS_ELEM_BASEFONT, &TY_(W3CAttrsFor_BASEFONT)[0], (CM_INLINE|CM_EMPTY), TY_(ParseEmpty), NULL },
142 { TidyTag_BDO, "bdo", VERS_ELEM_BDO, &TY_(W3CAttrsFor_BDO)[0], (CM_INLINE), TY_(ParseInline), NULL },
143 { TidyTag_BIG, "big", VERS_ELEM_BIG, &TY_(W3CAttrsFor_BIG)[0], (CM_INLINE), TY_(ParseInline), NULL },
144 { TidyTag_BLOCKQUOTE, "blockquote", VERS_ELEM_BLOCKQUOTE, &TY_(W3CAttrsFor_BLOCKQUOTE)[0], (CM_BLOCK), TY_(ParseBlock), NULL },
145 { TidyTag_BODY, "body", VERS_ELEM_BODY, &TY_(W3CAttrsFor_BODY)[0], (CM_HTML|CM_OPT|CM_OMITST), TY_(ParseBody), NULL },
146 { TidyTag_BR, "br", VERS_ELEM_BR, &TY_(W3CAttrsFor_BR)[0], (CM_INLINE|CM_EMPTY), TY_(ParseEmpty), NULL },
147 { TidyTag_BUTTON, "button", VERS_ELEM_BUTTON, &TY_(W3CAttrsFor_BUTTON)[0], (CM_INLINE), TY_(ParseBlock), NULL },
148 { TidyTag_CAPTION, "caption", VERS_ELEM_CAPTION, &TY_(W3CAttrsFor_CAPTION)[0], (CM_TABLE), TY_(ParseInline), CheckCaption },
149 { TidyTag_CENTER, "center", VERS_ELEM_CENTER, &TY_(W3CAttrsFor_CENTER)[0], (CM_BLOCK), TY_(ParseBlock), NULL },
150 { TidyTag_CITE, "cite", VERS_ELEM_CITE, &TY_(W3CAttrsFor_CITE)[0], (CM_INLINE), TY_(ParseInline), NULL },
151 { TidyTag_CODE, "code", VERS_ELEM_CODE, &TY_(W3CAttrsFor_CODE)[0], (CM_INLINE), TY_(ParseInline), NULL },
152 { TidyTag_COL, "col", VERS_ELEM_COL, &TY_(W3CAttrsFor_COL)[0], (CM_TABLE|CM_EMPTY), TY_(ParseEmpty), NULL },
153 { TidyTag_COLGROUP, "colgroup", VERS_ELEM_COLGROUP, &TY_(W3CAttrsFor_COLGROUP)[0], (CM_TABLE|CM_OPT), TY_(ParseColGroup), NULL },
154 { TidyTag_DD, "dd", VERS_ELEM_DD, &TY_(W3CAttrsFor_DD)[0], (CM_DEFLIST|CM_OPT|CM_NO_INDENT), TY_(ParseBlock), NULL },
155 { TidyTag_DEL, "del", VERS_ELEM_DEL, &TY_(W3CAttrsFor_DEL)[0], (CM_INLINE|CM_BLOCK|CM_MIXED), TY_(ParseInline), NULL },
156 { TidyTag_DFN, "dfn", VERS_ELEM_DFN, &TY_(W3CAttrsFor_DFN)[0], (CM_INLINE), TY_(ParseInline), NULL },
157 { TidyTag_DIR, "dir", VERS_ELEM_DIR, &TY_(W3CAttrsFor_DIR)[0], (CM_BLOCK|CM_OBSOLETE), TY_(ParseList), NULL },
158 { TidyTag_DIV, "div", VERS_ELEM_DIV, &TY_(W3CAttrsFor_DIV)[0], (CM_BLOCK), TY_(ParseBlock), NULL },
159 { TidyTag_DL, "dl", VERS_ELEM_DL, &TY_(W3CAttrsFor_DL)[0], (CM_BLOCK), TY_(ParseDefList), NULL },
160 { TidyTag_DT, "dt", VERS_ELEM_DT, &TY_(W3CAttrsFor_DT)[0], (CM_DEFLIST|CM_OPT|CM_NO_INDENT), TY_(ParseInline), NULL },
161 { TidyTag_EM, "em", VERS_ELEM_EM, &TY_(W3CAttrsFor_EM)[0], (CM_INLINE), TY_(ParseInline), NULL },
162 { TidyTag_FIELDSET, "fieldset", VERS_ELEM_FIELDSET, &TY_(W3CAttrsFor_FIELDSET)[0], (CM_BLOCK), TY_(ParseBlock), NULL },
163 { TidyTag_FONT, "font", VERS_ELEM_FONT, &TY_(W3CAttrsFor_FONT)[0], (CM_INLINE), TY_(ParseInline), NULL },
164 { TidyTag_FORM, "form", VERS_ELEM_FORM, &TY_(W3CAttrsFor_FORM)[0], (CM_BLOCK), TY_(ParseBlock), CheckFORM },
165 { TidyTag_FRAME, "frame", VERS_ELEM_FRAME, &TY_(W3CAttrsFor_FRAME)[0], (CM_FRAMES|CM_EMPTY), TY_(ParseEmpty), NULL },
166 { TidyTag_FRAMESET, "frameset", VERS_ELEM_FRAMESET, &TY_(W3CAttrsFor_FRAMESET)[0], (CM_HTML|CM_FRAMES), TY_(ParseFrameSet), NULL },
167 { TidyTag_H1, "h1", VERS_ELEM_H1, &TY_(W3CAttrsFor_H1)[0], (CM_BLOCK|CM_HEADING), TY_(ParseInline), NULL },
168 { TidyTag_H2, "h2", VERS_ELEM_H2, &TY_(W3CAttrsFor_H2)[0], (CM_BLOCK|CM_HEADING), TY_(ParseInline), NULL },
169 { TidyTag_H3, "h3", VERS_ELEM_H3, &TY_(W3CAttrsFor_H3)[0], (CM_BLOCK|CM_HEADING), TY_(ParseInline), NULL },
170 { TidyTag_H4, "h4", VERS_ELEM_H4, &TY_(W3CAttrsFor_H4)[0], (CM_BLOCK|CM_HEADING), TY_(ParseInline), NULL },
171 { TidyTag_H5, "h5", VERS_ELEM_H5, &TY_(W3CAttrsFor_H5)[0], (CM_BLOCK|CM_HEADING), TY_(ParseInline), NULL },
172 { TidyTag_H6, "h6", VERS_ELEM_H6, &TY_(W3CAttrsFor_H6)[0], (CM_BLOCK|CM_HEADING), TY_(ParseInline), NULL },
173 { TidyTag_HEAD, "head", VERS_ELEM_HEAD, &TY_(W3CAttrsFor_HEAD)[0], (CM_HTML|CM_OPT|CM_OMITST), TY_(ParseHead), NULL },
174 { TidyTag_HR, "hr", VERS_ELEM_HR, &TY_(W3CAttrsFor_HR)[0], (CM_BLOCK|CM_EMPTY), TY_(ParseEmpty), NULL },
175 { TidyTag_HTML, "html", VERS_ELEM_HTML, &TY_(W3CAttrsFor_HTML)[0], (CM_HTML|CM_OPT|CM_OMITST), TY_(ParseHTML), CheckHTML },
176 { TidyTag_I, "i", VERS_ELEM_I, &TY_(W3CAttrsFor_I)[0], (CM_INLINE), TY_(ParseInline), NULL },
177 { TidyTag_IFRAME, "iframe", VERS_ELEM_IFRAME, &TY_(W3CAttrsFor_IFRAME)[0], (CM_INLINE), TY_(ParseBlock), NULL },
178 { TidyTag_IMG, "img", VERS_ELEM_IMG, &TY_(W3CAttrsFor_IMG)[0], (CM_INLINE|CM_IMG|CM_EMPTY), TY_(ParseEmpty), CheckIMG },
179 { TidyTag_INPUT, "input", VERS_ELEM_INPUT, &TY_(W3CAttrsFor_INPUT)[0], (CM_INLINE|CM_IMG|CM_EMPTY), TY_(ParseEmpty), NULL },
180 { TidyTag_INS, "ins", VERS_ELEM_INS, &TY_(W3CAttrsFor_INS)[0], (CM_INLINE|CM_BLOCK|CM_MIXED), TY_(ParseInline), NULL },
181 { TidyTag_ISINDEX, "isindex", VERS_ELEM_ISINDEX, &TY_(W3CAttrsFor_ISINDEX)[0], (CM_BLOCK|CM_EMPTY), TY_(ParseEmpty), NULL },
182 { TidyTag_KBD, "kbd", VERS_ELEM_KBD, &TY_(W3CAttrsFor_KBD)[0], (CM_INLINE), TY_(ParseInline), NULL },
183 { TidyTag_LABEL, "label", VERS_ELEM_LABEL, &TY_(W3CAttrsFor_LABEL)[0], (CM_INLINE), TY_(ParseInline), NULL },
184 { TidyTag_LEGEND, "legend", VERS_ELEM_LEGEND, &TY_(W3CAttrsFor_LEGEND)[0], (CM_INLINE), TY_(ParseInline), NULL },
185 { TidyTag_LI, "li", VERS_ELEM_LI, &TY_(W3CAttrsFor_LI)[0], (CM_LIST|CM_OPT|CM_NO_INDENT), TY_(ParseBlock), NULL },
186 { TidyTag_LINK, "link", VERS_ELEM_LINK, &TY_(W3CAttrsFor_LINK)[0], (CM_HEAD|CM_EMPTY), TY_(ParseEmpty), CheckLINK },
187 { TidyTag_LISTING, "listing", VERS_ELEM_LISTING, &TY_(W3CAttrsFor_LISTING)[0], (CM_BLOCK|CM_OBSOLETE), TY_(ParsePre), NULL },
188 { TidyTag_MAP, "map", VERS_ELEM_MAP, &TY_(W3CAttrsFor_MAP)[0], (CM_INLINE), TY_(ParseBlock), NULL },
189 { TidyTag_MENU, "menu", VERS_ELEM_MENU, &TY_(W3CAttrsFor_MENU)[0], (CM_BLOCK|CM_OBSOLETE), TY_(ParseList), NULL },
190 { TidyTag_META, "meta", VERS_ELEM_META, &TY_(W3CAttrsFor_META)[0], (CM_HEAD|CM_EMPTY), TY_(ParseEmpty), CheckMETA },
191 { TidyTag_NOFRAMES, "noframes", VERS_ELEM_NOFRAMES, &TY_(W3CAttrsFor_NOFRAMES)[0], (CM_BLOCK|CM_FRAMES), TY_(ParseNoFrames), NULL },
192 { TidyTag_NOSCRIPT, "noscript", VERS_ELEM_NOSCRIPT, &TY_(W3CAttrsFor_NOSCRIPT)[0], (CM_BLOCK|CM_INLINE|CM_MIXED), TY_(ParseBlock), NULL },
193 { TidyTag_OBJECT, "object", VERS_ELEM_OBJECT, &TY_(W3CAttrsFor_OBJECT)[0], (CM_OBJECT|CM_HEAD|CM_IMG|CM_INLINE|CM_PARAM), TY_(ParseBlock), NULL },
194 { TidyTag_OL, "ol", VERS_ELEM_OL, &TY_(W3CAttrsFor_OL)[0], (CM_BLOCK), TY_(ParseList), NULL },
195 { TidyTag_OPTGROUP, "optgroup", VERS_ELEM_OPTGROUP, &TY_(W3CAttrsFor_OPTGROUP)[0], (CM_FIELD|CM_OPT), TY_(ParseOptGroup), NULL },
196 { TidyTag_OPTION, "option", VERS_ELEM_OPTION, &TY_(W3CAttrsFor_OPTION)[0], (CM_FIELD|CM_OPT), TY_(ParseText), NULL },
197 { TidyTag_P, "p", VERS_ELEM_P, &TY_(W3CAttrsFor_P)[0], (CM_BLOCK|CM_OPT), TY_(ParseInline), NULL },
198 { TidyTag_PARAM, "param", VERS_ELEM_PARAM, &TY_(W3CAttrsFor_PARAM)[0], (CM_INLINE|CM_EMPTY), TY_(ParseEmpty), NULL },
199 { TidyTag_PLAINTEXT, "plaintext", VERS_ELEM_PLAINTEXT, &TY_(W3CAttrsFor_PLAINTEXT)[0], (CM_BLOCK|CM_OBSOLETE), TY_(ParsePre), NULL },
200 { TidyTag_PRE, "pre", VERS_ELEM_PRE, &TY_(W3CAttrsFor_PRE)[0], (CM_BLOCK), TY_(ParsePre), NULL },
201 { TidyTag_Q, "q", VERS_ELEM_Q, &TY_(W3CAttrsFor_Q)[0], (CM_INLINE), TY_(ParseInline), NULL },
202 { TidyTag_RB, "rb", VERS_ELEM_RB, &TY_(W3CAttrsFor_RB)[0], (CM_INLINE), TY_(ParseInline), NULL },
203 { TidyTag_RBC, "rbc", VERS_ELEM_RBC, &TY_(W3CAttrsFor_RBC)[0], (CM_INLINE), TY_(ParseInline), NULL },
204 { TidyTag_RP, "rp", VERS_ELEM_RP, &TY_(W3CAttrsFor_RP)[0], (CM_INLINE), TY_(ParseInline), NULL },
205 { TidyTag_RT, "rt", VERS_ELEM_RT, &TY_(W3CAttrsFor_RT)[0], (CM_INLINE), TY_(ParseInline), NULL },
206 { TidyTag_RTC, "rtc", VERS_ELEM_RTC, &TY_(W3CAttrsFor_RTC)[0], (CM_INLINE), TY_(ParseInline), NULL },
207 { TidyTag_RUBY, "ruby", VERS_ELEM_RUBY, &TY_(W3CAttrsFor_RUBY)[0], (CM_INLINE), TY_(ParseInline), NULL },
208 { TidyTag_S, "s", VERS_ELEM_S, &TY_(W3CAttrsFor_S)[0], (CM_INLINE), TY_(ParseInline), NULL },
209 { TidyTag_SAMP, "samp", VERS_ELEM_SAMP, &TY_(W3CAttrsFor_SAMP)[0], (CM_INLINE), TY_(ParseInline), NULL },
210 { TidyTag_SCRIPT, "script", VERS_ELEM_SCRIPT, &TY_(W3CAttrsFor_SCRIPT)[0], (CM_HEAD|CM_MIXED|CM_BLOCK|CM_INLINE), TY_(ParseScript), CheckSCRIPT },
211 { TidyTag_SELECT, "select", VERS_ELEM_SELECT, &TY_(W3CAttrsFor_SELECT)[0], (CM_INLINE|CM_FIELD), TY_(ParseSelect), NULL },
212 { TidyTag_SMALL, "small", VERS_ELEM_SMALL, &TY_(W3CAttrsFor_SMALL)[0], (CM_INLINE), TY_(ParseInline), NULL },
213 { TidyTag_SPAN, "span", VERS_ELEM_SPAN, &TY_(W3CAttrsFor_SPAN)[0], (CM_INLINE), TY_(ParseInline), NULL },
214 { TidyTag_STRIKE, "strike", VERS_ELEM_STRIKE, &TY_(W3CAttrsFor_STRIKE)[0], (CM_INLINE), TY_(ParseInline), NULL },
215 { TidyTag_STRONG, "strong", VERS_ELEM_STRONG, &TY_(W3CAttrsFor_STRONG)[0], (CM_INLINE), TY_(ParseInline), NULL },
216 { TidyTag_STYLE, "style", VERS_ELEM_STYLE, &TY_(W3CAttrsFor_STYLE)[0], (CM_HEAD), TY_(ParseScript), CheckSTYLE },
217 { TidyTag_SUB, "sub", VERS_ELEM_SUB, &TY_(W3CAttrsFor_SUB)[0], (CM_INLINE), TY_(ParseInline), NULL },
218 { TidyTag_SUP, "sup", VERS_ELEM_SUP, &TY_(W3CAttrsFor_SUP)[0], (CM_INLINE), TY_(ParseInline), NULL },
219 { TidyTag_TABLE, "table", VERS_ELEM_TABLE, &TY_(W3CAttrsFor_TABLE)[0], (CM_BLOCK), TY_(ParseTableTag), CheckTABLE },
220 { TidyTag_TBODY, "tbody", VERS_ELEM_TBODY, &TY_(W3CAttrsFor_TBODY)[0], (CM_TABLE|CM_ROWGRP|CM_OPT), TY_(ParseRowGroup), NULL },
221 { TidyTag_TD, "td", VERS_ELEM_TD, &TY_(W3CAttrsFor_TD)[0], (CM_ROW|CM_OPT|CM_NO_INDENT), TY_(ParseBlock), NULL },
222 { TidyTag_TEXTAREA, "textarea", VERS_ELEM_TEXTAREA, &TY_(W3CAttrsFor_TEXTAREA)[0], (CM_INLINE|CM_FIELD), TY_(ParseText), NULL },
223 { TidyTag_TFOOT, "tfoot", VERS_ELEM_TFOOT, &TY_(W3CAttrsFor_TFOOT)[0], (CM_TABLE|CM_ROWGRP|CM_OPT), TY_(ParseRowGroup), NULL },
224 { TidyTag_TH, "th", VERS_ELEM_TH, &TY_(W3CAttrsFor_TH)[0], (CM_ROW|CM_OPT|CM_NO_INDENT), TY_(ParseBlock), NULL },
225 { TidyTag_THEAD, "thead", VERS_ELEM_THEAD, &TY_(W3CAttrsFor_THEAD)[0], (CM_TABLE|CM_ROWGRP|CM_OPT), TY_(ParseRowGroup), NULL },
226 { TidyTag_TITLE, "title", VERS_ELEM_TITLE, &TY_(W3CAttrsFor_TITLE)[0], (CM_HEAD), TY_(ParseTitle), NULL },
227 { TidyTag_TR, "tr", VERS_ELEM_TR, &TY_(W3CAttrsFor_TR)[0], (CM_TABLE|CM_OPT), TY_(ParseRow), NULL },
228 { TidyTag_TT, "tt", VERS_ELEM_TT, &TY_(W3CAttrsFor_TT)[0], (CM_INLINE), TY_(ParseInline), NULL },
229 { TidyTag_U, "u", VERS_ELEM_U, &TY_(W3CAttrsFor_U)[0], (CM_INLINE), TY_(ParseInline), NULL },
230 { TidyTag_UL, "ul", VERS_ELEM_UL, &TY_(W3CAttrsFor_UL)[0], (CM_BLOCK), TY_(ParseList), NULL },
231 { TidyTag_VAR, "var", VERS_ELEM_VAR, &TY_(W3CAttrsFor_VAR)[0], (CM_INLINE), TY_(ParseInline), NULL },
232 { TidyTag_XMP, "xmp", VERS_ELEM_XMP, &TY_(W3CAttrsFor_XMP)[0], (CM_BLOCK|CM_OBSOLETE), TY_(ParsePre), NULL },
233 { TidyTag_NEXTID, "nextid", VERS_ELEM_NEXTID, &TY_(W3CAttrsFor_NEXTID)[0], (CM_HEAD|CM_EMPTY), TY_(ParseEmpty), NULL },
234
235 /* proprietary elements */
236 { TidyTag_ALIGN, "align", VERS_NETSCAPE, NULL, (CM_BLOCK), TY_(ParseBlock), NULL },
237 { TidyTag_BGSOUND, "bgsound", VERS_MICROSOFT, NULL, (CM_HEAD|CM_EMPTY), TY_(ParseEmpty), NULL },
238 { TidyTag_BLINK, "blink", VERS_PROPRIETARY, NULL, (CM_INLINE), TY_(ParseInline), NULL },
239 { TidyTag_COMMENT, "comment", VERS_MICROSOFT, NULL, (CM_INLINE), TY_(ParseInline), NULL },
240 { TidyTag_EMBED, "embed", VERS_NETSCAPE, NULL, (CM_INLINE|CM_IMG|CM_EMPTY), TY_(ParseEmpty), NULL },
241 { TidyTag_ILAYER, "ilayer", VERS_NETSCAPE, NULL, (CM_INLINE), TY_(ParseInline), NULL },
242 { TidyTag_KEYGEN, "keygen", VERS_NETSCAPE, NULL, (CM_INLINE|CM_EMPTY), TY_(ParseEmpty), NULL },
243 { TidyTag_LAYER, "layer", VERS_NETSCAPE, NULL, (CM_BLOCK), TY_(ParseBlock), NULL },
244 { TidyTag_MARQUEE, "marquee", VERS_MICROSOFT, NULL, (CM_INLINE|CM_OPT), TY_(ParseInline), NULL },
245 { TidyTag_MULTICOL, "multicol", VERS_NETSCAPE, NULL, (CM_BLOCK), TY_(ParseBlock), NULL },
246 { TidyTag_NOBR, "nobr", VERS_PROPRIETARY, NULL, (CM_INLINE), TY_(ParseInline), NULL },
247 { TidyTag_NOEMBED, "noembed", VERS_NETSCAPE, NULL, (CM_INLINE), TY_(ParseInline), NULL },
248 { TidyTag_NOLAYER, "nolayer", VERS_NETSCAPE, NULL, (CM_BLOCK|CM_INLINE|CM_MIXED), TY_(ParseBlock), NULL },
249 { TidyTag_NOSAVE, "nosave", VERS_NETSCAPE, NULL, (CM_BLOCK), TY_(ParseBlock), NULL },
250 { TidyTag_SERVER, "server", VERS_NETSCAPE, NULL, (CM_HEAD|CM_MIXED|CM_BLOCK|CM_INLINE), TY_(ParseScript), NULL },
251 { TidyTag_SERVLET, "servlet", VERS_SUN, NULL, (CM_OBJECT|CM_IMG|CM_INLINE|CM_PARAM), TY_(ParseBlock), NULL },
252 { TidyTag_SPACER, "spacer", VERS_NETSCAPE, NULL, (CM_INLINE|CM_EMPTY), TY_(ParseEmpty), NULL },
253 { TidyTag_WBR, "wbr", VERS_PROPRIETARY, NULL, (CM_INLINE|CM_EMPTY), TY_(ParseEmpty), NULL },
254
255 /* this must be the final entry */
256 { (TidyTagId)0, NULL, 0, NULL, (0), NULL, NULL }
257 };
258
259 #if ELEMENT_HASH_LOOKUP
tagsHash(ctmbstr s)260 static uint tagsHash(ctmbstr s)
261 {
262 uint hashval;
263
264 for (hashval = 0; *s != '\0'; s++)
265 hashval = *s + 31*hashval;
266
267 return hashval % ELEMENT_HASH_SIZE;
268 }
269
/* Cache a dictionary entry in the per-document hash table.
**
** A new DictHash node pointing at `old` is pushed onto the front of the
** bucket selected by the entry's name.  The entry itself is not copied.
** Returns `old` unchanged; a NULL `old` is passed straight through and
** leaves the table untouched.
*/
static const Dict *tagsInstall(TidyDocImpl* doc, TidyTagImpl* tags, const Dict* old)
{
    if (old != NULL)
    {
        const uint bucket = tagsHash(old->name);
        DictHash *entry = (DictHash *)TidyDocAlloc(doc, sizeof(*entry));

        entry->tag = old;
        entry->next = tags->hashtab[bucket];
        tags->hashtab[bucket] = entry;
    }

    return old;
}
287
/* Drop the cached hash node for the tag named s, if there is one.
**
** Only the first node in the bucket whose tag name equals s is unlinked
** and freed; the Dict it points to is left alone.  The scan stops early
** at a node whose tag pointer is NULL.
*/
static void tagsRemoveFromHash( TidyDocImpl* doc, TidyTagImpl* tags, ctmbstr s )
{
    /* link always addresses the pointer that leads to the current node,
       so unlinking is the same for the bucket head and for inner nodes */
    DictHash **link = &tags->hashtab[tagsHash(s)];

    while (*link != NULL && (*link)->tag != NULL)
    {
        DictHash *entry = *link;

        if (TY_(tmbstrcmp)(s, entry->tag->name) == 0)
        {
            *link = entry->next;
            TidyDocFree(doc, entry);
            return;
        }

        link = &entry->next;
    }
}
307
/* Free every node in every bucket of the tag hash and reset each
** bucket to NULL.  The Dict entries the nodes refer to are not freed.
*/
static void tagsEmptyHash( TidyDocImpl* doc, TidyTagImpl* tags )
{
    uint bucket;

    for (bucket = 0; bucket < ELEMENT_HASH_SIZE; ++bucket)
    {
        DictHash *entry = tags->hashtab[bucket];

        while (entry != NULL)
        {
            /* grab the successor before the node is released */
            DictHash *following = entry->next;
            TidyDocFree(doc, entry);
            entry = following;
        }

        tags->hashtab[bucket] = NULL;
    }
}
328 #endif /* ELEMENT_HASH_LOOKUP */
329
tagsLookup(TidyDocImpl * doc,TidyTagImpl * tags,ctmbstr s)330 static const Dict* tagsLookup( TidyDocImpl* doc, TidyTagImpl* tags, ctmbstr s )
331 {
332 const Dict *np;
333 #if ELEMENT_HASH_LOOKUP
334 const DictHash* p;
335 #endif
336
337 if (!s)
338 return NULL;
339
340 #if ELEMENT_HASH_LOOKUP
341 /* this breaks if declared elements get changed between two */
342 /* parser runs since Tidy would use the cached version rather */
343 /* than the new one. */
344 /* However, as FreeDeclaredTags() correctly cleans the hash */
345 /* this should not be true anymore. */
346 for (p = tags->hashtab[tagsHash(s)]; p && p->tag; p = p->next)
347 if (TY_(tmbstrcmp)(s, p->tag->name) == 0)
348 return p->tag;
349
350 for (np = tag_defs + 1; np < tag_defs + N_TIDY_TAGS; ++np)
351 if (TY_(tmbstrcmp)(s, np->name) == 0)
352 return tagsInstall(doc, tags, np);
353
354 for (np = tags->declared_tag_list; np; np = np->next)
355 if (TY_(tmbstrcmp)(s, np->name) == 0)
356 return tagsInstall(doc, tags, np);
357 #else
358
359 for (np = tag_defs + 1; np < tag_defs + N_TIDY_TAGS; ++np)
360 if (TY_(tmbstrcmp)(s, np->name) == 0)
361 return np;
362
363 for (np = tags->declared_tag_list; np; np = np->next)
364 if (TY_(tmbstrcmp)(s, np->name) == 0)
365 return np;
366
367 #endif /* ELEMENT_HASH_LOOKUP */
368
369 return NULL;
370 }
371
NewDict(TidyDocImpl * doc,ctmbstr name)372 static Dict* NewDict( TidyDocImpl* doc, ctmbstr name )
373 {
374 Dict * const np = (Dict*) TidyDocAlloc( doc, sizeof(Dict) );
375 np->id = TidyTag_UNKNOWN;
376 np->name = name ? TY_(tmbstrdup)( doc->allocator, name ) : NULL;
377 np->versions = VERS_UNKNOWN;
378 np->attrvers = NULL;
379 np->model = CM_UNKNOWN;
380 np->parser = 0;
381 np->chkattrs = 0;
382 np->next = NULL;
383 return np;
384 }
385
/* Release a dictionary entry together with its name string.
**
** Only the name is guarded by the NULL test; the entry pointer itself
** is handed to TidyDocFree() unconditionally, so a NULL d still reaches
** the free routine.
*/
static void FreeDict( TidyDocImpl* doc, Dict *d )
{
    if ( d != NULL )
    {
        TidyDocFree( doc, d->name );
    }

    TidyDocFree( doc, d );
}
392
/* Register (or update) a user-declared tag.
**
** If no entry named `name` exists yet, a fresh one is created and put
** at the head of the declared-tag list.  The supplied properties are
** applied only to entries whose id is TidyTag_UNKNOWN, which keeps the
** predefined tags from being overwritten.  Note that the content model
** is OR-ed into the existing value rather than replaced.
** A NULL name makes the call a no-op.
*/
static void declare( TidyDocImpl* doc, TidyTagImpl* tags,
                     ctmbstr name, uint versions, uint model,
                     Parser *parser, CheckAttribs *chkattrs )
{
    Dict* entry;

    if ( name == NULL )
        return;

    /* the lookup result is const; the cast permits the update below */
    entry = (Dict*) tagsLookup( doc, tags, name );
    if ( entry == NULL )
    {
        entry = NewDict( doc, name );
        entry->next = tags->declared_tag_list;
        tags->declared_tag_list = entry;
    }

    /* never modify a predefined tag */
    if ( entry->id != TidyTag_UNKNOWN )
        return;

    entry->versions = versions;
    entry->model |= model;
    entry->parser = parser;
    entry->chkattrs = chkattrs;
    entry->attrvers = NULL;
}
418
419 /* public interface for finding tag by name */
TY_(FindTag)420 Bool TY_(FindTag)( TidyDocImpl* doc, Node *node )
421 {
422 const Dict *np = NULL;
423 if ( cfgBool(doc, TidyXmlTags) )
424 {
425 node->tag = doc->tags.xml_tags;
426 return yes;
427 }
428
429 if ( node->element && (np = tagsLookup(doc, &doc->tags, node->element)) )
430 {
431 node->tag = np;
432 return yes;
433 }
434
435 return no;
436 }
437
TY_(LookupTagDef)438 const Dict* TY_(LookupTagDef)( TidyTagId tid )
439 {
440 const Dict *np;
441
442 for (np = tag_defs + 1; np < tag_defs + N_TIDY_TAGS; ++np )
443 if (np->id == tid)
444 return np;
445
446 return NULL;
447 }
448
TY_(FindParser)449 Parser* TY_(FindParser)( TidyDocImpl* doc, Node *node )
450 {
451 const Dict* np = tagsLookup( doc, &doc->tags, node->element );
452 if ( np )
453 return np->parser;
454 return NULL;
455 }
456
TY_(DefineTag)457 void TY_(DefineTag)( TidyDocImpl* doc, UserTagType tagType, ctmbstr name )
458 {
459 Parser* parser = 0;
460 uint cm = CM_UNKNOWN;
461 uint vers = VERS_PROPRIETARY;
462
463 switch (tagType)
464 {
465 case tagtype_empty:
466 cm = CM_EMPTY|CM_NO_INDENT|CM_NEW;
467 parser = TY_(ParseBlock);
468 break;
469
470 case tagtype_inline:
471 cm = CM_INLINE|CM_NO_INDENT|CM_NEW;
472 parser = TY_(ParseInline);
473 break;
474
475 case tagtype_block:
476 cm = CM_BLOCK|CM_NO_INDENT|CM_NEW;
477 parser = TY_(ParseBlock);
478 break;
479
480 case tagtype_pre:
481 cm = CM_BLOCK|CM_NO_INDENT|CM_NEW;
482 parser = TY_(ParsePre);
483 break;
484
485 case tagtype_null:
486 break;
487 }
488 if ( cm && parser )
489 declare( doc, &doc->tags, name, vers, cm, parser, 0 );
490 }
491
TY_(GetDeclaredTagList)492 TidyIterator TY_(GetDeclaredTagList)( TidyDocImpl* doc )
493 {
494 return (TidyIterator) doc->tags.declared_tag_list;
495 }
496
TY_(GetNextDeclaredTag)497 ctmbstr TY_(GetNextDeclaredTag)( TidyDocImpl* ARG_UNUSED(doc),
498 UserTagType tagType, TidyIterator* iter )
499 {
500 ctmbstr name = NULL;
501 Dict* curr;
502 for ( curr = (Dict*) *iter; name == NULL && curr != NULL; curr = curr->next )
503 {
504 switch ( tagType )
505 {
506 case tagtype_empty:
507 if ( (curr->model & CM_EMPTY) != 0 )
508 name = curr->name;
509 break;
510
511 case tagtype_inline:
512 if ( (curr->model & CM_INLINE) != 0 )
513 name = curr->name;
514 break;
515
516 case tagtype_block:
517 if ( (curr->model & CM_BLOCK) != 0 &&
518 curr->parser == TY_(ParseBlock) )
519 name = curr->name;
520 break;
521
522 case tagtype_pre:
523 if ( (curr->model & CM_BLOCK) != 0 &&
524 curr->parser == TY_(ParsePre) )
525 name = curr->name;
526 break;
527
528 case tagtype_null:
529 break;
530 }
531 }
532 *iter = (TidyIterator) curr;
533 return name;
534 }
535
TY_(InitTags)536 void TY_(InitTags)( TidyDocImpl* doc )
537 {
538 Dict* xml;
539 TidyTagImpl* tags = &doc->tags;
540
541 TidyClearMemory( tags, sizeof(TidyTagImpl) );
542
543 /* create dummy entry for all xml tags */
544 xml = NewDict( doc, NULL );
545 xml->versions = VERS_XML;
546 xml->model = CM_BLOCK;
547 xml->parser = 0;
548 xml->chkattrs = 0;
549 xml->attrvers = NULL;
550 tags->xml_tags = xml;
551 }
552
553 /* By default, zap all of them. But allow
554 ** an single type to be specified.
555 */
TY_(FreeDeclaredTags)556 void TY_(FreeDeclaredTags)( TidyDocImpl* doc, UserTagType tagType )
557 {
558 TidyTagImpl* tags = &doc->tags;
559 Dict *curr, *next = NULL, *prev = NULL;
560
561 for ( curr=tags->declared_tag_list; curr; curr = next )
562 {
563 Bool deleteIt = yes;
564 next = curr->next;
565 switch ( tagType )
566 {
567 case tagtype_empty:
568 deleteIt = ( curr->model & CM_EMPTY ) != 0;
569 break;
570
571 case tagtype_inline:
572 deleteIt = ( curr->model & CM_INLINE ) != 0;
573 break;
574
575 case tagtype_block:
576 deleteIt = ( (curr->model & CM_BLOCK) != 0 &&
577 curr->parser == TY_(ParseBlock) );
578 break;
579
580 case tagtype_pre:
581 deleteIt = ( (curr->model & CM_BLOCK) != 0 &&
582 curr->parser == TY_(ParsePre) );
583 break;
584
585 case tagtype_null:
586 break;
587 }
588
589 if ( deleteIt )
590 {
591 #if ELEMENT_HASH_LOOKUP
592 tagsRemoveFromHash( doc, &doc->tags, curr->name );
593 #endif
594 FreeDict( doc, curr );
595 if ( prev )
596 prev->next = next;
597 else
598 tags->declared_tag_list = next;
599 }
600 else
601 prev = curr;
602 }
603 }
604
TY_(FreeTags)605 void TY_(FreeTags)( TidyDocImpl* doc )
606 {
607 TidyTagImpl* tags = &doc->tags;
608
609 #if ELEMENT_HASH_LOOKUP
610 tagsEmptyHash( doc, tags );
611 #endif
612 TY_(FreeDeclaredTags)( doc, tagtype_null );
613 FreeDict( doc, tags->xml_tags );
614
615 /* get rid of dangling tag references */
616 TidyClearMemory( tags, sizeof(TidyTagImpl) );
617 }
618
619
620 /* default method for checking an element's attributes */
TY_(CheckAttributes)621 void TY_(CheckAttributes)( TidyDocImpl* doc, Node *node )
622 {
623 AttVal *next, *attval = node->attributes;
624 while (attval)
625 {
626 next = attval->next;
627 TY_(CheckAttribute)( doc, node, attval );
628 attval = next;
629 }
630 }
631
632 /* methods for checking attributes for specific elements */
633
/* Attribute checker for IMG.
**
** After the generic per-attribute check it
**  - reports a missing "alt" (only at accessibility check level 0) and
**    adds the configured alt text, if one is set;
**  - reports a missing "src" unless "datafld" is present;
**  - reports "ismap" without "usemap" (only at accessibility level 0).
*/
void CheckIMG( TidyDocImpl* doc, Node *node )
{
    /* presence is sampled before the generic check runs */
    Bool altPresent     = TY_(AttrGetById)(node, TidyAttr_ALT) != NULL;
    Bool srcPresent     = TY_(AttrGetById)(node, TidyAttr_SRC) != NULL;
    Bool useMapPresent  = TY_(AttrGetById)(node, TidyAttr_USEMAP) != NULL;
    Bool isMapPresent   = TY_(AttrGetById)(node, TidyAttr_ISMAP) != NULL;
    Bool dataFldPresent = TY_(AttrGetById)(node, TidyAttr_DATAFLD) != NULL;

    TY_(CheckAttributes)(doc, node);

    if ( !altPresent )
    {
        ctmbstr altText;

        if ( cfg(doc, TidyAccessibilityCheckLevel) == 0 )
        {
            doc->badAccess |= BA_MISSING_IMAGE_ALT;
            TY_(ReportMissingAttr)( doc, node, "alt" );
        }

        altText = cfgStr(doc, TidyAltText);
        if ( altText )
            TY_(AddAttribute)( doc, node, "alt", altText );
    }

    if ( !srcPresent && !dataFldPresent )
        TY_(ReportMissingAttr)( doc, node, "src" );

    if ( cfg(doc, TidyAccessibilityCheckLevel) == 0 &&
         isMapPresent && !useMapPresent )
    {
        TY_(ReportAttrError)( doc, node, NULL, MISSING_IMAGEMAP);
    }
}
665
/* Attribute checker for CAPTION.
**
** Runs the generic check, then inspects a valued "align" attribute:
**   left / right  - constrains the document to VERS_HTML40_LOOSE
**   top / bottom  - rules out VERS_HTML20 and VERS_HTML32
**   anything else - reported as BAD_ATTRIBUTE_VALUE
** An absent or valueless "align" is left alone.
*/
void CheckCaption(TidyDocImpl* doc, Node *node)
{
    AttVal *align;

    TY_(CheckAttributes)(doc, node);

    align = TY_(AttrGetById)(node, TidyAttr_ALIGN);

    if (AttrHasValue(align))
    {
        if (AttrValueIs(align, "left") || AttrValueIs(align, "right"))
        {
            TY_(ConstrainVersion)(doc, VERS_HTML40_LOOSE);
        }
        else if (AttrValueIs(align, "top") || AttrValueIs(align, "bottom"))
        {
            TY_(ConstrainVersion)(doc, ~(VERS_HTML20|VERS_HTML32));
        }
        else
        {
            TY_(ReportAttrError)(doc, node, align, BAD_ATTRIBUTE_VALUE);
        }
    }
}
684
/* Attribute checker for HTML: nothing element-specific, it simply
** delegates to the generic per-attribute check.
*/
void CheckHTML( TidyDocImpl* doc, Node *node )
{
    TY_(CheckAttributes)( doc, node );
}
689
/* Attribute checker for AREA.
**
** After the generic check it reports a missing "alt" (only at
** accessibility check level 0) and reports a missing "href" when
** neither "href" nor "nohref" is present.
*/
void CheckAREA( TidyDocImpl* doc, Node *node )
{
    /* presence is sampled before the generic check runs */
    Bool altPresent    = TY_(AttrGetById)(node, TidyAttr_ALT) != NULL;
    Bool hrefPresent   = TY_(AttrGetById)(node, TidyAttr_HREF) != NULL;
    Bool nohrefPresent = TY_(AttrGetById)(node, TidyAttr_NOHREF) != NULL;

    TY_(CheckAttributes)(doc, node);

    if ( !altPresent && cfg(doc, TidyAccessibilityCheckLevel) == 0 )
    {
        doc->badAccess |= BA_MISSING_LINK_ALT;
        TY_(ReportMissingAttr)( doc, node, "alt" );
    }

    if ( !(hrefPresent || nohrefPresent) )
        TY_(ReportMissingAttr)( doc, node, "href" );
}
710
/* Attribute checker for TABLE.
**
** After the generic check it flags a missing "summary" (only at
** accessibility check level 0) and, for XML output, gives a valueless
** "border" attribute the value "1".
*/
void CheckTABLE( TidyDocImpl* doc, Node *node )
{
    /* presence is sampled before the generic check runs */
    Bool summaryPresent = TY_(AttrGetById)(node, TidyAttr_SUMMARY) != NULL;

    TY_(CheckAttributes)(doc, node);

    /* lacking a summary hurts accessibility whatever the HTML version,
       although the document itself remains valid */
    if ( cfg(doc, TidyAccessibilityCheckLevel) == 0 && !summaryPresent )
    {
        doc->badAccess |= BA_MISSING_SUMMARY;
        TY_(ReportMissingAttr)( doc, node, "summary");
    }

    /* turn <table border> into <table border="1"> */
    if ( cfgBool(doc, TidyXmlOut) )
    {
        AttVal* border = TY_(AttrGetById)(node, TidyAttr_BORDER);

        if ( border != NULL && border->value == NULL )
            border->value = TY_(tmbstrdup)(doc->allocator, "1");
    }
}
736
737 /* add missing type attribute when appropriate */
CheckSCRIPT(TidyDocImpl * doc,Node * node)738 void CheckSCRIPT( TidyDocImpl* doc, Node *node )
739 {
740 AttVal *lang, *type;
741 char buf[16];
742
743 TY_(CheckAttributes)(doc, node);
744
745 lang = TY_(AttrGetById)(node, TidyAttr_LANGUAGE);
746 type = TY_(AttrGetById)(node, TidyAttr_TYPE);
747
748 if (!type)
749 {
750 /* check for javascript */
751 if (lang)
752 {
753 /* Test #696799. lang->value can be NULL. */
754 buf[0] = '\0';
755 TY_(tmbstrncpy)(buf, lang->value, sizeof(buf));
756 buf[10] = '\0';
757
758 if (TY_(tmbstrncasecmp)(buf, "javascript", 10) == 0 ||
759 TY_(tmbstrncasecmp)(buf, "jscript", 7) == 0)
760 {
761 TY_(AddAttribute)(doc, node, "type", "text/javascript");
762 }
763 else if (TY_(tmbstrcasecmp)(buf, "vbscript") == 0)
764 {
765 /* per Randy Waki 8/6/01 */
766 TY_(AddAttribute)(doc, node, "type", "text/vbscript");
767 }
768 }
769 else
770 {
771 TY_(AddAttribute)(doc, node, "type", "text/javascript");
772 }
773
774 type = TY_(AttrGetById)(node, TidyAttr_TYPE);
775
776 if (type != NULL)
777 {
778 TY_(ReportAttrError)(doc, node, type, INSERTING_ATTRIBUTE);
779 }
780 else
781 {
782 TY_(ReportMissingAttr)(doc, node, "type");
783 }
784 }
785 }
786
787
788 /* add missing type attribute when appropriate */
CheckSTYLE(TidyDocImpl * doc,Node * node)789 void CheckSTYLE( TidyDocImpl* doc, Node *node )
790 {
791 AttVal *type = TY_(AttrGetById)(node, TidyAttr_TYPE);
792
793 TY_(CheckAttributes)( doc, node );
794
795 if ( !type || !type->value || !TY_(tmbstrlen)(type->value) )
796 {
797 type = TY_(RepairAttrValue)(doc, node, "type", "text/css");
798 TY_(ReportAttrError)( doc, node, type, INSERTING_ATTRIBUTE );
799 }
800 }
801
802 /* add missing type attribute when appropriate */
CheckLINK(TidyDocImpl * doc,Node * node)803 void CheckLINK( TidyDocImpl* doc, Node *node )
804 {
805 AttVal *rel = TY_(AttrGetById)(node, TidyAttr_REL);
806
807 TY_(CheckAttributes)( doc, node );
808
809 /* todo: <link rel="alternate stylesheet"> */
810 if (AttrValueIs(rel, "stylesheet"))
811 {
812 AttVal *type = TY_(AttrGetById)(node, TidyAttr_TYPE);
813 if (!type)
814 {
815 TY_(AddAttribute)( doc, node, "type", "text/css" );
816 type = TY_(AttrGetById)(node, TidyAttr_TYPE);
817 TY_(ReportAttrError)( doc, node, type, INSERTING_ATTRIBUTE );
818 }
819 }
820 }
821
822 /* reports missing action attribute */
CheckFORM(TidyDocImpl * doc,Node * node)823 void CheckFORM( TidyDocImpl* doc, Node *node )
824 {
825 AttVal *action = TY_(AttrGetById)(node, TidyAttr_ACTION);
826
827 TY_(CheckAttributes)(doc, node);
828
829 if (!action)
830 TY_(ReportMissingAttr)(doc, node, "action");
831 }
832
833 /* reports missing content attribute */
CheckMETA(TidyDocImpl * doc,Node * node)834 void CheckMETA( TidyDocImpl* doc, Node *node )
835 {
836 AttVal *content = TY_(AttrGetById)(node, TidyAttr_CONTENT);
837
838 TY_(CheckAttributes)(doc, node);
839
840 if (!content)
841 TY_(ReportMissingAttr)( doc, node, "content" );
842 /* name or http-equiv attribute must also be set */
843 }
844
845
TY_(nodeIsText)846 Bool TY_(nodeIsText)( Node* node )
847 {
848 return ( node && node->type == TextNode );
849 }
850
TY_(nodeHasText)851 Bool TY_(nodeHasText)( TidyDocImpl* doc, Node* node )
852 {
853 if ( doc && node )
854 {
855 uint ix;
856 Lexer* lexer = doc->lexer;
857 for ( ix = node->start; ix < node->end; ++ix )
858 {
859 /* whitespace */
860 if ( !TY_(IsWhite)( lexer->lexbuf[ix] ) )
861 return yes;
862 }
863 }
864 return no;
865 }
866
TY_(nodeIsElement)867 Bool TY_(nodeIsElement)( Node* node )
868 {
869 return ( node &&
870 (node->type == StartTag || node->type == StartEndTag) );
871 }
872
873 /* True if any of the bits requested are set.
874 */
TY_(nodeHasCM)875 Bool TY_(nodeHasCM)( Node* node, uint contentModel )
876 {
877 return ( node && node->tag &&
878 (node->tag->model & contentModel) != 0 );
879 }
880
TY_(nodeCMIsBlock)881 Bool TY_(nodeCMIsBlock)( Node* node )
882 {
883 return TY_(nodeHasCM)( node, CM_BLOCK );
884 }
TY_(nodeCMIsInline)885 Bool TY_(nodeCMIsInline)( Node* node )
886 {
887 return TY_(nodeHasCM)( node, CM_INLINE );
888 }
TY_(nodeCMIsEmpty)889 Bool TY_(nodeCMIsEmpty)( Node* node )
890 {
891 return TY_(nodeHasCM)( node, CM_EMPTY );
892 }
893
TY_(nodeIsHeader)894 Bool TY_(nodeIsHeader)( Node* node )
895 {
896 TidyTagId tid = TagId( node );
897 return ( tid && (
898 tid == TidyTag_H1 ||
899 tid == TidyTag_H2 ||
900 tid == TidyTag_H3 ||
901 tid == TidyTag_H4 ||
902 tid == TidyTag_H5 ||
903 tid == TidyTag_H6 ));
904 }
905
TY_(nodeHeaderLevel)906 uint TY_(nodeHeaderLevel)( Node* node )
907 {
908 TidyTagId tid = TagId( node );
909 switch ( tid )
910 {
911 case TidyTag_H1:
912 return 1;
913 case TidyTag_H2:
914 return 2;
915 case TidyTag_H3:
916 return 3;
917 case TidyTag_H4:
918 return 4;
919 case TidyTag_H5:
920 return 5;
921 case TidyTag_H6:
922 return 6;
923 default:
924 {
925 /* fall through */
926 }
927 }
928 return 0;
929 }
930
931 /*
932 * local variables:
933 * mode: c
934 * indent-tabs-mode: nil
935 * c-basic-offset: 4
936 * eval: (c-set-offset 'substatement-open 0)
937 * end:
938 */
939