1 /* localize.c -- text strings and routines to handle errors and general messages
2
3 (c) 1998-2008 (W3C) MIT, ERCIM, Keio University
4 Portions Copyright University of Toronto
5 See tidy.h and access.h for the copyright notice.
6
7 You should only need to edit this file and tidy.c
8 to localize HTML tidy. *** This needs checking ***
9
10 CVS Info :
11
12 $Author: arnaud02 $
13 $Date: 2008/06/18 20:18:54 $
14 $Revision: 1.178 $
15
16 */
17
18 #include "tidy-int.h"
19 #include "lexer.h"
20 #include "streamio.h"
21 #include "message.h"
22 #include "tmbstr.h"
23 #include "utf8.h"
24
25 /* used to point to Web Accessibility Guidelines */
26 #define ACCESS_URL "http://www.w3.org/WAI/GL"
27
28 /* points to the Adaptive Technology Resource Centre at the
29 ** University of Toronto
30 */
31 #define ATRC_ACCESS_URL "http://www.aprompt.ca/Tidy/accessibilitychecks.html"
32
33 #include "version.h"
34
TY_(ReleaseDate)35 ctmbstr TY_(ReleaseDate)(void)
36 {
37 return TY_(release_date);
38 }
39
40 static struct _msgfmt
41 {
42 uint code;
43 ctmbstr fmt;
44 } const msgFormat[] =
45 {
46 /* ReportEncodingWarning */
47 { ENCODING_MISMATCH, "specified input encoding (%s) does not match actual input encoding (%s)" }, /* Warning */
48
49 /* ReportEncodingError */
50 { VENDOR_SPECIFIC_CHARS, "%s invalid character code %s" }, /* Error */
51 { INVALID_SGML_CHARS, "%s invalid character code %s" }, /* Error */
52 { INVALID_UTF8, "%s invalid UTF-8 bytes (char. code %s)" }, /* Error */
53 { INVALID_UTF16, "%s invalid UTF-16 surrogate pair (char. code %s)" }, /* Error */
54 { INVALID_NCR, "%s invalid numeric character reference %s" }, /* Error */
55
56 /* ReportEntityError */
57 { MISSING_SEMICOLON, "entity \"%s\" doesn't end in ';'" }, /* Warning in HTML, Error in XML/XHTML */
58 { MISSING_SEMICOLON_NCR, "numeric character reference \"%s\" doesn't end in ';'" }, /* Warning in HTML, Error in XML/XHTML */
59 { UNESCAPED_AMPERSAND, "unescaped & which should be written as &" }, /* Warning in HTML, Error in XHTML */
60 { UNKNOWN_ENTITY, "unescaped & or unknown entity \"%s\"" }, /* Error */
61 { APOS_UNDEFINED, "named entity ' only defined in XML/XHTML" }, /* Error in HTML (should only occur for HTML input) */
62
63 /* ReportAttrError */
64
65 /* attribute name */
66 { INSERTING_ATTRIBUTE, "%s inserting \"%s\" attribute" }, /* Warning in CheckLINK, Error otherwise */
67 { MISSING_ATTR_VALUE, "%s attribute \"%s\" lacks value" }, /* Warning in CheckUrl, Error otherwise */
68 { UNKNOWN_ATTRIBUTE, "%s unknown attribute \"%s\"" }, /* Error */
69 { PROPRIETARY_ATTRIBUTE, "%s proprietary attribute \"%s\"" }, /* Error */
70 { JOINING_ATTRIBUTE, "%s joining values of repeated attribute \"%s\"" }, /* Error */
71 { XML_ATTRIBUTE_VALUE, "%s has XML attribute \"%s\"" }, /* Error (but deprecated) */
72
73 /* attribute value */
74 { XML_ID_SYNTAX, "%s ID \"%s\" uses XML ID syntax" }, /* Warning if XHTML, Error if HTML */
75 { ATTR_VALUE_NOT_LCASE, "%s attribute value \"%s\" must be lower case for XHTML" }, /* Error if XHTML input, Notice if HTML input and XHTML outout */
76 { PROPRIETARY_ATTR_VALUE, "%s proprietary attribute value \"%s\"" }, /* Error */
77 { ANCHOR_NOT_UNIQUE, "%s anchor \"%s\" already defined" }, /* Error */
78
79 /* attribute name, attribute value */
80 { BAD_ATTRIBUTE_VALUE, "%s attribute \"%s\" has invalid value \"%s\"" }, /* Error */
81 { BAD_ATTRIBUTE_VALUE_REPLACED, "%s attribute \"%s\" had invalid value \"%s\" and has been replaced" }, /* Error */
82 { INVALID_ATTRIBUTE, "%s attribute name \"%s\" (value=\"%s\") is invalid" }, /* Error */
83
84 /* attribute value, attribute name */
85 { REPEATED_ATTRIBUTE, "%s dropping value \"%s\" for repeated attribute \"%s\"" }, /* Error */
86
87 /* no arguments */
88 { INVALID_XML_ID, "%s cannot copy name attribute to id" }, /* Warning */
89 { UNEXPECTED_GT, "%s missing '>' for end of tag" }, /* Warning if HTML, Error if XML/XHTML */
90 { UNEXPECTED_QUOTEMARK, "%s unexpected or duplicate quote mark" }, /* Error */
91 { MISSING_QUOTEMARK, "%s attribute with missing trailing quote mark" }, /* Error */
92 { UNEXPECTED_END_OF_FILE_ATTR, "%s end of file while parsing attributes" }, /* Error */
93 { ID_NAME_MISMATCH, "%s id and name attribute value mismatch" }, /* Error */
94 { BACKSLASH_IN_URI, "%s URI reference contains backslash. Typo?" }, /* Error */
95 { FIXED_BACKSLASH, "%s converting backslash in URI to slash" }, /* Error */
96 { ILLEGAL_URI_REFERENCE, "%s improperly escaped URI reference" }, /* Error */
97 { ESCAPED_ILLEGAL_URI, "%s escaping malformed URI reference" }, /* Error */
98 { NEWLINE_IN_URI, "%s discarding newline in URI reference" }, /* Error */
99 { WHITE_IN_URI, "%s discarding whitespace in URI reference" }, /* Error */
100 { UNEXPECTED_EQUALSIGN, "%s unexpected '=', expected attribute name" }, /* Error */
101 { MISSING_IMAGEMAP, "%s should use client-side image map" }, /* Warning (but deprecated) */
102
103 /* ReportMissingAttr */
104 { MISSING_ATTRIBUTE, "%s lacks \"%s\" attribute" }, /* Error */
105 /* ReportWarning */
106 { NESTED_EMPHASIS, "nested emphasis %s" }, /* Warning */
107 { NESTED_QUOTATION, "nested q elements, possible typo." }, /* Warning */
108 { OBSOLETE_ELEMENT, "replacing obsolete element %s by %s" }, /* Warning */
109 { COERCE_TO_ENDTAG_WARN, "<%s> is probably intended as </%s>" }, /* Warning */
110
111 /* ReportNotice */
112 { TRIM_EMPTY_ELEMENT, "trimming empty %s" }, /* Notice */
113 { REPLACING_ELEMENT, "replacing %s by %s" }, /* Notice */
114
115 /* ReportError */
116 { COERCE_TO_ENDTAG, "<%s> is probably intended as </%s>" }, /* Error */
117 { REPLACING_UNEX_ELEMENT, "replacing unexpected %s by %s" }, /* Error */
118 { MISSING_ENDTAG_FOR, "missing </%s>" }, /* Error */
119 { MISSING_ENDTAG_BEFORE, "missing </%s> before %s" }, /* Error */
120 { DISCARDING_UNEXPECTED, "discarding unexpected %s" }, /* Error */
121 { NON_MATCHING_ENDTAG, "replacing unexpected %s by </%s>" }, /* Error */
122 { TAG_NOT_ALLOWED_IN, "%s isn't allowed in <%s> elements" }, /* Error */
123 { MISSING_STARTTAG, "missing <%s>" }, /* Error */
124 { UNEXPECTED_ENDTAG, "unexpected </%s>" }, /* Error */
125 { TOO_MANY_ELEMENTS, "too many %s elements" }, /* Error */
126 { USING_BR_INPLACE_OF, "using <br> in place of %s" }, /* Error */
127 { INSERTING_TAG, "inserting implicit <%s>" }, /* Error */
128 { CANT_BE_NESTED, "%s can't be nested" }, /* Error */
129 { PROPRIETARY_ELEMENT, "%s is not approved by W3C" }, /* Error */
130 { ILLEGAL_NESTING, "%s shouldn't be nested" }, /* Error */
131 { NOFRAMES_CONTENT, "%s not inside 'noframes' element" }, /* Error */
132 { UNEXPECTED_END_OF_FILE, "unexpected end of file %s" }, /* Error */
133 { ELEMENT_NOT_EMPTY, "%s element not empty or not closed" }, /* Error */
134 { UNEXPECTED_ENDTAG_IN, "unexpected </%s> in <%s>" }, /* Error */
135 { TOO_MANY_ELEMENTS_IN, "too many %s elements in <%s>" }, /* Error */
136 { UNESCAPED_ELEMENT, "unescaped %s in pre content" }, /* Error (but deprecated) */
137
138 /* no arguments */
139 { DOCTYPE_AFTER_TAGS, "<!DOCTYPE> isn't allowed after elements" }, /* Error */
140 { MISSING_TITLE_ELEMENT, "inserting missing 'title' element" }, /* Error */
141 { INCONSISTENT_VERSION, "HTML DOCTYPE doesn't match content" }, /* Error */
142 { MISSING_DOCTYPE, "missing <!DOCTYPE> declaration" }, /* Error */
143 { CONTENT_AFTER_BODY, "content occurs after end of body" }, /* Error */
144 { MALFORMED_COMMENT, "adjacent hyphens within comment" }, /* Error */
145 { BAD_COMMENT_CHARS, "expecting -- or >" }, /* Error */
146 { BAD_CDATA_CONTENT, "'<' + '/' + letter not allowed here" }, /* Error */
147 { INCONSISTENT_NAMESPACE, "HTML namespace doesn't match content" }, /* Error */
148 { SPACE_PRECEDING_XMLDECL, "removing whitespace preceding XML Declaration" }, /* Error */
149 { MALFORMED_DOCTYPE, "discarding malformed <!DOCTYPE>" }, /* Error */
150 { BAD_XML_COMMENT, "XML comments can't contain --" }, /* Error (but deprecated) */
151 { DTYPE_NOT_UPPER_CASE, "SYSTEM, PUBLIC, W3C, DTD, EN must be upper case" }, /* Error (but deprecated) */
152 { ENCODING_IO_CONFLICT, "Output encoding does not work with standard output" }, /* Error (but deprecated) */
153
154 /* ReportFatal */
155 { SUSPECTED_MISSING_QUOTE, "missing quote mark for attribute value" }, /* Error? (not really sometimes) */
156 { DUPLICATE_FRAMESET, "repeated FRAMESET element" }, /* Error */
157 { UNKNOWN_ELEMENT, "%s is not recognized!" }, /* Error */
158 { UNEXPECTED_ENDTAG, "unexpected </%s>" }, /* Error */
159
160 /* */
161 { PREVIOUS_LOCATION, "<%s> previously mentioned" }, /* Info */
162
163 #if SUPPORT_ACCESSIBILITY_CHECKS
164
165 /* ReportAccess */
166 /*
167 List of error/warning messages. The error code corresponds to
168 the check that is listed in the AERT (HTML specifications).
169 */
170 { IMG_MISSING_ALT, "[1.1.1.1]: <img> missing 'alt' text." }, /* Access */
171 { IMG_ALT_SUSPICIOUS_FILENAME, "[1.1.1.2]: suspicious 'alt' text (filename)." }, /* Access */
172 { IMG_ALT_SUSPICIOUS_FILE_SIZE, "[1.1.1.3]: suspicious 'alt' text (file size)." }, /* Access */
173 { IMG_ALT_SUSPICIOUS_PLACEHOLDER, "[1.1.1.4]: suspicious 'alt' text (placeholder)." }, /* Access */
174 { IMG_ALT_SUSPICIOUS_TOO_LONG, "[1.1.1.10]: suspicious 'alt' text (too long)." }, /* Access */
175 { IMG_MISSING_LONGDESC_DLINK, "[1.1.2.1]: <img> missing 'longdesc' and d-link." }, /* Access */
176 { IMG_MISSING_DLINK, "[1.1.2.2]: <img> missing d-link." }, /* Access */
177 { IMG_MISSING_LONGDESC, "[1.1.2.3]: <img> missing 'longdesc'." }, /* Access */
178 { IMG_BUTTON_MISSING_ALT, "[1.1.3.1]: <img> (button) missing 'alt' text." }, /* Access */
179 { APPLET_MISSING_ALT, "[1.1.4.1]: <applet> missing alternate content." }, /* Access */
180 { OBJECT_MISSING_ALT, "[1.1.5.1]: <object> missing alternate content." }, /* Access */
181 { AUDIO_MISSING_TEXT_WAV, "[1.1.6.1]: audio missing text transcript (wav)." }, /* Access */
182 { AUDIO_MISSING_TEXT_AU, "[1.1.6.2]: audio missing text transcript (au)." }, /* Access */
183 { AUDIO_MISSING_TEXT_AIFF, "[1.1.6.3]: audio missing text transcript (aiff)." }, /* Access */
184 { AUDIO_MISSING_TEXT_SND, "[1.1.6.4]: audio missing text transcript (snd)." }, /* Access */
185 { AUDIO_MISSING_TEXT_RA, "[1.1.6.5]: audio missing text transcript (ra)." }, /* Access */
186 { AUDIO_MISSING_TEXT_RM, "[1.1.6.6]: audio missing text transcript (rm)." }, /* Access */
187 { FRAME_MISSING_LONGDESC, "[1.1.8.1]: <frame> may require 'longdesc'." }, /* Access */
188 { AREA_MISSING_ALT, "[1.1.9.1]: <area> missing 'alt' text." }, /* Access */
189 { SCRIPT_MISSING_NOSCRIPT, "[1.1.10.1]: <script> missing <noscript> section." }, /* Access */
190 { ASCII_REQUIRES_DESCRIPTION, "[1.1.12.1]: ascii art requires description." }, /* Access */
191 { IMG_MAP_SERVER_REQUIRES_TEXT_LINKS, "[1.2.1.1]: image map (server-side) requires text links." }, /* Access */
192 { MULTIMEDIA_REQUIRES_TEXT, "[1.4.1.1]: multimedia requires synchronized text equivalents." }, /* Access */
193 { IMG_MAP_CLIENT_MISSING_TEXT_LINKS, "[1.5.1.1]: image map (client-side) missing text links." }, /* Access */
194 { INFORMATION_NOT_CONVEYED_IMAGE, "[2.1.1.1]: ensure information not conveyed through color alone (image)." }, /* Access */
195 { INFORMATION_NOT_CONVEYED_APPLET, "[2.1.1.2]: ensure information not conveyed through color alone (applet)." }, /* Access */
196 { INFORMATION_NOT_CONVEYED_OBJECT, "[2.1.1.3]: ensure information not conveyed through color alone (object)." }, /* Access */
197 { INFORMATION_NOT_CONVEYED_SCRIPT, "[2.1.1.4]: ensure information not conveyed through color alone (script)." }, /* Access */
198 { INFORMATION_NOT_CONVEYED_INPUT, "[2.1.1.5]: ensure information not conveyed through color alone (input)." }, /* Access */
199 { COLOR_CONTRAST_TEXT, "[2.2.1.1]: poor color contrast (text)." }, /* Access */
200 { COLOR_CONTRAST_LINK, "[2.2.1.2]: poor color contrast (link)." }, /* Access */
201 { COLOR_CONTRAST_ACTIVE_LINK, "[2.2.1.3]: poor color contrast (active link)." }, /* Access */
202 { COLOR_CONTRAST_VISITED_LINK, "[2.2.1.4]: poor color contrast (visited link)." }, /* Access */
203 { DOCTYPE_MISSING, "[3.2.1.1]: <doctype> missing." }, /* Access */
204 { STYLE_SHEET_CONTROL_PRESENTATION, "[3.3.1.1]: use style sheets to control presentation." }, /* Access */
205 { HEADERS_IMPROPERLY_NESTED, "[3.5.1.1]: headers improperly nested." }, /* Access */
206 { POTENTIAL_HEADER_BOLD, "[3.5.2.1]: potential header (bold)." }, /* Access */
207 { POTENTIAL_HEADER_ITALICS, "[3.5.2.2]: potential header (italics)." }, /* Access */
208 { POTENTIAL_HEADER_UNDERLINE, "[3.5.2.3]: potential header (underline)." }, /* Access */
209 { HEADER_USED_FORMAT_TEXT, "[3.5.3.1]: header used to format text." }, /* Access */
210 { LIST_USAGE_INVALID_UL, "[3.6.1.1]: list usage invalid <ul>." }, /* Access */
211 { LIST_USAGE_INVALID_OL, "[3.6.1.2]: list usage invalid <ol>." }, /* Access */
212 { LIST_USAGE_INVALID_LI, "[3.6.1.4]: list usage invalid <li>." }, /* Access */
213 { INDICATE_CHANGES_IN_LANGUAGE, "[4.1.1.1]: indicate changes in language." }, /* Access */
214 { LANGUAGE_NOT_IDENTIFIED, "[4.3.1.1]: language not identified." }, /* Access */
215 { LANGUAGE_INVALID, "[4.3.1.2]: language attribute invalid." }, /* Access */
216 { DATA_TABLE_MISSING_HEADERS, "[5.1.2.1]: data <table> missing row/column headers (all)." }, /* Access */
217 { DATA_TABLE_MISSING_HEADERS_COLUMN, "[5.1.2.2]: data <table> missing row/column headers (1 col)." }, /* Access */
218 { DATA_TABLE_MISSING_HEADERS_ROW, "[5.1.2.3]: data <table> missing row/column headers (1 row)." }, /* Access */
219 { DATA_TABLE_REQUIRE_MARKUP_COLUMN_HEADERS, "[5.2.1.1]: data <table> may require markup (column headers)." }, /* Access */
220 { DATA_TABLE_REQUIRE_MARKUP_ROW_HEADERS, "[5.2.1.2]: data <table> may require markup (row headers)." }, /* Access */
221 { LAYOUT_TABLES_LINEARIZE_PROPERLY, "[5.3.1.1]: verify layout tables linearize properly." }, /* Access */
222 { LAYOUT_TABLE_INVALID_MARKUP, "[5.4.1.1]: invalid markup used in layout <table>." }, /* Access */
223 { TABLE_MISSING_SUMMARY, "[5.5.1.1]: <table> missing summary." }, /* Access */
224 { TABLE_SUMMARY_INVALID_NULL, "[5.5.1.2]: <table> summary invalid (null)." }, /* Access */
225 { TABLE_SUMMARY_INVALID_SPACES, "[5.5.1.3]: <table> summary invalid (spaces)." }, /* Access */
226 { TABLE_SUMMARY_INVALID_PLACEHOLDER, "[5.5.1.6]: <table> summary invalid (placeholder text)." }, /* Access */
227 { TABLE_MISSING_CAPTION, "[5.5.2.1]: <table> missing <caption>." }, /* Access */
228 { TABLE_MAY_REQUIRE_HEADER_ABBR, "[5.6.1.1]: <table> may require header abbreviations." }, /* Access */
229 { TABLE_MAY_REQUIRE_HEADER_ABBR_NULL, "[5.6.1.2]: <table> header abbreviations invalid (null)." }, /* Access */
230 { TABLE_MAY_REQUIRE_HEADER_ABBR_SPACES, "[5.6.1.3]: <table> header abbreviations invalid (spaces)." }, /* Access */
231 { STYLESHEETS_REQUIRE_TESTING_LINK, "[6.1.1.1]: style sheets require testing (link)." }, /* Access */
232 { STYLESHEETS_REQUIRE_TESTING_STYLE_ELEMENT, "[6.1.1.2]: style sheets require testing (style element)." }, /* Access */
233 { STYLESHEETS_REQUIRE_TESTING_STYLE_ATTR, "[6.1.1.3]: style sheets require testing (style attribute)." }, /* Access */
234 { FRAME_SRC_INVALID, "[6.2.1.1]: <frame> source invalid." }, /* Access */
235 { TEXT_EQUIVALENTS_REQUIRE_UPDATING_APPLET, "[6.2.2.1]: text equivalents require updating (applet)." }, /* Access */
236 { TEXT_EQUIVALENTS_REQUIRE_UPDATING_SCRIPT, "[6.2.2.2]: text equivalents require updating (script)." }, /* Access */
237 { TEXT_EQUIVALENTS_REQUIRE_UPDATING_OBJECT, "[6.2.2.3]: text equivalents require updating (object)." }, /* Access */
238 { PROGRAMMATIC_OBJECTS_REQUIRE_TESTING_SCRIPT, "[6.3.1.1]: programmatic objects require testing (script)." }, /* Access */
239 { PROGRAMMATIC_OBJECTS_REQUIRE_TESTING_OBJECT, "[6.3.1.2]: programmatic objects require testing (object)." }, /* Access */
240 { PROGRAMMATIC_OBJECTS_REQUIRE_TESTING_EMBED, "[6.3.1.3]: programmatic objects require testing (embed)." }, /* Access */
241 { PROGRAMMATIC_OBJECTS_REQUIRE_TESTING_APPLET, "[6.3.1.4]: programmatic objects require testing (applet)." }, /* Access */
242 { FRAME_MISSING_NOFRAMES, "[6.5.1.1]: <frameset> missing <noframes> section." }, /* Access */
243 { NOFRAMES_INVALID_NO_VALUE, "[6.5.1.2]: <noframes> section invalid (no value)." }, /* Access */
244 { NOFRAMES_INVALID_CONTENT, "[6.5.1.3]: <noframes> section invalid (content)." }, /* Access */
245 { NOFRAMES_INVALID_LINK, "[6.5.1.4]: <noframes> section invalid (link)." }, /* Access */
246 { REMOVE_FLICKER_SCRIPT, "[7.1.1.1]: remove flicker (script)." }, /* Access */
247 { REMOVE_FLICKER_OBJECT, "[7.1.1.2]: remove flicker (object)." }, /* Access */
248 { REMOVE_FLICKER_EMBED, "[7.1.1.3]: remove flicker (embed)." }, /* Access */
249 { REMOVE_FLICKER_APPLET, "[7.1.1.4]: remove flicker (applet)." }, /* Access */
250 { REMOVE_FLICKER_ANIMATED_GIF, "[7.1.1.5]: remove flicker (animated gif)." }, /* Access */
251 { REMOVE_BLINK_MARQUEE, "[7.2.1.1]: remove blink/marquee." }, /* Access */
252 { REMOVE_AUTO_REFRESH, "[7.4.1.1]: remove auto-refresh." }, /* Access */
253 { REMOVE_AUTO_REDIRECT, "[7.5.1.1]: remove auto-redirect." }, /* Access */
254 { ENSURE_PROGRAMMATIC_OBJECTS_ACCESSIBLE_SCRIPT, "[8.1.1.1]: ensure programmatic objects are accessible (script)." }, /* Access */
255 { ENSURE_PROGRAMMATIC_OBJECTS_ACCESSIBLE_OBJECT, "[8.1.1.2]: ensure programmatic objects are accessible (object)." }, /* Access */
256 { ENSURE_PROGRAMMATIC_OBJECTS_ACCESSIBLE_APPLET, "[8.1.1.3]: ensure programmatic objects are accessible (applet)." }, /* Access */
257 { ENSURE_PROGRAMMATIC_OBJECTS_ACCESSIBLE_EMBED, "[8.1.1.4]: ensure programmatic objects are accessible (embed)." }, /* Access */
258 { IMAGE_MAP_SERVER_SIDE_REQUIRES_CONVERSION, "[9.1.1.1]: image map (server-side) requires conversion." }, /* Access */
259 { SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_MOUSE_DOWN, "[9.3.1.1]: <script> not keyboard accessible (onMouseDown)." }, /* Access */
260 { SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_MOUSE_UP, "[9.3.1.2]: <script> not keyboard accessible (onMouseUp)." }, /* Access */
261 { SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_CLICK, "[9.3.1.3]: <script> not keyboard accessible (onClick)." }, /* Access */
262 { SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_MOUSE_OVER, "[9.3.1.4]: <script> not keyboard accessible (onMouseOver)." }, /* Access */
263 { SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_MOUSE_OUT, "[9.3.1.5]: <script> not keyboard accessible (onMouseOut)." }, /* Access */
264 { SCRIPT_NOT_KEYBOARD_ACCESSIBLE_ON_MOUSE_MOVE, "[9.3.1.6]: <script> not keyboard accessible (onMouseMove)." }, /* Access */
265 { NEW_WINDOWS_REQUIRE_WARNING_NEW, "[10.1.1.1]: new windows require warning (_new)." }, /* Access */
266 { NEW_WINDOWS_REQUIRE_WARNING_BLANK, "[10.1.1.2]: new windows require warning (_blank)." }, /* Access */
267 { FORM_CONTROL_REQUIRES_DEFAULT_TEXT, "[10.4.1.1]: form control requires default text." }, /* Access */
268 { FORM_CONTROL_DEFAULT_TEXT_INVALID_NULL, "[10.4.1.2]: form control default text invalid (null)." }, /* Access */
269 { FORM_CONTROL_DEFAULT_TEXT_INVALID_SPACES, "[10.4.1.3]: form control default text invalid (spaces)." }, /* Access */
270 { REPLACE_DEPRECATED_HTML_APPLET, "[11.2.1.1]: replace deprecated html <applet>." }, /* Access */
271 { REPLACE_DEPRECATED_HTML_BASEFONT, "[11.2.1.2]: replace deprecated html <basefont>." }, /* Access */
272 { REPLACE_DEPRECATED_HTML_CENTER, "[11.2.1.3]: replace deprecated html <center>." }, /* Access */
273 { REPLACE_DEPRECATED_HTML_DIR, "[11.2.1.4]: replace deprecated html <dir>." }, /* Access */
274 { REPLACE_DEPRECATED_HTML_FONT, "[11.2.1.5]: replace deprecated html <font>." }, /* Access */
275 { REPLACE_DEPRECATED_HTML_ISINDEX, "[11.2.1.6]: replace deprecated html <isindex>." }, /* Access */
276 { REPLACE_DEPRECATED_HTML_MENU, "[11.2.1.7]: replace deprecated html <menu>." }, /* Access */
277 { REPLACE_DEPRECATED_HTML_S, "[11.2.1.8]: replace deprecated html <s>." }, /* Access */
278 { REPLACE_DEPRECATED_HTML_STRIKE, "[11.2.1.9]: replace deprecated html <strike>." }, /* Access */
279 { REPLACE_DEPRECATED_HTML_U, "[11.2.1.10]: replace deprecated html <u>." }, /* Access */
280 { FRAME_MISSING_TITLE, "[12.1.1.1]: <frame> missing title." }, /* Access */
281 { FRAME_TITLE_INVALID_NULL, "[12.1.1.2]: <frame> title invalid (null)." }, /* Access */
282 { FRAME_TITLE_INVALID_SPACES, "[12.1.1.3]: <frame> title invalid (spaces)." }, /* Access */
283 { ASSOCIATE_LABELS_EXPLICITLY, "[12.4.1.1]: associate labels explicitly with form controls." }, /* Access */
284 { ASSOCIATE_LABELS_EXPLICITLY_FOR, "[12.4.1.2]: associate labels explicitly with form controls (for)." }, /* Access */
285 { ASSOCIATE_LABELS_EXPLICITLY_ID, "[12.4.1.3]: associate labels explicitly with form controls (id)." }, /* Access */
286 { LINK_TEXT_NOT_MEANINGFUL, "[13.1.1.1]: link text not meaningful." }, /* Access */
287 { LINK_TEXT_MISSING, "[13.1.1.2]: link text missing." }, /* Access */
288 { LINK_TEXT_TOO_LONG, "[13.1.1.3]: link text too long." }, /* Access */
289 { LINK_TEXT_NOT_MEANINGFUL_CLICK_HERE, "[13.1.1.4]: link text not meaningful (click here)." }, /* Access */
290 { METADATA_MISSING, "[13.2.1.1]: Metadata missing." }, /* Access */
291 { METADATA_MISSING_REDIRECT_AUTOREFRESH, "[13.2.1.3]: Metadata missing (redirect/auto-refresh)." }, /* Access */
292 { SKIPOVER_ASCII_ART, "[13.10.1.1]: skip over ascii art." }, /* Access */
293
294 #endif /* SUPPORT_ACCESSIBILITY_CHECKS */
295
296 /* must be last */
297 { 0, NULL }
298 };
299
GetFormatFromCode(uint code)300 static ctmbstr GetFormatFromCode(uint code)
301 {
302 uint i;
303
304 for (i = 0; msgFormat[i].fmt; ++i)
305 if (msgFormat[i].code == code)
306 return msgFormat[i].fmt;
307
308 return NULL;
309 }
310
311 /*
312 Documentation of configuration options
313 */
314
315 /* Cross references */
316 static const TidyOptionId TidyXmlDeclLinks[] =
317 { TidyCharEncoding, TidyOutCharEncoding, TidyUnknownOption };
318 static const TidyOptionId TidyJoinClassesLinks[] =
319 { TidyJoinStyles, TidyDuplicateAttrs, TidyUnknownOption };
320 static const TidyOptionId TidyJoinStylesLinks[] =
321 { TidyJoinClasses, TidyDuplicateAttrs, TidyUnknownOption };
322 static const TidyOptionId TidyDuplicateAttrsLinks[] =
323 { TidyJoinClasses, TidyJoinStyles, TidyUnknownOption };
324 static const TidyOptionId TidyIndentContentLinks[] =
325 { TidyIndentSpaces, TidyUnknownOption };
326 static const TidyOptionId TidyIndentSpacesLinks[] =
327 { TidyIndentContent, TidyUnknownOption };
328 static const TidyOptionId TidyWrapAttValsLinks[] =
329 { TidyWrapScriptlets, TidyUnknownOption };
330 static const TidyOptionId TidyWrapScriptletsLinks[] =
331 { TidyWrapAttVals, TidyUnknownOption };
332 static const TidyOptionId TidyCharEncodingLinks[] =
333 { TidyInCharEncoding, TidyOutCharEncoding, TidyUnknownOption };
334 static const TidyOptionId TidyInCharEncodingLinks[] =
335 { TidyCharEncoding, TidyUnknownOption };
336 static const TidyOptionId TidyOutCharEncodingLinks[] =
337 { TidyCharEncoding, TidyUnknownOption };
338 static const TidyOptionId TidyErrFileLinks[] =
339 { TidyOutFile, TidyUnknownOption };
340 static const TidyOptionId TidyOutFileLinks[] =
341 { TidyErrFile, TidyUnknownOption };
342 static const TidyOptionId TidyBlockTagsLinks[] =
343 { TidyEmptyTags, TidyInlineTags, TidyPreTags, TidyUnknownOption };
344 static const TidyOptionId TidyEmptyTagsLinks[] =
345 { TidyBlockTags, TidyInlineTags, TidyPreTags, TidyUnknownOption };
346 static const TidyOptionId TidyInlineTagsLinks[] =
347 { TidyBlockTags, TidyEmptyTags, TidyPreTags, TidyUnknownOption };
348 static const TidyOptionId TidyPreTagsLinks[] =
349 { TidyBlockTags, TidyEmptyTags, TidyInlineTags, TidyUnknownOption };
350 static const TidyOptionId TidyMergeDivsLinks[] =
351 { TidyMakeClean, TidyMergeSpans, TidyUnknownOption };
352 static const TidyOptionId TidyMergeSpansLinks[] =
353 { TidyMakeClean, TidyMergeDivs, TidyUnknownOption };
354 static const TidyOptionId TidyAsciiCharsLinks[] =
355 { TidyMakeClean, TidyUnknownOption };
356 static const TidyOptionId TidyNumEntitiesLinks[] =
357 { TidyDoctype, TidyPreserveEntities, TidyUnknownOption };
358 static const TidyOptionId TidyDropFontTagsLinks[] =
359 { TidyMakeClean, TidyUnknownOption };
360 static const TidyOptionId TidyMakeCleanTagsLinks[] =
361 { TidyDropFontTags, TidyUnknownOption };
362
363 /* Documentation of options */
364 static const TidyOptionDoc option_docs[] =
365 {
366 {TidyXmlDecl,
367 "This option specifies if Tidy should add the XML declaration when "
368 "outputting XML or XHTML. Note that if the input already includes an "
369 "<?xml ... ?> declaration then this option will be ignored. "
370 "If the encoding for the output is different from \"ascii\", one of the "
371 "utf encodings or \"raw\", the declaration is always added as required by "
372 "the XML standard. "
373 , TidyXmlDeclLinks
374 },
375 {TidyXmlSpace,
376 "This option specifies if Tidy should add xml:space=\"preserve\" to "
377 "elements such as <PRE>, <STYLE> and <SCRIPT> when "
378 "generating XML. This is needed if the whitespace in such elements is to "
379 "be parsed appropriately without having access to the DTD. "
380 },
381 {TidyAltText,
382 "This option specifies the default \"alt=\" text Tidy uses for "
383 "<IMG> attributes. This feature is dangerous as it suppresses "
384 "further accessibility warnings. You are responsible for making your "
385 "documents accessible to people who can not see the images! "
386 },
387 {TidyXmlPIs,
388 "This option specifies if Tidy should change the parsing of processing "
389 "instructions to require ?> as the terminator rather than >. This "
390 "option is automatically set if the input is in XML. "
391 },
392 {TidyMakeBare,
393 "This option specifies if Tidy should strip Microsoft specific HTML "
394 "from Word 2000 documents, and output spaces rather than non-breaking "
395 "spaces where they exist in the input. "
396 },
397 {TidyCSSPrefix,
398 "This option specifies the prefix that Tidy uses for styles rules. By "
399 "default, \"c\" will be used. "
400 },
401 {TidyMakeClean,
402 "This option specifies if Tidy "
403 "should strip out surplus presentational tags and attributes replacing "
404 "them by style rules and structural markup as appropriate. It works well "
405 "on the HTML saved by Microsoft Office products. "
406 , TidyMakeCleanTagsLinks
407 },
408 {TidyDoctype,
409 "This option specifies the DOCTYPE declaration generated by Tidy. If set "
410 "to \"omit\" the output won't contain a DOCTYPE declaration. If set to "
411 "\"auto\" (the default) Tidy will use an educated guess based upon the "
412 "contents of the document. If set to \"strict\", Tidy will set the DOCTYPE "
413 "to the strict DTD. If set to \"loose\", the DOCTYPE is set to the loose "
414 "(transitional) DTD. Alternatively, you can supply a string for the formal "
415 "public identifier (FPI).<br />"
416 "<br />"
417 "For example: <br />"
418 "doctype: \"-//ACME//DTD HTML 3.14159//EN\"<br />"
419 "<br />"
420 "If you specify the FPI for an XHTML document, Tidy will set the "
421 "system identifier to an empty string. For an HTML document, Tidy adds a "
422 "system identifier only if one was already present in order to preserve "
423 "the processing mode of some browsers. Tidy leaves the DOCTYPE for "
424 "generic XML documents unchanged. <code>--doctype omit</code> implies "
425 "<code>--numeric-entities yes</code>. This option does not offer a "
426 "validation of the document conformance. "
427 },
428 {TidyDropEmptyParas,
429 "This option specifies if Tidy should discard empty paragraphs. "
430 },
431 {TidyDropFontTags,
432 "This option specifies if Tidy should discard <FONT> and "
433 "<CENTER> tags without creating the corresponding style rules. This "
434 "option can be set independently of the clean option. "
435 , TidyDropFontTagsLinks
436 },
437 {TidyDropPropAttrs,
438 "This option specifies if Tidy should strip out proprietary attributes, "
439 "such as MS data binding attributes. "
440 },
441 {TidyEncloseBlockText,
442 "This option specifies if Tidy should insert a <P> element to "
443 "enclose any text it finds in any element that allows mixed content for "
444 "HTML transitional but not HTML strict. "
445 },
446 {TidyEncloseBodyText,
447 "This option specifies if Tidy should enclose any text it finds in the "
448 "body element within a <P> element. This is useful when you want to "
449 "take existing HTML and use it with a style sheet. "
450 },
451 {TidyEscapeCdata,
452 "This option specifies if Tidy should convert <![CDATA[]]> "
453 "sections to normal text. "
454 },
455 {TidyFixComments,
456 "This option specifies if Tidy should replace unexpected hyphens with "
457 "\"=\" characters when it comes across adjacent hyphens. The default is "
458 "yes. This option is provided for users of Cold Fusion which uses the "
459 "comment syntax: <!--- ---> "
460 },
461 {TidyFixUri,
462 "This option specifies if Tidy should check attribute values that carry "
463 "URIs for illegal characters and if such are found, escape them as HTML 4 "
464 "recommends. "
465 },
466 {TidyHideComments,
467 "This option specifies if Tidy should print out comments. "
468 },
469 {TidyHideEndTags,
470 "This option specifies if Tidy should omit optional end-tags when "
471 "generating the pretty printed markup. This option is ignored if you are "
472 "outputting to XML. "
473 },
474 {TidyIndentCdata,
475 "This option specifies if Tidy should indent <![CDATA[]]> sections. "
476 },
477 {TidyXmlTags,
478 "This option specifies if Tidy should use the XML parser rather than the "
479 "error correcting HTML parser. "
480 },
481 {TidyJoinClasses,
482 "This option specifies if Tidy should combine class names to generate "
483 "a single new class name, if multiple class assignments are detected on "
484 "an element. "
485 , TidyJoinClassesLinks
486 },
487 {TidyJoinStyles,
488 "This option specifies if Tidy should combine styles to generate a single "
489 "new style, if multiple style values are detected on an element. "
490 , TidyJoinStylesLinks
491 },
492 {TidyLogicalEmphasis,
493 "This option specifies if Tidy should replace any occurrence of <I> "
494 "by <EM> and any occurrence of <B> by <STRONG>. In both "
495 "cases, the attributes are preserved unchanged. This option can be set "
496 "independently of the clean and drop-font-tags options. "
497 },
498 {TidyLowerLiterals,
499 "This option specifies if Tidy should convert the value of an attribute "
500 "that takes a list of predefined values to lower case. This is required "
501 "for XHTML documents. "
502 },
503 {TidyMergeDivs,
504 "Can be used to modify behavior of -c (--clean yes) option. "
505 "This option specifies if Tidy should merge nested <div> such as "
506 "\"<div><div>...</div></div>\". If set to "
507 "\"auto\", the attributes of the inner <div> are moved to the "
508 "outer one. As well, nested <div> with ID attributes are not "
509 "merged. If set to \"yes\", the attributes of the inner <div> "
510 "are discarded with the exception of \"class\" and \"style\". "
511 ,TidyMergeDivsLinks
512 },
513 {TidyMergeSpans,
514 "Can be used to modify behavior of -c (--clean yes) option. "
515 "This option specifies if Tidy should merge nested <span> such as "
516 "\"<span><span>...</span></span>\". The algorithm "
517 "is identical to the one used by --merge-divs. "
518 ,TidyMergeSpansLinks
519 },
520 #if SUPPORT_ASIAN_ENCODINGS
521 {TidyNCR,
522 "This option specifies if Tidy should allow numeric character references. "
523 },
524 #endif
525 {TidyBlockTags,
526 "This option specifies new block-level tags. This option takes a space or "
527 "comma separated list of tag names. Unless you declare new tags, Tidy will "
528 "refuse to generate a tidied file if the input includes previously unknown "
529 "tags. Note you can't change the content model for elements such as "
530 "<TABLE>, <UL>, <OL> and <DL>. This option is "
531 "ignored in XML mode. "
532 ,TidyBlockTagsLinks
533 },
534 {TidyEmptyTags,
535 "This option specifies new empty inline tags. This option takes a space "
536 "or comma separated list of tag names. Unless you declare new tags, Tidy "
537 "will refuse to generate a tidied file if the input includes previously "
538 "unknown tags. Remember to also declare empty tags as either inline or "
539 "blocklevel. This option is ignored in XML mode. "
540 ,TidyEmptyTagsLinks
541 },
542 {TidyInlineTags,
543 "This option specifies new non-empty inline tags. This option takes a "
544 "space or comma separated list of tag names. Unless you declare new tags, "
545 "Tidy will refuse to generate a tidied file if the input includes "
546 "previously unknown tags. This option is ignored in XML mode. "
547 ,TidyInlineTagsLinks
548 },
549 { TidyPreTags,
550 "This option specifies "
551 "new tags that are to be processed in exactly the same way as HTML's "
552 "<PRE> element. This option takes a space or comma separated list "
553 "of tag names. Unless you declare new tags, Tidy will refuse to generate "
554 "a tidied file if the input includes previously unknown tags. Note you "
555 "can not as yet add new CDATA elements (similar to <SCRIPT>). "
556 "This option is ignored in XML mode. "
557 ,TidyPreTagsLinks
558 },
559 {TidyNumEntities,
560 "This option specifies if Tidy should output entities other than the "
561 "built-in HTML entities (&amp;, &lt;, &gt; and &quot;) in "
562 "the numeric rather than the named entity form. Only entities compatible "
563 "with the DOCTYPE declaration generated are used. Entities that can be "
564 "represented in the output encoding are translated correspondingly. "
565 ,TidyNumEntitiesLinks
566 },
567 {TidyHtmlOut,
568 "This option specifies if Tidy should generate pretty printed output, "
569 "writing it as HTML. "
570 },
571 {TidyXhtmlOut,
572 "This option specifies if Tidy should generate pretty printed output, "
573 "writing it as extensible HTML. "
574 "This option causes Tidy to set the DOCTYPE and default namespace as "
575 "appropriate to XHTML. If a DOCTYPE or namespace is given they will "
576 "checked for consistency with the content of the document. In the case of "
577 "an inconsistency, the corrected values will appear in the output. For "
578 "XHTML, entities can be written as named or numeric entities according to "
579 "the setting of the \"numeric-entities\" option. The original case of tags "
580 "and attributes will be preserved, regardless of other options. "
581 },
582 {TidyXmlOut,
583 "This option specifies if Tidy should pretty print output, writing it as "
584 "well-formed XML. Any entities not defined in XML 1.0 will be written as "
585 "numeric entities to allow them to be parsed by a XML parser. The original "
586 "case of tags and attributes will be preserved, regardless of other "
587 "options. "
588 },
589 {TidyQuoteAmpersand,
590 "This option specifies if Tidy should output unadorned & characters as "
591 "&amp;. "
592 },
593 {TidyQuoteMarks,
594 "This option specifies if Tidy should output " characters as "
595 "&quot; as is preferred by some editing environments. The apostrophe "
596 "character ' is written out as &#39; since many web browsers don't yet "
597 "support &apos;. "
598 },
599 {TidyQuoteNbsp,
600 "This option specifies if Tidy should output non-breaking space characters "
601 "as entities, rather than as the Unicode character value 160 (decimal). "
602 },
603 {TidyDuplicateAttrs,
604 "This option specifies if Tidy should keep the first or last attribute, if "
605 "an attribute is repeated, e.g. has two align attributes. "
606 , TidyDuplicateAttrsLinks
607 },
608 {TidySortAttributes,
609 "This option specifies that tidy should sort attributes within an element "
610 "using the specified sort algorithm. If set to \"alpha\", the algorithm is "
611 "an ascending alphabetic sort. "
612 },
613 {TidyReplaceColor,
614 "This option specifies if Tidy should replace numeric values in color "
615 "attributes by HTML/XHTML color names where defined, e.g. replace "
616 "\"#ffffff\" with \"white\". "
617 },
618 {TidyBodyOnly,
619 "This option specifies if Tidy should print only the contents of the "
620 "body tag as an HTML fragment. If set to \"auto\", this is performed only "
621 "if the body tag has been inferred. Useful for incorporating "
622 "existing whole pages as a portion of another page. "
623 "This option has no effect if XML output is requested. "
624 },
625 {TidyUpperCaseAttrs,
626 "This option specifies if Tidy should output attribute names in upper "
627 "case. The default is no, which results in lower case attribute names, "
628 "except for XML input, where the original case is preserved. "
629 },
630 {TidyUpperCaseTags,
631 "This option specifies if Tidy should output tag names in upper case. "
632 "The default is no, which results in lower case tag names, except for XML "
633 "input, where the original case is preserved. "
634 },
635 {TidyWord2000,
636 "This option specifies if Tidy should go to great pains to strip out all "
637 "the surplus stuff Microsoft Word 2000 inserts when you save Word "
638 "documents as \"Web pages\". Doesn't handle embedded images or VML. "
639 "You should consider using Word's \"Save As: Web Page, Filtered\". "
640 },
641 {TidyAccessibilityCheckLevel,
642 "This option specifies what level of accessibility checking, if any, "
643 "that Tidy should do. Level 0 is equivalent to Tidy Classic's "
644 "accessibility checking. "
645 "For more information on Tidy's accessibility checking, visit the "
646 "<a href=\"http://www.aprompt.ca/Tidy/accessibilitychecks.html\" "
647 ">Adaptive Technology Resource Centre at the University of Toronto</a>. "
648 },
649 {TidyShowErrors,
650 "This option specifies the number Tidy uses to determine if further errors "
651 "should be shown. If set to 0, then no errors are shown. "
652 },
653 {TidyShowWarnings,
654 "This option specifies if Tidy should suppress warnings. This can be "
655 "useful when a few errors are hidden in a flurry of warnings. "
656 },
657 {TidyBreakBeforeBR,
658 "This option specifies if Tidy should output a line break before each "
659 "<BR> element. "
660 },
661 {TidyIndentContent,
662 "This option specifies if Tidy should indent block-level tags. If set to "
663 "\"auto\", this option causes Tidy to decide whether or not to indent the "
664 "content of tags such as TITLE, H1-H6, LI, TD, TD, or P depending on "
665 "whether or not the content includes a block-level element. You are "
666 "advised to avoid setting indent to yes as this can expose layout bugs in "
667 "some browsers. "
668 ,TidyIndentContentLinks
669 },
670 {TidyIndentAttributes,
671 "This option specifies if Tidy should begin each attribute on a new line. "
672 },
673 {TidyIndentSpaces,
674 "This option specifies the number of spaces Tidy uses to indent content, "
675 "when indentation is enabled. "
676 ,TidyIndentSpacesLinks
677 },
678 {TidyLiteralAttribs,
679 "This option specifies if Tidy should ensure that whitespace characters "
680 "within attribute values are passed through unchanged. "
681 },
682 {TidyShowMarkup,
683 "This option specifies if Tidy should generate a pretty printed version "
684 "of the markup. Note that Tidy won't generate a pretty printed version if "
685 "it finds significant errors (see force-output). "
686 },
687 #if SUPPORT_ASIAN_ENCODINGS
688 {TidyPunctWrap,
689 "This option specifies if Tidy should line wrap after some Unicode or "
690 "Chinese punctuation characters. "
691 },
692 #endif
693 {TidyBurstSlides,
694 "Currently not used. Tidy Classic only. "
695 },
696 {TidyTabSize,
697 "This option specifies the number of columns that Tidy uses between "
698 "successive tab stops. It is used to map tabs to spaces when reading the "
699 "input. Tidy never outputs tabs. "
700 },
701 {TidyVertSpace,
702 "This option specifies if Tidy should add some empty lines for "
703 "readability. "
704 },
705 {TidyWrapLen,
706 "This option specifies the right margin Tidy uses for line wrapping. Tidy "
707 "tries to wrap lines so that they do not exceed this length. Set wrap to "
708 "zero if you want to disable line wrapping. "
709 },
710 {TidyWrapAsp,
711 "This option specifies if Tidy should line wrap text contained within ASP "
712 "pseudo elements, which look like: <% ... %>. "
713 },
714 {TidyWrapAttVals,
715 "This option specifies if Tidy should line wrap attribute values, for "
716 "easier editing. This option can be set independently of "
717 "wrap-script-literals. "
718 ,TidyWrapAttValsLinks
719 },
720 {TidyWrapJste,
721 "This option specifies if Tidy should line wrap text contained within "
722 "JSTE pseudo elements, which look like: <# ... #>. "
723 },
724 {TidyWrapPhp,
725 "This option specifies if Tidy should line wrap text contained within PHP "
726 "pseudo elements, which look like: <?php ... ?>. "
727 },
728 {TidyWrapScriptlets,
729 "This option specifies if Tidy should line wrap string literals that "
730 "appear in script attributes. Tidy wraps long script string literals by "
731 "inserting a backslash character before the line break. "
732 ,TidyWrapScriptletsLinks
733 },
734 {TidyWrapSection,
735 "This option specifies if Tidy should line wrap text contained within "
736 "<![ ... ]> section tags. "
737 },
738 {TidyAsciiChars,
739 "Can be used to modify behavior of -c (--clean yes) option. If set "
740 "to \"yes\" when using -c, &emdash;, &rdquo;, and other named "
741 "character entities are downgraded to their closest ascii equivalents. "
742 ,TidyAsciiCharsLinks
743 },
744 {TidyCharEncoding,
745 "This option specifies the character encoding Tidy uses for both the input "
746 "and output. For ascii, Tidy will accept Latin-1 (ISO-8859-1) character "
747 "values, but will use entities for all characters whose value > 127. "
748 "For raw, Tidy will output values above 127 without translating them into "
749 "entities. For latin1, characters above 255 will be written as entities. "
750 "For utf8, Tidy assumes that both input and output is encoded as UTF-8. "
751 "You can use iso2022 for files encoded using the ISO-2022 family of "
752 "encodings e.g. ISO-2022-JP. For mac and win1252, Tidy will accept vendor "
753 "specific character values, but will use entities for all characters whose "
754 "value > 127. "
755 "For unsupported encodings, use an external utility to convert to and from "
756 "UTF-8. "
757 ,TidyCharEncodingLinks
758 },
759 {TidyInCharEncoding,
760 "This option specifies the character encoding Tidy uses for the input. See "
761 "char-encoding for more info. "
762 ,TidyInCharEncodingLinks
763 },
764 #if SUPPORT_ASIAN_ENCODINGS
765 {TidyLanguage,
766 "Currently not used, but this option specifies the language Tidy uses "
767 "(for instance \"en\"). "
768 },
769 #endif
770 #if SUPPORT_UTF16_ENCODINGS
771 {TidyOutputBOM,
772 "This option specifies if Tidy should write a Unicode Byte Order Mark "
773 "character (BOM; also known as Zero Width No-Break Space; has value of "
774 "U+FEFF) to the beginning of the output; only for UTF-8 and UTF-16 output "
775 "encodings. If set to \"auto\", this option causes Tidy to write a BOM to "
776 "the output only if a BOM was present at the beginning of the input. A BOM "
777 "is always written for XML/XHTML output using UTF-16 output encodings. "
778 },
779 #endif
780 {TidyOutCharEncoding,
781 "This option specifies the character encoding Tidy uses for the output. "
782 "See char-encoding for more info. May only be different from "
783 "input-encoding for Latin encodings (ascii, latin0, latin1, mac, win1252, "
784 "ibm858). "
785 ,TidyOutCharEncodingLinks
786 },
787 {TidyNewline,
788 "The default is appropriate to the current platform: CRLF on PC-DOS, "
789 "MS-Windows and OS/2, CR on Classic Mac OS, and LF everywhere else "
790 "(Unix and Linux). "
791 },
792 {TidyErrFile,
793 "This option specifies the error file Tidy uses for errors and warnings. "
794 "Normally errors and warnings are output to \"stderr\". "
795 ,TidyErrFileLinks
796 },
797 {TidyFixBackslash,
798 "This option specifies if Tidy should replace backslash characters "
799 "\"<code>\\</code>\" in URLs by forward slashes \"<code>/</code>\". "
800 },
801 {TidyForceOutput,
802 "This option specifies if Tidy should produce output even if errors are "
803 "encountered. Use this option with care - if Tidy reports an error, this "
804 "means Tidy was not able to, or is not sure how to, fix the error, so the "
805 "resulting output may not reflect your intention. "
806 },
807 {TidyEmacs,
808 "This option specifies if Tidy should change the format for reporting "
809 "errors and warnings to a format that is more easily parsed by GNU Emacs. "
810 },
811 {TidyEmacsFile,
812 "Used internally. "
813 },
814 {TidyKeepFileTimes,
815 "This option specifies if Tidy should keep the original modification time "
816 "of files that Tidy modifies in place. The default is no. Setting the "
817 "option to yes allows you to tidy files without causing these files to be "
818 "uploaded to a web server when using a tool such as SiteCopy. Note this "
819 "feature is not supported on some platforms. "
820 },
821 {TidyOutFile,
822 "This option specifies the output file Tidy uses for markup. Normally "
823 "markup is written to \"stdout\". "
824 ,TidyOutFileLinks
825 },
826 {TidyQuiet,
827 "This option specifies if Tidy should output the summary of the numbers "
828 "of errors and warnings, or the welcome or informational messages. "
829 },
830 {TidySlideStyle,
831 "Currently not used. Tidy Classic only. "
832 },
833 {TidyMark,
834 "This option specifies if Tidy should add a meta element to the document "
835 "head to indicate that the document has been tidied. Tidy won't add a meta "
836 "element if one is already present. "
837 },
838 {TidyWriteBack,
839 "This option specifies if Tidy should write back the tidied markup to the "
840 "same file it read from. You are advised to keep copies of important files "
841 "before tidying them, as on rare occasions the result may not be what you "
842 "expect. "
843 },
844 {TidyDecorateInferredUL,
845 "This option specifies if Tidy should decorate inferred UL elements with "
846 "some CSS markup to avoid indentation to the right. "
847 },
848 {TidyPreserveEntities,
849 "This option specifies if Tidy should preserve the well-formed entitites "
850 "as found in the input. "
851 },
852 {TidyAnchorAsName,
853 "This option controls the deletion or addition of the name attribute "
854 "in elements where it can serve as anchor. "
855 "If set to \"yes\", a name attribute, if not already existing, "
856 "is added along an existing id attribute if the DTD allows it. "
857 "If set to \"no\", any existing name attribute is removed "
858 "if an id attribute exists or has been added. "
859 },
860 {N_TIDY_OPTIONS,
861 NULL
862 }
863 };
864
TY_(OptGetDocDesc)865 const TidyOptionDoc* TY_(OptGetDocDesc)( TidyOptionId optId )
866 {
867 uint i = 0;
868
869 while( option_docs[i].opt != N_TIDY_OPTIONS )
870 {
871 if ( option_docs[i].opt == optId )
872 return &option_docs[i];
873 ++i;
874 }
875 return NULL;
876 }
877
878
/* Writes the label for a report level (for example "Warning: ") into buf.
**
** count is forwarded to tmbstrncpy as the size limit for the copy.
** buf is left as an empty string when level is none of the values
** handled below.
** Returns buf advanced by the length of the text now held in it.
*/
static char* LevelPrefix( TidyReportLevel level, char* buf, size_t count )
{
    const char* label = NULL;

    switch ( level )
    {
    case TidyInfo:         label = "Info: ";      break;
    case TidyWarning:      label = "Warning: ";   break;
    case TidyConfig:       label = "Config: ";    break;
    case TidyAccess:       label = "Access: ";    break;
    case TidyError:        label = "Error: ";     break;
    case TidyBadDocument:  label = "Document: ";  break;
    case TidyFatal:        label = "panic: ";     break;
    }

    *buf = 0;
    if ( label )
        TY_(tmbstrncpy)( buf, label, count );

    return buf + TY_(tmbstrlen)( buf );
}
908
909 /* Updates document message counts and
910 ** compares counts to options to see if message
911 ** display should go forward.
912 */
UpdateCount(TidyDocImpl * doc,TidyReportLevel level)913 static Bool UpdateCount( TidyDocImpl* doc, TidyReportLevel level )
914 {
915 /* keep quiet after <ShowErrors> errors */
916 Bool go = ( doc->errors < cfg(doc, TidyShowErrors) );
917
918 switch ( level )
919 {
920 case TidyInfo:
921 doc->infoMessages++;
922 break;
923 case TidyWarning:
924 doc->warnings++;
925 go = go && cfgBool( doc, TidyShowWarnings );
926 break;
927 case TidyConfig:
928 doc->optionErrors++;
929 break;
930 case TidyAccess:
931 doc->accessErrors++;
932 break;
933 case TidyError:
934 doc->errors++;
935 break;
936 case TidyBadDocument:
937 doc->docErrors++;
938 break;
939 case TidyFatal:
940 /* Ack! */;
941 break;
942 }
943
944 return go;
945 }
946
/* Formats the "where" part of a diagnostic into buf (at most count chars
** as enforced by tmbsnprintf).
**
** Two layouts exist:
**   - "line N column M - "  : the traditional one, used unless both
**                             TidyEmacs is on and TidyEmacsFile is set;
**   - "file:N:M: "          : the layout GNU Emacs can parse.
** Returns buf advanced by the length of the text written.
*/
static char* ReportPosition(TidyDocImpl* doc, int line, int col, char* buf, size_t count)
{
    *buf = 0;

    if ( !cfgBool(doc, TidyEmacs) || !cfgStr(doc, TidyEmacsFile) )
    {
        /* traditional format */
        TY_(tmbsnprintf)(buf, count, "line %d column %d - ", line, col);
    }
    else
    {
        /* format parsable by GNU Emacs */
        TY_(tmbsnprintf)(buf, count, "%s:%d:%d: ",
                         cfgStr(doc, TidyEmacsFile), line, col);
    }

    return buf + TY_(tmbstrlen)( buf );
}
959
/* Core message emitter: every warning, error, etc. funnels through here.
**
** Steps, in order:
**   1. The counters for the level are updated (UpdateCount); this
**      happens even when the message ends up not being displayed.
**   2. If display is still wanted, msg/args are formatted into a
**      2048-char buffer and, when the caller installed a message
**      filter (doc->mssgFilt), the filter is invoked; its return
**      value decides whether output goes ahead.
**   3. The position (only when line > 0 and col > 0), the level
**      prefix, the formatted text and a newline are written to
**      doc->errout.
**
** The preferred ways of customising Tidy's diagnostics output are
** either a) defining a new output sink or b) installing a message
** filter routine.
*/

static void messagePos( TidyDocImpl* doc, TidyReportLevel level,
                        int line, int col, ctmbstr msg, va_list args )
#ifdef __GNUC__
__attribute__((format(printf, 5, 0)))
#endif
;
static void messagePos( TidyDocImpl* doc, TidyReportLevel level,
                        int line, int col, ctmbstr msg, va_list args )
{
    enum { sizeMessageBuf=2048 };
    char *text = TidyDocAlloc(doc,sizeMessageBuf);
    Bool show = UpdateCount( doc, level );

    if ( show )
    {
        TY_(tmbvsnprintf)(text, sizeMessageBuf, msg, args);
        if ( doc->mssgFilt )
            show = doc->mssgFilt( tidyImplToDoc( doc ), level, line, col, text );
    }

    if ( show )
    {
        enum { sizeBuf=1024 };
        char *scratch = TidyDocAlloc(doc,sizeBuf);   /* reused for position, then prefix */
        const char *p;

        if ( line > 0 && col > 0 )
        {
            ReportPosition(doc, line, col, scratch, sizeBuf);
            p = scratch;
            while ( *p )
            {
                TY_(WriteChar)( *p, doc->errout );
                ++p;
            }
        }

        LevelPrefix( level, scratch, sizeBuf );
        p = scratch;
        while ( *p )
        {
            TY_(WriteChar)( *p, doc->errout );
            ++p;
        }

        p = text;
        while ( *p )
        {
            TY_(WriteChar)( *p, doc->errout );
            ++p;
        }
        TY_(WriteChar)( '\n', doc->errout );

        TidyDocFree(doc, scratch);
    }

    TidyDocFree(doc, text);
}
1017
/* Reports a message without a source position (line and column are passed as 0). */
1019 static
1020 void message( TidyDocImpl* doc, TidyReportLevel level, ctmbstr msg, ... )
1021 #ifdef __GNUC__
1022 __attribute__((format(printf, 3, 4)))
1023 #endif
1024 ;
1025
1026 /* Reports error at node line/column. */
1027 static
1028 void messageNode( TidyDocImpl* doc, TidyReportLevel level,
1029 Node* node, ctmbstr msg, ... )
1030 #ifdef __GNUC__
1031 __attribute__((format(printf, 4, 5)))
1032 #endif
1033 ;
1034
/* Reports error at the lexer's current line/column (0/0 if there is no lexer). */
1036 static
1037 void messageLexer( TidyDocImpl* doc, TidyReportLevel level,
1038 ctmbstr msg, ... )
1039 #ifdef __GNUC__
1040 __attribute__((format(printf, 3, 4)))
1041 #endif
1042 ;
1043
1044 /* For general reporting. Emits nothing if --quiet yes */
1045 static
1046 void tidy_out( TidyDocImpl* doc, ctmbstr msg, ... )
1047 #ifdef __GNUC__
1048 __attribute__((format(printf, 2, 3)))
1049 #endif
1050 ;
1051
1052
/* printf-style front end to messagePos for messages that carry no
** source position: line and column are both passed as 0.
*/
void message( TidyDocImpl* doc, TidyReportLevel level, ctmbstr msg, ... )
{
    va_list ap;

    va_start( ap, msg );
    messagePos( doc, level, 0, 0, msg, ap );
    va_end( ap );
}
1060
1061
/* printf-style front end to messagePos that reports at the lexer's
** current line/column. Both default to 0 when the document has no
** lexer.
*/
void messageLexer( TidyDocImpl* doc, TidyReportLevel level, ctmbstr msg, ... )
{
    int line = 0;
    int col = 0;
    va_list ap;

    if ( doc->lexer )
    {
        line = doc->lexer->lines;
        col = doc->lexer->columns;
    }

    va_start( ap, msg );
    messagePos( doc, level, line, col, msg, ap );
    va_end( ap );
}
1072
/* printf-style front end to messagePos that reports at a node's
** line/column. When node is NULL the lexer's current position is
** used instead; when there is no lexer either, 0/0 is passed.
*/
void messageNode( TidyDocImpl* doc, TidyReportLevel level, Node* node,
                  ctmbstr msg, ... )
{
    int line = 0;
    int col = 0;
    va_list ap;

    if ( node )
    {
        line = node->line;
        col = node->column;
    }
    else if ( doc->lexer )
    {
        line = doc->lexer->lines;
        col = doc->lexer->columns;
    }

    va_start( ap, msg );
    messagePos( doc, level, line, col, msg, ap );
    va_end( ap );
}
1086
/* General-purpose printf-style output to doc->errout.
** Does nothing at all when the TidyQuiet option is set. The text is
** formatted into a 2048-char buffer and written as is: no position,
** no level prefix and no trailing newline are added.
*/
void tidy_out( TidyDocImpl* doc, ctmbstr msg, ... )
{
    enum { sizeBuf=2048 };
    char *text;
    ctmbstr p;
    va_list ap;

    if ( cfgBool(doc, TidyQuiet) )
        return;

    text = TidyDocAlloc(doc,sizeBuf);

    va_start( ap, msg );
    TY_(tmbvsnprintf)(text, sizeBuf, msg, ap);
    va_end( ap );

    p = text;
    while ( *p )
    {
        TY_(WriteChar)( *p, doc->errout );
        ++p;
    }

    TidyDocFree(doc, text);
}
1105
#if 0
/* Disabled code (never compiled): prints a version banner through
** tidy_out. When PLATFORM_NAME is defined the banner reads
** "HTML Tidy for <platform>"; otherwise both inserts are empty.
*/
void ShowVersion( TidyDocImpl* doc )
{
#ifdef PLATFORM_NAME
    ctmbstr helper = " for ";
    ctmbstr platform = PLATFORM_NAME;
#else
    ctmbstr helper = "";
    ctmbstr platform = "";
#endif

    tidy_out( doc,
              "\nHTML Tidy%s%s (release date: %s; built on %s, at %s)\n"
              "See http://tidy.sourceforge.net/ for details.\n",
              helper, platform, TY_(release_date), __DATE__, __TIME__ );
}
#endif
1121
TY_(FileError)1122 void TY_(FileError)( TidyDocImpl* doc, ctmbstr file, TidyReportLevel level )
1123 {
1124 message( doc, level, "Can't open \"%s\"\n", file );
1125 }
1126
/* Produces a short human-readable description of a node in buf
** (bounded by count through tmbsnprintf):
**
**   element node        -> "<name>"
**   EndTag              -> "</name>"
**   DocTypeTag          -> "<!DOCTYPE>"
**   TextNode            -> "plain text"
**   XmlDecl             -> "XML declaration"
**   anything else       -> the element name, if the node has one
**
** buf is left empty for a NULL tag or for a node matching none of
** the above. Returns buf advanced by the length of the description.
*/
static char* TagToString(Node* tag, char* buf, size_t count)
{
    *buf = 0;

    if (!tag)
        return buf + TY_(tmbstrlen)(buf);

    if (TY_(nodeIsElement)(tag))
    {
        TY_(tmbsnprintf)(buf, count, "<%s>", tag->element);
    }
    else
    {
        switch (tag->type)
        {
        case EndTag:
            TY_(tmbsnprintf)(buf, count, "</%s>", tag->element);
            break;

        case DocTypeTag:
            TY_(tmbsnprintf)(buf, count, "<!DOCTYPE>");
            break;

        case TextNode:
            TY_(tmbsnprintf)(buf, count, "plain text");
            break;

        case XmlDecl:
            TY_(tmbsnprintf)(buf, count, "XML declaration");
            break;

        default:
            if (tag->element)
                TY_(tmbsnprintf)(buf, count, "%s", tag->element);
            break;
        }
    }

    return buf + TY_(tmbstrlen)(buf);
}
1147
1148 /* lexer is not defined when this is called */
TY_(ReportUnknownOption)1149 void TY_(ReportUnknownOption)( TidyDocImpl* doc, ctmbstr option )
1150 {
1151 assert( option != NULL );
1152 message( doc, TidyConfig, "unknown option: %s", option );
1153 }
1154
1155 /* lexer is not defined when this is called */
TY_(ReportBadArgument)1156 void TY_(ReportBadArgument)( TidyDocImpl* doc, ctmbstr option )
1157 {
1158 assert( option != NULL );
1159 message( doc, TidyConfig,
1160 "missing or malformed argument for option: %s", option );
1161 }
1162
/* Converts n to its decimal text representation in str.
**
** n   - value to convert; negative values are written with a
**       leading '-'
** str - destination, written as a nul-terminated string; the caller
**       must supply room for every digit of n, an optional sign and
**       the terminator
**
** The digits are produced from the magnitude of n held in an
** unsigned int. Taking n % 10 on a negative int yields a negative
** remainder, which previously turned into characters below '0'.
*/
static void NtoS(int n, tmbstr str)
{
    tmbchar digits[40];
    /* 0u - (unsigned)n negates without overflowing, even for INT_MIN */
    unsigned int magnitude = ( n < 0 ) ? 0u - (unsigned int)n
                                       : (unsigned int)n;
    int ndigits = 0;
    int out = 0;

    /* collect digits least-significant first; zero yields a single '0' */
    do
    {
        digits[ndigits++] = (tmbchar)( '0' + (int)( magnitude % 10u ) );
        magnitude /= 10u;
    }
    while ( magnitude != 0 );

    if ( n < 0 )
        str[out++] = '-';

    /* emit most-significant digit first */
    while ( ndigits > 0 )
        str[out++] = digits[--ndigits];

    str[out] = '\0';
}
1188
TY_(ReportEncodingWarning)1189 void TY_(ReportEncodingWarning)(TidyDocImpl* doc, uint code, uint encoding)
1190 {
1191 switch(code)
1192 {
1193 case ENCODING_MISMATCH:
1194 messageLexer(doc, TidyWarning, GetFormatFromCode(code),
1195 TY_(CharEncodingName)(doc->docIn->encoding),
1196 TY_(CharEncodingName)(encoding));
1197 doc->badChars |= BC_ENCODING_MISMATCH;
1198 break;
1199 }
1200 }
1201
TY_(ReportEncodingError)1202 void TY_(ReportEncodingError)(TidyDocImpl* doc, uint code, uint c, Bool discarded)
1203 {
1204 char buf[ 32 ] = {'\0'};
1205
1206 ctmbstr action = discarded ? "discarding" : "replacing";
1207 ctmbstr fmt = GetFormatFromCode(code);
1208
1209 /* An encoding mismatch is currently treated as a non-fatal error */
1210 switch (code)
1211 {
1212 case VENDOR_SPECIFIC_CHARS:
1213 NtoS(c, buf);
1214 doc->badChars |= BC_VENDOR_SPECIFIC_CHARS;
1215 break;
1216
1217 case INVALID_SGML_CHARS:
1218 NtoS(c, buf);
1219 doc->badChars |= BC_INVALID_SGML_CHARS;
1220 break;
1221
1222 case INVALID_UTF8:
1223 TY_(tmbsnprintf)(buf, sizeof(buf), "U+%04X", c);
1224 doc->badChars |= BC_INVALID_UTF8;
1225 break;
1226
1227 #if SUPPORT_UTF16_ENCODINGS
1228 case INVALID_UTF16:
1229 TY_(tmbsnprintf)(buf, sizeof(buf), "U+%04X", c);
1230 doc->badChars |= BC_INVALID_UTF16;
1231 break;
1232 #endif
1233
1234 case INVALID_NCR:
1235 NtoS(c, buf);
1236 doc->badChars |= BC_INVALID_NCR;
1237 break;
1238 }
1239
1240 if (fmt)
1241 messageLexer( doc, TidyWarning, fmt, action, buf );
1242 }
1243
TY_(ReportEntityError)1244 void TY_(ReportEntityError)( TidyDocImpl* doc, uint code, ctmbstr entity,
1245 int ARG_UNUSED(c) )
1246 {
1247 ctmbstr entityname = ( entity ? entity : "NULL" );
1248 ctmbstr fmt = GetFormatFromCode(code);
1249
1250 if (fmt)
1251 messageLexer( doc, TidyWarning, fmt, entityname );
1252 }
1253
TY_(ReportAttrError)1254 void TY_(ReportAttrError)(TidyDocImpl* doc, Node *node, AttVal *av, uint code)
1255 {
1256 char const *name = "NULL", *value = "NULL";
1257 char tagdesc[64];
1258 ctmbstr fmt = GetFormatFromCode(code);
1259
1260 assert( fmt != NULL );
1261
1262 TagToString(node, tagdesc, sizeof(tagdesc));
1263
1264 if (av)
1265 {
1266 if (av->attribute)
1267 name = av->attribute;
1268 if (av->value)
1269 value = av->value;
1270 }
1271
1272 switch (code)
1273 {
1274 case UNKNOWN_ATTRIBUTE:
1275 case INSERTING_ATTRIBUTE:
1276 case MISSING_ATTR_VALUE:
1277 case XML_ATTRIBUTE_VALUE:
1278 case PROPRIETARY_ATTRIBUTE:
1279 case JOINING_ATTRIBUTE:
1280 messageNode(doc, TidyWarning, node, fmt, tagdesc, name);
1281 break;
1282
1283 case BAD_ATTRIBUTE_VALUE:
1284 case BAD_ATTRIBUTE_VALUE_REPLACED:
1285 case INVALID_ATTRIBUTE:
1286 messageNode(doc, TidyWarning, node, fmt, tagdesc, name, value);
1287 break;
1288
1289 case UNEXPECTED_QUOTEMARK:
1290 case MISSING_QUOTEMARK:
1291 case ID_NAME_MISMATCH:
1292 case BACKSLASH_IN_URI:
1293 case FIXED_BACKSLASH:
1294 case ILLEGAL_URI_REFERENCE:
1295 case ESCAPED_ILLEGAL_URI:
1296 case NEWLINE_IN_URI:
1297 case WHITE_IN_URI:
1298 case UNEXPECTED_GT:
1299 case INVALID_XML_ID:
1300 case UNEXPECTED_EQUALSIGN:
1301 messageNode(doc, TidyWarning, node, fmt, tagdesc);
1302 break;
1303
1304 case XML_ID_SYNTAX:
1305 case PROPRIETARY_ATTR_VALUE:
1306 case ANCHOR_NOT_UNIQUE:
1307 case ATTR_VALUE_NOT_LCASE:
1308 messageNode(doc, TidyWarning, node, fmt, tagdesc, value);
1309 break;
1310
1311
1312 case MISSING_IMAGEMAP:
1313 messageNode(doc, TidyWarning, node, fmt, tagdesc);
1314 doc->badAccess |= BA_MISSING_IMAGE_MAP;
1315 break;
1316
1317 case REPEATED_ATTRIBUTE:
1318 messageNode(doc, TidyWarning, node, fmt, tagdesc, value, name);
1319 break;
1320
1321 case UNEXPECTED_END_OF_FILE_ATTR:
1322 /* on end of file adjust reported position to end of input */
1323 doc->lexer->lines = doc->docIn->curline;
1324 doc->lexer->columns = doc->docIn->curcol;
1325 messageLexer(doc, TidyWarning, fmt, tagdesc);
1326 break;
1327 }
1328 }
1329
TY_(ReportMissingAttr)1330 void TY_(ReportMissingAttr)( TidyDocImpl* doc, Node* node, ctmbstr name )
1331 {
1332 char tagdesc[ 64 ];
1333 ctmbstr fmt = GetFormatFromCode(MISSING_ATTRIBUTE);
1334
1335 assert( fmt != NULL );
1336 TagToString(node, tagdesc, sizeof(tagdesc));
1337 messageNode( doc, TidyWarning, node, fmt, tagdesc, name );
1338 }
1339
1340 #if SUPPORT_ACCESSIBILITY_CHECKS
1341
1342 /*********************************************************
1343 * Accessibility
1344 *
1345 * DisplayHTMLTableAlgorithm()
1346 *
1347 * If the table does contain 2 or more logical levels of
1348 * row or column headers, the HTML 4 table algorithm
1349 * to show the author how the headers are currently associated
1350 * with the cells.
1351 *********************************************************/
1352
TY_(DisplayHTMLTableAlgorithm)1353 void TY_(DisplayHTMLTableAlgorithm)( TidyDocImpl* doc )
1354 {
1355 tidy_out(doc, " \n");
1356 tidy_out(doc, " - First, search left from the cell's position to find row header cells.\n");
1357 tidy_out(doc, " - Then search upwards to find column header cells.\n");
1358 tidy_out(doc, " - The search in a given direction stops when the edge of the table is\n");
1359 tidy_out(doc, " reached or when a data cell is found after a header cell.\n");
1360 tidy_out(doc, " - Row headers are inserted into the list in the order they appear in\n");
1361 tidy_out(doc, " the table. \n");
1362 tidy_out(doc, " - For left-to-right tables, headers are inserted from left to right.\n");
1363 tidy_out(doc, " - Column headers are inserted after row headers, in \n");
1364 tidy_out(doc, " the order they appear in the table, from top to bottom. \n");
1365 tidy_out(doc, " - If a header cell has the headers attribute set, then the headers \n");
1366 tidy_out(doc, " referenced by this attribute are inserted into the list and the \n");
1367 tidy_out(doc, " search stops for the current direction.\n");
1368 tidy_out(doc, " TD cells that set the axis attribute are also treated as header cells.\n");
1369 tidy_out(doc, " \n");
1370 }
1371
TY_(ReportAccessWarning)1372 void TY_(ReportAccessWarning)( TidyDocImpl* doc, Node* node, uint code )
1373 {
1374 ctmbstr fmt = GetFormatFromCode(code);
1375 doc->badAccess |= BA_WAI;
1376 messageNode( doc, TidyAccess, node, fmt );
1377 }
1378
TY_(ReportAccessError)1379 void TY_(ReportAccessError)( TidyDocImpl* doc, Node* node, uint code )
1380 {
1381 ctmbstr fmt = GetFormatFromCode(code);
1382 doc->badAccess |= BA_WAI;
1383 messageNode( doc, TidyAccess, node, fmt );
1384 }
1385
1386 #endif /* SUPPORT_ACCESSIBILITY_CHECKS */
1387
TY_(ReportWarning)1388 void TY_(ReportWarning)(TidyDocImpl* doc, Node *element, Node *node, uint code)
1389 {
1390 Node* rpt = (element ? element : node);
1391 ctmbstr fmt = GetFormatFromCode(code);
1392 char nodedesc[256] = { 0 };
1393 char elemdesc[256] = { 0 };
1394
1395 assert( fmt != NULL );
1396
1397 TagToString(node, nodedesc, sizeof(nodedesc));
1398
1399 switch (code)
1400 {
1401 case NESTED_QUOTATION:
1402 messageNode(doc, TidyWarning, rpt, fmt);
1403 break;
1404
1405 case OBSOLETE_ELEMENT:
1406 TagToString(element, elemdesc, sizeof(elemdesc));
1407 messageNode(doc, TidyWarning, rpt, fmt, elemdesc, nodedesc);
1408 break;
1409
1410 case NESTED_EMPHASIS:
1411 messageNode(doc, TidyWarning, rpt, fmt, nodedesc);
1412 break;
1413 case COERCE_TO_ENDTAG_WARN:
1414 messageNode(doc, TidyWarning, rpt, fmt, node->element, node->element);
1415 break;
1416 }
1417 }
1418
TY_(ReportNotice)1419 void TY_(ReportNotice)(TidyDocImpl* doc, Node *element, Node *node, uint code)
1420 {
1421 Node* rpt = ( element ? element : node );
1422 ctmbstr fmt = GetFormatFromCode(code);
1423 char nodedesc[256] = { 0 };
1424 char elemdesc[256] = { 0 };
1425
1426 assert( fmt != NULL );
1427
1428 TagToString(node, nodedesc, sizeof(nodedesc));
1429
1430 switch (code)
1431 {
1432 case TRIM_EMPTY_ELEMENT:
1433 TagToString(element, elemdesc, sizeof(nodedesc));
1434 messageNode(doc, TidyWarning, element, fmt, elemdesc);
1435 break;
1436
1437 case REPLACING_ELEMENT:
1438 TagToString(element, elemdesc, sizeof(elemdesc));
1439 messageNode(doc, TidyWarning, rpt, fmt, elemdesc, nodedesc);
1440 break;
1441 }
1442 }
1443
TY_(ReportError)1444 void TY_(ReportError)(TidyDocImpl* doc, Node *element, Node *node, uint code)
1445 {
1446 char nodedesc[ 256 ] = {0};
1447 char elemdesc[ 256 ] = {0};
1448 Node* rpt = ( element ? element : node );
1449 ctmbstr fmt = GetFormatFromCode(code);
1450
1451 assert( fmt != NULL );
1452
1453 TagToString(node, nodedesc, sizeof(nodedesc));
1454
1455 switch ( code )
1456 {
1457 case MISSING_STARTTAG:
1458 case UNEXPECTED_ENDTAG:
1459 case TOO_MANY_ELEMENTS:
1460 case INSERTING_TAG:
1461 messageNode(doc, TidyWarning, node, fmt, node->element);
1462 break;
1463
1464 case USING_BR_INPLACE_OF:
1465 case CANT_BE_NESTED:
1466 case PROPRIETARY_ELEMENT:
1467 case UNESCAPED_ELEMENT:
1468 case NOFRAMES_CONTENT:
1469 messageNode(doc, TidyWarning, node, fmt, nodedesc);
1470 break;
1471
1472 case MISSING_TITLE_ELEMENT:
1473 case INCONSISTENT_VERSION:
1474 case MALFORMED_DOCTYPE:
1475 case CONTENT_AFTER_BODY:
1476 case MALFORMED_COMMENT:
1477 case BAD_COMMENT_CHARS:
1478 case BAD_XML_COMMENT:
1479 case BAD_CDATA_CONTENT:
1480 case INCONSISTENT_NAMESPACE:
1481 case DOCTYPE_AFTER_TAGS:
1482 case DTYPE_NOT_UPPER_CASE:
1483 messageNode(doc, TidyWarning, rpt, fmt);
1484 break;
1485
1486 case COERCE_TO_ENDTAG:
1487 case NON_MATCHING_ENDTAG:
1488 messageNode(doc, TidyWarning, rpt, fmt, node->element, node->element);
1489 break;
1490
1491 case UNEXPECTED_ENDTAG_IN:
1492 case TOO_MANY_ELEMENTS_IN:
1493 messageNode(doc, TidyWarning, node, fmt, node->element, element->element);
1494 if (cfgBool( doc, TidyShowWarnings ))
1495 messageNode(doc, TidyInfo, node, GetFormatFromCode(PREVIOUS_LOCATION),
1496 element->element);
1497 break;
1498
1499 case ENCODING_IO_CONFLICT:
1500 case MISSING_DOCTYPE:
1501 case SPACE_PRECEDING_XMLDECL:
1502 messageNode(doc, TidyWarning, node, fmt);
1503 break;
1504
1505 case TRIM_EMPTY_ELEMENT:
1506 case ILLEGAL_NESTING:
1507 case UNEXPECTED_END_OF_FILE:
1508 case ELEMENT_NOT_EMPTY:
1509 TagToString(element, elemdesc, sizeof(elemdesc));
1510 messageNode(doc, TidyWarning, element, fmt, elemdesc);
1511 break;
1512
1513
1514 case MISSING_ENDTAG_FOR:
1515 messageNode(doc, TidyWarning, rpt, fmt, element->element);
1516 break;
1517
1518 case MISSING_ENDTAG_BEFORE:
1519 messageNode(doc, TidyWarning, rpt, fmt, element->element, nodedesc);
1520 break;
1521
1522 case DISCARDING_UNEXPECTED:
1523 /* Force error if in a bad form */
1524 messageNode(doc, doc->badForm ? TidyError : TidyWarning, node, fmt, nodedesc);
1525 break;
1526
1527 case TAG_NOT_ALLOWED_IN:
1528 messageNode(doc, TidyWarning, node, fmt, nodedesc, element->element);
1529 if (cfgBool( doc, TidyShowWarnings ))
1530 messageNode(doc, TidyInfo, element,
1531 GetFormatFromCode(PREVIOUS_LOCATION), element->element);
1532 break;
1533
1534 case REPLACING_UNEX_ELEMENT:
1535 TagToString(element, elemdesc, sizeof(elemdesc));
1536 messageNode(doc, TidyWarning, rpt, fmt, elemdesc, nodedesc);
1537 break;
1538 }
1539 }
1540
TY_(ReportFatal)1541 void TY_(ReportFatal)( TidyDocImpl* doc, Node *element, Node *node, uint code)
1542 {
1543 char nodedesc[ 256 ] = {0};
1544 Node* rpt = ( element ? element : node );
1545 ctmbstr fmt = GetFormatFromCode(code);
1546
1547 switch ( code )
1548 {
1549 case SUSPECTED_MISSING_QUOTE:
1550 case DUPLICATE_FRAMESET:
1551 messageNode(doc, TidyError, rpt, fmt);
1552 break;
1553
1554 case UNKNOWN_ELEMENT:
1555 TagToString(node, nodedesc, sizeof(nodedesc));
1556 messageNode( doc, TidyError, node, fmt, nodedesc );
1557 break;
1558
1559 case UNEXPECTED_ENDTAG_IN:
1560 messageNode(doc, TidyError, node, fmt, node->element, element->element);
1561 break;
1562
1563 case UNEXPECTED_ENDTAG: /* generated by XML docs */
1564 messageNode(doc, TidyError, node, fmt, node->element);
1565 break;
1566 }
1567 }
1568
TY_(ErrorSummary)1569 void TY_(ErrorSummary)( TidyDocImpl* doc )
1570 {
1571 ctmbstr encnam = "specified";
1572 int charenc = cfg( doc, TidyCharEncoding );
1573 if ( charenc == WIN1252 )
1574 encnam = "Windows-1252";
1575 else if ( charenc == MACROMAN )
1576 encnam = "MacRoman";
1577 else if ( charenc == IBM858 )
1578 encnam = "ibm858";
1579 else if ( charenc == LATIN0 )
1580 encnam = "latin0";
1581
1582 /* adjust badAccess to that it is 0 if frames are ok */
1583 if ( doc->badAccess & (BA_USING_FRAMES | BA_USING_NOFRAMES) )
1584 {
1585 if (!((doc->badAccess & BA_USING_FRAMES) && !(doc->badAccess & BA_USING_NOFRAMES)))
1586 doc->badAccess &= ~(BA_USING_FRAMES | BA_USING_NOFRAMES);
1587 }
1588
1589 if (doc->badChars)
1590 {
1591 #if 0
1592 if ( doc->badChars & WINDOWS_CHARS )
1593 {
1594 tidy_out(doc, "Characters codes for the Microsoft Windows fonts in the range\n");
1595 tidy_out(doc, "128 - 159 may not be recognized on other platforms. You are\n");
1596 tidy_out(doc, "instead recommended to use named entities, e.g. ™ rather\n");
1597 tidy_out(doc, "than Windows character code 153 (0x2122 in Unicode). Note that\n");
1598 tidy_out(doc, "as of February 1998 few browsers support the new entities.\n\n");
1599 }
1600 #endif
1601 if (doc->badChars & BC_VENDOR_SPECIFIC_CHARS)
1602 {
1603
1604 tidy_out(doc, "It is unlikely that vendor-specific, system-dependent encodings\n");
1605 tidy_out(doc, "work widely enough on the World Wide Web; you should avoid using the \n");
1606 tidy_out(doc, "%s", encnam );
1607 tidy_out(doc, " character encoding, instead you are recommended to\n" );
1608 tidy_out(doc, "use named entities, e.g. ™.\n\n");
1609 }
1610 if ((doc->badChars & BC_INVALID_SGML_CHARS) || (doc->badChars & BC_INVALID_NCR))
1611 {
1612 tidy_out(doc, "Character codes 128 to 159 (U+0080 to U+009F) are not allowed in HTML;\n");
1613 tidy_out(doc, "even if they were, they would likely be unprintable control characters.\n");
1614 tidy_out(doc, "Tidy assumed you wanted to refer to a character with the same byte value in the \n");
1615 tidy_out(doc, "%s", encnam );
1616 tidy_out(doc, " encoding and replaced that reference with the Unicode equivalent.\n\n" );
1617 }
1618 if (doc->badChars & BC_INVALID_UTF8)
1619 {
1620 tidy_out(doc, "Character codes for UTF-8 must be in the range: U+0000 to U+10FFFF.\n");
1621 tidy_out(doc, "The definition of UTF-8 in Annex D of ISO/IEC 10646-1:2000 also\n");
1622 tidy_out(doc, "allows for the use of five- and six-byte sequences to encode\n");
1623 tidy_out(doc, "characters that are outside the range of the Unicode character set;\n");
1624 tidy_out(doc, "those five- and six-byte sequences are illegal for the use of\n");
1625 tidy_out(doc, "UTF-8 as a transformation of Unicode characters. ISO/IEC 10646\n");
1626 tidy_out(doc, "does not allow mapping of unpaired surrogates, nor U+FFFE and U+FFFF\n");
1627 tidy_out(doc, "(but it does allow other noncharacters). For more information please refer to\n");
1628 tidy_out(doc, "http://www.unicode.org/unicode and http://www.cl.cam.ac.uk/~mgk25/unicode.html\n\n");
1629 }
1630
1631 #if SUPPORT_UTF16_ENCODINGS
1632
1633 if (doc->badChars & BC_INVALID_UTF16)
1634 {
1635 tidy_out(doc, "Character codes for UTF-16 must be in the range: U+0000 to U+10FFFF.\n");
1636 tidy_out(doc, "The definition of UTF-16 in Annex C of ISO/IEC 10646-1:2000 does not allow the\n");
1637 tidy_out(doc, "mapping of unpaired surrogates. For more information please refer to\n");
1638 tidy_out(doc, "http://www.unicode.org/unicode and http://www.cl.cam.ac.uk/~mgk25/unicode.html\n\n");
1639 }
1640
1641 #endif
1642
1643 if (doc->badChars & BC_INVALID_URI)
1644 {
1645 tidy_out(doc, "URIs must be properly escaped, they must not contain unescaped\n");
1646 tidy_out(doc, "characters below U+0021 including the space character and not\n");
1647 tidy_out(doc, "above U+007E. Tidy escapes the URI for you as recommended by\n");
1648 tidy_out(doc, "HTML 4.01 section B.2.1 and XML 1.0 section 4.2.2. Some user agents\n");
1649 tidy_out(doc, "use another algorithm to escape such URIs and some server-sided\n");
1650 tidy_out(doc, "scripts depend on that. If you want to depend on that, you must\n");
1651 tidy_out(doc, "escape the URI by your own. For more information please refer to\n");
1652 tidy_out(doc, "http://www.w3.org/International/O-URL-and-ident.html\n\n");
1653 }
1654 }
1655
1656 if (doc->badForm)
1657 {
1658 tidy_out(doc, "You may need to move one or both of the <form> and </form>\n");
1659 tidy_out(doc, "tags. HTML elements should be properly nested and form elements\n");
1660 tidy_out(doc, "are no exception. For instance you should not place the <form>\n");
1661 tidy_out(doc, "in one table cell and the </form> in another. If the <form> is\n");
1662 tidy_out(doc, "placed before a table, the </form> cannot be placed inside the\n");
1663 tidy_out(doc, "table! Note that one form can't be nested inside another!\n\n");
1664 }
1665
1666 if (doc->badAccess)
1667 {
1668 /* Tidy "classic" accessibility tests */
1669 if ( cfg(doc, TidyAccessibilityCheckLevel) == 0 )
1670 {
1671 if (doc->badAccess & BA_MISSING_SUMMARY)
1672 {
1673 tidy_out(doc, "The table summary attribute should be used to describe\n");
1674 tidy_out(doc, "the table structure. It is very helpful for people using\n");
1675 tidy_out(doc, "non-visual browsers. The scope and headers attributes for\n");
1676 tidy_out(doc, "table cells are useful for specifying which headers apply\n");
1677 tidy_out(doc, "to each table cell, enabling non-visual browsers to provide\n");
1678 tidy_out(doc, "a meaningful context for each cell.\n\n");
1679 }
1680
1681 if (doc->badAccess & BA_MISSING_IMAGE_ALT)
1682 {
1683 tidy_out(doc, "The alt attribute should be used to give a short description\n");
1684 tidy_out(doc, "of an image; longer descriptions should be given with the\n");
1685 tidy_out(doc, "longdesc attribute which takes a URL linked to the description.\n");
1686 tidy_out(doc, "These measures are needed for people using non-graphical browsers.\n\n");
1687 }
1688
1689 if (doc->badAccess & BA_MISSING_IMAGE_MAP)
1690 {
1691 tidy_out(doc, "Use client-side image maps in preference to server-side image\n");
1692 tidy_out(doc, "maps as the latter are inaccessible to people using non-\n");
1693 tidy_out(doc, "graphical browsers. In addition, client-side maps are easier\n");
1694 tidy_out(doc, "to set up and provide immediate feedback to users.\n\n");
1695 }
1696
1697 if (doc->badAccess & BA_MISSING_LINK_ALT)
1698 {
1699 tidy_out(doc, "For hypertext links defined using a client-side image map, you\n");
1700 tidy_out(doc, "need to use the alt attribute to provide a textual description\n");
1701 tidy_out(doc, "of the link for people using non-graphical browsers.\n\n");
1702 }
1703
1704 if ((doc->badAccess & BA_USING_FRAMES) && !(doc->badAccess & BA_USING_NOFRAMES))
1705 {
1706 tidy_out(doc, "Pages designed using frames presents problems for\n");
1707 tidy_out(doc, "people who are either blind or using a browser that\n");
1708 tidy_out(doc, "doesn't support frames. A frames-based page should always\n");
1709 tidy_out(doc, "include an alternative layout inside a NOFRAMES element.\n\n");
1710 }
1711
1712 }
1713
1714 tidy_out(doc, "For further advice on how to make your pages accessible\n");
1715 tidy_out(doc, "see %s", ACCESS_URL );
1716 if ( cfg(doc, TidyAccessibilityCheckLevel) > 0 )
1717 tidy_out(doc, " and %s", ATRC_ACCESS_URL );
1718 tidy_out(doc, ".\n" );
1719 tidy_out(doc, ". You may also want to try\n" );
1720 tidy_out(doc, "\"http://www.cast.org/bobby/\" which is a free Web-based\n");
1721 tidy_out(doc, "service for checking URLs for accessibility.\n\n");
1722 }
1723
1724 if (doc->badLayout)
1725 {
1726 if (doc->badLayout & USING_LAYER)
1727 {
1728 tidy_out(doc, "The Cascading Style Sheets (CSS) Positioning mechanism\n");
1729 tidy_out(doc, "is recommended in preference to the proprietary <LAYER>\n");
1730 tidy_out(doc, "element due to limited vendor support for LAYER.\n\n");
1731 }
1732
1733 if (doc->badLayout & USING_SPACER)
1734 {
1735 tidy_out(doc, "You are recommended to use CSS for controlling white\n");
1736 tidy_out(doc, "space (e.g. for indentation, margins and line spacing).\n");
1737 tidy_out(doc, "The proprietary <SPACER> element has limited vendor support.\n\n");
1738 }
1739
1740 if (doc->badLayout & USING_FONT)
1741 {
1742 tidy_out(doc, "You are recommended to use CSS to specify the font and\n");
1743 tidy_out(doc, "properties such as its size and color. This will reduce\n");
1744 tidy_out(doc, "the size of HTML files and make them easier to maintain\n");
1745 tidy_out(doc, "compared with using <FONT> elements.\n\n");
1746 }
1747
1748 if (doc->badLayout & USING_NOBR)
1749 {
1750 tidy_out(doc, "You are recommended to use CSS to control line wrapping.\n");
1751 tidy_out(doc, "Use \"white-space: nowrap\" to inhibit wrapping in place\n");
1752 tidy_out(doc, "of inserting <NOBR>...</NOBR> into the markup.\n\n");
1753 }
1754
1755 if (doc->badLayout & USING_BODY)
1756 {
1757 tidy_out(doc, "You are recommended to use CSS to specify page and link colors\n");
1758 }
1759 }
1760 }
1761
#if 0
/* Disabled: reports an unrecognized single-character option `c` as a
** TidyConfig message.
*/
void TY_(UnknownOption)( TidyDocImpl* doc, char c )
{
    message( doc, TidyConfig,
             "unrecognized option -%c use -help to list options\n", c );
}

/* Disabled: reports, as a TidyConfig message, that `program` could not
** open `file`.
*/
void TY_(UnknownFile)( TidyDocImpl* doc, ctmbstr program, ctmbstr file )
{
    message( doc, TidyConfig,
             "%s: can't open file \"%s\"\n", program, file );
}
#endif
1775
TY_(NeedsAuthorIntervention)1776 void TY_(NeedsAuthorIntervention)( TidyDocImpl* doc )
1777 {
1778 tidy_out(doc, "This document has errors that must be fixed before\n");
1779 tidy_out(doc, "using HTML Tidy to generate a tidied up version.\n\n");
1780 }
1781
TY_(GeneralInfo)1782 void TY_(GeneralInfo)( TidyDocImpl* doc )
1783 {
1784 tidy_out(doc, "To learn more about HTML Tidy see http://tidy.sourceforge.net\n");
1785 tidy_out(doc, "Please fill bug reports and queries using the \"tracker\" on the Tidy web site.\n");
1786 tidy_out(doc, "Additionally, questions can be sent to html-tidy@w3.org\n");
1787 tidy_out(doc, "HTML and CSS specifications are available from http://www.w3.org/\n");
1788 tidy_out(doc, "Lobby your company to join W3C, see http://www.w3.org/Consortium\n");
1789 }
1790
1791 #if SUPPORT_ACCESSIBILITY_CHECKS
1792
TY_(AccessibilityHelloMessage)1793 void TY_(AccessibilityHelloMessage)( TidyDocImpl* doc )
1794 {
1795 tidy_out( doc, "\n" );
1796 tidy_out( doc, "Accessibility Checks: Version 0.1\n" );
1797 tidy_out( doc, "\n" );
1798 }
1799
1800 #endif /* SUPPORT_ACCESSIBILITY_CHECKS */
1801
#if 0
/* Disabled: prints the start-up banner.
**
** The banner shows the release date passed in `date`, the compiler's
** __DATE__ and __TIME__, and the name of the input being parsed.  When
** PLATFORM_NAME is defined the banner reads "HTML Tidy for <platform>";
** otherwise both `helper` and `platform` are empty strings.
*/
void TY_(HelloMessage)( TidyDocImpl* doc, ctmbstr date, ctmbstr filename )
{
    tmbchar buf[ 2048 ];
    ctmbstr platform = "", helper = "";
    /* Five arguments feed the first line (helper, platform, date,
    ** __DATE__, __TIME__), so it needs five conversions.  The previous
    ** text "for %s (vers %s; ..." had only four, which shifted every
    ** following field by one.
    */
    ctmbstr msgfmt = "\nHTML Tidy%s%s (vers %s; built on %s, at %s)\n"
                     "Parsing \"%s\"\n";

#ifdef PLATFORM_NAME
    platform = PLATFORM_NAME;
    helper = " for ";
#endif

    if ( TY_(tmbstrcmp)(filename, "stdin") == 0 )
    {
        /* Filename will be ignored at end of varargs */
        msgfmt = "\nHTML Tidy%s%s (vers %s; built on %s, at %s)\n"
                 "Parsing console input (stdin)\n";
    }

    TY_(tmbsnprintf)(buf, sizeof(buf), msgfmt, helper, platform,
                     date, __DATE__, __TIME__, filename);
    /* buf holds finished text (it may contain '%' from the file name),
    ** so it must be passed as data, never as the format string
    */
    tidy_out( doc, "%s", buf );
}
#endif
1827
TY_(ReportMarkupVersion)1828 void TY_(ReportMarkupVersion)( TidyDocImpl* doc )
1829 {
1830 if (doc->givenDoctype)
1831 {
1832 /* todo: deal with non-ASCII characters in FPI */
1833 message(doc, TidyInfo, "Doctype given is \"%s\"", doc->givenDoctype);
1834 }
1835
1836 if ( ! cfgBool(doc, TidyXmlTags) )
1837 {
1838 Bool isXhtml = doc->lexer->isvoyager;
1839 uint apparentVers;
1840 ctmbstr vers;
1841
1842 apparentVers = TY_(ApparentVersion)( doc );
1843
1844 vers = TY_(HTMLVersionNameFromCode)( apparentVers, isXhtml );
1845
1846 if (!vers)
1847 vers = "HTML Proprietary";
1848
1849 message( doc, TidyInfo, "Document content looks like %s", vers );
1850
1851 /* Warn about missing sytem identifier (SI) in emitted doctype */
1852 if ( TY_(WarnMissingSIInEmittedDocType)( doc ) )
1853 message( doc, TidyInfo, "No system identifier in emitted doctype" );
1854 }
1855 }
1856
TY_(ReportNumWarnings)1857 void TY_(ReportNumWarnings)( TidyDocImpl* doc )
1858 {
1859 if ( doc->warnings > 0 || doc->errors > 0 )
1860 {
1861 tidy_out( doc, "%u %s, %u %s were found!",
1862 doc->warnings, doc->warnings == 1 ? "warning" : "warnings",
1863 doc->errors, doc->errors == 1 ? "error" : "errors" );
1864
1865 if ( doc->errors > cfg(doc, TidyShowErrors) ||
1866 !cfgBool(doc, TidyShowWarnings) )
1867 tidy_out( doc, " Not all warnings/errors were shown.\n\n" );
1868 else
1869 tidy_out( doc, "\n\n" );
1870 }
1871 else
1872 tidy_out( doc, "No warnings or errors were found.\n\n" );
1873 }
1874
1875 /*
1876 * local variables:
1877 * mode: c
1878 * indent-tabs-mode: nil
1879 * c-basic-offset: 4
1880 * eval: (c-set-offset 'substatement-open 0)
1881 * end:
1882 */
1883