1 /*
2 config.c -- read config file and manage config properties
3
4 (c) 1998-2008 (W3C) MIT, ERCIM, Keio University
5 See tidy.h for the copyright notice.
6
7 CVS Info :
8
9 $Author: arnaud02 $
10 $Date: 2008/06/18 20:18:54 $
11 $Revision: 1.111 $
12
13 */
14
15 /*
16 config files associate a property name with a value.
17
18 // comments can start at the beginning of a line
19 # comments can start at the beginning of a line
20 name: short values fit onto one line
21 name: a really long value that
22 continues on the next line
23
24 property names are case insensitive and should be less than
60 characters in length and must start at the beginning of
26 the line, as whitespace at the start of a line signifies a
27 line continuation.
28 */
29
30 #include "config.h"
31 #include "tidy-int.h"
32 #include "message.h"
33 #include "tmbstr.h"
34 #include "tags.h"
35
36 #ifdef WINDOWS_OS
37 #include <io.h>
38 #else
39 #ifdef DMALLOC
40 /*
41 macro for valloc() in dmalloc.h may conflict with declaration for valloc() in unistd.h -
42 we don't need (debugging for) valloc() here. dmalloc.h should come last but it doesn't.
43 */
44 #ifdef valloc
45 #undef valloc
46 #endif
47 #endif
48 #include <unistd.h>
49 #endif
50
51 #ifdef TIDY_WIN32_MLANG_SUPPORT
52 #include "win32tc.h"
53 #endif
54
TY_(InitConfig)55 void TY_(InitConfig)( TidyDocImpl* doc )
56 {
57 TidyClearMemory( &doc->config, sizeof(TidyConfigImpl) );
58 TY_(ResetConfigToDefault)( doc );
59 }
60
/* Release the storage held by a document's configuration.
** Resetting to defaults replaces every live value, and taking a
** snapshot afterwards overwrites every snapshot value; in both
** cases CopyOptionValue frees the string previously stored in the
** slot being overwritten.
*/
void TY_(FreeConfig)( TidyDocImpl* doc )
{
    TY_(ResetConfigToDefault)( doc );
    TY_(TakeConfigSnapshot)( doc );
}
66
67
/* Arrange so index can be cast to enum
**
** Each table below is a NULL-terminated list of the textual choices
** for one kind of option; option_defs attaches them to the options
** that use them.
*/

/* choices for plain yes/no options */
static const ctmbstr boolPicks[] =
{
    "no",
    "yes",
    NULL
};

/* choices for options parsed with ParseAutoBool */
static const ctmbstr autoBoolPicks[] =
{
    "no",
    "yes",
    "auto",
    NULL
};

/* choices for "repeated-attributes" */
static const ctmbstr repeatAttrPicks[] =
{
    "keep-first",
    "keep-last",
    NULL
};

/* choices for "accessibility-check" */
static const ctmbstr accessPicks[] =
{
    "0 (Tidy Classic)",
    "1 (Priority 1 Checks)",
    "2 (Priority 2 Checks)",
    "3 (Priority 3 Checks)",
    NULL
};

/* choices for the character encoding options; the exact set depends
** on which encodings were enabled at build time
*/
static const ctmbstr charEncPicks[] =
{
    "raw",
    "ascii",
    "latin0",
    "latin1",
    "utf8",
#ifndef NO_NATIVE_ISO2022_SUPPORT
    "iso2022",
#endif
    "mac",
    "win1252",
    "ibm858",

#if SUPPORT_UTF16_ENCODINGS
    "utf16le",
    "utf16be",
    "utf16",
#endif

#if SUPPORT_ASIAN_ENCODINGS
    "big5",
    "shiftjis",
#endif

    NULL
};

/* choices for "newline" */
static const ctmbstr newlinePicks[] =
{
    "LF",
    "CRLF",
    "CR",
    NULL
};

/* choices for "doctype" and "doctype-mode" */
static const ctmbstr doctypePicks[] =
{
    "omit",
    "auto",
    "strict",
    "transitional",
    "user",
    NULL
};

/* choices for "sort-attributes" */
static const ctmbstr sorterPicks[] =
{
    "none",
    "alpha",
    NULL
};
153
/* Short aliases that keep the rows of option_defs compact. */

/* option categories */
#define MU TidyMarkup
#define DG TidyDiagnostics
#define PP TidyPrettyPrint
#define CE TidyEncoding
#define MS TidyMiscellaneous

/* option value types */
#define IN TidyInteger
#define BL TidyBoolean
#define ST TidyString

/* "no category" / "no type" markers, used by the terminating row */
#define XX (TidyConfigCategory)-1
#define XY (TidyOptionType)-1

/* default for the "newline" option */
#define DLF DEFAULT_NL_CONFIG

/* If Accessibility checks not supported, make config setting read-only */
#if SUPPORT_ACCESSIBILITY_CHECKS
#define ParseAcc ParseInt
#else
#define ParseAcc NULL
#endif
175
/* Forward declarations: these are referenced (by option_defs or by
** functions earlier in the file) before they are defined.
*/

/* makes interdependent option values consistent with each other */
static void AdjustConfig( TidyDocImpl* doc );

/* parser for integer values */
static ParseProperty ParseInt;

/* parser for 't'/'f', 'true'/'false', 'y'/'n', 'yes'/'no' or '1'/'0' */
static ParseProperty ParseBool;

/* parser for 't'/'f', 'true'/'false', 'y'/'n', 'yes'/'no', '1'/'0'
   or 'auto' */
static ParseProperty ParseAutoBool;

/* a string excluding whitespace */
static ParseProperty ParseName;

/* a CSS1 selector - CSS class naming for -clean option */
static ParseProperty ParseCSS1Selector;

/* a string including whitespace */
static ParseProperty ParseString;

/* a space or comma separated list of tag names */
static ParseProperty ParseTagNames;

/* attribute sort order; paired with sorterPicks ("none", "alpha") */
static ParseProperty ParseSorter;

/* RAW, ASCII, LATIN0, LATIN1, UTF8, ISO2022, MACROMAN,
   WIN1252, IBM858, UTF16LE, UTF16BE, UTF16, BIG5, SHIFTJIS
*/
static ParseProperty ParseCharEnc;

/* newline style; paired with newlinePicks ("LF", "CRLF", "CR") */
static ParseProperty ParseNewline;

/* omit | auto | strict | loose | <fpi> */
static ParseProperty ParseDocType;

/* keep-first or keep-last? */
static ParseProperty ParseRepeatAttr;
214
215
/* Master table of configuration options.
**
** Rows must appear in TidyOptionId order: the code in this file
** indexes the table directly by option id and asserts that each
** row's id equals its index.
**
** Columns, as used by this file: option id, category, name as written
** in a config file, value type, default value, parser function, and
** list of textual choices.  A NULL parser means the option cannot be
** set by parsing text.  The final row (id N_TIDY_OPTIONS, NULL name)
** terminates the table.
*/
static const TidyOptionImpl option_defs[] =
{
    { TidyUnknownOption, MS, "unknown!", IN, 0, NULL, NULL },
    { TidyIndentSpaces, PP, "indent-spaces", IN, 2, ParseInt, NULL },
    { TidyWrapLen, PP, "wrap", IN, 68, ParseInt, NULL },
    { TidyTabSize, PP, "tab-size", IN, 8, ParseInt, NULL },
    { TidyCharEncoding, CE, "char-encoding", IN, ASCII, ParseCharEnc, charEncPicks },
    { TidyInCharEncoding, CE, "input-encoding", IN, LATIN1, ParseCharEnc, charEncPicks },
    { TidyOutCharEncoding, CE, "output-encoding", IN, ASCII, ParseCharEnc, charEncPicks },
    { TidyNewline, CE, "newline", IN, DLF, ParseNewline, newlinePicks },
    { TidyDoctypeMode, MU, "doctype-mode", IN, TidyDoctypeAuto, NULL, doctypePicks },
    { TidyDoctype, MU, "doctype", ST, 0, ParseDocType, doctypePicks },
    { TidyDuplicateAttrs, MU, "repeated-attributes", IN, TidyKeepLast, ParseRepeatAttr, repeatAttrPicks },
    { TidyAltText, MU, "alt-text", ST, 0, ParseString, NULL },

    /* obsolete */
    { TidySlideStyle, MS, "slide-style", ST, 0, ParseName, NULL },

    { TidyErrFile, MS, "error-file", ST, 0, ParseString, NULL },
    { TidyOutFile, MS, "output-file", ST, 0, ParseString, NULL },
    { TidyWriteBack, MS, "write-back", BL, no, ParseBool, boolPicks },
    { TidyShowMarkup, PP, "markup", BL, yes, ParseBool, boolPicks },
    { TidyShowWarnings, DG, "show-warnings", BL, yes, ParseBool, boolPicks },
    { TidyQuiet, MS, "quiet", BL, no, ParseBool, boolPicks },
    { TidyIndentContent, PP, "indent", IN, TidyNoState, ParseAutoBool, autoBoolPicks },
    { TidyHideEndTags, MU, "hide-endtags", BL, no, ParseBool, boolPicks },
    { TidyXmlTags, MU, "input-xml", BL, no, ParseBool, boolPicks },
    { TidyXmlOut, MU, "output-xml", BL, no, ParseBool, boolPicks },
    { TidyXhtmlOut, MU, "output-xhtml", BL, no, ParseBool, boolPicks },
    { TidyHtmlOut, MU, "output-html", BL, no, ParseBool, boolPicks },
    { TidyXmlDecl, MU, "add-xml-decl", BL, no, ParseBool, boolPicks },
    { TidyUpperCaseTags, MU, "uppercase-tags", BL, no, ParseBool, boolPicks },
    { TidyUpperCaseAttrs, MU, "uppercase-attributes", BL, no, ParseBool, boolPicks },
    { TidyMakeBare, MU, "bare", BL, no, ParseBool, boolPicks },
    { TidyMakeClean, MU, "clean", BL, no, ParseBool, boolPicks },
    { TidyLogicalEmphasis, MU, "logical-emphasis", BL, no, ParseBool, boolPicks },
    { TidyDropPropAttrs, MU, "drop-proprietary-attributes", BL, no, ParseBool, boolPicks },
    { TidyDropFontTags, MU, "drop-font-tags", BL, no, ParseBool, boolPicks },
    { TidyDropEmptyParas, MU, "drop-empty-paras", BL, yes, ParseBool, boolPicks },
    { TidyFixComments, MU, "fix-bad-comments", BL, yes, ParseBool, boolPicks },
    { TidyBreakBeforeBR, PP, "break-before-br", BL, no, ParseBool, boolPicks },

    /* obsolete */
    { TidyBurstSlides, PP, "split", BL, no, ParseBool, boolPicks },

    { TidyNumEntities, MU, "numeric-entities", BL, no, ParseBool, boolPicks },
    { TidyQuoteMarks, MU, "quote-marks", BL, no, ParseBool, boolPicks },
    { TidyQuoteNbsp, MU, "quote-nbsp", BL, yes, ParseBool, boolPicks },
    { TidyQuoteAmpersand, MU, "quote-ampersand", BL, yes, ParseBool, boolPicks },
    { TidyWrapAttVals, PP, "wrap-attributes", BL, no, ParseBool, boolPicks },
    { TidyWrapScriptlets, PP, "wrap-script-literals", BL, no, ParseBool, boolPicks },
    { TidyWrapSection, PP, "wrap-sections", BL, yes, ParseBool, boolPicks },
    { TidyWrapAsp, PP, "wrap-asp", BL, yes, ParseBool, boolPicks },
    { TidyWrapJste, PP, "wrap-jste", BL, yes, ParseBool, boolPicks },
    { TidyWrapPhp, PP, "wrap-php", BL, yes, ParseBool, boolPicks },
    { TidyFixBackslash, MU, "fix-backslash", BL, yes, ParseBool, boolPicks },
    { TidyIndentAttributes, PP, "indent-attributes", BL, no, ParseBool, boolPicks },
    { TidyXmlPIs, MU, "assume-xml-procins", BL, no, ParseBool, boolPicks },
    { TidyXmlSpace, MU, "add-xml-space", BL, no, ParseBool, boolPicks },
    { TidyEncloseBodyText, MU, "enclose-text", BL, no, ParseBool, boolPicks },
    { TidyEncloseBlockText, MU, "enclose-block-text", BL, no, ParseBool, boolPicks },
    { TidyKeepFileTimes, MS, "keep-time", BL, no, ParseBool, boolPicks },
    { TidyWord2000, MU, "word-2000", BL, no, ParseBool, boolPicks },
    { TidyMark, MS, "tidy-mark", BL, yes, ParseBool, boolPicks },
    { TidyEmacs, MS, "gnu-emacs", BL, no, ParseBool, boolPicks },
    { TidyEmacsFile, MS, "gnu-emacs-file", ST, 0, ParseString, NULL },
    { TidyLiteralAttribs, MU, "literal-attributes", BL, no, ParseBool, boolPicks },
    { TidyBodyOnly, MU, "show-body-only", IN, no, ParseAutoBool, autoBoolPicks },
    { TidyFixUri, MU, "fix-uri", BL, yes, ParseBool, boolPicks },
    { TidyLowerLiterals, MU, "lower-literals", BL, yes, ParseBool, boolPicks },
    { TidyHideComments, MU, "hide-comments", BL, no, ParseBool, boolPicks },
    { TidyIndentCdata, MU, "indent-cdata", BL, no, ParseBool, boolPicks },
    { TidyForceOutput, MS, "force-output", BL, no, ParseBool, boolPicks },
    { TidyShowErrors, DG, "show-errors", IN, 6, ParseInt, NULL },
    { TidyAsciiChars, CE, "ascii-chars", BL, no, ParseBool, boolPicks },
    { TidyJoinClasses, MU, "join-classes", BL, no, ParseBool, boolPicks },
    { TidyJoinStyles, MU, "join-styles", BL, yes, ParseBool, boolPicks },
    { TidyEscapeCdata, MU, "escape-cdata", BL, no, ParseBool, boolPicks },
#if SUPPORT_ASIAN_ENCODINGS
    { TidyLanguage, CE, "language", ST, 0, ParseName, NULL },
    { TidyNCR, MU, "ncr", BL, yes, ParseBool, boolPicks },
#endif
#if SUPPORT_UTF16_ENCODINGS
    { TidyOutputBOM, CE, "output-bom", IN, TidyAutoState, ParseAutoBool, autoBoolPicks },
#endif
    { TidyReplaceColor, MU, "replace-color", BL, no, ParseBool, boolPicks },
    { TidyCSSPrefix, MU, "css-prefix", ST, 0, ParseCSS1Selector, NULL },
    { TidyInlineTags, MU, "new-inline-tags", ST, 0, ParseTagNames, NULL },
    { TidyBlockTags, MU, "new-blocklevel-tags", ST, 0, ParseTagNames, NULL },
    { TidyEmptyTags, MU, "new-empty-tags", ST, 0, ParseTagNames, NULL },
    { TidyPreTags, MU, "new-pre-tags", ST, 0, ParseTagNames, NULL },
    { TidyAccessibilityCheckLevel, DG, "accessibility-check", IN, 0, ParseAcc, accessPicks },
    { TidyVertSpace, PP, "vertical-space", BL, no, ParseBool, boolPicks },
#if SUPPORT_ASIAN_ENCODINGS
    { TidyPunctWrap, PP, "punctuation-wrap", BL, no, ParseBool, boolPicks },
#endif
    { TidyMergeDivs, MU, "merge-divs", IN, TidyAutoState, ParseAutoBool, autoBoolPicks },
    { TidyDecorateInferredUL, MU, "decorate-inferred-ul", BL, no, ParseBool, boolPicks },
    { TidyPreserveEntities, MU, "preserve-entities", BL, no, ParseBool, boolPicks },
    { TidySortAttributes, PP, "sort-attributes", IN, TidySortAttrNone,ParseSorter, sorterPicks },
    { TidyMergeSpans, MU, "merge-spans", IN, TidyAutoState, ParseAutoBool, autoBoolPicks },
    { TidyAnchorAsName, MU, "anchor-as-name", BL, yes, ParseBool, boolPicks },
    { N_TIDY_OPTIONS, XX, NULL, XY, 0, NULL, NULL }
};
320
321 /* Should only be called by options set by name
322 ** thus, it is cheaper to do a few scans than set
323 ** up every option in a hash table.
324 */
TY_(lookupOption)325 const TidyOptionImpl* TY_(lookupOption)( ctmbstr s )
326 {
327 const TidyOptionImpl* np = option_defs;
328 for ( /**/; np < option_defs + N_TIDY_OPTIONS; ++np )
329 {
330 if ( TY_(tmbstrcasecmp)(s, np->name) == 0 )
331 return np;
332 }
333 return NULL;
334 }
335
TY_(getOption)336 const TidyOptionImpl* TY_(getOption)( TidyOptionId optId )
337 {
338 if ( optId < N_TIDY_OPTIONS )
339 return option_defs + optId;
340 return NULL;
341 }
342
343
/* Release the string held by an option value, if there is one to
** release.  Non-string options own no storage, and a string that is
** NULL or is the option's shared default must not be freed.
*/
static void FreeOptionValue( TidyDocImpl* doc, const TidyOptionImpl* option, TidyOptionValue* value )
{
    if ( option->type != TidyString )
        return;

    if ( value->p == NULL || value->p == option->pdflt )
        return;

    TidyDocFree( doc, value->p );
}
349
/* Overwrite *oldval with a copy of *newval.
** Whatever string oldval held is released first.  String values are
** duplicated with the document's allocator, except for NULL and the
** option's shared default string, which are copied by pointer.
** Non-string values are copied by value.
*/
static void CopyOptionValue( TidyDocImpl* doc, const TidyOptionImpl* option,
                             TidyOptionValue* oldval, const TidyOptionValue* newval )
{
    assert( oldval != NULL );
    FreeOptionValue( doc, option, oldval );

    if ( option->type != TidyString )
    {
        oldval->v = newval->v;
        return;
    }

    if ( newval->p == NULL || newval->p == option->pdflt )
        oldval->p = newval->p;
    else
        oldval->p = TY_(tmbstrdup)( doc->allocator, newval->p );
}
366
367
SetOptionValue(TidyDocImpl * doc,TidyOptionId optId,ctmbstr val)368 static Bool SetOptionValue( TidyDocImpl* doc, TidyOptionId optId, ctmbstr val )
369 {
370 const TidyOptionImpl* option = &option_defs[ optId ];
371 Bool status = ( optId < N_TIDY_OPTIONS );
372 if ( status )
373 {
374 assert( option->id == optId && option->type == TidyString );
375 FreeOptionValue( doc, option, &doc->config.value[ optId ] );
376 doc->config.value[ optId ].p = TY_(tmbstrdup)( doc->allocator, val );
377 }
378 return status;
379 }
380
TY_(SetOptionInt)381 Bool TY_(SetOptionInt)( TidyDocImpl* doc, TidyOptionId optId, ulong val )
382 {
383 Bool status = ( optId < N_TIDY_OPTIONS );
384 if ( status )
385 {
386 assert( option_defs[ optId ].type == TidyInteger );
387 doc->config.value[ optId ].v = val;
388 }
389 return status;
390 }
391
TY_(SetOptionBool)392 Bool TY_(SetOptionBool)( TidyDocImpl* doc, TidyOptionId optId, Bool val )
393 {
394 Bool status = ( optId < N_TIDY_OPTIONS );
395 if ( status )
396 {
397 assert( option_defs[ optId ].type == TidyBoolean );
398 doc->config.value[ optId ].v = val;
399 }
400 return status;
401 }
402
/* Fill *dflt with the option's default: the default string pointer
** for string options, the default number for everything else.
*/
static void GetOptionDefault( const TidyOptionImpl* option,
                              TidyOptionValue* dflt )
{
    switch ( option->type )
    {
    case TidyString:
        dflt->p = (char*)option->pdflt;
        break;

    default:
        dflt->v = option->dflt;
        break;
    }
}
411
OptionValueEqDefault(const TidyOptionImpl * option,const TidyOptionValue * val)412 static Bool OptionValueEqDefault( const TidyOptionImpl* option,
413 const TidyOptionValue* val )
414 {
415 return ( option->type == TidyString ) ?
416 val->p == option->pdflt :
417 val->v == option->dflt;
418 }
419
TY_(ResetOptionToDefault)420 Bool TY_(ResetOptionToDefault)( TidyDocImpl* doc, TidyOptionId optId )
421 {
422 Bool status = ( optId > 0 && optId < N_TIDY_OPTIONS );
423 if ( status )
424 {
425 TidyOptionValue dflt;
426 const TidyOptionImpl* option = option_defs + optId;
427 TidyOptionValue* value = &doc->config.value[ optId ];
428 assert( optId == option->id );
429 GetOptionDefault( option, &dflt );
430 CopyOptionValue( doc, option, value, &dflt );
431 }
432 return status;
433 }
434
/* Run the current value of a tag-list option through the option
** parser again.  The value is parsed from a private copy rather than
** from the stored string itself (presumably because the parser
** replaces the stored string while it is still being read; the
** parser is not shown here).
*/
static void ReparseTagType( TidyDocImpl* doc, TidyOptionId optId )
{
    tmbstr scratch = TY_(tmbstrdup)( doc->allocator, cfgStr(doc, optId) );

    TY_(ParseConfigValue)( doc, optId, scratch );
    TidyDocFree( doc, scratch );
}
442
OptionValueIdentical(const TidyOptionImpl * option,const TidyOptionValue * val1,const TidyOptionValue * val2)443 static Bool OptionValueIdentical( const TidyOptionImpl* option,
444 const TidyOptionValue* val1,
445 const TidyOptionValue* val2 )
446 {
447 if ( option->type == TidyString )
448 {
449 if ( val1->p == val2->p )
450 return yes;
451 if ( !val1->p || !val2->p )
452 return no;
453 return TY_(tmbstrcmp)( val1->p, val2->p ) == 0;
454 }
455 else
456 return val1->v == val2->v;
457 }
458
NeedReparseTagDecls(const TidyOptionValue * current,const TidyOptionValue * new,uint * changedUserTags)459 static Bool NeedReparseTagDecls( const TidyOptionValue* current,
460 const TidyOptionValue* new,
461 uint *changedUserTags )
462 {
463 Bool ret = no;
464 uint ixVal;
465 const TidyOptionImpl* option = option_defs;
466 *changedUserTags = tagtype_null;
467
468 for ( ixVal=0; ixVal < N_TIDY_OPTIONS; ++option, ++ixVal )
469 {
470 assert( ixVal == (uint) option->id );
471 switch (option->id)
472 {
473 #define TEST_USERTAGS(USERTAGOPTION,USERTAGTYPE) \
474 case USERTAGOPTION: \
475 if (!OptionValueIdentical(option,¤t[ixVal],&new[ixVal])) \
476 { \
477 *changedUserTags |= USERTAGTYPE; \
478 ret = yes; \
479 } \
480 break
481 TEST_USERTAGS(TidyInlineTags,tagtype_inline);
482 TEST_USERTAGS(TidyBlockTags,tagtype_block);
483 TEST_USERTAGS(TidyEmptyTags,tagtype_empty);
484 TEST_USERTAGS(TidyPreTags,tagtype_pre);
485 default:
486 break;
487 }
488 }
489 return ret;
490 }
491
/* Rebuild the declared-tag lists named in changedUserTags.
** For each tagtype_* flag that is set, the tags declared for that
** type are dropped and the matching tag-list option is parsed again
** from its current value.
*/
static void ReparseTagDecls( TidyDocImpl* doc, uint changedUserTags )
{
/* Expands to the "drop, then re-parse" step for one tag type. */
#define REPARSE_USERTAGS(USERTAGOPTION,USERTAGTYPE) \
    if ( changedUserTags & USERTAGTYPE ) \
    { \
        TY_(FreeDeclaredTags)( doc, USERTAGTYPE ); \
        ReparseTagType( doc, USERTAGOPTION ); \
    }
    REPARSE_USERTAGS(TidyInlineTags,tagtype_inline);
    REPARSE_USERTAGS(TidyBlockTags,tagtype_block);
    REPARSE_USERTAGS(TidyEmptyTags,tagtype_empty);
    REPARSE_USERTAGS(TidyPreTags,tagtype_pre);
}
505
TY_(ResetConfigToDefault)506 void TY_(ResetConfigToDefault)( TidyDocImpl* doc )
507 {
508 uint ixVal;
509 const TidyOptionImpl* option = option_defs;
510 TidyOptionValue* value = &doc->config.value[ 0 ];
511 for ( ixVal=0; ixVal < N_TIDY_OPTIONS; ++option, ++ixVal )
512 {
513 TidyOptionValue dflt;
514 assert( ixVal == (uint) option->id );
515 GetOptionDefault( option, &dflt );
516 CopyOptionValue( doc, option, &value[ixVal], &dflt );
517 }
518 TY_(FreeDeclaredTags)( doc, tagtype_null );
519 }
520
TY_(TakeConfigSnapshot)521 void TY_(TakeConfigSnapshot)( TidyDocImpl* doc )
522 {
523 uint ixVal;
524 const TidyOptionImpl* option = option_defs;
525 const TidyOptionValue* value = &doc->config.value[ 0 ];
526 TidyOptionValue* snap = &doc->config.snapshot[ 0 ];
527
528 AdjustConfig( doc ); /* Make sure it's consistent */
529 for ( ixVal=0; ixVal < N_TIDY_OPTIONS; ++option, ++ixVal )
530 {
531 assert( ixVal == (uint) option->id );
532 CopyOptionValue( doc, option, &snap[ixVal], &value[ixVal] );
533 }
534 }
535
TY_(ResetConfigToSnapshot)536 void TY_(ResetConfigToSnapshot)( TidyDocImpl* doc )
537 {
538 uint ixVal;
539 const TidyOptionImpl* option = option_defs;
540 TidyOptionValue* value = &doc->config.value[ 0 ];
541 const TidyOptionValue* snap = &doc->config.snapshot[ 0 ];
542 uint changedUserTags;
543 Bool needReparseTagsDecls = NeedReparseTagDecls( value, snap,
544 &changedUserTags );
545
546 for ( ixVal=0; ixVal < N_TIDY_OPTIONS; ++option, ++ixVal )
547 {
548 assert( ixVal == (uint) option->id );
549 CopyOptionValue( doc, option, &value[ixVal], &snap[ixVal] );
550 }
551 if ( needReparseTagsDecls )
552 ReparseTagDecls( doc, changedUserTags );
553 }
554
TY_(CopyConfig)555 void TY_(CopyConfig)( TidyDocImpl* docTo, TidyDocImpl* docFrom )
556 {
557 if ( docTo != docFrom )
558 {
559 uint ixVal;
560 const TidyOptionImpl* option = option_defs;
561 const TidyOptionValue* from = &docFrom->config.value[ 0 ];
562 TidyOptionValue* to = &docTo->config.value[ 0 ];
563 uint changedUserTags;
564 Bool needReparseTagsDecls = NeedReparseTagDecls( to, from,
565 &changedUserTags );
566
567 TY_(TakeConfigSnapshot)( docTo );
568 for ( ixVal=0; ixVal < N_TIDY_OPTIONS; ++option, ++ixVal )
569 {
570 assert( ixVal == (uint) option->id );
571 CopyOptionValue( docTo, option, &to[ixVal], &from[ixVal] );
572 }
573 if ( needReparseTagsDecls )
574 ReparseTagDecls( docTo, changedUserTags );
575 AdjustConfig( docTo ); /* Make sure it's consistent */
576 }
577 }
578
579
580 #ifdef _DEBUG
581
/* Debug accessor functions will be type-safe and assert option type match */

/* Return the numeric value of an option; asserts the id is in range. */
ulong TY_(_cfgGet)( TidyDocImpl* doc, TidyOptionId optId )
{
    assert( optId < N_TIDY_OPTIONS );
    return doc->config.value[ optId ].v;
}
588
/* Return the value of a boolean option; asserts that the option
** really is declared as TidyBoolean.
*/
Bool TY_(_cfgGetBool)( TidyDocImpl* doc, TidyOptionId optId )
{
    ulong val = TY_(_cfgGet)( doc, optId );   /* also range-checks optId */
    const TidyOptionImpl* opt = &option_defs[ optId ];
    assert( opt && opt->type == TidyBoolean );
    return (Bool) val;
}
596
/* Return the value of a yes/no/auto option; asserts that the option
** is an integer option whose parser is ParseAutoBool.
*/
TidyTriState TY_(_cfgGetAutoBool)( TidyDocImpl* doc, TidyOptionId optId )
{
    ulong val = TY_(_cfgGet)( doc, optId );   /* also range-checks optId */
    const TidyOptionImpl* opt = &option_defs[ optId ];
    assert( opt && opt->type == TidyInteger
            && opt->parser == ParseAutoBool );
    return (TidyTriState) val;
}
605
/* Return the value of a string option; asserts that the id is in
** range and that the option really is declared as TidyString.
*/
ctmbstr TY_(_cfgGetString)( TidyDocImpl* doc, TidyOptionId optId )
{
    const TidyOptionImpl* opt;

    assert( optId < N_TIDY_OPTIONS );
    opt = &option_defs[ optId ];
    assert( opt && opt->type == TidyString );
    return doc->config.value[ optId ].p;
}
615 #endif
616
617
618 #if 0
/* for use with Gnu Emacs: stores filename as the value of the
** "gnu-emacs-file" option.  Compiled out by the enclosing #if 0.
*/
void SetEmacsFilename( TidyDocImpl* doc, ctmbstr filename )
{
    SetOptionValue( doc, TidyEmacsFile, filename );
}
624 #endif
625
GetC(TidyConfigImpl * config)626 static tchar GetC( TidyConfigImpl* config )
627 {
628 if ( config->cfgIn )
629 return TY_(ReadChar)( config->cfgIn );
630 return EndOfStream;
631 }
632
FirstChar(TidyConfigImpl * config)633 static tchar FirstChar( TidyConfigImpl* config )
634 {
635 config->c = GetC( config );
636 return config->c;
637 }
638
AdvanceChar(TidyConfigImpl * config)639 static tchar AdvanceChar( TidyConfigImpl* config )
640 {
641 if ( config->c != EndOfStream )
642 config->c = GetC( config );
643 return config->c;
644 }
645
SkipWhite(TidyConfigImpl * config)646 static tchar SkipWhite( TidyConfigImpl* config )
647 {
648 while ( TY_(IsWhite)(config->c) && !TY_(IsNewline)(config->c) )
649 config->c = GetC( config );
650 return config->c;
651 }
652
653 /* skip until end of line
654 static tchar SkipToEndofLine( TidyConfigImpl* config )
655 {
656 while ( config->c != EndOfStream )
657 {
658 config->c = GetC( config );
659 if ( config->c == '\n' || config->c == '\r' )
660 break;
661 }
662 return config->c;
663 }
664 */
665
666 /*
667 skip over line continuations
668 to start of next property
669 */
NextProperty(TidyConfigImpl * config)670 static uint NextProperty( TidyConfigImpl* config )
671 {
672 do
673 {
674 /* skip to end of line */
675 while ( config->c != '\n' && config->c != '\r' && config->c != EndOfStream )
676 config->c = GetC( config );
677
678 /* treat \r\n \r or \n as line ends */
679 if ( config->c == '\r' )
680 config->c = GetC( config );
681
682 if ( config->c == '\n' )
683 config->c = GetC( config );
684 }
685 while ( TY_(IsWhite)(config->c) ); /* line continuation? */
686
687 return config->c;
688 }
689
690 /*
691 Todd Lewis contributed this code for expanding
692 ~/foo or ~your/foo according to $HOME and your
693 user name. This will work partially on any system
694 which defines $HOME. Support for ~user/foo will
695 work on systems that support getpwnam(userid),
696 namely Unix/Linux.
697 */
ExpandTilde(TidyDocImpl * doc,ctmbstr filename)698 static ctmbstr ExpandTilde( TidyDocImpl* doc, ctmbstr filename )
699 {
700 char *home_dir = NULL;
701
702 if ( !filename )
703 return NULL;
704
705 if ( filename[0] != '~' )
706 return filename;
707
708 if (filename[1] == '/')
709 {
710 home_dir = getenv("HOME");
711 if ( home_dir )
712 ++filename;
713 }
714 #ifdef SUPPORT_GETPWNAM
715 else
716 {
717 struct passwd *passwd = NULL;
718 ctmbstr s = filename + 1;
719 tmbstr t;
720
721 while ( *s && *s != '/' )
722 s++;
723
724 if ( t = TidyDocAlloc(doc, s - filename) )
725 {
726 memcpy(t, filename+1, s-filename-1);
727 t[s-filename-1] = 0;
728
729 passwd = getpwnam(t);
730
731 TidyDocFree(doc, t);
732 }
733
734 if ( passwd )
735 {
736 filename = s;
737 home_dir = passwd->pw_dir;
738 }
739 }
740 #endif /* SUPPORT_GETPWNAM */
741
742 if ( home_dir )
743 {
744 uint len = TY_(tmbstrlen)(filename) + TY_(tmbstrlen)(home_dir) + 1;
745 tmbstr p = (tmbstr)TidyDocAlloc( doc, len );
746 TY_(tmbstrcpy)( p, home_dir );
747 TY_(tmbstrcat)( p, filename );
748 return (ctmbstr) p;
749 }
750 return (ctmbstr) filename;
751 }
752
tidyFileExists(TidyDoc tdoc,ctmbstr filename)753 Bool TIDY_CALL tidyFileExists( TidyDoc tdoc, ctmbstr filename )
754 {
755 TidyDocImpl* doc = tidyDocToImpl( tdoc );
756 ctmbstr fname = (tmbstr) ExpandTilde( doc, filename );
757 #ifndef NO_ACCESS_SUPPORT
758 Bool exists = ( access(fname, 0) == 0 );
759 #else
760 Bool exists;
761 /* at present */
762 FILE* fin = fopen(fname, "r");
763 if (fin != NULL)
764 fclose(fin);
765 exists = ( fin != NULL );
766 #endif
767 if ( fname != filename )
768 TidyDocFree( doc, (tmbstr) fname );
769 return exists;
770 }
771
772
/* Size of the buffer that holds an option name while a config file
** is parsed; can be overridden at build time.
*/
#ifndef TIDY_MAX_NAME
#define TIDY_MAX_NAME 64
#endif

/* Parse a config file, reading it with the "ascii" encoding.
** Returns whatever ParseConfigFileEnc returns.
*/
int TY_(ParseConfigFile)( TidyDocImpl* doc, ctmbstr file )
{
    return TY_(ParseConfigFileEnc)( doc, file, "ascii" );
}
781
782 /* open the file and parse its contents
783 */
TY_(ParseConfigFileEnc)784 int TY_(ParseConfigFileEnc)( TidyDocImpl* doc, ctmbstr file, ctmbstr charenc )
785 {
786 uint opterrs = doc->optionErrors;
787 tmbstr fname = (tmbstr) ExpandTilde( doc, file );
788 TidyConfigImpl* cfg = &doc->config;
789 FILE* fin = fopen( fname, "r" );
790 int enc = TY_(CharEncodingId)( doc, charenc );
791
792 if ( fin == NULL || enc < 0 )
793 {
794 TY_(FileError)( doc, fname, TidyConfig );
795 return -1;
796 }
797 else
798 {
799 tchar c;
800 cfg->cfgIn = TY_(FileInput)( doc, fin, enc );
801 c = FirstChar( cfg );
802
803 for ( c = SkipWhite(cfg); c != EndOfStream; c = NextProperty(cfg) )
804 {
805 uint ix = 0;
806 tmbchar name[ TIDY_MAX_NAME ] = {0};
807
808 /* // or # start a comment */
809 if ( c == '/' || c == '#' )
810 continue;
811
812 while ( ix < sizeof(name)-1 && c != '\n' && c != EndOfStream && c != ':' )
813 {
814 name[ ix++ ] = (tmbchar) c; /* Option names all ASCII */
815 c = AdvanceChar( cfg );
816 }
817
818 if ( c == ':' )
819 {
820 const TidyOptionImpl* option = TY_(lookupOption)( name );
821 c = AdvanceChar( cfg );
822 if ( option )
823 option->parser( doc, option );
824 else
825 {
826 if (NULL != doc->pOptCallback)
827 {
828 TidyConfigImpl* cfg = &doc->config;
829 tmbchar buf[8192];
830 uint i = 0;
831 tchar delim = 0;
832 Bool waswhite = yes;
833
834 tchar c = SkipWhite( cfg );
835
836 if ( c == '"' || c == '\'' )
837 {
838 delim = c;
839 c = AdvanceChar( cfg );
840 }
841
842 while ( i < sizeof(buf)-2 && c != EndOfStream && c != '\r' && c != '\n' )
843 {
844 if ( delim && c == delim )
845 break;
846
847 if ( TY_(IsWhite)(c) )
848 {
849 if ( waswhite )
850 {
851 c = AdvanceChar( cfg );
852 continue;
853 }
854 c = ' ';
855 }
856 else
857 waswhite = no;
858
859 buf[i++] = (tmbchar) c;
860 c = AdvanceChar( cfg );
861 }
862 buf[i] = '\0';
863 if (no == (*doc->pOptCallback)( name, buf ))
864 TY_(ReportUnknownOption)( doc, name );
865 }
866 else
867 TY_(ReportUnknownOption)( doc, name );
868 }
869 }
870 }
871
872 TY_(freeFileSource)(&cfg->cfgIn->source, yes);
873 TY_(freeStreamIn)( cfg->cfgIn );
874 cfg->cfgIn = NULL;
875 }
876
877 if ( fname != (tmbstr) file )
878 TidyDocFree( doc, fname );
879
880 AdjustConfig( doc );
881
882 /* any new config errors? If so, return warning status. */
883 return (doc->optionErrors > opterrs ? 1 : 0);
884 }
885
886 /* returns false if unknown option, missing parameter,
887 ** or option doesn't use parameter
888 */
TY_(ParseConfigOption)889 Bool TY_(ParseConfigOption)( TidyDocImpl* doc, ctmbstr optnam, ctmbstr optval )
890 {
891 const TidyOptionImpl* option = TY_(lookupOption)( optnam );
892 Bool status = ( option != NULL );
893 if ( !status )
894 {
895 /* Not a standard tidy option. Check to see if the user application
896 recognizes it */
897 if (NULL != doc->pOptCallback)
898 status = (*doc->pOptCallback)( optnam, optval );
899 if (!status)
900 TY_(ReportUnknownOption)( doc, optnam );
901 }
902 else
903 status = TY_(ParseConfigValue)( doc, option->id, optval );
904 return status;
905 }
906
907 /* returns false if unknown option, missing parameter,
908 ** or option doesn't use parameter
909 */
TY_(ParseConfigValue)910 Bool TY_(ParseConfigValue)( TidyDocImpl* doc, TidyOptionId optId, ctmbstr optval )
911 {
912 const TidyOptionImpl* option = option_defs + optId;
913 Bool status = ( optId < N_TIDY_OPTIONS && optval != NULL );
914
915 if ( !status )
916 TY_(ReportBadArgument)( doc, option->name );
917 else
918 {
919 TidyBuffer inbuf; /* Set up input source */
920 tidyBufInitWithAllocator( &inbuf, doc->allocator );
921 tidyBufAttach( &inbuf, (byte*)optval, TY_(tmbstrlen)(optval)+1 );
922 doc->config.cfgIn = TY_(BufferInput)( doc, &inbuf, ASCII );
923 doc->config.c = GetC( &doc->config );
924
925 status = option->parser( doc, option );
926
927 TY_(freeStreamIn)(doc->config.cfgIn); /* Release input source */
928 doc->config.cfgIn = NULL;
929 tidyBufDetach( &inbuf );
930 }
931 return status;
932 }
933
934
935 /* ensure that char encodings are self consistent */
TY_(AdjustCharEncoding)936 Bool TY_(AdjustCharEncoding)( TidyDocImpl* doc, int encoding )
937 {
938 int outenc = -1;
939 int inenc = -1;
940
941 switch( encoding )
942 {
943 case MACROMAN:
944 inenc = MACROMAN;
945 outenc = ASCII;
946 break;
947
948 case WIN1252:
949 inenc = WIN1252;
950 outenc = ASCII;
951 break;
952
953 case IBM858:
954 inenc = IBM858;
955 outenc = ASCII;
956 break;
957
958 case ASCII:
959 inenc = LATIN1;
960 outenc = ASCII;
961 break;
962
963 case LATIN0:
964 inenc = LATIN0;
965 outenc = ASCII;
966 break;
967
968 case RAW:
969 case LATIN1:
970 case UTF8:
971 #ifndef NO_NATIVE_ISO2022_SUPPORT
972 case ISO2022:
973 #endif
974
975 #if SUPPORT_UTF16_ENCODINGS
976 case UTF16LE:
977 case UTF16BE:
978 case UTF16:
979 #endif
980 #if SUPPORT_ASIAN_ENCODINGS
981 case SHIFTJIS:
982 case BIG5:
983 #endif
984 inenc = outenc = encoding;
985 break;
986 }
987
988 if ( inenc >= 0 )
989 {
990 TY_(SetOptionInt)( doc, TidyCharEncoding, encoding );
991 TY_(SetOptionInt)( doc, TidyInCharEncoding, inenc );
992 TY_(SetOptionInt)( doc, TidyOutCharEncoding, outenc );
993 return yes;
994 }
995 return no;
996 }
997
998 /* ensure that config is self consistent */
AdjustConfig(TidyDocImpl * doc)999 void AdjustConfig( TidyDocImpl* doc )
1000 {
1001 if ( cfgBool(doc, TidyEncloseBlockText) )
1002 TY_(SetOptionBool)( doc, TidyEncloseBodyText, yes );
1003
1004 if ( cfgAutoBool(doc, TidyIndentContent) == TidyNoState )
1005 TY_(SetOptionInt)( doc, TidyIndentSpaces, 0 );
1006
1007 /* disable wrapping */
1008 if ( cfg(doc, TidyWrapLen) == 0 )
1009 TY_(SetOptionInt)( doc, TidyWrapLen, 0x7FFFFFFF );
1010
1011 /* Word 2000 needs o:p to be declared as inline */
1012 if ( cfgBool(doc, TidyWord2000) )
1013 {
1014 doc->config.defined_tags |= tagtype_inline;
1015 TY_(DefineTag)( doc, tagtype_inline, "o:p" );
1016 }
1017
1018 /* #480701 disable XHTML output flag if both output-xhtml and xml input are set */
1019 if ( cfgBool(doc, TidyXmlTags) )
1020 TY_(SetOptionBool)( doc, TidyXhtmlOut, no );
1021
1022 /* XHTML is written in lower case */
1023 if ( cfgBool(doc, TidyXhtmlOut) )
1024 {
1025 TY_(SetOptionBool)( doc, TidyXmlOut, yes );
1026 TY_(SetOptionBool)( doc, TidyUpperCaseTags, no );
1027 TY_(SetOptionBool)( doc, TidyUpperCaseAttrs, no );
1028 /* TY_(SetOptionBool)( doc, TidyXmlPIs, yes ); */
1029 }
1030
1031 /* if XML in, then XML out */
1032 if ( cfgBool(doc, TidyXmlTags) )
1033 {
1034 TY_(SetOptionBool)( doc, TidyXmlOut, yes );
1035 TY_(SetOptionBool)( doc, TidyXmlPIs, yes );
1036 }
1037
1038 /* #427837 - fix by Dave Raggett 02 Jun 01
1039 ** generate <?xml version="1.0" encoding="iso-8859-1"?>
1040 ** if the output character encoding is Latin-1 etc.
1041 */
1042 if ( cfg(doc, TidyOutCharEncoding) != ASCII &&
1043 cfg(doc, TidyOutCharEncoding) != UTF8 &&
1044 #if SUPPORT_UTF16_ENCODINGS
1045 cfg(doc, TidyOutCharEncoding) != UTF16 &&
1046 cfg(doc, TidyOutCharEncoding) != UTF16BE &&
1047 cfg(doc, TidyOutCharEncoding) != UTF16LE &&
1048 #endif
1049 cfg(doc, TidyOutCharEncoding) != RAW &&
1050 cfgBool(doc, TidyXmlOut) )
1051 {
1052 TY_(SetOptionBool)( doc, TidyXmlDecl, yes );
1053 }
1054
1055 /* XML requires end tags */
1056 if ( cfgBool(doc, TidyXmlOut) )
1057 {
1058 #if SUPPORT_UTF16_ENCODINGS
1059 /* XML requires a BOM on output if using UTF-16 encoding */
1060 ulong enc = cfg( doc, TidyOutCharEncoding );
1061 if ( enc == UTF16LE || enc == UTF16BE || enc == UTF16 )
1062 TY_(SetOptionInt)( doc, TidyOutputBOM, yes );
1063 #endif
1064 TY_(SetOptionBool)( doc, TidyQuoteAmpersand, yes );
1065 TY_(SetOptionBool)( doc, TidyHideEndTags, no );
1066 }
1067 }
1068
1069 /* unsigned integers */
ParseInt(TidyDocImpl * doc,const TidyOptionImpl * entry)1070 Bool ParseInt( TidyDocImpl* doc, const TidyOptionImpl* entry )
1071 {
1072 ulong number = 0;
1073 Bool digits = no;
1074 TidyConfigImpl* cfg = &doc->config;
1075 tchar c = SkipWhite( cfg );
1076
1077 while ( TY_(IsDigit)(c) )
1078 {
1079 number = c - '0' + (10 * number);
1080 digits = yes;
1081 c = AdvanceChar( cfg );
1082 }
1083
1084 if ( !digits )
1085 TY_(ReportBadArgument)( doc, entry->name );
1086 else
1087 TY_(SetOptionInt)( doc, entry->id, number );
1088 return digits;
1089 }
1090
1091 /* true/false or yes/no or 0/1 or "auto" only looks at 1st char */
ParseTriState(TidyTriState theState,TidyDocImpl * doc,const TidyOptionImpl * entry,ulong * flag)1092 static Bool ParseTriState( TidyTriState theState, TidyDocImpl* doc,
1093 const TidyOptionImpl* entry, ulong* flag )
1094 {
1095 TidyConfigImpl* cfg = &doc->config;
1096 tchar c = SkipWhite( cfg );
1097
1098 if (c == 't' || c == 'T' || c == 'y' || c == 'Y' || c == '1')
1099 *flag = yes;
1100 else if (c == 'f' || c == 'F' || c == 'n' || c == 'N' || c == '0')
1101 *flag = no;
1102 else if (theState == TidyAutoState && (c == 'a' || c =='A'))
1103 *flag = TidyAutoState;
1104 else
1105 {
1106 TY_(ReportBadArgument)( doc, entry->name );
1107 return no;
1108 }
1109
1110 return yes;
1111 }
1112
1113 /* cr, lf or crlf */
ParseNewline(TidyDocImpl * doc,const TidyOptionImpl * entry)1114 Bool ParseNewline( TidyDocImpl* doc, const TidyOptionImpl* entry )
1115 {
1116 int nl = -1;
1117 tmbchar work[ 16 ] = {0};
1118 tmbstr cp = work, end = work + sizeof(work);
1119 TidyConfigImpl* cfg = &doc->config;
1120 tchar c = SkipWhite( cfg );
1121
1122 while ( c!=EndOfStream && cp < end && !TY_(IsWhite)(c) && c != '\r' && c != '\n' )
1123 {
1124 *cp++ = (tmbchar) c;
1125 c = AdvanceChar( cfg );
1126 }
1127 *cp = 0;
1128
1129 if ( TY_(tmbstrcasecmp)(work, "lf") == 0 )
1130 nl = TidyLF;
1131 else if ( TY_(tmbstrcasecmp)(work, "crlf") == 0 )
1132 nl = TidyCRLF;
1133 else if ( TY_(tmbstrcasecmp)(work, "cr") == 0 )
1134 nl = TidyCR;
1135
1136 if ( nl < TidyLF || nl > TidyCR )
1137 TY_(ReportBadArgument)( doc, entry->name );
1138 else
1139 TY_(SetOptionInt)( doc, entry->id, nl );
1140 return ( nl >= TidyLF && nl <= TidyCR );
1141 }
1142
ParseBool(TidyDocImpl * doc,const TidyOptionImpl * entry)1143 Bool ParseBool( TidyDocImpl* doc, const TidyOptionImpl* entry )
1144 {
1145 ulong flag = 0;
1146 Bool status = ParseTriState( TidyNoState, doc, entry, &flag );
1147 if ( status )
1148 TY_(SetOptionBool)( doc, entry->id, flag != 0 );
1149 return status;
1150 }
1151
ParseAutoBool(TidyDocImpl * doc,const TidyOptionImpl * entry)1152 Bool ParseAutoBool( TidyDocImpl* doc, const TidyOptionImpl* entry )
1153 {
1154 ulong flag = 0;
1155 Bool status = ParseTriState( TidyAutoState, doc, entry, &flag );
1156 if ( status )
1157 TY_(SetOptionInt)( doc, entry->id, flag );
1158 return status;
1159 }
1160
1161 /* a string excluding whitespace */
ParseName(TidyDocImpl * doc,const TidyOptionImpl * option)1162 Bool ParseName( TidyDocImpl* doc, const TidyOptionImpl* option )
1163 {
1164 tmbchar buf[ 1024 ] = {0};
1165 uint i = 0;
1166 uint c = SkipWhite( &doc->config );
1167
1168 while ( i < sizeof(buf)-2 && c != EndOfStream && !TY_(IsWhite)(c) )
1169 {
1170 buf[i++] = (tmbchar) c;
1171 c = AdvanceChar( &doc->config );
1172 }
1173 buf[i] = 0;
1174
1175 if ( i == 0 )
1176 TY_(ReportBadArgument)( doc, option->name );
1177 else
1178 SetOptionValue( doc, option->id, buf );
1179 return ( i > 0 );
1180 }
1181
1182 /* #508936 - CSS class naming for -clean option */
ParseCSS1Selector(TidyDocImpl * doc,const TidyOptionImpl * option)1183 Bool ParseCSS1Selector( TidyDocImpl* doc, const TidyOptionImpl* option )
1184 {
1185 char buf[256] = {0};
1186 uint i = 0;
1187 uint c = SkipWhite( &doc->config );
1188
1189 while ( i < sizeof(buf)-2 && c != EndOfStream && !TY_(IsWhite)(c) )
1190 {
1191 buf[i++] = (tmbchar) c;
1192 c = AdvanceChar( &doc->config );
1193 }
1194 buf[i] = '\0';
1195
1196 if ( i == 0 || !TY_(IsCSS1Selector)(buf) ) {
1197 TY_(ReportBadArgument)( doc, option->name );
1198 return no;
1199 }
1200
1201 buf[i++] = '-'; /* Make sure any escaped Unicode is terminated */
1202 buf[i] = 0; /* so valid class names are generated after */
1203 /* Tidy appends last digits. */
1204
1205 SetOptionValue( doc, option->id, buf );
1206 return yes;
1207 }
1208
1209 /* Coordinates Config update and Tags data */
DeclareUserTag(TidyDocImpl * doc,TidyOptionId optId,UserTagType tagType,ctmbstr name)1210 static void DeclareUserTag( TidyDocImpl* doc, TidyOptionId optId,
1211 UserTagType tagType, ctmbstr name )
1212 {
1213 ctmbstr prvval = cfgStr( doc, optId );
1214 tmbstr catval = NULL;
1215 ctmbstr theval = name;
1216 if ( prvval )
1217 {
1218 uint len = TY_(tmbstrlen)(name) + TY_(tmbstrlen)(prvval) + 3;
1219 catval = TY_(tmbstrndup)( doc->allocator, prvval, len );
1220 TY_(tmbstrcat)( catval, ", " );
1221 TY_(tmbstrcat)( catval, name );
1222 theval = catval;
1223 }
1224 TY_(DefineTag)( doc, tagType, name );
1225 SetOptionValue( doc, optId, theval );
1226 if ( catval )
1227 TidyDocFree( doc, catval );
1228 }
1229
1230 /* a space or comma separated list of tag names */
ParseTagNames(TidyDocImpl * doc,const TidyOptionImpl * option)1231 Bool ParseTagNames( TidyDocImpl* doc, const TidyOptionImpl* option )
1232 {
1233 TidyConfigImpl* cfg = &doc->config;
1234 tmbchar buf[1024];
1235 uint i = 0, nTags = 0;
1236 uint c = SkipWhite( cfg );
1237 UserTagType ttyp = tagtype_null;
1238
1239 switch ( option->id )
1240 {
1241 case TidyInlineTags: ttyp = tagtype_inline; break;
1242 case TidyBlockTags: ttyp = tagtype_block; break;
1243 case TidyEmptyTags: ttyp = tagtype_empty; break;
1244 case TidyPreTags: ttyp = tagtype_pre; break;
1245 default:
1246 TY_(ReportUnknownOption)( doc, option->name );
1247 return no;
1248 }
1249
1250 SetOptionValue( doc, option->id, NULL );
1251 TY_(FreeDeclaredTags)( doc, ttyp );
1252 cfg->defined_tags |= ttyp;
1253
1254 do
1255 {
1256 if (c == ' ' || c == '\t' || c == ',')
1257 {
1258 c = AdvanceChar( cfg );
1259 continue;
1260 }
1261
1262 if ( c == '\r' || c == '\n' )
1263 {
1264 uint c2 = AdvanceChar( cfg );
1265 if ( c == '\r' && c2 == '\n' )
1266 c = AdvanceChar( cfg );
1267 else
1268 c = c2;
1269
1270 if ( !TY_(IsWhite)(c) )
1271 {
1272 buf[i] = 0;
1273 TY_(UngetChar)( c, cfg->cfgIn );
1274 TY_(UngetChar)( '\n', cfg->cfgIn );
1275 break;
1276 }
1277 }
1278
1279 /*
1280 if ( c == '\n' )
1281 {
1282 c = AdvanceChar( cfg );
1283 if ( !TY_(IsWhite)(c) )
1284 {
1285 buf[i] = 0;
1286 TY_(UngetChar)( c, cfg->cfgIn );
1287 TY_(UngetChar)( '\n', cfg->cfgIn );
1288 break;
1289 }
1290 }
1291 */
1292
1293 while ( i < sizeof(buf)-2 && c != EndOfStream && !TY_(IsWhite)(c) && c != ',' )
1294 {
1295 buf[i++] = (tmbchar) c;
1296 c = AdvanceChar( cfg );
1297 }
1298
1299 buf[i] = '\0';
1300 if (i == 0) /* Skip empty tag definition. Possible when */
1301 continue; /* there is a trailing space on the line. */
1302
1303 /* add tag to dictionary */
1304 DeclareUserTag( doc, option->id, ttyp, buf );
1305 i = 0;
1306 ++nTags;
1307 }
1308 while ( c != EndOfStream );
1309
1310 if ( i > 0 )
1311 DeclareUserTag( doc, option->id, ttyp, buf );
1312 return ( nTags > 0 );
1313 }
1314
1315 /* a string including whitespace */
1316 /* munges whitespace sequences */
1317
ParseString(TidyDocImpl * doc,const TidyOptionImpl * option)1318 Bool ParseString( TidyDocImpl* doc, const TidyOptionImpl* option )
1319 {
1320 TidyConfigImpl* cfg = &doc->config;
1321 tmbchar buf[8192];
1322 uint i = 0;
1323 tchar delim = 0;
1324 Bool waswhite = yes;
1325
1326 tchar c = SkipWhite( cfg );
1327
1328 if ( c == '"' || c == '\'' )
1329 {
1330 delim = c;
1331 c = AdvanceChar( cfg );
1332 }
1333
1334 while ( i < sizeof(buf)-2 && c != EndOfStream && c != '\r' && c != '\n' )
1335 {
1336 if ( delim && c == delim )
1337 break;
1338
1339 if ( TY_(IsWhite)(c) )
1340 {
1341 if ( waswhite )
1342 {
1343 c = AdvanceChar( cfg );
1344 continue;
1345 }
1346 c = ' ';
1347 }
1348 else
1349 waswhite = no;
1350
1351 buf[i++] = (tmbchar) c;
1352 c = AdvanceChar( cfg );
1353 }
1354 buf[i] = '\0';
1355
1356 SetOptionValue( doc, option->id, buf );
1357 return yes;
1358 }
1359
ParseCharEnc(TidyDocImpl * doc,const TidyOptionImpl * option)1360 Bool ParseCharEnc( TidyDocImpl* doc, const TidyOptionImpl* option )
1361 {
1362 tmbchar buf[64] = {0};
1363 uint i = 0;
1364 int enc = ASCII;
1365 Bool validEncoding = yes;
1366 tchar c = SkipWhite( &doc->config );
1367
1368 while ( i < sizeof(buf)-2 && c != EndOfStream && !TY_(IsWhite)(c) )
1369 {
1370 buf[i++] = (tmbchar) TY_(ToLower)( c );
1371 c = AdvanceChar( &doc->config );
1372 }
1373 buf[i] = 0;
1374
1375 enc = TY_(CharEncodingId)( doc, buf );
1376
1377 #ifdef TIDY_WIN32_MLANG_SUPPORT
1378 /* limit support to --input-encoding */
1379 if (option->id != TidyInCharEncoding && enc > WIN32MLANG)
1380 enc = -1;
1381 #endif
1382
1383 if ( enc < 0 )
1384 {
1385 validEncoding = no;
1386 TY_(ReportBadArgument)( doc, option->name );
1387 }
1388 else
1389 TY_(SetOptionInt)( doc, option->id, enc );
1390
1391 if ( validEncoding && option->id == TidyCharEncoding )
1392 TY_(AdjustCharEncoding)( doc, enc );
1393 return validEncoding;
1394 }
1395
1396
TY_(CharEncodingId)1397 int TY_(CharEncodingId)( TidyDocImpl* ARG_UNUSED(doc), ctmbstr charenc )
1398 {
1399 int enc = TY_(GetCharEncodingFromOptName)( charenc );
1400
1401 #ifdef TIDY_WIN32_MLANG_SUPPORT
1402 if (enc == -1)
1403 {
1404 uint wincp = TY_(Win32MLangGetCPFromName)(doc->allocator, charenc);
1405 if (wincp)
1406 enc = wincp;
1407 }
1408 #endif
1409
1410 return enc;
1411 }
1412
TY_(CharEncodingName)1413 ctmbstr TY_(CharEncodingName)( int encoding )
1414 {
1415 ctmbstr encodingName = TY_(GetEncodingNameFromTidyId)(encoding);
1416
1417 if (!encodingName)
1418 encodingName = "unknown";
1419
1420 return encodingName;
1421 }
1422
TY_(CharEncodingOptName)1423 ctmbstr TY_(CharEncodingOptName)( int encoding )
1424 {
1425 ctmbstr encodingName = TY_(GetEncodingOptNameFromTidyId)(encoding);
1426
1427 if (!encodingName)
1428 encodingName = "unknown";
1429
1430 return encodingName;
1431 }
1432
1433 /*
1434 doctype: omit | auto | strict | loose | <fpi>
1435
1436 where the fpi is a string similar to
1437
1438 "-//ACME//DTD HTML 3.14159//EN"
1439 */
ParseDocType(TidyDocImpl * doc,const TidyOptionImpl * option)1440 Bool ParseDocType( TidyDocImpl* doc, const TidyOptionImpl* option )
1441 {
1442 tmbchar buf[ 32 ] = {0};
1443 uint i = 0;
1444 Bool status = yes;
1445 TidyDoctypeModes dtmode = TidyDoctypeAuto;
1446
1447 TidyConfigImpl* cfg = &doc->config;
1448 tchar c = SkipWhite( cfg );
1449
1450 /* "-//ACME//DTD HTML 3.14159//EN" or similar */
1451
1452 if ( c == '"' || c == '\'' )
1453 {
1454 status = ParseString(doc, option);
1455 if (status)
1456 TY_(SetOptionInt)( doc, TidyDoctypeMode, TidyDoctypeUser );
1457
1458 return status;
1459 }
1460
1461 /* read first word */
1462 while ( i < sizeof(buf)-1 && c != EndOfStream && !TY_(IsWhite)(c) )
1463 {
1464 buf[i++] = (tmbchar) c;
1465 c = AdvanceChar( cfg );
1466 }
1467 buf[i] = '\0';
1468
1469 if ( TY_(tmbstrcasecmp)(buf, "auto") == 0 )
1470 dtmode = TidyDoctypeAuto;
1471 else if ( TY_(tmbstrcasecmp)(buf, "omit") == 0 )
1472 dtmode = TidyDoctypeOmit;
1473 else if ( TY_(tmbstrcasecmp)(buf, "strict") == 0 )
1474 dtmode = TidyDoctypeStrict;
1475 else if ( TY_(tmbstrcasecmp)(buf, "loose") == 0 ||
1476 TY_(tmbstrcasecmp)(buf, "transitional") == 0 )
1477 dtmode = TidyDoctypeLoose;
1478 else
1479 {
1480 TY_(ReportBadArgument)( doc, option->name );
1481 status = no;
1482 }
1483
1484 if ( status )
1485 TY_(SetOptionInt)( doc, TidyDoctypeMode, dtmode );
1486 return status;
1487 }
1488
ParseRepeatAttr(TidyDocImpl * doc,const TidyOptionImpl * option)1489 Bool ParseRepeatAttr( TidyDocImpl* doc, const TidyOptionImpl* option )
1490 {
1491 Bool status = yes;
1492 tmbchar buf[64] = {0};
1493 uint i = 0;
1494
1495 TidyConfigImpl* cfg = &doc->config;
1496 tchar c = SkipWhite( cfg );
1497
1498 while (i < sizeof(buf)-1 && c != EndOfStream && !TY_(IsWhite)(c))
1499 {
1500 buf[i++] = (tmbchar) c;
1501 c = AdvanceChar( cfg );
1502 }
1503 buf[i] = '\0';
1504
1505 if ( TY_(tmbstrcasecmp)(buf, "keep-first") == 0 )
1506 cfg->value[ TidyDuplicateAttrs ].v = TidyKeepFirst;
1507 else if ( TY_(tmbstrcasecmp)(buf, "keep-last") == 0 )
1508 cfg->value[ TidyDuplicateAttrs ].v = TidyKeepLast;
1509 else
1510 {
1511 TY_(ReportBadArgument)( doc, option->name );
1512 status = no;
1513 }
1514 return status;
1515 }
1516
ParseSorter(TidyDocImpl * doc,const TidyOptionImpl * option)1517 Bool ParseSorter( TidyDocImpl* doc, const TidyOptionImpl* option )
1518 {
1519 Bool status = yes;
1520 tmbchar buf[64] = {0};
1521 uint i = 0;
1522
1523 TidyConfigImpl* cfg = &doc->config;
1524 tchar c = SkipWhite( cfg );
1525
1526 while (i < sizeof(buf)-1 && c != EndOfStream && !TY_(IsWhite)(c))
1527 {
1528 buf[i++] = (tmbchar) c;
1529 c = AdvanceChar( cfg );
1530 }
1531 buf[i] = '\0';
1532
1533 if ( TY_(tmbstrcasecmp)(buf, "alpha") == 0 )
1534 cfg->value[ TidySortAttributes ].v = TidySortAttrAlpha;
1535 else if ( TY_(tmbstrcasecmp)(buf, "none") == 0)
1536 cfg->value[ TidySortAttributes ].v = TidySortAttrNone;
1537 else
1538 {
1539 TY_(ReportBadArgument)( doc, option->name );
1540 status = no;
1541 }
1542 return status;
1543 }
1544
1545 /* Use TidyOptionId as iterator.
1546 ** Send index of 1st option after TidyOptionUnknown as start of list.
1547 */
TY_(getOptionList)1548 TidyIterator TY_(getOptionList)( TidyDocImpl* ARG_UNUSED(doc) )
1549 {
1550 return (TidyIterator) (size_t)1;
1551 }
1552
1553 /* Check if this item is last valid option.
1554 ** If so, zero out iterator.
1555 */
TY_(getNextOption)1556 const TidyOptionImpl* TY_(getNextOption)( TidyDocImpl* ARG_UNUSED(doc),
1557 TidyIterator* iter )
1558 {
1559 const TidyOptionImpl* option = NULL;
1560 size_t optId;
1561 assert( iter != NULL );
1562 optId = (size_t) *iter;
1563 if ( optId > TidyUnknownOption && optId < N_TIDY_OPTIONS )
1564 {
1565 option = &option_defs[ optId ];
1566 optId++;
1567 }
1568 *iter = (TidyIterator) ( optId < N_TIDY_OPTIONS ? optId : (size_t)0 );
1569 return option;
1570 }
1571
1572 /* Use a 1-based array index as iterator: 0 == end-of-list
1573 */
TY_(getOptionPickList)1574 TidyIterator TY_(getOptionPickList)( const TidyOptionImpl* option )
1575 {
1576 size_t ix = 0;
1577 if ( option && option->pickList )
1578 ix = 1;
1579 return (TidyIterator) ix;
1580 }
1581
TY_(getNextOptionPick)1582 ctmbstr TY_(getNextOptionPick)( const TidyOptionImpl* option,
1583 TidyIterator* iter )
1584 {
1585 size_t ix;
1586 ctmbstr val = NULL;
1587 assert( option!=NULL && iter != NULL );
1588
1589 ix = (size_t) *iter;
1590 if ( ix > 0 && ix < 16 && option->pickList )
1591 val = option->pickList[ ix-1 ];
1592 *iter = (TidyIterator) ( val && option->pickList[ix] ? ix + 1 : (size_t)0 );
1593 return val;
1594 }
1595
/* Write one "name: value" line, terminated by '\n', to out.
** Characters are emitted one at a time through TY_(WriteChar).
** Always returns 0.
*/
static int  WriteOptionString( const TidyOptionImpl* option,
                               ctmbstr sval, StreamOut* out )
{
    ctmbstr p;

    for ( p = option->name; *p; ++p )
        TY_(WriteChar)( *p, out );

    TY_(WriteChar)( ':', out );
    TY_(WriteChar)( ' ', out );

    for ( p = sval; *p; ++p )
        TY_(WriteChar)( *p, out );

    TY_(WriteChar)( '\n', out );
    return 0;
}
1610
/* Write an integer option as "name: <unsigned decimal>".
** Returns the result of WriteOptionString.
*/
static int  WriteOptionInt( const TidyOptionImpl* option, uint ival, StreamOut* out )
{
    tmbchar digits[ 32 ] = {0};

    TY_(tmbsnprintf)( digits, sizeof(digits), "%u", ival );
    return WriteOptionString( option, digits, out );
}
1617
/* Write a boolean option as "name: yes" or "name: no".
** Returns the result of WriteOptionString.
*/
static int  WriteOptionBool( const TidyOptionImpl* option, Bool bval, StreamOut* out )
{
    return WriteOptionString( option, bval ? "yes" : "no", out );
}
1623
/* Write an option using the ival-th entry (0-based) of its pick list.
**
** The list is walked up to ival without running past its NULL
** terminator.  Returns -1 when the list has no such entry, otherwise
** the result of WriteOptionString.  option->pickList must not be NULL.
*/
static int  WriteOptionPick( const TidyOptionImpl* option, uint ival, StreamOut* out )
{
    const ctmbstr* picks = option->pickList;
    uint pos = 0;

    while ( picks[pos] && pos < ival )
        ++pos;

    if ( pos != ival || !picks[pos] )
        return -1;

    return WriteOptionString( option, picks[pos], out );
}
1634
TY_(ConfigDiffThanSnapshot)1635 Bool TY_(ConfigDiffThanSnapshot)( TidyDocImpl* doc )
1636 {
1637 int diff = memcmp( &doc->config.value, &doc->config.snapshot,
1638 N_TIDY_OPTIONS * sizeof(uint) );
1639 return ( diff != 0 );
1640 }
1641
TY_(ConfigDiffThanDefault)1642 Bool TY_(ConfigDiffThanDefault)( TidyDocImpl* doc )
1643 {
1644 Bool diff = no;
1645 const TidyOptionImpl* option = option_defs + 1;
1646 const TidyOptionValue* val = doc->config.value;
1647 for ( /**/; !diff && option && option->name; ++option, ++val )
1648 {
1649 diff = !OptionValueEqDefault( option, val );
1650 }
1651 return diff;
1652 }
1653
1654
/* Write every option that differs from its default to out, one
** "name: value" line per option.
**
** Options without a parser are skipped.  TidyDoctype is special:
** what is written depends on TidyDoctypeMode rather than on the
** option's own value - a user doctype is written as a quoted string,
** the default mode is not written, any other mode is written via
** the pick list.
**
** Returns 0 on success.  Stops at the first failing write and
** returns its non-zero result; returns -1 if the temporary buffer
** for a user doctype cannot be allocated.
*/
static int  SaveConfigToStream( TidyDocImpl* doc, StreamOut* out )
{
    int rc = 0;
    const TidyOptionImpl* option;
    for ( option=option_defs+1; 0==rc && option && option->name; ++option )
    {
        const TidyOptionValue* val = &doc->config.value[ option->id ];
        if ( option->parser == NULL )
            continue;
        if ( OptionValueEqDefault( option, val ) && option->id != TidyDoctype)
            continue;

        if ( option->id == TidyDoctype )  /* Special case */
        {
            ulong dtmode = cfg( doc, TidyDoctypeMode );
            if ( dtmode == TidyDoctypeUser )
            {
                /* Room for the value, 2 double quotes and the
                ** terminator (the former "+ 2" left no room for the
                ** terminator).
                */
                uint len = TY_(tmbstrlen)( val->p );
                tmbstr t = (tmbstr)TidyDocAlloc( doc, len + 3 );

                if ( t )
                {
                    t[0] = '\"'; t[1] = 0;

                    TY_(tmbstrcat)( t, val->p );
                    TY_(tmbstrcat)( t, "\"" );
                    rc = WriteOptionString( option, t, out );

                    TidyDocFree( doc, t );
                }
                else
                    rc = -1;   /* do not silently drop the doctype */
            }
            else if ( dtmode == option_defs[TidyDoctypeMode].dflt )
                continue;
            else
                rc = WriteOptionPick( option, dtmode, out );
        }
        else if ( option->pickList )
            rc = WriteOptionPick( option, val->v, out );
        else
        {
            switch ( option->type )
            {
            case TidyString:
                rc = WriteOptionString( option, val->p, out );
                break;
            case TidyInteger:
                rc = WriteOptionInt( option, val->v, out );
                break;
            case TidyBoolean:
                rc = WriteOptionBool( option, val->v ? yes : no, out );
                break;
            default:
                /* other option types are not written */
                break;
            }
        }
    }
    return rc;
}
1711
TY_(SaveConfigFile)1712 int TY_(SaveConfigFile)( TidyDocImpl* doc, ctmbstr cfgfil )
1713 {
1714 int status = -1;
1715 StreamOut* out = NULL;
1716 uint outenc = cfg( doc, TidyOutCharEncoding );
1717 uint nl = cfg( doc, TidyNewline );
1718 FILE* fout = fopen( cfgfil, "wb" );
1719 if ( fout )
1720 {
1721 out = TY_(FileOutput)( doc, fout, outenc, nl );
1722 status = SaveConfigToStream( doc, out );
1723 fclose( fout );
1724 TidyDocFree( doc, out );
1725 }
1726 return status;
1727 }
1728
TY_(SaveConfigSink)1729 int TY_(SaveConfigSink)( TidyDocImpl* doc, TidyOutputSink* sink )
1730 {
1731 uint outenc = cfg( doc, TidyOutCharEncoding );
1732 uint nl = cfg( doc, TidyNewline );
1733 StreamOut* out = TY_(UserOutput)( doc, sink, outenc, nl );
1734 int status = SaveConfigToStream( doc, out );
1735 TidyDocFree( doc, out );
1736 return status;
1737 }
1738
1739 /*
1740 * local variables:
1741 * mode: c
1742 * indent-tabs-mode: nil
1743 * c-basic-offset: 4
1744 * eval: (c-set-offset 'substatement-open 0)
1745 * end:
1746 */
1747