1 /*
2   config.c -- read config file and manage config properties
3 
4   (c) 1998-2008 (W3C) MIT, ERCIM, Keio University
5   See tidy.h for the copyright notice.
6 
7   CVS Info :
8 
9     $Author: arnaud02 $
10     $Date: 2008/06/18 20:18:54 $
11     $Revision: 1.111 $
12 
13 */
14 
15 /*
16   config files associate a property name with a value.
17 
18   // comments can start at the beginning of a line
19   # comments can start at the beginning of a line
20   name: short values fit onto one line
21   name: a really long value that
22    continues on the next line
23 
24   property names are case insensitive and should be less than
25   60 characters in length and must start at the beginning of
26   the line, as whitespace at the start of a line signifies a
27   line continuation.
28 */
29 
30 #include "config.h"
31 #include "tidy-int.h"
32 #include "message.h"
33 #include "tmbstr.h"
34 #include "tags.h"
35 
36 #ifdef WINDOWS_OS
37 #include <io.h>
38 #else
39 #ifdef DMALLOC
40 /*
41    macro for valloc() in dmalloc.h may conflict with declaration for valloc() in unistd.h -
42    we don't need (debugging for) valloc() here. dmalloc.h should come last but it doesn't.
43 */
44 #ifdef valloc
45 #undef valloc
46 #endif
47 #endif
48 #include <unistd.h>
49 #endif
50 
51 #ifdef TIDY_WIN32_MLANG_SUPPORT
52 #include "win32tc.h"
53 #endif
54 
TY_(InitConfig)55 void TY_(InitConfig)( TidyDocImpl* doc )
56 {
57     TidyClearMemory( &doc->config, sizeof(TidyConfigImpl) );
58     TY_(ResetConfigToDefault)( doc );
59 }
60 
TY_(FreeConfig)61 void TY_(FreeConfig)( TidyDocImpl* doc )
62 {
63     TY_(ResetConfigToDefault)( doc );
64     TY_(TakeConfigSnapshot)( doc );
65 }
66 
67 
68 /* Arrange so index can be cast to enum
69 */
/* boolPicks: the pick list given to the option_defs rows that are
** parsed by ParseBool. The array is NULL terminated.
*/
70 static const ctmbstr boolPicks[] =
71 {
72   "no",
73   "yes",
74   NULL
75 };
76 
/* autoBoolPicks: the pick list given to the option_defs rows that are
** parsed by ParseAutoBool. The array is NULL terminated.
*/
77 static const ctmbstr autoBoolPicks[] =
78 {
79   "no",
80   "yes",
81   "auto",
82   NULL
83 };
84 
/* repeatAttrPicks: the pick list of the "repeated-attributes" option
** in option_defs. The array is NULL terminated.
*/
85 static const ctmbstr repeatAttrPicks[] =
86 {
87   "keep-first",
88   "keep-last",
89   NULL
90 };
91 
/* accessPicks: the pick list of the "accessibility-check" option in
** option_defs. The array is NULL terminated.
*/
92 static const ctmbstr accessPicks[] =
93 {
94   "0 (Tidy Classic)",
95   "1 (Priority 1 Checks)",
96   "2 (Priority 2 Checks)",
97   "3 (Priority 3 Checks)",
98   NULL
99 };
100 
/* charEncPicks: the pick list shared by the "char-encoding",
** "input-encoding" and "output-encoding" options in option_defs.
** Which names are present depends on NO_NATIVE_ISO2022_SUPPORT,
** SUPPORT_UTF16_ENCODINGS and SUPPORT_ASIAN_ENCODINGS.
** The array is NULL terminated.
*/
101 static const ctmbstr charEncPicks[] =
102 {
103   "raw",
104   "ascii",
105   "latin0",
106   "latin1",
107   "utf8",
108 #ifndef NO_NATIVE_ISO2022_SUPPORT
109   "iso2022",
110 #endif
111   "mac",
112   "win1252",
113   "ibm858",
114
115 #if SUPPORT_UTF16_ENCODINGS
116   "utf16le",
117   "utf16be",
118   "utf16",
119 #endif
120
121 #if SUPPORT_ASIAN_ENCODINGS
122   "big5",
123   "shiftjis",
124 #endif
125
126   NULL
127 };
128 
/* newlinePicks: the pick list of the "newline" option in option_defs.
** The array is NULL terminated.
*/
129 static const ctmbstr newlinePicks[] =
130 {
131   "LF",
132   "CRLF",
133   "CR",
134   NULL
135 };
136 
/* doctypePicks: the pick list shared by the "doctype-mode" and
** "doctype" options in option_defs. The array is NULL terminated.
*/
137 static const ctmbstr doctypePicks[] =
138 {
139   "omit",
140   "auto",
141   "strict",
142   "transitional",
143   "user",
144   NULL
145 };
146 
/* sorterPicks: the pick list of the "sort-attributes" option in
** option_defs. The array is NULL terminated.
*/
147 static const ctmbstr sorterPicks[] =
148 {
149   "none",
150   "alpha",
151   NULL
152 };
153 
154 #define MU TidyMarkup
155 #define DG TidyDiagnostics
156 #define PP TidyPrettyPrint
157 #define CE TidyEncoding
158 #define MS TidyMiscellaneous
159 
160 #define IN TidyInteger
161 #define BL TidyBoolean
162 #define ST TidyString
163 
164 #define XX (TidyConfigCategory)-1
165 #define XY (TidyOptionType)-1
166 
167 #define DLF DEFAULT_NL_CONFIG
168 
169 /* If Accessibility checks not supported, make config setting read-only */
170 #if SUPPORT_ACCESSIBILITY_CHECKS
171 #define ParseAcc ParseInt
172 #else
173 #define ParseAcc NULL
174 #endif
175 
176 static void AdjustConfig( TidyDocImpl* doc );
177 
178 /* parser for integer values */
179 static ParseProperty ParseInt;
180 
181 /* parser for 't'/'f', 'true'/'false', 'y'/'n', 'yes'/'no' or '1'/'0' */
182 static ParseProperty ParseBool;
183 
184 /* parser for 't'/'f', 'true'/'false', 'y'/'n', 'yes'/'no', '1'/'0'
185    or 'auto' */
186 static ParseProperty ParseAutoBool;
187 
188 /* a string excluding whitespace */
189 static ParseProperty ParseName;
190 
191 /* a CSS1 selector - CSS class naming for -clean option */
192 static ParseProperty ParseCSS1Selector;
193 
194 /* a string including whitespace */
195 static ParseProperty ParseString;
196 
197 /* a space or comma separated list of tag names */
198 static ParseProperty ParseTagNames;
199 
200 /* alpha */
201 static ParseProperty ParseSorter;
202 
203 /* RAW, ASCII, LATIN0, LATIN1, UTF8, ISO2022, MACROMAN,
204    WIN1252, IBM858, UTF16LE, UTF16BE, UTF16, BIG5, SHIFTJIS
205 */
206 static ParseProperty ParseCharEnc;
207 static ParseProperty ParseNewline;
208 
209 /* omit | auto | strict | loose | <fpi> */
210 static ParseProperty ParseDocType;
211 
212 /* keep-first or keep-last? */
213 static ParseProperty ParseRepeatAttr;
214 
215 
/* option_defs: one row per configuration option.
**
** Columns, left to right:
**   option id, category (MU/DG/PP/CE/MS), name as written in a config
**   file, value type (IN/BL/ST), integer or boolean default, parser
**   function, pick list.
**
** Rows must stay in TidyOptionId order: getOption() indexes this table
** directly with the id, and the loops in this file assert that each
** row's id equals its index. Rows guarded by SUPPORT_ASIAN_ENCODINGS /
** SUPPORT_UTF16_ENCODINGS are only present when those are enabled.
** The last row (id N_TIDY_OPTIONS, NULL name) closes the table.
**
** NOTE(review): some rows have a NULL parser ("unknown!", "doctype-mode",
** and "accessibility-check" when ParseAcc is defined as NULL); code that
** calls option->parser should check for NULL first.
*/
216 static const TidyOptionImpl option_defs[] =
217 {
218   { TidyUnknownOption,           MS, "unknown!",                    IN, 0,               NULL,              NULL            },
219   { TidyIndentSpaces,            PP, "indent-spaces",               IN, 2,               ParseInt,          NULL            },
220   { TidyWrapLen,                 PP, "wrap",                        IN, 68,              ParseInt,          NULL            },
221   { TidyTabSize,                 PP, "tab-size",                    IN, 8,               ParseInt,          NULL            },
222   { TidyCharEncoding,            CE, "char-encoding",               IN, ASCII,           ParseCharEnc,      charEncPicks    },
223   { TidyInCharEncoding,          CE, "input-encoding",              IN, LATIN1,          ParseCharEnc,      charEncPicks    },
224   { TidyOutCharEncoding,         CE, "output-encoding",             IN, ASCII,           ParseCharEnc,      charEncPicks    },
225   { TidyNewline,                 CE, "newline",                     IN, DLF,             ParseNewline,      newlinePicks    },
226   { TidyDoctypeMode,             MU, "doctype-mode",                IN, TidyDoctypeAuto, NULL,              doctypePicks    },
227   { TidyDoctype,                 MU, "doctype",                     ST, 0,               ParseDocType,      doctypePicks    },
228   { TidyDuplicateAttrs,          MU, "repeated-attributes",         IN, TidyKeepLast,    ParseRepeatAttr,   repeatAttrPicks },
229   { TidyAltText,                 MU, "alt-text",                    ST, 0,               ParseString,       NULL            },
230
231   /* obsolete */
232   { TidySlideStyle,              MS, "slide-style",                 ST, 0,               ParseName,         NULL            },
233
234   { TidyErrFile,                 MS, "error-file",                  ST, 0,               ParseString,       NULL            },
235   { TidyOutFile,                 MS, "output-file",                 ST, 0,               ParseString,       NULL            },
236   { TidyWriteBack,               MS, "write-back",                  BL, no,              ParseBool,         boolPicks       },
237   { TidyShowMarkup,              PP, "markup",                      BL, yes,             ParseBool,         boolPicks       },
238   { TidyShowWarnings,            DG, "show-warnings",               BL, yes,             ParseBool,         boolPicks       },
239   { TidyQuiet,                   MS, "quiet",                       BL, no,              ParseBool,         boolPicks       },
240   { TidyIndentContent,           PP, "indent",                      IN, TidyNoState,     ParseAutoBool,     autoBoolPicks   },
241   { TidyHideEndTags,             MU, "hide-endtags",                BL, no,              ParseBool,         boolPicks       },
242   { TidyXmlTags,                 MU, "input-xml",                   BL, no,              ParseBool,         boolPicks       },
243   { TidyXmlOut,                  MU, "output-xml",                  BL, no,              ParseBool,         boolPicks       },
244   { TidyXhtmlOut,                MU, "output-xhtml",                BL, no,              ParseBool,         boolPicks       },
245   { TidyHtmlOut,                 MU, "output-html",                 BL, no,              ParseBool,         boolPicks       },
246   { TidyXmlDecl,                 MU, "add-xml-decl",                BL, no,              ParseBool,         boolPicks       },
247   { TidyUpperCaseTags,           MU, "uppercase-tags",              BL, no,              ParseBool,         boolPicks       },
248   { TidyUpperCaseAttrs,          MU, "uppercase-attributes",        BL, no,              ParseBool,         boolPicks       },
249   { TidyMakeBare,                MU, "bare",                        BL, no,              ParseBool,         boolPicks       },
250   { TidyMakeClean,               MU, "clean",                       BL, no,              ParseBool,         boolPicks       },
251   { TidyLogicalEmphasis,         MU, "logical-emphasis",            BL, no,              ParseBool,         boolPicks       },
252   { TidyDropPropAttrs,           MU, "drop-proprietary-attributes", BL, no,              ParseBool,         boolPicks       },
253   { TidyDropFontTags,            MU, "drop-font-tags",              BL, no,              ParseBool,         boolPicks       },
254   { TidyDropEmptyParas,          MU, "drop-empty-paras",            BL, yes,             ParseBool,         boolPicks       },
255   { TidyFixComments,             MU, "fix-bad-comments",            BL, yes,             ParseBool,         boolPicks       },
256   { TidyBreakBeforeBR,           PP, "break-before-br",             BL, no,              ParseBool,         boolPicks       },
257
258   /* obsolete */
259   { TidyBurstSlides,             PP, "split",                       BL, no,              ParseBool,         boolPicks       },
260
261   { TidyNumEntities,             MU, "numeric-entities",            BL, no,              ParseBool,         boolPicks       },
262   { TidyQuoteMarks,              MU, "quote-marks",                 BL, no,              ParseBool,         boolPicks       },
263   { TidyQuoteNbsp,               MU, "quote-nbsp",                  BL, yes,             ParseBool,         boolPicks       },
264   { TidyQuoteAmpersand,          MU, "quote-ampersand",             BL, yes,             ParseBool,         boolPicks       },
265   { TidyWrapAttVals,             PP, "wrap-attributes",             BL, no,              ParseBool,         boolPicks       },
266   { TidyWrapScriptlets,          PP, "wrap-script-literals",        BL, no,              ParseBool,         boolPicks       },
267   { TidyWrapSection,             PP, "wrap-sections",               BL, yes,             ParseBool,         boolPicks       },
268   { TidyWrapAsp,                 PP, "wrap-asp",                    BL, yes,             ParseBool,         boolPicks       },
269   { TidyWrapJste,                PP, "wrap-jste",                   BL, yes,             ParseBool,         boolPicks       },
270   { TidyWrapPhp,                 PP, "wrap-php",                    BL, yes,             ParseBool,         boolPicks       },
271   { TidyFixBackslash,            MU, "fix-backslash",               BL, yes,             ParseBool,         boolPicks       },
272   { TidyIndentAttributes,        PP, "indent-attributes",           BL, no,              ParseBool,         boolPicks       },
273   { TidyXmlPIs,                  MU, "assume-xml-procins",          BL, no,              ParseBool,         boolPicks       },
274   { TidyXmlSpace,                MU, "add-xml-space",               BL, no,              ParseBool,         boolPicks       },
275   { TidyEncloseBodyText,         MU, "enclose-text",                BL, no,              ParseBool,         boolPicks       },
276   { TidyEncloseBlockText,        MU, "enclose-block-text",          BL, no,              ParseBool,         boolPicks       },
277   { TidyKeepFileTimes,           MS, "keep-time",                   BL, no,              ParseBool,         boolPicks       },
278   { TidyWord2000,                MU, "word-2000",                   BL, no,              ParseBool,         boolPicks       },
279   { TidyMark,                    MS, "tidy-mark",                   BL, yes,             ParseBool,         boolPicks       },
280   { TidyEmacs,                   MS, "gnu-emacs",                   BL, no,              ParseBool,         boolPicks       },
281   { TidyEmacsFile,               MS, "gnu-emacs-file",              ST, 0,               ParseString,       NULL            },
282   { TidyLiteralAttribs,          MU, "literal-attributes",          BL, no,              ParseBool,         boolPicks       },
283   { TidyBodyOnly,                MU, "show-body-only",              IN, no,              ParseAutoBool,     autoBoolPicks   },
284   { TidyFixUri,                  MU, "fix-uri",                     BL, yes,             ParseBool,         boolPicks       },
285   { TidyLowerLiterals,           MU, "lower-literals",              BL, yes,             ParseBool,         boolPicks       },
286   { TidyHideComments,            MU, "hide-comments",               BL, no,              ParseBool,         boolPicks       },
287   { TidyIndentCdata,             MU, "indent-cdata",                BL, no,              ParseBool,         boolPicks       },
288   { TidyForceOutput,             MS, "force-output",                BL, no,              ParseBool,         boolPicks       },
289   { TidyShowErrors,              DG, "show-errors",                 IN, 6,               ParseInt,          NULL            },
290   { TidyAsciiChars,              CE, "ascii-chars",                 BL, no,              ParseBool,         boolPicks       },
291   { TidyJoinClasses,             MU, "join-classes",                BL, no,              ParseBool,         boolPicks       },
292   { TidyJoinStyles,              MU, "join-styles",                 BL, yes,             ParseBool,         boolPicks       },
293   { TidyEscapeCdata,             MU, "escape-cdata",                BL, no,              ParseBool,         boolPicks       },
294 #if SUPPORT_ASIAN_ENCODINGS
295   { TidyLanguage,                CE, "language",                    ST, 0,               ParseName,         NULL            },
296   { TidyNCR,                     MU, "ncr",                         BL, yes,             ParseBool,         boolPicks       },
297 #endif
298 #if SUPPORT_UTF16_ENCODINGS
299   { TidyOutputBOM,               CE, "output-bom",                  IN, TidyAutoState,   ParseAutoBool,     autoBoolPicks   },
300 #endif
301   { TidyReplaceColor,            MU, "replace-color",               BL, no,              ParseBool,         boolPicks       },
302   { TidyCSSPrefix,               MU, "css-prefix",                  ST, 0,               ParseCSS1Selector, NULL            },
303   { TidyInlineTags,              MU, "new-inline-tags",             ST, 0,               ParseTagNames,     NULL            },
304   { TidyBlockTags,               MU, "new-blocklevel-tags",         ST, 0,               ParseTagNames,     NULL            },
305   { TidyEmptyTags,               MU, "new-empty-tags",              ST, 0,               ParseTagNames,     NULL            },
306   { TidyPreTags,                 MU, "new-pre-tags",                ST, 0,               ParseTagNames,     NULL            },
307   { TidyAccessibilityCheckLevel, DG, "accessibility-check",         IN, 0,               ParseAcc,          accessPicks     },
308   { TidyVertSpace,               PP, "vertical-space",              BL, no,              ParseBool,         boolPicks       },
309 #if SUPPORT_ASIAN_ENCODINGS
310   { TidyPunctWrap,               PP, "punctuation-wrap",            BL, no,              ParseBool,         boolPicks       },
311 #endif
312   { TidyMergeDivs,               MU, "merge-divs",                  IN, TidyAutoState,   ParseAutoBool,     autoBoolPicks   },
313   { TidyDecorateInferredUL,      MU, "decorate-inferred-ul",        BL, no,              ParseBool,         boolPicks       },
314   { TidyPreserveEntities,        MU, "preserve-entities",           BL, no,              ParseBool,         boolPicks       },
315   { TidySortAttributes,          PP, "sort-attributes",             IN, TidySortAttrNone,ParseSorter,       sorterPicks     },
316   { TidyMergeSpans,              MU, "merge-spans",                 IN, TidyAutoState,   ParseAutoBool,     autoBoolPicks   },
317   { TidyAnchorAsName,            MU, "anchor-as-name",              BL, yes,             ParseBool,         boolPicks       },
318   { N_TIDY_OPTIONS,              XX, NULL,                          XY, 0,               NULL,              NULL            }
319 };
320 
321 /* Should only be called by options set by name
322 ** thus, it is cheaper to do a few scans than set
323 ** up every option in a hash table.
324 */
TY_(lookupOption)325 const TidyOptionImpl* TY_(lookupOption)( ctmbstr s )
326 {
327     const TidyOptionImpl* np = option_defs;
328     for ( /**/; np < option_defs + N_TIDY_OPTIONS; ++np )
329     {
330         if ( TY_(tmbstrcasecmp)(s, np->name) == 0 )
331             return np;
332     }
333     return NULL;
334 }
335 
TY_(getOption)336 const TidyOptionImpl* TY_(getOption)( TidyOptionId optId )
337 {
338   if ( optId < N_TIDY_OPTIONS )
339       return option_defs + optId;
340   return NULL;
341 }
342 
343 
/* Free the string held by *value, if it owns one.
** Nothing is freed for non-string options, for a NULL string, or when
** the string is the option's own default (pdflt). The pointer stored in
** *value is left untouched.
*/
static void FreeOptionValue( TidyDocImpl* doc, const TidyOptionImpl* option, TidyOptionValue* value )
{
    if ( option->type != TidyString )
        return;

    if ( !value->p || value->p == option->pdflt )
        return;

    TidyDocFree( doc, value->p );
}
349 
/* Replace *oldval with a copy of *newval for the given option.
**
** String options: a value that is non-NULL and is not the option's
** default (pdflt) is duplicated with the document's allocator; a NULL
** or pdflt value is stored as-is. Other options: the value is copied.
**
** The duplicate is made BEFORE the previous string is released, so the
** call stays valid even when oldval and newval refer to the same string
** (the previous order would have read the string after freeing it).
*/
static void CopyOptionValue( TidyDocImpl* doc, const TidyOptionImpl* option,
                             TidyOptionValue* oldval, const TidyOptionValue* newval )
{
    assert( oldval != NULL );

    if ( option->type == TidyString )
    {
        tmbstr copy;

        if ( newval->p && newval->p != option->pdflt )
            copy = TY_(tmbstrdup)( doc->allocator, newval->p );
        else
            copy = newval->p;

        FreeOptionValue( doc, option, oldval );
        oldval->p = copy;
    }
    else
    {
        /* FreeOptionValue does nothing for non-string options */
        FreeOptionValue( doc, option, oldval );
        oldval->v = newval->v;
    }
}
366 
367 
SetOptionValue(TidyDocImpl * doc,TidyOptionId optId,ctmbstr val)368 static Bool SetOptionValue( TidyDocImpl* doc, TidyOptionId optId, ctmbstr val )
369 {
370    const TidyOptionImpl* option = &option_defs[ optId ];
371    Bool status = ( optId < N_TIDY_OPTIONS );
372    if ( status )
373    {
374       assert( option->id == optId && option->type == TidyString );
375       FreeOptionValue( doc, option, &doc->config.value[ optId ] );
376       doc->config.value[ optId ].p = TY_(tmbstrdup)( doc->allocator, val );
377    }
378    return status;
379 }
380 
TY_(SetOptionInt)381 Bool TY_(SetOptionInt)( TidyDocImpl* doc, TidyOptionId optId, ulong val )
382 {
383    Bool status = ( optId < N_TIDY_OPTIONS );
384    if ( status )
385    {
386        assert( option_defs[ optId ].type == TidyInteger );
387        doc->config.value[ optId ].v = val;
388    }
389    return status;
390 }
391 
TY_(SetOptionBool)392 Bool TY_(SetOptionBool)( TidyDocImpl* doc, TidyOptionId optId, Bool val )
393 {
394    Bool status = ( optId < N_TIDY_OPTIONS );
395    if ( status )
396    {
397        assert( option_defs[ optId ].type == TidyBoolean );
398        doc->config.value[ optId ].v = val;
399    }
400    return status;
401 }
402 
/* Write the default of an option into *dflt: the pdflt pointer for
** string options, the dflt number for every other type. The string is
** not copied.
*/
static void GetOptionDefault( const TidyOptionImpl* option,
                              TidyOptionValue* dflt )
{
    if ( option->type != TidyString )
    {
        dflt->v = option->dflt;
        return;
    }
    dflt->p = (char*)option->pdflt;
}
411 
OptionValueEqDefault(const TidyOptionImpl * option,const TidyOptionValue * val)412 static Bool OptionValueEqDefault( const TidyOptionImpl* option,
413                                   const TidyOptionValue* val )
414 {
415     return ( option->type == TidyString ) ?
416         val->p == option->pdflt :
417         val->v == option->dflt;
418 }
419 
TY_(ResetOptionToDefault)420 Bool TY_(ResetOptionToDefault)( TidyDocImpl* doc, TidyOptionId optId )
421 {
422     Bool status = ( optId > 0 && optId < N_TIDY_OPTIONS );
423     if ( status )
424     {
425         TidyOptionValue dflt;
426         const TidyOptionImpl* option = option_defs + optId;
427         TidyOptionValue* value = &doc->config.value[ optId ];
428         assert( optId == option->id );
429         GetOptionDefault( option, &dflt );
430         CopyOptionValue( doc, option, value, &dflt );
431     }
432     return status;
433 }
434 
/* Run the current string value of option optId through
** ParseConfigValue again. The value is parsed from a temporary
** duplicate, which is freed afterwards.
** NOTE(review): presumably the duplicate is needed because parsing may
** replace the stored string -- confirm against ParseConfigValue.
*/
static void ReparseTagType( TidyDocImpl* doc, TidyOptionId optId )
{
    tmbstr copy = TY_(tmbstrdup)( doc->allocator, cfgStr( doc, optId ) );

    TY_(ParseConfigValue)( doc, optId, copy );
    TidyDocFree( doc, copy );
}
442 
OptionValueIdentical(const TidyOptionImpl * option,const TidyOptionValue * val1,const TidyOptionValue * val2)443 static Bool OptionValueIdentical( const TidyOptionImpl* option,
444                                   const TidyOptionValue* val1,
445                                   const TidyOptionValue* val2 )
446 {
447     if ( option->type == TidyString )
448     {
449         if ( val1->p == val2->p )
450             return yes;
451         if ( !val1->p || !val2->p )
452             return no;
453         return TY_(tmbstrcmp)( val1->p, val2->p ) == 0;
454     }
455     else
456         return val1->v == val2->v;
457 }
458 
NeedReparseTagDecls(const TidyOptionValue * current,const TidyOptionValue * new,uint * changedUserTags)459 static Bool NeedReparseTagDecls( const TidyOptionValue* current,
460                                  const TidyOptionValue* new,
461                                  uint *changedUserTags )
462 {
463     Bool ret = no;
464     uint ixVal;
465     const TidyOptionImpl* option = option_defs;
466     *changedUserTags = tagtype_null;
467 
468     for ( ixVal=0; ixVal < N_TIDY_OPTIONS; ++option, ++ixVal )
469     {
470         assert( ixVal == (uint) option->id );
471         switch (option->id)
472         {
473 #define TEST_USERTAGS(USERTAGOPTION,USERTAGTYPE) \
474         case USERTAGOPTION: \
475             if (!OptionValueIdentical(option,&current[ixVal],&new[ixVal])) \
476             { \
477                 *changedUserTags |= USERTAGTYPE; \
478                 ret = yes; \
479             } \
480             break
481             TEST_USERTAGS(TidyInlineTags,tagtype_inline);
482             TEST_USERTAGS(TidyBlockTags,tagtype_block);
483             TEST_USERTAGS(TidyEmptyTags,tagtype_empty);
484             TEST_USERTAGS(TidyPreTags,tagtype_pre);
485         default:
486             break;
487         }
488     }
489     return ret;
490 }
491 
/* For each tag type bit set in changedUserTags, drop the tags declared
** for that type and parse the matching option value again. The types
** are handled in the order inline, block, empty, pre.
*/
static void ReparseTagDecls( TidyDocImpl* doc, uint changedUserTags  )
{
    if ( changedUserTags & tagtype_inline )
    {
        TY_(FreeDeclaredTags)( doc, tagtype_inline );
        ReparseTagType( doc, TidyInlineTags );
    }
    if ( changedUserTags & tagtype_block )
    {
        TY_(FreeDeclaredTags)( doc, tagtype_block );
        ReparseTagType( doc, TidyBlockTags );
    }
    if ( changedUserTags & tagtype_empty )
    {
        TY_(FreeDeclaredTags)( doc, tagtype_empty );
        ReparseTagType( doc, TidyEmptyTags );
    }
    if ( changedUserTags & tagtype_pre )
    {
        TY_(FreeDeclaredTags)( doc, tagtype_pre );
        ReparseTagType( doc, TidyPreTags );
    }
}
505 
TY_(ResetConfigToDefault)506 void TY_(ResetConfigToDefault)( TidyDocImpl* doc )
507 {
508     uint ixVal;
509     const TidyOptionImpl* option = option_defs;
510     TidyOptionValue* value = &doc->config.value[ 0 ];
511     for ( ixVal=0; ixVal < N_TIDY_OPTIONS; ++option, ++ixVal )
512     {
513         TidyOptionValue dflt;
514         assert( ixVal == (uint) option->id );
515         GetOptionDefault( option, &dflt );
516         CopyOptionValue( doc, option, &value[ixVal], &dflt );
517     }
518     TY_(FreeDeclaredTags)( doc, tagtype_null );
519 }
520 
TY_(TakeConfigSnapshot)521 void TY_(TakeConfigSnapshot)( TidyDocImpl* doc )
522 {
523     uint ixVal;
524     const TidyOptionImpl* option = option_defs;
525     const TidyOptionValue* value = &doc->config.value[ 0 ];
526     TidyOptionValue* snap  = &doc->config.snapshot[ 0 ];
527 
528     AdjustConfig( doc );  /* Make sure it's consistent */
529     for ( ixVal=0; ixVal < N_TIDY_OPTIONS; ++option, ++ixVal )
530     {
531         assert( ixVal == (uint) option->id );
532         CopyOptionValue( doc, option, &snap[ixVal], &value[ixVal] );
533     }
534 }
535 
TY_(ResetConfigToSnapshot)536 void TY_(ResetConfigToSnapshot)( TidyDocImpl* doc )
537 {
538     uint ixVal;
539     const TidyOptionImpl* option = option_defs;
540     TidyOptionValue* value = &doc->config.value[ 0 ];
541     const TidyOptionValue* snap  = &doc->config.snapshot[ 0 ];
542     uint changedUserTags;
543     Bool needReparseTagsDecls = NeedReparseTagDecls( value, snap,
544                                                      &changedUserTags );
545 
546     for ( ixVal=0; ixVal < N_TIDY_OPTIONS; ++option, ++ixVal )
547     {
548         assert( ixVal == (uint) option->id );
549         CopyOptionValue( doc, option, &value[ixVal], &snap[ixVal] );
550     }
551     if ( needReparseTagsDecls )
552         ReparseTagDecls( doc, changedUserTags );
553 }
554 
TY_(CopyConfig)555 void TY_(CopyConfig)( TidyDocImpl* docTo, TidyDocImpl* docFrom )
556 {
557     if ( docTo != docFrom )
558     {
559         uint ixVal;
560         const TidyOptionImpl* option = option_defs;
561         const TidyOptionValue* from = &docFrom->config.value[ 0 ];
562         TidyOptionValue* to   = &docTo->config.value[ 0 ];
563         uint changedUserTags;
564         Bool needReparseTagsDecls = NeedReparseTagDecls( to, from,
565                                                          &changedUserTags );
566 
567         TY_(TakeConfigSnapshot)( docTo );
568         for ( ixVal=0; ixVal < N_TIDY_OPTIONS; ++option, ++ixVal )
569         {
570             assert( ixVal == (uint) option->id );
571             CopyOptionValue( docTo, option, &to[ixVal], &from[ixVal] );
572         }
573         if ( needReparseTagsDecls )
574             ReparseTagDecls( docTo, changedUserTags  );
575         AdjustConfig( docTo );  /* Make sure it's consistent */
576     }
577 }
578 
579 
580 #ifdef _DEBUG
581 
582 /* Debug accessor functions will be type-safe and assert option type match */
TY_(_cfgGet)583 ulong   TY_(_cfgGet)( TidyDocImpl* doc, TidyOptionId optId )
584 {
585   assert( optId < N_TIDY_OPTIONS );
586   return doc->config.value[ optId ].v;
587 }
588 
TY_(_cfgGetBool)589 Bool    TY_(_cfgGetBool)( TidyDocImpl* doc, TidyOptionId optId )
590 {
591   ulong val = TY_(_cfgGet)( doc, optId );
592   const TidyOptionImpl* opt = &option_defs[ optId ];
593   assert( opt && opt->type == TidyBoolean );
594   return (Bool) val;
595 }
596 
TY_(_cfgGetAutoBool)597 TidyTriState    TY_(_cfgGetAutoBool)( TidyDocImpl* doc, TidyOptionId optId )
598 {
599   ulong val = TY_(_cfgGet)( doc, optId );
600   const TidyOptionImpl* opt = &option_defs[ optId ];
601   assert( opt && opt->type == TidyInteger
602           && opt->parser == ParseAutoBool );
603   return (TidyTriState) val;
604 }
605 
TY_(_cfgGetString)606 ctmbstr TY_(_cfgGetString)( TidyDocImpl* doc, TidyOptionId optId )
607 {
608   const TidyOptionImpl* opt;
609 
610   assert( optId < N_TIDY_OPTIONS );
611   opt = &option_defs[ optId ];
612   assert( opt && opt->type == TidyString );
613   return doc->config.value[ optId ].p;
614 }
615 #endif
616 
617 
618 #if 0
619 /* for use with Gnu Emacs */
620 void SetEmacsFilename( TidyDocImpl* doc, ctmbstr filename )
621 {
622     SetOptionValue( doc, TidyEmacsFile, filename );
623 }
624 #endif
625 
GetC(TidyConfigImpl * config)626 static tchar GetC( TidyConfigImpl* config )
627 {
628     if ( config->cfgIn )
629         return TY_(ReadChar)( config->cfgIn );
630     return EndOfStream;
631 }
632 
FirstChar(TidyConfigImpl * config)633 static tchar FirstChar( TidyConfigImpl* config )
634 {
635     config->c = GetC( config );
636     return config->c;
637 }
638 
AdvanceChar(TidyConfigImpl * config)639 static tchar AdvanceChar( TidyConfigImpl* config )
640 {
641     if ( config->c != EndOfStream )
642         config->c = GetC( config );
643     return config->c;
644 }
645 
SkipWhite(TidyConfigImpl * config)646 static tchar SkipWhite( TidyConfigImpl* config )
647 {
648     while ( TY_(IsWhite)(config->c) && !TY_(IsNewline)(config->c) )
649         config->c = GetC( config );
650     return config->c;
651 }
652 
653 /* skip until end of line
654 static tchar SkipToEndofLine( TidyConfigImpl* config )
655 {
656     while ( config->c != EndOfStream )
657     {
658         config->c = GetC( config );
659         if ( config->c == '\n' || config->c == '\r' )
660             break;
661     }
662     return config->c;
663 }
664 */
665 
666 /*
667  skip over line continuations
668  to start of next property
669 */
NextProperty(TidyConfigImpl * config)670 static uint NextProperty( TidyConfigImpl* config )
671 {
672     do
673     {
674         /* skip to end of line */
675         while ( config->c != '\n' &&  config->c != '\r' &&  config->c != EndOfStream )
676              config->c = GetC( config );
677 
678         /* treat  \r\n   \r  or  \n as line ends */
679         if ( config->c == '\r' )
680              config->c = GetC( config );
681 
682         if ( config->c == '\n' )
683             config->c = GetC( config );
684     }
685     while ( TY_(IsWhite)(config->c) );  /* line continuation? */
686 
687     return config->c;
688 }
689 
690 /*
691  Todd Lewis contributed this code for expanding
692  ~/foo or ~your/foo according to $HOME and your
693  user name. This will work partially on any system
694  which defines $HOME.  Support for ~user/foo will
695  work on systems that support getpwnam(userid),
696  namely Unix/Linux.
697 */
ExpandTilde(TidyDocImpl * doc,ctmbstr filename)698 static ctmbstr ExpandTilde( TidyDocImpl* doc, ctmbstr filename )
699 {
700     char *home_dir = NULL;
701 
702     if ( !filename )
703         return NULL;
704 
705     if ( filename[0] != '~' )
706         return filename;
707 
708     if (filename[1] == '/')
709     {
710         home_dir = getenv("HOME");
711         if ( home_dir )
712             ++filename;
713     }
714 #ifdef SUPPORT_GETPWNAM
715     else
716     {
717         struct passwd *passwd = NULL;
718         ctmbstr s = filename + 1;
719         tmbstr t;
720 
721         while ( *s && *s != '/' )
722             s++;
723 
724         if ( t = TidyDocAlloc(doc, s - filename) )
725         {
726             memcpy(t, filename+1, s-filename-1);
727             t[s-filename-1] = 0;
728 
729             passwd = getpwnam(t);
730 
731             TidyDocFree(doc, t);
732         }
733 
734         if ( passwd )
735         {
736             filename = s;
737             home_dir = passwd->pw_dir;
738         }
739     }
740 #endif /* SUPPORT_GETPWNAM */
741 
742     if ( home_dir )
743     {
744         uint len = TY_(tmbstrlen)(filename) + TY_(tmbstrlen)(home_dir) + 1;
745         tmbstr p = (tmbstr)TidyDocAlloc( doc, len );
746         TY_(tmbstrcpy)( p, home_dir );
747         TY_(tmbstrcat)( p, filename );
748         return (ctmbstr) p;
749     }
750     return (ctmbstr) filename;
751 }
752 
tidyFileExists(TidyDoc tdoc,ctmbstr filename)753 Bool TIDY_CALL tidyFileExists( TidyDoc tdoc, ctmbstr filename )
754 {
755   TidyDocImpl* doc = tidyDocToImpl( tdoc );
756   ctmbstr fname = (tmbstr) ExpandTilde( doc, filename );
757 #ifndef NO_ACCESS_SUPPORT
758   Bool exists = ( access(fname, 0) == 0 );
759 #else
760   Bool exists;
761   /* at present */
762   FILE* fin = fopen(fname, "r");
763   if (fin != NULL)
764       fclose(fin);
765   exists = ( fin != NULL );
766 #endif
767   if ( fname != filename )
768       TidyDocFree( doc, (tmbstr) fname );
769   return exists;
770 }
771 
772 
/* Size in bytes of the buffer that receives an option name read from a
** config file; the parser stores at most TIDY_MAX_NAME-1 characters in
** it.  May be overridden by defining TIDY_MAX_NAME before this point.
*/
#ifndef TIDY_MAX_NAME
#define TIDY_MAX_NAME 64
#endif
776 
TY_(ParseConfigFile)777 int TY_(ParseConfigFile)( TidyDocImpl* doc, ctmbstr file )
778 {
779     return TY_(ParseConfigFileEnc)( doc, file, "ascii" );
780 }
781 
782 /* open the file and parse its contents
783 */
TY_(ParseConfigFileEnc)784 int TY_(ParseConfigFileEnc)( TidyDocImpl* doc, ctmbstr file, ctmbstr charenc )
785 {
786     uint opterrs = doc->optionErrors;
787     tmbstr fname = (tmbstr) ExpandTilde( doc, file );
788     TidyConfigImpl* cfg = &doc->config;
789     FILE* fin = fopen( fname, "r" );
790     int enc = TY_(CharEncodingId)( doc, charenc );
791 
792     if ( fin == NULL || enc < 0 )
793     {
794         TY_(FileError)( doc, fname, TidyConfig );
795         return -1;
796     }
797     else
798     {
799         tchar c;
800         cfg->cfgIn = TY_(FileInput)( doc, fin, enc );
801         c = FirstChar( cfg );
802 
803         for ( c = SkipWhite(cfg); c != EndOfStream; c = NextProperty(cfg) )
804         {
805             uint ix = 0;
806             tmbchar name[ TIDY_MAX_NAME ] = {0};
807 
808             /* // or # start a comment */
809             if ( c == '/' || c == '#' )
810                 continue;
811 
812             while ( ix < sizeof(name)-1 && c != '\n' && c != EndOfStream && c != ':' )
813             {
814                 name[ ix++ ] = (tmbchar) c;  /* Option names all ASCII */
815                 c = AdvanceChar( cfg );
816             }
817 
818             if ( c == ':' )
819             {
820                 const TidyOptionImpl* option = TY_(lookupOption)( name );
821                 c = AdvanceChar( cfg );
822                 if ( option )
823                     option->parser( doc, option );
824                 else
825                 {
826                     if (NULL != doc->pOptCallback)
827                     {
828                         TidyConfigImpl* cfg = &doc->config;
829                         tmbchar buf[8192];
830                         uint i = 0;
831                         tchar delim = 0;
832                         Bool waswhite = yes;
833 
834                         tchar c = SkipWhite( cfg );
835 
836                         if ( c == '"' || c == '\'' )
837                         {
838                             delim = c;
839                             c = AdvanceChar( cfg );
840                         }
841 
842                         while ( i < sizeof(buf)-2 && c != EndOfStream && c != '\r' && c != '\n' )
843                         {
844                             if ( delim && c == delim )
845                                 break;
846 
847                             if ( TY_(IsWhite)(c) )
848                             {
849                                 if ( waswhite )
850                                 {
851                                     c = AdvanceChar( cfg );
852                                     continue;
853                                 }
854                                 c = ' ';
855                             }
856                             else
857                                 waswhite = no;
858 
859                             buf[i++] = (tmbchar) c;
860                             c = AdvanceChar( cfg );
861                         }
862                         buf[i] = '\0';
863                         if (no == (*doc->pOptCallback)( name, buf ))
864                             TY_(ReportUnknownOption)( doc, name );
865                     }
866                     else
867                         TY_(ReportUnknownOption)( doc, name );
868                 }
869             }
870         }
871 
872         TY_(freeFileSource)(&cfg->cfgIn->source, yes);
873         TY_(freeStreamIn)( cfg->cfgIn );
874         cfg->cfgIn = NULL;
875     }
876 
877     if ( fname != (tmbstr) file )
878         TidyDocFree( doc, fname );
879 
880     AdjustConfig( doc );
881 
882     /* any new config errors? If so, return warning status. */
883     return (doc->optionErrors > opterrs ? 1 : 0);
884 }
885 
886 /* returns false if unknown option, missing parameter,
887 ** or option doesn't use parameter
888 */
TY_(ParseConfigOption)889 Bool TY_(ParseConfigOption)( TidyDocImpl* doc, ctmbstr optnam, ctmbstr optval )
890 {
891     const TidyOptionImpl* option = TY_(lookupOption)( optnam );
892     Bool status = ( option != NULL );
893     if ( !status )
894     {
895         /* Not a standard tidy option.  Check to see if the user application
896            recognizes it  */
897         if (NULL != doc->pOptCallback)
898             status = (*doc->pOptCallback)( optnam, optval );
899         if (!status)
900             TY_(ReportUnknownOption)( doc, optnam );
901     }
902     else
903         status = TY_(ParseConfigValue)( doc, option->id, optval );
904     return status;
905 }
906 
907 /* returns false if unknown option, missing parameter,
908 ** or option doesn't use parameter
909 */
TY_(ParseConfigValue)910 Bool TY_(ParseConfigValue)( TidyDocImpl* doc, TidyOptionId optId, ctmbstr optval )
911 {
912     const TidyOptionImpl* option = option_defs + optId;
913     Bool status = ( optId < N_TIDY_OPTIONS && optval != NULL );
914 
915     if ( !status )
916         TY_(ReportBadArgument)( doc, option->name );
917     else
918     {
919         TidyBuffer inbuf;            /* Set up input source */
920         tidyBufInitWithAllocator( &inbuf, doc->allocator );
921         tidyBufAttach( &inbuf, (byte*)optval, TY_(tmbstrlen)(optval)+1 );
922         doc->config.cfgIn = TY_(BufferInput)( doc, &inbuf, ASCII );
923         doc->config.c = GetC( &doc->config );
924 
925         status = option->parser( doc, option );
926 
927         TY_(freeStreamIn)(doc->config.cfgIn);  /* Release input source */
928         doc->config.cfgIn  = NULL;
929         tidyBufDetach( &inbuf );
930     }
931     return status;
932 }
933 
934 
935 /* ensure that char encodings are self consistent */
TY_(AdjustCharEncoding)936 Bool  TY_(AdjustCharEncoding)( TidyDocImpl* doc, int encoding )
937 {
938     int outenc = -1;
939     int inenc = -1;
940 
941     switch( encoding )
942     {
943     case MACROMAN:
944         inenc = MACROMAN;
945         outenc = ASCII;
946         break;
947 
948     case WIN1252:
949         inenc = WIN1252;
950         outenc = ASCII;
951         break;
952 
953     case IBM858:
954         inenc = IBM858;
955         outenc = ASCII;
956         break;
957 
958     case ASCII:
959         inenc = LATIN1;
960         outenc = ASCII;
961         break;
962 
963     case LATIN0:
964         inenc = LATIN0;
965         outenc = ASCII;
966         break;
967 
968     case RAW:
969     case LATIN1:
970     case UTF8:
971 #ifndef NO_NATIVE_ISO2022_SUPPORT
972     case ISO2022:
973 #endif
974 
975 #if SUPPORT_UTF16_ENCODINGS
976     case UTF16LE:
977     case UTF16BE:
978     case UTF16:
979 #endif
980 #if SUPPORT_ASIAN_ENCODINGS
981     case SHIFTJIS:
982     case BIG5:
983 #endif
984         inenc = outenc = encoding;
985         break;
986     }
987 
988     if ( inenc >= 0 )
989     {
990         TY_(SetOptionInt)( doc, TidyCharEncoding, encoding );
991         TY_(SetOptionInt)( doc, TidyInCharEncoding, inenc );
992         TY_(SetOptionInt)( doc, TidyOutCharEncoding, outenc );
993         return yes;
994     }
995     return no;
996 }
997 
/* Ensure that config is self consistent.
**
** Applies a fixed sequence of adjustments in which some options force
** the values of others.  The order matters: later steps test options
** that earlier steps may have just changed (for example TidyXhtmlOut
** and TidyXmlOut).
*/
void AdjustConfig( TidyDocImpl* doc )
{
    /* enclosing block text also turns on enclosing body text */
    if ( cfgBool(doc, TidyEncloseBlockText) )
        TY_(SetOptionBool)( doc, TidyEncloseBodyText, yes );

    /* indentation switched off: use zero indent spaces */
    if ( cfgAutoBool(doc, TidyIndentContent) == TidyNoState )
        TY_(SetOptionInt)( doc, TidyIndentSpaces, 0 );

    /* disable wrapping: a wrap length of 0 is replaced by the largest
    ** positive 32-bit value
    */
    if ( cfg(doc, TidyWrapLen) == 0 )
        TY_(SetOptionInt)( doc, TidyWrapLen, 0x7FFFFFFF );

    /* Word 2000 needs o:p to be declared as inline */
    if ( cfgBool(doc, TidyWord2000) )
    {
        doc->config.defined_tags |= tagtype_inline;
        TY_(DefineTag)( doc, tagtype_inline, "o:p" );
    }

    /* #480701 disable XHTML output flag if both output-xhtml and xml input are set */
    if ( cfgBool(doc, TidyXmlTags) )
        TY_(SetOptionBool)( doc, TidyXhtmlOut, no );

    /* XHTML is written in lower case */
    if ( cfgBool(doc, TidyXhtmlOut) )
    {
        TY_(SetOptionBool)( doc, TidyXmlOut, yes );
        TY_(SetOptionBool)( doc, TidyUpperCaseTags, no );
        TY_(SetOptionBool)( doc, TidyUpperCaseAttrs, no );
        /* TY_(SetOptionBool)( doc, TidyXmlPIs, yes ); */
    }

    /* if XML in, then XML out */
    if ( cfgBool(doc, TidyXmlTags) )
    {
        TY_(SetOptionBool)( doc, TidyXmlOut, yes );
        TY_(SetOptionBool)( doc, TidyXmlPIs, yes );
    }

    /* #427837 - fix by Dave Raggett 02 Jun 01
    ** generate <?xml version="1.0" encoding="iso-8859-1"?>
    ** if the output character encoding is Latin-1 etc.
    ** i.e. force the XML declaration for XML output in any encoding
    ** other than ASCII, UTF-8, UTF-16 (when supported) and RAW.
    */
    if ( cfg(doc, TidyOutCharEncoding) != ASCII &&
         cfg(doc, TidyOutCharEncoding) != UTF8 &&
#if SUPPORT_UTF16_ENCODINGS
         cfg(doc, TidyOutCharEncoding) != UTF16 &&
         cfg(doc, TidyOutCharEncoding) != UTF16BE &&
         cfg(doc, TidyOutCharEncoding) != UTF16LE &&
#endif
         cfg(doc, TidyOutCharEncoding) != RAW &&
         cfgBool(doc, TidyXmlOut) )
    {
        TY_(SetOptionBool)( doc, TidyXmlDecl, yes );
    }

    /* XML output: ampersands are quoted and end tags are not hidden
    ** (XML requires end tags)
    */
    if ( cfgBool(doc, TidyXmlOut) )
    {
#if SUPPORT_UTF16_ENCODINGS
        /* XML requires a BOM on output if using UTF-16 encoding */
        ulong enc = cfg( doc, TidyOutCharEncoding );
        if ( enc == UTF16LE || enc == UTF16BE || enc == UTF16 )
            TY_(SetOptionInt)( doc, TidyOutputBOM, yes );
#endif
        TY_(SetOptionBool)( doc, TidyQuoteAmpersand, yes );
        TY_(SetOptionBool)( doc, TidyHideEndTags, no );
    }
}
1068 
1069 /* unsigned integers */
ParseInt(TidyDocImpl * doc,const TidyOptionImpl * entry)1070 Bool ParseInt( TidyDocImpl* doc, const TidyOptionImpl* entry )
1071 {
1072     ulong number = 0;
1073     Bool digits = no;
1074     TidyConfigImpl* cfg = &doc->config;
1075     tchar c = SkipWhite( cfg );
1076 
1077     while ( TY_(IsDigit)(c) )
1078     {
1079         number = c - '0' + (10 * number);
1080         digits = yes;
1081         c = AdvanceChar( cfg );
1082     }
1083 
1084     if ( !digits )
1085         TY_(ReportBadArgument)( doc, entry->name );
1086     else
1087         TY_(SetOptionInt)( doc, entry->id, number );
1088     return digits;
1089 }
1090 
1091 /* true/false or yes/no or 0/1 or "auto" only looks at 1st char */
ParseTriState(TidyTriState theState,TidyDocImpl * doc,const TidyOptionImpl * entry,ulong * flag)1092 static Bool ParseTriState( TidyTriState theState, TidyDocImpl* doc,
1093                            const TidyOptionImpl* entry, ulong* flag )
1094 {
1095     TidyConfigImpl* cfg = &doc->config;
1096     tchar c = SkipWhite( cfg );
1097 
1098     if (c == 't' || c == 'T' || c == 'y' || c == 'Y' || c == '1')
1099         *flag = yes;
1100     else if (c == 'f' || c == 'F' || c == 'n' || c == 'N' || c == '0')
1101         *flag = no;
1102     else if (theState == TidyAutoState && (c == 'a' || c =='A'))
1103         *flag = TidyAutoState;
1104     else
1105     {
1106         TY_(ReportBadArgument)( doc, entry->name );
1107         return no;
1108     }
1109 
1110     return yes;
1111 }
1112 
1113 /* cr, lf or crlf */
ParseNewline(TidyDocImpl * doc,const TidyOptionImpl * entry)1114 Bool ParseNewline( TidyDocImpl* doc, const TidyOptionImpl* entry )
1115 {
1116     int nl = -1;
1117     tmbchar work[ 16 ] = {0};
1118     tmbstr cp = work, end = work + sizeof(work);
1119     TidyConfigImpl* cfg = &doc->config;
1120     tchar c = SkipWhite( cfg );
1121 
1122     while ( c!=EndOfStream && cp < end && !TY_(IsWhite)(c) && c != '\r' && c != '\n' )
1123     {
1124         *cp++ = (tmbchar) c;
1125         c = AdvanceChar( cfg );
1126     }
1127     *cp = 0;
1128 
1129     if ( TY_(tmbstrcasecmp)(work, "lf") == 0 )
1130         nl = TidyLF;
1131     else if ( TY_(tmbstrcasecmp)(work, "crlf") == 0 )
1132         nl = TidyCRLF;
1133     else if ( TY_(tmbstrcasecmp)(work, "cr") == 0 )
1134         nl = TidyCR;
1135 
1136     if ( nl < TidyLF || nl > TidyCR )
1137         TY_(ReportBadArgument)( doc, entry->name );
1138     else
1139         TY_(SetOptionInt)( doc, entry->id, nl );
1140     return ( nl >= TidyLF && nl <= TidyCR );
1141 }
1142 
ParseBool(TidyDocImpl * doc,const TidyOptionImpl * entry)1143 Bool ParseBool( TidyDocImpl* doc, const TidyOptionImpl* entry )
1144 {
1145     ulong flag = 0;
1146     Bool status = ParseTriState( TidyNoState, doc, entry, &flag );
1147     if ( status )
1148         TY_(SetOptionBool)( doc, entry->id, flag != 0 );
1149     return status;
1150 }
1151 
ParseAutoBool(TidyDocImpl * doc,const TidyOptionImpl * entry)1152 Bool ParseAutoBool( TidyDocImpl* doc, const TidyOptionImpl* entry )
1153 {
1154     ulong flag = 0;
1155     Bool status = ParseTriState( TidyAutoState, doc, entry, &flag );
1156     if ( status )
1157         TY_(SetOptionInt)( doc, entry->id, flag );
1158     return status;
1159 }
1160 
1161 /* a string excluding whitespace */
ParseName(TidyDocImpl * doc,const TidyOptionImpl * option)1162 Bool ParseName( TidyDocImpl* doc, const TidyOptionImpl* option )
1163 {
1164     tmbchar buf[ 1024 ] = {0};
1165     uint i = 0;
1166     uint c = SkipWhite( &doc->config );
1167 
1168     while ( i < sizeof(buf)-2 && c != EndOfStream && !TY_(IsWhite)(c) )
1169     {
1170         buf[i++] = (tmbchar) c;
1171         c = AdvanceChar( &doc->config );
1172     }
1173     buf[i] = 0;
1174 
1175     if ( i == 0 )
1176         TY_(ReportBadArgument)( doc, option->name );
1177     else
1178         SetOptionValue( doc, option->id, buf );
1179     return ( i > 0 );
1180 }
1181 
1182 /* #508936 - CSS class naming for -clean option */
ParseCSS1Selector(TidyDocImpl * doc,const TidyOptionImpl * option)1183 Bool ParseCSS1Selector( TidyDocImpl* doc, const TidyOptionImpl* option )
1184 {
1185     char buf[256] = {0};
1186     uint i = 0;
1187     uint c = SkipWhite( &doc->config );
1188 
1189     while ( i < sizeof(buf)-2 && c != EndOfStream && !TY_(IsWhite)(c) )
1190     {
1191         buf[i++] = (tmbchar) c;
1192         c = AdvanceChar( &doc->config );
1193     }
1194     buf[i] = '\0';
1195 
1196     if ( i == 0 || !TY_(IsCSS1Selector)(buf) ) {
1197         TY_(ReportBadArgument)( doc, option->name );
1198         return no;
1199     }
1200 
1201     buf[i++] = '-';  /* Make sure any escaped Unicode is terminated */
1202     buf[i] = 0;      /* so valid class names are generated after */
1203                      /* Tidy appends last digits. */
1204 
1205     SetOptionValue( doc, option->id, buf );
1206     return yes;
1207 }
1208 
1209 /* Coordinates Config update and Tags data */
DeclareUserTag(TidyDocImpl * doc,TidyOptionId optId,UserTagType tagType,ctmbstr name)1210 static void DeclareUserTag( TidyDocImpl* doc, TidyOptionId optId,
1211                             UserTagType tagType, ctmbstr name )
1212 {
1213   ctmbstr prvval = cfgStr( doc, optId );
1214   tmbstr catval = NULL;
1215   ctmbstr theval = name;
1216   if ( prvval )
1217   {
1218     uint len = TY_(tmbstrlen)(name) + TY_(tmbstrlen)(prvval) + 3;
1219     catval = TY_(tmbstrndup)( doc->allocator, prvval, len );
1220     TY_(tmbstrcat)( catval, ", " );
1221     TY_(tmbstrcat)( catval, name );
1222     theval = catval;
1223   }
1224   TY_(DefineTag)( doc, tagType, name );
1225   SetOptionValue( doc, optId, theval );
1226   if ( catval )
1227     TidyDocFree( doc, catval );
1228 }
1229 
/* a space or comma separated list of tag names
**
** Handles the inline, block, empty and pre tag options; any other
** option is reported as unknown.  The option's previous value and the
** previously declared tags of that type are discarded before the new
** list is read.  The list may continue over several lines: it ends at
** end of stream or at a line break that is not followed by whitespace.
** Returns yes if at least one tag was declared.
*/
Bool ParseTagNames( TidyDocImpl* doc, const TidyOptionImpl* option )
{
    TidyConfigImpl* cfg = &doc->config;
    tmbchar buf[1024];
    uint i = 0, nTags = 0;
    uint c = SkipWhite( cfg );
    UserTagType ttyp = tagtype_null;

    /* map the option onto the kind of tag it declares */
    switch ( option->id )
    {
    case TidyInlineTags:  ttyp = tagtype_inline;    break;
    case TidyBlockTags:   ttyp = tagtype_block;     break;
    case TidyEmptyTags:   ttyp = tagtype_empty;     break;
    case TidyPreTags:     ttyp = tagtype_pre;       break;
    default:
       TY_(ReportUnknownOption)( doc, option->name );
       return no;
    }

    /* start from a clean slate: this list replaces any earlier one */
    SetOptionValue( doc, option->id, NULL );
    TY_(FreeDeclaredTags)( doc, ttyp );
    cfg->defined_tags |= ttyp;

    do
    {
        /* skip separators between names */
        if (c == ' ' || c == '\t' || c == ',')
        {
            c = AdvanceChar( cfg );
            continue;
        }

        /* line break (CR, LF or CRLF): look at the first character of
        ** the next line to see whether the list continues
        */
        if ( c == '\r' || c == '\n' )
        {
            uint c2 = AdvanceChar( cfg );
            if ( c == '\r' && c2 == '\n' )
                c = AdvanceChar( cfg );
            else
                c = c2;

            if ( !TY_(IsWhite)(c) )
            {
                /* not a continuation line: push the character and a
                ** newline back onto the input and stop
                */
                buf[i] = 0;
                TY_(UngetChar)( c, cfg->cfgIn );
                TY_(UngetChar)( '\n', cfg->cfgIn );
                break;
            }
        }

        /* earlier LF-only variant of the line break handling above,
        ** kept for reference:
        if ( c == '\n' )
        {
            c = AdvanceChar( cfg );
            if ( !TY_(IsWhite)(c) )
            {
                buf[i] = 0;
                TY_(UngetChar)( c, cfg->cfgIn );
                TY_(UngetChar)( '\n', cfg->cfgIn );
                break;
            }
        }
        */

        /* collect one tag name */
        while ( i < sizeof(buf)-2 && c != EndOfStream && !TY_(IsWhite)(c) && c != ',' )
        {
            buf[i++] = (tmbchar) c;
            c = AdvanceChar( cfg );
        }

        buf[i] = '\0';
        if (i == 0)          /* Skip empty tag definition.  Possible when */
            continue;        /* there is a trailing space on the line. */

        /* add tag to dictionary */
        DeclareUserTag( doc, option->id, ttyp, buf );
        i = 0;
        ++nTags;
    }
    while ( c != EndOfStream );

    /* NOTE(review): i is reset to 0 after every declaration above, so
    ** this looks unreachable with i > 0 - confirm before relying on it.
    */
    if ( i > 0 )
      DeclareUserTag( doc, option->id, ttyp, buf );
    return ( nTags > 0 );
}
1314 
1315 /* a string including whitespace */
1316 /* munges whitespace sequences */
1317 
ParseString(TidyDocImpl * doc,const TidyOptionImpl * option)1318 Bool ParseString( TidyDocImpl* doc, const TidyOptionImpl* option )
1319 {
1320     TidyConfigImpl* cfg = &doc->config;
1321     tmbchar buf[8192];
1322     uint i = 0;
1323     tchar delim = 0;
1324     Bool waswhite = yes;
1325 
1326     tchar c = SkipWhite( cfg );
1327 
1328     if ( c == '"' || c == '\'' )
1329     {
1330         delim = c;
1331         c = AdvanceChar( cfg );
1332     }
1333 
1334     while ( i < sizeof(buf)-2 && c != EndOfStream && c != '\r' && c != '\n' )
1335     {
1336         if ( delim && c == delim )
1337             break;
1338 
1339         if ( TY_(IsWhite)(c) )
1340         {
1341             if ( waswhite )
1342             {
1343                 c = AdvanceChar( cfg );
1344                 continue;
1345             }
1346             c = ' ';
1347         }
1348         else
1349             waswhite = no;
1350 
1351         buf[i++] = (tmbchar) c;
1352         c = AdvanceChar( cfg );
1353     }
1354     buf[i] = '\0';
1355 
1356     SetOptionValue( doc, option->id, buf );
1357     return yes;
1358 }
1359 
ParseCharEnc(TidyDocImpl * doc,const TidyOptionImpl * option)1360 Bool ParseCharEnc( TidyDocImpl* doc, const TidyOptionImpl* option )
1361 {
1362     tmbchar buf[64] = {0};
1363     uint i = 0;
1364     int enc = ASCII;
1365     Bool validEncoding = yes;
1366     tchar c = SkipWhite( &doc->config );
1367 
1368     while ( i < sizeof(buf)-2 && c != EndOfStream && !TY_(IsWhite)(c) )
1369     {
1370         buf[i++] = (tmbchar) TY_(ToLower)( c );
1371         c = AdvanceChar( &doc->config );
1372     }
1373     buf[i] = 0;
1374 
1375     enc = TY_(CharEncodingId)( doc, buf );
1376 
1377 #ifdef TIDY_WIN32_MLANG_SUPPORT
1378     /* limit support to --input-encoding */
1379     if (option->id != TidyInCharEncoding && enc > WIN32MLANG)
1380         enc = -1;
1381 #endif
1382 
1383     if ( enc < 0 )
1384     {
1385         validEncoding = no;
1386         TY_(ReportBadArgument)( doc, option->name );
1387     }
1388     else
1389         TY_(SetOptionInt)( doc, option->id, enc );
1390 
1391     if ( validEncoding && option->id == TidyCharEncoding )
1392         TY_(AdjustCharEncoding)( doc, enc );
1393     return validEncoding;
1394 }
1395 
1396 
TY_(CharEncodingId)1397 int TY_(CharEncodingId)( TidyDocImpl* ARG_UNUSED(doc), ctmbstr charenc )
1398 {
1399     int enc = TY_(GetCharEncodingFromOptName)( charenc );
1400 
1401 #ifdef TIDY_WIN32_MLANG_SUPPORT
1402     if (enc == -1)
1403     {
1404         uint wincp = TY_(Win32MLangGetCPFromName)(doc->allocator, charenc);
1405         if (wincp)
1406             enc = wincp;
1407     }
1408 #endif
1409 
1410     return enc;
1411 }
1412 
TY_(CharEncodingName)1413 ctmbstr TY_(CharEncodingName)( int encoding )
1414 {
1415     ctmbstr encodingName = TY_(GetEncodingNameFromTidyId)(encoding);
1416 
1417     if (!encodingName)
1418         encodingName = "unknown";
1419 
1420     return encodingName;
1421 }
1422 
TY_(CharEncodingOptName)1423 ctmbstr TY_(CharEncodingOptName)( int encoding )
1424 {
1425     ctmbstr encodingName = TY_(GetEncodingOptNameFromTidyId)(encoding);
1426 
1427     if (!encodingName)
1428         encodingName = "unknown";
1429 
1430     return encodingName;
1431 }
1432 
1433 /*
1434    doctype: omit | auto | strict | loose | <fpi>
1435 
1436    where the fpi is a string similar to
1437 
1438       "-//ACME//DTD HTML 3.14159//EN"
1439 */
ParseDocType(TidyDocImpl * doc,const TidyOptionImpl * option)1440 Bool ParseDocType( TidyDocImpl* doc, const TidyOptionImpl* option )
1441 {
1442     tmbchar buf[ 32 ] = {0};
1443     uint i = 0;
1444     Bool status = yes;
1445     TidyDoctypeModes dtmode = TidyDoctypeAuto;
1446 
1447     TidyConfigImpl* cfg = &doc->config;
1448     tchar c = SkipWhite( cfg );
1449 
1450     /* "-//ACME//DTD HTML 3.14159//EN" or similar */
1451 
1452     if ( c == '"' || c == '\'' )
1453     {
1454         status = ParseString(doc, option);
1455         if (status)
1456             TY_(SetOptionInt)( doc, TidyDoctypeMode, TidyDoctypeUser );
1457 
1458         return status;
1459     }
1460 
1461     /* read first word */
1462     while ( i < sizeof(buf)-1 && c != EndOfStream && !TY_(IsWhite)(c) )
1463     {
1464         buf[i++] = (tmbchar) c;
1465         c = AdvanceChar( cfg );
1466     }
1467     buf[i] = '\0';
1468 
1469     if ( TY_(tmbstrcasecmp)(buf, "auto") == 0 )
1470         dtmode = TidyDoctypeAuto;
1471     else if ( TY_(tmbstrcasecmp)(buf, "omit") == 0 )
1472         dtmode = TidyDoctypeOmit;
1473     else if ( TY_(tmbstrcasecmp)(buf, "strict") == 0 )
1474         dtmode = TidyDoctypeStrict;
1475     else if ( TY_(tmbstrcasecmp)(buf, "loose") == 0 ||
1476               TY_(tmbstrcasecmp)(buf, "transitional") == 0 )
1477         dtmode = TidyDoctypeLoose;
1478     else
1479     {
1480         TY_(ReportBadArgument)( doc, option->name );
1481         status = no;
1482     }
1483 
1484     if ( status )
1485         TY_(SetOptionInt)( doc, TidyDoctypeMode, dtmode );
1486     return status;
1487 }
1488 
ParseRepeatAttr(TidyDocImpl * doc,const TidyOptionImpl * option)1489 Bool ParseRepeatAttr( TidyDocImpl* doc, const TidyOptionImpl* option )
1490 {
1491     Bool status = yes;
1492     tmbchar buf[64] = {0};
1493     uint i = 0;
1494 
1495     TidyConfigImpl* cfg = &doc->config;
1496     tchar c = SkipWhite( cfg );
1497 
1498     while (i < sizeof(buf)-1 && c != EndOfStream && !TY_(IsWhite)(c))
1499     {
1500         buf[i++] = (tmbchar) c;
1501         c = AdvanceChar( cfg );
1502     }
1503     buf[i] = '\0';
1504 
1505     if ( TY_(tmbstrcasecmp)(buf, "keep-first") == 0 )
1506         cfg->value[ TidyDuplicateAttrs ].v = TidyKeepFirst;
1507     else if ( TY_(tmbstrcasecmp)(buf, "keep-last") == 0 )
1508         cfg->value[ TidyDuplicateAttrs ].v = TidyKeepLast;
1509     else
1510     {
1511         TY_(ReportBadArgument)( doc, option->name );
1512         status = no;
1513     }
1514     return status;
1515 }
1516 
ParseSorter(TidyDocImpl * doc,const TidyOptionImpl * option)1517 Bool ParseSorter( TidyDocImpl* doc, const TidyOptionImpl* option )
1518 {
1519     Bool status = yes;
1520     tmbchar buf[64] = {0};
1521     uint i = 0;
1522 
1523     TidyConfigImpl* cfg = &doc->config;
1524     tchar c = SkipWhite( cfg );
1525 
1526     while (i < sizeof(buf)-1 && c != EndOfStream && !TY_(IsWhite)(c))
1527     {
1528         buf[i++] = (tmbchar) c;
1529         c = AdvanceChar( cfg );
1530     }
1531     buf[i] = '\0';
1532 
1533     if ( TY_(tmbstrcasecmp)(buf, "alpha") == 0 )
1534         cfg->value[ TidySortAttributes ].v = TidySortAttrAlpha;
1535     else if ( TY_(tmbstrcasecmp)(buf, "none") == 0)
1536         cfg->value[ TidySortAttributes ].v = TidySortAttrNone;
1537     else
1538     {
1539         TY_(ReportBadArgument)( doc, option->name );
1540         status = no;
1541     }
1542     return status;
1543 }
1544 
1545 /* Use TidyOptionId as iterator.
1546 ** Send index of 1st option after TidyOptionUnknown as start of list.
1547 */
TY_(getOptionList)1548 TidyIterator TY_(getOptionList)( TidyDocImpl* ARG_UNUSED(doc) )
1549 {
1550     return (TidyIterator) (size_t)1;
1551 }
1552 
1553 /* Check if this item is last valid option.
1554 ** If so, zero out iterator.
1555 */
TY_(getNextOption)1556 const TidyOptionImpl*  TY_(getNextOption)( TidyDocImpl* ARG_UNUSED(doc),
1557                                            TidyIterator* iter )
1558 {
1559   const TidyOptionImpl* option = NULL;
1560   size_t optId;
1561   assert( iter != NULL );
1562   optId = (size_t) *iter;
1563   if ( optId > TidyUnknownOption && optId < N_TIDY_OPTIONS )
1564   {
1565     option = &option_defs[ optId ];
1566     optId++;
1567   }
1568   *iter = (TidyIterator) ( optId < N_TIDY_OPTIONS ? optId : (size_t)0 );
1569   return option;
1570 }
1571 
1572 /* Use a 1-based array index as iterator: 0 == end-of-list
1573 */
TY_(getOptionPickList)1574 TidyIterator TY_(getOptionPickList)( const TidyOptionImpl* option )
1575 {
1576     size_t ix = 0;
1577     if ( option && option->pickList )
1578         ix = 1;
1579     return (TidyIterator) ix;
1580 }
1581 
TY_(getNextOptionPick)1582 ctmbstr      TY_(getNextOptionPick)( const TidyOptionImpl* option,
1583                                      TidyIterator* iter )
1584 {
1585     size_t ix;
1586     ctmbstr val = NULL;
1587     assert( option!=NULL && iter != NULL );
1588 
1589     ix = (size_t) *iter;
1590     if ( ix > 0 && ix < 16 && option->pickList )
1591         val = option->pickList[ ix-1 ];
1592     *iter = (TidyIterator) ( val && option->pickList[ix] ? ix + 1 : (size_t)0 );
1593     return val;
1594 }
1595 
/* Write one "name: value" line, terminated by '\n', to the output
** stream.  Always returns 0.
*/
static int  WriteOptionString( const TidyOptionImpl* option,
                               ctmbstr sval, StreamOut* out )
{
  ctmbstr p;

  for ( p = option->name; *p; ++p )
      TY_(WriteChar)( *p, out );

  TY_(WriteChar)( ':', out );
  TY_(WriteChar)( ' ', out );

  for ( p = sval; *p; ++p )
      TY_(WriteChar)( *p, out );

  TY_(WriteChar)( '\n', out );
  return 0;
}
1610 
/* Write an option whose value is an unsigned integer, formatted in
** decimal.  Returns the result of WriteOptionString.
*/
static int  WriteOptionInt( const TidyOptionImpl* option, uint ival, StreamOut* out )
{
  tmbchar digits[ 32 ] = {0};

  TY_(tmbsnprintf)(digits, sizeof(digits), "%u", ival );
  return WriteOptionString( option, digits, out );
}
1617 
/* Write a boolean option as "yes" or "no".  Returns the result of
** WriteOptionString.
*/
static int  WriteOptionBool( const TidyOptionImpl* option, Bool bval, StreamOut* out )
{
  if ( bval )
      return WriteOptionString( option, "yes", out );
  return WriteOptionString( option, "no", out );
}
1623 
/* Write an option using entry number ival of its pick list as the
** value.  Returns -1 when the NULL-terminated list has no such entry,
** otherwise the result of WriteOptionString.
*/
static int  WriteOptionPick( const TidyOptionImpl* option, uint ival, StreamOut* out )
{
    const ctmbstr* picks = option->pickList;
    uint ix = 0;

    /* walk to entry ival without running past the NULL terminator */
    while ( picks[ix] && ix < ival )
        ++ix;

    if ( picks[ix] == NULL )
        return -1;

    return WriteOptionString( option, picks[ix], out );
}
1634 
TY_(ConfigDiffThanSnapshot)1635 Bool  TY_(ConfigDiffThanSnapshot)( TidyDocImpl* doc )
1636 {
1637   int diff = memcmp( &doc->config.value, &doc->config.snapshot,
1638                      N_TIDY_OPTIONS * sizeof(uint) );
1639   return ( diff != 0 );
1640 }
1641 
TY_(ConfigDiffThanDefault)1642 Bool  TY_(ConfigDiffThanDefault)( TidyDocImpl* doc )
1643 {
1644   Bool diff = no;
1645   const TidyOptionImpl* option = option_defs + 1;
1646   const TidyOptionValue* val = doc->config.value;
1647   for ( /**/; !diff && option && option->name; ++option, ++val )
1648   {
1649       diff = !OptionValueEqDefault( option, val );
1650   }
1651   return diff;
1652 }
1653 
1654 
/* Write every option that has a parser and differs from its default to
** the output stream, one "name: value" line each.
**
** The doctype option is special: its value is written according to the
** doctype mode - as a quoted string for a user supplied doctype, as a
** pick-list entry for a non-default mode, and not at all for the
** default mode.
**
** Stops at the first write that returns non-zero and returns that
** value; returns 0 otherwise.
*/
static int  SaveConfigToStream( TidyDocImpl* doc, StreamOut* out )
{
    int rc = 0;
    const TidyOptionImpl* option;
    for ( option=option_defs+1; 0==rc && option && option->name; ++option )
    {
        const TidyOptionValue* val = &doc->config.value[ option->id ];
        if ( option->parser == NULL )
            continue;
        if ( OptionValueEqDefault( option, val ) && option->id != TidyDoctype)
            continue;

        if ( option->id == TidyDoctype )  /* Special case */
        {
          ulong dtmode = cfg( doc, TidyDoctypeMode );
          if ( dtmode == TidyDoctypeUser )
          {
            tmbstr t;

            /* add 2 double quotes: the buffer must hold the string, both
            ** quotes and the terminating NUL, hence length + 3
            */
            if (( t = (tmbstr)TidyDocAlloc( doc, TY_(tmbstrlen)( val->p ) + 3 ) ))
            {
              t[0] = '\"'; t[1] = 0;

              TY_(tmbstrcat)( t, val->p );
              TY_(tmbstrcat)( t, "\"" );
              rc = WriteOptionString( option, t, out );

              TidyDocFree( doc, t );
            }
          }
          else if ( dtmode == option_defs[TidyDoctypeMode].dflt )
            continue;
          else
            rc = WriteOptionPick( option, dtmode, out );
        }
        else if ( option->pickList )
          rc = WriteOptionPick( option, val->v, out );
        else
        {
          switch ( option->type )
          {
          case TidyString:
            rc = WriteOptionString( option, val->p, out );
            break;
          case TidyInteger:
            rc = WriteOptionInt( option, val->v, out );
            break;
          case TidyBoolean:
            rc = WriteOptionBool( option, val->v ? yes : no, out );
            break;
          }
        }
    }
    return rc;
}
1711 
TY_(SaveConfigFile)1712 int  TY_(SaveConfigFile)( TidyDocImpl* doc, ctmbstr cfgfil )
1713 {
1714     int status = -1;
1715     StreamOut* out = NULL;
1716     uint outenc = cfg( doc, TidyOutCharEncoding );
1717     uint nl = cfg( doc, TidyNewline );
1718     FILE* fout = fopen( cfgfil, "wb" );
1719     if ( fout )
1720     {
1721         out = TY_(FileOutput)( doc, fout, outenc, nl );
1722         status = SaveConfigToStream( doc, out );
1723         fclose( fout );
1724         TidyDocFree( doc, out );
1725     }
1726     return status;
1727 }
1728 
TY_(SaveConfigSink)1729 int  TY_(SaveConfigSink)( TidyDocImpl* doc, TidyOutputSink* sink )
1730 {
1731     uint outenc = cfg( doc, TidyOutCharEncoding );
1732     uint nl = cfg( doc, TidyNewline );
1733     StreamOut* out = TY_(UserOutput)( doc, sink, outenc, nl );
1734     int status = SaveConfigToStream( doc, out );
1735     TidyDocFree( doc, out );
1736     return status;
1737 }
1738 
1739 /*
1740  * local variables:
1741  * mode: c
1742  * indent-tabs-mode: nil
1743  * c-basic-offset: 4
1744  * eval: (c-set-offset 'substatement-open 0)
1745  * end:
1746  */
1747