1 /***************************************************************************//**
2  * @file
3  * HTML TidyLib command line driver.
4  *
5  * This console application utilizes LibTidy in order to offer a complete
6  * command line interface to all of the features of LibTidy.
7  *
8  * @author  HTACG, et al (consult git log)
9  *
10  * @copyright
11  *     Copyright (c) 1998-2017 World Wide Web Consortium (Massachusetts
12  *     Institute of Technology, European Research Consortium for Informatics
13  *     and Mathematics, Keio University) and HTACG.
14  * @par
15  *     All Rights Reserved.
16  * @par
17  *     See `tidy.h` for the complete license.
18  *
19  * @date Additional updates: consult git log
20  ******************************************************************************/
21 
22 #include "tidy.h"
23 #include "tidybuffio.h"
24 #include "locale.h"
25 #include "sprtf.h"
26 
27 #if defined(_WIN32)
28 #  include <windows.h>    /* Force console to UTF8. */
29 #endif
30 
31 #if defined(ENABLE_DEBUG_LOG) && defined(_MSC_VER) && defined(_CRTDBG_MAP_ALLOC)
32 #  include <stdlib.h>
33 #  include <crtdbg.h>
34 #endif
35 
/** Stream that Tidy's error output is written to. It starts out as NULL
 ** and will be stderr later.
 */
static FILE *errout = NULL;

#ifdef _WIN32
/** Windows code page of the console; `tidy_cleanup` hands this value back
 ** to SetConsoleOutputCP on exit.
 */
static uint win_cp;
#  if defined(_MSC_VER) && (_MSC_VER < 1900)
     /* MSVC releases below 1900 get snprintf mapped onto _snprintf. */
#    define snprintf _snprintf
#  endif
#endif
45 
46 
47 /** @defgroup console_application Tidy Console Application
48  ** @copydoc tidy.c
49  ** @{
50  */
51 
52 
53 /* MARK: - Miscellaneous Utilities */
54 /***************************************************************************//**
55  ** @defgroup utilities_misc Miscellaneous Utilities
56  ** This group contains general utilities used in the console application.
57  *******************************************************************************
58  ** @{
59  */
60 
61 
62 /** Indicates whether or not two filenames are the same.
63  ** @result Returns a Bool indicating whether the filenames are the same.
64  */
samefile(ctmbstr filename1,ctmbstr filename2)65 static Bool samefile(ctmbstr filename1, /**< First filename */
66                      ctmbstr filename2  /**< Second filename */
67                      )
68 {
69 #if FILENAMES_CASE_SENSITIVE
70     return ( strcmp( filename1, filename2 ) == 0 );
71 #else
72     return ( strcasecmp( filename1, filename2 ) == 0 );
73 #endif
74 }
75 
76 
/** Exit-time cleanup. On Windows the console output code page is set back
 ** to the value stored in `win_cp`; on other platforms nothing is done.
 */
static void tidy_cleanup( void )
{
#ifdef _WIN32
    SetConsoleOutputCP( win_cp );
#endif
}
86 
87 
88 /** Exits with an error in the event of an out of memory condition.
89  */
outOfMemory(void)90 static void outOfMemory(void)
91 {
92     fprintf(stderr, "%s", tidyLocalizedString(TC_STRING_OUT_OF_MEMORY));
93     exit(1);
94 }
95 
96 
97 /** Create a new, allocated string with a format and arguments.
98  ** @result Returns a new, allocated string that you must free.
99  */
stringWithFormat(const ctmbstr fmt,...)100 static tmbstr stringWithFormat(const ctmbstr fmt, /**< The format string. */
101                                ...                /**< Variable arguments. */
102                                )
103 {
104     va_list argList;
105     tmbstr result = NULL;
106     int len = 0;
107 
108     va_start(argList, fmt);
109     len = vsnprintf( result, 0, fmt, argList );
110     va_end(argList);
111 
112     if (!(result = malloc( len + 1) ))
113         outOfMemory();
114 
115     va_start(argList, fmt);
116     vsnprintf( result, len + 1, fmt, argList);
117     va_end(argList);
118 
119     return result;
120 }
121 
122 
123 /** @} end utilities_misc group */
124 /* MARK: - Output Helping Functions */
125 /***************************************************************************//**
126  ** @defgroup utilities_output Output Helping Functions
127  ** This group contains functions that aid the formatting of output.
128  *******************************************************************************
129  ** @{
130  */
131 
132 
133 /** Used by `print1Column`, `print2Columns` and `print3Columns` to manage
134  ** wrapping text within columns.
135  ** @result The pointer to the next part of the string to output.
136  */
cutToWhiteSpace(const char * s,uint offset,char * sbuf)137 static const char *cutToWhiteSpace(const char *s, /**< starting point of desired string to output */
138                                    uint offset,   /**< column width desired */
139                                    char *sbuf     /**< the buffer to output */
140                                    )
141 {
142     if (!s)
143     {
144         sbuf[0] = '\0';
145         return NULL;
146     }
147     else if (strlen(s) <= offset)
148     {
149         strcpy(sbuf,s);
150         sbuf[offset] = '\0';
151         return NULL;
152     }
153     else
154     {
155         uint j, l, n;
156         /* scan forward looking for newline */
157         j = 0;
158         while(j < offset && s[j] != '\n')
159             ++j;
160         if ( j == offset ) {
161             /* scan backward looking for first space */
162             j = offset;
163             while(j && s[j] != ' ')
164                 --j;
165             l = j;
166             n = j+1;
167             /* no white space */
168             if (j==0)
169             {
170                 l = offset;
171                 n = offset;
172             }
173         } else
174         {
175             l = j;
176             n = j+1;
177         }
178         strncpy(sbuf,s,l);
179         sbuf[l] = '\0';
180         return s+n;
181     }
182 }
183 
184 
185 /** Outputs one column of text.
186  */
print1Column(const char * fmt,uint l1,const char * c1)187 static void print1Column(const char* fmt, /**< The format string for formatting the output. */
188                          uint l1,         /**< The width of the column. */
189                          const char *c1   /**< The content of the column. */
190                          )
191 {
192     const char *pc1=c1;
193     char *c1buf = (char *)malloc(l1+1);
194     if (!c1buf) outOfMemory();
195 
196     do
197     {
198         pc1 = cutToWhiteSpace(pc1, l1, c1buf);
199         printf(fmt, c1buf[0] !='\0' ? c1buf : "");
200     } while (pc1);
201     free(c1buf);
202 }
203 
204 
205 /** Outputs two columns of text.
206  */
print2Columns(const char * fmt,uint l1,uint l2,const char * c1,const char * c2)207 static void print2Columns(const char* fmt, /**< The format string for formatting the output. */
208                           uint l1,         /**< The width of column 1. */
209                           uint l2,         /**< The width of column 2. */
210                           const char *c1,  /**< The contents of column 1. */
211                           const char *c2   /**< The contents of column 2. */
212 )
213 {
214     const char *pc1=c1, *pc2=c2;
215     char *c1buf = (char *)malloc(l1+1);
216     char *c2buf = (char *)malloc(l2+1);
217     if (!c1buf) outOfMemory();
218     if (!c2buf) outOfMemory();
219 
220     do
221     {
222         pc1 = cutToWhiteSpace(pc1, l1, c1buf);
223         pc2 = cutToWhiteSpace(pc2, l2, c2buf);
224         printf(fmt, l1, l1, c1buf[0]!='\0'?c1buf:"",
225                     l2, l2, c2buf[0]!='\0'?c2buf:"");
226     } while (pc1 || pc2);
227     free(c1buf);
228     free(c2buf);
229 }
230 
231 
232 /** Outputs three columns of text.
233  */
print3Columns(const char * fmt,uint l1,uint l2,uint l3,const char * c1,const char * c2,const char * c3)234 static void print3Columns(const char* fmt, /**< The three column format string. */
235                           uint l1,         /**< Width of column 1. */
236                           uint l2,         /**< Width of column 2. */
237                           uint l3,         /**< Width of column 3. */
238                           const char *c1,  /**< Content of column 1. */
239                           const char *c2,  /**< Content of column 2. */
240                           const char *c3   /**< Content of column 3. */
241                           )
242 {
243     const char *pc1=c1, *pc2=c2, *pc3=c3;
244     char *c1buf = (char *)malloc(l1+1);
245     char *c2buf = (char *)malloc(l2+1);
246     char *c3buf = (char *)malloc(l3+1);
247     if (!c1buf) outOfMemory();
248     if (!c2buf) outOfMemory();
249     if (!c3buf) outOfMemory();
250 
251     do
252     {
253         pc1 = cutToWhiteSpace(pc1, l1, c1buf);
254         pc2 = cutToWhiteSpace(pc2, l2, c2buf);
255         pc3 = cutToWhiteSpace(pc3, l3, c3buf);
256         printf(fmt,
257                c1buf[0]!='\0'?c1buf:"",
258                c2buf[0]!='\0'?c2buf:"",
259                c3buf[0]!='\0'?c3buf:"");
260     } while (pc1 || pc2 || pc3);
261     free(c1buf);
262     free(c2buf);
263     free(c3buf);
264 }
265 
266 
267 /** Provides the `unknown option` output to the current errout.
268  */
unknownOption(TidyDoc tdoc,uint c)269 static void unknownOption(TidyDoc tdoc, /**< The Tidy document. */
270                           uint c        /**< The unknown option. */
271                           )
272 {
273     fprintf( errout, tidyLocalizedString( TC_STRING_UNKNOWN_OPTION ), (char)c );
274     fprintf( errout, "\n");
275 }
276 
277 
278 /** @} end utilities_output group */
279 /* MARK: - CLI Options Utilities */
280 /***************************************************************************//**
281  ** @defgroup options_cli CLI Options Utilities
282  ** These structures, arrays, declarations, and definitions are used throughout
283  ** this console application.
284  *******************************************************************************
285  ** @{
286  */
287 
288 
289 /** @name Format strings and decorations used in output.
290  ** @{
291  */
292 
/** Two-column line format; each column takes width, precision and string. */
static const char helpfmt[] = " %-*.*s %-*.*s\n";

/** Underline characters printed beneath category headings in the help. */
static const char helpul[]  = "-----------------------------------------------------------------";

/** Three-column line format with fixed widths of 27, 9 and 40 characters. */
static const char fmt[]     = "%-27.27s %-9.9s  %-40.40s\n";

/** Underline characters printed beneath the main help title. */
static const char ul[]      = "=================================================================";
297 
298 /** @} */
299 
/** Categories used to group the command line options in help output.
 ** The values double as indexes into `cmdopt_catname`, and the help
 ** printer iterates from `CmdOptCatFIRST` up to (not including)
 ** `CmdOptCatLAST`.
 */
typedef enum
{
    CmdOptFileManip,                  /**< Heading with mnemonic "file-manip". */
    CmdOptCatFIRST = CmdOptFileManip, /**< Alias of the first category. */
    CmdOptProcDir,                    /**< Heading with mnemonic "process-directives". */
    CmdOptCharEnc,                    /**< Heading with mnemonic "char-encoding". */
    CmdOptMisc,                       /**< Heading with mnemonic "misc". */
    CmdOptXML,                        /**< Heading with mnemonic "xml". */
    CmdOptCatLAST                     /**< One past the last category. */
} CmdOptCategory;
312 
313 /** This array contains headings that will be used in help ouput.
314  */
315 static const struct {
316     ctmbstr mnemonic;  /**< Used in XML as a class. */
317     uint key;          /**< Key to fetch the localized string. */
318 } cmdopt_catname[] = {
319     { "file-manip", TC_STRING_FILE_MANIP },
320     { "process-directives", TC_STRING_PROCESS_DIRECTIVES },
321     { "char-encoding", TC_STRING_CHAR_ENCODING },
322     { "misc", TC_STRING_MISC },
323     { "xml", TC_STRING_XML }
324 };
325 
326 /** The struct and subsequent array keep the help output structured
327  ** because we _also_ output all of this stuff as as XML.
328  */
329 typedef struct {
330     CmdOptCategory cat; /**< Category */
331     ctmbstr name1;      /**< Name */
332     uint key;           /**< Key to fetch the localized description. */
333     uint subKey;        /**< Secondary substitution key. */
334     ctmbstr eqconfig;   /**< Equivalent configuration option */
335     ctmbstr name2;      /**< Name */
336     ctmbstr name3;      /**< Name */
337 } CmdOptDesc;
338 
339 /** All instances of %s will be substituted with localized string
340  ** specified by the subKey field.
341  */
342 static const CmdOptDesc cmdopt_defs[] =  {
343     { CmdOptFileManip, "-output <%s>",           TC_OPT_OUTPUT,   TC_LABEL_FILE, "output-file: <%s>", "-o <%s>" },
344     { CmdOptFileManip, "-config <%s>",           TC_OPT_CONFIG,   TC_LABEL_FILE, NULL },
345     { CmdOptFileManip, "-file <%s>",             TC_OPT_FILE,     TC_LABEL_FILE, "error-file: <%s>", "-f <%s>" },
346     { CmdOptFileManip, "-modify",                TC_OPT_MODIFY,   0,             "write-back: yes", "-m" },
347     { CmdOptProcDir,   "-indent",                TC_OPT_INDENT,   0,             "indent: auto", "-i" },
348     { CmdOptProcDir,   "-wrap <%s>",             TC_OPT_WRAP,     TC_LABEL_COL,  "wrap: <%s>", "-w <%s>" },
349     { CmdOptProcDir,   "-upper",                 TC_OPT_UPPER,    0,             "uppercase-tags: yes", "-u" },
350     { CmdOptProcDir,   "-clean",                 TC_OPT_CLEAN,    0,             "clean: yes", "-c" },
351     { CmdOptProcDir,   "-bare",                  TC_OPT_BARE,     0,             "bare: yes", "-b" },
352     { CmdOptProcDir,   "-gdoc",                  TC_OPT_GDOC,     0,             "gdoc: yes", "-g" },
353     { CmdOptProcDir,   "-numeric",               TC_OPT_NUMERIC,  0,             "numeric-entities: yes", "-n" },
354     { CmdOptProcDir,   "-errors",                TC_OPT_ERRORS,   0,             "markup: no", "-e" },
355     { CmdOptProcDir,   "-quiet",                 TC_OPT_QUIET,    0,             "quiet: yes", "-q" },
356     { CmdOptProcDir,   "-omit",                  TC_OPT_OMIT,     0,             "omit-optional-tags: yes" },
357     { CmdOptProcDir,   "-xml",                   TC_OPT_XML,      0,             "input-xml: yes" },
358     { CmdOptProcDir,   "-asxml",                 TC_OPT_ASXML,    0,             "output-xhtml: yes", "-asxhtml" },
359     { CmdOptProcDir,   "-ashtml",                TC_OPT_ASHTML,   0,             "output-html: yes" },
360     { CmdOptProcDir,   "-access <%s>",           TC_OPT_ACCESS,   TC_LABEL_LEVL, "accessibility-check: <%s>" },
361     { CmdOptCharEnc,   "-raw",                   TC_OPT_RAW,      0,             NULL },
362     { CmdOptCharEnc,   "-ascii",                 TC_OPT_ASCII,    0,             NULL },
363     { CmdOptCharEnc,   "-latin0",                TC_OPT_LATIN0,   0,             NULL },
364     { CmdOptCharEnc,   "-latin1",                TC_OPT_LATIN1,   0,             NULL },
365 #ifndef NO_NATIVE_ISO2022_SUPPORT
366     { CmdOptCharEnc,   "-iso2022",               TC_OPT_ISO2022,  0,             NULL },
367 #endif
368     { CmdOptCharEnc,   "-utf8",                  TC_OPT_UTF8,     0,             NULL },
369     { CmdOptCharEnc,   "-mac",                   TC_OPT_MAC,      0,             NULL },
370     { CmdOptCharEnc,   "-win1252",               TC_OPT_WIN1252,  0,             NULL },
371     { CmdOptCharEnc,   "-ibm858",                TC_OPT_IBM858,   0,             NULL },
372     { CmdOptCharEnc,   "-utf16le",               TC_OPT_UTF16LE,  0,             NULL },
373     { CmdOptCharEnc,   "-utf16be",               TC_OPT_UTF16BE,  0,             NULL },
374     { CmdOptCharEnc,   "-utf16",                 TC_OPT_UTF16,    0,             NULL },
375     { CmdOptCharEnc,   "-big5",                  TC_OPT_BIG5,     0,             NULL },
376     { CmdOptCharEnc,   "-shiftjis",              TC_OPT_SHIFTJIS, 0,             NULL },
377     { CmdOptMisc,      "-version",               TC_OPT_VERSION,  0,             NULL,  "-v" },
378     { CmdOptMisc,      "-help",                  TC_OPT_HELP,     0,             NULL,  "-h", "-?" },
379     { CmdOptMisc,      "-help-config",           TC_OPT_HELPCFG,  0,             NULL },
380     { CmdOptMisc,      "-help-env",              TC_OPT_HELPENV,  0,             NULL },
381     { CmdOptMisc,      "-show-config",           TC_OPT_SHOWCFG,  0,             NULL },
382     { CmdOptMisc,      "-export-config",         TC_OPT_EXP_CFG,  0,             NULL },
383     { CmdOptMisc,      "-export-default-config", TC_OPT_EXP_DEF,  0,             NULL },
384     { CmdOptMisc,      "-help-option <%s>",      TC_OPT_HELPOPT,  TC_LABEL_OPT,  NULL },
385     { CmdOptMisc,      "-language <%s>",         TC_OPT_LANGUAGE, TC_LABEL_LANG, "language: <%s>" },
386     { CmdOptXML,       "-xml-help",              TC_OPT_XMLHELP,  0,             NULL },
387     { CmdOptXML,       "-xml-config",            TC_OPT_XMLCFG,   0,             NULL },
388     { CmdOptXML,       "-xml-strings",           TC_OPT_XMLSTRG,  0,             NULL },
389     { CmdOptXML,       "-xml-error-strings",     TC_OPT_XMLERRS,  0,             NULL },
390     { CmdOptXML,       "-xml-options-strings",   TC_OPT_XMLOPTS,  0,             NULL },
391     { CmdOptMisc,      NULL,                   0,               0,             NULL }
392 };
393 
394 
395 /** Option names aren't localized, but the sample fields should be localized.
396  ** For example, `<file>` should be `<archivo>` in Spanish.
397  ** @param pos A CmdOptDesc array with fields that must be localized.
398  */
localize_option_names(CmdOptDesc * pos)399 static void localize_option_names( CmdOptDesc *pos)
400 {
401     ctmbstr fileString = tidyLocalizedString(pos->subKey);
402     pos->name1 = stringWithFormat(pos->name1, fileString);
403     if ( pos->name2 )
404         pos->name2 = stringWithFormat(pos->name2, fileString);
405     if ( pos->name3 )
406         pos->name3 = stringWithFormat(pos->name3, fileString);
407     if ( pos->eqconfig )
408         pos->eqconfig = stringWithFormat(pos->eqconfig, fileString);
409 }
410 
411 
412 /** Escape a name for XML output. For example, `-output <file>` becomes
413  ** `-output &lt;file&gt;` for use in XML.
414  ** @param name The option name to escape.
415  ** @result Returns an allocated string.
416  */
get_escaped_name(ctmbstr name)417 static tmbstr get_escaped_name( ctmbstr name )
418 {
419     tmbstr escpName;
420     char aux[2];
421     uint len = 0;
422     ctmbstr c;
423     for(c=name; *c!='\0'; ++c)
424         switch(*c)
425     {
426         case '<':
427         case '>':
428             len += 4;
429             break;
430         case '"':
431             len += 6;
432             break;
433         default:
434             len += 1;
435             break;
436     }
437 
438     escpName = (tmbstr)malloc(len+1);
439     if (!escpName) outOfMemory();
440     escpName[0] = '\0';
441 
442     aux[1] = '\0';
443     for(c=name; *c!='\0'; ++c)
444         switch(*c)
445     {
446         case '<':
447             strcat(escpName, "&lt;");
448             break;
449         case '>':
450             strcat(escpName, "&gt;");
451             break;
452         case '"':
453             strcat(escpName, "&quot;");
454             break;
455         default:
456             aux[0] = *c;
457             strcat(escpName, aux);
458             break;
459     }
460 
461     return escpName;
462 }
463 
464 
465 /** @} end CLI Options Definitions Utilities group */
466 /* MARK: - Configuration Options Utilities */
467 /***************************************************************************//**
468  ** @defgroup utilities_cli_options Configuration Options Utilities
469  ** Provide utilities to manipulate configuration options for output.
470  *******************************************************************************
471  ** @{
472  */
473 
474 
475 /** Utility to determine if an option has a picklist.
476  ** @param topt The option to check.
477  ** @result Returns a Bool indicating whether the option has a picklist or not.
478  */
hasPickList(TidyOption topt)479 static Bool hasPickList( TidyOption topt )
480 {
481     TidyIterator pos;
482 
483     if ( tidyOptGetType( topt ) != TidyInteger)
484         return no;
485 
486     pos = tidyOptGetPickList( topt );
487 
488     return tidyOptGetNextPick( topt, &pos ) != NULL;
489 }
490 
491 /** Returns the configuration category id for the specified configuration
492  ** category id. This will be used as an XML class attribute value.
493  ** @param id The TidyConfigCategory for which to lookup the category name.
494  ** @result Returns the configuration category, such as "diagnostics".
495  */
ConfigCategoryId(TidyConfigCategory id)496 static ctmbstr ConfigCategoryId( TidyConfigCategory id )
497 {
498     if (id >= TidyDiagnostics && id <= TidyInternalCategory)
499         return tidyErrorCodeAsKey( id );
500 
501     fprintf(stderr, tidyLocalizedString(TC_STRING_FATAL_ERROR), (int)id);
502     fprintf(stderr, "\n");
503 
504     assert(0);
505     abort();
506     return "never_here"; /* only for the compiler warning */
507 }
508 
509 /** Structure maintains a description of a configuration ption.
510  */
511 typedef struct {
512     ctmbstr name;         /**< Name */
513     ctmbstr cat;          /**< Category */
514     uint    catid;        /**< Category ID */
515     ctmbstr type;         /**< "String, ... */
516     ctmbstr vals;         /**< Potential values. If NULL, use an external function */
517     ctmbstr def;          /**< default */
518     tmbchar tempdefs[80]; /**< storage for default such as integer */
519     Bool haveVals;        /**< if yes, vals is valid */
520 } OptionDesc;
521 
522 /** A type for a function pointer for a function used to print out options
523  ** descriptions.
524  ** @param TidyDoc The document.
525  ** @param TidyOption The Tidy option.
526  ** @param OptionDesc A pointer to the option description structure.
527  */
528 typedef void (*OptionFunc)( TidyDoc, TidyOption, OptionDesc * );
529 
530 
531 /** Create OptionDesc "d" related to "opt"
532  */
GetOption(TidyDoc tdoc,TidyOption topt,OptionDesc * d)533 static void GetOption(TidyDoc tdoc,    /**< The tidy document. */
534                       TidyOption topt, /**< The option to create a description for. */
535                       OptionDesc *d    /**< [out] The new option description. */
536                       )
537 {
538     TidyOptionId optId = tidyOptGetId( topt );
539     TidyOptionType optTyp = tidyOptGetType( topt );
540 
541     d->name = tidyOptGetName( topt );
542     d->cat = ConfigCategoryId( tidyOptGetCategory( topt ) );
543     d->catid = tidyOptGetCategory( topt );
544     d->vals = NULL;
545     d->def = NULL;
546     d->haveVals = yes;
547 
548     /* Handle special cases first. */
549     switch ( optId )
550     {
551         case TidyInlineTags:
552         case TidyBlockTags:
553         case TidyEmptyTags:
554         case TidyPreTags:
555             d->type = "Tag Names";
556             d->vals = "tagX, tagY, ...";
557             d->def = NULL;
558             break;
559 
560         case TidyPriorityAttributes:
561             d->type = "Attributes Names";
562             d->vals = "attributeX, attributeY, ...";
563             d->def = NULL;
564             break;
565 
566         case TidyCharEncoding:
567         case TidyInCharEncoding:
568         case TidyOutCharEncoding:
569             d->type = "Encoding";
570             d->def = tidyOptGetEncName( tdoc, optId );
571             if (!d->def)
572                 d->def = "?";
573             d->vals = NULL;
574             break;
575 
576             /* General case will handle remaining */
577         default:
578             switch ( optTyp )
579         {
580             case TidyBoolean:
581                 d->type = "Boolean";
582                 d->def = tidyOptGetCurrPick( tdoc, optId );
583                 break;
584 
585             case TidyInteger:
586                 if (hasPickList(topt))
587                 {
588                     d->type = "Enum";
589                     d->def = tidyOptGetCurrPick( tdoc, optId );
590                 }
591                 else
592                 {
593                     uint idef;
594                     d->type = "Integer";
595                     if ( optId == TidyWrapLen )
596                         d->vals = "0 (no wrapping), 1, 2, ...";
597                     else
598                         d->vals = "0, 1, 2, ...";
599 
600                     idef = tidyOptGetInt( tdoc, optId );
601                     sprintf(d->tempdefs, "%u", idef);
602                     d->def = d->tempdefs;
603                 }
604                 break;
605 
606             case TidyString:
607                 d->type = "String";
608                 d->vals = NULL;
609                 d->haveVals = no;
610                 d->def = tidyOptGetValue( tdoc, optId );
611                 break;
612         }
613     }
614 }
615 
616 /** Array holding all options. Contains a trailing sentinel.
617  */
618 typedef struct {
619     TidyOption topt[N_TIDY_OPTIONS];
620 } AllOption_t;
621 
622 /** A simple option comparator, used for sorting the options.
623  ** @result Returns an integer indicating the result of the comparison.
624  */
cmpOpt(const void * e1_,const void * e2_)625 static int cmpOpt(const void* e1_, /**< Item A to compare. */
626                   const void *e2_  /**< Item B to compare. */
627                   )
628 {
629     const TidyOption* e1 = (const TidyOption*)e1_;
630     const TidyOption* e2 = (const TidyOption*)e2_;
631     return strcmp(tidyOptGetName(*e1), tidyOptGetName(*e2));
632 }
633 
634 /** Returns options sorted.
635  */
getSortedOption(TidyDoc tdoc,AllOption_t * tOption)636 static void getSortedOption(TidyDoc tdoc,         /**< The Tidy document. */
637                             AllOption_t *tOption  /**< [out] The list of options. */
638                             )
639 {
640     TidyIterator pos = tidyGetOptionList( tdoc );
641     uint i = 0;
642 
643     while ( pos )
644     {
645         TidyOption topt = tidyGetNextOption( tdoc, &pos );
646         tOption->topt[i] = topt;
647         ++i;
648     }
649     tOption->topt[i] = NULL; /* sentinel */
650 
651     qsort(tOption->topt,
652           i, /* there are i items, not including the sentinal */
653           sizeof(tOption->topt[0]),
654           cmpOpt);
655 }
656 
657 /** An iterator for the sorted options.
658  */
ForEachSortedOption(TidyDoc tdoc,OptionFunc OptionPrint)659 static void ForEachSortedOption(TidyDoc tdoc,          /**< The Tidy document. */
660                                 OptionFunc OptionPrint /**< The printing function to be used. */
661                                 )
662 {
663     AllOption_t tOption;
664     const TidyOption *topt;
665 
666     getSortedOption( tdoc, &tOption );
667     for( topt = tOption.topt; *topt; ++topt)
668     {
669         OptionDesc d;
670 
671         GetOption( tdoc, *topt, &d );
672         (*OptionPrint)( tdoc, *topt, &d );
673     }
674 }
675 
676 /** An iterator for the unsorted options.
677  */
ForEachOption(TidyDoc tdoc,OptionFunc OptionPrint)678 static void ForEachOption(TidyDoc tdoc,          /**< The Tidy document. */
679                           OptionFunc OptionPrint /**< The printing function to be used. */
680 )
681 {
682     TidyIterator pos = tidyGetOptionList( tdoc );
683 
684     while ( pos )
685     {
686         TidyOption topt = tidyGetNextOption( tdoc, &pos );
687         OptionDesc d;
688 
689         GetOption( tdoc, topt, &d );
690         (*OptionPrint)( tdoc, topt, &d );
691     }
692 }
693 
694 /** Prints an option's allowed value as specified in its pick list.
695  ** @param topt The Tidy option.
696  */
PrintAllowedValuesFromPick(TidyOption topt)697 static void PrintAllowedValuesFromPick( TidyOption topt )
698 {
699     TidyIterator pos = tidyOptGetPickList( topt );
700     Bool first = yes;
701     ctmbstr def;
702     while ( pos )
703     {
704         if (first)
705             first = no;
706         else
707             printf(", ");
708         def = tidyOptGetNextPick( topt, &pos );
709         printf("%s", def);
710     }
711 }
712 
713 /** Prints an option's allowed values.
714  */
PrintAllowedValues(TidyOption topt,const OptionDesc * d)715 static void PrintAllowedValues(TidyOption topt,    /**< The Tidy option. */
716                                const OptionDesc *d /**< The OptionDesc for the option. */
717                                )
718 {
719     if (d->vals)
720         printf( "%s", d->vals );
721     else
722         PrintAllowedValuesFromPick( topt );
723 }
724 
725 
726 /** @} end utilities_cli_options group */
727 /* MARK: - Provide the -help Service */
728 /***************************************************************************//**
729  ** @defgroup service_help Provide the -help Service
730  *******************************************************************************
731  ** @{
732  */
733 
734 
735 /** Retrieve the option's name(s) from the structure as a single string,
736  ** localizing the field values if application. For example, this might
737  ** return `-output <file>, -o <file>`.
738  ** @param pos A CmdOptDesc array item for which to get the names.
739  ** @result Returns the name(s) for the option as a single string.
740  */
get_option_names(const CmdOptDesc * pos)741 static tmbstr get_option_names( const CmdOptDesc* pos )
742 {
743     tmbstr name;
744     uint len;
745     CmdOptDesc localPos = *pos;
746 
747     localize_option_names( &localPos );
748 
749     len = strlen(localPos.name1);
750     if (localPos.name2)
751         len += 2+strlen(localPos.name2);
752     if (localPos.name3)
753         len += 2+strlen(localPos.name3);
754 
755     name = (tmbstr)malloc(len+1);
756     if (!name) outOfMemory();
757     strcpy(name, localPos.name1);
758     free((tmbstr)localPos.name1);
759     if (localPos.name2)
760     {
761         strcat(name, ", ");
762         strcat(name, localPos.name2);
763         free((tmbstr)localPos.name2);
764     }
765     if (localPos.name3)
766     {
767         strcat(name, ", ");
768         strcat(name, localPos.name3);
769         free((tmbstr)localPos.name3);
770     }
771     return name;
772 }
773 
774 
775 /** Returns the final name of the tidy executable by eliminating the path
776  ** name components from the executable name.
777  ** @param prog The path of the current executable.
778  */
get_final_name(ctmbstr prog)779 static ctmbstr get_final_name( ctmbstr prog )
780 {
781     ctmbstr name = prog;
782     int c;
783     size_t i;
784     size_t len = strlen(prog);
785 
786     for (i = 0; i < len; i++)
787     {
788         c = prog[i];
789         if ((( c == '/' ) || ( c == '\\' )) && prog[i+1])
790         {
791             name = &prog[i+1];
792         }
793     }
794 
795     return name;
796 }
797 
798 /** Outputs all of the complete help options (text).
799  ** @param tdoc The Tidydoc whose options are being printed.
800  */
print_help_options(TidyDoc tdoc)801 static void print_help_options( TidyDoc tdoc )
802 {
803     CmdOptCategory cat = CmdOptCatFIRST;
804     const CmdOptDesc* pos = cmdopt_defs;
805     uint col1, col2;
806     uint width = 78;
807 
808     for( cat=CmdOptCatFIRST; cat!=CmdOptCatLAST; ++cat)
809     {
810         ctmbstr name = tidyLocalizedString(cmdopt_catname[cat].key);
811         size_t len = width < strlen(name) ? width : strlen(name);
812         printf( "%s\n", name );
813         printf( "%*.*s\n", (int)len, (int)len, helpul );
814 
815         /* Tidy's "standard" 78-column output was always 25:52 ratio, so let's
816            try to preserve this approximately 1:2 ratio regardless of whatever
817            silly thing the user might have set for a console width, with a
818            maximum of 50 characters for the first column.
819          */
820         col1 = width / 3;             /* one third of the available */
821         col1 = col1 < 1 ? 1 : col1;   /* at least 1 */
822         col1 = col1 > 35 ? 35 : col1; /* no greater than 35 */
823         col2 = width - col1 - 2;      /* allow two spaces */
824         col2 = col2 < 1 ? 1 : col2;   /* at least 1 */
825 
826         for( pos=cmdopt_defs; pos->name1; ++pos)
827         {
828             tmbstr name;
829             if (pos->cat != cat)
830                 continue;
831             name = get_option_names( pos );
832             print2Columns( helpfmt, col1, col2, name, tidyLocalizedString( pos->key ) );
833             free(name);
834         }
835         printf("\n");
836     }
837 }
838 
839 /** Handles the -help service.
840  */
help(TidyDoc tdoc,ctmbstr prog)841 static void help(TidyDoc tdoc, /**< The tidy document for which help is showing. */
842                  ctmbstr prog  /**< The path of the current executable. */
843                  )
844 {
845     tmbstr temp_string = NULL;
846     uint width = 78;
847 
848     printf( tidyLocalizedString(TC_TXT_HELP_1), get_final_name(prog), tidyLibraryVersion() );
849     printf("\n");
850 
851     if ( tidyPlatform() )
852         temp_string = stringWithFormat( tidyLocalizedString(TC_TXT_HELP_2A), tidyPlatform() );
853     else
854         temp_string = stringWithFormat( tidyLocalizedString(TC_TXT_HELP_2B) );
855 
856     width = width < strlen(temp_string) ? width : strlen(temp_string);
857     printf( "%s\n", temp_string );
858     printf( "%*.*s\n\n", width, width, ul);
859     free( temp_string );
860 
861     print_help_options( tdoc );
862 
863 
864     printf("\n");
865 #if defined(TIDY_CONFIG_FILE) && defined(TIDY_USER_CONFIG_FILE)
866     temp_string = stringWithFormat( tidyLocalizedString(TC_TXT_HELP_3A), TIDY_CONFIG_FILE, TIDY_USER_CONFIG_FILE );
867     printf( tidyLocalizedString(TC_TXT_HELP_3), temp_string );
868     free( temp_string );
869 #else
870     printf( tidyLocalizedString(TC_TXT_HELP_3), "\n" );
871 #endif
872     printf("\n");
873 }
874 
875 /** @} end service_help group */
876 /* MARK: - Provide the -help-config Service */
877 /***************************************************************************//**
878  ** @defgroup service_help_config Provide the -help-config Service
879  *******************************************************************************
880  ** @{
881  */
882 
883 
884 /** Retrieves allowed values from an option's pick list.
885  ** @param topt A TidyOption for which to get the allowed values.
886  ** @result A string containing the allowed values.
887  */
GetAllowedValuesFromPick(TidyOption topt)888 static tmbstr GetAllowedValuesFromPick( TidyOption topt )
889 {
890     TidyIterator pos;
891     Bool first;
892     ctmbstr def;
893     uint len = 0;
894     tmbstr val;
895 
896     pos = tidyOptGetPickList( topt );
897     first = yes;
898     while ( pos )
899     {
900         if (first)
901             first = no;
902         else
903             len += 2;
904         def = tidyOptGetNextPick( topt, &pos );
905         len += strlen(def);
906     }
907     val = (tmbstr)malloc(len+1);
908     if (!val) outOfMemory();
909     val[0] = '\0';
910     pos = tidyOptGetPickList( topt );
911     first = yes;
912     while ( pos )
913     {
914         if (first)
915             first = no;
916         else
917             strcat(val, ", ");
918         def = tidyOptGetNextPick( topt, &pos );
919         strcat(val, def);
920     }
921     return val;
922 }
923 
924 /** Retrieves allowed values for an option.
925  ** @result A string containing the allowed values.
926  */
GetAllowedValues(TidyOption topt,const OptionDesc * d)927 static tmbstr GetAllowedValues(TidyOption topt,    /**< A TidyOption for which to get the allowed values. */
928                                const OptionDesc *d /**< A pointer to the OptionDesc array. */
929                                )
930 {
931     if (d->vals)
932     {
933         tmbstr val = (tmbstr)malloc(1+strlen(d->vals));
934         if (!val) outOfMemory();
935         strcpy(val, d->vals);
936         return val;
937     }
938     else
939         return GetAllowedValuesFromPick( topt );
940 }
941 
942 /** Prints a single option.
943  */
printOption(TidyDoc ARG_UNUSED (tdoc),TidyOption topt,OptionDesc * d)944 static void printOption(TidyDoc ARG_UNUSED(tdoc), /**< The Tidy document. */
945                         TidyOption topt,          /**< The option to print. */
946                         OptionDesc *d             /**< A pointer to the OptionDesc array. */
947                         )
948 {
949     if (tidyOptGetCategory( topt ) == TidyInternalCategory )
950         return;
951 
952     if ( *d->name || *d->type )
953     {
954         ctmbstr pval = d->vals;
955         tmbstr val = NULL;
956         if (!d->haveVals)
957         {
958             pval = "-";
959         }
960         else if (pval == NULL)
961         {
962             val = GetAllowedValues( topt, d);
963             pval = val;
964         }
965         print3Columns( fmt, 27, 9, 40, d->name, d->type, pval );
966         if (val)
967             free(val);
968     }
969 }
970 
971 /** Handles the -help-config service.
972  ** @remark We will not support console word wrapping for the configuration
973  **         options table. If users really have a small console, then they
974  *          should make it wider or output to a file.
975  ** @param tdoc The Tidy document.
976  */
optionhelp(TidyDoc tdoc)977 static void optionhelp( TidyDoc tdoc )
978 {
979     printf( "\n" );
980     printf( "%s", tidyLocalizedString( TC_TXT_HELP_CONFIG ) );
981 
982     printf( fmt,
983            tidyLocalizedString( TC_TXT_HELP_CONFIG_NAME ),
984            tidyLocalizedString( TC_TXT_HELP_CONFIG_TYPE ),
985            tidyLocalizedString( TC_TXT_HELP_CONFIG_ALLW ) );
986 
987     printf( fmt, ul, ul, ul );
988 
989     ForEachSortedOption( tdoc, printOption );
990 }
991 
992 
993 /** @} end service_help_config group */
994 /* MARK: - Provide the -help-env Service */
995 /***************************************************************************//**
996  ** @defgroup service_help_env Provide the -help-env Service
997  *******************************************************************************
998  ** @{
999  */
1000 
1001 
1002 /** Handles the -help-env service.
1003  ** @param tdoc The Tidy document.
1004  */
helpEnv(TidyDoc tdoc)1005 static void helpEnv( TidyDoc tdoc )
1006 {
1007     tmbstr subst = "";
1008     Bool uses_env = getenv("HTML_TIDY") != NULL;
1009     ctmbstr env_var = uses_env ? getenv("HTML_TIDY"): tidyLocalizedString( TC_TXT_HELP_ENV_1B );
1010 
1011 #if defined( TIDY_CONFIG_FILE ) && defined( TIDY_USER_CONFIG_FILE )
1012     subst = stringWithFormat( tidyLocalizedString(TC_TXT_HELP_ENV_1A), TIDY_CONFIG_FILE, TIDY_USER_CONFIG_FILE );
1013 #endif
1014 
1015     env_var = env_var != NULL ? env_var : tidyLocalizedString( TC_TXT_HELP_ENV_1B );
1016 
1017     printf( "\n" );
1018     printf( tidyLocalizedString( TC_TXT_HELP_ENV_1), subst, env_var );
1019 
1020 #if defined( TIDY_CONFIG_FILE ) && defined( TIDY_USER_CONFIG_FILE )
1021     if ( uses_env )
1022         printf( tidyLocalizedString( TC_TXT_HELP_ENV_1C ), TIDY_USER_CONFIG_FILE );
1023     free( subst );
1024 #endif
1025 
1026     printf( "\n" );
1027 }
1028 
1029 
1030 
1031 /** @} end service_help_env group */
1032 /* MARK: - Provide the -help-option Service */
1033 /***************************************************************************//**
1034  ** @defgroup service_help_option Provide the -help-option Service
1035  *******************************************************************************
1036  ** @{
1037  */
1038 
1039 
1040 /** Cleans up the HTML-laden option descriptions for console output. It's
1041  ** just a simple HTML filtering/replacement function.
1042  ** @param description The option description.
1043  ** @result Returns an allocated string with some HTML stripped away.
1044  */
cleanup_description(ctmbstr description)1045 static tmbstr cleanup_description( ctmbstr description )
1046 {
1047     /* Substitutions - this might be a good spot to introduce platform
1048      dependent definitions for colorized output on different terminals
1049      that support, for example, ANSI escape sequences. The assumption
1050      is made the Mac and Linux targets support ANSI colors, but even
1051      so debugger terminals may not. Note that the line-wrapping
1052      function also doesn't account for non-printing characters. */
1053     static struct {
1054         ctmbstr tag;
1055         ctmbstr replacement;
1056     } const replacements[] = {
1057         { "lt",       "<"          },
1058         { "gt",       ">"          },
1059         { "br/",      "\n\n"       },
1060 #if defined(LINUX_OS) || defined(MAC_OS_X)
1061         { "code",     "\x1b[36m"   },
1062         { "/code",    "\x1b[0m"    },
1063         { "em",       "\x1b[4m"   },
1064         { "/em",      "\x1b[0m"    },
1065         { "strong",   "\x1b[31m"   },
1066         { "/strong",  "\x1b[0m"    },
1067 #endif
1068         /* MUST be last */
1069         { NULL,       NULL         },
1070     };
1071 
1072     /* State Machine Setup */
1073     typedef enum {
1074         s_DONE,
1075         s_DATA,
1076         s_WRITING,
1077         s_TAG_OPEN,
1078         s_TAG_NAME,
1079         s_ERROR,
1080         s_LAST /* MUST be last */
1081     } states;
1082 
1083     typedef enum {
1084         c_NIL,
1085         c_EOF,
1086         c_BRACKET_CLOSE,
1087         c_BRACKET_OPEN,
1088         c_OTHER
1089     } charstates;
1090 
1091     typedef enum {
1092         a_NIL,
1093         a_BUILD_NAME,
1094         a_CONSUME,
1095         a_EMIT,
1096         a_EMIT_SUBS,
1097         a_WRITE,
1098         a_ERROR
1099     } actions;
1100 
1101     typedef struct {
1102         states state;
1103         charstates charstate;
1104         actions action;
1105         states next_state;
1106     } transitionType;
1107 
1108     const transitionType transitions[] = {
1109         { s_DATA,           c_EOF,           a_NIL,        s_DONE           },
1110         { s_DATA,           c_BRACKET_OPEN,  a_CONSUME,    s_TAG_OPEN       },
1111         /* special case allows ; */
1112         { s_DATA,           c_BRACKET_CLOSE, a_EMIT,       s_WRITING        },
1113         { s_DATA,           c_OTHER,         a_EMIT,       s_WRITING        },
1114         { s_WRITING,        c_OTHER,         a_WRITE,      s_DATA           },
1115         { s_WRITING,        c_BRACKET_CLOSE, a_WRITE,      s_DATA           },
1116         { s_TAG_OPEN,       c_EOF,           a_ERROR,      s_DONE           },
1117         { s_TAG_OPEN,       c_OTHER,         a_NIL,        s_TAG_NAME       },
1118         { s_TAG_NAME,       c_BRACKET_OPEN,  a_ERROR,      s_DONE           },
1119         { s_TAG_NAME,       c_EOF,           a_ERROR,      s_DONE           },
1120         { s_TAG_NAME,       c_BRACKET_CLOSE, a_EMIT_SUBS,  s_WRITING        },
1121         { s_TAG_NAME,       c_OTHER,         a_BUILD_NAME, s_TAG_NAME       },
1122         { s_ERROR,          0,               a_ERROR,      s_DONE           },
1123         { s_DONE,           0,               a_NIL,        0                },
1124         /* MUST be last: */
1125         { s_LAST,           0,               0,            0                },
1126     };
1127 
1128     /* Output Setup */
1129     tmbstr result = NULL;
1130     int g_result = 100;  /* minimum buffer grow size */
1131     int l_result = 0;    /* buffer current size */
1132     int i_result = 0;    /* current string position */
1133     int writer_len = 0;  /* writer length */
1134 
1135     ctmbstr writer = NULL;
1136 
1137     /* Current tag name setup */
1138     tmbstr name = NULL; /* tag name */
1139     int g_name = 10;    /* buffer grow size */
1140     int l_name = 0;     /* buffer current size */
1141     int i_name = 0;     /* current string position */
1142 
1143     /* Pump Setup */
1144     int i = 0;
1145     states state = s_DATA;
1146     charstates charstate;
1147     char c;
1148     int j = 0, k = 0;
1149     transitionType transition;
1150 
1151     if ( !description || (strlen(description) < 1) )
1152     {
1153         return NULL;
1154     }
1155 
1156     /* Process the HTML Snippet */
1157     do {
1158         c = description[i];
1159 
1160         /* Determine secondary state. */
1161         switch (c)
1162         {
1163             case '\0':
1164                 charstate = c_EOF;
1165                 break;
1166 
1167             case '<':
1168             case '&':
1169                 charstate = c_BRACKET_OPEN;
1170                 break;
1171 
1172             case '>':
1173             case ';':
1174                 charstate = c_BRACKET_CLOSE;
1175                 break;
1176 
1177             default:
1178                 charstate = c_OTHER;
1179                 break;
1180         }
1181 
1182         /* Find the correct instruction */
1183         j = 0;
1184         while (transitions[j].state != s_LAST)
1185         {
1186             transition = transitions[j];
1187             if ( transition.state == state && transition.charstate == charstate ) {
1188                 switch ( transition.action )
1189                 {
1190                         /* This action is building the name of an HTML tag. */
1191                     case a_BUILD_NAME:
1192                         if ( !name )
1193                         {
1194                             l_name = g_name;
1195                             name = calloc(l_name, 1);
1196                         }
1197 
1198                         if ( i_name >= l_name )
1199                         {
1200                             l_name = l_name + g_name;
1201                             name = realloc(name, l_name);
1202                         }
1203 
1204                         strncpy(name + i_name, &c, 1);
1205                         i_name++;
1206                         i++;
1207                         break;
1208 
1209                         /* This character will be emitted into the output
1210                          stream. The only purpose of this action is to
1211                          ensure that `writer` is NULL as a flag that we
1212                          will output the current `c` */
1213                     case a_EMIT:
1214                         writer = NULL; /* flag to use c */
1215                         break;
1216 
1217                         /* Now that we've consumed a tag, we will emit the
1218                          substitution if any has been specified in
1219                          `replacements`. */
1220                     case a_EMIT_SUBS:
1221                         name[i_name] = '\0';
1222                         i_name = 0;
1223                         k = 0;
1224                         writer = "";
1225                         while ( replacements[k].tag )
1226                         {
1227                             if ( strcmp( replacements[k].tag, name ) == 0 )
1228                             {
1229                                 writer = replacements[k].replacement;
1230                             }
1231                             k++;
1232                         }
1233                         break;
1234 
1235                         /* This action will add to our `result` string, expanding
1236                          the buffer as necessary in reasonable chunks. */
1237                     case a_WRITE:
1238                         if ( !writer )
1239                             writer_len = 1;
1240                         else
1241                             writer_len = strlen( writer );
1242                         /* Lazy buffer creation */
1243                         if ( !result )
1244                         {
1245                             l_result = writer_len + g_result;
1246                             result = calloc(l_result, 1);
1247                         }
1248                         /* Grow the buffer if needed */
1249                         if ( i_result + writer_len >= l_result )
1250                         {
1251                             l_result = l_result + writer_len + g_result;
1252                             result = realloc(result, l_result);
1253                         }
1254                         /* Add current writer to the buffer */
1255                         if ( !writer )
1256                         {
1257                             result[i_result] = c;
1258                             result[i_result +1] = '\0';
1259                         }
1260                         else
1261                         {
1262                             strncpy( result + i_result, writer, writer_len );
1263                         }
1264 
1265                         i_result += writer_len;
1266                         i++;
1267                         break;
1268 
1269                         /* This action could be more robust but it serves the
1270                          current purpose. Cross our fingers and count on our
1271                          localizers not to give bad HTML descriptions. */
1272                     case a_ERROR:
1273                         printf("<Error> The localized string probably has bad HTML.\n");
1274                         goto EXIT_CLEANLY;
1275 
1276                         /* Just a NOP. */
1277                     case a_NIL:
1278                         break;
1279 
1280                         /* The default case also handles the CONSUME action. */
1281                     default:
1282                         i++;
1283                         break;
1284                 }
1285 
1286                 state = transition.next_state;
1287                 break;
1288             }
1289             j++;
1290         }
1291     } while ( description[i] );
1292 
1293 EXIT_CLEANLY:
1294 
1295     if ( name )
1296         free(name);
1297     return result;
1298 }
1299 
1300 
1301 /** Handles the -help-option service.
1302  */
optionDescribe(TidyDoc tdoc,char * option)1303 static void optionDescribe(TidyDoc tdoc, /**< The Tidy Document */
1304                            char *option  /**< The name of the option. */
1305                            )
1306 {
1307     tmbstr result = NULL;
1308     Bool allocated = no;
1309     TidyOptionId topt = tidyOptGetIdForName( option );
1310     uint tcat = tidyOptGetCategory( tidyGetOption(tdoc, topt));
1311 
1312     if (topt < N_TIDY_OPTIONS && tcat != TidyInternalCategory )
1313     {
1314         result = cleanup_description( tidyOptGetDoc( tdoc, tidyGetOption( tdoc, topt ) ) );
1315         allocated = yes;
1316     }
1317     else
1318     {
1319         result = (tmbstr)tidyLocalizedString(TC_STRING_UNKNOWN_OPTION_B);
1320     }
1321 
1322     printf( "\n" );
1323     printf( "`--%s`\n\n", option );
1324     print1Column( "%-78.78s\n", 78, result );
1325     printf( "\n" );
1326     if ( allocated )
1327         free ( result );
1328 }
1329 
1330 
1331 /** @} end service_help_option group */
1332 /* MARK: - Provide the -lang help Service */
1333 /***************************************************************************//**
1334  ** @defgroup service_lang_help Provide the -lang help Service
1335  *******************************************************************************
1336  ** @{
1337  */
1338 
1339 
1340 /** Prints the Windows language names that Tidy recognizes, using the specified
1341  ** format string.
1342  ** @param format A format string used to display the Windows language names,
1343  **        or NULL to use the built-in default format.
1344  */
tidyPrintWindowsLanguageNames(ctmbstr format)1345 void tidyPrintWindowsLanguageNames( ctmbstr format )
1346 {
1347     const tidyLocaleMapItem *item;
1348     TidyIterator i = getWindowsLanguageList();
1349     ctmbstr winName;
1350     ctmbstr posixName;
1351 
1352     while (i) {
1353         item = getNextWindowsLanguage(&i);
1354         winName = TidyLangWindowsName( item );
1355         posixName = TidyLangPosixName( item );
1356         if ( format )
1357             printf( format, winName, posixName );
1358         else
1359             printf( "%-20s -> %s\n", winName, posixName );
1360     }
1361 }
1362 
1363 
1364 /** Prints the languages the are currently built into Tidy, using the specified
1365  ** format string.
1366  ** @param format A format string used to display the Windows language names,
1367  **        or NULL to use the built-in default format.
1368  */
tidyPrintTidyLanguageNames(ctmbstr format)1369 void tidyPrintTidyLanguageNames( ctmbstr format )
1370 {
1371     ctmbstr item;
1372     TidyIterator i = getInstalledLanguageList();
1373 
1374     while (i) {
1375         item = getNextInstalledLanguage(&i);
1376         if ( format )
1377             printf( format, item );
1378         else
1379             printf( "%s\n", item );
1380     }
1381 }
1382 
1383 
1384 /** Handles the -lang help service.
1385  ** @remark We will not support console word wrapping for the tables. If users
1386  **         really have a small console, then they should make it wider or
1387  **         output to a file.
1388  ** @param tdoc The Tidy document.
1389  */
lang_help(TidyDoc tdoc)1390 static void lang_help( TidyDoc tdoc )
1391 {
1392     printf( "\n" );
1393     printf( "%s", tidyLocalizedString(TC_TXT_HELP_LANG_1) );
1394     printf( "\n" );
1395     tidyPrintWindowsLanguageNames("  %-20s -> %s\n");
1396     printf( "\n" );
1397     printf( "%s", tidyLocalizedString(TC_TXT_HELP_LANG_2) );
1398     printf( "\n" );
1399     tidyPrintTidyLanguageNames("  %s\n");
1400     printf( "\n" );
1401     printf( tidyLocalizedString(TC_TXT_HELP_LANG_3), tidyGetLanguage() );
1402     printf( "\n" );
1403 }
1404 
1405 
1406 /** @} end service_lang_help group */
1407 /* MARK: - Provide the -show-config Service */
1408 /***************************************************************************//**
1409  ** @defgroup service_show_config Provide the -show-config Service
1410  *******************************************************************************
1411  ** @{
1412  */
1413 
1414 
1415 /** Prints the option value for a given option.
1416  */
printOptionValues(TidyDoc ARG_UNUSED (tdoc),TidyOption topt,OptionDesc * d)1417 static void printOptionValues(TidyDoc ARG_UNUSED(tdoc),  /**< The Tidy document. */
1418                               TidyOption topt,           /**< The option for which to show values. */
1419                               OptionDesc *d              /**< The OptionDesc array. */
1420                               )
1421 {
1422     TidyOptionId optId = tidyOptGetId( topt );
1423 
1424     if ( tidyOptGetCategory(topt) == TidyInternalCategory )
1425         return;
1426 
1427     switch ( optId )
1428     {
1429         case TidyInlineTags:
1430         case TidyBlockTags:
1431         case TidyEmptyTags:
1432         case TidyPreTags:
1433         {
1434             TidyIterator pos = tidyOptGetDeclTagList( tdoc );
1435             while ( pos )
1436             {
1437                 d->def = tidyOptGetNextDeclTag(tdoc, optId, &pos);
1438                 if ( pos )
1439                 {
1440                     printf( fmt, d->name, d->type, d->def );
1441                     d->name = "";
1442                     d->type = "";
1443                 }
1444             }
1445         }
1446             break;
1447         case TidyPriorityAttributes: /* Is #697 - This case seems missing */
1448         {
1449             TidyIterator itAttr = tidyOptGetPriorityAttrList(tdoc);
1450             if (itAttr && (itAttr != (TidyIterator)-1))
1451             {
1452                 while (itAttr)
1453                 {
1454                     d->def = tidyOptGetNextPriorityAttr(tdoc, &itAttr);
1455                     if (itAttr)
1456                     {
1457                         printf(fmt, d->name, d->type, d->def);
1458                         d->name = "";
1459                         d->type = "";
1460                     }
1461                 }
1462             }
1463         }
1464             break;
1465         default:
1466             break;
1467     }
1468 
1469     /* fix for http://tidy.sf.net/bug/873921 */
1470     if ( *d->name || *d->type || (d->def && *d->def) )
1471     {
1472         if ( ! d->def )
1473             d->def = "";
1474         printf( fmt, d->name, d->type, d->def );
1475     }
1476 }
1477 
1478 /** Handles the -show-config service.
1479  ** @remark We will not support console word wrapping for the table. If users
1480  **         really have a small console, then they should make it wider or
1481  **         output to a file.
1482  ** @param tdoc The Tidy Document.
1483  */
optionvalues(TidyDoc tdoc)1484 static void optionvalues( TidyDoc tdoc )
1485 {
1486     printf( "\n%s\n", tidyLocalizedString(TC_STRING_CONF_HEADER) );
1487     printf( fmt, tidyLocalizedString(TC_STRING_CONF_NAME),
1488            tidyLocalizedString(TC_STRING_CONF_TYPE),
1489            tidyLocalizedString(TC_STRING_CONF_VALUE) );
1490     printf( fmt, ul, ul, ul );
1491 
1492     ForEachSortedOption( tdoc, printOptionValues );
1493 }
1494 
1495 
1496 /** @} end service_show_config group */
1497 /* MARK: - Provide the -export-config Services */
1498 /***************************************************************************//**
1499  ** @defgroup service_export_config Provide the -export-config Services
1500  *******************************************************************************
1501  ** @{
1502  */
1503 
1504 /* Is #697 - specialised service to 'invert' a buffers content
1505    split on a space character */
invertBuffer(TidyBuffer * src,TidyBuffer * dst)1506 static void invertBuffer(TidyBuffer *src, TidyBuffer *dst)
1507 {
1508     uint len = src->size;
1509     char *in = (char *)src->bp;
1510     char *cp;
1511     if (!in)
1512         return;
1513     while (len)
1514     {
1515         unsigned char uc;
1516         len--;
1517         uc = in[len];
1518         if (uc == ' ')
1519         {
1520             in[len] = 0;
1521             cp = &in[len + 1];
1522             if (dst->size)
1523                 tidyBufAppend(dst, " ", 1);
1524             tidyBufAppend(dst, cp, strlen(cp));
1525         }
1526     }
1527     if (dst->size)
1528         tidyBufAppend(dst, " ", 1);
1529     tidyBufAppend(dst, in, strlen(in));
1530 }
1531 
1532 /** Prints the option value for a given option.
1533  */
printOptionExportValues(TidyDoc ARG_UNUSED (tdoc),TidyOption topt,OptionDesc * d)1534 static void printOptionExportValues(TidyDoc ARG_UNUSED(tdoc),  /**< The Tidy document. */
1535                                     TidyOption topt,           /**< The option for which to show values. */
1536                                     OptionDesc *d              /**< The OptionDesc array. */
1537                                     )
1538 {
1539     TidyOptionId optId = tidyOptGetId( topt );
1540     TidyBuffer buf1, buf2;
1541 
1542     if ( tidyOptGetCategory(topt) == TidyInternalCategory )
1543         return;
1544 
1545     switch ( optId )
1546     {
1547         case TidyInlineTags:
1548         case TidyBlockTags:
1549         case TidyEmptyTags:
1550         case TidyPreTags:
1551         {
1552             TidyIterator pos = tidyOptGetDeclTagList( tdoc );
1553             if ( pos )  /* Is #697 - one or more values */
1554             {
1555                 tidyBufInit(&buf1);
1556                 tidyBufInit(&buf2);
1557                 while (pos)
1558                 {
1559                     d->def = tidyOptGetNextDeclTag(tdoc, optId, &pos);
1560                     if (d->def)
1561                     {
1562                         if (buf1.size)
1563                             tidyBufAppend(&buf1, " ", 1);
1564                         tidyBufAppend(&buf1, (void *)d->def, strlen(d->def));
1565                     }
1566                 }
1567                 invertBuffer(&buf1, &buf2); /* Is #697 - specialised service to invert words */
1568                 tidyBufAppend(&buf2, (void *)"\0", 1); /* is this really required? */
1569                 printf("%s: %s\n", d->name, buf2.bp);
1570                 d->name = "";
1571                 d->type = "";
1572                 d->def = 0;
1573                 tidyBufFree(&buf1);
1574                 tidyBufFree(&buf2);
1575             }
1576         }
1577             break;
1578         case TidyPriorityAttributes: /* Is #697 - This case seems missing */
1579         {
1580             TidyIterator itAttr = tidyOptGetPriorityAttrList(tdoc);
1581             if (itAttr && (itAttr != (TidyIterator)-1))
1582             {
1583                 tidyBufInit(&buf1);
1584                 while (itAttr)
1585                 {
1586                     d->def = tidyOptGetNextPriorityAttr(tdoc, &itAttr);
1587                     if (d->def)
1588                     {
1589                         if (buf1.size)
1590                             tidyBufAppend(&buf1, " ", 1);
1591                         tidyBufAppend(&buf1, (void *)d->def, strlen(d->def));
1592                     }
1593                 }
1594                 tidyBufAppend(&buf1, (void *)"\0", 1); /* is this really required? */
1595                 printf("%s: %s\n", d->name, buf1.bp);
1596                 d->name = "";
1597                 d->type = "";
1598                 d->def = 0;
1599                 tidyBufFree(&buf1);
1600             }
1601         }
1602         break;
1603         default:
1604             break;
1605     }
1606 
1607     /* fix for http://tidy.sf.net/bug/873921 */
1608     if ( *d->name || *d->type || (d->def && *d->def) )
1609     {
1610         if ( ! d->def )
1611             d->def = "";
1612         printf( "%s: %s\n", d->name, d->def );
1613     }
1614 }
1615 
1616 /** Handles the -export-config service.
1617  ** @param tdoc The Tidy Document.
1618  */
exportOptionValues(TidyDoc tdoc)1619 static void exportOptionValues( TidyDoc tdoc )
1620 {
1621     ForEachSortedOption( tdoc, printOptionExportValues );
1622 }
1623 
1624 /** Handles the -export-default-config service.
1625  ** @param tdoc The Tidy Document.
1626  */
exportDefaultOptionValues(TidyDoc tdoc)1627 static void exportDefaultOptionValues( TidyDoc tdoc )
1628 {
1629     tidyOptResetAllToDefault( tdoc );
1630     ForEachSortedOption( tdoc, printOptionExportValues );
1631 }
1632 
1633 
1634 /** @} end service_export_config group */
1635 /* MARK: - Provide the -version Service */
1636 /***************************************************************************//**
1637  ** @defgroup service_version Provide the -version Service
1638  *******************************************************************************
1639  ** @{
1640  */
1641 
1642 
1643 /** Handles the -version service.
1644  */
version(TidyDoc tdoc)1645 static void version( TidyDoc tdoc )
1646 {
1647     if ( tidyPlatform() )
1648         printf( tidyLocalizedString( TC_STRING_VERS_A ), tidyPlatform(), tidyLibraryVersion() );
1649     else
1650         printf( tidyLocalizedString( TC_STRING_VERS_B ), tidyLibraryVersion() );
1651 
1652     printf("\n");
1653 }
1654 
1655 
1656 /** @} end service_version group */
1657 /* MARK: - Provide the -xml-config Service */
1658 /***************************************************************************//**
1659  ** @defgroup service_xml_config Provide the -xml-config Service
1660  *******************************************************************************
1661  ** @{
1662  */
1663 
1664 
1665 /** Prints for XML an option's <description>.
1666  */
printXMLDescription(TidyDoc tdoc,TidyOption topt)1667 static void printXMLDescription(TidyDoc tdoc,   /**< The Tidy document. */
1668                                 TidyOption topt /**< The option. */
1669                                 )
1670 {
1671     ctmbstr doc = tidyOptGetDoc( tdoc, topt );
1672 
1673     if (doc)
1674         printf("  <description>%s</description>\n", doc);
1675     else
1676     {
1677         printf("  <description />\n");
1678         fprintf(stderr, tidyLocalizedString(TC_STRING_OPT_NOT_DOCUMENTED),
1679                 tidyOptGetName( topt ));
1680         fprintf(stderr, "\n");
1681 
1682     }
1683 }
1684 
1685 /** Prints for XML an option's `<seealso>`.
1686  */
printXMLCrossRef(TidyDoc tdoc,TidyOption topt)1687 static void printXMLCrossRef(TidyDoc tdoc,   /**< The Tidy document. */
1688                              TidyOption topt /**< The option. */
1689                              )
1690 {
1691     TidyOption optLinked;
1692     TidyIterator pos = tidyOptGetDocLinksList(tdoc, topt);
1693     while( pos )
1694     {
1695         optLinked = tidyOptGetNextDocLinks(tdoc, &pos );
1696         printf("  <seealso>%s</seealso>\n",tidyOptGetName(optLinked));
1697     }
1698 }
1699 
1700 
1701 /** Prints for XML an option's `<eqconfig>`.
1702  */
printXMLCrossRefEqConsole(TidyDoc tdoc,TidyOption topt)1703 static void printXMLCrossRefEqConsole(TidyDoc tdoc,   /**< The Tidy document. */
1704                                       TidyOption topt /**< The option. */
1705                                       )
1706 {
1707     const CmdOptDesc* pos = cmdopt_defs;
1708     const CmdOptDesc* hit = NULL;
1709     CmdOptDesc localHit;
1710     enum { sizeBuffer = 50 }; /* largest config name is 27 chars so far... */
1711     char buffer[sizeBuffer];
1712 
1713     for( pos=cmdopt_defs; pos->name1; ++pos)
1714     {
1715         snprintf(buffer, sizeBuffer, "%s:", tidyOptGetName( topt ));
1716         if ( pos->eqconfig && (strncmp(buffer, pos->eqconfig, strlen(buffer)) == 0) )
1717         {
1718             hit = pos;
1719             break;
1720         }
1721     }
1722 
1723     if ( hit )
1724     {
1725         tmbstr localName;
1726         localHit = *hit;
1727         localize_option_names( &localHit );
1728         printf("  <eqconsole>%s</eqconsole>\n", localName = get_escaped_name(localHit.name1));
1729         free((tmbstr)localHit.name1);
1730         free(localName);
1731         if ( localHit.name2 )
1732         {
1733             printf("  <eqconsole>%s</eqconsole>\n", localName = get_escaped_name(localHit.name2));
1734             free((tmbstr)localHit.name2);
1735             free(localName);
1736         }
1737         if ( localHit.name3 )
1738         {
1739             printf("  <eqconsole>%s</eqconsole>\n", localName = get_escaped_name(localHit.name3));
1740             free((tmbstr)localHit.name3);
1741             free(localName);
1742         }
1743         if ( localHit.eqconfig ) /* Is. #791 */
1744         {
1745             free((tmbstr)localHit.eqconfig);
1746         }
1747 
1748     }
1749     else
1750         printf("  %s\n", "  <eqconsole />");
1751 }
1752 
1753 
1754 /** Prints for XML an option.
1755  */
printXMLOption(TidyDoc tdoc,TidyOption topt,OptionDesc * d)1756 static void printXMLOption(TidyDoc tdoc,    /**< The Tidy document. */
1757                            TidyOption topt, /**< The option. */
1758                            OptionDesc *d    /**< The OptionDesc for the option. */
1759                            )
1760 {
1761     if ( tidyOptGetCategory(topt) == TidyInternalCategory )
1762         return;
1763 
1764     printf( " <option class=\"%s\">\n", d->cat );
1765     printf  ("  <name>%s</name>\n",d->name);
1766     printf  ("  <type>%s</type>\n",d->type);
1767     if (d->def)
1768         printf("  <default>%s</default>\n",d->def);
1769     else
1770         printf("  <default />\n");
1771     if (d->haveVals)
1772     {
1773         printf("  <example>");
1774         PrintAllowedValues( topt, d );
1775         printf("</example>\n");
1776     }
1777     else
1778     {
1779         printf("  <example />\n");
1780     }
1781     printXMLDescription( tdoc, topt );
1782     printXMLCrossRef( tdoc, topt );
1783     printXMLCrossRefEqConsole( tdoc, topt );
1784     printf( " </option>\n" );
1785 }
1786 
1787 
1788 /** Handles the -xml-config service.
1789  ** @param tdoc The Tidy document.
1790  */
XMLoptionhelp(TidyDoc tdoc)1791 static void XMLoptionhelp( TidyDoc tdoc )
1792 {
1793     printf( "<?xml version=\"1.0\"?>\n"
1794            "<config version=\"%s\">\n", tidyLibraryVersion());
1795     ForEachOption( tdoc, printXMLOption );
1796     printf( "</config>\n" );
1797 }
1798 
1799 
1800 /** @} end service_xml_config group */
1801 /* MARK: - Provide the -xml-error-strings Service */
1802 /***************************************************************************//**
1803  ** @defgroup service_xml_error_strings Provide the -xml-error-strings Service
1804  *******************************************************************************
1805  ** @{
1806  */
1807 
1808 
1809 /** Handles the -xml-error-strings service.
1810  ** This service is primarily helpful to developers who need to generate an
1811  ** updated list of strings to expect when using one of the message callbacks.
1812  ** Included in the output is the current string associated with the error
1813  ** symbol.
1814  ** @param tdoc The Tidy document.
1815  **/
xml_error_strings(TidyDoc tdoc)1816 static void xml_error_strings( TidyDoc tdoc )
1817 {
1818     uint errorCode;
1819     ctmbstr localizedString;
1820     TidyIterator j = getErrorCodeList();
1821 
1822     printf( "<?xml version=\"1.0\"?>\n" );
1823     printf( "<error_strings version=\"%s\">\n", tidyLibraryVersion());
1824 
1825     while (j) {
1826         errorCode = getNextErrorCode(&j);
1827         localizedString = tidyLocalizedString(errorCode);
1828         printf( " <error_string>\n" );
1829         printf( "  <name>%s</name>\n", tidyErrorCodeAsKey(errorCode));
1830         if ( localizedString )
1831             printf( "  <string class=\"%s\"><![CDATA[%s]]></string>\n", tidyGetLanguage(), localizedString );
1832         else
1833             printf( "  <string class=\"%s\">NULL</string>\n", tidyGetLanguage() );
1834 
1835         printf( " </error_string>\n" );
1836     }
1837 
1838     printf( "</error_strings>\n" );
1839 }
1840 
1841 
1842 /** @} end service_xml_error_strings group */
1843 /* MARK: - Provide the -xml-help Service */
1844 /***************************************************************************//**
1845  ** @defgroup service_xmlhelp Provide the -xml-help Service
1846  *******************************************************************************
1847  ** @{
1848  */
1849 
1850 /** Outputs an XML element for a CLI option, escaping special characters as
1851  ** required. For example, it might print `<name>-output &lt;file&gt;</name>`.
1852  */
print_xml_help_option_element(ctmbstr element,ctmbstr name)1853 static void print_xml_help_option_element(ctmbstr element, /**< XML element name. */
1854                                           ctmbstr name     /**< The contents of the element. */
1855                                           )
1856 {
1857     tmbstr escpName;
1858     if (!name)
1859         return;
1860 
1861     printf("  <%s>%s</%s>\n", element, escpName = get_escaped_name(name), element);
1862     free(escpName);
1863 }
1864 
1865 /** Provides the -xml-help service.
1866  */
xml_help(void)1867 static void xml_help( void )
1868 {
1869     const CmdOptDesc* pos;
1870     CmdOptDesc localPos;
1871 
1872     printf( "<?xml version=\"1.0\"?>\n"
1873            "<cmdline version=\"%s\">\n", tidyLibraryVersion());
1874 
1875     for( pos=cmdopt_defs; pos->name1; ++pos)
1876     {
1877         localPos = *pos;
1878         localize_option_names(&localPos);
1879         printf(" <option class=\"%s\">\n", cmdopt_catname[pos->cat].mnemonic );
1880         print_xml_help_option_element("name", localPos.name1);
1881         print_xml_help_option_element("name", localPos.name2);
1882         print_xml_help_option_element("name", localPos.name3);
1883         print_xml_help_option_element("description", tidyLocalizedString( pos->key ) );
1884         if (pos->eqconfig)
1885             print_xml_help_option_element("eqconfig", localPos.eqconfig);
1886         else
1887             printf("  <eqconfig />\n");
1888         printf(" </option>\n");
1889 
1890         if (localPos.name1) free((tmbstr)localPos.name1);
1891         if (localPos.name2) free((tmbstr)localPos.name2);
1892         if (localPos.name3) free((tmbstr)localPos.name3);
1893         if (localPos.eqconfig) free((tmbstr)localPos.eqconfig); /* Is. #791 */
1894     }
1895 
1896     printf( "</cmdline>\n" );
1897 }
1898 
1899 
1900 /** @} end service_xmlhelp group */
1901 /* MARK: - Provide the -xml-options-strings Service */
1902 /***************************************************************************//**
1903  ** @defgroup service_xml_opts_strings Provide the -xml-options-strings Service
1904  *******************************************************************************
1905  ** @{
1906  */
1907 
1908 
1909 /** Handles printing of option description for -xml-options-strings service.
1910  **/
printXMLOptionString(TidyDoc tdoc,TidyOption topt,OptionDesc * d)1911 static void printXMLOptionString(TidyDoc tdoc,    /**< The Tidy document. */
1912                                  TidyOption topt, /**< The option. */
1913                                  OptionDesc *d    /**< The OptionDesc array. */
1914                                  )
1915 {
1916     if ( tidyOptGetCategory(topt) == TidyInternalCategory )
1917         return;
1918 
1919     printf( " <option>\n" );
1920     printf( "  <name>%s</name>\n",d->name);
1921     printf( "  <string class=\"%s\"><![CDATA[%s]]></string>\n", tidyGetLanguage(), tidyOptGetDoc( tdoc, topt ) );
1922     printf( " </option>\n" );
1923 }
1924 
1925 
1926 /** Handles the -xml-options-strings service.
1927  ** This service is primarily helpful to developers and localizers to test
1928  ** that option description strings as represented on screen output are
1929  ** correct and do not break tidy.
1930  ** @param tdoc The Tidy document.
1931  */
xml_options_strings(TidyDoc tdoc)1932 static void xml_options_strings( TidyDoc tdoc )
1933 {
1934     printf( "<?xml version=\"1.0\"?>\n"
1935            "<options_strings version=\"%s\">\n", tidyLibraryVersion());
1936     ForEachOption( tdoc, printXMLOptionString);
1937     printf( "</options_strings>\n" );
1938 }
1939 
1940 
1941 /** @} end service_xml_opts_strings group */
1942 /* MARK: - Provide the -xml-strings Service */
1943 /***************************************************************************//**
1944  ** @defgroup service_xml_strings Provide the -xml-strings Service
1945  *******************************************************************************
1946  ** @{
1947  */
1948 
1949 
1950 /** Handles the -xml-strings service.
1951  ** This service was primarily helpful to developers and localizers to compare
1952  ** localized strings to the built in `en` strings. It's probably better to use
1953  ** our POT/PO workflow with your favorite tools, or simply diff the language
1954  ** header files directly.
1955  ** @note The attribute `id` is not a specification, promise, or part of an
1956  **       API. You must not depend on this value. For strings meant for error
1957  **       output, the `label` attribute will contain the stringified version of
1958  **       the internal key for the string.
1959  */
xml_strings(void)1960 static void xml_strings( void )
1961 {
1962     uint i;
1963     TidyIterator j;
1964 
1965     ctmbstr current_language = tidyGetLanguage();
1966     ctmbstr current_label;
1967     Bool skip_current = strcmp( current_language, "en" ) == 0;
1968     Bool matches_base;
1969 
1970     printf( "<?xml version=\"1.0\"?>\n"
1971            "<localized_strings version=\"%s\">\n", tidyLibraryVersion());
1972 
1973     j = getStringKeyList();
1974     while (j) {
1975         i = getNextStringKey(&j);
1976         current_label = tidyErrorCodeAsKey(i);
1977         if (!strcmp(current_label, "UNDEFINED"))
1978             current_label = "";
1979         printf( "<localized_string id=\"%u\" label=\"%s\">\n", i, current_label );
1980         printf( " <string class=\"%s\">", "en" );
1981         printf("%s", tidyDefaultString(i));
1982         printf( "</string>\n" );
1983         if ( !skip_current ) {
1984             matches_base = strcmp( tidyLocalizedString(i), tidyDefaultString(i) ) == 0;
1985             printf( " <string class=\"%s\" same_as_base=\"%s\">", tidyGetLanguage(), matches_base ? "yes" : "no" );
1986             printf("%s", tidyLocalizedString(i));
1987             printf( "</string>\n" );
1988         }
1989         printf( "</localized_string>\n");
1990     }
1991 
1992     printf( "</localized_strings>\n" );
1993 }
1994 
1995 
1996 /** @} end service_xml_strings group */
1997 /* MARK: - Experimental Stuff */
1998 /***************************************************************************//**
1999  ** @defgroup experimental_stuff Experimental Stuff
 ** From time to time the developers might leave stuff here that you can use
 ** to experiment on your own, or that they're using to experiment with.
2002  *******************************************************************************
2003  ** @{
2004  */
2005 
2006 
/** This callback from LibTidy allows the console application to examine an
 ** error message before allowing LibTidy to display it. Currently the body
 ** of the function is not compiled into Tidy (it sits inside `#if 0`), so
 ** the callback simply returns `yes`. If you're interested in how to use the
 ** new message API, then enable it. Possible applications in future console
 ** Tidy might be to do things like:
 ** - allow user-defined filtering
 ** - sort the report output by line number
 ** - other things that are user facing and best not put into LibTidy
 **   proper.
 **
 ** When enabled, the sample body prints the message key and output text,
 ** then walks the message's arguments and prints the type, format and value
 ** of each one.
 ** @param tmessage The message handed over by LibTidy.
 ** @returns `yes` as compiled, so that Tidy will not block output of the
 **          message; the disabled sample body returns `no` to suppress
 **          LibTidy's own output of it.
 */
static Bool TIDY_CALL reportCallback(TidyMessage tmessage)
{
#if 0
    TidyIterator pos;
    TidyMessageArgument arg;
    TidyFormatParameterType messageType;
    ctmbstr messageFormat;

    printf("FILTER: %s, %s\n", tidyGetMessageKey( tmessage ), tidyGetMessageOutput( tmessage ));

    /* loop through the arguments, if any, and print their details */
    pos = tidyGetMessageArguments( tmessage );
    while ( pos )
    {
        arg = tidyGetNextMessageArgument( tmessage, &pos );
        messageType = tidyGetArgType( tmessage, &arg );
        messageFormat = tidyGetArgFormat( tmessage, &arg );
        printf( "  Type = %u, Format = %s, Value = ", messageType, messageFormat );

        /* pick the value accessor that matches the argument's type */
        switch (messageType)
        {
            case tidyFormatType_STRING:
                printf("%s\n", tidyGetArgValueString( tmessage, &arg ));
                break;

            case tidyFormatType_INT:
                printf("%d\n", tidyGetArgValueInt( tmessage, &arg));
                break;

            case tidyFormatType_UINT:
                printf("%u\n", tidyGetArgValueUInt( tmessage, &arg));
                break;

            case tidyFormatType_DOUBLE:
                printf("%g\n", tidyGetArgValueDouble( tmessage, &arg));
                break;

            default:
                printf("%s", "unknown so far\n");
        }
    }

    return no;  /* suppress LibTidy's own output of this message */
#else
    return yes; /* needed so Tidy will not block output of this message */
#endif
}
2064 
2065 
2066 /** @} end experimental_stuff group */
2067 /* MARK: - main() */
2068 /***************************************************************************//**
2069  ** @defgroup main Main
2070  ** Let's do something here!
2071  *******************************************************************************
2072  ** @{
2073  */
2074 
2075 
main(int argc,char ** argv)2076 int main( int argc, char** argv )
2077 {
2078     ctmbstr prog = argv[0];
2079     ctmbstr cfgfil = NULL, errfil = NULL, htmlfil = NULL;
2080     TidyDoc tdoc = NULL;
2081     int status = 0;
2082 
2083     uint contentErrors = 0;
2084     uint contentWarnings = 0;
2085     uint accessWarnings = 0;
2086 
2087 #if defined(ENABLE_DEBUG_LOG) && defined(_MSC_VER)
2088 #  if defined(_CRTDBG_MAP_ALLOC)
2089     _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF);
2090 #  endif
2091 #endif
2092 
2093     tdoc = tidyCreate();
2094 
2095     tidySetMessageCallback( tdoc, reportCallback); /* experimental group */
2096     errout = stderr;  /* initialize to stderr */
2097 
2098     /* Set an atexit handler. */
2099     atexit( tidy_cleanup );
2100 
2101 #if defined(_WIN32)
2102     /* Force Windows console to use UTF, otherwise many characters will
2103      * be garbage. Note that East Asian languages *are* supported, but
2104      * only when Windows OS locale (not console only!) is set to an
2105      * East Asian language.
2106      */
2107     win_cp = GetConsoleOutputCP();
2108     SetConsoleOutputCP(CP_UTF8);
2109 #endif
2110 
2111     /*
2112      * Look for default configuration files using any of
2113      * the following possibilities:
2114      *  - TIDY_CONFIG_FILE - from tidyplatform.h, typically /etc/tidy.conf
2115      *  - HTML_TIDY        - environment variable
2116      *  - TIDY_USER_CONFIG_FILE - from tidyplatform.h, typically ~/tidy.conf
2117      */
2118 
2119 #ifdef TIDY_CONFIG_FILE
2120     if ( tidyFileExists( tdoc, TIDY_CONFIG_FILE) )
2121     {
2122         status = tidyLoadConfig( tdoc, TIDY_CONFIG_FILE );
2123         if ( status != 0 ) {
2124             fprintf(errout, tidyLocalizedString( TC_MAIN_ERROR_LOAD_CONFIG ), TIDY_CONFIG_FILE, status);
2125             fprintf(errout, "\n");
2126         }
2127     }
2128 #endif /* TIDY_CONFIG_FILE */
2129 
2130     if ( (cfgfil = getenv("HTML_TIDY")) != NULL )
2131     {
2132         status = tidyLoadConfig( tdoc, cfgfil );
2133         if ( status != 0 ) {
2134             fprintf(errout, tidyLocalizedString( TC_MAIN_ERROR_LOAD_CONFIG ), cfgfil, status);
2135             fprintf(errout, "\n");
2136         }
2137     }
2138 #ifdef TIDY_USER_CONFIG_FILE
2139     else if ( tidyFileExists( tdoc, TIDY_USER_CONFIG_FILE) )
2140     {
2141         status = tidyLoadConfig( tdoc, TIDY_USER_CONFIG_FILE );
2142         if ( status != 0 ) {
2143             fprintf(errout, tidyLocalizedString( TC_MAIN_ERROR_LOAD_CONFIG ), TIDY_USER_CONFIG_FILE, status);
2144             fprintf(errout, "\n");
2145         }
2146     }
2147 #endif /* TIDY_USER_CONFIG_FILE */
2148 
2149 
2150     /*
2151      * Read command line
2152      */
2153 
2154     while ( argc > 0 )
2155     {
2156         if (argc > 1 && argv[1][0] == '-')
2157         {
2158             /* support -foo and --foo */
2159             ctmbstr arg = argv[1] + 1;
2160 
2161             if ( strcasecmp(arg, "xml") == 0)
2162                 tidyOptSetBool( tdoc, TidyXmlTags, yes );
2163 
2164             else if ( strcasecmp(arg,   "asxml") == 0 ||
2165                      strcasecmp(arg, "asxhtml") == 0 )
2166             {
2167                 tidyOptSetBool( tdoc, TidyXhtmlOut, yes );
2168             }
2169             else if ( strcasecmp(arg,   "ashtml") == 0 )
2170                 tidyOptSetBool( tdoc, TidyHtmlOut, yes );
2171 
2172             else if ( strcasecmp(arg, "indent") == 0 )
2173             {
2174                 tidyOptSetInt( tdoc, TidyIndentContent, TidyAutoState );
2175                 if ( tidyOptGetInt(tdoc, TidyIndentSpaces) == 0 )
2176                     tidyOptResetToDefault( tdoc, TidyIndentSpaces );
2177             }
2178             else if ( strcasecmp(arg, "omit") == 0 )
2179                 tidyOptSetBool( tdoc, TidyOmitOptionalTags, yes );
2180 
2181             else if ( strcasecmp(arg, "upper") == 0 )
2182                 tidyOptSetBool( tdoc, TidyUpperCaseTags, yes );
2183 
2184             else if ( strcasecmp(arg, "clean") == 0 )
2185                 tidyOptSetBool( tdoc, TidyMakeClean, yes );
2186 
2187             else if ( strcasecmp(arg, "gdoc") == 0 )
2188                 tidyOptSetBool( tdoc, TidyGDocClean, yes );
2189 
2190             else if ( strcasecmp(arg, "bare") == 0 )
2191                 tidyOptSetBool( tdoc, TidyMakeBare, yes );
2192 
2193             else if ( strcasecmp(arg, "raw") == 0     ||
2194                      strcasecmp(arg, "ascii") == 0    ||
2195                      strcasecmp(arg, "latin0") == 0   ||
2196                      strcasecmp(arg, "latin1") == 0   ||
2197                      strcasecmp(arg, "utf8") == 0     ||
2198 #ifndef NO_NATIVE_ISO2022_SUPPORT
2199                      strcasecmp(arg, "iso2022") == 0  ||
2200 #endif
2201                      strcasecmp(arg, "utf16le") == 0  ||
2202                      strcasecmp(arg, "utf16be") == 0  ||
2203                      strcasecmp(arg, "utf16") == 0    ||
2204                      strcasecmp(arg, "shiftjis") == 0 ||
2205                      strcasecmp(arg, "big5") == 0     ||
2206                      strcasecmp(arg, "mac") == 0      ||
2207                      strcasecmp(arg, "win1252") == 0  ||
2208                      strcasecmp(arg, "ibm858") == 0 )
2209             {
2210                 tidySetCharEncoding( tdoc, arg );
2211             }
2212             else if ( strcasecmp(arg, "numeric") == 0 )
2213                 tidyOptSetBool( tdoc, TidyNumEntities, yes );
2214 
2215             else if ( strcasecmp(arg, "modify") == 0 ||
2216                      strcasecmp(arg, "change") == 0 ||  /* obsolete */
2217                      strcasecmp(arg, "update") == 0 )   /* obsolete */
2218             {
2219                 tidyOptSetBool( tdoc, TidyWriteBack, yes );
2220             }
2221             else if ( strcasecmp(arg, "errors") == 0 )
2222                 tidyOptSetBool( tdoc, TidyShowMarkup, no );
2223 
2224             else if ( strcasecmp(arg, "quiet") == 0 )
2225                 tidyOptSetBool( tdoc, TidyQuiet, yes );
2226 
2227             /* Currenly user must specify a language
2228              prior to anything that causes output */
2229             else if ( strcasecmp(arg, "language") == 0 ||
2230                      strcasecmp(arg,     "lang") == 0 )
2231                 if ( argc >= 3)
2232                 {
2233                     if ( strcasecmp(argv[2], "help") == 0 )
2234                     {
2235                         lang_help( tdoc );
2236                         exit(0);
2237                     }
2238                     if ( !tidySetLanguage( argv[2] ) )
2239                     {
2240                         printf(tidyLocalizedString(TC_STRING_LANG_NOT_FOUND),
2241                                argv[2], tidyGetLanguage());
2242                         printf("\n");
2243                     }
2244                     --argc;
2245                     ++argv;
2246                 }
2247                 else
2248                 {
2249                     printf( "%s\n", tidyLocalizedString(TC_STRING_LANG_MUST_SPECIFY));
2250                 }
2251 
2252             else if ( strcasecmp(arg, "help") == 0 ||
2253                         strcasecmp(arg, "-help") == 0 ||
2254                         strcasecmp(arg,    "h") == 0 || *arg == '?' )
2255             {
2256                 help( tdoc, prog );
2257                 tidyRelease( tdoc );
2258                 return 0; /* success */
2259             }
2260             else if ( strcasecmp(arg, "xml-help") == 0)
2261             {
2262                 xml_help( );
2263                 tidyRelease( tdoc );
2264                 return 0; /* success */
2265             }
2266             else if ( strcasecmp(arg, "xml-error-strings") == 0)
2267             {
2268                 xml_error_strings( tdoc );
2269                 tidyRelease( tdoc );
2270                 return 0; /* success */
2271             }
2272             else if ( strcasecmp(arg, "xml-options-strings") == 0)
2273             {
2274                 xml_options_strings( tdoc );
2275                 tidyRelease( tdoc );
2276                 return 0; /* success */
2277             }
2278             else if ( strcasecmp(arg, "xml-strings") == 0)
2279             {
2280                 xml_strings( );
2281                 tidyRelease( tdoc );
2282                 return 0; /* success */
2283             }
2284             else if ( strcasecmp(arg, "help-config") == 0 )
2285             {
2286                 optionhelp( tdoc );
2287                 tidyRelease( tdoc );
2288                 return 0; /* success */
2289             }
2290             else if ( strcasecmp(arg, "help-env") == 0 )
2291             {
2292                 helpEnv( tdoc );
2293                 tidyRelease( tdoc );
2294                 return 0; /* success */
2295             }
2296             else if ( strcasecmp(arg, "help-option") == 0 )
2297             {
2298                 if ( argc >= 3)
2299                 {
2300                     optionDescribe( tdoc, argv[2] );
2301                 }
2302                 else
2303                 {
2304                     printf( "%s\n", tidyLocalizedString(TC_STRING_MUST_SPECIFY));
2305                 }
2306                 tidyRelease( tdoc );
2307                 return 0; /* success */
2308             }
2309             else if ( strcasecmp(arg, "xml-config") == 0 )
2310             {
2311                 XMLoptionhelp( tdoc );
2312                 tidyRelease( tdoc );
2313                 return 0; /* success */
2314             }
2315             else if ( strcasecmp(arg, "show-config") == 0 )
2316             {
2317                 optionvalues( tdoc );
2318                 tidyRelease( tdoc );
2319                 return 0; /* success */
2320             }
2321             else if ( strcasecmp(arg, "export-config") == 0 )
2322             {
2323                 exportOptionValues( tdoc );
2324                 tidyRelease( tdoc );
2325                 return 0; /* success */
2326             }
2327             else if ( strcasecmp(arg, "export-default-config") == 0 )
2328             {
2329                 exportDefaultOptionValues( tdoc );
2330                 tidyRelease( tdoc );
2331                 return 0; /* success */
2332             }
2333             else if ( strcasecmp(arg, "config") == 0 )
2334             {
2335                 if ( argc >= 3 )
2336                 {
2337                     ctmbstr post;
2338 
2339                     tidyLoadConfig( tdoc, argv[2] );
2340 
2341                     /* Set new error output stream if setting changed */
2342                     post = tidyOptGetValue( tdoc, TidyErrFile );
2343                     if ( post && (!errfil || !samefile(errfil, post)) )
2344                     {
2345                         errfil = post;
2346                         errout = tidySetErrorFile( tdoc, post );
2347                     }
2348 
2349                     --argc;
2350                     ++argv;
2351                 }
2352             }
2353 
2354             else if ( strcasecmp(arg, "output") == 0 ||
2355                         strcasecmp(arg, "-output-file") == 0 ||
2356                         strcasecmp(arg, "o") == 0 )
2357             {
2358                 if ( argc >= 3 )
2359                 {
2360                     tidyOptSetValue( tdoc, TidyOutFile, argv[2] );
2361                     --argc;
2362                     ++argv;
2363                 }
2364             }
2365             else if ( strcasecmp(arg,  "file") == 0 ||
2366                         strcasecmp(arg, "-file") == 0 ||
2367                         strcasecmp(arg,     "f") == 0 )
2368             {
2369                 if ( argc >= 3 )
2370                 {
2371                     errfil = argv[2];
2372                     errout = tidySetErrorFile( tdoc, errfil );
2373                     --argc;
2374                     ++argv;
2375                 }
2376             }
2377             else if ( strcasecmp(arg,  "wrap") == 0 ||
2378                         strcasecmp(arg, "-wrap") == 0 ||
2379                         strcasecmp(arg,     "w") == 0 )
2380             {
2381                 if ( argc >= 3 )
2382                 {
2383                     uint wraplen = 0;
2384                     int nfields = sscanf( argv[2], "%u", &wraplen );
2385                     tidyOptSetInt( tdoc, TidyWrapLen, wraplen );
2386                     if (nfields > 0)
2387                     {
2388                         --argc;
2389                         ++argv;
2390                     }
2391                 }
2392             }
2393             else if ( strcasecmp(arg,  "version") == 0 ||
2394                         strcasecmp(arg, "-version") == 0 ||
2395                         strcasecmp(arg,        "v") == 0 )
2396             {
2397                 version( tdoc );
2398                 tidyRelease( tdoc );
2399                 return 0;  /* success */
2400 
2401             }
2402             else if ( strncmp(argv[1], "--", 2 ) == 0)
2403             {
2404                 if ( tidyOptParseValue(tdoc, argv[1]+2, argv[2]) )
2405                 {
2406                     /* Set new error output stream if setting changed */
2407                     ctmbstr post = tidyOptGetValue( tdoc, TidyErrFile );
2408                     if ( post && (!errfil || !samefile(errfil, post)) )
2409                     {
2410                         errfil = post;
2411                         errout = tidySetErrorFile( tdoc, post );
2412                     }
2413 
2414                     ++argv;
2415                     --argc;
2416                 }
2417             }
2418                 else if ( strcasecmp(arg, "access") == 0 )
2419                 {
2420                     if ( argc >= 3 )
2421                     {
2422                         uint acclvl = 0;
2423                         int nfields = sscanf( argv[2], "%u", &acclvl );
2424                         tidyOptSetInt( tdoc, TidyAccessibilityCheckLevel, acclvl );
2425                         if (nfields > 0)
2426                         {
2427                             --argc;
2428                             ++argv;
2429                         }
2430                     }
2431                 }
2432 
2433                 else
2434                 {
2435                     uint c;
2436                     ctmbstr s = argv[1];
2437 
2438                     while ( (c = *++s) != '\0' )
2439                     {
2440                         switch ( c )
2441                         {
2442                             case 'i':
2443                                 tidyOptSetInt( tdoc, TidyIndentContent, TidyAutoState );
2444                                 if ( tidyOptGetInt(tdoc, TidyIndentSpaces) == 0 )
2445                                     tidyOptResetToDefault( tdoc, TidyIndentSpaces );
2446                                 break;
2447 
2448                             case 'u':
2449                                 tidyOptSetBool( tdoc, TidyUpperCaseTags, yes );
2450                                 break;
2451 
2452                             case 'c':
2453                                 tidyOptSetBool( tdoc, TidyMakeClean, yes );
2454                                 break;
2455 
2456                             case 'g':
2457                                 tidyOptSetBool( tdoc, TidyGDocClean, yes );
2458                                 break;
2459 
2460                             case 'b':
2461                                 tidyOptSetBool( tdoc, TidyMakeBare, yes );
2462                                 break;
2463 
2464                             case 'n':
2465                                 tidyOptSetBool( tdoc, TidyNumEntities, yes );
2466                                 break;
2467 
2468                             case 'm':
2469                                 tidyOptSetBool( tdoc, TidyWriteBack, yes );
2470                                 break;
2471 
2472                             case 'e':
2473                                 tidyOptSetBool( tdoc, TidyShowMarkup, no );
2474                                 break;
2475 
2476                             case 'q':
2477                                 tidyOptSetBool( tdoc, TidyQuiet, yes );
2478                                 break;
2479 
2480                             default:
2481                                 unknownOption( tdoc, c );
2482                                 break;
2483                         }
2484                     }
2485                 }
2486 
2487             --argc;
2488             ++argv;
2489             continue;
2490         }
2491 
2492 
2493         if ( argc > 1 )
2494         {
2495             htmlfil = argv[1];
2496 #ifdef ENABLE_DEBUG_LOG
2497             SPRTF("Tidy: '%s'\n", htmlfil);
2498 #else /* !ENABLE_DEBUG_LOG */
            /* Issue #713 - show-filename option: echo the input file name to errout */
2500             if (tidyOptGetBool(tdoc, TidyShowFilename))
2501             {
2502                 fprintf(errout, "Tidy: '%s'", htmlfil);
2503                 fprintf(errout, "\n");
2504             }
2505 #endif /* ENABLE_DEBUG_LOG yes/no */
2506             if ( tidyOptGetBool(tdoc, TidyEmacs) || tidyOptGetBool(tdoc, TidyShowFilename))
2507                 tidySetEmacsFile( tdoc, htmlfil );
2508             status = tidyParseFile( tdoc, htmlfil );
2509         }
2510         else
2511         {
2512             htmlfil = "stdin";
2513             status = tidyParseStdin( tdoc );
2514         }
2515 
2516         if ( status >= 0 )
2517             status = tidyCleanAndRepair( tdoc );
2518 
2519         if ( status >= 0 ) {
2520             status = tidyRunDiagnostics( tdoc );
2521         }
2522         if ( status > 1 ) /* If errors, do we want to force output? */
2523             status = ( tidyOptGetBool(tdoc, TidyForceOutput) ? status : -1 );
2524 
2525         if ( status >= 0 && tidyOptGetBool(tdoc, TidyShowMarkup) )
2526         {
2527             if ( tidyOptGetBool(tdoc, TidyWriteBack) && argc > 1 )
2528                 status = tidySaveFile( tdoc, htmlfil );
2529             else
2530             {
2531                 ctmbstr outfil = tidyOptGetValue( tdoc, TidyOutFile );
2532                 if ( outfil ) {
2533                     status = tidySaveFile( tdoc, outfil );
2534                 } else {
2535 #ifdef ENABLE_DEBUG_LOG
2536                     static char tmp_buf[264];
2537                     sprintf(tmp_buf,"%s.html",get_log_file());
2538                     status = tidySaveFile( tdoc, tmp_buf );
2539                     SPRTF("Saved tidied content to '%s'\n",tmp_buf);
2540 #else
2541                     status = tidySaveStdout( tdoc );
2542 #endif
2543                 }
2544             }
2545         }
2546 
2547         contentErrors   += tidyErrorCount( tdoc );
2548         contentWarnings += tidyWarningCount( tdoc );
2549         accessWarnings  += tidyAccessWarningCount( tdoc );
2550 
2551         --argc;
2552         ++argv;
2553 
2554         if ( argc <= 1 )
2555             break;
2556     } /* read command line loop */
2557 
2558     /* blank line for screen formatting */
2559     if ( errout == stderr && !contentErrors && !tidyOptGetBool( tdoc, TidyQuiet ) )
2560         fprintf(errout, "\n");
2561 
2562     /* footnote printing only if errors or warnings */
2563     if ( contentErrors + contentWarnings > 0 )
2564         tidyErrorSummary(tdoc);
2565 
2566     /* prints the general info, if applicable */
2567     tidyGeneralInfo(tdoc);
2568 
2569     /* called to free hash tables etc. */
2570     tidyRelease( tdoc );
2571 
2572     /* return status can be used by scripts */
2573     if ( contentErrors > 0 )
2574         return 2;
2575 
2576     if ( contentWarnings > 0 )
2577         return 1;
2578 
2579     /* 0 signifies all is ok */
2580     return 0;
2581 }
2582 
2583 
2584 /** @} end main group */
2585 /** @} end console_application group */
2586 
2587 
2588 /*
2589  * local variables:
2590  * mode: c
2591  * indent-tabs-mode: nil
2592  * c-basic-offset: 4
2593  * eval: (c-set-offset 'substatement-open 0)
2594  * end:
2595  */
2596