1 /* tidylib.c -- internal library definitions
2 
3   (c) 1998-2008 (W3C) MIT, ERCIM, Keio University
4   See tidy.h for the copyright notice.
5 
6   Defines HTML Tidy API implemented by tidy library.
7 
8   Very rough initial cut for discussion purposes.
9 
10   Public interface is const-correct and doesn't explicitly depend
11   on any globals.  Thus, thread-safety may be introduced w/out
12   changing the interface.
13 
14   Looking ahead to a C++ wrapper, C functions always pass
15   this-equivalent as 1st arg.
16 
17   Created 2001-05-20 by Charles Reitzel
18 
19 */
20 
21 #include <errno.h>
22 
23 #include "tidy-int.h"
24 #include "parser.h"
25 #include "clean.h"
26 #include "gdoc.h"
27 #include "config.h"
28 #include "message.h"
29 #include "messageobj.h"
30 #include "pprint.h"
31 #include "entities.h"
32 #include "tmbstr.h"
33 #include "utf8.h"
34 #include "mappedio.h"
35 #include "language.h"
36 #include "attrs.h"
37 #include "sprtf.h"
38 #if SUPPORT_LOCALIZATIONS
39 #  include "stdlib.h"
40 #  include "locale.h"
41 #endif
42 
43 /* Create/Destroy a Tidy "document" object */
44 static TidyDocImpl* tidyDocCreate( TidyAllocator *allocator );
45 static void         tidyDocRelease( TidyDocImpl* impl );
46 
47 static int          tidyDocStatus( TidyDocImpl* impl );
48 
49 /* Parse Markup */
50 static int          tidyDocParseFile( TidyDocImpl* impl, ctmbstr htmlfil );
51 static int          tidyDocParseStdin( TidyDocImpl* impl );
52 static int          tidyDocParseString( TidyDocImpl* impl, ctmbstr content );
53 static int          tidyDocParseBuffer( TidyDocImpl* impl, TidyBuffer* inbuf );
54 static int          tidyDocParseSource( TidyDocImpl* impl, TidyInputSource* docIn );
55 
56 
57 /* Execute post-parse diagnostics and cleanup.
58 ** Note, the order is important.  You will get different
59 ** results from the diagnostics depending on if they are run
60 ** pre-or-post repair.
61 */
62 static int          tidyDocRunDiagnostics( TidyDocImpl* doc );
63 static void         tidyDocReportDoctype( TidyDocImpl* doc );
64 static int          tidyDocCleanAndRepair( TidyDocImpl* doc );
65 
66 
67 /* Save cleaned up file to file/buffer/sink */
68 static int          tidyDocSaveFile( TidyDocImpl* impl, ctmbstr htmlfil );
69 static int          tidyDocSaveStdout( TidyDocImpl* impl );
70 static int          tidyDocSaveString( TidyDocImpl* impl, tmbstr buffer, uint* buflen );
71 static int          tidyDocSaveBuffer( TidyDocImpl* impl, TidyBuffer* outbuf );
72 static int          tidyDocSaveSink( TidyDocImpl* impl, TidyOutputSink* docOut );
73 static int          tidyDocSaveStream( TidyDocImpl* impl, StreamOut* out );
74 
75 
76 /* Tidy public interface
77 **
78 ** Most functions return an integer:
79 **
80 ** 0    -> SUCCESS
81 ** >0   -> WARNING
82 ** <0   -> ERROR
83 **
84 */
85 
tidyCreate(void)86 TidyDoc TIDY_CALL       tidyCreate(void)
87 {
88   TidyDocImpl* impl = tidyDocCreate( &TY_(g_default_allocator) );
89   return tidyImplToDoc( impl );
90 }
91 
tidyCreateWithAllocator(TidyAllocator * allocator)92 TidyDoc TIDY_CALL tidyCreateWithAllocator( TidyAllocator *allocator )
93 {
94   TidyDocImpl* impl = tidyDocCreate( allocator );
95   return tidyImplToDoc( impl );
96 }
97 
tidyRelease(TidyDoc tdoc)98 void TIDY_CALL          tidyRelease( TidyDoc tdoc )
99 {
100   TidyDocImpl* impl = tidyDocToImpl( tdoc );
101   tidyDocRelease( impl );
102 }
103 
tidyDocCreate(TidyAllocator * allocator)104 TidyDocImpl* tidyDocCreate( TidyAllocator *allocator )
105 {
106     TidyDocImpl* doc = (TidyDocImpl*)TidyAlloc( allocator, sizeof(TidyDocImpl) );
107     TidyClearMemory( doc, sizeof(*doc) );
108     doc->allocator = allocator;
109 
110     TY_(InitMap)();
111     TY_(InitTags)( doc );
112     TY_(InitAttrs)( doc );
113     TY_(InitConfig)( doc );
114     TY_(InitPrintBuf)( doc );
115 
116     /* Set the locale for tidy's output. This both configures
117     ** LibTidy to use the environment's locale as well as the
118     ** standard library.
119     */
120 #if SUPPORT_LOCALIZATIONS
121     if ( TY_(tidyGetLanguageSetByUser)() == no )
122     {
123         if( ! TY_(tidySetLanguage)( getenv( "LC_MESSAGES" ) ) )
124         {
125             if( ! TY_(tidySetLanguage)( getenv( "LANG" ) ) )
126             {
127                 /*\
128                 *  Is. #770 #783 #780 #790 and maybe others -
129                 *  TY_(tidySetLanguage)( setlocale( LC_ALL, "" ) );
130                 *  this seems a 'bad' choice!
131                \*/
132             }
133         }
134     }
135 #endif
136 
137     /* By default, wire tidy messages to standard error.
138     ** Document input will be set by parsing routines.
139     ** Document output will be set by pretty print routines.
140     ** Config input will be set by config parsing routines.
141     ** But we need to start off with a way to report errors.
142     */
143     doc->errout = TY_(StdErrOutput)();
144     return doc;
145 }
146 
/* Release everything owned by a document, then the document itself.
** A NULL `doc` is ignored.  The order of the calls below matters; see
** the note ahead of FreeLexer.
*/
void          tidyDocRelease( TidyDocImpl* doc )
{
    if ( doc == NULL )
        return;

    /* doc in/out opened and closed by parse/print routines */
    assert( doc->docIn == NULL );
    assert( doc->docOut == NULL );

    TY_(ReleaseStreamOut)( doc, doc->errout );
    doc->errout = NULL;

    TY_(FreePrintBuf)( doc );
    TY_(FreeNode)(doc, &doc->root);
    TidyClearMemory(&doc->root, sizeof(Node));

    if (doc->givenDoctype)
        TidyDocFree(doc, doc->givenDoctype);

    TY_(FreeConfig)( doc );
    TY_(FreeAttrTable)( doc );
    TY_(FreeAttrPriorityList)( doc );
    /* The argument list belongs outside TY_(), like every sibling call. */
    TY_(FreeMutedMessageList)( doc );
    TY_(FreeTags)( doc );
    /*\
     *  Issue #186 - Now FreeNode depend on the doctype, so the lexer is needed
     *  to determine which hash is to be used, so free it last.
    \*/
    TY_(FreeLexer)( doc );
    TidyDocFree( doc, doc );
}
178 
/* Let the application store a chunk of data with each Tidy instance.
** Useful for callbacks.
*/
tidySetAppData(TidyDoc tdoc,void * appData)182 void TIDY_CALL        tidySetAppData( TidyDoc tdoc, void* appData )
183 {
184   TidyDocImpl* impl = tidyDocToImpl( tdoc );
185   if ( impl )
186     impl->appData = appData;
187 }
tidyGetAppData(TidyDoc tdoc)188 void* TIDY_CALL       tidyGetAppData( TidyDoc tdoc )
189 {
190   TidyDocImpl* impl = tidyDocToImpl( tdoc );
191   if ( impl )
192     return impl->appData;
193   return NULL;
194 }
195 
tidyReleaseDate(void)196 ctmbstr TIDY_CALL     tidyReleaseDate(void)
197 {
198     return TY_(ReleaseDate)();
199 }
200 
tidyLibraryVersion(void)201 ctmbstr TIDY_CALL     tidyLibraryVersion(void)
202 {
203     return TY_(tidyLibraryVersion)();
204 }
205 
tidyPlatform(void)206 ctmbstr TIDY_CALL     tidyPlatform(void)
207 {
208 #ifdef PLATFORM_NAME
209     return PLATFORM_NAME;
210 #else
211     return NULL;
212 #endif
213 }
214 
215 
216 /* Get/set configuration options
217 */
tidySetOptionCallback(TidyDoc tdoc,TidyOptCallback pOptCallback)218 Bool TIDY_CALL     tidySetOptionCallback( TidyDoc tdoc, TidyOptCallback pOptCallback )
219 {
220   TidyDocImpl* impl = tidyDocToImpl( tdoc );
221   if ( impl )
222   {
223     impl->pOptCallback = pOptCallback;
224     return yes;
225   }
226   return no;
227 }
228 
tidySetConfigCallback(TidyDoc tdoc,TidyConfigCallback pConfigCallback)229 Bool TIDY_CALL     tidySetConfigCallback(TidyDoc tdoc, TidyConfigCallback pConfigCallback)
230 {
231   TidyDocImpl* impl = tidyDocToImpl( tdoc );
232   if ( impl )
233   {
234     impl->pConfigCallback = pConfigCallback;
235     return yes;
236   }
237   return no;
238 }
239 
tidySetConfigChangeCallback(TidyDoc tdoc,TidyConfigChangeCallback pCallback)240 Bool TIDY_CALL    tidySetConfigChangeCallback(TidyDoc tdoc, TidyConfigChangeCallback pCallback)
241 {
242   TidyDocImpl* impl = tidyDocToImpl( tdoc );
243   if ( impl )
244   {
245     impl->pConfigChangeCallback = pCallback;
246     return yes;
247   }
248   return no;
249 }
250 
251 
252 
tidyLoadConfig(TidyDoc tdoc,ctmbstr cfgfil)253 int TIDY_CALL     tidyLoadConfig( TidyDoc tdoc, ctmbstr cfgfil )
254 {
255     TidyDocImpl* impl = tidyDocToImpl( tdoc );
256     if ( impl )
257         return TY_(ParseConfigFile)( impl, cfgfil );
258     return -EINVAL;
259 }
260 
tidyLoadConfigEnc(TidyDoc tdoc,ctmbstr cfgfil,ctmbstr charenc)261 int TIDY_CALL     tidyLoadConfigEnc( TidyDoc tdoc, ctmbstr cfgfil, ctmbstr charenc )
262 {
263     TidyDocImpl* impl = tidyDocToImpl( tdoc );
264     if ( impl )
265         return TY_(ParseConfigFileEnc)( impl, cfgfil, charenc );
266     return -EINVAL;
267 }
268 
tidySetCharEncoding(TidyDoc tdoc,ctmbstr encnam)269 int TIDY_CALL         tidySetCharEncoding( TidyDoc tdoc, ctmbstr encnam )
270 {
271     TidyDocImpl* impl = tidyDocToImpl( tdoc );
272     if ( impl )
273     {
274         int enc = TY_(CharEncodingId)( impl, encnam );
275         if ( enc >= 0 && TY_(AdjustCharEncoding)(impl, enc) )
276             return 0;
277 
278         TY_(ReportBadArgument)( impl, "char-encoding" );
279     }
280     return -EINVAL;
281 }
282 
tidySetInCharEncoding(TidyDoc tdoc,ctmbstr encnam)283 int TIDY_CALL           tidySetInCharEncoding( TidyDoc tdoc, ctmbstr encnam )
284 {
285     TidyDocImpl* impl = tidyDocToImpl( tdoc );
286     if ( impl )
287     {
288         int enc = TY_(CharEncodingId)( impl, encnam );
289         if ( enc >= 0 && TY_(SetOptionInt)( impl, TidyInCharEncoding, enc ) )
290             return 0;
291 
292         TY_(ReportBadArgument)( impl, "in-char-encoding" );
293     }
294     return -EINVAL;
295 }
296 
tidySetOutCharEncoding(TidyDoc tdoc,ctmbstr encnam)297 int TIDY_CALL           tidySetOutCharEncoding( TidyDoc tdoc, ctmbstr encnam )
298 {
299     TidyDocImpl* impl = tidyDocToImpl( tdoc );
300     if ( impl )
301     {
302         int enc = TY_(CharEncodingId)( impl, encnam );
303         if ( enc >= 0 && TY_(SetOptionInt)( impl, TidyOutCharEncoding, enc ) )
304             return 0;
305 
306         TY_(ReportBadArgument)( impl, "out-char-encoding" );
307     }
308     return -EINVAL;
309 }
310 
tidyOptGetIdForName(ctmbstr optnam)311 TidyOptionId TIDY_CALL tidyOptGetIdForName( ctmbstr optnam )
312 {
313     const TidyOptionImpl* option = TY_(lookupOption)( optnam );
314     if ( option )
315         return option->id;
316     return N_TIDY_OPTIONS;  /* Error */
317 }
318 
tidyGetOptionList(TidyDoc tdoc)319 TidyIterator TIDY_CALL  tidyGetOptionList( TidyDoc tdoc )
320 {
321     TidyDocImpl* impl = tidyDocToImpl( tdoc );
322     if ( impl )
323         return TY_(getOptionList)( impl );
324     return (TidyIterator) -1;
325 }
326 
tidyGetNextOption(TidyDoc tdoc,TidyIterator * pos)327 TidyOption TIDY_CALL    tidyGetNextOption( TidyDoc tdoc, TidyIterator* pos )
328 {
329     TidyDocImpl* impl = tidyDocToImpl( tdoc );
330     const TidyOptionImpl* option = NULL;
331     if ( impl )
332         option = TY_(getNextOption)( impl, pos );
333     else if ( pos )
334         *pos = 0;
335     return tidyImplToOption( option );
336 }
337 
338 
tidyGetOption(TidyDoc ARG_UNUSED (tdoc),TidyOptionId optId)339 TidyOption TIDY_CALL    tidyGetOption( TidyDoc ARG_UNUSED(tdoc), TidyOptionId optId )
340 {
341     const TidyOptionImpl* option = TY_(getOption)( optId );
342     return tidyImplToOption( option );
343 }
tidyGetOptionByName(TidyDoc ARG_UNUSED (doc),ctmbstr optnam)344 TidyOption TIDY_CALL    tidyGetOptionByName( TidyDoc ARG_UNUSED(doc), ctmbstr optnam )
345 {
346     const TidyOptionImpl* option = TY_(lookupOption)( optnam );
347     return tidyImplToOption( option );
348 }
349 
tidyOptGetId(TidyOption topt)350 TidyOptionId TIDY_CALL  tidyOptGetId( TidyOption topt )
351 {
352     const TidyOptionImpl* option = tidyOptionToImpl( topt );
353     if ( option )
354         return option->id;
355     return N_TIDY_OPTIONS;
356 }
tidyOptGetName(TidyOption topt)357 ctmbstr TIDY_CALL       tidyOptGetName( TidyOption topt )
358 {
359     const TidyOptionImpl* option = tidyOptionToImpl( topt );
360     if ( option )
361         return option->name;
362     return NULL;
363 }
tidyOptGetType(TidyOption topt)364 TidyOptionType TIDY_CALL tidyOptGetType( TidyOption topt )
365 {
366     const TidyOptionImpl* option = tidyOptionToImpl( topt );
367     if ( option )
368         return option->type;
369     return (TidyOptionType) -1;
370 }
tidyOptionIsList(TidyOption opt)371 Bool TIDY_CALL           tidyOptionIsList( TidyOption opt )
372 {
373     const TidyOptionImpl* option = tidyOptionToImpl( opt );
374     if ( option )
375         return TY_(getOptionIsList)( option->id );
376     return no;
377 }
tidyOptGetCategory(TidyOption topt)378 TidyConfigCategory TIDY_CALL tidyOptGetCategory( TidyOption topt )
379 {
380     const TidyOptionImpl* option = tidyOptionToImpl( topt );
381     if ( option )
382         return option->category;
383     return (TidyConfigCategory) -1;
384 }
tidyOptGetDefault(TidyOption topt)385 ctmbstr TIDY_CALL       tidyOptGetDefault( TidyOption topt )
386 {
387     const TidyOptionImpl* option = tidyOptionToImpl( topt );
388     /* Special case for TidyDoctype, because it is declared as string */
389     if ( option && option->id == TidyDoctype )
390     {
391         const TidyOptionImpl* newopt = TY_(getOption)( TidyDoctypeMode );
392         return TY_(GetPickListLabelForPick)( TidyDoctypeMode, newopt->dflt );
393     }
394     if ( option && option->type == TidyString )
395         return option->pdflt; /* Issue #306 - fix an old typo hidden by a cast! */
396     return NULL;
397 }
tidyOptGetDefaultInt(TidyOption topt)398 ulong TIDY_CALL          tidyOptGetDefaultInt( TidyOption topt )
399 {
400     const TidyOptionImpl* option = tidyOptionToImpl( topt );
401     if ( option && option->type != TidyString )
402         return option->dflt;
403 
404     /* Special case for TidyDoctype, because it has a picklist */
405     if ( option->id == TidyDoctype )
406     {
407         const TidyOptionImpl* newopt = TY_(getOption)( TidyDoctypeMode );
408         return newopt->dflt;
409     }
410 
411     return ~0U;
412 }
tidyOptGetDefaultBool(TidyOption topt)413 Bool TIDY_CALL          tidyOptGetDefaultBool( TidyOption topt )
414 {
415     const TidyOptionImpl* option = tidyOptionToImpl( topt );
416     if ( option && option->type != TidyString )
417         return ( option->dflt ? yes : no );
418     return no;
419 }
tidyOptIsReadOnly(TidyOption topt)420 Bool TIDY_CALL          tidyOptIsReadOnly( TidyOption topt )
421 {
422     const TidyOptionImpl* option = tidyOptionToImpl( topt );
423     if ( option  )
424         return ( option->parser == NULL );
425     return yes;
426 }
427 
428 
tidyOptGetPickList(TidyOption topt)429 TidyIterator TIDY_CALL  tidyOptGetPickList( TidyOption topt )
430 {
431     const TidyOptionImpl* option = tidyOptionToImpl( topt );
432     if ( option )
433       return TY_(getOptionPickList)( option );
434     return (TidyIterator) -1;
435 }
tidyOptGetNextPick(TidyOption topt,TidyIterator * pos)436 ctmbstr TIDY_CALL       tidyOptGetNextPick( TidyOption topt, TidyIterator* pos )
437 {
438     const TidyOptionImpl* option = tidyOptionToImpl( topt );
439     if ( option )
440         return TY_(getNextOptionPick)( option, pos );
441     return NULL;
442 }
443 
444 
tidyOptGetValue(TidyDoc tdoc,TidyOptionId optId)445 ctmbstr TIDY_CALL       tidyOptGetValue( TidyDoc tdoc, TidyOptionId optId )
446 {
447     TidyDocImpl* impl = tidyDocToImpl( tdoc );
448     ctmbstr optval = NULL;
449     if ( impl )
450     {
451         if ( optId == TidyDoctype )
452         {
453             /* Special case for TidyDoctype, because it has a picklist and is a string. */
454             uint pick = tidyOptGetInt( tdoc, TidyDoctypeMode );
455             if ( pick != TidyDoctypeUser )
456             {
457                 optval = TY_(GetPickListLabelForPick)( TidyDoctypeMode, pick );
458             } else {
459                 optval = cfgStr( impl, optId );
460             }
461         } else {
462             /* Standard case. */
463             optval = cfgStr( impl, optId );
464         }
465     }
466     return optval;
467 }
tidyOptSetValue(TidyDoc tdoc,TidyOptionId optId,ctmbstr val)468 Bool TIDY_CALL        tidyOptSetValue( TidyDoc tdoc, TidyOptionId optId, ctmbstr val )
469 {
470   TidyDocImpl* impl = tidyDocToImpl( tdoc );
471   if ( impl )
472     return TY_(ParseConfigValue)( impl, optId, val );
473   return no;
474 }
tidyOptParseValue(TidyDoc tdoc,ctmbstr optnam,ctmbstr val)475 Bool TIDY_CALL        tidyOptParseValue( TidyDoc tdoc, ctmbstr optnam, ctmbstr val )
476 {
477   TidyDocImpl* impl = tidyDocToImpl( tdoc );
478   if ( impl )
479     return TY_(ParseConfigOption)( impl, optnam, val );
480   return no;
481 }
482 
tidyOptGetInt(TidyDoc tdoc,TidyOptionId optId)483 ulong TIDY_CALL        tidyOptGetInt( TidyDoc tdoc, TidyOptionId optId )
484 {
485     TidyDocImpl* impl = tidyDocToImpl( tdoc );
486     ulong opti = 0;
487     if ( impl )
488     {
489         /* Special case for TidyDoctype, because it has a picklist */
490         if ( optId == TidyDoctype )
491             opti = cfg( impl, TidyDoctypeMode);
492         else
493             opti = cfg( impl, optId );
494     }
495     return opti;
496 }
497 
tidyOptSetInt(TidyDoc tdoc,TidyOptionId optId,ulong val)498 Bool TIDY_CALL        tidyOptSetInt( TidyDoc tdoc, TidyOptionId optId, ulong val )
499 {
500     TidyDocImpl* impl = tidyDocToImpl( tdoc );
501     if ( impl )
502     {
503         /* Special case for TidyDoctype, because it has a picklist */
504         if ( optId == TidyDoctype )
505             return TY_(SetOptionInt)( impl, TidyDoctypeMode, val );
506         else
507             return TY_(SetOptionInt)( impl, optId, val );
508     }
509     return no;
510 }
511 
tidyOptGetBool(TidyDoc tdoc,TidyOptionId optId)512 Bool TIDY_CALL         tidyOptGetBool( TidyDoc tdoc, TidyOptionId optId )
513 {
514     TidyDocImpl* impl = tidyDocToImpl( tdoc );
515     Bool optb = no;
516     if ( impl )
517     {
518         const TidyOptionImpl* option = TY_(getOption)( optId );
519         if ( option )
520         {
521             optb = cfgBool( impl, optId );
522         }
523     }
524     return optb;
525 }
526 
tidyOptSetBool(TidyDoc tdoc,TidyOptionId optId,Bool val)527 Bool TIDY_CALL        tidyOptSetBool( TidyDoc tdoc, TidyOptionId optId, Bool val )
528 {
529     TidyDocImpl* impl = tidyDocToImpl( tdoc );
530     if ( impl )
531         return TY_(SetOptionBool)( impl, optId, val );
532     return no;
533 }
534 
tidyOptGetEncName(TidyDoc tdoc,TidyOptionId optId)535 ctmbstr TIDY_CALL       tidyOptGetEncName( TidyDoc tdoc, TidyOptionId optId )
536 {
537   uint enc = tidyOptGetInt( tdoc, optId );
538   return TY_(CharEncodingOptName)( enc );
539 }
540 
tidyOptGetCurrPick(TidyDoc tdoc,TidyOptionId optId)541 ctmbstr TIDY_CALL       tidyOptGetCurrPick( TidyDoc tdoc, TidyOptionId optId )
542 {
543     uint pick = tidyOptGetInt( tdoc, optId );
544     return TY_(GetPickListLabelForPick)( optId, pick );
545 }
546 
547 
tidyOptGetDeclTagList(TidyDoc tdoc)548 TidyIterator TIDY_CALL tidyOptGetDeclTagList( TidyDoc tdoc )
549 {
550     TidyDocImpl* impl = tidyDocToImpl( tdoc );
551     TidyIterator declIter = 0;
552     if ( impl )
553         declIter = TY_(GetDeclaredTagList)( impl );
554     return declIter;
555 }
556 
tidyOptGetNextDeclTag(TidyDoc tdoc,TidyOptionId optId,TidyIterator * iter)557 ctmbstr TIDY_CALL       tidyOptGetNextDeclTag( TidyDoc tdoc, TidyOptionId optId,
558                                      TidyIterator* iter )
559 {
560     TidyDocImpl* impl = tidyDocToImpl( tdoc );
561     ctmbstr tagnam = NULL;
562     if ( impl )
563     {
564         UserTagType tagtyp = tagtype_null;
565         if ( optId == TidyInlineTags )
566             tagtyp = tagtype_inline;
567         else if ( optId == TidyBlockTags )
568             tagtyp = tagtype_block;
569         else if ( optId == TidyEmptyTags )
570             tagtyp = tagtype_empty;
571         else if ( optId == TidyPreTags )
572             tagtyp = tagtype_pre;
573         if ( tagtyp != tagtype_null )
574             tagnam = TY_(GetNextDeclaredTag)( impl, tagtyp, iter );
575     }
576     return tagnam;
577 }
578 
tidyOptGetPriorityAttrList(TidyDoc tdoc)579 TidyIterator TIDY_CALL tidyOptGetPriorityAttrList( TidyDoc tdoc )
580 {
581     TidyDocImpl* impl = tidyDocToImpl( tdoc );
582     if ( impl )
583         return TY_(getPriorityAttrList)( impl );
584     return (TidyIterator) -1;
585 }
586 
tidyOptGetNextPriorityAttr(TidyDoc tdoc,TidyIterator * iter)587 ctmbstr TIDY_CALL      tidyOptGetNextPriorityAttr(TidyDoc tdoc, TidyIterator* iter )
588 {
589     TidyDocImpl* impl = tidyDocToImpl( tdoc );
590     ctmbstr result = NULL;
591     if ( impl )
592         result = TY_(getNextPriorityAttr)( impl, iter );
593     else if ( iter )
594         *iter = 0;
595     return result;
596 }
597 
tidyOptGetMutedMessageList(TidyDoc tdoc)598 TidyIterator TIDY_CALL tidyOptGetMutedMessageList( TidyDoc tdoc )
599 {
600     TidyDocImpl* impl = tidyDocToImpl( tdoc );
601     if ( impl )
602         return TY_(getMutedMessageList)( impl );
603     return (TidyIterator) -1;
604 }
605 
tidyOptGetNextMutedMessage(TidyDoc tdoc,TidyIterator * iter)606 ctmbstr TIDY_CALL      tidyOptGetNextMutedMessage(TidyDoc tdoc, TidyIterator* iter )
607 {
608     TidyDocImpl* impl = tidyDocToImpl( tdoc );
609     ctmbstr result = NULL;
610     if ( impl )
611         result = TY_(getNextMutedMessage)( impl, iter );
612     else if ( iter )
613         *iter = 0;
614     return result;
615 }
616 
tidyOptGetDoc(TidyDoc ARG_UNUSED (tdoc),TidyOption opt)617 ctmbstr TIDY_CALL tidyOptGetDoc( TidyDoc ARG_UNUSED(tdoc), TidyOption opt )
618 {
619     const TidyOptionId optId = tidyOptGetId( opt );
620     return tidyLocalizedString(optId);
621 }
622 
623 #if SUPPORT_CONSOLE_APP
624 /* TODO - GROUP ALL CONSOLE-ONLY FUNCTIONS */
tidyOptGetDocLinksList(TidyDoc ARG_UNUSED (tdoc),TidyOption opt)625 TidyIterator TIDY_CALL tidyOptGetDocLinksList( TidyDoc ARG_UNUSED(tdoc), TidyOption opt )
626 {
627     const TidyOptionId optId = tidyOptGetId( opt );
628     const TidyOptionDoc* docDesc = TY_(OptGetDocDesc)( optId );
629     if (docDesc && docDesc->links)
630         return (TidyIterator)docDesc->links;
631     return (TidyIterator)NULL;
632 }
633 #endif /* SUPPORT_CONSOLE_APP */
634 
tidyOptGetNextDocLinks(TidyDoc tdoc,TidyIterator * pos)635 TidyOption TIDY_CALL tidyOptGetNextDocLinks( TidyDoc tdoc, TidyIterator* pos )
636 {
637     const TidyOptionId* curr = (const TidyOptionId *)*pos;
638     TidyOption opt;
639 
640     if (*curr == TidyUnknownOption)
641     {
642         *pos = (TidyIterator)NULL;
643         return (TidyOption)0;
644     }
645     opt = tidyGetOption(tdoc, *curr);
646     curr++;
647     *pos = (*curr == TidyUnknownOption ) ?
648         (TidyIterator)NULL:(TidyIterator)curr;
649     return opt;
650 }
651 
tidyOptSaveFile(TidyDoc tdoc,ctmbstr cfgfil)652 int TIDY_CALL tidyOptSaveFile( TidyDoc tdoc, ctmbstr cfgfil )
653 {
654     TidyDocImpl* impl = tidyDocToImpl( tdoc );
655     if ( impl )
656         return TY_(SaveConfigFile)( impl, cfgfil );
657     return -EINVAL;
658 }
659 
tidyOptSaveSink(TidyDoc tdoc,TidyOutputSink * sink)660 int TIDY_CALL tidyOptSaveSink( TidyDoc tdoc, TidyOutputSink* sink )
661 {
662     TidyDocImpl* impl = tidyDocToImpl( tdoc );
663     if ( impl )
664         return TY_(SaveConfigSink)( impl, sink );
665     return -EINVAL;
666 }
667 
tidyOptSnapshot(TidyDoc tdoc)668 Bool TIDY_CALL tidyOptSnapshot( TidyDoc tdoc )
669 {
670     TidyDocImpl* impl = tidyDocToImpl( tdoc );
671     if ( impl )
672     {
673         TY_(TakeConfigSnapshot)( impl );
674         return yes;
675     }
676     return no;
677 }
tidyOptResetToSnapshot(TidyDoc tdoc)678 Bool TIDY_CALL tidyOptResetToSnapshot( TidyDoc tdoc )
679 {
680     TidyDocImpl* impl = tidyDocToImpl( tdoc );
681     if ( impl )
682     {
683         TY_(ResetConfigToSnapshot)( impl );
684         return yes;
685     }
686     return no;
687 }
tidyOptResetAllToDefault(TidyDoc tdoc)688 Bool TIDY_CALL tidyOptResetAllToDefault( TidyDoc tdoc )
689 {
690     TidyDocImpl* impl = tidyDocToImpl( tdoc );
691     if ( impl )
692     {
693         TY_(ResetConfigToDefault)( impl );
694         return yes;
695     }
696     return no;
697 }
698 
tidyOptResetToDefault(TidyDoc tdoc,TidyOptionId optId)699 Bool TIDY_CALL tidyOptResetToDefault( TidyDoc tdoc, TidyOptionId optId )
700 {
701     TidyDocImpl* impl = tidyDocToImpl( tdoc );
702     if ( impl )
703         return TY_(ResetOptionToDefault)( impl, optId );
704     return no;
705 }
706 
tidyOptDiffThanDefault(TidyDoc tdoc)707 Bool TIDY_CALL tidyOptDiffThanDefault( TidyDoc tdoc )
708 {
709     TidyDocImpl* impl = tidyDocToImpl( tdoc );
710     if ( impl )
711         return TY_(ConfigDiffThanDefault)( impl );
712     return no;
713 }
tidyOptDiffThanSnapshot(TidyDoc tdoc)714 Bool TIDY_CALL          tidyOptDiffThanSnapshot( TidyDoc tdoc )
715 {
716     TidyDocImpl* impl = tidyDocToImpl( tdoc );
717     if ( impl )
718         return TY_(ConfigDiffThanSnapshot)( impl );
719     return no;
720 }
721 
tidyOptCopyConfig(TidyDoc to,TidyDoc from)722 Bool TIDY_CALL tidyOptCopyConfig( TidyDoc to, TidyDoc from )
723 {
724     TidyDocImpl* docTo = tidyDocToImpl( to );
725     TidyDocImpl* docFrom = tidyDocToImpl( from );
726     if ( docTo && docFrom )
727     {
728         TY_(CopyConfig)( docTo, docFrom );
729         return yes;
730     }
731     return no;
732 }
733 
734 
/* I/O and Message handling interface
**
** By default, Tidy will define, create and use instances of input and output
** handlers for standard C buffered I/O (i.e. FILE* stdin, FILE* stdout and
** FILE* stderr for content input, content output and diagnostic output,
** respectively).  A FILE* cfgFile input handler will be used for config files.
** Command line options will just be set directly.
*/
743 
tidySetEmacsFile(TidyDoc tdoc,ctmbstr filePath)744 void TIDY_CALL tidySetEmacsFile( TidyDoc tdoc, ctmbstr filePath )
745 {
746     tidyOptSetValue( tdoc, TidyEmacsFile, filePath );
747 }
748 
tidyGetEmacsFile(TidyDoc tdoc)749 ctmbstr TIDY_CALL tidyGetEmacsFile( TidyDoc tdoc )
750 {
751     return tidyOptGetValue( tdoc, TidyEmacsFile );
752 }
753 
754 
755 /* Use TidyReportFilter to filter messages by diagnostic level:
756 ** info, warning, etc.  Just set diagnostic output
757 ** handler to redirect all diagnostics output.  Return true
758 ** to proceed with output, false to cancel.
759 */
tidySetReportFilter(TidyDoc tdoc,TidyReportFilter filt)760 Bool TIDY_CALL tidySetReportFilter( TidyDoc tdoc, TidyReportFilter filt )
761 {
762   TidyDocImpl* impl = tidyDocToImpl( tdoc );
763   if ( impl )
764   {
765     impl->reportFilter = filt;
766     return yes;
767   }
768   return no;
769 }
770 
/* tidySetReportCallback functions similarly to TidyReportFilter, but provides
** the string version of the internal enum name so that LibTidy users can use
** the string as a lookup key for providing their own error localizations.
** See the string key definitions in tidyenum.h.
*/
tidySetReportCallback(TidyDoc tdoc,TidyReportCallback filt)776 Bool TIDY_CALL tidySetReportCallback( TidyDoc tdoc, TidyReportCallback filt )
777 {
778   TidyDocImpl* impl = tidyDocToImpl( tdoc );
779   if ( impl )
780   {
781     impl->reportCallback = filt;
782     return yes;
783   }
784   return no;
785 }
786 
tidySetMessageCallback(TidyDoc tdoc,TidyMessageCallback filt)787 Bool TIDY_CALL tidySetMessageCallback( TidyDoc tdoc, TidyMessageCallback filt )
788 {
789     TidyDocImpl* impl = tidyDocToImpl( tdoc );
790     if ( impl )
791     {
792         impl->messageCallback = filt;
793         return yes;
794     }
795     return no;
796 }
797 
tidyGetMessageDoc(TidyMessage tmessage)798 TidyDoc TIDY_CALL tidyGetMessageDoc( TidyMessage tmessage )
799 {
800     TidyMessageImpl *message = tidyMessageToImpl(tmessage);
801     TidyDocImpl* doc = TY_(getMessageDoc)(*message);
802     return tidyImplToDoc(doc);
803 }
804 
tidyGetMessageCode(TidyMessage tmessage)805 uint TIDY_CALL tidyGetMessageCode( TidyMessage tmessage )
806 {
807     TidyMessageImpl *message = tidyMessageToImpl(tmessage);
808     return TY_(getMessageCode)(*message);
809 }
810 
tidyGetMessageKey(TidyMessage tmessage)811 ctmbstr TIDY_CALL tidyGetMessageKey( TidyMessage tmessage )
812 {
813     TidyMessageImpl *message = tidyMessageToImpl(tmessage);
814     return TY_(getMessageKey)(*message);
815 }
816 
tidyGetMessageLine(TidyMessage tmessage)817 int TIDY_CALL tidyGetMessageLine( TidyMessage tmessage )
818 {
819     TidyMessageImpl *message = tidyMessageToImpl(tmessage);
820     return TY_(getMessageLine)(*message);
821 }
822 
tidyGetMessageColumn(TidyMessage tmessage)823 int TIDY_CALL tidyGetMessageColumn( TidyMessage tmessage )
824 {
825     TidyMessageImpl *message = tidyMessageToImpl(tmessage);
826     return TY_(getMessageColumn)(*message);
827 }
828 
tidyGetMessageLevel(TidyMessage tmessage)829 TidyReportLevel TIDY_CALL tidyGetMessageLevel( TidyMessage tmessage )
830 {
831     TidyMessageImpl *message = tidyMessageToImpl(tmessage);
832     return TY_(getMessageLevel)(*message);
833 }
834 
tidyGetMessageIsMuted(TidyMessage tmessage)835 Bool TIDY_CALL tidyGetMessageIsMuted( TidyMessage tmessage )
836 {
837     TidyMessageImpl *message = tidyMessageToImpl(tmessage);
838     return TY_(getMessageIsMuted)(*message);
839 }
840 
tidyGetMessageFormatDefault(TidyMessage tmessage)841 ctmbstr TIDY_CALL tidyGetMessageFormatDefault( TidyMessage tmessage )
842 {
843     TidyMessageImpl *message = tidyMessageToImpl(tmessage);
844     return TY_(getMessageFormatDefault)(*message);
845 }
846 
tidyGetMessageFormat(TidyMessage tmessage)847 ctmbstr TIDY_CALL tidyGetMessageFormat( TidyMessage tmessage )
848 {
849     TidyMessageImpl *message = tidyMessageToImpl(tmessage);
850     return TY_(getMessageFormat)(*message);
851 }
852 
tidyGetMessageDefault(TidyMessage tmessage)853 ctmbstr TIDY_CALL tidyGetMessageDefault( TidyMessage tmessage )
854 {
855     TidyMessageImpl *message = tidyMessageToImpl(tmessage);
856     return TY_(getMessageDefault)(*message);
857 }
858 
tidyGetMessage(TidyMessage tmessage)859 ctmbstr TIDY_CALL tidyGetMessage( TidyMessage tmessage )
860 {
861     TidyMessageImpl *message = tidyMessageToImpl(tmessage);
862     return TY_(getMessage)(*message);
863 }
864 
tidyGetMessagePosDefault(TidyMessage tmessage)865 ctmbstr TIDY_CALL tidyGetMessagePosDefault( TidyMessage tmessage )
866 {
867     TidyMessageImpl *message = tidyMessageToImpl(tmessage);
868     return TY_(getMessagePosDefault)(*message);
869 }
870 
tidyGetMessagePos(TidyMessage tmessage)871 ctmbstr TIDY_CALL tidyGetMessagePos( TidyMessage tmessage )
872 {
873     TidyMessageImpl *message = tidyMessageToImpl(tmessage);
874     return TY_(getMessagePos)(*message);
875 }
876 
tidyGetMessagePrefixDefault(TidyMessage tmessage)877 ctmbstr TIDY_CALL tidyGetMessagePrefixDefault( TidyMessage tmessage )
878 {
879     TidyMessageImpl *message = tidyMessageToImpl(tmessage);
880     return TY_(getMessagePrefixDefault)(*message);
881 }
882 
tidyGetMessagePrefix(TidyMessage tmessage)883 ctmbstr TIDY_CALL tidyGetMessagePrefix( TidyMessage tmessage )
884 {
885     TidyMessageImpl *message = tidyMessageToImpl(tmessage);
886     return TY_(getMessagePrefix)(*message);
887 }
888 
889 
tidyGetMessageOutputDefault(TidyMessage tmessage)890 ctmbstr TIDY_CALL tidyGetMessageOutputDefault( TidyMessage tmessage )
891 {
892     TidyMessageImpl *message = tidyMessageToImpl(tmessage);
893     return TY_(getMessageOutputDefault)(*message);
894 }
895 
tidyGetMessageOutput(TidyMessage tmessage)896 ctmbstr TIDY_CALL tidyGetMessageOutput( TidyMessage tmessage )
897 {
898     TidyMessageImpl *message = tidyMessageToImpl(tmessage);
899     return TY_(getMessageOutput)(*message);
900 }
901 
tidyGetMessageArguments(TidyMessage tmessage)902 TidyIterator TIDY_CALL tidyGetMessageArguments( TidyMessage tmessage )
903 {
904     TidyMessageImpl *message = tidyMessageToImpl(tmessage);
905     return TY_(getMessageArguments)(*message);
906 }
907 
tidyGetNextMessageArgument(TidyMessage tmessage,TidyIterator * iter)908 TidyMessageArgument TIDY_CALL tidyGetNextMessageArgument( TidyMessage tmessage, TidyIterator* iter )
909 {
910     TidyMessageImpl *message = tidyMessageToImpl(tmessage);
911     return TY_(getNextMessageArgument)(*message, iter);
912 }
913 
tidyGetArgType(TidyMessage tmessage,TidyMessageArgument * arg)914 TidyFormatParameterType TIDY_CALL tidyGetArgType( TidyMessage tmessage, TidyMessageArgument* arg )
915 {
916     TidyMessageImpl *message = tidyMessageToImpl(tmessage);
917     return TY_(getArgType)(*message, arg);
918 }
919 
tidyGetArgFormat(TidyMessage tmessage,TidyMessageArgument * arg)920 ctmbstr TIDY_CALL tidyGetArgFormat( TidyMessage tmessage, TidyMessageArgument* arg )
921 {
922     TidyMessageImpl *message = tidyMessageToImpl(tmessage);
923     return TY_(getArgFormat)(*message, arg);
924 }
925 
tidyGetArgValueString(TidyMessage tmessage,TidyMessageArgument * arg)926 ctmbstr TIDY_CALL tidyGetArgValueString( TidyMessage tmessage, TidyMessageArgument* arg )
927 {
928     TidyMessageImpl *message = tidyMessageToImpl(tmessage);
929     return TY_(getArgValueString)(*message, arg);
930 }
931 
tidyGetArgValueUInt(TidyMessage tmessage,TidyMessageArgument * arg)932 uint TIDY_CALL tidyGetArgValueUInt( TidyMessage tmessage, TidyMessageArgument* arg )
933 {
934     TidyMessageImpl *message = tidyMessageToImpl(tmessage);
935     return TY_(getArgValueUInt)(*message, arg);
936 }
937 
tidyGetArgValueInt(TidyMessage tmessage,TidyMessageArgument * arg)938 int TIDY_CALL tidyGetArgValueInt( TidyMessage tmessage, TidyMessageArgument* arg )
939 {
940     TidyMessageImpl *message = tidyMessageToImpl(tmessage);
941     return TY_(getArgValueInt)(*message, arg);
942 }
943 
tidyGetArgValueDouble(TidyMessage tmessage,TidyMessageArgument * arg)944 double TIDY_CALL tidyGetArgValueDouble( TidyMessage tmessage, TidyMessageArgument* arg )
945 {
946     TidyMessageImpl *message = tidyMessageToImpl(tmessage);
947     return TY_(getArgValueDouble)(*message, arg);
948 }
949 
950 
tidySetErrorFile(TidyDoc tdoc,ctmbstr errfilnam)951 FILE* TIDY_CALL   tidySetErrorFile( TidyDoc tdoc, ctmbstr errfilnam )
952 {
953     TidyDocImpl* impl = tidyDocToImpl( tdoc );
954     if ( impl )
955     {
956         FILE* errout = fopen( errfilnam, "wb" );
957         if ( errout )
958         {
959             uint outenc = cfg( impl, TidyOutCharEncoding );
960             uint nl = cfg( impl, TidyNewline );
961             TY_(ReleaseStreamOut)( impl, impl->errout );
962             impl->errout = TY_(FileOutput)( impl, errout, outenc, nl );
963             return errout;
964         }
965         else /* Emit message to current error sink */
966             TY_(ReportFileError)( impl, errfilnam, FILE_CANT_OPEN );
967     }
968     return NULL;
969 }
970 
tidySetErrorBuffer(TidyDoc tdoc,TidyBuffer * errbuf)971 int TIDY_CALL    tidySetErrorBuffer( TidyDoc tdoc, TidyBuffer* errbuf )
972 {
973     TidyDocImpl* impl = tidyDocToImpl( tdoc );
974     if ( impl )
975     {
976         uint outenc = cfg( impl, TidyOutCharEncoding );
977         uint nl = cfg( impl, TidyNewline );
978         TY_(ReleaseStreamOut)( impl, impl->errout );
979         impl->errout = TY_(BufferOutput)( impl, errbuf, outenc, nl );
980         return ( impl->errout ? 0 : -ENOMEM );
981     }
982     return -EINVAL;
983 }
984 
tidySetErrorSink(TidyDoc tdoc,TidyOutputSink * sink)985 int TIDY_CALL    tidySetErrorSink( TidyDoc tdoc, TidyOutputSink* sink )
986 {
987     TidyDocImpl* impl = tidyDocToImpl( tdoc );
988     if ( impl )
989     {
990         uint outenc = cfg( impl, TidyOutCharEncoding );
991         uint nl = cfg( impl, TidyNewline );
992         TY_(ReleaseStreamOut)( impl, impl->errout );
993         impl->errout = TY_(UserOutput)( impl, sink, outenc, nl );
994         return ( impl->errout ? 0 : -ENOMEM );
995     }
996     return -EINVAL;
997 }
998 
999 /* Use TidyPPProgress to monitor the progress of the pretty printer.
1000  */
tidySetPrettyPrinterCallback(TidyDoc tdoc,TidyPPProgress callback)1001 Bool TIDY_CALL        tidySetPrettyPrinterCallback(TidyDoc tdoc, TidyPPProgress callback)
1002 {
1003     TidyDocImpl* impl = tidyDocToImpl( tdoc );
1004     if ( impl )
1005     {
1006         impl->progressCallback = callback;
1007         return yes;
1008     }
1009     return no;
1010 }
1011 
1012 
1013 /* Document info */
tidyStatus(TidyDoc tdoc)1014 int TIDY_CALL        tidyStatus( TidyDoc tdoc )
1015 {
1016     TidyDocImpl* impl = tidyDocToImpl( tdoc );
1017     int tidyStat = -EINVAL;
1018     if ( impl )
1019         tidyStat = tidyDocStatus( impl );
1020     return tidyStat;
1021 }
tidyDetectedHtmlVersion(TidyDoc ARG_UNUSED (tdoc))1022 int TIDY_CALL        tidyDetectedHtmlVersion( TidyDoc ARG_UNUSED(tdoc) )
1023 {
1024     TidyDocImpl* impl = tidyDocToImpl( tdoc );
1025     return TY_(HTMLVersionNumberFromCode)( impl->lexer->versionEmitted );
1026 }
1027 
tidyDetectedXhtml(TidyDoc ARG_UNUSED (tdoc))1028 Bool TIDY_CALL        tidyDetectedXhtml( TidyDoc ARG_UNUSED(tdoc) )
1029 {
1030     TidyDocImpl* impl = tidyDocToImpl( tdoc );
1031     return impl->lexer->isvoyager;
1032 }
tidyDetectedGenericXml(TidyDoc ARG_UNUSED (tdoc))1033 Bool TIDY_CALL        tidyDetectedGenericXml( TidyDoc ARG_UNUSED(tdoc) )
1034 {
1035     TidyDocImpl* impl = tidyDocToImpl( tdoc );
1036     return impl->xmlDetected;
1037 }
1038 
tidyErrorCount(TidyDoc tdoc)1039 uint TIDY_CALL       tidyErrorCount( TidyDoc tdoc )
1040 {
1041     TidyDocImpl* impl = tidyDocToImpl( tdoc );
1042     uint count = 0xFFFFFFFF;
1043     if ( impl )
1044         count = impl->errors;
1045     return count;
1046 }
tidyWarningCount(TidyDoc tdoc)1047 uint TIDY_CALL       tidyWarningCount( TidyDoc tdoc )
1048 {
1049     TidyDocImpl* impl = tidyDocToImpl( tdoc );
1050     uint count = 0xFFFFFFFF;
1051     if ( impl )
1052         count = impl->warnings;
1053     return count;
1054 }
tidyAccessWarningCount(TidyDoc tdoc)1055 uint TIDY_CALL       tidyAccessWarningCount( TidyDoc tdoc )
1056 {
1057     TidyDocImpl* impl = tidyDocToImpl( tdoc );
1058     uint count = 0xFFFFFFFF;
1059     if ( impl )
1060         count = impl->accessErrors;
1061     return count;
1062 }
tidyConfigErrorCount(TidyDoc tdoc)1063 uint TIDY_CALL       tidyConfigErrorCount( TidyDoc tdoc )
1064 {
1065     TidyDocImpl* impl = tidyDocToImpl( tdoc );
1066     uint count = 0xFFFFFFFF;
1067     if ( impl )
1068         count = impl->optionErrors;
1069     return count;
1070 }
1071 
1072 
1073 /* Error reporting functions
1074 */
tidyErrorSummary(TidyDoc tdoc)1075 void TIDY_CALL         tidyErrorSummary( TidyDoc tdoc )
1076 {
1077     TidyDocImpl* impl = tidyDocToImpl( tdoc );
1078     if ( impl )
1079         TY_(ErrorSummary)( impl );
1080 }
tidyGeneralInfo(TidyDoc tdoc)1081 void TIDY_CALL         tidyGeneralInfo( TidyDoc tdoc )
1082 {
1083     TidyDocImpl* impl = tidyDocToImpl( tdoc );
1084     if ( impl )
1085     {
1086         TY_(Dialogue)( impl, TEXT_GENERAL_INFO );
1087         TY_(Dialogue)( impl, TEXT_GENERAL_INFO_PLEA );
1088     }
1089 }
1090 
1091 
1092 /* I/O Functions
1093 **
1094 ** Initial version supports only whole-file operations.
1095 ** Do not expose Tidy StreamIn or Out data structures - yet.
1096 */
1097 
1098 /* Parse/load Functions
1099 **
1100 ** HTML/XHTML version determined from input.
1101 */
tidyParseFile(TidyDoc tdoc,ctmbstr filnam)1102 int TIDY_CALL  tidyParseFile( TidyDoc tdoc, ctmbstr filnam )
1103 {
1104     TidyDocImpl* doc = tidyDocToImpl( tdoc );
1105     return tidyDocParseFile( doc, filnam );
1106 }
tidyParseStdin(TidyDoc tdoc)1107 int TIDY_CALL  tidyParseStdin( TidyDoc tdoc )
1108 {
1109     TidyDocImpl* doc = tidyDocToImpl( tdoc );
1110     return tidyDocParseStdin( doc );
1111 }
tidyParseString(TidyDoc tdoc,ctmbstr content)1112 int TIDY_CALL  tidyParseString( TidyDoc tdoc, ctmbstr content )
1113 {
1114     TidyDocImpl* doc = tidyDocToImpl( tdoc );
1115     return tidyDocParseString( doc, content );
1116 }
tidyParseBuffer(TidyDoc tdoc,TidyBuffer * inbuf)1117 int TIDY_CALL  tidyParseBuffer( TidyDoc tdoc, TidyBuffer* inbuf )
1118 {
1119     TidyDocImpl* doc = tidyDocToImpl( tdoc );
1120     return tidyDocParseBuffer( doc, inbuf );
1121 }
tidyParseSource(TidyDoc tdoc,TidyInputSource * source)1122 int TIDY_CALL  tidyParseSource( TidyDoc tdoc, TidyInputSource* source )
1123 {
1124     TidyDocImpl* doc = tidyDocToImpl( tdoc );
1125     return tidyDocParseSource( doc, source );
1126 }
1127 
1128 #ifdef WIN32
1129 #define M_IS_DIR _S_IFDIR
1130 #else // !WIN32
1131 #define M_IS_DIR S_IFDIR
1132 #endif
tidyDocParseFile(TidyDocImpl * doc,ctmbstr filnam)1133 int   tidyDocParseFile( TidyDocImpl* doc, ctmbstr filnam )
1134 {
1135     int status = -ENOENT;
1136     FILE* fin = 0;
1137     struct stat sbuf = { 0 }; /* Is. #681 - read-only files */
1138     if ( stat(filnam,&sbuf) != 0 )
1139     {
1140         TY_(ReportFileError)( doc, filnam, FILE_NOT_FILE );
1141         return status;
1142     }
1143     if (sbuf.st_mode & M_IS_DIR) /* and /NOT/ if a DIRECTORY */
1144     {
1145         TY_(ReportFileError)(doc, filnam, FILE_NOT_FILE);
1146         return status;
1147     }
1148 
1149 #ifdef _WIN32
1150     return TY_(DocParseFileWithMappedFile)( doc, filnam );
1151 #else
1152 
1153     fin = fopen( filnam, "rb" );
1154 
1155 #if PRESERVE_FILE_TIMES
1156     {
1157         /* get last modified time */
1158         TidyClearMemory(&doc->filetimes, sizeof(doc->filetimes));
1159         if (fin && cfgBool(doc, TidyKeepFileTimes) &&
1160             fstat(fileno(fin), &sbuf) != -1)
1161         {
1162             doc->filetimes.actime = sbuf.st_atime;
1163             doc->filetimes.modtime = sbuf.st_mtime;
1164         }
1165     }
1166 #endif
1167 
1168     if ( fin )
1169     {
1170         StreamIn* in = TY_(FileInput)( doc, fin, cfg( doc, TidyInCharEncoding ));
1171         if ( !in )
1172         {
1173             fclose( fin );
1174             return status;
1175         }
1176         status = TY_(DocParseStream)( doc, in );
1177         TY_(freeFileSource)(&in->source, yes);
1178         TY_(freeStreamIn)(in);
1179     }
1180     else /* Error message! */
1181         TY_(ReportFileError)( doc, filnam, FILE_CANT_OPEN );
1182     return status;
1183 #endif
1184 }
1185 
/* Parse standard input into doc.
**
** Wraps stdin in a file input stream using the configured input
** encoding, parses it, then frees the file source and the stream.
**
** Returns the status of TY_(DocParseStream).  TY_(FileInput) can yield
** NULL (tidyDocParseFile checks for exactly that); here it was used
** unchecked.  That case now returns -ENOMEM, the code the error-sink
** setters use when a stream cannot be created.
*/
int   tidyDocParseStdin( TidyDocImpl* doc )
{
    int status;
    StreamIn* in = TY_(FileInput)( doc, stdin, cfg( doc, TidyInCharEncoding ));

    if ( !in )
        return -ENOMEM;

    status = TY_(DocParseStream)( doc, in );
    TY_(freeFileSource)(&in->source, yes);
    TY_(freeStreamIn)(in);
    return status;
}
1194 
/* Parse the contents of inbuf into doc.
** Returns -EINVAL when inbuf is NULL; otherwise wraps the buffer in an
** input stream using the configured input encoding, parses it, frees
** the stream and returns the status of TY_(DocParseStream).
*/
int   tidyDocParseBuffer( TidyDocImpl* doc, TidyBuffer* inbuf )
{
    StreamIn* stream;
    int result;

    if ( !inbuf )
        return -EINVAL;

    stream = TY_(BufferInput)( doc, inbuf, cfg( doc, TidyInCharEncoding ));
    result = TY_(DocParseStream)( doc, stream );
    TY_(freeStreamIn)(stream);
    return result;
}
1206 
/* Parse a NUL-terminated string into doc.
**
** Returns -EINVAL when content is NULL.  Otherwise the string (including
** its terminating NUL, hence the +1) is attached to a temporary
** TidyBuffer without copying, parsed, and detached again so the caller's
** memory is not freed with the buffer.  Returns the status of
** TY_(DocParseStream).
*/
int   tidyDocParseString( TidyDocImpl* doc, ctmbstr content )
{
    TidyBuffer wrapper;
    StreamIn* stream = NULL;
    int result;

    if ( !content )
        return -EINVAL;

    tidyBufInitWithAllocator( &wrapper, doc->allocator );
    tidyBufAttach( &wrapper, (byte*)content, TY_(tmbstrlen)(content)+1 );
    stream = TY_(BufferInput)( doc, &wrapper, cfg( doc, TidyInCharEncoding ));
    result = TY_(DocParseStream)( doc, stream );
    tidyBufDetach( &wrapper );
    TY_(freeStreamIn)(stream);
    return result;
}
1224 
/* Parse input delivered by a user-supplied source into doc.
** Wraps source in an input stream using the configured input encoding,
** parses it, frees the stream and returns the status of
** TY_(DocParseStream).
*/
int   tidyDocParseSource( TidyDocImpl* doc, TidyInputSource* source )
{
    StreamIn* stream;
    int result;

    stream = TY_(UserInput)( doc, source, cfg( doc, TidyInCharEncoding ));
    result = TY_(DocParseStream)( doc, stream );
    TY_(freeStreamIn)(stream);
    return result;
}
1232 
1233 
1234 /* Print/save Functions
1235 **
1236 */
tidySaveFile(TidyDoc tdoc,ctmbstr filnam)1237 int TIDY_CALL        tidySaveFile( TidyDoc tdoc, ctmbstr filnam )
1238 {
1239     TidyDocImpl* doc = tidyDocToImpl( tdoc );
1240     return tidyDocSaveFile( doc, filnam );
1241 }
tidySaveStdout(TidyDoc tdoc)1242 int TIDY_CALL        tidySaveStdout( TidyDoc tdoc )
1243 {
1244     TidyDocImpl* doc = tidyDocToImpl( tdoc );
1245     return tidyDocSaveStdout( doc );
1246 }
tidySaveString(TidyDoc tdoc,tmbstr buffer,uint * buflen)1247 int TIDY_CALL        tidySaveString( TidyDoc tdoc, tmbstr buffer, uint* buflen )
1248 {
1249     TidyDocImpl* doc = tidyDocToImpl( tdoc );
1250     return tidyDocSaveString( doc, buffer, buflen );
1251 }
tidySaveBuffer(TidyDoc tdoc,TidyBuffer * outbuf)1252 int TIDY_CALL        tidySaveBuffer( TidyDoc tdoc, TidyBuffer* outbuf )
1253 {
1254     TidyDocImpl* doc = tidyDocToImpl( tdoc );
1255     return tidyDocSaveBuffer( doc, outbuf );
1256 }
tidySaveSink(TidyDoc tdoc,TidyOutputSink * sink)1257 int TIDY_CALL        tidySaveSink( TidyDoc tdoc, TidyOutputSink* sink )
1258 {
1259     TidyDocImpl* doc = tidyDocToImpl( tdoc );
1260     return tidyDocSaveSink( doc, sink );
1261 }
1262 
/* Write the document to the file named filnam.
**
** When the document has errors, write-back is enabled and output is not
** forced, the file is left untouched and the document status is
** returned.  Otherwise the file is opened for binary writing and the
** document is written through a file output stream using the configured
** output encoding and newline setting.  With PRESERVE_FILE_TIMES, the
** file times recorded on the document are restored afterwards.
**
** Returns the status of tidyDocSaveStream(), the document status when
** writing was skipped, or a negative errno-style value when the file
** could not be opened or closed.  Any negative status is reported via
** FILE_CANT_OPEN.
*/
int         tidyDocSaveFile( TidyDocImpl* doc, ctmbstr filnam )
{
    int status = -ENOENT;
    FILE* fout = NULL;

    /* Don't zap input file if no output */
    if ( doc->errors > 0 &&
         cfgBool(doc, TidyWriteBack) && !cfgBool(doc, TidyForceOutput) )
        status = tidyDocStatus( doc );
    else
        fout = fopen( filnam, "wb" );

    if ( fout )
    {
        uint outenc = cfg( doc, TidyOutCharEncoding );
        uint nl = cfg( doc, TidyNewline );
        StreamOut* out = TY_(FileOutput)( doc, fout, outenc, nl );

        status = tidyDocSaveStream( doc, out );

        /* fclose() performs the final flush of buffered output; if it
        ** fails (disk full, I/O error) the file is incomplete, so do not
        ** report success.  An earlier failure status is kept as is.
        */
        if ( fclose( fout ) != 0 && status >= 0 )
            status = ( errno != 0 ) ? -errno : -EIO;
        TidyDocFree( doc, out );

#if PRESERVE_FILE_TIMES
        if ( doc->filetimes.actime )
        {
            /* set file last accessed/modified times to original values */
            utime( filnam, &doc->filetimes );
            TidyClearMemory( &doc->filetimes, sizeof(doc->filetimes) );
        }
#endif /* PRESERVE_FILE_TIMES */
    }
    if ( status < 0 ) /* Error message! */
        TY_(ReportFileError)( doc, filnam, FILE_CANT_OPEN );
    return status;
}
1299 
1300 
1301 
1302 /* Note, _setmode() does NOT work on Win2K Pro w/ VC++ 6.0 SP3.
1303 ** The code has been left in in case it works w/ other compilers
1304 ** or operating systems.  If stdout is in Text mode, be aware that
1305 ** it will garble UTF16 documents.  In text mode, when it encounters
1306 ** a single byte of value 10 (0xA), it will insert a single byte
1307 ** value 13 (0xD) just before it.  This has the effect of garbling
1308 ** the entire document.
1309 */
1310 
1311 #if !defined(NO_SETMODE_SUPPORT)
1312 #  if defined(_WIN32) || defined(OS2_OS)
1313 #   include <fcntl.h>
1314 #     include <io.h>
1315 #   endif
1316 #endif
1317 
/* Write the document to standard output.
**
** Where setmode support is compiled in (_WIN32 or OS2_OS without
** NO_SETMODE_SUPPORT), stdout and stderr are switched to binary mode
** for the duration of the write and restored afterwards.  Both streams
** are flushed before the modes are restored.
**
** Returns the status of tidyDocSaveStream().
*/
int         tidyDocSaveStdout( TidyDocImpl* doc )
{
#if !defined(NO_SETMODE_SUPPORT) && (defined(_WIN32) || defined(OS2_OS))
    int savedOutMode = -1, savedErrMode = -1;
#endif

    int result;
    uint enc = cfg( doc, TidyOutCharEncoding );
    uint eol = cfg( doc, TidyNewline );
    StreamOut* sink = TY_(FileOutput)( doc, stdout, enc, eol );

#if !defined(NO_SETMODE_SUPPORT) && (defined(_WIN32) || defined(OS2_OS))
    savedOutMode = setmode( fileno(stdout), _O_BINARY );
    savedErrMode = setmode( fileno(stderr), _O_BINARY );
#endif

    result = tidyDocSaveStream( doc, sink );

    fflush(stdout);
    fflush(stderr);

#if !defined(NO_SETMODE_SUPPORT) && (defined(_WIN32) || defined(OS2_OS))
    /* Put back whichever modes were successfully changed above. */
    if ( savedOutMode != -1 )
        setmode( fileno(stdout), savedOutMode );
    if ( savedErrMode != -1 )
        setmode( fileno(stderr), savedErrMode );
#endif

    TidyDocFree( doc, sink );
    return result;
}
1356 
/* Write the document into a caller-supplied byte array.
**
** The document is rendered into a temporary TidyBuffer and then copied
** into buffer if it fits.  On entry *buflen is the capacity of buffer;
** on return it holds the number of bytes the rendered document occupies.
** No terminating NUL is appended by this function.
**
** Returns the status of tidyDocSaveStream(); -ENOMEM when the rendered
** document is larger than *buflen (nothing is copied); -EINVAL when
** buflen is NULL, or when buffer is NULL although there is data to copy.
** Both -EINVAL cases previously dereferenced a NULL pointer.
*/
int         tidyDocSaveString( TidyDocImpl* doc, tmbstr buffer, uint* buflen )
{
    uint outenc;
    uint nl;
    TidyBuffer outbuf;
    StreamOut* out;
    int status;

    if ( buflen == NULL )
        return -EINVAL;

    outenc = cfg( doc, TidyOutCharEncoding );
    nl = cfg( doc, TidyNewline );

    tidyBufInitWithAllocator( &outbuf, doc->allocator );
    out = TY_(BufferOutput)( doc, &outbuf, outenc, nl );
    status = tidyDocSaveStream( doc, out );

    if ( outbuf.size > *buflen )
        status = -ENOMEM;
    else if ( outbuf.size > 0 )
    {
        /* Skip memcpy entirely for empty output: outbuf.bp may be NULL
        ** then, and memcpy with a NULL pointer is undefined even for a
        ** zero length.
        */
        if ( buffer != NULL )
            memcpy( buffer, outbuf.bp, outbuf.size );
        else
            status = -EINVAL;
    }

    *buflen = outbuf.size;
    tidyBufFree( &outbuf );
    TidyDocFree( doc, out );
    return status;
}
1379 
/* Write the document into outbuf.
** Returns -EINVAL when outbuf is NULL; otherwise writes through a
** buffer output stream using the configured output encoding and newline
** setting and returns the status of tidyDocSaveStream().
*/
int         tidyDocSaveBuffer( TidyDocImpl* doc, TidyBuffer* outbuf )
{
    uint enc, eol;
    StreamOut* sink;
    int result;

    if ( !outbuf )
        return -EINVAL;

    enc = cfg( doc, TidyOutCharEncoding );
    eol = cfg( doc, TidyNewline );
    sink = TY_(BufferOutput)( doc, outbuf, enc, eol );

    result = tidyDocSaveStream( doc, sink );
    TidyDocFree( doc, sink );
    return result;
}
1394 
/* Write the document to a user-supplied output sink.
** Builds an output stream over sink using the configured output encoding
** and newline setting, writes the document, frees the stream and
** returns the status of tidyDocSaveStream().
*/
int         tidyDocSaveSink( TidyDocImpl* doc, TidyOutputSink* sink )
{
    uint enc = cfg( doc, TidyOutCharEncoding );
    uint eol = cfg( doc, TidyNewline );
    StreamOut* stream;
    int result;

    stream = TY_(UserOutput)( doc, sink, enc, eol );
    result = tidyDocSaveStream( doc, stream );
    TidyDocFree( doc, stream );
    return result;
}
1404 
/* Summarize the document's counters as a severity level:
** 2 if any errors were counted, otherwise 1 if any warnings or
** accessibility errors were counted, otherwise 0.
*/
int         tidyDocStatus( TidyDocImpl* doc )
{
    int level = 0;

    if ( doc->errors > 0 )
        level = 2;
    else if ( doc->warnings > 0 || doc->accessErrors > 0 )
        level = 1;

    return level;
}
1413 
1414 
1415 
tidyCleanAndRepair(TidyDoc tdoc)1416 int TIDY_CALL        tidyCleanAndRepair( TidyDoc tdoc )
1417 {
1418     TidyDocImpl* impl = tidyDocToImpl( tdoc );
1419     if ( impl )
1420       return tidyDocCleanAndRepair( impl );
1421     return -EINVAL;
1422 }
1423 
tidyRunDiagnostics(TidyDoc tdoc)1424 int TIDY_CALL        tidyRunDiagnostics( TidyDoc tdoc )
1425 {
1426     TidyDocImpl* impl = tidyDocToImpl( tdoc );
1427     if ( impl )
1428       return tidyDocRunDiagnostics( impl );
1429     return -EINVAL;
1430 }
1431 
tidyReportDoctype(TidyDoc tdoc)1432 int TIDY_CALL        tidyReportDoctype( TidyDoc tdoc )
1433 {
1434     int iret = -EINVAL;
1435     TidyDocImpl* impl = tidyDocToImpl( tdoc );
1436     if ( impl ) {
1437       tidyDocReportDoctype( impl );
1438       iret = 0;
1439     }
1440     return iret;
1441 }
1442 
1443 /* Workhorse functions.
1444 **
1445 ** Parse requires input source, all input config items
1446 ** and diagnostic sink to have all been set before calling.
1447 **
1448 ** Emit likewise requires that document sink and all
1449 ** pretty printing options have been set.
1450 */
1451 static ctmbstr integrity = "\nPanic - tree has lost its integrity\n";
1452 
TY_(DocParseStream)1453 int         TY_(DocParseStream)( TidyDocImpl* doc, StreamIn* in )
1454 {
1455     Bool xmlIn = cfgBool( doc, TidyXmlTags );
1456     TidyConfigChangeCallback callback = doc->pConfigChangeCallback;
1457 
1458     int bomEnc;
1459     doc->pConfigChangeCallback = NULL;
1460 
1461     assert( doc != NULL && in != NULL );
1462     assert( doc->docIn == NULL );
1463     doc->docIn = in;
1464 
1465     TY_(ResetTags)(doc);             /* Reset table to html5 mode */
1466     TY_(TakeConfigSnapshot)( doc );  /* Save config state */
1467     TY_(AdjustConfig)( doc );        /* Ensure config internal consistency */
1468     TY_(FreeAnchors)( doc );
1469 
1470     TY_(FreeNode)(doc, &doc->root);
1471     TidyClearMemory(&doc->root, sizeof(Node));
1472 
1473     if (doc->givenDoctype)
1474         TidyDocFree(doc, doc->givenDoctype);
1475     /*\
1476      *  Issue #186 - Now FreeNode depend on the doctype, so the lexer is needed
1477      *  to determine which hash is to be used, so free it last.
1478     \*/
1479     TY_(FreeLexer)( doc );
1480     doc->givenDoctype = NULL;
1481 
1482     doc->lexer = TY_(NewLexer)( doc );
1483     /* doc->lexer->root = &doc->root; */
1484     doc->root.line = doc->lexer->lines;
1485     doc->root.column = doc->lexer->columns;
1486     doc->inputHadBOM = no;
1487     doc->xmlDetected = no;
1488 
1489     bomEnc = TY_(ReadBOMEncoding)(in);
1490 
1491     if (bomEnc != -1)
1492     {
1493         in->encoding = bomEnc;
1494         TY_(SetOptionInt)(doc, TidyInCharEncoding, bomEnc);
1495     }
1496 
1497     /* Tidy doesn't alter the doctype for generic XML docs */
1498     if ( xmlIn )
1499     {
1500         TY_(ParseXMLDocument)( doc );
1501         if ( !TY_(CheckNodeIntegrity)( &doc->root ) )
1502             TidyPanic( doc->allocator, integrity );
1503     }
1504     else
1505     {
1506         doc->warnings = 0;
1507         TY_(ParseDocument)( doc );
1508         if ( !TY_(CheckNodeIntegrity)( &doc->root ) )
1509             TidyPanic( doc->allocator, integrity );
1510     }
1511 
1512     doc->docIn = NULL;
1513     doc->pConfigChangeCallback = callback;
1514 
1515     return tidyDocStatus( doc );
1516 }
1517 
/* Emit the post-parse diagnostics for doc: the markup version report and
** the warning count, followed by the STRING_NEEDS_INTERVENTION dialogue
** when errors were counted and output is not being forced.
** Returns tidyDocStatus( doc ).
*/
int         tidyDocRunDiagnostics( TidyDocImpl* doc )
{
    TY_(ReportMarkupVersion)( doc );
    TY_(ReportNumWarnings)( doc );

    if ( doc->errors > 0 )
    {
        if ( !cfgBool( doc, TidyForceOutput ) )
            TY_(Dialogue)(doc, STRING_NEEDS_INTERVENTION );
    }

    return tidyDocStatus( doc );
}
1528 
/* Report the document's markup version by delegating to
** TY_(ReportMarkupVersion).
*/
void         tidyDocReportDoctype( TidyDocImpl* doc )
{
    TY_(ReportMarkupVersion)( doc );
}
1533 
1534 
1535 /*****************************************************************************
1536  *  HTML5 STUFF
1537  *****************************************************************************/
#if 0 && defined(ENABLE_DEBUG_LOG)
extern void show_not_html5(void);
/* -----------------------------
List tags that do not have version HTML5 (HT50|XH50)

acronym applet basefont big center dir font frame frameset isindex
listing noframes plaintext rb rbc rtc strike tt xmp nextid
align bgsound blink comment ilayer layer marquee multicol nobr noembed
nolayer nosave server servlet spacer

Listed total 35 tags that do not have version 393216
   ------------------------------ */

/* Debug helper, currently compiled out by the `#if 0` above: calls
 * show_not_html5() the first time it is invoked and does nothing on
 * later calls.
 */
static void list_not_html5(void)
{
    static Bool listed = no;

    if (listed != no)
        return;

    listed = yes;
    show_not_html5();
}
#endif
1560 
1561 /* What about <blink>, <s> stike-through, <u> underline */
1562 static struct _html5Info
1563 {
1564     const char *tag;
1565     uint id;
1566 } const html5Info[] = {
1567     {"acronym", TidyTag_ACRONYM},
1568     {"applet", TidyTag_APPLET  },
1569     {"basefont",TidyTag_BASEFONT },
1570     { "big", TidyTag_BIG },
1571     { "center", TidyTag_CENTER },
1572     { "dir", TidyTag_DIR },
1573     { "font", TidyTag_FONT },
1574     { "frame", TidyTag_FRAME},
1575     { "frameset", TidyTag_FRAMESET},
1576     { "noframes", TidyTag_NOFRAMES },
1577     { "strike", TidyTag_STRIKE },
1578     { "tt", TidyTag_TT },
1579     { 0, 0 }
1580 };
inRemovedInfo(uint tid)1581 static Bool inRemovedInfo( uint tid )
1582 {
1583     int i;
1584     for (i = 0; ; i++) {
1585         if (html5Info[i].tag == 0)
1586             break;
1587         if (html5Info[i].id == tid)
1588             return yes;
1589     }
1590     return no;
1591 }
1592 
1593 /* Things that should not be in an HTML5 body. This is special for CheckHTML5(),
1594  and we might just want to remove CheckHTML5()'s output altogether and count
1595  on the default --strict-tags-attributes.
1596  */
1597 static int BadBody5Attribs[] = {
1598     TidyAttr_BACKGROUND,
1599     TidyAttr_BGCOLOR,
1600     TidyAttr_TEXT,
1601     TidyAttr_LINK,
1602     TidyAttr_VLINK,
1603     TidyAttr_ALINK,
1604     TidyAttr_UNKNOWN /* Must be last! */
1605 };
1606 
nodeHasAlignAttr(Node * node)1607 static Bool nodeHasAlignAttr( Node *node )
1608 {
1609     /* #define attrIsALIGN(av) AttrIsId( av, TidyAttr_ALIGN  ) */
1610     AttVal* av;
1611     for ( av = node->attributes; av != NULL; av = av->next ) {
1612         if (attrIsALIGN(av))
1613             return yes;
1614     }
1615     return no;
1616 }
1617 
1618 /*
1619  *  Perform special checks for HTML, even when we're not using the default
1620  *  option `--strict-tags-attributes yes`. This will ensure that HTML5 warning
1621  *  and error output is given regardless of the new option, and ensure that
1622  *  cleanup takes place. This provides mostly consistent Tidy behavior even with
1623  *  the introduction of this new option. Note that strings have changed, though,
1624  *  in order to maintain consistency with the `--strict-tags-attributes`
1625  *  messages.
1626  *
1627  *  See also: http://www.whatwg.org/specs/web-apps/current-work/multipage/obsolete.html#obsolete
1628  */
TY_(CheckHTML5)1629 static void TY_(CheckHTML5)( TidyDocImpl* doc, Node* node )
1630 {
1631     Bool clean = cfgBool( doc, TidyMakeClean );
1632     Bool already_strict = cfgBool( doc, TidyStrictTagsAttr );
1633     Node* body = TY_(FindBody)( doc );
1634     Bool warn = yes;    /* should this be a warning, error, or report??? */
1635     AttVal* attr = NULL;
1636     int i = 0;
1637 
1638     while (node)
1639     {
1640         if ( nodeHasAlignAttr( node ) ) {
1641             /* @todo: Is this for ALL elements that accept an 'align' attribute,
1642              * or should this be a sub-set test?
1643              */
1644 
1645             /* We will only emit this message if `--strict-tags-attributes==no`;
1646              * otherwise if yes this message will be output during later
1647              * checking.
1648              */
1649             if ( !already_strict )
1650                 TY_(ReportAttrError)(doc, node, TY_(AttrGetById)(node, TidyAttr_ALIGN), MISMATCHED_ATTRIBUTE_WARN);
1651         }
1652         if ( node == body ) {
1653             i = 0;
1654             /* We will only emit these messages if `--strict-tags-attributes==no`;
1655              * otherwise if yes these messages will be output during later
1656              * checking.
1657              */
1658             if ( !already_strict ) {
1659                 while ( BadBody5Attribs[i] != TidyAttr_UNKNOWN ) {
1660                     attr = TY_(AttrGetById)(node, BadBody5Attribs[i]);
1661                     if ( attr )
1662                         TY_(ReportAttrError)(doc, node, attr , MISMATCHED_ATTRIBUTE_WARN);
1663                     i++;
1664                 }
1665             }
1666         } else
1667         if ( nodeIsACRONYM(node) ) {
1668             if (clean) {
1669                 /* Replace with 'abbr' with warning to that effect.
1670                  * Maybe should use static void RenameElem( TidyDocImpl* doc, Node* node, TidyTagId tid )
1671                  */
1672                 TY_(CoerceNode)(doc, node, TidyTag_ABBR, warn, no);
1673             } else {
1674                 if ( !already_strict )
1675                     TY_(Report)(doc, node, node, REMOVED_HTML5);
1676             }
1677         } else
1678         if ( nodeIsAPPLET(node) ) {
1679             if (clean) {
1680                 /* replace with 'object' with warning to that effect
1681                  * maybe should use static void RenameElem( TidyDocImpl* doc, Node* node, TidyTagId tid )
1682                  */
1683                 TY_(CoerceNode)(doc, node, TidyTag_OBJECT, warn, no);
1684             } else {
1685                 if ( !already_strict )
1686                     TY_(Report)(doc, node, node, REMOVED_HTML5);
1687             }
1688         } else
1689         if ( nodeIsBASEFONT(node) ) {
1690             /* basefont: CSS equivalent 'font-size', 'font-family' and 'color'
1691              * on body or class on each subsequent element.
1692              * Difficult - If it is the first body element, then could consider
1693              * adding that to the <body> as a whole, else could perhaps apply it
1694              * to all subsequent elements. But also in consideration is the fact
1695              * that it was NOT supported in many browsers.
1696              * - For now just report a warning
1697              */
1698             if ( !already_strict )
1699                 TY_(Report)(doc, node, node, REMOVED_HTML5);
1700         } else
1701         if ( nodeIsBIG(node) ) {
1702             /* big: CSS equivalent 'font-size:larger'
1703              * so could replace the <big> ... </big> with
1704              * <span style="font-size: larger"> ... </span>
1705              * then replace <big> with <span>
1706              * Need to think about that...
1707              * Could use -
1708              *   TY_(AddStyleProperty)( doc, node, "font-size: larger" );
1709              *   TY_(CoerceNode)(doc, node, TidyTag_SPAN, no, no);
1710              * Alternatively generated a <style> but how to get the style name
1711              * TY_(AddAttribute)( doc, node, "class", "????" );
1712              * Also maybe need a specific message like
1713              * Element '%s' replaced with 'span' with a 'font-size: larger style attribute
1714              * maybe should use static void RenameElem( TidyDocImpl* doc, Node* node, TidyTagId tid )
1715              */
1716             if (clean) {
1717                 TY_(AddStyleProperty)( doc, node, "font-size: larger" );
1718                 TY_(CoerceNode)(doc, node, TidyTag_SPAN, warn, no);
1719             } else {
1720                 if ( !already_strict )
1721                     TY_(Report)(doc, node, node, REMOVED_HTML5);
1722             }
1723         } else
1724         if ( nodeIsCENTER(node) ) {
1725             /* center: CSS equivalent 'text-align:center'
1726              * and 'margin-left:auto; margin-right:auto' on descendant blocks
1727              * Tidy already handles this if 'clean' by SILENTLY generating the
1728              * <style> and adding a <div class="c1"> around the elements.
1729              * see: static Bool Center2Div( TidyDocImpl* doc, Node *node, Node **pnode)
1730              */
1731             if ( !already_strict )
1732                 TY_(Report)(doc, node, node, REMOVED_HTML5);
1733         } else
1734         if ( nodeIsDIR(node) ) {
1735             /* dir: replace by <ul>
1736              * Tidy already actions this and issues a warning
1737              * Should this be CHANGED???
1738              */
1739             if ( !already_strict )
1740                 TY_(Report)(doc, node, node, REMOVED_HTML5);
1741         } else
1742         if ( nodeIsFONT(node) ) {
1743             /* Tidy already handles this -
1744              * If 'clean' replaced by CSS, else
1745              * if is NOT clean, and doctype html5 then warnings issued
1746              * done in Bool Font2Span( TidyDocImpl* doc, Node *node, Node **pnode ) (I think?)
1747              */
1748             if ( !already_strict )
1749                 TY_(Report)(doc, node, node, REMOVED_HTML5);
1750         } else
1751         if (( nodesIsFRAME(node) ) || ( nodeIsFRAMESET(node) ) || ( nodeIsNOFRAMES(node) )) {
1752             /* YOW: What to do here?????? Maybe <iframe>????
1753              */
1754             if ( !already_strict )
1755                 TY_(Report)(doc, node, node, REMOVED_HTML5);
1756         } else
1757         if ( nodeIsSTRIKE(node) ) {
1758             /* strike: CSS equivalent 'text-decoration:line-through'
1759              * maybe should use static void RenameElem( TidyDocImpl* doc, Node* node, TidyTagId tid )
1760              */
1761             if (clean) {
1762                 TY_(AddStyleProperty)( doc, node, "text-decoration: line-through" );
1763                 TY_(CoerceNode)(doc, node, TidyTag_SPAN, warn, no);
1764             } else {
1765                 if ( !already_strict )
1766                     TY_(Report)(doc, node, node, REMOVED_HTML5);
1767             }
1768         } else
1769         if ( nodeIsTT(node) ) {
1770             /* tt: CSS equivalent 'font-family:monospace'
1771              * Tidy presently does nothing. Tidy5 issues a warning
1772              * But like the 'clean' <font> replacement this could also be replaced with CSS
1773              * maybe should use static void RenameElem( TidyDocImpl* doc, Node* node, TidyTagId tid )
1774              */
1775             if (clean) {
1776                 TY_(AddStyleProperty)( doc, node, "font-family: monospace" );
1777                 TY_(CoerceNode)(doc, node, TidyTag_SPAN, warn, no);
1778             } else {
1779                 if ( !already_strict )
1780                     TY_(Report)(doc, node, node, REMOVED_HTML5);
1781             }
1782         } else
1783             if (TY_(nodeIsElement)(node)) {
1784                 if (node->tag) {
1785                     if ( (!(node->tag->versions & VERS_HTML5) && !(node->tag->versions & VERS_PROPRIETARY)) || (inRemovedInfo(node->tag->id)) ) {
1786                         if ( !already_strict )
1787                             TY_(Report)(doc, node, node, REMOVED_HTML5);
1788                     }
1789                 }
1790             }
1791 
1792         if (node->content)
1793             TY_(CheckHTML5)( doc, node->content );
1794 
1795         node = node->next;
1796     }
1797 }
1798 /*****************************************************************************
1799  *  END HTML5 STUFF
1800  *****************************************************************************/
1801 
1802 
1803 /*
1804  * Check and report HTML tags and attributes that are:
1805  *  - Proprietary, and/or
1806  *  - Not supported in the current version of HTML, defined as the version
1807  *    of HTML that we are emitting.
1808  * Proprietary items are reported as WARNINGS, and version mismatches will
1809  * be reported as WARNING or ERROR in the following conditions:
1810  *  - ERROR if the emitted doctype is a strict doctype.
1811  *  - WARNING if the emitted doctype is a non-strict doctype.
1812  * The propriety checks are *always* run as they have always been an integral
1813  * part of Tidy. The version checks are controlled by `strict-tags-attributes`.
1814  */
TY_(CheckHTMLTagsAttribsVersions)1815 static void TY_(CheckHTMLTagsAttribsVersions)( TidyDocImpl* doc, Node* node )
1816 {
1817     uint versionEmitted = doc->lexer->versionEmitted;
1818     uint declared = doc->lexer->doctype;
1819     uint version = versionEmitted == 0 ? declared : versionEmitted;
1820     int tagReportType = VERS_STRICT & version ? ELEMENT_VERS_MISMATCH_ERROR : ELEMENT_VERS_MISMATCH_WARN;
1821     int attrReportType = VERS_STRICT & version ? MISMATCHED_ATTRIBUTE_ERROR : MISMATCHED_ATTRIBUTE_WARN;
1822     Bool check_versions = cfgBool( doc, TidyStrictTagsAttr );
1823     AttVal *next_attr, *attval;
1824     Bool attrIsProprietary = no;
1825     Bool attrIsMismatched = yes;
1826     Bool tagLooksCustom = no;
1827     Bool htmlIs5 = (doc->lexer->doctype & VERS_HTML5) > 0;
1828 
1829     while (node)
1830     {
1831         /* This bit here handles our HTML tags */
1832         if ( TY_(nodeIsElement)(node) && node->tag ) {
1833 
1834             /* Leave XML stuff alone. */
1835             if ( !cfgBool(doc, TidyXmlTags) )
1836             {
1837                 /* Version mismatches take priority. */
1838                 if ( check_versions && !(node->tag->versions & version) )
1839                 {
1840                     TY_(Report)(doc, NULL, node, tagReportType );
1841                 }
1842                 /* If it's not mismatched, it could still be proprietary. */
1843                 else if ( node->tag->versions & VERS_PROPRIETARY )
1844                 {
1845                     if ( !cfgBool(doc, TidyMakeClean) ||
1846                         ( !nodeIsNOBR(node) && !nodeIsWBR(node) ) )
1847                     {
1848                         /* It looks custom, despite whether it's a known tag. */
1849                         tagLooksCustom = TY_(nodeIsAutonomousCustomFormat)( node );
1850 
1851                         /* If we're in HTML5 mode and the tag does not look
1852                            like a valid custom tag, then issue a warning.
1853                            Appearance is good enough because invalid tags have
1854                            been dropped. Also, if we're not in HTML5 mode, then
1855                            then everything that reaches here gets the warning.
1856                            Everything else can be ignored. */
1857 
1858                         if ( (htmlIs5 && !tagLooksCustom) || !htmlIs5 )
1859                         {
1860                             TY_(Report)(doc, NULL, node, PROPRIETARY_ELEMENT );
1861                         }
1862 
1863                         if ( nodeIsLAYER(node) )
1864                             doc->badLayout |= USING_LAYER;
1865                         else if ( nodeIsSPACER(node) )
1866                             doc->badLayout |= USING_SPACER;
1867                         else if ( nodeIsNOBR(node) )
1868                             doc->badLayout |= USING_NOBR;
1869                     }
1870                 }
1871             }
1872         }
1873 
1874         /* And this bit here handles our attributes */
1875         if (TY_(nodeIsElement)(node))
1876         {
1877             attval = node->attributes;
1878 
1879             while (attval)
1880             {
1881                 next_attr = attval->next;
1882 
1883                 attrIsProprietary = TY_(AttributeIsProprietary)(node, attval);
1884                 /* Is. #729 - always check version match if HTML5 */
1885                 attrIsMismatched = (check_versions | htmlIs5) ? TY_(AttributeIsMismatched)(node, attval, doc) : no;
1886                 /* Let the PROPRIETARY_ATTRIBUTE warning have precedence. */
1887                 if ( attrIsProprietary )
1888                 {
1889                     if ( cfgBool(doc, TidyWarnPropAttrs) )
1890                         TY_(ReportAttrError)(doc, node, attval, PROPRIETARY_ATTRIBUTE);
1891                 }
1892                 else if ( attrIsMismatched )
1893                 {
1894                     if (htmlIs5)
1895                     {
1896                         /* Is. #729 - In html5 TidyStrictTagsAttr controls error or warn */
1897                         TY_(ReportAttrError)(doc, node, attval,
1898                             check_versions ? MISMATCHED_ATTRIBUTE_ERROR : MISMATCHED_ATTRIBUTE_WARN);
1899                     }
1900                     else
1901                         TY_(ReportAttrError)(doc, node, attval, attrReportType);
1902 
1903                 }
1904 
1905                 /* @todo: do we need a new option to drop mismatches? Or should we
1906                  simply drop them? */
1907                 if ( ( attrIsProprietary || attrIsMismatched ) && cfgBool(doc, TidyDropPropAttrs) )
1908                     TY_(RemoveAttribute)( doc, node, attval );
1909 
1910                 attval = next_attr;
1911             }
1912         }
1913 
1914         if (node->content)
1915             TY_(CheckHTMLTagsAttribsVersions)( doc, node->content );
1916 
1917         node = node->next;
1918     }
1919 }
1920 
1921 
1922 #if defined(ENABLE_DEBUG_LOG)
1923 /* *** FOR DEBUG ONLY *** */
dbg_get_lexer_type(void * vp)1924 const char *dbg_get_lexer_type( void *vp )
1925 {
1926     Node *node = (Node *)vp;
1927     switch ( node->type )
1928     {
1929     case RootNode:      return "Root";
1930     case DocTypeTag:    return "DocType";
1931     case CommentTag:    return "Comment";
1932     case ProcInsTag:    return "ProcIns";
1933     case TextNode:      return "Text";
1934     case StartTag:      return "StartTag";
1935     case EndTag:        return "EndTag";
1936     case StartEndTag:   return "StartEnd";
1937     case CDATATag:      return "CDATA";
1938     case SectionTag:    return "Section";
1939     case AspTag:        return "Asp";
1940     case JsteTag:       return "Jste";
1941     case PhpTag:        return "Php";
1942     case XmlDecl:       return "XmlDecl";
1943     }
1944     return "Uncased";
1945 }
1946 
1947 /* NOTE: THis matches the above lexer type, except when element has a name */
dbg_get_element_name(void * vp)1948 const char *dbg_get_element_name( void *vp )
1949 {
1950     Node *node = (Node *)vp;
1951     switch ( node->type )
1952     {
1953     case TidyNode_Root:       return "Root";
1954     case TidyNode_DocType:    return "DocType";
1955     case TidyNode_Comment:    return "Comment";
1956     case TidyNode_ProcIns:    return "ProcIns";
1957     case TidyNode_Text:       return "Text";
1958     case TidyNode_CDATA:      return "CDATA";
1959     case TidyNode_Section:    return "Section";
1960     case TidyNode_Asp:        return "Asp";
1961     case TidyNode_Jste:       return "Jste";
1962     case TidyNode_Php:        return "Php";
1963     case TidyNode_XmlDecl:    return "XmlDecl";
1964 
1965     case TidyNode_Start:
1966     case TidyNode_End:
1967     case TidyNode_StartEnd:
1968     default:
1969         if (node->element)
1970             return node->element;
1971     }
1972     return "Unknown";
1973 }
1974 
/* Debug helper: print a one-line summary of `node` through SPRTF.
 *
 * The line holds `indent` leading spaces (none when `indent` <= 0), the
 * lexer type (omitted when it equals the element name), the element name,
 * "implicit" when the node is implicit, and a label for `caller`
 * (1 = "discard", 2 = "trim", 3 = "test", anything else = no label).
 * When the document has a lexer and the node's name is "Text", the length
 * and content of the node's lexer buffer range follow: in full when shorter
 * than 40 bytes, otherwise the first and last 19 bytes joined by "...".
 * Finally each named attribute is printed, with ="value" when it has one.
 */
void dbg_show_node( TidyDocImpl* doc, Node *node, int caller, int indent )
{
    enum { SHOW_ALL_BELOW = 40, EXCERPT_LEN = 19 };
    AttVal* av;
    Lexer* lexer = doc->lexer;
    ctmbstr call = "";
    ctmbstr name = dbg_get_element_name(node);
    ctmbstr type = dbg_get_lexer_type(node);
    ctmbstr impl = node->implicit ? "implicit" : "";
    switch ( caller )
    {
    case 1: call = "discard";   break;
    case 2: call = "trim";      break;
    case 3: call = "test";      break;
    }
    /* "> 0" so that a negative indent prints nothing instead of looping
       until the counter wraps around. */
    while (indent-- > 0)
        SPRTF(" ");
    if (strcmp(type,name))
        SPRTF("%s %s %s %s", type, name, impl, call );
    else
        SPRTF("%s %s %s", name, impl, call );
    if (lexer && (strcmp("Text",name) == 0)) {
        uint len = node->end - node->start;
        uint i;
        /* len is unsigned, so print it with %u */
        SPRTF(" (%u) '", len);
        if (len < SHOW_ALL_BELOW) {
            /* show it all */
            for (i = node->start; i < node->end; i++) {
                SPRTF("%c", lexer->lexbuf[i]);
            }
        } else {
            /* partial display: leading excerpt, "...", trailing excerpt.
               The leading bound is relative to node->start; i is an
               absolute index into the lexer buffer. */
            uint head_end = node->start + EXCERPT_LEN;
            for (i = node->start; i < head_end; i++) {
                SPRTF("%c", lexer->lexbuf[i]);
            }
            SPRTF("...");
            for (i = node->end - EXCERPT_LEN; i < node->end; i++) {
                SPRTF("%c", lexer->lexbuf[i]);
            }
        }
        SPRTF("'");
    }
    for (av = node->attributes; av; av = av->next) {
        ctmbstr attr_name = av->attribute;
        if (attr_name) {
            SPRTF(" %s", attr_name);
            if (av->value) {
                SPRTF("=\"%s\"", av->value);
            }
        }
    }

    SPRTF("\n");
}
2030 
/* Debug helper: print `node` and each of its following siblings with
 * dbg_show_node(), descending into every node's content with the indent
 * increased by one.
 */
void dbg_show_all_nodes( TidyDocImpl* doc, Node *node, int indent )
{
    Node *cur;

    for ( cur = node; cur; cur = cur->next )
    {
        dbg_show_node( doc, cur, 0, indent );
        dbg_show_all_nodes( doc, cur->content, indent + 1 );
    }
}
2040 
2041 #endif
2042 
/* Apply the configured clean-up and repair passes to the document tree,
 * fix up doctype / anchors / namespace / language information, and run the
 * HTML version checks. The order of the passes is significant.
 *
 * The config-change callback is suspended while the passes run and is
 * restored before returning. When `TidyXmlTags` is set nothing is repaired.
 * Returns tidyDocStatus( doc ).
 */
int         tidyDocCleanAndRepair( TidyDocImpl* doc )
{
    /* All options are sampled once, before any pass runs. */
    Bool useWord2K      = cfgBool( doc, TidyWord2000 );
    Bool useLogical     = cfgBool( doc, TidyLogicalEmphasis );
    Bool makeClean      = cfgBool( doc, TidyMakeClean );
    Bool cleanGDoc      = cfgBool( doc, TidyGDocClean );
    Bool htmlOut        = cfgBool( doc, TidyHtmlOut );
    Bool xmlOut         = cfgBool( doc, TidyXmlOut );
    Bool xhtmlOut       = cfgBool( doc, TidyXhtmlOut );
    Bool xmlDecl        = cfgBool( doc, TidyXmlDecl );
    Bool addMark        = cfgBool( doc, TidyMark );
    Bool xmlTags        = cfgBool( doc, TidyXmlTags );
    Bool wantNameAttr   = cfgBool( doc, TidyAnchorAsName );
    Bool mergeEmphasis  = cfgBool( doc, TidyMergeEmphasis );
    Node* doctype;
    TidyConfigChangeCallback savedCallback = doc->pConfigChangeCallback;

    /* Suspend change notifications for the duration of this call. */
    doc->pConfigChangeCallback = NULL;

#if defined(ENABLE_DEBUG_LOG)
    SPRTF("All nodes BEFORE clean and repair\n");
    dbg_show_all_nodes( doc, &doc->root, 0  );
#endif
    if ( xmlTags )
    {
        doc->pConfigChangeCallback = savedCallback;
        return tidyDocStatus( doc );
    }

    /* Issue #567 - move style elements from body to head */
    TY_(CleanStyle)(doc, &doc->root);

    /* simplifies <b><b> ... </b> ...</b> etc. */
    if ( mergeEmphasis )
        TY_(NestedEmphasis)( doc, &doc->root );

    /* cleans up <dir>indented text</dir> etc. */
    TY_(List2BQ)( doc, &doc->root );
    TY_(BQ2Div)( doc, &doc->root );

    /* replaces i by em and b by strong */
    if ( useLogical )
        TY_(EmFromI)( doc, &doc->root );

    if ( useWord2K && TY_(IsWord2000)(doc) )
    {
        /* prune Word2000's <![if ...]> ... <![endif]> */
        TY_(DropSections)( doc, &doc->root );

        /* drop style & class attributes and empty p, span elements */
        TY_(CleanWord2000)( doc, &doc->root );
        TY_(DropEmptyElements)(doc, &doc->root);
    }

    /* replaces presentational markup by style rules */
    if ( makeClean )
        TY_(CleanDocument)( doc );

    /* clean up html exported by Google Docs */
    if ( cleanGDoc )
        TY_(CleanGoogleDocument)( doc );

    /*  Reconcile http-equiv meta element with output encoding  */
    TY_(TidyMetaCharset)(doc);

    if ( !TY_(CheckNodeIntegrity)( &doc->root ) )
        TidyPanic( doc->allocator, integrity );

    /* remember given doctype for reporting */
    doctype = TY_(FindDocType)(doc);
    if ( doctype )
    {
        AttVal* fpi = TY_(GetAttrByName)(doctype, "PUBLIC");
        if ( AttrHasValue(fpi) )
        {
            /* replace any previously remembered doctype */
            if ( doc->givenDoctype )
                TidyDocFree(doc, doc->givenDoctype);
            doc->givenDoctype = TY_(tmbstrdup)(doc->allocator,fpi->value);
        }
    }

    if ( doc->root.content )
    {
        /* XHTML-flavoured fix-ups apply only when XHTML output is wanted
           and HTML output is not. */
        Bool asXhtml = ( xhtmlOut && !htmlOut ) ? yes : no;

        /* If we had XHTML input but want HTML output */
        if ( htmlOut && doc->lexer->isvoyager )
        {
            Node* voyagerDoctype = TY_(FindDocType)(doc);
            /* Remove reference, but do not free */
            if ( voyagerDoctype )
                TY_(RemoveNode)(voyagerDoctype);
        }

        if ( asXhtml )
            TY_(SetXHTMLDocType)(doc);
        else
            TY_(FixDocType)(doc);

        TY_(FixAnchors)(doc, &doc->root, wantNameAttr, yes);
        TY_(FixXhtmlNamespace)(doc, asXhtml);
        TY_(FixLanguageInformation)(doc, &doc->root, asXhtml, yes);

        if ( addMark )
            TY_(AddGenerator)(doc);
    }

    /* ensure presence of initial <?xml version="1.0"?> */
    if ( xmlOut && xmlDecl )
        TY_(FixXmlDecl)( doc );

    /* At this point the apparent doctype is going to be as stable as
       it can ever be, so we can start detecting things that shouldn't
       be in this version of HTML
     */
    if ( doc->lexer )
    {
        /*\
         *  Issue #429 #426 - These services can only be used
         *  when there is a document loaded, ie a lexer created.
         *  But really should not be calling a Clean and Repair
         *  service with no doc!
        \*/
        if ( doc->lexer->versionEmitted & VERS_HTML5 )
            TY_(CheckHTML5)( doc, &doc->root );
        TY_(CheckHTMLTagsAttribsVersions)( doc, &doc->root );

        if ( !doc->lexer->isvoyager && doc->xmlDetected )
            TY_(Report)(doc, NULL, TY_(FindXmlDecl)(doc), XML_DECLARATION_DETECTED );
    }

    TY_(CleanHead)(doc); /* Is #692 - discard multiple <title> tags */

#if defined(ENABLE_DEBUG_LOG)
    SPRTF("All nodes AFTER clean and repair\n");
    dbg_show_all_nodes( doc, &doc->root, 0  );
#endif

    doc->pConfigChangeCallback = savedCallback;
    return tidyDocStatus( doc );
}
2192 
2193 static
showBodyOnly(TidyDocImpl * doc,TidyTriState bodyOnly)2194 Bool showBodyOnly( TidyDocImpl* doc, TidyTriState bodyOnly )
2195 {
2196     Node* node;
2197 
2198     switch( bodyOnly )
2199     {
2200     case TidyNoState:
2201         return no;
2202     case TidyYesState:
2203         return yes;
2204     default:
2205         node = TY_(FindBody)( doc );
2206         if (node && node->implicit )
2207             return yes;
2208     }
2209     return no;
2210 }
2211 
2212 
/* Run the output-time tree passes and, when markup is to be shown and the
 * document has no errors (or output is forced), pretty-print the document
 * to `out`.
 *
 * The config-change callback is suspended while this runs and restored
 * before returning. Returns tidyDocStatus( doc ).
 */
int         tidyDocSaveStream( TidyDocImpl* doc, StreamOut* out )
{
    Bool showMarkup        = cfgBool( doc, TidyShowMarkup );
    Bool forceOutput       = cfgBool( doc, TidyForceOutput );
    TidyTriState bomMode   = cfgAutoBool( doc, TidyOutputBOM );
    Bool xmlOut            = cfgBool( doc, TidyXmlOut );
    Bool xhtmlOut          = cfgBool( doc, TidyXhtmlOut );
    TidyTriState bodyOnly  = cfgAutoBool( doc, TidyBodyOnly );

    Bool hideComments      = cfgBool(doc, TidyHideComments);
    Bool makeClean         = cfgBool(doc, TidyMakeClean);
    Bool asciiChars        = cfgBool(doc, TidyAsciiChars);
    Bool makeBare          = cfgBool(doc, TidyMakeBare);
    Bool escapeCDATA       = cfgBool(doc, TidyEscapeCdata);
    Bool indentWithTabs    = cfgBool(doc, TidyPPrintTabs);
    TidyAttrSortStrategy sortStrategy = cfg(doc, TidySortAttributes);
    TidyConfigChangeCallback savedCallback = doc->pConfigChangeCallback;

    /* Suspend change notifications for the duration of this call. */
    doc->pConfigChangeCallback = NULL;

    if ( indentWithTabs )
        TY_(PPrintTabs)();
    else
        TY_(PPrintSpaces)();

    if ( escapeCDATA )
        TY_(ConvertCDATANodes)(doc, &doc->root);

    if ( hideComments )
        TY_(DropComments)(doc, &doc->root);

    /* NOTE(review): this call was annotated "noop" in the original source -
       confirm whether the pass still has any effect. */
    if ( makeClean )
        TY_(DropFontElements)(doc, &doc->root, NULL);

    if ( makeBare || (makeClean && asciiChars) )
        TY_(DowngradeTypography)(doc, &doc->root);

    if ( makeBare )
    {
        /* Note: no longer replaces &nbsp; in */
        /* attribute values / non-text tokens */
        TY_(NormalizeSpaces)(doc->lexer, &doc->root);
    }
    else
    {
        TY_(ReplacePreformattedSpaces)(doc, &doc->root);
    }

    TY_(SortAttributes)(doc, &doc->root, sortStrategy);

    if ( showMarkup && (doc->errors == 0 || forceOutput) )
    {
        /* Output a Byte Order Mark if required: always when the option is
           "yes", and in "auto" mode when the input had one. */
        if ( bomMode == TidyYesState ||
             (doc->inputHadBOM && bomMode == TidyAutoState) )
            TY_(outBOM)( out );

        /* No longer necessary. No DOCTYPE == HTML 3.2,
        ** which gives you only the basic character entities,
        ** which are safe in any browser.
        ** if ( !TY_(FindDocType)(doc) )
        **    TY_(SetOptionBool)( doc, TidyNumEntities, yes );
        */

        doc->docOut = out;
        if ( xmlOut && !xhtmlOut )
            TY_(PPrintXMLTree)( doc, NORMAL, 0, &doc->root );
        else if ( showBodyOnly( doc, bodyOnly ) )
            TY_(PrintBody)( doc );
        else
            TY_(PPrintTree)( doc, NORMAL, 0, &doc->root );

        TY_(PFlushLine)( doc, 0 );
        doc->docOut = NULL;
    }

    /* @jsd: the config is deliberately NOT reset to its snapshot here
       (TY_(ResetConfigToSnapshot) used to be called); removing that should
       solve #673, and allow saving of the buffer multiple times. */
    doc->pConfigChangeCallback = savedCallback;

    return tidyDocStatus( doc );
}
2293 
2294 /* Tree traversal functions
2295 **
2296 ** The big issue here is the degree to which we should mimic
2297 ** a DOM and/or SAX nodes.
2298 **
2299 ** Is it 100% possible (and, if so, how difficult is it) to
2300 ** emit SAX events from this API?  If SAX events are possible,
2301 ** is that 100% of data needed to build a DOM?
2302 */
2303 
tidyGetRoot(TidyDoc tdoc)2304 TidyNode TIDY_CALL   tidyGetRoot( TidyDoc tdoc )
2305 {
2306     TidyDocImpl* impl = tidyDocToImpl( tdoc );
2307     Node* node = NULL;
2308     if ( impl )
2309         node = &impl->root;
2310     return tidyImplToNode( node );
2311 }
2312 
tidyGetHtml(TidyDoc tdoc)2313 TidyNode TIDY_CALL   tidyGetHtml( TidyDoc tdoc )
2314 {
2315   TidyDocImpl* impl = tidyDocToImpl( tdoc );
2316   Node* node = NULL;
2317   if ( impl )
2318       node = TY_(FindHTML)( impl );
2319   return tidyImplToNode( node );
2320 }
2321 
tidyGetHead(TidyDoc tdoc)2322 TidyNode TIDY_CALL    tidyGetHead( TidyDoc tdoc )
2323 {
2324   TidyDocImpl* impl = tidyDocToImpl( tdoc );
2325   Node* node = NULL;
2326   if ( impl )
2327       node = TY_(FindHEAD)( impl );
2328   return tidyImplToNode( node );
2329 }
2330 
tidyGetBody(TidyDoc tdoc)2331 TidyNode TIDY_CALL    tidyGetBody( TidyDoc tdoc )
2332 {
2333   TidyDocImpl* impl = tidyDocToImpl( tdoc );
2334   Node* node = NULL;
2335   if ( impl )
2336       node = TY_(FindBody)( impl );
2337   return tidyImplToNode( node );
2338 }
2339 
2340 /* parent / child */
tidyGetParent(TidyNode tnod)2341 TidyNode TIDY_CALL    tidyGetParent( TidyNode tnod )
2342 {
2343   Node* nimp = tidyNodeToImpl( tnod );
2344   return tidyImplToNode( nimp->parent );
2345 }
tidyGetChild(TidyNode tnod)2346 TidyNode TIDY_CALL    tidyGetChild( TidyNode tnod )
2347 {
2348   Node* nimp = tidyNodeToImpl( tnod );
2349   return tidyImplToNode( nimp->content );
2350 }
2351 
2352 /* remove a node */
tidyDiscardElement(TidyDoc tdoc,TidyNode tnod)2353 TidyNode TIDY_CALL    tidyDiscardElement( TidyDoc tdoc, TidyNode tnod )
2354 {
2355   TidyDocImpl* doc = tidyDocToImpl( tdoc );
2356   Node* nimp = tidyNodeToImpl( tnod );
2357   Node* next = TY_(DiscardElement)( doc, nimp );
2358   return tidyImplToNode( next );
2359 }
2360 
2361 /* siblings */
tidyGetNext(TidyNode tnod)2362 TidyNode TIDY_CALL    tidyGetNext( TidyNode tnod )
2363 {
2364   Node* nimp = tidyNodeToImpl( tnod );
2365   return tidyImplToNode( nimp->next );
2366 }
tidyGetPrev(TidyNode tnod)2367 TidyNode TIDY_CALL    tidyGetPrev( TidyNode tnod )
2368 {
2369   Node* nimp = tidyNodeToImpl( tnod );
2370   return tidyImplToNode( nimp->prev );
2371 }
2372 
2373 /* Node info */
tidyNodeGetType(TidyNode tnod)2374 TidyNodeType TIDY_CALL tidyNodeGetType( TidyNode tnod )
2375 {
2376   Node* nimp = tidyNodeToImpl( tnod );
2377   TidyNodeType ntyp = TidyNode_Root;
2378   if ( nimp )
2379     ntyp = (TidyNodeType) nimp->type;
2380   return ntyp;
2381 }
2382 
tidyNodeLine(TidyNode tnod)2383 uint TIDY_CALL tidyNodeLine( TidyNode tnod )
2384 {
2385   Node* nimp = tidyNodeToImpl( tnod );
2386   uint line = 0;
2387   if ( nimp )
2388     line = nimp->line;
2389   return line;
2390 }
tidyNodeColumn(TidyNode tnod)2391 uint TIDY_CALL tidyNodeColumn( TidyNode tnod )
2392 {
2393   Node* nimp = tidyNodeToImpl( tnod );
2394   uint col = 0;
2395   if ( nimp )
2396     col = nimp->column;
2397   return col;
2398 }
2399 
tidyNodeGetName(TidyNode tnod)2400 ctmbstr TIDY_CALL tidyNodeGetName( TidyNode tnod )
2401 {
2402   Node* nimp = tidyNodeToImpl( tnod );
2403   ctmbstr nnam = NULL;
2404   if ( nimp )
2405     nnam = nimp->element;
2406   return nnam;
2407 }
2408 
2409 
tidyNodeHasText(TidyDoc tdoc,TidyNode tnod)2410 Bool TIDY_CALL tidyNodeHasText( TidyDoc tdoc, TidyNode tnod )
2411 {
2412   TidyDocImpl* doc = tidyDocToImpl( tdoc );
2413   if ( doc )
2414       return TY_(nodeHasText)( doc, tidyNodeToImpl(tnod) );
2415   return no;
2416 }
2417 
2418 
tidyNodeGetText(TidyDoc tdoc,TidyNode tnod,TidyBuffer * outbuf)2419 Bool TIDY_CALL tidyNodeGetText( TidyDoc tdoc, TidyNode tnod, TidyBuffer* outbuf )
2420 {
2421   TidyDocImpl* doc = tidyDocToImpl( tdoc );
2422   Node* nimp = tidyNodeToImpl( tnod );
2423   if ( doc && nimp && outbuf )
2424   {
2425       uint outenc     = cfg( doc, TidyOutCharEncoding );
2426       uint nl         = cfg( doc, TidyNewline );
2427       StreamOut* out  = TY_(BufferOutput)( doc, outbuf, outenc, nl );
2428       Bool xmlOut     = cfgBool( doc, TidyXmlOut );
2429       Bool xhtmlOut   = cfgBool( doc, TidyXhtmlOut );
2430 
2431       doc->docOut = out;
2432       if ( xmlOut && !xhtmlOut )
2433           TY_(PPrintXMLTree)( doc, NORMAL, 0, nimp );
2434       else
2435           TY_(PPrintTree)( doc, NORMAL, 0, nimp );
2436 
2437       TY_(PFlushLine)( doc, 0 );
2438       doc->docOut = NULL;
2439 
2440       TidyDocFree( doc, out );
2441       return yes;
2442   }
2443   return no;
2444 }
2445 
tidyNodeGetValue(TidyDoc tdoc,TidyNode tnod,TidyBuffer * buf)2446 Bool TIDY_CALL tidyNodeGetValue( TidyDoc tdoc, TidyNode tnod, TidyBuffer* buf )
2447 {
2448     TidyDocImpl *doc = tidyDocToImpl( tdoc );
2449     Node *node = tidyNodeToImpl( tnod );
2450     if ( doc == NULL || node == NULL || buf == NULL )
2451         return no;
2452 
2453     switch( node->type ) {
2454     case TextNode:
2455     case CDATATag:
2456     case CommentTag:
2457     case ProcInsTag:
2458     case SectionTag:
2459     case AspTag:
2460     case JsteTag:
2461     case PhpTag:
2462     {
2463         tidyBufClear( buf );
2464         tidyBufAppend( buf, doc->lexer->lexbuf + node->start,
2465                        node->end - node->start );
2466         break;
2467     }
2468     default:
2469         /* The node doesn't have a value */
2470         return no;
2471     }
2472 
2473     return yes;
2474 }
2475 
tidyNodeIsProp(TidyDoc ARG_UNUSED (tdoc),TidyNode tnod)2476 Bool TIDY_CALL tidyNodeIsProp( TidyDoc ARG_UNUSED(tdoc), TidyNode tnod )
2477 {
2478   Node* nimp = tidyNodeToImpl( tnod );
2479   Bool isProprietary = yes;
2480   if ( nimp )
2481   {
2482     switch ( nimp->type )
2483     {
2484     case RootNode:
2485     case DocTypeTag:
2486     case CommentTag:
2487     case XmlDecl:
2488     case ProcInsTag:
2489     case TextNode:
2490     case CDATATag:
2491         isProprietary = no;
2492         break;
2493 
2494     case SectionTag:
2495     case AspTag:
2496     case JsteTag:
2497     case PhpTag:
2498         isProprietary = yes;
2499         break;
2500 
2501     case StartTag:
2502     case EndTag:
2503     case StartEndTag:
2504         isProprietary = ( nimp->tag
2505                           ? (nimp->tag->versions&VERS_PROPRIETARY)!=0
2506                           : yes );
2507         break;
2508     }
2509   }
2510   return isProprietary;
2511 }
2512 
tidyNodeGetId(TidyNode tnod)2513 TidyTagId TIDY_CALL tidyNodeGetId(TidyNode tnod)
2514 {
2515     Node* nimp = tidyNodeToImpl(tnod);
2516 
2517     TidyTagId tagId = TidyTag_UNKNOWN;
2518     if (nimp && nimp->tag)
2519         tagId = nimp->tag->id;
2520 
2521     return tagId;
2522 }
2523 
2524 
2525 /* Iterate over attribute values */
tidyAttrFirst(TidyNode tnod)2526 TidyAttr TIDY_CALL   tidyAttrFirst( TidyNode tnod )
2527 {
2528   Node* nimp = tidyNodeToImpl( tnod );
2529   AttVal* attval = NULL;
2530   if ( nimp )
2531     attval = nimp->attributes;
2532   return tidyImplToAttr( attval );
2533 }
tidyAttrNext(TidyAttr tattr)2534 TidyAttr TIDY_CALL    tidyAttrNext( TidyAttr tattr )
2535 {
2536   AttVal* attval = tidyAttrToImpl( tattr );
2537   AttVal* nxtval = NULL;
2538   if ( attval )
2539     nxtval = attval->next;
2540   return tidyImplToAttr( nxtval );
2541 }
2542 
tidyAttrName(TidyAttr tattr)2543 ctmbstr TIDY_CALL       tidyAttrName( TidyAttr tattr )
2544 {
2545   AttVal* attval = tidyAttrToImpl( tattr );
2546   ctmbstr anam = NULL;
2547   if ( attval )
2548     anam = attval->attribute;
2549   return anam;
2550 }
tidyAttrValue(TidyAttr tattr)2551 ctmbstr TIDY_CALL       tidyAttrValue( TidyAttr tattr )
2552 {
2553   AttVal* attval = tidyAttrToImpl( tattr );
2554   ctmbstr aval = NULL;
2555   if ( attval )
2556     aval = attval->value;
2557   return aval;
2558 }
2559 
tidyAttrDiscard(TidyDoc tdoc,TidyNode tnod,TidyAttr tattr)2560 void TIDY_CALL           tidyAttrDiscard( TidyDoc tdoc, TidyNode tnod, TidyAttr tattr )
2561 {
2562   TidyDocImpl* impl = tidyDocToImpl( tdoc );
2563   Node* nimp = tidyNodeToImpl( tnod );
2564   AttVal* attval = tidyAttrToImpl( tattr );
2565   TY_(RemoveAttribute)( impl, nimp, attval );
2566 }
2567 
tidyAttrGetId(TidyAttr tattr)2568 TidyAttrId TIDY_CALL tidyAttrGetId( TidyAttr tattr )
2569 {
2570   AttVal* attval = tidyAttrToImpl( tattr );
2571   TidyAttrId attrId = TidyAttr_UNKNOWN;
2572   if ( attval && attval->dict )
2573     attrId = attval->dict->id;
2574   return attrId;
2575 }
2576 
tidyAttrGetById(TidyNode tnod,TidyAttrId attId)2577 TidyAttr TIDY_CALL tidyAttrGetById( TidyNode tnod, TidyAttrId attId )
2578 {
2579     Node* nimp = tidyNodeToImpl(tnod);
2580     return tidyImplToAttr( TY_(AttrGetById)( nimp, attId ) );
2581 }
2582 
2583 
tidyAttrIsEvent(TidyAttr tattr)2584 Bool TIDY_CALL tidyAttrIsEvent( TidyAttr tattr )
2585 {
2586     return TY_(attrIsEvent)( tidyAttrToImpl(tattr) );
2587 }
2588 
2589 
2590 /*******************************************************************
2591  ** Message Key Management
2592  *******************************************************************/
tidyErrorCodeAsKey(uint code)2593 ctmbstr TIDY_CALL tidyErrorCodeAsKey(uint code)
2594 {
2595     return TY_(tidyErrorCodeAsKey)( code );
2596 }
2597 
tidyErrorCodeFromKey(ctmbstr code)2598 uint TIDY_CALL tidyErrorCodeFromKey(ctmbstr code)
2599 {
2600     return TY_(tidyErrorCodeFromKey)( code );
2601 }
2602 
getErrorCodeList()2603 TidyIterator TIDY_CALL getErrorCodeList()
2604 {
2605     return TY_(getErrorCodeList)();
2606 }
2607 
getNextErrorCode(TidyIterator * iter)2608 uint TIDY_CALL getNextErrorCode( TidyIterator* iter )
2609 {
2610     return TY_(getNextErrorCode)(iter);
2611 }
2612 
2613 
2614 /*******************************************************************
2615  ** Localization Support
2616  *******************************************************************/
2617 
2618 
tidySetLanguage(ctmbstr languageCode)2619 Bool TIDY_CALL tidySetLanguage( ctmbstr languageCode )
2620 {
2621     Bool result = TY_(tidySetLanguage)( languageCode );
2622 
2623     if ( result )
2624         TY_(tidySetLanguageSetByUser)();
2625 
2626     return result;
2627 }
2628 
tidyGetLanguage()2629 ctmbstr TIDY_CALL tidyGetLanguage()
2630 {
2631     return TY_(tidyGetLanguage)();
2632 }
2633 
tidyLocalizedStringN(uint messageType,uint quantity)2634 ctmbstr TIDY_CALL tidyLocalizedStringN( uint messageType, uint quantity )
2635 {
2636     return TY_(tidyLocalizedStringN)( messageType, quantity);
2637 }
2638 
tidyLocalizedString(uint messageType)2639 ctmbstr TIDY_CALL tidyLocalizedString( uint messageType )
2640 {
2641     return TY_(tidyLocalizedString)( messageType );
2642 }
2643 
tidyDefaultString(uint messageType)2644 ctmbstr TIDY_CALL tidyDefaultString( uint messageType )
2645 {
2646     return TY_(tidyDefaultString)( messageType );
2647 }
2648 
getStringKeyList()2649 TidyIterator TIDY_CALL getStringKeyList()
2650 {
2651     return TY_(getStringKeyList)();
2652 }
2653 
getNextStringKey(TidyIterator * iter)2654 uint TIDY_CALL getNextStringKey( TidyIterator* iter )
2655 {
2656     return TY_(getNextStringKey)( iter );
2657 }
2658 
getWindowsLanguageList()2659 TidyIterator TIDY_CALL getWindowsLanguageList()
2660 {
2661     return TY_(getWindowsLanguageList)();
2662 }
2663 
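/* NOTE(review): disabled copies of the option handle-conversion macros.
** They appear to be kept only as a model for the opaque locale-item
** casts used by the functions below -- confirm, and remove if unused.
**
**   #define tidyOptionToImpl( topt )    ((const TidyOptionImpl*)(topt))
**   #define tidyImplToOption( option )  ((TidyOption)(option))
*/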
2666 
getNextWindowsLanguage(TidyIterator * iter)2667 const tidyLocaleMapItem* TIDY_CALL getNextWindowsLanguage( TidyIterator* iter )
2668 {
2669     /* Get a real structure */
2670     const tidyLocaleMapItemImpl *item = TY_(getNextWindowsLanguage)( iter );
2671 
2672     /* Return it as the opaque version */
2673     return ((tidyLocaleMapItem*)(item));
2674 }
2675 
2676 
TidyLangWindowsName(const tidyLocaleMapItem * item)2677 ctmbstr TIDY_CALL TidyLangWindowsName( const tidyLocaleMapItem *item )
2678 {
2679     return TY_(TidyLangWindowsName)( (tidyLocaleMapItemImpl*)(item) );
2680 }
2681 
2682 
TidyLangPosixName(const tidyLocaleMapItem * item)2683 ctmbstr TIDY_CALL TidyLangPosixName( const tidyLocaleMapItem *item )
2684 {
2685     return TY_(TidyLangPosixName)( (tidyLocaleMapItemImpl*)(item) );
2686 }
2687 
2688 
getInstalledLanguageList()2689 TidyIterator TIDY_CALL getInstalledLanguageList()
2690 {
2691     return TY_(getInstalledLanguageList)();
2692 }
2693 
2694 
getNextInstalledLanguage(TidyIterator * iter)2695 ctmbstr TIDY_CALL getNextInstalledLanguage( TidyIterator* iter )
2696 {
2697     return TY_(getNextInstalledLanguage)( iter );
2698 }
2699 
2700 
2701 
2702 
2703 /*
2704  * local variables:
2705  * mode: c
2706  * indent-tabs-mode: nil
2707  * c-basic-offset: 4
2708  * eval: (c-set-offset 'substatement-open 0)
2709  * end:
2710  */
2711