1 /* tidylib.c -- internal library definitions
2
3 (c) 1998-2008 (W3C) MIT, ERCIM, Keio University
4 See tidy.h for the copyright notice.
5
6 Defines HTML Tidy API implemented by tidy library.
7
8 Very rough initial cut for discussion purposes.
9
10 Public interface is const-correct and doesn't explicitly depend
11 on any globals. Thus, thread-safety may be introduced w/out
12 changing the interface.
13
14 Looking ahead to a C++ wrapper, C functions always pass
15 this-equivalent as 1st arg.
16
17 Created 2001-05-20 by Charles Reitzel
18
19 */
20
21 #include <errno.h>
22
23 #include "tidy-int.h"
24 #include "parser.h"
25 #include "clean.h"
26 #include "gdoc.h"
27 #include "config.h"
28 #include "message.h"
29 #include "messageobj.h"
30 #include "pprint.h"
31 #include "entities.h"
32 #include "tmbstr.h"
33 #include "utf8.h"
34 #include "mappedio.h"
35 #include "language.h"
36 #include "attrs.h"
37 #include "sprtf.h"
38 #if SUPPORT_LOCALIZATIONS
39 # include "stdlib.h"
40 # include "locale.h"
41 #endif
42
43 /* Create/Destroy a Tidy "document" object */
44 static TidyDocImpl* tidyDocCreate( TidyAllocator *allocator );
45 static void tidyDocRelease( TidyDocImpl* impl );
46
47 static int tidyDocStatus( TidyDocImpl* impl );
48
49 /* Parse Markup */
50 static int tidyDocParseFile( TidyDocImpl* impl, ctmbstr htmlfil );
51 static int tidyDocParseStdin( TidyDocImpl* impl );
52 static int tidyDocParseString( TidyDocImpl* impl, ctmbstr content );
53 static int tidyDocParseBuffer( TidyDocImpl* impl, TidyBuffer* inbuf );
54 static int tidyDocParseSource( TidyDocImpl* impl, TidyInputSource* docIn );
55
56
57 /* Execute post-parse diagnostics and cleanup.
58 ** Note, the order is important. You will get different
59 ** results from the diagnostics depending on if they are run
60 ** pre-or-post repair.
61 */
62 static int tidyDocRunDiagnostics( TidyDocImpl* doc );
63 static void tidyDocReportDoctype( TidyDocImpl* doc );
64 static int tidyDocCleanAndRepair( TidyDocImpl* doc );
65
66
67 /* Save cleaned up file to file/buffer/sink */
68 static int tidyDocSaveFile( TidyDocImpl* impl, ctmbstr htmlfil );
69 static int tidyDocSaveStdout( TidyDocImpl* impl );
70 static int tidyDocSaveString( TidyDocImpl* impl, tmbstr buffer, uint* buflen );
71 static int tidyDocSaveBuffer( TidyDocImpl* impl, TidyBuffer* outbuf );
72 static int tidyDocSaveSink( TidyDocImpl* impl, TidyOutputSink* docOut );
73 static int tidyDocSaveStream( TidyDocImpl* impl, StreamOut* out );
74
75
76 /* Tidy public interface
77 **
78 ** Most functions return an integer:
79 **
80 ** 0 -> SUCCESS
81 ** >0 -> WARNING
82 ** <0 -> ERROR
83 **
84 */
85
tidyCreate(void)86 TidyDoc TIDY_CALL tidyCreate(void)
87 {
88 TidyDocImpl* impl = tidyDocCreate( &TY_(g_default_allocator) );
89 return tidyImplToDoc( impl );
90 }
91
tidyCreateWithAllocator(TidyAllocator * allocator)92 TidyDoc TIDY_CALL tidyCreateWithAllocator( TidyAllocator *allocator )
93 {
94 TidyDocImpl* impl = tidyDocCreate( allocator );
95 return tidyImplToDoc( impl );
96 }
97
tidyRelease(TidyDoc tdoc)98 void TIDY_CALL tidyRelease( TidyDoc tdoc )
99 {
100 TidyDocImpl* impl = tidyDocToImpl( tdoc );
101 tidyDocRelease( impl );
102 }
103
tidyDocCreate(TidyAllocator * allocator)104 TidyDocImpl* tidyDocCreate( TidyAllocator *allocator )
105 {
106 TidyDocImpl* doc = (TidyDocImpl*)TidyAlloc( allocator, sizeof(TidyDocImpl) );
107 TidyClearMemory( doc, sizeof(*doc) );
108 doc->allocator = allocator;
109
110 TY_(InitMap)();
111 TY_(InitTags)( doc );
112 TY_(InitAttrs)( doc );
113 TY_(InitConfig)( doc );
114 TY_(InitPrintBuf)( doc );
115
116 /* Set the locale for tidy's output. This both configures
117 ** LibTidy to use the environment's locale as well as the
118 ** standard library.
119 */
120 #if SUPPORT_LOCALIZATIONS
121 if ( TY_(tidyGetLanguageSetByUser)() == no )
122 {
123 if( ! TY_(tidySetLanguage)( getenv( "LC_MESSAGES" ) ) )
124 {
125 if( ! TY_(tidySetLanguage)( getenv( "LANG" ) ) )
126 {
127 /*\
128 * Is. #770 #783 #780 #790 and maybe others -
129 * TY_(tidySetLanguage)( setlocale( LC_ALL, "" ) );
130 * this seems a 'bad' choice!
131 \*/
132 }
133 }
134 }
135 #endif
136
137 /* By default, wire tidy messages to standard error.
138 ** Document input will be set by parsing routines.
139 ** Document output will be set by pretty print routines.
140 ** Config input will be set by config parsing routines.
141 ** But we need to start off with a way to report errors.
142 */
143 doc->errout = TY_(StdErrOutput)();
144 return doc;
145 }
146
/* Tear down a document object and free it through TidyDocFree.
** A NULL doc is ignored.
**
** The document's input and output streams must already be closed;
** this is asserted below.
*/
void tidyDocRelease( TidyDocImpl* doc )
{
    /* doc in/out opened and closed by parse/print routines */
    if ( doc )
    {
        assert( doc->docIn == NULL );
        assert( doc->docOut == NULL );

        TY_(ReleaseStreamOut)( doc, doc->errout );
        doc->errout = NULL;

        TY_(FreePrintBuf)( doc );
        TY_(FreeNode)(doc, &doc->root);
        TidyClearMemory(&doc->root, sizeof(Node));

        if (doc->givenDoctype)
            TidyDocFree(doc, doc->givenDoctype);

        TY_(FreeConfig)( doc );
        TY_(FreeAttrTable)( doc );
        TY_(FreeAttrPriorityList)( doc );
        /* Pass only the identifier to TY_(), like every other call here;
        ** the argument list belongs outside the name-mangling macro.
        */
        TY_(FreeMutedMessageList)( doc );
        TY_(FreeTags)( doc );
        /*\
         *  Issue #186 - Now FreeNode depend on the doctype, so the lexer is needed
         *  to determine which hash is to be used, so free it last.
        \*/
        TY_(FreeLexer)( doc );
        TidyDocFree( doc, doc );
    }
}
178
/* Let application store a chunk of data w/ each Tidy instance.
** Useful for callbacks.
*/
tidySetAppData(TidyDoc tdoc,void * appData)182 void TIDY_CALL tidySetAppData( TidyDoc tdoc, void* appData )
183 {
184 TidyDocImpl* impl = tidyDocToImpl( tdoc );
185 if ( impl )
186 impl->appData = appData;
187 }
tidyGetAppData(TidyDoc tdoc)188 void* TIDY_CALL tidyGetAppData( TidyDoc tdoc )
189 {
190 TidyDocImpl* impl = tidyDocToImpl( tdoc );
191 if ( impl )
192 return impl->appData;
193 return NULL;
194 }
195
tidyReleaseDate(void)196 ctmbstr TIDY_CALL tidyReleaseDate(void)
197 {
198 return TY_(ReleaseDate)();
199 }
200
tidyLibraryVersion(void)201 ctmbstr TIDY_CALL tidyLibraryVersion(void)
202 {
203 return TY_(tidyLibraryVersion)();
204 }
205
tidyPlatform(void)206 ctmbstr TIDY_CALL tidyPlatform(void)
207 {
208 #ifdef PLATFORM_NAME
209 return PLATFORM_NAME;
210 #else
211 return NULL;
212 #endif
213 }
214
215
216 /* Get/set configuration options
217 */
tidySetOptionCallback(TidyDoc tdoc,TidyOptCallback pOptCallback)218 Bool TIDY_CALL tidySetOptionCallback( TidyDoc tdoc, TidyOptCallback pOptCallback )
219 {
220 TidyDocImpl* impl = tidyDocToImpl( tdoc );
221 if ( impl )
222 {
223 impl->pOptCallback = pOptCallback;
224 return yes;
225 }
226 return no;
227 }
228
tidySetConfigCallback(TidyDoc tdoc,TidyConfigCallback pConfigCallback)229 Bool TIDY_CALL tidySetConfigCallback(TidyDoc tdoc, TidyConfigCallback pConfigCallback)
230 {
231 TidyDocImpl* impl = tidyDocToImpl( tdoc );
232 if ( impl )
233 {
234 impl->pConfigCallback = pConfigCallback;
235 return yes;
236 }
237 return no;
238 }
239
tidySetConfigChangeCallback(TidyDoc tdoc,TidyConfigChangeCallback pCallback)240 Bool TIDY_CALL tidySetConfigChangeCallback(TidyDoc tdoc, TidyConfigChangeCallback pCallback)
241 {
242 TidyDocImpl* impl = tidyDocToImpl( tdoc );
243 if ( impl )
244 {
245 impl->pConfigChangeCallback = pCallback;
246 return yes;
247 }
248 return no;
249 }
250
251
252
tidyLoadConfig(TidyDoc tdoc,ctmbstr cfgfil)253 int TIDY_CALL tidyLoadConfig( TidyDoc tdoc, ctmbstr cfgfil )
254 {
255 TidyDocImpl* impl = tidyDocToImpl( tdoc );
256 if ( impl )
257 return TY_(ParseConfigFile)( impl, cfgfil );
258 return -EINVAL;
259 }
260
tidyLoadConfigEnc(TidyDoc tdoc,ctmbstr cfgfil,ctmbstr charenc)261 int TIDY_CALL tidyLoadConfigEnc( TidyDoc tdoc, ctmbstr cfgfil, ctmbstr charenc )
262 {
263 TidyDocImpl* impl = tidyDocToImpl( tdoc );
264 if ( impl )
265 return TY_(ParseConfigFileEnc)( impl, cfgfil, charenc );
266 return -EINVAL;
267 }
268
tidySetCharEncoding(TidyDoc tdoc,ctmbstr encnam)269 int TIDY_CALL tidySetCharEncoding( TidyDoc tdoc, ctmbstr encnam )
270 {
271 TidyDocImpl* impl = tidyDocToImpl( tdoc );
272 if ( impl )
273 {
274 int enc = TY_(CharEncodingId)( impl, encnam );
275 if ( enc >= 0 && TY_(AdjustCharEncoding)(impl, enc) )
276 return 0;
277
278 TY_(ReportBadArgument)( impl, "char-encoding" );
279 }
280 return -EINVAL;
281 }
282
tidySetInCharEncoding(TidyDoc tdoc,ctmbstr encnam)283 int TIDY_CALL tidySetInCharEncoding( TidyDoc tdoc, ctmbstr encnam )
284 {
285 TidyDocImpl* impl = tidyDocToImpl( tdoc );
286 if ( impl )
287 {
288 int enc = TY_(CharEncodingId)( impl, encnam );
289 if ( enc >= 0 && TY_(SetOptionInt)( impl, TidyInCharEncoding, enc ) )
290 return 0;
291
292 TY_(ReportBadArgument)( impl, "in-char-encoding" );
293 }
294 return -EINVAL;
295 }
296
tidySetOutCharEncoding(TidyDoc tdoc,ctmbstr encnam)297 int TIDY_CALL tidySetOutCharEncoding( TidyDoc tdoc, ctmbstr encnam )
298 {
299 TidyDocImpl* impl = tidyDocToImpl( tdoc );
300 if ( impl )
301 {
302 int enc = TY_(CharEncodingId)( impl, encnam );
303 if ( enc >= 0 && TY_(SetOptionInt)( impl, TidyOutCharEncoding, enc ) )
304 return 0;
305
306 TY_(ReportBadArgument)( impl, "out-char-encoding" );
307 }
308 return -EINVAL;
309 }
310
tidyOptGetIdForName(ctmbstr optnam)311 TidyOptionId TIDY_CALL tidyOptGetIdForName( ctmbstr optnam )
312 {
313 const TidyOptionImpl* option = TY_(lookupOption)( optnam );
314 if ( option )
315 return option->id;
316 return N_TIDY_OPTIONS; /* Error */
317 }
318
tidyGetOptionList(TidyDoc tdoc)319 TidyIterator TIDY_CALL tidyGetOptionList( TidyDoc tdoc )
320 {
321 TidyDocImpl* impl = tidyDocToImpl( tdoc );
322 if ( impl )
323 return TY_(getOptionList)( impl );
324 return (TidyIterator) -1;
325 }
326
tidyGetNextOption(TidyDoc tdoc,TidyIterator * pos)327 TidyOption TIDY_CALL tidyGetNextOption( TidyDoc tdoc, TidyIterator* pos )
328 {
329 TidyDocImpl* impl = tidyDocToImpl( tdoc );
330 const TidyOptionImpl* option = NULL;
331 if ( impl )
332 option = TY_(getNextOption)( impl, pos );
333 else if ( pos )
334 *pos = 0;
335 return tidyImplToOption( option );
336 }
337
338
tidyGetOption(TidyDoc ARG_UNUSED (tdoc),TidyOptionId optId)339 TidyOption TIDY_CALL tidyGetOption( TidyDoc ARG_UNUSED(tdoc), TidyOptionId optId )
340 {
341 const TidyOptionImpl* option = TY_(getOption)( optId );
342 return tidyImplToOption( option );
343 }
tidyGetOptionByName(TidyDoc ARG_UNUSED (doc),ctmbstr optnam)344 TidyOption TIDY_CALL tidyGetOptionByName( TidyDoc ARG_UNUSED(doc), ctmbstr optnam )
345 {
346 const TidyOptionImpl* option = TY_(lookupOption)( optnam );
347 return tidyImplToOption( option );
348 }
349
tidyOptGetId(TidyOption topt)350 TidyOptionId TIDY_CALL tidyOptGetId( TidyOption topt )
351 {
352 const TidyOptionImpl* option = tidyOptionToImpl( topt );
353 if ( option )
354 return option->id;
355 return N_TIDY_OPTIONS;
356 }
tidyOptGetName(TidyOption topt)357 ctmbstr TIDY_CALL tidyOptGetName( TidyOption topt )
358 {
359 const TidyOptionImpl* option = tidyOptionToImpl( topt );
360 if ( option )
361 return option->name;
362 return NULL;
363 }
tidyOptGetType(TidyOption topt)364 TidyOptionType TIDY_CALL tidyOptGetType( TidyOption topt )
365 {
366 const TidyOptionImpl* option = tidyOptionToImpl( topt );
367 if ( option )
368 return option->type;
369 return (TidyOptionType) -1;
370 }
tidyOptionIsList(TidyOption opt)371 Bool TIDY_CALL tidyOptionIsList( TidyOption opt )
372 {
373 const TidyOptionImpl* option = tidyOptionToImpl( opt );
374 if ( option )
375 return TY_(getOptionIsList)( option->id );
376 return no;
377 }
tidyOptGetCategory(TidyOption topt)378 TidyConfigCategory TIDY_CALL tidyOptGetCategory( TidyOption topt )
379 {
380 const TidyOptionImpl* option = tidyOptionToImpl( topt );
381 if ( option )
382 return option->category;
383 return (TidyConfigCategory) -1;
384 }
tidyOptGetDefault(TidyOption topt)385 ctmbstr TIDY_CALL tidyOptGetDefault( TidyOption topt )
386 {
387 const TidyOptionImpl* option = tidyOptionToImpl( topt );
388 /* Special case for TidyDoctype, because it is declared as string */
389 if ( option && option->id == TidyDoctype )
390 {
391 const TidyOptionImpl* newopt = TY_(getOption)( TidyDoctypeMode );
392 return TY_(GetPickListLabelForPick)( TidyDoctypeMode, newopt->dflt );
393 }
394 if ( option && option->type == TidyString )
395 return option->pdflt; /* Issue #306 - fix an old typo hidden by a cast! */
396 return NULL;
397 }
tidyOptGetDefaultInt(TidyOption topt)398 ulong TIDY_CALL tidyOptGetDefaultInt( TidyOption topt )
399 {
400 const TidyOptionImpl* option = tidyOptionToImpl( topt );
401 if ( option && option->type != TidyString )
402 return option->dflt;
403
404 /* Special case for TidyDoctype, because it has a picklist */
405 if ( option->id == TidyDoctype )
406 {
407 const TidyOptionImpl* newopt = TY_(getOption)( TidyDoctypeMode );
408 return newopt->dflt;
409 }
410
411 return ~0U;
412 }
tidyOptGetDefaultBool(TidyOption topt)413 Bool TIDY_CALL tidyOptGetDefaultBool( TidyOption topt )
414 {
415 const TidyOptionImpl* option = tidyOptionToImpl( topt );
416 if ( option && option->type != TidyString )
417 return ( option->dflt ? yes : no );
418 return no;
419 }
tidyOptIsReadOnly(TidyOption topt)420 Bool TIDY_CALL tidyOptIsReadOnly( TidyOption topt )
421 {
422 const TidyOptionImpl* option = tidyOptionToImpl( topt );
423 if ( option )
424 return ( option->parser == NULL );
425 return yes;
426 }
427
428
tidyOptGetPickList(TidyOption topt)429 TidyIterator TIDY_CALL tidyOptGetPickList( TidyOption topt )
430 {
431 const TidyOptionImpl* option = tidyOptionToImpl( topt );
432 if ( option )
433 return TY_(getOptionPickList)( option );
434 return (TidyIterator) -1;
435 }
tidyOptGetNextPick(TidyOption topt,TidyIterator * pos)436 ctmbstr TIDY_CALL tidyOptGetNextPick( TidyOption topt, TidyIterator* pos )
437 {
438 const TidyOptionImpl* option = tidyOptionToImpl( topt );
439 if ( option )
440 return TY_(getNextOptionPick)( option, pos );
441 return NULL;
442 }
443
444
tidyOptGetValue(TidyDoc tdoc,TidyOptionId optId)445 ctmbstr TIDY_CALL tidyOptGetValue( TidyDoc tdoc, TidyOptionId optId )
446 {
447 TidyDocImpl* impl = tidyDocToImpl( tdoc );
448 ctmbstr optval = NULL;
449 if ( impl )
450 {
451 if ( optId == TidyDoctype )
452 {
453 /* Special case for TidyDoctype, because it has a picklist and is a string. */
454 uint pick = tidyOptGetInt( tdoc, TidyDoctypeMode );
455 if ( pick != TidyDoctypeUser )
456 {
457 optval = TY_(GetPickListLabelForPick)( TidyDoctypeMode, pick );
458 } else {
459 optval = cfgStr( impl, optId );
460 }
461 } else {
462 /* Standard case. */
463 optval = cfgStr( impl, optId );
464 }
465 }
466 return optval;
467 }
tidyOptSetValue(TidyDoc tdoc,TidyOptionId optId,ctmbstr val)468 Bool TIDY_CALL tidyOptSetValue( TidyDoc tdoc, TidyOptionId optId, ctmbstr val )
469 {
470 TidyDocImpl* impl = tidyDocToImpl( tdoc );
471 if ( impl )
472 return TY_(ParseConfigValue)( impl, optId, val );
473 return no;
474 }
tidyOptParseValue(TidyDoc tdoc,ctmbstr optnam,ctmbstr val)475 Bool TIDY_CALL tidyOptParseValue( TidyDoc tdoc, ctmbstr optnam, ctmbstr val )
476 {
477 TidyDocImpl* impl = tidyDocToImpl( tdoc );
478 if ( impl )
479 return TY_(ParseConfigOption)( impl, optnam, val );
480 return no;
481 }
482
tidyOptGetInt(TidyDoc tdoc,TidyOptionId optId)483 ulong TIDY_CALL tidyOptGetInt( TidyDoc tdoc, TidyOptionId optId )
484 {
485 TidyDocImpl* impl = tidyDocToImpl( tdoc );
486 ulong opti = 0;
487 if ( impl )
488 {
489 /* Special case for TidyDoctype, because it has a picklist */
490 if ( optId == TidyDoctype )
491 opti = cfg( impl, TidyDoctypeMode);
492 else
493 opti = cfg( impl, optId );
494 }
495 return opti;
496 }
497
tidyOptSetInt(TidyDoc tdoc,TidyOptionId optId,ulong val)498 Bool TIDY_CALL tidyOptSetInt( TidyDoc tdoc, TidyOptionId optId, ulong val )
499 {
500 TidyDocImpl* impl = tidyDocToImpl( tdoc );
501 if ( impl )
502 {
503 /* Special case for TidyDoctype, because it has a picklist */
504 if ( optId == TidyDoctype )
505 return TY_(SetOptionInt)( impl, TidyDoctypeMode, val );
506 else
507 return TY_(SetOptionInt)( impl, optId, val );
508 }
509 return no;
510 }
511
tidyOptGetBool(TidyDoc tdoc,TidyOptionId optId)512 Bool TIDY_CALL tidyOptGetBool( TidyDoc tdoc, TidyOptionId optId )
513 {
514 TidyDocImpl* impl = tidyDocToImpl( tdoc );
515 Bool optb = no;
516 if ( impl )
517 {
518 const TidyOptionImpl* option = TY_(getOption)( optId );
519 if ( option )
520 {
521 optb = cfgBool( impl, optId );
522 }
523 }
524 return optb;
525 }
526
tidyOptSetBool(TidyDoc tdoc,TidyOptionId optId,Bool val)527 Bool TIDY_CALL tidyOptSetBool( TidyDoc tdoc, TidyOptionId optId, Bool val )
528 {
529 TidyDocImpl* impl = tidyDocToImpl( tdoc );
530 if ( impl )
531 return TY_(SetOptionBool)( impl, optId, val );
532 return no;
533 }
534
tidyOptGetEncName(TidyDoc tdoc,TidyOptionId optId)535 ctmbstr TIDY_CALL tidyOptGetEncName( TidyDoc tdoc, TidyOptionId optId )
536 {
537 uint enc = tidyOptGetInt( tdoc, optId );
538 return TY_(CharEncodingOptName)( enc );
539 }
540
tidyOptGetCurrPick(TidyDoc tdoc,TidyOptionId optId)541 ctmbstr TIDY_CALL tidyOptGetCurrPick( TidyDoc tdoc, TidyOptionId optId )
542 {
543 uint pick = tidyOptGetInt( tdoc, optId );
544 return TY_(GetPickListLabelForPick)( optId, pick );
545 }
546
547
tidyOptGetDeclTagList(TidyDoc tdoc)548 TidyIterator TIDY_CALL tidyOptGetDeclTagList( TidyDoc tdoc )
549 {
550 TidyDocImpl* impl = tidyDocToImpl( tdoc );
551 TidyIterator declIter = 0;
552 if ( impl )
553 declIter = TY_(GetDeclaredTagList)( impl );
554 return declIter;
555 }
556
tidyOptGetNextDeclTag(TidyDoc tdoc,TidyOptionId optId,TidyIterator * iter)557 ctmbstr TIDY_CALL tidyOptGetNextDeclTag( TidyDoc tdoc, TidyOptionId optId,
558 TidyIterator* iter )
559 {
560 TidyDocImpl* impl = tidyDocToImpl( tdoc );
561 ctmbstr tagnam = NULL;
562 if ( impl )
563 {
564 UserTagType tagtyp = tagtype_null;
565 if ( optId == TidyInlineTags )
566 tagtyp = tagtype_inline;
567 else if ( optId == TidyBlockTags )
568 tagtyp = tagtype_block;
569 else if ( optId == TidyEmptyTags )
570 tagtyp = tagtype_empty;
571 else if ( optId == TidyPreTags )
572 tagtyp = tagtype_pre;
573 if ( tagtyp != tagtype_null )
574 tagnam = TY_(GetNextDeclaredTag)( impl, tagtyp, iter );
575 }
576 return tagnam;
577 }
578
tidyOptGetPriorityAttrList(TidyDoc tdoc)579 TidyIterator TIDY_CALL tidyOptGetPriorityAttrList( TidyDoc tdoc )
580 {
581 TidyDocImpl* impl = tidyDocToImpl( tdoc );
582 if ( impl )
583 return TY_(getPriorityAttrList)( impl );
584 return (TidyIterator) -1;
585 }
586
tidyOptGetNextPriorityAttr(TidyDoc tdoc,TidyIterator * iter)587 ctmbstr TIDY_CALL tidyOptGetNextPriorityAttr(TidyDoc tdoc, TidyIterator* iter )
588 {
589 TidyDocImpl* impl = tidyDocToImpl( tdoc );
590 ctmbstr result = NULL;
591 if ( impl )
592 result = TY_(getNextPriorityAttr)( impl, iter );
593 else if ( iter )
594 *iter = 0;
595 return result;
596 }
597
tidyOptGetMutedMessageList(TidyDoc tdoc)598 TidyIterator TIDY_CALL tidyOptGetMutedMessageList( TidyDoc tdoc )
599 {
600 TidyDocImpl* impl = tidyDocToImpl( tdoc );
601 if ( impl )
602 return TY_(getMutedMessageList)( impl );
603 return (TidyIterator) -1;
604 }
605
tidyOptGetNextMutedMessage(TidyDoc tdoc,TidyIterator * iter)606 ctmbstr TIDY_CALL tidyOptGetNextMutedMessage(TidyDoc tdoc, TidyIterator* iter )
607 {
608 TidyDocImpl* impl = tidyDocToImpl( tdoc );
609 ctmbstr result = NULL;
610 if ( impl )
611 result = TY_(getNextMutedMessage)( impl, iter );
612 else if ( iter )
613 *iter = 0;
614 return result;
615 }
616
tidyOptGetDoc(TidyDoc ARG_UNUSED (tdoc),TidyOption opt)617 ctmbstr TIDY_CALL tidyOptGetDoc( TidyDoc ARG_UNUSED(tdoc), TidyOption opt )
618 {
619 const TidyOptionId optId = tidyOptGetId( opt );
620 return tidyLocalizedString(optId);
621 }
622
623 #if SUPPORT_CONSOLE_APP
624 /* TODO - GROUP ALL CONSOLE-ONLY FUNCTIONS */
tidyOptGetDocLinksList(TidyDoc ARG_UNUSED (tdoc),TidyOption opt)625 TidyIterator TIDY_CALL tidyOptGetDocLinksList( TidyDoc ARG_UNUSED(tdoc), TidyOption opt )
626 {
627 const TidyOptionId optId = tidyOptGetId( opt );
628 const TidyOptionDoc* docDesc = TY_(OptGetDocDesc)( optId );
629 if (docDesc && docDesc->links)
630 return (TidyIterator)docDesc->links;
631 return (TidyIterator)NULL;
632 }
633 #endif /* SUPPORT_CONSOLE_APP */
634
tidyOptGetNextDocLinks(TidyDoc tdoc,TidyIterator * pos)635 TidyOption TIDY_CALL tidyOptGetNextDocLinks( TidyDoc tdoc, TidyIterator* pos )
636 {
637 const TidyOptionId* curr = (const TidyOptionId *)*pos;
638 TidyOption opt;
639
640 if (*curr == TidyUnknownOption)
641 {
642 *pos = (TidyIterator)NULL;
643 return (TidyOption)0;
644 }
645 opt = tidyGetOption(tdoc, *curr);
646 curr++;
647 *pos = (*curr == TidyUnknownOption ) ?
648 (TidyIterator)NULL:(TidyIterator)curr;
649 return opt;
650 }
651
tidyOptSaveFile(TidyDoc tdoc,ctmbstr cfgfil)652 int TIDY_CALL tidyOptSaveFile( TidyDoc tdoc, ctmbstr cfgfil )
653 {
654 TidyDocImpl* impl = tidyDocToImpl( tdoc );
655 if ( impl )
656 return TY_(SaveConfigFile)( impl, cfgfil );
657 return -EINVAL;
658 }
659
tidyOptSaveSink(TidyDoc tdoc,TidyOutputSink * sink)660 int TIDY_CALL tidyOptSaveSink( TidyDoc tdoc, TidyOutputSink* sink )
661 {
662 TidyDocImpl* impl = tidyDocToImpl( tdoc );
663 if ( impl )
664 return TY_(SaveConfigSink)( impl, sink );
665 return -EINVAL;
666 }
667
tidyOptSnapshot(TidyDoc tdoc)668 Bool TIDY_CALL tidyOptSnapshot( TidyDoc tdoc )
669 {
670 TidyDocImpl* impl = tidyDocToImpl( tdoc );
671 if ( impl )
672 {
673 TY_(TakeConfigSnapshot)( impl );
674 return yes;
675 }
676 return no;
677 }
tidyOptResetToSnapshot(TidyDoc tdoc)678 Bool TIDY_CALL tidyOptResetToSnapshot( TidyDoc tdoc )
679 {
680 TidyDocImpl* impl = tidyDocToImpl( tdoc );
681 if ( impl )
682 {
683 TY_(ResetConfigToSnapshot)( impl );
684 return yes;
685 }
686 return no;
687 }
tidyOptResetAllToDefault(TidyDoc tdoc)688 Bool TIDY_CALL tidyOptResetAllToDefault( TidyDoc tdoc )
689 {
690 TidyDocImpl* impl = tidyDocToImpl( tdoc );
691 if ( impl )
692 {
693 TY_(ResetConfigToDefault)( impl );
694 return yes;
695 }
696 return no;
697 }
698
tidyOptResetToDefault(TidyDoc tdoc,TidyOptionId optId)699 Bool TIDY_CALL tidyOptResetToDefault( TidyDoc tdoc, TidyOptionId optId )
700 {
701 TidyDocImpl* impl = tidyDocToImpl( tdoc );
702 if ( impl )
703 return TY_(ResetOptionToDefault)( impl, optId );
704 return no;
705 }
706
tidyOptDiffThanDefault(TidyDoc tdoc)707 Bool TIDY_CALL tidyOptDiffThanDefault( TidyDoc tdoc )
708 {
709 TidyDocImpl* impl = tidyDocToImpl( tdoc );
710 if ( impl )
711 return TY_(ConfigDiffThanDefault)( impl );
712 return no;
713 }
tidyOptDiffThanSnapshot(TidyDoc tdoc)714 Bool TIDY_CALL tidyOptDiffThanSnapshot( TidyDoc tdoc )
715 {
716 TidyDocImpl* impl = tidyDocToImpl( tdoc );
717 if ( impl )
718 return TY_(ConfigDiffThanSnapshot)( impl );
719 return no;
720 }
721
tidyOptCopyConfig(TidyDoc to,TidyDoc from)722 Bool TIDY_CALL tidyOptCopyConfig( TidyDoc to, TidyDoc from )
723 {
724 TidyDocImpl* docTo = tidyDocToImpl( to );
725 TidyDocImpl* docFrom = tidyDocToImpl( from );
726 if ( docTo && docFrom )
727 {
728 TY_(CopyConfig)( docTo, docFrom );
729 return yes;
730 }
731 return no;
732 }
733
734
/* I/O and Message handling interface
**
** By default, Tidy will define, create and use instances of input and output
** handlers for standard C buffered I/O (i.e. FILE* stdin, FILE* stdout and
** FILE* stderr for content input, content output and diagnostic output,
** respectively). A FILE* cfgFile input handler will be used for config files.
** Command line options will just be set directly.
*/
743
tidySetEmacsFile(TidyDoc tdoc,ctmbstr filePath)744 void TIDY_CALL tidySetEmacsFile( TidyDoc tdoc, ctmbstr filePath )
745 {
746 tidyOptSetValue( tdoc, TidyEmacsFile, filePath );
747 }
748
tidyGetEmacsFile(TidyDoc tdoc)749 ctmbstr TIDY_CALL tidyGetEmacsFile( TidyDoc tdoc )
750 {
751 return tidyOptGetValue( tdoc, TidyEmacsFile );
752 }
753
754
755 /* Use TidyReportFilter to filter messages by diagnostic level:
756 ** info, warning, etc. Just set diagnostic output
757 ** handler to redirect all diagnostics output. Return true
758 ** to proceed with output, false to cancel.
759 */
tidySetReportFilter(TidyDoc tdoc,TidyReportFilter filt)760 Bool TIDY_CALL tidySetReportFilter( TidyDoc tdoc, TidyReportFilter filt )
761 {
762 TidyDocImpl* impl = tidyDocToImpl( tdoc );
763 if ( impl )
764 {
765 impl->reportFilter = filt;
766 return yes;
767 }
768 return no;
769 }
770
/* tidySetReportCallback functions similarly to TidyReportFilter, but provides the
** string version of the internal enum name so that LibTidy users can use
** the string as a lookup key for providing their own error localizations.
** See the string key definitions in tidyenum.h.
*/
tidySetReportCallback(TidyDoc tdoc,TidyReportCallback filt)776 Bool TIDY_CALL tidySetReportCallback( TidyDoc tdoc, TidyReportCallback filt )
777 {
778 TidyDocImpl* impl = tidyDocToImpl( tdoc );
779 if ( impl )
780 {
781 impl->reportCallback = filt;
782 return yes;
783 }
784 return no;
785 }
786
tidySetMessageCallback(TidyDoc tdoc,TidyMessageCallback filt)787 Bool TIDY_CALL tidySetMessageCallback( TidyDoc tdoc, TidyMessageCallback filt )
788 {
789 TidyDocImpl* impl = tidyDocToImpl( tdoc );
790 if ( impl )
791 {
792 impl->messageCallback = filt;
793 return yes;
794 }
795 return no;
796 }
797
tidyGetMessageDoc(TidyMessage tmessage)798 TidyDoc TIDY_CALL tidyGetMessageDoc( TidyMessage tmessage )
799 {
800 TidyMessageImpl *message = tidyMessageToImpl(tmessage);
801 TidyDocImpl* doc = TY_(getMessageDoc)(*message);
802 return tidyImplToDoc(doc);
803 }
804
tidyGetMessageCode(TidyMessage tmessage)805 uint TIDY_CALL tidyGetMessageCode( TidyMessage tmessage )
806 {
807 TidyMessageImpl *message = tidyMessageToImpl(tmessage);
808 return TY_(getMessageCode)(*message);
809 }
810
tidyGetMessageKey(TidyMessage tmessage)811 ctmbstr TIDY_CALL tidyGetMessageKey( TidyMessage tmessage )
812 {
813 TidyMessageImpl *message = tidyMessageToImpl(tmessage);
814 return TY_(getMessageKey)(*message);
815 }
816
tidyGetMessageLine(TidyMessage tmessage)817 int TIDY_CALL tidyGetMessageLine( TidyMessage tmessage )
818 {
819 TidyMessageImpl *message = tidyMessageToImpl(tmessage);
820 return TY_(getMessageLine)(*message);
821 }
822
tidyGetMessageColumn(TidyMessage tmessage)823 int TIDY_CALL tidyGetMessageColumn( TidyMessage tmessage )
824 {
825 TidyMessageImpl *message = tidyMessageToImpl(tmessage);
826 return TY_(getMessageColumn)(*message);
827 }
828
tidyGetMessageLevel(TidyMessage tmessage)829 TidyReportLevel TIDY_CALL tidyGetMessageLevel( TidyMessage tmessage )
830 {
831 TidyMessageImpl *message = tidyMessageToImpl(tmessage);
832 return TY_(getMessageLevel)(*message);
833 }
834
tidyGetMessageIsMuted(TidyMessage tmessage)835 Bool TIDY_CALL tidyGetMessageIsMuted( TidyMessage tmessage )
836 {
837 TidyMessageImpl *message = tidyMessageToImpl(tmessage);
838 return TY_(getMessageIsMuted)(*message);
839 }
840
tidyGetMessageFormatDefault(TidyMessage tmessage)841 ctmbstr TIDY_CALL tidyGetMessageFormatDefault( TidyMessage tmessage )
842 {
843 TidyMessageImpl *message = tidyMessageToImpl(tmessage);
844 return TY_(getMessageFormatDefault)(*message);
845 }
846
tidyGetMessageFormat(TidyMessage tmessage)847 ctmbstr TIDY_CALL tidyGetMessageFormat( TidyMessage tmessage )
848 {
849 TidyMessageImpl *message = tidyMessageToImpl(tmessage);
850 return TY_(getMessageFormat)(*message);
851 }
852
tidyGetMessageDefault(TidyMessage tmessage)853 ctmbstr TIDY_CALL tidyGetMessageDefault( TidyMessage tmessage )
854 {
855 TidyMessageImpl *message = tidyMessageToImpl(tmessage);
856 return TY_(getMessageDefault)(*message);
857 }
858
tidyGetMessage(TidyMessage tmessage)859 ctmbstr TIDY_CALL tidyGetMessage( TidyMessage tmessage )
860 {
861 TidyMessageImpl *message = tidyMessageToImpl(tmessage);
862 return TY_(getMessage)(*message);
863 }
864
tidyGetMessagePosDefault(TidyMessage tmessage)865 ctmbstr TIDY_CALL tidyGetMessagePosDefault( TidyMessage tmessage )
866 {
867 TidyMessageImpl *message = tidyMessageToImpl(tmessage);
868 return TY_(getMessagePosDefault)(*message);
869 }
870
tidyGetMessagePos(TidyMessage tmessage)871 ctmbstr TIDY_CALL tidyGetMessagePos( TidyMessage tmessage )
872 {
873 TidyMessageImpl *message = tidyMessageToImpl(tmessage);
874 return TY_(getMessagePos)(*message);
875 }
876
tidyGetMessagePrefixDefault(TidyMessage tmessage)877 ctmbstr TIDY_CALL tidyGetMessagePrefixDefault( TidyMessage tmessage )
878 {
879 TidyMessageImpl *message = tidyMessageToImpl(tmessage);
880 return TY_(getMessagePrefixDefault)(*message);
881 }
882
tidyGetMessagePrefix(TidyMessage tmessage)883 ctmbstr TIDY_CALL tidyGetMessagePrefix( TidyMessage tmessage )
884 {
885 TidyMessageImpl *message = tidyMessageToImpl(tmessage);
886 return TY_(getMessagePrefix)(*message);
887 }
888
889
tidyGetMessageOutputDefault(TidyMessage tmessage)890 ctmbstr TIDY_CALL tidyGetMessageOutputDefault( TidyMessage tmessage )
891 {
892 TidyMessageImpl *message = tidyMessageToImpl(tmessage);
893 return TY_(getMessageOutputDefault)(*message);
894 }
895
tidyGetMessageOutput(TidyMessage tmessage)896 ctmbstr TIDY_CALL tidyGetMessageOutput( TidyMessage tmessage )
897 {
898 TidyMessageImpl *message = tidyMessageToImpl(tmessage);
899 return TY_(getMessageOutput)(*message);
900 }
901
tidyGetMessageArguments(TidyMessage tmessage)902 TidyIterator TIDY_CALL tidyGetMessageArguments( TidyMessage tmessage )
903 {
904 TidyMessageImpl *message = tidyMessageToImpl(tmessage);
905 return TY_(getMessageArguments)(*message);
906 }
907
tidyGetNextMessageArgument(TidyMessage tmessage,TidyIterator * iter)908 TidyMessageArgument TIDY_CALL tidyGetNextMessageArgument( TidyMessage tmessage, TidyIterator* iter )
909 {
910 TidyMessageImpl *message = tidyMessageToImpl(tmessage);
911 return TY_(getNextMessageArgument)(*message, iter);
912 }
913
tidyGetArgType(TidyMessage tmessage,TidyMessageArgument * arg)914 TidyFormatParameterType TIDY_CALL tidyGetArgType( TidyMessage tmessage, TidyMessageArgument* arg )
915 {
916 TidyMessageImpl *message = tidyMessageToImpl(tmessage);
917 return TY_(getArgType)(*message, arg);
918 }
919
tidyGetArgFormat(TidyMessage tmessage,TidyMessageArgument * arg)920 ctmbstr TIDY_CALL tidyGetArgFormat( TidyMessage tmessage, TidyMessageArgument* arg )
921 {
922 TidyMessageImpl *message = tidyMessageToImpl(tmessage);
923 return TY_(getArgFormat)(*message, arg);
924 }
925
tidyGetArgValueString(TidyMessage tmessage,TidyMessageArgument * arg)926 ctmbstr TIDY_CALL tidyGetArgValueString( TidyMessage tmessage, TidyMessageArgument* arg )
927 {
928 TidyMessageImpl *message = tidyMessageToImpl(tmessage);
929 return TY_(getArgValueString)(*message, arg);
930 }
931
tidyGetArgValueUInt(TidyMessage tmessage,TidyMessageArgument * arg)932 uint TIDY_CALL tidyGetArgValueUInt( TidyMessage tmessage, TidyMessageArgument* arg )
933 {
934 TidyMessageImpl *message = tidyMessageToImpl(tmessage);
935 return TY_(getArgValueUInt)(*message, arg);
936 }
937
tidyGetArgValueInt(TidyMessage tmessage,TidyMessageArgument * arg)938 int TIDY_CALL tidyGetArgValueInt( TidyMessage tmessage, TidyMessageArgument* arg )
939 {
940 TidyMessageImpl *message = tidyMessageToImpl(tmessage);
941 return TY_(getArgValueInt)(*message, arg);
942 }
943
tidyGetArgValueDouble(TidyMessage tmessage,TidyMessageArgument * arg)944 double TIDY_CALL tidyGetArgValueDouble( TidyMessage tmessage, TidyMessageArgument* arg )
945 {
946 TidyMessageImpl *message = tidyMessageToImpl(tmessage);
947 return TY_(getArgValueDouble)(*message, arg);
948 }
949
950
tidySetErrorFile(TidyDoc tdoc,ctmbstr errfilnam)951 FILE* TIDY_CALL tidySetErrorFile( TidyDoc tdoc, ctmbstr errfilnam )
952 {
953 TidyDocImpl* impl = tidyDocToImpl( tdoc );
954 if ( impl )
955 {
956 FILE* errout = fopen( errfilnam, "wb" );
957 if ( errout )
958 {
959 uint outenc = cfg( impl, TidyOutCharEncoding );
960 uint nl = cfg( impl, TidyNewline );
961 TY_(ReleaseStreamOut)( impl, impl->errout );
962 impl->errout = TY_(FileOutput)( impl, errout, outenc, nl );
963 return errout;
964 }
965 else /* Emit message to current error sink */
966 TY_(ReportFileError)( impl, errfilnam, FILE_CANT_OPEN );
967 }
968 return NULL;
969 }
970
tidySetErrorBuffer(TidyDoc tdoc,TidyBuffer * errbuf)971 int TIDY_CALL tidySetErrorBuffer( TidyDoc tdoc, TidyBuffer* errbuf )
972 {
973 TidyDocImpl* impl = tidyDocToImpl( tdoc );
974 if ( impl )
975 {
976 uint outenc = cfg( impl, TidyOutCharEncoding );
977 uint nl = cfg( impl, TidyNewline );
978 TY_(ReleaseStreamOut)( impl, impl->errout );
979 impl->errout = TY_(BufferOutput)( impl, errbuf, outenc, nl );
980 return ( impl->errout ? 0 : -ENOMEM );
981 }
982 return -EINVAL;
983 }
984
tidySetErrorSink(TidyDoc tdoc,TidyOutputSink * sink)985 int TIDY_CALL tidySetErrorSink( TidyDoc tdoc, TidyOutputSink* sink )
986 {
987 TidyDocImpl* impl = tidyDocToImpl( tdoc );
988 if ( impl )
989 {
990 uint outenc = cfg( impl, TidyOutCharEncoding );
991 uint nl = cfg( impl, TidyNewline );
992 TY_(ReleaseStreamOut)( impl, impl->errout );
993 impl->errout = TY_(UserOutput)( impl, sink, outenc, nl );
994 return ( impl->errout ? 0 : -ENOMEM );
995 }
996 return -EINVAL;
997 }
998
999 /* Use TidyPPProgress to monitor the progress of the pretty printer.
1000 */
tidySetPrettyPrinterCallback(TidyDoc tdoc,TidyPPProgress callback)1001 Bool TIDY_CALL tidySetPrettyPrinterCallback(TidyDoc tdoc, TidyPPProgress callback)
1002 {
1003 TidyDocImpl* impl = tidyDocToImpl( tdoc );
1004 if ( impl )
1005 {
1006 impl->progressCallback = callback;
1007 return yes;
1008 }
1009 return no;
1010 }
1011
1012
1013 /* Document info */
tidyStatus(TidyDoc tdoc)1014 int TIDY_CALL tidyStatus( TidyDoc tdoc )
1015 {
1016 TidyDocImpl* impl = tidyDocToImpl( tdoc );
1017 int tidyStat = -EINVAL;
1018 if ( impl )
1019 tidyStat = tidyDocStatus( impl );
1020 return tidyStat;
1021 }
tidyDetectedHtmlVersion(TidyDoc ARG_UNUSED (tdoc))1022 int TIDY_CALL tidyDetectedHtmlVersion( TidyDoc ARG_UNUSED(tdoc) )
1023 {
1024 TidyDocImpl* impl = tidyDocToImpl( tdoc );
1025 return TY_(HTMLVersionNumberFromCode)( impl->lexer->versionEmitted );
1026 }
1027
tidyDetectedXhtml(TidyDoc ARG_UNUSED (tdoc))1028 Bool TIDY_CALL tidyDetectedXhtml( TidyDoc ARG_UNUSED(tdoc) )
1029 {
1030 TidyDocImpl* impl = tidyDocToImpl( tdoc );
1031 return impl->lexer->isvoyager;
1032 }
tidyDetectedGenericXml(TidyDoc ARG_UNUSED (tdoc))1033 Bool TIDY_CALL tidyDetectedGenericXml( TidyDoc ARG_UNUSED(tdoc) )
1034 {
1035 TidyDocImpl* impl = tidyDocToImpl( tdoc );
1036 return impl->xmlDetected;
1037 }
1038
tidyErrorCount(TidyDoc tdoc)1039 uint TIDY_CALL tidyErrorCount( TidyDoc tdoc )
1040 {
1041 TidyDocImpl* impl = tidyDocToImpl( tdoc );
1042 uint count = 0xFFFFFFFF;
1043 if ( impl )
1044 count = impl->errors;
1045 return count;
1046 }
tidyWarningCount(TidyDoc tdoc)1047 uint TIDY_CALL tidyWarningCount( TidyDoc tdoc )
1048 {
1049 TidyDocImpl* impl = tidyDocToImpl( tdoc );
1050 uint count = 0xFFFFFFFF;
1051 if ( impl )
1052 count = impl->warnings;
1053 return count;
1054 }
tidyAccessWarningCount(TidyDoc tdoc)1055 uint TIDY_CALL tidyAccessWarningCount( TidyDoc tdoc )
1056 {
1057 TidyDocImpl* impl = tidyDocToImpl( tdoc );
1058 uint count = 0xFFFFFFFF;
1059 if ( impl )
1060 count = impl->accessErrors;
1061 return count;
1062 }
tidyConfigErrorCount(TidyDoc tdoc)1063 uint TIDY_CALL tidyConfigErrorCount( TidyDoc tdoc )
1064 {
1065 TidyDocImpl* impl = tidyDocToImpl( tdoc );
1066 uint count = 0xFFFFFFFF;
1067 if ( impl )
1068 count = impl->optionErrors;
1069 return count;
1070 }
1071
1072
1073 /* Error reporting functions
1074 */
tidyErrorSummary(TidyDoc tdoc)1075 void TIDY_CALL tidyErrorSummary( TidyDoc tdoc )
1076 {
1077 TidyDocImpl* impl = tidyDocToImpl( tdoc );
1078 if ( impl )
1079 TY_(ErrorSummary)( impl );
1080 }
tidyGeneralInfo(TidyDoc tdoc)1081 void TIDY_CALL tidyGeneralInfo( TidyDoc tdoc )
1082 {
1083 TidyDocImpl* impl = tidyDocToImpl( tdoc );
1084 if ( impl )
1085 {
1086 TY_(Dialogue)( impl, TEXT_GENERAL_INFO );
1087 TY_(Dialogue)( impl, TEXT_GENERAL_INFO_PLEA );
1088 }
1089 }
1090
1091
1092 /* I/O Functions
1093 **
1094 ** Initial version supports only whole-file operations.
1095 ** Do not expose Tidy StreamIn or Out data structures - yet.
1096 */
1097
1098 /* Parse/load Functions
1099 **
1100 ** HTML/XHTML version determined from input.
1101 */
tidyParseFile(TidyDoc tdoc,ctmbstr filnam)1102 int TIDY_CALL tidyParseFile( TidyDoc tdoc, ctmbstr filnam )
1103 {
1104 TidyDocImpl* doc = tidyDocToImpl( tdoc );
1105 return tidyDocParseFile( doc, filnam );
1106 }
tidyParseStdin(TidyDoc tdoc)1107 int TIDY_CALL tidyParseStdin( TidyDoc tdoc )
1108 {
1109 TidyDocImpl* doc = tidyDocToImpl( tdoc );
1110 return tidyDocParseStdin( doc );
1111 }
tidyParseString(TidyDoc tdoc,ctmbstr content)1112 int TIDY_CALL tidyParseString( TidyDoc tdoc, ctmbstr content )
1113 {
1114 TidyDocImpl* doc = tidyDocToImpl( tdoc );
1115 return tidyDocParseString( doc, content );
1116 }
tidyParseBuffer(TidyDoc tdoc,TidyBuffer * inbuf)1117 int TIDY_CALL tidyParseBuffer( TidyDoc tdoc, TidyBuffer* inbuf )
1118 {
1119 TidyDocImpl* doc = tidyDocToImpl( tdoc );
1120 return tidyDocParseBuffer( doc, inbuf );
1121 }
tidyParseSource(TidyDoc tdoc,TidyInputSource * source)1122 int TIDY_CALL tidyParseSource( TidyDoc tdoc, TidyInputSource* source )
1123 {
1124 TidyDocImpl* doc = tidyDocToImpl( tdoc );
1125 return tidyDocParseSource( doc, source );
1126 }
1127
1128 #ifdef WIN32
1129 #define M_IS_DIR _S_IFDIR
1130 #else // !WIN32
1131 #define M_IS_DIR S_IFDIR
1132 #endif
tidyDocParseFile(TidyDocImpl * doc,ctmbstr filnam)1133 int tidyDocParseFile( TidyDocImpl* doc, ctmbstr filnam )
1134 {
1135 int status = -ENOENT;
1136 FILE* fin = 0;
1137 struct stat sbuf = { 0 }; /* Is. #681 - read-only files */
1138 if ( stat(filnam,&sbuf) != 0 )
1139 {
1140 TY_(ReportFileError)( doc, filnam, FILE_NOT_FILE );
1141 return status;
1142 }
1143 if (sbuf.st_mode & M_IS_DIR) /* and /NOT/ if a DIRECTORY */
1144 {
1145 TY_(ReportFileError)(doc, filnam, FILE_NOT_FILE);
1146 return status;
1147 }
1148
1149 #ifdef _WIN32
1150 return TY_(DocParseFileWithMappedFile)( doc, filnam );
1151 #else
1152
1153 fin = fopen( filnam, "rb" );
1154
1155 #if PRESERVE_FILE_TIMES
1156 {
1157 /* get last modified time */
1158 TidyClearMemory(&doc->filetimes, sizeof(doc->filetimes));
1159 if (fin && cfgBool(doc, TidyKeepFileTimes) &&
1160 fstat(fileno(fin), &sbuf) != -1)
1161 {
1162 doc->filetimes.actime = sbuf.st_atime;
1163 doc->filetimes.modtime = sbuf.st_mtime;
1164 }
1165 }
1166 #endif
1167
1168 if ( fin )
1169 {
1170 StreamIn* in = TY_(FileInput)( doc, fin, cfg( doc, TidyInCharEncoding ));
1171 if ( !in )
1172 {
1173 fclose( fin );
1174 return status;
1175 }
1176 status = TY_(DocParseStream)( doc, in );
1177 TY_(freeFileSource)(&in->source, yes);
1178 TY_(freeStreamIn)(in);
1179 }
1180 else /* Error message! */
1181 TY_(ReportFileError)( doc, filnam, FILE_CANT_OPEN );
1182 return status;
1183 #endif
1184 }
1185
/* Parse standard input into doc using the configured input encoding.
**
** Returns the status from TY_(DocParseStream). If the file-backed input
** stream cannot be created (tidyDocParseFile() guards against the same
** NULL result), -ENOMEM is returned instead of dereferencing it.
*/
int tidyDocParseStdin( TidyDocImpl* doc )
{
    int status;
    StreamIn* in = TY_(FileInput)( doc, stdin, cfg( doc, TidyInCharEncoding ));

    if ( !in )
        return -ENOMEM;

    status = TY_(DocParseStream)( doc, in );
    TY_(freeFileSource)(&in->source, yes);
    TY_(freeStreamIn)(in);
    return status;
}
1194
/* Parse the contents of inbuf into doc using the configured input
** encoding. Returns -EINVAL when inbuf is NULL, otherwise the status from
** TY_(DocParseStream).
*/
int tidyDocParseBuffer( TidyDocImpl* doc, TidyBuffer* inbuf )
{
    StreamIn* stream;
    int result;

    if ( inbuf == NULL )
        return -EINVAL;

    stream = TY_(BufferInput)( doc, inbuf, cfg( doc, TidyInCharEncoding ));
    result = TY_(DocParseStream)( doc, stream );
    TY_(freeStreamIn)(stream);
    return result;
}
1206
/* Parse the NUL-terminated string content into doc.
**
** The string is attached (not copied) to a temporary TidyBuffer whose size
** includes the terminating NUL, parsed through a buffer-backed input
** stream, and detached again before returning. Returns -EINVAL when
** content is NULL, otherwise the status from TY_(DocParseStream).
*/
int tidyDocParseString( TidyDocImpl* doc, ctmbstr content )
{
    TidyBuffer wrapper;
    StreamIn* stream;
    int result;

    if ( content == NULL )
        return -EINVAL;

    tidyBufInitWithAllocator( &wrapper, doc->allocator );
    tidyBufAttach( &wrapper, (byte*)content, TY_(tmbstrlen)(content)+1 );
    stream = TY_(BufferInput)( doc, &wrapper, cfg( doc, TidyInCharEncoding ));
    result = TY_(DocParseStream)( doc, stream );
    tidyBufDetach( &wrapper );
    TY_(freeStreamIn)(stream);
    return result;
}
1224
/* Parse input delivered by a caller-supplied input source into doc, using
** the configured input encoding. Returns the status from
** TY_(DocParseStream).
*/
int tidyDocParseSource( TidyDocImpl* doc, TidyInputSource* source )
{
    StreamIn* stream;
    int result;

    stream = TY_(UserInput)( doc, source, cfg( doc, TidyInCharEncoding ));
    result = TY_(DocParseStream)( doc, stream );
    TY_(freeStreamIn)(stream);
    return result;
}
1232
1233
1234 /* Print/save Functions
1235 **
1236 */
tidySaveFile(TidyDoc tdoc,ctmbstr filnam)1237 int TIDY_CALL tidySaveFile( TidyDoc tdoc, ctmbstr filnam )
1238 {
1239 TidyDocImpl* doc = tidyDocToImpl( tdoc );
1240 return tidyDocSaveFile( doc, filnam );
1241 }
tidySaveStdout(TidyDoc tdoc)1242 int TIDY_CALL tidySaveStdout( TidyDoc tdoc )
1243 {
1244 TidyDocImpl* doc = tidyDocToImpl( tdoc );
1245 return tidyDocSaveStdout( doc );
1246 }
tidySaveString(TidyDoc tdoc,tmbstr buffer,uint * buflen)1247 int TIDY_CALL tidySaveString( TidyDoc tdoc, tmbstr buffer, uint* buflen )
1248 {
1249 TidyDocImpl* doc = tidyDocToImpl( tdoc );
1250 return tidyDocSaveString( doc, buffer, buflen );
1251 }
tidySaveBuffer(TidyDoc tdoc,TidyBuffer * outbuf)1252 int TIDY_CALL tidySaveBuffer( TidyDoc tdoc, TidyBuffer* outbuf )
1253 {
1254 TidyDocImpl* doc = tidyDocToImpl( tdoc );
1255 return tidyDocSaveBuffer( doc, outbuf );
1256 }
tidySaveSink(TidyDoc tdoc,TidyOutputSink * sink)1257 int TIDY_CALL tidySaveSink( TidyDoc tdoc, TidyOutputSink* sink )
1258 {
1259 TidyDocImpl* doc = tidyDocToImpl( tdoc );
1260 return tidyDocSaveSink( doc, sink );
1261 }
1262
/* Write doc to the file named filnam.
**
** When the document has errors, write-back is enabled and output is not
** forced, the file is left untouched and the current document status is
** returned. Otherwise the file is opened for binary writing and the
** document is saved through a file-backed output stream. In
** PRESERVE_FILE_TIMES builds, previously recorded file times are restored
** on the written file and then cleared.
**
** Returns the status from tidyDocSaveStream(), or -ENOENT when the file
** could not be opened. Any negative status is reported as FILE_CANT_OPEN.
*/
int tidyDocSaveFile( TidyDocImpl* doc, ctmbstr filnam )
{
    int status = -ENOENT;
    FILE* fout = NULL;
    Bool keepInput = doc->errors > 0 &&
                     cfgBool(doc, TidyWriteBack) &&
                     !cfgBool(doc, TidyForceOutput);

    /* Don't zap input file if no output */
    if ( keepInput )
        status = tidyDocStatus( doc );
    else
        fout = fopen( filnam, "wb" );

    if ( fout != NULL )
    {
        StreamOut* out = TY_(FileOutput)( doc, fout,
                                          cfg( doc, TidyOutCharEncoding ),
                                          cfg( doc, TidyNewline ) );

        status = tidyDocSaveStream( doc, out );

        fclose( fout );
        TidyDocFree( doc, out );

#if PRESERVE_FILE_TIMES
        if ( doc->filetimes.actime )
        {
            /* set file last accessed/modified times to original values */
            utime( filnam, &doc->filetimes );
            TidyClearMemory( &doc->filetimes, sizeof(doc->filetimes) );
        }
#endif /* PRESERVE_FILE_TIMES */
    }

    if ( status < 0 ) /* Error message! */
        TY_(ReportFileError)( doc, filnam, FILE_CANT_OPEN );

    return status;
}
1299
1300
1301
1302 /* Note, _setmode() does NOT work on Win2K Pro w/ VC++ 6.0 SP3.
1303 ** The code has been left in in case it works w/ other compilers
1304 ** or operating systems. If stdout is in Text mode, be aware that
1305 ** it will garble UTF16 documents. In text mode, when it encounters
1306 ** a single byte of value 10 (0xA), it will insert a single byte
1307 ** value 13 (0xD) just before it. This has the effect of garbling
1308 ** the entire document.
1309 */
1310
1311 #if !defined(NO_SETMODE_SUPPORT)
1312 # if defined(_WIN32) || defined(OS2_OS)
1313 # include <fcntl.h>
1314 # include <io.h>
1315 # endif
1316 #endif
1317
/* Write doc to standard output.
**
** Where setmode() support is compiled in (_WIN32 or OS2_OS without
** NO_SETMODE_SUPPORT), stdout and stderr are switched to binary mode for
** the duration of the save and restored afterwards. Both streams are
** flushed after saving. Returns the status from tidyDocSaveStream().
*/
int tidyDocSaveStdout( TidyDocImpl* doc )
{
#if !defined(NO_SETMODE_SUPPORT) && (defined(_WIN32) || defined(OS2_OS))
    int savedOutMode = -1;
    int savedErrMode = -1;
#endif

    int status;
    uint outenc = cfg( doc, TidyOutCharEncoding );
    uint nl = cfg( doc, TidyNewline );
    StreamOut* out = TY_(FileOutput)( doc, stdout, outenc, nl );

#if !defined(NO_SETMODE_SUPPORT) && (defined(_WIN32) || defined(OS2_OS))
    savedOutMode = setmode( fileno(stdout), _O_BINARY );
    savedErrMode = setmode( fileno(stderr), _O_BINARY );
#endif

    status = tidyDocSaveStream( doc, out );

    fflush(stdout);
    fflush(stderr);

#if !defined(NO_SETMODE_SUPPORT) && (defined(_WIN32) || defined(OS2_OS))
    /* Restore only the modes that were successfully changed above. */
    if ( savedOutMode != -1 )
        (void) setmode( fileno(stdout), savedOutMode );
    if ( savedErrMode != -1 )
        (void) setmode( fileno(stderr), savedErrMode );
#endif

    TidyDocFree( doc, out );
    return status;
}
1356
/* Write doc into the caller's buffer.
**
** buffer  destination; may be NULL only when nothing needs to be copied
**         (for example when probing for the required size with
**         *buflen == 0).
** buflen  in: capacity of buffer in bytes; out: number of bytes the
**         document occupies. Must not be NULL.
**
** The document is first rendered into a temporary TidyBuffer. Returns the
** status from tidyDocSaveStream(); -ENOMEM when the rendered output does
** not fit in *buflen bytes (nothing is copied, *buflen receives the
** required size); -EINVAL when buflen is NULL, or when buffer is NULL
** although there is output that fits and would have to be copied.
*/
int tidyDocSaveString( TidyDocImpl* doc, tmbstr buffer, uint* buflen )
{
    uint outenc;
    uint nl;
    TidyBuffer outbuf;
    StreamOut* out;
    int status;

    if ( buflen == NULL )
        return -EINVAL;

    outenc = cfg( doc, TidyOutCharEncoding );
    nl = cfg( doc, TidyNewline );

    tidyBufInitWithAllocator( &outbuf, doc->allocator );
    out = TY_(BufferOutput)( doc, &outbuf, outenc, nl );
    status = tidyDocSaveStream( doc, out );

    if ( outbuf.size > *buflen )
        status = -ENOMEM;
    else if ( outbuf.size > 0 )
    {
        /* Skip memcpy entirely for empty output: the source pointer may be
        ** NULL then, which memcpy does not permit even for zero bytes.
        */
        if ( buffer != NULL )
            memcpy( buffer, outbuf.bp, outbuf.size );
        else
            status = -EINVAL;
    }

    *buflen = outbuf.size;
    tidyBufFree( &outbuf );
    TidyDocFree( doc, out );
    return status;
}
1379
/* Write doc into outbuf through a buffer-backed output stream using the
** configured output encoding and newline style. Returns -EINVAL when
** outbuf is NULL, otherwise the status from tidyDocSaveStream().
*/
int tidyDocSaveBuffer( TidyDocImpl* doc, TidyBuffer* outbuf )
{
    uint outenc;
    uint nl;
    StreamOut* stream;
    int result;

    if ( outbuf == NULL )
        return -EINVAL;

    outenc = cfg( doc, TidyOutCharEncoding );
    nl     = cfg( doc, TidyNewline );
    stream = TY_(BufferOutput)( doc, outbuf, outenc, nl );

    result = tidyDocSaveStream( doc, stream );
    TidyDocFree( doc, stream );
    return result;
}
1394
/* Write doc to a caller-supplied output sink using the configured output
** encoding and newline style. Returns the status from
** tidyDocSaveStream().
*/
int tidyDocSaveSink( TidyDocImpl* doc, TidyOutputSink* sink )
{
    uint outenc = cfg( doc, TidyOutCharEncoding );
    uint nl = cfg( doc, TidyNewline );
    StreamOut* stream;
    int result;

    stream = TY_(UserOutput)( doc, sink, outenc, nl );
    result = tidyDocSaveStream( doc, stream );
    TidyDocFree( doc, stream );
    return result;
}
1404
/* Summarise the document's counters as a status code:
**   2 - at least one error
**   1 - no errors, but at least one warning or accessibility error
**   0 - none of the above
*/
int tidyDocStatus( TidyDocImpl* doc )
{
    int status = 0;

    if ( doc->errors > 0 )
        status = 2;
    else if ( doc->warnings > 0 || doc->accessErrors > 0 )
        status = 1;

    return status;
}
1413
1414
1415
tidyCleanAndRepair(TidyDoc tdoc)1416 int TIDY_CALL tidyCleanAndRepair( TidyDoc tdoc )
1417 {
1418 TidyDocImpl* impl = tidyDocToImpl( tdoc );
1419 if ( impl )
1420 return tidyDocCleanAndRepair( impl );
1421 return -EINVAL;
1422 }
1423
tidyRunDiagnostics(TidyDoc tdoc)1424 int TIDY_CALL tidyRunDiagnostics( TidyDoc tdoc )
1425 {
1426 TidyDocImpl* impl = tidyDocToImpl( tdoc );
1427 if ( impl )
1428 return tidyDocRunDiagnostics( impl );
1429 return -EINVAL;
1430 }
1431
tidyReportDoctype(TidyDoc tdoc)1432 int TIDY_CALL tidyReportDoctype( TidyDoc tdoc )
1433 {
1434 int iret = -EINVAL;
1435 TidyDocImpl* impl = tidyDocToImpl( tdoc );
1436 if ( impl ) {
1437 tidyDocReportDoctype( impl );
1438 iret = 0;
1439 }
1440 return iret;
1441 }
1442
1443 /* Workhorse functions.
1444 **
1445 ** Parse requires input source, all input config items
1446 ** and diagnostic sink to have all been set before calling.
1447 **
1448 ** Emit likewise requires that document sink and all
1449 ** pretty printing options have been set.
1450 */
1451 static ctmbstr integrity = "\nPanic - tree has lost its integrity\n";
1452
TY_(DocParseStream)1453 int TY_(DocParseStream)( TidyDocImpl* doc, StreamIn* in )
1454 {
1455 Bool xmlIn = cfgBool( doc, TidyXmlTags );
1456 TidyConfigChangeCallback callback = doc->pConfigChangeCallback;
1457
1458 int bomEnc;
1459 doc->pConfigChangeCallback = NULL;
1460
1461 assert( doc != NULL && in != NULL );
1462 assert( doc->docIn == NULL );
1463 doc->docIn = in;
1464
1465 TY_(ResetTags)(doc); /* Reset table to html5 mode */
1466 TY_(TakeConfigSnapshot)( doc ); /* Save config state */
1467 TY_(AdjustConfig)( doc ); /* Ensure config internal consistency */
1468 TY_(FreeAnchors)( doc );
1469
1470 TY_(FreeNode)(doc, &doc->root);
1471 TidyClearMemory(&doc->root, sizeof(Node));
1472
1473 if (doc->givenDoctype)
1474 TidyDocFree(doc, doc->givenDoctype);
1475 /*\
1476 * Issue #186 - Now FreeNode depend on the doctype, so the lexer is needed
1477 * to determine which hash is to be used, so free it last.
1478 \*/
1479 TY_(FreeLexer)( doc );
1480 doc->givenDoctype = NULL;
1481
1482 doc->lexer = TY_(NewLexer)( doc );
1483 /* doc->lexer->root = &doc->root; */
1484 doc->root.line = doc->lexer->lines;
1485 doc->root.column = doc->lexer->columns;
1486 doc->inputHadBOM = no;
1487 doc->xmlDetected = no;
1488
1489 bomEnc = TY_(ReadBOMEncoding)(in);
1490
1491 if (bomEnc != -1)
1492 {
1493 in->encoding = bomEnc;
1494 TY_(SetOptionInt)(doc, TidyInCharEncoding, bomEnc);
1495 }
1496
1497 /* Tidy doesn't alter the doctype for generic XML docs */
1498 if ( xmlIn )
1499 {
1500 TY_(ParseXMLDocument)( doc );
1501 if ( !TY_(CheckNodeIntegrity)( &doc->root ) )
1502 TidyPanic( doc->allocator, integrity );
1503 }
1504 else
1505 {
1506 doc->warnings = 0;
1507 TY_(ParseDocument)( doc );
1508 if ( !TY_(CheckNodeIntegrity)( &doc->root ) )
1509 TidyPanic( doc->allocator, integrity );
1510 }
1511
1512 doc->docIn = NULL;
1513 doc->pConfigChangeCallback = callback;
1514
1515 return tidyDocStatus( doc );
1516 }
1517
/* Report the markup version and the warning/error totals, then, if the
** document has errors and output is not being forced, emit the
** STRING_NEEDS_INTERVENTION dialogue. Returns the document status from
** tidyDocStatus().
*/
int tidyDocRunDiagnostics( TidyDocImpl* doc )
{
    Bool needsIntervention;

    TY_(ReportMarkupVersion)( doc );
    TY_(ReportNumWarnings)( doc );

    /* Evaluated after the reports above, exactly where the check has
    ** always been made.
    */
    needsIntervention = doc->errors > 0 && !cfgBool( doc, TidyForceOutput );
    if ( needsIntervention )
        TY_(Dialogue)(doc, STRING_NEEDS_INTERVENTION );

    return tidyDocStatus( doc );
}
1528
/* Report the document's markup version; a thin wrapper around
** TY_(ReportMarkupVersion).
*/
void tidyDocReportDoctype( TidyDocImpl* doc )
{
    TY_(ReportMarkupVersion)( doc );
    return;
}
1533
1534
1535 /*****************************************************************************
1536 * HTML5 STUFF
1537 *****************************************************************************/
1538 #if 0 && defined(ENABLE_DEBUG_LOG)
1539 extern void show_not_html5(void);
1540 /* -----------------------------
1541 List tags that do not have version HTML5 (HT50|XH50)
1542
1543 acronym applet basefont big center dir font frame frameset isindex
1544 listing noframes plaintext rb rbc rtc strike tt xmp nextid
1545 align bgsound blink comment ilayer layer marquee multicol nobr noembed
1546 nolayer nosave server servlet spacer
1547
1548 Listed total 35 tags that do not have version 393216
1549 ------------------------------ */
1550
1551 static void list_not_html5(void)
1552 {
1553 static Bool done_list = no;
1554 if (done_list == no) {
1555 done_list = yes;
1556 show_not_html5();
1557 }
1558 }
1559 #endif
1560
/* What about <blink>, <s> strike-through, <u> underline */
/* Table pairing element names with their TidyTag ids; inRemovedInfo()
** scans it by id. The list is terminated by a { 0, 0 } sentinel entry,
** which must stay last.
*/
static struct _html5Info
{
    const char *tag;   /* element name; 0 marks the end of the table */
    uint id;           /* TidyTag_* id of the element */
} const html5Info[] = {
    {"acronym", TidyTag_ACRONYM},
    {"applet", TidyTag_APPLET },
    {"basefont",TidyTag_BASEFONT },
    { "big", TidyTag_BIG },
    { "center", TidyTag_CENTER },
    { "dir", TidyTag_DIR },
    { "font", TidyTag_FONT },
    { "frame", TidyTag_FRAME},
    { "frameset", TidyTag_FRAMESET},
    { "noframes", TidyTag_NOFRAMES },
    { "strike", TidyTag_STRIKE },
    { "tt", TidyTag_TT },
    { 0, 0 }
};
inRemovedInfo(uint tid)1581 static Bool inRemovedInfo( uint tid )
1582 {
1583 int i;
1584 for (i = 0; ; i++) {
1585 if (html5Info[i].tag == 0)
1586 break;
1587 if (html5Info[i].id == tid)
1588 return yes;
1589 }
1590 return no;
1591 }
1592
1593 /* Things that should not be in an HTML5 body. This is special for CheckHTML5(),
1594 and we might just want to remove CheckHTML5()'s output altogether and count
1595 on the default --strict-tags-attributes.
1596 */
1597 static int BadBody5Attribs[] = {
1598 TidyAttr_BACKGROUND,
1599 TidyAttr_BGCOLOR,
1600 TidyAttr_TEXT,
1601 TidyAttr_LINK,
1602 TidyAttr_VLINK,
1603 TidyAttr_ALINK,
1604 TidyAttr_UNKNOWN /* Must be last! */
1605 };
1606
nodeHasAlignAttr(Node * node)1607 static Bool nodeHasAlignAttr( Node *node )
1608 {
1609 /* #define attrIsALIGN(av) AttrIsId( av, TidyAttr_ALIGN ) */
1610 AttVal* av;
1611 for ( av = node->attributes; av != NULL; av = av->next ) {
1612 if (attrIsALIGN(av))
1613 return yes;
1614 }
1615 return no;
1616 }
1617
1618 /*
1619 * Perform special checks for HTML, even when we're not using the default
1620 * option `--strict-tags-attributes yes`. This will ensure that HTML5 warning
1621 * and error output is given regardless of the new option, and ensure that
1622 * cleanup takes place. This provides mostly consistent Tidy behavior even with
1623 * the introduction of this new option. Note that strings have changed, though,
1624 * in order to maintain consistency with the `--strict-tags-attributes`
1625 * messages.
1626 *
1627 * See also: http://www.whatwg.org/specs/web-apps/current-work/multipage/obsolete.html#obsolete
1628 */
TY_(CheckHTML5)1629 static void TY_(CheckHTML5)( TidyDocImpl* doc, Node* node )
1630 {
1631 Bool clean = cfgBool( doc, TidyMakeClean );
1632 Bool already_strict = cfgBool( doc, TidyStrictTagsAttr );
1633 Node* body = TY_(FindBody)( doc );
1634 Bool warn = yes; /* should this be a warning, error, or report??? */
1635 AttVal* attr = NULL;
1636 int i = 0;
1637
1638 while (node)
1639 {
1640 if ( nodeHasAlignAttr( node ) ) {
1641 /* @todo: Is this for ALL elements that accept an 'align' attribute,
1642 * or should this be a sub-set test?
1643 */
1644
1645 /* We will only emit this message if `--strict-tags-attributes==no`;
1646 * otherwise if yes this message will be output during later
1647 * checking.
1648 */
1649 if ( !already_strict )
1650 TY_(ReportAttrError)(doc, node, TY_(AttrGetById)(node, TidyAttr_ALIGN), MISMATCHED_ATTRIBUTE_WARN);
1651 }
1652 if ( node == body ) {
1653 i = 0;
1654 /* We will only emit these messages if `--strict-tags-attributes==no`;
1655 * otherwise if yes these messages will be output during later
1656 * checking.
1657 */
1658 if ( !already_strict ) {
1659 while ( BadBody5Attribs[i] != TidyAttr_UNKNOWN ) {
1660 attr = TY_(AttrGetById)(node, BadBody5Attribs[i]);
1661 if ( attr )
1662 TY_(ReportAttrError)(doc, node, attr , MISMATCHED_ATTRIBUTE_WARN);
1663 i++;
1664 }
1665 }
1666 } else
1667 if ( nodeIsACRONYM(node) ) {
1668 if (clean) {
1669 /* Replace with 'abbr' with warning to that effect.
1670 * Maybe should use static void RenameElem( TidyDocImpl* doc, Node* node, TidyTagId tid )
1671 */
1672 TY_(CoerceNode)(doc, node, TidyTag_ABBR, warn, no);
1673 } else {
1674 if ( !already_strict )
1675 TY_(Report)(doc, node, node, REMOVED_HTML5);
1676 }
1677 } else
1678 if ( nodeIsAPPLET(node) ) {
1679 if (clean) {
1680 /* replace with 'object' with warning to that effect
1681 * maybe should use static void RenameElem( TidyDocImpl* doc, Node* node, TidyTagId tid )
1682 */
1683 TY_(CoerceNode)(doc, node, TidyTag_OBJECT, warn, no);
1684 } else {
1685 if ( !already_strict )
1686 TY_(Report)(doc, node, node, REMOVED_HTML5);
1687 }
1688 } else
1689 if ( nodeIsBASEFONT(node) ) {
1690 /* basefont: CSS equivalent 'font-size', 'font-family' and 'color'
1691 * on body or class on each subsequent element.
1692 * Difficult - If it is the first body element, then could consider
1693 * adding that to the <body> as a whole, else could perhaps apply it
1694 * to all subsequent elements. But also in consideration is the fact
1695 * that it was NOT supported in many browsers.
1696 * - For now just report a warning
1697 */
1698 if ( !already_strict )
1699 TY_(Report)(doc, node, node, REMOVED_HTML5);
1700 } else
1701 if ( nodeIsBIG(node) ) {
1702 /* big: CSS equivalent 'font-size:larger'
1703 * so could replace the <big> ... </big> with
1704 * <span style="font-size: larger"> ... </span>
1705 * then replace <big> with <span>
1706 * Need to think about that...
1707 * Could use -
1708 * TY_(AddStyleProperty)( doc, node, "font-size: larger" );
1709 * TY_(CoerceNode)(doc, node, TidyTag_SPAN, no, no);
1710 * Alternatively generated a <style> but how to get the style name
1711 * TY_(AddAttribute)( doc, node, "class", "????" );
1712 * Also maybe need a specific message like
1713 * Element '%s' replaced with 'span' with a 'font-size: larger style attribute
1714 * maybe should use static void RenameElem( TidyDocImpl* doc, Node* node, TidyTagId tid )
1715 */
1716 if (clean) {
1717 TY_(AddStyleProperty)( doc, node, "font-size: larger" );
1718 TY_(CoerceNode)(doc, node, TidyTag_SPAN, warn, no);
1719 } else {
1720 if ( !already_strict )
1721 TY_(Report)(doc, node, node, REMOVED_HTML5);
1722 }
1723 } else
1724 if ( nodeIsCENTER(node) ) {
1725 /* center: CSS equivalent 'text-align:center'
1726 * and 'margin-left:auto; margin-right:auto' on descendant blocks
1727 * Tidy already handles this if 'clean' by SILENTLY generating the
1728 * <style> and adding a <div class="c1"> around the elements.
1729 * see: static Bool Center2Div( TidyDocImpl* doc, Node *node, Node **pnode)
1730 */
1731 if ( !already_strict )
1732 TY_(Report)(doc, node, node, REMOVED_HTML5);
1733 } else
1734 if ( nodeIsDIR(node) ) {
1735 /* dir: replace by <ul>
1736 * Tidy already actions this and issues a warning
1737 * Should this be CHANGED???
1738 */
1739 if ( !already_strict )
1740 TY_(Report)(doc, node, node, REMOVED_HTML5);
1741 } else
1742 if ( nodeIsFONT(node) ) {
1743 /* Tidy already handles this -
1744 * If 'clean' replaced by CSS, else
1745 * if is NOT clean, and doctype html5 then warnings issued
1746 * done in Bool Font2Span( TidyDocImpl* doc, Node *node, Node **pnode ) (I think?)
1747 */
1748 if ( !already_strict )
1749 TY_(Report)(doc, node, node, REMOVED_HTML5);
1750 } else
1751 if (( nodesIsFRAME(node) ) || ( nodeIsFRAMESET(node) ) || ( nodeIsNOFRAMES(node) )) {
1752 /* YOW: What to do here?????? Maybe <iframe>????
1753 */
1754 if ( !already_strict )
1755 TY_(Report)(doc, node, node, REMOVED_HTML5);
1756 } else
1757 if ( nodeIsSTRIKE(node) ) {
1758 /* strike: CSS equivalent 'text-decoration:line-through'
1759 * maybe should use static void RenameElem( TidyDocImpl* doc, Node* node, TidyTagId tid )
1760 */
1761 if (clean) {
1762 TY_(AddStyleProperty)( doc, node, "text-decoration: line-through" );
1763 TY_(CoerceNode)(doc, node, TidyTag_SPAN, warn, no);
1764 } else {
1765 if ( !already_strict )
1766 TY_(Report)(doc, node, node, REMOVED_HTML5);
1767 }
1768 } else
1769 if ( nodeIsTT(node) ) {
1770 /* tt: CSS equivalent 'font-family:monospace'
1771 * Tidy presently does nothing. Tidy5 issues a warning
1772 * But like the 'clean' <font> replacement this could also be replaced with CSS
1773 * maybe should use static void RenameElem( TidyDocImpl* doc, Node* node, TidyTagId tid )
1774 */
1775 if (clean) {
1776 TY_(AddStyleProperty)( doc, node, "font-family: monospace" );
1777 TY_(CoerceNode)(doc, node, TidyTag_SPAN, warn, no);
1778 } else {
1779 if ( !already_strict )
1780 TY_(Report)(doc, node, node, REMOVED_HTML5);
1781 }
1782 } else
1783 if (TY_(nodeIsElement)(node)) {
1784 if (node->tag) {
1785 if ( (!(node->tag->versions & VERS_HTML5) && !(node->tag->versions & VERS_PROPRIETARY)) || (inRemovedInfo(node->tag->id)) ) {
1786 if ( !already_strict )
1787 TY_(Report)(doc, node, node, REMOVED_HTML5);
1788 }
1789 }
1790 }
1791
1792 if (node->content)
1793 TY_(CheckHTML5)( doc, node->content );
1794
1795 node = node->next;
1796 }
1797 }
1798 /*****************************************************************************
1799 * END HTML5 STUFF
1800 *****************************************************************************/
1801
1802
/*
 * Check and report HTML tags and attributes that are:
 *  - Proprietary, and/or
 *  - Not supported in the current version of HTML, defined as the version
 *    of HTML that we are emitting.
 * Proprietary items are reported as WARNINGS, and version mismatches will
 * be reported as WARNING or ERROR in the following conditions:
 *  - ERROR if the emitted doctype is a strict doctype.
 *  - WARNING if the emitted doctype is a non-strict doctype.
 * The proprietary checks are *always* run as they have always been an
 * integral part of Tidy. The version checks are controlled by
 * `strict-tags-attributes`.
 */
TY_(CheckHTMLTagsAttribsVersions)1815 static void TY_(CheckHTMLTagsAttribsVersions)( TidyDocImpl* doc, Node* node )
1816 {
1817 uint versionEmitted = doc->lexer->versionEmitted;
1818 uint declared = doc->lexer->doctype;
1819 uint version = versionEmitted == 0 ? declared : versionEmitted;
1820 int tagReportType = VERS_STRICT & version ? ELEMENT_VERS_MISMATCH_ERROR : ELEMENT_VERS_MISMATCH_WARN;
1821 int attrReportType = VERS_STRICT & version ? MISMATCHED_ATTRIBUTE_ERROR : MISMATCHED_ATTRIBUTE_WARN;
1822 Bool check_versions = cfgBool( doc, TidyStrictTagsAttr );
1823 AttVal *next_attr, *attval;
1824 Bool attrIsProprietary = no;
1825 Bool attrIsMismatched = yes;
1826 Bool tagLooksCustom = no;
1827 Bool htmlIs5 = (doc->lexer->doctype & VERS_HTML5) > 0;
1828
1829 while (node)
1830 {
1831 /* This bit here handles our HTML tags */
1832 if ( TY_(nodeIsElement)(node) && node->tag ) {
1833
1834 /* Leave XML stuff alone. */
1835 if ( !cfgBool(doc, TidyXmlTags) )
1836 {
1837 /* Version mismatches take priority. */
1838 if ( check_versions && !(node->tag->versions & version) )
1839 {
1840 TY_(Report)(doc, NULL, node, tagReportType );
1841 }
1842 /* If it's not mismatched, it could still be proprietary. */
1843 else if ( node->tag->versions & VERS_PROPRIETARY )
1844 {
1845 if ( !cfgBool(doc, TidyMakeClean) ||
1846 ( !nodeIsNOBR(node) && !nodeIsWBR(node) ) )
1847 {
1848 /* It looks custom, despite whether it's a known tag. */
1849 tagLooksCustom = TY_(nodeIsAutonomousCustomFormat)( node );
1850
1851 /* If we're in HTML5 mode and the tag does not look
1852 like a valid custom tag, then issue a warning.
1853 Appearance is good enough because invalid tags have
1854 been dropped. Also, if we're not in HTML5 mode, then
1855 then everything that reaches here gets the warning.
1856 Everything else can be ignored. */
1857
1858 if ( (htmlIs5 && !tagLooksCustom) || !htmlIs5 )
1859 {
1860 TY_(Report)(doc, NULL, node, PROPRIETARY_ELEMENT );
1861 }
1862
1863 if ( nodeIsLAYER(node) )
1864 doc->badLayout |= USING_LAYER;
1865 else if ( nodeIsSPACER(node) )
1866 doc->badLayout |= USING_SPACER;
1867 else if ( nodeIsNOBR(node) )
1868 doc->badLayout |= USING_NOBR;
1869 }
1870 }
1871 }
1872 }
1873
1874 /* And this bit here handles our attributes */
1875 if (TY_(nodeIsElement)(node))
1876 {
1877 attval = node->attributes;
1878
1879 while (attval)
1880 {
1881 next_attr = attval->next;
1882
1883 attrIsProprietary = TY_(AttributeIsProprietary)(node, attval);
1884 /* Is. #729 - always check version match if HTML5 */
1885 attrIsMismatched = (check_versions | htmlIs5) ? TY_(AttributeIsMismatched)(node, attval, doc) : no;
1886 /* Let the PROPRIETARY_ATTRIBUTE warning have precedence. */
1887 if ( attrIsProprietary )
1888 {
1889 if ( cfgBool(doc, TidyWarnPropAttrs) )
1890 TY_(ReportAttrError)(doc, node, attval, PROPRIETARY_ATTRIBUTE);
1891 }
1892 else if ( attrIsMismatched )
1893 {
1894 if (htmlIs5)
1895 {
1896 /* Is. #729 - In html5 TidyStrictTagsAttr controls error or warn */
1897 TY_(ReportAttrError)(doc, node, attval,
1898 check_versions ? MISMATCHED_ATTRIBUTE_ERROR : MISMATCHED_ATTRIBUTE_WARN);
1899 }
1900 else
1901 TY_(ReportAttrError)(doc, node, attval, attrReportType);
1902
1903 }
1904
1905 /* @todo: do we need a new option to drop mismatches? Or should we
1906 simply drop them? */
1907 if ( ( attrIsProprietary || attrIsMismatched ) && cfgBool(doc, TidyDropPropAttrs) )
1908 TY_(RemoveAttribute)( doc, node, attval );
1909
1910 attval = next_attr;
1911 }
1912 }
1913
1914 if (node->content)
1915 TY_(CheckHTMLTagsAttribsVersions)( doc, node->content );
1916
1917 node = node->next;
1918 }
1919 }
1920
1921
1922 #if defined(ENABLE_DEBUG_LOG)
1923 /* *** FOR DEBUG ONLY *** */
dbg_get_lexer_type(void * vp)1924 const char *dbg_get_lexer_type( void *vp )
1925 {
1926 Node *node = (Node *)vp;
1927 switch ( node->type )
1928 {
1929 case RootNode: return "Root";
1930 case DocTypeTag: return "DocType";
1931 case CommentTag: return "Comment";
1932 case ProcInsTag: return "ProcIns";
1933 case TextNode: return "Text";
1934 case StartTag: return "StartTag";
1935 case EndTag: return "EndTag";
1936 case StartEndTag: return "StartEnd";
1937 case CDATATag: return "CDATA";
1938 case SectionTag: return "Section";
1939 case AspTag: return "Asp";
1940 case JsteTag: return "Jste";
1941 case PhpTag: return "Php";
1942 case XmlDecl: return "XmlDecl";
1943 }
1944 return "Uncased";
1945 }
1946
/* NOTE: This matches the above lexer type, except when the element has a name */
dbg_get_element_name(void * vp)1948 const char *dbg_get_element_name( void *vp )
1949 {
1950 Node *node = (Node *)vp;
1951 switch ( node->type )
1952 {
1953 case TidyNode_Root: return "Root";
1954 case TidyNode_DocType: return "DocType";
1955 case TidyNode_Comment: return "Comment";
1956 case TidyNode_ProcIns: return "ProcIns";
1957 case TidyNode_Text: return "Text";
1958 case TidyNode_CDATA: return "CDATA";
1959 case TidyNode_Section: return "Section";
1960 case TidyNode_Asp: return "Asp";
1961 case TidyNode_Jste: return "Jste";
1962 case TidyNode_Php: return "Php";
1963 case TidyNode_XmlDecl: return "XmlDecl";
1964
1965 case TidyNode_Start:
1966 case TidyNode_End:
1967 case TidyNode_StartEnd:
1968 default:
1969 if (node->element)
1970 return node->element;
1971 }
1972 return "Unknown";
1973 }
1974
/* Debug helper: emit a one-line description of `node` through SPRTF.
**
** doc    - document owning the node; doc->lexer, when set, supplies the
**          buffer from which text content is read.
** node   - node to describe; dereferenced unconditionally.
** caller - 1, 2 or 3 appends "discard", "trim" or "test"; any other
**          value appends nothing.
** indent - number of single spaces printed before the description;
**          values <= 0 print none.
**
** When the node's name resolves to "Text" its length and content are
** shown as well. Content of 40 or more bytes is abbreviated to its first
** and last 19 bytes. Every attribute that has a name is listed, followed
** by its value when one is present.
*/
void dbg_show_node( TidyDocImpl* doc, Node *node, int caller, int indent )
{
    const uint previewLimit = 40;  /* lengths below this are shown in full */
    const uint previewEdge = 19;   /* bytes shown at each end otherwise */
    AttVal* av;
    Lexer* lexer = doc->lexer;
    ctmbstr call = "";
    ctmbstr name = dbg_get_element_name(node);
    ctmbstr type = dbg_get_lexer_type(node);
    ctmbstr impl = node->implicit ? "implicit" : "";

    switch ( caller )
    {
    case 1: call = "discard"; break;
    case 2: call = "trim"; break;
    case 3: call = "test"; break;
    default: break;
    }

    /* Compare against zero so a negative indent cannot spin the loop. */
    while (indent-- > 0)
        SPRTF(" ");

    if (strcmp(type,name))
        SPRTF("%s %s %s %s", type, name, impl, call );
    else
        SPRTF("%s %s %s", name, impl, call );

    if (lexer && (strcmp("Text",name) == 0))
    {
        uint len = node->end - node->start;
        uint i;

        SPRTF(" (%u) '", len);
        if (len < previewLimit)
        {
            /* show it all */
            for (i = node->start; i < node->end; i++)
                SPRTF("%c", lexer->lexbuf[i]);
        }
        else
        {
            /* Partial display. The head is bounded relative to
               node->start: an absolute bound would print nothing for
               any node that begins past that offset in the buffer. */
            uint headEnd = node->start + previewEdge;

            for (i = node->start; i < headEnd; i++)
                SPRTF("%c", lexer->lexbuf[i]);
            SPRTF("...");
            for (i = node->end - previewEdge; i < node->end; i++)
                SPRTF("%c", lexer->lexbuf[i]);
        }
        SPRTF("'");
    }

    for (av = node->attributes; av; av = av->next)
    {
        ctmbstr attrName = av->attribute;
        if (attrName)
        {
            SPRTF(" %s", attrName);
            if (av->value)
                SPRTF("=\"%s\"", av->value);
        }
    }

    SPRTF("\n");
}
2030
/* Debug helper: print `node` and each of its following siblings with
** dbg_show_node, descending into every node's content with the indent
** increased by one.
*/
void dbg_show_all_nodes( TidyDocImpl* doc, Node *node, int indent )
{
    Node *cur;

    for ( cur = node; cur != NULL; cur = cur->next )
    {
        dbg_show_node( doc, cur, 0, indent );
        dbg_show_all_nodes( doc, cur->content, indent + 1 );
    }
}
2040
2041 #endif
2042
/* Run the clean-up and repair passes over the parsed document tree.
**
** The passes are applied in a fixed order, which must be kept as is.
** The configuration-change callback is suspended while the passes run
** and is restored on every exit path. When TidyXmlTags is set, no pass
** is run at all.
**
** Returns the value of tidyDocStatus( doc ).
*/
int tidyDocCleanAndRepair( TidyDocImpl* doc )
{
    Bool word2K   = cfgBool( doc, TidyWord2000 );
    Bool logical  = cfgBool( doc, TidyLogicalEmphasis );
    Bool clean    = cfgBool( doc, TidyMakeClean );
    Bool gdoc     = cfgBool( doc, TidyGDocClean );
    Bool htmlOut  = cfgBool( doc, TidyHtmlOut );
    Bool xmlOut   = cfgBool( doc, TidyXmlOut );
    Bool xhtmlOut = cfgBool( doc, TidyXhtmlOut );
    Bool xmlDecl  = cfgBool( doc, TidyXmlDecl );
    Bool tidyMark = cfgBool( doc, TidyMark );
    Bool tidyXmlTags = cfgBool( doc, TidyXmlTags );
    Bool wantNameAttr = cfgBool( doc, TidyAnchorAsName );
    Bool mergeEmphasis = cfgBool( doc, TidyMergeEmphasis );
    Node* node;
    TidyConfigChangeCallback callback = doc->pConfigChangeCallback;
    doc->pConfigChangeCallback = NULL;

#if defined(ENABLE_DEBUG_LOG)
    SPRTF("All nodes BEFORE clean and repair\n");
    dbg_show_all_nodes( doc, &doc->root, 0 );
#endif
    if (tidyXmlTags)
    {
        doc->pConfigChangeCallback = callback;
        return tidyDocStatus( doc );
    }

    /* Issue #567 - move style elements from body to head */
    TY_(CleanStyle)(doc, &doc->root);

    /* simplifies <b><b> ... </b> ...</b> etc. */
    if ( mergeEmphasis )
        TY_(NestedEmphasis)( doc, &doc->root );

    /* cleans up <dir>indented text</dir> etc. */
    TY_(List2BQ)( doc, &doc->root );
    TY_(BQ2Div)( doc, &doc->root );

    /* replaces i by em and b by strong */
    if ( logical )
        TY_(EmFromI)( doc, &doc->root );

    if ( word2K && TY_(IsWord2000)(doc) )
    {
        /* prune Word2000's <![if ...]> ... <![endif]> */
        TY_(DropSections)( doc, &doc->root );

        /* drop style & class attributes and empty p, span elements */
        TY_(CleanWord2000)( doc, &doc->root );
        TY_(DropEmptyElements)(doc, &doc->root);
    }

    /* replaces presentational markup by style rules */
    if ( clean )
        TY_(CleanDocument)( doc );

    /* clean up html exported by Google Docs */
    if ( gdoc )
        TY_(CleanGoogleDocument)( doc );

    /* Reconcile http-equiv meta element with output encoding */
    TY_(TidyMetaCharset)(doc);

    if ( !TY_(CheckNodeIntegrity)( &doc->root ) )
        TidyPanic( doc->allocator, integrity );

    /* remember given doctype for reporting */
    node = TY_(FindDocType)(doc);

    if (node)
    {
        AttVal* fpi = TY_(GetAttrByName)(node, "PUBLIC");
        if (AttrHasValue(fpi))
        {
            /* Replace any previously remembered doctype string. */
            if (doc->givenDoctype)
                TidyDocFree(doc, doc->givenDoctype);
            doc->givenDoctype = TY_(tmbstrdup)(doc->allocator,fpi->value);
        }
    }

    if ( doc->root.content )
    {
        /* If we had XHTML input but want HTML output.
           The lexer is checked before use, matching the guard applied
           to the version checks further down. */
        if ( htmlOut && doc->lexer && doc->lexer->isvoyager )
        {
            /* Separately named so the outer `node` is not shadowed. */
            Node* doctype = TY_(FindDocType)(doc);
            /* Remove reference, but do not free */
            if (doctype)
                TY_(RemoveNode)(doctype);
        }

        if (xhtmlOut && !htmlOut)
        {
            TY_(SetXHTMLDocType)(doc);
            TY_(FixAnchors)(doc, &doc->root, wantNameAttr, yes);
            TY_(FixXhtmlNamespace)(doc, yes);
            TY_(FixLanguageInformation)(doc, &doc->root, yes, yes);
        }
        else
        {
            TY_(FixDocType)(doc);
            TY_(FixAnchors)(doc, &doc->root, wantNameAttr, yes);
            TY_(FixXhtmlNamespace)(doc, no);
            TY_(FixLanguageInformation)(doc, &doc->root, no, yes);
        }

        if (tidyMark )
            TY_(AddGenerator)(doc);
    }

    /* ensure presence of initial <?xml version="1.0"?> */
    if ( xmlOut && xmlDecl )
        TY_(FixXmlDecl)( doc );

    /* At this point the apparent doctype is going to be as stable as
       it can ever be, so we can start detecting things that shouldn't
       be in this version of HTML
     */
    if (doc->lexer)
    {
        /*\
         *  Issue #429 #426 - These services can only be used
         *  when there is a document loaded, ie a lexer created.
         *  But really should not be calling a Clean and Repair
         *  service with no doc!
        \*/
        if (doc->lexer->versionEmitted & VERS_HTML5)
            TY_(CheckHTML5)( doc, &doc->root );
        TY_(CheckHTMLTagsAttribsVersions)( doc, &doc->root );

        if ( !doc->lexer->isvoyager && doc->xmlDetected )
        {
            TY_(Report)(doc, NULL, TY_(FindXmlDecl)(doc), XML_DECLARATION_DETECTED );
        }
    }

    TY_(CleanHead)(doc); /* Is #692 - discard multiple <title> tags */

#if defined(ENABLE_DEBUG_LOG)
    SPRTF("All nodes AFTER clean and repair\n");
    dbg_show_all_nodes( doc, &doc->root, 0 );
#endif

    doc->pConfigChangeCallback = callback;
    return tidyDocStatus( doc );
}
2192
2193 static
showBodyOnly(TidyDocImpl * doc,TidyTriState bodyOnly)2194 Bool showBodyOnly( TidyDocImpl* doc, TidyTriState bodyOnly )
2195 {
2196 Node* node;
2197
2198 switch( bodyOnly )
2199 {
2200 case TidyNoState:
2201 return no;
2202 case TidyYesState:
2203 return yes;
2204 default:
2205 node = TY_(FindBody)( doc );
2206 if (node && node->implicit )
2207 return yes;
2208 }
2209 return no;
2210 }
2211
2212
/* Apply the output-time tree transformations and, when markup output is
** enabled, pretty-print the document to `out`.
**
** Markup is written only if TidyShowMarkup is set and either no errors
** were recorded or TidyForceOutput is set. The configuration-change
** callback is suspended for the duration of the call and restored before
** returning.
**
** Returns the value of tidyDocStatus( doc ).
*/
int tidyDocSaveStream( TidyDocImpl* doc, StreamOut* out )
{
    /* Read every option up front, before any pass touches the tree. */
    Bool wantMarkup      = cfgBool( doc, TidyShowMarkup );
    Bool force           = cfgBool( doc, TidyForceOutput );
    TidyTriState bomMode = cfgAutoBool( doc, TidyOutputBOM );
    Bool asXml           = cfgBool( doc, TidyXmlOut );
    Bool asXhtml         = cfgBool( doc, TidyXhtmlOut );
    TidyTriState bodyOnly = cfgAutoBool( doc, TidyBodyOnly );

    Bool hideComments    = cfgBool(doc, TidyHideComments);
    Bool cleanMode       = cfgBool(doc, TidyMakeClean);
    Bool asciiOnly       = cfgBool(doc, TidyAsciiChars);
    Bool bareMode        = cfgBool(doc, TidyMakeBare);
    Bool escapeCdata     = cfgBool(doc, TidyEscapeCdata);
    Bool indentWithTabs  = cfgBool(doc, TidyPPrintTabs);
    TidyAttrSortStrategy sortStrategy = cfg(doc, TidySortAttributes);
    TidyConfigChangeCallback savedCallback = doc->pConfigChangeCallback;

    doc->pConfigChangeCallback = NULL;

    if (indentWithTabs)
        TY_(PPrintTabs)();
    else
        TY_(PPrintSpaces)();

    if (escapeCdata)
        TY_(ConvertCDATANodes)(doc, &doc->root);

    if (hideComments)
        TY_(DropComments)(doc, &doc->root);

    /* The original source annotates this call as a "noop". */
    if (cleanMode)
        TY_(DropFontElements)(doc, &doc->root, NULL);

    if (bareMode || (cleanMode && asciiOnly))
        TY_(DowngradeTypography)(doc, &doc->root);

    /* Per the original note, NormalizeSpaces no longer replaces inside
       attribute values / non-text tokens. */
    if (!bareMode)
        TY_(ReplacePreformattedSpaces)(doc, &doc->root);
    else
        TY_(NormalizeSpaces)(doc->lexer, &doc->root);

    TY_(SortAttributes)(doc, &doc->root, sortStrategy);

    if ( wantMarkup && (doc->errors == 0 || force) )
    {
        /* Output a Byte Order Mark if required: either always, or in
           auto mode only when the input carried one. */
        if ( bomMode == TidyYesState
             || (doc->inputHadBOM && bomMode == TidyAutoState) )
            TY_(outBOM)( out );

        /* No DOCTYPE == HTML 3.2, which gives only the basic character
        ** entities, so numeric entities are no longer forced here.
        */

        doc->docOut = out;
        if ( asXml && !asXhtml )
            TY_(PPrintXMLTree)( doc, NORMAL, 0, &doc->root );
        else if ( showBodyOnly( doc, bodyOnly ) )
            TY_(PrintBody)( doc );
        else
            TY_(PPrintTree)( doc, NORMAL, 0, &doc->root );

        TY_(PFlushLine)( doc, 0 );
        doc->docOut = NULL;
    }

    /* @jsd: the configuration is deliberately not reset to its snapshot
       here; that should solve #673 and allow saving of the buffer
       multiple times. */
    doc->pConfigChangeCallback = savedCallback;

    return tidyDocStatus( doc );
}
2293
2294 /* Tree traversal functions
2295 **
2296 ** The big issue here is the degree to which we should mimic
2297 ** a DOM and/or SAX nodes.
2298 **
2299 ** Is it 100% possible (and, if so, how difficult is it) to
2300 ** emit SAX events from this API? If SAX events are possible,
2301 ** is that 100% of data needed to build a DOM?
2302 */
2303
tidyGetRoot(TidyDoc tdoc)2304 TidyNode TIDY_CALL tidyGetRoot( TidyDoc tdoc )
2305 {
2306 TidyDocImpl* impl = tidyDocToImpl( tdoc );
2307 Node* node = NULL;
2308 if ( impl )
2309 node = &impl->root;
2310 return tidyImplToNode( node );
2311 }
2312
tidyGetHtml(TidyDoc tdoc)2313 TidyNode TIDY_CALL tidyGetHtml( TidyDoc tdoc )
2314 {
2315 TidyDocImpl* impl = tidyDocToImpl( tdoc );
2316 Node* node = NULL;
2317 if ( impl )
2318 node = TY_(FindHTML)( impl );
2319 return tidyImplToNode( node );
2320 }
2321
tidyGetHead(TidyDoc tdoc)2322 TidyNode TIDY_CALL tidyGetHead( TidyDoc tdoc )
2323 {
2324 TidyDocImpl* impl = tidyDocToImpl( tdoc );
2325 Node* node = NULL;
2326 if ( impl )
2327 node = TY_(FindHEAD)( impl );
2328 return tidyImplToNode( node );
2329 }
2330
tidyGetBody(TidyDoc tdoc)2331 TidyNode TIDY_CALL tidyGetBody( TidyDoc tdoc )
2332 {
2333 TidyDocImpl* impl = tidyDocToImpl( tdoc );
2334 Node* node = NULL;
2335 if ( impl )
2336 node = TY_(FindBody)( impl );
2337 return tidyImplToNode( node );
2338 }
2339
2340 /* parent / child */
tidyGetParent(TidyNode tnod)2341 TidyNode TIDY_CALL tidyGetParent( TidyNode tnod )
2342 {
2343 Node* nimp = tidyNodeToImpl( tnod );
2344 return tidyImplToNode( nimp->parent );
2345 }
tidyGetChild(TidyNode tnod)2346 TidyNode TIDY_CALL tidyGetChild( TidyNode tnod )
2347 {
2348 Node* nimp = tidyNodeToImpl( tnod );
2349 return tidyImplToNode( nimp->content );
2350 }
2351
2352 /* remove a node */
tidyDiscardElement(TidyDoc tdoc,TidyNode tnod)2353 TidyNode TIDY_CALL tidyDiscardElement( TidyDoc tdoc, TidyNode tnod )
2354 {
2355 TidyDocImpl* doc = tidyDocToImpl( tdoc );
2356 Node* nimp = tidyNodeToImpl( tnod );
2357 Node* next = TY_(DiscardElement)( doc, nimp );
2358 return tidyImplToNode( next );
2359 }
2360
2361 /* siblings */
tidyGetNext(TidyNode tnod)2362 TidyNode TIDY_CALL tidyGetNext( TidyNode tnod )
2363 {
2364 Node* nimp = tidyNodeToImpl( tnod );
2365 return tidyImplToNode( nimp->next );
2366 }
tidyGetPrev(TidyNode tnod)2367 TidyNode TIDY_CALL tidyGetPrev( TidyNode tnod )
2368 {
2369 Node* nimp = tidyNodeToImpl( tnod );
2370 return tidyImplToNode( nimp->prev );
2371 }
2372
2373 /* Node info */
tidyNodeGetType(TidyNode tnod)2374 TidyNodeType TIDY_CALL tidyNodeGetType( TidyNode tnod )
2375 {
2376 Node* nimp = tidyNodeToImpl( tnod );
2377 TidyNodeType ntyp = TidyNode_Root;
2378 if ( nimp )
2379 ntyp = (TidyNodeType) nimp->type;
2380 return ntyp;
2381 }
2382
tidyNodeLine(TidyNode tnod)2383 uint TIDY_CALL tidyNodeLine( TidyNode tnod )
2384 {
2385 Node* nimp = tidyNodeToImpl( tnod );
2386 uint line = 0;
2387 if ( nimp )
2388 line = nimp->line;
2389 return line;
2390 }
tidyNodeColumn(TidyNode tnod)2391 uint TIDY_CALL tidyNodeColumn( TidyNode tnod )
2392 {
2393 Node* nimp = tidyNodeToImpl( tnod );
2394 uint col = 0;
2395 if ( nimp )
2396 col = nimp->column;
2397 return col;
2398 }
2399
tidyNodeGetName(TidyNode tnod)2400 ctmbstr TIDY_CALL tidyNodeGetName( TidyNode tnod )
2401 {
2402 Node* nimp = tidyNodeToImpl( tnod );
2403 ctmbstr nnam = NULL;
2404 if ( nimp )
2405 nnam = nimp->element;
2406 return nnam;
2407 }
2408
2409
tidyNodeHasText(TidyDoc tdoc,TidyNode tnod)2410 Bool TIDY_CALL tidyNodeHasText( TidyDoc tdoc, TidyNode tnod )
2411 {
2412 TidyDocImpl* doc = tidyDocToImpl( tdoc );
2413 if ( doc )
2414 return TY_(nodeHasText)( doc, tidyNodeToImpl(tnod) );
2415 return no;
2416 }
2417
2418
tidyNodeGetText(TidyDoc tdoc,TidyNode tnod,TidyBuffer * outbuf)2419 Bool TIDY_CALL tidyNodeGetText( TidyDoc tdoc, TidyNode tnod, TidyBuffer* outbuf )
2420 {
2421 TidyDocImpl* doc = tidyDocToImpl( tdoc );
2422 Node* nimp = tidyNodeToImpl( tnod );
2423 if ( doc && nimp && outbuf )
2424 {
2425 uint outenc = cfg( doc, TidyOutCharEncoding );
2426 uint nl = cfg( doc, TidyNewline );
2427 StreamOut* out = TY_(BufferOutput)( doc, outbuf, outenc, nl );
2428 Bool xmlOut = cfgBool( doc, TidyXmlOut );
2429 Bool xhtmlOut = cfgBool( doc, TidyXhtmlOut );
2430
2431 doc->docOut = out;
2432 if ( xmlOut && !xhtmlOut )
2433 TY_(PPrintXMLTree)( doc, NORMAL, 0, nimp );
2434 else
2435 TY_(PPrintTree)( doc, NORMAL, 0, nimp );
2436
2437 TY_(PFlushLine)( doc, 0 );
2438 doc->docOut = NULL;
2439
2440 TidyDocFree( doc, out );
2441 return yes;
2442 }
2443 return no;
2444 }
2445
tidyNodeGetValue(TidyDoc tdoc,TidyNode tnod,TidyBuffer * buf)2446 Bool TIDY_CALL tidyNodeGetValue( TidyDoc tdoc, TidyNode tnod, TidyBuffer* buf )
2447 {
2448 TidyDocImpl *doc = tidyDocToImpl( tdoc );
2449 Node *node = tidyNodeToImpl( tnod );
2450 if ( doc == NULL || node == NULL || buf == NULL )
2451 return no;
2452
2453 switch( node->type ) {
2454 case TextNode:
2455 case CDATATag:
2456 case CommentTag:
2457 case ProcInsTag:
2458 case SectionTag:
2459 case AspTag:
2460 case JsteTag:
2461 case PhpTag:
2462 {
2463 tidyBufClear( buf );
2464 tidyBufAppend( buf, doc->lexer->lexbuf + node->start,
2465 node->end - node->start );
2466 break;
2467 }
2468 default:
2469 /* The node doesn't have a value */
2470 return no;
2471 }
2472
2473 return yes;
2474 }
2475
tidyNodeIsProp(TidyDoc ARG_UNUSED (tdoc),TidyNode tnod)2476 Bool TIDY_CALL tidyNodeIsProp( TidyDoc ARG_UNUSED(tdoc), TidyNode tnod )
2477 {
2478 Node* nimp = tidyNodeToImpl( tnod );
2479 Bool isProprietary = yes;
2480 if ( nimp )
2481 {
2482 switch ( nimp->type )
2483 {
2484 case RootNode:
2485 case DocTypeTag:
2486 case CommentTag:
2487 case XmlDecl:
2488 case ProcInsTag:
2489 case TextNode:
2490 case CDATATag:
2491 isProprietary = no;
2492 break;
2493
2494 case SectionTag:
2495 case AspTag:
2496 case JsteTag:
2497 case PhpTag:
2498 isProprietary = yes;
2499 break;
2500
2501 case StartTag:
2502 case EndTag:
2503 case StartEndTag:
2504 isProprietary = ( nimp->tag
2505 ? (nimp->tag->versions&VERS_PROPRIETARY)!=0
2506 : yes );
2507 break;
2508 }
2509 }
2510 return isProprietary;
2511 }
2512
tidyNodeGetId(TidyNode tnod)2513 TidyTagId TIDY_CALL tidyNodeGetId(TidyNode tnod)
2514 {
2515 Node* nimp = tidyNodeToImpl(tnod);
2516
2517 TidyTagId tagId = TidyTag_UNKNOWN;
2518 if (nimp && nimp->tag)
2519 tagId = nimp->tag->id;
2520
2521 return tagId;
2522 }
2523
2524
2525 /* Iterate over attribute values */
tidyAttrFirst(TidyNode tnod)2526 TidyAttr TIDY_CALL tidyAttrFirst( TidyNode tnod )
2527 {
2528 Node* nimp = tidyNodeToImpl( tnod );
2529 AttVal* attval = NULL;
2530 if ( nimp )
2531 attval = nimp->attributes;
2532 return tidyImplToAttr( attval );
2533 }
tidyAttrNext(TidyAttr tattr)2534 TidyAttr TIDY_CALL tidyAttrNext( TidyAttr tattr )
2535 {
2536 AttVal* attval = tidyAttrToImpl( tattr );
2537 AttVal* nxtval = NULL;
2538 if ( attval )
2539 nxtval = attval->next;
2540 return tidyImplToAttr( nxtval );
2541 }
2542
tidyAttrName(TidyAttr tattr)2543 ctmbstr TIDY_CALL tidyAttrName( TidyAttr tattr )
2544 {
2545 AttVal* attval = tidyAttrToImpl( tattr );
2546 ctmbstr anam = NULL;
2547 if ( attval )
2548 anam = attval->attribute;
2549 return anam;
2550 }
tidyAttrValue(TidyAttr tattr)2551 ctmbstr TIDY_CALL tidyAttrValue( TidyAttr tattr )
2552 {
2553 AttVal* attval = tidyAttrToImpl( tattr );
2554 ctmbstr aval = NULL;
2555 if ( attval )
2556 aval = attval->value;
2557 return aval;
2558 }
2559
tidyAttrDiscard(TidyDoc tdoc,TidyNode tnod,TidyAttr tattr)2560 void TIDY_CALL tidyAttrDiscard( TidyDoc tdoc, TidyNode tnod, TidyAttr tattr )
2561 {
2562 TidyDocImpl* impl = tidyDocToImpl( tdoc );
2563 Node* nimp = tidyNodeToImpl( tnod );
2564 AttVal* attval = tidyAttrToImpl( tattr );
2565 TY_(RemoveAttribute)( impl, nimp, attval );
2566 }
2567
tidyAttrGetId(TidyAttr tattr)2568 TidyAttrId TIDY_CALL tidyAttrGetId( TidyAttr tattr )
2569 {
2570 AttVal* attval = tidyAttrToImpl( tattr );
2571 TidyAttrId attrId = TidyAttr_UNKNOWN;
2572 if ( attval && attval->dict )
2573 attrId = attval->dict->id;
2574 return attrId;
2575 }
2576
tidyAttrGetById(TidyNode tnod,TidyAttrId attId)2577 TidyAttr TIDY_CALL tidyAttrGetById( TidyNode tnod, TidyAttrId attId )
2578 {
2579 Node* nimp = tidyNodeToImpl(tnod);
2580 return tidyImplToAttr( TY_(AttrGetById)( nimp, attId ) );
2581 }
2582
2583
tidyAttrIsEvent(TidyAttr tattr)2584 Bool TIDY_CALL tidyAttrIsEvent( TidyAttr tattr )
2585 {
2586 return TY_(attrIsEvent)( tidyAttrToImpl(tattr) );
2587 }
2588
2589
2590 /*******************************************************************
2591 ** Message Key Management
2592 *******************************************************************/
tidyErrorCodeAsKey(uint code)2593 ctmbstr TIDY_CALL tidyErrorCodeAsKey(uint code)
2594 {
2595 return TY_(tidyErrorCodeAsKey)( code );
2596 }
2597
tidyErrorCodeFromKey(ctmbstr code)2598 uint TIDY_CALL tidyErrorCodeFromKey(ctmbstr code)
2599 {
2600 return TY_(tidyErrorCodeFromKey)( code );
2601 }
2602
getErrorCodeList()2603 TidyIterator TIDY_CALL getErrorCodeList()
2604 {
2605 return TY_(getErrorCodeList)();
2606 }
2607
getNextErrorCode(TidyIterator * iter)2608 uint TIDY_CALL getNextErrorCode( TidyIterator* iter )
2609 {
2610 return TY_(getNextErrorCode)(iter);
2611 }
2612
2613
2614 /*******************************************************************
2615 ** Localization Support
2616 *******************************************************************/
2617
2618
tidySetLanguage(ctmbstr languageCode)2619 Bool TIDY_CALL tidySetLanguage( ctmbstr languageCode )
2620 {
2621 Bool result = TY_(tidySetLanguage)( languageCode );
2622
2623 if ( result )
2624 TY_(tidySetLanguageSetByUser)();
2625
2626 return result;
2627 }
2628
tidyGetLanguage()2629 ctmbstr TIDY_CALL tidyGetLanguage()
2630 {
2631 return TY_(tidyGetLanguage)();
2632 }
2633
tidyLocalizedStringN(uint messageType,uint quantity)2634 ctmbstr TIDY_CALL tidyLocalizedStringN( uint messageType, uint quantity )
2635 {
2636 return TY_(tidyLocalizedStringN)( messageType, quantity);
2637 }
2638
tidyLocalizedString(uint messageType)2639 ctmbstr TIDY_CALL tidyLocalizedString( uint messageType )
2640 {
2641 return TY_(tidyLocalizedString)( messageType );
2642 }
2643
tidyDefaultString(uint messageType)2644 ctmbstr TIDY_CALL tidyDefaultString( uint messageType )
2645 {
2646 return TY_(tidyDefaultString)( messageType );
2647 }
2648
getStringKeyList()2649 TidyIterator TIDY_CALL getStringKeyList()
2650 {
2651 return TY_(getStringKeyList)();
2652 }
2653
getNextStringKey(TidyIterator * iter)2654 uint TIDY_CALL getNextStringKey( TidyIterator* iter )
2655 {
2656 return TY_(getNextStringKey)( iter );
2657 }
2658
getWindowsLanguageList()2659 TidyIterator TIDY_CALL getWindowsLanguageList()
2660 {
2661 return TY_(getWindowsLanguageList)();
2662 }
2663
2664 //#define tidyOptionToImpl( topt ) ((const TidyOptionImpl*)(topt))
2665 //#define tidyImplToOption( option ) ((TidyOption)(option))
2666
getNextWindowsLanguage(TidyIterator * iter)2667 const tidyLocaleMapItem* TIDY_CALL getNextWindowsLanguage( TidyIterator* iter )
2668 {
2669 /* Get a real structure */
2670 const tidyLocaleMapItemImpl *item = TY_(getNextWindowsLanguage)( iter );
2671
2672 /* Return it as the opaque version */
2673 return ((tidyLocaleMapItem*)(item));
2674 }
2675
2676
TidyLangWindowsName(const tidyLocaleMapItem * item)2677 ctmbstr TIDY_CALL TidyLangWindowsName( const tidyLocaleMapItem *item )
2678 {
2679 return TY_(TidyLangWindowsName)( (tidyLocaleMapItemImpl*)(item) );
2680 }
2681
2682
TidyLangPosixName(const tidyLocaleMapItem * item)2683 ctmbstr TIDY_CALL TidyLangPosixName( const tidyLocaleMapItem *item )
2684 {
2685 return TY_(TidyLangPosixName)( (tidyLocaleMapItemImpl*)(item) );
2686 }
2687
2688
getInstalledLanguageList()2689 TidyIterator TIDY_CALL getInstalledLanguageList()
2690 {
2691 return TY_(getInstalledLanguageList)();
2692 }
2693
2694
getNextInstalledLanguage(TidyIterator * iter)2695 ctmbstr TIDY_CALL getNextInstalledLanguage( TidyIterator* iter )
2696 {
2697 return TY_(getNextInstalledLanguage)( iter );
2698 }
2699
2700
2701
2702
2703 /*
2704 * local variables:
2705 * mode: c
2706 * indent-tabs-mode: nil
2707 * c-basic-offset: 4
2708 * eval: (c-set-offset 'substatement-open 0)
2709 * end:
2710 */
2711