1 /* tidylib.c -- internal library definitions
2
3 (c) 1998-2008 (W3C) MIT, ERCIM, Keio University
4 See tidyp.h for the copyright notice.
5
6 Defines HTML Tidy API implemented by tidy library.
7
8 Very rough initial cut for discussion purposes.
9
10 Public interface is const-correct and doesn't explicitly depend
11 on any globals. Thus, thread-safety may be introduced w/out
12 changing the interface.
13
14 Looking ahead to a C++ wrapper, C functions always pass
15 this-equivalent as 1st arg.
16
17 Created 2001-05-20 by Charles Reitzel
18
19 */
20
21 #include <errno.h>
22
23 #include "tidy-int.h"
24 #include "parser.h"
25 #include "clean.h"
26 #include "config.h"
27 #include "message.h"
28 #include "pprint.h"
29 #include "entities.h"
30 #include "tmbstr.h"
31 #include "utf8.h"
32 #include "mappedio.h"
33
34 #ifdef TIDY_WIN32_MLANG_SUPPORT
35 #include "win32tc.h"
36 #endif
37
/* Create/Destroy a Tidy "document" object */
static TidyDocImpl* tidyDocCreate( TidyAllocator *allocator );
static void tidyDocRelease( TidyDocImpl* impl );

/* Summarise the document's diagnostic counters as a status code:
** 2 if errors were counted, 1 if only warnings or accessibility
** warnings were counted, 0 otherwise.
*/
static int tidyDocStatus( TidyDocImpl* impl );

/* Parse Markup */
static int tidyDocParseFile( TidyDocImpl* impl, ctmbstr htmlfil );
static int tidyDocParseStdin( TidyDocImpl* impl );
static int tidyDocParseString( TidyDocImpl* impl, ctmbstr content );
static int tidyDocParseBuffer( TidyDocImpl* impl, TidyBuffer* inbuf );
static int tidyDocParseSource( TidyDocImpl* impl, TidyInputSource* docIn );


/* Execute post-parse diagnostics and cleanup.
** Note, the order is important. You will get different
** results from the diagnostics depending on if they are run
** pre-or-post repair.
*/
static int tidyDocRunDiagnostics( TidyDocImpl* doc );
static int tidyDocCleanAndRepair( TidyDocImpl* doc );


/* Save cleaned up file to file/buffer/sink */
static int tidyDocSaveFile( TidyDocImpl* impl, ctmbstr htmlfil );
static int tidyDocSaveStdout( TidyDocImpl* impl );
static int tidyDocSaveString( TidyDocImpl* impl, tmbstr buffer, uint* buflen );
static int tidyDocSaveBuffer( TidyDocImpl* impl, TidyBuffer* outbuf );
static int tidyDocSaveSink( TidyDocImpl* impl, TidyOutputSink* docOut );
/* Common back end: every Save* helper above builds a StreamOut and
** hands it to this function.
*/
static int tidyDocSaveStream( TidyDocImpl* impl, StreamOut* out );
68
/* Function forms of the handle <-> implementation conversions.
** They are compiled only when NEVER is defined; each one is a plain
** pointer cast between the opaque public handle type and the internal
** type (or the reverse).
*/
#ifdef NEVER
/* Public document handle -> internal document. */
TidyDocImpl* tidyDocToImpl( TidyDoc tdoc )
{
    return (TidyDocImpl*) tdoc;
}
/* Internal document -> public document handle. */
TidyDoc tidyImplToDoc( TidyDocImpl* impl )
{
    return (TidyDoc) impl;
}

/* Public node handle -> internal node. */
Node* tidyNodeToImpl( TidyNode tnod )
{
    return (Node*) tnod;
}
/* Internal node -> public node handle. */
TidyNode tidyImplToNode( Node* node )
{
    return (TidyNode) node;
}

/* Public attribute handle -> internal attribute value. */
AttVal* tidyAttrToImpl( TidyAttr tattr )
{
    return (AttVal*) tattr;
}
/* Internal attribute value -> public attribute handle. */
TidyAttr tidyImplToAttr( AttVal* attval )
{
    return (TidyAttr) attval;
}

/* Public option handle -> internal (read-only) option descriptor. */
const TidyOptionImpl* tidyOptionToImpl( TidyOption topt )
{
    return (const TidyOptionImpl*) topt;
}
/* Internal option descriptor -> public option handle. */
TidyOption tidyImplToOption( const TidyOptionImpl* option )
{
    return (TidyOption) option;
}
#endif
106
107 /* Tidy public interface
108 **
109 ** Most functions return an integer:
110 **
111 ** 0 -> SUCCESS
112 ** >0 -> WARNING
113 ** <0 -> ERROR
114 **
115 */
116
tidyCreate(void)117 TidyDoc TIDY_CALL tidyCreate(void)
118 {
119 TidyDocImpl* impl = tidyDocCreate( &TY_(g_default_allocator) );
120 return tidyImplToDoc( impl );
121 }
122
tidyCreateWithAllocator(TidyAllocator * allocator)123 TidyDoc TIDY_CALL tidyCreateWithAllocator( TidyAllocator *allocator )
124 {
125 TidyDocImpl* impl = tidyDocCreate( allocator );
126 return tidyImplToDoc( impl );
127 }
128
tidyRelease(TidyDoc tdoc)129 void TIDY_CALL tidyRelease( TidyDoc tdoc )
130 {
131 TidyDocImpl* impl = tidyDocToImpl( tdoc );
132 tidyDocRelease( impl );
133 }
134
tidyDocCreate(TidyAllocator * allocator)135 TidyDocImpl* tidyDocCreate( TidyAllocator *allocator )
136 {
137 TidyDocImpl* doc = (TidyDocImpl*)TidyAlloc( allocator, sizeof(TidyDocImpl) );
138 TidyClearMemory( doc, sizeof(*doc) );
139 doc->allocator = allocator;
140
141 TY_(InitMap)();
142 TY_(InitTags)( doc );
143 TY_(InitAttrs)( doc );
144 TY_(InitConfig)( doc );
145 TY_(InitPrintBuf)( doc );
146
147 /* By default, wire tidy messages to standard error.
148 ** Document input will be set by parsing routines.
149 ** Document output will be set by pretty print routines.
150 ** Config input will be set by config parsing routines.
151 ** But we need to start off with a way to report errors.
152 */
153 doc->errout = TY_(StdErrOutput)();
154 return doc;
155 }
156
/* Tear down a document and free everything it holds.
** A NULL doc is a no-op. The input and output streams must already
** have been closed by the parse/print routines (asserted below).
*/
void tidyDocRelease( TidyDocImpl* doc )
{
    if ( !doc )
        return;

    /* doc in/out opened and closed by parse/print routines */
    assert( doc->docIn == NULL );
    assert( doc->docOut == NULL );

    /* Diagnostic sink */
    TY_(ReleaseStreamOut)( doc, doc->errout );
    doc->errout = NULL;

    /* Printer, lexer and the parsed tree hanging off the embedded root */
    TY_(FreePrintBuf)( doc );
    TY_(FreeLexer)( doc );
    TY_(FreeNode)( doc, &doc->root );
    TidyClearMemory( &doc->root, sizeof(Node) );

    if ( doc->givenDoctype )
    {
        TidyDocFree( doc, doc->givenDoctype );
    }

    /* Configuration and lookup tables, then the document itself */
    TY_(FreeConfig)( doc );
    TY_(FreeAttrTable)( doc );
    TY_(FreeTags)( doc );
    TidyDocFree( doc, doc );
}
182
/* Let application store a chunk of data w/ each Tidy instance.
** Useful for callbacks.
*/
tidySetAppData(TidyDoc tdoc,void * appData)186 void TIDY_CALL tidySetAppData( TidyDoc tdoc, void* appData )
187 {
188 TidyDocImpl* impl = tidyDocToImpl( tdoc );
189 if ( impl )
190 impl->appData = appData;
191 }
tidyGetAppData(TidyDoc tdoc)192 void* TIDY_CALL tidyGetAppData( TidyDoc tdoc )
193 {
194 TidyDocImpl* impl = tidyDocToImpl( tdoc );
195 if ( impl )
196 return impl->appData;
197 return NULL;
198 }
199
tidyVersion(void)200 ctmbstr TIDY_CALL tidyVersion(void)
201 {
202 return TY_(Version)();
203 }
204
205
206 /* Get/set configuration options
207 */
tidySetOptionCallback(TidyDoc tdoc,TidyOptCallback pOptCallback)208 Bool TIDY_CALL tidySetOptionCallback( TidyDoc tdoc, TidyOptCallback pOptCallback )
209 {
210 TidyDocImpl* impl = tidyDocToImpl( tdoc );
211 if ( impl )
212 {
213 impl->pOptCallback = pOptCallback;
214 return yes;
215 }
216 return no;
217 }
218
219
tidyLoadConfig(TidyDoc tdoc,ctmbstr cfgfil)220 int TIDY_CALL tidyLoadConfig( TidyDoc tdoc, ctmbstr cfgfil )
221 {
222 TidyDocImpl* impl = tidyDocToImpl( tdoc );
223 if ( impl )
224 return TY_(ParseConfigFile)( impl, cfgfil );
225 return -EINVAL;
226 }
227
tidyLoadConfigEnc(TidyDoc tdoc,ctmbstr cfgfil,ctmbstr charenc)228 int TIDY_CALL tidyLoadConfigEnc( TidyDoc tdoc, ctmbstr cfgfil, ctmbstr charenc )
229 {
230 TidyDocImpl* impl = tidyDocToImpl( tdoc );
231 if ( impl )
232 return TY_(ParseConfigFileEnc)( impl, cfgfil, charenc );
233 return -EINVAL;
234 }
235
tidySetCharEncoding(TidyDoc tdoc,ctmbstr encnam)236 int TIDY_CALL tidySetCharEncoding( TidyDoc tdoc, ctmbstr encnam )
237 {
238 TidyDocImpl* impl = tidyDocToImpl( tdoc );
239 if ( impl )
240 {
241 int enc = TY_(CharEncodingId)( impl, encnam );
242 if ( enc >= 0 && TY_(AdjustCharEncoding)(impl, enc) )
243 return 0;
244
245 TY_(ReportBadArgument)( impl, "char-encoding" );
246 }
247 return -EINVAL;
248 }
249
tidySetInCharEncoding(TidyDoc tdoc,ctmbstr encnam)250 int TIDY_CALL tidySetInCharEncoding( TidyDoc tdoc, ctmbstr encnam )
251 {
252 TidyDocImpl* impl = tidyDocToImpl( tdoc );
253 if ( impl )
254 {
255 int enc = TY_(CharEncodingId)( impl, encnam );
256 if ( enc >= 0 && TY_(SetOptionInt)( impl, TidyInCharEncoding, enc ) )
257 return 0;
258
259 TY_(ReportBadArgument)( impl, "in-char-encoding" );
260 }
261 return -EINVAL;
262 }
263
tidySetOutCharEncoding(TidyDoc tdoc,ctmbstr encnam)264 int TIDY_CALL tidySetOutCharEncoding( TidyDoc tdoc, ctmbstr encnam )
265 {
266 TidyDocImpl* impl = tidyDocToImpl( tdoc );
267 if ( impl )
268 {
269 int enc = TY_(CharEncodingId)( impl, encnam );
270 if ( enc >= 0 && TY_(SetOptionInt)( impl, TidyOutCharEncoding, enc ) )
271 return 0;
272
273 TY_(ReportBadArgument)( impl, "out-char-encoding" );
274 }
275 return -EINVAL;
276 }
277
tidyOptGetIdForName(ctmbstr optnam)278 TidyOptionId TIDY_CALL tidyOptGetIdForName( ctmbstr optnam )
279 {
280 const TidyOptionImpl* option = TY_(lookupOption)( optnam );
281 if ( option )
282 return option->id;
283 return N_TIDY_OPTIONS; /* Error */
284 }
285
tidyGetOptionList(TidyDoc tdoc)286 TidyIterator TIDY_CALL tidyGetOptionList( TidyDoc tdoc )
287 {
288 TidyDocImpl* impl = tidyDocToImpl( tdoc );
289 if ( impl )
290 return TY_(getOptionList)( impl );
291 return (TidyIterator) -1;
292 }
293
tidyGetNextOption(TidyDoc tdoc,TidyIterator * pos)294 TidyOption TIDY_CALL tidyGetNextOption( TidyDoc tdoc, TidyIterator* pos )
295 {
296 TidyDocImpl* impl = tidyDocToImpl( tdoc );
297 const TidyOptionImpl* option = NULL;
298 if ( impl )
299 option = TY_(getNextOption)( impl, pos );
300 else if ( pos )
301 *pos = 0;
302 return tidyImplToOption( option );
303 }
304
305
tidyGetOption(TidyDoc ARG_UNUSED (tdoc),TidyOptionId optId)306 TidyOption TIDY_CALL tidyGetOption( TidyDoc ARG_UNUSED(tdoc), TidyOptionId optId )
307 {
308 const TidyOptionImpl* option = TY_(getOption)( optId );
309 return tidyImplToOption( option );
310 }
tidyGetOptionByName(TidyDoc ARG_UNUSED (doc),ctmbstr optnam)311 TidyOption TIDY_CALL tidyGetOptionByName( TidyDoc ARG_UNUSED(doc), ctmbstr optnam )
312 {
313 const TidyOptionImpl* option = TY_(lookupOption)( optnam );
314 return tidyImplToOption( option );
315 }
316
tidyOptGetId(TidyOption topt)317 TidyOptionId TIDY_CALL tidyOptGetId( TidyOption topt )
318 {
319 const TidyOptionImpl* option = tidyOptionToImpl( topt );
320 if ( option )
321 return option->id;
322 return N_TIDY_OPTIONS;
323 }
tidyOptGetName(TidyOption topt)324 ctmbstr TIDY_CALL tidyOptGetName( TidyOption topt )
325 {
326 const TidyOptionImpl* option = tidyOptionToImpl( topt );
327 if ( option )
328 return option->name;
329 return NULL;
330 }
tidyOptGetType(TidyOption topt)331 TidyOptionType TIDY_CALL tidyOptGetType( TidyOption topt )
332 {
333 const TidyOptionImpl* option = tidyOptionToImpl( topt );
334 if ( option )
335 return option->type;
336 return (TidyOptionType) -1;
337 }
tidyOptGetCategory(TidyOption topt)338 TidyConfigCategory TIDY_CALL tidyOptGetCategory( TidyOption topt )
339 {
340 const TidyOptionImpl* option = tidyOptionToImpl( topt );
341 if ( option )
342 return option->category;
343 return (TidyConfigCategory) -1;
344 }
tidyOptGetDefault(TidyOption topt)345 ctmbstr TIDY_CALL tidyOptGetDefault( TidyOption topt )
346 {
347 const TidyOptionImpl* option = tidyOptionToImpl( topt );
348 if ( option && option->type == TidyString )
349 return (ctmbstr) option->dflt;
350 return NULL;
351 }
tidyOptGetDefaultInt(TidyOption topt)352 ulong TIDY_CALL tidyOptGetDefaultInt( TidyOption topt )
353 {
354 const TidyOptionImpl* option = tidyOptionToImpl( topt );
355 if ( option && option->type != TidyString )
356 return option->dflt;
357 return ~0U;
358 }
tidyOptGetDefaultBool(TidyOption topt)359 Bool TIDY_CALL tidyOptGetDefaultBool( TidyOption topt )
360 {
361 const TidyOptionImpl* option = tidyOptionToImpl( topt );
362 if ( option && option->type != TidyString )
363 return ( option->dflt ? yes : no );
364 return no;
365 }
tidyOptIsReadOnly(TidyOption topt)366 Bool TIDY_CALL tidyOptIsReadOnly( TidyOption topt )
367 {
368 const TidyOptionImpl* option = tidyOptionToImpl( topt );
369 if ( option )
370 return ( option->parser == NULL );
371 return yes;
372 }
373
374
tidyOptGetPickList(TidyOption topt)375 TidyIterator TIDY_CALL tidyOptGetPickList( TidyOption topt )
376 {
377 const TidyOptionImpl* option = tidyOptionToImpl( topt );
378 if ( option )
379 return TY_(getOptionPickList)( option );
380 return (TidyIterator) -1;
381 }
tidyOptGetNextPick(TidyOption topt,TidyIterator * pos)382 ctmbstr TIDY_CALL tidyOptGetNextPick( TidyOption topt, TidyIterator* pos )
383 {
384 const TidyOptionImpl* option = tidyOptionToImpl( topt );
385 if ( option )
386 return TY_(getNextOptionPick)( option, pos );
387 return NULL;
388 }
389
390
tidyOptGetValue(TidyDoc tdoc,TidyOptionId optId)391 ctmbstr TIDY_CALL tidyOptGetValue( TidyDoc tdoc, TidyOptionId optId )
392 {
393 TidyDocImpl* impl = tidyDocToImpl( tdoc );
394 ctmbstr optval = NULL;
395 if ( impl )
396 optval = cfgStr( impl, optId );
397 return optval;
398 }
tidyOptSetValue(TidyDoc tdoc,TidyOptionId optId,ctmbstr val)399 Bool TIDY_CALL tidyOptSetValue( TidyDoc tdoc, TidyOptionId optId, ctmbstr val )
400 {
401 TidyDocImpl* impl = tidyDocToImpl( tdoc );
402 if ( impl )
403 return TY_(ParseConfigValue)( impl, optId, val );
404 return no;
405 }
tidyOptParseValue(TidyDoc tdoc,ctmbstr optnam,ctmbstr val)406 Bool TIDY_CALL tidyOptParseValue( TidyDoc tdoc, ctmbstr optnam, ctmbstr val )
407 {
408 TidyDocImpl* impl = tidyDocToImpl( tdoc );
409 if ( impl )
410 return TY_(ParseConfigOption)( impl, optnam, val );
411 return no;
412 }
413
tidyOptGetInt(TidyDoc tdoc,TidyOptionId optId)414 ulong TIDY_CALL tidyOptGetInt( TidyDoc tdoc, TidyOptionId optId )
415 {
416 TidyDocImpl* impl = tidyDocToImpl( tdoc );
417 ulong opti = 0;
418 if ( impl )
419 opti = cfg( impl, optId );
420 return opti;
421 }
422
tidyOptSetInt(TidyDoc tdoc,TidyOptionId optId,ulong val)423 Bool TIDY_CALL tidyOptSetInt( TidyDoc tdoc, TidyOptionId optId, ulong val )
424 {
425 TidyDocImpl* impl = tidyDocToImpl( tdoc );
426 if ( impl )
427 return TY_(SetOptionInt)( impl, optId, val );
428 return no;
429 }
430
tidyOptGetBool(TidyDoc tdoc,TidyOptionId optId)431 Bool TIDY_CALL tidyOptGetBool( TidyDoc tdoc, TidyOptionId optId )
432 {
433 TidyDocImpl* impl = tidyDocToImpl( tdoc );
434 Bool optb = no;
435 if ( impl )
436 {
437 const TidyOptionImpl* option = TY_(getOption)( optId );
438 if ( option )
439 {
440 optb = cfgBool( impl, optId );
441 }
442 }
443 return optb;
444 }
445
tidyOptSetBool(TidyDoc tdoc,TidyOptionId optId,Bool val)446 Bool TIDY_CALL tidyOptSetBool( TidyDoc tdoc, TidyOptionId optId, Bool val )
447 {
448 TidyDocImpl* impl = tidyDocToImpl( tdoc );
449 if ( impl )
450 return TY_(SetOptionBool)( impl, optId, val );
451 return no;
452 }
453
tidyOptGetEncName(TidyDoc tdoc,TidyOptionId optId)454 ctmbstr TIDY_CALL tidyOptGetEncName( TidyDoc tdoc, TidyOptionId optId )
455 {
456 uint enc = tidyOptGetInt( tdoc, optId );
457 return TY_(CharEncodingOptName)( enc );
458 }
459
tidyOptGetCurrPick(TidyDoc tdoc,TidyOptionId optId)460 ctmbstr TIDY_CALL tidyOptGetCurrPick( TidyDoc tdoc, TidyOptionId optId )
461 {
462 const TidyOptionImpl* option = TY_(getOption)( optId );
463 if ( option && option->pickList )
464 {
465 uint ix, pick = tidyOptGetInt( tdoc, optId );
466 const ctmbstr* pL = option->pickList;
467 for ( ix=0; *pL && ix < pick; ++ix )
468 ++pL;
469 if ( *pL )
470 return *pL;
471 }
472 return NULL;
473 }
474
475
tidyOptGetDeclTagList(TidyDoc tdoc)476 TidyIterator TIDY_CALL tidyOptGetDeclTagList( TidyDoc tdoc )
477 {
478 TidyDocImpl* impl = tidyDocToImpl( tdoc );
479 TidyIterator declIter = 0;
480 if ( impl )
481 declIter = TY_(GetDeclaredTagList)( impl );
482 return declIter;
483 }
484
tidyOptGetNextDeclTag(TidyDoc tdoc,TidyOptionId optId,TidyIterator * iter)485 ctmbstr TIDY_CALL tidyOptGetNextDeclTag( TidyDoc tdoc, TidyOptionId optId,
486 TidyIterator* iter )
487 {
488 TidyDocImpl* impl = tidyDocToImpl( tdoc );
489 ctmbstr tagnam = NULL;
490 if ( impl )
491 {
492 UserTagType tagtyp = tagtype_null;
493 if ( optId == TidyInlineTags )
494 tagtyp = tagtype_inline;
495 else if ( optId == TidyBlockTags )
496 tagtyp = tagtype_block;
497 else if ( optId == TidyEmptyTags )
498 tagtyp = tagtype_empty;
499 else if ( optId == TidyPreTags )
500 tagtyp = tagtype_pre;
501 if ( tagtyp != tagtype_null )
502 tagnam = TY_(GetNextDeclaredTag)( impl, tagtyp, iter );
503 }
504 return tagnam;
505 }
506
tidyOptGetDoc(TidyDoc ARG_UNUSED (tdoc),TidyOption opt)507 ctmbstr TIDY_CALL tidyOptGetDoc( TidyDoc ARG_UNUSED(tdoc), TidyOption opt )
508 {
509 const TidyOptionId optId = tidyOptGetId( opt );
510 const TidyOptionDoc* docDesc = TY_(OptGetDocDesc)( optId );
511 return docDesc ? docDesc->doc : NULL;
512 }
513
tidyOptGetDocLinksList(TidyDoc ARG_UNUSED (tdoc),TidyOption opt)514 TidyIterator TIDY_CALL tidyOptGetDocLinksList( TidyDoc ARG_UNUSED(tdoc), TidyOption opt )
515 {
516 const TidyOptionId optId = tidyOptGetId( opt );
517 const TidyOptionDoc* docDesc = TY_(OptGetDocDesc)( optId );
518 if (docDesc && docDesc->links)
519 return (TidyIterator)docDesc->links;
520 return (TidyIterator)NULL;
521 }
522
tidyOptGetNextDocLinks(TidyDoc tdoc,TidyIterator * pos)523 TidyOption TIDY_CALL tidyOptGetNextDocLinks( TidyDoc tdoc, TidyIterator* pos )
524 {
525 const TidyOptionId* curr = (const TidyOptionId *)*pos;
526 TidyOption opt;
527
528 if (*curr == TidyUnknownOption)
529 {
530 *pos = (TidyIterator)NULL;
531 return (TidyOption)0;
532 }
533 opt = tidyGetOption(tdoc, *curr);
534 curr++;
535 *pos = (*curr == TidyUnknownOption ) ?
536 (TidyIterator)NULL:(TidyIterator)curr;
537 return opt;
538 }
539
tidyOptSaveFile(TidyDoc tdoc,ctmbstr cfgfil)540 int TIDY_CALL tidyOptSaveFile( TidyDoc tdoc, ctmbstr cfgfil )
541 {
542 TidyDocImpl* impl = tidyDocToImpl( tdoc );
543 if ( impl )
544 return TY_(SaveConfigFile)( impl, cfgfil );
545 return -EINVAL;
546 }
547
tidyOptSaveSink(TidyDoc tdoc,TidyOutputSink * sink)548 int TIDY_CALL tidyOptSaveSink( TidyDoc tdoc, TidyOutputSink* sink )
549 {
550 TidyDocImpl* impl = tidyDocToImpl( tdoc );
551 if ( impl )
552 return TY_(SaveConfigSink)( impl, sink );
553 return -EINVAL;
554 }
555
tidyOptSnapshot(TidyDoc tdoc)556 Bool TIDY_CALL tidyOptSnapshot( TidyDoc tdoc )
557 {
558 TidyDocImpl* impl = tidyDocToImpl( tdoc );
559 if ( impl )
560 {
561 TY_(TakeConfigSnapshot)( impl );
562 return yes;
563 }
564 return no;
565 }
tidyOptResetToSnapshot(TidyDoc tdoc)566 Bool TIDY_CALL tidyOptResetToSnapshot( TidyDoc tdoc )
567 {
568 TidyDocImpl* impl = tidyDocToImpl( tdoc );
569 if ( impl )
570 {
571 TY_(ResetConfigToSnapshot)( impl );
572 return yes;
573 }
574 return no;
575 }
tidyOptResetAllToDefault(TidyDoc tdoc)576 Bool TIDY_CALL tidyOptResetAllToDefault( TidyDoc tdoc )
577 {
578 TidyDocImpl* impl = tidyDocToImpl( tdoc );
579 if ( impl )
580 {
581 TY_(ResetConfigToDefault)( impl );
582 return yes;
583 }
584 return no;
585 }
586
tidyOptResetToDefault(TidyDoc tdoc,TidyOptionId optId)587 Bool TIDY_CALL tidyOptResetToDefault( TidyDoc tdoc, TidyOptionId optId )
588 {
589 TidyDocImpl* impl = tidyDocToImpl( tdoc );
590 if ( impl )
591 return TY_(ResetOptionToDefault)( impl, optId );
592 return no;
593 }
594
tidyOptDiffThanDefault(TidyDoc tdoc)595 Bool TIDY_CALL tidyOptDiffThanDefault( TidyDoc tdoc )
596 {
597 TidyDocImpl* impl = tidyDocToImpl( tdoc );
598 if ( impl )
599 return TY_(ConfigDiffThanDefault)( impl );
600 return no;
601 }
tidyOptDiffThanSnapshot(TidyDoc tdoc)602 Bool TIDY_CALL tidyOptDiffThanSnapshot( TidyDoc tdoc )
603 {
604 TidyDocImpl* impl = tidyDocToImpl( tdoc );
605 if ( impl )
606 return TY_(ConfigDiffThanSnapshot)( impl );
607 return no;
608 }
609
tidyOptCopyConfig(TidyDoc to,TidyDoc from)610 Bool TIDY_CALL tidyOptCopyConfig( TidyDoc to, TidyDoc from )
611 {
612 TidyDocImpl* docTo = tidyDocToImpl( to );
613 TidyDocImpl* docFrom = tidyDocToImpl( from );
614 if ( docTo && docFrom )
615 {
616 TY_(CopyConfig)( docTo, docFrom );
617 return yes;
618 }
619 return no;
620 }
621
622
/* I/O and Message handling interface
**
** By default, Tidy will define, create and use
** instances of input and output handlers for
** standard C buffered I/O (i.e. FILE* stdin,
** FILE* stdout and FILE* stderr for content
** input, content output and diagnostic output,
** respectively). A FILE* cfgFile input handler
** will be used for config files. Command line
** options will just be set directly.
*/
634
635 /* Use TidyReportFilter to filter messages by diagnostic level:
636 ** info, warning, etc. Just set diagnostic output
637 ** handler to redirect all diagnostics output. Return true
638 ** to proceed with output, false to cancel.
639 */
tidySetReportFilter(TidyDoc tdoc,TidyReportFilter filt)640 Bool TIDY_CALL tidySetReportFilter( TidyDoc tdoc, TidyReportFilter filt )
641 {
642 TidyDocImpl* impl = tidyDocToImpl( tdoc );
643 if ( impl )
644 {
645 impl->mssgFilt = filt;
646 return yes;
647 }
648 return no;
649 }
650
tidySetErrorFile(TidyDoc tdoc,ctmbstr errfilnam)651 FILE* TIDY_CALL tidySetErrorFile( TidyDoc tdoc, ctmbstr errfilnam )
652 {
653 TidyDocImpl* impl = tidyDocToImpl( tdoc );
654 if ( impl )
655 {
656 FILE* errout = fopen( errfilnam, "wb" );
657 if ( errout )
658 {
659 uint outenc = cfg( impl, TidyOutCharEncoding );
660 uint nl = cfg( impl, TidyNewline );
661 TY_(ReleaseStreamOut)( impl, impl->errout );
662 impl->errout = TY_(FileOutput)( impl, errout, outenc, nl );
663 return errout;
664 }
665 else /* Emit message to current error sink */
666 TY_(FileError)( impl, errfilnam, TidyError );
667 }
668 return NULL;
669 }
670
tidySetErrorBuffer(TidyDoc tdoc,TidyBuffer * errbuf)671 int TIDY_CALL tidySetErrorBuffer( TidyDoc tdoc, TidyBuffer* errbuf )
672 {
673 TidyDocImpl* impl = tidyDocToImpl( tdoc );
674 if ( impl )
675 {
676 uint outenc = cfg( impl, TidyOutCharEncoding );
677 uint nl = cfg( impl, TidyNewline );
678 TY_(ReleaseStreamOut)( impl, impl->errout );
679 impl->errout = TY_(BufferOutput)( impl, errbuf, outenc, nl );
680 return ( impl->errout ? 0 : -ENOMEM );
681 }
682 return -EINVAL;
683 }
684
tidySetErrorSink(TidyDoc tdoc,TidyOutputSink * sink)685 int TIDY_CALL tidySetErrorSink( TidyDoc tdoc, TidyOutputSink* sink )
686 {
687 TidyDocImpl* impl = tidyDocToImpl( tdoc );
688 if ( impl )
689 {
690 uint outenc = cfg( impl, TidyOutCharEncoding );
691 uint nl = cfg( impl, TidyNewline );
692 TY_(ReleaseStreamOut)( impl, impl->errout );
693 impl->errout = TY_(UserOutput)( impl, sink, outenc, nl );
694 return ( impl->errout ? 0 : -ENOMEM );
695 }
696 return -EINVAL;
697 }
698
699
700 /* Document info */
tidyStatus(TidyDoc tdoc)701 int TIDY_CALL tidyStatus( TidyDoc tdoc )
702 {
703 TidyDocImpl* impl = tidyDocToImpl( tdoc );
704 int tidyStat = -EINVAL;
705 if ( impl )
706 tidyStat = tidyDocStatus( impl );
707 return tidyStat;
708 }
tidyDetectedHtmlVersion(TidyDoc ARG_UNUSED (tdoc))709 int TIDY_CALL tidyDetectedHtmlVersion( TidyDoc ARG_UNUSED(tdoc) )
710 {
711 /* TidyDocImpl* impl = tidyDocToImpl( tdoc ); */
712 return 0;
713 }
tidyDetectedXhtml(TidyDoc ARG_UNUSED (tdoc))714 Bool TIDY_CALL tidyDetectedXhtml( TidyDoc ARG_UNUSED(tdoc) )
715 {
716 /* TidyDocImpl* impl = tidyDocToImpl( tdoc ); */
717 return no;
718 }
tidyDetectedGenericXml(TidyDoc ARG_UNUSED (tdoc))719 Bool TIDY_CALL tidyDetectedGenericXml( TidyDoc ARG_UNUSED(tdoc) )
720 {
721 /* TidyDocImpl* impl = tidyDocToImpl( tdoc ); */
722 return no;
723 }
724
tidyErrorCount(TidyDoc tdoc)725 uint TIDY_CALL tidyErrorCount( TidyDoc tdoc )
726 {
727 TidyDocImpl* impl = tidyDocToImpl( tdoc );
728 uint count = 0xFFFFFFFF;
729 if ( impl )
730 count = impl->errors;
731 return count;
732 }
tidyWarningCount(TidyDoc tdoc)733 uint TIDY_CALL tidyWarningCount( TidyDoc tdoc )
734 {
735 TidyDocImpl* impl = tidyDocToImpl( tdoc );
736 uint count = 0xFFFFFFFF;
737 if ( impl )
738 count = impl->warnings;
739 return count;
740 }
tidyAccessWarningCount(TidyDoc tdoc)741 uint TIDY_CALL tidyAccessWarningCount( TidyDoc tdoc )
742 {
743 TidyDocImpl* impl = tidyDocToImpl( tdoc );
744 uint count = 0xFFFFFFFF;
745 if ( impl )
746 count = impl->accessErrors;
747 return count;
748 }
tidyConfigErrorCount(TidyDoc tdoc)749 uint TIDY_CALL tidyConfigErrorCount( TidyDoc tdoc )
750 {
751 TidyDocImpl* impl = tidyDocToImpl( tdoc );
752 uint count = 0xFFFFFFFF;
753 if ( impl )
754 count = impl->optionErrors;
755 return count;
756 }
757
758
759 /* Error reporting functions
760 */
tidyErrorSummary(TidyDoc tdoc)761 void TIDY_CALL tidyErrorSummary( TidyDoc tdoc )
762 {
763 TidyDocImpl* impl = tidyDocToImpl( tdoc );
764 if ( impl )
765 TY_(ErrorSummary)( impl );
766 }
tidyGeneralInfo(TidyDoc tdoc)767 void TIDY_CALL tidyGeneralInfo( TidyDoc tdoc )
768 {
769 TidyDocImpl* impl = tidyDocToImpl( tdoc );
770 if ( impl )
771 TY_(GeneralInfo)( impl );
772 }
773
774
775 /* I/O Functions
776 **
777 ** Initial version supports only whole-file operations.
778 ** Do not expose Tidy StreamIn or Out data structures - yet.
779 */
780
781 /* Parse/load Functions
782 **
783 ** HTML/XHTML version determined from input.
784 */
tidyParseFile(TidyDoc tdoc,ctmbstr filnam)785 int TIDY_CALL tidyParseFile( TidyDoc tdoc, ctmbstr filnam )
786 {
787 TidyDocImpl* doc = tidyDocToImpl( tdoc );
788 return tidyDocParseFile( doc, filnam );
789 }
tidyParseStdin(TidyDoc tdoc)790 int TIDY_CALL tidyParseStdin( TidyDoc tdoc )
791 {
792 TidyDocImpl* doc = tidyDocToImpl( tdoc );
793 return tidyDocParseStdin( doc );
794 }
tidyParseString(TidyDoc tdoc,ctmbstr content)795 int TIDY_CALL tidyParseString( TidyDoc tdoc, ctmbstr content )
796 {
797 TidyDocImpl* doc = tidyDocToImpl( tdoc );
798 return tidyDocParseString( doc, content );
799 }
tidyParseBuffer(TidyDoc tdoc,TidyBuffer * inbuf)800 int TIDY_CALL tidyParseBuffer( TidyDoc tdoc, TidyBuffer* inbuf )
801 {
802 TidyDocImpl* doc = tidyDocToImpl( tdoc );
803 return tidyDocParseBuffer( doc, inbuf );
804 }
tidyParseSource(TidyDoc tdoc,TidyInputSource * source)805 int TIDY_CALL tidyParseSource( TidyDoc tdoc, TidyInputSource* source )
806 {
807 TidyDocImpl* doc = tidyDocToImpl( tdoc );
808 return tidyDocParseSource( doc, source );
809 }
810
811
/* Parse the file filnam into doc.
**
** On _WIN32 the work is delegated to TY_(DocParseFileWithMappedFile).
** Elsewhere the file is opened with mode "rb" and parsed through a
** file input stream using the TidyInCharEncoding option.
**
** Returns the status from TY_(DocParseStream). Returns -ENOENT when
** the file cannot be opened (after reporting the failure) or when no
** input stream could be created for it.
*/
int tidyDocParseFile( TidyDocImpl* doc, ctmbstr filnam )
{
#ifdef _WIN32
    return TY_(DocParseFileWithMappedFile)( doc, filnam );
#else
    int status = -ENOENT;
    FILE* fin = fopen( filnam, "rb" );

#if PRESERVE_FILE_TIMES
    struct stat sbuf;

    memset( &sbuf, 0, sizeof(sbuf) );
    /* Remember the access/modification times so that a later save can
    ** put them back; only done when TidyKeepFileTimes is set.
    */
    TidyClearMemory( &doc->filetimes, sizeof(doc->filetimes) );
    if ( fin && cfgBool(doc,TidyKeepFileTimes) &&
         fstat(fileno(fin), &sbuf) != -1 )
    {
        doc->filetimes.actime = sbuf.st_atime;
        doc->filetimes.modtime = sbuf.st_mtime;
    }
#endif

    if ( !fin )
    {
        /* Error message! */
        TY_(FileError)( doc, filnam, TidyError );
        return status;
    }

    {
        StreamIn* stream = TY_(FileInput)( doc, fin, cfg( doc, TidyInCharEncoding ));

        if ( stream )
        {
            status = TY_(DocParseStream)( doc, stream );
            TY_(freeFileSource)(&stream->source, yes);
            TY_(freeStreamIn)(stream);
        }
        else
        {
            /* No stream took ownership of the file; close it here. */
            fclose( fin );
        }
    }
    return status;
#endif
}
851
/* Parse markup from standard input into doc, using the
** TidyInCharEncoding option.
**
** Returns the status from TY_(DocParseStream), or -ENOMEM when no
** input stream could be created. tidyDocParseFile already treats a
** NULL result from TY_(FileInput) as a failure; the same check is
** applied here so a NULL stream is never parsed or freed.
*/
int tidyDocParseStdin( TidyDocImpl* doc )
{
    StreamIn* in = TY_(FileInput)( doc, stdin, cfg( doc, TidyInCharEncoding ));
    int status;

    if ( !in )
        return -ENOMEM;

    status = TY_(DocParseStream)( doc, in );
    TY_(freeStreamIn)(in);
    return status;
}
859
/* Parse the markup held in inbuf into doc, using the
** TidyInCharEncoding option.
** Returns the status from TY_(DocParseStream), or -EINVAL when inbuf
** is NULL.
*/
int tidyDocParseBuffer( TidyDocImpl* doc, TidyBuffer* inbuf )
{
    StreamIn* stream;
    int status;

    if ( !inbuf )
        return -EINVAL;

    stream = TY_(BufferInput)( doc, inbuf, cfg( doc, TidyInCharEncoding ));
    status = TY_(DocParseStream)( doc, stream );
    TY_(freeStreamIn)(stream);
    return status;
}
871
/* Parse the NUL-terminated string content into doc.
**
** The string is attached, not copied, to a temporary buffer with its
** length plus one, and detached again after parsing.
** Returns the status from TY_(DocParseStream), or -EINVAL when content
** is NULL.
*/
int tidyDocParseString( TidyDocImpl* doc, ctmbstr content )
{
    TidyBuffer wrapper;
    StreamIn* stream = NULL;
    int status;

    if ( !content )
        return -EINVAL;

    tidyBufInitWithAllocator( &wrapper, doc->allocator );
    tidyBufAttach( &wrapper, (byte*)content, TY_(tmbstrlen)(content)+1 );

    stream = TY_(BufferInput)( doc, &wrapper, cfg( doc, TidyInCharEncoding ));
    status = TY_(DocParseStream)( doc, stream );

    /* Detach before freeing the stream, as the original ordering does. */
    tidyBufDetach( &wrapper );
    TY_(freeStreamIn)(stream);
    return status;
}
889
/* Parse markup delivered by a user-supplied input source into doc,
** using the TidyInCharEncoding option.
** Returns the status from TY_(DocParseStream).
*/
int tidyDocParseSource( TidyDocImpl* doc, TidyInputSource* source )
{
    StreamIn* stream;
    int status;

    stream = TY_(UserInput)( doc, source, cfg( doc, TidyInCharEncoding ));
    status = TY_(DocParseStream)( doc, stream );
    TY_(freeStreamIn)(stream);
    return status;
}
897
898
899 /* Print/save Functions
900 **
901 */
tidySaveFile(TidyDoc tdoc,ctmbstr filnam)902 int TIDY_CALL tidySaveFile( TidyDoc tdoc, ctmbstr filnam )
903 {
904 TidyDocImpl* doc = tidyDocToImpl( tdoc );
905 return tidyDocSaveFile( doc, filnam );
906 }
tidySaveStdout(TidyDoc tdoc)907 int TIDY_CALL tidySaveStdout( TidyDoc tdoc )
908 {
909 TidyDocImpl* doc = tidyDocToImpl( tdoc );
910 return tidyDocSaveStdout( doc );
911 }
tidySaveString(TidyDoc tdoc,tmbstr buffer,uint * buflen)912 int TIDY_CALL tidySaveString( TidyDoc tdoc, tmbstr buffer, uint* buflen )
913 {
914 TidyDocImpl* doc = tidyDocToImpl( tdoc );
915 return tidyDocSaveString( doc, buffer, buflen );
916 }
tidySaveBuffer(TidyDoc tdoc,TidyBuffer * outbuf)917 int TIDY_CALL tidySaveBuffer( TidyDoc tdoc, TidyBuffer* outbuf )
918 {
919 TidyDocImpl* doc = tidyDocToImpl( tdoc );
920 return tidyDocSaveBuffer( doc, outbuf );
921 }
tidySaveSink(TidyDoc tdoc,TidyOutputSink * sink)922 int TIDY_CALL tidySaveSink( TidyDoc tdoc, TidyOutputSink* sink )
923 {
924 TidyDocImpl* doc = tidyDocToImpl( tdoc );
925 return tidyDocSaveSink( doc, sink );
926 }
927
/* Write doc to the file filnam, opened with mode "wb".
**
** Nothing is opened or written when the document has errors and
** TidyWriteBack is set without TidyForceOutput; the document status is
** returned instead.
**
** Otherwise returns the status from tidyDocSaveStream, or -ENOENT when
** the file cannot be opened. Any negative status is reported through
** TY_(FileError).
*/
int tidyDocSaveFile( TidyDocImpl* doc, ctmbstr filnam )
{
    int status = -ENOENT;
    FILE* fout = NULL;

    /* Don't zap input file if no output.
    ** tidyDocStatus never returns a negative value, so no file error
    ** is reported on this path.
    */
    if ( doc->errors > 0 &&
         cfgBool(doc, TidyWriteBack) && !cfgBool(doc, TidyForceOutput) )
        return tidyDocStatus( doc );

    fout = fopen( filnam, "wb" );
    if ( fout )
    {
        uint encoding = cfg( doc, TidyOutCharEncoding );
        uint newline = cfg( doc, TidyNewline );
        StreamOut* sink = TY_(FileOutput)( doc, fout, encoding, newline );

        status = tidyDocSaveStream( doc, sink );

        fclose( fout );
        TidyDocFree( doc, sink );

#if PRESERVE_FILE_TIMES
        if ( doc->filetimes.actime )
        {
            /* set file last accessed/modified times to original values */
            utime( filnam, &doc->filetimes );
            TidyClearMemory( &doc->filetimes, sizeof(doc->filetimes) );
        }
#endif /* PRESERVE_FILE_TIMES */
    }

    if ( status < 0 ) /* Error message! */
        TY_(FileError)( doc, filnam, TidyError );
    return status;
}
964
965
966
967 /* Note, _setmode() does NOT work on Win2K Pro w/ VC++ 6.0 SP3.
968 ** The code has been left in in case it works w/ other compilers
969 ** or operating systems. If stdout is in Text mode, be aware that
970 ** it will garble UTF16 documents. In text mode, when it encounters
971 ** a single byte of value 10 (0xA), it will insert a single byte
972 ** value 13 (0xD) just before it. This has the effect of garbling
973 ** the entire document.
974 */
975
976 #if !defined(NO_SETMODE_SUPPORT)
977
978 #if defined(_WIN32) || defined(OS2_OS)
979 #include <fcntl.h>
980 #include <io.h>
981 #endif
982
983 #endif
984
/* Write doc to standard output.
**
** Where setmode support is compiled in (_WIN32 or OS2_OS, and
** NO_SETMODE_SUPPORT not defined), stdout and stderr are switched to
** binary mode for the duration of the write and restored afterwards.
** Both streams are flushed before the modes are restored.
**
** Returns the status from tidyDocSaveStream.
*/
int tidyDocSaveStdout( TidyDocImpl* doc )
{
#if !defined(NO_SETMODE_SUPPORT) && (defined(_WIN32) || defined(OS2_OS))
    int oldstdoutmode = -1, oldstderrmode = -1;
#endif
    int status;
    uint encoding = cfg( doc, TidyOutCharEncoding );
    uint newline = cfg( doc, TidyNewline );
    StreamOut* sink = TY_(FileOutput)( doc, stdout, encoding, newline );

#if !defined(NO_SETMODE_SUPPORT) && (defined(_WIN32) || defined(OS2_OS))
    oldstdoutmode = setmode( fileno(stdout), _O_BINARY );
    oldstderrmode = setmode( fileno(stderr), _O_BINARY );
#endif

    status = tidyDocSaveStream( doc, sink );

    fflush(stdout);
    fflush(stderr);

#if !defined(NO_SETMODE_SUPPORT) && (defined(_WIN32) || defined(OS2_OS))
    /* -1 means the earlier setmode call failed, so there is no mode
    ** to put back.
    */
    if ( oldstdoutmode != -1 )
        (void) setmode( fileno(stdout), oldstdoutmode );
    if ( oldstderrmode != -1 )
        (void) setmode( fileno(stderr), oldstderrmode );
#endif

    TidyDocFree( doc, sink );
    return status;
}
1028
/* Render the document into a caller-supplied byte buffer.
**
** doc     document to save
** buffer  destination; receives exactly the rendered bytes, no
**         terminating NUL is appended
** buflen  in: capacity of buffer in bytes
**         out: number of bytes the rendered document occupies
**
** Returns the tidyDocSaveStream() status, or -ENOMEM when the rendered
** output does not fit in *buflen bytes (nothing is copied in that case,
** but *buflen still reports the size needed).
*/
int tidyDocSaveString( TidyDocImpl* doc, tmbstr buffer, uint* buflen )
{
    uint outenc = cfg( doc, TidyOutCharEncoding );
    uint nl = cfg( doc, TidyNewline );
    TidyBuffer outbuf;
    StreamOut* out;
    int status;

    tidyBufInitWithAllocator( &outbuf, doc->allocator );
    out = TY_(BufferOutput)( doc, &outbuf, outenc, nl );
    status = tidyDocSaveStream( doc, out );

    if ( outbuf.size > *buflen )
        status = -ENOMEM;
    else if ( outbuf.size > 0 )
    {
        /* Skip the copy for empty output: memcpy() requires valid
        ** pointers even for a zero length, and neither outbuf.bp nor
        ** the caller's buffer is guaranteed to be non-NULL here.
        */
        memcpy( buffer, outbuf.bp, outbuf.size );
    }

    *buflen = outbuf.size;
    tidyBufFree( &outbuf );
    TidyDocFree( doc, out );
    return status;
}
1051
/* Render the document into the given TidyBuffer.
** Returns -EINVAL when outbuf is NULL, otherwise the result of
** tidyDocSaveStream().
*/
int tidyDocSaveBuffer( TidyDocImpl* doc, TidyBuffer* outbuf )
{
    uint encoding;
    uint newline;
    StreamOut* sink;
    int result;

    if ( outbuf == NULL )
        return -EINVAL;

    encoding = cfg( doc, TidyOutCharEncoding );
    newline = cfg( doc, TidyNewline );
    sink = TY_(BufferOutput)( doc, outbuf, encoding, newline );

    result = tidyDocSaveStream( doc, sink );
    TidyDocFree( doc, sink );
    return result;
}
1066
/* Render the document through a user-supplied output sink.
** Returns the result of tidyDocSaveStream().
*/
int tidyDocSaveSink( TidyDocImpl* doc, TidyOutputSink* sink )
{
    uint encoding = cfg( doc, TidyOutCharEncoding );
    uint newline = cfg( doc, TidyNewline );
    StreamOut* stream;
    int result;

    stream = TY_(UserOutput)( doc, sink, encoding, newline );
    result = tidyDocSaveStream( doc, stream );
    TidyDocFree( doc, stream );

    return result;
}
1076
/* Summarise the document's diagnostic counters as a status code:
**   2 - at least one error
**   1 - no errors, but warnings or accessibility errors
**   0 - nothing reported
*/
int tidyDocStatus( TidyDocImpl* doc )
{
    if ( doc->errors > 0 )
        return 2;

    return ( doc->warnings > 0 || doc->accessErrors > 0 ) ? 1 : 0;
}
1085
1086
1087
tidyCleanAndRepair(TidyDoc tdoc)1088 int TIDY_CALL tidyCleanAndRepair( TidyDoc tdoc )
1089 {
1090 TidyDocImpl* impl = tidyDocToImpl( tdoc );
1091 if ( impl )
1092 return tidyDocCleanAndRepair( impl );
1093 return -EINVAL;
1094 }
1095
tidyRunDiagnostics(TidyDoc tdoc)1096 int TIDY_CALL tidyRunDiagnostics( TidyDoc tdoc )
1097 {
1098 TidyDocImpl* impl = tidyDocToImpl( tdoc );
1099 if ( impl )
1100 return tidyDocRunDiagnostics( impl );
1101 return -EINVAL;
1102 }
1103
1104
1105 /* Workhorse functions.
1106 **
1107 ** Parse requires input source, all input config items
1108 ** and diagnostic sink to have all been set before calling.
1109 **
1110 ** Emit likewise requires that document sink and all
1111 ** pretty printing options have been set.
1112 */
/* Message handed to TidyPanic() whenever TY_(CheckNodeIntegrity)
** reports that the document tree is no longer consistent.
*/
static ctmbstr integrity = "\nPanic - tree has lost its integrity\n";
1114
TY_(DocParseStream)1115 int TY_(DocParseStream)( TidyDocImpl* doc, StreamIn* in )
1116 {
1117 Bool xmlIn = cfgBool( doc, TidyXmlTags );
1118 int bomEnc;
1119
1120 assert( doc != NULL && in != NULL );
1121 assert( doc->docIn == NULL );
1122 doc->docIn = in;
1123
1124 TY_(TakeConfigSnapshot)( doc ); /* Save config state */
1125 TY_(FreeLexer)( doc );
1126 TY_(FreeAnchors)( doc );
1127
1128 TY_(FreeNode)(doc, &doc->root);
1129 TidyClearMemory(&doc->root, sizeof(Node));
1130
1131 if (doc->givenDoctype)
1132 TidyDocFree(doc, doc->givenDoctype);
1133
1134 doc->givenDoctype = NULL;
1135
1136 doc->lexer = TY_(NewLexer)( doc );
1137 /* doc->lexer->root = &doc->root; */
1138 doc->root.line = doc->lexer->lines;
1139 doc->root.column = doc->lexer->columns;
1140 doc->inputHadBOM = no;
1141
1142 bomEnc = TY_(ReadBOMEncoding)(in);
1143
1144 if (bomEnc != -1)
1145 {
1146 in->encoding = bomEnc;
1147 TY_(SetOptionInt)(doc, TidyInCharEncoding, bomEnc);
1148 }
1149
1150 #ifdef TIDY_WIN32_MLANG_SUPPORT
1151 if (in->encoding > WIN32MLANG)
1152 TY_(Win32MLangInitInputTranscoder)(in, in->encoding);
1153 #endif /* TIDY_WIN32_MLANG_SUPPORT */
1154
1155 /* Tidy doesn't alter the doctype for generic XML docs */
1156 if ( xmlIn )
1157 {
1158 TY_(ParseXMLDocument)( doc );
1159 if ( !TY_(CheckNodeIntegrity)( &doc->root ) )
1160 TidyPanic( doc->allocator, integrity );
1161 }
1162 else
1163 {
1164 doc->warnings = 0;
1165 TY_(ParseDocument)( doc );
1166 if ( !TY_(CheckNodeIntegrity)( &doc->root ) )
1167 TidyPanic( doc->allocator, integrity );
1168 }
1169
1170 #ifdef TIDY_WIN32_MLANG_SUPPORT
1171 TY_(Win32MLangUninitInputTranscoder)(in);
1172 #endif /* TIDY_WIN32_MLANG_SUPPORT */
1173
1174 doc->docIn = NULL;
1175 return tidyDocStatus( doc );
1176 }
1177
/* Emit the post-parse reports for the document.
**
** Unless TidyQuiet is set, the markup version and the warning count
** are reported.  When errors were found and TidyForceOutput is off,
** the "needs author intervention" notice is issued as well.
**
** Returns tidyDocStatus() for the document.
*/
int tidyDocRunDiagnostics( TidyDocImpl* doc )
{
    Bool silent = cfgBool( doc, TidyQuiet );
    Bool forced = cfgBool( doc, TidyForceOutput );

    if ( !silent )
    {
        TY_(ReportMarkupVersion)( doc );
        TY_(ReportNumWarnings)( doc );
    }

    if ( !forced && doc->errors > 0 )
        TY_(NeedsAuthorIntervention)( doc );

    return tidyDocStatus( doc );
}
1195
/* Apply the configured clean-up passes to the parsed tree.
**
** Documents parsed with TidyXmlTags are left untouched.  Otherwise the
** passes run in a fixed order: emphasis and list/blockquote clean-up,
** optional logical-emphasis and Word 2000 handling, optional
** CleanDocument, http-equiv reconciliation, doctype / anchor /
** namespace / language fix-ups and, if requested, the generator mark
** and the XML declaration.
**
** The public identifier of the document's doctype, when it has one,
** is remembered in doc->givenDoctype for later reporting.
**
** Returns tidyDocStatus() for the document.  TidyPanic() is invoked
** if the tree fails the integrity check part-way through.
*/
int tidyDocCleanAndRepair( TidyDocImpl* doc )
{
    Bool word2K   = cfgBool( doc, TidyWord2000 );
    Bool logical  = cfgBool( doc, TidyLogicalEmphasis );
    Bool clean    = cfgBool( doc, TidyMakeClean );
    Bool dropFont = cfgBool( doc, TidyDropFontTags );
    Bool htmlOut  = cfgBool( doc, TidyHtmlOut );
    Bool xmlOut   = cfgBool( doc, TidyXmlOut );
    Bool xhtmlOut = cfgBool( doc, TidyXhtmlOut );
    Bool xmlDecl  = cfgBool( doc, TidyXmlDecl );
    Bool tidyMark = cfgBool( doc, TidyMark );
    Bool tidyXmlTags = cfgBool( doc, TidyXmlTags );
    Bool wantNameAttr = cfgBool( doc, TidyAnchorAsName );
    Bool asXhtml = ( xhtmlOut && !htmlOut ) ? yes : no;
    Node* node;

    if (tidyXmlTags)
        return tidyDocStatus( doc );

    /* simplifies <b><b> ... </b> ...</b> etc. */
    TY_(NestedEmphasis)( doc, &doc->root );

    /* cleans up <dir>indented text</dir> etc. */
    TY_(List2BQ)( doc, &doc->root );
    TY_(BQ2Div)( doc, &doc->root );

    /* logical emphasis requested: run the EmFromI pass */
    if ( logical )
        TY_(EmFromI)( doc, &doc->root );

    if ( word2K && TY_(IsWord2000)(doc) )
    {
        /* prune Word2000's <![if ...]> ... <![endif]> */
        TY_(DropSections)( doc, &doc->root );

        /* drop style & class attributes and empty p, span elements */
        TY_(CleanWord2000)( doc, &doc->root );
        TY_(DropEmptyElements)(doc, &doc->root);
    }

    /* replaces presentational markup by style rules */
    if ( clean || dropFont )
        TY_(CleanDocument)( doc );

    /*  Move terminating <br /> tags from out of paragraphs  */
    /*!  Do we want to do this for all block-level elements?  */

    /* This is disabled due to http://tidy.sf.net/bug/681116 */

    /*  Reconcile http-equiv meta element with output encoding  */
    if (cfg( doc, TidyOutCharEncoding) != RAW
#ifndef NO_NATIVE_ISO2022_SUPPORT
        && cfg( doc, TidyOutCharEncoding) != ISO2022
#endif
        )
        TY_(VerifyHTTPEquiv)( doc, TY_(FindHEAD)( doc ));

    if ( !TY_(CheckNodeIntegrity)( &doc->root ) )
        TidyPanic( doc->allocator, integrity );

    /* remember given doctype for reporting */
    node = TY_(FindDocType)(doc);
    if (node)
    {
        AttVal* fpi = TY_(GetAttrByName)(node, "PUBLIC");
        if (AttrHasValue(fpi))
        {
            if (doc->givenDoctype)
                TidyDocFree(doc, doc->givenDoctype);
            doc->givenDoctype = TY_(tmbstrdup)(doc->allocator,fpi->value);
        }
    }

    if ( doc->root.content )
    {
        /* If we had XHTML input but want HTML output */
        if ( htmlOut && doc->lexer->isvoyager )
        {
            /* Reuse the outer variable rather than shadowing it. */
            node = TY_(FindDocType)(doc);
            /* Remove reference, but do not free */
            if (node)
                TY_(RemoveNode)(node);
        }

        /* The two output flavours differ only in the doctype fixer
        ** and in the flag handed to the namespace/language passes.
        */
        if ( asXhtml )
            TY_(SetXHTMLDocType)(doc);
        else
            TY_(FixDocType)(doc);

        TY_(FixAnchors)(doc, &doc->root, wantNameAttr, yes);
        TY_(FixXhtmlNamespace)(doc, asXhtml);
        TY_(FixLanguageInformation)(doc, &doc->root, asXhtml, yes);

        if (tidyMark )
            TY_(AddGenerator)(doc);
    }

    /* ensure presence of initial <?xml version="1.0"?> */
    if ( xmlOut && xmlDecl )
        TY_(FixXmlDecl)( doc );

    return tidyDocStatus( doc );
}
1304
1305 static
showBodyOnly(TidyDocImpl * doc,TidyTriState bodyOnly)1306 Bool showBodyOnly( TidyDocImpl* doc, TidyTriState bodyOnly )
1307 {
1308 Node* node;
1309
1310 switch( bodyOnly )
1311 {
1312 case TidyNoState:
1313 return no;
1314 case TidyYesState:
1315 return yes;
1316 default:
1317 node = TY_(FindBody)( doc );
1318 if (node && node->implicit )
1319 return yes;
1320 }
1321 return no;
1322 }
1323
1324
/* Run the output-time tree transformations selected by the
** configuration and, when markup output is enabled and either no
** errors were found or output is forced, pretty-print the tree to out.
**
** The configuration is reset to its snapshot before returning.
** Returns tidyDocStatus() for the document.
*/
int tidyDocSaveStream( TidyDocImpl* doc, StreamOut* out )
{
    Bool showMarkup  = cfgBool( doc, TidyShowMarkup );
    Bool forceOutput = cfgBool( doc, TidyForceOutput );
#if SUPPORT_UTF16_ENCODINGS
    TidyTriState bomMode = cfgAutoBool( doc, TidyOutputBOM );
#endif
    Bool xmlOut      = cfgBool( doc, TidyXmlOut );
    Bool xhtmlOut    = cfgBool( doc, TidyXhtmlOut );
    TidyTriState bodyOnly = cfgAutoBool( doc, TidyBodyOnly );

    Bool hideComments = cfgBool( doc, TidyHideComments );
    Bool makeClean    = cfgBool( doc, TidyMakeClean );
    Bool asciiChars   = cfgBool( doc, TidyAsciiChars );
    Bool makeBare     = cfgBool( doc, TidyMakeBare );
    Bool escapeCDATA  = cfgBool( doc, TidyEscapeCdata );
    TidyAttrSortStrategy sortStrategy = cfg( doc, TidySortAttributes );
    Node* root = &doc->root;

    if ( escapeCDATA )
        TY_(ConvertCDATANodes)( doc, root );

    if ( hideComments )
        TY_(DropComments)( doc, root );

    if ( makeClean )
    {
        TY_(DropFontElements)( doc, root, NULL );
        TY_(WbrToSpace)( doc, root );
    }

    if ( makeBare || (makeClean && asciiChars) )
        TY_(DowngradeTypography)( doc, root );

    if ( makeBare )
    {
        /* Note: no longer replaces in */
        /* attribute values / non-text tokens */
        TY_(NormalizeSpaces)( doc->lexer, root );
    }
    else
    {
        TY_(ReplacePreformattedSpaces)( doc, root );
    }

    if ( sortStrategy != TidySortAttrNone )
        TY_(SortAttributes)( root, sortStrategy );

    if ( showMarkup && (forceOutput || doc->errors == 0) )
    {
#if SUPPORT_UTF16_ENCODINGS
        /* Emit a byte order mark when asked for explicitly, or in
        ** auto mode when the input carried one.
        */
        if ( bomMode == TidyYesState
             || (bomMode == TidyAutoState && doc->inputHadBOM) )
            TY_(outBOM)( out );
#endif

        /* No longer necessary. No DOCTYPE == HTML 3.2,
        ** which gives you only the basic character entities,
        ** which are safe in any browser.
        ** if ( !TY_(FindDocType)(doc) )
        **    TY_(SetOptionBool)( doc, TidyNumEntities, yes );
        */

        /* docOut is attached only for the duration of the printing. */
        doc->docOut = out;

        if ( xmlOut && !xhtmlOut )
            TY_(PPrintXMLTree)( doc, NORMAL, 0, root );
        else if ( showBodyOnly( doc, bodyOnly ) )
            TY_(PrintBody)( doc );
        else
            TY_(PPrintTree)( doc, NORMAL, 0, root );

        TY_(PFlushLine)( doc, 0 );
        doc->docOut = NULL;
    }

    TY_(ResetConfigToSnapshot)( doc );
    return tidyDocStatus( doc );
}
1400
1401 /* Tree traversal functions
1402 **
1403 ** The big issue here is the degree to which we should mimic
1404 ** a DOM and/or SAX nodes.
1405 **
1406 ** Is it 100% possible (and, if so, how difficult is it) to
1407 ** emit SAX events from this API? If SAX events are possible,
1408 ** is that 100% of data needed to build a DOM?
1409 */
1410
tidyGetRoot(TidyDoc tdoc)1411 TidyNode TIDY_CALL tidyGetRoot( TidyDoc tdoc )
1412 {
1413 TidyDocImpl* impl = tidyDocToImpl( tdoc );
1414 Node* node = NULL;
1415 if ( impl )
1416 node = &impl->root;
1417 return tidyImplToNode( node );
1418 }
1419
tidyGetHtml(TidyDoc tdoc)1420 TidyNode TIDY_CALL tidyGetHtml( TidyDoc tdoc )
1421 {
1422 TidyDocImpl* impl = tidyDocToImpl( tdoc );
1423 Node* node = NULL;
1424 if ( impl )
1425 node = TY_(FindHTML)( impl );
1426 return tidyImplToNode( node );
1427 }
1428
tidyGetHead(TidyDoc tdoc)1429 TidyNode TIDY_CALL tidyGetHead( TidyDoc tdoc )
1430 {
1431 TidyDocImpl* impl = tidyDocToImpl( tdoc );
1432 Node* node = NULL;
1433 if ( impl )
1434 node = TY_(FindHEAD)( impl );
1435 return tidyImplToNode( node );
1436 }
1437
tidyGetBody(TidyDoc tdoc)1438 TidyNode TIDY_CALL tidyGetBody( TidyDoc tdoc )
1439 {
1440 TidyDocImpl* impl = tidyDocToImpl( tdoc );
1441 Node* node = NULL;
1442 if ( impl )
1443 node = TY_(FindBody)( impl );
1444 return tidyImplToNode( node );
1445 }
1446
1447 /* parent / child */
tidyGetParent(TidyNode tnod)1448 TidyNode TIDY_CALL tidyGetParent( TidyNode tnod )
1449 {
1450 Node* nimp = tidyNodeToImpl( tnod );
1451 return tidyImplToNode( nimp->parent );
1452 }
tidyGetChild(TidyNode tnod)1453 TidyNode TIDY_CALL tidyGetChild( TidyNode tnod )
1454 {
1455 Node* nimp = tidyNodeToImpl( tnod );
1456 return tidyImplToNode( nimp->content );
1457 }
1458
1459 /* siblings */
tidyGetNext(TidyNode tnod)1460 TidyNode TIDY_CALL tidyGetNext( TidyNode tnod )
1461 {
1462 Node* nimp = tidyNodeToImpl( tnod );
1463 return tidyImplToNode( nimp->next );
1464 }
tidyGetPrev(TidyNode tnod)1465 TidyNode TIDY_CALL tidyGetPrev( TidyNode tnod )
1466 {
1467 Node* nimp = tidyNodeToImpl( tnod );
1468 return tidyImplToNode( nimp->prev );
1469 }
1470
1471 /* Node info */
tidyNodeGetType(TidyNode tnod)1472 TidyNodeType TIDY_CALL tidyNodeGetType( TidyNode tnod )
1473 {
1474 Node* nimp = tidyNodeToImpl( tnod );
1475 TidyNodeType ntyp = TidyNode_Root;
1476 if ( nimp )
1477 ntyp = (TidyNodeType) nimp->type;
1478 return ntyp;
1479 }
1480
tidyNodeLine(TidyNode tnod)1481 uint TIDY_CALL tidyNodeLine( TidyNode tnod )
1482 {
1483 Node* nimp = tidyNodeToImpl( tnod );
1484 uint line = 0;
1485 if ( nimp )
1486 line = nimp->line;
1487 return line;
1488 }
tidyNodeColumn(TidyNode tnod)1489 uint TIDY_CALL tidyNodeColumn( TidyNode tnod )
1490 {
1491 Node* nimp = tidyNodeToImpl( tnod );
1492 uint col = 0;
1493 if ( nimp )
1494 col = nimp->column;
1495 return col;
1496 }
1497
tidyNodeGetName(TidyNode tnod)1498 ctmbstr TIDY_CALL tidyNodeGetName( TidyNode tnod )
1499 {
1500 Node* nimp = tidyNodeToImpl( tnod );
1501 ctmbstr nnam = NULL;
1502 if ( nimp )
1503 nnam = nimp->element;
1504 return nnam;
1505 }
1506
1507
tidyNodeHasText(TidyDoc tdoc,TidyNode tnod)1508 Bool TIDY_CALL tidyNodeHasText( TidyDoc tdoc, TidyNode tnod )
1509 {
1510 TidyDocImpl* doc = tidyDocToImpl( tdoc );
1511 if ( doc )
1512 return TY_(nodeHasText)( doc, tidyNodeToImpl(tnod) );
1513 return no;
1514 }
1515
1516
tidyNodeGetText(TidyDoc tdoc,TidyNode tnod,TidyBuffer * outbuf)1517 Bool TIDY_CALL tidyNodeGetText( TidyDoc tdoc, TidyNode tnod, TidyBuffer* outbuf )
1518 {
1519 TidyDocImpl* doc = tidyDocToImpl( tdoc );
1520 Node* nimp = tidyNodeToImpl( tnod );
1521 if ( doc && nimp && outbuf )
1522 {
1523 uint outenc = cfg( doc, TidyOutCharEncoding );
1524 uint nl = cfg( doc, TidyNewline );
1525 StreamOut* out = TY_(BufferOutput)( doc, outbuf, outenc, nl );
1526 Bool xmlOut = cfgBool( doc, TidyXmlOut );
1527 Bool xhtmlOut = cfgBool( doc, TidyXhtmlOut );
1528
1529 doc->docOut = out;
1530 if ( xmlOut && !xhtmlOut )
1531 TY_(PPrintXMLTree)( doc, NORMAL, 0, nimp );
1532 else
1533 TY_(PPrintTree)( doc, NORMAL, 0, nimp );
1534
1535 TY_(PFlushLine)( doc, 0 );
1536 doc->docOut = NULL;
1537
1538 TidyDocFree( doc, out );
1539 return yes;
1540 }
1541 return no;
1542 }
1543
tidyNodeGetValue(TidyDoc tdoc,TidyNode tnod,TidyBuffer * buf)1544 Bool TIDY_CALL tidyNodeGetValue( TidyDoc tdoc, TidyNode tnod, TidyBuffer* buf )
1545 {
1546 TidyDocImpl *doc = tidyDocToImpl( tdoc );
1547 Node *node = tidyNodeToImpl( tnod );
1548 if ( doc == NULL || node == NULL || buf == NULL )
1549 return no;
1550
1551 switch( node->type ) {
1552 case TextNode:
1553 case CDATATag:
1554 case CommentTag:
1555 case ProcInsTag:
1556 case SectionTag:
1557 case AspTag:
1558 case JsteTag:
1559 case PhpTag:
1560 {
1561 tidyBufClear( buf );
1562 tidyBufAppend( buf, doc->lexer->lexbuf + node->start,
1563 node->end - node->start );
1564 break;
1565 }
1566 default:
1567 /* The node doesn't have a value */
1568 return no;
1569 }
1570
1571 return yes;
1572 }
1573
tidyNodeIsProp(TidyDoc ARG_UNUSED (tdoc),TidyNode tnod)1574 Bool TIDY_CALL tidyNodeIsProp( TidyDoc ARG_UNUSED(tdoc), TidyNode tnod )
1575 {
1576 Node* nimp = tidyNodeToImpl( tnod );
1577 Bool isProprietary = yes;
1578 if ( nimp )
1579 {
1580 switch ( nimp->type )
1581 {
1582 case RootNode:
1583 case DocTypeTag:
1584 case CommentTag:
1585 case XmlDecl:
1586 case ProcInsTag:
1587 case TextNode:
1588 case CDATATag:
1589 isProprietary = no;
1590 break;
1591
1592 case SectionTag:
1593 case AspTag:
1594 case JsteTag:
1595 case PhpTag:
1596 isProprietary = yes;
1597 break;
1598
1599 case StartTag:
1600 case EndTag:
1601 case StartEndTag:
1602 isProprietary = ( nimp->tag
1603 ? (nimp->tag->versions&VERS_PROPRIETARY)!=0
1604 : yes );
1605 break;
1606 }
1607 }
1608 return isProprietary;
1609 }
1610
tidyNodeGetId(TidyNode tnod)1611 TidyTagId TIDY_CALL tidyNodeGetId(TidyNode tnod)
1612 {
1613 Node* nimp = tidyNodeToImpl(tnod);
1614
1615 TidyTagId tagId = TidyTag_UNKNOWN;
1616 if (nimp && nimp->tag)
1617 tagId = nimp->tag->id;
1618
1619 return tagId;
1620 }
1621
1622
1623 /* Null for non-element nodes and all pure HTML
1624 cmbstr tidyNodeNsLocal( TidyNode tnod )
1625 {
1626 }
1627 cmbstr tidyNodeNsPrefix( TidyNode tnod )
1628 {
1629 }
1630 cmbstr tidyNodeNsUri( TidyNode tnod )
1631 {
1632 }
1633 */
1634
1635 /* Iterate over attribute values */
tidyAttrFirst(TidyNode tnod)1636 TidyAttr TIDY_CALL tidyAttrFirst( TidyNode tnod )
1637 {
1638 Node* nimp = tidyNodeToImpl( tnod );
1639 AttVal* attval = NULL;
1640 if ( nimp )
1641 attval = nimp->attributes;
1642 return tidyImplToAttr( attval );
1643 }
tidyAttrNext(TidyAttr tattr)1644 TidyAttr TIDY_CALL tidyAttrNext( TidyAttr tattr )
1645 {
1646 AttVal* attval = tidyAttrToImpl( tattr );
1647 AttVal* nxtval = NULL;
1648 if ( attval )
1649 nxtval = attval->next;
1650 return tidyImplToAttr( nxtval );
1651 }
1652
tidyAttrName(TidyAttr tattr)1653 ctmbstr TIDY_CALL tidyAttrName( TidyAttr tattr )
1654 {
1655 AttVal* attval = tidyAttrToImpl( tattr );
1656 ctmbstr anam = NULL;
1657 if ( attval )
1658 anam = attval->attribute;
1659 return anam;
1660 }
tidyAttrValue(TidyAttr tattr)1661 ctmbstr TIDY_CALL tidyAttrValue( TidyAttr tattr )
1662 {
1663 AttVal* attval = tidyAttrToImpl( tattr );
1664 ctmbstr aval = NULL;
1665 if ( attval )
1666 aval = attval->value;
1667 return aval;
1668 }
1669
1670 /* Null for pure HTML
1671 ctmbstr tidyAttrNsLocal( TidyAttr tattr )
1672 {
1673 }
1674 ctmbstr tidyAttrNsPrefix( TidyAttr tattr )
1675 {
1676 }
1677 ctmbstr tidyAttrNsUri( TidyAttr tattr )
1678 {
1679 }
1680 */
1681
tidyAttrGetId(TidyAttr tattr)1682 TidyAttrId TIDY_CALL tidyAttrGetId( TidyAttr tattr )
1683 {
1684 AttVal* attval = tidyAttrToImpl( tattr );
1685 TidyAttrId attrId = TidyAttr_UNKNOWN;
1686 if ( attval && attval->dict )
1687 attrId = attval->dict->id;
1688 return attrId;
1689 }
tidyAttrIsProp(TidyAttr tattr)1690 Bool TIDY_CALL tidyAttrIsProp( TidyAttr tattr )
1691 {
1692 AttVal* attval = tidyAttrToImpl( tattr );
1693 Bool isProprietary = yes;
1694 if ( attval )
1695 isProprietary = ( attval->dict
1696 ? (attval->dict->versions & VERS_PROPRIETARY) != 0
1697 : yes );
1698 return isProprietary;
1699 }
1700
1701 /*
1702 * local variables:
1703 * mode: c
1704 * indent-tabs-mode: nil
1705 * c-basic-offset: 4
1706 * eval: (c-set-offset 'substatement-open 0)
1707 * end:
1708 */
1709