1 #ifndef __TAGS_H__
2 #define __TAGS_H__
4 /**************************************************************************//**
5  * @file
6  * Recognize HTML Tags.
7  *
8  * The HTML tags are stored as 8 bit ASCII strings.
9  * Use lookupw() to find a tag given a wide char string.
10  *
11  * @author  HTACG, et al (consult git log)
12  *
13  * @copyright
14  *     Copyright (c) 1998-2017 World Wide Web Consortium (Massachusetts
15  *     Institute of Technology, European Research Consortium for Informatics
16  *     and Mathematics, Keio University) and HTACG.
17  * @par
18  *     All Rights Reserved.
19  * @par
20  *     See `tidy.h` for the complete license.
21  *
22  * @date Additional updates: consult git log
23  *
24  ******************************************************************************/
26 #include "forward.h"
27 #include "attrdict.h"
29 /** @addtogroup internal_api */
30 /** @{ */
33 /***************************************************************************//**
34  ** @defgroup tags_h HTML Tags
35  **
36  ** This module organizes all of Tidy's HTML tag operations, such as parsing
37  ** tags, defining tags, and user-defined tags.
38  **
39  ** @{
40  ******************************************************************************/
43 /** @name Basic Structures and Tag Operations.
44  ** These structures form the backbone of Tidy tag processing, and the
45  ** functions in this group provide basic operations with tags and nodes.
46  */
47 /** @{ */
/** Categories of user-defined tags that can be created. Every value other
 ** than `tagtype_null` occupies its own single bit.
 */
typedef enum
{
    tagtype_null   = 0,      /**< First item marker. */
    tagtype_empty  = 1 << 0, /**< Tag is an empty element. */
    tagtype_inline = 1 << 1, /**< Tag is an inline element. */
    tagtype_block  = 1 << 2, /**< Tag is a block level element. */
    tagtype_pre    = 1 << 3  /**< Tag is a preformatted tag. */
} UserTagType;
/** Function type (not a pointer type) describing a routine used to parse the
 ** HTML of a Tidy tag. Pointers to such routines are written `Parser*`.
 ** @param doc The Tidy document.
 ** @param node The node handed to the parser.
 ** @param mode The GetTokenMode handed to the parser.
 */
typedef void (Parser)( TidyDocImpl* doc, Node *node, GetTokenMode mode );
/** Function type (not a pointer type) describing a routine to be used to
 ** check the attributes of a Tidy tag. Pointers to such routines are written
 ** `CheckAttribs*`.
 ** @param doc The Tidy document.
 ** @param node The node whose attributes are to be checked.
 */
typedef void (CheckAttribs)( TidyDocImpl* doc, Node *node );
/** Defines a dictionary entry for a single Tidy tag, including all of the
 ** relevant information that it requires. Entries can be chained through
 ** the `next` member.
 ** NOTE(review): the `Dict` name used for `next` is presumably a typedef of
 ** this struct declared in forward.h - confirm.
 */
struct _Dict
{
    TidyTagId           id;       /**< Identifier for this tag. */
    tmbstr              name;     /**< The tag name. */
    uint                versions; /**< Accumulates potential HTML versions. See TY_(ConstrainVersion). */
    AttrVersion const * attrvers; /**< Accumulates potential HTML versions for attributes. */
    uint                model;    /**< Bit set of the content models relevant to the tag. See lexer.h; there is no enum. */
    Parser*             parser;   /**< Pointer to the parser function to use for this tag. */
    CheckAttribs*       chkattrs; /**< Pointer to the function that checks this tag's attributes. */
    Dict*               next;     /**< Link to the next dictionary entry. */
};
/** This enum gives the size of the hash table used for tag hash lookup.
 */
enum
{
    /** Number of slots in the tag hash table (the `hashtab` array). */
    ELEMENT_HASH_SIZE = 178u
};
/** This structure provides hash lookup for Tidy tags. Each entry refers to
 ** one tag definition and links to a further entry through `next`.
 */
typedef struct _DictHash
{
    Dict const*         tag;   /**< The tag definition this entry refers to. */
    struct _DictHash*   next;  /**< The next hash entry linked from this one. */
} DictHash;
/** This structure consists of the lists of all tags known to Tidy.
 */
typedef struct _TidyTagImpl
{
    Dict* xml_tags;                        /**< Placeholder for all xml tags. */
    Dict* declared_tag_list;               /**< User-declared tags. */
    DictHash* hashtab[ELEMENT_HASH_SIZE];  /**< Hash table of Tidy's built-in tags: ELEMENT_HASH_SIZE slots, each pointing to a DictHash entry. */
} TidyTagImpl;
/** Coordinates Config update and Tags data.
 ** @param doc The Tidy document.
 ** @param opt The option the tag is intended for.
 ** @param name The name of the new tag.
 */
TY_PRIVATE void TY_(DeclareUserTag)( TidyDocImpl* doc, const TidyOptionImpl* opt, ctmbstr name );
/** Interface for finding a tag by TidyTagId.
 ** @param tid The TidyTagId to search for.
 ** @returns A pointer to the (const) dictionary entry of a Tidy tag.
 */
TY_PRIVATE const Dict* TY_(LookupTagDef)( TidyTagId tid );
/** Assigns the node's tag.
 ** @param doc The Tidy document.
 ** @param node The node to assign the tag to.
 ** @returns A Bool indicating whether or not the tag was assigned.
 */
TY_PRIVATE Bool    TY_(FindTag)( TidyDocImpl* doc, Node *node );
/** Finds the parser function for a given node.
 ** @param doc The Tidy document.
 ** @param node The node to look up.
 ** @returns A pointer to the parser function for the given node.
 */
TY_PRIVATE Parser* TY_(FindParser)( TidyDocImpl* doc, Node *node );
/** Defines a new user-defined tag.
 ** @param doc The Tidy document.
 ** @param tagType The type of user-defined tag to define.
 ** @param name The name of the new tag.
 */
TY_PRIVATE void    TY_(DefineTag)( TidyDocImpl* doc, UserTagType tagType, ctmbstr name );
/** Frees user-defined tags of the given type, or all user tags if given
 ** `tagtype_null`.
 ** @param doc The Tidy document.
 ** @param tagType The type of tag to free, or `tagtype_null` to free all
 **        user-defined tags.
 */
TY_PRIVATE void    TY_(FreeDeclaredTags)( TidyDocImpl* doc, UserTagType tagType );
/** Initiates an iterator for a list of user-declared tags, including autonomous
 ** custom tags detected in the document if @ref TidyUseCustomTags is not set to
 ** **no**.
 ** @param doc An instance of a TidyDocImpl to query.
 ** @result Returns a TidyIterator, which is a token used to represent the
 **         current position in a list within LibTidy.
 */
TY_PRIVATE TidyIterator   TY_(GetDeclaredTagList)( TidyDocImpl* doc );
/** Given a valid TidyIterator initiated with TY_(GetDeclaredTagList)(),
 ** returns a string representing a user-declared or autonomous custom tag.
 ** @remark Specifying tagType limits the scope of the tags to one of
 **         @ref UserTagType types. Note that autonomous custom tags (if used)
 **         are added to one of these option types, depending on the value of
 **         @ref TidyUseCustomTags.
 ** @param doc The Tidy document.
 ** @param tagType The type of tag to iterate through.
 ** @param iter Pointer to the iterator token provided initially by
 **        TY_(GetDeclaredTagList)().
 ** @result A string containing the next tag.
 */
TY_PRIVATE ctmbstr        TY_(GetNextDeclaredTag)( TidyDocImpl* doc, UserTagType tagType,
                                        TidyIterator* iter );
/** Initializes tags and tag structures for the given Tidy document.
 ** @param doc The Tidy document.
 */
TY_PRIVATE void TY_(InitTags)( TidyDocImpl* doc );
/** Frees the tags and structures used by Tidy for tags.
 ** @param doc The Tidy document.
 */
TY_PRIVATE void TY_(FreeTags)( TidyDocImpl* doc );
/** Tidy defaults to HTML5 mode. If the <!DOCTYPE ...> is found to NOT be
 ** HTML5, then adjust the tags table to HTML4 mode.
 ** @param doc The Tidy document.
 */
TY_PRIVATE void TY_(AdjustTags)( TidyDocImpl *doc );
/** Reset the tags table back to default HTML5 mode.
 ** @param doc The Tidy document.
 */
TY_PRIVATE void TY_(ResetTags)( TidyDocImpl *doc );
/** Indicates whether or not Tidy is processing in HTML5 mode.
 ** @param doc The Tidy document.
 ** @returns Returns `yes` if processing in HTML5 mode.
 */
TY_PRIVATE Bool TY_(IsHTML5Mode)( TidyDocImpl *doc );
222 /** @} */
223 /** @name Parser Methods And Attribute Checker Functions for Tags
224  ** These functions define the parsers and attribute checking functions for
225  ** each of Tidy's tags.
226  */
227 /** @{ */
/* Parser and CheckAttribs are function types, so each line below declares a
 * function that has the corresponding typedef's signature.
 */
TY_PRIVATE Parser TY_(ParseHTML);
TY_PRIVATE Parser TY_(ParseHead);
TY_PRIVATE Parser TY_(ParseTitle);
TY_PRIVATE Parser TY_(ParseScript);
TY_PRIVATE Parser TY_(ParseFrameSet);
TY_PRIVATE Parser TY_(ParseNoFrames);
TY_PRIVATE Parser TY_(ParseBody);
TY_PRIVATE Parser TY_(ParsePre);
TY_PRIVATE Parser TY_(ParseList);
TY_PRIVATE Parser TY_(ParseDefList);
TY_PRIVATE Parser TY_(ParseBlock);
TY_PRIVATE Parser TY_(ParseInline);
TY_PRIVATE Parser TY_(ParseEmpty);
TY_PRIVATE Parser TY_(ParseTableTag);
TY_PRIVATE Parser TY_(ParseColGroup);
TY_PRIVATE Parser TY_(ParseRowGroup);
TY_PRIVATE Parser TY_(ParseRow);
TY_PRIVATE Parser TY_(ParseSelect);
TY_PRIVATE Parser TY_(ParseOptGroup);
TY_PRIVATE Parser TY_(ParseText);
TY_PRIVATE Parser TY_(ParseDatalist);
TY_PRIVATE Parser TY_(ParseNamespace);
/* Attribute checking function, declared through the CheckAttribs type. */
TY_PRIVATE CheckAttribs TY_(CheckAttributes);
256 /** @} */
257 /** @name Other Tag and Node Lookup Functions
258  ** These functions perform additional lookup on tags and nodes.
259  */
260 /** @{ */
/** Gets the TidyTagId of the given node. 0 == TidyTag_UNKNOWN.
 ** Yields TidyTag_UNKNOWN when the node pointer is null or the node has no
 ** tag assigned.
 ** @note `node` is evaluated more than once; do not pass an expression with
 **       side effects.
 */
#define TagId(node)        ((node) && (node)->tag ? (node)->tag->id : TidyTag_UNKNOWN)

/** Determines if the given node is of the given tag id type. Evaluates to
 ** false when the node pointer is null or the node has no tag assigned.
 ** @note `tid` is parenthesized so that any expression (for example a
 **       conditional or bitwise expression) is compared as a whole.
 ** @note `node` is evaluated more than once; do not pass an expression with
 **       side effects.
 */
#define TagIsId(node, tid) ((node) && (node)->tag && (node)->tag->id == (tid))
/** Inquires whether or not the given node is a text node.
 ** @param node The node being interrogated.
 ** @returns The status of the inquiry.
 */
TY_PRIVATE Bool TY_(nodeIsText)( Node* node );
/** Inquires whether or not the given node is an element node.
 ** @param node The node being interrogated.
 ** @returns The status of the inquiry.
 */
TY_PRIVATE Bool TY_(nodeIsElement)( Node* node );
/** Inquires whether or not the given node has any text.
 ** @param doc The Tidy document.
 ** @param node The node being interrogated.
 ** @returns The status of the inquiry.
 */
TY_PRIVATE Bool TY_(nodeHasText)( TidyDocImpl* doc, Node* node );
/** Inquires whether the given element name looks like it's an autonomous
 ** custom element tag.
 ** @param element A string to be checked.
 ** @returns The status of the inquiry.
 */
TY_PRIVATE Bool TY_(elementIsAutonomousCustomFormat)( ctmbstr element );
/** Inquires whether the given node looks like it's an autonomous custom
 ** element tag.
 ** @param node The node being interrogated.
 ** @returns The status of the inquiry.
 */
TY_PRIVATE Bool TY_(nodeIsAutonomousCustomFormat)( Node* node );
/** True if the node looks like it's an autonomous custom element tag, and
 ** @ref TidyUseCustomTags is not disabled, and we're in HTML5 mode, which are
 ** all requirements for valid autonomous custom tags.
 ** @param doc The Tidy document.
 ** @param node The node being interrogated.
 ** @returns The status of the inquiry.
 */
TY_PRIVATE Bool TY_(nodeIsAutonomousCustomTag)( TidyDocImpl* doc, Node* node );
/** Does the node have the indicated content model? True if any of the bits
 ** requested are set.
 ** @param node The node being interrogated.
 ** @param contentModel The content model bits to check against.
 ** @returns The status of the inquiry.
 */
TY_PRIVATE Bool TY_(nodeHasCM)( Node* node, uint contentModel );
/** Does the content model of the node include block?
 ** @param node The node being interrogated.
 ** @returns The status of the inquiry.
 */
TY_PRIVATE Bool TY_(nodeCMIsBlock)( Node* node );
/** Does the content model of the node include inline?
 ** @param node The node being interrogated.
 ** @returns The status of the inquiry.
 */
TY_PRIVATE Bool TY_(nodeCMIsInline)( Node* node );
/** Does the content model of the node include empty?
 ** @param node The node being interrogated.
 ** @returns The status of the inquiry.
 */
TY_PRIVATE Bool TY_(nodeCMIsEmpty)( Node* node );
/** Is the node a header, such as H1, H2, ..., H6?
 ** @param node The node being interrogated.
 ** @returns The status of the inquiry.
 */
TY_PRIVATE Bool TY_(nodeIsHeader)( Node* node );
/** Inquires as to the header level of the given node: 1, 2, ..., 6.
 ** @param node The node being interrogated.
 ** @returns The header level.
 */
TY_PRIVATE uint TY_(nodeHeaderLevel)( Node* node );
/* Convenience predicates: each nodeIsXXX( node ) expands to
 * TagIsId( node, TidyTag_XXX ), i.e. it tests whether the node's tag has the
 * named TidyTagId.
 */
#define nodeIsHTML( node )       TagIsId( node, TidyTag_HTML )
#define nodeIsHEAD( node )       TagIsId( node, TidyTag_HEAD )
#define nodeIsTITLE( node )      TagIsId( node, TidyTag_TITLE )
#define nodeIsBASE( node )       TagIsId( node, TidyTag_BASE )
#define nodeIsMETA( node )       TagIsId( node, TidyTag_META )
#define nodeIsBODY( node )       TagIsId( node, TidyTag_BODY )
#define nodeIsFRAMESET( node )   TagIsId( node, TidyTag_FRAMESET )
#define nodeIsFRAME( node )      TagIsId( node, TidyTag_FRAME )
#define nodeIsIFRAME( node )     TagIsId( node, TidyTag_IFRAME )
#define nodeIsNOFRAMES( node )   TagIsId( node, TidyTag_NOFRAMES )
#define nodeIsHR( node )         TagIsId( node, TidyTag_HR )
#define nodeIsH1( node )         TagIsId( node, TidyTag_H1 )
#define nodeIsH2( node )         TagIsId( node, TidyTag_H2 )
#define nodeIsPRE( node )        TagIsId( node, TidyTag_PRE )
#define nodeIsLISTING( node )    TagIsId( node, TidyTag_LISTING )
#define nodeIsP( node )          TagIsId( node, TidyTag_P )
#define nodeIsUL( node )         TagIsId( node, TidyTag_UL )
#define nodeIsOL( node )         TagIsId( node, TidyTag_OL )
#define nodeIsDL( node )         TagIsId( node, TidyTag_DL )
#define nodeIsDIR( node )        TagIsId( node, TidyTag_DIR )
#define nodeIsLI( node )         TagIsId( node, TidyTag_LI )
#define nodeIsDT( node )         TagIsId( node, TidyTag_DT )
#define nodeIsDD( node )         TagIsId( node, TidyTag_DD )
#define nodeIsTABLE( node )      TagIsId( node, TidyTag_TABLE )
#define nodeIsCAPTION( node )    TagIsId( node, TidyTag_CAPTION )
#define nodeIsTD( node )         TagIsId( node, TidyTag_TD )
#define nodeIsTH( node )         TagIsId( node, TidyTag_TH )
#define nodeIsTR( node )         TagIsId( node, TidyTag_TR )
#define nodeIsCOL( node )        TagIsId( node, TidyTag_COL )
#define nodeIsCOLGROUP( node )   TagIsId( node, TidyTag_COLGROUP )
#define nodeIsBR( node )         TagIsId( node, TidyTag_BR )
#define nodeIsA( node )          TagIsId( node, TidyTag_A )
#define nodeIsLINK( node )       TagIsId( node, TidyTag_LINK )
#define nodeIsB( node )          TagIsId( node, TidyTag_B )
#define nodeIsI( node )          TagIsId( node, TidyTag_I )
#define nodeIsSTRONG( node )     TagIsId( node, TidyTag_STRONG )
#define nodeIsEM( node )         TagIsId( node, TidyTag_EM )
#define nodeIsBIG( node )        TagIsId( node, TidyTag_BIG )
#define nodeIsSMALL( node )      TagIsId( node, TidyTag_SMALL )
#define nodeIsPARAM( node )      TagIsId( node, TidyTag_PARAM )
#define nodeIsOPTION( node )     TagIsId( node, TidyTag_OPTION )
#define nodeIsOPTGROUP( node )   TagIsId( node, TidyTag_OPTGROUP )
#define nodeIsIMG( node )        TagIsId( node, TidyTag_IMG )
#define nodeIsMAP( node )        TagIsId( node, TidyTag_MAP )
#define nodeIsAREA( node )       TagIsId( node, TidyTag_AREA )
#define nodeIsNOBR( node )       TagIsId( node, TidyTag_NOBR )
#define nodeIsWBR( node )        TagIsId( node, TidyTag_WBR )
#define nodeIsFONT( node )       TagIsId( node, TidyTag_FONT )
#define nodeIsLAYER( node )      TagIsId( node, TidyTag_LAYER )
#define nodeIsSPACER( node )     TagIsId( node, TidyTag_SPACER )
#define nodeIsCENTER( node )     TagIsId( node, TidyTag_CENTER )
#define nodeIsSTYLE( node )      TagIsId( node, TidyTag_STYLE )
#define nodeIsSCRIPT( node )     TagIsId( node, TidyTag_SCRIPT )
#define nodeIsNOSCRIPT( node )   TagIsId( node, TidyTag_NOSCRIPT )
#define nodeIsFORM( node )       TagIsId( node, TidyTag_FORM )
#define nodeIsTEXTAREA( node )   TagIsId( node, TidyTag_TEXTAREA )
#define nodeIsBLOCKQUOTE( node ) TagIsId( node, TidyTag_BLOCKQUOTE )
#define nodeIsAPPLET( node )     TagIsId( node, TidyTag_APPLET )
#define nodeIsOBJECT( node )     TagIsId( node, TidyTag_OBJECT )
#define nodeIsDIV( node )        TagIsId( node, TidyTag_DIV )
#define nodeIsSPAN( node )       TagIsId( node, TidyTag_SPAN )
#define nodeIsINPUT( node )      TagIsId( node, TidyTag_INPUT )
#define nodeIsQ( node )          TagIsId( node, TidyTag_Q )
#define nodeIsLABEL( node )      TagIsId( node, TidyTag_LABEL )
#define nodeIsH3( node )         TagIsId( node, TidyTag_H3 )
#define nodeIsH4( node )         TagIsId( node, TidyTag_H4 )
#define nodeIsH5( node )         TagIsId( node, TidyTag_H5 )
#define nodeIsH6( node )         TagIsId( node, TidyTag_H6 )
#define nodeIsADDRESS( node )    TagIsId( node, TidyTag_ADDRESS )
#define nodeIsXMP( node )        TagIsId( node, TidyTag_XMP )
#define nodeIsSELECT( node )     TagIsId( node, TidyTag_SELECT )
#define nodeIsBLINK( node )      TagIsId( node, TidyTag_BLINK )
#define nodeIsMARQUEE( node )    TagIsId( node, TidyTag_MARQUEE )
#define nodeIsEMBED( node )      TagIsId( node, TidyTag_EMBED )
#define nodeIsBASEFONT( node )   TagIsId( node, TidyTag_BASEFONT )
#define nodeIsISINDEX( node )    TagIsId( node, TidyTag_ISINDEX )
#define nodeIsS( node )          TagIsId( node, TidyTag_S )
#define nodeIsSTRIKE( node )     TagIsId( node, TidyTag_STRIKE )
#define nodeIsSUB( node )        TagIsId( node, TidyTag_SUB )
#define nodeIsSUP( node )        TagIsId( node, TidyTag_SUP )
#define nodeIsU( node )          TagIsId( node, TidyTag_U )
#define nodeIsMENU( node )       TagIsId( node, TidyTag_MENU )
#define nodeIsMAIN( node )       TagIsId( node, TidyTag_MAIN )
#define nodeIsBUTTON( node )     TagIsId( node, TidyTag_BUTTON )
#define nodeIsCANVAS( node )     TagIsId( node, TidyTag_CANVAS )
#define nodeIsPROGRESS( node )   TagIsId( node, TidyTag_PROGRESS )
#define nodeIsINS( node )        TagIsId( node, TidyTag_INS )
#define nodeIsDEL( node )        TagIsId( node, TidyTag_DEL )
#define nodeIsSVG( node )        TagIsId( node, TidyTag_SVG )
/* HTML5 */
#define nodeIsDATALIST( node )   TagIsId( node, TidyTag_DATALIST )
#define nodeIsDATA( node )       TagIsId( node, TidyTag_DATA )
#define nodeIsMATHML( node )     TagIsId( node, TidyTag_MATHML ) /* #130 MathML attr and entity fix! */
/* NOT in HTML 5 */
#define nodeIsACRONYM( node )    TagIsId( node, TidyTag_ACRONYM )
/* NOTE(review): `nodesIsFRAME` (extra "s") has the same expansion as
 * nodeIsFRAME; it looks like a misspelled duplicate. Prefer nodeIsFRAME in
 * new code; whether anything still uses this spelling is not visible here.
 */
#define nodesIsFRAME( node )     TagIsId( node, TidyTag_FRAME )
#define nodeIsTT( node )         TagIsId( node, TidyTag_TT )
468 /** @} name */
469 /** @} tags_h group */
470 /** @} internal_api addtogroup */
473 #endif /* __TAGS_H__ */