1 #ifndef XML_TOK_H
2 
/* xml/html tokenizer (c) Øyvind Kolås 2002 */

#include <stdio.h>	/* FILE, used by xml_tok_state and xml_tok_init() */

/* Capacities, in bytes, of the fixed-size arrays inside xml_tok_state. */
#define inbufsize  4096	/* dimension of xml_tok_state.inbuf */
#define outbufsize 4096	/* dimension of xml_tok_state.rbuf and .curtag */
7 
/*
 * Token type codes.
 *
 * xml_tok_get() is documented as returning a "token type"; presumably that
 * is one of these constants (confirm against the tokenizer source).
 * Values are sequential, starting at t_none == 0 and ending at t_error == 14.
 *
 * NOTE(review): the per-constant notes below are read off the names only;
 * verify them against the implementation before relying on them.
 */
enum {
	t_none = 0,		/* presumably: no token */
	t_whitespace,		/* presumably: run of whitespace */
	t_prolog,		/* presumably: <?xml ... ?> prolog */
	t_dtd,			/* presumably: <!DOCTYPE ...> declaration */
	t_comment,		/* presumably: <!-- ... --> comment */
	t_word,			/* presumably: word of character data */
	t_tag,			/* presumably: start of an opening tag */
	t_closetag,		/* presumably: end of a tag ('>') -- confirm */
	t_closeemptytag,	/* presumably: end of an empty tag ('/>') -- confirm */
	t_endtag,		/* presumably: closing tag (</name>) -- confirm */
	t_att,			/* presumably: attribute name */
	t_val,			/* presumably: attribute value */
	t_eof,			/* presumably: end of input */
	t_entity,		/* presumably: entity reference (&name;) */
	t_error			/* presumably: tokenizer error */
};
25 
26 
27 typedef struct {
28 	FILE *file_in;
29 	int state;
30 	char rbuf[outbufsize];
31 	char curtag[outbufsize];
32 	int c;
33 	int c_held;
34 
35 
36 	unsigned char inbuf[inbufsize];
37 	int inbuflen;
38 	int inbufpos;
39 
40 	int line_no;
41 } xml_tok_state;
42 
43 xml_tok_state *xml_tok_init (FILE * file_in);
44 void xml_tok_cleanup (xml_tok_state * t);
45 
46 /*	get an xml token
47  *	@param data pointer to pointer to string where the result *             ing data should be put
48  * @return token type
49  *
50  */
51 int xml_tok_get (xml_tok_state * t, char **data);
52 
53 /** get a html token
54  * same as above, but tries to balance bad html parse
55  * trees, tries to do an automatic transformation from
56  * html to xhtml
57  */
58 int html_tok_get (xml_tok_state * t, char **data);
59 
60 #define XML_TOK_H
61 #endif /*XML_TOK_H */
62