#ifndef XML_TOK_H
#define XML_TOK_H

/* xml/html tokenizer (c) Øyvind Kolås 2002 */

#include <stdio.h>              /* FILE, used by the declarations below */

/* Sizes, in bytes, of the fixed buffers embedded in xml_tok_state. */
#define inbufsize 4096
#define outbufsize 4096

/*
 * Token type codes.  xml_tok_get() and html_tok_get() are documented as
 * returning a token type; these enumerators are the values used for that.
 * The order (and therefore the numeric values, t_none == 0 through
 * t_error == 14) is part of the interface and must not change.
 */
enum {
  t_none,
  t_whitespace,
  t_prolog,
  t_dtd,
  t_comment,
  t_word,
  t_tag,
  t_closetag,
  t_closeemptytag,
  t_endtag,
  t_att,
  t_val,
  t_eof,
  t_entity,
  t_error
};

/*
 * Tokenizer state, created by xml_tok_init() and released with
 * xml_tok_cleanup().
 *
 * NOTE(review): the implementation is not visible from this header; the
 * per-field remarks below are inferred from names and types only and should
 * be confirmed against the .c file.
 */
typedef struct {
  FILE *file_in;                /* input stream handed to xml_tok_init() */
  int state;                    /* presumably the current scanner state */
  char rbuf[outbufsize];        /* presumably the text of the token being built */
  char curtag[outbufsize];      /* presumably the name of the current tag */
  int c;                        /* presumably the current input character */
  int c_held;                   /* presumably a pushed-back character/flag */

  unsigned char inbuf[inbufsize];       /* raw input buffer */
  int inbuflen;                 /* presumably number of valid bytes in inbuf */
  int inbufpos;                 /* presumably the read position within inbuf */

  int line_no;                  /* presumably the current input line number */
} xml_tok_state;

/**
 * Create a tokenizer state for the given input stream.
 *
 * @param file_in stream to tokenize
 * @return a tokenizer state; presumably to be released with
 *         xml_tok_cleanup() -- ownership and failure behaviour are not
 *         visible from this header, TODO confirm
 */
xml_tok_state *xml_tok_init (FILE *file_in);

/**
 * Release a tokenizer state obtained from xml_tok_init().
 *
 * @param t tokenizer state to clean up
 */
void xml_tok_cleanup (xml_tok_state *t);

/**
 * Get an xml token.
 *
 * @param t    tokenizer state
 * @param data pointer to pointer to string where the resulting data
 *             should be put
 * @return token type
 */
int xml_tok_get (xml_tok_state *t, char **data);

/**
 * Get a html token.
 *
 * Same as xml_tok_get(), but tries to balance bad html parse trees and
 * tries to do an automatic transformation from html to xhtml.
 *
 * @param t    tokenizer state
 * @param data pointer to pointer to string where the resulting data
 *             should be put
 * @return token type
 */
int html_tok_get (xml_tok_state *t, char **data);

#endif /* XML_TOK_H */