/*
 * Copyright (c) 2002, Jon Travis
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef EKHTML_DOT_H
#define EKHTML_DOT_H

#include <stddef.h> /* size_t */
#include <stdio.h>  /* kept: existing users may rely on it transitively */

#ifdef __cplusplus
extern "C" {
#endif

/*!
 * \file ekhtml.h
 * \brief Main El-Kabong header file.
 *
 * This header defines everything that a program should need to use
 * the El-Kabong library.
 */

/**
 * A string object, which is not NUL terminated.
 * For speed reasons, El-Kabong does not deal with zero-terminated
 * strings.  Always use `len` to bound any access to `str`.
 */

typedef struct ekhtml_string_t {
    const char *str;    /**< Actual string data */
    size_t      len;    /**< Length of the data in `str` */
} ekhtml_string_t;

/**
 * Attribute object, passed into callbacks.
 * When ekhtml parses tags containing key/value attributes, it will pass
 * this structure representing those values into the callbacks.  Note, for
 * speed reasons, the `name` and `val` fields are not terminated with
 * '\0'; each is an ekhtml_string_t and therefore carries its own length
 * (`name.len`, `val.len`).
 */

typedef struct ekhtml_attr_t {
    ekhtml_string_t       name;       /**< Name of the attribute */
    ekhtml_string_t       val;        /**< Value of the attribute */
    unsigned int          isBoolean;  /**< True if the attribute is boolean */
    struct ekhtml_attr_t *next;       /**< Pointer to next attribute in the list */
} ekhtml_attr_t;

/*
 * Typedefs for function callback types
 */

/**
 * The parser object.
 * The parser object holds state information, such as which callbacks
 * to invoke when reading tags, how much data is being processed, etc.
 * The type is opaque to users of this header.
 */

typedef struct ekhtml_parser_t ekhtml_parser_t;

/**
 * Callback for simple data.
 * Callback functions of this form are used to process data which is
 * not part of a start or end tag.  This callback may also be used
 * to process the body of comment tags.
 *
 * I.e. <FOO>data_to_process</FOO>
 * The data passed into the callback function will be "data_to_process"
 *
 * @param cbdata  Callback data, as previously set by ekhtml_parser_cbdata_set
 * @param data    A pointer to the data in-between tags.
 *
 * @see ekhtml_parser_cbdata_set()
 * @see ekhtml_parser_datacb_set()
 */

typedef void (*ekhtml_data_cb_t)(void *cbdata, ekhtml_string_t *data);

/**
 * Callback for start tags.
 * Callback functions of this form are used to process start tags.
 *
 * I.e. <FOO>data_to_process</FOO>
 * The tag passed into the callback will be "FOO" with a length of 3.
 *
 * @param cbdata  Callback data, as previously set by ekhtml_parser_cbdata_set
 * @param tag     A pointer to the tag name.  It is passed as an
 *                ekhtml_string_t, so use `tag->len` to bound it rather
 *                than relying on a terminating NUL.
 * @param attrs   Attributes of the tag.
 *
 * @see ekhtml_parser_cbdata_set()
 * @see ekhtml_parser_startcb_add()
 */

typedef void (*ekhtml_starttag_cb_t)(void *cbdata, ekhtml_string_t *tag,
                                     ekhtml_attr_t *attrs);

/**
 * Callback for end tags.
 * Callback functions of this form are used to process end tags.
 *
 * I.e. <FOO>data_to_process</FOO>
 * The tag passed into the callback will be "FOO" with a length of 3.
 *
 * @param cbdata  Callback data, as previously set by ekhtml_parser_cbdata_set
 * @param tag     A pointer to the tag name.  It is passed as an
 *                ekhtml_string_t, so use `tag->len` to bound it rather
 *                than relying on a terminating NUL.
 *
 * @see ekhtml_parser_cbdata_set()
 * @see ekhtml_parser_endcb_add()
 */

typedef void (*ekhtml_endtag_cb_t)(void *cbdata, ekhtml_string_t *tag);

/**
 * Create a new parser object.
 * This routine creates a new parser object, with no set callback
 * functions or state.
 *
 * @param cbdata  Callback data to use when invoking callbacks
 *
 * @returns A new ekhtml_parser_t object
 *
 * @see ekhtml_parser_cbdata_set()
 */

extern ekhtml_parser_t *ekhtml_parser_new(void *cbdata);

/**
 * Destroys a parser object and all memory associated with it.
 * After calling this routine, the parser should no longer be
 * used, as any results would be undefined.
 *
 * @param parser  The parser to destroy
 *
 * @see ekhtml_parser_new()
 */

extern void ekhtml_parser_destroy(ekhtml_parser_t *parser);

/**
 * Set the callback data for the parser.
 * This routine sets the callback data which is passed to set callbacks.
 *
 * @param parser  Parser to set the callback data for
 * @param cbdata  Callback data the parser should use to pass to callbacks
 */

extern void ekhtml_parser_cbdata_set(ekhtml_parser_t *parser, void *cbdata);

/**
 * Set the parser's data callback.
 * This routine sets the callback which should be invoked for
 * non-tagged data.
 *
 * @param parser  Parser to set the callback for
 * @param cb      Callback to invoke when processing non-tagged data
 */

extern void ekhtml_parser_datacb_set(ekhtml_parser_t *parser,
                                     ekhtml_data_cb_t cb);

/**
 * Set the parser's comment callback.
 * This routine sets the callback which should be invoked when
 * the parser processes a comment.
 *
 * @param parser  Parser to set the callback for
 * @param cb      Callback to invoke when processing a comment
 */

extern void ekhtml_parser_commentcb_set(ekhtml_parser_t *parser,
                                        ekhtml_data_cb_t cb);

/**
 * Feed data for the parser to process.
 * Feed data into the HTML parser.  This routine will fill up the
 * internal buffer until it can go no more, then flush the data
 * and refill.  If there is more data that is required than the
 * internal buffer can hold, it will be resized.
 *
 * @param parser  Parser to feed data to
 * @param data    Data to feed to the parser
 */

extern void ekhtml_parser_feed(ekhtml_parser_t *parser,
                               ekhtml_string_t *data);

/**
 * Flush the parser innards.
 * When this function is invoked, the parser will flush all data that is
 * currently held, and any remaining state is saved.  All data which is
 * processed is removed from the parser, and the internal buffer is
 * reshuffled.
 *
 * @param parser    Parser to flush
 * @param flushall  If true, will flush all data, even if tags are not
 *                  complete (i.e. "<FO")
 * @returns 1 if action was taken (i.e. bytes were processed and the
 *          internal buffer was reshuffled) else 0
 */

extern int ekhtml_parser_flush(ekhtml_parser_t *parser, int flushall);

/**
 * Add a callback for a start tag.
 * This routine sets the callback which should be invoked when
 * the parser processes a start tag.  Both specific tags, and
 * unknown tags can be used with this method.
 *
 * @param parser  Parser to set the callback for
 * @param tag     Name of the tag to call `cb` for.  If `tag` is NULL, then
 *                any tags which are unknown to the parser will be sent
 *                to the callback specified by `cb`.
 * @param cb      Callback to invoke
 */

extern void ekhtml_parser_startcb_add(ekhtml_parser_t *parser, const char *tag,
                                      ekhtml_starttag_cb_t cb);

/**
 * Add a callback for an end tag.
 * This routine sets the callback which should be invoked when
 * the parser processes an end tag.  Both specific tags, and
 * unknown tags can be used with this method.
 *
 * @param parser  Parser to set the callback for
 * @param tag     Name of the tag to call `cb` for.  If `tag` is NULL, then
 *                any tags which are unknown to the parser will be sent
 *                to the callback specified by `cb`.
 * @param cb      Callback to invoke
 */

extern void ekhtml_parser_endcb_add(ekhtml_parser_t *parser, const char *tag,
                                    ekhtml_endtag_cb_t cb);

/** EKHTML_BLOCKSIZE = # of blocks to allocate per chunk */
#define EKHTML_BLOCKSIZE (1024 * 4)

#ifdef __cplusplus
}
#endif

#endif /* EKHTML_DOT_H */