1 /*
2  * Copyright (c) 2002, Jon Travis
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 #ifndef EKHTML_DOT_H
28 #define EKHTML_DOT_H
29 
30 #include <stdio.h>
31 
32 #ifdef __cplusplus
33 extern "C" {
34 #endif
35 
36 /*!
37  * \file ekhtml.h
38  * \brief Main El-Kabong header file.
39  *
40  * This header defines everything that a program should need to use
41  * the El-Kabong library.
42  */
43 
/**
 * A counted string: a pointer to character data plus an explicit length.
 * The data is not NUL terminated.  For speed reasons, El-Kabong does not
 * deal with zero-terminated strings, so always consult `len` rather than
 * scanning `str` for a terminator.
 */
typedef struct ekhtml_string_t {
    const char *str;    /**< Actual string data (not NUL terminated) */
    size_t      len;    /**< Length of the data in `str`             */
} ekhtml_string_t;
54 
55 /**
56  * Attribute object, passed into callbacks.
57  * When ekhtml parses tags containing key/value attributes, it will pass
58  * this structure representing those values into the callbacks.  Note, for
59  * speed reasons, things such as the 'name' and 'value' fields are not
60  * terminated with '\0', and therefore have an associated length
61  * field (namelen, vallen).
62  */
63 
64 typedef struct ekhtml_attr_t {
65     ekhtml_string_t       name;       /**< Name of the attribute             */
66     ekhtml_string_t       val;        /**< Value of the attribute            */
67     unsigned int          isBoolean;  /**< True of the attribute is boolean  */
68     struct ekhtml_attr_t *next;  /**< Pointer to next attribute in the list  */
69 } ekhtml_attr_t;
70 
71 /*
72  * Typedefs for function callback types
73  */
74 
/**
 * The parser object.
 * The parser object holds state information, such as which callbacks
 * to invoke when reading tags, how much data is being processed, etc.
 *
 * This header only forward-declares the type, so callers refer to a
 * parser exclusively through `ekhtml_parser_t *`.
 */
typedef struct ekhtml_parser_t ekhtml_parser_t;
82 
83 /**
84  * Callback for simple data.
85  * Callback functions of this form are used to process data which is
86  * not part of a start or end tag.  This callback may also be used
87  * to process the body of comment tags.
88  *
89  * I.e. <FOO>data_to_process</FOO>
90  * The data passed into the callback function will be "data_to_process"
91  *
92  * @param cbdata Callback data, as previously set by ekhtml_parser_cbdata_set
93  * @param data   A pointer to the data in-between tags.
94  *
95  * @see ekhtml_parser_cbdata_set()
96  * @see ekhtml_parser_datacb_set()
97  */
98 
99 typedef void (*ekhtml_data_cb_t)(void *cbdata, ekhtml_string_t *data);
100 
101 /**
102  * Callback for start tags.
103  * Callback functions of this form are used to process start tags.
104  *
105  * I.e. <FOO>data_to_process</FOO>
106  * The tag passed into the callback will be "FOO" with a length of 3.
107  *
108  * @param cbdata Callback data, as previously set by ekhtml_parser_cbdata_set
109  * @param tag    A pointer to tag name.  This is a traditional NUL terminated
110  *               string.
111  * @param attrs  Attributes of the tag.
112  *
113  * @see ekhtml_parser_cbdata_set()
114  * @see ekhtml_parser_startcb_add()
115  */
116 
117 typedef void (*ekhtml_starttag_cb_t)(void *cbdata, ekhtml_string_t *tag,
118 				     ekhtml_attr_t *attrs);
119 
120 /**
121  * Callback for end tags.
122  * Callback functions of this form are used to process end tags.
123  *
124  * I.e. <FOO>data_to_process</FOO>
125  * The tag passed into the callback will be "FOO" with a length of 3.
126  *
127  * @param cbdata Callback data, as previously set by ekhtml_parser_cbdata_set
128  * @param tag    A pointer to tag name.  This is a traditional NUL terminated
129  *               string.
130  *
131  * @see ekhtml_parser_cbdata_set()
132  * @see ekhtml_parser_endcb_add()
133  */
134 
135 typedef void (*ekhtml_endtag_cb_t)(void *cbdata, ekhtml_string_t *tag);
136 
137 /**
138  * Create a new parser object.
139  * This routine creates a new parser object, with no set callback
140  * functions or state.
141  *
142  * @param cbdata  Callback data to use when invoking callbacks
143  *
144  * @returns A new ekhtml_parser_t object
145  *
146  * @see ekhtml_parser_cbdata_set()
147  */
148 
149 extern ekhtml_parser_t *ekhtml_parser_new(void *cbdata);
150 
151 /**
152  * Destroys a parser object and all memory associated with it.
153  * After calling this routine, the parser should no longer be
154  * used, as any results would be undefined.
155  *
156  * @param parser  The parser to destroy
157  *
158  * @see ekhtml_parser_new()
159  */
160 
161 extern void ekhtml_parser_destroy(ekhtml_parser_t *parser);
162 
163 /**
164  * Set the callback data for the parser.
165  * This routine sets the callback data which is passed to set callbacks.
166  *
167  * @param parser  Parser to set the callback data for
168  * @param cbdata  Callback data the parser should use to pass to callbacks
169  */
170 
171 extern void ekhtml_parser_cbdata_set(ekhtml_parser_t *parser, void *cbdata);
172 
173 /**
174  * Set the parser's data callback.
175  * This routine sets the callback which should be invoked for
176  * non-tagged data.
177  *
178  * @param parser  Parser to set the callback for
179  * @param cb      Callback to invoke when processing non-tagged data
180  */
181 
182 extern void ekhtml_parser_datacb_set(ekhtml_parser_t *parser,
183                                      ekhtml_data_cb_t cb);
184 
185 /**
186  * Set the parser's comment callback.
187  * This routine sets the callback which should be invoked when
188  * the parser processes a comment.
189  *
190  * @param parser  Parser to set the callback for
191  * @param cb      Callback to invoke when processing a comment
192  */
193 
194 extern void ekhtml_parser_commentcb_set(ekhtml_parser_t *parser,
195                                         ekhtml_data_cb_t cb);
196 
197 /**
198  * Feed data for the parser to process.
199  * Feed data into the HTML parser.  This routine will fill up the
200  * internal buffer until it can go no more, then flush the data
201  * and refill.  If there is more data that is required than the
202  * internal buffer can hold, it will be resized
203  *
204  * @param parser  Parser to feed data to
205  * @param data    Data to feed to the parser
206  */
207 
208 extern void ekhtml_parser_feed(ekhtml_parser_t *parser,
209                                ekhtml_string_t *data);
210 
211 /**
212  * Flush the parser innards.
213  * When this function is invoked, the parser will flush all data that is
214  * currently held, and any remaining state is saved.  All data which is
215  * processed is removed from the parser, and the internal buffer is
216  * reshuffled.
217  *
218  * @param parser   Parser to flush
219  * @param flushall If true, will flush all data, even if tags are not
220  *                 complete (i.e. "<FO")
221  * @returns 1 if action was taken (i.e. bytes were processed and the
222  *          internal buffer was reshuffled) else 0
223  */
224 
225 extern int ekhtml_parser_flush(ekhtml_parser_t *parser, int flushall);
226 
227 /**
228  * Add a callback for a start tag.
229  * This routine sets the callback which should be invoked when
230  * the parser processes a start tag.  Both specific tags, and
231  * unknown tags can be used with this method.
232  *
233  * @param parser  Parser to set the callback for
234  * @param tag     Name of the tag to call `cb` for.  If `tag` is NULL, then
235  *                any tags which are unknown to the parser will be sent
236  *                to the callback specified by `cb`.
237  * @param cb      Callback to invoke
238  */
239 
240 extern void ekhtml_parser_startcb_add(ekhtml_parser_t *parser, const char *tag,
241 				      ekhtml_starttag_cb_t cb);
242 
243 /**
244  * Add a callback for an end tag.
245  * This routine sets the callback which should be invoked when
246  * the parser processes an end tag.  Both specific tags, and
247  * unknown tags can be used with this method.
248  *
249  * @param parser  Parser to set the callback for
250  * @param tag     Name of the tag to call `cb` for.  If `tag` is NULL, then
251  *                any tags which are unknown to the parser will be sent
252  *                to the callback specified by `cb`.
253  * @param cb      Callback to invoke
254  */
255 
256 extern void ekhtml_parser_endcb_add(ekhtml_parser_t *parser, const char *tag,
257 				    ekhtml_endtag_cb_t cb);
258 
/**
 * EKHTML_BLOCKSIZE = # of blocks to allocate per chunk.
 * Expands to the parenthesised constant expression 1024 * 4 (i.e. 4096).
 */
#define EKHTML_BLOCKSIZE (1024 * 4)
261 
262 #ifdef __cplusplus
263 }
264 #endif
265 
266 #endif
267