1 /*
2 * Copyright 2001 Niels Provos <provos@citi.umich.edu>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by Niels Provos.
16 * 4. The name of the author may not be used to endorse or promote products
17 * derived from this software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31 #include <sys/types.h>
32 #include <sys/queue.h>
33 #include <sys/time.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37
38 #include "config.h"
39
40 #include <event.h>
41
42 #include "http.h"
43 #include "html.h"
44 #include "util.h"
45
/*
 * Looks up an attribute by name in a NULL-terminated list laid out as
 * consecutive name/value pairs (name at even index, value right after).
 * Names are compared case-insensitively; values are never compared.
 *
 * Returns a pointer to the slot holding the matching name, so the value
 * is at result[1].  When nothing matches, the returned pointer refers to
 * the terminating NULL slot, i.e. callers test *result == NULL.
 */
char **
html_attr_find(char **attr, char *name)
{
	char **slot;

	/* Step over one name/value pair per iteration. */
	for (slot = attr; *slot != NULL; slot += 2) {
		if (strcasecmp(*slot, name) == 0)
			return (slot);
	}

	return (slot);
}
58
59 void
html_free_cb(struct html_cb * cb)60 html_free_cb(struct html_cb *cb)
61 {
62 if (cb->name != NULL)
63 free(cb->name);
64
65 free(cb);
66 }
67
68 int
html_register_cb(struct html_parse * p,char * name,void (* callback)(void *,char *,char **))69 html_register_cb(struct html_parse *p, char *name,
70 void (*callback)(void *, char *, char **))
71 {
72 struct html_cb *cb;
73
74 cb = malloc(sizeof (struct html_cb));
75 if (cb == NULL)
76 return (-1);
77
78 cb->cb = callback;
79 if ((cb->name = strdup(name)) == NULL)
80 goto out;
81
82 TAILQ_INSERT_TAIL(&p->cbqueue, cb, next);
83
84 return (0);
85
86 out:
87 html_free_cb(cb);
88 return (-1);
89 }
90
91 struct html_parse *
html_newparser(void)92 html_newparser(void)
93 {
94 struct html_parse *p;
95
96 p = calloc(1, sizeof(struct html_parse));
97 if (p == NULL)
98 return (NULL);
99
100 TAILQ_INIT(&p->cbqueue);
101
102 return (p);
103 }
104
105 void
html_freeparser(struct html_parse * p)106 html_freeparser(struct html_parse *p)
107 {
108 struct html_cb *cb;
109
110 if (p->base != NULL)
111 free(p->base);
112
113 for (cb = TAILQ_FIRST(&p->cbqueue); cb;
114 cb = TAILQ_FIRST(&p->cbqueue)) {
115 TAILQ_REMOVE(&p->cbqueue, cb, next);
116 html_free_cb(cb);
117 }
118
119 free(p);
120 }
121
122 int
html_parse_setbase(struct html_parse * p,char * base)123 html_parse_setbase(struct html_parse *p, char *base)
124 {
125 if (p->base != NULL)
126 free(p->base);
127
128 p->base = strdup(base);
129
130 return (p->base == NULL ? -1 : 0);
131 }
132
133 void
tag_start(struct html_parse * p,char * el,char ** attr)134 tag_start(struct html_parse *p, char *el, char **attr)
135 {
136 struct html_cb *cb;
137 void *arg;
138
139 arg = p->data != NULL ? p->data : p;
140
141 TAILQ_FOREACH(cb, &p->cbqueue, next) {
142 if (!strcasecmp(cb->name, el)) {
143 cb->cb(arg, el, attr);
144 break;
145 }
146 }
147 }
148
/*
 * Hook for end tags.  Nothing is done with them at present; both
 * arguments are deliberately unused.
 */
void
tag_end(struct html_parse *parser, char *el)
{
	(void)parser;
	(void)el;
}
154
155 #define WHITESPACE " \r\n\t"
156 #define WHITEEND " \r\n\t>"
157 #define ATTRDELIM " \r\n\t=>"
158
159 void
html_parsetag(struct html_parse * parser,char * start,char * end)160 html_parsetag(struct html_parse *parser, char *start, char *end)
161 {
162 char *element, *elend, *attr, *attrend;
163 char **pattr, **pattrend;
164 char *attrlist[MAXATTR*2 + 2];
165 int endtag = 0, i;
166 char quoted;
167
168 start = start + 1 + strspn(start + 1, WHITESPACE);
169 elend = strpbrk(start, WHITEEND);
170
171 if (start >= end)
172 return;
173
174 if (*start == '/') {
175 endtag = 1;
176 start++;
177
178 if (start >= end)
179 return;
180 }
181
182 if ((element = strdupend(start, elend)) == NULL)
183 return;
184
185 memset(attrlist, 0, sizeof(attrlist));
186
187 if (endtag) {
188 tag_end(parser, element);
189 goto out;
190 }
191
192 attr = elend;
193 pattr = attrlist;
194 pattrend = pattr + MAXATTR*2;
195 while (attr < end && pattr < pattrend) {
196 attr += strspn(attr, WHITESPACE);
197 attrend = strpbrk(attr, ATTRDELIM);
198
199 if (attrend >= end)
200 break;
201
202 *pattr = strdupend(attr, attrend);
203 if (*pattr == NULL)
204 goto out;
205 pattr++;
206
207 attr = attrend + strspn(attrend, ATTRDELIM);
208 if (*attr == '"' || *attr == '\'') {
209 char delim[5];
210
211 quoted = *attr;
212 attr++;
213 sprintf(delim, "%c>\r\n", quoted);
214 attrend = strpbrk(attr, delim);
215 } else {
216 quoted = '\0';
217 attrend = strpbrk(attr, WHITEEND);
218 }
219
220 if (attrend == NULL)
221 goto out;
222
223 *pattr = strdupend(attr, attrend);
224 if (*pattr == NULL)
225 goto out;
226 pattr++;
227
228 if (*attrend == quoted)
229 attrend++;
230
231 attr = attrend;
232 }
233
234 tag_start(parser, element, attrlist);
235
236 out:
237 for (i = 0; i < MAXATTR * 2; i++)
238 if (attrlist[i] != NULL)
239 free(attrlist[i]);
240 free(element);
241 }
242
/*
 * Returns a pointer to the first "-->" that lies completely inside
 * [p, end), or NULL if there is none.
 */
static char *
html_comment_end(char *p, char *end)
{
	while (end - p >= 3) {
		/* Only look where two more bytes are still in range. */
		p = memchr(p, '-', (size_t)(end - p) - 2);
		if (p == NULL)
			return (NULL);
		if (p[1] == '-' && p[2] == '>')
			return (p);
		p++;
	}

	return (NULL);
}

/*
 * Scans at most len bytes of body for HTML tags and passes each one to
 * html_parsetag().  Comments ("<!--" up to the next "-->") are skipped.
 * Scanning stops at the first NUL byte, at the end of the buffer, at an
 * unterminated comment or at an unterminated tag, whichever comes first.
 *
 * Tag boundaries are located with length-bounded searches, so nothing at
 * or beyond body + len is ever treated as markup.
 *
 * Always returns 0.
 */
int
html_parser(struct html_parse *parser, char *body, size_t len)
{
	char *p, *end, *tagend;

	if (body == NULL || len == 0)
		return (0);

	/* Treat an embedded NUL as the end of the document. */
	end = memchr(body, '\0', len);
	if (end == NULL)
		end = body + len;

	p = body;
	while (p < end) {
		p = memchr(p, '<', (size_t)(end - p));
		if (p == NULL)
			break;

		if (end - p >= 4 && memcmp(p, "<!--", 4) == 0) {
			/* Skip comments */
			tagend = html_comment_end(p, end);
			if (tagend == NULL)
				break;
			p = tagend + 3;
			continue;
		}

		tagend = memchr(p, '>', (size_t)(end - p));
		if (tagend == NULL)
			break;

		html_parsetag(parser, p, tagend);
		p = tagend + 1;
	}

	return (0);
}
278
279