1 /*
2 MiddleMan filtering proxy server
3 Copyright (C) 2002 Jason McLaughlin
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19
20 #include <stdio.h>
21 #include <string.h>
22 #include "proto.h"
23
24 HTMLSTREAM *htmlstream_new()
25 {
26 HTMLSTREAM *ret;
27
28 ret = xmalloc(sizeof(HTMLSTREAM));
29 ret->tree = ret->tail = NULL;
30 ret->buffer = filebuf_new();
31 ret->startpos = 0;
32 ret->incomment = FALSE;
33 ret->callbacks = NULL;
34
35 return ret;
header_load(HEADER_LIST * header_list,XML_LIST * xml_list)36 }
37
38 void htmlstream_free(HTMLSTREAM *hs) {
39 if (hs->callbacks != NULL) hash_destroy(hs->callbacks);
40 if (hs->tree != NULL) htmlstream_tree_free(hs->tree);
41 filebuf_free(hs->buffer);
42 xfree(hs);
43 }
44
45 void htmlstream_tree_free(struct htmlstream_node *node) {
46 struct htmlstream_node *tmp;
47
48 for (; node; node = tmp) {
49 FREE_AND_NULL(node->text);
50 if (node->tag != NULL) htmlstream_tag_free(node->tag);
51 if (node->up != NULL) htmlstream_tree_free(node->up);
52
53 tmp = node->next;
54
55 xfree(node);
56 }
57 }
58
59 void htmlstream_tag_free(struct htmlstream_tag *tag) {
60 struct htmlstream_tag_property *property;
61
62 FREE_AND_NULL(tag->name);
63
64 while (tag->properties != NULL) {
65 property = tag->properties->next;
66
67 FREE_AND_NULL(tag->properties->name);
68 FREE_AND_NULL(tag->properties->value);
69
70 xfree(tag->properties);
71
72 tag->properties = property;
73 }
74
75 xfree(tag);
76 }
77
78 void htmlstream_add(HTMLSTREAM *hs, char *data, int len) {
79 int i, highestlevel, warned = FALSE;
80 char *ptr;
81 struct htmlstream_node *node = NULL;
82 struct HASH_LIST *hl;
83 struct htmlstream_callback *cb;
84
85 if (data != NULL) filebuf_add(hs->buffer, data ,len);
86
87 for (i = hs->startpos; i < hs->buffer->size; i++) {
88 if (hs->incomment == TRUE && hs->buffer->data[i] == '>' && hs->buffer->data[i - 1] == '-')
89 hs->incomment = FALSE;
90 else if (hs->incomment == FALSE && hs->buffer->data[i] == '>' && hs->buffer->data[hs->startpos] == '<') {
91 if (hs->buffer->data[hs->startpos + 1] == '/' && i != hs->startpos + 1) {
92 for (highestlevel = 0, node = hs->tail; node; node = node->prev) {
93 if (node->level > highestlevel) highestlevel = node->level;
94 if (highestlevel > HTML_NESTING_LIMIT) {
95 /* nesting is limited because it can exhaust
96 memory when the tree is free'ed using a
97 recursive function, or when the tree is
98 displayed with the htmltree URL command */
99 if (warned == FALSE) {
100 putlog(MMLOG_WARN, "HTML nesting limit reached");
101 warned = TRUE;
102 }
103
104 break;
105 }
106
107 if (node->tag != NULL && !strncasecmp(node->tag->name, &hs->buffer->data[hs->startpos + 2], i - hs->startpos - 2)) {
108 if (node->prev != NULL && node->prev->up == NULL) {
109 /* this tag closes a previous tag,
110 bump this part of the tree up to the next
111 level. */
112 hs->tail = node->prev;
113 node->prev->level = highestlevel + 1;
114 node->prev->next = NULL;
115 node->prev->up = node;
116 node->down = node->prev;
117 node->prev = NULL;
118 }
119
120 break;
121 }
122 }
123
124 node = NULL;
125 } else {
126 node = xmalloc(sizeof(struct htmlstream_node));
127 node->text = NULL;
128 node->tag = htmlstream_tag_parse(&hs->buffer->data[hs->startpos], i - hs->startpos + 1);
129 }
130
131 hs->startpos = i;
132 } else if (hs->incomment == FALSE && (hs->buffer->data[i] == '<' || (data == NULL && i == hs->buffer->size - 1))) {
133 /* text area has ended, either because we see a starting tag
134 or the stream has ended. */
135
136 if (data != NULL) {
137 if (i >= hs->buffer->size - 4) return;
138 if (!strncmp(&hs->buffer->data[i], "<!--", 4)) {
139 hs->incomment = TRUE;
140 continue;
141 }
142 }
143
144 if (i != hs->startpos && !isempty(&hs->buffer->data[hs->startpos + 1], i - hs->startpos - 1)) {
145 if (hs->tail != NULL && hs->tail->text != NULL) {
146 /* this is possible if the last tag was a closing
147 tag that didn't match anything */
148 ptr = xstrndup(&hs->buffer->data[hs->startpos + 1], i - hs->startpos - 1);
149 hs->tail->text = string_append(hs->tail->text, ptr);
150 xfree(ptr);
151 } else {
152 node = xmalloc(sizeof(struct htmlstream_node));
153 node->tag = NULL;
154
155 if (hs->startpos == 0)
156 /* startpos is the beginning of the buffer, so there's
157 no > there */
158 node->text = xstrndup(&hs->buffer->data[hs->startpos], i - hs->startpos);
159 else
160 node->text = xstrndup(&hs->buffer->data[hs->startpos + 1], i - hs->startpos - 1);
161 }
162 }
163
164 hs->startpos = i;
165 }
166
167 if (node != NULL) {
168 node->next = NULL;
169 node->down = node->up = NULL;
170 node->level = 0;
171
172 if (hs->tree == NULL) {
173 hs->tree = hs->tail = node;
174 node->prev = NULL;
175 } else {
176 hs->tail->next = node;
177 node->prev = hs->tail;
178 hs->tail = node;
179 }
180
181 if (hs->callbacks != NULL && node->tag != NULL) {
182 hl = hash_search(hs->callbacks, node->tag->name);
183 if (hl != NULL) {
184 cb = hl->data;
185 cb->func(hs, node, cb->arg);
186 }
187 }
188
189 node = NULL;
190 }
191 }
192 }
193
194 struct htmlstream_tag *htmlstream_tag_parse(char *tag, int len) {
195 int i, indquote = FALSE;
196 int startname = 0, endname = 0, startvalue = 0, endvalue = 0;
197 struct htmlstream_tag *ret;
198 struct htmlstream_tag_property *property, *tail = NULL;
199
200 if (len < 2) return NULL;
201
202 ret = xmalloc(sizeof(struct htmlstream_tag));
203 ret->name = NULL;
204 ret->properties = NULL;
205
206 for (i = 1; i < len; i++) {
207 if (tag[i] == ' ' || tag[i] == '=' || tag[i] == '>' || tag[i] == '\"') {
208 if (tag[i] == '\"') {
209 indquote = !indquote;
210
211 if (indquote == TRUE) continue;
212 } else if (indquote == TRUE) continue;
213
214 if (startname != 0 && endname == 0)
header_xml(HEADER_LIST * header_list,XML_LIST * xml_list)215 endname = i;
216 if (startvalue != 0 && endvalue == 0)
217 endvalue = i;
218
219 if (ret->name == NULL && endname != 0) {
220 ret->name = xstrndup(tag + startname, endname - startname);
221 startname = endname = 0;
222 } else if (endvalue != 0) {
223 property = xmalloc(sizeof(struct htmlstream_tag_property));
224 property->name = xstrndup(tag + startname, endname - startname);
225 property->value = xstrndup(tag + startvalue, endvalue - startvalue);
226 property->next = NULL;
227
228 if (tail == NULL)
229 ret->properties = tail = property;
230 else {
231 tail->next = property;
232 tail = tail->next;
233 }
234
235 startname = endname = startvalue = endvalue = 0;
236 }
237
238 continue;
239 }
240
241 if (startname == 0)
242 startname = i;
243 else if (startvalue == 0 && endname != 0)
244 startvalue = i;
245 }
246
247 if (ret->name == NULL) {
248 htmlstream_tag_free(ret);
249 ret = NULL;
250 }
251
252 return ret;
253 }
254
255 int htmlstream_callback_add(HTMLSTREAM *hs, char *tag, void *func, void *arg) {
256 struct HASH_LIST *hl;
257 struct htmlstream_callback *cb;
258
259 if (hs->callbacks != NULL && hash_search(hs->callbacks, tag))
260 return -1;
261
262 if (hs->callbacks == NULL)
263 hs->callbacks = hash_create(HSTREAM_HASH_SIZE);
264
265 cb = xmalloc(sizeof(struct htmlstream_callback));
266 cb->func = func;
267 cb->arg = arg;
268
269 hl = hash_insert(hs->callbacks, xstrdup(tag), cb);
270
271 return TRUE;
272 }
273