1 /*
2     MiddleMan filtering proxy server
3     Copyright (C) 2002  Jason McLaughlin
4 
5     This program is free software; you can redistribute it and/or modify
6     it under the terms of the GNU General Public License as published by
7     the Free Software Foundation; either version 2 of the License, or
8     (at your option) any later version.
9 
10     This program is distributed in the hope that it will be useful,
11     but WITHOUT ANY WARRANTY; without even the implied warranty of
12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13     GNU General Public License for more details.
14 
15     You should have received a copy of the GNU General Public License
16     along with this program; if not, write to the Free Software
17     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18 */
19 
20 #include <stdio.h>
21 #include <string.h>
22 #include "proto.h"
23 
24 HTMLSTREAM *htmlstream_new()
25 {
26 	HTMLSTREAM *ret;
27 
28 	ret = xmalloc(sizeof(HTMLSTREAM));
29 	ret->tree = ret->tail = NULL;
30 	ret->buffer = filebuf_new();
31 	ret->startpos = 0;
32 	ret->incomment = FALSE;
33 	ret->callbacks = NULL;
34 
35 	return ret;
header_load(HEADER_LIST * header_list,XML_LIST * xml_list)36 }
37 
38 void htmlstream_free(HTMLSTREAM *hs) {
39 	if (hs->callbacks != NULL) hash_destroy(hs->callbacks);
40 	if (hs->tree != NULL) htmlstream_tree_free(hs->tree);
41 	filebuf_free(hs->buffer);
42 	xfree(hs);
43 }
44 
45 void htmlstream_tree_free(struct htmlstream_node *node) {
46 	struct htmlstream_node *tmp;
47 
48 	for (; node; node = tmp) {
49 		FREE_AND_NULL(node->text);
50 		if (node->tag != NULL) htmlstream_tag_free(node->tag);
51 		if (node->up != NULL) htmlstream_tree_free(node->up);
52 
53 		tmp = node->next;
54 
55 		xfree(node);
56 	}
57 }
58 
59 void htmlstream_tag_free(struct htmlstream_tag *tag) {
60 	struct htmlstream_tag_property *property;
61 
62 	FREE_AND_NULL(tag->name);
63 
64 	while (tag->properties != NULL) {
65 		property = tag->properties->next;
66 
67 		FREE_AND_NULL(tag->properties->name);
68 		FREE_AND_NULL(tag->properties->value);
69 
70 		xfree(tag->properties);
71 
72 		tag->properties = property;
73 	}
74 
75 	xfree(tag);
76 }
77 
78 void htmlstream_add(HTMLSTREAM *hs, char *data, int len) {
79 	int i, highestlevel, warned = FALSE;
80 	char *ptr;
81 	struct htmlstream_node *node = NULL;
82 	struct HASH_LIST *hl;
83 	struct htmlstream_callback *cb;
84 
85 	if (data != NULL) filebuf_add(hs->buffer, data ,len);
86 
87 	for (i = hs->startpos; i < hs->buffer->size; i++) {
88 		if (hs->incomment == TRUE && hs->buffer->data[i] == '>' && hs->buffer->data[i - 1] == '-')
89 			hs->incomment = FALSE;
90 		else if (hs->incomment == FALSE && hs->buffer->data[i] == '>' && hs->buffer->data[hs->startpos] == '<') {
91 			if (hs->buffer->data[hs->startpos + 1] == '/' && i != hs->startpos + 1) {
92 				for (highestlevel = 0, node = hs->tail; node; node = node->prev) {
93 					if (node->level > highestlevel) highestlevel = node->level;
94 					if (highestlevel > HTML_NESTING_LIMIT) {
95 						/* nesting is limited because it can exhaust
96 						   memory when the tree is free'ed using a
97 						   recursive function, or when the tree is
98 						   displayed with the htmltree URL command */
99 						if (warned == FALSE) {
100 							putlog(MMLOG_WARN, "HTML nesting limit reached");
101 							warned = TRUE;
102 						}
103 
104 						break;
105 					}
106 
107 					if (node->tag != NULL && !strncasecmp(node->tag->name, &hs->buffer->data[hs->startpos + 2], i - hs->startpos - 2)) {
108 						if (node->prev != NULL && node->prev->up == NULL) {
109 							/* this tag closes a previous tag,
110 							   bump this part of the tree up to the next
111 							   level. */
112 							hs->tail = node->prev;
113 							node->prev->level = highestlevel + 1;
114 							node->prev->next = NULL;
115 							node->prev->up = node;
116 							node->down = node->prev;
117 							node->prev = NULL;
118 						}
119 
120 						break;
121 					}
122 				}
123 
124 				node = NULL;
125 			} else {
126 				node = xmalloc(sizeof(struct htmlstream_node));
127 				node->text = NULL;
128 				node->tag = htmlstream_tag_parse(&hs->buffer->data[hs->startpos], i - hs->startpos + 1);
129 			}
130 
131 			hs->startpos = i;
132 		} else if (hs->incomment == FALSE && (hs->buffer->data[i] == '<' || (data == NULL && i == hs->buffer->size - 1))) {
133 			/* text area has ended, either because we see a starting tag
134 			   or the stream has ended. */
135 
136 			if (data != NULL) {
137 				if (i >= hs->buffer->size - 4) return;
138 				if (!strncmp(&hs->buffer->data[i], "<!--", 4)) {
139 					hs->incomment = TRUE;
140 					continue;
141 				}
142 			}
143 
144 			if (i != hs->startpos && !isempty(&hs->buffer->data[hs->startpos + 1], i - hs->startpos - 1)) {
145 				if (hs->tail != NULL && hs->tail->text != NULL) {
146 					/* this is possible if the last tag was a closing
147 					   tag that didn't match anything */
148 					ptr = xstrndup(&hs->buffer->data[hs->startpos + 1], i - hs->startpos - 1);
149 					hs->tail->text = string_append(hs->tail->text, ptr);
150 					xfree(ptr);
151 				} else  {
152 					node = xmalloc(sizeof(struct htmlstream_node));
153 					node->tag = NULL;
154 
155 					if (hs->startpos == 0)
156 						/* startpos is the beginning of the buffer, so there's
157 						   no > there */
158 						node->text = xstrndup(&hs->buffer->data[hs->startpos], i - hs->startpos);
159 					else
160 						node->text = xstrndup(&hs->buffer->data[hs->startpos + 1], i - hs->startpos - 1);
161 				}
162 			}
163 
164 			hs->startpos = i;
165 		}
166 
167 		if (node != NULL) {
168 			node->next = NULL;
169 			node->down = node->up = NULL;
170 			node->level = 0;
171 
172 			if (hs->tree == NULL) {
173 				hs->tree = hs->tail = node;
174 				node->prev = NULL;
175 			} else {
176 				hs->tail->next = node;
177 				node->prev = hs->tail;
178 				hs->tail = node;
179 			}
180 
181 			if (hs->callbacks != NULL && node->tag != NULL) {
182 				hl = hash_search(hs->callbacks, node->tag->name);
183 				if (hl != NULL) {
184 					cb = hl->data;
185 					cb->func(hs, node, cb->arg);
186 				}
187 			}
188 
189 			node = NULL;
190 		}
191 	}
192 }
193 
194 struct htmlstream_tag *htmlstream_tag_parse(char *tag, int len) {
195 	int i, indquote = FALSE;
196 	int startname = 0, endname = 0, startvalue = 0, endvalue = 0;
197 	struct htmlstream_tag *ret;
198 	struct htmlstream_tag_property *property, *tail = NULL;
199 
200 	if (len < 2) return NULL;
201 
202 	ret = xmalloc(sizeof(struct htmlstream_tag));
203 	ret->name = NULL;
204 	ret->properties = NULL;
205 
206 	for (i = 1; i < len; i++) {
207 		if (tag[i] == ' ' || tag[i] == '=' || tag[i] == '>' || tag[i] == '\"') {
208 			if (tag[i] == '\"') {
209 				indquote = !indquote;
210 
211 				if (indquote == TRUE) continue;
212 			} else if (indquote == TRUE) continue;
213 
214 			if (startname != 0 && endname == 0)
header_xml(HEADER_LIST * header_list,XML_LIST * xml_list)215 				endname = i;
216 			if (startvalue != 0 && endvalue == 0)
217 				endvalue = i;
218 
219 			if (ret->name == NULL && endname != 0) {
220 				ret->name = xstrndup(tag + startname, endname - startname);
221 				startname = endname = 0;
222 			} else if (endvalue != 0) {
223 				property = xmalloc(sizeof(struct htmlstream_tag_property));
224 				property->name = xstrndup(tag + startname, endname - startname);
225 				property->value = xstrndup(tag + startvalue, endvalue - startvalue);
226 				property->next = NULL;
227 
228 				if (tail == NULL)
229 					ret->properties = tail = property;
230 				else {
231 					tail->next = property;
232 					tail = tail->next;
233 				}
234 
235 				startname = endname = startvalue = endvalue = 0;
236 			}
237 
238 			continue;
239 		}
240 
241 		if (startname == 0)
242 			startname = i;
243 		else if (startvalue == 0 && endname != 0)
244 			startvalue = i;
245 	}
246 
247 	if (ret->name == NULL) {
248 		htmlstream_tag_free(ret);
249 		ret = NULL;
250 	}
251 
252 	return ret;
253 }
254 
255 int htmlstream_callback_add(HTMLSTREAM *hs, char *tag, void *func, void *arg) {
256 	struct HASH_LIST *hl;
257 	struct htmlstream_callback *cb;
258 
259 	if (hs->callbacks != NULL && hash_search(hs->callbacks, tag))
260 		return -1;
261 
262 	if (hs->callbacks == NULL)
263 		hs->callbacks = hash_create(HSTREAM_HASH_SIZE);
264 
265 	cb = xmalloc(sizeof(struct htmlstream_callback));
266 	cb->func = func;
267 	cb->arg = arg;
268 
269 	hl = hash_insert(hs->callbacks, xstrdup(tag), cb);
270 
271 	return TRUE;
272 }
273