1 /*
2  * Copyright 2008 Rob Kendrick <rjek@netsurf-browser.org>
3  *
4  * This file is part of NetSurf.
5  *
6  * NetSurf is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; version 2 of the License.
9  *
10  * NetSurf is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 /**
20  * \file
21  * data scheme handling.  See http://tools.ietf.org/html/rfc2397
22  */
23 
24 #include <stdbool.h>
25 #include <string.h>
26 #include <stdarg.h>
27 #include <stdlib.h>
28 #include <libwapcaplet/libwapcaplet.h>
29 #include <nsutils/base64.h>
30 
31 #include "netsurf/inttypes.h"
32 #include "utils/url.h"
33 #include "utils/nsurl.h"
34 #include "utils/corestrings.h"
35 #include "utils/log.h"
36 #include "utils/utils.h"
37 #include "utils/ring.h"
38 
39 #include "content/fetch.h"
40 #include "content/fetchers.h"
41 #include "content/fetchers/data.h"
42 
43 struct fetch_data_context {
44 	struct fetch *parent_fetch;
45 	nsurl *url;
46 	char *mimetype;
47 	char *data;
48 	size_t datalen;
49 	bool base64;
50 
51 	bool aborted;
52 	bool locked;
53 
54 	struct fetch_data_context *r_next, *r_prev;
55 };
56 
/** Ring of data: fetch contexts waiting to be handled by the poll handler. */
static struct fetch_data_context *ring = NULL;
58 
/**
 * Initialise the data: scheme fetcher.
 *
 * There is no state to prepare; the call is merely logged.
 *
 * \param scheme  Scheme the fetcher is being initialised for.
 * \return true, unconditionally.
 */
static bool fetch_data_initialise(lwc_string *scheme)
{
	NSLOG(netsurf, INFO,
	      "fetch_data_initialise called for %s", lwc_string_data(scheme));

	return true;
}
66 
/**
 * Finalise the data: scheme fetcher.
 *
 * Nothing needs tearing down; the call is merely logged.
 *
 * \param scheme  Scheme the fetcher is being finalised for.
 */
static void fetch_data_finalise(lwc_string *scheme)
{
	NSLOG(netsurf, INFO,
	      "fetch_data_finalise called for %s", lwc_string_data(scheme));
}
72 
/**
 * Report whether this fetcher is able to fetch a URL.
 *
 * \param url  URL being queried; not inspected.
 * \return true, unconditionally.
 */
static bool fetch_data_can_fetch(const nsurl *url)
{
	(void)url; /* every URL is accepted */

	return true;
}
77 
fetch_data_send_callback(const fetch_msg * msg,struct fetch_data_context * c)78 static void fetch_data_send_callback(const fetch_msg *msg,
79 		struct fetch_data_context *c)
80 {
81 	c->locked = true;
82 	fetch_send_callback(msg, c->parent_fetch);
83 	c->locked = false;
84 }
85 
fetch_data_send_header(struct fetch_data_context * ctx,const char * fmt,...)86 static void fetch_data_send_header(struct fetch_data_context *ctx,
87 		const char *fmt, ...)
88 {
89 	char header[64];
90 	fetch_msg msg;
91 	va_list ap;
92 	int len;
93 
94 	va_start(ap, fmt);
95 	len = vsnprintf(header, sizeof(header), fmt, ap);
96 	va_end(ap);
97 
98 	if (len >= (int)sizeof(header) || len < 0) {
99 		return;
100 	}
101 
102 	msg.type = FETCH_HEADER;
103 	msg.data.header_or_data.len = len;
104 	msg.data.header_or_data.buf = (const uint8_t *)header;
105 	fetch_data_send_callback(&msg, ctx);
106 }
107 
/**
 * Create the context for a new data: fetch and queue it on the ring.
 *
 * \param parent_fetch    Fetch the new context belongs to.
 * \param url             URL to fetch; a reference is taken.
 * \param only_2xx        Not used by this fetcher.
 * \param downgrade_tls   Not used by this fetcher.
 * \param post_urlenc     Not used by this fetcher.
 * \param post_multipart  Not used by this fetcher.
 * \param headers         Not used by this fetcher.
 * \return the new fetch context, or NULL if it could not be allocated.
 */
static void *fetch_data_setup(struct fetch *parent_fetch, nsurl *url,
		 bool only_2xx, bool downgrade_tls, const char *post_urlenc,
		 const struct fetch_multipart_data *post_multipart,
		 const char **headers)
{
	struct fetch_data_context *fresh;

	/* Zeroed allocation: all flags start false, all pointers NULL */
	fresh = calloc(1, sizeof(*fresh));
	if (fresh != NULL) {
		fresh->parent_fetch = parent_fetch;
		fresh->url = nsurl_ref(url);

		RING_INSERT(ring, fresh);
	}

	return fresh;
}
125 
/**
 * Start a data: fetch.
 *
 * Nothing is done here; the work happens in the poll handler.
 *
 * \param ctx  Fetch context; not inspected.
 * \return true, unconditionally.
 */
static bool fetch_data_start(void *ctx)
{
	(void)ctx; /* nothing to do at start time */

	return true;
}
130 
fetch_data_free(void * ctx)131 static void fetch_data_free(void *ctx)
132 {
133 	struct fetch_data_context *c = ctx;
134 
135 	nsurl_unref(c->url);
136 	free(c->data);
137 	free(c->mimetype);
138 	free(ctx);
139 }
140 
fetch_data_abort(void * ctx)141 static void fetch_data_abort(void *ctx)
142 {
143 	struct fetch_data_context *c = ctx;
144 
145 	/* To avoid the poll loop having to deal with the fetch context
146 	 * disappearing from under it, we simply flag the abort here.
147 	 * The poll loop itself will perform the appropriate cleanup.
148 	 */
149 	c->aborted = true;
150 }
151 
fetch_data_process(struct fetch_data_context * c)152 static bool fetch_data_process(struct fetch_data_context *c)
153 {
154 	nserror res;
155 	fetch_msg msg;
156 	const char *params;
157 	const char *comma;
158 	char *unescaped;
159 	size_t unescaped_len;
160 
161 	/* format of a data: URL is:
162 	 *   data:[<mimetype>][;base64],<data>
163 	 * The mimetype is optional.  If it is missing, the , before the
164 	 * data must still be there.
165 	 */
166 
167 	NSLOG(netsurf, DEEPDEBUG, "url: %.140s", nsurl_access(c->url));
168 
169 	if (nsurl_length(c->url) < 6) {
170 		/* 6 is the minimum possible length (data:,) */
171 		msg.type = FETCH_ERROR;
172 		msg.data.error = "Malformed data: URL";
173 		fetch_data_send_callback(&msg, c);
174 		return false;
175 	}
176 
177 	/* skip the data: part */
178 	params = nsurl_access(c->url) + SLEN("data:");
179 
180 	/* find the comma */
181 	if ( (comma = strchr(params, ',')) == NULL) {
182 		msg.type = FETCH_ERROR;
183 		msg.data.error = "Malformed data: URL";
184 		fetch_data_send_callback(&msg, c);
185 		return false;
186 	}
187 
188 	if (params[0] == ',') {
189 		/* there is no mimetype here, assume text/plain */
190 		c->mimetype = strdup("text/plain;charset=US-ASCII");
191 	} else {
192 		/* make a copy of everything between data: and the comma */
193 		c->mimetype = strndup(params, comma - params);
194 	}
195 
196 	if (c->mimetype == NULL) {
197 		msg.type = FETCH_ERROR;
198 		msg.data.error =
199 			"Unable to allocate memory for mimetype in data: URL";
200 		fetch_data_send_callback(&msg, c);
201 		return false;
202 	}
203 
204 	if (strcmp(c->mimetype + strlen(c->mimetype) - 7, ";base64") == 0) {
205 		c->base64 = true;
206 		c->mimetype[strlen(c->mimetype) - 7] = '\0';
207 	} else {
208 		c->base64 = false;
209 	}
210 
211 	/* URL unescape the data first, just incase some insane page
212 	 * decides to nest URL and base64 encoding.  Like, say, Acid2.
213 	 */
214 	res = url_unescape(comma + 1, 0, &unescaped_len, &unescaped);
215 	if (res != NSERROR_OK) {
216 		msg.type = FETCH_ERROR;
217 		msg.data.error = "Unable to URL decode data: URL";
218 		fetch_data_send_callback(&msg, c);
219 		return false;
220 	}
221 
222 	if (c->base64) {
223 		if ((nsu_base64_decode_alloc((uint8_t *)unescaped,
224 					     unescaped_len,
225 					     (uint8_t **)&c->data,
226 					     &c->datalen) != NSUERROR_OK) ||
227 		    (c->data == NULL)) {
228 			msg.type = FETCH_ERROR;
229 			msg.data.error = "Unable to Base64 decode data: URL";
230 			fetch_data_send_callback(&msg, c);
231 			free(unescaped);
232 			return false;
233 		}
234 		free(unescaped);
235 	} else {
236 		c->datalen = unescaped_len;
237 		c->data = unescaped;
238 	}
239 
240 
241 	return true;
242 }
243 
/**
 * Handle every pending data: fetch.
 *
 * Each context is detached from the ring, decoded and reported to its
 * owner as headers, data and a finished message; the owning fetch is then
 * removed from the queues and freed.  Contexts which are currently inside
 * a callback are set aside and restored to the ring at the end.
 *
 * \param scheme  Scheme being polled; not inspected.
 */
static void fetch_data_poll(lwc_string *scheme)
{
	struct fetch_data_context *deferred = NULL;
	struct fetch_data_context *cur;
	fetch_msg msg;

	/* Drain the ring, detaching the head entry on every pass */
	while (ring != NULL) {
		cur = ring;
		RING_REMOVE(ring, cur);

		/* A locked entry is in the middle of a callback, which
		 * means this function has been re-entered (a callback led
		 * to fetch_poll() being called again).  Set such entries
		 * aside rather than touching them.
		 */
		if (cur->locked) {
			RING_INSERT(deferred, cur);
			continue;
		}

		if (cur->aborted || !fetch_data_process(cur)) {
			/* Aborted before processing, or processing failed */
			NSLOG(netsurf, INFO, "Processing of %.140s failed!",
			      nsurl_access(cur->url));

			/* fetch_data_process() must never leave the
			 * context locked; if it has, it is broken.
			 */
			assert(cur->locked == false);
		} else {
			fetch_set_http_code(cur->parent_fetch, 200);
			NSLOG(netsurf, INFO,
			      "setting data: MIME type to %s, length to %"PRIsizet,
			      cur->mimetype,
			      cur->datalen);

			/* Every callback may cause the fetch to be aborted,
			 * so the flag is re-tested after each and every
			 * message that is sent.
			 */
			fetch_data_send_header(cur, "Content-Type: %s",
					cur->mimetype);

			if (!cur->aborted) {
				fetch_data_send_header(cur,
						"Content-Length: %" PRIsizet,
						cur->datalen);
			}

			if (!cur->aborted) {
				/* max-age of one year */
				fetch_data_send_header(cur,
						"Cache-Control: max-age=31536000");
			}

			if (!cur->aborted) {
				msg.type = FETCH_DATA;
				msg.data.header_or_data.buf =
						(const uint8_t *) cur->data;
				msg.data.header_or_data.len = cur->datalen;
				fetch_data_send_callback(&msg, cur);
			}

			if (!cur->aborted) {
				msg.type = FETCH_FINISHED;
				fetch_data_send_callback(&msg, cur);
			}
		}

		/* Whatever happened, this fetch is now complete */
		fetch_remove_from_queues(cur->parent_fetch);
		fetch_free(cur->parent_fetch);
	}

	/* Restore any entries that were set aside so a later poll sees them */
	ring = deferred;
}
323 
fetch_data_register(void)324 nserror fetch_data_register(void)
325 {
326 	lwc_string *scheme = lwc_string_ref(corestring_lwc_data);
327 	const struct fetcher_operation_table fetcher_ops = {
328 		.initialise = fetch_data_initialise,
329 		.acceptable = fetch_data_can_fetch,
330 		.setup = fetch_data_setup,
331 		.start = fetch_data_start,
332 		.abort = fetch_data_abort,
333 		.free = fetch_data_free,
334 		.poll = fetch_data_poll,
335 		.finalise = fetch_data_finalise
336 	};
337 
338 	return fetcher_add(scheme, &fetcher_ops);
339 }
340