xref: /minix/external/bsd/fetch/dist/libfetch/fetch.c (revision 9f988b79)
1 /*	$NetBSD: fetch.c,v 1.1.1.8 2009/08/21 15:12:27 joerg Exp $	*/
2 /*-
3  * Copyright (c) 1998-2004 Dag-Erling Coïdan Smørgrav
4  * Copyright (c) 2008 Joerg Sonnenberger <joerg@NetBSD.org>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer
12  *    in this position and unchanged.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. The name of the author may not be used to endorse or promote products
17  *    derived from this software without specific prior written permission
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  *
30  * $FreeBSD: fetch.c,v 1.41 2007/12/19 00:26:36 des Exp $
31  */
32 
33 #if HAVE_CONFIG_H
34 #include "config.h"
35 #endif
36 #ifndef NETBSD
37 #include <nbcompat.h>
38 #endif
39 
40 #include <ctype.h>
41 #include <errno.h>
42 #include <stdio.h>
43 #include <stdlib.h>
44 #include <string.h>
45 
46 #include "fetch.h"
47 #include "common.h"
48 
49 auth_t	 fetchAuthMethod;
50 int	 fetchLastErrCode;
51 char	 fetchLastErrString[MAXERRSTRING];
52 int	 fetchTimeout;
53 volatile int	 fetchRestartCalls = 1;
54 int	 fetchDebug;
55 
56 
57 /*** Local data **************************************************************/
58 
59 /*
60  * Error messages for parser errors
61  */
62 #define URL_MALFORMED		1
63 #define URL_BAD_SCHEME		2
64 #define URL_BAD_PORT		3
65 static struct fetcherr url_errlist[] = {
66 	{ URL_MALFORMED,	FETCH_URL,	"Malformed URL" },
67 	{ URL_BAD_SCHEME,	FETCH_URL,	"Invalid URL scheme" },
68 	{ URL_BAD_PORT,		FETCH_URL,	"Invalid server port" },
69 	{ -1,			FETCH_UNKNOWN,	"Unknown parser error" }
70 };
71 
72 
73 /*** Public API **************************************************************/
74 
75 /*
76  * Select the appropriate protocol for the URL scheme, and return a
77  * read-only stream connected to the document referenced by the URL.
78  * Also fill out the struct url_stat.
79  */
80 fetchIO *
81 fetchXGet(struct url *URL, struct url_stat *us, const char *flags)
82 {
83 
84 	if (us != NULL) {
85 		us->size = -1;
86 		us->atime = us->mtime = 0;
87 	}
88 	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
89 		return (fetchXGetFile(URL, us, flags));
90 	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
91 		return (fetchXGetFTP(URL, us, flags));
92 	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
93 		return (fetchXGetHTTP(URL, us, flags));
94 	else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
95 		return (fetchXGetHTTP(URL, us, flags));
96 	url_seterr(URL_BAD_SCHEME);
97 	return (NULL);
98 }
99 
100 /*
101  * Select the appropriate protocol for the URL scheme, and return a
102  * read-only stream connected to the document referenced by the URL.
103  */
104 fetchIO *
105 fetchGet(struct url *URL, const char *flags)
106 {
107 	return (fetchXGet(URL, NULL, flags));
108 }
109 
110 /*
111  * Select the appropriate protocol for the URL scheme, and return a
112  * write-only stream connected to the document referenced by the URL.
113  */
114 fetchIO *
115 fetchPut(struct url *URL, const char *flags)
116 {
117 
118 	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
119 		return (fetchPutFile(URL, flags));
120 	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
121 		return (fetchPutFTP(URL, flags));
122 	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
123 		return (fetchPutHTTP(URL, flags));
124 	else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
125 		return (fetchPutHTTP(URL, flags));
126 	url_seterr(URL_BAD_SCHEME);
127 	return (NULL);
128 }
129 
130 /*
131  * Select the appropriate protocol for the URL scheme, and return the
132  * size of the document referenced by the URL if it exists.
133  */
134 int
135 fetchStat(struct url *URL, struct url_stat *us, const char *flags)
136 {
137 
138 	if (us != NULL) {
139 		us->size = -1;
140 		us->atime = us->mtime = 0;
141 	}
142 	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
143 		return (fetchStatFile(URL, us, flags));
144 	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
145 		return (fetchStatFTP(URL, us, flags));
146 	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
147 		return (fetchStatHTTP(URL, us, flags));
148 	else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
149 		return (fetchStatHTTP(URL, us, flags));
150 	url_seterr(URL_BAD_SCHEME);
151 	return (-1);
152 }
153 
154 /*
155  * Select the appropriate protocol for the URL scheme, and return a
156  * list of files in the directory pointed to by the URL.
157  */
158 int
159 fetchList(struct url_list *ue, struct url *URL, const char *pattern,
160     const char *flags)
161 {
162 
163 	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
164 		return (fetchListFile(ue, URL, pattern, flags));
165 	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
166 		return (fetchListFTP(ue, URL, pattern, flags));
167 	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
168 		return (fetchListHTTP(ue, URL, pattern, flags));
169 	else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
170 		return (fetchListHTTP(ue, URL, pattern, flags));
171 	url_seterr(URL_BAD_SCHEME);
172 	return -1;
173 }
174 
175 /*
176  * Attempt to parse the given URL; if successful, call fetchXGet().
177  */
178 fetchIO *
179 fetchXGetURL(const char *URL, struct url_stat *us, const char *flags)
180 {
181 	struct url *u;
182 	fetchIO *f;
183 
184 	if ((u = fetchParseURL(URL)) == NULL)
185 		return (NULL);
186 
187 	f = fetchXGet(u, us, flags);
188 
189 	fetchFreeURL(u);
190 	return (f);
191 }
192 
193 /*
194  * Attempt to parse the given URL; if successful, call fetchGet().
195  */
196 fetchIO *
197 fetchGetURL(const char *URL, const char *flags)
198 {
199 	return (fetchXGetURL(URL, NULL, flags));
200 }
201 
202 /*
203  * Attempt to parse the given URL; if successful, call fetchPut().
204  */
205 fetchIO *
206 fetchPutURL(const char *URL, const char *flags)
207 {
208 	struct url *u;
209 	fetchIO *f;
210 
211 	if ((u = fetchParseURL(URL)) == NULL)
212 		return (NULL);
213 
214 	f = fetchPut(u, flags);
215 
216 	fetchFreeURL(u);
217 	return (f);
218 }
219 
/*
 * String front end to fetchStat(): parse URL, query it and release the
 * temporary parsed form again.  Returns -1 when parsing fails,
 * otherwise whatever fetchStat() returned.
 */
int
fetchStatURL(const char *URL, struct url_stat *us, const char *flags)
{
	struct url *parsed;
	int status;

	parsed = fetchParseURL(URL);
	if (parsed == NULL)
		return (-1);

	status = fetchStat(parsed, us, flags);
	fetchFreeURL(parsed);

	return (status);
}
237 
/*
 * String front end to fetchList(): parse URL, list it and release the
 * temporary parsed form again.  Returns -1 when parsing fails,
 * otherwise whatever fetchList() returned.
 */
int
fetchListURL(struct url_list *ue, const char *URL, const char *pattern,
    const char *flags)
{
	struct url *parsed;
	int status;

	parsed = fetchParseURL(URL);
	if (parsed == NULL)
		return (-1);

	status = fetchList(ue, parsed, pattern, flags);
	fetchFreeURL(parsed);

	return (status);
}
256 
257 /*
258  * Make a URL
259  */
260 struct url *
261 fetchMakeURL(const char *scheme, const char *host, int port, const char *doc,
262     const char *user, const char *pwd)
263 {
264 	struct url *u;
265 
266 	if (!scheme || (!host && !doc)) {
267 		url_seterr(URL_MALFORMED);
268 		return (NULL);
269 	}
270 
271 	if (port < 0 || port > 65535) {
272 		url_seterr(URL_BAD_PORT);
273 		return (NULL);
274 	}
275 
276 	/* allocate struct url */
277 	if ((u = calloc(1, sizeof(*u))) == NULL) {
278 		fetch_syserr();
279 		return (NULL);
280 	}
281 
282 	if ((u->doc = strdup(doc ? doc : "/")) == NULL) {
283 		fetch_syserr();
284 		free(u);
285 		return (NULL);
286 	}
287 
288 #define seturl(x) snprintf(u->x, sizeof(u->x), "%s", x)
289 	seturl(scheme);
290 	seturl(host);
291 	seturl(user);
292 	seturl(pwd);
293 #undef seturl
294 	u->port = port;
295 
296 	return (u);
297 }
298 
/*
 * Tell whether x may appear unescaped in the document part of a URL.
 * Returns 1 for ASCII letters and digits, for the punctuation
 * characters "$-_.+!*'(),", for the segment/path characters "?:@&=/;"
 * and for '%' (taken to introduce an already quoted octet); returns 0
 * for everything else, including NUL.
 */
int
fetch_urlpath_safe(char x)
{
	/* punctuation, then segment/path characters, then the quote mark */
	static const char extra[] = "$-_.+!*'()," "?:@&=/;" "%";

	if ('0' <= x && x <= '9')
		return 1;
	if ('A' <= x && x <= 'Z')
		return 1;
	if ('a' <= x && x <= 'z')
		return 1;

	/* strchr() would also match the terminator, so exclude NUL first */
	if (x != '\0' && strchr(extra, x) != NULL)
		return 1;

	return 0;
}
333 
334 /*
335  * Copy an existing URL.
336  */
337 struct url *
338 fetchCopyURL(const struct url *src)
339 {
340 	struct url *dst;
341 	char *doc;
342 
343 	/* allocate struct url */
344 	if ((dst = malloc(sizeof(*dst))) == NULL) {
345 		fetch_syserr();
346 		return (NULL);
347 	}
348 	if ((doc = strdup(src->doc)) == NULL) {
349 		fetch_syserr();
350 		free(dst);
351 		return (NULL);
352 	}
353 	*dst = *src;
354 	dst->doc = doc;
355 
356 	return dst;
357 }
358 
359 /*
360  * Split an URL into components. URL syntax is:
361  * [method:/][/[user[:pwd]@]host[:port]/][document]
362  * This almost, but not quite, RFC1738 URL syntax.
363  */
364 struct url *
365 fetchParseURL(const char *URL)
366 {
367 	const char *p, *q;
368 	struct url *u;
369 	size_t i, count;
370 	int pre_quoted;
371 
372 	/* allocate struct url */
373 	if ((u = calloc(1, sizeof(*u))) == NULL) {
374 		fetch_syserr();
375 		return (NULL);
376 	}
377 
378 	if (*URL == '/') {
379 		pre_quoted = 0;
380 		strcpy(u->scheme, SCHEME_FILE);
381 		p = URL;
382 		goto quote_doc;
383 	}
384 	if (strncmp(URL, "file:", 5) == 0) {
385 		pre_quoted = 1;
386 		strcpy(u->scheme, SCHEME_FILE);
387 		URL += 5;
388 		if (URL[0] != '/' || URL[1] != '/' || URL[2] != '/') {
389 			url_seterr(URL_MALFORMED);
390 			goto ouch;
391 		}
392 		p = URL + 2;
393 		goto quote_doc;
394 	}
395 	if (strncmp(URL, "http:", 5) == 0 ||
396 	    strncmp(URL, "https:", 6) == 0) {
397 		pre_quoted = 1;
398 		if (URL[4] == ':') {
399 			strcpy(u->scheme, SCHEME_HTTP);
400 			URL += 5;
401 		} else {
402 			strcpy(u->scheme, SCHEME_HTTPS);
403 			URL += 6;
404 		}
405 
406 		if (URL[0] != '/' || URL[1] != '/') {
407 			url_seterr(URL_MALFORMED);
408 			goto ouch;
409 		}
410 		URL += 2;
411 		p = URL;
412 		goto find_user;
413 	}
414 	if (strncmp(URL, "ftp:", 4) == 0) {
415 		pre_quoted = 1;
416 		strcpy(u->scheme, SCHEME_FTP);
417 		URL += 4;
418 		if (URL[0] != '/' || URL[1] != '/') {
419 			url_seterr(URL_MALFORMED);
420 			goto ouch;
421 		}
422 		URL += 2;
423 		p = URL;
424 		goto find_user;
425 	}
426 
427 	url_seterr(URL_BAD_SCHEME);
428 	goto ouch;
429 
430 find_user:
431 	p = strpbrk(URL, "/@");
432 	if (p != NULL && *p == '@') {
433 		/* username */
434 		for (q = URL, i = 0; (*q != ':') && (*q != '@'); q++) {
435 			if (i < URL_USERLEN)
436 				u->user[i++] = *q;
437 		}
438 
439 		/* password */
440 		if (*q == ':') {
441 			for (q++, i = 0; (*q != '@'); q++)
442 				if (i < URL_PWDLEN)
443 					u->pwd[i++] = *q;
444 		}
445 
446 		p++;
447 	} else {
448 		p = URL;
449 	}
450 
451 	/* hostname */
452 #ifdef INET6
453 	if (*p == '[' && (q = strchr(p + 1, ']')) != NULL &&
454 	    (*++q == '\0' || *q == '/' || *q == ':')) {
455 		if ((i = q - p - 2) > URL_HOSTLEN)
456 			i = URL_HOSTLEN;
457 		strncpy(u->host, ++p, i);
458 		p = q;
459 	} else
460 #endif
461 		for (i = 0; *p && (*p != '/') && (*p != ':'); p++)
462 			if (i < URL_HOSTLEN)
463 				u->host[i++] = *p;
464 
465 	/* port */
466 	if (*p == ':') {
467 		for (q = ++p; *q && (*q != '/'); q++)
468 			if (isdigit((unsigned char)*q))
469 				u->port = u->port * 10 + (*q - '0');
470 			else {
471 				/* invalid port */
472 				url_seterr(URL_BAD_PORT);
473 				goto ouch;
474 			}
475 		p = q;
476 	}
477 
478 	/* document */
479 	if (!*p)
480 		p = "/";
481 
482 quote_doc:
483 	count = 1;
484 	for (i = 0; p[i] != '\0'; ++i) {
485 		if ((!pre_quoted && p[i] == '%') ||
486 		    !fetch_urlpath_safe(p[i]))
487 			count += 3;
488 		else
489 			++count;
490 	}
491 
492 	if ((u->doc = malloc(count)) == NULL) {
493 		fetch_syserr();
494 		goto ouch;
495 	}
496 	for (i = 0; *p != '\0'; ++p) {
497 		if ((!pre_quoted && *p == '%') ||
498 		    !fetch_urlpath_safe(*p)) {
499 			u->doc[i++] = '%';
500 			if ((unsigned char)*p < 160)
501 				u->doc[i++] = '0' + ((unsigned char)*p) / 16;
502 			else
503 				u->doc[i++] = 'a' - 10 + ((unsigned char)*p) / 16;
504 			if ((unsigned char)*p % 16 < 10)
505 				u->doc[i++] = '0' + ((unsigned char)*p) % 16;
506 			else
507 				u->doc[i++] = 'a' - 10 + ((unsigned char)*p) % 16;
508 		} else
509 			u->doc[i++] = *p;
510 	}
511 	u->doc[i] = '\0';
512 
513 	return (u);
514 
515 ouch:
516 	free(u);
517 	return (NULL);
518 }
519 
520 /*
521  * Free a URL
522  */
523 void
524 fetchFreeURL(struct url *u)
525 {
526 	free(u->doc);
527 	free(u);
528 }
529 
/*
 * Convert one hexadecimal digit character to its numeric value.
 * Letters are folded to lower case first; 'a'..'f' map to 10..15 and
 * any other character is treated as a decimal digit (offset from '0').
 * The caller is expected to have validated the input with isxdigit().
 */
static char
xdigit2digit(char digit)
{
	const int lower = tolower((unsigned char)digit);

	if (lower < 'a' || lower > 'f')
		return (char)(lower - '0');
	return (char)(lower - 'a' + 10);
}
541 
542 /*
543  * Unquote whole URL.
544  * Skips optional parts like query or fragment identifier.
545  */
546 char *
547 fetchUnquotePath(struct url *url)
548 {
549 	char *unquoted;
550 	const char *iter;
551 	size_t i;
552 
553 	if ((unquoted = malloc(strlen(url->doc) + 1)) == NULL)
554 		return NULL;
555 
556 	for (i = 0, iter = url->doc; *iter != '\0'; ++iter) {
557 		if (*iter == '#' || *iter == '?')
558 			break;
559 		if (iter[0] != '%' ||
560 		    !isxdigit((unsigned char)iter[1]) ||
561 		    !isxdigit((unsigned char)iter[2])) {
562 			unquoted[i++] = *iter;
563 			continue;
564 		}
565 		unquoted[i++] = xdigit2digit(iter[1]) * 16 +
566 		    xdigit2digit(iter[2]);
567 		iter += 2;
568 	}
569 	unquoted[i] = '\0';
570 	return unquoted;
571 }
572 
573 
/*
 * Return a newly allocated copy of the last component of the decoded
 * path of url, i.e. everything after the final '/'.  When the decoded
 * path holds no '/', the whole decoded path is returned.  Returns NULL
 * when an allocation fails; the caller frees the result.
 */
char *
fetchUnquoteFilename(struct url *url)
{
	char *path, *name;
	const char *slash;

	path = fetchUnquotePath(url);
	if (path == NULL)
		return NULL;

	slash = strrchr(path, '/');
	if (slash != NULL) {
		name = strdup(slash + 1);
		free(path);
		return name;
	}

	return path;
}
592 
593 char *
594 fetchStringifyURL(const struct url *url)
595 {
596 	size_t total;
597 	char *doc;
598 
599 	/* scheme :// user : pwd @ host :port doc */
600 	total = strlen(url->scheme) + 3 + strlen(url->user) + 1 +
601 	    strlen(url->pwd) + 1 + strlen(url->host) + 6 + strlen(url->doc) + 1;
602 	if ((doc = malloc(total)) == NULL)
603 		return NULL;
604 	if (url->port != 0)
605 		snprintf(doc, total, "%s%s%s%s%s%s%s:%d%s",
606 		    url->scheme,
607 		    url->scheme[0] != '\0' ? "://" : "",
608 		    url->user,
609 		    url->pwd[0] != '\0' ? ":" : "",
610 		    url->pwd,
611 		    url->user[0] != '\0' || url->pwd[0] != '\0' ? "@" : "",
612 		    url->host,
613 		    (int)url->port,
614 		    url->doc);
615 	else {
616 		snprintf(doc, total, "%s%s%s%s%s%s%s%s",
617 		    url->scheme,
618 		    url->scheme[0] != '\0' ? "://" : "",
619 		    url->user,
620 		    url->pwd[0] != '\0' ? ":" : "",
621 		    url->pwd,
622 		    url->user[0] != '\0' || url->pwd[0] != '\0' ? "@" : "",
623 		    url->host,
624 		    url->doc);
625 	}
626 	return doc;
627 }
628