xref: /freebsd/lib/libfetch/fetch.c (revision 224e0c2f)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1998-2004 Dag-Erling Smørgrav
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer
12  *    in this position and unchanged.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. The name of the author may not be used to endorse or promote products
17  *    derived from this software without specific prior written permission
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  */
30 
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
33 
34 #include <sys/param.h>
35 #include <sys/errno.h>
36 
37 #include <ctype.h>
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <string.h>
41 
42 #include "fetch.h"
43 #include "common.h"
44 
45 auth_t	 fetchAuthMethod;
46 int	 fetchLastErrCode;
47 char	 fetchLastErrString[MAXERRSTRING];
48 int	 fetchTimeout;
49 int	 fetchRestartCalls = 1;
50 int	 fetchDebug;
51 
52 
53 /*** Local data **************************************************************/
54 
55 /*
56  * Error messages for parser errors
57  */
58 #define URL_MALFORMED		1
59 #define URL_BAD_SCHEME		2
60 #define URL_BAD_PORT		3
61 static struct fetcherr url_errlist[] = {
62 	{ URL_MALFORMED,	FETCH_URL,	"Malformed URL" },
63 	{ URL_BAD_SCHEME,	FETCH_URL,	"Invalid URL scheme" },
64 	{ URL_BAD_PORT,		FETCH_URL,	"Invalid server port" },
65 	{ -1,			FETCH_UNKNOWN,	"Unknown parser error" }
66 };
67 
68 
69 /*** Public API **************************************************************/
70 
71 /*
72  * Select the appropriate protocol for the URL scheme, and return a
73  * read-only stream connected to the document referenced by the URL.
74  * Also fill out the struct url_stat.
75  */
76 FILE *
77 fetchXGet(struct url *URL, struct url_stat *us, const char *flags)
78 {
79 
80 	if (us != NULL) {
81 		us->size = -1;
82 		us->atime = us->mtime = 0;
83 	}
84 	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
85 		return (fetchXGetFile(URL, us, flags));
86 	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
87 		return (fetchXGetFTP(URL, us, flags));
88 	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
89 		return (fetchXGetHTTP(URL, us, flags));
90 	else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
91 		return (fetchXGetHTTP(URL, us, flags));
92 	url_seterr(URL_BAD_SCHEME);
93 	return (NULL);
94 }
95 
/*
 * Convenience form of fetchXGet() for callers that do not want the
 * struct url_stat filled in: the stat pointer is passed as NULL.
 */
FILE *
fetchGet(struct url *URL, const char *flags)
{
	FILE *stream;

	stream = fetchXGet(URL, NULL, flags);
	return (stream);
}
105 
106 /*
107  * Select the appropriate protocol for the URL scheme, and return a
108  * write-only stream connected to the document referenced by the URL.
109  */
110 FILE *
111 fetchPut(struct url *URL, const char *flags)
112 {
113 
114 	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
115 		return (fetchPutFile(URL, flags));
116 	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
117 		return (fetchPutFTP(URL, flags));
118 	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
119 		return (fetchPutHTTP(URL, flags));
120 	else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
121 		return (fetchPutHTTP(URL, flags));
122 	url_seterr(URL_BAD_SCHEME);
123 	return (NULL);
124 }
125 
126 /*
127  * Select the appropriate protocol for the URL scheme, and return the
128  * size of the document referenced by the URL if it exists.
129  */
130 int
131 fetchStat(struct url *URL, struct url_stat *us, const char *flags)
132 {
133 
134 	if (us != NULL) {
135 		us->size = -1;
136 		us->atime = us->mtime = 0;
137 	}
138 	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
139 		return (fetchStatFile(URL, us, flags));
140 	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
141 		return (fetchStatFTP(URL, us, flags));
142 	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
143 		return (fetchStatHTTP(URL, us, flags));
144 	else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
145 		return (fetchStatHTTP(URL, us, flags));
146 	url_seterr(URL_BAD_SCHEME);
147 	return (-1);
148 }
149 
150 /*
151  * Select the appropriate protocol for the URL scheme, and return a
152  * list of files in the directory pointed to by the URL.
153  */
154 struct url_ent *
155 fetchList(struct url *URL, const char *flags)
156 {
157 
158 	if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
159 		return (fetchListFile(URL, flags));
160 	else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
161 		return (fetchListFTP(URL, flags));
162 	else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
163 		return (fetchListHTTP(URL, flags));
164 	else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
165 		return (fetchListHTTP(URL, flags));
166 	url_seterr(URL_BAD_SCHEME);
167 	return (NULL);
168 }
169 
/*
 * String front end to fetchXGet(): parse URL, run the request on the
 * parsed form, then release the parsed form again.  Returns NULL if the
 * string cannot be parsed, otherwise whatever fetchXGet() returned.
 */
FILE *
fetchXGetURL(const char *URL, struct url_stat *us, const char *flags)
{
	struct url *parsed;
	FILE *stream;

	parsed = fetchParseURL(URL);
	if (parsed == NULL)
		return (NULL);

	stream = fetchXGet(parsed, us, flags);
	fetchFreeURL(parsed);

	return (stream);
}
187 
/*
 * Convenience form of fetchXGetURL() for callers that do not want the
 * struct url_stat filled in: the stat pointer is passed as NULL.
 */
FILE *
fetchGetURL(const char *URL, const char *flags)
{
	FILE *stream;

	stream = fetchXGetURL(URL, NULL, flags);
	return (stream);
}
196 
/*
 * String front end to fetchPut(): parse URL, run the request on the
 * parsed form, then release the parsed form again.  Returns NULL if the
 * string cannot be parsed, otherwise whatever fetchPut() returned.
 */
FILE *
fetchPutURL(const char *URL, const char *flags)
{
	struct url *parsed;
	FILE *stream;

	parsed = fetchParseURL(URL);
	if (parsed == NULL)
		return (NULL);

	stream = fetchPut(parsed, flags);
	fetchFreeURL(parsed);

	return (stream);
}
214 
/*
 * String front end to fetchStat(): parse URL, run the request on the
 * parsed form, then release the parsed form again.  Returns -1 if the
 * string cannot be parsed, otherwise whatever fetchStat() returned.
 */
int
fetchStatURL(const char *URL, struct url_stat *us, const char *flags)
{
	struct url *parsed;
	int status;

	parsed = fetchParseURL(URL);
	if (parsed == NULL)
		return (-1);

	status = fetchStat(parsed, us, flags);
	fetchFreeURL(parsed);

	return (status);
}
232 
/*
 * String front end to fetchList(): parse URL, run the request on the
 * parsed form, then release the parsed form again.  Returns NULL if the
 * string cannot be parsed, otherwise whatever fetchList() returned.
 */
struct url_ent *
fetchListURL(const char *URL, const char *flags)
{
	struct url *parsed;
	struct url_ent *entries;

	parsed = fetchParseURL(URL);
	if (parsed == NULL)
		return (NULL);

	entries = fetchList(parsed, flags);
	fetchFreeURL(parsed);

	return (entries);
}
250 
251 /*
252  * Make a URL
253  */
254 struct url *
255 fetchMakeURL(const char *scheme, const char *host, int port, const char *doc,
256     const char *user, const char *pwd)
257 {
258 	struct url *u;
259 
260 	if (!scheme || (!host && !doc)) {
261 		url_seterr(URL_MALFORMED);
262 		return (NULL);
263 	}
264 
265 	if (port < 0 || port > 65535) {
266 		url_seterr(URL_BAD_PORT);
267 		return (NULL);
268 	}
269 
270 	/* allocate struct url */
271 	if ((u = calloc(1, sizeof(*u))) == NULL) {
272 		fetch_syserr();
273 		return (NULL);
274 	}
275 
276 	if ((u->doc = strdup(doc ? doc : "/")) == NULL) {
277 		fetch_syserr();
278 		free(u);
279 		return (NULL);
280 	}
281 
282 #define seturl(x) snprintf(u->x, sizeof(u->x), "%s", x)
283 	seturl(scheme);
284 	seturl(host);
285 	seturl(user);
286 	seturl(pwd);
287 #undef seturl
288 	u->port = port;
289 	u->netrcfd = -2;
290 
291 	return (u);
292 }
293 
/*
 * Map a hexadecimal digit character (0-9, a-f or A-F) to its numeric
 * value 0..15.  Any other character yields -1.
 */
static int
fetch_hexval(char ch)
{
	int value = -1;

	if ('0' <= ch && ch <= '9')
		value = ch - '0';
	else if ('a' <= ch && ch <= 'f')
		value = 10 + (ch - 'a');
	else if ('A' <= ch && ch <= 'F')
		value = 10 + (ch - 'A');

	return (value);
}
309 
/*
 * Decode percent-encoded URL component from src into dst, stopping at end
 * of string, or at @ or : separators.  Returns a pointer to the unhandled
 * part of the input string (null terminator, @, or :).  No terminator is
 * written to dst (it is the caller's responsibility).
 *
 * At most dlen bytes are stored in dst; any further decoded bytes are
 * consumed from src but discarded.  A '%' that is not followed by two hex
 * digits, or that encodes a zero byte ("%00"), is copied literally.
 */
static const char *
fetch_pctdecode(char *dst, const char *src, size_t dlen)
{
	int d1, d2;
	char c;
	const char *s;

	for (s = src; *s != '\0' && *s != '@' && *s != ':'; s++) {
		if (s[0] == '%' && (d1 = fetch_hexval(s[1])) >= 0 &&
		    (d2 = fetch_hexval(s[2])) >= 0 && (d1 > 0 || d2 > 0)) {
			c = d1 << 4 | d2;
			s += 2;
		} else {
			c = *s;
		}
		/*
		 * Only count down when a byte is actually stored.  dlen is
		 * unsigned, so decrementing it at zero would wrap around and
		 * re-enable writes past the end of dst.
		 */
		if (dlen > 0) {
			*dst++ = c;
			dlen--;
		}
	}
	return (s);
}
336 
337 /*
338  * Split an URL into components. URL syntax is:
339  * [method:/][/[user[:pwd]@]host[:port]/][document]
340  * This almost, but not quite, RFC1738 URL syntax.
341  */
342 struct url *
343 fetchParseURL(const char *URL)
344 {
345 	char *doc;
346 	const char *p, *q;
347 	struct url *u;
348 	int i;
349 
350 	/* allocate struct url */
351 	if ((u = calloc(1, sizeof(*u))) == NULL) {
352 		fetch_syserr();
353 		return (NULL);
354 	}
355 	u->netrcfd = -2;
356 
357 	/* scheme name */
358 	if ((p = strstr(URL, ":/"))) {
359 		snprintf(u->scheme, URL_SCHEMELEN+1,
360 		    "%.*s", (int)(p - URL), URL);
361 		URL = ++p;
362 		/*
363 		 * Only one slash: no host, leave slash as part of document
364 		 * Two slashes: host follows, strip slashes
365 		 */
366 		if (URL[1] == '/')
367 			URL = (p += 2);
368 	} else {
369 		p = URL;
370 	}
371 	if (!*URL || *URL == '/' || *URL == '.' ||
372 	    (u->scheme[0] == '\0' &&
373 		strchr(URL, '/') == NULL && strchr(URL, ':') == NULL))
374 		goto nohost;
375 
376 	p = strpbrk(URL, "/@");
377 	if (p && *p == '@') {
378 		/* username */
379 		q = fetch_pctdecode(u->user, URL, URL_USERLEN);
380 
381 		/* password */
382 		if (*q == ':')
383 			q = fetch_pctdecode(u->pwd, q + 1, URL_PWDLEN);
384 
385 		p++;
386 	} else {
387 		p = URL;
388 	}
389 
390 	/* hostname */
391 	if (*p == '[' && (q = strchr(p + 1, ']')) != NULL &&
392 	    (*++q == '\0' || *q == '/' || *q == ':')) {
393 		if ((i = q - p) > MAXHOSTNAMELEN)
394 			i = MAXHOSTNAMELEN;
395 		strncpy(u->host, p, i);
396 		p = q;
397 	} else {
398 		for (i = 0; *p && (*p != '/') && (*p != ':'); p++)
399 			if (i < MAXHOSTNAMELEN)
400 				u->host[i++] = *p;
401 	}
402 
403 	/* port */
404 	if (*p == ':') {
405 		for (q = ++p; *q && (*q != '/'); q++)
406 			if (isdigit((unsigned char)*q))
407 				u->port = u->port * 10 + (*q - '0');
408 			else {
409 				/* invalid port */
410 				url_seterr(URL_BAD_PORT);
411 				goto ouch;
412 			}
413 		p = q;
414 	}
415 
416 nohost:
417 	/* document */
418 	if (!*p)
419 		p = "/";
420 
421 	if (strcasecmp(u->scheme, SCHEME_HTTP) == 0 ||
422 	    strcasecmp(u->scheme, SCHEME_HTTPS) == 0) {
423 		const char hexnums[] = "0123456789abcdef";
424 
425 		/* percent-escape whitespace. */
426 		if ((doc = malloc(strlen(p) * 3 + 1)) == NULL) {
427 			fetch_syserr();
428 			goto ouch;
429 		}
430 		u->doc = doc;
431 		while (*p != '\0') {
432 			if (!isspace((unsigned char)*p)) {
433 				*doc++ = *p++;
434 			} else {
435 				*doc++ = '%';
436 				*doc++ = hexnums[((unsigned int)*p) >> 4];
437 				*doc++ = hexnums[((unsigned int)*p) & 0xf];
438 				p++;
439 			}
440 		}
441 		*doc = '\0';
442 	} else if ((u->doc = strdup(p)) == NULL) {
443 		fetch_syserr();
444 		goto ouch;
445 	}
446 
447 	DEBUG(fprintf(stderr,
448 		  "scheme:   \"%s\"\n"
449 		  "user:     \"%s\"\n"
450 		  "password: \"%s\"\n"
451 		  "host:     \"%s\"\n"
452 		  "port:     \"%d\"\n"
453 		  "document: \"%s\"\n",
454 		  u->scheme, u->user, u->pwd,
455 		  u->host, u->port, u->doc));
456 
457 	return (u);
458 
459 ouch:
460 	free(u);
461 	return (NULL);
462 }
463 
464 /*
465  * Free a URL
466  */
467 void
468 fetchFreeURL(struct url *u)
469 {
470 	free(u->doc);
471 	free(u);
472 }
473