xref: /dragonfly/lib/libfetch/fetch.c (revision 58645856)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1998-2004 Dag-Erling Smørgrav
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer
12  *    in this position and unchanged.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. The name of the author may not be used to endorse or promote products
17  *    derived from this software without specific prior written permission
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  *
30  * $FreeBSD: head/lib/libfetch/fetch.c 357579 2020-02-05 16:55:00Z emaste $
31  */
32 
33 #include <sys/cdefs.h>
34 
35 #include <sys/param.h>
36 
37 #include <netinet/in.h>
38 
39 #include <errno.h>
40 #include <ctype.h>
41 #include <stdio.h>
42 #include <stdlib.h>
43 #include <string.h>
44 
45 #include "fetch.h"
46 #include "common.h"
47 
48 auth_t	 fetchAuthMethod;
49 int	 fetchLastErrCode;
50 char	 fetchLastErrString[MAXERRSTRING];
51 int	 fetchTimeout;
52 int	 fetchRestartCalls = 1;
53 int	 fetchDebug;
54 
55 
56 /*** Local data **************************************************************/
57 
58 /*
59  * Error messages for parser errors
60  */
61 #define URL_MALFORMED		1
62 #define URL_BAD_SCHEME		2
63 #define URL_BAD_PORT		3
64 static struct fetcherr url_errlist[] = {
65 	{ URL_MALFORMED,	FETCH_URL,	"Malformed URL" },
66 	{ URL_BAD_SCHEME,	FETCH_URL,	"Invalid URL scheme" },
67 	{ URL_BAD_PORT,		FETCH_URL,	"Invalid server port" },
68 	{ -1,			FETCH_UNKNOWN,	"Unknown parser error" }
69 };
70 
71 
72 /*** Public API **************************************************************/
73 
74 /*
75  * Select the appropriate protocol for the URL scheme, and return a
76  * read-only stream connected to the document referenced by the URL.
77  * Also fill out the struct url_stat.
78  */
79 FILE *
80 fetchXGet(struct url *URL, struct url_stat *us, const char *flags)
81 {
82 
83 	if (us != NULL) {
84 		us->size = -1;
85 		us->atime = us->mtime = 0;
86 	}
87 	if (strcmp(URL->scheme, SCHEME_FILE) == 0)
88 		return (fetchXGetFile(URL, us, flags));
89 	else if (strcmp(URL->scheme, SCHEME_FTP) == 0)
90 		return (fetchXGetFTP(URL, us, flags));
91 	else if (strcmp(URL->scheme, SCHEME_HTTP) == 0)
92 		return (fetchXGetHTTP(URL, us, flags));
93 	else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0)
94 		return (fetchXGetHTTP(URL, us, flags));
95 	url_seterr(URL_BAD_SCHEME);
96 	return (NULL);
97 }
98 
/*
 * Convenience form of fetchXGet() for callers that do not want stat
 * information: identical except that no struct url_stat is passed.
 */
FILE *
fetchGet(struct url *URL, const char *flags)
{
	FILE *stream;

	stream = fetchXGet(URL, NULL, flags);
	return (stream);
}
108 
109 /*
110  * Select the appropriate protocol for the URL scheme, and return a
111  * write-only stream connected to the document referenced by the URL.
112  */
113 FILE *
114 fetchPut(struct url *URL, const char *flags)
115 {
116 
117 	if (strcmp(URL->scheme, SCHEME_FILE) == 0)
118 		return (fetchPutFile(URL, flags));
119 	else if (strcmp(URL->scheme, SCHEME_FTP) == 0)
120 		return (fetchPutFTP(URL, flags));
121 	else if (strcmp(URL->scheme, SCHEME_HTTP) == 0)
122 		return (fetchPutHTTP(URL, flags));
123 	else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0)
124 		return (fetchPutHTTP(URL, flags));
125 	url_seterr(URL_BAD_SCHEME);
126 	return (NULL);
127 }
128 
129 /*
130  * Select the appropriate protocol for the URL scheme, and return the
131  * size of the document referenced by the URL if it exists.
132  */
133 int
134 fetchStat(struct url *URL, struct url_stat *us, const char *flags)
135 {
136 
137 	if (us != NULL) {
138 		us->size = -1;
139 		us->atime = us->mtime = 0;
140 	}
141 	if (strcmp(URL->scheme, SCHEME_FILE) == 0)
142 		return (fetchStatFile(URL, us, flags));
143 	else if (strcmp(URL->scheme, SCHEME_FTP) == 0)
144 		return (fetchStatFTP(URL, us, flags));
145 	else if (strcmp(URL->scheme, SCHEME_HTTP) == 0)
146 		return (fetchStatHTTP(URL, us, flags));
147 	else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0)
148 		return (fetchStatHTTP(URL, us, flags));
149 	url_seterr(URL_BAD_SCHEME);
150 	return (-1);
151 }
152 
153 /*
154  * Select the appropriate protocol for the URL scheme, and return a
155  * list of files in the directory pointed to by the URL.
156  */
157 struct url_ent *
158 fetchList(struct url *URL, const char *flags)
159 {
160 
161 	if (strcmp(URL->scheme, SCHEME_FILE) == 0)
162 		return (fetchListFile(URL, flags));
163 	else if (strcmp(URL->scheme, SCHEME_FTP) == 0)
164 		return (fetchListFTP(URL, flags));
165 	else if (strcmp(URL->scheme, SCHEME_HTTP) == 0)
166 		return (fetchListHTTP(URL, flags));
167 	else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0)
168 		return (fetchListHTTP(URL, flags));
169 	url_seterr(URL_BAD_SCHEME);
170 	return (NULL);
171 }
172 
/*
 * String front end to fetchXGet(): parse URL, open it for reading, and
 * release the parsed form again before returning.
 *
 * Returns NULL if the URL cannot be parsed (in which case us is left
 * untouched), otherwise the result of fetchXGet().
 */
FILE *
fetchXGetURL(const char *URL, struct url_stat *us, const char *flags)
{
	struct url *parsed;
	FILE *stream = NULL;

	parsed = fetchParseURL(URL);
	if (parsed != NULL) {
		stream = fetchXGet(parsed, us, flags);
		fetchFreeURL(parsed);
	}
	return (stream);
}
190 
/*
 * Convenience form of fetchXGetURL() for callers that do not want stat
 * information: identical except that no struct url_stat is passed.
 */
FILE *
fetchGetURL(const char *URL, const char *flags)
{
	FILE *stream;

	stream = fetchXGetURL(URL, NULL, flags);
	return (stream);
}
199 
/*
 * String front end to fetchPut(): parse URL, open it for writing, and
 * release the parsed form again before returning.
 *
 * Returns NULL if the URL cannot be parsed, otherwise the result of
 * fetchPut().
 */
FILE *
fetchPutURL(const char *URL, const char *flags)
{
	struct url *parsed;
	FILE *stream = NULL;

	parsed = fetchParseURL(URL);
	if (parsed != NULL) {
		stream = fetchPut(parsed, flags);
		fetchFreeURL(parsed);
	}
	return (stream);
}
217 
218 /*
219  * Attempt to parse the given URL; if successful, call fetchStat().
220  */
221 int
222 fetchStatURL(const char *URL, struct url_stat *us, const char *flags)
223 {
224 	struct url *u;
225 	int s;
226 
227 	if ((u = fetchParseURL(URL)) == NULL)
228 		return (-1);
229 
230 	s = fetchStat(u, us, flags);
231 
232 	fetchFreeURL(u);
233 	return (s);
234 }
235 
236 /*
237  * Attempt to parse the given URL; if successful, call fetchList().
238  */
239 struct url_ent *
240 fetchListURL(const char *URL, const char *flags)
241 {
242 	struct url *u;
243 	struct url_ent *ue;
244 
245 	if ((u = fetchParseURL(URL)) == NULL)
246 		return (NULL);
247 
248 	ue = fetchList(u, flags);
249 
250 	fetchFreeURL(u);
251 	return (ue);
252 }
253 
254 /*
255  * Make a URL
256  */
257 struct url *
258 fetchMakeURL(const char *scheme, const char *host, int port, const char *doc,
259     const char *user, const char *pwd)
260 {
261 	struct url *u;
262 
263 	if (!scheme || (!host && !doc)) {
264 		url_seterr(URL_MALFORMED);
265 		return (NULL);
266 	}
267 
268 	if (port < 0 || port > 65535) {
269 		url_seterr(URL_BAD_PORT);
270 		return (NULL);
271 	}
272 
273 	/* allocate struct url */
274 	if ((u = calloc(1, sizeof(*u))) == NULL) {
275 		fetch_syserr();
276 		return (NULL);
277 	}
278 	u->netrcfd = -1;
279 
280 	if ((u->doc = strdup(doc ? doc : "/")) == NULL) {
281 		fetch_syserr();
282 		free(u);
283 		return (NULL);
284 	}
285 
286 #define seturl(x) snprintf(u->x, sizeof(u->x), "%s", x)
287 	seturl(scheme);
288 	seturl(host);
289 	seturl(user);
290 	seturl(pwd);
291 #undef seturl
292 	u->port = port;
293 
294 	return (u);
295 }
296 
/*
 * Map a single hexadecimal digit (either case) to its numeric value
 * 0..15.  Any other character yields -1.
 */
static int
fetch_hexval(char ch)
{
	int value = -1;

	if ('0' <= ch && ch <= '9')
		value = ch - '0';
	else if ('a' <= ch && ch <= 'f')
		value = 10 + (ch - 'a');
	else if ('A' <= ch && ch <= 'F')
		value = 10 + (ch - 'A');
	return (value);
}
312 
/*
 * Copy one URL component from src to dst, expanding %XX escapes on the
 * way.  Copying stops at the end of the string or at the first literal
 * '@' or ':'.
 *
 * At most dlen bytes are written and no terminating NUL is added; the
 * caller is responsible for termination.
 *
 * Returns a pointer to the character that stopped the scan (NUL, '@' or
 * ':').  Returns NULL if the component does not fit in dlen bytes, or if
 * it contains a '%' that is not followed by two hex digits or that
 * encodes a zero byte ("%00").
 */
static const char *
fetch_pctdecode(char *dst, const char *src, size_t dlen)
{
	const char *in = src;
	char *out = dst;
	size_t room = dlen;
	int hi, lo;
	char ch;

	while (*in != '\0' && *in != '@' && *in != ':') {
		if (*in != '%') {
			/* Ordinary character, copied verbatim. */
			ch = *in;
			in += 1;
		} else {
			/*
			 * Check the first digit before looking at the
			 * second so we never read past a NUL that directly
			 * follows the '%'.
			 */
			hi = fetch_hexval(in[1]);
			if (hi < 0)
				return (NULL);
			lo = fetch_hexval(in[2]);
			if (lo < 0 || (hi == 0 && lo == 0))
				return (NULL);
			ch = hi << 4 | lo;
			in += 3;
		}
		if (room == 0)
			return (NULL);
		room--;
		*out++ = ch;
	}
	return (in);
}
344 
345 /*
346  * Split an URL into components. URL syntax is:
347  * [method:/][/[user[:pwd]@]host[:port]/][document]
348  * This almost, but not quite, RFC1738 URL syntax.
349  */
350 struct url *
351 fetchParseURL(const char *URL)
352 {
353 	char *doc;
354 	const char *p, *q;
355 	struct url *u;
356 	int i, n;
357 
358 	/* allocate struct url */
359 	if ((u = calloc(1, sizeof(*u))) == NULL) {
360 		fetch_syserr();
361 		return (NULL);
362 	}
363 	u->netrcfd = -1;
364 
365 	/* scheme name */
366 	if ((p = strstr(URL, ":/"))) {
367                 if (p - URL > URL_SCHEMELEN)
368                         goto ouch;
369                 for (i = 0; URL + i < p; i++)
370                         u->scheme[i] = tolower((unsigned char)URL[i]);
371 		URL = ++p;
372 		/*
373 		 * Only one slash: no host, leave slash as part of document
374 		 * Two slashes: host follows, strip slashes
375 		 */
376 		if (URL[1] == '/')
377 			URL = (p += 2);
378 	} else {
379 		p = URL;
380 	}
381 	if (!*URL || *URL == '/' || *URL == '.' ||
382 	    (u->scheme[0] == '\0' &&
383 		strchr(URL, '/') == NULL && strchr(URL, ':') == NULL))
384 		goto nohost;
385 
386 	p = strpbrk(URL, "/@");
387 	if (p && *p == '@') {
388 		/* username */
389 		q = fetch_pctdecode(u->user, URL, URL_USERLEN);
390 		if (q == NULL)
391 			goto ouch;
392 
393 		/* password */
394 		if (*q == ':') {
395 			q = fetch_pctdecode(u->pwd, q + 1, URL_PWDLEN);
396 			if (q == NULL)
397 				goto ouch;
398 		}
399 		p++;
400 	} else {
401 		p = URL;
402 	}
403 
404 	/* hostname */
405 	if (*p == '[') {
406 		q = p + 1 + strspn(p + 1, ":0123456789ABCDEFabcdef");
407 		if (*q++ != ']')
408 			goto ouch;
409 	} else {
410 		/* valid characters in a DNS name */
411 		q = p + strspn(p, "-." "0123456789"
412 		    "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "_"
413 		    "abcdefghijklmnopqrstuvwxyz");
414 	}
415 	if ((*q != '\0' && *q != '/' && *q != ':') || q - p > MAXHOSTNAMELEN)
416 		goto ouch;
417 	for (i = 0; p + i < q; i++)
418 		u->host[i] = tolower((unsigned char)p[i]);
419 	u->host[i] = '\0';
420 	p = q;
421 
422 	/* port */
423 	if (*p == ':') {
424 		for (n = 0, q = ++p; *q && (*q != '/'); q++) {
425 			if (*q >= '0' && *q <= '9' && n < INT_MAX / 10) {
426 				n = n * 10 + (*q - '0');
427 			} else {
428 				/* invalid port */
429 				url_seterr(URL_BAD_PORT);
430 				goto ouch;
431 			}
432 		}
433 		if (n < 1 || n > IPPORT_MAX)
434 			goto ouch;
435 		u->port = n;
436 		p = q;
437 	}
438 
439 nohost:
440 	/* document */
441 	if (!*p)
442 		p = "/";
443 
444 	if (strcmp(u->scheme, SCHEME_HTTP) == 0 ||
445 	    strcmp(u->scheme, SCHEME_HTTPS) == 0) {
446 		const char hexnums[] = "0123456789abcdef";
447 
448 		/* percent-escape whitespace. */
449 		if ((doc = malloc(strlen(p) * 3 + 1)) == NULL) {
450 			fetch_syserr();
451 			goto ouch;
452 		}
453 		u->doc = doc;
454 		while (*p != '\0') {
455 			if (!isspace((unsigned char)*p)) {
456 				*doc++ = *p++;
457 			} else {
458 				*doc++ = '%';
459 				*doc++ = hexnums[((unsigned int)*p) >> 4];
460 				*doc++ = hexnums[((unsigned int)*p) & 0xf];
461 				p++;
462 			}
463 		}
464 		*doc = '\0';
465 	} else if ((u->doc = strdup(p)) == NULL) {
466 		fetch_syserr();
467 		goto ouch;
468 	}
469 
470 	DEBUGF("scheme:   \"%s\"\n"
471 	    "user:     \"%s\"\n"
472 	    "password: \"%s\"\n"
473 	    "host:     \"%s\"\n"
474 	    "port:     \"%d\"\n"
475 	    "document: \"%s\"\n",
476 	    u->scheme, u->user, u->pwd,
477 	    u->host, u->port, u->doc);
478 
479 	return (u);
480 
481 ouch:
482 	free(u);
483 	return (NULL);
484 }
485 
486 /*
487  * Free a URL
488  */
489 void
490 fetchFreeURL(struct url *u)
491 {
492 	free(u->doc);
493 	free(u);
494 }
495