xref: /freebsd/lib/libfetch/fetch.c (revision 315ee00f)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1998-2004 Dag-Erling Smørgrav
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer
12  *    in this position and unchanged.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. The name of the author may not be used to endorse or promote products
17  *    derived from this software without specific prior written permission
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  */
30 
31 #include <sys/cdefs.h>
32 #include <sys/param.h>
33 
34 #include <netinet/in.h>
35 
36 #include <errno.h>
37 #include <ctype.h>
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <string.h>
41 
42 #include "fetch.h"
43 #include "common.h"
44 
/*
 * Library-wide state with external linkage.  Everything here starts out
 * zeroed (static storage duration) except fetchRestartCalls, which is
 * explicitly initialised to 1.  None of these are read or written in the
 * routines below; NOTE(review): presumably they are declared in fetch.h
 * and used by the protocol back ends and by callers -- confirm there.
 */
auth_t	 fetchAuthMethod;
int	 fetchLastErrCode;
char	 fetchLastErrString[MAXERRSTRING];
int	 fetchTimeout;
int	 fetchRestartCalls = 1;
int	 fetchDebug;
51 
52 
53 /*** Local data **************************************************************/
54 
/*
 * Error messages for parser errors.
 *
 * The URL_* codes are the values handed to url_seterr() by the routines
 * in this file.  Each table row pairs one of those codes with a FETCH_*
 * category and a human-readable message; the final row, keyed by -1, is
 * the "unknown" entry.  NOTE(review): presumably url_seterr() (not defined
 * in this file) looks the code up in this table and falls back to the
 * last row -- confirm in common.h.
 */
#define URL_MALFORMED		1
#define URL_BAD_SCHEME		2
#define URL_BAD_PORT		3
static struct fetcherr url_errlist[] = {
	{ URL_MALFORMED,	FETCH_URL,	"Malformed URL" },
	{ URL_BAD_SCHEME,	FETCH_URL,	"Invalid URL scheme" },
	{ URL_BAD_PORT,		FETCH_URL,	"Invalid server port" },
	{ -1,			FETCH_UNKNOWN,	"Unknown parser error" }
};
67 
68 
69 /*** Public API **************************************************************/
70 
71 /*
72  * Select the appropriate protocol for the URL scheme, and return a
73  * read-only stream connected to the document referenced by the URL.
74  * Also fill out the struct url_stat.
75  */
76 FILE *
77 fetchXGet(struct url *URL, struct url_stat *us, const char *flags)
78 {
79 
80 	if (us != NULL) {
81 		us->size = -1;
82 		us->atime = us->mtime = 0;
83 	}
84 	if (strcmp(URL->scheme, SCHEME_FILE) == 0)
85 		return (fetchXGetFile(URL, us, flags));
86 	else if (strcmp(URL->scheme, SCHEME_FTP) == 0)
87 		return (fetchXGetFTP(URL, us, flags));
88 	else if (strcmp(URL->scheme, SCHEME_HTTP) == 0)
89 		return (fetchXGetHTTP(URL, us, flags));
90 	else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0)
91 		return (fetchXGetHTTP(URL, us, flags));
92 	url_seterr(URL_BAD_SCHEME);
93 	return (NULL);
94 }
95 
/*
 * Open the document named by URL for reading.  This is fetchXGet()
 * called with a NULL struct url_stat pointer, for callers that do not
 * need the document's metadata.
 */
FILE *
fetchGet(struct url *URL, const char *flags)
{
	struct url_stat *const no_stat = NULL;

	return (fetchXGet(URL, no_stat, flags));
}
105 
106 /*
107  * Select the appropriate protocol for the URL scheme, and return a
108  * write-only stream connected to the document referenced by the URL.
109  */
110 FILE *
111 fetchPut(struct url *URL, const char *flags)
112 {
113 
114 	if (strcmp(URL->scheme, SCHEME_FILE) == 0)
115 		return (fetchPutFile(URL, flags));
116 	else if (strcmp(URL->scheme, SCHEME_FTP) == 0)
117 		return (fetchPutFTP(URL, flags));
118 	else if (strcmp(URL->scheme, SCHEME_HTTP) == 0)
119 		return (fetchPutHTTP(URL, flags));
120 	else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0)
121 		return (fetchPutHTTP(URL, flags));
122 	url_seterr(URL_BAD_SCHEME);
123 	return (NULL);
124 }
125 
126 /*
127  * Select the appropriate protocol for the URL scheme, and return the
128  * size of the document referenced by the URL if it exists.
129  */
130 int
131 fetchStat(struct url *URL, struct url_stat *us, const char *flags)
132 {
133 
134 	if (us != NULL) {
135 		us->size = -1;
136 		us->atime = us->mtime = 0;
137 	}
138 	if (strcmp(URL->scheme, SCHEME_FILE) == 0)
139 		return (fetchStatFile(URL, us, flags));
140 	else if (strcmp(URL->scheme, SCHEME_FTP) == 0)
141 		return (fetchStatFTP(URL, us, flags));
142 	else if (strcmp(URL->scheme, SCHEME_HTTP) == 0)
143 		return (fetchStatHTTP(URL, us, flags));
144 	else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0)
145 		return (fetchStatHTTP(URL, us, flags));
146 	url_seterr(URL_BAD_SCHEME);
147 	return (-1);
148 }
149 
150 /*
151  * Select the appropriate protocol for the URL scheme, and return a
152  * list of files in the directory pointed to by the URL.
153  */
154 struct url_ent *
155 fetchList(struct url *URL, const char *flags)
156 {
157 
158 	if (strcmp(URL->scheme, SCHEME_FILE) == 0)
159 		return (fetchListFile(URL, flags));
160 	else if (strcmp(URL->scheme, SCHEME_FTP) == 0)
161 		return (fetchListFTP(URL, flags));
162 	else if (strcmp(URL->scheme, SCHEME_HTTP) == 0)
163 		return (fetchListHTTP(URL, flags));
164 	else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0)
165 		return (fetchListHTTP(URL, flags));
166 	url_seterr(URL_BAD_SCHEME);
167 	return (NULL);
168 }
169 
/*
 * String front end to fetchXGet(): parse URL, open the document it
 * names, then release the parsed URL.
 *
 * Returns NULL if the URL does not parse; otherwise returns the result
 * of fetchXGet().
 */
FILE *
fetchXGetURL(const char *URL, struct url_stat *us, const char *flags)
{
	struct url *parsed;
	FILE *stream = NULL;

	parsed = fetchParseURL(URL);
	if (parsed != NULL) {
		stream = fetchXGet(parsed, us, flags);
		fetchFreeURL(parsed);
	}
	return (stream);
}
187 
/*
 * String front end for reading a document without collecting metadata:
 * fetchXGetURL() called with a NULL struct url_stat pointer.
 */
FILE *
fetchGetURL(const char *URL, const char *flags)
{
	struct url_stat *const no_stat = NULL;

	return (fetchXGetURL(URL, no_stat, flags));
}
196 
/*
 * String front end to fetchPut(): parse URL, open the document it names
 * for writing, then release the parsed URL.
 *
 * Returns NULL if the URL does not parse; otherwise returns the result
 * of fetchPut().
 */
FILE *
fetchPutURL(const char *URL, const char *flags)
{
	struct url *parsed;
	FILE *stream = NULL;

	parsed = fetchParseURL(URL);
	if (parsed != NULL) {
		stream = fetchPut(parsed, flags);
		fetchFreeURL(parsed);
	}
	return (stream);
}
214 
/*
 * String front end to fetchStat(): parse URL, query the document's
 * metadata, then release the parsed URL.
 *
 * Returns -1 if the URL does not parse; otherwise returns the result of
 * fetchStat().
 */
int
fetchStatURL(const char *URL, struct url_stat *us, const char *flags)
{
	struct url *parsed;
	int status = -1;

	parsed = fetchParseURL(URL);
	if (parsed != NULL) {
		status = fetchStat(parsed, us, flags);
		fetchFreeURL(parsed);
	}
	return (status);
}
232 
/*
 * String front end to fetchList(): parse URL, list the directory it
 * names, then release the parsed URL.
 *
 * Returns NULL if the URL does not parse; otherwise returns the result
 * of fetchList().
 */
struct url_ent *
fetchListURL(const char *URL, const char *flags)
{
	struct url *parsed;
	struct url_ent *entries = NULL;

	parsed = fetchParseURL(URL);
	if (parsed != NULL) {
		entries = fetchList(parsed, flags);
		fetchFreeURL(parsed);
	}
	return (entries);
}
250 
251 /*
252  * Make a URL
253  */
254 struct url *
255 fetchMakeURL(const char *scheme, const char *host, int port, const char *doc,
256     const char *user, const char *pwd)
257 {
258 	struct url *u;
259 
260 	if (!scheme || (!host && !doc)) {
261 		url_seterr(URL_MALFORMED);
262 		return (NULL);
263 	}
264 
265 	if (port < 0 || port > 65535) {
266 		url_seterr(URL_BAD_PORT);
267 		return (NULL);
268 	}
269 
270 	/* allocate struct url */
271 	if ((u = calloc(1, sizeof(*u))) == NULL) {
272 		fetch_syserr();
273 		return (NULL);
274 	}
275 	u->netrcfd = -1;
276 
277 	if ((u->doc = strdup(doc ? doc : "/")) == NULL) {
278 		fetch_syserr();
279 		free(u);
280 		return (NULL);
281 	}
282 
283 #define seturl(x) snprintf(u->x, sizeof(u->x), "%s", x)
284 	seturl(scheme);
285 	seturl(host);
286 	seturl(user);
287 	seturl(pwd);
288 #undef seturl
289 	u->port = port;
290 
291 	return (u);
292 }
293 
/*
 * Map a hexadecimal digit character (either case) to its numeric value
 * in the range 0..15.  Returns -1 for any other character, including
 * the null terminator.
 */
static int
fetch_hexval(char ch)
{
	static const char digits[] = "0123456789abcdef";
	const char *pos;

	/* Fold the upper-case letters onto their lower-case counterparts. */
	if (ch >= 'A' && ch <= 'F')
		ch = ch - 'A' + 'a';

	/* strchr() would match the table's own terminator; rule it out. */
	if (ch == '\0')
		return (-1);

	pos = strchr(digits, ch);
	if (pos == NULL)
		return (-1);
	return ((int)(pos - digits));
}
309 
/*
 * Copy one URL component from src to dst, decoding %XX escapes on the
 * way.  Copying stops at the end of the string or at the first '@' or
 * ':' separator, and a pointer to that stopping character is returned.
 *
 * At most dlen bytes are written to dst and no terminator is appended;
 * terminating dst is the caller's job.
 *
 * Returns NULL if the decoded component does not fit in dlen bytes, or
 * if a '%' is not followed by two hex digits, or encodes a zero byte.
 */
static const char *
fetch_pctdecode(char *dst, const char *src, size_t dlen)
{
	const char *cur = src;
	int hi, lo;
	char out;

	while (*cur != '\0' && *cur != '@' && *cur != ':') {
		if (*cur == '%') {
			/*
			 * Check the digits one at a time so that a string
			 * ending right after '%' is never read past its
			 * terminator.
			 */
			if ((hi = fetch_hexval(cur[1])) < 0)
				return (NULL);
			if ((lo = fetch_hexval(cur[2])) < 0)
				return (NULL);
			/* "%00" would embed a terminator; reject it. */
			if (hi == 0 && lo == 0)
				return (NULL);
			out = hi << 4 | lo;
			cur += 3;
		} else {
			out = *cur;
			cur += 1;
		}

		/* Out of room in the destination buffer. */
		if (dlen == 0)
			return (NULL);
		dlen--;
		*dst++ = out;
	}
	return (cur);
}
341 
342 /*
343  * Split an URL into components. URL syntax is:
344  * [method:/][/[user[:pwd]@]host[:port]/][document]
345  * This almost, but not quite, RFC1738 URL syntax.
346  */
347 struct url *
348 fetchParseURL(const char *URL)
349 {
350 	char *doc;
351 	const char *p, *q;
352 	struct url *u;
353 	int i, n;
354 
355 	/* allocate struct url */
356 	if ((u = calloc(1, sizeof(*u))) == NULL) {
357 		fetch_syserr();
358 		return (NULL);
359 	}
360 	u->netrcfd = -1;
361 
362 	/* scheme name */
363 	if ((p = strstr(URL, ":/"))) {
364                 if (p - URL > URL_SCHEMELEN)
365                         goto ouch;
366                 for (i = 0; URL + i < p; i++)
367                         u->scheme[i] = tolower((unsigned char)URL[i]);
368 		URL = ++p;
369 		/*
370 		 * Only one slash: no host, leave slash as part of document
371 		 * Two slashes: host follows, strip slashes
372 		 */
373 		if (URL[1] == '/')
374 			URL = (p += 2);
375 	} else {
376 		p = URL;
377 	}
378 	if (!*URL || *URL == '/' || *URL == '.' ||
379 	    (u->scheme[0] == '\0' &&
380 		strchr(URL, '/') == NULL && strchr(URL, ':') == NULL))
381 		goto nohost;
382 
383 	p = strpbrk(URL, "/@");
384 	if (p && *p == '@') {
385 		/* username */
386 		q = fetch_pctdecode(u->user, URL, URL_USERLEN);
387 		if (q == NULL)
388 			goto ouch;
389 
390 		/* password */
391 		if (*q == ':') {
392 			q = fetch_pctdecode(u->pwd, q + 1, URL_PWDLEN);
393 			if (q == NULL)
394 				goto ouch;
395 		}
396 		p++;
397 	} else {
398 		p = URL;
399 	}
400 
401 	/* hostname */
402 	if (*p == '[') {
403 		q = p + 1 + strspn(p + 1, ":0123456789ABCDEFabcdef");
404 		if (*q++ != ']')
405 			goto ouch;
406 	} else {
407 		/* valid characters in a DNS name */
408 		q = p + strspn(p, "-." "0123456789"
409 		    "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "_"
410 		    "abcdefghijklmnopqrstuvwxyz");
411 	}
412 	if ((*q != '\0' && *q != '/' && *q != ':') || q - p > MAXHOSTNAMELEN)
413 		goto ouch;
414 	for (i = 0; p + i < q; i++)
415 		u->host[i] = tolower((unsigned char)p[i]);
416 	u->host[i] = '\0';
417 	p = q;
418 
419 	/* port */
420 	if (*p == ':') {
421 		for (n = 0, q = ++p; *q && (*q != '/'); q++) {
422 			if (*q >= '0' && *q <= '9' && n < INT_MAX / 10) {
423 				n = n * 10 + (*q - '0');
424 			} else {
425 				/* invalid port */
426 				url_seterr(URL_BAD_PORT);
427 				goto ouch;
428 			}
429 		}
430 		if (n < 1 || n > IPPORT_MAX)
431 			goto ouch;
432 		u->port = n;
433 		p = q;
434 	}
435 
436 nohost:
437 	/* document */
438 	if (!*p)
439 		p = "/";
440 
441 	if (strcmp(u->scheme, SCHEME_HTTP) == 0 ||
442 	    strcmp(u->scheme, SCHEME_HTTPS) == 0) {
443 		const char hexnums[] = "0123456789abcdef";
444 
445 		/* percent-escape whitespace. */
446 		if ((doc = malloc(strlen(p) * 3 + 1)) == NULL) {
447 			fetch_syserr();
448 			goto ouch;
449 		}
450 		u->doc = doc;
451 		while (*p != '\0') {
452 			if (!isspace((unsigned char)*p)) {
453 				*doc++ = *p++;
454 			} else {
455 				*doc++ = '%';
456 				*doc++ = hexnums[((unsigned int)*p) >> 4];
457 				*doc++ = hexnums[((unsigned int)*p) & 0xf];
458 				p++;
459 			}
460 		}
461 		*doc = '\0';
462 	} else if ((u->doc = strdup(p)) == NULL) {
463 		fetch_syserr();
464 		goto ouch;
465 	}
466 
467 	DEBUGF("scheme:   \"%s\"\n"
468 	    "user:     \"%s\"\n"
469 	    "password: \"%s\"\n"
470 	    "host:     \"%s\"\n"
471 	    "port:     \"%d\"\n"
472 	    "document: \"%s\"\n",
473 	    u->scheme, u->user, u->pwd,
474 	    u->host, u->port, u->doc);
475 
476 	return (u);
477 
478 ouch:
479 	free(u);
480 	return (NULL);
481 }
482 
483 /*
484  * Free a URL
485  */
486 void
487 fetchFreeURL(struct url *u)
488 {
489 	free(u->doc);
490 	free(u);
491 }
492