/*-
 * Copyright (c) 1998-2004 Dag-Erling Coïdan Smørgrav
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 * 28 * $FreeBSD: src/lib/libfetch/fetch.c,v 1.42 2008/12/17 18:00:18 murray Exp $ 29 */ 30 31 #include <sys/param.h> 32 #include <sys/errno.h> 33 34 #include <ctype.h> 35 #include <stdio.h> 36 #include <stdlib.h> 37 #include <string.h> 38 39 #include "fetch.h" 40 #include "common.h" 41 42 auth_t fetchAuthMethod; 43 int fetchLastErrCode; 44 char fetchLastErrString[MAXERRSTRING]; 45 int fetchTimeout; 46 int fetchRestartCalls = 1; 47 int fetchDebug; 48 49 50 /*** Local data **************************************************************/ 51 52 /* 53 * Error messages for parser errors 54 */ 55 #define URL_MALFORMED 1 56 #define URL_BAD_SCHEME 2 57 #define URL_BAD_PORT 3 58 static struct fetcherr url_errlist[] = { 59 { URL_MALFORMED, FETCH_URL, "Malformed URL" }, 60 { URL_BAD_SCHEME, FETCH_URL, "Invalid URL scheme" }, 61 { URL_BAD_PORT, FETCH_URL, "Invalid server port" }, 62 { -1, FETCH_UNKNOWN, "Unknown parser error" } 63 }; 64 65 66 /*** Public API **************************************************************/ 67 68 /* 69 * Select the appropriate protocol for the URL scheme, and return a 70 * read-only stream connected to the document referenced by the URL. 71 * Also fill out the struct url_stat. 72 */ 73 FILE * 74 fetchXGet(struct url *URL, struct url_stat *us, const char *flags) 75 { 76 77 if (us != NULL) { 78 us->size = -1; 79 us->atime = us->mtime = 0; 80 } 81 if (strcasecmp(URL->scheme, SCHEME_FILE) == 0) 82 return (fetchXGetFile(URL, us, flags)); 83 else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) 84 return (fetchXGetFTP(URL, us, flags)); 85 else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0) 86 return (fetchXGetHTTP(URL, us, flags)); 87 else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0) 88 return (fetchXGetHTTP(URL, us, flags)); 89 url_seterr(URL_BAD_SCHEME); 90 return (NULL); 91 } 92 93 /* 94 * Select the appropriate protocol for the URL scheme, and return a 95 * read-only stream connected to the document referenced by the URL. 
96 */ 97 FILE * 98 fetchGet(struct url *URL, const char *flags) 99 { 100 return (fetchXGet(URL, NULL, flags)); 101 } 102 103 /* 104 * Select the appropriate protocol for the URL scheme, and return a 105 * write-only stream connected to the document referenced by the URL. 106 */ 107 FILE * 108 fetchPut(struct url *URL, const char *flags) 109 { 110 111 if (strcasecmp(URL->scheme, SCHEME_FILE) == 0) 112 return (fetchPutFile(URL, flags)); 113 else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) 114 return (fetchPutFTP(URL, flags)); 115 else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0) 116 return (fetchPutHTTP(URL, flags)); 117 else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0) 118 return (fetchPutHTTP(URL, flags)); 119 url_seterr(URL_BAD_SCHEME); 120 return (NULL); 121 } 122 123 /* 124 * Select the appropriate protocol for the URL scheme, and return the 125 * size of the document referenced by the URL if it exists. 126 */ 127 int 128 fetchStat(struct url *URL, struct url_stat *us, const char *flags) 129 { 130 131 if (us != NULL) { 132 us->size = -1; 133 us->atime = us->mtime = 0; 134 } 135 if (strcasecmp(URL->scheme, SCHEME_FILE) == 0) 136 return (fetchStatFile(URL, us, flags)); 137 else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) 138 return (fetchStatFTP(URL, us, flags)); 139 else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0) 140 return (fetchStatHTTP(URL, us, flags)); 141 else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0) 142 return (fetchStatHTTP(URL, us, flags)); 143 url_seterr(URL_BAD_SCHEME); 144 return (-1); 145 } 146 147 /* 148 * Select the appropriate protocol for the URL scheme, and return a 149 * list of files in the directory pointed to by the URL. 
150 */ 151 struct url_ent * 152 fetchList(struct url *URL, const char *flags) 153 { 154 155 if (strcasecmp(URL->scheme, SCHEME_FILE) == 0) 156 return (fetchListFile(URL, flags)); 157 else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) 158 return (fetchListFTP(URL, flags)); 159 else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0) 160 return (fetchListHTTP(URL, flags)); 161 else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0) 162 return (fetchListHTTP(URL, flags)); 163 url_seterr(URL_BAD_SCHEME); 164 return (NULL); 165 } 166 167 /* 168 * Attempt to parse the given URL; if successful, call fetchXGet(). 169 */ 170 FILE * 171 fetchXGetURL(const char *URL, struct url_stat *us, const char *flags) 172 { 173 struct url *u; 174 FILE *f; 175 176 if ((u = fetchParseURL(URL)) == NULL) 177 return (NULL); 178 179 f = fetchXGet(u, us, flags); 180 181 fetchFreeURL(u); 182 return (f); 183 } 184 185 /* 186 * Attempt to parse the given URL; if successful, call fetchGet(). 187 */ 188 FILE * 189 fetchGetURL(const char *URL, const char *flags) 190 { 191 return (fetchXGetURL(URL, NULL, flags)); 192 } 193 194 /* 195 * Attempt to parse the given URL; if successful, call fetchPut(). 196 */ 197 FILE * 198 fetchPutURL(const char *URL, const char *flags) 199 { 200 struct url *u; 201 FILE *f; 202 203 if ((u = fetchParseURL(URL)) == NULL) 204 return (NULL); 205 206 f = fetchPut(u, flags); 207 208 fetchFreeURL(u); 209 return (f); 210 } 211 212 /* 213 * Attempt to parse the given URL; if successful, call fetchStat(). 214 */ 215 int 216 fetchStatURL(const char *URL, struct url_stat *us, const char *flags) 217 { 218 struct url *u; 219 int s; 220 221 if ((u = fetchParseURL(URL)) == NULL) 222 return (-1); 223 224 s = fetchStat(u, us, flags); 225 226 fetchFreeURL(u); 227 return (s); 228 } 229 230 /* 231 * Attempt to parse the given URL; if successful, call fetchList(). 
232 */ 233 struct url_ent * 234 fetchListURL(const char *URL, const char *flags) 235 { 236 struct url *u; 237 struct url_ent *ue; 238 239 if ((u = fetchParseURL(URL)) == NULL) 240 return (NULL); 241 242 ue = fetchList(u, flags); 243 244 fetchFreeURL(u); 245 return (ue); 246 } 247 248 /* 249 * Make a URL 250 */ 251 struct url * 252 fetchMakeURL(const char *scheme, const char *host, int port, const char *doc, 253 const char *user, const char *pwd) 254 { 255 struct url *u; 256 257 if (!scheme || (!host && !doc)) { 258 url_seterr(URL_MALFORMED); 259 return (NULL); 260 } 261 262 if (port < 0 || port > 65535) { 263 url_seterr(URL_BAD_PORT); 264 return (NULL); 265 } 266 267 /* allocate struct url */ 268 if ((u = calloc(1, sizeof(*u))) == NULL) { 269 fetch_syserr(); 270 return (NULL); 271 } 272 273 if ((u->doc = strdup(doc ? doc : "/")) == NULL) { 274 fetch_syserr(); 275 free(u); 276 return (NULL); 277 } 278 279 #define seturl(x) snprintf(u->x, sizeof(u->x), "%s", x) 280 seturl(scheme); 281 seturl(host); 282 seturl(user); 283 seturl(pwd); 284 #undef seturl 285 u->port = port; 286 287 return (u); 288 } 289 290 /* 291 * Return value of the given hex digit. 292 */ 293 static int 294 fetch_hexval(char ch) 295 { 296 297 if (ch >= '0' && ch <= '9') 298 return (ch - '0'); 299 else if (ch >= 'a' && ch <= 'f') 300 return (ch - 'a' + 10); 301 else if (ch >= 'A' && ch <= 'F') 302 return (ch - 'A' + 10); 303 return (-1); 304 } 305 306 /* 307 * Decode percent-encoded URL component from src into dst, stopping at end 308 * of string, or at @ or : separators. Returns a pointer to the unhandled 309 * part of the input string (null terminator, @, or :). No terminator is 310 * written to dst (it is the caller's responsibility). 
311 */ 312 static const char * 313 fetch_pctdecode(char *dst, const char *src, size_t dlen) 314 { 315 int d1, d2; 316 char c; 317 const char *s; 318 319 for (s = src; *s != '\0' && *s != '@' && *s != ':'; s++) { 320 if (s[0] == '%' && (d1 = fetch_hexval(s[1])) >= 0 && 321 (d2 = fetch_hexval(s[2])) >= 0 && (d1 > 0 || d2 > 0)) { 322 c = d1 << 4 | d2; 323 s += 2; 324 } else { 325 c = *s; 326 } 327 if (dlen-- > 0) 328 *dst++ = c; 329 } 330 return (s); 331 } 332 333 /* 334 * Split an URL into components. URL syntax is: 335 * [method:/][/[user[:pwd]@]host[:port]/][document] 336 * This almost, but not quite, RFC1738 URL syntax. 337 */ 338 struct url * 339 fetchParseURL(const char *URL) 340 { 341 char *doc; 342 const char *p, *q; 343 struct url *u; 344 int i; 345 346 /* allocate struct url */ 347 if ((u = calloc(1, sizeof(*u))) == NULL) { 348 fetch_syserr(); 349 return (NULL); 350 } 351 352 /* scheme name */ 353 if ((p = strstr(URL, ":/"))) { 354 snprintf(u->scheme, URL_SCHEMELEN+1, 355 "%.*s", (int)(p - URL), URL); 356 URL = ++p; 357 /* 358 * Only one slash: no host, leave slash as part of document 359 * Two slashes: host follows, strip slashes 360 */ 361 if (URL[1] == '/') 362 URL = (p += 2); 363 } else { 364 p = URL; 365 } 366 if (!*URL || *URL == '/' || *URL == '.' 
|| 367 (u->scheme[0] == '\0' && 368 strchr(URL, '/') == NULL && strchr(URL, ':') == NULL)) 369 goto nohost; 370 371 p = strpbrk(URL, "/@"); 372 if (p && *p == '@') { 373 /* username */ 374 q = fetch_pctdecode(u->user, URL, URL_USERLEN); 375 376 /* password */ 377 if (*q == ':') 378 q = fetch_pctdecode(u->pwd, ++q, URL_PWDLEN); 379 380 p++; 381 } else { 382 p = URL; 383 } 384 385 /* hostname */ 386 #ifdef INET6 387 if (*p == '[' && (q = strchr(p + 1, ']')) != NULL && 388 (*++q == '\0' || *q == '/' || *q == ':')) { 389 if ((i = q - p - 2) > MAXHOSTNAMELEN) 390 i = MAXHOSTNAMELEN; 391 strncpy(u->host, ++p, i); 392 p = q; 393 } else 394 #endif 395 for (i = 0; *p && (*p != '/') && (*p != ':'); p++) 396 if (i < MAXHOSTNAMELEN) 397 u->host[i++] = *p; 398 399 /* port */ 400 if (*p == ':') { 401 for (q = ++p; *q && (*q != '/'); q++) 402 if (isdigit((unsigned char)*q)) 403 u->port = u->port * 10 + (*q - '0'); 404 else { 405 /* invalid port */ 406 url_seterr(URL_BAD_PORT); 407 goto ouch; 408 } 409 p = q; 410 } 411 412 nohost: 413 /* document */ 414 if (!*p) 415 p = "/"; 416 417 if (strcasecmp(u->scheme, SCHEME_HTTP) == 0 || 418 strcasecmp(u->scheme, SCHEME_HTTPS) == 0) { 419 const char hexnums[] = "0123456789abcdef"; 420 421 /* percent-escape whitespace. 
*/ 422 if ((doc = malloc(strlen(p) * 3 + 1)) == NULL) { 423 fetch_syserr(); 424 goto ouch; 425 } 426 u->doc = doc; 427 while (*p != '\0') { 428 if (!isspace((unsigned char)*p)) { 429 *doc++ = *p++; 430 } else { 431 *doc++ = '%'; 432 *doc++ = hexnums[((unsigned int)*p) >> 4]; 433 *doc++ = hexnums[((unsigned int)*p) & 0xf]; 434 p++; 435 } 436 } 437 *doc = '\0'; 438 } else if ((u->doc = strdup(p)) == NULL) { 439 fetch_syserr(); 440 goto ouch; 441 } 442 443 DEBUG(fprintf(stderr, 444 "scheme: [%s]\n" 445 "user: [%s]\n" 446 "password: [%s]\n" 447 "host: [%s]\n" 448 "port: [%d]\n" 449 "document: [%s]\n", 450 u->scheme, u->user, u->pwd, 451 u->host, u->port, u->doc)); 452 453 return (u); 454 455 ouch: 456 free(u); 457 return (NULL); 458 } 459 460 /* 461 * Free a URL 462 */ 463 void 464 fetchFreeURL(struct url *u) 465 { 466 free(u->doc); 467 free(u); 468 } 469