/*	$NetBSD: fetch.c,v 1.1.1.8 2009/08/21 15:12:27 joerg Exp $	*/
/*-
 * Copyright (c) 1998-2004 Dag-Erling Coïdan Smørgrav
 * Copyright (c) 2008 Joerg Sonnenberger <joerg@NetBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 * 30 * $FreeBSD: fetch.c,v 1.41 2007/12/19 00:26:36 des Exp $ 31 */ 32 33 #if HAVE_CONFIG_H 34 #include "config.h" 35 #endif 36 #ifndef NETBSD 37 #include <nbcompat.h> 38 #endif 39 40 #include <ctype.h> 41 #include <errno.h> 42 #include <stdio.h> 43 #include <stdlib.h> 44 #include <string.h> 45 46 #include "fetch.h" 47 #include "common.h" 48 49 auth_t fetchAuthMethod; 50 int fetchLastErrCode; 51 char fetchLastErrString[MAXERRSTRING]; 52 int fetchTimeout; 53 volatile int fetchRestartCalls = 1; 54 int fetchDebug; 55 56 57 /*** Local data **************************************************************/ 58 59 /* 60 * Error messages for parser errors 61 */ 62 #define URL_MALFORMED 1 63 #define URL_BAD_SCHEME 2 64 #define URL_BAD_PORT 3 65 static struct fetcherr url_errlist[] = { 66 { URL_MALFORMED, FETCH_URL, "Malformed URL" }, 67 { URL_BAD_SCHEME, FETCH_URL, "Invalid URL scheme" }, 68 { URL_BAD_PORT, FETCH_URL, "Invalid server port" }, 69 { -1, FETCH_UNKNOWN, "Unknown parser error" } 70 }; 71 72 73 /*** Public API **************************************************************/ 74 75 /* 76 * Select the appropriate protocol for the URL scheme, and return a 77 * read-only stream connected to the document referenced by the URL. 78 * Also fill out the struct url_stat. 
79 */ 80 fetchIO * 81 fetchXGet(struct url *URL, struct url_stat *us, const char *flags) 82 { 83 84 if (us != NULL) { 85 us->size = -1; 86 us->atime = us->mtime = 0; 87 } 88 if (strcasecmp(URL->scheme, SCHEME_FILE) == 0) 89 return (fetchXGetFile(URL, us, flags)); 90 else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) 91 return (fetchXGetFTP(URL, us, flags)); 92 else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0) 93 return (fetchXGetHTTP(URL, us, flags)); 94 else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0) 95 return (fetchXGetHTTP(URL, us, flags)); 96 url_seterr(URL_BAD_SCHEME); 97 return (NULL); 98 } 99 100 /* 101 * Select the appropriate protocol for the URL scheme, and return a 102 * read-only stream connected to the document referenced by the URL. 103 */ 104 fetchIO * 105 fetchGet(struct url *URL, const char *flags) 106 { 107 return (fetchXGet(URL, NULL, flags)); 108 } 109 110 /* 111 * Select the appropriate protocol for the URL scheme, and return a 112 * write-only stream connected to the document referenced by the URL. 113 */ 114 fetchIO * 115 fetchPut(struct url *URL, const char *flags) 116 { 117 118 if (strcasecmp(URL->scheme, SCHEME_FILE) == 0) 119 return (fetchPutFile(URL, flags)); 120 else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) 121 return (fetchPutFTP(URL, flags)); 122 else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0) 123 return (fetchPutHTTP(URL, flags)); 124 else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0) 125 return (fetchPutHTTP(URL, flags)); 126 url_seterr(URL_BAD_SCHEME); 127 return (NULL); 128 } 129 130 /* 131 * Select the appropriate protocol for the URL scheme, and return the 132 * size of the document referenced by the URL if it exists. 
133 */ 134 int 135 fetchStat(struct url *URL, struct url_stat *us, const char *flags) 136 { 137 138 if (us != NULL) { 139 us->size = -1; 140 us->atime = us->mtime = 0; 141 } 142 if (strcasecmp(URL->scheme, SCHEME_FILE) == 0) 143 return (fetchStatFile(URL, us, flags)); 144 else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) 145 return (fetchStatFTP(URL, us, flags)); 146 else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0) 147 return (fetchStatHTTP(URL, us, flags)); 148 else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0) 149 return (fetchStatHTTP(URL, us, flags)); 150 url_seterr(URL_BAD_SCHEME); 151 return (-1); 152 } 153 154 /* 155 * Select the appropriate protocol for the URL scheme, and return a 156 * list of files in the directory pointed to by the URL. 157 */ 158 int 159 fetchList(struct url_list *ue, struct url *URL, const char *pattern, 160 const char *flags) 161 { 162 163 if (strcasecmp(URL->scheme, SCHEME_FILE) == 0) 164 return (fetchListFile(ue, URL, pattern, flags)); 165 else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) 166 return (fetchListFTP(ue, URL, pattern, flags)); 167 else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0) 168 return (fetchListHTTP(ue, URL, pattern, flags)); 169 else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0) 170 return (fetchListHTTP(ue, URL, pattern, flags)); 171 url_seterr(URL_BAD_SCHEME); 172 return -1; 173 } 174 175 /* 176 * Attempt to parse the given URL; if successful, call fetchXGet(). 177 */ 178 fetchIO * 179 fetchXGetURL(const char *URL, struct url_stat *us, const char *flags) 180 { 181 struct url *u; 182 fetchIO *f; 183 184 if ((u = fetchParseURL(URL)) == NULL) 185 return (NULL); 186 187 f = fetchXGet(u, us, flags); 188 189 fetchFreeURL(u); 190 return (f); 191 } 192 193 /* 194 * Attempt to parse the given URL; if successful, call fetchGet(). 
195 */ 196 fetchIO * 197 fetchGetURL(const char *URL, const char *flags) 198 { 199 return (fetchXGetURL(URL, NULL, flags)); 200 } 201 202 /* 203 * Attempt to parse the given URL; if successful, call fetchPut(). 204 */ 205 fetchIO * 206 fetchPutURL(const char *URL, const char *flags) 207 { 208 struct url *u; 209 fetchIO *f; 210 211 if ((u = fetchParseURL(URL)) == NULL) 212 return (NULL); 213 214 f = fetchPut(u, flags); 215 216 fetchFreeURL(u); 217 return (f); 218 } 219 220 /* 221 * Attempt to parse the given URL; if successful, call fetchStat(). 222 */ 223 int 224 fetchStatURL(const char *URL, struct url_stat *us, const char *flags) 225 { 226 struct url *u; 227 int s; 228 229 if ((u = fetchParseURL(URL)) == NULL) 230 return (-1); 231 232 s = fetchStat(u, us, flags); 233 234 fetchFreeURL(u); 235 return (s); 236 } 237 238 /* 239 * Attempt to parse the given URL; if successful, call fetchList(). 240 */ 241 int 242 fetchListURL(struct url_list *ue, const char *URL, const char *pattern, 243 const char *flags) 244 { 245 struct url *u; 246 int rv; 247 248 if ((u = fetchParseURL(URL)) == NULL) 249 return -1; 250 251 rv = fetchList(ue, u, pattern, flags); 252 253 fetchFreeURL(u); 254 return rv; 255 } 256 257 /* 258 * Make a URL 259 */ 260 struct url * 261 fetchMakeURL(const char *scheme, const char *host, int port, const char *doc, 262 const char *user, const char *pwd) 263 { 264 struct url *u; 265 266 if (!scheme || (!host && !doc)) { 267 url_seterr(URL_MALFORMED); 268 return (NULL); 269 } 270 271 if (port < 0 || port > 65535) { 272 url_seterr(URL_BAD_PORT); 273 return (NULL); 274 } 275 276 /* allocate struct url */ 277 if ((u = calloc(1, sizeof(*u))) == NULL) { 278 fetch_syserr(); 279 return (NULL); 280 } 281 282 if ((u->doc = strdup(doc ? 
doc : "/")) == NULL) { 283 fetch_syserr(); 284 free(u); 285 return (NULL); 286 } 287 288 #define seturl(x) snprintf(u->x, sizeof(u->x), "%s", x) 289 seturl(scheme); 290 seturl(host); 291 seturl(user); 292 seturl(pwd); 293 #undef seturl 294 u->port = port; 295 296 return (u); 297 } 298 299 int 300 fetch_urlpath_safe(char x) 301 { 302 if ((x >= '0' && x <= '9') || (x >= 'A' && x <= 'Z') || 303 (x >= 'a' && x <= 'z')) 304 return 1; 305 306 switch (x) { 307 case '$': 308 case '-': 309 case '_': 310 case '.': 311 case '+': 312 case '!': 313 case '*': 314 case '\'': 315 case '(': 316 case ')': 317 case ',': 318 /* The following are allowed in segment and path components: */ 319 case '?': 320 case ':': 321 case '@': 322 case '&': 323 case '=': 324 case '/': 325 case ';': 326 /* If something is already quoted... */ 327 case '%': 328 return 1; 329 default: 330 return 0; 331 } 332 } 333 334 /* 335 * Copy an existing URL. 336 */ 337 struct url * 338 fetchCopyURL(const struct url *src) 339 { 340 struct url *dst; 341 char *doc; 342 343 /* allocate struct url */ 344 if ((dst = malloc(sizeof(*dst))) == NULL) { 345 fetch_syserr(); 346 return (NULL); 347 } 348 if ((doc = strdup(src->doc)) == NULL) { 349 fetch_syserr(); 350 free(dst); 351 return (NULL); 352 } 353 *dst = *src; 354 dst->doc = doc; 355 356 return dst; 357 } 358 359 /* 360 * Split an URL into components. URL syntax is: 361 * [method:/][/[user[:pwd]@]host[:port]/][document] 362 * This almost, but not quite, RFC1738 URL syntax. 
363 */ 364 struct url * 365 fetchParseURL(const char *URL) 366 { 367 const char *p, *q; 368 struct url *u; 369 size_t i, count; 370 int pre_quoted; 371 372 /* allocate struct url */ 373 if ((u = calloc(1, sizeof(*u))) == NULL) { 374 fetch_syserr(); 375 return (NULL); 376 } 377 378 if (*URL == '/') { 379 pre_quoted = 0; 380 strcpy(u->scheme, SCHEME_FILE); 381 p = URL; 382 goto quote_doc; 383 } 384 if (strncmp(URL, "file:", 5) == 0) { 385 pre_quoted = 1; 386 strcpy(u->scheme, SCHEME_FILE); 387 URL += 5; 388 if (URL[0] != '/' || URL[1] != '/' || URL[2] != '/') { 389 url_seterr(URL_MALFORMED); 390 goto ouch; 391 } 392 p = URL + 2; 393 goto quote_doc; 394 } 395 if (strncmp(URL, "http:", 5) == 0 || 396 strncmp(URL, "https:", 6) == 0) { 397 pre_quoted = 1; 398 if (URL[4] == ':') { 399 strcpy(u->scheme, SCHEME_HTTP); 400 URL += 5; 401 } else { 402 strcpy(u->scheme, SCHEME_HTTPS); 403 URL += 6; 404 } 405 406 if (URL[0] != '/' || URL[1] != '/') { 407 url_seterr(URL_MALFORMED); 408 goto ouch; 409 } 410 URL += 2; 411 p = URL; 412 goto find_user; 413 } 414 if (strncmp(URL, "ftp:", 4) == 0) { 415 pre_quoted = 1; 416 strcpy(u->scheme, SCHEME_FTP); 417 URL += 4; 418 if (URL[0] != '/' || URL[1] != '/') { 419 url_seterr(URL_MALFORMED); 420 goto ouch; 421 } 422 URL += 2; 423 p = URL; 424 goto find_user; 425 } 426 427 url_seterr(URL_BAD_SCHEME); 428 goto ouch; 429 430 find_user: 431 p = strpbrk(URL, "/@"); 432 if (p != NULL && *p == '@') { 433 /* username */ 434 for (q = URL, i = 0; (*q != ':') && (*q != '@'); q++) { 435 if (i < URL_USERLEN) 436 u->user[i++] = *q; 437 } 438 439 /* password */ 440 if (*q == ':') { 441 for (q++, i = 0; (*q != '@'); q++) 442 if (i < URL_PWDLEN) 443 u->pwd[i++] = *q; 444 } 445 446 p++; 447 } else { 448 p = URL; 449 } 450 451 /* hostname */ 452 #ifdef INET6 453 if (*p == '[' && (q = strchr(p + 1, ']')) != NULL && 454 (*++q == '\0' || *q == '/' || *q == ':')) { 455 if ((i = q - p - 2) > URL_HOSTLEN) 456 i = URL_HOSTLEN; 457 strncpy(u->host, ++p, i); 458 p = 
q; 459 } else 460 #endif 461 for (i = 0; *p && (*p != '/') && (*p != ':'); p++) 462 if (i < URL_HOSTLEN) 463 u->host[i++] = *p; 464 465 /* port */ 466 if (*p == ':') { 467 for (q = ++p; *q && (*q != '/'); q++) 468 if (isdigit((unsigned char)*q)) 469 u->port = u->port * 10 + (*q - '0'); 470 else { 471 /* invalid port */ 472 url_seterr(URL_BAD_PORT); 473 goto ouch; 474 } 475 p = q; 476 } 477 478 /* document */ 479 if (!*p) 480 p = "/"; 481 482 quote_doc: 483 count = 1; 484 for (i = 0; p[i] != '\0'; ++i) { 485 if ((!pre_quoted && p[i] == '%') || 486 !fetch_urlpath_safe(p[i])) 487 count += 3; 488 else 489 ++count; 490 } 491 492 if ((u->doc = malloc(count)) == NULL) { 493 fetch_syserr(); 494 goto ouch; 495 } 496 for (i = 0; *p != '\0'; ++p) { 497 if ((!pre_quoted && *p == '%') || 498 !fetch_urlpath_safe(*p)) { 499 u->doc[i++] = '%'; 500 if ((unsigned char)*p < 160) 501 u->doc[i++] = '0' + ((unsigned char)*p) / 16; 502 else 503 u->doc[i++] = 'a' - 10 + ((unsigned char)*p) / 16; 504 if ((unsigned char)*p % 16 < 10) 505 u->doc[i++] = '0' + ((unsigned char)*p) % 16; 506 else 507 u->doc[i++] = 'a' - 10 + ((unsigned char)*p) % 16; 508 } else 509 u->doc[i++] = *p; 510 } 511 u->doc[i] = '\0'; 512 513 return (u); 514 515 ouch: 516 free(u); 517 return (NULL); 518 } 519 520 /* 521 * Free a URL 522 */ 523 void 524 fetchFreeURL(struct url *u) 525 { 526 free(u->doc); 527 free(u); 528 } 529 530 static char 531 xdigit2digit(char digit) 532 { 533 digit = tolower((unsigned char)digit); 534 if (digit >= 'a' && digit <= 'f') 535 digit = digit - 'a' + 10; 536 else 537 digit = digit - '0'; 538 539 return digit; 540 } 541 542 /* 543 * Unquote whole URL. 544 * Skips optional parts like query or fragment identifier. 
545 */ 546 char * 547 fetchUnquotePath(struct url *url) 548 { 549 char *unquoted; 550 const char *iter; 551 size_t i; 552 553 if ((unquoted = malloc(strlen(url->doc) + 1)) == NULL) 554 return NULL; 555 556 for (i = 0, iter = url->doc; *iter != '\0'; ++iter) { 557 if (*iter == '#' || *iter == '?') 558 break; 559 if (iter[0] != '%' || 560 !isxdigit((unsigned char)iter[1]) || 561 !isxdigit((unsigned char)iter[2])) { 562 unquoted[i++] = *iter; 563 continue; 564 } 565 unquoted[i++] = xdigit2digit(iter[1]) * 16 + 566 xdigit2digit(iter[2]); 567 iter += 2; 568 } 569 unquoted[i] = '\0'; 570 return unquoted; 571 } 572 573 574 /* 575 * Extract the file name component of a URL. 576 */ 577 char * 578 fetchUnquoteFilename(struct url *url) 579 { 580 char *unquoted, *filename; 581 const char *last_slash; 582 583 if ((unquoted = fetchUnquotePath(url)) == NULL) 584 return NULL; 585 586 if ((last_slash = strrchr(unquoted, '/')) == NULL) 587 return unquoted; 588 filename = strdup(last_slash + 1); 589 free(unquoted); 590 return filename; 591 } 592 593 char * 594 fetchStringifyURL(const struct url *url) 595 { 596 size_t total; 597 char *doc; 598 599 /* scheme :// user : pwd @ host :port doc */ 600 total = strlen(url->scheme) + 3 + strlen(url->user) + 1 + 601 strlen(url->pwd) + 1 + strlen(url->host) + 6 + strlen(url->doc) + 1; 602 if ((doc = malloc(total)) == NULL) 603 return NULL; 604 if (url->port != 0) 605 snprintf(doc, total, "%s%s%s%s%s%s%s:%d%s", 606 url->scheme, 607 url->scheme[0] != '\0' ? "://" : "", 608 url->user, 609 url->pwd[0] != '\0' ? ":" : "", 610 url->pwd, 611 url->user[0] != '\0' || url->pwd[0] != '\0' ? "@" : "", 612 url->host, 613 (int)url->port, 614 url->doc); 615 else { 616 snprintf(doc, total, "%s%s%s%s%s%s%s%s", 617 url->scheme, 618 url->scheme[0] != '\0' ? "://" : "", 619 url->user, 620 url->pwd[0] != '\0' ? ":" : "", 621 url->pwd, 622 url->user[0] != '\0' || url->pwd[0] != '\0' ? "@" : "", 623 url->host, 624 url->doc); 625 } 626 return doc; 627 } 628