1 /* $Id: http.c,v 1.15 2021/09/14 16:37:20 tb Exp $ */ 2 /* 3 * Copyright (c) 2016 Kristaps Dzonsons <kristaps@bsd.lv> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 */ 17 18 #include <sys/types.h> 19 #include <sys/socket.h> 20 21 #include <arpa/inet.h> 22 #include <netinet/in.h> 23 24 #include <ctype.h> 25 #include <err.h> 26 #include <limits.h> 27 #include <netdb.h> 28 #include <stdio.h> 29 #include <stdint.h> 30 #include <stdlib.h> 31 #include <string.h> 32 #include <tls.h> 33 #include <unistd.h> 34 35 #include "http.h" 36 #include <tls.h> 37 38 /* 39 * A buffer for transferring HTTP/S data. 40 */ 41 struct httpxfer { 42 char *hbuf; /* header transfer buffer */ 43 size_t hbufsz; /* header buffer size */ 44 int headok; /* header has been parsed */ 45 char *bbuf; /* body transfer buffer */ 46 size_t bbufsz; /* body buffer size */ 47 int bodyok; /* body has been parsed */ 48 char *headbuf; /* lookaside buffer for headers */ 49 struct httphead *head; /* parsed headers */ 50 size_t headsz; /* number of headers */ 51 }; 52 53 /* 54 * An HTTP/S connection object. 55 */ 56 struct http { 57 int fd; /* connected socket */ 58 short port; /* port number */ 59 struct source src; /* endpoint (raw) host */ 60 char *path; /* path to request */ 61 char *host; /* name of endpoint host */ 62 struct tls *ctx; /* if TLS */ 63 writefp writer; /* write function */ 64 readfp reader; /* read function */ 65 }; 66 67 struct tls_config *tlscfg; 68 69 static ssize_t 70 dosysread(char *buf, size_t sz, const struct http *http) 71 { 72 ssize_t rc; 73 74 rc = read(http->fd, buf, sz); 75 if (rc == -1) 76 warn("%s: read", http->src.ip); 77 return rc; 78 } 79 80 static ssize_t 81 dosyswrite(const void *buf, size_t sz, const struct http *http) 82 { 83 ssize_t rc; 84 85 rc = write(http->fd, buf, sz); 86 if (rc == -1) 87 warn("%s: write", http->src.ip); 88 return rc; 89 } 90 91 static ssize_t 92 dotlsread(char *buf, size_t sz, const struct http *http) 93 { 94 ssize_t rc; 95 96 do { 97 rc = tls_read(http->ctx, buf, sz); 98 } while (rc == TLS_WANT_POLLIN || rc == TLS_WANT_POLLOUT); 99 100 if (rc == -1) 101 warnx("%s: tls_read: %s", http->src.ip, 102 tls_error(http->ctx)); 103 return rc; 104 } 105 106 static ssize_t 107 dotlswrite(const void *buf, size_t sz, const struct http *http) 108 { 109 ssize_t rc; 110 111 do { 112 rc = tls_write(http->ctx, buf, sz); 113 } while (rc == TLS_WANT_POLLIN || rc == TLS_WANT_POLLOUT); 114 115 if (rc == -1) 116 warnx("%s: tls_write: %s", http->src.ip, 117 tls_error(http->ctx)); 118 return rc; 119 } 120 121 int 122 http_init(void) 123 { 124 if (tlscfg != NULL) 125 return 0; 126 127 tlscfg = tls_config_new(); 128 if (tlscfg == NULL) { 129 warn("tls_config_new"); 130 goto err; 131 } 132 133 if (tls_config_set_ca_file(tlscfg, tls_default_ca_cert_file()) == -1) { 134 warn("tls_config_set_ca_file: %s", tls_config_error(tlscfg)); 135 goto err; 136 } 137 138 return 0; 139 140 err: 141 tls_config_free(tlscfg); 142 tlscfg = NULL; 143 144 return -1; 145 } 146 147 static ssize_t 148 http_read(char *buf, size_t sz, const struct http *http) 149 { 150 ssize_t ssz, xfer; 151 152 xfer = 0; 153 do { 154 if ((ssz = http->reader(buf, sz, http)) < 0) 155 return -1; 156 if (ssz == 0) 157 break; 158 xfer += ssz; 159 sz -= ssz; 160 buf += ssz; 161 } while (ssz > 0 && sz > 0); 162 163 return xfer; 164 } 165 166 static int 167 http_write(const char *buf, size_t sz, const struct http *http) 168 { 169 ssize_t ssz, xfer; 170 171 xfer = sz; 172 while (sz > 0) { 173 if ((ssz = http->writer(buf, sz, http)) < 0) 174 return -1; 175 sz -= ssz; 176 buf += (size_t)ssz; 177 } 178 return xfer; 179 } 180 181 void 182 http_disconnect(struct http *http) 183 { 184 int rc; 185 186 if (http->ctx != NULL) { 187 /* TLS connection. */ 188 do { 189 rc = tls_close(http->ctx); 190 } while (rc == TLS_WANT_POLLIN || rc == TLS_WANT_POLLOUT); 191 192 if (rc < 0) 193 warnx("%s: tls_close: %s", http->src.ip, 194 tls_error(http->ctx)); 195 196 tls_free(http->ctx); 197 } 198 if (http->fd != -1) { 199 if (close(http->fd) == -1) 200 warn("%s: close", http->src.ip); 201 } 202 203 http->fd = -1; 204 http->ctx = NULL; 205 } 206 207 void 208 http_free(struct http *http) 209 { 210 211 if (http == NULL) 212 return; 213 http_disconnect(http); 214 free(http->host); 215 free(http->path); 216 free(http->src.ip); 217 free(http); 218 } 219 220 struct http * 221 http_alloc(const struct source *addrs, size_t addrsz, 222 const char *host, short port, const char *path) 223 { 224 struct sockaddr_storage ss; 225 int family, fd, c; 226 socklen_t len; 227 size_t cur, i = 0; 228 struct http *http; 229 230 /* Do this while we still have addresses to connect. */ 231 again: 232 if (i == addrsz) 233 return NULL; 234 cur = i++; 235 236 /* Convert to PF_INET or PF_INET6 address from string. */ 237 238 memset(&ss, 0, sizeof(struct sockaddr_storage)); 239 240 if (addrs[cur].family == 4) { 241 family = PF_INET; 242 ((struct sockaddr_in *)&ss)->sin_family = AF_INET; 243 ((struct sockaddr_in *)&ss)->sin_port = htons(port); 244 c = inet_pton(AF_INET, addrs[cur].ip, 245 &((struct sockaddr_in *)&ss)->sin_addr); 246 len = sizeof(struct sockaddr_in); 247 } else if (addrs[cur].family == 6) { 248 family = PF_INET6; 249 ((struct sockaddr_in6 *)&ss)->sin6_family = AF_INET6; 250 ((struct sockaddr_in6 *)&ss)->sin6_port = htons(port); 251 c = inet_pton(AF_INET6, addrs[cur].ip, 252 &((struct sockaddr_in6 *)&ss)->sin6_addr); 253 len = sizeof(struct sockaddr_in6); 254 } else { 255 warnx("%s: unknown family", addrs[cur].ip); 256 goto again; 257 } 258 259 if (c < 0) { 260 warn("%s: inet_ntop", addrs[cur].ip); 261 goto again; 262 } else if (c == 0) { 263 warnx("%s: inet_ntop", addrs[cur].ip); 264 goto again; 265 } 266 267 /* Create socket and connect. */ 268 269 fd = socket(family, SOCK_STREAM, 0); 270 if (fd == -1) { 271 warn("%s: socket", addrs[cur].ip); 272 goto again; 273 } else if (connect(fd, (struct sockaddr *)&ss, len) == -1) { 274 warn("%s: connect", addrs[cur].ip); 275 close(fd); 276 goto again; 277 } 278 279 /* Allocate the communicator. */ 280 281 http = calloc(1, sizeof(struct http)); 282 if (http == NULL) { 283 warn("calloc"); 284 close(fd); 285 return NULL; 286 } 287 http->fd = fd; 288 http->port = port; 289 http->src.family = addrs[cur].family; 290 http->src.ip = strdup(addrs[cur].ip); 291 http->host = strdup(host); 292 http->path = strdup(path); 293 if (http->src.ip == NULL || http->host == NULL || http->path == NULL) { 294 warn("strdup"); 295 goto err; 296 } 297 298 /* If necessary, do our TLS setup. */ 299 300 if (port != 443) { 301 http->writer = dosyswrite; 302 http->reader = dosysread; 303 return http; 304 } 305 306 http->writer = dotlswrite; 307 http->reader = dotlsread; 308 309 if ((http->ctx = tls_client()) == NULL) { 310 warn("tls_client"); 311 goto err; 312 } else if (tls_configure(http->ctx, tlscfg) == -1) { 313 warnx("%s: tls_configure: %s", 314 http->src.ip, tls_error(http->ctx)); 315 goto err; 316 } 317 318 if (tls_connect_socket(http->ctx, http->fd, http->host) != 0) { 319 warnx("%s: tls_connect_socket: %s, %s", http->src.ip, 320 http->host, tls_error(http->ctx)); 321 goto err; 322 } 323 324 return http; 325 err: 326 http_free(http); 327 return NULL; 328 } 329 330 struct httpxfer * 331 http_open(const struct http *http, const void *p, size_t psz) 332 { 333 char *req; 334 int c; 335 struct httpxfer *trans; 336 337 if (p == NULL) { 338 c = asprintf(&req, 339 "GET %s HTTP/1.0\r\n" 340 "Host: %s\r\n" 341 "\r\n", 342 http->path, http->host); 343 } else { 344 c = asprintf(&req, 345 "POST %s HTTP/1.0\r\n" 346 "Host: %s\r\n" 347 "Content-Type: application/ocsp-request\r\n" 348 "Content-Length: %zu\r\n" 349 "\r\n", 350 http->path, http->host, psz); 351 } 352 if (c == -1) { 353 warn("asprintf"); 354 return NULL; 355 } else if (!http_write(req, c, http)) { 356 free(req); 357 return NULL; 358 } else if (p != NULL && !http_write(p, psz, http)) { 359 free(req); 360 return NULL; 361 } 362 363 free(req); 364 365 trans = calloc(1, sizeof(struct httpxfer)); 366 if (trans == NULL) 367 warn("calloc"); 368 return trans; 369 } 370 371 void 372 http_close(struct httpxfer *x) 373 { 374 375 if (x == NULL) 376 return; 377 free(x->hbuf); 378 free(x->bbuf); 379 free(x->headbuf); 380 free(x->head); 381 free(x); 382 } 383 384 /* 385 * Read the HTTP body from the wire. 386 * If invoked multiple times, this will return the same pointer with the 387 * same data (or NULL, if the original invocation returned NULL). 388 * Returns NULL if read or allocation errors occur. 389 * You must not free the returned pointer. 390 */ 391 char * 392 http_body_read(const struct http *http, struct httpxfer *trans, size_t *sz) 393 { 394 char buf[BUFSIZ]; 395 ssize_t ssz; 396 void *pp; 397 size_t szp; 398 399 if (sz == NULL) 400 sz = &szp; 401 402 /* Have we already parsed this? */ 403 404 if (trans->bodyok > 0) { 405 *sz = trans->bbufsz; 406 return trans->bbuf; 407 } else if (trans->bodyok < 0) 408 return NULL; 409 410 *sz = 0; 411 trans->bodyok = -1; 412 413 do { 414 /* If less than sizeof(buf), at EOF. */ 415 if ((ssz = http_read(buf, sizeof(buf), http)) < 0) 416 return NULL; 417 else if (ssz == 0) 418 break; 419 420 pp = recallocarray(trans->bbuf, 421 trans->bbufsz, trans->bbufsz + ssz, 1); 422 if (pp == NULL) { 423 warn("recallocarray"); 424 return NULL; 425 } 426 trans->bbuf = pp; 427 memcpy(trans->bbuf + trans->bbufsz, buf, ssz); 428 trans->bbufsz += ssz; 429 } while (ssz == sizeof(buf)); 430 431 trans->bodyok = 1; 432 *sz = trans->bbufsz; 433 return trans->bbuf; 434 } 435 436 struct httphead * 437 http_head_get(const char *v, struct httphead *h, size_t hsz) 438 { 439 size_t i; 440 441 for (i = 0; i < hsz; i++) { 442 if (strcmp(h[i].key, v)) 443 continue; 444 return &h[i]; 445 } 446 return NULL; 447 } 448 449 /* 450 * Look through the headers and determine our HTTP code. 451 * This will return -1 on failure, otherwise the code. 452 */ 453 int 454 http_head_status(const struct http *http, struct httphead *h, size_t sz) 455 { 456 int rc; 457 unsigned int code; 458 struct httphead *st; 459 460 if ((st = http_head_get("Status", h, sz)) == NULL) { 461 warnx("%s: no status header", http->src.ip); 462 return -1; 463 } 464 465 rc = sscanf(st->val, "%*s %u %*s", &code); 466 if (rc < 0) { 467 warn("sscanf"); 468 return -1; 469 } else if (rc != 1) { 470 warnx("%s: cannot convert status header", http->src.ip); 471 return -1; 472 } 473 return code; 474 } 475 476 /* 477 * Parse headers from the transfer. 478 * Malformed headers are skipped. 479 * A special "Status" header is added for the HTTP status line. 480 * This can only happen once http_head_read has been called with 481 * success. 482 * This can be invoked multiple times: it will only parse the headers 483 * once and after that it will just return the cache. 484 * You must not free the returned pointer. 485 * If the original header parse failed, or if memory allocation fails 486 * internally, this returns NULL. 487 */ 488 struct httphead * 489 http_head_parse(const struct http *http, struct httpxfer *trans, size_t *sz) 490 { 491 size_t hsz, szp; 492 struct httphead *h; 493 char *cp, *ep, *ccp, *buf; 494 495 if (sz == NULL) 496 sz = &szp; 497 498 /* 499 * If we've already parsed the headers, return the 500 * previously-parsed buffer now. 501 * If we have errors on the stream, return NULL now. 502 */ 503 504 if (trans->head != NULL) { 505 *sz = trans->headsz; 506 return trans->head; 507 } else if (trans->headok <= 0) 508 return NULL; 509 510 if ((buf = strdup(trans->hbuf)) == NULL) { 511 warn("strdup"); 512 return NULL; 513 } 514 hsz = 0; 515 cp = buf; 516 517 do { 518 if ((cp = strstr(cp, "\r\n")) != NULL) 519 cp += 2; 520 hsz++; 521 } while (cp != NULL); 522 523 /* 524 * Allocate headers, then step through the data buffer, parsing 525 * out headers as we have them. 526 * We know at this point that the buffer is NUL-terminated in 527 * the usual way. 528 */ 529 530 h = calloc(hsz, sizeof(struct httphead)); 531 if (h == NULL) { 532 warn("calloc"); 533 free(buf); 534 return NULL; 535 } 536 537 *sz = hsz; 538 hsz = 0; 539 cp = buf; 540 541 do { 542 if ((ep = strstr(cp, "\r\n")) != NULL) { 543 *ep = '\0'; 544 ep += 2; 545 } 546 if (hsz == 0) { 547 h[hsz].key = "Status"; 548 h[hsz++].val = cp; 549 continue; 550 } 551 552 /* Skip bad headers. */ 553 if ((ccp = strchr(cp, ':')) == NULL) { 554 warnx("%s: header without separator", http->src.ip); 555 continue; 556 } 557 558 *ccp++ = '\0'; 559 while (isspace((int)*ccp)) 560 ccp++; 561 h[hsz].key = cp; 562 h[hsz++].val = ccp; 563 } while ((cp = ep) != NULL); 564 565 trans->headbuf = buf; 566 trans->head = h; 567 trans->headsz = hsz; 568 return h; 569 } 570 571 /* 572 * Read the HTTP headers from the wire. 573 * If invoked multiple times, this will return the same pointer with the 574 * same data (or NULL, if the original invocation returned NULL). 575 * Returns NULL if read or allocation errors occur. 576 * You must not free the returned pointer. 577 */ 578 char * 579 http_head_read(const struct http *http, struct httpxfer *trans, size_t *sz) 580 { 581 char buf[BUFSIZ]; 582 ssize_t ssz; 583 char *ep; 584 void *pp; 585 size_t szp; 586 587 if (sz == NULL) 588 sz = &szp; 589 590 /* Have we already parsed this? */ 591 592 if (trans->headok > 0) { 593 *sz = trans->hbufsz; 594 return trans->hbuf; 595 } else if (trans->headok < 0) 596 return NULL; 597 598 *sz = 0; 599 ep = NULL; 600 trans->headok = -1; 601 602 /* 603 * Begin by reading by BUFSIZ blocks until we reach the header 604 * termination marker (two CRLFs). 605 * We might read into our body, but that's ok: we'll copy out 606 * the body parts into our body buffer afterward. 607 */ 608 609 do { 610 /* If less than sizeof(buf), at EOF. */ 611 if ((ssz = http_read(buf, sizeof(buf), http)) < 0) 612 return NULL; 613 else if (ssz == 0) 614 break; 615 pp = realloc(trans->hbuf, trans->hbufsz + ssz); 616 if (pp == NULL) { 617 warn("realloc"); 618 return NULL; 619 } 620 trans->hbuf = pp; 621 memcpy(trans->hbuf + trans->hbufsz, buf, ssz); 622 trans->hbufsz += ssz; 623 /* Search for end of headers marker. */ 624 ep = memmem(trans->hbuf, trans->hbufsz, "\r\n\r\n", 4); 625 } while (ep == NULL && ssz == sizeof(buf)); 626 627 if (ep == NULL) { 628 warnx("%s: partial transfer", http->src.ip); 629 return NULL; 630 } 631 *ep = '\0'; 632 633 /* 634 * The header data is invalid if it has any binary characters in 635 * it: check that now. 636 * This is important because we want to guarantee that all 637 * header keys and pairs are properly NUL-terminated. 638 */ 639 640 if (strlen(trans->hbuf) != (uintptr_t)(ep - trans->hbuf)) { 641 warnx("%s: binary data in header", http->src.ip); 642 return NULL; 643 } 644 645 /* 646 * Copy remaining buffer into body buffer. 647 */ 648 649 ep += 4; 650 trans->bbufsz = (trans->hbuf + trans->hbufsz) - ep; 651 trans->bbuf = malloc(trans->bbufsz); 652 if (trans->bbuf == NULL) { 653 warn("malloc"); 654 return NULL; 655 } 656 memcpy(trans->bbuf, ep, trans->bbufsz); 657 658 trans->headok = 1; 659 *sz = trans->hbufsz; 660 return trans->hbuf; 661 } 662 663 void 664 http_get_free(struct httpget *g) 665 { 666 667 if (g == NULL) 668 return; 669 http_close(g->xfer); 670 http_free(g->http); 671 free(g); 672 } 673 674 struct httpget * 675 http_get(const struct source *addrs, size_t addrsz, const char *domain, 676 short port, const char *path, const void *post, size_t postsz) 677 { 678 struct http *h; 679 struct httpxfer *x; 680 struct httpget *g; 681 struct httphead *head; 682 size_t headsz, bodsz, headrsz; 683 int code; 684 char *bod, *headr; 685 686 h = http_alloc(addrs, addrsz, domain, port, path); 687 if (h == NULL) 688 return NULL; 689 690 if ((x = http_open(h, post, postsz)) == NULL) { 691 http_free(h); 692 return NULL; 693 } else if ((headr = http_head_read(h, x, &headrsz)) == NULL) { 694 http_close(x); 695 http_free(h); 696 return NULL; 697 } else if ((bod = http_body_read(h, x, &bodsz)) == NULL) { 698 http_close(x); 699 http_free(h); 700 return NULL; 701 } 702 703 http_disconnect(h); 704 705 if ((head = http_head_parse(h, x, &headsz)) == NULL) { 706 http_close(x); 707 http_free(h); 708 return NULL; 709 } else if ((code = http_head_status(h, head, headsz)) < 0) { 710 http_close(x); 711 http_free(h); 712 return NULL; 713 } 714 715 if ((g = calloc(1, sizeof(struct httpget))) == NULL) { 716 warn("calloc"); 717 http_close(x); 718 http_free(h); 719 return NULL; 720 } 721 722 g->headpart = headr; 723 g->headpartsz = headrsz; 724 g->bodypart = bod; 725 g->bodypartsz = bodsz; 726 g->head = head; 727 g->headsz = headsz; 728 g->code = code; 729 g->xfer = x; 730 g->http = h; 731 return g; 732 } 733 734 #if 0 735 int 736 main(void) 737 { 738 struct httpget *g; 739 struct httphead *httph; 740 size_t i, httphsz; 741 struct source addrs[2]; 742 size_t addrsz; 743 744 #if 0 745 addrs[0].ip = "127.0.0.1"; 746 addrs[0].family = 4; 747 addrsz = 1; 748 #else 749 addrs[0].ip = "2a00:1450:400a:806::2004"; 750 addrs[0].family = 6; 751 addrs[1].ip = "193.135.3.123"; 752 addrs[1].family = 4; 753 addrsz = 2; 754 #endif 755 756 if (http_init() == -1) 757 errx(EXIT_FAILURE, "http_init"); 758 759 #if 0 760 g = http_get(addrs, addrsz, "localhost", 80, "/index.html"); 761 #else 762 g = http_get(addrs, addrsz, "www.google.ch", 80, "/index.html", 763 NULL, 0); 764 #endif 765 766 if (g == NULL) 767 errx(EXIT_FAILURE, "http_get"); 768 769 httph = http_head_parse(g->http, g->xfer, &httphsz); 770 warnx("code: %d", g->code); 771 772 for (i = 0; i < httphsz; i++) 773 warnx("head: [%s]=[%s]", httph[i].key, httph[i].val); 774 775 http_get_free(g); 776 return (EXIT_SUCCESS); 777 } 778 #endif 779