1 /* $Id: http.c,v 1.17 2023/04/19 12:58:16 jsg Exp $ */ 2 /* 3 * Copyright (c) 2016 Kristaps Dzonsons <kristaps@bsd.lv> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 */ 17 18 #include <sys/types.h> 19 #include <sys/socket.h> 20 21 #include <arpa/inet.h> 22 #include <netinet/in.h> 23 24 #include <ctype.h> 25 #include <err.h> 26 #include <limits.h> 27 #include <netdb.h> 28 #include <stdio.h> 29 #include <stdint.h> 30 #include <stdlib.h> 31 #include <string.h> 32 #include <tls.h> 33 #include <unistd.h> 34 35 #include "http.h" 36 37 /* 38 * A buffer for transferring HTTP/S data. 39 */ 40 struct httpxfer { 41 char *hbuf; /* header transfer buffer */ 42 size_t hbufsz; /* header buffer size */ 43 int headok; /* header has been parsed */ 44 char *bbuf; /* body transfer buffer */ 45 size_t bbufsz; /* body buffer size */ 46 int bodyok; /* body has been parsed */ 47 char *headbuf; /* lookaside buffer for headers */ 48 struct httphead *head; /* parsed headers */ 49 size_t headsz; /* number of headers */ 50 }; 51 52 /* 53 * An HTTP/S connection object. 54 */ 55 struct http { 56 int fd; /* connected socket */ 57 short port; /* port number */ 58 struct source src; /* endpoint (raw) host */ 59 char *path; /* path to request */ 60 char *host; /* name of endpoint host */ 61 struct tls *ctx; /* if TLS */ 62 writefp writer; /* write function */ 63 readfp reader; /* read function */ 64 }; 65 66 struct tls_config *tlscfg; 67 68 static ssize_t 69 dosysread(char *buf, size_t sz, const struct http *http) 70 { 71 ssize_t rc; 72 73 rc = read(http->fd, buf, sz); 74 if (rc == -1) 75 warn("%s: read", http->src.ip); 76 return rc; 77 } 78 79 static ssize_t 80 dosyswrite(const void *buf, size_t sz, const struct http *http) 81 { 82 ssize_t rc; 83 84 rc = write(http->fd, buf, sz); 85 if (rc == -1) 86 warn("%s: write", http->src.ip); 87 return rc; 88 } 89 90 static ssize_t 91 dotlsread(char *buf, size_t sz, const struct http *http) 92 { 93 ssize_t rc; 94 95 do { 96 rc = tls_read(http->ctx, buf, sz); 97 } while (rc == TLS_WANT_POLLIN || rc == TLS_WANT_POLLOUT); 98 99 if (rc == -1) 100 warnx("%s: tls_read: %s", http->src.ip, 101 tls_error(http->ctx)); 102 return rc; 103 } 104 105 static ssize_t 106 dotlswrite(const void *buf, size_t sz, const struct http *http) 107 { 108 ssize_t rc; 109 110 do { 111 rc = tls_write(http->ctx, buf, sz); 112 } while (rc == TLS_WANT_POLLIN || rc == TLS_WANT_POLLOUT); 113 114 if (rc == -1) 115 warnx("%s: tls_write: %s", http->src.ip, 116 tls_error(http->ctx)); 117 return rc; 118 } 119 120 int 121 http_init(void) 122 { 123 if (tlscfg != NULL) 124 return 0; 125 126 tlscfg = tls_config_new(); 127 if (tlscfg == NULL) { 128 warn("tls_config_new"); 129 goto err; 130 } 131 132 if (tls_config_set_ca_file(tlscfg, tls_default_ca_cert_file()) == -1) { 133 warn("tls_config_set_ca_file: %s", tls_config_error(tlscfg)); 134 goto err; 135 } 136 137 return 0; 138 139 err: 140 tls_config_free(tlscfg); 141 tlscfg = NULL; 142 143 return -1; 144 } 145 146 static ssize_t 147 http_read(char *buf, size_t sz, const struct http *http) 148 { 149 ssize_t ssz, xfer; 150 151 xfer = 0; 152 do { 153 if ((ssz = http->reader(buf, sz, http)) < 0) 154 return -1; 155 if (ssz == 0) 156 break; 157 xfer += ssz; 158 sz -= ssz; 159 buf += ssz; 160 } while (ssz > 0 && sz > 0); 161 162 return xfer; 163 } 164 165 static int 166 http_write(const char *buf, size_t sz, const struct http *http) 167 { 168 ssize_t ssz, xfer; 169 170 xfer = sz; 171 while (sz > 0) { 172 if ((ssz = http->writer(buf, sz, http)) < 0) 173 return -1; 174 sz -= ssz; 175 buf += (size_t)ssz; 176 } 177 return xfer; 178 } 179 180 void 181 http_disconnect(struct http *http) 182 { 183 int rc; 184 185 if (http->ctx != NULL) { 186 /* TLS connection. */ 187 do { 188 rc = tls_close(http->ctx); 189 } while (rc == TLS_WANT_POLLIN || rc == TLS_WANT_POLLOUT); 190 191 if (rc < 0) 192 warnx("%s: tls_close: %s", http->src.ip, 193 tls_error(http->ctx)); 194 195 tls_free(http->ctx); 196 } 197 if (http->fd != -1) { 198 if (close(http->fd) == -1) 199 warn("%s: close", http->src.ip); 200 } 201 202 http->fd = -1; 203 http->ctx = NULL; 204 } 205 206 void 207 http_free(struct http *http) 208 { 209 210 if (http == NULL) 211 return; 212 http_disconnect(http); 213 free(http->host); 214 free(http->path); 215 free(http->src.ip); 216 free(http); 217 } 218 219 struct http * 220 http_alloc(const struct source *addrs, size_t addrsz, 221 const char *host, short port, const char *path) 222 { 223 struct sockaddr_storage ss; 224 int family, fd, c; 225 socklen_t len; 226 size_t cur, i = 0; 227 struct http *http; 228 229 /* Do this while we still have addresses to connect. */ 230 again: 231 if (i == addrsz) 232 return NULL; 233 cur = i++; 234 235 /* Convert to PF_INET or PF_INET6 address from string. */ 236 237 memset(&ss, 0, sizeof(struct sockaddr_storage)); 238 239 if (addrs[cur].family == 4) { 240 family = PF_INET; 241 ((struct sockaddr_in *)&ss)->sin_family = AF_INET; 242 ((struct sockaddr_in *)&ss)->sin_port = htons(port); 243 c = inet_pton(AF_INET, addrs[cur].ip, 244 &((struct sockaddr_in *)&ss)->sin_addr); 245 len = sizeof(struct sockaddr_in); 246 } else if (addrs[cur].family == 6) { 247 family = PF_INET6; 248 ((struct sockaddr_in6 *)&ss)->sin6_family = AF_INET6; 249 ((struct sockaddr_in6 *)&ss)->sin6_port = htons(port); 250 c = inet_pton(AF_INET6, addrs[cur].ip, 251 &((struct sockaddr_in6 *)&ss)->sin6_addr); 252 len = sizeof(struct sockaddr_in6); 253 } else { 254 warnx("%s: unknown family", addrs[cur].ip); 255 goto again; 256 } 257 258 if (c < 0) { 259 warn("%s: inet_ntop", addrs[cur].ip); 260 goto again; 261 } else if (c == 0) { 262 warnx("%s: inet_ntop", addrs[cur].ip); 263 goto again; 264 } 265 266 /* Create socket and connect. */ 267 268 fd = socket(family, SOCK_STREAM, 0); 269 if (fd == -1) { 270 warn("%s: socket", addrs[cur].ip); 271 goto again; 272 } else if (connect(fd, (struct sockaddr *)&ss, len) == -1) { 273 warn("%s: connect", addrs[cur].ip); 274 close(fd); 275 goto again; 276 } 277 278 /* Allocate the communicator. */ 279 280 http = calloc(1, sizeof(struct http)); 281 if (http == NULL) { 282 warn("calloc"); 283 close(fd); 284 return NULL; 285 } 286 http->fd = fd; 287 http->port = port; 288 http->src.family = addrs[cur].family; 289 http->src.ip = strdup(addrs[cur].ip); 290 http->host = strdup(host); 291 http->path = strdup(path); 292 if (http->src.ip == NULL || http->host == NULL || http->path == NULL) { 293 warn("strdup"); 294 goto err; 295 } 296 297 /* If necessary, do our TLS setup. */ 298 299 if (port != 443) { 300 http->writer = dosyswrite; 301 http->reader = dosysread; 302 return http; 303 } 304 305 http->writer = dotlswrite; 306 http->reader = dotlsread; 307 308 if ((http->ctx = tls_client()) == NULL) { 309 warn("tls_client"); 310 goto err; 311 } else if (tls_configure(http->ctx, tlscfg) == -1) { 312 warnx("%s: tls_configure: %s", 313 http->src.ip, tls_error(http->ctx)); 314 goto err; 315 } 316 317 if (tls_connect_socket(http->ctx, http->fd, http->host) != 0) { 318 warnx("%s: tls_connect_socket: %s, %s", http->src.ip, 319 http->host, tls_error(http->ctx)); 320 goto err; 321 } 322 323 return http; 324 err: 325 http_free(http); 326 return NULL; 327 } 328 329 struct httpxfer * 330 http_open(const struct http *http, const void *p, size_t psz) 331 { 332 char *req; 333 int c; 334 struct httpxfer *trans; 335 336 if (p == NULL) { 337 c = asprintf(&req, 338 "GET %s HTTP/1.0\r\n" 339 "Host: %s\r\n" 340 "\r\n", 341 http->path, http->host); 342 } else { 343 c = asprintf(&req, 344 "POST %s HTTP/1.0\r\n" 345 "Host: %s\r\n" 346 "Content-Type: application/ocsp-request\r\n" 347 "Content-Length: %zu\r\n" 348 "\r\n", 349 http->path, http->host, psz); 350 } 351 if (c == -1) { 352 warn("asprintf"); 353 return NULL; 354 } else if (!http_write(req, c, http)) { 355 free(req); 356 return NULL; 357 } else if (p != NULL && !http_write(p, psz, http)) { 358 free(req); 359 return NULL; 360 } 361 362 free(req); 363 364 trans = calloc(1, sizeof(struct httpxfer)); 365 if (trans == NULL) 366 warn("calloc"); 367 return trans; 368 } 369 370 void 371 http_close(struct httpxfer *x) 372 { 373 374 if (x == NULL) 375 return; 376 free(x->hbuf); 377 free(x->bbuf); 378 free(x->headbuf); 379 free(x->head); 380 free(x); 381 } 382 383 /* 384 * Read the HTTP body from the wire. 385 * If invoked multiple times, this will return the same pointer with the 386 * same data (or NULL, if the original invocation returned NULL). 387 * Returns NULL if read or allocation errors occur. 388 * You must not free the returned pointer. 389 */ 390 char * 391 http_body_read(const struct http *http, struct httpxfer *trans, size_t *sz) 392 { 393 char buf[BUFSIZ]; 394 ssize_t ssz; 395 void *pp; 396 size_t szp; 397 398 if (sz == NULL) 399 sz = &szp; 400 401 /* Have we already parsed this? */ 402 403 if (trans->bodyok > 0) { 404 *sz = trans->bbufsz; 405 return trans->bbuf; 406 } else if (trans->bodyok < 0) 407 return NULL; 408 409 *sz = 0; 410 trans->bodyok = -1; 411 412 do { 413 /* If less than sizeof(buf), at EOF. */ 414 if ((ssz = http_read(buf, sizeof(buf), http)) < 0) 415 return NULL; 416 else if (ssz == 0) 417 break; 418 419 pp = recallocarray(trans->bbuf, 420 trans->bbufsz, trans->bbufsz + ssz, 1); 421 if (pp == NULL) { 422 warn("recallocarray"); 423 return NULL; 424 } 425 trans->bbuf = pp; 426 memcpy(trans->bbuf + trans->bbufsz, buf, ssz); 427 trans->bbufsz += ssz; 428 } while (ssz == sizeof(buf)); 429 430 trans->bodyok = 1; 431 *sz = trans->bbufsz; 432 return trans->bbuf; 433 } 434 435 struct httphead * 436 http_head_get(const char *v, struct httphead *h, size_t hsz) 437 { 438 size_t i; 439 440 for (i = 0; i < hsz; i++) { 441 if (strcmp(h[i].key, v)) 442 continue; 443 return &h[i]; 444 } 445 return NULL; 446 } 447 448 /* 449 * Look through the headers and determine our HTTP code. 450 * This will return -1 on failure, otherwise the code. 451 */ 452 int 453 http_head_status(const struct http *http, struct httphead *h, size_t sz) 454 { 455 int rc; 456 unsigned int code; 457 struct httphead *st; 458 459 if ((st = http_head_get("Status", h, sz)) == NULL) { 460 warnx("%s: no status header", http->src.ip); 461 return -1; 462 } 463 464 rc = sscanf(st->val, "%*s %u %*s", &code); 465 if (rc < 0) { 466 warn("sscanf"); 467 return -1; 468 } else if (rc != 1) { 469 warnx("%s: cannot convert status header", http->src.ip); 470 return -1; 471 } 472 return code; 473 } 474 475 /* 476 * Parse headers from the transfer. 477 * Malformed headers are skipped. 478 * A special "Status" header is added for the HTTP status line. 479 * This can only happen once http_head_read has been called with 480 * success. 481 * This can be invoked multiple times: it will only parse the headers 482 * once and after that it will just return the cache. 483 * You must not free the returned pointer. 484 * If the original header parse failed, or if memory allocation fails 485 * internally, this returns NULL. 486 */ 487 struct httphead * 488 http_head_parse(const struct http *http, struct httpxfer *trans, size_t *sz) 489 { 490 size_t hsz, szp; 491 struct httphead *h; 492 char *cp, *ep, *ccp, *buf; 493 494 if (sz == NULL) 495 sz = &szp; 496 497 /* 498 * If we've already parsed the headers, return the 499 * previously-parsed buffer now. 500 * If we have errors on the stream, return NULL now. 501 */ 502 503 if (trans->head != NULL) { 504 *sz = trans->headsz; 505 return trans->head; 506 } else if (trans->headok <= 0) 507 return NULL; 508 509 if ((buf = strdup(trans->hbuf)) == NULL) { 510 warn("strdup"); 511 return NULL; 512 } 513 hsz = 0; 514 cp = buf; 515 516 do { 517 if ((cp = strstr(cp, "\r\n")) != NULL) 518 cp += 2; 519 hsz++; 520 } while (cp != NULL); 521 522 /* 523 * Allocate headers, then step through the data buffer, parsing 524 * out headers as we have them. 525 * We know at this point that the buffer is NUL-terminated in 526 * the usual way. 527 */ 528 529 h = calloc(hsz, sizeof(struct httphead)); 530 if (h == NULL) { 531 warn("calloc"); 532 free(buf); 533 return NULL; 534 } 535 536 *sz = hsz; 537 hsz = 0; 538 cp = buf; 539 540 do { 541 if ((ep = strstr(cp, "\r\n")) != NULL) { 542 *ep = '\0'; 543 ep += 2; 544 } 545 if (hsz == 0) { 546 h[hsz].key = "Status"; 547 h[hsz++].val = cp; 548 continue; 549 } 550 551 /* Skip bad headers. */ 552 if ((ccp = strchr(cp, ':')) == NULL) { 553 warnx("%s: header without separator", http->src.ip); 554 continue; 555 } 556 557 *ccp++ = '\0'; 558 while (isspace((unsigned char)*ccp)) 559 ccp++; 560 h[hsz].key = cp; 561 h[hsz++].val = ccp; 562 } while ((cp = ep) != NULL); 563 564 trans->headbuf = buf; 565 trans->head = h; 566 trans->headsz = hsz; 567 return h; 568 } 569 570 /* 571 * Read the HTTP headers from the wire. 572 * If invoked multiple times, this will return the same pointer with the 573 * same data (or NULL, if the original invocation returned NULL). 574 * Returns NULL if read or allocation errors occur. 575 * You must not free the returned pointer. 576 */ 577 char * 578 http_head_read(const struct http *http, struct httpxfer *trans, size_t *sz) 579 { 580 char buf[BUFSIZ]; 581 ssize_t ssz; 582 char *ep; 583 void *pp; 584 size_t szp; 585 586 if (sz == NULL) 587 sz = &szp; 588 589 /* Have we already parsed this? */ 590 591 if (trans->headok > 0) { 592 *sz = trans->hbufsz; 593 return trans->hbuf; 594 } else if (trans->headok < 0) 595 return NULL; 596 597 *sz = 0; 598 ep = NULL; 599 trans->headok = -1; 600 601 /* 602 * Begin by reading by BUFSIZ blocks until we reach the header 603 * termination marker (two CRLFs). 604 * We might read into our body, but that's ok: we'll copy out 605 * the body parts into our body buffer afterward. 606 */ 607 608 do { 609 /* If less than sizeof(buf), at EOF. */ 610 if ((ssz = http_read(buf, sizeof(buf), http)) < 0) 611 return NULL; 612 else if (ssz == 0) 613 break; 614 pp = realloc(trans->hbuf, trans->hbufsz + ssz); 615 if (pp == NULL) { 616 warn("realloc"); 617 return NULL; 618 } 619 trans->hbuf = pp; 620 memcpy(trans->hbuf + trans->hbufsz, buf, ssz); 621 trans->hbufsz += ssz; 622 /* Search for end of headers marker. */ 623 ep = memmem(trans->hbuf, trans->hbufsz, "\r\n\r\n", 4); 624 } while (ep == NULL && ssz == sizeof(buf)); 625 626 if (ep == NULL) { 627 warnx("%s: partial transfer", http->src.ip); 628 return NULL; 629 } 630 *ep = '\0'; 631 632 /* 633 * The header data is invalid if it has any binary characters in 634 * it: check that now. 635 * This is important because we want to guarantee that all 636 * header keys and pairs are properly NUL-terminated. 637 */ 638 639 if (strlen(trans->hbuf) != (uintptr_t)(ep - trans->hbuf)) { 640 warnx("%s: binary data in header", http->src.ip); 641 return NULL; 642 } 643 644 /* 645 * Copy remaining buffer into body buffer. 646 */ 647 648 ep += 4; 649 trans->bbufsz = (trans->hbuf + trans->hbufsz) - ep; 650 trans->bbuf = malloc(trans->bbufsz); 651 if (trans->bbuf == NULL) { 652 warn("malloc"); 653 return NULL; 654 } 655 memcpy(trans->bbuf, ep, trans->bbufsz); 656 657 trans->headok = 1; 658 *sz = trans->hbufsz; 659 return trans->hbuf; 660 } 661 662 void 663 http_get_free(struct httpget *g) 664 { 665 666 if (g == NULL) 667 return; 668 http_close(g->xfer); 669 http_free(g->http); 670 free(g); 671 } 672 673 struct httpget * 674 http_get(const struct source *addrs, size_t addrsz, const char *domain, 675 short port, const char *path, const void *post, size_t postsz) 676 { 677 struct http *h; 678 struct httpxfer *x; 679 struct httpget *g; 680 struct httphead *head; 681 size_t headsz, bodsz, headrsz; 682 int code; 683 char *bod, *headr; 684 685 h = http_alloc(addrs, addrsz, domain, port, path); 686 if (h == NULL) 687 return NULL; 688 689 if ((x = http_open(h, post, postsz)) == NULL) { 690 http_free(h); 691 return NULL; 692 } else if ((headr = http_head_read(h, x, &headrsz)) == NULL) { 693 http_close(x); 694 http_free(h); 695 return NULL; 696 } else if ((bod = http_body_read(h, x, &bodsz)) == NULL) { 697 http_close(x); 698 http_free(h); 699 return NULL; 700 } 701 702 http_disconnect(h); 703 704 if ((head = http_head_parse(h, x, &headsz)) == NULL) { 705 http_close(x); 706 http_free(h); 707 return NULL; 708 } else if ((code = http_head_status(h, head, headsz)) < 0) { 709 http_close(x); 710 http_free(h); 711 return NULL; 712 } 713 714 if ((g = calloc(1, sizeof(struct httpget))) == NULL) { 715 warn("calloc"); 716 http_close(x); 717 http_free(h); 718 return NULL; 719 } 720 721 g->headpart = headr; 722 g->headpartsz = headrsz; 723 g->bodypart = bod; 724 g->bodypartsz = bodsz; 725 g->head = head; 726 g->headsz = headsz; 727 g->code = code; 728 g->xfer = x; 729 g->http = h; 730 return g; 731 } 732 733 #if 0 734 int 735 main(void) 736 { 737 struct httpget *g; 738 struct httphead *httph; 739 size_t i, httphsz; 740 struct source addrs[2]; 741 size_t addrsz; 742 743 #if 0 744 addrs[0].ip = "127.0.0.1"; 745 addrs[0].family = 4; 746 addrsz = 1; 747 #else 748 addrs[0].ip = "2a00:1450:400a:806::2004"; 749 addrs[0].family = 6; 750 addrs[1].ip = "193.135.3.123"; 751 addrs[1].family = 4; 752 addrsz = 2; 753 #endif 754 755 if (http_init() == -1) 756 errx(EXIT_FAILURE, "http_init"); 757 758 #if 0 759 g = http_get(addrs, addrsz, "localhost", 80, "/index.html"); 760 #else 761 g = http_get(addrs, addrsz, "www.google.ch", 80, "/index.html", 762 NULL, 0); 763 #endif 764 765 if (g == NULL) 766 errx(EXIT_FAILURE, "http_get"); 767 768 httph = http_head_parse(g->http, g->xfer, &httphsz); 769 warnx("code: %d", g->code); 770 771 for (i = 0; i < httphsz; i++) 772 warnx("head: [%s]=[%s]", httph[i].key, httph[i].val); 773 774 http_get_free(g); 775 return (EXIT_SUCCESS); 776 } 777 #endif 778