1 /* $NetBSD: tcp_usrreq.c,v 1.184 2014/07/07 17:13:56 rtr Exp $ */ 2 3 /* 4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the project nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 /*- 33 * Copyright (c) 1997, 1998, 2005, 2006 The NetBSD Foundation, Inc. 34 * All rights reserved. 35 * 36 * This code is derived from software contributed to The NetBSD Foundation 37 * by Jason R. Thorpe and Kevin M. Lahey of the Numerical Aerospace Simulation 38 * Facility, NASA Ames Research Center. 39 * This code is derived from software contributed to The NetBSD Foundation 40 * by Charles M. Hannum. 41 * This code is derived from software contributed to The NetBSD Foundation 42 * by Rui Paulo. 43 * 44 * Redistribution and use in source and binary forms, with or without 45 * modification, are permitted provided that the following conditions 46 * are met: 47 * 1. Redistributions of source code must retain the above copyright 48 * notice, this list of conditions and the following disclaimer. 49 * 2. Redistributions in binary form must reproduce the above copyright 50 * notice, this list of conditions and the following disclaimer in the 51 * documentation and/or other materials provided with the distribution. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 54 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 55 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 56 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 57 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 58 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 59 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 60 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 61 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 62 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 63 * POSSIBILITY OF SUCH DAMAGE. 64 */ 65 66 /* 67 * Copyright (c) 1982, 1986, 1988, 1993, 1995 68 * The Regents of the University of California. All rights reserved. 69 * 70 * Redistribution and use in source and binary forms, with or without 71 * modification, are permitted provided that the following conditions 72 * are met: 73 * 1. Redistributions of source code must retain the above copyright 74 * notice, this list of conditions and the following disclaimer. 75 * 2. Redistributions in binary form must reproduce the above copyright 76 * notice, this list of conditions and the following disclaimer in the 77 * documentation and/or other materials provided with the distribution. 78 * 3. Neither the name of the University nor the names of its contributors 79 * may be used to endorse or promote products derived from this software 80 * without specific prior written permission. 81 * 82 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 83 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 84 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 85 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 86 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 87 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 88 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 89 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 90 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 91 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 92 * SUCH DAMAGE. 93 * 94 * @(#)tcp_usrreq.c 8.5 (Berkeley) 6/21/95 95 */ 96 97 /* 98 * TCP protocol interface to socket abstraction. 99 */ 100 101 #include <sys/cdefs.h> 102 __KERNEL_RCSID(0, "$NetBSD: tcp_usrreq.c,v 1.184 2014/07/07 17:13:56 rtr Exp $"); 103 104 #include "opt_inet.h" 105 #include "opt_ipsec.h" 106 #include "opt_tcp_debug.h" 107 #include "opt_mbuftrace.h" 108 109 #include <sys/param.h> 110 #include <sys/systm.h> 111 #include <sys/kernel.h> 112 #include <sys/malloc.h> 113 #include <sys/mbuf.h> 114 #include <sys/socket.h> 115 #include <sys/socketvar.h> 116 #include <sys/protosw.h> 117 #include <sys/errno.h> 118 #include <sys/stat.h> 119 #include <sys/proc.h> 120 #include <sys/domain.h> 121 #include <sys/sysctl.h> 122 #include <sys/kauth.h> 123 #include <sys/uidinfo.h> 124 125 #include <net/if.h> 126 #include <net/route.h> 127 128 #include <netinet/in.h> 129 #include <netinet/in_systm.h> 130 #include <netinet/in_var.h> 131 #include <netinet/ip.h> 132 #include <netinet/in_pcb.h> 133 #include <netinet/ip_var.h> 134 #include <netinet/in_offload.h> 135 136 #ifdef INET6 137 #ifndef INET 138 #include <netinet/in.h> 139 #endif 140 #include <netinet/ip6.h> 141 #include <netinet6/in6_pcb.h> 142 #include <netinet6/ip6_var.h> 143 #include <netinet6/scope6_var.h> 144 #endif 145 146 #include <netinet/tcp.h> 147 #include <netinet/tcp_fsm.h> 148 #include <netinet/tcp_seq.h> 149 #include <netinet/tcp_timer.h> 150 #include <netinet/tcp_var.h> 151 #include <netinet/tcp_private.h> 152 #include <netinet/tcp_congctl.h> 153 #include <netinet/tcpip.h> 154 #include <netinet/tcp_debug.h> 155 #include <netinet/tcp_vtw.h> 156 157 #include "opt_tcp_space.h" 158 159 /* 160 * Process a TCP user request for TCP tb. If this is a send request 161 * then m is the mbuf chain of send data. If this is a timer expiration 162 * (called from the software clock routine), then timertype tells which timer. 163 */ 164 static int 165 tcp_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, 166 struct mbuf *control, struct lwp *l) 167 { 168 struct inpcb *inp; 169 #ifdef INET6 170 struct in6pcb *in6p; 171 #endif 172 struct tcpcb *tp = NULL; 173 int s; 174 int error = 0; 175 #ifdef TCP_DEBUG 176 int ostate = 0; 177 #endif 178 int family; /* family of the socket */ 179 180 KASSERT(req != PRU_ATTACH); 181 KASSERT(req != PRU_DETACH); 182 KASSERT(req != PRU_CONTROL); 183 KASSERT(req != PRU_SENSE); 184 185 family = so->so_proto->pr_domain->dom_family; 186 187 s = splsoftnet(); 188 189 if (req == PRU_PURGEIF) { 190 mutex_enter(softnet_lock); 191 switch (family) { 192 #ifdef INET 193 case PF_INET: 194 in_pcbpurgeif0(&tcbtable, (struct ifnet *)control); 195 in_purgeif((struct ifnet *)control); 196 in_pcbpurgeif(&tcbtable, (struct ifnet *)control); 197 break; 198 #endif 199 #ifdef INET6 200 case PF_INET6: 201 in6_pcbpurgeif0(&tcbtable, (struct ifnet *)control); 202 in6_purgeif((struct ifnet *)control); 203 in6_pcbpurgeif(&tcbtable, (struct ifnet *)control); 204 break; 205 #endif 206 default: 207 mutex_exit(softnet_lock); 208 splx(s); 209 return (EAFNOSUPPORT); 210 } 211 mutex_exit(softnet_lock); 212 splx(s); 213 return (0); 214 } 215 216 KASSERT(solocked(so)); 217 218 switch (family) { 219 #ifdef INET 220 case PF_INET: 221 inp = sotoinpcb(so); 222 #ifdef INET6 223 in6p = NULL; 224 #endif 225 break; 226 #endif 227 #ifdef INET6 228 case PF_INET6: 229 inp = NULL; 230 in6p = sotoin6pcb(so); 231 break; 232 #endif 233 default: 234 splx(s); 235 return EAFNOSUPPORT; 236 } 237 KASSERT(!control || (req == PRU_SEND || req == PRU_SENDOOB)); 238 #ifdef INET6 239 /* XXX: KASSERT((inp != NULL) ^ (in6p != NULL)); */ 240 #endif 241 /* 242 * When a TCP is attached to a socket, then there will be 243 * a (struct inpcb) pointed at by the socket, and this 244 * structure will point at a subsidary (struct tcpcb). 245 */ 246 if (inp == NULL 247 #ifdef INET6 248 && in6p == NULL 249 #endif 250 ) 251 { 252 error = EINVAL; 253 goto release; 254 } 255 #ifdef INET 256 if (inp) { 257 tp = intotcpcb(inp); 258 /* WHAT IF TP IS 0? */ 259 #ifdef KPROF 260 tcp_acounts[tp->t_state][req]++; 261 #endif 262 #ifdef TCP_DEBUG 263 ostate = tp->t_state; 264 #endif 265 } 266 #endif 267 #ifdef INET6 268 if (in6p) { 269 tp = in6totcpcb(in6p); 270 /* WHAT IF TP IS 0? */ 271 #ifdef KPROF 272 tcp_acounts[tp->t_state][req]++; 273 #endif 274 #ifdef TCP_DEBUG 275 ostate = tp->t_state; 276 #endif 277 } 278 #endif 279 280 switch (req) { 281 282 /* 283 * Give the socket an address. 284 */ 285 case PRU_BIND: 286 switch (family) { 287 #ifdef INET 288 case PF_INET: 289 error = in_pcbbind(inp, nam, l); 290 break; 291 #endif 292 #ifdef INET6 293 case PF_INET6: 294 error = in6_pcbbind(in6p, nam, l); 295 if (!error) { 296 /* mapped addr case */ 297 if (IN6_IS_ADDR_V4MAPPED(&in6p->in6p_laddr)) 298 tp->t_family = AF_INET; 299 else 300 tp->t_family = AF_INET6; 301 } 302 break; 303 #endif 304 } 305 break; 306 307 /* 308 * Prepare to accept connections. 309 */ 310 case PRU_LISTEN: 311 #ifdef INET 312 if (inp && inp->inp_lport == 0) { 313 error = in_pcbbind(inp, NULL, l); 314 if (error) 315 break; 316 } 317 #endif 318 #ifdef INET6 319 if (in6p && in6p->in6p_lport == 0) { 320 error = in6_pcbbind(in6p, NULL, l); 321 if (error) 322 break; 323 } 324 #endif 325 tp->t_state = TCPS_LISTEN; 326 break; 327 328 /* 329 * Initiate connection to peer. 330 * Create a template for use in transmissions on this connection. 331 * Enter SYN_SENT state, and mark socket as connecting. 332 * Start keep-alive timer, and seed output sequence space. 333 * Send initial segment on connection. 334 */ 335 case PRU_CONNECT: 336 #ifdef INET 337 if (inp) { 338 if (inp->inp_lport == 0) { 339 error = in_pcbbind(inp, NULL, l); 340 if (error) 341 break; 342 } 343 error = in_pcbconnect(inp, nam, l); 344 } 345 #endif 346 #ifdef INET6 347 if (in6p) { 348 if (in6p->in6p_lport == 0) { 349 error = in6_pcbbind(in6p, NULL, l); 350 if (error) 351 break; 352 } 353 error = in6_pcbconnect(in6p, nam, l); 354 if (!error) { 355 /* mapped addr case */ 356 if (IN6_IS_ADDR_V4MAPPED(&in6p->in6p_faddr)) 357 tp->t_family = AF_INET; 358 else 359 tp->t_family = AF_INET6; 360 } 361 } 362 #endif 363 if (error) 364 break; 365 tp->t_template = tcp_template(tp); 366 if (tp->t_template == 0) { 367 #ifdef INET 368 if (inp) 369 in_pcbdisconnect(inp); 370 #endif 371 #ifdef INET6 372 if (in6p) 373 in6_pcbdisconnect(in6p); 374 #endif 375 error = ENOBUFS; 376 break; 377 } 378 /* 379 * Compute window scaling to request. 380 * XXX: This should be moved to tcp_output(). 381 */ 382 while (tp->request_r_scale < TCP_MAX_WINSHIFT && 383 (TCP_MAXWIN << tp->request_r_scale) < sb_max) 384 tp->request_r_scale++; 385 soisconnecting(so); 386 TCP_STATINC(TCP_STAT_CONNATTEMPT); 387 tp->t_state = TCPS_SYN_SENT; 388 TCP_TIMER_ARM(tp, TCPT_KEEP, tp->t_keepinit); 389 tp->iss = tcp_new_iss(tp, 0); 390 tcp_sendseqinit(tp); 391 error = tcp_output(tp); 392 break; 393 394 /* 395 * Create a TCP connection between two sockets. 396 */ 397 case PRU_CONNECT2: 398 error = EOPNOTSUPP; 399 break; 400 401 /* 402 * Initiate disconnect from peer. 403 * If connection never passed embryonic stage, just drop; 404 * else if don't need to let data drain, then can just drop anyways, 405 * else have to begin TCP shutdown process: mark socket disconnecting, 406 * drain unread data, state switch to reflect user close, and 407 * send segment (e.g. FIN) to peer. Socket will be really disconnected 408 * when peer sends FIN and acks ours. 409 * 410 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB. 411 */ 412 case PRU_DISCONNECT: 413 tp = tcp_disconnect(tp); 414 break; 415 416 /* 417 * Accept a connection. Essentially all the work is 418 * done at higher levels; just return the address 419 * of the peer, storing through addr. 420 */ 421 case PRU_ACCEPT: 422 #ifdef INET 423 if (inp) 424 in_setpeeraddr(inp, nam); 425 #endif 426 #ifdef INET6 427 if (in6p) 428 in6_setpeeraddr(in6p, nam); 429 #endif 430 break; 431 432 /* 433 * Mark the connection as being incapable of further output. 434 */ 435 case PRU_SHUTDOWN: 436 socantsendmore(so); 437 tp = tcp_usrclosed(tp); 438 if (tp) 439 error = tcp_output(tp); 440 break; 441 442 /* 443 * After a receive, possibly send window update to peer. 444 */ 445 case PRU_RCVD: 446 /* 447 * soreceive() calls this function when a user receives 448 * ancillary data on a listening socket. We don't call 449 * tcp_output in such a case, since there is no header 450 * template for a listening socket and hence the kernel 451 * will panic. 452 */ 453 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) != 0) 454 (void) tcp_output(tp); 455 break; 456 457 /* 458 * Do a send by putting data in output queue and updating urgent 459 * marker if URG set. Possibly send more data. 460 */ 461 case PRU_SEND: 462 if (control && control->m_len) { 463 m_freem(control); 464 m_freem(m); 465 error = EINVAL; 466 break; 467 } 468 sbappendstream(&so->so_snd, m); 469 error = tcp_output(tp); 470 break; 471 472 /* 473 * Abort the TCP. 474 */ 475 case PRU_ABORT: 476 tp = tcp_drop(tp, ECONNABORTED); 477 break; 478 479 case PRU_RCVOOB: 480 if (control && control->m_len) { 481 m_freem(control); 482 m_freem(m); 483 error = EINVAL; 484 break; 485 } 486 if ((so->so_oobmark == 0 && 487 (so->so_state & SS_RCVATMARK) == 0) || 488 so->so_options & SO_OOBINLINE || 489 tp->t_oobflags & TCPOOB_HADDATA) { 490 error = EINVAL; 491 break; 492 } 493 if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) { 494 error = EWOULDBLOCK; 495 break; 496 } 497 m->m_len = 1; 498 *mtod(m, char *) = tp->t_iobc; 499 if (((long)nam & MSG_PEEK) == 0) 500 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA); 501 break; 502 503 case PRU_SENDOOB: 504 if (sbspace(&so->so_snd) < -512) { 505 m_freem(m); 506 error = ENOBUFS; 507 break; 508 } 509 /* 510 * According to RFC961 (Assigned Protocols), 511 * the urgent pointer points to the last octet 512 * of urgent data. We continue, however, 513 * to consider it to indicate the first octet 514 * of data past the urgent section. 515 * Otherwise, snd_up should be one lower. 516 */ 517 sbappendstream(&so->so_snd, m); 518 tp->snd_up = tp->snd_una + so->so_snd.sb_cc; 519 tp->t_force = 1; 520 error = tcp_output(tp); 521 tp->t_force = 0; 522 break; 523 524 case PRU_SOCKADDR: 525 #ifdef INET 526 if (inp) 527 in_setsockaddr(inp, nam); 528 #endif 529 #ifdef INET6 530 if (in6p) 531 in6_setsockaddr(in6p, nam); 532 #endif 533 break; 534 535 case PRU_PEERADDR: 536 #ifdef INET 537 if (inp) 538 in_setpeeraddr(inp, nam); 539 #endif 540 #ifdef INET6 541 if (in6p) 542 in6_setpeeraddr(in6p, nam); 543 #endif 544 break; 545 546 default: 547 panic("tcp_usrreq"); 548 } 549 #ifdef TCP_DEBUG 550 if (tp && (so->so_options & SO_DEBUG)) 551 tcp_trace(TA_USER, ostate, tp, NULL, req); 552 #endif 553 554 release: 555 splx(s); 556 return (error); 557 } 558 559 static void 560 change_keepalive(struct socket *so, struct tcpcb *tp) 561 { 562 tp->t_maxidle = tp->t_keepcnt * tp->t_keepintvl; 563 TCP_TIMER_DISARM(tp, TCPT_KEEP); 564 TCP_TIMER_DISARM(tp, TCPT_2MSL); 565 566 if (tp->t_state == TCPS_SYN_RECEIVED || 567 tp->t_state == TCPS_SYN_SENT) { 568 TCP_TIMER_ARM(tp, TCPT_KEEP, tp->t_keepinit); 569 } else if (so->so_options & SO_KEEPALIVE && 570 tp->t_state <= TCPS_CLOSE_WAIT) { 571 TCP_TIMER_ARM(tp, TCPT_KEEP, tp->t_keepintvl); 572 } else { 573 TCP_TIMER_ARM(tp, TCPT_KEEP, tp->t_keepidle); 574 } 575 576 if ((tp->t_state == TCPS_FIN_WAIT_2) && (tp->t_maxidle > 0)) 577 TCP_TIMER_ARM(tp, TCPT_2MSL, tp->t_maxidle); 578 } 579 580 581 int 582 tcp_ctloutput(int op, struct socket *so, struct sockopt *sopt) 583 { 584 int error = 0, s; 585 struct inpcb *inp; 586 #ifdef INET6 587 struct in6pcb *in6p; 588 #endif 589 struct tcpcb *tp; 590 u_int ui; 591 int family; /* family of the socket */ 592 int level, optname, optval; 593 594 level = sopt->sopt_level; 595 optname = sopt->sopt_name; 596 597 family = so->so_proto->pr_domain->dom_family; 598 599 s = splsoftnet(); 600 switch (family) { 601 #ifdef INET 602 case PF_INET: 603 inp = sotoinpcb(so); 604 #ifdef INET6 605 in6p = NULL; 606 #endif 607 break; 608 #endif 609 #ifdef INET6 610 case PF_INET6: 611 inp = NULL; 612 in6p = sotoin6pcb(so); 613 break; 614 #endif 615 default: 616 splx(s); 617 panic("%s: af %d", __func__, family); 618 } 619 #ifndef INET6 620 if (inp == NULL) 621 #else 622 if (inp == NULL && in6p == NULL) 623 #endif 624 { 625 splx(s); 626 return (ECONNRESET); 627 } 628 if (level != IPPROTO_TCP) { 629 switch (family) { 630 #ifdef INET 631 case PF_INET: 632 error = ip_ctloutput(op, so, sopt); 633 break; 634 #endif 635 #ifdef INET6 636 case PF_INET6: 637 error = ip6_ctloutput(op, so, sopt); 638 break; 639 #endif 640 } 641 splx(s); 642 return (error); 643 } 644 if (inp) 645 tp = intotcpcb(inp); 646 #ifdef INET6 647 else if (in6p) 648 tp = in6totcpcb(in6p); 649 #endif 650 else 651 tp = NULL; 652 653 switch (op) { 654 case PRCO_SETOPT: 655 switch (optname) { 656 #ifdef TCP_SIGNATURE 657 case TCP_MD5SIG: 658 error = sockopt_getint(sopt, &optval); 659 if (error) 660 break; 661 if (optval > 0) 662 tp->t_flags |= TF_SIGNATURE; 663 else 664 tp->t_flags &= ~TF_SIGNATURE; 665 break; 666 #endif /* TCP_SIGNATURE */ 667 668 case TCP_NODELAY: 669 error = sockopt_getint(sopt, &optval); 670 if (error) 671 break; 672 if (optval) 673 tp->t_flags |= TF_NODELAY; 674 else 675 tp->t_flags &= ~TF_NODELAY; 676 break; 677 678 case TCP_MAXSEG: 679 error = sockopt_getint(sopt, &optval); 680 if (error) 681 break; 682 if (optval > 0 && optval <= tp->t_peermss) 683 tp->t_peermss = optval; /* limit on send size */ 684 else 685 error = EINVAL; 686 break; 687 #ifdef notyet 688 case TCP_CONGCTL: 689 /* XXX string overflow XXX */ 690 error = tcp_congctl_select(tp, sopt->sopt_data); 691 break; 692 #endif 693 694 case TCP_KEEPIDLE: 695 error = sockopt_get(sopt, &ui, sizeof(ui)); 696 if (error) 697 break; 698 if (ui > 0) { 699 tp->t_keepidle = ui; 700 change_keepalive(so, tp); 701 } else 702 error = EINVAL; 703 break; 704 705 case TCP_KEEPINTVL: 706 error = sockopt_get(sopt, &ui, sizeof(ui)); 707 if (error) 708 break; 709 if (ui > 0) { 710 tp->t_keepintvl = ui; 711 change_keepalive(so, tp); 712 } else 713 error = EINVAL; 714 break; 715 716 case TCP_KEEPCNT: 717 error = sockopt_get(sopt, &ui, sizeof(ui)); 718 if (error) 719 break; 720 if (ui > 0) { 721 tp->t_keepcnt = ui; 722 change_keepalive(so, tp); 723 } else 724 error = EINVAL; 725 break; 726 727 case TCP_KEEPINIT: 728 error = sockopt_get(sopt, &ui, sizeof(ui)); 729 if (error) 730 break; 731 if (ui > 0) { 732 tp->t_keepinit = ui; 733 change_keepalive(so, tp); 734 } else 735 error = EINVAL; 736 break; 737 738 default: 739 error = ENOPROTOOPT; 740 break; 741 } 742 break; 743 744 case PRCO_GETOPT: 745 switch (optname) { 746 #ifdef TCP_SIGNATURE 747 case TCP_MD5SIG: 748 optval = (tp->t_flags & TF_SIGNATURE) ? 1 : 0; 749 error = sockopt_set(sopt, &optval, sizeof(optval)); 750 break; 751 #endif 752 case TCP_NODELAY: 753 optval = tp->t_flags & TF_NODELAY; 754 error = sockopt_set(sopt, &optval, sizeof(optval)); 755 break; 756 case TCP_MAXSEG: 757 optval = tp->t_peermss; 758 error = sockopt_set(sopt, &optval, sizeof(optval)); 759 break; 760 #ifdef notyet 761 case TCP_CONGCTL: 762 break; 763 #endif 764 default: 765 error = ENOPROTOOPT; 766 break; 767 } 768 break; 769 } 770 splx(s); 771 return (error); 772 } 773 774 #ifndef TCP_SENDSPACE 775 #define TCP_SENDSPACE 1024*32 776 #endif 777 int tcp_sendspace = TCP_SENDSPACE; 778 #ifndef TCP_RECVSPACE 779 #define TCP_RECVSPACE 1024*32 780 #endif 781 int tcp_recvspace = TCP_RECVSPACE; 782 783 /* 784 * tcp_attach: attach TCP protocol to socket, allocating internet protocol 785 * control block, TCP control block, buffer space and entering LISTEN state 786 * if to accept connections. 787 */ 788 static int 789 tcp_attach(struct socket *so, int proto) 790 { 791 struct tcpcb *tp; 792 struct inpcb *inp; 793 #ifdef INET6 794 struct in6pcb *in6p; 795 #endif 796 int s, error, family; 797 798 /* Assign the lock (must happen even if we will error out). */ 799 s = splsoftnet(); 800 sosetlock(so); 801 KASSERT(solocked(so)); 802 803 family = so->so_proto->pr_domain->dom_family; 804 switch (family) { 805 #ifdef INET 806 case PF_INET: 807 inp = sotoinpcb(so); 808 #ifdef INET6 809 in6p = NULL; 810 #endif 811 break; 812 #endif 813 #ifdef INET6 814 case PF_INET6: 815 inp = NULL; 816 in6p = sotoin6pcb(so); 817 break; 818 #endif 819 default: 820 error = EAFNOSUPPORT; 821 goto out; 822 } 823 824 KASSERT(inp == NULL); 825 #ifdef INET6 826 KASSERT(in6p == NULL); 827 #endif 828 829 #ifdef MBUFTRACE 830 so->so_mowner = &tcp_sock_mowner; 831 so->so_rcv.sb_mowner = &tcp_sock_rx_mowner; 832 so->so_snd.sb_mowner = &tcp_sock_tx_mowner; 833 #endif 834 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 835 error = soreserve(so, tcp_sendspace, tcp_recvspace); 836 if (error) 837 goto out; 838 } 839 840 so->so_rcv.sb_flags |= SB_AUTOSIZE; 841 so->so_snd.sb_flags |= SB_AUTOSIZE; 842 843 switch (family) { 844 #ifdef INET 845 case PF_INET: 846 error = in_pcballoc(so, &tcbtable); 847 if (error) 848 goto out; 849 inp = sotoinpcb(so); 850 #ifdef INET6 851 in6p = NULL; 852 #endif 853 break; 854 #endif 855 #ifdef INET6 856 case PF_INET6: 857 error = in6_pcballoc(so, &tcbtable); 858 if (error) 859 goto out; 860 inp = NULL; 861 in6p = sotoin6pcb(so); 862 break; 863 #endif 864 default: 865 error = EAFNOSUPPORT; 866 goto out; 867 } 868 if (inp) 869 tp = tcp_newtcpcb(family, (void *)inp); 870 #ifdef INET6 871 else if (in6p) 872 tp = tcp_newtcpcb(family, (void *)in6p); 873 #endif 874 else 875 tp = NULL; 876 877 if (tp == NULL) { 878 int nofd = so->so_state & SS_NOFDREF; /* XXX */ 879 880 so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */ 881 #ifdef INET 882 if (inp) 883 in_pcbdetach(inp); 884 #endif 885 #ifdef INET6 886 if (in6p) 887 in6_pcbdetach(in6p); 888 #endif 889 so->so_state |= nofd; 890 error = ENOBUFS; 891 goto out; 892 } 893 tp->t_state = TCPS_CLOSED; 894 if ((so->so_options & SO_LINGER) && so->so_linger == 0) { 895 so->so_linger = TCP_LINGERTIME; 896 } 897 out: 898 KASSERT(solocked(so)); 899 splx(s); 900 return error; 901 } 902 903 static void 904 tcp_detach(struct socket *so) 905 { 906 struct inpcb *inp; 907 #ifdef INET6 908 struct in6pcb *in6p; 909 #endif 910 struct tcpcb *tp = NULL; 911 int s, family; 912 913 KASSERT(solocked(so)); 914 915 s = splsoftnet(); 916 family = so->so_proto->pr_domain->dom_family; 917 switch (family) { 918 #ifdef INET 919 case PF_INET: 920 inp = sotoinpcb(so); 921 tp = intotcpcb(inp); 922 break; 923 #endif 924 #ifdef INET6 925 case PF_INET6: 926 in6p = sotoin6pcb(so); 927 tp = in6totcpcb(in6p); 928 break; 929 #endif 930 default: 931 splx(s); 932 return; 933 } 934 KASSERT(tp != NULL); 935 (void)tcp_disconnect(tp); 936 splx(s); 937 } 938 939 static int 940 tcp_ioctl(struct socket *so, u_long cmd, void *nam, struct ifnet *ifp) 941 { 942 switch (so->so_proto->pr_domain->dom_family) { 943 #ifdef INET 944 case PF_INET: 945 return in_control(so, cmd, nam, ifp); 946 #endif 947 #ifdef INET6 948 case PF_INET6: 949 return in6_control(so, cmd, nam, ifp); 950 #endif 951 default: 952 return EAFNOSUPPORT; 953 } 954 } 955 956 static int 957 tcp_stat(struct socket *so, struct stat *ub) 958 { 959 KASSERT(solocked(so)); 960 961 /* stat: don't bother with a blocksize. */ 962 return 0; 963 } 964 965 /* 966 * Initiate (or continue) disconnect. 967 * If embryonic state, just send reset (once). 968 * If in ``let data drain'' option and linger null, just drop. 969 * Otherwise (hard), mark socket disconnecting and drop 970 * current input data; switch states based on user close, and 971 * send segment to peer (with FIN). 972 */ 973 struct tcpcb * 974 tcp_disconnect(struct tcpcb *tp) 975 { 976 struct socket *so; 977 978 if (tp->t_inpcb) 979 so = tp->t_inpcb->inp_socket; 980 #ifdef INET6 981 else if (tp->t_in6pcb) 982 so = tp->t_in6pcb->in6p_socket; 983 #endif 984 else 985 so = NULL; 986 987 if (TCPS_HAVEESTABLISHED(tp->t_state) == 0) 988 tp = tcp_close(tp); 989 else if ((so->so_options & SO_LINGER) && so->so_linger == 0) 990 tp = tcp_drop(tp, 0); 991 else { 992 soisdisconnecting(so); 993 sbflush(&so->so_rcv); 994 tp = tcp_usrclosed(tp); 995 if (tp) 996 (void) tcp_output(tp); 997 } 998 return (tp); 999 } 1000 1001 /* 1002 * User issued close, and wish to trail through shutdown states: 1003 * if never received SYN, just forget it. If got a SYN from peer, 1004 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. 1005 * If already got a FIN from peer, then almost done; go to LAST_ACK 1006 * state. In all other cases, have already sent FIN to peer (e.g. 1007 * after PRU_SHUTDOWN), and just have to play tedious game waiting 1008 * for peer to send FIN or not respond to keep-alives, etc. 1009 * We can let the user exit from the close as soon as the FIN is acked. 1010 */ 1011 struct tcpcb * 1012 tcp_usrclosed(struct tcpcb *tp) 1013 { 1014 1015 switch (tp->t_state) { 1016 1017 case TCPS_CLOSED: 1018 case TCPS_LISTEN: 1019 case TCPS_SYN_SENT: 1020 tp->t_state = TCPS_CLOSED; 1021 tp = tcp_close(tp); 1022 break; 1023 1024 case TCPS_SYN_RECEIVED: 1025 case TCPS_ESTABLISHED: 1026 tp->t_state = TCPS_FIN_WAIT_1; 1027 break; 1028 1029 case TCPS_CLOSE_WAIT: 1030 tp->t_state = TCPS_LAST_ACK; 1031 break; 1032 } 1033 if (tp && tp->t_state >= TCPS_FIN_WAIT_2) { 1034 struct socket *so; 1035 if (tp->t_inpcb) 1036 so = tp->t_inpcb->inp_socket; 1037 #ifdef INET6 1038 else if (tp->t_in6pcb) 1039 so = tp->t_in6pcb->in6p_socket; 1040 #endif 1041 else 1042 so = NULL; 1043 if (so) 1044 soisdisconnected(so); 1045 /* 1046 * If we are in FIN_WAIT_2, we arrived here because the 1047 * application did a shutdown of the send side. Like the 1048 * case of a transition from FIN_WAIT_1 to FIN_WAIT_2 after 1049 * a full close, we start a timer to make sure sockets are 1050 * not left in FIN_WAIT_2 forever. 1051 */ 1052 if ((tp->t_state == TCPS_FIN_WAIT_2) && (tp->t_maxidle > 0)) 1053 TCP_TIMER_ARM(tp, TCPT_2MSL, tp->t_maxidle); 1054 else if (tp->t_state == TCPS_TIME_WAIT 1055 && ((tp->t_inpcb 1056 && (tcp4_vtw_enable & 1) 1057 && vtw_add(AF_INET, tp)) 1058 || 1059 (tp->t_in6pcb 1060 && (tcp6_vtw_enable & 1) 1061 && vtw_add(AF_INET6, tp)))) { 1062 tp = 0; 1063 } 1064 } 1065 return (tp); 1066 } 1067 1068 /* 1069 * sysctl helper routine for net.inet.ip.mssdflt. it can't be less 1070 * than 32. 1071 */ 1072 static int 1073 sysctl_net_inet_tcp_mssdflt(SYSCTLFN_ARGS) 1074 { 1075 int error, mssdflt; 1076 struct sysctlnode node; 1077 1078 mssdflt = tcp_mssdflt; 1079 node = *rnode; 1080 node.sysctl_data = &mssdflt; 1081 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1082 if (error || newp == NULL) 1083 return (error); 1084 1085 if (mssdflt < 32) 1086 return (EINVAL); 1087 tcp_mssdflt = mssdflt; 1088 1089 mutex_enter(softnet_lock); 1090 tcp_tcpcb_template(); 1091 mutex_exit(softnet_lock); 1092 1093 return (0); 1094 } 1095 1096 /* 1097 * sysctl helper for TCP CB template update 1098 */ 1099 static int 1100 sysctl_update_tcpcb_template(SYSCTLFN_ARGS) 1101 { 1102 int t, error; 1103 struct sysctlnode node; 1104 1105 /* follow procedures in sysctl(9) manpage */ 1106 t = *(int *)rnode->sysctl_data; 1107 node = *rnode; 1108 node.sysctl_data = &t; 1109 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1110 if (error || newp == NULL) 1111 return error; 1112 1113 if (t < 0) 1114 return EINVAL; 1115 1116 *(int *)rnode->sysctl_data = t; 1117 1118 mutex_enter(softnet_lock); 1119 tcp_tcpcb_template(); 1120 mutex_exit(softnet_lock); 1121 1122 return 0; 1123 } 1124 1125 /* 1126 * sysctl helper routine for setting port related values under 1127 * net.inet.ip and net.inet6.ip6. does basic range checking and does 1128 * additional checks for each type. this code has placed in 1129 * tcp_input.c since INET and INET6 both use the same tcp code. 1130 * 1131 * this helper is not static so that both inet and inet6 can use it. 1132 */ 1133 int 1134 sysctl_net_inet_ip_ports(SYSCTLFN_ARGS) 1135 { 1136 int error, tmp; 1137 int apmin, apmax; 1138 #ifndef IPNOPRIVPORTS 1139 int lpmin, lpmax; 1140 #endif /* IPNOPRIVPORTS */ 1141 struct sysctlnode node; 1142 1143 if (namelen != 0) 1144 return (EINVAL); 1145 1146 switch (name[-3]) { 1147 #ifdef INET 1148 case PF_INET: 1149 apmin = anonportmin; 1150 apmax = anonportmax; 1151 #ifndef IPNOPRIVPORTS 1152 lpmin = lowportmin; 1153 lpmax = lowportmax; 1154 #endif /* IPNOPRIVPORTS */ 1155 break; 1156 #endif /* INET */ 1157 #ifdef INET6 1158 case PF_INET6: 1159 apmin = ip6_anonportmin; 1160 apmax = ip6_anonportmax; 1161 #ifndef IPNOPRIVPORTS 1162 lpmin = ip6_lowportmin; 1163 lpmax = ip6_lowportmax; 1164 #endif /* IPNOPRIVPORTS */ 1165 break; 1166 #endif /* INET6 */ 1167 default: 1168 return (EINVAL); 1169 } 1170 1171 /* 1172 * insert temporary copy into node, perform lookup on 1173 * temporary, then restore pointer 1174 */ 1175 node = *rnode; 1176 tmp = *(int*)rnode->sysctl_data; 1177 node.sysctl_data = &tmp; 1178 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1179 if (error || newp == NULL) 1180 return (error); 1181 1182 /* 1183 * simple port range check 1184 */ 1185 if (tmp < 0 || tmp > 65535) 1186 return (EINVAL); 1187 1188 /* 1189 * per-node range checks 1190 */ 1191 switch (rnode->sysctl_num) { 1192 case IPCTL_ANONPORTMIN: 1193 case IPV6CTL_ANONPORTMIN: 1194 if (tmp >= apmax) 1195 return (EINVAL); 1196 #ifndef IPNOPRIVPORTS 1197 if (tmp < IPPORT_RESERVED) 1198 return (EINVAL); 1199 #endif /* IPNOPRIVPORTS */ 1200 break; 1201 1202 case IPCTL_ANONPORTMAX: 1203 case IPV6CTL_ANONPORTMAX: 1204 if (apmin >= tmp) 1205 return (EINVAL); 1206 #ifndef IPNOPRIVPORTS 1207 if (tmp < IPPORT_RESERVED) 1208 return (EINVAL); 1209 #endif /* IPNOPRIVPORTS */ 1210 break; 1211 1212 #ifndef IPNOPRIVPORTS 1213 case IPCTL_LOWPORTMIN: 1214 case IPV6CTL_LOWPORTMIN: 1215 if (tmp >= lpmax || 1216 tmp > IPPORT_RESERVEDMAX || 1217 tmp < IPPORT_RESERVEDMIN) 1218 return (EINVAL); 1219 break; 1220 1221 case IPCTL_LOWPORTMAX: 1222 case IPV6CTL_LOWPORTMAX: 1223 if (lpmin >= tmp || 1224 tmp > IPPORT_RESERVEDMAX || 1225 tmp < IPPORT_RESERVEDMIN) 1226 return (EINVAL); 1227 break; 1228 #endif /* IPNOPRIVPORTS */ 1229 1230 default: 1231 return (EINVAL); 1232 } 1233 1234 *(int*)rnode->sysctl_data = tmp; 1235 1236 return (0); 1237 } 1238 1239 static inline int 1240 copyout_uid(struct socket *sockp, void *oldp, size_t *oldlenp) 1241 { 1242 if (oldp) { 1243 size_t sz; 1244 uid_t uid; 1245 int error; 1246 1247 if (sockp->so_cred == NULL) 1248 return EPERM; 1249 1250 uid = kauth_cred_geteuid(sockp->so_cred); 1251 sz = MIN(sizeof(uid), *oldlenp); 1252 if ((error = copyout(&uid, oldp, sz)) != 0) 1253 return error; 1254 } 1255 *oldlenp = sizeof(uid_t); 1256 return 0; 1257 } 1258 1259 static inline int 1260 inet4_ident_core(struct in_addr raddr, u_int rport, 1261 struct in_addr laddr, u_int lport, 1262 void *oldp, size_t *oldlenp, 1263 struct lwp *l, int dodrop) 1264 { 1265 struct inpcb *inp; 1266 struct socket *sockp; 1267 1268 inp = in_pcblookup_connect(&tcbtable, raddr, rport, laddr, lport, 0); 1269 1270 if (inp == NULL || (sockp = inp->inp_socket) == NULL) 1271 return ESRCH; 1272 1273 if (dodrop) { 1274 struct tcpcb *tp; 1275 int error; 1276 1277 if (inp == NULL || (tp = intotcpcb(inp)) == NULL || 1278 (inp->inp_socket->so_options & SO_ACCEPTCONN) != 0) 1279 return ESRCH; 1280 1281 error = kauth_authorize_network(l->l_cred, KAUTH_NETWORK_SOCKET, 1282 KAUTH_REQ_NETWORK_SOCKET_DROP, inp->inp_socket, tp, NULL); 1283 if (error) 1284 return (error); 1285 1286 (void)tcp_drop(tp, ECONNABORTED); 1287 return 0; 1288 } 1289 else 1290 return copyout_uid(sockp, oldp, oldlenp); 1291 } 1292 1293 #ifdef INET6 1294 static inline int 1295 inet6_ident_core(struct in6_addr *raddr, u_int rport, 1296 struct in6_addr *laddr, u_int lport, 1297 void *oldp, size_t *oldlenp, 1298 struct lwp *l, int dodrop) 1299 { 1300 struct in6pcb *in6p; 1301 struct socket *sockp; 1302 1303 in6p = in6_pcblookup_connect(&tcbtable, raddr, rport, laddr, lport, 0, 0); 1304 1305 if (in6p == NULL || (sockp = in6p->in6p_socket) == NULL) 1306 return ESRCH; 1307 1308 if (dodrop) { 1309 struct tcpcb *tp; 1310 int error; 1311 1312 if (in6p == NULL || (tp = in6totcpcb(in6p)) == NULL || 1313 (in6p->in6p_socket->so_options & SO_ACCEPTCONN) != 0) 1314 return ESRCH; 1315 1316 error = kauth_authorize_network(l->l_cred, KAUTH_NETWORK_SOCKET, 1317 KAUTH_REQ_NETWORK_SOCKET_DROP, in6p->in6p_socket, tp, NULL); 1318 if (error) 1319 return (error); 1320 1321 (void)tcp_drop(tp, ECONNABORTED); 1322 return 0; 1323 } 1324 else 1325 return copyout_uid(sockp, oldp, oldlenp); 1326 } 1327 #endif 1328 1329 /* 1330 * sysctl helper routine for the net.inet.tcp.drop and 1331 * net.inet6.tcp6.drop nodes. 1332 */ 1333 #define sysctl_net_inet_tcp_drop sysctl_net_inet_tcp_ident 1334 1335 /* 1336 * sysctl helper routine for the net.inet.tcp.ident and 1337 * net.inet6.tcp6.ident nodes. contains backwards compat code for the 1338 * old way of looking up the ident information for ipv4 which involves 1339 * stuffing the port/addr pairs into the mib lookup. 1340 */ 1341 static int 1342 sysctl_net_inet_tcp_ident(SYSCTLFN_ARGS) 1343 { 1344 #ifdef INET 1345 struct sockaddr_in *si4[2]; 1346 #endif /* INET */ 1347 #ifdef INET6 1348 struct sockaddr_in6 *si6[2]; 1349 #endif /* INET6 */ 1350 struct sockaddr_storage sa[2]; 1351 int error, pf, dodrop; 1352 1353 dodrop = name[-1] == TCPCTL_DROP; 1354 if (dodrop) { 1355 if (oldp != NULL || *oldlenp != 0) 1356 return EINVAL; 1357 if (newp == NULL) 1358 return EPERM; 1359 if (newlen < sizeof(sa)) 1360 return ENOMEM; 1361 } 1362 if (namelen != 4 && namelen != 0) 1363 return EINVAL; 1364 if (name[-2] != IPPROTO_TCP) 1365 return EINVAL; 1366 pf = name[-3]; 1367 1368 /* old style lookup, ipv4 only */ 1369 if (namelen == 4) { 1370 #ifdef INET 1371 struct in_addr laddr, raddr; 1372 u_int lport, rport; 1373 1374 if (pf != PF_INET) 1375 return EPROTONOSUPPORT; 1376 raddr.s_addr = (uint32_t)name[0]; 1377 rport = (u_int)name[1]; 1378 laddr.s_addr = (uint32_t)name[2]; 1379 lport = (u_int)name[3]; 1380 1381 mutex_enter(softnet_lock); 1382 error = inet4_ident_core(raddr, rport, laddr, lport, 1383 oldp, oldlenp, l, dodrop); 1384 mutex_exit(softnet_lock); 1385 return error; 1386 #else /* INET */ 1387 return EINVAL; 1388 #endif /* INET */ 1389 } 1390 1391 if (newp == NULL || newlen != sizeof(sa)) 1392 return EINVAL; 1393 error = copyin(newp, &sa, newlen); 1394 if (error) 1395 return error; 1396 1397 /* 1398 * requested families must match 1399 */ 1400 if (pf != sa[0].ss_family || sa[0].ss_family != sa[1].ss_family) 1401 return EINVAL; 1402 1403 switch (pf) { 1404 #ifdef INET6 1405 case PF_INET6: 1406 si6[0] = (struct sockaddr_in6*)&sa[0]; 1407 si6[1] = (struct sockaddr_in6*)&sa[1]; 1408 if (si6[0]->sin6_len != sizeof(*si6[0]) || 1409 si6[1]->sin6_len != sizeof(*si6[1])) 1410 return EINVAL; 1411 1412 if (!IN6_IS_ADDR_V4MAPPED(&si6[0]->sin6_addr) && 1413 !IN6_IS_ADDR_V4MAPPED(&si6[1]->sin6_addr)) { 1414 error = sa6_embedscope(si6[0], ip6_use_defzone); 1415 if (error) 1416 return error; 1417 error = sa6_embedscope(si6[1], ip6_use_defzone); 1418 if (error) 1419 return error; 1420 1421 mutex_enter(softnet_lock); 1422 error = inet6_ident_core(&si6[0]->sin6_addr, 1423 si6[0]->sin6_port, &si6[1]->sin6_addr, 1424 si6[1]->sin6_port, oldp, oldlenp, l, dodrop); 1425 mutex_exit(softnet_lock); 1426 return error; 1427 } 1428 1429 if (IN6_IS_ADDR_V4MAPPED(&si6[0]->sin6_addr) != 1430 IN6_IS_ADDR_V4MAPPED(&si6[1]->sin6_addr)) 1431 return EINVAL; 1432 1433 in6_sin6_2_sin_in_sock((struct sockaddr *)&sa[0]); 1434 in6_sin6_2_sin_in_sock((struct sockaddr *)&sa[1]); 1435 /*FALLTHROUGH*/ 1436 #endif /* INET6 */ 1437 #ifdef INET 1438 case PF_INET: 1439 si4[0] = (struct sockaddr_in*)&sa[0]; 1440 si4[1] = (struct sockaddr_in*)&sa[1]; 1441 if (si4[0]->sin_len != sizeof(*si4[0]) || 1442 si4[0]->sin_len != sizeof(*si4[1])) 1443 return EINVAL; 1444 1445 mutex_enter(softnet_lock); 1446 error = inet4_ident_core(si4[0]->sin_addr, si4[0]->sin_port, 1447 si4[1]->sin_addr, si4[1]->sin_port, 1448 oldp, oldlenp, l, dodrop); 1449 mutex_exit(softnet_lock); 1450 return error; 1451 #endif /* INET */ 1452 default: 1453 return EPROTONOSUPPORT; 1454 } 1455 } 1456 1457 /* 1458 * sysctl helper for the inet and inet6 pcblists. handles tcp/udp and 1459 * inet/inet6, as well as raw pcbs for each. specifically not 1460 * declared static so that raw sockets and udp/udp6 can use it as 1461 * well. 1462 */ 1463 int 1464 sysctl_inpcblist(SYSCTLFN_ARGS) 1465 { 1466 #ifdef INET 1467 struct sockaddr_in *in; 1468 const struct inpcb *inp; 1469 #endif 1470 #ifdef INET6 1471 struct sockaddr_in6 *in6; 1472 const struct in6pcb *in6p; 1473 #endif 1474 struct inpcbtable *pcbtbl = __UNCONST(rnode->sysctl_data); 1475 const struct inpcb_hdr *inph; 1476 struct tcpcb *tp; 1477 struct kinfo_pcb pcb; 1478 char *dp; 1479 size_t len, needed, elem_size, out_size; 1480 int error, elem_count, pf, proto, pf2; 1481 1482 if (namelen != 4) 1483 return (EINVAL); 1484 1485 if (oldp != NULL) { 1486 len = *oldlenp; 1487 elem_size = name[2]; 1488 elem_count = name[3]; 1489 if (elem_size != sizeof(pcb)) 1490 return EINVAL; 1491 } else { 1492 len = 0; 1493 elem_count = INT_MAX; 1494 elem_size = sizeof(pcb); 1495 } 1496 error = 0; 1497 dp = oldp; 1498 out_size = elem_size; 1499 needed = 0; 1500 1501 if (namelen == 1 && name[0] == CTL_QUERY) 1502 return (sysctl_query(SYSCTLFN_CALL(rnode))); 1503 1504 if (name - oname != 4) 1505 return (EINVAL); 1506 1507 pf = oname[1]; 1508 proto = oname[2]; 1509 pf2 = (oldp != NULL) ? pf : 0; 1510 1511 mutex_enter(softnet_lock); 1512 1513 TAILQ_FOREACH(inph, &pcbtbl->inpt_queue, inph_queue) { 1514 #ifdef INET 1515 inp = (const struct inpcb *)inph; 1516 #endif 1517 #ifdef INET6 1518 in6p = (const struct in6pcb *)inph; 1519 #endif 1520 1521 if (inph->inph_af != pf) 1522 continue; 1523 1524 if (kauth_authorize_network(l->l_cred, KAUTH_NETWORK_SOCKET, 1525 KAUTH_REQ_NETWORK_SOCKET_CANSEE, inph->inph_socket, NULL, 1526 NULL) != 0) 1527 continue; 1528 1529 memset(&pcb, 0, sizeof(pcb)); 1530 1531 pcb.ki_family = pf; 1532 pcb.ki_type = proto; 1533 1534 switch (pf2) { 1535 case 0: 1536 /* just probing for size */ 1537 break; 1538 #ifdef INET 1539 case PF_INET: 1540 pcb.ki_family = inp->inp_socket->so_proto-> 1541 pr_domain->dom_family; 1542 pcb.ki_type = inp->inp_socket->so_proto-> 1543 pr_type; 1544 pcb.ki_protocol = inp->inp_socket->so_proto-> 1545 pr_protocol; 1546 pcb.ki_pflags = inp->inp_flags; 1547 1548 pcb.ki_sostate = inp->inp_socket->so_state; 1549 pcb.ki_prstate = inp->inp_state; 1550 if (proto == IPPROTO_TCP) { 1551 tp = intotcpcb(inp); 1552 pcb.ki_tstate = tp->t_state; 1553 pcb.ki_tflags = tp->t_flags; 1554 } 1555 1556 pcb.ki_pcbaddr = PTRTOUINT64(inp); 1557 pcb.ki_ppcbaddr = PTRTOUINT64(inp->inp_ppcb); 1558 pcb.ki_sockaddr = PTRTOUINT64(inp->inp_socket); 1559 1560 pcb.ki_rcvq = inp->inp_socket->so_rcv.sb_cc; 1561 pcb.ki_sndq = inp->inp_socket->so_snd.sb_cc; 1562 1563 in = satosin(&pcb.ki_src); 1564 in->sin_len = sizeof(*in); 1565 in->sin_family = pf; 1566 in->sin_port = inp->inp_lport; 1567 in->sin_addr = inp->inp_laddr; 1568 if (pcb.ki_prstate >= INP_CONNECTED) { 1569 in = satosin(&pcb.ki_dst); 1570 in->sin_len = sizeof(*in); 1571 in->sin_family = pf; 1572 in->sin_port = inp->inp_fport; 1573 in->sin_addr = inp->inp_faddr; 1574 } 1575 break; 1576 #endif 1577 #ifdef INET6 1578 case PF_INET6: 1579 pcb.ki_family = in6p->in6p_socket->so_proto-> 1580 pr_domain->dom_family; 1581 pcb.ki_type = in6p->in6p_socket->so_proto->pr_type; 1582 pcb.ki_protocol = in6p->in6p_socket->so_proto-> 1583 pr_protocol; 1584 pcb.ki_pflags = in6p->in6p_flags; 1585 1586 pcb.ki_sostate = in6p->in6p_socket->so_state; 1587 pcb.ki_prstate = in6p->in6p_state; 1588 if (proto == IPPROTO_TCP) { 1589 tp = in6totcpcb(in6p); 1590 pcb.ki_tstate = tp->t_state; 1591 pcb.ki_tflags = tp->t_flags; 1592 } 1593 1594 pcb.ki_pcbaddr = PTRTOUINT64(in6p); 1595 pcb.ki_ppcbaddr = PTRTOUINT64(in6p->in6p_ppcb); 1596 pcb.ki_sockaddr = PTRTOUINT64(in6p->in6p_socket); 1597 1598 pcb.ki_rcvq = in6p->in6p_socket->so_rcv.sb_cc; 1599 pcb.ki_sndq = in6p->in6p_socket->so_snd.sb_cc; 1600 1601 in6 = satosin6(&pcb.ki_src); 1602 in6->sin6_len = sizeof(*in6); 1603 in6->sin6_family = pf; 1604 in6->sin6_port = in6p->in6p_lport; 1605 in6->sin6_flowinfo = in6p->in6p_flowinfo; 1606 in6->sin6_addr = in6p->in6p_laddr; 1607 in6->sin6_scope_id = 0; /* XXX? */ 1608 1609 if (pcb.ki_prstate >= IN6P_CONNECTED) { 1610 in6 = satosin6(&pcb.ki_dst); 1611 in6->sin6_len = sizeof(*in6); 1612 in6->sin6_family = pf; 1613 in6->sin6_port = in6p->in6p_fport; 1614 in6->sin6_flowinfo = in6p->in6p_flowinfo; 1615 in6->sin6_addr = in6p->in6p_faddr; 1616 in6->sin6_scope_id = 0; /* XXX? */ 1617 } 1618 break; 1619 #endif 1620 } 1621 1622 if (len >= elem_size && elem_count > 0) { 1623 error = copyout(&pcb, dp, out_size); 1624 if (error) { 1625 mutex_exit(softnet_lock); 1626 return (error); 1627 } 1628 dp += elem_size; 1629 len -= elem_size; 1630 } 1631 needed += elem_size; 1632 if (elem_count > 0 && elem_count != INT_MAX) 1633 elem_count--; 1634 } 1635 1636 *oldlenp = needed; 1637 if (oldp == NULL) 1638 *oldlenp += PCB_SLOP * sizeof(struct kinfo_pcb); 1639 1640 mutex_exit(softnet_lock); 1641 1642 return (error); 1643 } 1644 1645 static int 1646 sysctl_tcp_congctl(SYSCTLFN_ARGS) 1647 { 1648 struct sysctlnode node; 1649 int error; 1650 char newname[TCPCC_MAXLEN]; 1651 1652 strlcpy(newname, tcp_congctl_global_name, sizeof(newname) - 1); 1653 1654 node = *rnode; 1655 node.sysctl_data = newname; 1656 node.sysctl_size = sizeof(newname); 1657 1658 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1659 1660 if (error || 1661 newp == NULL || 1662 strncmp(newname, tcp_congctl_global_name, sizeof(newname)) == 0) 1663 return error; 1664 1665 mutex_enter(softnet_lock); 1666 error = tcp_congctl_select(NULL, newname); 1667 mutex_exit(softnet_lock); 1668 1669 return error; 1670 } 1671 1672 static int 1673 sysctl_tcp_init_win(SYSCTLFN_ARGS) 1674 { 1675 int error; 1676 u_int iw; 1677 struct sysctlnode node; 1678 1679 iw = *(u_int *)rnode->sysctl_data; 1680 node = *rnode; 1681 node.sysctl_data = &iw; 1682 node.sysctl_size = sizeof(iw); 1683 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1684 if (error || newp == NULL) 1685 return error; 1686 1687 if (iw >= __arraycount(tcp_init_win_max)) 1688 return EINVAL; 1689 *(u_int *)rnode->sysctl_data = iw; 1690 return 0; 1691 } 1692 1693 static int 1694 sysctl_tcp_keep(SYSCTLFN_ARGS) 1695 { 1696 int error; 1697 u_int tmp; 1698 struct sysctlnode node; 1699 1700 node = *rnode; 1701 tmp = *(u_int *)rnode->sysctl_data; 1702 node.sysctl_data = &tmp; 1703 1704 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1705 if (error || newp == NULL) 1706 return error; 1707 1708 mutex_enter(softnet_lock); 1709 1710 *(u_int *)rnode->sysctl_data = tmp; 1711 tcp_tcpcb_template(); /* update the template */ 1712 1713 mutex_exit(softnet_lock); 1714 return 0; 1715 } 1716 1717 static int 1718 sysctl_net_inet_tcp_stats(SYSCTLFN_ARGS) 1719 { 1720 1721 return (NETSTAT_SYSCTL(tcpstat_percpu, TCP_NSTATS)); 1722 } 1723 1724 /* 1725 * this (second stage) setup routine is a replacement for tcp_sysctl() 1726 * (which is currently used for ipv4 and ipv6) 1727 */ 1728 static void 1729 sysctl_net_inet_tcp_setup2(struct sysctllog **clog, int pf, const char *pfname, 1730 const char *tcpname) 1731 { 1732 const struct sysctlnode *sack_node; 1733 const struct sysctlnode *abc_node; 1734 const struct sysctlnode *ecn_node; 1735 const struct sysctlnode *congctl_node; 1736 const struct sysctlnode *mslt_node; 1737 const struct sysctlnode *vtw_node; 1738 #ifdef TCP_DEBUG 1739 extern struct tcp_debug tcp_debug[TCP_NDEBUG]; 1740 extern int tcp_debx; 1741 #endif 1742 1743 sysctl_createv(clog, 0, NULL, NULL, 1744 CTLFLAG_PERMANENT, 1745 CTLTYPE_NODE, pfname, NULL, 1746 NULL, 0, NULL, 0, 1747 CTL_NET, pf, CTL_EOL); 1748 sysctl_createv(clog, 0, NULL, NULL, 1749 CTLFLAG_PERMANENT, 1750 CTLTYPE_NODE, tcpname, 1751 SYSCTL_DESCR("TCP related settings"), 1752 NULL, 0, NULL, 0, 1753 CTL_NET, pf, IPPROTO_TCP, CTL_EOL); 1754 1755 sysctl_createv(clog, 0, NULL, NULL, 1756 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1757 CTLTYPE_INT, "rfc1323", 1758 SYSCTL_DESCR("Enable RFC1323 TCP extensions"), 1759 sysctl_update_tcpcb_template, 0, &tcp_do_rfc1323, 0, 1760 CTL_NET, pf, IPPROTO_TCP, TCPCTL_RFC1323, CTL_EOL); 1761 sysctl_createv(clog, 0, NULL, NULL, 1762 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1763 CTLTYPE_INT, "sendspace", 1764 SYSCTL_DESCR("Default TCP send buffer size"), 1765 NULL, 0, &tcp_sendspace, 0, 1766 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SENDSPACE, CTL_EOL); 1767 sysctl_createv(clog, 0, NULL, NULL, 1768 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1769 CTLTYPE_INT, "recvspace", 1770 SYSCTL_DESCR("Default TCP receive buffer size"), 1771 NULL, 0, &tcp_recvspace, 0, 1772 CTL_NET, pf, IPPROTO_TCP, TCPCTL_RECVSPACE, CTL_EOL); 1773 sysctl_createv(clog, 0, NULL, NULL, 1774 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1775 CTLTYPE_INT, "mssdflt", 1776 SYSCTL_DESCR("Default maximum segment size"), 1777 sysctl_net_inet_tcp_mssdflt, 0, &tcp_mssdflt, 0, 1778 CTL_NET, pf, IPPROTO_TCP, TCPCTL_MSSDFLT, CTL_EOL); 1779 sysctl_createv(clog, 0, NULL, NULL, 1780 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1781 CTLTYPE_INT, "minmss", 1782 SYSCTL_DESCR("Lower limit for TCP maximum segment size"), 1783 NULL, 0, &tcp_minmss, 0, 1784 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 1785 sysctl_createv(clog, 0, NULL, NULL, 1786 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1787 CTLTYPE_INT, "msl", 1788 SYSCTL_DESCR("Maximum Segment Life"), 1789 NULL, 0, &tcp_msl, 0, 1790 CTL_NET, pf, IPPROTO_TCP, TCPCTL_MSL, CTL_EOL); 1791 sysctl_createv(clog, 0, NULL, NULL, 1792 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1793 CTLTYPE_INT, "syn_cache_limit", 1794 SYSCTL_DESCR("Maximum number of entries in the TCP " 1795 "compressed state engine"), 1796 NULL, 0, &tcp_syn_cache_limit, 0, 1797 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SYN_CACHE_LIMIT, 1798 CTL_EOL); 1799 sysctl_createv(clog, 0, NULL, NULL, 1800 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1801 CTLTYPE_INT, "syn_bucket_limit", 1802 SYSCTL_DESCR("Maximum number of entries per hash " 1803 "bucket in the TCP compressed state " 1804 "engine"), 1805 NULL, 0, &tcp_syn_bucket_limit, 0, 1806 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SYN_BUCKET_LIMIT, 1807 CTL_EOL); 1808 #if 0 /* obsoleted */ 1809 sysctl_createv(clog, 0, NULL, NULL, 1810 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1811 CTLTYPE_INT, "syn_cache_interval", 1812 SYSCTL_DESCR("TCP compressed state engine's timer interval"), 1813 NULL, 0, &tcp_syn_cache_interval, 0, 1814 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SYN_CACHE_INTER, 1815 CTL_EOL); 1816 #endif 1817 sysctl_createv(clog, 0, NULL, NULL, 1818 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1819 CTLTYPE_INT, "init_win", 1820 SYSCTL_DESCR("Initial TCP congestion window"), 1821 sysctl_tcp_init_win, 0, &tcp_init_win, 0, 1822 CTL_NET, pf, IPPROTO_TCP, TCPCTL_INIT_WIN, CTL_EOL); 1823 sysctl_createv(clog, 0, NULL, NULL, 1824 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1825 CTLTYPE_INT, "mss_ifmtu", 1826 SYSCTL_DESCR("Use interface MTU for calculating MSS"), 1827 NULL, 0, &tcp_mss_ifmtu, 0, 1828 CTL_NET, pf, IPPROTO_TCP, TCPCTL_MSS_IFMTU, CTL_EOL); 1829 sysctl_createv(clog, 0, NULL, &sack_node, 1830 CTLFLAG_PERMANENT, 1831 CTLTYPE_NODE, "sack", 1832 SYSCTL_DESCR("RFC2018 Selective ACKnowledgement tunables"), 1833 NULL, 0, NULL, 0, 1834 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SACK, CTL_EOL); 1835 1836 /* Congctl subtree */ 1837 sysctl_createv(clog, 0, NULL, &congctl_node, 1838 CTLFLAG_PERMANENT, 1839 CTLTYPE_NODE, "congctl", 1840 SYSCTL_DESCR("TCP Congestion Control"), 1841 NULL, 0, NULL, 0, 1842 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 1843 sysctl_createv(clog, 0, &congctl_node, NULL, 1844 CTLFLAG_PERMANENT, 1845 CTLTYPE_STRING, "available", 1846 SYSCTL_DESCR("Available Congestion Control Mechanisms"), 1847 NULL, 0, tcp_congctl_avail, 0, CTL_CREATE, CTL_EOL); 1848 sysctl_createv(clog, 0, &congctl_node, NULL, 1849 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1850 CTLTYPE_STRING, "selected", 1851 SYSCTL_DESCR("Selected Congestion Control Mechanism"), 1852 sysctl_tcp_congctl, 0, NULL, TCPCC_MAXLEN, 1853 CTL_CREATE, CTL_EOL); 1854 1855 sysctl_createv(clog, 0, NULL, NULL, 1856 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1857 CTLTYPE_INT, "win_scale", 1858 SYSCTL_DESCR("Use RFC1323 window scale options"), 1859 sysctl_update_tcpcb_template, 0, &tcp_do_win_scale, 0, 1860 CTL_NET, pf, IPPROTO_TCP, TCPCTL_WSCALE, CTL_EOL); 1861 sysctl_createv(clog, 0, NULL, NULL, 1862 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1863 CTLTYPE_INT, "timestamps", 1864 SYSCTL_DESCR("Use RFC1323 time stamp options"), 1865 sysctl_update_tcpcb_template, 0, &tcp_do_timestamps, 0, 1866 CTL_NET, pf, IPPROTO_TCP, TCPCTL_TSTAMP, CTL_EOL); 1867 sysctl_createv(clog, 0, NULL, NULL, 1868 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1869 CTLTYPE_INT, "compat_42", 1870 SYSCTL_DESCR("Enable workarounds for 4.2BSD TCP bugs"), 1871 NULL, 0, &tcp_compat_42, 0, 1872 CTL_NET, pf, IPPROTO_TCP, TCPCTL_COMPAT_42, CTL_EOL); 1873 sysctl_createv(clog, 0, NULL, NULL, 1874 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1875 CTLTYPE_INT, "cwm", 1876 SYSCTL_DESCR("Hughes/Touch/Heidemann Congestion Window " 1877 "Monitoring"), 1878 NULL, 0, &tcp_cwm, 0, 1879 CTL_NET, pf, IPPROTO_TCP, TCPCTL_CWM, CTL_EOL); 1880 sysctl_createv(clog, 0, NULL, NULL, 1881 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1882 CTLTYPE_INT, "cwm_burstsize", 1883 SYSCTL_DESCR("Congestion Window Monitoring allowed " 1884 "burst count in packets"), 1885 NULL, 0, &tcp_cwm_burstsize, 0, 1886 CTL_NET, pf, IPPROTO_TCP, TCPCTL_CWM_BURSTSIZE, 1887 CTL_EOL); 1888 sysctl_createv(clog, 0, NULL, NULL, 1889 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1890 CTLTYPE_INT, "ack_on_push", 1891 SYSCTL_DESCR("Immediately return ACK when PSH is " 1892 "received"), 1893 NULL, 0, &tcp_ack_on_push, 0, 1894 CTL_NET, pf, IPPROTO_TCP, TCPCTL_ACK_ON_PUSH, CTL_EOL); 1895 sysctl_createv(clog, 0, NULL, NULL, 1896 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1897 CTLTYPE_INT, "keepidle", 1898 SYSCTL_DESCR("Allowed connection idle ticks before a " 1899 "keepalive probe is sent"), 1900 sysctl_tcp_keep, 0, &tcp_keepidle, 0, 1901 CTL_NET, pf, IPPROTO_TCP, TCPCTL_KEEPIDLE, CTL_EOL); 1902 sysctl_createv(clog, 0, NULL, NULL, 1903 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1904 CTLTYPE_INT, "keepintvl", 1905 SYSCTL_DESCR("Ticks before next keepalive probe is sent"), 1906 sysctl_tcp_keep, 0, &tcp_keepintvl, 0, 1907 CTL_NET, pf, IPPROTO_TCP, TCPCTL_KEEPINTVL, CTL_EOL); 1908 sysctl_createv(clog, 0, NULL, NULL, 1909 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1910 CTLTYPE_INT, "keepcnt", 1911 SYSCTL_DESCR("Number of keepalive probes to send"), 1912 sysctl_tcp_keep, 0, &tcp_keepcnt, 0, 1913 CTL_NET, pf, IPPROTO_TCP, TCPCTL_KEEPCNT, CTL_EOL); 1914 sysctl_createv(clog, 0, NULL, NULL, 1915 CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE, 1916 CTLTYPE_INT, "slowhz", 1917 SYSCTL_DESCR("Keepalive ticks per second"), 1918 NULL, PR_SLOWHZ, NULL, 0, 1919 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SLOWHZ, CTL_EOL); 1920 sysctl_createv(clog, 0, NULL, NULL, 1921 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1922 CTLTYPE_INT, "log_refused", 1923 SYSCTL_DESCR("Log refused TCP connections"), 1924 NULL, 0, &tcp_log_refused, 0, 1925 CTL_NET, pf, IPPROTO_TCP, TCPCTL_LOG_REFUSED, CTL_EOL); 1926 #if 0 /* obsoleted */ 1927 sysctl_createv(clog, 0, NULL, NULL, 1928 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1929 CTLTYPE_INT, "rstratelimit", NULL, 1930 NULL, 0, &tcp_rst_ratelim, 0, 1931 CTL_NET, pf, IPPROTO_TCP, TCPCTL_RSTRATELIMIT, CTL_EOL); 1932 #endif 1933 sysctl_createv(clog, 0, NULL, NULL, 1934 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1935 CTLTYPE_INT, "rstppslimit", 1936 SYSCTL_DESCR("Maximum number of RST packets to send " 1937 "per second"), 1938 NULL, 0, &tcp_rst_ppslim, 0, 1939 CTL_NET, pf, IPPROTO_TCP, TCPCTL_RSTPPSLIMIT, CTL_EOL); 1940 sysctl_createv(clog, 0, NULL, NULL, 1941 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1942 CTLTYPE_INT, "delack_ticks", 1943 SYSCTL_DESCR("Number of ticks to delay sending an ACK"), 1944 NULL, 0, &tcp_delack_ticks, 0, 1945 CTL_NET, pf, IPPROTO_TCP, TCPCTL_DELACK_TICKS, CTL_EOL); 1946 sysctl_createv(clog, 0, NULL, NULL, 1947 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1948 CTLTYPE_INT, "init_win_local", 1949 SYSCTL_DESCR("Initial TCP window size (in segments)"), 1950 sysctl_tcp_init_win, 0, &tcp_init_win_local, 0, 1951 CTL_NET, pf, IPPROTO_TCP, TCPCTL_INIT_WIN_LOCAL, 1952 CTL_EOL); 1953 sysctl_createv(clog, 0, NULL, NULL, 1954 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1955 CTLTYPE_STRUCT, "ident", 1956 SYSCTL_DESCR("RFC1413 Identification Protocol lookups"), 1957 sysctl_net_inet_tcp_ident, 0, NULL, sizeof(uid_t), 1958 CTL_NET, pf, IPPROTO_TCP, TCPCTL_IDENT, CTL_EOL); 1959 sysctl_createv(clog, 0, NULL, NULL, 1960 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1961 CTLTYPE_INT, "do_loopback_cksum", 1962 SYSCTL_DESCR("Perform TCP checksum on loopback"), 1963 NULL, 0, &tcp_do_loopback_cksum, 0, 1964 CTL_NET, pf, IPPROTO_TCP, TCPCTL_LOOPBACKCKSUM, 1965 CTL_EOL); 1966 sysctl_createv(clog, 0, NULL, NULL, 1967 CTLFLAG_PERMANENT, 1968 CTLTYPE_STRUCT, "pcblist", 1969 SYSCTL_DESCR("TCP protocol control block list"), 1970 sysctl_inpcblist, 0, &tcbtable, 0, 1971 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, 1972 CTL_EOL); 1973 sysctl_createv(clog, 0, NULL, NULL, 1974 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1975 CTLTYPE_INT, "keepinit", 1976 SYSCTL_DESCR("Ticks before initial tcp connection times out"), 1977 sysctl_tcp_keep, 0, &tcp_keepinit, 0, 1978 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 1979 1980 /* TCP socket buffers auto-sizing nodes */ 1981 sysctl_createv(clog, 0, NULL, NULL, 1982 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1983 CTLTYPE_INT, "recvbuf_auto", 1984 SYSCTL_DESCR("Enable automatic receive " 1985 "buffer sizing (experimental)"), 1986 NULL, 0, &tcp_do_autorcvbuf, 0, 1987 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 1988 sysctl_createv(clog, 0, NULL, NULL, 1989 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1990 CTLTYPE_INT, "recvbuf_inc", 1991 SYSCTL_DESCR("Incrementor step size of " 1992 "automatic receive buffer"), 1993 NULL, 0, &tcp_autorcvbuf_inc, 0, 1994 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 1995 sysctl_createv(clog, 0, NULL, NULL, 1996 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1997 CTLTYPE_INT, "recvbuf_max", 1998 SYSCTL_DESCR("Max size of automatic receive buffer"), 1999 NULL, 0, &tcp_autorcvbuf_max, 0, 2000 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 2001 2002 sysctl_createv(clog, 0, NULL, NULL, 2003 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2004 CTLTYPE_INT, "sendbuf_auto", 2005 SYSCTL_DESCR("Enable automatic send " 2006 "buffer sizing (experimental)"), 2007 NULL, 0, &tcp_do_autosndbuf, 0, 2008 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 2009 sysctl_createv(clog, 0, NULL, NULL, 2010 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2011 CTLTYPE_INT, "sendbuf_inc", 2012 SYSCTL_DESCR("Incrementor step size of " 2013 "automatic send buffer"), 2014 NULL, 0, &tcp_autosndbuf_inc, 0, 2015 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 2016 sysctl_createv(clog, 0, NULL, NULL, 2017 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2018 CTLTYPE_INT, "sendbuf_max", 2019 SYSCTL_DESCR("Max size of automatic send buffer"), 2020 NULL, 0, &tcp_autosndbuf_max, 0, 2021 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 2022 2023 /* ECN subtree */ 2024 sysctl_createv(clog, 0, NULL, &ecn_node, 2025 CTLFLAG_PERMANENT, 2026 CTLTYPE_NODE, "ecn", 2027 SYSCTL_DESCR("RFC3168 Explicit Congestion Notification"), 2028 NULL, 0, NULL, 0, 2029 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 2030 sysctl_createv(clog, 0, &ecn_node, NULL, 2031 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2032 CTLTYPE_INT, "enable", 2033 SYSCTL_DESCR("Enable TCP Explicit Congestion " 2034 "Notification"), 2035 NULL, 0, &tcp_do_ecn, 0, CTL_CREATE, CTL_EOL); 2036 sysctl_createv(clog, 0, &ecn_node, NULL, 2037 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2038 CTLTYPE_INT, "maxretries", 2039 SYSCTL_DESCR("Number of times to retry ECN setup " 2040 "before disabling ECN on the connection"), 2041 NULL, 0, &tcp_ecn_maxretries, 0, CTL_CREATE, CTL_EOL); 2042 2043 /* SACK gets it's own little subtree. */ 2044 sysctl_createv(clog, 0, NULL, &sack_node, 2045 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2046 CTLTYPE_INT, "enable", 2047 SYSCTL_DESCR("Enable RFC2018 Selective ACKnowledgement"), 2048 NULL, 0, &tcp_do_sack, 0, 2049 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SACK, CTL_CREATE, CTL_EOL); 2050 sysctl_createv(clog, 0, NULL, &sack_node, 2051 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2052 CTLTYPE_INT, "maxholes", 2053 SYSCTL_DESCR("Maximum number of TCP SACK holes allowed per connection"), 2054 NULL, 0, &tcp_sack_tp_maxholes, 0, 2055 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SACK, CTL_CREATE, CTL_EOL); 2056 sysctl_createv(clog, 0, NULL, &sack_node, 2057 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2058 CTLTYPE_INT, "globalmaxholes", 2059 SYSCTL_DESCR("Global maximum number of TCP SACK holes"), 2060 NULL, 0, &tcp_sack_globalmaxholes, 0, 2061 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SACK, CTL_CREATE, CTL_EOL); 2062 sysctl_createv(clog, 0, NULL, &sack_node, 2063 CTLFLAG_PERMANENT, 2064 CTLTYPE_INT, "globalholes", 2065 SYSCTL_DESCR("Global number of TCP SACK holes"), 2066 NULL, 0, &tcp_sack_globalholes, 0, 2067 CTL_NET, pf, IPPROTO_TCP, TCPCTL_SACK, CTL_CREATE, CTL_EOL); 2068 2069 sysctl_createv(clog, 0, NULL, NULL, 2070 CTLFLAG_PERMANENT, 2071 CTLTYPE_STRUCT, "stats", 2072 SYSCTL_DESCR("TCP statistics"), 2073 sysctl_net_inet_tcp_stats, 0, NULL, 0, 2074 CTL_NET, pf, IPPROTO_TCP, TCPCTL_STATS, 2075 CTL_EOL); 2076 sysctl_createv(clog, 0, NULL, NULL, 2077 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2078 CTLTYPE_INT, "local_by_rtt", 2079 SYSCTL_DESCR("Use RTT estimator to decide which hosts " 2080 "are local"), 2081 NULL, 0, &tcp_rttlocal, 0, 2082 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 2083 #ifdef TCP_DEBUG 2084 sysctl_createv(clog, 0, NULL, NULL, 2085 CTLFLAG_PERMANENT, 2086 CTLTYPE_STRUCT, "debug", 2087 SYSCTL_DESCR("TCP sockets debug information"), 2088 NULL, 0, &tcp_debug, sizeof(tcp_debug), 2089 CTL_NET, pf, IPPROTO_TCP, TCPCTL_DEBUG, 2090 CTL_EOL); 2091 sysctl_createv(clog, 0, NULL, NULL, 2092 CTLFLAG_PERMANENT, 2093 CTLTYPE_INT, "debx", 2094 SYSCTL_DESCR("Number of TCP debug sockets messages"), 2095 NULL, 0, &tcp_debx, sizeof(tcp_debx), 2096 CTL_NET, pf, IPPROTO_TCP, TCPCTL_DEBX, 2097 CTL_EOL); 2098 #endif 2099 sysctl_createv(clog, 0, NULL, NULL, 2100 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2101 CTLTYPE_STRUCT, "drop", 2102 SYSCTL_DESCR("TCP drop connection"), 2103 sysctl_net_inet_tcp_drop, 0, NULL, 0, 2104 CTL_NET, pf, IPPROTO_TCP, TCPCTL_DROP, CTL_EOL); 2105 sysctl_createv(clog, 0, NULL, NULL, 2106 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2107 CTLTYPE_INT, "iss_hash", 2108 SYSCTL_DESCR("Enable RFC 1948 ISS by cryptographic " 2109 "hash computation"), 2110 NULL, 0, &tcp_do_rfc1948, sizeof(tcp_do_rfc1948), 2111 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, 2112 CTL_EOL); 2113 2114 /* ABC subtree */ 2115 2116 sysctl_createv(clog, 0, NULL, &abc_node, 2117 CTLFLAG_PERMANENT, CTLTYPE_NODE, "abc", 2118 SYSCTL_DESCR("RFC3465 Appropriate Byte Counting (ABC)"), 2119 NULL, 0, NULL, 0, 2120 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 2121 sysctl_createv(clog, 0, &abc_node, NULL, 2122 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2123 CTLTYPE_INT, "enable", 2124 SYSCTL_DESCR("Enable RFC3465 Appropriate Byte Counting"), 2125 NULL, 0, &tcp_do_abc, 0, CTL_CREATE, CTL_EOL); 2126 sysctl_createv(clog, 0, &abc_node, NULL, 2127 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2128 CTLTYPE_INT, "aggressive", 2129 SYSCTL_DESCR("1: L=2*SMSS 0: L=1*SMSS"), 2130 NULL, 0, &tcp_abc_aggressive, 0, CTL_CREATE, CTL_EOL); 2131 2132 /* MSL tuning subtree */ 2133 2134 sysctl_createv(clog, 0, NULL, &mslt_node, 2135 CTLFLAG_PERMANENT, CTLTYPE_NODE, "mslt", 2136 SYSCTL_DESCR("MSL Tuning for TIME_WAIT truncation"), 2137 NULL, 0, NULL, 0, 2138 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 2139 sysctl_createv(clog, 0, &mslt_node, NULL, 2140 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2141 CTLTYPE_INT, "enable", 2142 SYSCTL_DESCR("Enable TIME_WAIT truncation"), 2143 NULL, 0, &tcp_msl_enable, 0, CTL_CREATE, CTL_EOL); 2144 sysctl_createv(clog, 0, &mslt_node, NULL, 2145 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2146 CTLTYPE_INT, "loopback", 2147 SYSCTL_DESCR("MSL value to use for loopback connections"), 2148 NULL, 0, &tcp_msl_loop, 0, CTL_CREATE, CTL_EOL); 2149 sysctl_createv(clog, 0, &mslt_node, NULL, 2150 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2151 CTLTYPE_INT, "local", 2152 SYSCTL_DESCR("MSL value to use for local connections"), 2153 NULL, 0, &tcp_msl_local, 0, CTL_CREATE, CTL_EOL); 2154 sysctl_createv(clog, 0, &mslt_node, NULL, 2155 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2156 CTLTYPE_INT, "remote", 2157 SYSCTL_DESCR("MSL value to use for remote connections"), 2158 NULL, 0, &tcp_msl_remote, 0, CTL_CREATE, CTL_EOL); 2159 sysctl_createv(clog, 0, &mslt_node, NULL, 2160 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2161 CTLTYPE_INT, "remote_threshold", 2162 SYSCTL_DESCR("RTT estimate value to promote local to remote"), 2163 NULL, 0, &tcp_msl_remote_threshold, 0, CTL_CREATE, CTL_EOL); 2164 2165 /* vestigial TIME_WAIT tuning subtree */ 2166 2167 sysctl_createv(clog, 0, NULL, &vtw_node, 2168 CTLFLAG_PERMANENT, CTLTYPE_NODE, "vtw", 2169 SYSCTL_DESCR("Tuning for Vestigial TIME_WAIT"), 2170 NULL, 0, NULL, 0, 2171 CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL); 2172 sysctl_createv(clog, 0, &vtw_node, NULL, 2173 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 2174 CTLTYPE_INT, "enable", 2175 SYSCTL_DESCR("Enable Vestigial TIME_WAIT"), 2176 sysctl_tcp_vtw_enable, 0, 2177 (pf == AF_INET) ? &tcp4_vtw_enable : &tcp6_vtw_enable, 2178 0, CTL_CREATE, CTL_EOL); 2179 sysctl_createv(clog, 0, &vtw_node, NULL, 2180 CTLFLAG_PERMANENT|CTLFLAG_READONLY, 2181 CTLTYPE_INT, "entries", 2182 SYSCTL_DESCR("Maximum number of vestigial TIME_WAIT entries"), 2183 NULL, 0, &tcp_vtw_entries, 0, CTL_CREATE, CTL_EOL); 2184 } 2185 2186 void 2187 tcp_usrreq_init(void) 2188 { 2189 2190 #ifdef INET 2191 sysctl_net_inet_tcp_setup2(NULL, PF_INET, "inet", "tcp"); 2192 #endif 2193 #ifdef INET6 2194 sysctl_net_inet_tcp_setup2(NULL, PF_INET6, "inet6", "tcp6"); 2195 #endif 2196 } 2197 2198 PR_WRAP_USRREQS(tcp) 2199 #define tcp_attach tcp_attach_wrapper 2200 #define tcp_detach tcp_detach_wrapper 2201 #define tcp_ioctl tcp_ioctl_wrapper 2202 #define tcp_stat tcp_stat_wrapper 2203 #define tcp_usrreq tcp_usrreq_wrapper 2204 2205 const struct pr_usrreqs tcp_usrreqs = { 2206 .pr_attach = tcp_attach, 2207 .pr_detach = tcp_detach, 2208 .pr_ioctl = tcp_ioctl, 2209 .pr_stat = tcp_stat, 2210 .pr_generic = tcp_usrreq, 2211 }; 2212