1 /* $NetBSD: tcp_usrreq.c,v 1.74 2002/10/22 03:14:16 simonb Exp $ */ 2 3 /* 4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the project nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 /*- 33 * Copyright (c) 1997, 1998 The NetBSD Foundation, Inc. 34 * All rights reserved. 35 * 36 * This code is derived from software contributed to The NetBSD Foundation 37 * by Jason R. Thorpe and Kevin M. Lahey of the Numerical Aerospace Simulation 38 * Facility, NASA Ames Research Center. 39 * 40 * Redistribution and use in source and binary forms, with or without 41 * modification, are permitted provided that the following conditions 42 * are met: 43 * 1. Redistributions of source code must retain the above copyright 44 * notice, this list of conditions and the following disclaimer. 45 * 2. Redistributions in binary form must reproduce the above copyright 46 * notice, this list of conditions and the following disclaimer in the 47 * documentation and/or other materials provided with the distribution. 48 * 3. All advertising materials mentioning features or use of this software 49 * must display the following acknowledgement: 50 * This product includes software developed by the NetBSD 51 * Foundation, Inc. and its contributors. 52 * 4. Neither the name of The NetBSD Foundation nor the names of its 53 * contributors may be used to endorse or promote products derived 54 * from this software without specific prior written permission. 55 * 56 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 57 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 58 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 59 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 60 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 61 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 62 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 63 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 64 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 65 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 66 * POSSIBILITY OF SUCH DAMAGE. 67 */ 68 69 /* 70 * Copyright (c) 1982, 1986, 1988, 1993, 1995 71 * The Regents of the University of California. All rights reserved. 72 * 73 * Redistribution and use in source and binary forms, with or without 74 * modification, are permitted provided that the following conditions 75 * are met: 76 * 1. Redistributions of source code must retain the above copyright 77 * notice, this list of conditions and the following disclaimer. 78 * 2. Redistributions in binary form must reproduce the above copyright 79 * notice, this list of conditions and the following disclaimer in the 80 * documentation and/or other materials provided with the distribution. 81 * 3. All advertising materials mentioning features or use of this software 82 * must display the following acknowledgement: 83 * This product includes software developed by the University of 84 * California, Berkeley and its contributors. 85 * 4. Neither the name of the University nor the names of its contributors 86 * may be used to endorse or promote products derived from this software 87 * without specific prior written permission. 88 * 89 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 90 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 91 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 92 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 93 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 94 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 95 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 96 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 97 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 98 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 99 * SUCH DAMAGE. 100 * 101 * @(#)tcp_usrreq.c 8.5 (Berkeley) 6/21/95 102 */ 103 104 #include <sys/cdefs.h> 105 __KERNEL_RCSID(0, "$NetBSD: tcp_usrreq.c,v 1.74 2002/10/22 03:14:16 simonb Exp $"); 106 107 #include "opt_inet.h" 108 #include "opt_ipsec.h" 109 #include "opt_tcp_debug.h" 110 111 #include <sys/param.h> 112 #include <sys/systm.h> 113 #include <sys/kernel.h> 114 #include <sys/malloc.h> 115 #include <sys/mbuf.h> 116 #include <sys/socket.h> 117 #include <sys/socketvar.h> 118 #include <sys/protosw.h> 119 #include <sys/errno.h> 120 #include <sys/stat.h> 121 #include <sys/proc.h> 122 #include <sys/domain.h> 123 #include <sys/sysctl.h> 124 125 #include <net/if.h> 126 #include <net/route.h> 127 128 #include <netinet/in.h> 129 #include <netinet/in_systm.h> 130 #include <netinet/in_var.h> 131 #include <netinet/ip.h> 132 #include <netinet/in_pcb.h> 133 #include <netinet/ip_var.h> 134 135 #ifdef INET6 136 #ifndef INET 137 #include <netinet/in.h> 138 #endif 139 #include <netinet/ip6.h> 140 #include <netinet6/in6_pcb.h> 141 #include <netinet6/ip6_var.h> 142 #endif 143 144 #include <netinet/tcp.h> 145 #include <netinet/tcp_fsm.h> 146 #include <netinet/tcp_seq.h> 147 #include <netinet/tcp_timer.h> 148 #include <netinet/tcp_var.h> 149 #include <netinet/tcpip.h> 150 #include <netinet/tcp_debug.h> 151 152 #include "opt_tcp_space.h" 153 154 #ifdef IPSEC 155 #include <netinet6/ipsec.h> 156 #endif /*IPSEC*/ 157 158 /* 159 * TCP protocol interface to socket abstraction. 160 */ 161 extern char *tcpstates[]; 162 163 /* 164 * Process a TCP user request for TCP tb. If this is a send request 165 * then m is the mbuf chain of send data. If this is a timer expiration 166 * (called from the software clock routine), then timertype tells which timer. 167 */ 168 /*ARGSUSED*/ 169 int 170 tcp_usrreq(so, req, m, nam, control, p) 171 struct socket *so; 172 int req; 173 struct mbuf *m, *nam, *control; 174 struct proc *p; 175 { 176 struct inpcb *inp; 177 #ifdef INET6 178 struct in6pcb *in6p; 179 #endif 180 struct tcpcb *tp = NULL; 181 int s; 182 int error = 0; 183 #ifdef TCP_DEBUG 184 int ostate = 0; 185 #endif 186 int family; /* family of the socket */ 187 188 family = so->so_proto->pr_domain->dom_family; 189 190 if (req == PRU_CONTROL) { 191 switch (family) { 192 #ifdef INET 193 case PF_INET: 194 return (in_control(so, (long)m, (caddr_t)nam, 195 (struct ifnet *)control, p)); 196 #endif 197 #ifdef INET6 198 case PF_INET6: 199 return (in6_control(so, (long)m, (caddr_t)nam, 200 (struct ifnet *)control, p)); 201 #endif 202 default: 203 return EAFNOSUPPORT; 204 } 205 } 206 207 if (req == PRU_PURGEIF) { 208 switch (family) { 209 #ifdef INET 210 case PF_INET: 211 in_pcbpurgeif0(&tcbtable, (struct ifnet *)control); 212 in_purgeif((struct ifnet *)control); 213 in_pcbpurgeif(&tcbtable, (struct ifnet *)control); 214 break; 215 #endif 216 #ifdef INET6 217 case PF_INET6: 218 in6_pcbpurgeif0(&tcb6, (struct ifnet *)control); 219 in6_purgeif((struct ifnet *)control); 220 in6_pcbpurgeif(&tcb6, (struct ifnet *)control); 221 break; 222 #endif 223 default: 224 return (EAFNOSUPPORT); 225 } 226 return (0); 227 } 228 229 s = splsoftnet(); 230 switch (family) { 231 #ifdef INET 232 case PF_INET: 233 inp = sotoinpcb(so); 234 #ifdef INET6 235 in6p = NULL; 236 #endif 237 break; 238 #endif 239 #ifdef INET6 240 case PF_INET6: 241 inp = NULL; 242 in6p = sotoin6pcb(so); 243 break; 244 #endif 245 default: 246 splx(s); 247 return EAFNOSUPPORT; 248 } 249 250 #ifdef DIAGNOSTIC 251 #ifdef INET6 252 if (inp && in6p) 253 panic("tcp_usrreq: both inp and in6p set to non-NULL"); 254 #endif 255 if (req != PRU_SEND && req != PRU_SENDOOB && control) 256 panic("tcp_usrreq: unexpected control mbuf"); 257 #endif 258 /* 259 * When a TCP is attached to a socket, then there will be 260 * a (struct inpcb) pointed at by the socket, and this 261 * structure will point at a subsidary (struct tcpcb). 262 */ 263 #ifndef INET6 264 if (inp == 0 && req != PRU_ATTACH) 265 #else 266 if ((inp == 0 && in6p == 0) && req != PRU_ATTACH) 267 #endif 268 { 269 error = EINVAL; 270 goto release; 271 } 272 #ifdef INET 273 if (inp) { 274 tp = intotcpcb(inp); 275 /* WHAT IF TP IS 0? */ 276 #ifdef KPROF 277 tcp_acounts[tp->t_state][req]++; 278 #endif 279 #ifdef TCP_DEBUG 280 ostate = tp->t_state; 281 #endif 282 } 283 #endif 284 #ifdef INET6 285 if (in6p) { 286 tp = in6totcpcb(in6p); 287 /* WHAT IF TP IS 0? */ 288 #ifdef KPROF 289 tcp_acounts[tp->t_state][req]++; 290 #endif 291 #ifdef TCP_DEBUG 292 ostate = tp->t_state; 293 #endif 294 } 295 #endif 296 297 switch (req) { 298 299 /* 300 * TCP attaches to socket via PRU_ATTACH, reserving space, 301 * and an internet control block. 302 */ 303 case PRU_ATTACH: 304 #ifndef INET6 305 if (inp != 0) 306 #else 307 if (inp != 0 || in6p != 0) 308 #endif 309 { 310 error = EISCONN; 311 break; 312 } 313 error = tcp_attach(so); 314 if (error) 315 break; 316 if ((so->so_options & SO_LINGER) && so->so_linger == 0) 317 so->so_linger = TCP_LINGERTIME; 318 tp = sototcpcb(so); 319 break; 320 321 /* 322 * PRU_DETACH detaches the TCP protocol from the socket. 323 */ 324 case PRU_DETACH: 325 tp = tcp_disconnect(tp); 326 break; 327 328 /* 329 * Give the socket an address. 330 */ 331 case PRU_BIND: 332 switch (family) { 333 #ifdef INET 334 case PF_INET: 335 error = in_pcbbind(inp, nam, p); 336 break; 337 #endif 338 #ifdef INET6 339 case PF_INET6: 340 error = in6_pcbbind(in6p, nam, p); 341 if (!error) { 342 /* mapped addr case */ 343 if (IN6_IS_ADDR_V4MAPPED(&in6p->in6p_laddr)) 344 tp->t_family = AF_INET; 345 else 346 tp->t_family = AF_INET6; 347 } 348 break; 349 #endif 350 } 351 break; 352 353 /* 354 * Prepare to accept connections. 355 */ 356 case PRU_LISTEN: 357 #ifdef INET 358 if (inp && inp->inp_lport == 0) { 359 error = in_pcbbind(inp, (struct mbuf *)0, 360 (struct proc *)0); 361 if (error) 362 break; 363 } 364 #endif 365 #ifdef INET6 366 if (in6p && in6p->in6p_lport == 0) { 367 error = in6_pcbbind(in6p, (struct mbuf *)0, 368 (struct proc *)0); 369 if (error) 370 break; 371 } 372 #endif 373 tp->t_state = TCPS_LISTEN; 374 break; 375 376 /* 377 * Initiate connection to peer. 378 * Create a template for use in transmissions on this connection. 379 * Enter SYN_SENT state, and mark socket as connecting. 380 * Start keep-alive timer, and seed output sequence space. 381 * Send initial segment on connection. 382 */ 383 case PRU_CONNECT: 384 #ifdef INET 385 if (inp) { 386 if (inp->inp_lport == 0) { 387 error = in_pcbbind(inp, (struct mbuf *)0, 388 (struct proc *)0); 389 if (error) 390 break; 391 } 392 error = in_pcbconnect(inp, nam); 393 } 394 #endif 395 #ifdef INET6 396 if (in6p) { 397 if (in6p->in6p_lport == 0) { 398 error = in6_pcbbind(in6p, (struct mbuf *)0, 399 (struct proc *)0); 400 if (error) 401 break; 402 } 403 error = in6_pcbconnect(in6p, nam); 404 if (!error) { 405 /* mapped addr case */ 406 if (IN6_IS_ADDR_V4MAPPED(&in6p->in6p_faddr)) 407 tp->t_family = AF_INET; 408 else 409 tp->t_family = AF_INET6; 410 } 411 } 412 #endif 413 if (error) 414 break; 415 tp->t_template = tcp_template(tp); 416 if (tp->t_template == 0) { 417 #ifdef INET 418 if (inp) 419 in_pcbdisconnect(inp); 420 #endif 421 #ifdef INET6 422 if (in6p) 423 in6_pcbdisconnect(in6p); 424 #endif 425 error = ENOBUFS; 426 break; 427 } 428 /* Compute window scaling to request. */ 429 while (tp->request_r_scale < TCP_MAX_WINSHIFT && 430 (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat) 431 tp->request_r_scale++; 432 soisconnecting(so); 433 tcpstat.tcps_connattempt++; 434 tp->t_state = TCPS_SYN_SENT; 435 TCP_TIMER_ARM(tp, TCPT_KEEP, TCPTV_KEEP_INIT); 436 tp->iss = tcp_new_iss(tp, 0); 437 tcp_sendseqinit(tp); 438 error = tcp_output(tp); 439 break; 440 441 /* 442 * Create a TCP connection between two sockets. 443 */ 444 case PRU_CONNECT2: 445 error = EOPNOTSUPP; 446 break; 447 448 /* 449 * Initiate disconnect from peer. 450 * If connection never passed embryonic stage, just drop; 451 * else if don't need to let data drain, then can just drop anyways, 452 * else have to begin TCP shutdown process: mark socket disconnecting, 453 * drain unread data, state switch to reflect user close, and 454 * send segment (e.g. FIN) to peer. Socket will be really disconnected 455 * when peer sends FIN and acks ours. 456 * 457 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB. 458 */ 459 case PRU_DISCONNECT: 460 tp = tcp_disconnect(tp); 461 break; 462 463 /* 464 * Accept a connection. Essentially all the work is 465 * done at higher levels; just return the address 466 * of the peer, storing through addr. 467 */ 468 case PRU_ACCEPT: 469 #ifdef INET 470 if (inp) 471 in_setpeeraddr(inp, nam); 472 #endif 473 #ifdef INET6 474 if (in6p) 475 in6_setpeeraddr(in6p, nam); 476 #endif 477 break; 478 479 /* 480 * Mark the connection as being incapable of further output. 481 */ 482 case PRU_SHUTDOWN: 483 socantsendmore(so); 484 tp = tcp_usrclosed(tp); 485 if (tp) 486 error = tcp_output(tp); 487 break; 488 489 /* 490 * After a receive, possibly send window update to peer. 491 */ 492 case PRU_RCVD: 493 /* 494 * soreceive() calls this function when a user receives 495 * ancillary data on a listening socket. We don't call 496 * tcp_output in such a case, since there is no header 497 * template for a listening socket and hence the kernel 498 * will panic. 499 */ 500 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) != 0) 501 (void) tcp_output(tp); 502 break; 503 504 /* 505 * Do a send by putting data in output queue and updating urgent 506 * marker if URG set. Possibly send more data. 507 */ 508 case PRU_SEND: 509 if (control && control->m_len) { 510 m_freem(control); 511 m_freem(m); 512 error = EINVAL; 513 break; 514 } 515 sbappendstream(&so->so_snd, m); 516 error = tcp_output(tp); 517 break; 518 519 /* 520 * Abort the TCP. 521 */ 522 case PRU_ABORT: 523 tp = tcp_drop(tp, ECONNABORTED); 524 break; 525 526 case PRU_SENSE: 527 /* 528 * stat: don't bother with a blocksize. 529 */ 530 splx(s); 531 return (0); 532 533 case PRU_RCVOOB: 534 if (control && control->m_len) { 535 m_freem(control); 536 m_freem(m); 537 error = EINVAL; 538 break; 539 } 540 if ((so->so_oobmark == 0 && 541 (so->so_state & SS_RCVATMARK) == 0) || 542 so->so_options & SO_OOBINLINE || 543 tp->t_oobflags & TCPOOB_HADDATA) { 544 error = EINVAL; 545 break; 546 } 547 if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) { 548 error = EWOULDBLOCK; 549 break; 550 } 551 m->m_len = 1; 552 *mtod(m, caddr_t) = tp->t_iobc; 553 if (((long)nam & MSG_PEEK) == 0) 554 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA); 555 break; 556 557 case PRU_SENDOOB: 558 if (sbspace(&so->so_snd) < -512) { 559 m_freem(m); 560 error = ENOBUFS; 561 break; 562 } 563 /* 564 * According to RFC961 (Assigned Protocols), 565 * the urgent pointer points to the last octet 566 * of urgent data. We continue, however, 567 * to consider it to indicate the first octet 568 * of data past the urgent section. 569 * Otherwise, snd_up should be one lower. 570 */ 571 sbappendstream(&so->so_snd, m); 572 tp->snd_up = tp->snd_una + so->so_snd.sb_cc; 573 tp->t_force = 1; 574 error = tcp_output(tp); 575 tp->t_force = 0; 576 break; 577 578 case PRU_SOCKADDR: 579 #ifdef INET 580 if (inp) 581 in_setsockaddr(inp, nam); 582 #endif 583 #ifdef INET6 584 if (in6p) 585 in6_setsockaddr(in6p, nam); 586 #endif 587 break; 588 589 case PRU_PEERADDR: 590 #ifdef INET 591 if (inp) 592 in_setpeeraddr(inp, nam); 593 #endif 594 #ifdef INET6 595 if (in6p) 596 in6_setpeeraddr(in6p, nam); 597 #endif 598 break; 599 600 default: 601 panic("tcp_usrreq"); 602 } 603 #ifdef TCP_DEBUG 604 if (tp && (so->so_options & SO_DEBUG)) 605 tcp_trace(TA_USER, ostate, tp, NULL, req); 606 #endif 607 608 release: 609 splx(s); 610 return (error); 611 } 612 613 int 614 tcp_ctloutput(op, so, level, optname, mp) 615 int op; 616 struct socket *so; 617 int level, optname; 618 struct mbuf **mp; 619 { 620 int error = 0, s; 621 struct inpcb *inp; 622 #ifdef INET6 623 struct in6pcb *in6p; 624 #endif 625 struct tcpcb *tp; 626 struct mbuf *m; 627 int i; 628 int family; /* family of the socket */ 629 630 family = so->so_proto->pr_domain->dom_family; 631 632 s = splsoftnet(); 633 switch (family) { 634 #ifdef INET 635 case PF_INET: 636 inp = sotoinpcb(so); 637 #ifdef INET6 638 in6p = NULL; 639 #endif 640 break; 641 #endif 642 #ifdef INET6 643 case PF_INET6: 644 inp = NULL; 645 in6p = sotoin6pcb(so); 646 break; 647 #endif 648 default: 649 splx(s); 650 return EAFNOSUPPORT; 651 } 652 #ifndef INET6 653 if (inp == NULL) 654 #else 655 if (inp == NULL && in6p == NULL) 656 #endif 657 { 658 splx(s); 659 if (op == PRCO_SETOPT && *mp) 660 (void) m_free(*mp); 661 return (ECONNRESET); 662 } 663 if (level != IPPROTO_TCP) { 664 switch (family) { 665 #ifdef INET 666 case PF_INET: 667 error = ip_ctloutput(op, so, level, optname, mp); 668 break; 669 #endif 670 #ifdef INET6 671 case PF_INET6: 672 error = ip6_ctloutput(op, so, level, optname, mp); 673 break; 674 #endif 675 } 676 splx(s); 677 return (error); 678 } 679 if (inp) 680 tp = intotcpcb(inp); 681 #ifdef INET6 682 else if (in6p) 683 tp = in6totcpcb(in6p); 684 #endif 685 else 686 tp = NULL; 687 688 switch (op) { 689 690 case PRCO_SETOPT: 691 m = *mp; 692 switch (optname) { 693 694 case TCP_NODELAY: 695 if (m == NULL || m->m_len < sizeof (int)) 696 error = EINVAL; 697 else if (*mtod(m, int *)) 698 tp->t_flags |= TF_NODELAY; 699 else 700 tp->t_flags &= ~TF_NODELAY; 701 break; 702 703 case TCP_MAXSEG: 704 if (m && (i = *mtod(m, int *)) > 0 && 705 i <= tp->t_peermss) 706 tp->t_peermss = i; /* limit on send size */ 707 else 708 error = EINVAL; 709 break; 710 711 default: 712 error = ENOPROTOOPT; 713 break; 714 } 715 if (m) 716 (void) m_free(m); 717 break; 718 719 case PRCO_GETOPT: 720 *mp = m = m_get(M_WAIT, MT_SOOPTS); 721 m->m_len = sizeof(int); 722 723 switch (optname) { 724 case TCP_NODELAY: 725 *mtod(m, int *) = tp->t_flags & TF_NODELAY; 726 break; 727 case TCP_MAXSEG: 728 *mtod(m, int *) = tp->t_peermss; 729 break; 730 default: 731 error = ENOPROTOOPT; 732 break; 733 } 734 break; 735 } 736 splx(s); 737 return (error); 738 } 739 740 #ifndef TCP_SENDSPACE 741 #define TCP_SENDSPACE 1024*16 742 #endif 743 int tcp_sendspace = TCP_SENDSPACE; 744 #ifndef TCP_RECVSPACE 745 #define TCP_RECVSPACE 1024*16 746 #endif 747 int tcp_recvspace = TCP_RECVSPACE; 748 749 /* 750 * Attach TCP protocol to socket, allocating 751 * internet protocol control block, tcp control block, 752 * bufer space, and entering LISTEN state if to accept connections. 753 */ 754 int 755 tcp_attach(so) 756 struct socket *so; 757 { 758 struct tcpcb *tp; 759 struct inpcb *inp; 760 #ifdef INET6 761 struct in6pcb *in6p; 762 #endif 763 int error; 764 int family; /* family of the socket */ 765 766 family = so->so_proto->pr_domain->dom_family; 767 768 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 769 error = soreserve(so, tcp_sendspace, tcp_recvspace); 770 if (error) 771 return (error); 772 } 773 switch (family) { 774 #ifdef INET 775 case PF_INET: 776 error = in_pcballoc(so, &tcbtable); 777 if (error) 778 return (error); 779 inp = sotoinpcb(so); 780 #ifdef INET6 781 in6p = NULL; 782 #endif 783 break; 784 #endif 785 #ifdef INET6 786 case PF_INET6: 787 error = in6_pcballoc(so, &tcb6); 788 if (error) 789 return (error); 790 inp = NULL; 791 in6p = sotoin6pcb(so); 792 break; 793 #endif 794 default: 795 return EAFNOSUPPORT; 796 } 797 if (inp) 798 tp = tcp_newtcpcb(family, (void *)inp); 799 #ifdef INET6 800 else if (in6p) 801 tp = tcp_newtcpcb(family, (void *)in6p); 802 #endif 803 else 804 tp = NULL; 805 806 if (tp == 0) { 807 int nofd = so->so_state & SS_NOFDREF; /* XXX */ 808 809 so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */ 810 #ifdef INET 811 if (inp) 812 in_pcbdetach(inp); 813 #endif 814 #ifdef INET6 815 if (in6p) 816 in6_pcbdetach(in6p); 817 #endif 818 so->so_state |= nofd; 819 return (ENOBUFS); 820 } 821 tp->t_state = TCPS_CLOSED; 822 return (0); 823 } 824 825 /* 826 * Initiate (or continue) disconnect. 827 * If embryonic state, just send reset (once). 828 * If in ``let data drain'' option and linger null, just drop. 829 * Otherwise (hard), mark socket disconnecting and drop 830 * current input data; switch states based on user close, and 831 * send segment to peer (with FIN). 832 */ 833 struct tcpcb * 834 tcp_disconnect(tp) 835 struct tcpcb *tp; 836 { 837 struct socket *so; 838 839 if (tp->t_inpcb) 840 so = tp->t_inpcb->inp_socket; 841 #ifdef INET6 842 else if (tp->t_in6pcb) 843 so = tp->t_in6pcb->in6p_socket; 844 #endif 845 else 846 so = NULL; 847 848 if (TCPS_HAVEESTABLISHED(tp->t_state) == 0) 849 tp = tcp_close(tp); 850 else if ((so->so_options & SO_LINGER) && so->so_linger == 0) 851 tp = tcp_drop(tp, 0); 852 else { 853 soisdisconnecting(so); 854 sbflush(&so->so_rcv); 855 tp = tcp_usrclosed(tp); 856 if (tp) 857 (void) tcp_output(tp); 858 } 859 return (tp); 860 } 861 862 /* 863 * User issued close, and wish to trail through shutdown states: 864 * if never received SYN, just forget it. If got a SYN from peer, 865 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. 866 * If already got a FIN from peer, then almost done; go to LAST_ACK 867 * state. In all other cases, have already sent FIN to peer (e.g. 868 * after PRU_SHUTDOWN), and just have to play tedious game waiting 869 * for peer to send FIN or not respond to keep-alives, etc. 870 * We can let the user exit from the close as soon as the FIN is acked. 871 */ 872 struct tcpcb * 873 tcp_usrclosed(tp) 874 struct tcpcb *tp; 875 { 876 877 switch (tp->t_state) { 878 879 case TCPS_CLOSED: 880 case TCPS_LISTEN: 881 case TCPS_SYN_SENT: 882 tp->t_state = TCPS_CLOSED; 883 tp = tcp_close(tp); 884 break; 885 886 case TCPS_SYN_RECEIVED: 887 case TCPS_ESTABLISHED: 888 tp->t_state = TCPS_FIN_WAIT_1; 889 break; 890 891 case TCPS_CLOSE_WAIT: 892 tp->t_state = TCPS_LAST_ACK; 893 break; 894 } 895 if (tp && tp->t_state >= TCPS_FIN_WAIT_2) { 896 struct socket *so; 897 if (tp->t_inpcb) 898 so = tp->t_inpcb->inp_socket; 899 #ifdef INET6 900 else if (tp->t_in6pcb) 901 so = tp->t_in6pcb->in6p_socket; 902 #endif 903 else 904 so = NULL; 905 soisdisconnected(so); 906 /* 907 * If we are in FIN_WAIT_2, we arrived here because the 908 * application did a shutdown of the send side. Like the 909 * case of a transition from FIN_WAIT_1 to FIN_WAIT_2 after 910 * a full close, we start a timer to make sure sockets are 911 * not left in FIN_WAIT_2 forever. 912 */ 913 if ((tp->t_state == TCPS_FIN_WAIT_2) && (tcp_maxidle > 0)) 914 TCP_TIMER_ARM(tp, TCPT_2MSL, tcp_maxidle); 915 } 916 return (tp); 917 } 918 919 static const struct { 920 unsigned int valid : 1; 921 unsigned int rdonly : 1; 922 int *var; 923 int val; 924 } tcp_ctlvars[] = TCPCTL_VARIABLES; 925 926 /* 927 * Sysctl for tcp variables. 928 */ 929 int 930 tcp_sysctl(name, namelen, oldp, oldlenp, newp, newlen) 931 int *name; 932 u_int namelen; 933 void *oldp; 934 size_t *oldlenp; 935 void *newp; 936 size_t newlen; 937 { 938 int error, saved_value = 0; 939 940 /* All sysctl names at this level are terminal. */ 941 if (namelen != 1) 942 return (ENOTDIR); 943 944 if (name[0] < sizeof(tcp_ctlvars)/sizeof(tcp_ctlvars[0]) 945 && tcp_ctlvars[name[0]].valid) { 946 if (tcp_ctlvars[name[0]].rdonly) { 947 return (sysctl_rdint(oldp, oldlenp, newp, 948 tcp_ctlvars[name[0]].val)); 949 } else { 950 switch (name[0]) { 951 case TCPCTL_MSSDFLT: 952 saved_value = tcp_mssdflt; 953 break; 954 } 955 error = sysctl_int(oldp, oldlenp, newp, newlen, 956 tcp_ctlvars[name[0]].var); 957 if (error) 958 return (error); 959 switch (name[0]) { 960 case TCPCTL_MSSDFLT: 961 if (tcp_mssdflt < 32) { 962 tcp_mssdflt = saved_value; 963 return (EINVAL); 964 } 965 break; 966 } 967 return (0); 968 } 969 } 970 971 return (ENOPROTOOPT); 972 } 973