1 /* $NetBSD: tcp_usrreq.c,v 1.70 2002/03/11 10:06:12 martin Exp $ */ 2 3 /* 4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the project nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 /*- 33 * Copyright (c) 1997, 1998 The NetBSD Foundation, Inc. 34 * All rights reserved. 35 * 36 * This code is derived from software contributed to The NetBSD Foundation 37 * by Jason R. Thorpe and Kevin M. Lahey of the Numerical Aerospace Simulation 38 * Facility, NASA Ames Research Center. 39 * 40 * Redistribution and use in source and binary forms, with or without 41 * modification, are permitted provided that the following conditions 42 * are met: 43 * 1. Redistributions of source code must retain the above copyright 44 * notice, this list of conditions and the following disclaimer. 45 * 2. Redistributions in binary form must reproduce the above copyright 46 * notice, this list of conditions and the following disclaimer in the 47 * documentation and/or other materials provided with the distribution. 48 * 3. All advertising materials mentioning features or use of this software 49 * must display the following acknowledgement: 50 * This product includes software developed by the NetBSD 51 * Foundation, Inc. and its contributors. 52 * 4. Neither the name of The NetBSD Foundation nor the names of its 53 * contributors may be used to endorse or promote products derived 54 * from this software without specific prior written permission. 55 * 56 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 57 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 58 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 59 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 60 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 61 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 62 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 63 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 64 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 65 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 66 * POSSIBILITY OF SUCH DAMAGE. 67 */ 68 69 /* 70 * Copyright (c) 1982, 1986, 1988, 1993, 1995 71 * The Regents of the University of California. All rights reserved. 72 * 73 * Redistribution and use in source and binary forms, with or without 74 * modification, are permitted provided that the following conditions 75 * are met: 76 * 1. Redistributions of source code must retain the above copyright 77 * notice, this list of conditions and the following disclaimer. 78 * 2. Redistributions in binary form must reproduce the above copyright 79 * notice, this list of conditions and the following disclaimer in the 80 * documentation and/or other materials provided with the distribution. 81 * 3. All advertising materials mentioning features or use of this software 82 * must display the following acknowledgement: 83 * This product includes software developed by the University of 84 * California, Berkeley and its contributors. 85 * 4. Neither the name of the University nor the names of its contributors 86 * may be used to endorse or promote products derived from this software 87 * without specific prior written permission. 88 * 89 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 90 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 91 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 92 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 93 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 94 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 95 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 96 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 97 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 98 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 99 * SUCH DAMAGE. 100 * 101 * @(#)tcp_usrreq.c 8.5 (Berkeley) 6/21/95 102 */ 103 104 #include <sys/cdefs.h> 105 __KERNEL_RCSID(0, "$NetBSD: tcp_usrreq.c,v 1.70 2002/03/11 10:06:12 martin Exp $"); 106 107 #include "opt_inet.h" 108 #include "opt_ipsec.h" 109 #include "opt_tcp_debug.h" 110 111 #include <sys/param.h> 112 #include <sys/systm.h> 113 #include <sys/kernel.h> 114 #include <sys/malloc.h> 115 #include <sys/mbuf.h> 116 #include <sys/socket.h> 117 #include <sys/socketvar.h> 118 #include <sys/protosw.h> 119 #include <sys/errno.h> 120 #include <sys/stat.h> 121 #include <sys/proc.h> 122 #include <sys/domain.h> 123 #include <sys/sysctl.h> 124 125 #include <net/if.h> 126 #include <net/route.h> 127 128 #include <netinet/in.h> 129 #include <netinet/in_systm.h> 130 #include <netinet/in_var.h> 131 #include <netinet/ip.h> 132 #include <netinet/in_pcb.h> 133 #include <netinet/ip_var.h> 134 135 #ifdef INET6 136 #ifndef INET 137 #include <netinet/in.h> 138 #endif 139 #include <netinet/ip6.h> 140 #include <netinet6/in6_pcb.h> 141 #include <netinet6/ip6_var.h> 142 #endif 143 144 #include <netinet/tcp.h> 145 #include <netinet/tcp_fsm.h> 146 #include <netinet/tcp_seq.h> 147 #include <netinet/tcp_timer.h> 148 #include <netinet/tcp_var.h> 149 #include <netinet/tcpip.h> 150 #include <netinet/tcp_debug.h> 151 152 #include "opt_tcp_space.h" 153 154 #ifdef IPSEC 155 #include <netinet6/ipsec.h> 156 #endif /*IPSEC*/ 157 158 /* 159 * TCP protocol interface to socket abstraction. 160 */ 161 extern char *tcpstates[]; 162 163 /* 164 * Process a TCP user request for TCP tb. If this is a send request 165 * then m is the mbuf chain of send data. If this is a timer expiration 166 * (called from the software clock routine), then timertype tells which timer. 167 */ 168 /*ARGSUSED*/ 169 int 170 tcp_usrreq(so, req, m, nam, control, p) 171 struct socket *so; 172 int req; 173 struct mbuf *m, *nam, *control; 174 struct proc *p; 175 { 176 struct inpcb *inp; 177 #ifdef INET6 178 struct in6pcb *in6p; 179 #endif 180 struct tcpcb *tp = NULL; 181 int s; 182 int error = 0; 183 int ostate; 184 int family; /* family of the socket */ 185 186 family = so->so_proto->pr_domain->dom_family; 187 188 if (req == PRU_CONTROL) { 189 switch (family) { 190 #ifdef INET 191 case PF_INET: 192 return (in_control(so, (long)m, (caddr_t)nam, 193 (struct ifnet *)control, p)); 194 #endif 195 #ifdef INET6 196 case PF_INET6: 197 return (in6_control(so, (long)m, (caddr_t)nam, 198 (struct ifnet *)control, p)); 199 #endif 200 default: 201 return EAFNOSUPPORT; 202 } 203 } 204 205 if (req == PRU_PURGEIF) { 206 switch (family) { 207 #ifdef INET 208 case PF_INET: 209 in_pcbpurgeif0(&tcbtable, (struct ifnet *)control); 210 in_purgeif((struct ifnet *)control); 211 in_pcbpurgeif(&tcbtable, (struct ifnet *)control); 212 break; 213 #endif 214 #ifdef INET6 215 case PF_INET6: 216 in6_pcbpurgeif0(&tcb6, (struct ifnet *)control); 217 in6_purgeif((struct ifnet *)control); 218 in6_pcbpurgeif(&tcb6, (struct ifnet *)control); 219 break; 220 #endif 221 default: 222 return (EAFNOSUPPORT); 223 } 224 return (0); 225 } 226 227 s = splsoftnet(); 228 switch (family) { 229 #ifdef INET 230 case PF_INET: 231 inp = sotoinpcb(so); 232 #ifdef INET6 233 in6p = NULL; 234 #endif 235 break; 236 #endif 237 #ifdef INET6 238 case PF_INET6: 239 inp = NULL; 240 in6p = sotoin6pcb(so); 241 break; 242 #endif 243 default: 244 splx(s); 245 return EAFNOSUPPORT; 246 } 247 248 #ifdef DIAGNOSTIC 249 #ifdef INET6 250 if (inp && in6p) 251 panic("tcp_usrreq: both inp and in6p set to non-NULL"); 252 #endif 253 if (req != PRU_SEND && req != PRU_SENDOOB && control) 254 panic("tcp_usrreq: unexpected control mbuf"); 255 #endif 256 /* 257 * When a TCP is attached to a socket, then there will be 258 * a (struct inpcb) pointed at by the socket, and this 259 * structure will point at a subsidary (struct tcpcb). 260 */ 261 #ifndef INET6 262 if (inp == 0 && req != PRU_ATTACH) 263 #else 264 if ((inp == 0 && in6p == 0) && req != PRU_ATTACH) 265 #endif 266 { 267 error = EINVAL; 268 goto release; 269 } 270 #ifdef INET 271 if (inp) { 272 tp = intotcpcb(inp); 273 /* WHAT IF TP IS 0? */ 274 #ifdef KPROF 275 tcp_acounts[tp->t_state][req]++; 276 #endif 277 ostate = tp->t_state; 278 } 279 #endif 280 #ifdef INET6 281 if (in6p) { 282 tp = in6totcpcb(in6p); 283 /* WHAT IF TP IS 0? */ 284 #ifdef KPROF 285 tcp_acounts[tp->t_state][req]++; 286 #endif 287 ostate = tp->t_state; 288 } 289 #endif 290 else 291 ostate = 0; 292 293 switch (req) { 294 295 /* 296 * TCP attaches to socket via PRU_ATTACH, reserving space, 297 * and an internet control block. 298 */ 299 case PRU_ATTACH: 300 #ifndef INET6 301 if (inp != 0) 302 #else 303 if (inp != 0 || in6p != 0) 304 #endif 305 { 306 error = EISCONN; 307 break; 308 } 309 error = tcp_attach(so); 310 if (error) 311 break; 312 if ((so->so_options & SO_LINGER) && so->so_linger == 0) 313 so->so_linger = TCP_LINGERTIME; 314 tp = sototcpcb(so); 315 break; 316 317 /* 318 * PRU_DETACH detaches the TCP protocol from the socket. 319 */ 320 case PRU_DETACH: 321 tp = tcp_disconnect(tp); 322 break; 323 324 /* 325 * Give the socket an address. 326 */ 327 case PRU_BIND: 328 switch (family) { 329 #ifdef INET 330 case PF_INET: 331 error = in_pcbbind(inp, nam, p); 332 break; 333 #endif 334 #ifdef INET6 335 case PF_INET6: 336 error = in6_pcbbind(in6p, nam, p); 337 if (!error) { 338 /* mapped addr case */ 339 if (IN6_IS_ADDR_V4MAPPED(&in6p->in6p_laddr)) 340 tp->t_family = AF_INET; 341 else 342 tp->t_family = AF_INET6; 343 } 344 break; 345 #endif 346 } 347 break; 348 349 /* 350 * Prepare to accept connections. 351 */ 352 case PRU_LISTEN: 353 #ifdef INET 354 if (inp && inp->inp_lport == 0) { 355 error = in_pcbbind(inp, (struct mbuf *)0, 356 (struct proc *)0); 357 if (error) 358 break; 359 } 360 #endif 361 #ifdef INET6 362 if (in6p && in6p->in6p_lport == 0) { 363 error = in6_pcbbind(in6p, (struct mbuf *)0, 364 (struct proc *)0); 365 if (error) 366 break; 367 } 368 #endif 369 tp->t_state = TCPS_LISTEN; 370 break; 371 372 /* 373 * Initiate connection to peer. 374 * Create a template for use in transmissions on this connection. 375 * Enter SYN_SENT state, and mark socket as connecting. 376 * Start keep-alive timer, and seed output sequence space. 377 * Send initial segment on connection. 378 */ 379 case PRU_CONNECT: 380 #ifdef INET 381 if (inp) { 382 if (inp->inp_lport == 0) { 383 error = in_pcbbind(inp, (struct mbuf *)0, 384 (struct proc *)0); 385 if (error) 386 break; 387 } 388 error = in_pcbconnect(inp, nam); 389 } 390 #endif 391 #ifdef INET6 392 if (in6p) { 393 if (in6p->in6p_lport == 0) { 394 error = in6_pcbbind(in6p, (struct mbuf *)0, 395 (struct proc *)0); 396 if (error) 397 break; 398 } 399 error = in6_pcbconnect(in6p, nam); 400 if (!error) { 401 /* mapped addr case */ 402 if (IN6_IS_ADDR_V4MAPPED(&in6p->in6p_faddr)) 403 tp->t_family = AF_INET; 404 else 405 tp->t_family = AF_INET6; 406 } 407 } 408 #endif 409 if (error) 410 break; 411 tp->t_template = tcp_template(tp); 412 if (tp->t_template == 0) { 413 #ifdef INET 414 if (inp) 415 in_pcbdisconnect(inp); 416 #endif 417 #ifdef INET6 418 if (in6p) 419 in6_pcbdisconnect(in6p); 420 #endif 421 error = ENOBUFS; 422 break; 423 } 424 /* Compute window scaling to request. */ 425 while (tp->request_r_scale < TCP_MAX_WINSHIFT && 426 (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat) 427 tp->request_r_scale++; 428 soisconnecting(so); 429 tcpstat.tcps_connattempt++; 430 tp->t_state = TCPS_SYN_SENT; 431 TCP_TIMER_ARM(tp, TCPT_KEEP, TCPTV_KEEP_INIT); 432 tp->iss = tcp_new_iss(tp, 0); 433 tcp_sendseqinit(tp); 434 error = tcp_output(tp); 435 break; 436 437 /* 438 * Create a TCP connection between two sockets. 439 */ 440 case PRU_CONNECT2: 441 error = EOPNOTSUPP; 442 break; 443 444 /* 445 * Initiate disconnect from peer. 446 * If connection never passed embryonic stage, just drop; 447 * else if don't need to let data drain, then can just drop anyways, 448 * else have to begin TCP shutdown process: mark socket disconnecting, 449 * drain unread data, state switch to reflect user close, and 450 * send segment (e.g. FIN) to peer. Socket will be really disconnected 451 * when peer sends FIN and acks ours. 452 * 453 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB. 454 */ 455 case PRU_DISCONNECT: 456 tp = tcp_disconnect(tp); 457 break; 458 459 /* 460 * Accept a connection. Essentially all the work is 461 * done at higher levels; just return the address 462 * of the peer, storing through addr. 463 */ 464 case PRU_ACCEPT: 465 #ifdef INET 466 if (inp) 467 in_setpeeraddr(inp, nam); 468 #endif 469 #ifdef INET6 470 if (in6p) 471 in6_setpeeraddr(in6p, nam); 472 #endif 473 break; 474 475 /* 476 * Mark the connection as being incapable of further output. 477 */ 478 case PRU_SHUTDOWN: 479 socantsendmore(so); 480 tp = tcp_usrclosed(tp); 481 if (tp) 482 error = tcp_output(tp); 483 break; 484 485 /* 486 * After a receive, possibly send window update to peer. 487 */ 488 case PRU_RCVD: 489 /* 490 * soreceive() calls this function when a user receives 491 * ancillary data on a listening socket. We don't call 492 * tcp_output in such a case, since there is no header 493 * template for a listening socket and hence the kernel 494 * will panic. 495 */ 496 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) != 0) 497 (void) tcp_output(tp); 498 break; 499 500 /* 501 * Do a send by putting data in output queue and updating urgent 502 * marker if URG set. Possibly send more data. 503 */ 504 case PRU_SEND: 505 if (control && control->m_len) { 506 m_freem(control); 507 m_freem(m); 508 error = EINVAL; 509 break; 510 } 511 sbappend(&so->so_snd, m); 512 error = tcp_output(tp); 513 break; 514 515 /* 516 * Abort the TCP. 517 */ 518 case PRU_ABORT: 519 tp = tcp_drop(tp, ECONNABORTED); 520 break; 521 522 case PRU_SENSE: 523 /* 524 * stat: don't bother with a blocksize. 525 */ 526 splx(s); 527 return (0); 528 529 case PRU_RCVOOB: 530 if (control && control->m_len) { 531 m_freem(control); 532 m_freem(m); 533 error = EINVAL; 534 break; 535 } 536 if ((so->so_oobmark == 0 && 537 (so->so_state & SS_RCVATMARK) == 0) || 538 so->so_options & SO_OOBINLINE || 539 tp->t_oobflags & TCPOOB_HADDATA) { 540 error = EINVAL; 541 break; 542 } 543 if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) { 544 error = EWOULDBLOCK; 545 break; 546 } 547 m->m_len = 1; 548 *mtod(m, caddr_t) = tp->t_iobc; 549 if (((long)nam & MSG_PEEK) == 0) 550 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA); 551 break; 552 553 case PRU_SENDOOB: 554 if (sbspace(&so->so_snd) < -512) { 555 m_freem(m); 556 error = ENOBUFS; 557 break; 558 } 559 /* 560 * According to RFC961 (Assigned Protocols), 561 * the urgent pointer points to the last octet 562 * of urgent data. We continue, however, 563 * to consider it to indicate the first octet 564 * of data past the urgent section. 565 * Otherwise, snd_up should be one lower. 566 */ 567 sbappend(&so->so_snd, m); 568 tp->snd_up = tp->snd_una + so->so_snd.sb_cc; 569 tp->t_force = 1; 570 error = tcp_output(tp); 571 tp->t_force = 0; 572 break; 573 574 case PRU_SOCKADDR: 575 #ifdef INET 576 if (inp) 577 in_setsockaddr(inp, nam); 578 #endif 579 #ifdef INET6 580 if (in6p) 581 in6_setsockaddr(in6p, nam); 582 #endif 583 break; 584 585 case PRU_PEERADDR: 586 #ifdef INET 587 if (inp) 588 in_setpeeraddr(inp, nam); 589 #endif 590 #ifdef INET6 591 if (in6p) 592 in6_setpeeraddr(in6p, nam); 593 #endif 594 break; 595 596 default: 597 panic("tcp_usrreq"); 598 } 599 #ifdef TCP_DEBUG 600 if (tp && (so->so_options & SO_DEBUG)) 601 tcp_trace(TA_USER, ostate, tp, NULL, req); 602 #endif 603 604 release: 605 splx(s); 606 return (error); 607 } 608 609 int 610 tcp_ctloutput(op, so, level, optname, mp) 611 int op; 612 struct socket *so; 613 int level, optname; 614 struct mbuf **mp; 615 { 616 int error = 0, s; 617 struct inpcb *inp; 618 #ifdef INET6 619 struct in6pcb *in6p; 620 #endif 621 struct tcpcb *tp; 622 struct mbuf *m; 623 int i; 624 int family; /* family of the socket */ 625 626 family = so->so_proto->pr_domain->dom_family; 627 628 s = splsoftnet(); 629 switch (family) { 630 #ifdef INET 631 case PF_INET: 632 inp = sotoinpcb(so); 633 #ifdef INET6 634 in6p = NULL; 635 #endif 636 break; 637 #endif 638 #ifdef INET6 639 case PF_INET6: 640 inp = NULL; 641 in6p = sotoin6pcb(so); 642 break; 643 #endif 644 default: 645 splx(s); 646 return EAFNOSUPPORT; 647 } 648 #ifndef INET6 649 if (inp == NULL) 650 #else 651 if (inp == NULL && in6p == NULL) 652 #endif 653 { 654 splx(s); 655 if (op == PRCO_SETOPT && *mp) 656 (void) m_free(*mp); 657 return (ECONNRESET); 658 } 659 if (level != IPPROTO_TCP) { 660 switch (family) { 661 #ifdef INET 662 case PF_INET: 663 error = ip_ctloutput(op, so, level, optname, mp); 664 break; 665 #endif 666 #ifdef INET6 667 case PF_INET6: 668 error = ip6_ctloutput(op, so, level, optname, mp); 669 break; 670 #endif 671 } 672 splx(s); 673 return (error); 674 } 675 if (inp) 676 tp = intotcpcb(inp); 677 #ifdef INET6 678 else if (in6p) 679 tp = in6totcpcb(in6p); 680 #endif 681 else 682 tp = NULL; 683 684 switch (op) { 685 686 case PRCO_SETOPT: 687 m = *mp; 688 switch (optname) { 689 690 case TCP_NODELAY: 691 if (m == NULL || m->m_len < sizeof (int)) 692 error = EINVAL; 693 else if (*mtod(m, int *)) 694 tp->t_flags |= TF_NODELAY; 695 else 696 tp->t_flags &= ~TF_NODELAY; 697 break; 698 699 case TCP_MAXSEG: 700 if (m && (i = *mtod(m, int *)) > 0 && 701 i <= tp->t_peermss) 702 tp->t_peermss = i; /* limit on send size */ 703 else 704 error = EINVAL; 705 break; 706 707 default: 708 error = ENOPROTOOPT; 709 break; 710 } 711 if (m) 712 (void) m_free(m); 713 break; 714 715 case PRCO_GETOPT: 716 *mp = m = m_get(M_WAIT, MT_SOOPTS); 717 m->m_len = sizeof(int); 718 719 switch (optname) { 720 case TCP_NODELAY: 721 *mtod(m, int *) = tp->t_flags & TF_NODELAY; 722 break; 723 case TCP_MAXSEG: 724 *mtod(m, int *) = tp->t_peermss; 725 break; 726 default: 727 error = ENOPROTOOPT; 728 break; 729 } 730 break; 731 } 732 splx(s); 733 return (error); 734 } 735 736 #ifndef TCP_SENDSPACE 737 #define TCP_SENDSPACE 1024*16; 738 #endif 739 int tcp_sendspace = TCP_SENDSPACE; 740 #ifndef TCP_RECVSPACE 741 #define TCP_RECVSPACE 1024*16; 742 #endif 743 int tcp_recvspace = TCP_RECVSPACE; 744 745 /* 746 * Attach TCP protocol to socket, allocating 747 * internet protocol control block, tcp control block, 748 * bufer space, and entering LISTEN state if to accept connections. 749 */ 750 int 751 tcp_attach(so) 752 struct socket *so; 753 { 754 struct tcpcb *tp; 755 struct inpcb *inp; 756 #ifdef INET6 757 struct in6pcb *in6p; 758 #endif 759 int error; 760 int family; /* family of the socket */ 761 762 family = so->so_proto->pr_domain->dom_family; 763 764 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 765 error = soreserve(so, tcp_sendspace, tcp_recvspace); 766 if (error) 767 return (error); 768 } 769 switch (family) { 770 #ifdef INET 771 case PF_INET: 772 error = in_pcballoc(so, &tcbtable); 773 if (error) 774 return (error); 775 inp = sotoinpcb(so); 776 #ifdef INET6 777 in6p = NULL; 778 #endif 779 break; 780 #endif 781 #ifdef INET6 782 case PF_INET6: 783 error = in6_pcballoc(so, &tcb6); 784 if (error) 785 return (error); 786 inp = NULL; 787 in6p = sotoin6pcb(so); 788 break; 789 #endif 790 default: 791 return EAFNOSUPPORT; 792 } 793 if (inp) 794 tp = tcp_newtcpcb(family, (void *)inp); 795 #ifdef INET6 796 else if (in6p) 797 tp = tcp_newtcpcb(family, (void *)in6p); 798 #endif 799 else 800 tp = NULL; 801 802 if (tp == 0) { 803 int nofd = so->so_state & SS_NOFDREF; /* XXX */ 804 805 so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */ 806 #ifdef INET 807 if (inp) 808 in_pcbdetach(inp); 809 #endif 810 #ifdef INET6 811 if (in6p) 812 in6_pcbdetach(in6p); 813 #endif 814 so->so_state |= nofd; 815 return (ENOBUFS); 816 } 817 tp->t_state = TCPS_CLOSED; 818 return (0); 819 } 820 821 /* 822 * Initiate (or continue) disconnect. 823 * If embryonic state, just send reset (once). 824 * If in ``let data drain'' option and linger null, just drop. 825 * Otherwise (hard), mark socket disconnecting and drop 826 * current input data; switch states based on user close, and 827 * send segment to peer (with FIN). 828 */ 829 struct tcpcb * 830 tcp_disconnect(tp) 831 struct tcpcb *tp; 832 { 833 struct socket *so; 834 835 if (tp->t_inpcb) 836 so = tp->t_inpcb->inp_socket; 837 #ifdef INET6 838 else if (tp->t_in6pcb) 839 so = tp->t_in6pcb->in6p_socket; 840 #endif 841 else 842 so = NULL; 843 844 if (TCPS_HAVEESTABLISHED(tp->t_state) == 0) 845 tp = tcp_close(tp); 846 else if ((so->so_options & SO_LINGER) && so->so_linger == 0) 847 tp = tcp_drop(tp, 0); 848 else { 849 soisdisconnecting(so); 850 sbflush(&so->so_rcv); 851 tp = tcp_usrclosed(tp); 852 if (tp) 853 (void) tcp_output(tp); 854 } 855 return (tp); 856 } 857 858 /* 859 * User issued close, and wish to trail through shutdown states: 860 * if never received SYN, just forget it. If got a SYN from peer, 861 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. 862 * If already got a FIN from peer, then almost done; go to LAST_ACK 863 * state. In all other cases, have already sent FIN to peer (e.g. 864 * after PRU_SHUTDOWN), and just have to play tedious game waiting 865 * for peer to send FIN or not respond to keep-alives, etc. 866 * We can let the user exit from the close as soon as the FIN is acked. 867 */ 868 struct tcpcb * 869 tcp_usrclosed(tp) 870 struct tcpcb *tp; 871 { 872 873 switch (tp->t_state) { 874 875 case TCPS_CLOSED: 876 case TCPS_LISTEN: 877 case TCPS_SYN_SENT: 878 tp->t_state = TCPS_CLOSED; 879 tp = tcp_close(tp); 880 break; 881 882 case TCPS_SYN_RECEIVED: 883 case TCPS_ESTABLISHED: 884 tp->t_state = TCPS_FIN_WAIT_1; 885 break; 886 887 case TCPS_CLOSE_WAIT: 888 tp->t_state = TCPS_LAST_ACK; 889 break; 890 } 891 if (tp && tp->t_state >= TCPS_FIN_WAIT_2) { 892 struct socket *so; 893 if (tp->t_inpcb) 894 so = tp->t_inpcb->inp_socket; 895 #ifdef INET6 896 else if (tp->t_in6pcb) 897 so = tp->t_in6pcb->in6p_socket; 898 #endif 899 else 900 so = NULL; 901 soisdisconnected(so); 902 /* 903 * If we are in FIN_WAIT_2, we arrived here because the 904 * application did a shutdown of the send side. Like the 905 * case of a transition from FIN_WAIT_1 to FIN_WAIT_2 after 906 * a full close, we start a timer to make sure sockets are 907 * not left in FIN_WAIT_2 forever. 908 */ 909 if ((tp->t_state == TCPS_FIN_WAIT_2) && (tcp_maxidle > 0)) 910 TCP_TIMER_ARM(tp, TCPT_2MSL, tcp_maxidle); 911 } 912 return (tp); 913 } 914 915 static const struct { 916 unsigned int valid : 1; 917 unsigned int rdonly : 1; 918 int *var; 919 int val; 920 } tcp_ctlvars[] = TCPCTL_VARIABLES; 921 922 /* 923 * Sysctl for tcp variables. 924 */ 925 int 926 tcp_sysctl(name, namelen, oldp, oldlenp, newp, newlen) 927 int *name; 928 u_int namelen; 929 void *oldp; 930 size_t *oldlenp; 931 void *newp; 932 size_t newlen; 933 { 934 int error, saved_value = 0; 935 936 /* All sysctl names at this level are terminal. */ 937 if (namelen != 1) 938 return (ENOTDIR); 939 940 if (name[0] < sizeof(tcp_ctlvars)/sizeof(tcp_ctlvars[0]) 941 && tcp_ctlvars[name[0]].valid) { 942 if (tcp_ctlvars[name[0]].rdonly) { 943 return (sysctl_rdint(oldp, oldlenp, newp, 944 tcp_ctlvars[name[0]].val)); 945 } else { 946 switch (name[0]) { 947 case TCPCTL_MSSDFLT: 948 saved_value = tcp_mssdflt; 949 break; 950 } 951 error = sysctl_int(oldp, oldlenp, newp, newlen, 952 tcp_ctlvars[name[0]].var); 953 if (error) 954 return (error); 955 switch (name[0]) { 956 case TCPCTL_MSSDFLT: 957 if (tcp_mssdflt < 32) { 958 tcp_mssdflt = saved_value; 959 return (EINVAL); 960 } 961 break; 962 } 963 return (0); 964 } 965 } 966 967 return (ENOPROTOOPT); 968 } 969