1 /* $OpenBSD: tcp_usrreq.c,v 1.107 2011/04/28 09:56:27 claudio Exp $ */ 2 /* $NetBSD: tcp_usrreq.c,v 1.20 1996/02/13 23:44:16 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1988, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)COPYRIGHT 1.1 (NRL) 17 January 1995 33 * 34 * NRL grants permission for redistribution and use in source and binary 35 * forms, with or without modification, of the software and documentation 36 * created at NRL provided that the following conditions are met: 37 * 38 * 1. Redistributions of source code must retain the above copyright 39 * notice, this list of conditions and the following disclaimer. 40 * 2. Redistributions in binary form must reproduce the above copyright 41 * notice, this list of conditions and the following disclaimer in the 42 * documentation and/or other materials provided with the distribution. 43 * 3. All advertising materials mentioning features or use of this software 44 * must display the following acknowledgements: 45 * This product includes software developed by the University of 46 * California, Berkeley and its contributors. 47 * This product includes software developed at the Information 48 * Technology Division, US Naval Research Laboratory. 49 * 4. Neither the name of the NRL nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS 54 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 55 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 56 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR 57 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 58 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 59 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 60 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 61 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 62 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 63 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 64 * 65 * The views and conclusions contained in the software and documentation 66 * are those of the authors and should not be interpreted as representing 67 * official policies, either expressed or implied, of the US Naval 68 * Research Laboratory (NRL). 69 */ 70 71 #include <sys/param.h> 72 #include <sys/systm.h> 73 #include <sys/mbuf.h> 74 #include <sys/socket.h> 75 #include <sys/socketvar.h> 76 #include <sys/protosw.h> 77 #include <sys/stat.h> 78 #include <sys/proc.h> 79 #include <sys/sysctl.h> 80 #include <sys/domain.h> 81 #include <sys/kernel.h> 82 #include <sys/pool.h> 83 84 #include <dev/rndvar.h> 85 86 #include <net/if.h> 87 #include <net/route.h> 88 89 #include <netinet/in.h> 90 #include <netinet/in_systm.h> 91 #include <netinet/in_var.h> 92 #include <netinet/ip.h> 93 #include <netinet/in_pcb.h> 94 #include <netinet/ip_var.h> 95 #include <netinet/tcp.h> 96 #include <netinet/tcp_fsm.h> 97 #include <netinet/tcp_seq.h> 98 #include <netinet/tcp_timer.h> 99 #include <netinet/tcp_var.h> 100 #include <netinet/tcpip.h> 101 #include <netinet/tcp_debug.h> 102 103 /* 104 * TCP protocol interface to socket abstraction. 105 */ 106 extern char *tcpstates[]; 107 extern int tcptv_keep_init; 108 109 extern int tcp_rst_ppslim; 110 111 /* from in_pcb.c */ 112 extern struct baddynamicports baddynamicports; 113 114 #ifndef TCP_SENDSPACE 115 #define TCP_SENDSPACE 1024*16 116 #endif 117 u_int tcp_sendspace = TCP_SENDSPACE; 118 #ifndef TCP_RECVSPACE 119 #define TCP_RECVSPACE 1024*16 120 #endif 121 u_int tcp_recvspace = TCP_RECVSPACE; 122 u_int tcp_autorcvbuf_inc = 16 * 1024; 123 124 int *tcpctl_vars[TCPCTL_MAXID] = TCPCTL_VARS; 125 126 struct inpcbtable tcbtable; 127 128 int tcp_ident(void *, size_t *, void *, size_t, int); 129 130 /* 131 * Process a TCP user request for TCP tb. If this is a send request 132 * then m is the mbuf chain of send data. If this is a timer expiration 133 * (called from the software clock routine), then timertype tells which timer. 134 */ 135 /*ARGSUSED*/ 136 int 137 tcp_usrreq(so, req, m, nam, control, p) 138 struct socket *so; 139 int req; 140 struct mbuf *m, *nam, *control; 141 struct proc *p; 142 { 143 struct sockaddr_in *sin; 144 struct inpcb *inp; 145 struct tcpcb *tp = NULL; 146 int s; 147 int error = 0; 148 short ostate; 149 150 if (req == PRU_CONTROL) { 151 #ifdef INET6 152 if (sotopf(so) == PF_INET6) 153 return in6_control(so, (u_long)m, (caddr_t)nam, 154 (struct ifnet *)control, 0); 155 else 156 #endif /* INET6 */ 157 return (in_control(so, (u_long)m, (caddr_t)nam, 158 (struct ifnet *)control)); 159 } 160 if (control && control->m_len) { 161 m_freem(control); 162 if (m) 163 m_freem(m); 164 return (EINVAL); 165 } 166 167 s = splsoftnet(); 168 inp = sotoinpcb(so); 169 /* 170 * When a TCP is attached to a socket, then there will be 171 * a (struct inpcb) pointed at by the socket, and this 172 * structure will point at a subsidiary (struct tcpcb). 173 */ 174 if (inp == 0 && req != PRU_ATTACH) { 175 error = so->so_error; 176 if (error == 0) 177 error = EINVAL; 178 splx(s); 179 /* 180 * The following corrects an mbuf leak under rare 181 * circumstances 182 */ 183 if (m && (req == PRU_SEND || req == PRU_SENDOOB)) 184 m_freem(m); 185 return (error); 186 } 187 if (inp) { 188 tp = intotcpcb(inp); 189 /* WHAT IF TP IS 0? */ 190 #ifdef KPROF 191 tcp_acounts[tp->t_state][req]++; 192 #endif 193 ostate = tp->t_state; 194 } else 195 ostate = 0; 196 switch (req) { 197 198 /* 199 * TCP attaches to socket via PRU_ATTACH, reserving space, 200 * and an internet control block. 201 */ 202 case PRU_ATTACH: 203 if (inp) { 204 error = EISCONN; 205 break; 206 } 207 error = tcp_attach(so); 208 if (error) 209 break; 210 if ((so->so_options & SO_LINGER) && so->so_linger == 0) 211 so->so_linger = TCP_LINGERTIME; 212 tp = sototcpcb(so); 213 break; 214 215 /* 216 * PRU_DETACH detaches the TCP protocol from the socket. 217 * If the protocol state is non-embryonic, then can't 218 * do this directly: have to initiate a PRU_DISCONNECT, 219 * which may finish later; embryonic TCB's can just 220 * be discarded here. 221 */ 222 case PRU_DETACH: 223 tp = tcp_disconnect(tp); 224 break; 225 226 /* 227 * Give the socket an address. 228 */ 229 case PRU_BIND: 230 #ifdef INET6 231 if (inp->inp_flags & INP_IPV6) 232 error = in6_pcbbind(inp, nam, p); 233 else 234 #endif 235 error = in_pcbbind(inp, nam, p); 236 if (error) 237 break; 238 break; 239 240 /* 241 * Prepare to accept connections. 242 */ 243 case PRU_LISTEN: 244 if (inp->inp_lport == 0) { 245 #ifdef INET6 246 if (inp->inp_flags & INP_IPV6) 247 error = in6_pcbbind(inp, NULL, p); 248 else 249 #endif 250 error = in_pcbbind(inp, NULL, p); 251 } 252 /* If the in_pcbbind() above is called, the tp->pf 253 should still be whatever it was before. */ 254 if (error == 0) 255 tp->t_state = TCPS_LISTEN; 256 break; 257 258 /* 259 * Initiate connection to peer. 260 * Create a template for use in transmissions on this connection. 261 * Enter SYN_SENT state, and mark socket as connecting. 262 * Start keep-alive timer, and seed output sequence space. 263 * Send initial segment on connection. 264 */ 265 case PRU_CONNECT: 266 sin = mtod(nam, struct sockaddr_in *); 267 268 #ifdef INET6 269 if (sin->sin_family == AF_INET6) { 270 struct in6_addr *in6_addr = &mtod(nam, 271 struct sockaddr_in6 *)->sin6_addr; 272 273 if (IN6_IS_ADDR_UNSPECIFIED(in6_addr) || 274 IN6_IS_ADDR_MULTICAST(in6_addr) || 275 IN6_IS_ADDR_V4MAPPED(in6_addr)) { 276 error = EINVAL; 277 break; 278 } 279 280 if (inp->inp_lport == 0) { 281 error = in6_pcbbind(inp, NULL, p); 282 if (error) 283 break; 284 } 285 error = in6_pcbconnect(inp, nam); 286 } else if (sin->sin_family == AF_INET) 287 #endif /* INET6 */ 288 { 289 if ((sin->sin_addr.s_addr == INADDR_ANY) || 290 IN_MULTICAST(sin->sin_addr.s_addr) || 291 in_broadcast(sin->sin_addr, NULL, 292 inp->inp_rtableid)) { 293 error = EINVAL; 294 break; 295 } 296 297 if (inp->inp_lport == 0) { 298 error = in_pcbbind(inp, NULL, p); 299 if (error) 300 break; 301 } 302 error = in_pcbconnect(inp, nam); 303 } 304 305 if (error) 306 break; 307 308 tp->t_template = tcp_template(tp); 309 if (tp->t_template == 0) { 310 in_pcbdisconnect(inp); 311 error = ENOBUFS; 312 break; 313 } 314 315 so->so_state |= SS_CONNECTOUT; 316 317 /* Compute window scaling to request. */ 318 tcp_rscale(tp, sb_max); 319 320 soisconnecting(so); 321 tcpstat.tcps_connattempt++; 322 tp->t_state = TCPS_SYN_SENT; 323 TCP_TIMER_ARM(tp, TCPT_KEEP, tcptv_keep_init); 324 tcp_set_iss_tsm(tp); 325 tcp_sendseqinit(tp); 326 #if defined(TCP_SACK) 327 tp->snd_last = tp->snd_una; 328 #endif 329 #if defined(TCP_SACK) && defined(TCP_FACK) 330 tp->snd_fack = tp->snd_una; 331 tp->retran_data = 0; 332 tp->snd_awnd = 0; 333 #endif 334 error = tcp_output(tp); 335 break; 336 337 /* 338 * Create a TCP connection between two sockets. 339 */ 340 case PRU_CONNECT2: 341 error = EOPNOTSUPP; 342 break; 343 344 /* 345 * Initiate disconnect from peer. 346 * If connection never passed embryonic stage, just drop; 347 * else if don't need to let data drain, then can just drop anyways, 348 * else have to begin TCP shutdown process: mark socket disconnecting, 349 * drain unread data, state switch to reflect user close, and 350 * send segment (e.g. FIN) to peer. Socket will be really disconnected 351 * when peer sends FIN and acks ours. 352 * 353 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB. 354 */ 355 case PRU_DISCONNECT: 356 tp = tcp_disconnect(tp); 357 break; 358 359 /* 360 * Accept a connection. Essentially all the work is 361 * done at higher levels; just return the address 362 * of the peer, storing through addr. 363 */ 364 case PRU_ACCEPT: 365 #ifdef INET6 366 if (inp->inp_flags & INP_IPV6) 367 in6_setpeeraddr(inp, nam); 368 else 369 #endif 370 in_setpeeraddr(inp, nam); 371 break; 372 373 /* 374 * Mark the connection as being incapable of further output. 375 */ 376 case PRU_SHUTDOWN: 377 if (so->so_state & SS_CANTSENDMORE) 378 break; 379 socantsendmore(so); 380 tp = tcp_usrclosed(tp); 381 if (tp) 382 error = tcp_output(tp); 383 break; 384 385 /* 386 * After a receive, possibly send window update to peer. 387 */ 388 case PRU_RCVD: 389 /* 390 * soreceive() calls this function when a user receives 391 * ancillary data on a listening socket. We don't call 392 * tcp_output in such a case, since there is no header 393 * template for a listening socket and hence the kernel 394 * will panic. 395 */ 396 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) != 0) 397 (void) tcp_output(tp); 398 break; 399 400 /* 401 * Do a send by putting data in output queue and updating urgent 402 * marker if URG set. Possibly send more data. 403 */ 404 case PRU_SEND: 405 sbappendstream(&so->so_snd, m); 406 error = tcp_output(tp); 407 break; 408 409 /* 410 * Abort the TCP. 411 */ 412 case PRU_ABORT: 413 tp = tcp_drop(tp, ECONNABORTED); 414 break; 415 416 case PRU_SENSE: 417 ((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat; 418 splx(s); 419 return (0); 420 421 case PRU_RCVOOB: 422 if ((so->so_oobmark == 0 && 423 (so->so_state & SS_RCVATMARK) == 0) || 424 so->so_options & SO_OOBINLINE || 425 tp->t_oobflags & TCPOOB_HADDATA) { 426 error = EINVAL; 427 break; 428 } 429 if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) { 430 error = EWOULDBLOCK; 431 break; 432 } 433 m->m_len = 1; 434 *mtod(m, caddr_t) = tp->t_iobc; 435 if (((long)nam & MSG_PEEK) == 0) 436 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA); 437 break; 438 439 case PRU_SENDOOB: 440 if (sbspace(&so->so_snd) < -512) { 441 m_freem(m); 442 error = ENOBUFS; 443 break; 444 } 445 /* 446 * According to RFC961 (Assigned Protocols), 447 * the urgent pointer points to the last octet 448 * of urgent data. We continue, however, 449 * to consider it to indicate the first octet 450 * of data past the urgent section. 451 * Otherwise, snd_up should be one lower. 452 */ 453 sbappendstream(&so->so_snd, m); 454 tp->snd_up = tp->snd_una + so->so_snd.sb_cc; 455 tp->t_force = 1; 456 error = tcp_output(tp); 457 tp->t_force = 0; 458 break; 459 460 case PRU_SOCKADDR: 461 #ifdef INET6 462 if (inp->inp_flags & INP_IPV6) 463 in6_setsockaddr(inp, nam); 464 else 465 #endif 466 in_setsockaddr(inp, nam); 467 break; 468 469 case PRU_PEERADDR: 470 #ifdef INET6 471 if (inp->inp_flags & INP_IPV6) 472 in6_setpeeraddr(inp, nam); 473 else 474 #endif 475 in_setpeeraddr(inp, nam); 476 break; 477 478 default: 479 panic("tcp_usrreq"); 480 } 481 if (tp && (so->so_options & SO_DEBUG)) 482 tcp_trace(TA_USER, ostate, tp, (caddr_t)0, req, 0); 483 splx(s); 484 return (error); 485 } 486 487 int 488 tcp_ctloutput(op, so, level, optname, mp) 489 int op; 490 struct socket *so; 491 int level, optname; 492 struct mbuf **mp; 493 { 494 int error = 0, s; 495 struct inpcb *inp; 496 struct tcpcb *tp; 497 struct mbuf *m; 498 int i; 499 500 s = splsoftnet(); 501 inp = sotoinpcb(so); 502 if (inp == NULL) { 503 splx(s); 504 if (op == PRCO_SETOPT && *mp) 505 (void) m_free(*mp); 506 return (ECONNRESET); 507 } 508 #ifdef INET6 509 tp = intotcpcb(inp); 510 #endif /* INET6 */ 511 if (level != IPPROTO_TCP) { 512 switch (so->so_proto->pr_domain->dom_family) { 513 #ifdef INET6 514 case PF_INET6: 515 error = ip6_ctloutput(op, so, level, optname, mp); 516 break; 517 #endif /* INET6 */ 518 case PF_INET: 519 error = ip_ctloutput(op, so, level, optname, mp); 520 break; 521 default: 522 error = EAFNOSUPPORT; /*?*/ 523 break; 524 } 525 splx(s); 526 return (error); 527 } 528 #ifndef INET6 529 tp = intotcpcb(inp); 530 #endif /* !INET6 */ 531 532 switch (op) { 533 534 case PRCO_SETOPT: 535 m = *mp; 536 switch (optname) { 537 538 case TCP_NODELAY: 539 if (m == NULL || m->m_len < sizeof (int)) 540 error = EINVAL; 541 else if (*mtod(m, int *)) 542 tp->t_flags |= TF_NODELAY; 543 else 544 tp->t_flags &= ~TF_NODELAY; 545 break; 546 547 case TCP_MAXSEG: 548 if (m == NULL || m->m_len < sizeof (int)) { 549 error = EINVAL; 550 break; 551 } 552 553 i = *mtod(m, int *); 554 if (i > 0 && i <= tp->t_maxseg) 555 tp->t_maxseg = i; 556 else 557 error = EINVAL; 558 break; 559 560 #ifdef TCP_SACK 561 case TCP_SACK_ENABLE: 562 if (m == NULL || m->m_len < sizeof (int)) { 563 error = EINVAL; 564 break; 565 } 566 567 if (TCPS_HAVEESTABLISHED(tp->t_state)) { 568 error = EPERM; 569 break; 570 } 571 572 if (tp->t_flags & TF_SIGNATURE) { 573 error = EPERM; 574 break; 575 } 576 577 if (*mtod(m, int *)) 578 tp->sack_enable = 1; 579 else 580 tp->sack_enable = 0; 581 break; 582 #endif 583 #ifdef TCP_SIGNATURE 584 case TCP_MD5SIG: 585 if (m == NULL || m->m_len < sizeof (int)) { 586 error = EINVAL; 587 break; 588 } 589 590 if (TCPS_HAVEESTABLISHED(tp->t_state)) { 591 error = EPERM; 592 break; 593 } 594 595 if (*mtod(m, int *)) { 596 tp->t_flags |= TF_SIGNATURE; 597 #ifdef TCP_SACK 598 tp->sack_enable = 0; 599 #endif /* TCP_SACK */ 600 } else 601 tp->t_flags &= ~TF_SIGNATURE; 602 break; 603 #endif /* TCP_SIGNATURE */ 604 default: 605 error = ENOPROTOOPT; 606 break; 607 } 608 if (m) 609 (void) m_free(m); 610 break; 611 612 case PRCO_GETOPT: 613 *mp = m = m_get(M_WAIT, MT_SOOPTS); 614 m->m_len = sizeof(int); 615 616 switch (optname) { 617 case TCP_NODELAY: 618 *mtod(m, int *) = tp->t_flags & TF_NODELAY; 619 break; 620 case TCP_MAXSEG: 621 *mtod(m, int *) = tp->t_maxseg; 622 break; 623 #ifdef TCP_SACK 624 case TCP_SACK_ENABLE: 625 *mtod(m, int *) = tp->sack_enable; 626 break; 627 #endif 628 #ifdef TCP_SIGNATURE 629 case TCP_MD5SIG: 630 *mtod(m, int *) = tp->t_flags & TF_SIGNATURE; 631 break; 632 #endif 633 default: 634 error = ENOPROTOOPT; 635 break; 636 } 637 break; 638 } 639 splx(s); 640 return (error); 641 } 642 643 /* 644 * Attach TCP protocol to socket, allocating 645 * internet protocol control block, tcp control block, 646 * bufer space, and entering LISTEN state if to accept connections. 647 */ 648 int 649 tcp_attach(so) 650 struct socket *so; 651 { 652 struct tcpcb *tp; 653 struct inpcb *inp; 654 int error; 655 656 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0 || 657 sbcheckreserve(so->so_snd.sb_wat, tcp_sendspace) || 658 sbcheckreserve(so->so_rcv.sb_wat, tcp_recvspace)) { 659 error = soreserve(so, tcp_sendspace, tcp_recvspace); 660 if (error) 661 return (error); 662 } 663 664 error = in_pcballoc(so, &tcbtable); 665 if (error) 666 return (error); 667 inp = sotoinpcb(so); 668 tp = tcp_newtcpcb(inp); 669 if (tp == NULL) { 670 int nofd = so->so_state & SS_NOFDREF; /* XXX */ 671 672 so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */ 673 in_pcbdetach(inp); 674 so->so_state |= nofd; 675 return (ENOBUFS); 676 } 677 tp->t_state = TCPS_CLOSED; 678 #ifdef INET6 679 /* we disallow IPv4 mapped address completely. */ 680 if (inp->inp_flags & INP_IPV6) 681 tp->pf = PF_INET6; 682 else 683 tp->pf = PF_INET; 684 #else 685 tp->pf = PF_INET; 686 #endif 687 return (0); 688 } 689 690 /* 691 * Initiate (or continue) disconnect. 692 * If embryonic state, just send reset (once). 693 * If in ``let data drain'' option and linger null, just drop. 694 * Otherwise (hard), mark socket disconnecting and drop 695 * current input data; switch states based on user close, and 696 * send segment to peer (with FIN). 697 */ 698 struct tcpcb * 699 tcp_disconnect(tp) 700 struct tcpcb *tp; 701 { 702 struct socket *so = tp->t_inpcb->inp_socket; 703 704 if (TCPS_HAVEESTABLISHED(tp->t_state) == 0) 705 tp = tcp_close(tp); 706 else if ((so->so_options & SO_LINGER) && so->so_linger == 0) 707 tp = tcp_drop(tp, 0); 708 else { 709 soisdisconnecting(so); 710 sbflush(&so->so_rcv); 711 tp = tcp_usrclosed(tp); 712 if (tp) 713 (void) tcp_output(tp); 714 } 715 return (tp); 716 } 717 718 /* 719 * User issued close, and wish to trail through shutdown states: 720 * if never received SYN, just forget it. If got a SYN from peer, 721 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. 722 * If already got a FIN from peer, then almost done; go to LAST_ACK 723 * state. In all other cases, have already sent FIN to peer (e.g. 724 * after PRU_SHUTDOWN), and just have to play tedious game waiting 725 * for peer to send FIN or not respond to keep-alives, etc. 726 * We can let the user exit from the close as soon as the FIN is acked. 727 */ 728 struct tcpcb * 729 tcp_usrclosed(tp) 730 struct tcpcb *tp; 731 { 732 733 switch (tp->t_state) { 734 735 case TCPS_CLOSED: 736 case TCPS_LISTEN: 737 case TCPS_SYN_SENT: 738 tp->t_state = TCPS_CLOSED; 739 tp = tcp_close(tp); 740 break; 741 742 case TCPS_SYN_RECEIVED: 743 case TCPS_ESTABLISHED: 744 tp->t_state = TCPS_FIN_WAIT_1; 745 break; 746 747 case TCPS_CLOSE_WAIT: 748 tp->t_state = TCPS_LAST_ACK; 749 break; 750 } 751 if (tp && tp->t_state >= TCPS_FIN_WAIT_2) { 752 soisdisconnected(tp->t_inpcb->inp_socket); 753 /* 754 * If we are in FIN_WAIT_2, we arrived here because the 755 * application did a shutdown of the send side. Like the 756 * case of a transition from FIN_WAIT_1 to FIN_WAIT_2 after 757 * a full close, we start a timer to make sure sockets are 758 * not left in FIN_WAIT_2 forever. 759 */ 760 if (tp->t_state == TCPS_FIN_WAIT_2) 761 TCP_TIMER_ARM(tp, TCPT_2MSL, tcp_maxidle); 762 } 763 return (tp); 764 } 765 766 /* 767 * Look up a socket for ident or tcpdrop, ... 768 */ 769 int 770 tcp_ident(void *oldp, size_t *oldlenp, void *newp, size_t newlen, int dodrop) 771 { 772 int error = 0, s; 773 struct tcp_ident_mapping tir; 774 struct inpcb *inp; 775 struct tcpcb *tp = NULL; 776 struct sockaddr_in *fin, *lin; 777 #ifdef INET6 778 struct sockaddr_in6 *fin6, *lin6; 779 struct in6_addr f6, l6; 780 #endif 781 if (dodrop) { 782 if (oldp != NULL || *oldlenp != 0) 783 return (EINVAL); 784 if (newp == NULL) 785 return (EPERM); 786 if (newlen < sizeof(tir)) 787 return (ENOMEM); 788 if ((error = copyin(newp, &tir, sizeof (tir))) != 0 ) 789 return (error); 790 } else { 791 if (oldp == NULL) 792 return (EINVAL); 793 if (*oldlenp < sizeof(tir)) 794 return (ENOMEM); 795 if (newp != NULL || newlen != 0) 796 return (EINVAL); 797 if ((error = copyin(oldp, &tir, sizeof (tir))) != 0 ) 798 return (error); 799 } 800 switch (tir.faddr.ss_family) { 801 #ifdef INET6 802 case AF_INET6: 803 fin6 = (struct sockaddr_in6 *)&tir.faddr; 804 error = in6_embedscope(&f6, fin6, NULL, NULL); 805 if (error) 806 return EINVAL; /*?*/ 807 lin6 = (struct sockaddr_in6 *)&tir.laddr; 808 error = in6_embedscope(&l6, lin6, NULL, NULL); 809 if (error) 810 return EINVAL; /*?*/ 811 break; 812 #endif 813 case AF_INET: 814 fin = (struct sockaddr_in *)&tir.faddr; 815 lin = (struct sockaddr_in *)&tir.laddr; 816 break; 817 default: 818 return (EINVAL); 819 } 820 821 s = splsoftnet(); 822 switch (tir.faddr.ss_family) { 823 #ifdef INET6 824 case AF_INET6: 825 inp = in6_pcbhashlookup(&tcbtable, &f6, 826 fin6->sin6_port, &l6, lin6->sin6_port); 827 break; 828 #endif 829 case AF_INET: 830 inp = in_pcbhashlookup(&tcbtable, fin->sin_addr, 831 fin->sin_port, lin->sin_addr, lin->sin_port , tir.rdomain); 832 break; 833 } 834 835 if (dodrop) { 836 if (inp && (tp = intotcpcb(inp)) && 837 ((inp->inp_socket->so_options & SO_ACCEPTCONN) == 0)) 838 tp = tcp_drop(tp, ECONNABORTED); 839 else 840 error = ESRCH; 841 splx(s); 842 return (error); 843 } 844 845 if (inp == NULL) { 846 ++tcpstat.tcps_pcbhashmiss; 847 switch (tir.faddr.ss_family) { 848 #ifdef INET6 849 case AF_INET6: 850 inp = in6_pcblookup_listen(&tcbtable, 851 &l6, lin6->sin6_port, 0, NULL); 852 break; 853 #endif 854 case AF_INET: 855 inp = in_pcblookup_listen(&tcbtable, 856 lin->sin_addr, lin->sin_port, 0, NULL, tir.rdomain); 857 break; 858 } 859 } 860 861 if (inp != NULL && (inp->inp_socket->so_state & SS_CONNECTOUT)) { 862 tir.ruid = inp->inp_socket->so_ruid; 863 tir.euid = inp->inp_socket->so_euid; 864 } else { 865 tir.ruid = -1; 866 tir.euid = -1; 867 } 868 splx(s); 869 870 *oldlenp = sizeof (tir); 871 error = copyout((void *)&tir, oldp, sizeof (tir)); 872 return (error); 873 } 874 875 /* 876 * Sysctl for tcp variables. 877 */ 878 int 879 tcp_sysctl(name, namelen, oldp, oldlenp, newp, newlen) 880 int *name; 881 u_int namelen; 882 void *oldp; 883 size_t *oldlenp; 884 void *newp; 885 size_t newlen; 886 { 887 int error, nval; 888 889 /* All sysctl names at this level are terminal. */ 890 if (namelen != 1) 891 return (ENOTDIR); 892 893 switch (name[0]) { 894 #ifdef TCP_SACK 895 case TCPCTL_SACK: 896 return (sysctl_int(oldp, oldlenp, newp, newlen, 897 &tcp_do_sack)); 898 #endif 899 case TCPCTL_SLOWHZ: 900 return (sysctl_rdint(oldp, oldlenp, newp, PR_SLOWHZ)); 901 902 case TCPCTL_BADDYNAMIC: 903 return (sysctl_struct(oldp, oldlenp, newp, newlen, 904 baddynamicports.tcp, sizeof(baddynamicports.tcp))); 905 906 case TCPCTL_IDENT: 907 return (tcp_ident(oldp, oldlenp, newp, newlen, 0)); 908 909 case TCPCTL_DROP: 910 return (tcp_ident(oldp, oldlenp, newp, newlen, 1)); 911 912 #ifdef TCP_ECN 913 case TCPCTL_ECN: 914 return (sysctl_int(oldp, oldlenp, newp, newlen, 915 &tcp_do_ecn)); 916 #endif 917 case TCPCTL_REASS_LIMIT: 918 nval = tcp_reass_limit; 919 error = sysctl_int(oldp, oldlenp, newp, newlen, &nval); 920 if (error) 921 return (error); 922 if (nval != tcp_reass_limit) { 923 error = pool_sethardlimit(&tcpqe_pool, nval, NULL, 0); 924 if (error) 925 return (error); 926 tcp_reass_limit = nval; 927 } 928 return (0); 929 #ifdef TCP_SACK 930 case TCPCTL_SACKHOLE_LIMIT: 931 nval = tcp_sackhole_limit; 932 error = sysctl_int(oldp, oldlenp, newp, newlen, &nval); 933 if (error) 934 return (error); 935 if (nval != tcp_sackhole_limit) { 936 error = pool_sethardlimit(&sackhl_pool, nval, NULL, 0); 937 if (error) 938 return (error); 939 tcp_sackhole_limit = nval; 940 } 941 return (0); 942 #endif 943 944 case TCPCTL_STATS: 945 if (newp != NULL) 946 return (EPERM); 947 return (sysctl_struct(oldp, oldlenp, newp, newlen, 948 &tcpstat, sizeof(tcpstat))); 949 950 default: 951 if (name[0] < TCPCTL_MAXID) 952 return (sysctl_int_arr(tcpctl_vars, name, namelen, 953 oldp, oldlenp, newp, newlen)); 954 return (ENOPROTOOPT); 955 } 956 /* NOTREACHED */ 957 } 958 959 /* 960 * Scale the send buffer so that inflight data is not accounted against 961 * the limit. The buffer will scale with the congestion window, if the 962 * the receiver stops acking data the window will shrink and therefor 963 * the buffer size will shrink as well. 964 * In low memory situation try to shrink the buffer to the initial size 965 * disabling the send buffer scaling as long as the situation persists. 966 */ 967 void 968 tcp_update_sndspace(struct tcpcb *tp) 969 { 970 struct socket *so = tp->t_inpcb->inp_socket; 971 u_long nmax; 972 973 if (sbchecklowmem()) 974 /* low on memory try to get rid of some */ 975 nmax = tcp_sendspace; 976 else if (so->so_snd.sb_wat != tcp_sendspace) 977 /* user requested buffer size, auto-scaling disabled */ 978 nmax = so->so_snd.sb_wat; 979 else 980 /* automatic buffer scaling */ 981 nmax = MIN(sb_max, so->so_snd.sb_wat + tp->snd_max - 982 tp->snd_una); 983 984 /* round to MSS boundary */ 985 nmax = roundup(nmax, tp->t_maxseg); 986 987 if (nmax != so->so_snd.sb_hiwat) 988 sbreserve(&so->so_snd, nmax); 989 } 990 991 /* 992 * Scale the recv buffer by looking at how much data was transfered in 993 * on approximated RTT. If more then a big part of the recv buffer was 994 * transfered during that time we increase the buffer by a constant. 995 * In low memory situation try to shrink the buffer to the initial size. 996 */ 997 void 998 tcp_update_rcvspace(struct tcpcb *tp) 999 { 1000 struct socket *so = tp->t_inpcb->inp_socket; 1001 u_long nmax = so->so_rcv.sb_hiwat; 1002 1003 if (sbchecklowmem()) 1004 /* low on memory try to get rid of some */ 1005 nmax = tcp_recvspace; 1006 else if (so->so_rcv.sb_wat != tcp_recvspace) 1007 /* user requested buffer size, auto-scaling disabled */ 1008 nmax = so->so_rcv.sb_wat; 1009 else { 1010 /* automatic buffer scaling */ 1011 if (tp->rfbuf_cnt > so->so_rcv.sb_hiwat / 8 * 7) 1012 nmax = MIN(sb_max, so->so_rcv.sb_hiwat + 1013 tcp_autorcvbuf_inc); 1014 } 1015 1016 if (nmax == so->so_rcv.sb_hiwat) 1017 return; 1018 1019 /* round to MSS boundary */ 1020 nmax = roundup(nmax, tp->t_maxseg); 1021 sbreserve(&so->so_rcv, nmax); 1022 } 1023