1 /* 2 * Copyright (c) 2003, 2004 Jeffrey M. Hsu. All rights reserved. 3 * Copyright (c) 2003, 2004 The DragonFly Project. All rights reserved. 4 * 5 * This code is derived from software contributed to The DragonFly Project 6 * by Jeffrey M. Hsu. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of The DragonFly Project nor the names of its 17 * contributors may be used to endorse or promote products derived 18 * from this software without specific, prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 23 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 24 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 25 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 26 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 27 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 28 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 30 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33 34 /* 35 * Copyright (c) 1982, 1986, 1988, 1993 36 * The Regents of the University of California. All rights reserved. 37 * 38 * Redistribution and use in source and binary forms, with or without 39 * modification, are permitted provided that the following conditions 40 * are met: 41 * 1. Redistributions of source code must retain the above copyright 42 * notice, this list of conditions and the following disclaimer. 43 * 2. Redistributions in binary form must reproduce the above copyright 44 * notice, this list of conditions and the following disclaimer in the 45 * documentation and/or other materials provided with the distribution. 46 * 3. All advertising materials mentioning features or use of this software 47 * must display the following acknowledgement: 48 * This product includes software developed by the University of 49 * California, Berkeley and its contributors. 50 * 4. Neither the name of the University nor the names of its contributors 51 * may be used to endorse or promote products derived from this software 52 * without specific prior written permission. 53 * 54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 64 * SUCH DAMAGE. 65 * 66 * From: @(#)tcp_usrreq.c 8.2 (Berkeley) 1/3/94 67 * $FreeBSD: src/sys/netinet/tcp_usrreq.c,v 1.51.2.17 2002/10/11 11:46:44 ume Exp $ 68 * $DragonFly: src/sys/netinet/tcp_usrreq.c,v 1.51 2008/09/29 20:52:23 dillon Exp $ 69 */ 70 71 #include "opt_ipsec.h" 72 #include "opt_inet.h" 73 #include "opt_inet6.h" 74 #include "opt_tcpdebug.h" 75 76 #include <sys/param.h> 77 #include <sys/systm.h> 78 #include <sys/kernel.h> 79 #include <sys/malloc.h> 80 #include <sys/sysctl.h> 81 #include <sys/globaldata.h> 82 #include <sys/thread.h> 83 84 #include <sys/mbuf.h> 85 #ifdef INET6 86 #include <sys/domain.h> 87 #endif /* INET6 */ 88 #include <sys/socket.h> 89 #include <sys/socketvar.h> 90 #include <sys/protosw.h> 91 92 #include <sys/thread2.h> 93 #include <sys/msgport2.h> 94 #include <sys/socketvar2.h> 95 96 #include <net/if.h> 97 #include <net/netisr.h> 98 #include <net/route.h> 99 100 #include <net/netmsg2.h> 101 102 #include <netinet/in.h> 103 #include <netinet/in_systm.h> 104 #ifdef INET6 105 #include <netinet/ip6.h> 106 #endif 107 #include <netinet/in_pcb.h> 108 #ifdef INET6 109 #include <netinet6/in6_pcb.h> 110 #endif 111 #include <netinet/in_var.h> 112 #include <netinet/ip_var.h> 113 #ifdef INET6 114 #include <netinet6/ip6_var.h> 115 #include <netinet6/tcp6_var.h> 116 #endif 117 #include <netinet/tcp.h> 118 #include <netinet/tcp_fsm.h> 119 #include <netinet/tcp_seq.h> 120 #include <netinet/tcp_timer.h> 121 #include <netinet/tcp_timer2.h> 122 #include <netinet/tcp_var.h> 123 #include <netinet/tcpip.h> 124 #ifdef TCPDEBUG 125 #include <netinet/tcp_debug.h> 126 #endif 127 128 #ifdef IPSEC 129 #include <netinet6/ipsec.h> 130 #endif /*IPSEC*/ 131 132 /* 133 * TCP protocol interface to socket abstraction. 134 */ 135 extern char *tcpstates[]; /* XXX ??? */ 136 137 static int tcp_attach (struct socket *, struct pru_attach_info *); 138 static void tcp_connect (netmsg_t msg); 139 #ifdef INET6 140 static void tcp6_connect (netmsg_t msg); 141 static int tcp6_connect_oncpu(struct tcpcb *tp, int flags, 142 struct mbuf **mp, 143 struct sockaddr_in6 *sin6, 144 struct in6_addr *addr6); 145 #endif /* INET6 */ 146 static struct tcpcb * 147 tcp_disconnect (struct tcpcb *); 148 static struct tcpcb * 149 tcp_usrclosed (struct tcpcb *); 150 151 #ifdef TCPDEBUG 152 #define TCPDEBUG0 int ostate = 0 153 #define TCPDEBUG1() ostate = tp ? tp->t_state : 0 154 #define TCPDEBUG2(req) if (tp && (so->so_options & SO_DEBUG)) \ 155 tcp_trace(TA_USER, ostate, tp, 0, 0, req) 156 #else 157 #define TCPDEBUG0 158 #define TCPDEBUG1() 159 #define TCPDEBUG2(req) 160 #endif 161 162 static int tcp_lport_extension = 1; 163 164 SYSCTL_INT(_net_inet_tcp, OID_AUTO, lportext, CTLFLAG_RW, 165 &tcp_lport_extension, 0, ""); 166 167 /* 168 * TCP attaches to socket via pru_attach(), reserving space, 169 * and an internet control block. This is likely occuring on 170 * cpu0 and may have to move later when we bind/connect. 171 */ 172 static void 173 tcp_usr_attach(netmsg_t msg) 174 { 175 struct socket *so = msg->base.nm_so; 176 struct pru_attach_info *ai = msg->attach.nm_ai; 177 int error; 178 struct inpcb *inp; 179 struct tcpcb *tp = 0; 180 TCPDEBUG0; 181 182 soreference(so); 183 inp = so->so_pcb; 184 TCPDEBUG1(); 185 if (inp) { 186 error = EISCONN; 187 goto out; 188 } 189 190 error = tcp_attach(so, ai); 191 if (error) 192 goto out; 193 194 if ((so->so_options & SO_LINGER) && so->so_linger == 0) 195 so->so_linger = TCP_LINGERTIME; 196 tp = sototcpcb(so); 197 out: 198 sofree(so); /* from ref above */ 199 TCPDEBUG2(PRU_ATTACH); 200 lwkt_replymsg(&msg->lmsg, error); 201 } 202 203 /* 204 * pru_detach() detaches the TCP protocol from the socket. 205 * If the protocol state is non-embryonic, then can't 206 * do this directly: have to initiate a pru_disconnect(), 207 * which may finish later; embryonic TCB's can just 208 * be discarded here. 209 */ 210 static void 211 tcp_usr_detach(netmsg_t msg) 212 { 213 struct socket *so = msg->base.nm_so; 214 int error = 0; 215 struct inpcb *inp; 216 struct tcpcb *tp; 217 TCPDEBUG0; 218 219 inp = so->so_pcb; 220 221 /* 222 * If the inp is already detached it may have been due to an async 223 * close. Just return as if no error occured. 224 * 225 * It's possible for the tcpcb (tp) to disconnect from the inp due 226 * to tcp_drop()->tcp_close() being called. This may occur *after* 227 * the detach message has been queued so we may find a NULL tp here. 228 */ 229 if (inp) { 230 if ((tp = intotcpcb(inp)) != NULL) { 231 TCPDEBUG1(); 232 tp = tcp_disconnect(tp); 233 TCPDEBUG2(PRU_DETACH); 234 } 235 } 236 lwkt_replymsg(&msg->lmsg, error); 237 } 238 239 /* 240 * NOTE: ignore_error is non-zero for certain disconnection races 241 * which we want to silently allow, otherwise close() may return 242 * an unexpected error. 243 * 244 * NOTE: The variables (msg) and (tp) are assumed. 245 */ 246 #define COMMON_START(so, inp, ignore_error) \ 247 TCPDEBUG0; \ 248 \ 249 inp = so->so_pcb; \ 250 do { \ 251 if (inp == NULL) { \ 252 error = ignore_error ? 0 : EINVAL; \ 253 tp = NULL; \ 254 goto out; \ 255 } \ 256 tp = intotcpcb(inp); \ 257 TCPDEBUG1(); \ 258 } while(0) 259 260 #define COMMON_END(req) \ 261 out: do { \ 262 TCPDEBUG2(req); \ 263 lwkt_replymsg(&msg->lmsg, error); \ 264 return; \ 265 } while(0) 266 267 /* 268 * Give the socket an address. 269 */ 270 static void 271 tcp_usr_bind(netmsg_t msg) 272 { 273 struct socket *so = msg->bind.base.nm_so; 274 struct sockaddr *nam = msg->bind.nm_nam; 275 struct thread *td = msg->bind.nm_td; 276 int error = 0; 277 struct inpcb *inp; 278 struct tcpcb *tp; 279 struct sockaddr_in *sinp; 280 281 COMMON_START(so, inp, 0); 282 283 /* 284 * Must check for multicast addresses and disallow binding 285 * to them. 286 */ 287 sinp = (struct sockaddr_in *)nam; 288 if (sinp->sin_family == AF_INET && 289 IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) { 290 error = EAFNOSUPPORT; 291 goto out; 292 } 293 error = in_pcbbind(inp, nam, td); 294 if (error) 295 goto out; 296 COMMON_END(PRU_BIND); 297 298 } 299 300 #ifdef INET6 301 302 static void 303 tcp6_usr_bind(netmsg_t msg) 304 { 305 struct socket *so = msg->bind.base.nm_so; 306 struct sockaddr *nam = msg->bind.nm_nam; 307 struct thread *td = msg->bind.nm_td; 308 int error = 0; 309 struct inpcb *inp; 310 struct tcpcb *tp; 311 struct sockaddr_in6 *sin6p; 312 313 COMMON_START(so, inp, 0); 314 315 /* 316 * Must check for multicast addresses and disallow binding 317 * to them. 318 */ 319 sin6p = (struct sockaddr_in6 *)nam; 320 if (sin6p->sin6_family == AF_INET6 && 321 IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) { 322 error = EAFNOSUPPORT; 323 goto out; 324 } 325 inp->inp_vflag &= ~INP_IPV4; 326 inp->inp_vflag |= INP_IPV6; 327 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) { 328 if (IN6_IS_ADDR_UNSPECIFIED(&sin6p->sin6_addr)) 329 inp->inp_vflag |= INP_IPV4; 330 else if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) { 331 struct sockaddr_in sin; 332 333 in6_sin6_2_sin(&sin, sin6p); 334 inp->inp_vflag |= INP_IPV4; 335 inp->inp_vflag &= ~INP_IPV6; 336 error = in_pcbbind(inp, (struct sockaddr *)&sin, td); 337 goto out; 338 } 339 } 340 error = in6_pcbbind(inp, nam, td); 341 if (error) 342 goto out; 343 COMMON_END(PRU_BIND); 344 } 345 #endif /* INET6 */ 346 347 #ifdef SMP 348 349 struct netmsg_inswildcard { 350 struct netmsg_base base; 351 struct inpcb *nm_inp; 352 }; 353 354 static void 355 in_pcbinswildcardhash_handler(netmsg_t msg) 356 { 357 struct netmsg_inswildcard *nm = (struct netmsg_inswildcard *)msg; 358 int cpu = mycpuid, nextcpu; 359 360 in_pcbinswildcardhash_oncpu(nm->nm_inp, &tcbinfo[cpu]); 361 362 nextcpu = cpu + 1; 363 if (nextcpu < ncpus2) 364 lwkt_forwardmsg(cpu_portfn(nextcpu), &nm->base.lmsg); 365 else 366 lwkt_replymsg(&nm->base.lmsg, 0); 367 } 368 369 #endif 370 371 /* 372 * Prepare to accept connections. 373 */ 374 static void 375 tcp_usr_listen(netmsg_t msg) 376 { 377 struct socket *so = msg->listen.base.nm_so; 378 struct thread *td = msg->listen.nm_td; 379 int error = 0; 380 struct inpcb *inp; 381 struct tcpcb *tp; 382 #ifdef SMP 383 struct netmsg_inswildcard nm; 384 #endif 385 386 COMMON_START(so, inp, 0); 387 388 if (tp->t_flags & TF_LISTEN) 389 goto out; 390 391 if (inp->inp_lport == 0) { 392 error = in_pcbbind(inp, NULL, td); 393 if (error) 394 goto out; 395 } 396 397 tp->t_state = TCPS_LISTEN; 398 tp->t_flags |= TF_LISTEN; 399 tp->tt_msg = NULL; /* Catch any invalid timer usage */ 400 401 #ifdef SMP 402 if (ncpus > 1) { 403 /* 404 * We have to set the flag because we can't have other cpus 405 * messing with our inp's flags. 406 */ 407 KASSERT(!(inp->inp_flags & INP_CONNECTED), 408 ("already on connhash\n")); 409 KASSERT(!(inp->inp_flags & INP_WILDCARD), 410 ("already on wildcardhash\n")); 411 KASSERT(!(inp->inp_flags & INP_WILDCARD_MP), 412 ("already on MP wildcardhash\n")); 413 inp->inp_flags |= INP_WILDCARD_MP; 414 415 KKASSERT(so->so_port == cpu_portfn(0)); 416 KKASSERT(&curthread->td_msgport == cpu_portfn(0)); 417 KKASSERT(inp->inp_pcbinfo == &tcbinfo[0]); 418 419 netmsg_init(&nm.base, NULL, &curthread->td_msgport, 420 MSGF_PRIORITY, in_pcbinswildcardhash_handler); 421 nm.nm_inp = inp; 422 lwkt_domsg(cpu_portfn(1), &nm.base.lmsg, 0); 423 } 424 #endif 425 in_pcbinswildcardhash(inp); 426 COMMON_END(PRU_LISTEN); 427 } 428 429 #ifdef INET6 430 431 static void 432 tcp6_usr_listen(netmsg_t msg) 433 { 434 struct socket *so = msg->listen.base.nm_so; 435 struct thread *td = msg->listen.nm_td; 436 int error = 0; 437 struct inpcb *inp; 438 struct tcpcb *tp; 439 #ifdef SMP 440 struct netmsg_inswildcard nm; 441 #endif 442 443 COMMON_START(so, inp, 0); 444 445 if (tp->t_flags & TF_LISTEN) 446 goto out; 447 448 if (inp->inp_lport == 0) { 449 if (!(inp->inp_flags & IN6P_IPV6_V6ONLY)) 450 inp->inp_vflag |= INP_IPV4; 451 else 452 inp->inp_vflag &= ~INP_IPV4; 453 error = in6_pcbbind(inp, NULL, td); 454 if (error) 455 goto out; 456 } 457 458 tp->t_state = TCPS_LISTEN; 459 tp->t_flags |= TF_LISTEN; 460 tp->tt_msg = NULL; /* Catch any invalid timer usage */ 461 462 #ifdef SMP 463 if (ncpus > 1) { 464 /* 465 * We have to set the flag because we can't have other cpus 466 * messing with our inp's flags. 467 */ 468 KASSERT(!(inp->inp_flags & INP_CONNECTED), 469 ("already on connhash\n")); 470 KASSERT(!(inp->inp_flags & INP_WILDCARD), 471 ("already on wildcardhash\n")); 472 KASSERT(!(inp->inp_flags & INP_WILDCARD_MP), 473 ("already on MP wildcardhash\n")); 474 inp->inp_flags |= INP_WILDCARD_MP; 475 476 KKASSERT(so->so_port == cpu_portfn(0)); 477 KKASSERT(&curthread->td_msgport == cpu_portfn(0)); 478 KKASSERT(inp->inp_pcbinfo == &tcbinfo[0]); 479 480 netmsg_init(&nm.base, NULL, &curthread->td_msgport, 481 MSGF_PRIORITY, in_pcbinswildcardhash_handler); 482 nm.nm_inp = inp; 483 lwkt_domsg(cpu_portfn(1), &nm.base.lmsg, 0); 484 } 485 #endif 486 in_pcbinswildcardhash(inp); 487 COMMON_END(PRU_LISTEN); 488 } 489 #endif /* INET6 */ 490 491 /* 492 * Initiate connection to peer. 493 * Create a template for use in transmissions on this connection. 494 * Enter SYN_SENT state, and mark socket as connecting. 495 * Start keep-alive timer, and seed output sequence space. 496 * Send initial segment on connection. 497 */ 498 static void 499 tcp_usr_connect(netmsg_t msg) 500 { 501 struct socket *so = msg->connect.base.nm_so; 502 struct sockaddr *nam = msg->connect.nm_nam; 503 struct thread *td = msg->connect.nm_td; 504 int error = 0; 505 struct inpcb *inp; 506 struct tcpcb *tp; 507 struct sockaddr_in *sinp; 508 509 COMMON_START(so, inp, 0); 510 511 /* 512 * Must disallow TCP ``connections'' to multicast addresses. 513 */ 514 sinp = (struct sockaddr_in *)nam; 515 if (sinp->sin_family == AF_INET 516 && IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) { 517 error = EAFNOSUPPORT; 518 goto out; 519 } 520 521 if (!prison_remote_ip(td, (struct sockaddr*)sinp)) { 522 error = EAFNOSUPPORT; /* IPv6 only jail */ 523 goto out; 524 } 525 526 tcp_connect(msg); 527 /* msg is invalid now */ 528 return; 529 out: 530 if (msg->connect.nm_m) { 531 m_freem(msg->connect.nm_m); 532 msg->connect.nm_m = NULL; 533 } 534 lwkt_replymsg(&msg->lmsg, error); 535 } 536 537 #ifdef INET6 538 539 static void 540 tcp6_usr_connect(netmsg_t msg) 541 { 542 struct socket *so = msg->connect.base.nm_so; 543 struct sockaddr *nam = msg->connect.nm_nam; 544 struct thread *td = msg->connect.nm_td; 545 int error = 0; 546 struct inpcb *inp; 547 struct tcpcb *tp; 548 struct sockaddr_in6 *sin6p; 549 550 COMMON_START(so, inp, 0); 551 552 /* 553 * Must disallow TCP ``connections'' to multicast addresses. 554 */ 555 sin6p = (struct sockaddr_in6 *)nam; 556 if (sin6p->sin6_family == AF_INET6 557 && IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) { 558 error = EAFNOSUPPORT; 559 goto out; 560 } 561 562 if (!prison_remote_ip(td, nam)) { 563 error = EAFNOSUPPORT; /* IPv4 only jail */ 564 goto out; 565 } 566 567 if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) { 568 struct sockaddr_in *sinp; 569 570 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) { 571 error = EINVAL; 572 goto out; 573 } 574 sinp = kmalloc(sizeof(*sinp), M_LWKTMSG, M_INTWAIT); 575 in6_sin6_2_sin(sinp, sin6p); 576 inp->inp_vflag |= INP_IPV4; 577 inp->inp_vflag &= ~INP_IPV6; 578 msg->connect.nm_nam = (struct sockaddr *)sinp; 579 msg->connect.nm_reconnect |= NMSG_RECONNECT_NAMALLOC; 580 tcp_connect(msg); 581 /* msg is invalid now */ 582 return; 583 } 584 inp->inp_vflag &= ~INP_IPV4; 585 inp->inp_vflag |= INP_IPV6; 586 inp->inp_inc.inc_isipv6 = 1; 587 588 msg->connect.nm_reconnect |= NMSG_RECONNECT_FALLBACK; 589 tcp6_connect(msg); 590 /* msg is invalid now */ 591 return; 592 out: 593 if (msg->connect.nm_m) { 594 m_freem(msg->connect.nm_m); 595 msg->connect.nm_m = NULL; 596 } 597 lwkt_replymsg(&msg->lmsg, error); 598 } 599 600 #endif /* INET6 */ 601 602 /* 603 * Initiate disconnect from peer. 604 * If connection never passed embryonic stage, just drop; 605 * else if don't need to let data drain, then can just drop anyways, 606 * else have to begin TCP shutdown process: mark socket disconnecting, 607 * drain unread data, state switch to reflect user close, and 608 * send segment (e.g. FIN) to peer. Socket will be really disconnected 609 * when peer sends FIN and acks ours. 610 * 611 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB. 612 */ 613 static void 614 tcp_usr_disconnect(netmsg_t msg) 615 { 616 struct socket *so = msg->disconnect.base.nm_so; 617 int error = 0; 618 struct inpcb *inp; 619 struct tcpcb *tp; 620 621 COMMON_START(so, inp, 1); 622 tp = tcp_disconnect(tp); 623 COMMON_END(PRU_DISCONNECT); 624 } 625 626 /* 627 * Accept a connection. Essentially all the work is 628 * done at higher levels; just return the address 629 * of the peer, storing through addr. 630 */ 631 static void 632 tcp_usr_accept(netmsg_t msg) 633 { 634 struct socket *so = msg->accept.base.nm_so; 635 struct sockaddr **nam = msg->accept.nm_nam; 636 int error = 0; 637 struct inpcb *inp; 638 struct tcpcb *tp = NULL; 639 TCPDEBUG0; 640 641 inp = so->so_pcb; 642 if (so->so_state & SS_ISDISCONNECTED) { 643 error = ECONNABORTED; 644 goto out; 645 } 646 if (inp == 0) { 647 error = EINVAL; 648 goto out; 649 } 650 651 tp = intotcpcb(inp); 652 TCPDEBUG1(); 653 in_setpeeraddr(so, nam); 654 COMMON_END(PRU_ACCEPT); 655 } 656 657 #ifdef INET6 658 static void 659 tcp6_usr_accept(netmsg_t msg) 660 { 661 struct socket *so = msg->accept.base.nm_so; 662 struct sockaddr **nam = msg->accept.nm_nam; 663 int error = 0; 664 struct inpcb *inp; 665 struct tcpcb *tp = NULL; 666 TCPDEBUG0; 667 668 inp = so->so_pcb; 669 670 if (so->so_state & SS_ISDISCONNECTED) { 671 error = ECONNABORTED; 672 goto out; 673 } 674 if (inp == 0) { 675 error = EINVAL; 676 goto out; 677 } 678 tp = intotcpcb(inp); 679 TCPDEBUG1(); 680 in6_mapped_peeraddr(so, nam); 681 COMMON_END(PRU_ACCEPT); 682 } 683 #endif /* INET6 */ 684 /* 685 * Mark the connection as being incapable of further output. 686 */ 687 static void 688 tcp_usr_shutdown(netmsg_t msg) 689 { 690 struct socket *so = msg->shutdown.base.nm_so; 691 int error = 0; 692 struct inpcb *inp; 693 struct tcpcb *tp; 694 695 COMMON_START(so, inp, 0); 696 socantsendmore(so); 697 tp = tcp_usrclosed(tp); 698 if (tp) 699 error = tcp_output(tp); 700 COMMON_END(PRU_SHUTDOWN); 701 } 702 703 /* 704 * After a receive, possibly send window update to peer. 705 */ 706 static void 707 tcp_usr_rcvd(netmsg_t msg) 708 { 709 struct socket *so = msg->rcvd.base.nm_so; 710 int error = 0; 711 struct inpcb *inp; 712 struct tcpcb *tp; 713 714 COMMON_START(so, inp, 0); 715 tcp_output(tp); 716 COMMON_END(PRU_RCVD); 717 } 718 719 /* 720 * Do a send by putting data in output queue and updating urgent 721 * marker if URG set. Possibly send more data. Unlike the other 722 * pru_*() routines, the mbuf chains are our responsibility. We 723 * must either enqueue them or free them. The other pru_* routines 724 * generally are caller-frees. 725 */ 726 static void 727 tcp_usr_send(netmsg_t msg) 728 { 729 struct socket *so = msg->send.base.nm_so; 730 int flags = msg->send.nm_flags; 731 struct mbuf *m = msg->send.nm_m; 732 struct sockaddr *nam = msg->send.nm_addr; 733 struct mbuf *control = msg->send.nm_control; 734 struct thread *td = msg->send.nm_td; 735 int error = 0; 736 struct inpcb *inp; 737 struct tcpcb *tp; 738 #ifdef INET6 739 int isipv6; 740 #endif 741 TCPDEBUG0; 742 743 inp = so->so_pcb; 744 745 if (inp == NULL) { 746 /* 747 * OOPS! we lost a race, the TCP session got reset after 748 * we checked SS_CANTSENDMORE, eg: while doing uiomove or a 749 * network interrupt in the non-critical section of sosend(). 750 */ 751 m_freem(m); 752 if (control) 753 m_freem(control); 754 error = ECONNRESET; /* XXX EPIPE? */ 755 tp = NULL; 756 TCPDEBUG1(); 757 goto out; 758 } 759 #ifdef INET6 760 isipv6 = nam && nam->sa_family == AF_INET6; 761 #endif /* INET6 */ 762 tp = intotcpcb(inp); 763 TCPDEBUG1(); 764 if (control) { 765 /* TCP doesn't do control messages (rights, creds, etc) */ 766 if (control->m_len) { 767 m_freem(control); 768 m_freem(m); 769 error = EINVAL; 770 goto out; 771 } 772 m_freem(control); /* empty control, just free it */ 773 } 774 775 /* 776 * Don't let too much OOB data build up 777 */ 778 if (flags & PRUS_OOB) { 779 if (ssb_space(&so->so_snd) < -512) { 780 m_freem(m); 781 error = ENOBUFS; 782 goto out; 783 } 784 } 785 786 /* 787 * Do implied connect if not yet connected. Any data sent 788 * with the connect is handled by tcp_connect() and friends. 789 * 790 * NOTE! PROTOCOL THREAD MAY BE CHANGED BY THE CONNECT! 791 */ 792 if (nam && tp->t_state < TCPS_SYN_SENT) { 793 kprintf("implied fallback\n"); 794 msg->connect.nm_nam = nam; 795 msg->connect.nm_td = td; 796 msg->connect.nm_m = m; 797 msg->connect.nm_flags = flags; 798 msg->connect.nm_reconnect = NMSG_RECONNECT_FALLBACK; 799 #ifdef INET6 800 if (isipv6) 801 tcp6_connect(msg); 802 else 803 #endif /* INET6 */ 804 tcp_connect(msg); 805 /* msg invalid now */ 806 return; 807 } 808 809 /* 810 * Pump the data into the socket. 811 */ 812 if (m) 813 ssb_appendstream(&so->so_snd, m); 814 if (flags & PRUS_OOB) { 815 /* 816 * According to RFC961 (Assigned Protocols), 817 * the urgent pointer points to the last octet 818 * of urgent data. We continue, however, 819 * to consider it to indicate the first octet 820 * of data past the urgent section. 821 * Otherwise, snd_up should be one lower. 822 */ 823 tp->snd_up = tp->snd_una + so->so_snd.ssb_cc; 824 tp->t_flags |= TF_FORCE; 825 error = tcp_output(tp); 826 tp->t_flags &= ~TF_FORCE; 827 } else { 828 if (flags & PRUS_EOF) { 829 /* 830 * Close the send side of the connection after 831 * the data is sent. 832 */ 833 socantsendmore(so); 834 tp = tcp_usrclosed(tp); 835 } 836 if (tp != NULL) { 837 if (flags & PRUS_MORETOCOME) 838 tp->t_flags |= TF_MORETOCOME; 839 error = tcp_output(tp); 840 if (flags & PRUS_MORETOCOME) 841 tp->t_flags &= ~TF_MORETOCOME; 842 } 843 } 844 COMMON_END((flags & PRUS_OOB) ? PRU_SENDOOB : 845 ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND)); 846 } 847 848 /* 849 * NOTE: (so) is referenced from soabort*() and netmsg_pru_abort() 850 * will sofree() it when we return. 851 */ 852 static void 853 tcp_usr_abort(netmsg_t msg) 854 { 855 struct socket *so = msg->abort.base.nm_so; 856 int error = 0; 857 struct inpcb *inp; 858 struct tcpcb *tp; 859 860 COMMON_START(so, inp, 1); 861 tp = tcp_drop(tp, ECONNABORTED); 862 COMMON_END(PRU_ABORT); 863 } 864 865 /* 866 * Receive out-of-band data. 867 */ 868 static void 869 tcp_usr_rcvoob(netmsg_t msg) 870 { 871 struct socket *so = msg->rcvoob.base.nm_so; 872 struct mbuf *m = msg->rcvoob.nm_m; 873 int flags = msg->rcvoob.nm_flags; 874 int error = 0; 875 struct inpcb *inp; 876 struct tcpcb *tp; 877 878 COMMON_START(so, inp, 0); 879 if ((so->so_oobmark == 0 && 880 (so->so_state & SS_RCVATMARK) == 0) || 881 so->so_options & SO_OOBINLINE || 882 tp->t_oobflags & TCPOOB_HADDATA) { 883 error = EINVAL; 884 goto out; 885 } 886 if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) { 887 error = EWOULDBLOCK; 888 goto out; 889 } 890 m->m_len = 1; 891 *mtod(m, caddr_t) = tp->t_iobc; 892 if ((flags & MSG_PEEK) == 0) 893 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA); 894 COMMON_END(PRU_RCVOOB); 895 } 896 897 /* xxx - should be const */ 898 struct pr_usrreqs tcp_usrreqs = { 899 .pru_abort = tcp_usr_abort, 900 .pru_accept = tcp_usr_accept, 901 .pru_attach = tcp_usr_attach, 902 .pru_bind = tcp_usr_bind, 903 .pru_connect = tcp_usr_connect, 904 .pru_connect2 = pr_generic_notsupp, 905 .pru_control = in_control_dispatch, 906 .pru_detach = tcp_usr_detach, 907 .pru_disconnect = tcp_usr_disconnect, 908 .pru_listen = tcp_usr_listen, 909 .pru_peeraddr = in_setpeeraddr_dispatch, 910 .pru_rcvd = tcp_usr_rcvd, 911 .pru_rcvoob = tcp_usr_rcvoob, 912 .pru_send = tcp_usr_send, 913 .pru_sense = pru_sense_null, 914 .pru_shutdown = tcp_usr_shutdown, 915 .pru_sockaddr = in_setsockaddr_dispatch, 916 .pru_sosend = sosend, 917 .pru_soreceive = soreceive 918 }; 919 920 #ifdef INET6 921 struct pr_usrreqs tcp6_usrreqs = { 922 .pru_abort = tcp_usr_abort, 923 .pru_accept = tcp6_usr_accept, 924 .pru_attach = tcp_usr_attach, 925 .pru_bind = tcp6_usr_bind, 926 .pru_connect = tcp6_usr_connect, 927 .pru_connect2 = pr_generic_notsupp, 928 .pru_control = in6_control_dispatch, 929 .pru_detach = tcp_usr_detach, 930 .pru_disconnect = tcp_usr_disconnect, 931 .pru_listen = tcp6_usr_listen, 932 .pru_peeraddr = in6_mapped_peeraddr_dispatch, 933 .pru_rcvd = tcp_usr_rcvd, 934 .pru_rcvoob = tcp_usr_rcvoob, 935 .pru_send = tcp_usr_send, 936 .pru_sense = pru_sense_null, 937 .pru_shutdown = tcp_usr_shutdown, 938 .pru_sockaddr = in6_mapped_sockaddr_dispatch, 939 .pru_sosend = sosend, 940 .pru_soreceive = soreceive 941 }; 942 #endif /* INET6 */ 943 944 static int 945 tcp_connect_oncpu(struct tcpcb *tp, int flags, struct mbuf *m, 946 struct sockaddr_in *sin, struct sockaddr_in *if_sin) 947 { 948 struct inpcb *inp = tp->t_inpcb, *oinp; 949 struct socket *so = inp->inp_socket; 950 struct route *ro = &inp->inp_route; 951 952 oinp = in_pcblookup_hash(&tcbinfo[mycpu->gd_cpuid], 953 sin->sin_addr, sin->sin_port, 954 (inp->inp_laddr.s_addr != INADDR_ANY ? 955 inp->inp_laddr : if_sin->sin_addr), 956 inp->inp_lport, 0, NULL); 957 if (oinp != NULL) { 958 m_freem(m); 959 return (EADDRINUSE); 960 } 961 if (inp->inp_laddr.s_addr == INADDR_ANY) 962 inp->inp_laddr = if_sin->sin_addr; 963 inp->inp_faddr = sin->sin_addr; 964 inp->inp_fport = sin->sin_port; 965 inp->inp_cpcbinfo = &tcbinfo[mycpu->gd_cpuid]; 966 in_pcbinsconnhash(inp); 967 968 /* 969 * We are now on the inpcb's owner CPU, if the cached route was 970 * freed because the rtentry's owner CPU is not the current CPU 971 * (e.g. in tcp_connect()), then we try to reallocate it here with 972 * the hope that a rtentry may be cloned from a RTF_PRCLONING 973 * rtentry. 974 */ 975 if (!(inp->inp_socket->so_options & SO_DONTROUTE) && /*XXX*/ 976 ro->ro_rt == NULL) { 977 bzero(&ro->ro_dst, sizeof(struct sockaddr_in)); 978 ro->ro_dst.sa_family = AF_INET; 979 ro->ro_dst.sa_len = sizeof(struct sockaddr_in); 980 ((struct sockaddr_in *)&ro->ro_dst)->sin_addr = 981 sin->sin_addr; 982 rtalloc(ro); 983 } 984 985 /* 986 * Now that no more errors can occur, change the protocol processing 987 * port to the current thread (which is the correct thread). 988 * 989 * Create TCP timer message now; we are on the tcpcb's owner 990 * CPU/thread. 991 */ 992 tcp_create_timermsg(tp, &curthread->td_msgport); 993 994 /* 995 * Compute window scaling to request. Use a larger scaling then 996 * needed for the initial receive buffer in case the receive buffer 997 * gets expanded. 998 */ 999 if (tp->request_r_scale < TCP_MIN_WINSHIFT) 1000 tp->request_r_scale = TCP_MIN_WINSHIFT; 1001 while (tp->request_r_scale < TCP_MAX_WINSHIFT && 1002 (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.ssb_hiwat 1003 ) { 1004 tp->request_r_scale++; 1005 } 1006 1007 soisconnecting(so); 1008 tcpstat.tcps_connattempt++; 1009 tp->t_state = TCPS_SYN_SENT; 1010 tcp_callout_reset(tp, tp->tt_keep, tcp_keepinit, tcp_timer_keep); 1011 tp->iss = tcp_new_isn(tp); 1012 tcp_sendseqinit(tp); 1013 if (m) { 1014 ssb_appendstream(&so->so_snd, m); 1015 m = NULL; 1016 if (flags & PRUS_OOB) 1017 tp->snd_up = tp->snd_una + so->so_snd.ssb_cc; 1018 } 1019 1020 /* 1021 * Close the send side of the connection after 1022 * the data is sent if flagged. 1023 */ 1024 if ((flags & (PRUS_OOB|PRUS_EOF)) == PRUS_EOF) { 1025 socantsendmore(so); 1026 tp = tcp_usrclosed(tp); 1027 } 1028 return (tcp_output(tp)); 1029 } 1030 1031 /* 1032 * Common subroutine to open a TCP connection to remote host specified 1033 * by struct sockaddr_in in mbuf *nam. Call in_pcbbind to assign a local 1034 * port number if needed. Call in_pcbladdr to do the routing and to choose 1035 * a local host address (interface). 1036 * Initialize connection parameters and enter SYN-SENT state. 1037 */ 1038 static void 1039 tcp_connect(netmsg_t msg) 1040 { 1041 struct socket *so = msg->connect.base.nm_so; 1042 struct sockaddr *nam = msg->connect.nm_nam; 1043 struct thread *td = msg->connect.nm_td; 1044 struct sockaddr_in *sin = (struct sockaddr_in *)nam; 1045 struct sockaddr_in *if_sin; 1046 struct inpcb *inp; 1047 struct tcpcb *tp; 1048 int error, calc_laddr = 1; 1049 #ifdef SMP 1050 lwkt_port_t port; 1051 #endif 1052 1053 COMMON_START(so, inp, 0); 1054 1055 /* 1056 * Reconnect our pcb if we have to 1057 */ 1058 if (msg->connect.nm_reconnect & NMSG_RECONNECT_RECONNECT) { 1059 msg->connect.nm_reconnect &= ~NMSG_RECONNECT_RECONNECT; 1060 in_pcblink(so->so_pcb, &tcbinfo[mycpu->gd_cpuid]); 1061 } 1062 1063 /* 1064 * Bind if we have to 1065 */ 1066 if (inp->inp_lport == 0) { 1067 if (tcp_lport_extension) { 1068 KKASSERT(inp->inp_laddr.s_addr == INADDR_ANY); 1069 1070 error = in_pcbladdr(inp, nam, &if_sin, td); 1071 if (error) 1072 goto out; 1073 inp->inp_laddr.s_addr = if_sin->sin_addr.s_addr; 1074 1075 error = in_pcbconn_bind(inp, nam, td); 1076 if (error) 1077 goto out; 1078 1079 calc_laddr = 0; 1080 } else { 1081 error = in_pcbbind(inp, NULL, td); 1082 if (error) 1083 goto out; 1084 } 1085 } 1086 1087 if (calc_laddr) { 1088 /* 1089 * Calculate the correct protocol processing thread. The 1090 * connect operation must run there. Set the forwarding 1091 * port before we forward the message or it will get bounced 1092 * right back to us. 1093 */ 1094 error = in_pcbladdr(inp, nam, &if_sin, td); 1095 if (error) 1096 goto out; 1097 } 1098 KKASSERT(inp->inp_socket == so); 1099 1100 #ifdef SMP 1101 port = tcp_addrport(sin->sin_addr.s_addr, sin->sin_port, 1102 (inp->inp_laddr.s_addr ? 1103 inp->inp_laddr.s_addr : if_sin->sin_addr.s_addr), 1104 inp->inp_lport); 1105 1106 if (port != &curthread->td_msgport) { 1107 struct route *ro = &inp->inp_route; 1108 1109 /* 1110 * in_pcbladdr() may have allocated a route entry for us 1111 * on the current CPU, but we need a route entry on the 1112 * inpcb's owner CPU, so free it here. 1113 */ 1114 if (ro->ro_rt != NULL) 1115 RTFREE(ro->ro_rt); 1116 bzero(ro, sizeof(*ro)); 1117 1118 /* 1119 * We are moving the protocol processing port the socket 1120 * is on, we have to unlink here and re-link on the 1121 * target cpu. 1122 */ 1123 in_pcbunlink(so->so_pcb, &tcbinfo[mycpu->gd_cpuid]); 1124 sosetport(so, port); 1125 msg->connect.nm_reconnect |= NMSG_RECONNECT_RECONNECT; 1126 msg->connect.base.nm_dispatch = tcp_connect; 1127 1128 lwkt_forwardmsg(port, &msg->connect.base.lmsg); 1129 /* msg invalid now */ 1130 return; 1131 } 1132 #else 1133 KKASSERT(so->so_port == &curthread->td_msgport); 1134 #endif 1135 error = tcp_connect_oncpu(tp, msg->connect.nm_flags, 1136 msg->connect.nm_m, sin, if_sin); 1137 msg->connect.nm_m = NULL; 1138 out: 1139 if (msg->connect.nm_m) { 1140 m_freem(msg->connect.nm_m); 1141 msg->connect.nm_m = NULL; 1142 } 1143 if (msg->connect.nm_reconnect & NMSG_RECONNECT_NAMALLOC) { 1144 kfree(msg->connect.nm_nam, M_LWKTMSG); 1145 msg->connect.nm_nam = NULL; 1146 } 1147 lwkt_replymsg(&msg->connect.base.lmsg, error); 1148 /* msg invalid now */ 1149 } 1150 1151 #ifdef INET6 1152 1153 static void 1154 tcp6_connect(netmsg_t msg) 1155 { 1156 struct tcpcb *tp; 1157 struct socket *so = msg->connect.base.nm_so; 1158 struct sockaddr *nam = msg->connect.nm_nam; 1159 struct thread *td = msg->connect.nm_td; 1160 struct inpcb *inp; 1161 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam; 1162 struct in6_addr *addr6; 1163 #ifdef SMP 1164 lwkt_port_t port; 1165 #endif 1166 int error; 1167 1168 COMMON_START(so, inp, 0); 1169 1170 /* 1171 * Reconnect our pcb if we have to 1172 */ 1173 if (msg->connect.nm_reconnect & NMSG_RECONNECT_RECONNECT) { 1174 msg->connect.nm_reconnect &= ~NMSG_RECONNECT_RECONNECT; 1175 in_pcblink(so->so_pcb, &tcbinfo[mycpu->gd_cpuid]); 1176 } 1177 1178 /* 1179 * Bind if we have to 1180 */ 1181 if (inp->inp_lport == 0) { 1182 error = in6_pcbbind(inp, NULL, td); 1183 if (error) 1184 goto out; 1185 } 1186 1187 /* 1188 * Cannot simply call in_pcbconnect, because there might be an 1189 * earlier incarnation of this same connection still in 1190 * TIME_WAIT state, creating an ADDRINUSE error. 1191 */ 1192 error = in6_pcbladdr(inp, nam, &addr6, td); 1193 if (error) 1194 goto out; 1195 1196 #ifdef SMP 1197 port = tcp6_addrport(); /* XXX hack for now, always cpu0 */ 1198 1199 if (port != &curthread->td_msgport) { 1200 struct route *ro = &inp->inp_route; 1201 1202 /* 1203 * in_pcbladdr() may have allocated a route entry for us 1204 * on the current CPU, but we need a route entry on the 1205 * inpcb's owner CPU, so free it here. 1206 */ 1207 if (ro->ro_rt != NULL) 1208 RTFREE(ro->ro_rt); 1209 bzero(ro, sizeof(*ro)); 1210 1211 in_pcbunlink(so->so_pcb, &tcbinfo[mycpu->gd_cpuid]); 1212 sosetport(so, port); 1213 msg->connect.nm_reconnect |= NMSG_RECONNECT_RECONNECT; 1214 msg->connect.base.nm_dispatch = tcp6_connect; 1215 1216 lwkt_forwardmsg(port, &msg->connect.base.lmsg); 1217 /* msg invalid now */ 1218 return; 1219 } 1220 #endif 1221 error = tcp6_connect_oncpu(tp, msg->connect.nm_flags, 1222 &msg->connect.nm_m, sin6, addr6); 1223 /* nm_m may still be intact */ 1224 out: 1225 if (error && (msg->connect.nm_reconnect & NMSG_RECONNECT_FALLBACK)) { 1226 tcp_connect(msg); 1227 /* msg invalid now */ 1228 } else { 1229 if (msg->connect.nm_m) { 1230 m_freem(msg->connect.nm_m); 1231 msg->connect.nm_m = NULL; 1232 } 1233 if (msg->connect.nm_reconnect & NMSG_RECONNECT_NAMALLOC) { 1234 kfree(msg->connect.nm_nam, M_LWKTMSG); 1235 msg->connect.nm_nam = NULL; 1236 } 1237 lwkt_replymsg(&msg->connect.base.lmsg, error); 1238 /* msg invalid now */ 1239 } 1240 } 1241 1242 static int 1243 tcp6_connect_oncpu(struct tcpcb *tp, int flags, struct mbuf **mp, 1244 struct sockaddr_in6 *sin6, struct in6_addr *addr6) 1245 { 1246 struct mbuf *m = *mp; 1247 struct inpcb *inp = tp->t_inpcb; 1248 struct socket *so = inp->inp_socket; 1249 struct inpcb *oinp; 1250 1251 /* 1252 * Cannot simply call in_pcbconnect, because there might be an 1253 * earlier incarnation of this same connection still in 1254 * TIME_WAIT state, creating an ADDRINUSE error. 1255 */ 1256 oinp = in6_pcblookup_hash(inp->inp_cpcbinfo, 1257 &sin6->sin6_addr, sin6->sin6_port, 1258 (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) ? 1259 addr6 : &inp->in6p_laddr), 1260 inp->inp_lport, 0, NULL); 1261 if (oinp) 1262 return (EADDRINUSE); 1263 1264 if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) 1265 inp->in6p_laddr = *addr6; 1266 inp->in6p_faddr = sin6->sin6_addr; 1267 inp->inp_fport = sin6->sin6_port; 1268 if ((sin6->sin6_flowinfo & IPV6_FLOWINFO_MASK) != 0) 1269 inp->in6p_flowinfo = sin6->sin6_flowinfo; 1270 in_pcbinsconnhash(inp); 1271 1272 /* 1273 * Now that no more errors can occur, change the protocol processing 1274 * port to the current thread (which is the correct thread). 1275 * 1276 * Create TCP timer message now; we are on the tcpcb's owner 1277 * CPU/thread. 1278 */ 1279 tcp_create_timermsg(tp, &curthread->td_msgport); 1280 1281 /* Compute window scaling to request. */ 1282 if (tp->request_r_scale < TCP_MIN_WINSHIFT) 1283 tp->request_r_scale = TCP_MIN_WINSHIFT; 1284 while (tp->request_r_scale < TCP_MAX_WINSHIFT && 1285 (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.ssb_hiwat) { 1286 tp->request_r_scale++; 1287 } 1288 1289 soisconnecting(so); 1290 tcpstat.tcps_connattempt++; 1291 tp->t_state = TCPS_SYN_SENT; 1292 tcp_callout_reset(tp, tp->tt_keep, tcp_keepinit, tcp_timer_keep); 1293 tp->iss = tcp_new_isn(tp); 1294 tcp_sendseqinit(tp); 1295 if (m) { 1296 ssb_appendstream(&so->so_snd, m); 1297 *mp = NULL; 1298 if (flags & PRUS_OOB) 1299 tp->snd_up = tp->snd_una + so->so_snd.ssb_cc; 1300 } 1301 1302 /* 1303 * Close the send side of the connection after 1304 * the data is sent if flagged. 1305 */ 1306 if ((flags & (PRUS_OOB|PRUS_EOF)) == PRUS_EOF) { 1307 socantsendmore(so); 1308 tp = tcp_usrclosed(tp); 1309 } 1310 return (tcp_output(tp)); 1311 } 1312 1313 #endif /* INET6 */ 1314 1315 /* 1316 * The new sockopt interface makes it possible for us to block in the 1317 * copyin/out step (if we take a page fault). Taking a page fault while 1318 * in a critical section is probably a Bad Thing. (Since sockets and pcbs 1319 * both now use TSM, there probably isn't any need for this function to 1320 * run in a critical section any more. This needs more examination.) 1321 */ 1322 void 1323 tcp_ctloutput(netmsg_t msg) 1324 { 1325 struct socket *so = msg->base.nm_so; 1326 struct sockopt *sopt = msg->ctloutput.nm_sopt; 1327 int error, opt, optval; 1328 struct inpcb *inp; 1329 struct tcpcb *tp; 1330 1331 error = 0; 1332 inp = so->so_pcb; 1333 if (inp == NULL) { 1334 error = ECONNRESET; 1335 goto done; 1336 } 1337 1338 if (sopt->sopt_level != IPPROTO_TCP) { 1339 #ifdef INET6 1340 if (INP_CHECK_SOCKAF(so, AF_INET6)) 1341 ip6_ctloutput_dispatch(msg); 1342 else 1343 #endif /* INET6 */ 1344 ip_ctloutput(msg); 1345 /* msg invalid now */ 1346 return; 1347 } 1348 tp = intotcpcb(inp); 1349 1350 switch (sopt->sopt_dir) { 1351 case SOPT_SET: 1352 error = soopt_to_kbuf(sopt, &optval, sizeof optval, 1353 sizeof optval); 1354 if (error) 1355 break; 1356 switch (sopt->sopt_name) { 1357 case TCP_FASTKEEP: 1358 if (optval > 0) { 1359 if ((tp->t_flags & TF_FASTKEEP) == 0) { 1360 tp->t_flags |= TF_FASTKEEP; 1361 tcp_timer_keep_activity(tp, 0); 1362 } 1363 } else { 1364 tp->t_flags &= ~TF_FASTKEEP; 1365 } 1366 break; 1367 #ifdef TCP_SIGNATURE 1368 case TCP_SIGNATURE_ENABLE: 1369 if (optval > 0) 1370 tp->t_flags |= TF_SIGNATURE; 1371 else 1372 tp->t_flags &= ~TF_SIGNATURE; 1373 break; 1374 #endif /* TCP_SIGNATURE */ 1375 case TCP_NODELAY: 1376 case TCP_NOOPT: 1377 switch (sopt->sopt_name) { 1378 case TCP_NODELAY: 1379 opt = TF_NODELAY; 1380 break; 1381 case TCP_NOOPT: 1382 opt = TF_NOOPT; 1383 break; 1384 default: 1385 opt = 0; /* dead code to fool gcc */ 1386 break; 1387 } 1388 1389 if (optval) 1390 tp->t_flags |= opt; 1391 else 1392 tp->t_flags &= ~opt; 1393 break; 1394 1395 case TCP_NOPUSH: 1396 if (optval) 1397 tp->t_flags |= TF_NOPUSH; 1398 else { 1399 tp->t_flags &= ~TF_NOPUSH; 1400 error = tcp_output(tp); 1401 } 1402 break; 1403 1404 case TCP_MAXSEG: 1405 /* 1406 * Must be between 0 and maxseg. If the requested 1407 * maxseg is too small to satisfy the desired minmss, 1408 * pump it up (silently so sysctl modifications of 1409 * minmss do not create unexpected program failures). 1410 * Handle degenerate cases. 1411 */ 1412 if (optval > 0 && optval <= tp->t_maxseg) { 1413 if (optval + 40 < tcp_minmss) { 1414 optval = tcp_minmss - 40; 1415 if (optval < 0) 1416 optval = 1; 1417 } 1418 tp->t_maxseg = optval; 1419 } else { 1420 error = EINVAL; 1421 } 1422 break; 1423 1424 default: 1425 error = ENOPROTOOPT; 1426 break; 1427 } 1428 break; 1429 1430 case SOPT_GET: 1431 switch (sopt->sopt_name) { 1432 #ifdef TCP_SIGNATURE 1433 case TCP_SIGNATURE_ENABLE: 1434 optval = (tp->t_flags & TF_SIGNATURE) ? 1 : 0; 1435 break; 1436 #endif /* TCP_SIGNATURE */ 1437 case TCP_NODELAY: 1438 optval = tp->t_flags & TF_NODELAY; 1439 break; 1440 case TCP_MAXSEG: 1441 optval = tp->t_maxseg; 1442 break; 1443 case TCP_NOOPT: 1444 optval = tp->t_flags & TF_NOOPT; 1445 break; 1446 case TCP_NOPUSH: 1447 optval = tp->t_flags & TF_NOPUSH; 1448 break; 1449 default: 1450 error = ENOPROTOOPT; 1451 break; 1452 } 1453 if (error == 0) 1454 soopt_from_kbuf(sopt, &optval, sizeof optval); 1455 break; 1456 } 1457 done: 1458 lwkt_replymsg(&msg->lmsg, error); 1459 } 1460 1461 /* 1462 * tcp_sendspace and tcp_recvspace are the default send and receive window 1463 * sizes, respectively. These are obsolescent (this information should 1464 * be set by the route). 1465 * 1466 * Use a default that does not require tcp window scaling to be turned 1467 * on. Individual programs or the administrator can increase the default. 1468 */ 1469 u_long tcp_sendspace = 57344; /* largest multiple of PAGE_SIZE < 64k */ 1470 SYSCTL_INT(_net_inet_tcp, TCPCTL_SENDSPACE, sendspace, CTLFLAG_RW, 1471 &tcp_sendspace , 0, "Maximum outgoing TCP datagram size"); 1472 u_long tcp_recvspace = 57344; /* largest multiple of PAGE_SIZE < 64k */ 1473 SYSCTL_INT(_net_inet_tcp, TCPCTL_RECVSPACE, recvspace, CTLFLAG_RW, 1474 &tcp_recvspace , 0, "Maximum incoming TCP datagram size"); 1475 1476 /* 1477 * Attach TCP protocol to socket, allocating internet protocol control 1478 * block, tcp control block, bufer space, and entering LISTEN state 1479 * if to accept connections. 1480 */ 1481 static int 1482 tcp_attach(struct socket *so, struct pru_attach_info *ai) 1483 { 1484 struct tcpcb *tp; 1485 struct inpcb *inp; 1486 int error; 1487 int cpu; 1488 #ifdef INET6 1489 int isipv6 = INP_CHECK_SOCKAF(so, AF_INET6) != 0; 1490 #endif 1491 1492 if (so->so_snd.ssb_hiwat == 0 || so->so_rcv.ssb_hiwat == 0) { 1493 lwkt_gettoken(&so->so_rcv.ssb_token); 1494 error = soreserve(so, tcp_sendspace, tcp_recvspace, 1495 ai->sb_rlimit); 1496 lwkt_reltoken(&so->so_rcv.ssb_token); 1497 if (error) 1498 return (error); 1499 } 1500 atomic_set_int(&so->so_rcv.ssb_flags, SSB_AUTOSIZE); 1501 atomic_set_int(&so->so_snd.ssb_flags, SSB_AUTOSIZE); 1502 cpu = mycpu->gd_cpuid; 1503 1504 /* 1505 * Set the default port for protocol processing. This will likely 1506 * change when we connect. 1507 */ 1508 error = in_pcballoc(so, &tcbinfo[cpu]); 1509 if (error) 1510 return (error); 1511 inp = so->so_pcb; 1512 #ifdef INET6 1513 if (isipv6) { 1514 inp->inp_vflag |= INP_IPV6; 1515 inp->in6p_hops = -1; /* use kernel default */ 1516 } 1517 else 1518 #endif 1519 inp->inp_vflag |= INP_IPV4; 1520 tp = tcp_newtcpcb(inp); 1521 if (tp == NULL) { 1522 /* 1523 * Make sure the socket is destroyed by the pcbdetach. 1524 */ 1525 soreference(so); 1526 #ifdef INET6 1527 if (isipv6) 1528 in6_pcbdetach(inp); 1529 else 1530 #endif 1531 in_pcbdetach(inp); 1532 sofree(so); /* from ref above */ 1533 return (ENOBUFS); 1534 } 1535 tp->t_state = TCPS_CLOSED; 1536 return (0); 1537 } 1538 1539 /* 1540 * Initiate (or continue) disconnect. 1541 * If embryonic state, just send reset (once). 1542 * If in ``let data drain'' option and linger null, just drop. 1543 * Otherwise (hard), mark socket disconnecting and drop 1544 * current input data; switch states based on user close, and 1545 * send segment to peer (with FIN). 1546 */ 1547 static struct tcpcb * 1548 tcp_disconnect(struct tcpcb *tp) 1549 { 1550 struct socket *so = tp->t_inpcb->inp_socket; 1551 1552 if (tp->t_state < TCPS_ESTABLISHED) { 1553 tp = tcp_close(tp); 1554 } else if ((so->so_options & SO_LINGER) && so->so_linger == 0) { 1555 tp = tcp_drop(tp, 0); 1556 } else { 1557 lwkt_gettoken(&so->so_rcv.ssb_token); 1558 soisdisconnecting(so); 1559 sbflush(&so->so_rcv.sb); 1560 tp = tcp_usrclosed(tp); 1561 if (tp) 1562 tcp_output(tp); 1563 lwkt_reltoken(&so->so_rcv.ssb_token); 1564 } 1565 return (tp); 1566 } 1567 1568 /* 1569 * User issued close, and wish to trail through shutdown states: 1570 * if never received SYN, just forget it. If got a SYN from peer, 1571 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. 1572 * If already got a FIN from peer, then almost done; go to LAST_ACK 1573 * state. In all other cases, have already sent FIN to peer (e.g. 1574 * after PRU_SHUTDOWN), and just have to play tedious game waiting 1575 * for peer to send FIN or not respond to keep-alives, etc. 1576 * We can let the user exit from the close as soon as the FIN is acked. 1577 */ 1578 static struct tcpcb * 1579 tcp_usrclosed(struct tcpcb *tp) 1580 { 1581 1582 switch (tp->t_state) { 1583 1584 case TCPS_CLOSED: 1585 case TCPS_LISTEN: 1586 tp->t_state = TCPS_CLOSED; 1587 tp = tcp_close(tp); 1588 break; 1589 1590 case TCPS_SYN_SENT: 1591 case TCPS_SYN_RECEIVED: 1592 tp->t_flags |= TF_NEEDFIN; 1593 break; 1594 1595 case TCPS_ESTABLISHED: 1596 tp->t_state = TCPS_FIN_WAIT_1; 1597 break; 1598 1599 case TCPS_CLOSE_WAIT: 1600 tp->t_state = TCPS_LAST_ACK; 1601 break; 1602 } 1603 if (tp && tp->t_state >= TCPS_FIN_WAIT_2) { 1604 soisdisconnected(tp->t_inpcb->inp_socket); 1605 /* To prevent the connection hanging in FIN_WAIT_2 forever. */ 1606 if (tp->t_state == TCPS_FIN_WAIT_2) { 1607 tcp_callout_reset(tp, tp->tt_2msl, tcp_maxidle, 1608 tcp_timer_2msl); 1609 } 1610 } 1611 return (tp); 1612 } 1613