1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* This file contains all TCP kernel socket related functions. */ 28 29 #include <sys/types.h> 30 #include <sys/strlog.h> 31 #include <sys/policy.h> 32 #include <sys/sockio.h> 33 #include <sys/strsubr.h> 34 #include <sys/strsun.h> 35 #include <sys/squeue_impl.h> 36 #include <sys/squeue.h> 37 #include <sys/tihdr.h> 38 #include <sys/timod.h> 39 #include <sys/tpicommon.h> 40 #include <sys/socketvar.h> 41 42 #include <inet/common.h> 43 #include <inet/proto_set.h> 44 #include <inet/ip.h> 45 #include <inet/tcp.h> 46 #include <inet/tcp_impl.h> 47 48 static void tcp_activate(sock_lower_handle_t, sock_upper_handle_t, 49 sock_upcalls_t *, int, cred_t *); 50 static int tcp_accept(sock_lower_handle_t, sock_lower_handle_t, 51 sock_upper_handle_t, cred_t *); 52 static int tcp_bind(sock_lower_handle_t, struct sockaddr *, 53 socklen_t, cred_t *); 54 static int tcp_listen(sock_lower_handle_t, int, cred_t *); 55 static int tcp_connect(sock_lower_handle_t, const struct sockaddr *, 56 socklen_t, sock_connid_t *, cred_t *); 57 static int tcp_getsockopt(sock_lower_handle_t, int, int, void *, 58 socklen_t *, cred_t *); 59 static int tcp_setsockopt(sock_lower_handle_t, int, int, const void *, 60 socklen_t, cred_t *); 61 static int tcp_sendmsg(sock_lower_handle_t, mblk_t *, struct nmsghdr *, 62 cred_t *cr); 63 static int tcp_shutdown(sock_lower_handle_t, int, cred_t *); 64 static void tcp_clr_flowctrl(sock_lower_handle_t); 65 static int tcp_ioctl(sock_lower_handle_t, int, intptr_t, int, int32_t *, 66 cred_t *); 67 static int tcp_close(sock_lower_handle_t, int, cred_t *); 68 69 sock_downcalls_t sock_tcp_downcalls = { 70 tcp_activate, 71 tcp_accept, 72 tcp_bind, 73 tcp_listen, 74 tcp_connect, 75 tcp_getpeername, 76 tcp_getsockname, 77 tcp_getsockopt, 78 tcp_setsockopt, 79 tcp_sendmsg, 80 NULL, 81 NULL, 82 NULL, 83 tcp_shutdown, 84 tcp_clr_flowctrl, 85 tcp_ioctl, 86 tcp_close, 87 }; 88 89 /* ARGSUSED */ 90 static void 91 tcp_activate(sock_lower_handle_t proto_handle, sock_upper_handle_t sock_handle, 92 sock_upcalls_t *sock_upcalls, int flags, cred_t *cr) 93 { 94 conn_t *connp = (conn_t *)proto_handle; 95 struct sock_proto_props sopp; 96 extern struct module_info tcp_rinfo; 97 98 ASSERT(connp->conn_upper_handle == NULL); 99 100 /* All Solaris components should pass a cred for this operation. */ 101 ASSERT(cr != NULL); 102 103 sopp.sopp_flags = SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT | 104 SOCKOPT_MAXPSZ | SOCKOPT_MAXBLK | SOCKOPT_RCVTIMER | 105 SOCKOPT_RCVTHRESH | SOCKOPT_MAXADDRLEN | SOCKOPT_MINPSZ; 106 107 sopp.sopp_rxhiwat = SOCKET_RECVHIWATER; 108 sopp.sopp_rxlowat = SOCKET_RECVLOWATER; 109 sopp.sopp_maxpsz = INFPSZ; 110 sopp.sopp_maxblk = INFPSZ; 111 sopp.sopp_rcvtimer = SOCKET_TIMER_INTERVAL; 112 sopp.sopp_rcvthresh = SOCKET_RECVHIWATER >> 3; 113 sopp.sopp_maxaddrlen = sizeof (sin6_t); 114 sopp.sopp_minpsz = (tcp_rinfo.mi_minpsz == 1) ? 0 : 115 tcp_rinfo.mi_minpsz; 116 117 connp->conn_upcalls = sock_upcalls; 118 connp->conn_upper_handle = sock_handle; 119 120 ASSERT(connp->conn_rcvbuf != 0 && 121 connp->conn_rcvbuf == connp->conn_tcp->tcp_rwnd); 122 (*sock_upcalls->su_set_proto_props)(sock_handle, &sopp); 123 } 124 125 static int 126 tcp_accept(sock_lower_handle_t lproto_handle, 127 sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle, 128 cred_t *cr) 129 { 130 conn_t *lconnp, *econnp; 131 tcp_t *listener, *eager; 132 133 lconnp = (conn_t *)lproto_handle; 134 listener = lconnp->conn_tcp; 135 ASSERT(listener->tcp_state == TCPS_LISTEN); 136 econnp = (conn_t *)eproto_handle; 137 eager = econnp->conn_tcp; 138 ASSERT(eager->tcp_listener != NULL); 139 140 /* 141 * It is OK to manipulate these fields outside the eager's squeue 142 * because they will not start being used until tcp_accept_finish 143 * has been called. 144 */ 145 ASSERT(lconnp->conn_upper_handle != NULL); 146 ASSERT(econnp->conn_upper_handle == NULL); 147 econnp->conn_upper_handle = sock_handle; 148 econnp->conn_upcalls = lconnp->conn_upcalls; 149 ASSERT(IPCL_IS_NONSTR(econnp)); 150 return (tcp_accept_common(lconnp, econnp, cr)); 151 } 152 153 static int 154 tcp_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa, 155 socklen_t len, cred_t *cr) 156 { 157 int error; 158 conn_t *connp = (conn_t *)proto_handle; 159 squeue_t *sqp = connp->conn_sqp; 160 161 /* All Solaris components should pass a cred for this operation. */ 162 ASSERT(cr != NULL); 163 164 ASSERT(sqp != NULL); 165 ASSERT(connp->conn_upper_handle != NULL); 166 167 error = squeue_synch_enter(sqp, connp, NULL); 168 if (error != 0) { 169 /* failed to enter */ 170 return (ENOSR); 171 } 172 173 /* binding to a NULL address really means unbind */ 174 if (sa == NULL) { 175 if (connp->conn_tcp->tcp_state < TCPS_LISTEN) 176 error = tcp_do_unbind(connp); 177 else 178 error = EINVAL; 179 } else { 180 error = tcp_do_bind(connp, sa, len, cr, B_TRUE); 181 } 182 183 squeue_synch_exit(sqp, connp); 184 185 if (error < 0) { 186 if (error == -TOUTSTATE) 187 error = EINVAL; 188 else 189 error = proto_tlitosyserr(-error); 190 } 191 192 return (error); 193 } 194 195 /* 196 * SOP_LISTEN() calls into tcp_listen(). 197 */ 198 /* ARGSUSED */ 199 static int 200 tcp_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr) 201 { 202 conn_t *connp = (conn_t *)proto_handle; 203 int error; 204 squeue_t *sqp = connp->conn_sqp; 205 206 ASSERT(connp->conn_upper_handle != NULL); 207 208 /* All Solaris components should pass a cred for this operation. */ 209 ASSERT(cr != NULL); 210 211 error = squeue_synch_enter(sqp, connp, NULL); 212 if (error != 0) { 213 /* failed to enter */ 214 return (ENOBUFS); 215 } 216 217 error = tcp_do_listen(connp, NULL, 0, backlog, cr, B_FALSE); 218 if (error == 0) { 219 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 220 SOCK_OPCTL_ENAB_ACCEPT, (uintptr_t)backlog); 221 } else if (error < 0) { 222 if (error == -TOUTSTATE) 223 error = EINVAL; 224 else 225 error = proto_tlitosyserr(-error); 226 } 227 squeue_synch_exit(sqp, connp); 228 return (error); 229 } 230 231 static int 232 tcp_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa, 233 socklen_t len, sock_connid_t *id, cred_t *cr) 234 { 235 conn_t *connp = (conn_t *)proto_handle; 236 squeue_t *sqp = connp->conn_sqp; 237 int error; 238 239 ASSERT(connp->conn_upper_handle != NULL); 240 241 /* All Solaris components should pass a cred for this operation. */ 242 ASSERT(cr != NULL); 243 244 error = proto_verify_ip_addr(connp->conn_family, sa, len); 245 if (error != 0) { 246 return (error); 247 } 248 249 error = squeue_synch_enter(sqp, connp, NULL); 250 if (error != 0) { 251 /* failed to enter */ 252 return (ENOSR); 253 } 254 255 /* 256 * TCP supports quick connect, so no need to do an implicit bind 257 */ 258 error = tcp_do_connect(connp, sa, len, cr, curproc->p_pid); 259 if (error == 0) { 260 *id = connp->conn_tcp->tcp_connid; 261 } else if (error < 0) { 262 if (error == -TOUTSTATE) { 263 switch (connp->conn_tcp->tcp_state) { 264 case TCPS_SYN_SENT: 265 error = EALREADY; 266 break; 267 case TCPS_ESTABLISHED: 268 error = EISCONN; 269 break; 270 case TCPS_LISTEN: 271 error = EOPNOTSUPP; 272 break; 273 default: 274 error = EINVAL; 275 break; 276 } 277 } else { 278 error = proto_tlitosyserr(-error); 279 } 280 } 281 282 if (connp->conn_tcp->tcp_loopback) { 283 struct sock_proto_props sopp; 284 285 sopp.sopp_flags = SOCKOPT_LOOPBACK; 286 sopp.sopp_loopback = B_TRUE; 287 288 (*connp->conn_upcalls->su_set_proto_props)( 289 connp->conn_upper_handle, &sopp); 290 } 291 done: 292 squeue_synch_exit(sqp, connp); 293 294 return ((error == 0) ? EINPROGRESS : error); 295 } 296 297 /* ARGSUSED3 */ 298 int 299 tcp_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *addr, 300 socklen_t *addrlenp, cred_t *cr) 301 { 302 conn_t *connp = (conn_t *)proto_handle; 303 tcp_t *tcp = connp->conn_tcp; 304 305 ASSERT(connp->conn_upper_handle != NULL); 306 /* All Solaris components should pass a cred for this operation. */ 307 ASSERT(cr != NULL); 308 309 ASSERT(tcp != NULL); 310 if (tcp->tcp_state < TCPS_SYN_RCVD) 311 return (ENOTCONN); 312 313 return (conn_getpeername(connp, addr, addrlenp)); 314 } 315 316 /* ARGSUSED3 */ 317 int 318 tcp_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *addr, 319 socklen_t *addrlenp, cred_t *cr) 320 { 321 conn_t *connp = (conn_t *)proto_handle; 322 323 /* All Solaris components should pass a cred for this operation. */ 324 ASSERT(cr != NULL); 325 326 ASSERT(connp->conn_upper_handle != NULL); 327 return (conn_getsockname(connp, addr, addrlenp)); 328 } 329 330 /* returns UNIX error, the optlen is a value-result arg */ 331 static int 332 tcp_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 333 void *optvalp, socklen_t *optlen, cred_t *cr) 334 { 335 conn_t *connp = (conn_t *)proto_handle; 336 squeue_t *sqp = connp->conn_sqp; 337 int error; 338 t_uscalar_t max_optbuf_len; 339 void *optvalp_buf; 340 int len; 341 342 ASSERT(connp->conn_upper_handle != NULL); 343 344 error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len, 345 tcp_opt_obj.odb_opt_des_arr, 346 tcp_opt_obj.odb_opt_arr_cnt, 347 B_FALSE, B_TRUE, cr); 348 if (error != 0) { 349 if (error < 0) { 350 error = proto_tlitosyserr(-error); 351 } 352 return (error); 353 } 354 355 optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP); 356 357 error = squeue_synch_enter(sqp, connp, NULL); 358 if (error == ENOMEM) { 359 kmem_free(optvalp_buf, max_optbuf_len); 360 return (ENOMEM); 361 } 362 363 len = tcp_opt_get(connp, level, option_name, optvalp_buf); 364 squeue_synch_exit(sqp, connp); 365 366 if (len == -1) { 367 kmem_free(optvalp_buf, max_optbuf_len); 368 return (EINVAL); 369 } 370 371 /* 372 * update optlen and copy option value 373 */ 374 t_uscalar_t size = MIN(len, *optlen); 375 376 bcopy(optvalp_buf, optvalp, size); 377 bcopy(&size, optlen, sizeof (size)); 378 379 kmem_free(optvalp_buf, max_optbuf_len); 380 return (0); 381 } 382 383 static int 384 tcp_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 385 const void *optvalp, socklen_t optlen, cred_t *cr) 386 { 387 conn_t *connp = (conn_t *)proto_handle; 388 squeue_t *sqp = connp->conn_sqp; 389 int error; 390 391 ASSERT(connp->conn_upper_handle != NULL); 392 /* 393 * Entering the squeue synchronously can result in a context switch, 394 * which can cause a rather sever performance degradation. So we try to 395 * handle whatever options we can without entering the squeue. 396 */ 397 if (level == IPPROTO_TCP) { 398 switch (option_name) { 399 case TCP_NODELAY: 400 if (optlen != sizeof (int32_t)) 401 return (EINVAL); 402 mutex_enter(&connp->conn_tcp->tcp_non_sq_lock); 403 connp->conn_tcp->tcp_naglim = *(int *)optvalp ? 1 : 404 connp->conn_tcp->tcp_mss; 405 mutex_exit(&connp->conn_tcp->tcp_non_sq_lock); 406 return (0); 407 default: 408 break; 409 } 410 } 411 412 error = squeue_synch_enter(sqp, connp, NULL); 413 if (error == ENOMEM) { 414 return (ENOMEM); 415 } 416 417 error = proto_opt_check(level, option_name, optlen, NULL, 418 tcp_opt_obj.odb_opt_des_arr, 419 tcp_opt_obj.odb_opt_arr_cnt, 420 B_TRUE, B_FALSE, cr); 421 422 if (error != 0) { 423 if (error < 0) { 424 error = proto_tlitosyserr(-error); 425 } 426 squeue_synch_exit(sqp, connp); 427 return (error); 428 } 429 430 error = tcp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level, option_name, 431 optlen, (uchar_t *)optvalp, (uint_t *)&optlen, (uchar_t *)optvalp, 432 NULL, cr); 433 squeue_synch_exit(sqp, connp); 434 435 ASSERT(error >= 0); 436 437 return (error); 438 } 439 440 /* ARGSUSED */ 441 static int 442 tcp_sendmsg(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg, 443 cred_t *cr) 444 { 445 tcp_t *tcp; 446 uint32_t msize; 447 conn_t *connp = (conn_t *)proto_handle; 448 int32_t tcpstate; 449 450 /* All Solaris components should pass a cred for this operation. */ 451 ASSERT(cr != NULL); 452 453 ASSERT(connp->conn_ref >= 2); 454 ASSERT(connp->conn_upper_handle != NULL); 455 456 if (msg->msg_controllen != 0) { 457 freemsg(mp); 458 return (EOPNOTSUPP); 459 } 460 461 switch (DB_TYPE(mp)) { 462 case M_DATA: 463 tcp = connp->conn_tcp; 464 ASSERT(tcp != NULL); 465 466 tcpstate = tcp->tcp_state; 467 if (tcpstate < TCPS_ESTABLISHED) { 468 freemsg(mp); 469 /* 470 * We return ENOTCONN if the endpoint is trying to 471 * connect or has never been connected, and EPIPE if it 472 * has been disconnected. The connection id helps us 473 * distinguish between the last two cases. 474 */ 475 return ((tcpstate == TCPS_SYN_SENT) ? ENOTCONN : 476 ((tcp->tcp_connid > 0) ? EPIPE : ENOTCONN)); 477 } else if (tcpstate > TCPS_CLOSE_WAIT) { 478 freemsg(mp); 479 return (EPIPE); 480 } 481 482 msize = msgdsize(mp); 483 484 mutex_enter(&tcp->tcp_non_sq_lock); 485 tcp->tcp_squeue_bytes += msize; 486 /* 487 * Squeue Flow Control 488 */ 489 if (TCP_UNSENT_BYTES(tcp) > connp->conn_sndbuf) { 490 tcp_setqfull(tcp); 491 } 492 mutex_exit(&tcp->tcp_non_sq_lock); 493 494 /* 495 * The application may pass in an address in the msghdr, but 496 * we ignore the address on connection-oriented sockets. 497 * Just like BSD this code does not generate an error for 498 * TCP (a CONNREQUIRED socket) when sending to an address 499 * passed in with sendto/sendmsg. Instead the data is 500 * delivered on the connection as if no address had been 501 * supplied. 502 */ 503 CONN_INC_REF(connp); 504 505 if (msg->msg_flags & MSG_OOB) { 506 SQUEUE_ENTER_ONE(connp->conn_sqp, mp, tcp_output_urgent, 507 connp, NULL, tcp_squeue_flag, SQTAG_TCP_OUTPUT); 508 } else { 509 SQUEUE_ENTER_ONE(connp->conn_sqp, mp, tcp_output, 510 connp, NULL, tcp_squeue_flag, SQTAG_TCP_OUTPUT); 511 } 512 513 return (0); 514 515 default: 516 ASSERT(0); 517 } 518 519 freemsg(mp); 520 return (0); 521 } 522 523 /* ARGSUSED */ 524 static int 525 tcp_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr) 526 { 527 conn_t *connp = (conn_t *)proto_handle; 528 tcp_t *tcp = connp->conn_tcp; 529 530 ASSERT(connp->conn_upper_handle != NULL); 531 532 /* All Solaris components should pass a cred for this operation. */ 533 ASSERT(cr != NULL); 534 535 /* 536 * X/Open requires that we check the connected state. 537 */ 538 if (tcp->tcp_state < TCPS_SYN_SENT) 539 return (ENOTCONN); 540 541 /* shutdown the send side */ 542 if (how != SHUT_RD) { 543 mblk_t *bp; 544 545 bp = allocb_wait(0, BPRI_HI, STR_NOSIG, NULL); 546 CONN_INC_REF(connp); 547 SQUEUE_ENTER_ONE(connp->conn_sqp, bp, tcp_shutdown_output, 548 connp, NULL, SQ_NODRAIN, SQTAG_TCP_SHUTDOWN_OUTPUT); 549 550 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 551 SOCK_OPCTL_SHUT_SEND, 0); 552 } 553 554 /* shutdown the recv side */ 555 if (how != SHUT_WR) 556 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 557 SOCK_OPCTL_SHUT_RECV, 0); 558 559 return (0); 560 } 561 562 static void 563 tcp_clr_flowctrl(sock_lower_handle_t proto_handle) 564 { 565 conn_t *connp = (conn_t *)proto_handle; 566 tcp_t *tcp = connp->conn_tcp; 567 mblk_t *mp; 568 int error; 569 570 ASSERT(connp->conn_upper_handle != NULL); 571 572 /* 573 * If tcp->tcp_rsrv_mp == NULL, it means that tcp_clr_flowctrl() 574 * is currently running. 575 */ 576 mutex_enter(&tcp->tcp_rsrv_mp_lock); 577 if ((mp = tcp->tcp_rsrv_mp) == NULL) { 578 mutex_exit(&tcp->tcp_rsrv_mp_lock); 579 return; 580 } 581 tcp->tcp_rsrv_mp = NULL; 582 mutex_exit(&tcp->tcp_rsrv_mp_lock); 583 584 error = squeue_synch_enter(connp->conn_sqp, connp, mp); 585 ASSERT(error == 0); 586 587 mutex_enter(&tcp->tcp_rsrv_mp_lock); 588 tcp->tcp_rsrv_mp = mp; 589 mutex_exit(&tcp->tcp_rsrv_mp_lock); 590 591 if (tcp->tcp_fused) { 592 tcp_fuse_backenable(tcp); 593 } else { 594 tcp->tcp_rwnd = connp->conn_rcvbuf; 595 /* 596 * Send back a window update immediately if TCP is above 597 * ESTABLISHED state and the increase of the rcv window 598 * that the other side knows is at least 1 MSS after flow 599 * control is lifted. 600 */ 601 if (tcp->tcp_state >= TCPS_ESTABLISHED && 602 tcp_rwnd_reopen(tcp) == TH_ACK_NEEDED) { 603 tcp_xmit_ctl(NULL, tcp, 604 (tcp->tcp_swnd == 0) ? tcp->tcp_suna : 605 tcp->tcp_snxt, tcp->tcp_rnxt, TH_ACK); 606 } 607 } 608 609 squeue_synch_exit(connp->conn_sqp, connp); 610 } 611 612 /* ARGSUSED */ 613 static int 614 tcp_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg, 615 int mode, int32_t *rvalp, cred_t *cr) 616 { 617 conn_t *connp = (conn_t *)proto_handle; 618 int error; 619 620 ASSERT(connp->conn_upper_handle != NULL); 621 622 /* All Solaris components should pass a cred for this operation. */ 623 ASSERT(cr != NULL); 624 625 /* 626 * If we don't have a helper stream then create one. 627 * ip_create_helper_stream takes care of locking the conn_t, 628 * so this check for NULL is just a performance optimization. 629 */ 630 if (connp->conn_helper_info == NULL) { 631 tcp_stack_t *tcps = connp->conn_tcp->tcp_tcps; 632 633 /* 634 * Create a helper stream for non-STREAMS socket. 635 */ 636 error = ip_create_helper_stream(connp, tcps->tcps_ldi_ident); 637 if (error != 0) { 638 ip0dbg(("tcp_ioctl: create of IP helper stream " 639 "failed %d\n", error)); 640 return (error); 641 } 642 } 643 644 switch (cmd) { 645 case ND_SET: 646 case ND_GET: 647 case _SIOCSOCKFALLBACK: 648 case TCP_IOC_ABORT_CONN: 649 case TI_GETPEERNAME: 650 case TI_GETMYNAME: 651 ip1dbg(("tcp_ioctl: cmd 0x%x on non streams socket", 652 cmd)); 653 error = EINVAL; 654 break; 655 default: 656 /* 657 * If the conn is not closing, pass on to IP using 658 * helper stream. Bump the ioctlref to prevent tcp_close 659 * from closing the rq/wq out from underneath the ioctl 660 * if it ends up queued or aborted/interrupted. 661 */ 662 mutex_enter(&connp->conn_lock); 663 if (connp->conn_state_flags & (CONN_CLOSING)) { 664 mutex_exit(&connp->conn_lock); 665 error = EINVAL; 666 break; 667 } 668 CONN_INC_IOCTLREF_LOCKED(connp); 669 error = ldi_ioctl(connp->conn_helper_info->iphs_handle, 670 cmd, arg, mode, cr, rvalp); 671 CONN_DEC_IOCTLREF(connp); 672 break; 673 } 674 return (error); 675 } 676 677 /* ARGSUSED */ 678 static int 679 tcp_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr) 680 { 681 conn_t *connp = (conn_t *)proto_handle; 682 683 ASSERT(connp->conn_upper_handle != NULL); 684 685 /* All Solaris components should pass a cred for this operation. */ 686 ASSERT(cr != NULL); 687 688 tcp_close_common(connp, flags); 689 690 ip_free_helper_stream(connp); 691 692 /* 693 * Drop IP's reference on the conn. This is the last reference 694 * on the connp if the state was less than established. If the 695 * connection has gone into timewait state, then we will have 696 * one ref for the TCP and one more ref (total of two) for the 697 * classifier connected hash list (a timewait connections stays 698 * in connected hash till closed). 699 * 700 * We can't assert the references because there might be other 701 * transient reference places because of some walkers or queued 702 * packets in squeue for the timewait state. 703 */ 704 CONN_DEC_REF(connp); 705 return (0); 706 } 707 708 /* ARGSUSED */ 709 sock_lower_handle_t 710 tcp_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls, 711 uint_t *smodep, int *errorp, int flags, cred_t *credp) 712 { 713 conn_t *connp; 714 boolean_t isv6 = family == AF_INET6; 715 if (type != SOCK_STREAM || (family != AF_INET && family != AF_INET6) || 716 (proto != 0 && proto != IPPROTO_TCP)) { 717 *errorp = EPROTONOSUPPORT; 718 return (NULL); 719 } 720 721 connp = tcp_create_common(credp, isv6, B_TRUE, errorp); 722 if (connp == NULL) { 723 return (NULL); 724 } 725 726 /* 727 * Put the ref for TCP. Ref for IP was already put 728 * by ipcl_conn_create. Also Make the conn_t globally 729 * visible to walkers 730 */ 731 mutex_enter(&connp->conn_lock); 732 CONN_INC_REF_LOCKED(connp); 733 ASSERT(connp->conn_ref == 2); 734 connp->conn_state_flags &= ~CONN_INCIPIENT; 735 736 connp->conn_flags |= IPCL_NONSTR; 737 mutex_exit(&connp->conn_lock); 738 739 ASSERT(errorp != NULL); 740 *errorp = 0; 741 *sock_downcalls = &sock_tcp_downcalls; 742 *smodep = SM_CONNREQUIRED | SM_EXDATA | SM_ACCEPTSUPP | 743 SM_SENDFILESUPP; 744 745 return ((sock_lower_handle_t)connp); 746 } 747 748 int 749 tcp_fallback(sock_lower_handle_t proto_handle, queue_t *q, 750 boolean_t direct_sockfs, so_proto_quiesced_cb_t quiesced_cb) 751 { 752 tcp_t *tcp; 753 conn_t *connp = (conn_t *)proto_handle; 754 int error; 755 mblk_t *stropt_mp; 756 mblk_t *ordrel_mp; 757 758 tcp = connp->conn_tcp; 759 760 stropt_mp = allocb_wait(sizeof (struct stroptions), BPRI_HI, STR_NOSIG, 761 NULL); 762 763 /* Pre-allocate the T_ordrel_ind mblk. */ 764 ASSERT(tcp->tcp_ordrel_mp == NULL); 765 ordrel_mp = allocb_wait(sizeof (struct T_ordrel_ind), BPRI_HI, 766 STR_NOSIG, NULL); 767 ordrel_mp->b_datap->db_type = M_PROTO; 768 ((struct T_ordrel_ind *)ordrel_mp->b_rptr)->PRIM_type = T_ORDREL_IND; 769 ordrel_mp->b_wptr += sizeof (struct T_ordrel_ind); 770 771 /* 772 * Enter the squeue so that no new packets can come in 773 */ 774 error = squeue_synch_enter(connp->conn_sqp, connp, NULL); 775 if (error != 0) { 776 /* failed to enter, free all the pre-allocated messages. */ 777 freeb(stropt_mp); 778 freeb(ordrel_mp); 779 /* 780 * We cannot process the eager, so at least send out a 781 * RST so the peer can reconnect. 782 */ 783 if (tcp->tcp_listener != NULL) { 784 (void) tcp_eager_blowoff(tcp->tcp_listener, 785 tcp->tcp_conn_req_seqnum); 786 } 787 return (ENOMEM); 788 } 789 790 /* 791 * Both endpoints must be of the same type (either STREAMS or 792 * non-STREAMS) for fusion to be enabled. So if we are fused, 793 * we have to unfuse. 794 */ 795 if (tcp->tcp_fused) 796 tcp_unfuse(tcp); 797 798 /* 799 * No longer a direct socket 800 */ 801 connp->conn_flags &= ~IPCL_NONSTR; 802 tcp->tcp_ordrel_mp = ordrel_mp; 803 804 if (tcp->tcp_listener != NULL) { 805 /* The eager will deal with opts when accept() is called */ 806 freeb(stropt_mp); 807 tcp_fallback_eager(tcp, direct_sockfs); 808 } else { 809 tcp_fallback_noneager(tcp, stropt_mp, q, direct_sockfs, 810 quiesced_cb); 811 } 812 813 /* 814 * There should be atleast two ref's (IP + TCP) 815 */ 816 ASSERT(connp->conn_ref >= 2); 817 squeue_synch_exit(connp->conn_sqp, connp); 818 819 return (0); 820 } 821