1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 1995, 2010, Oracle and/or its affiliates. All rights reserved. 24 * Copyright 2015, Joyent, Inc. 25 * Copyright 2016 Nexenta Systems, Inc. All rights reserved. 
26 * Copyright 2022 Garrett D'Amore 27 */ 28 29 #include <sys/types.h> 30 #include <sys/t_lock.h> 31 #include <sys/param.h> 32 #include <sys/systm.h> 33 #include <sys/buf.h> 34 #include <sys/conf.h> 35 #include <sys/cred.h> 36 #include <sys/kmem.h> 37 #include <sys/kmem_impl.h> 38 #include <sys/sysmacros.h> 39 #include <sys/vfs.h> 40 #include <sys/vnode.h> 41 #include <sys/debug.h> 42 #include <sys/errno.h> 43 #include <sys/time.h> 44 #include <sys/file.h> 45 #include <sys/open.h> 46 #include <sys/user.h> 47 #include <sys/termios.h> 48 #include <sys/stream.h> 49 #include <sys/strsubr.h> 50 #include <sys/strsun.h> 51 #include <sys/suntpi.h> 52 #include <sys/ddi.h> 53 #include <sys/esunddi.h> 54 #include <sys/flock.h> 55 #include <sys/modctl.h> 56 #include <sys/vtrace.h> 57 #include <sys/cmn_err.h> 58 #include <sys/pathname.h> 59 60 #include <sys/socket.h> 61 #include <sys/socketvar.h> 62 #include <sys/sockio.h> 63 #include <netinet/in.h> 64 #include <sys/un.h> 65 #include <sys/strsun.h> 66 67 #include <sys/tiuser.h> 68 #define _SUN_TPI_VERSION 2 69 #include <sys/tihdr.h> 70 #include <sys/timod.h> /* TI_GETMYNAME, TI_GETPEERNAME */ 71 72 #include <c2/audit.h> 73 74 #include <inet/common.h> 75 #include <inet/ip.h> 76 #include <inet/ip6.h> 77 #include <inet/tcp.h> 78 #include <inet/udp_impl.h> 79 80 #include <sys/zone.h> 81 82 #include <fs/sockfs/sockcommon.h> 83 #include <fs/sockfs/socktpi.h> 84 #include <fs/sockfs/socktpi_impl.h> 85 86 /* 87 * Possible failures when memory can't be allocated. 
The documented behavior: 88 * 89 * 5.5: 4.X: XNET: 90 * accept: ENOMEM/ENOSR/EINTR - (EINTR) ENOMEM/ENOBUFS/ENOSR/ 91 * EINTR 92 * (4.X does not document EINTR but returns it) 93 * bind: ENOSR - ENOBUFS/ENOSR 94 * connect: EINTR EINTR ENOBUFS/ENOSR/EINTR 95 * getpeername: ENOMEM/ENOSR ENOBUFS (-) ENOBUFS/ENOSR 96 * getsockname: ENOMEM/ENOSR ENOBUFS (-) ENOBUFS/ENOSR 97 * (4.X getpeername and getsockname do not fail in practice) 98 * getsockopt: ENOMEM/ENOSR - ENOBUFS/ENOSR 99 * listen: - - ENOBUFS 100 * recv: ENOMEM/ENOSR/EINTR EINTR ENOBUFS/ENOMEM/ENOSR/ 101 * EINTR 102 * send: ENOMEM/ENOSR/EINTR ENOBUFS/EINTR ENOBUFS/ENOMEM/ENOSR/ 103 * EINTR 104 * setsockopt: ENOMEM/ENOSR - ENOBUFS/ENOMEM/ENOSR 105 * shutdown: ENOMEM/ENOSR - ENOBUFS/ENOSR 106 * socket: ENOMEM/ENOSR ENOBUFS ENOBUFS/ENOMEM/ENOSR 107 * socketpair: ENOMEM/ENOSR - ENOBUFS/ENOMEM/ENOSR 108 * 109 * Resolution. When allocation fails: 110 * recv: return EINTR 111 * send: return EINTR 112 * connect, accept: EINTR 113 * bind, listen, shutdown (unbind, unix_close, disconnect): sleep 114 * socket, socketpair: ENOBUFS 115 * getpeername, getsockname: sleep 116 * getsockopt, setsockopt: sleep 117 */ 118 119 #ifdef SOCK_TEST 120 /* 121 * Variables that make sockfs do something other than the standard TPI 122 * for the AF_INET transports. 123 * 124 * solisten_tpi_tcp: 125 * TCP can handle a O_T_BIND_REQ with an increased backlog even though 126 * the transport is already bound. This is needed to avoid losing the 127 * port number should listen() do a T_UNBIND_REQ followed by a 128 * O_T_BIND_REQ. 129 * 130 * soconnect_tpi_udp: 131 * UDP and ICMP can handle a T_CONN_REQ. 132 * This is needed to make the sequence of connect(), getsockname() 133 * return the local IP address used to send packets to the connected to 134 * destination. 135 * 136 * soconnect_tpi_tcp: 137 * TCP can handle a T_CONN_REQ without seeing a O_T_BIND_REQ. 138 * Set this to non-zero to send TPI conformant messages to TCP in this 139 * respect. 
This is a performance optimization. 140 * 141 * soaccept_tpi_tcp: 142 * TCP can handle a T_CONN_REQ without the acceptor being bound. 143 * This is a performance optimization that has been picked up in XTI. 144 * 145 * soaccept_tpi_multioptions: 146 * When inheriting SOL_SOCKET options from the listener to the accepting 147 * socket send them as a single message for AF_INET{,6}. 148 */ 149 int solisten_tpi_tcp = 0; 150 int soconnect_tpi_udp = 0; 151 int soconnect_tpi_tcp = 0; 152 int soaccept_tpi_tcp = 0; 153 int soaccept_tpi_multioptions = 1; 154 #else /* SOCK_TEST */ 155 #define soconnect_tpi_tcp 0 156 #define soconnect_tpi_udp 0 157 #define solisten_tpi_tcp 0 158 #define soaccept_tpi_tcp 0 159 #define soaccept_tpi_multioptions 1 160 #endif /* SOCK_TEST */ 161 162 #ifdef SOCK_TEST 163 extern int do_useracc; 164 extern clock_t sock_test_timelimit; 165 #endif /* SOCK_TEST */ 166 167 extern uint32_t ucredsize; 168 169 /* 170 * Some X/Open added checks might have to be backed out to keep SunOS 4.X 171 * applications working. Turn on this flag to disable these checks. 172 */ 173 int xnet_skip_checks = 0; 174 int xnet_check_print = 0; 175 int xnet_truncate_print = 0; 176 177 static void sotpi_destroy(struct sonode *); 178 static struct sonode *sotpi_create(struct sockparams *, int, int, int, int, 179 int, int *, cred_t *cr); 180 181 static boolean_t sotpi_info_create(struct sonode *, int); 182 static void sotpi_info_init(struct sonode *); 183 static void sotpi_info_fini(struct sonode *); 184 static void sotpi_info_destroy(struct sonode *); 185 186 /* 187 * Do direct function call to the transport layer below; this would 188 * also allow the transport to utilize read-side synchronous stream 189 * interface if necessary. This is a /etc/system tunable that must 190 * not be modified on a running system. By default this is enabled 191 * for performance reasons and may be disabled for debugging purposes. 
192 */ 193 boolean_t socktpi_direct = B_TRUE; 194 195 static struct kmem_cache *socktpi_cache, *socktpi_unix_cache; 196 197 extern void sigintr(k_sigset_t *, int); 198 extern void sigunintr(k_sigset_t *); 199 200 static int sotpi_unbind(struct sonode *, int); 201 202 /* TPI sockfs sonode operations */ 203 int sotpi_init(struct sonode *, struct sonode *, struct cred *, 204 int); 205 static int sotpi_accept(struct sonode *, int, struct cred *, 206 struct sonode **); 207 static int sotpi_bind(struct sonode *, struct sockaddr *, socklen_t, 208 int, struct cred *); 209 static int sotpi_listen(struct sonode *, int, struct cred *); 210 static int sotpi_connect(struct sonode *, struct sockaddr *, 211 socklen_t, int, int, struct cred *); 212 extern int sotpi_recvmsg(struct sonode *, struct nmsghdr *, 213 struct uio *, struct cred *); 214 static int sotpi_sendmsg(struct sonode *, struct nmsghdr *, 215 struct uio *, struct cred *); 216 static int sotpi_sendmblk(struct sonode *, struct nmsghdr *, int, 217 struct cred *, mblk_t **); 218 static int sosend_dgramcmsg(struct sonode *, struct sockaddr *, socklen_t, 219 struct uio *, void *, t_uscalar_t, int); 220 static int sodgram_direct(struct sonode *, struct sockaddr *, 221 socklen_t, struct uio *, int); 222 extern int sotpi_getpeername(struct sonode *, struct sockaddr *, 223 socklen_t *, boolean_t, struct cred *); 224 static int sotpi_getsockname(struct sonode *, struct sockaddr *, 225 socklen_t *, struct cred *); 226 static int sotpi_shutdown(struct sonode *, int, struct cred *); 227 extern int sotpi_getsockopt(struct sonode *, int, int, void *, 228 socklen_t *, int, struct cred *); 229 extern int sotpi_setsockopt(struct sonode *, int, int, const void *, 230 socklen_t, struct cred *); 231 static int sotpi_ioctl(struct sonode *, int, intptr_t, int, struct cred *, 232 int32_t *); 233 static int socktpi_plumbioctl(struct vnode *, int, intptr_t, int, 234 struct cred *, int32_t *); 235 static int sotpi_poll(struct sonode *, short, 
int, short *, 236 struct pollhead **); 237 static int sotpi_close(struct sonode *, int, struct cred *); 238 239 static int i_sotpi_info_constructor(sotpi_info_t *); 240 static void i_sotpi_info_destructor(sotpi_info_t *); 241 242 sonodeops_t sotpi_sonodeops = { 243 sotpi_init, /* sop_init */ 244 sotpi_accept, /* sop_accept */ 245 sotpi_bind, /* sop_bind */ 246 sotpi_listen, /* sop_listen */ 247 sotpi_connect, /* sop_connect */ 248 sotpi_recvmsg, /* sop_recvmsg */ 249 sotpi_sendmsg, /* sop_sendmsg */ 250 sotpi_sendmblk, /* sop_sendmblk */ 251 sotpi_getpeername, /* sop_getpeername */ 252 sotpi_getsockname, /* sop_getsockname */ 253 sotpi_shutdown, /* sop_shutdown */ 254 sotpi_getsockopt, /* sop_getsockopt */ 255 sotpi_setsockopt, /* sop_setsockopt */ 256 sotpi_ioctl, /* sop_ioctl */ 257 sotpi_poll, /* sop_poll */ 258 sotpi_close, /* sop_close */ 259 }; 260 261 /* 262 * Return a TPI socket vnode. 263 * 264 * Note that sockets assume that the driver will clone (either itself 265 * or by using the clone driver) i.e. a socket() call will always 266 * result in a new vnode being created. 267 */ 268 269 /* 270 * Common create code for socket and accept. If tso is set the values 271 * from that node is used instead of issuing a T_INFO_REQ. 272 */ 273 274 /* ARGSUSED */ 275 static struct sonode * 276 sotpi_create(struct sockparams *sp, int family, int type, int protocol, 277 int version, int sflags, int *errorp, cred_t *cr) 278 { 279 struct sonode *so; 280 kmem_cache_t *cp; 281 282 ASSERT(sp->sp_sdev_info.sd_vnode != NULL); 283 284 if (family == AF_NCA) { 285 /* 286 * The request is for an NCA socket so for NL7C use the 287 * INET domain instead and mark NL7C_AF_NCA below. 288 */ 289 family = AF_INET; 290 /* 291 * NL7C is not supported in the non-global zone, 292 * we enforce this restriction here. 
293 */ 294 if (getzoneid() != GLOBAL_ZONEID) { 295 *errorp = ENOTSUP; 296 return (NULL); 297 } 298 } 299 300 /* 301 * to be compatible with old tpi socket implementation ignore 302 * sleep flag (sflags) passed in 303 */ 304 cp = (family == AF_UNIX) ? socktpi_unix_cache : socktpi_cache; 305 so = kmem_cache_alloc(cp, KM_SLEEP); 306 if (so == NULL) { 307 *errorp = ENOMEM; 308 return (NULL); 309 } 310 311 sonode_init(so, sp, family, type, protocol, &sotpi_sonodeops); 312 sotpi_info_init(so); 313 314 if (version == SOV_DEFAULT) 315 version = so_default_version; 316 317 so->so_version = (short)version; 318 *errorp = 0; 319 320 return (so); 321 } 322 323 static void 324 sotpi_destroy(struct sonode *so) 325 { 326 kmem_cache_t *cp; 327 struct sockparams *origsp; 328 329 /* 330 * If there is a new dealloc function (ie. smod_destroy_func), 331 * then it should check the correctness of the ops. 332 */ 333 334 ASSERT(so->so_ops == &sotpi_sonodeops); 335 336 origsp = SOTOTPI(so)->sti_orig_sp; 337 338 sotpi_info_fini(so); 339 340 if (so->so_state & SS_FALLBACK_COMP) { 341 /* 342 * A fallback happend, which means that a sotpi_info_t struct 343 * was allocated (as opposed to being allocated from the TPI 344 * sonode cache. Therefore we explicitly free the struct 345 * here. 346 */ 347 sotpi_info_destroy(so); 348 ASSERT(origsp != NULL); 349 350 origsp->sp_smod_info->smod_sock_destroy_func(so); 351 SOCKPARAMS_DEC_REF(origsp); 352 } else { 353 sonode_fini(so); 354 cp = (so->so_family == AF_UNIX) ? socktpi_unix_cache : 355 socktpi_cache; 356 kmem_cache_free(cp, so); 357 } 358 } 359 360 /* ARGSUSED1 */ 361 int 362 sotpi_init(struct sonode *so, struct sonode *tso, struct cred *cr, int flags) 363 { 364 major_t maj; 365 dev_t newdev; 366 struct vnode *vp; 367 int error = 0; 368 struct stdata *stp; 369 370 sotpi_info_t *sti = SOTOTPI(so); 371 372 dprint(1, ("sotpi_init()\n")); 373 374 /* 375 * over write the sleep flag passed in but that is ok 376 * as tpi socket does not honor sleep flag. 
377 */ 378 flags |= FREAD|FWRITE; 379 380 /* 381 * Record in so_flag that it is a clone. 382 */ 383 if (getmajor(sti->sti_dev) == clone_major) 384 so->so_flag |= SOCLONE; 385 386 if ((so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM) && 387 (so->so_family == AF_INET || so->so_family == AF_INET6) && 388 (so->so_protocol == IPPROTO_TCP || so->so_protocol == IPPROTO_UDP || 389 so->so_protocol == IPPROTO_IP)) { 390 /* Tell tcp or udp that it's talking to sockets */ 391 flags |= SO_SOCKSTR; 392 393 /* 394 * Here we indicate to socktpi_open() our attempt to 395 * make direct calls between sockfs and transport. 396 * The final decision is left to socktpi_open(). 397 */ 398 sti->sti_direct = 1; 399 400 ASSERT(so->so_type != SOCK_DGRAM || tso == NULL); 401 if (so->so_type == SOCK_STREAM && tso != NULL) { 402 if (SOTOTPI(tso)->sti_direct) { 403 /* 404 * Inherit sti_direct from listener and pass 405 * SO_ACCEPTOR open flag to tcp, indicating 406 * that this is an accept fast-path instance. 407 */ 408 flags |= SO_ACCEPTOR; 409 } else { 410 /* 411 * sti_direct is not set on listener, meaning 412 * that the listener has been converted from 413 * a socket to a stream. Ensure that the 414 * acceptor inherits these settings. 415 */ 416 sti->sti_direct = 0; 417 flags &= ~SO_SOCKSTR; 418 } 419 } 420 } 421 422 /* 423 * Tell local transport that it is talking to sockets. 424 */ 425 if (so->so_family == AF_UNIX) { 426 flags |= SO_SOCKSTR; 427 } 428 429 vp = SOTOV(so); 430 newdev = vp->v_rdev; 431 maj = getmajor(newdev); 432 ASSERT(STREAMSTAB(maj)); 433 434 error = stropen(vp, &newdev, flags, cr); 435 436 stp = vp->v_stream; 437 if (error == 0) { 438 if (so->so_flag & SOCLONE) 439 ASSERT(newdev != vp->v_rdev); 440 mutex_enter(&so->so_lock); 441 sti->sti_dev = newdev; 442 vp->v_rdev = newdev; 443 mutex_exit(&so->so_lock); 444 445 if (stp->sd_flag & STRISTTY) { 446 /* 447 * this is a post SVR4 tty driver - a socket can not 448 * be a controlling terminal. Fail the open. 
449 */ 450 (void) sotpi_close(so, flags, cr); 451 return (ENOTTY); /* XXX */ 452 } 453 454 ASSERT(stp->sd_wrq != NULL); 455 sti->sti_provinfo = tpi_findprov(stp->sd_wrq); 456 457 /* 458 * If caller is interested in doing direct function call 459 * interface to/from transport module, probe the module 460 * directly beneath the streamhead to see if it qualifies. 461 * 462 * We turn off the direct interface when qualifications fail. 463 * In the acceptor case, we simply turn off the sti_direct 464 * flag on the socket. We do the fallback after the accept 465 * has completed, before the new socket is returned to the 466 * application. 467 */ 468 if (sti->sti_direct) { 469 queue_t *tq = stp->sd_wrq->q_next; 470 471 /* 472 * sti_direct is currently supported and tested 473 * only for tcp/udp; this is the main reason to 474 * have the following assertions. 475 */ 476 ASSERT(so->so_family == AF_INET || 477 so->so_family == AF_INET6); 478 ASSERT(so->so_protocol == IPPROTO_UDP || 479 so->so_protocol == IPPROTO_TCP || 480 so->so_protocol == IPPROTO_IP); 481 ASSERT(so->so_type == SOCK_DGRAM || 482 so->so_type == SOCK_STREAM); 483 484 /* 485 * Abort direct call interface if the module directly 486 * underneath the stream head is not defined with the 487 * _D_DIRECT flag. This could happen in the tcp or 488 * udp case, when some other module is autopushed 489 * above it, or for some reasons the expected module 490 * isn't purely D_MP (which is the main requirement). 491 */ 492 if (!socktpi_direct || !(tq->q_flag & _QDIRECT) || 493 !(_OTHERQ(tq)->q_flag & _QDIRECT)) { 494 int rval; 495 496 /* Continue on without direct calls */ 497 sti->sti_direct = 0; 498 499 /* 500 * Cannot issue ioctl on fallback socket since 501 * there is no conn associated with the queue. 502 * The fallback downcall will notify the proto 503 * of the change. 
504 */ 505 if (!(flags & SO_ACCEPTOR) && 506 !(flags & SO_FALLBACK)) { 507 if ((error = strioctl(vp, 508 _SIOCSOCKFALLBACK, 0, 0, K_TO_K, 509 cr, &rval)) != 0) { 510 (void) sotpi_close(so, flags, 511 cr); 512 return (error); 513 } 514 } 515 } 516 } 517 518 if (flags & SO_FALLBACK) { 519 /* 520 * The stream created does not have a conn. 521 * do stream set up after conn has been assigned 522 */ 523 return (error); 524 } 525 if (error = so_strinit(so, tso)) { 526 (void) sotpi_close(so, flags, cr); 527 return (error); 528 } 529 530 /* Enable sendfile() on AF_UNIX streams */ 531 if (so->so_family == AF_UNIX && so->so_type == SOCK_STREAM) { 532 mutex_enter(&so->so_lock); 533 so->so_mode |= SM_SENDFILESUPP; 534 mutex_exit(&so->so_lock); 535 } 536 537 /* Wildcard */ 538 if (so->so_protocol != so->so_sockparams->sp_protocol) { 539 int protocol = so->so_protocol; 540 /* 541 * Issue SO_PROTOTYPE setsockopt. 542 */ 543 error = sotpi_setsockopt(so, SOL_SOCKET, SO_PROTOTYPE, 544 &protocol, (t_uscalar_t)sizeof (protocol), cr); 545 if (error != 0) { 546 (void) sotpi_close(so, flags, cr); 547 /* 548 * Setsockopt often fails with ENOPROTOOPT but 549 * socket() should fail with 550 * EPROTONOSUPPORT/EPROTOTYPE. 551 */ 552 return (EPROTONOSUPPORT); 553 } 554 } 555 556 } else { 557 /* 558 * While the same socket can not be reopened (unlike specfs) 559 * the stream head sets STREOPENFAIL when the autopush fails. 560 */ 561 if ((stp != NULL) && 562 (stp->sd_flag & STREOPENFAIL)) { 563 /* 564 * Open failed part way through. 565 */ 566 mutex_enter(&stp->sd_lock); 567 stp->sd_flag &= ~STREOPENFAIL; 568 mutex_exit(&stp->sd_lock); 569 (void) sotpi_close(so, flags, cr); 570 return (error); 571 /*NOTREACHED*/ 572 } 573 ASSERT(stp == NULL); 574 } 575 TRACE_4(TR_FAC_SOCKFS, TR_SOCKFS_OPEN, 576 "sockfs open:maj %d vp %p so %p error %d", 577 maj, vp, so, error); 578 return (error); 579 } 580 581 /* 582 * Bind the socket to an unspecified address in sockfs only. 
583 * Used for TCP/UDP transports where we know that the O_T_BIND_REQ isn't 584 * required in all cases. 585 */ 586 static void 587 so_automatic_bind(struct sonode *so) 588 { 589 sotpi_info_t *sti = SOTOTPI(so); 590 ASSERT(so->so_family == AF_INET || so->so_family == AF_INET6); 591 592 ASSERT(MUTEX_HELD(&so->so_lock)); 593 ASSERT(!(so->so_state & SS_ISBOUND)); 594 ASSERT(sti->sti_unbind_mp); 595 596 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 597 bzero(sti->sti_laddr_sa, sti->sti_laddr_len); 598 sti->sti_laddr_sa->sa_family = so->so_family; 599 so->so_state |= SS_ISBOUND; 600 } 601 602 603 /* 604 * bind the socket. 605 * 606 * If the socket is already bound and none of _SOBIND_SOCKBSD or _SOBIND_XPG4_2 607 * are passed in we allow rebinding. Note that for backwards compatibility 608 * even "svr4" sockets pass in _SOBIND_SOCKBSD/SOV_SOCKBSD to sobind/bind. 609 * Thus the rebinding code is currently not executed. 610 * 611 * The constraints for rebinding are: 612 * - it is a SOCK_DGRAM, or 613 * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected 614 * and no listen() has been done. 615 * This rebinding code was added based on some language in the XNET book 616 * about not returning EINVAL it the protocol allows rebinding. However, 617 * this language is not present in the Posix socket draft. Thus maybe the 618 * rebinding logic should be deleted from the source. 619 * 620 * A null "name" can be used to unbind the socket if: 621 * - it is a SOCK_DGRAM, or 622 * - it is a SOCK_STREAM/SOCK_SEQPACKET that has not been connected 623 * and no listen() has been done. 
624 */ 625 /* ARGSUSED */ 626 static int 627 sotpi_bindlisten(struct sonode *so, struct sockaddr *name, 628 socklen_t namelen, int backlog, int flags, struct cred *cr) 629 { 630 struct T_bind_req bind_req; 631 struct T_bind_ack *bind_ack; 632 int error = 0; 633 mblk_t *mp; 634 void *addr; 635 t_uscalar_t addrlen; 636 int unbind_on_err = 1; 637 boolean_t clear_acceptconn_on_err = B_FALSE; 638 boolean_t restore_backlog_on_err = B_FALSE; 639 int save_so_backlog; 640 t_scalar_t PRIM_type = O_T_BIND_REQ; 641 boolean_t tcp_udp_xport; 642 sotpi_info_t *sti = SOTOTPI(so); 643 644 dprintso(so, 1, ("sotpi_bindlisten(%p, %p, %d, %d, 0x%x) %s\n", 645 (void *)so, (void *)name, namelen, backlog, flags, 646 pr_state(so->so_state, so->so_mode))); 647 648 tcp_udp_xport = so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM; 649 650 if (!(flags & _SOBIND_LOCK_HELD)) { 651 mutex_enter(&so->so_lock); 652 so_lock_single(so); /* Set SOLOCKED */ 653 } else { 654 ASSERT(MUTEX_HELD(&so->so_lock)); 655 ASSERT(so->so_flag & SOLOCKED); 656 } 657 658 /* 659 * Make sure that there is a preallocated unbind_req message 660 * before binding. This message allocated when the socket is 661 * created but it might be have been consumed. 662 */ 663 if (sti->sti_unbind_mp == NULL) { 664 dprintso(so, 1, ("sobind: allocating unbind_req\n")); 665 /* NOTE: holding so_lock while sleeping */ 666 sti->sti_unbind_mp = 667 soallocproto(sizeof (struct T_unbind_req), _ALLOC_SLEEP, 668 cr); 669 } 670 671 if (flags & _SOBIND_REBIND) { 672 /* 673 * Called from solisten after doing an sotpi_unbind() or 674 * potentially without the unbind (latter for AF_INET{,6}). 
675 */ 676 ASSERT(name == NULL && namelen == 0); 677 678 if (so->so_family == AF_UNIX) { 679 ASSERT(sti->sti_ux_bound_vp); 680 addr = &sti->sti_ux_laddr; 681 addrlen = (t_uscalar_t)sizeof (sti->sti_ux_laddr); 682 dprintso(so, 1, ("sobind rebind UNIX: addrlen %d, " 683 "addr 0x%p, vp %p\n", 684 addrlen, 685 (void *)((struct so_ux_addr *)addr)->soua_vp, 686 (void *)sti->sti_ux_bound_vp)); 687 } else { 688 addr = sti->sti_laddr_sa; 689 addrlen = (t_uscalar_t)sti->sti_laddr_len; 690 } 691 } else if (flags & _SOBIND_UNSPEC) { 692 ASSERT(name == NULL && namelen == 0); 693 694 /* 695 * The caller checked SS_ISBOUND but not necessarily 696 * under so_lock 697 */ 698 if (so->so_state & SS_ISBOUND) { 699 /* No error */ 700 goto done; 701 } 702 703 /* Set an initial local address */ 704 switch (so->so_family) { 705 case AF_UNIX: 706 /* 707 * Use an address with same size as struct sockaddr 708 * just like BSD. 709 */ 710 sti->sti_laddr_len = 711 (socklen_t)sizeof (struct sockaddr); 712 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 713 bzero(sti->sti_laddr_sa, sti->sti_laddr_len); 714 sti->sti_laddr_sa->sa_family = so->so_family; 715 716 /* 717 * Pass down an address with the implicit bind 718 * magic number and the rest all zeros. 719 * The transport will return a unique address. 720 */ 721 sti->sti_ux_laddr.soua_vp = NULL; 722 sti->sti_ux_laddr.soua_magic = SOU_MAGIC_IMPLICIT; 723 addr = &sti->sti_ux_laddr; 724 addrlen = (t_uscalar_t)sizeof (sti->sti_ux_laddr); 725 break; 726 727 case AF_INET: 728 case AF_INET6: 729 /* 730 * An unspecified bind in TPI has a NULL address. 731 * Set the address in sockfs to have the sa_family. 732 */ 733 sti->sti_laddr_len = (so->so_family == AF_INET) ? 
734 (socklen_t)sizeof (sin_t) : 735 (socklen_t)sizeof (sin6_t); 736 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 737 bzero(sti->sti_laddr_sa, sti->sti_laddr_len); 738 sti->sti_laddr_sa->sa_family = so->so_family; 739 addr = NULL; 740 addrlen = 0; 741 break; 742 743 default: 744 /* 745 * An unspecified bind in TPI has a NULL address. 746 * Set the address in sockfs to be zero length. 747 * 748 * Can not assume there is a sa_family for all 749 * protocol families. For example, AF_X25 does not 750 * have a family field. 751 */ 752 bzero(sti->sti_laddr_sa, sti->sti_laddr_len); 753 sti->sti_laddr_len = 0; /* XXX correct? */ 754 addr = NULL; 755 addrlen = 0; 756 break; 757 } 758 759 } else { 760 if (so->so_state & SS_ISBOUND) { 761 /* 762 * If it is ok to rebind the socket, first unbind 763 * with the transport. A rebind to the NULL address 764 * is interpreted as an unbind. 765 * Note that a bind to NULL in BSD does unbind the 766 * socket but it fails with EINVAL. 767 * Note that regular sockets set SOV_SOCKBSD i.e. 768 * _SOBIND_SOCKBSD gets set here hence no type of 769 * socket does currently allow rebinding. 770 * 771 * If the name is NULL just do an unbind. 
772 */ 773 if (flags & (_SOBIND_SOCKBSD|_SOBIND_XPG4_2) && 774 name != NULL) { 775 error = EINVAL; 776 unbind_on_err = 0; 777 eprintsoline(so, error); 778 goto done; 779 } 780 if ((so->so_mode & SM_CONNREQUIRED) && 781 (so->so_state & SS_CANTREBIND)) { 782 error = EINVAL; 783 unbind_on_err = 0; 784 eprintsoline(so, error); 785 goto done; 786 } 787 error = sotpi_unbind(so, 0); 788 if (error) { 789 eprintsoline(so, error); 790 goto done; 791 } 792 ASSERT(!(so->so_state & SS_ISBOUND)); 793 if (name == NULL) { 794 so->so_state &= 795 ~(SS_ISCONNECTED|SS_ISCONNECTING); 796 goto done; 797 } 798 } 799 800 /* X/Open requires this check */ 801 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 802 if (xnet_check_print) { 803 printf("sockfs: X/Open bind state check " 804 "caused EINVAL\n"); 805 } 806 error = EINVAL; 807 goto done; 808 } 809 810 switch (so->so_family) { 811 case AF_UNIX: 812 /* 813 * All AF_UNIX addresses are nul terminated 814 * when copied (copyin_name) in so the minimum 815 * length is 3 bytes. 816 */ 817 if (name == NULL || 818 (ssize_t)namelen <= sizeof (short) + 1) { 819 error = EISDIR; 820 eprintsoline(so, error); 821 goto done; 822 } 823 /* 824 * Verify so_family matches the bound family. 825 * BSD does not check this for AF_UNIX resulting 826 * in funny mknods. 827 */ 828 if (name->sa_family != so->so_family) { 829 error = EAFNOSUPPORT; 830 goto done; 831 } 832 break; 833 case AF_INET: 834 if (name == NULL) { 835 error = EINVAL; 836 eprintsoline(so, error); 837 goto done; 838 } 839 if ((size_t)namelen != sizeof (sin_t)) { 840 error = name->sa_family != so->so_family ? 841 EAFNOSUPPORT : EINVAL; 842 eprintsoline(so, error); 843 goto done; 844 } 845 if ((flags & _SOBIND_XPG4_2) && 846 (name->sa_family != so->so_family)) { 847 /* 848 * This check has to be made for X/Open 849 * sockets however application failures have 850 * been observed when it is applied to 851 * all sockets. 
852 */ 853 error = EAFNOSUPPORT; 854 eprintsoline(so, error); 855 goto done; 856 } 857 /* 858 * Force a zero sa_family to match so_family. 859 * 860 * Some programs like inetd(8) don't set the 861 * family field. Other programs leave 862 * sin_family set to garbage - SunOS 4.X does 863 * not check the family field on a bind. 864 * We use the family field that 865 * was passed in to the socket() call. 866 */ 867 name->sa_family = so->so_family; 868 break; 869 870 case AF_INET6: { 871 #ifdef DEBUG 872 sin6_t *sin6 = (sin6_t *)name; 873 #endif /* DEBUG */ 874 875 if (name == NULL) { 876 error = EINVAL; 877 eprintsoline(so, error); 878 goto done; 879 } 880 if ((size_t)namelen != sizeof (sin6_t)) { 881 error = name->sa_family != so->so_family ? 882 EAFNOSUPPORT : EINVAL; 883 eprintsoline(so, error); 884 goto done; 885 } 886 if (name->sa_family != so->so_family) { 887 /* 888 * With IPv6 we require the family to match 889 * unlike in IPv4. 890 */ 891 error = EAFNOSUPPORT; 892 eprintsoline(so, error); 893 goto done; 894 } 895 #ifdef DEBUG 896 /* 897 * Verify that apps don't forget to clear 898 * sin6_scope_id etc 899 */ 900 if (sin6->sin6_scope_id != 0 && 901 !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) { 902 zcmn_err(getzoneid(), CE_WARN, 903 "bind with uninitialized sin6_scope_id " 904 "(%d) on socket. Pid = %d\n", 905 (int)sin6->sin6_scope_id, 906 (int)curproc->p_pid); 907 } 908 if (sin6->__sin6_src_id != 0) { 909 zcmn_err(getzoneid(), CE_WARN, 910 "bind with uninitialized __sin6_src_id " 911 "(%d) on socket. Pid = %d\n", 912 (int)sin6->__sin6_src_id, 913 (int)curproc->p_pid); 914 } 915 #endif /* DEBUG */ 916 break; 917 } 918 default: 919 /* 920 * Don't do any length or sa_family check to allow 921 * non-sockaddr style addresses. 
922 */ 923 if (name == NULL) { 924 error = EINVAL; 925 eprintsoline(so, error); 926 goto done; 927 } 928 break; 929 } 930 931 if (namelen > (t_uscalar_t)sti->sti_laddr_maxlen) { 932 error = ENAMETOOLONG; 933 eprintsoline(so, error); 934 goto done; 935 } 936 /* 937 * Save local address. 938 */ 939 sti->sti_laddr_len = (socklen_t)namelen; 940 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 941 bcopy(name, sti->sti_laddr_sa, namelen); 942 943 addr = sti->sti_laddr_sa; 944 addrlen = (t_uscalar_t)sti->sti_laddr_len; 945 switch (so->so_family) { 946 case AF_INET6: 947 case AF_INET: 948 break; 949 case AF_UNIX: { 950 struct sockaddr_un *soun = 951 (struct sockaddr_un *)sti->sti_laddr_sa; 952 struct vnode *vp, *rvp; 953 struct vattr vattr; 954 955 ASSERT(sti->sti_ux_bound_vp == NULL); 956 /* 957 * Create vnode for the specified path name. 958 * Keep vnode held with a reference in sti_ux_bound_vp. 959 * Use the vnode pointer as the address used in the 960 * bind with the transport. 961 * 962 * Use the same mode as in BSD. In particular this does 963 * not observe the umask. 964 */ 965 /* MAXPATHLEN + soun_family + nul termination */ 966 if (sti->sti_laddr_len > 967 (socklen_t)(MAXPATHLEN + sizeof (short) + 1)) { 968 error = ENAMETOOLONG; 969 eprintsoline(so, error); 970 goto done; 971 } 972 vattr.va_type = VSOCK; 973 vattr.va_mode = 0777 & ~PTOU(curproc)->u_cmask; 974 vattr.va_mask = AT_TYPE|AT_MODE; 975 /* NOTE: holding so_lock */ 976 error = vn_create(soun->sun_path, UIO_SYSSPACE, &vattr, 977 EXCL, 0, &vp, CRMKNOD, 0, 0); 978 if (error) { 979 if (error == EEXIST) 980 error = EADDRINUSE; 981 eprintsoline(so, error); 982 goto done; 983 } 984 /* 985 * Establish pointer from the underlying filesystem 986 * vnode to the socket node. 987 * sti_ux_bound_vp and v_stream->sd_vnode form the 988 * cross-linkage between the underlying filesystem 989 * node and the socket node. 
990 */ 991 992 if ((VOP_REALVP(vp, &rvp, NULL) == 0) && (vp != rvp)) { 993 VN_HOLD(rvp); 994 VN_RELE(vp); 995 vp = rvp; 996 } 997 998 ASSERT(SOTOV(so)->v_stream); 999 mutex_enter(&vp->v_lock); 1000 vp->v_stream = SOTOV(so)->v_stream; 1001 sti->sti_ux_bound_vp = vp; 1002 mutex_exit(&vp->v_lock); 1003 1004 /* 1005 * Use the vnode pointer value as a unique address 1006 * (together with the magic number to avoid conflicts 1007 * with implicit binds) in the transport provider. 1008 */ 1009 sti->sti_ux_laddr.soua_vp = 1010 (void *)sti->sti_ux_bound_vp; 1011 sti->sti_ux_laddr.soua_magic = SOU_MAGIC_EXPLICIT; 1012 addr = &sti->sti_ux_laddr; 1013 addrlen = (t_uscalar_t)sizeof (sti->sti_ux_laddr); 1014 dprintso(so, 1, ("sobind UNIX: addrlen %d, addr %p\n", 1015 addrlen, 1016 (void *)((struct so_ux_addr *)addr)->soua_vp)); 1017 break; 1018 } 1019 } /* end switch (so->so_family) */ 1020 } 1021 1022 /* 1023 * set SS_ACCEPTCONN before sending down O_T_BIND_REQ since 1024 * the transport can start passing up T_CONN_IND messages 1025 * as soon as it receives the bind req and strsock_proto() 1026 * insists that SS_ACCEPTCONN is set when processing T_CONN_INDs. 1027 */ 1028 if (flags & _SOBIND_LISTEN) { 1029 if ((so->so_state & SS_ACCEPTCONN) == 0) 1030 clear_acceptconn_on_err = B_TRUE; 1031 save_so_backlog = so->so_backlog; 1032 restore_backlog_on_err = B_TRUE; 1033 so->so_state |= SS_ACCEPTCONN; 1034 so->so_backlog = backlog; 1035 } 1036 1037 /* 1038 * We send a T_BIND_REQ for TCP/UDP since we know it supports it, 1039 * for other transports we will send in a O_T_BIND_REQ. 
1040 */ 1041 if (tcp_udp_xport && 1042 (so->so_family == AF_INET || so->so_family == AF_INET6)) 1043 PRIM_type = T_BIND_REQ; 1044 1045 bind_req.PRIM_type = PRIM_type; 1046 bind_req.ADDR_length = addrlen; 1047 bind_req.ADDR_offset = (t_scalar_t)sizeof (bind_req); 1048 bind_req.CONIND_number = backlog; 1049 /* NOTE: holding so_lock while sleeping */ 1050 mp = soallocproto2(&bind_req, sizeof (bind_req), 1051 addr, addrlen, 0, _ALLOC_SLEEP, cr); 1052 sti->sti_laddr_valid = 0; 1053 1054 /* Done using sti_laddr_sa - can drop the lock */ 1055 mutex_exit(&so->so_lock); 1056 1057 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1058 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1059 if (error) { 1060 eprintsoline(so, error); 1061 mutex_enter(&so->so_lock); 1062 goto done; 1063 } 1064 1065 mutex_enter(&so->so_lock); 1066 error = sowaitprim(so, PRIM_type, T_BIND_ACK, 1067 (t_uscalar_t)sizeof (*bind_ack), &mp, 0); 1068 if (error) { 1069 eprintsoline(so, error); 1070 goto done; 1071 } 1072 ASSERT(mp); 1073 /* 1074 * Even if some TPI message (e.g. T_DISCON_IND) was received in 1075 * strsock_proto while the lock was dropped above, the bind 1076 * is allowed to complete. 1077 */ 1078 1079 /* Mark as bound. This will be undone if we detect errors below. */ 1080 if (flags & _SOBIND_NOXLATE) { 1081 ASSERT(so->so_family == AF_UNIX); 1082 sti->sti_faddr_noxlate = 1; 1083 } 1084 ASSERT(!(so->so_state & SS_ISBOUND) || (flags & _SOBIND_REBIND)); 1085 so->so_state |= SS_ISBOUND; 1086 ASSERT(sti->sti_unbind_mp); 1087 1088 /* note that we've already set SS_ACCEPTCONN above */ 1089 1090 /* 1091 * Recompute addrlen - an unspecied bind sent down an 1092 * address of length zero but we expect the appropriate length 1093 * in return. 1094 */ 1095 addrlen = (t_uscalar_t)(so->so_family == AF_UNIX ? 
1096 sizeof (sti->sti_ux_laddr) : sti->sti_laddr_len); 1097 1098 bind_ack = (struct T_bind_ack *)mp->b_rptr; 1099 /* 1100 * The alignment restriction is really too strict but 1101 * we want enough alignment to inspect the fields of 1102 * a sockaddr_in. 1103 */ 1104 addr = sogetoff(mp, bind_ack->ADDR_offset, 1105 bind_ack->ADDR_length, 1106 __TPI_ALIGN_SIZE); 1107 if (addr == NULL) { 1108 freemsg(mp); 1109 error = EPROTO; 1110 eprintsoline(so, error); 1111 goto done; 1112 } 1113 if (!(flags & _SOBIND_UNSPEC)) { 1114 /* 1115 * Verify that the transport didn't return something we 1116 * did not want e.g. an address other than what we asked for. 1117 * 1118 * NOTE: These checks would go away if/when we switch to 1119 * using the new TPI (in which the transport would fail 1120 * the request instead of assigning a different address). 1121 * 1122 * NOTE2: For protocols that we don't know (i.e. any 1123 * other than AF_INET6, AF_INET and AF_UNIX), we 1124 * cannot know if the transport should be expected to 1125 * return the same address as that requested. 1126 * 1127 * NOTE3: For AF_INET and AF_INET6, TCP/UDP, we send 1128 * down a T_BIND_REQ. We use O_T_BIND_REQ for others. 1129 * 1130 * For example, in the case of netatalk it may be 1131 * inappropriate for the transport to return the 1132 * requested address (as it may have allocated a local 1133 * port number in behaviour similar to that of an 1134 * AF_INET bind request with a port number of zero). 1135 * 1136 * Given the definition of O_T_BIND_REQ, where the 1137 * transport may bind to an address other than the 1138 * requested address, it's not possible to determine 1139 * whether a returned address that differs from the 1140 * requested address is a reason to fail (because the 1141 * requested address was not available) or succeed 1142 * (because the transport allocated an appropriate 1143 * address and/or port). 
1144 * 1145 * sockfs currently requires that the transport return 1146 * the requested address in the T_BIND_ACK, unless 1147 * there is code here to allow for any discrepancy. 1148 * Such code exists for AF_INET and AF_INET6. 1149 * 1150 * Netatalk chooses to return the requested address 1151 * rather than the (correct) allocated address. This 1152 * means that netatalk violates the TPI specification 1153 * (and would not function correctly if used from a 1154 * TLI application), but it does mean that it works 1155 * with sockfs. 1156 * 1157 * As noted above, using the newer XTI bind primitive 1158 * (T_BIND_REQ) in preference to O_T_BIND_REQ would 1159 * allow sockfs to be more sure about whether or not 1160 * the bind request had succeeded (as transports are 1161 * not permitted to bind to a different address than 1162 * that requested - they must return failure). 1163 * Unfortunately, support for T_BIND_REQ may not be 1164 * present in all transport implementations (netatalk, 1165 * for example, doesn't have it), making the 1166 * transition difficult. 1167 */ 1168 if (bind_ack->ADDR_length != addrlen) { 1169 /* Assumes that the requested address was in use */ 1170 freemsg(mp); 1171 error = EADDRINUSE; 1172 eprintsoline(so, error); 1173 goto done; 1174 } 1175 1176 switch (so->so_family) { 1177 case AF_INET6: 1178 case AF_INET: { 1179 sin_t *rname, *aname; 1180 1181 rname = (sin_t *)addr; 1182 aname = (sin_t *)sti->sti_laddr_sa; 1183 1184 /* 1185 * Take advantage of the alignment 1186 * of sin_port and sin6_port which fall 1187 * in the same place in their data structures. 1188 * Just use sin_port for either address family. 1189 * 1190 * This may become a problem if (heaven forbid) 1191 * there's a separate ipv6port_reserved... :-P 1192 * 1193 * Binding to port 0 has the semantics of letting 1194 * the transport bind to any port. 
1195 * 1196 * If the transport is TCP or UDP since we had sent 1197 * a T_BIND_REQ we would not get a port other than 1198 * what we asked for. 1199 */ 1200 if (tcp_udp_xport) { 1201 /* 1202 * Pick up the new port number if we bound to 1203 * port 0. 1204 */ 1205 if (aname->sin_port == 0) 1206 aname->sin_port = rname->sin_port; 1207 sti->sti_laddr_valid = 1; 1208 break; 1209 } 1210 if (aname->sin_port != 0 && 1211 aname->sin_port != rname->sin_port) { 1212 freemsg(mp); 1213 error = EADDRINUSE; 1214 eprintsoline(so, error); 1215 goto done; 1216 } 1217 /* 1218 * Pick up the new port number if we bound to port 0. 1219 */ 1220 aname->sin_port = rname->sin_port; 1221 1222 /* 1223 * Unfortunately, addresses aren't _quite_ the same. 1224 */ 1225 if (so->so_family == AF_INET) { 1226 if (aname->sin_addr.s_addr != 1227 rname->sin_addr.s_addr) { 1228 freemsg(mp); 1229 error = EADDRNOTAVAIL; 1230 eprintsoline(so, error); 1231 goto done; 1232 } 1233 } else { 1234 sin6_t *rname6 = (sin6_t *)rname; 1235 sin6_t *aname6 = (sin6_t *)aname; 1236 1237 if (!IN6_ARE_ADDR_EQUAL(&aname6->sin6_addr, 1238 &rname6->sin6_addr)) { 1239 freemsg(mp); 1240 error = EADDRNOTAVAIL; 1241 eprintsoline(so, error); 1242 goto done; 1243 } 1244 } 1245 break; 1246 } 1247 case AF_UNIX: 1248 if (bcmp(addr, &sti->sti_ux_laddr, addrlen) != 0) { 1249 freemsg(mp); 1250 error = EADDRINUSE; 1251 eprintsoline(so, error); 1252 eprintso(so, 1253 ("addrlen %d, addr 0x%x, vp %p\n", 1254 addrlen, *((int *)addr), 1255 (void *)sti->sti_ux_bound_vp)); 1256 goto done; 1257 } 1258 sti->sti_laddr_valid = 1; 1259 break; 1260 default: 1261 /* 1262 * NOTE: This assumes that addresses can be 1263 * byte-compared for equivalence. 
1264 */ 1265 if (bcmp(addr, sti->sti_laddr_sa, addrlen) != 0) { 1266 freemsg(mp); 1267 error = EADDRINUSE; 1268 eprintsoline(so, error); 1269 goto done; 1270 } 1271 /* 1272 * Don't mark sti_laddr_valid, as we cannot be 1273 * sure that the returned address is the real 1274 * bound address when talking to an unknown 1275 * transport. 1276 */ 1277 break; 1278 } 1279 } else { 1280 /* 1281 * Save for returned address for getsockname. 1282 * Needed for unspecific bind unless transport supports 1283 * the TI_GETMYNAME ioctl. 1284 * Do this for AF_INET{,6} even though they do, as 1285 * caching info here is much better performance than 1286 * a TPI/STREAMS trip to the transport for getsockname. 1287 * Any which can't for some reason _must_ _not_ set 1288 * sti_laddr_valid here for the caching version of 1289 * getsockname to not break; 1290 */ 1291 switch (so->so_family) { 1292 case AF_UNIX: 1293 /* 1294 * Record the address bound with the transport 1295 * for use by socketpair. 1296 */ 1297 bcopy(addr, &sti->sti_ux_laddr, addrlen); 1298 sti->sti_laddr_valid = 1; 1299 break; 1300 case AF_INET: 1301 case AF_INET6: 1302 ASSERT(sti->sti_laddr_len <= sti->sti_laddr_maxlen); 1303 bcopy(addr, sti->sti_laddr_sa, sti->sti_laddr_len); 1304 sti->sti_laddr_valid = 1; 1305 break; 1306 default: 1307 /* 1308 * Don't mark sti_laddr_valid, as we cannot be 1309 * sure that the returned address is the real 1310 * bound address when talking to an unknown 1311 * transport. 
1312 */ 1313 break; 1314 } 1315 } 1316 1317 freemsg(mp); 1318 1319 done: 1320 if (error) { 1321 /* reset state & backlog to values held on entry */ 1322 if (clear_acceptconn_on_err == B_TRUE) 1323 so->so_state &= ~SS_ACCEPTCONN; 1324 if (restore_backlog_on_err == B_TRUE) 1325 so->so_backlog = save_so_backlog; 1326 1327 if (unbind_on_err && so->so_state & SS_ISBOUND) { 1328 int err; 1329 1330 err = sotpi_unbind(so, 0); 1331 /* LINTED - statement has no consequent: if */ 1332 if (err) { 1333 eprintsoline(so, error); 1334 } else { 1335 ASSERT(!(so->so_state & SS_ISBOUND)); 1336 } 1337 } 1338 } 1339 if (!(flags & _SOBIND_LOCK_HELD)) { 1340 so_unlock_single(so, SOLOCKED); 1341 mutex_exit(&so->so_lock); 1342 } else { 1343 ASSERT(MUTEX_HELD(&so->so_lock)); 1344 ASSERT(so->so_flag & SOLOCKED); 1345 } 1346 return (error); 1347 } 1348 1349 /* bind the socket */ 1350 static int 1351 sotpi_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen, 1352 int flags, struct cred *cr) 1353 { 1354 if ((flags & _SOBIND_SOCKETPAIR) == 0) 1355 return (sotpi_bindlisten(so, name, namelen, 0, flags, cr)); 1356 1357 flags &= ~_SOBIND_SOCKETPAIR; 1358 return (sotpi_bindlisten(so, name, namelen, 1, flags, cr)); 1359 } 1360 1361 /* 1362 * Unbind a socket - used when bind() fails, when bind() specifies a NULL 1363 * address, or when listen needs to unbind and bind. 1364 * If the _SOUNBIND_REBIND flag is specified the addresses are retained 1365 * so that a sobind can pick them up. 
1366 */ 1367 static int 1368 sotpi_unbind(struct sonode *so, int flags) 1369 { 1370 struct T_unbind_req unbind_req; 1371 int error = 0; 1372 mblk_t *mp; 1373 sotpi_info_t *sti = SOTOTPI(so); 1374 1375 dprintso(so, 1, ("sotpi_unbind(%p, 0x%x) %s\n", 1376 (void *)so, flags, pr_state(so->so_state, so->so_mode))); 1377 1378 ASSERT(MUTEX_HELD(&so->so_lock)); 1379 ASSERT(so->so_flag & SOLOCKED); 1380 1381 if (!(so->so_state & SS_ISBOUND)) { 1382 error = EINVAL; 1383 eprintsoline(so, error); 1384 goto done; 1385 } 1386 1387 mutex_exit(&so->so_lock); 1388 1389 /* 1390 * Flush the read and write side (except stream head read queue) 1391 * and send down T_UNBIND_REQ. 1392 */ 1393 (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHRW); 1394 1395 unbind_req.PRIM_type = T_UNBIND_REQ; 1396 mp = soallocproto1(&unbind_req, sizeof (unbind_req), 1397 0, _ALLOC_SLEEP, CRED()); 1398 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1399 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1400 mutex_enter(&so->so_lock); 1401 if (error) { 1402 eprintsoline(so, error); 1403 goto done; 1404 } 1405 1406 error = sowaitokack(so, T_UNBIND_REQ); 1407 if (error) { 1408 eprintsoline(so, error); 1409 goto done; 1410 } 1411 1412 /* 1413 * Even if some TPI message (e.g. T_DISCON_IND) was received in 1414 * strsock_proto while the lock was dropped above, the unbind 1415 * is allowed to complete. 1416 */ 1417 if (!(flags & _SOUNBIND_REBIND)) { 1418 /* 1419 * Clear out bound address. 1420 */ 1421 vnode_t *vp; 1422 1423 if ((vp = sti->sti_ux_bound_vp) != NULL) { 1424 sti->sti_ux_bound_vp = NULL; 1425 vn_rele_stream(vp); 1426 } 1427 /* Clear out address */ 1428 sti->sti_laddr_len = 0; 1429 } 1430 so->so_state &= ~(SS_ISBOUND|SS_ACCEPTCONN); 1431 sti->sti_laddr_valid = 0; 1432 1433 done: 1434 1435 /* If the caller held the lock don't release it here */ 1436 ASSERT(MUTEX_HELD(&so->so_lock)); 1437 ASSERT(so->so_flag & SOLOCKED); 1438 1439 return (error); 1440 } 1441 1442 /* 1443 * listen on the socket. 
1444 * For TPI conforming transports this has to first unbind with the transport 1445 * and then bind again using the new backlog. 1446 */ 1447 /* ARGSUSED */ 1448 int 1449 sotpi_listen(struct sonode *so, int backlog, struct cred *cr) 1450 { 1451 int error = 0; 1452 sotpi_info_t *sti = SOTOTPI(so); 1453 1454 dprintso(so, 1, ("sotpi_listen(%p, %d) %s\n", 1455 (void *)so, backlog, pr_state(so->so_state, so->so_mode))); 1456 1457 if (sti->sti_serv_type == T_CLTS) 1458 return (EOPNOTSUPP); 1459 1460 /* 1461 * If the socket is ready to accept connections already, then 1462 * return without doing anything. This avoids a problem where 1463 * a second listen() call fails if a connection is pending and 1464 * leaves the socket unbound. Only when we are not unbinding 1465 * with the transport can we safely increase the backlog. 1466 */ 1467 if (so->so_state & SS_ACCEPTCONN && 1468 !((so->so_family == AF_INET || so->so_family == AF_INET6) && 1469 /*CONSTCOND*/ 1470 !solisten_tpi_tcp)) 1471 return (0); 1472 1473 if (so->so_state & SS_ISCONNECTED) 1474 return (EINVAL); 1475 1476 mutex_enter(&so->so_lock); 1477 so_lock_single(so); /* Set SOLOCKED */ 1478 1479 /* 1480 * If the listen doesn't change the backlog we do nothing. 1481 * This avoids an EPROTO error from the transport. 1482 */ 1483 if ((so->so_state & SS_ACCEPTCONN) && 1484 so->so_backlog == backlog) 1485 goto done; 1486 1487 if (!(so->so_state & SS_ISBOUND)) { 1488 /* 1489 * Must have been explicitly bound in the UNIX domain. 1490 */ 1491 if (so->so_family == AF_UNIX) { 1492 error = EINVAL; 1493 goto done; 1494 } 1495 error = sotpi_bindlisten(so, NULL, 0, backlog, 1496 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD|_SOBIND_LISTEN, cr); 1497 } else if (backlog > 0) { 1498 /* 1499 * AF_INET{,6} hack to avoid losing the port. 1500 * Assumes that all AF_INET{,6} transports can handle a 1501 * O_T_BIND_REQ with a non-zero CONIND_number when the TPI 1502 * has already bound thus it is possible to avoid the unbind. 
1503 */ 1504 if (!((so->so_family == AF_INET || so->so_family == AF_INET6) && 1505 /*CONSTCOND*/ 1506 !solisten_tpi_tcp)) { 1507 error = sotpi_unbind(so, _SOUNBIND_REBIND); 1508 if (error) 1509 goto done; 1510 } 1511 error = sotpi_bindlisten(so, NULL, 0, backlog, 1512 _SOBIND_REBIND|_SOBIND_LOCK_HELD|_SOBIND_LISTEN, cr); 1513 } else { 1514 so->so_state |= SS_ACCEPTCONN; 1515 so->so_backlog = backlog; 1516 } 1517 if (error) 1518 goto done; 1519 ASSERT(so->so_state & SS_ACCEPTCONN); 1520 done: 1521 so_unlock_single(so, SOLOCKED); 1522 mutex_exit(&so->so_lock); 1523 return (error); 1524 } 1525 1526 /* 1527 * Disconnect either a specified seqno or all (-1). 1528 * The former is used on listening sockets only. 1529 * 1530 * When seqno == -1 sodisconnect could call sotpi_unbind. However, 1531 * the current use of sodisconnect(seqno == -1) is only for shutdown 1532 * so there is no point (and potentially incorrect) to unbind. 1533 */ 1534 static int 1535 sodisconnect(struct sonode *so, t_scalar_t seqno, int flags) 1536 { 1537 struct T_discon_req discon_req; 1538 int error = 0; 1539 mblk_t *mp; 1540 1541 dprintso(so, 1, ("sodisconnect(%p, %d, 0x%x) %s\n", 1542 (void *)so, seqno, flags, pr_state(so->so_state, so->so_mode))); 1543 1544 if (!(flags & _SODISCONNECT_LOCK_HELD)) { 1545 mutex_enter(&so->so_lock); 1546 so_lock_single(so); /* Set SOLOCKED */ 1547 } else { 1548 ASSERT(MUTEX_HELD(&so->so_lock)); 1549 ASSERT(so->so_flag & SOLOCKED); 1550 } 1551 1552 if (!(so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING|SS_ACCEPTCONN))) { 1553 error = EINVAL; 1554 eprintsoline(so, error); 1555 goto done; 1556 } 1557 1558 mutex_exit(&so->so_lock); 1559 /* 1560 * Flush the write side (unless this is a listener) 1561 * and then send down a T_DISCON_REQ. 1562 * (Don't flush on listener since it could flush {O_}T_CONN_RES 1563 * and other messages.) 
1564 */ 1565 if (!(so->so_state & SS_ACCEPTCONN)) 1566 (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHW); 1567 1568 discon_req.PRIM_type = T_DISCON_REQ; 1569 discon_req.SEQ_number = seqno; 1570 mp = soallocproto1(&discon_req, sizeof (discon_req), 1571 0, _ALLOC_SLEEP, CRED()); 1572 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 1573 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1574 mutex_enter(&so->so_lock); 1575 if (error) { 1576 eprintsoline(so, error); 1577 goto done; 1578 } 1579 1580 error = sowaitokack(so, T_DISCON_REQ); 1581 if (error) { 1582 eprintsoline(so, error); 1583 goto done; 1584 } 1585 /* 1586 * Even if some TPI message (e.g. T_DISCON_IND) was received in 1587 * strsock_proto while the lock was dropped above, the disconnect 1588 * is allowed to complete. However, it is not possible to 1589 * assert that SS_ISCONNECTED|SS_ISCONNECTING are set. 1590 */ 1591 so->so_state &= ~(SS_ISCONNECTED|SS_ISCONNECTING); 1592 SOTOTPI(so)->sti_laddr_valid = 0; 1593 SOTOTPI(so)->sti_faddr_valid = 0; 1594 done: 1595 if (!(flags & _SODISCONNECT_LOCK_HELD)) { 1596 so_unlock_single(so, SOLOCKED); 1597 mutex_exit(&so->so_lock); 1598 } else { 1599 /* If the caller held the lock don't release it here */ 1600 ASSERT(MUTEX_HELD(&so->so_lock)); 1601 ASSERT(so->so_flag & SOLOCKED); 1602 } 1603 return (error); 1604 } 1605 1606 /* ARGSUSED */ 1607 int 1608 sotpi_accept(struct sonode *so, int fflag, struct cred *cr, 1609 struct sonode **nsop) 1610 { 1611 struct T_conn_ind *conn_ind; 1612 struct T_conn_res *conn_res; 1613 int error = 0; 1614 mblk_t *mp, *ack_mp; 1615 struct sonode *nso; 1616 vnode_t *nvp; 1617 void *src; 1618 t_uscalar_t srclen; 1619 void *opt; 1620 t_uscalar_t optlen; 1621 t_scalar_t PRIM_type; 1622 t_scalar_t SEQ_number; 1623 size_t sinlen; 1624 sotpi_info_t *sti = SOTOTPI(so); 1625 sotpi_info_t *nsti; 1626 1627 dprintso(so, 1, ("sotpi_accept(%p, 0x%x, %p) %s\n", 1628 (void *)so, fflag, (void *)nsop, 1629 pr_state(so->so_state, so->so_mode))); 1630 1631 /* 1632 * 
Defer single-threading the accepting socket until 1633 * the T_CONN_IND has been received and parsed and the 1634 * new sonode has been opened. 1635 */ 1636 1637 /* Check that we are not already connected */ 1638 if ((so->so_state & SS_ACCEPTCONN) == 0) 1639 goto conn_bad; 1640 again: 1641 if ((error = sowaitconnind(so, fflag, &mp)) != 0) 1642 goto e_bad; 1643 1644 ASSERT(mp != NULL); 1645 conn_ind = (struct T_conn_ind *)mp->b_rptr; 1646 1647 /* 1648 * Save SEQ_number for error paths. 1649 */ 1650 SEQ_number = conn_ind->SEQ_number; 1651 1652 srclen = conn_ind->SRC_length; 1653 src = sogetoff(mp, conn_ind->SRC_offset, srclen, 1); 1654 if (src == NULL) { 1655 error = EPROTO; 1656 freemsg(mp); 1657 eprintsoline(so, error); 1658 goto disconnect_unlocked; 1659 } 1660 optlen = conn_ind->OPT_length; 1661 switch (so->so_family) { 1662 case AF_INET: 1663 case AF_INET6: 1664 if ((optlen == sizeof (intptr_t)) && (sti->sti_direct != 0)) { 1665 bcopy(mp->b_rptr + conn_ind->OPT_offset, 1666 &opt, conn_ind->OPT_length); 1667 } else { 1668 /* 1669 * The transport (in this case TCP) hasn't sent up 1670 * a pointer to an instance for the accept fast-path. 1671 * Disable fast-path completely because the call to 1672 * sotpi_create() below would otherwise create an 1673 * incomplete TCP instance, which would lead to 1674 * problems when sockfs sends a normal T_CONN_RES 1675 * message down the new stream. 1676 */ 1677 if (sti->sti_direct) { 1678 int rval; 1679 /* 1680 * For consistency we inform tcp to disable 1681 * direct interface on the listener, though 1682 * we can certainly live without doing this 1683 * because no data will ever travel upstream 1684 * on the listening socket. 
1685 */ 1686 sti->sti_direct = 0; 1687 (void) strioctl(SOTOV(so), _SIOCSOCKFALLBACK, 1688 0, 0, K_TO_K, cr, &rval); 1689 } 1690 opt = NULL; 1691 optlen = 0; 1692 } 1693 break; 1694 case AF_UNIX: 1695 default: 1696 if (optlen != 0) { 1697 opt = sogetoff(mp, conn_ind->OPT_offset, optlen, 1698 __TPI_ALIGN_SIZE); 1699 if (opt == NULL) { 1700 error = EPROTO; 1701 freemsg(mp); 1702 eprintsoline(so, error); 1703 goto disconnect_unlocked; 1704 } 1705 } 1706 if (so->so_family == AF_UNIX) { 1707 if (!sti->sti_faddr_noxlate) { 1708 src = NULL; 1709 srclen = 0; 1710 } 1711 /* Extract src address from options */ 1712 if (optlen != 0) 1713 so_getopt_srcaddr(opt, optlen, &src, &srclen); 1714 } 1715 break; 1716 } 1717 1718 /* 1719 * Create the new socket. 1720 */ 1721 nso = socket_newconn(so, NULL, NULL, SOCKET_SLEEP, &error); 1722 if (nso == NULL) { 1723 ASSERT(error != 0); 1724 /* 1725 * Accept can not fail with ENOBUFS. sotpi_create 1726 * sleeps waiting for memory until a signal is caught 1727 * so return EINTR. 1728 */ 1729 freemsg(mp); 1730 if (error == ENOBUFS) 1731 error = EINTR; 1732 goto e_disc_unl; 1733 } 1734 nvp = SOTOV(nso); 1735 nsti = SOTOTPI(nso); 1736 1737 #ifdef DEBUG 1738 /* 1739 * SO_DEBUG is used to trigger the dprint* and eprint* macros thus 1740 * it's inherited early to allow debugging of the accept code itself. 1741 */ 1742 nso->so_options |= so->so_options & SO_DEBUG; 1743 #endif /* DEBUG */ 1744 1745 /* 1746 * Save the SRC address from the T_CONN_IND 1747 * for getpeername to work on AF_UNIX and on transports that do not 1748 * support TI_GETPEERNAME. 1749 * 1750 * NOTE: AF_UNIX NUL termination is ensured by the sender's 1751 * copyin_name(). 
1752 */ 1753 if (srclen > (t_uscalar_t)nsti->sti_faddr_maxlen) { 1754 error = EINVAL; 1755 freemsg(mp); 1756 eprintsoline(so, error); 1757 goto disconnect_vp_unlocked; 1758 } 1759 nsti->sti_faddr_len = (socklen_t)srclen; 1760 ASSERT(sti->sti_faddr_len <= sti->sti_faddr_maxlen); 1761 bcopy(src, nsti->sti_faddr_sa, srclen); 1762 nsti->sti_faddr_valid = 1; 1763 1764 /* 1765 * Record so_peercred and so_cpid from a cred in the T_CONN_IND. 1766 */ 1767 if ((DB_REF(mp) > 1) || MBLKSIZE(mp) < 1768 (sizeof (struct T_conn_res) + sizeof (intptr_t))) { 1769 cred_t *cr; 1770 pid_t cpid; 1771 1772 cr = msg_getcred(mp, &cpid); 1773 if (cr != NULL) { 1774 crhold(cr); 1775 nso->so_peercred = cr; 1776 nso->so_cpid = cpid; 1777 } 1778 freemsg(mp); 1779 1780 mp = soallocproto1(NULL, sizeof (struct T_conn_res) + 1781 sizeof (intptr_t), 0, _ALLOC_INTR, cr); 1782 if (mp == NULL) { 1783 /* 1784 * Accept can not fail with ENOBUFS. 1785 * A signal was caught so return EINTR. 1786 */ 1787 error = EINTR; 1788 eprintsoline(so, error); 1789 goto disconnect_vp_unlocked; 1790 } 1791 conn_res = (struct T_conn_res *)mp->b_rptr; 1792 } else { 1793 /* 1794 * For efficency reasons we use msg_extractcred; no crhold 1795 * needed since db_credp is cleared (i.e., we move the cred 1796 * from the message to so_peercred. 1797 */ 1798 nso->so_peercred = msg_extractcred(mp, &nso->so_cpid); 1799 1800 mp->b_rptr = DB_BASE(mp); 1801 conn_res = (struct T_conn_res *)mp->b_rptr; 1802 mp->b_wptr = mp->b_rptr + sizeof (struct T_conn_res); 1803 1804 mblk_setcred(mp, cr, curproc->p_pid); 1805 } 1806 1807 /* 1808 * New socket must be bound at least in sockfs and, except for AF_INET, 1809 * (or AF_INET6) it also has to be bound in the transport provider. 1810 * We set the local address in the sonode from the T_OK_ACK of the 1811 * T_CONN_RES. For this reason the address we bind to here isn't 1812 * important. 
1813 */ 1814 if ((nso->so_family == AF_INET || nso->so_family == AF_INET6) && 1815 /*CONSTCOND*/ 1816 nso->so_type == SOCK_STREAM && !soaccept_tpi_tcp) { 1817 /* 1818 * Optimization for AF_INET{,6} transports 1819 * that can handle a T_CONN_RES without being bound. 1820 */ 1821 mutex_enter(&nso->so_lock); 1822 so_automatic_bind(nso); 1823 mutex_exit(&nso->so_lock); 1824 } else { 1825 /* Perform NULL bind with the transport provider. */ 1826 if ((error = sotpi_bind(nso, NULL, 0, _SOBIND_UNSPEC, 1827 cr)) != 0) { 1828 ASSERT(error != ENOBUFS); 1829 freemsg(mp); 1830 eprintsoline(nso, error); 1831 goto disconnect_vp_unlocked; 1832 } 1833 } 1834 1835 /* 1836 * Inherit SIOCSPGRP, SS_ASYNC before we send the {O_}T_CONN_RES 1837 * so that any data arriving on the new socket will cause the 1838 * appropriate signals to be delivered for the new socket. 1839 * 1840 * No other thread (except strsock_proto and strsock_misc) 1841 * can access the new socket thus we relax the locking. 1842 */ 1843 nso->so_pgrp = so->so_pgrp; 1844 nso->so_state |= so->so_state & SS_ASYNC; 1845 nsti->sti_faddr_noxlate = sti->sti_faddr_noxlate; 1846 1847 if (nso->so_pgrp != 0) { 1848 if ((error = so_set_events(nso, nvp, cr)) != 0) { 1849 eprintsoline(nso, error); 1850 error = 0; 1851 nso->so_pgrp = 0; 1852 } 1853 } 1854 1855 /* 1856 * Make note of the socket level options. TCP and IP level options 1857 * are already inherited. We could do all this after accept is 1858 * successful but doing it here simplifies code and no harm done 1859 * for error case. 
1860 */ 1861 nso->so_options = so->so_options & (SO_DEBUG|SO_REUSEADDR|SO_KEEPALIVE| 1862 SO_DONTROUTE|SO_BROADCAST|SO_USELOOPBACK| 1863 SO_OOBINLINE|SO_DGRAM_ERRIND|SO_LINGER); 1864 nso->so_sndbuf = so->so_sndbuf; 1865 nso->so_rcvbuf = so->so_rcvbuf; 1866 if (nso->so_options & SO_LINGER) 1867 nso->so_linger = so->so_linger; 1868 1869 /* 1870 * Note that the following sti_direct code path should be 1871 * removed once we are confident that the direct sockets 1872 * do not result in any degradation. 1873 */ 1874 if (sti->sti_direct) { 1875 1876 ASSERT(opt != NULL); 1877 1878 conn_res->OPT_length = optlen; 1879 conn_res->OPT_offset = MBLKL(mp); 1880 bcopy(&opt, mp->b_wptr, optlen); 1881 mp->b_wptr += optlen; 1882 conn_res->PRIM_type = T_CONN_RES; 1883 conn_res->ACCEPTOR_id = 0; 1884 PRIM_type = T_CONN_RES; 1885 1886 /* Send down the T_CONN_RES on acceptor STREAM */ 1887 error = kstrputmsg(SOTOV(nso), mp, NULL, 1888 0, 0, MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 1889 if (error) { 1890 mutex_enter(&so->so_lock); 1891 so_lock_single(so); 1892 eprintsoline(so, error); 1893 goto disconnect_vp; 1894 } 1895 mutex_enter(&nso->so_lock); 1896 error = sowaitprim(nso, T_CONN_RES, T_OK_ACK, 1897 (t_uscalar_t)sizeof (struct T_ok_ack), &ack_mp, 0); 1898 if (error) { 1899 mutex_exit(&nso->so_lock); 1900 mutex_enter(&so->so_lock); 1901 so_lock_single(so); 1902 eprintsoline(so, error); 1903 goto disconnect_vp; 1904 } 1905 if (nso->so_family == AF_INET) { 1906 sin_t *sin; 1907 1908 sin = (sin_t *)(ack_mp->b_rptr + 1909 sizeof (struct T_ok_ack)); 1910 bcopy(sin, nsti->sti_laddr_sa, sizeof (sin_t)); 1911 nsti->sti_laddr_len = sizeof (sin_t); 1912 } else { 1913 sin6_t *sin6; 1914 1915 sin6 = (sin6_t *)(ack_mp->b_rptr + 1916 sizeof (struct T_ok_ack)); 1917 bcopy(sin6, nsti->sti_laddr_sa, sizeof (sin6_t)); 1918 nsti->sti_laddr_len = sizeof (sin6_t); 1919 } 1920 freemsg(ack_mp); 1921 1922 nso->so_state |= SS_ISCONNECTED; 1923 nso->so_proto_handle = (sock_lower_handle_t)opt; 1924 
nsti->sti_laddr_valid = 1; 1925 1926 mutex_exit(&nso->so_lock); 1927 1928 /* 1929 * It's possible, through the use of autopush for example, 1930 * that the acceptor stream may not support sti_direct 1931 * semantics. If the new socket does not support sti_direct 1932 * we issue a _SIOCSOCKFALLBACK to inform the transport 1933 * as we would in the I_PUSH case. 1934 */ 1935 if (nsti->sti_direct == 0) { 1936 int rval; 1937 1938 if ((error = strioctl(SOTOV(nso), _SIOCSOCKFALLBACK, 1939 0, 0, K_TO_K, cr, &rval)) != 0) { 1940 mutex_enter(&so->so_lock); 1941 so_lock_single(so); 1942 eprintsoline(so, error); 1943 goto disconnect_vp; 1944 } 1945 } 1946 1947 /* 1948 * Pass out new socket. 1949 */ 1950 if (nsop != NULL) 1951 *nsop = nso; 1952 1953 return (0); 1954 } 1955 1956 /* 1957 * This is the non-performance case for sockets (e.g. AF_UNIX sockets) 1958 * which don't support the FireEngine accept fast-path. It is also 1959 * used when the virtual "sockmod" has been I_POP'd and I_PUSH'd 1960 * again. Neither sockfs nor TCP attempt to find out if some other 1961 * random module has been inserted in between (in which case we 1962 * should follow TLI accept behaviour). We blindly assume the worst 1963 * case and revert back to old behaviour i.e. TCP will not send us 1964 * any option (eager) and the accept should happen on the listener 1965 * queue. Any queued T_conn_ind have already got their options removed 1966 * by so_sock2_stream() when "sockmod" was I_POP'd. 1967 */ 1968 /* 1969 * Fill in the {O_}T_CONN_RES before getting SOLOCKED. 1970 */ 1971 if ((nso->so_mode & SM_ACCEPTOR_ID) == 0) { 1972 #ifdef _ILP32 1973 queue_t *q; 1974 1975 /* 1976 * Find read queue in driver 1977 * Can safely do this since we "own" nso/nvp. 
1978 */ 1979 q = strvp2wq(nvp)->q_next; 1980 while (SAMESTR(q)) 1981 q = q->q_next; 1982 q = RD(q); 1983 conn_res->ACCEPTOR_id = (t_uscalar_t)q; 1984 #else 1985 conn_res->ACCEPTOR_id = (t_uscalar_t)getminor(nvp->v_rdev); 1986 #endif /* _ILP32 */ 1987 conn_res->PRIM_type = O_T_CONN_RES; 1988 PRIM_type = O_T_CONN_RES; 1989 } else { 1990 conn_res->ACCEPTOR_id = nsti->sti_acceptor_id; 1991 conn_res->PRIM_type = T_CONN_RES; 1992 PRIM_type = T_CONN_RES; 1993 } 1994 conn_res->SEQ_number = SEQ_number; 1995 conn_res->OPT_length = 0; 1996 conn_res->OPT_offset = 0; 1997 1998 mutex_enter(&so->so_lock); 1999 so_lock_single(so); /* Set SOLOCKED */ 2000 mutex_exit(&so->so_lock); 2001 2002 error = kstrputmsg(SOTOV(so), mp, NULL, 2003 0, 0, MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 2004 mutex_enter(&so->so_lock); 2005 if (error) { 2006 eprintsoline(so, error); 2007 goto disconnect_vp; 2008 } 2009 error = sowaitprim(so, PRIM_type, T_OK_ACK, 2010 (t_uscalar_t)sizeof (struct T_ok_ack), &ack_mp, 0); 2011 if (error) { 2012 eprintsoline(so, error); 2013 goto disconnect_vp; 2014 } 2015 mutex_exit(&so->so_lock); 2016 /* 2017 * If there is a sin/sin6 appended onto the T_OK_ACK use 2018 * that to set the local address. If this is not present 2019 * then we zero out the address and don't set the 2020 * sti_laddr_valid bit. For AF_UNIX endpoints we copy over 2021 * the pathname from the listening socket. 2022 * In the case where this is TCP or an AF_UNIX socket the 2023 * client side may have queued data or a T_ORDREL in the 2024 * transport. Having now sent the T_CONN_RES we may receive 2025 * those queued messages at any time. Hold the acceptor 2026 * so_lock until its state and laddr are finalized. 2027 */ 2028 mutex_enter(&nso->so_lock); 2029 sinlen = (nso->so_family == AF_INET) ? 
sizeof (sin_t) : sizeof (sin6_t); 2030 if ((nso->so_family == AF_INET) || (nso->so_family == AF_INET6) && 2031 MBLKL(ack_mp) == (sizeof (struct T_ok_ack) + sinlen)) { 2032 ack_mp->b_rptr += sizeof (struct T_ok_ack); 2033 bcopy(ack_mp->b_rptr, nsti->sti_laddr_sa, sinlen); 2034 nsti->sti_laddr_len = sinlen; 2035 nsti->sti_laddr_valid = 1; 2036 } else if (nso->so_family == AF_UNIX) { 2037 ASSERT(so->so_family == AF_UNIX); 2038 nsti->sti_laddr_len = sti->sti_laddr_len; 2039 ASSERT(nsti->sti_laddr_len <= nsti->sti_laddr_maxlen); 2040 bcopy(sti->sti_laddr_sa, nsti->sti_laddr_sa, 2041 nsti->sti_laddr_len); 2042 nsti->sti_laddr_valid = 1; 2043 } else { 2044 nsti->sti_laddr_len = sti->sti_laddr_len; 2045 ASSERT(nsti->sti_laddr_len <= nsti->sti_laddr_maxlen); 2046 bzero(nsti->sti_laddr_sa, nsti->sti_addr_size); 2047 nsti->sti_laddr_sa->sa_family = nso->so_family; 2048 } 2049 nso->so_state |= SS_ISCONNECTED; 2050 mutex_exit(&nso->so_lock); 2051 2052 freemsg(ack_mp); 2053 2054 mutex_enter(&so->so_lock); 2055 so_unlock_single(so, SOLOCKED); 2056 mutex_exit(&so->so_lock); 2057 2058 /* 2059 * Pass out new socket. 
2060 */ 2061 if (nsop != NULL) 2062 *nsop = nso; 2063 2064 return (0); 2065 2066 2067 eproto_disc_unl: 2068 error = EPROTO; 2069 e_disc_unl: 2070 eprintsoline(so, error); 2071 goto disconnect_unlocked; 2072 2073 pr_disc_vp_unl: 2074 eprintsoline(so, error); 2075 disconnect_vp_unlocked: 2076 (void) VOP_CLOSE(nvp, 0, 1, 0, cr, NULL); 2077 VN_RELE(nvp); 2078 disconnect_unlocked: 2079 (void) sodisconnect(so, SEQ_number, 0); 2080 return (error); 2081 2082 pr_disc_vp: 2083 eprintsoline(so, error); 2084 disconnect_vp: 2085 (void) sodisconnect(so, SEQ_number, _SODISCONNECT_LOCK_HELD); 2086 so_unlock_single(so, SOLOCKED); 2087 mutex_exit(&so->so_lock); 2088 (void) VOP_CLOSE(nvp, 0, 1, 0, cr, NULL); 2089 VN_RELE(nvp); 2090 return (error); 2091 2092 conn_bad: /* Note: SunOS 4/BSD unconditionally returns EINVAL here */ 2093 error = (so->so_type == SOCK_DGRAM || so->so_type == SOCK_RAW) 2094 ? EOPNOTSUPP : EINVAL; 2095 e_bad: 2096 eprintsoline(so, error); 2097 return (error); 2098 } 2099 2100 /* 2101 * connect a socket. 2102 * 2103 * Allow SOCK_DGRAM sockets to reconnect (by specifying a new address) and to 2104 * unconnect (by specifying a null address). 2105 */ 2106 int 2107 sotpi_connect(struct sonode *so, 2108 struct sockaddr *name, 2109 socklen_t namelen, 2110 int fflag, 2111 int flags, 2112 struct cred *cr) 2113 { 2114 struct T_conn_req conn_req; 2115 int error = 0; 2116 mblk_t *mp; 2117 void *src; 2118 socklen_t srclen; 2119 void *addr; 2120 socklen_t addrlen; 2121 boolean_t need_unlock; 2122 sotpi_info_t *sti = SOTOTPI(so); 2123 2124 dprintso(so, 1, ("sotpi_connect(%p, %p, %d, 0x%x, 0x%x) %s\n", 2125 (void *)so, (void *)name, namelen, fflag, flags, 2126 pr_state(so->so_state, so->so_mode))); 2127 2128 /* 2129 * Preallocate the T_CONN_REQ mblk before grabbing SOLOCKED to 2130 * avoid sleeping for memory with SOLOCKED held. 2131 * We know that the T_CONN_REQ can't be larger than 2 * sti_faddr_maxlen 2132 * + sizeof (struct T_opthdr). 
2133 * (the AF_UNIX so_ux_addr_xlate() does not make the address 2134 * exceed sti_faddr_maxlen). 2135 */ 2136 mp = soallocproto(sizeof (struct T_conn_req) + 2137 2 * sti->sti_faddr_maxlen + sizeof (struct T_opthdr), _ALLOC_INTR, 2138 cr); 2139 if (mp == NULL) { 2140 /* 2141 * Connect can not fail with ENOBUFS. A signal was 2142 * caught so return EINTR. 2143 */ 2144 error = EINTR; 2145 eprintsoline(so, error); 2146 return (error); 2147 } 2148 2149 mutex_enter(&so->so_lock); 2150 /* 2151 * Make sure there is a preallocated T_unbind_req message 2152 * before any binding. This message is allocated when the 2153 * socket is created. Since another thread can consume 2154 * so_unbind_mp by the time we return from so_lock_single(), 2155 * we should check the availability of so_unbind_mp after 2156 * we return from so_lock_single(). 2157 */ 2158 2159 so_lock_single(so); /* Set SOLOCKED */ 2160 need_unlock = B_TRUE; 2161 2162 if (sti->sti_unbind_mp == NULL) { 2163 dprintso(so, 1, ("sotpi_connect: allocating unbind_req\n")); 2164 /* NOTE: holding so_lock while sleeping */ 2165 sti->sti_unbind_mp = 2166 soallocproto(sizeof (struct T_unbind_req), _ALLOC_INTR, cr); 2167 if (sti->sti_unbind_mp == NULL) { 2168 error = EINTR; 2169 goto done; 2170 } 2171 } 2172 2173 /* 2174 * Can't have done a listen before connecting. 2175 */ 2176 if (so->so_state & SS_ACCEPTCONN) { 2177 error = EOPNOTSUPP; 2178 goto done; 2179 } 2180 2181 /* 2182 * Must be bound with the transport 2183 */ 2184 if (!(so->so_state & SS_ISBOUND)) { 2185 if ((so->so_family == AF_INET || so->so_family == AF_INET6) && 2186 /*CONSTCOND*/ 2187 so->so_type == SOCK_STREAM && !soconnect_tpi_tcp) { 2188 /* 2189 * Optimization for AF_INET{,6} transports 2190 * that can handle a T_CONN_REQ without being bound. 
2191 */ 2192 so_automatic_bind(so); 2193 } else { 2194 error = sotpi_bind(so, NULL, 0, 2195 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD, cr); 2196 if (error) 2197 goto done; 2198 } 2199 ASSERT(so->so_state & SS_ISBOUND); 2200 flags |= _SOCONNECT_DID_BIND; 2201 } 2202 2203 /* 2204 * Handle a connect to a name parameter of type AF_UNSPEC like a 2205 * connect to a null address. This is the portable method to 2206 * unconnect a socket. 2207 */ 2208 if ((namelen >= sizeof (sa_family_t)) && 2209 (name->sa_family == AF_UNSPEC)) { 2210 name = NULL; 2211 namelen = 0; 2212 } 2213 2214 /* 2215 * Check that we are not already connected. 2216 * A connection-oriented socket cannot be reconnected. 2217 * A connected connection-less socket can be 2218 * - connected to a different address by a subsequent connect 2219 * - "unconnected" by a connect to the NULL address 2220 */ 2221 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) { 2222 ASSERT(!(flags & _SOCONNECT_DID_BIND)); 2223 if (so->so_mode & SM_CONNREQUIRED) { 2224 /* Connection-oriented socket */ 2225 error = so->so_state & SS_ISCONNECTED ? 2226 EISCONN : EALREADY; 2227 goto done; 2228 } 2229 /* Connection-less socket */ 2230 if (name == NULL) { 2231 /* 2232 * Remove the connected state and clear SO_DGRAM_ERRIND 2233 * since it was set when the socket was connected. 2234 * If this is UDP also send down a T_DISCON_REQ. 2235 */ 2236 int val; 2237 2238 if ((so->so_family == AF_INET || 2239 so->so_family == AF_INET6) && 2240 (so->so_type == SOCK_DGRAM || 2241 so->so_type == SOCK_RAW) && 2242 /*CONSTCOND*/ 2243 !soconnect_tpi_udp) { 2244 /* XXX What about implicitly unbinding here? 
*/ 2245 error = sodisconnect(so, -1, 2246 _SODISCONNECT_LOCK_HELD); 2247 } else { 2248 so->so_state &= 2249 ~(SS_ISCONNECTED | SS_ISCONNECTING); 2250 sti->sti_faddr_valid = 0; 2251 sti->sti_faddr_len = 0; 2252 } 2253 2254 /* Remove SOLOCKED since setsockopt will grab it */ 2255 so_unlock_single(so, SOLOCKED); 2256 mutex_exit(&so->so_lock); 2257 2258 val = 0; 2259 (void) sotpi_setsockopt(so, SOL_SOCKET, 2260 SO_DGRAM_ERRIND, &val, (t_uscalar_t)sizeof (val), 2261 cr); 2262 2263 mutex_enter(&so->so_lock); 2264 so_lock_single(so); /* Set SOLOCKED */ 2265 goto done; 2266 } 2267 } 2268 ASSERT(so->so_state & SS_ISBOUND); 2269 2270 if (name == NULL || namelen == 0) { 2271 error = EINVAL; 2272 goto done; 2273 } 2274 /* 2275 * Mark the socket if sti_faddr_sa represents the transport level 2276 * address. 2277 */ 2278 if (flags & _SOCONNECT_NOXLATE) { 2279 struct sockaddr_ux *soaddr_ux; 2280 2281 ASSERT(so->so_family == AF_UNIX); 2282 if (namelen != sizeof (struct sockaddr_ux)) { 2283 error = EINVAL; 2284 goto done; 2285 } 2286 soaddr_ux = (struct sockaddr_ux *)name; 2287 name = (struct sockaddr *)&soaddr_ux->sou_addr; 2288 namelen = sizeof (soaddr_ux->sou_addr); 2289 sti->sti_faddr_noxlate = 1; 2290 } 2291 2292 /* 2293 * Length and family checks. 2294 */ 2295 error = so_addr_verify(so, name, namelen); 2296 if (error) 2297 goto bad; 2298 2299 /* 2300 * Save foreign address. Needed for AF_UNIX as well as 2301 * transport providers that do not support TI_GETPEERNAME. 2302 * Also used for cached foreign address for TCP and UDP. 2303 */ 2304 if (namelen > (t_uscalar_t)sti->sti_faddr_maxlen) { 2305 error = EINVAL; 2306 goto done; 2307 } 2308 sti->sti_faddr_len = (socklen_t)namelen; 2309 ASSERT(sti->sti_faddr_len <= sti->sti_faddr_maxlen); 2310 bcopy(name, sti->sti_faddr_sa, namelen); 2311 sti->sti_faddr_valid = 1; 2312 2313 if (so->so_family == AF_UNIX) { 2314 if (sti->sti_faddr_noxlate) { 2315 /* 2316 * sti_faddr is a transport-level address, so 2317 * don't pass it as an option. 
Do save it in 2318 * sti_ux_faddr, used for connected DG send. 2319 */ 2320 src = NULL; 2321 srclen = 0; 2322 addr = sti->sti_faddr_sa; 2323 addrlen = (t_uscalar_t)sti->sti_faddr_len; 2324 bcopy(addr, &sti->sti_ux_faddr, 2325 sizeof (sti->sti_ux_faddr)); 2326 } else { 2327 /* 2328 * Pass the sockaddr_un source address as an option 2329 * and translate the remote address. 2330 * Holding so_lock thus sti_laddr_sa can not change. 2331 */ 2332 src = sti->sti_laddr_sa; 2333 srclen = (t_uscalar_t)sti->sti_laddr_len; 2334 dprintso(so, 1, 2335 ("sotpi_connect UNIX: srclen %d, src %p\n", 2336 srclen, src)); 2337 /* 2338 * Translate the destination address into our 2339 * internal form, and save it in sti_ux_faddr. 2340 * After this call, addr==&sti->sti_ux_taddr, 2341 * and we copy that to sti->sti_ux_faddr so 2342 * we save the connected peer address. 2343 */ 2344 error = so_ux_addr_xlate(so, 2345 sti->sti_faddr_sa, (socklen_t)sti->sti_faddr_len, 2346 (flags & _SOCONNECT_XPG4_2), 2347 &addr, &addrlen); 2348 if (error) 2349 goto bad; 2350 bcopy(&sti->sti_ux_taddr, &sti->sti_ux_faddr, 2351 sizeof (sti->sti_ux_faddr)); 2352 } 2353 } else { 2354 addr = sti->sti_faddr_sa; 2355 addrlen = (t_uscalar_t)sti->sti_faddr_len; 2356 src = NULL; 2357 srclen = 0; 2358 } 2359 /* 2360 * When connecting a datagram socket we issue the SO_DGRAM_ERRIND 2361 * option which asks the transport provider to send T_UDERR_IND 2362 * messages. These T_UDERR_IND messages are used to return connected 2363 * style errors (e.g. ECONNRESET) for connected datagram sockets. 2364 * 2365 * In addition, for UDP (and SOCK_RAW AF_INET{,6} sockets) 2366 * we send down a T_CONN_REQ. This is needed to let the 2367 * transport assign a local address that is consistent with 2368 * the remote address. Applications depend on a getsockname() 2369 * after a connect() to retrieve the "source" IP address for 2370 * the connected socket. 
Invalidate the cached local address 2371 * to force getsockname() to enquire of the transport. 2372 */ 2373 if (!(so->so_mode & SM_CONNREQUIRED)) { 2374 /* 2375 * Datagram socket. 2376 */ 2377 int32_t val; 2378 2379 so_unlock_single(so, SOLOCKED); 2380 mutex_exit(&so->so_lock); 2381 2382 val = 1; 2383 (void) sotpi_setsockopt(so, SOL_SOCKET, SO_DGRAM_ERRIND, 2384 &val, (t_uscalar_t)sizeof (val), cr); 2385 2386 mutex_enter(&so->so_lock); 2387 so_lock_single(so); /* Set SOLOCKED */ 2388 if ((so->so_family != AF_INET && so->so_family != AF_INET6) || 2389 (so->so_type != SOCK_DGRAM && so->so_type != SOCK_RAW) || 2390 soconnect_tpi_udp) { 2391 soisconnected(so); 2392 goto done; 2393 } 2394 /* 2395 * Send down T_CONN_REQ etc. 2396 * Clear fflag to avoid returning EWOULDBLOCK. 2397 */ 2398 fflag = 0; 2399 ASSERT(so->so_family != AF_UNIX); 2400 sti->sti_laddr_valid = 0; 2401 } else if (sti->sti_laddr_len != 0) { 2402 /* 2403 * If the local address or port was "any" then it may be 2404 * changed by the transport as a result of the 2405 * connect. Invalidate the cached version if we have one. 2406 */ 2407 switch (so->so_family) { 2408 case AF_INET: 2409 ASSERT(sti->sti_laddr_len == (socklen_t)sizeof (sin_t)); 2410 if (((sin_t *)sti->sti_laddr_sa)->sin_addr.s_addr == 2411 INADDR_ANY || 2412 ((sin_t *)sti->sti_laddr_sa)->sin_port == 0) 2413 sti->sti_laddr_valid = 0; 2414 break; 2415 2416 case AF_INET6: 2417 ASSERT(sti->sti_laddr_len == 2418 (socklen_t)sizeof (sin6_t)); 2419 if (IN6_IS_ADDR_UNSPECIFIED( 2420 &((sin6_t *)sti->sti_laddr_sa) ->sin6_addr) || 2421 IN6_IS_ADDR_V4MAPPED_ANY( 2422 &((sin6_t *)sti->sti_laddr_sa)->sin6_addr) || 2423 ((sin6_t *)sti->sti_laddr_sa)->sin6_port == 0) 2424 sti->sti_laddr_valid = 0; 2425 break; 2426 2427 default: 2428 break; 2429 } 2430 } 2431 2432 /* 2433 * Check for failure of an earlier call 2434 */ 2435 if (so->so_error != 0) 2436 goto so_bad; 2437 2438 /* 2439 * Send down T_CONN_REQ. Message was allocated above. 
2440 */ 2441 conn_req.PRIM_type = T_CONN_REQ; 2442 conn_req.DEST_length = addrlen; 2443 conn_req.DEST_offset = (t_scalar_t)sizeof (conn_req); 2444 if (srclen == 0) { 2445 conn_req.OPT_length = 0; 2446 conn_req.OPT_offset = 0; 2447 soappendmsg(mp, &conn_req, sizeof (conn_req)); 2448 soappendmsg(mp, addr, addrlen); 2449 } else { 2450 /* 2451 * There is a AF_UNIX sockaddr_un to include as a source 2452 * address option. 2453 */ 2454 struct T_opthdr toh; 2455 2456 toh.level = SOL_SOCKET; 2457 toh.name = SO_SRCADDR; 2458 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 2459 toh.status = 0; 2460 conn_req.OPT_length = 2461 (t_scalar_t)(sizeof (toh) + _TPI_ALIGN_TOPT(srclen)); 2462 conn_req.OPT_offset = (t_scalar_t)(sizeof (conn_req) + 2463 _TPI_ALIGN_TOPT(addrlen)); 2464 2465 soappendmsg(mp, &conn_req, sizeof (conn_req)); 2466 soappendmsg(mp, addr, addrlen); 2467 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 2468 soappendmsg(mp, &toh, sizeof (toh)); 2469 soappendmsg(mp, src, srclen); 2470 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 2471 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 2472 } 2473 /* 2474 * Set SS_ISCONNECTING before sending down the T_CONN_REQ 2475 * in order to have the right state when the T_CONN_CON shows up. 
2476 */ 2477 soisconnecting(so); 2478 mutex_exit(&so->so_lock); 2479 2480 if (AU_AUDITING()) 2481 audit_sock(T_CONN_REQ, strvp2wq(SOTOV(so)), mp, 0); 2482 2483 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2484 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0); 2485 mp = NULL; 2486 mutex_enter(&so->so_lock); 2487 if (error != 0) 2488 goto bad; 2489 2490 if ((error = sowaitokack(so, T_CONN_REQ)) != 0) 2491 goto bad; 2492 2493 /* Allow other threads to access the socket */ 2494 so_unlock_single(so, SOLOCKED); 2495 need_unlock = B_FALSE; 2496 2497 /* 2498 * Wait until we get a T_CONN_CON or an error 2499 */ 2500 if ((error = sowaitconnected(so, fflag, 0)) != 0) { 2501 so_lock_single(so); /* Set SOLOCKED */ 2502 need_unlock = B_TRUE; 2503 } 2504 2505 done: 2506 freemsg(mp); 2507 switch (error) { 2508 case EINPROGRESS: 2509 case EALREADY: 2510 case EISCONN: 2511 case EINTR: 2512 /* Non-fatal errors */ 2513 sti->sti_laddr_valid = 0; 2514 /* FALLTHRU */ 2515 case 0: 2516 break; 2517 default: 2518 ASSERT(need_unlock); 2519 /* 2520 * Fatal errors: clear SS_ISCONNECTING in case it was set, 2521 * and invalidate local-address cache 2522 */ 2523 so->so_state &= ~SS_ISCONNECTING; 2524 sti->sti_laddr_valid = 0; 2525 /* A discon_ind might have already unbound us */ 2526 if ((flags & _SOCONNECT_DID_BIND) && 2527 (so->so_state & SS_ISBOUND)) { 2528 int err; 2529 2530 err = sotpi_unbind(so, 0); 2531 /* LINTED - statement has no conseq */ 2532 if (err) { 2533 eprintsoline(so, err); 2534 } 2535 } 2536 break; 2537 } 2538 if (need_unlock) 2539 so_unlock_single(so, SOLOCKED); 2540 mutex_exit(&so->so_lock); 2541 return (error); 2542 2543 so_bad: error = sogeterr(so, B_TRUE); 2544 bad: eprintsoline(so, error); 2545 goto done; 2546 } 2547 2548 /* ARGSUSED */ 2549 int 2550 sotpi_shutdown(struct sonode *so, int how, struct cred *cr) 2551 { 2552 struct T_ordrel_req ordrel_req; 2553 mblk_t *mp; 2554 uint_t old_state, state_change; 2555 int error = 0; 2556 sotpi_info_t *sti = SOTOTPI(so); 2557 2558 
dprintso(so, 1, ("sotpi_shutdown(%p, %d) %s\n", 2559 (void *)so, how, pr_state(so->so_state, so->so_mode))); 2560 2561 mutex_enter(&so->so_lock); 2562 so_lock_single(so); /* Set SOLOCKED */ 2563 2564 /* 2565 * SunOS 4.X has no check for datagram sockets. 2566 * 5.X checks that it is connected (ENOTCONN) 2567 * X/Open requires that we check the connected state. 2568 */ 2569 if (!(so->so_state & SS_ISCONNECTED)) { 2570 if (!xnet_skip_checks) { 2571 error = ENOTCONN; 2572 if (xnet_check_print) { 2573 printf("sockfs: X/Open shutdown check " 2574 "caused ENOTCONN\n"); 2575 } 2576 } 2577 goto done; 2578 } 2579 /* 2580 * Record the current state and then perform any state changes. 2581 * Then use the difference between the old and new states to 2582 * determine which messages need to be sent. 2583 * This prevents e.g. duplicate T_ORDREL_REQ when there are 2584 * duplicate calls to shutdown(). 2585 */ 2586 old_state = so->so_state; 2587 2588 switch (how) { 2589 case 0: 2590 socantrcvmore(so); 2591 break; 2592 case 1: 2593 socantsendmore(so); 2594 break; 2595 case 2: 2596 socantsendmore(so); 2597 socantrcvmore(so); 2598 break; 2599 default: 2600 error = EINVAL; 2601 goto done; 2602 } 2603 2604 /* 2605 * Assumes that the SS_CANT* flags are never cleared in the above code. 2606 */ 2607 state_change = (so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)) - 2608 (old_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)); 2609 ASSERT((state_change & ~(SS_CANTRCVMORE|SS_CANTSENDMORE)) == 0); 2610 2611 switch (state_change) { 2612 case 0: 2613 dprintso(so, 1, 2614 ("sotpi_shutdown: nothing to send in state 0x%x\n", 2615 so->so_state)); 2616 goto done; 2617 2618 case SS_CANTRCVMORE: 2619 mutex_exit(&so->so_lock); 2620 strseteof(SOTOV(so), 1); 2621 /* 2622 * strseteof takes care of read side wakeups, 2623 * pollwakeups, and signals. 2624 */ 2625 /* 2626 * Get the read lock before flushing data to avoid problems 2627 * with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg. 
2628 */ 2629 mutex_enter(&so->so_lock); 2630 (void) so_lock_read(so, 0); /* Set SOREADLOCKED */ 2631 mutex_exit(&so->so_lock); 2632 2633 /* Flush read side queue */ 2634 strflushrq(SOTOV(so), FLUSHALL); 2635 2636 mutex_enter(&so->so_lock); 2637 so_unlock_read(so); /* Clear SOREADLOCKED */ 2638 break; 2639 2640 case SS_CANTSENDMORE: 2641 mutex_exit(&so->so_lock); 2642 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2643 mutex_enter(&so->so_lock); 2644 break; 2645 2646 case SS_CANTSENDMORE|SS_CANTRCVMORE: 2647 mutex_exit(&so->so_lock); 2648 strsetwerror(SOTOV(so), 0, 0, sogetwrerr); 2649 strseteof(SOTOV(so), 1); 2650 /* 2651 * strseteof takes care of read side wakeups, 2652 * pollwakeups, and signals. 2653 */ 2654 /* 2655 * Get the read lock before flushing data to avoid problems 2656 * with the T_EXDATA_IND MSG_PEEK code in sotpi_recvmsg. 2657 */ 2658 mutex_enter(&so->so_lock); 2659 (void) so_lock_read(so, 0); /* Set SOREADLOCKED */ 2660 mutex_exit(&so->so_lock); 2661 2662 /* Flush read side queue */ 2663 strflushrq(SOTOV(so), FLUSHALL); 2664 2665 mutex_enter(&so->so_lock); 2666 so_unlock_read(so); /* Clear SOREADLOCKED */ 2667 break; 2668 } 2669 2670 ASSERT(MUTEX_HELD(&so->so_lock)); 2671 2672 /* 2673 * If either SS_CANTSENDMORE or SS_CANTRCVMORE or both of them 2674 * was set due to this call and the new state has both of them set: 2675 * Send the AF_UNIX close indication 2676 * For T_COTS send a discon_ind 2677 * 2678 * If cantsend was set due to this call: 2679 * For T_COTSORD send an ordrel_ind 2680 * 2681 * Note that for T_CLTS there is no message sent here. 2682 */ 2683 if ((so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)) == 2684 (SS_CANTRCVMORE|SS_CANTSENDMORE)) { 2685 /* 2686 * For SunOS 4.X compatibility we tell the other end 2687 * that we are unable to receive at this point. 
2688 */ 2689 if (so->so_family == AF_UNIX && sti->sti_serv_type != T_CLTS) 2690 so_unix_close(so); 2691 2692 if (sti->sti_serv_type == T_COTS) 2693 error = sodisconnect(so, -1, _SODISCONNECT_LOCK_HELD); 2694 } 2695 if ((state_change & SS_CANTSENDMORE) && 2696 (sti->sti_serv_type == T_COTS_ORD)) { 2697 /* Send an orderly release */ 2698 ordrel_req.PRIM_type = T_ORDREL_REQ; 2699 2700 mutex_exit(&so->so_lock); 2701 mp = soallocproto1(&ordrel_req, sizeof (ordrel_req), 2702 0, _ALLOC_SLEEP, cr); 2703 /* 2704 * Send down the T_ORDREL_REQ even if there is flow control. 2705 * This prevents shutdown from blocking. 2706 * Note that there is no T_OK_ACK for ordrel_req. 2707 */ 2708 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2709 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 2710 mutex_enter(&so->so_lock); 2711 if (error) { 2712 eprintsoline(so, error); 2713 goto done; 2714 } 2715 } 2716 2717 done: 2718 so_unlock_single(so, SOLOCKED); 2719 mutex_exit(&so->so_lock); 2720 return (error); 2721 } 2722 2723 /* 2724 * For any connected SOCK_STREAM/SOCK_SEQPACKET AF_UNIX socket we send 2725 * a zero-length T_OPTDATA_REQ with the SO_UNIX_CLOSE option to inform the peer 2726 * that we have closed. 2727 * Also, for connected AF_UNIX SOCK_DGRAM sockets we send a zero-length 2728 * T_UNITDATA_REQ containing the same option. 2729 * 2730 * For SOCK_DGRAM half-connections (somebody connected to this end 2731 * but this end is not connect) we don't know where to send any 2732 * SO_UNIX_CLOSE. 2733 * 2734 * We have to ignore stream head errors just in case there has been 2735 * a shutdown(output). 2736 * Ignore any flow control to try to get the message more quickly to the peer. 2737 * While locally ignoring flow control solves the problem when there 2738 * is only the loopback transport on the stream it would not provide 2739 * the correct AF_UNIX socket semantics when one or more modules have 2740 * been pushed. 
2741 */ 2742 void 2743 so_unix_close(struct sonode *so) 2744 { 2745 struct T_opthdr toh; 2746 mblk_t *mp; 2747 sotpi_info_t *sti = SOTOTPI(so); 2748 2749 ASSERT(MUTEX_HELD(&so->so_lock)); 2750 2751 ASSERT(so->so_family == AF_UNIX); 2752 2753 if ((so->so_state & (SS_ISCONNECTED|SS_ISBOUND)) != 2754 (SS_ISCONNECTED|SS_ISBOUND)) 2755 return; 2756 2757 dprintso(so, 1, ("so_unix_close(%p) %s\n", 2758 (void *)so, pr_state(so->so_state, so->so_mode))); 2759 2760 toh.level = SOL_SOCKET; 2761 toh.name = SO_UNIX_CLOSE; 2762 2763 /* zero length + header */ 2764 toh.len = (t_uscalar_t)sizeof (struct T_opthdr); 2765 toh.status = 0; 2766 2767 if (so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET) { 2768 struct T_optdata_req tdr; 2769 2770 tdr.PRIM_type = T_OPTDATA_REQ; 2771 tdr.DATA_flag = 0; 2772 2773 tdr.OPT_length = (t_scalar_t)sizeof (toh); 2774 tdr.OPT_offset = (t_scalar_t)sizeof (tdr); 2775 2776 /* NOTE: holding so_lock while sleeping */ 2777 mp = soallocproto2(&tdr, sizeof (tdr), 2778 &toh, sizeof (toh), 0, _ALLOC_SLEEP, CRED()); 2779 } else { 2780 struct T_unitdata_req tudr; 2781 void *addr; 2782 socklen_t addrlen; 2783 void *src; 2784 socklen_t srclen; 2785 struct T_opthdr toh2; 2786 t_scalar_t size; 2787 2788 /* 2789 * We know this is an AF_UNIX connected DGRAM socket. 2790 * We therefore already have the destination address 2791 * in the internal form needed for this send. This is 2792 * similar to the sosend_dgram call later in this file 2793 * when there's no user-specified destination address. 2794 */ 2795 if (sti->sti_faddr_noxlate) { 2796 /* 2797 * Already have a transport internal address. Do not 2798 * pass any (transport internal) source address. 2799 */ 2800 addr = sti->sti_faddr_sa; 2801 addrlen = (t_uscalar_t)sti->sti_faddr_len; 2802 src = NULL; 2803 srclen = 0; 2804 } else { 2805 /* 2806 * Pass the sockaddr_un source address as an option 2807 * and translate the remote address. 2808 * Holding so_lock thus sti_laddr_sa can not change. 
2809 */ 2810 src = sti->sti_laddr_sa; 2811 srclen = (socklen_t)sti->sti_laddr_len; 2812 dprintso(so, 1, 2813 ("so_ux_close: srclen %d, src %p\n", 2814 srclen, src)); 2815 /* 2816 * Use the destination address saved in connect. 2817 */ 2818 addr = &sti->sti_ux_faddr; 2819 addrlen = sizeof (sti->sti_ux_faddr); 2820 } 2821 tudr.PRIM_type = T_UNITDATA_REQ; 2822 tudr.DEST_length = addrlen; 2823 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 2824 if (srclen == 0) { 2825 tudr.OPT_length = (t_scalar_t)sizeof (toh); 2826 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 2827 _TPI_ALIGN_TOPT(addrlen)); 2828 2829 size = tudr.OPT_offset + tudr.OPT_length; 2830 /* NOTE: holding so_lock while sleeping */ 2831 mp = soallocproto2(&tudr, sizeof (tudr), 2832 addr, addrlen, size, _ALLOC_SLEEP, CRED()); 2833 mp->b_wptr += (_TPI_ALIGN_TOPT(addrlen) - addrlen); 2834 soappendmsg(mp, &toh, sizeof (toh)); 2835 } else { 2836 /* 2837 * There is a AF_UNIX sockaddr_un to include as a 2838 * source address option. 2839 */ 2840 tudr.OPT_length = (t_scalar_t)(2 * sizeof (toh) + 2841 _TPI_ALIGN_TOPT(srclen)); 2842 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 2843 _TPI_ALIGN_TOPT(addrlen)); 2844 2845 toh2.level = SOL_SOCKET; 2846 toh2.name = SO_SRCADDR; 2847 toh2.len = (t_uscalar_t)(srclen + 2848 sizeof (struct T_opthdr)); 2849 toh2.status = 0; 2850 2851 size = tudr.OPT_offset + tudr.OPT_length; 2852 2853 /* NOTE: holding so_lock while sleeping */ 2854 mp = soallocproto2(&tudr, sizeof (tudr), 2855 addr, addrlen, size, _ALLOC_SLEEP, CRED()); 2856 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 2857 soappendmsg(mp, &toh, sizeof (toh)); 2858 soappendmsg(mp, &toh2, sizeof (toh2)); 2859 soappendmsg(mp, src, srclen); 2860 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 2861 } 2862 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 2863 } 2864 mutex_exit(&so->so_lock); 2865 (void) kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 2866 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 2867 mutex_enter(&so->so_lock); 2868 } 
2869 2870 /* 2871 * Called by sotpi_recvmsg when reading a non-zero amount of data. 2872 * In addition, the caller typically verifies that there is some 2873 * potential state to clear by checking 2874 * if (so->so_state & (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) 2875 * before calling this routine. 2876 * Note that such a check can be made without holding so_lock since 2877 * sotpi_recvmsg is single-threaded (using SOREADLOCKED) and only sotpi_recvmsg 2878 * decrements sti_oobsigcnt. 2879 * 2880 * When data is read *after* the point that all pending 2881 * oob data has been consumed the oob indication is cleared. 2882 * 2883 * This logic keeps select/poll returning POLLRDBAND and 2884 * SIOCATMARK returning true until we have read past 2885 * the mark. 2886 */ 2887 static void 2888 sorecv_update_oobstate(struct sonode *so) 2889 { 2890 sotpi_info_t *sti = SOTOTPI(so); 2891 2892 mutex_enter(&so->so_lock); 2893 ASSERT(so_verify_oobstate(so)); 2894 dprintso(so, 1, 2895 ("sorecv_update_oobstate: counts %d/%d state %s\n", 2896 sti->sti_oobsigcnt, 2897 sti->sti_oobcnt, pr_state(so->so_state, so->so_mode))); 2898 if (sti->sti_oobsigcnt == 0) { 2899 /* No more pending oob indications */ 2900 so->so_state &= ~(SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK); 2901 freemsg(so->so_oobmsg); 2902 so->so_oobmsg = NULL; 2903 } 2904 ASSERT(so_verify_oobstate(so)); 2905 mutex_exit(&so->so_lock); 2906 } 2907 2908 /* 2909 * Receive the next message on the queue. 2910 * If msg_controllen is non-zero when called the caller is interested in 2911 * any received control info (options). 2912 * If msg_namelen is non-zero when called the caller is interested in 2913 * any received source address. 2914 * The routine returns with msg_control and msg_name pointing to 2915 * kmem_alloc'ed memory which the caller has to free. 
2916 */ 2917 /* ARGSUSED */ 2918 int 2919 sotpi_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, 2920 struct cred *cr) 2921 { 2922 union T_primitives *tpr; 2923 mblk_t *mp; 2924 uchar_t pri; 2925 int pflag, opflag; 2926 void *control; 2927 t_uscalar_t controllen; 2928 t_uscalar_t namelen; 2929 int so_state = so->so_state; /* Snapshot */ 2930 ssize_t saved_resid; 2931 rval_t rval; 2932 int flags; 2933 clock_t timout; 2934 int error = 0; 2935 sotpi_info_t *sti = SOTOTPI(so); 2936 2937 flags = msg->msg_flags; 2938 msg->msg_flags = 0; 2939 2940 dprintso(so, 1, ("sotpi_recvmsg(%p, %p, 0x%x) state %s err %d\n", 2941 (void *)so, (void *)msg, flags, 2942 pr_state(so->so_state, so->so_mode), so->so_error)); 2943 2944 if (so->so_version == SOV_STREAM) { 2945 so_update_attrs(so, SOACC); 2946 /* The imaginary "sockmod" has been popped - act as a stream */ 2947 return (strread(SOTOV(so), uiop, cr)); 2948 } 2949 2950 /* 2951 * If we are not connected because we have never been connected 2952 * we return ENOTCONN. If we have been connected (but are no longer 2953 * connected) then SS_CANTRCVMORE is set and we let kstrgetmsg return 2954 * the EOF. 2955 * 2956 * An alternative would be to post an ENOTCONN error in stream head 2957 * (read+write) and clear it when we're connected. However, that error 2958 * would cause incorrect poll/select behavior! 2959 */ 2960 if ((so_state & (SS_ISCONNECTED|SS_CANTRCVMORE)) == 0 && 2961 (so->so_mode & SM_CONNREQUIRED)) { 2962 return (ENOTCONN); 2963 } 2964 2965 /* 2966 * Note: SunOS 4.X checks uio_resid == 0 before going to sleep (but 2967 * after checking that the read queue is empty) and returns zero. 2968 * This implementation will sleep (in kstrgetmsg) even if uio_resid 2969 * is zero. 
2970 */ 2971 2972 if (flags & MSG_OOB) { 2973 /* Check that the transport supports OOB */ 2974 if (!(so->so_mode & SM_EXDATA)) 2975 return (EOPNOTSUPP); 2976 so_update_attrs(so, SOACC); 2977 return (sorecvoob(so, msg, uiop, flags, 2978 (so->so_options & SO_OOBINLINE))); 2979 } 2980 2981 so_update_attrs(so, SOACC); 2982 2983 /* 2984 * Set msg_controllen and msg_namelen to zero here to make it 2985 * simpler in the cases that no control or name is returned. 2986 */ 2987 controllen = msg->msg_controllen; 2988 namelen = msg->msg_namelen; 2989 msg->msg_controllen = 0; 2990 msg->msg_namelen = 0; 2991 2992 dprintso(so, 1, ("sotpi_recvmsg: namelen %d controllen %d\n", 2993 namelen, controllen)); 2994 2995 mutex_enter(&so->so_lock); 2996 /* 2997 * Only one reader is allowed at any given time. This is needed 2998 * for T_EXDATA handling and, in the future, MSG_WAITALL. 2999 * 3000 * This is slightly different that BSD behavior in that it fails with 3001 * EWOULDBLOCK when using nonblocking io. In BSD the read queue access 3002 * is single-threaded using sblock(), which is dropped while waiting 3003 * for data to appear. The difference shows up e.g. if one 3004 * file descriptor does not have O_NONBLOCK but a dup'ed file descriptor 3005 * does use nonblocking io and different threads are reading each 3006 * file descriptor. In BSD there would never be an EWOULDBLOCK error 3007 * in this case as long as the read queue doesn't get empty. 3008 * In this implementation the thread using nonblocking io can 3009 * get an EWOULDBLOCK error due to the blocking thread executing 3010 * e.g. in the uiomove in kstrgetmsg. 3011 * This difference is not believed to be significant. 3012 */ 3013 /* Set SOREADLOCKED */ 3014 error = so_lock_read_intr(so, 3015 uiop->uio_fmode | ((flags & MSG_DONTWAIT) ? 
FNONBLOCK : 0)); 3016 mutex_exit(&so->so_lock); 3017 if (error) 3018 return (error); 3019 3020 /* 3021 * Tell kstrgetmsg to not inspect the stream head errors until all 3022 * queued data has been consumed. 3023 * Use a timeout=-1 to wait forever unless MSG_DONTWAIT is set. 3024 * Also, If uio_fmode indicates nonblocking kstrgetmsg will not block. 3025 * 3026 * MSG_WAITALL only applies to M_DATA and T_DATA_IND messages and 3027 * to T_OPTDATA_IND that do not contain any user-visible control msg. 3028 * Note that MSG_WAITALL set with MSG_PEEK is a noop. 3029 */ 3030 pflag = MSG_ANY | MSG_DELAYERROR; 3031 if (flags & MSG_PEEK) { 3032 pflag |= MSG_IPEEK; 3033 flags &= ~MSG_WAITALL; 3034 } 3035 if (so->so_mode & SM_ATOMIC) 3036 pflag |= MSG_DISCARDTAIL; 3037 3038 if (flags & MSG_DONTWAIT) 3039 timout = 0; 3040 else if (so->so_rcvtimeo != 0) 3041 timout = TICK_TO_MSEC(so->so_rcvtimeo); 3042 else 3043 timout = -1; 3044 opflag = pflag; 3045 retry: 3046 saved_resid = uiop->uio_resid; 3047 pri = 0; 3048 mp = NULL; 3049 error = kstrgetmsg(SOTOV(so), &mp, uiop, &pri, &pflag, 3050 timout, &rval); 3051 if (error != 0) { 3052 /* kstrgetmsg returns ETIME when timeout expires */ 3053 if (error == ETIME) 3054 error = EWOULDBLOCK; 3055 goto out; 3056 } 3057 /* 3058 * For datagrams the MOREDATA flag is used to set MSG_TRUNC. 3059 * For non-datagrams MOREDATA is used to set MSG_EOR. 3060 */ 3061 ASSERT(!(rval.r_val1 & MORECTL)); 3062 if ((rval.r_val1 & MOREDATA) && (so->so_mode & SM_ATOMIC)) 3063 msg->msg_flags |= MSG_TRUNC; 3064 3065 if (mp == NULL) { 3066 dprintso(so, 1, ("sotpi_recvmsg: got M_DATA\n")); 3067 /* 3068 * 4.3BSD and 4.4BSD clears the mark when peeking across it. 3069 * The draft Posix socket spec states that the mark should 3070 * not be cleared when peeking. We follow the latter. 
3071 */ 3072 if ((so->so_state & 3073 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3074 (uiop->uio_resid != saved_resid) && 3075 !(flags & MSG_PEEK)) { 3076 sorecv_update_oobstate(so); 3077 } 3078 3079 mutex_enter(&so->so_lock); 3080 /* Set MSG_EOR based on MOREDATA */ 3081 if (!(rval.r_val1 & MOREDATA)) { 3082 if (so->so_state & SS_SAVEDEOR) { 3083 msg->msg_flags |= MSG_EOR; 3084 so->so_state &= ~SS_SAVEDEOR; 3085 } 3086 } 3087 /* 3088 * If some data was received (i.e. not EOF) and the 3089 * read/recv* has not been satisfied wait for some more. 3090 */ 3091 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3092 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3093 mutex_exit(&so->so_lock); 3094 pflag = opflag | MSG_NOMARK; 3095 goto retry; 3096 } 3097 goto out_locked; 3098 } 3099 3100 /* strsock_proto has already verified length and alignment */ 3101 tpr = (union T_primitives *)mp->b_rptr; 3102 dprintso(so, 1, ("sotpi_recvmsg: type %d\n", tpr->type)); 3103 3104 switch (tpr->type) { 3105 case T_DATA_IND: { 3106 if ((so->so_state & 3107 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3108 (uiop->uio_resid != saved_resid) && 3109 !(flags & MSG_PEEK)) { 3110 sorecv_update_oobstate(so); 3111 } 3112 3113 /* 3114 * Set msg_flags to MSG_EOR based on 3115 * MORE_flag and MOREDATA. 3116 */ 3117 mutex_enter(&so->so_lock); 3118 so->so_state &= ~SS_SAVEDEOR; 3119 if (!(tpr->data_ind.MORE_flag & 1)) { 3120 if (!(rval.r_val1 & MOREDATA)) 3121 msg->msg_flags |= MSG_EOR; 3122 else 3123 so->so_state |= SS_SAVEDEOR; 3124 } 3125 freemsg(mp); 3126 /* 3127 * If some data was received (i.e. not EOF) and the 3128 * read/recv* has not been satisfied wait for some more. 
3129 */ 3130 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3131 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3132 mutex_exit(&so->so_lock); 3133 pflag = opflag | MSG_NOMARK; 3134 goto retry; 3135 } 3136 goto out_locked; 3137 } 3138 case T_UNITDATA_IND: { 3139 void *addr; 3140 t_uscalar_t addrlen; 3141 void *abuf; 3142 t_uscalar_t optlen; 3143 void *opt; 3144 3145 if ((so->so_state & 3146 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3147 (uiop->uio_resid != saved_resid) && 3148 !(flags & MSG_PEEK)) { 3149 sorecv_update_oobstate(so); 3150 } 3151 3152 if (namelen != 0) { 3153 /* Caller wants source address */ 3154 addrlen = tpr->unitdata_ind.SRC_length; 3155 addr = sogetoff(mp, 3156 tpr->unitdata_ind.SRC_offset, 3157 addrlen, 1); 3158 if (addr == NULL) { 3159 freemsg(mp); 3160 error = EPROTO; 3161 eprintsoline(so, error); 3162 goto out; 3163 } 3164 if (so->so_family == AF_UNIX) { 3165 /* 3166 * Can not use the transport level address. 3167 * If there is a SO_SRCADDR option carrying 3168 * the socket level address it will be 3169 * extracted below. 3170 */ 3171 addr = NULL; 3172 addrlen = 0; 3173 } 3174 } 3175 optlen = tpr->unitdata_ind.OPT_length; 3176 if (optlen != 0) { 3177 t_uscalar_t ncontrollen; 3178 3179 /* 3180 * Extract any source address option. 3181 * Determine how large cmsg buffer is needed. 3182 */ 3183 opt = sogetoff(mp, 3184 tpr->unitdata_ind.OPT_offset, 3185 optlen, __TPI_ALIGN_SIZE); 3186 3187 if (opt == NULL) { 3188 freemsg(mp); 3189 error = EPROTO; 3190 eprintsoline(so, error); 3191 goto out; 3192 } 3193 if (so->so_family == AF_UNIX) 3194 so_getopt_srcaddr(opt, optlen, &addr, &addrlen); 3195 ncontrollen = so_cmsglen(mp, opt, optlen, 3196 !(flags & MSG_XPG4_2)); 3197 if (controllen != 0) 3198 controllen = ncontrollen; 3199 else if (ncontrollen != 0) 3200 msg->msg_flags |= MSG_CTRUNC; 3201 } else { 3202 controllen = 0; 3203 } 3204 3205 if (namelen != 0) { 3206 /* 3207 * Return address to caller. 
3208 * Caller handles truncation if length 3209 * exceeds msg_namelen. 3210 * NOTE: AF_UNIX NUL termination is ensured by 3211 * the sender's copyin_name(). 3212 */ 3213 abuf = kmem_alloc(addrlen, KM_SLEEP); 3214 3215 bcopy(addr, abuf, addrlen); 3216 msg->msg_name = abuf; 3217 msg->msg_namelen = addrlen; 3218 } 3219 3220 if (controllen != 0) { 3221 /* 3222 * Return control msg to caller. 3223 * Caller handles truncation if length 3224 * exceeds msg_controllen. 3225 */ 3226 control = kmem_zalloc(controllen, KM_SLEEP); 3227 3228 error = so_opt2cmsg(mp, opt, optlen, 3229 !(flags & MSG_XPG4_2), 3230 control, controllen); 3231 if (error) { 3232 freemsg(mp); 3233 if (msg->msg_namelen != 0) 3234 kmem_free(msg->msg_name, 3235 msg->msg_namelen); 3236 kmem_free(control, controllen); 3237 eprintsoline(so, error); 3238 goto out; 3239 } 3240 msg->msg_control = control; 3241 msg->msg_controllen = controllen; 3242 } 3243 3244 freemsg(mp); 3245 goto out; 3246 } 3247 case T_OPTDATA_IND: { 3248 struct T_optdata_req *tdr; 3249 void *opt; 3250 t_uscalar_t optlen; 3251 3252 if ((so->so_state & 3253 (SS_OOBPEND|SS_HAVEOOBDATA|SS_RCVATMARK)) && 3254 (uiop->uio_resid != saved_resid) && 3255 !(flags & MSG_PEEK)) { 3256 sorecv_update_oobstate(so); 3257 } 3258 3259 tdr = (struct T_optdata_req *)mp->b_rptr; 3260 optlen = tdr->OPT_length; 3261 if (optlen != 0) { 3262 t_uscalar_t ncontrollen; 3263 /* 3264 * Determine how large cmsg buffer is needed. 3265 */ 3266 opt = sogetoff(mp, 3267 tpr->optdata_ind.OPT_offset, 3268 optlen, __TPI_ALIGN_SIZE); 3269 3270 if (opt == NULL) { 3271 freemsg(mp); 3272 error = EPROTO; 3273 eprintsoline(so, error); 3274 goto out; 3275 } 3276 3277 ncontrollen = so_cmsglen(mp, opt, optlen, 3278 !(flags & MSG_XPG4_2)); 3279 if (controllen != 0) 3280 controllen = ncontrollen; 3281 else if (ncontrollen != 0) 3282 msg->msg_flags |= MSG_CTRUNC; 3283 } else { 3284 controllen = 0; 3285 } 3286 3287 if (controllen != 0) { 3288 /* 3289 * Return control msg to caller. 
3290 * Caller handles truncation if length 3291 * exceeds msg_controllen. 3292 */ 3293 control = kmem_zalloc(controllen, KM_SLEEP); 3294 3295 error = so_opt2cmsg(mp, opt, optlen, 3296 !(flags & MSG_XPG4_2), 3297 control, controllen); 3298 if (error) { 3299 freemsg(mp); 3300 kmem_free(control, controllen); 3301 eprintsoline(so, error); 3302 goto out; 3303 } 3304 msg->msg_control = control; 3305 msg->msg_controllen = controllen; 3306 } 3307 3308 /* 3309 * Set msg_flags to MSG_EOR based on 3310 * DATA_flag and MOREDATA. 3311 */ 3312 mutex_enter(&so->so_lock); 3313 so->so_state &= ~SS_SAVEDEOR; 3314 if (!(tpr->data_ind.MORE_flag & 1)) { 3315 if (!(rval.r_val1 & MOREDATA)) 3316 msg->msg_flags |= MSG_EOR; 3317 else 3318 so->so_state |= SS_SAVEDEOR; 3319 } 3320 freemsg(mp); 3321 /* 3322 * If some data was received (i.e. not EOF) and the 3323 * read/recv* has not been satisfied wait for some more. 3324 * Not possible to wait if control info was received. 3325 */ 3326 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 3327 controllen == 0 && 3328 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 3329 mutex_exit(&so->so_lock); 3330 pflag = opflag | MSG_NOMARK; 3331 goto retry; 3332 } 3333 goto out_locked; 3334 } 3335 case T_EXDATA_IND: { 3336 dprintso(so, 1, 3337 ("sotpi_recvmsg: EXDATA_IND counts %d/%d consumed %ld " 3338 "state %s\n", 3339 sti->sti_oobsigcnt, sti->sti_oobcnt, 3340 saved_resid - uiop->uio_resid, 3341 pr_state(so->so_state, so->so_mode))); 3342 /* 3343 * kstrgetmsg handles MSGMARK so there is nothing to 3344 * inspect in the T_EXDATA_IND. 3345 * strsock_proto makes the stream head queue the T_EXDATA_IND 3346 * as a separate message with no M_DATA component. Furthermore, 3347 * the stream head does not consolidate M_DATA messages onto 3348 * an MSGMARK'ed message ensuring that the T_EXDATA_IND 3349 * remains a message by itself. This is needed since MSGMARK 3350 * marks both the whole message as well as the last byte 3351 * of the message. 
3352 */ 3353 freemsg(mp); 3354 ASSERT(uiop->uio_resid == saved_resid); /* No data */ 3355 if (flags & MSG_PEEK) { 3356 /* 3357 * Even though we are peeking we consume the 3358 * T_EXDATA_IND thereby moving the mark information 3359 * to SS_RCVATMARK. Then the oob code below will 3360 * retry the peeking kstrgetmsg. 3361 * Note that the stream head read queue is 3362 * never flushed without holding SOREADLOCKED 3363 * thus the T_EXDATA_IND can not disappear 3364 * underneath us. 3365 */ 3366 dprintso(so, 1, 3367 ("sotpi_recvmsg: consume EXDATA_IND " 3368 "counts %d/%d state %s\n", 3369 sti->sti_oobsigcnt, 3370 sti->sti_oobcnt, 3371 pr_state(so->so_state, so->so_mode))); 3372 3373 pflag = MSG_ANY | MSG_DELAYERROR; 3374 if (so->so_mode & SM_ATOMIC) 3375 pflag |= MSG_DISCARDTAIL; 3376 3377 pri = 0; 3378 mp = NULL; 3379 3380 error = kstrgetmsg(SOTOV(so), &mp, uiop, 3381 &pri, &pflag, (clock_t)-1, &rval); 3382 ASSERT(uiop->uio_resid == saved_resid); 3383 3384 if (error) { 3385 #ifdef SOCK_DEBUG 3386 if (error != EWOULDBLOCK && error != EINTR) { 3387 eprintsoline(so, error); 3388 } 3389 #endif /* SOCK_DEBUG */ 3390 goto out; 3391 } 3392 ASSERT(mp); 3393 tpr = (union T_primitives *)mp->b_rptr; 3394 ASSERT(tpr->type == T_EXDATA_IND); 3395 freemsg(mp); 3396 } /* end "if (flags & MSG_PEEK)" */ 3397 3398 /* 3399 * Decrement the number of queued and pending oob. 3400 * 3401 * SS_RCVATMARK is cleared when we read past a mark. 3402 * SS_HAVEOOBDATA is cleared when we've read past the 3403 * last mark. 3404 * SS_OOBPEND is cleared if we've read past the last 3405 * mark and no (new) SIGURG has been posted. 
3406 */ 3407 mutex_enter(&so->so_lock); 3408 ASSERT(so_verify_oobstate(so)); 3409 ASSERT(sti->sti_oobsigcnt >= sti->sti_oobcnt); 3410 ASSERT(sti->sti_oobsigcnt > 0); 3411 sti->sti_oobsigcnt--; 3412 ASSERT(sti->sti_oobcnt > 0); 3413 sti->sti_oobcnt--; 3414 /* 3415 * Since the T_EXDATA_IND has been removed from the stream 3416 * head, but we have not read data past the mark, 3417 * sockfs needs to track that the socket is still at the mark. 3418 * 3419 * Since no data was received call kstrgetmsg again to wait 3420 * for data. 3421 */ 3422 so->so_state |= SS_RCVATMARK; 3423 mutex_exit(&so->so_lock); 3424 dprintso(so, 1, 3425 ("sotpi_recvmsg: retry EXDATA_IND counts %d/%d state %s\n", 3426 sti->sti_oobsigcnt, sti->sti_oobcnt, 3427 pr_state(so->so_state, so->so_mode))); 3428 pflag = opflag; 3429 goto retry; 3430 } 3431 default: 3432 cmn_err(CE_CONT, "sotpi_recvmsg: so %p prim %d mp %p\n", 3433 (void *)so, tpr->type, (void *)mp); 3434 ASSERT(0); 3435 freemsg(mp); 3436 error = EPROTO; 3437 eprintsoline(so, error); 3438 goto out; 3439 } 3440 /* NOTREACHED */ 3441 out: 3442 mutex_enter(&so->so_lock); 3443 out_locked: 3444 so_unlock_read(so); /* Clear SOREADLOCKED */ 3445 mutex_exit(&so->so_lock); 3446 return (error); 3447 } 3448 3449 /* 3450 * Sending data with options on a datagram socket. 3451 * Assumes caller has verified that SS_ISBOUND etc. are set. 3452 * 3453 * For AF_UNIX the destination address may be already in 3454 * internal form, as indicated by sti->sti_faddr_noxlate 3455 * or the MSG_SENDTO_NOXLATE flag. Otherwise we need to 3456 * translate the destination address to internal form. 3457 * 3458 * The source address is passed as an option. If passing 3459 * file descriptors, those are passed as file pointers in 3460 * another option. 
3461 */ 3462 static int 3463 sosend_dgramcmsg(struct sonode *so, struct sockaddr *name, socklen_t namelen, 3464 struct uio *uiop, void *control, t_uscalar_t controllen, int flags) 3465 { 3466 struct T_unitdata_req tudr; 3467 mblk_t *mp; 3468 int error; 3469 void *addr; 3470 socklen_t addrlen; 3471 void *src; 3472 socklen_t srclen; 3473 ssize_t len; 3474 int size; 3475 struct T_opthdr toh; 3476 struct fdbuf *fdbuf; 3477 t_uscalar_t optlen; 3478 void *fds; 3479 int fdlen; 3480 sotpi_info_t *sti = SOTOTPI(so); 3481 3482 ASSERT(name && namelen); 3483 ASSERT(control && controllen); 3484 3485 len = uiop->uio_resid; 3486 if (len > (ssize_t)sti->sti_tidu_size) { 3487 return (EMSGSIZE); 3488 } 3489 3490 if (sti->sti_faddr_noxlate == 0 && 3491 (flags & MSG_SENDTO_NOXLATE) == 0) { 3492 /* 3493 * Length and family checks. 3494 * Don't verify internal form. 3495 */ 3496 error = so_addr_verify(so, name, namelen); 3497 if (error) { 3498 eprintsoline(so, error); 3499 return (error); 3500 } 3501 } 3502 3503 if (so->so_family == AF_UNIX) { 3504 if (sti->sti_faddr_noxlate) { 3505 /* 3506 * Already have a transport internal address. Do not 3507 * pass any (transport internal) source address. 3508 */ 3509 addr = name; 3510 addrlen = namelen; 3511 src = NULL; 3512 srclen = 0; 3513 } else if (flags & MSG_SENDTO_NOXLATE) { 3514 /* 3515 * Have an internal form dest. address. 3516 * Pass the source address as usual. 3517 */ 3518 addr = name; 3519 addrlen = namelen; 3520 src = sti->sti_laddr_sa; 3521 srclen = (socklen_t)sti->sti_laddr_len; 3522 } else { 3523 /* 3524 * Pass the sockaddr_un source address as an option 3525 * and translate the remote address. 3526 * 3527 * Note that this code does not prevent sti_laddr_sa 3528 * from changing while it is being used. Thus 3529 * if an unbind+bind occurs concurrently with this 3530 * send the peer might see a partially new and a 3531 * partially old "from" address. 
3532 */ 3533 src = sti->sti_laddr_sa; 3534 srclen = (socklen_t)sti->sti_laddr_len; 3535 dprintso(so, 1, 3536 ("sosend_dgramcmsg UNIX: srclen %d, src %p\n", 3537 srclen, src)); 3538 /* 3539 * The sendmsg caller specified a destination 3540 * address, which we must translate into our 3541 * internal form. addr = &sti->sti_ux_taddr 3542 */ 3543 error = so_ux_addr_xlate(so, name, namelen, 3544 (flags & MSG_XPG4_2), 3545 &addr, &addrlen); 3546 if (error) { 3547 eprintsoline(so, error); 3548 return (error); 3549 } 3550 } 3551 } else { 3552 addr = name; 3553 addrlen = namelen; 3554 src = NULL; 3555 srclen = 0; 3556 } 3557 optlen = so_optlen(control, controllen, 3558 !(flags & MSG_XPG4_2)); 3559 tudr.PRIM_type = T_UNITDATA_REQ; 3560 tudr.DEST_length = addrlen; 3561 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 3562 if (srclen != 0) 3563 tudr.OPT_length = (t_scalar_t)(optlen + sizeof (toh) + 3564 _TPI_ALIGN_TOPT(srclen)); 3565 else 3566 tudr.OPT_length = optlen; 3567 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 3568 _TPI_ALIGN_TOPT(addrlen)); 3569 3570 size = tudr.OPT_offset + tudr.OPT_length; 3571 3572 /* 3573 * File descriptors only when SM_FDPASSING set. 3574 */ 3575 error = so_getfdopt(control, controllen, 3576 !(flags & MSG_XPG4_2), &fds, &fdlen); 3577 if (error) 3578 return (error); 3579 if (fdlen != -1) { 3580 if (!(so->so_mode & SM_FDPASSING)) 3581 return (EOPNOTSUPP); 3582 3583 error = fdbuf_create(fds, fdlen, &fdbuf); 3584 if (error) 3585 return (error); 3586 3587 /* 3588 * Pre-allocate enough additional space for lower level modules 3589 * to append an option (e.g. see tl_unitdata). The following 3590 * is enough extra space for the largest option we might append. 3591 */ 3592 size += sizeof (struct T_opthdr) + ucredsize; 3593 mp = fdbuf_allocmsg(size, fdbuf); 3594 } else { 3595 mp = soallocproto(size, _ALLOC_INTR, CRED()); 3596 if (mp == NULL) { 3597 /* 3598 * Caught a signal waiting for memory. 3599 * Let send* return EINTR. 
3600 */ 3601 return (EINTR); 3602 } 3603 } 3604 soappendmsg(mp, &tudr, sizeof (tudr)); 3605 soappendmsg(mp, addr, addrlen); 3606 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 3607 3608 if (fdlen != -1) { 3609 ASSERT(fdbuf != NULL); 3610 toh.level = SOL_SOCKET; 3611 toh.name = SO_FILEP; 3612 toh.len = fdbuf->fd_size + 3613 (t_uscalar_t)sizeof (struct T_opthdr); 3614 toh.status = 0; 3615 soappendmsg(mp, &toh, sizeof (toh)); 3616 soappendmsg(mp, fdbuf, fdbuf->fd_size); 3617 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3618 } 3619 if (srclen != 0) { 3620 /* 3621 * There is a AF_UNIX sockaddr_un to include as a source 3622 * address option. 3623 */ 3624 toh.level = SOL_SOCKET; 3625 toh.name = SO_SRCADDR; 3626 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 3627 toh.status = 0; 3628 soappendmsg(mp, &toh, sizeof (toh)); 3629 soappendmsg(mp, src, srclen); 3630 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 3631 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3632 } 3633 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3634 so_cmsg2opt(control, controllen, !(flags & MSG_XPG4_2), mp); 3635 /* 3636 * Normally at most 3 bytes left in the message, but we might have 3637 * allowed for extra space if we're passing fd's through. 3638 */ 3639 ASSERT(MBLKL(mp) <= (ssize_t)size); 3640 3641 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3642 if (AU_AUDITING()) 3643 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 3644 3645 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 3646 #ifdef SOCK_DEBUG 3647 if (error) { 3648 eprintsoline(so, error); 3649 } 3650 #endif /* SOCK_DEBUG */ 3651 return (error); 3652 } 3653 3654 /* 3655 * Sending data with options on a connected stream socket. 3656 * Assumes caller has verified that SS_ISCONNECTED is set. 
3657 */ 3658 static int 3659 sosend_svccmsg(struct sonode *so, struct uio *uiop, int more, void *control, 3660 t_uscalar_t controllen, int flags) 3661 { 3662 struct T_optdata_req tdr; 3663 mblk_t *mp; 3664 int error; 3665 ssize_t iosize; 3666 int size; 3667 struct fdbuf *fdbuf; 3668 t_uscalar_t optlen; 3669 void *fds; 3670 int fdlen; 3671 struct T_opthdr toh; 3672 sotpi_info_t *sti = SOTOTPI(so); 3673 3674 dprintso(so, 1, 3675 ("sosend_svccmsg: resid %ld bytes\n", uiop->uio_resid)); 3676 3677 /* 3678 * Has to be bound and connected. However, since no locks are 3679 * held the state could have changed after sotpi_sendmsg checked it 3680 * thus it is not possible to ASSERT on the state. 3681 */ 3682 3683 /* Options on connection-oriented only when SM_OPTDATA set. */ 3684 if (!(so->so_mode & SM_OPTDATA)) 3685 return (EOPNOTSUPP); 3686 3687 do { 3688 /* 3689 * Set the MORE flag if uio_resid does not fit in this 3690 * message or if the caller passed in "more". 3691 * Error for transports with zero tidu_size. 3692 */ 3693 tdr.PRIM_type = T_OPTDATA_REQ; 3694 iosize = sti->sti_tidu_size; 3695 if (iosize <= 0) 3696 return (EMSGSIZE); 3697 if (uiop->uio_resid > iosize) { 3698 tdr.DATA_flag = 1; 3699 } else { 3700 if (more) 3701 tdr.DATA_flag = 1; 3702 else 3703 tdr.DATA_flag = 0; 3704 iosize = uiop->uio_resid; 3705 } 3706 dprintso(so, 1, ("sosend_svccmsg: sending %d, %ld bytes\n", 3707 tdr.DATA_flag, iosize)); 3708 3709 optlen = so_optlen(control, controllen, !(flags & MSG_XPG4_2)); 3710 tdr.OPT_length = optlen; 3711 tdr.OPT_offset = (t_scalar_t)sizeof (tdr); 3712 3713 size = (int)sizeof (tdr) + optlen; 3714 /* 3715 * File descriptors only when SM_FDPASSING set. 
3716 */ 3717 error = so_getfdopt(control, controllen, 3718 !(flags & MSG_XPG4_2), &fds, &fdlen); 3719 if (error) 3720 return (error); 3721 if (fdlen != -1) { 3722 if (!(so->so_mode & SM_FDPASSING)) 3723 return (EOPNOTSUPP); 3724 3725 error = fdbuf_create(fds, fdlen, &fdbuf); 3726 if (error) 3727 return (error); 3728 3729 /* 3730 * Pre-allocate enough additional space for lower level 3731 * modules to append an option (e.g. see tl_unitdata). 3732 * The following is enough extra space for the largest 3733 * option we might append. 3734 */ 3735 size += sizeof (struct T_opthdr) + ucredsize; 3736 mp = fdbuf_allocmsg(size, fdbuf); 3737 } else { 3738 mp = soallocproto(size, _ALLOC_INTR, CRED()); 3739 if (mp == NULL) { 3740 /* 3741 * Caught a signal waiting for memory. 3742 * Let send* return EINTR. 3743 */ 3744 return (EINTR); 3745 } 3746 } 3747 soappendmsg(mp, &tdr, sizeof (tdr)); 3748 3749 if (fdlen != -1) { 3750 ASSERT(fdbuf != NULL); 3751 toh.level = SOL_SOCKET; 3752 toh.name = SO_FILEP; 3753 toh.len = fdbuf->fd_size + 3754 (t_uscalar_t)sizeof (struct T_opthdr); 3755 toh.status = 0; 3756 soappendmsg(mp, &toh, sizeof (toh)); 3757 soappendmsg(mp, fdbuf, fdbuf->fd_size); 3758 ASSERT(__TPI_TOPT_ISALIGNED(mp->b_wptr)); 3759 } 3760 so_cmsg2opt(control, controllen, !(flags & MSG_XPG4_2), mp); 3761 /* 3762 * Normally at most 3 bytes left in the message, but we might 3763 * have allowed for extra space if we're passing fd's through. 3764 */ 3765 ASSERT(MBLKL(mp) <= (ssize_t)size); 3766 3767 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3768 3769 error = kstrputmsg(SOTOV(so), mp, uiop, iosize, 3770 0, MSG_BAND, 0); 3771 if (error) { 3772 eprintsoline(so, error); 3773 return (error); 3774 } 3775 control = NULL; 3776 if (uiop->uio_resid > 0) { 3777 /* 3778 * Recheck for fatal errors. Fail write even though 3779 * some data have been written. This is consistent 3780 * with strwrite semantics and BSD sockets semantics. 
3781 */ 3782 if (so->so_state & SS_CANTSENDMORE) { 3783 eprintsoline(so, error); 3784 return (EPIPE); 3785 } 3786 if (so->so_error != 0) { 3787 mutex_enter(&so->so_lock); 3788 error = sogeterr(so, B_TRUE); 3789 mutex_exit(&so->so_lock); 3790 if (error != 0) { 3791 eprintsoline(so, error); 3792 return (error); 3793 } 3794 } 3795 } 3796 } while (uiop->uio_resid > 0); 3797 return (0); 3798 } 3799 3800 /* 3801 * Sending data on a datagram socket. 3802 * Assumes caller has verified that SS_ISBOUND etc. are set. 3803 * 3804 * For AF_UNIX the destination address may be already in 3805 * internal form, as indicated by sti->sti_faddr_noxlate 3806 * or the MSG_SENDTO_NOXLATE flag. Otherwise we need to 3807 * translate the destination address to internal form. 3808 * 3809 * The source address is passed as an option. 3810 */ 3811 int 3812 sosend_dgram(struct sonode *so, struct sockaddr *name, socklen_t namelen, 3813 struct uio *uiop, int flags) 3814 { 3815 struct T_unitdata_req tudr; 3816 mblk_t *mp; 3817 int error; 3818 void *addr; 3819 socklen_t addrlen; 3820 void *src; 3821 socklen_t srclen; 3822 ssize_t len; 3823 sotpi_info_t *sti = SOTOTPI(so); 3824 3825 ASSERT(name != NULL && namelen != 0); 3826 3827 len = uiop->uio_resid; 3828 if (len > sti->sti_tidu_size) { 3829 error = EMSGSIZE; 3830 goto done; 3831 } 3832 3833 if (sti->sti_faddr_noxlate == 0 && 3834 (flags & MSG_SENDTO_NOXLATE) == 0) { 3835 /* 3836 * Length and family checks. 3837 * Don't verify internal form. 3838 */ 3839 error = so_addr_verify(so, name, namelen); 3840 if (error != 0) 3841 goto done; 3842 } 3843 3844 if (sti->sti_direct) /* Never on AF_UNIX */ 3845 return (sodgram_direct(so, name, namelen, uiop, flags)); 3846 3847 if (so->so_family == AF_UNIX) { 3848 if (sti->sti_faddr_noxlate) { 3849 /* 3850 * Already have a transport internal address. Do not 3851 * pass any (transport internal) source address. 
3852 */ 3853 addr = name; 3854 addrlen = namelen; 3855 src = NULL; 3856 srclen = 0; 3857 } else if (flags & MSG_SENDTO_NOXLATE) { 3858 /* 3859 * Have an internal form dest. address. 3860 * Pass the source address as usual. 3861 */ 3862 addr = name; 3863 addrlen = namelen; 3864 src = sti->sti_laddr_sa; 3865 srclen = (socklen_t)sti->sti_laddr_len; 3866 } else { 3867 /* 3868 * Pass the sockaddr_un source address as an option 3869 * and translate the remote address. 3870 * 3871 * Note that this code does not prevent sti_laddr_sa 3872 * from changing while it is being used. Thus 3873 * if an unbind+bind occurs concurrently with this 3874 * send the peer might see a partially new and a 3875 * partially old "from" address. 3876 */ 3877 src = sti->sti_laddr_sa; 3878 srclen = (socklen_t)sti->sti_laddr_len; 3879 dprintso(so, 1, 3880 ("sosend_dgram UNIX: srclen %d, src %p\n", 3881 srclen, src)); 3882 /* 3883 * The sendmsg caller specified a destination 3884 * address, which we must translate into our 3885 * internal form. addr = &sti->sti_ux_taddr 3886 */ 3887 error = so_ux_addr_xlate(so, name, namelen, 3888 (flags & MSG_XPG4_2), 3889 &addr, &addrlen); 3890 if (error) { 3891 eprintsoline(so, error); 3892 goto done; 3893 } 3894 } 3895 } else { 3896 addr = name; 3897 addrlen = namelen; 3898 src = NULL; 3899 srclen = 0; 3900 } 3901 tudr.PRIM_type = T_UNITDATA_REQ; 3902 tudr.DEST_length = addrlen; 3903 tudr.DEST_offset = (t_scalar_t)sizeof (tudr); 3904 if (srclen == 0) { 3905 tudr.OPT_length = 0; 3906 tudr.OPT_offset = 0; 3907 3908 mp = soallocproto2(&tudr, sizeof (tudr), 3909 addr, addrlen, 0, _ALLOC_INTR, CRED()); 3910 if (mp == NULL) { 3911 /* 3912 * Caught a signal waiting for memory. 3913 * Let send* return EINTR. 3914 */ 3915 error = EINTR; 3916 goto done; 3917 } 3918 } else { 3919 /* 3920 * There is a AF_UNIX sockaddr_un to include as a source 3921 * address option. 
3922 */ 3923 struct T_opthdr toh; 3924 ssize_t size; 3925 3926 tudr.OPT_length = (t_scalar_t)(sizeof (toh) + 3927 _TPI_ALIGN_TOPT(srclen)); 3928 tudr.OPT_offset = (t_scalar_t)(sizeof (tudr) + 3929 _TPI_ALIGN_TOPT(addrlen)); 3930 3931 toh.level = SOL_SOCKET; 3932 toh.name = SO_SRCADDR; 3933 toh.len = (t_uscalar_t)(srclen + sizeof (struct T_opthdr)); 3934 toh.status = 0; 3935 3936 size = tudr.OPT_offset + tudr.OPT_length; 3937 mp = soallocproto2(&tudr, sizeof (tudr), 3938 addr, addrlen, size, _ALLOC_INTR, CRED()); 3939 if (mp == NULL) { 3940 /* 3941 * Caught a signal waiting for memory. 3942 * Let send* return EINTR. 3943 */ 3944 error = EINTR; 3945 goto done; 3946 } 3947 mp->b_wptr += _TPI_ALIGN_TOPT(addrlen) - addrlen; 3948 soappendmsg(mp, &toh, sizeof (toh)); 3949 soappendmsg(mp, src, srclen); 3950 mp->b_wptr += _TPI_ALIGN_TOPT(srclen) - srclen; 3951 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 3952 } 3953 3954 if (AU_AUDITING()) 3955 audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0); 3956 3957 error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0); 3958 done: 3959 #ifdef SOCK_DEBUG 3960 if (error) { 3961 eprintsoline(so, error); 3962 } 3963 #endif /* SOCK_DEBUG */ 3964 return (error); 3965 } 3966 3967 /* 3968 * Sending data on a connected stream socket. 3969 * Assumes caller has verified that SS_ISCONNECTED is set. 3970 */ 3971 int 3972 sosend_svc(struct sonode *so, struct uio *uiop, t_scalar_t prim, int more, 3973 int sflag) 3974 { 3975 struct T_data_req tdr; 3976 mblk_t *mp; 3977 int error; 3978 ssize_t iosize; 3979 sotpi_info_t *sti = SOTOTPI(so); 3980 3981 dprintso(so, 1, 3982 ("sosend_svc: %p, resid %ld bytes, prim %d, sflag 0x%x\n", 3983 (void *)so, uiop->uio_resid, prim, sflag)); 3984 3985 /* 3986 * Has to be bound and connected. However, since no locks are 3987 * held the state could have changed after sotpi_sendmsg checked it 3988 * thus it is not possible to ASSERT on the state. 
3989 */ 3990 3991 do { 3992 /* 3993 * Set the MORE flag if uio_resid does not fit in this 3994 * message or if the caller passed in "more". 3995 * Error for transports with zero tidu_size. 3996 */ 3997 tdr.PRIM_type = prim; 3998 iosize = sti->sti_tidu_size; 3999 if (iosize <= 0) 4000 return (EMSGSIZE); 4001 if (uiop->uio_resid > iosize) { 4002 tdr.MORE_flag = 1; 4003 } else { 4004 if (more) 4005 tdr.MORE_flag = 1; 4006 else 4007 tdr.MORE_flag = 0; 4008 iosize = uiop->uio_resid; 4009 } 4010 dprintso(so, 1, ("sosend_svc: sending 0x%x %d, %ld bytes\n", 4011 prim, tdr.MORE_flag, iosize)); 4012 mp = soallocproto1(&tdr, sizeof (tdr), 0, _ALLOC_INTR, CRED()); 4013 if (mp == NULL) { 4014 /* 4015 * Caught a signal waiting for memory. 4016 * Let send* return EINTR. 4017 */ 4018 return (EINTR); 4019 } 4020 4021 error = kstrputmsg(SOTOV(so), mp, uiop, iosize, 4022 0, sflag | MSG_BAND, 0); 4023 if (error) { 4024 eprintsoline(so, error); 4025 return (error); 4026 } 4027 if (uiop->uio_resid > 0) { 4028 /* 4029 * Recheck for fatal errors. Fail write even though 4030 * some data have been written. This is consistent 4031 * with strwrite semantics and BSD sockets semantics. 4032 */ 4033 if (so->so_state & SS_CANTSENDMORE) { 4034 eprintsoline(so, error); 4035 return (EPIPE); 4036 } 4037 if (so->so_error != 0) { 4038 mutex_enter(&so->so_lock); 4039 error = sogeterr(so, B_TRUE); 4040 mutex_exit(&so->so_lock); 4041 if (error != 0) { 4042 eprintsoline(so, error); 4043 return (error); 4044 } 4045 } 4046 } 4047 } while (uiop->uio_resid > 0); 4048 return (0); 4049 } 4050 4051 /* 4052 * Check the state for errors and call the appropriate send function. 4053 * 4054 * If MSG_DONTROUTE is set (and SO_DONTROUTE isn't already set) 4055 * this function issues a setsockopt to toggle SO_DONTROUTE before and 4056 * after sending the message. 4057 * 4058 * The caller may optionally specify a destination address, for either 4059 * stream or datagram sockets. 
 * This table summarizes the cases:
 *
 *	Socket type	Dest. given	Connected	Result
 *	-----------	-----------	---------	--------------
 *	Stream		*		Yes		send to conn. addr.
 *	Stream		*		No		error ENOTCONN
 *	Dgram		yes		no		send to given addr.
 *	Dgram		yes		yes		error EISCONN
 *	Dgram		no		yes		send to conn. addr.
 *	Dgram		no		no		error EDESTADDRREQ
 *
 * There are subtleties around the destination address when using
 * AF_UNIX datagram sockets.  When the sendmsg call specifies the
 * destination address, it's in (struct sockaddr_un) form and we
 * need to translate it to our internal form (struct so_ux_addr).
 *
 * When the sendmsg call does not specify a destination address
 * we're using the peer address saved during sotpi_connect, and
 * that address is already in internal form.  In this case, the
 * (internal only) flag MSG_SENDTO_NOXLATE is set in the flags
 * passed to sosend_dgram or sosend_dgramcmsg to indicate that
 * those functions should skip translation to internal form.
 * Avoiding that translation is not only more efficient, but it's
 * also necessary when a process does a connect on an AF_UNIX
 * datagram socket and then drops privileges.  After the process
 * has dropped privileges, it may no longer be able to look up
 * the external name in the filesystem, but it should still be
 * able to send messages on the connected socket by leaving the
 * destination name unspecified.
 *
 * Yet more subtleties arise with sockets connected by socketpair(),
 * which puts internal form addresses in the fields where normally
 * the external form is found, and sets sti_faddr_noxlate=1, which
 * (like flag MSG_SENDTO_NOXLATE) causes the sosend_dgram functions
 * to skip translation of destination addresses to internal form.
 * However, beware that the flag sti_faddr_noxlate=1 also triggers
 * different behaviour almost everywhere AF_UNIX addresses appear.
4095 */ 4096 static int 4097 sotpi_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, 4098 struct cred *cr) 4099 { 4100 int so_state; 4101 int so_mode; 4102 int error; 4103 struct sockaddr *name; 4104 t_uscalar_t namelen; 4105 int dontroute; 4106 int flags; 4107 sotpi_info_t *sti = SOTOTPI(so); 4108 4109 dprintso(so, 1, ("sotpi_sendmsg(%p, %p, 0x%x) state %s, error %d\n", 4110 (void *)so, (void *)msg, msg->msg_flags, 4111 pr_state(so->so_state, so->so_mode), so->so_error)); 4112 4113 if (so->so_version == SOV_STREAM) { 4114 /* The imaginary "sockmod" has been popped - act as a stream */ 4115 so_update_attrs(so, SOMOD); 4116 return (strwrite(SOTOV(so), uiop, cr)); 4117 } 4118 4119 mutex_enter(&so->so_lock); 4120 so_state = so->so_state; 4121 4122 if (so_state & SS_CANTSENDMORE) { 4123 mutex_exit(&so->so_lock); 4124 return (EPIPE); 4125 } 4126 4127 if (so->so_error != 0) { 4128 error = sogeterr(so, B_TRUE); 4129 if (error != 0) { 4130 mutex_exit(&so->so_lock); 4131 return (error); 4132 } 4133 } 4134 4135 name = (struct sockaddr *)msg->msg_name; 4136 namelen = msg->msg_namelen; 4137 flags = msg->msg_flags; 4138 4139 /* 4140 * Historically, this function does not validate the flags 4141 * passed in, and any errant bits are ignored. However, 4142 * we would not want any such errant flag bits accidently 4143 * being treated as one of the internal-only flags, so 4144 * clear the internal-only flag bits. 4145 */ 4146 flags &= ~MSG_SENDTO_NOXLATE; 4147 4148 so_mode = so->so_mode; 4149 4150 if (name == NULL) { 4151 if (!(so_state & SS_ISCONNECTED)) { 4152 mutex_exit(&so->so_lock); 4153 if (so_mode & SM_CONNREQUIRED) 4154 return (ENOTCONN); 4155 else 4156 return (EDESTADDRREQ); 4157 } 4158 /* 4159 * This is a connected socket. 4160 */ 4161 if (so_mode & SM_CONNREQUIRED) { 4162 /* 4163 * This is a connected STREAM socket, 4164 * destination not specified. 
4165 */ 4166 name = NULL; 4167 namelen = 0; 4168 } else { 4169 /* 4170 * Datagram send on connected socket with 4171 * the destination name not specified. 4172 * Use the peer address from connect. 4173 */ 4174 if (so->so_family == AF_UNIX) { 4175 /* 4176 * Use the (internal form) address saved 4177 * in sotpi_connect. See above. 4178 */ 4179 name = (void *)&sti->sti_ux_faddr; 4180 namelen = sizeof (sti->sti_ux_faddr); 4181 flags |= MSG_SENDTO_NOXLATE; 4182 } else { 4183 ASSERT(sti->sti_faddr_sa); 4184 name = sti->sti_faddr_sa; 4185 namelen = (t_uscalar_t)sti->sti_faddr_len; 4186 } 4187 } 4188 } else { 4189 /* 4190 * Sendmsg specifies a destination name 4191 */ 4192 if (!(so_state & SS_ISCONNECTED) && 4193 (so_mode & SM_CONNREQUIRED)) { 4194 /* i.e. TCP not connected */ 4195 mutex_exit(&so->so_lock); 4196 return (ENOTCONN); 4197 } 4198 /* 4199 * Ignore the address on connection-oriented sockets. 4200 * Just like BSD this code does not generate an error for 4201 * TCP (a CONNREQUIRED socket) when sending to an address 4202 * passed in with sendto/sendmsg. Instead the data is 4203 * delivered on the connection as if no address had been 4204 * supplied. 4205 */ 4206 if ((so_state & SS_ISCONNECTED) && 4207 !(so_mode & SM_CONNREQUIRED)) { 4208 mutex_exit(&so->so_lock); 4209 return (EISCONN); 4210 } 4211 if (!(so_state & SS_ISBOUND)) { 4212 so_lock_single(so); /* Set SOLOCKED */ 4213 error = sotpi_bind(so, NULL, 0, 4214 _SOBIND_UNSPEC|_SOBIND_LOCK_HELD, cr); 4215 so_unlock_single(so, SOLOCKED); 4216 if (error) { 4217 mutex_exit(&so->so_lock); 4218 eprintsoline(so, error); 4219 return (error); 4220 } 4221 } 4222 /* 4223 * Handle delayed datagram errors. These are only queued 4224 * when the application sets SO_DGRAM_ERRIND. 4225 * Return the error if we are sending to the address 4226 * that was returned in the last T_UDERROR_IND. 4227 * If sending to some other address discard the delayed 4228 * error indication. 
4229 */ 4230 if (sti->sti_delayed_error) { 4231 struct T_uderror_ind *tudi; 4232 void *addr; 4233 t_uscalar_t addrlen; 4234 boolean_t match = B_FALSE; 4235 4236 ASSERT(sti->sti_eaddr_mp); 4237 error = sti->sti_delayed_error; 4238 sti->sti_delayed_error = 0; 4239 tudi = 4240 (struct T_uderror_ind *)sti->sti_eaddr_mp->b_rptr; 4241 addrlen = tudi->DEST_length; 4242 addr = sogetoff(sti->sti_eaddr_mp, 4243 tudi->DEST_offset, addrlen, 1); 4244 ASSERT(addr); /* Checked by strsock_proto */ 4245 switch (so->so_family) { 4246 case AF_INET: { 4247 /* Compare just IP address and port */ 4248 sin_t *sin1 = (sin_t *)name; 4249 sin_t *sin2 = (sin_t *)addr; 4250 4251 if (addrlen == sizeof (sin_t) && 4252 namelen == addrlen && 4253 sin1->sin_port == sin2->sin_port && 4254 sin1->sin_addr.s_addr == 4255 sin2->sin_addr.s_addr) 4256 match = B_TRUE; 4257 break; 4258 } 4259 case AF_INET6: { 4260 /* Compare just IP address and port. Not flow */ 4261 sin6_t *sin1 = (sin6_t *)name; 4262 sin6_t *sin2 = (sin6_t *)addr; 4263 4264 if (addrlen == sizeof (sin6_t) && 4265 namelen == addrlen && 4266 sin1->sin6_port == sin2->sin6_port && 4267 IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr, 4268 &sin2->sin6_addr)) 4269 match = B_TRUE; 4270 break; 4271 } 4272 case AF_UNIX: 4273 default: 4274 if (namelen == addrlen && 4275 bcmp(name, addr, namelen) == 0) 4276 match = B_TRUE; 4277 } 4278 if (match) { 4279 freemsg(sti->sti_eaddr_mp); 4280 sti->sti_eaddr_mp = NULL; 4281 mutex_exit(&so->so_lock); 4282 #ifdef DEBUG 4283 dprintso(so, 0, 4284 ("sockfs delayed error %d for %s\n", 4285 error, 4286 pr_addr(so->so_family, name, namelen))); 4287 #endif /* DEBUG */ 4288 return (error); 4289 } 4290 freemsg(sti->sti_eaddr_mp); 4291 sti->sti_eaddr_mp = NULL; 4292 } 4293 } 4294 mutex_exit(&so->so_lock); 4295 4296 dontroute = 0; 4297 if ((flags & MSG_DONTROUTE) && !(so->so_options & SO_DONTROUTE)) { 4298 uint32_t val; 4299 4300 val = 1; 4301 error = sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE, 4302 &val, (t_uscalar_t)sizeof 
(val), cr); 4303 if (error) 4304 return (error); 4305 dontroute = 1; 4306 } 4307 4308 if ((flags & MSG_OOB) && !(so_mode & SM_EXDATA)) { 4309 error = EOPNOTSUPP; 4310 goto done; 4311 } 4312 if (msg->msg_controllen != 0) { 4313 if (!(so_mode & SM_CONNREQUIRED)) { 4314 so_update_attrs(so, SOMOD); 4315 error = sosend_dgramcmsg(so, name, namelen, uiop, 4316 msg->msg_control, msg->msg_controllen, flags); 4317 } else { 4318 if (flags & MSG_OOB) { 4319 /* Can't generate T_EXDATA_REQ with options */ 4320 error = EOPNOTSUPP; 4321 goto done; 4322 } 4323 so_update_attrs(so, SOMOD); 4324 error = sosend_svccmsg(so, uiop, 4325 !(flags & MSG_EOR), 4326 msg->msg_control, msg->msg_controllen, 4327 flags); 4328 } 4329 goto done; 4330 } 4331 4332 so_update_attrs(so, SOMOD); 4333 if (!(so_mode & SM_CONNREQUIRED)) { 4334 /* 4335 * If there is no SO_DONTROUTE to turn off return immediately 4336 * from send_dgram. This can allow tail-call optimizations. 4337 */ 4338 if (!dontroute) { 4339 return (sosend_dgram(so, name, namelen, uiop, flags)); 4340 } 4341 error = sosend_dgram(so, name, namelen, uiop, flags); 4342 } else { 4343 t_scalar_t prim; 4344 int sflag; 4345 4346 /* Ignore msg_name in the connected state */ 4347 if (flags & MSG_OOB) { 4348 prim = T_EXDATA_REQ; 4349 /* 4350 * Send down T_EXDATA_REQ even if there is flow 4351 * control for data. 4352 */ 4353 sflag = MSG_IGNFLOW; 4354 } else { 4355 if (so_mode & SM_BYTESTREAM) { 4356 /* Byte stream transport - use write */ 4357 dprintso(so, 1, ("sotpi_sendmsg: write\n")); 4358 4359 /* Send M_DATA messages */ 4360 /* 4361 * If there is no SO_DONTROUTE to turn off, 4362 * sti_direct is on, and there is no flow 4363 * control, we can take the fast path. 
*/
				if (!dontroute && sti->sti_direct != 0 &&
				    canputnext(SOTOV(so)->v_stream->sd_wrq)) {
					return (sostream_direct(so, uiop,
					    NULL, cr));
				}
				error = strwrite(SOTOV(so), uiop, cr);
				goto done;
			}
			prim = T_DATA_REQ;
			sflag = 0;
		}
		/*
		 * If there is no SO_DONTROUTE to turn off return immediately
		 * from sosend_svc. This can allow tail-call optimizations.
		 */
		if (!dontroute)
			return (sosend_svc(so, uiop, prim,
			    !(flags & MSG_EOR), sflag));
		error = sosend_svc(so, uiop, prim,
		    !(flags & MSG_EOR), sflag);
	}
	ASSERT(dontroute);
done:
	if (dontroute) {
		uint32_t	val;

		val = 0;
		(void) sotpi_setsockopt(so, SOL_SOCKET, SO_DONTROUTE,
		    &val, (t_uscalar_t)sizeof (val), cr);
	}
	return (error);
}

/*
 * kstrwritemp() has very similar semantics as that of strwrite().
 * The main difference is it obtains mblks from the caller and also
 * does not do any copy as done in strwrite() from user buffers to
 * kernel buffers.
 *
 * Currently, this routine is used by sendfile to send data allocated
 * within the kernel without any copying. This interface does not use the
 * synchronous stream interface as synch. stream interface implies
 * copying.
 *
 * Returns:
 *	- the result of sostream_direct() when the direct fast path is taken;
 *	- 0 once mp has been handed to putnext();
 *	- ECOMM if sd_wputdatafunc returns NULL (mp is not freed here);
 *	- the error from strgeterr(), replaced by EPIPE when STPLEX is clear
 *	  and SW_SIGPIPE is set in sd_wput_opt;
 *	- the error from strwaitq(), with ENOMEM reported as EAGAIN.
 */
int
kstrwritemp(struct vnode *vp, mblk_t *mp, ushort_t fmode)
{
	struct stdata *stp;
	struct queue *wqp;
	mblk_t *newmp;
	char waitflag;
	int tempmode;
	int error = 0;
	int done = 0;
	struct sonode *so;
	boolean_t direct;

	ASSERT(vp->v_stream);
	stp = vp->v_stream;

	so = VTOSO(vp);
	direct = _SOTOTPI(so)->sti_direct;

	/*
	 * This is the sockfs direct fast path. canputnext() need
	 * not be accurate so we don't grab the sd_lock here. If
	 * we get flow-controlled, we grab sd_lock just before the
	 * do..while loop below to emulate what strwrite() does.
	 */
	wqp = stp->sd_wrq;
	if (canputnext(wqp) && direct &&
	    !(stp->sd_flag & (STWRERR|STRHUP|STPLEX))) {
		return (sostream_direct(so, NULL, mp, CRED()));
	} else if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) {
		/* Fast check of flags before acquiring the lock */
		mutex_enter(&stp->sd_lock);
		error = strgeterr(stp, STWRERR|STRHUP|STPLEX, 0);
		mutex_exit(&stp->sd_lock);
		if (error != 0) {
			if (!(stp->sd_flag & STPLEX) &&
			    (stp->sd_wput_opt & SW_SIGPIPE)) {
				error = EPIPE;
			}
			return (error);
		}
	}

	waitflag = WRITEWAIT;
	/* With OLDNDELAY set, FNDELAY is hidden from strwaitq(). */
	if (stp->sd_flag & OLDNDELAY)
		tempmode = fmode & ~FNDELAY;
	else
		tempmode = fmode;

	/*
	 * Slow path: sd_lock is held around the flow-control test and
	 * strwaitq(), and dropped before the message is passed downstream.
	 */
	mutex_enter(&stp->sd_lock);
	do {
		if (canputnext(wqp)) {
			mutex_exit(&stp->sd_lock);
			if (stp->sd_wputdatafunc != NULL) {
				newmp = (stp->sd_wputdatafunc)(vp, mp, NULL,
				    NULL, NULL, NULL);
				if (newmp == NULL) {
					/* The caller will free mp */
					return (ECOMM);
				}
				mp = newmp;
			}
			putnext(wqp, mp);
			return (0);
		}
		error = strwaitq(stp, waitflag, (ssize_t)0, tempmode, -1,
		    &done);
	} while (error == 0 && !done);

	mutex_exit(&stp->sd_lock);
	/*
	 * EAGAIN tells the application to try again. ENOMEM
	 * is returned only if the memory allocation size
	 * exceeds the physical limits of the system. ENOMEM
	 * can't be true here.
	 */
	if (error == ENOMEM)
		error = EAGAIN;
	return (error);
}

/*
 * Send a caller-supplied mblk chain (*mpp) on a socket via kstrwritemp().
 *
 * Only AF_INET, AF_INET6 and AF_UNIX sockets of type SOCK_STREAM that are
 * connected and not marked SS_CANTSENDMORE are accepted; otherwise
 * EAFNOSUPPORT, EOPNOTSUPP, ENOTCONN or EPIPE is returned respectively.
 * fflag is passed to kstrwritemp() as the file mode. msg and cr are unused.
 *
 * On success *mpp is set to NULL. On failure *mpp is left unchanged.
 */
/* ARGSUSED */
static int
sotpi_sendmblk(struct sonode *so, struct nmsghdr *msg, int fflag,
    struct cred *cr, mblk_t **mpp)
{
	int error;

	switch (so->so_family) {
	case AF_INET:
	case AF_INET6:
	case AF_UNIX:
		break;
	default:
		return (EAFNOSUPPORT);

	}

	if (so->so_state & SS_CANTSENDMORE)
		return (EPIPE);

	if (so->so_type != SOCK_STREAM)
		return (EOPNOTSUPP);

	if ((so->so_state & SS_ISCONNECTED) == 0)
		return (ENOTCONN);

	error = kstrwritemp(so->so_vnode, *mpp, fflag);
	if (error == 0)
		*mpp = NULL;
	return (error);
}

/*
 * Sending data on a datagram socket.
 * Assumes caller has verified that SS_ISBOUND etc. are set.
 *
 * When the socket is not connected a T_UNITDATA_REQ header carrying the
 * destination address is allocated and the data is linked behind it; when
 * connected the data is sent without a header. If the queue below the
 * stream head cannot take the message, or mcopyinuio() fails with ENOMEM,
 * the send falls back to strwrite() (connected) or kstrputmsg() (not
 * connected).
 */
/* ARGSUSED */
static int
sodgram_direct(struct sonode *so, struct sockaddr *name,
    socklen_t namelen, struct uio *uiop, int flags)
{
	struct T_unitdata_req tudr;
	mblk_t *mp = NULL;
	int error = 0;
	void *addr;
	socklen_t addrlen;
	ssize_t len;
	struct stdata *stp = SOTOV(so)->v_stream;
	int so_state;
	queue_t *udp_wq;
	boolean_t connected;
	mblk_t *mpdata = NULL;
	sotpi_info_t *sti = SOTOTPI(so);
	uint32_t auditing = AU_AUDITING();

	ASSERT(name != NULL && namelen != 0);
	ASSERT(!(so->so_mode & SM_CONNREQUIRED));
	ASSERT(!(so->so_mode & SM_EXDATA));
	ASSERT(so->so_family == AF_INET || so->so_family == AF_INET6);
	ASSERT(SOTOV(so)->v_type == VSOCK);

	/* Caller checked for proper length */
	len = uiop->uio_resid;
	ASSERT(len <= sti->sti_tidu_size);

	/* Length and family checks have been done by caller */
	ASSERT(name->sa_family == so->so_family);
	ASSERT(so->so_family == AF_INET ||
	    (namelen == (socklen_t)sizeof (struct sockaddr_in6)));
	ASSERT(so->so_family == AF_INET6 ||
	    (namelen == (socklen_t)sizeof (struct sockaddr_in)));

	addr = name;
	addrlen = namelen;

	/* mp is still NULL here, so the done path has nothing to free. */
	if (stp->sd_sidp != NULL &&
	    (error = straccess(stp, JCWRITE)) != 0)
		goto done;

	so_state = so->so_state;

	connected = so_state & SS_ISCONNECTED;
	if (!connected) {
		tudr.PRIM_type = T_UNITDATA_REQ;
		tudr.DEST_length = addrlen;
		tudr.DEST_offset = (t_scalar_t)sizeof (tudr);
		tudr.OPT_length = 0;
		tudr.OPT_offset = 0;

		mp = soallocproto2(&tudr, sizeof (tudr), addr, addrlen, 0,
		    _ALLOC_INTR, CRED());
		if (mp == NULL) {
			/*
			 * Caught a signal waiting for memory.
			 * Let send* return EINTR.
			 */
			error = EINTR;
			goto done;
		}
	}

	/*
	 * For UDP we don't break up the copyin into smaller pieces
	 * as in the TCP case.  That means if ENOMEM is returned by
	 * mcopyinuio() then the uio vector has not been modified at
	 * all and we fallback to either strwrite() or kstrputmsg()
	 * below.  Note also that we never generate priority messages
	 * from here.
	 */
	udp_wq = stp->sd_wrq->q_next;
	if (canput(udp_wq) &&
	    (mpdata = mcopyinuio(stp, uiop, -1, -1, &error)) != NULL) {
		ASSERT(DB_TYPE(mpdata) == M_DATA);
		ASSERT(uiop->uio_resid == 0);
		if (!connected)
			linkb(mp, mpdata);
		else
			mp = mpdata;
		if (auditing)
			audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0);

		/* Always returns 0... */
		return (udp_wput(udp_wq, mp));
	}

	ASSERT(mpdata == NULL);
	/* Any copyin failure other than ENOMEM is final; drop the header. */
	if (error != 0 && error != ENOMEM) {
		freemsg(mp);
		return (error);
	}

	/*
	 * For connected, let strwrite() handle the blocking case.
	 * Otherwise we fall thru and use kstrputmsg().
	 */
	if (connected)
		return (strwrite(SOTOV(so), uiop, CRED()));

	if (auditing)
		audit_sock(T_UNITDATA_REQ, strvp2wq(SOTOV(so)), mp, 0);

	error = kstrputmsg(SOTOV(so), mp, uiop, len, 0, MSG_BAND, 0);
done:
#ifdef SOCK_DEBUG
	if (error != 0) {
		eprintsoline(so, error);
	}
#endif /* SOCK_DEBUG */
	return (error);
}

/*
 * Direct send path for byte-stream sockets: data is passed straight to
 * tcp_wput() on the queue below the stream head's write queue.
 *
 * Exactly one of uiop and mp supplies the data. When uiop is NULL, mp is
 * sent as-is (after sd_wputdatafunc, if one is set). Otherwise user data
 * is copied in with mcopyinuio() in chunks of at most sd_qn_maxpsz bytes
 * and each chunk is sent down in turn.
 *
 * The routine hands over to strwrite() before sending anything if any of
 * STWRERR, STRHUP, STPLEX, STRDELIM or OLDNDELAY is set on the stream head,
 * and to strwrite_common() mid-transfer if mcopyinuio() reports ENOMEM,
 * the queue below is flow-controlled, or one of those flags becomes set.
 *
 * Returns 0, an error from straccess() or mcopyinuio(), ECOMM if
 * sd_wputdatafunc returns NULL, or the result of the fallback routine.
 */
int
sostream_direct(struct sonode *so, struct uio *uiop, mblk_t *mp, cred_t *cr)
{
	struct stdata *stp = SOTOV(so)->v_stream;
	ssize_t iosize, rmax, maxblk;
	queue_t *tcp_wq = stp->sd_wrq->q_next;
	mblk_t *newmp;
	int error = 0, wflag = 0;

	ASSERT(so->so_mode & SM_BYTESTREAM);
	ASSERT(SOTOV(so)->v_type == VSOCK);

	if (stp->sd_sidp != NULL &&
	    (error = straccess(stp, JCWRITE)) != 0)
		return (error);

	if (uiop == NULL) {
		/*
		 * kstrwritemp() should have checked sd_flag and
		 * flow-control before coming here.  If we end up
		 * here it means that we can simply pass down the
		 * data to tcp.
		 */
		ASSERT(mp != NULL);
		if (stp->sd_wputdatafunc != NULL) {
			newmp = (stp->sd_wputdatafunc)(SOTOV(so), mp, NULL,
			    NULL, NULL, NULL);
			if (newmp == NULL) {
				/* The caller will free mp */
				return (ECOMM);
			}
			mp = newmp;
		}
		/* Always returns 0... */
		return (tcp_wput(tcp_wq, mp));
	}

	/* Fallback to strwrite() to do proper error handling */
	if (stp->sd_flag & (STWRERR|STRHUP|STPLEX|STRDELIM|OLDNDELAY))
		return (strwrite(SOTOV(so), uiop, cr));

	rmax = stp->sd_qn_maxpsz;
	ASSERT(rmax >= 0 || rmax == INFPSZ);
	if (rmax == 0 || uiop->uio_resid <= 0)
		return (0);

	/* No packet size limit: copy in everything that remains at once. */
	if (rmax == INFPSZ)
		rmax = uiop->uio_resid;

	maxblk = stp->sd_maxblk;

	for (;;) {
		iosize = MIN(uiop->uio_resid, rmax);

		mp = mcopyinuio(stp, uiop, iosize, maxblk, &error);
		if (mp == NULL) {
			/*
			 * Fallback to strwrite() for ENOMEM; if this
			 * is our first time in this routine and the uio
			 * vector has not been modified, we will end up
			 * calling strwrite() without any flag set.
			 */
			if (error == ENOMEM)
				goto slow_send;
			else
				return (error);
		}
		ASSERT(uiop->uio_resid >= 0);
		/*
		 * If mp is non-NULL and ENOMEM is set, it means that
		 * mcopyinuio() was able to break down some of the user
		 * data into one or more mblks.  Send the partial data
		 * to tcp and let the rest be handled in strwrite().
		 */
		ASSERT(error == 0 || error == ENOMEM);
		if (stp->sd_wputdatafunc != NULL) {
			newmp = (stp->sd_wputdatafunc)(SOTOV(so), mp, NULL,
			    NULL, NULL, NULL);
			if (newmp == NULL) {
				/*
				 * The caller will free mp
				 *
				 * NOTE(review): on this path mp came from
				 * mcopyinuio() above, not from the caller;
				 * confirm who releases it.
				 */
				return (ECOMM);
			}
			mp = newmp;
		}
		(void) tcp_wput(tcp_wq, mp);	/* Always returns 0 anyway. */

		/* Data has gone down; any later strwrite_common() gets NOINTR. */
		wflag |= NOINTR;

		if (uiop->uio_resid == 0) {	/* No more data; we're done */
			ASSERT(error == 0);
			break;
		} else if (error == ENOMEM || !canput(tcp_wq) || (stp->sd_flag &
		    (STWRERR|STRHUP|STPLEX|STRDELIM|OLDNDELAY))) {
slow_send:
			/*
			 * We were able to send down partial data using
			 * the direct call interface, but are now relying
			 * on strwrite() to handle the non-fastpath cases.
			 * If the socket is blocking we will sleep in
			 * strwaitq() until write is permitted, otherwise,
			 * we will need to return the amount of bytes
			 * written so far back to the app.  This is the
			 * reason why we pass NOINTR flag to strwrite()
			 * for non-blocking socket, because we don't want
			 * to return EAGAIN when portion of the user data
			 * has actually been sent down.
			 */
			return (strwrite_common(SOTOV(so), uiop, cr, wflag));
		}
	}
	return (0);
}

/*
 * Update sti_faddr by asking the transport (unless AF_UNIX).
 *
 * When accept is set, or when sti_faddr_valid is set, the recorded peer
 * address is copied out without asking the transport. At most *namelen
 * bytes are copied to name; *namelen is then set to the recorded address
 * length (0 when sti_faddr_noxlate is set).
 *
 * Returns ENOTCONN if the socket is not connected, EINVAL for the X/Open
 * SS_CANTSENDMORE check, otherwise 0.
 */
/* ARGSUSED */
int
sotpi_getpeername(struct sonode *so, struct sockaddr *name, socklen_t *namelen,
    boolean_t accept, struct cred *cr)
{
	struct strbuf strbuf;
	int error = 0, res;
	void *addr;
	t_uscalar_t addrlen;
	k_sigset_t smask;
	sotpi_info_t *sti = SOTOTPI(so);

	dprintso(so, 1, ("sotpi_getpeername(%p) %s\n",
	    (void *)so, pr_state(so->so_state, so->so_mode)));

	ASSERT(*namelen > 0);
	mutex_enter(&so->so_lock);
	so_lock_single(so);	/* Set SOLOCKED */

	if (accept) {
		bcopy(sti->sti_faddr_sa, name,
		    MIN(*namelen, sti->sti_faddr_len));
		*namelen = sti->sti_faddr_noxlate ? 0: sti->sti_faddr_len;
		goto done;
	}

	if (!(so->so_state & SS_ISCONNECTED)) {
		error = ENOTCONN;
		goto done;
	}
	/* Added this check for X/Open */
	if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) {
		error = EINVAL;
		if (xnet_check_print) {
			printf("sockfs: X/Open getpeername check => EINVAL\n");
		}
		goto done;
	}

	if (sti->sti_faddr_valid) {
		bcopy(sti->sti_faddr_sa, name,
		    MIN(*namelen, sti->sti_faddr_len));
		*namelen = sti->sti_faddr_noxlate ? 0: sti->sti_faddr_len;
		goto done;
	}

#ifdef DEBUG
	dprintso(so, 1, ("sotpi_getpeername (local): %s\n",
	    pr_addr(so->so_family, sti->sti_faddr_sa,
	    (t_uscalar_t)sti->sti_faddr_len)));
#endif /* DEBUG */

	if (so->so_family == AF_UNIX) {
		/* Transport has different name space - return local info */
		if (sti->sti_faddr_noxlate)
			*namelen = 0;
		error = 0;
		goto done;
	}

	ASSERT(so->so_family != AF_UNIX && sti->sti_faddr_noxlate == 0);

	ASSERT(sti->sti_faddr_sa);
	/* Allocate local buffer to use with ioctl */
	addrlen = (t_uscalar_t)sti->sti_faddr_maxlen;
	/* so_lock is dropped across the allocation and the ioctl. */
	mutex_exit(&so->so_lock);
	addr = kmem_alloc(addrlen, KM_SLEEP);

	/*
	 * Issue TI_GETPEERNAME with signals masked.
	 * Put the result in sti_faddr_sa so that getpeername works after
	 * a shutdown(output).
	 * If the ioctl fails (e.g. due to a ECONNRESET) the error is not
	 * returned to the caller; see below.
	 */
	strbuf.buf = addr;
	strbuf.maxlen = addrlen;
	strbuf.len = 0;

	sigintr(&smask, 0);
	res = 0;
	ASSERT(cr);
	error = strioctl(SOTOV(so), TI_GETPEERNAME, (intptr_t)&strbuf,
	    0, K_TO_K, cr, &res);
	sigunintr(&smask);

	mutex_enter(&so->so_lock);
	/*
	 * If there is an error, don't fail the getpeername.
	 *
	 * NOTE(review): on the error path nothing is copied to name and
	 * *namelen is left as passed in; confirm callers expect that.
	 */
	if (error) {
		/*
		 * Various stream head errors can be returned to the ioctl.
		 * However, it is impossible to determine which ones of
		 * these are really socket level errors that were incorrectly
		 * consumed by the ioctl. Thus this code silently ignores the
		 * error - the code explicitly does not reinstate the error
		 * using soseterror().
		 * Experiments have shown that at least this set of
		 * errors are reported and should not be reinstated on the
		 * socket:
		 *	EINVAL	E.g. if an I_LINK was in effect when
		 *		getpeername was called.
		 *	EPIPE	The ioctl error semantics prefer the write
		 *		side error over the read side error.
		 *	ENOTCONN The transport just got disconnected but
		 *		sockfs had not yet seen the T_DISCON_IND
		 *		when issuing the ioctl.
		 */
		error = 0;
	} else if (res == 0 && strbuf.len > 0 &&
	    (so->so_state & SS_ISCONNECTED)) {
		ASSERT(strbuf.len <= (int)sti->sti_faddr_maxlen);
		sti->sti_faddr_len = (socklen_t)strbuf.len;
		bcopy(addr, sti->sti_faddr_sa, sti->sti_faddr_len);
		sti->sti_faddr_valid = 1;

		bcopy(addr, name, MIN(*namelen, sti->sti_faddr_len));
		*namelen = sti->sti_faddr_len;
	}
	kmem_free(addr, addrlen);
#ifdef DEBUG
	dprintso(so, 1, ("sotpi_getpeername (tp): %s\n",
	    pr_addr(so->so_family, sti->sti_faddr_sa,
	    (t_uscalar_t)sti->sti_faddr_len)));
#endif /* DEBUG */
done:
	so_unlock_single(so, SOLOCKED);
	mutex_exit(&so->so_lock);
	return (error);
}

/*
 * Update sti_laddr by asking the transport (unless AF_UNIX).
4893 */ 4894 int 4895 sotpi_getsockname(struct sonode *so, struct sockaddr *name, socklen_t *namelen, 4896 struct cred *cr) 4897 { 4898 struct strbuf strbuf; 4899 int error = 0, res; 4900 void *addr; 4901 t_uscalar_t addrlen; 4902 k_sigset_t smask; 4903 sotpi_info_t *sti = SOTOTPI(so); 4904 4905 dprintso(so, 1, ("sotpi_getsockname(%p) %s\n", 4906 (void *)so, pr_state(so->so_state, so->so_mode))); 4907 4908 ASSERT(*namelen > 0); 4909 mutex_enter(&so->so_lock); 4910 so_lock_single(so); /* Set SOLOCKED */ 4911 4912 #ifdef DEBUG 4913 4914 dprintso(so, 1, ("sotpi_getsockname (local): %s\n", 4915 pr_addr(so->so_family, sti->sti_laddr_sa, 4916 (t_uscalar_t)sti->sti_laddr_len))); 4917 #endif /* DEBUG */ 4918 if (sti->sti_laddr_valid) { 4919 bcopy(sti->sti_laddr_sa, name, 4920 MIN(*namelen, sti->sti_laddr_len)); 4921 *namelen = sti->sti_laddr_len; 4922 goto done; 4923 } 4924 4925 if (so->so_family == AF_UNIX) { 4926 /* 4927 * Transport has different name space - return local info. If we 4928 * have enough space, let consumers know the family. 4929 */ 4930 if (*namelen >= sizeof (sa_family_t)) { 4931 name->sa_family = AF_UNIX; 4932 *namelen = sizeof (sa_family_t); 4933 } else { 4934 *namelen = 0; 4935 } 4936 error = 0; 4937 goto done; 4938 } 4939 if (!(so->so_state & SS_ISBOUND)) { 4940 /* If not bound, then nothing to return. */ 4941 error = 0; 4942 goto done; 4943 } 4944 4945 /* Allocate local buffer to use with ioctl */ 4946 addrlen = (t_uscalar_t)sti->sti_laddr_maxlen; 4947 mutex_exit(&so->so_lock); 4948 addr = kmem_alloc(addrlen, KM_SLEEP); 4949 4950 /* 4951 * Issue TI_GETMYNAME with signals masked. 4952 * Put the result in sti_laddr_sa so that getsockname works after 4953 * a shutdown(output). 4954 * If the ioctl fails (e.g. due to a ECONNRESET) the error is reposted 4955 * back to the socket. 
4956 */ 4957 strbuf.buf = addr; 4958 strbuf.maxlen = addrlen; 4959 strbuf.len = 0; 4960 4961 sigintr(&smask, 0); 4962 res = 0; 4963 ASSERT(cr); 4964 error = strioctl(SOTOV(so), TI_GETMYNAME, (intptr_t)&strbuf, 4965 0, K_TO_K, cr, &res); 4966 sigunintr(&smask); 4967 4968 mutex_enter(&so->so_lock); 4969 /* 4970 * If there is an error record the error in so_error put don't fail 4971 * the getsockname. Instead fallback on the recorded 4972 * sti->sti_laddr_sa. 4973 */ 4974 if (error) { 4975 /* 4976 * Various stream head errors can be returned to the ioctl. 4977 * However, it is impossible to determine which ones of 4978 * these are really socket level errors that were incorrectly 4979 * consumed by the ioctl. Thus this code silently ignores the 4980 * error - to code explicitly does not reinstate the error 4981 * using soseterror(). 4982 * Experiments have shows that at least this set of 4983 * errors are reported and should not be reinstated on the 4984 * socket: 4985 * EINVAL E.g. if an I_LINK was in effect when 4986 * getsockname was called. 4987 * EPIPE The ioctl error semantics prefer the write 4988 * side error over the read side error. 4989 */ 4990 error = 0; 4991 } else if (res == 0 && strbuf.len > 0 && 4992 (so->so_state & SS_ISBOUND)) { 4993 ASSERT(strbuf.len <= (int)sti->sti_laddr_maxlen); 4994 sti->sti_laddr_len = (socklen_t)strbuf.len; 4995 bcopy(addr, sti->sti_laddr_sa, sti->sti_laddr_len); 4996 sti->sti_laddr_valid = 1; 4997 4998 bcopy(addr, name, MIN(sti->sti_laddr_len, *namelen)); 4999 *namelen = sti->sti_laddr_len; 5000 } 5001 kmem_free(addr, addrlen); 5002 #ifdef DEBUG 5003 dprintso(so, 1, ("sotpi_getsockname (tp): %s\n", 5004 pr_addr(so->so_family, sti->sti_laddr_sa, 5005 (t_uscalar_t)sti->sti_laddr_len))); 5006 #endif /* DEBUG */ 5007 done: 5008 so_unlock_single(so, SOLOCKED); 5009 mutex_exit(&so->so_lock); 5010 return (error); 5011 } 5012 5013 /* 5014 * Get socket options. 
For SOL_SOCKET options some options are handled 5015 * by the sockfs while others use the value recorded in the sonode as a 5016 * fallback should the T_SVR4_OPTMGMT_REQ fail. 5017 * 5018 * On the return most *optlenp bytes are copied to optval. 5019 */ 5020 /* ARGSUSED */ 5021 int 5022 sotpi_getsockopt(struct sonode *so, int level, int option_name, 5023 void *optval, socklen_t *optlenp, int flags, struct cred *cr) 5024 { 5025 struct T_optmgmt_req optmgmt_req; 5026 struct T_optmgmt_ack *optmgmt_ack; 5027 struct opthdr oh; 5028 struct opthdr *opt_res; 5029 mblk_t *mp = NULL; 5030 int error = 0; 5031 void *option = NULL; /* Set if fallback value */ 5032 t_uscalar_t maxlen = *optlenp; 5033 t_uscalar_t len; 5034 uint32_t value; 5035 struct timeval tmo_val; /* used for SO_RCVTIMEO, SO_SNDTIMEO */ 5036 struct timeval32 tmo_val32; 5037 struct so_snd_bufinfo snd_bufinfo; /* used for zero copy */ 5038 5039 dprintso(so, 1, ("sotpi_getsockopt(%p, 0x%x, 0x%x, %p, %p) %s\n", 5040 (void *)so, level, option_name, optval, (void *)optlenp, 5041 pr_state(so->so_state, so->so_mode))); 5042 5043 mutex_enter(&so->so_lock); 5044 so_lock_single(so); /* Set SOLOCKED */ 5045 5046 /* 5047 * Check for SOL_SOCKET options. 5048 * Certain SOL_SOCKET options are returned directly whereas 5049 * others only provide a default (fallback) value should 5050 * the T_SVR4_OPTMGMT_REQ fail. 
5051 */ 5052 if (level == SOL_SOCKET) { 5053 /* Check parameters */ 5054 switch (option_name) { 5055 case SO_TYPE: 5056 case SO_ERROR: 5057 case SO_DEBUG: 5058 case SO_ACCEPTCONN: 5059 case SO_REUSEADDR: 5060 case SO_KEEPALIVE: 5061 case SO_DONTROUTE: 5062 case SO_BROADCAST: 5063 case SO_USELOOPBACK: 5064 case SO_OOBINLINE: 5065 case SO_SNDBUF: 5066 case SO_RCVBUF: 5067 #ifdef notyet 5068 case SO_SNDLOWAT: 5069 case SO_RCVLOWAT: 5070 #endif /* notyet */ 5071 case SO_DOMAIN: 5072 case SO_DGRAM_ERRIND: 5073 if (maxlen < (t_uscalar_t)sizeof (int32_t)) { 5074 error = EINVAL; 5075 eprintsoline(so, error); 5076 goto done2; 5077 } 5078 break; 5079 case SO_RCVTIMEO: 5080 case SO_SNDTIMEO: 5081 if (get_udatamodel() == DATAMODEL_NONE || 5082 get_udatamodel() == DATAMODEL_NATIVE) { 5083 if (maxlen < sizeof (struct timeval)) { 5084 error = EINVAL; 5085 eprintsoline(so, error); 5086 goto done2; 5087 } 5088 } else { 5089 if (maxlen < sizeof (struct timeval32)) { 5090 error = EINVAL; 5091 eprintsoline(so, error); 5092 goto done2; 5093 } 5094 5095 } 5096 break; 5097 case SO_LINGER: 5098 if (maxlen < (t_uscalar_t)sizeof (struct linger)) { 5099 error = EINVAL; 5100 eprintsoline(so, error); 5101 goto done2; 5102 } 5103 break; 5104 case SO_SND_BUFINFO: 5105 if (maxlen < (t_uscalar_t) 5106 sizeof (struct so_snd_bufinfo)) { 5107 error = EINVAL; 5108 eprintsoline(so, error); 5109 goto done2; 5110 } 5111 break; 5112 } 5113 5114 len = (t_uscalar_t)sizeof (uint32_t); /* Default */ 5115 5116 switch (option_name) { 5117 case SO_TYPE: 5118 value = so->so_type; 5119 option = &value; 5120 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5121 5122 case SO_ERROR: 5123 value = sogeterr(so, B_TRUE); 5124 option = &value; 5125 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5126 5127 case SO_ACCEPTCONN: 5128 if (so->so_state & SS_ACCEPTCONN) 5129 value = SO_ACCEPTCONN; 5130 else 5131 value = 0; 5132 #ifdef DEBUG 5133 if (value) { 5134 dprintso(so, 1, 5135 ("sotpi_getsockopt: 0x%x is 
set\n", 5136 option_name)); 5137 } else { 5138 dprintso(so, 1, 5139 ("sotpi_getsockopt: 0x%x not set\n", 5140 option_name)); 5141 } 5142 #endif /* DEBUG */ 5143 option = &value; 5144 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5145 5146 case SO_DEBUG: 5147 case SO_REUSEADDR: 5148 case SO_KEEPALIVE: 5149 case SO_DONTROUTE: 5150 case SO_BROADCAST: 5151 case SO_USELOOPBACK: 5152 case SO_OOBINLINE: 5153 case SO_DGRAM_ERRIND: 5154 value = (so->so_options & option_name); 5155 #ifdef DEBUG 5156 if (value) { 5157 dprintso(so, 1, 5158 ("sotpi_getsockopt: 0x%x is set\n", 5159 option_name)); 5160 } else { 5161 dprintso(so, 1, 5162 ("sotpi_getsockopt: 0x%x not set\n", 5163 option_name)); 5164 } 5165 #endif /* DEBUG */ 5166 option = &value; 5167 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5168 5169 /* 5170 * The following options are only returned by sockfs when the 5171 * T_SVR4_OPTMGMT_REQ fails. 5172 */ 5173 case SO_LINGER: 5174 option = &so->so_linger; 5175 len = (t_uscalar_t)sizeof (struct linger); 5176 break; 5177 case SO_SNDBUF: { 5178 ssize_t lvalue; 5179 5180 /* 5181 * If the option has not been set then get a default 5182 * value from the read queue. This value is 5183 * returned if the transport fails 5184 * the T_SVR4_OPTMGMT_REQ. 5185 */ 5186 lvalue = so->so_sndbuf; 5187 if (lvalue == 0) { 5188 mutex_exit(&so->so_lock); 5189 (void) strqget(strvp2wq(SOTOV(so))->q_next, 5190 QHIWAT, 0, &lvalue); 5191 mutex_enter(&so->so_lock); 5192 dprintso(so, 1, 5193 ("got SO_SNDBUF %ld from q\n", lvalue)); 5194 } 5195 value = (int)lvalue; 5196 option = &value; 5197 len = (t_uscalar_t)sizeof (so->so_sndbuf); 5198 break; 5199 } 5200 case SO_RCVBUF: { 5201 ssize_t lvalue; 5202 5203 /* 5204 * If the option has not been set then get a default 5205 * value from the read queue. This value is 5206 * returned if the transport fails 5207 * the T_SVR4_OPTMGMT_REQ. 
5208 * 5209 * XXX If SO_RCVBUF has been set and this is an 5210 * XPG 4.2 application then do not ask the transport 5211 * since the transport might adjust the value and not 5212 * return exactly what was set by the application. 5213 * For non-XPG 4.2 application we return the value 5214 * that the transport is actually using. 5215 */ 5216 lvalue = so->so_rcvbuf; 5217 if (lvalue == 0) { 5218 mutex_exit(&so->so_lock); 5219 (void) strqget(RD(strvp2wq(SOTOV(so))), 5220 QHIWAT, 0, &lvalue); 5221 mutex_enter(&so->so_lock); 5222 dprintso(so, 1, 5223 ("got SO_RCVBUF %ld from q\n", lvalue)); 5224 } else if (flags & _SOGETSOCKOPT_XPG4_2) { 5225 value = (int)lvalue; 5226 option = &value; 5227 goto copyout; /* skip asking transport */ 5228 } 5229 value = (int)lvalue; 5230 option = &value; 5231 len = (t_uscalar_t)sizeof (so->so_rcvbuf); 5232 break; 5233 } 5234 case SO_DOMAIN: 5235 value = so->so_family; 5236 option = &value; 5237 goto copyout; /* No need to issue T_SVR4_OPTMGMT_REQ */ 5238 5239 #ifdef notyet 5240 /* 5241 * We do not implement the semantics of these options 5242 * thus we shouldn't implement the options either. 
5243 */ 5244 case SO_SNDLOWAT: 5245 value = so->so_sndlowat; 5246 option = &value; 5247 break; 5248 case SO_RCVLOWAT: 5249 value = so->so_rcvlowat; 5250 option = &value; 5251 break; 5252 #endif /* notyet */ 5253 case SO_SNDTIMEO: 5254 case SO_RCVTIMEO: { 5255 clock_t val; 5256 5257 if (option_name == SO_RCVTIMEO) 5258 val = drv_hztousec(so->so_rcvtimeo); 5259 else 5260 val = drv_hztousec(so->so_sndtimeo); 5261 tmo_val.tv_sec = val / (1000 * 1000); 5262 tmo_val.tv_usec = val % (1000 * 1000); 5263 if (get_udatamodel() == DATAMODEL_NONE || 5264 get_udatamodel() == DATAMODEL_NATIVE) { 5265 option = &tmo_val; 5266 len = sizeof (struct timeval); 5267 } else { 5268 TIMEVAL_TO_TIMEVAL32(&tmo_val32, &tmo_val); 5269 option = &tmo_val32; 5270 len = sizeof (struct timeval32); 5271 } 5272 break; 5273 } 5274 case SO_SND_BUFINFO: { 5275 snd_bufinfo.sbi_wroff = 5276 (so->so_proto_props).sopp_wroff; 5277 snd_bufinfo.sbi_maxblk = 5278 (so->so_proto_props).sopp_maxblk; 5279 snd_bufinfo.sbi_maxpsz = 5280 (so->so_proto_props).sopp_maxpsz; 5281 snd_bufinfo.sbi_tail = 5282 (so->so_proto_props).sopp_tail; 5283 option = &snd_bufinfo; 5284 len = (t_uscalar_t)sizeof (struct so_snd_bufinfo); 5285 break; 5286 } 5287 } 5288 } 5289 5290 mutex_exit(&so->so_lock); 5291 5292 /* Send request */ 5293 optmgmt_req.PRIM_type = T_SVR4_OPTMGMT_REQ; 5294 optmgmt_req.MGMT_flags = T_CHECK; 5295 optmgmt_req.OPT_length = (t_scalar_t)(sizeof (oh) + maxlen); 5296 optmgmt_req.OPT_offset = (t_scalar_t)sizeof (optmgmt_req); 5297 5298 oh.level = level; 5299 oh.name = option_name; 5300 oh.len = maxlen; 5301 5302 mp = soallocproto3(&optmgmt_req, sizeof (optmgmt_req), 5303 &oh, sizeof (oh), NULL, maxlen, 0, _ALLOC_SLEEP, cr); 5304 /* Let option management work in the presence of data flow control */ 5305 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 5306 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 5307 mp = NULL; 5308 mutex_enter(&so->so_lock); 5309 if (error) { 5310 eprintsoline(so, error); 5311 goto done2; 
5312 } 5313 error = sowaitprim(so, T_SVR4_OPTMGMT_REQ, T_OPTMGMT_ACK, 5314 (t_uscalar_t)(sizeof (*optmgmt_ack) + sizeof (*opt_res)), &mp, 0); 5315 if (error) { 5316 if (option != NULL) { 5317 /* We have a fallback value */ 5318 error = 0; 5319 goto copyout; 5320 } 5321 eprintsoline(so, error); 5322 goto done2; 5323 } 5324 ASSERT(mp); 5325 optmgmt_ack = (struct T_optmgmt_ack *)mp->b_rptr; 5326 opt_res = (struct opthdr *)sogetoff(mp, optmgmt_ack->OPT_offset, 5327 optmgmt_ack->OPT_length, __TPI_ALIGN_SIZE); 5328 if (opt_res == NULL) { 5329 if (option != NULL) { 5330 /* We have a fallback value */ 5331 error = 0; 5332 goto copyout; 5333 } 5334 error = EPROTO; 5335 eprintsoline(so, error); 5336 goto done; 5337 } 5338 option = &opt_res[1]; 5339 5340 /* check to ensure that the option is within bounds */ 5341 if (((uintptr_t)option + opt_res->len < (uintptr_t)option) || 5342 (uintptr_t)option + opt_res->len > (uintptr_t)mp->b_wptr) { 5343 if (option != NULL) { 5344 /* We have a fallback value */ 5345 error = 0; 5346 goto copyout; 5347 } 5348 error = EPROTO; 5349 eprintsoline(so, error); 5350 goto done; 5351 } 5352 5353 len = opt_res->len; 5354 5355 copyout: { 5356 t_uscalar_t size = MIN(len, maxlen); 5357 bcopy(option, optval, size); 5358 bcopy(&size, optlenp, sizeof (size)); 5359 } 5360 done: 5361 freemsg(mp); 5362 done2: 5363 so_unlock_single(so, SOLOCKED); 5364 mutex_exit(&so->so_lock); 5365 5366 return (error); 5367 } 5368 5369 /* 5370 * Set socket options. All options are passed down in a T_SVR4_OPTMGMT_REQ. 5371 * SOL_SOCKET options are also recorded in the sonode. A setsockopt for 5372 * SOL_SOCKET options will not fail just because the T_SVR4_OPTMGMT_REQ fails - 5373 * setsockopt has to work even if the transport does not support the option. 
5374 */ 5375 /* ARGSUSED */ 5376 int 5377 sotpi_setsockopt(struct sonode *so, int level, int option_name, 5378 const void *optval, t_uscalar_t optlen, struct cred *cr) 5379 { 5380 struct T_optmgmt_req optmgmt_req; 5381 struct opthdr oh; 5382 mblk_t *mp; 5383 int error = 0; 5384 boolean_t handled = B_FALSE; 5385 5386 dprintso(so, 1, ("sotpi_setsockopt(%p, 0x%x, 0x%x, %p, %d) %s\n", 5387 (void *)so, level, option_name, optval, optlen, 5388 pr_state(so->so_state, so->so_mode))); 5389 5390 /* X/Open requires this check */ 5391 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 5392 if (xnet_check_print) 5393 printf("sockfs: X/Open setsockopt check => EINVAL\n"); 5394 return (EINVAL); 5395 } 5396 5397 mutex_enter(&so->so_lock); 5398 so_lock_single(so); /* Set SOLOCKED */ 5399 mutex_exit(&so->so_lock); 5400 5401 optmgmt_req.PRIM_type = T_SVR4_OPTMGMT_REQ; 5402 optmgmt_req.MGMT_flags = T_NEGOTIATE; 5403 optmgmt_req.OPT_length = (t_scalar_t)sizeof (oh) + optlen; 5404 optmgmt_req.OPT_offset = (t_scalar_t)sizeof (optmgmt_req); 5405 5406 oh.level = level; 5407 oh.name = option_name; 5408 oh.len = optlen; 5409 5410 mp = soallocproto3(&optmgmt_req, sizeof (optmgmt_req), 5411 &oh, sizeof (oh), optval, optlen, 0, _ALLOC_SLEEP, cr); 5412 /* Let option management work in the presence of data flow control */ 5413 error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0, 5414 MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0); 5415 mp = NULL; 5416 mutex_enter(&so->so_lock); 5417 if (error) { 5418 eprintsoline(so, error); 5419 goto done2; 5420 } 5421 error = sowaitprim(so, T_SVR4_OPTMGMT_REQ, T_OPTMGMT_ACK, 5422 (t_uscalar_t)sizeof (struct T_optmgmt_ack), &mp, 0); 5423 if (error) { 5424 eprintsoline(so, error); 5425 goto done; 5426 } 5427 ASSERT(mp); 5428 /* No need to verify T_optmgmt_ack */ 5429 freemsg(mp); 5430 done: 5431 /* 5432 * Check for SOL_SOCKET options and record their values. 
5433 * If we know about a SOL_SOCKET parameter and the transport 5434 * failed it with TBADOPT or TOUTSTATE (i.e. ENOPROTOOPT or 5435 * EPROTO) we let the setsockopt succeed. 5436 */ 5437 if (level == SOL_SOCKET) { 5438 /* Check parameters */ 5439 switch (option_name) { 5440 case SO_DEBUG: 5441 case SO_REUSEADDR: 5442 case SO_KEEPALIVE: 5443 case SO_DONTROUTE: 5444 case SO_BROADCAST: 5445 case SO_USELOOPBACK: 5446 case SO_OOBINLINE: 5447 case SO_SNDBUF: 5448 case SO_RCVBUF: 5449 #ifdef notyet 5450 case SO_SNDLOWAT: 5451 case SO_RCVLOWAT: 5452 #endif /* notyet */ 5453 case SO_DGRAM_ERRIND: 5454 if (optlen != (t_uscalar_t)sizeof (int32_t)) { 5455 error = EINVAL; 5456 eprintsoline(so, error); 5457 goto done2; 5458 } 5459 ASSERT(optval); 5460 handled = B_TRUE; 5461 break; 5462 case SO_SNDTIMEO: 5463 case SO_RCVTIMEO: 5464 if (get_udatamodel() == DATAMODEL_NONE || 5465 get_udatamodel() == DATAMODEL_NATIVE) { 5466 if (optlen != sizeof (struct timeval)) { 5467 error = EINVAL; 5468 eprintsoline(so, error); 5469 goto done2; 5470 } 5471 } else { 5472 if (optlen != sizeof (struct timeval32)) { 5473 error = EINVAL; 5474 eprintsoline(so, error); 5475 goto done2; 5476 } 5477 } 5478 ASSERT(optval); 5479 handled = B_TRUE; 5480 break; 5481 case SO_LINGER: 5482 if (optlen != (t_uscalar_t)sizeof (struct linger)) { 5483 error = EINVAL; 5484 eprintsoline(so, error); 5485 goto done2; 5486 } 5487 ASSERT(optval); 5488 handled = B_TRUE; 5489 break; 5490 } 5491 5492 #define intvalue (*(int32_t *)optval) 5493 5494 switch (option_name) { 5495 case SO_TYPE: 5496 case SO_ERROR: 5497 case SO_ACCEPTCONN: 5498 /* Can't be set */ 5499 error = ENOPROTOOPT; 5500 goto done2; 5501 case SO_LINGER: { 5502 struct linger *l = (struct linger *)optval; 5503 5504 so->so_linger.l_linger = l->l_linger; 5505 if (l->l_onoff) { 5506 so->so_linger.l_onoff = SO_LINGER; 5507 so->so_options |= SO_LINGER; 5508 } else { 5509 so->so_linger.l_onoff = 0; 5510 so->so_options &= ~SO_LINGER; 5511 } 5512 break; 5513 } 5514 
5515 case SO_DEBUG: 5516 #ifdef SOCK_TEST 5517 if (intvalue & 2) 5518 sock_test_timelimit = 10 * hz; 5519 else 5520 sock_test_timelimit = 0; 5521 5522 if (intvalue & 4) 5523 do_useracc = 0; 5524 else 5525 do_useracc = 1; 5526 #endif /* SOCK_TEST */ 5527 /* FALLTHRU */ 5528 case SO_REUSEADDR: 5529 case SO_KEEPALIVE: 5530 case SO_DONTROUTE: 5531 case SO_BROADCAST: 5532 case SO_USELOOPBACK: 5533 case SO_OOBINLINE: 5534 case SO_DGRAM_ERRIND: 5535 if (intvalue != 0) { 5536 dprintso(so, 1, 5537 ("socket_setsockopt: setting 0x%x\n", 5538 option_name)); 5539 so->so_options |= option_name; 5540 } else { 5541 dprintso(so, 1, 5542 ("socket_setsockopt: clearing 0x%x\n", 5543 option_name)); 5544 so->so_options &= ~option_name; 5545 } 5546 break; 5547 /* 5548 * The following options are only returned by us when the 5549 * transport layer fails. 5550 * XXX XPG 4.2 applications retrieve SO_RCVBUF from sockfs 5551 * since the transport might adjust the value and not 5552 * return exactly what was set by the application. 5553 */ 5554 case SO_SNDBUF: 5555 so->so_sndbuf = intvalue; 5556 break; 5557 case SO_RCVBUF: 5558 so->so_rcvbuf = intvalue; 5559 break; 5560 case SO_RCVPSH: 5561 so->so_rcv_timer_interval = intvalue; 5562 break; 5563 #ifdef notyet 5564 /* 5565 * We do not implement the semantics of these options 5566 * thus we shouldn't implement the options either. 
5567 */ 5568 case SO_SNDLOWAT: 5569 so->so_sndlowat = intvalue; 5570 break; 5571 case SO_RCVLOWAT: 5572 so->so_rcvlowat = intvalue; 5573 break; 5574 #endif /* notyet */ 5575 case SO_SNDTIMEO: 5576 case SO_RCVTIMEO: { 5577 struct timeval tl; 5578 clock_t val; 5579 5580 if (get_udatamodel() == DATAMODEL_NONE || 5581 get_udatamodel() == DATAMODEL_NATIVE) 5582 bcopy(&tl, (struct timeval *)optval, 5583 sizeof (struct timeval)); 5584 else 5585 TIMEVAL32_TO_TIMEVAL(&tl, 5586 (struct timeval32 *)optval); 5587 val = tl.tv_sec * 1000 * 1000 + tl.tv_usec; 5588 if (option_name == SO_RCVTIMEO) 5589 so->so_rcvtimeo = drv_usectohz(val); 5590 else 5591 so->so_sndtimeo = drv_usectohz(val); 5592 break; 5593 } 5594 } 5595 #undef intvalue 5596 5597 if (error) { 5598 if ((error == ENOPROTOOPT || error == EPROTO || 5599 error == EINVAL) && handled) { 5600 dprintso(so, 1, 5601 ("setsockopt: ignoring error %d for 0x%x\n", 5602 error, option_name)); 5603 error = 0; 5604 } 5605 } 5606 } 5607 done2: 5608 so_unlock_single(so, SOLOCKED); 5609 mutex_exit(&so->so_lock); 5610 return (error); 5611 } 5612 5613 /* 5614 * sotpi_close() is called when the last open reference goes away. 
5615 */ 5616 /* ARGSUSED */ 5617 int 5618 sotpi_close(struct sonode *so, int flag, struct cred *cr) 5619 { 5620 struct vnode *vp = SOTOV(so); 5621 dev_t dev; 5622 int error = 0; 5623 sotpi_info_t *sti = SOTOTPI(so); 5624 5625 dprintso(so, 1, ("sotpi_close(%p, %x) %s\n", 5626 (void *)vp, flag, pr_state(so->so_state, so->so_mode))); 5627 5628 dev = sti->sti_dev; 5629 5630 ASSERT(STREAMSTAB(getmajor(dev))); 5631 5632 mutex_enter(&so->so_lock); 5633 so_lock_single(so); /* Set SOLOCKED */ 5634 5635 ASSERT(so_verify_oobstate(so)); 5636 5637 if (vp->v_stream != NULL) { 5638 vnode_t *ux_vp; 5639 5640 if (so->so_family == AF_UNIX) { 5641 /* Could avoid this when CANTSENDMORE for !dgram */ 5642 so_unix_close(so); 5643 } 5644 5645 mutex_exit(&so->so_lock); 5646 /* 5647 * Disassemble the linkage from the AF_UNIX underlying file 5648 * system vnode to this socket (by atomically clearing 5649 * v_stream in vn_rele_stream) before strclose clears sd_vnode 5650 * and frees the stream head. 5651 */ 5652 if ((ux_vp = sti->sti_ux_bound_vp) != NULL) { 5653 ASSERT(ux_vp->v_stream); 5654 sti->sti_ux_bound_vp = NULL; 5655 vn_rele_stream(ux_vp); 5656 } 5657 error = strclose(vp, flag, cr); 5658 vp->v_stream = NULL; 5659 mutex_enter(&so->so_lock); 5660 } 5661 5662 /* 5663 * Flush the T_DISCON_IND on sti_discon_ind_mp. 5664 */ 5665 so_flush_discon_ind(so); 5666 5667 so_unlock_single(so, SOLOCKED); 5668 mutex_exit(&so->so_lock); 5669 5670 /* 5671 * Needed for STREAMs. 5672 * Decrement the device driver's reference count for streams 5673 * opened via the clone dip. The driver was held in clone_open(). 5674 * The absence of clone_close() forces this asymmetry. 
5675 */ 5676 if (so->so_flag & SOCLONE) 5677 ddi_rele_driver(getmajor(dev)); 5678 5679 return (error); 5680 } 5681 5682 static int 5683 sotpi_ioctl(struct sonode *so, int cmd, intptr_t arg, int mode, 5684 struct cred *cr, int32_t *rvalp) 5685 { 5686 struct vnode *vp = SOTOV(so); 5687 sotpi_info_t *sti = SOTOTPI(so); 5688 int error = 0; 5689 5690 dprintso(so, 0, ("sotpi_ioctl: cmd 0x%x, arg 0x%lx, state %s\n", 5691 cmd, arg, pr_state(so->so_state, so->so_mode))); 5692 5693 switch (cmd) { 5694 case SIOCSQPTR: 5695 /* 5696 * SIOCSQPTR is valid only when helper stream is created 5697 * by the protocol. 5698 */ 5699 case _I_INSERT: 5700 case _I_REMOVE: 5701 /* 5702 * Since there's no compelling reason to support these ioctls 5703 * on sockets, and doing so would increase the complexity 5704 * markedly, prevent it. 5705 */ 5706 return (EOPNOTSUPP); 5707 5708 case I_FIND: 5709 case I_LIST: 5710 case I_LOOK: 5711 case I_POP: 5712 case I_PUSH: 5713 /* 5714 * To prevent races and inconsistencies between the actual 5715 * state of the stream and the state according to the sonode, 5716 * we serialize all operations which modify or operate on the 5717 * list of modules on the socket's stream. 5718 */ 5719 mutex_enter(&sti->sti_plumb_lock); 5720 error = socktpi_plumbioctl(vp, cmd, arg, mode, cr, rvalp); 5721 mutex_exit(&sti->sti_plumb_lock); 5722 return (error); 5723 5724 default: 5725 if (so->so_version != SOV_STREAM) 5726 break; 5727 5728 /* 5729 * The imaginary "sockmod" has been popped; act as a stream. 5730 */ 5731 return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp)); 5732 } 5733 5734 ASSERT(so->so_version != SOV_STREAM); 5735 5736 /* 5737 * Process socket-specific ioctls. 
5738 */ 5739 switch (cmd) { 5740 case FIONBIO: { 5741 int32_t value; 5742 5743 if (so_copyin((void *)arg, &value, sizeof (int32_t), 5744 (mode & (int)FKIOCTL))) 5745 return (EFAULT); 5746 5747 mutex_enter(&so->so_lock); 5748 if (value) { 5749 so->so_state |= SS_NDELAY; 5750 } else { 5751 so->so_state &= ~SS_NDELAY; 5752 } 5753 mutex_exit(&so->so_lock); 5754 return (0); 5755 } 5756 5757 case FIOASYNC: { 5758 int32_t value; 5759 5760 if (so_copyin((void *)arg, &value, sizeof (int32_t), 5761 (mode & (int)FKIOCTL))) 5762 return (EFAULT); 5763 5764 mutex_enter(&so->so_lock); 5765 /* 5766 * SS_ASYNC flag not already set correctly? 5767 * (!value != !(so->so_state & SS_ASYNC)) 5768 * but some engineers find that too hard to read. 5769 */ 5770 if (value == 0 && (so->so_state & SS_ASYNC) != 0 || 5771 value != 0 && (so->so_state & SS_ASYNC) == 0) 5772 error = so_flip_async(so, vp, mode, cr); 5773 mutex_exit(&so->so_lock); 5774 return (error); 5775 } 5776 5777 case SIOCSPGRP: 5778 case FIOSETOWN: { 5779 pid_t pgrp; 5780 5781 if (so_copyin((void *)arg, &pgrp, sizeof (pid_t), 5782 (mode & (int)FKIOCTL))) 5783 return (EFAULT); 5784 5785 mutex_enter(&so->so_lock); 5786 dprintso(so, 1, ("setown: new %d old %d\n", pgrp, so->so_pgrp)); 5787 /* Any change? */ 5788 if (pgrp != so->so_pgrp) 5789 error = so_set_siggrp(so, vp, pgrp, mode, cr); 5790 mutex_exit(&so->so_lock); 5791 return (error); 5792 } 5793 case SIOCGPGRP: 5794 case FIOGETOWN: 5795 if (so_copyout(&so->so_pgrp, (void *)arg, 5796 sizeof (pid_t), (mode & (int)FKIOCTL))) 5797 return (EFAULT); 5798 return (0); 5799 5800 case SIOCATMARK: { 5801 int retval; 5802 uint_t so_state; 5803 5804 /* 5805 * strwaitmark has a finite timeout after which it 5806 * returns -1 if the mark state is undetermined. 
5807 * In order to avoid any race between the mark state 5808 * in sockfs and the mark state in the stream head this 5809 * routine loops until the mark state can be determined 5810 * (or the urgent data indication has been removed by some 5811 * other thread). 5812 */ 5813 do { 5814 mutex_enter(&so->so_lock); 5815 so_state = so->so_state; 5816 mutex_exit(&so->so_lock); 5817 if (so_state & SS_RCVATMARK) { 5818 retval = 1; 5819 } else if (!(so_state & SS_OOBPEND)) { 5820 /* 5821 * No SIGURG has been generated -- there is no 5822 * pending or present urgent data. Thus can't 5823 * possibly be at the mark. 5824 */ 5825 retval = 0; 5826 } else { 5827 /* 5828 * Have the stream head wait until there is 5829 * either some messages on the read queue, or 5830 * STRATMARK or STRNOTATMARK gets set. The 5831 * STRNOTATMARK flag is used so that the 5832 * transport can send up a MSGNOTMARKNEXT 5833 * M_DATA to indicate that it is not 5834 * at the mark and additional data is not about 5835 * to be send upstream. 5836 * 5837 * If the mark state is undetermined this will 5838 * return -1 and we will loop rechecking the 5839 * socket state. 5840 */ 5841 retval = strwaitmark(vp); 5842 } 5843 } while (retval == -1); 5844 5845 if (so_copyout(&retval, (void *)arg, sizeof (int), 5846 (mode & (int)FKIOCTL))) 5847 return (EFAULT); 5848 return (0); 5849 } 5850 5851 case I_FDINSERT: 5852 case I_SENDFD: 5853 case I_RECVFD: 5854 case I_ATMARK: 5855 case _SIOCSOCKFALLBACK: 5856 /* 5857 * These ioctls do not apply to sockets. I_FDINSERT can be 5858 * used to send M_PROTO messages without modifying the socket 5859 * state. I_SENDFD/RECVFD should not be used for socket file 5860 * descriptor passing since they assume a twisted stream. 5861 * SIOCATMARK must be used instead of I_ATMARK. 5862 * 5863 * _SIOCSOCKFALLBACK from an application should never be 5864 * processed. It is only generated by socktpi_open() or 5865 * in response to I_POP or I_PUSH. 
5866 */ 5867 #ifdef DEBUG 5868 zcmn_err(getzoneid(), CE_WARN, 5869 "Unsupported STREAMS ioctl 0x%x on socket. " 5870 "Pid = %d\n", cmd, curproc->p_pid); 5871 #endif /* DEBUG */ 5872 return (EOPNOTSUPP); 5873 5874 case _I_GETPEERCRED: 5875 if ((mode & FKIOCTL) == 0) 5876 return (EINVAL); 5877 5878 mutex_enter(&so->so_lock); 5879 if ((so->so_mode & SM_CONNREQUIRED) == 0) { 5880 error = ENOTSUP; 5881 } else if ((so->so_state & SS_ISCONNECTED) == 0) { 5882 error = ENOTCONN; 5883 } else if (so->so_peercred != NULL) { 5884 k_peercred_t *kp = (k_peercred_t *)arg; 5885 kp->pc_cr = so->so_peercred; 5886 kp->pc_cpid = so->so_cpid; 5887 crhold(so->so_peercred); 5888 } else { 5889 error = EINVAL; 5890 } 5891 mutex_exit(&so->so_lock); 5892 return (error); 5893 5894 default: 5895 /* 5896 * Do the higher-order bits of the ioctl cmd indicate 5897 * that it is an I_* streams ioctl? 5898 */ 5899 if ((cmd & 0xffffff00U) == STR && 5900 so->so_version == SOV_SOCKBSD) { 5901 #ifdef DEBUG 5902 zcmn_err(getzoneid(), CE_WARN, 5903 "Unsupported STREAMS ioctl 0x%x on socket. " 5904 "Pid = %d\n", cmd, curproc->p_pid); 5905 #endif /* DEBUG */ 5906 return (EOPNOTSUPP); 5907 } 5908 return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp)); 5909 } 5910 } 5911 5912 /* 5913 * Handle plumbing-related ioctls. 5914 */ 5915 static int 5916 socktpi_plumbioctl(struct vnode *vp, int cmd, intptr_t arg, int mode, 5917 struct cred *cr, int32_t *rvalp) 5918 { 5919 static const char sockmod_name[] = "sockmod"; 5920 struct sonode *so = VTOSO(vp); 5921 char mname[FMNAMESZ + 1]; 5922 int error; 5923 sotpi_info_t *sti = SOTOTPI(so); 5924 5925 ASSERT(MUTEX_HELD(&sti->sti_plumb_lock)); 5926 5927 if (so->so_version == SOV_SOCKBSD) 5928 return (EOPNOTSUPP); 5929 5930 if (so->so_version == SOV_STREAM) { 5931 /* 5932 * The imaginary "sockmod" has been popped - act as a stream. 5933 * If this is a push of sockmod then change back to a socket. 5934 */ 5935 if (cmd == I_PUSH) { 5936 error = ((mode & FKIOCTL) ? 
copystr : copyinstr)( 5937 (void *)arg, mname, sizeof (mname), NULL); 5938 5939 if (error == 0 && strcmp(mname, sockmod_name) == 0) { 5940 dprintso(so, 0, ("socktpi_ioctl: going to " 5941 "socket version\n")); 5942 so_stream2sock(so); 5943 return (0); 5944 } 5945 } 5946 return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp)); 5947 } 5948 5949 switch (cmd) { 5950 case I_PUSH: 5951 if (sti->sti_direct) { 5952 mutex_enter(&so->so_lock); 5953 so_lock_single(so); 5954 mutex_exit(&so->so_lock); 5955 5956 error = strioctl(vp, _SIOCSOCKFALLBACK, 0, 0, K_TO_K, 5957 cr, rvalp); 5958 5959 mutex_enter(&so->so_lock); 5960 if (error == 0) 5961 sti->sti_direct = 0; 5962 so_unlock_single(so, SOLOCKED); 5963 mutex_exit(&so->so_lock); 5964 5965 if (error != 0) 5966 return (error); 5967 } 5968 5969 error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp); 5970 if (error == 0) 5971 sti->sti_pushcnt++; 5972 return (error); 5973 5974 case I_POP: 5975 if (sti->sti_pushcnt == 0) { 5976 /* Emulate sockmod being popped */ 5977 dprintso(so, 0, 5978 ("socktpi_ioctl: going to STREAMS version\n")); 5979 return (so_sock2stream(so)); 5980 } 5981 5982 error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp); 5983 if (error == 0) 5984 sti->sti_pushcnt--; 5985 return (error); 5986 5987 case I_LIST: { 5988 struct str_mlist *kmlistp, *umlistp; 5989 struct str_list kstrlist; 5990 ssize_t kstrlistsize; 5991 int i, nmods; 5992 5993 STRUCT_DECL(str_list, ustrlist); 5994 STRUCT_INIT(ustrlist, mode); 5995 5996 if (arg == 0) { 5997 error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp); 5998 if (error == 0) 5999 (*rvalp)++; /* Add one for sockmod */ 6000 return (error); 6001 } 6002 6003 error = so_copyin((void *)arg, STRUCT_BUF(ustrlist), 6004 STRUCT_SIZE(ustrlist), mode & FKIOCTL); 6005 if (error != 0) 6006 return (error); 6007 6008 nmods = STRUCT_FGET(ustrlist, sl_nmods); 6009 if (nmods <= 0) 6010 return (EINVAL); 6011 /* 6012 * Ceiling nmods at nstrpush to prevent someone from 6013 * maliciously consuming 
lots of kernel memory. 6014 */ 6015 nmods = MIN(nmods, nstrpush); 6016 6017 kstrlistsize = (nmods + 1) * sizeof (struct str_mlist); 6018 kstrlist.sl_nmods = nmods; 6019 kstrlist.sl_modlist = kmem_zalloc(kstrlistsize, KM_SLEEP); 6020 6021 error = strioctl(vp, cmd, (intptr_t)&kstrlist, mode, K_TO_K, 6022 cr, rvalp); 6023 if (error != 0) 6024 goto done; 6025 6026 /* 6027 * Considering the module list as a 0-based array of sl_nmods 6028 * modules, sockmod should conceptually exist at slot 6029 * sti_pushcnt. Insert sockmod at this location by sliding all 6030 * of the module names after so_pushcnt over by one. We know 6031 * that there will be room to do this since we allocated 6032 * sl_modlist with an additional slot. 6033 */ 6034 for (i = kstrlist.sl_nmods; i > sti->sti_pushcnt; i--) 6035 kstrlist.sl_modlist[i] = kstrlist.sl_modlist[i - 1]; 6036 6037 (void) strcpy(kstrlist.sl_modlist[i].l_name, sockmod_name); 6038 kstrlist.sl_nmods++; 6039 6040 /* 6041 * Copy all of the entries out to ustrlist. 
6042 */ 6043 kmlistp = kstrlist.sl_modlist; 6044 umlistp = STRUCT_FGETP(ustrlist, sl_modlist); 6045 for (i = 0; i < nmods && i < kstrlist.sl_nmods; i++) { 6046 error = so_copyout(kmlistp++, umlistp++, 6047 sizeof (struct str_mlist), mode & FKIOCTL); 6048 if (error != 0) 6049 goto done; 6050 } 6051 6052 error = so_copyout(&i, (void *)arg, sizeof (int32_t), 6053 mode & FKIOCTL); 6054 if (error == 0) 6055 *rvalp = 0; 6056 done: 6057 kmem_free(kstrlist.sl_modlist, kstrlistsize); 6058 return (error); 6059 } 6060 case I_LOOK: 6061 if (sti->sti_pushcnt == 0) { 6062 return (so_copyout(sockmod_name, (void *)arg, 6063 sizeof (sockmod_name), mode & FKIOCTL)); 6064 } 6065 return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp)); 6066 6067 case I_FIND: 6068 error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp); 6069 if (error && error != EINVAL) 6070 return (error); 6071 6072 /* if not found and string was sockmod return 1 */ 6073 if (*rvalp == 0 || error == EINVAL) { 6074 error = ((mode & FKIOCTL) ? copystr : copyinstr)( 6075 (void *)arg, mname, sizeof (mname), NULL); 6076 if (error == ENAMETOOLONG) 6077 error = EINVAL; 6078 6079 if (error == 0 && strcmp(mname, sockmod_name) == 0) 6080 *rvalp = 1; 6081 } 6082 return (error); 6083 6084 default: 6085 panic("socktpi_plumbioctl: unknown ioctl %d", cmd); 6086 break; 6087 } 6088 6089 return (0); 6090 } 6091 6092 /* 6093 * Wrapper around the streams poll routine that implements socket poll 6094 * semantics. 6095 * The sockfs never calls pollwakeup itself - the stream head take care 6096 * of all pollwakeups. Since sockfs never holds so_lock when calling the 6097 * stream head there can never be a deadlock due to holding so_lock across 6098 * pollwakeup and acquiring so_lock in this routine. 6099 * 6100 * However, since the performance of VOP_POLL is critical we avoid 6101 * acquiring so_lock here. 
This is based on two assumptions: 6102 * - The poll implementation holds locks to serialize the VOP_POLL call 6103 * and a pollwakeup for the same pollhead. This ensures that should 6104 * e.g. so_state change during a socktpi_poll call the pollwakeup 6105 * (which strsock_* and strrput conspire to issue) is issued after 6106 * the state change. Thus the pollwakeup will block until VOP_POLL has 6107 * returned and then wake up poll and have it call VOP_POLL again. 6108 * - The reading of so_state without holding so_lock does not result in 6109 * stale data that is older than the latest state change that has dropped 6110 * so_lock. This is ensured by the mutex_exit issuing the appropriate 6111 * memory barrier to force the data into the coherency domain. 6112 */ 6113 static int 6114 sotpi_poll( 6115 struct sonode *so, 6116 short events, 6117 int anyyet, 6118 short *reventsp, 6119 struct pollhead **phpp) 6120 { 6121 short origevents = events; 6122 struct vnode *vp = SOTOV(so); 6123 int error; 6124 int so_state = so->so_state; /* snapshot */ 6125 sotpi_info_t *sti = SOTOTPI(so); 6126 6127 dprintso(so, 0, ("socktpi_poll(%p): state %s err %d\n", 6128 (void *)vp, pr_state(so_state, so->so_mode), so->so_error)); 6129 6130 ASSERT(vp->v_type == VSOCK); 6131 ASSERT(vp->v_stream != NULL); 6132 6133 if (so->so_version == SOV_STREAM) { 6134 /* The imaginary "sockmod" has been popped - act as a stream */ 6135 return (strpoll(vp->v_stream, events, anyyet, 6136 reventsp, phpp)); 6137 } 6138 6139 if (!(so_state & SS_ISCONNECTED) && 6140 (so->so_mode & SM_CONNREQUIRED)) { 6141 /* Not connected yet - turn off write side events */ 6142 events &= ~(POLLOUT|POLLWRBAND); 6143 } 6144 /* 6145 * Check for errors without calling strpoll if the caller wants them. 6146 * In sockets the errors are represented as input/output events 6147 * and there is no need to ask the stream head for this information. 
6148 */ 6149 if (so->so_error != 0 && 6150 ((POLLIN|POLLRDNORM|POLLOUT) & origevents) != 0) { 6151 *reventsp = (POLLIN|POLLRDNORM|POLLOUT) & origevents; 6152 return (0); 6153 } 6154 /* 6155 * Ignore M_PROTO only messages such as the T_EXDATA_IND messages. 6156 * These message with only an M_PROTO/M_PCPROTO part and no M_DATA 6157 * will not trigger a POLLIN event with POLLRDDATA set. 6158 * The handling of urgent data (causing POLLRDBAND) is done by 6159 * inspecting SS_OOBPEND below. 6160 */ 6161 events |= POLLRDDATA; 6162 6163 /* 6164 * After shutdown(output) a stream head write error is set. 6165 * However, we should not return output events. 6166 */ 6167 events |= POLLNOERR; 6168 error = strpoll(vp->v_stream, events, anyyet, 6169 reventsp, phpp); 6170 if (error) 6171 return (error); 6172 6173 ASSERT(!(*reventsp & POLLERR)); 6174 6175 /* 6176 * Notes on T_CONN_IND handling for sockets. 6177 * 6178 * If strpoll() returned without events, SR_POLLIN is guaranteed 6179 * to be set, ensuring any subsequent strrput() runs pollwakeup(). 6180 * 6181 * Since the so_lock is not held, soqueueconnind() may have run 6182 * and a T_CONN_IND may be waiting. We now check for any queued 6183 * T_CONN_IND msgs on sti_conn_ind_head and set appropriate events 6184 * to ensure poll returns. 6185 * 6186 * However: 6187 * If the T_CONN_IND hasn't arrived by the time strpoll() returns, 6188 * when strrput() does run for an arriving M_PROTO with T_CONN_IND 6189 * the following actions will occur; taken together they ensure the 6190 * syscall will return. 6191 * 6192 * 1. If a socket, soqueueconnind() will queue the T_CONN_IND but if 6193 * the accept() was run on a non-blocking socket sowaitconnind() 6194 * may have already returned EWOULDBLOCK, so not be waiting to 6195 * process the message. Additionally socktpi_poll() has probably 6196 * proceeded past the sti_conn_ind_head check below. 6197 * 2. 
strrput() runs pollwakeup()->pollnotify()->cv_signal() to wake 6198 * this thread, however that could occur before poll_common() 6199 * has entered cv_wait. 6200 * 3. pollnotify() sets T_POLLWAKE, while holding the pc_lock. 6201 * 6202 * Before proceeding to cv_wait() in poll_common() for an event, 6203 * poll_common() atomically checks for T_POLLWAKE under the pc_lock, 6204 * and if set, re-calls strpoll() to ensure the late arriving 6205 * T_CONN_IND is recognized, and pollsys() returns. 6206 */ 6207 6208 if (sti->sti_conn_ind_head != NULL) 6209 *reventsp |= (POLLIN|POLLRDNORM) & events; 6210 6211 if (so->so_state & SS_CANTRCVMORE) { 6212 *reventsp |= POLLRDHUP & events; 6213 6214 if (so->so_state & SS_CANTSENDMORE) 6215 *reventsp |= POLLHUP; 6216 } 6217 6218 if (so->so_state & SS_OOBPEND) 6219 *reventsp |= POLLRDBAND & events; 6220 6221 return (0); 6222 } 6223 6224 /*ARGSUSED*/ 6225 static int 6226 socktpi_constructor(void *buf, void *cdrarg, int kmflags) 6227 { 6228 sotpi_sonode_t *st = (sotpi_sonode_t *)buf; 6229 int error = 0; 6230 6231 error = sonode_constructor(buf, cdrarg, kmflags); 6232 if (error != 0) 6233 return (error); 6234 6235 error = i_sotpi_info_constructor(&st->st_info); 6236 if (error != 0) 6237 sonode_destructor(buf, cdrarg); 6238 6239 st->st_sonode.so_priv = &st->st_info; 6240 6241 return (error); 6242 } 6243 6244 /*ARGSUSED1*/ 6245 static void 6246 socktpi_destructor(void *buf, void *cdrarg) 6247 { 6248 sotpi_sonode_t *st = (sotpi_sonode_t *)buf; 6249 6250 ASSERT(st->st_sonode.so_priv == &st->st_info); 6251 st->st_sonode.so_priv = NULL; 6252 6253 i_sotpi_info_destructor(&st->st_info); 6254 sonode_destructor(buf, cdrarg); 6255 } 6256 6257 static int 6258 socktpi_unix_constructor(void *buf, void *cdrarg, int kmflags) 6259 { 6260 int retval; 6261 6262 if ((retval = socktpi_constructor(buf, cdrarg, kmflags)) == 0) { 6263 struct sonode *so = (struct sonode *)buf; 6264 sotpi_info_t *sti = SOTOTPI(so); 6265 6266 mutex_enter(&socklist.sl_lock); 6267 
6268 sti->sti_next_so = socklist.sl_list; 6269 sti->sti_prev_so = NULL; 6270 if (sti->sti_next_so != NULL) 6271 SOTOTPI(sti->sti_next_so)->sti_prev_so = so; 6272 socklist.sl_list = so; 6273 6274 mutex_exit(&socklist.sl_lock); 6275 6276 } 6277 return (retval); 6278 } 6279 6280 static void 6281 socktpi_unix_destructor(void *buf, void *cdrarg) 6282 { 6283 struct sonode *so = (struct sonode *)buf; 6284 sotpi_info_t *sti = SOTOTPI(so); 6285 6286 mutex_enter(&socklist.sl_lock); 6287 6288 if (sti->sti_next_so != NULL) 6289 SOTOTPI(sti->sti_next_so)->sti_prev_so = sti->sti_prev_so; 6290 if (sti->sti_prev_so != NULL) 6291 SOTOTPI(sti->sti_prev_so)->sti_next_so = sti->sti_next_so; 6292 else 6293 socklist.sl_list = sti->sti_next_so; 6294 6295 mutex_exit(&socklist.sl_lock); 6296 6297 socktpi_destructor(buf, cdrarg); 6298 } 6299 6300 int 6301 socktpi_init(void) 6302 { 6303 /* 6304 * Create sonode caches. We create a special one for AF_UNIX so 6305 * that we can track them for netstat(8). 6306 */ 6307 socktpi_cache = kmem_cache_create("socktpi_cache", 6308 sizeof (struct sotpi_sonode), 0, socktpi_constructor, 6309 socktpi_destructor, NULL, NULL, NULL, 0); 6310 6311 socktpi_unix_cache = kmem_cache_create("socktpi_unix_cache", 6312 sizeof (struct sotpi_sonode), 0, socktpi_unix_constructor, 6313 socktpi_unix_destructor, NULL, NULL, NULL, 0); 6314 6315 return (0); 6316 } 6317 6318 /* 6319 * Given a non-TPI sonode, allocate and prep it to be ready for TPI. 6320 * 6321 * Caller must still update state and mode using sotpi_update_state(). 
6322 */ 6323 int 6324 sotpi_convert_sonode(struct sonode *so, struct sockparams *newsp, 6325 boolean_t *direct, queue_t **qp, struct cred *cr) 6326 { 6327 sotpi_info_t *sti; 6328 struct sockparams *origsp = so->so_sockparams; 6329 sock_lower_handle_t handle = so->so_proto_handle; 6330 struct stdata *stp; 6331 struct vnode *vp; 6332 queue_t *q; 6333 int error = 0; 6334 6335 ASSERT((so->so_state & (SS_FALLBACK_PENDING|SS_FALLBACK_COMP)) == 6336 SS_FALLBACK_PENDING); 6337 ASSERT(SOCK_IS_NONSTR(so)); 6338 6339 *qp = NULL; 6340 *direct = B_FALSE; 6341 so->so_sockparams = newsp; 6342 /* 6343 * Allocate and initalize fields required by TPI. 6344 */ 6345 (void) sotpi_info_create(so, KM_SLEEP); 6346 sotpi_info_init(so); 6347 6348 if ((error = sotpi_init(so, NULL, cr, SO_FALLBACK)) != 0) { 6349 sotpi_info_fini(so); 6350 sotpi_info_destroy(so); 6351 return (error); 6352 } 6353 ASSERT(handle == so->so_proto_handle); 6354 sti = SOTOTPI(so); 6355 if (sti->sti_direct != 0) 6356 *direct = B_TRUE; 6357 6358 /* 6359 * Keep the original sp around so we can properly dispose of the 6360 * sonode when the socket is being closed. 6361 */ 6362 sti->sti_orig_sp = origsp; 6363 6364 so_basic_strinit(so); /* skips the T_CAPABILITY_REQ */ 6365 so_alloc_addr(so, so->so_max_addr_len); 6366 6367 /* 6368 * If the application has done a SIOCSPGRP, make sure the 6369 * STREAM head is aware. This needs to take place before 6370 * the protocol start sending up messages. Otherwise we 6371 * might miss to generate SIGPOLL. 6372 * 6373 * It is possible that the application will receive duplicate 6374 * signals if some were already generated for either data or 6375 * connection indications. 6376 */ 6377 if (so->so_pgrp != 0) { 6378 if (so_set_events(so, so->so_vnode, cr) != 0) 6379 so->so_pgrp = 0; 6380 } 6381 6382 /* 6383 * Determine which queue to use. 
6384 */ 6385 vp = SOTOV(so); 6386 stp = vp->v_stream; 6387 ASSERT(stp != NULL); 6388 q = stp->sd_wrq->q_next; 6389 6390 /* 6391 * Skip any modules that may have been auto pushed when the device 6392 * was opened 6393 */ 6394 while (q->q_next != NULL) 6395 q = q->q_next; 6396 *qp = _RD(q); 6397 6398 /* This is now a STREAMS sockets */ 6399 so->so_not_str = B_FALSE; 6400 6401 return (error); 6402 } 6403 6404 /* 6405 * Revert a TPI sonode. It is only allowed to revert the sonode during 6406 * the fallback process. 6407 */ 6408 void 6409 sotpi_revert_sonode(struct sonode *so, struct cred *cr) 6410 { 6411 vnode_t *vp = SOTOV(so); 6412 6413 ASSERT((so->so_state & (SS_FALLBACK_PENDING|SS_FALLBACK_COMP)) == 6414 SS_FALLBACK_PENDING); 6415 ASSERT(!SOCK_IS_NONSTR(so)); 6416 ASSERT(vp->v_stream != NULL); 6417 6418 strclean(vp); 6419 (void) strclose(vp, FREAD|FWRITE|SO_FALLBACK, cr); 6420 6421 /* 6422 * Restore the original sockparams. The caller is responsible for 6423 * dropping the ref to the new sp. 
6424 */ 6425 so->so_sockparams = SOTOTPI(so)->sti_orig_sp; 6426 6427 sotpi_info_fini(so); 6428 sotpi_info_destroy(so); 6429 6430 /* This is no longer a STREAMS sockets */ 6431 so->so_not_str = B_TRUE; 6432 } 6433 6434 void 6435 sotpi_update_state(struct sonode *so, struct T_capability_ack *tcap, 6436 struct sockaddr *laddr, socklen_t laddrlen, struct sockaddr *faddr, 6437 socklen_t faddrlen, short opts) 6438 { 6439 sotpi_info_t *sti = SOTOTPI(so); 6440 6441 so_proc_tcapability_ack(so, tcap); 6442 6443 so->so_options |= opts; 6444 6445 /* 6446 * Determine whether the foreign and local address are valid 6447 */ 6448 if (laddrlen != 0) { 6449 ASSERT(laddrlen <= sti->sti_laddr_maxlen); 6450 sti->sti_laddr_len = laddrlen; 6451 bcopy(laddr, sti->sti_laddr_sa, laddrlen); 6452 sti->sti_laddr_valid = (so->so_state & SS_ISBOUND); 6453 } 6454 6455 if (faddrlen != 0) { 6456 ASSERT(faddrlen <= sti->sti_faddr_maxlen); 6457 sti->sti_faddr_len = faddrlen; 6458 bcopy(faddr, sti->sti_faddr_sa, faddrlen); 6459 sti->sti_faddr_valid = (so->so_state & SS_ISCONNECTED); 6460 } 6461 6462 } 6463 6464 /* 6465 * Allocate enough space to cache the local and foreign addresses. 6466 */ 6467 void 6468 so_alloc_addr(struct sonode *so, t_uscalar_t maxlen) 6469 { 6470 sotpi_info_t *sti = SOTOTPI(so); 6471 6472 ASSERT(sti->sti_laddr_sa == NULL && sti->sti_faddr_sa == NULL); 6473 ASSERT(sti->sti_laddr_len == 0 && sti->sti_faddr_len == 0); 6474 sti->sti_laddr_maxlen = sti->sti_faddr_maxlen = 6475 P2ROUNDUP(maxlen, KMEM_ALIGN); 6476 so->so_max_addr_len = sti->sti_laddr_maxlen; 6477 sti->sti_laddr_sa = kmem_alloc(sti->sti_laddr_maxlen * 2, KM_SLEEP); 6478 sti->sti_faddr_sa = (struct sockaddr *)((caddr_t)sti->sti_laddr_sa 6479 + sti->sti_laddr_maxlen); 6480 6481 if (so->so_family == AF_UNIX) { 6482 /* 6483 * Initialize AF_UNIX related fields. 
6484 */ 6485 bzero(&sti->sti_ux_laddr, sizeof (sti->sti_ux_laddr)); 6486 bzero(&sti->sti_ux_faddr, sizeof (sti->sti_ux_faddr)); 6487 } 6488 } 6489 6490 6491 sotpi_info_t * 6492 sotpi_sototpi(struct sonode *so) 6493 { 6494 sotpi_info_t *sti; 6495 6496 ASSERT(so != NULL); 6497 6498 sti = (sotpi_info_t *)so->so_priv; 6499 6500 ASSERT(sti != NULL); 6501 ASSERT(sti->sti_magic == SOTPI_INFO_MAGIC); 6502 6503 return (sti); 6504 } 6505 6506 static int 6507 i_sotpi_info_constructor(sotpi_info_t *sti) 6508 { 6509 sti->sti_magic = SOTPI_INFO_MAGIC; 6510 sti->sti_ack_mp = NULL; 6511 sti->sti_discon_ind_mp = NULL; 6512 sti->sti_ux_bound_vp = NULL; 6513 sti->sti_unbind_mp = NULL; 6514 6515 sti->sti_conn_ind_head = NULL; 6516 sti->sti_conn_ind_tail = NULL; 6517 6518 sti->sti_laddr_sa = NULL; 6519 sti->sti_faddr_sa = NULL; 6520 6521 mutex_init(&sti->sti_plumb_lock, NULL, MUTEX_DEFAULT, NULL); 6522 cv_init(&sti->sti_ack_cv, NULL, CV_DEFAULT, NULL); 6523 6524 return (0); 6525 } 6526 6527 static void 6528 i_sotpi_info_destructor(sotpi_info_t *sti) 6529 { 6530 ASSERT(sti->sti_magic == SOTPI_INFO_MAGIC); 6531 ASSERT(sti->sti_ack_mp == NULL); 6532 ASSERT(sti->sti_discon_ind_mp == NULL); 6533 ASSERT(sti->sti_ux_bound_vp == NULL); 6534 ASSERT(sti->sti_unbind_mp == NULL); 6535 6536 ASSERT(sti->sti_conn_ind_head == NULL); 6537 ASSERT(sti->sti_conn_ind_tail == NULL); 6538 6539 ASSERT(sti->sti_laddr_sa == NULL); 6540 ASSERT(sti->sti_faddr_sa == NULL); 6541 6542 mutex_destroy(&sti->sti_plumb_lock); 6543 cv_destroy(&sti->sti_ack_cv); 6544 } 6545 6546 /* 6547 * Creates and attaches TPI information to the given sonode 6548 */ 6549 static boolean_t 6550 sotpi_info_create(struct sonode *so, int kmflags) 6551 { 6552 sotpi_info_t *sti; 6553 6554 ASSERT(so->so_priv == NULL); 6555 6556 if ((sti = kmem_zalloc(sizeof (*sti), kmflags)) == NULL) 6557 return (B_FALSE); 6558 6559 if (i_sotpi_info_constructor(sti) != 0) { 6560 kmem_free(sti, sizeof (*sti)); 6561 return (B_FALSE); 6562 } 6563 6564 so->so_priv 
= (void *)sti; 6565 return (B_TRUE); 6566 } 6567 6568 /* 6569 * Initializes the TPI information. 6570 */ 6571 static void 6572 sotpi_info_init(struct sonode *so) 6573 { 6574 struct vnode *vp = SOTOV(so); 6575 sotpi_info_t *sti = SOTOTPI(so); 6576 time_t now; 6577 6578 sti->sti_dev = so->so_sockparams->sp_sdev_info.sd_vnode->v_rdev; 6579 vp->v_rdev = sti->sti_dev; 6580 6581 sti->sti_orig_sp = NULL; 6582 6583 sti->sti_pushcnt = 0; 6584 6585 now = gethrestime_sec(); 6586 sti->sti_atime = now; 6587 sti->sti_mtime = now; 6588 sti->sti_ctime = now; 6589 6590 sti->sti_eaddr_mp = NULL; 6591 sti->sti_delayed_error = 0; 6592 6593 sti->sti_provinfo = NULL; 6594 6595 sti->sti_oobcnt = 0; 6596 sti->sti_oobsigcnt = 0; 6597 6598 ASSERT(sti->sti_laddr_sa == NULL && sti->sti_faddr_sa == NULL); 6599 6600 sti->sti_laddr_sa = 0; 6601 sti->sti_faddr_sa = 0; 6602 sti->sti_laddr_maxlen = sti->sti_faddr_maxlen = 0; 6603 sti->sti_laddr_len = sti->sti_faddr_len = 0; 6604 6605 sti->sti_laddr_valid = 0; 6606 sti->sti_faddr_valid = 0; 6607 sti->sti_faddr_noxlate = 0; 6608 6609 sti->sti_direct = 0; 6610 6611 ASSERT(sti->sti_ack_mp == NULL); 6612 ASSERT(sti->sti_ux_bound_vp == NULL); 6613 ASSERT(sti->sti_unbind_mp == NULL); 6614 6615 ASSERT(sti->sti_conn_ind_head == NULL); 6616 ASSERT(sti->sti_conn_ind_tail == NULL); 6617 } 6618 6619 /* 6620 * Given a sonode, grab the TPI info and free any data. 6621 */ 6622 static void 6623 sotpi_info_fini(struct sonode *so) 6624 { 6625 sotpi_info_t *sti = SOTOTPI(so); 6626 mblk_t *mp; 6627 6628 ASSERT(sti->sti_discon_ind_mp == NULL); 6629 6630 if ((mp = sti->sti_conn_ind_head) != NULL) { 6631 mblk_t *mp1; 6632 6633 while (mp) { 6634 mp1 = mp->b_next; 6635 mp->b_next = NULL; 6636 freemsg(mp); 6637 mp = mp1; 6638 } 6639 sti->sti_conn_ind_head = sti->sti_conn_ind_tail = NULL; 6640 } 6641 6642 /* 6643 * Protect so->so_[lf]addr_sa so that sockfs_snapshot() can safely 6644 * indirect them. It also uses so_count as a validity test. 
6645 */ 6646 mutex_enter(&so->so_lock); 6647 6648 if (sti->sti_laddr_sa) { 6649 ASSERT((caddr_t)sti->sti_faddr_sa == 6650 (caddr_t)sti->sti_laddr_sa + sti->sti_laddr_maxlen); 6651 ASSERT(sti->sti_faddr_maxlen == sti->sti_laddr_maxlen); 6652 sti->sti_laddr_valid = 0; 6653 sti->sti_faddr_valid = 0; 6654 kmem_free(sti->sti_laddr_sa, sti->sti_laddr_maxlen * 2); 6655 sti->sti_laddr_sa = NULL; 6656 sti->sti_laddr_len = sti->sti_laddr_maxlen = 0; 6657 sti->sti_faddr_sa = NULL; 6658 sti->sti_faddr_len = sti->sti_faddr_maxlen = 0; 6659 } 6660 6661 mutex_exit(&so->so_lock); 6662 6663 if ((mp = sti->sti_eaddr_mp) != NULL) { 6664 freemsg(mp); 6665 sti->sti_eaddr_mp = NULL; 6666 sti->sti_delayed_error = 0; 6667 } 6668 6669 if ((mp = sti->sti_ack_mp) != NULL) { 6670 freemsg(mp); 6671 sti->sti_ack_mp = NULL; 6672 } 6673 6674 ASSERT(sti->sti_ux_bound_vp == NULL); 6675 if ((mp = sti->sti_unbind_mp) != NULL) { 6676 freemsg(mp); 6677 sti->sti_unbind_mp = NULL; 6678 } 6679 } 6680 6681 /* 6682 * Destroys the TPI information attached to a sonode. 6683 */ 6684 static void 6685 sotpi_info_destroy(struct sonode *so) 6686 { 6687 sotpi_info_t *sti = SOTOTPI(so); 6688 6689 i_sotpi_info_destructor(sti); 6690 kmem_free(sti, sizeof (*sti)); 6691 6692 so->so_priv = NULL; 6693 } 6694 6695 /* 6696 * Create the global sotpi socket module entry. It will never be freed. 6697 */ 6698 smod_info_t * 6699 sotpi_smod_create(void) 6700 { 6701 smod_info_t *smodp; 6702 6703 smodp = kmem_zalloc(sizeof (*smodp), KM_SLEEP); 6704 smodp->smod_name = kmem_alloc(sizeof (SOTPI_SMOD_NAME), KM_SLEEP); 6705 (void) strcpy(smodp->smod_name, SOTPI_SMOD_NAME); 6706 /* 6707 * Initialize the smod_refcnt to 1 so it will never be freed. 6708 */ 6709 smodp->smod_refcnt = 1; 6710 smodp->smod_uc_version = SOCK_UC_VERSION; 6711 smodp->smod_dc_version = SOCK_DC_VERSION; 6712 smodp->smod_sock_create_func = &sotpi_create; 6713 smodp->smod_sock_destroy_func = &sotpi_destroy; 6714 return (smodp); 6715 } 6716