1 /* $OpenBSD: uipc_usrreq.c,v 1.159 2021/12/07 01:19:47 mvs Exp $ */ 2 /* $NetBSD: uipc_usrreq.c,v 1.18 1996/02/09 19:00:50 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1989, 1991, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 
 *
 *	@(#)uipc_usrreq.c	8.3 (Berkeley) 1/4/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/filedesc.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/queue.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/unpcb.h>
#include <sys/un.h>
#include <sys/namei.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/mbuf.h>
#include <sys/task.h>
#include <sys/pledge.h>
#include <sys/pool.h>
#include <sys/rwlock.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
#include <sys/lock.h>

/*
 * Locks used to protect global data and struct members:
 *	I	immutable after creation
 *	D	unp_df_lock
 *	G	unp_gc_lock
 *	U	unp_lock
 *	R	unp_rights_mtx
 *	a	atomic
 */

struct rwlock unp_lock = RWLOCK_INITIALIZER("unplock");
struct rwlock unp_df_lock = RWLOCK_INITIALIZER("unpdflk");
struct rwlock unp_gc_lock = RWLOCK_INITIALIZER("unpgclk");

struct mutex unp_rights_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);

/*
 * Stack of sets of files that were passed over a socket but were
 * not received and need to be closed.
 */
struct unp_deferral {
	SLIST_ENTRY(unp_deferral)	ud_link;	/* [D] */
	int				ud_n;		/* [I] */
	/* followed by ud_n struct fdpass */
	struct fdpass			ud_fp[];	/* [I] flexible array */
};

void	uipc_setaddr(const struct unpcb *, struct mbuf *);
void	unp_discard(struct fdpass *, int);
void	unp_mark(struct fdpass *, int);
void	unp_scan(struct mbuf *, void (*)(struct fdpass *, int));
int	unp_nam2sun(struct mbuf *, struct sockaddr_un **, size_t *);

/* Pool of unpcb structures, initialized by unp_init(). */
struct pool unpcb_pool;
/* Deferred-close/garbage-collect work, run by unp_gc() on systqmp. */
struct task unp_gc_task = TASK_INITIALIZER(unp_gc, NULL);

/*
 * Unix communications domain.
 *
 * TODO:
 *	RDM
 *	rethink name space problems
 *	need a proper out-of-band
 */
const struct sockaddr sun_noname = { sizeof(sun_noname), AF_UNIX };

/* [G] list of all UNIX domain sockets, for unp_gc() */
LIST_HEAD(unp_head, unpcb)	unp_head =
    LIST_HEAD_INITIALIZER(unp_head);
/* [D] list of sets of files that were sent over sockets that are now closed */
SLIST_HEAD(,unp_deferral)	unp_deferred =
    SLIST_HEAD_INITIALIZER(unp_deferred);

ino_t	unp_ino;	/* [U] prototype for fake inode numbers */
int	unp_rights;	/* [R] file descriptors in flight */
int	unp_defer;	/* [G] number of deferred fp to close by the GC task */
int	unp_gcing;	/* [G] GC task currently running */

/*
 * Initialize the AF_UNIX protocol: set up the allocator pool for
 * protocol control blocks (struct unpcb).
 */
void
unp_init(void)
{
	pool_init(&unpcb_pool, sizeof(struct unpcb), 0,
	    IPL_SOFTNET, 0, "unpcb", NULL);
}

/*
 * Copy the address `unp' is bound to into the caller-supplied mbuf `nam'.
 * If `unp' is NULL (peer already gone) or was never bound, hand back the
 * AF_UNIX "no name" sentinel instead.
 */
void
uipc_setaddr(const struct unpcb *unp, struct mbuf *nam)
{
	if (unp != NULL && unp->unp_addr != NULL) {
		nam->m_len = unp->unp_addr->m_len;
		memcpy(mtod(nam, caddr_t), mtod(unp->unp_addr, caddr_t),
		    nam->m_len);
	} else {
		nam->m_len = sizeof(sun_noname);
		memcpy(mtod(nam, struct sockaddr *), &sun_noname,
		    nam->m_len);
	}
}

/*
 * Protocol user-request dispatcher for AF_UNIX sockets.  Called with the
 * socket locked (`unp_lock' held — see the PRU_ABORT comment below).
 * The meaning of `m', `nam' and `control' depends on `req'; any mbufs not
 * consumed by the request are freed at the `release' label, except for
 * requests that pass caller-owned storage through `m'/`nam' (PRU_RCVD,
 * PRU_RCVOOB, PRU_SENSE).
 */
int
uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
    struct mbuf *control, struct proc *p)
{
	struct unpcb *unp = sotounpcb(so);
	struct unpcb *unp2;
	struct socket *so2;
	int error = 0;

	if (req == PRU_CONTROL)
		return (EOPNOTSUPP);
	/* Control data (SCM_RIGHTS) is only meaningful for PRU_SEND. */
	if (req != PRU_SEND && control && control->m_len) {
		error = EOPNOTSUPP;
		goto release;
	}
	if (unp == NULL) {
		error = EINVAL;
		goto release;
	}

	switch (req) {

	case PRU_BIND:
		error = unp_bind(unp, nam, p);
		break;

	case PRU_LISTEN:
		/* listen(2) is only valid on a bound socket. */
		if (unp->unp_vnode == NULL)
			error = EINVAL;
		break;

	case PRU_CONNECT:
		error = unp_connect(so, nam, p);
		break;

	case PRU_CONNECT2:
		/* socketpair(2): `nam' is actually the second socket. */
		error = unp_connect2(so, (struct socket *)nam);
		if (!error) {
			/* Record connection-time credentials on both ends. */
			unp->unp_connid.uid = p->p_ucred->cr_uid;
			unp->unp_connid.gid = p->p_ucred->cr_gid;
			unp->unp_connid.pid = p->p_p->ps_pid;
			unp->unp_flags |= UNP_FEIDS;
			unp2 = sotounpcb((struct socket *)nam);
			unp2->unp_connid.uid = p->p_ucred->cr_uid;
			unp2->unp_connid.gid = p->p_ucred->cr_gid;
			unp2->unp_connid.pid = p->p_p->ps_pid;
			unp2->unp_flags |= UNP_FEIDS;
		}
		break;

	case PRU_DISCONNECT:
		unp_disconnect(unp);
		break;

	case PRU_ACCEPT:
		/*
		 * Pass back name of connected socket,
		 * if it was bound and we are still connected
		 * (our peer may have closed already!).
		 */
		uipc_setaddr(unp->unp_conn, nam);
		break;

	case PRU_SHUTDOWN:
		socantsendmore(so);
		unp_shutdown(unp);
		break;

	case PRU_RCVD:
		switch (so->so_type) {

		case SOCK_DGRAM:
			panic("uipc 1");
			/*NOTREACHED*/

		case SOCK_STREAM:
		case SOCK_SEQPACKET:
			if (unp->unp_conn == NULL)
				break;
			so2 = unp->unp_conn->unp_socket;
			/*
			 * Adjust backpressure on sender
			 * and wakeup any waiting to write.
			 */
			so2->so_snd.sb_mbcnt = so->so_rcv.sb_mbcnt;
			so2->so_snd.sb_cc = so->so_rcv.sb_cc;
			sowwakeup(so2);
			break;

		default:
			panic("uipc 2");
		}
		break;

	case PRU_SEND:
		if (control) {
			/*
			 * unp_internalize() takes the fd table lock, so the
			 * socket lock must be dropped around the call.
			 */
			sounlock(so, SL_LOCKED);
			error = unp_internalize(control, p);
			solock(so);
			if (error)
				break;
		}
		switch (so->so_type) {

		case SOCK_DGRAM: {
			const struct sockaddr *from;

			if (nam) {
				/* Temporary connect for a sendto(2). */
				if (unp->unp_conn) {
					error = EISCONN;
					break;
				}
				error = unp_connect(so, nam, p);
				if (error)
					break;
			} else {
				if (unp->unp_conn == NULL) {
					error = ENOTCONN;
					break;
				}
			}
			so2 = unp->unp_conn->unp_socket;
			if (unp->unp_addr)
				from = mtod(unp->unp_addr, struct sockaddr *);
			else
				from = &sun_noname;
			/* On success the receiver now owns `m'/`control'. */
			if (sbappendaddr(so2, &so2->so_rcv, from, m, control)) {
				sorwakeup(so2);
				m = NULL;
				control = NULL;
			} else
				error = ENOBUFS;
			if (nam)
				unp_disconnect(unp);
			break;
		}

		case SOCK_STREAM:
		case SOCK_SEQPACKET:
			if (so->so_state & SS_CANTSENDMORE) {
				error = EPIPE;
				break;
			}
			if (unp->unp_conn == NULL) {
				error = ENOTCONN;
				break;
			}
			so2 = unp->unp_conn->unp_socket;
			/*
			 * Send to paired receive port, and then raise
			 * send buffer counts to maintain backpressure.
			 * Wake up readers.
			 */
			if (control) {
				if (sbappendcontrol(so2, &so2->so_rcv, m,
				    control)) {
					control = NULL;
				} else {
					error = ENOBUFS;
					break;
				}
			} else if (so->so_type == SOCK_SEQPACKET)
				sbappendrecord(so2, &so2->so_rcv, m);
			else
				sbappend(so2, &so2->so_rcv, m);
			so->so_snd.sb_mbcnt = so2->so_rcv.sb_mbcnt;
			so->so_snd.sb_cc = so2->so_rcv.sb_cc;
			if (so2->so_rcv.sb_cc > 0)
				sorwakeup(so2);
			m = NULL;
			break;

		default:
			panic("uipc 4");
		}
		/* we need to undo unp_internalize in case of errors */
		if (control && error)
			unp_dispose(control);
		break;

	case PRU_ABORT:
		unp_detach(unp);
		/*
		 * As long as `unp_lock' is taken before entering
		 * uipc_usrreq() releasing it here would lead to a
		 * double unlock.
		 */
		sofree(so, SL_NOUNLOCK);
		break;

	case PRU_SENSE: {
		/* fstat(2): `m' is really a caller-owned struct stat. */
		struct stat *sb = (struct stat *)m;

		sb->st_blksize = so->so_snd.sb_hiwat;
		sb->st_dev = NODEV;
		/* Hand out a fake inode number on first fstat(2). */
		if (unp->unp_ino == 0)
			unp->unp_ino = unp_ino++;
		sb->st_atim.tv_sec =
		    sb->st_mtim.tv_sec =
		    sb->st_ctim.tv_sec = unp->unp_ctime.tv_sec;
		sb->st_atim.tv_nsec =
		    sb->st_mtim.tv_nsec =
		    sb->st_ctim.tv_nsec = unp->unp_ctime.tv_nsec;
		sb->st_ino = unp->unp_ino;
		break;
	}

	case PRU_RCVOOB:
	case PRU_SENDOOB:
		/* No out-of-band support (see TODO at top of file). */
		error = EOPNOTSUPP;
		break;

	case PRU_SOCKADDR:
		uipc_setaddr(unp, nam);
		break;

	case PRU_PEERADDR:
		uipc_setaddr(unp->unp_conn, nam);
		break;

	case PRU_SLOWTIMO:
		break;

	default:
		panic("uipc_usrreq");
	}
release:
	/* These requests pass caller storage in `m'/`nam'; do not free it. */
	if (req != PRU_RCVD && req != PRU_RCVOOB && req != PRU_SENSE) {
		m_freem(control);
		m_freem(m);
	}
	return (error);
}

/*
 * Both send and receive buffers are allocated PIPSIZ bytes of buffering
 * for stream sockets, although the total for sender and receiver is
 * actually only PIPSIZ.
 * Datagram sockets really use the sendspace as the maximum datagram size,
 * and don't really want to reserve the sendspace.  Their recvspace should
 * be large enough for at least one max-size datagram plus address.
 */
#define	PIPSIZ	8192
u_int	unpst_sendspace = PIPSIZ;
u_int	unpst_recvspace = PIPSIZ;
u_int	unpsq_sendspace = PIPSIZ;
u_int	unpsq_recvspace = PIPSIZ;
u_int	unpdg_sendspace = 2*1024;	/* really max datagram size */
u_int	unpdg_recvspace = 16*1024;

/* sysctl bounds tables, one per socket type (net.unix.{stream,seqpacket,dgram}) */
const struct sysctl_bounded_args unpstctl_vars[] = {
	{ UNPCTL_RECVSPACE, &unpst_recvspace, 0, SB_MAX },
	{ UNPCTL_SENDSPACE, &unpst_sendspace, 0, SB_MAX },
};
const struct sysctl_bounded_args unpsqctl_vars[] = {
	{ UNPCTL_RECVSPACE, &unpsq_recvspace, 0, SB_MAX },
	{ UNPCTL_SENDSPACE, &unpsq_sendspace, 0, SB_MAX },
};
const struct sysctl_bounded_args unpdgctl_vars[] = {
	{ UNPCTL_RECVSPACE, &unpdg_recvspace, 0, SB_MAX },
	{ UNPCTL_SENDSPACE, &unpdg_sendspace, 0, SB_MAX },
};

/*
 * Attach a new unpcb to `so': reserve buffer space according to the
 * socket type, allocate the PCB from the pool and insert it on the
 * global `unp_head' list used by the garbage collector.
 */
int
uipc_attach(struct socket *so, int proto)
{
	struct unpcb *unp;
	int error;

	rw_assert_wrlock(&unp_lock);

	if (so->so_pcb)
		return EISCONN;
	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
		switch (so->so_type) {

		case SOCK_STREAM:
			error = soreserve(so, unpst_sendspace, unpst_recvspace);
			break;

		case SOCK_SEQPACKET:
			error = soreserve(so, unpsq_sendspace, unpsq_recvspace);
			break;

		case SOCK_DGRAM:
			error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
			break;

		default:
			panic("unp_attach");
		}
		if (error)
			return (error);
	}
	unp = pool_get(&unpcb_pool, PR_NOWAIT|PR_ZERO);
	if (unp == NULL)
		return (ENOBUFS);
	unp->unp_socket = so;
	so->so_pcb = unp;
	getnanotime(&unp->unp_ctime);

	/*
	 * Enforce `unp_gc_lock' -> `solock()' lock order.
	 */
	/*
	 * We also release the lock on the listening socket and on our
	 * peer socket when called from unp_connect().  This is safe: the
	 * listening socket is protected by the vnode(9) lock, and the
	 * peer socket has the `UNP_CONNECTING' flag set.
	 */
	sounlock(so, SL_LOCKED);
	rw_enter_write(&unp_gc_lock);
	LIST_INSERT_HEAD(&unp_head, unp, unp_link);
	rw_exit_write(&unp_gc_lock);
	solock(so);
	return (0);
}

/*
 * Detach the PCB from `so' and release all of its resources.
 */
int
uipc_detach(struct socket *so)
{
	struct unpcb *unp = sotounpcb(so);

	if (unp == NULL)
		return (EINVAL);

	unp_detach(unp);

	return (0);
}

/*
 * Handle the net.unix sysctl subtree: per-type buffer sizes plus the
 * read-only `inflight' and `deferred' descriptor counters.
 */
int
uipc_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen)
{
	int *valp = &unp_defer;

	/* All sysctl names at this level are terminal. */
	switch (name[0]) {
	case SOCK_STREAM:
		if (namelen != 2)
			return (ENOTDIR);
		return sysctl_bounded_arr(unpstctl_vars, nitems(unpstctl_vars),
		    name + 1, namelen - 1, oldp, oldlenp, newp, newlen);
	case SOCK_SEQPACKET:
		if (namelen != 2)
			return (ENOTDIR);
		return sysctl_bounded_arr(unpsqctl_vars, nitems(unpsqctl_vars),
		    name + 1, namelen - 1, oldp, oldlenp, newp, newlen);
	case SOCK_DGRAM:
		if (namelen != 2)
			return (ENOTDIR);
		return sysctl_bounded_arr(unpdgctl_vars, nitems(unpdgctl_vars),
		    name + 1, namelen - 1, oldp, oldlenp, newp, newlen);
	case NET_UNIX_INFLIGHT:
		valp = &unp_rights;
		/* FALLTHROUGH */
	case NET_UNIX_DEFERRED:
		if (namelen != 1)
			return (ENOTDIR);
		return sysctl_rdint(oldp, oldlenp, newp, *valp);
	default:
		return (ENOPROTOOPT);
	}
}

/*
 * Tear down `unp': unlink it from the GC list, break the association
 * with its bound vnode (if any), disconnect it and all of its datagram
 * referers, and free the PCB.  Kicks the GC task if descriptors are
 * still in flight.
 */
void
unp_detach(struct unpcb *unp)
{
	struct socket *so = unp->unp_socket;
	struct vnode *vp = unp->unp_vnode;

	rw_assert_wrlock(&unp_lock);

	unp->unp_vnode = NULL;

	/*
	 * Enforce `unp_gc_lock' -> `solock()' lock order.
	 * Enforce `i_lock' -> `unp_lock' lock order.
	 */
	sounlock(so, SL_LOCKED);

	rw_enter_write(&unp_gc_lock);
	LIST_REMOVE(unp, unp_link);
	rw_exit_write(&unp_gc_lock);

	if (vp != NULL) {
		VOP_LOCK(vp, LK_EXCLUSIVE);
		vp->v_socket = NULL;

		KERNEL_LOCK();
		vput(vp);
		KERNEL_UNLOCK();
	}

	solock(so);

	if (unp->unp_conn)
		unp_disconnect(unp);
	/* Datagram peers that connected to us hold references; drop them. */
	while (!SLIST_EMPTY(&unp->unp_refs))
		unp_drop(SLIST_FIRST(&unp->unp_refs), ECONNRESET);
	soisdisconnected(so);
	so->so_pcb = NULL;
	m_freem(unp->unp_addr);
	pool_put(&unpcb_pool, unp);
	if (unp_rights)
		task_add(systqmp, &unp_gc_task);
}

/*
 * bind(2): create the filesystem node for `nam' and associate it with
 * the socket.  UNP_BINDING excludes concurrent bind/connect attempts
 * while the socket lock is dropped for the namei(9) lookup.
 */
int
unp_bind(struct unpcb *unp, struct mbuf *nam, struct proc *p)
{
	struct sockaddr_un *soun;
	struct mbuf *nam2;
	struct vnode *vp;
	struct vattr vattr;
	int error;
	struct nameidata nd;
	size_t pathlen;

	if (unp->unp_flags & (UNP_BINDING | UNP_CONNECTING))
		return (EINVAL);
	if (unp->unp_vnode != NULL)
		return (EINVAL);
	if ((error = unp_nam2sun(nam, &soun, &pathlen)))
		return (error);

	unp->unp_flags |= UNP_BINDING;

	/*
	 * Enforce `i_lock' -> `unplock' because the fifo subsystem
	 * requires it.  The socket can't be closed concurrently
	 * because the file descriptor reference is still held.
	 */

	sounlock(unp->unp_socket, SL_LOCKED);

	/* Keep a private, fixed-size copy of the address. */
	nam2 = m_getclr(M_WAITOK, MT_SONAME);
	nam2->m_len = sizeof(struct sockaddr_un);
	memcpy(mtod(nam2, struct sockaddr_un *), soun,
	    offsetof(struct sockaddr_un, sun_path) + pathlen);
	/* No need to NUL terminate: m_getclr() returns zero'd mbufs. */

	soun = mtod(nam2, struct sockaddr_un *);

	/* Fixup sun_len to keep it in sync with m_len. */
	soun->sun_len = nam2->m_len;

	NDINIT(&nd, CREATE, NOFOLLOW | LOCKPARENT, UIO_SYSSPACE,
	    soun->sun_path, p);
	nd.ni_pledge = PLEDGE_UNIX;

	KERNEL_LOCK();
	/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
	error = namei(&nd);
	if (error != 0) {
		m_freem(nam2);
		solock(unp->unp_socket);
		goto out;
	}
	vp = nd.ni_vp;
	if (vp != NULL) {
		/* A node already exists at that path. */
		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
		if (nd.ni_dvp == vp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vrele(vp);
		m_freem(nam2);
		error = EADDRINUSE;
		solock(unp->unp_socket);
		goto out;
	}
	VATTR_NULL(&vattr);
	vattr.va_type = VSOCK;
	vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask;
	error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
	vput(nd.ni_dvp);
	if (error) {
		m_freem(nam2);
		solock(unp->unp_socket);
		goto out;
	}
	solock(unp->unp_socket);
	unp->unp_addr = nam2;
	vp = nd.ni_vp;
	vp->v_socket = unp->unp_socket;
	unp->unp_vnode = vp;
	/* Remember bind-time credentials for getpeereid(3) on the peer. */
	unp->unp_connid.uid = p->p_ucred->cr_uid;
	unp->unp_connid.gid = p->p_ucred->cr_gid;
	unp->unp_connid.pid = p->p_p->ps_pid;
	unp->unp_flags |= UNP_FEIDSBIND;
	VOP_UNLOCK(vp);
out:
	KERNEL_UNLOCK();
	unp->unp_flags &= ~UNP_BINDING;

	return (error);
}

/*
 * connect(2): look up the filesystem node named by `nam', find the
 * socket bound to it, and connect to it (via sonewconn() for
 * connection-oriented types).  UNP_CONNECTING excludes concurrent
 * bind/connect while the socket lock is dropped for namei(9).
 */
int
unp_connect(struct socket *so, struct mbuf *nam, struct proc *p)
{
	struct sockaddr_un *soun;
	struct vnode *vp;
	struct socket *so2, *so3;
	struct unpcb *unp, *unp2, *unp3;
	struct nameidata nd;
	int error;

	unp = sotounpcb(so);
	if (unp->unp_flags & (UNP_BINDING | UNP_CONNECTING))
		return (EISCONN);
	if ((error = unp_nam2sun(nam, &soun, NULL)))
		return (error);

	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, soun->sun_path, p);
	nd.ni_pledge = PLEDGE_UNIX;

	unp->unp_flags |= UNP_CONNECTING;

	/*
	 * Enforce `i_lock' -> `unplock' because the fifo subsystem
	 * requires it.  The socket can't be closed concurrently
	 * because the file descriptor reference is still held.
	 */

	sounlock(so, SL_LOCKED);

	KERNEL_LOCK();
	error = namei(&nd);
	if (error != 0)
		goto unlock;
	vp = nd.ni_vp;
	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		goto put;
	}
	if ((error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) != 0)
		goto put;
	solock(so);
	so2 = vp->v_socket;
	if (so2 == NULL) {
		error = ECONNREFUSED;
		goto put_locked;
	}
	if (so->so_type != so2->so_type) {
		error = EPROTOTYPE;
		goto put_locked;
	}
	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
		    (so3 = sonewconn(so2, 0)) == NULL) {
			error = ECONNREFUSED;
			goto put_locked;
		}
		unp2 = sotounpcb(so2);
		unp3 = sotounpcb(so3);
		if (unp2->unp_addr)
			unp3->unp_addr =
			    m_copym(unp2->unp_addr, 0, M_COPYALL, M_NOWAIT);
		/* Expose our credentials to the accepting side... */
		unp3->unp_connid.uid = p->p_ucred->cr_uid;
		unp3->unp_connid.gid = p->p_ucred->cr_gid;
		unp3->unp_connid.pid = p->p_p->ps_pid;
		unp3->unp_flags |= UNP_FEIDS;
		/* ...and connect to the newborn socket, not the listener. */
		so2 = so3;
		if (unp2->unp_flags & UNP_FEIDSBIND) {
			unp->unp_connid = unp2->unp_connid;
			unp->unp_flags |= UNP_FEIDS;
		}
	}
	error = unp_connect2(so, so2);
put_locked:
	sounlock(so, SL_LOCKED);
put:
	vput(vp);
unlock:
	KERNEL_UNLOCK();
	solock(so);
	unp->unp_flags &= ~UNP_CONNECTING;

	/*
	 * The peer socket could be closed by concurrent thread
	 * when `so' and `vp' are unlocked.
	 */
	if (error == 0 && unp->unp_conn == NULL)
		error = ECONNREFUSED;

	return (error);
}

/*
 * Wire two AF_UNIX sockets together.  Datagram sockets make a one-way
 * link (plus a back-reference on the peer's `unp_refs' list); stream and
 * seqpacket sockets are connected symmetrically.
 */
int
unp_connect2(struct socket *so, struct socket *so2)
{
	struct unpcb *unp = sotounpcb(so);
	struct unpcb *unp2;

	rw_assert_wrlock(&unp_lock);

	if (so2->so_type != so->so_type)
		return (EPROTOTYPE);
	unp2 = sotounpcb(so2);
	unp->unp_conn = unp2;
	switch (so->so_type) {

	case SOCK_DGRAM:
		SLIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_nextref);
		soisconnected(so);
		break;

	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		unp2->unp_conn = unp;
		soisconnected(so);
		soisconnected(so2);
		break;

	default:
		panic("unp_connect2");
	}
	return (0);
}

/*
 * Undo unp_connect2(): break the link to the peer (and the peer's link
 * back, for connection-oriented types) and reset the send-buffer
 * backpressure accounting.
 */
void
unp_disconnect(struct unpcb *unp)
{
	struct unpcb *unp2 = unp->unp_conn;

	if (unp2 == NULL)
		return;
	unp->unp_conn = NULL;
	switch (unp->unp_socket->so_type) {

	case SOCK_DGRAM:
		SLIST_REMOVE(&unp2->unp_refs, unp, unpcb, unp_nextref);
		unp->unp_socket->so_state &= ~SS_ISCONNECTED;
		break;

	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		unp->unp_socket->so_snd.sb_mbcnt = 0;
		unp->unp_socket->so_snd.sb_cc = 0;
		soisdisconnected(unp->unp_socket);
		unp2->unp_conn = NULL;
		unp2->unp_socket->so_snd.sb_mbcnt = 0;
		unp2->unp_socket->so_snd.sb_cc = 0;
		soisdisconnected(unp2->unp_socket);
		break;
	}
}

/*
 * shutdown(2) on the send side: tell the connected peer (if any) that
 * no more data will arrive.
 */
void
unp_shutdown(struct unpcb *unp)
{
	struct socket *so;

	switch (unp->unp_socket->so_type) {
	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		if (unp->unp_conn && (so = unp->unp_conn->unp_socket))
			socantrcvmore(so);
		break;
	default:
		break;
	}
}

/*
 * Abort the connection of `unp' with error `errno' (e.g. ECONNRESET
 * when the peer goes away underneath a connected datagram socket).
 */
void
unp_drop(struct unpcb *unp, int errno)
{
	struct socket *so = unp->unp_socket;

	rw_assert_wrlock(&unp_lock);

	so->so_error = errno;
	unp_disconnect(unp);
}

#ifdef notdef
unp_drain(void)
{

}
#endif

/*
 * If `fp' refers to an AF_UNIX socket, return its PCB; otherwise NULL.
 * Used by the in-flight descriptor accounting and the garbage collector.
 */
static struct unpcb *
fptounp(struct file *fp)
{
	struct socket *so;

	if (fp->f_type != DTYPE_SOCKET)
		return (NULL);
	if ((so = fp->f_data) == NULL)
		return (NULL);
	if (so->so_proto->pr_domain != &unixdomain)
		return (NULL);
	return (sotounpcb(so));
}

/*
 * Convert an SCM_RIGHTS message from internal form (struct fdpass, i.e.
 * struct file pointers) to external form (int file descriptors) on
 * receive: validate visibility, allocate descriptor slots in the
 * receiving process, and update the in-flight accounting.  On error the
 * passed files are handed to unp_discard() for deferred close.
 */
int
unp_externalize(struct mbuf *rights, socklen_t controllen, int flags)
{
	struct proc *p = curproc;		/* XXX */
	struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
	struct filedesc *fdp = p->p_fd;
	int i, *fds = NULL;
	struct fdpass *rp;
	struct file *fp;
	int nfds, error = 0;

	/*
	 * This code only works because SCM_RIGHTS is the only supported
	 * control message type on unix sockets.  Enforce this here.
	 */
	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET)
		return EINVAL;

	nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) /
	    sizeof(struct fdpass);
	if (controllen < CMSG_ALIGN(sizeof(struct cmsghdr)))
		controllen = 0;
	else
		controllen -= CMSG_ALIGN(sizeof(struct cmsghdr));
	if (nfds > controllen / sizeof(int)) {
		error = EMSGSIZE;
		goto out;
	}

	/* Make sure the recipient should be able to see the descriptors.. */
	rp = (struct fdpass *)CMSG_DATA(cm);

	/* fdp->fd_rdir requires KERNEL_LOCK() */
	KERNEL_LOCK();

	for (i = 0; i < nfds; i++) {
		fp = rp->fp;
		rp++;
		error = pledge_recvfd(p, fp);
		if (error)
			break;

		/*
		 * No to block devices.  If passing a directory,
		 * make sure that it is underneath the root.
		 */
		if (fdp->fd_rdir != NULL && fp->f_type == DTYPE_VNODE) {
			struct vnode *vp = (struct vnode *)fp->f_data;

			if (vp->v_type == VBLK ||
			    (vp->v_type == VDIR &&
			    !vn_isunder(vp, fdp->fd_rdir, p))) {
				error = EPERM;
				break;
			}
		}
	}

	KERNEL_UNLOCK();

	if (error)
		goto out;

	fds = mallocarray(nfds, sizeof(int), M_TEMP, M_WAITOK);

	fdplock(fdp);
restart:
	/*
	 * First loop -- allocate file descriptor table slots for the
	 * new descriptors.
	 */
	rp = ((struct fdpass *)CMSG_DATA(cm));
	for (i = 0; i < nfds; i++) {
		if ((error = fdalloc(p, 0, &fds[i])) != 0) {
			/*
			 * Back out what we've done so far.
			 */
			for (--i; i >= 0; i--)
				fdremove(fdp, fds[i]);

			if (error == ENOSPC) {
				fdexpand(p);
				goto restart;
			}

			fdpunlock(fdp);

			/*
			 * This is the error that has historically
			 * been returned, and some callers may
			 * expect it.
			 */

			error = EMSGSIZE;
			goto out;
		}

		/*
		 * Make the slot reference the descriptor so that
		 * fdalloc() works properly..  We finalize it all
		 * in the loop below.
		 */
		mtx_enter(&fdp->fd_fplock);
		KASSERT(fdp->fd_ofiles[fds[i]] == NULL);
		fdp->fd_ofiles[fds[i]] = rp->fp;
		mtx_leave(&fdp->fd_fplock);

		fdp->fd_ofileflags[fds[i]] = (rp->flags & UF_PLEDGED);
		if (flags & MSG_CMSG_CLOEXEC)
			fdp->fd_ofileflags[fds[i]] |= UF_EXCLOSE;

		rp++;
	}
	fdpunlock(fdp);

	/*
	 * Now that adding them has succeeded, update all of the
	 * descriptor passing state.
	 */
	rp = (struct fdpass *)CMSG_DATA(cm);

	for (i = 0; i < nfds; i++) {
		struct unpcb *unp;

		fp = rp->fp;
		rp++;
		if ((unp = fptounp(fp)) != NULL) {
			rw_enter_write(&unp_gc_lock);
			unp->unp_msgcount--;
			rw_exit_write(&unp_gc_lock);
		}
	}

	mtx_enter(&unp_rights_mtx);
	unp_rights -= nfds;
	mtx_leave(&unp_rights_mtx);

	/*
	 * Copy temporary array to message and adjust length, in case of
	 * transition from large struct file pointers to ints.
	 */
	memcpy(CMSG_DATA(cm), fds, nfds * sizeof(int));
	cm->cmsg_len = CMSG_LEN(nfds * sizeof(int));
	rights->m_len = CMSG_LEN(nfds * sizeof(int));
out:
	if (fds != NULL)
		free(fds, M_TEMP, nfds * sizeof(int));

	if (error) {
		if (nfds > 0) {
			/*
			 * No lock required.  We are the only `cm' holder.
			 */
			rp = ((struct fdpass *)CMSG_DATA(cm));
			unp_discard(rp, nfds);
		}
	}

	return (error);
}

/*
 * Convert an SCM_RIGHTS message from external form (int descriptors) to
 * internal form (struct fdpass) on send: take a reference on each file,
 * enforce the per-system in-flight limit and pledge/kqueue restrictions,
 * and bump the per-socket message counts used by the garbage collector.
 * Called without the socket lock (takes the fd table lock).
 */
int
unp_internalize(struct mbuf *control, struct proc *p)
{
	struct filedesc *fdp = p->p_fd;
	struct cmsghdr *cm = mtod(control, struct cmsghdr *);
	struct fdpass *rp;
	struct file *fp;
	struct unpcb *unp;
	int i, error;
	int nfds, *ip, fd, neededspace;

	/*
	 * Check for two potential msg_controllen values because
	 * IETF stuck their nose in a place it does not belong.
	 */
	if (control->m_len < CMSG_LEN(0) || cm->cmsg_len < CMSG_LEN(0))
		return (EINVAL);
	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
	    !(cm->cmsg_len == control->m_len ||
	    control->m_len == CMSG_ALIGN(cm->cmsg_len)))
		return (EINVAL);
	nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof (int);

	mtx_enter(&unp_rights_mtx);
	if (unp_rights + nfds > maxfiles / 10) {
		mtx_leave(&unp_rights_mtx);
		return (EMFILE);
	}
	unp_rights += nfds;
	mtx_leave(&unp_rights_mtx);

	/* Make sure we have room for the struct file pointers */
morespace:
	neededspace = CMSG_SPACE(nfds * sizeof(struct fdpass)) -
	    control->m_len;
	if (neededspace > m_trailingspace(control)) {
		char *tmp;
		/* if we already have a cluster, the message is just too big */
		if (control->m_flags & M_EXT) {
			error = E2BIG;
			goto nospace;
		}

		/* copy cmsg data temporarily out of the mbuf */
		tmp = malloc(control->m_len, M_TEMP, M_WAITOK);
		memcpy(tmp, mtod(control, caddr_t), control->m_len);

		/* allocate a cluster and try again */
		MCLGET(control, M_WAIT);
		if ((control->m_flags & M_EXT) == 0) {
			free(tmp, M_TEMP, control->m_len);
			error = ENOBUFS;	/* allocation failed */
			goto nospace;
		}

		/* copy the data back into the cluster */
		cm = mtod(control, struct cmsghdr *);
		memcpy(cm, tmp, control->m_len);
		free(tmp, M_TEMP, control->m_len);
		goto morespace;
	}

	/* adjust message & mbuf to note amount of space actually used. */
	cm->cmsg_len = CMSG_LEN(nfds * sizeof(struct fdpass));
	control->m_len = CMSG_SPACE(nfds * sizeof(struct fdpass));

	/*
	 * Walk from the end backwards: each struct fdpass is larger than
	 * the int it replaces, so in-place conversion must not overwrite
	 * ints that have not been read yet.
	 */
	ip = ((int *)CMSG_DATA(cm)) + nfds - 1;
	rp = ((struct fdpass *)CMSG_DATA(cm)) + nfds - 1;
	fdplock(fdp);
	for (i = 0; i < nfds; i++) {
		memcpy(&fd, ip, sizeof fd);
		ip--;
		if ((fp = fd_getfile(fdp, fd)) == NULL) {
			error = EBADF;
			goto fail;
		}
		if (fp->f_count >= FDUP_MAX_COUNT) {
			error = EDEADLK;
			goto fail;
		}
		error = pledge_sendfd(p, fp);
		if (error)
			goto fail;

		/* kqueue descriptors cannot be copied */
		if (fp->f_type == DTYPE_KQUEUE) {
			error = EINVAL;
			goto fail;
		}
		rp->fp = fp;
		rp->flags = fdp->fd_ofileflags[fd] & UF_PLEDGED;
		rp--;
		if ((unp = fptounp(fp)) != NULL) {
			rw_enter_write(&unp_gc_lock);
			unp->unp_msgcount++;
			unp->unp_file = fp;
			rw_exit_write(&unp_gc_lock);
		}
	}
	fdpunlock(fdp);
	return (0);
fail:
	fdpunlock(fdp);
	/* Release the file that failed the checks above (NULL on EBADF). */
	if (fp != NULL)
		FRELE(fp, p);
	/* Back out what we just did. */
	for ( ; i > 0; i--) {
		rp++;
		fp = rp->fp;
		if ((unp = fptounp(fp)) != NULL) {
			rw_enter_write(&unp_gc_lock);
			unp->unp_msgcount--;
			rw_exit_write(&unp_gc_lock);
		}
		FRELE(fp, p);
	}

nospace:
	mtx_enter(&unp_rights_mtx);
	unp_rights -= nfds;
	mtx_leave(&unp_rights_mtx);

	return (error);
}

/*
 * Garbage collector for descriptors in flight.  First closes everything
 * on the deferred list, then runs a mark-and-sweep over all AF_UNIX
 * sockets to find cycles of sockets that are only referenced by
 * in-flight SCM_RIGHTS messages and would otherwise never be closed.
 */
void
unp_gc(void *arg __unused)
{
	struct unp_deferral *defer;
	struct file *fp;
	struct socket *so;
	struct unpcb *unp;
	int nunref, i;

	rw_enter_write(&unp_gc_lock);
	if (unp_gcing)
		goto unlock;
	unp_gcing = 1;
	rw_exit_write(&unp_gc_lock);

	rw_enter_write(&unp_df_lock);
	/* close any fds on the deferred list */
	while ((defer = SLIST_FIRST(&unp_deferred)) != NULL) {
		SLIST_REMOVE_HEAD(&unp_deferred, ud_link);
		rw_exit_write(&unp_df_lock);
		for (i = 0; i < defer->ud_n; i++) {
			fp = defer->ud_fp[i].fp;
			if (fp == NULL)
				continue;
			if ((unp = fptounp(fp)) != NULL) {
				rw_enter_write(&unp_gc_lock);
				unp->unp_msgcount--;
				rw_exit_write(&unp_gc_lock);
			}
			mtx_enter(&unp_rights_mtx);
			unp_rights--;
			mtx_leave(&unp_rights_mtx);
			/* closef() expects a refcount of 2 */
			FREF(fp);
			(void) closef(fp, NULL);
		}
		free(defer, M_TEMP, sizeof(*defer) +
		    sizeof(struct fdpass) * defer->ud_n);
		rw_enter_write(&unp_df_lock);
	}
	rw_exit_write(&unp_df_lock);

	rw_enter_write(&unp_gc_lock);
	unp_defer = 0;
	LIST_FOREACH(unp, &unp_head, unp_link)
		unp->unp_gcflags = 0;
	do {
		nunref = 0;
		LIST_FOREACH(unp, &unp_head, unp_link) {
			fp = unp->unp_file;
			if (unp->unp_gcflags & UNP_GCDEFER) {
				/*
				 * This socket is referenced by another
				 * socket which is known to be live,
				 * so it's certainly live.
				 */
				unp->unp_gcflags &= ~UNP_GCDEFER;
				unp_defer--;
			} else if (unp->unp_gcflags & UNP_GCMARK) {
				/* marked as live in previous pass */
				continue;
			} else if (fp == NULL) {
				/* not being passed, so can't be in loop */
			} else if (fp->f_count == 0) {
				/*
				 * Already being closed, let normal close
				 * path take its course
				 */
			} else {
				/*
				 * Unreferenced by other sockets so far,
				 * so if all the references (f_count) are
				 * from passing (unp_msgcount) then this
				 * socket is prospectively dead
				 */
				if (fp->f_count == unp->unp_msgcount) {
					nunref++;
					unp->unp_gcflags |= UNP_GCDEAD;
					continue;
				}
			}

			/*
			 * This is the first time we've seen this socket on
			 * the mark pass and known it has a live reference,
			 * so mark it, then scan its receive buffer for
			 * sockets and note them as deferred (== referenced,
			 * but not yet marked).
			 */
			unp->unp_gcflags |= UNP_GCMARK;

			so = unp->unp_socket;
			solock(so);
			unp_scan(so->so_rcv.sb_mb, unp_mark);
			sounlock(so, SL_LOCKED);
		}
	} while (unp_defer);

	/*
	 * If there are any unreferenced sockets, then for each dispose
	 * of files in its receive buffer and then close it.
	 */
	if (nunref) {
		LIST_FOREACH(unp, &unp_head, unp_link) {
			if (unp->unp_gcflags & UNP_GCDEAD) {
				/*
				 * This socket could still be connected
				 * and if so it's `so_rcv' is still
				 * accessible by concurrent PRU_SEND
				 * thread.
				 */
				so = unp->unp_socket;
				solock(so);
				unp_scan(so->so_rcv.sb_mb, unp_discard);
				sounlock(so, SL_LOCKED);
			}
		}
	}
	unp_gcing = 0;
unlock:
	rw_exit_write(&unp_gc_lock);
}

/*
 * Discard any SCM_RIGHTS descriptors still queued in `m' (an unread
 * receive chain, or control data that could not be delivered).
 */
void
unp_dispose(struct mbuf *m)
{

	if (m)
		unp_scan(m, unp_discard);
}

/*
 * Walk a chain of packets, find the SCM_RIGHTS control mbuf in each (if
 * any) and apply `op' (unp_mark or unp_discard) to its fdpass array.
 */
void
unp_scan(struct mbuf *m0, void (*op)(struct fdpass *, int))
{
	struct mbuf *m;
	struct fdpass *rp;
	struct cmsghdr *cm;
	int qfds;

	while (m0) {
		for (m = m0; m; m = m->m_next) {
			if (m->m_type == MT_CONTROL &&
			    m->m_len >= sizeof(*cm)) {
				cm = mtod(m, struct cmsghdr *);
				if (cm->cmsg_level != SOL_SOCKET ||
				    cm->cmsg_type != SCM_RIGHTS)
					continue;
				qfds = (cm->cmsg_len - CMSG_ALIGN(sizeof *cm))
				    / sizeof(struct fdpass);
				if (qfds > 0) {
					rp = (struct fdpass *)CMSG_DATA(cm);
					op(rp, qfds);
				}
				break;		/* XXX, but saves time */
			}
		}
		m0 = m0->m_nextpkt;
	}
}

/*
 * GC mark helper: note every AF_UNIX socket in the fdpass array as
 * referenced-but-not-yet-marked so the mark loop revisits it.
 */
void
unp_mark(struct fdpass *rp, int nfds)
{
	struct unpcb *unp;
	int i;

	rw_assert_wrlock(&unp_gc_lock);

	for (i = 0; i < nfds; i++) {
		if (rp[i].fp == NULL)
			continue;

		unp = fptounp(rp[i].fp);
		if (unp == NULL)
			continue;

		if (unp->unp_gcflags & (UNP_GCMARK|UNP_GCDEFER))
			continue;

		unp_defer++;
		unp->unp_gcflags |= UNP_GCDEFER;
		unp->unp_gcflags &= ~UNP_GCDEAD;
	}
}

/*
 * Move an fdpass array onto the deferred-close list and clear the
 * originals, then schedule the GC task to do the actual closes.
 */
void
unp_discard(struct fdpass *rp, int nfds)
{
	struct unp_deferral *defer;

	/* copy the file pointers to a deferral structure */
	defer = malloc(sizeof(*defer) + sizeof(*rp) * nfds, M_TEMP, M_WAITOK);
	defer->ud_n = nfds;
	memcpy(&defer->ud_fp[0], rp, sizeof(*rp) * nfds);
	memset(rp, 0, sizeof(*rp) * nfds);

	rw_enter_write(&unp_df_lock);
	SLIST_INSERT_HEAD(&unp_deferred, defer, ud_link);
	rw_exit_write(&unp_df_lock);

	task_add(systqmp, &unp_gc_task);
}

/*
 * Validate the sockaddr in `nam' as an AF_UNIX address and return it via
 * `sun', guaranteeing that sun_path is NUL terminated (extending the
 * mbuf by one byte if needed).  Optionally returns the path length.
 */
int
unp_nam2sun(struct mbuf *nam, struct sockaddr_un **sun, size_t *pathlen)
{
	struct sockaddr *sa = mtod(nam, struct sockaddr *);
	size_t size, len;

	if (nam->m_len < offsetof(struct sockaddr, sa_data))
		return EINVAL;
	if (sa->sa_family != AF_UNIX)
		return EAFNOSUPPORT;
	if (sa->sa_len != nam->m_len)
		return EINVAL;
	if (sa->sa_len > sizeof(struct sockaddr_un))
		return EINVAL;
	*sun = (struct sockaddr_un *)sa;

	/* ensure that sun_path is NUL terminated and fits */
	size = (*sun)->sun_len - offsetof(struct sockaddr_un, sun_path);
	len = strnlen((*sun)->sun_path, size);
	if (len == sizeof((*sun)->sun_path))
		return EINVAL;
	if (len == size) {
		/* Path fills the buffer: append the terminating NUL. */
		if (m_trailingspace(nam) == 0)
			return EINVAL;
		nam->m_len++;
		(*sun)->sun_len++;
		(*sun)->sun_path[len] = '\0';
	}
	if (pathlen != NULL)
		*pathlen = len;

	return 0;
}