1 /* $NetBSD: uipc_usrreq.c,v 1.56 2002/11/25 08:32:00 itojun Exp $ */ 2 3 /*- 4 * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the NetBSD 22 * Foundation, Inc. and its contributors. 23 * 4. Neither the name of The NetBSD Foundation nor the names of its 24 * contributors may be used to endorse or promote products derived 25 * from this software without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 30 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 * POSSIBILITY OF SUCH DAMAGE. 38 */ 39 40 /* 41 * Copyright (c) 1997 Christopher G. Demetriou. All rights reserved. 42 * Copyright (c) 1982, 1986, 1989, 1991, 1993 43 * The Regents of the University of California. All rights reserved. 44 * 45 * Redistribution and use in source and binary forms, with or without 46 * modification, are permitted provided that the following conditions 47 * are met: 48 * 1. Redistributions of source code must retain the above copyright 49 * notice, this list of conditions and the following disclaimer. 50 * 2. Redistributions in binary form must reproduce the above copyright 51 * notice, this list of conditions and the following disclaimer in the 52 * documentation and/or other materials provided with the distribution. 53 * 3. All advertising materials mentioning features or use of this software 54 * must display the following acknowledgement: 55 * This product includes software developed by the University of 56 * California, Berkeley and its contributors. 57 * 4. Neither the name of the University nor the names of its contributors 58 * may be used to endorse or promote products derived from this software 59 * without specific prior written permission. 60 * 61 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 62 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 63 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 64 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 65 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 66 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 67 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 68 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 69 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 70 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 71 * SUCH DAMAGE. 72 * 73 * @(#)uipc_usrreq.c 8.9 (Berkeley) 5/14/95 74 */ 75 76 #include <sys/cdefs.h> 77 __KERNEL_RCSID(0, "$NetBSD: uipc_usrreq.c,v 1.56 2002/11/25 08:32:00 itojun Exp $"); 78 79 #include <sys/param.h> 80 #include <sys/systm.h> 81 #include <sys/proc.h> 82 #include <sys/filedesc.h> 83 #include <sys/domain.h> 84 #include <sys/protosw.h> 85 #include <sys/socket.h> 86 #include <sys/socketvar.h> 87 #include <sys/unpcb.h> 88 #include <sys/un.h> 89 #include <sys/namei.h> 90 #include <sys/vnode.h> 91 #include <sys/file.h> 92 #include <sys/stat.h> 93 #include <sys/mbuf.h> 94 95 /* 96 * Unix communications domain. 97 * 98 * TODO: 99 * SEQPACKET, RDM 100 * rethink name space problems 101 * need a proper out-of-band 102 */ 103 struct sockaddr_un sun_noname = { sizeof(sun_noname), AF_LOCAL }; 104 ino_t unp_ino; /* prototype for fake inode numbers */ 105 106 struct mbuf *unp_addsockcred __P((struct proc *, struct mbuf *)); 107 108 int 109 unp_output(m, control, unp, p) 110 struct mbuf *m, *control; 111 struct unpcb *unp; 112 struct proc *p; 113 { 114 struct socket *so2; 115 struct sockaddr_un *sun; 116 117 so2 = unp->unp_conn->unp_socket; 118 if (unp->unp_addr) 119 sun = unp->unp_addr; 120 else 121 sun = &sun_noname; 122 if (unp->unp_conn->unp_flags & UNP_WANTCRED) 123 control = unp_addsockcred(p, control); 124 if (sbappendaddr(&so2->so_rcv, (struct sockaddr *)sun, m, 125 control) == 0) { 126 m_freem(control); 127 m_freem(m); 128 return (EINVAL); 129 } else { 130 sorwakeup(so2); 131 return (0); 132 } 133 } 134 135 void 136 unp_setsockaddr(unp, nam) 137 struct unpcb *unp; 138 struct mbuf *nam; 139 { 140 struct sockaddr_un *sun; 141 142 if (unp->unp_addr) 143 sun = unp->unp_addr; 144 else 145 sun = &sun_noname; 146 nam->m_len = sun->sun_len; 147 if (nam->m_len > MLEN) 148 MEXTMALLOC(nam, nam->m_len, M_WAITOK); 149 memcpy(mtod(nam, caddr_t), sun, (size_t)nam->m_len); 150 } 151 152 void 153 unp_setpeeraddr(unp, nam) 154 struct unpcb *unp; 155 struct mbuf *nam; 156 { 157 struct sockaddr_un *sun; 158 159 if (unp->unp_conn && unp->unp_conn->unp_addr) 160 sun = unp->unp_conn->unp_addr; 161 else 162 sun = &sun_noname; 163 nam->m_len = sun->sun_len; 164 if (nam->m_len > MLEN) 165 MEXTMALLOC(nam, nam->m_len, M_WAITOK); 166 memcpy(mtod(nam, caddr_t), sun, (size_t)nam->m_len); 167 } 168 169 /*ARGSUSED*/ 170 int 171 uipc_usrreq(so, req, m, nam, control, p) 172 struct socket *so; 173 int req; 174 struct mbuf *m, *nam, *control; 175 struct proc *p; 176 { 177 struct unpcb *unp = sotounpcb(so); 178 struct socket *so2; 179 int error = 0; 180 181 if (req == PRU_CONTROL) 182 return (EOPNOTSUPP); 183 184 #ifdef DIAGNOSTIC 185 if (req != PRU_SEND && req != PRU_SENDOOB && control) 186 panic("uipc_usrreq: unexpected control mbuf"); 187 #endif 188 if (unp == 0 && req != PRU_ATTACH) { 189 error = EINVAL; 190 goto release; 191 } 192 193 switch (req) { 194 195 case PRU_ATTACH: 196 if (unp != 0) { 197 error = EISCONN; 198 break; 199 } 200 error = unp_attach(so); 201 break; 202 203 case PRU_DETACH: 204 unp_detach(unp); 205 break; 206 207 case PRU_BIND: 208 error = unp_bind(unp, nam, p); 209 break; 210 211 case PRU_LISTEN: 212 if (unp->unp_vnode == 0) 213 error = EINVAL; 214 break; 215 216 case PRU_CONNECT: 217 error = unp_connect(so, nam, p); 218 break; 219 220 case PRU_CONNECT2: 221 error = unp_connect2(so, (struct socket *)nam); 222 break; 223 224 case PRU_DISCONNECT: 225 unp_disconnect(unp); 226 break; 227 228 case PRU_ACCEPT: 229 unp_setpeeraddr(unp, nam); 230 break; 231 232 case PRU_SHUTDOWN: 233 socantsendmore(so); 234 unp_shutdown(unp); 235 break; 236 237 case PRU_RCVD: 238 switch (so->so_type) { 239 240 case SOCK_DGRAM: 241 panic("uipc 1"); 242 /*NOTREACHED*/ 243 244 case SOCK_STREAM: 245 #define rcv (&so->so_rcv) 246 #define snd (&so2->so_snd) 247 if (unp->unp_conn == 0) 248 break; 249 so2 = unp->unp_conn->unp_socket; 250 /* 251 * Adjust backpressure on sender 252 * and wakeup any waiting to write. 253 */ 254 snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt; 255 unp->unp_mbcnt = rcv->sb_mbcnt; 256 snd->sb_hiwat += unp->unp_cc - rcv->sb_cc; 257 unp->unp_cc = rcv->sb_cc; 258 sowwakeup(so2); 259 #undef snd 260 #undef rcv 261 break; 262 263 default: 264 panic("uipc 2"); 265 } 266 break; 267 268 case PRU_SEND: 269 /* 270 * Note: unp_internalize() rejects any control message 271 * other than SCM_RIGHTS, and only allows one. This 272 * has the side-effect of preventing a caller from 273 * forging SCM_CREDS. 274 */ 275 if (control && (error = unp_internalize(control, p))) 276 break; 277 switch (so->so_type) { 278 279 case SOCK_DGRAM: { 280 if (nam) { 281 if ((so->so_state & SS_ISCONNECTED) != 0) { 282 error = EISCONN; 283 goto die; 284 } 285 error = unp_connect(so, nam, p); 286 if (error) { 287 die: 288 m_freem(control); 289 m_freem(m); 290 break; 291 } 292 } else { 293 if ((so->so_state & SS_ISCONNECTED) == 0) { 294 error = ENOTCONN; 295 goto die; 296 } 297 } 298 error = unp_output(m, control, unp, p); 299 if (nam) 300 unp_disconnect(unp); 301 break; 302 } 303 304 case SOCK_STREAM: 305 #define rcv (&so2->so_rcv) 306 #define snd (&so->so_snd) 307 if (unp->unp_conn == 0) 308 panic("uipc 3"); 309 so2 = unp->unp_conn->unp_socket; 310 if (unp->unp_conn->unp_flags & UNP_WANTCRED) { 311 /* 312 * Credentials are passed only once on 313 * SOCK_STREAM. 314 */ 315 unp->unp_conn->unp_flags &= ~UNP_WANTCRED; 316 control = unp_addsockcred(p, control); 317 } 318 /* 319 * Send to paired receive port, and then reduce 320 * send buffer hiwater marks to maintain backpressure. 321 * Wake up readers. 322 */ 323 if (control) { 324 if (sbappendcontrol(rcv, m, control) == 0) 325 m_freem(control); 326 } else 327 sbappend(rcv, m); 328 snd->sb_mbmax -= 329 rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt; 330 unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt; 331 snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc; 332 unp->unp_conn->unp_cc = rcv->sb_cc; 333 sorwakeup(so2); 334 #undef snd 335 #undef rcv 336 break; 337 338 default: 339 panic("uipc 4"); 340 } 341 break; 342 343 case PRU_ABORT: 344 unp_drop(unp, ECONNABORTED); 345 346 #ifdef DIAGNOSTIC 347 if (so->so_pcb == 0) 348 panic("uipc 5: drop killed pcb"); 349 #endif 350 unp_detach(unp); 351 break; 352 353 case PRU_SENSE: 354 ((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat; 355 if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) { 356 so2 = unp->unp_conn->unp_socket; 357 ((struct stat *) m)->st_blksize += so2->so_rcv.sb_cc; 358 } 359 ((struct stat *) m)->st_dev = NODEV; 360 if (unp->unp_ino == 0) 361 unp->unp_ino = unp_ino++; 362 ((struct stat *) m)->st_atimespec = 363 ((struct stat *) m)->st_mtimespec = 364 ((struct stat *) m)->st_ctimespec = unp->unp_ctime; 365 ((struct stat *) m)->st_ino = unp->unp_ino; 366 return (0); 367 368 case PRU_RCVOOB: 369 error = EOPNOTSUPP; 370 break; 371 372 case PRU_SENDOOB: 373 m_freem(control); 374 m_freem(m); 375 error = EOPNOTSUPP; 376 break; 377 378 case PRU_SOCKADDR: 379 unp_setsockaddr(unp, nam); 380 break; 381 382 case PRU_PEERADDR: 383 unp_setpeeraddr(unp, nam); 384 break; 385 386 default: 387 panic("piusrreq"); 388 } 389 390 release: 391 return (error); 392 } 393 394 /* 395 * Unix domain socket option processing. 396 */ 397 int 398 uipc_ctloutput(op, so, level, optname, mp) 399 int op; 400 struct socket *so; 401 int level, optname; 402 struct mbuf **mp; 403 { 404 struct unpcb *unp = sotounpcb(so); 405 struct mbuf *m = *mp; 406 int optval = 0, error = 0; 407 408 if (level != 0) { 409 error = EINVAL; 410 if (op == PRCO_SETOPT && m) 411 (void) m_free(m); 412 } else switch (op) { 413 414 case PRCO_SETOPT: 415 switch (optname) { 416 case LOCAL_CREDS: 417 if (m == NULL || m->m_len != sizeof(int)) 418 error = EINVAL; 419 else { 420 optval = *mtod(m, int *); 421 switch (optname) { 422 #define OPTSET(bit) \ 423 if (optval) \ 424 unp->unp_flags |= (bit); \ 425 else \ 426 unp->unp_flags &= ~(bit); 427 428 case LOCAL_CREDS: 429 OPTSET(UNP_WANTCRED); 430 break; 431 } 432 } 433 break; 434 #undef OPTSET 435 436 default: 437 error = ENOPROTOOPT; 438 break; 439 } 440 if (m) 441 (void) m_free(m); 442 break; 443 444 case PRCO_GETOPT: 445 switch (optname) { 446 case LOCAL_CREDS: 447 *mp = m = m_get(M_WAIT, MT_SOOPTS); 448 m->m_len = sizeof(int); 449 switch (optname) { 450 451 #define OPTBIT(bit) (unp->unp_flags & (bit) ? 1 : 0) 452 453 case LOCAL_CREDS: 454 optval = OPTBIT(UNP_WANTCRED); 455 break; 456 } 457 *mtod(m, int *) = optval; 458 break; 459 #undef OPTBIT 460 461 default: 462 error = ENOPROTOOPT; 463 break; 464 } 465 break; 466 } 467 return (error); 468 } 469 470 /* 471 * Both send and receive buffers are allocated PIPSIZ bytes of buffering 472 * for stream sockets, although the total for sender and receiver is 473 * actually only PIPSIZ. 474 * Datagram sockets really use the sendspace as the maximum datagram size, 475 * and don't really want to reserve the sendspace. Their recvspace should 476 * be large enough for at least one max-size datagram plus address. 477 */ 478 #define PIPSIZ 4096 479 u_long unpst_sendspace = PIPSIZ; 480 u_long unpst_recvspace = PIPSIZ; 481 u_long unpdg_sendspace = 2*1024; /* really max datagram size */ 482 u_long unpdg_recvspace = 4*1024; 483 484 int unp_rights; /* file descriptors in flight */ 485 486 int 487 unp_attach(so) 488 struct socket *so; 489 { 490 struct unpcb *unp; 491 struct timeval tv; 492 int error; 493 494 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 495 switch (so->so_type) { 496 497 case SOCK_STREAM: 498 error = soreserve(so, unpst_sendspace, unpst_recvspace); 499 break; 500 501 case SOCK_DGRAM: 502 error = soreserve(so, unpdg_sendspace, unpdg_recvspace); 503 break; 504 505 default: 506 panic("unp_attach"); 507 } 508 if (error) 509 return (error); 510 } 511 unp = malloc(sizeof(*unp), M_PCB, M_NOWAIT); 512 if (unp == NULL) 513 return (ENOBUFS); 514 memset((caddr_t)unp, 0, sizeof(*unp)); 515 unp->unp_socket = so; 516 so->so_pcb = unp; 517 microtime(&tv); 518 TIMEVAL_TO_TIMESPEC(&tv, &unp->unp_ctime); 519 return (0); 520 } 521 522 void 523 unp_detach(unp) 524 struct unpcb *unp; 525 { 526 527 if (unp->unp_vnode) { 528 unp->unp_vnode->v_socket = 0; 529 vrele(unp->unp_vnode); 530 unp->unp_vnode = 0; 531 } 532 if (unp->unp_conn) 533 unp_disconnect(unp); 534 while (unp->unp_refs) 535 unp_drop(unp->unp_refs, ECONNRESET); 536 soisdisconnected(unp->unp_socket); 537 unp->unp_socket->so_pcb = 0; 538 if (unp->unp_addr) 539 free(unp->unp_addr, M_SONAME); 540 if (unp_rights) { 541 /* 542 * Normally the receive buffer is flushed later, 543 * in sofree, but if our receive buffer holds references 544 * to descriptors that are now garbage, we will dispose 545 * of those descriptor references after the garbage collector 546 * gets them (resulting in a "panic: closef: count < 0"). 547 */ 548 sorflush(unp->unp_socket); 549 free(unp, M_PCB); 550 unp_gc(); 551 } else 552 free(unp, M_PCB); 553 } 554 555 int 556 unp_bind(unp, nam, p) 557 struct unpcb *unp; 558 struct mbuf *nam; 559 struct proc *p; 560 { 561 struct sockaddr_un *sun; 562 struct vnode *vp; 563 struct vattr vattr; 564 size_t addrlen; 565 int error; 566 struct nameidata nd; 567 568 if (unp->unp_vnode != 0) 569 return (EINVAL); 570 571 /* 572 * Allocate the new sockaddr. We have to allocate one 573 * extra byte so that we can ensure that the pathname 574 * is nul-terminated. 575 */ 576 addrlen = nam->m_len + 1; 577 sun = malloc(addrlen, M_SONAME, M_WAITOK); 578 m_copydata(nam, 0, nam->m_len, (caddr_t)sun); 579 *(((char *)sun) + nam->m_len) = '\0'; 580 581 NDINIT(&nd, CREATE, FOLLOW | LOCKPARENT, UIO_SYSSPACE, 582 sun->sun_path, p); 583 584 /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */ 585 if ((error = namei(&nd)) != 0) 586 goto bad; 587 vp = nd.ni_vp; 588 if (vp != NULL) { 589 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 590 if (nd.ni_dvp == vp) 591 vrele(nd.ni_dvp); 592 else 593 vput(nd.ni_dvp); 594 vrele(vp); 595 error = EADDRINUSE; 596 goto bad; 597 } 598 VATTR_NULL(&vattr); 599 vattr.va_type = VSOCK; 600 vattr.va_mode = ACCESSPERMS; 601 VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); 602 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 603 if (error) 604 goto bad; 605 vp = nd.ni_vp; 606 vp->v_socket = unp->unp_socket; 607 unp->unp_vnode = vp; 608 unp->unp_addrlen = addrlen; 609 unp->unp_addr = sun; 610 VOP_UNLOCK(vp, 0); 611 return (0); 612 613 bad: 614 free(sun, M_SONAME); 615 return (error); 616 } 617 618 int 619 unp_connect(so, nam, p) 620 struct socket *so; 621 struct mbuf *nam; 622 struct proc *p; 623 { 624 struct sockaddr_un *sun; 625 struct vnode *vp; 626 struct socket *so2, *so3; 627 struct unpcb *unp2, *unp3; 628 size_t addrlen; 629 int error; 630 struct nameidata nd; 631 632 /* 633 * Allocate a temporary sockaddr. We have to allocate one extra 634 * byte so that we can ensure that the pathname is nul-terminated. 635 * When we establish the connection, we copy the other PCB's 636 * sockaddr to our own. 637 */ 638 addrlen = nam->m_len + 1; 639 sun = malloc(addrlen, M_SONAME, M_WAITOK); 640 m_copydata(nam, 0, nam->m_len, (caddr_t)sun); 641 *(((char *)sun) + nam->m_len) = '\0'; 642 643 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, sun->sun_path, p); 644 645 if ((error = namei(&nd)) != 0) 646 goto bad2; 647 vp = nd.ni_vp; 648 if (vp->v_type != VSOCK) { 649 error = ENOTSOCK; 650 goto bad; 651 } 652 if ((error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) != 0) 653 goto bad; 654 so2 = vp->v_socket; 655 if (so2 == 0) { 656 error = ECONNREFUSED; 657 goto bad; 658 } 659 if (so->so_type != so2->so_type) { 660 error = EPROTOTYPE; 661 goto bad; 662 } 663 if (so->so_proto->pr_flags & PR_CONNREQUIRED) { 664 if ((so2->so_options & SO_ACCEPTCONN) == 0 || 665 (so3 = sonewconn(so2, 0)) == 0) { 666 error = ECONNREFUSED; 667 goto bad; 668 } 669 unp2 = sotounpcb(so2); 670 unp3 = sotounpcb(so3); 671 if (unp2->unp_addr) { 672 unp3->unp_addr = malloc(unp2->unp_addrlen, 673 M_SONAME, M_WAITOK); 674 memcpy(unp3->unp_addr, unp2->unp_addr, 675 unp2->unp_addrlen); 676 unp3->unp_addrlen = unp2->unp_addrlen; 677 } 678 unp3->unp_flags = unp2->unp_flags; 679 so2 = so3; 680 } 681 error = unp_connect2(so, so2); 682 bad: 683 vput(vp); 684 bad2: 685 free(sun, M_SONAME); 686 return (error); 687 } 688 689 int 690 unp_connect2(so, so2) 691 struct socket *so; 692 struct socket *so2; 693 { 694 struct unpcb *unp = sotounpcb(so); 695 struct unpcb *unp2; 696 697 if (so2->so_type != so->so_type) 698 return (EPROTOTYPE); 699 unp2 = sotounpcb(so2); 700 unp->unp_conn = unp2; 701 switch (so->so_type) { 702 703 case SOCK_DGRAM: 704 unp->unp_nextref = unp2->unp_refs; 705 unp2->unp_refs = unp; 706 soisconnected(so); 707 break; 708 709 case SOCK_STREAM: 710 unp2->unp_conn = unp; 711 soisconnected(so); 712 soisconnected(so2); 713 break; 714 715 default: 716 panic("unp_connect2"); 717 } 718 return (0); 719 } 720 721 void 722 unp_disconnect(unp) 723 struct unpcb *unp; 724 { 725 struct unpcb *unp2 = unp->unp_conn; 726 727 if (unp2 == 0) 728 return; 729 unp->unp_conn = 0; 730 switch (unp->unp_socket->so_type) { 731 732 case SOCK_DGRAM: 733 if (unp2->unp_refs == unp) 734 unp2->unp_refs = unp->unp_nextref; 735 else { 736 unp2 = unp2->unp_refs; 737 for (;;) { 738 if (unp2 == 0) 739 panic("unp_disconnect"); 740 if (unp2->unp_nextref == unp) 741 break; 742 unp2 = unp2->unp_nextref; 743 } 744 unp2->unp_nextref = unp->unp_nextref; 745 } 746 unp->unp_nextref = 0; 747 unp->unp_socket->so_state &= ~SS_ISCONNECTED; 748 break; 749 750 case SOCK_STREAM: 751 soisdisconnected(unp->unp_socket); 752 unp2->unp_conn = 0; 753 soisdisconnected(unp2->unp_socket); 754 break; 755 } 756 } 757 758 #ifdef notdef 759 unp_abort(unp) 760 struct unpcb *unp; 761 { 762 763 unp_detach(unp); 764 } 765 #endif 766 767 void 768 unp_shutdown(unp) 769 struct unpcb *unp; 770 { 771 struct socket *so; 772 773 if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn && 774 (so = unp->unp_conn->unp_socket)) 775 socantrcvmore(so); 776 } 777 778 void 779 unp_drop(unp, errno) 780 struct unpcb *unp; 781 int errno; 782 { 783 struct socket *so = unp->unp_socket; 784 785 so->so_error = errno; 786 unp_disconnect(unp); 787 if (so->so_head) { 788 so->so_pcb = 0; 789 sofree(so); 790 if (unp->unp_addr) 791 free(unp->unp_addr, M_SONAME); 792 free(unp, M_PCB); 793 } 794 } 795 796 #ifdef notdef 797 unp_drain() 798 { 799 800 } 801 #endif 802 803 int 804 unp_externalize(rights) 805 struct mbuf *rights; 806 { 807 struct proc *p = curproc; /* XXX */ 808 struct cmsghdr *cm = mtod(rights, struct cmsghdr *); 809 int i, *fdp; 810 struct file **rp; 811 struct file *fp; 812 int nfds, error = 0; 813 814 nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / 815 sizeof(struct file *); 816 rp = (struct file **)CMSG_DATA(cm); 817 818 fdp = malloc(nfds * sizeof(int), M_TEMP, M_WAITOK); 819 820 /* Make sure the recipient should be able to see the descriptors.. */ 821 if (p->p_cwdi->cwdi_rdir != NULL) { 822 rp = (struct file **)CMSG_DATA(cm); 823 for (i = 0; i < nfds; i++) { 824 fp = *rp++; 825 /* 826 * If we are in a chroot'ed directory, and 827 * someone wants to pass us a directory, make 828 * sure it's inside the subtree we're allowed 829 * to access. 830 */ 831 if (fp->f_type == DTYPE_VNODE) { 832 struct vnode *vp = (struct vnode *)fp->f_data; 833 if ((vp->v_type == VDIR) && 834 !vn_isunder(vp, p->p_cwdi->cwdi_rdir, p)) { 835 error = EPERM; 836 break; 837 } 838 } 839 } 840 } 841 842 restart: 843 rp = (struct file **)CMSG_DATA(cm); 844 if (error != 0) { 845 for (i = 0; i < nfds; i++) { 846 fp = *rp; 847 /* 848 * zero the pointer before calling unp_discard, 849 * since it may end up in unp_gc().. 850 */ 851 *rp++ = 0; 852 unp_discard(fp); 853 } 854 goto out; 855 } 856 857 /* 858 * First loop -- allocate file descriptor table slots for the 859 * new descriptors. 860 */ 861 for (i = 0; i < nfds; i++) { 862 fp = *rp++; 863 if ((error = fdalloc(p, 0, &fdp[i])) != 0) { 864 /* 865 * Back out what we've done so far. 866 */ 867 for (--i; i >= 0; i--) 868 fdremove(p->p_fd, fdp[i]); 869 870 if (error == ENOSPC) { 871 fdexpand(p); 872 error = 0; 873 } else { 874 /* 875 * This is the error that has historically 876 * been returned, and some callers may 877 * expect it. 878 */ 879 error = EMSGSIZE; 880 } 881 goto restart; 882 } 883 884 /* 885 * Make the slot reference the descriptor so that 886 * fdalloc() works properly.. We finalize it all 887 * in the loop below. 888 */ 889 p->p_fd->fd_ofiles[fdp[i]] = fp; 890 } 891 892 /* 893 * Now that adding them has succeeded, update all of the 894 * descriptor passing state. 895 */ 896 rp = (struct file **)CMSG_DATA(cm); 897 for (i = 0; i < nfds; i++) { 898 fp = *rp++; 899 fp->f_msgcount--; 900 unp_rights--; 901 } 902 903 /* 904 * Copy temporary array to message and adjust length, in case of 905 * transition from large struct file pointers to ints. 906 */ 907 memcpy(CMSG_DATA(cm), fdp, nfds * sizeof(int)); 908 cm->cmsg_len = CMSG_LEN(nfds * sizeof(int)); 909 rights->m_len = CMSG_SPACE(nfds * sizeof(int)); 910 out: 911 free(fdp, M_TEMP); 912 return (error); 913 } 914 915 int 916 unp_internalize(control, p) 917 struct mbuf *control; 918 struct proc *p; 919 { 920 struct filedesc *fdescp = p->p_fd; 921 struct cmsghdr *cm = mtod(control, struct cmsghdr *); 922 struct file **rp; 923 struct file *fp; 924 int i, fd, *fdp; 925 int nfds; 926 u_int neededspace; 927 928 /* Sanity check the control message header */ 929 if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET || 930 cm->cmsg_len != control->m_len) 931 return (EINVAL); 932 933 /* Verify that the file descriptors are valid */ 934 nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof(int); 935 fdp = (int *)CMSG_DATA(cm); 936 for (i = 0; i < nfds; i++) { 937 fd = *fdp++; 938 if (fd_getfile(fdescp, fd) == NULL) 939 return (EBADF); 940 } 941 942 /* Make sure we have room for the struct file pointers */ 943 morespace: 944 neededspace = CMSG_SPACE(nfds * sizeof(struct file *)) - 945 control->m_len; 946 if (neededspace > M_TRAILINGSPACE(control)) { 947 948 /* if we already have a cluster, the message is just too big */ 949 if (control->m_flags & M_EXT) 950 return (E2BIG); 951 952 /* allocate a cluster and try again */ 953 MCLGET(control, M_WAIT); 954 if ((control->m_flags & M_EXT) == 0) 955 return (ENOBUFS); /* allocation failed */ 956 957 /* copy the data to the cluster */ 958 memcpy(mtod(control, char *), cm, cm->cmsg_len); 959 cm = mtod(control, struct cmsghdr *); 960 goto morespace; 961 } 962 963 /* adjust message & mbuf to note amount of space actually used. */ 964 cm->cmsg_len = CMSG_LEN(nfds * sizeof(struct file *)); 965 control->m_len = CMSG_SPACE(nfds * sizeof(struct file *)); 966 967 /* 968 * Transform the file descriptors into struct file pointers, in 969 * reverse order so that if pointers are bigger than ints, the 970 * int won't get until we're done. 971 */ 972 fdp = ((int *)CMSG_DATA(cm)) + nfds - 1; 973 rp = ((struct file **)CMSG_DATA(cm)) + nfds - 1; 974 for (i = 0; i < nfds; i++) { 975 fp = fdescp->fd_ofiles[*fdp--]; 976 FILE_USE(fp); 977 *rp-- = fp; 978 fp->f_count++; 979 fp->f_msgcount++; 980 FILE_UNUSE(fp, NULL); 981 unp_rights++; 982 } 983 return (0); 984 } 985 986 struct mbuf * 987 unp_addsockcred(p, control) 988 struct proc *p; 989 struct mbuf *control; 990 { 991 struct cmsghdr *cmp; 992 struct sockcred *sc; 993 struct mbuf *m, *n; 994 int len, space, i; 995 996 len = CMSG_LEN(SOCKCREDSIZE(p->p_ucred->cr_ngroups)); 997 space = CMSG_SPACE(SOCKCREDSIZE(p->p_ucred->cr_ngroups)); 998 999 m = m_get(M_WAIT, MT_CONTROL); 1000 if (space > MLEN) { 1001 if (space > MCLBYTES) 1002 MEXTMALLOC(m, space, M_WAITOK); 1003 else 1004 MCLGET(m, M_WAIT); 1005 if ((m->m_flags & M_EXT) == 0) { 1006 m_free(m); 1007 return (control); 1008 } 1009 } 1010 1011 m->m_len = space; 1012 m->m_next = NULL; 1013 cmp = mtod(m, struct cmsghdr *); 1014 sc = (struct sockcred *)CMSG_DATA(cmp); 1015 cmp->cmsg_len = len; 1016 cmp->cmsg_level = SOL_SOCKET; 1017 cmp->cmsg_type = SCM_CREDS; 1018 sc->sc_uid = p->p_cred->p_ruid; 1019 sc->sc_euid = p->p_ucred->cr_uid; 1020 sc->sc_gid = p->p_cred->p_rgid; 1021 sc->sc_egid = p->p_ucred->cr_gid; 1022 sc->sc_ngroups = p->p_ucred->cr_ngroups; 1023 for (i = 0; i < sc->sc_ngroups; i++) 1024 sc->sc_groups[i] = p->p_ucred->cr_groups[i]; 1025 1026 /* 1027 * If a control message already exists, append us to the end. 1028 */ 1029 if (control != NULL) { 1030 for (n = control; n->m_next != NULL; n = n->m_next) 1031 ; 1032 n->m_next = m; 1033 } else 1034 control = m; 1035 1036 return (control); 1037 } 1038 1039 int unp_defer, unp_gcing; 1040 extern struct domain unixdomain; 1041 1042 /* 1043 * Comment added long after the fact explaining what's going on here. 1044 * Do a mark-sweep GC of file descriptors on the system, to free up 1045 * any which are caught in flight to an about-to-be-closed socket. 1046 * 1047 * Traditional mark-sweep gc's start at the "root", and mark 1048 * everything reachable from the root (which, in our case would be the 1049 * process table). The mark bits are cleared during the sweep. 1050 * 1051 * XXX For some inexplicable reason (perhaps because the file 1052 * descriptor tables used to live in the u area which could be swapped 1053 * out and thus hard to reach), we do multiple scans over the set of 1054 * descriptors, using use *two* mark bits per object (DEFER and MARK). 1055 * Whenever we find a descriptor which references other descriptors, 1056 * the ones it references are marked with both bits, and we iterate 1057 * over the whole file table until there are no more DEFER bits set. 1058 * We also make an extra pass *before* the GC to clear the mark bits, 1059 * which could have been cleared at almost no cost during the previous 1060 * sweep. 1061 * 1062 * XXX MP: this needs to run with locks such that no other thread of 1063 * control can create or destroy references to file descriptors. it 1064 * may be necessary to defer the GC until later (when the locking 1065 * situation is more hospitable); it may be necessary to push this 1066 * into a separate thread. 1067 */ 1068 void 1069 unp_gc() 1070 { 1071 struct file *fp, *nextfp; 1072 struct socket *so, *so1; 1073 struct file **extra_ref, **fpp; 1074 int nunref, i; 1075 1076 if (unp_gcing) 1077 return; 1078 unp_gcing = 1; 1079 unp_defer = 0; 1080 1081 /* Clear mark bits */ 1082 LIST_FOREACH(fp, &filehead, f_list) 1083 fp->f_flag &= ~(FMARK|FDEFER); 1084 1085 /* 1086 * Iterate over the set of descriptors, marking ones believed 1087 * (based on refcount) to be referenced from a process, and 1088 * marking for rescan descriptors which are queued on a socket. 1089 */ 1090 do { 1091 LIST_FOREACH(fp, &filehead, f_list) { 1092 if (fp->f_flag & FDEFER) { 1093 fp->f_flag &= ~FDEFER; 1094 unp_defer--; 1095 #ifdef DIAGNOSTIC 1096 if (fp->f_count == 0) 1097 panic("unp_gc: deferred unreferenced socket"); 1098 #endif 1099 } else { 1100 if (fp->f_count == 0) 1101 continue; 1102 if (fp->f_flag & FMARK) 1103 continue; 1104 if (fp->f_count == fp->f_msgcount) 1105 continue; 1106 } 1107 fp->f_flag |= FMARK; 1108 1109 if (fp->f_type != DTYPE_SOCKET || 1110 (so = (struct socket *)fp->f_data) == 0) 1111 continue; 1112 if (so->so_proto->pr_domain != &unixdomain || 1113 (so->so_proto->pr_flags&PR_RIGHTS) == 0) 1114 continue; 1115 #ifdef notdef 1116 if (so->so_rcv.sb_flags & SB_LOCK) { 1117 /* 1118 * This is problematical; it's not clear 1119 * we need to wait for the sockbuf to be 1120 * unlocked (on a uniprocessor, at least), 1121 * and it's also not clear what to do 1122 * if sbwait returns an error due to receipt 1123 * of a signal. If sbwait does return 1124 * an error, we'll go into an infinite 1125 * loop. Delete all of this for now. 1126 */ 1127 (void) sbwait(&so->so_rcv); 1128 goto restart; 1129 } 1130 #endif 1131 unp_scan(so->so_rcv.sb_mb, unp_mark, 0); 1132 /* 1133 * mark descriptors referenced from sockets queued on the accept queue as well. 1134 */ 1135 if (so->so_options & SO_ACCEPTCONN) { 1136 TAILQ_FOREACH(so1, &so->so_q0, so_qe) { 1137 unp_scan(so1->so_rcv.sb_mb, unp_mark, 0); 1138 } 1139 TAILQ_FOREACH(so1, &so->so_q, so_qe) { 1140 unp_scan(so1->so_rcv.sb_mb, unp_mark, 0); 1141 } 1142 } 1143 1144 } 1145 } while (unp_defer); 1146 /* 1147 * Sweep pass. Find unmarked descriptors, and free them. 1148 * 1149 * We grab an extra reference to each of the file table entries 1150 * that are not otherwise accessible and then free the rights 1151 * that are stored in messages on them. 1152 * 1153 * The bug in the orginal code is a little tricky, so I'll describe 1154 * what's wrong with it here. 1155 * 1156 * It is incorrect to simply unp_discard each entry for f_msgcount 1157 * times -- consider the case of sockets A and B that contain 1158 * references to each other. On a last close of some other socket, 1159 * we trigger a gc since the number of outstanding rights (unp_rights) 1160 * is non-zero. If during the sweep phase the gc code un_discards, 1161 * we end up doing a (full) closef on the descriptor. A closef on A 1162 * results in the following chain. Closef calls soo_close, which 1163 * calls soclose. Soclose calls first (through the switch 1164 * uipc_usrreq) unp_detach, which re-invokes unp_gc. Unp_gc simply 1165 * returns because the previous instance had set unp_gcing, and 1166 * we return all the way back to soclose, which marks the socket 1167 * with SS_NOFDREF, and then calls sofree. Sofree calls sorflush 1168 * to free up the rights that are queued in messages on the socket A, 1169 * i.e., the reference on B. The sorflush calls via the dom_dispose 1170 * switch unp_dispose, which unp_scans with unp_discard. This second 1171 * instance of unp_discard just calls closef on B. 1172 * 1173 * Well, a similar chain occurs on B, resulting in a sorflush on B, 1174 * which results in another closef on A. Unfortunately, A is already 1175 * being closed, and the descriptor has already been marked with 1176 * SS_NOFDREF, and soclose panics at this point. 1177 * 1178 * Here, we first take an extra reference to each inaccessible 1179 * descriptor. Then, if the inaccessible descriptor is a 1180 * socket, we call sorflush in case it is a Unix domain 1181 * socket. After we destroy all the rights carried in 1182 * messages, we do a last closef to get rid of our extra 1183 * reference. This is the last close, and the unp_detach etc 1184 * will shut down the socket. 1185 * 1186 * 91/09/19, bsy@cs.cmu.edu 1187 */ 1188 extra_ref = malloc(nfiles * sizeof(struct file *), M_FILE, M_WAITOK); 1189 for (nunref = 0, fp = LIST_FIRST(&filehead), fpp = extra_ref; fp != 0; 1190 fp = nextfp) { 1191 nextfp = LIST_NEXT(fp, f_list); 1192 if (fp->f_count == 0) 1193 continue; 1194 if (fp->f_count == fp->f_msgcount && !(fp->f_flag & FMARK)) { 1195 *fpp++ = fp; 1196 nunref++; 1197 fp->f_count++; 1198 } 1199 } 1200 for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) { 1201 fp = *fpp; 1202 FILE_USE(fp); 1203 if (fp->f_type == DTYPE_SOCKET) 1204 sorflush((struct socket *)fp->f_data); 1205 FILE_UNUSE(fp, NULL); 1206 } 1207 for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) { 1208 fp = *fpp; 1209 FILE_USE(fp); 1210 (void) closef(fp, (struct proc *)0); 1211 } 1212 free((caddr_t)extra_ref, M_FILE); 1213 unp_gcing = 0; 1214 } 1215 1216 void 1217 unp_dispose(m) 1218 struct mbuf *m; 1219 { 1220 1221 if (m) 1222 unp_scan(m, unp_discard, 1); 1223 } 1224 1225 void 1226 unp_scan(m0, op, discard) 1227 struct mbuf *m0; 1228 void (*op) __P((struct file *)); 1229 int discard; 1230 { 1231 struct mbuf *m; 1232 struct file **rp; 1233 struct cmsghdr *cm; 1234 int i; 1235 int qfds; 1236 1237 while (m0) { 1238 for (m = m0; m; m = m->m_next) { 1239 if (m->m_type == MT_CONTROL && 1240 m->m_len >= sizeof(*cm)) { 1241 cm = mtod(m, struct cmsghdr *); 1242 if (cm->cmsg_level != SOL_SOCKET || 1243 cm->cmsg_type != SCM_RIGHTS) 1244 continue; 1245 qfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) 1246 / sizeof(struct file *); 1247 rp = (struct file **)CMSG_DATA(cm); 1248 for (i = 0; i < qfds; i++) { 1249 struct file *fp = *rp; 1250 if (discard) 1251 *rp = 0; 1252 (*op)(fp); 1253 rp++; 1254 } 1255 break; /* XXX, but saves time */ 1256 } 1257 } 1258 m0 = m0->m_nextpkt; 1259 } 1260 } 1261 1262 void 1263 unp_mark(fp) 1264 struct file *fp; 1265 { 1266 if (fp == NULL) 1267 return; 1268 1269 if (fp->f_flag & FMARK) 1270 return; 1271 1272 /* If we're already deferred, don't screw up the defer count */ 1273 if (fp->f_flag & FDEFER) 1274 return; 1275 1276 /* 1277 * Minimize the number of deferrals... Sockets are the only 1278 * type of descriptor which can hold references to another 1279 * descriptor, so just mark other descriptors, and defer 1280 * unmarked sockets for the next pass. 1281 */ 1282 if (fp->f_type == DTYPE_SOCKET) { 1283 unp_defer++; 1284 if (fp->f_count == 0) 1285 panic("unp_mark: queued unref"); 1286 fp->f_flag |= FDEFER; 1287 } else { 1288 fp->f_flag |= FMARK; 1289 } 1290 return; 1291 } 1292 1293 void 1294 unp_discard(fp) 1295 struct file *fp; 1296 { 1297 if (fp == NULL) 1298 return; 1299 FILE_USE(fp); 1300 fp->f_msgcount--; 1301 unp_rights--; 1302 (void) closef(fp, (struct proc *)0); 1303 } 1304