1 /* $OpenBSD: uipc_usrreq.c,v 1.24 2003/08/17 22:59:42 tedu Exp $ */ 2 /* $NetBSD: uipc_usrreq.c,v 1.18 1996/02/09 19:00:50 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1989, 1991, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 
 *
 *	@(#)uipc_usrreq.c	8.3 (Berkeley) 1/4/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/filedesc.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/unpcb.h>
#include <sys/un.h>
#include <sys/namei.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/mbuf.h>

/*
 * Unix communications domain.
 *
 * TODO:
 *	SEQPACKET, RDM
 *	rethink name space problems
 *	need a proper out-of-band
 */

/* Returned as the peer name for sockets that were never bound. */
struct sockaddr sun_noname = { sizeof(sun_noname), AF_UNIX };
ino_t	unp_ino;		/* prototype for fake inode numbers */

/*
 * Protocol user-request switch for the Unix (local) domain: dispatches
 * every PRU_* operation issued against an AF_UNIX socket to the
 * appropriate unp_* helper below.
 *
 * so      - the socket the request applies to
 * req     - PRU_* request code
 * m       - data mbuf chain (PRU_SEND), or a struct stat cast through
 *           an mbuf pointer for PRU_SENSE
 * nam     - address mbuf (bind/connect), or result mbuf for the
 *           *ADDR/PEEREID requests
 * control - control (SCM_RIGHTS) mbuf chain for PRU_SEND
 *
 * Returns 0 or an errno.  Note that m and control are consumed
 * (freed at "release") unless a helper took ownership of them.
 */
/*ARGSUSED*/
int
uipc_usrreq(so, req, m, nam, control)
	struct socket *so;
	int req;
	struct mbuf *m, *nam, *control;
{
	struct unpcb *unp = sotounpcb(so);
	register struct socket *so2;
	register int error = 0;
	struct proc *p = curproc;		/* XXX */

	if (req == PRU_CONTROL)
		return (EOPNOTSUPP);
	/* Control data is only meaningful on PRU_SEND (fd passing). */
	if (req != PRU_SEND && control && control->m_len) {
		error = EOPNOTSUPP;
		goto release;
	}
	/* Every request except ATTACH needs an existing pcb. */
	if (unp == 0 && req != PRU_ATTACH) {
		error = EINVAL;
		goto release;
	}
	switch (req) {

	case PRU_ATTACH:
		if (unp) {
			error = EISCONN;
			break;
		}
		error = unp_attach(so);
		break;

	case PRU_DETACH:
		unp_detach(unp);
		break;

	case PRU_BIND:
		error = unp_bind(unp, nam, p);
		break;

	case PRU_LISTEN:
		/* Only a bound (named) socket may listen. */
		if (unp->unp_vnode == 0)
			error = EINVAL;
		break;

	case PRU_CONNECT:
		error = unp_connect(so, nam, p);
		break;

	case PRU_CONNECT2:
		/* socketpair(2): nam is really the second socket. */
		error = unp_connect2(so, (struct socket *)nam);
		break;

	case PRU_DISCONNECT:
		unp_disconnect(unp);
		break;

	case PRU_ACCEPT:
		/*
		 * Pass back name of connected socket,
		 * if it was bound and we are still connected
		 * (our peer may have closed already!).
		 */
		if (unp->unp_conn && unp->unp_conn->unp_addr) {
			nam->m_len = unp->unp_conn->unp_addr->m_len;
			bcopy(mtod(unp->unp_conn->unp_addr, caddr_t),
			    mtod(nam, caddr_t), (unsigned)nam->m_len);
		} else {
			nam->m_len = sizeof(sun_noname);
			*(mtod(nam, struct sockaddr *)) = sun_noname;
		}
		break;

	case PRU_SHUTDOWN:
		socantsendmore(so);
		unp_shutdown(unp);
		break;

	case PRU_RCVD:
		switch (so->so_type) {

		case SOCK_DGRAM:
			panic("uipc 1");
			/*NOTREACHED*/

		case SOCK_STREAM:
			/*
			 * Our receive buffer and the peer's send buffer
			 * are a flow-controlled pair; alias them so the
			 * accounting below reads naturally.
			 */
#define	rcv (&so->so_rcv)
#define	snd (&so2->so_snd)
			if (unp->unp_conn == 0)
				break;
			so2 = unp->unp_conn->unp_socket;
			/*
			 * Adjust backpressure on sender
			 * and wakeup any waiting to write.
			 */
			snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
			unp->unp_mbcnt = rcv->sb_mbcnt;
			snd->sb_hiwat += unp->unp_cc - rcv->sb_cc;
			unp->unp_cc = rcv->sb_cc;
			sowwakeup(so2);
#undef snd
#undef rcv
			break;

		default:
			panic("uipc 2");
		}
		break;

	case PRU_SEND:
		/* Convert any passed fds into struct file pointers first. */
		if (control && (error = unp_internalize(control, p)))
			break;
		switch (so->so_type) {

		case SOCK_DGRAM: {
			struct sockaddr *from;

			if (nam) {
				/* sendto() with an address: temp connect. */
				if (unp->unp_conn) {
					error = EISCONN;
					break;
				}
				error = unp_connect(so, nam, p);
				if (error)
					break;
			} else {
				if (unp->unp_conn == 0) {
					error = ENOTCONN;
					break;
				}
			}
			so2 = unp->unp_conn->unp_socket;
			if (unp->unp_addr)
				from = mtod(unp->unp_addr, struct sockaddr *);
			else
				from = &sun_noname;
			if (sbappendaddr(&so2->so_rcv, from, m, control)) {
				sorwakeup(so2);
				/* Receiver owns m/control now. */
				m = 0;
				control = 0;
			} else
				error = ENOBUFS;
			/* Undo the temporary connect from sendto(). */
			if (nam)
				unp_disconnect(unp);
			break;
		}

		case SOCK_STREAM:
			/* Here the aliasing is reversed vs. PRU_RCVD. */
#define	rcv (&so2->so_rcv)
#define	snd (&so->so_snd)
			if (so->so_state & SS_CANTSENDMORE) {
				error = EPIPE;
				break;
			}
			if (unp->unp_conn == 0)
				panic("uipc 3");
			so2 = unp->unp_conn->unp_socket;
			/*
			 * Send to paired receive port, and then reduce
			 * send buffer hiwater marks to maintain backpressure.
			 * Wake up readers.
			 */
			if (control) {
				if (sbappendcontrol(rcv, m, control))
					control = 0;
			} else
				sbappend(rcv, m);
			snd->sb_mbmax -=
			    rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
			unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
			snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc;
			unp->unp_conn->unp_cc = rcv->sb_cc;
			sorwakeup(so2);
			m = 0;
#undef snd
#undef rcv
			break;

		default:
			panic("uipc 4");
		}
		break;

	case PRU_ABORT:
		unp_drop(unp, ECONNABORTED);
		break;

	case PRU_SENSE:
		/* fstat(2) on a socket: m is really a struct stat *. */
		((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat;
		if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) {
			so2 = unp->unp_conn->unp_socket;
			((struct stat *) m)->st_blksize += so2->so_rcv.sb_cc;
		}
		((struct stat *) m)->st_dev = NODEV;
		/* Hand out fake inode numbers lazily, one per pcb. */
		if (unp->unp_ino == 0)
			unp->unp_ino = unp_ino++;
		((struct stat *) m)->st_atimespec =
		    ((struct stat *) m)->st_mtimespec =
		    ((struct stat *) m)->st_ctimespec = unp->unp_ctime;
		((struct stat *) m)->st_ino = unp->unp_ino;
		return (0);

	case PRU_RCVOOB:
		return (EOPNOTSUPP);

	case PRU_SENDOOB:
		error = EOPNOTSUPP;
		break;

	case PRU_SOCKADDR:
		if (unp->unp_addr) {
			nam->m_len = unp->unp_addr->m_len;
			bcopy(mtod(unp->unp_addr, caddr_t),
			    mtod(nam, caddr_t), (unsigned)nam->m_len);
		} else
			nam->m_len = 0;
		break;

	case PRU_PEERADDR:
		if (unp->unp_conn && unp->unp_conn->unp_addr) {
			nam->m_len = unp->unp_conn->unp_addr->m_len;
			bcopy(mtod(unp->unp_conn->unp_addr, caddr_t),
			    mtod(nam, caddr_t), (unsigned)nam->m_len);
		} else
			nam->m_len = 0;
		break;

	case PRU_PEEREID:
		/* getpeereid(2): connid is filled in by unp_connect(). */
		if (unp->unp_flags & UNP_FEIDS) {
			nam->m_len = sizeof(struct unpcbid);
			bcopy((caddr_t)(&(unp->unp_connid)),
			    mtod(nam, caddr_t), (unsigned)nam->m_len);
		} else
			nam->m_len = 0;
		break;

	case PRU_SLOWTIMO:
		break;

	default:
		panic("piusrreq");
	}
release:
	if (control)
		m_freem(control);
	if (m)
		m_freem(m);
	return (error);
}

/*
 * Both send and receive buffers are allocated PIPSIZ bytes of buffering
 * for stream sockets, although the total for sender and receiver is
 * actually only PIPSIZ.
 * Datagram sockets really use the sendspace as the maximum datagram size,
 * and don't really want to reserve the sendspace.  Their recvspace should
 * be large enough for at least one max-size datagram plus address.
 */
#define	PIPSIZ	4096
u_long	unpst_sendspace = PIPSIZ;
u_long	unpst_recvspace = PIPSIZ;
u_long	unpdg_sendspace = 2*1024;	/* really max datagram size */
u_long	unpdg_recvspace = 4*1024;

int	unp_rights;			/* file descriptors in flight */

/*
 * Allocate and zero a new unpcb for socket so, reserving default
 * buffer space by socket type, and stamp its creation time (used as
 * the fake timestamps reported by PRU_SENSE).  Returns 0 or errno.
 */
int
unp_attach(so)
	struct socket *so;
{
	register struct unpcb *unp;
	struct timeval tv;
	int error;

	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
		switch (so->so_type) {

		case SOCK_STREAM:
			error = soreserve(so, unpst_sendspace, unpst_recvspace);
			break;

		case SOCK_DGRAM:
			error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
			break;

		default:
			panic("unp_attach");
		}
		if (error)
			return (error);
	}
	unp = malloc(sizeof(*unp), M_PCB, M_NOWAIT);
	if (unp == NULL)
		return (ENOBUFS);
	bzero((caddr_t)unp, sizeof(*unp));
	unp->unp_socket = so;
	so->so_pcb = unp;
	microtime(&tv);
	TIMEVAL_TO_TIMESPEC(&tv, &unp->unp_ctime);
	return (0);
}

/*
 * Tear down a pcb: release the bound vnode (if any), disconnect from
 * the peer and from any datagram senders still referencing us, then
 * free the pcb.  If descriptors are in flight anywhere in the system,
 * flush our receive buffer first and run the garbage collector.
 */
void
unp_detach(unp)
	register struct unpcb *unp;
{

	if (unp->unp_vnode) {
		unp->unp_vnode->v_socket = 0;
		vrele(unp->unp_vnode);
		unp->unp_vnode = 0;
	}
	if (unp->unp_conn)
		unp_disconnect(unp);
	/* Reset every datagram peer still connected to us. */
	while (unp->unp_refs)
		unp_drop(unp->unp_refs, ECONNRESET);
	soisdisconnected(unp->unp_socket);
	unp->unp_socket->so_pcb = 0;
	m_freem(unp->unp_addr);
	if (unp_rights) {
		/*
		 * Normally the receive buffer is flushed later,
		 * in sofree, but if our receive buffer holds references
		 * to descriptors that are now garbage, we will dispose
		 * of those descriptor references after the garbage collector
		 * gets them (resulting in a "panic: closef: count < 0").
		 */
		sorflush(unp->unp_socket);
		free(unp, M_PCB);
		unp_gc();
	} else
		free(unp, M_PCB);
}

/*
 * bind(2): create a VSOCK vnode at the pathname carried in nam and
 * attach it to the socket.  The path is copied out of the sockaddr_un
 * and NUL-terminated locally since the mbuf copy need not be.
 * Returns 0 or errno (EADDRINUSE if the name already exists).
 */
int
unp_bind(unp, nam, p)
	struct unpcb *unp;
	struct mbuf *nam;
	struct proc *p;
{
	struct sockaddr_un *soun = mtod(nam, struct sockaddr_un *);
	register struct vnode *vp;
	struct vattr vattr;
	int error, namelen;
	struct nameidata nd;
	char buf[MLEN];

	if (unp->unp_vnode != NULL)
		return (EINVAL);
	namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path);
	if (namelen <= 0 || namelen >= MLEN)
		return EINVAL;
	strncpy(buf, soun->sun_path, namelen);
	buf[namelen] = 0;	/* null-terminate the string */
	NDINIT(&nd, CREATE, NOFOLLOW | LOCKPARENT, UIO_SYSSPACE, buf, p);
/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
	if ((error = namei(&nd)) != 0)
		return (error);
	vp = nd.ni_vp;
	if (vp != NULL) {
		/* Name already exists: abort the create and bail out. */
		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
		if (nd.ni_dvp == vp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vrele(vp);
		return (EADDRINUSE);
	}
	VATTR_NULL(&vattr);
	vattr.va_type = VSOCK;
	vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask;
	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
	error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
	if (error)
		return (error);
	vp = nd.ni_vp;
	vp->v_socket = unp->unp_socket;
	unp->unp_vnode = vp;
	unp->unp_addr = m_copy(nam, 0, (int)M_COPYALL);
	VOP_UNLOCK(vp, 0, p);
	return (0);
}

/*
 * connect(2): look up the pathname in nam, check it is a VSOCK vnode
 * with a listening/compatible socket attached, and wire this socket
 * to it.  For connection-oriented (stream) sockets a fresh server-side
 * socket is cloned with sonewconn() and the connecting process's
 * effective uid/gid are recorded for PRU_PEEREID.
 */
int
unp_connect(so, nam, p)
	struct socket *so;
	struct mbuf *nam;
	struct proc *p;
{
	register struct sockaddr_un *soun = mtod(nam, struct sockaddr_un *);
	register struct vnode *vp;
	register struct socket *so2, *so3;
	struct unpcb *unp2, *unp3;
	int error;
	struct nameidata nd;

	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, soun->sun_path, p);
	/*
	 * Ensure the path is NUL-terminated inside the mbuf; if there is
	 * no room left to append a NUL the name is too long.
	 */
	if (nam->m_data + nam->m_len == &nam->m_dat[MLEN]) {	/* XXX */
		if (*(mtod(nam, caddr_t) + nam->m_len - 1) != 0)
			return (EMSGSIZE);
	} else
		*(mtod(nam, caddr_t) + nam->m_len) = 0;
	if ((error = namei(&nd)) != 0)
		return (error);
	vp = nd.ni_vp;
	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		goto bad;
	}
	if ((error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) != 0)
		goto bad;
	so2 = vp->v_socket;
	if (so2 == 0) {
		error = ECONNREFUSED;
		goto bad;
	}
	if (so->so_type != so2->so_type) {
		error = EPROTOTYPE;
		goto bad;
	}
	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
		    (so3 = sonewconn(so2, 0)) == 0) {
			error = ECONNREFUSED;
			goto bad;
		}
		unp2 = sotounpcb(so2);
		unp3 = sotounpcb(so3);
		if (unp2->unp_addr)
			unp3->unp_addr =
			    m_copy(unp2->unp_addr, 0, (int)M_COPYALL);
		/* Record connecting peer's credentials for PRU_PEEREID. */
		unp3->unp_connid.unp_euid = p->p_ucred->cr_uid;
		unp3->unp_connid.unp_egid = p->p_ucred->cr_gid;
		unp3->unp_flags |= UNP_FEIDS;
		so2 = so3;
	}
	error = unp_connect2(so, so2);
bad:
	vput(vp);
	return (error);
}

/*
 * Wire sockets so and so2 together.  Datagram sockets form a one-way
 * link (so is added to so2's unp_refs sender list); stream sockets are
 * cross-connected and both marked connected.
 */
int
unp_connect2(so, so2)
	register struct socket *so;
	register struct socket *so2;
{
	register struct unpcb *unp = sotounpcb(so);
	register struct unpcb *unp2;

	if (so2->so_type != so->so_type)
		return (EPROTOTYPE);
	unp2 = sotounpcb(so2);
	unp->unp_conn = unp2;
	switch (so->so_type) {

	case SOCK_DGRAM:
		unp->unp_nextref = unp2->unp_refs;
		unp2->unp_refs = unp;
		soisconnected(so);
		break;

	case SOCK_STREAM:
		unp2->unp_conn = unp;
		soisconnected(so);
		soisconnected(so2);
		break;

	default:
		panic("unp_connect2");
	}
	return (0);
}

/*
 * Undo unp_connect2(): for datagram sockets, unlink unp from the
 * peer's singly-linked unp_refs sender list; for stream sockets,
 * disconnect both ends.
 */
void
unp_disconnect(unp)
	struct unpcb *unp;
{
	register struct unpcb *unp2 = unp->unp_conn;

	if (unp2 == 0)
		return;
	unp->unp_conn = 0;
	switch (unp->unp_socket->so_type) {

	case SOCK_DGRAM:
		if (unp2->unp_refs == unp)
			unp2->unp_refs = unp->unp_nextref;
		else {
			/* Walk the sender list to find our predecessor. */
			unp2 = unp2->unp_refs;
			for (;;) {
				if (unp2 == 0)
					panic("unp_disconnect");
				if (unp2->unp_nextref == unp)
					break;
				unp2 = unp2->unp_nextref;
			}
			unp2->unp_nextref = unp->unp_nextref;
		}
		unp->unp_nextref = 0;
		unp->unp_socket->so_state &= ~SS_ISCONNECTED;
		break;

	case SOCK_STREAM:
		soisdisconnected(unp->unp_socket);
		unp2->unp_conn = 0;
		soisdisconnected(unp2->unp_socket);
		break;
	}
}

#ifdef notdef
unp_abort(unp)
	struct unpcb *unp;
{

	unp_detach(unp);
}
#endif

/*
 * shutdown(2) write-half: tell the stream peer it will receive no
 * more data.  No-op for datagram sockets or when unconnected.
 */
void
unp_shutdown(unp)
	struct unpcb *unp;
{
	struct socket *so;

	if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn &&
	    (so = unp->unp_conn->unp_socket))
		socantrcvmore(so);
}

/*
 * Report an error on the socket and disconnect it.  If the socket is
 * still on a listen queue (so_head set), free it and its pcb here
 * since no process will ever accept it.
 */
void
unp_drop(unp, errno)
	struct unpcb *unp;
	int errno;
{
	struct socket *so = unp->unp_socket;

	so->so_error = errno;
	unp_disconnect(unp);
	if (so->so_head) {
		so->so_pcb = 0;
		sofree(so);
		m_freem(unp->unp_addr);
		free(unp, M_PCB);
	}
}

#ifdef notdef
unp_drain()
{

}
#endif

/*
 * Receive side of fd passing: convert the in-flight struct file
 * pointers stored in a SCM_RIGHTS control message into descriptors in
 * the receiving process's fd table, then shrink the message back to an
 * array of ints.  On any failure all the in-flight references are
 * discarded and an error is returned (historically EMSGSIZE for fd
 * allocation failures).
 */
int
unp_externalize(rights)
	struct mbuf *rights;
{
	struct proc *p = curproc;		/* XXX */
	struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
	int i, *fdp;
	struct file **rp;
	struct file *fp;
	int nfds, error = 0;

	nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) /
	    sizeof(struct file *);
	rp = (struct file **)CMSG_DATA(cm);

	fdp = malloc(nfds * sizeof(int), M_TEMP, M_WAITOK);

#ifdef notyet
	/* Make sure the recipient should be able to see the descriptors.. */
	if (p->p_cwdi->cwdi_rdir != NULL) {
		rp = (struct file **)CMSG_DATA(cm);
		for (i = 0; i < nfds; i++) {
			fp = *rp++;
			/*
			 * If we are in a chroot'ed directory, and
			 * someone wants to pass us a directory, make
			 * sure it's inside the subtree we're allowed
			 * to access.
			 */
			if (fp->f_type == DTYPE_VNODE) {
				struct vnode *vp = (struct vnode *)fp->f_data;
				if ((vp->v_type == VDIR) &&
				    !vn_isunder(vp, p->p_cwdi->cwdi_rdir, p)) {
					error = EPERM;
					break;
				}
			}
		}
	}
#endif

restart:
	if (error != 0) {
		rp = ((struct file **)CMSG_DATA(cm));
		for (i = 0; i < nfds; i++) {
			fp = *rp;
			/*
			 * zero the pointer before calling unp_discard,
			 * since it may end up in unp_gc()..
			 */
			*rp++ = 0;
			unp_discard(fp);
		}
		goto out;
	}

	/*
	 * First loop -- allocate file descriptor table slots for the
	 * new descriptors.
	 */
	rp = ((struct file **)CMSG_DATA(cm));
	for (i = 0; i < nfds; i++) {
		/* bcopy: CMSG_DATA need not be pointer-aligned. */
		bcopy(rp, &fp, sizeof(fp));
		rp++;
		if ((error = fdalloc(p, 0, &fdp[i])) != 0) {
			/*
			 * Back out what we've done so far.
			 */
			for (--i; i >= 0; i--)
				fdremove(p->p_fd, fdp[i]);

			if (error == ENOSPC) {
				/* Grow the fd table and retry from scratch. */
				fdexpand(p);
				error = 0;
			} else {
				/*
				 * This is the error that has historically
				 * been returned, and some callers may
				 * expect it.
				 */
				error = EMSGSIZE;
			}
			goto restart;
		}

		/*
		 * Make the slot reference the descriptor so that
		 * fdalloc() works properly.. We finalize it all
		 * in the loop below.
		 */
		p->p_fd->fd_ofiles[fdp[i]] = fp;
	}

	/*
	 * Now that adding them has succeeded, update all of the
	 * descriptor passing state.
	 */
	rp = (struct file **)CMSG_DATA(cm);
	for (i = 0; i < nfds; i++) {
		fp = *rp++;
		fp->f_msgcount--;
		unp_rights--;
	}

	/*
	 * Copy temporary array to message and adjust length, in case of
	 * transition from large struct file pointers to ints.
	 */
	memcpy(CMSG_DATA(cm), fdp, nfds * sizeof(int));
	cm->cmsg_len = CMSG_LEN(nfds * sizeof(int));
	rights->m_len = CMSG_SPACE(nfds * sizeof(int));
out:
	free(fdp, M_TEMP);
	return (error);
}

/*
 * Send side of fd passing: validate a SCM_RIGHTS control message and
 * replace the sender's descriptor numbers with struct file pointers,
 * growing the mbuf into a cluster if the pointer array needs more room
 * than the int array occupied.  Each file gains an f_count and
 * f_msgcount reference; unp_rights tracks the system-wide total.
 * The conversion runs back-to-front (ip/rp start at the last element)
 * because pointers are wider than ints and would otherwise overwrite
 * fds not yet read.
 */
int
unp_internalize(control, p)
	struct mbuf *control;
	struct proc *p;
{
	struct filedesc *fdp = p->p_fd;
	struct cmsghdr *cm = mtod(control, struct cmsghdr *);
	struct file **rp, *fp;
	int i, error;
	int nfds, *ip, fd, neededspace;

	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
	    cm->cmsg_len != control->m_len)
		return (EINVAL);
	nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof (int);

	/* Make sure we have room for the struct file pointers */
morespace:
	neededspace = CMSG_SPACE(nfds * sizeof(struct file *)) -
	    control->m_len;
	if (neededspace > M_TRAILINGSPACE(control)) {
		/* if we already have a cluster, the message is just too big */
		if (control->m_flags & M_EXT)
			return (E2BIG);

		/* allocate a cluster and try again */
		MCLGET(control, M_WAIT);
		if ((control->m_flags & M_EXT) == 0)
			return (ENOBUFS);	/* allocation failed */

		/* copy the data to the cluster */
		memcpy(mtod(control, char *), cm, cm->cmsg_len);
		cm = mtod(control, struct cmsghdr *);
		goto morespace;
	}

	/* adjust message & mbuf to note amount of space actually used. */
	cm->cmsg_len = CMSG_LEN(nfds * sizeof(struct file *));
	control->m_len = CMSG_SPACE(nfds * sizeof(struct file *));

	ip = ((int *)CMSG_DATA(cm)) + nfds - 1;
	rp = ((struct file **)CMSG_DATA(cm)) + nfds - 1;
	for (i = 0; i < nfds; i++) {
		bcopy(ip, &fd, sizeof fd);
		ip--;
		if ((fp = fd_getfile(fdp, fd)) == NULL) {
			error = EBADF;
			goto fail;
		}
		/* Refuse to wrap the reference counts. */
		if (fp->f_count == LONG_MAX-2 ||
		    fp->f_msgcount == LONG_MAX-2) {
			error = EDEADLK;
			goto fail;
		}
		bcopy(&fp, rp, sizeof fp);
		rp--;
		fp->f_count++;
		fp->f_msgcount++;
		unp_rights++;
	}
	return (0);
fail:
	/* Back out what we just did. */
	for ( ; i > 0; i--) {
		bcopy(rp, &fp, sizeof(fp));
		rp++;
		fp->f_count--;
		fp->f_msgcount--;
		unp_rights--;
	}

	return (error);
}

int	unp_defer, unp_gcing;
extern	struct domain unixdomain;

/*
 * Mark-and-sweep garbage collector for in-flight file descriptors.
 * A file whose only references are f_msgcount references sitting in
 * unreachable socket buffers will never be delivered and must be
 * closed here.  The mark phase (FMARK/FDEFER) finds everything still
 * reachable from an accessible descriptor; the sweep takes an extra
 * reference to each unreachable file, flushes its socket buffer, and
 * closes it.  unp_gcing makes the pass non-reentrant.
 */
void
unp_gc()
{
	register struct file *fp, *nextfp;
	register struct socket *so;
	struct file **extra_ref, **fpp;
	int nunref, i;

	if (unp_gcing)
		return;
	unp_gcing = 1;
	unp_defer = 0;
	/* Clear all marks before starting a fresh mark phase. */
	LIST_FOREACH(fp, &filehead, f_list)
		fp->f_flag &= ~(FMARK|FDEFER);
	do {
		LIST_FOREACH(fp, &filehead, f_list) {
			if (fp->f_flag & FDEFER) {
				fp->f_flag &= ~FDEFER;
				unp_defer--;
			} else {
				if (fp->f_count == 0)
					continue;
				if (fp->f_flag & FMARK)
					continue;
				/*
				 * All references are in-flight messages:
				 * not provably reachable, skip for now.
				 */
				if (fp->f_count == fp->f_msgcount)
					continue;
			}
			fp->f_flag |= FMARK;

			if (fp->f_type != DTYPE_SOCKET ||
			    (so = (struct socket *)fp->f_data) == 0)
				continue;
			if (so->so_proto->pr_domain != &unixdomain ||
			    (so->so_proto->pr_flags&PR_RIGHTS) == 0)
				continue;
#ifdef notdef
			if (so->so_rcv.sb_flags & SB_LOCK) {
				/*
				 * This is problematical; it's not clear
				 * we need to wait for the sockbuf to be
				 * unlocked (on a uniprocessor, at least),
				 * and it's also not clear what to do
				 * if sbwait returns an error due to receipt
				 * of a signal.  If sbwait does return
				 * an error, we'll go into an infinite
				 * loop.  Delete all of this for now.
				 */
				(void) sbwait(&so->so_rcv);
				goto restart;
			}
#endif
			/* Mark every file queued in this socket's rcv buf. */
			unp_scan(so->so_rcv.sb_mb, unp_mark, 0);
		}
	} while (unp_defer);
	/*
	 * We grab an extra reference to each of the file table entries
	 * that are not otherwise accessible and then free the rights
	 * that are stored in messages on them.
	 *
	 * The bug in the original code is a little tricky, so I'll describe
	 * what's wrong with it here.
	 *
	 * It is incorrect to simply unp_discard each entry for f_msgcount
	 * times -- consider the case of sockets A and B that contain
	 * references to each other.  On a last close of some other socket,
	 * we trigger a gc since the number of outstanding rights (unp_rights)
	 * is non-zero.  If during the sweep phase the gc code un_discards,
	 * we end up doing a (full) closef on the descriptor.  A closef on A
	 * results in the following chain.  Closef calls soo_close, which
	 * calls soclose.   Soclose calls first (through the switch
	 * uipc_usrreq) unp_detach, which re-invokes unp_gc.  Unp_gc simply
	 * returns because the previous instance had set unp_gcing, and
	 * we return all the way back to soclose, which marks the socket
	 * with SS_NOFDREF, and then calls sofree.  Sofree calls sorflush
	 * to free up the rights that are queued in messages on the socket A,
	 * i.e., the reference on B.  The sorflush calls via the dom_dispose
	 * switch unp_dispose, which unp_scans with unp_discard.  This second
	 * instance of unp_discard just calls closef on B.
	 *
	 * Well, a similar chain occurs on B, resulting in a sorflush on B,
	 * which results in another closef on A.  Unfortunately, A is already
	 * being closed, and the descriptor has already been marked with
	 * SS_NOFDREF, and soclose panics at this point.
	 *
	 * Here, we first take an extra reference to each inaccessible
	 * descriptor.  Then, we call sorflush ourself, since we know
	 * it is a Unix domain socket anyhow.  After we destroy all the
	 * rights carried in messages, we do a last closef to get rid
	 * of our extra reference.  This is the last close, and the
	 * unp_detach etc will shut down the socket.
	 *
	 * 91/09/19, bsy@cs.cmu.edu
	 */
	extra_ref = malloc(nfiles * sizeof(struct file *), M_FILE, M_WAITOK);
	for (nunref = 0, fp = LIST_FIRST(&filehead), fpp = extra_ref; fp != 0;
	    fp = nextfp) {
		nextfp = LIST_NEXT(fp, f_list);
		if (fp->f_count == 0)
			continue;
		if (fp->f_count == fp->f_msgcount && !(fp->f_flag & FMARK)) {
			*fpp++ = fp;
			nunref++;
			FREF(fp);
			fp->f_count++;
		}
	}
	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
		if ((*fpp)->f_type == DTYPE_SOCKET && (*fpp)->f_data != NULL)
			sorflush((struct socket *)(*fpp)->f_data);
	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
		(void) closef(*fpp, NULL);
	free((caddr_t)extra_ref, M_FILE);
	unp_gcing = 0;
}

/*
 * dom_dispose hook: discard all file references carried in the mbuf
 * chain of a socket buffer that is being flushed.
 */
void
unp_dispose(m)
	struct mbuf *m;
{

	if (m)
		unp_scan(m, unp_discard, 1);
}

/*
 * Walk a record-oriented mbuf chain and apply op to every struct file
 * pointer found in SCM_RIGHTS control mbufs.  If discard is set, each
 * pointer slot is zeroed before op is called so a concurrent unp_gc()
 * cannot see a stale reference.
 */
void
unp_scan(m0, op, discard)
	struct mbuf *m0;
	void (*op)(struct file *);
	int discard;
{
	struct mbuf *m;
	struct file **rp, *fp;
	struct cmsghdr *cm;
	int i;
	int qfds;

	while (m0) {
		for (m = m0; m; m = m->m_next) {
			if (m->m_type == MT_CONTROL &&
			    m->m_len >= sizeof(*cm)) {
				cm = mtod(m, struct cmsghdr *);
				if (cm->cmsg_level != SOL_SOCKET ||
				    cm->cmsg_type != SCM_RIGHTS)
					continue;
				qfds = (cm->cmsg_len - CMSG_ALIGN(sizeof *cm))
				    / sizeof(struct file *);
				rp = (struct file **)CMSG_DATA(cm);
				for (i = 0; i < qfds; i++) {
					fp = *rp;
					if (discard)
						*rp = 0;
					(*op)(fp);
					rp++;
				}
				break;		/* XXX, but saves time */
			}
		}
		m0 = m0->m_nextpkt;
	}
}

/*
 * unp_scan() callback for the gc mark phase.  Sockets are flagged
 * FDEFER so their own receive buffers get scanned on a later pass;
 * everything else is simply marked reachable.
 */
void
unp_mark(fp)
	struct file *fp;
{

	if (fp->f_flag & FMARK)
		return;

	if (fp->f_flag & FDEFER)
		return;

	if (fp->f_type == DTYPE_SOCKET) {
		unp_defer++;
		fp->f_flag |= FDEFER;
	} else {
		fp->f_flag |= FMARK;
	}
}

/*
 * Drop one in-flight message reference on fp and close it.  The FREF
 * balances the implicit reference consumed by closef().
 */
void
unp_discard(fp)
	struct file *fp;
{

	if (fp == NULL)
		return;
	FREF(fp);
	fp->f_msgcount--;
	unp_rights--;
	(void) closef(fp, NULL);
}