1 /* 2 * Copyright (c) 1982, 1986, 1989, 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 
32 * 33 * From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94 34 * $FreeBSD: src/sys/kern/uipc_usrreq.c,v 1.54.2.10 2003/03/04 17:28:09 nectar Exp $ 35 * $DragonFly: src/sys/kern/uipc_usrreq.c,v 1.44 2008/09/06 05:44:58 dillon Exp $ 36 */ 37 38 #include <sys/param.h> 39 #include <sys/systm.h> 40 #include <sys/kernel.h> 41 #include <sys/domain.h> 42 #include <sys/fcntl.h> 43 #include <sys/malloc.h> /* XXX must be before <sys/file.h> */ 44 #include <sys/proc.h> 45 #include <sys/file.h> 46 #include <sys/filedesc.h> 47 #include <sys/mbuf.h> 48 #include <sys/nlookup.h> 49 #include <sys/protosw.h> 50 #include <sys/socket.h> 51 #include <sys/socketvar.h> 52 #include <sys/resourcevar.h> 53 #include <sys/stat.h> 54 #include <sys/mount.h> 55 #include <sys/sysctl.h> 56 #include <sys/un.h> 57 #include <sys/unpcb.h> 58 #include <sys/vnode.h> 59 60 #include <sys/file2.h> 61 #include <sys/spinlock2.h> 62 #include <sys/socketvar2.h> 63 #include <sys/msgport2.h> 64 65 typedef struct unp_defdiscard { 66 struct unp_defdiscard *next; 67 struct file *fp; 68 } *unp_defdiscard_t; 69 70 static MALLOC_DEFINE(M_UNPCB, "unpcb", "unpcb struct"); 71 static unp_gen_t unp_gencnt; 72 static u_int unp_count; 73 74 static struct unp_head unp_shead, unp_dhead; 75 76 static struct lwkt_token unp_token = LWKT_TOKEN_MP_INITIALIZER(unp_token); 77 static int unp_defdiscard_nest; 78 static unp_defdiscard_t unp_defdiscard_base; 79 80 /* 81 * Unix communications domain. 
82 * 83 * TODO: 84 * RDM 85 * rethink name space problems 86 * need a proper out-of-band 87 * lock pushdown 88 */ 89 static struct sockaddr sun_noname = { sizeof(sun_noname), AF_LOCAL }; 90 static ino_t unp_ino = 1; /* prototype for fake inode numbers */ 91 static struct spinlock unp_ino_spin = SPINLOCK_INITIALIZER(&unp_ino_spin); 92 93 static int unp_attach (struct socket *, struct pru_attach_info *); 94 static void unp_detach (struct unpcb *); 95 static int unp_bind (struct unpcb *,struct sockaddr *, struct thread *); 96 static int unp_connect (struct socket *,struct sockaddr *, 97 struct thread *); 98 static void unp_disconnect (struct unpcb *); 99 static void unp_shutdown (struct unpcb *); 100 static void unp_drop (struct unpcb *, int); 101 static void unp_gc (void); 102 static int unp_gc_clearmarks(struct file *, void *); 103 static int unp_gc_checkmarks(struct file *, void *); 104 static int unp_gc_checkrefs(struct file *, void *); 105 static int unp_revoke_gc_check(struct file *, void *); 106 static void unp_scan (struct mbuf *, void (*)(struct file *, void *), 107 void *data); 108 static void unp_mark (struct file *, void *data); 109 static void unp_discard (struct file *, void *); 110 static int unp_internalize (struct mbuf *, struct thread *); 111 static int unp_listen (struct unpcb *, struct thread *); 112 static void unp_fp_externalize(struct lwp *lp, struct file *fp, int fd); 113 114 /* 115 * NOTE: (so) is referenced from soabort*() and netmsg_pru_abort() 116 * will sofree() it when we return. 
117 */ 118 static void 119 uipc_abort(netmsg_t msg) 120 { 121 struct unpcb *unp; 122 int error; 123 124 lwkt_gettoken(&unp_token); 125 unp = msg->base.nm_so->so_pcb; 126 if (unp) { 127 unp_drop(unp, ECONNABORTED); 128 unp_detach(unp); 129 error = 0; 130 } else { 131 error = EINVAL; 132 } 133 lwkt_reltoken(&unp_token); 134 135 lwkt_replymsg(&msg->lmsg, error); 136 } 137 138 static void 139 uipc_accept(netmsg_t msg) 140 { 141 struct unpcb *unp; 142 int error; 143 144 lwkt_gettoken(&unp_token); 145 unp = msg->base.nm_so->so_pcb; 146 if (unp == NULL) { 147 error = EINVAL; 148 } else { 149 /* 150 * Pass back name of connected socket, 151 * if it was bound and we are still connected 152 * (our peer may have closed already!). 153 */ 154 if (unp->unp_conn && unp->unp_conn->unp_addr) { 155 *msg->accept.nm_nam = dup_sockaddr( 156 (struct sockaddr *)unp->unp_conn->unp_addr); 157 } else { 158 *msg->accept.nm_nam = dup_sockaddr( 159 (struct sockaddr *)&sun_noname); 160 } 161 error = 0; 162 } 163 lwkt_reltoken(&unp_token); 164 lwkt_replymsg(&msg->lmsg, error); 165 } 166 167 static void 168 uipc_attach(netmsg_t msg) 169 { 170 struct unpcb *unp; 171 int error; 172 173 lwkt_gettoken(&unp_token); 174 unp = msg->base.nm_so->so_pcb; 175 if (unp) 176 error = EISCONN; 177 else 178 error = unp_attach(msg->base.nm_so, msg->attach.nm_ai); 179 lwkt_reltoken(&unp_token); 180 lwkt_replymsg(&msg->lmsg, error); 181 } 182 183 static void 184 uipc_bind(netmsg_t msg) 185 { 186 struct unpcb *unp; 187 int error; 188 189 lwkt_gettoken(&unp_token); 190 unp = msg->base.nm_so->so_pcb; 191 if (unp) 192 error = unp_bind(unp, msg->bind.nm_nam, msg->bind.nm_td); 193 else 194 error = EINVAL; 195 lwkt_reltoken(&unp_token); 196 lwkt_replymsg(&msg->lmsg, error); 197 } 198 199 static void 200 uipc_connect(netmsg_t msg) 201 { 202 struct unpcb *unp; 203 int error; 204 205 lwkt_gettoken(&unp_token); 206 unp = msg->base.nm_so->so_pcb; 207 if (unp) { 208 error = unp_connect(msg->base.nm_so, 209 msg->connect.nm_nam, 
210 msg->connect.nm_td); 211 } else { 212 error = EINVAL; 213 } 214 lwkt_reltoken(&unp_token); 215 lwkt_replymsg(&msg->lmsg, error); 216 } 217 218 static void 219 uipc_connect2(netmsg_t msg) 220 { 221 struct unpcb *unp; 222 int error; 223 224 lwkt_gettoken(&unp_token); 225 unp = msg->connect2.nm_so1->so_pcb; 226 if (unp) { 227 error = unp_connect2(msg->connect2.nm_so1, 228 msg->connect2.nm_so2); 229 } else { 230 error = EINVAL; 231 } 232 lwkt_reltoken(&unp_token); 233 lwkt_replymsg(&msg->lmsg, error); 234 } 235 236 /* control is EOPNOTSUPP */ 237 238 static void 239 uipc_detach(netmsg_t msg) 240 { 241 struct unpcb *unp; 242 int error; 243 244 lwkt_gettoken(&unp_token); 245 unp = msg->base.nm_so->so_pcb; 246 if (unp) { 247 unp_detach(unp); 248 error = 0; 249 } else { 250 error = EINVAL; 251 } 252 lwkt_reltoken(&unp_token); 253 lwkt_replymsg(&msg->lmsg, error); 254 } 255 256 static void 257 uipc_disconnect(netmsg_t msg) 258 { 259 struct unpcb *unp; 260 int error; 261 262 lwkt_gettoken(&unp_token); 263 unp = msg->base.nm_so->so_pcb; 264 if (unp) { 265 unp_disconnect(unp); 266 error = 0; 267 } else { 268 error = EINVAL; 269 } 270 lwkt_reltoken(&unp_token); 271 lwkt_replymsg(&msg->lmsg, error); 272 } 273 274 static void 275 uipc_listen(netmsg_t msg) 276 { 277 struct unpcb *unp; 278 int error; 279 280 lwkt_gettoken(&unp_token); 281 unp = msg->base.nm_so->so_pcb; 282 if (unp == NULL || unp->unp_vnode == NULL) 283 error = EINVAL; 284 else 285 error = unp_listen(unp, msg->listen.nm_td); 286 lwkt_reltoken(&unp_token); 287 lwkt_replymsg(&msg->lmsg, error); 288 } 289 290 static void 291 uipc_peeraddr(netmsg_t msg) 292 { 293 struct unpcb *unp; 294 int error; 295 296 lwkt_gettoken(&unp_token); 297 unp = msg->base.nm_so->so_pcb; 298 if (unp == NULL) { 299 error = EINVAL; 300 } else if (unp->unp_conn && unp->unp_conn->unp_addr) { 301 *msg->peeraddr.nm_nam = dup_sockaddr( 302 (struct sockaddr *)unp->unp_conn->unp_addr); 303 error = 0; 304 } else { 305 /* 306 * XXX: It seems that 
this test always fails even when 307 * connection is established. So, this else clause is 308 * added as workaround to return PF_LOCAL sockaddr. 309 */ 310 *msg->peeraddr.nm_nam = dup_sockaddr( 311 (struct sockaddr *)&sun_noname); 312 error = 0; 313 } 314 lwkt_reltoken(&unp_token); 315 lwkt_replymsg(&msg->lmsg, error); 316 } 317 318 static void 319 uipc_rcvd(netmsg_t msg) 320 { 321 struct unpcb *unp; 322 struct socket *so; 323 struct socket *so2; 324 int error; 325 326 lwkt_gettoken(&unp_token); 327 so = msg->base.nm_so; 328 unp = so->so_pcb; 329 if (unp == NULL) { 330 error = EINVAL; 331 goto done; 332 } 333 334 switch (so->so_type) { 335 case SOCK_DGRAM: 336 panic("uipc_rcvd DGRAM?"); 337 /*NOTREACHED*/ 338 case SOCK_STREAM: 339 case SOCK_SEQPACKET: 340 if (unp->unp_conn == NULL) 341 break; 342 /* 343 * Because we are transfering mbufs directly to the 344 * peer socket we have to use SSB_STOP on the sender 345 * to prevent it from building up infinite mbufs. 346 */ 347 so2 = unp->unp_conn->unp_socket; 348 if (so->so_rcv.ssb_cc < so2->so_snd.ssb_hiwat && 349 so->so_rcv.ssb_mbcnt < so2->so_snd.ssb_mbmax 350 ) { 351 atomic_clear_int(&so2->so_snd.ssb_flags, SSB_STOP); 352 sowwakeup(so2); 353 } 354 break; 355 default: 356 panic("uipc_rcvd unknown socktype"); 357 /*NOTREACHED*/ 358 } 359 error = 0; 360 done: 361 lwkt_reltoken(&unp_token); 362 lwkt_replymsg(&msg->lmsg, error); 363 } 364 365 /* pru_rcvoob is EOPNOTSUPP */ 366 367 static void 368 uipc_send(netmsg_t msg) 369 { 370 struct unpcb *unp; 371 struct socket *so; 372 struct socket *so2; 373 struct mbuf *control; 374 struct mbuf *m; 375 int error = 0; 376 377 lwkt_gettoken(&unp_token); 378 so = msg->base.nm_so; 379 control = msg->send.nm_control; 380 m = msg->send.nm_m; 381 unp = so->so_pcb; 382 383 if (unp == NULL) { 384 error = EINVAL; 385 goto release; 386 } 387 if (msg->send.nm_flags & PRUS_OOB) { 388 error = EOPNOTSUPP; 389 goto release; 390 } 391 392 if (control && (error = unp_internalize(control, 
msg->send.nm_td))) 393 goto release; 394 395 switch (so->so_type) { 396 case SOCK_DGRAM: 397 { 398 struct sockaddr *from; 399 400 if (msg->send.nm_addr) { 401 if (unp->unp_conn) { 402 error = EISCONN; 403 break; 404 } 405 error = unp_connect(so, 406 msg->send.nm_addr, 407 msg->send.nm_td); 408 if (error) 409 break; 410 } else { 411 if (unp->unp_conn == NULL) { 412 error = ENOTCONN; 413 break; 414 } 415 } 416 so2 = unp->unp_conn->unp_socket; 417 if (unp->unp_addr) 418 from = (struct sockaddr *)unp->unp_addr; 419 else 420 from = &sun_noname; 421 422 lwkt_gettoken(&so2->so_rcv.ssb_token); 423 if (ssb_appendaddr(&so2->so_rcv, from, m, control)) { 424 sorwakeup(so2); 425 m = NULL; 426 control = NULL; 427 } else { 428 error = ENOBUFS; 429 } 430 if (msg->send.nm_addr) 431 unp_disconnect(unp); 432 lwkt_reltoken(&so2->so_rcv.ssb_token); 433 break; 434 } 435 436 case SOCK_STREAM: 437 case SOCK_SEQPACKET: 438 /* Connect if not connected yet. */ 439 /* 440 * Note: A better implementation would complain 441 * if not equal to the peer's address. 442 */ 443 if (!(so->so_state & SS_ISCONNECTED)) { 444 if (msg->send.nm_addr) { 445 error = unp_connect(so, 446 msg->send.nm_addr, 447 msg->send.nm_td); 448 if (error) 449 break; /* XXX */ 450 } else { 451 error = ENOTCONN; 452 break; 453 } 454 } 455 456 if (so->so_state & SS_CANTSENDMORE) { 457 error = EPIPE; 458 break; 459 } 460 if (unp->unp_conn == NULL) 461 panic("uipc_send connected but no connection?"); 462 so2 = unp->unp_conn->unp_socket; 463 /* 464 * Send to paired receive port, and then reduce 465 * send buffer hiwater marks to maintain backpressure. 466 * Wake up readers. 
467 */ 468 lwkt_gettoken(&so2->so_rcv.ssb_token); 469 if (control) { 470 if (ssb_appendcontrol(&so2->so_rcv, m, control)) { 471 control = NULL; 472 m = NULL; 473 } 474 } else if (so->so_type == SOCK_SEQPACKET) { 475 sbappendrecord(&so2->so_rcv.sb, m); 476 m = NULL; 477 } else { 478 sbappend(&so2->so_rcv.sb, m); 479 m = NULL; 480 } 481 482 /* 483 * Because we are transfering mbufs directly to the 484 * peer socket we have to use SSB_STOP on the sender 485 * to prevent it from building up infinite mbufs. 486 */ 487 if (so2->so_rcv.ssb_cc >= so->so_snd.ssb_hiwat || 488 so2->so_rcv.ssb_mbcnt >= so->so_snd.ssb_mbmax 489 ) { 490 atomic_set_int(&so->so_snd.ssb_flags, SSB_STOP); 491 } 492 lwkt_reltoken(&so2->so_rcv.ssb_token); 493 sorwakeup(so2); 494 break; 495 496 default: 497 panic("uipc_send unknown socktype"); 498 } 499 500 /* 501 * SEND_EOF is equivalent to a SEND followed by a SHUTDOWN. 502 */ 503 if (msg->send.nm_flags & PRUS_EOF) { 504 socantsendmore(so); 505 unp_shutdown(unp); 506 } 507 508 if (control && error != 0) 509 unp_dispose(control); 510 511 release: 512 lwkt_reltoken(&unp_token); 513 514 if (control) 515 m_freem(control); 516 if (m) 517 m_freem(m); 518 lwkt_replymsg(&msg->lmsg, error); 519 } 520 521 /* 522 * MPSAFE 523 */ 524 static void 525 uipc_sense(netmsg_t msg) 526 { 527 struct unpcb *unp; 528 struct socket *so; 529 struct stat *sb; 530 int error; 531 532 lwkt_gettoken(&unp_token); 533 so = msg->base.nm_so; 534 sb = msg->sense.nm_stat; 535 unp = so->so_pcb; 536 if (unp == NULL) { 537 error = EINVAL; 538 goto done; 539 } 540 sb->st_blksize = so->so_snd.ssb_hiwat; 541 sb->st_dev = NOUDEV; 542 if (unp->unp_ino == 0) { /* make up a non-zero inode number */ 543 spin_lock(&unp_ino_spin); 544 unp->unp_ino = unp_ino++; 545 spin_unlock(&unp_ino_spin); 546 } 547 sb->st_ino = unp->unp_ino; 548 error = 0; 549 done: 550 lwkt_reltoken(&unp_token); 551 lwkt_replymsg(&msg->lmsg, error); 552 } 553 554 static void 555 uipc_shutdown(netmsg_t msg) 556 { 557 struct 
socket *so; 558 struct unpcb *unp; 559 int error; 560 561 lwkt_gettoken(&unp_token); 562 so = msg->base.nm_so; 563 unp = so->so_pcb; 564 if (unp) { 565 socantsendmore(so); 566 unp_shutdown(unp); 567 error = 0; 568 } else { 569 error = EINVAL; 570 } 571 lwkt_reltoken(&unp_token); 572 lwkt_replymsg(&msg->lmsg, error); 573 } 574 575 static void 576 uipc_sockaddr(netmsg_t msg) 577 { 578 struct unpcb *unp; 579 int error; 580 581 lwkt_gettoken(&unp_token); 582 unp = msg->base.nm_so->so_pcb; 583 if (unp) { 584 if (unp->unp_addr) { 585 *msg->sockaddr.nm_nam = 586 dup_sockaddr((struct sockaddr *)unp->unp_addr); 587 } 588 error = 0; 589 } else { 590 error = EINVAL; 591 } 592 lwkt_reltoken(&unp_token); 593 lwkt_replymsg(&msg->lmsg, error); 594 } 595 596 struct pr_usrreqs uipc_usrreqs = { 597 .pru_abort = uipc_abort, 598 .pru_accept = uipc_accept, 599 .pru_attach = uipc_attach, 600 .pru_bind = uipc_bind, 601 .pru_connect = uipc_connect, 602 .pru_connect2 = uipc_connect2, 603 .pru_control = pr_generic_notsupp, 604 .pru_detach = uipc_detach, 605 .pru_disconnect = uipc_disconnect, 606 .pru_listen = uipc_listen, 607 .pru_peeraddr = uipc_peeraddr, 608 .pru_rcvd = uipc_rcvd, 609 .pru_rcvoob = pr_generic_notsupp, 610 .pru_send = uipc_send, 611 .pru_sense = uipc_sense, 612 .pru_shutdown = uipc_shutdown, 613 .pru_sockaddr = uipc_sockaddr, 614 .pru_sosend = sosend, 615 .pru_soreceive = soreceive 616 }; 617 618 void 619 uipc_ctloutput(netmsg_t msg) 620 { 621 struct socket *so; 622 struct sockopt *sopt; 623 struct unpcb *unp; 624 int error = 0; 625 626 lwkt_gettoken(&unp_token); 627 so = msg->base.nm_so; 628 sopt = msg->ctloutput.nm_sopt; 629 unp = so->so_pcb; 630 631 switch (sopt->sopt_dir) { 632 case SOPT_GET: 633 switch (sopt->sopt_name) { 634 case LOCAL_PEERCRED: 635 if (unp->unp_flags & UNP_HAVEPC) 636 soopt_from_kbuf(sopt, &unp->unp_peercred, 637 sizeof(unp->unp_peercred)); 638 else { 639 if (so->so_type == SOCK_STREAM) 640 error = ENOTCONN; 641 else if (so->so_type == 
SOCK_SEQPACKET) 642 error = ENOTCONN; 643 else 644 error = EINVAL; 645 } 646 break; 647 default: 648 error = EOPNOTSUPP; 649 break; 650 } 651 break; 652 case SOPT_SET: 653 default: 654 error = EOPNOTSUPP; 655 break; 656 } 657 lwkt_reltoken(&unp_token); 658 lwkt_replymsg(&msg->lmsg, error); 659 } 660 661 /* 662 * Both send and receive buffers are allocated PIPSIZ bytes of buffering 663 * for stream sockets, although the total for sender and receiver is 664 * actually only PIPSIZ. 665 * 666 * Datagram sockets really use the sendspace as the maximum datagram size, 667 * and don't really want to reserve the sendspace. Their recvspace should 668 * be large enough for at least one max-size datagram plus address. 669 * 670 * We want the local send/recv space to be significant larger then lo0's 671 * mtu of 16384. 672 */ 673 #ifndef PIPSIZ 674 #define PIPSIZ 57344 675 #endif 676 static u_long unpst_sendspace = PIPSIZ; 677 static u_long unpst_recvspace = PIPSIZ; 678 static u_long unpdg_sendspace = 2*1024; /* really max datagram size */ 679 static u_long unpdg_recvspace = 4*1024; 680 681 static int unp_rights; /* file descriptors in flight */ 682 static struct spinlock unp_spin = SPINLOCK_INITIALIZER(&unp_spin); 683 684 SYSCTL_DECL(_net_local_seqpacket); 685 SYSCTL_DECL(_net_local_stream); 686 SYSCTL_INT(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW, 687 &unpst_sendspace, 0, ""); 688 SYSCTL_INT(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW, 689 &unpst_recvspace, 0, ""); 690 691 SYSCTL_DECL(_net_local_dgram); 692 SYSCTL_INT(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW, 693 &unpdg_sendspace, 0, ""); 694 SYSCTL_INT(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW, 695 &unpdg_recvspace, 0, ""); 696 697 SYSCTL_DECL(_net_local); 698 SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0, ""); 699 700 static int 701 unp_attach(struct socket *so, struct pru_attach_info *ai) 702 { 703 struct unpcb *unp; 704 int error; 705 706 lwkt_gettoken(&unp_token); 
707 708 if (so->so_snd.ssb_hiwat == 0 || so->so_rcv.ssb_hiwat == 0) { 709 switch (so->so_type) { 710 711 case SOCK_STREAM: 712 case SOCK_SEQPACKET: 713 error = soreserve(so, unpst_sendspace, unpst_recvspace, 714 ai->sb_rlimit); 715 break; 716 717 case SOCK_DGRAM: 718 error = soreserve(so, unpdg_sendspace, unpdg_recvspace, 719 ai->sb_rlimit); 720 break; 721 722 default: 723 panic("unp_attach"); 724 } 725 if (error) 726 goto failed; 727 } 728 unp = kmalloc(sizeof(*unp), M_UNPCB, M_NOWAIT|M_ZERO); 729 if (unp == NULL) { 730 error = ENOBUFS; 731 goto failed; 732 } 733 unp->unp_gencnt = ++unp_gencnt; 734 unp_count++; 735 LIST_INIT(&unp->unp_refs); 736 unp->unp_socket = so; 737 unp->unp_rvnode = ai->fd_rdir; /* jail cruft XXX JH */ 738 LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ? &unp_dhead 739 : &unp_shead, unp, unp_link); 740 so->so_pcb = (caddr_t)unp; 741 soreference(so); 742 error = 0; 743 failed: 744 lwkt_reltoken(&unp_token); 745 return error; 746 } 747 748 static void 749 unp_detach(struct unpcb *unp) 750 { 751 struct socket *so; 752 753 lwkt_gettoken(&unp_token); 754 755 LIST_REMOVE(unp, unp_link); 756 unp->unp_gencnt = ++unp_gencnt; 757 --unp_count; 758 if (unp->unp_vnode) { 759 unp->unp_vnode->v_socket = NULL; 760 vrele(unp->unp_vnode); 761 unp->unp_vnode = NULL; 762 } 763 if (unp->unp_conn) 764 unp_disconnect(unp); 765 while (!LIST_EMPTY(&unp->unp_refs)) 766 unp_drop(LIST_FIRST(&unp->unp_refs), ECONNRESET); 767 soisdisconnected(unp->unp_socket); 768 so = unp->unp_socket; 769 soreference(so); /* for delayed sorflush */ 770 so->so_pcb = NULL; 771 unp->unp_socket = NULL; 772 sofree(so); /* remove pcb ref */ 773 774 if (unp_rights) { 775 /* 776 * Normally the receive buffer is flushed later, 777 * in sofree, but if our receive buffer holds references 778 * to descriptors that are now garbage, we will dispose 779 * of those descriptor references after the garbage collector 780 * gets them (resulting in a "panic: closef: count < 0"). 
781 */ 782 sorflush(so); 783 unp_gc(); 784 } 785 sofree(so); 786 lwkt_reltoken(&unp_token); 787 788 if (unp->unp_addr) 789 kfree(unp->unp_addr, M_SONAME); 790 kfree(unp, M_UNPCB); 791 } 792 793 static int 794 unp_bind(struct unpcb *unp, struct sockaddr *nam, struct thread *td) 795 { 796 struct proc *p = td->td_proc; 797 struct sockaddr_un *soun = (struct sockaddr_un *)nam; 798 struct vnode *vp; 799 struct vattr vattr; 800 int error, namelen; 801 struct nlookupdata nd; 802 char buf[SOCK_MAXADDRLEN]; 803 804 lwkt_gettoken(&unp_token); 805 if (unp->unp_vnode != NULL) { 806 error = EINVAL; 807 goto failed; 808 } 809 namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path); 810 if (namelen <= 0) { 811 error = EINVAL; 812 goto failed; 813 } 814 strncpy(buf, soun->sun_path, namelen); 815 buf[namelen] = 0; /* null-terminate the string */ 816 error = nlookup_init(&nd, buf, UIO_SYSSPACE, 817 NLC_LOCKVP | NLC_CREATE | NLC_REFDVP); 818 if (error == 0) 819 error = nlookup(&nd); 820 if (error == 0 && nd.nl_nch.ncp->nc_vp != NULL) 821 error = EADDRINUSE; 822 if (error) 823 goto done; 824 825 VATTR_NULL(&vattr); 826 vattr.va_type = VSOCK; 827 vattr.va_mode = (ACCESSPERMS & ~p->p_fd->fd_cmask); 828 error = VOP_NCREATE(&nd.nl_nch, nd.nl_dvp, &vp, nd.nl_cred, &vattr); 829 if (error == 0) { 830 vp->v_socket = unp->unp_socket; 831 unp->unp_vnode = vp; 832 unp->unp_addr = (struct sockaddr_un *)dup_sockaddr(nam); 833 vn_unlock(vp); 834 } 835 done: 836 nlookup_done(&nd); 837 failed: 838 lwkt_reltoken(&unp_token); 839 return (error); 840 } 841 842 static int 843 unp_connect(struct socket *so, struct sockaddr *nam, struct thread *td) 844 { 845 struct proc *p = td->td_proc; 846 struct sockaddr_un *soun = (struct sockaddr_un *)nam; 847 struct vnode *vp; 848 struct socket *so2, *so3; 849 struct unpcb *unp, *unp2, *unp3; 850 int error, len; 851 struct nlookupdata nd; 852 char buf[SOCK_MAXADDRLEN]; 853 854 lwkt_gettoken(&unp_token); 855 856 len = nam->sa_len - offsetof(struct 
sockaddr_un, sun_path); 857 if (len <= 0) { 858 error = EINVAL; 859 goto failed; 860 } 861 strncpy(buf, soun->sun_path, len); 862 buf[len] = 0; 863 864 vp = NULL; 865 error = nlookup_init(&nd, buf, UIO_SYSSPACE, NLC_FOLLOW); 866 if (error == 0) 867 error = nlookup(&nd); 868 if (error == 0) 869 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 870 nlookup_done(&nd); 871 if (error) 872 goto failed; 873 874 if (vp->v_type != VSOCK) { 875 error = ENOTSOCK; 876 goto bad; 877 } 878 error = VOP_EACCESS(vp, VWRITE, p->p_ucred); 879 if (error) 880 goto bad; 881 so2 = vp->v_socket; 882 if (so2 == NULL) { 883 error = ECONNREFUSED; 884 goto bad; 885 } 886 if (so->so_type != so2->so_type) { 887 error = EPROTOTYPE; 888 goto bad; 889 } 890 if (so->so_proto->pr_flags & PR_CONNREQUIRED) { 891 if (!(so2->so_options & SO_ACCEPTCONN) || 892 (so3 = sonewconn(so2, 0)) == NULL) { 893 error = ECONNREFUSED; 894 goto bad; 895 } 896 unp = so->so_pcb; 897 unp2 = so2->so_pcb; 898 unp3 = so3->so_pcb; 899 if (unp2->unp_addr) 900 unp3->unp_addr = (struct sockaddr_un *) 901 dup_sockaddr((struct sockaddr *)unp2->unp_addr); 902 903 /* 904 * unp_peercred management: 905 * 906 * The connecter's (client's) credentials are copied 907 * from its process structure at the time of connect() 908 * (which is now). 909 */ 910 cru2x(p->p_ucred, &unp3->unp_peercred); 911 unp3->unp_flags |= UNP_HAVEPC; 912 /* 913 * The receiver's (server's) credentials are copied 914 * from the unp_peercred member of socket on which the 915 * former called listen(); unp_listen() cached that 916 * process's credentials at that time so we can use 917 * them now. 
918 */ 919 KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED, 920 ("unp_connect: listener without cached peercred")); 921 memcpy(&unp->unp_peercred, &unp2->unp_peercred, 922 sizeof(unp->unp_peercred)); 923 unp->unp_flags |= UNP_HAVEPC; 924 925 so2 = so3; 926 } 927 error = unp_connect2(so, so2); 928 bad: 929 vput(vp); 930 failed: 931 lwkt_reltoken(&unp_token); 932 return (error); 933 } 934 935 int 936 unp_connect2(struct socket *so, struct socket *so2) 937 { 938 struct unpcb *unp; 939 struct unpcb *unp2; 940 941 lwkt_gettoken(&unp_token); 942 unp = so->so_pcb; 943 if (so2->so_type != so->so_type) { 944 lwkt_reltoken(&unp_token); 945 return (EPROTOTYPE); 946 } 947 unp2 = so2->so_pcb; 948 unp->unp_conn = unp2; 949 950 switch (so->so_type) { 951 case SOCK_DGRAM: 952 LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink); 953 soisconnected(so); 954 break; 955 956 case SOCK_STREAM: 957 case SOCK_SEQPACKET: 958 unp2->unp_conn = unp; 959 soisconnected(so); 960 soisconnected(so2); 961 break; 962 963 default: 964 panic("unp_connect2"); 965 } 966 lwkt_reltoken(&unp_token); 967 return (0); 968 } 969 970 static void 971 unp_disconnect(struct unpcb *unp) 972 { 973 struct unpcb *unp2; 974 975 lwkt_gettoken(&unp_token); 976 977 unp2 = unp->unp_conn; 978 if (unp2 == NULL) { 979 lwkt_reltoken(&unp_token); 980 return; 981 } 982 983 unp->unp_conn = NULL; 984 985 switch (unp->unp_socket->so_type) { 986 case SOCK_DGRAM: 987 LIST_REMOVE(unp, unp_reflink); 988 soclrstate(unp->unp_socket, SS_ISCONNECTED); 989 break; 990 case SOCK_STREAM: 991 case SOCK_SEQPACKET: 992 soisdisconnected(unp->unp_socket); 993 unp2->unp_conn = NULL; 994 soisdisconnected(unp2->unp_socket); 995 break; 996 } 997 lwkt_reltoken(&unp_token); 998 } 999 1000 #ifdef notdef 1001 void 1002 unp_abort(struct unpcb *unp) 1003 { 1004 lwkt_gettoken(&unp_token); 1005 unp_detach(unp); 1006 lwkt_reltoken(&unp_token); 1007 } 1008 #endif 1009 1010 static int 1011 prison_unpcb(struct thread *td, struct unpcb *unp) 1012 { 1013 struct proc *p; 
1014 1015 if (td == NULL) 1016 return (0); 1017 if ((p = td->td_proc) == NULL) 1018 return (0); 1019 if (!p->p_ucred->cr_prison) 1020 return (0); 1021 if (p->p_fd->fd_rdir == unp->unp_rvnode) 1022 return (0); 1023 return (1); 1024 } 1025 1026 static int 1027 unp_pcblist(SYSCTL_HANDLER_ARGS) 1028 { 1029 int error, i, n; 1030 struct unpcb *unp, **unp_list; 1031 unp_gen_t gencnt; 1032 struct unp_head *head; 1033 1034 head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead); 1035 1036 KKASSERT(curproc != NULL); 1037 1038 /* 1039 * The process of preparing the PCB list is too time-consuming and 1040 * resource-intensive to repeat twice on every request. 1041 */ 1042 if (req->oldptr == NULL) { 1043 n = unp_count; 1044 req->oldidx = (n + n/8) * sizeof(struct xunpcb); 1045 return 0; 1046 } 1047 1048 if (req->newptr != NULL) 1049 return EPERM; 1050 1051 lwkt_gettoken(&unp_token); 1052 1053 /* 1054 * OK, now we're committed to doing something. 1055 */ 1056 gencnt = unp_gencnt; 1057 n = unp_count; 1058 1059 unp_list = kmalloc(n * sizeof *unp_list, M_TEMP, M_WAITOK); 1060 1061 for (unp = LIST_FIRST(head), i = 0; unp && i < n; 1062 unp = LIST_NEXT(unp, unp_link)) { 1063 if (unp->unp_gencnt <= gencnt && !prison_unpcb(req->td, unp)) 1064 unp_list[i++] = unp; 1065 } 1066 n = i; /* in case we lost some during malloc */ 1067 1068 error = 0; 1069 for (i = 0; i < n; i++) { 1070 unp = unp_list[i]; 1071 if (unp->unp_gencnt <= gencnt) { 1072 struct xunpcb xu; 1073 xu.xu_len = sizeof xu; 1074 xu.xu_unpp = unp; 1075 /* 1076 * XXX - need more locking here to protect against 1077 * connect/disconnect races for SMP. 
1078 */ 1079 if (unp->unp_addr) 1080 bcopy(unp->unp_addr, &xu.xu_addr, 1081 unp->unp_addr->sun_len); 1082 if (unp->unp_conn && unp->unp_conn->unp_addr) 1083 bcopy(unp->unp_conn->unp_addr, 1084 &xu.xu_caddr, 1085 unp->unp_conn->unp_addr->sun_len); 1086 bcopy(unp, &xu.xu_unp, sizeof *unp); 1087 sotoxsocket(unp->unp_socket, &xu.xu_socket); 1088 error = SYSCTL_OUT(req, &xu, sizeof xu); 1089 } 1090 } 1091 lwkt_reltoken(&unp_token); 1092 kfree(unp_list, M_TEMP); 1093 1094 return error; 1095 } 1096 1097 SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist, CTLFLAG_RD, 1098 (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb", 1099 "List of active local datagram sockets"); 1100 SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLFLAG_RD, 1101 (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb", 1102 "List of active local stream sockets"); 1103 SYSCTL_PROC(_net_local_seqpacket, OID_AUTO, pcblist, CTLFLAG_RD, 1104 (caddr_t)(long)SOCK_SEQPACKET, 0, unp_pcblist, "S,xunpcb", 1105 "List of active local seqpacket stream sockets"); 1106 1107 static void 1108 unp_shutdown(struct unpcb *unp) 1109 { 1110 struct socket *so; 1111 1112 if ((unp->unp_socket->so_type == SOCK_STREAM || 1113 unp->unp_socket->so_type == SOCK_SEQPACKET) && 1114 unp->unp_conn != NULL && (so = unp->unp_conn->unp_socket)) { 1115 socantrcvmore(so); 1116 } 1117 } 1118 1119 static void 1120 unp_drop(struct unpcb *unp, int err) 1121 { 1122 struct socket *so = unp->unp_socket; 1123 1124 so->so_error = err; 1125 unp_disconnect(unp); 1126 } 1127 1128 #ifdef notdef 1129 void 1130 unp_drain(void) 1131 { 1132 lwkt_gettoken(&unp_token); 1133 lwkt_reltoken(&unp_token); 1134 } 1135 #endif 1136 1137 int 1138 unp_externalize(struct mbuf *rights) 1139 { 1140 struct thread *td = curthread; 1141 struct proc *p = td->td_proc; /* XXX */ 1142 struct lwp *lp = td->td_lwp; 1143 struct cmsghdr *cm = mtod(rights, struct cmsghdr *); 1144 int *fdp; 1145 int i; 1146 struct file **rp; 1147 struct file *fp; 1148 int newfds = (cm->cmsg_len - 
(CMSG_DATA(cm) - (u_char *)cm)) 1149 / sizeof (struct file *); 1150 int f; 1151 1152 lwkt_gettoken(&unp_token); 1153 1154 /* 1155 * if the new FD's will not fit, then we free them all 1156 */ 1157 if (!fdavail(p, newfds)) { 1158 rp = (struct file **)CMSG_DATA(cm); 1159 for (i = 0; i < newfds; i++) { 1160 fp = *rp; 1161 /* 1162 * zero the pointer before calling unp_discard, 1163 * since it may end up in unp_gc().. 1164 */ 1165 *rp++ = 0; 1166 unp_discard(fp, NULL); 1167 } 1168 lwkt_reltoken(&unp_token); 1169 return (EMSGSIZE); 1170 } 1171 1172 /* 1173 * now change each pointer to an fd in the global table to 1174 * an integer that is the index to the local fd table entry 1175 * that we set up to point to the global one we are transferring. 1176 * If sizeof (struct file *) is bigger than or equal to sizeof int, 1177 * then do it in forward order. In that case, an integer will 1178 * always come in the same place or before its corresponding 1179 * struct file pointer. 1180 * If sizeof (struct file *) is smaller than sizeof int, then 1181 * do it in reverse order. 1182 */ 1183 if (sizeof (struct file *) >= sizeof (int)) { 1184 fdp = (int *)CMSG_DATA(cm); 1185 rp = (struct file **)CMSG_DATA(cm); 1186 for (i = 0; i < newfds; i++) { 1187 if (fdalloc(p, 0, &f)) 1188 panic("unp_externalize"); 1189 fp = *rp++; 1190 unp_fp_externalize(lp, fp, f); 1191 *fdp++ = f; 1192 } 1193 } else { 1194 fdp = (int *)CMSG_DATA(cm) + newfds - 1; 1195 rp = (struct file **)CMSG_DATA(cm) + newfds - 1; 1196 for (i = 0; i < newfds; i++) { 1197 if (fdalloc(p, 0, &f)) 1198 panic("unp_externalize"); 1199 fp = *rp--; 1200 unp_fp_externalize(lp, fp, f); 1201 *fdp-- = f; 1202 } 1203 } 1204 1205 /* 1206 * Adjust length, in case sizeof(struct file *) and sizeof(int) 1207 * differs. 
1208 */ 1209 cm->cmsg_len = CMSG_LEN(newfds * sizeof(int)); 1210 rights->m_len = cm->cmsg_len; 1211 1212 lwkt_reltoken(&unp_token); 1213 return (0); 1214 } 1215 1216 static void 1217 unp_fp_externalize(struct lwp *lp, struct file *fp, int fd) 1218 { 1219 struct file *fx; 1220 int error; 1221 1222 lwkt_gettoken(&unp_token); 1223 1224 if (lp) { 1225 KKASSERT(fd >= 0); 1226 if (fp->f_flag & FREVOKED) { 1227 kprintf("Warning: revoked fp exiting unix socket\n"); 1228 fx = NULL; 1229 error = falloc(lp, &fx, NULL); 1230 if (error == 0) 1231 fsetfd(lp->lwp_proc->p_fd, fx, fd); 1232 else 1233 fsetfd(lp->lwp_proc->p_fd, NULL, fd); 1234 fdrop(fx); 1235 } else { 1236 fsetfd(lp->lwp_proc->p_fd, fp, fd); 1237 } 1238 } 1239 spin_lock(&unp_spin); 1240 fp->f_msgcount--; 1241 unp_rights--; 1242 spin_unlock(&unp_spin); 1243 fdrop(fp); 1244 1245 lwkt_reltoken(&unp_token); 1246 } 1247 1248 1249 void 1250 unp_init(void) 1251 { 1252 LIST_INIT(&unp_dhead); 1253 LIST_INIT(&unp_shead); 1254 spin_init(&unp_spin); 1255 } 1256 1257 static int 1258 unp_internalize(struct mbuf *control, struct thread *td) 1259 { 1260 struct proc *p = td->td_proc; 1261 struct filedesc *fdescp; 1262 struct cmsghdr *cm = mtod(control, struct cmsghdr *); 1263 struct file **rp; 1264 struct file *fp; 1265 int i, fd, *fdp; 1266 struct cmsgcred *cmcred; 1267 int oldfds; 1268 u_int newlen; 1269 int error; 1270 1271 KKASSERT(p); 1272 lwkt_gettoken(&unp_token); 1273 1274 fdescp = p->p_fd; 1275 if ((cm->cmsg_type != SCM_RIGHTS && cm->cmsg_type != SCM_CREDS) || 1276 cm->cmsg_level != SOL_SOCKET || 1277 CMSG_ALIGN(cm->cmsg_len) != control->m_len) { 1278 error = EINVAL; 1279 goto done; 1280 } 1281 1282 /* 1283 * Fill in credential information. 
1284 */ 1285 if (cm->cmsg_type == SCM_CREDS) { 1286 cmcred = (struct cmsgcred *)CMSG_DATA(cm); 1287 cmcred->cmcred_pid = p->p_pid; 1288 cmcred->cmcred_uid = p->p_ucred->cr_ruid; 1289 cmcred->cmcred_gid = p->p_ucred->cr_rgid; 1290 cmcred->cmcred_euid = p->p_ucred->cr_uid; 1291 cmcred->cmcred_ngroups = MIN(p->p_ucred->cr_ngroups, 1292 CMGROUP_MAX); 1293 for (i = 0; i < cmcred->cmcred_ngroups; i++) 1294 cmcred->cmcred_groups[i] = p->p_ucred->cr_groups[i]; 1295 error = 0; 1296 goto done; 1297 } 1298 1299 /* 1300 * cmsghdr may not be aligned, do not allow calculation(s) to 1301 * go negative. 1302 */ 1303 if (cm->cmsg_len < CMSG_LEN(0)) { 1304 error = EINVAL; 1305 goto done; 1306 } 1307 1308 oldfds = (cm->cmsg_len - CMSG_LEN(0)) / sizeof (int); 1309 1310 /* 1311 * check that all the FDs passed in refer to legal OPEN files 1312 * If not, reject the entire operation. 1313 */ 1314 fdp = (int *)CMSG_DATA(cm); 1315 for (i = 0; i < oldfds; i++) { 1316 fd = *fdp++; 1317 if ((unsigned)fd >= fdescp->fd_nfiles || 1318 fdescp->fd_files[fd].fp == NULL) { 1319 error = EBADF; 1320 goto done; 1321 } 1322 if (fdescp->fd_files[fd].fp->f_type == DTYPE_KQUEUE) { 1323 error = EOPNOTSUPP; 1324 goto done; 1325 } 1326 } 1327 /* 1328 * Now replace the integer FDs with pointers to 1329 * the associated global file table entry.. 1330 * Allocate a bigger buffer as necessary. But if an cluster is not 1331 * enough, return E2BIG. 
1332 */ 1333 newlen = CMSG_LEN(oldfds * sizeof(struct file *)); 1334 if (newlen > MCLBYTES) { 1335 error = E2BIG; 1336 goto done; 1337 } 1338 if (newlen - control->m_len > M_TRAILINGSPACE(control)) { 1339 if (control->m_flags & M_EXT) { 1340 error = E2BIG; 1341 goto done; 1342 } 1343 MCLGET(control, MB_WAIT); 1344 if (!(control->m_flags & M_EXT)) { 1345 error = ENOBUFS; 1346 goto done; 1347 } 1348 1349 /* copy the data to the cluster */ 1350 memcpy(mtod(control, char *), cm, cm->cmsg_len); 1351 cm = mtod(control, struct cmsghdr *); 1352 } 1353 1354 /* 1355 * Adjust length, in case sizeof(struct file *) and sizeof(int) 1356 * differs. 1357 */ 1358 cm->cmsg_len = newlen; 1359 control->m_len = CMSG_ALIGN(newlen); 1360 1361 /* 1362 * Transform the file descriptors into struct file pointers. 1363 * If sizeof (struct file *) is bigger than or equal to sizeof int, 1364 * then do it in reverse order so that the int won't get until 1365 * we're done. 1366 * If sizeof (struct file *) is smaller than sizeof int, then 1367 * do it in forward order. 1368 */ 1369 if (sizeof (struct file *) >= sizeof (int)) { 1370 fdp = (int *)CMSG_DATA(cm) + oldfds - 1; 1371 rp = (struct file **)CMSG_DATA(cm) + oldfds - 1; 1372 for (i = 0; i < oldfds; i++) { 1373 fp = fdescp->fd_files[*fdp--].fp; 1374 *rp-- = fp; 1375 fhold(fp); 1376 spin_lock(&unp_spin); 1377 fp->f_msgcount++; 1378 unp_rights++; 1379 spin_unlock(&unp_spin); 1380 } 1381 } else { 1382 fdp = (int *)CMSG_DATA(cm); 1383 rp = (struct file **)CMSG_DATA(cm); 1384 for (i = 0; i < oldfds; i++) { 1385 fp = fdescp->fd_files[*fdp++].fp; 1386 *rp++ = fp; 1387 fhold(fp); 1388 spin_lock(&unp_spin); 1389 fp->f_msgcount++; 1390 unp_rights++; 1391 spin_unlock(&unp_spin); 1392 } 1393 } 1394 error = 0; 1395 done: 1396 lwkt_reltoken(&unp_token); 1397 return error; 1398 } 1399 1400 /* 1401 * Garbage collect in-transit file descriptors that get lost due to 1402 * loops (i.e. 
when a socket is sent to another process over itself,
 * and more complex situations).
 *
 * NOT MPSAFE - TODO socket flush code and maybe closef.  Rest is MPSAFE.
 */

/*
 * State shared between unp_gc() and its allfiles_scan_exclusive()
 * callbacks.
 */
struct unp_gc_info {
	struct file **extra_ref;	/* files collected by unp_gc_checkrefs */
	struct file *locked_fp;		/* set by unp_gc_checkmarks; not read in the code visible here */
	int defer;			/* deferrals counted during the current marking pass */
	int index;			/* number of valid entries in extra_ref */
	int maxindex;			/* capacity of extra_ref */
};

/*
 * Run one garbage collection over all files: mark every file reachable
 * from outside the in-transit messages, then flush and close the files
 * whose only references come from in-transit messages.
 */
static void
unp_gc(void)
{
	struct unp_gc_info info;
	static boolean_t unp_gcing;
	struct file **fpp;
	int i;

	/*
	 * Only one gc can be in-progress at any given moment
	 */
	spin_lock(&unp_spin);
	if (unp_gcing) {
		spin_unlock(&unp_spin);
		return;
	}
	unp_gcing = TRUE;
	spin_unlock(&unp_spin);

	lwkt_gettoken(&unp_token);

	/*
	 * Before going through all this, set all FDs to be NOT deferred
	 * and NOT externally accessible (not marked).  During the scan
	 * a fd can be marked externally accessible but we may or may not
	 * be able to immediately process it (controlled by FDEFER).
	 *
	 * If we loop sleep a bit.  The complexity of the topology can cause
	 * multiple loops.  Also failure to acquire the socket's so_rcv
	 * token can cause us to loop.
	 */
	allfiles_scan_exclusive(unp_gc_clearmarks, NULL);
	do {
		info.defer = 0;
		allfiles_scan_exclusive(unp_gc_checkmarks, &info);
		if (info.defer)
			tsleep(&info, 0, "gcagain", 1);
	} while (info.defer);

	/*
	 * We grab an extra reference to each of the file table entries
	 * that are not otherwise accessible and then free the rights
	 * that are stored in messages on them.
	 *
	 * The bug in the original code is a little tricky, so I'll describe
	 * what's wrong with it here.
	 *
	 * It is incorrect to simply unp_discard each entry for f_msgcount
	 * times -- consider the case of sockets A and B that contain
	 * references to each other.  On a last close of some other socket,
	 * we trigger a gc since the number of outstanding rights (unp_rights)
	 * is non-zero.  If during the sweep phase the gc code unp_discards,
	 * we end up doing a (full) closef on the descriptor.  A closef on A
	 * results in the following chain.  Closef calls soo_close, which
	 * calls soclose.   Soclose calls first (through the switch
	 * uipc_usrreq) unp_detach, which re-invokes unp_gc.  Unp_gc simply
	 * returns because the previous instance had set unp_gcing, and
	 * we return all the way back to soclose, which marks the socket
	 * with SS_NOFDREF, and then calls sofree.  Sofree calls sorflush
	 * to free up the rights that are queued in messages on the socket A,
	 * i.e., the reference on B.  The sorflush calls via the dom_dispose
	 * switch unp_dispose, which unp_scans with unp_discard.  This second
	 * instance of unp_discard just calls closef on B.
	 *
	 * Well, a similar chain occurs on B, resulting in a sorflush on B,
	 * which results in another closef on A.  Unfortunately, A is already
	 * being closed, and the descriptor has already been marked with
	 * SS_NOFDREF, and soclose panics at this point.
	 *
	 * Here, we first take an extra reference to each inaccessible
	 * descriptor.  Then, we call sorflush ourself, since we know
	 * it is a Unix domain socket anyhow.  After we destroy all the
	 * rights carried in messages, we do a last closef to get rid
	 * of our extra reference.  This is the last close, and the
	 * unp_detach etc will shut down the socket.
	 *
	 * 91/09/19, bsy@cs.cmu.edu
	 */
	info.extra_ref = kmalloc(256 * sizeof(struct file *), M_FILE, M_WAITOK);
	info.maxindex = 256;

	/*
	 * Each round collects at most maxindex files.  A completely full
	 * array means more candidates may remain, so go around again.
	 */
	do {
		/*
		 * Look for matches
		 */
		info.index = 0;
		allfiles_scan_exclusive(unp_gc_checkrefs, &info);

		/*
		 * For each FD on our hit list, do the following two things:
		 * first flush every collected socket, then close every
		 * collected file.
		 */
		for (i = info.index, fpp = info.extra_ref; --i >= 0; ++fpp) {
			struct file *tfp = *fpp;
			if (tfp->f_type == DTYPE_SOCKET && tfp->f_data != NULL)
				sorflush((struct socket *)(tfp->f_data));
		}
		for (i = info.index, fpp = info.extra_ref; --i >= 0; ++fpp)
			closef(*fpp, NULL);
	} while (info.index == info.maxindex);

	lwkt_reltoken(&unp_token);

	kfree((caddr_t)info.extra_ref, M_FILE);
	/*
	 * NOTE(review): unp_gcing is tested and set under unp_spin above
	 * but cleared here without it -- confirm this is intentional.
	 */
	unp_gcing = FALSE;
}

/*
 * Scan callback: collect files whose references all come from
 * in-transit messages and which were not marked accessible.
 *
 * Each collected file gets an extra reference (fhold) and is appended
 * to info->extra_ref.  Returns -1 once extra_ref is full (presumably
 * this ends the scan, as stated in unp_revoke_gc_check), otherwise 0.
 *
 * MPSAFE - NOTE: filehead list and file pointer spinlocked on entry
 */
static int
unp_gc_checkrefs(struct file *fp, void *data)
{
	struct unp_gc_info *info = data;

	if (fp->f_count == 0)
		return(0);
	if (info->index == info->maxindex)
		return(-1);

	/*
	 * If all refs are from msgs, and it's not marked accessible
	 * then it must be referenced from some unreachable cycle
	 * of (shut-down) FDs, so include it in our
	 * list of FDs to remove
	 */
	if (fp->f_count == fp->f_msgcount && !(fp->f_flag & FMARK)) {
		info->extra_ref[info->index++] = fp;
		fhold(fp);
	}
	return(0);
}

/*
 * Scan callback: clear the FMARK and FDEFER flags on a file.
 *
 * MPSAFE - NOTE: filehead list and file pointer spinlocked on entry
 */
static int
unp_gc_clearmarks(struct file *fp, void *data __unused)
{
	atomic_clear_int(&fp->f_flag, FMARK | FDEFER);
	return(0);
}

/*
 * Scan callback for the marking phase.
 *
 * Marks a file FMARK when it has references other than in-transit
 * messages, or processes it because an earlier step flagged it FDEFER.
 * If the file is a local-domain socket that supports rights passing,
 * the files queued in its receive buffer are marked through unp_mark().
 * Work that cannot be done right now is counted in info->defer so that
 * unp_gc() runs another pass.  Always returns 0.
 *
 * MPSAFE - NOTE: filehead list and file pointer spinlocked on entry
 */
static int
unp_gc_checkmarks(struct file *fp, void *data)
{
	struct unp_gc_info *info = data;
	struct socket *so;

	/*
	 * If the file is not open, skip it.  Make sure it isn't marked
	 * deferred or we could loop forever, in case we somehow race
	 * something.
	 */
	if (fp->f_count == 0) {
		if (fp->f_flag & FDEFER)
			atomic_clear_int(&fp->f_flag, FDEFER);
		return(0);
	}
	/*
	 * If we already marked it as 'defer' in a
	 * previous pass, then try to process it this time
	 * and un-mark it
	 */
	if (fp->f_flag & FDEFER) {
		atomic_clear_int(&fp->f_flag, FDEFER);
	} else {
		/*
		 * if it's not deferred, then check if it's
		 * already marked.. if so skip it
		 */
		if (fp->f_flag & FMARK)
			return(0);
		/*
		 * If all references are from messages
		 * in transit, then skip it. it's not
		 * externally accessible.
		 */
		if (fp->f_count == fp->f_msgcount)
			return(0);
		/*
		 * If it got this far then it must be
		 * externally accessible.
		 */
		atomic_set_int(&fp->f_flag, FMARK);
	}

	/*
	 * either it was deferred, or it is externally
	 * accessible and not already marked so.
	 * Now check if it is possibly one of OUR sockets.
	 */
	if (fp->f_type != DTYPE_SOCKET ||
	    (so = (struct socket *)fp->f_data) == NULL) {
		return(0);
	}
	if (so->so_proto->pr_domain != &localdomain ||
	    !(so->so_proto->pr_flags & PR_RIGHTS)) {
		return(0);
	}

	/*
	 * So, Ok, it's one of our sockets and it IS externally accessible
	 * (or was deferred).  Now we look to see if we hold any file
	 * descriptors in its message buffers.  Follow those links and mark
	 * them as accessible too.
	 *
	 * We are holding multiple spinlocks here, if we cannot get the
	 * token non-blocking defer until the next loop.
	 */
	info->locked_fp = fp;
	if (lwkt_trytoken(&so->so_rcv.ssb_token)) {
		unp_scan(so->so_rcv.ssb_mb, unp_mark, info);
		lwkt_reltoken(&so->so_rcv.ssb_token);
	} else {
		/* could not get the token: retry this file on the next pass */
		atomic_set_int(&fp->f_flag, FDEFER);
		++info->defer;
	}
	return (0);
}

/*
 * Scan all unix domain sockets and replace any revoked file pointers
 * found with the dummy file pointer fx.  We don't worry about races
 * against file pointers being read out as those are handled in the
 * externalize code.
 */

/* Maximum number of revoked file pointers collected per scan. */
#define REVOKE_GC_MAXFILES 32

/*
 * State shared between unp_revoke_gc() and unp_revoke_gc_check().
 */
struct unp_revoke_gc_info {
	struct file *fx;			/* replacement file pointer */
	struct file *fary[REVOKE_GC_MAXFILES];	/* revoked fps removed from queues */
	int fcount;				/* number of valid entries in fary */
};

/*
 * Replace every revoked file pointer queued on a unix domain socket
 * with fx.
 *
 * Each scan collects up to REVOKE_GC_MAXFILES replaced pointers; they
 * are then released via unp_fp_externalize(NULL, fp, -1).  The scan is
 * repeated for as long as it fills the array completely.
 */
void
unp_revoke_gc(struct file *fx)
{
	struct unp_revoke_gc_info info;
	int i;

	lwkt_gettoken(&unp_token);
	info.fx = fx;
	do {
		info.fcount = 0;
		allfiles_scan_exclusive(unp_revoke_gc_check, &info);
		for (i = 0; i < info.fcount; ++i)
			unp_fp_externalize(NULL, info.fary[i], -1);
	} while (info.fcount == REVOKE_GC_MAXFILES);
	lwkt_reltoken(&unp_token);
}

/*
 * Check for and replace revoked descriptors.
 *
 * Scan callback.  fps is the file being visited and vinfo points to a
 * struct unp_revoke_gc_info.  Returns -1 when info->fary is full so
 * that the scan stops, otherwise 0.
 *
 * WARNING: This routine is not allowed to block.
 *
 * NOTE(review): lwkt_gettoken() is used on the receive-buffer token
 * below, whereas unp_gc_checkmarks() uses lwkt_trytoken() in the same
 * situation -- confirm this cannot block here.
 */
static int
unp_revoke_gc_check(struct file *fps, void *vinfo)
{
	struct unp_revoke_gc_info *info = vinfo;
	struct file *fp;
	struct socket *so;
	struct mbuf *m0;
	struct mbuf *m;
	struct file **rp;
	struct cmsghdr *cm;
	int i;
	int qfds;

	/*
	 * Is this a unix domain socket with rights-passing abilities?
	 */
	if (fps->f_type != DTYPE_SOCKET)
		return (0);
	if ((so = (struct socket *)fps->f_data) == NULL)
		return(0);
	if (so->so_proto->pr_domain != &localdomain)
		return(0);
	if ((so->so_proto->pr_flags & PR_RIGHTS) == 0)
		return(0);

	/*
	 * Scan the mbufs for control messages and replace any revoked
	 * descriptors we find.
	 */
	lwkt_gettoken(&so->so_rcv.ssb_token);
	m0 = so->so_rcv.ssb_mb;
	while (m0) {
		for (m = m0; m; m = m->m_next) {
			if (m->m_type != MT_CONTROL)
				continue;
			if (m->m_len < sizeof(*cm))
				continue;
			cm = mtod(m, struct cmsghdr *);
			if (cm->cmsg_level != SOL_SOCKET ||
			    cm->cmsg_type != SCM_RIGHTS) {
				continue;
			}
			qfds = (cm->cmsg_len - CMSG_LEN(0)) / sizeof(void *);
			rp = (struct file **)CMSG_DATA(cm);
			for (i = 0; i < qfds; i++) {
				fp = rp[i];
				if (fp->f_flag & FREVOKED) {
					kprintf("Warning: Removing revoked fp from unix domain socket queue\n");
					/*
					 * fx takes over the slot: it gains a
					 * reference and is accounted as in
					 * transit.  The old fp is handed back
					 * to unp_revoke_gc() through fary.
					 *
					 * NOTE(review): these counters are
					 * updated without unp_spin here,
					 * unlike unp_internalize() and
					 * unp_discard() -- confirm.
					 */
					fhold(info->fx);
					info->fx->f_msgcount++;
					unp_rights++;
					rp[i] = info->fx;
					info->fary[info->fcount++] = fp;
				}
				if (info->fcount == REVOKE_GC_MAXFILES)
					break;
			}
			if (info->fcount == REVOKE_GC_MAXFILES)
				break;
		}
		m0 = m0->m_nextpkt;
		if (info->fcount == REVOKE_GC_MAXFILES)
			break;
	}
	lwkt_reltoken(&so->so_rcv.ssb_token);

	/*
	 * Stop the scan if we filled up our array.
	 */
	if (info->fcount == REVOKE_GC_MAXFILES)
		return(-1);
	return(0);
}

/*
 * Dispose of the fp's stored in a mbuf.
 *
 * The dds loop can cause additional fps to be entered onto the
 * list while it is running, flattening out the operation and avoiding
 * a deep kernel stack recursion.
1756 */ 1757 void 1758 unp_dispose(struct mbuf *m) 1759 { 1760 unp_defdiscard_t dds; 1761 1762 lwkt_gettoken(&unp_token); 1763 ++unp_defdiscard_nest; 1764 if (m) { 1765 unp_scan(m, unp_discard, NULL); 1766 } 1767 if (unp_defdiscard_nest == 1) { 1768 while ((dds = unp_defdiscard_base) != NULL) { 1769 unp_defdiscard_base = dds->next; 1770 closef(dds->fp, NULL); 1771 kfree(dds, M_UNPCB); 1772 } 1773 } 1774 --unp_defdiscard_nest; 1775 lwkt_reltoken(&unp_token); 1776 } 1777 1778 static int 1779 unp_listen(struct unpcb *unp, struct thread *td) 1780 { 1781 struct proc *p = td->td_proc; 1782 1783 KKASSERT(p); 1784 lwkt_gettoken(&unp_token); 1785 cru2x(p->p_ucred, &unp->unp_peercred); 1786 unp->unp_flags |= UNP_HAVEPCCACHED; 1787 lwkt_reltoken(&unp_token); 1788 return (0); 1789 } 1790 1791 static void 1792 unp_scan(struct mbuf *m0, void (*op)(struct file *, void *), void *data) 1793 { 1794 struct mbuf *m; 1795 struct file **rp; 1796 struct cmsghdr *cm; 1797 int i; 1798 int qfds; 1799 1800 while (m0) { 1801 for (m = m0; m; m = m->m_next) { 1802 if (m->m_type == MT_CONTROL && 1803 m->m_len >= sizeof(*cm)) { 1804 cm = mtod(m, struct cmsghdr *); 1805 if (cm->cmsg_level != SOL_SOCKET || 1806 cm->cmsg_type != SCM_RIGHTS) 1807 continue; 1808 qfds = (cm->cmsg_len - CMSG_LEN(0)) / 1809 sizeof(void *); 1810 rp = (struct file **)CMSG_DATA(cm); 1811 for (i = 0; i < qfds; i++) 1812 (*op)(*rp++, data); 1813 break; /* XXX, but saves time */ 1814 } 1815 } 1816 m0 = m0->m_nextpkt; 1817 } 1818 } 1819 1820 /* 1821 * Mark visibility. info->defer is recalculated on every pass. 1822 */ 1823 static void 1824 unp_mark(struct file *fp, void *data) 1825 { 1826 struct unp_gc_info *info = data; 1827 1828 if ((fp->f_flag & FMARK) == 0) { 1829 ++info->defer; 1830 atomic_set_int(&fp->f_flag, FMARK | FDEFER); 1831 } else if (fp->f_flag & FDEFER) { 1832 ++info->defer; 1833 } 1834 } 1835 1836 /* 1837 * Discard a fp previously held in a unix domain socket mbuf. 
To 1838 * avoid blowing out the kernel stack due to contrived chain-reactions 1839 * we may have to defer the operation to a higher procedural level. 1840 * 1841 * Caller holds unp_token 1842 */ 1843 static void 1844 unp_discard(struct file *fp, void *data __unused) 1845 { 1846 unp_defdiscard_t dds; 1847 1848 spin_lock(&unp_spin); 1849 fp->f_msgcount--; 1850 unp_rights--; 1851 spin_unlock(&unp_spin); 1852 1853 if (unp_defdiscard_nest) { 1854 dds = kmalloc(sizeof(*dds), M_UNPCB, M_WAITOK|M_ZERO); 1855 dds->fp = fp; 1856 dds->next = unp_defdiscard_base; 1857 unp_defdiscard_base = dds; 1858 } else { 1859 closef(fp, NULL); 1860 } 1861 } 1862 1863