1 /* 2 * Copyright (c) 1982, 1986, 1989, 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. Neither the name of the University nor the names of its contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 
28 * 29 * From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94 30 * $FreeBSD: src/sys/kern/uipc_usrreq.c,v 1.54.2.10 2003/03/04 17:28:09 nectar Exp $ 31 */ 32 33 #include <sys/param.h> 34 #include <sys/systm.h> 35 #include <sys/kernel.h> 36 #include <sys/domain.h> 37 #include <sys/fcntl.h> 38 #include <sys/malloc.h> /* XXX must be before <sys/file.h> */ 39 #include <sys/proc.h> 40 #include <sys/file.h> 41 #include <sys/filedesc.h> 42 #include <sys/mbuf.h> 43 #include <sys/nlookup.h> 44 #include <sys/protosw.h> 45 #include <sys/socket.h> 46 #include <sys/socketvar.h> 47 #include <sys/resourcevar.h> 48 #include <sys/stat.h> 49 #include <sys/mount.h> 50 #include <sys/sysctl.h> 51 #include <sys/un.h> 52 #include <sys/unpcb.h> 53 #include <sys/vnode.h> 54 55 #include <sys/file2.h> 56 #include <sys/spinlock2.h> 57 #include <sys/socketvar2.h> 58 #include <sys/msgport2.h> 59 60 typedef struct unp_defdiscard { 61 struct unp_defdiscard *next; 62 struct file *fp; 63 } *unp_defdiscard_t; 64 65 static MALLOC_DEFINE(M_UNPCB, "unpcb", "unpcb struct"); 66 static unp_gen_t unp_gencnt; 67 static u_int unp_count; 68 69 static struct unp_head unp_shead, unp_dhead; 70 71 static struct lwkt_token unp_token = LWKT_TOKEN_INITIALIZER(unp_token); 72 static int unp_defdiscard_nest; 73 static unp_defdiscard_t unp_defdiscard_base; 74 75 /* 76 * Unix communications domain. 
77 * 78 * TODO: 79 * RDM 80 * rethink name space problems 81 * need a proper out-of-band 82 * lock pushdown 83 */ 84 static struct sockaddr sun_noname = { sizeof(sun_noname), AF_LOCAL }; 85 static ino_t unp_ino = 1; /* prototype for fake inode numbers */ 86 static struct spinlock unp_ino_spin = SPINLOCK_INITIALIZER(&unp_ino_spin, "unp_ino_spin"); 87 88 static int unp_attach (struct socket *, struct pru_attach_info *); 89 static void unp_detach (struct unpcb *); 90 static int unp_bind (struct unpcb *,struct sockaddr *, struct thread *); 91 static int unp_connect (struct socket *,struct sockaddr *, 92 struct thread *); 93 static void unp_disconnect (struct unpcb *); 94 static void unp_shutdown (struct unpcb *); 95 static void unp_drop (struct unpcb *, int); 96 static void unp_gc (void); 97 static int unp_gc_clearmarks(struct file *, void *); 98 static int unp_gc_checkmarks(struct file *, void *); 99 static int unp_gc_checkrefs(struct file *, void *); 100 static int unp_revoke_gc_check(struct file *, void *); 101 static void unp_scan (struct mbuf *, void (*)(struct file *, void *), 102 void *data); 103 static void unp_mark (struct file *, void *data); 104 static void unp_discard (struct file *, void *); 105 static int unp_internalize (struct mbuf *, struct thread *); 106 static int unp_listen (struct unpcb *, struct thread *); 107 static void unp_fp_externalize(struct lwp *lp, struct file *fp, int fd); 108 109 /* 110 * SMP Considerations: 111 * 112 * Since unp_token will be automatically released upon execution of 113 * blocking code, we need to reference unp_conn before any possible 114 * blocking code to prevent it from being ripped behind our back. 115 * 116 * Any adjustment to unp->unp_conn requires both the global unp_token 117 * AND the per-unp token (lwkt_token_pool_lookup(unp)) to be held. 118 * 119 * Any access to so_pcb to obtain unp requires the pool token for 120 * unp to be held. 
121 */ 122 123 /* NOTE: unp_token MUST be held */ 124 static __inline void 125 unp_reference(struct unpcb *unp) 126 { 127 atomic_add_int(&unp->unp_refcnt, 1); 128 } 129 130 /* NOTE: unp_token MUST be held */ 131 static __inline void 132 unp_free(struct unpcb *unp) 133 { 134 KKASSERT(unp->unp_refcnt > 0); 135 if (atomic_fetchadd_int(&unp->unp_refcnt, -1) == 1) 136 unp_detach(unp); 137 } 138 139 /* 140 * NOTE: (so) is referenced from soabort*() and netmsg_pru_abort() 141 * will sofree() it when we return. 142 */ 143 static void 144 uipc_abort(netmsg_t msg) 145 { 146 struct unpcb *unp; 147 int error; 148 149 lwkt_gettoken(&unp_token); 150 unp = msg->base.nm_so->so_pcb; 151 if (unp) { 152 unp_drop(unp, ECONNABORTED); 153 unp_free(unp); 154 error = 0; 155 } else { 156 error = EINVAL; 157 } 158 lwkt_reltoken(&unp_token); 159 160 lwkt_replymsg(&msg->lmsg, error); 161 } 162 163 static void 164 uipc_accept(netmsg_t msg) 165 { 166 struct unpcb *unp; 167 int error; 168 169 lwkt_gettoken(&unp_token); 170 unp = msg->base.nm_so->so_pcb; 171 if (unp == NULL) { 172 error = EINVAL; 173 } else { 174 struct unpcb *unp2 = unp->unp_conn; 175 176 /* 177 * Pass back name of connected socket, 178 * if it was bound and we are still connected 179 * (our peer may have closed already!). 
180 */ 181 if (unp2 && unp2->unp_addr) { 182 unp_reference(unp2); 183 *msg->accept.nm_nam = dup_sockaddr( 184 (struct sockaddr *)unp2->unp_addr); 185 unp_free(unp2); 186 } else { 187 *msg->accept.nm_nam = dup_sockaddr(&sun_noname); 188 } 189 error = 0; 190 } 191 lwkt_reltoken(&unp_token); 192 lwkt_replymsg(&msg->lmsg, error); 193 } 194 195 static void 196 uipc_attach(netmsg_t msg) 197 { 198 struct unpcb *unp; 199 int error; 200 201 lwkt_gettoken(&unp_token); 202 unp = msg->base.nm_so->so_pcb; 203 if (unp) 204 error = EISCONN; 205 else 206 error = unp_attach(msg->base.nm_so, msg->attach.nm_ai); 207 lwkt_reltoken(&unp_token); 208 lwkt_replymsg(&msg->lmsg, error); 209 } 210 211 static void 212 uipc_bind(netmsg_t msg) 213 { 214 struct unpcb *unp; 215 int error; 216 217 lwkt_gettoken(&unp_token); 218 unp = msg->base.nm_so->so_pcb; 219 if (unp) 220 error = unp_bind(unp, msg->bind.nm_nam, msg->bind.nm_td); 221 else 222 error = EINVAL; 223 lwkt_reltoken(&unp_token); 224 lwkt_replymsg(&msg->lmsg, error); 225 } 226 227 static void 228 uipc_connect(netmsg_t msg) 229 { 230 struct unpcb *unp; 231 int error; 232 233 unp = msg->base.nm_so->so_pcb; 234 if (unp) { 235 error = unp_connect(msg->base.nm_so, 236 msg->connect.nm_nam, 237 msg->connect.nm_td); 238 } else { 239 error = EINVAL; 240 } 241 lwkt_replymsg(&msg->lmsg, error); 242 } 243 244 static void 245 uipc_connect2(netmsg_t msg) 246 { 247 struct unpcb *unp; 248 int error; 249 250 unp = msg->connect2.nm_so1->so_pcb; 251 if (unp) { 252 error = unp_connect2(msg->connect2.nm_so1, 253 msg->connect2.nm_so2); 254 } else { 255 error = EINVAL; 256 } 257 lwkt_replymsg(&msg->lmsg, error); 258 } 259 260 /* control is EOPNOTSUPP */ 261 262 static void 263 uipc_detach(netmsg_t msg) 264 { 265 struct unpcb *unp; 266 int error; 267 268 lwkt_gettoken(&unp_token); 269 unp = msg->base.nm_so->so_pcb; 270 if (unp) { 271 unp_free(unp); 272 error = 0; 273 } else { 274 error = EINVAL; 275 } 276 lwkt_reltoken(&unp_token); 277 
lwkt_replymsg(&msg->lmsg, error); 278 } 279 280 static void 281 uipc_disconnect(netmsg_t msg) 282 { 283 struct unpcb *unp; 284 int error; 285 286 lwkt_gettoken(&unp_token); 287 unp = msg->base.nm_so->so_pcb; 288 if (unp) { 289 unp_disconnect(unp); 290 error = 0; 291 } else { 292 error = EINVAL; 293 } 294 lwkt_reltoken(&unp_token); 295 lwkt_replymsg(&msg->lmsg, error); 296 } 297 298 static void 299 uipc_listen(netmsg_t msg) 300 { 301 struct unpcb *unp; 302 int error; 303 304 lwkt_gettoken(&unp_token); 305 unp = msg->base.nm_so->so_pcb; 306 if (unp == NULL || unp->unp_vnode == NULL) 307 error = EINVAL; 308 else 309 error = unp_listen(unp, msg->listen.nm_td); 310 lwkt_reltoken(&unp_token); 311 lwkt_replymsg(&msg->lmsg, error); 312 } 313 314 static void 315 uipc_peeraddr(netmsg_t msg) 316 { 317 struct unpcb *unp; 318 int error; 319 320 lwkt_gettoken(&unp_token); 321 unp = msg->base.nm_so->so_pcb; 322 if (unp == NULL) { 323 error = EINVAL; 324 } else if (unp->unp_conn && unp->unp_conn->unp_addr) { 325 struct unpcb *unp2 = unp->unp_conn; 326 327 unp_reference(unp2); 328 *msg->peeraddr.nm_nam = dup_sockaddr( 329 (struct sockaddr *)unp2->unp_addr); 330 unp_free(unp2); 331 error = 0; 332 } else { 333 /* 334 * XXX: It seems that this test always fails even when 335 * connection is established. So, this else clause is 336 * added as workaround to return PF_LOCAL sockaddr. 337 */ 338 *msg->peeraddr.nm_nam = dup_sockaddr(&sun_noname); 339 error = 0; 340 } 341 lwkt_reltoken(&unp_token); 342 lwkt_replymsg(&msg->lmsg, error); 343 } 344 345 static void 346 uipc_rcvd(netmsg_t msg) 347 { 348 struct unpcb *unp, *unp2; 349 struct socket *so; 350 struct socket *so2; 351 int error; 352 353 /* 354 * so_pcb is only modified with both the global and the unp 355 * pool token held. The unp pointer is invalid until we verify 356 * that it is good by re-checking so_pcb AFTER obtaining the token. 
357 */ 358 so = msg->base.nm_so; 359 while ((unp = so->so_pcb) != NULL) { 360 lwkt_getpooltoken(unp); 361 if (unp == so->so_pcb) 362 break; 363 lwkt_relpooltoken(unp); 364 } 365 if (unp == NULL) { 366 error = EINVAL; 367 goto done; 368 } 369 /* pool token held */ 370 371 switch (so->so_type) { 372 case SOCK_DGRAM: 373 panic("uipc_rcvd DGRAM?"); 374 /*NOTREACHED*/ 375 case SOCK_STREAM: 376 case SOCK_SEQPACKET: 377 if (unp->unp_conn == NULL) 378 break; 379 unp2 = unp->unp_conn; /* protected by pool token */ 380 381 /* 382 * Because we are transfering mbufs directly to the 383 * peer socket we have to use SSB_STOP on the sender 384 * to prevent it from building up infinite mbufs. 385 * 386 * As in several places in this module w ehave to ref unp2 387 * to ensure that it does not get ripped out from under us 388 * if we block on the so2 token or in sowwakeup(). 389 */ 390 so2 = unp2->unp_socket; 391 unp_reference(unp2); 392 lwkt_gettoken(&so2->so_rcv.ssb_token); 393 if (so->so_rcv.ssb_cc < so2->so_snd.ssb_hiwat && 394 so->so_rcv.ssb_mbcnt < so2->so_snd.ssb_mbmax 395 ) { 396 atomic_clear_int(&so2->so_snd.ssb_flags, SSB_STOP); 397 398 sowwakeup(so2); 399 } 400 lwkt_reltoken(&so2->so_rcv.ssb_token); 401 unp_free(unp2); 402 break; 403 default: 404 panic("uipc_rcvd unknown socktype"); 405 /*NOTREACHED*/ 406 } 407 error = 0; 408 lwkt_relpooltoken(unp); 409 done: 410 lwkt_replymsg(&msg->lmsg, error); 411 } 412 413 /* pru_rcvoob is EOPNOTSUPP */ 414 415 static void 416 uipc_send(netmsg_t msg) 417 { 418 struct unpcb *unp, *unp2; 419 struct socket *so; 420 struct socket *so2; 421 struct mbuf *control; 422 struct mbuf *m; 423 int error = 0; 424 425 so = msg->base.nm_so; 426 control = msg->send.nm_control; 427 m = msg->send.nm_m; 428 429 /* 430 * so_pcb is only modified with both the global and the unp 431 * pool token held. The unp pointer is invalid until we verify 432 * that it is good by re-checking so_pcb AFTER obtaining the token. 
433 */ 434 so = msg->base.nm_so; 435 while ((unp = so->so_pcb) != NULL) { 436 lwkt_getpooltoken(unp); 437 if (unp == so->so_pcb) 438 break; 439 lwkt_relpooltoken(unp); 440 } 441 if (unp == NULL) { 442 error = EINVAL; 443 goto done; 444 } 445 /* pool token held */ 446 447 if (msg->send.nm_flags & PRUS_OOB) { 448 error = EOPNOTSUPP; 449 goto release; 450 } 451 452 wakeup_start_delayed(); 453 454 if (control && (error = unp_internalize(control, msg->send.nm_td))) 455 goto release; 456 457 switch (so->so_type) { 458 case SOCK_DGRAM: 459 { 460 struct sockaddr *from; 461 462 if (msg->send.nm_addr) { 463 if (unp->unp_conn) { 464 error = EISCONN; 465 break; 466 } 467 error = unp_connect(so, 468 msg->send.nm_addr, 469 msg->send.nm_td); 470 if (error) 471 break; 472 } else { 473 if (unp->unp_conn == NULL) { 474 error = ENOTCONN; 475 break; 476 } 477 } 478 unp2 = unp->unp_conn; 479 so2 = unp2->unp_socket; 480 if (unp->unp_addr) 481 from = (struct sockaddr *)unp->unp_addr; 482 else 483 from = &sun_noname; 484 485 unp_reference(unp2); 486 487 lwkt_gettoken(&so2->so_rcv.ssb_token); 488 if (ssb_appendaddr(&so2->so_rcv, from, m, control)) { 489 sorwakeup(so2); 490 m = NULL; 491 control = NULL; 492 } else { 493 error = ENOBUFS; 494 } 495 if (msg->send.nm_addr) 496 unp_disconnect(unp); 497 lwkt_reltoken(&so2->so_rcv.ssb_token); 498 499 unp_free(unp2); 500 break; 501 } 502 503 case SOCK_STREAM: 504 case SOCK_SEQPACKET: 505 /* Connect if not connected yet. */ 506 /* 507 * Note: A better implementation would complain 508 * if not equal to the peer's address. 
509 */ 510 if (!(so->so_state & SS_ISCONNECTED)) { 511 if (msg->send.nm_addr) { 512 error = unp_connect(so, 513 msg->send.nm_addr, 514 msg->send.nm_td); 515 if (error) 516 break; /* XXX */ 517 } else { 518 error = ENOTCONN; 519 break; 520 } 521 } 522 523 if (so->so_state & SS_CANTSENDMORE) { 524 error = EPIPE; 525 break; 526 } 527 if (unp->unp_conn == NULL) 528 panic("uipc_send connected but no connection?"); 529 unp2 = unp->unp_conn; 530 so2 = unp2->unp_socket; 531 532 unp_reference(unp2); 533 534 /* 535 * Send to paired receive port, and then reduce 536 * send buffer hiwater marks to maintain backpressure. 537 * Wake up readers. 538 */ 539 lwkt_gettoken(&so2->so_rcv.ssb_token); 540 if (control) { 541 if (ssb_appendcontrol(&so2->so_rcv, m, control)) { 542 control = NULL; 543 m = NULL; 544 } 545 } else if (so->so_type == SOCK_SEQPACKET) { 546 sbappendrecord(&so2->so_rcv.sb, m); 547 m = NULL; 548 } else { 549 sbappend(&so2->so_rcv.sb, m); 550 m = NULL; 551 } 552 553 /* 554 * Because we are transfering mbufs directly to the 555 * peer socket we have to use SSB_STOP on the sender 556 * to prevent it from building up infinite mbufs. 557 */ 558 if (so2->so_rcv.ssb_cc >= so->so_snd.ssb_hiwat || 559 so2->so_rcv.ssb_mbcnt >= so->so_snd.ssb_mbmax 560 ) { 561 atomic_set_int(&so->so_snd.ssb_flags, SSB_STOP); 562 } 563 lwkt_reltoken(&so2->so_rcv.ssb_token); 564 sorwakeup(so2); 565 566 unp_free(unp2); 567 break; 568 569 default: 570 panic("uipc_send unknown socktype"); 571 } 572 573 /* 574 * SEND_EOF is equivalent to a SEND followed by a SHUTDOWN. 
575 */ 576 if (msg->send.nm_flags & PRUS_EOF) { 577 socantsendmore(so); 578 unp_shutdown(unp); 579 } 580 581 if (control && error != 0) 582 unp_dispose(control); 583 release: 584 lwkt_relpooltoken(unp); 585 wakeup_end_delayed(); 586 done: 587 588 if (control) 589 m_freem(control); 590 if (m) 591 m_freem(m); 592 lwkt_replymsg(&msg->lmsg, error); 593 } 594 595 /* 596 * MPSAFE 597 */ 598 static void 599 uipc_sense(netmsg_t msg) 600 { 601 struct unpcb *unp; 602 struct socket *so; 603 struct stat *sb; 604 int error; 605 606 so = msg->base.nm_so; 607 sb = msg->sense.nm_stat; 608 609 /* 610 * so_pcb is only modified with both the global and the unp 611 * pool token held. The unp pointer is invalid until we verify 612 * that it is good by re-checking so_pcb AFTER obtaining the token. 613 */ 614 while ((unp = so->so_pcb) != NULL) { 615 lwkt_getpooltoken(unp); 616 if (unp == so->so_pcb) 617 break; 618 lwkt_relpooltoken(unp); 619 } 620 if (unp == NULL) { 621 error = EINVAL; 622 goto done; 623 } 624 /* pool token held */ 625 626 sb->st_blksize = so->so_snd.ssb_hiwat; 627 sb->st_dev = NOUDEV; 628 if (unp->unp_ino == 0) { /* make up a non-zero inode number */ 629 spin_lock(&unp_ino_spin); 630 unp->unp_ino = unp_ino++; 631 spin_unlock(&unp_ino_spin); 632 } 633 sb->st_ino = unp->unp_ino; 634 error = 0; 635 lwkt_relpooltoken(unp); 636 done: 637 lwkt_replymsg(&msg->lmsg, error); 638 } 639 640 static void 641 uipc_shutdown(netmsg_t msg) 642 { 643 struct socket *so; 644 struct unpcb *unp; 645 int error; 646 647 /* 648 * so_pcb is only modified with both the global and the unp 649 * pool token held. The unp pointer is invalid until we verify 650 * that it is good by re-checking so_pcb AFTER obtaining the token. 
651 */ 652 so = msg->base.nm_so; 653 while ((unp = so->so_pcb) != NULL) { 654 lwkt_getpooltoken(unp); 655 if (unp == so->so_pcb) 656 break; 657 lwkt_relpooltoken(unp); 658 } 659 if (unp) { 660 /* pool token held */ 661 socantsendmore(so); 662 unp_shutdown(unp); 663 lwkt_relpooltoken(unp); 664 error = 0; 665 } else { 666 error = EINVAL; 667 } 668 lwkt_replymsg(&msg->lmsg, error); 669 } 670 671 static void 672 uipc_sockaddr(netmsg_t msg) 673 { 674 struct socket *so; 675 struct unpcb *unp; 676 int error; 677 678 /* 679 * so_pcb is only modified with both the global and the unp 680 * pool token held. The unp pointer is invalid until we verify 681 * that it is good by re-checking so_pcb AFTER obtaining the token. 682 */ 683 so = msg->base.nm_so; 684 while ((unp = so->so_pcb) != NULL) { 685 lwkt_getpooltoken(unp); 686 if (unp == so->so_pcb) 687 break; 688 lwkt_relpooltoken(unp); 689 } 690 if (unp) { 691 /* pool token held */ 692 if (unp->unp_addr) { 693 *msg->sockaddr.nm_nam = 694 dup_sockaddr((struct sockaddr *)unp->unp_addr); 695 } 696 lwkt_relpooltoken(unp); 697 error = 0; 698 } else { 699 error = EINVAL; 700 } 701 lwkt_replymsg(&msg->lmsg, error); 702 } 703 704 struct pr_usrreqs uipc_usrreqs = { 705 .pru_abort = uipc_abort, 706 .pru_accept = uipc_accept, 707 .pru_attach = uipc_attach, 708 .pru_bind = uipc_bind, 709 .pru_connect = uipc_connect, 710 .pru_connect2 = uipc_connect2, 711 .pru_control = pr_generic_notsupp, 712 .pru_detach = uipc_detach, 713 .pru_disconnect = uipc_disconnect, 714 .pru_listen = uipc_listen, 715 .pru_peeraddr = uipc_peeraddr, 716 .pru_rcvd = uipc_rcvd, 717 .pru_rcvoob = pr_generic_notsupp, 718 .pru_send = uipc_send, 719 .pru_sense = uipc_sense, 720 .pru_shutdown = uipc_shutdown, 721 .pru_sockaddr = uipc_sockaddr, 722 .pru_sosend = sosend, 723 .pru_soreceive = soreceive 724 }; 725 726 void 727 uipc_ctloutput(netmsg_t msg) 728 { 729 struct socket *so; 730 struct sockopt *sopt; 731 struct unpcb *unp; 732 int error = 0; 733 734 
lwkt_gettoken(&unp_token); 735 so = msg->base.nm_so; 736 sopt = msg->ctloutput.nm_sopt; 737 unp = so->so_pcb; 738 739 switch (sopt->sopt_dir) { 740 case SOPT_GET: 741 switch (sopt->sopt_name) { 742 case LOCAL_PEERCRED: 743 if (unp->unp_flags & UNP_HAVEPC) 744 soopt_from_kbuf(sopt, &unp->unp_peercred, 745 sizeof(unp->unp_peercred)); 746 else { 747 if (so->so_type == SOCK_STREAM) 748 error = ENOTCONN; 749 else if (so->so_type == SOCK_SEQPACKET) 750 error = ENOTCONN; 751 else 752 error = EINVAL; 753 } 754 break; 755 default: 756 error = EOPNOTSUPP; 757 break; 758 } 759 break; 760 case SOPT_SET: 761 default: 762 error = EOPNOTSUPP; 763 break; 764 } 765 lwkt_reltoken(&unp_token); 766 lwkt_replymsg(&msg->lmsg, error); 767 } 768 769 /* 770 * Both send and receive buffers are allocated PIPSIZ bytes of buffering 771 * for stream sockets, although the total for sender and receiver is 772 * actually only PIPSIZ. 773 * 774 * Datagram sockets really use the sendspace as the maximum datagram size, 775 * and don't really want to reserve the sendspace. Their recvspace should 776 * be large enough for at least one max-size datagram plus address. 777 * 778 * We want the local send/recv space to be significant larger then lo0's 779 * mtu of 16384. 
780 */ 781 #ifndef PIPSIZ 782 #define PIPSIZ 57344 783 #endif 784 static u_long unpst_sendspace = PIPSIZ; 785 static u_long unpst_recvspace = PIPSIZ; 786 static u_long unpdg_sendspace = 2*1024; /* really max datagram size */ 787 static u_long unpdg_recvspace = 4*1024; 788 789 static int unp_rights; /* file descriptors in flight */ 790 static struct spinlock unp_spin = SPINLOCK_INITIALIZER(&unp_spin, "unp_spin"); 791 792 SYSCTL_DECL(_net_local_seqpacket); 793 SYSCTL_DECL(_net_local_stream); 794 SYSCTL_INT(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW, 795 &unpst_sendspace, 0, "Size of stream socket send buffer"); 796 SYSCTL_INT(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW, 797 &unpst_recvspace, 0, "Size of stream socket receive buffer"); 798 799 SYSCTL_DECL(_net_local_dgram); 800 SYSCTL_INT(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW, 801 &unpdg_sendspace, 0, "Max datagram socket size"); 802 SYSCTL_INT(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW, 803 &unpdg_recvspace, 0, "Size of datagram socket receive buffer"); 804 805 SYSCTL_DECL(_net_local); 806 SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0, 807 "File descriptors in flight"); 808 809 static int 810 unp_attach(struct socket *so, struct pru_attach_info *ai) 811 { 812 struct unpcb *unp; 813 int error; 814 815 lwkt_gettoken(&unp_token); 816 817 if (so->so_snd.ssb_hiwat == 0 || so->so_rcv.ssb_hiwat == 0) { 818 switch (so->so_type) { 819 case SOCK_STREAM: 820 case SOCK_SEQPACKET: 821 error = soreserve(so, unpst_sendspace, unpst_recvspace, 822 ai->sb_rlimit); 823 break; 824 825 case SOCK_DGRAM: 826 error = soreserve(so, unpdg_sendspace, unpdg_recvspace, 827 ai->sb_rlimit); 828 break; 829 830 default: 831 panic("unp_attach"); 832 } 833 if (error) 834 goto failed; 835 } 836 837 /* 838 * In order to support sendfile we have to set either SSB_STOPSUPP 839 * or SSB_PREALLOC. Unix domain sockets use the SSB_STOP flow 840 * control mechanism. 
841 */ 842 if (so->so_type == SOCK_STREAM) { 843 atomic_set_int(&so->so_rcv.ssb_flags, SSB_STOPSUPP); 844 atomic_set_int(&so->so_snd.ssb_flags, SSB_STOPSUPP); 845 } 846 847 unp = kmalloc(sizeof(*unp), M_UNPCB, M_WAITOK | M_ZERO | M_NULLOK); 848 if (unp == NULL) { 849 error = ENOBUFS; 850 goto failed; 851 } 852 unp->unp_refcnt = 1; 853 unp->unp_gencnt = ++unp_gencnt; 854 unp_count++; 855 LIST_INIT(&unp->unp_refs); 856 unp->unp_socket = so; 857 unp->unp_rvnode = ai->fd_rdir; /* jail cruft XXX JH */ 858 LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ? &unp_dhead 859 : &unp_shead, unp, unp_link); 860 so->so_pcb = (caddr_t)unp; 861 soreference(so); 862 error = 0; 863 failed: 864 lwkt_reltoken(&unp_token); 865 return error; 866 } 867 868 static void 869 unp_detach(struct unpcb *unp) 870 { 871 struct socket *so; 872 873 lwkt_gettoken(&unp_token); 874 lwkt_getpooltoken(unp); 875 876 LIST_REMOVE(unp, unp_link); /* both tokens required */ 877 unp->unp_gencnt = ++unp_gencnt; 878 --unp_count; 879 if (unp->unp_vnode) { 880 unp->unp_vnode->v_socket = NULL; 881 vrele(unp->unp_vnode); 882 unp->unp_vnode = NULL; 883 } 884 if (unp->unp_conn) 885 unp_disconnect(unp); 886 while (!LIST_EMPTY(&unp->unp_refs)) 887 unp_drop(LIST_FIRST(&unp->unp_refs), ECONNRESET); 888 soisdisconnected(unp->unp_socket); 889 so = unp->unp_socket; 890 soreference(so); /* for delayed sorflush */ 891 KKASSERT(so->so_pcb == unp); 892 so->so_pcb = NULL; /* both tokens required */ 893 unp->unp_socket = NULL; 894 sofree(so); /* remove pcb ref */ 895 896 if (unp_rights) { 897 /* 898 * Normally the receive buffer is flushed later, 899 * in sofree, but if our receive buffer holds references 900 * to descriptors that are now garbage, we will dispose 901 * of those descriptor references after the garbage collector 902 * gets them (resulting in a "panic: closef: count < 0"). 
903 */ 904 sorflush(so); 905 unp_gc(); 906 } 907 sofree(so); 908 lwkt_relpooltoken(unp); 909 lwkt_reltoken(&unp_token); 910 911 if (unp->unp_addr) 912 kfree(unp->unp_addr, M_SONAME); 913 kfree(unp, M_UNPCB); 914 } 915 916 static int 917 unp_bind(struct unpcb *unp, struct sockaddr *nam, struct thread *td) 918 { 919 struct proc *p = td->td_proc; 920 struct sockaddr_un *soun = (struct sockaddr_un *)nam; 921 struct vnode *vp; 922 struct vattr vattr; 923 int error, namelen; 924 struct nlookupdata nd; 925 char buf[SOCK_MAXADDRLEN]; 926 927 lwkt_gettoken(&unp_token); 928 if (unp->unp_vnode != NULL) { 929 error = EINVAL; 930 goto failed; 931 } 932 namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path); 933 if (namelen <= 0) { 934 error = EINVAL; 935 goto failed; 936 } 937 strncpy(buf, soun->sun_path, namelen); 938 buf[namelen] = 0; /* null-terminate the string */ 939 error = nlookup_init(&nd, buf, UIO_SYSSPACE, 940 NLC_LOCKVP | NLC_CREATE | NLC_REFDVP); 941 if (error == 0) 942 error = nlookup(&nd); 943 if (error == 0 && nd.nl_nch.ncp->nc_vp != NULL) 944 error = EADDRINUSE; 945 if (error) 946 goto done; 947 948 VATTR_NULL(&vattr); 949 vattr.va_type = VSOCK; 950 vattr.va_mode = (ACCESSPERMS & ~p->p_fd->fd_cmask); 951 error = VOP_NCREATE(&nd.nl_nch, nd.nl_dvp, &vp, nd.nl_cred, &vattr); 952 if (error == 0) { 953 if (unp->unp_vnode == NULL) { 954 vp->v_socket = unp->unp_socket; 955 unp->unp_vnode = vp; 956 unp->unp_addr = (struct sockaddr_un *)dup_sockaddr(nam); 957 vn_unlock(vp); 958 } else { 959 vput(vp); /* late race */ 960 error = EINVAL; 961 } 962 } 963 done: 964 nlookup_done(&nd); 965 failed: 966 lwkt_reltoken(&unp_token); 967 return (error); 968 } 969 970 static int 971 unp_connect(struct socket *so, struct sockaddr *nam, struct thread *td) 972 { 973 struct proc *p = td->td_proc; 974 struct sockaddr_un *soun = (struct sockaddr_un *)nam; 975 struct vnode *vp; 976 struct socket *so2, *so3; 977 struct unpcb *unp, *unp2, *unp3; 978 int error, len; 979 struct 
nlookupdata nd; 980 char buf[SOCK_MAXADDRLEN]; 981 982 lwkt_gettoken(&unp_token); 983 984 len = nam->sa_len - offsetof(struct sockaddr_un, sun_path); 985 if (len <= 0) { 986 error = EINVAL; 987 goto failed; 988 } 989 strncpy(buf, soun->sun_path, len); 990 buf[len] = 0; 991 992 vp = NULL; 993 error = nlookup_init(&nd, buf, UIO_SYSSPACE, NLC_FOLLOW); 994 if (error == 0) 995 error = nlookup(&nd); 996 if (error == 0) 997 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 998 nlookup_done(&nd); 999 if (error) 1000 goto failed; 1001 1002 if (vp->v_type != VSOCK) { 1003 error = ENOTSOCK; 1004 goto bad; 1005 } 1006 error = VOP_EACCESS(vp, VWRITE, p->p_ucred); 1007 if (error) 1008 goto bad; 1009 so2 = vp->v_socket; 1010 if (so2 == NULL) { 1011 error = ECONNREFUSED; 1012 goto bad; 1013 } 1014 if (so->so_type != so2->so_type) { 1015 error = EPROTOTYPE; 1016 goto bad; 1017 } 1018 if (so->so_proto->pr_flags & PR_CONNREQUIRED) { 1019 if (!(so2->so_options & SO_ACCEPTCONN) || 1020 (so3 = sonewconn(so2, 0)) == NULL) { 1021 error = ECONNREFUSED; 1022 goto bad; 1023 } 1024 unp = so->so_pcb; 1025 if (unp->unp_conn) { /* race, already connected! */ 1026 error = EISCONN; 1027 sofree(so3); 1028 goto bad; 1029 } 1030 unp2 = so2->so_pcb; 1031 unp3 = so3->so_pcb; 1032 if (unp2->unp_addr) 1033 unp3->unp_addr = (struct sockaddr_un *) 1034 dup_sockaddr((struct sockaddr *)unp2->unp_addr); 1035 1036 /* 1037 * unp_peercred management: 1038 * 1039 * The connecter's (client's) credentials are copied 1040 * from its process structure at the time of connect() 1041 * (which is now). 1042 */ 1043 cru2x(p->p_ucred, &unp3->unp_peercred); 1044 unp3->unp_flags |= UNP_HAVEPC; 1045 /* 1046 * The receiver's (server's) credentials are copied 1047 * from the unp_peercred member of socket on which the 1048 * former called listen(); unp_listen() cached that 1049 * process's credentials at that time so we can use 1050 * them now. 
1051 */ 1052 KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED, 1053 ("unp_connect: listener without cached peercred")); 1054 memcpy(&unp->unp_peercred, &unp2->unp_peercred, 1055 sizeof(unp->unp_peercred)); 1056 unp->unp_flags |= UNP_HAVEPC; 1057 1058 so2 = so3; 1059 } 1060 error = unp_connect2(so, so2); 1061 bad: 1062 vput(vp); 1063 failed: 1064 lwkt_reltoken(&unp_token); 1065 return (error); 1066 } 1067 1068 /* 1069 * Connect two unix domain sockets together. 1070 * 1071 * NOTE: Semantics for any change to unp_conn requires that the per-unp 1072 * pool token also be held. 1073 */ 1074 int 1075 unp_connect2(struct socket *so, struct socket *so2) 1076 { 1077 struct unpcb *unp; 1078 struct unpcb *unp2; 1079 1080 lwkt_gettoken(&unp_token); 1081 unp = so->so_pcb; 1082 if (so2->so_type != so->so_type) { 1083 lwkt_reltoken(&unp_token); 1084 return (EPROTOTYPE); 1085 } 1086 unp2 = so2->so_pcb; 1087 lwkt_getpooltoken(unp); 1088 lwkt_getpooltoken(unp2); 1089 1090 unp->unp_conn = unp2; 1091 1092 switch (so->so_type) { 1093 case SOCK_DGRAM: 1094 LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink); 1095 soisconnected(so); 1096 break; 1097 1098 case SOCK_STREAM: 1099 case SOCK_SEQPACKET: 1100 unp2->unp_conn = unp; 1101 soisconnected(so); 1102 soisconnected(so2); 1103 break; 1104 1105 default: 1106 panic("unp_connect2"); 1107 } 1108 lwkt_relpooltoken(unp2); 1109 lwkt_relpooltoken(unp); 1110 lwkt_reltoken(&unp_token); 1111 return (0); 1112 } 1113 1114 /* 1115 * Disconnect a unix domain socket pair. 1116 * 1117 * NOTE: Semantics for any change to unp_conn requires that the per-unp 1118 * pool token also be held. 
1119 */ 1120 static void 1121 unp_disconnect(struct unpcb *unp) 1122 { 1123 struct unpcb *unp2; 1124 1125 lwkt_gettoken(&unp_token); 1126 lwkt_getpooltoken(unp); 1127 1128 while ((unp2 = unp->unp_conn) != NULL) { 1129 lwkt_getpooltoken(unp2); 1130 if (unp2 == unp->unp_conn) 1131 break; 1132 lwkt_relpooltoken(unp2); 1133 } 1134 if (unp2 == NULL) 1135 goto done; 1136 1137 unp->unp_conn = NULL; 1138 1139 switch (unp->unp_socket->so_type) { 1140 case SOCK_DGRAM: 1141 LIST_REMOVE(unp, unp_reflink); 1142 soclrstate(unp->unp_socket, SS_ISCONNECTED); 1143 break; 1144 1145 case SOCK_STREAM: 1146 case SOCK_SEQPACKET: 1147 unp_reference(unp2); 1148 unp2->unp_conn = NULL; 1149 1150 soisdisconnected(unp->unp_socket); 1151 soisdisconnected(unp2->unp_socket); 1152 1153 unp_free(unp2); 1154 break; 1155 } 1156 lwkt_relpooltoken(unp2); 1157 done: 1158 lwkt_relpooltoken(unp); 1159 lwkt_reltoken(&unp_token); 1160 } 1161 1162 #ifdef notdef 1163 void 1164 unp_abort(struct unpcb *unp) 1165 { 1166 lwkt_gettoken(&unp_token); 1167 unp_free(unp); 1168 lwkt_reltoken(&unp_token); 1169 } 1170 #endif 1171 1172 static int 1173 prison_unpcb(struct thread *td, struct unpcb *unp) 1174 { 1175 struct proc *p; 1176 1177 if (td == NULL) 1178 return (0); 1179 if ((p = td->td_proc) == NULL) 1180 return (0); 1181 if (!p->p_ucred->cr_prison) 1182 return (0); 1183 if (p->p_fd->fd_rdir == unp->unp_rvnode) 1184 return (0); 1185 return (1); 1186 } 1187 1188 static int 1189 unp_pcblist(SYSCTL_HANDLER_ARGS) 1190 { 1191 int error, i, n; 1192 struct unpcb *unp, **unp_list; 1193 unp_gen_t gencnt; 1194 struct unp_head *head; 1195 1196 head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead); 1197 1198 KKASSERT(curproc != NULL); 1199 1200 /* 1201 * The process of preparing the PCB list is too time-consuming and 1202 * resource-intensive to repeat twice on every request. 
1203 */ 1204 if (req->oldptr == NULL) { 1205 n = unp_count; 1206 req->oldidx = (n + n/8) * sizeof(struct xunpcb); 1207 return 0; 1208 } 1209 1210 if (req->newptr != NULL) 1211 return EPERM; 1212 1213 lwkt_gettoken(&unp_token); 1214 1215 /* 1216 * OK, now we're committed to doing something. 1217 */ 1218 gencnt = unp_gencnt; 1219 n = unp_count; 1220 1221 unp_list = kmalloc(n * sizeof *unp_list, M_TEMP, M_WAITOK); 1222 1223 for (unp = LIST_FIRST(head), i = 0; unp && i < n; 1224 unp = LIST_NEXT(unp, unp_link)) { 1225 if (unp->unp_gencnt <= gencnt && !prison_unpcb(req->td, unp)) 1226 unp_list[i++] = unp; 1227 } 1228 n = i; /* in case we lost some during malloc */ 1229 1230 error = 0; 1231 for (i = 0; i < n; i++) { 1232 unp = unp_list[i]; 1233 if (unp->unp_gencnt <= gencnt) { 1234 struct xunpcb xu; 1235 xu.xu_len = sizeof xu; 1236 xu.xu_unpp = unp; 1237 /* 1238 * XXX - need more locking here to protect against 1239 * connect/disconnect races for SMP. 1240 */ 1241 if (unp->unp_addr) 1242 bcopy(unp->unp_addr, &xu.xu_addr, 1243 unp->unp_addr->sun_len); 1244 if (unp->unp_conn && unp->unp_conn->unp_addr) 1245 bcopy(unp->unp_conn->unp_addr, 1246 &xu.xu_caddr, 1247 unp->unp_conn->unp_addr->sun_len); 1248 bcopy(unp, &xu.xu_unp, sizeof *unp); 1249 sotoxsocket(unp->unp_socket, &xu.xu_socket); 1250 error = SYSCTL_OUT(req, &xu, sizeof xu); 1251 } 1252 } 1253 lwkt_reltoken(&unp_token); 1254 kfree(unp_list, M_TEMP); 1255 1256 return error; 1257 } 1258 1259 SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist, CTLFLAG_RD, 1260 (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb", 1261 "List of active local datagram sockets"); 1262 SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLFLAG_RD, 1263 (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb", 1264 "List of active local stream sockets"); 1265 SYSCTL_PROC(_net_local_seqpacket, OID_AUTO, pcblist, CTLFLAG_RD, 1266 (caddr_t)(long)SOCK_SEQPACKET, 0, unp_pcblist, "S,xunpcb", 1267 "List of active local seqpacket stream sockets"); 1268 
1269 static void 1270 unp_shutdown(struct unpcb *unp) 1271 { 1272 struct socket *so; 1273 1274 if ((unp->unp_socket->so_type == SOCK_STREAM || 1275 unp->unp_socket->so_type == SOCK_SEQPACKET) && 1276 unp->unp_conn != NULL && (so = unp->unp_conn->unp_socket)) { 1277 socantrcvmore(so); 1278 } 1279 } 1280 1281 static void 1282 unp_drop(struct unpcb *unp, int err) 1283 { 1284 struct socket *so = unp->unp_socket; 1285 1286 so->so_error = err; 1287 unp_disconnect(unp); 1288 } 1289 1290 #ifdef notdef 1291 void 1292 unp_drain(void) 1293 { 1294 lwkt_gettoken(&unp_token); 1295 lwkt_reltoken(&unp_token); 1296 } 1297 #endif 1298 1299 int 1300 unp_externalize(struct mbuf *rights) 1301 { 1302 struct thread *td = curthread; 1303 struct proc *p = td->td_proc; /* XXX */ 1304 struct lwp *lp = td->td_lwp; 1305 struct cmsghdr *cm = mtod(rights, struct cmsghdr *); 1306 int *fdp; 1307 int i; 1308 struct file **rp; 1309 struct file *fp; 1310 int newfds = (cm->cmsg_len - (CMSG_DATA(cm) - (u_char *)cm)) 1311 / sizeof (struct file *); 1312 int f; 1313 1314 lwkt_gettoken(&unp_token); 1315 1316 /* 1317 * if the new FD's will not fit, then we free them all 1318 */ 1319 if (!fdavail(p, newfds)) { 1320 rp = (struct file **)CMSG_DATA(cm); 1321 for (i = 0; i < newfds; i++) { 1322 fp = *rp; 1323 /* 1324 * zero the pointer before calling unp_discard, 1325 * since it may end up in unp_gc().. 1326 */ 1327 *rp++ = NULL; 1328 unp_discard(fp, NULL); 1329 } 1330 lwkt_reltoken(&unp_token); 1331 return (EMSGSIZE); 1332 } 1333 1334 /* 1335 * now change each pointer to an fd in the global table to 1336 * an integer that is the index to the local fd table entry 1337 * that we set up to point to the global one we are transferring. 1338 * If sizeof (struct file *) is bigger than or equal to sizeof int, 1339 * then do it in forward order. In that case, an integer will 1340 * always come in the same place or before its corresponding 1341 * struct file pointer. 
1342 * If sizeof (struct file *) is smaller than sizeof int, then 1343 * do it in reverse order. 1344 */ 1345 if (sizeof (struct file *) >= sizeof (int)) { 1346 fdp = (int *)CMSG_DATA(cm); 1347 rp = (struct file **)CMSG_DATA(cm); 1348 for (i = 0; i < newfds; i++) { 1349 if (fdalloc(p, 0, &f)) 1350 panic("unp_externalize"); 1351 fp = *rp++; 1352 unp_fp_externalize(lp, fp, f); 1353 *fdp++ = f; 1354 } 1355 } else { 1356 fdp = (int *)CMSG_DATA(cm) + newfds - 1; 1357 rp = (struct file **)CMSG_DATA(cm) + newfds - 1; 1358 for (i = 0; i < newfds; i++) { 1359 if (fdalloc(p, 0, &f)) 1360 panic("unp_externalize"); 1361 fp = *rp--; 1362 unp_fp_externalize(lp, fp, f); 1363 *fdp-- = f; 1364 } 1365 } 1366 1367 /* 1368 * Adjust length, in case sizeof(struct file *) and sizeof(int) 1369 * differs. 1370 */ 1371 cm->cmsg_len = CMSG_LEN(newfds * sizeof(int)); 1372 rights->m_len = cm->cmsg_len; 1373 1374 lwkt_reltoken(&unp_token); 1375 return (0); 1376 } 1377 1378 static void 1379 unp_fp_externalize(struct lwp *lp, struct file *fp, int fd) 1380 { 1381 struct file *fx; 1382 int error; 1383 1384 lwkt_gettoken(&unp_token); 1385 1386 if (lp) { 1387 KKASSERT(fd >= 0); 1388 if (fp->f_flag & FREVOKED) { 1389 kprintf("Warning: revoked fp exiting unix socket\n"); 1390 fx = NULL; 1391 error = falloc(lp, &fx, NULL); 1392 if (error == 0) 1393 fsetfd(lp->lwp_proc->p_fd, fx, fd); 1394 else 1395 fsetfd(lp->lwp_proc->p_fd, NULL, fd); 1396 fdrop(fx); 1397 } else { 1398 fsetfd(lp->lwp_proc->p_fd, fp, fd); 1399 } 1400 } 1401 spin_lock(&unp_spin); 1402 fp->f_msgcount--; 1403 unp_rights--; 1404 spin_unlock(&unp_spin); 1405 fdrop(fp); 1406 1407 lwkt_reltoken(&unp_token); 1408 } 1409 1410 1411 void 1412 unp_init(void) 1413 { 1414 LIST_INIT(&unp_dhead); 1415 LIST_INIT(&unp_shead); 1416 spin_init(&unp_spin, "unpinit"); 1417 } 1418 1419 static int 1420 unp_internalize(struct mbuf *control, struct thread *td) 1421 { 1422 struct proc *p = td->td_proc; 1423 struct filedesc *fdescp; 1424 struct cmsghdr *cm = 
mtod(control, struct cmsghdr *); 1425 struct file **rp; 1426 struct file *fp; 1427 int i, fd, *fdp; 1428 struct cmsgcred *cmcred; 1429 int oldfds; 1430 u_int newlen; 1431 int error; 1432 1433 KKASSERT(p); 1434 lwkt_gettoken(&unp_token); 1435 1436 fdescp = p->p_fd; 1437 if ((cm->cmsg_type != SCM_RIGHTS && cm->cmsg_type != SCM_CREDS) || 1438 cm->cmsg_level != SOL_SOCKET || 1439 CMSG_ALIGN(cm->cmsg_len) != control->m_len) { 1440 error = EINVAL; 1441 goto done; 1442 } 1443 1444 /* 1445 * Fill in credential information. 1446 */ 1447 if (cm->cmsg_type == SCM_CREDS) { 1448 cmcred = (struct cmsgcred *)CMSG_DATA(cm); 1449 cmcred->cmcred_pid = p->p_pid; 1450 cmcred->cmcred_uid = p->p_ucred->cr_ruid; 1451 cmcred->cmcred_gid = p->p_ucred->cr_rgid; 1452 cmcred->cmcred_euid = p->p_ucred->cr_uid; 1453 cmcred->cmcred_ngroups = MIN(p->p_ucred->cr_ngroups, 1454 CMGROUP_MAX); 1455 for (i = 0; i < cmcred->cmcred_ngroups; i++) 1456 cmcred->cmcred_groups[i] = p->p_ucred->cr_groups[i]; 1457 error = 0; 1458 goto done; 1459 } 1460 1461 /* 1462 * cmsghdr may not be aligned, do not allow calculation(s) to 1463 * go negative. 1464 */ 1465 if (cm->cmsg_len < CMSG_LEN(0)) { 1466 error = EINVAL; 1467 goto done; 1468 } 1469 1470 oldfds = (cm->cmsg_len - CMSG_LEN(0)) / sizeof (int); 1471 1472 /* 1473 * check that all the FDs passed in refer to legal OPEN files 1474 * If not, reject the entire operation. 1475 */ 1476 fdp = (int *)CMSG_DATA(cm); 1477 for (i = 0; i < oldfds; i++) { 1478 fd = *fdp++; 1479 if ((unsigned)fd >= fdescp->fd_nfiles || 1480 fdescp->fd_files[fd].fp == NULL) { 1481 error = EBADF; 1482 goto done; 1483 } 1484 if (fdescp->fd_files[fd].fp->f_type == DTYPE_KQUEUE) { 1485 error = EOPNOTSUPP; 1486 goto done; 1487 } 1488 } 1489 /* 1490 * Now replace the integer FDs with pointers to 1491 * the associated global file table entry.. 1492 * Allocate a bigger buffer as necessary. But if an cluster is not 1493 * enough, return E2BIG. 
1494 */ 1495 newlen = CMSG_LEN(oldfds * sizeof(struct file *)); 1496 if (newlen > MCLBYTES) { 1497 error = E2BIG; 1498 goto done; 1499 } 1500 if (newlen - control->m_len > M_TRAILINGSPACE(control)) { 1501 if (control->m_flags & M_EXT) { 1502 error = E2BIG; 1503 goto done; 1504 } 1505 MCLGET(control, MB_WAIT); 1506 if (!(control->m_flags & M_EXT)) { 1507 error = ENOBUFS; 1508 goto done; 1509 } 1510 1511 /* copy the data to the cluster */ 1512 memcpy(mtod(control, char *), cm, cm->cmsg_len); 1513 cm = mtod(control, struct cmsghdr *); 1514 } 1515 1516 /* 1517 * Adjust length, in case sizeof(struct file *) and sizeof(int) 1518 * differs. 1519 */ 1520 cm->cmsg_len = newlen; 1521 control->m_len = CMSG_ALIGN(newlen); 1522 1523 /* 1524 * Transform the file descriptors into struct file pointers. 1525 * If sizeof (struct file *) is bigger than or equal to sizeof int, 1526 * then do it in reverse order so that the int won't get until 1527 * we're done. 1528 * If sizeof (struct file *) is smaller than sizeof int, then 1529 * do it in forward order. 1530 */ 1531 if (sizeof (struct file *) >= sizeof (int)) { 1532 fdp = (int *)CMSG_DATA(cm) + oldfds - 1; 1533 rp = (struct file **)CMSG_DATA(cm) + oldfds - 1; 1534 for (i = 0; i < oldfds; i++) { 1535 fp = fdescp->fd_files[*fdp--].fp; 1536 *rp-- = fp; 1537 fhold(fp); 1538 spin_lock(&unp_spin); 1539 fp->f_msgcount++; 1540 unp_rights++; 1541 spin_unlock(&unp_spin); 1542 } 1543 } else { 1544 fdp = (int *)CMSG_DATA(cm); 1545 rp = (struct file **)CMSG_DATA(cm); 1546 for (i = 0; i < oldfds; i++) { 1547 fp = fdescp->fd_files[*fdp++].fp; 1548 *rp++ = fp; 1549 fhold(fp); 1550 spin_lock(&unp_spin); 1551 fp->f_msgcount++; 1552 unp_rights++; 1553 spin_unlock(&unp_spin); 1554 } 1555 } 1556 error = 0; 1557 done: 1558 lwkt_reltoken(&unp_token); 1559 return error; 1560 } 1561 1562 /* 1563 * Garbage collect in-transit file descriptors that get lost due to 1564 * loops (i.e. 
when a socket is sent to another process over itself, 1565 * and more complex situations). 1566 * 1567 * NOT MPSAFE - TODO socket flush code and maybe closef. Rest is MPSAFE. 1568 */ 1569 1570 struct unp_gc_info { 1571 struct file **extra_ref; 1572 struct file *locked_fp; 1573 int defer; 1574 int index; 1575 int maxindex; 1576 }; 1577 1578 static void 1579 unp_gc(void) 1580 { 1581 struct unp_gc_info info; 1582 static boolean_t unp_gcing; 1583 struct file **fpp; 1584 int i; 1585 1586 /* 1587 * Only one gc can be in-progress at any given moment 1588 */ 1589 spin_lock(&unp_spin); 1590 if (unp_gcing) { 1591 spin_unlock(&unp_spin); 1592 return; 1593 } 1594 unp_gcing = TRUE; 1595 spin_unlock(&unp_spin); 1596 1597 lwkt_gettoken(&unp_token); 1598 1599 /* 1600 * Before going through all this, set all FDs to be NOT defered 1601 * and NOT externally accessible (not marked). During the scan 1602 * a fd can be marked externally accessible but we may or may not 1603 * be able to immediately process it (controlled by FDEFER). 1604 * 1605 * If we loop sleep a bit. The complexity of the topology can cause 1606 * multiple loops. Also failure to acquire the socket's so_rcv 1607 * token can cause us to loop. 1608 */ 1609 allfiles_scan_exclusive(unp_gc_clearmarks, NULL); 1610 do { 1611 info.defer = 0; 1612 allfiles_scan_exclusive(unp_gc_checkmarks, &info); 1613 if (info.defer) 1614 tsleep(&info, 0, "gcagain", 1); 1615 } while (info.defer); 1616 1617 /* 1618 * We grab an extra reference to each of the file table entries 1619 * that are not otherwise accessible and then free the rights 1620 * that are stored in messages on them. 1621 * 1622 * The bug in the orginal code is a little tricky, so I'll describe 1623 * what's wrong with it here. 1624 * 1625 * It is incorrect to simply unp_discard each entry for f_msgcount 1626 * times -- consider the case of sockets A and B that contain 1627 * references to each other. 
On a last close of some other socket, 1628 * we trigger a gc since the number of outstanding rights (unp_rights) 1629 * is non-zero. If during the sweep phase the gc code un_discards, 1630 * we end up doing a (full) closef on the descriptor. A closef on A 1631 * results in the following chain. Closef calls soo_close, which 1632 * calls soclose. Soclose calls first (through the switch 1633 * uipc_usrreq) unp_detach, which re-invokes unp_gc. Unp_gc simply 1634 * returns because the previous instance had set unp_gcing, and 1635 * we return all the way back to soclose, which marks the socket 1636 * with SS_NOFDREF, and then calls sofree. Sofree calls sorflush 1637 * to free up the rights that are queued in messages on the socket A, 1638 * i.e., the reference on B. The sorflush calls via the dom_dispose 1639 * switch unp_dispose, which unp_scans with unp_discard. This second 1640 * instance of unp_discard just calls closef on B. 1641 * 1642 * Well, a similar chain occurs on B, resulting in a sorflush on B, 1643 * which results in another closef on A. Unfortunately, A is already 1644 * being closed, and the descriptor has already been marked with 1645 * SS_NOFDREF, and soclose panics at this point. 1646 * 1647 * Here, we first take an extra reference to each inaccessible 1648 * descriptor. Then, we call sorflush ourself, since we know 1649 * it is a Unix domain socket anyhow. After we destroy all the 1650 * rights carried in messages, we do a last closef to get rid 1651 * of our extra reference. This is the last close, and the 1652 * unp_detach etc will shut down the socket. 
1653 * 1654 * 91/09/19, bsy@cs.cmu.edu 1655 */ 1656 info.extra_ref = kmalloc(256 * sizeof(struct file *), M_FILE, M_WAITOK); 1657 info.maxindex = 256; 1658 1659 do { 1660 /* 1661 * Look for matches 1662 */ 1663 info.index = 0; 1664 allfiles_scan_exclusive(unp_gc_checkrefs, &info); 1665 1666 /* 1667 * For each FD on our hit list, do the following two things 1668 */ 1669 for (i = info.index, fpp = info.extra_ref; --i >= 0; ++fpp) { 1670 struct file *tfp = *fpp; 1671 if (tfp->f_type == DTYPE_SOCKET && tfp->f_data != NULL) 1672 sorflush((struct socket *)(tfp->f_data)); 1673 } 1674 for (i = info.index, fpp = info.extra_ref; --i >= 0; ++fpp) 1675 closef(*fpp, NULL); 1676 } while (info.index == info.maxindex); 1677 1678 lwkt_reltoken(&unp_token); 1679 1680 kfree((caddr_t)info.extra_ref, M_FILE); 1681 unp_gcing = FALSE; 1682 } 1683 1684 /* 1685 * MPSAFE - NOTE: filehead list and file pointer spinlocked on entry 1686 */ 1687 static int 1688 unp_gc_checkrefs(struct file *fp, void *data) 1689 { 1690 struct unp_gc_info *info = data; 1691 1692 if (fp->f_count == 0) 1693 return(0); 1694 if (info->index == info->maxindex) 1695 return(-1); 1696 1697 /* 1698 * If all refs are from msgs, and it's not marked accessible 1699 * then it must be referenced from some unreachable cycle 1700 * of (shut-down) FDs, so include it in our 1701 * list of FDs to remove 1702 */ 1703 if (fp->f_count == fp->f_msgcount && !(fp->f_flag & FMARK)) { 1704 info->extra_ref[info->index++] = fp; 1705 fhold(fp); 1706 } 1707 return(0); 1708 } 1709 1710 /* 1711 * MPSAFE - NOTE: filehead list and file pointer spinlocked on entry 1712 */ 1713 static int 1714 unp_gc_clearmarks(struct file *fp, void *data __unused) 1715 { 1716 atomic_clear_int(&fp->f_flag, FMARK | FDEFER); 1717 return(0); 1718 } 1719 1720 /* 1721 * MPSAFE - NOTE: filehead list and file pointer spinlocked on entry 1722 */ 1723 static int 1724 unp_gc_checkmarks(struct file *fp, void *data) 1725 { 1726 struct unp_gc_info *info = data; 1727 struct 
socket *so; 1728 1729 /* 1730 * If the file is not open, skip it. Make sure it isn't marked 1731 * defered or we could loop forever, in case we somehow race 1732 * something. 1733 */ 1734 if (fp->f_count == 0) { 1735 if (fp->f_flag & FDEFER) 1736 atomic_clear_int(&fp->f_flag, FDEFER); 1737 return(0); 1738 } 1739 /* 1740 * If we already marked it as 'defer' in a 1741 * previous pass, then try process it this time 1742 * and un-mark it 1743 */ 1744 if (fp->f_flag & FDEFER) { 1745 atomic_clear_int(&fp->f_flag, FDEFER); 1746 } else { 1747 /* 1748 * if it's not defered, then check if it's 1749 * already marked.. if so skip it 1750 */ 1751 if (fp->f_flag & FMARK) 1752 return(0); 1753 /* 1754 * If all references are from messages 1755 * in transit, then skip it. it's not 1756 * externally accessible. 1757 */ 1758 if (fp->f_count == fp->f_msgcount) 1759 return(0); 1760 /* 1761 * If it got this far then it must be 1762 * externally accessible. 1763 */ 1764 atomic_set_int(&fp->f_flag, FMARK); 1765 } 1766 1767 /* 1768 * either it was defered, or it is externally 1769 * accessible and not already marked so. 1770 * Now check if it is possibly one of OUR sockets. 1771 */ 1772 if (fp->f_type != DTYPE_SOCKET || 1773 (so = (struct socket *)fp->f_data) == NULL) { 1774 return(0); 1775 } 1776 if (so->so_proto->pr_domain != &localdomain || 1777 !(so->so_proto->pr_flags & PR_RIGHTS)) { 1778 return(0); 1779 } 1780 1781 /* 1782 * So, Ok, it's one of our sockets and it IS externally accessible 1783 * (or was defered). Now we look to see if we hold any file 1784 * descriptors in its message buffers. Follow those links and mark 1785 * them as accessible too. 1786 * 1787 * We are holding multiple spinlocks here, if we cannot get the 1788 * token non-blocking defer until the next loop. 
1789 */ 1790 info->locked_fp = fp; 1791 if (lwkt_trytoken(&so->so_rcv.ssb_token)) { 1792 unp_scan(so->so_rcv.ssb_mb, unp_mark, info); 1793 lwkt_reltoken(&so->so_rcv.ssb_token); 1794 } else { 1795 atomic_set_int(&fp->f_flag, FDEFER); 1796 ++info->defer; 1797 } 1798 return (0); 1799 } 1800 1801 /* 1802 * Scan all unix domain sockets and replace any revoked file pointers 1803 * found with the dummy file pointer fx. We don't worry about races 1804 * against file pointers being read out as those are handled in the 1805 * externalize code. 1806 */ 1807 1808 #define REVOKE_GC_MAXFILES 32 1809 1810 struct unp_revoke_gc_info { 1811 struct file *fx; 1812 struct file *fary[REVOKE_GC_MAXFILES]; 1813 int fcount; 1814 }; 1815 1816 void 1817 unp_revoke_gc(struct file *fx) 1818 { 1819 struct unp_revoke_gc_info info; 1820 int i; 1821 1822 lwkt_gettoken(&unp_token); 1823 info.fx = fx; 1824 do { 1825 info.fcount = 0; 1826 allfiles_scan_exclusive(unp_revoke_gc_check, &info); 1827 for (i = 0; i < info.fcount; ++i) 1828 unp_fp_externalize(NULL, info.fary[i], -1); 1829 } while (info.fcount == REVOKE_GC_MAXFILES); 1830 lwkt_reltoken(&unp_token); 1831 } 1832 1833 /* 1834 * Check for and replace revoked descriptors. 1835 * 1836 * WARNING: This routine is not allowed to block. 1837 */ 1838 static int 1839 unp_revoke_gc_check(struct file *fps, void *vinfo) 1840 { 1841 struct unp_revoke_gc_info *info = vinfo; 1842 struct file *fp; 1843 struct socket *so; 1844 struct mbuf *m0; 1845 struct mbuf *m; 1846 struct file **rp; 1847 struct cmsghdr *cm; 1848 int i; 1849 int qfds; 1850 1851 /* 1852 * Is this a unix domain socket with rights-passing abilities? 
1853 */ 1854 if (fps->f_type != DTYPE_SOCKET) 1855 return (0); 1856 if ((so = (struct socket *)fps->f_data) == NULL) 1857 return(0); 1858 if (so->so_proto->pr_domain != &localdomain) 1859 return(0); 1860 if ((so->so_proto->pr_flags & PR_RIGHTS) == 0) 1861 return(0); 1862 1863 /* 1864 * Scan the mbufs for control messages and replace any revoked 1865 * descriptors we find. 1866 */ 1867 lwkt_gettoken(&so->so_rcv.ssb_token); 1868 m0 = so->so_rcv.ssb_mb; 1869 while (m0) { 1870 for (m = m0; m; m = m->m_next) { 1871 if (m->m_type != MT_CONTROL) 1872 continue; 1873 if (m->m_len < sizeof(*cm)) 1874 continue; 1875 cm = mtod(m, struct cmsghdr *); 1876 if (cm->cmsg_level != SOL_SOCKET || 1877 cm->cmsg_type != SCM_RIGHTS) { 1878 continue; 1879 } 1880 qfds = (cm->cmsg_len - CMSG_LEN(0)) / sizeof(void *); 1881 rp = (struct file **)CMSG_DATA(cm); 1882 for (i = 0; i < qfds; i++) { 1883 fp = rp[i]; 1884 if (fp->f_flag & FREVOKED) { 1885 kprintf("Warning: Removing revoked fp from unix domain socket queue\n"); 1886 fhold(info->fx); 1887 info->fx->f_msgcount++; 1888 unp_rights++; 1889 rp[i] = info->fx; 1890 info->fary[info->fcount++] = fp; 1891 } 1892 if (info->fcount == REVOKE_GC_MAXFILES) 1893 break; 1894 } 1895 if (info->fcount == REVOKE_GC_MAXFILES) 1896 break; 1897 } 1898 m0 = m0->m_nextpkt; 1899 if (info->fcount == REVOKE_GC_MAXFILES) 1900 break; 1901 } 1902 lwkt_reltoken(&so->so_rcv.ssb_token); 1903 1904 /* 1905 * Stop the scan if we filled up our array. 1906 */ 1907 if (info->fcount == REVOKE_GC_MAXFILES) 1908 return(-1); 1909 return(0); 1910 } 1911 1912 /* 1913 * Dispose of the fp's stored in a mbuf. 1914 * 1915 * The dds loop can cause additional fps to be entered onto the 1916 * list while it is running, flattening out the operation and avoiding 1917 * a deep kernel stack recursion. 
1918 */ 1919 void 1920 unp_dispose(struct mbuf *m) 1921 { 1922 unp_defdiscard_t dds; 1923 1924 lwkt_gettoken(&unp_token); 1925 ++unp_defdiscard_nest; 1926 if (m) { 1927 unp_scan(m, unp_discard, NULL); 1928 } 1929 if (unp_defdiscard_nest == 1) { 1930 while ((dds = unp_defdiscard_base) != NULL) { 1931 unp_defdiscard_base = dds->next; 1932 closef(dds->fp, NULL); 1933 kfree(dds, M_UNPCB); 1934 } 1935 } 1936 --unp_defdiscard_nest; 1937 lwkt_reltoken(&unp_token); 1938 } 1939 1940 static int 1941 unp_listen(struct unpcb *unp, struct thread *td) 1942 { 1943 struct proc *p = td->td_proc; 1944 1945 KKASSERT(p); 1946 lwkt_gettoken(&unp_token); 1947 cru2x(p->p_ucred, &unp->unp_peercred); 1948 unp->unp_flags |= UNP_HAVEPCCACHED; 1949 lwkt_reltoken(&unp_token); 1950 return (0); 1951 } 1952 1953 static void 1954 unp_scan(struct mbuf *m0, void (*op)(struct file *, void *), void *data) 1955 { 1956 struct mbuf *m; 1957 struct file **rp; 1958 struct cmsghdr *cm; 1959 int i; 1960 int qfds; 1961 1962 while (m0) { 1963 for (m = m0; m; m = m->m_next) { 1964 if (m->m_type == MT_CONTROL && 1965 m->m_len >= sizeof(*cm)) { 1966 cm = mtod(m, struct cmsghdr *); 1967 if (cm->cmsg_level != SOL_SOCKET || 1968 cm->cmsg_type != SCM_RIGHTS) 1969 continue; 1970 qfds = (cm->cmsg_len - CMSG_LEN(0)) / 1971 sizeof(void *); 1972 rp = (struct file **)CMSG_DATA(cm); 1973 for (i = 0; i < qfds; i++) 1974 (*op)(*rp++, data); 1975 break; /* XXX, but saves time */ 1976 } 1977 } 1978 m0 = m0->m_nextpkt; 1979 } 1980 } 1981 1982 /* 1983 * Mark visibility. info->defer is recalculated on every pass. 1984 */ 1985 static void 1986 unp_mark(struct file *fp, void *data) 1987 { 1988 struct unp_gc_info *info = data; 1989 1990 if ((fp->f_flag & FMARK) == 0) { 1991 ++info->defer; 1992 atomic_set_int(&fp->f_flag, FMARK | FDEFER); 1993 } else if (fp->f_flag & FDEFER) { 1994 ++info->defer; 1995 } 1996 } 1997 1998 /* 1999 * Discard a fp previously held in a unix domain socket mbuf. 
To 2000 * avoid blowing out the kernel stack due to contrived chain-reactions 2001 * we may have to defer the operation to a higher procedural level. 2002 * 2003 * Caller holds unp_token 2004 */ 2005 static void 2006 unp_discard(struct file *fp, void *data __unused) 2007 { 2008 unp_defdiscard_t dds; 2009 2010 spin_lock(&unp_spin); 2011 fp->f_msgcount--; 2012 unp_rights--; 2013 spin_unlock(&unp_spin); 2014 2015 if (unp_defdiscard_nest) { 2016 dds = kmalloc(sizeof(*dds), M_UNPCB, M_WAITOK|M_ZERO); 2017 dds->fp = fp; 2018 dds->next = unp_defdiscard_base; 2019 unp_defdiscard_base = dds; 2020 } else { 2021 closef(fp, NULL); 2022 } 2023 } 2024 2025