/*
 * Copyright (c) 1982, 1986, 1989, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * From: @(#)uipc_usrreq.c	8.3 (Berkeley) 1/4/94
 * $FreeBSD: src/sys/kern/uipc_usrreq.c,v 1.54.2.10 2003/03/04 17:28:09 nectar Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/domain.h>
#include <sys/fcntl.h>
#include <sys/malloc.h>		/* XXX must be before <sys/file.h> */
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/mbuf.h>
#include <sys/nlookup.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/resourcevar.h>
#include <sys/stat.h>
#include <sys/mount.h>
#include <sys/sysctl.h>
#include <sys/un.h>
#include <sys/unpcb.h>
#include <sys/vnode.h>
#include <sys/kern_syscall.h>
#include <sys/taskqueue.h>

#include <sys/file2.h>
#include <sys/spinlock2.h>
#include <sys/socketvar2.h>
#include <sys/msgport2.h>

#define UNP_DETACHED		UNP_PRIVATE1
#define UNP_CONNECTING		UNP_PRIVATE2
#define UNP_DROPPED		UNP_PRIVATE3
#define UNP_MARKER		UNP_PRIVATE4

#define UNP_ISATTACHED(unp)	\
    ((unp) != NULL && ((unp)->unp_flags & UNP_DETACHED) == 0)

#ifdef INVARIANTS
#define UNP_ASSERT_TOKEN_HELD(unp) \
    ASSERT_LWKT_TOKEN_HELD(lwkt_token_pool_lookup((unp)))
#else	/* !INVARIANTS */
#define UNP_ASSERT_TOKEN_HELD(unp)
#endif	/* INVARIANTS */

struct unp_defdiscard {
	SLIST_ENTRY(unp_defdiscard) next;
	struct file *fp;
};
SLIST_HEAD(unp_defdiscard_list, unp_defdiscard);

TAILQ_HEAD(unpcb_qhead, unpcb);
struct unp_global_head {
	struct unpcb_qhead	list;
	int			count;
};

static	MALLOC_DEFINE(M_UNPCB, "unpcb", "unpcb struct");
static	unp_gen_t unp_gencnt;

static struct unp_global_head unp_stream_head;
static struct unp_global_head unp_dgram_head;
static struct unp_global_head unp_seqpkt_head;
static struct lwkt_token unp_token = LWKT_TOKEN_INITIALIZER(unp_token);
static struct taskqueue *unp_taskqueue;

static struct unp_defdiscard_list unp_defdiscard_head;
static struct spinlock unp_defdiscard_spin;
static struct task unp_defdiscard_task;

/*
 * Unix communications domain.
 *
 * TODO:
 *	RDM
 *	rethink name space problems
 *	need a proper out-of-band
 *	lock pushdown
 */
static struct sockaddr sun_noname = { sizeof(sun_noname), AF_LOCAL };
static ino_t	unp_ino = 1;		/* prototype for fake inode numbers */

static int	unp_attach(struct socket *, struct pru_attach_info *);
static void	unp_detach(struct unpcb *);
static int	unp_bind(struct unpcb *, struct sockaddr *, struct thread *);
static int	unp_connect(struct socket *, struct sockaddr *,
		    struct thread *);
static void	unp_disconnect(struct unpcb *, int);
static void	unp_shutdown(struct unpcb *);
static void	unp_gc(void);
static int	unp_gc_clearmarks(struct file *, void *);
static int	unp_gc_checkmarks(struct file *, void *);
static int	unp_gc_checkrefs(struct file *, void *);
static void	unp_scan(struct mbuf *, void (*)(struct file *, void *),
		    void *data);
static void	unp_mark(struct file *, void *data);
static void	unp_discard(struct file *, void *);
static int	unp_internalize(struct mbuf *, struct thread *);
static int	unp_listen(struct unpcb *, struct thread *);
static void	unp_fp_externalize(struct lwp *lp, struct file *fp, int fd,
		    int flags);
static int	unp_find_lockref(struct sockaddr *nam, struct thread *td,
		    short type, struct unpcb **unp_ret);
static int	unp_connect_pair(struct unpcb *unp, struct unpcb *unp2);
static void	unp_drop(struct unpcb *unp, int error);
static void	unp_defdiscard_taskfunc(void *, int);

/*
 * SMP Considerations:
 *
 *	Since unp_token will be automatically released upon execution of
 *	blocking code, we need to reference unp_conn before any possible
 *	blocking code to prevent it from being ripped out behind our back.
 *
 *	Any adjustment to unp->unp_conn requires both the global unp_token
 *	AND the per-unp token (lwkt_token_pool_lookup(unp)) to be held.
 *
 *	Any access to so_pcb to obtain unp requires the pool token for
 *	unp to be held.
 */

static __inline void
unp_reference(struct unpcb *unp)
{
	/* 0->1 transition will not work */
	KKASSERT(unp->unp_refcnt > 0);
	atomic_add_int(&unp->unp_refcnt, 1);
}

static __inline void
unp_free(struct unpcb *unp)
{
	KKASSERT(unp->unp_refcnt > 0);
	if (atomic_fetchadd_int(&unp->unp_refcnt, -1) == 1)
		unp_detach(unp);
}
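/*
 * Illustrative sketch (editorial addition, not part of the original
 * file): the locking recipe implied by the SMP notes above.  A caller
 * pins its peer with unp_reference() before any potentially blocking
 * operation, because unp_token is automatically released while the
 * thread blocks.  The function name example_wakeup_peer() is
 * hypothetical; everything else is this file's own API.
 *
 *	static void
 *	example_wakeup_peer(struct socket *so)
 *	{
 *		struct unpcb *unp, *unp2;
 *
 *		unp = unp_getsocktoken(so);	// revalidates so->so_pcb
 *		if (UNP_ISATTACHED(unp) && (unp2 = unp->unp_conn) != NULL) {
 *			unp_reference(unp2);	// pin peer before blocking
 *			sowwakeup(unp2->unp_socket); // may block on tokens
 *			unp_free(unp2);		// drop the pin
 *		}
 *		unp_reltoken(unp);
 *	}
 */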
static __inline struct unpcb *
unp_getsocktoken(struct socket *so)
{
	struct unpcb *unp;

	/*
	 * The unp pointer is invalid until we verify that it is
	 * good by re-checking so_pcb AFTER obtaining the token.
	 */
	while ((unp = so->so_pcb) != NULL) {
		lwkt_getpooltoken(unp);
		if (unp == so->so_pcb)
			break;
		lwkt_relpooltoken(unp);
	}
	return unp;
}

static __inline void
unp_reltoken(struct unpcb *unp)
{
	if (unp != NULL)
		lwkt_relpooltoken(unp);
}

static __inline void
unp_setflags(struct unpcb *unp, int flags)
{
	atomic_set_int(&unp->unp_flags, flags);
}

static __inline void
unp_clrflags(struct unpcb *unp, int flags)
{
	atomic_clear_int(&unp->unp_flags, flags);
}

static __inline struct unp_global_head *
unp_globalhead(short type)
{
	switch (type) {
	case SOCK_STREAM:
		return &unp_stream_head;
	case SOCK_DGRAM:
		return &unp_dgram_head;
	case SOCK_SEQPACKET:
		return &unp_seqpkt_head;
	default:
		panic("unknown socket type %d", type);
	}
}

/*
 * NOTE: (so) is referenced from soabort*() and netmsg_pru_abort()
 *	 will sofree() it when we return.
 */
static void
uipc_abort(netmsg_t msg)
{
	struct unpcb *unp;
	int error;

	lwkt_gettoken(&unp_token);
	unp = unp_getsocktoken(msg->base.nm_so);

	if (UNP_ISATTACHED(unp)) {
		unp_setflags(unp, UNP_DETACHED);
		unp_drop(unp, ECONNABORTED);
		unp_free(unp);
		error = 0;
	} else {
		error = EINVAL;
	}

	unp_reltoken(unp);
	lwkt_reltoken(&unp_token);

	lwkt_replymsg(&msg->lmsg, error);
}
static void
uipc_accept(netmsg_t msg)
{
	struct unpcb *unp;
	int error;

	lwkt_gettoken(&unp_token);
	unp = unp_getsocktoken(msg->base.nm_so);

	if (!UNP_ISATTACHED(unp)) {
		error = EINVAL;
	} else {
		struct unpcb *unp2 = unp->unp_conn;

		/*
		 * Pass back name of connected socket,
		 * if it was bound and we are still connected
		 * (our peer may have closed already!).
		 */
		if (unp2 && unp2->unp_addr) {
			unp_reference(unp2);
			*msg->accept.nm_nam = dup_sockaddr(
				(struct sockaddr *)unp2->unp_addr);
			unp_free(unp2);
		} else {
			*msg->accept.nm_nam = dup_sockaddr(&sun_noname);
		}
		error = 0;
	}

	unp_reltoken(unp);
	lwkt_reltoken(&unp_token);

	lwkt_replymsg(&msg->lmsg, error);
}

static void
uipc_attach(netmsg_t msg)
{
	int error;

	lwkt_gettoken(&unp_token);

	KASSERT(msg->base.nm_so->so_pcb == NULL, ("double unp attach"));
	error = unp_attach(msg->base.nm_so, msg->attach.nm_ai);

	lwkt_reltoken(&unp_token);
	lwkt_replymsg(&msg->lmsg, error);
}

static void
uipc_bind(netmsg_t msg)
{
	struct unpcb *unp;
	int error;

	lwkt_gettoken(&unp_token);
	unp = unp_getsocktoken(msg->base.nm_so);

	if (UNP_ISATTACHED(unp))
		error = unp_bind(unp, msg->bind.nm_nam, msg->bind.nm_td);
	else
		error = EINVAL;

	unp_reltoken(unp);
	lwkt_reltoken(&unp_token);

	lwkt_replymsg(&msg->lmsg, error);
}

static void
uipc_connect(netmsg_t msg)
{
	int error;

	error = unp_connect(msg->base.nm_so, msg->connect.nm_nam,
	    msg->connect.nm_td);
	lwkt_replymsg(&msg->lmsg, error);
}

static void
uipc_connect2(netmsg_t msg)
{
	int error;

	error = unp_connect2(msg->connect2.nm_so1, msg->connect2.nm_so2);
	lwkt_replymsg(&msg->lmsg, error);
}

/* control is EOPNOTSUPP */

static void
uipc_detach(netmsg_t msg)
{
	struct unpcb *unp;
	int error;

	lwkt_gettoken(&unp_token);
	unp = unp_getsocktoken(msg->base.nm_so);

	if (UNP_ISATTACHED(unp)) {
		unp_setflags(unp, UNP_DETACHED);
		unp_drop(unp, 0);
		unp_free(unp);
		error = 0;
	} else {
		error = EINVAL;
	}

	unp_reltoken(unp);
	lwkt_reltoken(&unp_token);

	lwkt_replymsg(&msg->lmsg, error);
}

static void
uipc_disconnect(netmsg_t msg)
{
	struct unpcb *unp;
	int error;

	lwkt_gettoken(&unp_token);
	unp = unp_getsocktoken(msg->base.nm_so);

	if (UNP_ISATTACHED(unp)) {
		unp_disconnect(unp, 0);
		error = 0;
	} else {
		error = EINVAL;
	}

	unp_reltoken(unp);
	lwkt_reltoken(&unp_token);

	lwkt_replymsg(&msg->lmsg, error);
}

static void
uipc_listen(netmsg_t msg)
{
	struct unpcb *unp;
	int error;

	lwkt_gettoken(&unp_token);
	unp = unp_getsocktoken(msg->base.nm_so);

	if (!UNP_ISATTACHED(unp) || unp->unp_vnode == NULL)
		error = EINVAL;
	else
		error = unp_listen(unp, msg->listen.nm_td);

	unp_reltoken(unp);
	lwkt_reltoken(&unp_token);

	lwkt_replymsg(&msg->lmsg, error);
}

static void
uipc_peeraddr(netmsg_t msg)
{
	struct unpcb *unp;
	int error;

	lwkt_gettoken(&unp_token);
	unp = unp_getsocktoken(msg->base.nm_so);

	if (!UNP_ISATTACHED(unp)) {
		error = EINVAL;
	} else if (unp->unp_conn && unp->unp_conn->unp_addr) {
		struct unpcb *unp2 = unp->unp_conn;

		unp_reference(unp2);
		*msg->peeraddr.nm_nam = dup_sockaddr(
			(struct sockaddr *)unp2->unp_addr);
		unp_free(unp2);
		error = 0;
	} else {
		/*
		 * XXX: It seems that this test always fails even when the
		 * connection is established.  So, this else clause is
		 * added as a workaround to return a PF_LOCAL sockaddr.
		 */
		*msg->peeraddr.nm_nam = dup_sockaddr(&sun_noname);
		error = 0;
	}

	unp_reltoken(unp);
	lwkt_reltoken(&unp_token);

	lwkt_replymsg(&msg->lmsg, error);
}
static void
uipc_rcvd(netmsg_t msg)
{
	struct unpcb *unp, *unp2;
	struct socket *so;
	struct socket *so2;
	int error;

	/*
	 * so_pcb is only modified with both the global and the unp
	 * pool token held.
	 */
	so = msg->base.nm_so;
	unp = unp_getsocktoken(so);

	if (!UNP_ISATTACHED(unp)) {
		error = EINVAL;
		goto done;
	}

	switch (so->so_type) {
	case SOCK_DGRAM:
		panic("uipc_rcvd DGRAM?");
		/*NOTREACHED*/
	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		if (unp->unp_conn == NULL)
			break;
		unp2 = unp->unp_conn;	/* protected by pool token */

		/*
		 * Because we are transferring mbufs directly to the
		 * peer socket we have to use SSB_STOP on the sender
		 * to prevent it from building up infinite mbufs.
		 *
		 * As in several places in this module we have to ref unp2
		 * to ensure that it does not get ripped out from under us
		 * if we block on the so2 token or in sowwakeup().
		 */
		so2 = unp2->unp_socket;
		unp_reference(unp2);
		lwkt_gettoken(&so2->so_rcv.ssb_token);
		if (so->so_rcv.ssb_cc < so2->so_snd.ssb_hiwat &&
		    so->so_rcv.ssb_mbcnt < so2->so_snd.ssb_mbmax) {
			atomic_clear_int(&so2->so_snd.ssb_flags, SSB_STOP);

			sowwakeup(so2);
		}
		lwkt_reltoken(&so2->so_rcv.ssb_token);
		unp_free(unp2);
		break;
	default:
		panic("uipc_rcvd unknown socktype");
		/*NOTREACHED*/
	}
	error = 0;
done:
	unp_reltoken(unp);
	lwkt_replymsg(&msg->lmsg, error);
}

/* pru_rcvoob is EOPNOTSUPP */

static void
uipc_send(netmsg_t msg)
{
	struct unpcb *unp, *unp2;
	struct socket *so;
	struct socket *so2;
	struct mbuf *control;
	struct mbuf *m;
	int error = 0;

	control = msg->send.nm_control;
	m = msg->send.nm_m;

	/*
	 * so_pcb is only modified with both the global and the unp
	 * pool token held.
	 */
	so = msg->base.nm_so;
	unp = unp_getsocktoken(so);

	if (!UNP_ISATTACHED(unp)) {
		error = EINVAL;
		goto release;
	}

	if (msg->send.nm_flags & PRUS_OOB) {
		error = EOPNOTSUPP;
		goto release;
	}

	wakeup_start_delayed();

	if (control && (error = unp_internalize(control, msg->send.nm_td)))
		goto release;

	switch (so->so_type) {
	case SOCK_DGRAM:
	{
		struct sockaddr *from;

		if (msg->send.nm_addr) {
			if (unp->unp_conn) {
				error = EISCONN;
				break;
			}
			lwkt_gettoken(&unp_token);
			error = unp_find_lockref(msg->send.nm_addr,
			    msg->send.nm_td, so->so_type, &unp2);
			lwkt_reltoken(&unp_token);
			if (error)
				break;
			/*
			 * NOTE:
			 * unp2 is locked and referenced.
			 *
			 * We could unlock unp2 now, since it was checked
			 * and referenced.
			 */
			unp_reltoken(unp2);
		} else {
			if (unp->unp_conn == NULL) {
				error = ENOTCONN;
				break;
			}
			unp2 = unp->unp_conn;
			unp_reference(unp2);
		}
		/* NOTE: unp2 is referenced. */
		so2 = unp2->unp_socket;

		if (unp->unp_addr)
			from = (struct sockaddr *)unp->unp_addr;
		else
			from = &sun_noname;

		lwkt_gettoken(&so2->so_rcv.ssb_token);
		if (ssb_appendaddr(&so2->so_rcv, from, m, control)) {
			sorwakeup(so2);
			m = NULL;
			control = NULL;
		} else {
			error = ENOBUFS;
		}
		lwkt_reltoken(&so2->so_rcv.ssb_token);

		unp_free(unp2);
		break;
	}

	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		/* Connect if not connected yet. */
		/*
		 * Note: A better implementation would complain
		 * if not equal to the peer's address.
		 */
		if (unp->unp_conn == NULL) {
			if (msg->send.nm_addr) {
				error = unp_connect(so,
				    msg->send.nm_addr,
				    msg->send.nm_td);
				if (error)
					break;	/* XXX */
			}
			/*
			 * NOTE:
			 * unp_conn still could be NULL, even if the
			 * above unp_connect() succeeds; since the
			 * current unp's token could be released due
			 * to blocking operations after unp_conn is
			 * assigned.
			 */
			if (unp->unp_conn == NULL) {
				error = ENOTCONN;
				break;
			}
		}
		if (so->so_state & SS_CANTSENDMORE) {
			error = EPIPE;
			break;
		}

		unp2 = unp->unp_conn;
		KASSERT(unp2 != NULL, ("unp is not connected"));
		so2 = unp2->unp_socket;

		unp_reference(unp2);

		/*
		 * Send to paired receive port, and then reduce
		 * send buffer hiwater marks to maintain backpressure.
		 * Wake up readers.
		 */
		lwkt_gettoken(&so2->so_rcv.ssb_token);
		if (control) {
			if (ssb_appendcontrol(&so2->so_rcv, m, control)) {
				control = NULL;
				m = NULL;
			}
		} else if (so->so_type == SOCK_SEQPACKET) {
			sbappendrecord(&so2->so_rcv.sb, m);
			m = NULL;
		} else {
			sbappend(&so2->so_rcv.sb, m);
			m = NULL;
		}

		/*
		 * Because we are transferring mbufs directly to the
		 * peer socket we have to use SSB_STOP on the sender
		 * to prevent it from building up infinite mbufs.
		 */
		if (so2->so_rcv.ssb_cc >= so->so_snd.ssb_hiwat ||
		    so2->so_rcv.ssb_mbcnt >= so->so_snd.ssb_mbmax) {
			atomic_set_int(&so->so_snd.ssb_flags, SSB_STOP);
		}
		lwkt_reltoken(&so2->so_rcv.ssb_token);
		sorwakeup(so2);

		unp_free(unp2);
		break;

	default:
		panic("uipc_send unknown socktype");
	}

	/*
	 * SEND_EOF is equivalent to a SEND followed by a SHUTDOWN.
	 */
	if (msg->send.nm_flags & PRUS_EOF) {
		socantsendmore(so);
		unp_shutdown(unp);
	}

	if (control && error != 0)
		unp_dispose(control);
release:
	unp_reltoken(unp);
	wakeup_end_delayed();

	if (control)
		m_freem(control);
	if (m)
		m_freem(m);
	lwkt_replymsg(&msg->lmsg, error);
}
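/*
 * Editorial note: the SSB_STOP handshake split between uipc_send() above
 * and uipc_rcvd() earlier is the entire flow-control story for stream
 * and seqpacket sockets.  In rough pseudo-code:
 *
 *	// sender side, uipc_send(), after appending mbufs:
 *	if (peer->so_rcv.ssb_cc >= so->so_snd.ssb_hiwat ||
 *	    peer->so_rcv.ssb_mbcnt >= so->so_snd.ssb_mbmax)
 *		set SSB_STOP on so->so_snd;	// writer will block
 *
 *	// receiver side, uipc_rcvd(), after the application reads:
 *	if (so->so_rcv.ssb_cc < peer->so_snd.ssb_hiwat &&
 *	    so->so_rcv.ssb_mbcnt < peer->so_snd.ssb_mbmax) {
 *		clear SSB_STOP on peer->so_snd;
 *		sowwakeup(peer);		// writer resumes
 *	}
 */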
/*
 * MPSAFE
 */
static void
uipc_sense(netmsg_t msg)
{
	struct unpcb *unp;
	struct socket *so;
	struct stat *sb;
	int error;

	so = msg->base.nm_so;
	sb = msg->sense.nm_stat;

	/*
	 * so_pcb is only modified with both the global and the unp
	 * pool token held.
	 */
	unp = unp_getsocktoken(so);

	if (!UNP_ISATTACHED(unp)) {
		error = EINVAL;
		goto done;
	}

	sb->st_blksize = so->so_snd.ssb_hiwat;
	sb->st_dev = NOUDEV;
	if (unp->unp_ino == 0) {	/* make up a non-zero inode number */
		unp->unp_ino = atomic_fetchadd_long(&unp_ino, 1);
		if (__predict_false(unp->unp_ino == 0))
			unp->unp_ino = atomic_fetchadd_long(&unp_ino, 1);
	}
	sb->st_ino = unp->unp_ino;
	error = 0;
done:
	unp_reltoken(unp);
	lwkt_replymsg(&msg->lmsg, error);
}

static void
uipc_shutdown(netmsg_t msg)
{
	struct socket *so;
	struct unpcb *unp;
	int error;

	/*
	 * so_pcb is only modified with both the global and the unp
	 * pool token held.
	 */
	so = msg->base.nm_so;
	unp = unp_getsocktoken(so);

	if (UNP_ISATTACHED(unp)) {
		socantsendmore(so);
		unp_shutdown(unp);
		error = 0;
	} else {
		error = EINVAL;
	}

	unp_reltoken(unp);
	lwkt_replymsg(&msg->lmsg, error);
}

static void
uipc_sockaddr(netmsg_t msg)
{
	struct unpcb *unp;
	int error;

	/*
	 * so_pcb is only modified with both the global and the unp
	 * pool token held.
	 */
	unp = unp_getsocktoken(msg->base.nm_so);

	if (UNP_ISATTACHED(unp)) {
		if (unp->unp_addr) {
			*msg->sockaddr.nm_nam =
				dup_sockaddr((struct sockaddr *)unp->unp_addr);
		}
		error = 0;
	} else {
		error = EINVAL;
	}

	unp_reltoken(unp);
	lwkt_replymsg(&msg->lmsg, error);
}

struct pr_usrreqs uipc_usrreqs = {
	.pru_abort = uipc_abort,
	.pru_accept = uipc_accept,
	.pru_attach = uipc_attach,
	.pru_bind = uipc_bind,
	.pru_connect = uipc_connect,
	.pru_connect2 = uipc_connect2,
	.pru_control = pr_generic_notsupp,
	.pru_detach = uipc_detach,
	.pru_disconnect = uipc_disconnect,
	.pru_listen = uipc_listen,
	.pru_peeraddr = uipc_peeraddr,
	.pru_rcvd = uipc_rcvd,
	.pru_rcvoob = pr_generic_notsupp,
	.pru_send = uipc_send,
	.pru_sense = uipc_sense,
	.pru_shutdown = uipc_shutdown,
	.pru_sockaddr = uipc_sockaddr,
	.pru_sosend = sosend,
	.pru_soreceive = soreceive
};

void
uipc_ctloutput(netmsg_t msg)
{
	struct socket *so;
	struct sockopt *sopt;
	struct unpcb *unp;
	int error = 0;

	so = msg->base.nm_so;
	sopt = msg->ctloutput.nm_sopt;

	lwkt_gettoken(&unp_token);
	unp = unp_getsocktoken(so);

	if (!UNP_ISATTACHED(unp)) {
		error = EINVAL;
		goto done;
	}

	switch (sopt->sopt_dir) {
	case SOPT_GET:
		switch (sopt->sopt_name) {
		case LOCAL_PEERCRED:
			if (unp->unp_flags & UNP_HAVEPC) {
				soopt_from_kbuf(sopt, &unp->unp_peercred,
				    sizeof(unp->unp_peercred));
			} else {
				if (so->so_type == SOCK_STREAM ||
				    so->so_type == SOCK_SEQPACKET)
					error = ENOTCONN;
				else
					error = EINVAL;
			}
			break;
		default:
			error = EOPNOTSUPP;
			break;
		}
		break;
	case SOPT_SET:
	default:
		error = EOPNOTSUPP;
		break;
	}

done:
	unp_reltoken(unp);
	lwkt_reltoken(&unp_token);

	lwkt_replymsg(&msg->lmsg, error);
}

/*
 * Both send and receive buffers are allocated PIPSIZ bytes of buffering
 * for stream sockets, although the total for sender and receiver is
 * actually only PIPSIZ.
 *
 * Datagram sockets really use the sendspace as the maximum datagram size,
 * and don't really want to reserve the sendspace.  Their recvspace should
 * be large enough for at least one max-size datagram plus address.
 *
 * We want the local send/recv space to be significantly larger than lo0's
 * mtu of 16384.
 */
#ifndef PIPSIZ
#define	PIPSIZ	57344
#endif
static u_long	unpst_sendspace = PIPSIZ;
static u_long	unpst_recvspace = PIPSIZ;
static u_long	unpdg_sendspace = 2*1024;	/* really max datagram size */
static u_long	unpdg_recvspace = 4*1024;

static int	unp_rights;		/* file descriptors in flight */
static struct spinlock unp_spin = SPINLOCK_INITIALIZER(&unp_spin, "unp_spin");

SYSCTL_DECL(_net_local_seqpacket);
SYSCTL_DECL(_net_local_stream);
SYSCTL_INT(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW,
    &unpst_sendspace, 0, "Size of stream socket send buffer");
SYSCTL_INT(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW,
    &unpst_recvspace, 0, "Size of stream socket receive buffer");

SYSCTL_DECL(_net_local_dgram);
SYSCTL_INT(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW,
    &unpdg_sendspace, 0, "Max datagram socket size");
SYSCTL_INT(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW,
    &unpdg_recvspace, 0, "Size of datagram socket receive buffer");

SYSCTL_DECL(_net_local);
SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0,
    "File descriptors in flight");
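/*
 * Example (editorial addition): the knobs defined above are runtime
 * tunables; e.g. from userland:
 *
 *	sysctl net.local.stream.sendspace=57344
 *	sysctl net.local.dgram.maxdgram=8192
 *	sysctl net.local.inflight	# read-only count of fds in flight
 */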
static int
unp_attach(struct socket *so, struct pru_attach_info *ai)
{
	struct unp_global_head *head;
	struct unpcb *unp;
	int error;

	lwkt_gettoken(&unp_token);

	if (so->so_snd.ssb_hiwat == 0 || so->so_rcv.ssb_hiwat == 0) {
		switch (so->so_type) {
		case SOCK_STREAM:
		case SOCK_SEQPACKET:
			error = soreserve(so, unpst_sendspace, unpst_recvspace,
			    ai->sb_rlimit);
			break;

		case SOCK_DGRAM:
			error = soreserve(so, unpdg_sendspace, unpdg_recvspace,
			    ai->sb_rlimit);
			break;

		default:
			panic("unp_attach");
		}
		if (error)
			goto failed;
	}

	/*
	 * In order to support sendfile we have to set either SSB_STOPSUPP
	 * or SSB_PREALLOC.  Unix domain sockets use the SSB_STOP flow
	 * control mechanism.
	 */
	if (so->so_type == SOCK_STREAM) {
		atomic_set_int(&so->so_rcv.ssb_flags, SSB_STOPSUPP);
		atomic_set_int(&so->so_snd.ssb_flags, SSB_STOPSUPP);
	}

	unp = kmalloc(sizeof(*unp), M_UNPCB, M_WAITOK | M_ZERO | M_NULLOK);
	if (unp == NULL) {
		error = ENOBUFS;
		goto failed;
	}
	unp->unp_refcnt = 1;
	unp->unp_gencnt = ++unp_gencnt;
	LIST_INIT(&unp->unp_refs);
	unp->unp_socket = so;
	unp->unp_rvnode = ai->fd_rdir;		/* jail cruft XXX JH */
	so->so_pcb = (caddr_t)unp;
	soreference(so);

	head = unp_globalhead(so->so_type);
	TAILQ_INSERT_TAIL(&head->list, unp, unp_link);
	head->count++;
	error = 0;
failed:
	lwkt_reltoken(&unp_token);
	return error;
}

static void
unp_detach(struct unpcb *unp)
{
	struct unp_global_head *head;
	struct socket *so;

	lwkt_gettoken(&unp_token);
	lwkt_getpooltoken(unp);

	so = unp->unp_socket;

	head = unp_globalhead(so->so_type);
	KASSERT(head->count > 0, ("invalid unp count"));
	TAILQ_REMOVE(&head->list, unp, unp_link);
	head->count--;

	unp->unp_gencnt = ++unp_gencnt;
	if (unp->unp_vnode) {
		unp->unp_vnode->v_socket = NULL;
		vrele(unp->unp_vnode);
		unp->unp_vnode = NULL;
	}
	soisdisconnected(so);
	KKASSERT(so->so_pcb == unp);
	so->so_pcb = NULL;		/* both tokens required */
	unp->unp_socket = NULL;

	lwkt_relpooltoken(unp);
	lwkt_reltoken(&unp_token);

	if (unp_rights) {
		/*
		 * Normally the receive buffer is flushed later,
		 * in sofree, but if our receive buffer holds references
		 * to descriptors that are now garbage, we will dispose
		 * of those descriptor references after the garbage collector
		 * gets them (resulting in a "panic: closef: count < 0").
		 */
		sorflush(so);
		unp_gc();
	}
	sofree(so);

	KASSERT(unp->unp_conn == NULL, ("unp is still connected"));
	KASSERT(LIST_EMPTY(&unp->unp_refs), ("unp still has references"));

	if (unp->unp_addr)
		kfree(unp->unp_addr, M_SONAME);
	kfree(unp, M_UNPCB);
}
static int
unp_bind(struct unpcb *unp, struct sockaddr *nam, struct thread *td)
{
	struct proc *p = td->td_proc;
	struct sockaddr_un *soun = (struct sockaddr_un *)nam;
	struct vnode *vp;
	struct vattr vattr;
	int error, namelen;
	struct nlookupdata nd;
	char buf[SOCK_MAXADDRLEN];

	ASSERT_LWKT_TOKEN_HELD(&unp_token);
	UNP_ASSERT_TOKEN_HELD(unp);

	if (unp->unp_vnode != NULL)
		return EINVAL;

	namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path);
	if (namelen <= 0)
		return EINVAL;
	strncpy(buf, soun->sun_path, namelen);
	buf[namelen] = 0;	/* null-terminate the string */
	error = nlookup_init(&nd, buf, UIO_SYSSPACE,
	    NLC_LOCKVP | NLC_CREATE | NLC_REFDVP);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0 && nd.nl_nch.ncp->nc_vp != NULL)
		error = EADDRINUSE;
	if (error)
		goto done;

	VATTR_NULL(&vattr);
	vattr.va_type = VSOCK;
	vattr.va_mode = (ACCESSPERMS & ~p->p_fd->fd_cmask);
	error = VOP_NCREATE(&nd.nl_nch, nd.nl_dvp, &vp, nd.nl_cred, &vattr);
	if (error == 0) {
		if (unp->unp_vnode == NULL) {
			vp->v_socket = unp->unp_socket;
			unp->unp_vnode = vp;
			unp->unp_addr = (struct sockaddr_un *)dup_sockaddr(nam);
			vn_unlock(vp);
		} else {
			vput(vp);	/* late race */
			error = EINVAL;
		}
	}
done:
	nlookup_done(&nd);
	return (error);
}
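/*
 * Example (editorial addition): the userland sequence that reaches
 * unp_bind() above.  namelen is computed from sun_len/sun_path in the
 * kernel, so the address must be fully initialized; the path shown is
 * arbitrary:
 *
 *	struct sockaddr_un sun;
 *	int s = socket(AF_LOCAL, SOCK_STREAM, 0);
 *
 *	bzero(&sun, sizeof(sun));
 *	sun.sun_family = AF_LOCAL;
 *	strlcpy(sun.sun_path, "/tmp/example.sock", sizeof(sun.sun_path));
 *	sun.sun_len = SUN_LEN(&sun);
 *	bind(s, (struct sockaddr *)&sun, SUN_LEN(&sun));
 */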
static int
unp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
{
	struct unpcb *unp, *unp2;
	int error, flags = 0;

	lwkt_gettoken(&unp_token);

	unp = unp_getsocktoken(so);
	if (!UNP_ISATTACHED(unp)) {
		error = EINVAL;
		goto failed;
	}

	if ((unp->unp_flags & UNP_CONNECTING) || unp->unp_conn != NULL) {
		error = EISCONN;
		goto failed;
	}

	flags = UNP_CONNECTING;
	unp_setflags(unp, flags);

	error = unp_find_lockref(nam, td, so->so_type, &unp2);
	if (error)
		goto failed;
	/*
	 * NOTE:
	 * unp2 is locked and referenced.
	 */

	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		struct socket *so2, *so3;
		struct unpcb *unp3;

		so2 = unp2->unp_socket;
		if (!(so2->so_options & SO_ACCEPTCONN) ||
		    (so3 = sonewconn_faddr(so2, 0, NULL,
				TRUE /* keep ref */)) == NULL) {
			error = ECONNREFUSED;
			goto done;
		}
		/* so3 has a socket reference. */

		unp3 = unp_getsocktoken(so3);
		if (!UNP_ISATTACHED(unp3)) {
			unp_reltoken(unp3);
			/*
			 * Already aborted; we only need to drop the
			 * socket reference held by sonewconn_faddr().
			 */
			sofree(so3);
			error = ECONNREFUSED;
			goto done;
		}
		unp_reference(unp3);
		/*
		 * NOTE:
		 * unp3 is locked and referenced.
		 */

		/*
		 * Release so3 socket reference held by sonewconn_faddr().
		 * Since we have referenced unp3, neither unp3 nor so3 will
		 * be destroyed here.
		 */
		sofree(so3);

		if (unp2->unp_addr != NULL) {
			unp3->unp_addr = (struct sockaddr_un *)
			    dup_sockaddr((struct sockaddr *)unp2->unp_addr);
		}

		/*
		 * unp_peercred management:
		 *
		 * The connecter's (client's) credentials are copied
		 * from its process structure at the time of connect()
		 * (which is now).
		 */
		cru2x(td->td_proc->p_ucred, &unp3->unp_peercred);
		unp_setflags(unp3, UNP_HAVEPC);
		/*
		 * The receiver's (server's) credentials are copied
		 * from the unp_peercred member of the socket on which the
		 * former called listen(); unp_listen() cached that
		 * process's credentials at that time so we can use
		 * them now.
		 */
		KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED,
		    ("unp_connect: listener without cached peercred"));
		memcpy(&unp->unp_peercred, &unp2->unp_peercred,
		    sizeof(unp->unp_peercred));
		unp_setflags(unp, UNP_HAVEPC);

		error = unp_connect_pair(unp, unp3);
		if (error)
			soabort_direct(so3);

		/* Done with unp3 */
		unp_free(unp3);
		unp_reltoken(unp3);
	} else {
		error = unp_connect_pair(unp, unp2);
	}
done:
	unp_free(unp2);
	unp_reltoken(unp2);
failed:
	if (flags)
		unp_clrflags(unp, flags);
	unp_reltoken(unp);

	lwkt_reltoken(&unp_token);
	return (error);
}

/*
 * Connect two unix domain sockets together.
 *
 * NOTE: Semantics for any change to unp_conn requires that the per-unp
 *	 pool token also be held.
 */
int
unp_connect2(struct socket *so, struct socket *so2)
{
	struct unpcb *unp, *unp2;
	int error;

	lwkt_gettoken(&unp_token);
	if (so2->so_type != so->so_type) {
		lwkt_reltoken(&unp_token);
		return (EPROTOTYPE);
	}
	unp = unp_getsocktoken(so);
	unp2 = unp_getsocktoken(so2);

	if (!UNP_ISATTACHED(unp)) {
		error = EINVAL;
		goto done;
	}
	if (!UNP_ISATTACHED(unp2)) {
		error = ECONNREFUSED;
		goto done;
	}

	if (unp->unp_conn != NULL) {
		error = EISCONN;
		goto done;
	}
	if ((so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET) &&
	    unp2->unp_conn != NULL) {
		error = EISCONN;
		goto done;
	}

	error = unp_connect_pair(unp, unp2);
done:
	unp_reltoken(unp2);
	unp_reltoken(unp);
	lwkt_reltoken(&unp_token);
	return (error);
}
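/*
 * Example (editorial addition): socketpair(2) is the other path into
 * unp_connect2() above; it joins two fresh, unnamed sockets directly,
 * with no unp_bind()/unp_connect() rendezvous through the filesystem:
 *
 *	int sv[2];
 *
 *	if (socketpair(AF_LOCAL, SOCK_STREAM, 0, sv) == 0) {
 *		// sv[0] and sv[1] are now connected peers
 *	}
 */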
/*
 * Disconnect a unix domain socket pair.
 *
 * NOTE: Semantics for any change to unp_conn requires that the per-unp
 *	 pool token also be held.
 */
static void
unp_disconnect(struct unpcb *unp, int error)
{
	struct socket *so = unp->unp_socket;
	struct unpcb *unp2;

	ASSERT_LWKT_TOKEN_HELD(&unp_token);
	UNP_ASSERT_TOKEN_HELD(unp);

	if (error)
		so->so_error = error;

	while ((unp2 = unp->unp_conn) != NULL) {
		lwkt_getpooltoken(unp2);
		if (unp2 == unp->unp_conn)
			break;
		lwkt_relpooltoken(unp2);
	}
	if (unp2 == NULL)
		return;
	/* unp2 is locked. */

	KASSERT((unp2->unp_flags & UNP_DROPPED) == 0, ("unp2 was dropped"));

	unp->unp_conn = NULL;

	switch (so->so_type) {
	case SOCK_DGRAM:
		LIST_REMOVE(unp, unp_reflink);
		soclrstate(so, SS_ISCONNECTED);
		break;

	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		/*
		 * Keep a reference before clearing the unp_conn
		 * to avoid racing uipc_detach()/uipc_abort() in
		 * another thread.
		 */
		unp_reference(unp2);
		KASSERT(unp2->unp_conn == unp, ("unp_conn mismatch"));
		unp2->unp_conn = NULL;

		soisdisconnected(so);
		soisdisconnected(unp2->unp_socket);

		unp_free(unp2);
		break;
	}

	lwkt_relpooltoken(unp2);
}

#ifdef notdef
void
unp_abort(struct unpcb *unp)
{
	lwkt_gettoken(&unp_token);
	unp_free(unp);
	lwkt_reltoken(&unp_token);
}
#endif

static int
prison_unpcb(struct thread *td, struct unpcb *unp)
{
	struct proc *p;

	if (td == NULL)
		return (0);
	if ((p = td->td_proc) == NULL)
		return (0);
	if (!p->p_ucred->cr_prison)
		return (0);
	if (p->p_fd->fd_rdir == unp->unp_rvnode)
		return (0);
	return (1);
}
static int
unp_pcblist(SYSCTL_HANDLER_ARGS)
{
	struct unp_global_head *head = arg1;
	int error, i, n;
	struct unpcb *unp, *marker;

	KKASSERT(curproc != NULL);

	/*
	 * The process of preparing the PCB list is too time-consuming and
	 * resource-intensive to repeat twice on every request.
	 */
	if (req->oldptr == NULL) {
		n = head->count;
		req->oldidx = (n + n/8) * sizeof(struct xunpcb);
		return 0;
	}

	if (req->newptr != NULL)
		return EPERM;

	marker = kmalloc(sizeof(*marker), M_UNPCB, M_WAITOK | M_ZERO);
	marker->unp_flags |= UNP_MARKER;

	lwkt_gettoken(&unp_token);

	n = head->count;
	i = 0;
	error = 0;

	TAILQ_INSERT_HEAD(&head->list, marker, unp_link);
	while ((unp = TAILQ_NEXT(marker, unp_link)) != NULL && i < n) {
		struct xunpcb xu;

		TAILQ_REMOVE(&head->list, marker, unp_link);
		TAILQ_INSERT_AFTER(&head->list, unp, marker, unp_link);

		if (unp->unp_flags & UNP_MARKER)
			continue;
		if (prison_unpcb(req->td, unp))
			continue;

		xu.xu_len = sizeof(xu);
		xu.xu_unpp = unp;

		/*
		 * NOTE:
		 * unp->unp_addr and unp->unp_conn are protected by
		 * unp_token.  So if we want to get rid of unp_token
		 * or reduce the coverage of unp_token, care must be
		 * taken.
		 */
		if (unp->unp_addr) {
			bcopy(unp->unp_addr, &xu.xu_addr,
			    unp->unp_addr->sun_len);
		}
		if (unp->unp_conn && unp->unp_conn->unp_addr) {
			bcopy(unp->unp_conn->unp_addr,
			    &xu.xu_caddr,
			    unp->unp_conn->unp_addr->sun_len);
		}
		bcopy(unp, &xu.xu_unp, sizeof(*unp));
		sotoxsocket(unp->unp_socket, &xu.xu_socket);

		/* NOTE: This could block and temporarily release unp_token */
		error = SYSCTL_OUT(req, &xu, sizeof(xu));
		if (error)
			break;
		++i;
	}
	TAILQ_REMOVE(&head->list, marker, unp_link);

	lwkt_reltoken(&unp_token);

	kfree(marker, M_UNPCB);
	return error;
}

SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist, CTLFLAG_RD,
    &unp_dgram_head, 0, unp_pcblist, "S,xunpcb",
    "List of active local datagram sockets");
SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLFLAG_RD,
    &unp_stream_head, 0, unp_pcblist, "S,xunpcb",
    "List of active local stream sockets");
SYSCTL_PROC(_net_local_seqpacket, OID_AUTO, pcblist, CTLFLAG_RD,
    &unp_seqpkt_head, 0, unp_pcblist, "S,xunpcb",
    "List of active local seqpacket sockets");
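/*
 * Example (editorial addition): a userland consumer of the pcblist
 * sysctls above.  The two-call pattern matches the req->oldptr == NULL
 * size probe handled in unp_pcblist():
 *
 *	size_t len = 0;
 *	void *buf;
 *
 *	sysctlbyname("net.local.stream.pcblist", NULL, &len, NULL, 0);
 *	buf = malloc(len);
 *	sysctlbyname("net.local.stream.pcblist", buf, &len, NULL, 0);
 *	// buf now holds struct xunpcb records describing each socket
 */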
static void
unp_shutdown(struct unpcb *unp)
{
	struct socket *so;

	if ((unp->unp_socket->so_type == SOCK_STREAM ||
	     unp->unp_socket->so_type == SOCK_SEQPACKET) &&
	    unp->unp_conn != NULL && (so = unp->unp_conn->unp_socket)) {
		socantrcvmore(so);
	}
}

#ifdef notdef
void
unp_drain(void)
{
	lwkt_gettoken(&unp_token);
	lwkt_reltoken(&unp_token);
}
#endif

int
unp_externalize(struct mbuf *rights, int flags)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;		/* XXX */
	struct lwp *lp = td->td_lwp;
	struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
	int *fdp;
	int i;
	struct file **rp;
	struct file *fp;
	int newfds = (cm->cmsg_len - (CMSG_DATA(cm) - (u_char *)cm))
		/ sizeof(struct file *);
	int f;

	/*
	 * If the new FDs will not fit, then we free them all.
	 */
	if (!fdavail(p, newfds)) {
		rp = (struct file **)CMSG_DATA(cm);
		for (i = 0; i < newfds; i++) {
			fp = *rp;
			/*
			 * Zero the pointer before calling unp_discard,
			 * since it may end up in unp_gc()..
			 */
			*rp++ = NULL;
			unp_discard(fp, NULL);
		}
		return (EMSGSIZE);
	}

	/*
	 * Now change each pointer to an fd in the global table to
	 * an integer that is the index to the local fd table entry
	 * that we set up to point to the global one we are transferring.
	 * If sizeof(struct file *) is bigger than or equal to sizeof(int),
	 * then do it in forward order.  In that case, an integer will
	 * always come in the same place or before its corresponding
	 * struct file pointer.
	 * If sizeof(struct file *) is smaller than sizeof(int), then
	 * do it in reverse order.
	 *
	 * Hold revoke_token in 'shared' mode, so that we won't miss
	 * the FREVOKED update on fps being externalized (fsetfd).
	 */
	lwkt_gettoken_shared(&revoke_token);
	if (sizeof(struct file *) >= sizeof(int)) {
		fdp = (int *)CMSG_DATA(cm);
		rp = (struct file **)CMSG_DATA(cm);
		for (i = 0; i < newfds; i++) {
			if (fdalloc(p, 0, &f)) {
				int j;

				/*
				 * The previous fdavail() can't guarantee
				 * fdalloc() success due to SMP races.
				 * Just clean up and return the same
				 * error value as if fdavail() failed.
				 */

				/* Close externalized files */
				for (j = 0; j < i; j++)
					kern_close(fdp[j]);
				/* Discard the rest of internal files */
				for (; i < newfds; i++)
					unp_discard(rp[i], NULL);
				/* Wipe out the control message */
				for (i = 0; i < newfds; i++)
					rp[i] = NULL;

				lwkt_reltoken(&revoke_token);
				return (EMSGSIZE);
			}
			fp = rp[i];
			unp_fp_externalize(lp, fp, f, flags);
			fdp[i] = f;
		}
	} else {
		/*
		 * XXX
		 * Will this ever happen?  I don't think the compiler will
		 * generate code for this code segment -- sephe
		 */
		fdp = (int *)CMSG_DATA(cm) + newfds - 1;
		rp = (struct file **)CMSG_DATA(cm) + newfds - 1;
		for (i = 0; i < newfds; i++) {
			if (fdalloc(p, 0, &f))
				panic("unp_externalize");
			fp = *rp--;
			unp_fp_externalize(lp, fp, f, flags);
			*fdp-- = f;
		}
	}
	lwkt_reltoken(&revoke_token);

	/*
	 * Adjust length, in case sizeof(struct file *) and sizeof(int)
	 * differ.
	 */
	cm->cmsg_len = CMSG_LEN(newfds * sizeof(int));
	rights->m_len = cm->cmsg_len;

	return (0);
}

static void
unp_fp_externalize(struct lwp *lp, struct file *fp, int fd, int flags)
{
	if (lp) {
		struct filedesc *fdp = lp->lwp_proc->p_fd;

		KKASSERT(fd >= 0);
		if (fp->f_flag & FREVOKED) {
			struct file *fx;
			int error;

			kprintf("Warning: revoked fp exiting unix socket\n");
			error = falloc(lp, &fx, NULL);
			if (error == 0) {
				if (flags & MSG_CMSG_CLOEXEC)
					fdp->fd_files[fd].fileflags |= UF_EXCLOSE;
				fsetfd(fdp, fx, fd);
				fdrop(fx);
			} else {
				fsetfd(fdp, NULL, fd);
			}
		} else {
			if (flags & MSG_CMSG_CLOEXEC)
				fdp->fd_files[fd].fileflags |= UF_EXCLOSE;
			fsetfd(fdp, fp, fd);
		}
	}
	spin_lock(&unp_spin);
	fp->f_msgcount--;
	unp_rights--;
	spin_unlock(&unp_spin);
	fdrop(fp);
}
void
unp_init(void)
{
	TAILQ_INIT(&unp_stream_head.list);
	TAILQ_INIT(&unp_dgram_head.list);
	TAILQ_INIT(&unp_seqpkt_head.list);

	spin_init(&unp_spin, "unpinit");

	SLIST_INIT(&unp_defdiscard_head);
	spin_init(&unp_defdiscard_spin, "unpdisc");
	TASK_INIT(&unp_defdiscard_task, 0, unp_defdiscard_taskfunc, NULL);

	/*
	 * Create the taskqueue for deferred discards, and bind it to
	 * the last CPU.
	 */
	unp_taskqueue = taskqueue_create("unp_taskq", M_WAITOK,
	    taskqueue_thread_enqueue, &unp_taskqueue);
	taskqueue_start_threads(&unp_taskqueue, 1, TDPRI_KERN_DAEMON,
	    ncpus - 1, "unp taskq");
}

static int
unp_internalize(struct mbuf *control, struct thread *td)
{
	struct proc *p = td->td_proc;
	struct filedesc *fdescp;
	struct cmsghdr *cm = mtod(control, struct cmsghdr *);
	struct file **rp;
	struct file *fp;
	int i, fd, *fdp;
	struct cmsgcred *cmcred;
	int oldfds;
	u_int newlen;
	int error;

	KKASSERT(p);

	if ((cm->cmsg_type != SCM_RIGHTS && cm->cmsg_type != SCM_CREDS) ||
	    cm->cmsg_level != SOL_SOCKET ||
	    CMSG_ALIGN(cm->cmsg_len) != control->m_len)
		return EINVAL;

	/*
	 * Fill in credential information.
	 */
	if (cm->cmsg_type == SCM_CREDS) {
		cmcred = (struct cmsgcred *)CMSG_DATA(cm);
		cmcred->cmcred_pid = p->p_pid;
		cmcred->cmcred_uid = p->p_ucred->cr_ruid;
		cmcred->cmcred_gid = p->p_ucred->cr_rgid;
		cmcred->cmcred_euid = p->p_ucred->cr_uid;
		cmcred->cmcred_ngroups = MIN(p->p_ucred->cr_ngroups,
		    CMGROUP_MAX);
		for (i = 0; i < cmcred->cmcred_ngroups; i++)
			cmcred->cmcred_groups[i] = p->p_ucred->cr_groups[i];
		return 0;
	}

	/*
	 * cmsghdr may not be aligned, do not allow calculation(s) to
	 * go negative.
	 */
	if (cm->cmsg_len < CMSG_LEN(0))
		return EINVAL;

	oldfds = (cm->cmsg_len - CMSG_LEN(0)) / sizeof(int);

	/*
	 * Now replace the integer FDs with pointers to
	 * the associated global file table entry..
	 * Allocate a bigger buffer as necessary; but if a cluster is not
	 * enough, return E2BIG.
	 */
	newlen = CMSG_LEN(oldfds * sizeof(struct file *));
	if (newlen > MCLBYTES)
		return E2BIG;
	if (newlen - control->m_len > M_TRAILINGSPACE(control)) {
		if (control->m_flags & M_EXT)
			return E2BIG;
		MCLGET(control, M_WAITOK);
		if (!(control->m_flags & M_EXT))
			return ENOBUFS;

		/* copy the data to the cluster */
		memcpy(mtod(control, char *), cm, cm->cmsg_len);
		cm = mtod(control, struct cmsghdr *);
	}

	fdescp = p->p_fd;
	spin_lock_shared(&fdescp->fd_spin);

	/*
	 * Check that all the FDs passed in refer to legal OPEN files.
	 * If not, reject the entire operation.
	 */
	fdp = (int *)CMSG_DATA(cm);
	for (i = 0; i < oldfds; i++) {
		fd = *fdp++;
		if ((unsigned)fd >= fdescp->fd_nfiles ||
		    fdescp->fd_files[fd].fp == NULL) {
			error = EBADF;
			goto done;
		}
		if (fdescp->fd_files[fd].fp->f_type == DTYPE_KQUEUE) {
			error = EOPNOTSUPP;
			goto done;
		}
	}

	/*
	 * Adjust length, in case sizeof(struct file *) and sizeof(int)
	 * differ.
	 */
	cm->cmsg_len = newlen;
	control->m_len = CMSG_ALIGN(newlen);

	/*
	 * Transform the file descriptors into struct file pointers.
	 * If sizeof(struct file *) is bigger than or equal to sizeof(int),
	 * then do it in reverse order so that the int won't get
	 * overwritten until we're done.
	 * If sizeof(struct file *) is smaller than sizeof(int), then
	 * do it in forward order.
	 */
	if (sizeof(struct file *) >= sizeof(int)) {
		fdp = (int *)CMSG_DATA(cm) + oldfds - 1;
		rp = (struct file **)CMSG_DATA(cm) + oldfds - 1;
		for (i = 0; i < oldfds; i++) {
			fp = fdescp->fd_files[*fdp--].fp;
			*rp-- = fp;
			fhold(fp);
			spin_lock(&unp_spin);
			fp->f_msgcount++;
			unp_rights++;
			spin_unlock(&unp_spin);
		}
	} else {
		/*
		 * XXX
		 * Will this ever happen?  I don't think the compiler will
		 * generate code for this code segment -- sephe
		 */
		fdp = (int *)CMSG_DATA(cm);
		rp = (struct file **)CMSG_DATA(cm);
		for (i = 0; i < oldfds; i++) {
			fp = fdescp->fd_files[*fdp++].fp;
			*rp++ = fp;
			fhold(fp);
			spin_lock(&unp_spin);
			fp->f_msgcount++;
			unp_rights++;
			spin_unlock(&unp_spin);
		}
	}
	error = 0;
done:
	spin_unlock_shared(&fdescp->fd_spin);
	return error;
}
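/*
 * Example (editorial addition): the userland side of unp_internalize()
 * above -- passing a descriptor with SCM_RIGHTS.  On the receiving
 * socket, unp_externalize() converts the in-kernel struct file pointers
 * back into descriptors.  fd_to_pass and s are assumed to exist:
 *
 *	struct msghdr msg;
 *	struct cmsghdr *cm;
 *	struct iovec iov;
 *	char cmsgbuf[CMSG_SPACE(sizeof(int))];
 *	char onebyte = 0;
 *
 *	bzero(&msg, sizeof(msg));
 *	iov.iov_base = &onebyte;
 *	iov.iov_len = 1;
 *	msg.msg_iov = &iov;
 *	msg.msg_iovlen = 1;
 *	msg.msg_control = cmsgbuf;
 *	msg.msg_controllen = sizeof(cmsgbuf);
 *	cm = CMSG_FIRSTHDR(&msg);
 *	cm->cmsg_len = CMSG_LEN(sizeof(int));
 *	cm->cmsg_level = SOL_SOCKET;
 *	cm->cmsg_type = SCM_RIGHTS;
 *	*(int *)CMSG_DATA(cm) = fd_to_pass;
 *	sendmsg(s, &msg, 0);
 */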
/*
 * Garbage collect in-transit file descriptors that get lost due to
 * loops (i.e. when a socket is sent to another process over itself,
 * and more complex situations).
 *
 * NOT MPSAFE - TODO socket flush code and maybe closef.  Rest is MPSAFE.
 */

struct unp_gc_info {
	struct file **extra_ref;
	struct file *locked_fp;
	int defer;
	int index;
	int maxindex;
};

static void
unp_gc(void)
{
	struct unp_gc_info info;
	static boolean_t unp_gcing;
	struct file **fpp;
	int i;

	/*
	 * Only one gc can be in-progress at any given moment
	 */
	spin_lock(&unp_spin);
	if (unp_gcing) {
		spin_unlock(&unp_spin);
		return;
	}
	unp_gcing = TRUE;
	spin_unlock(&unp_spin);

	lwkt_gettoken(&unp_token);

	/*
	 * Before going through all this, set all FDs to be NOT deferred
	 * and NOT externally accessible (not marked).  During the scan
	 * a fd can be marked externally accessible but we may or may not
	 * be able to immediately process it (controlled by FDEFER).
	 *
	 * If we loop, sleep a bit.  The complexity of the topology can cause
	 * multiple loops.  Also failure to acquire the socket's so_rcv
	 * token can cause us to loop.
	 */
	allfiles_scan_exclusive(unp_gc_clearmarks, NULL);
	do {
		info.defer = 0;
		allfiles_scan_exclusive(unp_gc_checkmarks, &info);
		if (info.defer)
			tsleep(&info, 0, "gcagain", 1);
	} while (info.defer);

	/*
	 * We grab an extra reference to each of the file table entries
	 * that are not otherwise accessible and then free the rights
	 * that are stored in messages on them.
	 *
	 * The bug in the original code is a little tricky, so I'll describe
	 * what's wrong with it here.
	 *
	 * It is incorrect to simply unp_discard each entry for f_msgcount
	 * times -- consider the case of sockets A and B that contain
	 * references to each other.  On a last close of some other socket,
	 * we trigger a gc since the number of outstanding rights (unp_rights)
	 * is non-zero.  If during the sweep phase the gc code unp_discards,
	 * we end up doing a (full) closef on the descriptor.  A closef on A
	 * results in the following chain.  Closef calls soo_close, which
	 * calls soclose.   Soclose calls first (through the switch
	 * uipc_usrreq) unp_detach, which re-invokes unp_gc.  Unp_gc simply
	 * returns because the previous instance had set unp_gcing, and
	 * we return all the way back to soclose, which marks the socket
	 * with SS_NOFDREF, and then calls sofree.  Sofree calls sorflush
	 * to free up the rights that are queued in messages on the socket A,
	 * i.e., the reference on B.  The sorflush calls via the dom_dispose
	 * switch unp_dispose, which unp_scans with unp_discard.  This second
	 * instance of unp_discard just calls closef on B.
	 *
	 * Well, a similar chain occurs on B, resulting in a sorflush on B,
	 * which results in another closef on A.  Unfortunately, A is already
	 * being closed, and the descriptor has already been marked with
	 * SS_NOFDREF, and soclose panics at this point.
	 *
	 * Here, we first take an extra reference to each inaccessible
	 * descriptor.  Then, we call sorflush ourself, since we know
	 * it is a Unix domain socket anyhow.  After we destroy all the
	 * rights carried in messages, we do a last closef to get rid
	 * of our extra reference.  This is the last close, and the
	 * unp_detach etc will shut down the socket.
	 *
	 * 91/09/19, bsy@cs.cmu.edu
	 */
	info.extra_ref = kmalloc(256 * sizeof(struct file *), M_FILE, M_WAITOK);
	info.maxindex = 256;

	do {
		/*
		 * Look for matches
		 */
		info.index = 0;
		allfiles_scan_exclusive(unp_gc_checkrefs, &info);

		/*
		 * For each FD on our hit list, do the following two things
		 */
		for (i = info.index, fpp = info.extra_ref; --i >= 0; ++fpp) {
			struct file *tfp = *fpp;
			if (tfp->f_type == DTYPE_SOCKET && tfp->f_data != NULL)
				sorflush((struct socket *)(tfp->f_data));
		}
		for (i = info.index, fpp = info.extra_ref; --i >= 0; ++fpp)
			closef(*fpp, NULL);
	} while (info.index == info.maxindex);

	lwkt_reltoken(&unp_token);

	kfree((caddr_t)info.extra_ref, M_FILE);
	unp_gcing = FALSE;
}
/*
 * MPSAFE - NOTE: filehead list and file pointer spinlocked on entry
 */
static int
unp_gc_checkrefs(struct file *fp, void *data)
{
	struct unp_gc_info *info = data;

	if (fp->f_count == 0)
		return(0);
	if (info->index == info->maxindex)
		return(-1);

	/*
	 * If all refs are from msgs, and it's not marked accessible
	 * then it must be referenced from some unreachable cycle
	 * of (shut-down) FDs, so include it in our
	 * list of FDs to remove.
	 */
	if (fp->f_count == fp->f_msgcount && !(fp->f_flag & FMARK)) {
		info->extra_ref[info->index++] = fp;
		fhold(fp);
	}
	return(0);
}

/*
 * MPSAFE - NOTE: filehead list and file pointer spinlocked on entry
 */
static int
unp_gc_clearmarks(struct file *fp, void *data __unused)
{
	atomic_clear_int(&fp->f_flag, FMARK | FDEFER);
	return(0);
}
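/*
 * Editorial summary of the three phases unp_gc() drives with the
 * callbacks above and below:
 *
 *	1. unp_gc_clearmarks: clear FMARK/FDEFER on every file.
 *	2. unp_gc_checkmarks: mark files reachable from userland (FMARK),
 *	   scanning queued SCM_RIGHTS payloads via unp_scan()/unp_mark()
 *	   and looping while any FDEFER remains.
 *	3. unp_gc_checkrefs: collect files whose only remaining references
 *	   are in-flight messages (f_count == f_msgcount, no FMARK);
 *	   unp_gc() then sorflush()es and closef()s them to break cycles.
 */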
/*
 * MPSAFE - NOTE: filehead list and file pointer spinlocked on entry
 */
static int
unp_gc_checkmarks(struct file *fp, void *data)
{
	struct unp_gc_info *info = data;
	struct socket *so;

	/*
	 * If the file is not open, skip it.  Make sure it isn't marked
	 * deferred or we could loop forever, in case we somehow race
	 * something.
	 */
	if (fp->f_count == 0) {
		if (fp->f_flag & FDEFER)
			atomic_clear_int(&fp->f_flag, FDEFER);
		return(0);
	}
	/*
	 * If we already marked it as 'defer' in a
	 * previous pass, then try to process it this time
	 * and un-mark it.
	 */
	if (fp->f_flag & FDEFER) {
		atomic_clear_int(&fp->f_flag, FDEFER);
	} else {
		/*
		 * If it's not deferred, then check if it's
		 * already marked.. if so skip it.
		 */
		if (fp->f_flag & FMARK)
			return(0);
		/*
		 * If all references are from messages
		 * in transit, then skip it.  It's not
		 * externally accessible.
		 */
		if (fp->f_count == fp->f_msgcount)
			return(0);
		/*
		 * If it got this far then it must be
		 * externally accessible.
		 */
		atomic_set_int(&fp->f_flag, FMARK);
	}

	/*
	 * Either it was deferred, or it is externally
	 * accessible and not already marked so.
	 * Now check if it is possibly one of OUR sockets.
	 */
	if (fp->f_type != DTYPE_SOCKET ||
	    (so = (struct socket *)fp->f_data) == NULL) {
		return(0);
	}
	if (so->so_proto->pr_domain != &localdomain ||
	    !(so->so_proto->pr_flags & PR_RIGHTS)) {
		return(0);
	}

	/*
	 * So, Ok, it's one of our sockets and it IS externally accessible
	 * (or was deferred).  Now we look to see if we hold any file
	 * descriptors in its message buffers.  Follow those links and mark
	 * them as accessible too.
	 *
	 * We are holding multiple spinlocks here; if we cannot get the
	 * token non-blocking, defer until the next loop.
	 */
	info->locked_fp = fp;
	if (lwkt_trytoken(&so->so_rcv.ssb_token)) {
		unp_scan(so->so_rcv.ssb_mb, unp_mark, info);
		lwkt_reltoken(&so->so_rcv.ssb_token);
	} else {
		atomic_set_int(&fp->f_flag, FDEFER);
		++info->defer;
	}
	return (0);
}

/*
 * Dispose of the fp's stored in a mbuf.
 *
 * The deferred-discard processing can cause additional fps to be entered
 * onto the list while it is running, flattening out the operation and
 * avoiding a deep kernel stack recursion.
 */
void
unp_dispose(struct mbuf *m)
{
	lwkt_gettoken(&unp_token);
	if (m)
		unp_scan(m, unp_discard, NULL);
	lwkt_reltoken(&unp_token);
}

static int
unp_listen(struct unpcb *unp, struct thread *td)
{
	struct proc *p = td->td_proc;

	ASSERT_LWKT_TOKEN_HELD(&unp_token);
	UNP_ASSERT_TOKEN_HELD(unp);

	KKASSERT(p);
	cru2x(p->p_ucred, &unp->unp_peercred);
	unp_setflags(unp, UNP_HAVEPCCACHED);
	return (0);
}

static void
unp_scan(struct mbuf *m0, void (*op)(struct file *, void *), void *data)
{
	struct mbuf *m;
	struct file **rp;
	struct cmsghdr *cm;
	int i;
	int qfds;

	while (m0) {
		for (m = m0; m; m = m->m_next) {
			if (m->m_type == MT_CONTROL &&
			    m->m_len >= sizeof(*cm)) {
				cm = mtod(m, struct cmsghdr *);
				if (cm->cmsg_level != SOL_SOCKET ||
				    cm->cmsg_type != SCM_RIGHTS)
					continue;
				qfds = (cm->cmsg_len - CMSG_LEN(0)) /
				    sizeof(void *);
				rp = (struct file **)CMSG_DATA(cm);
				for (i = 0; i < qfds; i++)
					(*op)(*rp++, data);
				break;		/* XXX, but saves time */
			}
		}
		m0 = m0->m_nextpkt;
	}
}

/*
 * Mark visibility.  info->defer is recalculated on every pass.
 */
static void
unp_mark(struct file *fp, void *data)
{
	struct unp_gc_info *info = data;

	if ((fp->f_flag & FMARK) == 0) {
		++info->defer;
		atomic_set_int(&fp->f_flag, FMARK | FDEFER);
	} else if (fp->f_flag & FDEFER) {
		++info->defer;
	}
}

/*
 * Discard a fp previously held in a unix domain socket mbuf.  To
 * avoid blowing out the kernel stack due to contrived chain-reactions
 * we may have to defer the operation to a higher procedural level.
 *
 * Caller holds unp_token.
 */
static void
unp_discard(struct file *fp, void *data __unused)
{
	struct unp_defdiscard *d;

	spin_lock(&unp_spin);
	fp->f_msgcount--;
	unp_rights--;
	spin_unlock(&unp_spin);

	d = kmalloc(sizeof(*d), M_UNPCB, M_WAITOK);
	d->fp = fp;

	spin_lock(&unp_defdiscard_spin);
	SLIST_INSERT_HEAD(&unp_defdiscard_head, d, next);
	spin_unlock(&unp_defdiscard_spin);

	taskqueue_enqueue(unp_taskqueue, &unp_defdiscard_task);
}
/*
 * NOTE:
 * unp_token must be held before calling this function to avoid name
 * resolution and v_socket accessing races, especially racing against
 * unp_detach().
 *
 * NOTE:
 * For anyone caring about unconnected unix socket sending performance,
 * another approach could be taken...
 */
static int
unp_find_lockref(struct sockaddr *nam, struct thread *td, short type,
    struct unpcb **unp_ret)
{
	struct proc *p = td->td_proc;
	struct sockaddr_un *soun = (struct sockaddr_un *)nam;
	struct vnode *vp = NULL;
	struct socket *so;
	struct unpcb *unp;
	int error, len;
	struct nlookupdata nd;
	char buf[SOCK_MAXADDRLEN];

	ASSERT_LWKT_TOKEN_HELD(&unp_token);

	*unp_ret = NULL;

	len = nam->sa_len - offsetof(struct sockaddr_un, sun_path);
	if (len <= 0) {
		error = EINVAL;
		goto failed;
	}
	strncpy(buf, soun->sun_path, len);
	buf[len] = 0;

	error = nlookup_init(&nd, buf, UIO_SYSSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
	nlookup_done(&nd);
	if (error) {
		vp = NULL;
		goto failed;
	}

	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		goto failed;
	}
	error = VOP_EACCESS(vp, VWRITE, p->p_ucred);
	if (error)
		goto failed;
	so = vp->v_socket;
	if (so == NULL) {
		error = ECONNREFUSED;
		goto failed;
	}
	if (so->so_type != type) {
		error = EPROTOTYPE;
		goto failed;
	}

	/* Lock this unp. */
	unp = unp_getsocktoken(so);
	if (!UNP_ISATTACHED(unp)) {
		unp_reltoken(unp);
		error = ECONNREFUSED;
		goto failed;
	}
	/* And keep this unp referenced. */
	unp_reference(unp);

	/* Done! */
	*unp_ret = unp;
	error = 0;
failed:
	if (vp != NULL)
		vput(vp);
	return error;
}

static int
unp_connect_pair(struct unpcb *unp, struct unpcb *unp2)
{
	struct socket *so = unp->unp_socket;
	struct socket *so2 = unp2->unp_socket;

	ASSERT_LWKT_TOKEN_HELD(&unp_token);
	UNP_ASSERT_TOKEN_HELD(unp);
	UNP_ASSERT_TOKEN_HELD(unp2);

	KASSERT(so->so_type == so2->so_type,
	    ("socket type mismatch, so %d, so2 %d", so->so_type, so2->so_type));

	if (!UNP_ISATTACHED(unp))
		return EINVAL;
	if (!UNP_ISATTACHED(unp2))
		return ECONNREFUSED;

	KASSERT(unp->unp_conn == NULL, ("unp is already connected"));
	unp->unp_conn = unp2;

	switch (so->so_type) {
	case SOCK_DGRAM:
		LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink);
		soisconnected(so);
		break;

	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		KASSERT(unp2->unp_conn == NULL, ("unp2 is already connected"));
		unp2->unp_conn = unp;
		soisconnected(so);
		soisconnected(so2);
		break;

	default:
		panic("unp_connect_pair: unknown socket type %d", so->so_type);
	}
	return 0;
}

static void
unp_drop(struct unpcb *unp, int error)
{
	struct unpcb *unp2;

	ASSERT_LWKT_TOKEN_HELD(&unp_token);
	UNP_ASSERT_TOKEN_HELD(unp);
	KASSERT(unp->unp_flags & UNP_DETACHED, ("unp is not detached"));

	unp_disconnect(unp, error);

	while ((unp2 = LIST_FIRST(&unp->unp_refs)) != NULL) {
		lwkt_getpooltoken(unp2);
		unp_disconnect(unp2, ECONNRESET);
		lwkt_relpooltoken(unp2);
	}
	unp_setflags(unp, UNP_DROPPED);
}
static void
unp_defdiscard_taskfunc(void *arg __unused, int pending __unused)
{
	struct unp_defdiscard *d;

	spin_lock(&unp_defdiscard_spin);
	while ((d = SLIST_FIRST(&unp_defdiscard_head)) != NULL) {
		SLIST_REMOVE_HEAD(&unp_defdiscard_head, next);
		spin_unlock(&unp_defdiscard_spin);

		closef(d->fp, NULL);
		kfree(d, M_UNPCB);

		spin_lock(&unp_defdiscard_spin);
	}
	spin_unlock(&unp_defdiscard_spin);
}