1 /* 2 * Copyright (c) 1982, 1986, 1989, 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 
32 * 33 * From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94 34 * $FreeBSD: src/sys/kern/uipc_usrreq.c,v 1.54.2.10 2003/03/04 17:28:09 nectar Exp $ 35 * $DragonFly: src/sys/kern/uipc_usrreq.c,v 1.44 2008/09/06 05:44:58 dillon Exp $ 36 */ 37 38 #include <sys/param.h> 39 #include <sys/systm.h> 40 #include <sys/kernel.h> 41 #include <sys/domain.h> 42 #include <sys/fcntl.h> 43 #include <sys/malloc.h> /* XXX must be before <sys/file.h> */ 44 #include <sys/proc.h> 45 #include <sys/file.h> 46 #include <sys/filedesc.h> 47 #include <sys/mbuf.h> 48 #include <sys/nlookup.h> 49 #include <sys/protosw.h> 50 #include <sys/socket.h> 51 #include <sys/socketvar.h> 52 #include <sys/resourcevar.h> 53 #include <sys/stat.h> 54 #include <sys/mount.h> 55 #include <sys/sysctl.h> 56 #include <sys/un.h> 57 #include <sys/unpcb.h> 58 #include <sys/vnode.h> 59 60 #include <sys/file2.h> 61 #include <sys/spinlock2.h> 62 #include <sys/socketvar2.h> 63 #include <sys/msgport2.h> 64 65 typedef struct unp_defdiscard { 66 struct unp_defdiscard *next; 67 struct file *fp; 68 } *unp_defdiscard_t; 69 70 static MALLOC_DEFINE(M_UNPCB, "unpcb", "unpcb struct"); 71 static unp_gen_t unp_gencnt; 72 static u_int unp_count; 73 74 static struct unp_head unp_shead, unp_dhead; 75 76 static struct lwkt_token unp_token = LWKT_TOKEN_INITIALIZER(unp_token); 77 static int unp_defdiscard_nest; 78 static unp_defdiscard_t unp_defdiscard_base; 79 80 /* 81 * Unix communications domain. 
82 * 83 * TODO: 84 * RDM 85 * rethink name space problems 86 * need a proper out-of-band 87 * lock pushdown 88 */ 89 static struct sockaddr sun_noname = { sizeof(sun_noname), AF_LOCAL }; 90 static ino_t unp_ino = 1; /* prototype for fake inode numbers */ 91 static struct spinlock unp_ino_spin = SPINLOCK_INITIALIZER(&unp_ino_spin); 92 93 static int unp_attach (struct socket *, struct pru_attach_info *); 94 static void unp_detach (struct unpcb *); 95 static int unp_bind (struct unpcb *,struct sockaddr *, struct thread *); 96 static int unp_connect (struct socket *,struct sockaddr *, 97 struct thread *); 98 static void unp_disconnect (struct unpcb *); 99 static void unp_shutdown (struct unpcb *); 100 static void unp_drop (struct unpcb *, int); 101 static void unp_gc (void); 102 static int unp_gc_clearmarks(struct file *, void *); 103 static int unp_gc_checkmarks(struct file *, void *); 104 static int unp_gc_checkrefs(struct file *, void *); 105 static int unp_revoke_gc_check(struct file *, void *); 106 static void unp_scan (struct mbuf *, void (*)(struct file *, void *), 107 void *data); 108 static void unp_mark (struct file *, void *data); 109 static void unp_discard (struct file *, void *); 110 static int unp_internalize (struct mbuf *, struct thread *); 111 static int unp_listen (struct unpcb *, struct thread *); 112 static void unp_fp_externalize(struct lwp *lp, struct file *fp, int fd); 113 114 /* 115 * NOTE: 116 * Since unp_token will be automaticly released upon execution of 117 * blocking code, we need to reference unp_conn before any possible 118 * blocking code to prevent it from being ripped behind our back. 
 */

/*
 * Acquire an additional reference on a unpcb.
 *
 * NOTE: unp_token MUST be held
 */
static __inline void
unp_reference(struct unpcb *unp)
{
	atomic_add_int(&unp->unp_refcnt, 1);
}

/*
 * Release a reference on a unpcb; the last reference tears the
 * pcb down via unp_detach().
 *
 * NOTE: unp_token MUST be held
 */
static __inline void
unp_free(struct unpcb *unp)
{
	KKASSERT(unp->unp_refcnt > 0);
	if (atomic_fetchadd_int(&unp->unp_refcnt, -1) == 1)
		unp_detach(unp);
}

/*
 * Abort the connection: drop it with ECONNABORTED and release the
 * pcb reference.
 *
 * NOTE: (so) is referenced from soabort*() and netmsg_pru_abort()
 *	 will sofree() it when we return.
 */
static void
uipc_abort(netmsg_t msg)
{
	struct unpcb *unp;
	int error;

	lwkt_gettoken(&unp_token);
	unp = msg->base.nm_so->so_pcb;
	if (unp) {
		unp_drop(unp, ECONNABORTED);
		unp_free(unp);
		error = 0;
	} else {
		error = EINVAL;
	}
	lwkt_reltoken(&unp_token);

	lwkt_replymsg(&msg->lmsg, error);
}

/*
 * Accept a connection: report the peer's address (or the AF_LOCAL
 * "no name" address) back to the caller.
 */
static void
uipc_accept(netmsg_t msg)
{
	struct unpcb *unp;
	int error;

	lwkt_gettoken(&unp_token);
	unp = msg->base.nm_so->so_pcb;
	if (unp == NULL) {
		error = EINVAL;
	} else {
		struct unpcb *unp2 = unp->unp_conn;

		/*
		 * Pass back name of connected socket,
		 * if it was bound and we are still connected
		 * (our peer may have closed already!).
		 */
		if (unp2 && unp2->unp_addr) {
			/* Hold unp2 across the potentially blocking dup */
			unp_reference(unp2);
			*msg->accept.nm_nam = dup_sockaddr(
				(struct sockaddr *)unp2->unp_addr);
			unp_free(unp2);
		} else {
			/* Unbound or already-disconnected peer */
			*msg->accept.nm_nam = dup_sockaddr(
				(struct sockaddr *)&sun_noname);
		}
		error = 0;
	}
	lwkt_reltoken(&unp_token);
	lwkt_replymsg(&msg->lmsg, error);
}

/*
 * Attach a new unix-domain socket: allocate its unpcb via
 * unp_attach().
 */
static void
uipc_attach(netmsg_t msg)
{
	struct unpcb *unp;
	int error;

	lwkt_gettoken(&unp_token);
	unp = msg->base.nm_so->so_pcb;
	if (unp)
		error = EISCONN;
	else
		error = unp_attach(msg->base.nm_so, msg->attach.nm_ai);
	lwkt_reltoken(&unp_token);
	lwkt_replymsg(&msg->lmsg, error);
}

/*
 * Bind the socket to a filesystem pathname via unp_bind().
 */
static void
uipc_bind(netmsg_t msg)
{
	struct unpcb *unp;
	int error;

	lwkt_gettoken(&unp_token);
	unp = msg->base.nm_so->so_pcb;
	if (unp)
		error = unp_bind(unp, msg->bind.nm_nam, msg->bind.nm_td);
	else
		error = EINVAL;
	lwkt_reltoken(&unp_token);
	lwkt_replymsg(&msg->lmsg, error);
}

/*
 * Connect to the socket bound at the supplied pathname.
 */
static void
uipc_connect(netmsg_t msg)
{
	struct unpcb *unp;
	int error;

	lwkt_gettoken(&unp_token);
	unp = msg->base.nm_so->so_pcb;
	if (unp) {
		error = unp_connect(msg->base.nm_so,
				    msg->connect.nm_nam,
				    msg->connect.nm_td);
	} else {
		error = EINVAL;
	}
	lwkt_reltoken(&unp_token);
	lwkt_replymsg(&msg->lmsg, error);
}

/*
 * Connect two sockets to each other directly (socketpair path).
 */
static void
uipc_connect2(netmsg_t msg)
{
	struct unpcb *unp;
	int error;

	lwkt_gettoken(&unp_token);
	unp = msg->connect2.nm_so1->so_pcb;
	if (unp) {
		error = unp_connect2(msg->connect2.nm_so1,
				     msg->connect2.nm_so2);
	} else {
		error = EINVAL;
	}
	lwkt_reltoken(&unp_token);
	lwkt_replymsg(&msg->lmsg, error);
}

/* control is EOPNOTSUPP */

/*
 * Detach the pcb from the socket (close path); drops the pcb
 * reference which may trigger unp_detach().
 */
static void
uipc_detach(netmsg_t msg)
{
	struct unpcb *unp;
	int error;

	lwkt_gettoken(&unp_token);
	unp = msg->base.nm_so->so_pcb;
	if (unp) {
		unp_free(unp);
		error = 0;
	} else {
		error = EINVAL;
	}
	lwkt_reltoken(&unp_token);
	lwkt_replymsg(&msg->lmsg, error);
}

/*
 * Disconnect from the peer without destroying the local socket.
 */
static void
uipc_disconnect(netmsg_t msg)
{
	struct unpcb *unp;
	int error;

	lwkt_gettoken(&unp_token);
	unp = msg->base.nm_so->so_pcb;
	if (unp) {
		unp_disconnect(unp);
		error = 0;
	} else {
		error = EINVAL;
	}
	lwkt_reltoken(&unp_token);
	lwkt_replymsg(&msg->lmsg, error);
}

/*
 * Mark the socket as a listener.  Requires a prior successful bind
 * (unp_vnode must be set).
 */
static void
uipc_listen(netmsg_t msg)
{
	struct unpcb *unp;
	int error;

	lwkt_gettoken(&unp_token);
	unp = msg->base.nm_so->so_pcb;
	if (unp == NULL || unp->unp_vnode == NULL)
		error = EINVAL;
	else
		error = unp_listen(unp, msg->listen.nm_td);
	lwkt_reltoken(&unp_token);
	lwkt_replymsg(&msg->lmsg, error);
}

/*
 * Return the address of the connected peer (getpeername path).
 */
static void
uipc_peeraddr(netmsg_t msg)
{
	struct unpcb *unp;
	int error;

	lwkt_gettoken(&unp_token);
	unp = msg->base.nm_so->so_pcb;
	if (unp == NULL) {
		error = EINVAL;
	} else if (unp->unp_conn && unp->unp_conn->unp_addr) {
		struct unpcb *unp2 = unp->unp_conn;

		/* Hold unp2 across the potentially blocking dup */
		unp_reference(unp2);
		*msg->peeraddr.nm_nam = dup_sockaddr(
				(struct sockaddr *)unp2->unp_addr);
		unp_free(unp2);
		error = 0;
	} else {
		/*
		 * XXX: It seems that this test always fails even when
		 * connection is established.  So, this else clause is
		 * added as workaround to return PF_LOCAL sockaddr.
		 */
		*msg->peeraddr.nm_nam = dup_sockaddr(
				(struct sockaddr *)&sun_noname);
		error = 0;
	}
	lwkt_reltoken(&unp_token);
	lwkt_replymsg(&msg->lmsg, error);
}

/*
 * The receiver consumed data: clear the peer sender's SSB_STOP flow
 * control flag once our receive buffer has drained below the peer's
 * send-side limits, and wake the sender.
 */
static void
uipc_rcvd(netmsg_t msg)
{
	struct unpcb *unp, *unp2;
	struct socket *so;
	struct socket *so2;
	int error;

	lwkt_gettoken(&unp_token);
	so = msg->base.nm_so;
	unp = so->so_pcb;
	if (unp == NULL) {
		error = EINVAL;
		goto done;
	}

	switch (so->so_type) {
	case SOCK_DGRAM:
		panic("uipc_rcvd DGRAM?");
		/*NOTREACHED*/
	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		if (unp->unp_conn == NULL)
			break;
		unp2 = unp->unp_conn;

		/*
		 * Because we are transfering mbufs directly to the
		 * peer socket we have to use SSB_STOP on the sender
		 * to prevent it from building up infinite mbufs.
		 */
		so2 = unp2->unp_socket;
		if (so->so_rcv.ssb_cc < so2->so_snd.ssb_hiwat &&
		    so->so_rcv.ssb_mbcnt < so2->so_snd.ssb_mbmax
		) {
			atomic_clear_int(&so2->so_snd.ssb_flags, SSB_STOP);

			/* Hold unp2 across the wakeup */
			unp_reference(unp2);
			sowwakeup(so2);
			unp_free(unp2);
		}
		break;
	default:
		panic("uipc_rcvd unknown socktype");
		/*NOTREACHED*/
	}
	error = 0;
done:
	lwkt_reltoken(&unp_token);
	lwkt_replymsg(&msg->lmsg, error);
}

/* pru_rcvoob is EOPNOTSUPP */

/*
 * Send data (and optional control mbufs, e.g. passed descriptors)
 * to the connected peer.  Mbufs are appended directly to the peer's
 * receive buffer.
 */
static void
uipc_send(netmsg_t msg)
{
	struct unpcb *unp, *unp2;
	struct socket *so;
	struct socket *so2;
	struct mbuf *control;
	struct mbuf *m;
	int error = 0;

	lwkt_gettoken(&unp_token);
	so = msg->base.nm_so;
	control = msg->send.nm_control;
	m = msg->send.nm_m;
	unp = so->so_pcb;

	if (unp == NULL) {
		error = EINVAL;
		goto release;
	}
	if (msg->send.nm_flags & PRUS_OOB) {
		error = EOPNOTSUPP;
		goto release;
	}

	/* Convert any passed descriptors into file pointers first */
	if (control && (error = unp_internalize(control, msg->send.nm_td)))
		goto release;

	switch (so->so_type) {
	case SOCK_DGRAM:
	{
		struct sockaddr *from;

		if (msg->send.nm_addr) {
			/* Implied connect for sendto() with an address */
			if (unp->unp_conn) {
				error = EISCONN;
				break;
			}
			error = unp_connect(so,
					    msg->send.nm_addr,
					    msg->send.nm_td);
			if (error)
				break;
		} else {
			if (unp->unp_conn == NULL) {
				error = ENOTCONN;
				break;
			}
		}
		unp2 = unp->unp_conn;
		so2 = unp2->unp_socket;
		if (unp->unp_addr)
			from = (struct sockaddr *)unp->unp_addr;
		else
			from = &sun_noname;

		/* Hold unp2 while we manipulate its receive buffer */
		unp_reference(unp2);

		lwkt_gettoken(&so2->so_rcv.ssb_token);
		if (ssb_appendaddr(&so2->so_rcv, from, m, control)) {
			sorwakeup(so2);
			m = NULL;
			control = NULL;
		} else {
			error = ENOBUFS;
		}
		/* Undo the implied connect from above */
		if (msg->send.nm_addr)
			unp_disconnect(unp);
		lwkt_reltoken(&so2->so_rcv.ssb_token);

		unp_free(unp2);
		break;
	}

	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		/* Connect if not connected yet. */
		/*
		 * Note: A better implementation would complain
		 * if not equal to the peer's address.
		 */
		if (!(so->so_state & SS_ISCONNECTED)) {
			if (msg->send.nm_addr) {
				error = unp_connect(so,
						    msg->send.nm_addr,
						    msg->send.nm_td);
				if (error)
					break;	/* XXX */
			} else {
				error = ENOTCONN;
				break;
			}
		}

		if (so->so_state & SS_CANTSENDMORE) {
			error = EPIPE;
			break;
		}
		if (unp->unp_conn == NULL)
			panic("uipc_send connected but no connection?");
		unp2 = unp->unp_conn;
		so2 = unp2->unp_socket;

		/* Hold unp2 while we manipulate its receive buffer */
		unp_reference(unp2);

		/*
		 * Send to paired receive port, and then reduce
		 * send buffer hiwater marks to maintain backpressure.
		 * Wake up readers.
		 */
		lwkt_gettoken(&so2->so_rcv.ssb_token);
		if (control) {
			if (ssb_appendcontrol(&so2->so_rcv, m, control)) {
				control = NULL;
				m = NULL;
			}
		} else if (so->so_type == SOCK_SEQPACKET) {
			/* seqpacket preserves record boundaries */
			sbappendrecord(&so2->so_rcv.sb, m);
			m = NULL;
		} else {
			sbappend(&so2->so_rcv.sb, m);
			m = NULL;
		}

		/*
		 * Because we are transfering mbufs directly to the
		 * peer socket we have to use SSB_STOP on the sender
		 * to prevent it from building up infinite mbufs.
		 */
		if (so2->so_rcv.ssb_cc >= so->so_snd.ssb_hiwat ||
		    so2->so_rcv.ssb_mbcnt >= so->so_snd.ssb_mbmax
		) {
			atomic_set_int(&so->so_snd.ssb_flags, SSB_STOP);
		}
		lwkt_reltoken(&so2->so_rcv.ssb_token);
		sorwakeup(so2);

		unp_free(unp2);
		break;

	default:
		panic("uipc_send unknown socktype");
	}

	/*
	 * SEND_EOF is equivalent to a SEND followed by a SHUTDOWN.
	 */
	if (msg->send.nm_flags & PRUS_EOF) {
		socantsendmore(so);
		unp_shutdown(unp);
	}

	if (control && error != 0)
		unp_dispose(control);

release:
	lwkt_reltoken(&unp_token);

	/* Free anything that was not handed off to the peer */
	if (control)
		m_freem(control);
	if (m)
		m_freem(m);
	lwkt_replymsg(&msg->lmsg, error);
}

/*
 * Fill in stat(2)-style information for the socket, fabricating a
 * stable non-zero inode number on first use.
 *
 * MPSAFE
 */
static void
uipc_sense(netmsg_t msg)
{
	struct unpcb *unp;
	struct socket *so;
	struct stat *sb;
	int error;

	lwkt_gettoken(&unp_token);
	so = msg->base.nm_so;
	sb = msg->sense.nm_stat;
	unp = so->so_pcb;
	if (unp == NULL) {
		error = EINVAL;
		goto done;
	}
	sb->st_blksize = so->so_snd.ssb_hiwat;
	sb->st_dev = NOUDEV;
	if (unp->unp_ino == 0) {	/* make up a non-zero inode number */
		spin_lock(&unp_ino_spin);
		unp->unp_ino = unp_ino++;
		spin_unlock(&unp_ino_spin);
	}
	sb->st_ino = unp->unp_ino;
	error = 0;
done:
	lwkt_reltoken(&unp_token);
	lwkt_replymsg(&msg->lmsg, error);
}

static void
uipc_shutdown(netmsg_t
msg) 603 { 604 struct socket *so; 605 struct unpcb *unp; 606 int error; 607 608 lwkt_gettoken(&unp_token); 609 so = msg->base.nm_so; 610 unp = so->so_pcb; 611 if (unp) { 612 socantsendmore(so); 613 unp_shutdown(unp); 614 error = 0; 615 } else { 616 error = EINVAL; 617 } 618 lwkt_reltoken(&unp_token); 619 lwkt_replymsg(&msg->lmsg, error); 620 } 621 622 static void 623 uipc_sockaddr(netmsg_t msg) 624 { 625 struct unpcb *unp; 626 int error; 627 628 lwkt_gettoken(&unp_token); 629 unp = msg->base.nm_so->so_pcb; 630 if (unp) { 631 if (unp->unp_addr) { 632 *msg->sockaddr.nm_nam = 633 dup_sockaddr((struct sockaddr *)unp->unp_addr); 634 } 635 error = 0; 636 } else { 637 error = EINVAL; 638 } 639 lwkt_reltoken(&unp_token); 640 lwkt_replymsg(&msg->lmsg, error); 641 } 642 643 struct pr_usrreqs uipc_usrreqs = { 644 .pru_abort = uipc_abort, 645 .pru_accept = uipc_accept, 646 .pru_attach = uipc_attach, 647 .pru_bind = uipc_bind, 648 .pru_connect = uipc_connect, 649 .pru_connect2 = uipc_connect2, 650 .pru_control = pr_generic_notsupp, 651 .pru_detach = uipc_detach, 652 .pru_disconnect = uipc_disconnect, 653 .pru_listen = uipc_listen, 654 .pru_peeraddr = uipc_peeraddr, 655 .pru_rcvd = uipc_rcvd, 656 .pru_rcvoob = pr_generic_notsupp, 657 .pru_send = uipc_send, 658 .pru_sense = uipc_sense, 659 .pru_shutdown = uipc_shutdown, 660 .pru_sockaddr = uipc_sockaddr, 661 .pru_sosend = sosend, 662 .pru_soreceive = soreceive 663 }; 664 665 void 666 uipc_ctloutput(netmsg_t msg) 667 { 668 struct socket *so; 669 struct sockopt *sopt; 670 struct unpcb *unp; 671 int error = 0; 672 673 lwkt_gettoken(&unp_token); 674 so = msg->base.nm_so; 675 sopt = msg->ctloutput.nm_sopt; 676 unp = so->so_pcb; 677 678 switch (sopt->sopt_dir) { 679 case SOPT_GET: 680 switch (sopt->sopt_name) { 681 case LOCAL_PEERCRED: 682 if (unp->unp_flags & UNP_HAVEPC) 683 soopt_from_kbuf(sopt, &unp->unp_peercred, 684 sizeof(unp->unp_peercred)); 685 else { 686 if (so->so_type == SOCK_STREAM) 687 error = ENOTCONN; 688 else if 
(so->so_type == SOCK_SEQPACKET) 689 error = ENOTCONN; 690 else 691 error = EINVAL; 692 } 693 break; 694 default: 695 error = EOPNOTSUPP; 696 break; 697 } 698 break; 699 case SOPT_SET: 700 default: 701 error = EOPNOTSUPP; 702 break; 703 } 704 lwkt_reltoken(&unp_token); 705 lwkt_replymsg(&msg->lmsg, error); 706 } 707 708 /* 709 * Both send and receive buffers are allocated PIPSIZ bytes of buffering 710 * for stream sockets, although the total for sender and receiver is 711 * actually only PIPSIZ. 712 * 713 * Datagram sockets really use the sendspace as the maximum datagram size, 714 * and don't really want to reserve the sendspace. Their recvspace should 715 * be large enough for at least one max-size datagram plus address. 716 * 717 * We want the local send/recv space to be significant larger then lo0's 718 * mtu of 16384. 719 */ 720 #ifndef PIPSIZ 721 #define PIPSIZ 57344 722 #endif 723 static u_long unpst_sendspace = PIPSIZ; 724 static u_long unpst_recvspace = PIPSIZ; 725 static u_long unpdg_sendspace = 2*1024; /* really max datagram size */ 726 static u_long unpdg_recvspace = 4*1024; 727 728 static int unp_rights; /* file descriptors in flight */ 729 static struct spinlock unp_spin = SPINLOCK_INITIALIZER(&unp_spin); 730 731 SYSCTL_DECL(_net_local_seqpacket); 732 SYSCTL_DECL(_net_local_stream); 733 SYSCTL_INT(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW, 734 &unpst_sendspace, 0, "Size of stream socket send buffer"); 735 SYSCTL_INT(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW, 736 &unpst_recvspace, 0, "Size of stream socket receive buffer"); 737 738 SYSCTL_DECL(_net_local_dgram); 739 SYSCTL_INT(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW, 740 &unpdg_sendspace, 0, "Max datagram socket size"); 741 SYSCTL_INT(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW, 742 &unpdg_recvspace, 0, "Size of datagram socket receive buffer"); 743 744 SYSCTL_DECL(_net_local); 745 SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0, 746 "File 
descriptors in flight"); 747 748 static int 749 unp_attach(struct socket *so, struct pru_attach_info *ai) 750 { 751 struct unpcb *unp; 752 int error; 753 754 lwkt_gettoken(&unp_token); 755 756 if (so->so_snd.ssb_hiwat == 0 || so->so_rcv.ssb_hiwat == 0) { 757 switch (so->so_type) { 758 759 case SOCK_STREAM: 760 case SOCK_SEQPACKET: 761 error = soreserve(so, unpst_sendspace, unpst_recvspace, 762 ai->sb_rlimit); 763 break; 764 765 case SOCK_DGRAM: 766 error = soreserve(so, unpdg_sendspace, unpdg_recvspace, 767 ai->sb_rlimit); 768 break; 769 770 default: 771 panic("unp_attach"); 772 } 773 if (error) 774 goto failed; 775 } 776 unp = kmalloc(sizeof(*unp), M_UNPCB, M_WAITOK | M_ZERO | M_NULLOK); 777 if (unp == NULL) { 778 error = ENOBUFS; 779 goto failed; 780 } 781 unp->unp_refcnt = 1; 782 unp->unp_gencnt = ++unp_gencnt; 783 unp_count++; 784 LIST_INIT(&unp->unp_refs); 785 unp->unp_socket = so; 786 unp->unp_rvnode = ai->fd_rdir; /* jail cruft XXX JH */ 787 LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ? 
			 &unp_dhead : &unp_shead, unp, unp_link);
	so->so_pcb = (caddr_t)unp;
	soreference(so);
	error = 0;
failed:
	lwkt_reltoken(&unp_token);
	return error;
}

/*
 * Final teardown of a unpcb: unlink it from the global pcb list,
 * detach it from its vnode, its connection and its socket, then
 * free it.  Called from unp_free() when the last reference drops.
 */
static void
unp_detach(struct unpcb *unp)
{
	struct socket *so;

	lwkt_gettoken(&unp_token);

	LIST_REMOVE(unp, unp_link);
	unp->unp_gencnt = ++unp_gencnt;
	--unp_count;
	if (unp->unp_vnode) {
		unp->unp_vnode->v_socket = NULL;
		vrele(unp->unp_vnode);
		unp->unp_vnode = NULL;
	}
	if (unp->unp_conn)
		unp_disconnect(unp);
	/* Reset any datagram senders still pointing at us */
	while (!LIST_EMPTY(&unp->unp_refs))
		unp_drop(LIST_FIRST(&unp->unp_refs), ECONNRESET);
	soisdisconnected(unp->unp_socket);
	so = unp->unp_socket;
	soreference(so);		/* for delayed sorflush */
	so->so_pcb = NULL;
	unp->unp_socket = NULL;
	sofree(so);			/* remove pcb ref */

	if (unp_rights) {
		/*
		 * Normally the receive buffer is flushed later,
		 * in sofree, but if our receive buffer holds references
		 * to descriptors that are now garbage, we will dispose
		 * of those descriptor references after the garbage collector
		 * gets them (resulting in a "panic: closef: count < 0").
		 */
		sorflush(so);
		unp_gc();
	}
	sofree(so);
	lwkt_reltoken(&unp_token);

	if (unp->unp_addr)
		kfree(unp->unp_addr, M_SONAME);
	kfree(unp, M_UNPCB);
}

/*
 * Bind the socket to a filesystem pathname by creating a VSOCK
 * vnode for it.  Fails with EADDRINUSE if the path already exists
 * and EINVAL if the socket is already bound or the name is empty.
 */
static int
unp_bind(struct unpcb *unp, struct sockaddr *nam, struct thread *td)
{
	struct proc *p = td->td_proc;
	struct sockaddr_un *soun = (struct sockaddr_un *)nam;
	struct vnode *vp;
	struct vattr vattr;
	int error, namelen;
	struct nlookupdata nd;
	char buf[SOCK_MAXADDRLEN];

	lwkt_gettoken(&unp_token);
	if (unp->unp_vnode != NULL) {
		error = EINVAL;
		goto failed;
	}
	namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path);
	if (namelen <= 0) {
		error = EINVAL;
		goto failed;
	}
	/* sun_path is not necessarily terminated; copy and terminate */
	strncpy(buf, soun->sun_path, namelen);
	buf[namelen] = 0;	/* null-terminate the string */
	error = nlookup_init(&nd, buf, UIO_SYSSPACE,
			     NLC_LOCKVP | NLC_CREATE | NLC_REFDVP);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0 && nd.nl_nch.ncp->nc_vp != NULL)
		error = EADDRINUSE;
	if (error)
		goto done;

	VATTR_NULL(&vattr);
	vattr.va_type = VSOCK;
	vattr.va_mode = (ACCESSPERMS & ~p->p_fd->fd_cmask);
	error = VOP_NCREATE(&nd.nl_nch, nd.nl_dvp, &vp, nd.nl_cred, &vattr);
	if (error == 0) {
		vp->v_socket = unp->unp_socket;
		unp->unp_vnode = vp;
		unp->unp_addr = (struct sockaddr_un *)dup_sockaddr(nam);
		vn_unlock(vp);
	}
done:
	nlookup_done(&nd);
failed:
	lwkt_reltoken(&unp_token);
	return (error);
}

/*
 * Connect the socket to the one bound at pathname (nam).  For
 * connection-oriented types a fresh socket is spawned on the
 * listener via sonewconn() and peer credentials are exchanged.
 */
static int
unp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
{
	struct proc *p = td->td_proc;
	struct sockaddr_un *soun = (struct sockaddr_un *)nam;
	struct vnode *vp;
	struct socket *so2, *so3;
	struct unpcb *unp, *unp2, *unp3;
	int error, len;
	struct nlookupdata nd;
	char buf[SOCK_MAXADDRLEN];

	lwkt_gettoken(&unp_token);

	len = nam->sa_len - offsetof(struct
	    sockaddr_un, sun_path);
	if (len <= 0) {
		error = EINVAL;
		goto failed;
	}
	/* sun_path is not necessarily terminated; copy and terminate */
	strncpy(buf, soun->sun_path, len);
	buf[len] = 0;

	vp = NULL;
	error = nlookup_init(&nd, buf, UIO_SYSSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
	nlookup_done(&nd);
	if (error)
		goto failed;

	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		goto bad;
	}
	/* Caller must have write access to the socket node */
	error = VOP_EACCESS(vp, VWRITE, p->p_ucred);
	if (error)
		goto bad;
	so2 = vp->v_socket;
	if (so2 == NULL) {
		error = ECONNREFUSED;
		goto bad;
	}
	if (so->so_type != so2->so_type) {
		error = EPROTOTYPE;
		goto bad;
	}
	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		/* Spawn the server-side socket off the listener */
		if (!(so2->so_options & SO_ACCEPTCONN) ||
		    (so3 = sonewconn(so2, 0)) == NULL) {
			error = ECONNREFUSED;
			goto bad;
		}
		unp = so->so_pcb;
		unp2 = so2->so_pcb;
		unp3 = so3->so_pcb;
		if (unp2->unp_addr)
			unp3->unp_addr = (struct sockaddr_un *)
				dup_sockaddr((struct sockaddr *)unp2->unp_addr);

		/*
		 * unp_peercred management:
		 *
		 * The connecter's (client's) credentials are copied
		 * from its process structure at the time of connect()
		 * (which is now).
		 */
		cru2x(p->p_ucred, &unp3->unp_peercred);
		unp3->unp_flags |= UNP_HAVEPC;
		/*
		 * The receiver's (server's) credentials are copied
		 * from the unp_peercred member of socket on which the
		 * former called listen(); unp_listen() cached that
		 * process's credentials at that time so we can use
		 * them now.
		 */
		KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED,
			("unp_connect: listener without cached peercred"));
		memcpy(&unp->unp_peercred, &unp2->unp_peercred,
		       sizeof(unp->unp_peercred));
		unp->unp_flags |= UNP_HAVEPC;

		/* Complete the connection against the spawned socket */
		so2 = so3;
	}
	error = unp_connect2(so, so2);
bad:
	vput(vp);
failed:
	lwkt_reltoken(&unp_token);
	return (error);
}

/*
 * Cross-link two unix-domain pcbs.  Datagram sockets are linked
 * one-way onto the peer's reference list; stream and seqpacket
 * sockets are connected in both directions.
 */
int
unp_connect2(struct socket *so, struct socket *so2)
{
	struct unpcb *unp;
	struct unpcb *unp2;

	lwkt_gettoken(&unp_token);
	unp = so->so_pcb;
	if (so2->so_type != so->so_type) {
		lwkt_reltoken(&unp_token);
		return (EPROTOTYPE);
	}
	unp2 = so2->so_pcb;
	unp->unp_conn = unp2;

	switch (so->so_type) {
	case SOCK_DGRAM:
		LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink);
		soisconnected(so);
		break;

	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		unp2->unp_conn = unp;
		soisconnected(so);
		soisconnected(so2);
		break;

	default:
		panic("unp_connect2");
	}
	lwkt_reltoken(&unp_token);
	return (0);
}

/*
 * Sever the link to the connected peer, if any.
 */
static void
unp_disconnect(struct unpcb *unp)
{
	struct unpcb *unp2;

	lwkt_gettoken(&unp_token);

	unp2 = unp->unp_conn;
	if (unp2 == NULL) {
		lwkt_reltoken(&unp_token);
		return;
	}

	unp->unp_conn = NULL;

	switch (unp->unp_socket->so_type) {
	case SOCK_DGRAM:
		LIST_REMOVE(unp, unp_reflink);
		soclrstate(unp->unp_socket, SS_ISCONNECTED);
		break;

	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		/* Hold unp2 while both sides are marked disconnected */
		unp_reference(unp2);
		unp2->unp_conn = NULL;

		soisdisconnected(unp->unp_socket);
		soisdisconnected(unp2->unp_socket);

		unp_free(unp2);
		break;
	}
	lwkt_reltoken(&unp_token);
}

#ifdef notdef
void
unp_abort(struct unpcb *unp)
{
	lwkt_gettoken(&unp_token);
	unp_free(unp);
	lwkt_reltoken(&unp_token);
}
#endif 1063 1064 static int 1065 prison_unpcb(struct thread *td, struct unpcb *unp) 1066 { 1067 struct proc *p; 1068 1069 if (td == NULL) 1070 return (0); 1071 if ((p = td->td_proc) == NULL) 1072 return (0); 1073 if (!p->p_ucred->cr_prison) 1074 return (0); 1075 if (p->p_fd->fd_rdir == unp->unp_rvnode) 1076 return (0); 1077 return (1); 1078 } 1079 1080 static int 1081 unp_pcblist(SYSCTL_HANDLER_ARGS) 1082 { 1083 int error, i, n; 1084 struct unpcb *unp, **unp_list; 1085 unp_gen_t gencnt; 1086 struct unp_head *head; 1087 1088 head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead); 1089 1090 KKASSERT(curproc != NULL); 1091 1092 /* 1093 * The process of preparing the PCB list is too time-consuming and 1094 * resource-intensive to repeat twice on every request. 1095 */ 1096 if (req->oldptr == NULL) { 1097 n = unp_count; 1098 req->oldidx = (n + n/8) * sizeof(struct xunpcb); 1099 return 0; 1100 } 1101 1102 if (req->newptr != NULL) 1103 return EPERM; 1104 1105 lwkt_gettoken(&unp_token); 1106 1107 /* 1108 * OK, now we're committed to doing something. 1109 */ 1110 gencnt = unp_gencnt; 1111 n = unp_count; 1112 1113 unp_list = kmalloc(n * sizeof *unp_list, M_TEMP, M_WAITOK); 1114 1115 for (unp = LIST_FIRST(head), i = 0; unp && i < n; 1116 unp = LIST_NEXT(unp, unp_link)) { 1117 if (unp->unp_gencnt <= gencnt && !prison_unpcb(req->td, unp)) 1118 unp_list[i++] = unp; 1119 } 1120 n = i; /* in case we lost some during malloc */ 1121 1122 error = 0; 1123 for (i = 0; i < n; i++) { 1124 unp = unp_list[i]; 1125 if (unp->unp_gencnt <= gencnt) { 1126 struct xunpcb xu; 1127 xu.xu_len = sizeof xu; 1128 xu.xu_unpp = unp; 1129 /* 1130 * XXX - need more locking here to protect against 1131 * connect/disconnect races for SMP. 
1132 */ 1133 if (unp->unp_addr) 1134 bcopy(unp->unp_addr, &xu.xu_addr, 1135 unp->unp_addr->sun_len); 1136 if (unp->unp_conn && unp->unp_conn->unp_addr) 1137 bcopy(unp->unp_conn->unp_addr, 1138 &xu.xu_caddr, 1139 unp->unp_conn->unp_addr->sun_len); 1140 bcopy(unp, &xu.xu_unp, sizeof *unp); 1141 sotoxsocket(unp->unp_socket, &xu.xu_socket); 1142 error = SYSCTL_OUT(req, &xu, sizeof xu); 1143 } 1144 } 1145 lwkt_reltoken(&unp_token); 1146 kfree(unp_list, M_TEMP); 1147 1148 return error; 1149 } 1150 1151 SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist, CTLFLAG_RD, 1152 (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb", 1153 "List of active local datagram sockets"); 1154 SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLFLAG_RD, 1155 (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb", 1156 "List of active local stream sockets"); 1157 SYSCTL_PROC(_net_local_seqpacket, OID_AUTO, pcblist, CTLFLAG_RD, 1158 (caddr_t)(long)SOCK_SEQPACKET, 0, unp_pcblist, "S,xunpcb", 1159 "List of active local seqpacket stream sockets"); 1160 1161 static void 1162 unp_shutdown(struct unpcb *unp) 1163 { 1164 struct socket *so; 1165 1166 if ((unp->unp_socket->so_type == SOCK_STREAM || 1167 unp->unp_socket->so_type == SOCK_SEQPACKET) && 1168 unp->unp_conn != NULL && (so = unp->unp_conn->unp_socket)) { 1169 socantrcvmore(so); 1170 } 1171 } 1172 1173 static void 1174 unp_drop(struct unpcb *unp, int err) 1175 { 1176 struct socket *so = unp->unp_socket; 1177 1178 so->so_error = err; 1179 unp_disconnect(unp); 1180 } 1181 1182 #ifdef notdef 1183 void 1184 unp_drain(void) 1185 { 1186 lwkt_gettoken(&unp_token); 1187 lwkt_reltoken(&unp_token); 1188 } 1189 #endif 1190 1191 int 1192 unp_externalize(struct mbuf *rights) 1193 { 1194 struct thread *td = curthread; 1195 struct proc *p = td->td_proc; /* XXX */ 1196 struct lwp *lp = td->td_lwp; 1197 struct cmsghdr *cm = mtod(rights, struct cmsghdr *); 1198 int *fdp; 1199 int i; 1200 struct file **rp; 1201 struct file *fp; 1202 int newfds = (cm->cmsg_len - 
		      (CMSG_DATA(cm) - (u_char *)cm)) / sizeof (struct file *);
	int f;

	lwkt_gettoken(&unp_token);

	/*
	 * if the new FD's will not fit, then we free them all
	 */
	if (!fdavail(p, newfds)) {
		rp = (struct file **)CMSG_DATA(cm);
		for (i = 0; i < newfds; i++) {
			fp = *rp;
			/*
			 * zero the pointer before calling unp_discard,
			 * since it may end up in unp_gc()..
			 */
			*rp++ = 0;
			unp_discard(fp, NULL);
		}
		lwkt_reltoken(&unp_token);
		return (EMSGSIZE);
	}

	/*
	 * now change each pointer to an fd in the global table to
	 * an integer that is the index to the local fd table entry
	 * that we set up to point to the global one we are transferring.
	 * If sizeof (struct file *) is bigger than or equal to sizeof int,
	 * then do it in forward order. In that case, an integer will
	 * always come in the same place or before its corresponding
	 * struct file pointer.
	 * If sizeof (struct file *) is smaller than sizeof int, then
	 * do it in reverse order.
	 */
	if (sizeof (struct file *) >= sizeof (int)) {
		fdp = (int *)CMSG_DATA(cm);
		rp = (struct file **)CMSG_DATA(cm);
		for (i = 0; i < newfds; i++) {
			if (fdalloc(p, 0, &f))
				panic("unp_externalize");
			fp = *rp++;
			unp_fp_externalize(lp, fp, f);
			*fdp++ = f;
		}
	} else {
		fdp = (int *)CMSG_DATA(cm) + newfds - 1;
		rp = (struct file **)CMSG_DATA(cm) + newfds - 1;
		for (i = 0; i < newfds; i++) {
			if (fdalloc(p, 0, &f))
				panic("unp_externalize");
			fp = *rp--;
			unp_fp_externalize(lp, fp, f);
			*fdp-- = f;
		}
	}

	/*
	 * Adjust length, in case sizeof(struct file *) and sizeof(int)
	 * differs.
	 */
	cm->cmsg_len = CMSG_LEN(newfds * sizeof(int));
	rights->m_len = cm->cmsg_len;

	lwkt_reltoken(&unp_token);
	return (0);
}

/*
 * Install fp at descriptor slot fd in lp's fd table, or, when lp is
 * NULL (as from unp_revoke_gc), just release the message reference.
 * A revoked fp is not handed to the process; a freshly falloc'd
 * placeholder file is installed in its slot instead (or the slot is
 * cleared if allocation fails).  The message accounting (f_msgcount,
 * unp_rights) is always decremented under unp_spin.
 */
static void
unp_fp_externalize(struct lwp *lp, struct file *fp, int fd)
{
	struct file *fx;
	int error;

	lwkt_gettoken(&unp_token);

	if (lp) {
		KKASSERT(fd >= 0);
		if (fp->f_flag & FREVOKED) {
			kprintf("Warning: revoked fp exiting unix socket\n");
			fx = NULL;
			error = falloc(lp, &fx, NULL);
			if (error == 0)
				fsetfd(lp->lwp_proc->p_fd, fx, fd);
			else
				fsetfd(lp->lwp_proc->p_fd, NULL, fd);
			fdrop(fx);
		} else {
			fsetfd(lp->lwp_proc->p_fd, fp, fd);
		}
	}
	spin_lock(&unp_spin);
	fp->f_msgcount--;
	unp_rights--;
	spin_unlock(&unp_spin);
	fdrop(fp);

	lwkt_reltoken(&unp_token);
}


/*
 * One-time initialization of the unix-domain pcb lists and unp_spin.
 */
void
unp_init(void)
{
	LIST_INIT(&unp_dhead);
	LIST_INIT(&unp_shead);
	spin_init(&unp_spin);
}

/*
 * Prepare an outbound control message: fill in SCM_CREDS credentials
 * from the sending process, or convert the user file descriptors in an
 * SCM_RIGHTS message into held struct file pointers.
 */
static int
unp_internalize(struct mbuf *control, struct thread *td)
{
	struct proc *p = td->td_proc;
	struct filedesc *fdescp;
	struct cmsghdr *cm = mtod(control, struct cmsghdr *);
	struct file **rp;
	struct file *fp;
	int i, fd, *fdp;
	struct cmsgcred *cmcred;
	int oldfds;
	u_int newlen;
	int error;

	KKASSERT(p);
	lwkt_gettoken(&unp_token);

	fdescp = p->p_fd;
	if ((cm->cmsg_type != SCM_RIGHTS && cm->cmsg_type != SCM_CREDS) ||
	    cm->cmsg_level != SOL_SOCKET ||
	    CMSG_ALIGN(cm->cmsg_len) != control->m_len) {
		error = EINVAL;
		goto done;
	}

	/*
	 * Fill in credential information.
	 */
	if (cm->cmsg_type == SCM_CREDS) {
		cmcred = (struct cmsgcred *)CMSG_DATA(cm);
		cmcred->cmcred_pid = p->p_pid;
		cmcred->cmcred_uid = p->p_ucred->cr_ruid;
		cmcred->cmcred_gid = p->p_ucred->cr_rgid;
		cmcred->cmcred_euid = p->p_ucred->cr_uid;
		cmcred->cmcred_ngroups = MIN(p->p_ucred->cr_ngroups,
					     CMGROUP_MAX);
		for (i = 0; i < cmcred->cmcred_ngroups; i++)
			cmcred->cmcred_groups[i] = p->p_ucred->cr_groups[i];
		error = 0;
		goto done;
	}

	/*
	 * cmsghdr may not be aligned, do not allow calculation(s) to
	 * go negative.
	 */
	if (cm->cmsg_len < CMSG_LEN(0)) {
		error = EINVAL;
		goto done;
	}

	oldfds = (cm->cmsg_len - CMSG_LEN(0)) / sizeof (int);

	/*
	 * check that all the FDs passed in refer to legal OPEN files
	 * If not, reject the entire operation.
	 */
	fdp = (int *)CMSG_DATA(cm);
	for (i = 0; i < oldfds; i++) {
		fd = *fdp++;
		if ((unsigned)fd >= fdescp->fd_nfiles ||
		    fdescp->fd_files[fd].fp == NULL) {
			error = EBADF;
			goto done;
		}
		if (fdescp->fd_files[fd].fp->f_type == DTYPE_KQUEUE) {
			error = EOPNOTSUPP;
			goto done;
		}
	}
	/*
	 * Now replace the integer FDs with pointers to
	 * the associated global file table entry..
	 * Allocate a bigger buffer as necessary. But if an cluster is not
	 * enough, return E2BIG.
	 */
	newlen = CMSG_LEN(oldfds * sizeof(struct file *));
	if (newlen > MCLBYTES) {
		error = E2BIG;
		goto done;
	}
	if (newlen - control->m_len > M_TRAILINGSPACE(control)) {
		if (control->m_flags & M_EXT) {
			error = E2BIG;
			goto done;
		}
		MCLGET(control, MB_WAIT);
		if (!(control->m_flags & M_EXT)) {
			error = ENOBUFS;
			goto done;
		}

		/* copy the data to the cluster */
		memcpy(mtod(control, char *), cm, cm->cmsg_len);
		cm = mtod(control, struct cmsghdr *);
	}

	/*
	 * Adjust length, in case sizeof(struct file *) and sizeof(int)
	 * differs.
	 */
	cm->cmsg_len = newlen;
	control->m_len = CMSG_ALIGN(newlen);

	/*
	 * Transform the file descriptors into struct file pointers.
	 * If sizeof (struct file *) is bigger than or equal to sizeof int,
	 * then do it in reverse order so that the int won't get until
	 * we're done.
	 * If sizeof (struct file *) is smaller than sizeof int, then
	 * do it in forward order.
	 */
	if (sizeof (struct file *) >= sizeof (int)) {
		fdp = (int *)CMSG_DATA(cm) + oldfds - 1;
		rp = (struct file **)CMSG_DATA(cm) + oldfds - 1;
		for (i = 0; i < oldfds; i++) {
			fp = fdescp->fd_files[*fdp--].fp;
			*rp-- = fp;
			fhold(fp);
			spin_lock(&unp_spin);
			fp->f_msgcount++;
			unp_rights++;
			spin_unlock(&unp_spin);
		}
	} else {
		fdp = (int *)CMSG_DATA(cm);
		rp = (struct file **)CMSG_DATA(cm);
		for (i = 0; i < oldfds; i++) {
			fp = fdescp->fd_files[*fdp++].fp;
			*rp++ = fp;
			fhold(fp);
			spin_lock(&unp_spin);
			fp->f_msgcount++;
			unp_rights++;
			spin_unlock(&unp_spin);
		}
	}
	error = 0;
done:
	lwkt_reltoken(&unp_token);
	return error;
}

/*
 * Garbage collect in-transit file descriptors that get lost due to
 * loops (i.e. when a socket is sent to another process over itself,
 * and more complex situations).
 *
 * NOT MPSAFE - TODO socket flush code and maybe closef.  Rest is MPSAFE.
 */

struct unp_gc_info {
	struct file **extra_ref;	/* extra-held refs to sweep */
	struct file *locked_fp;		/* fp being scanned (its lock held) */
	int defer;			/* fps deferred to the next pass */
	int index;			/* fill level of extra_ref */
	int maxindex;			/* capacity of extra_ref */
};

/*
 * Mark-and-sweep collector for unreachable in-flight descriptors.
 * See the long comment below for why a simple per-f_msgcount discard
 * is incorrect.
 */
static void
unp_gc(void)
{
	struct unp_gc_info info;
	static boolean_t unp_gcing;
	struct file **fpp;
	int i;

	/*
	 * Only one gc can be in-progress at any given moment
	 */
	spin_lock(&unp_spin);
	if (unp_gcing) {
		spin_unlock(&unp_spin);
		return;
	}
	unp_gcing = TRUE;
	spin_unlock(&unp_spin);

	lwkt_gettoken(&unp_token);

	/*
	 * Before going through all this, set all FDs to be NOT defered
	 * and NOT externally accessible (not marked).  During the scan
	 * a fd can be marked externally accessible but we may or may not
	 * be able to immediately process it (controlled by FDEFER).
	 *
	 * If we loop sleep a bit.  The complexity of the topology can cause
	 * multiple loops.  Also failure to acquire the socket's so_rcv
	 * token can cause us to loop.
	 */
	allfiles_scan_exclusive(unp_gc_clearmarks, NULL);
	do {
		info.defer = 0;
		allfiles_scan_exclusive(unp_gc_checkmarks, &info);
		if (info.defer)
			tsleep(&info, 0, "gcagain", 1);
	} while (info.defer);

	/*
	 * We grab an extra reference to each of the file table entries
	 * that are not otherwise accessible and then free the rights
	 * that are stored in messages on them.
	 *
	 * The bug in the original code is a little tricky, so I'll describe
	 * what's wrong with it here.
	 *
	 * It is incorrect to simply unp_discard each entry for f_msgcount
	 * times -- consider the case of sockets A and B that contain
	 * references to each other.  On a last close of some other socket,
	 * we trigger a gc since the number of outstanding rights
	 * (unp_rights) is non-zero.  If during the sweep phase the gc code
	 * un_discards, we end up doing a (full) closef on the descriptor.
	 * A closef on A results in the following chain.  Closef calls
	 * soo_close, which calls soclose.  Soclose calls first (through
	 * the switch uipc_usrreq) unp_detach, which re-invokes unp_gc.
	 * Unp_gc simply returns because the previous instance had set
	 * unp_gcing, and we return all the way back to soclose, which
	 * marks the socket with SS_NOFDREF, and then calls sofree.  Sofree
	 * calls sorflush to free up the rights that are queued in messages
	 * on the socket A, i.e., the reference on B.  The sorflush calls
	 * via the dom_dispose switch unp_dispose, which unp_scans with
	 * unp_discard.  This second instance of unp_discard just calls
	 * closef on B.
	 *
	 * Well, a similar chain occurs on B, resulting in a sorflush on B,
	 * which results in another closef on A.  Unfortunately, A is
	 * already being closed, and the descriptor has already been marked
	 * with SS_NOFDREF, and soclose panics at this point.
	 *
	 * Here, we first take an extra reference to each inaccessible
	 * descriptor.  Then, we call sorflush ourself, since we know
	 * it is a Unix domain socket anyhow.  After we destroy all the
	 * rights carried in messages, we do a last closef to get rid
	 * of our extra reference.  This is the last close, and the
	 * unp_detach etc will shut down the socket.
1545 * 1546 * 91/09/19, bsy@cs.cmu.edu 1547 */ 1548 info.extra_ref = kmalloc(256 * sizeof(struct file *), M_FILE, M_WAITOK); 1549 info.maxindex = 256; 1550 1551 do { 1552 /* 1553 * Look for matches 1554 */ 1555 info.index = 0; 1556 allfiles_scan_exclusive(unp_gc_checkrefs, &info); 1557 1558 /* 1559 * For each FD on our hit list, do the following two things 1560 */ 1561 for (i = info.index, fpp = info.extra_ref; --i >= 0; ++fpp) { 1562 struct file *tfp = *fpp; 1563 if (tfp->f_type == DTYPE_SOCKET && tfp->f_data != NULL) 1564 sorflush((struct socket *)(tfp->f_data)); 1565 } 1566 for (i = info.index, fpp = info.extra_ref; --i >= 0; ++fpp) 1567 closef(*fpp, NULL); 1568 } while (info.index == info.maxindex); 1569 1570 lwkt_reltoken(&unp_token); 1571 1572 kfree((caddr_t)info.extra_ref, M_FILE); 1573 unp_gcing = FALSE; 1574 } 1575 1576 /* 1577 * MPSAFE - NOTE: filehead list and file pointer spinlocked on entry 1578 */ 1579 static int 1580 unp_gc_checkrefs(struct file *fp, void *data) 1581 { 1582 struct unp_gc_info *info = data; 1583 1584 if (fp->f_count == 0) 1585 return(0); 1586 if (info->index == info->maxindex) 1587 return(-1); 1588 1589 /* 1590 * If all refs are from msgs, and it's not marked accessible 1591 * then it must be referenced from some unreachable cycle 1592 * of (shut-down) FDs, so include it in our 1593 * list of FDs to remove 1594 */ 1595 if (fp->f_count == fp->f_msgcount && !(fp->f_flag & FMARK)) { 1596 info->extra_ref[info->index++] = fp; 1597 fhold(fp); 1598 } 1599 return(0); 1600 } 1601 1602 /* 1603 * MPSAFE - NOTE: filehead list and file pointer spinlocked on entry 1604 */ 1605 static int 1606 unp_gc_clearmarks(struct file *fp, void *data __unused) 1607 { 1608 atomic_clear_int(&fp->f_flag, FMARK | FDEFER); 1609 return(0); 1610 } 1611 1612 /* 1613 * MPSAFE - NOTE: filehead list and file pointer spinlocked on entry 1614 */ 1615 static int 1616 unp_gc_checkmarks(struct file *fp, void *data) 1617 { 1618 struct unp_gc_info *info = data; 1619 struct 
socket *so; 1620 1621 /* 1622 * If the file is not open, skip it. Make sure it isn't marked 1623 * defered or we could loop forever, in case we somehow race 1624 * something. 1625 */ 1626 if (fp->f_count == 0) { 1627 if (fp->f_flag & FDEFER) 1628 atomic_clear_int(&fp->f_flag, FDEFER); 1629 return(0); 1630 } 1631 /* 1632 * If we already marked it as 'defer' in a 1633 * previous pass, then try process it this time 1634 * and un-mark it 1635 */ 1636 if (fp->f_flag & FDEFER) { 1637 atomic_clear_int(&fp->f_flag, FDEFER); 1638 } else { 1639 /* 1640 * if it's not defered, then check if it's 1641 * already marked.. if so skip it 1642 */ 1643 if (fp->f_flag & FMARK) 1644 return(0); 1645 /* 1646 * If all references are from messages 1647 * in transit, then skip it. it's not 1648 * externally accessible. 1649 */ 1650 if (fp->f_count == fp->f_msgcount) 1651 return(0); 1652 /* 1653 * If it got this far then it must be 1654 * externally accessible. 1655 */ 1656 atomic_set_int(&fp->f_flag, FMARK); 1657 } 1658 1659 /* 1660 * either it was defered, or it is externally 1661 * accessible and not already marked so. 1662 * Now check if it is possibly one of OUR sockets. 1663 */ 1664 if (fp->f_type != DTYPE_SOCKET || 1665 (so = (struct socket *)fp->f_data) == NULL) { 1666 return(0); 1667 } 1668 if (so->so_proto->pr_domain != &localdomain || 1669 !(so->so_proto->pr_flags & PR_RIGHTS)) { 1670 return(0); 1671 } 1672 1673 /* 1674 * So, Ok, it's one of our sockets and it IS externally accessible 1675 * (or was defered). Now we look to see if we hold any file 1676 * descriptors in its message buffers. Follow those links and mark 1677 * them as accessible too. 1678 * 1679 * We are holding multiple spinlocks here, if we cannot get the 1680 * token non-blocking defer until the next loop. 
	 */
	info->locked_fp = fp;
	if (lwkt_trytoken(&so->so_rcv.ssb_token)) {
		unp_scan(so->so_rcv.ssb_mb, unp_mark, info);
		lwkt_reltoken(&so->so_rcv.ssb_token);
	} else {
		/* could not get the rcv token; retry this fp next pass */
		atomic_set_int(&fp->f_flag, FDEFER);
		++info->defer;
	}
	return (0);
}

/*
 * Scan all unix domain sockets and replace any revoked file pointers
 * found with the dummy file pointer fx.  We don't worry about races
 * against file pointers being read out as those are handled in the
 * externalize code.
 */

#define REVOKE_GC_MAXFILES	32

struct unp_revoke_gc_info {
	struct file	*fx;			/* replacement dummy fp */
	struct file	*fary[REVOKE_GC_MAXFILES]; /* revoked fps found */
	int		fcount;			/* fill level of fary */
};

/*
 * Replace every queued, revoked fp with fx, then release the message
 * references the revoked fps held.  Repeats until a full scan finds
 * fewer than REVOKE_GC_MAXFILES matches.
 */
void
unp_revoke_gc(struct file *fx)
{
	struct unp_revoke_gc_info info;
	int i;

	lwkt_gettoken(&unp_token);
	info.fx = fx;
	do {
		info.fcount = 0;
		allfiles_scan_exclusive(unp_revoke_gc_check, &info);
		for (i = 0; i < info.fcount; ++i)
			unp_fp_externalize(NULL, info.fary[i], -1);
	} while (info.fcount == REVOKE_GC_MAXFILES);
	lwkt_reltoken(&unp_token);
}

/*
 * Check for and replace revoked descriptors.
 *
 * WARNING: This routine is not allowed to block.
 */
static int
unp_revoke_gc_check(struct file *fps, void *vinfo)
{
	struct unp_revoke_gc_info *info = vinfo;
	struct file *fp;
	struct socket *so;
	struct mbuf *m0;
	struct mbuf *m;
	struct file **rp;
	struct cmsghdr *cm;
	int i;
	int qfds;

	/*
	 * Is this a unix domain socket with rights-passing abilities?
	 */
	if (fps->f_type != DTYPE_SOCKET)
		return (0);
	if ((so = (struct socket *)fps->f_data) == NULL)
		return(0);
	if (so->so_proto->pr_domain != &localdomain)
		return(0);
	if ((so->so_proto->pr_flags & PR_RIGHTS) == 0)
		return(0);

	/*
	 * Scan the mbufs for control messages and replace any revoked
	 * descriptors we find.
	 */
	lwkt_gettoken(&so->so_rcv.ssb_token);
	m0 = so->so_rcv.ssb_mb;
	while (m0) {
		for (m = m0; m; m = m->m_next) {
			if (m->m_type != MT_CONTROL)
				continue;
			if (m->m_len < sizeof(*cm))
				continue;
			cm = mtod(m, struct cmsghdr *);
			if (cm->cmsg_level != SOL_SOCKET ||
			    cm->cmsg_type != SCM_RIGHTS) {
				continue;
			}
			qfds = (cm->cmsg_len - CMSG_LEN(0)) / sizeof(void *);
			rp = (struct file **)CMSG_DATA(cm);
			for (i = 0; i < qfds; i++) {
				fp = rp[i];
				if (fp->f_flag & FREVOKED) {
					kprintf("Warning: Removing revoked fp from unix domain socket queue\n");
					fhold(info->fx);
					info->fx->f_msgcount++;
					unp_rights++;
					rp[i] = info->fx;
					info->fary[info->fcount++] = fp;
				}
				if (info->fcount == REVOKE_GC_MAXFILES)
					break;
			}
			if (info->fcount == REVOKE_GC_MAXFILES)
				break;
		}
		m0 = m0->m_nextpkt;
		if (info->fcount == REVOKE_GC_MAXFILES)
			break;
	}
	lwkt_reltoken(&so->so_rcv.ssb_token);

	/*
	 * Stop the scan if we filled up our array.
	 */
	if (info->fcount == REVOKE_GC_MAXFILES)
		return(-1);
	return(0);
}

/*
 * Dispose of the fp's stored in a mbuf.
 *
 * The dds loop can cause additional fps to be entered onto the
 * list while it is running, flattening out the operation and avoiding
 * a deep kernel stack recursion.
 */
void
unp_dispose(struct mbuf *m)
{
	unp_defdiscard_t dds;

	lwkt_gettoken(&unp_token);
	++unp_defdiscard_nest;
	if (m) {
		unp_scan(m, unp_discard, NULL);
	}
	/* outermost caller drains the deferred-discard list built above */
	if (unp_defdiscard_nest == 1) {
		while ((dds = unp_defdiscard_base) != NULL) {
			unp_defdiscard_base = dds->next;
			closef(dds->fp, NULL);
			kfree(dds, M_UNPCB);
		}
	}
	--unp_defdiscard_nest;
	lwkt_reltoken(&unp_token);
}

/*
 * Cache the listener's credentials in its pcb so a later connect() can
 * copy them to the connecting client (the connect path asserts
 * UNP_HAVEPCCACHED on the listener).
 */
static int
unp_listen(struct unpcb *unp, struct thread *td)
{
	struct proc *p = td->td_proc;

	KKASSERT(p);
	lwkt_gettoken(&unp_token);
	cru2x(p->p_ucred, &unp->unp_peercred);
	unp->unp_flags |= UNP_HAVEPCCACHED;
	lwkt_reltoken(&unp_token);
	return (0);
}

/*
 * Apply op to every struct file pointer carried in SCM_RIGHTS control
 * messages on the mbuf chain m0 (one packet per m_nextpkt link).
 */
static void
unp_scan(struct mbuf *m0, void (*op)(struct file *, void *), void *data)
{
	struct mbuf *m;
	struct file **rp;
	struct cmsghdr *cm;
	int i;
	int qfds;

	while (m0) {
		for (m = m0; m; m = m->m_next) {
			if (m->m_type == MT_CONTROL &&
			    m->m_len >= sizeof(*cm)) {
				cm = mtod(m, struct cmsghdr *);
				if (cm->cmsg_level != SOL_SOCKET ||
				    cm->cmsg_type != SCM_RIGHTS)
					continue;
				qfds = (cm->cmsg_len - CMSG_LEN(0)) /
				    sizeof(void *);
				rp = (struct file **)CMSG_DATA(cm);
				for (i = 0; i < qfds; i++)
					(*op)(*rp++, data);
				break;		/* XXX, but saves time */
			}
		}
		m0 = m0->m_nextpkt;
	}
}

/*
 * Mark visibility.  info->defer is recalculated on every pass.
 */
static void
unp_mark(struct file *fp, void *data)
{
	struct unp_gc_info *info = data;

	if ((fp->f_flag & FMARK) == 0) {
		++info->defer;
		atomic_set_int(&fp->f_flag, FMARK | FDEFER);
	} else if (fp->f_flag & FDEFER) {
		++info->defer;
	}
}

/*
 * Discard a fp previously held in a unix domain socket mbuf.  To
 * avoid blowing out the kernel stack due to contrived chain-reactions
 * we may have to defer the operation to a higher procedural level.
 *
 * Caller holds unp_token
 */
static void
unp_discard(struct file *fp, void *data __unused)
{
	unp_defdiscard_t dds;

	spin_lock(&unp_spin);
	fp->f_msgcount--;
	unp_rights--;
	spin_unlock(&unp_spin);

	if (unp_defdiscard_nest) {
		/* nested: queue for the outermost unp_dispose to close */
		dds = kmalloc(sizeof(*dds), M_UNPCB, M_WAITOK|M_ZERO);
		dds->fp = fp;
		dds->next = unp_defdiscard_base;
		unp_defdiscard_base = dds;
	} else {
		closef(fp, NULL);
	}
}