1 /* $OpenBSD: kern_descrip.c,v 1.134 2016/08/25 00:00:02 dlg Exp $ */ 2 /* $NetBSD: kern_descrip.c,v 1.42 1996/03/30 22:24:38 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1989, 1991, 1993 6 * The Regents of the University of California. All rights reserved. 7 * (c) UNIX System Laboratories, Inc. 8 * All or some portions of this file are derived from material licensed 9 * to the University of California by American Telephone and Telegraph 10 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 11 * the permission of UNIX System Laboratories, Inc. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * @(#)kern_descrip.c 8.6 (Berkeley) 4/19/94 38 */ 39 40 #include <sys/param.h> 41 #include <sys/systm.h> 42 #include <sys/filedesc.h> 43 #include <sys/kernel.h> 44 #include <sys/vnode.h> 45 #include <sys/proc.h> 46 #include <sys/file.h> 47 #include <sys/socket.h> 48 #include <sys/socketvar.h> 49 #include <sys/stat.h> 50 #include <sys/ioctl.h> 51 #include <sys/fcntl.h> 52 #include <sys/lock.h> 53 #include <sys/malloc.h> 54 #include <sys/syslog.h> 55 #include <sys/ucred.h> 56 #include <sys/unistd.h> 57 #include <sys/resourcevar.h> 58 #include <sys/mount.h> 59 #include <sys/syscallargs.h> 60 #include <sys/event.h> 61 #include <sys/pool.h> 62 #include <sys/ktrace.h> 63 #include <sys/pledge.h> 64 65 #include <sys/pipe.h> 66 67 /* 68 * Descriptor management. 69 */ 70 struct filelist filehead; /* head of list of open files */ 71 int numfiles; /* actual number of open files */ 72 73 static __inline void fd_used(struct filedesc *, int); 74 static __inline void fd_unused(struct filedesc *, int); 75 static __inline int find_next_zero(u_int *, int, u_int); 76 int finishdup(struct proc *, struct file *, int, int, register_t *, int); 77 int find_last_set(struct filedesc *, int); 78 int dodup3(struct proc *, int, int, int, register_t *); 79 80 struct pool file_pool; 81 struct pool fdesc_pool; 82 83 void 84 filedesc_init(void) 85 { 86 pool_init(&file_pool, sizeof(struct file), 0, 0, PR_WAITOK, 87 "filepl", NULL); 88 pool_setipl(&file_pool, IPL_NONE); 89 pool_init(&fdesc_pool, sizeof(struct filedesc0), 0, 0, PR_WAITOK, 90 "fdescpl", NULL); 91 pool_setipl(&fdesc_pool, IPL_NONE); 92 LIST_INIT(&filehead); 93 } 94 95 static __inline int 96 find_next_zero (u_int *bitmap, int want, u_int bits) 97 { 98 int i, off, maxoff; 99 u_int sub; 100 101 if (want > bits) 102 return -1; 103 104 off = want >> NDENTRYSHIFT; 105 i = want & NDENTRYMASK; 106 if (i) { 107 sub = bitmap[off] | ((u_int)~0 >> (NDENTRIES - i)); 108 if (sub != ~0) 109 goto found; 110 off++; 111 } 112 113 maxoff = NDLOSLOTS(bits); 114 while (off < maxoff) { 115 if ((sub = bitmap[off]) != ~0) 116 goto found; 117 off++; 118 } 119 120 return -1; 121 122 found: 123 return (off << NDENTRYSHIFT) + ffs(~sub) - 1; 124 } 125 126 int 127 find_last_set(struct filedesc *fd, int last) 128 { 129 int off, i; 130 struct file **ofiles = fd->fd_ofiles; 131 u_int *bitmap = fd->fd_lomap; 132 133 off = (last - 1) >> NDENTRYSHIFT; 134 135 while (off >= 0 && !bitmap[off]) 136 off--; 137 if (off < 0) 138 return 0; 139 140 i = ((off + 1) << NDENTRYSHIFT) - 1; 141 if (i >= last) 142 i = last - 1; 143 144 while (i > 0 && ofiles[i] == NULL) 145 i--; 146 return i; 147 } 148 149 static __inline void 150 fd_used(struct filedesc *fdp, int fd) 151 { 152 u_int off = fd >> NDENTRYSHIFT; 153 154 fdp->fd_lomap[off] |= 1 << (fd & NDENTRYMASK); 155 if (fdp->fd_lomap[off] == ~0) 156 fdp->fd_himap[off >> NDENTRYSHIFT] |= 1 << (off & NDENTRYMASK); 157 158 if (fd > fdp->fd_lastfile) 159 fdp->fd_lastfile = fd; 160 fdp->fd_openfd++; 161 } 162 163 static __inline void 164 fd_unused(struct filedesc *fdp, int fd) 165 { 166 u_int off = fd >> NDENTRYSHIFT; 167 168 if (fd < fdp->fd_freefile) 169 fdp->fd_freefile = fd; 170 171 if (fdp->fd_lomap[off] == ~0) 172 fdp->fd_himap[off >> NDENTRYSHIFT] &= ~(1 << (off & NDENTRYMASK)); 173 fdp->fd_lomap[off] &= ~(1 << (fd & NDENTRYMASK)); 174 175 #ifdef DIAGNOSTIC 176 if (fd > fdp->fd_lastfile) 177 panic("fd_unused: fd_lastfile inconsistent"); 178 #endif 179 if (fd == fdp->fd_lastfile) 180 fdp->fd_lastfile = find_last_set(fdp, fd); 181 fdp->fd_openfd--; 182 } 183 184 struct file * 185 fd_getfile(struct filedesc *fdp, int fd) 186 { 187 struct file *fp; 188 189 if ((u_int)fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == NULL) 190 return (NULL); 191 192 if (!FILE_IS_USABLE(fp)) 193 return (NULL); 194 195 return (fp); 196 } 197 198 struct file * 199 fd_getfile_mode(struct filedesc *fdp, int fd, int mode) 200 { 201 struct file *fp; 202 203 KASSERT(mode != 0); 204 205 fp = fd_getfile(fdp, fd); 206 207 if (fp == NULL || (fp->f_flag & mode) == 0) 208 return (NULL); 209 210 return (fp); 211 } 212 213 /* 214 * System calls on descriptors. 215 */ 216 217 /* 218 * Duplicate a file descriptor. 219 */ 220 int 221 sys_dup(struct proc *p, void *v, register_t *retval) 222 { 223 struct sys_dup_args /* { 224 syscallarg(int) fd; 225 } */ *uap = v; 226 struct filedesc *fdp = p->p_fd; 227 int old = SCARG(uap, fd); 228 struct file *fp; 229 int new; 230 int error; 231 232 restart: 233 if ((fp = fd_getfile(fdp, old)) == NULL) 234 return (EBADF); 235 FREF(fp); 236 fdplock(fdp); 237 if ((error = fdalloc(p, 0, &new)) != 0) { 238 FRELE(fp, p); 239 if (error == ENOSPC) { 240 fdexpand(p); 241 fdpunlock(fdp); 242 goto restart; 243 } 244 goto out; 245 } 246 error = finishdup(p, fp, old, new, retval, 0); 247 248 out: 249 fdpunlock(fdp); 250 return (error); 251 } 252 253 /* 254 * Duplicate a file descriptor to a particular value. 255 */ 256 int 257 sys_dup2(struct proc *p, void *v, register_t *retval) 258 { 259 struct sys_dup2_args /* { 260 syscallarg(int) from; 261 syscallarg(int) to; 262 } */ *uap = v; 263 264 return (dodup3(p, SCARG(uap, from), SCARG(uap, to), 0, retval)); 265 } 266 267 int 268 sys_dup3(struct proc *p, void *v, register_t *retval) 269 { 270 struct sys_dup3_args /* { 271 syscallarg(int) from; 272 syscallarg(int) to; 273 syscallarg(int) flags; 274 } */ *uap = v; 275 276 if (SCARG(uap, from) == SCARG(uap, to)) 277 return (EINVAL); 278 if (SCARG(uap, flags) & ~O_CLOEXEC) 279 return (EINVAL); 280 return (dodup3(p, SCARG(uap, from), SCARG(uap, to), 281 SCARG(uap, flags), retval)); 282 } 283 284 int 285 dodup3(struct proc *p, int old, int new, int flags, register_t *retval) 286 { 287 struct filedesc *fdp = p->p_fd; 288 struct file *fp; 289 int i, error; 290 291 restart: 292 if ((fp = fd_getfile(fdp, old)) == NULL) 293 return (EBADF); 294 if ((u_int)new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur || 295 (u_int)new >= maxfiles) 296 return (EBADF); 297 if (old == new) { 298 /* 299 * NOTE! This doesn't clear the close-on-exec flag. This might 300 * or might not be the intended behavior from the start, but 301 * this is what everyone else does. 302 */ 303 *retval = new; 304 return (0); 305 } 306 FREF(fp); 307 fdplock(fdp); 308 if (new >= fdp->fd_nfiles) { 309 if ((error = fdalloc(p, new, &i)) != 0) { 310 FRELE(fp, p); 311 if (error == ENOSPC) { 312 fdexpand(p); 313 fdpunlock(fdp); 314 goto restart; 315 } 316 goto out; 317 } 318 if (new != i) 319 panic("dup2: fdalloc"); 320 fd_unused(fdp, new); 321 } 322 /* finishdup() does FRELE */ 323 error = finishdup(p, fp, old, new, retval, 1); 324 if (!error && flags & O_CLOEXEC) 325 fdp->fd_ofileflags[new] |= UF_EXCLOSE; 326 327 out: 328 fdpunlock(fdp); 329 return (error); 330 } 331 332 /* 333 * The file control system call. 334 */ 335 int 336 sys_fcntl(struct proc *p, void *v, register_t *retval) 337 { 338 struct sys_fcntl_args /* { 339 syscallarg(int) fd; 340 syscallarg(int) cmd; 341 syscallarg(void *) arg; 342 } */ *uap = v; 343 int fd = SCARG(uap, fd); 344 struct filedesc *fdp = p->p_fd; 345 struct file *fp; 346 struct vnode *vp; 347 int i, tmp, newmin, flg = F_POSIX; 348 struct flock fl; 349 int error = 0; 350 351 error = pledge_fcntl(p, SCARG(uap, cmd)); 352 if (error) 353 return (error); 354 355 restart: 356 if ((fp = fd_getfile(fdp, fd)) == NULL) 357 return (EBADF); 358 FREF(fp); 359 switch (SCARG(uap, cmd)) { 360 361 case F_DUPFD: 362 case F_DUPFD_CLOEXEC: 363 newmin = (long)SCARG(uap, arg); 364 if ((u_int)newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur || 365 (u_int)newmin >= maxfiles) { 366 error = EINVAL; 367 break; 368 } 369 fdplock(fdp); 370 if ((error = fdalloc(p, newmin, &i)) != 0) { 371 FRELE(fp, p); 372 if (error == ENOSPC) { 373 fdexpand(p); 374 fdpunlock(fdp); 375 goto restart; 376 } 377 } else { 378 /* finishdup will FRELE for us. */ 379 error = finishdup(p, fp, fd, i, retval, 0); 380 381 if (!error && SCARG(uap, cmd) == F_DUPFD_CLOEXEC) 382 fdp->fd_ofileflags[i] |= UF_EXCLOSE; 383 } 384 385 fdpunlock(fdp); 386 return (error); 387 388 case F_GETFD: 389 *retval = fdp->fd_ofileflags[fd] & UF_EXCLOSE ? 1 : 0; 390 break; 391 392 case F_SETFD: 393 fdplock(fdp); 394 if ((long)SCARG(uap, arg) & 1) 395 fdp->fd_ofileflags[fd] |= UF_EXCLOSE; 396 else 397 fdp->fd_ofileflags[fd] &= ~UF_EXCLOSE; 398 fdpunlock(fdp); 399 break; 400 401 case F_GETFL: 402 *retval = OFLAGS(fp->f_flag); 403 break; 404 405 case F_ISATTY: 406 vp = fp->f_data; 407 if (fp->f_type == DTYPE_VNODE && (vp->v_flag & VISTTY)) 408 *retval = 1; 409 else { 410 *retval = 0; 411 error = ENOTTY; 412 } 413 break; 414 415 case F_SETFL: 416 fp->f_flag &= ~FCNTLFLAGS; 417 fp->f_flag |= FFLAGS((long)SCARG(uap, arg)) & FCNTLFLAGS; 418 tmp = fp->f_flag & FNONBLOCK; 419 error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p); 420 if (error) 421 break; 422 tmp = fp->f_flag & FASYNC; 423 error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p); 424 if (!error) 425 break; 426 fp->f_flag &= ~FNONBLOCK; 427 tmp = 0; 428 (void) (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p); 429 break; 430 431 case F_GETOWN: 432 if (fp->f_type == DTYPE_SOCKET) { 433 *retval = ((struct socket *)fp->f_data)->so_pgid; 434 break; 435 } 436 if (fp->f_type == DTYPE_PIPE) { 437 *retval = ((struct pipe *)fp->f_data)->pipe_pgid; 438 break; 439 } 440 error = (*fp->f_ops->fo_ioctl) 441 (fp, TIOCGPGRP, (caddr_t)&tmp, p); 442 *retval = -tmp; 443 break; 444 445 case F_SETOWN: 446 if (fp->f_type == DTYPE_SOCKET) { 447 struct socket *so = fp->f_data; 448 449 so->so_pgid = (long)SCARG(uap, arg); 450 so->so_siguid = p->p_ucred->cr_ruid; 451 so->so_sigeuid = p->p_ucred->cr_uid; 452 break; 453 } 454 if (fp->f_type == DTYPE_PIPE) { 455 struct pipe *mpipe = fp->f_data; 456 457 mpipe->pipe_pgid = (long)SCARG(uap, arg); 458 break; 459 } 460 if ((long)SCARG(uap, arg) <= 0) { 461 SCARG(uap, arg) = (void *)(-(long)SCARG(uap, arg)); 462 } else { 463 struct process *pr1 = prfind((long)SCARG(uap, arg)); 464 if (pr1 == 0) { 465 error = ESRCH; 466 break; 467 } 468 SCARG(uap, arg) = (void *)(long)pr1->ps_pgrp->pg_id; 469 } 470 error = ((*fp->f_ops->fo_ioctl) 471 (fp, TIOCSPGRP, (caddr_t)&SCARG(uap, arg), p)); 472 break; 473 474 case F_SETLKW: 475 flg |= F_WAIT; 476 /* FALLTHROUGH */ 477 478 case F_SETLK: 479 error = pledge_flock(p); 480 if (error != 0) 481 break; 482 483 if (fp->f_type != DTYPE_VNODE) { 484 error = EBADF; 485 break; 486 } 487 vp = fp->f_data; 488 /* Copy in the lock structure */ 489 error = copyin((caddr_t)SCARG(uap, arg), (caddr_t)&fl, 490 sizeof (fl)); 491 if (error) 492 break; 493 if (fl.l_whence == SEEK_CUR) { 494 if (fl.l_start == 0 && fl.l_len < 0) { 495 /* lockf(3) compliance hack */ 496 fl.l_len = -fl.l_len; 497 fl.l_start = fp->f_offset - fl.l_len; 498 } else 499 fl.l_start += fp->f_offset; 500 } 501 switch (fl.l_type) { 502 503 case F_RDLCK: 504 if ((fp->f_flag & FREAD) == 0) { 505 error = EBADF; 506 goto out; 507 } 508 atomic_setbits_int(&fdp->fd_flags, FD_ADVLOCK); 509 error = VOP_ADVLOCK(vp, fdp, F_SETLK, &fl, flg); 510 break; 511 512 case F_WRLCK: 513 if ((fp->f_flag & FWRITE) == 0) { 514 error = EBADF; 515 goto out; 516 } 517 atomic_setbits_int(&fdp->fd_flags, FD_ADVLOCK); 518 error = VOP_ADVLOCK(vp, fdp, F_SETLK, &fl, flg); 519 break; 520 521 case F_UNLCK: 522 error = VOP_ADVLOCK(vp, fdp, F_UNLCK, &fl, F_POSIX); 523 goto out; 524 525 default: 526 error = EINVAL; 527 goto out; 528 } 529 530 if (fp != fd_getfile(fdp, fd)) { 531 /* 532 * We have lost the race with close() or dup2(); 533 * unlock, pretend that we've won the race and that 534 * lock had been removed by close() 535 */ 536 fl.l_whence = SEEK_SET; 537 fl.l_start = 0; 538 fl.l_len = 0; 539 VOP_ADVLOCK(vp, fdp, F_UNLCK, &fl, F_POSIX); 540 fl.l_type = F_UNLCK; 541 } 542 goto out; 543 544 545 case F_GETLK: 546 error = pledge_flock(p); 547 if (error != 0) 548 break; 549 550 if (fp->f_type != DTYPE_VNODE) { 551 error = EBADF; 552 break; 553 } 554 vp = fp->f_data; 555 /* Copy in the lock structure */ 556 error = copyin((caddr_t)SCARG(uap, arg), (caddr_t)&fl, 557 sizeof (fl)); 558 if (error) 559 break; 560 if (fl.l_whence == SEEK_CUR) { 561 if (fl.l_start == 0 && fl.l_len < 0) { 562 /* lockf(3) compliance hack */ 563 fl.l_len = -fl.l_len; 564 fl.l_start = fp->f_offset - fl.l_len; 565 } else 566 fl.l_start += fp->f_offset; 567 } 568 if (fl.l_type != F_RDLCK && 569 fl.l_type != F_WRLCK && 570 fl.l_type != F_UNLCK && 571 fl.l_type != 0) { 572 error = EINVAL; 573 break; 574 } 575 error = VOP_ADVLOCK(vp, fdp, F_GETLK, &fl, F_POSIX); 576 if (error) 577 break; 578 error = (copyout((caddr_t)&fl, (caddr_t)SCARG(uap, arg), 579 sizeof (fl))); 580 break; 581 582 default: 583 error = EINVAL; 584 break; 585 } 586 out: 587 FRELE(fp, p); 588 return (error); 589 } 590 591 /* 592 * Common code for dup, dup2, and fcntl(F_DUPFD). 593 */ 594 int 595 finishdup(struct proc *p, struct file *fp, int old, int new, 596 register_t *retval, int dup2) 597 { 598 struct file *oldfp; 599 struct filedesc *fdp = p->p_fd; 600 601 fdpassertlocked(fdp); 602 if (fp->f_count == LONG_MAX-2) { 603 FRELE(fp, p); 604 return (EDEADLK); 605 } 606 607 /* 608 * Don't fd_getfile here. We want to closef LARVAL files and 609 * closef can deal with that. 610 */ 611 oldfp = fdp->fd_ofiles[new]; 612 if (oldfp != NULL) 613 FREF(oldfp); 614 615 fdp->fd_ofiles[new] = fp; 616 fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] & ~UF_EXCLOSE; 617 fp->f_count++; 618 FRELE(fp, p); 619 if (dup2 && oldfp == NULL) 620 fd_used(fdp, new); 621 *retval = new; 622 623 if (oldfp != NULL) { 624 if (new < fdp->fd_knlistsize) 625 knote_fdclose(p, new); 626 closef(oldfp, p); 627 } 628 629 return (0); 630 } 631 632 void 633 fdremove(struct filedesc *fdp, int fd) 634 { 635 fdpassertlocked(fdp); 636 fdp->fd_ofiles[fd] = NULL; 637 fd_unused(fdp, fd); 638 } 639 640 int 641 fdrelease(struct proc *p, int fd) 642 { 643 struct filedesc *fdp = p->p_fd; 644 struct file **fpp, *fp; 645 646 fdpassertlocked(fdp); 647 648 /* 649 * Don't fd_getfile here. We want to closef LARVAL files and closef 650 * can deal with that. 651 */ 652 fpp = &fdp->fd_ofiles[fd]; 653 fp = *fpp; 654 if (fp == NULL) 655 return (EBADF); 656 FREF(fp); 657 *fpp = NULL; 658 fd_unused(fdp, fd); 659 if (fd < fdp->fd_knlistsize) 660 knote_fdclose(p, fd); 661 return (closef(fp, p)); 662 } 663 664 /* 665 * Close a file descriptor. 666 */ 667 int 668 sys_close(struct proc *p, void *v, register_t *retval) 669 { 670 struct sys_close_args /* { 671 syscallarg(int) fd; 672 } */ *uap = v; 673 int fd = SCARG(uap, fd), error; 674 struct filedesc *fdp = p->p_fd; 675 676 if (fd_getfile(fdp, fd) == NULL) 677 return (EBADF); 678 fdplock(fdp); 679 error = fdrelease(p, fd); 680 fdpunlock(fdp); 681 682 return (error); 683 } 684 685 /* 686 * Return status information about a file descriptor. 687 */ 688 int 689 sys_fstat(struct proc *p, void *v, register_t *retval) 690 { 691 struct sys_fstat_args /* { 692 syscallarg(int) fd; 693 syscallarg(struct stat *) sb; 694 } */ *uap = v; 695 int fd = SCARG(uap, fd); 696 struct filedesc *fdp = p->p_fd; 697 struct file *fp; 698 struct stat ub; 699 int error; 700 701 if ((fp = fd_getfile(fdp, fd)) == NULL) 702 return (EBADF); 703 FREF(fp); 704 error = (*fp->f_ops->fo_stat)(fp, &ub, p); 705 FRELE(fp, p); 706 if (error == 0) { 707 /* 708 * Don't let non-root see generation numbers 709 * (for NFS security) 710 */ 711 if (suser(p, 0)) 712 ub.st_gen = 0; 713 error = copyout((caddr_t)&ub, (caddr_t)SCARG(uap, sb), 714 sizeof (ub)); 715 } 716 #ifdef KTRACE 717 if (error == 0 && KTRPOINT(p, KTR_STRUCT)) 718 ktrstat(p, &ub); 719 #endif 720 return (error); 721 } 722 723 /* 724 * Return pathconf information about a file descriptor. 725 */ 726 int 727 sys_fpathconf(struct proc *p, void *v, register_t *retval) 728 { 729 struct sys_fpathconf_args /* { 730 syscallarg(int) fd; 731 syscallarg(int) name; 732 } */ *uap = v; 733 int fd = SCARG(uap, fd); 734 struct filedesc *fdp = p->p_fd; 735 struct file *fp; 736 struct vnode *vp; 737 int error; 738 739 if ((fp = fd_getfile(fdp, fd)) == NULL) 740 return (EBADF); 741 FREF(fp); 742 switch (fp->f_type) { 743 case DTYPE_PIPE: 744 case DTYPE_SOCKET: 745 if (SCARG(uap, name) != _PC_PIPE_BUF) { 746 error = EINVAL; 747 break; 748 } 749 *retval = PIPE_BUF; 750 error = 0; 751 break; 752 753 case DTYPE_VNODE: 754 vp = fp->f_data; 755 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); 756 error = VOP_PATHCONF(vp, SCARG(uap, name), retval); 757 VOP_UNLOCK(vp, p); 758 break; 759 760 default: 761 error = EOPNOTSUPP; 762 break; 763 } 764 FRELE(fp, p); 765 return (error); 766 } 767 768 /* 769 * Allocate a file descriptor for the process. 770 */ 771 int 772 fdalloc(struct proc *p, int want, int *result) 773 { 774 struct filedesc *fdp = p->p_fd; 775 int lim, last, i; 776 u_int new, off; 777 778 /* 779 * Search for a free descriptor starting at the higher 780 * of want or fd_freefile. If that fails, consider 781 * expanding the ofile array. 782 */ 783 restart: 784 lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles); 785 last = min(fdp->fd_nfiles, lim); 786 if ((i = want) < fdp->fd_freefile) 787 i = fdp->fd_freefile; 788 off = i >> NDENTRYSHIFT; 789 new = find_next_zero(fdp->fd_himap, off, 790 (last + NDENTRIES - 1) >> NDENTRYSHIFT); 791 if (new != -1) { 792 i = find_next_zero(&fdp->fd_lomap[new], 793 new > off ? 0 : i & NDENTRYMASK, 794 NDENTRIES); 795 if (i == -1) { 796 /* 797 * Free file descriptor in this block was 798 * below want, try again with higher want. 799 */ 800 want = (new + 1) << NDENTRYSHIFT; 801 goto restart; 802 } 803 i += (new << NDENTRYSHIFT); 804 if (i < last) { 805 fd_used(fdp, i); 806 if (want <= fdp->fd_freefile) 807 fdp->fd_freefile = i; 808 *result = i; 809 fdp->fd_ofileflags[i] = 0; 810 return (0); 811 } 812 } 813 if (fdp->fd_nfiles >= lim) 814 return (EMFILE); 815 816 return (ENOSPC); 817 } 818 819 void 820 fdexpand(struct proc *p) 821 { 822 struct filedesc *fdp = p->p_fd; 823 int nfiles; 824 size_t copylen; 825 struct file **newofile; 826 char *newofileflags; 827 u_int *newhimap, *newlomap; 828 829 fdpassertlocked(fdp); 830 831 /* 832 * No space in current array. 833 */ 834 if (fdp->fd_nfiles < NDEXTENT) 835 nfiles = NDEXTENT; 836 else 837 nfiles = 2 * fdp->fd_nfiles; 838 839 newofile = mallocarray(nfiles, OFILESIZE, M_FILEDESC, M_WAITOK); 840 newofileflags = (char *) &newofile[nfiles]; 841 842 /* 843 * Copy the existing ofile and ofileflags arrays 844 * and zero the new portion of each array. 845 */ 846 copylen = sizeof(struct file *) * fdp->fd_nfiles; 847 memcpy(newofile, fdp->fd_ofiles, copylen); 848 memset((char *)newofile + copylen, 0, 849 nfiles * sizeof(struct file *) - copylen); 850 copylen = sizeof(char) * fdp->fd_nfiles; 851 memcpy(newofileflags, fdp->fd_ofileflags, copylen); 852 memset(newofileflags + copylen, 0, nfiles * sizeof(char) - copylen); 853 854 if (fdp->fd_nfiles > NDFILE) 855 free(fdp->fd_ofiles, M_FILEDESC, fdp->fd_nfiles * OFILESIZE); 856 857 if (NDHISLOTS(nfiles) > NDHISLOTS(fdp->fd_nfiles)) { 858 newhimap = mallocarray(NDHISLOTS(nfiles), sizeof(u_int), 859 M_FILEDESC, M_WAITOK); 860 newlomap = mallocarray(NDLOSLOTS(nfiles), sizeof(u_int), 861 M_FILEDESC, M_WAITOK); 862 863 copylen = NDHISLOTS(fdp->fd_nfiles) * sizeof(u_int); 864 memcpy(newhimap, fdp->fd_himap, copylen); 865 memset((char *)newhimap + copylen, 0, 866 NDHISLOTS(nfiles) * sizeof(u_int) - copylen); 867 868 copylen = NDLOSLOTS(fdp->fd_nfiles) * sizeof(u_int); 869 memcpy(newlomap, fdp->fd_lomap, copylen); 870 memset((char *)newlomap + copylen, 0, 871 NDLOSLOTS(nfiles) * sizeof(u_int) - copylen); 872 873 if (NDHISLOTS(fdp->fd_nfiles) > NDHISLOTS(NDFILE)) { 874 free(fdp->fd_himap, M_FILEDESC, 875 NDHISLOTS(fdp->fd_nfiles) * sizeof(u_int)); 876 free(fdp->fd_lomap, M_FILEDESC, 877 NDLOSLOTS(fdp->fd_nfiles) * sizeof(u_int)); 878 } 879 fdp->fd_himap = newhimap; 880 fdp->fd_lomap = newlomap; 881 } 882 fdp->fd_ofiles = newofile; 883 fdp->fd_ofileflags = newofileflags; 884 fdp->fd_nfiles = nfiles; 885 } 886 887 /* 888 * Create a new open file structure and allocate 889 * a file descriptor for the process that refers to it. 890 */ 891 int 892 falloc(struct proc *p, struct file **resultfp, int *resultfd) 893 { 894 struct file *fp, *fq; 895 int error, i; 896 897 fdpassertlocked(p->p_fd); 898 restart: 899 if ((error = fdalloc(p, 0, &i)) != 0) { 900 if (error == ENOSPC) { 901 fdexpand(p); 902 goto restart; 903 } 904 return (error); 905 } 906 if (numfiles >= maxfiles) { 907 fd_unused(p->p_fd, i); 908 tablefull("file"); 909 return (ENFILE); 910 } 911 /* 912 * Allocate a new file descriptor. 913 * If the process has file descriptor zero open, add to the list 914 * of open files at that point, otherwise put it at the front of 915 * the list of open files. 916 */ 917 numfiles++; 918 fp = pool_get(&file_pool, PR_WAITOK|PR_ZERO); 919 fp->f_iflags = FIF_LARVAL; 920 if ((fq = p->p_fd->fd_ofiles[0]) != NULL) { 921 LIST_INSERT_AFTER(fq, fp, f_list); 922 } else { 923 LIST_INSERT_HEAD(&filehead, fp, f_list); 924 } 925 p->p_fd->fd_ofiles[i] = fp; 926 fp->f_count = 1; 927 fp->f_cred = p->p_ucred; 928 crhold(fp->f_cred); 929 if (resultfp) 930 *resultfp = fp; 931 if (resultfd) 932 *resultfd = i; 933 FREF(fp); 934 return (0); 935 } 936 937 /* 938 * Build a new filedesc structure. 939 */ 940 struct filedesc * 941 fdinit(void) 942 { 943 struct filedesc0 *newfdp; 944 945 newfdp = pool_get(&fdesc_pool, PR_WAITOK|PR_ZERO); 946 rw_init(&newfdp->fd_fd.fd_lock, "fdlock"); 947 948 /* Create the file descriptor table. */ 949 newfdp->fd_fd.fd_refcnt = 1; 950 newfdp->fd_fd.fd_cmask = S_IWGRP|S_IWOTH; 951 newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles; 952 newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags; 953 newfdp->fd_fd.fd_nfiles = NDFILE; 954 newfdp->fd_fd.fd_himap = newfdp->fd_dhimap; 955 newfdp->fd_fd.fd_lomap = newfdp->fd_dlomap; 956 newfdp->fd_fd.fd_knlistsize = -1; 957 958 newfdp->fd_fd.fd_freefile = 0; 959 newfdp->fd_fd.fd_lastfile = 0; 960 961 return (&newfdp->fd_fd); 962 } 963 964 /* 965 * Share a filedesc structure. 966 */ 967 struct filedesc * 968 fdshare(struct process *pr) 969 { 970 pr->ps_fd->fd_refcnt++; 971 return (pr->ps_fd); 972 } 973 974 /* 975 * Copy a filedesc structure. 976 */ 977 struct filedesc * 978 fdcopy(struct process *pr) 979 { 980 struct filedesc *newfdp, *fdp = pr->ps_fd; 981 struct file **fpp; 982 int i; 983 984 fdplock(fdp); 985 newfdp = pool_get(&fdesc_pool, PR_WAITOK); 986 memcpy(newfdp, fdp, sizeof(struct filedesc)); 987 if (newfdp->fd_cdir) 988 vref(newfdp->fd_cdir); 989 if (newfdp->fd_rdir) 990 vref(newfdp->fd_rdir); 991 newfdp->fd_refcnt = 1; 992 rw_init(&newfdp->fd_lock, "fdlock"); 993 994 /* 995 * If the number of open files fits in the internal arrays 996 * of the open file structure, use them, otherwise allocate 997 * additional memory for the number of descriptors currently 998 * in use. 999 */ 1000 if (newfdp->fd_lastfile < NDFILE) { 1001 newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles; 1002 newfdp->fd_ofileflags = 1003 ((struct filedesc0 *) newfdp)->fd_dfileflags; 1004 i = NDFILE; 1005 } else { 1006 /* 1007 * Compute the smallest multiple of NDEXTENT needed 1008 * for the file descriptors currently in use, 1009 * allowing the table to shrink. 1010 */ 1011 i = newfdp->fd_nfiles; 1012 while (i >= 2 * NDEXTENT && i > newfdp->fd_lastfile * 2) 1013 i /= 2; 1014 newfdp->fd_ofiles = mallocarray(i, OFILESIZE, M_FILEDESC, M_WAITOK); 1015 newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i]; 1016 } 1017 if (NDHISLOTS(i) <= NDHISLOTS(NDFILE)) { 1018 newfdp->fd_himap = 1019 ((struct filedesc0 *) newfdp)->fd_dhimap; 1020 newfdp->fd_lomap = 1021 ((struct filedesc0 *) newfdp)->fd_dlomap; 1022 } else { 1023 newfdp->fd_himap = mallocarray(NDHISLOTS(i), sizeof(u_int), 1024 M_FILEDESC, M_WAITOK); 1025 newfdp->fd_lomap = mallocarray(NDLOSLOTS(i), sizeof(u_int), 1026 M_FILEDESC, M_WAITOK); 1027 } 1028 newfdp->fd_nfiles = i; 1029 memcpy(newfdp->fd_ofiles, fdp->fd_ofiles, i * sizeof(struct file *)); 1030 memcpy(newfdp->fd_ofileflags, fdp->fd_ofileflags, i * sizeof(char)); 1031 memcpy(newfdp->fd_himap, fdp->fd_himap, NDHISLOTS(i) * sizeof(u_int)); 1032 memcpy(newfdp->fd_lomap, fdp->fd_lomap, NDLOSLOTS(i) * sizeof(u_int)); 1033 fdpunlock(fdp); 1034 1035 fdplock(newfdp); 1036 fpp = newfdp->fd_ofiles; 1037 for (i = 0; i <= newfdp->fd_lastfile; i++, fpp++) 1038 if (*fpp != NULL) { 1039 /* 1040 * XXX Gruesome hack. If count gets too high, fail 1041 * to copy an fd, since fdcopy()'s callers do not 1042 * permit it to indicate failure yet. 1043 * Meanwhile, kqueue files have to be 1044 * tied to the process that opened them to enforce 1045 * their internal consistency, so close them here. 1046 */ 1047 if ((*fpp)->f_count == LONG_MAX-2 || 1048 (*fpp)->f_type == DTYPE_KQUEUE) 1049 fdremove(newfdp, i); 1050 else 1051 (*fpp)->f_count++; 1052 } 1053 1054 /* finish cleaning up kq bits */ 1055 if (newfdp->fd_knlistsize != -1) { 1056 newfdp->fd_knlist = NULL; 1057 newfdp->fd_knlistsize = -1; 1058 newfdp->fd_knhash = NULL; 1059 newfdp->fd_knhashmask = 0; 1060 } 1061 1062 fdpunlock(newfdp); 1063 return (newfdp); 1064 } 1065 1066 /* 1067 * Release a filedesc structure. 1068 */ 1069 void 1070 fdfree(struct proc *p) 1071 { 1072 struct filedesc *fdp = p->p_fd; 1073 struct file **fpp, *fp; 1074 int i; 1075 1076 if (--fdp->fd_refcnt > 0) 1077 return; 1078 fpp = fdp->fd_ofiles; 1079 for (i = fdp->fd_lastfile; i >= 0; i--, fpp++) { 1080 fp = *fpp; 1081 if (fp != NULL) { 1082 FREF(fp); 1083 *fpp = NULL; 1084 (void) closef(fp, p); 1085 } 1086 } 1087 p->p_fd = NULL; 1088 if (fdp->fd_nfiles > NDFILE) 1089 free(fdp->fd_ofiles, M_FILEDESC, fdp->fd_nfiles * OFILESIZE); 1090 if (NDHISLOTS(fdp->fd_nfiles) > NDHISLOTS(NDFILE)) { 1091 free(fdp->fd_himap, M_FILEDESC, 1092 NDHISLOTS(fdp->fd_nfiles) * sizeof(u_int)); 1093 free(fdp->fd_lomap, M_FILEDESC, 1094 NDLOSLOTS(fdp->fd_nfiles) * sizeof(u_int)); 1095 } 1096 if (fdp->fd_cdir) 1097 vrele(fdp->fd_cdir); 1098 if (fdp->fd_rdir) 1099 vrele(fdp->fd_rdir); 1100 free(fdp->fd_knlist, M_TEMP, fdp->fd_knlistsize * sizeof(struct klist)); 1101 free(fdp->fd_knhash, M_TEMP, 0); 1102 pool_put(&fdesc_pool, fdp); 1103 } 1104 1105 /* 1106 * Internal form of close. 1107 * Decrement reference count on file structure. 1108 * Note: p may be NULL when closing a file 1109 * that was being passed in a message. 1110 * 1111 * The fp must have its usecount bumped and will be FRELEd here. 1112 */ 1113 int 1114 closef(struct file *fp, struct proc *p) 1115 { 1116 struct filedesc *fdp; 1117 1118 if (fp == NULL) 1119 return (0); 1120 1121 #ifdef DIAGNOSTIC 1122 if (fp->f_count < 2) 1123 panic("closef: count (%ld) < 2", fp->f_count); 1124 #endif 1125 fp->f_count--; 1126 1127 /* 1128 * POSIX record locking dictates that any close releases ALL 1129 * locks owned by this process. This is handled by setting 1130 * a flag in the unlock to free ONLY locks obeying POSIX 1131 * semantics, and not to free BSD-style file locks. 1132 * If the descriptor was in a message, POSIX-style locks 1133 * aren't passed with the descriptor. 1134 */ 1135 1136 if (p && ((fdp = p->p_fd) != NULL) && 1137 (fdp->fd_flags & FD_ADVLOCK) && 1138 fp->f_type == DTYPE_VNODE) { 1139 struct vnode *vp = fp->f_data; 1140 struct flock lf; 1141 1142 lf.l_whence = SEEK_SET; 1143 lf.l_start = 0; 1144 lf.l_len = 0; 1145 lf.l_type = F_UNLCK; 1146 (void) VOP_ADVLOCK(vp, fdp, F_UNLCK, &lf, F_POSIX); 1147 } 1148 1149 return (FRELE(fp, p)); 1150 } 1151 1152 int 1153 fdrop(struct file *fp, struct proc *p) 1154 { 1155 int error; 1156 1157 #ifdef DIAGNOSTIC 1158 if (fp->f_count != 0) 1159 panic("fdrop: count (%ld) != 0", fp->f_count); 1160 #endif 1161 1162 if (fp->f_ops) 1163 error = (*fp->f_ops->fo_close)(fp, p); 1164 else 1165 error = 0; 1166 1167 /* Free fp */ 1168 LIST_REMOVE(fp, f_list); 1169 crfree(fp->f_cred); 1170 numfiles--; 1171 pool_put(&file_pool, fp); 1172 1173 return (error); 1174 } 1175 1176 /* 1177 * Apply an advisory lock on a file descriptor. 1178 * 1179 * Just attempt to get a record lock of the requested type on 1180 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0). 1181 */ 1182 int 1183 sys_flock(struct proc *p, void *v, register_t *retval) 1184 { 1185 struct sys_flock_args /* { 1186 syscallarg(int) fd; 1187 syscallarg(int) how; 1188 } */ *uap = v; 1189 int fd = SCARG(uap, fd); 1190 int how = SCARG(uap, how); 1191 struct filedesc *fdp = p->p_fd; 1192 struct file *fp; 1193 struct vnode *vp; 1194 struct flock lf; 1195 int error; 1196 1197 if ((fp = fd_getfile(fdp, fd)) == NULL) 1198 return (EBADF); 1199 if (fp->f_type != DTYPE_VNODE) 1200 return (EOPNOTSUPP); 1201 FREF(fp); 1202 vp = fp->f_data; 1203 lf.l_whence = SEEK_SET; 1204 lf.l_start = 0; 1205 lf.l_len = 0; 1206 if (how & LOCK_UN) { 1207 lf.l_type = F_UNLCK; 1208 fp->f_iflags &= ~FIF_HASLOCK; 1209 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK); 1210 goto out; 1211 } 1212 if (how & LOCK_EX) 1213 lf.l_type = F_WRLCK; 1214 else if (how & LOCK_SH) 1215 lf.l_type = F_RDLCK; 1216 else { 1217 error = EINVAL; 1218 goto out; 1219 } 1220 fp->f_iflags |= FIF_HASLOCK; 1221 if (how & LOCK_NB) 1222 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK); 1223 else 1224 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK|F_WAIT); 1225 out: 1226 FRELE(fp, p); 1227 return (error); 1228 } 1229 1230 /* 1231 * File Descriptor pseudo-device driver (/dev/fd/). 1232 * 1233 * Opening minor device N dup()s the file (if any) connected to file 1234 * descriptor N belonging to the calling process. Note that this driver 1235 * consists of only the ``open()'' routine, because all subsequent 1236 * references to this file will be direct to the other driver. 1237 */ 1238 int 1239 filedescopen(dev_t dev, int mode, int type, struct proc *p) 1240 { 1241 1242 /* 1243 * XXX Kludge: set curproc->p_dupfd to contain the value of the 1244 * the file descriptor being sought for duplication. The error 1245 * return ensures that the vnode for this device will be released 1246 * by vn_open. Open will detect this special error and take the 1247 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN 1248 * will simply report the error. 1249 */ 1250 p->p_dupfd = minor(dev); 1251 return (ENODEV); 1252 } 1253 1254 /* 1255 * Duplicate the specified descriptor to a free descriptor. 1256 */ 1257 int 1258 dupfdopen(struct filedesc *fdp, int indx, int dfd, int mode) 1259 { 1260 struct file *wfp; 1261 1262 fdpassertlocked(fdp); 1263 1264 /* 1265 * Assume that the filename was user-specified; applications do 1266 * not tend to open /dev/fd/# when they can just call dup() 1267 */ 1268 if ((curproc->p_p->ps_flags & (PS_SUGIDEXEC | PS_SUGID))) { 1269 if (curproc->p_descfd == 255) 1270 return (EPERM); 1271 if (curproc->p_descfd != curproc->p_dupfd) 1272 return (EPERM); 1273 } 1274 1275 /* 1276 * If the to-be-dup'd fd number is greater than the allowed number 1277 * of file descriptors, or the fd to be dup'd has already been 1278 * closed, reject. Note, there is no need to check for new == old 1279 * because fd_getfile will return NULL if the file at indx is 1280 * newly created by falloc (FIF_LARVAL). 1281 */ 1282 if ((wfp = fd_getfile(fdp, dfd)) == NULL) 1283 return (EBADF); 1284 1285 /* 1286 * Check that the mode the file is being opened for is a 1287 * subset of the mode of the existing descriptor. 1288 */ 1289 if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) 1290 return (EACCES); 1291 if (wfp->f_count == LONG_MAX-2) 1292 return (EDEADLK); 1293 1294 fdp->fd_ofiles[indx] = wfp; 1295 fdp->fd_ofileflags[indx] = (fdp->fd_ofileflags[indx] & UF_EXCLOSE) | 1296 (fdp->fd_ofileflags[dfd] & ~UF_EXCLOSE); 1297 wfp->f_count++; 1298 fd_used(fdp, indx); 1299 return (0); 1300 } 1301 1302 /* 1303 * Close any files on exec? 1304 */ 1305 void 1306 fdcloseexec(struct proc *p) 1307 { 1308 struct filedesc *fdp = p->p_fd; 1309 int fd; 1310 1311 fdplock(fdp); 1312 for (fd = 0; fd <= fdp->fd_lastfile; fd++) 1313 if (fdp->fd_ofileflags[fd] & UF_EXCLOSE) 1314 (void) fdrelease(p, fd); 1315 fdpunlock(fdp); 1316 } 1317 1318 int 1319 sys_closefrom(struct proc *p, void *v, register_t *retval) 1320 { 1321 struct sys_closefrom_args *uap = v; 1322 struct filedesc *fdp = p->p_fd; 1323 u_int startfd, i; 1324 1325 startfd = SCARG(uap, fd); 1326 fdplock(fdp); 1327 1328 if (startfd > fdp->fd_lastfile) { 1329 fdpunlock(fdp); 1330 return (EBADF); 1331 } 1332 1333 for (i = startfd; i <= fdp->fd_lastfile; i++) 1334 fdrelease(p, i); 1335 1336 fdpunlock(fdp); 1337 return (0); 1338 } 1339 1340 int 1341 sys_getdtablecount(struct proc *p, void *v, register_t *retval) 1342 { 1343 *retval = p->p_fd->fd_openfd; 1344 return (0); 1345 } 1346