1 /* $OpenBSD: kern_descrip.c,v 1.113 2014/08/31 01:42:36 guenther Exp $ */ 2 /* $NetBSD: kern_descrip.c,v 1.42 1996/03/30 22:24:38 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1989, 1991, 1993 6 * The Regents of the University of California. All rights reserved. 7 * (c) UNIX System Laboratories, Inc. 8 * All or some portions of this file are derived from material licensed 9 * to the University of California by American Telephone and Telegraph 10 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 11 * the permission of UNIX System Laboratories, Inc. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * @(#)kern_descrip.c 8.6 (Berkeley) 4/19/94 38 */ 39 40 #include <sys/param.h> 41 #include <sys/systm.h> 42 #include <sys/filedesc.h> 43 #include <sys/kernel.h> 44 #include <sys/vnode.h> 45 #include <sys/proc.h> 46 #include <sys/file.h> 47 #include <sys/socket.h> 48 #include <sys/socketvar.h> 49 #include <sys/stat.h> 50 #include <sys/ioctl.h> 51 #include <sys/fcntl.h> 52 #include <sys/malloc.h> 53 #include <sys/syslog.h> 54 #include <sys/ucred.h> 55 #include <sys/unistd.h> 56 #include <sys/resourcevar.h> 57 #include <sys/conf.h> 58 #include <sys/mount.h> 59 #include <sys/syscallargs.h> 60 #include <sys/event.h> 61 #include <sys/pool.h> 62 #include <sys/ktrace.h> 63 64 #include <sys/pipe.h> 65 66 /* 67 * Descriptor management. 68 */ 69 struct filelist filehead; /* head of list of open files */ 70 int nfiles; /* actual number of open files */ 71 72 static __inline void fd_used(struct filedesc *, int); 73 static __inline void fd_unused(struct filedesc *, int); 74 static __inline int find_next_zero(u_int *, int, u_int); 75 int finishdup(struct proc *, struct file *, int, int, register_t *, int); 76 int find_last_set(struct filedesc *, int); 77 int dodup3(struct proc *, int, int, int, register_t *); 78 79 struct pool file_pool; 80 struct pool fdesc_pool; 81 82 void 83 filedesc_init(void) 84 { 85 pool_init(&file_pool, sizeof(struct file), 0, 0, 0, "filepl", 86 &pool_allocator_nointr); 87 pool_init(&fdesc_pool, sizeof(struct filedesc0), 0, 0, 0, "fdescpl", 88 &pool_allocator_nointr); 89 LIST_INIT(&filehead); 90 } 91 92 static __inline int 93 find_next_zero (u_int *bitmap, int want, u_int bits) 94 { 95 int i, off, maxoff; 96 u_int sub; 97 98 if (want > bits) 99 return -1; 100 101 off = want >> NDENTRYSHIFT; 102 i = want & NDENTRYMASK; 103 if (i) { 104 sub = bitmap[off] | ((u_int)~0 >> (NDENTRIES - i)); 105 if (sub != ~0) 106 goto found; 107 off++; 108 } 109 110 maxoff = NDLOSLOTS(bits); 111 while (off < maxoff) { 112 if ((sub = bitmap[off]) != ~0) 113 goto found; 114 off++; 115 } 116 117 return -1; 118 119 found: 120 return (off << NDENTRYSHIFT) + ffs(~sub) - 1; 121 } 122 123 int 124 find_last_set(struct filedesc *fd, int last) 125 { 126 int off, i; 127 struct file **ofiles = fd->fd_ofiles; 128 u_int *bitmap = fd->fd_lomap; 129 130 off = (last - 1) >> NDENTRYSHIFT; 131 132 while (off >= 0 && !bitmap[off]) 133 off--; 134 if (off < 0) 135 return 0; 136 137 i = ((off + 1) << NDENTRYSHIFT) - 1; 138 if (i >= last) 139 i = last - 1; 140 141 while (i > 0 && ofiles[i] == NULL) 142 i--; 143 return i; 144 } 145 146 static __inline void 147 fd_used(struct filedesc *fdp, int fd) 148 { 149 u_int off = fd >> NDENTRYSHIFT; 150 151 fdp->fd_lomap[off] |= 1 << (fd & NDENTRYMASK); 152 if (fdp->fd_lomap[off] == ~0) 153 fdp->fd_himap[off >> NDENTRYSHIFT] |= 1 << (off & NDENTRYMASK); 154 155 if (fd > fdp->fd_lastfile) 156 fdp->fd_lastfile = fd; 157 fdp->fd_openfd++; 158 } 159 160 static __inline void 161 fd_unused(struct filedesc *fdp, int fd) 162 { 163 u_int off = fd >> NDENTRYSHIFT; 164 165 if (fd < fdp->fd_freefile) 166 fdp->fd_freefile = fd; 167 168 if (fdp->fd_lomap[off] == ~0) 169 fdp->fd_himap[off >> NDENTRYSHIFT] &= ~(1 << (off & NDENTRYMASK)); 170 fdp->fd_lomap[off] &= ~(1 << (fd & NDENTRYMASK)); 171 172 #ifdef DIAGNOSTIC 173 if (fd > fdp->fd_lastfile) 174 panic("fd_unused: fd_lastfile inconsistent"); 175 #endif 176 if (fd == fdp->fd_lastfile) 177 fdp->fd_lastfile = find_last_set(fdp, fd); 178 fdp->fd_openfd--; 179 } 180 181 struct file * 182 fd_getfile(struct filedesc *fdp, int fd) 183 { 184 struct file *fp; 185 186 if ((u_int)fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == NULL) 187 return (NULL); 188 189 if (!FILE_IS_USABLE(fp)) 190 return (NULL); 191 192 return (fp); 193 } 194 195 /* 196 * System calls on descriptors. 197 */ 198 199 /* 200 * Duplicate a file descriptor. 201 */ 202 /* ARGSUSED */ 203 int 204 sys_dup(struct proc *p, void *v, register_t *retval) 205 { 206 struct sys_dup_args /* { 207 syscallarg(int) fd; 208 } */ *uap = v; 209 struct filedesc *fdp = p->p_fd; 210 int old = SCARG(uap, fd); 211 struct file *fp; 212 int new; 213 int error; 214 215 restart: 216 if ((fp = fd_getfile(fdp, old)) == NULL) 217 return (EBADF); 218 FREF(fp); 219 fdplock(fdp); 220 if ((error = fdalloc(p, 0, &new)) != 0) { 221 FRELE(fp, p); 222 if (error == ENOSPC) { 223 fdexpand(p); 224 fdpunlock(fdp); 225 goto restart; 226 } 227 goto out; 228 } 229 error = finishdup(p, fp, old, new, retval, 0); 230 231 out: 232 fdpunlock(fdp); 233 return (error); 234 } 235 236 /* 237 * Duplicate a file descriptor to a particular value. 238 */ 239 int 240 sys_dup2(struct proc *p, void *v, register_t *retval) 241 { 242 struct sys_dup2_args /* { 243 syscallarg(int) from; 244 syscallarg(int) to; 245 } */ *uap = v; 246 247 return (dodup3(p, SCARG(uap, from), SCARG(uap, to), 0, retval)); 248 } 249 250 int 251 sys_dup3(struct proc *p, void *v, register_t *retval) 252 { 253 struct sys_dup3_args /* { 254 syscallarg(int) from; 255 syscallarg(int) to; 256 syscallarg(int) flags; 257 } */ *uap = v; 258 259 if (SCARG(uap, from) == SCARG(uap, to)) 260 return (EINVAL); 261 if (SCARG(uap, flags) & ~O_CLOEXEC) 262 return (EINVAL); 263 return (dodup3(p, SCARG(uap, from), SCARG(uap, to), 264 SCARG(uap, flags), retval)); 265 } 266 267 int 268 dodup3(struct proc *p, int old, int new, int flags, register_t *retval) 269 { 270 struct filedesc *fdp = p->p_fd; 271 struct file *fp; 272 int i, error; 273 274 restart: 275 if ((fp = fd_getfile(fdp, old)) == NULL) 276 return (EBADF); 277 if ((u_int)new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur || 278 (u_int)new >= maxfiles) 279 return (EBADF); 280 if (old == new) { 281 /* 282 * NOTE! This doesn't clear the close-on-exec flag. This might 283 * or might not be the intended behavior from the start, but 284 * this is what everyone else does. 285 */ 286 *retval = new; 287 return (0); 288 } 289 FREF(fp); 290 fdplock(fdp); 291 if (new >= fdp->fd_nfiles) { 292 if ((error = fdalloc(p, new, &i)) != 0) { 293 FRELE(fp, p); 294 if (error == ENOSPC) { 295 fdexpand(p); 296 fdpunlock(fdp); 297 goto restart; 298 } 299 goto out; 300 } 301 if (new != i) 302 panic("dup2: fdalloc"); 303 fd_unused(fdp, new); 304 } 305 /* finishdup() does FRELE */ 306 error = finishdup(p, fp, old, new, retval, 1); 307 if (!error && flags & O_CLOEXEC) 308 fdp->fd_ofileflags[new] |= UF_EXCLOSE; 309 310 out: 311 fdpunlock(fdp); 312 return (error); 313 } 314 315 /* 316 * The file control system call. 317 */ 318 /* ARGSUSED */ 319 int 320 sys_fcntl(struct proc *p, void *v, register_t *retval) 321 { 322 struct sys_fcntl_args /* { 323 syscallarg(int) fd; 324 syscallarg(int) cmd; 325 syscallarg(void *) arg; 326 } */ *uap = v; 327 int fd = SCARG(uap, fd); 328 struct filedesc *fdp = p->p_fd; 329 struct file *fp; 330 struct vnode *vp; 331 int i, tmp, newmin, flg = F_POSIX; 332 struct flock fl; 333 int error = 0; 334 335 restart: 336 if ((fp = fd_getfile(fdp, fd)) == NULL) 337 return (EBADF); 338 FREF(fp); 339 switch (SCARG(uap, cmd)) { 340 341 case F_DUPFD: 342 case F_DUPFD_CLOEXEC: 343 newmin = (long)SCARG(uap, arg); 344 if ((u_int)newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur || 345 (u_int)newmin >= maxfiles) { 346 error = EINVAL; 347 break; 348 } 349 fdplock(fdp); 350 if ((error = fdalloc(p, newmin, &i)) != 0) { 351 FRELE(fp, p); 352 if (error == ENOSPC) { 353 fdexpand(p); 354 fdpunlock(fdp); 355 goto restart; 356 } 357 } else { 358 /* finishdup will FRELE for us. */ 359 error = finishdup(p, fp, fd, i, retval, 0); 360 361 if (!error && SCARG(uap, cmd) == F_DUPFD_CLOEXEC) 362 fdp->fd_ofileflags[i] |= UF_EXCLOSE; 363 } 364 365 fdpunlock(fdp); 366 return (error); 367 368 case F_GETFD: 369 *retval = fdp->fd_ofileflags[fd] & UF_EXCLOSE ? 1 : 0; 370 break; 371 372 case F_SETFD: 373 fdplock(fdp); 374 if ((long)SCARG(uap, arg) & 1) 375 fdp->fd_ofileflags[fd] |= UF_EXCLOSE; 376 else 377 fdp->fd_ofileflags[fd] &= ~UF_EXCLOSE; 378 fdpunlock(fdp); 379 break; 380 381 case F_GETFL: 382 *retval = OFLAGS(fp->f_flag); 383 break; 384 385 case F_SETFL: 386 fp->f_flag &= ~FCNTLFLAGS; 387 fp->f_flag |= FFLAGS((long)SCARG(uap, arg)) & FCNTLFLAGS; 388 tmp = fp->f_flag & FNONBLOCK; 389 error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p); 390 if (error) 391 break; 392 tmp = fp->f_flag & FASYNC; 393 error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p); 394 if (!error) 395 break; 396 fp->f_flag &= ~FNONBLOCK; 397 tmp = 0; 398 (void) (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p); 399 break; 400 401 case F_GETOWN: 402 if (fp->f_type == DTYPE_SOCKET) { 403 *retval = ((struct socket *)fp->f_data)->so_pgid; 404 break; 405 } 406 error = (*fp->f_ops->fo_ioctl) 407 (fp, TIOCGPGRP, (caddr_t)&tmp, p); 408 *retval = -tmp; 409 break; 410 411 case F_SETOWN: 412 if (fp->f_type == DTYPE_SOCKET) { 413 struct socket *so = (struct socket *)fp->f_data; 414 415 so->so_pgid = (long)SCARG(uap, arg); 416 so->so_siguid = p->p_ucred->cr_ruid; 417 so->so_sigeuid = p->p_ucred->cr_uid; 418 break; 419 } 420 if ((long)SCARG(uap, arg) <= 0) { 421 SCARG(uap, arg) = (void *)(-(long)SCARG(uap, arg)); 422 } else { 423 struct process *pr1 = prfind((long)SCARG(uap, arg)); 424 if (pr1 == 0) { 425 error = ESRCH; 426 break; 427 } 428 SCARG(uap, arg) = (void *)(long)pr1->ps_pgrp->pg_id; 429 } 430 error = ((*fp->f_ops->fo_ioctl) 431 (fp, TIOCSPGRP, (caddr_t)&SCARG(uap, arg), p)); 432 break; 433 434 case F_SETLKW: 435 flg |= F_WAIT; 436 /* FALLTHROUGH */ 437 438 case F_SETLK: 439 if (fp->f_type != DTYPE_VNODE) { 440 error = EBADF; 441 break; 442 } 443 vp = (struct vnode *)fp->f_data; 444 /* Copy in the lock structure */ 445 error = copyin((caddr_t)SCARG(uap, arg), (caddr_t)&fl, 446 sizeof (fl)); 447 if (error) 448 break; 449 if (fl.l_whence == SEEK_CUR) { 450 if (fl.l_start == 0 && fl.l_len < 0) { 451 /* lockf(3) compliance hack */ 452 fl.l_len = -fl.l_len; 453 fl.l_start = fp->f_offset - fl.l_len; 454 } else 455 fl.l_start += fp->f_offset; 456 } 457 switch (fl.l_type) { 458 459 case F_RDLCK: 460 if ((fp->f_flag & FREAD) == 0) { 461 error = EBADF; 462 goto out; 463 } 464 atomic_setbits_int(&fdp->fd_flags, FD_ADVLOCK); 465 error = VOP_ADVLOCK(vp, fdp, F_SETLK, &fl, flg); 466 break; 467 468 case F_WRLCK: 469 if ((fp->f_flag & FWRITE) == 0) { 470 error = EBADF; 471 goto out; 472 } 473 atomic_setbits_int(&fdp->fd_flags, FD_ADVLOCK); 474 error = VOP_ADVLOCK(vp, fdp, F_SETLK, &fl, flg); 475 break; 476 477 case F_UNLCK: 478 error = VOP_ADVLOCK(vp, fdp, F_UNLCK, &fl, F_POSIX); 479 goto out; 480 481 default: 482 error = EINVAL; 483 goto out; 484 } 485 486 if (fp != fd_getfile(fdp, fd)) { 487 /* 488 * We have lost the race with close() or dup2(); 489 * unlock, pretend that we've won the race and that 490 * lock had been removed by close() 491 */ 492 fl.l_whence = SEEK_SET; 493 fl.l_start = 0; 494 fl.l_len = 0; 495 VOP_ADVLOCK(vp, fdp, F_UNLCK, &fl, F_POSIX); 496 fl.l_type = F_UNLCK; 497 } 498 goto out; 499 500 501 case F_GETLK: 502 if (fp->f_type != DTYPE_VNODE) { 503 error = EBADF; 504 break; 505 } 506 vp = (struct vnode *)fp->f_data; 507 /* Copy in the lock structure */ 508 error = copyin((caddr_t)SCARG(uap, arg), (caddr_t)&fl, 509 sizeof (fl)); 510 if (error) 511 break; 512 if (fl.l_whence == SEEK_CUR) { 513 if (fl.l_start == 0 && fl.l_len < 0) { 514 /* lockf(3) compliance hack */ 515 fl.l_len = -fl.l_len; 516 fl.l_start = fp->f_offset - fl.l_len; 517 } else 518 fl.l_start += fp->f_offset; 519 } 520 if (fl.l_type != F_RDLCK && 521 fl.l_type != F_WRLCK && 522 fl.l_type != F_UNLCK && 523 fl.l_type != 0) { 524 error = EINVAL; 525 break; 526 } 527 error = VOP_ADVLOCK(vp, fdp, F_GETLK, &fl, F_POSIX); 528 if (error) 529 break; 530 error = (copyout((caddr_t)&fl, (caddr_t)SCARG(uap, arg), 531 sizeof (fl))); 532 break; 533 534 default: 535 error = EINVAL; 536 break; 537 } 538 out: 539 FRELE(fp, p); 540 return (error); 541 } 542 543 /* 544 * Common code for dup, dup2, and fcntl(F_DUPFD). 545 */ 546 int 547 finishdup(struct proc *p, struct file *fp, int old, int new, 548 register_t *retval, int dup2) 549 { 550 struct file *oldfp; 551 struct filedesc *fdp = p->p_fd; 552 553 fdpassertlocked(fdp); 554 if (fp->f_count == LONG_MAX-2) { 555 FRELE(fp, p); 556 return (EDEADLK); 557 } 558 559 /* 560 * Don't fd_getfile here. We want to closef LARVAL files and 561 * closef can deal with that. 562 */ 563 oldfp = fdp->fd_ofiles[new]; 564 if (oldfp != NULL) 565 FREF(oldfp); 566 567 fdp->fd_ofiles[new] = fp; 568 fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] & ~UF_EXCLOSE; 569 fp->f_count++; 570 FRELE(fp, p); 571 if (dup2 && oldfp == NULL) 572 fd_used(fdp, new); 573 *retval = new; 574 575 if (oldfp != NULL) { 576 if (new < fdp->fd_knlistsize) 577 knote_fdclose(p, new); 578 closef(oldfp, p); 579 } 580 581 return (0); 582 } 583 584 void 585 fdremove(struct filedesc *fdp, int fd) 586 { 587 fdpassertlocked(fdp); 588 fdp->fd_ofiles[fd] = NULL; 589 fd_unused(fdp, fd); 590 } 591 592 int 593 fdrelease(struct proc *p, int fd) 594 { 595 struct filedesc *fdp = p->p_fd; 596 struct file **fpp, *fp; 597 598 fdpassertlocked(fdp); 599 600 /* 601 * Don't fd_getfile here. We want to closef LARVAL files and closef 602 * can deal with that. 603 */ 604 fpp = &fdp->fd_ofiles[fd]; 605 fp = *fpp; 606 if (fp == NULL) 607 return (EBADF); 608 FREF(fp); 609 *fpp = NULL; 610 fd_unused(fdp, fd); 611 if (fd < fdp->fd_knlistsize) 612 knote_fdclose(p, fd); 613 return (closef(fp, p)); 614 } 615 616 /* 617 * Close a file descriptor. 618 */ 619 /* ARGSUSED */ 620 int 621 sys_close(struct proc *p, void *v, register_t *retval) 622 { 623 struct sys_close_args /* { 624 syscallarg(int) fd; 625 } */ *uap = v; 626 int fd = SCARG(uap, fd), error; 627 struct filedesc *fdp = p->p_fd; 628 629 if (fd_getfile(fdp, fd) == NULL) 630 return (EBADF); 631 fdplock(fdp); 632 error = fdrelease(p, fd); 633 fdpunlock(fdp); 634 635 return (error); 636 } 637 638 /* 639 * Return status information about a file descriptor. 640 */ 641 int 642 sys_fstat(struct proc *p, void *v, register_t *retval) 643 { 644 struct sys_fstat_args /* { 645 syscallarg(int) fd; 646 syscallarg(struct stat *) sb; 647 } */ *uap = v; 648 int fd = SCARG(uap, fd); 649 struct filedesc *fdp = p->p_fd; 650 struct file *fp; 651 struct stat ub; 652 int error; 653 654 if ((fp = fd_getfile(fdp, fd)) == NULL) 655 return (EBADF); 656 FREF(fp); 657 error = (*fp->f_ops->fo_stat)(fp, &ub, p); 658 FRELE(fp, p); 659 if (error == 0) { 660 /* 661 * Don't let non-root see generation numbers 662 * (for NFS security) 663 */ 664 if (suser(p, 0)) 665 ub.st_gen = 0; 666 error = copyout((caddr_t)&ub, (caddr_t)SCARG(uap, sb), 667 sizeof (ub)); 668 } 669 #ifdef KTRACE 670 if (error == 0 && KTRPOINT(p, KTR_STRUCT)) 671 ktrstat(p, &ub); 672 #endif 673 return (error); 674 } 675 676 /* 677 * Return pathconf information about a file descriptor. 678 */ 679 /* ARGSUSED */ 680 int 681 sys_fpathconf(struct proc *p, void *v, register_t *retval) 682 { 683 struct sys_fpathconf_args /* { 684 syscallarg(int) fd; 685 syscallarg(int) name; 686 } */ *uap = v; 687 int fd = SCARG(uap, fd); 688 struct filedesc *fdp = p->p_fd; 689 struct file *fp; 690 struct vnode *vp; 691 int error; 692 693 if ((fp = fd_getfile(fdp, fd)) == NULL) 694 return (EBADF); 695 FREF(fp); 696 switch (fp->f_type) { 697 case DTYPE_PIPE: 698 case DTYPE_SOCKET: 699 if (SCARG(uap, name) != _PC_PIPE_BUF) { 700 error = EINVAL; 701 break; 702 } 703 *retval = PIPE_BUF; 704 error = 0; 705 break; 706 707 case DTYPE_VNODE: 708 vp = (struct vnode *)fp->f_data; 709 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); 710 error = VOP_PATHCONF(vp, SCARG(uap, name), retval); 711 VOP_UNLOCK(vp, 0, p); 712 break; 713 714 default: 715 error = EOPNOTSUPP; 716 break; 717 } 718 FRELE(fp, p); 719 return (error); 720 } 721 722 /* 723 * Allocate a file descriptor for the process. 724 */ 725 int 726 fdalloc(struct proc *p, int want, int *result) 727 { 728 struct filedesc *fdp = p->p_fd; 729 int lim, last, i; 730 u_int new, off; 731 732 /* 733 * Search for a free descriptor starting at the higher 734 * of want or fd_freefile. If that fails, consider 735 * expanding the ofile array. 736 */ 737 restart: 738 lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles); 739 last = min(fdp->fd_nfiles, lim); 740 if ((i = want) < fdp->fd_freefile) 741 i = fdp->fd_freefile; 742 off = i >> NDENTRYSHIFT; 743 new = find_next_zero(fdp->fd_himap, off, 744 (last + NDENTRIES - 1) >> NDENTRYSHIFT); 745 if (new != -1) { 746 i = find_next_zero(&fdp->fd_lomap[new], 747 new > off ? 0 : i & NDENTRYMASK, 748 NDENTRIES); 749 if (i == -1) { 750 /* 751 * Free file descriptor in this block was 752 * below want, try again with higher want. 753 */ 754 want = (new + 1) << NDENTRYSHIFT; 755 goto restart; 756 } 757 i += (new << NDENTRYSHIFT); 758 if (i < last) { 759 fd_used(fdp, i); 760 if (want <= fdp->fd_freefile) 761 fdp->fd_freefile = i; 762 *result = i; 763 fdp->fd_ofileflags[i] = 0; 764 return (0); 765 } 766 } 767 if (fdp->fd_nfiles >= lim) 768 return (EMFILE); 769 770 return (ENOSPC); 771 } 772 773 void 774 fdexpand(struct proc *p) 775 { 776 struct filedesc *fdp = p->p_fd; 777 int nfiles; 778 size_t copylen; 779 struct file **newofile; 780 char *newofileflags; 781 u_int *newhimap, *newlomap; 782 783 fdpassertlocked(fdp); 784 785 /* 786 * No space in current array. 787 */ 788 if (fdp->fd_nfiles < NDEXTENT) 789 nfiles = NDEXTENT; 790 else 791 nfiles = 2 * fdp->fd_nfiles; 792 793 newofile = mallocarray(nfiles, OFILESIZE, M_FILEDESC, M_WAITOK); 794 newofileflags = (char *) &newofile[nfiles]; 795 796 /* 797 * Copy the existing ofile and ofileflags arrays 798 * and zero the new portion of each array. 799 */ 800 copylen = sizeof(struct file *) * fdp->fd_nfiles; 801 memcpy(newofile, fdp->fd_ofiles, copylen); 802 memset((char *)newofile + copylen, 0, 803 nfiles * sizeof(struct file *) - copylen); 804 copylen = sizeof(char) * fdp->fd_nfiles; 805 memcpy(newofileflags, fdp->fd_ofileflags, copylen); 806 memset(newofileflags + copylen, 0, nfiles * sizeof(char) - copylen); 807 808 if (fdp->fd_nfiles > NDFILE) 809 free(fdp->fd_ofiles, M_FILEDESC, 0); 810 811 if (NDHISLOTS(nfiles) > NDHISLOTS(fdp->fd_nfiles)) { 812 newhimap = mallocarray(NDHISLOTS(nfiles), sizeof(u_int), 813 M_FILEDESC, M_WAITOK); 814 newlomap = mallocarray(NDLOSLOTS(nfiles), sizeof(u_int), 815 M_FILEDESC, M_WAITOK); 816 817 copylen = NDHISLOTS(fdp->fd_nfiles) * sizeof(u_int); 818 memcpy(newhimap, fdp->fd_himap, copylen); 819 memset((char *)newhimap + copylen, 0, 820 NDHISLOTS(nfiles) * sizeof(u_int) - copylen); 821 822 copylen = NDLOSLOTS(fdp->fd_nfiles) * sizeof(u_int); 823 memcpy(newlomap, fdp->fd_lomap, copylen); 824 memset((char *)newlomap + copylen, 0, 825 NDLOSLOTS(nfiles) * sizeof(u_int) - copylen); 826 827 if (NDHISLOTS(fdp->fd_nfiles) > NDHISLOTS(NDFILE)) { 828 free(fdp->fd_himap, M_FILEDESC, 0); 829 free(fdp->fd_lomap, M_FILEDESC, 0); 830 } 831 fdp->fd_himap = newhimap; 832 fdp->fd_lomap = newlomap; 833 } 834 fdp->fd_ofiles = newofile; 835 fdp->fd_ofileflags = newofileflags; 836 fdp->fd_nfiles = nfiles; 837 } 838 839 /* 840 * Create a new open file structure and allocate 841 * a file descriptor for the process that refers to it. 842 */ 843 int 844 falloc(struct proc *p, struct file **resultfp, int *resultfd) 845 { 846 struct file *fp, *fq; 847 int error, i; 848 849 fdpassertlocked(p->p_fd); 850 restart: 851 if ((error = fdalloc(p, 0, &i)) != 0) { 852 if (error == ENOSPC) { 853 fdexpand(p); 854 goto restart; 855 } 856 return (error); 857 } 858 if (nfiles >= maxfiles) { 859 fd_unused(p->p_fd, i); 860 tablefull("file"); 861 return (ENFILE); 862 } 863 /* 864 * Allocate a new file descriptor. 865 * If the process has file descriptor zero open, add to the list 866 * of open files at that point, otherwise put it at the front of 867 * the list of open files. 868 */ 869 nfiles++; 870 fp = pool_get(&file_pool, PR_WAITOK|PR_ZERO); 871 fp->f_iflags = FIF_LARVAL; 872 if ((fq = p->p_fd->fd_ofiles[0]) != NULL) { 873 LIST_INSERT_AFTER(fq, fp, f_list); 874 } else { 875 LIST_INSERT_HEAD(&filehead, fp, f_list); 876 } 877 p->p_fd->fd_ofiles[i] = fp; 878 fp->f_count = 1; 879 fp->f_cred = p->p_ucred; 880 crhold(fp->f_cred); 881 if (resultfp) 882 *resultfp = fp; 883 if (resultfd) 884 *resultfd = i; 885 FREF(fp); 886 return (0); 887 } 888 889 /* 890 * Build a new filedesc structure. 891 */ 892 struct filedesc * 893 fdinit(void) 894 { 895 struct filedesc0 *newfdp; 896 extern int cmask; 897 898 newfdp = pool_get(&fdesc_pool, PR_WAITOK|PR_ZERO); 899 rw_init(&newfdp->fd_fd.fd_lock, "fdlock"); 900 901 /* Create the file descriptor table. */ 902 newfdp->fd_fd.fd_refcnt = 1; 903 newfdp->fd_fd.fd_cmask = cmask; 904 newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles; 905 newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags; 906 newfdp->fd_fd.fd_nfiles = NDFILE; 907 newfdp->fd_fd.fd_himap = newfdp->fd_dhimap; 908 newfdp->fd_fd.fd_lomap = newfdp->fd_dlomap; 909 newfdp->fd_fd.fd_knlistsize = -1; 910 911 newfdp->fd_fd.fd_freefile = 0; 912 newfdp->fd_fd.fd_lastfile = 0; 913 914 return (&newfdp->fd_fd); 915 } 916 917 /* 918 * Share a filedesc structure. 919 */ 920 struct filedesc * 921 fdshare(struct process *pr) 922 { 923 pr->ps_fd->fd_refcnt++; 924 return (pr->ps_fd); 925 } 926 927 /* 928 * Copy a filedesc structure. 929 */ 930 struct filedesc * 931 fdcopy(struct process *pr) 932 { 933 struct filedesc *newfdp, *fdp = pr->ps_fd; 934 struct file **fpp; 935 int i; 936 937 fdplock(fdp); 938 newfdp = pool_get(&fdesc_pool, PR_WAITOK); 939 memcpy(newfdp, fdp, sizeof(struct filedesc)); 940 if (newfdp->fd_cdir) 941 vref(newfdp->fd_cdir); 942 if (newfdp->fd_rdir) 943 vref(newfdp->fd_rdir); 944 newfdp->fd_refcnt = 1; 945 rw_init(&newfdp->fd_lock, "fdlock"); 946 947 /* 948 * If the number of open files fits in the internal arrays 949 * of the open file structure, use them, otherwise allocate 950 * additional memory for the number of descriptors currently 951 * in use. 952 */ 953 if (newfdp->fd_lastfile < NDFILE) { 954 newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles; 955 newfdp->fd_ofileflags = 956 ((struct filedesc0 *) newfdp)->fd_dfileflags; 957 i = NDFILE; 958 } else { 959 /* 960 * Compute the smallest multiple of NDEXTENT needed 961 * for the file descriptors currently in use, 962 * allowing the table to shrink. 963 */ 964 i = newfdp->fd_nfiles; 965 while (i >= 2 * NDEXTENT && i > newfdp->fd_lastfile * 2) 966 i /= 2; 967 newfdp->fd_ofiles = mallocarray(i, OFILESIZE, M_FILEDESC, M_WAITOK); 968 newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i]; 969 } 970 if (NDHISLOTS(i) <= NDHISLOTS(NDFILE)) { 971 newfdp->fd_himap = 972 ((struct filedesc0 *) newfdp)->fd_dhimap; 973 newfdp->fd_lomap = 974 ((struct filedesc0 *) newfdp)->fd_dlomap; 975 } else { 976 newfdp->fd_himap = mallocarray(NDHISLOTS(i), sizeof(u_int), 977 M_FILEDESC, M_WAITOK); 978 newfdp->fd_lomap = mallocarray(NDLOSLOTS(i), sizeof(u_int), 979 M_FILEDESC, M_WAITOK); 980 } 981 newfdp->fd_nfiles = i; 982 memcpy(newfdp->fd_ofiles, fdp->fd_ofiles, i * sizeof(struct file *)); 983 memcpy(newfdp->fd_ofileflags, fdp->fd_ofileflags, i * sizeof(char)); 984 memcpy(newfdp->fd_himap, fdp->fd_himap, NDHISLOTS(i) * sizeof(u_int)); 985 memcpy(newfdp->fd_lomap, fdp->fd_lomap, NDLOSLOTS(i) * sizeof(u_int)); 986 fdpunlock(fdp); 987 988 fdplock(newfdp); 989 fpp = newfdp->fd_ofiles; 990 for (i = 0; i <= newfdp->fd_lastfile; i++, fpp++) 991 if (*fpp != NULL) { 992 /* 993 * XXX Gruesome hack. If count gets too high, fail 994 * to copy an fd, since fdcopy()'s callers do not 995 * permit it to indicate failure yet. 996 * Meanwhile, kqueue and systrace files have to be 997 * tied to the process that opened them to enforce 998 * their internal consistency, so close them here. 999 */ 1000 if ((*fpp)->f_count == LONG_MAX-2 || 1001 (*fpp)->f_type == DTYPE_KQUEUE || 1002 (*fpp)->f_type == DTYPE_SYSTRACE) 1003 fdremove(newfdp, i); 1004 else 1005 (*fpp)->f_count++; 1006 } 1007 1008 /* finish cleaning up kq bits */ 1009 if (newfdp->fd_knlistsize != -1) { 1010 newfdp->fd_knlist = NULL; 1011 newfdp->fd_knlistsize = -1; 1012 newfdp->fd_knhash = NULL; 1013 newfdp->fd_knhashmask = 0; 1014 } 1015 1016 fdpunlock(newfdp); 1017 return (newfdp); 1018 } 1019 1020 /* 1021 * Release a filedesc structure. 1022 */ 1023 void 1024 fdfree(struct proc *p) 1025 { 1026 struct filedesc *fdp = p->p_fd; 1027 struct file **fpp, *fp; 1028 int i; 1029 1030 if (--fdp->fd_refcnt > 0) 1031 return; 1032 fpp = fdp->fd_ofiles; 1033 for (i = fdp->fd_lastfile; i >= 0; i--, fpp++) { 1034 fp = *fpp; 1035 if (fp != NULL) { 1036 FREF(fp); 1037 *fpp = NULL; 1038 (void) closef(fp, p); 1039 } 1040 } 1041 p->p_fd = NULL; 1042 if (fdp->fd_nfiles > NDFILE) 1043 free(fdp->fd_ofiles, M_FILEDESC, 0); 1044 if (NDHISLOTS(fdp->fd_nfiles) > NDHISLOTS(NDFILE)) { 1045 free(fdp->fd_himap, M_FILEDESC, 0); 1046 free(fdp->fd_lomap, M_FILEDESC, 0); 1047 } 1048 if (fdp->fd_cdir) 1049 vrele(fdp->fd_cdir); 1050 if (fdp->fd_rdir) 1051 vrele(fdp->fd_rdir); 1052 if (fdp->fd_knlist) 1053 free(fdp->fd_knlist, M_TEMP, 0); 1054 if (fdp->fd_knhash) 1055 free(fdp->fd_knhash, M_TEMP, 0); 1056 pool_put(&fdesc_pool, fdp); 1057 } 1058 1059 /* 1060 * Internal form of close. 1061 * Decrement reference count on file structure. 1062 * Note: p may be NULL when closing a file 1063 * that was being passed in a message. 1064 * 1065 * The fp must have its usecount bumped and will be FRELEd here. 1066 */ 1067 int 1068 closef(struct file *fp, struct proc *p) 1069 { 1070 struct filedesc *fdp; 1071 1072 if (fp == NULL) 1073 return (0); 1074 1075 #ifdef DIAGNOSTIC 1076 if (fp->f_count < 2) 1077 panic("closef: count (%ld) < 2", fp->f_count); 1078 #endif 1079 fp->f_count--; 1080 1081 /* 1082 * POSIX record locking dictates that any close releases ALL 1083 * locks owned by this process. This is handled by setting 1084 * a flag in the unlock to free ONLY locks obeying POSIX 1085 * semantics, and not to free BSD-style file locks. 1086 * If the descriptor was in a message, POSIX-style locks 1087 * aren't passed with the descriptor. 1088 */ 1089 1090 if (p && ((fdp = p->p_fd) != NULL) && 1091 (fdp->fd_flags & FD_ADVLOCK) && 1092 fp->f_type == DTYPE_VNODE) { 1093 struct vnode *vp = fp->f_data; 1094 struct flock lf; 1095 1096 lf.l_whence = SEEK_SET; 1097 lf.l_start = 0; 1098 lf.l_len = 0; 1099 lf.l_type = F_UNLCK; 1100 (void) VOP_ADVLOCK(vp, fdp, F_UNLCK, &lf, F_POSIX); 1101 } 1102 1103 return (FRELE(fp, p)); 1104 } 1105 1106 int 1107 fdrop(struct file *fp, struct proc *p) 1108 { 1109 int error; 1110 1111 #ifdef DIAGNOSTIC 1112 if (fp->f_count != 0) 1113 panic("fdrop: count (%ld) != 0", fp->f_count); 1114 #endif 1115 1116 if (fp->f_ops) 1117 error = (*fp->f_ops->fo_close)(fp, p); 1118 else 1119 error = 0; 1120 1121 /* Free fp */ 1122 LIST_REMOVE(fp, f_list); 1123 crfree(fp->f_cred); 1124 nfiles--; 1125 pool_put(&file_pool, fp); 1126 1127 return (error); 1128 } 1129 1130 /* 1131 * Apply an advisory lock on a file descriptor. 1132 * 1133 * Just attempt to get a record lock of the requested type on 1134 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0). 1135 */ 1136 /* ARGSUSED */ 1137 int 1138 sys_flock(struct proc *p, void *v, register_t *retval) 1139 { 1140 struct sys_flock_args /* { 1141 syscallarg(int) fd; 1142 syscallarg(int) how; 1143 } */ *uap = v; 1144 int fd = SCARG(uap, fd); 1145 int how = SCARG(uap, how); 1146 struct filedesc *fdp = p->p_fd; 1147 struct file *fp; 1148 struct vnode *vp; 1149 struct flock lf; 1150 int error; 1151 1152 if ((fp = fd_getfile(fdp, fd)) == NULL) 1153 return (EBADF); 1154 if (fp->f_type != DTYPE_VNODE) 1155 return (EOPNOTSUPP); 1156 FREF(fp); 1157 vp = (struct vnode *)fp->f_data; 1158 lf.l_whence = SEEK_SET; 1159 lf.l_start = 0; 1160 lf.l_len = 0; 1161 if (how & LOCK_UN) { 1162 lf.l_type = F_UNLCK; 1163 fp->f_iflags &= ~FIF_HASLOCK; 1164 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK); 1165 goto out; 1166 } 1167 if (how & LOCK_EX) 1168 lf.l_type = F_WRLCK; 1169 else if (how & LOCK_SH) 1170 lf.l_type = F_RDLCK; 1171 else { 1172 error = EINVAL; 1173 goto out; 1174 } 1175 fp->f_iflags |= FIF_HASLOCK; 1176 if (how & LOCK_NB) 1177 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK); 1178 else 1179 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK|F_WAIT); 1180 out: 1181 FRELE(fp, p); 1182 return (error); 1183 } 1184 1185 /* 1186 * File Descriptor pseudo-device driver (/dev/fd/). 1187 * 1188 * Opening minor device N dup()s the file (if any) connected to file 1189 * descriptor N belonging to the calling process. Note that this driver 1190 * consists of only the ``open()'' routine, because all subsequent 1191 * references to this file will be direct to the other driver. 1192 */ 1193 /* ARGSUSED */ 1194 int 1195 filedescopen(dev_t dev, int mode, int type, struct proc *p) 1196 { 1197 1198 /* 1199 * XXX Kludge: set curproc->p_dupfd to contain the value of the 1200 * the file descriptor being sought for duplication. The error 1201 * return ensures that the vnode for this device will be released 1202 * by vn_open. Open will detect this special error and take the 1203 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN 1204 * will simply report the error. 1205 */ 1206 p->p_dupfd = minor(dev); 1207 return (ENODEV); 1208 } 1209 1210 /* 1211 * Duplicate the specified descriptor to a free descriptor. 1212 */ 1213 int 1214 dupfdopen(struct filedesc *fdp, int indx, int dfd, int mode) 1215 { 1216 struct file *wfp; 1217 1218 fdpassertlocked(fdp); 1219 1220 /* 1221 * Assume that the filename was user-specified; applications do 1222 * not tend to open /dev/fd/# when they can just call dup() 1223 */ 1224 if ((curproc->p_p->ps_flags & (PS_SUGIDEXEC | PS_SUGID))) { 1225 if (curproc->p_descfd == 255) 1226 return (EPERM); 1227 if (curproc->p_descfd != curproc->p_dupfd) 1228 return (EPERM); 1229 } 1230 1231 /* 1232 * If the to-be-dup'd fd number is greater than the allowed number 1233 * of file descriptors, or the fd to be dup'd has already been 1234 * closed, reject. Note, there is no need to check for new == old 1235 * because fd_getfile will return NULL if the file at indx is 1236 * newly created by falloc (FIF_LARVAL). 1237 */ 1238 if ((wfp = fd_getfile(fdp, dfd)) == NULL) 1239 return (EBADF); 1240 1241 /* 1242 * Check that the mode the file is being opened for is a 1243 * subset of the mode of the existing descriptor. 1244 */ 1245 if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) 1246 return (EACCES); 1247 if (wfp->f_count == LONG_MAX-2) 1248 return (EDEADLK); 1249 1250 fdp->fd_ofiles[indx] = wfp; 1251 fdp->fd_ofileflags[indx] = (fdp->fd_ofileflags[indx] & UF_EXCLOSE) | 1252 (fdp->fd_ofileflags[dfd] & ~UF_EXCLOSE); 1253 wfp->f_count++; 1254 fd_used(fdp, indx); 1255 return (0); 1256 } 1257 1258 /* 1259 * Close any files on exec? 1260 */ 1261 void 1262 fdcloseexec(struct proc *p) 1263 { 1264 struct filedesc *fdp = p->p_fd; 1265 int fd; 1266 1267 fdplock(fdp); 1268 for (fd = 0; fd <= fdp->fd_lastfile; fd++) 1269 if (fdp->fd_ofileflags[fd] & UF_EXCLOSE) 1270 (void) fdrelease(p, fd); 1271 fdpunlock(fdp); 1272 } 1273 1274 int 1275 sys_closefrom(struct proc *p, void *v, register_t *retval) 1276 { 1277 struct sys_closefrom_args *uap = v; 1278 struct filedesc *fdp = p->p_fd; 1279 u_int startfd, i; 1280 1281 startfd = SCARG(uap, fd); 1282 fdplock(fdp); 1283 1284 if (startfd > fdp->fd_lastfile) { 1285 fdpunlock(fdp); 1286 return (EBADF); 1287 } 1288 1289 for (i = startfd; i <= fdp->fd_lastfile; i++) 1290 fdrelease(p, i); 1291 1292 fdpunlock(fdp); 1293 return (0); 1294 } 1295 1296 int 1297 sys_getdtablecount(struct proc *p, void *v, register_t *retval) 1298 { 1299 *retval = p->p_fd->fd_openfd; 1300 return (0); 1301 } 1302