1 /* $OpenBSD: kern_descrip.c,v 1.120 2015/05/17 01:22:01 deraadt Exp $ */ 2 /* $NetBSD: kern_descrip.c,v 1.42 1996/03/30 22:24:38 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1989, 1991, 1993 6 * The Regents of the University of California. All rights reserved. 7 * (c) UNIX System Laboratories, Inc. 8 * All or some portions of this file are derived from material licensed 9 * to the University of California by American Telephone and Telegraph 10 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 11 * the permission of UNIX System Laboratories, Inc. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * @(#)kern_descrip.c 8.6 (Berkeley) 4/19/94 38 */ 39 40 #include <sys/param.h> 41 #include <sys/systm.h> 42 #include <sys/filedesc.h> 43 #include <sys/kernel.h> 44 #include <sys/vnode.h> 45 #include <sys/proc.h> 46 #include <sys/file.h> 47 #include <sys/socket.h> 48 #include <sys/socketvar.h> 49 #include <sys/stat.h> 50 #include <sys/ioctl.h> 51 #include <sys/fcntl.h> 52 #include <sys/lock.h> 53 #include <sys/malloc.h> 54 #include <sys/syslog.h> 55 #include <sys/ucred.h> 56 #include <sys/unistd.h> 57 #include <sys/resourcevar.h> 58 #include <sys/mount.h> 59 #include <sys/syscallargs.h> 60 #include <sys/event.h> 61 #include <sys/pool.h> 62 #include <sys/ktrace.h> 63 64 #include <sys/pipe.h> 65 66 /* 67 * Descriptor management. 68 */ 69 struct filelist filehead; /* head of list of open files */ 70 int nfiles; /* actual number of open files */ 71 72 static __inline void fd_used(struct filedesc *, int); 73 static __inline void fd_unused(struct filedesc *, int); 74 static __inline int find_next_zero(u_int *, int, u_int); 75 int finishdup(struct proc *, struct file *, int, int, register_t *, int); 76 int find_last_set(struct filedesc *, int); 77 int dodup3(struct proc *, int, int, int, register_t *); 78 79 struct pool file_pool; 80 struct pool fdesc_pool; 81 82 void 83 filedesc_init(void) 84 { 85 pool_init(&file_pool, sizeof(struct file), 0, 0, PR_WAITOK, 86 "filepl", NULL); 87 pool_init(&fdesc_pool, sizeof(struct filedesc0), 0, 0, PR_WAITOK, 88 "fdescpl", NULL); 89 LIST_INIT(&filehead); 90 } 91 92 static __inline int 93 find_next_zero (u_int *bitmap, int want, u_int bits) 94 { 95 int i, off, maxoff; 96 u_int sub; 97 98 if (want > bits) 99 return -1; 100 101 off = want >> NDENTRYSHIFT; 102 i = want & NDENTRYMASK; 103 if (i) { 104 sub = bitmap[off] | ((u_int)~0 >> (NDENTRIES - i)); 105 if (sub != ~0) 106 goto found; 107 off++; 108 } 109 110 maxoff = NDLOSLOTS(bits); 111 while (off < maxoff) { 112 if ((sub = bitmap[off]) != ~0) 113 goto found; 114 off++; 115 } 116 117 return -1; 118 119 found: 120 return (off << NDENTRYSHIFT) + ffs(~sub) - 1; 121 } 122 123 int 124 find_last_set(struct filedesc *fd, int last) 125 { 126 int off, i; 127 struct file **ofiles = fd->fd_ofiles; 128 u_int *bitmap = fd->fd_lomap; 129 130 off = (last - 1) >> NDENTRYSHIFT; 131 132 while (off >= 0 && !bitmap[off]) 133 off--; 134 if (off < 0) 135 return 0; 136 137 i = ((off + 1) << NDENTRYSHIFT) - 1; 138 if (i >= last) 139 i = last - 1; 140 141 while (i > 0 && ofiles[i] == NULL) 142 i--; 143 return i; 144 } 145 146 static __inline void 147 fd_used(struct filedesc *fdp, int fd) 148 { 149 u_int off = fd >> NDENTRYSHIFT; 150 151 fdp->fd_lomap[off] |= 1 << (fd & NDENTRYMASK); 152 if (fdp->fd_lomap[off] == ~0) 153 fdp->fd_himap[off >> NDENTRYSHIFT] |= 1 << (off & NDENTRYMASK); 154 155 if (fd > fdp->fd_lastfile) 156 fdp->fd_lastfile = fd; 157 fdp->fd_openfd++; 158 } 159 160 static __inline void 161 fd_unused(struct filedesc *fdp, int fd) 162 { 163 u_int off = fd >> NDENTRYSHIFT; 164 165 if (fd < fdp->fd_freefile) 166 fdp->fd_freefile = fd; 167 168 if (fdp->fd_lomap[off] == ~0) 169 fdp->fd_himap[off >> NDENTRYSHIFT] &= ~(1 << (off & NDENTRYMASK)); 170 fdp->fd_lomap[off] &= ~(1 << (fd & NDENTRYMASK)); 171 172 #ifdef DIAGNOSTIC 173 if (fd > fdp->fd_lastfile) 174 panic("fd_unused: fd_lastfile inconsistent"); 175 #endif 176 if (fd == fdp->fd_lastfile) 177 fdp->fd_lastfile = find_last_set(fdp, fd); 178 fdp->fd_openfd--; 179 } 180 181 struct file * 182 fd_getfile(struct filedesc *fdp, int fd) 183 { 184 struct file *fp; 185 186 if ((u_int)fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == NULL) 187 return (NULL); 188 189 if (!FILE_IS_USABLE(fp)) 190 return (NULL); 191 192 return (fp); 193 } 194 195 struct file * 196 fd_getfile_mode(struct filedesc *fdp, int fd, int mode) 197 { 198 struct file *fp; 199 200 KASSERT(mode != 0); 201 202 fp = fd_getfile(fdp, fd); 203 204 if (fp == NULL || (fp->f_flag & mode) == 0) 205 return (NULL); 206 207 return (fp); 208 } 209 210 /* 211 * System calls on descriptors. 212 */ 213 214 /* 215 * Duplicate a file descriptor. 216 */ 217 /* ARGSUSED */ 218 int 219 sys_dup(struct proc *p, void *v, register_t *retval) 220 { 221 struct sys_dup_args /* { 222 syscallarg(int) fd; 223 } */ *uap = v; 224 struct filedesc *fdp = p->p_fd; 225 int old = SCARG(uap, fd); 226 struct file *fp; 227 int new; 228 int error; 229 230 restart: 231 if ((fp = fd_getfile(fdp, old)) == NULL) 232 return (EBADF); 233 FREF(fp); 234 fdplock(fdp); 235 if ((error = fdalloc(p, 0, &new)) != 0) { 236 FRELE(fp, p); 237 if (error == ENOSPC) { 238 fdexpand(p); 239 fdpunlock(fdp); 240 goto restart; 241 } 242 goto out; 243 } 244 error = finishdup(p, fp, old, new, retval, 0); 245 246 out: 247 fdpunlock(fdp); 248 return (error); 249 } 250 251 /* 252 * Duplicate a file descriptor to a particular value. 253 */ 254 int 255 sys_dup2(struct proc *p, void *v, register_t *retval) 256 { 257 struct sys_dup2_args /* { 258 syscallarg(int) from; 259 syscallarg(int) to; 260 } */ *uap = v; 261 262 return (dodup3(p, SCARG(uap, from), SCARG(uap, to), 0, retval)); 263 } 264 265 int 266 sys_dup3(struct proc *p, void *v, register_t *retval) 267 { 268 struct sys_dup3_args /* { 269 syscallarg(int) from; 270 syscallarg(int) to; 271 syscallarg(int) flags; 272 } */ *uap = v; 273 274 if (SCARG(uap, from) == SCARG(uap, to)) 275 return (EINVAL); 276 if (SCARG(uap, flags) & ~O_CLOEXEC) 277 return (EINVAL); 278 return (dodup3(p, SCARG(uap, from), SCARG(uap, to), 279 SCARG(uap, flags), retval)); 280 } 281 282 int 283 dodup3(struct proc *p, int old, int new, int flags, register_t *retval) 284 { 285 struct filedesc *fdp = p->p_fd; 286 struct file *fp; 287 int i, error; 288 289 restart: 290 if ((fp = fd_getfile(fdp, old)) == NULL) 291 return (EBADF); 292 if ((u_int)new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur || 293 (u_int)new >= maxfiles) 294 return (EBADF); 295 if (old == new) { 296 /* 297 * NOTE! This doesn't clear the close-on-exec flag. This might 298 * or might not be the intended behavior from the start, but 299 * this is what everyone else does. 300 */ 301 *retval = new; 302 return (0); 303 } 304 FREF(fp); 305 fdplock(fdp); 306 if (new >= fdp->fd_nfiles) { 307 if ((error = fdalloc(p, new, &i)) != 0) { 308 FRELE(fp, p); 309 if (error == ENOSPC) { 310 fdexpand(p); 311 fdpunlock(fdp); 312 goto restart; 313 } 314 goto out; 315 } 316 if (new != i) 317 panic("dup2: fdalloc"); 318 fd_unused(fdp, new); 319 } 320 /* finishdup() does FRELE */ 321 error = finishdup(p, fp, old, new, retval, 1); 322 if (!error && flags & O_CLOEXEC) 323 fdp->fd_ofileflags[new] |= UF_EXCLOSE; 324 325 out: 326 fdpunlock(fdp); 327 return (error); 328 } 329 330 /* 331 * The file control system call. 332 */ 333 /* ARGSUSED */ 334 int 335 sys_fcntl(struct proc *p, void *v, register_t *retval) 336 { 337 struct sys_fcntl_args /* { 338 syscallarg(int) fd; 339 syscallarg(int) cmd; 340 syscallarg(void *) arg; 341 } */ *uap = v; 342 int fd = SCARG(uap, fd); 343 struct filedesc *fdp = p->p_fd; 344 struct file *fp; 345 struct vnode *vp; 346 int i, tmp, newmin, flg = F_POSIX; 347 struct flock fl; 348 int error = 0; 349 350 restart: 351 if ((fp = fd_getfile(fdp, fd)) == NULL) 352 return (EBADF); 353 FREF(fp); 354 switch (SCARG(uap, cmd)) { 355 356 case F_DUPFD: 357 case F_DUPFD_CLOEXEC: 358 newmin = (long)SCARG(uap, arg); 359 if ((u_int)newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur || 360 (u_int)newmin >= maxfiles) { 361 error = EINVAL; 362 break; 363 } 364 fdplock(fdp); 365 if ((error = fdalloc(p, newmin, &i)) != 0) { 366 FRELE(fp, p); 367 if (error == ENOSPC) { 368 fdexpand(p); 369 fdpunlock(fdp); 370 goto restart; 371 } 372 } else { 373 /* finishdup will FRELE for us. */ 374 error = finishdup(p, fp, fd, i, retval, 0); 375 376 if (!error && SCARG(uap, cmd) == F_DUPFD_CLOEXEC) 377 fdp->fd_ofileflags[i] |= UF_EXCLOSE; 378 } 379 380 fdpunlock(fdp); 381 return (error); 382 383 case F_GETFD: 384 *retval = fdp->fd_ofileflags[fd] & UF_EXCLOSE ? 1 : 0; 385 break; 386 387 case F_SETFD: 388 fdplock(fdp); 389 if ((long)SCARG(uap, arg) & 1) 390 fdp->fd_ofileflags[fd] |= UF_EXCLOSE; 391 else 392 fdp->fd_ofileflags[fd] &= ~UF_EXCLOSE; 393 fdpunlock(fdp); 394 break; 395 396 case F_GETFL: 397 *retval = OFLAGS(fp->f_flag); 398 break; 399 400 case F_ISATTY: 401 vp = (struct vnode *)fp->f_data; 402 if (fp->f_type == DTYPE_VNODE && (vp->v_flag & VISTTY)) 403 *retval = 1; 404 else { 405 *retval = 0; 406 error = ENOTTY; 407 } 408 break; 409 410 case F_SETFL: 411 fp->f_flag &= ~FCNTLFLAGS; 412 fp->f_flag |= FFLAGS((long)SCARG(uap, arg)) & FCNTLFLAGS; 413 tmp = fp->f_flag & FNONBLOCK; 414 error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p); 415 if (error) 416 break; 417 tmp = fp->f_flag & FASYNC; 418 error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p); 419 if (!error) 420 break; 421 fp->f_flag &= ~FNONBLOCK; 422 tmp = 0; 423 (void) (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p); 424 break; 425 426 case F_GETOWN: 427 if (fp->f_type == DTYPE_SOCKET) { 428 *retval = ((struct socket *)fp->f_data)->so_pgid; 429 break; 430 } 431 error = (*fp->f_ops->fo_ioctl) 432 (fp, TIOCGPGRP, (caddr_t)&tmp, p); 433 *retval = -tmp; 434 break; 435 436 case F_SETOWN: 437 if (fp->f_type == DTYPE_SOCKET) { 438 struct socket *so = (struct socket *)fp->f_data; 439 440 so->so_pgid = (long)SCARG(uap, arg); 441 so->so_siguid = p->p_ucred->cr_ruid; 442 so->so_sigeuid = p->p_ucred->cr_uid; 443 break; 444 } 445 if ((long)SCARG(uap, arg) <= 0) { 446 SCARG(uap, arg) = (void *)(-(long)SCARG(uap, arg)); 447 } else { 448 struct process *pr1 = prfind((long)SCARG(uap, arg)); 449 if (pr1 == 0) { 450 error = ESRCH; 451 break; 452 } 453 SCARG(uap, arg) = (void *)(long)pr1->ps_pgrp->pg_id; 454 } 455 error = ((*fp->f_ops->fo_ioctl) 456 (fp, TIOCSPGRP, (caddr_t)&SCARG(uap, arg), p)); 457 break; 458 459 case F_SETLKW: 460 flg |= F_WAIT; 461 /* FALLTHROUGH */ 462 463 case F_SETLK: 464 if (fp->f_type != DTYPE_VNODE) { 465 error = EBADF; 466 break; 467 } 468 vp = (struct vnode *)fp->f_data; 469 /* Copy in the lock structure */ 470 error = copyin((caddr_t)SCARG(uap, arg), (caddr_t)&fl, 471 sizeof (fl)); 472 if (error) 473 break; 474 if (fl.l_whence == SEEK_CUR) { 475 if (fl.l_start == 0 && fl.l_len < 0) { 476 /* lockf(3) compliance hack */ 477 fl.l_len = -fl.l_len; 478 fl.l_start = fp->f_offset - fl.l_len; 479 } else 480 fl.l_start += fp->f_offset; 481 } 482 switch (fl.l_type) { 483 484 case F_RDLCK: 485 if ((fp->f_flag & FREAD) == 0) { 486 error = EBADF; 487 goto out; 488 } 489 atomic_setbits_int(&fdp->fd_flags, FD_ADVLOCK); 490 error = VOP_ADVLOCK(vp, fdp, F_SETLK, &fl, flg); 491 break; 492 493 case F_WRLCK: 494 if ((fp->f_flag & FWRITE) == 0) { 495 error = EBADF; 496 goto out; 497 } 498 atomic_setbits_int(&fdp->fd_flags, FD_ADVLOCK); 499 error = VOP_ADVLOCK(vp, fdp, F_SETLK, &fl, flg); 500 break; 501 502 case F_UNLCK: 503 error = VOP_ADVLOCK(vp, fdp, F_UNLCK, &fl, F_POSIX); 504 goto out; 505 506 default: 507 error = EINVAL; 508 goto out; 509 } 510 511 if (fp != fd_getfile(fdp, fd)) { 512 /* 513 * We have lost the race with close() or dup2(); 514 * unlock, pretend that we've won the race and that 515 * lock had been removed by close() 516 */ 517 fl.l_whence = SEEK_SET; 518 fl.l_start = 0; 519 fl.l_len = 0; 520 VOP_ADVLOCK(vp, fdp, F_UNLCK, &fl, F_POSIX); 521 fl.l_type = F_UNLCK; 522 } 523 goto out; 524 525 526 case F_GETLK: 527 if (fp->f_type != DTYPE_VNODE) { 528 error = EBADF; 529 break; 530 } 531 vp = (struct vnode *)fp->f_data; 532 /* Copy in the lock structure */ 533 error = copyin((caddr_t)SCARG(uap, arg), (caddr_t)&fl, 534 sizeof (fl)); 535 if (error) 536 break; 537 if (fl.l_whence == SEEK_CUR) { 538 if (fl.l_start == 0 && fl.l_len < 0) { 539 /* lockf(3) compliance hack */ 540 fl.l_len = -fl.l_len; 541 fl.l_start = fp->f_offset - fl.l_len; 542 } else 543 fl.l_start += fp->f_offset; 544 } 545 if (fl.l_type != F_RDLCK && 546 fl.l_type != F_WRLCK && 547 fl.l_type != F_UNLCK && 548 fl.l_type != 0) { 549 error = EINVAL; 550 break; 551 } 552 error = VOP_ADVLOCK(vp, fdp, F_GETLK, &fl, F_POSIX); 553 if (error) 554 break; 555 error = (copyout((caddr_t)&fl, (caddr_t)SCARG(uap, arg), 556 sizeof (fl))); 557 break; 558 559 default: 560 error = EINVAL; 561 break; 562 } 563 out: 564 FRELE(fp, p); 565 return (error); 566 } 567 568 /* 569 * Common code for dup, dup2, and fcntl(F_DUPFD). 570 */ 571 int 572 finishdup(struct proc *p, struct file *fp, int old, int new, 573 register_t *retval, int dup2) 574 { 575 struct file *oldfp; 576 struct filedesc *fdp = p->p_fd; 577 578 fdpassertlocked(fdp); 579 if (fp->f_count == LONG_MAX-2) { 580 FRELE(fp, p); 581 return (EDEADLK); 582 } 583 584 /* 585 * Don't fd_getfile here. We want to closef LARVAL files and 586 * closef can deal with that. 587 */ 588 oldfp = fdp->fd_ofiles[new]; 589 if (oldfp != NULL) 590 FREF(oldfp); 591 592 fdp->fd_ofiles[new] = fp; 593 fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] & ~UF_EXCLOSE; 594 fp->f_count++; 595 FRELE(fp, p); 596 if (dup2 && oldfp == NULL) 597 fd_used(fdp, new); 598 *retval = new; 599 600 if (oldfp != NULL) { 601 if (new < fdp->fd_knlistsize) 602 knote_fdclose(p, new); 603 closef(oldfp, p); 604 } 605 606 return (0); 607 } 608 609 void 610 fdremove(struct filedesc *fdp, int fd) 611 { 612 fdpassertlocked(fdp); 613 fdp->fd_ofiles[fd] = NULL; 614 fd_unused(fdp, fd); 615 } 616 617 int 618 fdrelease(struct proc *p, int fd) 619 { 620 struct filedesc *fdp = p->p_fd; 621 struct file **fpp, *fp; 622 623 fdpassertlocked(fdp); 624 625 /* 626 * Don't fd_getfile here. We want to closef LARVAL files and closef 627 * can deal with that. 628 */ 629 fpp = &fdp->fd_ofiles[fd]; 630 fp = *fpp; 631 if (fp == NULL) 632 return (EBADF); 633 FREF(fp); 634 *fpp = NULL; 635 fd_unused(fdp, fd); 636 if (fd < fdp->fd_knlistsize) 637 knote_fdclose(p, fd); 638 return (closef(fp, p)); 639 } 640 641 /* 642 * Close a file descriptor. 643 */ 644 /* ARGSUSED */ 645 int 646 sys_close(struct proc *p, void *v, register_t *retval) 647 { 648 struct sys_close_args /* { 649 syscallarg(int) fd; 650 } */ *uap = v; 651 int fd = SCARG(uap, fd), error; 652 struct filedesc *fdp = p->p_fd; 653 654 if (fd_getfile(fdp, fd) == NULL) 655 return (EBADF); 656 fdplock(fdp); 657 error = fdrelease(p, fd); 658 fdpunlock(fdp); 659 660 return (error); 661 } 662 663 /* 664 * Return status information about a file descriptor. 665 */ 666 int 667 sys_fstat(struct proc *p, void *v, register_t *retval) 668 { 669 struct sys_fstat_args /* { 670 syscallarg(int) fd; 671 syscallarg(struct stat *) sb; 672 } */ *uap = v; 673 int fd = SCARG(uap, fd); 674 struct filedesc *fdp = p->p_fd; 675 struct file *fp; 676 struct stat ub; 677 int error; 678 679 if ((fp = fd_getfile(fdp, fd)) == NULL) 680 return (EBADF); 681 FREF(fp); 682 error = (*fp->f_ops->fo_stat)(fp, &ub, p); 683 FRELE(fp, p); 684 if (error == 0) { 685 /* 686 * Don't let non-root see generation numbers 687 * (for NFS security) 688 */ 689 if (suser(p, 0)) 690 ub.st_gen = 0; 691 error = copyout((caddr_t)&ub, (caddr_t)SCARG(uap, sb), 692 sizeof (ub)); 693 } 694 #ifdef KTRACE 695 if (error == 0 && KTRPOINT(p, KTR_STRUCT)) 696 ktrstat(p, &ub); 697 #endif 698 return (error); 699 } 700 701 /* 702 * Return pathconf information about a file descriptor. 703 */ 704 /* ARGSUSED */ 705 int 706 sys_fpathconf(struct proc *p, void *v, register_t *retval) 707 { 708 struct sys_fpathconf_args /* { 709 syscallarg(int) fd; 710 syscallarg(int) name; 711 } */ *uap = v; 712 int fd = SCARG(uap, fd); 713 struct filedesc *fdp = p->p_fd; 714 struct file *fp; 715 struct vnode *vp; 716 int error; 717 718 if ((fp = fd_getfile(fdp, fd)) == NULL) 719 return (EBADF); 720 FREF(fp); 721 switch (fp->f_type) { 722 case DTYPE_PIPE: 723 case DTYPE_SOCKET: 724 if (SCARG(uap, name) != _PC_PIPE_BUF) { 725 error = EINVAL; 726 break; 727 } 728 *retval = PIPE_BUF; 729 error = 0; 730 break; 731 732 case DTYPE_VNODE: 733 vp = (struct vnode *)fp->f_data; 734 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); 735 error = VOP_PATHCONF(vp, SCARG(uap, name), retval); 736 VOP_UNLOCK(vp, 0, p); 737 break; 738 739 default: 740 error = EOPNOTSUPP; 741 break; 742 } 743 FRELE(fp, p); 744 return (error); 745 } 746 747 /* 748 * Allocate a file descriptor for the process. 749 */ 750 int 751 fdalloc(struct proc *p, int want, int *result) 752 { 753 struct filedesc *fdp = p->p_fd; 754 int lim, last, i; 755 u_int new, off; 756 757 /* 758 * Search for a free descriptor starting at the higher 759 * of want or fd_freefile. If that fails, consider 760 * expanding the ofile array. 761 */ 762 restart: 763 lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles); 764 last = min(fdp->fd_nfiles, lim); 765 if ((i = want) < fdp->fd_freefile) 766 i = fdp->fd_freefile; 767 off = i >> NDENTRYSHIFT; 768 new = find_next_zero(fdp->fd_himap, off, 769 (last + NDENTRIES - 1) >> NDENTRYSHIFT); 770 if (new != -1) { 771 i = find_next_zero(&fdp->fd_lomap[new], 772 new > off ? 0 : i & NDENTRYMASK, 773 NDENTRIES); 774 if (i == -1) { 775 /* 776 * Free file descriptor in this block was 777 * below want, try again with higher want. 778 */ 779 want = (new + 1) << NDENTRYSHIFT; 780 goto restart; 781 } 782 i += (new << NDENTRYSHIFT); 783 if (i < last) { 784 fd_used(fdp, i); 785 if (want <= fdp->fd_freefile) 786 fdp->fd_freefile = i; 787 *result = i; 788 fdp->fd_ofileflags[i] = 0; 789 return (0); 790 } 791 } 792 if (fdp->fd_nfiles >= lim) 793 return (EMFILE); 794 795 return (ENOSPC); 796 } 797 798 void 799 fdexpand(struct proc *p) 800 { 801 struct filedesc *fdp = p->p_fd; 802 int nfiles; 803 size_t copylen; 804 struct file **newofile; 805 char *newofileflags; 806 u_int *newhimap, *newlomap; 807 808 fdpassertlocked(fdp); 809 810 /* 811 * No space in current array. 812 */ 813 if (fdp->fd_nfiles < NDEXTENT) 814 nfiles = NDEXTENT; 815 else 816 nfiles = 2 * fdp->fd_nfiles; 817 818 newofile = mallocarray(nfiles, OFILESIZE, M_FILEDESC, M_WAITOK); 819 newofileflags = (char *) &newofile[nfiles]; 820 821 /* 822 * Copy the existing ofile and ofileflags arrays 823 * and zero the new portion of each array. 824 */ 825 copylen = sizeof(struct file *) * fdp->fd_nfiles; 826 memcpy(newofile, fdp->fd_ofiles, copylen); 827 memset((char *)newofile + copylen, 0, 828 nfiles * sizeof(struct file *) - copylen); 829 copylen = sizeof(char) * fdp->fd_nfiles; 830 memcpy(newofileflags, fdp->fd_ofileflags, copylen); 831 memset(newofileflags + copylen, 0, nfiles * sizeof(char) - copylen); 832 833 if (fdp->fd_nfiles > NDFILE) 834 free(fdp->fd_ofiles, M_FILEDESC, 0); 835 836 if (NDHISLOTS(nfiles) > NDHISLOTS(fdp->fd_nfiles)) { 837 newhimap = mallocarray(NDHISLOTS(nfiles), sizeof(u_int), 838 M_FILEDESC, M_WAITOK); 839 newlomap = mallocarray(NDLOSLOTS(nfiles), sizeof(u_int), 840 M_FILEDESC, M_WAITOK); 841 842 copylen = NDHISLOTS(fdp->fd_nfiles) * sizeof(u_int); 843 memcpy(newhimap, fdp->fd_himap, copylen); 844 memset((char *)newhimap + copylen, 0, 845 NDHISLOTS(nfiles) * sizeof(u_int) - copylen); 846 847 copylen = NDLOSLOTS(fdp->fd_nfiles) * sizeof(u_int); 848 memcpy(newlomap, fdp->fd_lomap, copylen); 849 memset((char *)newlomap + copylen, 0, 850 NDLOSLOTS(nfiles) * sizeof(u_int) - copylen); 851 852 if (NDHISLOTS(fdp->fd_nfiles) > NDHISLOTS(NDFILE)) { 853 free(fdp->fd_himap, M_FILEDESC, 0); 854 free(fdp->fd_lomap, M_FILEDESC, 0); 855 } 856 fdp->fd_himap = newhimap; 857 fdp->fd_lomap = newlomap; 858 } 859 fdp->fd_ofiles = newofile; 860 fdp->fd_ofileflags = newofileflags; 861 fdp->fd_nfiles = nfiles; 862 } 863 864 /* 865 * Create a new open file structure and allocate 866 * a file descriptor for the process that refers to it. 867 */ 868 int 869 falloc(struct proc *p, struct file **resultfp, int *resultfd) 870 { 871 struct file *fp, *fq; 872 int error, i; 873 874 fdpassertlocked(p->p_fd); 875 restart: 876 if ((error = fdalloc(p, 0, &i)) != 0) { 877 if (error == ENOSPC) { 878 fdexpand(p); 879 goto restart; 880 } 881 return (error); 882 } 883 if (nfiles >= maxfiles) { 884 fd_unused(p->p_fd, i); 885 tablefull("file"); 886 return (ENFILE); 887 } 888 /* 889 * Allocate a new file descriptor. 890 * If the process has file descriptor zero open, add to the list 891 * of open files at that point, otherwise put it at the front of 892 * the list of open files. 893 */ 894 nfiles++; 895 fp = pool_get(&file_pool, PR_WAITOK|PR_ZERO); 896 fp->f_iflags = FIF_LARVAL; 897 if ((fq = p->p_fd->fd_ofiles[0]) != NULL) { 898 LIST_INSERT_AFTER(fq, fp, f_list); 899 } else { 900 LIST_INSERT_HEAD(&filehead, fp, f_list); 901 } 902 p->p_fd->fd_ofiles[i] = fp; 903 fp->f_count = 1; 904 fp->f_cred = p->p_ucred; 905 crhold(fp->f_cred); 906 if (resultfp) 907 *resultfp = fp; 908 if (resultfd) 909 *resultfd = i; 910 FREF(fp); 911 return (0); 912 } 913 914 /* 915 * Build a new filedesc structure. 916 */ 917 struct filedesc * 918 fdinit(void) 919 { 920 struct filedesc0 *newfdp; 921 922 newfdp = pool_get(&fdesc_pool, PR_WAITOK|PR_ZERO); 923 rw_init(&newfdp->fd_fd.fd_lock, "fdlock"); 924 925 /* Create the file descriptor table. */ 926 newfdp->fd_fd.fd_refcnt = 1; 927 newfdp->fd_fd.fd_cmask = S_IWGRP|S_IWOTH; 928 newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles; 929 newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags; 930 newfdp->fd_fd.fd_nfiles = NDFILE; 931 newfdp->fd_fd.fd_himap = newfdp->fd_dhimap; 932 newfdp->fd_fd.fd_lomap = newfdp->fd_dlomap; 933 newfdp->fd_fd.fd_knlistsize = -1; 934 935 newfdp->fd_fd.fd_freefile = 0; 936 newfdp->fd_fd.fd_lastfile = 0; 937 938 return (&newfdp->fd_fd); 939 } 940 941 /* 942 * Share a filedesc structure. 943 */ 944 struct filedesc * 945 fdshare(struct process *pr) 946 { 947 pr->ps_fd->fd_refcnt++; 948 return (pr->ps_fd); 949 } 950 951 /* 952 * Copy a filedesc structure. 953 */ 954 struct filedesc * 955 fdcopy(struct process *pr) 956 { 957 struct filedesc *newfdp, *fdp = pr->ps_fd; 958 struct file **fpp; 959 int i; 960 961 fdplock(fdp); 962 newfdp = pool_get(&fdesc_pool, PR_WAITOK); 963 memcpy(newfdp, fdp, sizeof(struct filedesc)); 964 if (newfdp->fd_cdir) 965 vref(newfdp->fd_cdir); 966 if (newfdp->fd_rdir) 967 vref(newfdp->fd_rdir); 968 newfdp->fd_refcnt = 1; 969 rw_init(&newfdp->fd_lock, "fdlock"); 970 971 /* 972 * If the number of open files fits in the internal arrays 973 * of the open file structure, use them, otherwise allocate 974 * additional memory for the number of descriptors currently 975 * in use. 976 */ 977 if (newfdp->fd_lastfile < NDFILE) { 978 newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles; 979 newfdp->fd_ofileflags = 980 ((struct filedesc0 *) newfdp)->fd_dfileflags; 981 i = NDFILE; 982 } else { 983 /* 984 * Compute the smallest multiple of NDEXTENT needed 985 * for the file descriptors currently in use, 986 * allowing the table to shrink. 987 */ 988 i = newfdp->fd_nfiles; 989 while (i >= 2 * NDEXTENT && i > newfdp->fd_lastfile * 2) 990 i /= 2; 991 newfdp->fd_ofiles = mallocarray(i, OFILESIZE, M_FILEDESC, M_WAITOK); 992 newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i]; 993 } 994 if (NDHISLOTS(i) <= NDHISLOTS(NDFILE)) { 995 newfdp->fd_himap = 996 ((struct filedesc0 *) newfdp)->fd_dhimap; 997 newfdp->fd_lomap = 998 ((struct filedesc0 *) newfdp)->fd_dlomap; 999 } else { 1000 newfdp->fd_himap = mallocarray(NDHISLOTS(i), sizeof(u_int), 1001 M_FILEDESC, M_WAITOK); 1002 newfdp->fd_lomap = mallocarray(NDLOSLOTS(i), sizeof(u_int), 1003 M_FILEDESC, M_WAITOK); 1004 } 1005 newfdp->fd_nfiles = i; 1006 memcpy(newfdp->fd_ofiles, fdp->fd_ofiles, i * sizeof(struct file *)); 1007 memcpy(newfdp->fd_ofileflags, fdp->fd_ofileflags, i * sizeof(char)); 1008 memcpy(newfdp->fd_himap, fdp->fd_himap, NDHISLOTS(i) * sizeof(u_int)); 1009 memcpy(newfdp->fd_lomap, fdp->fd_lomap, NDLOSLOTS(i) * sizeof(u_int)); 1010 fdpunlock(fdp); 1011 1012 fdplock(newfdp); 1013 fpp = newfdp->fd_ofiles; 1014 for (i = 0; i <= newfdp->fd_lastfile; i++, fpp++) 1015 if (*fpp != NULL) { 1016 /* 1017 * XXX Gruesome hack. If count gets too high, fail 1018 * to copy an fd, since fdcopy()'s callers do not 1019 * permit it to indicate failure yet. 1020 * Meanwhile, kqueue and systrace files have to be 1021 * tied to the process that opened them to enforce 1022 * their internal consistency, so close them here. 1023 */ 1024 if ((*fpp)->f_count == LONG_MAX-2 || 1025 (*fpp)->f_type == DTYPE_KQUEUE || 1026 (*fpp)->f_type == DTYPE_SYSTRACE) 1027 fdremove(newfdp, i); 1028 else 1029 (*fpp)->f_count++; 1030 } 1031 1032 /* finish cleaning up kq bits */ 1033 if (newfdp->fd_knlistsize != -1) { 1034 newfdp->fd_knlist = NULL; 1035 newfdp->fd_knlistsize = -1; 1036 newfdp->fd_knhash = NULL; 1037 newfdp->fd_knhashmask = 0; 1038 } 1039 1040 fdpunlock(newfdp); 1041 return (newfdp); 1042 } 1043 1044 /* 1045 * Release a filedesc structure. 1046 */ 1047 void 1048 fdfree(struct proc *p) 1049 { 1050 struct filedesc *fdp = p->p_fd; 1051 struct file **fpp, *fp; 1052 int i; 1053 1054 if (--fdp->fd_refcnt > 0) 1055 return; 1056 fpp = fdp->fd_ofiles; 1057 for (i = fdp->fd_lastfile; i >= 0; i--, fpp++) { 1058 fp = *fpp; 1059 if (fp != NULL) { 1060 FREF(fp); 1061 *fpp = NULL; 1062 (void) closef(fp, p); 1063 } 1064 } 1065 p->p_fd = NULL; 1066 if (fdp->fd_nfiles > NDFILE) 1067 free(fdp->fd_ofiles, M_FILEDESC, 0); 1068 if (NDHISLOTS(fdp->fd_nfiles) > NDHISLOTS(NDFILE)) { 1069 free(fdp->fd_himap, M_FILEDESC, 0); 1070 free(fdp->fd_lomap, M_FILEDESC, 0); 1071 } 1072 if (fdp->fd_cdir) 1073 vrele(fdp->fd_cdir); 1074 if (fdp->fd_rdir) 1075 vrele(fdp->fd_rdir); 1076 if (fdp->fd_knlist) 1077 free(fdp->fd_knlist, M_TEMP, 0); 1078 if (fdp->fd_knhash) 1079 free(fdp->fd_knhash, M_TEMP, 0); 1080 pool_put(&fdesc_pool, fdp); 1081 } 1082 1083 /* 1084 * Internal form of close. 1085 * Decrement reference count on file structure. 1086 * Note: p may be NULL when closing a file 1087 * that was being passed in a message. 1088 * 1089 * The fp must have its usecount bumped and will be FRELEd here. 1090 */ 1091 int 1092 closef(struct file *fp, struct proc *p) 1093 { 1094 struct filedesc *fdp; 1095 1096 if (fp == NULL) 1097 return (0); 1098 1099 #ifdef DIAGNOSTIC 1100 if (fp->f_count < 2) 1101 panic("closef: count (%ld) < 2", fp->f_count); 1102 #endif 1103 fp->f_count--; 1104 1105 /* 1106 * POSIX record locking dictates that any close releases ALL 1107 * locks owned by this process. This is handled by setting 1108 * a flag in the unlock to free ONLY locks obeying POSIX 1109 * semantics, and not to free BSD-style file locks. 1110 * If the descriptor was in a message, POSIX-style locks 1111 * aren't passed with the descriptor. 1112 */ 1113 1114 if (p && ((fdp = p->p_fd) != NULL) && 1115 (fdp->fd_flags & FD_ADVLOCK) && 1116 fp->f_type == DTYPE_VNODE) { 1117 struct vnode *vp = fp->f_data; 1118 struct flock lf; 1119 1120 lf.l_whence = SEEK_SET; 1121 lf.l_start = 0; 1122 lf.l_len = 0; 1123 lf.l_type = F_UNLCK; 1124 (void) VOP_ADVLOCK(vp, fdp, F_UNLCK, &lf, F_POSIX); 1125 } 1126 1127 return (FRELE(fp, p)); 1128 } 1129 1130 int 1131 fdrop(struct file *fp, struct proc *p) 1132 { 1133 int error; 1134 1135 #ifdef DIAGNOSTIC 1136 if (fp->f_count != 0) 1137 panic("fdrop: count (%ld) != 0", fp->f_count); 1138 #endif 1139 1140 if (fp->f_ops) 1141 error = (*fp->f_ops->fo_close)(fp, p); 1142 else 1143 error = 0; 1144 1145 /* Free fp */ 1146 LIST_REMOVE(fp, f_list); 1147 crfree(fp->f_cred); 1148 nfiles--; 1149 pool_put(&file_pool, fp); 1150 1151 return (error); 1152 } 1153 1154 /* 1155 * Apply an advisory lock on a file descriptor. 1156 * 1157 * Just attempt to get a record lock of the requested type on 1158 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0). 1159 */ 1160 /* ARGSUSED */ 1161 int 1162 sys_flock(struct proc *p, void *v, register_t *retval) 1163 { 1164 struct sys_flock_args /* { 1165 syscallarg(int) fd; 1166 syscallarg(int) how; 1167 } */ *uap = v; 1168 int fd = SCARG(uap, fd); 1169 int how = SCARG(uap, how); 1170 struct filedesc *fdp = p->p_fd; 1171 struct file *fp; 1172 struct vnode *vp; 1173 struct flock lf; 1174 int error; 1175 1176 if ((fp = fd_getfile(fdp, fd)) == NULL) 1177 return (EBADF); 1178 if (fp->f_type != DTYPE_VNODE) 1179 return (EOPNOTSUPP); 1180 FREF(fp); 1181 vp = (struct vnode *)fp->f_data; 1182 lf.l_whence = SEEK_SET; 1183 lf.l_start = 0; 1184 lf.l_len = 0; 1185 if (how & LOCK_UN) { 1186 lf.l_type = F_UNLCK; 1187 fp->f_iflags &= ~FIF_HASLOCK; 1188 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK); 1189 goto out; 1190 } 1191 if (how & LOCK_EX) 1192 lf.l_type = F_WRLCK; 1193 else if (how & LOCK_SH) 1194 lf.l_type = F_RDLCK; 1195 else { 1196 error = EINVAL; 1197 goto out; 1198 } 1199 fp->f_iflags |= FIF_HASLOCK; 1200 if (how & LOCK_NB) 1201 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK); 1202 else 1203 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK|F_WAIT); 1204 out: 1205 FRELE(fp, p); 1206 return (error); 1207 } 1208 1209 /* 1210 * File Descriptor pseudo-device driver (/dev/fd/). 1211 * 1212 * Opening minor device N dup()s the file (if any) connected to file 1213 * descriptor N belonging to the calling process. Note that this driver 1214 * consists of only the ``open()'' routine, because all subsequent 1215 * references to this file will be direct to the other driver. 1216 */ 1217 /* ARGSUSED */ 1218 int 1219 filedescopen(dev_t dev, int mode, int type, struct proc *p) 1220 { 1221 1222 /* 1223 * XXX Kludge: set curproc->p_dupfd to contain the value of the 1224 * the file descriptor being sought for duplication. The error 1225 * return ensures that the vnode for this device will be released 1226 * by vn_open. Open will detect this special error and take the 1227 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN 1228 * will simply report the error. 1229 */ 1230 p->p_dupfd = minor(dev); 1231 return (ENODEV); 1232 } 1233 1234 /* 1235 * Duplicate the specified descriptor to a free descriptor. 1236 */ 1237 int 1238 dupfdopen(struct filedesc *fdp, int indx, int dfd, int mode) 1239 { 1240 struct file *wfp; 1241 1242 fdpassertlocked(fdp); 1243 1244 /* 1245 * Assume that the filename was user-specified; applications do 1246 * not tend to open /dev/fd/# when they can just call dup() 1247 */ 1248 if ((curproc->p_p->ps_flags & (PS_SUGIDEXEC | PS_SUGID))) { 1249 if (curproc->p_descfd == 255) 1250 return (EPERM); 1251 if (curproc->p_descfd != curproc->p_dupfd) 1252 return (EPERM); 1253 } 1254 1255 /* 1256 * If the to-be-dup'd fd number is greater than the allowed number 1257 * of file descriptors, or the fd to be dup'd has already been 1258 * closed, reject. Note, there is no need to check for new == old 1259 * because fd_getfile will return NULL if the file at indx is 1260 * newly created by falloc (FIF_LARVAL). 1261 */ 1262 if ((wfp = fd_getfile(fdp, dfd)) == NULL) 1263 return (EBADF); 1264 1265 /* 1266 * Check that the mode the file is being opened for is a 1267 * subset of the mode of the existing descriptor. 1268 */ 1269 if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) 1270 return (EACCES); 1271 if (wfp->f_count == LONG_MAX-2) 1272 return (EDEADLK); 1273 1274 fdp->fd_ofiles[indx] = wfp; 1275 fdp->fd_ofileflags[indx] = (fdp->fd_ofileflags[indx] & UF_EXCLOSE) | 1276 (fdp->fd_ofileflags[dfd] & ~UF_EXCLOSE); 1277 wfp->f_count++; 1278 fd_used(fdp, indx); 1279 return (0); 1280 } 1281 1282 /* 1283 * Close any files on exec? 1284 */ 1285 void 1286 fdcloseexec(struct proc *p) 1287 { 1288 struct filedesc *fdp = p->p_fd; 1289 int fd; 1290 1291 fdplock(fdp); 1292 for (fd = 0; fd <= fdp->fd_lastfile; fd++) 1293 if (fdp->fd_ofileflags[fd] & UF_EXCLOSE) 1294 (void) fdrelease(p, fd); 1295 fdpunlock(fdp); 1296 } 1297 1298 int 1299 sys_closefrom(struct proc *p, void *v, register_t *retval) 1300 { 1301 struct sys_closefrom_args *uap = v; 1302 struct filedesc *fdp = p->p_fd; 1303 u_int startfd, i; 1304 1305 startfd = SCARG(uap, fd); 1306 fdplock(fdp); 1307 1308 if (startfd > fdp->fd_lastfile) { 1309 fdpunlock(fdp); 1310 return (EBADF); 1311 } 1312 1313 for (i = startfd; i <= fdp->fd_lastfile; i++) 1314 fdrelease(p, i); 1315 1316 fdpunlock(fdp); 1317 return (0); 1318 } 1319 1320 int 1321 sys_getdtablecount(struct proc *p, void *v, register_t *retval) 1322 { 1323 *retval = p->p_fd->fd_openfd; 1324 return (0); 1325 } 1326