1 /* $OpenBSD: kern_descrip.c,v 1.82 2009/07/09 22:29:56 thib Exp $ */ 2 /* $NetBSD: kern_descrip.c,v 1.42 1996/03/30 22:24:38 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1989, 1991, 1993 6 * The Regents of the University of California. All rights reserved. 7 * (c) UNIX System Laboratories, Inc. 8 * All or some portions of this file are derived from material licensed 9 * to the University of California by American Telephone and Telegraph 10 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 11 * the permission of UNIX System Laboratories, Inc. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * @(#)kern_descrip.c 8.6 (Berkeley) 4/19/94 38 */ 39 40 #include <sys/param.h> 41 #include <sys/systm.h> 42 #include <sys/filedesc.h> 43 #include <sys/kernel.h> 44 #include <sys/vnode.h> 45 #include <sys/proc.h> 46 #include <sys/file.h> 47 #include <sys/socket.h> 48 #include <sys/socketvar.h> 49 #include <sys/stat.h> 50 #include <sys/ioctl.h> 51 #include <sys/fcntl.h> 52 #include <sys/malloc.h> 53 #include <sys/syslog.h> 54 #include <sys/ucred.h> 55 #include <sys/unistd.h> 56 #include <sys/resourcevar.h> 57 #include <sys/conf.h> 58 #include <sys/mount.h> 59 #include <sys/syscallargs.h> 60 #include <sys/event.h> 61 #include <sys/pool.h> 62 63 #include <uvm/uvm_extern.h> 64 65 #include <sys/pipe.h> 66 67 /* 68 * Descriptor management. 69 */ 70 struct filelist filehead; /* head of list of open files */ 71 int nfiles; /* actual number of open files */ 72 73 static __inline void fd_used(struct filedesc *, int); 74 static __inline void fd_unused(struct filedesc *, int); 75 static __inline int find_next_zero(u_int *, int, u_int); 76 int finishdup(struct proc *, struct file *, int, int, register_t *); 77 int find_last_set(struct filedesc *, int); 78 79 struct pool file_pool; 80 struct pool fdesc_pool; 81 82 void 83 filedesc_init(void) 84 { 85 pool_init(&file_pool, sizeof(struct file), 0, 0, 0, "filepl", 86 &pool_allocator_nointr); 87 pool_init(&fdesc_pool, sizeof(struct filedesc0), 0, 0, 0, "fdescpl", 88 &pool_allocator_nointr); 89 LIST_INIT(&filehead); 90 } 91 92 static __inline int 93 find_next_zero (u_int *bitmap, int want, u_int bits) 94 { 95 int i, off, maxoff; 96 u_int sub; 97 98 if (want > bits) 99 return -1; 100 101 off = want >> NDENTRYSHIFT; 102 i = want & NDENTRYMASK; 103 if (i) { 104 sub = bitmap[off] | ((u_int)~0 >> (NDENTRIES - i)); 105 if (sub != ~0) 106 goto found; 107 off++; 108 } 109 110 maxoff = NDLOSLOTS(bits); 111 while (off < maxoff) { 112 if ((sub = bitmap[off]) != ~0) 113 goto found; 114 off++; 115 } 116 117 return -1; 118 119 found: 120 return (off << NDENTRYSHIFT) + ffs(~sub) - 1; 121 } 122 123 int 124 find_last_set(struct filedesc *fd, int last) 125 { 126 int off, i; 127 struct file **ofiles = fd->fd_ofiles; 128 u_int *bitmap = fd->fd_lomap; 129 130 off = (last - 1) >> NDENTRYSHIFT; 131 132 while (off >= 0 && !bitmap[off]) 133 off--; 134 if (off < 0) 135 return 0; 136 137 i = ((off + 1) << NDENTRYSHIFT) - 1; 138 if (i >= last) 139 i = last - 1; 140 141 while (i > 0 && ofiles[i] == NULL) 142 i--; 143 return i; 144 } 145 146 static __inline void 147 fd_used(struct filedesc *fdp, int fd) 148 { 149 u_int off = fd >> NDENTRYSHIFT; 150 151 fdp->fd_lomap[off] |= 1 << (fd & NDENTRYMASK); 152 if (fdp->fd_lomap[off] == ~0) 153 fdp->fd_himap[off >> NDENTRYSHIFT] |= 1 << (off & NDENTRYMASK); 154 155 if (fd > fdp->fd_lastfile) 156 fdp->fd_lastfile = fd; 157 } 158 159 static __inline void 160 fd_unused(struct filedesc *fdp, int fd) 161 { 162 u_int off = fd >> NDENTRYSHIFT; 163 164 if (fd < fdp->fd_freefile) 165 fdp->fd_freefile = fd; 166 167 if (fdp->fd_lomap[off] == ~0) 168 fdp->fd_himap[off >> NDENTRYSHIFT] &= ~(1 << (off & NDENTRYMASK)); 169 fdp->fd_lomap[off] &= ~(1 << (fd & NDENTRYMASK)); 170 171 #ifdef DIAGNOSTIC 172 if (fd > fdp->fd_lastfile) 173 panic("fd_unused: fd_lastfile inconsistent"); 174 #endif 175 if (fd == fdp->fd_lastfile) 176 fdp->fd_lastfile = find_last_set(fdp, fd); 177 } 178 179 struct file * 180 fd_getfile(struct filedesc *fdp, int fd) 181 { 182 struct file *fp; 183 184 if ((u_int)fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == NULL) 185 return (NULL); 186 187 if (!FILE_IS_USABLE(fp)) 188 return (NULL); 189 190 return (fp); 191 } 192 193 /* 194 * System calls on descriptors. 195 */ 196 197 /* 198 * Duplicate a file descriptor. 199 */ 200 /* ARGSUSED */ 201 int 202 sys_dup(struct proc *p, void *v, register_t *retval) 203 { 204 struct sys_dup_args /* { 205 syscallarg(int) fd; 206 } */ *uap = v; 207 struct filedesc *fdp = p->p_fd; 208 int old = SCARG(uap, fd); 209 struct file *fp; 210 int new; 211 int error; 212 213 restart: 214 if ((fp = fd_getfile(fdp, old)) == NULL) 215 return (EBADF); 216 FREF(fp); 217 fdplock(fdp); 218 if ((error = fdalloc(p, 0, &new)) != 0) { 219 FRELE(fp); 220 if (error == ENOSPC) { 221 fdexpand(p); 222 fdpunlock(fdp); 223 goto restart; 224 } 225 goto out; 226 } 227 error = finishdup(p, fp, old, new, retval); 228 229 out: 230 fdpunlock(fdp); 231 return (error); 232 } 233 234 /* 235 * Duplicate a file descriptor to a particular value. 236 */ 237 /* ARGSUSED */ 238 int 239 sys_dup2(struct proc *p, void *v, register_t *retval) 240 { 241 struct sys_dup2_args /* { 242 syscallarg(int) from; 243 syscallarg(int) to; 244 } */ *uap = v; 245 int old = SCARG(uap, from), new = SCARG(uap, to); 246 struct filedesc *fdp = p->p_fd; 247 struct file *fp; 248 int i, error; 249 250 restart: 251 if ((fp = fd_getfile(fdp, old)) == NULL) 252 return (EBADF); 253 if ((u_int)new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur || 254 (u_int)new >= maxfiles) 255 return (EBADF); 256 if (old == new) { 257 /* 258 * NOTE! This doesn't clear the close-on-exec flag. This might 259 * or might not be the intended behavior from the start, but 260 * this is what everyone else does. 261 */ 262 *retval = new; 263 return (0); 264 } 265 FREF(fp); 266 fdplock(fdp); 267 if (new >= fdp->fd_nfiles) { 268 if ((error = fdalloc(p, new, &i)) != 0) { 269 FRELE(fp); 270 if (error == ENOSPC) { 271 fdexpand(p); 272 fdpunlock(fdp); 273 goto restart; 274 } 275 goto out; 276 } 277 if (new != i) 278 panic("dup2: fdalloc"); 279 } 280 /* finishdup() does FRELE */ 281 error = finishdup(p, fp, old, new, retval); 282 283 out: 284 fdpunlock(fdp); 285 return (error); 286 } 287 288 /* 289 * The file control system call. 290 */ 291 /* ARGSUSED */ 292 int 293 sys_fcntl(struct proc *p, void *v, register_t *retval) 294 { 295 struct sys_fcntl_args /* { 296 syscallarg(int) fd; 297 syscallarg(int) cmd; 298 syscallarg(void *) arg; 299 } */ *uap = v; 300 int fd = SCARG(uap, fd); 301 struct filedesc *fdp = p->p_fd; 302 struct file *fp; 303 struct vnode *vp; 304 int i, tmp, newmin, flg = F_POSIX; 305 struct flock fl; 306 int error = 0; 307 308 restart: 309 if ((fp = fd_getfile(fdp, fd)) == NULL) 310 return (EBADF); 311 FREF(fp); 312 switch (SCARG(uap, cmd)) { 313 314 case F_DUPFD: 315 newmin = (long)SCARG(uap, arg); 316 if ((u_int)newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur || 317 (u_int)newmin >= maxfiles) { 318 error = EINVAL; 319 break; 320 } 321 fdplock(fdp); 322 if ((error = fdalloc(p, newmin, &i)) != 0) { 323 if (error == ENOSPC) { 324 fdexpand(p); 325 FRELE(fp); 326 fdpunlock(fdp); 327 goto restart; 328 } 329 } 330 /* finishdup will FRELE for us. */ 331 if (!error) 332 error = finishdup(p, fp, fd, i, retval); 333 else 334 FRELE(fp); 335 336 fdpunlock(fdp); 337 return (error); 338 339 case F_GETFD: 340 *retval = fdp->fd_ofileflags[fd] & UF_EXCLOSE ? 1 : 0; 341 break; 342 343 case F_SETFD: 344 if ((long)SCARG(uap, arg) & 1) 345 fdp->fd_ofileflags[fd] |= UF_EXCLOSE; 346 else 347 fdp->fd_ofileflags[fd] &= ~UF_EXCLOSE; 348 break; 349 350 case F_GETFL: 351 *retval = OFLAGS(fp->f_flag); 352 break; 353 354 case F_SETFL: 355 fp->f_flag &= ~FCNTLFLAGS; 356 fp->f_flag |= FFLAGS((long)SCARG(uap, arg)) & FCNTLFLAGS; 357 tmp = fp->f_flag & FNONBLOCK; 358 error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p); 359 if (error) 360 break; 361 tmp = fp->f_flag & FASYNC; 362 error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p); 363 if (!error) 364 break; 365 fp->f_flag &= ~FNONBLOCK; 366 tmp = 0; 367 (void) (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p); 368 break; 369 370 case F_GETOWN: 371 if (fp->f_type == DTYPE_SOCKET) { 372 *retval = ((struct socket *)fp->f_data)->so_pgid; 373 break; 374 } 375 error = (*fp->f_ops->fo_ioctl) 376 (fp, TIOCGPGRP, (caddr_t)&tmp, p); 377 *retval = -tmp; 378 break; 379 380 case F_SETOWN: 381 if (fp->f_type == DTYPE_SOCKET) { 382 struct socket *so = (struct socket *)fp->f_data; 383 384 so->so_pgid = (long)SCARG(uap, arg); 385 so->so_siguid = p->p_cred->p_ruid; 386 so->so_sigeuid = p->p_ucred->cr_uid; 387 break; 388 } 389 if ((long)SCARG(uap, arg) <= 0) { 390 SCARG(uap, arg) = (void *)(-(long)SCARG(uap, arg)); 391 } else { 392 struct proc *p1 = pfind((long)SCARG(uap, arg)); 393 if (p1 == 0) { 394 error = ESRCH; 395 break; 396 } 397 SCARG(uap, arg) = (void *)(long)p1->p_pgrp->pg_id; 398 } 399 error = ((*fp->f_ops->fo_ioctl) 400 (fp, TIOCSPGRP, (caddr_t)&SCARG(uap, arg), p)); 401 break; 402 403 case F_SETLKW: 404 flg |= F_WAIT; 405 /* FALLTHROUGH */ 406 407 case F_SETLK: 408 if (fp->f_type != DTYPE_VNODE) { 409 error = EBADF; 410 break; 411 } 412 vp = (struct vnode *)fp->f_data; 413 /* Copy in the lock structure */ 414 error = copyin((caddr_t)SCARG(uap, arg), (caddr_t)&fl, 415 sizeof (fl)); 416 if (error) 417 break; 418 if (fl.l_whence == SEEK_CUR) { 419 if (fl.l_start == 0 && fl.l_len < 0) { 420 /* lockf(3) compliance hack */ 421 fl.l_len = -fl.l_len; 422 fl.l_start = fp->f_offset - fl.l_len; 423 } else 424 fl.l_start += fp->f_offset; 425 } 426 switch (fl.l_type) { 427 428 case F_RDLCK: 429 if ((fp->f_flag & FREAD) == 0) { 430 error = EBADF; 431 goto out; 432 } 433 atomic_setbits_int(&fdp->fd_flags, FD_ADVLOCK); 434 error = VOP_ADVLOCK(vp, fdp, F_SETLK, &fl, flg); 435 break; 436 437 case F_WRLCK: 438 if ((fp->f_flag & FWRITE) == 0) { 439 error = EBADF; 440 goto out; 441 } 442 atomic_setbits_int(&fdp->fd_flags, FD_ADVLOCK); 443 error = VOP_ADVLOCK(vp, fdp, F_SETLK, &fl, flg); 444 break; 445 446 case F_UNLCK: 447 error = VOP_ADVLOCK(vp, fdp, F_UNLCK, &fl, F_POSIX); 448 goto out; 449 450 default: 451 error = EINVAL; 452 goto out; 453 } 454 455 if (fp != fd_getfile(fdp, fd)) { 456 /* 457 * We have lost the race with close() or dup2(); 458 * unlock, pretend that we've won the race and that 459 * lock had been removed by close() 460 */ 461 fl.l_whence = SEEK_SET; 462 fl.l_start = 0; 463 fl.l_len = 0; 464 VOP_ADVLOCK(vp, fdp, F_UNLCK, &fl, F_POSIX); 465 fl.l_type = F_UNLCK; 466 } 467 goto out; 468 469 470 case F_GETLK: 471 if (fp->f_type != DTYPE_VNODE) { 472 error = EBADF; 473 break; 474 } 475 vp = (struct vnode *)fp->f_data; 476 /* Copy in the lock structure */ 477 error = copyin((caddr_t)SCARG(uap, arg), (caddr_t)&fl, 478 sizeof (fl)); 479 if (error) 480 break; 481 if (fl.l_whence == SEEK_CUR) { 482 if (fl.l_start == 0 && fl.l_len < 0) { 483 /* lockf(3) compliance hack */ 484 fl.l_len = -fl.l_len; 485 fl.l_start = fp->f_offset - fl.l_len; 486 } else 487 fl.l_start += fp->f_offset; 488 } 489 if (fl.l_type != F_RDLCK && 490 fl.l_type != F_WRLCK && 491 fl.l_type != F_UNLCK && 492 fl.l_type != 0) { 493 error = EINVAL; 494 break; 495 } 496 error = VOP_ADVLOCK(vp, fdp, F_GETLK, &fl, F_POSIX); 497 if (error) 498 break; 499 error = (copyout((caddr_t)&fl, (caddr_t)SCARG(uap, arg), 500 sizeof (fl))); 501 break; 502 503 default: 504 error = EINVAL; 505 break; 506 } 507 out: 508 FRELE(fp); 509 return (error); 510 } 511 512 /* 513 * Common code for dup, dup2, and fcntl(F_DUPFD). 514 */ 515 int 516 finishdup(struct proc *p, struct file *fp, int old, int new, register_t *retval) 517 { 518 struct file *oldfp; 519 struct filedesc *fdp = p->p_fd; 520 521 if (fp->f_count == LONG_MAX-2) { 522 FRELE(fp); 523 return (EDEADLK); 524 } 525 526 /* 527 * Don't fd_getfile here. We want to closef LARVAL files and 528 * closef can deal with that. 529 */ 530 oldfp = fdp->fd_ofiles[new]; 531 if (oldfp != NULL) 532 FREF(oldfp); 533 534 fdp->fd_ofiles[new] = fp; 535 fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] & ~UF_EXCLOSE; 536 fp->f_count++; 537 FRELE(fp); 538 if (oldfp == NULL) 539 fd_used(fdp, new); 540 *retval = new; 541 542 if (oldfp != NULL) { 543 if (new < fdp->fd_knlistsize) 544 knote_fdclose(p, new); 545 closef(oldfp, p); 546 } 547 548 return (0); 549 } 550 551 void 552 fdremove(struct filedesc *fdp, int fd) 553 { 554 fdp->fd_ofiles[fd] = NULL; 555 fd_unused(fdp, fd); 556 } 557 558 int 559 fdrelease(struct proc *p, int fd) 560 { 561 struct filedesc *fdp = p->p_fd; 562 struct file **fpp, *fp; 563 564 /* 565 * Don't fd_getfile here. We want to closef LARVAL files and closef 566 * can deal with that. 567 */ 568 fpp = &fdp->fd_ofiles[fd]; 569 fp = *fpp; 570 if (fp == NULL) 571 return (EBADF); 572 FREF(fp); 573 *fpp = NULL; 574 fdp->fd_ofileflags[fd] = 0; 575 fd_unused(fdp, fd); 576 if (fd < fdp->fd_knlistsize) 577 knote_fdclose(p, fd); 578 return (closef(fp, p)); 579 } 580 581 /* 582 * Close a file descriptor. 583 */ 584 /* ARGSUSED */ 585 int 586 sys_close(struct proc *p, void *v, register_t *retval) 587 { 588 struct sys_close_args /* { 589 syscallarg(int) fd; 590 } */ *uap = v; 591 int fd = SCARG(uap, fd), error; 592 struct filedesc *fdp = p->p_fd; 593 594 if (fd_getfile(fdp, fd) == NULL) 595 return (EBADF); 596 fdplock(fdp); 597 error = fdrelease(p, fd); 598 fdpunlock(fdp); 599 600 return (error); 601 } 602 603 /* 604 * Return status information about a file descriptor. 605 */ 606 /* ARGSUSED */ 607 int 608 sys_fstat(struct proc *p, void *v, register_t *retval) 609 { 610 struct sys_fstat_args /* { 611 syscallarg(int) fd; 612 syscallarg(struct stat *) sb; 613 } */ *uap = v; 614 int fd = SCARG(uap, fd); 615 struct filedesc *fdp = p->p_fd; 616 struct file *fp; 617 struct stat ub; 618 int error; 619 620 if ((fp = fd_getfile(fdp, fd)) == NULL) 621 return (EBADF); 622 FREF(fp); 623 error = (*fp->f_ops->fo_stat)(fp, &ub, p); 624 FRELE(fp); 625 if (error == 0) { 626 /* 627 * Don't let non-root see generation numbers 628 * (for NFS security) 629 */ 630 if (suser(p, 0)) 631 ub.st_gen = 0; 632 error = copyout((caddr_t)&ub, (caddr_t)SCARG(uap, sb), 633 sizeof (ub)); 634 } 635 return (error); 636 } 637 638 /* 639 * Return pathconf information about a file descriptor. 640 */ 641 /* ARGSUSED */ 642 int 643 sys_fpathconf(struct proc *p, void *v, register_t *retval) 644 { 645 struct sys_fpathconf_args /* { 646 syscallarg(int) fd; 647 syscallarg(int) name; 648 } */ *uap = v; 649 int fd = SCARG(uap, fd); 650 struct filedesc *fdp = p->p_fd; 651 struct file *fp; 652 struct vnode *vp; 653 int error; 654 655 if ((fp = fd_getfile(fdp, fd)) == NULL) 656 return (EBADF); 657 FREF(fp); 658 switch (fp->f_type) { 659 case DTYPE_PIPE: 660 case DTYPE_SOCKET: 661 if (SCARG(uap, name) != _PC_PIPE_BUF) { 662 error = EINVAL; 663 break; 664 } 665 *retval = PIPE_BUF; 666 error = 0; 667 break; 668 669 case DTYPE_VNODE: 670 vp = (struct vnode *)fp->f_data; 671 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); 672 error = VOP_PATHCONF(vp, SCARG(uap, name), retval); 673 VOP_UNLOCK(vp, 0, p); 674 break; 675 676 default: 677 error = EOPNOTSUPP; 678 break; 679 } 680 FRELE(fp); 681 return (error); 682 } 683 684 /* 685 * Allocate a file descriptor for the process. 686 */ 687 int 688 fdalloc(struct proc *p, int want, int *result) 689 { 690 struct filedesc *fdp = p->p_fd; 691 int lim, last, i; 692 u_int new, off; 693 694 /* 695 * Search for a free descriptor starting at the higher 696 * of want or fd_freefile. If that fails, consider 697 * expanding the ofile array. 698 */ 699 restart: 700 lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles); 701 last = min(fdp->fd_nfiles, lim); 702 if ((i = want) < fdp->fd_freefile) 703 i = fdp->fd_freefile; 704 off = i >> NDENTRYSHIFT; 705 new = find_next_zero(fdp->fd_himap, off, 706 (last + NDENTRIES - 1) >> NDENTRYSHIFT); 707 if (new != -1) { 708 i = find_next_zero(&fdp->fd_lomap[new], 709 new > off ? 0 : i & NDENTRYMASK, 710 NDENTRIES); 711 if (i == -1) { 712 /* 713 * Free file descriptor in this block was 714 * below want, try again with higher want. 715 */ 716 want = (new + 1) << NDENTRYSHIFT; 717 goto restart; 718 } 719 i += (new << NDENTRYSHIFT); 720 if (i < last) { 721 fd_used(fdp, i); 722 if (want <= fdp->fd_freefile) 723 fdp->fd_freefile = i; 724 *result = i; 725 return (0); 726 } 727 } 728 if (fdp->fd_nfiles >= lim) 729 return (EMFILE); 730 731 return (ENOSPC); 732 } 733 734 void 735 fdexpand(struct proc *p) 736 { 737 struct filedesc *fdp = p->p_fd; 738 int nfiles, i; 739 struct file **newofile; 740 char *newofileflags; 741 u_int *newhimap, *newlomap; 742 743 /* 744 * No space in current array. 745 */ 746 if (fdp->fd_nfiles < NDEXTENT) 747 nfiles = NDEXTENT; 748 else 749 nfiles = 2 * fdp->fd_nfiles; 750 751 newofile = malloc(nfiles * OFILESIZE, M_FILEDESC, M_WAITOK); 752 newofileflags = (char *) &newofile[nfiles]; 753 754 /* 755 * Copy the existing ofile and ofileflags arrays 756 * and zero the new portion of each array. 757 */ 758 bcopy(fdp->fd_ofiles, newofile, 759 (i = sizeof(struct file *) * fdp->fd_nfiles)); 760 bzero((char *)newofile + i, nfiles * sizeof(struct file *) - i); 761 bcopy(fdp->fd_ofileflags, newofileflags, 762 (i = sizeof(char) * fdp->fd_nfiles)); 763 bzero(newofileflags + i, nfiles * sizeof(char) - i); 764 765 if (fdp->fd_nfiles > NDFILE) 766 free(fdp->fd_ofiles, M_FILEDESC); 767 768 if (NDHISLOTS(nfiles) > NDHISLOTS(fdp->fd_nfiles)) { 769 newhimap = malloc(NDHISLOTS(nfiles) * sizeof(u_int), 770 M_FILEDESC, M_WAITOK); 771 newlomap = malloc(NDLOSLOTS(nfiles) * sizeof(u_int), 772 M_FILEDESC, M_WAITOK); 773 774 bcopy(fdp->fd_himap, newhimap, 775 (i = NDHISLOTS(fdp->fd_nfiles) * sizeof(u_int))); 776 bzero((char *)newhimap + i, 777 NDHISLOTS(nfiles) * sizeof(u_int) - i); 778 779 bcopy(fdp->fd_lomap, newlomap, 780 (i = NDLOSLOTS(fdp->fd_nfiles) * sizeof(u_int))); 781 bzero((char *)newlomap + i, 782 NDLOSLOTS(nfiles) * sizeof(u_int) - i); 783 784 if (NDHISLOTS(fdp->fd_nfiles) > NDHISLOTS(NDFILE)) { 785 free(fdp->fd_himap, M_FILEDESC); 786 free(fdp->fd_lomap, M_FILEDESC); 787 } 788 fdp->fd_himap = newhimap; 789 fdp->fd_lomap = newlomap; 790 } 791 fdp->fd_ofiles = newofile; 792 fdp->fd_ofileflags = newofileflags; 793 fdp->fd_nfiles = nfiles; 794 } 795 796 /* 797 * Create a new open file structure and allocate 798 * a file descriptor for the process that refers to it. 799 */ 800 int 801 falloc(struct proc *p, struct file **resultfp, int *resultfd) 802 { 803 struct file *fp, *fq; 804 int error, i; 805 806 restart: 807 if ((error = fdalloc(p, 0, &i)) != 0) { 808 if (error == ENOSPC) { 809 fdexpand(p); 810 goto restart; 811 } 812 return (error); 813 } 814 if (nfiles >= maxfiles) { 815 fd_unused(p->p_fd, i); 816 tablefull("file"); 817 return (ENFILE); 818 } 819 /* 820 * Allocate a new file descriptor. 821 * If the process has file descriptor zero open, add to the list 822 * of open files at that point, otherwise put it at the front of 823 * the list of open files. 824 */ 825 nfiles++; 826 fp = pool_get(&file_pool, PR_WAITOK|PR_ZERO); 827 fp->f_iflags = FIF_LARVAL; 828 if ((fq = p->p_fd->fd_ofiles[0]) != NULL) { 829 LIST_INSERT_AFTER(fq, fp, f_list); 830 } else { 831 LIST_INSERT_HEAD(&filehead, fp, f_list); 832 } 833 p->p_fd->fd_ofiles[i] = fp; 834 fp->f_count = 1; 835 fp->f_cred = p->p_ucred; 836 crhold(fp->f_cred); 837 if (resultfp) 838 *resultfp = fp; 839 if (resultfd) 840 *resultfd = i; 841 FREF(fp); 842 return (0); 843 } 844 845 /* 846 * Build a new filedesc structure. 847 */ 848 struct filedesc * 849 fdinit(struct proc *p) 850 { 851 struct filedesc0 *newfdp; 852 extern int cmask; 853 854 newfdp = pool_get(&fdesc_pool, PR_WAITOK|PR_ZERO); 855 if (p != NULL) { 856 struct filedesc *fdp = p->p_fd; 857 858 newfdp->fd_fd.fd_cdir = fdp->fd_cdir; 859 vref(newfdp->fd_fd.fd_cdir); 860 newfdp->fd_fd.fd_rdir = fdp->fd_rdir; 861 if (newfdp->fd_fd.fd_rdir) 862 vref(newfdp->fd_fd.fd_rdir); 863 } 864 rw_init(&newfdp->fd_fd.fd_lock, "fdlock"); 865 866 /* Create the file descriptor table. */ 867 newfdp->fd_fd.fd_refcnt = 1; 868 newfdp->fd_fd.fd_cmask = cmask; 869 newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles; 870 newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags; 871 newfdp->fd_fd.fd_nfiles = NDFILE; 872 newfdp->fd_fd.fd_himap = newfdp->fd_dhimap; 873 newfdp->fd_fd.fd_lomap = newfdp->fd_dlomap; 874 newfdp->fd_fd.fd_knlistsize = -1; 875 876 newfdp->fd_fd.fd_freefile = 0; 877 newfdp->fd_fd.fd_lastfile = 0; 878 879 return (&newfdp->fd_fd); 880 } 881 882 /* 883 * Share a filedesc structure. 884 */ 885 struct filedesc * 886 fdshare(struct proc *p) 887 { 888 p->p_fd->fd_refcnt++; 889 return (p->p_fd); 890 } 891 892 /* 893 * Copy a filedesc structure. 894 */ 895 struct filedesc * 896 fdcopy(struct proc *p) 897 { 898 struct filedesc *newfdp, *fdp = p->p_fd; 899 struct file **fpp; 900 int i; 901 902 newfdp = pool_get(&fdesc_pool, PR_WAITOK); 903 bcopy(fdp, newfdp, sizeof(struct filedesc)); 904 if (newfdp->fd_cdir) 905 vref(newfdp->fd_cdir); 906 if (newfdp->fd_rdir) 907 vref(newfdp->fd_rdir); 908 newfdp->fd_refcnt = 1; 909 910 /* 911 * If the number of open files fits in the internal arrays 912 * of the open file structure, use them, otherwise allocate 913 * additional memory for the number of descriptors currently 914 * in use. 915 */ 916 if (newfdp->fd_lastfile < NDFILE) { 917 newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles; 918 newfdp->fd_ofileflags = 919 ((struct filedesc0 *) newfdp)->fd_dfileflags; 920 i = NDFILE; 921 } else { 922 /* 923 * Compute the smallest multiple of NDEXTENT needed 924 * for the file descriptors currently in use, 925 * allowing the table to shrink. 926 */ 927 i = newfdp->fd_nfiles; 928 while (i >= 2 * NDEXTENT && i > newfdp->fd_lastfile * 2) 929 i /= 2; 930 newfdp->fd_ofiles = malloc(i * OFILESIZE, M_FILEDESC, M_WAITOK); 931 newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i]; 932 } 933 if (NDHISLOTS(i) <= NDHISLOTS(NDFILE)) { 934 newfdp->fd_himap = 935 ((struct filedesc0 *) newfdp)->fd_dhimap; 936 newfdp->fd_lomap = 937 ((struct filedesc0 *) newfdp)->fd_dlomap; 938 } else { 939 newfdp->fd_himap = malloc(NDHISLOTS(i) * sizeof(u_int), 940 M_FILEDESC, M_WAITOK); 941 newfdp->fd_lomap = malloc(NDLOSLOTS(i) * sizeof(u_int), 942 M_FILEDESC, M_WAITOK); 943 } 944 newfdp->fd_nfiles = i; 945 bcopy(fdp->fd_ofiles, newfdp->fd_ofiles, i * sizeof(struct file **)); 946 bcopy(fdp->fd_ofileflags, newfdp->fd_ofileflags, i * sizeof(char)); 947 bcopy(fdp->fd_himap, newfdp->fd_himap, NDHISLOTS(i) * sizeof(u_int)); 948 bcopy(fdp->fd_lomap, newfdp->fd_lomap, NDLOSLOTS(i) * sizeof(u_int)); 949 950 /* 951 * kq descriptors cannot be copied. 952 */ 953 if (newfdp->fd_knlistsize != -1) { 954 fpp = newfdp->fd_ofiles; 955 for (i = 0; i <= newfdp->fd_lastfile; i++, fpp++) 956 if (*fpp != NULL && (*fpp)->f_type == DTYPE_KQUEUE) 957 fdremove(newfdp, i); 958 newfdp->fd_knlist = NULL; 959 newfdp->fd_knlistsize = -1; 960 newfdp->fd_knhash = NULL; 961 newfdp->fd_knhashmask = 0; 962 } 963 964 fpp = newfdp->fd_ofiles; 965 for (i = 0; i <= newfdp->fd_lastfile; i++, fpp++) 966 if (*fpp != NULL) { 967 /* 968 * XXX Gruesome hack. If count gets too high, fail 969 * to copy an fd, since fdcopy()'s callers do not 970 * permit it to indicate failure yet. 971 */ 972 if ((*fpp)->f_count == LONG_MAX-2) 973 fdremove(newfdp, i); 974 else 975 (*fpp)->f_count++; 976 } 977 return (newfdp); 978 } 979 980 /* 981 * Release a filedesc structure. 982 */ 983 void 984 fdfree(struct proc *p) 985 { 986 struct filedesc *fdp = p->p_fd; 987 struct file **fpp, *fp; 988 int i; 989 990 if (--fdp->fd_refcnt > 0) 991 return; 992 fpp = fdp->fd_ofiles; 993 for (i = fdp->fd_lastfile; i >= 0; i--, fpp++) { 994 fp = *fpp; 995 if (fp != NULL) { 996 FREF(fp); 997 *fpp = NULL; 998 (void) closef(fp, p); 999 } 1000 } 1001 p->p_fd = NULL; 1002 if (fdp->fd_nfiles > NDFILE) 1003 free(fdp->fd_ofiles, M_FILEDESC); 1004 if (NDHISLOTS(fdp->fd_nfiles) > NDHISLOTS(NDFILE)) { 1005 free(fdp->fd_himap, M_FILEDESC); 1006 free(fdp->fd_lomap, M_FILEDESC); 1007 } 1008 if (fdp->fd_cdir) 1009 vrele(fdp->fd_cdir); 1010 if (fdp->fd_rdir) 1011 vrele(fdp->fd_rdir); 1012 if (fdp->fd_knlist) 1013 free(fdp->fd_knlist, M_TEMP); 1014 if (fdp->fd_knhash) 1015 free(fdp->fd_knhash, M_TEMP); 1016 pool_put(&fdesc_pool, fdp); 1017 } 1018 1019 /* 1020 * Internal form of close. 1021 * Decrement reference count on file structure. 1022 * Note: p may be NULL when closing a file 1023 * that was being passed in a message. 1024 * 1025 * The fp must have its usecount bumped and will be FRELEd here. 1026 */ 1027 int 1028 closef(struct file *fp, struct proc *p) 1029 { 1030 struct filedesc *fdp; 1031 int references_left; 1032 int error; 1033 1034 if (fp == NULL) 1035 return (0); 1036 1037 /* 1038 * Some files passed to this function could be accessed 1039 * without a FILE_IS_USABLE check (and in some cases it's perfectly 1040 * legal), we must beware of files where someone already won the 1041 * race to FIF_WANTCLOSE. 1042 */ 1043 if ((fp->f_iflags & FIF_WANTCLOSE) != 0 || 1044 --fp->f_count > 0) { 1045 references_left = 1; 1046 } else { 1047 references_left = 0; 1048 #ifdef DIAGNOSTIC 1049 if (fp->f_count < 0) 1050 panic("closef: count < 0"); 1051 #endif 1052 1053 /* Wait for the last usecount to drain. */ 1054 fp->f_iflags |= FIF_WANTCLOSE; 1055 while (fp->f_usecount > 1) 1056 tsleep(&fp->f_usecount, PRIBIO, "closef", 0); 1057 } 1058 1059 /* 1060 * POSIX record locking dictates that any close releases ALL 1061 * locks owned by this process. This is handled by setting 1062 * a flag in the unlock to free ONLY locks obeying POSIX 1063 * semantics, and not to free BSD-style file locks. 1064 * If the descriptor was in a message, POSIX-style locks 1065 * aren't passed with the descriptor. 1066 */ 1067 if (p && ((fdp = p->p_fd) != NULL) && 1068 (fdp->fd_flags & FD_ADVLOCK) && 1069 fp->f_type == DTYPE_VNODE) { 1070 struct vnode *vp = fp->f_data; 1071 struct flock lf; 1072 1073 lf.l_whence = SEEK_SET; 1074 lf.l_start = 0; 1075 lf.l_len = 0; 1076 lf.l_type = F_UNLCK; 1077 (void) VOP_ADVLOCK(vp, fdp, F_UNLCK, &lf, F_POSIX); 1078 } 1079 1080 if (references_left) { 1081 FRELE(fp); 1082 return (0); 1083 } 1084 1085 if (fp->f_ops) 1086 error = (*fp->f_ops->fo_close)(fp, p); 1087 else 1088 error = 0; 1089 1090 /* Free fp */ 1091 LIST_REMOVE(fp, f_list); 1092 crfree(fp->f_cred); 1093 #ifdef DIAGNOSTIC 1094 if (fp->f_count != 0 || fp->f_usecount != 1) 1095 panic("closef: count: %d/%d", fp->f_count, fp->f_usecount); 1096 #endif 1097 nfiles--; 1098 pool_put(&file_pool, fp); 1099 1100 return (error); 1101 } 1102 1103 /* 1104 * Apply an advisory lock on a file descriptor. 1105 * 1106 * Just attempt to get a record lock of the requested type on 1107 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0). 1108 */ 1109 /* ARGSUSED */ 1110 int 1111 sys_flock(struct proc *p, void *v, register_t *retval) 1112 { 1113 struct sys_flock_args /* { 1114 syscallarg(int) fd; 1115 syscallarg(int) how; 1116 } */ *uap = v; 1117 int fd = SCARG(uap, fd); 1118 int how = SCARG(uap, how); 1119 struct filedesc *fdp = p->p_fd; 1120 struct file *fp; 1121 struct vnode *vp; 1122 struct flock lf; 1123 int error; 1124 1125 if ((fp = fd_getfile(fdp, fd)) == NULL) 1126 return (EBADF); 1127 if (fp->f_type != DTYPE_VNODE) 1128 return (EOPNOTSUPP); 1129 FREF(fp); 1130 vp = (struct vnode *)fp->f_data; 1131 lf.l_whence = SEEK_SET; 1132 lf.l_start = 0; 1133 lf.l_len = 0; 1134 if (how & LOCK_UN) { 1135 lf.l_type = F_UNLCK; 1136 fp->f_flag &= ~FHASLOCK; 1137 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK); 1138 goto out; 1139 } 1140 if (how & LOCK_EX) 1141 lf.l_type = F_WRLCK; 1142 else if (how & LOCK_SH) 1143 lf.l_type = F_RDLCK; 1144 else { 1145 error = EINVAL; 1146 goto out; 1147 } 1148 fp->f_flag |= FHASLOCK; 1149 if (how & LOCK_NB) 1150 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK); 1151 else 1152 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK|F_WAIT); 1153 out: 1154 FRELE(fp); 1155 return (error); 1156 } 1157 1158 /* 1159 * File Descriptor pseudo-device driver (/dev/fd/). 1160 * 1161 * Opening minor device N dup()s the file (if any) connected to file 1162 * descriptor N belonging to the calling process. Note that this driver 1163 * consists of only the ``open()'' routine, because all subsequent 1164 * references to this file will be direct to the other driver. 1165 */ 1166 /* ARGSUSED */ 1167 int 1168 filedescopen(dev_t dev, int mode, int type, struct proc *p) 1169 { 1170 1171 /* 1172 * XXX Kludge: set curproc->p_dupfd to contain the value of the 1173 * the file descriptor being sought for duplication. The error 1174 * return ensures that the vnode for this device will be released 1175 * by vn_open. Open will detect this special error and take the 1176 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN 1177 * will simply report the error. 1178 */ 1179 p->p_dupfd = minor(dev); 1180 return (ENODEV); 1181 } 1182 1183 /* 1184 * Duplicate the specified descriptor to a free descriptor. 1185 */ 1186 int 1187 dupfdopen(struct filedesc *fdp, int indx, int dfd, int mode, int error) 1188 { 1189 struct file *wfp; 1190 1191 /* 1192 * Assume that the filename was user-specified; applications do 1193 * not tend to open /dev/fd/# when they can just call dup() 1194 */ 1195 if ((curproc->p_flag & (P_SUGIDEXEC | P_SUGID))) { 1196 if (curproc->p_descfd == 255) 1197 return (EPERM); 1198 if (curproc->p_descfd != curproc->p_dupfd) 1199 return (EPERM); 1200 } 1201 1202 /* 1203 * If the to-be-dup'd fd number is greater than the allowed number 1204 * of file descriptors, or the fd to be dup'd has already been 1205 * closed, reject. Note, there is no need to check for new == old 1206 * because fd_getfile will return NULL if the file at indx is 1207 * newly created by falloc (FIF_LARVAL). 1208 */ 1209 if ((wfp = fd_getfile(fdp, dfd)) == NULL) 1210 return (EBADF); 1211 1212 /* 1213 * There are two cases of interest here. 1214 * 1215 * For ENODEV simply dup (dfd) to file descriptor 1216 * (indx) and return. 1217 * 1218 * For ENXIO steal away the file structure from (dfd) and 1219 * store it in (indx). (dfd) is effectively closed by 1220 * this operation. 1221 * 1222 * Any other error code is just returned. 1223 */ 1224 switch (error) { 1225 case ENODEV: 1226 /* 1227 * Check that the mode the file is being opened for is a 1228 * subset of the mode of the existing descriptor. 1229 */ 1230 if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) 1231 return (EACCES); 1232 if (wfp->f_count == LONG_MAX-2) 1233 return (EDEADLK); 1234 fdp->fd_ofiles[indx] = wfp; 1235 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd]; 1236 wfp->f_count++; 1237 fd_used(fdp, indx); 1238 return (0); 1239 1240 case ENXIO: 1241 /* 1242 * Steal away the file pointer from dfd, and stuff it into indx. 1243 */ 1244 fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd]; 1245 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd]; 1246 fdp->fd_ofiles[dfd] = NULL; 1247 fdp->fd_ofileflags[dfd] = 0; 1248 /* 1249 * Complete the clean up of the filedesc structure by 1250 * recomputing the various hints. 1251 */ 1252 fd_used(fdp, indx); 1253 fd_unused(fdp, dfd); 1254 return (0); 1255 1256 default: 1257 return (error); 1258 } 1259 /* NOTREACHED */ 1260 } 1261 1262 /* 1263 * Close any files on exec? 1264 */ 1265 void 1266 fdcloseexec(struct proc *p) 1267 { 1268 struct filedesc *fdp = p->p_fd; 1269 int fd; 1270 1271 for (fd = 0; fd <= fdp->fd_lastfile; fd++) 1272 if (fdp->fd_ofileflags[fd] & UF_EXCLOSE) 1273 (void) fdrelease(p, fd); 1274 } 1275 1276 int 1277 sys_closefrom(struct proc *p, void *v, register_t *retval) 1278 { 1279 struct sys_closefrom_args *uap = v; 1280 struct filedesc *fdp = p->p_fd; 1281 u_int startfd, i; 1282 1283 startfd = SCARG(uap, fd); 1284 fdplock(fdp); 1285 1286 if (startfd > fdp->fd_lastfile) { 1287 fdpunlock(fdp); 1288 return (EBADF); 1289 } 1290 1291 for (i = startfd; i <= fdp->fd_lastfile; i++) 1292 fdrelease(p, i); 1293 1294 fdpunlock(fdp); 1295 return (0); 1296 } 1297