xref: /dragonfly/sys/kern/kern_descrip.c (revision 38a690d7)
1 /*
2  * Copyright (c) 1982, 1986, 1989, 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)kern_descrip.c	8.6 (Berkeley) 4/19/94
39  * $FreeBSD: src/sys/kern/kern_descrip.c,v 1.81.2.17 2003/06/06 20:21:32 tegge Exp $
40  * $DragonFly: src/sys/kern/kern_descrip.c,v 1.11 2003/07/30 00:19:14 dillon Exp $
41  */
42 
43 #include "opt_compat.h"
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/malloc.h>
47 #include <sys/sysproto.h>
48 #include <sys/conf.h>
49 #include <sys/filedesc.h>
50 #include <sys/kernel.h>
51 #include <sys/sysctl.h>
52 #include <sys/vnode.h>
53 #include <sys/proc.h>
54 #include <sys/namei.h>
55 #include <sys/file.h>
56 #include <sys/stat.h>
57 #include <sys/filio.h>
58 #include <sys/fcntl.h>
59 #include <sys/unistd.h>
60 #include <sys/resourcevar.h>
61 #include <sys/event.h>
62 
63 #include <vm/vm.h>
64 #include <vm/vm_extern.h>
65 
66 #include <sys/file2.h>
67 
68 static MALLOC_DEFINE(M_FILEDESC, "file desc", "Open file descriptor table");
69 static MALLOC_DEFINE(M_FILEDESC_TO_LEADER, "file desc to leader",
70 		     "file desc to leader structures");
71 MALLOC_DEFINE(M_FILE, "file", "Open file structure");
72 static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures");
73 
74 static	 d_open_t  fdopen;
75 #define NUMFDESC 64
76 
77 #define CDEV_MAJOR 22
78 static struct cdevsw fildesc_cdevsw = {
79 	/* name */	"FD",
80 	/* maj */	CDEV_MAJOR,
81 	/* flags */	0,
82 	/* port */      NULL,
83 	/* autoq */	0,
84 
85 	/* open */	fdopen,
86 	/* close */	noclose,
87 	/* read */	noread,
88 	/* write */	nowrite,
89 	/* ioctl */	noioctl,
90 	/* poll */	nopoll,
91 	/* mmap */	nommap,
92 	/* strategy */	nostrategy,
93 	/* dump */	nodump,
94 	/* psize */	nopsize
95 };
96 
97 static int do_dup __P((struct filedesc *fdp, int old, int new, register_t *retval, struct proc *p));
98 static int badfo_readwrite __P((struct file *fp, struct uio *uio,
99     struct ucred *cred, int flags, struct thread *td));
100 static int badfo_ioctl __P((struct file *fp, u_long com, caddr_t data,
101     struct thread *td));
102 static int badfo_poll __P((struct file *fp, int events,
103     struct ucred *cred, struct thread *td));
104 static int badfo_kqfilter __P((struct file *fp, struct knote *kn));
105 static int badfo_stat __P((struct file *fp, struct stat *sb, struct thread *td));
106 static int badfo_close __P((struct file *fp, struct thread *td));
107 
108 /*
109  * Descriptor management.
110  */
111 struct filelist filehead;	/* head of list of open files */
112 int nfiles;			/* actual number of open files */
113 extern int cmask;
114 
115 /*
116  * System calls on descriptors.
117  */
118 /* ARGSUSED */
119 int
120 getdtablesize(struct getdtablesize_args *uap)
121 {
122 	struct proc *p = curproc;
123 
124 	uap->sysmsg_result =
125 	    min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
126 	return (0);
127 }
128 
129 /*
130  * Duplicate a file descriptor to a particular value.
131  *
132  * note: keep in mind that a potential race condition exists when closing
133  * descriptors from a shared descriptor table (via rfork).
134  */
135 /* ARGSUSED */
136 int
137 dup2(struct dup2_args *uap)
138 {
139 	struct proc *p = curproc;
140 	struct filedesc *fdp = p->p_fd;
141 	u_int old = uap->from, new = uap->to;
142 	int i, error;
143 
144 retry:
145 	if (old >= fdp->fd_nfiles ||
146 	    fdp->fd_ofiles[old] == NULL ||
147 	    new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
148 	    new >= maxfilesperproc) {
149 		return (EBADF);
150 	}
151 	if (old == new) {
152 		uap->sysmsg_result = new;
153 		return (0);
154 	}
155 	if (new >= fdp->fd_nfiles) {
156 		if ((error = fdalloc(p, new, &i)))
157 			return (error);
158 		/*
159 		 * fdalloc() may block, retest everything.
160 		 */
161 		goto retry;
162 	}
163 	return (do_dup(fdp, (int)old, (int)new, uap->sysmsg_fds, p));
164 }
165 
166 /*
167  * Duplicate a file descriptor.
168  */
169 /* ARGSUSED */
170 int
171 dup(struct dup_args *uap)
172 {
173 	struct proc *p = curproc;
174 	struct filedesc *fdp;
175 	u_int old;
176 	int new, error;
177 
178 	old = uap->fd;
179 	fdp = p->p_fd;
180 	if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL)
181 		return (EBADF);
182 	if ((error = fdalloc(p, 0, &new)))
183 		return (error);
184 	return (do_dup(fdp, (int)old, new, uap->sysmsg_fds, p));
185 }
186 
187 /*
188  * The file control system call.
189  */
190 /* ARGSUSED */
191 int
192 fcntl(struct fcntl_args *uap)
193 {
194 	struct thread *td = curthread;
195 	struct proc *p = td->td_proc;
196 	struct filedesc *fdp = p->p_fd;
197 	struct file *fp;
198 	char *pop;
199 	struct vnode *vp;
200 	int i, tmp, error, flg = F_POSIX;
201 	struct flock fl;
202 	u_int newmin;
203 
204 	KKASSERT(p);
205 
206 	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
207 	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
208 		return (EBADF);
209 	pop = &fdp->fd_ofileflags[uap->fd];
210 
211 	switch (uap->cmd) {
212 	case F_DUPFD:
213 		newmin = uap->arg;
214 		if (newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
215 		    newmin >= maxfilesperproc)
216 			return (EINVAL);
217 		if ((error = fdalloc(p, newmin, &i)))
218 			return (error);
219 		return (do_dup(fdp, uap->fd, i, uap->sysmsg_fds, p));
220 
221 	case F_GETFD:
222 		uap->sysmsg_result = (*pop & UF_EXCLOSE) ? FD_CLOEXEC : 0;
223 		return (0);
224 
225 	case F_SETFD:
226 		*pop = (*pop &~ UF_EXCLOSE) |
227 		    (uap->arg & FD_CLOEXEC ? UF_EXCLOSE : 0);
228 		return (0);
229 
230 	case F_GETFL:
231 		uap->sysmsg_result = OFLAGS(fp->f_flag);
232 		return (0);
233 
234 	case F_SETFL:
235 		fhold(fp);
236 		fp->f_flag &= ~FCNTLFLAGS;
237 		fp->f_flag |= FFLAGS(uap->arg & ~O_ACCMODE) & FCNTLFLAGS;
238 		tmp = fp->f_flag & FNONBLOCK;
239 		error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, td);
240 		if (error) {
241 			fdrop(fp, td);
242 			return (error);
243 		}
244 		tmp = fp->f_flag & FASYNC;
245 		error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, td);
246 		if (!error) {
247 			fdrop(fp, td);
248 			return (0);
249 		}
250 		fp->f_flag &= ~FNONBLOCK;
251 		tmp = 0;
252 		(void)fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, td);
253 		fdrop(fp, td);
254 		return (error);
255 
256 	case F_GETOWN:
257 		fhold(fp);
258 		error = fo_ioctl(fp, FIOGETOWN, (caddr_t)uap->sysmsg_fds, td);
259 		fdrop(fp, td);
260 		return(error);
261 
262 	case F_SETOWN:
263 		fhold(fp);
264 		error = fo_ioctl(fp, FIOSETOWN, (caddr_t)&uap->arg, td);
265 		fdrop(fp, td);
266 		return(error);
267 
268 	case F_SETLKW:
269 		flg |= F_WAIT;
270 		/* Fall into F_SETLK */
271 
272 	case F_SETLK:
273 		if (fp->f_type != DTYPE_VNODE)
274 			return (EBADF);
275 		vp = (struct vnode *)fp->f_data;
276 
277 		/*
278 		 * copyin/lockop may block
279 		 */
280 		fhold(fp);
281 		/* Copy in the lock structure */
282 		error = copyin((caddr_t)(intptr_t)uap->arg, (caddr_t)&fl,
283 		    sizeof(fl));
284 		if (error) {
285 			fdrop(fp, td);
286 			return (error);
287 		}
288 		if (fl.l_whence == SEEK_CUR)
289 			fl.l_start += fp->f_offset;
290 
291 		switch (fl.l_type) {
292 		case F_RDLCK:
293 			if ((fp->f_flag & FREAD) == 0) {
294 				error = EBADF;
295 				break;
296 			}
297 			p->p_leader->p_flag |= P_ADVLOCK;
298 			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK,
299 			    &fl, flg);
300 			break;
301 		case F_WRLCK:
302 			if ((fp->f_flag & FWRITE) == 0) {
303 				error = EBADF;
304 				break;
305 			}
306 			p->p_leader->p_flag |= P_ADVLOCK;
307 			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK,
308 			    &fl, flg);
309 			break;
310 		case F_UNLCK:
311 			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK,
312 				&fl, F_POSIX);
313 			break;
314 		default:
315 			error = EINVAL;
316 			break;
317 		}
318 		/* Check for race with close */
319 		if ((unsigned) uap->fd >= fdp->fd_nfiles ||
320 		    fp != fdp->fd_ofiles[uap->fd]) {
321 			fl.l_whence = SEEK_SET;
322 			fl.l_start = 0;
323 			fl.l_len = 0;
324 			fl.l_type = F_UNLCK;
325 			(void) VOP_ADVLOCK(vp, (caddr_t)p->p_leader,
326 					   F_UNLCK, &fl, F_POSIX);
327 		}
328 		fdrop(fp, td);
329 		return(error);
330 
331 	case F_GETLK:
332 		if (fp->f_type != DTYPE_VNODE)
333 			return (EBADF);
334 		vp = (struct vnode *)fp->f_data;
335 		/*
336 		 * copyin/lockop may block
337 		 */
338 		fhold(fp);
339 		/* Copy in the lock structure */
340 		error = copyin((caddr_t)(intptr_t)uap->arg, (caddr_t)&fl,
341 		    sizeof(fl));
342 		if (error) {
343 			fdrop(fp, td);
344 			return (error);
345 		}
346 		if (fl.l_type != F_RDLCK && fl.l_type != F_WRLCK &&
347 		    fl.l_type != F_UNLCK) {
348 			fdrop(fp, td);
349 			return (EINVAL);
350 		}
351 		if (fl.l_whence == SEEK_CUR)
352 			fl.l_start += fp->f_offset;
353 		error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK,
354 			    &fl, F_POSIX);
355 		fdrop(fp, td);
356 		if (error == 0) {
357 			error = copyout((caddr_t)&fl,
358 				    (caddr_t)(intptr_t)uap->arg, sizeof(fl));
359 		}
360 		return(error);
361 	default:
362 		return (EINVAL);
363 	}
364 	/* NOTREACHED */
365 }
366 
367 /*
368  * Common code for dup, dup2, and fcntl(F_DUPFD).
369  */
370 static int
371 do_dup(fdp, old, new, retval, p)
372 	struct filedesc *fdp;
373 	int old, new;
374 	register_t *retval;
375 	struct proc *p;
376 {
377 	struct thread *td = p->p_thread;
378 	struct file *fp;
379 	struct file *delfp;
380 	int holdleaders;
381 
382 	/*
383 	 * Save info on the descriptor being overwritten.  We have
384 	 * to do the unmap now, but we cannot close it without
385 	 * introducing an ownership race for the slot.
386 	 */
387 	delfp = fdp->fd_ofiles[new];
388 	if (delfp != NULL && p->p_fdtol != NULL) {
389 		/*
390 		 * Ask fdfree() to sleep to ensure that all relevant
391 		 * process leaders can be traversed in closef().
392 		 */
393 		fdp->fd_holdleaderscount++;
394 		holdleaders = 1;
395 	} else
396 		holdleaders = 0;
397 #if 0
398 	if (delfp && (fdp->fd_ofileflags[new] & UF_MAPPED))
399 		(void) munmapfd(p, new);
400 #endif
401 
402 	/*
403 	 * Duplicate the source descriptor, update lastfile
404 	 */
405 	fp = fdp->fd_ofiles[old];
406 	fdp->fd_ofiles[new] = fp;
407 	fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE;
408 	fhold(fp);
409 	if (new > fdp->fd_lastfile)
410 		fdp->fd_lastfile = new;
411 	*retval = new;
412 
413 	/*
414 	 * If we dup'd over a valid file, we now own the reference to it
415 	 * and must dispose of it using closef() semantics (as if a
416 	 * close() were performed on it).
417 	 */
418 	if (delfp) {
419 		(void) closef(delfp, td);
420 		if (holdleaders) {
421 			fdp->fd_holdleaderscount--;
422 			if (fdp->fd_holdleaderscount == 0 &&
423 			    fdp->fd_holdleaderswakeup != 0) {
424 				fdp->fd_holdleaderswakeup = 0;
425 				wakeup(&fdp->fd_holdleaderscount);
426 			}
427 		}
428 	}
429 	return (0);
430 }
431 
432 /*
433  * If sigio is on the list associated with a process or process group,
434  * disable signalling from the device, remove sigio from the list and
435  * free sigio.
436  */
437 void
438 funsetown(sigio)
439 	struct sigio *sigio;
440 {
441 	int s;
442 
443 	if (sigio == NULL)
444 		return;
445 	s = splhigh();
446 	*(sigio->sio_myref) = NULL;
447 	splx(s);
448 	if (sigio->sio_pgid < 0) {
449 		SLIST_REMOVE(&sigio->sio_pgrp->pg_sigiolst, sigio,
450 			     sigio, sio_pgsigio);
451 	} else /* if ((*sigiop)->sio_pgid > 0) */ {
452 		SLIST_REMOVE(&sigio->sio_proc->p_sigiolst, sigio,
453 			     sigio, sio_pgsigio);
454 	}
455 	crfree(sigio->sio_ucred);
456 	FREE(sigio, M_SIGIO);
457 }
458 
459 /* Free a list of sigio structures. */
460 void
461 funsetownlst(sigiolst)
462 	struct sigiolst *sigiolst;
463 {
464 	struct sigio *sigio;
465 
466 	while ((sigio = SLIST_FIRST(sigiolst)) != NULL)
467 		funsetown(sigio);
468 }
469 
470 /*
471  * This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg).
472  *
473  * After permission checking, add a sigio structure to the sigio list for
474  * the process or process group.
475  */
476 int
477 fsetown(pgid, sigiop)
478 	pid_t pgid;
479 	struct sigio **sigiop;
480 {
481 	struct proc *proc;
482 	struct pgrp *pgrp;
483 	struct sigio *sigio;
484 	int s;
485 
486 	if (pgid == 0) {
487 		funsetown(*sigiop);
488 		return (0);
489 	}
490 	if (pgid > 0) {
491 		proc = pfind(pgid);
492 		if (proc == NULL)
493 			return (ESRCH);
494 
495 		/*
496 		 * Policy - Don't allow a process to FSETOWN a process
497 		 * in another session.
498 		 *
499 		 * Remove this test to allow maximum flexibility or
500 		 * restrict FSETOWN to the current process or process
501 		 * group for maximum safety.
502 		 */
503 		if (proc->p_session != curproc->p_session)
504 			return (EPERM);
505 
506 		pgrp = NULL;
507 	} else /* if (pgid < 0) */ {
508 		pgrp = pgfind(-pgid);
509 		if (pgrp == NULL)
510 			return (ESRCH);
511 
512 		/*
513 		 * Policy - Don't allow a process to FSETOWN a process
514 		 * in another session.
515 		 *
516 		 * Remove this test to allow maximum flexibility or
517 		 * restrict FSETOWN to the current process or process
518 		 * group for maximum safety.
519 		 */
520 		if (pgrp->pg_session != curproc->p_session)
521 			return (EPERM);
522 
523 		proc = NULL;
524 	}
525 	funsetown(*sigiop);
526 	MALLOC(sigio, struct sigio *, sizeof(struct sigio), M_SIGIO, M_WAITOK);
527 	if (pgid > 0) {
528 		SLIST_INSERT_HEAD(&proc->p_sigiolst, sigio, sio_pgsigio);
529 		sigio->sio_proc = proc;
530 	} else {
531 		SLIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio, sio_pgsigio);
532 		sigio->sio_pgrp = pgrp;
533 	}
534 	sigio->sio_pgid = pgid;
535 	sigio->sio_ucred = crhold(curproc->p_ucred);
536 	/* It would be convenient if p_ruid was in ucred. */
537 	sigio->sio_ruid = curproc->p_ucred->cr_ruid;
538 	sigio->sio_myref = sigiop;
539 	s = splhigh();
540 	*sigiop = sigio;
541 	splx(s);
542 	return (0);
543 }
544 
545 /*
546  * This is common code for FIOGETOWN ioctl called by fcntl(fd, F_GETOWN, arg).
547  */
548 pid_t
549 fgetown(sigio)
550 	struct sigio *sigio;
551 {
552 	return (sigio != NULL ? sigio->sio_pgid : 0);
553 }
554 
555 /*
556  * Close a file descriptor.
557  */
558 /* ARGSUSED */
559 int
560 close(struct close_args *uap)
561 {
562 	struct thread *td = curthread;
563 	struct proc *p = td->td_proc;
564 	struct filedesc *fdp;
565 	struct file *fp;
566 	int fd = uap->fd;
567 	int error;
568 	int holdleaders;
569 
570 	KKASSERT(p);
571 	fdp = p->p_fd;
572 
573 	if ((unsigned)fd >= fdp->fd_nfiles ||
574 	    (fp = fdp->fd_ofiles[fd]) == NULL)
575 		return (EBADF);
576 #if 0
577 	if (fdp->fd_ofileflags[fd] & UF_MAPPED)
578 		(void) munmapfd(p, fd);
579 #endif
580 	fdp->fd_ofiles[fd] = NULL;
581 	fdp->fd_ofileflags[fd] = 0;
582 	holdleaders = 0;
583 	if (p->p_fdtol != NULL) {
584 		/*
585 		 * Ask fdfree() to sleep to ensure that all relevant
586 		 * process leaders can be traversed in closef().
587 		 */
588 		fdp->fd_holdleaderscount++;
589 		holdleaders = 1;
590 	}
591 
592 	/*
593 	 * we now hold the fp reference that used to be owned by the descriptor
594 	 * array.
595 	 */
596 	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
597 		fdp->fd_lastfile--;
598 	if (fd < fdp->fd_freefile)
599 		fdp->fd_freefile = fd;
600 	if (fd < fdp->fd_knlistsize)
601 		knote_fdclose(p, fd);
602 	error = closef(fp, td);
603 	if (holdleaders) {
604 		fdp->fd_holdleaderscount--;
605 		if (fdp->fd_holdleaderscount == 0 &&
606 		    fdp->fd_holdleaderswakeup != 0) {
607 			fdp->fd_holdleaderswakeup = 0;
608 			wakeup(&fdp->fd_holdleaderscount);
609 		}
610 	}
611 	return (error);
612 }
613 
#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
/*
 * Compatibility fstat: obtain the status of descriptor uap->fd, convert
 * it to the old `struct ostat' layout with cvtstat() and copy it out to
 * uap->sb.
 *
 * Returns EBADF when the descriptor is not open, otherwise the result
 * of fo_stat() or copyout().
 */
int
ofstat(struct ofstat_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct filedesc *fdp;
	struct file *fp;
	struct stat sb;
	struct ostat osb;
	int error;

	KKASSERT(p);
	fdp = p->p_fd;
	if ((unsigned)uap->fd >= fdp->fd_nfiles)
		return (EBADF);
	fp = fdp->fd_ofiles[uap->fd];
	if (fp == NULL)
		return (EBADF);

	/* Hold the file across the stat and the copyout. */
	fhold(fp);
	error = fo_stat(fp, &sb, td);
	if (error == 0) {
		cvtstat(&sb, &osb);
		error = copyout((caddr_t)&osb, (caddr_t)uap->sb,
		    sizeof (osb));
	}
	fdrop(fp, td);
	return (error);
}
#endif /* COMPAT_43 || COMPAT_SUNOS */
645 
646 /*
647  * Return status information about a file descriptor.
648  */
649 /* ARGSUSED */
650 int
651 fstat(struct fstat_args *uap)
652 {
653 	struct thread *td = curthread;
654 	struct proc *p = td->td_proc;
655 	struct filedesc *fdp = p->p_fd;
656 	struct file *fp;
657 	struct stat ub;
658 	int error;
659 
660 	KKASSERT(p);
661 	fdp = p->p_fd;
662 	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
663 	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
664 		return (EBADF);
665 	fhold(fp);
666 	error = fo_stat(fp, &ub, td);
667 	if (error == 0)
668 		error = copyout((caddr_t)&ub, (caddr_t)uap->sb, sizeof (ub));
669 	fdrop(fp, td);
670 	return (error);
671 }
672 
673 /*
674  * Return status information about a file descriptor.
675  */
676 /* ARGSUSED */
677 int
678 nfstat(struct nfstat_args *uap)
679 {
680 	struct thread *td = curthread;
681 	struct proc *p = td->td_proc;
682 	struct filedesc *fdp;
683 	struct file *fp;
684 	struct stat ub;
685 	struct nstat nub;
686 	int error;
687 
688 	KKASSERT(p);
689 	fdp = p->p_fd;
690 	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
691 	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
692 		return (EBADF);
693 	fhold(fp);
694 	error = fo_stat(fp, &ub, td);
695 	if (error == 0) {
696 		cvtnstat(&ub, &nub);
697 		error = copyout((caddr_t)&nub, (caddr_t)uap->sb, sizeof (nub));
698 	}
699 	fdrop(fp, td);
700 	return (error);
701 }
702 
703 /*
704  * Return pathconf information about a file descriptor.
705  */
706 /* ARGSUSED */
707 int
708 fpathconf(struct fpathconf_args *uap)
709 {
710 	struct thread *td = curthread;
711 	struct proc *p = td->td_proc;
712 	struct filedesc *fdp;
713 	struct file *fp;
714 	struct vnode *vp;
715 	int error = 0;
716 
717 	KKASSERT(p);
718 	fdp = p->p_fd;
719 	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
720 	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
721 		return (EBADF);
722 
723 	fhold(fp);
724 
725 	switch (fp->f_type) {
726 	case DTYPE_PIPE:
727 	case DTYPE_SOCKET:
728 		if (uap->name != _PC_PIPE_BUF) {
729 			error = EINVAL;
730 		} else {
731 			uap->sysmsg_result = PIPE_BUF;
732 			error = 0;
733 		}
734 		break;
735 	case DTYPE_FIFO:
736 	case DTYPE_VNODE:
737 		vp = (struct vnode *)fp->f_data;
738 		error = VOP_PATHCONF(vp, uap->name, uap->sysmsg_fds);
739 		break;
740 	default:
741 		error = EOPNOTSUPP;
742 		break;
743 	}
744 	fdrop(fp, td);
745 	return(error);
746 }
747 
748 /*
749  * Allocate a file descriptor for the process.
750  */
751 static int fdexpand;
752 SYSCTL_INT(_debug, OID_AUTO, fdexpand, CTLFLAG_RD, &fdexpand, 0, "");
753 
754 int
755 fdalloc(p, want, result)
756 	struct proc *p;
757 	int want;
758 	int *result;
759 {
760 	struct filedesc *fdp = p->p_fd;
761 	int i;
762 	int lim, last, nfiles;
763 	struct file **newofile;
764 	char *newofileflags;
765 
766 	/*
767 	 * Search for a free descriptor starting at the higher
768 	 * of want or fd_freefile.  If that fails, consider
769 	 * expanding the ofile array.
770 	 */
771 	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
772 	for (;;) {
773 		last = min(fdp->fd_nfiles, lim);
774 		if ((i = want) < fdp->fd_freefile)
775 			i = fdp->fd_freefile;
776 		for (; i < last; i++) {
777 			if (fdp->fd_ofiles[i] == NULL) {
778 				fdp->fd_ofileflags[i] = 0;
779 				if (i > fdp->fd_lastfile)
780 					fdp->fd_lastfile = i;
781 				if (want <= fdp->fd_freefile)
782 					fdp->fd_freefile = i;
783 				*result = i;
784 				return (0);
785 			}
786 		}
787 
788 		/*
789 		 * No space in current array.  Expand?
790 		 */
791 		if (fdp->fd_nfiles >= lim)
792 			return (EMFILE);
793 		if (fdp->fd_nfiles < NDEXTENT)
794 			nfiles = NDEXTENT;
795 		else
796 			nfiles = 2 * fdp->fd_nfiles;
797 		MALLOC(newofile, struct file **, nfiles * OFILESIZE,
798 		    M_FILEDESC, M_WAITOK);
799 
800 		/*
801 		 * deal with file-table extend race that might have occured
802 		 * when malloc was blocked.
803 		 */
804 		if (fdp->fd_nfiles >= nfiles) {
805 			FREE(newofile, M_FILEDESC);
806 			continue;
807 		}
808 		newofileflags = (char *) &newofile[nfiles];
809 		/*
810 		 * Copy the existing ofile and ofileflags arrays
811 		 * and zero the new portion of each array.
812 		 */
813 		bcopy(fdp->fd_ofiles, newofile,
814 			(i = sizeof(struct file *) * fdp->fd_nfiles));
815 		bzero((char *)newofile + i, nfiles * sizeof(struct file *) - i);
816 		bcopy(fdp->fd_ofileflags, newofileflags,
817 			(i = sizeof(char) * fdp->fd_nfiles));
818 		bzero(newofileflags + i, nfiles * sizeof(char) - i);
819 		if (fdp->fd_nfiles > NDFILE)
820 			FREE(fdp->fd_ofiles, M_FILEDESC);
821 		fdp->fd_ofiles = newofile;
822 		fdp->fd_ofileflags = newofileflags;
823 		fdp->fd_nfiles = nfiles;
824 		fdexpand++;
825 	}
826 	return (0);
827 }
828 
829 /*
830  * Check to see whether n user file descriptors
831  * are available to the process p.
832  */
833 int
834 fdavail(p, n)
835 	struct proc *p;
836 	int n;
837 {
838 	struct filedesc *fdp = p->p_fd;
839 	struct file **fpp;
840 	int i, lim, last;
841 
842 	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
843 	if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
844 		return (1);
845 
846 	last = min(fdp->fd_nfiles, lim);
847 	fpp = &fdp->fd_ofiles[fdp->fd_freefile];
848 	for (i = last - fdp->fd_freefile; --i >= 0; fpp++) {
849 		if (*fpp == NULL && --n <= 0)
850 			return (1);
851 	}
852 	return (0);
853 }
854 
855 /*
856  * Create a new open file structure and allocate
857  * a file decriptor for the process that refers to it.
858  */
859 int
860 falloc(p, resultfp, resultfd)
861 	struct proc *p;
862 	struct file **resultfp;
863 	int *resultfd;
864 {
865 	struct file *fp, *fq;
866 	int error, i;
867 
868 	if (nfiles >= maxfiles) {
869 		tablefull("file");
870 		return (ENFILE);
871 	}
872 	/*
873 	 * Allocate a new file descriptor.
874 	 * If the process has file descriptor zero open, add to the list
875 	 * of open files at that point, otherwise put it at the front of
876 	 * the list of open files.
877 	 */
878 	nfiles++;
879 	MALLOC(fp, struct file *, sizeof(struct file), M_FILE, M_WAITOK);
880 	bzero(fp, sizeof(struct file));
881 
882 	/*
883 	 * wait until after malloc (which may have blocked) returns before
884 	 * allocating the slot, else a race might have shrunk it if we had
885 	 * allocated it before the malloc.
886 	 */
887 	if ((error = fdalloc(p, 0, &i))) {
888 		nfiles--;
889 		FREE(fp, M_FILE);
890 		return (error);
891 	}
892 	fp->f_count = 1;
893 	fp->f_cred = crhold(p->p_ucred);
894 	fp->f_ops = &badfileops;
895 	fp->f_seqcount = 1;
896 	if ((fq = p->p_fd->fd_ofiles[0])) {
897 		LIST_INSERT_AFTER(fq, fp, f_list);
898 	} else {
899 		LIST_INSERT_HEAD(&filehead, fp, f_list);
900 	}
901 	p->p_fd->fd_ofiles[i] = fp;
902 	if (resultfp)
903 		*resultfp = fp;
904 	if (resultfd)
905 		*resultfd = i;
906 	return (0);
907 }
908 
909 /*
910  * Free a file descriptor.
911  */
912 void
913 ffree(fp)
914 	struct file *fp;
915 {
916 	KASSERT((fp->f_count == 0), ("ffree: fp_fcount not 0!"));
917 	LIST_REMOVE(fp, f_list);
918 	crfree(fp->f_cred);
919 	nfiles--;
920 	FREE(fp, M_FILE);
921 }
922 
923 /*
924  * Build a new filedesc structure.
925  */
926 struct filedesc *
927 fdinit(p)
928 	struct proc *p;
929 {
930 	struct filedesc0 *newfdp;
931 	struct filedesc *fdp = p->p_fd;
932 
933 	MALLOC(newfdp, struct filedesc0 *, sizeof(struct filedesc0),
934 	    M_FILEDESC, M_WAITOK);
935 	bzero(newfdp, sizeof(struct filedesc0));
936 	newfdp->fd_fd.fd_cdir = fdp->fd_cdir;
937 	if (newfdp->fd_fd.fd_cdir)
938 		VREF(newfdp->fd_fd.fd_cdir);
939 	newfdp->fd_fd.fd_rdir = fdp->fd_rdir;
940 	if (newfdp->fd_fd.fd_rdir)
941 		VREF(newfdp->fd_fd.fd_rdir);
942 	newfdp->fd_fd.fd_jdir = fdp->fd_jdir;
943 	if (newfdp->fd_fd.fd_jdir)
944 		VREF(newfdp->fd_fd.fd_jdir);
945 
946 	/* Create the file descriptor table. */
947 	newfdp->fd_fd.fd_refcnt = 1;
948 	newfdp->fd_fd.fd_cmask = cmask;
949 	newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles;
950 	newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags;
951 	newfdp->fd_fd.fd_nfiles = NDFILE;
952 	newfdp->fd_fd.fd_knlistsize = -1;
953 
954 	return (&newfdp->fd_fd);
955 }
956 
957 /*
958  * Share a filedesc structure.
959  */
960 struct filedesc *
961 fdshare(p)
962 	struct proc *p;
963 {
964 	p->p_fd->fd_refcnt++;
965 	return (p->p_fd);
966 }
967 
968 /*
969  * Copy a filedesc structure.
970  */
971 struct filedesc *
972 fdcopy(p)
973 	struct proc *p;
974 {
975 	struct filedesc *newfdp, *fdp = p->p_fd;
976 	struct file **fpp;
977 	int i;
978 
979 	/* Certain daemons might not have file descriptors. */
980 	if (fdp == NULL)
981 		return (NULL);
982 
983 	MALLOC(newfdp, struct filedesc *, sizeof(struct filedesc0),
984 	    M_FILEDESC, M_WAITOK);
985 	bcopy(fdp, newfdp, sizeof(struct filedesc));
986 	if (newfdp->fd_cdir)
987 		VREF(newfdp->fd_cdir);
988 	if (newfdp->fd_rdir)
989 		VREF(newfdp->fd_rdir);
990 	if (newfdp->fd_jdir)
991 		VREF(newfdp->fd_jdir);
992 	newfdp->fd_refcnt = 1;
993 
994 	/*
995 	 * If the number of open files fits in the internal arrays
996 	 * of the open file structure, use them, otherwise allocate
997 	 * additional memory for the number of descriptors currently
998 	 * in use.
999 	 */
1000 	if (newfdp->fd_lastfile < NDFILE) {
1001 		newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles;
1002 		newfdp->fd_ofileflags =
1003 		    ((struct filedesc0 *) newfdp)->fd_dfileflags;
1004 		i = NDFILE;
1005 	} else {
1006 		/*
1007 		 * Compute the smallest multiple of NDEXTENT needed
1008 		 * for the file descriptors currently in use,
1009 		 * allowing the table to shrink.
1010 		 */
1011 		i = newfdp->fd_nfiles;
1012 		while (i > 2 * NDEXTENT && i > newfdp->fd_lastfile * 2)
1013 			i /= 2;
1014 		MALLOC(newfdp->fd_ofiles, struct file **, i * OFILESIZE,
1015 		    M_FILEDESC, M_WAITOK);
1016 		newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
1017 	}
1018 	newfdp->fd_nfiles = i;
1019 	bcopy(fdp->fd_ofiles, newfdp->fd_ofiles, i * sizeof(struct file **));
1020 	bcopy(fdp->fd_ofileflags, newfdp->fd_ofileflags, i * sizeof(char));
1021 
1022 	/*
1023 	 * kq descriptors cannot be copied.
1024 	 */
1025 	if (newfdp->fd_knlistsize != -1) {
1026 		fpp = &newfdp->fd_ofiles[newfdp->fd_lastfile];
1027 		for (i = newfdp->fd_lastfile; i >= 0; i--, fpp--) {
1028 			if (*fpp != NULL && (*fpp)->f_type == DTYPE_KQUEUE) {
1029 				*fpp = NULL;
1030 				if (i < newfdp->fd_freefile)
1031 					newfdp->fd_freefile = i;
1032 			}
1033 			if (*fpp == NULL && i == newfdp->fd_lastfile && i > 0)
1034 				newfdp->fd_lastfile--;
1035 		}
1036 		newfdp->fd_knlist = NULL;
1037 		newfdp->fd_knlistsize = -1;
1038 		newfdp->fd_knhash = NULL;
1039 		newfdp->fd_knhashmask = 0;
1040 	}
1041 
1042 	fpp = newfdp->fd_ofiles;
1043 	for (i = newfdp->fd_lastfile; i-- >= 0; fpp++) {
1044 		if (*fpp != NULL)
1045 			fhold(*fpp);
1046 	}
1047 	return (newfdp);
1048 }
1049 
1050 /*
1051  * Release a filedesc structure.
1052  */
1053 void
1054 fdfree(struct proc *p)
1055 {
1056 	struct thread *td = p->p_thread;
1057 	struct filedesc *fdp = p->p_fd;
1058 	struct file **fpp;
1059 	int i;
1060 	struct filedesc_to_leader *fdtol;
1061 	struct file *fp;
1062 	struct vnode *vp;
1063 	struct flock lf;
1064 
1065 	/* Certain daemons might not have file descriptors. */
1066 	if (fdp == NULL)
1067 		return;
1068 
1069 	/* Check for special need to clear POSIX style locks */
1070 	fdtol = p->p_fdtol;
1071 	if (fdtol != NULL) {
1072 		KASSERT(fdtol->fdl_refcount > 0,
1073 			("filedesc_to_refcount botch: fdl_refcount=%d",
1074 			 fdtol->fdl_refcount));
1075 		if (fdtol->fdl_refcount == 1 &&
1076 		    (p->p_leader->p_flag & P_ADVLOCK) != 0) {
1077 			i = 0;
1078 			fpp = fdp->fd_ofiles;
1079 			for (i = 0, fpp = fdp->fd_ofiles;
1080 			     i < fdp->fd_lastfile;
1081 			     i++, fpp++) {
1082 				if (*fpp == NULL ||
1083 				    (*fpp)->f_type != DTYPE_VNODE)
1084 					continue;
1085 				fp = *fpp;
1086 				fhold(fp);
1087 				lf.l_whence = SEEK_SET;
1088 				lf.l_start = 0;
1089 				lf.l_len = 0;
1090 				lf.l_type = F_UNLCK;
1091 				vp = (struct vnode *)fp->f_data;
1092 				(void) VOP_ADVLOCK(vp,
1093 						   (caddr_t)p->p_leader,
1094 						   F_UNLCK,
1095 						   &lf,
1096 						   F_POSIX);
1097 				fdrop(fp, p->p_thread);
1098 				fpp = fdp->fd_ofiles + i;
1099 			}
1100 		}
1101 	retry:
1102 		if (fdtol->fdl_refcount == 1) {
1103 			if (fdp->fd_holdleaderscount > 0 &&
1104 			    (p->p_leader->p_flag & P_ADVLOCK) != 0) {
1105 				/*
1106 				 * close() or do_dup() has cleared a reference
1107 				 * in a shared file descriptor table.
1108 				 */
1109 				fdp->fd_holdleaderswakeup = 1;
1110 				tsleep(&fdp->fd_holdleaderscount,
1111 				       0, "fdlhold", 0);
1112 				goto retry;
1113 			}
1114 			if (fdtol->fdl_holdcount > 0) {
1115 				/*
1116 				 * Ensure that fdtol->fdl_leader
1117 				 * remains valid in closef().
1118 				 */
1119 				fdtol->fdl_wakeup = 1;
1120 				tsleep(fdtol, 0, "fdlhold", 0);
1121 				goto retry;
1122 			}
1123 		}
1124 		fdtol->fdl_refcount--;
1125 		if (fdtol->fdl_refcount == 0 &&
1126 		    fdtol->fdl_holdcount == 0) {
1127 			fdtol->fdl_next->fdl_prev = fdtol->fdl_prev;
1128 			fdtol->fdl_prev->fdl_next = fdtol->fdl_next;
1129 		} else
1130 			fdtol = NULL;
1131 		p->p_fdtol = NULL;
1132 		if (fdtol != NULL)
1133 			FREE(fdtol, M_FILEDESC_TO_LEADER);
1134 	}
1135 	if (--fdp->fd_refcnt > 0)
1136 		return;
1137 	/*
1138 	 * we are the last reference to the structure, we can
1139 	 * safely assume it will not change out from under us.
1140 	 */
1141 	fpp = fdp->fd_ofiles;
1142 	for (i = fdp->fd_lastfile; i-- >= 0; fpp++) {
1143 		if (*fpp)
1144 			(void) closef(*fpp, td);
1145 	}
1146 	if (fdp->fd_nfiles > NDFILE)
1147 		FREE(fdp->fd_ofiles, M_FILEDESC);
1148 	if (fdp->fd_cdir)
1149 		vrele(fdp->fd_cdir);
1150 	if (fdp->fd_rdir)
1151 		vrele(fdp->fd_rdir);
1152 	if (fdp->fd_jdir)
1153 		vrele(fdp->fd_jdir);
1154 	if (fdp->fd_knlist)
1155 		FREE(fdp->fd_knlist, M_KQUEUE);
1156 	if (fdp->fd_knhash)
1157 		FREE(fdp->fd_knhash, M_KQUEUE);
1158 	FREE(fdp, M_FILEDESC);
1159 }
1160 
1161 /*
1162  * For setugid programs, we don't want to people to use that setugidness
1163  * to generate error messages which write to a file which otherwise would
1164  * otherwise be off-limits to the process.
1165  *
1166  * This is a gross hack to plug the hole.  A better solution would involve
1167  * a special vop or other form of generalized access control mechanism.  We
1168  * go ahead and just reject all procfs file systems accesses as dangerous.
1169  *
1170  * Since setugidsafety calls this only for fd 0, 1 and 2, this check is
1171  * sufficient.  We also don't for check setugidness since we know we are.
1172  */
1173 static int
1174 is_unsafe(struct file *fp)
1175 {
1176 	if (fp->f_type == DTYPE_VNODE &&
1177 	    ((struct vnode *)(fp->f_data))->v_tag == VT_PROCFS)
1178 		return (1);
1179 	return (0);
1180 }
1181 
1182 /*
1183  * Make this setguid thing safe, if at all possible.
1184  */
1185 void
1186 setugidsafety(struct proc *p)
1187 {
1188 	struct thread *td = p->p_thread;
1189 	struct filedesc *fdp = p->p_fd;
1190 	int i;
1191 
1192 	/* Certain daemons might not have file descriptors. */
1193 	if (fdp == NULL)
1194 		return;
1195 
1196 	/*
1197 	 * note: fdp->fd_ofiles may be reallocated out from under us while
1198 	 * we are blocked in a close.  Be careful!
1199 	 */
1200 	for (i = 0; i <= fdp->fd_lastfile; i++) {
1201 		if (i > 2)
1202 			break;
1203 		if (fdp->fd_ofiles[i] && is_unsafe(fdp->fd_ofiles[i])) {
1204 			struct file *fp;
1205 
1206 #if 0
1207 			if ((fdp->fd_ofileflags[i] & UF_MAPPED) != 0)
1208 				(void) munmapfd(p, i);
1209 #endif
1210 			if (i < fdp->fd_knlistsize)
1211 				knote_fdclose(p, i);
1212 			/*
1213 			 * NULL-out descriptor prior to close to avoid
1214 			 * a race while close blocks.
1215 			 */
1216 			fp = fdp->fd_ofiles[i];
1217 			fdp->fd_ofiles[i] = NULL;
1218 			fdp->fd_ofileflags[i] = 0;
1219 			if (i < fdp->fd_freefile)
1220 				fdp->fd_freefile = i;
1221 			(void) closef(fp, td);
1222 		}
1223 	}
1224 	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1225 		fdp->fd_lastfile--;
1226 }
1227 
1228 /*
1229  * Close any files on exec?
1230  */
1231 void
1232 fdcloseexec(struct proc *p)
1233 {
1234 	struct thread *td = p->p_thread;
1235 	struct filedesc *fdp = p->p_fd;
1236 	int i;
1237 
1238 	/* Certain daemons might not have file descriptors. */
1239 	if (fdp == NULL)
1240 		return;
1241 
1242 	/*
1243 	 * We cannot cache fd_ofiles or fd_ofileflags since operations
1244 	 * may block and rip them out from under us.
1245 	 */
1246 	for (i = 0; i <= fdp->fd_lastfile; i++) {
1247 		if (fdp->fd_ofiles[i] != NULL &&
1248 		    (fdp->fd_ofileflags[i] & UF_EXCLOSE)) {
1249 			struct file *fp;
1250 
1251 #if 0
1252 			if (fdp->fd_ofileflags[i] & UF_MAPPED)
1253 				(void) munmapfd(p, i);
1254 #endif
1255 			if (i < fdp->fd_knlistsize)
1256 				knote_fdclose(p, i);
1257 			/*
1258 			 * NULL-out descriptor prior to close to avoid
1259 			 * a race while close blocks.
1260 			 */
1261 			fp = fdp->fd_ofiles[i];
1262 			fdp->fd_ofiles[i] = NULL;
1263 			fdp->fd_ofileflags[i] = 0;
1264 			if (i < fdp->fd_freefile)
1265 				fdp->fd_freefile = i;
1266 			(void) closef(fp, td);
1267 		}
1268 	}
1269 	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1270 		fdp->fd_lastfile--;
1271 }
1272 
1273 /*
1274  * It is unsafe for set[ug]id processes to be started with file
1275  * descriptors 0..2 closed, as these descriptors are given implicit
1276  * significance in the Standard C library.  fdcheckstd() will create a
1277  * descriptor referencing /dev/null for each of stdin, stdout, and
1278  * stderr that is not already open.
1279  */
1280 int
1281 fdcheckstd(struct proc *p)
1282 {
1283 	struct thread *td = p->p_thread;
1284 	struct nameidata nd;
1285 	struct filedesc *fdp;
1286 	struct file *fp;
1287 	register_t retval;
1288 	int fd, i, error, flags, devnull;
1289 
1290        fdp = p->p_fd;
1291        if (fdp == NULL)
1292                return (0);
1293        devnull = -1;
1294        error = 0;
1295        for (i = 0; i < 3; i++) {
1296                if (fdp->fd_ofiles[i] != NULL)
1297                        continue;
1298                if (devnull < 0) {
1299                        error = falloc(p, &fp, &fd);
1300                        if (error != 0)
1301                                break;
1302                        NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE,
1303 			   "/dev/null", td);
1304                        flags = FREAD | FWRITE;
1305                        error = vn_open(&nd, flags, 0);
1306                        if (error != 0) {
1307                                fdp->fd_ofiles[i] = NULL;
1308                                fdrop(fp, td);
1309                                break;
1310                        }
1311                        NDFREE(&nd, NDF_ONLY_PNBUF);
1312                        fp->f_data = (caddr_t)nd.ni_vp;
1313                        fp->f_flag = flags;
1314                        fp->f_ops = &vnops;
1315                        fp->f_type = DTYPE_VNODE;
1316                        VOP_UNLOCK(nd.ni_vp, 0, td);
1317                        devnull = fd;
1318                } else {
1319                        error = fdalloc(p, 0, &fd);
1320                        if (error != 0)
1321                                break;
1322                        error = do_dup(fdp, devnull, fd, &retval, p);
1323                        if (error != 0)
1324                                break;
1325                }
1326        }
1327        return (error);
1328 }
1329 
1330 /*
1331  * Internal form of close.
1332  * Decrement reference count on file structure.
1333  * Note: td and/or p may be NULL when closing a file
1334  * that was being passed in a message.
1335  */
1336 int
1337 closef(struct file *fp, struct thread *td)
1338 {
1339 	struct vnode *vp;
1340 	struct flock lf;
1341 	struct filedesc_to_leader *fdtol;
1342 	struct proc *p;
1343 
1344 	if (fp == NULL)
1345 		return (0);
1346 	if (td == NULL) {
1347 		td = curthread;
1348 		p = NULL;		/* allow no proc association */
1349 	} else {
1350 		p = td->td_proc;	/* can also be NULL */
1351 	}
1352 	/*
1353 	 * POSIX record locking dictates that any close releases ALL
1354 	 * locks owned by this process.  This is handled by setting
1355 	 * a flag in the unlock to free ONLY locks obeying POSIX
1356 	 * semantics, and not to free BSD-style file locks.
1357 	 * If the descriptor was in a message, POSIX-style locks
1358 	 * aren't passed with the descriptor.
1359 	 */
1360 	if (p != NULL &&
1361 	    fp->f_type == DTYPE_VNODE) {
1362 		if ((p->p_leader->p_flag & P_ADVLOCK) != 0) {
1363 			lf.l_whence = SEEK_SET;
1364 			lf.l_start = 0;
1365 			lf.l_len = 0;
1366 			lf.l_type = F_UNLCK;
1367 			vp = (struct vnode *)fp->f_data;
1368 			(void) VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK,
1369 					   &lf, F_POSIX);
1370 		}
1371 		fdtol = p->p_fdtol;
1372 		if (fdtol != NULL) {
1373 			/*
1374 			 * Handle special case where file descriptor table
1375 			 * is shared between multiple process leaders.
1376 			 */
1377 			for (fdtol = fdtol->fdl_next;
1378 			     fdtol != p->p_fdtol;
1379 			     fdtol = fdtol->fdl_next) {
1380 				if ((fdtol->fdl_leader->p_flag &
1381 				     P_ADVLOCK) == 0)
1382 					continue;
1383 				fdtol->fdl_holdcount++;
1384 				lf.l_whence = SEEK_SET;
1385 				lf.l_start = 0;
1386 				lf.l_len = 0;
1387 				lf.l_type = F_UNLCK;
1388 				vp = (struct vnode *)fp->f_data;
1389 				(void) VOP_ADVLOCK(vp,
1390 						   (caddr_t)p->p_leader,
1391 						   F_UNLCK, &lf, F_POSIX);
1392 				fdtol->fdl_holdcount--;
1393 				if (fdtol->fdl_holdcount == 0 &&
1394 				    fdtol->fdl_wakeup != 0) {
1395 					fdtol->fdl_wakeup = 0;
1396 					wakeup(fdtol);
1397 				}
1398 			}
1399 		}
1400 	}
1401 	return (fdrop(fp, td));
1402 }
1403 
1404 int
1405 fdrop(struct file *fp, struct thread *td)
1406 {
1407 	struct flock lf;
1408 	struct vnode *vp;
1409 	int error;
1410 
1411 	if (--fp->f_count > 0)
1412 		return (0);
1413 	if (fp->f_count < 0)
1414 		panic("fdrop: count < 0");
1415 	if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) {
1416 		lf.l_whence = SEEK_SET;
1417 		lf.l_start = 0;
1418 		lf.l_len = 0;
1419 		lf.l_type = F_UNLCK;
1420 		vp = (struct vnode *)fp->f_data;
1421 		(void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
1422 	}
1423 	if (fp->f_ops != &badfileops)
1424 		error = fo_close(fp, td);
1425 	else
1426 		error = 0;
1427 	ffree(fp);
1428 	return (error);
1429 }
1430 
1431 /*
1432  * Apply an advisory lock on a file descriptor.
1433  *
1434  * Just attempt to get a record lock of the requested type on
1435  * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
1436  */
1437 /* ARGSUSED */
1438 int
1439 flock(struct flock_args *uap)
1440 {
1441 	struct proc *p = curproc;
1442 	struct filedesc *fdp = p->p_fd;
1443 	struct file *fp;
1444 	struct vnode *vp;
1445 	struct flock lf;
1446 
1447 	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
1448 	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
1449 		return (EBADF);
1450 	if (fp->f_type != DTYPE_VNODE)
1451 		return (EOPNOTSUPP);
1452 	vp = (struct vnode *)fp->f_data;
1453 	lf.l_whence = SEEK_SET;
1454 	lf.l_start = 0;
1455 	lf.l_len = 0;
1456 	if (uap->how & LOCK_UN) {
1457 		lf.l_type = F_UNLCK;
1458 		fp->f_flag &= ~FHASLOCK;
1459 		return (VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK));
1460 	}
1461 	if (uap->how & LOCK_EX)
1462 		lf.l_type = F_WRLCK;
1463 	else if (uap->how & LOCK_SH)
1464 		lf.l_type = F_RDLCK;
1465 	else
1466 		return (EBADF);
1467 	fp->f_flag |= FHASLOCK;
1468 	if (uap->how & LOCK_NB)
1469 		return (VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK));
1470 	return (VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK|F_WAIT));
1471 }
1472 
1473 /*
1474  * File Descriptor pseudo-device driver (/dev/fd/).
1475  *
1476  * Opening minor device N dup()s the file (if any) connected to file
1477  * descriptor N belonging to the calling process.  Note that this driver
1478  * consists of only the ``open()'' routine, because all subsequent
1479  * references to this file will be direct to the other driver.
1480  */
1481 /* ARGSUSED */
1482 static int
1483 fdopen(dev_t dev, int mode, int type, struct thread *td)
1484 {
1485 	KKASSERT(td->td_proc != NULL);
1486 
1487 	/*
1488 	 * XXX Kludge: set curproc->p_dupfd to contain the value of the
1489 	 * the file descriptor being sought for duplication. The error
1490 	 * return ensures that the vnode for this device will be released
1491 	 * by vn_open. Open will detect this special error and take the
1492 	 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN
1493 	 * will simply report the error.
1494 	 */
1495 	td->td_proc->p_dupfd = minor(dev);
1496 	return (ENODEV);
1497 }
1498 
1499 /*
1500  * Duplicate the specified descriptor to a free descriptor.
1501  */
1502 int
1503 dupfdopen(struct filedesc *fdp, int indx, int dfd, int mode, int error)
1504 {
1505 	struct file *wfp;
1506 	struct file *fp;
1507 
1508 	/*
1509 	 * If the to-be-dup'd fd number is greater than the allowed number
1510 	 * of file descriptors, or the fd to be dup'd has already been
1511 	 * closed, then reject.
1512 	 */
1513 	if ((u_int)dfd >= fdp->fd_nfiles ||
1514 	    (wfp = fdp->fd_ofiles[dfd]) == NULL) {
1515 		return (EBADF);
1516 	}
1517 
1518 	/*
1519 	 * There are two cases of interest here.
1520 	 *
1521 	 * For ENODEV simply dup (dfd) to file descriptor
1522 	 * (indx) and return.
1523 	 *
1524 	 * For ENXIO steal away the file structure from (dfd) and
1525 	 * store it in (indx).  (dfd) is effectively closed by
1526 	 * this operation.
1527 	 *
1528 	 * Any other error code is just returned.
1529 	 */
1530 	switch (error) {
1531 	case ENODEV:
1532 		/*
1533 		 * Check that the mode the file is being opened for is a
1534 		 * subset of the mode of the existing descriptor.
1535 		 */
1536 		if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag)
1537 			return (EACCES);
1538 		fp = fdp->fd_ofiles[indx];
1539 #if 0
1540 		if (fp && fdp->fd_ofileflags[indx] & UF_MAPPED)
1541 			(void) munmapfd(p, indx);
1542 #endif
1543 		fdp->fd_ofiles[indx] = wfp;
1544 		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1545 		fhold(wfp);
1546 		if (indx > fdp->fd_lastfile)
1547 			fdp->fd_lastfile = indx;
1548 		/*
1549 		 * we now own the reference to fp that the ofiles[] array
1550 		 * used to own.  Release it.
1551 		 */
1552 		if (fp)
1553 			fdrop(fp, curthread);
1554 		return (0);
1555 
1556 	case ENXIO:
1557 		/*
1558 		 * Steal away the file pointer from dfd, and stuff it into indx.
1559 		 */
1560 		fp = fdp->fd_ofiles[indx];
1561 #if 0
1562 		if (fp && fdp->fd_ofileflags[indx] & UF_MAPPED)
1563 			(void) munmapfd(p, indx);
1564 #endif
1565 		fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
1566 		fdp->fd_ofiles[dfd] = NULL;
1567 		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1568 		fdp->fd_ofileflags[dfd] = 0;
1569 
1570 		/*
1571 		 * we now own the reference to fp that the ofiles[] array
1572 		 * used to own.  Release it.
1573 		 */
1574 		if (fp)
1575 			fdrop(fp, curthread);
1576 		/*
1577 		 * Complete the clean up of the filedesc structure by
1578 		 * recomputing the various hints.
1579 		 */
1580 		if (indx > fdp->fd_lastfile) {
1581 			fdp->fd_lastfile = indx;
1582 		} else {
1583 			while (fdp->fd_lastfile > 0 &&
1584 			   fdp->fd_ofiles[fdp->fd_lastfile] == NULL) {
1585 				fdp->fd_lastfile--;
1586 			}
1587 			if (dfd < fdp->fd_freefile)
1588 				fdp->fd_freefile = dfd;
1589 		}
1590 		return (0);
1591 
1592 	default:
1593 		return (error);
1594 	}
1595 	/* NOTREACHED */
1596 }
1597 
1598 
1599 struct filedesc_to_leader *
1600 filedesc_to_leader_alloc(struct filedesc_to_leader *old,
1601 			 struct proc *leader)
1602 {
1603 	struct filedesc_to_leader *fdtol;
1604 
1605 	MALLOC(fdtol, struct filedesc_to_leader *,
1606 	       sizeof(struct filedesc_to_leader),
1607 	       M_FILEDESC_TO_LEADER,
1608 	       M_WAITOK);
1609 	fdtol->fdl_refcount = 1;
1610 	fdtol->fdl_holdcount = 0;
1611 	fdtol->fdl_wakeup = 0;
1612 	fdtol->fdl_leader = leader;
1613 	if (old != NULL) {
1614 		fdtol->fdl_next = old->fdl_next;
1615 		fdtol->fdl_prev = old;
1616 		old->fdl_next = fdtol;
1617 		fdtol->fdl_next->fdl_prev = fdtol;
1618 	} else {
1619 		fdtol->fdl_next = fdtol;
1620 		fdtol->fdl_prev = fdtol;
1621 	}
1622 	return fdtol;
1623 }
1624 
1625 /*
1626  * Get file structures.
1627  */
1628 static int
1629 sysctl_kern_file(SYSCTL_HANDLER_ARGS)
1630 {
1631 	int error;
1632 	struct file *fp;
1633 
1634 	if (!req->oldptr) {
1635 		/*
1636 		 * overestimate by 10 files
1637 		 */
1638 		return (SYSCTL_OUT(req, 0, sizeof(filehead) +
1639 				(nfiles + 10) * sizeof(struct file)));
1640 	}
1641 
1642 	error = SYSCTL_OUT(req, (caddr_t)&filehead, sizeof(filehead));
1643 	if (error)
1644 		return (error);
1645 
1646 	/*
1647 	 * followed by an array of file structures
1648 	 */
1649 	LIST_FOREACH(fp, &filehead, f_list) {
1650 		error = SYSCTL_OUT(req, (caddr_t)fp, sizeof (struct file));
1651 		if (error)
1652 			return (error);
1653 	}
1654 	return (0);
1655 }
1656 
/*
 * sysctl nodes declared under the _kern parent.
 */

/* Read-only opaque node served by sysctl_kern_file(). */
SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD,
    0, 0, sysctl_kern_file, "S,file", "Entire file table");

/* Read-write integer backed by the maxfilesperproc variable. */
SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW,
    &maxfilesperproc, 0, "Maximum files allowed open per process");

/* Read-write integer backed by the maxfiles variable. */
SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW,
    &maxfiles, 0, "Maximum number of files");

/* Read-only integer backed by the nfiles counter. */
SYSCTL_INT(_kern, OID_AUTO, openfiles, CTLFLAG_RD,
	&nfiles, 0, "System-wide number of open files");
1668 
1669 static void
1670 fildesc_drvinit(void *unused)
1671 {
1672 	int fd;
1673 
1674 	for (fd = 0; fd < NUMFDESC; fd++)
1675 		make_dev(&fildesc_cdevsw, fd,
1676 		    UID_BIN, GID_BIN, 0666, "fd/%d", fd);
1677 	make_dev(&fildesc_cdevsw, 0, UID_ROOT, GID_WHEEL, 0666, "stdin");
1678 	make_dev(&fildesc_cdevsw, 1, UID_ROOT, GID_WHEEL, 0666, "stdout");
1679 	make_dev(&fildesc_cdevsw, 2, UID_ROOT, GID_WHEEL, 0666, "stderr");
1680 }
1681 
/*
 * File operations vector built from the badfo_*() stubs below.
 * fdrop() compares f_ops against this vector and skips fo_close() for
 * files that use it.
 *
 * NOTE(review): the initializer is positional; apart from the two slots
 * already labelled, the slot meanings are inferred from the handler
 * names -- confirm against the struct fileops declaration.
 */
struct fileops badfileops = {
	NULL,	/* port */
	0,	/* autoq */
	badfo_readwrite,	/* presumably read */
	badfo_readwrite,	/* presumably write */
	badfo_ioctl,
	badfo_poll,
	badfo_kqfilter,
	badfo_stat,
	badfo_close
};
1693 
/*
 * badfileops read/write handler: ignores every argument and always
 * fails with EBADF.
 */
static int
badfo_readwrite(struct file *fp, struct uio *uio, struct ucred *cred,
    int flags, struct thread *td)
{
	return (EBADF);
}
1704 
/*
 * badfileops ioctl handler: ignores every argument and always fails
 * with EBADF.
 */
static int
badfo_ioctl(struct file *fp, u_long com, caddr_t data, struct thread *td)
{
	return (EBADF);
}
1710 
/*
 * badfileops poll handler: ignores every argument and always returns 0.
 */
static int
badfo_poll(struct file *fp, int events, struct ucred *cred, struct thread *td)
{
	return (0);
}
1716 
/*
 * badfileops kqfilter handler: ignores both arguments and always
 * returns 0.
 */
static int
badfo_kqfilter(struct file *fp, struct knote *kn)
{
	return (0);
}
1722 
/*
 * badfileops stat handler: leaves *sb untouched and always fails with
 * EBADF.
 */
static int
badfo_stat(struct file *fp, struct stat *sb, struct thread *td)
{
	return (EBADF);
}
1728 
/*
 * badfileops close handler: always fails with EBADF.  fdrop() does not
 * call fo_close() for files whose f_ops is &badfileops.
 */
static int
badfo_close(struct file *fp, struct thread *td)
{
	return (EBADF);
}
1734 
/*
 * Register fildesc_drvinit() (with a NULL argument) through SYSINIT in
 * the SI_SUB_DRIVERS subsystem at order SI_ORDER_MIDDLE+CDEV_MAJOR.
 */
SYSINIT(fildescdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,
					fildesc_drvinit,NULL)
1737