xref: /openbsd/sys/kern/sys_generic.c (revision 610f49f8)
1 /*	$OpenBSD: sys_generic.c,v 1.37 2002/02/13 19:08:06 art Exp $	*/
2 /*	$NetBSD: sys_generic.c,v 1.24 1996/03/29 00:25:32 cgd Exp $	*/
3 
4 /*
5  * Copyright (c) 1996 Theo de Raadt
6  * Copyright (c) 1982, 1986, 1989, 1993
7  *	The Regents of the University of California.  All rights reserved.
8  * (c) UNIX System Laboratories, Inc.
9  * All or some portions of this file are derived from material licensed
10  * to the University of California by American Telephone and Telegraph
11  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
12  * the permission of UNIX System Laboratories, Inc.
13  *
14  * Redistribution and use in source and binary forms, with or without
15  * modification, are permitted provided that the following conditions
16  * are met:
17  * 1. Redistributions of source code must retain the above copyright
18  *    notice, this list of conditions and the following disclaimer.
19  * 2. Redistributions in binary form must reproduce the above copyright
20  *    notice, this list of conditions and the following disclaimer in the
21  *    documentation and/or other materials provided with the distribution.
22  * 3. All advertising materials mentioning features or use of this software
23  *    must display the following acknowledgement:
24  *	This product includes software developed by the University of
25  *	California, Berkeley and its contributors.
26  * 4. Neither the name of the University nor the names of its contributors
27  *    may be used to endorse or promote products derived from this software
28  *    without specific prior written permission.
29  *
30  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
31  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
32  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
34  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
35  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
36  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
37  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
38  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
39  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
40  * SUCH DAMAGE.
41  *
42  *	@(#)sys_generic.c	8.5 (Berkeley) 1/21/94
43  */
44 
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/filedesc.h>
48 #include <sys/ioctl.h>
49 #include <sys/file.h>
50 #include <sys/proc.h>
51 #include <sys/resourcevar.h>
52 #include <sys/socketvar.h>
53 #include <sys/signalvar.h>
54 #include <sys/uio.h>
55 #include <sys/kernel.h>
56 #include <sys/stat.h>
57 #include <sys/malloc.h>
58 #include <sys/poll.h>
59 #ifdef KTRACE
60 #include <sys/ktrace.h>
61 #endif
62 
63 #include <sys/mount.h>
64 #include <sys/syscallargs.h>
65 
66 int selscan __P((struct proc *, fd_set *, fd_set *, int, register_t *));
67 int seltrue __P((dev_t, int, struct proc *));
68 void pollscan __P((struct proc *, struct pollfd *, int, register_t *));
69 
70 /*
71  * Read system call.
72  */
73 /* ARGSUSED */
74 int
75 sys_read(p, v, retval)
76 	struct proc *p;
77 	void *v;
78 	register_t *retval;
79 {
80 	struct sys_read_args /* {
81 		syscallarg(int) fd;
82 		syscallarg(void *) buf;
83 		syscallarg(size_t) nbyte;
84 	} */ *uap = v;
85 	int fd = SCARG(uap, fd);
86 	struct file *fp;
87 	struct filedesc *fdp = p->p_fd;
88 
89 	if ((fp = fd_getfile(fdp, fd)) == NULL)
90 		return (EBADF);
91 	if ((fp->f_flag & FREAD) == 0)
92 		return (EBADF);
93 
94 	FREF(fp);
95 
96 	/* dofileread() will FRELE the descriptor for us */
97 	return (dofileread(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
98 	    &fp->f_offset, retval));
99 }
100 
/*
 * Common code for read(2) and pread-style callers: perform a single-iovec
 * read through fp's fo_read method.  Consumes the FREF reference the
 * caller took on fp (FRELE on every exit path).  On success *retval is
 * the byte count transferred.
 */
int
dofileread(p, fd, fp, buf, nbyte, offset, retval)
	struct proc *p;
	int fd;
	struct file *fp;
	void *buf;
	size_t nbyte;
	off_t *offset;
	register_t *retval;
{
	struct uio auio;
	struct iovec aiov;
	long cnt, error = 0;
#ifdef KTRACE
	struct iovec ktriov;
#endif

	/* Describe the user buffer as a one-element uio. */
	aiov.iov_base = (caddr_t)buf;
	aiov.iov_len = nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = nbyte;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;

	/*
	 * Reads return ssize_t because -1 is returned on error.  Therefore
	 * we must restrict the length to SSIZE_MAX to avoid garbage return
	 * values.
	 */
	if (auio.uio_resid > SSIZE_MAX) {
		error = EINVAL;
		goto out;
	}

#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO))
		ktriov = aiov;
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred);
	/*
	 * A partial transfer interrupted by a signal or non-blocking
	 * condition is reported as success for the bytes moved.
	 */
	if (error)
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	cnt -= auio.uio_resid;	/* bytes actually transferred */
#ifdef KTRACE
	if (KTRPOINT(p, KTR_GENIO) && error == 0)
		ktrgenio(p, fd, UIO_READ, &ktriov, cnt, error);
#endif
	*retval = cnt;
 out:
	/* Release the reference taken by our caller. */
	FRELE(fp);
	return (error);
}
160 
161 /*
162  * Scatter read system call.
163  */
164 int
165 sys_readv(p, v, retval)
166 	struct proc *p;
167 	void *v;
168 	register_t *retval;
169 {
170 	struct sys_readv_args /* {
171 		syscallarg(int) fd;
172 		syscallarg(const struct iovec *) iovp;
173 		syscallarg(int) iovcnt;
174 	} */ *uap = v;
175 	int fd = SCARG(uap, fd);
176 	struct file *fp;
177 	struct filedesc *fdp = p->p_fd;
178 
179 	if ((fp = fd_getfile(fdp, fd)) == NULL)
180 		return (EBADF);
181 	if ((fp->f_flag & FREAD) == 0)
182 		return (EBADF);
183 
184 	FREF(fp);
185 
186 	/* dofilereadv() will FRELE the descriptor for us */
187 	return (dofilereadv(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
188 	    &fp->f_offset, retval));
189 }
190 
/*
 * Common code for readv(2): copy in the user's iovec array (small arrays
 * use the on-stack aiov[], larger ones are malloc'd), validate the total
 * length, and perform the read through fp's fo_read method.  Consumes the
 * FREF reference the caller took on fp.
 */
int
dofilereadv(p, fd, fp, iovp, iovcnt, offset, retval)
	struct proc *p;
	int fd;
	struct file *fp;
	const struct iovec *iovp;
	int iovcnt;
	off_t *offset;
	register_t *retval;
{
	struct uio auio;
	struct iovec *iov;
	struct iovec *needfree;	/* non-NULL iff iov was malloc'd */
	struct iovec aiov[UIO_SMALLIOV];
	long i, cnt, error = 0;
	u_int iovlen;
#ifdef KTRACE
	struct iovec *ktriov = NULL;
#endif

	/* note: can't use iovlen until iovcnt is validated */
	iovlen = iovcnt * sizeof(struct iovec);
	if ((u_int)iovcnt > UIO_SMALLIOV) {
		if ((u_int)iovcnt > IOV_MAX) {
			error = EINVAL;
			goto out;
		}
		iov = needfree = malloc(iovlen, M_IOV, M_WAITOK);
	} else if ((u_int)iovcnt > 0) {
		iov = aiov;
		needfree = NULL;
	} else {
		/* iovcnt == 0 (or negative, via the u_int cast above) */
		error = EINVAL;
		goto out;
	}

	auio.uio_iov = iov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;
	error = copyin(iovp, iov, iovlen);
	if (error)
		goto done;
	auio.uio_resid = 0;
	for (i = 0; i < iovcnt; i++) {
		auio.uio_resid += iov->iov_len;
		/*
		 * Reads return ssize_t because -1 is returned on error.
		 * Therefore we must restrict the length to SSIZE_MAX to
		 * avoid garbage return values.
		 */
		if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
			error = EINVAL;
			goto done;
		}
		iov++;
	}
#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO))  {
		ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
		bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
	}
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred);
	/* A partial transfer cut short by a signal still counts as success. */
	if (error)
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	cnt -= auio.uio_resid;	/* bytes actually transferred */
#ifdef KTRACE
	if (ktriov != NULL) {
		if (error == 0)
			ktrgenio(p, fd, UIO_READ, ktriov, cnt,
			    error);
		free(ktriov, M_TEMP);
	}
#endif
	*retval = cnt;
 done:
	if (needfree)
		free(needfree, M_IOV);
 out:
	/* Release the reference taken by our caller. */
	FRELE(fp);
	return (error);
}
281 
282 /*
283  * Write system call
284  */
285 int
286 sys_write(p, v, retval)
287 	struct proc *p;
288 	void *v;
289 	register_t *retval;
290 {
291 	struct sys_write_args /* {
292 		syscallarg(int) fd;
293 		syscallarg(const void *) buf;
294 		syscallarg(size_t) nbyte;
295 	} */ *uap = v;
296 	int fd = SCARG(uap, fd);
297 	struct file *fp;
298 	struct filedesc *fdp = p->p_fd;
299 
300 	if ((fp = fd_getfile(fdp, fd)) == NULL)
301 		return (EBADF);
302 	if ((fp->f_flag & FWRITE) == 0)
303 		return (EBADF);
304 
305 	FREF(fp);
306 
307 	/* dofilewrite() will FRELE the descriptor for us */
308 	return (dofilewrite(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
309 	    &fp->f_offset, retval));
310 }
311 
/*
 * Common code for write(2): perform a single-iovec write through fp's
 * fo_write method.  Consumes the FREF reference the caller took on fp.
 * Posts SIGPIPE to the process on EPIPE, matching traditional write
 * semantics.
 */
int
dofilewrite(p, fd, fp, buf, nbyte, offset, retval)
	struct proc *p;
	int fd;
	struct file *fp;
	const void *buf;
	size_t nbyte;
	off_t *offset;
	register_t *retval;
{
	struct uio auio;
	struct iovec aiov;
	long cnt, error = 0;
#ifdef KTRACE
	struct iovec ktriov;
#endif

	/* Describe the user buffer as a one-element uio. */
	aiov.iov_base = (caddr_t)buf;		/* XXX kills const */
	aiov.iov_len = nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = nbyte;
	auio.uio_rw = UIO_WRITE;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;

	/*
	 * Writes return ssize_t because -1 is returned on error.  Therefore
	 * we must restrict the length to SSIZE_MAX to avoid garbage return
	 * values.
	 */
	if (auio.uio_resid > SSIZE_MAX) {
		error = EINVAL;
		goto out;
	}

#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO))
		ktriov = aiov;
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred);
	if (error) {
		/* Partial transfer cut short by a signal counts as success. */
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		if (error == EPIPE)
			psignal(p, SIGPIPE);
	}
	cnt -= auio.uio_resid;	/* bytes actually transferred */
#ifdef KTRACE
	if (KTRPOINT(p, KTR_GENIO) && error == 0)
		ktrgenio(p, fd, UIO_WRITE, &ktriov, cnt, error);
#endif
	*retval = cnt;
 out:
	/* Release the reference taken by our caller. */
	FRELE(fp);
	return (error);
}
374 
375 /*
376  * Gather write system call
377  */
378 int
379 sys_writev(p, v, retval)
380 	struct proc *p;
381 	void *v;
382 	register_t *retval;
383 {
384 	struct sys_writev_args /* {
385 		syscallarg(int) fd;
386 		syscallarg(const struct iovec *) iovp;
387 		syscallarg(int) iovcnt;
388 	} */ *uap = v;
389 	int fd = SCARG(uap, fd);
390 	struct file *fp;
391 	struct filedesc *fdp = p->p_fd;
392 
393 	if ((fp = fd_getfile(fdp, fd)) == NULL)
394 		return (EBADF);
395 	if ((fp->f_flag & FWRITE) == 0)
396 		return (EBADF);
397 
398 	FREF(fp);
399 
400 	/* dofilewritev() will FRELE the descriptor for us */
401 	return (dofilewritev(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
402 	    &fp->f_offset, retval));
403 }
404 
405 int
406 dofilewritev(p, fd, fp, iovp, iovcnt, offset, retval)
407 	struct proc *p;
408 	int fd;
409 	struct file *fp;
410 	const struct iovec *iovp;
411 	int iovcnt;
412 	off_t *offset;
413 	register_t *retval;
414 {
415 	struct uio auio;
416 	struct iovec *iov;
417 	struct iovec *needfree;
418 	struct iovec aiov[UIO_SMALLIOV];
419 	long i, cnt, error = 0;
420 	u_int iovlen;
421 #ifdef KTRACE
422 	struct iovec *ktriov = NULL;
423 #endif
424 
425 	/* note: can't use iovlen until iovcnt is validated */
426 	iovlen = iovcnt * sizeof(struct iovec);
427 	if ((u_int)iovcnt > UIO_SMALLIOV) {
428 		if ((u_int)iovcnt > IOV_MAX)
429 			return (EINVAL);
430 		iov = needfree = malloc(iovlen, M_IOV, M_WAITOK);
431 	} else if ((u_int)iovcnt > 0) {
432 		iov = aiov;
433 		needfree = NULL;
434 	} else {
435 		error = EINVAL;
436 		goto out;
437 	}
438 
439 	auio.uio_iov = iov;
440 	auio.uio_iovcnt = iovcnt;
441 	auio.uio_rw = UIO_WRITE;
442 	auio.uio_segflg = UIO_USERSPACE;
443 	auio.uio_procp = p;
444 	error = copyin(iovp, iov, iovlen);
445 	if (error)
446 		goto done;
447 	auio.uio_resid = 0;
448 	for (i = 0; i < iovcnt; i++) {
449 		auio.uio_resid += iov->iov_len;
450 		/*
451 		 * Writes return ssize_t because -1 is returned on error.
452 		 * Therefore we must restrict the length to SSIZE_MAX to
453 		 * avoid garbage return values.
454 		 */
455 		if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
456 			error = EINVAL;
457 			goto done;
458 		}
459 		iov++;
460 	}
461 #ifdef KTRACE
462 	/*
463 	 * if tracing, save a copy of iovec
464 	 */
465 	if (KTRPOINT(p, KTR_GENIO))  {
466 		ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
467 		bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
468 	}
469 #endif
470 	cnt = auio.uio_resid;
471 	error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred);
472 	if (error) {
473 		if (auio.uio_resid != cnt && (error == ERESTART ||
474 		    error == EINTR || error == EWOULDBLOCK))
475 			error = 0;
476 		if (error == EPIPE)
477 			psignal(p, SIGPIPE);
478 	}
479 	cnt -= auio.uio_resid;
480 #ifdef KTRACE
481 	if (ktriov != NULL) {
482 		if (error == 0)
483 			ktrgenio(p, fd, UIO_WRITE, ktriov, cnt,
484 			    error);
485 		free(ktriov, M_TEMP);
486 	}
487 #endif
488 	*retval = cnt;
489  done:
490 	if (needfree)
491 		free(needfree, M_IOV);
492  out:
493 	FRELE(fp);
494 	return (error);
495 }
496 
/*
 * Ioctl system call.  Decodes the command word, shuttles argument data
 * between user space and a kernel buffer (on-stack for small arguments,
 * malloc'd otherwise), handles a few generic commands inline, and passes
 * the rest to the file's fo_ioctl method.
 */
/* ARGSUSED */
int
sys_ioctl(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_ioctl_args /* {
		syscallarg(int) fd;
		syscallarg(u_long) com;
		syscallarg(caddr_t) data;
	} */ *uap = v;
	struct file *fp;
	struct filedesc *fdp;
	u_long com;
	int error;
	u_int size;
	caddr_t data, memp;
	int tmp;
#define STK_PARAMS	128
	char stkbuf[STK_PARAMS];	/* argument buffer for small sizes */

	fdp = p->p_fd;
	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
		return (EBADF);

	if ((fp->f_flag & (FREAD | FWRITE)) == 0)
		return (EBADF);

	/*
	 * Close-on-exec flag manipulation touches only the descriptor
	 * table, so it is handled here before FREF and without calling
	 * into the file.
	 */
	switch (com = SCARG(uap, com)) {
	case FIONCLEX:
		fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE;
		return (0);
	case FIOCLEX:
		fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE;
		return (0);
	}

	/*
	 * Interpret high order word to find amount of data to be
	 * copied to/from the user's address space.
	 */
	size = IOCPARM_LEN(com);
	if (size > IOCPARM_MAX)
		return (ENOTTY);
	FREF(fp);
	memp = NULL;
	if (size > sizeof (stkbuf)) {
		memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
		data = memp;
	} else
		data = stkbuf;
	if (com&IOC_IN) {
		if (size) {
			error = copyin(SCARG(uap, data), data, (u_int)size);
			if (error) {
				goto out;
			}
		} else
			/* Zero-size IOC_IN: the "data" arg is the value. */
			*(caddr_t *)data = SCARG(uap, data);
	} else if ((com&IOC_OUT) && size)
		/*
		 * Zero the buffer so the user always
		 * gets back something deterministic.
		 */
		bzero(data, size);
	else if (com&IOC_VOID)
		*(caddr_t *)data = SCARG(uap, data);

	switch (com) {

	case FIONBIO:
		/* Mirror the request in f_flag, then inform the file. */
		if ((tmp = *(int *)data) != 0)
			fp->f_flag |= FNONBLOCK;
		else
			fp->f_flag &= ~FNONBLOCK;
		error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
		break;

	case FIOASYNC:
		/* Mirror the request in f_flag, then inform the file. */
		if ((tmp = *(int *)data) != 0)
			fp->f_flag |= FASYNC;
		else
			fp->f_flag &= ~FASYNC;
		error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
		break;

	case FIOSETOWN:
		tmp = *(int *)data;
		if (fp->f_type == DTYPE_SOCKET) {
			/* Sockets record the owner directly. */
			struct socket *so = (struct socket *)fp->f_data;

			so->so_pgid = tmp;
			so->so_siguid = p->p_cred->p_ruid;
			so->so_sigeuid = p->p_ucred->cr_uid;
			error = 0;
			break;
		}
		/*
		 * Non-sockets get TIOCSPGRP: negative/zero values already
		 * name a process group; positive values name a pid whose
		 * process group is used.
		 */
		if (tmp <= 0) {
			tmp = -tmp;
		} else {
			struct proc *p1 = pfind(tmp);
			if (p1 == 0) {
				error = ESRCH;
				break;
			}
			tmp = p1->p_pgrp->pg_id;
		}
		error = (*fp->f_ops->fo_ioctl)
			(fp, TIOCSPGRP, (caddr_t)&tmp, p);
		break;

	case FIOGETOWN:
		if (fp->f_type == DTYPE_SOCKET) {
			error = 0;
			*(int *)data = ((struct socket *)fp->f_data)->so_pgid;
			break;
		}
		/* Translate TIOCGPGRP's pgrp back into FIOSETOWN's sign convention. */
		error = (*fp->f_ops->fo_ioctl)(fp, TIOCGPGRP, data, p);
		*(int *)data = -*(int *)data;
		break;

	default:
		error = (*fp->f_ops->fo_ioctl)(fp, com, data, p);
		/*
		 * Copy any data to user, size was
		 * already set and checked above.
		 */
		if (error == 0 && (com&IOC_OUT) && size)
			error = copyout(data, SCARG(uap, data), (u_int)size);
		break;
	}
out:
	FRELE(fp);
	if (memp)
		free(memp, M_IOCTLOPS);
	return (error);
}
638 
639 int	selwait, nselcoll;
640 
/*
 * Select system call.  Copies in up to three fd_sets (read/write/except),
 * repeatedly scans them with selscan(), and sleeps on the global selwait
 * channel between scans.  Oversized nd values (beyond FD_SETSIZE) are
 * supported by allocating six contiguous bitmaps from M_TEMP.
 */
int
sys_select(p, v, retval)
	register struct proc *p;
	void *v;
	register_t *retval;
{
	register struct sys_select_args /* {
		syscallarg(int) nd;
		syscallarg(fd_set *) in;
		syscallarg(fd_set *) ou;
		syscallarg(fd_set *) ex;
		syscallarg(struct timeval *) tv;
	} */ *uap = v;
	fd_set bits[6], *pibits[3], *pobits[3];	/* 3 input + 3 output sets */
	struct timeval atv;
	int s, ncoll, error = 0, timo;
	u_int ni;	/* bytes per fd_set actually used */

	if (SCARG(uap, nd) > p->p_fd->fd_nfiles) {
		/* forgiving; slightly wrong */
		SCARG(uap, nd) = p->p_fd->fd_nfiles;
	}
	ni = howmany(SCARG(uap, nd), NFDBITS) * sizeof(fd_mask);
	if (SCARG(uap, nd) > FD_SETSIZE) {
		/* Too big for the on-stack bits[]; allocate all six sets. */
		caddr_t mbits;

		mbits = malloc(ni * 6, M_TEMP, M_WAITOK);
		bzero(mbits, ni * 6);
		pibits[0] = (fd_set *)&mbits[ni * 0];
		pibits[1] = (fd_set *)&mbits[ni * 1];
		pibits[2] = (fd_set *)&mbits[ni * 2];
		pobits[0] = (fd_set *)&mbits[ni * 3];
		pobits[1] = (fd_set *)&mbits[ni * 4];
		pobits[2] = (fd_set *)&mbits[ni * 5];
	} else {
		bzero((caddr_t)bits, sizeof(bits));
		pibits[0] = &bits[0];
		pibits[1] = &bits[1];
		pibits[2] = &bits[2];
		pobits[0] = &bits[3];
		pobits[1] = &bits[4];
		pobits[2] = &bits[5];
	}

/* Copy in one user fd_set if the pointer was supplied. */
#define	getbits(name, x) \
	if (SCARG(uap, name) && (error = copyin((caddr_t)SCARG(uap, name), \
	    (caddr_t)pibits[x], ni))) \
		goto done;
	getbits(in, 0);
	getbits(ou, 1);
	getbits(ex, 2);
#undef	getbits

	if (SCARG(uap, tv)) {
		error = copyin((caddr_t)SCARG(uap, tv), (caddr_t)&atv,
			sizeof (atv));
		if (error)
			goto done;
		if (itimerfix(&atv)) {
			error = EINVAL;
			goto done;
		}
		/* Convert the relative timeout to an absolute deadline. */
		s = splclock();
		timeradd(&atv, &time, &atv);
		splx(s);
	} else
		timo = 0;	/* sleep without timeout */
retry:
	/*
	 * Snapshot the collision counter and mark ourselves as selecting;
	 * if either changes before we sleep, a wakeup may have raced us
	 * and we must rescan.
	 */
	ncoll = nselcoll;
	p->p_flag |= P_SELECT;
	error = selscan(p, pibits[0], pobits[0], SCARG(uap, nd), retval);
	if (error || *retval)
		goto done;
	if (SCARG(uap, tv)) {
		/*
		 * We have to recalculate the timeout on every retry.
		 */
		timo = hzto(&atv);
		if (timo <= 0)
			goto done;
	}
	s = splhigh();
	if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
		/* Raced with selwakeup(); scan again. */
		splx(s);
		goto retry;
	}
	p->p_flag &= ~P_SELECT;
	error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo);
	splx(s);
	if (error == 0)
		goto retry;
done:
	p->p_flag &= ~P_SELECT;
	/* select is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
/* Copy one result fd_set back out; remember only the first failure. */
#define	putbits(name, x) \
	if (SCARG(uap, name) && (error2 = copyout((caddr_t)pobits[x], \
	    (caddr_t)SCARG(uap, name), ni))) \
		error = error2;
	if (error == 0) {
		int error2;

		putbits(in, 0);
		putbits(ou, 1);
		putbits(ex, 2);
#undef putbits
	}

	/* Free the oversized bitmap allocation, if we made one. */
	if (pibits[0] != &bits[0])
		free(pibits[0], M_TEMP);
	return (error);
}
759 
/*
 * Scan the three input fd_sets, polling each set descriptor via its
 * file's fo_select method; set the corresponding bit in the output sets
 * for every ready descriptor and store the ready count in *retval.
 * Returns EBADF if a set bit names a closed descriptor.
 */
int
selscan(p, ibits, obits, nfd, retval)
	struct proc *p;
	fd_set *ibits, *obits;
	int nfd;
	register_t *retval;
{
	caddr_t cibits = (caddr_t)ibits, cobits = (caddr_t)obits;
	register struct filedesc *fdp = p->p_fd;
	register int msk, i, j, fd;
	register fd_mask bits;
	struct file *fp;
	int ni, n = 0;
	/* fo_select flag for each of the read/write/except sets */
	static int flag[3] = { FREAD, FWRITE, 0 };

	/*
	 * if nfd > FD_SETSIZE then the fd_set's contain nfd bits (rounded
	 * up to the next byte) otherwise the fd_set's are normal sized.
	 */
	ni = sizeof(fd_set);
	if (nfd > FD_SETSIZE)
		ni = howmany(nfd, NFDBITS) * sizeof(fd_mask);

	for (msk = 0; msk < 3; msk++) {
		fd_set *pibits = (fd_set *)&cibits[msk*ni];
		fd_set *pobits = (fd_set *)&cobits[msk*ni];

		for (i = 0; i < nfd; i += NFDBITS) {
			bits = pibits->fds_bits[i/NFDBITS];
			/* Visit each set bit in this word, lowest first. */
			while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
				bits &= ~(1 << j);
				if ((fp = fd_getfile(fdp, fd)) == NULL)
					return (EBADF);
				FREF(fp);
				if ((*fp->f_ops->fo_select)(fp, flag[msk], p)) {
					FD_SET(fd, pobits);
					n++;
				}
				FRELE(fp);
			}
		}
	}
	*retval = n;
	return (0);
}
805 
/*
 * Degenerate select routine for devices that are always ready;
 * unconditionally reports the device as selectable.
 */
/*ARGSUSED*/
int
seltrue(dev, flag, p)
	dev_t dev;
	int flag;
	struct proc *p;
{

	return (1);
}
816 
/*
 * Record a select request.  A selinfo tracks a single selecting pid;
 * if another process is already recorded and still sleeping in select,
 * mark the selinfo as collided so selwakeup() broadcasts instead.
 */
void
selrecord(selector, sip)
	struct proc *selector;
	struct selinfo *sip;
{
	struct proc *p;
	pid_t mypid;

	mypid = selector->p_pid;
	/* Already recorded as the selector: nothing to do. */
	if (sip->si_selpid == mypid)
		return;
	if (sip->si_selpid && (p = pfind(sip->si_selpid)) &&
	    p->p_wchan == (caddr_t)&selwait)
		/* Someone else is waiting on this object: collision. */
		sip->si_flags |= SI_COLL;
	else
		sip->si_selpid = mypid;
}
837 
/*
 * Do a wakeup when a selectable event occurs.  On a recorded collision,
 * bump nselcoll and broadcast on selwait (forcing all selectors to
 * rescan); otherwise wake only the single recorded process.
 */
void
selwakeup(sip)
	register struct selinfo *sip;
{
	register struct proc *p;
	int s;

	if (sip->si_selpid == 0)
		return;
	if (sip->si_flags & SI_COLL) {
		/* Multiple selectors: wake everyone sleeping in select. */
		nselcoll++;
		sip->si_flags &= ~SI_COLL;
		wakeup((caddr_t)&selwait);
	}
	p = pfind(sip->si_selpid);
	sip->si_selpid = 0;
	if (p != NULL) {
		s = splhigh();
		if (p->p_wchan == (caddr_t)&selwait) {
			/* Asleep in select/poll: make it runnable. */
			if (p->p_stat == SSLEEP)
				setrunnable(p);
			else
				unsleep(p);
		} else if (p->p_flag & P_SELECT)
			/* Between scan and sleep: clearing P_SELECT forces a rescan. */
			p->p_flag &= ~P_SELECT;
		splx(s);
	}
}
869 
/*
 * Scan an array of pollfds once, filling in revents via each file's
 * fo_select method and storing the count of fds with events in *retval.
 * Negative fds are ignored; closed fds are reported as POLLNVAL.
 */
void
pollscan(p, pl, nfd, retval)
	struct proc *p;
	struct pollfd *pl;
	int nfd;
	register_t *retval;
{
	register struct filedesc *fdp = p->p_fd;
	register int msk, i;
	struct file *fp;
	int x, n = 0;
	/* fo_select flag and corresponding poll event bits, per direction */
	static int flag[3] = { FREAD, FWRITE, 0 };
	static int pflag[3] = { POLLIN|POLLRDNORM, POLLOUT, POLLERR };

	/*
	 * XXX: We need to implement the rest of the flags.
	 */
	for (i = 0; i < nfd; i++) {
		/* Check the file descriptor. */
		if (pl[i].fd < 0) {
			pl[i].revents = 0;
			continue;
		}
		if ((fp = fd_getfile(fdp, pl[i].fd)) == NULL) {
			pl[i].revents = POLLNVAL;
			n++;
			continue;
		}
		FREF(fp);
		for (x = msk = 0; x == x && msk < 3; msk++) {
			if (pl[i].events & pflag[msk]) {
				if ((*fp->f_ops->fo_select)(fp, flag[msk], p)) {
					pl[i].revents |= pflag[msk] &
					    pl[i].events;
					x++;
				}
			}
		}
		FRELE(fp);
		/* Count this fd once no matter how many events fired. */
		if (x)
			n++;
	}
	*retval = n;
}
914 
/*
 * We are using the same mechanism as select only we encode/decode args
 * differently.
 */
int
sys_poll(p, v, retval)
	register struct proc *p;
	void *v;
	register_t *retval;
{
	struct sys_poll_args *uap = v;
	size_t sz;
	struct pollfd pfds[4], *pl = pfds;	/* small arrays stay on-stack */
	int msec = SCARG(uap, timeout);		/* -1 means no timeout */
	struct timeval atv;
	int timo, ncoll, i, s, error, error2;
	extern int nselcoll, selwait;

	/* Standards say no more than MAX_OPEN; this is possibly better. */
	if (SCARG(uap, nfds) > min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur,
	    maxfiles))
		return (EINVAL);

	sz = sizeof(struct pollfd) * SCARG(uap, nfds);

	/* optimize for the default case, of a small nfds value */
	if (sz > sizeof(pfds))
		pl = (struct pollfd *) malloc(sz, M_TEMP, M_WAITOK);

	if ((error = copyin(SCARG(uap, fds), pl, sz)) != 0)
		goto bad;

	for (i = 0; i < SCARG(uap, nfds); i++)
		pl[i].revents = 0;

	if (msec != -1) {
		/* Convert milliseconds to an absolute timeval deadline. */
		atv.tv_sec = msec / 1000;
		atv.tv_usec = (msec - (atv.tv_sec * 1000)) * 1000;

		if (itimerfix(&atv)) {
			error = EINVAL;
			goto done;
		}
		s = splclock();
		timeradd(&atv, &time, &atv);
		splx(s);
	} else
		timo = 0;	/* sleep without timeout */

retry:
	/*
	 * Snapshot the collision counter and mark ourselves as selecting;
	 * if either changes before we sleep, a wakeup may have raced us
	 * and we must rescan (same protocol as sys_select()).
	 */
	ncoll = nselcoll;
	p->p_flag |= P_SELECT;
	pollscan(p, pl, SCARG(uap, nfds), retval);
	if (*retval)
		goto done;
	if (msec != -1) {
		/*
		 * We have to recalculate the timeout on every retry.
		 */
		timo = hzto(&atv);
		if (timo <= 0)
			goto done;
	}
	s = splhigh();
	if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
		/* Raced with selwakeup(); scan again. */
		splx(s);
		goto retry;
	}
	p->p_flag &= ~P_SELECT;
	error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "poll", timo);
	splx(s);
	if (error == 0)
		goto retry;

done:
	p->p_flag &= ~P_SELECT;
	/* poll is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
	/* Copy revents back even on timeout/EINTR; keep the first error. */
	if ((error2 = copyout(pl, SCARG(uap, fds), sz)) != 0)
		error = error2;
bad:
	if (pl != pfds)
		free((char *) pl, M_TEMP);
	return (error);
}
1003 
1004