xref: /original-bsd/sys/kern/sys_generic.c (revision deff14a8)
1 /*
2  * Copyright (c) 1982, 1986, 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * %sccs.include.redist.c%
11  *
12  *	@(#)sys_generic.c	8.8 (Berkeley) 08/11/94
13  */
14 
15 #include <sys/param.h>
16 #include <sys/systm.h>
17 #include <sys/filedesc.h>
18 #include <sys/ioctl.h>
19 #include <sys/file.h>
20 #include <sys/proc.h>
21 #include <sys/socketvar.h>
22 #include <sys/uio.h>
23 #include <sys/kernel.h>
24 #include <sys/stat.h>
25 #include <sys/malloc.h>
26 #ifdef KTRACE
27 #include <sys/ktrace.h>
28 #endif
29 
30 /*
31  * Read system call.
32  */
33 struct read_args {
34 	int	fd;
35 	char	*buf;
36 	u_int	nbyte;
37 };
38 /* ARGSUSED */
39 read(p, uap, retval)
40 	struct proc *p;
41 	register struct read_args *uap;
42 	int *retval;
43 {
44 	register struct file *fp;
45 	register struct filedesc *fdp = p->p_fd;
46 	struct uio auio;
47 	struct iovec aiov;
48 	long cnt, error = 0;
49 #ifdef KTRACE
50 	struct iovec ktriov;
51 #endif
52 
53 	if (((u_int)uap->fd) >= fdp->fd_nfiles ||
54 	    (fp = fdp->fd_ofiles[uap->fd]) == NULL ||
55 	    (fp->f_flag & FREAD) == 0)
56 		return (EBADF);
57 	aiov.iov_base = (caddr_t)uap->buf;
58 	aiov.iov_len = uap->nbyte;
59 	auio.uio_iov = &aiov;
60 	auio.uio_iovcnt = 1;
61 	auio.uio_resid = uap->nbyte;
62 	auio.uio_rw = UIO_READ;
63 	auio.uio_segflg = UIO_USERSPACE;
64 	auio.uio_procp = p;
65 #ifdef KTRACE
66 	/*
67 	 * if tracing, save a copy of iovec
68 	 */
69 	if (KTRPOINT(p, KTR_GENIO))
70 		ktriov = aiov;
71 #endif
72 	cnt = uap->nbyte;
73 	if (error = (*fp->f_ops->fo_read)(fp, &auio, fp->f_cred))
74 		if (auio.uio_resid != cnt && (error == ERESTART ||
75 		    error == EINTR || error == EWOULDBLOCK))
76 			error = 0;
77 	cnt -= auio.uio_resid;
78 #ifdef KTRACE
79 	if (KTRPOINT(p, KTR_GENIO) && error == 0)
80 		ktrgenio(p->p_tracep, uap->fd, UIO_READ, &ktriov, cnt, error);
81 #endif
82 	*retval = cnt;
83 	return (error);
84 }
85 
86 /*
87  * Scatter read system call.
88  */
89 struct readv_args {
90 	int	fdes;
91 	struct	iovec *iovp;
92 	u_int	iovcnt;
93 };
94 readv(p, uap, retval)
95 	struct proc *p;
96 	register struct readv_args *uap;
97 	int *retval;
98 {
99 	register struct file *fp;
100 	register struct filedesc *fdp = p->p_fd;
101 	struct uio auio;
102 	register struct iovec *iov;
103 	struct iovec *needfree;
104 	struct iovec aiov[UIO_SMALLIOV];
105 	long i, cnt, error = 0;
106 	u_int iovlen;
107 #ifdef KTRACE
108 	struct iovec *ktriov = NULL;
109 #endif
110 
111 	if (((u_int)uap->fdes) >= fdp->fd_nfiles ||
112 	    (fp = fdp->fd_ofiles[uap->fdes]) == NULL ||
113 	    (fp->f_flag & FREAD) == 0)
114 		return (EBADF);
115 	/* note: can't use iovlen until iovcnt is validated */
116 	iovlen = uap->iovcnt * sizeof (struct iovec);
117 	if (uap->iovcnt > UIO_SMALLIOV) {
118 		if (uap->iovcnt > UIO_MAXIOV)
119 			return (EINVAL);
120 		MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
121 		needfree = iov;
122 	} else {
123 		iov = aiov;
124 		needfree = NULL;
125 	}
126 	auio.uio_iov = iov;
127 	auio.uio_iovcnt = uap->iovcnt;
128 	auio.uio_rw = UIO_READ;
129 	auio.uio_segflg = UIO_USERSPACE;
130 	auio.uio_procp = p;
131 	if (error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen))
132 		goto done;
133 	auio.uio_resid = 0;
134 	for (i = 0; i < uap->iovcnt; i++) {
135 		if (auio.uio_resid + iov->iov_len < auio.uio_resid) {
136 			error = EINVAL;
137 			goto done;
138 		}
139 		auio.uio_resid += iov->iov_len;
140 		iov++;
141 	}
142 #ifdef KTRACE
143 	/*
144 	 * if tracing, save a copy of iovec
145 	 */
146 	if (KTRPOINT(p, KTR_GENIO))  {
147 		MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
148 		bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
149 	}
150 #endif
151 	cnt = auio.uio_resid;
152 	if (error = (*fp->f_ops->fo_read)(fp, &auio, fp->f_cred))
153 		if (auio.uio_resid != cnt && (error == ERESTART ||
154 		    error == EINTR || error == EWOULDBLOCK))
155 			error = 0;
156 	cnt -= auio.uio_resid;
157 #ifdef KTRACE
158 	if (ktriov != NULL) {
159 		if (error == 0)
160 			ktrgenio(p->p_tracep, uap->fdes, UIO_READ, ktriov,
161 			    cnt, error);
162 		FREE(ktriov, M_TEMP);
163 	}
164 #endif
165 	*retval = cnt;
166 done:
167 	if (needfree)
168 		FREE(needfree, M_IOV);
169 	return (error);
170 }
171 
172 /*
173  * Write system call
174  */
175 struct write_args {
176 	int	fd;
177 	char	*buf;
178 	u_int	nbyte;
179 };
180 write(p, uap, retval)
181 	struct proc *p;
182 	register struct write_args *uap;
183 	int *retval;
184 {
185 	register struct file *fp;
186 	register struct filedesc *fdp = p->p_fd;
187 	struct uio auio;
188 	struct iovec aiov;
189 	long cnt, error = 0;
190 #ifdef KTRACE
191 	struct iovec ktriov;
192 #endif
193 
194 	if (((u_int)uap->fd) >= fdp->fd_nfiles ||
195 	    (fp = fdp->fd_ofiles[uap->fd]) == NULL ||
196 	    (fp->f_flag & FWRITE) == 0)
197 		return (EBADF);
198 	aiov.iov_base = (caddr_t)uap->buf;
199 	aiov.iov_len = uap->nbyte;
200 	auio.uio_iov = &aiov;
201 	auio.uio_iovcnt = 1;
202 	auio.uio_resid = uap->nbyte;
203 	auio.uio_rw = UIO_WRITE;
204 	auio.uio_segflg = UIO_USERSPACE;
205 	auio.uio_procp = p;
206 #ifdef KTRACE
207 	/*
208 	 * if tracing, save a copy of iovec
209 	 */
210 	if (KTRPOINT(p, KTR_GENIO))
211 		ktriov = aiov;
212 #endif
213 	cnt = uap->nbyte;
214 	if (error = (*fp->f_ops->fo_write)(fp, &auio, fp->f_cred)) {
215 		if (auio.uio_resid != cnt && (error == ERESTART ||
216 		    error == EINTR || error == EWOULDBLOCK))
217 			error = 0;
218 		if (error == EPIPE)
219 			psignal(p, SIGPIPE);
220 	}
221 	cnt -= auio.uio_resid;
222 #ifdef KTRACE
223 	if (KTRPOINT(p, KTR_GENIO) && error == 0)
224 		ktrgenio(p->p_tracep, uap->fd, UIO_WRITE,
225 		    &ktriov, cnt, error);
226 #endif
227 	*retval = cnt;
228 	return (error);
229 }
230 
231 /*
232  * Gather write system call
233  */
234 struct writev_args {
235 	int	fd;
236 	struct	iovec *iovp;
237 	u_int	iovcnt;
238 };
239 writev(p, uap, retval)
240 	struct proc *p;
241 	register struct writev_args *uap;
242 	int *retval;
243 {
244 	register struct file *fp;
245 	register struct filedesc *fdp = p->p_fd;
246 	struct uio auio;
247 	register struct iovec *iov;
248 	struct iovec *needfree;
249 	struct iovec aiov[UIO_SMALLIOV];
250 	long i, cnt, error = 0;
251 	u_int iovlen;
252 #ifdef KTRACE
253 	struct iovec *ktriov = NULL;
254 #endif
255 
256 	if (((u_int)uap->fd) >= fdp->fd_nfiles ||
257 	    (fp = fdp->fd_ofiles[uap->fd]) == NULL ||
258 	    (fp->f_flag & FWRITE) == 0)
259 		return (EBADF);
260 	/* note: can't use iovlen until iovcnt is validated */
261 	iovlen = uap->iovcnt * sizeof (struct iovec);
262 	if (uap->iovcnt > UIO_SMALLIOV) {
263 		if (uap->iovcnt > UIO_MAXIOV)
264 			return (EINVAL);
265 		MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
266 		needfree = iov;
267 	} else {
268 		iov = aiov;
269 		needfree = NULL;
270 	}
271 	auio.uio_iov = iov;
272 	auio.uio_iovcnt = uap->iovcnt;
273 	auio.uio_rw = UIO_WRITE;
274 	auio.uio_segflg = UIO_USERSPACE;
275 	auio.uio_procp = p;
276 	if (error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen))
277 		goto done;
278 	auio.uio_resid = 0;
279 	for (i = 0; i < uap->iovcnt; i++) {
280 		if (auio.uio_resid + iov->iov_len < auio.uio_resid) {
281 			error = EINVAL;
282 			goto done;
283 		}
284 		auio.uio_resid += iov->iov_len;
285 		iov++;
286 	}
287 #ifdef KTRACE
288 	/*
289 	 * if tracing, save a copy of iovec
290 	 */
291 	if (KTRPOINT(p, KTR_GENIO))  {
292 		MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
293 		bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
294 	}
295 #endif
296 	cnt = auio.uio_resid;
297 	if (error = (*fp->f_ops->fo_write)(fp, &auio, fp->f_cred)) {
298 		if (auio.uio_resid != cnt && (error == ERESTART ||
299 		    error == EINTR || error == EWOULDBLOCK))
300 			error = 0;
301 		if (error == EPIPE)
302 			psignal(p, SIGPIPE);
303 	}
304 	cnt -= auio.uio_resid;
305 #ifdef KTRACE
306 	if (ktriov != NULL) {
307 		if (error == 0)
308 			ktrgenio(p->p_tracep, uap->fd, UIO_WRITE,
309 				ktriov, cnt, error);
310 		FREE(ktriov, M_TEMP);
311 	}
312 #endif
313 	*retval = cnt;
314 done:
315 	if (needfree)
316 		FREE(needfree, M_IOV);
317 	return (error);
318 }
319 
320 /*
321  * Ioctl system call
322  */
323 struct ioctl_args {
324 	int	fd;
325 	int	com;
326 	caddr_t	data;
327 };
328 /* ARGSUSED */
329 ioctl(p, uap, retval)
330 	struct proc *p;
331 	register struct ioctl_args *uap;
332 	int *retval;
333 {
334 	register struct file *fp;
335 	register struct filedesc *fdp;
336 	register int com, error;
337 	register u_int size;
338 	caddr_t data, memp;
339 	int tmp;
340 #define STK_PARAMS	128
341 	char stkbuf[STK_PARAMS];
342 
343 	fdp = p->p_fd;
344 	if ((u_int)uap->fd >= fdp->fd_nfiles ||
345 	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
346 		return (EBADF);
347 
348 	if ((fp->f_flag & (FREAD | FWRITE)) == 0)
349 		return (EBADF);
350 
351 	switch (com = uap->com) {
352 	case FIONCLEX:
353 		fdp->fd_ofileflags[uap->fd] &= ~UF_EXCLOSE;
354 		return (0);
355 	case FIOCLEX:
356 		fdp->fd_ofileflags[uap->fd] |= UF_EXCLOSE;
357 		return (0);
358 	}
359 
360 	/*
361 	 * Interpret high order word to find amount of data to be
362 	 * copied to/from the user's address space.
363 	 */
364 	size = IOCPARM_LEN(com);
365 	if (size > IOCPARM_MAX)
366 		return (ENOTTY);
367 	memp = NULL;
368 	if (size > sizeof (stkbuf)) {
369 		memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
370 		data = memp;
371 	} else
372 		data = stkbuf;
373 	if (com&IOC_IN) {
374 		if (size) {
375 			error = copyin(uap->data, data, (u_int)size);
376 			if (error) {
377 				if (memp)
378 					free(memp, M_IOCTLOPS);
379 				return (error);
380 			}
381 		} else
382 			*(caddr_t *)data = uap->data;
383 	} else if ((com&IOC_OUT) && size)
384 		/*
385 		 * Zero the buffer so the user always
386 		 * gets back something deterministic.
387 		 */
388 		bzero(data, size);
389 	else if (com&IOC_VOID)
390 		*(caddr_t *)data = uap->data;
391 
392 	switch (com) {
393 
394 	case FIONBIO:
395 		if (tmp = *(int *)data)
396 			fp->f_flag |= FNONBLOCK;
397 		else
398 			fp->f_flag &= ~FNONBLOCK;
399 		error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
400 		break;
401 
402 	case FIOASYNC:
403 		if (tmp = *(int *)data)
404 			fp->f_flag |= FASYNC;
405 		else
406 			fp->f_flag &= ~FASYNC;
407 		error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
408 		break;
409 
410 	case FIOSETOWN:
411 		tmp = *(int *)data;
412 		if (fp->f_type == DTYPE_SOCKET) {
413 			((struct socket *)fp->f_data)->so_pgid = tmp;
414 			error = 0;
415 			break;
416 		}
417 		if (tmp <= 0) {
418 			tmp = -tmp;
419 		} else {
420 			struct proc *p1 = pfind(tmp);
421 			if (p1 == 0) {
422 				error = ESRCH;
423 				break;
424 			}
425 			tmp = p1->p_pgrp->pg_id;
426 		}
427 		error = (*fp->f_ops->fo_ioctl)
428 			(fp, (int)TIOCSPGRP, (caddr_t)&tmp, p);
429 		break;
430 
431 	case FIOGETOWN:
432 		if (fp->f_type == DTYPE_SOCKET) {
433 			error = 0;
434 			*(int *)data = ((struct socket *)fp->f_data)->so_pgid;
435 			break;
436 		}
437 		error = (*fp->f_ops->fo_ioctl)(fp, (int)TIOCGPGRP, data, p);
438 		*(int *)data = -*(int *)data;
439 		break;
440 
441 	default:
442 		error = (*fp->f_ops->fo_ioctl)(fp, com, data, p);
443 		/*
444 		 * Copy any data to user, size was
445 		 * already set and checked above.
446 		 */
447 		if (error == 0 && (com&IOC_OUT) && size)
448 			error = copyout(data, uap->data, (u_int)size);
449 		break;
450 	}
451 	if (memp)
452 		free(memp, M_IOCTLOPS);
453 	return (error);
454 }
455 
456 int	selwait, nselcoll;
457 
458 /*
459  * Select system call.
460  */
461 struct select_args {
462 	u_int	nd;
463 	fd_set	*in, *ou, *ex;
464 	struct	timeval *tv;
465 };
466 
467 select(p, uap, retval)
468 	register struct proc *p;
469 	register struct select_args *uap;
470 	int *retval;
471 {
472 	fd_set ibits[3], obits[3];
473 	struct timeval atv;
474 	int s, ncoll, error, timo = 0;
475 	u_int ni;
476 
477 	bzero((caddr_t)ibits, sizeof(ibits));
478 	bzero((caddr_t)obits, sizeof(obits));
479 	if (uap->nd > FD_SETSIZE)
480 		return (EINVAL);
481 	if (uap->nd > p->p_fd->fd_nfiles)
482 		uap->nd = p->p_fd->fd_nfiles;	/* forgiving; slightly wrong */
483 	ni = howmany(uap->nd, NFDBITS) * sizeof(fd_mask);
484 
485 #define	getbits(name, x) \
486 	if (uap->name && \
487 	    (error = copyin((caddr_t)uap->name, (caddr_t)&ibits[x], ni))) \
488 		goto done;
489 	getbits(in, 0);
490 	getbits(ou, 1);
491 	getbits(ex, 2);
492 #undef	getbits
493 
494 	if (uap->tv) {
495 		error = copyin((caddr_t)uap->tv, (caddr_t)&atv,
496 			sizeof (atv));
497 		if (error)
498 			goto done;
499 		if (itimerfix(&atv)) {
500 			error = EINVAL;
501 			goto done;
502 		}
503 		s = splclock();
504 		timevaladd(&atv, (struct timeval *)&time);
505 		splx(s);
506 	}
507 retry:
508 	ncoll = nselcoll;
509 	p->p_flag |= P_SELECT;
510 	error = selscan(p, ibits, obits, uap->nd, retval);
511 	if (error || *retval)
512 		goto done;
513 	s = splhigh();
514 	if (uap->tv) {
515 		if (timercmp(&time, &atv, >=)) {
516 			splx(s);
517 			goto done;
518 		}
519 		/*
520 		 * If poll wait was tiny, this could be zero; we will
521 		 * have to round it up to avoid sleeping forever.  If
522 		 * we retry below, the timercmp above will get us out.
523 		 * Note that if wait was 0, the timercmp will prevent
524 		 * us from getting here the first time.
525 		 */
526 		timo = hzto(&atv);
527 		if (timo == 0)
528 			timo = 1;
529 	}
530 	if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
531 		splx(s);
532 		goto retry;
533 	}
534 	p->p_flag &= ~P_SELECT;
535 	error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo);
536 	splx(s);
537 	if (error == 0)
538 		goto retry;
539 done:
540 	p->p_flag &= ~P_SELECT;
541 	/* select is not restarted after signals... */
542 	if (error == ERESTART)
543 		error = EINTR;
544 	if (error == EWOULDBLOCK)
545 		error = 0;
546 #define	putbits(name, x) \
547 	if (uap->name && \
548 	    (error2 = copyout((caddr_t)&obits[x], (caddr_t)uap->name, ni))) \
549 		error = error2;
550 	if (error == 0) {
551 		int error2;
552 
553 		putbits(in, 0);
554 		putbits(ou, 1);
555 		putbits(ex, 2);
556 #undef putbits
557 	}
558 	return (error);
559 }
560 
561 selscan(p, ibits, obits, nfd, retval)
562 	struct proc *p;
563 	fd_set *ibits, *obits;
564 	int nfd, *retval;
565 {
566 	register struct filedesc *fdp = p->p_fd;
567 	register int msk, i, j, fd;
568 	register fd_mask bits;
569 	struct file *fp;
570 	int n = 0;
571 	static int flag[3] = { FREAD, FWRITE, 0 };
572 
573 	for (msk = 0; msk < 3; msk++) {
574 		for (i = 0; i < nfd; i += NFDBITS) {
575 			bits = ibits[msk].fds_bits[i/NFDBITS];
576 			while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
577 				bits &= ~(1 << j);
578 				fp = fdp->fd_ofiles[fd];
579 				if (fp == NULL)
580 					return (EBADF);
581 				if ((*fp->f_ops->fo_select)(fp, flag[msk], p)) {
582 					FD_SET(fd, &obits[msk]);
583 					n++;
584 				}
585 			}
586 		}
587 	}
588 	*retval = n;
589 	return (0);
590 }
591 
/*
 * Select routine that unconditionally reports "ready": none of the
 * arguments are examined and the result is always 1.
 */
/*ARGSUSED*/
seltrue(dev, flag, p)
	dev_t dev;
	int flag;
	struct proc *p;
{
	return 1;
}
601 
602 /*
603  * Record a select request.
604  */
605 void
606 selrecord(selector, sip)
607 	struct proc *selector;
608 	struct selinfo *sip;
609 {
610 	struct proc *p;
611 	pid_t mypid;
612 
613 	mypid = selector->p_pid;
614 	if (sip->si_pid == mypid)
615 		return;
616 	if (sip->si_pid && (p = pfind(sip->si_pid)) &&
617 	    p->p_wchan == (caddr_t)&selwait)
618 		sip->si_flags |= SI_COLL;
619 	else
620 		sip->si_pid = mypid;
621 }
622 
623 /*
624  * Do a wakeup when a selectable event occurs.
625  */
626 void
627 selwakeup(sip)
628 	register struct selinfo *sip;
629 {
630 	register struct proc *p;
631 	int s;
632 
633 	if (sip->si_pid == 0)
634 		return;
635 	if (sip->si_flags & SI_COLL) {
636 		nselcoll++;
637 		sip->si_flags &= ~SI_COLL;
638 		wakeup((caddr_t)&selwait);
639 	}
640 	p = pfind(sip->si_pid);
641 	sip->si_pid = 0;
642 	if (p != NULL) {
643 		s = splhigh();
644 		if (p->p_wchan == (caddr_t)&selwait) {
645 			if (p->p_stat == SSLEEP)
646 				setrunnable(p);
647 			else
648 				unsleep(p);
649 		} else if (p->p_flag & P_SELECT)
650 			p->p_flag &= ~P_SELECT;
651 		splx(s);
652 	}
653 }
654