1 /* $OpenBSD: sys_generic.c,v 1.98 2015/05/10 22:35:38 millert Exp $ */ 2 /* $NetBSD: sys_generic.c,v 1.24 1996/03/29 00:25:32 cgd Exp $ */ 3 4 /* 5 * Copyright (c) 1996 Theo de Raadt 6 * Copyright (c) 1982, 1986, 1989, 1993 7 * The Regents of the University of California. All rights reserved. 8 * (c) UNIX System Laboratories, Inc. 9 * All or some portions of this file are derived from material licensed 10 * to the University of California by American Telephone and Telegraph 11 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 12 * the permission of UNIX System Laboratories, Inc. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions 16 * are met: 17 * 1. Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 2. Redistributions in binary form must reproduce the above copyright 20 * notice, this list of conditions and the following disclaimer in the 21 * documentation and/or other materials provided with the distribution. 22 * 3. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)sys_generic.c 8.5 (Berkeley) 1/21/94 39 */ 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/filedesc.h> 44 #include <sys/ioctl.h> 45 #include <sys/file.h> 46 #include <sys/proc.h> 47 #include <sys/resourcevar.h> 48 #include <sys/socketvar.h> 49 #include <sys/signalvar.h> 50 #include <sys/uio.h> 51 #include <sys/kernel.h> 52 #include <sys/stat.h> 53 #include <sys/malloc.h> 54 #include <sys/poll.h> 55 #ifdef KTRACE 56 #include <sys/ktrace.h> 57 #endif 58 #include <sys/sched.h> 59 60 #include <sys/mount.h> 61 #include <sys/syscallargs.h> 62 63 #include <uvm/uvm_extern.h> 64 65 int selscan(struct proc *, fd_set *, fd_set *, int, int, register_t *); 66 void pollscan(struct proc *, struct pollfd *, u_int, register_t *); 67 int pollout(struct pollfd *, struct pollfd *, u_int); 68 int dopselect(struct proc *, int, fd_set *, fd_set *, fd_set *, 69 const struct timespec *, const sigset_t *, register_t *); 70 int doppoll(struct proc *, struct pollfd *, u_int, const struct timespec *, 71 const sigset_t *, register_t *); 72 73 /* 74 * Read system call. 75 */ 76 /* ARGSUSED */ 77 int 78 sys_read(struct proc *p, void *v, register_t *retval) 79 { 80 struct sys_read_args /* { 81 syscallarg(int) fd; 82 syscallarg(void *) buf; 83 syscallarg(size_t) nbyte; 84 } */ *uap = v; 85 struct iovec iov; 86 int fd = SCARG(uap, fd); 87 struct file *fp; 88 struct filedesc *fdp = p->p_fd; 89 90 if ((fp = fd_getfile_mode(fdp, fd, FREAD)) == NULL) 91 return (EBADF); 92 93 iov.iov_base = SCARG(uap, buf); 94 iov.iov_len = SCARG(uap, nbyte); 95 96 FREF(fp); 97 98 /* dofilereadv() will FRELE the descriptor for us */ 99 return (dofilereadv(p, fd, fp, &iov, 1, 0, &fp->f_offset, retval)); 100 } 101 102 /* 103 * Scatter read system call. 104 */ 105 int 106 sys_readv(struct proc *p, void *v, register_t *retval) 107 { 108 struct sys_readv_args /* { 109 syscallarg(int) fd; 110 syscallarg(const struct iovec *) iovp; 111 syscallarg(int) iovcnt; 112 } */ *uap = v; 113 int fd = SCARG(uap, fd); 114 struct file *fp; 115 struct filedesc *fdp = p->p_fd; 116 117 if ((fp = fd_getfile_mode(fdp, fd, FREAD)) == NULL) 118 return (EBADF); 119 FREF(fp); 120 121 /* dofilereadv() will FRELE the descriptor for us */ 122 return (dofilereadv(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt), 1, 123 &fp->f_offset, retval)); 124 } 125 126 int 127 dofilereadv(struct proc *p, int fd, struct file *fp, const struct iovec *iovp, 128 int iovcnt, int userspace, off_t *offset, register_t *retval) 129 { 130 struct iovec aiov[UIO_SMALLIOV]; 131 struct uio auio; 132 struct iovec *iov; 133 struct iovec *needfree = NULL; 134 long i, cnt, error = 0; 135 u_int iovlen; 136 #ifdef KTRACE 137 struct iovec *ktriov = NULL; 138 #endif 139 140 /* note: can't use iovlen until iovcnt is validated */ 141 iovlen = iovcnt * sizeof(struct iovec); 142 143 /* 144 * If the iovec array exists in userspace, it needs to be copied in; 145 * otherwise, it can be used directly. 146 */ 147 if (userspace) { 148 if ((u_int)iovcnt > UIO_SMALLIOV) { 149 if ((u_int)iovcnt > IOV_MAX) { 150 error = EINVAL; 151 goto out; 152 } 153 iov = needfree = malloc(iovlen, M_IOV, M_WAITOK); 154 } else if ((u_int)iovcnt > 0) { 155 iov = aiov; 156 needfree = NULL; 157 } else { 158 error = EINVAL; 159 goto out; 160 } 161 if ((error = copyin(iovp, iov, iovlen))) 162 goto done; 163 } else { 164 iov = (struct iovec *)iovp; /* de-constify */ 165 } 166 167 auio.uio_iov = iov; 168 auio.uio_iovcnt = iovcnt; 169 auio.uio_rw = UIO_READ; 170 auio.uio_segflg = UIO_USERSPACE; 171 auio.uio_procp = p; 172 auio.uio_resid = 0; 173 for (i = 0; i < iovcnt; i++) { 174 auio.uio_resid += iov->iov_len; 175 /* 176 * Reads return ssize_t because -1 is returned on error. 177 * Therefore we must restrict the length to SSIZE_MAX to 178 * avoid garbage return values. Note that the addition is 179 * guaranteed to not wrap because SSIZE_MAX * 2 < SIZE_MAX. 180 */ 181 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) { 182 error = EINVAL; 183 goto done; 184 } 185 iov++; 186 } 187 #ifdef KTRACE 188 /* 189 * if tracing, save a copy of iovec 190 */ 191 if (KTRPOINT(p, KTR_GENIO)) { 192 ktriov = malloc(iovlen, M_TEMP, M_WAITOK); 193 memcpy(ktriov, auio.uio_iov, iovlen); 194 } 195 #endif 196 cnt = auio.uio_resid; 197 error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred); 198 if (error) 199 if (auio.uio_resid != cnt && (error == ERESTART || 200 error == EINTR || error == EWOULDBLOCK)) 201 error = 0; 202 cnt -= auio.uio_resid; 203 204 fp->f_rxfer++; 205 fp->f_rbytes += cnt; 206 #ifdef KTRACE 207 if (ktriov != NULL) { 208 if (error == 0) 209 ktrgenio(p, fd, UIO_READ, ktriov, cnt); 210 free(ktriov, M_TEMP, iovlen); 211 } 212 #endif 213 *retval = cnt; 214 done: 215 if (needfree) 216 free(needfree, M_IOV, iovlen); 217 out: 218 FRELE(fp, p); 219 return (error); 220 } 221 222 /* 223 * Write system call 224 */ 225 int 226 sys_write(struct proc *p, void *v, register_t *retval) 227 { 228 struct sys_write_args /* { 229 syscallarg(int) fd; 230 syscallarg(const void *) buf; 231 syscallarg(size_t) nbyte; 232 } */ *uap = v; 233 struct iovec iov; 234 int fd = SCARG(uap, fd); 235 struct file *fp; 236 struct filedesc *fdp = p->p_fd; 237 238 if ((fp = fd_getfile_mode(fdp, fd, FWRITE)) == NULL) 239 return (EBADF); 240 241 iov.iov_base = (void *)SCARG(uap, buf); 242 iov.iov_len = SCARG(uap, nbyte); 243 244 FREF(fp); 245 246 /* dofilewritev() will FRELE the descriptor for us */ 247 return (dofilewritev(p, fd, fp, &iov, 1, 0, &fp->f_offset, retval)); 248 } 249 250 /* 251 * Gather write system call 252 */ 253 int 254 sys_writev(struct proc *p, void *v, register_t *retval) 255 { 256 struct sys_writev_args /* { 257 syscallarg(int) fd; 258 syscallarg(const struct iovec *) iovp; 259 syscallarg(int) iovcnt; 260 } */ *uap = v; 261 int fd = SCARG(uap, fd); 262 struct file *fp; 263 struct filedesc *fdp = p->p_fd; 264 265 if ((fp = fd_getfile_mode(fdp, fd, FWRITE)) == NULL) 266 return (EBADF); 267 FREF(fp); 268 269 /* dofilewritev() will FRELE the descriptor for us */ 270 return (dofilewritev(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt), 1, 271 &fp->f_offset, retval)); 272 } 273 274 int 275 dofilewritev(struct proc *p, int fd, struct file *fp, const struct iovec *iovp, 276 int iovcnt, int userspace, off_t *offset, register_t *retval) 277 { 278 struct iovec aiov[UIO_SMALLIOV]; 279 struct uio auio; 280 struct iovec *iov; 281 struct iovec *needfree = NULL; 282 long i, cnt, error = 0; 283 u_int iovlen; 284 #ifdef KTRACE 285 struct iovec *ktriov = NULL; 286 #endif 287 288 /* note: can't use iovlen until iovcnt is validated */ 289 iovlen = iovcnt * sizeof(struct iovec); 290 291 /* 292 * If the iovec array exists in userspace, it needs to be copied in; 293 * otherwise, it can be used directly. 294 */ 295 if (userspace) { 296 if ((u_int)iovcnt > UIO_SMALLIOV) { 297 if ((u_int)iovcnt > IOV_MAX) { 298 error = EINVAL; 299 goto out; 300 } 301 iov = needfree = malloc(iovlen, M_IOV, M_WAITOK); 302 } else if ((u_int)iovcnt > 0) { 303 iov = aiov; 304 needfree = NULL; 305 } else { 306 error = EINVAL; 307 goto out; 308 } 309 if ((error = copyin(iovp, iov, iovlen))) 310 goto done; 311 } else { 312 iov = (struct iovec *)iovp; /* de-constify */ 313 } 314 315 auio.uio_iov = iov; 316 auio.uio_iovcnt = iovcnt; 317 auio.uio_rw = UIO_WRITE; 318 auio.uio_segflg = UIO_USERSPACE; 319 auio.uio_procp = p; 320 auio.uio_resid = 0; 321 for (i = 0; i < iovcnt; i++) { 322 auio.uio_resid += iov->iov_len; 323 /* 324 * Writes return ssize_t because -1 is returned on error. 325 * Therefore we must restrict the length to SSIZE_MAX to 326 * avoid garbage return values. Note that the addition is 327 * guaranteed to not wrap because SSIZE_MAX * 2 < SIZE_MAX. 328 */ 329 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) { 330 error = EINVAL; 331 goto done; 332 } 333 iov++; 334 } 335 #ifdef KTRACE 336 /* 337 * if tracing, save a copy of iovec 338 */ 339 if (KTRPOINT(p, KTR_GENIO)) { 340 ktriov = malloc(iovlen, M_TEMP, M_WAITOK); 341 memcpy(ktriov, auio.uio_iov, iovlen); 342 } 343 #endif 344 cnt = auio.uio_resid; 345 error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred); 346 if (error) { 347 if (auio.uio_resid != cnt && (error == ERESTART || 348 error == EINTR || error == EWOULDBLOCK)) 349 error = 0; 350 if (error == EPIPE) 351 ptsignal(p, SIGPIPE, STHREAD); 352 } 353 cnt -= auio.uio_resid; 354 355 fp->f_wxfer++; 356 fp->f_wbytes += cnt; 357 #ifdef KTRACE 358 if (ktriov != NULL) { 359 if (error == 0) 360 ktrgenio(p, fd, UIO_WRITE, ktriov, cnt); 361 free(ktriov, M_TEMP, iovlen); 362 } 363 #endif 364 *retval = cnt; 365 done: 366 if (needfree) 367 free(needfree, M_IOV, iovlen); 368 out: 369 FRELE(fp, p); 370 return (error); 371 } 372 373 /* 374 * Ioctl system call 375 */ 376 /* ARGSUSED */ 377 int 378 sys_ioctl(struct proc *p, void *v, register_t *retval) 379 { 380 struct sys_ioctl_args /* { 381 syscallarg(int) fd; 382 syscallarg(u_long) com; 383 syscallarg(void *) data; 384 } */ *uap = v; 385 struct file *fp; 386 struct filedesc *fdp; 387 u_long com; 388 int error; 389 u_int size; 390 caddr_t data, memp; 391 int tmp; 392 #define STK_PARAMS 128 393 long long stkbuf[STK_PARAMS / sizeof(long long)]; 394 395 fdp = p->p_fd; 396 if ((fp = fd_getfile_mode(fdp, SCARG(uap, fd), FREAD|FWRITE)) == NULL) 397 return (EBADF); 398 399 switch (com = SCARG(uap, com)) { 400 case FIONCLEX: 401 case FIOCLEX: 402 fdplock(fdp); 403 if (com == FIONCLEX) 404 fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE; 405 else 406 fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE; 407 fdpunlock(fdp); 408 return (0); 409 } 410 411 /* 412 * Interpret high order word to find amount of data to be 413 * copied to/from the user's address space. 414 */ 415 size = IOCPARM_LEN(com); 416 if (size > IOCPARM_MAX) 417 return (ENOTTY); 418 FREF(fp); 419 memp = NULL; 420 if (size > sizeof (stkbuf)) { 421 memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK); 422 data = memp; 423 } else 424 data = (caddr_t)stkbuf; 425 if (com&IOC_IN) { 426 if (size) { 427 error = copyin(SCARG(uap, data), data, (u_int)size); 428 if (error) { 429 goto out; 430 } 431 } else 432 *(caddr_t *)data = SCARG(uap, data); 433 } else if ((com&IOC_OUT) && size) 434 /* 435 * Zero the buffer so the user always 436 * gets back something deterministic. 437 */ 438 memset(data, 0, size); 439 else if (com&IOC_VOID) 440 *(caddr_t *)data = SCARG(uap, data); 441 442 switch (com) { 443 444 case FIONBIO: 445 if ((tmp = *(int *)data) != 0) 446 fp->f_flag |= FNONBLOCK; 447 else 448 fp->f_flag &= ~FNONBLOCK; 449 error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p); 450 break; 451 452 case FIOASYNC: 453 if ((tmp = *(int *)data) != 0) 454 fp->f_flag |= FASYNC; 455 else 456 fp->f_flag &= ~FASYNC; 457 error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p); 458 break; 459 460 case FIOSETOWN: 461 tmp = *(int *)data; 462 if (fp->f_type == DTYPE_SOCKET) { 463 struct socket *so = (struct socket *)fp->f_data; 464 465 so->so_pgid = tmp; 466 so->so_siguid = p->p_ucred->cr_ruid; 467 so->so_sigeuid = p->p_ucred->cr_uid; 468 error = 0; 469 break; 470 } 471 if (tmp <= 0) { 472 tmp = -tmp; 473 } else { 474 struct process *pr = prfind(tmp); 475 if (pr == NULL) { 476 error = ESRCH; 477 break; 478 } 479 tmp = pr->ps_pgrp->pg_id; 480 } 481 error = (*fp->f_ops->fo_ioctl) 482 (fp, TIOCSPGRP, (caddr_t)&tmp, p); 483 break; 484 485 case FIOGETOWN: 486 if (fp->f_type == DTYPE_SOCKET) { 487 error = 0; 488 *(int *)data = ((struct socket *)fp->f_data)->so_pgid; 489 break; 490 } 491 error = (*fp->f_ops->fo_ioctl)(fp, TIOCGPGRP, data, p); 492 *(int *)data = -*(int *)data; 493 break; 494 495 default: 496 error = (*fp->f_ops->fo_ioctl)(fp, com, data, p); 497 break; 498 } 499 /* 500 * Copy any data to user, size was 501 * already set and checked above. 502 */ 503 if (error == 0 && (com&IOC_OUT) && size) 504 error = copyout(data, SCARG(uap, data), (u_int)size); 505 out: 506 FRELE(fp, p); 507 if (memp) 508 free(memp, M_IOCTLOPS, size); 509 return (error); 510 } 511 512 int selwait, nselcoll; 513 514 /* 515 * Select system call. 516 */ 517 int 518 sys_select(struct proc *p, void *v, register_t *retval) 519 { 520 struct sys_select_args /* { 521 syscallarg(int) nd; 522 syscallarg(fd_set *) in; 523 syscallarg(fd_set *) ou; 524 syscallarg(fd_set *) ex; 525 syscallarg(struct timeval *) tv; 526 } */ *uap = v; 527 528 struct timespec ts, *tsp = NULL; 529 int error; 530 531 if (SCARG(uap, tv) != NULL) { 532 struct timeval tv; 533 if ((error = copyin(SCARG(uap, tv), &tv, sizeof tv)) != 0) 534 return (error); 535 if ((error = itimerfix(&tv)) != 0) 536 return (error); 537 #ifdef KTRACE 538 if (KTRPOINT(p, KTR_STRUCT)) 539 ktrreltimeval(p, &tv); 540 #endif 541 TIMEVAL_TO_TIMESPEC(&tv, &ts); 542 tsp = &ts; 543 } 544 545 return (dopselect(p, SCARG(uap, nd), SCARG(uap, in), SCARG(uap, ou), 546 SCARG(uap, ex), tsp, NULL, retval)); 547 } 548 549 int 550 sys_pselect(struct proc *p, void *v, register_t *retval) 551 { 552 struct sys_pselect_args /* { 553 syscallarg(int) nd; 554 syscallarg(fd_set *) in; 555 syscallarg(fd_set *) ou; 556 syscallarg(fd_set *) ex; 557 syscallarg(const struct timespec *) ts; 558 syscallarg(const sigset_t *) mask; 559 } */ *uap = v; 560 561 struct timespec ts, *tsp = NULL; 562 sigset_t ss, *ssp = NULL; 563 int error; 564 565 if (SCARG(uap, ts) != NULL) { 566 if ((error = copyin(SCARG(uap, ts), &ts, sizeof ts)) != 0) 567 return (error); 568 if ((error = timespecfix(&ts)) != 0) 569 return (error); 570 #ifdef KTRACE 571 if (KTRPOINT(p, KTR_STRUCT)) 572 ktrreltimespec(p, &ts); 573 #endif 574 tsp = &ts; 575 } 576 if (SCARG(uap, mask) != NULL) { 577 if ((error = copyin(SCARG(uap, mask), &ss, sizeof ss)) != 0) 578 return (error); 579 ssp = &ss; 580 } 581 582 return (dopselect(p, SCARG(uap, nd), SCARG(uap, in), SCARG(uap, ou), 583 SCARG(uap, ex), tsp, ssp, retval)); 584 } 585 586 int 587 dopselect(struct proc *p, int nd, fd_set *in, fd_set *ou, fd_set *ex, 588 const struct timespec *tsp, const sigset_t *sigmask, register_t *retval) 589 { 590 fd_mask bits[6]; 591 fd_set *pibits[3], *pobits[3]; 592 struct timespec ats, rts, tts; 593 int s, ncoll, error = 0, timo; 594 u_int ni; 595 596 if (nd < 0) 597 return (EINVAL); 598 if (nd > p->p_fd->fd_nfiles) { 599 /* forgiving; slightly wrong */ 600 nd = p->p_fd->fd_nfiles; 601 } 602 ni = howmany(nd, NFDBITS) * sizeof(fd_mask); 603 if (ni > sizeof(bits[0])) { 604 caddr_t mbits; 605 606 mbits = mallocarray(6, ni, M_TEMP, M_WAITOK|M_ZERO); 607 pibits[0] = (fd_set *)&mbits[ni * 0]; 608 pibits[1] = (fd_set *)&mbits[ni * 1]; 609 pibits[2] = (fd_set *)&mbits[ni * 2]; 610 pobits[0] = (fd_set *)&mbits[ni * 3]; 611 pobits[1] = (fd_set *)&mbits[ni * 4]; 612 pobits[2] = (fd_set *)&mbits[ni * 5]; 613 } else { 614 memset(bits, 0, sizeof(bits)); 615 pibits[0] = (fd_set *)&bits[0]; 616 pibits[1] = (fd_set *)&bits[1]; 617 pibits[2] = (fd_set *)&bits[2]; 618 pobits[0] = (fd_set *)&bits[3]; 619 pobits[1] = (fd_set *)&bits[4]; 620 pobits[2] = (fd_set *)&bits[5]; 621 } 622 623 #define getbits(name, x) \ 624 if (name && (error = copyin(name, pibits[x], ni))) \ 625 goto done; 626 getbits(in, 0); 627 getbits(ou, 1); 628 getbits(ex, 2); 629 #undef getbits 630 #ifdef KTRACE 631 if (ni > 0 && KTRPOINT(p, KTR_STRUCT)) { 632 if (in) ktrfdset(p, pibits[0], ni); 633 if (ou) ktrfdset(p, pibits[1], ni); 634 if (ex) ktrfdset(p, pibits[2], ni); 635 } 636 #endif 637 638 if (tsp) { 639 getnanouptime(&rts); 640 timespecadd(tsp, &rts, &ats); 641 } else { 642 ats.tv_sec = 0; 643 ats.tv_nsec = 0; 644 } 645 timo = 0; 646 647 if (sigmask) 648 dosigsuspend(p, *sigmask &~ sigcantmask); 649 650 retry: 651 ncoll = nselcoll; 652 atomic_setbits_int(&p->p_flag, P_SELECT); 653 error = selscan(p, pibits[0], pobits[0], nd, ni, retval); 654 if (error || *retval) 655 goto done; 656 if (tsp) { 657 getnanouptime(&rts); 658 if (timespeccmp(&rts, &ats, >=)) 659 goto done; 660 timespecsub(&ats, &rts, &tts); 661 timo = tts.tv_sec > 24 * 60 * 60 ? 662 24 * 60 * 60 * hz : tstohz(&tts); 663 } 664 s = splhigh(); 665 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { 666 splx(s); 667 goto retry; 668 } 669 atomic_clearbits_int(&p->p_flag, P_SELECT); 670 error = tsleep(&selwait, PSOCK | PCATCH, "select", timo); 671 splx(s); 672 if (error == 0) 673 goto retry; 674 done: 675 atomic_clearbits_int(&p->p_flag, P_SELECT); 676 /* select is not restarted after signals... */ 677 if (error == ERESTART) 678 error = EINTR; 679 if (error == EWOULDBLOCK) 680 error = 0; 681 #define putbits(name, x) \ 682 if (name && (error2 = copyout(pobits[x], name, ni))) \ 683 error = error2; 684 if (error == 0) { 685 int error2; 686 687 putbits(in, 0); 688 putbits(ou, 1); 689 putbits(ex, 2); 690 #undef putbits 691 #ifdef KTRACE 692 if (ni > 0 && KTRPOINT(p, KTR_STRUCT)) { 693 if (in) ktrfdset(p, pobits[0], ni); 694 if (ou) ktrfdset(p, pobits[1], ni); 695 if (ex) ktrfdset(p, pobits[2], ni); 696 } 697 #endif 698 } 699 700 if (pibits[0] != (fd_set *)&bits[0]) 701 free(pibits[0], M_TEMP, 6 * ni); 702 return (error); 703 } 704 705 int 706 selscan(struct proc *p, fd_set *ibits, fd_set *obits, int nfd, int ni, 707 register_t *retval) 708 { 709 caddr_t cibits = (caddr_t)ibits, cobits = (caddr_t)obits; 710 struct filedesc *fdp = p->p_fd; 711 int msk, i, j, fd; 712 fd_mask bits; 713 struct file *fp; 714 int n = 0; 715 static const int flag[3] = { POLLIN, POLLOUT|POLLNOHUP, POLLPRI }; 716 717 for (msk = 0; msk < 3; msk++) { 718 fd_set *pibits = (fd_set *)&cibits[msk*ni]; 719 fd_set *pobits = (fd_set *)&cobits[msk*ni]; 720 721 for (i = 0; i < nfd; i += NFDBITS) { 722 bits = pibits->fds_bits[i/NFDBITS]; 723 while ((j = ffs(bits)) && (fd = i + --j) < nfd) { 724 bits &= ~(1 << j); 725 if ((fp = fd_getfile(fdp, fd)) == NULL) 726 return (EBADF); 727 FREF(fp); 728 if ((*fp->f_ops->fo_poll)(fp, flag[msk], p)) { 729 FD_SET(fd, pobits); 730 n++; 731 } 732 FRELE(fp, p); 733 } 734 } 735 } 736 *retval = n; 737 return (0); 738 } 739 740 /*ARGSUSED*/ 741 int 742 seltrue(dev_t dev, int events, struct proc *p) 743 { 744 745 return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)); 746 } 747 748 int 749 selfalse(dev_t dev, int events, struct proc *p) 750 { 751 752 return (0); 753 } 754 755 /* 756 * Record a select request. 757 */ 758 void 759 selrecord(struct proc *selector, struct selinfo *sip) 760 { 761 struct proc *p; 762 pid_t mypid; 763 764 mypid = selector->p_pid; 765 if (sip->si_selpid == mypid) 766 return; 767 if (sip->si_selpid && (p = pfind(sip->si_selpid)) && 768 p->p_wchan == (caddr_t)&selwait) 769 sip->si_flags |= SI_COLL; 770 else 771 sip->si_selpid = mypid; 772 } 773 774 /* 775 * Do a wakeup when a selectable event occurs. 776 */ 777 void 778 selwakeup(struct selinfo *sip) 779 { 780 struct proc *p; 781 int s; 782 783 KNOTE(&sip->si_note, 0); 784 if (sip->si_selpid == 0) 785 return; 786 if (sip->si_flags & SI_COLL) { 787 nselcoll++; 788 sip->si_flags &= ~SI_COLL; 789 wakeup(&selwait); 790 } 791 p = pfind(sip->si_selpid); 792 sip->si_selpid = 0; 793 if (p != NULL) { 794 SCHED_LOCK(s); 795 if (p->p_wchan == (caddr_t)&selwait) { 796 if (p->p_stat == SSLEEP) 797 setrunnable(p); 798 else 799 unsleep(p); 800 } else if (p->p_flag & P_SELECT) 801 atomic_clearbits_int(&p->p_flag, P_SELECT); 802 SCHED_UNLOCK(s); 803 } 804 } 805 806 void 807 pollscan(struct proc *p, struct pollfd *pl, u_int nfd, register_t *retval) 808 { 809 struct filedesc *fdp = p->p_fd; 810 struct file *fp; 811 u_int i; 812 int n = 0; 813 814 for (i = 0; i < nfd; i++, pl++) { 815 /* Check the file descriptor. */ 816 if (pl->fd < 0) { 817 pl->revents = 0; 818 continue; 819 } 820 if ((fp = fd_getfile(fdp, pl->fd)) == NULL) { 821 pl->revents = POLLNVAL; 822 n++; 823 continue; 824 } 825 FREF(fp); 826 pl->revents = (*fp->f_ops->fo_poll)(fp, pl->events, p); 827 FRELE(fp, p); 828 if (pl->revents != 0) 829 n++; 830 } 831 *retval = n; 832 } 833 834 /* 835 * Only copyout the revents field. 836 */ 837 int 838 pollout(struct pollfd *pl, struct pollfd *upl, u_int nfds) 839 { 840 int error = 0; 841 u_int i = 0; 842 843 while (!error && i++ < nfds) { 844 error = copyout(&pl->revents, &upl->revents, 845 sizeof(upl->revents)); 846 pl++; 847 upl++; 848 } 849 850 return (error); 851 } 852 853 /* 854 * We are using the same mechanism as select only we encode/decode args 855 * differently. 856 */ 857 int 858 sys_poll(struct proc *p, void *v, register_t *retval) 859 { 860 struct sys_poll_args /* { 861 syscallarg(struct pollfd *) fds; 862 syscallarg(u_int) nfds; 863 syscallarg(int) timeout; 864 } */ *uap = v; 865 866 struct timespec ts, *tsp = NULL; 867 int msec = SCARG(uap, timeout); 868 869 if (msec != INFTIM) { 870 if (msec < 0) 871 return (EINVAL); 872 ts.tv_sec = msec / 1000; 873 ts.tv_nsec = (msec - (ts.tv_sec * 1000)) * 1000000; 874 tsp = &ts; 875 } 876 877 return (doppoll(p, SCARG(uap, fds), SCARG(uap, nfds), tsp, NULL, 878 retval)); 879 } 880 881 int 882 sys_ppoll(struct proc *p, void *v, register_t *retval) 883 { 884 struct sys_ppoll_args /* { 885 syscallarg(struct pollfd *) fds; 886 syscallarg(u_int) nfds; 887 syscallarg(const struct timespec *) ts; 888 syscallarg(const sigset_t *) mask; 889 } */ *uap = v; 890 891 int error; 892 struct timespec ts, *tsp = NULL; 893 sigset_t ss, *ssp = NULL; 894 895 if (SCARG(uap, ts) != NULL) { 896 if ((error = copyin(SCARG(uap, ts), &ts, sizeof ts)) != 0) 897 return (error); 898 if ((error = timespecfix(&ts)) != 0) 899 return (error); 900 #ifdef KTRACE 901 if (KTRPOINT(p, KTR_STRUCT)) 902 ktrreltimespec(p, &ts); 903 #endif 904 tsp = &ts; 905 } 906 907 if (SCARG(uap, mask) != NULL) { 908 if ((error = copyin(SCARG(uap, mask), &ss, sizeof ss)) != 0) 909 return (error); 910 ssp = &ss; 911 } 912 913 return (doppoll(p, SCARG(uap, fds), SCARG(uap, nfds), tsp, ssp, 914 retval)); 915 } 916 917 int 918 doppoll(struct proc *p, struct pollfd *fds, u_int nfds, 919 const struct timespec *tsp, const sigset_t *sigmask, register_t *retval) 920 { 921 size_t sz; 922 struct pollfd pfds[4], *pl = pfds; 923 struct timespec ats, rts, tts; 924 int timo, ncoll, i, s, error; 925 926 /* Standards say no more than MAX_OPEN; this is possibly better. */ 927 if (nfds > min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles)) 928 return (EINVAL); 929 930 /* optimize for the default case, of a small nfds value */ 931 if (nfds > nitems(pfds)) { 932 pl = mallocarray(nfds, sizeof(*pl), M_TEMP, 933 M_WAITOK | M_CANFAIL); 934 if (pl == NULL) 935 return (EINVAL); 936 } 937 938 sz = nfds * sizeof(*pl); 939 940 if ((error = copyin(fds, pl, sz)) != 0) 941 goto bad; 942 943 for (i = 0; i < nfds; i++) { 944 pl[i].events &= ~POLLNOHUP; 945 pl[i].revents = 0; 946 } 947 948 if (tsp != NULL) { 949 getnanouptime(&rts); 950 timespecadd(tsp, &rts, &ats); 951 } else { 952 ats.tv_sec = 0; 953 ats.tv_nsec = 0; 954 } 955 timo = 0; 956 957 if (sigmask) 958 dosigsuspend(p, *sigmask &~ sigcantmask); 959 960 retry: 961 ncoll = nselcoll; 962 atomic_setbits_int(&p->p_flag, P_SELECT); 963 pollscan(p, pl, nfds, retval); 964 if (*retval) 965 goto done; 966 if (tsp != NULL) { 967 getnanouptime(&rts); 968 if (timespeccmp(&rts, &ats, >=)) 969 goto done; 970 timespecsub(&ats, &rts, &tts); 971 timo = tts.tv_sec > 24 * 60 * 60 ? 972 24 * 60 * 60 * hz : tstohz(&tts); 973 } 974 s = splhigh(); 975 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { 976 splx(s); 977 goto retry; 978 } 979 atomic_clearbits_int(&p->p_flag, P_SELECT); 980 error = tsleep(&selwait, PSOCK | PCATCH, "poll", timo); 981 splx(s); 982 if (error == 0) 983 goto retry; 984 985 done: 986 atomic_clearbits_int(&p->p_flag, P_SELECT); 987 /* 988 * NOTE: poll(2) is not restarted after a signal and EWOULDBLOCK is 989 * ignored (since the whole point is to see what would block). 990 */ 991 switch (error) { 992 case ERESTART: 993 error = pollout(pl, fds, nfds); 994 if (error == 0) 995 error = EINTR; 996 break; 997 case EWOULDBLOCK: 998 case 0: 999 error = pollout(pl, fds, nfds); 1000 break; 1001 } 1002 bad: 1003 if (pl != pfds) 1004 free(pl, M_TEMP, sz); 1005 return (error); 1006 } 1007 1008 /* 1009 * utrace system call 1010 */ 1011 /* ARGSUSED */ 1012 int 1013 sys_utrace(struct proc *curp, void *v, register_t *retval) 1014 { 1015 #ifdef KTRACE 1016 struct sys_utrace_args /* { 1017 syscallarg(const char *) label; 1018 syscallarg(const void *) addr; 1019 syscallarg(size_t) len; 1020 } */ *uap = v; 1021 return (ktruser(curp, SCARG(uap, label), SCARG(uap, addr), 1022 SCARG(uap, len))); 1023 #else 1024 return (0); 1025 #endif 1026 } 1027