1 /* $NetBSD: sys_generic.c,v 1.62 2002/03/22 18:58:59 jdolecek Exp $ */ 2 3 /* 4 * Copyright (c) 1982, 1986, 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. All advertising materials mentioning features or use of this software 21 * must display the following acknowledgement: 22 * This product includes software developed by the University of 23 * California, Berkeley and its contributors. 24 * 4. Neither the name of the University nor the names of its contributors 25 * may be used to endorse or promote products derived from this software 26 * without specific prior written permission. 27 * 28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 31 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 38 * SUCH DAMAGE. 39 * 40 * @(#)sys_generic.c 8.9 (Berkeley) 2/14/95 41 */ 42 43 #include <sys/cdefs.h> 44 __KERNEL_RCSID(0, "$NetBSD: sys_generic.c,v 1.62 2002/03/22 18:58:59 jdolecek Exp $"); 45 46 #include "opt_ktrace.h" 47 48 #include <sys/param.h> 49 #include <sys/systm.h> 50 #include <sys/filedesc.h> 51 #include <sys/ioctl.h> 52 #include <sys/file.h> 53 #include <sys/proc.h> 54 #include <sys/socketvar.h> 55 #include <sys/signalvar.h> 56 #include <sys/uio.h> 57 #include <sys/kernel.h> 58 #include <sys/stat.h> 59 #include <sys/malloc.h> 60 #include <sys/poll.h> 61 #ifdef KTRACE 62 #include <sys/ktrace.h> 63 #endif 64 65 #include <sys/mount.h> 66 #include <sys/syscallargs.h> 67 68 int selscan __P((struct proc *, fd_mask *, fd_mask *, int, register_t *)); 69 int pollscan __P((struct proc *, struct pollfd *, int, register_t *)); 70 71 /* 72 * Read system call. 73 */ 74 /* ARGSUSED */ 75 int 76 sys_read(struct proc *p, void *v, register_t *retval) 77 { 78 struct sys_read_args /* { 79 syscallarg(int) fd; 80 syscallarg(void *) buf; 81 syscallarg(size_t) nbyte; 82 } */ *uap = v; 83 int fd; 84 struct file *fp; 85 struct filedesc *fdp; 86 87 fd = SCARG(uap, fd); 88 fdp = p->p_fd; 89 90 if ((fp = fd_getfile(fdp, fd)) == NULL) 91 return (EBADF); 92 93 if ((fp->f_flag & FREAD) == 0) 94 return (EBADF); 95 96 FILE_USE(fp); 97 98 /* dofileread() will unuse the descriptor for us */ 99 return (dofileread(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 100 &fp->f_offset, FOF_UPDATE_OFFSET, retval)); 101 } 102 103 int 104 dofileread(struct proc *p, int fd, struct file *fp, void *buf, size_t nbyte, 105 off_t *offset, int flags, register_t *retval) 106 { 107 struct uio auio; 108 struct iovec aiov; 109 long cnt, error; 110 #ifdef KTRACE 111 struct iovec ktriov; 112 #endif 113 error = 0; 114 115 aiov.iov_base = (caddr_t)buf; 116 aiov.iov_len = nbyte; 117 auio.uio_iov = &aiov; 118 auio.uio_iovcnt = 1; 119 auio.uio_resid = nbyte; 120 auio.uio_rw = UIO_READ; 121 auio.uio_segflg = UIO_USERSPACE; 122 auio.uio_procp = p; 123 124 /* 125 * Reads return ssize_t because -1 is returned on error. Therefore 126 * we must restrict the length to SSIZE_MAX to avoid garbage return 127 * values. 128 */ 129 if (auio.uio_resid > SSIZE_MAX) { 130 error = EINVAL; 131 goto out; 132 } 133 134 #ifdef KTRACE 135 /* 136 * if tracing, save a copy of iovec 137 */ 138 if (KTRPOINT(p, KTR_GENIO)) 139 ktriov = aiov; 140 #endif 141 cnt = auio.uio_resid; 142 error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags); 143 if (error) 144 if (auio.uio_resid != cnt && (error == ERESTART || 145 error == EINTR || error == EWOULDBLOCK)) 146 error = 0; 147 cnt -= auio.uio_resid; 148 #ifdef KTRACE 149 if (KTRPOINT(p, KTR_GENIO) && error == 0) 150 ktrgenio(p, fd, UIO_READ, &ktriov, cnt, error); 151 #endif 152 *retval = cnt; 153 out: 154 FILE_UNUSE(fp, p); 155 return (error); 156 } 157 158 /* 159 * Scatter read system call. 160 */ 161 int 162 sys_readv(struct proc *p, void *v, register_t *retval) 163 { 164 struct sys_readv_args /* { 165 syscallarg(int) fd; 166 syscallarg(const struct iovec *) iovp; 167 syscallarg(int) iovcnt; 168 } */ *uap = v; 169 int fd; 170 struct file *fp; 171 struct filedesc *fdp; 172 173 fd = SCARG(uap, fd); 174 fdp = p->p_fd; 175 176 if ((fp = fd_getfile(fdp, fd)) == NULL) 177 return (EBADF); 178 179 if ((fp->f_flag & FREAD) == 0) 180 return (EBADF); 181 182 FILE_USE(fp); 183 184 /* dofilereadv() will unuse the descriptor for us */ 185 return (dofilereadv(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt), 186 &fp->f_offset, FOF_UPDATE_OFFSET, retval)); 187 } 188 189 int 190 dofilereadv(struct proc *p, int fd, struct file *fp, const struct iovec *iovp, 191 int iovcnt, off_t *offset, int flags, register_t *retval) 192 { 193 struct uio auio; 194 struct iovec *iov, *needfree, aiov[UIO_SMALLIOV]; 195 long i, cnt, error; 196 u_int iovlen; 197 #ifdef KTRACE 198 struct iovec *ktriov; 199 #endif 200 201 error = 0; 202 #ifdef KTRACE 203 ktriov = NULL; 204 #endif 205 /* note: can't use iovlen until iovcnt is validated */ 206 iovlen = iovcnt * sizeof(struct iovec); 207 if ((u_int)iovcnt > UIO_SMALLIOV) { 208 if ((u_int)iovcnt > IOV_MAX) { 209 error = EINVAL; 210 goto out; 211 } 212 iov = malloc(iovlen, M_IOV, M_WAITOK); 213 needfree = iov; 214 } else if ((u_int)iovcnt > 0) { 215 iov = aiov; 216 needfree = NULL; 217 } else { 218 error = EINVAL; 219 goto out; 220 } 221 222 auio.uio_iov = iov; 223 auio.uio_iovcnt = iovcnt; 224 auio.uio_rw = UIO_READ; 225 auio.uio_segflg = UIO_USERSPACE; 226 auio.uio_procp = p; 227 error = copyin(iovp, iov, iovlen); 228 if (error) 229 goto done; 230 auio.uio_resid = 0; 231 for (i = 0; i < iovcnt; i++) { 232 auio.uio_resid += iov->iov_len; 233 /* 234 * Reads return ssize_t because -1 is returned on error. 235 * Therefore we must restrict the length to SSIZE_MAX to 236 * avoid garbage return values. 237 */ 238 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) { 239 error = EINVAL; 240 goto done; 241 } 242 iov++; 243 } 244 #ifdef KTRACE 245 /* 246 * if tracing, save a copy of iovec 247 */ 248 if (KTRPOINT(p, KTR_GENIO)) { 249 ktriov = malloc(iovlen, M_TEMP, M_WAITOK); 250 memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen); 251 } 252 #endif 253 cnt = auio.uio_resid; 254 error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags); 255 if (error) 256 if (auio.uio_resid != cnt && (error == ERESTART || 257 error == EINTR || error == EWOULDBLOCK)) 258 error = 0; 259 cnt -= auio.uio_resid; 260 #ifdef KTRACE 261 if (ktriov != NULL) { 262 if (error == 0) 263 ktrgenio(p, fd, UIO_READ, ktriov, cnt, error); 264 free(ktriov, M_TEMP); 265 } 266 #endif 267 *retval = cnt; 268 done: 269 if (needfree) 270 free(needfree, M_IOV); 271 out: 272 FILE_UNUSE(fp, p); 273 return (error); 274 } 275 276 /* 277 * Write system call 278 */ 279 int 280 sys_write(struct proc *p, void *v, register_t *retval) 281 { 282 struct sys_write_args /* { 283 syscallarg(int) fd; 284 syscallarg(const void *) buf; 285 syscallarg(size_t) nbyte; 286 } */ *uap = v; 287 int fd; 288 struct file *fp; 289 struct filedesc *fdp; 290 291 fd = SCARG(uap, fd); 292 fdp = p->p_fd; 293 294 if ((fp = fd_getfile(fdp, fd)) == NULL) 295 return (EBADF); 296 297 if ((fp->f_flag & FWRITE) == 0) 298 return (EBADF); 299 300 FILE_USE(fp); 301 302 /* dofilewrite() will unuse the descriptor for us */ 303 return (dofilewrite(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 304 &fp->f_offset, FOF_UPDATE_OFFSET, retval)); 305 } 306 307 int 308 dofilewrite(struct proc *p, int fd, struct file *fp, const void *buf, 309 size_t nbyte, off_t *offset, int flags, register_t *retval) 310 { 311 struct uio auio; 312 struct iovec aiov; 313 long cnt, error; 314 #ifdef KTRACE 315 struct iovec ktriov; 316 #endif 317 318 error = 0; 319 aiov.iov_base = (caddr_t)buf; /* XXX kills const */ 320 aiov.iov_len = nbyte; 321 auio.uio_iov = &aiov; 322 auio.uio_iovcnt = 1; 323 auio.uio_resid = nbyte; 324 auio.uio_rw = UIO_WRITE; 325 auio.uio_segflg = UIO_USERSPACE; 326 auio.uio_procp = p; 327 328 /* 329 * Writes return ssize_t because -1 is returned on error. Therefore 330 * we must restrict the length to SSIZE_MAX to avoid garbage return 331 * values. 332 */ 333 if (auio.uio_resid > SSIZE_MAX) { 334 error = EINVAL; 335 goto out; 336 } 337 338 #ifdef KTRACE 339 /* 340 * if tracing, save a copy of iovec 341 */ 342 if (KTRPOINT(p, KTR_GENIO)) 343 ktriov = aiov; 344 #endif 345 cnt = auio.uio_resid; 346 error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags); 347 if (error) { 348 if (auio.uio_resid != cnt && (error == ERESTART || 349 error == EINTR || error == EWOULDBLOCK)) 350 error = 0; 351 if (error == EPIPE) 352 psignal(p, SIGPIPE); 353 } 354 cnt -= auio.uio_resid; 355 #ifdef KTRACE 356 if (KTRPOINT(p, KTR_GENIO) && error == 0) 357 ktrgenio(p, fd, UIO_WRITE, &ktriov, cnt, error); 358 #endif 359 *retval = cnt; 360 out: 361 FILE_UNUSE(fp, p); 362 return (error); 363 } 364 365 /* 366 * Gather write system call 367 */ 368 int 369 sys_writev(struct proc *p, void *v, register_t *retval) 370 { 371 struct sys_writev_args /* { 372 syscallarg(int) fd; 373 syscallarg(const struct iovec *) iovp; 374 syscallarg(int) iovcnt; 375 } */ *uap = v; 376 int fd; 377 struct file *fp; 378 struct filedesc *fdp; 379 380 fd = SCARG(uap, fd); 381 fdp = p->p_fd; 382 383 if ((fp = fd_getfile(fdp, fd)) == NULL) 384 return (EBADF); 385 386 if ((fp->f_flag & FWRITE) == 0) 387 return (EBADF); 388 389 FILE_USE(fp); 390 391 /* dofilewritev() will unuse the descriptor for us */ 392 return (dofilewritev(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt), 393 &fp->f_offset, FOF_UPDATE_OFFSET, retval)); 394 } 395 396 int 397 dofilewritev(struct proc *p, int fd, struct file *fp, const struct iovec *iovp, 398 int iovcnt, off_t *offset, int flags, register_t *retval) 399 { 400 struct uio auio; 401 struct iovec *iov, *needfree, aiov[UIO_SMALLIOV]; 402 long i, cnt, error; 403 u_int iovlen; 404 #ifdef KTRACE 405 struct iovec *ktriov; 406 #endif 407 408 error = 0; 409 #ifdef KTRACE 410 ktriov = NULL; 411 #endif 412 /* note: can't use iovlen until iovcnt is validated */ 413 iovlen = iovcnt * sizeof(struct iovec); 414 if ((u_int)iovcnt > UIO_SMALLIOV) { 415 if ((u_int)iovcnt > IOV_MAX) { 416 error = EINVAL; 417 goto out; 418 } 419 iov = malloc(iovlen, M_IOV, M_WAITOK); 420 needfree = iov; 421 } else if ((u_int)iovcnt > 0) { 422 iov = aiov; 423 needfree = NULL; 424 } else { 425 error = EINVAL; 426 goto out; 427 } 428 429 auio.uio_iov = iov; 430 auio.uio_iovcnt = iovcnt; 431 auio.uio_rw = UIO_WRITE; 432 auio.uio_segflg = UIO_USERSPACE; 433 auio.uio_procp = p; 434 error = copyin(iovp, iov, iovlen); 435 if (error) 436 goto done; 437 auio.uio_resid = 0; 438 for (i = 0; i < iovcnt; i++) { 439 auio.uio_resid += iov->iov_len; 440 /* 441 * Writes return ssize_t because -1 is returned on error. 442 * Therefore we must restrict the length to SSIZE_MAX to 443 * avoid garbage return values. 444 */ 445 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) { 446 error = EINVAL; 447 goto done; 448 } 449 iov++; 450 } 451 #ifdef KTRACE 452 /* 453 * if tracing, save a copy of iovec 454 */ 455 if (KTRPOINT(p, KTR_GENIO)) { 456 ktriov = malloc(iovlen, M_TEMP, M_WAITOK); 457 memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen); 458 } 459 #endif 460 cnt = auio.uio_resid; 461 error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags); 462 if (error) { 463 if (auio.uio_resid != cnt && (error == ERESTART || 464 error == EINTR || error == EWOULDBLOCK)) 465 error = 0; 466 if (error == EPIPE) 467 psignal(p, SIGPIPE); 468 } 469 cnt -= auio.uio_resid; 470 #ifdef KTRACE 471 if (KTRPOINT(p, KTR_GENIO)) 472 if (error == 0) { 473 ktrgenio(p, fd, UIO_WRITE, ktriov, cnt, error); 474 free(ktriov, M_TEMP); 475 } 476 #endif 477 *retval = cnt; 478 done: 479 if (needfree) 480 free(needfree, M_IOV); 481 out: 482 FILE_UNUSE(fp, p); 483 return (error); 484 } 485 486 /* 487 * Ioctl system call 488 */ 489 /* ARGSUSED */ 490 int 491 sys_ioctl(struct proc *p, void *v, register_t *retval) 492 { 493 struct sys_ioctl_args /* { 494 syscallarg(int) fd; 495 syscallarg(u_long) com; 496 syscallarg(caddr_t) data; 497 } */ *uap = v; 498 struct file *fp; 499 struct filedesc *fdp; 500 u_long com; 501 int error; 502 u_int size; 503 caddr_t data, memp; 504 int tmp; 505 #define STK_PARAMS 128 506 u_long stkbuf[STK_PARAMS/sizeof(u_long)]; 507 508 error = 0; 509 fdp = p->p_fd; 510 511 if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL) 512 return (EBADF); 513 514 FILE_USE(fp); 515 516 if ((fp->f_flag & (FREAD | FWRITE)) == 0) { 517 error = EBADF; 518 goto out; 519 } 520 521 switch (com = SCARG(uap, com)) { 522 case FIONCLEX: 523 fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE; 524 goto out; 525 526 case FIOCLEX: 527 fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE; 528 goto out; 529 } 530 531 /* 532 * Interpret high order word to find amount of data to be 533 * copied to/from the user's address space. 534 */ 535 size = IOCPARM_LEN(com); 536 if (size > IOCPARM_MAX) { 537 error = ENOTTY; 538 goto out; 539 } 540 memp = NULL; 541 if (size > sizeof(stkbuf)) { 542 memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK); 543 data = memp; 544 } else 545 data = (caddr_t)stkbuf; 546 if (com&IOC_IN) { 547 if (size) { 548 error = copyin(SCARG(uap, data), data, size); 549 if (error) { 550 if (memp) 551 free(memp, M_IOCTLOPS); 552 goto out; 553 } 554 } else 555 *(caddr_t *)data = SCARG(uap, data); 556 } else if ((com&IOC_OUT) && size) 557 /* 558 * Zero the buffer so the user always 559 * gets back something deterministic. 560 */ 561 memset(data, 0, size); 562 else if (com&IOC_VOID) 563 *(caddr_t *)data = SCARG(uap, data); 564 565 switch (com) { 566 567 case FIONBIO: 568 if ((tmp = *(int *)data) != 0) 569 fp->f_flag |= FNONBLOCK; 570 else 571 fp->f_flag &= ~FNONBLOCK; 572 error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p); 573 break; 574 575 case FIOASYNC: 576 if ((tmp = *(int *)data) != 0) 577 fp->f_flag |= FASYNC; 578 else 579 fp->f_flag &= ~FASYNC; 580 error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p); 581 break; 582 583 case FIOSETOWN: 584 tmp = *(int *)data; 585 if (fp->f_type == DTYPE_SOCKET) { 586 ((struct socket *)fp->f_data)->so_pgid = tmp; 587 error = 0; 588 break; 589 } 590 if (tmp <= 0) { 591 tmp = -tmp; 592 } else { 593 struct proc *p1 = pfind(tmp); 594 if (p1 == 0) { 595 error = ESRCH; 596 break; 597 } 598 tmp = p1->p_pgrp->pg_id; 599 } 600 error = (*fp->f_ops->fo_ioctl) 601 (fp, TIOCSPGRP, (caddr_t)&tmp, p); 602 break; 603 604 case FIOGETOWN: 605 if (fp->f_type == DTYPE_SOCKET) { 606 error = 0; 607 *(int *)data = ((struct socket *)fp->f_data)->so_pgid; 608 break; 609 } 610 error = (*fp->f_ops->fo_ioctl)(fp, TIOCGPGRP, data, p); 611 if (error == 0) 612 *(int *)data = -*(int *)data; 613 break; 614 615 default: 616 error = (*fp->f_ops->fo_ioctl)(fp, com, data, p); 617 /* 618 * Copy any data to user, size was 619 * already set and checked above. 620 */ 621 if (error == 0 && (com&IOC_OUT) && size) 622 error = copyout(data, SCARG(uap, data), size); 623 break; 624 } 625 if (memp) 626 free(memp, M_IOCTLOPS); 627 out: 628 FILE_UNUSE(fp, p); 629 switch (error) { 630 case -1: 631 printf("sys_ioctl: _IO%s%s('%c', %lu, %lu) returned -1: " 632 "pid=%d comm=%s\n", 633 (com & IOC_IN) ? "W" : "", (com & IOC_OUT) ? "R" : "", 634 (char)IOCGROUP(com), (com & 0xff), IOCPARM_LEN(com), 635 p->p_pid, p->p_comm); 636 /* FALLTHROUGH */ 637 case EPASSTHROUGH: 638 error = ENOTTY; 639 /* FALLTHROUGH */ 640 default: 641 return (error); 642 } 643 } 644 645 int selwait, nselcoll; 646 647 /* 648 * Select system call. 649 */ 650 int 651 sys_select(struct proc *p, void *v, register_t *retval) 652 { 653 struct sys_select_args /* { 654 syscallarg(int) nd; 655 syscallarg(fd_set *) in; 656 syscallarg(fd_set *) ou; 657 syscallarg(fd_set *) ex; 658 syscallarg(struct timeval *) tv; 659 } */ *uap = v; 660 caddr_t bits; 661 char smallbits[howmany(FD_SETSIZE, NFDBITS) * 662 sizeof(fd_mask) * 6]; 663 struct timeval atv; 664 int s, ncoll, error, timo; 665 size_t ni; 666 667 error = 0; 668 if (SCARG(uap, nd) < 0) 669 return (EINVAL); 670 if (SCARG(uap, nd) > p->p_fd->fd_nfiles) { 671 /* forgiving; slightly wrong */ 672 SCARG(uap, nd) = p->p_fd->fd_nfiles; 673 } 674 ni = howmany(SCARG(uap, nd), NFDBITS) * sizeof(fd_mask); 675 if (ni * 6 > sizeof(smallbits)) 676 bits = malloc(ni * 6, M_TEMP, M_WAITOK); 677 else 678 bits = smallbits; 679 680 #define getbits(name, x) \ 681 if (SCARG(uap, name)) { \ 682 error = copyin(SCARG(uap, name), bits + ni * x, ni); \ 683 if (error) \ 684 goto done; \ 685 } else \ 686 memset(bits + ni * x, 0, ni); 687 getbits(in, 0); 688 getbits(ou, 1); 689 getbits(ex, 2); 690 #undef getbits 691 692 if (SCARG(uap, tv)) { 693 error = copyin(SCARG(uap, tv), (caddr_t)&atv, 694 sizeof(atv)); 695 if (error) 696 goto done; 697 if (itimerfix(&atv)) { 698 error = EINVAL; 699 goto done; 700 } 701 s = splclock(); 702 timeradd(&atv, &time, &atv); 703 splx(s); 704 } else 705 timo = 0; 706 retry: 707 ncoll = nselcoll; 708 p->p_flag |= P_SELECT; 709 error = selscan(p, (fd_mask *)(bits + ni * 0), 710 (fd_mask *)(bits + ni * 3), SCARG(uap, nd), retval); 711 if (error || *retval) 712 goto done; 713 if (SCARG(uap, tv)) { 714 /* 715 * We have to recalculate the timeout on every retry. 716 */ 717 timo = hzto(&atv); 718 if (timo <= 0) 719 goto done; 720 } 721 s = splsched(); 722 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { 723 splx(s); 724 goto retry; 725 } 726 p->p_flag &= ~P_SELECT; 727 error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo); 728 splx(s); 729 if (error == 0) 730 goto retry; 731 done: 732 p->p_flag &= ~P_SELECT; 733 /* select is not restarted after signals... */ 734 if (error == ERESTART) 735 error = EINTR; 736 if (error == EWOULDBLOCK) 737 error = 0; 738 if (error == 0) { 739 740 #define putbits(name, x) \ 741 if (SCARG(uap, name)) { \ 742 error = copyout(bits + ni * x, SCARG(uap, name), ni); \ 743 if (error) \ 744 goto out; \ 745 } 746 putbits(in, 3); 747 putbits(ou, 4); 748 putbits(ex, 5); 749 #undef putbits 750 } 751 out: 752 if (ni * 6 > sizeof(smallbits)) 753 free(bits, M_TEMP); 754 return (error); 755 } 756 757 int 758 selscan(struct proc *p, fd_mask *ibitp, fd_mask *obitp, int nfd, 759 register_t *retval) 760 { 761 struct filedesc *fdp; 762 int msk, i, j, fd, n; 763 fd_mask ibits, obits; 764 struct file *fp; 765 static int flag[3] = { POLLRDNORM | POLLHUP | POLLERR, 766 POLLWRNORM | POLLHUP | POLLERR, 767 POLLRDBAND }; 768 769 fdp = p->p_fd; 770 n = 0; 771 for (msk = 0; msk < 3; msk++) { 772 for (i = 0; i < nfd; i += NFDBITS) { 773 ibits = *ibitp++; 774 obits = 0; 775 while ((j = ffs(ibits)) && (fd = i + --j) < nfd) { 776 ibits &= ~(1 << j); 777 if ((fp = fd_getfile(fdp, fd)) == NULL) 778 return (EBADF); 779 FILE_USE(fp); 780 if ((*fp->f_ops->fo_poll)(fp, flag[msk], p)) { 781 obits |= (1 << j); 782 n++; 783 } 784 FILE_UNUSE(fp, p); 785 } 786 *obitp++ = obits; 787 } 788 } 789 *retval = n; 790 return (0); 791 } 792 793 /* 794 * Poll system call. 795 */ 796 int 797 sys_poll(struct proc *p, void *v, register_t *retval) 798 { 799 struct sys_poll_args /* { 800 syscallarg(struct pollfd *) fds; 801 syscallarg(u_int) nfds; 802 syscallarg(int) timeout; 803 } */ *uap = v; 804 caddr_t bits; 805 char smallbits[32 * sizeof(struct pollfd)]; 806 struct timeval atv; 807 int s, ncoll, error, timo; 808 size_t ni; 809 810 error = 0; 811 if (SCARG(uap, nfds) > p->p_fd->fd_nfiles) { 812 /* forgiving; slightly wrong */ 813 SCARG(uap, nfds) = p->p_fd->fd_nfiles; 814 } 815 ni = SCARG(uap, nfds) * sizeof(struct pollfd); 816 if (ni > sizeof(smallbits)) 817 bits = malloc(ni, M_TEMP, M_WAITOK); 818 else 819 bits = smallbits; 820 821 error = copyin(SCARG(uap, fds), bits, ni); 822 if (error) 823 goto done; 824 825 if (SCARG(uap, timeout) != INFTIM) { 826 atv.tv_sec = SCARG(uap, timeout) / 1000; 827 atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000; 828 if (itimerfix(&atv)) { 829 error = EINVAL; 830 goto done; 831 } 832 s = splclock(); 833 timeradd(&atv, &time, &atv); 834 splx(s); 835 } else 836 timo = 0; 837 retry: 838 ncoll = nselcoll; 839 p->p_flag |= P_SELECT; 840 error = pollscan(p, (struct pollfd *)bits, SCARG(uap, nfds), retval); 841 if (error || *retval) 842 goto done; 843 if (SCARG(uap, timeout) != INFTIM) { 844 /* 845 * We have to recalculate the timeout on every retry. 846 */ 847 timo = hzto(&atv); 848 if (timo <= 0) 849 goto done; 850 } 851 s = splsched(); 852 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { 853 splx(s); 854 goto retry; 855 } 856 p->p_flag &= ~P_SELECT; 857 error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo); 858 splx(s); 859 if (error == 0) 860 goto retry; 861 done: 862 p->p_flag &= ~P_SELECT; 863 /* poll is not restarted after signals... */ 864 if (error == ERESTART) 865 error = EINTR; 866 if (error == EWOULDBLOCK) 867 error = 0; 868 if (error == 0) { 869 error = copyout(bits, SCARG(uap, fds), ni); 870 if (error) 871 goto out; 872 } 873 out: 874 if (ni > sizeof(smallbits)) 875 free(bits, M_TEMP); 876 return (error); 877 } 878 879 int 880 pollscan(struct proc *p, struct pollfd *fds, int nfd, register_t *retval) 881 { 882 struct filedesc *fdp; 883 int i, n; 884 struct file *fp; 885 886 fdp = p->p_fd; 887 n = 0; 888 for (i = 0; i < nfd; i++, fds++) { 889 if (fds->fd >= fdp->fd_nfiles) { 890 fds->revents = POLLNVAL; 891 n++; 892 } else if (fds->fd < 0) { 893 fds->revents = 0; 894 } else { 895 if ((fp = fd_getfile(fdp, fds->fd)) == NULL) { 896 fds->revents = POLLNVAL; 897 n++; 898 } else { 899 FILE_USE(fp); 900 fds->revents = (*fp->f_ops->fo_poll)(fp, 901 fds->events | POLLERR | POLLHUP, p); 902 if (fds->revents != 0) 903 n++; 904 FILE_UNUSE(fp, p); 905 } 906 } 907 } 908 *retval = n; 909 return (0); 910 } 911 912 /*ARGSUSED*/ 913 int 914 seltrue(dev_t dev, int events, struct proc *p) 915 { 916 917 return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)); 918 } 919 920 /* 921 * Record a select request. 922 */ 923 void 924 selrecord(struct proc *selector, struct selinfo *sip) 925 { 926 struct proc *p; 927 pid_t mypid; 928 929 mypid = selector->p_pid; 930 if (sip->si_pid == mypid) 931 return; 932 if (sip->si_pid && (p = pfind(sip->si_pid)) && 933 p->p_wchan == (caddr_t)&selwait) 934 sip->si_flags |= SI_COLL; 935 else { 936 sip->si_flags &= ~SI_COLL; 937 sip->si_pid = mypid; 938 } 939 } 940 941 /* 942 * Do a wakeup when a selectable event occurs. 943 */ 944 void 945 selwakeup(sip) 946 struct selinfo *sip; 947 { 948 struct proc *p; 949 int s; 950 951 if (sip->si_pid == 0) 952 return; 953 if (sip->si_flags & SI_COLL) { 954 nselcoll++; 955 sip->si_flags &= ~SI_COLL; 956 wakeup((caddr_t)&selwait); 957 } 958 p = pfind(sip->si_pid); 959 sip->si_pid = 0; 960 if (p != NULL) { 961 SCHED_LOCK(s); 962 if (p->p_wchan == (caddr_t)&selwait) { 963 if (p->p_stat == SSLEEP) 964 setrunnable(p); 965 else 966 unsleep(p); 967 } else if (p->p_flag & P_SELECT) 968 p->p_flag &= ~P_SELECT; 969 SCHED_UNLOCK(s); 970 } 971 } 972