1 /* $OpenBSD: sys_generic.c,v 1.37 2002/02/13 19:08:06 art Exp $ */ 2 /* $NetBSD: sys_generic.c,v 1.24 1996/03/29 00:25:32 cgd Exp $ */ 3 4 /* 5 * Copyright (c) 1996 Theo de Raadt 6 * Copyright (c) 1982, 1986, 1989, 1993 7 * The Regents of the University of California. All rights reserved. 8 * (c) UNIX System Laboratories, Inc. 9 * All or some portions of this file are derived from material licensed 10 * to the University of California by American Telephone and Telegraph 11 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 12 * the permission of UNIX System Laboratories, Inc. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions 16 * are met: 17 * 1. Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 2. Redistributions in binary form must reproduce the above copyright 20 * notice, this list of conditions and the following disclaimer in the 21 * documentation and/or other materials provided with the distribution. 22 * 3. All advertising materials mentioning features or use of this software 23 * must display the following acknowledgement: 24 * This product includes software developed by the University of 25 * California, Berkeley and its contributors. 26 * 4. Neither the name of the University nor the names of its contributors 27 * may be used to endorse or promote products derived from this software 28 * without specific prior written permission. 29 * 30 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 31 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 32 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 33 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 34 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 35 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 36 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 37 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 38 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 39 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 40 * SUCH DAMAGE. 41 * 42 * @(#)sys_generic.c 8.5 (Berkeley) 1/21/94 43 */ 44 45 #include <sys/param.h> 46 #include <sys/systm.h> 47 #include <sys/filedesc.h> 48 #include <sys/ioctl.h> 49 #include <sys/file.h> 50 #include <sys/proc.h> 51 #include <sys/resourcevar.h> 52 #include <sys/socketvar.h> 53 #include <sys/signalvar.h> 54 #include <sys/uio.h> 55 #include <sys/kernel.h> 56 #include <sys/stat.h> 57 #include <sys/malloc.h> 58 #include <sys/poll.h> 59 #ifdef KTRACE 60 #include <sys/ktrace.h> 61 #endif 62 63 #include <sys/mount.h> 64 #include <sys/syscallargs.h> 65 66 int selscan __P((struct proc *, fd_set *, fd_set *, int, register_t *)); 67 int seltrue __P((dev_t, int, struct proc *)); 68 void pollscan __P((struct proc *, struct pollfd *, int, register_t *)); 69 70 /* 71 * Read system call. 72 */ 73 /* ARGSUSED */ 74 int 75 sys_read(p, v, retval) 76 struct proc *p; 77 void *v; 78 register_t *retval; 79 { 80 struct sys_read_args /* { 81 syscallarg(int) fd; 82 syscallarg(void *) buf; 83 syscallarg(size_t) nbyte; 84 } */ *uap = v; 85 int fd = SCARG(uap, fd); 86 struct file *fp; 87 struct filedesc *fdp = p->p_fd; 88 89 if ((fp = fd_getfile(fdp, fd)) == NULL) 90 return (EBADF); 91 if ((fp->f_flag & FREAD) == 0) 92 return (EBADF); 93 94 FREF(fp); 95 96 /* dofileread() will FRELE the descriptor for us */ 97 return (dofileread(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 98 &fp->f_offset, retval)); 99 } 100 101 int 102 dofileread(p, fd, fp, buf, nbyte, offset, retval) 103 struct proc *p; 104 int fd; 105 struct file *fp; 106 void *buf; 107 size_t nbyte; 108 off_t *offset; 109 register_t *retval; 110 { 111 struct uio auio; 112 struct iovec aiov; 113 long cnt, error = 0; 114 #ifdef KTRACE 115 struct iovec ktriov; 116 #endif 117 118 aiov.iov_base = (caddr_t)buf; 119 aiov.iov_len = nbyte; 120 auio.uio_iov = &aiov; 121 auio.uio_iovcnt = 1; 122 auio.uio_resid = nbyte; 123 auio.uio_rw = UIO_READ; 124 auio.uio_segflg = UIO_USERSPACE; 125 auio.uio_procp = p; 126 127 /* 128 * Reads return ssize_t because -1 is returned on error. Therefore 129 * we must restrict the length to SSIZE_MAX to avoid garbage return 130 * values. 131 */ 132 if (auio.uio_resid > SSIZE_MAX) { 133 error = EINVAL; 134 goto out; 135 } 136 137 #ifdef KTRACE 138 /* 139 * if tracing, save a copy of iovec 140 */ 141 if (KTRPOINT(p, KTR_GENIO)) 142 ktriov = aiov; 143 #endif 144 cnt = auio.uio_resid; 145 error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred); 146 if (error) 147 if (auio.uio_resid != cnt && (error == ERESTART || 148 error == EINTR || error == EWOULDBLOCK)) 149 error = 0; 150 cnt -= auio.uio_resid; 151 #ifdef KTRACE 152 if (KTRPOINT(p, KTR_GENIO) && error == 0) 153 ktrgenio(p, fd, UIO_READ, &ktriov, cnt, error); 154 #endif 155 *retval = cnt; 156 out: 157 FRELE(fp); 158 return (error); 159 } 160 161 /* 162 * Scatter read system call. 163 */ 164 int 165 sys_readv(p, v, retval) 166 struct proc *p; 167 void *v; 168 register_t *retval; 169 { 170 struct sys_readv_args /* { 171 syscallarg(int) fd; 172 syscallarg(const struct iovec *) iovp; 173 syscallarg(int) iovcnt; 174 } */ *uap = v; 175 int fd = SCARG(uap, fd); 176 struct file *fp; 177 struct filedesc *fdp = p->p_fd; 178 179 if ((fp = fd_getfile(fdp, fd)) == NULL) 180 return (EBADF); 181 if ((fp->f_flag & FREAD) == 0) 182 return (EBADF); 183 184 FREF(fp); 185 186 /* dofilereadv() will FRELE the descriptor for us */ 187 return (dofilereadv(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt), 188 &fp->f_offset, retval)); 189 } 190 191 int 192 dofilereadv(p, fd, fp, iovp, iovcnt, offset, retval) 193 struct proc *p; 194 int fd; 195 struct file *fp; 196 const struct iovec *iovp; 197 int iovcnt; 198 off_t *offset; 199 register_t *retval; 200 { 201 struct uio auio; 202 struct iovec *iov; 203 struct iovec *needfree; 204 struct iovec aiov[UIO_SMALLIOV]; 205 long i, cnt, error = 0; 206 u_int iovlen; 207 #ifdef KTRACE 208 struct iovec *ktriov = NULL; 209 #endif 210 211 /* note: can't use iovlen until iovcnt is validated */ 212 iovlen = iovcnt * sizeof(struct iovec); 213 if ((u_int)iovcnt > UIO_SMALLIOV) { 214 if ((u_int)iovcnt > IOV_MAX) { 215 error = EINVAL; 216 goto out; 217 } 218 iov = needfree = malloc(iovlen, M_IOV, M_WAITOK); 219 } else if ((u_int)iovcnt > 0) { 220 iov = aiov; 221 needfree = NULL; 222 } else { 223 error = EINVAL; 224 goto out; 225 } 226 227 auio.uio_iov = iov; 228 auio.uio_iovcnt = iovcnt; 229 auio.uio_rw = UIO_READ; 230 auio.uio_segflg = UIO_USERSPACE; 231 auio.uio_procp = p; 232 error = copyin(iovp, iov, iovlen); 233 if (error) 234 goto done; 235 auio.uio_resid = 0; 236 for (i = 0; i < iovcnt; i++) { 237 auio.uio_resid += iov->iov_len; 238 /* 239 * Reads return ssize_t because -1 is returned on error. 240 * Therefore we must restrict the length to SSIZE_MAX to 241 * avoid garbage return values. 242 */ 243 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) { 244 error = EINVAL; 245 goto done; 246 } 247 iov++; 248 } 249 #ifdef KTRACE 250 /* 251 * if tracing, save a copy of iovec 252 */ 253 if (KTRPOINT(p, KTR_GENIO)) { 254 ktriov = malloc(iovlen, M_TEMP, M_WAITOK); 255 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); 256 } 257 #endif 258 cnt = auio.uio_resid; 259 error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred); 260 if (error) 261 if (auio.uio_resid != cnt && (error == ERESTART || 262 error == EINTR || error == EWOULDBLOCK)) 263 error = 0; 264 cnt -= auio.uio_resid; 265 #ifdef KTRACE 266 if (ktriov != NULL) { 267 if (error == 0) 268 ktrgenio(p, fd, UIO_READ, ktriov, cnt, 269 error); 270 free(ktriov, M_TEMP); 271 } 272 #endif 273 *retval = cnt; 274 done: 275 if (needfree) 276 free(needfree, M_IOV); 277 out: 278 FRELE(fp); 279 return (error); 280 } 281 282 /* 283 * Write system call 284 */ 285 int 286 sys_write(p, v, retval) 287 struct proc *p; 288 void *v; 289 register_t *retval; 290 { 291 struct sys_write_args /* { 292 syscallarg(int) fd; 293 syscallarg(const void *) buf; 294 syscallarg(size_t) nbyte; 295 } */ *uap = v; 296 int fd = SCARG(uap, fd); 297 struct file *fp; 298 struct filedesc *fdp = p->p_fd; 299 300 if ((fp = fd_getfile(fdp, fd)) == NULL) 301 return (EBADF); 302 if ((fp->f_flag & FWRITE) == 0) 303 return (EBADF); 304 305 FREF(fp); 306 307 /* dofilewrite() will FRELE the descriptor for us */ 308 return (dofilewrite(p, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 309 &fp->f_offset, retval)); 310 } 311 312 int 313 dofilewrite(p, fd, fp, buf, nbyte, offset, retval) 314 struct proc *p; 315 int fd; 316 struct file *fp; 317 const void *buf; 318 size_t nbyte; 319 off_t *offset; 320 register_t *retval; 321 { 322 struct uio auio; 323 struct iovec aiov; 324 long cnt, error = 0; 325 #ifdef KTRACE 326 struct iovec ktriov; 327 #endif 328 329 aiov.iov_base = (caddr_t)buf; /* XXX kills const */ 330 aiov.iov_len = nbyte; 331 auio.uio_iov = &aiov; 332 auio.uio_iovcnt = 1; 333 auio.uio_resid = nbyte; 334 auio.uio_rw = UIO_WRITE; 335 auio.uio_segflg = UIO_USERSPACE; 336 auio.uio_procp = p; 337 338 /* 339 * Writes return ssize_t because -1 is returned on error. Therefore 340 * we must restrict the length to SSIZE_MAX to avoid garbage return 341 * values. 342 */ 343 if (auio.uio_resid > SSIZE_MAX) { 344 error = EINVAL; 345 goto out; 346 } 347 348 #ifdef KTRACE 349 /* 350 * if tracing, save a copy of iovec 351 */ 352 if (KTRPOINT(p, KTR_GENIO)) 353 ktriov = aiov; 354 #endif 355 cnt = auio.uio_resid; 356 error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred); 357 if (error) { 358 if (auio.uio_resid != cnt && (error == ERESTART || 359 error == EINTR || error == EWOULDBLOCK)) 360 error = 0; 361 if (error == EPIPE) 362 psignal(p, SIGPIPE); 363 } 364 cnt -= auio.uio_resid; 365 #ifdef KTRACE 366 if (KTRPOINT(p, KTR_GENIO) && error == 0) 367 ktrgenio(p, fd, UIO_WRITE, &ktriov, cnt, error); 368 #endif 369 *retval = cnt; 370 out: 371 FRELE(fp); 372 return (error); 373 } 374 375 /* 376 * Gather write system call 377 */ 378 int 379 sys_writev(p, v, retval) 380 struct proc *p; 381 void *v; 382 register_t *retval; 383 { 384 struct sys_writev_args /* { 385 syscallarg(int) fd; 386 syscallarg(const struct iovec *) iovp; 387 syscallarg(int) iovcnt; 388 } */ *uap = v; 389 int fd = SCARG(uap, fd); 390 struct file *fp; 391 struct filedesc *fdp = p->p_fd; 392 393 if ((fp = fd_getfile(fdp, fd)) == NULL) 394 return (EBADF); 395 if ((fp->f_flag & FWRITE) == 0) 396 return (EBADF); 397 398 FREF(fp); 399 400 /* dofilewritev() will FRELE the descriptor for us */ 401 return (dofilewritev(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt), 402 &fp->f_offset, retval)); 403 } 404 405 int 406 dofilewritev(p, fd, fp, iovp, iovcnt, offset, retval) 407 struct proc *p; 408 int fd; 409 struct file *fp; 410 const struct iovec *iovp; 411 int iovcnt; 412 off_t *offset; 413 register_t *retval; 414 { 415 struct uio auio; 416 struct iovec *iov; 417 struct iovec *needfree; 418 struct iovec aiov[UIO_SMALLIOV]; 419 long i, cnt, error = 0; 420 u_int iovlen; 421 #ifdef KTRACE 422 struct iovec *ktriov = NULL; 423 #endif 424 425 /* note: can't use iovlen until iovcnt is validated */ 426 iovlen = iovcnt * sizeof(struct iovec); 427 if ((u_int)iovcnt > UIO_SMALLIOV) { 428 if ((u_int)iovcnt > IOV_MAX) 429 return (EINVAL); 430 iov = needfree = malloc(iovlen, M_IOV, M_WAITOK); 431 } else if ((u_int)iovcnt > 0) { 432 iov = aiov; 433 needfree = NULL; 434 } else { 435 error = EINVAL; 436 goto out; 437 } 438 439 auio.uio_iov = iov; 440 auio.uio_iovcnt = iovcnt; 441 auio.uio_rw = UIO_WRITE; 442 auio.uio_segflg = UIO_USERSPACE; 443 auio.uio_procp = p; 444 error = copyin(iovp, iov, iovlen); 445 if (error) 446 goto done; 447 auio.uio_resid = 0; 448 for (i = 0; i < iovcnt; i++) { 449 auio.uio_resid += iov->iov_len; 450 /* 451 * Writes return ssize_t because -1 is returned on error. 452 * Therefore we must restrict the length to SSIZE_MAX to 453 * avoid garbage return values. 454 */ 455 if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) { 456 error = EINVAL; 457 goto done; 458 } 459 iov++; 460 } 461 #ifdef KTRACE 462 /* 463 * if tracing, save a copy of iovec 464 */ 465 if (KTRPOINT(p, KTR_GENIO)) { 466 ktriov = malloc(iovlen, M_TEMP, M_WAITOK); 467 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); 468 } 469 #endif 470 cnt = auio.uio_resid; 471 error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred); 472 if (error) { 473 if (auio.uio_resid != cnt && (error == ERESTART || 474 error == EINTR || error == EWOULDBLOCK)) 475 error = 0; 476 if (error == EPIPE) 477 psignal(p, SIGPIPE); 478 } 479 cnt -= auio.uio_resid; 480 #ifdef KTRACE 481 if (ktriov != NULL) { 482 if (error == 0) 483 ktrgenio(p, fd, UIO_WRITE, ktriov, cnt, 484 error); 485 free(ktriov, M_TEMP); 486 } 487 #endif 488 *retval = cnt; 489 done: 490 if (needfree) 491 free(needfree, M_IOV); 492 out: 493 FRELE(fp); 494 return (error); 495 } 496 497 /* 498 * Ioctl system call 499 */ 500 /* ARGSUSED */ 501 int 502 sys_ioctl(p, v, retval) 503 struct proc *p; 504 void *v; 505 register_t *retval; 506 { 507 struct sys_ioctl_args /* { 508 syscallarg(int) fd; 509 syscallarg(u_long) com; 510 syscallarg(caddr_t) data; 511 } */ *uap = v; 512 struct file *fp; 513 struct filedesc *fdp; 514 u_long com; 515 int error; 516 u_int size; 517 caddr_t data, memp; 518 int tmp; 519 #define STK_PARAMS 128 520 char stkbuf[STK_PARAMS]; 521 522 fdp = p->p_fd; 523 if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL) 524 return (EBADF); 525 526 if ((fp->f_flag & (FREAD | FWRITE)) == 0) 527 return (EBADF); 528 529 switch (com = SCARG(uap, com)) { 530 case FIONCLEX: 531 fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE; 532 return (0); 533 case FIOCLEX: 534 fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE; 535 return (0); 536 } 537 538 /* 539 * Interpret high order word to find amount of data to be 540 * copied to/from the user's address space. 541 */ 542 size = IOCPARM_LEN(com); 543 if (size > IOCPARM_MAX) 544 return (ENOTTY); 545 FREF(fp); 546 memp = NULL; 547 if (size > sizeof (stkbuf)) { 548 memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK); 549 data = memp; 550 } else 551 data = stkbuf; 552 if (com&IOC_IN) { 553 if (size) { 554 error = copyin(SCARG(uap, data), data, (u_int)size); 555 if (error) { 556 goto out; 557 } 558 } else 559 *(caddr_t *)data = SCARG(uap, data); 560 } else if ((com&IOC_OUT) && size) 561 /* 562 * Zero the buffer so the user always 563 * gets back something deterministic. 564 */ 565 bzero(data, size); 566 else if (com&IOC_VOID) 567 *(caddr_t *)data = SCARG(uap, data); 568 569 switch (com) { 570 571 case FIONBIO: 572 if ((tmp = *(int *)data) != 0) 573 fp->f_flag |= FNONBLOCK; 574 else 575 fp->f_flag &= ~FNONBLOCK; 576 error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p); 577 break; 578 579 case FIOASYNC: 580 if ((tmp = *(int *)data) != 0) 581 fp->f_flag |= FASYNC; 582 else 583 fp->f_flag &= ~FASYNC; 584 error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p); 585 break; 586 587 case FIOSETOWN: 588 tmp = *(int *)data; 589 if (fp->f_type == DTYPE_SOCKET) { 590 struct socket *so = (struct socket *)fp->f_data; 591 592 so->so_pgid = tmp; 593 so->so_siguid = p->p_cred->p_ruid; 594 so->so_sigeuid = p->p_ucred->cr_uid; 595 error = 0; 596 break; 597 } 598 if (tmp <= 0) { 599 tmp = -tmp; 600 } else { 601 struct proc *p1 = pfind(tmp); 602 if (p1 == 0) { 603 error = ESRCH; 604 break; 605 } 606 tmp = p1->p_pgrp->pg_id; 607 } 608 error = (*fp->f_ops->fo_ioctl) 609 (fp, TIOCSPGRP, (caddr_t)&tmp, p); 610 break; 611 612 case FIOGETOWN: 613 if (fp->f_type == DTYPE_SOCKET) { 614 error = 0; 615 *(int *)data = ((struct socket *)fp->f_data)->so_pgid; 616 break; 617 } 618 error = (*fp->f_ops->fo_ioctl)(fp, TIOCGPGRP, data, p); 619 *(int *)data = -*(int *)data; 620 break; 621 622 default: 623 error = (*fp->f_ops->fo_ioctl)(fp, com, data, p); 624 /* 625 * Copy any data to user, size was 626 * already set and checked above. 627 */ 628 if (error == 0 && (com&IOC_OUT) && size) 629 error = copyout(data, SCARG(uap, data), (u_int)size); 630 break; 631 } 632 out: 633 FRELE(fp); 634 if (memp) 635 free(memp, M_IOCTLOPS); 636 return (error); 637 } 638 639 int selwait, nselcoll; 640 641 /* 642 * Select system call. 643 */ 644 int 645 sys_select(p, v, retval) 646 register struct proc *p; 647 void *v; 648 register_t *retval; 649 { 650 register struct sys_select_args /* { 651 syscallarg(int) nd; 652 syscallarg(fd_set *) in; 653 syscallarg(fd_set *) ou; 654 syscallarg(fd_set *) ex; 655 syscallarg(struct timeval *) tv; 656 } */ *uap = v; 657 fd_set bits[6], *pibits[3], *pobits[3]; 658 struct timeval atv; 659 int s, ncoll, error = 0, timo; 660 u_int ni; 661 662 if (SCARG(uap, nd) > p->p_fd->fd_nfiles) { 663 /* forgiving; slightly wrong */ 664 SCARG(uap, nd) = p->p_fd->fd_nfiles; 665 } 666 ni = howmany(SCARG(uap, nd), NFDBITS) * sizeof(fd_mask); 667 if (SCARG(uap, nd) > FD_SETSIZE) { 668 caddr_t mbits; 669 670 mbits = malloc(ni * 6, M_TEMP, M_WAITOK); 671 bzero(mbits, ni * 6); 672 pibits[0] = (fd_set *)&mbits[ni * 0]; 673 pibits[1] = (fd_set *)&mbits[ni * 1]; 674 pibits[2] = (fd_set *)&mbits[ni * 2]; 675 pobits[0] = (fd_set *)&mbits[ni * 3]; 676 pobits[1] = (fd_set *)&mbits[ni * 4]; 677 pobits[2] = (fd_set *)&mbits[ni * 5]; 678 } else { 679 bzero((caddr_t)bits, sizeof(bits)); 680 pibits[0] = &bits[0]; 681 pibits[1] = &bits[1]; 682 pibits[2] = &bits[2]; 683 pobits[0] = &bits[3]; 684 pobits[1] = &bits[4]; 685 pobits[2] = &bits[5]; 686 } 687 688 #define getbits(name, x) \ 689 if (SCARG(uap, name) && (error = copyin((caddr_t)SCARG(uap, name), \ 690 (caddr_t)pibits[x], ni))) \ 691 goto done; 692 getbits(in, 0); 693 getbits(ou, 1); 694 getbits(ex, 2); 695 #undef getbits 696 697 if (SCARG(uap, tv)) { 698 error = copyin((caddr_t)SCARG(uap, tv), (caddr_t)&atv, 699 sizeof (atv)); 700 if (error) 701 goto done; 702 if (itimerfix(&atv)) { 703 error = EINVAL; 704 goto done; 705 } 706 s = splclock(); 707 timeradd(&atv, &time, &atv); 708 splx(s); 709 } else 710 timo = 0; 711 retry: 712 ncoll = nselcoll; 713 p->p_flag |= P_SELECT; 714 error = selscan(p, pibits[0], pobits[0], SCARG(uap, nd), retval); 715 if (error || *retval) 716 goto done; 717 if (SCARG(uap, tv)) { 718 /* 719 * We have to recalculate the timeout on every retry. 720 */ 721 timo = hzto(&atv); 722 if (timo <= 0) 723 goto done; 724 } 725 s = splhigh(); 726 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { 727 splx(s); 728 goto retry; 729 } 730 p->p_flag &= ~P_SELECT; 731 error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo); 732 splx(s); 733 if (error == 0) 734 goto retry; 735 done: 736 p->p_flag &= ~P_SELECT; 737 /* select is not restarted after signals... */ 738 if (error == ERESTART) 739 error = EINTR; 740 if (error == EWOULDBLOCK) 741 error = 0; 742 #define putbits(name, x) \ 743 if (SCARG(uap, name) && (error2 = copyout((caddr_t)pobits[x], \ 744 (caddr_t)SCARG(uap, name), ni))) \ 745 error = error2; 746 if (error == 0) { 747 int error2; 748 749 putbits(in, 0); 750 putbits(ou, 1); 751 putbits(ex, 2); 752 #undef putbits 753 } 754 755 if (pibits[0] != &bits[0]) 756 free(pibits[0], M_TEMP); 757 return (error); 758 } 759 760 int 761 selscan(p, ibits, obits, nfd, retval) 762 struct proc *p; 763 fd_set *ibits, *obits; 764 int nfd; 765 register_t *retval; 766 { 767 caddr_t cibits = (caddr_t)ibits, cobits = (caddr_t)obits; 768 register struct filedesc *fdp = p->p_fd; 769 register int msk, i, j, fd; 770 register fd_mask bits; 771 struct file *fp; 772 int ni, n = 0; 773 static int flag[3] = { FREAD, FWRITE, 0 }; 774 775 /* 776 * if nfd > FD_SETSIZE then the fd_set's contain nfd bits (rounded 777 * up to the next byte) otherwise the fd_set's are normal sized. 778 */ 779 ni = sizeof(fd_set); 780 if (nfd > FD_SETSIZE) 781 ni = howmany(nfd, NFDBITS) * sizeof(fd_mask); 782 783 for (msk = 0; msk < 3; msk++) { 784 fd_set *pibits = (fd_set *)&cibits[msk*ni]; 785 fd_set *pobits = (fd_set *)&cobits[msk*ni]; 786 787 for (i = 0; i < nfd; i += NFDBITS) { 788 bits = pibits->fds_bits[i/NFDBITS]; 789 while ((j = ffs(bits)) && (fd = i + --j) < nfd) { 790 bits &= ~(1 << j); 791 if ((fp = fd_getfile(fdp, fd)) == NULL) 792 return (EBADF); 793 FREF(fp); 794 if ((*fp->f_ops->fo_select)(fp, flag[msk], p)) { 795 FD_SET(fd, pobits); 796 n++; 797 } 798 FRELE(fp); 799 } 800 } 801 } 802 *retval = n; 803 return (0); 804 } 805 806 /*ARGSUSED*/ 807 int 808 seltrue(dev, flag, p) 809 dev_t dev; 810 int flag; 811 struct proc *p; 812 { 813 814 return (1); 815 } 816 817 /* 818 * Record a select request. 819 */ 820 void 821 selrecord(selector, sip) 822 struct proc *selector; 823 struct selinfo *sip; 824 { 825 struct proc *p; 826 pid_t mypid; 827 828 mypid = selector->p_pid; 829 if (sip->si_selpid == mypid) 830 return; 831 if (sip->si_selpid && (p = pfind(sip->si_selpid)) && 832 p->p_wchan == (caddr_t)&selwait) 833 sip->si_flags |= SI_COLL; 834 else 835 sip->si_selpid = mypid; 836 } 837 838 /* 839 * Do a wakeup when a selectable event occurs. 840 */ 841 void 842 selwakeup(sip) 843 register struct selinfo *sip; 844 { 845 register struct proc *p; 846 int s; 847 848 if (sip->si_selpid == 0) 849 return; 850 if (sip->si_flags & SI_COLL) { 851 nselcoll++; 852 sip->si_flags &= ~SI_COLL; 853 wakeup((caddr_t)&selwait); 854 } 855 p = pfind(sip->si_selpid); 856 sip->si_selpid = 0; 857 if (p != NULL) { 858 s = splhigh(); 859 if (p->p_wchan == (caddr_t)&selwait) { 860 if (p->p_stat == SSLEEP) 861 setrunnable(p); 862 else 863 unsleep(p); 864 } else if (p->p_flag & P_SELECT) 865 p->p_flag &= ~P_SELECT; 866 splx(s); 867 } 868 } 869 870 void 871 pollscan(p, pl, nfd, retval) 872 struct proc *p; 873 struct pollfd *pl; 874 int nfd; 875 register_t *retval; 876 { 877 register struct filedesc *fdp = p->p_fd; 878 register int msk, i; 879 struct file *fp; 880 int x, n = 0; 881 static int flag[3] = { FREAD, FWRITE, 0 }; 882 static int pflag[3] = { POLLIN|POLLRDNORM, POLLOUT, POLLERR }; 883 884 /* 885 * XXX: We need to implement the rest of the flags. 886 */ 887 for (i = 0; i < nfd; i++) { 888 /* Check the file descriptor. */ 889 if (pl[i].fd < 0) { 890 pl[i].revents = 0; 891 continue; 892 } 893 if ((fp = fd_getfile(fdp, pl[i].fd)) == NULL) { 894 pl[i].revents = POLLNVAL; 895 n++; 896 continue; 897 } 898 FREF(fp); 899 for (x = msk = 0; msk < 3; msk++) { 900 if (pl[i].events & pflag[msk]) { 901 if ((*fp->f_ops->fo_select)(fp, flag[msk], p)) { 902 pl[i].revents |= pflag[msk] & 903 pl[i].events; 904 x++; 905 } 906 } 907 } 908 FRELE(fp); 909 if (x) 910 n++; 911 } 912 *retval = n; 913 } 914 915 /* 916 * We are using the same mechanism as select only we encode/decode args 917 * differently. 918 */ 919 int 920 sys_poll(p, v, retval) 921 register struct proc *p; 922 void *v; 923 register_t *retval; 924 { 925 struct sys_poll_args *uap = v; 926 size_t sz; 927 struct pollfd pfds[4], *pl = pfds; 928 int msec = SCARG(uap, timeout); 929 struct timeval atv; 930 int timo, ncoll, i, s, error, error2; 931 extern int nselcoll, selwait; 932 933 /* Standards say no more than MAX_OPEN; this is possibly better. */ 934 if (SCARG(uap, nfds) > min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, 935 maxfiles)) 936 return (EINVAL); 937 938 sz = sizeof(struct pollfd) * SCARG(uap, nfds); 939 940 /* optimize for the default case, of a small nfds value */ 941 if (sz > sizeof(pfds)) 942 pl = (struct pollfd *) malloc(sz, M_TEMP, M_WAITOK); 943 944 if ((error = copyin(SCARG(uap, fds), pl, sz)) != 0) 945 goto bad; 946 947 for (i = 0; i < SCARG(uap, nfds); i++) 948 pl[i].revents = 0; 949 950 if (msec != -1) { 951 atv.tv_sec = msec / 1000; 952 atv.tv_usec = (msec - (atv.tv_sec * 1000)) * 1000; 953 954 if (itimerfix(&atv)) { 955 error = EINVAL; 956 goto done; 957 } 958 s = splclock(); 959 timeradd(&atv, &time, &atv); 960 splx(s); 961 } else 962 timo = 0; 963 964 retry: 965 ncoll = nselcoll; 966 p->p_flag |= P_SELECT; 967 pollscan(p, pl, SCARG(uap, nfds), retval); 968 if (*retval) 969 goto done; 970 if (msec != -1) { 971 /* 972 * We have to recalculate the timeout on every retry. 973 */ 974 timo = hzto(&atv); 975 if (timo <= 0) 976 goto done; 977 } 978 s = splhigh(); 979 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { 980 splx(s); 981 goto retry; 982 } 983 p->p_flag &= ~P_SELECT; 984 error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "poll", timo); 985 splx(s); 986 if (error == 0) 987 goto retry; 988 989 done: 990 p->p_flag &= ~P_SELECT; 991 /* poll is not restarted after signals... */ 992 if (error == ERESTART) 993 error = EINTR; 994 if (error == EWOULDBLOCK) 995 error = 0; 996 if ((error2 = copyout(pl, SCARG(uap, fds), sz)) != 0) 997 error = error2; 998 bad: 999 if (pl != pfds) 1000 free((char *) pl, M_TEMP); 1001 return (error); 1002 } 1003 1004