1 /* 2 * Copyright (c) 1982, 1986, 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)sys_generic.c 8.5 (Berkeley) 1/21/94 39 * $FreeBSD: src/sys/kern/sys_generic.c,v 1.55.2.10 2001/03/17 10:39:32 peter Exp $ 40 */ 41 42 #include "opt_ktrace.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/sysproto.h> 47 #include <sys/filedesc.h> 48 #include <sys/filio.h> 49 #include <sys/fcntl.h> 50 #include <sys/file.h> 51 #include <sys/proc.h> 52 #include <sys/signalvar.h> 53 #include <sys/socketvar.h> 54 #include <sys/uio.h> 55 #include <sys/kernel.h> 56 #include <sys/malloc.h> 57 #include <sys/poll.h> 58 #include <sys/resourcevar.h> 59 #include <sys/sysctl.h> 60 #include <sys/sysent.h> 61 #include <sys/buf.h> 62 #ifdef KTRACE 63 #include <sys/ktrace.h> 64 #endif 65 #include <vm/vm.h> 66 #include <vm/vm_page.h> 67 68 #include <machine/limits.h> 69 70 static MALLOC_DEFINE(M_IOCTLOPS, "ioctlops", "ioctl data buffer"); 71 static MALLOC_DEFINE(M_SELECT, "select", "select() buffer"); 72 MALLOC_DEFINE(M_IOV, "iov", "large iov's"); 73 74 static int pollscan __P((struct proc *, struct pollfd *, u_int)); 75 static int selscan __P((struct proc *, fd_mask **, fd_mask **, int)); 76 static int dofileread __P((struct proc *, struct file *, int, void *, 77 size_t, off_t, int)); 78 static int dofilewrite __P((struct proc *, struct file *, int, 79 const void *, size_t, off_t, int)); 80 81 struct file* 82 holdfp(fdp, fd, flag) 83 struct filedesc* fdp; 84 int fd, flag; 85 { 86 struct file* fp; 87 88 if (((u_int)fd) >= fdp->fd_nfiles || 89 (fp = fdp->fd_ofiles[fd]) == NULL || 90 (fp->f_flag & flag) == 0) { 91 return (NULL); 92 } 93 fhold(fp); 94 return (fp); 95 } 96 97 /* 98 * Read system call. 99 */ 100 #ifndef _SYS_SYSPROTO_H_ 101 struct read_args { 102 int fd; 103 void *buf; 104 size_t nbyte; 105 }; 106 #endif 107 int 108 read(p, uap) 109 struct proc *p; 110 register struct read_args *uap; 111 { 112 register struct file *fp; 113 int error; 114 115 if ((fp = holdfp(p->p_fd, uap->fd, FREAD)) == NULL) 116 return (EBADF); 117 error = dofileread(p, fp, uap->fd, uap->buf, uap->nbyte, (off_t)-1, 0); 118 fdrop(fp, p); 119 return(error); 120 } 121 122 /* 123 * Pread system call 124 */ 125 #ifndef _SYS_SYSPROTO_H_ 126 struct pread_args { 127 int fd; 128 void *buf; 129 size_t nbyte; 130 int pad; 131 off_t offset; 132 }; 133 #endif 134 int 135 pread(p, uap) 136 struct proc *p; 137 register struct pread_args *uap; 138 { 139 register struct file *fp; 140 int error; 141 142 if ((fp = holdfp(p->p_fd, uap->fd, FREAD)) == NULL) 143 return (EBADF); 144 if (fp->f_type != DTYPE_VNODE) { 145 error = ESPIPE; 146 } else { 147 error = dofileread(p, fp, uap->fd, uap->buf, uap->nbyte, 148 uap->offset, FOF_OFFSET); 149 } 150 fdrop(fp, p); 151 return(error); 152 } 153 154 /* 155 * Code common for read and pread 156 */ 157 int 158 dofileread(p, fp, fd, buf, nbyte, offset, flags) 159 struct proc *p; 160 struct file *fp; 161 int fd, flags; 162 void *buf; 163 size_t nbyte; 164 off_t offset; 165 { 166 struct uio auio; 167 struct iovec aiov; 168 long cnt, error = 0; 169 #ifdef KTRACE 170 struct iovec ktriov; 171 struct uio ktruio; 172 int didktr = 0; 173 #endif 174 175 aiov.iov_base = (caddr_t)buf; 176 aiov.iov_len = nbyte; 177 auio.uio_iov = &aiov; 178 auio.uio_iovcnt = 1; 179 auio.uio_offset = offset; 180 if (nbyte > INT_MAX) 181 return (EINVAL); 182 auio.uio_resid = nbyte; 183 auio.uio_rw = UIO_READ; 184 auio.uio_segflg = UIO_USERSPACE; 185 auio.uio_procp = p; 186 #ifdef KTRACE 187 /* 188 * if tracing, save a copy of iovec 189 */ 190 if (KTRPOINT(p, KTR_GENIO)) { 191 ktriov = aiov; 192 ktruio = auio; 193 didktr = 1; 194 } 195 #endif 196 cnt = nbyte; 197 198 if ((error = fo_read(fp, &auio, fp->f_cred, flags, p))) { 199 if (auio.uio_resid != cnt && (error == ERESTART || 200 error == EINTR || error == EWOULDBLOCK)) 201 error = 0; 202 } 203 cnt -= auio.uio_resid; 204 #ifdef KTRACE 205 if (didktr && error == 0) { 206 ktruio.uio_iov = &ktriov; 207 ktruio.uio_resid = cnt; 208 ktrgenio(p->p_tracep, fd, UIO_READ, &ktruio, error); 209 } 210 #endif 211 p->p_retval[0] = cnt; 212 return (error); 213 } 214 215 /* 216 * Scatter read system call. 217 */ 218 #ifndef _SYS_SYSPROTO_H_ 219 struct readv_args { 220 int fd; 221 struct iovec *iovp; 222 u_int iovcnt; 223 }; 224 #endif 225 int 226 readv(p, uap) 227 struct proc *p; 228 register struct readv_args *uap; 229 { 230 register struct file *fp; 231 register struct filedesc *fdp = p->p_fd; 232 struct uio auio; 233 register struct iovec *iov; 234 struct iovec *needfree; 235 struct iovec aiov[UIO_SMALLIOV]; 236 long i, cnt, error = 0; 237 u_int iovlen; 238 #ifdef KTRACE 239 struct iovec *ktriov = NULL; 240 struct uio ktruio; 241 #endif 242 243 if ((fp = holdfp(fdp, uap->fd, FREAD)) == NULL) 244 return (EBADF); 245 /* note: can't use iovlen until iovcnt is validated */ 246 iovlen = uap->iovcnt * sizeof (struct iovec); 247 if (uap->iovcnt > UIO_SMALLIOV) { 248 if (uap->iovcnt > UIO_MAXIOV) 249 return (EINVAL); 250 MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK); 251 needfree = iov; 252 } else { 253 iov = aiov; 254 needfree = NULL; 255 } 256 auio.uio_iov = iov; 257 auio.uio_iovcnt = uap->iovcnt; 258 auio.uio_rw = UIO_READ; 259 auio.uio_segflg = UIO_USERSPACE; 260 auio.uio_procp = p; 261 auio.uio_offset = -1; 262 if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen))) 263 goto done; 264 auio.uio_resid = 0; 265 for (i = 0; i < uap->iovcnt; i++) { 266 if (iov->iov_len > INT_MAX - auio.uio_resid) { 267 error = EINVAL; 268 goto done; 269 } 270 auio.uio_resid += iov->iov_len; 271 iov++; 272 } 273 #ifdef KTRACE 274 /* 275 * if tracing, save a copy of iovec 276 */ 277 if (KTRPOINT(p, KTR_GENIO)) { 278 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 279 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); 280 ktruio = auio; 281 } 282 #endif 283 cnt = auio.uio_resid; 284 if ((error = fo_read(fp, &auio, fp->f_cred, 0, p))) { 285 if (auio.uio_resid != cnt && (error == ERESTART || 286 error == EINTR || error == EWOULDBLOCK)) 287 error = 0; 288 } 289 cnt -= auio.uio_resid; 290 #ifdef KTRACE 291 if (ktriov != NULL) { 292 if (error == 0) { 293 ktruio.uio_iov = ktriov; 294 ktruio.uio_resid = cnt; 295 ktrgenio(p->p_tracep, uap->fd, UIO_READ, &ktruio, 296 error); 297 } 298 FREE(ktriov, M_TEMP); 299 } 300 #endif 301 p->p_retval[0] = cnt; 302 done: 303 fdrop(fp, p); 304 if (needfree) 305 FREE(needfree, M_IOV); 306 return (error); 307 } 308 309 /* 310 * Write system call 311 */ 312 #ifndef _SYS_SYSPROTO_H_ 313 struct write_args { 314 int fd; 315 const void *buf; 316 size_t nbyte; 317 }; 318 #endif 319 int 320 write(p, uap) 321 struct proc *p; 322 register struct write_args *uap; 323 { 324 register struct file *fp; 325 int error; 326 327 if ((fp = holdfp(p->p_fd, uap->fd, FWRITE)) == NULL) 328 return (EBADF); 329 error = dofilewrite(p, fp, uap->fd, uap->buf, uap->nbyte, (off_t)-1, 0); 330 fdrop(fp, p); 331 return(error); 332 } 333 334 /* 335 * Pwrite system call 336 */ 337 #ifndef _SYS_SYSPROTO_H_ 338 struct pwrite_args { 339 int fd; 340 const void *buf; 341 size_t nbyte; 342 int pad; 343 off_t offset; 344 }; 345 #endif 346 int 347 pwrite(p, uap) 348 struct proc *p; 349 register struct pwrite_args *uap; 350 { 351 register struct file *fp; 352 int error; 353 354 if ((fp = holdfp(p->p_fd, uap->fd, FWRITE)) == NULL) 355 return (EBADF); 356 if (fp->f_type != DTYPE_VNODE) { 357 error = ESPIPE; 358 } else { 359 error = dofilewrite(p, fp, uap->fd, uap->buf, uap->nbyte, 360 uap->offset, FOF_OFFSET); 361 } 362 fdrop(fp, p); 363 return(error); 364 } 365 366 static int 367 dofilewrite(p, fp, fd, buf, nbyte, offset, flags) 368 struct proc *p; 369 struct file *fp; 370 int fd, flags; 371 const void *buf; 372 size_t nbyte; 373 off_t offset; 374 { 375 struct uio auio; 376 struct iovec aiov; 377 long cnt, error = 0; 378 #ifdef KTRACE 379 struct iovec ktriov; 380 struct uio ktruio; 381 int didktr = 0; 382 #endif 383 384 aiov.iov_base = (void *)(uintptr_t)buf; 385 aiov.iov_len = nbyte; 386 auio.uio_iov = &aiov; 387 auio.uio_iovcnt = 1; 388 auio.uio_offset = offset; 389 if (nbyte > INT_MAX) 390 return (EINVAL); 391 auio.uio_resid = nbyte; 392 auio.uio_rw = UIO_WRITE; 393 auio.uio_segflg = UIO_USERSPACE; 394 auio.uio_procp = p; 395 #ifdef KTRACE 396 /* 397 * if tracing, save a copy of iovec and uio 398 */ 399 if (KTRPOINT(p, KTR_GENIO)) { 400 ktriov = aiov; 401 ktruio = auio; 402 didktr = 1; 403 } 404 #endif 405 cnt = nbyte; 406 if (fp->f_type == DTYPE_VNODE) 407 bwillwrite(); 408 if ((error = fo_write(fp, &auio, fp->f_cred, flags, p))) { 409 if (auio.uio_resid != cnt && (error == ERESTART || 410 error == EINTR || error == EWOULDBLOCK)) 411 error = 0; 412 if (error == EPIPE) 413 psignal(p, SIGPIPE); 414 } 415 cnt -= auio.uio_resid; 416 #ifdef KTRACE 417 if (didktr && error == 0) { 418 ktruio.uio_iov = &ktriov; 419 ktruio.uio_resid = cnt; 420 ktrgenio(p->p_tracep, fd, UIO_WRITE, &ktruio, error); 421 } 422 #endif 423 p->p_retval[0] = cnt; 424 return (error); 425 } 426 427 /* 428 * Gather write system call 429 */ 430 #ifndef _SYS_SYSPROTO_H_ 431 struct writev_args { 432 int fd; 433 struct iovec *iovp; 434 u_int iovcnt; 435 }; 436 #endif 437 int 438 writev(p, uap) 439 struct proc *p; 440 register struct writev_args *uap; 441 { 442 register struct file *fp; 443 register struct filedesc *fdp = p->p_fd; 444 struct uio auio; 445 register struct iovec *iov; 446 struct iovec *needfree; 447 struct iovec aiov[UIO_SMALLIOV]; 448 long i, cnt, error = 0; 449 u_int iovlen; 450 #ifdef KTRACE 451 struct iovec *ktriov = NULL; 452 struct uio ktruio; 453 #endif 454 455 if ((fp = holdfp(fdp, uap->fd, FWRITE)) == NULL) 456 return (EBADF); 457 /* note: can't use iovlen until iovcnt is validated */ 458 iovlen = uap->iovcnt * sizeof (struct iovec); 459 if (uap->iovcnt > UIO_SMALLIOV) { 460 if (uap->iovcnt > UIO_MAXIOV) { 461 needfree = NULL; 462 error = EINVAL; 463 goto done; 464 } 465 MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK); 466 needfree = iov; 467 } else { 468 iov = aiov; 469 needfree = NULL; 470 } 471 auio.uio_iov = iov; 472 auio.uio_iovcnt = uap->iovcnt; 473 auio.uio_rw = UIO_WRITE; 474 auio.uio_segflg = UIO_USERSPACE; 475 auio.uio_procp = p; 476 auio.uio_offset = -1; 477 if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen))) 478 goto done; 479 auio.uio_resid = 0; 480 for (i = 0; i < uap->iovcnt; i++) { 481 if (iov->iov_len > INT_MAX - auio.uio_resid) { 482 error = EINVAL; 483 goto done; 484 } 485 auio.uio_resid += iov->iov_len; 486 iov++; 487 } 488 #ifdef KTRACE 489 /* 490 * if tracing, save a copy of iovec and uio 491 */ 492 if (KTRPOINT(p, KTR_GENIO)) { 493 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 494 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); 495 ktruio = auio; 496 } 497 #endif 498 cnt = auio.uio_resid; 499 if (fp->f_type == DTYPE_VNODE) 500 bwillwrite(); 501 if ((error = fo_write(fp, &auio, fp->f_cred, 0, p))) { 502 if (auio.uio_resid != cnt && (error == ERESTART || 503 error == EINTR || error == EWOULDBLOCK)) 504 error = 0; 505 if (error == EPIPE) 506 psignal(p, SIGPIPE); 507 } 508 cnt -= auio.uio_resid; 509 #ifdef KTRACE 510 if (ktriov != NULL) { 511 if (error == 0) { 512 ktruio.uio_iov = ktriov; 513 ktruio.uio_resid = cnt; 514 ktrgenio(p->p_tracep, uap->fd, UIO_WRITE, &ktruio, 515 error); 516 } 517 FREE(ktriov, M_TEMP); 518 } 519 #endif 520 p->p_retval[0] = cnt; 521 done: 522 fdrop(fp, p); 523 if (needfree) 524 FREE(needfree, M_IOV); 525 return (error); 526 } 527 528 /* 529 * Ioctl system call 530 */ 531 #ifndef _SYS_SYSPROTO_H_ 532 struct ioctl_args { 533 int fd; 534 u_long com; 535 caddr_t data; 536 }; 537 #endif 538 /* ARGSUSED */ 539 int 540 ioctl(p, uap) 541 struct proc *p; 542 register struct ioctl_args *uap; 543 { 544 register struct file *fp; 545 register struct filedesc *fdp; 546 register u_long com; 547 int error; 548 register u_int size; 549 caddr_t data, memp; 550 int tmp; 551 #define STK_PARAMS 128 552 union { 553 char stkbuf[STK_PARAMS]; 554 long align; 555 } ubuf; 556 557 fdp = p->p_fd; 558 if ((u_int)uap->fd >= fdp->fd_nfiles || 559 (fp = fdp->fd_ofiles[uap->fd]) == NULL) 560 return (EBADF); 561 562 if ((fp->f_flag & (FREAD | FWRITE)) == 0) 563 return (EBADF); 564 565 switch (com = uap->com) { 566 case FIONCLEX: 567 fdp->fd_ofileflags[uap->fd] &= ~UF_EXCLOSE; 568 return (0); 569 case FIOCLEX: 570 fdp->fd_ofileflags[uap->fd] |= UF_EXCLOSE; 571 return (0); 572 } 573 574 /* 575 * Interpret high order word to find amount of data to be 576 * copied to/from the user's address space. 577 */ 578 size = IOCPARM_LEN(com); 579 if (size > IOCPARM_MAX) 580 return (ENOTTY); 581 582 fhold(fp); 583 584 memp = NULL; 585 if (size > sizeof (ubuf.stkbuf)) { 586 memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK); 587 data = memp; 588 } else { 589 data = ubuf.stkbuf; 590 } 591 if (com&IOC_IN) { 592 if (size) { 593 error = copyin(uap->data, data, (u_int)size); 594 if (error) { 595 if (memp) 596 free(memp, M_IOCTLOPS); 597 fdrop(fp, p); 598 return (error); 599 } 600 } else { 601 *(caddr_t *)data = uap->data; 602 } 603 } else if ((com&IOC_OUT) && size) { 604 /* 605 * Zero the buffer so the user always 606 * gets back something deterministic. 607 */ 608 bzero(data, size); 609 } else if (com&IOC_VOID) { 610 *(caddr_t *)data = uap->data; 611 } 612 613 switch (com) { 614 615 case FIONBIO: 616 if ((tmp = *(int *)data)) 617 fp->f_flag |= FNONBLOCK; 618 else 619 fp->f_flag &= ~FNONBLOCK; 620 error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, p); 621 break; 622 623 case FIOASYNC: 624 if ((tmp = *(int *)data)) 625 fp->f_flag |= FASYNC; 626 else 627 fp->f_flag &= ~FASYNC; 628 error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, p); 629 break; 630 631 default: 632 error = fo_ioctl(fp, com, data, p); 633 /* 634 * Copy any data to user, size was 635 * already set and checked above. 636 */ 637 if (error == 0 && (com&IOC_OUT) && size) 638 error = copyout(data, uap->data, (u_int)size); 639 break; 640 } 641 if (memp) 642 free(memp, M_IOCTLOPS); 643 fdrop(fp, p); 644 return (error); 645 } 646 647 static int nselcoll; /* Select collisions since boot */ 648 int selwait; 649 SYSCTL_INT(_kern, OID_AUTO, nselcoll, CTLFLAG_RD, &nselcoll, 0, ""); 650 651 /* 652 * Select system call. 653 */ 654 #ifndef _SYS_SYSPROTO_H_ 655 struct select_args { 656 int nd; 657 fd_set *in, *ou, *ex; 658 struct timeval *tv; 659 }; 660 #endif 661 int 662 select(p, uap) 663 register struct proc *p; 664 register struct select_args *uap; 665 { 666 /* 667 * The magic 2048 here is chosen to be just enough for FD_SETSIZE 668 * infds with the new FD_SETSIZE of 1024, and more than enough for 669 * FD_SETSIZE infds, outfds and exceptfds with the old FD_SETSIZE 670 * of 256. 671 */ 672 fd_mask s_selbits[howmany(2048, NFDBITS)]; 673 fd_mask *ibits[3], *obits[3], *selbits, *sbp; 674 struct timeval atv, rtv, ttv; 675 int s, ncoll, error, timo; 676 u_int nbufbytes, ncpbytes, nfdbits; 677 678 if (uap->nd < 0) 679 return (EINVAL); 680 if (uap->nd > p->p_fd->fd_nfiles) 681 uap->nd = p->p_fd->fd_nfiles; /* forgiving; slightly wrong */ 682 683 /* 684 * Allocate just enough bits for the non-null fd_sets. Use the 685 * preallocated auto buffer if possible. 686 */ 687 nfdbits = roundup(uap->nd, NFDBITS); 688 ncpbytes = nfdbits / NBBY; 689 nbufbytes = 0; 690 if (uap->in != NULL) 691 nbufbytes += 2 * ncpbytes; 692 if (uap->ou != NULL) 693 nbufbytes += 2 * ncpbytes; 694 if (uap->ex != NULL) 695 nbufbytes += 2 * ncpbytes; 696 if (nbufbytes <= sizeof s_selbits) 697 selbits = &s_selbits[0]; 698 else 699 selbits = malloc(nbufbytes, M_SELECT, M_WAITOK); 700 701 /* 702 * Assign pointers into the bit buffers and fetch the input bits. 703 * Put the output buffers together so that they can be bzeroed 704 * together. 705 */ 706 sbp = selbits; 707 #define getbits(name, x) \ 708 do { \ 709 if (uap->name == NULL) \ 710 ibits[x] = NULL; \ 711 else { \ 712 ibits[x] = sbp + nbufbytes / 2 / sizeof *sbp; \ 713 obits[x] = sbp; \ 714 sbp += ncpbytes / sizeof *sbp; \ 715 error = copyin(uap->name, ibits[x], ncpbytes); \ 716 if (error != 0) \ 717 goto done; \ 718 } \ 719 } while (0) 720 getbits(in, 0); 721 getbits(ou, 1); 722 getbits(ex, 2); 723 #undef getbits 724 if (nbufbytes != 0) 725 bzero(selbits, nbufbytes / 2); 726 727 if (uap->tv) { 728 error = copyin((caddr_t)uap->tv, (caddr_t)&atv, 729 sizeof (atv)); 730 if (error) 731 goto done; 732 if (itimerfix(&atv)) { 733 error = EINVAL; 734 goto done; 735 } 736 getmicrouptime(&rtv); 737 timevaladd(&atv, &rtv); 738 } else { 739 atv.tv_sec = 0; 740 atv.tv_usec = 0; 741 } 742 timo = 0; 743 retry: 744 ncoll = nselcoll; 745 p->p_flag |= P_SELECT; 746 error = selscan(p, ibits, obits, uap->nd); 747 if (error || p->p_retval[0]) 748 goto done; 749 if (atv.tv_sec || atv.tv_usec) { 750 getmicrouptime(&rtv); 751 if (timevalcmp(&rtv, &atv, >=)) 752 goto done; 753 ttv = atv; 754 timevalsub(&ttv, &rtv); 755 timo = ttv.tv_sec > 24 * 60 * 60 ? 756 24 * 60 * 60 * hz : tvtohz(&ttv); 757 } 758 s = splhigh(); 759 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { 760 splx(s); 761 goto retry; 762 } 763 p->p_flag &= ~P_SELECT; 764 765 error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo); 766 767 splx(s); 768 if (error == 0) 769 goto retry; 770 done: 771 p->p_flag &= ~P_SELECT; 772 /* select is not restarted after signals... */ 773 if (error == ERESTART) 774 error = EINTR; 775 if (error == EWOULDBLOCK) 776 error = 0; 777 #define putbits(name, x) \ 778 if (uap->name && (error2 = copyout(obits[x], uap->name, ncpbytes))) \ 779 error = error2; 780 if (error == 0) { 781 int error2; 782 783 putbits(in, 0); 784 putbits(ou, 1); 785 putbits(ex, 2); 786 #undef putbits 787 } 788 if (selbits != &s_selbits[0]) 789 free(selbits, M_SELECT); 790 return (error); 791 } 792 793 static int 794 selscan(p, ibits, obits, nfd) 795 struct proc *p; 796 fd_mask **ibits, **obits; 797 int nfd; 798 { 799 struct filedesc *fdp = p->p_fd; 800 int msk, i, fd; 801 fd_mask bits; 802 struct file *fp; 803 int n = 0; 804 /* Note: backend also returns POLLHUP/POLLERR if appropriate. */ 805 static int flag[3] = { POLLRDNORM, POLLWRNORM, POLLRDBAND }; 806 807 for (msk = 0; msk < 3; msk++) { 808 if (ibits[msk] == NULL) 809 continue; 810 for (i = 0; i < nfd; i += NFDBITS) { 811 bits = ibits[msk][i/NFDBITS]; 812 /* ffs(int mask) not portable, fd_mask is long */ 813 for (fd = i; bits && fd < nfd; fd++, bits >>= 1) { 814 if (!(bits & 1)) 815 continue; 816 fp = fdp->fd_ofiles[fd]; 817 if (fp == NULL) 818 return (EBADF); 819 if (fo_poll(fp, flag[msk], fp->f_cred, p)) { 820 obits[msk][(fd)/NFDBITS] |= 821 ((fd_mask)1 << ((fd) % NFDBITS)); 822 n++; 823 } 824 } 825 } 826 } 827 p->p_retval[0] = n; 828 return (0); 829 } 830 831 /* 832 * Poll system call. 833 */ 834 #ifndef _SYS_SYSPROTO_H_ 835 struct poll_args { 836 struct pollfd *fds; 837 u_int nfds; 838 int timeout; 839 }; 840 #endif 841 int 842 poll(p, uap) 843 struct proc *p; 844 struct poll_args *uap; 845 { 846 caddr_t bits; 847 char smallbits[32 * sizeof(struct pollfd)]; 848 struct timeval atv, rtv, ttv; 849 int s, ncoll, error = 0, timo; 850 u_int nfds; 851 size_t ni; 852 853 nfds = SCARG(uap, nfds); 854 /* 855 * This is kinda bogus. We have fd limits, but that is not 856 * really related to the size of the pollfd array. Make sure 857 * we let the process use at least FD_SETSIZE entries and at 858 * least enough for the current limits. We want to be reasonably 859 * safe, but not overly restrictive. 860 */ 861 if (nfds > p->p_rlimit[RLIMIT_NOFILE].rlim_cur && nfds > FD_SETSIZE) 862 return (EINVAL); 863 ni = nfds * sizeof(struct pollfd); 864 if (ni > sizeof(smallbits)) 865 bits = malloc(ni, M_TEMP, M_WAITOK); 866 else 867 bits = smallbits; 868 error = copyin(SCARG(uap, fds), bits, ni); 869 if (error) 870 goto done; 871 if (SCARG(uap, timeout) != INFTIM) { 872 atv.tv_sec = SCARG(uap, timeout) / 1000; 873 atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000; 874 if (itimerfix(&atv)) { 875 error = EINVAL; 876 goto done; 877 } 878 getmicrouptime(&rtv); 879 timevaladd(&atv, &rtv); 880 } else { 881 atv.tv_sec = 0; 882 atv.tv_usec = 0; 883 } 884 timo = 0; 885 retry: 886 ncoll = nselcoll; 887 p->p_flag |= P_SELECT; 888 error = pollscan(p, (struct pollfd *)bits, nfds); 889 if (error || p->p_retval[0]) 890 goto done; 891 if (atv.tv_sec || atv.tv_usec) { 892 getmicrouptime(&rtv); 893 if (timevalcmp(&rtv, &atv, >=)) 894 goto done; 895 ttv = atv; 896 timevalsub(&ttv, &rtv); 897 timo = ttv.tv_sec > 24 * 60 * 60 ? 898 24 * 60 * 60 * hz : tvtohz(&ttv); 899 } 900 s = splhigh(); 901 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { 902 splx(s); 903 goto retry; 904 } 905 p->p_flag &= ~P_SELECT; 906 error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "poll", timo); 907 splx(s); 908 if (error == 0) 909 goto retry; 910 done: 911 p->p_flag &= ~P_SELECT; 912 /* poll is not restarted after signals... */ 913 if (error == ERESTART) 914 error = EINTR; 915 if (error == EWOULDBLOCK) 916 error = 0; 917 if (error == 0) { 918 error = copyout(bits, SCARG(uap, fds), ni); 919 if (error) 920 goto out; 921 } 922 out: 923 if (ni > sizeof(smallbits)) 924 free(bits, M_TEMP); 925 return (error); 926 } 927 928 static int 929 pollscan(p, fds, nfd) 930 struct proc *p; 931 struct pollfd *fds; 932 u_int nfd; 933 { 934 register struct filedesc *fdp = p->p_fd; 935 int i; 936 struct file *fp; 937 int n = 0; 938 939 for (i = 0; i < nfd; i++, fds++) { 940 if (fds->fd >= fdp->fd_nfiles) { 941 fds->revents = POLLNVAL; 942 n++; 943 } else if (fds->fd < 0) { 944 fds->revents = 0; 945 } else { 946 fp = fdp->fd_ofiles[fds->fd]; 947 if (fp == NULL) { 948 fds->revents = POLLNVAL; 949 n++; 950 } else { 951 /* 952 * Note: backend also returns POLLHUP and 953 * POLLERR if appropriate. 954 */ 955 fds->revents = fo_poll(fp, fds->events, 956 fp->f_cred, p); 957 if (fds->revents != 0) 958 n++; 959 } 960 } 961 } 962 p->p_retval[0] = n; 963 return (0); 964 } 965 966 /* 967 * OpenBSD poll system call. 968 * XXX this isn't quite a true representation.. OpenBSD uses select ops. 969 */ 970 #ifndef _SYS_SYSPROTO_H_ 971 struct openbsd_poll_args { 972 struct pollfd *fds; 973 u_int nfds; 974 int timeout; 975 }; 976 #endif 977 int 978 openbsd_poll(p, uap) 979 register struct proc *p; 980 register struct openbsd_poll_args *uap; 981 { 982 return (poll(p, (struct poll_args *)uap)); 983 } 984 985 /*ARGSUSED*/ 986 int 987 seltrue(dev, events, p) 988 dev_t dev; 989 int events; 990 struct proc *p; 991 { 992 993 return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)); 994 } 995 996 /* 997 * Record a select request. 998 */ 999 void 1000 selrecord(selector, sip) 1001 struct proc *selector; 1002 struct selinfo *sip; 1003 { 1004 struct proc *p; 1005 pid_t mypid; 1006 1007 mypid = selector->p_pid; 1008 if (sip->si_pid == mypid) 1009 return; 1010 if (sip->si_pid && (p = pfind(sip->si_pid)) && 1011 p->p_wchan == (caddr_t)&selwait) 1012 sip->si_flags |= SI_COLL; 1013 else 1014 sip->si_pid = mypid; 1015 } 1016 1017 /* 1018 * Do a wakeup when a selectable event occurs. 1019 */ 1020 void 1021 selwakeup(sip) 1022 register struct selinfo *sip; 1023 { 1024 register struct proc *p; 1025 int s; 1026 1027 if (sip->si_pid == 0) 1028 return; 1029 if (sip->si_flags & SI_COLL) { 1030 nselcoll++; 1031 sip->si_flags &= ~SI_COLL; 1032 wakeup((caddr_t)&selwait); 1033 } 1034 p = pfind(sip->si_pid); 1035 sip->si_pid = 0; 1036 if (p != NULL) { 1037 s = splhigh(); 1038 if (p->p_wchan == (caddr_t)&selwait) { 1039 if (p->p_stat == SSLEEP) 1040 setrunnable(p); 1041 else 1042 unsleep(p); 1043 } else if (p->p_flag & P_SELECT) 1044 p->p_flag &= ~P_SELECT; 1045 splx(s); 1046 } 1047 } 1048