1 /* 2 * Copyright (c) 1982, 1986, 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)sys_generic.c 8.5 (Berkeley) 1/21/94 39 * $FreeBSD: src/sys/kern/sys_generic.c,v 1.55.2.10 2001/03/17 10:39:32 peter Exp $ 40 * $DragonFly: src/sys/kern/sys_generic.c,v 1.10 2003/07/30 00:19:14 dillon Exp $ 41 */ 42 43 #include "opt_ktrace.h" 44 45 #include <sys/param.h> 46 #include <sys/systm.h> 47 #include <sys/sysproto.h> 48 #include <sys/filedesc.h> 49 #include <sys/filio.h> 50 #include <sys/fcntl.h> 51 #include <sys/file.h> 52 #include <sys/proc.h> 53 #include <sys/signalvar.h> 54 #include <sys/socketvar.h> 55 #include <sys/uio.h> 56 #include <sys/kernel.h> 57 #include <sys/malloc.h> 58 #include <sys/poll.h> 59 #include <sys/resourcevar.h> 60 #include <sys/sysctl.h> 61 #include <sys/sysent.h> 62 #include <sys/buf.h> 63 #ifdef KTRACE 64 #include <sys/ktrace.h> 65 #endif 66 #include <vm/vm.h> 67 #include <vm/vm_page.h> 68 #include <sys/file2.h> 69 70 #include <machine/limits.h> 71 72 static MALLOC_DEFINE(M_IOCTLOPS, "ioctlops", "ioctl data buffer"); 73 static MALLOC_DEFINE(M_SELECT, "select", "select() buffer"); 74 MALLOC_DEFINE(M_IOV, "iov", "large iov's"); 75 76 static int pollscan __P((struct proc *, struct pollfd *, u_int, int *)); 77 static int selscan __P((struct proc *, fd_mask **, fd_mask **, 78 int, int *)); 79 static int dofileread __P((struct file *, int, void *, 80 size_t, off_t, int, int *)); 81 static int dofilewrite __P((struct file *, int, 82 const void *, size_t, off_t, int, int *)); 83 84 struct file* 85 holdfp(fdp, fd, flag) 86 struct filedesc* fdp; 87 int fd, flag; 88 { 89 struct file* fp; 90 91 if (((u_int)fd) >= fdp->fd_nfiles || 92 (fp = fdp->fd_ofiles[fd]) == NULL || 93 (fp->f_flag & flag) == 0) { 94 return (NULL); 95 } 96 fhold(fp); 97 return (fp); 98 } 99 100 /* 101 * Read system call. 102 */ 103 int 104 read(struct read_args *uap) 105 { 106 struct thread *td = curthread; 107 struct proc *p = td->td_proc; 108 struct file *fp; 109 int error; 110 111 KKASSERT(p); 112 if ((fp = holdfp(p->p_fd, uap->fd, FREAD)) == NULL) 113 return (EBADF); 114 error = dofileread(fp, uap->fd, uap->buf, uap->nbyte, (off_t)-1, 0, 115 &uap->sysmsg_result); 116 fdrop(fp, td); 117 return(error); 118 } 119 120 /* 121 * Pread system call 122 */ 123 int 124 pread(struct pread_args *uap) 125 { 126 struct thread *td = curthread; 127 struct proc *p = td->td_proc; 128 struct file *fp; 129 int error; 130 131 KKASSERT(p); 132 if ((fp = holdfp(p->p_fd, uap->fd, FREAD)) == NULL) 133 return (EBADF); 134 if (fp->f_type != DTYPE_VNODE) { 135 error = ESPIPE; 136 } else { 137 error = dofileread(fp, uap->fd, uap->buf, uap->nbyte, 138 uap->offset, FOF_OFFSET, &uap->sysmsg_result); 139 } 140 fdrop(fp, td); 141 return(error); 142 } 143 144 /* 145 * Code common for read and pread 146 */ 147 int 148 dofileread(fp, fd, buf, nbyte, offset, flags, res) 149 struct file *fp; 150 int fd, flags; 151 void *buf; 152 size_t nbyte; 153 off_t offset; 154 int *res; 155 { 156 struct thread *td = curthread; 157 struct proc *p = td->td_proc; 158 struct uio auio; 159 struct iovec aiov; 160 long cnt, error = 0; 161 #ifdef KTRACE 162 struct iovec ktriov; 163 struct uio ktruio; 164 int didktr = 0; 165 #endif 166 167 aiov.iov_base = (caddr_t)buf; 168 aiov.iov_len = nbyte; 169 auio.uio_iov = &aiov; 170 auio.uio_iovcnt = 1; 171 auio.uio_offset = offset; 172 if (nbyte > INT_MAX) 173 return (EINVAL); 174 auio.uio_resid = nbyte; 175 auio.uio_rw = UIO_READ; 176 auio.uio_segflg = UIO_USERSPACE; 177 auio.uio_td = td; 178 #ifdef KTRACE 179 /* 180 * if tracing, save a copy of iovec 181 */ 182 if (KTRPOINT(td, KTR_GENIO)) { 183 ktriov = aiov; 184 ktruio = auio; 185 didktr = 1; 186 } 187 #endif 188 cnt = nbyte; 189 190 if ((error = fo_read(fp, &auio, fp->f_cred, flags, td))) { 191 if (auio.uio_resid != cnt && (error == ERESTART || 192 error == EINTR || error == EWOULDBLOCK)) 193 error = 0; 194 } 195 cnt -= auio.uio_resid; 196 #ifdef KTRACE 197 if (didktr && error == 0) { 198 ktruio.uio_iov = &ktriov; 199 ktruio.uio_resid = cnt; 200 ktrgenio(p->p_tracep, fd, UIO_READ, &ktruio, error); 201 } 202 #endif 203 *res = cnt; 204 return (error); 205 } 206 207 /* 208 * Scatter read system call. 209 */ 210 int 211 readv(struct readv_args *uap) 212 { 213 struct thread *td = curthread; 214 struct proc *p = td->td_proc; 215 struct file *fp; 216 struct filedesc *fdp = p->p_fd; 217 struct uio auio; 218 struct iovec *iov; 219 struct iovec *needfree; 220 struct iovec aiov[UIO_SMALLIOV]; 221 long i, cnt, error = 0; 222 u_int iovlen; 223 #ifdef KTRACE 224 struct iovec *ktriov = NULL; 225 struct uio ktruio; 226 #endif 227 228 if ((fp = holdfp(fdp, uap->fd, FREAD)) == NULL) 229 return (EBADF); 230 /* note: can't use iovlen until iovcnt is validated */ 231 iovlen = uap->iovcnt * sizeof (struct iovec); 232 if (uap->iovcnt > UIO_SMALLIOV) { 233 if (uap->iovcnt > UIO_MAXIOV) 234 return (EINVAL); 235 MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK); 236 needfree = iov; 237 } else { 238 iov = aiov; 239 needfree = NULL; 240 } 241 auio.uio_iov = iov; 242 auio.uio_iovcnt = uap->iovcnt; 243 auio.uio_rw = UIO_READ; 244 auio.uio_segflg = UIO_USERSPACE; 245 auio.uio_td = td; 246 auio.uio_offset = -1; 247 if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen))) 248 goto done; 249 auio.uio_resid = 0; 250 for (i = 0; i < uap->iovcnt; i++) { 251 if (iov->iov_len > INT_MAX - auio.uio_resid) { 252 error = EINVAL; 253 goto done; 254 } 255 auio.uio_resid += iov->iov_len; 256 iov++; 257 } 258 #ifdef KTRACE 259 /* 260 * if tracing, save a copy of iovec 261 */ 262 if (KTRPOINT(td, KTR_GENIO)) { 263 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 264 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); 265 ktruio = auio; 266 } 267 #endif 268 cnt = auio.uio_resid; 269 if ((error = fo_read(fp, &auio, fp->f_cred, 0, td))) { 270 if (auio.uio_resid != cnt && (error == ERESTART || 271 error == EINTR || error == EWOULDBLOCK)) 272 error = 0; 273 } 274 cnt -= auio.uio_resid; 275 #ifdef KTRACE 276 if (ktriov != NULL) { 277 if (error == 0) { 278 ktruio.uio_iov = ktriov; 279 ktruio.uio_resid = cnt; 280 ktrgenio(p->p_tracep, uap->fd, UIO_READ, &ktruio, 281 error); 282 } 283 FREE(ktriov, M_TEMP); 284 } 285 #endif 286 uap->sysmsg_result = cnt; 287 done: 288 fdrop(fp, td); 289 if (needfree) 290 FREE(needfree, M_IOV); 291 return (error); 292 } 293 294 /* 295 * Write system call 296 */ 297 int 298 write(struct write_args *uap) 299 { 300 struct thread *td = curthread; 301 struct proc *p = td->td_proc; 302 struct file *fp; 303 int error; 304 305 KKASSERT(p); 306 307 if ((fp = holdfp(p->p_fd, uap->fd, FWRITE)) == NULL) 308 return (EBADF); 309 error = dofilewrite(fp, uap->fd, uap->buf, uap->nbyte, (off_t)-1, 0, 310 &uap->sysmsg_result); 311 fdrop(fp, td); 312 return(error); 313 } 314 315 /* 316 * Pwrite system call 317 */ 318 int 319 pwrite(struct pwrite_args *uap) 320 { 321 struct thread *td = curthread; 322 struct proc *p = td->td_proc; 323 struct file *fp; 324 int error; 325 326 KKASSERT(p); 327 if ((fp = holdfp(p->p_fd, uap->fd, FWRITE)) == NULL) 328 return (EBADF); 329 if (fp->f_type != DTYPE_VNODE) { 330 error = ESPIPE; 331 } else { 332 error = dofilewrite(fp, uap->fd, uap->buf, uap->nbyte, 333 uap->offset, FOF_OFFSET, &uap->sysmsg_result); 334 } 335 fdrop(fp, td); 336 return(error); 337 } 338 339 static int 340 dofilewrite( 341 struct file *fp, 342 int fd, 343 const void *buf, 344 size_t nbyte, 345 off_t offset, 346 int flags, 347 int *res 348 ) { 349 struct thread *td = curthread; 350 struct proc *p = td->td_proc; 351 struct uio auio; 352 struct iovec aiov; 353 long cnt, error = 0; 354 #ifdef KTRACE 355 struct iovec ktriov; 356 struct uio ktruio; 357 int didktr = 0; 358 #endif 359 360 aiov.iov_base = (void *)(uintptr_t)buf; 361 aiov.iov_len = nbyte; 362 auio.uio_iov = &aiov; 363 auio.uio_iovcnt = 1; 364 auio.uio_offset = offset; 365 if (nbyte > INT_MAX) 366 return (EINVAL); 367 auio.uio_resid = nbyte; 368 auio.uio_rw = UIO_WRITE; 369 auio.uio_segflg = UIO_USERSPACE; 370 auio.uio_td = td; 371 #ifdef KTRACE 372 /* 373 * if tracing, save a copy of iovec and uio 374 */ 375 if (KTRPOINT(td, KTR_GENIO)) { 376 ktriov = aiov; 377 ktruio = auio; 378 didktr = 1; 379 } 380 #endif 381 cnt = nbyte; 382 if (fp->f_type == DTYPE_VNODE) 383 bwillwrite(); 384 if ((error = fo_write(fp, &auio, fp->f_cred, flags, td))) { 385 if (auio.uio_resid != cnt && (error == ERESTART || 386 error == EINTR || error == EWOULDBLOCK)) 387 error = 0; 388 if (error == EPIPE) 389 psignal(p, SIGPIPE); 390 } 391 cnt -= auio.uio_resid; 392 #ifdef KTRACE 393 if (didktr && error == 0) { 394 ktruio.uio_iov = &ktriov; 395 ktruio.uio_resid = cnt; 396 ktrgenio(p->p_tracep, fd, UIO_WRITE, &ktruio, error); 397 } 398 #endif 399 *res = cnt; 400 return (error); 401 } 402 403 /* 404 * Gather write system call 405 */ 406 int 407 writev(struct writev_args *uap) 408 { 409 struct thread *td = curthread; 410 struct proc *p = td->td_proc; 411 struct file *fp; 412 struct filedesc *fdp; 413 struct uio auio; 414 struct iovec *iov; 415 struct iovec *needfree; 416 struct iovec aiov[UIO_SMALLIOV]; 417 long i, cnt, error = 0; 418 u_int iovlen; 419 #ifdef KTRACE 420 struct iovec *ktriov = NULL; 421 struct uio ktruio; 422 #endif 423 424 KKASSERT(p); 425 fdp = p->p_fd; 426 427 if ((fp = holdfp(fdp, uap->fd, FWRITE)) == NULL) 428 return (EBADF); 429 /* note: can't use iovlen until iovcnt is validated */ 430 iovlen = uap->iovcnt * sizeof (struct iovec); 431 if (uap->iovcnt > UIO_SMALLIOV) { 432 if (uap->iovcnt > UIO_MAXIOV) { 433 needfree = NULL; 434 error = EINVAL; 435 goto done; 436 } 437 MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK); 438 needfree = iov; 439 } else { 440 iov = aiov; 441 needfree = NULL; 442 } 443 auio.uio_iov = iov; 444 auio.uio_iovcnt = uap->iovcnt; 445 auio.uio_rw = UIO_WRITE; 446 auio.uio_segflg = UIO_USERSPACE; 447 auio.uio_td = td; 448 auio.uio_offset = -1; 449 if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen))) 450 goto done; 451 auio.uio_resid = 0; 452 for (i = 0; i < uap->iovcnt; i++) { 453 if (iov->iov_len > INT_MAX - auio.uio_resid) { 454 error = EINVAL; 455 goto done; 456 } 457 auio.uio_resid += iov->iov_len; 458 iov++; 459 } 460 #ifdef KTRACE 461 /* 462 * if tracing, save a copy of iovec and uio 463 */ 464 if (KTRPOINT(td, KTR_GENIO)) { 465 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 466 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); 467 ktruio = auio; 468 } 469 #endif 470 cnt = auio.uio_resid; 471 if (fp->f_type == DTYPE_VNODE) 472 bwillwrite(); 473 if ((error = fo_write(fp, &auio, fp->f_cred, 0, td))) { 474 if (auio.uio_resid != cnt && (error == ERESTART || 475 error == EINTR || error == EWOULDBLOCK)) 476 error = 0; 477 if (error == EPIPE) 478 psignal(p, SIGPIPE); 479 } 480 cnt -= auio.uio_resid; 481 #ifdef KTRACE 482 if (ktriov != NULL) { 483 if (error == 0) { 484 ktruio.uio_iov = ktriov; 485 ktruio.uio_resid = cnt; 486 ktrgenio(p->p_tracep, uap->fd, UIO_WRITE, &ktruio, 487 error); 488 } 489 FREE(ktriov, M_TEMP); 490 } 491 #endif 492 uap->sysmsg_result = cnt; 493 done: 494 fdrop(fp, td); 495 if (needfree) 496 FREE(needfree, M_IOV); 497 return (error); 498 } 499 500 /* 501 * Ioctl system call 502 */ 503 /* ARGSUSED */ 504 int 505 ioctl(struct ioctl_args *uap) 506 { 507 struct thread *td = curthread; 508 struct proc *p = td->td_proc; 509 struct file *fp; 510 struct filedesc *fdp; 511 u_long com; 512 int error; 513 u_int size; 514 caddr_t data, memp; 515 int tmp; 516 #define STK_PARAMS 128 517 union { 518 char stkbuf[STK_PARAMS]; 519 long align; 520 } ubuf; 521 522 KKASSERT(p); 523 fdp = p->p_fd; 524 if ((u_int)uap->fd >= fdp->fd_nfiles || 525 (fp = fdp->fd_ofiles[uap->fd]) == NULL) 526 return (EBADF); 527 528 if ((fp->f_flag & (FREAD | FWRITE)) == 0) 529 return (EBADF); 530 531 switch (com = uap->com) { 532 case FIONCLEX: 533 fdp->fd_ofileflags[uap->fd] &= ~UF_EXCLOSE; 534 return (0); 535 case FIOCLEX: 536 fdp->fd_ofileflags[uap->fd] |= UF_EXCLOSE; 537 return (0); 538 } 539 540 /* 541 * Interpret high order word to find amount of data to be 542 * copied to/from the user's address space. 543 */ 544 size = IOCPARM_LEN(com); 545 if (size > IOCPARM_MAX) 546 return (ENOTTY); 547 548 fhold(fp); 549 550 memp = NULL; 551 if (size > sizeof (ubuf.stkbuf)) { 552 memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK); 553 data = memp; 554 } else { 555 data = ubuf.stkbuf; 556 } 557 if (com&IOC_IN) { 558 if (size) { 559 error = copyin(uap->data, data, (u_int)size); 560 if (error) { 561 if (memp) 562 free(memp, M_IOCTLOPS); 563 fdrop(fp, td); 564 return (error); 565 } 566 } else { 567 *(caddr_t *)data = uap->data; 568 } 569 } else if ((com&IOC_OUT) && size) { 570 /* 571 * Zero the buffer so the user always 572 * gets back something deterministic. 573 */ 574 bzero(data, size); 575 } else if (com&IOC_VOID) { 576 *(caddr_t *)data = uap->data; 577 } 578 579 switch (com) { 580 581 case FIONBIO: 582 if ((tmp = *(int *)data)) 583 fp->f_flag |= FNONBLOCK; 584 else 585 fp->f_flag &= ~FNONBLOCK; 586 error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, td); 587 break; 588 589 case FIOASYNC: 590 if ((tmp = *(int *)data)) 591 fp->f_flag |= FASYNC; 592 else 593 fp->f_flag &= ~FASYNC; 594 error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, td); 595 break; 596 597 default: 598 error = fo_ioctl(fp, com, data, td); 599 /* 600 * Copy any data to user, size was 601 * already set and checked above. 602 */ 603 if (error == 0 && (com&IOC_OUT) && size) 604 error = copyout(data, uap->data, (u_int)size); 605 break; 606 } 607 if (memp) 608 free(memp, M_IOCTLOPS); 609 fdrop(fp, td); 610 return (error); 611 } 612 613 static int nselcoll; /* Select collisions since boot */ 614 int selwait; 615 SYSCTL_INT(_kern, OID_AUTO, nselcoll, CTLFLAG_RD, &nselcoll, 0, ""); 616 617 /* 618 * Select system call. 619 */ 620 int 621 select(struct select_args *uap) 622 { 623 struct proc *p = curproc; 624 625 /* 626 * The magic 2048 here is chosen to be just enough for FD_SETSIZE 627 * infds with the new FD_SETSIZE of 1024, and more than enough for 628 * FD_SETSIZE infds, outfds and exceptfds with the old FD_SETSIZE 629 * of 256. 630 */ 631 fd_mask s_selbits[howmany(2048, NFDBITS)]; 632 fd_mask *ibits[3], *obits[3], *selbits, *sbp; 633 struct timeval atv, rtv, ttv; 634 int s, ncoll, error, timo; 635 u_int nbufbytes, ncpbytes, nfdbits; 636 637 if (uap->nd < 0) 638 return (EINVAL); 639 if (uap->nd > p->p_fd->fd_nfiles) 640 uap->nd = p->p_fd->fd_nfiles; /* forgiving; slightly wrong */ 641 642 /* 643 * Allocate just enough bits for the non-null fd_sets. Use the 644 * preallocated auto buffer if possible. 645 */ 646 nfdbits = roundup(uap->nd, NFDBITS); 647 ncpbytes = nfdbits / NBBY; 648 nbufbytes = 0; 649 if (uap->in != NULL) 650 nbufbytes += 2 * ncpbytes; 651 if (uap->ou != NULL) 652 nbufbytes += 2 * ncpbytes; 653 if (uap->ex != NULL) 654 nbufbytes += 2 * ncpbytes; 655 if (nbufbytes <= sizeof s_selbits) 656 selbits = &s_selbits[0]; 657 else 658 selbits = malloc(nbufbytes, M_SELECT, M_WAITOK); 659 660 /* 661 * Assign pointers into the bit buffers and fetch the input bits. 662 * Put the output buffers together so that they can be bzeroed 663 * together. 664 */ 665 sbp = selbits; 666 #define getbits(name, x) \ 667 do { \ 668 if (uap->name == NULL) \ 669 ibits[x] = NULL; \ 670 else { \ 671 ibits[x] = sbp + nbufbytes / 2 / sizeof *sbp; \ 672 obits[x] = sbp; \ 673 sbp += ncpbytes / sizeof *sbp; \ 674 error = copyin(uap->name, ibits[x], ncpbytes); \ 675 if (error != 0) \ 676 goto done; \ 677 } \ 678 } while (0) 679 getbits(in, 0); 680 getbits(ou, 1); 681 getbits(ex, 2); 682 #undef getbits 683 if (nbufbytes != 0) 684 bzero(selbits, nbufbytes / 2); 685 686 if (uap->tv) { 687 error = copyin((caddr_t)uap->tv, (caddr_t)&atv, 688 sizeof (atv)); 689 if (error) 690 goto done; 691 if (itimerfix(&atv)) { 692 error = EINVAL; 693 goto done; 694 } 695 getmicrouptime(&rtv); 696 timevaladd(&atv, &rtv); 697 } else { 698 atv.tv_sec = 0; 699 atv.tv_usec = 0; 700 } 701 timo = 0; 702 retry: 703 ncoll = nselcoll; 704 p->p_flag |= P_SELECT; 705 error = selscan(p, ibits, obits, uap->nd, &uap->sysmsg_result); 706 if (error || uap->sysmsg_result) 707 goto done; 708 if (atv.tv_sec || atv.tv_usec) { 709 getmicrouptime(&rtv); 710 if (timevalcmp(&rtv, &atv, >=)) 711 goto done; 712 ttv = atv; 713 timevalsub(&ttv, &rtv); 714 timo = ttv.tv_sec > 24 * 60 * 60 ? 715 24 * 60 * 60 * hz : tvtohz(&ttv); 716 } 717 s = splhigh(); 718 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { 719 splx(s); 720 goto retry; 721 } 722 p->p_flag &= ~P_SELECT; 723 724 error = tsleep((caddr_t)&selwait, PCATCH, "select", timo); 725 726 splx(s); 727 if (error == 0) 728 goto retry; 729 done: 730 p->p_flag &= ~P_SELECT; 731 /* select is not restarted after signals... */ 732 if (error == ERESTART) 733 error = EINTR; 734 if (error == EWOULDBLOCK) 735 error = 0; 736 #define putbits(name, x) \ 737 if (uap->name && (error2 = copyout(obits[x], uap->name, ncpbytes))) \ 738 error = error2; 739 if (error == 0) { 740 int error2; 741 742 putbits(in, 0); 743 putbits(ou, 1); 744 putbits(ex, 2); 745 #undef putbits 746 } 747 if (selbits != &s_selbits[0]) 748 free(selbits, M_SELECT); 749 return (error); 750 } 751 752 static int 753 selscan(struct proc *p, fd_mask **ibits, fd_mask **obits, int nfd, int *res) 754 { 755 struct thread *td = p->p_thread; 756 struct filedesc *fdp = p->p_fd; 757 int msk, i, fd; 758 fd_mask bits; 759 struct file *fp; 760 int n = 0; 761 /* Note: backend also returns POLLHUP/POLLERR if appropriate. */ 762 static int flag[3] = { POLLRDNORM, POLLWRNORM, POLLRDBAND }; 763 764 for (msk = 0; msk < 3; msk++) { 765 if (ibits[msk] == NULL) 766 continue; 767 for (i = 0; i < nfd; i += NFDBITS) { 768 bits = ibits[msk][i/NFDBITS]; 769 /* ffs(int mask) not portable, fd_mask is long */ 770 for (fd = i; bits && fd < nfd; fd++, bits >>= 1) { 771 if (!(bits & 1)) 772 continue; 773 fp = fdp->fd_ofiles[fd]; 774 if (fp == NULL) 775 return (EBADF); 776 if (fo_poll(fp, flag[msk], fp->f_cred, td)) { 777 obits[msk][(fd)/NFDBITS] |= 778 ((fd_mask)1 << ((fd) % NFDBITS)); 779 n++; 780 } 781 } 782 } 783 } 784 *res = n; 785 return (0); 786 } 787 788 /* 789 * Poll system call. 790 */ 791 int 792 poll(struct poll_args *uap) 793 { 794 caddr_t bits; 795 char smallbits[32 * sizeof(struct pollfd)]; 796 struct timeval atv, rtv, ttv; 797 int s, ncoll, error = 0, timo; 798 u_int nfds; 799 size_t ni; 800 struct proc *p = curproc; 801 802 nfds = SCARG(uap, nfds); 803 /* 804 * This is kinda bogus. We have fd limits, but that is not 805 * really related to the size of the pollfd array. Make sure 806 * we let the process use at least FD_SETSIZE entries and at 807 * least enough for the current limits. We want to be reasonably 808 * safe, but not overly restrictive. 809 */ 810 if (nfds > p->p_rlimit[RLIMIT_NOFILE].rlim_cur && nfds > FD_SETSIZE) 811 return (EINVAL); 812 ni = nfds * sizeof(struct pollfd); 813 if (ni > sizeof(smallbits)) 814 bits = malloc(ni, M_TEMP, M_WAITOK); 815 else 816 bits = smallbits; 817 error = copyin(SCARG(uap, fds), bits, ni); 818 if (error) 819 goto done; 820 if (SCARG(uap, timeout) != INFTIM) { 821 atv.tv_sec = SCARG(uap, timeout) / 1000; 822 atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000; 823 if (itimerfix(&atv)) { 824 error = EINVAL; 825 goto done; 826 } 827 getmicrouptime(&rtv); 828 timevaladd(&atv, &rtv); 829 } else { 830 atv.tv_sec = 0; 831 atv.tv_usec = 0; 832 } 833 timo = 0; 834 retry: 835 ncoll = nselcoll; 836 p->p_flag |= P_SELECT; 837 error = pollscan(p, (struct pollfd *)bits, nfds, &uap->sysmsg_result); 838 if (error || uap->sysmsg_result) 839 goto done; 840 if (atv.tv_sec || atv.tv_usec) { 841 getmicrouptime(&rtv); 842 if (timevalcmp(&rtv, &atv, >=)) 843 goto done; 844 ttv = atv; 845 timevalsub(&ttv, &rtv); 846 timo = ttv.tv_sec > 24 * 60 * 60 ? 847 24 * 60 * 60 * hz : tvtohz(&ttv); 848 } 849 s = splhigh(); 850 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { 851 splx(s); 852 goto retry; 853 } 854 p->p_flag &= ~P_SELECT; 855 error = tsleep((caddr_t)&selwait, PCATCH, "poll", timo); 856 splx(s); 857 if (error == 0) 858 goto retry; 859 done: 860 p->p_flag &= ~P_SELECT; 861 /* poll is not restarted after signals... */ 862 if (error == ERESTART) 863 error = EINTR; 864 if (error == EWOULDBLOCK) 865 error = 0; 866 if (error == 0) { 867 error = copyout(bits, SCARG(uap, fds), ni); 868 if (error) 869 goto out; 870 } 871 out: 872 if (ni > sizeof(smallbits)) 873 free(bits, M_TEMP); 874 return (error); 875 } 876 877 static int 878 pollscan(struct proc *p, struct pollfd *fds, u_int nfd, int *res) 879 { 880 struct thread *td = p->p_thread; 881 struct filedesc *fdp = p->p_fd; 882 int i; 883 struct file *fp; 884 int n = 0; 885 886 for (i = 0; i < nfd; i++, fds++) { 887 if (fds->fd >= fdp->fd_nfiles) { 888 fds->revents = POLLNVAL; 889 n++; 890 } else if (fds->fd < 0) { 891 fds->revents = 0; 892 } else { 893 fp = fdp->fd_ofiles[fds->fd]; 894 if (fp == NULL) { 895 fds->revents = POLLNVAL; 896 n++; 897 } else { 898 /* 899 * Note: backend also returns POLLHUP and 900 * POLLERR if appropriate. 901 */ 902 fds->revents = fo_poll(fp, fds->events, 903 fp->f_cred, td); 904 if (fds->revents != 0) 905 n++; 906 } 907 } 908 } 909 *res = n; 910 return (0); 911 } 912 913 /* 914 * OpenBSD poll system call. 915 * XXX this isn't quite a true representation.. OpenBSD uses select ops. 916 */ 917 int 918 openbsd_poll(struct openbsd_poll_args *uap) 919 { 920 return (poll((struct poll_args *)uap)); 921 } 922 923 /*ARGSUSED*/ 924 int 925 seltrue(dev_t dev, int events, struct thread *td) 926 { 927 return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)); 928 } 929 930 /* 931 * Record a select request. A global wait must be used since a process/thread 932 * might go away after recording its request. 933 */ 934 void 935 selrecord(struct thread *selector, struct selinfo *sip) 936 { 937 struct proc *p; 938 pid_t mypid; 939 940 if ((p = selector->td_proc) == NULL) 941 panic("selrecord: thread needs a process"); 942 943 mypid = p->p_pid; 944 if (sip->si_pid == mypid) 945 return; 946 if (sip->si_pid && (p = pfind(sip->si_pid)) && 947 p->p_wchan == (caddr_t)&selwait) { 948 sip->si_flags |= SI_COLL; 949 } else { 950 sip->si_pid = mypid; 951 } 952 } 953 954 /* 955 * Do a wakeup when a selectable event occurs. 956 */ 957 void 958 selwakeup(struct selinfo *sip) 959 { 960 struct proc *p; 961 int s; 962 963 if (sip->si_pid == 0) 964 return; 965 if (sip->si_flags & SI_COLL) { 966 nselcoll++; 967 sip->si_flags &= ~SI_COLL; 968 wakeup((caddr_t)&selwait); /* YYY fixable */ 969 } 970 p = pfind(sip->si_pid); 971 sip->si_pid = 0; 972 if (p != NULL) { 973 s = splhigh(); 974 if (p->p_wchan == (caddr_t)&selwait) { 975 if (p->p_stat == SSLEEP) 976 setrunnable(p); 977 else 978 unsleep(p->p_thread); 979 } else if (p->p_flag & P_SELECT) 980 p->p_flag &= ~P_SELECT; 981 splx(s); 982 } 983 } 984 985