/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)sys_generic.c	8.5 (Berkeley) 1/21/94
 * $FreeBSD: src/sys/kern/sys_generic.c,v 1.55.2.10 2001/03/17 10:39:32 peter Exp $
 * $DragonFly: src/sys/kern/sys_generic.c,v 1.16 2004/01/07 11:04:18 dillon Exp $
 */

#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/filedesc.h>
#include <sys/filio.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/socketvar.h>
#include <sys/uio.h>
#include <sys/kernel.h>
#include <sys/kern_syscall.h>
#include <sys/malloc.h>
#include <sys/poll.h>
#include <sys/resourcevar.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/buf.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif
#include <vm/vm.h>
#include <vm/vm_page.h>
#include <sys/file2.h>

#include <machine/limits.h>

static MALLOC_DEFINE(M_IOCTLOPS, "ioctlops", "ioctl data buffer");
static MALLOC_DEFINE(M_SELECT, "select", "select() buffer");
MALLOC_DEFINE(M_IOV, "iov", "large iov's");

static int	pollscan (struct proc *, struct pollfd *, u_int, int *);
static int	selscan (struct proc *, fd_mask **, fd_mask **,
		    int, int *);

/*
 * Look up the file backing descriptor 'fd' in 'fdp' and verify that it
 * is open with at least the access bits given in 'flag' (FREAD and/or
 * FWRITE).  On success a reference is taken with fhold(); the caller is
 * responsible for releasing it with fdrop().  Returns NULL if the
 * descriptor is out of range, closed, or lacks the requested access.
 */
struct file*
holdfp(fdp, fd, flag)
	struct filedesc* fdp;
	int fd, flag;
{
	struct file* fp;

	if (((u_int)fd) >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[fd]) == NULL ||
	    (fp->f_flag & flag) == 0) {
		return (NULL);
	}
	fhold(fp);
	return (fp);
}

/*
 * Read system call.
 *
 * Builds a single-element iovec/uio around the user buffer and hands
 * it to kern_readv() with no offset (uses the file's current offset).
 */
int
read(struct read_args *uap)
{
	struct thread *td = curthread;
	struct uio auio;
	struct iovec aiov;
	int error;

	aiov.iov_base = uap->buf;
	aiov.iov_len = uap->nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = -1;		/* -1: use the file's seek offset */
	auio.uio_resid = uap->nbyte;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_td = td;

	error = kern_readv(uap->fd, &auio, 0, &uap->sysmsg_result);

	return(error);
}

/*
 * Pread system call
 *
 * Like read() but at an explicit offset; FOF_OFFSET tells kern_readv()
 * to honor uio_offset instead of the file's current position.
 */
int
pread(struct pread_args *uap)
{
	struct thread *td = curthread;
	struct uio auio;
	struct iovec aiov;
	int error;

	aiov.iov_base = uap->buf;
	aiov.iov_len = uap->nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = uap->offset;
	auio.uio_resid = uap->nbyte;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_td = td;

	error = kern_readv(uap->fd, &auio, FOF_OFFSET, &uap->sysmsg_result);

	return(error);
}

/*
 * Scatter read system call.  Copies the user iovec array in (small
 * counts use the on-stack aiov[], larger ones are allocated by
 * iovec_copyin()) and dispatches to kern_readv().
 */
int
readv(struct readv_args *uap)
{
	struct thread *td = curthread;
	struct uio auio;
	struct iovec aiov[UIO_SMALLIOV], *iov = NULL;
	int error;

	error = iovec_copyin(uap->iovp, &iov, aiov, uap->iovcnt,
	    &auio.uio_resid);
	if (error)
		return (error);
	auio.uio_iov = iov;
	auio.uio_iovcnt = uap->iovcnt;
	auio.uio_offset = -1;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_td = td;

	error = kern_readv(uap->fd, &auio, 0, &uap->sysmsg_result);

	iovec_free(&iov, aiov);
	return (error);
}

/*
 * Common backend for read(), pread() and readv().
 *
 * Holds a reference on the file for the duration of the transfer,
 * rejects FOF_OFFSET on non-vnode files (ESPIPE) and negative resids
 * (EINVAL).  A transfer interrupted after partial progress (ERESTART/
 * EINTR/EWOULDBLOCK) is reported as success with the short count.
 * On success *res is set to the number of bytes transferred.
 *
 * NOTE(review): 'len' is declared int here but long in kern_writev();
 * presumably they should match — verify against uio_resid's type.
 */
int
kern_readv(int fd, struct uio *auio, int flags, int *res)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp;
	struct filedesc *fdp = p->p_fd;
	int len, error;
#ifdef KTRACE
	struct iovec *ktriov = NULL;
	struct uio ktruio;
#endif

	KKASSERT(p);

	fp = holdfp(fdp, fd, FREAD);
	if (fp == NULL)
		return (EBADF);
	if (flags & FOF_OFFSET && fp->f_type != DTYPE_VNODE) {
		error = ESPIPE;
		goto done;
	}
	if (auio->uio_resid < 0) {
		error = EINVAL;
		goto done;
	}
#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec (fo_read consumes the
	 * original as it advances through the transfer)
	 */
	if (KTRPOINT(td, KTR_GENIO)) {
		int iovlen = auio->uio_iovcnt * sizeof(struct iovec);

		MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
		bcopy((caddr_t)auio->uio_iov, (caddr_t)ktriov, iovlen);
		ktruio = *auio;
	}
#endif
	len = auio->uio_resid;
	error = fo_read(fp, auio, fp->f_cred, flags, td);
	if (error) {
		/* a partial transfer before an interrupt counts as success */
		if (auio->uio_resid != len && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	}
#ifdef KTRACE
	if (ktriov != NULL) {
		if (error == 0) {
			ktruio.uio_iov = ktriov;
			ktruio.uio_resid = len - auio->uio_resid;
			ktrgenio(p->p_tracep, fd, UIO_READ, &ktruio, error);
		}
		FREE(ktriov, M_TEMP);
	}
#endif
	if (error == 0)
		*res = len - auio->uio_resid;
done:
	fdrop(fp, td);
	return (error);
}

/*
 * Write system call
 */
int
write(struct write_args *uap)
{
	struct thread *td = curthread;
	struct uio auio;
	struct iovec aiov;
	int error;

	aiov.iov_base = (void *)(uintptr_t)uap->buf;
	aiov.iov_len = uap->nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = -1;
	auio.uio_resid = uap->nbyte;
	auio.uio_rw = UIO_WRITE;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_td = td;

	error = kern_writev(uap->fd, &auio, 0, &uap->sysmsg_result);

	return(error);
}

/*
 * Pwrite system call
 *
 * Like write() but at an explicit offset (FOF_OFFSET).
 */
int
pwrite(struct pwrite_args *uap)
{
	struct thread *td = curthread;
	struct uio auio;
	struct iovec aiov;
	int error;

	aiov.iov_base = (void *)(uintptr_t)uap->buf;
	aiov.iov_len = uap->nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = uap->offset;
	auio.uio_resid = uap->nbyte;
	auio.uio_rw = UIO_WRITE;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_td = td;

	error = kern_writev(uap->fd, &auio, FOF_OFFSET, &uap->sysmsg_result);

	return(error);
}

/*
 * Gather write system call.  Copies in the user iovec array and
 * dispatches to kern_writev().
 */
int
writev(struct writev_args *uap)
{
	struct thread *td = curthread;
	struct uio auio;
	struct iovec aiov[UIO_SMALLIOV], *iov = NULL;
	int error;

	error = iovec_copyin(uap->iovp, &iov, aiov, uap->iovcnt,
	    &auio.uio_resid);
	if (error)
		return (error);
	auio.uio_iov = iov;
	auio.uio_iovcnt = uap->iovcnt;
	auio.uio_offset = -1;
	auio.uio_rw = UIO_WRITE;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_td = td;

	error = kern_writev(uap->fd, &auio, 0, &uap->sysmsg_result);

	iovec_free(&iov, aiov);
	return (error);
}

/*
 * Gather write system call
 *
 * Common backend for write(), pwrite() and writev().  Mirrors
 * kern_readv() with two additions: bwillwrite() is called before
 * vnode writes (buffer-cache throttling), and EPIPE raises SIGPIPE
 * on the process before being returned.
 */
int
kern_writev(int fd, struct uio *auio, int flags, int *res)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp;
	struct filedesc *fdp = p->p_fd;
	long len, error;
#ifdef KTRACE
	struct iovec *ktriov = NULL;
	struct uio ktruio;
#endif

	KKASSERT(p);

	fp = holdfp(fdp, fd, FWRITE);
	if (fp == NULL)
		return (EBADF);
	if ((flags & FOF_OFFSET) && fp->f_type != DTYPE_VNODE) {
		error = ESPIPE;
		goto done;
	}
	if (auio->uio_resid < 0) {
		error = EINVAL;
		goto done;
	}
#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec and uio
	 */
	if (KTRPOINT(td, KTR_GENIO)) {
		int iovlen = auio->uio_iovcnt * sizeof(struct iovec);

		MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
		bcopy((caddr_t)auio->uio_iov, (caddr_t)ktriov, iovlen);
		ktruio = *auio;
	}
#endif
	len = auio->uio_resid;
	if (fp->f_type == DTYPE_VNODE)
		bwillwrite();
	error = fo_write(fp, auio, fp->f_cred, flags, td);
	if (error) {
		/* a partial transfer before an interrupt counts as success */
		if (auio->uio_resid != len && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		if (error == EPIPE)
			psignal(p, SIGPIPE);
	}
#ifdef KTRACE
	if (ktriov != NULL) {
		if (error == 0) {
			ktruio.uio_iov = ktriov;
			ktruio.uio_resid = len - auio->uio_resid;
			ktrgenio(p->p_tracep, fd, UIO_WRITE, &ktruio, error);
		}
		FREE(ktriov, M_TEMP);
	}
#endif
	if (error == 0)
		*res = len - auio->uio_resid;
done:
	fdrop(fp, td);
	return (error);
}

/*
 * Ioctl system call
 *
 * FIONCLEX/FIOCLEX only touch per-descriptor flags and are handled
 * without referencing the file.  For everything else the argument
 * size encoded in the command's high word determines how much data
 * is staged in a kernel buffer (on-stack up to STK_PARAMS bytes,
 * malloc'd above that) before/after calling the file's fo_ioctl.
 */
/* ARGSUSED */
int
ioctl(struct ioctl_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp;
	struct filedesc *fdp;
	u_long com;
	int error;
	u_int size;
	caddr_t data, memp;
	int tmp;
#define STK_PARAMS	128
	union {
	    char stkbuf[STK_PARAMS];
	    long align;		/* forces worst-case alignment of stkbuf */
	} ubuf;

	KKASSERT(p);
	fdp = p->p_fd;
	if ((u_int)uap->fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
		return (EBADF);

	if ((fp->f_flag & (FREAD | FWRITE)) == 0)
		return (EBADF);

	switch (com = uap->com) {
	case FIONCLEX:
		fdp->fd_ofileflags[uap->fd] &= ~UF_EXCLOSE;
		return (0);
	case FIOCLEX:
		fdp->fd_ofileflags[uap->fd] |= UF_EXCLOSE;
		return (0);
	}

	/*
	 * Interpret high order word to find amount of data to be
	 * copied to/from the user's address space.
	 */
	size = IOCPARM_LEN(com);
	if (size > IOCPARM_MAX)
		return (ENOTTY);

	fhold(fp);

	memp = NULL;
	if (size > sizeof (ubuf.stkbuf)) {
		memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
		data = memp;
	} else {
		data = ubuf.stkbuf;
	}
	if (com&IOC_IN) {
		if (size) {
			error = copyin(uap->data, data, (u_int)size);
			if (error) {
				if (memp)
					free(memp, M_IOCTLOPS);
				fdrop(fp, td);
				return (error);
			}
		} else {
			/* zero-size IOC_IN: pass the user pointer itself */
			*(caddr_t *)data = uap->data;
		}
	} else if ((com&IOC_OUT) && size) {
		/*
		 * Zero the buffer so the user always
		 * gets back something deterministic.
		 */
		bzero(data, size);
	} else if (com&IOC_VOID) {
		*(caddr_t *)data = uap->data;
	}

	switch (com) {

	case FIONBIO:
		/* keep f_flag's FNONBLOCK in sync before notifying backend */
		if ((tmp = *(int *)data))
			fp->f_flag |= FNONBLOCK;
		else
			fp->f_flag &= ~FNONBLOCK;
		error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, td);
		break;

	case FIOASYNC:
		if ((tmp = *(int *)data))
			fp->f_flag |= FASYNC;
		else
			fp->f_flag &= ~FASYNC;
		error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, td);
		break;

	default:
		error = fo_ioctl(fp, com, data, td);
		/*
		 * Copy any data to user, size was
		 * already set and checked above.
		 */
		if (error == 0 && (com&IOC_OUT) && size)
			error = copyout(data, uap->data, (u_int)size);
		break;
	}
	if (memp)
		free(memp, M_IOCTLOPS);
	fdrop(fp, td);
	return (error);
}

static int	nselcoll;	/* Select collisions since boot */
int	selwait;		/* common sleep channel for select/poll */
SYSCTL_INT(_kern, OID_AUTO, nselcoll, CTLFLAG_RD, &nselcoll, 0, "");

/*
 * Select system call.
 */
int
select(struct select_args *uap)
{
	struct proc *p = curproc;

	/*
	 * The magic 2048 here is chosen to be just enough for FD_SETSIZE
	 * infds with the new FD_SETSIZE of 1024, and more than enough for
	 * FD_SETSIZE infds, outfds and exceptfds with the old FD_SETSIZE
	 * of 256.
	 */
	fd_mask s_selbits[howmany(2048, NFDBITS)];
	fd_mask *ibits[3], *obits[3], *selbits, *sbp;
	struct timeval atv, rtv, ttv;
	int s, ncoll, error, timo;
	u_int nbufbytes, ncpbytes, nfdbits;

	if (uap->nd < 0)
		return (EINVAL);
	if (uap->nd > p->p_fd->fd_nfiles)
		uap->nd = p->p_fd->fd_nfiles;   /* forgiving; slightly wrong */

	/*
	 * Allocate just enough bits for the non-null fd_sets.  Use the
	 * preallocated auto buffer if possible.
	 */
	nfdbits = roundup(uap->nd, NFDBITS);
	ncpbytes = nfdbits / NBBY;
	nbufbytes = 0;
	if (uap->in != NULL)
		nbufbytes += 2 * ncpbytes;
	if (uap->ou != NULL)
		nbufbytes += 2 * ncpbytes;
	if (uap->ex != NULL)
		nbufbytes += 2 * ncpbytes;
	if (nbufbytes <= sizeof s_selbits)
		selbits = &s_selbits[0];
	else
		selbits = malloc(nbufbytes, M_SELECT, M_WAITOK);

	/*
	 * Assign pointers into the bit buffers and fetch the input bits.
	 * Put the output buffers together so that they can be bzeroed
	 * together.
	 */
	sbp = selbits;
#define	getbits(name, x) \
	do {								\
		if (uap->name == NULL)					\
			ibits[x] = NULL;				\
		else {							\
			ibits[x] = sbp + nbufbytes / 2 / sizeof *sbp;	\
			obits[x] = sbp;					\
			sbp += ncpbytes / sizeof *sbp;			\
			error = copyin(uap->name, ibits[x], ncpbytes);	\
			if (error != 0)					\
				goto done;				\
		}							\
	} while (0)
	getbits(in, 0);
	getbits(ou, 1);
	getbits(ex, 2);
#undef	getbits
	if (nbufbytes != 0)
		bzero(selbits, nbufbytes / 2);

	if (uap->tv) {
		error = copyin((caddr_t)uap->tv, (caddr_t)&atv,
			sizeof (atv));
		if (error)
			goto done;
		if (itimerfix(&atv)) {
			error = EINVAL;
			goto done;
		}
		/* convert the relative timeout into an absolute deadline */
		getmicrouptime(&rtv);
		timevaladd(&atv, &rtv);
	} else {
		atv.tv_sec = 0;
		atv.tv_usec = 0;
	}
	timo = 0;
retry:
	ncoll = nselcoll;
	p->p_flag |= P_SELECT;
	error = selscan(p, ibits, obits, uap->nd, &uap->sysmsg_result);
	if (error || uap->sysmsg_result)
		goto done;
	if (atv.tv_sec || atv.tv_usec) {
		getmicrouptime(&rtv);
		if (timevalcmp(&rtv, &atv, >=))
			goto done;
		ttv = atv;
		timevalsub(&ttv, &rtv);
		/* clamp the sleep to 24 hours to keep tvtohz in range */
		timo = ttv.tv_sec > 24 * 60 * 60 ?
		    24 * 60 * 60 * hz : tvtohz_high(&ttv);
	}
	/*
	 * If P_SELECT was cleared or a collision was recorded while we
	 * scanned, an event may already be pending: rescan instead of
	 * sleeping.  splhigh() closes the window between the check and
	 * the tsleep().
	 */
	s = splhigh();
	if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
		splx(s);
		goto retry;
	}
	p->p_flag &= ~P_SELECT;

	error = tsleep((caddr_t)&selwait, PCATCH, "select", timo);

	splx(s);
	if (error == 0)
		goto retry;
done:
	p->p_flag &= ~P_SELECT;
	/* select is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
#define	putbits(name, x) \
	if (uap->name && (error2 = copyout(obits[x], uap->name, ncpbytes))) \
		error = error2;
	if (error == 0) {
		int error2;

		putbits(in, 0);
		putbits(ou, 1);
		putbits(ex, 2);
#undef putbits
	}
	if (selbits != &s_selbits[0])
		free(selbits, M_SELECT);
	return (error);
}

/*
 * Poll every descriptor whose bit is set in one of the three input
 * fd_sets, setting the corresponding output bit when the backend
 * reports the set ready.  *res receives the count of ready bits.
 * Returns EBADF if a set bit names a closed descriptor.
 */
static int
selscan(struct proc *p, fd_mask **ibits, fd_mask **obits, int nfd, int *res)
{
	struct thread *td = p->p_thread;
	struct filedesc *fdp = p->p_fd;
	int msk, i, fd;
	fd_mask bits;
	struct file *fp;
	int n = 0;
	/* Note: backend also returns POLLHUP/POLLERR if appropriate. */
	static int flag[3] = { POLLRDNORM, POLLWRNORM, POLLRDBAND };

	for (msk = 0; msk < 3; msk++) {
		if (ibits[msk] == NULL)
			continue;
		for (i = 0; i < nfd; i += NFDBITS) {
			bits = ibits[msk][i/NFDBITS];
			/* ffs(int mask) not portable, fd_mask is long */
			for (fd = i; bits && fd < nfd; fd++, bits >>= 1) {
				if (!(bits & 1))
					continue;
				fp = fdp->fd_ofiles[fd];
				if (fp == NULL)
					return (EBADF);
				if (fo_poll(fp, flag[msk], fp->f_cred, td)) {
					obits[msk][(fd)/NFDBITS] |=
					    ((fd_mask)1 << ((fd) % NFDBITS));
					n++;
				}
			}
		}
	}
	*res = n;
	return (0);
}

/*
 * Poll system call.
 */
int
poll(struct poll_args *uap)
{
	caddr_t bits;
	char smallbits[32 * sizeof(struct pollfd)];
	struct timeval atv, rtv, ttv;
	int s, ncoll, error = 0, timo;
	u_int nfds;
	size_t ni;
	struct proc *p = curproc;

	nfds = SCARG(uap, nfds);
	/*
	 * This is kinda bogus.  We have fd limits, but that is not
	 * really related to the size of the pollfd array.  Make sure
	 * we let the process use at least FD_SETSIZE entries and at
	 * least enough for the current limits.  We want to be reasonably
	 * safe, but not overly restrictive.
	 *
	 * NOTE(review): if rlim_cur is very large this admits an nfds
	 * for which 'nfds * sizeof(struct pollfd)' could overflow on a
	 * 32-bit size_t — verify the limit semantics.
	 */
	if (nfds > p->p_rlimit[RLIMIT_NOFILE].rlim_cur && nfds > FD_SETSIZE)
		return (EINVAL);
	ni = nfds * sizeof(struct pollfd);
	if (ni > sizeof(smallbits))
		bits = malloc(ni, M_TEMP, M_WAITOK);
	else
		bits = smallbits;
	error = copyin(SCARG(uap, fds), bits, ni);
	if (error)
		goto done;
	if (SCARG(uap, timeout) != INFTIM) {
		/* millisecond timeout -> absolute uptime deadline */
		atv.tv_sec = SCARG(uap, timeout) / 1000;
		atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000;
		if (itimerfix(&atv)) {
			error = EINVAL;
			goto done;
		}
		getmicrouptime(&rtv);
		timevaladd(&atv, &rtv);
	} else {
		atv.tv_sec = 0;
		atv.tv_usec = 0;
	}
	timo = 0;
retry:
	ncoll = nselcoll;
	p->p_flag |= P_SELECT;
	error = pollscan(p, (struct pollfd *)bits, nfds, &uap->sysmsg_result);
	if (error || uap->sysmsg_result)
		goto done;
	if (atv.tv_sec || atv.tv_usec) {
		getmicrouptime(&rtv);
		if (timevalcmp(&rtv, &atv, >=))
			goto done;
		ttv = atv;
		timevalsub(&ttv, &rtv);
		timo = ttv.tv_sec > 24 * 60 * 60 ?
		    24 * 60 * 60 * hz : tvtohz_high(&ttv);
	}
	/* same scan/sleep race handling as select() */
	s = splhigh();
	if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
		splx(s);
		goto retry;
	}
	p->p_flag &= ~P_SELECT;
	error = tsleep((caddr_t)&selwait, PCATCH, "poll", timo);
	splx(s);
	if (error == 0)
		goto retry;
done:
	p->p_flag &= ~P_SELECT;
	/* poll is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
	if (error == 0) {
		error = copyout(bits, SCARG(uap, fds), ni);
		if (error)
			goto out;	/* NOTE(review): redundant; falls into out anyway */
	}
out:
	if (ni > sizeof(smallbits))
		free(bits, M_TEMP);
	return (error);
}

/*
 * Poll each entry of the pollfd array.  Negative fds are skipped with
 * revents = 0; out-of-range or closed fds get POLLNVAL; otherwise
 * revents comes from the file's fo_poll.  *res receives the number of
 * entries with nonzero revents.
 */
static int
pollscan(struct proc *p, struct pollfd *fds, u_int nfd, int *res)
{
	struct thread *td = p->p_thread;
	struct filedesc *fdp = p->p_fd;
	int i;
	struct file *fp;
	int n = 0;

	for (i = 0; i < nfd; i++, fds++) {
		if (fds->fd >= fdp->fd_nfiles) {
			fds->revents = POLLNVAL;
			n++;
		} else if (fds->fd < 0) {
			fds->revents = 0;
		} else {
			fp = fdp->fd_ofiles[fds->fd];
			if (fp == NULL) {
				fds->revents = POLLNVAL;
				n++;
			} else {
				/*
				 * Note: backend also returns POLLHUP and
				 * POLLERR if appropriate.
				 */
				fds->revents = fo_poll(fp, fds->events,
				    fp->f_cred, td);
				if (fds->revents != 0)
					n++;
			}
		}
	}
	*res = n;
	return (0);
}

/*
 * OpenBSD poll system call.
 * XXX this isn't quite a true representation..  OpenBSD uses select ops.
 */
int
openbsd_poll(struct openbsd_poll_args *uap)
{
	return (poll((struct poll_args *)uap));
}

/*
 * Generic poll routine for devices that are always ready for
 * non-priority I/O: reports the requested normal read/write events
 * as immediately available.
 */
/*ARGSUSED*/
int
seltrue(dev_t dev, int events, struct thread *td)
{
	return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
}

/*
 * Record a select request.  A global wait must be used since a process/thread
 * might go away after recording its request.
 *
 * If another process already holds the selinfo slot and is sleeping on
 * selwait, mark a collision (SI_COLL) so selwakeup() broadcasts;
 * otherwise take over the slot with our pid.
 */
void
selrecord(struct thread *selector, struct selinfo *sip)
{
	struct proc *p;
	pid_t mypid;

	if ((p = selector->td_proc) == NULL)
		panic("selrecord: thread needs a process");

	mypid = p->p_pid;
	if (sip->si_pid == mypid)
		return;
	if (sip->si_pid && (p = pfind(sip->si_pid)) &&
	    p->p_wchan == (caddr_t)&selwait) {
		sip->si_flags |= SI_COLL;
	} else {
		sip->si_pid = mypid;
	}
}

/*
 * Do a wakeup when a selectable event occurs.
 *
 * On collision every selwait sleeper is woken; otherwise only the
 * recorded process is made runnable (or its P_SELECT cleared so its
 * pre-sleep check forces a rescan).
 */
void
selwakeup(struct selinfo *sip)
{
	struct proc *p;
	int s;

	if (sip->si_pid == 0)
		return;
	if (sip->si_flags & SI_COLL) {
		nselcoll++;
		sip->si_flags &= ~SI_COLL;
		wakeup((caddr_t)&selwait);	/* YYY fixable */
	}
	p = pfind(sip->si_pid);
	sip->si_pid = 0;
	if (p != NULL) {
		s = splhigh();
		if (p->p_wchan == (caddr_t)&selwait) {
			if (p->p_stat == SSLEEP)
				setrunnable(p);
			else
				unsleep(p->p_thread);
		} else if (p->p_flag & P_SELECT)
			p->p_flag &= ~P_SELECT;
		splx(s);
	}
}