1 /* 2 * Copyright (c) 1982, 1986, 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)sys_generic.c 8.5 (Berkeley) 1/21/94 39 * $FreeBSD: src/sys/kern/sys_generic.c,v 1.55.2.10 2001/03/17 10:39:32 peter Exp $ 40 * $DragonFly: src/sys/kern/sys_generic.c,v 1.22 2005/06/22 01:33:21 dillon Exp $ 41 */ 42 43 #include "opt_ktrace.h" 44 45 #include <sys/param.h> 46 #include <sys/systm.h> 47 #include <sys/sysproto.h> 48 #include <sys/filedesc.h> 49 #include <sys/filio.h> 50 #include <sys/fcntl.h> 51 #include <sys/file.h> 52 #include <sys/proc.h> 53 #include <sys/signalvar.h> 54 #include <sys/socketvar.h> 55 #include <sys/uio.h> 56 #include <sys/kernel.h> 57 #include <sys/kern_syscall.h> 58 #include <sys/malloc.h> 59 #include <sys/mapped_ioctl.h> 60 #include <sys/poll.h> 61 #include <sys/queue.h> 62 #include <sys/resourcevar.h> 63 #include <sys/sysctl.h> 64 #include <sys/sysent.h> 65 #include <sys/buf.h> 66 #ifdef KTRACE 67 #include <sys/ktrace.h> 68 #endif 69 #include <vm/vm.h> 70 #include <vm/vm_page.h> 71 #include <sys/file2.h> 72 73 #include <machine/limits.h> 74 75 static MALLOC_DEFINE(M_IOCTLOPS, "ioctlops", "ioctl data buffer"); 76 static MALLOC_DEFINE(M_IOCTLMAP, "ioctlmap", "mapped ioctl handler buffer"); 77 static MALLOC_DEFINE(M_SELECT, "select", "select() buffer"); 78 MALLOC_DEFINE(M_IOV, "iov", "large iov's"); 79 80 static int pollscan (struct proc *, struct pollfd *, u_int, int *); 81 static int selscan (struct proc *, fd_mask **, fd_mask **, 82 int, int *); 83 84 struct file* 85 holdfp(fdp, fd, 
flag) 86 struct filedesc* fdp; 87 int fd, flag; 88 { 89 struct file* fp; 90 91 if (((u_int)fd) >= fdp->fd_nfiles || 92 (fp = fdp->fd_files[fd].fp) == NULL || 93 (fp->f_flag & flag) == 0) { 94 return (NULL); 95 } 96 fhold(fp); 97 return (fp); 98 } 99 100 /* 101 * Read system call. 102 */ 103 int 104 read(struct read_args *uap) 105 { 106 struct thread *td = curthread; 107 struct uio auio; 108 struct iovec aiov; 109 int error; 110 111 aiov.iov_base = uap->buf; 112 aiov.iov_len = uap->nbyte; 113 auio.uio_iov = &aiov; 114 auio.uio_iovcnt = 1; 115 auio.uio_offset = -1; 116 auio.uio_resid = uap->nbyte; 117 auio.uio_rw = UIO_READ; 118 auio.uio_segflg = UIO_USERSPACE; 119 auio.uio_td = td; 120 121 error = kern_readv(uap->fd, &auio, 0, &uap->sysmsg_result); 122 123 return(error); 124 } 125 126 /* 127 * Pread system call 128 */ 129 int 130 pread(struct pread_args *uap) 131 { 132 struct thread *td = curthread; 133 struct uio auio; 134 struct iovec aiov; 135 int error; 136 137 aiov.iov_base = uap->buf; 138 aiov.iov_len = uap->nbyte; 139 auio.uio_iov = &aiov; 140 auio.uio_iovcnt = 1; 141 auio.uio_offset = uap->offset; 142 auio.uio_resid = uap->nbyte; 143 auio.uio_rw = UIO_READ; 144 auio.uio_segflg = UIO_USERSPACE; 145 auio.uio_td = td; 146 147 error = kern_readv(uap->fd, &auio, FOF_OFFSET, &uap->sysmsg_result); 148 149 return(error); 150 } 151 152 int 153 readv(struct readv_args *uap) 154 { 155 struct thread *td = curthread; 156 struct uio auio; 157 struct iovec aiov[UIO_SMALLIOV], *iov = NULL; 158 int error; 159 160 error = iovec_copyin(uap->iovp, &iov, aiov, uap->iovcnt, 161 &auio.uio_resid); 162 if (error) 163 return (error); 164 auio.uio_iov = iov; 165 auio.uio_iovcnt = uap->iovcnt; 166 auio.uio_offset = -1; 167 auio.uio_rw = UIO_READ; 168 auio.uio_segflg = UIO_USERSPACE; 169 auio.uio_td = td; 170 171 error = kern_readv(uap->fd, &auio, 0, &uap->sysmsg_result); 172 173 iovec_free(&iov, aiov); 174 return (error); 175 } 176 177 int 178 kern_readv(int fd, struct uio *auio, int 
flags, int *res) 179 { 180 struct thread *td = curthread; 181 struct proc *p = td->td_proc; 182 struct file *fp; 183 struct filedesc *fdp = p->p_fd; 184 int len, error; 185 #ifdef KTRACE 186 struct iovec *ktriov = NULL; 187 struct uio ktruio; 188 #endif 189 190 KKASSERT(p); 191 192 fp = holdfp(fdp, fd, FREAD); 193 if (fp == NULL) 194 return (EBADF); 195 if (flags & FOF_OFFSET && fp->f_type != DTYPE_VNODE) { 196 error = ESPIPE; 197 goto done; 198 } 199 if (auio->uio_resid < 0) { 200 error = EINVAL; 201 goto done; 202 } 203 #ifdef KTRACE 204 /* 205 * if tracing, save a copy of iovec 206 */ 207 if (KTRPOINT(td, KTR_GENIO)) { 208 int iovlen = auio->uio_iovcnt * sizeof(struct iovec); 209 210 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 211 bcopy((caddr_t)auio->uio_iov, (caddr_t)ktriov, iovlen); 212 ktruio = *auio; 213 } 214 #endif 215 len = auio->uio_resid; 216 error = fo_read(fp, auio, fp->f_cred, flags, td); 217 if (error) { 218 if (auio->uio_resid != len && (error == ERESTART || 219 error == EINTR || error == EWOULDBLOCK)) 220 error = 0; 221 } 222 #ifdef KTRACE 223 if (ktriov != NULL) { 224 if (error == 0) { 225 ktruio.uio_iov = ktriov; 226 ktruio.uio_resid = len - auio->uio_resid; 227 ktrgenio(p->p_tracep, fd, UIO_READ, &ktruio, error); 228 } 229 FREE(ktriov, M_TEMP); 230 } 231 #endif 232 if (error == 0) 233 *res = len - auio->uio_resid; 234 done: 235 fdrop(fp, td); 236 return (error); 237 } 238 239 /* 240 * Write system call 241 */ 242 int 243 write(struct write_args *uap) 244 { 245 struct thread *td = curthread; 246 struct uio auio; 247 struct iovec aiov; 248 int error; 249 250 aiov.iov_base = (void *)(uintptr_t)uap->buf; 251 aiov.iov_len = uap->nbyte; 252 auio.uio_iov = &aiov; 253 auio.uio_iovcnt = 1; 254 auio.uio_offset = -1; 255 auio.uio_resid = uap->nbyte; 256 auio.uio_rw = UIO_WRITE; 257 auio.uio_segflg = UIO_USERSPACE; 258 auio.uio_td = td; 259 260 error = kern_writev(uap->fd, &auio, 0, &uap->sysmsg_result); 261 262 return(error); 263 } 264 265 
/*
 * Pwrite system call
 *
 * Positional write: like write() but transfers at the caller-supplied
 * offset (FOF_OFFSET) without moving the descriptor's seek position.
 */
int
pwrite(struct pwrite_args *uap)
{
	struct thread *td = curthread;
	struct uio auio;
	struct iovec aiov;
	int error;

	aiov.iov_base = (void *)(uintptr_t)uap->buf;
	aiov.iov_len = uap->nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = uap->offset;
	auio.uio_resid = uap->nbyte;
	auio.uio_rw = UIO_WRITE;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_td = td;

	error = kern_writev(uap->fd, &auio, FOF_OFFSET, &uap->sysmsg_result);

	return(error);
}

/*
 * Gather write from a user iovec array.  iovec_copyin() uses the stack
 * array aiov when iovcnt fits in UIO_SMALLIOV and allocates otherwise;
 * iovec_free() releases any such allocation.
 */
int
writev(struct writev_args *uap)
{
	struct thread *td = curthread;
	struct uio auio;
	struct iovec aiov[UIO_SMALLIOV], *iov = NULL;
	int error;

	error = iovec_copyin(uap->iovp, &iov, aiov, uap->iovcnt,
	    &auio.uio_resid);
	if (error)
		return (error);
	auio.uio_iov = iov;
	auio.uio_iovcnt = uap->iovcnt;
	auio.uio_offset = -1;
	auio.uio_rw = UIO_WRITE;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_td = td;

	error = kern_writev(uap->fd, &auio, 0, &uap->sysmsg_result);

	iovec_free(&iov, aiov);
	return (error);
}

/*
 * Gather write system call
 *
 * Common back end for write(), pwrite() and writev().
 *
 * fd    - descriptor to write to (must be open for FWRITE)
 * auio  - fully initialized uio describing the source iovecs
 * flags - 0, or FOF_OFFSET when auio->uio_offset holds an explicit offset
 * res   - out parameter: number of bytes actually transferred
 *
 * A transfer cut short by ERESTART/EINTR/EWOULDBLOCK after some data
 * moved is reported as success with the partial count in *res.  EPIPE
 * additionally posts SIGPIPE to the process.
 */
int
kern_writev(int fd, struct uio *auio, int flags, int *res)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp;
	struct filedesc *fdp = p->p_fd;
	long len, error;
#ifdef KTRACE
	struct iovec *ktriov = NULL;
	struct uio ktruio;
#endif

	KKASSERT(p);

	fp = holdfp(fdp, fd, FWRITE);
	if (fp == NULL)
		return (EBADF);
	/* Explicit offsets only make sense on seekable vnodes. */
	if ((flags & FOF_OFFSET) && fp->f_type != DTYPE_VNODE) {
		error = ESPIPE;
		goto done;
	}
	if (auio->uio_resid < 0) {
		error = EINVAL;
		goto done;
	}
#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec and uio
	 */
	if (KTRPOINT(td, KTR_GENIO)) {
		int iovlen = auio->uio_iovcnt * sizeof(struct iovec);

		MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
		bcopy((caddr_t)auio->uio_iov, (caddr_t)ktriov, iovlen);
		ktruio = *auio;
	}
#endif
	len = auio->uio_resid;
	/* Give the buffer cache advance notice before vnode writes. */
	if (fp->f_type == DTYPE_VNODE)
		bwillwrite();
	error = fo_write(fp, auio, fp->f_cred, flags, td);
	if (error) {
		/*
		 * Interrupted partial transfers are reported as success
		 * with the partial byte count.
		 */
		if (auio->uio_resid != len && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		if (error == EPIPE)
			psignal(p, SIGPIPE);
	}
#ifdef KTRACE
	if (ktriov != NULL) {
		if (error == 0) {
			ktruio.uio_iov = ktriov;
			ktruio.uio_resid = len - auio->uio_resid;
			ktrgenio(p->p_tracep, fd, UIO_WRITE, &ktruio, error);
		}
		FREE(ktriov, M_TEMP);
	}
#endif
	if (error == 0)
		*res = len - auio->uio_resid;
done:
	fdrop(fp, td);
	return (error);
}

/*
 * Ioctl system call
 *
 * Native ioctls route through mapped_ioctl() with no translation map.
 */
/* ARGSUSED */
int
ioctl(struct ioctl_args *uap)
{
	return(mapped_ioctl(uap->fd, uap->com, uap->data, NULL));
}

/*
 * One registered subsystem in an ioctl translation map: a named,
 * sentinel-terminated array of command ranges linked into the map's list.
 */
struct ioctl_map_entry {
	const char *subsys;
	struct ioctl_map_range *cmd_ranges;
	LIST_ENTRY(ioctl_map_entry) entries;
};

/*
 * The true heart of all ioctl syscall handlers (native, emulation).
 * If map != NULL, it will be searched for a matching entry for com,
 * and appropriate conversions/conversion functions will be utilized.
 */
int
mapped_ioctl(int fd, u_long com, caddr_t uspc_data, struct ioctl_map *map)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp;
	struct filedesc *fdp;
	struct ioctl_map_range *iomc = NULL;
	int error;
	u_int size;
	u_long ocom = com;	/* original cmd, handed to wrap/map functions */
	caddr_t data, memp;
	int tmp;
#define STK_PARAMS	128
	union {
		char stkbuf[STK_PARAMS];	/* small args avoid malloc */
		long align;
	} ubuf;

	KKASSERT(p);
	fdp = p->p_fd;
	if ((u_int)fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_files[fd].fp) == NULL)
		return(EBADF);

	if ((fp->f_flag & (FREAD | FWRITE)) == 0)
		return(EBADF);

	if (map != NULL) {	/* obey translation map */
		u_long maskcmd;
		struct ioctl_map_entry *e;

		maskcmd = com & map->mask;

		/*
		 * Scan each registered subsystem's range table.  Tables
		 * are terminated by an entry whose start, maptocmd,
		 * wrapfunc and mapfunc are all zero/NULL.
		 */
		LIST_FOREACH(e, &map->mapping, entries) {
			for (iomc = e->cmd_ranges; iomc->start != 0 ||
			     iomc->maptocmd != 0 || iomc->wrapfunc != NULL ||
			     iomc->mapfunc != NULL;
			     iomc++) {
				if (maskcmd >= iomc->start &&
				    maskcmd <= iomc->end)
					break;
			}

			/* Did we find a match? */
			if (iomc->start != 0 || iomc->maptocmd != 0 ||
			    iomc->wrapfunc != NULL || iomc->mapfunc != NULL)
				break;
		}

		if (iomc == NULL ||
		    (iomc->start == 0 && iomc->maptocmd == 0
		     && iomc->wrapfunc == NULL && iomc->mapfunc == NULL)) {
			printf("%s: 'ioctl' fd=%d, cmd=0x%lx ('%c',%d) not implemented\n",
			       map->sys, fd, maskcmd,
			       (int)((maskcmd >> 8) & 0xff),
			       (int)(maskcmd & 0xff));
			return(EINVAL);
		}

		/*
		 * If it's a non-range one to one mapping, maptocmd should be
		 * correct. If it's a ranged one to one mapping, we pass the
		 * original value of com, and for a range mapped to a different
		 * range, we always need a mapping function to translate the
		 * ioctl to our native ioctl. Ex. 6500-65ff <-> 9500-95ff
		 */
		if (iomc->start == iomc->end && iomc->maptocmd == iomc->maptoend) {
			com = iomc->maptocmd;
		} else if (iomc->start == iomc->maptocmd && iomc->end == iomc->maptoend) {
			if (iomc->mapfunc != NULL)
				com = iomc->mapfunc(iomc->start, iomc->end,
						    iomc->start, iomc->end,
						    com, com);
		} else {
			if (iomc->mapfunc != NULL) {
				com = iomc->mapfunc(iomc->start, iomc->end,
						    iomc->maptocmd, iomc->maptoend,
						    com, ocom);
			} else {
				printf("%s: Invalid mapping for fd=%d, cmd=%#lx ('%c',%d)\n",
				       map->sys, fd, maskcmd,
				       (int)((maskcmd >> 8) & 0xff),
				       (int)(maskcmd & 0xff));
				return(EINVAL);
			}
		}
	}

	/*
	 * Close-on-exec is per-descriptor state; handle it here without
	 * involving the file's fo_ioctl at all.
	 */
	switch (com) {
	case FIONCLEX:
		fdp->fd_files[fd].fileflags &= ~UF_EXCLOSE;
		return(0);
	case FIOCLEX:
		fdp->fd_files[fd].fileflags |= UF_EXCLOSE;
		return(0);
	}

	/*
	 * Interpret high order word to find amount of data to be
	 * copied to/from the user's address space.
	 */
	size = IOCPARM_LEN(com);
	if (size > IOCPARM_MAX)
		return(ENOTTY);

	fhold(fp);	/* released via fdrop() on every path below */

	memp = NULL;
	if (size > sizeof (ubuf.stkbuf)) {
		memp = malloc(size, M_IOCTLOPS, M_WAITOK);
		data = memp;
	} else {
		data = ubuf.stkbuf;
	}
	if ((com & IOC_IN) != 0) {
		if (size != 0) {
			error = copyin(uspc_data, data, (u_int)size);
			if (error) {
				if (memp != NULL)
					free(memp, M_IOCTLOPS);
				fdrop(fp, td);
				return(error);
			}
		} else {
			/* Zero-size IOC_IN: the arg pointer is the datum. */
			*(caddr_t *)data = uspc_data;
		}
	} else if ((com & IOC_OUT) != 0 && size) {
		/*
		 * Zero the buffer so the user always
		 * gets back something deterministic.
		 */
		bzero(data, size);
	} else if ((com & IOC_VOID) != 0) {
		*(caddr_t *)data = uspc_data;
	}

	switch (com) {

	case FIONBIO:
		/* Mirror the request in f_flag before passing it down. */
		if ((tmp = *(int *)data))
			fp->f_flag |= FNONBLOCK;
		else
			fp->f_flag &= ~FNONBLOCK;
		error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, td);
		break;

	case FIOASYNC:
		if ((tmp = *(int *)data))
			fp->f_flag |= FASYNC;
		else
			fp->f_flag &= ~FASYNC;
		error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, td);
		break;

	default:
		/*
		 * If there is a override function,
		 * call it instead of directly routing the call
		 */
		if (map != NULL && iomc->wrapfunc != NULL)
			error = iomc->wrapfunc(fp, com, ocom, data, td);
		else
			error = fo_ioctl(fp, com, data, td);
		/*
		 * Copy any data to user, size was
		 * already set and checked above.
		 */
		if (error == 0 && (com & IOC_OUT) != 0 && size != 0)
			error = copyout(data, uspc_data, (u_int)size);
		break;
	}
	if (memp != NULL)
		free(memp, M_IOCTLOPS);
	fdrop(fp, td);
	return(error);
}

/*
 * Register a subsystem's command-range table with an ioctl translation
 * map.  The handler's subsys string and cmd_ranges array are referenced,
 * not copied, so they must stay valid until unregistered.
 */
int
mapped_ioctl_register_handler(struct ioctl_map_handler *he)
{
	struct ioctl_map_entry *ne;

	KKASSERT(he != NULL && he->map != NULL && he->cmd_ranges != NULL &&
		 he->subsys != NULL && *he->subsys != '\0');

	ne = malloc(sizeof(struct ioctl_map_entry), M_IOCTLMAP, M_WAITOK);

	ne->subsys = he->subsys;
	ne->cmd_ranges = he->cmd_ranges;

	LIST_INSERT_HEAD(&he->map->mapping, ne, entries);

	return(0);
}

/*
 * Remove a previously registered handler, matching on the cmd_ranges
 * pointer.  Returns EINVAL if no such registration exists.
 */
int
mapped_ioctl_unregister_handler(struct ioctl_map_handler *he)
{
	struct ioctl_map_entry *ne;

	KKASSERT(he != NULL && he->map != NULL && he->cmd_ranges != NULL);

	LIST_FOREACH(ne, &he->map->mapping, entries) {
		if (ne->cmd_ranges != he->cmd_ranges)
			continue;
		LIST_REMOVE(ne, entries);
		free(ne, M_IOCTLMAP);
		return(0);
	}
	return(EINVAL);
}

static int	nselcoll;	/* Select collisions
since boot */
int	selwait;
SYSCTL_INT(_kern, OID_AUTO, nselcoll, CTLFLAG_RD, &nselcoll, 0, "");

/*
 * Select system call.
 */
int
select(struct select_args *uap)
{
	struct proc *p = curproc;

	/*
	 * The magic 2048 here is chosen to be just enough for FD_SETSIZE
	 * infds with the new FD_SETSIZE of 1024, and more than enough for
	 * FD_SETSIZE infds, outfds and exceptfds with the old FD_SETSIZE
	 * of 256.
	 */
	fd_mask s_selbits[howmany(2048, NFDBITS)];
	fd_mask *ibits[3], *obits[3], *selbits, *sbp;
	struct timeval atv, rtv, ttv;
	int ncoll, error, timo;
	u_int nbufbytes, ncpbytes, nfdbits;

	if (uap->nd < 0)
		return (EINVAL);
	if (uap->nd > p->p_fd->fd_nfiles)
		uap->nd = p->p_fd->fd_nfiles;   /* forgiving; slightly wrong */

	/*
	 * Allocate just enough bits for the non-null fd_sets.  Use the
	 * preallocated auto buffer if possible.
	 */
	nfdbits = roundup(uap->nd, NFDBITS);
	ncpbytes = nfdbits / NBBY;
	nbufbytes = 0;
	if (uap->in != NULL)
		nbufbytes += 2 * ncpbytes;	/* input copy + output copy */
	if (uap->ou != NULL)
		nbufbytes += 2 * ncpbytes;
	if (uap->ex != NULL)
		nbufbytes += 2 * ncpbytes;
	if (nbufbytes <= sizeof s_selbits)
		selbits = &s_selbits[0];
	else
		selbits = malloc(nbufbytes, M_SELECT, M_WAITOK);

	/*
	 * Assign pointers into the bit buffers and fetch the input bits.
	 * Put the output buffers together so that they can be bzeroed
	 * together.
	 */
	sbp = selbits;
#define	getbits(name, x) \
	do {								\
		if (uap->name == NULL)					\
			ibits[x] = NULL;				\
		else {							\
			ibits[x] = sbp + nbufbytes / 2 / sizeof *sbp;	\
			obits[x] = sbp;					\
			sbp += ncpbytes / sizeof *sbp;			\
			error = copyin(uap->name, ibits[x], ncpbytes);	\
			if (error != 0)					\
				goto done;				\
		}							\
	} while (0)
	getbits(in, 0);
	getbits(ou, 1);
	getbits(ex, 2);
#undef	getbits
	if (nbufbytes != 0)
		bzero(selbits, nbufbytes / 2);	/* zero the output halves */

	if (uap->tv) {
		error = copyin((caddr_t)uap->tv, (caddr_t)&atv,
			sizeof (atv));
		if (error)
			goto done;
		if (itimerfix(&atv)) {
			error = EINVAL;
			goto done;
		}
		/* Convert the relative timeout into an absolute deadline. */
		getmicrouptime(&rtv);
		timevaladd(&atv, &rtv);
	} else {
		atv.tv_sec = 0;
		atv.tv_usec = 0;
	}
	timo = 0;
retry:
	/*
	 * Snapshot the collision counter; a change while scanning means
	 * a selwakeup() raced us and we must rescan rather than sleep.
	 */
	ncoll = nselcoll;
	p->p_flag |= P_SELECT;
	error = selscan(p, ibits, obits, uap->nd, &uap->sysmsg_result);
	if (error || uap->sysmsg_result)
		goto done;
	if (atv.tv_sec || atv.tv_usec) {
		getmicrouptime(&rtv);
		if (timevalcmp(&rtv, &atv, >=))
			goto done;	/* deadline already passed */
		ttv = atv;
		timevalsub(&ttv, &rtv);
		/* Clamp the sleep at 24 hours for tvtohz_high(). */
		timo = ttv.tv_sec > 24 * 60 * 60 ?
		    24 * 60 * 60 * hz : tvtohz_high(&ttv);
	}
	crit_enter();
	if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
		crit_exit();
		goto retry;
	}
	p->p_flag &= ~P_SELECT;

	error = tsleep((caddr_t)&selwait, PCATCH, "select", timo);

	crit_exit();
	if (error == 0)
		goto retry;	/* woken by selwakeup(); rescan */
done:
	p->p_flag &= ~P_SELECT;
	/* select is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;	/* timeout expired: zero descriptors ready */
#define	putbits(name, x) \
	if (uap->name && (error2 = copyout(obits[x], uap->name, ncpbytes))) \
		error = error2;
	if (error == 0) {
		int error2;

		putbits(in, 0);
		putbits(ou, 1);
		putbits(ex, 2);
#undef putbits
	}
	if (selbits != &s_selbits[0])
		free(selbits, M_SELECT);
	return (error);
}

/*
 * Scan the three input fd_sets, polling each set descriptor for its
 * set's events (read/write/exception).  Ready descriptors are recorded
 * in the corresponding output set and counted into *res.  Returns EBADF
 * if a set bit names a closed descriptor.
 */
static int
selscan(struct proc *p, fd_mask **ibits, fd_mask **obits, int nfd, int *res)
{
	struct thread *td = p->p_thread;
	struct filedesc *fdp = p->p_fd;
	int msk, i, fd;
	fd_mask bits;
	struct file *fp;
	int n = 0;
	/* Note: backend also returns POLLHUP/POLLERR if appropriate. */
	static int flag[3] = { POLLRDNORM, POLLWRNORM, POLLRDBAND };

	for (msk = 0; msk < 3; msk++) {
		if (ibits[msk] == NULL)
			continue;
		for (i = 0; i < nfd; i += NFDBITS) {
			bits = ibits[msk][i/NFDBITS];
			/* ffs(int mask) not portable, fd_mask is long */
			for (fd = i; bits && fd < nfd; fd++, bits >>= 1) {
				if (!(bits & 1))
					continue;
				fp = fdp->fd_files[fd].fp;
				if (fp == NULL)
					return (EBADF);
				if (fo_poll(fp, flag[msk], fp->f_cred, td)) {
					obits[msk][(fd)/NFDBITS] |=
					    ((fd_mask)1 << ((fd) % NFDBITS));
					n++;
				}
			}
		}
	}
	*res = n;
	return (0);
}

/*
 * Poll system call.
 */
int
poll(struct poll_args *uap)
{
	struct pollfd *bits;
	struct pollfd smallbits[32];	/* avoids malloc for typical sizes */
	struct timeval atv, rtv, ttv;
	int ncoll, error = 0, timo;
	u_int nfds;
	size_t ni;
	struct proc *p = curproc;

	nfds = uap->nfds;
	/*
	 * This is kinda bogus.  We have fd limits, but that is not
	 * really related to the size of the pollfd array.  Make sure
	 * we let the process use at least FD_SETSIZE entries and at
	 * least enough for the current limits.  We want to be reasonably
	 * safe, but not overly restrictive.
	 */
	if (nfds > p->p_rlimit[RLIMIT_NOFILE].rlim_cur && nfds > FD_SETSIZE)
		return (EINVAL);
	ni = nfds * sizeof(struct pollfd);
	if (ni > sizeof(smallbits))
		bits = malloc(ni, M_TEMP, M_WAITOK);
	else
		bits = smallbits;
	error = copyin(uap->fds, bits, ni);
	if (error)
		goto done;
	if (uap->timeout != INFTIM) {
		/* Millisecond timeout -> absolute deadline. */
		atv.tv_sec = uap->timeout / 1000;
		atv.tv_usec = (uap->timeout % 1000) * 1000;
		if (itimerfix(&atv)) {
			error = EINVAL;
			goto done;
		}
		getmicrouptime(&rtv);
		timevaladd(&atv, &rtv);
	} else {
		atv.tv_sec = 0;
		atv.tv_usec = 0;
	}
	timo = 0;
retry:
	/* Same collision-counter protocol as select() above. */
	ncoll = nselcoll;
	p->p_flag |= P_SELECT;
	error = pollscan(p, bits, nfds, &uap->sysmsg_result);
	if (error || uap->sysmsg_result)
		goto done;
	if (atv.tv_sec || atv.tv_usec) {
		getmicrouptime(&rtv);
		if (timevalcmp(&rtv, &atv, >=))
			goto done;
		ttv = atv;
		timevalsub(&ttv, &rtv);
		timo = ttv.tv_sec > 24 * 60 * 60 ?
		    24 * 60 * 60 * hz : tvtohz_high(&ttv);
	}
	crit_enter();
	if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
		crit_exit();
		goto retry;
	}
	p->p_flag &= ~P_SELECT;
	error = tsleep((caddr_t)&selwait, PCATCH, "poll", timo);
	crit_exit();
	if (error == 0)
		goto retry;
done:
	p->p_flag &= ~P_SELECT;
	/* poll is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
	if (error == 0) {
		error = copyout(bits, uap->fds, ni);
		if (error)
			goto out;  /* NOTE(review): redundant; control falls
				    * through to 'out' regardless */
	}
out:
	if (ni > sizeof(smallbits))
		free(bits, M_TEMP);
	return (error);
}

/*
 * Poll each pollfd entry once.  Out-of-range or closed descriptors get
 * POLLNVAL; negative descriptors are legal placeholders that report no
 * events.  *res receives the count of entries with nonzero revents.
 */
static int
pollscan(struct proc *p, struct pollfd *fds, u_int nfd, int *res)
{
	struct thread *td = p->p_thread;
	struct filedesc *fdp = p->p_fd;
	int i;
	struct file *fp;
	int n = 0;

	for (i = 0; i < nfd; i++, fds++) {
		if (fds->fd >= fdp->fd_nfiles) {
			fds->revents = POLLNVAL;
			n++;
		} else if (fds->fd < 0) {
			fds->revents = 0;
		} else {
			fp = fdp->fd_files[fds->fd].fp;
			if (fp == NULL) {
				fds->revents = POLLNVAL;
				n++;
			} else {
				/*
				 * Note: backend also returns POLLHUP and
				 * POLLERR if appropriate.
				 */
				fds->revents = fo_poll(fp, fds->events,
				    fp->f_cred, td);
				if (fds->revents != 0)
					n++;
			}
		}
	}
	*res = n;
	return (0);
}

/*
 * OpenBSD poll system call.
 * XXX this isn't quite a true representation.. OpenBSD uses select ops.
 */
int
openbsd_poll(struct openbsd_poll_args *uap)
{
	return (poll((struct poll_args *)uap));
}

/*
 * Generic poll backend for objects that are always ready: reports the
 * requested read/write events as immediately satisfied.
 */
/*ARGSUSED*/
int
seltrue(dev_t dev, int events, struct thread *td)
{
	return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
}

/*
 * Record a select request. A global wait must be used since a process/thread
 * might go away after recording its request.
 */
void
selrecord(struct thread *selector, struct selinfo *sip)
{
	struct proc *p;
	pid_t mypid;

	if ((p = selector->td_proc) == NULL)
		panic("selrecord: thread needs a process");

	mypid = p->p_pid;
	if (sip->si_pid == mypid)
		return;		/* already recorded for this process */
	/*
	 * Only one pid fits in the selinfo.  If some other process is
	 * already recorded and still sleeping on the select channel,
	 * flag a collision so selwakeup() broadcasts instead.
	 */
	if (sip->si_pid && (p = pfind(sip->si_pid)) &&
	    p->p_wchan == (caddr_t)&selwait) {
		sip->si_flags |= SI_COLL;
	} else {
		sip->si_pid = mypid;
	}
}

/*
 * Do a wakeup when a selectable event occurs.
 */
void
selwakeup(struct selinfo *sip)
{
	struct proc *p;

	if (sip->si_pid == 0)
		return;		/* nobody recorded a select on this object */
	if (sip->si_flags & SI_COLL) {
		/*
		 * Collision: multiple selectors were interested; count it
		 * and wake everyone sleeping on the global channel so
		 * they all rescan.
		 */
		nselcoll++;
		sip->si_flags &= ~SI_COLL;
		wakeup((caddr_t)&selwait);	/* YYY fixable */
	}
	p = pfind(sip->si_pid);
	sip->si_pid = 0;
	if (p != NULL) {
		crit_enter();
		if (p->p_wchan == (caddr_t)&selwait) {
			/* Sleeping in select/poll: make it runnable. */
			if (p->p_stat == SSLEEP)
				setrunnable(p);
			else
				unsleep(p->p_thread);
		} else if (p->p_flag & P_SELECT)
			/* Still scanning: clearing P_SELECT forces a retry. */
			p->p_flag &= ~P_SELECT;
		crit_exit();
	}
}