1 /* 2 * Copyright (c) 1982, 1986, 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)sys_generic.c 8.5 (Berkeley) 1/21/94 39 * $FreeBSD: src/sys/kern/sys_generic.c,v 1.55.2.10 2001/03/17 10:39:32 peter Exp $ 40 */ 41 42 #include "opt_ktrace.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/sysproto.h> 47 #include <sys/event.h> 48 #include <sys/filedesc.h> 49 #include <sys/filio.h> 50 #include <sys/fcntl.h> 51 #include <sys/file.h> 52 #include <sys/proc.h> 53 #include <sys/signalvar.h> 54 #include <sys/socketvar.h> 55 #include <sys/uio.h> 56 #include <sys/kernel.h> 57 #include <sys/kern_syscall.h> 58 #include <sys/malloc.h> 59 #include <sys/mapped_ioctl.h> 60 #include <sys/poll.h> 61 #include <sys/queue.h> 62 #include <sys/resourcevar.h> 63 #include <sys/socketops.h> 64 #include <sys/sysctl.h> 65 #include <sys/sysent.h> 66 #include <sys/buf.h> 67 #ifdef KTRACE 68 #include <sys/ktrace.h> 69 #endif 70 #include <vm/vm.h> 71 #include <vm/vm_page.h> 72 73 #include <sys/file2.h> 74 #include <sys/mplock2.h> 75 #include <sys/spinlock2.h> 76 77 #include <machine/limits.h> 78 79 static MALLOC_DEFINE(M_IOCTLOPS, "ioctlops", "ioctl data buffer"); 80 static MALLOC_DEFINE(M_IOCTLMAP, "ioctlmap", "mapped ioctl handler buffer"); 81 static MALLOC_DEFINE(M_SELECT, "select", "select() buffer"); 82 MALLOC_DEFINE(M_IOV, "iov", "large iov's"); 83 84 typedef struct kfd_set { 85 fd_mask fds_bits[2]; 86 } kfd_set; 87 88 enum select_copyin_states { 89 COPYIN_READ, COPYIN_WRITE, COPYIN_EXCEPT, COPYIN_DONE }; 90 91 struct select_kevent_copyin_args { 92 kfd_set *read_set; 93 kfd_set *write_set; 94 kfd_set *except_set; 95 int active_set; /* One of select_copyin_states */ 96 struct lwp *lwp; /* Pointer to our lwp */ 97 int num_fds; /* Number of file descriptors (syscall arg) */ 98 int proc_fds; /* Processed fd's (wraps) */ 99 int error; /* Returned to userland */ 100 }; 101 102 struct poll_kevent_copyin_args { 103 struct lwp *lwp; 104 struct pollfd *fds; 105 int nfds; 106 int pfds; 107 int error; 108 }; 109 110 static struct lwkt_token mioctl_token = LWKT_TOKEN_INITIALIZER(mioctl_token); 111 112 static int doselect(int nd, fd_set *in, fd_set *ou, fd_set *ex, 113 struct timespec *ts, int *res); 114 static int dopoll(int nfds, struct pollfd *fds, struct timespec *ts, 115 int *res); 116 static int dofileread(int, struct file *, struct uio *, int, size_t *); 117 static int dofilewrite(int, struct file *, struct uio *, int, size_t *); 118 119 /* 120 * Read system call. 121 * 122 * MPSAFE 123 */ 124 int 125 sys_read(struct read_args *uap) 126 { 127 struct thread *td = curthread; 128 struct uio auio; 129 struct iovec aiov; 130 int error; 131 132 if ((ssize_t)uap->nbyte < 0) 133 error = EINVAL; 134 135 aiov.iov_base = uap->buf; 136 aiov.iov_len = uap->nbyte; 137 auio.uio_iov = &aiov; 138 auio.uio_iovcnt = 1; 139 auio.uio_offset = -1; 140 auio.uio_resid = uap->nbyte; 141 auio.uio_rw = UIO_READ; 142 auio.uio_segflg = UIO_USERSPACE; 143 auio.uio_td = td; 144 145 error = kern_preadv(uap->fd, &auio, 0, &uap->sysmsg_szresult); 146 return(error); 147 } 148 149 /* 150 * Positioned (Pread) read system call 151 * 152 * MPSAFE 153 */ 154 int 155 sys_extpread(struct extpread_args *uap) 156 { 157 struct thread *td = curthread; 158 struct uio auio; 159 struct iovec aiov; 160 int error; 161 int flags; 162 163 if ((ssize_t)uap->nbyte < 0) 164 return(EINVAL); 165 166 aiov.iov_base = uap->buf; 167 aiov.iov_len = uap->nbyte; 168 auio.uio_iov = &aiov; 169 auio.uio_iovcnt = 1; 170 auio.uio_offset = uap->offset; 171 auio.uio_resid = uap->nbyte; 172 auio.uio_rw = UIO_READ; 173 auio.uio_segflg = UIO_USERSPACE; 174 auio.uio_td = td; 175 176 flags = uap->flags & O_FMASK; 177 if (uap->offset != (off_t)-1) 178 flags |= O_FOFFSET; 179 180 error = kern_preadv(uap->fd, &auio, flags, &uap->sysmsg_szresult); 181 return(error); 182 } 183 184 /* 185 * Scatter read system call. 186 * 187 * MPSAFE 188 */ 189 int 190 sys_readv(struct readv_args *uap) 191 { 192 struct thread *td = curthread; 193 struct uio auio; 194 struct iovec aiov[UIO_SMALLIOV], *iov = NULL; 195 int error; 196 197 error = iovec_copyin(uap->iovp, &iov, aiov, uap->iovcnt, 198 &auio.uio_resid); 199 if (error) 200 return (error); 201 auio.uio_iov = iov; 202 auio.uio_iovcnt = uap->iovcnt; 203 auio.uio_offset = -1; 204 auio.uio_rw = UIO_READ; 205 auio.uio_segflg = UIO_USERSPACE; 206 auio.uio_td = td; 207 208 error = kern_preadv(uap->fd, &auio, 0, &uap->sysmsg_szresult); 209 210 iovec_free(&iov, aiov); 211 return (error); 212 } 213 214 215 /* 216 * Scatter positioned read system call. 217 * 218 * MPSAFE 219 */ 220 int 221 sys_extpreadv(struct extpreadv_args *uap) 222 { 223 struct thread *td = curthread; 224 struct uio auio; 225 struct iovec aiov[UIO_SMALLIOV], *iov = NULL; 226 int error; 227 int flags; 228 229 error = iovec_copyin(uap->iovp, &iov, aiov, uap->iovcnt, 230 &auio.uio_resid); 231 if (error) 232 return (error); 233 auio.uio_iov = iov; 234 auio.uio_iovcnt = uap->iovcnt; 235 auio.uio_offset = uap->offset; 236 auio.uio_rw = UIO_READ; 237 auio.uio_segflg = UIO_USERSPACE; 238 auio.uio_td = td; 239 240 flags = uap->flags & O_FMASK; 241 if (uap->offset != (off_t)-1) 242 flags |= O_FOFFSET; 243 244 error = kern_preadv(uap->fd, &auio, flags, &uap->sysmsg_szresult); 245 246 iovec_free(&iov, aiov); 247 return(error); 248 } 249 250 /* 251 * MPSAFE 252 */ 253 int 254 kern_preadv(int fd, struct uio *auio, int flags, size_t *res) 255 { 256 struct thread *td = curthread; 257 struct proc *p = td->td_proc; 258 struct file *fp; 259 int error; 260 261 KKASSERT(p); 262 263 fp = holdfp(p->p_fd, fd, FREAD); 264 if (fp == NULL) 265 return (EBADF); 266 if (flags & O_FOFFSET && fp->f_type != DTYPE_VNODE) { 267 error = ESPIPE; 268 } else { 269 error = dofileread(fd, fp, auio, flags, res); 270 } 271 fdrop(fp); 272 return(error); 273 } 274 275 /* 276 * Common code for readv and preadv that reads data in 277 * from a file using the passed in uio, offset, and flags. 278 * 279 * MPALMOSTSAFE - ktrace needs help 280 */ 281 static int 282 dofileread(int fd, struct file *fp, struct uio *auio, int flags, size_t *res) 283 { 284 int error; 285 size_t len; 286 #ifdef KTRACE 287 struct thread *td = curthread; 288 struct iovec *ktriov = NULL; 289 struct uio ktruio; 290 #endif 291 292 #ifdef KTRACE 293 /* 294 * if tracing, save a copy of iovec 295 */ 296 if (KTRPOINT(td, KTR_GENIO)) { 297 int iovlen = auio->uio_iovcnt * sizeof(struct iovec); 298 299 ktriov = kmalloc(iovlen, M_TEMP, M_WAITOK); 300 bcopy((caddr_t)auio->uio_iov, (caddr_t)ktriov, iovlen); 301 ktruio = *auio; 302 } 303 #endif 304 len = auio->uio_resid; 305 error = fo_read(fp, auio, fp->f_cred, flags); 306 if (error) { 307 if (auio->uio_resid != len && (error == ERESTART || 308 error == EINTR || error == EWOULDBLOCK)) 309 error = 0; 310 } 311 #ifdef KTRACE 312 if (ktriov != NULL) { 313 if (error == 0) { 314 ktruio.uio_iov = ktriov; 315 ktruio.uio_resid = len - auio->uio_resid; 316 get_mplock(); 317 ktrgenio(td->td_lwp, fd, UIO_READ, &ktruio, error); 318 rel_mplock(); 319 } 320 kfree(ktriov, M_TEMP); 321 } 322 #endif 323 if (error == 0) 324 *res = len - auio->uio_resid; 325 326 return(error); 327 } 328 329 /* 330 * Write system call 331 * 332 * MPSAFE 333 */ 334 int 335 sys_write(struct write_args *uap) 336 { 337 struct thread *td = curthread; 338 struct uio auio; 339 struct iovec aiov; 340 int error; 341 342 if ((ssize_t)uap->nbyte < 0) 343 error = EINVAL; 344 345 aiov.iov_base = (void *)(uintptr_t)uap->buf; 346 aiov.iov_len = uap->nbyte; 347 auio.uio_iov = &aiov; 348 auio.uio_iovcnt = 1; 349 auio.uio_offset = -1; 350 auio.uio_resid = uap->nbyte; 351 auio.uio_rw = UIO_WRITE; 352 auio.uio_segflg = UIO_USERSPACE; 353 auio.uio_td = td; 354 355 error = kern_pwritev(uap->fd, &auio, 0, &uap->sysmsg_szresult); 356 357 return(error); 358 } 359 360 /* 361 * Pwrite system call 362 * 363 * MPSAFE 364 */ 365 int 366 sys_extpwrite(struct extpwrite_args *uap) 367 { 368 struct thread *td = curthread; 369 struct uio auio; 370 struct iovec aiov; 371 int error; 372 int flags; 373 374 if ((ssize_t)uap->nbyte < 0) 375 error = EINVAL; 376 377 aiov.iov_base = (void *)(uintptr_t)uap->buf; 378 aiov.iov_len = uap->nbyte; 379 auio.uio_iov = &aiov; 380 auio.uio_iovcnt = 1; 381 auio.uio_offset = uap->offset; 382 auio.uio_resid = uap->nbyte; 383 auio.uio_rw = UIO_WRITE; 384 auio.uio_segflg = UIO_USERSPACE; 385 auio.uio_td = td; 386 387 flags = uap->flags & O_FMASK; 388 if (uap->offset != (off_t)-1) 389 flags |= O_FOFFSET; 390 error = kern_pwritev(uap->fd, &auio, flags, &uap->sysmsg_szresult); 391 return(error); 392 } 393 394 /* 395 * MPSAFE 396 */ 397 int 398 sys_writev(struct writev_args *uap) 399 { 400 struct thread *td = curthread; 401 struct uio auio; 402 struct iovec aiov[UIO_SMALLIOV], *iov = NULL; 403 int error; 404 405 error = iovec_copyin(uap->iovp, &iov, aiov, uap->iovcnt, 406 &auio.uio_resid); 407 if (error) 408 return (error); 409 auio.uio_iov = iov; 410 auio.uio_iovcnt = uap->iovcnt; 411 auio.uio_offset = -1; 412 auio.uio_rw = UIO_WRITE; 413 auio.uio_segflg = UIO_USERSPACE; 414 auio.uio_td = td; 415 416 error = kern_pwritev(uap->fd, &auio, 0, &uap->sysmsg_szresult); 417 418 iovec_free(&iov, aiov); 419 return (error); 420 } 421 422 423 /* 424 * Gather positioned write system call 425 * 426 * MPSAFE 427 */ 428 int 429 sys_extpwritev(struct extpwritev_args *uap) 430 { 431 struct thread *td = curthread; 432 struct uio auio; 433 struct iovec aiov[UIO_SMALLIOV], *iov = NULL; 434 int error; 435 int flags; 436 437 error = iovec_copyin(uap->iovp, &iov, aiov, uap->iovcnt, 438 &auio.uio_resid); 439 if (error) 440 return (error); 441 auio.uio_iov = iov; 442 auio.uio_iovcnt = uap->iovcnt; 443 auio.uio_offset = uap->offset; 444 auio.uio_rw = UIO_WRITE; 445 auio.uio_segflg = UIO_USERSPACE; 446 auio.uio_td = td; 447 448 flags = uap->flags & O_FMASK; 449 if (uap->offset != (off_t)-1) 450 flags |= O_FOFFSET; 451 452 error = kern_pwritev(uap->fd, &auio, flags, &uap->sysmsg_szresult); 453 454 iovec_free(&iov, aiov); 455 return(error); 456 } 457 458 /* 459 * MPSAFE 460 */ 461 int 462 kern_pwritev(int fd, struct uio *auio, int flags, size_t *res) 463 { 464 struct thread *td = curthread; 465 struct proc *p = td->td_proc; 466 struct file *fp; 467 int error; 468 469 KKASSERT(p); 470 471 fp = holdfp(p->p_fd, fd, FWRITE); 472 if (fp == NULL) 473 return (EBADF); 474 else if ((flags & O_FOFFSET) && fp->f_type != DTYPE_VNODE) { 475 error = ESPIPE; 476 } else { 477 error = dofilewrite(fd, fp, auio, flags, res); 478 } 479 480 fdrop(fp); 481 return (error); 482 } 483 484 /* 485 * Common code for writev and pwritev that writes data to 486 * a file using the passed in uio, offset, and flags. 487 * 488 * MPALMOSTSAFE - ktrace needs help 489 */ 490 static int 491 dofilewrite(int fd, struct file *fp, struct uio *auio, int flags, size_t *res) 492 { 493 struct thread *td = curthread; 494 struct lwp *lp = td->td_lwp; 495 int error; 496 size_t len; 497 #ifdef KTRACE 498 struct iovec *ktriov = NULL; 499 struct uio ktruio; 500 #endif 501 502 #ifdef KTRACE 503 /* 504 * if tracing, save a copy of iovec and uio 505 */ 506 if (KTRPOINT(td, KTR_GENIO)) { 507 int iovlen = auio->uio_iovcnt * sizeof(struct iovec); 508 509 ktriov = kmalloc(iovlen, M_TEMP, M_WAITOK); 510 bcopy((caddr_t)auio->uio_iov, (caddr_t)ktriov, iovlen); 511 ktruio = *auio; 512 } 513 #endif 514 len = auio->uio_resid; 515 error = fo_write(fp, auio, fp->f_cred, flags); 516 if (error) { 517 if (auio->uio_resid != len && (error == ERESTART || 518 error == EINTR || error == EWOULDBLOCK)) 519 error = 0; 520 /* Socket layer is responsible for issuing SIGPIPE. */ 521 if (error == EPIPE && fp->f_type != DTYPE_SOCKET) 522 lwpsignal(lp->lwp_proc, lp, SIGPIPE); 523 } 524 #ifdef KTRACE 525 if (ktriov != NULL) { 526 if (error == 0) { 527 ktruio.uio_iov = ktriov; 528 ktruio.uio_resid = len - auio->uio_resid; 529 get_mplock(); 530 ktrgenio(lp, fd, UIO_WRITE, &ktruio, error); 531 rel_mplock(); 532 } 533 kfree(ktriov, M_TEMP); 534 } 535 #endif 536 if (error == 0) 537 *res = len - auio->uio_resid; 538 539 return(error); 540 } 541 542 /* 543 * Ioctl system call 544 * 545 * MPSAFE 546 */ 547 int 548 sys_ioctl(struct ioctl_args *uap) 549 { 550 int error; 551 552 error = mapped_ioctl(uap->fd, uap->com, uap->data, NULL, &uap->sysmsg); 553 return (error); 554 } 555 556 struct ioctl_map_entry { 557 const char *subsys; 558 struct ioctl_map_range *cmd_ranges; 559 LIST_ENTRY(ioctl_map_entry) entries; 560 }; 561 562 /* 563 * The true heart of all ioctl syscall handlers (native, emulation). 564 * If map != NULL, it will be searched for a matching entry for com, 565 * and appropriate conversions/conversion functions will be utilized. 566 * 567 * MPSAFE 568 */ 569 int 570 mapped_ioctl(int fd, u_long com, caddr_t uspc_data, struct ioctl_map *map, 571 struct sysmsg *msg) 572 { 573 struct thread *td = curthread; 574 struct proc *p = td->td_proc; 575 struct ucred *cred; 576 struct file *fp; 577 struct ioctl_map_range *iomc = NULL; 578 int error; 579 u_int size; 580 u_long ocom = com; 581 caddr_t data, memp; 582 int tmp; 583 #define STK_PARAMS 128 584 union { 585 char stkbuf[STK_PARAMS]; 586 long align; 587 } ubuf; 588 589 KKASSERT(p); 590 cred = td->td_ucred; 591 592 fp = holdfp(p->p_fd, fd, FREAD|FWRITE); 593 if (fp == NULL) 594 return(EBADF); 595 596 if (map != NULL) { /* obey translation map */ 597 u_long maskcmd; 598 struct ioctl_map_entry *e; 599 600 maskcmd = com & map->mask; 601 602 lwkt_gettoken(&mioctl_token); 603 LIST_FOREACH(e, &map->mapping, entries) { 604 for (iomc = e->cmd_ranges; iomc->start != 0 || 605 iomc->maptocmd != 0 || iomc->wrapfunc != NULL || 606 iomc->mapfunc != NULL; 607 iomc++) { 608 if (maskcmd >= iomc->start && 609 maskcmd <= iomc->end) 610 break; 611 } 612 613 /* Did we find a match? */ 614 if (iomc->start != 0 || iomc->maptocmd != 0 || 615 iomc->wrapfunc != NULL || iomc->mapfunc != NULL) 616 break; 617 } 618 lwkt_reltoken(&mioctl_token); 619 620 if (iomc == NULL || 621 (iomc->start == 0 && iomc->maptocmd == 0 622 && iomc->wrapfunc == NULL && iomc->mapfunc == NULL)) { 623 kprintf("%s: 'ioctl' fd=%d, cmd=0x%lx ('%c',%d) not implemented\n", 624 map->sys, fd, maskcmd, 625 (int)((maskcmd >> 8) & 0xff), 626 (int)(maskcmd & 0xff)); 627 error = EINVAL; 628 goto done; 629 } 630 631 /* 632 * If it's a non-range one to one mapping, maptocmd should be 633 * correct. If it's a ranged one to one mapping, we pass the 634 * original value of com, and for a range mapped to a different 635 * range, we always need a mapping function to translate the 636 * ioctl to our native ioctl. Ex. 6500-65ff <-> 9500-95ff 637 */ 638 if (iomc->start == iomc->end && iomc->maptocmd == iomc->maptoend) { 639 com = iomc->maptocmd; 640 } else if (iomc->start == iomc->maptocmd && iomc->end == iomc->maptoend) { 641 if (iomc->mapfunc != NULL) 642 com = iomc->mapfunc(iomc->start, iomc->end, 643 iomc->start, iomc->end, 644 com, com); 645 } else { 646 if (iomc->mapfunc != NULL) { 647 com = iomc->mapfunc(iomc->start, iomc->end, 648 iomc->maptocmd, iomc->maptoend, 649 com, ocom); 650 } else { 651 kprintf("%s: Invalid mapping for fd=%d, cmd=%#lx ('%c',%d)\n", 652 map->sys, fd, maskcmd, 653 (int)((maskcmd >> 8) & 0xff), 654 (int)(maskcmd & 0xff)); 655 error = EINVAL; 656 goto done; 657 } 658 } 659 } 660 661 switch (com) { 662 case FIONCLEX: 663 error = fclrfdflags(p->p_fd, fd, UF_EXCLOSE); 664 goto done; 665 case FIOCLEX: 666 error = fsetfdflags(p->p_fd, fd, UF_EXCLOSE); 667 goto done; 668 } 669 670 /* 671 * Interpret high order word to find amount of data to be 672 * copied to/from the user's address space. 673 */ 674 size = IOCPARM_LEN(com); 675 if (size > IOCPARM_MAX) { 676 error = ENOTTY; 677 goto done; 678 } 679 680 if (size > sizeof (ubuf.stkbuf)) { 681 memp = kmalloc(size, M_IOCTLOPS, M_WAITOK); 682 data = memp; 683 } else { 684 memp = NULL; 685 data = ubuf.stkbuf; 686 } 687 if ((com & IOC_IN) != 0) { 688 if (size != 0) { 689 error = copyin(uspc_data, data, (size_t)size); 690 if (error) { 691 if (memp != NULL) 692 kfree(memp, M_IOCTLOPS); 693 goto done; 694 } 695 } else { 696 *(caddr_t *)data = uspc_data; 697 } 698 } else if ((com & IOC_OUT) != 0 && size) { 699 /* 700 * Zero the buffer so the user always 701 * gets back something deterministic. 702 */ 703 bzero(data, (size_t)size); 704 } else if ((com & IOC_VOID) != 0) { 705 *(caddr_t *)data = uspc_data; 706 } 707 708 switch (com) { 709 case FIONBIO: 710 if ((tmp = *(int *)data)) 711 atomic_set_int(&fp->f_flag, FNONBLOCK); 712 else 713 atomic_clear_int(&fp->f_flag, FNONBLOCK); 714 error = 0; 715 break; 716 717 case FIOASYNC: 718 if ((tmp = *(int *)data)) 719 atomic_set_int(&fp->f_flag, FASYNC); 720 else 721 atomic_clear_int(&fp->f_flag, FASYNC); 722 error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, cred, msg); 723 break; 724 725 default: 726 /* 727 * If there is a override function, 728 * call it instead of directly routing the call 729 */ 730 if (map != NULL && iomc->wrapfunc != NULL) 731 error = iomc->wrapfunc(fp, com, ocom, data, cred); 732 else 733 error = fo_ioctl(fp, com, data, cred, msg); 734 /* 735 * Copy any data to user, size was 736 * already set and checked above. 737 */ 738 if (error == 0 && (com & IOC_OUT) != 0 && size != 0) 739 error = copyout(data, uspc_data, (size_t)size); 740 break; 741 } 742 if (memp != NULL) 743 kfree(memp, M_IOCTLOPS); 744 done: 745 fdrop(fp); 746 return(error); 747 } 748 749 /* 750 * MPSAFE 751 */ 752 int 753 mapped_ioctl_register_handler(struct ioctl_map_handler *he) 754 { 755 struct ioctl_map_entry *ne; 756 757 KKASSERT(he != NULL && he->map != NULL && he->cmd_ranges != NULL && 758 he->subsys != NULL && *he->subsys != '\0'); 759 760 ne = kmalloc(sizeof(struct ioctl_map_entry), M_IOCTLMAP, 761 M_WAITOK | M_ZERO); 762 763 ne->subsys = he->subsys; 764 ne->cmd_ranges = he->cmd_ranges; 765 766 lwkt_gettoken(&mioctl_token); 767 LIST_INSERT_HEAD(&he->map->mapping, ne, entries); 768 lwkt_reltoken(&mioctl_token); 769 770 return(0); 771 } 772 773 /* 774 * MPSAFE 775 */ 776 int 777 mapped_ioctl_unregister_handler(struct ioctl_map_handler *he) 778 { 779 struct ioctl_map_entry *ne; 780 int error = EINVAL; 781 782 KKASSERT(he != NULL && he->map != NULL && he->cmd_ranges != NULL); 783 784 lwkt_gettoken(&mioctl_token); 785 LIST_FOREACH(ne, &he->map->mapping, entries) { 786 if (ne->cmd_ranges == he->cmd_ranges) { 787 LIST_REMOVE(ne, entries); 788 kfree(ne, M_IOCTLMAP); 789 error = 0; 790 break; 791 } 792 } 793 lwkt_reltoken(&mioctl_token); 794 return(error); 795 } 796 797 static int nselcoll; /* Select collisions since boot */ 798 int selwait; 799 SYSCTL_INT(_kern, OID_AUTO, nselcoll, CTLFLAG_RD, &nselcoll, 0, ""); 800 static int nseldebug; 801 SYSCTL_INT(_kern, OID_AUTO, nseldebug, CTLFLAG_RW, &nseldebug, 0, ""); 802 803 /* 804 * Select system call. 805 * 806 * MPSAFE 807 */ 808 int 809 sys_select(struct select_args *uap) 810 { 811 struct timeval ktv; 812 struct timespec *ktsp, kts; 813 int error; 814 815 /* 816 * Get timeout if any. 817 */ 818 if (uap->tv != NULL) { 819 error = copyin(uap->tv, &ktv, sizeof (ktv)); 820 if (error) 821 return (error); 822 TIMEVAL_TO_TIMESPEC(&ktv, &kts); 823 ktsp = &kts; 824 } else { 825 ktsp = NULL; 826 } 827 828 /* 829 * Do real work. 830 */ 831 error = doselect(uap->nd, uap->in, uap->ou, uap->ex, ktsp, 832 &uap->sysmsg_result); 833 834 return (error); 835 } 836 837 838 /* 839 * Pselect system call. 840 */ 841 int 842 sys_pselect(struct pselect_args *uap) 843 { 844 struct thread *td = curthread; 845 struct lwp *lp = td->td_lwp; 846 struct timespec *ktsp, kts; 847 sigset_t sigmask; 848 int error; 849 850 /* 851 * Get timeout if any. 852 */ 853 if (uap->ts != NULL) { 854 error = copyin(uap->ts, &kts, sizeof (kts)); 855 if (error) 856 return (error); 857 ktsp = &kts; 858 } else { 859 ktsp = NULL; 860 } 861 862 /* 863 * Install temporary signal mask if any provided. 864 */ 865 if (uap->sigmask != NULL) { 866 error = copyin(uap->sigmask, &sigmask, sizeof(sigmask)); 867 if (error) 868 return (error); 869 lwkt_gettoken(&lp->lwp_proc->p_token); 870 lp->lwp_oldsigmask = lp->lwp_sigmask; 871 SIG_CANTMASK(sigmask); 872 lp->lwp_sigmask = sigmask; 873 lwkt_reltoken(&lp->lwp_proc->p_token); 874 } 875 876 /* 877 * Do real job. 878 */ 879 error = doselect(uap->nd, uap->in, uap->ou, uap->ex, ktsp, 880 &uap->sysmsg_result); 881 882 if (uap->sigmask != NULL) { 883 lwkt_gettoken(&lp->lwp_proc->p_token); 884 /* doselect() responsible for turning ERESTART into EINTR */ 885 KKASSERT(error != ERESTART); 886 if (error == EINTR) { 887 /* 888 * We can't restore the previous signal mask now 889 * because it could block the signal that interrupted 890 * us. So make a note to restore it after executing 891 * the handler. 892 */ 893 lp->lwp_flags |= LWP_OLDMASK; 894 } else { 895 /* 896 * No handler to run. Restore previous mask immediately. 897 */ 898 lp->lwp_sigmask = lp->lwp_oldsigmask; 899 } 900 lwkt_reltoken(&lp->lwp_proc->p_token); 901 } 902 903 return (error); 904 } 905 906 static int 907 select_copyin(void *arg, struct kevent *kevp, int maxevents, int *events) 908 { 909 struct select_kevent_copyin_args *skap = NULL; 910 struct kevent *kev; 911 int fd; 912 kfd_set *fdp = NULL; 913 short filter = 0; 914 u_int fflags = 0; 915 916 skap = (struct select_kevent_copyin_args *)arg; 917 918 if (*events == maxevents) 919 return (0); 920 921 while (skap->active_set < COPYIN_DONE) { 922 switch (skap->active_set) { 923 case COPYIN_READ: 924 /* 925 * Register descriptors for the read filter 926 */ 927 fdp = skap->read_set; 928 filter = EVFILT_READ; 929 fflags = NOTE_OLDAPI; 930 if (fdp) 931 break; 932 ++skap->active_set; 933 skap->proc_fds = 0; 934 /* fall through */ 935 case COPYIN_WRITE: 936 /* 937 * Register descriptors for the write filter 938 */ 939 fdp = skap->write_set; 940 filter = EVFILT_WRITE; 941 fflags = NOTE_OLDAPI; 942 if (fdp) 943 break; 944 ++skap->active_set; 945 skap->proc_fds = 0; 946 /* fall through */ 947 case COPYIN_EXCEPT: 948 /* 949 * Register descriptors for the exception filter 950 */ 951 fdp = skap->except_set; 952 filter = EVFILT_EXCEPT; 953 fflags = NOTE_OLDAPI | NOTE_OOB; 954 if (fdp) 955 break; 956 ++skap->active_set; 957 skap->proc_fds = 0; 958 /* fall through */ 959 case COPYIN_DONE: 960 /* 961 * Nothing left to register 962 */ 963 return(0); 964 /* NOT REACHED */ 965 } 966 967 while (skap->proc_fds < skap->num_fds) { 968 fd = skap->proc_fds; 969 if (FD_ISSET(fd, fdp)) { 970 kev = &kevp[*events]; 971 EV_SET(kev, fd, filter, 972 EV_ADD|EV_ENABLE, 973 fflags, 0, 974 (void *)(uintptr_t) 975 skap->lwp->lwp_kqueue_serial); 976 FD_CLR(fd, fdp); 977 ++*events; 978 979 if (nseldebug) 980 kprintf("select fd %d filter %d serial %d\n", 981 fd, filter, skap->lwp->lwp_kqueue_serial); 982 } 983 ++skap->proc_fds; 984 if (*events == maxevents) 985 return (0); 986 } 987 skap->active_set++; 988 skap->proc_fds = 0; 989 } 990 991 return (0); 992 } 993 994 static int 995 select_copyout(void *arg, struct kevent *kevp, int count, int *res) 996 { 997 struct select_kevent_copyin_args *skap; 998 struct kevent kev; 999 int i = 0; 1000 1001 skap = (struct select_kevent_copyin_args *)arg; 1002 1003 for (i = 0; i < count; ++i) { 1004 /* 1005 * Filter out and delete spurious events 1006 */ 1007 if ((u_int)(uintptr_t)kevp[i].udata != 1008 skap->lwp->lwp_kqueue_serial) { 1009 kev = kevp[i]; 1010 kev.flags = EV_DISABLE|EV_DELETE; 1011 kqueue_register(&skap->lwp->lwp_kqueue, &kev); 1012 if (nseldebug) 1013 kprintf("select fd %ju mismatched serial %d\n", 1014 (uintmax_t)kevp[i].ident, 1015 skap->lwp->lwp_kqueue_serial); 1016 continue; 1017 } 1018 1019 /* 1020 * Handle errors 1021 */ 1022 if (kevp[i].flags & EV_ERROR) { 1023 int error = kevp[i].data; 1024 1025 switch (error) { 1026 case EBADF: 1027 /* 1028 * A bad file descriptor is considered a 1029 * fatal error for select, bail out. 1030 */ 1031 skap->error = error; 1032 *res = -1; 1033 return error; 1034 1035 default: 1036 /* 1037 * Select silently swallows any unknown errors 1038 * for descriptors in the read or write sets. 1039 * 1040 * ALWAYS filter out EOPNOTSUPP errors from 1041 * filters (at least until all filters support 1042 * EVFILT_EXCEPT) 1043 * 1044 * We also filter out ENODEV since dev_dkqfilter 1045 * returns ENODEV if EOPNOTSUPP is returned in an 1046 * inner call. 1047 * 1048 * XXX: fix this 1049 */ 1050 if (kevp[i].filter != EVFILT_READ && 1051 kevp[i].filter != EVFILT_WRITE && 1052 error != EOPNOTSUPP && 1053 error != ENODEV) { 1054 skap->error = error; 1055 *res = -1; 1056 return error; 1057 } 1058 break; 1059 } 1060 if (nseldebug) 1061 kprintf("select fd %ju filter %d error %d\n", 1062 (uintmax_t)kevp[i].ident, 1063 kevp[i].filter, error); 1064 continue; 1065 } 1066 1067 switch (kevp[i].filter) { 1068 case EVFILT_READ: 1069 FD_SET(kevp[i].ident, skap->read_set); 1070 break; 1071 case EVFILT_WRITE: 1072 FD_SET(kevp[i].ident, skap->write_set); 1073 break; 1074 case EVFILT_EXCEPT: 1075 FD_SET(kevp[i].ident, skap->except_set); 1076 break; 1077 } 1078 1079 ++*res; 1080 } 1081 1082 return (0); 1083 } 1084 1085 /* 1086 * Copy select bits in from userland. Allocate kernel memory if the 1087 * set is large. 1088 */ 1089 static int 1090 getbits(int bytes, fd_set *in_set, kfd_set **out_set, kfd_set *tmp_set) 1091 { 1092 int error; 1093 1094 if (in_set) { 1095 if (bytes < sizeof(*tmp_set)) 1096 *out_set = tmp_set; 1097 else 1098 *out_set = kmalloc(bytes, M_SELECT, M_WAITOK); 1099 error = copyin(in_set, *out_set, bytes); 1100 } else { 1101 *out_set = NULL; 1102 error = 0; 1103 } 1104 return (error); 1105 } 1106 1107 /* 1108 * Copy returned select bits back out to userland. 1109 */ 1110 static int 1111 putbits(int bytes, kfd_set *in_set, fd_set *out_set) 1112 { 1113 int error; 1114 1115 if (in_set) { 1116 error = copyout(in_set, out_set, bytes); 1117 } else { 1118 error = 0; 1119 } 1120 return (error); 1121 } 1122 1123 static int 1124 dotimeout_only(struct timespec *ts) 1125 { 1126 return(nanosleep1(ts, NULL)); 1127 } 1128 1129 /* 1130 * Common code for sys_select() and sys_pselect(). 1131 * 1132 * in, out and ex are userland pointers. ts must point to validated 1133 * kernel-side timeout value or NULL for infinite timeout. res must 1134 * point to syscall return value. 1135 */ 1136 static int 1137 doselect(int nd, fd_set *read, fd_set *write, fd_set *except, 1138 struct timespec *ts, int *res) 1139 { 1140 struct proc *p = curproc; 1141 struct select_kevent_copyin_args *kap, ka; 1142 int bytes, error; 1143 kfd_set read_tmp; 1144 kfd_set write_tmp; 1145 kfd_set except_tmp; 1146 1147 *res = 0; 1148 if (nd < 0) 1149 return (EINVAL); 1150 if (nd == 0 && ts) 1151 return (dotimeout_only(ts)); 1152 1153 if (nd > p->p_fd->fd_nfiles) /* limit kmalloc */ 1154 nd = p->p_fd->fd_nfiles; 1155 1156 kap = &ka; 1157 kap->lwp = curthread->td_lwp; 1158 kap->num_fds = nd; 1159 kap->proc_fds = 0; 1160 kap->error = 0; 1161 kap->active_set = COPYIN_READ; 1162 1163 /* 1164 * Calculate bytes based on the number of __fd_mask[] array entries 1165 * multiplied by the size of __fd_mask. 1166 */ 1167 bytes = howmany(nd, __NFDBITS) * sizeof(__fd_mask); 1168 1169 /* kap->read_set = NULL; not needed */ 1170 kap->write_set = NULL; 1171 kap->except_set = NULL; 1172 1173 error = getbits(bytes, read, &kap->read_set, &read_tmp); 1174 if (error == 0) 1175 error = getbits(bytes, write, &kap->write_set, &write_tmp); 1176 if (error == 0) 1177 error = getbits(bytes, except, &kap->except_set, &except_tmp); 1178 if (error) 1179 goto done; 1180 1181 /* 1182 * NOTE: Make sure the max events passed to kern_kevent() is 1183 * effectively unlimited. (nd * 3) accomplishes this. 1184 * 1185 * (*res) continues to increment as returned events are 1186 * loaded in. 1187 */ 1188 error = kern_kevent(&kap->lwp->lwp_kqueue, 0x7FFFFFFF, res, kap, 1189 select_copyin, select_copyout, ts); 1190 if (error == 0) 1191 error = putbits(bytes, kap->read_set, read); 1192 if (error == 0) 1193 error = putbits(bytes, kap->write_set, write); 1194 if (error == 0) 1195 error = putbits(bytes, kap->except_set, except); 1196 1197 /* 1198 * An error from an individual event that should be passed 1199 * back to userland (EBADF) 1200 */ 1201 if (kap->error) 1202 error = kap->error; 1203 1204 /* 1205 * Clean up. 1206 */ 1207 done: 1208 if (kap->read_set && kap->read_set != &read_tmp) 1209 kfree(kap->read_set, M_SELECT); 1210 if (kap->write_set && kap->write_set != &write_tmp) 1211 kfree(kap->write_set, M_SELECT); 1212 if (kap->except_set && kap->except_set != &except_tmp) 1213 kfree(kap->except_set, M_SELECT); 1214 1215 kap->lwp->lwp_kqueue_serial += kap->num_fds; 1216 1217 return (error); 1218 } 1219 1220 /* 1221 * Poll system call. 1222 * 1223 * MPSAFE 1224 */ 1225 int 1226 sys_poll(struct poll_args *uap) 1227 { 1228 struct timespec ts, *tsp; 1229 int error; 1230 1231 if (uap->timeout != INFTIM) { 1232 ts.tv_sec = uap->timeout / 1000; 1233 ts.tv_nsec = (uap->timeout % 1000) * 1000 * 1000; 1234 tsp = &ts; 1235 } else { 1236 tsp = NULL; 1237 } 1238 1239 error = dopoll(uap->nfds, uap->fds, tsp, &uap->sysmsg_result); 1240 1241 return (error); 1242 } 1243 1244 static int 1245 poll_copyin(void *arg, struct kevent *kevp, int maxevents, int *events) 1246 { 1247 struct poll_kevent_copyin_args *pkap; 1248 struct pollfd *pfd; 1249 struct kevent *kev; 1250 int kev_count; 1251 1252 pkap = (struct poll_kevent_copyin_args *)arg; 1253 1254 while (pkap->pfds < pkap->nfds) { 1255 pfd = &pkap->fds[pkap->pfds]; 1256 1257 /* Clear return events */ 1258 pfd->revents = 0; 1259 1260 /* Do not check if fd is equal to -1 */ 1261 if (pfd->fd == -1) { 1262 ++pkap->pfds; 1263 continue; 1264 } 1265 1266 kev_count = 0; 1267 if (pfd->events & (POLLIN | POLLRDNORM)) 1268 kev_count++; 1269 if (pfd->events & (POLLOUT | POLLWRNORM)) 1270 kev_count++; 1271 if (pfd->events & (POLLPRI | POLLRDBAND)) 1272 kev_count++; 1273 1274 if (*events + kev_count > maxevents) 1275 return (0); 1276 1277 /* 1278 * NOTE: A combined serial number and poll array index is 1279 * stored in kev->udata. 1280 */ 1281 kev = &kevp[*events]; 1282 if (pfd->events & (POLLIN | POLLRDNORM)) { 1283 EV_SET(kev++, pfd->fd, EVFILT_READ, EV_ADD|EV_ENABLE, 1284 NOTE_OLDAPI, 0, (void *)(uintptr_t) 1285 (pkap->lwp->lwp_kqueue_serial + pkap->pfds)); 1286 } 1287 if (pfd->events & (POLLOUT | POLLWRNORM)) { 1288 EV_SET(kev++, pfd->fd, EVFILT_WRITE, EV_ADD|EV_ENABLE, 1289 NOTE_OLDAPI, 0, (void *)(uintptr_t) 1290 (pkap->lwp->lwp_kqueue_serial + pkap->pfds)); 1291 } 1292 if (pfd->events & (POLLPRI | POLLRDBAND)) { 1293 EV_SET(kev++, pfd->fd, EVFILT_EXCEPT, EV_ADD|EV_ENABLE, 1294 NOTE_OLDAPI | NOTE_OOB, 0, 1295 (void *)(uintptr_t) 1296 (pkap->lwp->lwp_kqueue_serial + pkap->pfds)); 1297 } 1298 1299 if (nseldebug) { 1300 kprintf("poll index %d/%d fd %d events %08x serial %d\n", 1301 pkap->pfds, pkap->nfds-1, pfd->fd, pfd->events, 1302 pkap->lwp->lwp_kqueue_serial); 1303 } 1304 1305 ++pkap->pfds; 1306 (*events) += kev_count; 1307 } 1308 1309 return (0); 1310 } 1311 1312 static int 1313 poll_copyout(void *arg, struct kevent *kevp, int count, int *res) 1314 { 1315 struct poll_kevent_copyin_args *pkap; 1316 struct pollfd *pfd; 1317 struct kevent kev; 1318 int count_res; 1319 int i; 1320 u_int pi; 1321 1322 pkap = (struct poll_kevent_copyin_args *)arg; 1323 1324 for (i = 0; i < count; ++i) { 1325 /* 1326 * Extract the poll array index and delete spurious events. 1327 * We can easily tell if the serial number is incorrect 1328 * by checking whether the extracted index is out of range. 1329 */ 1330 pi = (u_int)(uintptr_t)kevp[i].udata - 1331 (u_int)pkap->lwp->lwp_kqueue_serial; 1332 1333 if (pi >= pkap->nfds) { 1334 kev = kevp[i]; 1335 kev.flags = EV_DISABLE|EV_DELETE; 1336 kqueue_register(&pkap->lwp->lwp_kqueue, &kev); 1337 if (nseldebug) 1338 kprintf("poll index %d out of range against serial %d\n", 1339 pi, pkap->lwp->lwp_kqueue_serial); 1340 continue; 1341 } 1342 pfd = &pkap->fds[pi]; 1343 if (kevp[i].ident == pfd->fd) { 1344 /* 1345 * A single descriptor may generate an error against 1346 * more than one filter, make sure to set the 1347 * appropriate flags but do not increment (*res) 1348 * more than once. 1349 */ 1350 count_res = (pfd->revents == 0); 1351 if (kevp[i].flags & EV_ERROR) { 1352 switch(kevp[i].data) { 1353 case EBADF: 1354 case POLLNVAL: 1355 /* Bad file descriptor */ 1356 if (count_res) 1357 ++*res; 1358 pfd->revents |= POLLNVAL; 1359 break; 1360 default: 1361 /* 1362 * Poll silently swallows any unknown 1363 * errors except in the case of POLLPRI 1364 * (OOB/urgent data). 1365 * 1366 * ALWAYS filter out EOPNOTSUPP errors 1367 * from filters, common applications 1368 * set POLLPRI|POLLRDBAND and most 1369 * filters do not support EVFILT_EXCEPT. 1370 * 1371 * We also filter out ENODEV since dev_dkqfilter 1372 * returns ENODEV if EOPNOTSUPP is returned in an 1373 * inner call. 1374 * 1375 * XXX: fix this 1376 */ 1377 if (kevp[i].filter != EVFILT_READ && 1378 kevp[i].filter != EVFILT_WRITE && 1379 kevp[i].data != EOPNOTSUPP && 1380 kevp[i].data != ENODEV) { 1381 if (count_res == 0) 1382 ++*res; 1383 pfd->revents |= POLLERR; 1384 } 1385 break; 1386 } 1387 if (nseldebug) { 1388 kprintf("poll index %d fd %d " 1389 "filter %d error %jd\n", 1390 pi, pfd->fd, 1391 kevp[i].filter, 1392 (intmax_t)kevp[i].data); 1393 } 1394 continue; 1395 } 1396 1397 switch (kevp[i].filter) { 1398 case EVFILT_READ: 1399 #if 0 1400 /* 1401 * NODATA on the read side can indicate a 1402 * half-closed situation and not necessarily 1403 * a disconnect, so depend on the user 1404 * issuing a read() and getting 0 bytes back. 1405 */ 1406 if (kevp[i].flags & EV_NODATA) 1407 pfd->revents |= POLLHUP; 1408 #endif 1409 if ((kevp[i].flags & EV_EOF) && 1410 kevp[i].fflags != 0) 1411 pfd->revents |= POLLERR; 1412 if (pfd->events & POLLIN) 1413 pfd->revents |= POLLIN; 1414 if (pfd->events & POLLRDNORM) 1415 pfd->revents |= POLLRDNORM; 1416 break; 1417 case EVFILT_WRITE: 1418 /* 1419 * As per the OpenGroup POLLHUP is mutually 1420 * exclusive with the writability flags. I 1421 * consider this a bit broken but... 1422 * 1423 * In this case a disconnect is implied even 1424 * for a half-closed (write side) situation. 1425 */ 1426 if (kevp[i].flags & EV_EOF) { 1427 pfd->revents |= POLLHUP; 1428 if (kevp[i].fflags != 0) 1429 pfd->revents |= POLLERR; 1430 } else { 1431 if (pfd->events & POLLOUT) 1432 pfd->revents |= POLLOUT; 1433 if (pfd->events & POLLWRNORM) 1434 pfd->revents |= POLLWRNORM; 1435 } 1436 break; 1437 case EVFILT_EXCEPT: 1438 /* 1439 * EV_NODATA should never be tagged for this 1440 * filter. 1441 */ 1442 if (pfd->events & POLLPRI) 1443 pfd->revents |= POLLPRI; 1444 if (pfd->events & POLLRDBAND) 1445 pfd->revents |= POLLRDBAND; 1446 break; 1447 } 1448 1449 if (nseldebug) { 1450 kprintf("poll index %d/%d fd %d revents %08x\n", 1451 pi, pkap->nfds, pfd->fd, pfd->revents); 1452 } 1453 1454 if (count_res && pfd->revents) 1455 ++*res; 1456 } else { 1457 if (nseldebug) { 1458 kprintf("poll index %d mismatch %ju/%d\n", 1459 pi, (uintmax_t)kevp[i].ident, pfd->fd); 1460 } 1461 } 1462 } 1463 1464 return (0); 1465 } 1466 1467 static int 1468 dopoll(int nfds, struct pollfd *fds, struct timespec *ts, int *res) 1469 { 1470 struct poll_kevent_copyin_args ka; 1471 struct pollfd sfds[64]; 1472 int bytes; 1473 int error; 1474 1475 *res = 0; 1476 if (nfds < 0) 1477 return (EINVAL); 1478 1479 if (nfds == 0 && ts) 1480 return (dotimeout_only(ts)); 1481 1482 /* 1483 * This is a bit arbitrary but we need to limit internal kmallocs. 1484 */ 1485 if (nfds > maxfilesperproc * 2) 1486 nfds = maxfilesperproc * 2; 1487 bytes = sizeof(struct pollfd) * nfds; 1488 1489 ka.lwp = curthread->td_lwp; 1490 ka.nfds = nfds; 1491 ka.pfds = 0; 1492 ka.error = 0; 1493 1494 if (ka.nfds < 64) 1495 ka.fds = sfds; 1496 else 1497 ka.fds = kmalloc(bytes, M_SELECT, M_WAITOK); 1498 1499 error = copyin(fds, ka.fds, bytes); 1500 if (error == 0) 1501 error = kern_kevent(&ka.lwp->lwp_kqueue, 0x7FFFFFFF, res, &ka, 1502 poll_copyin, poll_copyout, ts); 1503 1504 if (error == 0) 1505 error = copyout(ka.fds, fds, bytes); 1506 1507 if (ka.fds != sfds) 1508 kfree(ka.fds, M_SELECT); 1509 1510 ka.lwp->lwp_kqueue_serial += nfds; 1511 1512 return (error); 1513 } 1514 1515 static int 1516 socket_wait_copyin(void *arg, struct kevent *kevp, int maxevents, int *events) 1517 { 1518 return (0); 1519 } 1520 1521 static int 1522 socket_wait_copyout(void *arg, struct kevent *kevp, int count, int *res) 1523 { 1524 ++*res; 1525 return (0); 1526 } 1527 1528 extern struct fileops socketops; 1529 1530 /* 1531 * NOTE: Callers of socket_wait() must already have a reference on the 1532 * socket. 1533 */ 1534 int 1535 socket_wait(struct socket *so, struct timespec *ts, int *res) 1536 { 1537 struct thread *td = curthread; 1538 struct file *fp; 1539 struct kqueue kq; 1540 struct kevent kev; 1541 int error, fd; 1542 1543 if ((error = falloc(td->td_lwp, &fp, &fd)) != 0) 1544 return (error); 1545 1546 fp->f_type = DTYPE_SOCKET; 1547 fp->f_flag = FREAD | FWRITE; 1548 fp->f_ops = &socketops; 1549 fp->f_data = so; 1550 fsetfd(td->td_lwp->lwp_proc->p_fd, fp, fd); 1551 1552 kqueue_init(&kq, td->td_lwp->lwp_proc->p_fd); 1553 EV_SET(&kev, fd, EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, NULL); 1554 if ((error = kqueue_register(&kq, &kev)) != 0) { 1555 fdrop(fp); 1556 return (error); 1557 } 1558 1559 error = kern_kevent(&kq, 1, res, NULL, socket_wait_copyin, 1560 socket_wait_copyout, ts); 1561 1562 EV_SET(&kev, fd, EVFILT_READ, EV_DELETE, 0, 0, NULL); 1563 kqueue_register(&kq, &kev); 1564 fp->f_ops = &badfileops; 1565 fdrop(fp); 1566 1567 return (error); 1568 } 1569 1570 /* 1571 * OpenBSD poll system call. 1572 * XXX this isn't quite a true representation.. OpenBSD uses select ops. 1573 * 1574 * MPSAFE 1575 */ 1576 int 1577 sys_openbsd_poll(struct openbsd_poll_args *uap) 1578 { 1579 return (sys_poll((struct poll_args *)uap)); 1580 } 1581 1582 /*ARGSUSED*/ 1583 int 1584 seltrue(cdev_t dev, int events) 1585 { 1586 return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)); 1587 } 1588