/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)sys_generic.c	8.5 (Berkeley) 1/21/94
 * $FreeBSD: src/sys/kern/sys_generic.c,v 1.55.2.10 2001/03/17 10:39:32 peter Exp $
 */

#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/event.h>
#include <sys/filedesc.h>
#include <sys/filio.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/socketvar.h>
#include <sys/uio.h>
#include <sys/kernel.h>
#include <sys/kern_syscall.h>
#include <sys/malloc.h>
#include <sys/mapped_ioctl.h>
#include <sys/poll.h>
#include <sys/queue.h>
#include <sys/resourcevar.h>
#include <sys/socketops.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/buf.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif
#include <vm/vm.h>
#include <vm/vm_page.h>

#include <sys/file2.h>
#include <sys/mplock2.h>
#include <sys/spinlock2.h>

#include <machine/limits.h>

static MALLOC_DEFINE(M_IOCTLOPS, "ioctlops", "ioctl data buffer");
static MALLOC_DEFINE(M_IOCTLMAP, "ioctlmap", "mapped ioctl handler buffer");
static MALLOC_DEFINE(M_SELECT, "select", "select() buffer");
MALLOC_DEFINE(M_IOV, "iov", "large iov's");

typedef struct kfd_set {
	fd_mask	fds_bits[2];
} kfd_set;

enum select_copyin_states {
	COPYIN_READ, COPYIN_WRITE, COPYIN_EXCEPT, COPYIN_DONE
};

struct select_kevent_copyin_args {
	kfd_set		*read_set;
	kfd_set		*write_set;
	kfd_set		*except_set;
	int		active_set;	/* One of select_copyin_states */
	struct lwp	*lwp;		/* Pointer to our lwp */
	int		num_fds;	/* Number of file descriptors (syscall arg) */
	int		proc_fds;	/* Processed fd's (wraps) */
	int		error;		/* Returned to userland */
};

struct poll_kevent_copyin_args {
	struct lwp	*lwp;
	struct pollfd	*fds;
	int		nfds;
	int		pfds;
	int		error;
};

static struct lwkt_token mioctl_token = LWKT_TOKEN_INITIALIZER(mioctl_token);

static int	doselect(int nd, fd_set *in, fd_set *ou, fd_set *ex,
			 struct timespec *ts, int *res);
static int	dopoll(int nfds, struct pollfd *fds, struct timespec *ts,
		       int *res);
static int	dofileread(int, struct file *, struct uio *, int, size_t *);
static int	dofilewrite(int, struct file *, struct uio *, int, size_t *);

/*
 * Read system call.
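 *
 * Builds a single-segment uio for the user buffer and dispatches to
 * kern_preadv() with no explicit offset (uio_offset == -1 selects the
 * file's current offset); the byte count transferred is returned via
 * sysmsg_szresult.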
 *
 * MPSAFE
 */
int
sys_read(struct read_args *uap)
{
	struct thread *td = curthread;
	struct uio auio;
	struct iovec aiov;
	int error;

	if ((ssize_t)uap->nbyte < 0)
		return(EINVAL);

	aiov.iov_base = uap->buf;
	aiov.iov_len = uap->nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = -1;
	auio.uio_resid = uap->nbyte;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_td = td;

	error = kern_preadv(uap->fd, &auio, 0, &uap->sysmsg_szresult);
	return(error);
}

/*
 * Positioned (Pread) read system call.
 *
 * MPSAFE
 */
int
sys_extpread(struct extpread_args *uap)
{
	struct thread *td = curthread;
	struct uio auio;
	struct iovec aiov;
	int error;
	int flags;

	if ((ssize_t)uap->nbyte < 0)
		return(EINVAL);

	aiov.iov_base = uap->buf;
	aiov.iov_len = uap->nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = uap->offset;
	auio.uio_resid = uap->nbyte;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_td = td;

	flags = uap->flags & O_FMASK;
	if (uap->offset != (off_t)-1)
		flags |= O_FOFFSET;

	error = kern_preadv(uap->fd, &auio, flags, &uap->sysmsg_szresult);
	return(error);
}

/*
 * Scatter read system call.
 *
 * MPSAFE
 */
int
sys_readv(struct readv_args *uap)
{
	struct thread *td = curthread;
	struct uio auio;
	struct iovec aiov[UIO_SMALLIOV], *iov = NULL;
	int error;

	error = iovec_copyin(uap->iovp, &iov, aiov, uap->iovcnt,
			     &auio.uio_resid);
	if (error)
		return (error);
	auio.uio_iov = iov;
	auio.uio_iovcnt = uap->iovcnt;
	auio.uio_offset = -1;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_td = td;

	error = kern_preadv(uap->fd, &auio, 0, &uap->sysmsg_szresult);

	iovec_free(&iov, aiov);
	return (error);
}

/*
 * Scatter positioned read system call.
 *
 * MPSAFE
 */
int
sys_extpreadv(struct extpreadv_args *uap)
{
	struct thread *td = curthread;
	struct uio auio;
	struct iovec aiov[UIO_SMALLIOV], *iov = NULL;
	int error;
	int flags;

	error = iovec_copyin(uap->iovp, &iov, aiov, uap->iovcnt,
			     &auio.uio_resid);
	if (error)
		return (error);
	auio.uio_iov = iov;
	auio.uio_iovcnt = uap->iovcnt;
	auio.uio_offset = uap->offset;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_td = td;

	flags = uap->flags & O_FMASK;
	if (uap->offset != (off_t)-1)
		flags |= O_FOFFSET;

	error = kern_preadv(uap->fd, &auio, flags, &uap->sysmsg_szresult);

	iovec_free(&iov, aiov);
	return(error);
}

/*
 * MPSAFE
 */
int
kern_preadv(int fd, struct uio *auio, int flags, size_t *res)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp;
	int error;

	KKASSERT(p);

	fp = holdfp(p->p_fd, fd, FREAD);
	if (fp == NULL)
		return (EBADF);
	if ((flags & O_FOFFSET) && fp->f_type != DTYPE_VNODE) {
		error = ESPIPE;
	} else {
		error = dofileread(fd, fp, auio, flags, res);
	}
	fdrop(fp);
	return(error);
}

/*
 * Common code for readv and preadv that reads data in
 * from a file using the passed in uio, offset, and flags.
 *
 * MPALMOSTSAFE - ktrace needs help
 */
static int
dofileread(int fd, struct file *fp, struct uio *auio, int flags, size_t *res)
{
	int error;
	size_t len;
#ifdef KTRACE
	struct thread *td = curthread;
	struct iovec *ktriov = NULL;
	struct uio ktruio;
#endif

#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(td, KTR_GENIO)) {
		int iovlen = auio->uio_iovcnt * sizeof(struct iovec);

		ktriov = kmalloc(iovlen, M_TEMP, M_WAITOK);
		bcopy((caddr_t)auio->uio_iov, (caddr_t)ktriov, iovlen);
		ktruio = *auio;
	}
#endif
	len = auio->uio_resid;
	error = fo_read(fp, auio, fp->f_cred, flags);
	if (error) {
		if (auio->uio_resid != len && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	}
#ifdef KTRACE
	if (ktriov != NULL) {
		if (error == 0) {
			ktruio.uio_iov = ktriov;
			ktruio.uio_resid = len - auio->uio_resid;
			get_mplock();
			ktrgenio(td->td_lwp, fd, UIO_READ, &ktruio, error);
			rel_mplock();
		}
		kfree(ktriov, M_TEMP);
	}
#endif
	if (error == 0)
		*res = len - auio->uio_resid;

	return(error);
}

/*
 * Write system call.
 *
 * MPSAFE
 */
int
sys_write(struct write_args *uap)
{
	struct thread *td = curthread;
	struct uio auio;
	struct iovec aiov;
	int error;

	if ((ssize_t)uap->nbyte < 0)
		return(EINVAL);

	aiov.iov_base = (void *)(uintptr_t)uap->buf;
	aiov.iov_len = uap->nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = -1;
	auio.uio_resid = uap->nbyte;
	auio.uio_rw = UIO_WRITE;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_td = td;

	error = kern_pwritev(uap->fd, &auio, 0, &uap->sysmsg_szresult);

	return(error);
}

/*
 * Pwrite system call.
 *
 * MPSAFE
 */
int
sys_extpwrite(struct extpwrite_args *uap)
{
	struct thread *td = curthread;
	struct uio auio;
	struct iovec aiov;
	int error;
	int flags;

	if ((ssize_t)uap->nbyte < 0)
		return(EINVAL);

	aiov.iov_base = (void *)(uintptr_t)uap->buf;
	aiov.iov_len = uap->nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = uap->offset;
	auio.uio_resid = uap->nbyte;
	auio.uio_rw = UIO_WRITE;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_td = td;

	flags = uap->flags & O_FMASK;
	if (uap->offset != (off_t)-1)
		flags |= O_FOFFSET;
	error = kern_pwritev(uap->fd, &auio, flags, &uap->sysmsg_szresult);
	return(error);
}

/*
 * MPSAFE
 */
int
sys_writev(struct writev_args *uap)
{
	struct thread *td = curthread;
	struct uio auio;
	struct iovec aiov[UIO_SMALLIOV], *iov = NULL;
	int error;

	error = iovec_copyin(uap->iovp, &iov, aiov, uap->iovcnt,
			     &auio.uio_resid);
	if (error)
		return (error);
	auio.uio_iov = iov;
	auio.uio_iovcnt = uap->iovcnt;
	auio.uio_offset = -1;
	auio.uio_rw = UIO_WRITE;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_td = td;

	error = kern_pwritev(uap->fd, &auio, 0, &uap->sysmsg_szresult);

	iovec_free(&iov, aiov);
	return (error);
}

/*
 * Gather positioned write system call.
 *
 * MPSAFE
 */
int
sys_extpwritev(struct extpwritev_args *uap)
{
	struct thread *td = curthread;
	struct uio auio;
	struct iovec aiov[UIO_SMALLIOV], *iov = NULL;
	int error;
	int flags;

	error = iovec_copyin(uap->iovp, &iov, aiov, uap->iovcnt,
			     &auio.uio_resid);
	if (error)
		return (error);
	auio.uio_iov = iov;
	auio.uio_iovcnt = uap->iovcnt;
	auio.uio_offset = uap->offset;
	auio.uio_rw = UIO_WRITE;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_td = td;

	flags = uap->flags & O_FMASK;
	if (uap->offset != (off_t)-1)
		flags |= O_FOFFSET;

	error = kern_pwritev(uap->fd, &auio, flags, &uap->sysmsg_szresult);

	iovec_free(&iov, aiov);
	return(error);
}

/*
 * MPSAFE
 */
int
kern_pwritev(int fd, struct uio *auio, int flags, size_t *res)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp;
	int error;

	KKASSERT(p);

	fp = holdfp(p->p_fd, fd, FWRITE);
	if (fp == NULL) {
		return (EBADF);
	} else if ((flags & O_FOFFSET) && fp->f_type != DTYPE_VNODE) {
		error = ESPIPE;
	} else {
		error = dofilewrite(fd, fp, auio, flags, res);
	}

	fdrop(fp);
	return (error);
}

/*
 * Common code for writev and pwritev that writes data to
 * a file using the passed in uio, offset, and flags.
 *
 * MPALMOSTSAFE - ktrace needs help
 */
static int
dofilewrite(int fd, struct file *fp, struct uio *auio, int flags, size_t *res)
{
	struct thread *td = curthread;
	struct lwp *lp = td->td_lwp;
	int error;
	size_t len;
#ifdef KTRACE
	struct iovec *ktriov = NULL;
	struct uio ktruio;
#endif

#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec and uio
	 */
	if (KTRPOINT(td, KTR_GENIO)) {
		int iovlen = auio->uio_iovcnt * sizeof(struct iovec);

		ktriov = kmalloc(iovlen, M_TEMP, M_WAITOK);
		bcopy((caddr_t)auio->uio_iov, (caddr_t)ktriov, iovlen);
		ktruio = *auio;
	}
#endif
	len = auio->uio_resid;
	error = fo_write(fp, auio, fp->f_cred, flags);
	if (error) {
		if (auio->uio_resid != len && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		/* Socket layer is responsible for issuing SIGPIPE. */
		if (error == EPIPE && fp->f_type != DTYPE_SOCKET)
			lwpsignal(lp->lwp_proc, lp, SIGPIPE);
	}
#ifdef KTRACE
	if (ktriov != NULL) {
		if (error == 0) {
			ktruio.uio_iov = ktriov;
			ktruio.uio_resid = len - auio->uio_resid;
			get_mplock();
			ktrgenio(lp, fd, UIO_WRITE, &ktruio, error);
			rel_mplock();
		}
		kfree(ktriov, M_TEMP);
	}
#endif
	if (error == 0)
		*res = len - auio->uio_resid;

	return(error);
}

/*
 * Ioctl system call.
 *
 * MPSAFE
 */
int
sys_ioctl(struct ioctl_args *uap)
{
	int error;

	error = mapped_ioctl(uap->fd, uap->com, uap->data, NULL, &uap->sysmsg);
	return (error);
}

struct ioctl_map_entry {
	const char *subsys;
	struct ioctl_map_range *cmd_ranges;
	LIST_ENTRY(ioctl_map_entry) entries;
};

/*
 * The true heart of all ioctl syscall handlers (native, emulation).
 * If map != NULL, it will be searched for a matching entry for com,
 * and appropriate conversions/conversion functions will be utilized.
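 *
 * Illustration (restating the three cases handled below): a one-to-one
 * entry uses start == end and is translated via maptocmd; a ranged
 * identity mapping has start == maptocmd and end == maptoend, passing
 * com through or rewriting it via mapfunc; a range mapped onto a
 * different native range (e.g. 6500-65ff <-> 9500-95ff) always
 * requires a mapfunc.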
 *
 * MPSAFE
 */
int
mapped_ioctl(int fd, u_long com, caddr_t uspc_data, struct ioctl_map *map,
	     struct sysmsg *msg)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct ucred *cred;
	struct file *fp;
	struct ioctl_map_range *iomc = NULL;
	int error;
	u_int size;
	u_long ocom = com;
	caddr_t data, memp;
	int tmp;
#define STK_PARAMS	128
	union {
		char stkbuf[STK_PARAMS];
		long align;
	} ubuf;

	KKASSERT(p);
	cred = td->td_ucred;
	memp = NULL;

	fp = holdfp(p->p_fd, fd, FREAD|FWRITE);
	if (fp == NULL)
		return(EBADF);

	if (map != NULL) {	/* obey translation map */
		u_long maskcmd;
		struct ioctl_map_entry *e;

		maskcmd = com & map->mask;

		lwkt_gettoken(&mioctl_token);
		LIST_FOREACH(e, &map->mapping, entries) {
			for (iomc = e->cmd_ranges; iomc->start != 0 ||
			     iomc->maptocmd != 0 || iomc->wrapfunc != NULL ||
			     iomc->mapfunc != NULL;
			     iomc++) {
				if (maskcmd >= iomc->start &&
				    maskcmd <= iomc->end)
					break;
			}

			/* Did we find a match? */
			if (iomc->start != 0 || iomc->maptocmd != 0 ||
			    iomc->wrapfunc != NULL || iomc->mapfunc != NULL)
				break;
		}
		lwkt_reltoken(&mioctl_token);

		if (iomc == NULL ||
		    (iomc->start == 0 && iomc->maptocmd == 0 &&
		     iomc->wrapfunc == NULL && iomc->mapfunc == NULL)) {
			kprintf("%s: 'ioctl' fd=%d, cmd=0x%lx ('%c',%d) not implemented\n",
			       map->sys, fd, maskcmd,
			       (int)((maskcmd >> 8) & 0xff),
			       (int)(maskcmd & 0xff));
			error = EINVAL;
			goto done;
		}

		/*
		 * If it's a non-range one to one mapping, maptocmd should be
		 * correct. If it's a ranged one to one mapping, we pass the
		 * original value of com, and for a range mapped to a different
		 * range, we always need a mapping function to translate the
		 * ioctl to our native ioctl. Ex. 6500-65ff <-> 9500-95ff
		 */
		if (iomc->start == iomc->end && iomc->maptocmd == iomc->maptoend) {
			com = iomc->maptocmd;
		} else if (iomc->start == iomc->maptocmd && iomc->end == iomc->maptoend) {
			if (iomc->mapfunc != NULL)
				com = iomc->mapfunc(iomc->start, iomc->end,
						    iomc->start, iomc->end,
						    com, com);
		} else {
			if (iomc->mapfunc != NULL) {
				com = iomc->mapfunc(iomc->start, iomc->end,
						    iomc->maptocmd, iomc->maptoend,
						    com, ocom);
			} else {
				kprintf("%s: Invalid mapping for fd=%d, cmd=%#lx ('%c',%d)\n",
				       map->sys, fd, maskcmd,
				       (int)((maskcmd >> 8) & 0xff),
				       (int)(maskcmd & 0xff));
				error = EINVAL;
				goto done;
			}
		}
	}

	switch (com) {
	case FIONCLEX:
		error = fclrfdflags(p->p_fd, fd, UF_EXCLOSE);
		goto done;
	case FIOCLEX:
		error = fsetfdflags(p->p_fd, fd, UF_EXCLOSE);
		goto done;
	}

	/*
	 * Interpret high order word to find amount of data to be
	 * copied to/from the user's address space.
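	 *
	 * The command word encodes the parameter size, extracted with
	 * IOCPARM_LEN(), alongside the IOC_VOID/IOC_IN/IOC_OUT direction
	 * bits tested below.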
	 */
	size = IOCPARM_LEN(com);
	if (size > IOCPARM_MAX) {
		error = ENOTTY;
		goto done;
	}

	if ((com & IOC_VOID) == 0 && size > sizeof(ubuf.stkbuf)) {
		memp = kmalloc(size, M_IOCTLOPS, M_WAITOK);
		data = memp;
	} else {
		memp = NULL;
		data = ubuf.stkbuf;
	}
	if (com & IOC_VOID) {
		*(caddr_t *)data = uspc_data;
	} else if (com & IOC_IN) {
		if (size != 0) {
			error = copyin(uspc_data, data, (size_t)size);
			if (error)
				goto done;
		} else {
			*(caddr_t *)data = uspc_data;
		}
	} else if ((com & IOC_OUT) != 0 && size) {
		/*
		 * Zero the buffer so the user always
		 * gets back something deterministic.
		 */
		bzero(data, (size_t)size);
	}

	switch (com) {
	case FIONBIO:
		if ((tmp = *(int *)data))
			atomic_set_int(&fp->f_flag, FNONBLOCK);
		else
			atomic_clear_int(&fp->f_flag, FNONBLOCK);
		error = 0;
		break;

	case FIOASYNC:
		if ((tmp = *(int *)data))
			atomic_set_int(&fp->f_flag, FASYNC);
		else
			atomic_clear_int(&fp->f_flag, FASYNC);
		error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, cred, msg);
		break;

	default:
		/*
		 * If there is an override function,
		 * call it instead of directly routing the call.
		 */
		if (map != NULL && iomc->wrapfunc != NULL)
			error = iomc->wrapfunc(fp, com, ocom, data, cred);
		else
			error = fo_ioctl(fp, com, data, cred, msg);
		/*
		 * Copy any data to user, size was
		 * already set and checked above.
		 */
		if (error == 0 && (com & IOC_OUT) != 0 && size != 0)
			error = copyout(data, uspc_data, (size_t)size);
		break;
	}
done:
	if (memp != NULL)
		kfree(memp, M_IOCTLOPS);
	fdrop(fp);
	return(error);
}

/*
 * MPSAFE
 */
int
mapped_ioctl_register_handler(struct ioctl_map_handler *he)
{
	struct ioctl_map_entry *ne;

	KKASSERT(he != NULL && he->map != NULL && he->cmd_ranges != NULL &&
		 he->subsys != NULL && *he->subsys != '\0');

	ne = kmalloc(sizeof(struct ioctl_map_entry), M_IOCTLMAP,
		     M_WAITOK | M_ZERO);

	ne->subsys = he->subsys;
	ne->cmd_ranges = he->cmd_ranges;

	lwkt_gettoken(&mioctl_token);
	LIST_INSERT_HEAD(&he->map->mapping, ne, entries);
	lwkt_reltoken(&mioctl_token);

	return(0);
}

/*
 * MPSAFE
 */
int
mapped_ioctl_unregister_handler(struct ioctl_map_handler *he)
{
	struct ioctl_map_entry *ne;
	int error = EINVAL;

	KKASSERT(he != NULL && he->map != NULL && he->cmd_ranges != NULL);

	lwkt_gettoken(&mioctl_token);
	LIST_FOREACH(ne, &he->map->mapping, entries) {
		if (ne->cmd_ranges == he->cmd_ranges) {
			LIST_REMOVE(ne, entries);
			kfree(ne, M_IOCTLMAP);
			error = 0;
			break;
		}
	}
	lwkt_reltoken(&mioctl_token);
	return(error);
}

static int	nselcoll;	/* Select collisions since boot */
int	selwait;
SYSCTL_INT(_kern, OID_AUTO, nselcoll, CTLFLAG_RD, &nselcoll, 0, "");
static int	nseldebug;
SYSCTL_INT(_kern, OID_AUTO, nseldebug, CTLFLAG_RW, &nseldebug, 0, "");

/*
 * Select system call.
 *
 * MPSAFE
 */
int
sys_select(struct select_args *uap)
{
	struct timeval ktv;
	struct timespec *ktsp, kts;
	int error;

	/*
	 * Get timeout if any.
	 */
	if (uap->tv != NULL) {
		error = copyin(uap->tv, &ktv, sizeof (ktv));
		if (error)
			return (error);
		TIMEVAL_TO_TIMESPEC(&ktv, &kts);
		ktsp = &kts;
	} else {
		ktsp = NULL;
	}

	/*
	 * Do real work.
	 */
	error = doselect(uap->nd, uap->in, uap->ou, uap->ex, ktsp,
			 &uap->sysmsg_result);

	return (error);
}

/*
 * Pselect system call.
 */
int
sys_pselect(struct pselect_args *uap)
{
	struct thread *td = curthread;
	struct lwp *lp = td->td_lwp;
	struct timespec *ktsp, kts;
	sigset_t sigmask;
	int error;

	/*
	 * Get timeout if any.
	 */
	if (uap->ts != NULL) {
		error = copyin(uap->ts, &kts, sizeof (kts));
		if (error)
			return (error);
		ktsp = &kts;
	} else {
		ktsp = NULL;
	}

	/*
	 * Install temporary signal mask if any provided.
	 */
	if (uap->sigmask != NULL) {
		error = copyin(uap->sigmask, &sigmask, sizeof(sigmask));
		if (error)
			return (error);
		lwkt_gettoken(&lp->lwp_proc->p_token);
		lp->lwp_oldsigmask = lp->lwp_sigmask;
		SIG_CANTMASK(sigmask);
		lp->lwp_sigmask = sigmask;
		lwkt_reltoken(&lp->lwp_proc->p_token);
	}

	/*
	 * Do real job.
	 */
	error = doselect(uap->nd, uap->in, uap->ou, uap->ex, ktsp,
			 &uap->sysmsg_result);

	if (uap->sigmask != NULL) {
		lwkt_gettoken(&lp->lwp_proc->p_token);
		/* doselect() responsible for turning ERESTART into EINTR */
		KKASSERT(error != ERESTART);
		if (error == EINTR) {
			/*
			 * We can't restore the previous signal mask now
			 * because it could block the signal that interrupted
			 * us.  So make a note to restore it after executing
			 * the handler.
			 */
			lp->lwp_flags |= LWP_OLDMASK;
		} else {
			/*
			 * No handler to run.  Restore previous mask immediately.
			 */
			lp->lwp_sigmask = lp->lwp_oldsigmask;
		}
		lwkt_reltoken(&lp->lwp_proc->p_token);
	}

	return (error);
}

static int
select_copyin(void *arg, struct kevent *kevp, int maxevents, int *events)
{
	struct select_kevent_copyin_args *skap = NULL;
	struct kevent *kev;
	int fd;
	kfd_set *fdp = NULL;
	short filter = 0;
	u_int fflags = 0;

	skap = (struct select_kevent_copyin_args *)arg;

	if (*events == maxevents)
		return (0);

	while (skap->active_set < COPYIN_DONE) {
		switch (skap->active_set) {
		case COPYIN_READ:
			/*
			 * Register descriptors for the read filter
			 */
			fdp = skap->read_set;
			filter = EVFILT_READ;
			fflags = NOTE_OLDAPI;
			if (fdp)
				break;
			++skap->active_set;
			skap->proc_fds = 0;
			/* fall through */
		case COPYIN_WRITE:
			/*
			 * Register descriptors for the write filter
			 */
			fdp = skap->write_set;
			filter = EVFILT_WRITE;
			fflags = NOTE_OLDAPI;
			if (fdp)
				break;
			++skap->active_set;
			skap->proc_fds = 0;
			/* fall through */
		case COPYIN_EXCEPT:
			/*
			 * Register descriptors for the exception filter
			 */
			fdp = skap->except_set;
			filter = EVFILT_EXCEPT;
			fflags = NOTE_OLDAPI | NOTE_OOB;
			if (fdp)
				break;
			++skap->active_set;
			skap->proc_fds = 0;
			/* fall through */
		case COPYIN_DONE:
			/*
			 * Nothing left to register
			 */
			return(0);
			/* NOT REACHED */
		}

		while (skap->proc_fds < skap->num_fds) {
			fd = skap->proc_fds;
			if (FD_ISSET(fd, fdp)) {
				kev = &kevp[*events];
				EV_SET(kev, fd, filter,
				       EV_ADD|EV_ENABLE,
				       fflags, 0,
				       (void *)(uintptr_t)
					skap->lwp->lwp_kqueue_serial);
				FD_CLR(fd, fdp);
				++*events;

				if (nseldebug)
					kprintf("select fd %d filter %d serial %d\n",
						fd, filter,
						skap->lwp->lwp_kqueue_serial);
			}
			++skap->proc_fds;
			if (*events == maxevents)
				return (0);
		}
		skap->active_set++;
		skap->proc_fds = 0;
	}

	return (0);
}

static int
select_copyout(void *arg, struct kevent *kevp, int count, int *res)
{
	struct select_kevent_copyin_args *skap;
	struct kevent kev;
	int i = 0;

	skap = (struct select_kevent_copyin_args *)arg;

	for (i = 0; i < count; ++i) {
		/*
		 * Filter out and delete spurious events
		 */
		if ((u_int)(uintptr_t)kevp[i].udata !=
		    skap->lwp->lwp_kqueue_serial) {
			kev = kevp[i];
			kev.flags = EV_DISABLE|EV_DELETE;
			kqueue_register(&skap->lwp->lwp_kqueue, &kev);
			if (nseldebug)
				kprintf("select fd %ju mismatched serial %d\n",
					(uintmax_t)kevp[i].ident,
					skap->lwp->lwp_kqueue_serial);
			continue;
		}

		/*
		 * Handle errors
		 */
		if (kevp[i].flags & EV_ERROR) {
			int error = kevp[i].data;

			switch (error) {
			case EBADF:
				/*
				 * A bad file descriptor is considered a
				 * fatal error for select, bail out.
				 */
				skap->error = error;
				*res = -1;
				return error;

			default:
				/*
				 * Select silently swallows any unknown errors
				 * for descriptors in the read or write sets.
				 *
				 * ALWAYS filter out EOPNOTSUPP errors from
				 * filters (at least until all filters support
				 * EVFILT_EXCEPT)
				 *
				 * We also filter out ENODEV since dev_dkqfilter
				 * returns ENODEV if EOPNOTSUPP is returned in an
				 * inner call.
				 *
				 * XXX: fix this
				 */
				if (kevp[i].filter != EVFILT_READ &&
				    kevp[i].filter != EVFILT_WRITE &&
				    error != EOPNOTSUPP &&
				    error != ENODEV) {
					skap->error = error;
					*res = -1;
					return error;
				}
				break;
			}
			if (nseldebug)
				kprintf("select fd %ju filter %d error %d\n",
					(uintmax_t)kevp[i].ident,
					kevp[i].filter, error);
			continue;
		}

		switch (kevp[i].filter) {
		case EVFILT_READ:
			FD_SET(kevp[i].ident, skap->read_set);
			break;
		case EVFILT_WRITE:
			FD_SET(kevp[i].ident, skap->write_set);
			break;
		case EVFILT_EXCEPT:
			FD_SET(kevp[i].ident, skap->except_set);
			break;
		}

		++*res;
	}

	return (0);
}

/*
 * Copy select bits in from userland.  Allocate kernel memory if the
 * set is large.
 */
static int
getbits(int bytes, fd_set *in_set, kfd_set **out_set, kfd_set *tmp_set)
{
	int error;

	if (in_set) {
		if (bytes < sizeof(*tmp_set))
			*out_set = tmp_set;
		else
			*out_set = kmalloc(bytes, M_SELECT, M_WAITOK);
		error = copyin(in_set, *out_set, bytes);
	} else {
		*out_set = NULL;
		error = 0;
	}
	return (error);
}

/*
 * Copy returned select bits back out to userland.
 */
static int
putbits(int bytes, kfd_set *in_set, fd_set *out_set)
{
	int error;

	if (in_set) {
		error = copyout(in_set, out_set, bytes);
	} else {
		error = 0;
	}
	return (error);
}

static int
dotimeout_only(struct timespec *ts)
{
	return(nanosleep1(ts, NULL));
}

/*
 * Common code for sys_select() and sys_pselect().
 *
 * in, out and ex are userland pointers.  ts must point to validated
 * kernel-side timeout value or NULL for infinite timeout.  res must
 * point to syscall return value.
 */
static int
doselect(int nd, fd_set *read, fd_set *write, fd_set *except,
	 struct timespec *ts, int *res)
{
	struct proc *p = curproc;
	struct select_kevent_copyin_args *kap, ka;
	int bytes, error;
	kfd_set read_tmp;
	kfd_set write_tmp;
	kfd_set except_tmp;

	*res = 0;
	if (nd < 0)
		return (EINVAL);
	if (nd == 0 && ts)
		return (dotimeout_only(ts));

	if (nd > p->p_fd->fd_nfiles)		/* limit kmalloc */
		nd = p->p_fd->fd_nfiles;

	kap = &ka;
	kap->lwp = curthread->td_lwp;
	kap->num_fds = nd;
	kap->proc_fds = 0;
	kap->error = 0;
	kap->active_set = COPYIN_READ;

	/*
	 * Calculate bytes based on the number of __fd_mask[] array entries
	 * multiplied by the size of __fd_mask.
	 */
	bytes = howmany(nd, __NFDBITS) * sizeof(__fd_mask);

	/* kap->read_set = NULL; not needed */
	kap->write_set = NULL;
	kap->except_set = NULL;

	error = getbits(bytes, read, &kap->read_set, &read_tmp);
	if (error == 0)
		error = getbits(bytes, write, &kap->write_set, &write_tmp);
	if (error == 0)
		error = getbits(bytes, except, &kap->except_set, &except_tmp);
	if (error)
		goto done;

	/*
	 * NOTE: Make sure the max events passed to kern_kevent() is
	 *	 effectively unlimited; passing 0x7FFFFFFF (rather than
	 *	 the minimal nd * 3) accomplishes this.
	 *
	 *	 (*res) continues to increment as returned events are
	 *	 loaded in.
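	 *
	 *	 Stale events are detected in select_copyout() by comparing
	 *	 each returned kevent's udata against lwp_kqueue_serial;
	 *	 the serial is advanced by num_fds when this call finishes,
	 *	 so leftover registrations cannot match a later select().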
	 */
	error = kern_kevent(&kap->lwp->lwp_kqueue, 0x7FFFFFFF, res, kap,
			    select_copyin, select_copyout, ts);
	if (error == 0)
		error = putbits(bytes, kap->read_set, read);
	if (error == 0)
		error = putbits(bytes, kap->write_set, write);
	if (error == 0)
		error = putbits(bytes, kap->except_set, except);

	/*
	 * An error from an individual event that should be passed
	 * back to userland (EBADF)
	 */
	if (kap->error)
		error = kap->error;

	/*
	 * Clean up.
	 */
done:
	if (kap->read_set && kap->read_set != &read_tmp)
		kfree(kap->read_set, M_SELECT);
	if (kap->write_set && kap->write_set != &write_tmp)
		kfree(kap->write_set, M_SELECT);
	if (kap->except_set && kap->except_set != &except_tmp)
		kfree(kap->except_set, M_SELECT);

	kap->lwp->lwp_kqueue_serial += kap->num_fds;

	return (error);
}

/*
 * Poll system call.
 *
 * MPSAFE
 */
int
sys_poll(struct poll_args *uap)
{
	struct timespec ts, *tsp;
	int error;

	if (uap->timeout != INFTIM) {
		if (uap->timeout < 0)
			return (EINVAL);
		ts.tv_sec = uap->timeout / 1000;
		ts.tv_nsec = (uap->timeout % 1000) * 1000 * 1000;
		tsp = &ts;
	} else {
		tsp = NULL;
	}

	error = dopoll(uap->nfds, uap->fds, tsp, &uap->sysmsg_result);

	return (error);
}

static int
poll_copyin(void *arg, struct kevent *kevp, int maxevents, int *events)
{
	struct poll_kevent_copyin_args *pkap;
	struct pollfd *pfd;
	struct kevent *kev;
	int kev_count;

	pkap = (struct poll_kevent_copyin_args *)arg;

	while (pkap->pfds < pkap->nfds) {
		pfd = &pkap->fds[pkap->pfds];

		/* Clear return events */
		pfd->revents = 0;

		/* Do not check if fd is equal to -1 */
		if (pfd->fd == -1) {
			++pkap->pfds;
			continue;
		}

		kev_count = 0;
		if (pfd->events & (POLLIN | POLLRDNORM))
			kev_count++;
		if (pfd->events & (POLLOUT | POLLWRNORM))
			kev_count++;
		if (pfd->events & (POLLPRI | POLLRDBAND))
			kev_count++;

		if (*events + kev_count > maxevents)
			return (0);

		/*
		 * NOTE: A combined serial number and poll array index is
		 *	 stored in kev->udata.
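		 *
		 *	 For example, with serial S and array index i the
		 *	 stored value is S + i; poll_copyout() recovers i by
		 *	 subtracting the lwp's current serial and treats an
		 *	 out-of-range index as a stale event to be deleted.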
		 */
		kev = &kevp[*events];
		if (pfd->events & (POLLIN | POLLRDNORM)) {
			EV_SET(kev++, pfd->fd, EVFILT_READ, EV_ADD|EV_ENABLE,
			       NOTE_OLDAPI, 0, (void *)(uintptr_t)
				(pkap->lwp->lwp_kqueue_serial + pkap->pfds));
		}
		if (pfd->events & (POLLOUT | POLLWRNORM)) {
			EV_SET(kev++, pfd->fd, EVFILT_WRITE, EV_ADD|EV_ENABLE,
			       NOTE_OLDAPI, 0, (void *)(uintptr_t)
				(pkap->lwp->lwp_kqueue_serial + pkap->pfds));
		}
		if (pfd->events & (POLLPRI | POLLRDBAND)) {
			EV_SET(kev++, pfd->fd, EVFILT_EXCEPT, EV_ADD|EV_ENABLE,
			       NOTE_OLDAPI | NOTE_OOB, 0,
			       (void *)(uintptr_t)
				(pkap->lwp->lwp_kqueue_serial + pkap->pfds));
		}

		if (nseldebug) {
			kprintf("poll index %d/%d fd %d events %08x serial %d\n",
				pkap->pfds, pkap->nfds - 1, pfd->fd,
				pfd->events, pkap->lwp->lwp_kqueue_serial);
		}

		++pkap->pfds;
		(*events) += kev_count;
	}

	return (0);
}

static int
poll_copyout(void *arg, struct kevent *kevp, int count, int *res)
{
	struct poll_kevent_copyin_args *pkap;
	struct pollfd *pfd;
	struct kevent kev;
	int count_res;
	int i;
	u_int pi;

	pkap = (struct poll_kevent_copyin_args *)arg;

	for (i = 0; i < count; ++i) {
		/*
		 * Extract the poll array index and delete spurious events.
		 * We can easily tell if the serial number is incorrect
		 * by checking whether the extracted index is out of range.
		 */
		pi = (u_int)(uintptr_t)kevp[i].udata -
		     (u_int)pkap->lwp->lwp_kqueue_serial;

		if (pi >= pkap->nfds) {
			kev = kevp[i];
			kev.flags = EV_DISABLE|EV_DELETE;
			kqueue_register(&pkap->lwp->lwp_kqueue, &kev);
			if (nseldebug)
				kprintf("poll index %d out of range against serial %d\n",
					pi, pkap->lwp->lwp_kqueue_serial);
			continue;
		}
		pfd = &pkap->fds[pi];
		if (kevp[i].ident == pfd->fd) {
			/*
			 * A single descriptor may generate an error against
			 * more than one filter, make sure to set the
			 * appropriate flags but do not increment (*res)
			 * more than once.
			 */
			count_res = (pfd->revents == 0);
			if (kevp[i].flags & EV_ERROR) {
				switch (kevp[i].data) {
				case EBADF:
				case POLLNVAL:
					/* Bad file descriptor */
					if (count_res)
						++*res;
					pfd->revents |= POLLNVAL;
					break;
				default:
					/*
					 * Poll silently swallows any unknown
					 * errors except in the case of POLLPRI
					 * (OOB/urgent data).
					 *
					 * ALWAYS filter out EOPNOTSUPP errors
					 * from filters, common applications
					 * set POLLPRI|POLLRDBAND and most
					 * filters do not support EVFILT_EXCEPT.
					 *
					 * We also filter out ENODEV since
					 * dev_dkqfilter returns ENODEV if
					 * EOPNOTSUPP is returned in an
					 * inner call.
					 *
					 * XXX: fix this
					 */
					if (kevp[i].filter != EVFILT_READ &&
					    kevp[i].filter != EVFILT_WRITE &&
					    kevp[i].data != EOPNOTSUPP &&
					    kevp[i].data != ENODEV) {
						if (count_res)
							++*res;
						pfd->revents |= POLLERR;
					}
					break;
				}
				if (nseldebug) {
					kprintf("poll index %d fd %d "
						"filter %d error %jd\n",
						pi, pfd->fd,
						kevp[i].filter,
						(intmax_t)kevp[i].data);
				}
				continue;
			}

			switch (kevp[i].filter) {
			case EVFILT_READ:
#if 0
				/*
				 * NODATA on the read side can indicate a
				 * half-closed situation and not necessarily
				 * a disconnect, so depend on the user
				 * issuing a read() and getting 0 bytes back.
				 */
				if (kevp[i].flags & EV_NODATA)
					pfd->revents |= POLLHUP;
#endif
				if ((kevp[i].flags & EV_EOF) &&
				    kevp[i].fflags != 0)
					pfd->revents |= POLLERR;
				if (pfd->events & POLLIN)
					pfd->revents |= POLLIN;
				if (pfd->events & POLLRDNORM)
					pfd->revents |= POLLRDNORM;
				break;
			case EVFILT_WRITE:
				/*
				 * As per the OpenGroup POLLHUP is mutually
				 * exclusive with the writability flags.  I
				 * consider this a bit broken but...
				 *
				 * In this case a disconnect is implied even
				 * for a half-closed (write side) situation.
				 */
				if (kevp[i].flags & EV_EOF) {
					pfd->revents |= POLLHUP;
					if (kevp[i].fflags != 0)
						pfd->revents |= POLLERR;
				} else {
					if (pfd->events & POLLOUT)
						pfd->revents |= POLLOUT;
					if (pfd->events & POLLWRNORM)
						pfd->revents |= POLLWRNORM;
				}
				break;
			case EVFILT_EXCEPT:
				/*
				 * EV_NODATA should never be tagged for this
				 * filter.
				 */
				if (pfd->events & POLLPRI)
					pfd->revents |= POLLPRI;
				if (pfd->events & POLLRDBAND)
					pfd->revents |= POLLRDBAND;
				break;
			}

			if (nseldebug) {
				kprintf("poll index %d/%d fd %d revents %08x\n",
					pi, pkap->nfds, pfd->fd, pfd->revents);
			}

			if (count_res && pfd->revents)
				++*res;
		} else {
			if (nseldebug) {
				kprintf("poll index %d mismatch %ju/%d\n",
					pi, (uintmax_t)kevp[i].ident, pfd->fd);
			}
		}
	}

	return (0);
}

static int
dopoll(int nfds, struct pollfd *fds, struct timespec *ts, int *res)
{
	struct poll_kevent_copyin_args ka;
	struct pollfd sfds[64];
	int bytes;
	int error;

	*res = 0;
	if (nfds < 0)
		return (EINVAL);

	if (nfds == 0 && ts)
		return (dotimeout_only(ts));

	/*
	 * This is a bit arbitrary but we need to limit internal kmallocs.
	 */
	if (nfds > maxfilesperproc * 2)
		nfds = maxfilesperproc * 2;
	bytes = sizeof(struct pollfd) * nfds;

	ka.lwp = curthread->td_lwp;
	ka.nfds = nfds;
	ka.pfds = 0;
	ka.error = 0;

	if (ka.nfds < 64)
		ka.fds = sfds;
	else
		ka.fds = kmalloc(bytes, M_SELECT, M_WAITOK);

	error = copyin(fds, ka.fds, bytes);
	if (error == 0)
		error = kern_kevent(&ka.lwp->lwp_kqueue, 0x7FFFFFFF, res, &ka,
				    poll_copyin, poll_copyout, ts);

	if (error == 0)
		error = copyout(ka.fds, fds, bytes);

	if (ka.fds != sfds)
		kfree(ka.fds, M_SELECT);

	ka.lwp->lwp_kqueue_serial += nfds;

	return (error);
}

static int
socket_wait_copyin(void *arg, struct kevent *kevp, int maxevents, int *events)
{
	return (0);
}

static int
socket_wait_copyout(void *arg, struct kevent *kevp, int count, int *res)
{
	++*res;
	return (0);
}

extern struct fileops socketops;

/*
 * NOTE: Callers of socket_wait() must already have a reference on the
 *	 socket.
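 *
 *	 The wait is implemented by temporarily wrapping the socket in a
 *	 file structure, registering an EVFILT_READ kevent on a private
 *	 kqueue, and sleeping in kern_kevent() until the socket becomes
 *	 readable or the timeout expires.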
 */
int
socket_wait(struct socket *so, struct timespec *ts, int *res)
{
	struct thread *td = curthread;
	struct file *fp;
	struct kqueue kq;
	struct kevent kev;
	int error, fd;

	if ((error = falloc(td->td_lwp, &fp, &fd)) != 0)
		return (error);

	fp->f_type = DTYPE_SOCKET;
	fp->f_flag = FREAD | FWRITE;
	fp->f_ops = &socketops;
	fp->f_data = so;
	fsetfd(td->td_lwp->lwp_proc->p_fd, fp, fd);

	kqueue_init(&kq, td->td_lwp->lwp_proc->p_fd);
	EV_SET(&kev, fd, EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, NULL);
	if ((error = kqueue_register(&kq, &kev)) != 0) {
		fdrop(fp);
		return (error);
	}

	error = kern_kevent(&kq, 1, res, NULL, socket_wait_copyin,
			    socket_wait_copyout, ts);

	EV_SET(&kev, fd, EVFILT_READ, EV_DELETE, 0, 0, NULL);
	kqueue_register(&kq, &kev);
	fp->f_ops = &badfileops;
	fdrop(fp);

	return (error);
}

/*
 * OpenBSD poll system call.
 * XXX this isn't quite a true representation.  OpenBSD uses select ops.
 *
 * MPSAFE
 */
int
sys_openbsd_poll(struct openbsd_poll_args *uap)
{
	return (sys_poll((struct poll_args *)uap));
}

/*ARGSUSED*/
int
seltrue(cdev_t dev, int events)
{
	return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
}