1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2007 Roman Divacky 5 * Copyright (c) 2014 Dmitry Chagin <dchagin@FreeBSD.org> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include <sys/param.h> 33 #include <sys/systm.h> 34 #include <sys/imgact.h> 35 #include <sys/kernel.h> 36 #include <sys/limits.h> 37 #include <sys/lock.h> 38 #include <sys/mutex.h> 39 #include <sys/callout.h> 40 #include <sys/capsicum.h> 41 #include <sys/types.h> 42 #include <sys/user.h> 43 #include <sys/file.h> 44 #include <sys/filedesc.h> 45 #include <sys/filio.h> 46 #include <sys/errno.h> 47 #include <sys/event.h> 48 #include <sys/poll.h> 49 #include <sys/proc.h> 50 #include <sys/selinfo.h> 51 #include <sys/specialfd.h> 52 #include <sys/sx.h> 53 #include <sys/syscallsubr.h> 54 #include <sys/timespec.h> 55 #include <sys/eventfd.h> 56 57 #ifdef COMPAT_LINUX32 58 #include <machine/../linux32/linux.h> 59 #include <machine/../linux32/linux32_proto.h> 60 #else 61 #include <machine/../linux/linux.h> 62 #include <machine/../linux/linux_proto.h> 63 #endif 64 65 #include <compat/linux/linux_emul.h> 66 #include <compat/linux/linux_event.h> 67 #include <compat/linux/linux_file.h> 68 #include <compat/linux/linux_signal.h> 69 #include <compat/linux/linux_timer.h> 70 #include <compat/linux/linux_util.h> 71 72 typedef uint64_t epoll_udata_t; 73 74 struct epoll_event { 75 uint32_t events; 76 epoll_udata_t data; 77 } 78 #if defined(__amd64__) 79 __attribute__((packed)) 80 #endif 81 ; 82 83 #define LINUX_MAX_EVENTS (INT_MAX / sizeof(struct epoll_event)) 84 85 static int epoll_to_kevent(struct thread *td, int fd, 86 struct epoll_event *l_event, struct kevent *kevent, 87 int *nkevents); 88 static void kevent_to_epoll(struct kevent *kevent, struct epoll_event *l_event); 89 static int epoll_kev_copyout(void *arg, struct kevent *kevp, int count); 90 static int epoll_kev_copyin(void *arg, struct kevent *kevp, int count); 91 static int epoll_register_kevent(struct thread *td, struct file *epfp, 92 int fd, int filter, unsigned int flags); 93 static int epoll_fd_registered(struct thread *td, struct file *epfp, 94 int fd); 95 static int epoll_delete_all_events(struct thread *td, struct file *epfp, 96 int fd); 97 98 struct epoll_copyin_args { 99 struct kevent *changelist; 100 }; 101 102 struct epoll_copyout_args { 103 struct epoll_event *leventlist; 104 struct proc *p; 105 uint32_t count; 106 int error; 107 }; 108 109 /* timerfd */ 110 typedef uint64_t timerfd_t; 111 112 static fo_rdwr_t timerfd_read; 113 static fo_ioctl_t timerfd_ioctl; 114 static fo_poll_t timerfd_poll; 115 static fo_kqfilter_t timerfd_kqfilter; 116 static fo_stat_t timerfd_stat; 117 static fo_close_t timerfd_close; 118 static fo_fill_kinfo_t timerfd_fill_kinfo; 119 120 static struct fileops timerfdops = { 121 .fo_read = timerfd_read, 122 .fo_write = invfo_rdwr, 123 .fo_truncate = invfo_truncate, 124 .fo_ioctl = timerfd_ioctl, 125 .fo_poll = timerfd_poll, 126 .fo_kqfilter = timerfd_kqfilter, 127 .fo_stat = timerfd_stat, 128 .fo_close = timerfd_close, 129 .fo_chmod = invfo_chmod, 130 .fo_chown = invfo_chown, 131 .fo_sendfile = invfo_sendfile, 132 .fo_fill_kinfo = timerfd_fill_kinfo, 133 .fo_flags = DFLAG_PASSABLE 134 }; 135 136 static void filt_timerfddetach(struct knote *kn); 137 static int filt_timerfdread(struct knote *kn, long hint); 138 139 static struct filterops timerfd_rfiltops = { 140 .f_isfd = 1, 141 .f_detach = filt_timerfddetach, 142 .f_event = filt_timerfdread 143 }; 144 145 struct timerfd { 146 clockid_t tfd_clockid; 147 struct itimerspec tfd_time; 148 struct callout tfd_callout; 149 timerfd_t tfd_count; 150 bool tfd_canceled; 151 struct selinfo tfd_sel; 152 struct mtx tfd_lock; 153 }; 154 155 static void linux_timerfd_expire(void *); 156 static void linux_timerfd_curval(struct timerfd *, struct itimerspec *); 157 158 static int 159 epoll_create_common(struct thread *td, int flags) 160 { 161 162 return (kern_kqueue(td, flags, NULL)); 163 } 164 165 #ifdef LINUX_LEGACY_SYSCALLS 166 int 167 linux_epoll_create(struct thread *td, struct linux_epoll_create_args *args) 168 { 169 170 /* 171 * args->size is unused. Linux just tests it 172 * and then forgets it as well. 173 */ 174 if (args->size <= 0) 175 return (EINVAL); 176 177 return (epoll_create_common(td, 0)); 178 } 179 #endif 180 181 int 182 linux_epoll_create1(struct thread *td, struct linux_epoll_create1_args *args) 183 { 184 int flags; 185 186 if ((args->flags & ~(LINUX_O_CLOEXEC)) != 0) 187 return (EINVAL); 188 189 flags = 0; 190 if ((args->flags & LINUX_O_CLOEXEC) != 0) 191 flags |= O_CLOEXEC; 192 193 return (epoll_create_common(td, flags)); 194 } 195 196 /* Structure converting function from epoll to kevent. */ 197 static int 198 epoll_to_kevent(struct thread *td, int fd, struct epoll_event *l_event, 199 struct kevent *kevent, int *nkevents) 200 { 201 uint32_t levents = l_event->events; 202 struct linux_pemuldata *pem; 203 struct proc *p; 204 unsigned short kev_flags = EV_ADD | EV_ENABLE; 205 206 /* flags related to how event is registered */ 207 if ((levents & LINUX_EPOLLONESHOT) != 0) 208 kev_flags |= EV_DISPATCH; 209 if ((levents & LINUX_EPOLLET) != 0) 210 kev_flags |= EV_CLEAR; 211 if ((levents & LINUX_EPOLLERR) != 0) 212 kev_flags |= EV_ERROR; 213 if ((levents & LINUX_EPOLLRDHUP) != 0) 214 kev_flags |= EV_EOF; 215 216 /* flags related to what event is registered */ 217 if ((levents & LINUX_EPOLL_EVRD) != 0) { 218 EV_SET(kevent, fd, EVFILT_READ, kev_flags, 0, 0, 0); 219 kevent->ext[0] = l_event->data; 220 ++kevent; 221 ++(*nkevents); 222 } 223 if ((levents & LINUX_EPOLL_EVWR) != 0) { 224 EV_SET(kevent, fd, EVFILT_WRITE, kev_flags, 0, 0, 0); 225 kevent->ext[0] = l_event->data; 226 ++kevent; 227 ++(*nkevents); 228 } 229 /* zero event mask is legal */ 230 if ((levents & (LINUX_EPOLL_EVRD | LINUX_EPOLL_EVWR)) == 0) { 231 EV_SET(kevent++, fd, EVFILT_READ, EV_ADD|EV_DISABLE, 0, 0, 0); 232 ++(*nkevents); 233 } 234 235 if ((levents & ~(LINUX_EPOLL_EVSUP)) != 0) { 236 p = td->td_proc; 237 238 pem = pem_find(p); 239 KASSERT(pem != NULL, ("epoll proc emuldata not found.\n")); 240 241 LINUX_PEM_XLOCK(pem); 242 if ((pem->flags & LINUX_XUNSUP_EPOLL) == 0) { 243 pem->flags |= LINUX_XUNSUP_EPOLL; 244 LINUX_PEM_XUNLOCK(pem); 245 linux_msg(td, "epoll_ctl unsupported flags: 0x%x", 246 levents); 247 } else 248 LINUX_PEM_XUNLOCK(pem); 249 return (EINVAL); 250 } 251 252 return (0); 253 } 254 255 /* 256 * Structure converting function from kevent to epoll. In a case 257 * this is called on error in registration we store the error in 258 * event->data and pick it up later in linux_epoll_ctl(). 259 */ 260 static void 261 kevent_to_epoll(struct kevent *kevent, struct epoll_event *l_event) 262 { 263 264 l_event->data = kevent->ext[0]; 265 266 if ((kevent->flags & EV_ERROR) != 0) { 267 l_event->events = LINUX_EPOLLERR; 268 return; 269 } 270 271 /* XXX EPOLLPRI, EPOLLHUP */ 272 switch (kevent->filter) { 273 case EVFILT_READ: 274 l_event->events = LINUX_EPOLLIN; 275 if ((kevent->flags & EV_EOF) != 0) 276 l_event->events |= LINUX_EPOLLRDHUP; 277 break; 278 case EVFILT_WRITE: 279 l_event->events = LINUX_EPOLLOUT; 280 break; 281 } 282 } 283 284 /* 285 * Copyout callback used by kevent. This converts kevent 286 * events to epoll events and copies them back to the 287 * userspace. This is also called on error on registering 288 * of the filter. 289 */ 290 static int 291 epoll_kev_copyout(void *arg, struct kevent *kevp, int count) 292 { 293 struct epoll_copyout_args *args; 294 struct epoll_event *eep; 295 int error, i; 296 297 args = (struct epoll_copyout_args*) arg; 298 eep = malloc(sizeof(*eep) * count, M_EPOLL, M_WAITOK | M_ZERO); 299 300 for (i = 0; i < count; i++) 301 kevent_to_epoll(&kevp[i], &eep[i]); 302 303 error = copyout(eep, args->leventlist, count * sizeof(*eep)); 304 if (error == 0) { 305 args->leventlist += count; 306 args->count += count; 307 } else if (args->error == 0) 308 args->error = error; 309 310 free(eep, M_EPOLL); 311 return (error); 312 } 313 314 /* 315 * Copyin callback used by kevent. This copies already 316 * converted filters from kernel memory to the kevent 317 * internal kernel memory. Hence the memcpy instead of 318 * copyin. 319 */ 320 static int 321 epoll_kev_copyin(void *arg, struct kevent *kevp, int count) 322 { 323 struct epoll_copyin_args *args; 324 325 args = (struct epoll_copyin_args*) arg; 326 327 memcpy(kevp, args->changelist, count * sizeof(*kevp)); 328 args->changelist += count; 329 330 return (0); 331 } 332 333 /* 334 * Load epoll filter, convert it to kevent filter 335 * and load it into kevent subsystem. 336 */ 337 int 338 linux_epoll_ctl(struct thread *td, struct linux_epoll_ctl_args *args) 339 { 340 struct file *epfp, *fp; 341 struct epoll_copyin_args ciargs; 342 struct kevent kev[2]; 343 struct kevent_copyops k_ops = { &ciargs, 344 NULL, 345 epoll_kev_copyin}; 346 struct epoll_event le; 347 cap_rights_t rights; 348 int nchanges = 0; 349 int error; 350 351 if (args->op != LINUX_EPOLL_CTL_DEL) { 352 error = copyin(args->event, &le, sizeof(le)); 353 if (error != 0) 354 return (error); 355 } 356 357 error = fget(td, args->epfd, 358 cap_rights_init_one(&rights, CAP_KQUEUE_CHANGE), &epfp); 359 if (error != 0) 360 return (error); 361 if (epfp->f_type != DTYPE_KQUEUE) { 362 error = EINVAL; 363 goto leave1; 364 } 365 366 /* Protect user data vector from incorrectly supplied fd. */ 367 error = fget(td, args->fd, 368 cap_rights_init_one(&rights, CAP_POLL_EVENT), &fp); 369 if (error != 0) 370 goto leave1; 371 372 /* Linux disallows spying on himself */ 373 if (epfp == fp) { 374 error = EINVAL; 375 goto leave0; 376 } 377 378 ciargs.changelist = kev; 379 380 if (args->op != LINUX_EPOLL_CTL_DEL) { 381 error = epoll_to_kevent(td, args->fd, &le, kev, &nchanges); 382 if (error != 0) 383 goto leave0; 384 } 385 386 switch (args->op) { 387 case LINUX_EPOLL_CTL_MOD: 388 error = epoll_delete_all_events(td, epfp, args->fd); 389 if (error != 0) 390 goto leave0; 391 break; 392 393 case LINUX_EPOLL_CTL_ADD: 394 if (epoll_fd_registered(td, epfp, args->fd)) { 395 error = EEXIST; 396 goto leave0; 397 } 398 break; 399 400 case LINUX_EPOLL_CTL_DEL: 401 /* CTL_DEL means unregister this fd with this epoll */ 402 error = epoll_delete_all_events(td, epfp, args->fd); 403 goto leave0; 404 405 default: 406 error = EINVAL; 407 goto leave0; 408 } 409 410 error = kern_kevent_fp(td, epfp, nchanges, 0, &k_ops, NULL); 411 412 leave0: 413 fdrop(fp, td); 414 415 leave1: 416 fdrop(epfp, td); 417 return (error); 418 } 419 420 /* 421 * Wait for a filter to be triggered on the epoll file descriptor. 422 */ 423 424 static int 425 linux_epoll_wait_ts(struct thread *td, int epfd, struct epoll_event *events, 426 int maxevents, struct timespec *tsp, sigset_t *uset) 427 { 428 struct epoll_copyout_args coargs; 429 struct kevent_copyops k_ops = { &coargs, 430 epoll_kev_copyout, 431 NULL}; 432 cap_rights_t rights; 433 struct file *epfp; 434 sigset_t omask; 435 int error; 436 437 if (maxevents <= 0 || maxevents > LINUX_MAX_EVENTS) 438 return (EINVAL); 439 440 error = fget(td, epfd, 441 cap_rights_init_one(&rights, CAP_KQUEUE_EVENT), &epfp); 442 if (error != 0) 443 return (error); 444 if (epfp->f_type != DTYPE_KQUEUE) { 445 error = EINVAL; 446 goto leave; 447 } 448 if (uset != NULL) { 449 error = kern_sigprocmask(td, SIG_SETMASK, uset, 450 &omask, 0); 451 if (error != 0) 452 goto leave; 453 td->td_pflags |= TDP_OLDMASK; 454 /* 455 * Make sure that ast() is called on return to 456 * usermode and TDP_OLDMASK is cleared, restoring old 457 * sigmask. 458 */ 459 ast_sched(td, TDA_SIGSUSPEND); 460 } 461 462 coargs.leventlist = events; 463 coargs.p = td->td_proc; 464 coargs.count = 0; 465 coargs.error = 0; 466 467 error = kern_kevent_fp(td, epfp, 0, maxevents, &k_ops, tsp); 468 if (error == 0 && coargs.error != 0) 469 error = coargs.error; 470 471 /* 472 * kern_kevent might return ENOMEM which is not expected from epoll_wait. 473 * Maybe we should translate that but I don't think it matters at all. 474 */ 475 if (error == 0) 476 td->td_retval[0] = coargs.count; 477 478 if (uset != NULL) 479 error = kern_sigprocmask(td, SIG_SETMASK, &omask, 480 NULL, 0); 481 leave: 482 fdrop(epfp, td); 483 return (error); 484 } 485 486 static int 487 linux_epoll_wait_common(struct thread *td, int epfd, struct epoll_event *events, 488 int maxevents, int timeout, sigset_t *uset) 489 { 490 struct timespec ts, *tsp; 491 492 /* 493 * Linux epoll_wait(2) man page states that timeout of -1 causes caller 494 * to block indefinitely. Real implementation does it if any negative 495 * timeout value is passed. 496 */ 497 if (timeout >= 0) { 498 /* Convert from milliseconds to timespec. */ 499 ts.tv_sec = timeout / 1000; 500 ts.tv_nsec = (timeout % 1000) * 1000000; 501 tsp = &ts; 502 } else { 503 tsp = NULL; 504 } 505 return (linux_epoll_wait_ts(td, epfd, events, maxevents, tsp, uset)); 506 507 } 508 509 #ifdef LINUX_LEGACY_SYSCALLS 510 int 511 linux_epoll_wait(struct thread *td, struct linux_epoll_wait_args *args) 512 { 513 514 return (linux_epoll_wait_common(td, args->epfd, args->events, 515 args->maxevents, args->timeout, NULL)); 516 } 517 #endif 518 519 int 520 linux_epoll_pwait(struct thread *td, struct linux_epoll_pwait_args *args) 521 { 522 sigset_t mask, *pmask; 523 int error; 524 525 error = linux_copyin_sigset(td, args->mask, sizeof(l_sigset_t), 526 &mask, &pmask); 527 if (error != 0) 528 return (error); 529 530 return (linux_epoll_wait_common(td, args->epfd, args->events, 531 args->maxevents, args->timeout, pmask)); 532 } 533 534 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 535 int 536 linux_epoll_pwait2_64(struct thread *td, struct linux_epoll_pwait2_64_args *args) 537 { 538 struct timespec ts, *tsa; 539 sigset_t mask, *pmask; 540 int error; 541 542 error = linux_copyin_sigset(td, args->mask, sizeof(l_sigset_t), 543 &mask, &pmask); 544 if (error != 0) 545 return (error); 546 547 if (args->timeout) { 548 error = linux_get_timespec64(&ts, args->timeout); 549 if (error != 0) 550 return (error); 551 tsa = &ts; 552 } else 553 tsa = NULL; 554 555 return (linux_epoll_wait_ts(td, args->epfd, args->events, 556 args->maxevents, tsa, pmask)); 557 } 558 #else 559 int 560 linux_epoll_pwait2(struct thread *td, struct linux_epoll_pwait2_args *args) 561 { 562 struct timespec ts, *tsa; 563 sigset_t mask, *pmask; 564 int error; 565 566 error = linux_copyin_sigset(td, args->mask, sizeof(l_sigset_t), 567 &mask, &pmask); 568 if (error != 0) 569 return (error); 570 571 if (args->timeout) { 572 error = linux_get_timespec(&ts, args->timeout); 573 if (error != 0) 574 return (error); 575 tsa = &ts; 576 } else 577 tsa = NULL; 578 579 return (linux_epoll_wait_ts(td, args->epfd, args->events, 580 args->maxevents, tsa, pmask)); 581 } 582 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 583 584 static int 585 epoll_register_kevent(struct thread *td, struct file *epfp, int fd, int filter, 586 unsigned int flags) 587 { 588 struct epoll_copyin_args ciargs; 589 struct kevent kev; 590 struct kevent_copyops k_ops = { &ciargs, 591 NULL, 592 epoll_kev_copyin}; 593 594 ciargs.changelist = &kev; 595 EV_SET(&kev, fd, filter, flags, 0, 0, 0); 596 597 return (kern_kevent_fp(td, epfp, 1, 0, &k_ops, NULL)); 598 } 599 600 static int 601 epoll_fd_registered(struct thread *td, struct file *epfp, int fd) 602 { 603 /* 604 * Set empty filter flags to avoid accidental modification of already 605 * registered events. In the case of event re-registration: 606 * 1. If event does not exists kevent() does nothing and returns ENOENT 607 * 2. If event does exists, it's enabled/disabled state is preserved 608 * but fflags, data and udata fields are overwritten. So we can not 609 * set socket lowats and store user's context pointer in udata. 610 */ 611 if (epoll_register_kevent(td, epfp, fd, EVFILT_READ, 0) != ENOENT || 612 epoll_register_kevent(td, epfp, fd, EVFILT_WRITE, 0) != ENOENT) 613 return (1); 614 615 return (0); 616 } 617 618 static int 619 epoll_delete_all_events(struct thread *td, struct file *epfp, int fd) 620 { 621 int error1, error2; 622 623 error1 = epoll_register_kevent(td, epfp, fd, EVFILT_READ, EV_DELETE); 624 error2 = epoll_register_kevent(td, epfp, fd, EVFILT_WRITE, EV_DELETE); 625 626 /* return 0 if at least one result positive */ 627 return (error1 == 0 ? 0 : error2); 628 } 629 630 #ifdef LINUX_LEGACY_SYSCALLS 631 int 632 linux_eventfd(struct thread *td, struct linux_eventfd_args *args) 633 { 634 struct specialfd_eventfd ae; 635 636 bzero(&ae, sizeof(ae)); 637 ae.initval = args->initval; 638 return (kern_specialfd(td, SPECIALFD_EVENTFD, &ae)); 639 } 640 #endif 641 642 int 643 linux_eventfd2(struct thread *td, struct linux_eventfd2_args *args) 644 { 645 struct specialfd_eventfd ae; 646 int flags; 647 648 if ((args->flags & ~(LINUX_O_CLOEXEC | LINUX_O_NONBLOCK | 649 LINUX_EFD_SEMAPHORE)) != 0) 650 return (EINVAL); 651 flags = 0; 652 if ((args->flags & LINUX_O_CLOEXEC) != 0) 653 flags |= EFD_CLOEXEC; 654 if ((args->flags & LINUX_O_NONBLOCK) != 0) 655 flags |= EFD_NONBLOCK; 656 if ((args->flags & LINUX_EFD_SEMAPHORE) != 0) 657 flags |= EFD_SEMAPHORE; 658 659 bzero(&ae, sizeof(ae)); 660 ae.flags = flags; 661 ae.initval = args->initval; 662 return (kern_specialfd(td, SPECIALFD_EVENTFD, &ae)); 663 } 664 665 int 666 linux_timerfd_create(struct thread *td, struct linux_timerfd_create_args *args) 667 { 668 struct timerfd *tfd; 669 struct file *fp; 670 clockid_t clockid; 671 int fflags, fd, error; 672 673 if ((args->flags & ~LINUX_TFD_CREATE_FLAGS) != 0) 674 return (EINVAL); 675 676 error = linux_to_native_clockid(&clockid, args->clockid); 677 if (error != 0) 678 return (error); 679 if (clockid != CLOCK_REALTIME && clockid != CLOCK_MONOTONIC) 680 return (EINVAL); 681 682 fflags = 0; 683 if ((args->flags & LINUX_TFD_CLOEXEC) != 0) 684 fflags |= O_CLOEXEC; 685 686 error = falloc(td, &fp, &fd, fflags); 687 if (error != 0) 688 return (error); 689 690 tfd = malloc(sizeof(*tfd), M_EPOLL, M_WAITOK | M_ZERO); 691 tfd->tfd_clockid = clockid; 692 mtx_init(&tfd->tfd_lock, "timerfd", NULL, MTX_DEF); 693 694 callout_init_mtx(&tfd->tfd_callout, &tfd->tfd_lock, 0); 695 knlist_init_mtx(&tfd->tfd_sel.si_note, &tfd->tfd_lock); 696 697 fflags = FREAD; 698 if ((args->flags & LINUX_O_NONBLOCK) != 0) 699 fflags |= FNONBLOCK; 700 701 finit(fp, fflags, DTYPE_LINUXTFD, tfd, &timerfdops); 702 fdrop(fp, td); 703 704 td->td_retval[0] = fd; 705 return (error); 706 } 707 708 static int 709 timerfd_close(struct file *fp, struct thread *td) 710 { 711 struct timerfd *tfd; 712 713 tfd = fp->f_data; 714 if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL) 715 return (EINVAL); 716 717 timespecclear(&tfd->tfd_time.it_value); 718 timespecclear(&tfd->tfd_time.it_interval); 719 720 callout_drain(&tfd->tfd_callout); 721 722 seldrain(&tfd->tfd_sel); 723 knlist_destroy(&tfd->tfd_sel.si_note); 724 725 fp->f_ops = &badfileops; 726 mtx_destroy(&tfd->tfd_lock); 727 free(tfd, M_EPOLL); 728 729 return (0); 730 } 731 732 static int 733 timerfd_read(struct file *fp, struct uio *uio, struct ucred *active_cred, 734 int flags, struct thread *td) 735 { 736 struct timerfd *tfd; 737 timerfd_t count; 738 int error; 739 740 tfd = fp->f_data; 741 if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL) 742 return (EINVAL); 743 744 if (uio->uio_resid < sizeof(timerfd_t)) 745 return (EINVAL); 746 747 error = 0; 748 mtx_lock(&tfd->tfd_lock); 749 retry: 750 if (tfd->tfd_canceled) { 751 tfd->tfd_count = 0; 752 mtx_unlock(&tfd->tfd_lock); 753 return (ECANCELED); 754 } 755 if (tfd->tfd_count == 0) { 756 if ((fp->f_flag & FNONBLOCK) != 0) { 757 mtx_unlock(&tfd->tfd_lock); 758 return (EAGAIN); 759 } 760 error = mtx_sleep(&tfd->tfd_count, &tfd->tfd_lock, PCATCH, "ltfdrd", 0); 761 if (error == 0) 762 goto retry; 763 } 764 if (error == 0) { 765 count = tfd->tfd_count; 766 tfd->tfd_count = 0; 767 mtx_unlock(&tfd->tfd_lock); 768 error = uiomove(&count, sizeof(timerfd_t), uio); 769 } else 770 mtx_unlock(&tfd->tfd_lock); 771 772 return (error); 773 } 774 775 static int 776 timerfd_poll(struct file *fp, int events, struct ucred *active_cred, 777 struct thread *td) 778 { 779 struct timerfd *tfd; 780 int revents = 0; 781 782 tfd = fp->f_data; 783 if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL) 784 return (POLLERR); 785 786 mtx_lock(&tfd->tfd_lock); 787 if ((events & (POLLIN|POLLRDNORM)) && tfd->tfd_count > 0) 788 revents |= events & (POLLIN|POLLRDNORM); 789 if (revents == 0) 790 selrecord(td, &tfd->tfd_sel); 791 mtx_unlock(&tfd->tfd_lock); 792 793 return (revents); 794 } 795 796 static int 797 timerfd_kqfilter(struct file *fp, struct knote *kn) 798 { 799 struct timerfd *tfd; 800 801 tfd = fp->f_data; 802 if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL) 803 return (EINVAL); 804 805 if (kn->kn_filter == EVFILT_READ) 806 kn->kn_fop = &timerfd_rfiltops; 807 else 808 return (EINVAL); 809 810 kn->kn_hook = tfd; 811 knlist_add(&tfd->tfd_sel.si_note, kn, 0); 812 813 return (0); 814 } 815 816 static void 817 filt_timerfddetach(struct knote *kn) 818 { 819 struct timerfd *tfd = kn->kn_hook; 820 821 mtx_lock(&tfd->tfd_lock); 822 knlist_remove(&tfd->tfd_sel.si_note, kn, 1); 823 mtx_unlock(&tfd->tfd_lock); 824 } 825 826 static int 827 filt_timerfdread(struct knote *kn, long hint) 828 { 829 struct timerfd *tfd = kn->kn_hook; 830 831 return (tfd->tfd_count > 0); 832 } 833 834 static int 835 timerfd_ioctl(struct file *fp, u_long cmd, void *data, 836 struct ucred *active_cred, struct thread *td) 837 { 838 839 if (fp->f_data == NULL || fp->f_type != DTYPE_LINUXTFD) 840 return (EINVAL); 841 842 switch (cmd) { 843 case FIONBIO: 844 case FIOASYNC: 845 return (0); 846 } 847 848 return (ENOTTY); 849 } 850 851 static int 852 timerfd_stat(struct file *fp, struct stat *st, struct ucred *active_cred) 853 { 854 855 return (ENXIO); 856 } 857 858 static int 859 timerfd_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp) 860 { 861 862 kif->kf_type = KF_TYPE_UNKNOWN; 863 return (0); 864 } 865 866 static void 867 linux_timerfd_clocktime(struct timerfd *tfd, struct timespec *ts) 868 { 869 870 if (tfd->tfd_clockid == CLOCK_REALTIME) 871 getnanotime(ts); 872 else /* CLOCK_MONOTONIC */ 873 getnanouptime(ts); 874 } 875 876 static void 877 linux_timerfd_curval(struct timerfd *tfd, struct itimerspec *ots) 878 { 879 struct timespec cts; 880 881 linux_timerfd_clocktime(tfd, &cts); 882 *ots = tfd->tfd_time; 883 if (ots->it_value.tv_sec != 0 || ots->it_value.tv_nsec != 0) { 884 timespecsub(&ots->it_value, &cts, &ots->it_value); 885 if (ots->it_value.tv_sec < 0 || 886 (ots->it_value.tv_sec == 0 && 887 ots->it_value.tv_nsec == 0)) { 888 ots->it_value.tv_sec = 0; 889 ots->it_value.tv_nsec = 1; 890 } 891 } 892 } 893 894 static int 895 linux_timerfd_gettime_common(struct thread *td, int fd, struct itimerspec *ots) 896 { 897 struct timerfd *tfd; 898 struct file *fp; 899 int error; 900 901 error = fget(td, fd, &cap_read_rights, &fp); 902 if (error != 0) 903 return (error); 904 tfd = fp->f_data; 905 if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL) { 906 error = EINVAL; 907 goto out; 908 } 909 910 mtx_lock(&tfd->tfd_lock); 911 linux_timerfd_curval(tfd, ots); 912 mtx_unlock(&tfd->tfd_lock); 913 914 out: 915 fdrop(fp, td); 916 return (error); 917 } 918 919 int 920 linux_timerfd_gettime(struct thread *td, struct linux_timerfd_gettime_args *args) 921 { 922 struct l_itimerspec lots; 923 struct itimerspec ots; 924 int error; 925 926 error = linux_timerfd_gettime_common(td, args->fd, &ots); 927 if (error != 0) 928 return (error); 929 error = native_to_linux_itimerspec(&lots, &ots); 930 if (error == 0) 931 error = copyout(&lots, args->old_value, sizeof(lots)); 932 return (error); 933 } 934 935 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 936 int 937 linux_timerfd_gettime64(struct thread *td, struct linux_timerfd_gettime64_args *args) 938 { 939 struct l_itimerspec64 lots; 940 struct itimerspec ots; 941 int error; 942 943 error = linux_timerfd_gettime_common(td, args->fd, &ots); 944 if (error != 0) 945 return (error); 946 error = native_to_linux_itimerspec64(&lots, &ots); 947 if (error == 0) 948 error = copyout(&lots, args->old_value, sizeof(lots)); 949 return (error); 950 } 951 #endif 952 953 static int 954 linux_timerfd_settime_common(struct thread *td, int fd, int flags, 955 struct itimerspec *nts, struct itimerspec *oval) 956 { 957 struct timespec cts, ts; 958 struct timerfd *tfd; 959 struct timeval tv; 960 struct file *fp; 961 int error; 962 963 if ((flags & ~LINUX_TFD_SETTIME_FLAGS) != 0) 964 return (EINVAL); 965 966 error = fget(td, fd, &cap_write_rights, &fp); 967 if (error != 0) 968 return (error); 969 tfd = fp->f_data; 970 if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL) { 971 error = EINVAL; 972 goto out; 973 } 974 975 mtx_lock(&tfd->tfd_lock); 976 if (!timespecisset(&nts->it_value)) 977 timespecclear(&nts->it_interval); 978 if (oval != NULL) 979 linux_timerfd_curval(tfd, oval); 980 981 bcopy(nts, &tfd->tfd_time, sizeof(*nts)); 982 tfd->tfd_count = 0; 983 if (timespecisset(&nts->it_value)) { 984 linux_timerfd_clocktime(tfd, &cts); 985 ts = nts->it_value; 986 if ((flags & LINUX_TFD_TIMER_ABSTIME) == 0) { 987 timespecadd(&tfd->tfd_time.it_value, &cts, 988 &tfd->tfd_time.it_value); 989 } else { 990 timespecsub(&ts, &cts, &ts); 991 } 992 TIMESPEC_TO_TIMEVAL(&tv, &ts); 993 callout_reset(&tfd->tfd_callout, tvtohz(&tv), 994 linux_timerfd_expire, tfd); 995 tfd->tfd_canceled = false; 996 } else { 997 tfd->tfd_canceled = true; 998 callout_stop(&tfd->tfd_callout); 999 } 1000 mtx_unlock(&tfd->tfd_lock); 1001 1002 out: 1003 fdrop(fp, td); 1004 return (error); 1005 } 1006 1007 int 1008 linux_timerfd_settime(struct thread *td, struct linux_timerfd_settime_args *args) 1009 { 1010 struct l_itimerspec lots; 1011 struct itimerspec nts, ots, *pots; 1012 int error; 1013 1014 error = copyin(args->new_value, &lots, sizeof(lots)); 1015 if (error != 0) 1016 return (error); 1017 error = linux_to_native_itimerspec(&nts, &lots); 1018 if (error != 0) 1019 return (error); 1020 pots = (args->old_value != NULL ? &ots : NULL); 1021 error = linux_timerfd_settime_common(td, args->fd, args->flags, 1022 &nts, pots); 1023 if (error == 0 && args->old_value != NULL) { 1024 error = native_to_linux_itimerspec(&lots, &ots); 1025 if (error == 0) 1026 error = copyout(&lots, args->old_value, sizeof(lots)); 1027 } 1028 return (error); 1029 } 1030 1031 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 1032 int 1033 linux_timerfd_settime64(struct thread *td, struct linux_timerfd_settime64_args *args) 1034 { 1035 struct l_itimerspec64 lots; 1036 struct itimerspec nts, ots, *pots; 1037 int error; 1038 1039 error = copyin(args->new_value, &lots, sizeof(lots)); 1040 if (error != 0) 1041 return (error); 1042 error = linux_to_native_itimerspec64(&nts, &lots); 1043 if (error != 0) 1044 return (error); 1045 pots = (args->old_value != NULL ? &ots : NULL); 1046 error = linux_timerfd_settime_common(td, args->fd, args->flags, 1047 &nts, pots); 1048 if (error == 0 && args->old_value != NULL) { 1049 error = native_to_linux_itimerspec64(&lots, &ots); 1050 if (error == 0) 1051 error = copyout(&lots, args->old_value, sizeof(lots)); 1052 } 1053 return (error); 1054 } 1055 #endif 1056 1057 static void 1058 linux_timerfd_expire(void *arg) 1059 { 1060 struct timespec cts, ts; 1061 struct timeval tv; 1062 struct timerfd *tfd; 1063 1064 tfd = (struct timerfd *)arg; 1065 1066 linux_timerfd_clocktime(tfd, &cts); 1067 if (timespeccmp(&cts, &tfd->tfd_time.it_value, >=)) { 1068 if (timespecisset(&tfd->tfd_time.it_interval)) 1069 timespecadd(&tfd->tfd_time.it_value, 1070 &tfd->tfd_time.it_interval, 1071 &tfd->tfd_time.it_value); 1072 else 1073 /* single shot timer */ 1074 timespecclear(&tfd->tfd_time.it_value); 1075 if (timespecisset(&tfd->tfd_time.it_value)) { 1076 timespecsub(&tfd->tfd_time.it_value, &cts, &ts); 1077 TIMESPEC_TO_TIMEVAL(&tv, &ts); 1078 callout_reset(&tfd->tfd_callout, tvtohz(&tv), 1079 linux_timerfd_expire, tfd); 1080 } 1081 tfd->tfd_count++; 1082 KNOTE_LOCKED(&tfd->tfd_sel.si_note, 0); 1083 selwakeup(&tfd->tfd_sel); 1084 wakeup(&tfd->tfd_count); 1085 } else if (timespecisset(&tfd->tfd_time.it_value)) { 1086 timespecsub(&tfd->tfd_time.it_value, &cts, &ts); 1087 TIMESPEC_TO_TIMEVAL(&tv, &ts); 1088 callout_reset(&tfd->tfd_callout, tvtohz(&tv), 1089 linux_timerfd_expire, tfd); 1090 } 1091 } 1092