/*	$OpenBSD: kern_event.c,v 1.175 2021/12/11 09:28:26 visa Exp $	*/

/*-
 * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: src/sys/kern/kern_event.c,v 1.22 2001/02/23 20:32:42 jlemon Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/atomic.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/pledge.h>
#include <sys/malloc.h>
#include <sys/unistd.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/fcntl.h>
#include <sys/selinfo.h>
#include <sys/queue.h>
#include <sys/event.h>
#include <sys/eventvar.h>
#include <sys/ktrace.h>
#include <sys/pool.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/stat.h>
#include <sys/uio.h>
#include <sys/mount.h>
#include <sys/poll.h>
#include <sys/syscallargs.h>
#include <sys/time.h>
#include <sys/timeout.h>
#include <sys/wait.h>

#ifdef DIAGNOSTIC
#define KLIST_ASSERT_LOCKED(kl) do {					\
	if ((kl)->kl_ops != NULL)					\
		(kl)->kl_ops->klo_assertlk((kl)->kl_arg);		\
	else								\
		KERNEL_ASSERT_LOCKED();					\
} while (0)
#else
#define KLIST_ASSERT_LOCKED(kl)	((void)(kl))
#endif

struct	kqueue *kqueue_alloc(struct filedesc *);
void	kqueue_terminate(struct proc *p, struct kqueue *);
void	KQREF(struct kqueue *);
void	KQRELE(struct kqueue *);

void	kqueue_purge(struct proc *, struct kqueue *);
int	kqueue_sleep(struct kqueue *, struct timespec *);

int	kqueue_read(struct file *, struct uio *, int);
int	kqueue_write(struct file *, struct uio *, int);
int	kqueue_ioctl(struct file *fp, u_long com, caddr_t data,
	    struct proc *p);
int	kqueue_poll(struct file *fp, int events, struct proc *p);
int	kqueue_kqfilter(struct file *fp, struct knote *kn);
int	kqueue_stat(struct file *fp, struct stat *st, struct proc *p);
int	kqueue_close(struct file *fp, struct proc *p);
void	kqueue_wakeup(struct kqueue *kq);

#ifdef KQUEUE_DEBUG
void	kqueue_do_check(struct kqueue *kq, const char *func, int line);
#define kqueue_check(kq)	kqueue_do_check((kq), __func__, __LINE__)
#else
#define kqueue_check(kq)	do {} while (0)
#endif

static int	filter_attach(struct knote *kn);
static void	filter_detach(struct knote *kn);
static int	filter_event(struct knote *kn, long hint);
static int	filter_modify(struct kevent *kev, struct knote *kn);
static int	filter_process(struct knote *kn, struct kevent *kev);
static void	kqueue_expand_hash(struct kqueue *kq);
static void	kqueue_expand_list(struct kqueue *kq, int fd);
static void	kqueue_task(void *);
static int	klist_lock(struct klist *);
static void	klist_unlock(struct klist *, int);

const struct fileops kqueueops = {
	.fo_read	= kqueue_read,
	.fo_write	= kqueue_write,
	.fo_ioctl	= kqueue_ioctl,
	.fo_poll	= kqueue_poll,
	.fo_kqfilter	= kqueue_kqfilter,
	.fo_stat	= kqueue_stat,
	.fo_close	= kqueue_close
};

void	knote_attach(struct knote *kn);
void	knote_detach(struct knote *kn);
void	knote_drop(struct knote *kn, struct proc *p);
void	knote_enqueue(struct knote *kn);
void	knote_dequeue(struct knote *kn);
int	knote_acquire(struct knote *kn, struct klist *, int);
void	knote_release(struct knote *kn);
void	knote_activate(struct knote *kn);
void	knote_remove(struct proc *p, struct kqueue *kq, struct knlist *list,
	    int purge);

void	filt_kqdetach(struct knote *kn);
int	filt_kqueue(struct knote *kn, long hint);
int	filt_kqueuemodify(struct kevent *kev, struct knote *kn);
int	filt_kqueueprocess(struct knote *kn, struct kevent *kev);
int	filt_kqueue_common(struct knote *kn, struct kqueue *kq);
int	filt_procattach(struct knote *kn);
void	filt_procdetach(struct knote *kn);
int	filt_proc(struct knote *kn, long hint);
int	filt_fileattach(struct knote *kn);
void	filt_timerexpire(void *knx);
int	filt_timerattach(struct knote *kn);
void	filt_timerdetach(struct knote *kn);
int	filt_timermodify(struct kevent *kev, struct knote *kn);
int	filt_timerprocess(struct knote *kn, struct kevent *kev);
void	filt_seltruedetach(struct knote *kn);

const struct filterops kqread_filtops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= NULL,
	.f_detach	= filt_kqdetach,
	.f_event	= filt_kqueue,
	.f_modify	= filt_kqueuemodify,
	.f_process	= filt_kqueueprocess,
};

const struct filterops proc_filtops = {
	.f_flags	= 0,
	.f_attach	= filt_procattach,
	.f_detach	= filt_procdetach,
	.f_event	= filt_proc,
};

const struct filterops file_filtops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= filt_fileattach,
	.f_detach	= NULL,
	.f_event	= NULL,
};

const struct filterops timer_filtops = {
	.f_flags	= 0,
	.f_attach	= filt_timerattach,
	.f_detach	= filt_timerdetach,
	.f_event	= NULL,
	.f_modify	= filt_timermodify,
	.f_process	= filt_timerprocess,
};

struct	pool knote_pool;
struct	pool kqueue_pool;
struct	mutex kqueue_klist_lock = MUTEX_INITIALIZER(IPL_MPFLOOR);
int kq_ntimeouts = 0;
int kq_timeoutmax = (4 * 1024);

#define KN_HASH(val, mask)	(((val) ^ (val >> 8)) & (mask))
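
/*
 * Note: the filter numbers from <sys/event.h> are negative (EVFILT_READ is
 * -1, EVFILT_WRITE is -2, and so on), so kqueue_register() converts a
 * filter into an index for the table below with ~kev->filter, which maps
 * EVFILT_READ to slot 0.
 */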

/*
 * Table for all system-defined filters.
 */
const struct filterops *const sysfilt_ops[] = {
	&file_filtops,			/* EVFILT_READ */
	&file_filtops,			/* EVFILT_WRITE */
	NULL, /*&aio_filtops,*/		/* EVFILT_AIO */
	&file_filtops,			/* EVFILT_VNODE */
	&proc_filtops,			/* EVFILT_PROC */
	&sig_filtops,			/* EVFILT_SIGNAL */
	&timer_filtops,			/* EVFILT_TIMER */
	&file_filtops,			/* EVFILT_DEVICE */
	&file_filtops,			/* EVFILT_EXCEPT */
};

void
KQREF(struct kqueue *kq)
{
	atomic_inc_int(&kq->kq_refs);
}

void
KQRELE(struct kqueue *kq)
{
	struct filedesc *fdp;

	if (atomic_dec_int_nv(&kq->kq_refs) > 0)
		return;

	fdp = kq->kq_fdp;
	if (rw_status(&fdp->fd_lock) == RW_WRITE) {
		LIST_REMOVE(kq, kq_next);
	} else {
		fdplock(fdp);
		LIST_REMOVE(kq, kq_next);
		fdpunlock(fdp);
	}

	KASSERT(TAILQ_EMPTY(&kq->kq_head));

	free(kq->kq_knlist, M_KEVENT, kq->kq_knlistsize *
	    sizeof(struct knlist));
	hashfree(kq->kq_knhash, KN_HASHSIZE, M_KEVENT);
	klist_free(&kq->kq_sel.si_note);
	pool_put(&kqueue_pool, kq);
}

void
kqueue_init(void)
{
	pool_init(&kqueue_pool, sizeof(struct kqueue), 0, IPL_MPFLOOR,
	    PR_WAITOK, "kqueuepl", NULL);
	pool_init(&knote_pool, sizeof(struct knote), 0, IPL_MPFLOOR,
	    PR_WAITOK, "knotepl", NULL);
}

void
kqueue_init_percpu(void)
{
	pool_cache_init(&knote_pool);
}

int
filt_fileattach(struct knote *kn)
{
	struct file *fp = kn->kn_fp;

	return fp->f_ops->fo_kqfilter(fp, kn);
}

int
kqueue_kqfilter(struct file *fp, struct knote *kn)
{
	struct kqueue *kq = kn->kn_fp->f_data;

	if (kn->kn_filter != EVFILT_READ)
		return (EINVAL);

	kn->kn_fop = &kqread_filtops;
	klist_insert(&kq->kq_sel.si_note, kn);
	return (0);
}

void
filt_kqdetach(struct knote *kn)
{
	struct kqueue *kq = kn->kn_fp->f_data;

	klist_remove(&kq->kq_sel.si_note, kn);
}

int
filt_kqueue_common(struct knote *kn, struct kqueue *kq)
{
	MUTEX_ASSERT_LOCKED(&kq->kq_lock);

	kn->kn_data = kq->kq_count;

	return (kn->kn_data > 0);
}

int
filt_kqueue(struct knote *kn, long hint)
{
	struct kqueue *kq = kn->kn_fp->f_data;
	int active;

	mtx_enter(&kq->kq_lock);
	active = filt_kqueue_common(kn, kq);
	mtx_leave(&kq->kq_lock);

	return (active);
}

int
filt_kqueuemodify(struct kevent *kev, struct knote *kn)
{
	struct kqueue *kq = kn->kn_fp->f_data;
	int active;

	mtx_enter(&kq->kq_lock);
	knote_modify(kev, kn);
	active = filt_kqueue_common(kn, kq);
	mtx_leave(&kq->kq_lock);

	return (active);
}

int
filt_kqueueprocess(struct knote *kn, struct kevent *kev)
{
	struct kqueue *kq = kn->kn_fp->f_data;
	int active;

	mtx_enter(&kq->kq_lock);
	if (kev != NULL && (kn->kn_flags & EV_ONESHOT))
		active = 1;
	else
		active = filt_kqueue_common(kn, kq);
	if (active)
		knote_submit(kn, kev);
	mtx_leave(&kq->kq_lock);

	return (active);
}

int
filt_procattach(struct knote *kn)
{
	struct process *pr;
	int s;

	if ((curproc->p_p->ps_flags & PS_PLEDGE) &&
	    (curproc->p_p->ps_pledge & PLEDGE_PROC) == 0)
		return pledge_fail(curproc, EPERM, PLEDGE_PROC);

	if (kn->kn_id > PID_MAX)
		return ESRCH;

	pr = prfind(kn->kn_id);
	if (pr == NULL)
		return (ESRCH);

	/* exiting processes can't be specified */
	if (pr->ps_flags & PS_EXITING)
		return (ESRCH);

	kn->kn_ptr.p_process = pr;
	kn->kn_flags |= EV_CLEAR;		/* automatically set */

	/*
	 * internal flag indicating registration done by kernel
	 */
	if (kn->kn_flags & EV_FLAG1) {
		kn->kn_data = kn->kn_sdata;		/* ppid */
		kn->kn_fflags = NOTE_CHILD;
		kn->kn_flags &= ~EV_FLAG1;
	}

	s = splhigh();
	klist_insert_locked(&pr->ps_klist, kn);
	splx(s);

	return (0);
}

/*
 * The knote may be attached to a different process, which may exit,
 * leaving nothing for the knote to be attached to.  So when the process
 * exits, the knote is marked as DETACHED and also flagged as ONESHOT so
 * it will be deleted when read out.  However, as part of the knote deletion,
 * this routine is called, so a check is needed to avoid actually performing
 * a detach, because the original process does not exist any more.
 */
void
filt_procdetach(struct knote *kn)
{
	struct kqueue *kq = kn->kn_kq;
	struct process *pr = kn->kn_ptr.p_process;
	int s, status;

	mtx_enter(&kq->kq_lock);
	status = kn->kn_status;
	mtx_leave(&kq->kq_lock);

	if (status & KN_DETACHED)
		return;

	s = splhigh();
	klist_remove_locked(&pr->ps_klist, kn);
	splx(s);
}

int
filt_proc(struct knote *kn, long hint)
{
	struct kqueue *kq = kn->kn_kq;
	u_int event;

	/*
	 * mask off extra data
	 */
	event = (u_int)hint & NOTE_PCTRLMASK;

	/*
	 * if the user is interested in this event, record it.
	 */
	if (kn->kn_sfflags & event)
		kn->kn_fflags |= event;

	/*
	 * process is gone, so flag the event as finished and remove it
	 * from the process's klist
	 */
	if (event == NOTE_EXIT) {
		struct process *pr = kn->kn_ptr.p_process;
		int s;

		mtx_enter(&kq->kq_lock);
		kn->kn_status |= KN_DETACHED;
		mtx_leave(&kq->kq_lock);

		s = splhigh();
		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
		kn->kn_data = W_EXITCODE(pr->ps_xexit, pr->ps_xsig);
		klist_remove_locked(&pr->ps_klist, kn);
		splx(s);
		return (1);
	}

	/*
	 * process forked, and user wants to track the new process,
	 * so attach a new knote to it, and immediately report an
	 * event with the parent's pid.
	 */
	if ((event == NOTE_FORK) && (kn->kn_sfflags & NOTE_TRACK)) {
		struct kevent kev;
		int error;

		/*
		 * register knote with new process.
		 */
		memset(&kev, 0, sizeof(kev));
		kev.ident = hint & NOTE_PDATAMASK;	/* pid */
		kev.filter = kn->kn_filter;
		kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1;
		kev.fflags = kn->kn_sfflags;
		kev.data = kn->kn_id;			/* parent */
		kev.udata = kn->kn_udata;		/* preserve udata */
		error = kqueue_register(kq, &kev, NULL);
		if (error)
			kn->kn_fflags |= NOTE_TRACKERR;
	}

	return (kn->kn_fflags != 0);
}

static void
filt_timer_timeout_add(struct knote *kn)
{
	struct timeval tv;
	struct timeout *to = kn->kn_hook;
	int tticks;

	tv.tv_sec = kn->kn_sdata / 1000;
	tv.tv_usec = (kn->kn_sdata % 1000) * 1000;
	tticks = tvtohz(&tv);
	/* Remove extra tick from tvtohz() if timeout has fired before. */
	if (timeout_triggered(to))
		tticks--;
	timeout_add(to, (tticks > 0) ? tticks : 1);
}
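
/*
 * Timer filter overview: filt_timerattach() allocates a struct timeout and
 * arms it via filt_timer_timeout_add() above.  When the timeout fires,
 * filt_timerexpire() bumps kn_data and activates the knote, and, unless the
 * event is EV_ONESHOT, rearms the timeout so the event keeps firing
 * periodically.  filt_timermodify() discards any pending expiry and starts
 * over with the new period.
 */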

void
filt_timerexpire(void *knx)
{
	struct knote *kn = knx;
	struct kqueue *kq = kn->kn_kq;

	kn->kn_data++;
	mtx_enter(&kq->kq_lock);
	knote_activate(kn);
	mtx_leave(&kq->kq_lock);

	if ((kn->kn_flags & EV_ONESHOT) == 0)
		filt_timer_timeout_add(kn);
}


/*
 * data contains amount of time to sleep, in milliseconds
 */
int
filt_timerattach(struct knote *kn)
{
	struct timeout *to;

	if (kq_ntimeouts > kq_timeoutmax)
		return (ENOMEM);
	kq_ntimeouts++;

	kn->kn_flags |= EV_CLEAR;	/* automatically set */
	to = malloc(sizeof(*to), M_KEVENT, M_WAITOK);
	timeout_set(to, filt_timerexpire, kn);
	kn->kn_hook = to;
	filt_timer_timeout_add(kn);

	return (0);
}

void
filt_timerdetach(struct knote *kn)
{
	struct timeout *to;

	to = (struct timeout *)kn->kn_hook;
	timeout_del_barrier(to);
	free(to, M_KEVENT, sizeof(*to));
	kq_ntimeouts--;
}

int
filt_timermodify(struct kevent *kev, struct knote *kn)
{
	struct kqueue *kq = kn->kn_kq;
	struct timeout *to = kn->kn_hook;

	/* Reset the timer. Any pending events are discarded. */

	timeout_del_barrier(to);

	mtx_enter(&kq->kq_lock);
	if (kn->kn_status & KN_QUEUED)
		knote_dequeue(kn);
	kn->kn_status &= ~KN_ACTIVE;
	mtx_leave(&kq->kq_lock);

	kn->kn_data = 0;
	knote_modify(kev, kn);
	/* Reinit timeout to invoke tick adjustment again. */
	timeout_set(to, filt_timerexpire, kn);
	filt_timer_timeout_add(kn);

	return (0);
}

int
filt_timerprocess(struct knote *kn, struct kevent *kev)
{
	int active, s;

	s = splsoftclock();
	active = (kn->kn_data != 0);
	if (active)
		knote_submit(kn, kev);
	splx(s);

	return (active);
}


/*
 * filt_seltrue:
 *
 *	This filter "event" routine simulates seltrue().
 */
int
filt_seltrue(struct knote *kn, long hint)
{

	/*
	 * We don't know how much data can be read/written,
	 * but we know that it *can* be.  This is about as
	 * good as select/poll does as well.
	 */
	kn->kn_data = 0;
	return (1);
}

int
filt_seltruemodify(struct kevent *kev, struct knote *kn)
{
	knote_modify(kev, kn);
	return (1);
}

int
filt_seltrueprocess(struct knote *kn, struct kevent *kev)
{
	knote_submit(kn, kev);
	return (1);
}

/*
 * This provides a full kqfilter entry for device switch tables, which has
 * the same effect as a filter that uses filt_seltrue() as its filter method.
 */
void
filt_seltruedetach(struct knote *kn)
{
	/* Nothing to do */
}

const struct filterops seltrue_filtops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= NULL,
	.f_detach	= filt_seltruedetach,
	.f_event	= filt_seltrue,
	.f_modify	= filt_seltruemodify,
	.f_process	= filt_seltrueprocess,
};

int
seltrue_kqfilter(dev_t dev, struct knote *kn)
{
	switch (kn->kn_filter) {
	case EVFILT_READ:
	case EVFILT_WRITE:
		kn->kn_fop = &seltrue_filtops;
		break;
	default:
		return (EINVAL);
	}

	/* Nothing more to do */
	return (0);
}

static int
filt_dead(struct knote *kn, long hint)
{
	kn->kn_flags |= (EV_EOF | EV_ONESHOT);
	if (kn->kn_flags & __EV_POLL)
		kn->kn_flags |= __EV_HUP;
	kn->kn_data = 0;
	return (1);
}

static void
filt_deaddetach(struct knote *kn)
{
	/* Nothing to do */
}

const struct filterops dead_filtops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= NULL,
	.f_detach	= filt_deaddetach,
	.f_event	= filt_dead,
	.f_modify	= filt_seltruemodify,
	.f_process	= filt_seltrueprocess,
};

static int
filt_badfd(struct knote *kn, long hint)
{
	kn->kn_flags |= (EV_ERROR | EV_ONESHOT);
	kn->kn_data = EBADF;
	return (1);
}

/* For use with kqpoll. */
const struct filterops badfd_filtops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= NULL,
	.f_detach	= filt_deaddetach,
	.f_event	= filt_badfd,
	.f_modify	= filt_seltruemodify,
	.f_process	= filt_seltrueprocess,
};
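
/*
 * The filter_*() helpers below wrap the per-filter callbacks from
 * struct filterops.  Filters that are not marked FILTEROP_MPSAFE are
 * invoked with the kernel lock held.  For legacy filters that provide
 * only f_event, filter_modify() and filter_process() emulate the newer
 * f_modify/f_process interface on top of f_event.
 */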

static int
filter_attach(struct knote *kn)
{
	int error;

	if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) {
		error = kn->kn_fop->f_attach(kn);
	} else {
		KERNEL_LOCK();
		error = kn->kn_fop->f_attach(kn);
		KERNEL_UNLOCK();
	}
	return (error);
}

static void
filter_detach(struct knote *kn)
{
	if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) {
		kn->kn_fop->f_detach(kn);
	} else {
		KERNEL_LOCK();
		kn->kn_fop->f_detach(kn);
		KERNEL_UNLOCK();
	}
}

static int
filter_event(struct knote *kn, long hint)
{
	if ((kn->kn_fop->f_flags & FILTEROP_MPSAFE) == 0)
		KERNEL_ASSERT_LOCKED();

	return (kn->kn_fop->f_event(kn, hint));
}

static int
filter_modify(struct kevent *kev, struct knote *kn)
{
	int active, s;

	if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) {
		active = kn->kn_fop->f_modify(kev, kn);
	} else {
		KERNEL_LOCK();
		if (kn->kn_fop->f_modify != NULL) {
			active = kn->kn_fop->f_modify(kev, kn);
		} else {
			/* Emulate f_modify using f_event. */
			s = splhigh();
			knote_modify(kev, kn);
			active = kn->kn_fop->f_event(kn, 0);
			splx(s);
		}
		KERNEL_UNLOCK();
	}
	return (active);
}

static int
filter_process(struct knote *kn, struct kevent *kev)
{
	int active, s;

	if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) {
		active = kn->kn_fop->f_process(kn, kev);
	} else {
		KERNEL_LOCK();
		if (kn->kn_fop->f_process != NULL) {
			active = kn->kn_fop->f_process(kn, kev);
		} else {
			/* Emulate f_process using f_event. */
			s = splhigh();
			/*
			 * If called from kqueue_scan(), skip f_event
			 * when EV_ONESHOT is set, to preserve old behaviour.
			 */
			if (kev != NULL && (kn->kn_flags & EV_ONESHOT))
				active = 1;
			else
				active = kn->kn_fop->f_event(kn, 0);
			if (active)
				knote_submit(kn, kev);
			splx(s);
		}
		KERNEL_UNLOCK();
	}
	return (active);
}

/*
 * Initialize the current thread for poll/select system call.
 * num indicates the number of serials that the system call may utilize.
 * After this function, the valid range of serials is
 * p_kq_serial <= x < p_kq_serial + num.
 */
void
kqpoll_init(unsigned int num)
{
	struct proc *p = curproc;
	struct filedesc *fdp;

	if (p->p_kq == NULL) {
		p->p_kq = kqueue_alloc(p->p_fd);
		p->p_kq_serial = arc4random();
		fdp = p->p_fd;
		fdplock(fdp);
		LIST_INSERT_HEAD(&fdp->fd_kqlist, p->p_kq, kq_next);
		fdpunlock(fdp);
	}

	if (p->p_kq_serial + num < p->p_kq_serial) {
		/* Serial is about to wrap. Clear all attached knotes. */
		kqueue_purge(p, p->p_kq);
		p->p_kq_serial = 0;
	}
}

/*
 * Finish poll/select system call.
 * num must have the same value that was used with kqpoll_init().
 */
void
kqpoll_done(unsigned int num)
{
	struct proc *p = curproc;

	KASSERT(p->p_kq != NULL);
	KASSERT(p->p_kq_serial + num >= p->p_kq_serial);

	p->p_kq_serial += num;
}

void
kqpoll_exit(void)
{
	struct proc *p = curproc;

	if (p->p_kq == NULL)
		return;

	kqueue_purge(p, p->p_kq);
	kqueue_terminate(p, p->p_kq);
	KASSERT(p->p_kq->kq_refs == 1);
	KQRELE(p->p_kq);
	p->p_kq = NULL;
}

struct kqueue *
kqueue_alloc(struct filedesc *fdp)
{
	struct kqueue *kq;

	kq = pool_get(&kqueue_pool, PR_WAITOK | PR_ZERO);
	kq->kq_refs = 1;
	kq->kq_fdp = fdp;
	TAILQ_INIT(&kq->kq_head);
	mtx_init(&kq->kq_lock, IPL_HIGH);
	task_set(&kq->kq_task, kqueue_task, kq);
	klist_init_mutex(&kq->kq_sel.si_note, &kqueue_klist_lock);

	return (kq);
}

int
sys_kqueue(struct proc *p, void *v, register_t *retval)
{
	struct filedesc *fdp = p->p_fd;
	struct kqueue *kq;
	struct file *fp;
	int fd, error;

	kq = kqueue_alloc(fdp);

	fdplock(fdp);
	error = falloc(p, &fp, &fd);
	if (error)
		goto out;
	fp->f_flag = FREAD | FWRITE;
	fp->f_type = DTYPE_KQUEUE;
	fp->f_ops = &kqueueops;
	fp->f_data = kq;
	*retval = fd;
	LIST_INSERT_HEAD(&fdp->fd_kqlist, kq, kq_next);
	kq = NULL;
	fdinsert(fdp, fd, 0, fp);
	FRELE(fp, p);
out:
	fdpunlock(fdp);
	if (kq != NULL)
		pool_put(&kqueue_pool, kq);
	return (error);
}
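
/*
 * Illustrative userland usage of the interface implemented by sys_kqueue()
 * and sys_kevent() below (sketch only; error handling omitted):
 *
 *	struct kevent kev;
 *	int kq = kqueue();
 *
 *	EV_SET(&kev, fd, EVFILT_READ, EV_ADD, 0, 0, NULL);
 *	kevent(kq, &kev, 1, NULL, 0, NULL);	register the change
 *	kevent(kq, NULL, 0, &kev, 1, NULL);	wait for one event
 */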

int
sys_kevent(struct proc *p, void *v, register_t *retval)
{
	struct kqueue_scan_state scan;
	struct filedesc* fdp = p->p_fd;
	struct sys_kevent_args /* {
		syscallarg(int)	fd;
		syscallarg(const struct kevent *) changelist;
		syscallarg(int)	nchanges;
		syscallarg(struct kevent *) eventlist;
		syscallarg(int)	nevents;
		syscallarg(const struct timespec *) timeout;
	} */ *uap = v;
	struct kevent *kevp;
	struct kqueue *kq;
	struct file *fp;
	struct timespec ts;
	struct timespec *tsp = NULL;
	int i, n, nerrors, error;
	int ready, total;
	struct kevent kev[KQ_NEVENTS];

	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
		return (EBADF);

	if (fp->f_type != DTYPE_KQUEUE) {
		error = EBADF;
		goto done;
	}

	if (SCARG(uap, timeout) != NULL) {
		error = copyin(SCARG(uap, timeout), &ts, sizeof(ts));
		if (error)
			goto done;
#ifdef KTRACE
		if (KTRPOINT(p, KTR_STRUCT))
			ktrreltimespec(p, &ts);
#endif
		if (ts.tv_sec < 0 || !timespecisvalid(&ts)) {
			error = EINVAL;
			goto done;
		}
		tsp = &ts;
	}

	kq = fp->f_data;
	nerrors = 0;

	while ((n = SCARG(uap, nchanges)) > 0) {
		if (n > nitems(kev))
			n = nitems(kev);
		error = copyin(SCARG(uap, changelist), kev,
		    n * sizeof(struct kevent));
		if (error)
			goto done;
#ifdef KTRACE
		if (KTRPOINT(p, KTR_STRUCT))
			ktrevent(p, kev, n);
#endif
		for (i = 0; i < n; i++) {
			kevp = &kev[i];
			kevp->flags &= ~EV_SYSFLAGS;
			error = kqueue_register(kq, kevp, p);
			if (error || (kevp->flags & EV_RECEIPT)) {
				if (SCARG(uap, nevents) != 0) {
					kevp->flags = EV_ERROR;
					kevp->data = error;
					copyout(kevp, SCARG(uap, eventlist),
					    sizeof(*kevp));
					SCARG(uap, eventlist)++;
					SCARG(uap, nevents)--;
					nerrors++;
				} else {
					goto done;
				}
			}
		}
		SCARG(uap, nchanges) -= n;
		SCARG(uap, changelist) += n;
	}
	if (nerrors) {
		*retval = nerrors;
		error = 0;
		goto done;
	}

	kqueue_scan_setup(&scan, kq);
	FRELE(fp, p);
	/*
	 * Collect as many events as we can.  The timeout on successive
	 * loops is disabled (kqueue_scan() becomes non-blocking).
	 */
	total = 0;
	error = 0;
	while ((n = SCARG(uap, nevents) - total) > 0) {
		if (n > nitems(kev))
			n = nitems(kev);
		ready = kqueue_scan(&scan, n, kev, tsp, p, &error);
		if (ready == 0)
			break;
		error = copyout(kev, SCARG(uap, eventlist) + total,
		    sizeof(struct kevent) * ready);
#ifdef KTRACE
		if (KTRPOINT(p, KTR_STRUCT))
			ktrevent(p, kev, ready);
#endif
		total += ready;
		if (error || ready < n)
			break;
	}
	kqueue_scan_finish(&scan);
	*retval = total;
	return (error);

done:
	FRELE(fp, p);
	return (error);
}

#ifdef KQUEUE_DEBUG
void
kqueue_do_check(struct kqueue *kq, const char *func, int line)
{
	struct knote *kn;
	int count = 0, nmarker = 0;

	MUTEX_ASSERT_LOCKED(&kq->kq_lock);

	TAILQ_FOREACH(kn, &kq->kq_head, kn_tqe) {
		if (kn->kn_filter == EVFILT_MARKER) {
			if ((kn->kn_status & KN_QUEUED) != 0)
				panic("%s:%d: kq=%p kn=%p marker QUEUED",
				    func, line, kq, kn);
			nmarker++;
		} else {
			if ((kn->kn_status & KN_ACTIVE) == 0)
				panic("%s:%d: kq=%p kn=%p knote !ACTIVE",
				    func, line, kq, kn);
			if ((kn->kn_status & KN_QUEUED) == 0)
				panic("%s:%d: kq=%p kn=%p knote !QUEUED",
				    func, line, kq, kn);
			if (kn->kn_kq != kq)
				panic("%s:%d: kq=%p kn=%p kn_kq=%p != kq",
				    func, line, kq, kn, kn->kn_kq);
			count++;
			if (count > kq->kq_count)
				goto bad;
		}
	}
	if (count != kq->kq_count) {
bad:
		panic("%s:%d: kq=%p kq_count=%d count=%d nmarker=%d",
		    func, line, kq, kq->kq_count, count, nmarker);
	}
}
#endif

int
kqueue_register(struct kqueue *kq, struct kevent *kev, struct proc *p)
{
	struct filedesc *fdp = kq->kq_fdp;
	const struct filterops *fops = NULL;
	struct file *fp = NULL;
	struct knote *kn = NULL, *newkn = NULL;
	struct knlist *list = NULL;
	int active, error = 0;

	if (kev->filter < 0) {
		if (kev->filter + EVFILT_SYSCOUNT < 0)
			return (EINVAL);
		fops = sysfilt_ops[~kev->filter];	/* to 0-base index */
	}

	if (fops == NULL) {
		/*
		 * XXX
		 * filter attach routine is responsible for ensuring that
		 * the identifier can be attached to it.
		 */
		return (EINVAL);
	}

	if (fops->f_flags & FILTEROP_ISFD) {
		/* validate descriptor */
		if (kev->ident > INT_MAX)
			return (EBADF);
	}

	if (kev->flags & EV_ADD)
		newkn = pool_get(&knote_pool, PR_WAITOK | PR_ZERO);

again:
	if (fops->f_flags & FILTEROP_ISFD) {
		if ((fp = fd_getfile(fdp, kev->ident)) == NULL) {
			error = EBADF;
			goto done;
		}
		mtx_enter(&kq->kq_lock);
		if (kev->flags & EV_ADD)
			kqueue_expand_list(kq, kev->ident);
		if (kev->ident < kq->kq_knlistsize)
			list = &kq->kq_knlist[kev->ident];
	} else {
		mtx_enter(&kq->kq_lock);
		if (kev->flags & EV_ADD)
			kqueue_expand_hash(kq);
		if (kq->kq_knhashmask != 0) {
			list = &kq->kq_knhash[
			    KN_HASH((u_long)kev->ident, kq->kq_knhashmask)];
		}
	}
	if (list != NULL) {
		SLIST_FOREACH(kn, list, kn_link) {
			if (kev->filter == kn->kn_filter &&
			    kev->ident == kn->kn_id) {
				if (!knote_acquire(kn, NULL, 0)) {
					/* knote_acquire() has released
					 * kq_lock. */
					if (fp != NULL) {
						FRELE(fp, p);
						fp = NULL;
					}
					goto again;
				}
				break;
			}
		}
	}
	KASSERT(kn == NULL || (kn->kn_status & KN_PROCESSING) != 0);

	if (kn == NULL && ((kev->flags & EV_ADD) == 0)) {
		mtx_leave(&kq->kq_lock);
		error = ENOENT;
		goto done;
	}

	/*
	 * kn now contains the matching knote, or NULL if no match.
	 */
	if (kev->flags & EV_ADD) {
		if (kn == NULL) {
			kn = newkn;
			newkn = NULL;
			kn->kn_status = KN_PROCESSING;
			kn->kn_fp = fp;
			kn->kn_kq = kq;
			kn->kn_fop = fops;

			/*
			 * apply reference count to knote structure, and
			 * do not release it at the end of this routine.
			 */
			fp = NULL;

			kn->kn_sfflags = kev->fflags;
			kn->kn_sdata = kev->data;
			kev->fflags = 0;
			kev->data = 0;
			kn->kn_kevent = *kev;

			knote_attach(kn);
			mtx_leave(&kq->kq_lock);

			error = filter_attach(kn);
			if (error != 0) {
				knote_drop(kn, p);
				goto done;
			}

			/*
			 * If this is a file descriptor filter, check if
			 * fd was closed while the knote was being added.
			 * knote_fdclose() has missed kn if the function
			 * ran before kn appeared in kq_knlist.
			 */
			if ((fops->f_flags & FILTEROP_ISFD) &&
			    fd_checkclosed(fdp, kev->ident, kn->kn_fp)) {
				/*
				 * Drop the knote silently without error
				 * because another thread might already have
				 * seen it. This corresponds to the insert
				 * happening in full before the close.
				 */
				filter_detach(kn);
				knote_drop(kn, p);
				goto done;
			}

			/* Check if there is a pending event. */
			active = filter_process(kn, NULL);
			mtx_enter(&kq->kq_lock);
			if (active)
				knote_activate(kn);
		} else if (kn->kn_fop == &badfd_filtops) {
			/*
			 * Nothing expects this badfd knote any longer.
			 * Drop it to make room for the new knote and retry.
			 */
			KASSERT(kq == p->p_kq);
			mtx_leave(&kq->kq_lock);
			filter_detach(kn);
			knote_drop(kn, p);

			KASSERT(fp != NULL);
			FRELE(fp, p);
			fp = NULL;

			goto again;
		} else {
			/*
			 * The user may change some filter values after the
			 * initial EV_ADD, but doing so will not reset any
			 * filters which have already been triggered.
			 */
			mtx_leave(&kq->kq_lock);
			active = filter_modify(kev, kn);
			mtx_enter(&kq->kq_lock);
			if (active)
				knote_activate(kn);
			if (kev->flags & EV_ERROR) {
				error = kev->data;
				goto release;
			}
		}
	} else if (kev->flags & EV_DELETE) {
		mtx_leave(&kq->kq_lock);
		filter_detach(kn);
		knote_drop(kn, p);
		goto done;
	}

	if ((kev->flags & EV_DISABLE) && ((kn->kn_status & KN_DISABLED) == 0))
		kn->kn_status |= KN_DISABLED;

	if ((kev->flags & EV_ENABLE) && (kn->kn_status & KN_DISABLED)) {
		kn->kn_status &= ~KN_DISABLED;
		mtx_leave(&kq->kq_lock);
		/* Check if there is a pending event. */
		active = filter_process(kn, NULL);
		mtx_enter(&kq->kq_lock);
		if (active)
			knote_activate(kn);
	}

release:
	knote_release(kn);
	mtx_leave(&kq->kq_lock);
done:
	if (fp != NULL)
		FRELE(fp, p);
	if (newkn != NULL)
		pool_put(&knote_pool, newkn);
	return (error);
}

int
kqueue_sleep(struct kqueue *kq, struct timespec *tsp)
{
	struct timespec elapsed, start, stop;
	uint64_t nsecs;
	int error;

	MUTEX_ASSERT_LOCKED(&kq->kq_lock);

	if (tsp != NULL) {
		getnanouptime(&start);
		nsecs = MIN(TIMESPEC_TO_NSEC(tsp), MAXTSLP);
	} else
		nsecs = INFSLP;
	error = msleep_nsec(kq, &kq->kq_lock, PSOCK | PCATCH | PNORELOCK,
	    "kqread", nsecs);
	if (tsp != NULL) {
		getnanouptime(&stop);
		timespecsub(&stop, &start, &elapsed);
		timespecsub(tsp, &elapsed, tsp);
		if (tsp->tv_sec < 0)
			timespecclear(tsp);
	}

	return (error);
}

/*
 * Scan the kqueue, blocking if necessary until the target time is reached.
 * If tsp is NULL we block indefinitely.  If tsp->ts_secs/nsecs are both
 * 0 we do not block at all.
 */
int
kqueue_scan(struct kqueue_scan_state *scan, int maxevents,
    struct kevent *kevp, struct timespec *tsp, struct proc *p, int *errorp)
{
	struct kqueue *kq = scan->kqs_kq;
	struct knote *kn;
	int error = 0, nkev = 0;

	if (maxevents == 0)
		goto done;
retry:
	KASSERT(nkev == 0);

	error = 0;

	/* msleep() with PCATCH requires kernel lock. */
	KERNEL_LOCK();

	mtx_enter(&kq->kq_lock);

	if (kq->kq_state & KQ_DYING) {
		mtx_leave(&kq->kq_lock);
		KERNEL_UNLOCK();
		error = EBADF;
		goto done;
	}

	if (kq->kq_count == 0) {
		/*
		 * Successive loops are only necessary if there are more
		 * ready events to gather, so they don't need to block.
		 */
		if ((tsp != NULL && !timespecisset(tsp)) ||
		    scan->kqs_nevent != 0) {
			mtx_leave(&kq->kq_lock);
			KERNEL_UNLOCK();
			error = 0;
			goto done;
		}
		kq->kq_state |= KQ_SLEEP;
		error = kqueue_sleep(kq, tsp);
		/* kqueue_sleep() has released kq_lock. */
		KERNEL_UNLOCK();
		if (error == 0 || error == EWOULDBLOCK)
			goto retry;
		/* don't restart after signals... */
		if (error == ERESTART)
			error = EINTR;
		goto done;
	}

	/* The actual scan does not sleep on kq, so unlock the kernel. */
	KERNEL_UNLOCK();

	/*
	 * Put the end marker in the queue to limit the scan to the events
	 * that are currently active.  This prevents events from being
	 * recollected if they reactivate during scan.
	 *
	 * If a partial scan has been performed already but no events have
	 * been collected, reposition the end marker to make any new events
	 * reachable.
	 */
	if (!scan->kqs_queued) {
		TAILQ_INSERT_TAIL(&kq->kq_head, &scan->kqs_end, kn_tqe);
		scan->kqs_queued = 1;
	} else if (scan->kqs_nevent == 0) {
		TAILQ_REMOVE(&kq->kq_head, &scan->kqs_end, kn_tqe);
		TAILQ_INSERT_TAIL(&kq->kq_head, &scan->kqs_end, kn_tqe);
	}

	TAILQ_INSERT_HEAD(&kq->kq_head, &scan->kqs_start, kn_tqe);
	while (nkev < maxevents) {
		kn = TAILQ_NEXT(&scan->kqs_start, kn_tqe);
		if (kn->kn_filter == EVFILT_MARKER) {
			if (kn == &scan->kqs_end)
				break;

			/* Move start marker past another thread's marker. */
			TAILQ_REMOVE(&kq->kq_head, &scan->kqs_start, kn_tqe);
			TAILQ_INSERT_AFTER(&kq->kq_head, kn, &scan->kqs_start,
			    kn_tqe);
			continue;
		}

		if (!knote_acquire(kn, NULL, 0)) {
			/* knote_acquire() has released kq_lock. */
			mtx_enter(&kq->kq_lock);
			continue;
		}

		kqueue_check(kq);
		TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
		kn->kn_status &= ~KN_QUEUED;
		kq->kq_count--;
		kqueue_check(kq);

		if (kn->kn_status & KN_DISABLED) {
			knote_release(kn);
			continue;
		}

		mtx_leave(&kq->kq_lock);

		/* Drop expired kqpoll knotes. */
		if (p->p_kq == kq &&
		    p->p_kq_serial > (unsigned long)kn->kn_udata) {
			filter_detach(kn);
			knote_drop(kn, p);
			mtx_enter(&kq->kq_lock);
			continue;
		}

		memset(kevp, 0, sizeof(*kevp));
		if (filter_process(kn, kevp) == 0) {
			mtx_enter(&kq->kq_lock);
			if ((kn->kn_status & KN_QUEUED) == 0)
				kn->kn_status &= ~KN_ACTIVE;
			knote_release(kn);
			kqueue_check(kq);
			continue;
		}

		/*
		 * Post-event action on the note
		 */
		if (kevp->flags & EV_ONESHOT) {
			filter_detach(kn);
			knote_drop(kn, p);
			mtx_enter(&kq->kq_lock);
		} else if (kevp->flags & (EV_CLEAR | EV_DISPATCH)) {
			mtx_enter(&kq->kq_lock);
			if (kevp->flags & EV_DISPATCH)
				kn->kn_status |= KN_DISABLED;
			if ((kn->kn_status & KN_QUEUED) == 0)
				kn->kn_status &= ~KN_ACTIVE;
			knote_release(kn);
		} else {
			mtx_enter(&kq->kq_lock);
			if ((kn->kn_status & KN_QUEUED) == 0) {
				kqueue_check(kq);
				kq->kq_count++;
				kn->kn_status |= KN_QUEUED;
				TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
			}
			knote_release(kn);
		}
		kqueue_check(kq);

		kevp++;
		nkev++;
		scan->kqs_nevent++;
	}
	TAILQ_REMOVE(&kq->kq_head, &scan->kqs_start, kn_tqe);
	mtx_leave(&kq->kq_lock);
	if (scan->kqs_nevent == 0)
		goto retry;
done:
	*errorp = error;
	return (nkev);
}
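
/*
 * A scan is bracketed by kqueue_scan_setup() and kqueue_scan_finish().
 * sys_kevent() above uses the interface roughly like this:
 *
 *	kqueue_scan_setup(&scan, kq);
 *	while (room for more events) {
 *		nkev = kqueue_scan(&scan, n, kev, tsp, p, &error);
 *		if (nkev == 0)
 *			break;
 *		... copy out kev[0..nkev-1] ...
 *	}
 *	kqueue_scan_finish(&scan);
 *
 * Successive kqueue_scan() calls of the same scan that have already
 * collected events do not block.
 */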

void
kqueue_scan_setup(struct kqueue_scan_state *scan, struct kqueue *kq)
{
	memset(scan, 0, sizeof(*scan));

	KQREF(kq);
	scan->kqs_kq = kq;
	scan->kqs_start.kn_filter = EVFILT_MARKER;
	scan->kqs_start.kn_status = KN_PROCESSING;
	scan->kqs_end.kn_filter = EVFILT_MARKER;
	scan->kqs_end.kn_status = KN_PROCESSING;
}

void
kqueue_scan_finish(struct kqueue_scan_state *scan)
{
	struct kqueue *kq = scan->kqs_kq;

	KASSERT(scan->kqs_start.kn_filter == EVFILT_MARKER);
	KASSERT(scan->kqs_start.kn_status == KN_PROCESSING);
	KASSERT(scan->kqs_end.kn_filter == EVFILT_MARKER);
	KASSERT(scan->kqs_end.kn_status == KN_PROCESSING);

	if (scan->kqs_queued) {
		scan->kqs_queued = 0;
		mtx_enter(&kq->kq_lock);
		TAILQ_REMOVE(&kq->kq_head, &scan->kqs_end, kn_tqe);
		mtx_leave(&kq->kq_lock);
	}
	KQRELE(kq);
}

/*
 * XXX
 * This could be expanded to call kqueue_scan, if desired.
 */
int
kqueue_read(struct file *fp, struct uio *uio, int fflags)
{
	return (ENXIO);
}

int
kqueue_write(struct file *fp, struct uio *uio, int fflags)
{
	return (ENXIO);
}

int
kqueue_ioctl(struct file *fp, u_long com, caddr_t data, struct proc *p)
{
	return (ENOTTY);
}

int
kqueue_poll(struct file *fp, int events, struct proc *p)
{
	struct kqueue *kq = (struct kqueue *)fp->f_data;
	int revents = 0;

	if (events & (POLLIN | POLLRDNORM)) {
		mtx_enter(&kq->kq_lock);
		if (kq->kq_count) {
			revents |= events & (POLLIN | POLLRDNORM);
		} else {
			selrecord(p, &kq->kq_sel);
			kq->kq_state |= KQ_SEL;
		}
		mtx_leave(&kq->kq_lock);
	}
	return (revents);
}

int
kqueue_stat(struct file *fp, struct stat *st, struct proc *p)
{
	struct kqueue *kq = fp->f_data;

	memset(st, 0, sizeof(*st));
	st->st_size = kq->kq_count;	/* unlocked read */
	st->st_blksize = sizeof(struct kevent);
	st->st_mode = S_IFIFO;
	return (0);
}

void
kqueue_purge(struct proc *p, struct kqueue *kq)
{
	int i;

	mtx_enter(&kq->kq_lock);
	for (i = 0; i < kq->kq_knlistsize; i++)
		knote_remove(p, kq, &kq->kq_knlist[i], 1);
	if (kq->kq_knhashmask != 0) {
		for (i = 0; i < kq->kq_knhashmask + 1; i++)
			knote_remove(p, kq, &kq->kq_knhash[i], 1);
	}
	mtx_leave(&kq->kq_lock);
}

void
kqueue_terminate(struct proc *p, struct kqueue *kq)
{
	struct knote *kn;

	mtx_enter(&kq->kq_lock);

	/*
	 * Any remaining entries should be scan markers.
	 * They are removed when the ongoing scans finish.
	 */
	KASSERT(kq->kq_count == 0);
	TAILQ_FOREACH(kn, &kq->kq_head, kn_tqe)
		KASSERT(kn->kn_filter == EVFILT_MARKER);

	kq->kq_state |= KQ_DYING;
	kqueue_wakeup(kq);
	mtx_leave(&kq->kq_lock);

	KASSERT(klist_empty(&kq->kq_sel.si_note));
	task_del(systq, &kq->kq_task);

}

int
kqueue_close(struct file *fp, struct proc *p)
{
	struct kqueue *kq = fp->f_data;

	fp->f_data = NULL;

	kqueue_purge(p, kq);
	kqueue_terminate(p, kq);

	KQRELE(kq);

	return (0);
}

static void
kqueue_task(void *arg)
{
	struct kqueue *kq = arg;

	/* Kernel lock is needed inside selwakeup(). */
	KERNEL_ASSERT_LOCKED();

	mtx_enter(&kqueue_klist_lock);
	mtx_enter(&kq->kq_lock);
	if (kq->kq_state & KQ_SEL) {
		kq->kq_state &= ~KQ_SEL;
		mtx_leave(&kq->kq_lock);
		selwakeup(&kq->kq_sel);
	} else {
		mtx_leave(&kq->kq_lock);
		KNOTE(&kq->kq_sel.si_note, 0);
	}
	mtx_leave(&kqueue_klist_lock);
	KQRELE(kq);
}
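
/*
 * Wake up any thread sleeping on the kqueue and notify select(2)/poll(2)
 * waiters or parent kqueues.  The select/klist notification is handed off
 * to kqueue_task() on the system task queue because it needs the kernel
 * lock and to avoid recursing on kq_lock.
 */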

void
kqueue_wakeup(struct kqueue *kq)
{
	MUTEX_ASSERT_LOCKED(&kq->kq_lock);

	if (kq->kq_state & KQ_SLEEP) {
		kq->kq_state &= ~KQ_SLEEP;
		wakeup(kq);
	}
	if ((kq->kq_state & KQ_SEL) || !klist_empty(&kq->kq_sel.si_note)) {
		/* Defer activation to avoid recursion. */
		KQREF(kq);
		if (!task_add(systq, &kq->kq_task))
			KQRELE(kq);
	}
}

static void
kqueue_expand_hash(struct kqueue *kq)
{
	struct knlist *hash;
	u_long hashmask;

	MUTEX_ASSERT_LOCKED(&kq->kq_lock);

	if (kq->kq_knhashmask == 0) {
		mtx_leave(&kq->kq_lock);
		hash = hashinit(KN_HASHSIZE, M_KEVENT, M_WAITOK, &hashmask);
		mtx_enter(&kq->kq_lock);
		if (kq->kq_knhashmask == 0) {
			kq->kq_knhash = hash;
			kq->kq_knhashmask = hashmask;
		} else {
			/* Another thread has allocated the hash. */
			mtx_leave(&kq->kq_lock);
			hashfree(hash, KN_HASHSIZE, M_KEVENT);
			mtx_enter(&kq->kq_lock);
		}
	}
}

static void
kqueue_expand_list(struct kqueue *kq, int fd)
{
	struct knlist *list, *olist;
	int size, osize;

	MUTEX_ASSERT_LOCKED(&kq->kq_lock);

	if (kq->kq_knlistsize <= fd) {
		size = kq->kq_knlistsize;
		mtx_leave(&kq->kq_lock);
		while (size <= fd)
			size += KQEXTENT;
		list = mallocarray(size, sizeof(*list), M_KEVENT, M_WAITOK);
		mtx_enter(&kq->kq_lock);
		if (kq->kq_knlistsize <= fd) {
			memcpy(list, kq->kq_knlist,
			    kq->kq_knlistsize * sizeof(*list));
			memset(&list[kq->kq_knlistsize], 0,
			    (size - kq->kq_knlistsize) * sizeof(*list));
			olist = kq->kq_knlist;
			osize = kq->kq_knlistsize;
			kq->kq_knlist = list;
			kq->kq_knlistsize = size;
			mtx_leave(&kq->kq_lock);
			free(olist, M_KEVENT, osize * sizeof(*list));
			mtx_enter(&kq->kq_lock);
		} else {
			/* Another thread has expanded the list. */
			mtx_leave(&kq->kq_lock);
			free(list, M_KEVENT, size * sizeof(*list));
			mtx_enter(&kq->kq_lock);
		}
	}
}

/*
 * Acquire a knote, return non-zero on success, 0 on failure.
 *
 * If we cannot acquire the knote we sleep and return 0.  The knote
 * may be stale on return in this case and the caller must restart
 * whatever loop they are in.
 *
 * If we are about to sleep and klist is non-NULL, the list is unlocked
 * before sleep and remains unlocked on return.
 */
int
knote_acquire(struct knote *kn, struct klist *klist, int ls)
{
	struct kqueue *kq = kn->kn_kq;

	MUTEX_ASSERT_LOCKED(&kq->kq_lock);
	KASSERT(kn->kn_filter != EVFILT_MARKER);

	if (kn->kn_status & KN_PROCESSING) {
		kn->kn_status |= KN_WAITING;
		if (klist != NULL) {
			mtx_leave(&kq->kq_lock);
			klist_unlock(klist, ls);
			/* XXX Timeout resolves potential loss of wakeup. */
			tsleep_nsec(kn, 0, "kqepts", SEC_TO_NSEC(1));
		} else {
			msleep_nsec(kn, &kq->kq_lock, PNORELOCK, "kqepts",
			    SEC_TO_NSEC(1));
		}
		/* knote may be stale now */
		return (0);
	}
	kn->kn_status |= KN_PROCESSING;
	return (1);
}

/*
 * Release an acquired knote, clearing KN_PROCESSING.
 */
void
knote_release(struct knote *kn)
{
	MUTEX_ASSERT_LOCKED(&kn->kn_kq->kq_lock);
	KASSERT(kn->kn_filter != EVFILT_MARKER);
	KASSERT(kn->kn_status & KN_PROCESSING);

	if (kn->kn_status & KN_WAITING) {
		kn->kn_status &= ~KN_WAITING;
		wakeup(kn);
	}
	kn->kn_status &= ~KN_PROCESSING;
	/* kn should not be accessed anymore */
}

/*
 * activate one knote.
 */
void
knote_activate(struct knote *kn)
{
	MUTEX_ASSERT_LOCKED(&kn->kn_kq->kq_lock);

	kn->kn_status |= KN_ACTIVE;
	if ((kn->kn_status & (KN_QUEUED | KN_DISABLED)) == 0)
		knote_enqueue(kn);
}

/*
 * walk down a list of knotes, activating them if their event has triggered.
 */
void
knote(struct klist *list, long hint)
{
	struct knote *kn, *kn0;
	struct kqueue *kq;

	KLIST_ASSERT_LOCKED(list);

	SLIST_FOREACH_SAFE(kn, &list->kl_list, kn_selnext, kn0) {
		if (filter_event(kn, hint)) {
			kq = kn->kn_kq;
			mtx_enter(&kq->kq_lock);
			knote_activate(kn);
			mtx_leave(&kq->kq_lock);
		}
	}
}

/*
 * remove all knotes from a specified knlist
 */
void
knote_remove(struct proc *p, struct kqueue *kq, struct knlist *list, int purge)
{
	struct knote *kn;

	MUTEX_ASSERT_LOCKED(&kq->kq_lock);

	while ((kn = SLIST_FIRST(list)) != NULL) {
		KASSERT(kn->kn_kq == kq);

		if (!purge) {
			/* Skip pending badfd knotes. */
			while (kn->kn_fop == &badfd_filtops) {
				kn = SLIST_NEXT(kn, kn_link);
				if (kn == NULL)
					return;
				KASSERT(kn->kn_kq == kq);
			}
		}

		if (!knote_acquire(kn, NULL, 0)) {
			/* knote_acquire() has released kq_lock. */
			mtx_enter(&kq->kq_lock);
			continue;
		}
		mtx_leave(&kq->kq_lock);
		filter_detach(kn);

		/*
		 * Notify poll(2) and select(2) when a monitored
		 * file descriptor is closed.
		 *
		 * This reuses the original knote for delivering the
		 * notification so as to avoid allocating memory.
		 */
		if (!purge && (kn->kn_flags & (__EV_POLL | __EV_SELECT)) &&
		    !(p->p_kq == kq &&
		      p->p_kq_serial > (unsigned long)kn->kn_udata) &&
		    kn->kn_fop != &badfd_filtops) {
			KASSERT(kn->kn_fop->f_flags & FILTEROP_ISFD);
			FRELE(kn->kn_fp, p);
			kn->kn_fp = NULL;

			kn->kn_fop = &badfd_filtops;
			filter_event(kn, 0);
			mtx_enter(&kq->kq_lock);
			knote_activate(kn);
			knote_release(kn);
			continue;
		}

		knote_drop(kn, p);
		mtx_enter(&kq->kq_lock);
	}
}

/*
 * remove all knotes referencing a specified fd
 */
void
knote_fdclose(struct proc *p, int fd)
{
	struct filedesc *fdp = p->p_p->ps_fd;
	struct kqueue *kq;

	/*
	 * fdplock can be ignored if the file descriptor table is being freed
	 * because no other thread can access the fdp.
	 */
	if (fdp->fd_refcnt != 0)
		fdpassertlocked(fdp);

	LIST_FOREACH(kq, &fdp->fd_kqlist, kq_next) {
		mtx_enter(&kq->kq_lock);
		if (fd < kq->kq_knlistsize)
			knote_remove(p, kq, &kq->kq_knlist[fd], 0);
		mtx_leave(&kq->kq_lock);
	}
}

/*
 * handle a process exiting, including the triggering of NOTE_EXIT notes
 * XXX this could be more efficient, doing a single pass down the klist
 */
void
knote_processexit(struct proc *p)
{
	struct process *pr = p->p_p;

	KERNEL_ASSERT_LOCKED();
	KASSERT(p == curproc);

	KNOTE(&pr->ps_klist, NOTE_EXIT);

	/* remove other knotes hanging off the process */
	klist_invalidate(&pr->ps_klist);
}

void
knote_attach(struct knote *kn)
{
	struct kqueue *kq = kn->kn_kq;
	struct knlist *list;

	MUTEX_ASSERT_LOCKED(&kq->kq_lock);
	KASSERT(kn->kn_status & KN_PROCESSING);

	if (kn->kn_fop->f_flags & FILTEROP_ISFD) {
		KASSERT(kq->kq_knlistsize > kn->kn_id);
		list = &kq->kq_knlist[kn->kn_id];
	} else {
		KASSERT(kq->kq_knhashmask != 0);
		list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)];
	}
	SLIST_INSERT_HEAD(list, kn, kn_link);
}

void
knote_detach(struct knote *kn)
{
	struct kqueue *kq = kn->kn_kq;
	struct knlist *list;

	MUTEX_ASSERT_LOCKED(&kq->kq_lock);
	KASSERT(kn->kn_status & KN_PROCESSING);

	if (kn->kn_fop->f_flags & FILTEROP_ISFD)
		list = &kq->kq_knlist[kn->kn_id];
	else
		list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)];
	SLIST_REMOVE(list, kn, knote, kn_link);
}

/*
 * should be called at spl == 0, since we don't want to hold spl
 * while calling FRELE and pool_put.
 */
void
knote_drop(struct knote *kn, struct proc *p)
{
	struct kqueue *kq = kn->kn_kq;

	KASSERT(kn->kn_filter != EVFILT_MARKER);

	mtx_enter(&kq->kq_lock);
	knote_detach(kn);
	if (kn->kn_status & KN_QUEUED)
		knote_dequeue(kn);
	if (kn->kn_status & KN_WAITING) {
		kn->kn_status &= ~KN_WAITING;
		wakeup(kn);
	}
	mtx_leave(&kq->kq_lock);

	if ((kn->kn_fop->f_flags & FILTEROP_ISFD) && kn->kn_fp != NULL)
		FRELE(kn->kn_fp, p);
	pool_put(&knote_pool, kn);
}


void
knote_enqueue(struct knote *kn)
{
	struct kqueue *kq = kn->kn_kq;

	MUTEX_ASSERT_LOCKED(&kq->kq_lock);
	KASSERT(kn->kn_filter != EVFILT_MARKER);
	KASSERT((kn->kn_status & KN_QUEUED) == 0);

	kqueue_check(kq);
	TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
	kn->kn_status |= KN_QUEUED;
	kq->kq_count++;
	kqueue_check(kq);
	kqueue_wakeup(kq);
}

void
knote_dequeue(struct knote *kn)
{
	struct kqueue *kq = kn->kn_kq;

	MUTEX_ASSERT_LOCKED(&kq->kq_lock);
	KASSERT(kn->kn_filter != EVFILT_MARKER);
	KASSERT(kn->kn_status & KN_QUEUED);

	kqueue_check(kq);
	TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
	kn->kn_status &= ~KN_QUEUED;
	kq->kq_count--;
	kqueue_check(kq);
}

/*
 * Modify the knote's parameters.
 *
 * The knote's object lock must be held.
 */
void
knote_modify(const struct kevent *kev, struct knote *kn)
{
	if ((kn->kn_fop->f_flags & FILTEROP_MPSAFE) == 0)
		KERNEL_ASSERT_LOCKED();

	kn->kn_sfflags = kev->fflags;
	kn->kn_sdata = kev->data;
	kn->kn_udata = kev->udata;
}

/*
 * Submit the knote's event for delivery.
 *
 * The knote's object lock must be held.
 */
void
knote_submit(struct knote *kn, struct kevent *kev)
{
	if ((kn->kn_fop->f_flags & FILTEROP_MPSAFE) == 0)
		KERNEL_ASSERT_LOCKED();

	if (kev != NULL) {
		*kev = kn->kn_kevent;
		if (kn->kn_flags & EV_CLEAR) {
			kn->kn_fflags = 0;
			kn->kn_data = 0;
		}
	}
}

void
klist_init(struct klist *klist, const struct klistops *ops, void *arg)
{
	SLIST_INIT(&klist->kl_list);
	klist->kl_ops = ops;
	klist->kl_arg = arg;
}

void
klist_free(struct klist *klist)
{
	KASSERT(SLIST_EMPTY(&klist->kl_list));
}

void
klist_insert(struct klist *klist, struct knote *kn)
{
	int ls;

	ls = klist_lock(klist);
	SLIST_INSERT_HEAD(&klist->kl_list, kn, kn_selnext);
	klist_unlock(klist, ls);
}

void
klist_insert_locked(struct klist *klist, struct knote *kn)
{
	KLIST_ASSERT_LOCKED(klist);

	SLIST_INSERT_HEAD(&klist->kl_list, kn, kn_selnext);
}

void
klist_remove(struct klist *klist, struct knote *kn)
{
	int ls;

	ls = klist_lock(klist);
	SLIST_REMOVE(&klist->kl_list, kn, knote, kn_selnext);
	klist_unlock(klist, ls);
}

void
klist_remove_locked(struct klist *klist, struct knote *kn)
{
	KLIST_ASSERT_LOCKED(klist);

	SLIST_REMOVE(&klist->kl_list, kn, knote, kn_selnext);
}

int
klist_empty(struct klist *klist)
{
	return (SLIST_EMPTY(&klist->kl_list));
}
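
/*
 * A klist's locking is abstracted by struct klistops.  A subsystem picks
 * the lock that already protects its data structure, typically by calling
 * klist_init_mutex() or klist_init_rwlock() (both defined below), e.g.
 * something like
 *
 *	klist_init_mutex(&sc->sc_klist, &sc->sc_mtx);
 *
 * (sc_klist and sc_mtx are illustrative names).  A klist with no klistops
 * falls back to the kernel lock, see klist_lock() below.
 */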

/*
 * Detach all knotes from klist.  The knotes are rewired to indicate EOF.
 *
 * The caller of this function must not hold any locks that can block
 * filterops callbacks that run with KN_PROCESSING.
 * Otherwise this function might deadlock.
 */
void
klist_invalidate(struct klist *list)
{
	struct knote *kn;
	struct kqueue *kq;
	struct proc *p = curproc;
	int ls;

	NET_ASSERT_UNLOCKED();

	ls = klist_lock(list);
	while ((kn = SLIST_FIRST(&list->kl_list)) != NULL) {
		kq = kn->kn_kq;
		mtx_enter(&kq->kq_lock);
		if (!knote_acquire(kn, list, ls)) {
			/* knote_acquire() has released kq_lock
			 * and klist lock. */
			ls = klist_lock(list);
			continue;
		}
		mtx_leave(&kq->kq_lock);
		klist_unlock(list, ls);
		filter_detach(kn);
		if (kn->kn_fop->f_flags & FILTEROP_ISFD) {
			kn->kn_fop = &dead_filtops;
			filter_event(kn, 0);
			mtx_enter(&kq->kq_lock);
			knote_activate(kn);
			knote_release(kn);
			mtx_leave(&kq->kq_lock);
		} else {
			knote_drop(kn, p);
		}
		ls = klist_lock(list);
	}
	klist_unlock(list, ls);
}

static int
klist_lock(struct klist *list)
{
	int ls = 0;

	if (list->kl_ops != NULL) {
		ls = list->kl_ops->klo_lock(list->kl_arg);
	} else {
		KERNEL_LOCK();
		ls = splhigh();
	}
	return ls;
}

static void
klist_unlock(struct klist *list, int ls)
{
	if (list->kl_ops != NULL) {
		list->kl_ops->klo_unlock(list->kl_arg, ls);
	} else {
		splx(ls);
		KERNEL_UNLOCK();
	}
}

static void
klist_mutex_assertlk(void *arg)
{
	struct mutex *mtx = arg;

	(void)mtx;

	MUTEX_ASSERT_LOCKED(mtx);
}

static int
klist_mutex_lock(void *arg)
{
	struct mutex *mtx = arg;

	mtx_enter(mtx);
	return 0;
}

static void
klist_mutex_unlock(void *arg, int s)
{
	struct mutex *mtx = arg;

	mtx_leave(mtx);
}

static const struct klistops mutex_klistops = {
	.klo_assertlk	= klist_mutex_assertlk,
	.klo_lock	= klist_mutex_lock,
	.klo_unlock	= klist_mutex_unlock,
};

void
klist_init_mutex(struct klist *klist, struct mutex *mtx)
{
	klist_init(klist, &mutex_klistops, mtx);
}

static void
klist_rwlock_assertlk(void *arg)
{
	struct rwlock *rwl = arg;

	(void)rwl;

	rw_assert_wrlock(rwl);
}

static int
klist_rwlock_lock(void *arg)
{
	struct rwlock *rwl = arg;

	rw_enter_write(rwl);
	return 0;
}

static void
klist_rwlock_unlock(void *arg, int s)
{
	struct rwlock *rwl = arg;

	rw_exit_write(rwl);
}

static const struct klistops rwlock_klistops = {
	.klo_assertlk	= klist_rwlock_assertlk,
	.klo_lock	= klist_rwlock_lock,
	.klo_unlock	= klist_rwlock_unlock,
};

void
klist_init_rwlock(struct klist *klist, struct rwlock *rwl)
{
	klist_init(klist, &rwlock_klistops, rwl);
}