/*-
 * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: src/sys/kern/kern_event.c,v 1.2.2.10 2004/04/04 07:03:14 cperciva Exp $
 * $DragonFly: src/sys/kern/kern_event.c,v 1.33 2007/02/03 17:05:57 corecode Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/unistd.h>
#include <sys/file.h>
#include <sys/lock.h>
#include <sys/fcntl.h>
#include <sys/queue.h>
#include <sys/event.h>
#include <sys/eventvar.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/thread.h>
#include <sys/uio.h>
#include <sys/signalvar.h>
#include <sys/filio.h>
#include <sys/ktr.h>

#include <sys/thread2.h>
#include <sys/file2.h>
#include <sys/mplock2.h>

#include <vm/vm_zone.h>

/*
 * Global token for kqueue subsystem
 */
struct lwkt_token kq_token = LWKT_TOKEN_UP_INITIALIZER(kq_token);
SYSCTL_INT(_lwkt, OID_AUTO, kq_mpsafe,
    CTLFLAG_RW, &kq_token.t_flags, 0, "");
SYSCTL_LONG(_lwkt, OID_AUTO, kq_collisions,
    CTLFLAG_RW, &kq_token.t_collisions, 0, "");

MALLOC_DEFINE(M_KQUEUE, "kqueue", "memory for kqueue system");

struct kevent_copyin_args {
        struct kevent_args      *ka;
        int                     pchanges;
};

static int      kqueue_sleep(struct kqueue *kq, struct timespec *tsp);
static int      kqueue_scan(struct kqueue *kq, struct kevent *kevp, int count,
                    struct knote *marker);
static int      kqueue_read(struct file *fp, struct uio *uio,
                    struct ucred *cred, int flags);
static int      kqueue_write(struct file *fp, struct uio *uio,
                    struct ucred *cred, int flags);
static int      kqueue_ioctl(struct file *fp, u_long com, caddr_t data,
                    struct ucred *cred, struct sysmsg *msg);
static int      kqueue_kqfilter(struct file *fp, struct knote *kn);
static int      kqueue_stat(struct file *fp, struct stat *st,
                    struct ucred *cred);
static int      kqueue_close(struct file *fp);
static void     kqueue_wakeup(struct kqueue *kq);
static int      filter_attach(struct knote *kn);
static int      filter_event(struct knote *kn, long hint);

/*
 * MPSAFE
 */
static struct fileops kqueueops = {
        .fo_read = kqueue_read,
        .fo_write = kqueue_write,
        .fo_ioctl = kqueue_ioctl,
        .fo_kqfilter = kqueue_kqfilter,
        .fo_stat = kqueue_stat,
        .fo_close = kqueue_close,
        .fo_shutdown = nofo_shutdown
};

static void     knote_attach(struct knote *kn);
static void     knote_drop(struct knote *kn);
static void     knote_detach_and_drop(struct knote *kn);
static void     knote_detach_and_drop_locked(struct knote *kn);
static void     knote_enqueue(struct knote *kn);
static void     knote_dequeue(struct knote *kn);
static void     knote_init(void);
static struct   knote *knote_alloc(void);
static void     knote_free(struct knote *kn);

static void     filt_kqdetach(struct knote *kn);
static int      filt_kqueue(struct knote *kn, long hint);
static int      filt_procattach(struct knote *kn);
static void     filt_procdetach(struct knote *kn);
static int      filt_proc(struct knote *kn, long hint);
static int      filt_fileattach(struct knote *kn);
static void     filt_timerexpire(void *knx);
static int      filt_timerattach(struct knote *kn);
static void     filt_timerdetach(struct knote *kn);
static int      filt_timer(struct knote *kn, long hint);

static struct filterops file_filtops =
        { FILTEROP_ISFD, filt_fileattach, NULL, NULL };
static struct filterops kqread_filtops =
        { FILTEROP_ISFD, NULL, filt_kqdetach, filt_kqueue };
static struct filterops proc_filtops =
        { 0, filt_procattach, filt_procdetach, filt_proc };
static struct filterops timer_filtops =
        { 0, filt_timerattach, filt_timerdetach, filt_timer };

static vm_zone_t        knote_zone;
static int              kq_ncallouts = 0;
static int              kq_calloutmax = (4 * 1024);
SYSCTL_INT(_kern, OID_AUTO, kq_calloutmax, CTLFLAG_RW,
    &kq_calloutmax, 0, "Maximum number of callouts allocated for kqueue");
static int              kq_checkloop = 1000000;
SYSCTL_INT(_kern, OID_AUTO, kq_checkloop, CTLFLAG_RW,
    &kq_checkloop, 0, "Maximum number of kevent collection loops before panic");

#define KNOTE_ACTIVATE(kn) do {                                         \
        kn->kn_status |= KN_ACTIVE;                                     \
        if ((kn->kn_status & (KN_QUEUED | KN_DISABLED)) == 0)           \
                knote_enqueue(kn);                                      \
} while(0)

#define KN_HASHSIZE             64              /* XXX should be tunable */
#define KN_HASH(val, mask)      (((val) ^ (val >> 8)) & (mask))

extern struct filterops aio_filtops;
extern struct filterops sig_filtops;

/*
 * Table for all system-defined filters.
 */
static struct filterops *sysfilt_ops[] = {
        &file_filtops,                  /* EVFILT_READ */
        &file_filtops,                  /* EVFILT_WRITE */
        &aio_filtops,                   /* EVFILT_AIO */
        &file_filtops,                  /* EVFILT_VNODE */
        &proc_filtops,                  /* EVFILT_PROC */
        &sig_filtops,                   /* EVFILT_SIGNAL */
        &timer_filtops,                 /* EVFILT_TIMER */
        &file_filtops,                  /* EVFILT_EXCEPT */
};
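
/*
 * Illustrative note (not part of the original file): filter numbers from
 * userland are small negative integers (EVFILT_READ is -1, EVFILT_WRITE
 * is -2, and so on), so kqueue_register() below converts one to a 0-based
 * index into the table above with a bitwise complement.  A minimal sketch
 * of the mapping, assuming the standard <sys/event.h> values:
 *
 *      short filter = EVFILT_READ;                     // -1
 *      int idx = ~filter;                              // ~(-1) == 0
 *      struct filterops *fops = sysfilt_ops[idx];      // &file_filtops
 */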

static int
filt_fileattach(struct knote *kn)
{
        return (fo_kqfilter(kn->kn_fp, kn));
}

/*
 * MPSAFE
 */
static int
kqueue_kqfilter(struct file *fp, struct knote *kn)
{
        struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;

        if (kn->kn_filter != EVFILT_READ)
                return (EOPNOTSUPP);

        kn->kn_fop = &kqread_filtops;
        knote_insert(&kq->kq_kqinfo.ki_note, kn);
        return (0);
}

static void
filt_kqdetach(struct knote *kn)
{
        struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;

        knote_remove(&kq->kq_kqinfo.ki_note, kn);
}

/*ARGSUSED*/
static int
filt_kqueue(struct knote *kn, long hint)
{
        struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;

        kn->kn_data = kq->kq_count;
        return (kn->kn_data > 0);
}
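
/*
 * Illustrative sketch (not part of the original file): because a kqueue
 * is itself a file supporting EVFILT_READ via kqread_filtops above, one
 * kqueue can be monitored from another; kn_data reports the number of
 * events pending on the inner queue.  Userland usage might look like
 * this, with error handling omitted:
 *
 *      #include <sys/event.h>
 *
 *      int inner = kqueue();
 *      int outer = kqueue();
 *      struct kevent kev;
 *      EV_SET(&kev, inner, EVFILT_READ, EV_ADD, 0, 0, NULL);
 *      kevent(outer, &kev, 1, NULL, 0, NULL);  // register only
 */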

static int
filt_procattach(struct knote *kn)
{
        struct proc *p;
        int immediate;

        immediate = 0;
        lwkt_gettoken(&proc_token);
        p = pfind(kn->kn_id);
        if (p == NULL && (kn->kn_sfflags & NOTE_EXIT)) {
                p = zpfind(kn->kn_id);
                immediate = 1;
        }
        if (p == NULL) {
                lwkt_reltoken(&proc_token);
                return (ESRCH);
        }
        if (!PRISON_CHECK(curthread->td_ucred, p->p_ucred)) {
                lwkt_reltoken(&proc_token);
                return (EACCES);
        }

        kn->kn_ptr.p_proc = p;
        kn->kn_flags |= EV_CLEAR;               /* automatically set */

        /*
         * internal flag indicating registration done by kernel
         */
        if (kn->kn_flags & EV_FLAG1) {
                kn->kn_data = kn->kn_sdata;     /* ppid */
                kn->kn_fflags = NOTE_CHILD;
                kn->kn_flags &= ~EV_FLAG1;
        }

        knote_insert(&p->p_klist, kn);

        /*
         * Immediately activate any exit notes if the target process is a
         * zombie.  This is necessary to handle the case where the target
         * process, e.g. a child, dies before the kevent is registered.
         */
        if (immediate && filt_proc(kn, NOTE_EXIT))
                KNOTE_ACTIVATE(kn);
        lwkt_reltoken(&proc_token);

        return (0);
}

/*
 * The knote may be attached to a different process, which may exit,
 * leaving nothing for the knote to be attached to.  So when the process
 * exits, the knote is marked as DETACHED and also flagged as ONESHOT so
 * it will be deleted when read out.  However, as part of the knote deletion,
 * this routine is called, so a check is needed to avoid actually performing
 * a detach, because the original process does not exist any more.
 */
static void
filt_procdetach(struct knote *kn)
{
        struct proc *p;

        if (kn->kn_status & KN_DETACHED)
                return;
        /* XXX locking?  take proc_token here? */
        p = kn->kn_ptr.p_proc;
        knote_remove(&p->p_klist, kn);
}

static int
filt_proc(struct knote *kn, long hint)
{
        u_int event;

        /*
         * mask off extra data
         */
        event = (u_int)hint & NOTE_PCTRLMASK;

        /*
         * if the user is interested in this event, record it.
         */
        if (kn->kn_sfflags & event)
                kn->kn_fflags |= event;

        /*
         * Process is gone, so flag the event as finished.  Detach the
         * knote from the process now because the process will be poof,
         * gone later on.
         */
        if (event == NOTE_EXIT) {
                struct proc *p = kn->kn_ptr.p_proc;
                if ((kn->kn_status & KN_DETACHED) == 0) {
                        knote_remove(&p->p_klist, kn);
                        kn->kn_status |= KN_DETACHED;
                        kn->kn_data = p->p_xstat;
                        kn->kn_ptr.p_proc = NULL;
                }
                kn->kn_flags |= (EV_EOF | EV_ONESHOT);
                return (1);
        }

        /*
         * process forked, and user wants to track the new process,
         * so attach a new knote to it, and immediately report an
         * event with the parent's pid.
         */
        if ((event == NOTE_FORK) && (kn->kn_sfflags & NOTE_TRACK)) {
                struct kevent kev;
                int error;

                /*
                 * register knote with new process.
                 */
                kev.ident = hint & NOTE_PDATAMASK;      /* pid */
                kev.filter = kn->kn_filter;
                kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1;
                kev.fflags = kn->kn_sfflags;
                kev.data = kn->kn_id;                   /* parent */
                kev.udata = kn->kn_kevent.udata;        /* preserve udata */
                error = kqueue_register(kn->kn_kq, &kev);
                if (error)
                        kn->kn_fflags |= NOTE_TRACKERR;
        }

        return (kn->kn_fflags != 0);
}
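
/*
 * Illustrative sketch (not part of the original file): watching a child
 * process for exit from userland.  Adding NOTE_TRACK would additionally
 * make filt_proc() auto-register a knote on each forked descendant, as
 * implemented above.  `child_pid' is a hypothetical pid; error handling
 * is omitted:
 *
 *      #include <sys/event.h>
 *
 *      int kq = kqueue();
 *      struct kevent kev, ev;
 *      EV_SET(&kev, child_pid, EVFILT_PROC, EV_ADD, NOTE_EXIT, 0, NULL);
 *      kevent(kq, &kev, 1, NULL, 0, NULL);
 *      kevent(kq, NULL, 0, &ev, 1, NULL);      // blocks; ev.data = p_xstat
 */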

static void
filt_timerexpire(void *knx)
{
        struct knote *kn = knx;
        struct callout *calloutp;
        struct timeval tv;
        int tticks;

        lwkt_gettoken(&kq_token);

        kn->kn_data++;
        KNOTE_ACTIVATE(kn);

        if ((kn->kn_flags & EV_ONESHOT) == 0) {
                tv.tv_sec = kn->kn_sdata / 1000;
                tv.tv_usec = (kn->kn_sdata % 1000) * 1000;
                tticks = tvtohz_high(&tv);
                calloutp = (struct callout *)kn->kn_hook;
                callout_reset(calloutp, tticks, filt_timerexpire, kn);
        }

        lwkt_reltoken(&kq_token);
}

/*
 * data contains amount of time to sleep, in milliseconds
 */
static int
filt_timerattach(struct knote *kn)
{
        struct callout *calloutp;
        struct timeval tv;
        int tticks;

        if (kq_ncallouts >= kq_calloutmax)
                return (ENOMEM);
        kq_ncallouts++;

        tv.tv_sec = kn->kn_sdata / 1000;
        tv.tv_usec = (kn->kn_sdata % 1000) * 1000;
        tticks = tvtohz_high(&tv);

        kn->kn_flags |= EV_CLEAR;               /* automatically set */
        MALLOC(calloutp, struct callout *, sizeof(*calloutp),
            M_KQUEUE, M_WAITOK);
        callout_init(calloutp);
        kn->kn_hook = (caddr_t)calloutp;
        callout_reset(calloutp, tticks, filt_timerexpire, kn);

        return (0);
}

static void
filt_timerdetach(struct knote *kn)
{
        struct callout *calloutp;

        calloutp = (struct callout *)kn->kn_hook;
        callout_stop(calloutp);
        FREE(calloutp, M_KQUEUE);
        kq_ncallouts--;
}

static int
filt_timer(struct knote *kn, long hint)
{
        return (kn->kn_data != 0);
}

/*
 * Initialize a kqueue.
 *
 * NOTE: The lwp/proc code initializes a kqueue for select/poll ops.
 *
 * MPSAFE
 */
void
kqueue_init(struct kqueue *kq, struct filedesc *fdp)
{
        TAILQ_INIT(&kq->kq_knpend);
        TAILQ_INIT(&kq->kq_knlist);
        kq->kq_count = 0;
        kq->kq_fdp = fdp;
        SLIST_INIT(&kq->kq_kqinfo.ki_note);
}

/*
 * Terminate a kqueue.  Freeing the actual kq itself is left up to the
 * caller (it might be embedded in a lwp so we don't do it here).
 *
 * The kq's knlist must be completely eradicated so block on any
 * processing races.
 */
void
kqueue_terminate(struct kqueue *kq)
{
        struct knote *kn;

        lwkt_gettoken(&kq_token);
        while ((kn = TAILQ_FIRST(&kq->kq_knlist)) != NULL) {
                if (kn->kn_status & KN_PROCESSING) {
                        kn->kn_status |= KN_WAITING | KN_REPROCESS;
                        tsleep(kn, 0, "kqtrms", hz);
                        continue;
                }
                knote_detach_and_drop(kn);
        }
        if (kq->kq_knhash) {
                kfree(kq->kq_knhash, M_KQUEUE);
                kq->kq_knhash = NULL;
                kq->kq_knhashmask = 0;
        }
        lwkt_reltoken(&kq_token);
}
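
/*
 * Illustrative sketch (not part of the original file): a periodic 500ms
 * timer event.  The data field is the period in milliseconds, per the
 * comment above filt_timerattach(); EV_CLEAR is forced on, so kn_data
 * accumulates the number of expirations since the last retrieval:
 *
 *      #include <sys/event.h>
 *
 *      int kq = kqueue();
 *      struct kevent kev, ev;
 *      EV_SET(&kev, 1, EVFILT_TIMER, EV_ADD, 0, 500, NULL);
 *      kevent(kq, &kev, 1, NULL, 0, NULL);
 *      kevent(kq, NULL, 0, &ev, 1, NULL);      // ev.data = expiry count
 */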

/*
 * MPSAFE
 */
int
sys_kqueue(struct kqueue_args *uap)
{
        struct thread *td = curthread;
        struct kqueue *kq;
        struct file *fp;
        int fd, error;

        error = falloc(td->td_lwp, &fp, &fd);
        if (error)
                return (error);
        fp->f_flag = FREAD | FWRITE;
        fp->f_type = DTYPE_KQUEUE;
        fp->f_ops = &kqueueops;

        kq = kmalloc(sizeof(struct kqueue), M_KQUEUE, M_WAITOK | M_ZERO);
        kqueue_init(kq, td->td_proc->p_fd);
        fp->f_data = kq;

        fsetfd(kq->kq_fdp, fp, fd);
        uap->sysmsg_result = fd;
        fdrop(fp);
        return (error);
}

/*
 * Copy 'count' items into the destination list pointed to by uap->eventlist.
 */
static int
kevent_copyout(void *arg, struct kevent *kevp, int count, int *res)
{
        struct kevent_copyin_args *kap;
        int error;

        kap = (struct kevent_copyin_args *)arg;

        error = copyout(kevp, kap->ka->eventlist, count * sizeof(*kevp));
        if (error == 0) {
                kap->ka->eventlist += count;
                *res += count;
        } else {
                *res = -1;
        }

        return (error);
}

/*
 * Copy at most 'max' items from the list pointed to by kap->changelist,
 * return number of items in 'events'.
 */
static int
kevent_copyin(void *arg, struct kevent *kevp, int max, int *events)
{
        struct kevent_copyin_args *kap;
        int error, count;

        kap = (struct kevent_copyin_args *)arg;

        count = min(kap->ka->nchanges - kap->pchanges, max);
        error = copyin(kap->ka->changelist, kevp, count * sizeof *kevp);
        if (error == 0) {
                kap->ka->changelist += count;
                kap->pchanges += count;
                *events = count;
        }

        return (error);
}
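
/*
 * Illustrative sketch (not part of the original file): the canonical
 * userland round trip through the two syscalls above, waiting for a
 * descriptor to become readable.  `sock' is a hypothetical connected
 * socket and handle_readable() a placeholder:
 *
 *      #include <sys/event.h>
 *
 *      int kq = kqueue();
 *      struct kevent kev, ev;
 *      EV_SET(&kev, sock, EVFILT_READ, EV_ADD, 0, 0, NULL);
 *      int n = kevent(kq, &kev, 1, &ev, 1, NULL);
 *      if (n == 1)
 *              handle_readable(ev.ident, ev.data);  // data = bytes ready
 */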

/*
 * MPSAFE
 */
int
kern_kevent(struct kqueue *kq, int nevents, int *res, void *uap,
            k_copyin_fn kevent_copyinfn, k_copyout_fn kevent_copyoutfn,
            struct timespec *tsp_in)
{
        struct kevent *kevp;
        struct timespec *tsp;
        int i, n, total, error, nerrors = 0;
        int lres;
        int limit = kq_checkloop;
        struct kevent kev[KQ_NEVENTS];
        struct knote marker;

        tsp = tsp_in;
        *res = 0;

        lwkt_gettoken(&kq_token);
        for ( ;; ) {
                n = 0;
                error = kevent_copyinfn(uap, kev, KQ_NEVENTS, &n);
                if (error)
                        goto done;
                if (n == 0)
                        break;
                for (i = 0; i < n; i++) {
                        kevp = &kev[i];
                        kevp->flags &= ~EV_SYSFLAGS;
                        error = kqueue_register(kq, kevp);

                        /*
                         * If a registration returns an error we
                         * immediately post the error.  The kevent()
                         * call itself will fail with the error if
                         * no space is available for posting.
                         *
                         * Such errors normally bypass the timeout/blocking
                         * code.  However, if the copyoutfn function refuses
                         * to post the error (see sys_poll()), then we
                         * ignore it too.
                         */
                        if (error) {
                                kevp->flags = EV_ERROR;
                                kevp->data = error;
                                lres = *res;
                                kevent_copyoutfn(uap, kevp, 1, res);
                                if (lres != *res) {
                                        nevents--;
                                        nerrors++;
                                }
                        }
                }
        }
        if (nerrors) {
                error = 0;
                goto done;
        }

        /*
         * Acquire/wait for events - setup timeout
         */
        if (tsp != NULL) {
                struct timespec ats;

                if (tsp->tv_sec || tsp->tv_nsec) {
                        nanouptime(&ats);
                        timespecadd(tsp, &ats);         /* tsp = target time */
                }
        }

        /*
         * Loop as required.
         *
         * Collect as many events as we can.  Sleeping on successive
         * loops is disabled if copyoutfn has incremented (*res).
         *
         * The loop stops if an error occurs, all events have been
         * scanned (the marker has been reached), or fewer than the
         * maximum number of events is found.
         *
         * The copyoutfn function does not have to increment (*res) in
         * order for the loop to continue.
         *
         * NOTE: doselect() usually passes 0x7FFFFFFF for nevents.
         */
        total = 0;
        error = 0;
        marker.kn_filter = EVFILT_MARKER;
        marker.kn_status = KN_PROCESSING;
        TAILQ_INSERT_TAIL(&kq->kq_knpend, &marker, kn_tqe);
        while ((n = nevents - total) > 0) {
                if (n > KQ_NEVENTS)
                        n = KQ_NEVENTS;

                /*
                 * If no events are pending sleep until timeout (if any)
                 * or an event occurs.
                 *
                 * After the sleep completes the marker is moved to the
                 * end of the list, making any received events available
                 * to our scan.
                 */
                if (kq->kq_count == 0 && *res == 0) {
                        error = kqueue_sleep(kq, tsp);
                        if (error)
                                break;

                        TAILQ_REMOVE(&kq->kq_knpend, &marker, kn_tqe);
                        TAILQ_INSERT_TAIL(&kq->kq_knpend, &marker, kn_tqe);
                }

                /*
                 * Process all received events
                 * Account for all non-spurious events in our total
                 */
                i = kqueue_scan(kq, kev, n, &marker);
                if (i) {
                        lres = *res;
                        error = kevent_copyoutfn(uap, kev, i, res);
                        total += *res - lres;
                        if (error)
                                break;
                }
                if (limit && --limit == 0)
                        panic("kqueue: checkloop failed i=%d", i);

                /*
                 * Normally when fewer events are returned than requested
                 * we can stop.  However, if only spurious events were
                 * collected the copyout will not bump (*res) and we have
                 * to continue.
                 */
                if (i < n && *res)
                        break;

                /*
                 * Deal with an edge case where spurious events can cause
                 * a loop to occur without moving the marker.  This can
                 * prevent kqueue_scan() from picking up new events which
                 * race us.  We must be sure to move the marker for this
                 * case.
                 *
                 * NOTE: We do not want to move the marker if events
                 *       were scanned because normal kqueue operations
                 *       may reactivate events.  Moving the marker in
                 *       that case could result in duplicates for the
                 *       same event.
                 */
                if (i == 0) {
                        TAILQ_REMOVE(&kq->kq_knpend, &marker, kn_tqe);
                        TAILQ_INSERT_TAIL(&kq->kq_knpend, &marker, kn_tqe);
                }
        }
        TAILQ_REMOVE(&kq->kq_knpend, &marker, kn_tqe);

        /* Timeouts do not return EWOULDBLOCK. */
        if (error == EWOULDBLOCK)
                error = 0;

done:
        lwkt_reltoken(&kq_token);
        return (error);
}

/*
 * MPALMOSTSAFE
 */
int
sys_kevent(struct kevent_args *uap)
{
        struct thread *td = curthread;
        struct proc *p = td->td_proc;
        struct timespec ts, *tsp;
        struct kqueue *kq;
        struct file *fp = NULL;
        struct kevent_copyin_args *kap, ka;
        int error;

        if (uap->timeout) {
                error = copyin(uap->timeout, &ts, sizeof(ts));
                if (error)
                        return (error);
                tsp = &ts;
        } else {
                tsp = NULL;
        }

        fp = holdfp(p->p_fd, uap->fd, -1);
        if (fp == NULL)
                return (EBADF);
        if (fp->f_type != DTYPE_KQUEUE) {
                fdrop(fp);
                return (EBADF);
        }

        kq = (struct kqueue *)fp->f_data;

        kap = &ka;
        kap->ka = uap;
        kap->pchanges = 0;

        error = kern_kevent(kq, uap->nevents, &uap->sysmsg_result, kap,
                            kevent_copyin, kevent_copyout, tsp);

        fdrop(fp);

        return (error);
}
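
/*
 * Illustrative sketch (not part of the original file): the timeout
 * semantics implemented by kern_kevent() above and kqueue_sleep() below.
 * A NULL timespec blocks indefinitely; an all-zero timespec never
 * sleeps, so kevent() acts as a non-blocking poll:
 *
 *      struct timespec zero = { 0, 0 };
 *      int n = kevent(kq, NULL, 0, evlist, nevlist, &zero);
 *      // n == 0: nothing pending; the internal EWOULDBLOCK is
 *      // converted to a normal return, not an error
 */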

int
kqueue_register(struct kqueue *kq, struct kevent *kev)
{
        struct filedesc *fdp = kq->kq_fdp;
        struct filterops *fops;
        struct file *fp = NULL;
        struct knote *kn = NULL;
        int error = 0;

        if (kev->filter < 0) {
                if (kev->filter + EVFILT_SYSCOUNT < 0)
                        return (EINVAL);
                fops = sysfilt_ops[~kev->filter];       /* to 0-base index */
        } else {
                /*
                 * XXX
                 * filter attach routine is responsible for ensuring that
                 * the identifier can be attached to it.
                 */
                kprintf("unknown filter: %d\n", kev->filter);
                return (EINVAL);
        }

        lwkt_gettoken(&kq_token);
        if (fops->f_flags & FILTEROP_ISFD) {
                /* validate descriptor */
                fp = holdfp(fdp, kev->ident, -1);
                if (fp == NULL) {
                        lwkt_reltoken(&kq_token);
                        return (EBADF);
                }

                SLIST_FOREACH(kn, &fp->f_klist, kn_link) {
                        if (kn->kn_kq == kq &&
                            kn->kn_filter == kev->filter &&
                            kn->kn_id == kev->ident) {
                                break;
                        }
                }
        } else {
                if (kq->kq_knhashmask) {
                        struct klist *list;

                        list = &kq->kq_knhash[
                            KN_HASH((u_long)kev->ident, kq->kq_knhashmask)];
                        SLIST_FOREACH(kn, list, kn_link) {
                                if (kn->kn_id == kev->ident &&
                                    kn->kn_filter == kev->filter)
                                        break;
                        }
                }
        }

        if (kn == NULL && ((kev->flags & EV_ADD) == 0)) {
                error = ENOENT;
                goto done;
        }

        /*
         * kn now contains the matching knote, or NULL if no match
         */
        if (kev->flags & EV_ADD) {
                if (kn == NULL) {
                        kn = knote_alloc();
                        if (kn == NULL) {
                                error = ENOMEM;
                                goto done;
                        }
                        kn->kn_fp = fp;
                        kn->kn_kq = kq;
                        kn->kn_fop = fops;

                        /*
                         * apply reference count to knote structure, and
                         * do not release it at the end of this routine.
                         */
                        fp = NULL;

                        kn->kn_sfflags = kev->fflags;
                        kn->kn_sdata = kev->data;
                        kev->fflags = 0;
                        kev->data = 0;
                        kn->kn_kevent = *kev;

                        /*
                         * KN_PROCESSING prevents the knote from getting
                         * ripped out from under us while we are trying
                         * to attach it, in case the attach blocks.
                         */
                        kn->kn_status = KN_PROCESSING;
                        knote_attach(kn);
                        if ((error = filter_attach(kn)) != 0) {
                                kn->kn_status |= KN_DELETING;
                                knote_drop(kn);
                                goto done;
                        }

                        /*
                         * Interlock against close races which either tried
                         * to remove our knote while we were blocked or missed
                         * it entirely prior to our attachment.  We do not
                         * want to end up with a knote on a closed descriptor.
                         */
                        if ((fops->f_flags & FILTEROP_ISFD) &&
                            checkfdclosed(fdp, kev->ident, kn->kn_fp)) {
                                kn->kn_status |= KN_DELETING;
                        }
                } else {
                        /*
                         * The user may change some filter values after the
                         * initial EV_ADD, but doing so will not reset any
                         * filters which have already been triggered.
                         */
                        kn->kn_status |= KN_PROCESSING;
                        kn->kn_sfflags = kev->fflags;
                        kn->kn_sdata = kev->data;
                        kn->kn_kevent.udata = kev->udata;
                }

                /*
                 * Execute the filter event to immediately activate the
                 * knote if necessary.  We still own KN_PROCESSING so
                 * process any KN_REPROCESS races as well.
                 */
                for (;;) {
                        if (kn->kn_status & KN_DELETING) {
                                error = EBADF;
                                knote_detach_and_drop_locked(kn);
                                goto done;
                        }
                        if (filter_event(kn, 0))
                                KNOTE_ACTIVATE(kn);
                        if ((kn->kn_status & KN_REPROCESS) == 0)
                                break;
                        if (kn->kn_status & KN_WAITING) {
                                kn->kn_status &= ~KN_WAITING;
                                wakeup(kn);
                        }
                        kn->kn_status &= ~KN_REPROCESS;
                }
                kn->kn_status &= ~KN_PROCESSING;
        } else if (kev->flags & EV_DELETE) {
                /*
                 * Attempt to delete the existing knote
                 */
                knote_detach_and_drop(kn);
                goto done;
        }

        /*
         * Disablement does not deactivate a knote here.
         */
        if ((kev->flags & EV_DISABLE) &&
            ((kn->kn_status & KN_DISABLED) == 0)) {
                kn->kn_status |= KN_DISABLED;
        }

        /*
         * Re-enablement may have to immediately enqueue an active knote.
         */
        if ((kev->flags & EV_ENABLE) && (kn->kn_status & KN_DISABLED)) {
                kn->kn_status &= ~KN_DISABLED;
                if ((kn->kn_status & KN_ACTIVE) &&
                    ((kn->kn_status & KN_QUEUED) == 0)) {
                        knote_enqueue(kn);
                }
        }

done:
        lwkt_reltoken(&kq_token);
        if (fp != NULL)
                fdrop(fp);
        return (error);
}
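
/*
 * Illustrative sketch (not part of the original file): the flag
 * combinations kqueue_register() handles.  A knote can be disabled and
 * re-enabled without losing a triggered (KN_ACTIVE) event, and EV_DELETE
 * removes it entirely.  `fd' is a hypothetical descriptor already under
 * watch on `kq':
 *
 *      EV_SET(&kev, fd, EVFILT_READ, EV_DISABLE, 0, 0, NULL);
 *      kevent(kq, &kev, 1, NULL, 0, NULL);     // suppress delivery
 *      EV_SET(&kev, fd, EVFILT_READ, EV_ENABLE, 0, 0, NULL);
 *      kevent(kq, &kev, 1, NULL, 0, NULL);     // pending event re-queued
 *      EV_SET(&kev, fd, EVFILT_READ, EV_DELETE, 0, 0, NULL);
 *      kevent(kq, &kev, 1, NULL, 0, NULL);     // knote detached and dropped
 */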

/*
 * Block as necessary until the target time is reached.
 * If tsp is NULL we block indefinitely.  If tsp->tv_sec/tv_nsec are both
 * 0 we do not block at all.
 */
static int
kqueue_sleep(struct kqueue *kq, struct timespec *tsp)
{
        int error = 0;

        if (tsp == NULL) {
                kq->kq_state |= KQ_SLEEP;
                error = tsleep(kq, PCATCH, "kqread", 0);
        } else if (tsp->tv_sec == 0 && tsp->tv_nsec == 0) {
                error = EWOULDBLOCK;
        } else {
                struct timespec ats;
                struct timespec atx = *tsp;
                int timeout;

                nanouptime(&ats);
                timespecsub(&atx, &ats);
                if (atx.tv_sec < 0) {
                        error = EWOULDBLOCK;
                } else {
                        timeout = atx.tv_sec > 24 * 60 * 60 ?
                            24 * 60 * 60 * hz : tstohz_high(&atx);
                        kq->kq_state |= KQ_SLEEP;
                        error = tsleep(kq, PCATCH, "kqread", timeout);
                }
        }

        /* don't restart after signals... */
        if (error == ERESTART)
                return (EINTR);

        return (error);
}

/*
 * Scan the kqueue, return the number of active events placed in kevp up
 * to count.
 *
 * Continuous mode events may get recycled, do not continue scanning past
 * marker unless no events have been collected.
 */
static int
kqueue_scan(struct kqueue *kq, struct kevent *kevp, int count,
            struct knote *marker)
{
        struct knote *kn, local_marker;
        int total;

        total = 0;
        local_marker.kn_filter = EVFILT_MARKER;
        local_marker.kn_status = KN_PROCESSING;

        /*
         * Collect events.
         */
        TAILQ_INSERT_HEAD(&kq->kq_knpend, &local_marker, kn_tqe);
        while (count) {
                kn = TAILQ_NEXT(&local_marker, kn_tqe);
                if (kn->kn_filter == EVFILT_MARKER) {
                        /* Marker reached, we are done */
                        if (kn == marker)
                                break;

                        /* Move local marker past some other thread's marker */
                        kn = TAILQ_NEXT(kn, kn_tqe);
                        TAILQ_REMOVE(&kq->kq_knpend, &local_marker, kn_tqe);
                        TAILQ_INSERT_BEFORE(kn, &local_marker, kn_tqe);
                        continue;
                }

                /*
                 * We can't skip a knote undergoing processing, otherwise
                 * we risk not returning it when the user process expects
                 * it should be returned.  Sleep and retry.
                 */
                if (kn->kn_status & KN_PROCESSING) {
                        kn->kn_status |= KN_WAITING | KN_REPROCESS;
                        tsleep(kn, 0, "kqepts", hz);
                        continue;
                }

                /*
                 * Remove the event for processing.
                 *
                 * WARNING!  We must leave KN_QUEUED set to prevent the
                 *           event from being KNOTE_ACTIVATE()d while
                 *           the queue state is in limbo, in case we
                 *           block.
                 *
                 * WARNING!  We must set KN_PROCESSING to avoid races
                 *           against deletion or another thread's
                 *           processing.
                 */
                TAILQ_REMOVE(&kq->kq_knpend, kn, kn_tqe);
                kq->kq_count--;
                kn->kn_status |= KN_PROCESSING;

                /*
                 * We have to deal with an extremely important race against
                 * file descriptor close()s here.  The file descriptor can
                 * disappear MPSAFE, and there is a small window of
                 * opportunity between that and the call to knote_fdclose().
                 *
                 * If we hit that window here while doselect or dopoll is
                 * trying to delete a spurious event they will not be able
                 * to match up the event against a knote and will go haywire.
                 */
                if ((kn->kn_fop->f_flags & FILTEROP_ISFD) &&
                    checkfdclosed(kq->kq_fdp, kn->kn_kevent.ident, kn->kn_fp)) {
                        kn->kn_status |= KN_DELETING | KN_REPROCESS;
                }

                if (kn->kn_status & KN_DISABLED) {
                        /*
                         * If disabled we ensure the event is not queued
                         * but leave its active bit set.  On re-enablement
                         * the event may be immediately triggered.
                         */
                        kn->kn_status &= ~KN_QUEUED;
                } else if ((kn->kn_flags & EV_ONESHOT) == 0 &&
                           filter_event(kn, 0) == 0) {
                        /*
                         * If not running in one-shot mode and the event
                         * is no longer present we ensure it is removed
                         * from the queue and ignore it.
                         */
                        kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE);
                } else {
                        /*
                         * Post the event
                         */
                        *kevp++ = kn->kn_kevent;
                        ++total;
                        --count;

                        if (kn->kn_flags & EV_ONESHOT) {
                                kn->kn_status &= ~KN_QUEUED;
                                kn->kn_status |= KN_DELETING | KN_REPROCESS;
                        } else if (kn->kn_flags & EV_CLEAR) {
                                kn->kn_data = 0;
                                kn->kn_fflags = 0;
                                kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE);
                        } else {
                                TAILQ_INSERT_TAIL(&kq->kq_knpend, kn, kn_tqe);
                                kq->kq_count++;
                        }
                }

                /*
                 * Handle any post-processing states
                 */
                while (kn->kn_status & KN_REPROCESS) {
                        kn->kn_status &= ~KN_REPROCESS;
                        if (kn->kn_status & KN_DELETING) {
                                knote_detach_and_drop_locked(kn);
                                goto skip;
                        }
                        if (kn->kn_status & KN_WAITING) {
                                kn->kn_status &= ~KN_WAITING;
                                wakeup(kn);
                        }
                        if (filter_event(kn, 0))
                                KNOTE_ACTIVATE(kn);
                }
                kn->kn_status &= ~KN_PROCESSING;
skip:
                ;
        }
        TAILQ_REMOVE(&kq->kq_knpend, &local_marker, kn_tqe);

        return (total);
}
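
/*
 * Illustrative sketch (not part of the original file): the EV_CLEAR
 * branch in kqueue_scan() above gives edge-triggered behaviour.  With
 * EV_CLEAR, kn_data/kn_fflags are zeroed once reported, so a subsequent
 * kevent() call blocks until new state arrives instead of re-reporting
 * the old level:
 *
 *      EV_SET(&kev, fd, EVFILT_READ, EV_ADD | EV_CLEAR, 0, 0, NULL);
 *      kevent(kq, &kev, 1, NULL, 0, NULL);
 *      // the first wait reports N bytes readable; a second wait only
 *      // reports if additional data arrives afterwards
 */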

/*
 * XXX
 * This could be expanded to call kqueue_scan, if desired.
 *
 * MPSAFE
 */
static int
kqueue_read(struct file *fp, struct uio *uio, struct ucred *cred, int flags)
{
        return (ENXIO);
}

/*
 * MPSAFE
 */
static int
kqueue_write(struct file *fp, struct uio *uio, struct ucred *cred, int flags)
{
        return (ENXIO);
}

/*
 * MPALMOSTSAFE
 */
static int
kqueue_ioctl(struct file *fp, u_long com, caddr_t data,
             struct ucred *cred, struct sysmsg *msg)
{
        struct kqueue *kq;
        int error;

        lwkt_gettoken(&kq_token);
        kq = (struct kqueue *)fp->f_data;

        switch(com) {
        case FIOASYNC:
                if (*(int *)data)
                        kq->kq_state |= KQ_ASYNC;
                else
                        kq->kq_state &= ~KQ_ASYNC;
                error = 0;
                break;
        case FIOSETOWN:
                error = fsetown(*(int *)data, &kq->kq_sigio);
                break;
        default:
                error = ENOTTY;
                break;
        }
        lwkt_reltoken(&kq_token);
        return (error);
}

/*
 * MPSAFE
 */
static int
kqueue_stat(struct file *fp, struct stat *st, struct ucred *cred)
{
        struct kqueue *kq = (struct kqueue *)fp->f_data;

        bzero((void *)st, sizeof(*st));
        st->st_size = kq->kq_count;
        st->st_blksize = sizeof(struct kevent);
        st->st_mode = S_IFIFO;
        return (0);
}

/*
 * MPSAFE
 */
static int
kqueue_close(struct file *fp)
{
        struct kqueue *kq = (struct kqueue *)fp->f_data;

        kqueue_terminate(kq);

        fp->f_data = NULL;
        funsetown(kq->kq_sigio);

        kfree(kq, M_KQUEUE);
        return (0);
}

static void
kqueue_wakeup(struct kqueue *kq)
{
        if (kq->kq_state & KQ_SLEEP) {
                kq->kq_state &= ~KQ_SLEEP;
                wakeup(kq);
        }
        KNOTE(&kq->kq_kqinfo.ki_note, 0);
}

/*
 * Calls filterops f_attach function, acquiring mplock if filter is not
 * marked as FILTEROP_MPSAFE.
 */
static int
filter_attach(struct knote *kn)
{
        int ret;

        if (!(kn->kn_fop->f_flags & FILTEROP_MPSAFE)) {
                get_mplock();
                ret = kn->kn_fop->f_attach(kn);
                rel_mplock();
        } else {
                ret = kn->kn_fop->f_attach(kn);
        }

        return (ret);
}
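
/*
 * Illustrative sketch (not part of the original file): kqueue_stat()
 * above lets userland peek at the number of pending events without
 * draining them:
 *
 *      #include <sys/stat.h>
 *      #include <stdio.h>
 *
 *      struct stat st;
 *      if (fstat(kq, &st) == 0)
 *              printf("%ld events pending\n", (long)st.st_size);
 */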

/*
 * Detach the knote and drop it, destroying the knote.
 *
 * Calls filterops f_detach function, acquiring mplock if filter is not
 * marked as FILTEROP_MPSAFE.
 */
static void
knote_detach_and_drop(struct knote *kn)
{
        /*
         * If someone else is processing the knote we cannot destroy it now,
         * flag the request and return.
         */
        if (kn->kn_status & KN_PROCESSING) {
                kn->kn_status |= KN_DELETING | KN_REPROCESS;
                return;
        }
        kn->kn_status |= KN_PROCESSING | KN_DELETING;
        knote_detach_and_drop_locked(kn);
}

static void
knote_detach_and_drop_locked(struct knote *kn)
{
        if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) {
                kn->kn_fop->f_detach(kn);
        } else {
                get_mplock();
                kn->kn_fop->f_detach(kn);
                rel_mplock();
        }
        knote_drop(kn);
}

/*
 * Calls filterops f_event function, acquiring mplock if filter is not
 * marked as FILTEROP_MPSAFE.
 *
 * If the knote is in the middle of being created or deleted we cannot
 * safely call the filter op.
 */
static int
filter_event(struct knote *kn, long hint)
{
        int ret;

        if (kn->kn_fop->f_flags & FILTEROP_MPSAFE) {
                ret = kn->kn_fop->f_event(kn, hint);
        } else {
                get_mplock();
                ret = kn->kn_fop->f_event(kn, hint);
                rel_mplock();
        }
        return (ret);
}

/*
 * Walk down a list of knotes, activating them if their event has triggered.
 *
 * If we encounter any knotes which are undergoing processing we just mark
 * them for reprocessing and do not try to [re]activate the knote.  However,
 * if a hint is being passed we have to wait and that makes things a bit
 * sticky.
 */
void
knote(struct klist *list, long hint)
{
        struct knote *kn;

        lwkt_gettoken(&kq_token);
restart:
        SLIST_FOREACH(kn, list, kn_next) {
                if (kn->kn_status & KN_PROCESSING) {
                        /*
                         * Someone else is processing the knote, ask the
                         * other thread to reprocess it and don't mess
                         * with it otherwise.
                         */
                        if (hint == 0) {
                                kn->kn_status |= KN_REPROCESS;
                                continue;
                        }

                        /*
                         * If the hint is non-zero we have to wait.
                         *
                         * XXX This is a real problem, certain process
                         *     and signal filters will bump kn_data for
                         *     already-processed notes more than once if
                         *     we restart the list scan.  FIXME.
                         */
                        kprintf("Warning: knote() on busy "
                                "knote (ev=%d hint=%08lx)\n",
                                kn->kn_filter, hint);
                        kn->kn_status |= KN_WAITING | KN_REPROCESS;
                        tsleep(kn, 0, "knotec", hz);
                        goto restart;
                }

                /*
                 * Become the reprocessing master ourselves.
                 */
                kn->kn_status |= KN_PROCESSING;
                if (filter_event(kn, hint))
                        KNOTE_ACTIVATE(kn);
                while (kn->kn_status & KN_REPROCESS) {
                        kn->kn_status &= ~KN_REPROCESS;
                        if (kn->kn_status & KN_DELETING) {
                                knote_detach_and_drop_locked(kn);
                                goto restart;
                        }
                        if (kn->kn_status & KN_WAITING) {
                                kn->kn_status &= ~KN_WAITING;
                                wakeup(kn);
                        }
                        if (filter_event(kn, hint))
                                KNOTE_ACTIVATE(kn);
                }
                kn->kn_status &= ~KN_PROCESSING;
        }
        lwkt_reltoken(&kq_token);
}
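
/*
 * Illustrative sketch (not part of the original file): how an event
 * source delivers events.  A driver keeps a kqinfo in its softc and
 * calls KNOTE() (which resolves to the knote() function above) whenever
 * its state changes, just as kqueue_wakeup() does for the kqueue itself.
 * `foo_softc' and foo_rxintr() are hypothetical:
 *
 *      struct foo_softc {
 *              struct kqinfo   sc_kqinfo;
 *              // ... device state ...
 *      };
 *
 *      static void
 *      foo_rxintr(struct foo_softc *sc)
 *      {
 *              // data arrived; activate all knotes attached to the device
 *              KNOTE(&sc->sc_kqinfo.ki_note, 0);
 *      }
 */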

/*
 * Insert knote at head of klist.
 *
 * This function may only be called via a filter function and thus
 * kq_token should already be held and marked for processing.
 */
void
knote_insert(struct klist *klist, struct knote *kn)
{
        KKASSERT(kn->kn_status & KN_PROCESSING);
        ASSERT_LWKT_TOKEN_HELD(&kq_token);
        SLIST_INSERT_HEAD(klist, kn, kn_next);
}

/*
 * Remove knote from a klist
 *
 * This function may only be called via a filter function and thus
 * kq_token should already be held and marked for processing.
 */
void
knote_remove(struct klist *klist, struct knote *kn)
{
        KKASSERT(kn->kn_status & KN_PROCESSING);
        ASSERT_LWKT_TOKEN_HELD(&kq_token);
        SLIST_REMOVE(klist, kn, knote, kn_next);
}

/*
 * Remove all knotes from a specified klist
 *
 * Only called from aio.
 */
void
knote_empty(struct klist *list)
{
        struct knote *kn;

        lwkt_gettoken(&kq_token);
        while ((kn = SLIST_FIRST(list)) != NULL) {
                if (kn->kn_status & KN_PROCESSING) {
                        kn->kn_status |= KN_WAITING | KN_REPROCESS;
                        tsleep(kn, 0, "kqepts", hz);
                        continue;
                }
                knote_detach_and_drop(kn);
        }
        lwkt_reltoken(&kq_token);
}

/*
 * remove all knotes referencing a specified fd
 */
void
knote_fdclose(struct file *fp, struct filedesc *fdp, int fd)
{
        struct knote *kn;

        lwkt_gettoken(&kq_token);
restart:
        SLIST_FOREACH(kn, &fp->f_klist, kn_link) {
                if (kn->kn_kq->kq_fdp == fdp && kn->kn_id == fd) {
                        if (kn->kn_status & KN_PROCESSING) {
                                kn->kn_status |= KN_WAITING | KN_REPROCESS;
                                tsleep(kn, 0, "kqepts", hz);
                        } else {
                                knote_detach_and_drop(kn);
                        }
                        goto restart;
                }
        }
        lwkt_reltoken(&kq_token);
}

/*
 * Low level attach function.
 *
 * The knote should already be marked for processing.
 */
static void
knote_attach(struct knote *kn)
{
        struct klist *list;
        struct kqueue *kq = kn->kn_kq;

        if (kn->kn_fop->f_flags & FILTEROP_ISFD) {
                KKASSERT(kn->kn_fp);
                list = &kn->kn_fp->f_klist;
        } else {
                if (kq->kq_knhashmask == 0)
                        kq->kq_knhash = hashinit(KN_HASHSIZE, M_KQUEUE,
                                                 &kq->kq_knhashmask);
                list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)];
        }
        SLIST_INSERT_HEAD(list, kn, kn_link);
        TAILQ_INSERT_HEAD(&kq->kq_knlist, kn, kn_kqlink);
}

/*
 * Low level drop function.
 *
 * The knote should already be marked for processing.
 */
static void
knote_drop(struct knote *kn)
{
        struct kqueue *kq;
        struct klist *list;

        kq = kn->kn_kq;

        if (kn->kn_fop->f_flags & FILTEROP_ISFD)
                list = &kn->kn_fp->f_klist;
        else
                list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)];

        SLIST_REMOVE(list, kn, knote, kn_link);
        TAILQ_REMOVE(&kq->kq_knlist, kn, kn_kqlink);
        if (kn->kn_status & KN_QUEUED)
                knote_dequeue(kn);
        if (kn->kn_fop->f_flags & FILTEROP_ISFD) {
                fdrop(kn->kn_fp);
                kn->kn_fp = NULL;
        }
        knote_free(kn);
}
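
/*
 * Illustrative sketch (not part of the original file): the pattern a
 * filter's f_attach/f_detach pair follows with knote_insert() and
 * knote_remove() above, mirroring kqueue_kqfilter()/filt_kqdetach()
 * earlier in this file.  `foo_softc' and `foo_filtops' are hypothetical:
 *
 *      static int
 *      filt_fooattach(struct knote *kn)
 *      {
 *              struct foo_softc *sc = kn->kn_fp->f_data;
 *
 *              kn->kn_fop = &foo_filtops;
 *              knote_insert(&sc->sc_kqinfo.ki_note, kn);
 *              return (0);
 *      }
 *
 *      static void
 *      filt_foodetach(struct knote *kn)
 *      {
 *              struct foo_softc *sc = kn->kn_fp->f_data;
 *
 *              knote_remove(&sc->sc_kqinfo.ki_note, kn);
 *      }
 */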

/*
 * Low level enqueue function.
 *
 * The knote should already be marked for processing.
 */
static void
knote_enqueue(struct knote *kn)
{
        struct kqueue *kq = kn->kn_kq;

        KASSERT((kn->kn_status & KN_QUEUED) == 0, ("knote already queued"));
        TAILQ_INSERT_TAIL(&kq->kq_knpend, kn, kn_tqe);
        kn->kn_status |= KN_QUEUED;
        ++kq->kq_count;

        /*
         * Send SIGIO on request (typically set up as a mailbox signal)
         */
        if (kq->kq_sigio && (kq->kq_state & KQ_ASYNC) && kq->kq_count == 1)
                pgsigio(kq->kq_sigio, SIGIO, 0);

        kqueue_wakeup(kq);
}

/*
 * Low level dequeue function.
 *
 * The knote should already be marked for processing.
 */
static void
knote_dequeue(struct knote *kn)
{
        struct kqueue *kq = kn->kn_kq;

        KASSERT(kn->kn_status & KN_QUEUED, ("knote not queued"));
        TAILQ_REMOVE(&kq->kq_knpend, kn, kn_tqe);
        kn->kn_status &= ~KN_QUEUED;
        kq->kq_count--;
}

static void
knote_init(void)
{
        knote_zone = zinit("KNOTE", sizeof(struct knote), 0, 0, 1);
}
SYSINIT(knote, SI_SUB_PSEUDO, SI_ORDER_ANY, knote_init, NULL)

static struct knote *
knote_alloc(void)
{
        return ((struct knote *)zalloc(knote_zone));
}

static void
knote_free(struct knote *kn)
{
        zfree(knote_zone, kn);
}