1 /*- 2 * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * $FreeBSD: src/sys/kern/kern_event.c,v 1.2.2.10 2004/04/04 07:03:14 cperciva Exp $ 27 * $DragonFly: src/sys/kern/kern_event.c,v 1.33 2007/02/03 17:05:57 corecode Exp $ 28 */ 29 30 #include <sys/param.h> 31 #include <sys/systm.h> 32 #include <sys/kernel.h> 33 #include <sys/proc.h> 34 #include <sys/malloc.h> 35 #include <sys/unistd.h> 36 #include <sys/file.h> 37 #include <sys/lock.h> 38 #include <sys/fcntl.h> 39 #include <sys/select.h> 40 #include <sys/queue.h> 41 #include <sys/event.h> 42 #include <sys/eventvar.h> 43 #include <sys/poll.h> 44 #include <sys/protosw.h> 45 #include <sys/socket.h> 46 #include <sys/socketvar.h> 47 #include <sys/stat.h> 48 #include <sys/sysctl.h> 49 #include <sys/sysproto.h> 50 #include <sys/uio.h> 51 #include <sys/thread2.h> 52 #include <sys/signalvar.h> 53 #include <sys/filio.h> 54 #include <sys/file2.h> 55 56 #include <vm/vm_zone.h> 57 58 MALLOC_DEFINE(M_KQUEUE, "kqueue", "memory for kqueue system"); 59 60 static int kqueue_scan(struct file *fp, int maxevents, 61 struct kevent *ulistp, const struct timespec *timeout, 62 struct thread *td, int *res); 63 static int kqueue_read(struct file *fp, struct uio *uio, 64 struct ucred *cred, int flags); 65 static int kqueue_write(struct file *fp, struct uio *uio, 66 struct ucred *cred, int flags); 67 static int kqueue_ioctl(struct file *fp, u_long com, caddr_t data, 68 struct ucred *cred, struct sysmsg *msg); 69 static int kqueue_poll(struct file *fp, int events, struct ucred *cred); 70 static int kqueue_kqfilter(struct file *fp, struct knote *kn); 71 static int kqueue_stat(struct file *fp, struct stat *st, 72 struct ucred *cred); 73 static int kqueue_close(struct file *fp); 74 static void kqueue_wakeup(struct kqueue *kq); 75 76 /* 77 * MPSAFE 78 */ 79 static struct fileops kqueueops = { 80 .fo_read = kqueue_read, 81 .fo_write = kqueue_write, 82 .fo_ioctl = kqueue_ioctl, 83 .fo_poll = kqueue_poll, 84 .fo_kqfilter = kqueue_kqfilter, 85 .fo_stat = kqueue_stat, 86 .fo_close = kqueue_close, 87 .fo_shutdown = nofo_shutdown 88 }; 89 90 static void knote_attach(struct knote *kn, struct filedesc *fdp); 91 static void knote_drop(struct knote *kn, struct thread *td); 92 static void knote_enqueue(struct knote *kn); 93 static void knote_dequeue(struct knote *kn); 94 static void knote_init(void); 95 static struct knote *knote_alloc(void); 96 static void knote_free(struct knote *kn); 97 98 static void filt_kqdetach(struct knote *kn); 99 static int filt_kqueue(struct knote *kn, long hint); 100 static int filt_procattach(struct knote *kn); 101 static void filt_procdetach(struct knote *kn); 102 static int filt_proc(struct knote *kn, long hint); 103 static int filt_fileattach(struct knote *kn); 104 static void filt_timerexpire(void *knx); 105 static int filt_timerattach(struct knote *kn); 106 static void filt_timerdetach(struct knote *kn); 107 static int filt_timer(struct knote *kn, long hint); 108 109 static struct filterops file_filtops = 110 { 1, filt_fileattach, NULL, NULL }; 111 static struct filterops kqread_filtops = 112 { 1, NULL, filt_kqdetach, filt_kqueue }; 113 static struct filterops proc_filtops = 114 { 0, filt_procattach, filt_procdetach, filt_proc }; 115 static struct filterops timer_filtops = 116 { 0, filt_timerattach, filt_timerdetach, filt_timer }; 117 118 static vm_zone_t knote_zone; 119 static int kq_ncallouts = 0; 120 static int kq_calloutmax = (4 * 1024); 121 SYSCTL_INT(_kern, OID_AUTO, kq_calloutmax, CTLFLAG_RW, 122 &kq_calloutmax, 0, "Maximum number of callouts allocated for kqueue"); 123 124 #define KNOTE_ACTIVATE(kn) do { \ 125 kn->kn_status |= KN_ACTIVE; \ 126 if ((kn->kn_status & (KN_QUEUED | KN_DISABLED)) == 0) \ 127 knote_enqueue(kn); \ 128 } while(0) 129 130 #define KN_HASHSIZE 64 /* XXX should be tunable */ 131 #define KN_HASH(val, mask) (((val) ^ (val >> 8)) & (mask)) 132 133 extern struct filterops aio_filtops; 134 extern struct filterops sig_filtops; 135 136 /* 137 * Table for for all system-defined filters. 138 */ 139 static struct filterops *sysfilt_ops[] = { 140 &file_filtops, /* EVFILT_READ */ 141 &file_filtops, /* EVFILT_WRITE */ 142 &aio_filtops, /* EVFILT_AIO */ 143 &file_filtops, /* EVFILT_VNODE */ 144 &proc_filtops, /* EVFILT_PROC */ 145 &sig_filtops, /* EVFILT_SIGNAL */ 146 &timer_filtops, /* EVFILT_TIMER */ 147 }; 148 149 static int 150 filt_fileattach(struct knote *kn) 151 { 152 return (fo_kqfilter(kn->kn_fp, kn)); 153 } 154 155 /* 156 * MPALMOSTSAFE - acquires mplock 157 */ 158 static int 159 kqueue_kqfilter(struct file *fp, struct knote *kn) 160 { 161 struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data; 162 163 get_mplock(); 164 if (kn->kn_filter != EVFILT_READ) { 165 rel_mplock(); 166 return (1); 167 } 168 169 kn->kn_fop = &kqread_filtops; 170 SLIST_INSERT_HEAD(&kq->kq_sel.si_note, kn, kn_selnext); 171 rel_mplock(); 172 return (0); 173 } 174 175 static void 176 filt_kqdetach(struct knote *kn) 177 { 178 struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data; 179 180 SLIST_REMOVE(&kq->kq_sel.si_note, kn, knote, kn_selnext); 181 } 182 183 /*ARGSUSED*/ 184 static int 185 filt_kqueue(struct knote *kn, long hint) 186 { 187 struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data; 188 189 kn->kn_data = kq->kq_count; 190 return (kn->kn_data > 0); 191 } 192 193 static int 194 filt_procattach(struct knote *kn) 195 { 196 struct proc *p; 197 int immediate; 198 199 immediate = 0; 200 p = pfind(kn->kn_id); 201 if (p == NULL && (kn->kn_sfflags & NOTE_EXIT)) { 202 p = zpfind(kn->kn_id); 203 immediate = 1; 204 } 205 if (p == NULL) 206 return (ESRCH); 207 if (! PRISON_CHECK(curproc->p_ucred, p->p_ucred)) 208 return (EACCES); 209 210 kn->kn_ptr.p_proc = p; 211 kn->kn_flags |= EV_CLEAR; /* automatically set */ 212 213 /* 214 * internal flag indicating registration done by kernel 215 */ 216 if (kn->kn_flags & EV_FLAG1) { 217 kn->kn_data = kn->kn_sdata; /* ppid */ 218 kn->kn_fflags = NOTE_CHILD; 219 kn->kn_flags &= ~EV_FLAG1; 220 } 221 222 /* XXX lock the proc here while adding to the list? */ 223 SLIST_INSERT_HEAD(&p->p_klist, kn, kn_selnext); 224 225 /* 226 * Immediately activate any exit notes if the target process is a 227 * zombie. This is necessary to handle the case where the target 228 * process, e.g. a child, dies before the kevent is registered. 229 */ 230 if (immediate && filt_proc(kn, NOTE_EXIT)) 231 KNOTE_ACTIVATE(kn); 232 233 return (0); 234 } 235 236 /* 237 * The knote may be attached to a different process, which may exit, 238 * leaving nothing for the knote to be attached to. So when the process 239 * exits, the knote is marked as DETACHED and also flagged as ONESHOT so 240 * it will be deleted when read out. However, as part of the knote deletion, 241 * this routine is called, so a check is needed to avoid actually performing 242 * a detach, because the original process does not exist any more. 243 */ 244 static void 245 filt_procdetach(struct knote *kn) 246 { 247 struct proc *p; 248 249 if (kn->kn_status & KN_DETACHED) 250 return; 251 /* XXX locking? this might modify another process. */ 252 p = kn->kn_ptr.p_proc; 253 SLIST_REMOVE(&p->p_klist, kn, knote, kn_selnext); 254 } 255 256 static int 257 filt_proc(struct knote *kn, long hint) 258 { 259 u_int event; 260 261 /* 262 * mask off extra data 263 */ 264 event = (u_int)hint & NOTE_PCTRLMASK; 265 266 /* 267 * if the user is interested in this event, record it. 268 */ 269 if (kn->kn_sfflags & event) 270 kn->kn_fflags |= event; 271 272 /* 273 * Process is gone, so flag the event as finished. Detach the 274 * knote from the process now because the process will be poof, 275 * gone later on. 276 */ 277 if (event == NOTE_EXIT) { 278 struct proc *p = kn->kn_ptr.p_proc; 279 if ((kn->kn_status & KN_DETACHED) == 0) { 280 SLIST_REMOVE(&p->p_klist, kn, knote, kn_selnext); 281 kn->kn_status |= KN_DETACHED; 282 kn->kn_data = p->p_xstat; 283 kn->kn_ptr.p_proc = NULL; 284 } 285 kn->kn_flags |= (EV_EOF | EV_ONESHOT); 286 return (1); 287 } 288 289 /* 290 * process forked, and user wants to track the new process, 291 * so attach a new knote to it, and immediately report an 292 * event with the parent's pid. 293 */ 294 if ((event == NOTE_FORK) && (kn->kn_sfflags & NOTE_TRACK)) { 295 struct kevent kev; 296 int error; 297 298 /* 299 * register knote with new process. 300 */ 301 kev.ident = hint & NOTE_PDATAMASK; /* pid */ 302 kev.filter = kn->kn_filter; 303 kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1; 304 kev.fflags = kn->kn_sfflags; 305 kev.data = kn->kn_id; /* parent */ 306 kev.udata = kn->kn_kevent.udata; /* preserve udata */ 307 error = kqueue_register(kn->kn_kq, &kev, NULL); 308 if (error) 309 kn->kn_fflags |= NOTE_TRACKERR; 310 } 311 312 return (kn->kn_fflags != 0); 313 } 314 315 static void 316 filt_timerexpire(void *knx) 317 { 318 struct knote *kn = knx; 319 struct callout *calloutp; 320 struct timeval tv; 321 int tticks; 322 323 kn->kn_data++; 324 KNOTE_ACTIVATE(kn); 325 326 if ((kn->kn_flags & EV_ONESHOT) == 0) { 327 tv.tv_sec = kn->kn_sdata / 1000; 328 tv.tv_usec = (kn->kn_sdata % 1000) * 1000; 329 tticks = tvtohz_high(&tv); 330 calloutp = (struct callout *)kn->kn_hook; 331 callout_reset(calloutp, tticks, filt_timerexpire, kn); 332 } 333 } 334 335 /* 336 * data contains amount of time to sleep, in milliseconds 337 */ 338 static int 339 filt_timerattach(struct knote *kn) 340 { 341 struct callout *calloutp; 342 struct timeval tv; 343 int tticks; 344 345 if (kq_ncallouts >= kq_calloutmax) 346 return (ENOMEM); 347 kq_ncallouts++; 348 349 tv.tv_sec = kn->kn_sdata / 1000; 350 tv.tv_usec = (kn->kn_sdata % 1000) * 1000; 351 tticks = tvtohz_high(&tv); 352 353 kn->kn_flags |= EV_CLEAR; /* automatically set */ 354 MALLOC(calloutp, struct callout *, sizeof(*calloutp), 355 M_KQUEUE, M_WAITOK); 356 callout_init(calloutp); 357 kn->kn_hook = (caddr_t)calloutp; 358 callout_reset(calloutp, tticks, filt_timerexpire, kn); 359 360 return (0); 361 } 362 363 static void 364 filt_timerdetach(struct knote *kn) 365 { 366 struct callout *calloutp; 367 368 calloutp = (struct callout *)kn->kn_hook; 369 callout_stop(calloutp); 370 FREE(calloutp, M_KQUEUE); 371 kq_ncallouts--; 372 } 373 374 static int 375 filt_timer(struct knote *kn, long hint) 376 { 377 378 return (kn->kn_data != 0); 379 } 380 381 int 382 sys_kqueue(struct kqueue_args *uap) 383 { 384 struct proc *p = curproc; 385 struct filedesc *fdp = p->p_fd; 386 struct kqueue *kq; 387 struct file *fp; 388 int fd, error; 389 390 error = falloc(p, &fp, &fd); 391 if (error) 392 return (error); 393 fp->f_flag = FREAD | FWRITE; 394 fp->f_type = DTYPE_KQUEUE; 395 fp->f_ops = &kqueueops; 396 397 kq = kmalloc(sizeof(struct kqueue), M_KQUEUE, M_WAITOK | M_ZERO); 398 TAILQ_INIT(&kq->kq_head); 399 kq->kq_fdp = fdp; 400 fp->f_data = kq; 401 402 fsetfd(p, fp, fd); 403 uap->sysmsg_result = fd; 404 fdrop(fp); 405 return (error); 406 } 407 408 int 409 sys_kevent(struct kevent_args *uap) 410 { 411 struct thread *td = curthread; 412 struct proc *p = td->td_proc; 413 struct kevent *kevp; 414 struct kqueue *kq; 415 struct file *fp = NULL; 416 struct timespec ts; 417 int i, n, nerrors, error; 418 419 KKASSERT(p); 420 421 fp = holdfp(p->p_fd, uap->fd, -1); 422 if (fp == NULL) 423 return (EBADF); 424 if (fp->f_type != DTYPE_KQUEUE) { 425 fdrop(fp); 426 return (EBADF); 427 } 428 429 if (uap->timeout != NULL) { 430 error = copyin(uap->timeout, &ts, sizeof(ts)); 431 if (error) 432 goto done; 433 uap->timeout = &ts; 434 } 435 436 kq = (struct kqueue *)fp->f_data; 437 nerrors = 0; 438 439 while (uap->nchanges > 0) { 440 n = uap->nchanges > KQ_NEVENTS ? KQ_NEVENTS : uap->nchanges; 441 error = copyin(uap->changelist, kq->kq_kev, 442 n * sizeof(struct kevent)); 443 if (error) 444 goto done; 445 for (i = 0; i < n; i++) { 446 kevp = &kq->kq_kev[i]; 447 kevp->flags &= ~EV_SYSFLAGS; 448 error = kqueue_register(kq, kevp, td); 449 if (error) { 450 if (uap->nevents != 0) { 451 kevp->flags = EV_ERROR; 452 kevp->data = error; 453 (void) copyout((caddr_t)kevp, 454 (caddr_t)uap->eventlist, 455 sizeof(*kevp)); 456 uap->eventlist++; 457 uap->nevents--; 458 nerrors++; 459 } else { 460 goto done; 461 } 462 } 463 } 464 uap->nchanges -= n; 465 uap->changelist += n; 466 } 467 if (nerrors) { 468 uap->sysmsg_result = nerrors; 469 error = 0; 470 goto done; 471 } 472 473 error = kqueue_scan(fp, uap->nevents, uap->eventlist, uap->timeout, td, &uap->sysmsg_result); 474 done: 475 if (fp != NULL) 476 fdrop(fp); 477 return (error); 478 } 479 480 int 481 kqueue_register(struct kqueue *kq, struct kevent *kev, struct thread *td) 482 { 483 struct filedesc *fdp = kq->kq_fdp; 484 struct filterops *fops; 485 struct file *fp = NULL; 486 struct knote *kn = NULL; 487 int error = 0; 488 489 if (kev->filter < 0) { 490 if (kev->filter + EVFILT_SYSCOUNT < 0) 491 return (EINVAL); 492 fops = sysfilt_ops[~kev->filter]; /* to 0-base index */ 493 } else { 494 /* 495 * XXX 496 * filter attach routine is responsible for insuring that 497 * the identifier can be attached to it. 498 */ 499 kprintf("unknown filter: %d\n", kev->filter); 500 return (EINVAL); 501 } 502 503 if (fops->f_isfd) { 504 /* validate descriptor */ 505 fp = holdfp(fdp, kev->ident, -1); 506 if (fp == NULL) 507 return (EBADF); 508 509 if (kev->ident < fdp->fd_knlistsize) { 510 SLIST_FOREACH(kn, &fdp->fd_knlist[kev->ident], kn_link) 511 if (kq == kn->kn_kq && 512 kev->filter == kn->kn_filter) 513 break; 514 } 515 } else { 516 if (fdp->fd_knhashmask != 0) { 517 struct klist *list; 518 519 list = &fdp->fd_knhash[ 520 KN_HASH((u_long)kev->ident, fdp->fd_knhashmask)]; 521 SLIST_FOREACH(kn, list, kn_link) 522 if (kev->ident == kn->kn_id && 523 kq == kn->kn_kq && 524 kev->filter == kn->kn_filter) 525 break; 526 } 527 } 528 529 if (kn == NULL && ((kev->flags & EV_ADD) == 0)) { 530 error = ENOENT; 531 goto done; 532 } 533 534 /* 535 * kn now contains the matching knote, or NULL if no match 536 */ 537 if (kev->flags & EV_ADD) { 538 539 if (kn == NULL) { 540 kn = knote_alloc(); 541 if (kn == NULL) { 542 error = ENOMEM; 543 goto done; 544 } 545 kn->kn_fp = fp; 546 kn->kn_kq = kq; 547 kn->kn_fop = fops; 548 549 /* 550 * apply reference count to knote structure, and 551 * do not release it at the end of this routine. 552 */ 553 fp = NULL; 554 555 kn->kn_sfflags = kev->fflags; 556 kn->kn_sdata = kev->data; 557 kev->fflags = 0; 558 kev->data = 0; 559 kn->kn_kevent = *kev; 560 561 knote_attach(kn, fdp); 562 if ((error = fops->f_attach(kn)) != 0) { 563 knote_drop(kn, td); 564 goto done; 565 } 566 } else { 567 /* 568 * The user may change some filter values after the 569 * initial EV_ADD, but doing so will not reset any 570 * filter which have already been triggered. 571 */ 572 kn->kn_sfflags = kev->fflags; 573 kn->kn_sdata = kev->data; 574 kn->kn_kevent.udata = kev->udata; 575 } 576 577 crit_enter(); 578 if (kn->kn_fop->f_event(kn, 0)) 579 KNOTE_ACTIVATE(kn); 580 crit_exit(); 581 } else if (kev->flags & EV_DELETE) { 582 kn->kn_fop->f_detach(kn); 583 knote_drop(kn, td); 584 goto done; 585 } 586 587 if ((kev->flags & EV_DISABLE) && 588 ((kn->kn_status & KN_DISABLED) == 0)) { 589 crit_enter(); 590 kn->kn_status |= KN_DISABLED; 591 crit_exit(); 592 } 593 594 if ((kev->flags & EV_ENABLE) && (kn->kn_status & KN_DISABLED)) { 595 crit_enter(); 596 kn->kn_status &= ~KN_DISABLED; 597 if ((kn->kn_status & KN_ACTIVE) && 598 ((kn->kn_status & KN_QUEUED) == 0)) 599 knote_enqueue(kn); 600 crit_exit(); 601 } 602 603 done: 604 if (fp != NULL) 605 fdrop(fp); 606 return (error); 607 } 608 609 static int 610 kqueue_scan(struct file *fp, int maxevents, struct kevent *ulistp, 611 const struct timespec *tsp, struct thread *td, int *res) 612 { 613 struct kqueue *kq = (struct kqueue *)fp->f_data; 614 struct kevent *kevp; 615 struct timeval atv, rtv, ttv; 616 struct knote *kn, marker; 617 int count, timeout, nkev = 0, error = 0; 618 619 count = maxevents; 620 if (count == 0) 621 goto done; 622 623 if (tsp != NULL) { 624 TIMESPEC_TO_TIMEVAL(&atv, tsp); 625 if (itimerfix(&atv)) { 626 error = EINVAL; 627 goto done; 628 } 629 if (tsp->tv_sec == 0 && tsp->tv_nsec == 0) 630 timeout = -1; 631 else 632 timeout = atv.tv_sec > 24 * 60 * 60 ? 633 24 * 60 * 60 * hz : tvtohz_high(&atv); 634 getmicrouptime(&rtv); 635 timevaladd(&atv, &rtv); 636 } else { 637 atv.tv_sec = 0; 638 atv.tv_usec = 0; 639 timeout = 0; 640 } 641 goto start; 642 643 retry: 644 if (atv.tv_sec || atv.tv_usec) { 645 getmicrouptime(&rtv); 646 if (timevalcmp(&rtv, &atv, >=)) 647 goto done; 648 ttv = atv; 649 timevalsub(&ttv, &rtv); 650 timeout = ttv.tv_sec > 24 * 60 * 60 ? 651 24 * 60 * 60 * hz : tvtohz_high(&ttv); 652 } 653 654 start: 655 kevp = kq->kq_kev; 656 crit_enter(); 657 if (kq->kq_count == 0) { 658 if (timeout < 0) { 659 error = EWOULDBLOCK; 660 } else { 661 kq->kq_state |= KQ_SLEEP; 662 error = tsleep(kq, PCATCH, "kqread", timeout); 663 } 664 crit_exit(); 665 if (error == 0) 666 goto retry; 667 /* don't restart after signals... */ 668 if (error == ERESTART) 669 error = EINTR; 670 else if (error == EWOULDBLOCK) 671 error = 0; 672 goto done; 673 } 674 675 TAILQ_INSERT_TAIL(&kq->kq_head, &marker, kn_tqe); 676 while (count) { 677 kn = TAILQ_FIRST(&kq->kq_head); 678 TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe); 679 if (kn == &marker) { 680 crit_exit(); 681 if (count == maxevents) 682 goto retry; 683 goto done; 684 } 685 if (kn->kn_status & KN_DISABLED) { 686 kn->kn_status &= ~KN_QUEUED; 687 kq->kq_count--; 688 continue; 689 } 690 if ((kn->kn_flags & EV_ONESHOT) == 0 && 691 kn->kn_fop->f_event(kn, 0) == 0) { 692 kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE); 693 kq->kq_count--; 694 continue; 695 } 696 *kevp = kn->kn_kevent; 697 kevp++; 698 nkev++; 699 if (kn->kn_flags & EV_ONESHOT) { 700 kn->kn_status &= ~KN_QUEUED; 701 kq->kq_count--; 702 crit_exit(); 703 kn->kn_fop->f_detach(kn); 704 knote_drop(kn, td); 705 crit_enter(); 706 } else if (kn->kn_flags & EV_CLEAR) { 707 kn->kn_data = 0; 708 kn->kn_fflags = 0; 709 kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE); 710 kq->kq_count--; 711 } else { 712 TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe); 713 } 714 count--; 715 if (nkev == KQ_NEVENTS) { 716 crit_exit(); 717 error = copyout((caddr_t)&kq->kq_kev, (caddr_t)ulistp, 718 sizeof(struct kevent) * nkev); 719 ulistp += nkev; 720 nkev = 0; 721 kevp = kq->kq_kev; 722 crit_enter(); 723 if (error) 724 break; 725 } 726 } 727 TAILQ_REMOVE(&kq->kq_head, &marker, kn_tqe); 728 crit_exit(); 729 done: 730 if (nkev != 0) 731 error = copyout((caddr_t)&kq->kq_kev, (caddr_t)ulistp, 732 sizeof(struct kevent) * nkev); 733 *res = maxevents - count; 734 return (error); 735 } 736 737 /* 738 * XXX 739 * This could be expanded to call kqueue_scan, if desired. 740 * 741 * MPSAFE 742 */ 743 static int 744 kqueue_read(struct file *fp, struct uio *uio, struct ucred *cred, int flags) 745 { 746 return (ENXIO); 747 } 748 749 /* 750 * MPSAFE 751 */ 752 static int 753 kqueue_write(struct file *fp, struct uio *uio, struct ucred *cred, int flags) 754 { 755 return (ENXIO); 756 } 757 758 /* 759 * MPSAFE 760 */ 761 static int 762 kqueue_ioctl(struct file *fp, u_long com, caddr_t data, 763 struct ucred *cred, struct sysmsg *msg) 764 { 765 struct kqueue *kq; 766 int error; 767 768 get_mplock(); 769 kq = (struct kqueue *)fp->f_data; 770 771 switch(com) { 772 case FIOASYNC: 773 if (*(int *)data) 774 kq->kq_state |= KQ_ASYNC; 775 else 776 kq->kq_state &= ~KQ_ASYNC; 777 error = 0; 778 break; 779 case FIOSETOWN: 780 error = fsetown(*(int *)data, &kq->kq_sigio); 781 break; 782 default: 783 error = ENOTTY; 784 break; 785 } 786 rel_mplock(); 787 return (error); 788 } 789 790 /* 791 * MPALMOSTSAFE - acquires mplock 792 */ 793 static int 794 kqueue_poll(struct file *fp, int events, struct ucred *cred) 795 { 796 struct kqueue *kq = (struct kqueue *)fp->f_data; 797 int revents = 0; 798 799 get_mplock(); 800 crit_enter(); 801 if (events & (POLLIN | POLLRDNORM)) { 802 if (kq->kq_count) { 803 revents |= events & (POLLIN | POLLRDNORM); 804 } else { 805 selrecord(curthread, &kq->kq_sel); 806 kq->kq_state |= KQ_SEL; 807 } 808 } 809 crit_exit(); 810 rel_mplock(); 811 return (revents); 812 } 813 814 /* 815 * MPSAFE 816 */ 817 static int 818 kqueue_stat(struct file *fp, struct stat *st, struct ucred *cred) 819 { 820 struct kqueue *kq = (struct kqueue *)fp->f_data; 821 822 bzero((void *)st, sizeof(*st)); 823 st->st_size = kq->kq_count; 824 st->st_blksize = sizeof(struct kevent); 825 st->st_mode = S_IFIFO; 826 return (0); 827 } 828 829 /* 830 * MPALMOSTSAFE - acquires mplock 831 */ 832 static int 833 kqueue_close(struct file *fp) 834 { 835 struct thread *td = curthread; 836 struct proc *p = td->td_proc; 837 struct kqueue *kq = (struct kqueue *)fp->f_data; 838 struct filedesc *fdp; 839 struct knote **knp, *kn, *kn0; 840 int i; 841 842 KKASSERT(p); 843 get_mplock(); 844 fdp = p->p_fd; 845 for (i = 0; i < fdp->fd_knlistsize; i++) { 846 knp = &SLIST_FIRST(&fdp->fd_knlist[i]); 847 kn = *knp; 848 while (kn != NULL) { 849 kn0 = SLIST_NEXT(kn, kn_link); 850 if (kq == kn->kn_kq) { 851 kn->kn_fop->f_detach(kn); 852 fdrop(kn->kn_fp); 853 knote_free(kn); 854 *knp = kn0; 855 } else { 856 knp = &SLIST_NEXT(kn, kn_link); 857 } 858 kn = kn0; 859 } 860 } 861 if (fdp->fd_knhashmask != 0) { 862 for (i = 0; i < fdp->fd_knhashmask + 1; i++) { 863 knp = &SLIST_FIRST(&fdp->fd_knhash[i]); 864 kn = *knp; 865 while (kn != NULL) { 866 kn0 = SLIST_NEXT(kn, kn_link); 867 if (kq == kn->kn_kq) { 868 kn->kn_fop->f_detach(kn); 869 /* XXX non-fd release of kn->kn_ptr */ 870 knote_free(kn); 871 *knp = kn0; 872 } else { 873 knp = &SLIST_NEXT(kn, kn_link); 874 } 875 kn = kn0; 876 } 877 } 878 } 879 fp->f_data = NULL; 880 funsetown(kq->kq_sigio); 881 rel_mplock(); 882 883 kfree(kq, M_KQUEUE); 884 return (0); 885 } 886 887 static void 888 kqueue_wakeup(struct kqueue *kq) 889 { 890 if (kq->kq_state & KQ_SLEEP) { 891 kq->kq_state &= ~KQ_SLEEP; 892 wakeup(kq); 893 } 894 if (kq->kq_state & KQ_SEL) { 895 kq->kq_state &= ~KQ_SEL; 896 selwakeup(&kq->kq_sel); 897 } 898 KNOTE(&kq->kq_sel.si_note, 0); 899 } 900 901 /* 902 * walk down a list of knotes, activating them if their event has triggered. 903 */ 904 void 905 knote(struct klist *list, long hint) 906 { 907 struct knote *kn; 908 909 SLIST_FOREACH(kn, list, kn_selnext) 910 if (kn->kn_fop->f_event(kn, hint)) 911 KNOTE_ACTIVATE(kn); 912 } 913 914 /* 915 * remove all knotes from a specified klist 916 */ 917 void 918 knote_remove(struct thread *td, struct klist *list) 919 { 920 struct knote *kn; 921 922 while ((kn = SLIST_FIRST(list)) != NULL) { 923 kn->kn_fop->f_detach(kn); 924 knote_drop(kn, td); 925 } 926 } 927 928 /* 929 * remove all knotes referencing a specified fd 930 */ 931 void 932 knote_fdclose(struct proc *p, int fd) 933 { 934 struct filedesc *fdp = p->p_fd; 935 struct klist *list = &fdp->fd_knlist[fd]; 936 /* Take any thread of p */ 937 struct thread *td = FIRST_LWP_IN_PROC(p)->lwp_thread; 938 939 knote_remove(td, list); 940 } 941 942 static void 943 knote_attach(struct knote *kn, struct filedesc *fdp) 944 { 945 struct klist *list; 946 int size; 947 948 if (! kn->kn_fop->f_isfd) { 949 if (fdp->fd_knhashmask == 0) 950 fdp->fd_knhash = hashinit(KN_HASHSIZE, M_KQUEUE, 951 &fdp->fd_knhashmask); 952 list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)]; 953 goto done; 954 } 955 956 if (fdp->fd_knlistsize <= kn->kn_id) { 957 size = fdp->fd_knlistsize; 958 while (size <= kn->kn_id) 959 size += KQEXTENT; 960 MALLOC(list, struct klist *, 961 size * sizeof(struct klist *), M_KQUEUE, M_WAITOK); 962 bcopy((caddr_t)fdp->fd_knlist, (caddr_t)list, 963 fdp->fd_knlistsize * sizeof(struct klist *)); 964 bzero((caddr_t)list + 965 fdp->fd_knlistsize * sizeof(struct klist *), 966 (size - fdp->fd_knlistsize) * sizeof(struct klist *)); 967 if (fdp->fd_knlist != NULL) 968 FREE(fdp->fd_knlist, M_KQUEUE); 969 fdp->fd_knlistsize = size; 970 fdp->fd_knlist = list; 971 } 972 list = &fdp->fd_knlist[kn->kn_id]; 973 done: 974 SLIST_INSERT_HEAD(list, kn, kn_link); 975 kn->kn_status = 0; 976 } 977 978 /* 979 * should be called outside of a critical section, since we don't want to 980 * hold a critical section while calling fdrop and free. 981 */ 982 static void 983 knote_drop(struct knote *kn, struct thread *td) 984 { 985 struct filedesc *fdp; 986 struct klist *list; 987 988 KKASSERT(td->td_proc); 989 fdp = td->td_proc->p_fd; 990 if (kn->kn_fop->f_isfd) 991 list = &fdp->fd_knlist[kn->kn_id]; 992 else 993 list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)]; 994 995 SLIST_REMOVE(list, kn, knote, kn_link); 996 if (kn->kn_status & KN_QUEUED) 997 knote_dequeue(kn); 998 if (kn->kn_fop->f_isfd) 999 fdrop(kn->kn_fp); 1000 knote_free(kn); 1001 } 1002 1003 1004 static void 1005 knote_enqueue(struct knote *kn) 1006 { 1007 struct kqueue *kq = kn->kn_kq; 1008 1009 crit_enter(); 1010 KASSERT((kn->kn_status & KN_QUEUED) == 0, ("knote already queued")); 1011 1012 TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe); 1013 kn->kn_status |= KN_QUEUED; 1014 ++kq->kq_count; 1015 1016 /* 1017 * Send SIGIO on request (typically set up as a mailbox signal) 1018 */ 1019 if (kq->kq_sigio && (kq->kq_state & KQ_ASYNC) && kq->kq_count == 1) 1020 pgsigio(kq->kq_sigio, SIGIO, 0); 1021 crit_exit(); 1022 kqueue_wakeup(kq); 1023 } 1024 1025 static void 1026 knote_dequeue(struct knote *kn) 1027 { 1028 struct kqueue *kq = kn->kn_kq; 1029 1030 KASSERT(kn->kn_status & KN_QUEUED, ("knote not queued")); 1031 crit_enter(); 1032 1033 TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe); 1034 kn->kn_status &= ~KN_QUEUED; 1035 kq->kq_count--; 1036 crit_exit(); 1037 } 1038 1039 static void 1040 knote_init(void) 1041 { 1042 knote_zone = zinit("KNOTE", sizeof(struct knote), 0, 0, 1); 1043 } 1044 SYSINIT(knote, SI_SUB_PSEUDO, SI_ORDER_ANY, knote_init, NULL) 1045 1046 static struct knote * 1047 knote_alloc(void) 1048 { 1049 return ((struct knote *)zalloc(knote_zone)); 1050 } 1051 1052 static void 1053 knote_free(struct knote *kn) 1054 { 1055 zfree(knote_zone, kn); 1056 } 1057