1 /*- 2 * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 
 *
 * $FreeBSD: src/sys/kern/kern_event.c,v 1.2.2.10 2004/04/04 07:03:14 cperciva Exp $
 * $DragonFly: src/sys/kern/kern_event.c,v 1.33 2007/02/03 17:05:57 corecode Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/unistd.h>
#include <sys/file.h>
#include <sys/lock.h>
#include <sys/fcntl.h>
#include <sys/select.h>
#include <sys/queue.h>
#include <sys/event.h>
#include <sys/eventvar.h>
#include <sys/poll.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/uio.h>
#include <sys/thread2.h>
#include <sys/signalvar.h>
#include <sys/filio.h>
#include <sys/file2.h>

#include <vm/vm_zone.h>

MALLOC_DEFINE(M_KQUEUE, "kqueue", "memory for kqueue system");

/*
 * Forward declarations for the kqueue fileops implementation and the
 * internal event-collection routine.
 */
static int	kqueue_scan(struct file *fp, int maxevents,
		    struct kevent *ulistp, const struct timespec *timeout,
		    struct thread *td, int *res);
static int	kqueue_read(struct file *fp, struct uio *uio,
		    struct ucred *cred, int flags);
static int	kqueue_write(struct file *fp, struct uio *uio,
		    struct ucred *cred, int flags);
static int	kqueue_ioctl(struct file *fp, u_long com, caddr_t data,
		    struct ucred *cred, struct sysmsg *msg);
static int 	kqueue_poll(struct file *fp, int events, struct ucred *cred);
static int 	kqueue_kqfilter(struct file *fp, struct knote *kn);
static int 	kqueue_stat(struct file *fp, struct stat *st,
		    struct ucred *cred);
static int 	kqueue_close(struct file *fp);
static void 	kqueue_wakeup(struct kqueue *kq);

/*
 * MPSAFE
 *
 * Fileops vector for kqueue descriptors; read/write simply fail with
 * ENXIO (see kqueue_read/kqueue_write below).
 */
static struct fileops kqueueops = {
	.fo_read = kqueue_read,
	.fo_write = kqueue_write,
	.fo_ioctl = kqueue_ioctl,
	.fo_poll = kqueue_poll,
	.fo_kqfilter = kqueue_kqfilter,
	.fo_stat = kqueue_stat,
	.fo_close = kqueue_close,
	.fo_shutdown = nofo_shutdown
};

/* knote lifecycle helpers (see definitions at the bottom of the file). */
static void 	knote_attach(struct knote *kn, struct filedesc *fdp);
static void 	knote_drop(struct knote *kn, struct thread *td);
static void 	knote_enqueue(struct knote *kn);
static void 	knote_dequeue(struct knote *kn);
static void 	knote_init(void);
static struct 	knote *knote_alloc(void);
static void 	knote_free(struct knote *kn);

/* Built-in filter implementations. */
static void	filt_kqdetach(struct knote *kn);
static int	filt_kqueue(struct knote *kn, long hint);
static int	filt_procattach(struct knote *kn);
static void	filt_procdetach(struct knote *kn);
static int	filt_proc(struct knote *kn, long hint);
static int	filt_fileattach(struct knote *kn);
static void	filt_timerexpire(void *knx);
static int	filt_timerattach(struct knote *kn);
static void	filt_timerdetach(struct knote *kn);
static int	filt_timer(struct knote *kn, long hint);

/*
 * Filterops: { f_isfd, f_attach, f_detach, f_event }.  f_isfd == 1 means
 * the knote identifier is a file descriptor.
 */
static struct filterops file_filtops =
	{ 1, filt_fileattach, NULL, NULL };
static struct filterops kqread_filtops =
	{ 1, NULL, filt_kqdetach, filt_kqueue };
static struct filterops proc_filtops =
	{ 0, filt_procattach, filt_procdetach, filt_proc };
static struct filterops timer_filtops =
	{ 0, filt_timerattach, filt_timerdetach, filt_timer };

static vm_zone_t	knote_zone;
/* Number of callouts currently allocated for EVFILT_TIMER knotes. */
static int 		kq_ncallouts = 0;
/* Upper bound on timer callouts; tunable via kern.kq_calloutmax. */
static int 		kq_calloutmax = (4 * 1024);
SYSCTL_INT(_kern, OID_AUTO, kq_calloutmax, CTLFLAG_RW,
    &kq_calloutmax, 0, "Maximum number of callouts allocated for kqueue");

/*
 * Mark a knote active and queue it on its kqueue unless it is already
 * queued or disabled.  Callers are expected to provide whatever
 * synchronization knote_enqueue() requires (it enters a critical section
 * itself).
 */
#define KNOTE_ACTIVATE(kn) do { 					\
	kn->kn_status |= KN_ACTIVE;					\
	if ((kn->kn_status & (KN_QUEUED | KN_DISABLED)) == 0)		\
		knote_enqueue(kn);					\
} while(0)

#define	KN_HASHSIZE		64		/* XXX should be tunable */
#define KN_HASH(val, mask)	(((val) ^ (val >> 8)) & (mask))

/* Filters implemented elsewhere (VFS AIO and signal delivery code). */
extern struct filterops aio_filtops;
extern struct filterops sig_filtops;

/*
 * Table for all system-defined filters, indexed by ~filter (the EVFILT_*
 * constants are negative).
 */
static struct filterops *sysfilt_ops[] = {
	&file_filtops,			/* EVFILT_READ */
	&file_filtops,			/* EVFILT_WRITE */
	&aio_filtops,			/* EVFILT_AIO */
	&file_filtops,			/* EVFILT_VNODE */
	&proc_filtops,			/* EVFILT_PROC */
	&sig_filtops,			/* EVFILT_SIGNAL */
	&timer_filtops,			/* EVFILT_TIMER */
};

/*
 * Generic attach for fd-based filters: delegate to the target file's
 * own fo_kqfilter op, which installs the appropriate filterops.
 */
static int
filt_fileattach(struct knote *kn)
{
	return (fo_kqfilter(kn->kn_fp, kn));
}

/*
 * MPALMOSTSAFE - acquires mplock
 *
 * fo_kqfilter for a kqueue descriptor itself: only EVFILT_READ is
 * supported (readability == pending events on the inner kqueue).
 * Returns 1 (not an errno) to reject unsupported filters, matching the
 * fo_kqfilter convention.
 */
static int
kqueue_kqfilter(struct file *fp, struct knote *kn)
{
	struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;

	get_mplock();
	if (kn->kn_filter != EVFILT_READ) {
		rel_mplock();
		return (1);
	}

	kn->kn_fop = &kqread_filtops;
	SLIST_INSERT_HEAD(&kq->kq_sel.si_note, kn, kn_selnext);
	rel_mplock();
	return (0);
}

/*
 * Detach a knote from the inner kqueue's selinfo note list.
 */
static void
filt_kqdetach(struct knote *kn)
{
	struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;

	SLIST_REMOVE(&kq->kq_sel.si_note, kn, knote, kn_selnext);
}

/*ARGSUSED*/
/*
 * Event test for a kqueue-on-kqueue knote: report the number of pending
 * events in kn_data and trigger when it is non-zero.
 */
static int
filt_kqueue(struct knote *kn, long hint)
{
	struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;

	kn->kn_data = kq->kq_count;
	return (kn->kn_data > 0);
}

/*
 * Attach an EVFILT_PROC knote to the process identified by kn_id.
 * Zombies are accepted when NOTE_EXIT is requested so that an exit
 * which raced the registration is still reported (see below).
 * Returns ESRCH if no such process, EACCES on a jail visibility failure.
 */
static int
filt_procattach(struct knote *kn)
{
	struct proc *p;
	int immediate;

	immediate = 0;
	p = pfind(kn->kn_id);
	if (p == NULL && (kn->kn_sfflags & NOTE_EXIT)) {
		p = zpfind(kn->kn_id);
		immediate = 1;
	}
	if (p == NULL)
		return (ESRCH);
	if (!PRISON_CHECK(curproc->p_ucred, p->p_ucred))
		return (EACCES);

	kn->kn_ptr.p_proc = p;
	kn->kn_flags |= EV_CLEAR;		/* automatically set */

	/*
	 * internal flag indicating registration done by kernel
	 */
	if (kn->kn_flags & EV_FLAG1) {
		kn->kn_data = kn->kn_sdata;	/* ppid */
		kn->kn_fflags = NOTE_CHILD;
		kn->kn_flags &= ~EV_FLAG1;
	}

	/* XXX lock the proc here while adding to the list? */
	SLIST_INSERT_HEAD(&p->p_klist, kn, kn_selnext);

	/*
	 * Immediately activate any exit notes if the target process is a
	 * zombie.  This is necessary to handle the case where the target
	 * process, e.g. a child, dies before the kevent is registered.
	 */
	if (immediate && filt_proc(kn, NOTE_EXIT))
		KNOTE_ACTIVATE(kn);

	return (0);
}

/*
 * The knote may be attached to a different process, which may exit,
 * leaving nothing for the knote to be attached to.  So when the process
 * exits, the knote is marked as DETACHED and also flagged as ONESHOT so
 * it will be deleted when read out.  However, as part of the knote deletion,
 * this routine is called, so a check is needed to avoid actually performing
 * a detach, because the original process does not exist any more.
 */
static void
filt_procdetach(struct knote *kn)
{
	struct proc *p = kn->kn_ptr.p_proc;

	if (kn->kn_status & KN_DETACHED)
		return;

	/* XXX locking? this might modify another process. */
	SLIST_REMOVE(&p->p_klist, kn, knote, kn_selnext);
}

/*
 * Event test for EVFILT_PROC.  The hint carries a NOTE_* event code in
 * its control bits plus event-specific data (e.g. the child pid on
 * NOTE_FORK) in the NOTE_PDATAMASK bits.  Returns non-zero when the
 * knote should fire.
 */
static int
filt_proc(struct knote *kn, long hint)
{
	u_int event;

	/*
	 * mask off extra data
	 */
	event = (u_int)hint & NOTE_PCTRLMASK;

	/*
	 * if the user is interested in this event, record it.
	 */
	if (kn->kn_sfflags & event)
		kn->kn_fflags |= event;

	/*
	 * process is gone, so flag the event as finished.  Mark the knote
	 * detached (the process klist entry dies with the process) and
	 * oneshot so it is reaped on the next scan.
	 */
	if (event == NOTE_EXIT) {
		kn->kn_status |= KN_DETACHED;
		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
		return (1);
	}

	/*
	 * process forked, and user wants to track the new process,
	 * so attach a new knote to it, and immediately report an
	 * event with the parent's pid.
	 */
	if ((event == NOTE_FORK) && (kn->kn_sfflags & NOTE_TRACK)) {
		struct kevent kev;
		int error;

		/*
		 * register knote with new process.  EV_FLAG1 tells
		 * filt_procattach that this registration is kernel-
		 * internal (child gets NOTE_CHILD with the parent pid).
		 */
		kev.ident = hint & NOTE_PDATAMASK;	/* pid */
		kev.filter = kn->kn_filter;
		kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1;
		kev.fflags = kn->kn_sfflags;
		kev.data = kn->kn_id;			/* parent */
		kev.udata = kn->kn_kevent.udata;	/* preserve udata */
		error = kqueue_register(kn->kn_kq, &kev, NULL);
		if (error)
			kn->kn_fflags |= NOTE_TRACKERR;
	}

	return (kn->kn_fflags != 0);
}

/*
 * Callout handler for EVFILT_TIMER knotes: bump the expiry count,
 * activate the knote, and re-arm the callout unless the timer is
 * oneshot.  kn_sdata holds the period in milliseconds.
 */
static void
filt_timerexpire(void *knx)
{
	struct knote *kn = knx;
	struct callout *calloutp;
	struct timeval tv;
	int tticks;

	kn->kn_data++;
	KNOTE_ACTIVATE(kn);

	if ((kn->kn_flags & EV_ONESHOT) == 0) {
		tv.tv_sec = kn->kn_sdata / 1000;
		tv.tv_usec = (kn->kn_sdata % 1000) * 1000;
		tticks = tvtohz_high(&tv);
		calloutp = (struct callout *)kn->kn_hook;
		callout_reset(calloutp, tticks, filt_timerexpire, kn);
	}
}

/*
 * data contains amount of time to sleep, in milliseconds
 *
 * Allocates a callout (bounded by kq_calloutmax) and arms it.
 * NOTE(review): kq_ncallouts is incremented without a lock here and
 * decremented in filt_timerdetach — presumably serialized by the mplock
 * or a critical section at the caller; verify before assuming MP safety.
 */
static int
filt_timerattach(struct knote *kn)
{
	struct callout *calloutp;
	struct timeval tv;
	int tticks;

	if (kq_ncallouts >= kq_calloutmax)
		return (ENOMEM);
	kq_ncallouts++;

	tv.tv_sec = kn->kn_sdata / 1000;
	tv.tv_usec = (kn->kn_sdata % 1000) * 1000;
	tticks = tvtohz_high(&tv);

	kn->kn_flags |= EV_CLEAR;		/* automatically set */
	MALLOC(calloutp, struct callout *, sizeof(*calloutp),
	    M_KQUEUE, M_WAITOK);
	callout_init(calloutp);
	kn->kn_hook = (caddr_t)calloutp;
	callout_reset(calloutp, tticks, filt_timerexpire, kn);

	return (0);
}

/*
 * Stop and free the timer's callout.
 */
static void
filt_timerdetach(struct knote *kn)
{
	struct callout *calloutp;

	calloutp = (struct callout *)kn->kn_hook;
	callout_stop(calloutp);
	FREE(calloutp, M_KQUEUE);
	kq_ncallouts--;
}

/*
 * A timer knote is ready whenever it has expired at least once since
 * the last read (kn_data is reset by EV_CLEAR handling in kqueue_scan).
 */
static int
filt_timer(struct knote *kn, long hint)
{

	return (kn->kn_data != 0);
}

/*
 * kqueue(2) system call: allocate a kqueue, wrap it in a new file
 * descriptor using kqueueops, and return the fd in sysmsg_result.
 */
int
sys_kqueue(struct kqueue_args *uap)
{
	struct proc *p = curproc;
	struct filedesc *fdp = p->p_fd;
	struct kqueue *kq;
	struct file *fp;
	int fd, error;

	error = falloc(p, &fp, &fd);
	if (error)
		return (error);
	fp->f_flag = FREAD | FWRITE;
	fp->f_type = DTYPE_KQUEUE;
	fp->f_ops = &kqueueops;

	kq = kmalloc(sizeof(struct kqueue), M_KQUEUE, M_WAITOK | M_ZERO);
	TAILQ_INIT(&kq->kq_head);
	kq->kq_fdp = fdp;
	fp->f_data = kq;

	/* make the fd visible and drop falloc's extra file reference */
	fsetfd(p, fp, fd);
	uap->sysmsg_result = fd;
	fdrop(fp);
	return (error);
}

/*
 * kevent(2) system call: apply the changelist in KQ_NEVENTS-sized
 * chunks via kqueue_register(), then collect pending events with
 * kqueue_scan().  Registration errors are reported in-band through the
 * eventlist (EV_ERROR) when there is room, otherwise they abort the
 * call.  The number of events returned goes in sysmsg_result.
 */
int
sys_kevent(struct kevent_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct kevent *kevp;
	struct kqueue *kq;
	struct file *fp = NULL;
	struct timespec ts;
	int i, n, nerrors, error;

	KKASSERT(p);

	fp = holdfp(p->p_fd, uap->fd, -1);
	if (fp == NULL)
		return (EBADF);
	if (fp->f_type != DTYPE_KQUEUE) {
		fdrop(fp);
		return (EBADF);
	}

	if (uap->timeout != NULL) {
		error = copyin(uap->timeout, &ts, sizeof(ts));
		if (error)
			goto done;
		/* point at the kernel copy for the rest of the call */
		uap->timeout = &ts;
	}

	kq = (struct kqueue *)fp->f_data;
	nerrors = 0;

	while (uap->nchanges > 0) {
		/* copy in at most KQ_NEVENTS changes at a time */
		n = uap->nchanges > KQ_NEVENTS ? KQ_NEVENTS : uap->nchanges;
		error = copyin(uap->changelist, kq->kq_kev,
		    n * sizeof(struct kevent));
		if (error)
			goto done;
		for (i = 0; i < n; i++) {
			kevp = &kq->kq_kev[i];
			kevp->flags &= ~EV_SYSFLAGS;
			error = kqueue_register(kq, kevp, td);
			if (error) {
				if (uap->nevents != 0) {
					/* report the error in the eventlist */
					kevp->flags = EV_ERROR;
					kevp->data = error;
					(void) copyout((caddr_t)kevp,
					    (caddr_t)uap->eventlist,
					    sizeof(*kevp));
					uap->eventlist++;
					uap->nevents--;
					nerrors++;
				} else {
					goto done;
				}
			}
		}
		uap->nchanges -= n;
		uap->changelist += n;
	}
	if (nerrors) {
		uap->sysmsg_result = nerrors;
		error = 0;
		goto done;
	}

	error = kqueue_scan(fp, uap->nevents, uap->eventlist, uap->timeout, td, &uap->sysmsg_result);
done:
	if (fp != NULL)
		fdrop(fp);
	return (error);
}

/*
 * Register (EV_ADD), modify, enable/disable, or delete (EV_DELETE) a
 * knote on kq according to *kev.  For fd-based filters the identifier
 * is validated as a descriptor and existing knotes are looked up on the
 * per-fd list; other filters hash on the raw identifier.  Returns 0 or
 * an errno (ENOENT when modifying a nonexistent knote without EV_ADD).
 */
int
kqueue_register(struct kqueue *kq, struct kevent *kev, struct thread *td)
{
	struct filedesc *fdp = kq->kq_fdp;
	struct filterops *fops;
	struct file *fp = NULL;
	struct knote *kn = NULL;
	int error = 0;

	if (kev->filter < 0) {
		if (kev->filter + EVFILT_SYSCOUNT < 0)
			return (EINVAL);
		fops = sysfilt_ops[~kev->filter];	/* to 0-base index */
	} else {
		/*
		 * XXX
		 * filter attach routine is responsible for insuring that
		 * the identifier can be attached to it.
		 */
		kprintf("unknown filter: %d\n", kev->filter);
		return (EINVAL);
	}

	if (fops->f_isfd) {
		/* validate descriptor */
		fp = holdfp(fdp, kev->ident, -1);
		if (fp == NULL)
			return (EBADF);

		if (kev->ident < fdp->fd_knlistsize) {
			SLIST_FOREACH(kn, &fdp->fd_knlist[kev->ident], kn_link)
				if (kq == kn->kn_kq &&
				    kev->filter == kn->kn_filter)
					break;
		}
	} else {
		if (fdp->fd_knhashmask != 0) {
			struct klist *list;

			list = &fdp->fd_knhash[
			    KN_HASH((u_long)kev->ident, fdp->fd_knhashmask)];
			SLIST_FOREACH(kn, list, kn_link)
				if (kev->ident == kn->kn_id &&
				    kq == kn->kn_kq &&
				    kev->filter == kn->kn_filter)
					break;
		}
	}

	if (kn == NULL && ((kev->flags & EV_ADD) == 0)) {
		error = ENOENT;
		goto done;
	}

	/*
	 * kn now contains the matching knote, or NULL if no match
	 */
	if (kev->flags & EV_ADD) {

		if (kn == NULL) {
			kn = knote_alloc();
			if (kn == NULL) {
				error = ENOMEM;
				goto done;
			}
			kn->kn_fp = fp;
			kn->kn_kq = kq;
			kn->kn_fop = fops;

			/*
			 * apply reference count to knote structure, and
			 * do not release it at the end of this routine.
			 */
			fp = NULL;

			kn->kn_sfflags = kev->fflags;
			kn->kn_sdata = kev->data;
			kev->fflags = 0;
			kev->data = 0;
			kn->kn_kevent = *kev;

			knote_attach(kn, fdp);
			if ((error = fops->f_attach(kn)) != 0) {
				knote_drop(kn, td);
				goto done;
			}
		} else {
			/*
			 * The user may change some filter values after the
			 * initial EV_ADD, but doing so will not reset any
			 * filter which have already been triggered.
			 */
			kn->kn_sfflags = kev->fflags;
			kn->kn_sdata = kev->data;
			kn->kn_kevent.udata = kev->udata;
		}

		/* poll the filter once; it may already be triggered */
		crit_enter();
		if (kn->kn_fop->f_event(kn, 0))
			KNOTE_ACTIVATE(kn);
		crit_exit();
	} else if (kev->flags & EV_DELETE) {
		kn->kn_fop->f_detach(kn);
		knote_drop(kn, td);
		goto done;
	}

	if ((kev->flags & EV_DISABLE) &&
	    ((kn->kn_status & KN_DISABLED) == 0)) {
		crit_enter();
		kn->kn_status |= KN_DISABLED;
		crit_exit();
	}

	if ((kev->flags & EV_ENABLE) && (kn->kn_status & KN_DISABLED)) {
		crit_enter();
		kn->kn_status &= ~KN_DISABLED;
		if ((kn->kn_status & KN_ACTIVE) &&
		    ((kn->kn_status & KN_QUEUED) == 0))
			knote_enqueue(kn);
		crit_exit();
	}

done:
	if (fp != NULL)
		fdrop(fp);
	return (error);
}

/*
 * Collect up to maxevents triggered events into the user buffer ulistp,
 * sleeping up to *tsp (NULL = forever, zero = poll).  A stack-allocated
 * marker knote is appended to the queue so the scan terminates after
 * one full pass even though surviving (non-EV_CLEAR, non-oneshot)
 * knotes are re-queued behind it.  Events are staged in kq->kq_kev and
 * flushed to userspace KQ_NEVENTS at a time, outside the critical
 * section.  *res receives the number of events delivered.
 */
static int
kqueue_scan(struct file *fp, int maxevents, struct kevent *ulistp,
	const struct timespec *tsp, struct thread *td, int *res)
{
	struct kqueue *kq = (struct kqueue *)fp->f_data;
	struct kevent *kevp;
	struct timeval atv, rtv, ttv;
	struct knote *kn, marker;
	int count, timeout, nkev = 0, error = 0;

	count = maxevents;
	if (count == 0)
		goto done;

	if (tsp != NULL) {
		TIMESPEC_TO_TIMEVAL(&atv, tsp);
		if (itimerfix(&atv)) {
			error = EINVAL;
			goto done;
		}
		if (tsp->tv_sec == 0 && tsp->tv_nsec == 0)
			timeout = -1;	/* non-blocking poll */
		else
			timeout = atv.tv_sec > 24 * 60 * 60 ?
			    24 * 60 * 60 * hz : tvtohz_high(&atv);
		getmicrouptime(&rtv);
		timevaladd(&atv, &rtv);	/* atv becomes absolute deadline */
	} else {
		atv.tv_sec = 0;
		atv.tv_usec = 0;
		timeout = 0;		/* sleep until an event arrives */
	}
	goto start;

retry:
	if (atv.tv_sec || atv.tv_usec) {
		/* recompute the remaining timeout against the deadline */
		getmicrouptime(&rtv);
		if (timevalcmp(&rtv, &atv, >=))
			goto done;
		ttv = atv;
		timevalsub(&ttv, &rtv);
		timeout = ttv.tv_sec > 24 * 60 * 60 ?
		    24 * 60 * 60 * hz : tvtohz_high(&ttv);
	}

start:
	kevp = kq->kq_kev;
	crit_enter();
	if (kq->kq_count == 0) {
		if (timeout < 0) {
			error = EWOULDBLOCK;
		} else {
			kq->kq_state |= KQ_SLEEP;
			error = tsleep(kq, PCATCH, "kqread", timeout);
		}
		crit_exit();
		if (error == 0)
			goto retry;
		/* don't restart after signals... */
		if (error == ERESTART)
			error = EINTR;
		else if (error == EWOULDBLOCK)
			error = 0;
		goto done;
	}

	TAILQ_INSERT_TAIL(&kq->kq_head, &marker, kn_tqe);
	while (count) {
		kn = TAILQ_FIRST(&kq->kq_head);
		TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
		if (kn == &marker) {
			/* full pass complete; retry only if nothing found */
			crit_exit();
			if (count == maxevents)
				goto retry;
			goto done;
		}
		if (kn->kn_status & KN_DISABLED) {
			kn->kn_status &= ~KN_QUEUED;
			kq->kq_count--;
			continue;
		}
		/* revalidate the event unless oneshot (always delivered) */
		if ((kn->kn_flags & EV_ONESHOT) == 0 &&
		    kn->kn_fop->f_event(kn, 0) == 0) {
			kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE);
			kq->kq_count--;
			continue;
		}
		*kevp = kn->kn_kevent;
		kevp++;
		nkev++;
		if (kn->kn_flags & EV_ONESHOT) {
			kn->kn_status &= ~KN_QUEUED;
			kq->kq_count--;
			/* drop outside the critical section */
			crit_exit();
			kn->kn_fop->f_detach(kn);
			knote_drop(kn, td);
			crit_enter();
		} else if (kn->kn_flags & EV_CLEAR) {
			kn->kn_data = 0;
			kn->kn_fflags = 0;
			kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE);
			kq->kq_count--;
		} else {
			/* level-triggered: requeue behind the marker */
			TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
		}
		count--;
		if (nkev == KQ_NEVENTS) {
			/* staging buffer full; flush to userspace */
			crit_exit();
			error = copyout((caddr_t)&kq->kq_kev, (caddr_t)ulistp,
			    sizeof(struct kevent) * nkev);
			ulistp += nkev;
			nkev = 0;
			kevp = kq->kq_kev;
			crit_enter();
			if (error)
				break;
		}
	}
	TAILQ_REMOVE(&kq->kq_head, &marker, kn_tqe);
	crit_exit();
done:
	if (nkev != 0)
		error = copyout((caddr_t)&kq->kq_kev, (caddr_t)ulistp,
		    sizeof(struct kevent) * nkev);
	*res = maxevents - count;
	return (error);
}

/*
 * XXX
 * This could be expanded to call kqueue_scan, if desired.
 *
 * MPSAFE
 *
 * read(2) on a kqueue fd is not supported.
 */
static int
kqueue_read(struct file *fp, struct uio *uio, struct ucred *cred, int flags)
{
	return (ENXIO);
}

/*
 * MPSAFE
 *
 * write(2) on a kqueue fd is not supported.
 */
static int
kqueue_write(struct file *fp, struct uio *uio, struct ucred *cred, int flags)
{
	return (ENXIO);
}

/*
 * MPSAFE
 *
 * Supports FIOASYNC (toggle KQ_ASYNC / SIGIO delivery) and FIOSETOWN.
 * NOTE(review): FIOGETOWN is not handled here even though FIOSETOWN is —
 * confirm whether callers rely on it before treating that as intended.
 */
static int
kqueue_ioctl(struct file *fp, u_long com, caddr_t data,
	     struct ucred *cred, struct sysmsg *msg)
{
	struct kqueue *kq;
	int error;

	get_mplock();
	kq = (struct kqueue *)fp->f_data;

	switch(com) {
	case FIOASYNC:
		if (*(int *)data)
			kq->kq_state |= KQ_ASYNC;
		else
			kq->kq_state &= ~KQ_ASYNC;
		error = 0;
		break;
	case FIOSETOWN:
		error = fsetown(*(int *)data, &kq->kq_sigio);
		break;
	default:
		error = ENOTTY;
		break;
	}
	rel_mplock();
	return (error);
}

/*
 * MPALMOSTSAFE - acquires mplock
 *
 * A kqueue fd polls readable when it has pending events; otherwise the
 * caller is recorded via selrecord() and KQ_SEL is set so that
 * kqueue_wakeup() knows to call selwakeup().
 */
static int
kqueue_poll(struct file *fp, int events, struct ucred *cred)
{
	struct kqueue *kq = (struct kqueue *)fp->f_data;
	int revents = 0;

	get_mplock();
	crit_enter();
	if (events & (POLLIN | POLLRDNORM)) {
		if (kq->kq_count) {
			revents |= events & (POLLIN | POLLRDNORM);
		} else {
			selrecord(curthread, &kq->kq_sel);
			kq->kq_state |= KQ_SEL;
		}
	}
	crit_exit();
	rel_mplock();
	return (revents);
}

/*
 * MPSAFE
 *
 * fstat(2): report the pending event count as st_size and present the
 * kqueue as a FIFO.
 */
static int
kqueue_stat(struct file *fp, struct stat *st, struct ucred *cred)
{
	struct kqueue *kq = (struct kqueue *)fp->f_data;

	bzero((void *)st, sizeof(*st));
	st->st_size = kq->kq_count;
	st->st_blksize = sizeof(struct kevent);
	st->st_mode = S_IFIFO;
	return (0);
}

/*
 * MPALMOSTSAFE - acquires mplock
 *
 * Close a kqueue: walk the owning process's per-fd knote lists and the
 * identifier hash, detaching and freeing every knote that belongs to
 * this kqueue, then release SIGIO ownership and the kqueue itself.
 */
static int
kqueue_close(struct file *fp)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct kqueue *kq = (struct kqueue *)fp->f_data;
	struct filedesc *fdp;
	struct knote **knp, *kn, *kn0;
	int i;

	KKASSERT(p);
	get_mplock();
	fdp = p->p_fd;
	for (i = 0; i < fdp->fd_knlistsize; i++) {
		knp = &SLIST_FIRST(&fdp->fd_knlist[i]);
		kn = *knp;
		while (kn != NULL) {
			kn0 = SLIST_NEXT(kn, kn_link);
			if (kq == kn->kn_kq) {
				kn->kn_fop->f_detach(kn);
				fdrop(kn->kn_fp);
				knote_free(kn);
				/* unlink via the predecessor pointer */
				*knp = kn0;
			} else {
				knp = &SLIST_NEXT(kn, kn_link);
			}
			kn = kn0;
		}
	}
	if (fdp->fd_knhashmask != 0) {
		for (i = 0; i < fdp->fd_knhashmask + 1; i++) {
			knp = &SLIST_FIRST(&fdp->fd_knhash[i]);
			kn = *knp;
			while (kn != NULL) {
				kn0 = SLIST_NEXT(kn, kn_link);
				if (kq == kn->kn_kq) {
					kn->kn_fop->f_detach(kn);
		/* XXX non-fd release of kn->kn_ptr */
					knote_free(kn);
					*knp = kn0;
				} else {
					knp = &SLIST_NEXT(kn, kn_link);
				}
				kn = kn0;
			}
		}
	}
	fp->f_data = NULL;
	funsetown(kq->kq_sigio);
	rel_mplock();

	kfree(kq, M_KQUEUE);
	return (0);
}

/*
 * Wake up everything waiting on this kqueue: a tsleep'er in
 * kqueue_scan (KQ_SLEEP), select/poll waiters (KQ_SEL), and any
 * kqueues monitoring this one via EVFILT_READ.
 */
static void
kqueue_wakeup(struct kqueue *kq)
{
	if (kq->kq_state & KQ_SLEEP) {
		kq->kq_state &= ~KQ_SLEEP;
		wakeup(kq);
	}
	if (kq->kq_state & KQ_SEL) {
		kq->kq_state &= ~KQ_SEL;
		selwakeup(&kq->kq_sel);
	}
	KNOTE(&kq->kq_sel.si_note, 0);
}

/*
 * walk down a list of knotes, activating them if their event has triggered.
 */
void
knote(struct klist *list, long hint)
{
	struct knote *kn;

	SLIST_FOREACH(kn, list, kn_selnext)
		if (kn->kn_fop->f_event(kn, hint))
			KNOTE_ACTIVATE(kn);
}

/*
 * remove all knotes from a specified klist
 */
void
knote_remove(struct thread *td, struct klist *list)
{
	struct knote *kn;

	while ((kn = SLIST_FIRST(list)) != NULL) {
		kn->kn_fop->f_detach(kn);
		knote_drop(kn, td);	/* unlinks kn from list */
	}
}

/*
 * remove all knotes referencing a specified fd
 */
void
knote_fdclose(struct proc *p, int fd)
{
	struct filedesc *fdp = p->p_fd;
	struct klist *list = &fdp->fd_knlist[fd];
	/* Take any thread of p */
	struct thread *td = FIRST_LWP_IN_PROC(p)->lwp_thread;

	knote_remove(td, list);
}

/*
 * Link a new knote into the filedesc: fd-based knotes go on the
 * per-descriptor fd_knlist (grown in KQEXTENT steps as needed), all
 * others go on the fd_knhash identifier hash (created lazily).
 */
static void
knote_attach(struct knote *kn, struct filedesc *fdp)
{
	struct klist *list;
	int size;

	if (! kn->kn_fop->f_isfd) {
		if (fdp->fd_knhashmask == 0)
			fdp->fd_knhash = hashinit(KN_HASHSIZE, M_KQUEUE,
			    &fdp->fd_knhashmask);
		list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)];
		goto done;
	}

	if (fdp->fd_knlistsize <= kn->kn_id) {
		/* grow fd_knlist to cover kn_id, zeroing the new tail */
		size = fdp->fd_knlistsize;
		while (size <= kn->kn_id)
			size += KQEXTENT;
		MALLOC(list, struct klist *,
		    size * sizeof(struct klist *), M_KQUEUE, M_WAITOK);
		bcopy((caddr_t)fdp->fd_knlist, (caddr_t)list,
		    fdp->fd_knlistsize * sizeof(struct klist *));
		bzero((caddr_t)list +
		    fdp->fd_knlistsize * sizeof(struct klist *),
		    (size - fdp->fd_knlistsize) * sizeof(struct klist *));
		if (fdp->fd_knlist != NULL)
			FREE(fdp->fd_knlist, M_KQUEUE);
		fdp->fd_knlistsize = size;
		fdp->fd_knlist = list;
	}
	list = &fdp->fd_knlist[kn->kn_id];
done:
	SLIST_INSERT_HEAD(list, kn, kn_link);
	kn->kn_status = 0;
}

/*
 * should be called outside of a critical section, since we don't want to
 * hold a critical section while calling fdrop and free.
 *
 * Unlinks the knote from its filedesc list, dequeues it if pending,
 * drops the file reference for fd-based knotes, and frees it.
 */
static void
knote_drop(struct knote *kn, struct thread *td)
{
	struct filedesc *fdp;
	struct klist *list;

	KKASSERT(td->td_proc);
	fdp = td->td_proc->p_fd;
	if (kn->kn_fop->f_isfd)
		list = &fdp->fd_knlist[kn->kn_id];
	else
		list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)];

	SLIST_REMOVE(list, kn, knote, kn_link);
	if (kn->kn_status & KN_QUEUED)
		knote_dequeue(kn);
	if (kn->kn_fop->f_isfd)
		fdrop(kn->kn_fp);
	knote_free(kn);
}

/*
 * Append an active knote to its kqueue's pending queue and wake up
 * waiters.  Must not already be queued (asserted).
 */
static void
knote_enqueue(struct knote *kn)
{
	struct kqueue *kq = kn->kn_kq;

	crit_enter();
	KASSERT((kn->kn_status & KN_QUEUED) == 0, ("knote already queued"));

	TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
	kn->kn_status |= KN_QUEUED;
	++kq->kq_count;

	/*
	 * Send SIGIO on request (typically set up as a mailbox signal)
	 */
	if (kq->kq_sigio && (kq->kq_state & KQ_ASYNC) && kq->kq_count == 1)
		pgsigio(kq->kq_sigio, SIGIO, 0);
	crit_exit();
	kqueue_wakeup(kq);
}

/*
 * Remove a queued knote from its kqueue's pending queue.
 */
static void
knote_dequeue(struct knote *kn)
{
	struct kqueue *kq = kn->kn_kq;

	KASSERT(kn->kn_status & KN_QUEUED, ("knote not queued"));
	crit_enter();

	TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
	kn->kn_status &= ~KN_QUEUED;
	kq->kq_count--;
	crit_exit();
}

/*
 * Create the zone backing knote allocation; run once at boot via
 * SYSINIT below.
 */
static void
knote_init(void)
{
	knote_zone = zinit("KNOTE", sizeof(struct knote), 0, 0, 1);
}
SYSINIT(knote, SI_SUB_PSEUDO, SI_ORDER_ANY, knote_init, NULL)

static struct knote *
knote_alloc(void)
{
	return ((struct knote *)zalloc(knote_zone));
}

static void
knote_free(struct knote *kn)
{
	zfree(knote_zone, kn);
}