1 /* $NetBSD: kern_event.c,v 1.5 2002/11/26 18:44:34 christos Exp $ */ 2 /*- 3 * Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org> 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 * 27 * $FreeBSD: src/sys/kern/kern_event.c,v 1.27 2001/07/05 17:10:44 rwatson Exp $ 28 */ 29 30 #include <sys/param.h> 31 #include <sys/systm.h> 32 #include <sys/kernel.h> 33 #include <sys/proc.h> 34 #include <sys/malloc.h> 35 #include <sys/unistd.h> 36 #include <sys/file.h> 37 #include <sys/fcntl.h> 38 #include <sys/select.h> 39 #include <sys/queue.h> 40 #include <sys/event.h> 41 #include <sys/eventvar.h> 42 #include <sys/poll.h> 43 #include <sys/pool.h> 44 #include <sys/protosw.h> 45 #include <sys/socket.h> 46 #include <sys/socketvar.h> 47 #include <sys/stat.h> 48 #include <sys/uio.h> 49 #include <sys/mount.h> 50 #include <sys/filedesc.h> 51 #include <sys/syscallargs.h> 52 53 static int kqueue_scan(struct file *fp, size_t maxevents, 54 struct kevent *ulistp, const struct timespec *timeout, 55 struct proc *p, register_t *retval); 56 static void kqueue_wakeup(struct kqueue *kq); 57 58 static int kqueue_read(struct file *fp, off_t *offset, struct uio *uio, 59 struct ucred *cred, int flags); 60 static int kqueue_write(struct file *fp, off_t *offset, struct uio *uio, 61 struct ucred *cred, int flags); 62 static int kqueue_ioctl(struct file *fp, u_long com, caddr_t data, 63 struct proc *p); 64 static int kqueue_fcntl(struct file *fp, u_int com, caddr_t data, 65 struct proc *p); 66 static int kqueue_poll(struct file *fp, int events, struct proc *p); 67 static int kqueue_kqfilter(struct file *fp, struct knote *kn); 68 static int kqueue_stat(struct file *fp, struct stat *sp, struct proc *p); 69 static int kqueue_close(struct file *fp, struct proc *p); 70 71 static struct fileops kqueueops = { 72 kqueue_read, kqueue_write, kqueue_ioctl, kqueue_fcntl, kqueue_poll, 73 kqueue_stat, kqueue_close, kqueue_kqfilter 74 }; 75 76 static void knote_attach(struct knote *kn, struct filedesc *fdp); 77 static void knote_drop(struct knote *kn, struct proc *p, 78 struct filedesc *fdp); 79 static void knote_enqueue(struct knote *kn); 80 static void knote_dequeue(struct knote *kn); 81 82 static void filt_kqdetach(struct knote *kn); 83 static int filt_kqueue(struct knote *kn, long hint); 84 static int filt_procattach(struct knote *kn); 85 static void filt_procdetach(struct knote *kn); 86 static int filt_proc(struct knote *kn, long hint); 87 static int filt_fileattach(struct knote *kn); 88 89 static const struct filterops kqread_filtops = 90 { 1, NULL, filt_kqdetach, filt_kqueue }; 91 static const struct filterops proc_filtops = 92 { 0, filt_procattach, filt_procdetach, filt_proc }; 93 static const struct filterops file_filtops = 94 { 1, filt_fileattach, NULL, NULL }; 95 96 struct pool kqueue_pool; 97 struct pool knote_pool; 98 99 #define KNOTE_ACTIVATE(kn) \ 100 do { \ 101 kn->kn_status |= KN_ACTIVE; \ 102 if ((kn->kn_status & (KN_QUEUED | KN_DISABLED)) == 0) \ 103 knote_enqueue(kn); \ 104 } while(0) 105 106 #define KN_HASHSIZE 64 /* XXX should be tunable */ 107 #define KN_HASH(val, mask) (((val) ^ (val >> 8)) & (mask)) 108 109 extern const struct filterops sig_filtops; 110 111 /* 112 * Table for for all system-defined filters. 113 * These should be listed in the numeric order of the EVFILT_* defines. 114 * If filtops is NULL, the filter isn't implemented in NetBSD. 115 * End of list is when name is NULL. 116 */ 117 struct kfilter { 118 const char *name; /* name of filter */ 119 uint32_t filter; /* id of filter */ 120 const struct filterops *filtops;/* operations for filter */ 121 }; 122 123 /* System defined filters */ 124 static const struct kfilter sys_kfilters[] = { 125 { "EVFILT_READ", EVFILT_READ, &file_filtops }, 126 { "EVFILT_WRITE", EVFILT_WRITE, &file_filtops }, 127 { "EVFILT_AIO", EVFILT_AIO, NULL }, 128 { "EVFILT_VNODE", EVFILT_VNODE, &file_filtops }, 129 { "EVFILT_PROC", EVFILT_PROC, &proc_filtops }, 130 { "EVFILT_SIGNAL", EVFILT_SIGNAL, &sig_filtops }, 131 { NULL, 0, NULL }, /* end of list */ 132 }; 133 134 /* User defined kfilters */ 135 static struct kfilter *user_kfilters; /* array */ 136 static int user_kfilterc; /* current offset */ 137 static int user_kfiltermaxc; /* max size so far */ 138 139 /* 140 * kqueue_init: 141 * 142 * Initialize the kqueue/knote facility. 143 */ 144 void 145 kqueue_init(void) 146 { 147 148 pool_init(&kqueue_pool, sizeof(struct kqueue), 0, 0, 0, "kqueuepl", 149 NULL); 150 pool_init(&knote_pool, sizeof(struct knote), 0, 0, 0, "knotepl", 151 NULL); 152 } 153 154 /* 155 * Find kfilter entry by name, or NULL if not found. 156 */ 157 static const struct kfilter * 158 kfilter_byname_sys(const char *name) 159 { 160 int i; 161 162 for (i = 0; sys_kfilters[i].name != NULL; i++) { 163 if (strcmp(name, sys_kfilters[i].name) == 0) 164 return (&sys_kfilters[i]); 165 } 166 return (NULL); 167 } 168 169 static struct kfilter * 170 kfilter_byname_user(const char *name) 171 { 172 int i; 173 174 /* user_kfilters[] could be NULL if no filters were registered */ 175 if (!user_kfilters) 176 return (NULL); 177 178 for (i = 0; user_kfilters[i].name != NULL; i++) { 179 if (user_kfilters[i].name != '\0' && 180 strcmp(name, user_kfilters[i].name) == 0) 181 return (&user_kfilters[i]); 182 } 183 return (NULL); 184 } 185 186 static const struct kfilter * 187 kfilter_byname(const char *name) 188 { 189 const struct kfilter *kfilter; 190 191 if ((kfilter = kfilter_byname_sys(name)) != NULL) 192 return (kfilter); 193 194 return (kfilter_byname_user(name)); 195 } 196 197 /* 198 * Find kfilter entry by filter id, or NULL if not found. 199 * Assumes entries are indexed in filter id order, for speed. 200 */ 201 static const struct kfilter * 202 kfilter_byfilter(uint32_t filter) 203 { 204 const struct kfilter *kfilter; 205 206 if (filter < EVFILT_SYSCOUNT) /* it's a system filter */ 207 kfilter = &sys_kfilters[filter]; 208 else if (user_kfilters != NULL && 209 filter < EVFILT_SYSCOUNT + user_kfilterc) 210 /* it's a user filter */ 211 kfilter = &user_kfilters[filter - EVFILT_SYSCOUNT]; 212 else 213 return (NULL); /* out of range */ 214 KASSERT(kfilter->filter == filter); /* sanity check! */ 215 return (kfilter); 216 } 217 218 /* 219 * Register a new kfilter. Stores the entry in user_kfilters. 220 * Returns 0 if operation succeeded, or an appropriate errno(2) otherwise. 221 * If retfilter != NULL, the new filterid is returned in it. 222 */ 223 int 224 kfilter_register(const char *name, const struct filterops *filtops, 225 int *retfilter) 226 { 227 struct kfilter *kfilter; 228 void *space; 229 int len; 230 231 if (name == NULL || name[0] == '\0' || filtops == NULL) 232 return (EINVAL); /* invalid args */ 233 if (kfilter_byname(name) != NULL) 234 return (EEXIST); /* already exists */ 235 if (user_kfilterc > 0xffffffff - EVFILT_SYSCOUNT) 236 return (EINVAL); /* too many */ 237 238 /* check if need to grow user_kfilters */ 239 if (user_kfilterc + 1 > user_kfiltermaxc) { 240 /* 241 * Grow in KFILTER_EXTENT chunks. Use malloc(9), because we 242 * want to traverse user_kfilters as an array. 243 */ 244 user_kfiltermaxc += KFILTER_EXTENT; 245 kfilter = malloc(user_kfiltermaxc * sizeof(struct filter *), 246 M_KEVENT, M_WAITOK); 247 248 /* copy existing user_kfilters */ 249 if (user_kfilters != NULL) 250 memcpy((caddr_t)kfilter, (caddr_t)user_kfilters, 251 user_kfilterc * sizeof(struct kfilter *)); 252 /* zero new sections */ 253 memset((caddr_t)kfilter + 254 user_kfilterc * sizeof(struct kfilter *), 0, 255 (user_kfiltermaxc - user_kfilterc) * 256 sizeof(struct kfilter *)); 257 /* switch to new kfilter */ 258 if (user_kfilters != NULL) 259 free(user_kfilters, M_KEVENT); 260 user_kfilters = kfilter; 261 } 262 len = strlen(name) + 1; /* copy name */ 263 space = malloc(len, M_KEVENT, M_WAITOK); 264 memcpy(space, name, len); 265 user_kfilters[user_kfilterc].name = space; 266 267 user_kfilters[user_kfilterc].filter = user_kfilterc + EVFILT_SYSCOUNT; 268 269 len = sizeof(struct filterops); /* copy filtops */ 270 space = malloc(len, M_KEVENT, M_WAITOK); 271 memcpy(space, filtops, len); 272 user_kfilters[user_kfilterc].filtops = space; 273 274 if (retfilter != NULL) 275 *retfilter = user_kfilters[user_kfilterc].filter; 276 user_kfilterc++; /* finally, increment count */ 277 return (0); 278 } 279 280 /* 281 * Unregister a kfilter previously registered with kfilter_register. 282 * This retains the filter id, but clears the name and frees filtops (filter 283 * operations), so that the number isn't reused during a boot. 284 * Returns 0 if operation succeeded, or an appropriate errno(2) otherwise. 285 */ 286 int 287 kfilter_unregister(const char *name) 288 { 289 struct kfilter *kfilter; 290 291 if (name == NULL || name[0] == '\0') 292 return (EINVAL); /* invalid name */ 293 294 if (kfilter_byname_sys(name) != NULL) 295 return (EINVAL); /* can't detach system filters */ 296 297 kfilter = kfilter_byname_user(name); 298 if (kfilter == NULL) /* not found */ 299 return (ENOENT); 300 301 if (kfilter->name[0] != '\0') { 302 /* XXX Cast away const (but we know it's safe. */ 303 free((void *) kfilter->name, M_KEVENT); 304 kfilter->name = ""; /* mark as `not implemented' */ 305 } 306 if (kfilter->filtops != NULL) { 307 /* XXX Cast away const (but we know it's safe. */ 308 free((void *) kfilter->filtops, M_KEVENT); 309 kfilter->filtops = NULL; /* mark as `not implemented' */ 310 } 311 return (0); 312 } 313 314 315 /* 316 * Filter attach method for EVFILT_READ and EVFILT_WRITE on normal file 317 * descriptors. Calls struct fileops kqfilter method for given file descriptor. 318 */ 319 static int 320 filt_fileattach(struct knote *kn) 321 { 322 struct file *fp; 323 324 fp = kn->kn_fp; 325 return ((*fp->f_ops->fo_kqfilter)(fp, kn)); 326 } 327 328 /* 329 * Filter detach method for EVFILT_READ on kqueue descriptor. 330 */ 331 static void 332 filt_kqdetach(struct knote *kn) 333 { 334 struct kqueue *kq; 335 336 kq = (struct kqueue *)kn->kn_fp->f_data; 337 SLIST_REMOVE(&kq->kq_sel.sel_klist, kn, knote, kn_selnext); 338 } 339 340 /* 341 * Filter event method for EVFILT_READ on kqueue descriptor. 342 */ 343 /*ARGSUSED*/ 344 static int 345 filt_kqueue(struct knote *kn, long hint) 346 { 347 struct kqueue *kq; 348 349 kq = (struct kqueue *)kn->kn_fp->f_data; 350 kn->kn_data = kq->kq_count; 351 return (kn->kn_data > 0); 352 } 353 354 /* 355 * Filter attach method for EVFILT_PROC. 356 */ 357 static int 358 filt_procattach(struct knote *kn) 359 { 360 struct proc *p; 361 362 p = pfind(kn->kn_id); 363 if (p == NULL) 364 return (ESRCH); 365 366 /* 367 * Fail if it's not owned by you, or the last exec gave us 368 * setuid/setgid privs (unless you're root). 369 */ 370 if ((p->p_cred->p_ruid != curproc->p_cred->p_ruid || 371 (p->p_flag & P_SUGID)) 372 && suser(curproc->p_ucred, &curproc->p_acflag) != 0) 373 return (EACCES); 374 375 kn->kn_ptr.p_proc = p; 376 kn->kn_flags |= EV_CLEAR; /* automatically set */ 377 378 /* 379 * internal flag indicating registration done by kernel 380 */ 381 if (kn->kn_flags & EV_FLAG1) { 382 kn->kn_data = kn->kn_sdata; /* ppid */ 383 kn->kn_fflags = NOTE_CHILD; 384 kn->kn_flags &= ~EV_FLAG1; 385 } 386 387 /* XXXSMP lock the process? */ 388 SLIST_INSERT_HEAD(&p->p_klist, kn, kn_selnext); 389 390 return (0); 391 } 392 393 /* 394 * Filter detach method for EVFILT_PROC. 395 * 396 * The knote may be attached to a different process, which may exit, 397 * leaving nothing for the knote to be attached to. So when the process 398 * exits, the knote is marked as DETACHED and also flagged as ONESHOT so 399 * it will be deleted when read out. However, as part of the knote deletion, 400 * this routine is called, so a check is needed to avoid actually performing 401 * a detach, because the original process might not exist any more. 402 */ 403 static void 404 filt_procdetach(struct knote *kn) 405 { 406 struct proc *p; 407 408 if (kn->kn_status & KN_DETACHED) 409 return; 410 411 p = kn->kn_ptr.p_proc; 412 KASSERT(p->p_stat == SDEAD || pfind(kn->kn_id) == p); 413 414 /* XXXSMP lock the process? */ 415 SLIST_REMOVE(&p->p_klist, kn, knote, kn_selnext); 416 } 417 418 /* 419 * Filter event method for EVFILT_PROC. 420 */ 421 static int 422 filt_proc(struct knote *kn, long hint) 423 { 424 u_int event; 425 426 /* 427 * mask off extra data 428 */ 429 event = (u_int)hint & NOTE_PCTRLMASK; 430 431 /* 432 * if the user is interested in this event, record it. 433 */ 434 if (kn->kn_sfflags & event) 435 kn->kn_fflags |= event; 436 437 /* 438 * process is gone, so flag the event as finished. 439 */ 440 if (event == NOTE_EXIT) { 441 /* 442 * Detach the knote from watched process and mark 443 * it as such. We can't leave this to kqueue_scan(), 444 * since the process might not exist by then. And we 445 * have to do this now, since psignal KNOTE() is called 446 * also for zombies and we might end up reading freed 447 * memory if the kevent would already be picked up 448 * and knote g/c'ed. 449 */ 450 kn->kn_fop->f_detach(kn); 451 kn->kn_status |= KN_DETACHED; 452 453 /* Mark as ONESHOT, so that the knote it g/c'ed when read */ 454 kn->kn_flags |= (EV_EOF | EV_ONESHOT); 455 return (1); 456 } 457 458 /* 459 * process forked, and user wants to track the new process, 460 * so attach a new knote to it, and immediately report an 461 * event with the parent's pid. 462 */ 463 if ((event == NOTE_FORK) && (kn->kn_sfflags & NOTE_TRACK)) { 464 struct kevent kev; 465 int error; 466 467 /* 468 * register knote with new process. 469 */ 470 kev.ident = hint & NOTE_PDATAMASK; /* pid */ 471 kev.filter = kn->kn_filter; 472 kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1; 473 kev.fflags = kn->kn_sfflags; 474 kev.data = kn->kn_id; /* parent */ 475 kev.udata = kn->kn_kevent.udata; /* preserve udata */ 476 error = kqueue_register(kn->kn_kq, &kev, NULL); 477 if (error) 478 kn->kn_fflags |= NOTE_TRACKERR; 479 } 480 481 return (kn->kn_fflags != 0); 482 } 483 484 /* 485 * filt_seltrue: 486 * 487 * This filter "event" routine simulates seltrue(). 488 */ 489 int 490 filt_seltrue(struct knote *kn, long hint) 491 { 492 493 /* 494 * We don't know how much data can be read/written, 495 * but we know that it *can* be. This is about as 496 * good as select/poll does as well. 497 */ 498 kn->kn_data = 0; 499 return (1); 500 } 501 502 /* 503 * This provides full kqfilter entry for device switch tables, which 504 * has same effect as filter using filt_seltrue() as filter method. 505 */ 506 static void 507 filt_seltruedetach(struct knote *kn) 508 { 509 /* Nothing to do */ 510 } 511 512 static const struct filterops seltrue_filtops = 513 { 1, NULL, filt_seltruedetach, filt_seltrue }; 514 515 int 516 seltrue_kqfilter(dev_t dev, struct knote *kn) 517 { 518 switch (kn->kn_filter) { 519 case EVFILT_READ: 520 case EVFILT_WRITE: 521 kn->kn_fop = &seltrue_filtops; 522 break; 523 default: 524 return (1); 525 } 526 527 /* Nothing more to do */ 528 return (0); 529 } 530 531 /* 532 * kqueue(2) system call. 533 */ 534 int 535 sys_kqueue(struct proc *p, void *v, register_t *retval) 536 { 537 struct filedesc *fdp; 538 struct kqueue *kq; 539 struct file *fp; 540 int fd, error; 541 542 fdp = p->p_fd; 543 error = falloc(p, &fp, &fd); /* setup a new file descriptor */ 544 if (error) 545 return (error); 546 fp->f_flag = FREAD | FWRITE; 547 fp->f_type = DTYPE_KQUEUE; 548 fp->f_ops = &kqueueops; 549 kq = pool_get(&kqueue_pool, PR_WAITOK); 550 memset((char *)kq, 0, sizeof(struct kqueue)); 551 TAILQ_INIT(&kq->kq_head); 552 fp->f_data = (caddr_t)kq; /* store the kqueue with the fp */ 553 *retval = fd; 554 if (fdp->fd_knlistsize < 0) 555 fdp->fd_knlistsize = 0; /* this process has a kq */ 556 kq->kq_fdp = fdp; 557 FILE_SET_MATURE(fp); 558 FILE_UNUSE(fp, p); /* falloc() does FILE_USE() */ 559 return (error); 560 } 561 562 /* 563 * kevent(2) system call. 564 */ 565 int 566 sys_kevent(struct proc *p, void *v, register_t *retval) 567 { 568 struct sys_kevent_args /* { 569 syscallarg(int) fd; 570 syscallarg(const struct kevent *) changelist; 571 syscallarg(size_t) nchanges; 572 syscallarg(struct kevent *) eventlist; 573 syscallarg(size_t) nevents; 574 syscallarg(const struct timespec *) timeout; 575 } */ *uap = v; 576 struct kevent *kevp; 577 struct kqueue *kq; 578 struct file *fp; 579 struct timespec ts; 580 size_t i, n; 581 int nerrors, error; 582 583 /* check that we're dealing with a kq */ 584 fp = fd_getfile(p->p_fd, SCARG(uap, fd)); 585 if (!fp || fp->f_type != DTYPE_KQUEUE) 586 return (EBADF); 587 588 FILE_USE(fp); 589 590 if (SCARG(uap, timeout) != NULL) { 591 error = copyin(SCARG(uap, timeout), &ts, sizeof(ts)); 592 if (error) 593 goto done; 594 SCARG(uap, timeout) = &ts; 595 } 596 597 kq = (struct kqueue *)fp->f_data; 598 nerrors = 0; 599 600 /* traverse list of events to register */ 601 while (SCARG(uap, nchanges) > 0) { 602 /* copyin a maximum of KQ_EVENTS at each pass */ 603 n = MIN(SCARG(uap, nchanges), KQ_NEVENTS); 604 error = copyin(SCARG(uap, changelist), kq->kq_kev, 605 n * sizeof(struct kevent)); 606 if (error) 607 goto done; 608 for (i = 0; i < n; i++) { 609 kevp = &kq->kq_kev[i]; 610 kevp->flags &= ~EV_SYSFLAGS; 611 /* register each knote */ 612 error = kqueue_register(kq, kevp, p); 613 if (error) { 614 if (SCARG(uap, nevents) != 0) { 615 kevp->flags = EV_ERROR; 616 kevp->data = error; 617 error = copyout((caddr_t)kevp, 618 (caddr_t)SCARG(uap, eventlist), 619 sizeof(*kevp)); 620 if (error) 621 goto done; 622 SCARG(uap, eventlist)++; 623 SCARG(uap, nevents)--; 624 nerrors++; 625 } else { 626 goto done; 627 } 628 } 629 } 630 SCARG(uap, nchanges) -= n; /* update the results */ 631 SCARG(uap, changelist) += n; 632 } 633 if (nerrors) { 634 *retval = nerrors; 635 error = 0; 636 goto done; 637 } 638 639 /* actually scan through the events */ 640 error = kqueue_scan(fp, SCARG(uap, nevents), SCARG(uap, eventlist), 641 SCARG(uap, timeout), p, retval); 642 done: 643 FILE_UNUSE(fp, p); 644 return (error); 645 } 646 647 /* 648 * Register a given kevent kev onto the kqueue 649 */ 650 int 651 kqueue_register(struct kqueue *kq, struct kevent *kev, struct proc *p) 652 { 653 const struct kfilter *kfilter; 654 struct filedesc *fdp; 655 struct file *fp; 656 struct knote *kn; 657 int s, error; 658 659 fdp = kq->kq_fdp; 660 fp = NULL; 661 kn = NULL; 662 error = 0; 663 kfilter = kfilter_byfilter(kev->filter); 664 if (kfilter == NULL || kfilter->filtops == NULL) { 665 /* filter not found nor implemented */ 666 return (EINVAL); 667 } 668 669 /* search if knote already exists */ 670 if (kfilter->filtops->f_isfd) { 671 /* monitoring a file descriptor */ 672 if ((fp = fd_getfile(fdp, kev->ident)) == NULL) 673 return (EBADF); /* validate descriptor */ 674 FILE_USE(fp); 675 676 if (kev->ident < fdp->fd_knlistsize) { 677 SLIST_FOREACH(kn, &fdp->fd_knlist[kev->ident], kn_link) 678 if (kq == kn->kn_kq && 679 kev->filter == kn->kn_filter) 680 break; 681 } 682 } else { 683 /* 684 * not monitoring a file descriptor, so 685 * lookup knotes in internal hash table 686 */ 687 if (fdp->fd_knhashmask != 0) { 688 struct klist *list; 689 690 list = &fdp->fd_knhash[ 691 KN_HASH((u_long)kev->ident, fdp->fd_knhashmask)]; 692 SLIST_FOREACH(kn, list, kn_link) 693 if (kev->ident == kn->kn_id && 694 kq == kn->kn_kq && 695 kev->filter == kn->kn_filter) 696 break; 697 } 698 } 699 700 if (kn == NULL && ((kev->flags & EV_ADD) == 0)) { 701 error = ENOENT; /* filter not found */ 702 goto done; 703 } 704 705 /* 706 * kn now contains the matching knote, or NULL if no match 707 */ 708 if (kev->flags & EV_ADD) { 709 /* add knote */ 710 711 if (kn == NULL) { 712 /* create new knote */ 713 kn = pool_get(&knote_pool, PR_WAITOK); 714 if (kn == NULL) { 715 error = ENOMEM; 716 goto done; 717 } 718 kn->kn_fp = fp; 719 kn->kn_kq = kq; 720 kn->kn_fop = kfilter->filtops; 721 722 /* 723 * apply reference count to knote structure, and 724 * do not release it at the end of this routine. 725 */ 726 fp = NULL; 727 728 kn->kn_sfflags = kev->fflags; 729 kn->kn_sdata = kev->data; 730 kev->fflags = 0; 731 kev->data = 0; 732 kn->kn_kevent = *kev; 733 734 knote_attach(kn, fdp); 735 if ((error = kfilter->filtops->f_attach(kn)) != 0) { 736 knote_drop(kn, p, fdp); 737 goto done; 738 } 739 } else { 740 /* modify existing knote */ 741 742 /* 743 * The user may change some filter values after the 744 * initial EV_ADD, but doing so will not reset any 745 * filter which have already been triggered. 746 */ 747 kn->kn_sfflags = kev->fflags; 748 kn->kn_sdata = kev->data; 749 kn->kn_kevent.udata = kev->udata; 750 } 751 752 s = splhigh(); 753 if (kn->kn_fop->f_event(kn, 0)) 754 KNOTE_ACTIVATE(kn); 755 splx(s); 756 757 } else if (kev->flags & EV_DELETE) { /* delete knote */ 758 kn->kn_fop->f_detach(kn); 759 knote_drop(kn, p, fdp); 760 goto done; 761 } 762 763 /* disable knote */ 764 if ((kev->flags & EV_DISABLE) && 765 ((kn->kn_status & KN_DISABLED) == 0)) { 766 s = splhigh(); 767 kn->kn_status |= KN_DISABLED; 768 splx(s); 769 } 770 771 /* enable knote */ 772 if ((kev->flags & EV_ENABLE) && (kn->kn_status & KN_DISABLED)) { 773 s = splhigh(); 774 kn->kn_status &= ~KN_DISABLED; 775 if ((kn->kn_status & KN_ACTIVE) && 776 ((kn->kn_status & KN_QUEUED) == 0)) 777 knote_enqueue(kn); 778 splx(s); 779 } 780 781 done: 782 if (fp != NULL) 783 FILE_UNUSE(fp, p); 784 return (error); 785 } 786 787 /* 788 * Scan through the list of events on fp (for a maximum of maxevents), 789 * returning the results in to ulistp. Timeout is determined by tsp; if 790 * NULL, wait indefinitely, if 0 valued, perform a poll, otherwise wait 791 * as appropriate. 792 */ 793 static int 794 kqueue_scan(struct file *fp, size_t maxevents, struct kevent *ulistp, 795 const struct timespec *tsp, struct proc *p, register_t *retval) 796 { 797 struct kqueue *kq; 798 struct kevent *kevp; 799 struct timeval atv; 800 struct knote *kn, marker; 801 size_t count, nkev; 802 int s, timeout, error; 803 804 kq = (struct kqueue *)fp->f_data; 805 count = maxevents; 806 nkev = error = 0; 807 if (count == 0) 808 goto done; 809 810 if (tsp != NULL) { /* timeout supplied */ 811 TIMESPEC_TO_TIMEVAL(&atv, tsp); 812 if (itimerfix(&atv)) { 813 error = EINVAL; 814 goto done; 815 } 816 s = splclock(); 817 timeradd(&atv, &time, &atv); /* calc. time to wait until */ 818 splx(s); 819 if (tsp->tv_sec == 0 && tsp->tv_nsec < 1000 /*<1us*/) 820 timeout = -1; /* perform a poll */ 821 else 822 timeout = hzto(&atv); /* calculate hz till timeout */ 823 } else { 824 atv.tv_sec = 0; /* no timeout, wait forever */ 825 atv.tv_usec = 0; 826 timeout = 0; 827 } 828 goto start; 829 830 retry: 831 if (atv.tv_sec || atv.tv_usec) { /* timeout requested */ 832 s = splclock(); 833 if (timercmp(&time, &atv, >=)) { 834 splx(s); 835 goto done; /* timeout reached */ 836 } 837 splx(s); 838 timeout = hzto(&atv); /* recalc. timeout remaining */ 839 } 840 841 start: 842 kevp = kq->kq_kev; 843 s = splhigh(); 844 if (kq->kq_count == 0) { 845 if (timeout < 0) { 846 error = EWOULDBLOCK; 847 } else { 848 kq->kq_state |= KQ_SLEEP; 849 error = tsleep(kq, PSOCK | PCATCH, "kqread", timeout); 850 } 851 splx(s); 852 if (error == 0) 853 goto retry; 854 /* don't restart after signals... */ 855 if (error == ERESTART) 856 error = EINTR; 857 else if (error == EWOULDBLOCK) 858 error = 0; 859 goto done; 860 } 861 862 /* mark end of knote list */ 863 TAILQ_INSERT_TAIL(&kq->kq_head, &marker, kn_tqe); 864 865 while (count) { /* while user wants data ... */ 866 kn = TAILQ_FIRST(&kq->kq_head); /* get next knote */ 867 TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe); 868 if (kn == &marker) { /* if it's our marker, stop */ 869 splx(s); 870 if (count == maxevents) 871 goto retry; 872 goto done; 873 } 874 if (kn->kn_status & KN_DISABLED) { 875 /* don't want disabled events */ 876 kn->kn_status &= ~KN_QUEUED; 877 kq->kq_count--; 878 continue; 879 } 880 if ((kn->kn_flags & EV_ONESHOT) == 0 && 881 kn->kn_fop->f_event(kn, 0) == 0) { 882 /* 883 * non-ONESHOT event that hasn't 884 * triggered again, so de-queue. 885 */ 886 kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE); 887 kq->kq_count--; 888 continue; 889 } 890 *kevp = kn->kn_kevent; 891 kevp++; 892 nkev++; 893 if (kn->kn_flags & EV_ONESHOT) { 894 /* delete ONESHOT events after retrieval */ 895 kn->kn_status &= ~KN_QUEUED; 896 kq->kq_count--; 897 splx(s); 898 kn->kn_fop->f_detach(kn); 899 knote_drop(kn, p, p->p_fd); 900 s = splhigh(); 901 } else if (kn->kn_flags & EV_CLEAR) { 902 /* clear state after retrieval */ 903 kn->kn_data = 0; 904 kn->kn_fflags = 0; 905 kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE); 906 kq->kq_count--; 907 } else { 908 /* add event back on list */ 909 TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe); 910 } 911 count--; 912 if (nkev == KQ_NEVENTS) { 913 /* do copyouts in KQ_NEVENTS chunks */ 914 splx(s); 915 error = copyout((caddr_t)&kq->kq_kev, (caddr_t)ulistp, 916 sizeof(struct kevent) * nkev); 917 ulistp += nkev; 918 nkev = 0; 919 kevp = kq->kq_kev; 920 s = splhigh(); 921 if (error) 922 break; 923 } 924 } 925 926 /* remove marker */ 927 TAILQ_REMOVE(&kq->kq_head, &marker, kn_tqe); 928 splx(s); 929 done: 930 if (nkev != 0) { 931 /* copyout remaining events */ 932 error = copyout((caddr_t)&kq->kq_kev, (caddr_t)ulistp, 933 sizeof(struct kevent) * nkev); 934 } 935 *retval = maxevents - count; 936 937 return (error); 938 } 939 940 /* 941 * struct fileops read method for a kqueue descriptor. 942 * Not implemented. 943 * XXX: This could be expanded to call kqueue_scan, if desired. 944 */ 945 /*ARGSUSED*/ 946 static int 947 kqueue_read(struct file *fp, off_t *offset, struct uio *uio, 948 struct ucred *cred, int flags) 949 { 950 951 return (ENXIO); 952 } 953 954 /* 955 * struct fileops write method for a kqueue descriptor. 956 * Not implemented. 957 */ 958 /*ARGSUSED*/ 959 static int 960 kqueue_write(struct file *fp, off_t *offset, struct uio *uio, 961 struct ucred *cred, int flags) 962 { 963 964 return (ENXIO); 965 } 966 967 /* 968 * struct fileops ioctl method for a kqueue descriptor. 969 * 970 * Two ioctls are currently supported. They both use struct kfilter_mapping: 971 * KFILTER_BYNAME find name for filter, and return result in 972 * name, which is of size len. 973 * KFILTER_BYFILTER find filter for name. len is ignored. 974 */ 975 /*ARGSUSED*/ 976 static int 977 kqueue_ioctl(struct file *fp, u_long com, caddr_t data, struct proc *p) 978 { 979 struct kfilter_mapping *km; 980 const struct kfilter *kfilter; 981 char *name; 982 int error; 983 984 km = (struct kfilter_mapping *)data; 985 error = 0; 986 987 switch (com) { 988 case KFILTER_BYFILTER: /* convert filter -> name */ 989 kfilter = kfilter_byfilter(km->filter); 990 if (kfilter != NULL) 991 error = copyoutstr(kfilter->name, km->name, km->len, 992 NULL); 993 else 994 error = ENOENT; 995 break; 996 997 case KFILTER_BYNAME: /* convert name -> filter */ 998 MALLOC(name, char *, KFILTER_MAXNAME, M_KEVENT, M_WAITOK); 999 error = copyinstr(km->name, name, KFILTER_MAXNAME, NULL); 1000 if (error) { 1001 FREE(name, M_KEVENT); 1002 break; 1003 } 1004 kfilter = kfilter_byname(name); 1005 if (kfilter != NULL) 1006 km->filter = kfilter->filter; 1007 else 1008 error = ENOENT; 1009 FREE(name, M_KEVENT); 1010 break; 1011 1012 default: 1013 error = ENOTTY; 1014 1015 } 1016 return (error); 1017 } 1018 1019 /* 1020 * struct fileops fcntl method for a kqueue descriptor. 1021 * Not implemented. 1022 */ 1023 /*ARGSUSED*/ 1024 static int 1025 kqueue_fcntl(struct file *fp, u_int com, caddr_t data, struct proc *p) 1026 { 1027 1028 return (ENOTTY); 1029 } 1030 1031 /* 1032 * struct fileops poll method for a kqueue descriptor. 1033 * Determine if kqueue has events pending. 1034 */ 1035 static int 1036 kqueue_poll(struct file *fp, int events, struct proc *p) 1037 { 1038 struct kqueue *kq; 1039 int revents; 1040 1041 kq = (struct kqueue *)fp->f_data; 1042 revents = 0; 1043 if (events & (POLLIN | POLLRDNORM)) { 1044 if (kq->kq_count) { 1045 revents |= events & (POLLIN | POLLRDNORM); 1046 } else { 1047 selrecord(p, &kq->kq_sel); 1048 } 1049 } 1050 return (revents); 1051 } 1052 1053 /* 1054 * struct fileops stat method for a kqueue descriptor. 1055 * Returns dummy info, with st_size being number of events pending. 1056 */ 1057 static int 1058 kqueue_stat(struct file *fp, struct stat *st, struct proc *p) 1059 { 1060 struct kqueue *kq; 1061 1062 kq = (struct kqueue *)fp->f_data; 1063 memset((void *)st, 0, sizeof(*st)); 1064 st->st_size = kq->kq_count; 1065 st->st_blksize = sizeof(struct kevent); 1066 st->st_mode = S_IFIFO; 1067 return (0); 1068 } 1069 1070 /* 1071 * struct fileops close method for a kqueue descriptor. 1072 * Cleans up kqueue. 1073 */ 1074 static int 1075 kqueue_close(struct file *fp, struct proc *p) 1076 { 1077 struct kqueue *kq; 1078 struct filedesc *fdp; 1079 struct knote **knp, *kn, *kn0; 1080 int i; 1081 1082 kq = (struct kqueue *)fp->f_data; 1083 fdp = p->p_fd; 1084 for (i = 0; i < fdp->fd_knlistsize; i++) { 1085 knp = &SLIST_FIRST(&fdp->fd_knlist[i]); 1086 kn = *knp; 1087 while (kn != NULL) { 1088 kn0 = SLIST_NEXT(kn, kn_link); 1089 if (kq == kn->kn_kq) { 1090 kn->kn_fop->f_detach(kn); 1091 FILE_UNUSE(kn->kn_fp, p); 1092 pool_put(&knote_pool, kn); 1093 *knp = kn0; 1094 } else { 1095 knp = &SLIST_NEXT(kn, kn_link); 1096 } 1097 kn = kn0; 1098 } 1099 } 1100 if (fdp->fd_knhashmask != 0) { 1101 for (i = 0; i < fdp->fd_knhashmask + 1; i++) { 1102 knp = &SLIST_FIRST(&fdp->fd_knhash[i]); 1103 kn = *knp; 1104 while (kn != NULL) { 1105 kn0 = SLIST_NEXT(kn, kn_link); 1106 if (kq == kn->kn_kq) { 1107 kn->kn_fop->f_detach(kn); 1108 /* XXX non-fd release of kn->kn_ptr */ 1109 pool_put(&knote_pool, kn); 1110 *knp = kn0; 1111 } else { 1112 knp = &SLIST_NEXT(kn, kn_link); 1113 } 1114 kn = kn0; 1115 } 1116 } 1117 } 1118 pool_put(&kqueue_pool, kq); 1119 fp->f_data = NULL; 1120 1121 return (0); 1122 } 1123 1124 /* 1125 * wakeup a kqueue 1126 */ 1127 static void 1128 kqueue_wakeup(struct kqueue *kq) 1129 { 1130 1131 if (kq->kq_state & KQ_SLEEP) { /* if currently sleeping ... */ 1132 kq->kq_state &= ~KQ_SLEEP; 1133 wakeup(kq); /* ... wakeup */ 1134 } 1135 1136 /* Notify select/poll and kevent. */ 1137 selnotify(&kq->kq_sel, 0); 1138 } 1139 1140 /* 1141 * struct fileops kqfilter method for a kqueue descriptor. 1142 * Event triggered when monitored kqueue changes. 1143 */ 1144 /*ARGSUSED*/ 1145 static int 1146 kqueue_kqfilter(struct file *fp, struct knote *kn) 1147 { 1148 struct kqueue *kq; 1149 1150 KASSERT(fp == kn->kn_fp); 1151 kq = (struct kqueue *)kn->kn_fp->f_data; 1152 if (kn->kn_filter != EVFILT_READ) 1153 return (1); 1154 kn->kn_fop = &kqread_filtops; 1155 SLIST_INSERT_HEAD(&kq->kq_sel.sel_klist, kn, kn_selnext); 1156 return (0); 1157 } 1158 1159 1160 /* 1161 * Walk down a list of knotes, activating them if their event has triggered. 1162 */ 1163 void 1164 knote(struct klist *list, long hint) 1165 { 1166 struct knote *kn; 1167 1168 SLIST_FOREACH(kn, list, kn_selnext) 1169 if (kn->kn_fop->f_event(kn, hint)) 1170 KNOTE_ACTIVATE(kn); 1171 } 1172 1173 /* 1174 * Remove all knotes from a specified klist 1175 */ 1176 void 1177 knote_remove(struct proc *p, struct klist *list) 1178 { 1179 struct knote *kn; 1180 1181 while ((kn = SLIST_FIRST(list)) != NULL) { 1182 kn->kn_fop->f_detach(kn); 1183 knote_drop(kn, p, p->p_fd); 1184 } 1185 } 1186 1187 /* 1188 * Remove all knotes referencing a specified fd 1189 */ 1190 void 1191 knote_fdclose(struct proc *p, int fd) 1192 { 1193 struct filedesc *fdp; 1194 struct klist *list; 1195 1196 fdp = p->p_fd; 1197 list = &fdp->fd_knlist[fd]; 1198 knote_remove(p, list); 1199 } 1200 1201 /* 1202 * Attach a new knote to a file descriptor 1203 */ 1204 static void 1205 knote_attach(struct knote *kn, struct filedesc *fdp) 1206 { 1207 struct klist *list; 1208 int size; 1209 1210 if (! kn->kn_fop->f_isfd) { 1211 /* if knote is not on an fd, store on internal hash table */ 1212 if (fdp->fd_knhashmask == 0) 1213 fdp->fd_knhash = hashinit(KN_HASHSIZE, HASH_LIST, 1214 M_KEVENT, M_WAITOK, &fdp->fd_knhashmask); 1215 list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)]; 1216 goto done; 1217 } 1218 1219 /* 1220 * otherwise, knote is on an fd. 1221 * knotes are stored in fd_knlist indexed by kn->kn_id. 1222 */ 1223 if (fdp->fd_knlistsize <= kn->kn_id) { 1224 /* expand list, it's too small */ 1225 size = fdp->fd_knlistsize; 1226 while (size <= kn->kn_id) { 1227 /* grow in KQ_EXTENT chunks */ 1228 size += KQ_EXTENT; 1229 } 1230 list = malloc(size * sizeof(struct klist *), M_KEVENT,M_WAITOK); 1231 if (fdp->fd_knlist) { 1232 /* copy existing knlist */ 1233 memcpy((caddr_t)list, (caddr_t)fdp->fd_knlist, 1234 fdp->fd_knlistsize * sizeof(struct klist *)); 1235 } 1236 /* 1237 * Zero new memory. Stylistically, SLIST_INIT() should be 1238 * used here, but that does same thing as the memset() anyway. 1239 */ 1240 memset(&list[fdp->fd_knlistsize], 0, 1241 (size - fdp->fd_knlistsize) * sizeof(struct klist *)); 1242 1243 /* switch to new knlist */ 1244 if (fdp->fd_knlist != NULL) 1245 free(fdp->fd_knlist, M_KEVENT); 1246 fdp->fd_knlistsize = size; 1247 fdp->fd_knlist = list; 1248 } 1249 1250 /* get list head for this fd */ 1251 list = &fdp->fd_knlist[kn->kn_id]; 1252 done: 1253 /* add new knote */ 1254 SLIST_INSERT_HEAD(list, kn, kn_link); 1255 kn->kn_status = 0; 1256 } 1257 1258 /* 1259 * Drop knote. 1260 * Should be called at spl == 0, since we don't want to hold spl 1261 * while calling FILE_UNUSE and free. 1262 */ 1263 static void 1264 knote_drop(struct knote *kn, struct proc *p, struct filedesc *fdp) 1265 { 1266 struct klist *list; 1267 1268 if (kn->kn_fop->f_isfd) 1269 list = &fdp->fd_knlist[kn->kn_id]; 1270 else 1271 list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)]; 1272 1273 SLIST_REMOVE(list, kn, knote, kn_link); 1274 if (kn->kn_status & KN_QUEUED) 1275 knote_dequeue(kn); 1276 if (kn->kn_fop->f_isfd) 1277 FILE_UNUSE(kn->kn_fp, p); 1278 pool_put(&knote_pool, kn); 1279 } 1280 1281 1282 /* 1283 * Queue new event for knote. 1284 */ 1285 static void 1286 knote_enqueue(struct knote *kn) 1287 { 1288 struct kqueue *kq; 1289 int s; 1290 1291 kq = kn->kn_kq; 1292 s = splhigh(); 1293 KASSERT((kn->kn_status & KN_QUEUED) == 0); 1294 1295 TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe); 1296 kn->kn_status |= KN_QUEUED; 1297 kq->kq_count++; 1298 splx(s); 1299 kqueue_wakeup(kq); 1300 } 1301 1302 /* 1303 * Dequeue event for knote. 1304 */ 1305 static void 1306 knote_dequeue(struct knote *kn) 1307 { 1308 struct kqueue *kq; 1309 int s; 1310 1311 kq = kn->kn_kq; 1312 s = splhigh(); 1313 KASSERT(kn->kn_status & KN_QUEUED); 1314 1315 TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe); 1316 kn->kn_status &= ~KN_QUEUED; 1317 kq->kq_count--; 1318 splx(s); 1319 } 1320