1*37fbd8dfSbluhm /* $OpenBSD: kqueue.c,v 1.33 2014/10/18 21:56:44 bluhm Exp $ */ 234fc9cdeSmickey 3fd332320Sprovos /* 4fd332320Sprovos * Copyright 2000-2002 Niels Provos <provos@citi.umich.edu> 5fd332320Sprovos * All rights reserved. 6fd332320Sprovos * 7fd332320Sprovos * Redistribution and use in source and binary forms, with or without 8fd332320Sprovos * modification, are permitted provided that the following conditions 9fd332320Sprovos * are met: 10fd332320Sprovos * 1. Redistributions of source code must retain the above copyright 11fd332320Sprovos * notice, this list of conditions and the following disclaimer. 12fd332320Sprovos * 2. Redistributions in binary form must reproduce the above copyright 13fd332320Sprovos * notice, this list of conditions and the following disclaimer in the 14fd332320Sprovos * documentation and/or other materials provided with the distribution. 15ff9272daSbrad * 3. The name of the author may not be used to endorse or promote products 16fd332320Sprovos * derived from this software without specific prior written permission. 17fd332320Sprovos * 18fd332320Sprovos * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 19fd332320Sprovos * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 20fd332320Sprovos * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 21fd332320Sprovos * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 22fd332320Sprovos * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 23fd332320Sprovos * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24fd332320Sprovos * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25fd332320Sprovos * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26fd332320Sprovos * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 27fd332320Sprovos * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28fd332320Sprovos */ 29fd332320Sprovos 30fd332320Sprovos #include <sys/types.h> 31fd332320Sprovos #include <sys/time.h> 32fd332320Sprovos #include <sys/queue.h> 33fd332320Sprovos #include <sys/event.h> 34defc4074Sbluhm 35fd332320Sprovos #include <signal.h> 36fd332320Sprovos #include <stdio.h> 37fd332320Sprovos #include <stdlib.h> 38ff33a3f0Sderaadt #include <string.h> 39fd332320Sprovos #include <unistd.h> 40fd332320Sprovos #include <errno.h> 418ead113eSnicm #include <assert.h> 421770acb2Smarkus #include <inttypes.h> 43fd332320Sprovos 44fd332320Sprovos #include "event.h" 458ead113eSnicm #include "event-internal.h" 464643be29Sbrad #include "log.h" 47e175bf56Snicm #include "evsignal.h" 48fd332320Sprovos 49fd332320Sprovos #define EVLIST_X_KQINKERNEL 0x1000 50fd332320Sprovos 51fd332320Sprovos #define NEVENT 64 52fd332320Sprovos 53fd332320Sprovos struct kqop { 54fd332320Sprovos struct kevent *changes; 55fd332320Sprovos int nchanges; 56fd332320Sprovos struct kevent *events; 578ead113eSnicm struct event_list evsigevents[NSIG]; 58fd332320Sprovos int nevents; 59fd332320Sprovos int kq; 608ead113eSnicm pid_t pid; 614643be29Sbrad }; 62fd332320Sprovos 638ead113eSnicm static void *kq_init (struct event_base *); 648ead113eSnicm static int kq_add (void *, struct event *); 658ead113eSnicm static int kq_del (void *, struct event *); 668ead113eSnicm static int kq_dispatch (struct event_base *, void *, struct timeval *); 678ead113eSnicm static int kq_insert (struct kqop *, struct kevent *); 688ead113eSnicm static void kq_dealloc (struct event_base *, void *); 69fd332320Sprovos 70759b8817Smickey const struct eventop kqops = { 71fd332320Sprovos "kqueue", 72fd332320Sprovos kq_init, 73fd332320Sprovos kq_add, 74fd332320Sprovos kq_del, 753ac1ba99Sbrad kq_dispatch, 768ead113eSnicm kq_dealloc, 778ead113eSnicm 1 /* need reinit */ 78fd332320Sprovos }; 79fd332320Sprovos 808ead113eSnicm static void * 81bdce580dSbrad kq_init(struct event_base *base) 82fd332320Sprovos { 838ead113eSnicm int i, kq; 844643be29Sbrad struct kqop *kqueueop; 85fd332320Sprovos 86fd332320Sprovos /* Disable kqueue when this environment variable is set */ 878ead113eSnicm if (evutil_getenv("EVENT_NOKQUEUE")) 88fd332320Sprovos return (NULL); 89fd332320Sprovos 904643be29Sbrad if (!(kqueueop = calloc(1, sizeof(struct kqop)))) 914643be29Sbrad return (NULL); 92fd332320Sprovos 93ff9272daSbrad /* Initalize the kernel queue */ 94fd332320Sprovos 95fd332320Sprovos if ((kq = kqueue()) == -1) { 964643be29Sbrad event_warn("kqueue"); 974643be29Sbrad free (kqueueop); 98fd332320Sprovos return (NULL); 99fd332320Sprovos } 100fd332320Sprovos 1014643be29Sbrad kqueueop->kq = kq; 102fd332320Sprovos 1038ead113eSnicm kqueueop->pid = getpid(); 1048ead113eSnicm 105ff9272daSbrad /* Initalize fields */ 1061ed98fdfSderaadt kqueueop->changes = calloc(NEVENT, sizeof(struct kevent)); 1074643be29Sbrad if (kqueueop->changes == NULL) { 1084643be29Sbrad free (kqueueop); 109fd332320Sprovos return (NULL); 110fd332320Sprovos } 1111ed98fdfSderaadt kqueueop->events = calloc(NEVENT, sizeof(struct kevent)); 1124643be29Sbrad if (kqueueop->events == NULL) { 1134643be29Sbrad free (kqueueop->changes); 1144643be29Sbrad free (kqueueop); 1154643be29Sbrad return (NULL); 1164643be29Sbrad } 1174643be29Sbrad kqueueop->nevents = NEVENT; 118fd332320Sprovos 1198ead113eSnicm /* we need to keep track of multiple events per signal */ 1208ead113eSnicm for (i = 0; i < NSIG; ++i) { 1218ead113eSnicm TAILQ_INIT(&kqueueop->evsigevents[i]); 1228ead113eSnicm } 1238ead113eSnicm 124348ce57bSbrad /* Check for Mac OS X kqueue bug. */ 125348ce57bSbrad kqueueop->changes[0].ident = -1; 126348ce57bSbrad kqueueop->changes[0].filter = EVFILT_READ; 127348ce57bSbrad kqueueop->changes[0].flags = EV_ADD; 128348ce57bSbrad /* 129348ce57bSbrad * If kqueue works, then kevent will succeed, and it will 130348ce57bSbrad * stick an error in events[0]. If kqueue is broken, then 131348ce57bSbrad * kevent will fail. 132348ce57bSbrad */ 133348ce57bSbrad if (kevent(kq, 134348ce57bSbrad kqueueop->changes, 1, kqueueop->events, NEVENT, NULL) != 1 || 135348ce57bSbrad kqueueop->events[0].ident != -1 || 136348ce57bSbrad kqueueop->events[0].flags != EV_ERROR) { 137348ce57bSbrad event_warn("%s: detected broken kqueue; not using.", __func__); 138348ce57bSbrad free(kqueueop->changes); 139348ce57bSbrad free(kqueueop->events); 140348ce57bSbrad free(kqueueop); 141348ce57bSbrad close(kq); 142348ce57bSbrad return (NULL); 143348ce57bSbrad } 144348ce57bSbrad 1454643be29Sbrad return (kqueueop); 146fd332320Sprovos } 147fd332320Sprovos 1488ead113eSnicm static int 149fd332320Sprovos kq_insert(struct kqop *kqop, struct kevent *kev) 150fd332320Sprovos { 151fd332320Sprovos int nevents = kqop->nevents; 152fd332320Sprovos 153fd332320Sprovos if (kqop->nchanges == nevents) { 154fd332320Sprovos struct kevent *newchange; 155fd332320Sprovos struct kevent *newresult; 156fd332320Sprovos 157fd332320Sprovos nevents *= 2; 158fd332320Sprovos 1592af34b00Sderaadt newchange = reallocarray(kqop->changes, 1602af34b00Sderaadt nevents, sizeof(struct kevent)); 161fd332320Sprovos if (newchange == NULL) { 1624643be29Sbrad event_warn("%s: malloc", __func__); 163fd332320Sprovos return (-1); 164fd332320Sprovos } 165fd332320Sprovos kqop->changes = newchange; 166fd332320Sprovos 1672af34b00Sderaadt newresult = reallocarray(kqop->events, 1682af34b00Sderaadt nevents, sizeof(struct kevent)); 169fd332320Sprovos 170fd332320Sprovos /* 171fd332320Sprovos * If we fail, we don't have to worry about freeing, 172fd332320Sprovos * the next realloc will pick it up. 173fd332320Sprovos */ 174fd332320Sprovos if (newresult == NULL) { 1754643be29Sbrad event_warn("%s: malloc", __func__); 176fd332320Sprovos return (-1); 177fd332320Sprovos } 178ff9272daSbrad kqop->events = newresult; 179fd332320Sprovos 180fd332320Sprovos kqop->nevents = nevents; 181fd332320Sprovos } 182fd332320Sprovos 183fd332320Sprovos memcpy(&kqop->changes[kqop->nchanges++], kev, sizeof(struct kevent)); 184fd332320Sprovos 1854643be29Sbrad event_debug(("%s: fd %d %s%s", 1868ead113eSnicm __func__, (int)kev->ident, 187fd332320Sprovos kev->filter == EVFILT_READ ? "EVFILT_READ" : "EVFILT_WRITE", 188fd332320Sprovos kev->flags == EV_DELETE ? " (del)" : "")); 189fd332320Sprovos 190fd332320Sprovos return (0); 191fd332320Sprovos } 192fd332320Sprovos 193fd332320Sprovos static void 194fd332320Sprovos kq_sighandler(int sig) 195fd332320Sprovos { 196fd332320Sprovos /* Do nothing here */ 197fd332320Sprovos } 198fd332320Sprovos 1998ead113eSnicm static int 2004643be29Sbrad kq_dispatch(struct event_base *base, void *arg, struct timeval *tv) 201fd332320Sprovos { 202fd332320Sprovos struct kqop *kqop = arg; 203fd332320Sprovos struct kevent *changes = kqop->changes; 204fd332320Sprovos struct kevent *events = kqop->events; 205fd332320Sprovos struct event *ev; 206bdce580dSbrad struct timespec ts, *ts_p = NULL; 207fd332320Sprovos int i, res; 208fd332320Sprovos 209bdce580dSbrad if (tv != NULL) { 210fd332320Sprovos TIMEVAL_TO_TIMESPEC(tv, &ts); 211bdce580dSbrad ts_p = &ts; 212bdce580dSbrad } 213fd332320Sprovos 214fd332320Sprovos res = kevent(kqop->kq, changes, kqop->nchanges, 215bdce580dSbrad events, kqop->nevents, ts_p); 216fd332320Sprovos kqop->nchanges = 0; 217fd332320Sprovos if (res == -1) { 218fd332320Sprovos if (errno != EINTR) { 2194643be29Sbrad event_warn("kevent"); 220fd332320Sprovos return (-1); 221fd332320Sprovos } 222fd332320Sprovos 223fd332320Sprovos return (0); 224fd332320Sprovos } 225fd332320Sprovos 2264643be29Sbrad event_debug(("%s: kevent reports %d", __func__, res)); 227fd332320Sprovos 228fd332320Sprovos for (i = 0; i < res; i++) { 229fd332320Sprovos int which = 0; 230fd332320Sprovos 231fd332320Sprovos if (events[i].flags & EV_ERROR) { 23210649951Snicm switch (events[i].data) { 23310649951Snicm 23410649951Snicm /* Can occur on delete if we are not currently 23510649951Snicm * watching any events on this fd. That can 23610649951Snicm * happen when the fd was closed and another 23710649951Snicm * file was opened with that fd. */ 23810649951Snicm case ENOENT: 23910649951Snicm /* Can occur for reasons not fully understood 24010649951Snicm * on FreeBSD. */ 24110649951Snicm case EINVAL: 242fd332320Sprovos continue; 24310649951Snicm /* Can occur on a delete if the fd is closed. Can 24410649951Snicm * occur on an add if the fd was one side of a pipe, 24510649951Snicm * and the other side was closed. */ 24610649951Snicm case EBADF: 2471db6aa6eSnicm continue; 24810649951Snicm /* These two can occur on an add if the fd was one side 24910649951Snicm * of a pipe, and the other side was closed. */ 25010649951Snicm case EPERM: 25110649951Snicm case EPIPE: 25210649951Snicm /* Report read events, if we're listening for 25310649951Snicm * them, so that the user can learn about any 25410649951Snicm * add errors. (If the operation was a 25510649951Snicm * delete, then udata should be cleared.) */ 25610649951Snicm if (events[i].udata) { 25710649951Snicm /* The operation was an add: 25810649951Snicm * report the error as a read. */ 25910649951Snicm which |= EV_READ; 26010649951Snicm break; 26110649951Snicm } else { 26210649951Snicm /* The operation was a del: 26310649951Snicm * report nothing. */ 26410649951Snicm continue; 26510649951Snicm } 26610649951Snicm 26710649951Snicm /* Other errors shouldn't occur. */ 26810649951Snicm default: 2691085edd8Sbrad errno = events[i].data; 270fd332320Sprovos return (-1); 271fd332320Sprovos } 27210649951Snicm } else if (events[i].filter == EVFILT_READ) { 273fd332320Sprovos which |= EV_READ; 274fd332320Sprovos } else if (events[i].filter == EVFILT_WRITE) { 275fd332320Sprovos which |= EV_WRITE; 276fd332320Sprovos } else if (events[i].filter == EVFILT_SIGNAL) { 277fd332320Sprovos which |= EV_SIGNAL; 2781770acb2Smarkus } 279fd332320Sprovos 280fd332320Sprovos if (!which) 281fd332320Sprovos continue; 282fd332320Sprovos 2838ead113eSnicm if (events[i].filter == EVFILT_SIGNAL) { 2848ead113eSnicm struct event_list *head = 2858ead113eSnicm (struct event_list *)events[i].udata; 2868ead113eSnicm TAILQ_FOREACH(ev, head, ev_signal_next) { 2878ead113eSnicm event_active(ev, which, events[i].data); 2888ead113eSnicm } 2898ead113eSnicm } else { 2908ead113eSnicm ev = (struct event *)events[i].udata; 291fd332320Sprovos 2928ead113eSnicm if (!(ev->ev_events & EV_PERSIST)) 2938ead113eSnicm ev->ev_flags &= ~EVLIST_X_KQINKERNEL; 2948ead113eSnicm 2958ead113eSnicm event_active(ev, which, 1); 2968ead113eSnicm } 297fd332320Sprovos } 298fd332320Sprovos 299fd332320Sprovos return (0); 300fd332320Sprovos } 301fd332320Sprovos 302fd332320Sprovos 3038ead113eSnicm static int 304fd332320Sprovos kq_add(void *arg, struct event *ev) 305fd332320Sprovos { 306fd332320Sprovos struct kqop *kqop = arg; 307fd332320Sprovos struct kevent kev; 308fd332320Sprovos 309fd332320Sprovos if (ev->ev_events & EV_SIGNAL) { 310fd332320Sprovos int nsignal = EVENT_SIGNAL(ev); 311fd332320Sprovos 3128ead113eSnicm assert(nsignal >= 0 && nsignal < NSIG); 3138ead113eSnicm if (TAILQ_EMPTY(&kqop->evsigevents[nsignal])) { 3148ead113eSnicm struct timespec timeout = { 0, 0 }; 3158ead113eSnicm 316fd332320Sprovos memset(&kev, 0, sizeof(kev)); 317fd332320Sprovos kev.ident = nsignal; 318fd332320Sprovos kev.filter = EVFILT_SIGNAL; 319fd332320Sprovos kev.flags = EV_ADD; 320*37fbd8dfSbluhm kev.udata = &kqop->evsigevents[nsignal]; 321fd332320Sprovos 3228ead113eSnicm /* Be ready for the signal if it is sent any 3238ead113eSnicm * time between now and the next call to 3248ead113eSnicm * kq_dispatch. */ 3258ead113eSnicm if (kevent(kqop->kq, &kev, 1, NULL, 0, &timeout) == -1) 326fd332320Sprovos return (-1); 327fd332320Sprovos 3288ead113eSnicm if (_evsignal_set_handler(ev->ev_base, nsignal, 3298ead113eSnicm kq_sighandler) == -1) 330fd332320Sprovos return (-1); 3318ead113eSnicm } 332fd332320Sprovos 3338ead113eSnicm TAILQ_INSERT_TAIL(&kqop->evsigevents[nsignal], ev, 3348ead113eSnicm ev_signal_next); 335fd332320Sprovos ev->ev_flags |= EVLIST_X_KQINKERNEL; 336fd332320Sprovos return (0); 337fd332320Sprovos } 338fd332320Sprovos 339fd332320Sprovos if (ev->ev_events & EV_READ) { 340fd332320Sprovos memset(&kev, 0, sizeof(kev)); 341fd332320Sprovos kev.ident = ev->ev_fd; 342fd332320Sprovos kev.filter = EVFILT_READ; 343c74e3f8eSmarkus /* Make it behave like select() and poll() */ 344c74e3f8eSmarkus kev.fflags = NOTE_EOF; 345ff9272daSbrad kev.flags = EV_ADD; 346e5c7daabSart if (!(ev->ev_events & EV_PERSIST)) 347ddb00dd9Sitojun kev.flags |= EV_ONESHOT; 348*37fbd8dfSbluhm kev.udata = ev; 349fd332320Sprovos 350fd332320Sprovos if (kq_insert(kqop, &kev) == -1) 351fd332320Sprovos return (-1); 352fd332320Sprovos 353fd332320Sprovos ev->ev_flags |= EVLIST_X_KQINKERNEL; 354fd332320Sprovos } 355fd332320Sprovos 356fd332320Sprovos if (ev->ev_events & EV_WRITE) { 357fd332320Sprovos memset(&kev, 0, sizeof(kev)); 358fd332320Sprovos kev.ident = ev->ev_fd; 359fd332320Sprovos kev.filter = EVFILT_WRITE; 360e5c7daabSart kev.flags = EV_ADD; 361e5c7daabSart if (!(ev->ev_events & EV_PERSIST)) 362ddb00dd9Sitojun kev.flags |= EV_ONESHOT; 363*37fbd8dfSbluhm kev.udata = ev; 364fd332320Sprovos 365fd332320Sprovos if (kq_insert(kqop, &kev) == -1) 366fd332320Sprovos return (-1); 367fd332320Sprovos 368fd332320Sprovos ev->ev_flags |= EVLIST_X_KQINKERNEL; 369fd332320Sprovos } 370fd332320Sprovos 371fd332320Sprovos return (0); 372fd332320Sprovos } 373fd332320Sprovos 3748ead113eSnicm static int 375fd332320Sprovos kq_del(void *arg, struct event *ev) 376fd332320Sprovos { 377fd332320Sprovos struct kqop *kqop = arg; 378fd332320Sprovos struct kevent kev; 379fd332320Sprovos 380fd332320Sprovos if (!(ev->ev_flags & EVLIST_X_KQINKERNEL)) 381fd332320Sprovos return (0); 382fd332320Sprovos 383fd332320Sprovos if (ev->ev_events & EV_SIGNAL) { 384fd332320Sprovos int nsignal = EVENT_SIGNAL(ev); 3858ead113eSnicm struct timespec timeout = { 0, 0 }; 386fd332320Sprovos 3878ead113eSnicm assert(nsignal >= 0 && nsignal < NSIG); 3888ead113eSnicm TAILQ_REMOVE(&kqop->evsigevents[nsignal], ev, ev_signal_next); 3898ead113eSnicm if (TAILQ_EMPTY(&kqop->evsigevents[nsignal])) { 390fd332320Sprovos memset(&kev, 0, sizeof(kev)); 391c46b02d0Sbrad kev.ident = nsignal; 392fd332320Sprovos kev.filter = EVFILT_SIGNAL; 393fd332320Sprovos kev.flags = EV_DELETE; 394fd332320Sprovos 3958ead113eSnicm /* Because we insert signal events 3968ead113eSnicm * immediately, we need to delete them 3978ead113eSnicm * immediately, too */ 3988ead113eSnicm if (kevent(kqop->kq, &kev, 1, NULL, 0, &timeout) == -1) 399fd332320Sprovos return (-1); 400fd332320Sprovos 4018ead113eSnicm if (_evsignal_restore_handler(ev->ev_base, 4028ead113eSnicm nsignal) == -1) 403fd332320Sprovos return (-1); 4048ead113eSnicm } 405fd332320Sprovos 406fd332320Sprovos ev->ev_flags &= ~EVLIST_X_KQINKERNEL; 407fd332320Sprovos return (0); 408fd332320Sprovos } 409fd332320Sprovos 410fd332320Sprovos if (ev->ev_events & EV_READ) { 411fd332320Sprovos memset(&kev, 0, sizeof(kev)); 412fd332320Sprovos kev.ident = ev->ev_fd; 413fd332320Sprovos kev.filter = EVFILT_READ; 414fd332320Sprovos kev.flags = EV_DELETE; 415fd332320Sprovos 416fd332320Sprovos if (kq_insert(kqop, &kev) == -1) 417fd332320Sprovos return (-1); 418fd332320Sprovos 419fd332320Sprovos ev->ev_flags &= ~EVLIST_X_KQINKERNEL; 420fd332320Sprovos } 421fd332320Sprovos 422fd332320Sprovos if (ev->ev_events & EV_WRITE) { 423fd332320Sprovos memset(&kev, 0, sizeof(kev)); 424fd332320Sprovos kev.ident = ev->ev_fd; 425fd332320Sprovos kev.filter = EVFILT_WRITE; 426fd332320Sprovos kev.flags = EV_DELETE; 427fd332320Sprovos 428fd332320Sprovos if (kq_insert(kqop, &kev) == -1) 429fd332320Sprovos return (-1); 430fd332320Sprovos 431fd332320Sprovos ev->ev_flags &= ~EVLIST_X_KQINKERNEL; 432fd332320Sprovos } 433fd332320Sprovos 434fd332320Sprovos return (0); 435fd332320Sprovos } 4363ac1ba99Sbrad 4378ead113eSnicm static void 438bdce580dSbrad kq_dealloc(struct event_base *base, void *arg) 4393ac1ba99Sbrad { 4403ac1ba99Sbrad struct kqop *kqop = arg; 4413ac1ba99Sbrad 442e175bf56Snicm evsignal_dealloc(base); 443e175bf56Snicm 4443ac1ba99Sbrad if (kqop->changes) 4453ac1ba99Sbrad free(kqop->changes); 4463ac1ba99Sbrad if (kqop->events) 4473ac1ba99Sbrad free(kqop->events); 4488ead113eSnicm if (kqop->kq >= 0 && kqop->pid == getpid()) 4493ac1ba99Sbrad close(kqop->kq); 450e175bf56Snicm 4513ac1ba99Sbrad memset(kqop, 0, sizeof(struct kqop)); 4523ac1ba99Sbrad free(kqop); 4533ac1ba99Sbrad } 454