1*8ead113eSnicm /* $OpenBSD: kqueue.c,v 1.25 2010/04/21 20:02:40 nicm Exp $ */ 234fc9cdeSmickey 3fd332320Sprovos /* 4fd332320Sprovos * Copyright 2000-2002 Niels Provos <provos@citi.umich.edu> 5fd332320Sprovos * All rights reserved. 6fd332320Sprovos * 7fd332320Sprovos * Redistribution and use in source and binary forms, with or without 8fd332320Sprovos * modification, are permitted provided that the following conditions 9fd332320Sprovos * are met: 10fd332320Sprovos * 1. Redistributions of source code must retain the above copyright 11fd332320Sprovos * notice, this list of conditions and the following disclaimer. 12fd332320Sprovos * 2. Redistributions in binary form must reproduce the above copyright 13fd332320Sprovos * notice, this list of conditions and the following disclaimer in the 14fd332320Sprovos * documentation and/or other materials provided with the distribution. 15ff9272daSbrad * 3. The name of the author may not be used to endorse or promote products 16fd332320Sprovos * derived from this software without specific prior written permission. 17fd332320Sprovos * 18fd332320Sprovos * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 19fd332320Sprovos * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 20fd332320Sprovos * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 21fd332320Sprovos * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 22fd332320Sprovos * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 23fd332320Sprovos * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24fd332320Sprovos * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25fd332320Sprovos * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26fd332320Sprovos * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 27fd332320Sprovos * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28fd332320Sprovos */ 291770acb2Smarkus #ifdef HAVE_CONFIG_H 30fd332320Sprovos #include "config.h" 311770acb2Smarkus #endif 32fd332320Sprovos 33*8ead113eSnicm #define _GNU_SOURCE 1 34*8ead113eSnicm 35fd332320Sprovos #include <sys/types.h> 361770acb2Smarkus #ifdef HAVE_SYS_TIME_H 37fd332320Sprovos #include <sys/time.h> 381770acb2Smarkus #else 39*8ead113eSnicm #include <sys/_libevent_time.h> 401770acb2Smarkus #endif 41fd332320Sprovos #include <sys/queue.h> 42fd332320Sprovos #include <sys/event.h> 43fd332320Sprovos #include <signal.h> 44fd332320Sprovos #include <stdio.h> 45fd332320Sprovos #include <stdlib.h> 46ff33a3f0Sderaadt #include <string.h> 47fd332320Sprovos #include <unistd.h> 48fd332320Sprovos #include <errno.h> 49*8ead113eSnicm #include <assert.h> 501770acb2Smarkus #ifdef HAVE_INTTYPES_H 511770acb2Smarkus #include <inttypes.h> 521770acb2Smarkus #endif 53fd332320Sprovos 54bdce580dSbrad /* Some platforms apparently define the udata field of struct kevent as 55*8ead113eSnicm * intptr_t, whereas others define it as void*. There doesn't seem to be an 56bdce580dSbrad * easy way to tell them apart via autoconf, so we need to use OS macros. */ 57bdce580dSbrad #if defined(HAVE_INTTYPES_H) && !defined(__OpenBSD__) && !defined(__FreeBSD__) && !defined(__darwin__) && !defined(__APPLE__) 58bdce580dSbrad #define PTR_TO_UDATA(x) ((intptr_t)(x)) 591770acb2Smarkus #else 60bdce580dSbrad #define PTR_TO_UDATA(x) (x) 61fd332320Sprovos #endif 62fd332320Sprovos 63fd332320Sprovos #include "event.h" 64*8ead113eSnicm #include "event-internal.h" 654643be29Sbrad #include "log.h" 66fd332320Sprovos 67fd332320Sprovos #define EVLIST_X_KQINKERNEL 0x1000 68fd332320Sprovos 69fd332320Sprovos #define NEVENT 64 70fd332320Sprovos 71fd332320Sprovos struct kqop { 72fd332320Sprovos struct kevent *changes; 73fd332320Sprovos int nchanges; 74fd332320Sprovos struct kevent *events; 75*8ead113eSnicm struct event_list evsigevents[NSIG]; 76fd332320Sprovos int nevents; 77fd332320Sprovos int kq; 78*8ead113eSnicm pid_t pid; 794643be29Sbrad }; 80fd332320Sprovos 81*8ead113eSnicm static void *kq_init (struct event_base *); 82*8ead113eSnicm static int kq_add (void *, struct event *); 83*8ead113eSnicm static int kq_del (void *, struct event *); 84*8ead113eSnicm static int kq_dispatch (struct event_base *, void *, struct timeval *); 85*8ead113eSnicm static int kq_insert (struct kqop *, struct kevent *); 86*8ead113eSnicm static void kq_dealloc (struct event_base *, void *); 87fd332320Sprovos 88759b8817Smickey const struct eventop kqops = { 89fd332320Sprovos "kqueue", 90fd332320Sprovos kq_init, 91fd332320Sprovos kq_add, 92fd332320Sprovos kq_del, 933ac1ba99Sbrad kq_dispatch, 94*8ead113eSnicm kq_dealloc, 95*8ead113eSnicm 1 /* need reinit */ 96fd332320Sprovos }; 97fd332320Sprovos 98*8ead113eSnicm static void * 99bdce580dSbrad kq_init(struct event_base *base) 100fd332320Sprovos { 101*8ead113eSnicm int i, kq; 1024643be29Sbrad struct kqop *kqueueop; 103fd332320Sprovos 104fd332320Sprovos /* Disable kqueue when this environment variable is set */ 105*8ead113eSnicm if (evutil_getenv("EVENT_NOKQUEUE")) 106fd332320Sprovos return (NULL); 107fd332320Sprovos 1084643be29Sbrad if (!(kqueueop = calloc(1, sizeof(struct kqop)))) 1094643be29Sbrad return (NULL); 110fd332320Sprovos 111ff9272daSbrad /* Initalize the kernel queue */ 112fd332320Sprovos 113fd332320Sprovos if ((kq = kqueue()) == -1) { 1144643be29Sbrad event_warn("kqueue"); 1154643be29Sbrad free (kqueueop); 116fd332320Sprovos return (NULL); 117fd332320Sprovos } 118fd332320Sprovos 1194643be29Sbrad kqueueop->kq = kq; 120fd332320Sprovos 121*8ead113eSnicm kqueueop->pid = getpid(); 122*8ead113eSnicm 123ff9272daSbrad /* Initalize fields */ 1241ed98fdfSderaadt kqueueop->changes = calloc(NEVENT, sizeof(struct kevent)); 1254643be29Sbrad if (kqueueop->changes == NULL) { 1264643be29Sbrad free (kqueueop); 127fd332320Sprovos return (NULL); 128fd332320Sprovos } 1291ed98fdfSderaadt kqueueop->events = calloc(NEVENT, sizeof(struct kevent)); 1304643be29Sbrad if (kqueueop->events == NULL) { 1314643be29Sbrad free (kqueueop->changes); 1324643be29Sbrad free (kqueueop); 1334643be29Sbrad return (NULL); 1344643be29Sbrad } 1354643be29Sbrad kqueueop->nevents = NEVENT; 136fd332320Sprovos 137*8ead113eSnicm /* we need to keep track of multiple events per signal */ 138*8ead113eSnicm for (i = 0; i < NSIG; ++i) { 139*8ead113eSnicm TAILQ_INIT(&kqueueop->evsigevents[i]); 140*8ead113eSnicm } 141*8ead113eSnicm 142348ce57bSbrad /* Check for Mac OS X kqueue bug. */ 143348ce57bSbrad kqueueop->changes[0].ident = -1; 144348ce57bSbrad kqueueop->changes[0].filter = EVFILT_READ; 145348ce57bSbrad kqueueop->changes[0].flags = EV_ADD; 146348ce57bSbrad /* 147348ce57bSbrad * If kqueue works, then kevent will succeed, and it will 148348ce57bSbrad * stick an error in events[0]. If kqueue is broken, then 149348ce57bSbrad * kevent will fail. 150348ce57bSbrad */ 151348ce57bSbrad if (kevent(kq, 152348ce57bSbrad kqueueop->changes, 1, kqueueop->events, NEVENT, NULL) != 1 || 153348ce57bSbrad kqueueop->events[0].ident != -1 || 154348ce57bSbrad kqueueop->events[0].flags != EV_ERROR) { 155348ce57bSbrad event_warn("%s: detected broken kqueue; not using.", __func__); 156348ce57bSbrad free(kqueueop->changes); 157348ce57bSbrad free(kqueueop->events); 158348ce57bSbrad free(kqueueop); 159348ce57bSbrad close(kq); 160348ce57bSbrad return (NULL); 161348ce57bSbrad } 162348ce57bSbrad 1634643be29Sbrad return (kqueueop); 164fd332320Sprovos } 165fd332320Sprovos 166*8ead113eSnicm static int 167fd332320Sprovos kq_insert(struct kqop *kqop, struct kevent *kev) 168fd332320Sprovos { 169fd332320Sprovos int nevents = kqop->nevents; 170fd332320Sprovos 171fd332320Sprovos if (kqop->nchanges == nevents) { 172fd332320Sprovos struct kevent *newchange; 173fd332320Sprovos struct kevent *newresult; 174fd332320Sprovos 175fd332320Sprovos nevents *= 2; 176fd332320Sprovos 177fd332320Sprovos newchange = realloc(kqop->changes, 178fd332320Sprovos nevents * sizeof(struct kevent)); 179fd332320Sprovos if (newchange == NULL) { 1804643be29Sbrad event_warn("%s: malloc", __func__); 181fd332320Sprovos return (-1); 182fd332320Sprovos } 183fd332320Sprovos kqop->changes = newchange; 184fd332320Sprovos 185ff9272daSbrad newresult = realloc(kqop->events, 186fd332320Sprovos nevents * sizeof(struct kevent)); 187fd332320Sprovos 188fd332320Sprovos /* 189fd332320Sprovos * If we fail, we don't have to worry about freeing, 190fd332320Sprovos * the next realloc will pick it up. 191fd332320Sprovos */ 192fd332320Sprovos if (newresult == NULL) { 1934643be29Sbrad event_warn("%s: malloc", __func__); 194fd332320Sprovos return (-1); 195fd332320Sprovos } 196ff9272daSbrad kqop->events = newresult; 197fd332320Sprovos 198fd332320Sprovos kqop->nevents = nevents; 199fd332320Sprovos } 200fd332320Sprovos 201fd332320Sprovos memcpy(&kqop->changes[kqop->nchanges++], kev, sizeof(struct kevent)); 202fd332320Sprovos 2034643be29Sbrad event_debug(("%s: fd %d %s%s", 204*8ead113eSnicm __func__, (int)kev->ident, 205fd332320Sprovos kev->filter == EVFILT_READ ? "EVFILT_READ" : "EVFILT_WRITE", 206fd332320Sprovos kev->flags == EV_DELETE ? " (del)" : "")); 207fd332320Sprovos 208fd332320Sprovos return (0); 209fd332320Sprovos } 210fd332320Sprovos 211fd332320Sprovos static void 212fd332320Sprovos kq_sighandler(int sig) 213fd332320Sprovos { 214fd332320Sprovos /* Do nothing here */ 215fd332320Sprovos } 216fd332320Sprovos 217*8ead113eSnicm static int 2184643be29Sbrad kq_dispatch(struct event_base *base, void *arg, struct timeval *tv) 219fd332320Sprovos { 220fd332320Sprovos struct kqop *kqop = arg; 221fd332320Sprovos struct kevent *changes = kqop->changes; 222fd332320Sprovos struct kevent *events = kqop->events; 223fd332320Sprovos struct event *ev; 224bdce580dSbrad struct timespec ts, *ts_p = NULL; 225fd332320Sprovos int i, res; 226fd332320Sprovos 227bdce580dSbrad if (tv != NULL) { 228fd332320Sprovos TIMEVAL_TO_TIMESPEC(tv, &ts); 229bdce580dSbrad ts_p = &ts; 230bdce580dSbrad } 231fd332320Sprovos 232fd332320Sprovos res = kevent(kqop->kq, changes, kqop->nchanges, 233bdce580dSbrad events, kqop->nevents, ts_p); 234fd332320Sprovos kqop->nchanges = 0; 235fd332320Sprovos if (res == -1) { 236fd332320Sprovos if (errno != EINTR) { 2374643be29Sbrad event_warn("kevent"); 238fd332320Sprovos return (-1); 239fd332320Sprovos } 240fd332320Sprovos 241fd332320Sprovos return (0); 242fd332320Sprovos } 243fd332320Sprovos 2444643be29Sbrad event_debug(("%s: kevent reports %d", __func__, res)); 245fd332320Sprovos 246fd332320Sprovos for (i = 0; i < res; i++) { 247fd332320Sprovos int which = 0; 248fd332320Sprovos 249fd332320Sprovos if (events[i].flags & EV_ERROR) { 250fd332320Sprovos /* 251fd332320Sprovos * Error messages that can happen, when a delete fails. 252fd332320Sprovos * EBADF happens when the file discriptor has been 253fd332320Sprovos * closed, 254fd332320Sprovos * ENOENT when the file discriptor was closed and 255fd332320Sprovos * then reopened. 256348ce57bSbrad * EINVAL for some reasons not understood; EINVAL 257348ce57bSbrad * should not be returned ever; but FreeBSD does :-\ 258fd332320Sprovos * An error is also indicated when a callback deletes 259fd332320Sprovos * an event we are still processing. In that case 260fd332320Sprovos * the data field is set to ENOENT. 261fd332320Sprovos */ 262fd332320Sprovos if (events[i].data == EBADF || 263348ce57bSbrad events[i].data == EINVAL || 264fd332320Sprovos events[i].data == ENOENT) 265fd332320Sprovos continue; 2661085edd8Sbrad errno = events[i].data; 267fd332320Sprovos return (-1); 268fd332320Sprovos } 269fd332320Sprovos 270fd332320Sprovos if (events[i].filter == EVFILT_READ) { 271fd332320Sprovos which |= EV_READ; 272fd332320Sprovos } else if (events[i].filter == EVFILT_WRITE) { 273fd332320Sprovos which |= EV_WRITE; 274fd332320Sprovos } else if (events[i].filter == EVFILT_SIGNAL) { 275fd332320Sprovos which |= EV_SIGNAL; 2761770acb2Smarkus } 277fd332320Sprovos 278fd332320Sprovos if (!which) 279fd332320Sprovos continue; 280fd332320Sprovos 281*8ead113eSnicm if (events[i].filter == EVFILT_SIGNAL) { 282*8ead113eSnicm struct event_list *head = 283*8ead113eSnicm (struct event_list *)events[i].udata; 284*8ead113eSnicm TAILQ_FOREACH(ev, head, ev_signal_next) { 285*8ead113eSnicm event_active(ev, which, events[i].data); 286*8ead113eSnicm } 287*8ead113eSnicm } else { 288*8ead113eSnicm ev = (struct event *)events[i].udata; 289fd332320Sprovos 290*8ead113eSnicm if (!(ev->ev_events & EV_PERSIST)) 291*8ead113eSnicm ev->ev_flags &= ~EVLIST_X_KQINKERNEL; 292*8ead113eSnicm 293*8ead113eSnicm event_active(ev, which, 1); 294*8ead113eSnicm } 295fd332320Sprovos } 296fd332320Sprovos 297fd332320Sprovos return (0); 298fd332320Sprovos } 299fd332320Sprovos 300fd332320Sprovos 301*8ead113eSnicm static int 302fd332320Sprovos kq_add(void *arg, struct event *ev) 303fd332320Sprovos { 304fd332320Sprovos struct kqop *kqop = arg; 305fd332320Sprovos struct kevent kev; 306fd332320Sprovos 307fd332320Sprovos if (ev->ev_events & EV_SIGNAL) { 308fd332320Sprovos int nsignal = EVENT_SIGNAL(ev); 309fd332320Sprovos 310*8ead113eSnicm assert(nsignal >= 0 && nsignal < NSIG); 311*8ead113eSnicm if (TAILQ_EMPTY(&kqop->evsigevents[nsignal])) { 312*8ead113eSnicm struct timespec timeout = { 0, 0 }; 313*8ead113eSnicm 314fd332320Sprovos memset(&kev, 0, sizeof(kev)); 315fd332320Sprovos kev.ident = nsignal; 316fd332320Sprovos kev.filter = EVFILT_SIGNAL; 317fd332320Sprovos kev.flags = EV_ADD; 318*8ead113eSnicm kev.udata = PTR_TO_UDATA(&kqop->evsigevents[nsignal]); 319fd332320Sprovos 320*8ead113eSnicm /* Be ready for the signal if it is sent any 321*8ead113eSnicm * time between now and the next call to 322*8ead113eSnicm * kq_dispatch. */ 323*8ead113eSnicm if (kevent(kqop->kq, &kev, 1, NULL, 0, &timeout) == -1) 324fd332320Sprovos return (-1); 325fd332320Sprovos 326*8ead113eSnicm if (_evsignal_set_handler(ev->ev_base, nsignal, 327*8ead113eSnicm kq_sighandler) == -1) 328fd332320Sprovos return (-1); 329*8ead113eSnicm } 330fd332320Sprovos 331*8ead113eSnicm TAILQ_INSERT_TAIL(&kqop->evsigevents[nsignal], ev, 332*8ead113eSnicm ev_signal_next); 333fd332320Sprovos ev->ev_flags |= EVLIST_X_KQINKERNEL; 334fd332320Sprovos return (0); 335fd332320Sprovos } 336fd332320Sprovos 337fd332320Sprovos if (ev->ev_events & EV_READ) { 338fd332320Sprovos memset(&kev, 0, sizeof(kev)); 339fd332320Sprovos kev.ident = ev->ev_fd; 340fd332320Sprovos kev.filter = EVFILT_READ; 341ff9272daSbrad #ifdef NOTE_EOF 342c74e3f8eSmarkus /* Make it behave like select() and poll() */ 343c74e3f8eSmarkus kev.fflags = NOTE_EOF; 344ff9272daSbrad #endif 345ff9272daSbrad kev.flags = EV_ADD; 346e5c7daabSart if (!(ev->ev_events & EV_PERSIST)) 347ddb00dd9Sitojun kev.flags |= EV_ONESHOT; 348bdce580dSbrad kev.udata = PTR_TO_UDATA(ev); 349fd332320Sprovos 350fd332320Sprovos if (kq_insert(kqop, &kev) == -1) 351fd332320Sprovos return (-1); 352fd332320Sprovos 353fd332320Sprovos ev->ev_flags |= EVLIST_X_KQINKERNEL; 354fd332320Sprovos } 355fd332320Sprovos 356fd332320Sprovos if (ev->ev_events & EV_WRITE) { 357fd332320Sprovos memset(&kev, 0, sizeof(kev)); 358fd332320Sprovos kev.ident = ev->ev_fd; 359fd332320Sprovos kev.filter = EVFILT_WRITE; 360e5c7daabSart kev.flags = EV_ADD; 361e5c7daabSart if (!(ev->ev_events & EV_PERSIST)) 362ddb00dd9Sitojun kev.flags |= EV_ONESHOT; 363bdce580dSbrad kev.udata = PTR_TO_UDATA(ev); 364fd332320Sprovos 365fd332320Sprovos if (kq_insert(kqop, &kev) == -1) 366fd332320Sprovos return (-1); 367fd332320Sprovos 368fd332320Sprovos ev->ev_flags |= EVLIST_X_KQINKERNEL; 369fd332320Sprovos } 370fd332320Sprovos 371fd332320Sprovos return (0); 372fd332320Sprovos } 373fd332320Sprovos 374*8ead113eSnicm static int 375fd332320Sprovos kq_del(void *arg, struct event *ev) 376fd332320Sprovos { 377fd332320Sprovos struct kqop *kqop = arg; 378fd332320Sprovos struct kevent kev; 379fd332320Sprovos 380fd332320Sprovos if (!(ev->ev_flags & EVLIST_X_KQINKERNEL)) 381fd332320Sprovos return (0); 382fd332320Sprovos 383fd332320Sprovos if (ev->ev_events & EV_SIGNAL) { 384fd332320Sprovos int nsignal = EVENT_SIGNAL(ev); 385*8ead113eSnicm struct timespec timeout = { 0, 0 }; 386fd332320Sprovos 387*8ead113eSnicm assert(nsignal >= 0 && nsignal < NSIG); 388*8ead113eSnicm TAILQ_REMOVE(&kqop->evsigevents[nsignal], ev, ev_signal_next); 389*8ead113eSnicm if (TAILQ_EMPTY(&kqop->evsigevents[nsignal])) { 390fd332320Sprovos memset(&kev, 0, sizeof(kev)); 391c46b02d0Sbrad kev.ident = nsignal; 392fd332320Sprovos kev.filter = EVFILT_SIGNAL; 393fd332320Sprovos kev.flags = EV_DELETE; 394fd332320Sprovos 395*8ead113eSnicm /* Because we insert signal events 396*8ead113eSnicm * immediately, we need to delete them 397*8ead113eSnicm * immediately, too */ 398*8ead113eSnicm if (kevent(kqop->kq, &kev, 1, NULL, 0, &timeout) == -1) 399fd332320Sprovos return (-1); 400fd332320Sprovos 401*8ead113eSnicm if (_evsignal_restore_handler(ev->ev_base, 402*8ead113eSnicm nsignal) == -1) 403fd332320Sprovos return (-1); 404*8ead113eSnicm } 405fd332320Sprovos 406fd332320Sprovos ev->ev_flags &= ~EVLIST_X_KQINKERNEL; 407fd332320Sprovos return (0); 408fd332320Sprovos } 409fd332320Sprovos 410fd332320Sprovos if (ev->ev_events & EV_READ) { 411fd332320Sprovos memset(&kev, 0, sizeof(kev)); 412fd332320Sprovos kev.ident = ev->ev_fd; 413fd332320Sprovos kev.filter = EVFILT_READ; 414fd332320Sprovos kev.flags = EV_DELETE; 415fd332320Sprovos 416fd332320Sprovos if (kq_insert(kqop, &kev) == -1) 417fd332320Sprovos return (-1); 418fd332320Sprovos 419fd332320Sprovos ev->ev_flags &= ~EVLIST_X_KQINKERNEL; 420fd332320Sprovos } 421fd332320Sprovos 422fd332320Sprovos if (ev->ev_events & EV_WRITE) { 423fd332320Sprovos memset(&kev, 0, sizeof(kev)); 424fd332320Sprovos kev.ident = ev->ev_fd; 425fd332320Sprovos kev.filter = EVFILT_WRITE; 426fd332320Sprovos kev.flags = EV_DELETE; 427fd332320Sprovos 428fd332320Sprovos if (kq_insert(kqop, &kev) == -1) 429fd332320Sprovos return (-1); 430fd332320Sprovos 431fd332320Sprovos ev->ev_flags &= ~EVLIST_X_KQINKERNEL; 432fd332320Sprovos } 433fd332320Sprovos 434fd332320Sprovos return (0); 435fd332320Sprovos } 4363ac1ba99Sbrad 437*8ead113eSnicm static void 438bdce580dSbrad kq_dealloc(struct event_base *base, void *arg) 4393ac1ba99Sbrad { 4403ac1ba99Sbrad struct kqop *kqop = arg; 4413ac1ba99Sbrad 4423ac1ba99Sbrad if (kqop->changes) 4433ac1ba99Sbrad free(kqop->changes); 4443ac1ba99Sbrad if (kqop->events) 4453ac1ba99Sbrad free(kqop->events); 446*8ead113eSnicm if (kqop->kq >= 0 && kqop->pid == getpid()) 4473ac1ba99Sbrad close(kqop->kq); 4483ac1ba99Sbrad memset(kqop, 0, sizeof(struct kqop)); 4493ac1ba99Sbrad free(kqop); 4503ac1ba99Sbrad } 451