1*bdce580dSbrad /* $OpenBSD: kqueue.c,v 1.24 2008/05/02 06:09:11 brad Exp $ */ 234fc9cdeSmickey 3fd332320Sprovos /* 4fd332320Sprovos * Copyright 2000-2002 Niels Provos <provos@citi.umich.edu> 5fd332320Sprovos * All rights reserved. 6fd332320Sprovos * 7fd332320Sprovos * Redistribution and use in source and binary forms, with or without 8fd332320Sprovos * modification, are permitted provided that the following conditions 9fd332320Sprovos * are met: 10fd332320Sprovos * 1. Redistributions of source code must retain the above copyright 11fd332320Sprovos * notice, this list of conditions and the following disclaimer. 12fd332320Sprovos * 2. Redistributions in binary form must reproduce the above copyright 13fd332320Sprovos * notice, this list of conditions and the following disclaimer in the 14fd332320Sprovos * documentation and/or other materials provided with the distribution. 15ff9272daSbrad * 3. The name of the author may not be used to endorse or promote products 16fd332320Sprovos * derived from this software without specific prior written permission. 17fd332320Sprovos * 18fd332320Sprovos * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 19fd332320Sprovos * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 20fd332320Sprovos * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 21fd332320Sprovos * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 22fd332320Sprovos * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 23fd332320Sprovos * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24fd332320Sprovos * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25fd332320Sprovos * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26fd332320Sprovos * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 27fd332320Sprovos * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28fd332320Sprovos */ 291770acb2Smarkus #ifdef HAVE_CONFIG_H 30fd332320Sprovos #include "config.h" 311770acb2Smarkus #endif 32fd332320Sprovos 33fd332320Sprovos #include <sys/types.h> 341770acb2Smarkus #ifdef HAVE_SYS_TIME_H 35fd332320Sprovos #include <sys/time.h> 361770acb2Smarkus #else 371770acb2Smarkus #include <sys/_time.h> 381770acb2Smarkus #endif 39fd332320Sprovos #include <sys/queue.h> 40fd332320Sprovos #include <sys/event.h> 41fd332320Sprovos #include <signal.h> 42fd332320Sprovos #include <stdio.h> 43fd332320Sprovos #include <stdlib.h> 44ff33a3f0Sderaadt #include <string.h> 45fd332320Sprovos #include <unistd.h> 46fd332320Sprovos #include <errno.h> 471770acb2Smarkus #ifdef HAVE_INTTYPES_H 481770acb2Smarkus #include <inttypes.h> 491770acb2Smarkus #endif 50fd332320Sprovos 51*bdce580dSbrad /* Some platforms apparently define the udata field of struct kevent as 52*bdce580dSbrad * ntptr_t, whereas others define it as void*. There doesn't seem to be an 53*bdce580dSbrad * easy way to tell them apart via autoconf, so we need to use OS macros. */ 54*bdce580dSbrad #if defined(HAVE_INTTYPES_H) && !defined(__OpenBSD__) && !defined(__FreeBSD__) && !defined(__darwin__) && !defined(__APPLE__) 55*bdce580dSbrad #define PTR_TO_UDATA(x) ((intptr_t)(x)) 561770acb2Smarkus #else 57*bdce580dSbrad #define PTR_TO_UDATA(x) (x) 58fd332320Sprovos #endif 59fd332320Sprovos 60fd332320Sprovos #include "event.h" 614643be29Sbrad #include "log.h" 62fd332320Sprovos 63fd332320Sprovos #define EVLIST_X_KQINKERNEL 0x1000 64fd332320Sprovos 65fd332320Sprovos #define NEVENT 64 66fd332320Sprovos 67fd332320Sprovos struct kqop { 68fd332320Sprovos struct kevent *changes; 69fd332320Sprovos int nchanges; 70fd332320Sprovos struct kevent *events; 71fd332320Sprovos int nevents; 72fd332320Sprovos int kq; 734643be29Sbrad }; 74fd332320Sprovos 75*bdce580dSbrad void *kq_init (struct event_base *); 76fd332320Sprovos int kq_add (void *, struct event *); 77fd332320Sprovos int kq_del (void *, struct event *); 784643be29Sbrad int kq_recalc (struct event_base *, void *, int); 794643be29Sbrad int kq_dispatch (struct event_base *, void *, struct timeval *); 80ff9272daSbrad int kq_insert (struct kqop *, struct kevent *); 81*bdce580dSbrad void kq_dealloc (struct event_base *, void *); 82fd332320Sprovos 83759b8817Smickey const struct eventop kqops = { 84fd332320Sprovos "kqueue", 85fd332320Sprovos kq_init, 86fd332320Sprovos kq_add, 87fd332320Sprovos kq_del, 88fd332320Sprovos kq_recalc, 893ac1ba99Sbrad kq_dispatch, 903ac1ba99Sbrad kq_dealloc 91fd332320Sprovos }; 92fd332320Sprovos 93fd332320Sprovos void * 94*bdce580dSbrad kq_init(struct event_base *base) 95fd332320Sprovos { 96fd332320Sprovos int kq; 974643be29Sbrad struct kqop *kqueueop; 98fd332320Sprovos 99fd332320Sprovos /* Disable kqueue when this environment variable is set */ 1001770acb2Smarkus if (!issetugid() && getenv("EVENT_NOKQUEUE")) 101fd332320Sprovos return (NULL); 102fd332320Sprovos 1034643be29Sbrad if (!(kqueueop = calloc(1, sizeof(struct kqop)))) 1044643be29Sbrad return (NULL); 105fd332320Sprovos 106ff9272daSbrad /* Initalize the kernel queue */ 107fd332320Sprovos 108fd332320Sprovos if ((kq = kqueue()) == -1) { 1094643be29Sbrad event_warn("kqueue"); 1104643be29Sbrad free (kqueueop); 111fd332320Sprovos return (NULL); 112fd332320Sprovos } 113fd332320Sprovos 1144643be29Sbrad kqueueop->kq = kq; 115fd332320Sprovos 116ff9272daSbrad /* Initalize fields */ 1171ed98fdfSderaadt kqueueop->changes = calloc(NEVENT, sizeof(struct kevent)); 1184643be29Sbrad if (kqueueop->changes == NULL) { 1194643be29Sbrad free (kqueueop); 120fd332320Sprovos return (NULL); 121fd332320Sprovos } 1221ed98fdfSderaadt kqueueop->events = calloc(NEVENT, sizeof(struct kevent)); 1234643be29Sbrad if (kqueueop->events == NULL) { 1244643be29Sbrad free (kqueueop->changes); 1254643be29Sbrad free (kqueueop); 1264643be29Sbrad return (NULL); 1274643be29Sbrad } 1284643be29Sbrad kqueueop->nevents = NEVENT; 129fd332320Sprovos 130348ce57bSbrad /* Check for Mac OS X kqueue bug. */ 131348ce57bSbrad kqueueop->changes[0].ident = -1; 132348ce57bSbrad kqueueop->changes[0].filter = EVFILT_READ; 133348ce57bSbrad kqueueop->changes[0].flags = EV_ADD; 134348ce57bSbrad /* 135348ce57bSbrad * If kqueue works, then kevent will succeed, and it will 136348ce57bSbrad * stick an error in events[0]. If kqueue is broken, then 137348ce57bSbrad * kevent will fail. 138348ce57bSbrad */ 139348ce57bSbrad if (kevent(kq, 140348ce57bSbrad kqueueop->changes, 1, kqueueop->events, NEVENT, NULL) != 1 || 141348ce57bSbrad kqueueop->events[0].ident != -1 || 142348ce57bSbrad kqueueop->events[0].flags != EV_ERROR) { 143348ce57bSbrad event_warn("%s: detected broken kqueue; not using.", __func__); 144348ce57bSbrad free(kqueueop->changes); 145348ce57bSbrad free(kqueueop->events); 146348ce57bSbrad free(kqueueop); 147348ce57bSbrad close(kq); 148348ce57bSbrad return (NULL); 149348ce57bSbrad } 150348ce57bSbrad 1514643be29Sbrad return (kqueueop); 152fd332320Sprovos } 153fd332320Sprovos 154fd332320Sprovos int 1554643be29Sbrad kq_recalc(struct event_base *base, void *arg, int max) 156fd332320Sprovos { 157fd332320Sprovos return (0); 158fd332320Sprovos } 159fd332320Sprovos 160fd332320Sprovos int 161fd332320Sprovos kq_insert(struct kqop *kqop, struct kevent *kev) 162fd332320Sprovos { 163fd332320Sprovos int nevents = kqop->nevents; 164fd332320Sprovos 165fd332320Sprovos if (kqop->nchanges == nevents) { 166fd332320Sprovos struct kevent *newchange; 167fd332320Sprovos struct kevent *newresult; 168fd332320Sprovos 169fd332320Sprovos nevents *= 2; 170fd332320Sprovos 171fd332320Sprovos newchange = realloc(kqop->changes, 172fd332320Sprovos nevents * sizeof(struct kevent)); 173fd332320Sprovos if (newchange == NULL) { 1744643be29Sbrad event_warn("%s: malloc", __func__); 175fd332320Sprovos return (-1); 176fd332320Sprovos } 177fd332320Sprovos kqop->changes = newchange; 178fd332320Sprovos 179ff9272daSbrad newresult = realloc(kqop->events, 180fd332320Sprovos nevents * sizeof(struct kevent)); 181fd332320Sprovos 182fd332320Sprovos /* 183fd332320Sprovos * If we fail, we don't have to worry about freeing, 184fd332320Sprovos * the next realloc will pick it up. 185fd332320Sprovos */ 186fd332320Sprovos if (newresult == NULL) { 1874643be29Sbrad event_warn("%s: malloc", __func__); 188fd332320Sprovos return (-1); 189fd332320Sprovos } 190ff9272daSbrad kqop->events = newresult; 191fd332320Sprovos 192fd332320Sprovos kqop->nevents = nevents; 193fd332320Sprovos } 194fd332320Sprovos 195fd332320Sprovos memcpy(&kqop->changes[kqop->nchanges++], kev, sizeof(struct kevent)); 196fd332320Sprovos 1974643be29Sbrad event_debug(("%s: fd %d %s%s", 1981770acb2Smarkus __func__, kev->ident, 199fd332320Sprovos kev->filter == EVFILT_READ ? "EVFILT_READ" : "EVFILT_WRITE", 200fd332320Sprovos kev->flags == EV_DELETE ? " (del)" : "")); 201fd332320Sprovos 202fd332320Sprovos return (0); 203fd332320Sprovos } 204fd332320Sprovos 205fd332320Sprovos static void 206fd332320Sprovos kq_sighandler(int sig) 207fd332320Sprovos { 208fd332320Sprovos /* Do nothing here */ 209fd332320Sprovos } 210fd332320Sprovos 211fd332320Sprovos int 2124643be29Sbrad kq_dispatch(struct event_base *base, void *arg, struct timeval *tv) 213fd332320Sprovos { 214fd332320Sprovos struct kqop *kqop = arg; 215fd332320Sprovos struct kevent *changes = kqop->changes; 216fd332320Sprovos struct kevent *events = kqop->events; 217fd332320Sprovos struct event *ev; 218*bdce580dSbrad struct timespec ts, *ts_p = NULL; 219fd332320Sprovos int i, res; 220fd332320Sprovos 221*bdce580dSbrad if (tv != NULL) { 222fd332320Sprovos TIMEVAL_TO_TIMESPEC(tv, &ts); 223*bdce580dSbrad ts_p = &ts; 224*bdce580dSbrad } 225fd332320Sprovos 226fd332320Sprovos res = kevent(kqop->kq, changes, kqop->nchanges, 227*bdce580dSbrad events, kqop->nevents, ts_p); 228fd332320Sprovos kqop->nchanges = 0; 229fd332320Sprovos if (res == -1) { 230fd332320Sprovos if (errno != EINTR) { 2314643be29Sbrad event_warn("kevent"); 232fd332320Sprovos return (-1); 233fd332320Sprovos } 234fd332320Sprovos 235fd332320Sprovos return (0); 236fd332320Sprovos } 237fd332320Sprovos 2384643be29Sbrad event_debug(("%s: kevent reports %d", __func__, res)); 239fd332320Sprovos 240fd332320Sprovos for (i = 0; i < res; i++) { 241fd332320Sprovos int which = 0; 242fd332320Sprovos 243fd332320Sprovos if (events[i].flags & EV_ERROR) { 244fd332320Sprovos /* 245fd332320Sprovos * Error messages that can happen, when a delete fails. 246fd332320Sprovos * EBADF happens when the file discriptor has been 247fd332320Sprovos * closed, 248fd332320Sprovos * ENOENT when the file discriptor was closed and 249fd332320Sprovos * then reopened. 250348ce57bSbrad * EINVAL for some reasons not understood; EINVAL 251348ce57bSbrad * should not be returned ever; but FreeBSD does :-\ 252fd332320Sprovos * An error is also indicated when a callback deletes 253fd332320Sprovos * an event we are still processing. In that case 254fd332320Sprovos * the data field is set to ENOENT. 255fd332320Sprovos */ 256fd332320Sprovos if (events[i].data == EBADF || 257348ce57bSbrad events[i].data == EINVAL || 258fd332320Sprovos events[i].data == ENOENT) 259fd332320Sprovos continue; 2601085edd8Sbrad errno = events[i].data; 261fd332320Sprovos return (-1); 262fd332320Sprovos } 263fd332320Sprovos 2641770acb2Smarkus ev = (struct event *)events[i].udata; 265fd332320Sprovos 266fd332320Sprovos if (events[i].filter == EVFILT_READ) { 267fd332320Sprovos which |= EV_READ; 268fd332320Sprovos } else if (events[i].filter == EVFILT_WRITE) { 269fd332320Sprovos which |= EV_WRITE; 270fd332320Sprovos } else if (events[i].filter == EVFILT_SIGNAL) { 271fd332320Sprovos which |= EV_SIGNAL; 2721770acb2Smarkus } 273fd332320Sprovos 274fd332320Sprovos if (!which) 275fd332320Sprovos continue; 276fd332320Sprovos 27794f6f200Sbrad if (!(ev->ev_events & EV_PERSIST)) 278fd332320Sprovos event_del(ev); 279fd332320Sprovos 2801770acb2Smarkus event_active(ev, which, 2811770acb2Smarkus ev->ev_events & EV_SIGNAL ? events[i].data : 1); 282fd332320Sprovos } 283fd332320Sprovos 284fd332320Sprovos return (0); 285fd332320Sprovos } 286fd332320Sprovos 287fd332320Sprovos 288fd332320Sprovos int 289fd332320Sprovos kq_add(void *arg, struct event *ev) 290fd332320Sprovos { 291fd332320Sprovos struct kqop *kqop = arg; 292fd332320Sprovos struct kevent kev; 293fd332320Sprovos 294fd332320Sprovos if (ev->ev_events & EV_SIGNAL) { 295fd332320Sprovos int nsignal = EVENT_SIGNAL(ev); 296fd332320Sprovos 297fd332320Sprovos memset(&kev, 0, sizeof(kev)); 298fd332320Sprovos kev.ident = nsignal; 299fd332320Sprovos kev.filter = EVFILT_SIGNAL; 300fd332320Sprovos kev.flags = EV_ADD; 301fd332320Sprovos if (!(ev->ev_events & EV_PERSIST)) 302ddb00dd9Sitojun kev.flags |= EV_ONESHOT; 303*bdce580dSbrad kev.udata = PTR_TO_UDATA(ev); 304fd332320Sprovos 305fd332320Sprovos if (kq_insert(kqop, &kev) == -1) 306fd332320Sprovos return (-1); 307fd332320Sprovos 308fd332320Sprovos if (signal(nsignal, kq_sighandler) == SIG_ERR) 309fd332320Sprovos return (-1); 310fd332320Sprovos 311fd332320Sprovos ev->ev_flags |= EVLIST_X_KQINKERNEL; 312fd332320Sprovos return (0); 313fd332320Sprovos } 314fd332320Sprovos 315fd332320Sprovos if (ev->ev_events & EV_READ) { 316fd332320Sprovos memset(&kev, 0, sizeof(kev)); 317fd332320Sprovos kev.ident = ev->ev_fd; 318fd332320Sprovos kev.filter = EVFILT_READ; 319ff9272daSbrad #ifdef NOTE_EOF 320c74e3f8eSmarkus /* Make it behave like select() and poll() */ 321c74e3f8eSmarkus kev.fflags = NOTE_EOF; 322ff9272daSbrad #endif 323ff9272daSbrad kev.flags = EV_ADD; 324e5c7daabSart if (!(ev->ev_events & EV_PERSIST)) 325ddb00dd9Sitojun kev.flags |= EV_ONESHOT; 326*bdce580dSbrad kev.udata = PTR_TO_UDATA(ev); 327fd332320Sprovos 328fd332320Sprovos if (kq_insert(kqop, &kev) == -1) 329fd332320Sprovos return (-1); 330fd332320Sprovos 331fd332320Sprovos ev->ev_flags |= EVLIST_X_KQINKERNEL; 332fd332320Sprovos } 333fd332320Sprovos 334fd332320Sprovos if (ev->ev_events & EV_WRITE) { 335fd332320Sprovos memset(&kev, 0, sizeof(kev)); 336fd332320Sprovos kev.ident = ev->ev_fd; 337fd332320Sprovos kev.filter = EVFILT_WRITE; 338e5c7daabSart kev.flags = EV_ADD; 339e5c7daabSart if (!(ev->ev_events & EV_PERSIST)) 340ddb00dd9Sitojun kev.flags |= EV_ONESHOT; 341*bdce580dSbrad kev.udata = PTR_TO_UDATA(ev); 342fd332320Sprovos 343fd332320Sprovos if (kq_insert(kqop, &kev) == -1) 344fd332320Sprovos return (-1); 345fd332320Sprovos 346fd332320Sprovos ev->ev_flags |= EVLIST_X_KQINKERNEL; 347fd332320Sprovos } 348fd332320Sprovos 349fd332320Sprovos return (0); 350fd332320Sprovos } 351fd332320Sprovos 352fd332320Sprovos int 353fd332320Sprovos kq_del(void *arg, struct event *ev) 354fd332320Sprovos { 355fd332320Sprovos struct kqop *kqop = arg; 356fd332320Sprovos struct kevent kev; 357fd332320Sprovos 358fd332320Sprovos if (!(ev->ev_flags & EVLIST_X_KQINKERNEL)) 359fd332320Sprovos return (0); 360fd332320Sprovos 361fd332320Sprovos if (ev->ev_events & EV_SIGNAL) { 362fd332320Sprovos int nsignal = EVENT_SIGNAL(ev); 363fd332320Sprovos 364fd332320Sprovos memset(&kev, 0, sizeof(kev)); 365c46b02d0Sbrad kev.ident = nsignal; 366fd332320Sprovos kev.filter = EVFILT_SIGNAL; 367fd332320Sprovos kev.flags = EV_DELETE; 368fd332320Sprovos 369fd332320Sprovos if (kq_insert(kqop, &kev) == -1) 370fd332320Sprovos return (-1); 371fd332320Sprovos 372fd332320Sprovos if (signal(nsignal, SIG_DFL) == SIG_ERR) 373fd332320Sprovos return (-1); 374fd332320Sprovos 375fd332320Sprovos ev->ev_flags &= ~EVLIST_X_KQINKERNEL; 376fd332320Sprovos return (0); 377fd332320Sprovos } 378fd332320Sprovos 379fd332320Sprovos if (ev->ev_events & EV_READ) { 380fd332320Sprovos memset(&kev, 0, sizeof(kev)); 381fd332320Sprovos kev.ident = ev->ev_fd; 382fd332320Sprovos kev.filter = EVFILT_READ; 383fd332320Sprovos kev.flags = EV_DELETE; 384fd332320Sprovos 385fd332320Sprovos if (kq_insert(kqop, &kev) == -1) 386fd332320Sprovos return (-1); 387fd332320Sprovos 388fd332320Sprovos ev->ev_flags &= ~EVLIST_X_KQINKERNEL; 389fd332320Sprovos } 390fd332320Sprovos 391fd332320Sprovos if (ev->ev_events & EV_WRITE) { 392fd332320Sprovos memset(&kev, 0, sizeof(kev)); 393fd332320Sprovos kev.ident = ev->ev_fd; 394fd332320Sprovos kev.filter = EVFILT_WRITE; 395fd332320Sprovos kev.flags = EV_DELETE; 396fd332320Sprovos 397fd332320Sprovos if (kq_insert(kqop, &kev) == -1) 398fd332320Sprovos return (-1); 399fd332320Sprovos 400fd332320Sprovos ev->ev_flags &= ~EVLIST_X_KQINKERNEL; 401fd332320Sprovos } 402fd332320Sprovos 403fd332320Sprovos return (0); 404fd332320Sprovos } 4053ac1ba99Sbrad 4063ac1ba99Sbrad void 407*bdce580dSbrad kq_dealloc(struct event_base *base, void *arg) 4083ac1ba99Sbrad { 4093ac1ba99Sbrad struct kqop *kqop = arg; 4103ac1ba99Sbrad 4113ac1ba99Sbrad if (kqop->changes) 4123ac1ba99Sbrad free(kqop->changes); 4133ac1ba99Sbrad if (kqop->events) 4143ac1ba99Sbrad free(kqop->events); 4153ac1ba99Sbrad if (kqop->kq) 4163ac1ba99Sbrad close(kqop->kq); 4173ac1ba99Sbrad memset(kqop, 0, sizeof(struct kqop)); 4183ac1ba99Sbrad free(kqop); 4193ac1ba99Sbrad } 420