1 /* $OpenBSD: kqueue.c,v 1.26 2010/07/12 18:03:38 nicm Exp $ */ 2 3 /* 4 * Copyright 2000-2002 Niels Provos <provos@citi.umich.edu> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. The name of the author may not be used to endorse or promote products 16 * derived from this software without specific prior written permission. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 */ 29 #ifdef HAVE_CONFIG_H 30 #include "config.h" 31 #endif 32 33 #define _GNU_SOURCE 1 34 35 #include <sys/types.h> 36 #ifdef HAVE_SYS_TIME_H 37 #include <sys/time.h> 38 #else 39 #include <sys/_libevent_time.h> 40 #endif 41 #include <sys/queue.h> 42 #include <sys/event.h> 43 #include <signal.h> 44 #include <stdio.h> 45 #include <stdlib.h> 46 #include <string.h> 47 #include <unistd.h> 48 #include <errno.h> 49 #include <assert.h> 50 #ifdef HAVE_INTTYPES_H 51 #include <inttypes.h> 52 #endif 53 54 /* Some platforms apparently define the udata field of struct kevent as 55 * intptr_t, whereas others define it as void*. There doesn't seem to be an 56 * easy way to tell them apart via autoconf, so we need to use OS macros. */ 57 #if defined(HAVE_INTTYPES_H) && !defined(__OpenBSD__) && !defined(__FreeBSD__) && !defined(__darwin__) && !defined(__APPLE__) 58 #define PTR_TO_UDATA(x) ((intptr_t)(x)) 59 #else 60 #define PTR_TO_UDATA(x) (x) 61 #endif 62 63 #include "event.h" 64 #include "event-internal.h" 65 #include "log.h" 66 #include "evsignal.h" 67 68 #define EVLIST_X_KQINKERNEL 0x1000 69 70 #define NEVENT 64 71 72 struct kqop { 73 struct kevent *changes; 74 int nchanges; 75 struct kevent *events; 76 struct event_list evsigevents[NSIG]; 77 int nevents; 78 int kq; 79 pid_t pid; 80 }; 81 82 static void *kq_init (struct event_base *); 83 static int kq_add (void *, struct event *); 84 static int kq_del (void *, struct event *); 85 static int kq_dispatch (struct event_base *, void *, struct timeval *); 86 static int kq_insert (struct kqop *, struct kevent *); 87 static void kq_dealloc (struct event_base *, void *); 88 89 const struct eventop kqops = { 90 "kqueue", 91 kq_init, 92 kq_add, 93 kq_del, 94 kq_dispatch, 95 kq_dealloc, 96 1 /* need reinit */ 97 }; 98 99 static void * 100 kq_init(struct event_base *base) 101 { 102 int i, kq; 103 struct kqop *kqueueop; 104 105 /* Disable kqueue when this environment variable is set */ 106 if (evutil_getenv("EVENT_NOKQUEUE")) 107 return (NULL); 108 109 if (!(kqueueop = calloc(1, sizeof(struct kqop)))) 110 return (NULL); 111 112 /* Initalize the kernel queue */ 113 114 if ((kq = kqueue()) == -1) { 115 event_warn("kqueue"); 116 free (kqueueop); 117 return (NULL); 118 } 119 120 kqueueop->kq = kq; 121 122 kqueueop->pid = getpid(); 123 124 /* Initalize fields */ 125 kqueueop->changes = calloc(NEVENT, sizeof(struct kevent)); 126 if (kqueueop->changes == NULL) { 127 free (kqueueop); 128 return (NULL); 129 } 130 kqueueop->events = calloc(NEVENT, sizeof(struct kevent)); 131 if (kqueueop->events == NULL) { 132 free (kqueueop->changes); 133 free (kqueueop); 134 return (NULL); 135 } 136 kqueueop->nevents = NEVENT; 137 138 /* we need to keep track of multiple events per signal */ 139 for (i = 0; i < NSIG; ++i) { 140 TAILQ_INIT(&kqueueop->evsigevents[i]); 141 } 142 143 /* Check for Mac OS X kqueue bug. */ 144 kqueueop->changes[0].ident = -1; 145 kqueueop->changes[0].filter = EVFILT_READ; 146 kqueueop->changes[0].flags = EV_ADD; 147 /* 148 * If kqueue works, then kevent will succeed, and it will 149 * stick an error in events[0]. If kqueue is broken, then 150 * kevent will fail. 151 */ 152 if (kevent(kq, 153 kqueueop->changes, 1, kqueueop->events, NEVENT, NULL) != 1 || 154 kqueueop->events[0].ident != -1 || 155 kqueueop->events[0].flags != EV_ERROR) { 156 event_warn("%s: detected broken kqueue; not using.", __func__); 157 free(kqueueop->changes); 158 free(kqueueop->events); 159 free(kqueueop); 160 close(kq); 161 return (NULL); 162 } 163 164 return (kqueueop); 165 } 166 167 static int 168 kq_insert(struct kqop *kqop, struct kevent *kev) 169 { 170 int nevents = kqop->nevents; 171 172 if (kqop->nchanges == nevents) { 173 struct kevent *newchange; 174 struct kevent *newresult; 175 176 nevents *= 2; 177 178 newchange = realloc(kqop->changes, 179 nevents * sizeof(struct kevent)); 180 if (newchange == NULL) { 181 event_warn("%s: malloc", __func__); 182 return (-1); 183 } 184 kqop->changes = newchange; 185 186 newresult = realloc(kqop->events, 187 nevents * sizeof(struct kevent)); 188 189 /* 190 * If we fail, we don't have to worry about freeing, 191 * the next realloc will pick it up. 192 */ 193 if (newresult == NULL) { 194 event_warn("%s: malloc", __func__); 195 return (-1); 196 } 197 kqop->events = newresult; 198 199 kqop->nevents = nevents; 200 } 201 202 memcpy(&kqop->changes[kqop->nchanges++], kev, sizeof(struct kevent)); 203 204 event_debug(("%s: fd %d %s%s", 205 __func__, (int)kev->ident, 206 kev->filter == EVFILT_READ ? "EVFILT_READ" : "EVFILT_WRITE", 207 kev->flags == EV_DELETE ? " (del)" : "")); 208 209 return (0); 210 } 211 212 static void 213 kq_sighandler(int sig) 214 { 215 /* Do nothing here */ 216 } 217 218 static int 219 kq_dispatch(struct event_base *base, void *arg, struct timeval *tv) 220 { 221 struct kqop *kqop = arg; 222 struct kevent *changes = kqop->changes; 223 struct kevent *events = kqop->events; 224 struct event *ev; 225 struct timespec ts, *ts_p = NULL; 226 int i, res; 227 228 if (tv != NULL) { 229 TIMEVAL_TO_TIMESPEC(tv, &ts); 230 ts_p = &ts; 231 } 232 233 res = kevent(kqop->kq, changes, kqop->nchanges, 234 events, kqop->nevents, ts_p); 235 kqop->nchanges = 0; 236 if (res == -1) { 237 if (errno != EINTR) { 238 event_warn("kevent"); 239 return (-1); 240 } 241 242 return (0); 243 } 244 245 event_debug(("%s: kevent reports %d", __func__, res)); 246 247 for (i = 0; i < res; i++) { 248 int which = 0; 249 250 if (events[i].flags & EV_ERROR) { 251 /* 252 * Error messages that can happen, when a delete fails. 253 * EBADF happens when the file discriptor has been 254 * closed, 255 * ENOENT when the file discriptor was closed and 256 * then reopened. 257 * EINVAL for some reasons not understood; EINVAL 258 * should not be returned ever; but FreeBSD does :-\ 259 * An error is also indicated when a callback deletes 260 * an event we are still processing. In that case 261 * the data field is set to ENOENT. 262 */ 263 if (events[i].data == EBADF || 264 events[i].data == EINVAL || 265 events[i].data == ENOENT) 266 continue; 267 errno = events[i].data; 268 return (-1); 269 } 270 271 if (events[i].filter == EVFILT_READ) { 272 which |= EV_READ; 273 } else if (events[i].filter == EVFILT_WRITE) { 274 which |= EV_WRITE; 275 } else if (events[i].filter == EVFILT_SIGNAL) { 276 which |= EV_SIGNAL; 277 } 278 279 if (!which) 280 continue; 281 282 if (events[i].filter == EVFILT_SIGNAL) { 283 struct event_list *head = 284 (struct event_list *)events[i].udata; 285 TAILQ_FOREACH(ev, head, ev_signal_next) { 286 event_active(ev, which, events[i].data); 287 } 288 } else { 289 ev = (struct event *)events[i].udata; 290 291 if (!(ev->ev_events & EV_PERSIST)) 292 ev->ev_flags &= ~EVLIST_X_KQINKERNEL; 293 294 event_active(ev, which, 1); 295 } 296 } 297 298 return (0); 299 } 300 301 302 static int 303 kq_add(void *arg, struct event *ev) 304 { 305 struct kqop *kqop = arg; 306 struct kevent kev; 307 308 if (ev->ev_events & EV_SIGNAL) { 309 int nsignal = EVENT_SIGNAL(ev); 310 311 assert(nsignal >= 0 && nsignal < NSIG); 312 if (TAILQ_EMPTY(&kqop->evsigevents[nsignal])) { 313 struct timespec timeout = { 0, 0 }; 314 315 memset(&kev, 0, sizeof(kev)); 316 kev.ident = nsignal; 317 kev.filter = EVFILT_SIGNAL; 318 kev.flags = EV_ADD; 319 kev.udata = PTR_TO_UDATA(&kqop->evsigevents[nsignal]); 320 321 /* Be ready for the signal if it is sent any 322 * time between now and the next call to 323 * kq_dispatch. */ 324 if (kevent(kqop->kq, &kev, 1, NULL, 0, &timeout) == -1) 325 return (-1); 326 327 if (_evsignal_set_handler(ev->ev_base, nsignal, 328 kq_sighandler) == -1) 329 return (-1); 330 } 331 332 TAILQ_INSERT_TAIL(&kqop->evsigevents[nsignal], ev, 333 ev_signal_next); 334 ev->ev_flags |= EVLIST_X_KQINKERNEL; 335 return (0); 336 } 337 338 if (ev->ev_events & EV_READ) { 339 memset(&kev, 0, sizeof(kev)); 340 kev.ident = ev->ev_fd; 341 kev.filter = EVFILT_READ; 342 #ifdef NOTE_EOF 343 /* Make it behave like select() and poll() */ 344 kev.fflags = NOTE_EOF; 345 #endif 346 kev.flags = EV_ADD; 347 if (!(ev->ev_events & EV_PERSIST)) 348 kev.flags |= EV_ONESHOT; 349 kev.udata = PTR_TO_UDATA(ev); 350 351 if (kq_insert(kqop, &kev) == -1) 352 return (-1); 353 354 ev->ev_flags |= EVLIST_X_KQINKERNEL; 355 } 356 357 if (ev->ev_events & EV_WRITE) { 358 memset(&kev, 0, sizeof(kev)); 359 kev.ident = ev->ev_fd; 360 kev.filter = EVFILT_WRITE; 361 kev.flags = EV_ADD; 362 if (!(ev->ev_events & EV_PERSIST)) 363 kev.flags |= EV_ONESHOT; 364 kev.udata = PTR_TO_UDATA(ev); 365 366 if (kq_insert(kqop, &kev) == -1) 367 return (-1); 368 369 ev->ev_flags |= EVLIST_X_KQINKERNEL; 370 } 371 372 return (0); 373 } 374 375 static int 376 kq_del(void *arg, struct event *ev) 377 { 378 struct kqop *kqop = arg; 379 struct kevent kev; 380 381 if (!(ev->ev_flags & EVLIST_X_KQINKERNEL)) 382 return (0); 383 384 if (ev->ev_events & EV_SIGNAL) { 385 int nsignal = EVENT_SIGNAL(ev); 386 struct timespec timeout = { 0, 0 }; 387 388 assert(nsignal >= 0 && nsignal < NSIG); 389 TAILQ_REMOVE(&kqop->evsigevents[nsignal], ev, ev_signal_next); 390 if (TAILQ_EMPTY(&kqop->evsigevents[nsignal])) { 391 memset(&kev, 0, sizeof(kev)); 392 kev.ident = nsignal; 393 kev.filter = EVFILT_SIGNAL; 394 kev.flags = EV_DELETE; 395 396 /* Because we insert signal events 397 * immediately, we need to delete them 398 * immediately, too */ 399 if (kevent(kqop->kq, &kev, 1, NULL, 0, &timeout) == -1) 400 return (-1); 401 402 if (_evsignal_restore_handler(ev->ev_base, 403 nsignal) == -1) 404 return (-1); 405 } 406 407 ev->ev_flags &= ~EVLIST_X_KQINKERNEL; 408 return (0); 409 } 410 411 if (ev->ev_events & EV_READ) { 412 memset(&kev, 0, sizeof(kev)); 413 kev.ident = ev->ev_fd; 414 kev.filter = EVFILT_READ; 415 kev.flags = EV_DELETE; 416 417 if (kq_insert(kqop, &kev) == -1) 418 return (-1); 419 420 ev->ev_flags &= ~EVLIST_X_KQINKERNEL; 421 } 422 423 if (ev->ev_events & EV_WRITE) { 424 memset(&kev, 0, sizeof(kev)); 425 kev.ident = ev->ev_fd; 426 kev.filter = EVFILT_WRITE; 427 kev.flags = EV_DELETE; 428 429 if (kq_insert(kqop, &kev) == -1) 430 return (-1); 431 432 ev->ev_flags &= ~EVLIST_X_KQINKERNEL; 433 } 434 435 return (0); 436 } 437 438 static void 439 kq_dealloc(struct event_base *base, void *arg) 440 { 441 struct kqop *kqop = arg; 442 443 evsignal_dealloc(base); 444 445 if (kqop->changes) 446 free(kqop->changes); 447 if (kqop->events) 448 free(kqop->events); 449 if (kqop->kq >= 0 && kqop->pid == getpid()) 450 close(kqop->kq); 451 452 memset(kqop, 0, sizeof(struct kqop)); 453 free(kqop); 454 } 455