1 /* $OpenBSD: kqueue.c,v 1.28 2012/02/08 09:01:00 nicm Exp $ */ 2 3 /* 4 * Copyright 2000-2002 Niels Provos <provos@citi.umich.edu> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. The name of the author may not be used to endorse or promote products 16 * derived from this software without specific prior written permission. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 */ 29 #ifdef HAVE_CONFIG_H 30 #include "config.h" 31 #endif 32 33 #define _GNU_SOURCE 1 34 35 #include <sys/types.h> 36 #ifdef HAVE_SYS_TIME_H 37 #include <sys/time.h> 38 #else 39 #include <sys/_libevent_time.h> 40 #endif 41 #include <sys/queue.h> 42 #include <sys/event.h> 43 #include <signal.h> 44 #include <stdio.h> 45 #include <stdlib.h> 46 #include <string.h> 47 #include <unistd.h> 48 #include <errno.h> 49 #include <assert.h> 50 #ifdef HAVE_INTTYPES_H 51 #include <inttypes.h> 52 #endif 53 54 /* Some platforms apparently define the udata field of struct kevent as 55 * intptr_t, whereas others define it as void*. There doesn't seem to be an 56 * easy way to tell them apart via autoconf, so we need to use OS macros. */ 57 #if defined(HAVE_INTTYPES_H) && !defined(__OpenBSD__) && !defined(__FreeBSD__) && !defined(__darwin__) && !defined(__APPLE__) 58 #define PTR_TO_UDATA(x) ((intptr_t)(x)) 59 #else 60 #define PTR_TO_UDATA(x) (x) 61 #endif 62 63 #include "event.h" 64 #include "event-internal.h" 65 #include "log.h" 66 #include "evsignal.h" 67 68 #define EVLIST_X_KQINKERNEL 0x1000 69 70 #define NEVENT 64 71 72 struct kqop { 73 struct kevent *changes; 74 int nchanges; 75 struct kevent *events; 76 struct event_list evsigevents[NSIG]; 77 int nevents; 78 int kq; 79 pid_t pid; 80 }; 81 82 static void *kq_init (struct event_base *); 83 static int kq_add (void *, struct event *); 84 static int kq_del (void *, struct event *); 85 static int kq_dispatch (struct event_base *, void *, struct timeval *); 86 static int kq_insert (struct kqop *, struct kevent *); 87 static void kq_dealloc (struct event_base *, void *); 88 89 const struct eventop kqops = { 90 "kqueue", 91 kq_init, 92 kq_add, 93 kq_del, 94 kq_dispatch, 95 kq_dealloc, 96 1 /* need reinit */ 97 }; 98 99 static void * 100 kq_init(struct event_base *base) 101 { 102 int i, kq; 103 struct kqop *kqueueop; 104 105 /* Disable kqueue when this environment variable is set */ 106 if (evutil_getenv("EVENT_NOKQUEUE")) 107 return (NULL); 108 109 if (!(kqueueop = calloc(1, sizeof(struct kqop)))) 110 return (NULL); 111 112 /* Initalize the kernel queue */ 113 114 if ((kq = kqueue()) == -1) { 115 event_warn("kqueue"); 116 free (kqueueop); 117 return (NULL); 118 } 119 120 kqueueop->kq = kq; 121 122 kqueueop->pid = getpid(); 123 124 /* Initalize fields */ 125 kqueueop->changes = calloc(NEVENT, sizeof(struct kevent)); 126 if (kqueueop->changes == NULL) { 127 free (kqueueop); 128 return (NULL); 129 } 130 kqueueop->events = calloc(NEVENT, sizeof(struct kevent)); 131 if (kqueueop->events == NULL) { 132 free (kqueueop->changes); 133 free (kqueueop); 134 return (NULL); 135 } 136 kqueueop->nevents = NEVENT; 137 138 /* we need to keep track of multiple events per signal */ 139 for (i = 0; i < NSIG; ++i) { 140 TAILQ_INIT(&kqueueop->evsigevents[i]); 141 } 142 143 /* Check for Mac OS X kqueue bug. */ 144 kqueueop->changes[0].ident = -1; 145 kqueueop->changes[0].filter = EVFILT_READ; 146 kqueueop->changes[0].flags = EV_ADD; 147 /* 148 * If kqueue works, then kevent will succeed, and it will 149 * stick an error in events[0]. If kqueue is broken, then 150 * kevent will fail. 151 */ 152 if (kevent(kq, 153 kqueueop->changes, 1, kqueueop->events, NEVENT, NULL) != 1 || 154 kqueueop->events[0].ident != -1 || 155 kqueueop->events[0].flags != EV_ERROR) { 156 event_warn("%s: detected broken kqueue; not using.", __func__); 157 free(kqueueop->changes); 158 free(kqueueop->events); 159 free(kqueueop); 160 close(kq); 161 return (NULL); 162 } 163 164 return (kqueueop); 165 } 166 167 static int 168 kq_insert(struct kqop *kqop, struct kevent *kev) 169 { 170 int nevents = kqop->nevents; 171 172 if (kqop->nchanges == nevents) { 173 struct kevent *newchange; 174 struct kevent *newresult; 175 176 nevents *= 2; 177 178 newchange = realloc(kqop->changes, 179 nevents * sizeof(struct kevent)); 180 if (newchange == NULL) { 181 event_warn("%s: malloc", __func__); 182 return (-1); 183 } 184 kqop->changes = newchange; 185 186 newresult = realloc(kqop->events, 187 nevents * sizeof(struct kevent)); 188 189 /* 190 * If we fail, we don't have to worry about freeing, 191 * the next realloc will pick it up. 192 */ 193 if (newresult == NULL) { 194 event_warn("%s: malloc", __func__); 195 return (-1); 196 } 197 kqop->events = newresult; 198 199 kqop->nevents = nevents; 200 } 201 202 memcpy(&kqop->changes[kqop->nchanges++], kev, sizeof(struct kevent)); 203 204 event_debug(("%s: fd %d %s%s", 205 __func__, (int)kev->ident, 206 kev->filter == EVFILT_READ ? "EVFILT_READ" : "EVFILT_WRITE", 207 kev->flags == EV_DELETE ? " (del)" : "")); 208 209 return (0); 210 } 211 212 static void 213 kq_sighandler(int sig) 214 { 215 /* Do nothing here */ 216 } 217 218 static int 219 kq_dispatch(struct event_base *base, void *arg, struct timeval *tv) 220 { 221 struct kqop *kqop = arg; 222 struct kevent *changes = kqop->changes; 223 struct kevent *events = kqop->events; 224 struct event *ev; 225 struct timespec ts, *ts_p = NULL; 226 int i, res; 227 228 if (tv != NULL) { 229 TIMEVAL_TO_TIMESPEC(tv, &ts); 230 ts_p = &ts; 231 } 232 233 res = kevent(kqop->kq, changes, kqop->nchanges, 234 events, kqop->nevents, ts_p); 235 kqop->nchanges = 0; 236 if (res == -1) { 237 if (errno != EINTR) { 238 event_warn("kevent"); 239 return (-1); 240 } 241 242 return (0); 243 } 244 245 event_debug(("%s: kevent reports %d", __func__, res)); 246 247 for (i = 0; i < res; i++) { 248 int which = 0; 249 250 if (events[i].flags & EV_ERROR) { 251 switch (events[i].data) { 252 253 /* Can occur on delete if we are not currently 254 * watching any events on this fd. That can 255 * happen when the fd was closed and another 256 * file was opened with that fd. */ 257 case ENOENT: 258 /* Can occur for reasons not fully understood 259 * on FreeBSD. */ 260 case EINVAL: 261 continue; 262 /* Can occur on a delete if the fd is closed. Can 263 * occur on an add if the fd was one side of a pipe, 264 * and the other side was closed. */ 265 case EBADF: 266 continue; 267 /* These two can occur on an add if the fd was one side 268 * of a pipe, and the other side was closed. */ 269 case EPERM: 270 case EPIPE: 271 /* Report read events, if we're listening for 272 * them, so that the user can learn about any 273 * add errors. (If the operation was a 274 * delete, then udata should be cleared.) */ 275 if (events[i].udata) { 276 /* The operation was an add: 277 * report the error as a read. */ 278 which |= EV_READ; 279 break; 280 } else { 281 /* The operation was a del: 282 * report nothing. */ 283 continue; 284 } 285 286 /* Other errors shouldn't occur. */ 287 default: 288 errno = events[i].data; 289 return (-1); 290 } 291 } else if (events[i].filter == EVFILT_READ) { 292 which |= EV_READ; 293 } else if (events[i].filter == EVFILT_WRITE) { 294 which |= EV_WRITE; 295 } else if (events[i].filter == EVFILT_SIGNAL) { 296 which |= EV_SIGNAL; 297 } 298 299 if (!which) 300 continue; 301 302 if (events[i].filter == EVFILT_SIGNAL) { 303 struct event_list *head = 304 (struct event_list *)events[i].udata; 305 TAILQ_FOREACH(ev, head, ev_signal_next) { 306 event_active(ev, which, events[i].data); 307 } 308 } else { 309 ev = (struct event *)events[i].udata; 310 311 if (!(ev->ev_events & EV_PERSIST)) 312 ev->ev_flags &= ~EVLIST_X_KQINKERNEL; 313 314 event_active(ev, which, 1); 315 } 316 } 317 318 return (0); 319 } 320 321 322 static int 323 kq_add(void *arg, struct event *ev) 324 { 325 struct kqop *kqop = arg; 326 struct kevent kev; 327 328 if (ev->ev_events & EV_SIGNAL) { 329 int nsignal = EVENT_SIGNAL(ev); 330 331 assert(nsignal >= 0 && nsignal < NSIG); 332 if (TAILQ_EMPTY(&kqop->evsigevents[nsignal])) { 333 struct timespec timeout = { 0, 0 }; 334 335 memset(&kev, 0, sizeof(kev)); 336 kev.ident = nsignal; 337 kev.filter = EVFILT_SIGNAL; 338 kev.flags = EV_ADD; 339 kev.udata = PTR_TO_UDATA(&kqop->evsigevents[nsignal]); 340 341 /* Be ready for the signal if it is sent any 342 * time between now and the next call to 343 * kq_dispatch. */ 344 if (kevent(kqop->kq, &kev, 1, NULL, 0, &timeout) == -1) 345 return (-1); 346 347 if (_evsignal_set_handler(ev->ev_base, nsignal, 348 kq_sighandler) == -1) 349 return (-1); 350 } 351 352 TAILQ_INSERT_TAIL(&kqop->evsigevents[nsignal], ev, 353 ev_signal_next); 354 ev->ev_flags |= EVLIST_X_KQINKERNEL; 355 return (0); 356 } 357 358 if (ev->ev_events & EV_READ) { 359 memset(&kev, 0, sizeof(kev)); 360 kev.ident = ev->ev_fd; 361 kev.filter = EVFILT_READ; 362 #ifdef NOTE_EOF 363 /* Make it behave like select() and poll() */ 364 kev.fflags = NOTE_EOF; 365 #endif 366 kev.flags = EV_ADD; 367 if (!(ev->ev_events & EV_PERSIST)) 368 kev.flags |= EV_ONESHOT; 369 kev.udata = PTR_TO_UDATA(ev); 370 371 if (kq_insert(kqop, &kev) == -1) 372 return (-1); 373 374 ev->ev_flags |= EVLIST_X_KQINKERNEL; 375 } 376 377 if (ev->ev_events & EV_WRITE) { 378 memset(&kev, 0, sizeof(kev)); 379 kev.ident = ev->ev_fd; 380 kev.filter = EVFILT_WRITE; 381 kev.flags = EV_ADD; 382 if (!(ev->ev_events & EV_PERSIST)) 383 kev.flags |= EV_ONESHOT; 384 kev.udata = PTR_TO_UDATA(ev); 385 386 if (kq_insert(kqop, &kev) == -1) 387 return (-1); 388 389 ev->ev_flags |= EVLIST_X_KQINKERNEL; 390 } 391 392 return (0); 393 } 394 395 static int 396 kq_del(void *arg, struct event *ev) 397 { 398 struct kqop *kqop = arg; 399 struct kevent kev; 400 401 if (!(ev->ev_flags & EVLIST_X_KQINKERNEL)) 402 return (0); 403 404 if (ev->ev_events & EV_SIGNAL) { 405 int nsignal = EVENT_SIGNAL(ev); 406 struct timespec timeout = { 0, 0 }; 407 408 assert(nsignal >= 0 && nsignal < NSIG); 409 TAILQ_REMOVE(&kqop->evsigevents[nsignal], ev, ev_signal_next); 410 if (TAILQ_EMPTY(&kqop->evsigevents[nsignal])) { 411 memset(&kev, 0, sizeof(kev)); 412 kev.ident = nsignal; 413 kev.filter = EVFILT_SIGNAL; 414 kev.flags = EV_DELETE; 415 416 /* Because we insert signal events 417 * immediately, we need to delete them 418 * immediately, too */ 419 if (kevent(kqop->kq, &kev, 1, NULL, 0, &timeout) == -1) 420 return (-1); 421 422 if (_evsignal_restore_handler(ev->ev_base, 423 nsignal) == -1) 424 return (-1); 425 } 426 427 ev->ev_flags &= ~EVLIST_X_KQINKERNEL; 428 return (0); 429 } 430 431 if (ev->ev_events & EV_READ) { 432 memset(&kev, 0, sizeof(kev)); 433 kev.ident = ev->ev_fd; 434 kev.filter = EVFILT_READ; 435 kev.flags = EV_DELETE; 436 437 if (kq_insert(kqop, &kev) == -1) 438 return (-1); 439 440 ev->ev_flags &= ~EVLIST_X_KQINKERNEL; 441 } 442 443 if (ev->ev_events & EV_WRITE) { 444 memset(&kev, 0, sizeof(kev)); 445 kev.ident = ev->ev_fd; 446 kev.filter = EVFILT_WRITE; 447 kev.flags = EV_DELETE; 448 449 if (kq_insert(kqop, &kev) == -1) 450 return (-1); 451 452 ev->ev_flags &= ~EVLIST_X_KQINKERNEL; 453 } 454 455 return (0); 456 } 457 458 static void 459 kq_dealloc(struct event_base *base, void *arg) 460 { 461 struct kqop *kqop = arg; 462 463 evsignal_dealloc(base); 464 465 if (kqop->changes) 466 free(kqop->changes); 467 if (kqop->events) 468 free(kqop->events); 469 if (kqop->kq >= 0 && kqop->pid == getpid()) 470 close(kqop->kq); 471 472 memset(kqop, 0, sizeof(struct kqop)); 473 free(kqop); 474 } 475