1 /* $OpenBSD: kqueue.c,v 1.24 2008/05/02 06:09:11 brad Exp $ */ 2 3 /* 4 * Copyright 2000-2002 Niels Provos <provos@citi.umich.edu> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. The name of the author may not be used to endorse or promote products 16 * derived from this software without specific prior written permission. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 */ 29 #ifdef HAVE_CONFIG_H 30 #include "config.h" 31 #endif 32 33 #include <sys/types.h> 34 #ifdef HAVE_SYS_TIME_H 35 #include <sys/time.h> 36 #else 37 #include <sys/_time.h> 38 #endif 39 #include <sys/queue.h> 40 #include <sys/event.h> 41 #include <signal.h> 42 #include <stdio.h> 43 #include <stdlib.h> 44 #include <string.h> 45 #include <unistd.h> 46 #include <errno.h> 47 #ifdef HAVE_INTTYPES_H 48 #include <inttypes.h> 49 #endif 50 51 /* Some platforms apparently define the udata field of struct kevent as 52 * ntptr_t, whereas others define it as void*. There doesn't seem to be an 53 * easy way to tell them apart via autoconf, so we need to use OS macros. */ 54 #if defined(HAVE_INTTYPES_H) && !defined(__OpenBSD__) && !defined(__FreeBSD__) && !defined(__darwin__) && !defined(__APPLE__) 55 #define PTR_TO_UDATA(x) ((intptr_t)(x)) 56 #else 57 #define PTR_TO_UDATA(x) (x) 58 #endif 59 60 #include "event.h" 61 #include "log.h" 62 63 #define EVLIST_X_KQINKERNEL 0x1000 64 65 #define NEVENT 64 66 67 struct kqop { 68 struct kevent *changes; 69 int nchanges; 70 struct kevent *events; 71 int nevents; 72 int kq; 73 }; 74 75 void *kq_init (struct event_base *); 76 int kq_add (void *, struct event *); 77 int kq_del (void *, struct event *); 78 int kq_recalc (struct event_base *, void *, int); 79 int kq_dispatch (struct event_base *, void *, struct timeval *); 80 int kq_insert (struct kqop *, struct kevent *); 81 void kq_dealloc (struct event_base *, void *); 82 83 const struct eventop kqops = { 84 "kqueue", 85 kq_init, 86 kq_add, 87 kq_del, 88 kq_recalc, 89 kq_dispatch, 90 kq_dealloc 91 }; 92 93 void * 94 kq_init(struct event_base *base) 95 { 96 int kq; 97 struct kqop *kqueueop; 98 99 /* Disable kqueue when this environment variable is set */ 100 if (!issetugid() && getenv("EVENT_NOKQUEUE")) 101 return (NULL); 102 103 if (!(kqueueop = calloc(1, sizeof(struct kqop)))) 104 return (NULL); 105 106 /* Initalize the kernel queue */ 107 108 if ((kq = kqueue()) == -1) { 109 event_warn("kqueue"); 110 free (kqueueop); 111 return (NULL); 112 } 113 114 kqueueop->kq = kq; 115 116 /* Initalize fields */ 117 kqueueop->changes = calloc(NEVENT, sizeof(struct kevent)); 118 if (kqueueop->changes == NULL) { 119 free (kqueueop); 120 return (NULL); 121 } 122 kqueueop->events = calloc(NEVENT, sizeof(struct kevent)); 123 if (kqueueop->events == NULL) { 124 free (kqueueop->changes); 125 free (kqueueop); 126 return (NULL); 127 } 128 kqueueop->nevents = NEVENT; 129 130 /* Check for Mac OS X kqueue bug. */ 131 kqueueop->changes[0].ident = -1; 132 kqueueop->changes[0].filter = EVFILT_READ; 133 kqueueop->changes[0].flags = EV_ADD; 134 /* 135 * If kqueue works, then kevent will succeed, and it will 136 * stick an error in events[0]. If kqueue is broken, then 137 * kevent will fail. 138 */ 139 if (kevent(kq, 140 kqueueop->changes, 1, kqueueop->events, NEVENT, NULL) != 1 || 141 kqueueop->events[0].ident != -1 || 142 kqueueop->events[0].flags != EV_ERROR) { 143 event_warn("%s: detected broken kqueue; not using.", __func__); 144 free(kqueueop->changes); 145 free(kqueueop->events); 146 free(kqueueop); 147 close(kq); 148 return (NULL); 149 } 150 151 return (kqueueop); 152 } 153 154 int 155 kq_recalc(struct event_base *base, void *arg, int max) 156 { 157 return (0); 158 } 159 160 int 161 kq_insert(struct kqop *kqop, struct kevent *kev) 162 { 163 int nevents = kqop->nevents; 164 165 if (kqop->nchanges == nevents) { 166 struct kevent *newchange; 167 struct kevent *newresult; 168 169 nevents *= 2; 170 171 newchange = realloc(kqop->changes, 172 nevents * sizeof(struct kevent)); 173 if (newchange == NULL) { 174 event_warn("%s: malloc", __func__); 175 return (-1); 176 } 177 kqop->changes = newchange; 178 179 newresult = realloc(kqop->events, 180 nevents * sizeof(struct kevent)); 181 182 /* 183 * If we fail, we don't have to worry about freeing, 184 * the next realloc will pick it up. 185 */ 186 if (newresult == NULL) { 187 event_warn("%s: malloc", __func__); 188 return (-1); 189 } 190 kqop->events = newresult; 191 192 kqop->nevents = nevents; 193 } 194 195 memcpy(&kqop->changes[kqop->nchanges++], kev, sizeof(struct kevent)); 196 197 event_debug(("%s: fd %d %s%s", 198 __func__, kev->ident, 199 kev->filter == EVFILT_READ ? "EVFILT_READ" : "EVFILT_WRITE", 200 kev->flags == EV_DELETE ? " (del)" : "")); 201 202 return (0); 203 } 204 205 static void 206 kq_sighandler(int sig) 207 { 208 /* Do nothing here */ 209 } 210 211 int 212 kq_dispatch(struct event_base *base, void *arg, struct timeval *tv) 213 { 214 struct kqop *kqop = arg; 215 struct kevent *changes = kqop->changes; 216 struct kevent *events = kqop->events; 217 struct event *ev; 218 struct timespec ts, *ts_p = NULL; 219 int i, res; 220 221 if (tv != NULL) { 222 TIMEVAL_TO_TIMESPEC(tv, &ts); 223 ts_p = &ts; 224 } 225 226 res = kevent(kqop->kq, changes, kqop->nchanges, 227 events, kqop->nevents, ts_p); 228 kqop->nchanges = 0; 229 if (res == -1) { 230 if (errno != EINTR) { 231 event_warn("kevent"); 232 return (-1); 233 } 234 235 return (0); 236 } 237 238 event_debug(("%s: kevent reports %d", __func__, res)); 239 240 for (i = 0; i < res; i++) { 241 int which = 0; 242 243 if (events[i].flags & EV_ERROR) { 244 /* 245 * Error messages that can happen, when a delete fails. 246 * EBADF happens when the file discriptor has been 247 * closed, 248 * ENOENT when the file discriptor was closed and 249 * then reopened. 250 * EINVAL for some reasons not understood; EINVAL 251 * should not be returned ever; but FreeBSD does :-\ 252 * An error is also indicated when a callback deletes 253 * an event we are still processing. In that case 254 * the data field is set to ENOENT. 255 */ 256 if (events[i].data == EBADF || 257 events[i].data == EINVAL || 258 events[i].data == ENOENT) 259 continue; 260 errno = events[i].data; 261 return (-1); 262 } 263 264 ev = (struct event *)events[i].udata; 265 266 if (events[i].filter == EVFILT_READ) { 267 which |= EV_READ; 268 } else if (events[i].filter == EVFILT_WRITE) { 269 which |= EV_WRITE; 270 } else if (events[i].filter == EVFILT_SIGNAL) { 271 which |= EV_SIGNAL; 272 } 273 274 if (!which) 275 continue; 276 277 if (!(ev->ev_events & EV_PERSIST)) 278 event_del(ev); 279 280 event_active(ev, which, 281 ev->ev_events & EV_SIGNAL ? events[i].data : 1); 282 } 283 284 return (0); 285 } 286 287 288 int 289 kq_add(void *arg, struct event *ev) 290 { 291 struct kqop *kqop = arg; 292 struct kevent kev; 293 294 if (ev->ev_events & EV_SIGNAL) { 295 int nsignal = EVENT_SIGNAL(ev); 296 297 memset(&kev, 0, sizeof(kev)); 298 kev.ident = nsignal; 299 kev.filter = EVFILT_SIGNAL; 300 kev.flags = EV_ADD; 301 if (!(ev->ev_events & EV_PERSIST)) 302 kev.flags |= EV_ONESHOT; 303 kev.udata = PTR_TO_UDATA(ev); 304 305 if (kq_insert(kqop, &kev) == -1) 306 return (-1); 307 308 if (signal(nsignal, kq_sighandler) == SIG_ERR) 309 return (-1); 310 311 ev->ev_flags |= EVLIST_X_KQINKERNEL; 312 return (0); 313 } 314 315 if (ev->ev_events & EV_READ) { 316 memset(&kev, 0, sizeof(kev)); 317 kev.ident = ev->ev_fd; 318 kev.filter = EVFILT_READ; 319 #ifdef NOTE_EOF 320 /* Make it behave like select() and poll() */ 321 kev.fflags = NOTE_EOF; 322 #endif 323 kev.flags = EV_ADD; 324 if (!(ev->ev_events & EV_PERSIST)) 325 kev.flags |= EV_ONESHOT; 326 kev.udata = PTR_TO_UDATA(ev); 327 328 if (kq_insert(kqop, &kev) == -1) 329 return (-1); 330 331 ev->ev_flags |= EVLIST_X_KQINKERNEL; 332 } 333 334 if (ev->ev_events & EV_WRITE) { 335 memset(&kev, 0, sizeof(kev)); 336 kev.ident = ev->ev_fd; 337 kev.filter = EVFILT_WRITE; 338 kev.flags = EV_ADD; 339 if (!(ev->ev_events & EV_PERSIST)) 340 kev.flags |= EV_ONESHOT; 341 kev.udata = PTR_TO_UDATA(ev); 342 343 if (kq_insert(kqop, &kev) == -1) 344 return (-1); 345 346 ev->ev_flags |= EVLIST_X_KQINKERNEL; 347 } 348 349 return (0); 350 } 351 352 int 353 kq_del(void *arg, struct event *ev) 354 { 355 struct kqop *kqop = arg; 356 struct kevent kev; 357 358 if (!(ev->ev_flags & EVLIST_X_KQINKERNEL)) 359 return (0); 360 361 if (ev->ev_events & EV_SIGNAL) { 362 int nsignal = EVENT_SIGNAL(ev); 363 364 memset(&kev, 0, sizeof(kev)); 365 kev.ident = nsignal; 366 kev.filter = EVFILT_SIGNAL; 367 kev.flags = EV_DELETE; 368 369 if (kq_insert(kqop, &kev) == -1) 370 return (-1); 371 372 if (signal(nsignal, SIG_DFL) == SIG_ERR) 373 return (-1); 374 375 ev->ev_flags &= ~EVLIST_X_KQINKERNEL; 376 return (0); 377 } 378 379 if (ev->ev_events & EV_READ) { 380 memset(&kev, 0, sizeof(kev)); 381 kev.ident = ev->ev_fd; 382 kev.filter = EVFILT_READ; 383 kev.flags = EV_DELETE; 384 385 if (kq_insert(kqop, &kev) == -1) 386 return (-1); 387 388 ev->ev_flags &= ~EVLIST_X_KQINKERNEL; 389 } 390 391 if (ev->ev_events & EV_WRITE) { 392 memset(&kev, 0, sizeof(kev)); 393 kev.ident = ev->ev_fd; 394 kev.filter = EVFILT_WRITE; 395 kev.flags = EV_DELETE; 396 397 if (kq_insert(kqop, &kev) == -1) 398 return (-1); 399 400 ev->ev_flags &= ~EVLIST_X_KQINKERNEL; 401 } 402 403 return (0); 404 } 405 406 void 407 kq_dealloc(struct event_base *base, void *arg) 408 { 409 struct kqop *kqop = arg; 410 411 if (kqop->changes) 412 free(kqop->changes); 413 if (kqop->events) 414 free(kqop->events); 415 if (kqop->kq) 416 close(kqop->kq); 417 memset(kqop, 0, sizeof(struct kqop)); 418 free(kqop); 419 } 420