1 /* $OpenBSD: kqueue.c,v 1.39 2016/09/03 11:31:17 nayden Exp $ */ 2 3 /* 4 * Copyright 2000-2002 Niels Provos <provos@citi.umich.edu> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. The name of the author may not be used to endorse or promote products 16 * derived from this software without specific prior written permission. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 */ 29 30 #include <sys/types.h> 31 #include <sys/time.h> 32 #include <sys/queue.h> 33 #include <sys/event.h> 34 35 #include <signal.h> 36 #include <stdio.h> 37 #include <stdlib.h> 38 #include <string.h> 39 #include <unistd.h> 40 #include <errno.h> 41 #include <assert.h> 42 #include <inttypes.h> 43 44 #include "event.h" 45 #include "event-internal.h" 46 #include "log.h" 47 #include "evsignal.h" 48 49 #define EVLIST_X_KQINKERNEL 0x1000 50 51 #define NEVENT 64 52 53 struct kqop { 54 struct kevent *changes; 55 int nchanges; 56 struct kevent *events; 57 struct event_list evsigevents[NSIG]; 58 int nevents; 59 int kq; 60 pid_t pid; 61 }; 62 63 static void *kq_init (struct event_base *); 64 static int kq_add (void *, struct event *); 65 static int kq_del (void *, struct event *); 66 static int kq_dispatch (struct event_base *, void *, struct timeval *); 67 static int kq_insert (struct kqop *, struct kevent *); 68 static void kq_dealloc (struct event_base *, void *); 69 70 const struct eventop kqops = { 71 "kqueue", 72 kq_init, 73 kq_add, 74 kq_del, 75 kq_dispatch, 76 kq_dealloc, 77 1 /* need reinit */ 78 }; 79 80 static void * 81 kq_init(struct event_base *base) 82 { 83 int i, kq; 84 struct kqop *kqueueop; 85 86 /* Disable kqueue when this environment variable is set */ 87 if (!issetugid() && getenv("EVENT_NOKQUEUE")) 88 return (NULL); 89 90 if (!(kqueueop = calloc(1, sizeof(struct kqop)))) 91 return (NULL); 92 93 /* Initalize the kernel queue */ 94 95 if ((kq = kqueue()) == -1) { 96 event_warn("kqueue"); 97 free (kqueueop); 98 return (NULL); 99 } 100 101 kqueueop->kq = kq; 102 103 kqueueop->pid = getpid(); 104 105 /* Initalize fields */ 106 kqueueop->changes = calloc(NEVENT, sizeof(struct kevent)); 107 if (kqueueop->changes == NULL) { 108 free (kqueueop); 109 return (NULL); 110 } 111 kqueueop->events = calloc(NEVENT, sizeof(struct kevent)); 112 if (kqueueop->events == NULL) { 113 free (kqueueop->changes); 114 free (kqueueop); 115 return (NULL); 116 } 117 kqueueop->nevents = NEVENT; 118 119 /* we need to keep track of multiple events per signal */ 120 for (i = 0; i < NSIG; ++i) { 121 TAILQ_INIT(&kqueueop->evsigevents[i]); 122 } 123 124 return (kqueueop); 125 } 126 127 static int 128 kq_insert(struct kqop *kqop, struct kevent *kev) 129 { 130 int nevents = kqop->nevents; 131 132 if (kqop->nchanges == nevents) { 133 struct kevent *newchange; 134 struct kevent *newresult; 135 136 nevents *= 2; 137 138 newchange = reallocarray(kqop->changes, 139 nevents, sizeof(struct kevent)); 140 if (newchange == NULL) { 141 event_warn("%s: malloc", __func__); 142 return (-1); 143 } 144 kqop->changes = newchange; 145 146 newresult = reallocarray(kqop->events, 147 nevents, sizeof(struct kevent)); 148 149 /* 150 * If we fail, we don't have to worry about freeing, 151 * the next realloc will pick it up. 152 */ 153 if (newresult == NULL) { 154 event_warn("%s: malloc", __func__); 155 return (-1); 156 } 157 kqop->events = newresult; 158 159 kqop->nevents = nevents; 160 } 161 162 memcpy(&kqop->changes[kqop->nchanges++], kev, sizeof(struct kevent)); 163 164 event_debug(("%s: fd %d %s%s", 165 __func__, (int)kev->ident, 166 kev->filter == EVFILT_READ ? "EVFILT_READ" : "EVFILT_WRITE", 167 kev->flags == EV_DELETE ? " (del)" : "")); 168 169 return (0); 170 } 171 172 static void 173 kq_sighandler(int sig) 174 { 175 /* Do nothing here */ 176 } 177 178 static int 179 kq_dispatch(struct event_base *base, void *arg, struct timeval *tv) 180 { 181 struct kqop *kqop = arg; 182 struct kevent *changes = kqop->changes; 183 struct kevent *events = kqop->events; 184 struct event *ev; 185 struct timespec ts, *ts_p = NULL; 186 int i, res; 187 188 if (tv != NULL) { 189 TIMEVAL_TO_TIMESPEC(tv, &ts); 190 ts_p = &ts; 191 } 192 193 res = kevent(kqop->kq, changes, kqop->nchanges, 194 events, kqop->nevents, ts_p); 195 kqop->nchanges = 0; 196 if (res == -1) { 197 if (errno != EINTR) { 198 event_warn("kevent"); 199 return (-1); 200 } 201 202 return (0); 203 } 204 205 event_debug(("%s: kevent reports %d", __func__, res)); 206 207 for (i = 0; i < res; i++) { 208 int which = 0; 209 210 if (events[i].flags & EV_ERROR) { 211 switch (events[i].data) { 212 213 /* Can occur on delete if we are not currently 214 * watching any events on this fd. That can 215 * happen when the fd was closed and another 216 * file was opened with that fd. */ 217 case ENOENT: 218 /* Can occur for reasons not fully understood 219 * on FreeBSD. */ 220 case EINVAL: 221 continue; 222 /* Can occur on a delete if the fd is closed. Can 223 * occur on an add if the fd was one side of a pipe, 224 * and the other side was closed. */ 225 case EBADF: 226 continue; 227 /* These two can occur on an add if the fd was one side 228 * of a pipe, and the other side was closed. */ 229 case EPERM: 230 case EPIPE: 231 /* Report read events, if we're listening for 232 * them, so that the user can learn about any 233 * add errors. (If the operation was a 234 * delete, then udata should be cleared.) */ 235 if (events[i].udata) { 236 /* The operation was an add: 237 * report the error as a read. */ 238 which |= EV_READ; 239 break; 240 } else { 241 /* The operation was a del: 242 * report nothing. */ 243 continue; 244 } 245 246 /* Other errors shouldn't occur. */ 247 default: 248 errno = events[i].data; 249 return (-1); 250 } 251 } else if (events[i].filter == EVFILT_READ) { 252 which |= EV_READ; 253 } else if (events[i].filter == EVFILT_WRITE) { 254 which |= EV_WRITE; 255 } else if (events[i].filter == EVFILT_SIGNAL) { 256 which |= EV_SIGNAL; 257 } 258 259 if (!which) 260 continue; 261 262 if (events[i].filter == EVFILT_SIGNAL) { 263 struct event_list *head = 264 (struct event_list *)events[i].udata; 265 TAILQ_FOREACH(ev, head, ev_signal_next) { 266 event_active(ev, which, events[i].data); 267 } 268 } else { 269 ev = (struct event *)events[i].udata; 270 271 if (!(ev->ev_events & EV_PERSIST)) 272 ev->ev_flags &= ~EVLIST_X_KQINKERNEL; 273 274 event_active(ev, which, 1); 275 } 276 } 277 278 return (0); 279 } 280 281 282 static int 283 kq_add(void *arg, struct event *ev) 284 { 285 struct kqop *kqop = arg; 286 struct kevent kev; 287 288 if (ev->ev_events & EV_SIGNAL) { 289 int nsignal = EVENT_SIGNAL(ev); 290 291 assert(nsignal >= 0 && nsignal < NSIG); 292 if (TAILQ_EMPTY(&kqop->evsigevents[nsignal])) { 293 struct timespec timeout = { 0, 0 }; 294 295 memset(&kev, 0, sizeof(kev)); 296 kev.ident = nsignal; 297 kev.filter = EVFILT_SIGNAL; 298 kev.flags = EV_ADD; 299 kev.udata = &kqop->evsigevents[nsignal]; 300 301 /* Be ready for the signal if it is sent any 302 * time between now and the next call to 303 * kq_dispatch. */ 304 if (kevent(kqop->kq, &kev, 1, NULL, 0, &timeout) == -1) 305 return (-1); 306 307 if (_evsignal_set_handler(ev->ev_base, nsignal, 308 kq_sighandler) == -1) 309 return (-1); 310 } 311 312 TAILQ_INSERT_TAIL(&kqop->evsigevents[nsignal], ev, 313 ev_signal_next); 314 ev->ev_flags |= EVLIST_X_KQINKERNEL; 315 return (0); 316 } 317 318 if (ev->ev_events & EV_READ) { 319 memset(&kev, 0, sizeof(kev)); 320 kev.ident = ev->ev_fd; 321 kev.filter = EVFILT_READ; 322 /* Make it behave like select() and poll() */ 323 kev.fflags = NOTE_EOF; 324 kev.flags = EV_ADD; 325 if (!(ev->ev_events & EV_PERSIST)) 326 kev.flags |= EV_ONESHOT; 327 kev.udata = ev; 328 329 if (kq_insert(kqop, &kev) == -1) 330 return (-1); 331 332 ev->ev_flags |= EVLIST_X_KQINKERNEL; 333 } 334 335 if (ev->ev_events & EV_WRITE) { 336 memset(&kev, 0, sizeof(kev)); 337 kev.ident = ev->ev_fd; 338 kev.filter = EVFILT_WRITE; 339 kev.flags = EV_ADD; 340 if (!(ev->ev_events & EV_PERSIST)) 341 kev.flags |= EV_ONESHOT; 342 kev.udata = ev; 343 344 if (kq_insert(kqop, &kev) == -1) 345 return (-1); 346 347 ev->ev_flags |= EVLIST_X_KQINKERNEL; 348 } 349 350 return (0); 351 } 352 353 static int 354 kq_del(void *arg, struct event *ev) 355 { 356 struct kqop *kqop = arg; 357 struct kevent kev; 358 359 if (!(ev->ev_flags & EVLIST_X_KQINKERNEL)) 360 return (0); 361 362 if (ev->ev_events & EV_SIGNAL) { 363 int nsignal = EVENT_SIGNAL(ev); 364 struct timespec timeout = { 0, 0 }; 365 366 assert(nsignal >= 0 && nsignal < NSIG); 367 TAILQ_REMOVE(&kqop->evsigevents[nsignal], ev, ev_signal_next); 368 if (TAILQ_EMPTY(&kqop->evsigevents[nsignal])) { 369 memset(&kev, 0, sizeof(kev)); 370 kev.ident = nsignal; 371 kev.filter = EVFILT_SIGNAL; 372 kev.flags = EV_DELETE; 373 374 /* Because we insert signal events 375 * immediately, we need to delete them 376 * immediately, too */ 377 if (kevent(kqop->kq, &kev, 1, NULL, 0, &timeout) == -1) 378 return (-1); 379 380 if (_evsignal_restore_handler(ev->ev_base, 381 nsignal) == -1) 382 return (-1); 383 } 384 385 ev->ev_flags &= ~EVLIST_X_KQINKERNEL; 386 return (0); 387 } 388 389 if (ev->ev_events & EV_READ) { 390 memset(&kev, 0, sizeof(kev)); 391 kev.ident = ev->ev_fd; 392 kev.filter = EVFILT_READ; 393 kev.flags = EV_DELETE; 394 395 if (kq_insert(kqop, &kev) == -1) 396 return (-1); 397 398 ev->ev_flags &= ~EVLIST_X_KQINKERNEL; 399 } 400 401 if (ev->ev_events & EV_WRITE) { 402 memset(&kev, 0, sizeof(kev)); 403 kev.ident = ev->ev_fd; 404 kev.filter = EVFILT_WRITE; 405 kev.flags = EV_DELETE; 406 407 if (kq_insert(kqop, &kev) == -1) 408 return (-1); 409 410 ev->ev_flags &= ~EVLIST_X_KQINKERNEL; 411 } 412 413 return (0); 414 } 415 416 static void 417 kq_dealloc(struct event_base *base, void *arg) 418 { 419 struct kqop *kqop = arg; 420 421 evsignal_dealloc(base); 422 423 free(kqop->changes); 424 free(kqop->events); 425 if (kqop->kq >= 0 && kqop->pid == getpid()) 426 close(kqop->kq); 427 428 memset(kqop, 0, sizeof(struct kqop)); 429 free(kqop); 430 } 431