/* $OpenBSD: kqueue.c,v 1.41 2019/05/08 17:33:22 tobias Exp $ */

/*
 * Copyright 2000-2002 Niels Provos <provos@citi.umich.edu>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */ 29 30 #include <sys/types.h> 31 #include <sys/time.h> 32 #include <sys/queue.h> 33 #include <sys/event.h> 34 35 #include <assert.h> 36 #include <errno.h> 37 #include <inttypes.h> 38 #include <limits.h> 39 #include <signal.h> 40 #include <stdio.h> 41 #include <stdlib.h> 42 #include <string.h> 43 #include <unistd.h> 44 45 #include "event.h" 46 #include "event-internal.h" 47 #include "log.h" 48 #include "evsignal.h" 49 50 #define EVLIST_X_KQINKERNEL 0x1000 51 52 #define NEVENT 64 53 54 struct kqop { 55 struct kevent *changes; 56 int nchanges; 57 struct kevent *events; 58 struct event_list evsigevents[NSIG]; 59 int nevents; 60 int kq; 61 pid_t pid; 62 }; 63 64 static void *kq_init (struct event_base *); 65 static int kq_add (void *, struct event *); 66 static int kq_del (void *, struct event *); 67 static int kq_dispatch (struct event_base *, void *, struct timeval *); 68 static int kq_insert (struct kqop *, struct kevent *); 69 static void kq_dealloc (struct event_base *, void *); 70 71 const struct eventop kqops = { 72 "kqueue", 73 kq_init, 74 kq_add, 75 kq_del, 76 kq_dispatch, 77 kq_dealloc, 78 1 /* need reinit */ 79 }; 80 81 static void * 82 kq_init(struct event_base *base) 83 { 84 int i, kq; 85 struct kqop *kqueueop; 86 87 /* Disable kqueue when this environment variable is set */ 88 if (!issetugid() && getenv("EVENT_NOKQUEUE")) 89 return (NULL); 90 91 if (!(kqueueop = calloc(1, sizeof(struct kqop)))) 92 return (NULL); 93 94 /* Initalize the kernel queue */ 95 96 if ((kq = kqueue()) == -1) { 97 event_warn("kqueue"); 98 free (kqueueop); 99 return (NULL); 100 } 101 102 kqueueop->kq = kq; 103 104 kqueueop->pid = getpid(); 105 106 /* Initalize fields */ 107 kqueueop->changes = calloc(NEVENT, sizeof(struct kevent)); 108 if (kqueueop->changes == NULL) { 109 free (kqueueop); 110 return (NULL); 111 } 112 kqueueop->events = calloc(NEVENT, sizeof(struct kevent)); 113 if (kqueueop->events == NULL) { 114 free (kqueueop->changes); 115 free (kqueueop); 116 return (NULL); 
117 } 118 kqueueop->nevents = NEVENT; 119 120 /* we need to keep track of multiple events per signal */ 121 for (i = 0; i < NSIG; ++i) { 122 TAILQ_INIT(&kqueueop->evsigevents[i]); 123 } 124 125 return (kqueueop); 126 } 127 128 static int 129 kq_insert(struct kqop *kqop, struct kevent *kev) 130 { 131 int nevents = kqop->nevents; 132 133 if (kqop->nchanges == nevents) { 134 struct kevent *newchange; 135 struct kevent *newresult; 136 137 if (nevents > INT_MAX / 2) { 138 event_warnx("%s: integer overflow", __func__); 139 return (-1); 140 } 141 nevents *= 2; 142 143 newchange = recallocarray(kqop->changes, 144 kqop->nevents, nevents, sizeof(struct kevent)); 145 if (newchange == NULL) { 146 event_warn("%s: recallocarray", __func__); 147 return (-1); 148 } 149 kqop->changes = newchange; 150 151 newresult = recallocarray(kqop->events, 152 kqop->nevents, nevents, sizeof(struct kevent)); 153 154 /* 155 * If we fail, we don't have to worry about freeing, 156 * the next realloc will pick it up. 157 */ 158 if (newresult == NULL) { 159 event_warn("%s: recallocarray", __func__); 160 return (-1); 161 } 162 kqop->events = newresult; 163 164 kqop->nevents = nevents; 165 } 166 167 memcpy(&kqop->changes[kqop->nchanges++], kev, sizeof(struct kevent)); 168 169 event_debug(("%s: fd %d %s%s", 170 __func__, (int)kev->ident, 171 kev->filter == EVFILT_READ ? "EVFILT_READ" : "EVFILT_WRITE", 172 kev->flags == EV_DELETE ? " (del)" : "")); 173 174 return (0); 175 } 176 177 static void 178 kq_sighandler(int sig) 179 { 180 /* Do nothing here */ 181 } 182 183 static int 184 kq_dispatch(struct event_base *base, void *arg, struct timeval *tv) 185 { 186 struct kqop *kqop = arg; 187 struct kevent *changes = kqop->changes; 188 struct kevent *events = kqop->events; 189 struct event *ev; 190 struct timespec ts, *ts_p = NULL; 191 int i, res; 192 193 if (tv != NULL) { 194 TIMEVAL_TO_TIMESPEC(tv, &ts); 195 ts_p = &ts; 196 } 197 198 res = kevent(kqop->kq, kqop->nchanges ? 
changes : NULL, kqop->nchanges, 199 events, kqop->nevents, ts_p); 200 kqop->nchanges = 0; 201 if (res == -1) { 202 if (errno != EINTR) { 203 event_warn("kevent"); 204 return (-1); 205 } 206 207 return (0); 208 } 209 210 event_debug(("%s: kevent reports %d", __func__, res)); 211 212 for (i = 0; i < res; i++) { 213 int which = 0; 214 215 if (events[i].flags & EV_ERROR) { 216 switch (events[i].data) { 217 218 /* Can occur on delete if we are not currently 219 * watching any events on this fd. That can 220 * happen when the fd was closed and another 221 * file was opened with that fd. */ 222 case ENOENT: 223 /* Can occur for reasons not fully understood 224 * on FreeBSD. */ 225 case EINVAL: 226 continue; 227 /* Can occur on a delete if the fd is closed. Can 228 * occur on an add if the fd was one side of a pipe, 229 * and the other side was closed. */ 230 case EBADF: 231 continue; 232 /* These two can occur on an add if the fd was one side 233 * of a pipe, and the other side was closed. */ 234 case EPERM: 235 case EPIPE: 236 /* Report read events, if we're listening for 237 * them, so that the user can learn about any 238 * add errors. (If the operation was a 239 * delete, then udata should be cleared.) */ 240 if (events[i].udata) { 241 /* The operation was an add: 242 * report the error as a read. */ 243 which |= EV_READ; 244 break; 245 } else { 246 /* The operation was a del: 247 * report nothing. */ 248 continue; 249 } 250 251 /* Other errors shouldn't occur. 
*/ 252 default: 253 errno = events[i].data; 254 return (-1); 255 } 256 } else if (events[i].filter == EVFILT_READ) { 257 which |= EV_READ; 258 } else if (events[i].filter == EVFILT_WRITE) { 259 which |= EV_WRITE; 260 } else if (events[i].filter == EVFILT_SIGNAL) { 261 which |= EV_SIGNAL; 262 } 263 264 if (!which) 265 continue; 266 267 if (events[i].filter == EVFILT_SIGNAL) { 268 struct event_list *head = 269 (struct event_list *)events[i].udata; 270 TAILQ_FOREACH(ev, head, ev_signal_next) { 271 event_active(ev, which, events[i].data); 272 } 273 } else { 274 ev = (struct event *)events[i].udata; 275 276 if (!(ev->ev_events & EV_PERSIST)) 277 ev->ev_flags &= ~EVLIST_X_KQINKERNEL; 278 279 event_active(ev, which, 1); 280 } 281 } 282 283 return (0); 284 } 285 286 287 static int 288 kq_add(void *arg, struct event *ev) 289 { 290 struct kqop *kqop = arg; 291 struct kevent kev; 292 293 if (ev->ev_events & EV_SIGNAL) { 294 int nsignal = EVENT_SIGNAL(ev); 295 296 assert(nsignal >= 0 && nsignal < NSIG); 297 if (TAILQ_EMPTY(&kqop->evsigevents[nsignal])) { 298 struct timespec timeout = { 0, 0 }; 299 300 memset(&kev, 0, sizeof(kev)); 301 kev.ident = nsignal; 302 kev.filter = EVFILT_SIGNAL; 303 kev.flags = EV_ADD; 304 kev.udata = &kqop->evsigevents[nsignal]; 305 306 /* Be ready for the signal if it is sent any 307 * time between now and the next call to 308 * kq_dispatch. 
*/ 309 if (kevent(kqop->kq, &kev, 1, NULL, 0, &timeout) == -1) 310 return (-1); 311 312 if (_evsignal_set_handler(ev->ev_base, nsignal, 313 kq_sighandler) == -1) 314 return (-1); 315 } 316 317 TAILQ_INSERT_TAIL(&kqop->evsigevents[nsignal], ev, 318 ev_signal_next); 319 ev->ev_flags |= EVLIST_X_KQINKERNEL; 320 return (0); 321 } 322 323 if (ev->ev_events & EV_READ) { 324 memset(&kev, 0, sizeof(kev)); 325 kev.ident = ev->ev_fd; 326 kev.filter = EVFILT_READ; 327 /* Make it behave like select() and poll() */ 328 kev.fflags = NOTE_EOF; 329 kev.flags = EV_ADD; 330 if (!(ev->ev_events & EV_PERSIST)) 331 kev.flags |= EV_ONESHOT; 332 kev.udata = ev; 333 334 if (kq_insert(kqop, &kev) == -1) 335 return (-1); 336 337 ev->ev_flags |= EVLIST_X_KQINKERNEL; 338 } 339 340 if (ev->ev_events & EV_WRITE) { 341 memset(&kev, 0, sizeof(kev)); 342 kev.ident = ev->ev_fd; 343 kev.filter = EVFILT_WRITE; 344 kev.flags = EV_ADD; 345 if (!(ev->ev_events & EV_PERSIST)) 346 kev.flags |= EV_ONESHOT; 347 kev.udata = ev; 348 349 if (kq_insert(kqop, &kev) == -1) 350 return (-1); 351 352 ev->ev_flags |= EVLIST_X_KQINKERNEL; 353 } 354 355 return (0); 356 } 357 358 static int 359 kq_del(void *arg, struct event *ev) 360 { 361 struct kqop *kqop = arg; 362 struct kevent kev; 363 364 if (!(ev->ev_flags & EVLIST_X_KQINKERNEL)) 365 return (0); 366 367 if (ev->ev_events & EV_SIGNAL) { 368 int nsignal = EVENT_SIGNAL(ev); 369 struct timespec timeout = { 0, 0 }; 370 371 assert(nsignal >= 0 && nsignal < NSIG); 372 TAILQ_REMOVE(&kqop->evsigevents[nsignal], ev, ev_signal_next); 373 if (TAILQ_EMPTY(&kqop->evsigevents[nsignal])) { 374 memset(&kev, 0, sizeof(kev)); 375 kev.ident = nsignal; 376 kev.filter = EVFILT_SIGNAL; 377 kev.flags = EV_DELETE; 378 379 /* Because we insert signal events 380 * immediately, we need to delete them 381 * immediately, too */ 382 if (kevent(kqop->kq, &kev, 1, NULL, 0, &timeout) == -1) 383 return (-1); 384 385 if (_evsignal_restore_handler(ev->ev_base, 386 nsignal) == -1) 387 return (-1); 
388 } 389 390 ev->ev_flags &= ~EVLIST_X_KQINKERNEL; 391 return (0); 392 } 393 394 if (ev->ev_events & EV_READ) { 395 memset(&kev, 0, sizeof(kev)); 396 kev.ident = ev->ev_fd; 397 kev.filter = EVFILT_READ; 398 kev.flags = EV_DELETE; 399 400 if (kq_insert(kqop, &kev) == -1) 401 return (-1); 402 403 ev->ev_flags &= ~EVLIST_X_KQINKERNEL; 404 } 405 406 if (ev->ev_events & EV_WRITE) { 407 memset(&kev, 0, sizeof(kev)); 408 kev.ident = ev->ev_fd; 409 kev.filter = EVFILT_WRITE; 410 kev.flags = EV_DELETE; 411 412 if (kq_insert(kqop, &kev) == -1) 413 return (-1); 414 415 ev->ev_flags &= ~EVLIST_X_KQINKERNEL; 416 } 417 418 return (0); 419 } 420 421 static void 422 kq_dealloc(struct event_base *base, void *arg) 423 { 424 struct kqop *kqop = arg; 425 426 evsignal_dealloc(base); 427 428 free(kqop->changes); 429 free(kqop->events); 430 if (kqop->kq >= 0 && kqop->pid == getpid()) 431 close(kqop->kq); 432 433 memset(kqop, 0, sizeof(struct kqop)); 434 free(kqop); 435 } 436