1 /*
2  * IRC - Internet Relay Chat, ircd/engine_kqueue.c
3  * Copyright (C) 2001 Kevin L. Mitchell <klmitch@mit.edu>
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 1, or (at your option)
8  * any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18  */
19 /** @file
20  * @brief FreeBSD kqueue()/kevent() event engine.
21  * @version $Id$
22  */
23 #include "config.h"
24 
25 #include "ircd_events.h"
26 
27 #include "ircd.h"
28 #include "ircd_alloc.h"
29 #include "ircd_features.h"
30 #include "ircd_log.h"
31 #include "s_debug.h"
32 
33 /* #include <assert.h> -- Now using assert in ircd_log.h */
34 #include <errno.h>
35 #include <signal.h>
36 #include <sys/types.h>
37 #include <sys/event.h>
38 #include <sys/socket.h>
39 #include <sys/time.h>
40 #include <time.h>
41 #include <unistd.h>
42 
#define KQUEUE_ERROR_THRESHOLD  20      /**< kevent() errors tolerated before restarting */
#define ERROR_EXPIRE_TIME       3600    /**< seconds between error-count decrements (one hour) */

/** Table of registered Socket structures; the file descriptor is the index. */
static struct Socket **sockList;
/** Number of slots in ::sockList (highest supported descriptor, plus one). */
static int kqueue_max;
/** Descriptor of the kqueue pseudo-file. */
static int kqueue_id;
/** Buffer that kevent() fills with triggered event descriptors. */
static struct kevent *events;
/** Count of ::events entries that are populated and not yet processed. */
static int events_used;

/** Running count of recent kevent() failures. */
static int errors;
/** Periodic timer that gradually forgets ::errors. */
static struct Timer clear_error;
61 
62 /** Decrement the error count (once per hour).
63  * @param[in] ev Expired timer event (ignored).
64  */
65 static void
error_clear(struct Event * ev)66 error_clear(struct Event* ev)
67 {
68   if (!--errors) /* remove timer when error count reaches 0 */
69     timer_del(ev_timer(ev));
70 }
71 
/** Set up the kqueue engine: create the kernel queue and the socket table.
 * @param[in] max_sockets Number of file descriptor slots to allocate.
 * @return 1 on success, 0 if kqueue() failed.
 */
static int
engine_init(int max_sockets)
{
  int slot;

  kqueue_id = kqueue(); /* ask the kernel for an event queue */
  if (kqueue_id < 0) {
    log_write(LS_SYSTEM, L_WARNING, 0,
              "kqueue() engine cannot initialize: %m");
    return 0;
  }

  /* one table entry per descriptor we may be asked to watch */
  sockList = (struct Socket**) MyMalloc(sizeof(struct Socket*) * max_sockets);

  /* every slot starts out unoccupied */
  slot = 0;
  while (slot < max_sockets) {
    sockList[slot] = 0;
    slot++;
  }

  kqueue_max = max_sockets; /* remember the table size for bounds checks */

  return 1;
}
98 
/** Register a signal with the kqueue so it is delivered as an event.
 *
 * An EVFILT_SIGNAL filter is installed with the Signal structure as user
 * data, then the signal's disposition is set to SIG_IGN.  If the filter
 * cannot be installed a warning is logged and the disposition is left
 * untouched.
 *
 * @param[in] sig Signal to add to engine.
 */
static void
engine_signal(struct Signal *sig)
{
  struct kevent sigevent;
  struct sigaction act;

  assert(0 != sig);

  Debug((DEBUG_ENGINE, "kqueue: Adding filter for signal %d [%p]",
         sig_signal(sig), sig));

  /* EV_SET initializes the whole structure, including any members beyond
   * the six classic ones, so no uninitialized stack bytes are handed to
   * the kernel.
   */
  EV_SET(&sigevent, sig_signal(sig), EVFILT_SIGNAL, EV_ADD | EV_ENABLE,
         0, 0, sig);

  if (kevent(kqueue_id, &sigevent, 1, 0, 0, 0) < 0) { /* add event */
    log_write(LS_SYSTEM, L_WARNING, 0, "Unable to trap signal %d",
              sig_signal(sig));
    return;
  }

  act.sa_handler = SIG_IGN; /* ignore the signal */
  act.sa_flags = 0;
  sigemptyset(&act.sa_mask);
  sigaction(sig_signal(sig), &act, 0);
}
131 
132 /** Figure out what events go with a given state.
133  * @param[in] state %Socket state to consider.
134  * @param[in] events User-specified preferred event set.
135  * @return Actual set of preferred events.
136  */
137 static unsigned int
state_to_events(enum SocketState state,unsigned int events)138 state_to_events(enum SocketState state, unsigned int events)
139 {
140   switch (state) {
141   case SS_CONNECTING: /* connecting socket */
142     return SOCK_EVENT_WRITABLE;
143     break;
144 
145   case SS_LISTENING: /* listening socket */
146   case SS_NOTSOCK: /* our signal socket--just in case */
147     return SOCK_EVENT_READABLE;
148     break;
149 
150   case SS_CONNECTED: case SS_DATAGRAM: case SS_CONNECTDG:
151     return events; /* ordinary socket */
152     break;
153   }
154 
155   /*NOTREACHED*/
156   return 0;
157 }
158 
/** Enable or disable the kqueue read/write filters of a socket.
 *
 * A filter is touched only when its bit differs between \a clear and
 * \a set; it is enabled when the bit is present in \a set and disabled
 * otherwise.  When neither bit differs no system call is made.  A
 * kevent() failure other than EBADF is reported as an ET_ERROR event on
 * the socket.
 *
 * @param[in] sock Socket structure to operate on.
 * @param[in] clear Set of interest events to clear from socket.
 * @param[in] set Set of interest events to set on socket.
 */
static void
set_or_clear(struct Socket *sock, unsigned int clear, unsigned int set)
{
  unsigned int changed = clear ^ set; /* bits whose interest flipped */
  int nchanges = 0;
  struct kevent chglist[2];

  assert(0 != sock);
  assert(-1 < s_fd(sock));

  /* EV_SET fills in the complete structure, so no uninitialized members
   * are passed to the kernel.  udata is left at 0: it is not used for
   * socket filters here.
   */
  if (changed & SOCK_EVENT_READABLE) { /* readable has changed */
    EV_SET(&chglist[nchanges], s_fd(sock), EVFILT_READ,
           EV_ADD | ((set & SOCK_EVENT_READABLE) ? EV_ENABLE : EV_DISABLE),
           0, 0, 0);
    nchanges++;
  }

  if (changed & SOCK_EVENT_WRITABLE) { /* writable has changed */
    EV_SET(&chglist[nchanges], s_fd(sock), EVFILT_WRITE,
           EV_ADD | ((set & SOCK_EVENT_WRITABLE) ? EV_ENABLE : EV_DISABLE),
           0, 0, 0);
    nchanges++;
  }

  if (nchanges == 0) /* nothing to tell the kernel; skip the system call */
    return;

  if (kevent(kqueue_id, chglist, nchanges, 0, 0, 0) < 0 && errno != EBADF)
    event_generate(ET_ERROR, sock, errno); /* report error */
}
208 
209 /** Add a socket to the event engine.
210  * @param[in] sock Socket to add to engine.
211  * @return Non-zero on success, or zero on error.
212  */
213 static int
engine_add(struct Socket * sock)214 engine_add(struct Socket* sock)
215 {
216   assert(0 != sock);
217   assert(0 == sockList[s_fd(sock)]);
218 
219   /* bounds-check... */
220   if (sock->s_fd >= kqueue_max) {
221     log_write(LS_SYSTEM, L_ERROR, 0,
222 	      "Attempt to add socket %d (> %d) to event engine", s_fd(sock),
223 	      kqueue_max);
224     return 0;
225   }
226 
227   sockList[s_fd(sock)] = sock; /* add to list */
228 
229   Debug((DEBUG_ENGINE, "kqueue: Adding socket %d [%p], state %s, to engine",
230 	 s_fd(sock), sock, state_to_name(s_state(sock))));
231 
232   /* Add socket to queue */
233   set_or_clear(sock, 0, state_to_events(s_state(sock), s_events(sock)));
234 
235   return 1; /* success */
236 }
237 
238 /** Handle state transition for a socket.
239  * @param[in] sock Socket changing state.
240  * @param[in] new_state New state for socket.
241  */
242 static void
engine_state(struct Socket * sock,enum SocketState new_state)243 engine_state(struct Socket* sock, enum SocketState new_state)
244 {
245   assert(0 != sock);
246   assert(sock == sockList[s_fd(sock)]);
247 
248   Debug((DEBUG_ENGINE, "kqueue: Changing state for socket %p to %s", sock,
249 	 state_to_name(new_state)));
250 
251   /* set the correct events */
252   set_or_clear(sock,
253 	       state_to_events(s_state(sock), s_events(sock)), /* old state */
254 	       state_to_events(new_state, s_events(sock))); /* new state */
255 
256 }
257 
258 /** Handle change to preferred socket events.
259  * @param[in] sock Socket getting new interest list.
260  * @param[in] new_events New set of interesting events for socket.
261  */
262 static void
engine_events(struct Socket * sock,unsigned int new_events)263 engine_events(struct Socket* sock, unsigned int new_events)
264 {
265   assert(0 != sock);
266   assert(sock == sockList[s_fd(sock)]);
267 
268   Debug((DEBUG_ENGINE, "kqueue: Changing event mask for socket %p to [%s]",
269 	 sock, sock_flags(new_events)));
270 
271   /* set the correct events */
272   set_or_clear(sock,
273 	       state_to_events(s_state(sock), s_events(sock)), /* old events */
274 	       state_to_events(s_state(sock), new_events)); /* new events */
275 }
276 
/** Remove a socket from the event engine.
 *
 * The socket's slot in the table is cleared and every not-yet-processed
 * read/write event that refers to its descriptor is dropped from the
 * pending event array.  Pending signal events are left alone: their
 * ident is a signal number, which may coincide with the descriptor.
 *
 * @param[in] sock Socket being destroyed.
 */
static void
engine_delete(struct Socket *sock)
{
  int ii;

  assert(0 != sock);
  assert(sock == sockList[s_fd(sock)]);

  Debug((DEBUG_ENGINE, "kqueue: Deleting socket %d [%p], state %s",
         s_fd(sock), sock, state_to_name(s_state(sock))));

  sockList[s_fd(sock)] = 0;

  /* Drop any unprocessed events citing this socket. */
  ii = 0;
  while (ii < events_used) {
    if (events[ii].filter != EVFILT_SIGNAL
        && events[ii].ident == s_fd(sock)) {
      /* Overwrite with the last pending entry and look at slot ii again:
       * the entry just moved in may cite this socket as well (e.g. both
       * a read and a write event were pending for it).
       */
      events[ii] = events[--events_used];
    } else {
      ii++;
    }
  }
}
300 
/** Run engine event loop.
 *
 * Until ::running becomes false, each pass:
 *  - resizes the event buffer if FEAT_POLLS_PER_LOOP changed (values
 *    below 20 are ignored);
 *  - waits in kevent() until the next timer is due, or indefinitely when
 *    no timer is pending;
 *  - updates CurrentTime;
 *  - dispatches every returned event as a signal, error, connect, accept,
 *    read, EOF or write event;
 *  - runs expired timers.
 *
 * kevent() failures other than EINTR are logged and counted; once the
 * count exceeds KQUEUE_ERROR_THRESHOLD the server is restarted.
 *
 * @param[in] gen Lists of generators of various types.
 */
static void
engine_loop(struct Generators *gen)
{
  int events_count;
  struct kevent *evt;
  struct Socket *sock;
  struct timespec wait;
  struct timespec *timeout;
  int i;
  int errcode;
  socklen_t codesize;

  if ((events_count = feature_int(FEAT_POLLS_PER_LOOP)) < 20)
    events_count = 20;
  events = (struct kevent *)MyMalloc(sizeof(struct kevent) * events_count);

  while (running) {
    if ((i = feature_int(FEAT_POLLS_PER_LOOP)) >= 20 && i != events_count) {
      events = (struct kevent *)MyRealloc(events, sizeof(struct kevent) * i);
      events_count = i;
    }

    /* set up the sleep time */
    wait.tv_nsec = 0;
    if (!timer_next(gen)) {
      wait.tv_sec = -1; /* no timer pending: block until something happens */
      timeout = 0;
    } else {
      wait.tv_sec = timer_next(gen) - CurrentTime;
      if (wait.tv_sec < 0) /* timer already overdue: poll, don't block */
        wait.tv_sec = 0;
      timeout = &wait;
    }

    Debug((DEBUG_ENGINE, "kqueue: delay: %Tu (%Tu) %Tu", timer_next(gen),
           CurrentTime, wait.tv_sec));

    /* check for active events */
    events_used = kevent(kqueue_id, 0, 0, events, events_count, timeout);

    CurrentTime = time(0); /* set current time... */

    if (events_used < 0) {
      if (errno != EINTR) { /* ignore kevent interrupts */
        /* Log the kqueue error */
        log_write(LS_SOCKET, L_ERROR, 0, "kevent() error: %m");
        if (!errors++) /* first error: start the timer that forgets them */
          timer_add(timer_init(&clear_error), error_clear, 0, TT_PERIODIC,
                    ERROR_EXPIRE_TIME);
        else if (errors > KQUEUE_ERROR_THRESHOLD) /* too many errors... */
          server_restart("too many kevent errors");
      }
      events_used = 0; /* no events are pending after a failed call */
      /* old code did a sleep(1) here; with usage these days,
       * that may be too expensive
       */
      continue;
    }

    /* Consume events from the tail of the array.  events_used is
     * decremented before an event is handled, so engine_delete(), which
     * only touches indices below events_used, never disturbs the entry
     * currently being processed.
     */
    while (events_used > 0) {
      evt = &events[--events_used];

      if (evt->filter == EVFILT_SIGNAL) {
        /* it's a signal; deal appropriately */
        event_generate(ET_SIGNAL, evt->udata, evt->ident);
        continue; /* skip socket processing loop */
      }

      assert(evt->filter == EVFILT_READ || evt->filter == EVFILT_WRITE);

      sock = sockList[evt->ident];
      if (!sock) /* slots may become empty while processing events */
        continue;

      assert(s_fd(sock) == evt->ident);

      gen_ref_inc(sock); /* can't have it going away on us */

      Debug((DEBUG_ENGINE, "kqueue: Checking socket %p (fd %d) state %s, "
             "events %s", sock, s_fd(sock), state_to_name(s_state(sock)),
             sock_flags(s_events(sock))));

      if (s_state(sock) != SS_NOTSOCK) {
        errcode = 0; /* check for errors on socket */
        codesize = sizeof(errcode);
        if (getsockopt(s_fd(sock), SOL_SOCKET, SO_ERROR, &errcode,
                       &codesize) < 0)
          errcode = errno; /* work around Solaris implementation */

        if (errcode) { /* an error occurred; generate an event */
          Debug((DEBUG_ENGINE, "kqueue: Error %d on fd %d, socket %p", errcode,
                 s_fd(sock), sock));
          event_generate(ET_ERROR, sock, errcode);
          gen_ref_dec(sock); /* careful not to leak reference counts */
          continue;
        }
      }

      switch (s_state(sock)) {
      case SS_CONNECTING:
        if (evt->filter == EVFILT_WRITE) { /* connection completed */
          Debug((DEBUG_ENGINE, "kqueue: Connection completed"));
          event_generate(ET_CONNECT, sock, 0);
        }
        break;

      case SS_LISTENING:
        if (evt->filter == EVFILT_READ) { /* connect. to be accept. */
          Debug((DEBUG_ENGINE, "kqueue: Ready for accept"));
          event_generate(ET_ACCEPT, sock, 0);
        }
        break;

      case SS_NOTSOCK: /* doing nothing socket-specific */
      case SS_CONNECTED:
        if (evt->filter == EVFILT_READ) { /* data on socket */
          Debug((DEBUG_ENGINE, "kqueue: EOF or data to be read"));
          event_generate(evt->flags & EV_EOF ? ET_EOF : ET_READ, sock, 0);
        }
        if (evt->filter == EVFILT_WRITE) { /* socket writable */
          Debug((DEBUG_ENGINE, "kqueue: Data can be written"));
          event_generate(ET_WRITE, sock, 0);
        }
        break;

      case SS_DATAGRAM: case SS_CONNECTDG:
        if (evt->filter == EVFILT_READ) { /* socket readable */
          Debug((DEBUG_ENGINE, "kqueue: Datagram to be read"));
          event_generate(ET_READ, sock, 0);
        }
        if (evt->filter == EVFILT_WRITE) { /* socket writable */
          Debug((DEBUG_ENGINE, "kqueue: Datagram can be written"));
          event_generate(ET_WRITE, sock, 0);
        }
        break;
      }

      gen_ref_dec(sock); /* we're done with it */
    }

    timer_run(); /* execute any pending timers */
  }
}
438 
439 /** Descriptor for kqueue() event engine. */
440 struct Engine engine_kqueue = {
441   "kqueue()",		/* Engine name */
442   engine_init,		/* Engine initialization function */
443   engine_signal,	/* Engine signal registration function */
444   engine_add,		/* Engine socket registration function */
445   engine_state,		/* Engine socket state change function */
446   engine_events,	/* Engine socket events mask function */
447   engine_delete,	/* Engine socket deletion function */
448   engine_loop		/* Core engine event loop */
449 };
450