1 /*
2  *  ircd-ratbox: A slightly useful ircd.
3  *  epoll.c: Linux epoll compatible network routines.
4  *
5  *  Copyright (C) 1990 Jarkko Oikarinen and University of Oulu, Co Center
6  *  Copyright (C) 1996-2002 Hybrid Development Team
7  *  Copyright (C) 2001 Adrian Chadd <adrian@creative.net.au>
8  *  Copyright (C) 2002-2005 ircd-ratbox development team
9  *  Copyright (C) 2002 Aaron Sethman <androsyn@ratbox.org>
10  *
11  *  This program is free software; you can redistribute it and/or modify
12  *  it under the terms of the GNU General Public License as published by
13  *  the Free Software Foundation; either version 2 of the License, or
14  *  (at your option) any later version.
15  *
16  *  This program is distributed in the hope that it will be useful,
17  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19  *  GNU General Public License for more details.
20  *
21  *  You should have received a copy of the GNU General Public License
22  *  along with this program; if not, write to the Free Software
23  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301
24  *  USA
25  *
26  *  $Id: epoll.c 26525 2009-05-13 15:31:01Z androsyn $
27  */
28 #define _GNU_SOURCE 1
29 
30 #include <libratbox_config.h>
31 #include <ratbox_lib.h>
32 #include <commio-int.h>
33 #include <event-int.h>
34 #if defined(HAVE_EPOLL_CTL) && (HAVE_SYS_EPOLL_H)
35 #define USING_EPOLL
36 #include <fcntl.h>
37 #include <sys/epoll.h>
38 
39 #if defined(HAVE_SIGNALFD) && (HAVE_SYS_SIGNALFD_H) && (USE_TIMER_CREATE) && (HAVE_SYS_UIO_H)
40 #include <signal.h>
41 #include <sys/signalfd.h>
42 #include <sys/uio.h>
43 #define EPOLL_SCHED_EVENT 1
44 #endif
45 
46 #if defined(USE_TIMERFD_CREATE)
47 #include <sys/timerfd.h>
48 #endif
49 
/* Signal number requested from POSIX timers and collected through the signalfd. */
#define RTSIGNAL SIGRTMIN

/* Bookkeeping for the single epoll instance that drives the network loop. */
struct epoll_info
{
	int ep;				/* descriptor returned by epoll_create() */
	struct epoll_event *pfd;	/* buffer epoll_wait() fills with ready events */
	int pfd_size;			/* capacity of pfd, taken from getdtablesize() */
};

static struct epoll_info *ep_info;

/*
 * Cached results of the runtime probe in rb_epoll_supports_event():
 * can_do_event is 0 until probed, 1 when timed events work and -1 when they
 * do not; can_do_timerfd becomes 1 once timerfd_create() has succeeded.
 */
static int can_do_event, can_do_timerfd;
61 
62 /*
63  * rb_init_netio
64  *
65  * This is a needed exported function which will be called to initialise
66  * the network loop code.
67  */
68 int
rb_init_netio_epoll(void)69 rb_init_netio_epoll(void)
70 {
71 	can_do_event = 0;	/* shut up gcc */
72 	can_do_timerfd = 0;
73 	ep_info = rb_malloc(sizeof(struct epoll_info));
74 	ep_info->pfd_size = getdtablesize();
75 	ep_info->ep = epoll_create(ep_info->pfd_size);
76 	if(ep_info->ep < 0)
77 	{
78 		return -1;
79 	}
80 
81 	if(rb_open(ep_info->ep, RB_FD_UNKNOWN, "epoll file descriptor") == NULL)
82 	{
83 		rb_lib_log("Unable to rb_open epoll fd");
84 		return -1;
85 	}
86 	ep_info->pfd = rb_malloc(sizeof(struct epoll_event) * ep_info->pfd_size);
87 
88 	return 0;
89 }
90 
91 int
rb_setup_fd_epoll(rb_fde_t * F)92 rb_setup_fd_epoll(rb_fde_t *F)
93 {
94 	return 0;
95 }
96 
97 
98 /*
99  * rb_setselect
100  *
101  * This is a needed exported function which will be called to register
102  * and deregister interest in a pending IO state for a given FD.
103  */
104 void
rb_setselect_epoll(rb_fde_t * F,unsigned int type,PF * handler,void * client_data)105 rb_setselect_epoll(rb_fde_t *F, unsigned int type, PF * handler, void *client_data)
106 {
107 	struct epoll_event ep_event;
108 	int old_flags = F->pflags;
109 	int op = -1;
110 
111 	lrb_assert(IsFDOpen(F));
112 
113 	/* Update the list, even though we're not using it .. */
114 	if(type & RB_SELECT_READ)
115 	{
116 		if(handler != NULL)
117 			F->pflags |= EPOLLIN;
118 		else
119 			F->pflags &= ~EPOLLIN;
120 		F->read_handler = handler;
121 		F->read_data = client_data;
122 	}
123 
124 	if(type & RB_SELECT_WRITE)
125 	{
126 		if(handler != NULL)
127 			F->pflags |= EPOLLOUT;
128 		else
129 			F->pflags &= ~EPOLLOUT;
130 		F->write_handler = handler;
131 		F->write_data = client_data;
132 	}
133 
134 	if(old_flags == 0 && F->pflags == 0)
135 		return;
136 	else if(F->pflags <= 0)
137 		op = EPOLL_CTL_DEL;
138 	else if(old_flags == 0 && F->pflags > 0)
139 		op = EPOLL_CTL_ADD;
140 	else if(F->pflags != old_flags)
141 		op = EPOLL_CTL_MOD;
142 
143 	if(op == -1)
144 		return;
145 
146 	ep_event.events = F->pflags;
147 	ep_event.data.ptr = F;
148 
149 	if(op == EPOLL_CTL_ADD || op == EPOLL_CTL_MOD)
150 		ep_event.events |= EPOLLET;
151 
152 	if(epoll_ctl(ep_info->ep, op, F->fd, &ep_event) != 0)
153 	{
154 		rb_lib_log("rb_setselect_epoll(): epoll_ctl failed: %s", strerror(errno));
155 		abort();
156 	}
157 
158 
159 }
160 
161 /*
162  * rb_select
163  *
164  * Called to do the new-style IO, courtesy of squid (like most of this
165  * new IO code). This routine handles the stuff we've hidden in
166  * rb_setselect and fd_table[] and calls callbacks for IO ready
167  * events.
168  */
169 
170 int
rb_select_epoll(long delay)171 rb_select_epoll(long delay)
172 {
173 	int num, i, flags, old_flags, op;
174 	struct epoll_event ep_event;
175 	int o_errno;
176 	void *data;
177 
178 	num = epoll_wait(ep_info->ep, ep_info->pfd, ep_info->pfd_size, delay);
179 
180 	/* save errno as rb_set_time() will likely clobber it */
181 	o_errno = errno;
182 	rb_set_time();
183 	errno = o_errno;
184 
185 	if(num < 0 && !rb_ignore_errno(o_errno))
186 		return RB_ERROR;
187 
188 	if(num <= 0)
189 		return RB_OK;
190 
191 	for(i = 0; i < num; i++)
192 	{
193 		PF *hdl;
194 		rb_fde_t *F = ep_info->pfd[i].data.ptr;
195 		old_flags = F->pflags;
196 		if(ep_info->pfd[i].events & (EPOLLIN | EPOLLHUP | EPOLLERR))
197 		{
198 			hdl = F->read_handler;
199 			data = F->read_data;
200 			F->read_handler = NULL;
201 			F->read_data = NULL;
202 			if(hdl)
203 			{
204 				hdl(F, data);
205 			}
206 		}
207 
208 		if(!IsFDOpen(F))
209 			continue;
210 		if(ep_info->pfd[i].events & (EPOLLOUT | EPOLLHUP | EPOLLERR))
211 		{
212 			hdl = F->write_handler;
213 			data = F->write_data;
214 			F->write_handler = NULL;
215 			F->write_data = NULL;
216 
217 			if(hdl)
218 			{
219 				hdl(F, data);
220 			}
221 		}
222 
223 		if(!IsFDOpen(F))
224 			continue;
225 
226 		flags = 0;
227 
228 		if(F->read_handler != NULL)
229 			flags |= EPOLLIN;
230 		if(F->write_handler != NULL)
231 			flags |= EPOLLOUT;
232 
233 		if(old_flags != flags)
234 		{
235 			if(flags == 0)
236 				op = EPOLL_CTL_DEL;
237 			else
238 				op = EPOLL_CTL_MOD;
239 			F->pflags = ep_event.events = flags;
240 			ep_event.data.ptr = F;
241 			if(op == EPOLL_CTL_MOD || op == EPOLL_CTL_ADD)
242 				ep_event.events |= EPOLLET;
243 
244 			if(epoll_ctl(ep_info->ep, op, F->fd, &ep_event) != 0)
245 			{
246 				rb_lib_log("rb_select_epoll(): epoll_ctl failed: %s",
247 					   strerror(errno));
248 			}
249 		}
250 
251 	}
252 	return RB_OK;
253 }
254 
255 #ifdef EPOLL_SCHED_EVENT
256 int
rb_epoll_supports_event(void)257 rb_epoll_supports_event(void)
258 {
259 	/* try to detect at runtime if everything we need actually works */
260 	timer_t timer;
261 	struct sigevent ev;
262 	int fd;
263 	sigset_t set;
264 
265 	if(can_do_event == 1)
266 		return 1;
267 	if(can_do_event == -1)
268 		return 0;
269 
270 #ifdef USE_TIMERFD_CREATE
271 	if((fd = timerfd_create(CLOCK_REALTIME, 0)) >= 0)
272 	{
273 		close(fd);
274 		can_do_event = 1;
275 		can_do_timerfd = 1;
276 		return 1;
277 	}
278 #endif
279 
280 	ev.sigev_signo = SIGVTALRM;
281 	ev.sigev_notify = SIGEV_SIGNAL;
282 	if(timer_create(CLOCK_REALTIME, &ev, &timer) != 0)
283 	{
284 		can_do_event = -1;
285 		return 0;
286 	}
287 	timer_delete(timer);
288 	sigemptyset(&set);
289 	fd = signalfd(-1, &set, 0);
290 	if(fd < 0)
291 	{
292 		can_do_event = -1;
293 		return 0;
294 	}
295 	close(fd);
296 	can_do_event = 1;
297 	return 1;
298 }
299 
300 
/*
 * Private record layout for reads from the signalfd, declared here to work
 * around a glibc header bug on 32bit systems. The member sizes add up to
 * 128 bytes. svptr is the member signalfd_handler() reads the scheduled
 * event pointer from.
 * NOTE(review): member names presumably follow the ssi_* members of the
 * system's struct signalfd_siginfo -- confirm against <sys/signalfd.h>.
 */
struct our_signalfd_siginfo
{
	uint32_t signo;
	int32_t err, code;
	uint32_t pid, uid;
	int32_t fd;
	uint32_t tid, band, overrun, trapno;
	int32_t status, svint;
	uint64_t svptr, utime, stime, addr;
	uint8_t pad[48];
};
322 
323 
324 #define SIGFDIOV_COUNT 16
325 static void
signalfd_handler(rb_fde_t * F,void * data)326 signalfd_handler(rb_fde_t *F, void *data)
327 {
328 	static struct our_signalfd_siginfo fdsig[SIGFDIOV_COUNT];
329 	static struct iovec iov[SIGFDIOV_COUNT];
330 	struct ev_entry *ev;
331 	int ret, x;
332 
333 	for(x = 0; x < SIGFDIOV_COUNT; x++)
334 	{
335 		iov[x].iov_base = &fdsig[x];
336 		iov[x].iov_len = sizeof(struct our_signalfd_siginfo);
337 	}
338 
339 	while(1)
340 	{
341 		ret = readv(rb_get_fd(F), iov, SIGFDIOV_COUNT);
342 
343 		if(ret == 0 || (ret < 0 && !rb_ignore_errno(errno)))
344 		{
345 			rb_close(F);
346 			rb_epoll_init_event();
347 			return;
348 		}
349 
350 		if(ret < 0)
351 		{
352 			rb_setselect(F, RB_SELECT_READ, signalfd_handler, NULL);
353 			return;
354 		}
355 		for(x = 0; x < ret / (int)sizeof(struct our_signalfd_siginfo); x++)
356 		{
357 #if __WORDSIZE == 32 && defined(__sparc__)
358 			uint32_t *q = (uint32_t *)&fdsig[x].svptr;
359 			ev = (struct ev_entry *)q[0];
360 #else
361 			ev = (struct ev_entry *)(uintptr_t)(fdsig[x].svptr);
362 
363 #endif
364 			if(ev == NULL)
365 				continue;
366 			rb_run_event(ev);
367 		}
368 	}
369 }
370 
371 void
rb_epoll_init_event(void)372 rb_epoll_init_event(void)
373 {
374 
375 	sigset_t ss;
376 	rb_fde_t *F;
377 	int sfd;
378 	rb_epoll_supports_event();
379 	if(!can_do_timerfd)
380 	{
381 		sigemptyset(&ss);
382 		sigaddset(&ss, RTSIGNAL);
383 		sigprocmask(SIG_BLOCK, &ss, 0);
384 		sigemptyset(&ss);
385 		sigaddset(&ss, RTSIGNAL);
386 		sfd = signalfd(-1, &ss, 0);
387 		if(sfd == -1)
388 		{
389 			can_do_event = -1;
390 			return;
391 		}
392 		F = rb_open(sfd, RB_FD_UNKNOWN, "signalfd");
393 		rb_set_nb(F);
394 		signalfd_handler(F, NULL);
395 	}
396 }
397 
398 static int
rb_epoll_sched_event_signalfd(struct ev_entry * event,int when)399 rb_epoll_sched_event_signalfd(struct ev_entry *event, int when)
400 {
401 	timer_t *id;
402 	struct sigevent ev;
403 	struct itimerspec ts;
404 
405 	memset(&ev, 0, sizeof(&ev));
406 	event->comm_ptr = rb_malloc(sizeof(timer_t));
407 	id = event->comm_ptr;
408 	ev.sigev_notify = SIGEV_SIGNAL;
409 	ev.sigev_signo = RTSIGNAL;
410 	ev.sigev_value.sival_ptr = event;
411 
412 	if(timer_create(CLOCK_REALTIME, &ev, id) < 0)
413 	{
414 		rb_lib_log("timer_create: %s\n", strerror(errno));
415 		return 0;
416 	}
417 	memset(&ts, 0, sizeof(ts));
418 	ts.it_value.tv_sec = when;
419 	ts.it_value.tv_nsec = 0;
420 	if(event->frequency != 0)
421 		ts.it_interval = ts.it_value;
422 
423 	if(timer_settime(*id, 0, &ts, NULL) < 0)
424 	{
425 		rb_lib_log("timer_settime: %s\n", strerror(errno));
426 		return 0;
427 	}
428 	return 1;
429 }
430 
431 #ifdef USE_TIMERFD_CREATE
432 static void
rb_read_timerfd(rb_fde_t * F,void * data)433 rb_read_timerfd(rb_fde_t *F, void *data)
434 {
435 	struct ev_entry *event = (struct ev_entry *)data;
436 	int retlen;
437 	uint64_t count;
438 
439 	if(event == NULL)
440 	{
441 		rb_close(F);
442 		return;
443 	}
444 
445 	retlen = rb_read(F, &count, sizeof(count));
446 
447 	if(retlen == 0 || (retlen < 0 && !rb_ignore_errno(errno)))
448 	{
449 		rb_close(F);
450 		rb_lib_log("rb_read_timerfd: timerfd[%s] closed on error: %s", event->name,
451 			   strerror(errno));
452 		return;
453 	}
454 	rb_setselect(F, RB_SELECT_READ, rb_read_timerfd, event);
455 	rb_run_event(event);
456 }
457 
458 
459 static int
rb_epoll_sched_event_timerfd(struct ev_entry * event,int when)460 rb_epoll_sched_event_timerfd(struct ev_entry *event, int when)
461 {
462 	struct itimerspec ts;
463 	static char buf[FD_DESC_SZ + 8];
464 	int fd;
465 	rb_fde_t *F;
466 
467 	if((fd = timerfd_create(CLOCK_REALTIME, 0)) < 0)
468 	{
469 		rb_lib_log("timerfd_create: %s\n", strerror(errno));
470 		return 0;
471 	}
472 
473 	memset(&ts, 0, sizeof(ts));
474 	ts.it_value.tv_sec = when;
475 	ts.it_value.tv_nsec = 0;
476 	if(event->frequency != 0)
477 		ts.it_interval = ts.it_value;
478 
479 	if(timerfd_settime(fd, 0, &ts, NULL) < 0)
480 	{
481 		rb_lib_log("timerfd_settime: %s\n", strerror(errno));
482 		close(fd);
483 		return 0;
484 	}
485 	rb_snprintf(buf, sizeof(buf), "timerfd: %s", event->name);
486 	F = rb_open(fd, RB_FD_UNKNOWN, buf);
487 	rb_set_nb(F);
488 	event->comm_ptr = F;
489 	rb_setselect(F, RB_SELECT_READ, rb_read_timerfd, event);
490 	return 1;
491 }
492 #endif
493 
494 
495 
/*
 * rb_epoll_sched_event
 *
 * Schedules event to fire after when seconds. Uses the timerfd
 * implementation when it was compiled in and the runtime probe found it
 * working (can_do_timerfd), and the POSIX timer + signalfd implementation
 * otherwise. The chosen implementation's result is returned unchanged.
 */
int
rb_epoll_sched_event(struct ev_entry *event, int when)
{
#ifdef USE_TIMERFD_CREATE
	if(can_do_timerfd)
		return rb_epoll_sched_event_timerfd(event, when);
#endif

	return rb_epoll_sched_event_signalfd(event, when);
}
507 
508 void
rb_epoll_unsched_event(struct ev_entry * event)509 rb_epoll_unsched_event(struct ev_entry *event)
510 {
511 #ifdef USE_TIMERFD_CREATE
512 	if(can_do_timerfd)
513 	{
514 		rb_close((rb_fde_t *)event->comm_ptr);
515 		event->comm_ptr = NULL;
516 		return;
517 	}
518 #endif
519 	timer_delete(*((timer_t *) event->comm_ptr));
520 	rb_free(event->comm_ptr);
521 	event->comm_ptr = NULL;
522 }
523 #endif /* EPOLL_SCHED_EVENT */
524 
525 #else /* epoll not supported here */
/*
 * Stub used when epoll support is not compiled in: initialisation cannot
 * succeed, so the ENOSYS error code itself is returned.
 */
int
rb_init_netio_epoll(void)
{
	const int unsupported = ENOSYS;

	return unsupported;
}
531 
532 void
rb_setselect_epoll(rb_fde_t * F,unsigned int type,PF * handler,void * client_data)533 rb_setselect_epoll(rb_fde_t *F, unsigned int type, PF * handler, void *client_data)
534 {
535 	errno = ENOSYS;
536 	return;
537 }
538 
/*
 * Stub used when epoll support is not compiled in: sets errno to ENOSYS
 * and returns -1 without waiting.
 */
int
rb_select_epoll(long delay)
{
	(void)delay;

	errno = ENOSYS;
	return -1;
}
545 
546 int
rb_setup_fd_epoll(rb_fde_t * F)547 rb_setup_fd_epoll(rb_fde_t *F)
548 {
549 	errno = ENOSYS;
550 	return -1;
551 }
552 
553 
554 #endif
555 
556 #if !defined(USING_EPOLL) || !defined(EPOLL_SCHED_EVENT)
/*
 * Stub used when epoll-based timed events are not compiled in: there is
 * nothing to initialise.
 */
void
rb_epoll_init_event(void)
{
	/* intentionally empty */
}
562 
/*
 * Stub used when epoll-based timed events are not compiled in: nothing is
 * scheduled; errno is set to ENOSYS and -1 is returned.
 */
int
rb_epoll_sched_event(struct ev_entry *event, int when)
{
	(void)event;
	(void)when;

	errno = ENOSYS;
	return -1;
}
569 
/*
 * Stub used when epoll-based timed events are not compiled in: there is
 * never anything to cancel.
 */
void
rb_epoll_unsched_event(struct ev_entry *event)
{
	(void)event;
}
575 
/*
 * Stub used when epoll-based timed events are not compiled in: sets errno
 * to ENOSYS and reports "not supported" by returning 0.
 */
int
rb_epoll_supports_event(void)
{
	const int supported = 0;

	errno = ENOSYS;
	return supported;
}
582 #endif /* !USING_EPOLL || !EPOLL_SCHED_EVENT */
583