1 /*	$NetBSD: devpoll.c,v 1.5 2020/05/25 20:47:33 christos Exp $	*/
2 
3 /*
4  * Copyright 2000-2009 Niels Provos <provos@citi.umich.edu>
5  * Copyright 2009-2012 Niels Provos and Nick Mathewson
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. The name of the author may not be used to endorse or promote products
16  *    derived from this software without specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29 #include "event2/event-config.h"
30 #include "evconfig-private.h"
31 
32 #ifdef EVENT__HAVE_DEVPOLL
33 
34 #include <sys/types.h>
35 #include <sys/resource.h>
36 #ifdef EVENT__HAVE_SYS_TIME_H
37 #include <sys/time.h>
38 #endif
39 #include <sys/queue.h>
40 #include <sys/devpoll.h>
41 #include <signal.h>
42 #include <stdio.h>
43 #include <stdlib.h>
44 #include <string.h>
45 #include <unistd.h>
46 #include <fcntl.h>
47 #include <errno.h>
48 
49 #include "event2/event.h"
50 #include "event2/event_struct.h"
51 #include "event2/thread.h"
52 #include "event-internal.h"
53 #include "evsignal-internal.h"
54 #include "log-internal.h"
55 #include "evmap-internal.h"
56 #include "evthread-internal.h"
57 
/*
 * Per-event_base state for the /dev/poll backend.
 */
struct devpollop {
	/* Result buffer handed to the DP_POLL ioctl as dp_fds. */
	struct pollfd *events;
	/*
	 * Number of entries allocated for 'events'.  The same count is used
	 * as the capacity bound for 'changes'.
	 */
	int nevents;
	/* Descriptor obtained by opening /dev/poll. */
	int dpfd;
	/* Queued change records waiting to be written to dpfd. */
	struct pollfd *changes;
	/* Number of records currently queued in 'changes'. */
	int nchanges;
};
65 
/* Forward declarations of the backend entry points defined in this file. */
static void *devpoll_init(struct event_base *base);
static int devpoll_add(struct event_base *base, int fd, short old,
    short events, void *p);
static int devpoll_del(struct event_base *base, int fd, short old,
    short events, void *p);
static int devpoll_dispatch(struct event_base *base, struct timeval *tv);
static void devpoll_dealloc(struct event_base *base);
71 
72 const struct eventop devpollops = {
73 	"devpoll",
74 	devpoll_init,
75 	devpoll_add,
76 	devpoll_del,
77 	devpoll_dispatch,
78 	devpoll_dealloc,
79 	1, /* need reinit */
80 	EV_FEATURE_FDS|EV_FEATURE_O1,
81 	0
82 };
83 
/*
 * Default number of pollfd slots allocated by devpoll_init() when
 * RLIMIT_NOFILE cannot be read or is reported as RLIM_INFINITY.
 */
#define NEVENT	32000
85 
86 static int
devpoll_commit(struct devpollop * devpollop)87 devpoll_commit(struct devpollop *devpollop)
88 {
89 	/*
90 	 * Due to a bug in Solaris, we have to use pwrite with an offset of 0.
91 	 * Write is limited to 2GB of data, until it will fail.
92 	 */
93 	if (pwrite(devpollop->dpfd, devpollop->changes,
94 		sizeof(struct pollfd) * devpollop->nchanges, 0) == -1)
95 		return (-1);
96 
97 	devpollop->nchanges = 0;
98 	return (0);
99 }
100 
101 static int
devpoll_queue(struct devpollop * devpollop,int fd,int events)102 devpoll_queue(struct devpollop *devpollop, int fd, int events) {
103 	struct pollfd *pfd;
104 
105 	if (devpollop->nchanges >= devpollop->nevents) {
106 		/*
107 		 * Change buffer is full, must commit it to /dev/poll before
108 		 * adding more
109 		 */
110 		if (devpoll_commit(devpollop) != 0)
111 			return (-1);
112 	}
113 
114 	pfd = &devpollop->changes[devpollop->nchanges++];
115 	pfd->fd = fd;
116 	pfd->events = events;
117 	pfd->revents = 0;
118 
119 	return (0);
120 }
121 
122 static void *
devpoll_init(struct event_base * base)123 devpoll_init(struct event_base *base)
124 {
125 	int dpfd, nfiles = NEVENT;
126 	struct rlimit rl;
127 	struct devpollop *devpollop;
128 
129 	if (!(devpollop = mm_calloc(1, sizeof(struct devpollop))))
130 		return (NULL);
131 
132 	if (getrlimit(RLIMIT_NOFILE, &rl) == 0 &&
133 	    rl.rlim_cur != RLIM_INFINITY)
134 		nfiles = rl.rlim_cur;
135 
136 	/* Initialize the kernel queue */
137 	if ((dpfd = evutil_open_closeonexec_("/dev/poll", O_RDWR, 0)) == -1) {
138 		event_warn("open: /dev/poll");
139 		mm_free(devpollop);
140 		return (NULL);
141 	}
142 
143 	devpollop->dpfd = dpfd;
144 
145 	/* Initialize fields */
146 	/* FIXME: allocating 'nfiles' worth of space here can be
147 	 * expensive and unnecessary.  See how epoll.c does it instead. */
148 	devpollop->events = mm_calloc(nfiles, sizeof(struct pollfd));
149 	if (devpollop->events == NULL) {
150 		mm_free(devpollop);
151 		close(dpfd);
152 		return (NULL);
153 	}
154 	devpollop->nevents = nfiles;
155 
156 	devpollop->changes = mm_calloc(nfiles, sizeof(struct pollfd));
157 	if (devpollop->changes == NULL) {
158 		mm_free(devpollop->events);
159 		mm_free(devpollop);
160 		close(dpfd);
161 		return (NULL);
162 	}
163 
164 	evsig_init_(base);
165 
166 	return (devpollop);
167 }
168 
169 static int
devpoll_dispatch(struct event_base * base,struct timeval * tv)170 devpoll_dispatch(struct event_base *base, struct timeval *tv)
171 {
172 	struct devpollop *devpollop = base->evbase;
173 	struct pollfd *events = devpollop->events;
174 	struct dvpoll dvp;
175 	int i, res, timeout = -1;
176 
177 	if (devpollop->nchanges)
178 		devpoll_commit(devpollop);
179 
180 	if (tv != NULL)
181 		timeout = tv->tv_sec * 1000 + (tv->tv_usec + 999) / 1000;
182 
183 	dvp.dp_fds = devpollop->events;
184 	dvp.dp_nfds = devpollop->nevents;
185 	dvp.dp_timeout = timeout;
186 
187 	EVBASE_RELEASE_LOCK(base, th_base_lock);
188 
189 	res = ioctl(devpollop->dpfd, DP_POLL, &dvp);
190 
191 	EVBASE_ACQUIRE_LOCK(base, th_base_lock);
192 
193 	if (res == -1) {
194 		if (errno != EINTR) {
195 			event_warn("ioctl: DP_POLL");
196 			return (-1);
197 		}
198 
199 		return (0);
200 	}
201 
202 	event_debug(("%s: devpoll_wait reports %d", __func__, res));
203 
204 	for (i = 0; i < res; i++) {
205 		int which = 0;
206 		int what = events[i].revents;
207 
208 		if (what & POLLHUP)
209 			what |= POLLIN | POLLOUT;
210 		else if (what & POLLERR)
211 			what |= POLLIN | POLLOUT;
212 
213 		if (what & POLLIN)
214 			which |= EV_READ;
215 		if (what & POLLOUT)
216 			which |= EV_WRITE;
217 
218 		if (!which)
219 			continue;
220 
221 		/* XXX(niels): not sure if this works for devpoll */
222 		evmap_io_active_(base, events[i].fd, which);
223 	}
224 
225 	return (0);
226 }
227 
228 
229 static int
devpoll_add(struct event_base * base,int fd,short old,short events,void * p)230 devpoll_add(struct event_base *base, int fd, short old, short events, void *p)
231 {
232 	struct devpollop *devpollop = base->evbase;
233 	int res;
234 	(void)p;
235 
236 	/*
237 	 * It's not necessary to OR the existing read/write events that we
238 	 * are currently interested in with the new event we are adding.
239 	 * The /dev/poll driver ORs any new events with the existing events
240 	 * that it has cached for the fd.
241 	 */
242 
243 	res = 0;
244 	if (events & EV_READ)
245 		res |= POLLIN;
246 	if (events & EV_WRITE)
247 		res |= POLLOUT;
248 
249 	if (devpoll_queue(devpollop, fd, res) != 0)
250 		return (-1);
251 
252 	return (0);
253 }
254 
255 static int
devpoll_del(struct event_base * base,int fd,short old,short events,void * p)256 devpoll_del(struct event_base *base, int fd, short old, short events, void *p)
257 {
258 	struct devpollop *devpollop = base->evbase;
259 	int res;
260 	(void)p;
261 
262 	res = 0;
263 	if (events & EV_READ)
264 		res |= POLLIN;
265 	if (events & EV_WRITE)
266 		res |= POLLOUT;
267 
268 	/*
269 	 * The only way to remove an fd from the /dev/poll monitored set is
270 	 * to use POLLREMOVE by itself.  This removes ALL events for the fd
271 	 * provided so if we care about two events and are only removing one
272 	 * we must re-add the other event after POLLREMOVE.
273 	 */
274 
275 	if (devpoll_queue(devpollop, fd, POLLREMOVE) != 0)
276 		return (-1);
277 
278 	if ((res & (POLLIN|POLLOUT)) != (POLLIN|POLLOUT)) {
279 		/*
280 		 * We're not deleting all events, so we must resubmit the
281 		 * event that we are still interested in if one exists.
282 		 */
283 
284 		if ((res & POLLIN) && (old & EV_WRITE)) {
285 			/* Deleting read, still care about write */
286 			devpoll_queue(devpollop, fd, POLLOUT);
287 		} else if ((res & POLLOUT) && (old & EV_READ)) {
288 			/* Deleting write, still care about read */
289 			devpoll_queue(devpollop, fd, POLLIN);
290 		}
291 	}
292 
293 	return (0);
294 }
295 
296 static void
devpoll_dealloc(struct event_base * base)297 devpoll_dealloc(struct event_base *base)
298 {
299 	struct devpollop *devpollop = base->evbase;
300 
301 	evsig_dealloc_(base);
302 	if (devpollop->events)
303 		mm_free(devpollop->events);
304 	if (devpollop->changes)
305 		mm_free(devpollop->changes);
306 	if (devpollop->dpfd >= 0)
307 		close(devpollop->dpfd);
308 
309 	memset(devpollop, 0, sizeof(struct devpollop));
310 	mm_free(devpollop);
311 }
312 
313 #endif /* EVENT__HAVE_DEVPOLL */
314