1 /*
2  * %CopyrightBegin%
3  *
4  * Copyright Ericsson AB 2006-2020. All Rights Reserved.
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  *     http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  *
18  * %CopyrightEnd%
19  */
20 
21 /*
22  * Description:	Check I/O
23  *
24  * Author: 	Rickard Green
25  */
26 
27 #ifdef HAVE_CONFIG_H
28 #  include "config.h"
29 #endif
30 
31 #define ERL_CHECK_IO_C__
32 #ifndef WANT_NONBLOCKING
33 #  define WANT_NONBLOCKING
34 #endif
35 #include "sys.h"
36 #include "global.h"
37 #include "erl_port.h"
38 #include "erl_check_io.h"
39 #include "erl_thr_progress.h"
40 #include "erl_bif_unique.h"
41 #include "dtrace-wrapper.h"
42 #include "lttng-wrapper.h"
43 #define ERTS_WANT_TIMER_WHEEL_API
44 #include "erl_time.h"
45 
46 #if 0
47 #define DEBUG_PRINT(FMT, ...) do { erts_printf(FMT "\r\n", ##__VA_ARGS__); fflush(stdout); } while(0)
48 #define DEBUG_PRINT_FD(FMT, STATE, ...)                                 \
49     DEBUG_PRINT("%d: " FMT " (ev=%s, ac=%s, flg=%s)",                   \
50                 (STATE) ? (STATE)->fd : (ErtsSysFdType)-1, ##__VA_ARGS__, \
51                 ev2str((STATE) ? (STATE)->events : ERTS_POLL_EV_NONE),  \
52                 ev2str((STATE) ? (STATE)->active_events : ERTS_POLL_EV_NONE), \
53                 (STATE) ? flag2str((STATE)->flags) : ERTS_EV_FLAG_CLEAR)
54 #define DEBUG_PRINT_MODE
55 #else
56 #define DEBUG_PRINT(...)
57 #endif
58 
59 #ifndef DEBUG_PRINT_FD
60 #define DEBUG_PRINT_FD(...)
61 #endif
62 
63 #ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS
64 #  include "safe_hash.h"
65 #  define DRV_EV_STATE_HTAB_SIZE 1024
66 #endif
67 
/*
 * Kind of selection currently registered on an fd's event state.
 * ERTS_EV_TYPE_NONE must stay 0: grow_drv_ev_state() zero-fills new
 * entries and relies on that meaning "no type".
 */
typedef enum {
    ERTS_EV_TYPE_NONE     = 0,
    ERTS_EV_TYPE_DRV_SEL  = 1, /* driver_select */
    ERTS_EV_TYPE_STOP_USE = 2, /* pending stop_select */
    ERTS_EV_TYPE_NIF      = 3, /* enif_select */
    ERTS_EV_TYPE_STOP_NIF = 4  /* pending nif stop */
} EventStateType;
75 
/*
 * Bit flags stored in ErtsDrvEventState.flags.
 *
 * Flags that belong to a feature which is compiled out are defined as
 * ERTS_EV_FLAG_CLEAR (0), so that tests such as
 * (flags & ERTS_EV_FLAG_FALLBACK) are always false in such builds.
 */
typedef enum {
    ERTS_EV_FLAG_CLEAR         = 0,
    ERTS_EV_FLAG_USED          = 1,   /* ERL_DRV_USE has been turned on */
#if ERTS_POLL_USE_SCHEDULER_POLLING
    ERTS_EV_FLAG_SCHEDULER     = 2,   /* Set when the fd has been migrated
                                         to scheduler pollset */
    ERTS_EV_FLAG_IN_SCHEDULER  = 4,   /* Set when the fd is currently in
                                         scheduler pollset */
#else
    ERTS_EV_FLAG_SCHEDULER     = ERTS_EV_FLAG_CLEAR,
    ERTS_EV_FLAG_IN_SCHEDULER  = ERTS_EV_FLAG_CLEAR,
#endif
    /* Tested with #if (not #ifdef), like every other use of
       ERTS_POLL_USE_FALLBACK in this file, so that a definition
       of 0 disables the flag. */
#if ERTS_POLL_USE_FALLBACK
    ERTS_EV_FLAG_FALLBACK      = 8,   /* Set when kernel poll rejected fd
                                         and it was put in the nkp version */
#else
    ERTS_EV_FLAG_FALLBACK      = ERTS_EV_FLAG_CLEAR,
#endif
    ERTS_EV_FLAG_WANT_ERROR    = 0x10,  /* ERL_NIF_SELECT_ERROR turned on */

    /* Combinations */
    ERTS_EV_FLAG_USED_FALLBACK = ERTS_EV_FLAG_USED | ERTS_EV_FLAG_FALLBACK,
    ERTS_EV_FLAG_USED_SCHEDULER = ERTS_EV_FLAG_USED | ERTS_EV_FLAG_SCHEDULER,
    ERTS_EV_FLAG_USED_IN_SCHEDULER = ERTS_EV_FLAG_USED | ERTS_EV_FLAG_SCHEDULER | ERTS_EV_FLAG_IN_SCHEDULER,
    ERTS_EV_FLAG_UNUSED_SCHEDULER = ERTS_EV_FLAG_SCHEDULER,
    ERTS_EV_FLAG_UNUSED_IN_SCHEDULER = ERTS_EV_FLAG_SCHEDULER | ERTS_EV_FLAG_IN_SCHEDULER
} EventStateFlags;
103 
/*
 * Map an EventStateFlags value to a short string for debug printing.
 * Only the exact values compared below are recognized; every other
 * value (for example any one containing ERTS_EV_FLAG_WANT_ERROR)
 * yields "ERROR". The argument is evaluated several times.
 */
#define flag2str(flags)                                                 \
    ((flags) == ERTS_EV_FLAG_CLEAR ? "CLEAR" :                          \
     ((flags) == ERTS_EV_FLAG_USED ? "USED" :                           \
      ((flags) == ERTS_EV_FLAG_FALLBACK ? "FLBK" :                      \
       ((flags) == ERTS_EV_FLAG_USED_FALLBACK ? "USED|FLBK" :           \
        ((flags) == ERTS_EV_FLAG_USED_SCHEDULER ? "USED|SCHD" :         \
         ((flags) == ERTS_EV_FLAG_UNUSED_SCHEDULER ? "SCHD" :           \
          ((flags) == ERTS_EV_FLAG_USED_IN_SCHEDULER ? "USED|IN_SCHD" : \
           ((flags) == ERTS_EV_FLAG_UNUSED_IN_SCHEDULER ? "IN_SCHD" :   \
            "ERROR"))))))))
114 
/* How many events can be handled at once by one erts_poll_wait call */
116 #define ERTS_CHECK_IO_POLL_RES_LEN 512
117 
/* Each I/O poll thread has its own ErtsPollThread. The ps field
   can point to either a private ErtsPollSet or a shared one.
   At the moment only kqueue and epoll pollsets can be
   shared across threads.
*/
typedef struct erts_poll_thread
{
    ErtsPollSet *ps;         /* pollset this thread works on */
    ErtsPollResFd *pollres;  /* presumably the poll result buffer -- TODO confirm */
    ErtsThrPrgrData *tpd;    /* thread progress data -- usage not shown here */
    int pollres_len;         /* presumably the capacity of pollres -- TODO confirm */
} ErtsPollThread;
130 
131 /* pollsetv contains pointers to the ErtsPollSets that are in use.
132  * Which pollset to use is determined by hashing the fd.
133  */
static ErtsPollSet **pollsetv;  /* indexed by get_pollset_id(fd) */
static ErtsPollThread *psiv;    /* presumably one entry per poll thread -- TODO confirm */
#if ERTS_POLL_USE_FALLBACK
static ErtsPollSet *flbk_pollset;  /* returned by get_fallback_pollset() */
#endif
#if ERTS_POLL_USE_SCHEDULER_POLLING
static ErtsPollSet *sched_pollset; /* returned by get_scheduler_pollset() */
#endif
142 
/*
 * Event state of one file descriptor. Functions in this file access it
 * with the mutex returned by fd_mtx(fd) held.
 *
 * With ERTS_SYS_CONTINOUS_FD_NUMBERS the states live in the array
 * drv_ev_state.v indexed by fd; otherwise they are entries in the
 * safe hash table drv_ev_state.tab (hence the leading bucket header).
 */
typedef struct {
#ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS
    SafeHashBucket hb;
#endif
    ErtsSysFdType fd;
    struct {
	ErtsDrvSelectDataState *select;   /* ERTS_EV_TYPE_DRV_SEL */
        ErtsNifSelectDataState *nif;      /* ERTS_EV_TYPE_NIF */
        union {
            erts_driver_t*  drv_ptr;    /* ERTS_EV_TYPE_STOP_USE */
            ErtsResource* resource;   /* ERTS_EV_TYPE_STOP_NIF */
        } stop;
    } driver;
    ErtsPollEvents events;        /* The events that have been selected upon */
    ErtsPollEvents active_events; /* The events currently active in the pollset */
    EventStateType type;
    EventStateFlags flags;
    int count;                    /* Number of times this fd has triggered
                                     without being deselected. */
} ErtsDrvEventState;
163 
/*
 * Container for all fd event states and the locks protecting them.
 */
struct drv_ev_state_shared {

    /* Striped mutexes; fd_mtx() selects one by hashing the fd. Each
       element is padded via the union's byte array, sized with
       ERTS_ALC_CACHE_LINE_ALIGN_SIZE. */
    union {
        erts_mtx_t lck;
        byte _cache_line_alignment[ERTS_ALC_CACHE_LINE_ALIGN_SIZE(sizeof(erts_mtx_t))];
    } locks[ERTS_CHECK_IO_DRV_EV_STATE_LOCK_CNT];

#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
    int max_fds;           /* upper bound (exclusive) for fds accepted by
                              grow_drv_ev_state() */
    erts_atomic_t len;     /* current number of entries in v */
    ErtsDrvEventState *v;  /* state array indexed by fd */
    erts_mtx_t grow_lock; /* prevent lock-hogging of racing growers */
#else
    SafeHash tab;          /* fd -> ErtsDrvEventState */
    /* The prealloc fields are not used in the visible part of this
       file; presumably a free-list of preallocated states -- TODO confirm */
    int num_prealloc;
    ErtsDrvEventState *prealloc_first;
    erts_spinlock_t prealloc_lock;
#endif
};
183 
/* Number of pollsets; get_pollset_id() reduces the fd hash modulo this. */
int ERTS_WRITE_UNLIKELY(erts_no_pollsets) = 1;
/* Number of poll threads (not used in the visible part of this file). */
int ERTS_WRITE_UNLIKELY(erts_no_poll_threads) = 1;
/* The single shared instance holding all fd event states and their locks. */
struct drv_ev_state_shared drv_ev_state;
187 
fd_hash(ErtsSysFdType fd)188 static ERTS_INLINE int fd_hash(ErtsSysFdType fd)
189 {
190 #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
191     int hash = (int)fd;
192 #else
193     int hash = (int)(SWord)fd;
194     hash ^= (hash >> 9);
195 #endif
196     return hash;
197 }
198 
fd_mtx(ErtsSysFdType fd)199 static ERTS_INLINE erts_mtx_t* fd_mtx(ErtsSysFdType fd)
200 {
201     return &drv_ev_state.locks[fd_hash(fd) % ERTS_CHECK_IO_DRV_EV_STATE_LOCK_CNT].lck;
202 }
203 
204 #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
205 
get_drv_ev_state(ErtsSysFdType fd)206 static ERTS_INLINE ErtsDrvEventState *get_drv_ev_state(ErtsSysFdType fd)
207 {
208     return &drv_ev_state.v[(int) fd];
209 }
210 
211 #define new_drv_ev_state(State, fd) (State)
212 #define erase_drv_ev_state(State)
213 
grow_drv_ev_state(ErtsSysFdType fd)214 static ERTS_INLINE int grow_drv_ev_state(ErtsSysFdType fd) {
215     int i;
216     int old_len;
217     int new_len;
218 
219     if ((unsigned)fd >= (unsigned)erts_atomic_read_nob(&drv_ev_state.len)) {
220 
221 	if (fd < 0 || fd >= drv_ev_state.max_fds)
222             return 0;
223 
224         erts_mtx_lock(&drv_ev_state.grow_lock);
225         old_len = erts_atomic_read_nob(&drv_ev_state.len);
226         if (fd >= old_len) {
227             new_len = erts_poll_new_table_len(old_len, fd + 1);
228             if (new_len > drv_ev_state.max_fds)
229                 new_len = drv_ev_state.max_fds;
230 
231             for (i=0; i<ERTS_CHECK_IO_DRV_EV_STATE_LOCK_CNT; i++) { /* lock all fd's */
232                 erts_mtx_lock(&drv_ev_state.locks[i].lck);
233             }
234             drv_ev_state.v = (drv_ev_state.v
235                               ? erts_realloc(ERTS_ALC_T_DRV_EV_STATE,
236                                              drv_ev_state.v,
237                                              sizeof(ErtsDrvEventState)*new_len)
238                               : erts_alloc(ERTS_ALC_T_DRV_EV_STATE,
239                                            sizeof(ErtsDrvEventState)*new_len));
240             ERTS_CT_ASSERT(ERTS_EV_TYPE_NONE == 0);
241             sys_memzero(drv_ev_state.v+old_len,
242                         sizeof(ErtsDrvEventState) * (new_len - old_len));
243             for (i = old_len; i < new_len; i++) {
244                 drv_ev_state.v[i].fd = (ErtsSysFdType) i;
245             }
246             erts_atomic_set_nob(&drv_ev_state.len, new_len);
247             for (i=0; i<ERTS_CHECK_IO_DRV_EV_STATE_LOCK_CNT; i++) {
248                 erts_mtx_unlock(&drv_ev_state.locks[i].lck);
249             }
250         }
251         /*else already grown by racing thread */
252 
253         erts_mtx_unlock(&drv_ev_state.grow_lock);
254     }
255     return 1;
256 }
257 
drv_ev_state_len(void)258 static int drv_ev_state_len(void)
259 {
260     return erts_atomic_read_nob(&drv_ev_state.len);
261 }
262 
263 #else /* !ERTS_SYS_CONTINOUS_FD_NUMBERS */
264 
get_drv_ev_state(ErtsSysFdType fd)265 static ERTS_INLINE ErtsDrvEventState *get_drv_ev_state(ErtsSysFdType fd)
266 {
267     ErtsDrvEventState tmpl;
268     tmpl.fd = fd;
269     return  (ErtsDrvEventState *) safe_hash_get(&drv_ev_state.tab, (void *) &tmpl);
270 }
271 
new_drv_ev_state(ErtsDrvEventState * state,ErtsSysFdType fd)272 static ERTS_INLINE ErtsDrvEventState* new_drv_ev_state(ErtsDrvEventState *state,
273                                                        ErtsSysFdType fd)
274 {
275     ErtsDrvEventState tmpl;
276 
277     if (state)
278         return state;
279 
280     tmpl.fd = fd;
281     tmpl.driver.select = NULL;
282     tmpl.driver.nif = NULL;
283     tmpl.driver.stop.drv_ptr = NULL;
284     tmpl.events = 0;
285     tmpl.active_events = 0;
286     tmpl.type = ERTS_EV_TYPE_NONE;
287     tmpl.flags = 0;
288 
289     return  (ErtsDrvEventState *) safe_hash_put(&drv_ev_state.tab, (void *) &tmpl);
290 }
291 
erase_drv_ev_state(ErtsDrvEventState * state)292 static ERTS_INLINE void erase_drv_ev_state(ErtsDrvEventState *state)
293 {
294     safe_hash_erase(&drv_ev_state.tab, (void *) state);
295 }
296 
drv_ev_state_len(void)297 static int drv_ev_state_len(void)
298 {
299     return erts_atomic_read_nob(&drv_ev_state.tab.nitems);
300 }
301 
302 #endif /* !ERTS_SYS_CONTINOUS_FD_NUMBERS */
303 
304 static void stale_drv_select(Eterm id, ErtsDrvEventState *state, int mode);
305 static void drv_select_steal(ErlDrvPort ix, ErtsDrvEventState *state,
306                              int mode, int on);
307 static void nif_select_steal(ErtsDrvEventState *state, int mode,
308                              ErtsResource* resource, Eterm ref);
309 
310 static void print_drv_select_op(erts_dsprintf_buf_t *dsbufp,
311                                 ErlDrvPort ix, ErtsSysFdType fd, int mode, int on);
312 static void print_nif_select_op(erts_dsprintf_buf_t*, ErtsSysFdType,
313                                 int mode, ErtsResource*, Eterm ref);
314 
315 #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
316 static void drv_select_large_fd_error(ErlDrvPort, ErtsSysFdType, int, int);
317 static void nif_select_large_fd_error(ErtsSysFdType, int, ErtsResource*,Eterm ref);
318 #endif
319 static void
320 steal_pending_stop_use(erts_dsprintf_buf_t*, ErlDrvPort, ErtsDrvEventState*,
321                        int mode, int on);
322 static void
323 steal_pending_stop_nif(erts_dsprintf_buf_t *dsbufp, ErtsResource*,
324                        ErtsDrvEventState *state, int mode, int on);
325 static ERTS_INLINE void
326 check_fd_cleanup(ErtsDrvEventState *state,
327 		 ErtsDrvSelectDataState **free_select,
328                  ErtsNifSelectDataState **free_nif);
329 static ERTS_INLINE void iready(Eterm id, ErtsDrvEventState *state);
330 static ERTS_INLINE void oready(Eterm id, ErtsDrvEventState *state);
331 #ifdef DEBUG_PRINT_MODE
332 static char *drvmode2str(int mode);
333 static char *nifmode2str(enum ErlNifSelectFlags mode);
334 #endif
335 
336 static ERTS_INLINE void
init_iotask(ErtsIoTask * io_task,ErtsSysFdType fd)337 init_iotask(ErtsIoTask *io_task, ErtsSysFdType fd)
338 {
339     erts_port_task_handle_init(&io_task->task);
340     io_task->fd = fd;
341 }
342 
343 static ERTS_INLINE int
is_iotask_active(ErtsIoTask * io_task)344 is_iotask_active(ErtsIoTask *io_task)
345 {
346     if (erts_port_task_is_scheduled(&io_task->task))
347 	return 1;
348     return 0;
349 }
350 
351 static ERTS_INLINE ErtsDrvSelectDataState *
alloc_drv_select_data(ErtsSysFdType fd)352 alloc_drv_select_data(ErtsSysFdType fd)
353 {
354     ErtsDrvSelectDataState *dsp = erts_alloc(ERTS_ALC_T_DRV_SEL_D_STATE,
355 					     sizeof(ErtsDrvSelectDataState));
356     dsp->inport = NIL;
357     dsp->outport = NIL;
358     init_iotask(&dsp->iniotask, fd);
359     init_iotask(&dsp->outiotask, fd);
360     return dsp;
361 }
362 
363 static ERTS_INLINE ErtsNifSelectDataState *
alloc_nif_select_data(void)364 alloc_nif_select_data(void)
365 {
366     ErtsNifSelectDataState *dsp = erts_alloc(ERTS_ALC_T_NIF_SEL_D_STATE,
367 					     sizeof(ErtsNifSelectDataState));
368     dsp->in.pid = NIL;
369     dsp->out.pid = NIL;
370     dsp->err.pid = NIL;
371     return dsp;
372 }
373 
374 static ERTS_INLINE void
free_drv_select_data(ErtsDrvSelectDataState * dsp)375 free_drv_select_data(ErtsDrvSelectDataState *dsp)
376 {
377     ASSERT(!erts_port_task_is_scheduled(&dsp->iniotask.task));
378     ASSERT(!erts_port_task_is_scheduled(&dsp->outiotask.task));
379     erts_free(ERTS_ALC_T_DRV_SEL_D_STATE, dsp);
380 }
381 
382 static ERTS_INLINE void
free_nif_select_data(ErtsNifSelectDataState * dsp)383 free_nif_select_data(ErtsNifSelectDataState *dsp)
384 {
385     erts_free(ERTS_ALC_T_NIF_SEL_D_STATE, dsp);
386 }
387 
388 static ERTS_INLINE int
get_pollset_id(ErtsSysFdType fd)389 get_pollset_id(ErtsSysFdType fd)
390 {
391     return fd_hash(fd) % erts_no_pollsets;
392 }
393 
394 static ERTS_INLINE ErtsPollSet *
get_pollset(ErtsSysFdType fd)395 get_pollset(ErtsSysFdType fd)
396 {
397     return pollsetv[get_pollset_id(fd)];
398 }
399 
400 #if ERTS_POLL_USE_FALLBACK
401 static ERTS_INLINE ErtsPollSet *
get_fallback_pollset(void)402 get_fallback_pollset(void)
403 {
404     return flbk_pollset;
405 }
406 #endif
407 
408 static ERTS_INLINE ErtsPollSet *
get_scheduler_pollset(ErtsSysFdType fd)409 get_scheduler_pollset(ErtsSysFdType fd)
410 {
411 #if ERTS_POLL_USE_SCHEDULER_POLLING
412     return sched_pollset;
413 #else
414     return get_pollset(fd);
415 #endif
416 }
417 
418 /*
419  * Place a fd within a pollset. This will automatically use
420  * the fallback ps if needed.
421  */
/*
 * Apply poll operation 'op' with events 'pe' for state->fd on the
 * pollset the fd belongs to, and return the result of the last poll
 * control call made.
 *
 * state        event state of the fd; its fd mutex must be held
 * op           ERTS_POLL_OP_ADD, ERTS_POLL_OP_MOD or ERTS_POLL_OP_DEL
 * pe           events to apply
 * wake_poller  passed through to the poll control calls
 *
 * Only state->flags may be modified here (FALLBACK is set when an ADD
 * is redirected to the fallback pollset).
 */
static ERTS_INLINE ErtsPollEvents
erts_io_control_wakeup(ErtsDrvEventState *state, ErtsPollOp op,
                       ErtsPollEvents pe, int *wake_poller)
{
    ErtsSysFdType fd = state->fd;
    ErtsPollEvents res = 0;
    EventStateFlags flags = state->flags; /* local copy; see the DEL case */

    ERTS_LC_ASSERT(erts_lc_mtx_is_locked(fd_mtx(state->fd)));

    if (!(flags & ERTS_EV_FLAG_FALLBACK)) {

        /* A delete of an fd that has been migrated to the scheduler
           pollset is applied there first. Clearing IN_SCHEDULER in the
           local copy makes the code below apply it to the ordinary
           pollset as well. */
        if (op == ERTS_POLL_OP_DEL && (flags & ERTS_EV_FLAG_SCHEDULER)) {
            erts_poll_control(get_scheduler_pollset(fd), fd, op, pe, wake_poller);
            flags &= ~ERTS_EV_FLAG_IN_SCHEDULER;
        }
        /* The scheduler pollset is only used when the fd is currently
           in it and the operation does not involve output events. */
        if (!(flags & ERTS_EV_FLAG_IN_SCHEDULER) || (pe & ERTS_POLL_EV_OUT)) {
            res = erts_poll_control(get_pollset(fd), fd, op, pe, wake_poller);
        } else {
            res = erts_poll_control(get_scheduler_pollset(fd), fd, op, pe, wake_poller);
        }

#if ERTS_POLL_USE_FALLBACK
        if (op == ERTS_POLL_OP_ADD && res == ERTS_POLL_EV_NVAL) {
            /* When an add fails with NVAL, the poll/kevent operation could not
               put that fd in the pollset, so we instead put it into a fallback pollset */
            state->flags |= ERTS_EV_FLAG_FALLBACK;
            res = erts_poll_control_flbk(get_fallback_pollset(), fd, op, pe, wake_poller);
        }
    } else {
        /* The fd already lives in the fallback pollset. */
        ASSERT(op != ERTS_POLL_OP_ADD);
        res = erts_poll_control_flbk(get_fallback_pollset(), fd, op, pe, wake_poller);
#endif
    }

    return res;
}
459 
460 static ERTS_INLINE ErtsPollEvents
erts_io_control(ErtsDrvEventState * state,ErtsPollOp op,ErtsPollEvents pe)461 erts_io_control(ErtsDrvEventState *state, ErtsPollOp op, ErtsPollEvents pe)
462 {
463     int wake_poller = 0;
464     return erts_io_control_wakeup(state, op, pe, &wake_poller);
465 }
466 
467 /* ToDo: Was inline in erl_check_io.h but now need struct erts_poll_thread */
/*
 * Notification that an input or output port task for an fd has been
 * executed. If the event is still selected, it is re-enabled in the
 * pollset; if no event is active afterwards, stale select data is
 * cleaned up.
 *
 * type          ERTS_PORT_TASK_INPUT or ERTS_PORT_TASK_OUTPUT; any other
 *               value that reaches the switch aborts the emulator
 * pthp          task handle embedded in the ErtsIoTask of the fd
 * reset_handle  called on pthp while the fd mutex is held
 */
void
erts_io_notify_port_task_executed(ErtsPortTaskType type,
                                  ErtsPortTaskHandle *pthp,
                                  void (*reset_handle)(ErtsPortTaskHandle *))
{
    /* The handle is a member of an ErtsIoTask, which also records the fd. */
    ErtsIoTask *itp = ErtsContainerStruct(pthp, ErtsIoTask, task);
    ErtsSysFdType fd = itp->fd;
    erts_mtx_t *mtx = fd_mtx(fd);
    ErtsPollOp op = ERTS_POLL_OP_MOD;
    int active_events, new_events = 0;
    ErtsDrvEventState *state;
    ErtsDrvSelectDataState *free_select = NULL;
    ErtsNifSelectDataState *free_nif = NULL;

    ERTS_MSACC_PUSH_AND_SET_STATE_M_X(ERTS_MSACC_STATE_CHECK_IO);

    erts_mtx_lock(mtx);
    state = get_drv_ev_state(fd);

    reset_handle(pthp);

    active_events = state->active_events;

    /* Input tasks for an fd that is currently in the scheduler pollset
       need no pollset update; output tasks are always handled. */
    if (!(state->flags & ERTS_EV_FLAG_IN_SCHEDULER) || type == ERTS_PORT_TASK_OUTPUT) {
        switch (type) {
        case ERTS_PORT_TASK_INPUT:

            DEBUG_PRINT_FD("executed ready_input", state);

            ASSERT(!(state->active_events & ERTS_POLL_EV_IN));
            if (state->events & ERTS_POLL_EV_IN) {
                active_events |= ERTS_POLL_EV_IN;
                /* After more than 10 triggers, move input polling of
                   the fd to the scheduler pollset. An ADD is needed the
                   first time the fd is migrated. */
                if (state->count > 10 && ERTS_POLL_USE_SCHEDULER_POLLING) {
                    if (!(state->flags & ERTS_EV_FLAG_SCHEDULER))
                        op = ERTS_POLL_OP_ADD;
                    state->flags |= ERTS_EV_FLAG_IN_SCHEDULER|ERTS_EV_FLAG_SCHEDULER;
                    new_events = ERTS_POLL_EV_IN;
                    DEBUG_PRINT_FD("moving to scheduler ps", state);
                } else
                    new_events = active_events;
                /* Triggers are not counted for fds in the fallback pollset. */
                if (!(state->flags & ERTS_EV_FLAG_FALLBACK) && ERTS_POLL_USE_SCHEDULER_POLLING)
                    state->count++;
            }
            break;
        case ERTS_PORT_TASK_OUTPUT:

            DEBUG_PRINT_FD("executed ready_output", state);

            ASSERT(!(state->active_events & ERTS_POLL_EV_OUT));
            if (state->events & ERTS_POLL_EV_OUT) {
                active_events |= ERTS_POLL_EV_OUT;
                /* When input is active in the scheduler pollset, only
                   the output event is passed to the poll control call. */
                if (state->flags & ERTS_EV_FLAG_IN_SCHEDULER && active_events & ERTS_POLL_EV_IN)
                    new_events = ERTS_POLL_EV_OUT;
                else
                    new_events = active_events;
            }
            break;
        default:
            erts_exit(ERTS_ABORT_EXIT, "Invalid IO port task type");
            break;
        }

        /* Update the pollset only if the set of active events changed. */
        if (state->active_events != active_events && new_events) {
            state->active_events = active_events;
            new_events = erts_io_control(state, op, new_events);
        }

        /* We were unable to re-insert the fd into the pollset, signal the callback. */
        if (new_events & ERTS_POLL_EV_NVAL) {
            if (state->active_events & ERTS_POLL_EV_IN)
                iready(state->driver.select->inport, state);
            if (state->active_events & ERTS_POLL_EV_OUT)
                oready(state->driver.select->outport, state);
            state->active_events = 0;
            active_events = 0;
        }
    }

    if (!active_events)
        check_fd_cleanup(state, &free_select, &free_nif);

    erts_mtx_unlock(mtx);

    /* Data handed back by check_fd_cleanup() is freed after the fd
       mutex has been released. */
    if (free_select)
        free_drv_select_data(free_select);
    if (free_nif)
        free_nif_select_data(free_nif);

    ERTS_MSACC_POP_STATE_M_X();
}
558 
559 static ERTS_INLINE void
abort_task(Eterm id,ErtsPortTaskHandle * pthp,EventStateType type)560 abort_task(Eterm id, ErtsPortTaskHandle *pthp, EventStateType type)
561 {
562     if (is_not_nil(id) && erts_port_task_is_scheduled(pthp)) {
563 	erts_port_task_abort(pthp);
564 	ASSERT(erts_is_port_alive(id));
565     }
566 }
567 
568 static ERTS_INLINE void
abort_tasks(ErtsDrvEventState * state,int mode)569 abort_tasks(ErtsDrvEventState *state, int mode)
570 {
571     switch (mode) {
572     case 0: check_type:
573 	switch (state->type) {
574         case ERTS_EV_TYPE_NIF:
575 	case ERTS_EV_TYPE_NONE:
576 	    return;
577 	default:
578 	    ASSERT(state->type == ERTS_EV_TYPE_DRV_SEL);
579 	    /* Fall through */
580 	}
581     case ERL_DRV_READ|ERL_DRV_WRITE:
582     case ERL_DRV_WRITE:
583 	ASSERT(state->type == ERTS_EV_TYPE_DRV_SEL);
584 	abort_task(state->driver.select->outport,
585 		   &state->driver.select->outiotask.task,
586 		   state->type);
587 	if (mode == ERL_DRV_WRITE)
588 	    break;
589     case ERL_DRV_READ:
590 	ASSERT(state->type == ERTS_EV_TYPE_DRV_SEL);
591 	abort_task(state->driver.select->inport,
592 		   &state->driver.select->iniotask.task,
593 		   state->type);
594 	break;
595     default:
596 	goto check_type;
597     }
598 }
599 
/*
 * Build the message to be sent for a NIF select event and store it,
 * together with the recipient, in 'e'. Nothing is sent here.
 *
 * e           event slot to fill; a message already prepared in it
 *             (e->pid not NIL) is discarded first
 * mode        if ERL_NIF_SELECT_CUSTOM_MSG is set, 'msg' is the message
 *             itself; otherwise the message built is
 *             {select, Resource, Ref, EventAtom} with 'msg' as Ref
 * recipient   pid stored in e->pid; must not be NIL
 * resource    resource placed in the default message as a magic ref
 * msg         custom message, or the ref/immediate used in the default
 *             message
 * msg_env     only used for custom messages: if non-NULL the message is
 *             created from this environment, otherwise 'msg' is copied
 * event_atom  last element of the default message
 */
static void prepare_select_msg(struct erts_nif_select_event* e,
                               enum ErlNifSelectFlags mode,
                               Eterm recipient,
                               ErtsResource* resource,
                               Eterm msg,
                               ErlNifEnv* msg_env,
                               Eterm event_atom)
{
    ErtsMessage* mp;
    Eterm* hp;
    Uint hsz;

    /* Drop a previously prepared message that was never sent. */
    if (is_not_nil(e->pid)) {
        ASSERT(e->mp);
        erts_cleanup_messages(e->mp);
    }

    if (mode & ERL_NIF_SELECT_CUSTOM_MSG) {
        if (msg_env) {
            mp = erts_create_message_from_nif_env(msg_env);
            ERL_MESSAGE_TERM(mp) = msg;
        }
        else {
            hsz = size_object(msg);
            mp = erts_alloc_message(hsz, &hp);
            ERL_MESSAGE_TERM(mp) = copy_struct(msg, hsz, &hp, &mp->hfrag.off_heap);
        }
    }
    else {
        ErtsBinary* bin;
        Eterm resource_term, ref_term, tuple;
        Eterm* hp_start;

         /* {select, Resource, Ref, EventAtom} */
        /* 5 words for the 4-tuple plus the magic ref; an internal ref
           is copied onto the message heap and needs room as well. */
        hsz = 5 + ERTS_MAGIC_REF_THING_SIZE;
        if (is_internal_ref(msg))
            hsz += ERTS_REF_THING_SIZE;
        else
            ASSERT(is_immed(msg));

        mp = erts_alloc_message(hsz, &hp);
        hp_start = hp;

        bin = ERTS_MAGIC_BIN_FROM_UNALIGNED_DATA(resource);
        resource_term = erts_mk_magic_ref(&hp, &mp->hfrag.off_heap, &bin->binary);
        if (is_internal_ref(msg)) {
            Uint32* refn = internal_ref_numbers(msg);
            write_ref_thing(hp, refn[0], refn[1], refn[2]);
            ref_term = make_internal_ref(hp);
            hp += ERTS_REF_THING_SIZE;
        }
        else {
            ASSERT(is_immed(msg));
            ref_term = msg;
        }
        tuple = TUPLE4(hp, am_select, resource_term, ref_term, event_atom);
        hp += 5;
        ERL_MESSAGE_TERM(mp) = tuple;
        /* The whole allocated heap must have been used. */
        ASSERT(hp == hp_start + hsz); (void)hp_start;
    }

    ASSERT(is_not_nil(recipient));
    e->pid = recipient;
    e->mp = mp;
}
665 
send_select_msg(struct erts_nif_select_event * e)666 static ERTS_INLINE void send_select_msg(struct erts_nif_select_event* e)
667 {
668     Process* rp = erts_proc_lookup(e->pid);
669 
670     ASSERT(is_internal_pid(e->pid));
671     if (!rp) {
672         erts_cleanup_messages(e->mp);
673         return;
674     }
675 
676     erts_queue_message(rp, 0, e->mp, ERL_MESSAGE_TERM(e->mp), am_system);
677 }
678 
clear_select_event(struct erts_nif_select_event * e)679 static void clear_select_event(struct erts_nif_select_event* e)
680 {
681     if (is_not_nil(e->pid)) {
682         /* Discard unsent message */
683         ASSERT(e->mp);
684         erts_cleanup_messages(e->mp);
685         e->mp = NULL;
686         e->pid = NIL;
687     }
688 }
689 
/*
 * Stop selecting on events of 'state'. The fd mutex must be held.
 *
 * mode == 0 removes all selected events. Otherwise 'mode' is a
 * combination of ERL_DRV_READ and ERL_DRV_WRITE, and the state must be
 * of type ERTS_EV_TYPE_DRV_SEL.
 *
 * If no events remain selected the fd is deleted from the pollset and
 * the state is reset to type NONE with cleared flags; otherwise the
 * pollset entry is modified to the remaining active events.
 */
static void
deselect(ErtsDrvEventState *state, int mode)
{
    ErtsPollEvents rm_events;
    ERTS_LC_ASSERT(erts_lc_mtx_is_locked(fd_mtx(state->fd)));

    /* Abort port tasks that are scheduled for the affected events. */
    abort_tasks(state, mode);

    if (!mode) {
	rm_events = state->events;
    } else {
	rm_events = 0;
	ASSERT(state->type == ERTS_EV_TYPE_DRV_SEL);
	if (mode & ERL_DRV_READ) {
	    state->driver.select->inport = NIL;
	    rm_events |= ERTS_POLL_EV_IN;
	}
	if (mode & ERL_DRV_WRITE) {
	    state->driver.select->outport = NIL;
	    rm_events |= ERTS_POLL_EV_OUT;
	}
    }

    state->events &= ~rm_events;
    state->active_events &= ~rm_events;

    if (!(state->events)) {
        /* Nothing is selected any longer: remove the fd from the pollset
           and release what the owner had registered. */
        erts_io_control(state, ERTS_POLL_OP_DEL, 0);
	switch (state->type) {
        case ERTS_EV_TYPE_NIF:
            /* Discard unsent messages and drop the resource reference. */
            clear_select_event(&state->driver.nif->in);
            clear_select_event(&state->driver.nif->out);
            clear_select_event(&state->driver.nif->err);
            enif_release_resource(state->driver.stop.resource->data);
            state->driver.stop.resource = NULL;
            break;
	case ERTS_EV_TYPE_DRV_SEL:
	    state->driver.select->inport = NIL;
	    state->driver.select->outport = NIL;
	    break;
	case ERTS_EV_TYPE_NONE:
	    break;
	default:
	    ASSERT(0);
	    break;
	}
	state->type = ERTS_EV_TYPE_NONE;
	state->flags = 0;
    } else {
        ErtsPollEvents new_events =
            erts_io_control(state, ERTS_POLL_OP_MOD, state->active_events);

        /* We were unable to re-insert the fd into the pollset, signal the callback. */
        if (new_events & ERTS_POLL_EV_NVAL) {
            if (state->active_events & ERTS_POLL_EV_IN)
                iready(state->driver.select->inport, state);
            if (state->active_events & ERTS_POLL_EV_OUT)
                oready(state->driver.select->outport, state);
            state->active_events = 0;
        }
    }
}
752 
/*
 * True when no selection is registered for the fd of 'state'. With the
 * fd-indexed array every fd has an entry, so the type tells; with the
 * hash table a missing entry (NULL) tells.
 */
#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
#  define IS_FD_UNKNOWN(state) ((state)->type == ERTS_EV_TYPE_NONE)
#else
#  define IS_FD_UNKNOWN(state) ((state) == NULL)
#endif
758 
759 static ERTS_INLINE void
check_fd_cleanup(ErtsDrvEventState * state,ErtsDrvSelectDataState ** free_select,ErtsNifSelectDataState ** free_nif)760 check_fd_cleanup(ErtsDrvEventState *state,
761 		 ErtsDrvSelectDataState **free_select,
762                  ErtsNifSelectDataState **free_nif)
763 {
764     ERTS_LC_ASSERT(erts_lc_mtx_is_locked(fd_mtx(state->fd)));
765     *free_select = NULL;
766     if (state->driver.select
767 	&& (state->type != ERTS_EV_TYPE_DRV_SEL)
768 	&& !is_iotask_active(&state->driver.select->iniotask)
769 	&& !is_iotask_active(&state->driver.select->outiotask)) {
770 
771 	*free_select = state->driver.select;
772 	state->driver.select = NULL;
773     }
774 
775     *free_nif = NULL;
776     if (state->driver.nif && (state->type != ERTS_EV_TYPE_NIF)) {
777         *free_nif = state->driver.nif;
778         state->driver.nif = NULL;
779     }
780 
781     if (((state->type != ERTS_EV_TYPE_NONE)
782          | (state->driver.nif != NULL)
783 	 | (state->driver.select != NULL)) == 0) {
784 
785 	erase_drv_ev_state(state);
786     }
787 }
788 
/*
 * Evaluates to 1 unconditionally on Windows, and to MAY_SLEEP elsewhere.
 * NOTE(review): the uses are outside the visible part of the file;
 * presumably this decides whether a callback must be deferred -- confirm.
 */
#ifdef __WIN32__
# define MUST_DEFER(MAY_SLEEP) 1
#else
# define MUST_DEFER(MAY_SLEEP) (MAY_SLEEP)
#endif
794 
795 int
driver_select(ErlDrvPort ix,ErlDrvEvent e,int mode,int on)796 driver_select(ErlDrvPort ix, ErlDrvEvent e, int mode, int on)
797 {
798     void (*stop_select_fn)(ErlDrvEvent, void*) = NULL;
799     Port *prt = erts_drvport2port(ix);
800     Eterm id = erts_drvport2id(ix);
801     ErtsSysFdType fd = (ErtsSysFdType) e;
802     ErtsPollEvents ctl_events = (ErtsPollEvents) 0;
803     ErtsPollEvents old_events;
804     ErtsPollEvents new_events;
805     ErtsPollOp ctl_op = ERTS_POLL_OP_MOD;
806     ErtsDrvEventState *state;
807     int wake_poller = 0;
808     int ret;
809     ErtsDrvSelectDataState *free_select = NULL;
810     ErtsNifSelectDataState *free_nif = NULL;
811 #ifdef USE_VM_PROBES
812     DTRACE_CHARBUF(name, 64);
813 #endif
814     ERTS_MSACC_PUSH_AND_SET_STATE(ERTS_MSACC_STATE_CHECK_IO);
815 
816     if (prt == ERTS_INVALID_ERL_DRV_PORT) {
817         ERTS_MSACC_POP_STATE();
818 	return -1;
819     }
820 
821     ERTS_LC_ASSERT(erts_lc_is_port_locked(prt));
822 
823 #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
824     if (!grow_drv_ev_state(fd)) {
825         if (fd > 0) drv_select_large_fd_error(ix, fd, mode, on);
826         ERTS_MSACC_POP_STATE();
827         return -1;
828     }
829 #endif
830 
831     erts_mtx_lock(fd_mtx(fd));
832 
833     state = get_drv_ev_state(fd); /* may be NULL! */
834 
835     DEBUG_PRINT_FD("driver_select(%T, %p, %s, %d)",
836                    state, id, fd, drvmode2str(mode), on);
837 
838     if (!on) {
839         if (IS_FD_UNKNOWN(state)) {
840             if ((mode&ERL_DRV_USE_NO_CALLBACK) == ERL_DRV_USE) {
841                 /* fast track to stop_select callback */
842                 stop_select_fn = prt->drv_ptr->stop_select;
843         #ifdef USE_VM_PROBES
844                 strncpy(name, prt->drv_ptr->name,
845                         sizeof(DTRACE_CHARBUF_NAME(name))-1);
846                 name[sizeof(name)-1] = '\0';
847         #endif
848             }
849             ret = 0;
850             goto done_unknown;
851         }
852         /* For some reason (don't know why), we do not clean all
853            events when doing ERL_DRV_USE_NO_CALLBACK. */
854         else if ((mode&ERL_DRV_USE_NO_CALLBACK) == ERL_DRV_USE) {
855             mode |= (ERL_DRV_READ | ERL_DRV_WRITE);
856         }
857     }
858 
859     state = new_drv_ev_state(state, fd);
860 
861     switch (state->type) {
862     case ERTS_EV_TYPE_NIF:
863         drv_select_steal(ix, state, mode, on);
864         break;
865     case ERTS_EV_TYPE_STOP_USE: {
866         erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf();
867         print_drv_select_op(dsbufp, ix, state->fd, mode, on);
868         steal_pending_stop_use(dsbufp, ix, state, mode, on);
869         if (state->type == ERTS_EV_TYPE_STOP_USE) {
870             ret = 0;
871             goto done; /* stop_select still pending */
872         }
873         ASSERT(state->type == ERTS_EV_TYPE_NONE);
874         break;
875     }
876     case ERTS_EV_TYPE_STOP_NIF: {
877         erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf();
878         print_drv_select_op(dsbufp, ix, state->fd, mode, on);
879         steal_pending_stop_nif(dsbufp, NULL, state, mode, on);
880         ASSERT(state->type == ERTS_EV_TYPE_NONE);
881         break;
882 
883     }
884     default: break;
885     }
886 
887     if (mode & ERL_DRV_READ) {
888 	if (state->type == ERTS_EV_TYPE_DRV_SEL) {
889 	    Eterm owner = state->driver.select->inport;
890 	    if (owner != id && is_not_nil(owner))
891 		drv_select_steal(ix, state, mode, on);
892 	}
893         ctl_events = ERTS_POLL_EV_IN;
894     }
895     if (mode & ERL_DRV_WRITE) {
896 	if (state->type == ERTS_EV_TYPE_DRV_SEL) {
897 	    Eterm owner = state->driver.select->outport;
898 	    if (owner != id && is_not_nil(owner))
899 		drv_select_steal(ix, state, mode, on);
900 	}
901 	ctl_events |= ERTS_POLL_EV_OUT;
902     }
903 
904 
905     ASSERT((state->type == ERTS_EV_TYPE_DRV_SEL) ||
906 	   (state->type == ERTS_EV_TYPE_NONE && !state->events));
907 
908     old_events = state->events;
909 
910     if (on) {
911         ctl_events &= ~old_events;
912         state->events |= ctl_events;
913         if (ctl_events & ERTS_POLL_EV_IN && (!state->driver.select || !is_iotask_active(&state->driver.select->iniotask)))
914             state->active_events |= ERTS_POLL_EV_IN;
915         if (ctl_events & ERTS_POLL_EV_OUT && (!state->driver.select || !is_iotask_active(&state->driver.select->outiotask)))
916             state->active_events |= ERTS_POLL_EV_OUT;
917         if (old_events == 0 && !(state->flags & ERTS_EV_FLAG_USED)) {
918             ctl_op = ERTS_POLL_OP_ADD;
919         }
920         new_events = state->active_events;
921         if (state->flags & ERTS_EV_FLAG_IN_SCHEDULER)
922             new_events &= ~ERTS_POLL_EV_IN;
923     }
924     else {
925         ctl_events &= old_events;
926         state->events &= ~ctl_events;
927         state->active_events &= ~ctl_events;
928         new_events = state->active_events;
929 
930         if (ctl_events & ERTS_POLL_EV_IN) {
931             state->count = 0;
932             if (state->flags & ERTS_EV_FLAG_IN_SCHEDULER) {
933                 new_events = 0;
934             }
935         }
936 
937         if (!state->events) {
938             if (!(state->flags & ERTS_EV_FLAG_USED) || mode & ERL_DRV_USE)
939                 ctl_op = ERTS_POLL_OP_DEL;
940         }
941     }
942 
943     if (ctl_events || ctl_op == ERTS_POLL_OP_DEL) {
944 
945         new_events = erts_io_control_wakeup(state, ctl_op,
946                                             new_events,
947                                             &wake_poller);
948 
949         ASSERT(state->type == ERTS_EV_TYPE_DRV_SEL || state->type == ERTS_EV_TYPE_NONE);
950     }
951 
952     if (on) {
953         if (ctl_events) {
954 	    if (!state->driver.select)
955 		state->driver.select = alloc_drv_select_data(state->fd);
956 	    if (state->type == ERTS_EV_TYPE_NONE)
957 		state->type = ERTS_EV_TYPE_DRV_SEL;
958 	    ASSERT(state->type == ERTS_EV_TYPE_DRV_SEL);
959 	    if (ctl_events & ERTS_POLL_EV_IN) {
960 		state->driver.select->inport = id;
961                 if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL))
962                     iready(id, state);
963             }
964 	    if (ctl_events & ERTS_POLL_EV_OUT) {
965 		state->driver.select->outport = id;
966                 if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL))
967                     oready(id, state);
968             }
969 	    if (mode & ERL_DRV_USE)
970 		state->flags |= ERTS_EV_FLAG_USED;
971         }
972     }
973     else { /* off */
974         if (state->type == ERTS_EV_TYPE_DRV_SEL) {
975             if (ctl_events & ERTS_POLL_EV_IN) {
976                 abort_tasks(state, ERL_DRV_READ);
977                 state->driver.select->inport = NIL;
978                 state->flags &= ~ERTS_EV_FLAG_IN_SCHEDULER;
979             }
980             if (ctl_events & ERTS_POLL_EV_OUT) {
981                 abort_tasks(state, ERL_DRV_WRITE);
982                 state->driver.select->outport = NIL;
983             }
984             if (state->events == 0) {
985                 if ((mode & ERL_DRV_USE) || !(state->flags & ERTS_EV_FLAG_USED)) {
986                     state->type = ERTS_EV_TYPE_NONE;
987                     if (state->flags & ERTS_EV_FLAG_SCHEDULER)
988                         erts_atomic32_read_bor_nob(&prt->state, ERTS_PORT_SFLG_CHECK_FD_CLEANUP);
989                     state->flags = 0;
990                 }
991                 /*else keep it, as fd will probably be selected upon again */
992             }
993         }
994         if ((mode & ERL_DRV_USE_NO_CALLBACK) == ERL_DRV_USE) {
995             erts_driver_t* drv_ptr = prt->drv_ptr;
996             ASSERT(state->events==0);
997             if (!wake_poller) {
998                 /* Safe to close fd now as it is not in pollset
999                    or there was no need to eject fd (kernel poll) */
1000                 stop_select_fn = drv_ptr->stop_select;
1001 #ifdef USE_VM_PROBES
1002                 strncpy(name, prt->drv_ptr->name, sizeof(name)-1);
1003                 name[sizeof(name)-1] = '\0';
1004 #endif
1005             }
1006             else {
1007                 /* Not safe to close fd, postpone stop_select callback. */
1008                 state->type = ERTS_EV_TYPE_STOP_USE;
1009                 state->driver.stop.drv_ptr = drv_ptr;
1010                 if (drv_ptr->handle) {
1011                     erts_ddll_reference_referenced_driver(drv_ptr->handle);
1012                 }
1013             }
1014         }
1015     }
1016 
1017     ret = 0;
1018 
1019 done:
1020 
1021     check_fd_cleanup(state,
1022 		     &free_select,
1023                      &free_nif);
1024 
1025 done_unknown:
1026     erts_mtx_unlock(fd_mtx(fd));
1027     if (stop_select_fn) {
1028 	DTRACE1(driver_stop_select, name);
1029 	LTTNG1(driver_stop_select, "unknown");
1030 	(*stop_select_fn)(e, NULL);
1031     }
1032     if (free_select)
1033 	free_drv_select_data(free_select);
1034     if (free_nif)
1035         free_nif_select_data(free_nif);
1036 
1037     ERTS_MSACC_POP_STATE();
1038 
1039     return ret;
1040 }
1041 
1042 int
enif_select(ErlNifEnv * env,ErlNifEvent e,enum ErlNifSelectFlags mode,void * obj,const ErlNifPid * pid,Eterm msg)1043 enif_select(ErlNifEnv* env, ErlNifEvent e, enum ErlNifSelectFlags mode,
1044             void* obj, const ErlNifPid* pid, Eterm msg)
1045 {
1046     return enif_select_x(env, e, mode, obj, pid, msg, NULL);
1047 }
1048 
1049 
1050 int
enif_select_x(ErlNifEnv * env,ErlNifEvent e,enum ErlNifSelectFlags mode,void * obj,const ErlNifPid * pid,Eterm msg,ErlNifEnv * msg_env)1051 enif_select_x(ErlNifEnv* env,
1052               ErlNifEvent e,
1053               enum ErlNifSelectFlags mode,
1054               void* obj,
1055               const ErlNifPid* pid,
1056               Eterm msg,
1057               ErlNifEnv* msg_env)
1058 {
1059     int on;
1060     ErtsResource* resource = DATA_TO_RESOURCE(obj);
1061     ErtsSysFdType fd = (ErtsSysFdType) e;
1062     ErtsPollEvents ctl_events = (ErtsPollEvents) 0;
1063     ErtsPollEvents old_events;
1064     ErtsPollOp ctl_op = ERTS_POLL_OP_MOD;
1065     ErtsDrvEventState *state;
1066     int ret, wake_poller = 0;
1067     enum { NO_STOP=0, CALL_STOP, CALL_STOP_AND_RELEASE } call_stop = NO_STOP;
1068     ErtsDrvSelectDataState *free_select = NULL;
1069     ErtsNifSelectDataState *free_nif = NULL;
1070 
1071     ASSERT(!erts_dbg_is_resource_dying(resource));
1072 
1073 #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
1074     if (!grow_drv_ev_state(fd)) {
1075         if (fd > 0) nif_select_large_fd_error(fd, mode, resource, msg);
1076         return INT_MIN | ERL_NIF_SELECT_INVALID_EVENT;
1077     }
1078 #endif
1079 
1080     erts_mtx_lock(fd_mtx(fd));
1081 
1082     state = get_drv_ev_state(fd); /* may be NULL! */
1083 
1084     DEBUG_PRINT_FD("enif_select(%T, %d, %s, %p, %T, %T)",
1085                    state, env->proc->common.id, fd, nifmode2str(mode), resource,
1086                    pid ? pid->pid : THE_NON_VALUE, THE_NON_VALUE);
1087 
1088     if (mode & ERL_NIF_SELECT_STOP) {
1089         ASSERT(resource->type->fn.stop);
1090         if (IS_FD_UNKNOWN(state)) {
1091             /* fast track to stop callback */
1092             call_stop = CALL_STOP;
1093             ret = ERL_NIF_SELECT_STOP_CALLED;
1094             goto done_unknown;
1095         }
1096         on = 0;
1097         mode = ERL_DRV_READ | ERL_DRV_WRITE | ERL_DRV_USE;
1098         ctl_events = ERTS_POLL_EV_IN | ERTS_POLL_EV_OUT | ERTS_POLL_EV_ERR;
1099         ctl_op = ERTS_POLL_OP_DEL;
1100     }
1101     else {
1102         on = !(mode & ERL_NIF_SELECT_CANCEL);
1103         ASSERT(mode);
1104         if (mode & ERL_DRV_READ) {
1105             ctl_events |= ERTS_POLL_EV_IN;
1106         }
1107         if (mode & ERL_DRV_WRITE) {
1108             ctl_events |= ERTS_POLL_EV_OUT;
1109         }
1110         if (mode & ERL_NIF_SELECT_ERROR) {
1111 #if (!ERTS_ENABLE_KERNEL_POLL || ERTS_POLL_USE_EPOLL) && defined(ERTS_USE_POLL)
1112             ctl_events |= ERTS_POLL_EV_ERR;
1113 #else
1114             erts_mtx_unlock(fd_mtx(fd));
1115             return INT_MIN | ERL_NIF_SELECT_NOTSUP;
1116 #endif
1117         }
1118     }
1119 
1120     state = new_drv_ev_state(state,fd);
1121 
1122     switch (state->type) {
1123     case ERTS_EV_TYPE_NIF:
1124         /*
1125          * Changing resource is considered stealing.
1126          * Changing process and/or ref is ok (I think?).
1127          */
1128         if (state->driver.stop.resource != resource)
1129             nif_select_steal(state, ERL_DRV_READ | ERL_DRV_WRITE, resource, msg);
1130         break;
1131     case ERTS_EV_TYPE_DRV_SEL:
1132         nif_select_steal(state, mode, resource, msg);
1133         break;
1134     case ERTS_EV_TYPE_STOP_USE: {
1135         erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf();
1136         print_nif_select_op(dsbufp, fd, mode, resource, msg);
1137         steal_pending_stop_use(dsbufp, ERTS_INVALID_ERL_DRV_PORT, state, mode, on);
1138         ASSERT(state->type == ERTS_EV_TYPE_NONE);
1139         break;
1140     }
1141     case ERTS_EV_TYPE_STOP_NIF: {
1142         erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf();
1143         print_nif_select_op(dsbufp, fd, mode, resource, msg);
1144         steal_pending_stop_nif(dsbufp, resource, state, mode, on);
1145         if (state->type == ERTS_EV_TYPE_STOP_NIF) {
1146             ret = ERL_NIF_SELECT_STOP_SCHEDULED;  /* ?? */
1147             goto done;
1148         }
1149         ASSERT(state->type == ERTS_EV_TYPE_NONE);
1150         break;
1151     }
1152     default: break;
1153     }
1154 
1155     ASSERT((state->type == ERTS_EV_TYPE_NIF) ||
1156 	   (state->type == ERTS_EV_TYPE_NONE && !state->events));
1157 
1158     old_events = state->events;
1159 
1160     if (on) {
1161         ctl_events &= ~old_events;
1162         state->events |= ctl_events;
1163         state->active_events |= ctl_events;
1164         if (state->type == ERTS_EV_TYPE_NONE)
1165             ctl_op = ERTS_POLL_OP_ADD;
1166         if (ctl_events & ERTS_POLL_EV_ERR)
1167             state->flags |= ERTS_EV_FLAG_WANT_ERROR;
1168     }
1169     else {
1170         ctl_events &= old_events;
1171         state->events &= ~ctl_events;
1172         state->active_events &= ~ctl_events;
1173     }
1174 
1175     if (ctl_events || ctl_op == ERTS_POLL_OP_DEL) {
1176         ErtsPollEvents new_events;
1177 
1178         new_events = erts_io_control_wakeup(state,
1179                                             ctl_op,
1180                                             state->active_events,
1181                                             &wake_poller);
1182 
1183         if (new_events & ERTS_POLL_EV_NVAL) {
1184             if (state->type == ERTS_EV_TYPE_NIF && !old_events) {
1185                 state->type = ERTS_EV_TYPE_NONE;
1186                 state->flags = 0;
1187                 state->driver.nif->in.pid = NIL;
1188                 state->driver.nif->out.pid = NIL;
1189                 state->driver.nif->err.pid = NIL;
1190                 state->driver.stop.resource = NULL;
1191             }
1192             ret = INT_MIN | ERL_NIF_SELECT_FAILED;
1193             goto done;
1194         }
1195         ASSERT(new_events == state->events);
1196     }
1197 
1198     ASSERT(state->type == ERTS_EV_TYPE_NIF
1199 	   || state->type == ERTS_EV_TYPE_NONE);
1200 
1201     if (on) {
1202         const Eterm recipient = pid ? pid->pid : env->proc->common.id;
1203         ASSERT(is_internal_pid(recipient));
1204         if (!state->driver.nif)
1205             state->driver.nif = alloc_nif_select_data();
1206         if (state->type == ERTS_EV_TYPE_NONE) {
1207             state->type = ERTS_EV_TYPE_NIF;
1208             state->driver.stop.resource = resource;
1209             enif_keep_resource(resource->data);
1210         }
1211         ASSERT(state->type == ERTS_EV_TYPE_NIF);
1212         ASSERT(state->driver.stop.resource == resource);
1213         if (mode & ERL_DRV_READ) {
1214             prepare_select_msg(&state->driver.nif->in, mode, recipient,
1215                                resource, msg, msg_env, am_ready_input);
1216             msg_env = NULL;
1217         }
1218         if (mode & ERL_DRV_WRITE) {
1219             prepare_select_msg(&state->driver.nif->out, mode, recipient,
1220                                resource, msg, msg_env, am_ready_output);
1221             msg_env = NULL;
1222         }
1223         if (mode & ERL_NIF_SELECT_ERROR) {
1224             prepare_select_msg(&state->driver.nif->err, mode, recipient,
1225                                resource, msg, msg_env, am_ready_error);
1226         }
1227         ret = 0;
1228     }
1229     else { /* off */
1230         ret = 0;
1231         if (state->type == ERTS_EV_TYPE_NIF) {
1232             if (mode & ERL_NIF_SELECT_READ
1233                 && is_not_nil(state->driver.nif->in.pid)) {
1234                 clear_select_event(&state->driver.nif->in);
1235                 ret |= ERL_NIF_SELECT_READ_CANCELLED;
1236             }
1237             if (mode & ERL_NIF_SELECT_WRITE
1238                 && is_not_nil(state->driver.nif->out.pid)) {
1239                 clear_select_event(&state->driver.nif->out);
1240                 ret |= ERL_NIF_SELECT_WRITE_CANCELLED;
1241             }
1242             if (mode & ERL_NIF_SELECT_ERROR
1243                 && is_not_nil(state->driver.nif->err.pid)) {
1244                 clear_select_event(&state->driver.nif->err);
1245                 ret |= ERL_NIF_SELECT_ERROR_CANCELLED;
1246             }
1247         }
1248         if (mode & ERL_NIF_SELECT_STOP) {
1249             ASSERT(state->events==0);
1250             if (!wake_poller) {
1251                 /*
1252                  * Safe to close fd now as it is not in pollset
1253                  * or there was no need to eject fd (kernel poll)
1254                  */
1255                 if (state->type == ERTS_EV_TYPE_NIF) {
1256                     ASSERT(state->driver.stop.resource == resource);
1257                     call_stop = CALL_STOP_AND_RELEASE;
1258                     state->driver.stop.resource = NULL;
1259                 }
1260                 else {
1261                     ASSERT(!state->driver.stop.resource);
1262                     call_stop = CALL_STOP;
1263                 }
1264                 state->type = ERTS_EV_TYPE_NONE;
1265                 ret |= ERL_NIF_SELECT_STOP_CALLED;
1266             }
1267             else {
1268                 /* Not safe to close fd, postpone stop_select callback. */
1269                 if (state->type == ERTS_EV_TYPE_NONE) {
1270                     ASSERT(!state->driver.stop.resource);
1271                     state->driver.stop.resource = resource;
1272                     enif_keep_resource(resource);
1273                 }
1274                 state->type = ERTS_EV_TYPE_STOP_NIF;
1275                 ret |= ERL_NIF_SELECT_STOP_SCHEDULED;
1276             }
1277             state->flags &= ~ERTS_EV_FLAG_WANT_ERROR;
1278         }
1279         else
1280             ASSERT(mode & ERL_NIF_SELECT_CANCEL);
1281     }
1282 
1283 done:
1284 
1285     check_fd_cleanup(state,
1286 		     &free_select,
1287                      &free_nif);
1288 
1289 done_unknown:
1290     erts_mtx_unlock(fd_mtx(fd));
1291     if (call_stop) {
1292         erts_resource_stop(resource, (ErlNifEvent)fd, 1);
1293         if (call_stop == CALL_STOP_AND_RELEASE) {
1294             enif_release_resource(resource->data);
1295         }
1296     }
1297     if (free_select)
1298 	free_drv_select_data(free_select);
1299     if (free_nif)
1300         free_nif_select_data(free_nif);
1301 
1302     return ret;
1303 }
1304 
1305 static ERTS_INLINE int
chk_stale(Eterm id,ErtsDrvEventState * state,int mode)1306 chk_stale(Eterm id, ErtsDrvEventState *state, int mode)
1307 {
1308     if (is_nil(id))
1309 	return 0;
1310     if (erts_is_port_alive(id))
1311 	return 1; /* Steal */
1312     stale_drv_select(id, state, mode);
1313     return 0;
1314 }
1315 
1316 static int
need2steal(ErtsDrvEventState * state,int mode)1317 need2steal(ErtsDrvEventState *state, int mode)
1318 {
1319     int do_steal = 0;
1320     switch (state->type) {
1321     case ERTS_EV_TYPE_DRV_SEL:
1322 	if (mode & ERL_DRV_READ)
1323 	    do_steal |= chk_stale(state->driver.select->inport,
1324 				  state,
1325 				  ERL_DRV_READ);
1326 	if (mode & ERL_DRV_WRITE)
1327 	    do_steal |= chk_stale(state->driver.select->outport,
1328 				  state,
1329 				  ERL_DRV_WRITE);
1330 	break;
1331     case ERTS_EV_TYPE_NIF:
1332         ASSERT(state->driver.stop.resource);
1333         do_steal = 1;
1334         break;
1335 
1336     case ERTS_EV_TYPE_STOP_USE:
1337     case ERTS_EV_TYPE_STOP_NIF:
1338         ASSERT(0);
1339 	break;
1340     default:
1341 	break;
1342     }
1343     return do_steal;
1344 }
1345 
1346 static void
print_driver_name(erts_dsprintf_buf_t * dsbufp,Eterm id)1347 print_driver_name(erts_dsprintf_buf_t *dsbufp, Eterm id)
1348 {
1349     ErtsPortNames *pnp = erts_get_port_names(id, ERTS_INVALID_ERL_DRV_PORT);
1350     if (!pnp->name && !pnp->driver_name)
1351 	erts_dsprintf(dsbufp, "%s ", "<unknown>");
1352     else {
1353 	if (pnp->name) {
1354 	    if (!pnp->driver_name || strcmp(pnp->driver_name, pnp->name) == 0)
1355 		erts_dsprintf(dsbufp, "%s ", pnp->name);
1356 	    else
1357 		erts_dsprintf(dsbufp, "%s (%s) ", pnp->driver_name, pnp->name);
1358 	}
1359 	else if (pnp->driver_name) {
1360 	    erts_dsprintf(dsbufp, "%s ", pnp->driver_name);
1361 	}
1362     }
1363     erts_free_port_names(pnp);
1364 }
1365 
1366 static void
steal(erts_dsprintf_buf_t * dsbufp,ErtsDrvEventState * state,int mode)1367 steal(erts_dsprintf_buf_t *dsbufp, ErtsDrvEventState *state, int mode)
1368 {
1369     erts_dsprintf(dsbufp, "stealing control of fd=%bpd from ", (SWord) state->fd);
1370     switch (state->type) {
1371     case ERTS_EV_TYPE_DRV_SEL: {
1372 	int deselect_mode = 0;
1373 	Eterm iid = state->driver.select->inport;
1374 	Eterm oid = state->driver.select->outport;
1375 	if ((mode & ERL_DRV_READ) && (is_not_nil(iid))) {
1376 	    erts_dsprintf(dsbufp, "input driver ");
1377 	    print_driver_name(dsbufp, iid);
1378 	    erts_dsprintf(dsbufp, "%T ", iid);
1379 	    deselect_mode |= ERL_DRV_READ;
1380 	}
1381 	if ((mode & ERL_DRV_WRITE) && is_not_nil(oid)) {
1382 	    if (deselect_mode) {
1383 	    erts_dsprintf(dsbufp, "and ");
1384 	    }
1385 	    erts_dsprintf(dsbufp, "output driver ");
1386 	    print_driver_name(dsbufp, oid);
1387 	    erts_dsprintf(dsbufp, "%T ", oid);
1388 	    deselect_mode |= ERL_DRV_WRITE;
1389 	}
1390 	if (deselect_mode)
1391 	    deselect(state, deselect_mode);
1392 	else {
1393 	    erts_dsprintf(dsbufp, "no one");
1394 	    ASSERT(0);
1395 	}
1396 	erts_dsprintf(dsbufp, "\n");
1397 	break;
1398     }
1399     case ERTS_EV_TYPE_NIF: {
1400         const Eterm iid = state->driver.nif->in.pid;
1401         const Eterm oid = state->driver.nif->out.pid;
1402         const Eterm eid = state->driver.nif->err.pid;
1403         const char* with = "with";
1404         ErlNifResourceType* rt = state->driver.stop.resource->type;
1405 
1406         erts_dsprintf(dsbufp, "resource %T:%T", rt->module, rt->name);
1407 
1408         if (is_not_nil(iid)) {
1409             erts_dsprintf(dsbufp, " %s in-pid %T", with, iid);
1410             with = "and";
1411         }
1412         if (is_not_nil(oid)) {
1413             erts_dsprintf(dsbufp, " %s out-pid %T", with, oid);
1414             with = "and";
1415         }
1416         if (is_not_nil(eid)) {
1417             erts_dsprintf(dsbufp, " %s err-pid %T", with, eid);
1418         }
1419         deselect(state, 0);
1420         erts_dsprintf(dsbufp, "\n");
1421         break;
1422     }
1423     case ERTS_EV_TYPE_STOP_USE:
1424     case ERTS_EV_TYPE_STOP_NIF: {
1425 	ASSERT(0);
1426 	break;
1427     }
1428     default:
1429 	erts_dsprintf(dsbufp, "no one\n");
1430 	ASSERT(0);
1431     }
1432 }
1433 
1434 static void
print_drv_select_op(erts_dsprintf_buf_t * dsbufp,ErlDrvPort ix,ErtsSysFdType fd,int mode,int on)1435 print_drv_select_op(erts_dsprintf_buf_t *dsbufp,
1436                     ErlDrvPort ix, ErtsSysFdType fd, int mode, int on)
1437 {
1438     Port *pp = erts_drvport2port(ix);
1439     erts_dsprintf(dsbufp,
1440 		  "driver_select(%p, %bpd,%s%s%s%s, %d) "
1441 		  "by ",
1442 		  ix,
1443 		  (SWord) fd,
1444 		  mode & ERL_DRV_READ ? " ERL_DRV_READ" : "",
1445 		  mode & ERL_DRV_WRITE ? " ERL_DRV_WRITE" : "",
1446 		  mode & ERL_DRV_USE ? " ERL_DRV_USE" : "",
1447 		  mode & (ERL_DRV_USE_NO_CALLBACK & ~ERL_DRV_USE) ? "_NO_CALLBACK" : "",
1448 		  on);
1449     print_driver_name(dsbufp, pp != ERTS_INVALID_ERL_DRV_PORT ? pp->common.id : NIL);
1450     erts_dsprintf(dsbufp, "driver %T ", pp != ERTS_INVALID_ERL_DRV_PORT ? pp->common.id : NIL);
1451 }
1452 
1453 static void
print_nif_select_op(erts_dsprintf_buf_t * dsbufp,ErtsSysFdType fd,int mode,ErtsResource * resource,Eterm ref)1454 print_nif_select_op(erts_dsprintf_buf_t *dsbufp,
1455                     ErtsSysFdType fd, int mode,
1456                     ErtsResource* resource, Eterm ref)
1457 {
1458     erts_dsprintf(dsbufp,
1459 		  "enif_select(_, %bpd,%s%s%s, %T:%T, %T) ",
1460 		  (SWord) fd,
1461 		  mode & ERL_NIF_SELECT_READ ? " READ" : "",
1462 		  mode & ERL_NIF_SELECT_WRITE ? " WRITE" : "",
1463 		  (mode & ERL_NIF_SELECT_STOP ? " STOP"
1464                    : (mode & ERL_NIF_SELECT_CANCEL ? " CANCEL" : "")),
1465 		  resource->type->module,
1466                   resource->type->name,
1467                   ref);
1468 }
1469 
1470 
1471 static void
drv_select_steal(ErlDrvPort ix,ErtsDrvEventState * state,int mode,int on)1472 drv_select_steal(ErlDrvPort ix, ErtsDrvEventState *state, int mode, int on)
1473 {
1474     if (need2steal(state, mode)) {
1475 	erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf();
1476 	print_drv_select_op(dsbufp, ix, state->fd, mode, on);
1477 	steal(dsbufp, state, mode);
1478 	erts_send_error_to_logger_nogl(dsbufp);
1479     }
1480 }
1481 
1482 static void
nif_select_steal(ErtsDrvEventState * state,int mode,ErtsResource * resource,Eterm ref)1483 nif_select_steal(ErtsDrvEventState *state, int mode,
1484                  ErtsResource* resource, Eterm ref)
1485 {
1486     if (need2steal(state, mode)) {
1487 	erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf();
1488 	print_nif_select_op(dsbufp, state->fd, mode, resource, ref);
1489 	steal(dsbufp, state, mode);
1490 	erts_send_error_to_logger_nogl(dsbufp);
1491     }
1492 }
1493 
1494 #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
1495 static void
large_fd_error_common(erts_dsprintf_buf_t * dsbufp,ErtsSysFdType fd)1496 large_fd_error_common(erts_dsprintf_buf_t *dsbufp, ErtsSysFdType fd)
1497 {
1498     erts_dsprintf(dsbufp,
1499 		  "fd=%d is larger than the largest allowed fd=%d\n",
1500 		  (int) fd, drv_ev_state.max_fds - 1);
1501 }
1502 
1503 static void
drv_select_large_fd_error(ErlDrvPort ix,ErtsSysFdType fd,int mode,int on)1504 drv_select_large_fd_error(ErlDrvPort ix, ErtsSysFdType fd, int mode, int on)
1505 {
1506     erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf();
1507     print_drv_select_op(dsbufp, ix, fd, mode, on);
1508     erts_dsprintf(dsbufp, "failed: ");
1509     large_fd_error_common(dsbufp, fd);
1510     erts_send_error_to_logger_nogl(dsbufp);
1511 }
1512 static void
nif_select_large_fd_error(ErtsSysFdType fd,int mode,ErtsResource * resource,Eterm ref)1513 nif_select_large_fd_error(ErtsSysFdType fd, int mode,
1514                           ErtsResource* resource, Eterm ref)
1515 {
1516     erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf();
1517     print_nif_select_op(dsbufp, fd, mode, resource, ref);
1518     erts_dsprintf(dsbufp, "failed: ");
1519     large_fd_error_common(dsbufp, fd);
1520     erts_send_error_to_logger_nogl(dsbufp);
1521 }
1522 #endif /* ERTS_SYS_CONTINOUS_FD_NUMBERS */
1523 
1524 
1525 
1526 static void
steal_pending_stop_use(erts_dsprintf_buf_t * dsbufp,ErlDrvPort ix,ErtsDrvEventState * state,int mode,int on)1527 steal_pending_stop_use(erts_dsprintf_buf_t *dsbufp, ErlDrvPort ix,
1528                        ErtsDrvEventState *state, int mode, int on)
1529 {
1530     int cancel = 0;
1531     ASSERT(state->type == ERTS_EV_TYPE_STOP_USE);
1532 
1533     if (on) {
1534 	/* Either fd-owner changed its mind about closing
1535 	 * or closed fd before stop_select callback and fd is now reused.
1536 	 * In either case stop_select should not be called.
1537 	 */
1538         cancel = 1;
1539     }
1540     else if ((mode & ERL_DRV_USE_NO_CALLBACK) == ERL_DRV_USE) {
1541 	Port *prt = erts_drvport2port(ix);
1542 	if (prt == ERTS_INVALID_ERL_DRV_PORT
1543             || prt->drv_ptr != state->driver.stop.drv_ptr) {
1544 	    /* Some other driver or nif wants the stop_select callback */
1545             cancel = 1;
1546         }
1547     }
1548 
1549     if (cancel) {
1550         erts_dsprintf(dsbufp, "called before stop_select was called for driver '%s'\n",
1551                       state->driver.stop.drv_ptr->name);
1552         if (state->driver.stop.drv_ptr->handle) {
1553             erts_ddll_dereference_driver(state->driver.stop.drv_ptr->handle);
1554         }
1555         state->type = ERTS_EV_TYPE_NONE;
1556         state->flags = 0;
1557         state->driver.stop.drv_ptr = NULL;
1558     }
1559     else {
1560         erts_dsprintf(dsbufp, "ignored repeated call\n");
1561     }
1562     erts_send_error_to_logger_nogl(dsbufp);
1563 }
1564 
1565 static void
steal_pending_stop_nif(erts_dsprintf_buf_t * dsbufp,ErtsResource * resource,ErtsDrvEventState * state,int mode,int on)1566 steal_pending_stop_nif(erts_dsprintf_buf_t *dsbufp, ErtsResource* resource,
1567                        ErtsDrvEventState *state, int mode, int on)
1568 {
1569     int cancel = 0;
1570 
1571     ASSERT(state->type == ERTS_EV_TYPE_STOP_NIF);
1572     ASSERT(state->driver.stop.resource);
1573 
1574     if (on) {
1575         ASSERT(mode & (ERL_NIF_SELECT_READ | ERL_NIF_SELECT_WRITE));
1576         /* Either fd-owner changed its mind about closing
1577          * or closed fd before stop callback and fd is now reused.
1578          * In either case, stop should not be called.
1579          */
1580         cancel = 1;
1581     }
1582     else if ((mode & ERL_DRV_USE_NO_CALLBACK) == ERL_DRV_USE
1583              && resource != state->driver.stop.resource) {
1584         /* Some driver or other resource wants the stop callback */
1585         cancel = 1;
1586     }
1587 
1588     if (cancel) {
1589         ErlNifResourceType* rt = state->driver.stop.resource->type;
1590         erts_dsprintf(dsbufp, "called before stop was called for NIF resource %T:%T\n",
1591                       rt->module, rt->name);
1592 
1593         enif_release_resource(state->driver.stop.resource->data);
1594         state->type = ERTS_EV_TYPE_NONE;
1595         state->flags = 0;
1596         state->driver.stop.resource = NULL;
1597     }
1598     else {
1599         erts_dsprintf(dsbufp, "ignored repeated call\n");
1600     }
1601     erts_send_error_to_logger_nogl(dsbufp);
1602 
1603 }
1604 
1605 static ERTS_INLINE int
io_task_schedule_allowed(ErtsDrvEventState * state,ErtsPortTaskType type)1606 io_task_schedule_allowed(ErtsDrvEventState *state,
1607 			 ErtsPortTaskType type)
1608 {
1609     ErtsIoTask *io_task;
1610 
1611     switch (type) {
1612     case ERTS_PORT_TASK_INPUT:
1613 	if (!state->driver.select)
1614 	    return 0;
1615 	io_task = &state->driver.select->iniotask;
1616 	break;
1617     case ERTS_PORT_TASK_OUTPUT:
1618 	if (!state->driver.select)
1619 	    return 0;
1620 	io_task = &state->driver.select->outiotask;
1621 	break;
1622     default:
1623 	ERTS_INTERNAL_ERROR("Invalid I/O-task type");
1624 	return 0;
1625     }
1626 
1627     return !is_iotask_active(io_task);
1628 }
1629 
1630 static ERTS_INLINE void
iready(Eterm id,ErtsDrvEventState * state)1631 iready(Eterm id, ErtsDrvEventState *state)
1632 {
1633     if (io_task_schedule_allowed(state,
1634 				 ERTS_PORT_TASK_INPUT)) {
1635 	ErtsIoTask *iotask = &state->driver.select->iniotask;
1636 	if (erts_port_task_schedule(id,
1637 				    &iotask->task,
1638 				    ERTS_PORT_TASK_INPUT,
1639 				    (ErlDrvEvent) state->fd,
1640                                     state->flags & ERTS_EV_FLAG_IN_SCHEDULER) != 0) {
1641 	    stale_drv_select(id, state, ERL_DRV_READ);
1642 	} else {
1643             DEBUG_PRINT_FD("schedule ready_input(%T, %d)",
1644                            state, id, state->fd);
1645         }
1646     }
1647 }
1648 
1649 static ERTS_INLINE void
oready(Eterm id,ErtsDrvEventState * state)1650 oready(Eterm id, ErtsDrvEventState *state)
1651 {
1652     if (io_task_schedule_allowed(state,
1653 				 ERTS_PORT_TASK_OUTPUT)) {
1654 	ErtsIoTask *iotask = &state->driver.select->outiotask;
1655 	if (erts_port_task_schedule(id,
1656 				    &iotask->task,
1657 				    ERTS_PORT_TASK_OUTPUT,
1658 				    (ErlDrvEvent) state->fd,
1659                                     0) != 0) {
1660 	    stale_drv_select(id, state, ERL_DRV_WRITE);
1661 	} else {
1662             DEBUG_PRINT_FD("schedule ready_output(%T, %d)", state, id, state->fd);
1663         }
1664     }
1665 }
1666 
1667 static void bad_fd_in_pollset(ErtsDrvEventState *, Eterm inport, Eterm outport);
1668 
1669 void
erts_check_io_interrupt(ErtsPollThread * psi,int set)1670 erts_check_io_interrupt(ErtsPollThread *psi, int set)
1671 {
1672     if (psi) {
1673 #if ERTS_POLL_USE_FALLBACK
1674         if (psi->ps == get_fallback_pollset()) {
1675             erts_poll_interrupt_flbk(psi->ps, set);
1676             return;
1677         }
1678 #endif
1679         erts_poll_interrupt(psi->ps, set);
1680     }
1681 }
1682 
1683 ErtsPollThread *
erts_create_pollset_thread(int id,ErtsThrPrgrData * tpd)1684 erts_create_pollset_thread(int id, ErtsThrPrgrData *tpd) {
1685     psiv[id].tpd = tpd;
1686     return psiv+id;
1687 }
1688 
1689 void
erts_check_io(ErtsPollThread * psi,ErtsMonotonicTime timeout_time,int poll_only_thread)1690 erts_check_io(ErtsPollThread *psi, ErtsMonotonicTime timeout_time, int poll_only_thread)
1691 {
1692     int pollres_len;
1693     int poll_ret, i;
1694     ERTS_MSACC_PUSH_AND_SET_STATE(ERTS_MSACC_STATE_CHECK_IO);
1695 
1696  restart:
1697 
1698 #ifdef ERTS_ENABLE_LOCK_CHECK
1699     erts_lc_check_exact(NULL, 0); /* No locks should be locked */
1700 #endif
1701 
1702     pollres_len = psi->pollres_len;
1703 
1704     if (poll_only_thread)
1705         erts_thr_progress_active(psi->tpd, 0);
1706 
1707 #if ERTS_POLL_USE_FALLBACK
1708     if (psi->ps == get_fallback_pollset()) {
1709 
1710         poll_ret = erts_poll_wait_flbk(psi->ps, psi->pollres, &pollres_len, psi->tpd, timeout_time);
1711 
1712     } else
1713 #endif
1714     {
1715         poll_ret = erts_poll_wait(psi->ps, psi->pollres, &pollres_len, psi->tpd, timeout_time);
1716     }
1717 
1718     if (poll_only_thread)
1719         erts_thr_progress_active(psi->tpd, 1);
1720 
1721 #ifdef ERTS_ENABLE_LOCK_CHECK
1722     erts_lc_check_exact(NULL, 0); /* No locks should be locked */
1723 #endif
1724 
1725     if (poll_ret != 0) {
1726 
1727 	if (poll_ret == EAGAIN) {
1728 	    goto restart;
1729 	}
1730 
1731 	if (poll_ret != ETIMEDOUT
1732 	    && poll_ret != EINTR
1733 #ifdef ERRNO_BLOCK
1734 	    && poll_ret != ERRNO_BLOCK
1735 #endif
1736 	    ) {
1737 	    erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf();
1738 	    erts_dsprintf(dsbufp, "erts_poll_wait() failed: %s (%d)\n",
1739 			  erl_errno_id(poll_ret), poll_ret);
1740 	    erts_send_error_to_logger_nogl(dsbufp);
1741 	}
1742         ERTS_MSACC_POP_STATE();
1743 	return;
1744     }
1745 
1746     for (i = 0; i < pollres_len; i++) {
1747 
1748         erts_driver_t* drv_ptr = NULL;
1749         ErtsResource* resource = NULL;
1750         ErtsDrvSelectDataState *free_select = NULL;
1751         ErtsNifSelectDataState *free_nif = NULL;
1752 	ErtsSysFdType fd = (ErtsSysFdType) ERTS_POLL_RES_GET_FD(&psi->pollres[i]);
1753 	ErtsDrvEventState *state;
1754         ErtsPollEvents revents = ERTS_POLL_RES_GET_EVTS(&psi->pollres[i]);
1755 
1756         /* The fd will be set to INVALID if a pollset internal fd was triggered
1757            that was determined to be too expensive to remove from the result.
1758         */
1759         if (fd == ERTS_SYS_FD_INVALID) continue;
1760 
1761 	erts_mtx_lock(fd_mtx(fd));
1762 
1763 	state = get_drv_ev_state(fd);
1764 
1765 	if (!state) {
1766             erts_mtx_unlock(fd_mtx(fd));
1767             continue;
1768 	}
1769 
1770         DEBUG_PRINT_FD("triggered %s", state, ev2str(revents));
1771 
1772         if (revents & ERTS_POLL_EV_ERR
1773             && !(state->flags & ERTS_EV_FLAG_WANT_ERROR)) {
1774             /*
1775              * Handle error events by triggering all in/out events
1776              * that has been selected on.
1777              * We *do not* want to call a callback that corresponds
1778              * to an event not selected.
1779              */
1780             revents = state->active_events;
1781             state->active_events = 0;
1782 
1783             if (state->flags & ERTS_EV_FLAG_IN_SCHEDULER) {
1784                 erts_io_control(state, ERTS_POLL_OP_MOD, 0);
1785                 state->flags &= ~ERTS_EV_FLAG_IN_SCHEDULER;
1786             }
1787         } else {
1788 
1789             /* Disregard any events that are not active at the moment,
1790                for instance this could happen if the driver/nif does
1791                select/deselect in rapid succession. */
1792             revents &= state->active_events | ERTS_POLL_EV_NVAL;
1793 
1794             if (psi->ps != get_scheduler_pollset(fd) || !ERTS_POLL_USE_SCHEDULER_POLLING) {
1795                 ErtsPollEvents reactive_events;
1796                 state->active_events &= ~revents;
1797 
1798                 reactive_events = state->active_events;
1799 
1800                 if (state->flags & ERTS_EV_FLAG_IN_SCHEDULER) {
1801                     reactive_events &= ~ERTS_POLL_EV_IN;
1802                     state->active_events |= ERTS_POLL_EV_IN;
1803                 }
1804 
1805                 /* Reactivate the poll op if there are still active events */
1806                 if (reactive_events) {
1807                     ErtsPollEvents new_events;
1808                     DEBUG_PRINT_FD("re-enable %s", state, ev2str(reactive_events));
1809 
1810                     new_events = erts_io_control(state, ERTS_POLL_OP_MOD, reactive_events);
1811 
1812                     /* Unable to re-enable the fd, signal all callbacks */
1813                     if (new_events & ERTS_POLL_EV_NVAL) {
1814                         revents |= reactive_events;
1815                         state->active_events &= ~reactive_events;
1816                     }
1817                 }
1818             }
1819         }
1820 
1821 	switch (state->type) {
1822 	case ERTS_EV_TYPE_DRV_SEL: { /* Requested via driver_select()... */
1823 
1824             if (revents & (ERTS_POLL_EV_IN|ERTS_POLL_EV_OUT)) {
1825 		if (revents & ERTS_POLL_EV_OUT) {
1826 		    oready(state->driver.select->outport, state);
1827 		}
1828 		/* Someone might have deselected input since revents
1829 		   was read (true also on the non-smp emulator since
1830 		   oready() may have been called); therefore, update
1831 		   revents... */
1832                 revents &= state->events;
1833 		if (revents & ERTS_POLL_EV_IN) {
1834 		    iready(state->driver.select->inport, state);
1835 		}
1836 	    }
1837 	    else if (revents & ERTS_POLL_EV_NVAL) {
1838 		bad_fd_in_pollset(state,
1839                                   state->driver.select->inport,
1840                                   state->driver.select->outport);
1841                 check_fd_cleanup(state, &free_select, &free_nif);
1842 	    }
1843 	    break;
1844 	}
1845 
1846         case ERTS_EV_TYPE_NIF: { /* Requested via enif_select()... */
1847             struct erts_nif_select_event in_ev = {NIL};
1848             struct erts_nif_select_event out_ev = {NIL};
1849             struct erts_nif_select_event err_ev = {NIL};
1850 
1851             if (revents & (ERTS_POLL_EV_IN | ERTS_POLL_EV_OUT | ERTS_POLL_EV_ERR)) {
1852                 if (revents & ERTS_POLL_EV_OUT) {
1853                     if (is_not_nil(state->driver.nif->out.pid)) {
1854                         out_ev = state->driver.nif->out;
1855                         resource = state->driver.stop.resource;
1856                         state->driver.nif->out.pid = NIL;
1857                         state->driver.nif->out.mp = NULL;
1858                     }
1859                 }
1860                 if (revents & ERTS_POLL_EV_IN) {
1861                     if (is_not_nil(state->driver.nif->in.pid)) {
1862                         in_ev = state->driver.nif->in;
1863                         resource = state->driver.stop.resource;
1864                         state->driver.nif->in.pid = NIL;
1865                         state->driver.nif->in.mp = NULL;
1866                     }
1867                 }
1868                 if (revents & ERTS_POLL_EV_ERR) {
1869                     if (is_not_nil(state->driver.nif->err.pid)) {
1870                         err_ev = state->driver.nif->err;
1871                         resource = state->driver.stop.resource;
1872                         state->driver.nif->err.pid = NIL;
1873                         state->driver.nif->err.mp = NULL;
1874                     }
1875                 }
1876                 state->events &= ~revents;
1877             }
1878             else if (revents & ERTS_POLL_EV_NVAL) {
1879                 bad_fd_in_pollset(state, NIL, NIL);
1880                 check_fd_cleanup(state, &free_select, &free_nif);
1881             }
1882 
1883             erts_mtx_unlock(fd_mtx(fd));
1884 
1885             if (is_not_nil(in_ev.pid)) {
1886                 send_select_msg(&in_ev);
1887             }
1888             if (is_not_nil(out_ev.pid)) {
1889                 send_select_msg(&out_ev);
1890             }
1891             if (is_not_nil(err_ev.pid)) {
1892                 send_select_msg(&err_ev);
1893             }
1894             continue;
1895         }
1896 
1897         case ERTS_EV_TYPE_STOP_NIF: {
1898             resource = state->driver.stop.resource;
1899             state->type = ERTS_EV_TYPE_NONE;
1900             goto case_ERTS_EV_TYPE_NONE;
1901         }
1902 
1903         case ERTS_EV_TYPE_STOP_USE: {
1904 #if ERTS_POLL_USE_FALLBACK
1905             ASSERT(psi->ps == get_fallback_pollset());
1906 #endif
1907             drv_ptr = state->driver.stop.drv_ptr;
1908             state->type = ERTS_EV_TYPE_NONE;
1909             /* fallthrough */
1910 	case ERTS_EV_TYPE_NONE: /* Deselected ... */
1911         case_ERTS_EV_TYPE_NONE:
1912             ASSERT(!state->events && !state->active_events && !state->flags);
1913             check_fd_cleanup(state, &free_select, &free_nif);
1914 	    break;
1915         }
1916 
1917 	default: { /* Error */
1918 	    erts_dsprintf_buf_t *dsbufp;
1919 	    dsbufp = erts_create_logger_dsbuf();
1920 	    erts_dsprintf(dsbufp,
1921 			  "Invalid event request type for fd in erts_poll()! "
1922 			  "fd=%bpd, event request type=%d\n", (SWord) state->fd,
1923 			  (int) state->type);
1924 	    ASSERT(0);
1925 	    deselect(state, 0);
1926 	    break;
1927 	}
1928 	}
1929 
1930 	erts_mtx_unlock(fd_mtx(fd));
1931 
1932         if (drv_ptr) {
1933             DTRACE1(driver_stop_select, drv_ptr->name);
1934             LTTNG1(driver_stop_select, drv_ptr->name);
1935             (*drv_ptr->stop_select)((ErlDrvEvent) fd, NULL);
1936             if (drv_ptr->handle) {
1937 		erts_ddll_dereference_driver(drv_ptr->handle);
1938 	    }
1939         }
1940         if (resource) {
1941             erts_resource_stop(resource, (ErlNifEvent)fd, 0);
1942             enif_release_resource(resource->data);
1943         }
1944         if (free_select)
1945             free_drv_select_data(free_select);
1946         if (free_nif)
1947             free_nif_select_data(free_nif);
1948     }
1949 
1950     /* The entire pollres array was filled with events,
1951      * grow it for the next call. We do this for two reasons:
1952      * 1. Pulling out more events in on go will increase throughput
1953      * 2. If the polling implementation is not fair, this will make
1954      *    sure that we get all fds that we can. i.e. if 12 fds are
1955      *    constantly active, but we only have a pollres_len of 10,
1956      *    two of the fds may never be triggered depending on what the
1957      *    kernel decides to do.
1958      **/
1959     if (pollres_len == psi->pollres_len) {
1960         int ev_state_len = drv_ev_state_len();
1961         erts_free(ERTS_ALC_T_POLLSET, psi->pollres);
1962         psi->pollres_len *= 2;
1963         /* Never grow it larger than the current drv_ev_state.len size */
1964         if (psi->pollres_len > ev_state_len)
1965             psi->pollres_len = ev_state_len;
1966         psi->pollres = erts_alloc(ERTS_ALC_T_POLLSET,
1967                                   sizeof(ErtsPollResFd) * psi->pollres_len);
1968     }
1969 
1970     ERTS_MSACC_POP_STATE();
1971 }
1972 
1973 static void
bad_fd_in_pollset(ErtsDrvEventState * state,Eterm inport,Eterm outport)1974 bad_fd_in_pollset(ErtsDrvEventState *state, Eterm inport, Eterm outport)
1975 {
1976     ErtsPollEvents events = state->events;
1977     erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf();
1978 
1979     if (events & (ERTS_POLL_EV_IN|ERTS_POLL_EV_OUT)) {
1980 	char *io_str;
1981 	Eterm port = NIL;
1982 	if ((events & ERTS_POLL_EV_IN) && (events & ERTS_POLL_EV_OUT)) {
1983 	    io_str = "input/output";
1984 	    if (inport == outport)
1985 		port = inport;
1986 	}
1987 	else {
1988 	    if (events & ERTS_POLL_EV_IN) {
1989 		io_str = "input";
1990 		port = inport;
1991 	    }
1992 	    else {
1993 		io_str = "output";
1994 		port = outport;
1995 	    }
1996 	}
1997 	erts_dsprintf(dsbufp,
1998 		      "Bad %s fd in erts_poll()! fd=%bpd, ",
1999 		      io_str, (SWord) state->fd);
2000         if (state->type == ERTS_EV_TYPE_DRV_SEL) {
2001             if (is_nil(port)) {
2002                 ErtsPortNames *ipnp = erts_get_port_names(inport, ERTS_INVALID_ERL_DRV_PORT);
2003                 ErtsPortNames *opnp = erts_get_port_names(outport, ERTS_INVALID_ERL_DRV_PORT);
2004                 erts_dsprintf(dsbufp, "ports=%T/%T, drivers=%s/%s, names=%s/%s\n",
2005                               is_nil(inport) ? am_undefined : inport,
2006                               is_nil(outport) ? am_undefined : outport,
2007                               ipnp->driver_name ? ipnp->driver_name : "<unknown>",
2008                               opnp->driver_name ? opnp->driver_name : "<unknown>",
2009                               ipnp->name ? ipnp->name : "<unknown>",
2010                               opnp->name ? opnp->name : "<unknown>");
2011                 erts_free_port_names(ipnp);
2012                 erts_free_port_names(opnp);
2013             }
2014             else {
2015                 ErtsPortNames *pnp = erts_get_port_names(port, ERTS_INVALID_ERL_DRV_PORT);
2016                 erts_dsprintf(dsbufp, "port=%T, driver=%s, name=%s\n",
2017                               is_nil(port) ? am_undefined : port,
2018                               pnp->driver_name ? pnp->driver_name : "<unknown>",
2019                               pnp->name ? pnp->name : "<unknown>");
2020                 erts_free_port_names(pnp);
2021             }
2022         }
2023         else {
2024             ErlNifResourceType* rt;
2025             ASSERT(state->type == ERTS_EV_TYPE_NIF);
2026             ASSERT(state->driver.stop.resource);
2027             rt = state->driver.stop.resource->type;
2028             erts_dsprintf(dsbufp, "resource={%T,%T}\n", rt->module, rt->name);
2029         }
2030     }
2031     else {
2032 	erts_dsprintf(dsbufp, "Bad fd in erts_poll()! fd=%bpd\n",
2033 		      (SWord) state->fd);
2034     }
2035     erts_send_error_to_logger_nogl(dsbufp);
2036 
2037     /* unmap entry */
2038     deselect(state, 0);
2039 }
2040 
2041 static void
stale_drv_select(Eterm id,ErtsDrvEventState * state,int mode)2042 stale_drv_select(Eterm id, ErtsDrvEventState *state, int mode)
2043 {
2044     erts_stale_drv_select(id, ERTS_INVALID_ERL_DRV_PORT, (ErlDrvEvent) state->fd, mode, 0);
2045     deselect(state, mode);
2046 }
2047 
2048 #ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS
2049 
drv_ev_state_hash(void * des)2050 static SafeHashValue drv_ev_state_hash(void *des)
2051 {
2052     SafeHashValue val = (SafeHashValue)(SWord) ((ErtsDrvEventState *) des)->fd;
2053     return val ^ (val >> 8);  /* Good enough for aligned pointer values? */
2054 }
2055 
drv_ev_state_cmp(void * des1,void * des2)2056 static int drv_ev_state_cmp(void *des1, void *des2)
2057 {
2058     return ( ((ErtsDrvEventState *) des1)->fd == ((ErtsDrvEventState *) des2)->fd
2059 	    ? 0 : 1);
2060 }
2061 
drv_ev_state_alloc(void * des_tmpl)2062 static void *drv_ev_state_alloc(void *des_tmpl)
2063 {
2064     ErtsDrvEventState *evstate;
2065     erts_spin_lock(&drv_ev_state.prealloc_lock);
2066     if (drv_ev_state.prealloc_first == NULL) {
2067 	erts_spin_unlock(&drv_ev_state.prealloc_lock);
2068 	evstate = (ErtsDrvEventState *)
2069 	    erts_alloc(ERTS_ALC_T_DRV_EV_STATE, sizeof(ErtsDrvEventState));
2070     } else {
2071 	evstate = drv_ev_state.prealloc_first;
2072 	drv_ev_state.prealloc_first = (ErtsDrvEventState *) evstate->hb.next;
2073 	--drv_ev_state.num_prealloc;
2074 	erts_spin_unlock(&drv_ev_state.prealloc_lock);
2075     }
2076     /* XXX: Already valid data if prealloced, could ignore template! */
2077     *evstate = *((ErtsDrvEventState *) des_tmpl);
2078 
2079     return (void *) evstate;
2080 }
2081 
drv_ev_state_free(void * des)2082 static void drv_ev_state_free(void *des)
2083 {
2084     erts_spin_lock(&drv_ev_state.prealloc_lock);
2085     ((ErtsDrvEventState *) des)->hb.next = &drv_ev_state.prealloc_first->hb;
2086     drv_ev_state.prealloc_first = (ErtsDrvEventState *) des;
2087     ++drv_ev_state.num_prealloc;
2088     erts_spin_unlock(&drv_ev_state.prealloc_lock);
2089 }
2090 #endif
2091 
2092 #define ERTS_MAX_NO_OF_POLL_THREADS ERTS_MAX_NO_OF_SCHEDULERS
2093 
/*
 * Return the value belonging to the option at argv[*ip].
 *
 * `rest` points at whatever follows the option name inside argv[*ip].
 * If it is empty, the value is taken from the next argv entry (a
 * missing next entry prints an error and calls erts_usage()), and *ip
 * is advanced to that entry. Every argv slot consumed is set to NULL
 * so the caller can compact argv afterwards.
 */
static char *
get_arg(char* rest, char** argv, int* ip)
{
    int idx = *ip;
    char *value = rest;

    if (value[0] == '\0') {
        /* Value is a separate argument */
        if (!argv[idx + 1]) {
            erts_fprintf(stderr, "too few arguments\n");
            erts_usage();
        }
        argv[idx] = NULL;
        idx += 1;
        value = argv[idx];
    }

    argv[idx] = NULL;
    *ip = idx;
    return value;
}
2110 
2111 static void
parse_args(int * argc,char ** argv,int concurrent_waiters)2112 parse_args(int *argc, char **argv, int concurrent_waiters)
2113 {
2114     int i = 0, j;
2115     int no_pollsets = 0, no_poll_threads = 0,
2116         no_pollsets_percentage = 0,
2117         no_poll_threads_percentage = 0;
2118     ASSERT(argc && argv);
2119     while (i < *argc) {
2120 	if(argv[i][0] == '-') {
2121 	    switch (argv[i][1]) {
2122             case 'I': {
2123                 if (strncmp(argv[i]+2, "Ot", 2) == 0) {
2124                     char *arg = get_arg(argv[i]+4, argv, &i);
2125                     if (sscanf(arg, "%d", &no_poll_threads) != 1 ||
2126                         no_poll_threads < 1 ||
2127                         ERTS_MAX_NO_OF_POLL_THREADS < no_poll_threads) {
2128                         erts_fprintf(stderr,"bad I/O poll threads number: %s\n", arg);
2129                         erts_usage();
2130                     }
2131                 } else if (strncmp(argv[i]+2, "Op", 3) == 0) {
2132                     char *arg = get_arg(argv[i]+4, argv, &i);
2133                     if (sscanf(arg, "%d", &no_pollsets) != 1 ||
2134                         no_pollsets < 1) {
2135                         erts_fprintf(stderr,"bad I/O pollset number: %s\n", arg);
2136                         erts_usage();
2137                     }
2138                 } else if (strncmp(argv[i]+2, "OPt", 4) == 0) {
2139                     char *arg = get_arg(argv[i]+5, argv, &i);
2140                     if (sscanf(arg, "%d", &no_poll_threads_percentage) != 1 ||
2141                         no_poll_threads_percentage < 0 ||
2142                         no_poll_threads_percentage > 100) {
2143                         erts_fprintf(stderr,"bad I/O poll thread percentage number: %s\n", arg);
2144                         erts_usage();
2145                     }
2146                 } else if (strncmp(argv[i]+2, "OPp", 4) == 0) {
2147                     char *arg = get_arg(argv[i]+5, argv, &i);
2148                     if (sscanf(arg, "%d", &no_pollsets_percentage) != 1 ||
2149                         no_pollsets_percentage < 0 ||
2150                         no_pollsets_percentage > 100) {
2151                         erts_fprintf(stderr,"bad I/O pollset percentage number: %s\n", arg);
2152                         erts_usage();
2153                     }
2154                 } else {
2155                     break;
2156                 }
2157                 break;
2158             }
2159             case 'K':
2160                 (void)get_arg(argv[i]+2, argv, &i);
2161                 break;
2162             case '-':
2163                 goto args_parsed;
2164             default:
2165                 break;
2166             }
2167         }
2168         i++;
2169     }
2170 
2171 args_parsed:
2172 
2173     if (!concurrent_waiters) {
2174         no_pollsets = no_poll_threads;
2175         no_pollsets_percentage = 100;
2176     }
2177 
2178     if (no_poll_threads == 0) {
2179         if (no_poll_threads_percentage == 0)
2180             no_poll_threads = 1; /* This is the default */
2181         else {
2182             no_poll_threads = erts_no_schedulers * no_poll_threads_percentage / 100;
2183             if (no_poll_threads < 1)
2184                 no_poll_threads = 1;
2185         }
2186     }
2187 
2188     if (no_pollsets == 0) {
2189         if (no_pollsets_percentage == 0)
2190             no_pollsets = 1; /* This is the default */
2191         else {
2192             no_pollsets = no_poll_threads * no_pollsets_percentage / 100;
2193             if (no_pollsets < 1)
2194                 no_pollsets = 1;
2195         }
2196     }
2197 
2198     if (no_poll_threads < no_pollsets) {
2199         erts_fprintf(stderr,
2200                      "number of IO poll threads has to be greater or equal to "
2201                      "the number of \nIO pollsets. Current values are set to: \n"
2202                      "  -IOt %d -IOp %d\n",
2203                      no_poll_threads, no_pollsets);
2204         erts_usage();
2205     }
2206 
2207     /* Handled arguments have been marked with NULL. Slide arguments
2208        not handled towards the beginning of argv. */
2209     for (i = 0, j = 0; i < *argc; i++) {
2210 	if (argv[i])
2211 	    argv[j++] = argv[i];
2212     }
2213     *argc = j;
2214 
2215     erts_no_pollsets = no_pollsets;
2216     erts_no_poll_threads = no_poll_threads;
2217 }
2218 
/*
 * Initialise the check-I/O subsystem.
 *
 * Initialises the poll implementation(s), parses the check-I/O flags
 * out of argv (see parse_args()), creates the pollsets, sets up the
 * per-poll-thread data in psiv, initialises the fd state locks and
 * finally the fd state storage (array bookkeeping or hash table,
 * depending on ERTS_SYS_CONTINOUS_FD_NUMBERS).
 */
void
erts_init_check_io(int *argc, char **argv)
{
    int j, concurrent_waiters, no_poll_threads;
    /* Compile-time check: none of these flags may use the INT_MIN bit */
    ERTS_CT_ASSERT((INT_MIN & (ERL_NIF_SELECT_STOP_CALLED |
                               ERL_NIF_SELECT_STOP_SCHEDULED |
                               ERL_NIF_SELECT_INVALID_EVENT |
                               ERL_NIF_SELECT_FAILED)) == 0);


    /* concurrent_waiters is filled in here and consumed by parse_args() */
    erts_poll_init(&concurrent_waiters);
#if ERTS_POLL_USE_FALLBACK
    erts_poll_init_flbk(NULL);
#endif

    parse_args(argc, argv, concurrent_waiters);

    /* Create the actual pollsets */
    pollsetv = erts_alloc(ERTS_ALC_T_POLLSET,sizeof(ErtsPollSet *) * erts_no_pollsets);

    for (j=0; j < erts_no_pollsets; j++)
        pollsetv[j] = erts_poll_create_pollset(j);

    no_poll_threads = erts_no_poll_threads;

    /* The special pollsets below get negative ids, starting at -1 */
    j = -1;

#if ERTS_POLL_USE_SCHEDULER_POLLING
    sched_pollset = erts_poll_create_pollset(j--);
    no_poll_threads++;
#endif

#if ERTS_POLL_USE_FALLBACK
    flbk_pollset = erts_poll_create_pollset_flbk(j--);
    no_poll_threads++;
#endif

    /* One slot per ordinary poll thread plus one per special pollset */
    psiv = erts_alloc(ERTS_ALC_T_POLLSET, sizeof(ErtsPollThread) * no_poll_threads);

    /* Each special pollset takes the current first slot, after which
       psiv is advanced past it. The ordinary poll threads therefore
       end up at psiv[0..erts_no_poll_threads-1] and the special slots
       at negative indices. */
#if ERTS_POLL_USE_FALLBACK
    psiv[0].pollres_len = ERTS_CHECK_IO_POLL_RES_LEN;
    psiv[0].pollres = erts_alloc(ERTS_ALC_T_POLLSET,
        sizeof(ErtsPollResFd) * ERTS_CHECK_IO_POLL_RES_LEN);
    psiv[0].ps = get_fallback_pollset();
    psiv++;
#endif

#if ERTS_POLL_USE_SCHEDULER_POLLING
    psiv[0].pollres_len = ERTS_CHECK_IO_POLL_RES_LEN;
    psiv[0].pollres = erts_alloc(ERTS_ALC_T_POLLSET,
        sizeof(ErtsPollResFd) * ERTS_CHECK_IO_POLL_RES_LEN);
    psiv[0].ps = get_scheduler_pollset(0);
    psiv++;
#endif

    /* Ordinary poll threads are spread round-robin over the pollsets */
    for (j = 0; j < erts_no_poll_threads; j++) {
        psiv[j].pollres_len = ERTS_CHECK_IO_POLL_RES_LEN;
        psiv[j].pollres = erts_alloc(ERTS_ALC_T_POLLSET,
                                      sizeof(ErtsPollResFd) * ERTS_CHECK_IO_POLL_RES_LEN);
        psiv[j].ps = pollsetv[j % erts_no_pollsets];
    }

    for (j=0; j < ERTS_CHECK_IO_DRV_EV_STATE_LOCK_CNT; j++) {
        erts_mtx_init(&drv_ev_state.locks[j].lck, "drv_ev_state", make_small(j),
                          ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_IO);
    }

#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
    /* Array-based fd state: starts empty (len 0, no vector) */
    drv_ev_state.max_fds = erts_poll_max_fds();
    erts_atomic_init_nob(&drv_ev_state.len, 0);
    drv_ev_state.v = NULL;
    erts_mtx_init(&drv_ev_state.grow_lock, "drv_ev_state_grow", NIL,
        ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_IO);
#else
    {
	/* Hash-table-based fd state with an initially empty free list */
	SafeHashFunctions hf;
	hf.hash = &drv_ev_state_hash;
	hf.cmp = &drv_ev_state_cmp;
	hf.alloc = &drv_ev_state_alloc;
	hf.free = &drv_ev_state_free;
	drv_ev_state.num_prealloc = 0;
	drv_ev_state.prealloc_first = NULL;
	erts_spinlock_init(&drv_ev_state.prealloc_lock, "state_prealloc", NIL,
                               ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_IO);
	safe_hash_init(ERTS_ALC_T_DRV_EV_STATE, &drv_ev_state.tab, "drv_ev_state_tab",
            ERTS_LOCK_FLAGS_CATEGORY_IO, DRV_EV_STATE_HTAB_SIZE, hf);
    }
#endif
}
2308 
/*
 * Return the maximum number of fds: the value cached in
 * drv_ev_state.max_fds when ERTS_SYS_CONTINOUS_FD_NUMBERS is defined,
 * otherwise whatever erts_poll_max_fds() reports.
 */
int
erts_check_io_max_files(void)
{
    int max_files;

#ifdef  ERTS_SYS_CONTINOUS_FD_NUMBERS
    max_files = drv_ev_state.max_fds;
#else
    max_files = erts_poll_max_fds();
#endif

    return max_files;
}
2318 
2319 Uint
erts_check_io_size(void)2320 erts_check_io_size(void)
2321 {
2322     Uint res = 0;
2323     ErtsPollInfo pi;
2324     int i;
2325 
2326 #if ERTS_POLL_USE_FALLBACK
2327     erts_poll_info(get_fallback_pollset(), &pi);
2328     res += pi.memory_size;
2329 #endif
2330 
2331 #if ERTS_POLL_USE_SCHEDULER_POLLING
2332     erts_poll_info(get_scheduler_pollset(0), &pi);
2333     res += pi.memory_size;
2334 #endif
2335 
2336     for (i = 0; i < erts_no_pollsets; i++) {
2337         erts_poll_info(pollsetv[i], &pi);
2338         res += pi.memory_size;
2339     }
2340 #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
2341     res += sizeof(ErtsDrvEventState) * erts_atomic_read_nob(&drv_ev_state.len);
2342 #else
2343     res += safe_hash_table_sz(&drv_ev_state.tab);
2344     {
2345 	SafeHashInfo hi;
2346 	safe_hash_get_info(&hi, &drv_ev_state.tab);
2347 	res += hi.objs * sizeof(ErtsDrvEventState);
2348     }
2349     erts_spin_lock(&drv_ev_state.prealloc_lock);
2350     res += drv_ev_state.num_prealloc * sizeof(ErtsDrvEventState);
2351     erts_spin_unlock(&drv_ev_state.prealloc_lock);
2352 #endif
2353     return res;
2354 }
2355 
/*
 * Build and return an Erlang list with one property list per pollset
 * (fallback and scheduler pollsets included when compiled in), on the
 * heap of the process `proc`.
 *
 * The term is built in two passes over the same code: a first pass
 * with hpp == NULL that only accumulates the needed heap size in sz,
 * and a second pass (after HAlloc) that actually writes the term.
 */
Eterm
erts_check_io_info(void *proc)
{
    Process *p = (Process *) proc;
    Eterm tags[16], values[16], res, list = NIL;
    Uint sz, *szp, *hp, **hpp;
    ErtsPollInfo *piv;
    Sint i, j = 0, len;
    int no_pollsets = erts_no_pollsets + ERTS_POLL_USE_FALLBACK + ERTS_POLL_USE_SCHEDULER_POLLING;
    /* The macros are used as 0/1 counts above and as pointer offsets below */
    ERTS_CT_ASSERT(ERTS_POLL_USE_FALLBACK == 0 || ERTS_POLL_USE_FALLBACK == 1);
    ERTS_CT_ASSERT(ERTS_POLL_USE_SCHEDULER_POLLING == 0 || ERTS_POLL_USE_SCHEDULER_POLLING == 1);

    piv = erts_alloc(ERTS_ALC_T_TMP, sizeof(ErtsPollInfo) * no_pollsets);

    /* Special pollsets take the first slot(s); piv is advanced past
       them so that piv[0..erts_no_pollsets-1] are the ordinary ones. */
#if ERTS_POLL_USE_FALLBACK
    erts_poll_info_flbk(get_fallback_pollset(), &piv[0]);
    piv[0].poll_threads = 0;
    piv[0].active_fds = 0;
    piv++;
#endif

#if ERTS_POLL_USE_SCHEDULER_POLLING
    erts_poll_info(get_scheduler_pollset(0), &piv[0]);
    piv[0].poll_threads = 0;
    piv[0].active_fds = 0;
    piv++;
#endif

    for (j = 0; j < erts_no_pollsets; j++) {
        erts_poll_info(pollsetv[j], &piv[j]);
        piv[j].active_fds = 0;
        /* Even share of the poll threads; the first
           (erts_no_poll_threads % erts_no_pollsets) pollsets get one extra */
        piv[j].poll_threads = erts_no_poll_threads / erts_no_pollsets;
        if (erts_no_poll_threads % erts_no_pollsets > j)
            piv[j].poll_threads++;
    }

#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
    i = 0;
    erts_mtx_lock(&drv_ev_state.grow_lock);
    len = erts_atomic_read_nob(&drv_ev_state.len);
    /* Walk the fd states lock by lock: lock i covers fds i, i+CNT, i+2*CNT, ... */
    for (i = 0; i < ERTS_CHECK_IO_DRV_EV_STATE_LOCK_CNT; i++) {
        erts_mtx_lock(&drv_ev_state.locks[i].lck);
        for (j = i; j < len; j+=ERTS_CHECK_IO_DRV_EV_STATE_LOCK_CNT) {
            ErtsDrvEventState *state = get_drv_ev_state(j);
            int pollsetid = get_pollset_id(j);
            ASSERT(fd_mtx(j) == &drv_ev_state.locks[i].lck);
            /* NOTE(review): fallback fds are counted at piv[-1]. When both
               ERTS_POLL_USE_FALLBACK and ERTS_POLL_USE_SCHEDULER_POLLING are
               enabled, the slot just below piv[0] holds the scheduler
               pollset and the fallback one sits at piv[-2] - confirm that
               this index is intended. */
            if (state->flags & ERTS_EV_FLAG_FALLBACK)
                pollsetid = -1;
            /* Count driver-selected fds that have an active in/out I/O task */
            if (state->driver.select
                && (state->type == ERTS_EV_TYPE_DRV_SEL)
                && (is_iotask_active(&state->driver.select->iniotask)
                    || is_iotask_active(&state->driver.select->outiotask)))
                piv[pollsetid].active_fds++;
        }
        erts_mtx_unlock(&drv_ev_state.locks[i].lck);
    }
    erts_mtx_unlock(&drv_ev_state.grow_lock);

    /* The fd state storage is accounted to the first ordinary pollset */
    piv[0].memory_size += sizeof(ErtsDrvEventState) * erts_atomic_read_nob(&drv_ev_state.len);
#else
    /* The fd state storage is accounted to the first ordinary pollset */
    piv[0].memory_size += safe_hash_table_sz(&drv_ev_state.tab);
    {
        SafeHashInfo hi;
        safe_hash_get_info(&hi, &drv_ev_state.tab);
        piv[0].memory_size += hi.objs * sizeof(ErtsDrvEventState);
    }
    erts_spin_lock(&drv_ev_state.prealloc_lock);
    piv[0].memory_size += drv_ev_state.num_prealloc * sizeof(ErtsDrvEventState);
    erts_spin_unlock(&drv_ev_state.prealloc_lock);
#endif

    /* First pass: size calculation only (no heap pointer yet) */
    hpp = NULL;
    szp = &sz;
    sz = 0;

    /* Rewind piv to the start of the allocation (undoes the piv++ above) */
    piv -= ERTS_POLL_USE_FALLBACK;
    piv -= ERTS_POLL_USE_SCHEDULER_POLLING;

 bld_it:

    /* Iterate backwards so that consing yields the list in slot order */
    for (j = no_pollsets-1; j >= 0; j--) {
        i = 0;

        tags[i] = erts_bld_atom(hpp, szp, "name");
        values[i++] = erts_bld_atom(hpp, szp, "erts_poll");

        tags[i] = erts_bld_atom(hpp, szp, "primary");
        values[i++] = erts_bld_atom(hpp, szp, piv[j].primary);

        tags[i] = erts_bld_atom(hpp, szp, "kernel_poll");
        values[i++] = erts_bld_atom(hpp, szp,
                                    piv[j].kernel_poll ? piv[j].kernel_poll : "false");

        tags[i] = erts_bld_atom(hpp, szp, "memory_size");
        values[i++] = erts_bld_uint(hpp, szp, piv[j].memory_size);

        tags[i] = erts_bld_atom(hpp, szp, "total_poll_set_size");
        values[i++] = erts_bld_uint(hpp, szp, piv[j].poll_set_size);

        tags[i] = erts_bld_atom(hpp, szp, "lazy_updates");
        values[i++] = piv[j].lazy_updates ? am_true : am_false;

        tags[i] = erts_bld_atom(hpp, szp, "pending_updates");
        values[i++] = erts_bld_uint(hpp, szp, piv[j].pending_updates);

        tags[i] = erts_bld_atom(hpp, szp, "batch_updates");
        values[i++] = piv[j].batch_updates ? am_true : am_false;

        tags[i] = erts_bld_atom(hpp, szp, "concurrent_updates");
        values[i++] = piv[j].concurrent_updates ? am_true : am_false;

        tags[i] = erts_bld_atom(hpp, szp, "fallback");
        values[i++] = piv[j].is_fallback ? am_true : am_false;

        tags[i] = erts_bld_atom(hpp, szp, "max_fds");
        values[i++] = erts_bld_uint(hpp, szp, piv[j].max_fds);

        tags[i] = erts_bld_atom(hpp, szp, "active_fds");
        values[i++] = erts_bld_uint(hpp, szp, piv[j].active_fds);

        tags[i] = erts_bld_atom(hpp, szp, "poll_threads");
        values[i++] = erts_bld_uint(hpp, szp, piv[j].poll_threads);

        res = erts_bld_2tup_list(hpp, szp, i, tags, values);

        /* One cons cell (2 words) per pollset for the outer list */
        if (!hpp) {
            *szp += 2;
        }
        else {
            list = CONS(*hpp, res, list);
            *hpp += 2;
        }
    }

    /* Size is now known: allocate the heap and run the build pass */
    if (!hpp) {
	hp = HAlloc(p, sz);
	hpp = &hp;
	szp = NULL;
	goto bld_it;
    }

    erts_free(ERTS_ALC_T_TMP, piv);

    return list;
}
2501 
2502 static ERTS_INLINE ErtsPollEvents
print_events(erts_dsprintf_buf_t * dsbufp,ErtsPollEvents ev)2503 print_events(erts_dsprintf_buf_t *dsbufp, ErtsPollEvents ev)
2504 {
2505     int first = 1;
2506     if(ev == ERTS_POLL_EV_NONE) {
2507         erts_dsprintf(dsbufp, "N/A");
2508         return 0;
2509     }
2510     if(ev & ERTS_POLL_EV_IN) {
2511 	ev &= ~ERTS_POLL_EV_IN;
2512 	erts_dsprintf(dsbufp, "%s%s", first ? "" : "|", "IN");
2513 	first = 0;
2514     }
2515     if(ev & ERTS_POLL_EV_OUT) {
2516 	ev &= ~ERTS_POLL_EV_OUT;
2517 	erts_dsprintf(dsbufp, "%s%s", first ? "" : "|", "OUT");
2518 	first = 0;
2519     }
2520     /* The following should not appear... */
2521     if(ev & ERTS_POLL_EV_NVAL) {
2522 	erts_dsprintf(dsbufp, "%s%s", first ? "" : "|", "NVAL");
2523 	first = 0;
2524     }
2525     if(ev & ERTS_POLL_EV_ERR) {
2526 	erts_dsprintf(dsbufp, "%s%s", first ? "" : "|", "ERR");
2527 	first = 0;
2528     }
2529     if (ev)
2530 	erts_dsprintf(dsbufp, "%s0x%b32x", first ? "" : "|", (Uint32) ev);
2531     return ev;
2532 }
2533 
2534 static ERTS_INLINE void
print_flags(erts_dsprintf_buf_t * dsbufp,EventStateFlags f)2535 print_flags(erts_dsprintf_buf_t *dsbufp, EventStateFlags f)
2536 {
2537     erts_dsprintf(dsbufp, "%s", flag2str(f));
2538 }
2539 
2540 #ifdef DEBUG_PRINT_MODE
2541 
2542 static ERTS_INLINE char *
drvmode2str(int mode)2543 drvmode2str(int mode) {
2544     switch (mode) {
2545     case ERL_DRV_READ|ERL_DRV_USE: return "READ|USE";
2546     case ERL_DRV_WRITE|ERL_DRV_USE: return "WRITE|USE";
2547     case ERL_DRV_READ|ERL_DRV_WRITE|ERL_DRV_USE: return "READ|WRITE|USE";
2548     case ERL_DRV_USE: return "USE";
2549     case ERL_DRV_READ|ERL_DRV_USE_NO_CALLBACK: return "READ|USE_NO_CB";
2550     case ERL_DRV_WRITE|ERL_DRV_USE_NO_CALLBACK: return "WRITE|USE_NO_CB";
2551     case ERL_DRV_READ|ERL_DRV_WRITE|ERL_DRV_USE_NO_CALLBACK: return "READ|WRITE|USE_NO_CB";
2552     case ERL_DRV_USE_NO_CALLBACK: return "USE_NO_CB";
2553     case ERL_DRV_READ: return "READ";
2554     case ERL_DRV_WRITE: return "WRITE";
2555     case ERL_DRV_READ|ERL_DRV_WRITE: return "READ|WRITE";
2556     default: return "UNKNOWN";
2557     }
2558 }
2559 
2560 static ERTS_INLINE char *
nifmode2str(enum ErlNifSelectFlags mode)2561 nifmode2str(enum ErlNifSelectFlags mode) {
2562     if (mode & ERL_NIF_SELECT_STOP)
2563         return "STOP";
2564     switch (mode) {
2565     case ERL_NIF_SELECT_READ: return "READ";
2566     case ERL_NIF_SELECT_WRITE: return "WRITE";
2567     case ERL_NIF_SELECT_READ|ERL_NIF_SELECT_WRITE: return "READ|WRITE";
2568     case ERL_NIF_SELECT_CANCEL|ERL_NIF_SELECT_READ: return "CANCEL|READ";
2569     case ERL_NIF_SELECT_CANCEL|ERL_NIF_SELECT_WRITE: return "CANCEL|WRITE";
2570     case ERL_NIF_SELECT_CANCEL|ERL_NIF_SELECT_READ|ERL_NIF_SELECT_WRITE:
2571         return "CANCEL|READ|WRITE";
2572     default: return "UNKNOWN";
2573     }
2574 }
2575 
2576 #endif
2577 
/*
 * Counters accumulated by doit_erts_check_io_debug() while
 * erts_check_io_debug() walks over the fd states.
 */
typedef struct {
    int used_fds;                 /* fds with events that were not counted as internal */
    int num_errors;               /* fds for which erts_debug_print_checkio_state() returned non-zero */
    int no_driver_select_structs; /* states with a non-NULL driver.select */
    int no_enif_select_structs;   /* states with a non-NULL driver.nif */
#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
    int internal_fds;             /* fds whose poll set events had ERTS_POLL_EV_NVAL set */
    ErtsPollEvents *epep;         /* poll set events of the poll set currently examined, indexed by fd */
#endif
} IterDebugCounters;
2588 
/*
 * Append a one-line description of the check-io state of a single fd to
 * dsbufp.
 *
 *   dsbufp    - buffer the description is appended to
 *   state     - event state of the fd to describe
 *   ep_events - events the caller obtained for the fd from the poll set
 *   internal  - non-zero if the caller classified the fd as internal; only
 *               consulted when ERTS_SYS_CONTINOUS_FD_NUMBERS is defined
 *
 * Returns 1 if something was flagged as an error (the line is then
 * suffixed with " ERROR"), otherwise 0.
 */
static int erts_debug_print_checkio_state(erts_dsprintf_buf_t *dsbufp,
                                          ErtsDrvEventState *state,
                                          ErtsPollEvents ep_events,
                                          int internal)
{
#if defined(HAVE_FSTAT) && !defined(NO_FSTAT_ON_SYS_FD_TYPE)
    struct stat stat_buf;
#endif
    ErtsSysFdType fd = state->fd;
    ErtsPollEvents cio_events = state->events;
    int err = 0;
#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
    ErtsPollEvents aio_events = state->active_events;
#endif
    /* Fds flagged as being in the fallback poll set are reported as pollset=-1 */
    erts_dsprintf(dsbufp, "pollset=%d fd=%bpd ",
		  state->flags & ERTS_EV_FLAG_FALLBACK ? -1 : get_pollset_id(fd),
		  (SWord) fd);

    /*
     * Print the file type of the fd when fstat() can be used on it. The
     * first S_IS*() test that is both available on the platform and true
     * names the type; "unknown" is printed if none of them matches.
     */
#if defined(HAVE_FSTAT) && !defined(NO_FSTAT_ON_SYS_FD_TYPE)
    if (fstat((int) fd, &stat_buf) < 0)
        erts_dsprintf(dsbufp, "type=unknown ");
    else {
        erts_dsprintf(dsbufp, "type=");
#ifdef S_ISSOCK
        if (S_ISSOCK(stat_buf.st_mode))
            erts_dsprintf(dsbufp, "sock ");
        else
#endif
#ifdef S_ISFIFO
	    if (S_ISFIFO(stat_buf.st_mode))
		erts_dsprintf(dsbufp, "fifo ");
	    else
#endif
#ifdef S_ISCHR
                if (S_ISCHR(stat_buf.st_mode))
                    erts_dsprintf(dsbufp, "chr ");
                else
#endif
#ifdef S_ISDIR
                    if (S_ISDIR(stat_buf.st_mode))
                        erts_dsprintf(dsbufp, "dir ");
                    else
#endif
#ifdef S_ISBLK
                        if (S_ISBLK(stat_buf.st_mode))
                            erts_dsprintf(dsbufp, "blk ");
                        else
#endif
#ifdef S_ISREG
                            if (S_ISREG(stat_buf.st_mode))
                                erts_dsprintf(dsbufp, "reg ");
                            else
#endif
#ifdef S_ISLNK
                                if (S_ISLNK(stat_buf.st_mode))
                                    erts_dsprintf(dsbufp, "lnk ");
                                else
#endif
#ifdef S_ISDOOR
                                    if (S_ISDOOR(stat_buf.st_mode))
                                        erts_dsprintf(dsbufp, "door ");
                                    else
#endif
#ifdef S_ISWHT
                                        if (S_ISWHT(stat_buf.st_mode))
                                            erts_dsprintf(dsbufp, "wht ");
                                        else
#endif
#ifdef S_ISXATTR
                                            if (S_ISXATTR(stat_buf.st_mode))
                                                erts_dsprintf(dsbufp, "xattr ");
                                            else
#endif
                                                erts_dsprintf(dsbufp, "unknown ");
    }
#else
    erts_dsprintf(dsbufp, "type=unknown ");
#endif

    if (state->type == ERTS_EV_TYPE_DRV_SEL) {
        erts_dsprintf(dsbufp, "driver_select ");
        ASSERT(state->driver.select != NULL);
#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
        /* A driver_select fd that the caller classified as internal is flagged */
        if (internal) {
            erts_dsprintf(dsbufp, "internal ");
            err = 1;
        }
        if (aio_events == cio_events) {
            /* Selected and active events agree */
            if (cio_events == ep_events) {
                /* ...and so does the poll set: print them once. Bits that
                 * print_events() leaves over (anything but IN/OUT) are flagged. */
                erts_dsprintf(dsbufp, "ev=");
                if (print_events(dsbufp, cio_events) != 0)
                    err = 1;
            }
            else {
                /* A poll set value of ERTS_POLL_EV_NONE is tolerated; any
                 * other mismatch is flagged. */
                ErtsPollEvents ev = cio_events;
                if (ev != ep_events && ep_events != ERTS_POLL_EV_NONE)
                    err = 1;
                erts_dsprintf(dsbufp, "cio_ev=");
                print_events(dsbufp, cio_events);
                erts_dsprintf(dsbufp, " ep_ev=");
                print_events(dsbufp, ep_events);
            }
        } else {
            /* Selected and active events differ: the poll set events are
             * only printed, and the line flagged, when they do not match
             * the active events. */
            erts_dsprintf(dsbufp, "cio_ev=");
            print_events(dsbufp, cio_events);
            erts_dsprintf(dsbufp, " aio_ev=");
            print_events(dsbufp, aio_events);
            if ((aio_events != ep_events && ep_events != ERTS_POLL_EV_NONE) ||
                (aio_events != 0 && ep_events == ERTS_POLL_EV_NONE)) {
                erts_dsprintf(dsbufp, " ep_ev=");
                print_events(dsbufp, ep_events);
                err = 1;
            }
        }
#else
        if (print_events(dsbufp, cio_events) != 0)
            err = 1;
#endif
        erts_dsprintf(dsbufp, " ");
        /* Selected for input: print the input port; a nil port id is flagged */
        if (cio_events & ERTS_POLL_EV_IN) {
            Eterm id = state->driver.select->inport;
            if (is_nil(id)) {
                erts_dsprintf(dsbufp, "inport=none inname=none indrv=none ");
                err = 1;
            }
            else {
                ErtsPortNames *pnp = erts_get_port_names(id, ERTS_INVALID_ERL_DRV_PORT);
                erts_dsprintf(dsbufp, " inport=%T inname=%s indrv=%s ",
                            id,
                            pnp->name ? pnp->name : "unknown",
                            (pnp->driver_name
                             ? pnp->driver_name
                             : "unknown"));
                erts_free_port_names(pnp);
            }
        }
        /* Selected for output: print the output port; a nil port id is flagged */
        if (cio_events & ERTS_POLL_EV_OUT) {
            Eterm id = state->driver.select->outport;
            if (is_nil(id)) {
                erts_dsprintf(dsbufp, "outport=none outname=none outdrv=none ");
                err = 1;
            }
            else {
                ErtsPortNames *pnp = erts_get_port_names(id, ERTS_INVALID_ERL_DRV_PORT);
                erts_dsprintf(dsbufp, " outport=%T outname=%s outdrv=%s ",
                            id,
                            pnp->name ? pnp->name : "unknown",
                            (pnp->driver_name
                             ? pnp->driver_name
                             : "unknown"));
                erts_free_port_names(pnp);
            }
        }
    }
    else if (state->type == ERTS_EV_TYPE_NIF) {
        ErtsResource* r;
        ASSERT(state->driver.nif != NULL);
        erts_dsprintf(dsbufp, "enif_select ");

#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
        /* An enif_select fd that the caller classified as internal is flagged */
        if (internal) {
            erts_dsprintf(dsbufp, "internal ");
            err = 1;
        }

        if (cio_events == ep_events) {
            erts_dsprintf(dsbufp, "ev=");
            if (print_events(dsbufp, cio_events) != 0)
                err = 1;
        }
        else {
            /* Unlike for driver_select, any difference between the selected
             * events and the poll set events is flagged here. */
            err = 1;
            erts_dsprintf(dsbufp, "cio_ev=");
            print_events(dsbufp, cio_events);
            erts_dsprintf(dsbufp, " ep_ev=");
            print_events(dsbufp, ep_events);
        }
#else
        if (print_events(dsbufp, cio_events) != 0)
            err = 1;
#endif
        /* Pids registered for in/out/err, followed by the resource and the
         * module and name of its resource type */
        erts_dsprintf(dsbufp, " inpid=%T", state->driver.nif->in.pid);
        erts_dsprintf(dsbufp, " outpid=%T", state->driver.nif->out.pid);
        erts_dsprintf(dsbufp, " errpid=%T", state->driver.nif->err.pid);
        r = state->driver.stop.resource;
        erts_dsprintf(dsbufp, " resource=%p(%T:%T)", r, r->type->module, r->type->name);
    }
#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
    else if (internal) {
        /* Neither driver_select nor enif_select type, but classified as
         * internal by the caller: selected events on it are flagged, poll
         * set events alone are not. */
        erts_dsprintf(dsbufp, "internal ");
        if (cio_events) {
            err = 1;
            erts_dsprintf(dsbufp, "cio_ev=");
            print_events(dsbufp, cio_events);
        }
        if (ep_events) {
            erts_dsprintf(dsbufp, "ep_ev=");
            print_events(dsbufp, ep_events);
        }
    }
#endif
    else {
        /* Any other type is always flagged */
        err = 1;
        erts_dsprintf(dsbufp, "control_type=%d ", (int)state->type);
#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
        if (cio_events == ep_events) {
            erts_dsprintf(dsbufp, "ev=");
            print_events(dsbufp, cio_events);
        }
        else {
            erts_dsprintf(dsbufp, "cio_ev="); print_events(dsbufp, cio_events);
            erts_dsprintf(dsbufp, " ep_ev="); print_events(dsbufp, ep_events);
        }
#else
        erts_dsprintf(dsbufp, "ev=0x%b32x", (Uint32) cio_events);
#endif
    }

    erts_dsprintf(dsbufp, " flags="); print_flags(dsbufp, state->flags);
    if (err) {
        erts_dsprintf(dsbufp, " ERROR");
    }
    erts_dsprintf(dsbufp, "\r\n");
    return err;
}
2814 
/*
 * Per-fd step of erts_check_io_debug().
 *
 *   vstate    - the ErtsDrvEventState to examine
 *   vcounters - the IterDebugCounters to update
 *   dsbufp    - buffer the description of the fd is appended to
 *
 * Counts the select structures attached to the state and, if the fd has
 * any events, prints it and updates the used/internal/error counters.
 */
static void doit_erts_check_io_debug(void *vstate, void *vcounters,
                                     erts_dsprintf_buf_t *dsbufp)
{
    ErtsDrvEventState *des = vstate;
    IterDebugCounters *cnt = vcounters;
    int is_internal = 0;
#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
    ErtsPollEvents ep_events = cnt->epep[(int) des->fd];
#else
    ErtsPollEvents ep_events = ERTS_POLL_EV_NONE;
#endif

    if (des->driver.select) {
        cnt->no_driver_select_structs++;
        ASSERT(des->events || (ep_events != 0 && ep_events != ERTS_POLL_EV_NONE));
    }
    if (des->driver.nif) {
        cnt->no_enif_select_structs++;
        ASSERT(des->events || (ep_events != 0 && ep_events != ERTS_POLL_EV_NONE));
    }

#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
    /* Nothing selected and nothing known to the poll set: skip the fd */
    if (!des->events && (ep_events == 0 || ep_events == ERTS_POLL_EV_NONE))
        return;

    if (ep_events & ERTS_POLL_EV_NVAL) {
        /* NVAL from the poll set marks the fd as internal; the bit itself
         * is stripped before the events are printed */
        ep_events &= ~ERTS_POLL_EV_NVAL;
        is_internal = 1;
        cnt->internal_fds++;
    }
    else {
        cnt->used_fds++;
    }
#else
    if (!des->events)
        return;

    cnt->used_fds++;
#endif

    if (erts_debug_print_checkio_state(dsbufp, des, ep_events, is_internal))
        cnt->num_errors++;
}
2855 
2856 /* ciodpi can be NULL when called from etp-commands */
2857 int
2858 erts_check_io_debug(ErtsCheckIoDebugInfo *ciodip)
2859 {
2860     erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf();
2861 #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
2862     int fd, len, i;
2863 #endif
2864     IterDebugCounters counters = {0};
2865 #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
2866     ErtsDrvEventState null_des;
2867 
2868     null_des.driver.select = NULL;
2869     null_des.driver.nif = NULL;
2870     null_des.driver.stop.drv_ptr = NULL;
2871     null_des.events = 0;
2872     null_des.active_events = 0;
2873     null_des.type = ERTS_EV_TYPE_NONE;
2874     null_des.flags = 0;
2875 
2876     counters.epep = erts_alloc(ERTS_ALC_T_TMP,
2877                                sizeof(ErtsPollEvents)*drv_ev_state.max_fds);
2878 #endif
2879 
2880 
2881 #if defined(ERTS_ENABLE_LOCK_CHECK)
2882     erts_lc_check_exact(NULL, 0); /* No locks should be locked */
2883 #endif
2884 
2885     if (ciodip)
2886         erts_thr_progress_block(); /* stop the world to avoid messy locking */
2887 
2888 #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
2889     len = erts_atomic_read_nob(&drv_ev_state.len);
2890 
2891 #if ERTS_POLL_USE_FALLBACK
2892     erts_dsprintf(dsbufp, "--- fds in flbk pollset ---------------------------------\n");
2893     erts_poll_get_selected_events_flbk(get_fallback_pollset(), counters.epep,
2894                                        drv_ev_state.max_fds);
2895     for (fd = 0; fd < len; fd++) {
2896         if (drv_ev_state.v[fd].flags & ERTS_EV_FLAG_FALLBACK)
2897             doit_erts_check_io_debug(&drv_ev_state.v[fd], &counters, dsbufp);
2898     }
2899 #endif
2900 #if ERTS_POLL_USE_SCHEDULER_POLLING
2901     erts_dsprintf(dsbufp, "--- fds in scheduler pollset ----------------------------\n");
2902     erts_poll_get_selected_events(get_scheduler_pollset(0), counters.epep,
2903                                   drv_ev_state.max_fds);
2904     for (fd = 0; fd < len; fd++) {
2905         if (drv_ev_state.v[fd].flags & ERTS_EV_FLAG_SCHEDULER) {
2906             if (drv_ev_state.v[fd].events && drv_ev_state.v[fd].events != ERTS_POLL_EV_NONE)
2907                 counters.epep[fd] &= ~ERTS_POLL_EV_OUT;
2908             doit_erts_check_io_debug(&drv_ev_state.v[fd], &counters, dsbufp);
2909         }
2910     }
2911 #endif
2912 
2913     erts_dsprintf(dsbufp, "--- fds in pollset --------------------------------------\n");
2914 
2915     for (i = 0; i < erts_no_pollsets; i++) {
2916         erts_poll_get_selected_events(pollsetv[i],
2917                                       counters.epep,
2918                                       drv_ev_state.max_fds);
2919         for (fd = 0; fd < len; fd++) {
2920             if (!(drv_ev_state.v[fd].flags & ERTS_EV_FLAG_FALLBACK)
2921                 && get_pollset_id(fd) == i) {
2922                 if (counters.epep[fd] != ERTS_POLL_EV_NONE &&
2923                     drv_ev_state.v[fd].flags & ERTS_EV_FLAG_IN_SCHEDULER) {
2924                     /* We add the in flag if it is enabled in the scheduler pollset
2925                        and get_selected_events works on the platform */
2926                     counters.epep[fd] |= ERTS_POLL_EV_IN;
2927                 }
2928                 doit_erts_check_io_debug(&drv_ev_state.v[fd], &counters, dsbufp);
2929             }
2930         }
2931     }
2932     for (fd = len ; fd < drv_ev_state.max_fds; fd++) {
2933         null_des.fd = fd;
2934         doit_erts_check_io_debug(&null_des, &counters, dsbufp);
2935     }
2936 #else
2937     safe_hash_for_each(&drv_ev_state.tab, &doit_erts_check_io_debug,
2938                        &counters, dsbufp);
2939 #endif
2940 
2941     if (ciodip)
2942         erts_thr_progress_unblock();
2943 
2944     if (ciodip) {
2945         ciodip->no_used_fds = counters.used_fds;
2946         ciodip->no_driver_select_structs = counters.no_driver_select_structs;
2947         ciodip->no_enif_select_structs = counters.no_enif_select_structs;
2948     }
2949 
2950     erts_dsprintf(dsbufp, "\n");
2951     erts_dsprintf(dsbufp, "used fds=%d\n", counters.used_fds);
2952     erts_dsprintf(dsbufp, "Number of driver_select() structures=%d\n", counters.no_driver_select_structs);
2953     erts_dsprintf(dsbufp, "Number of enif_select() structures=%d\n", counters.no_enif_select_structs);
2954 #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
2955     erts_dsprintf(dsbufp, "internal fds=%d\n", counters.internal_fds);
2956 #endif
2957     erts_dsprintf(dsbufp, "---------------------------------------------------------\n");
2958     erts_send_error_to_logger_nogl(dsbufp);
2959 #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
2960     erts_free(ERTS_ALC_T_TMP, (void *) counters.epep);
2961 #endif
2962 
2963     return counters.num_errors;
2964 }
2965 
2966 #ifdef ERTS_ENABLE_LOCK_COUNT
2967 void erts_lcnt_update_cio_locks(int enable) {
2968     int i;
2969 #ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS
2970     erts_lcnt_enable_hash_lock_count(&drv_ev_state.tab, ERTS_LOCK_FLAGS_CATEGORY_IO, enable);
2971 #else
2972     (void)enable;
2973 #endif
2974 
2975 #if ERTS_POLL_USE_FALLBACK
2976     erts_lcnt_enable_pollset_lock_count_flbk(get_fallback_pollset(), enable);
2977 #endif
2978 
2979     for (i = 0; i < erts_no_pollsets; i++)
2980         erts_lcnt_enable_pollset_lock_count(pollsetv[i], enable);
2981 }
2982 #endif /* ERTS_ENABLE_LOCK_COUNT */
2983