1 /*
2  * %CopyrightBegin%
3  *
4  * Copyright Ericsson AB 1996-2020. All Rights Reserved.
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  *     http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  *
18  * %CopyrightEnd%
19  */
20 
21 #ifdef HAVE_CONFIG_H
22 #  include "config.h"
23 #endif
24 
25 #ifdef ISC32
26 #define _POSIX_SOURCE
27 #define _XOPEN_SOURCE
28 #endif
29 
30 #include <sys/times.h>		/* ! */
31 #include <time.h>
32 #include <signal.h>
33 #include <sys/wait.h>
34 #include <sys/uio.h>
35 #include <termios.h>
36 #include <ctype.h>
37 #include <sys/utsname.h>
38 #include <sys/select.h>
39 #include <arpa/inet.h>
40 
41 #ifdef ISC32
42 #include <sys/bsdtypes.h>
43 #endif
44 
45 #include <termios.h>
46 #ifdef HAVE_FCNTL_H
47 #include <fcntl.h>
48 #endif
49 #ifdef HAVE_SYS_IOCTL_H
50 #include <sys/ioctl.h>
51 #endif
52 
53 #include <sys/types.h>
54 #include <sys/socket.h>
55 
56 #define WANT_NONBLOCKING    /* must define this to pull in defs from sys.h */
57 #include "sys.h"
58 #include "erl_osenv.h"
59 
60 #include "erl_threads.h"
61 
62 extern erts_atomic_t sys_misc_mem_sz;
63 
64 static Eterm forker_port;
65 
66 #define MAX_VSIZE 16		/* Max number of entries allowed in an I/O
67 				 * vector sock_sendv().
68 				 */
69 /*
70  * Don't need global.h, but erl_cpu_topology.h won't compile otherwise
71  */
72 #include "global.h"
73 #include "erl_cpu_topology.h"
74 
75 #include "erl_sys_driver.h"
76 #include "sys_uds.h"
77 
78 #include "erl_child_setup.h"
79 
80 #if defined IOV_MAX
81 #define MAXIOV IOV_MAX
82 #elif defined UIO_MAXIOV
83 #define MAXIOV UIO_MAXIOV
84 #else
85 #define MAXIOV 16
86 #endif
87 
88 /* Used by the fd driver iff the fd could not be set to non-blocking */
/*
 * Extra state attached to a driver data record by set_blocking_data().
 */
typedef struct ErtsSysBlocking_ {
    ErlDrvPDL pdl;      /* obtained from driver_pdl_create() for the port */
    ErlDrvSSizeT res;   /* starts at 0; presumably the result of the last
                         * blocking write -- confirm against fd_async() */
    int err;            /* starts at 0; presumably the errno belonging to
                         * res -- confirm against fd_async() */
    unsigned int pkey;  /* obtained from driver_async_port_key() */
} ErtsSysBlocking;
95 
/*
 * State kept for one file descriptor of a port.  create_driver_data()
 * places these records directly after the ErtsSysDriverData inside the
 * same allocation, and init_fd_data() resets every member except pbuf.
 */
typedef struct fd_data {
    int   fd;        /* the descriptor; spawn_start() negates it to mark a
                      * descriptor that is to be closed after the handshake */
    char  pbuf[4];   /* hold partial packet bytes */
    int   psz;       /* size of pbuf */
    char  *buf;      /* starts as NULL; presumably the current input
                      * buffer -- confirm against ready_input() */
    char  *cpos;     /* starts as NULL; presumably the position inside
                      * buf -- confirm against ready_input() */
    int   sz;        /* starts as 0; presumably the size of buf -- confirm */
    int   remain;  /* for input on fd */
} ErtsSysFdData;
105 
/*
 * Per port state shared by the drivers in this file.  Instances are built
 * by create_driver_data().
 */
typedef struct driver_data {
    ErlDrvPort port_num;    /* the port this record belongs to */
    ErtsSysFdData *ofd;     /* output side; NULL without DO_WRITE, may be
                             * the very same record as ifd */
    ErtsSysFdData *ifd;     /* input side; NULL without DO_READ */
    int packet_bytes;       /* copied from the packet_bytes argument */
    int pid;                /* os pid, also stored in the Port structure */
    int alive;              /* 1 when an exit status was requested, else 0;
                             * spawn_control() sets it to -1 */
    int status;             /* 0 until spawn_control() stores the
                             * error_number of the SigChld message */
    int terminating;        /* starts as 0 */
    ErtsSysBlocking *blocking; /* NULL unless set_blocking_data() was run */
    int busy;               /* starts as 0 */
    ErlDrvSizeT high_watermark; /* copied from the SysDriverOpts */
    ErlDrvSizeT low_watermark;  /* copied from the SysDriverOpts */
} ErtsSysDriverData;
120 
121 #define DIR_SEPARATOR_CHAR    '/'
122 
123 #if defined(__ANDROID__)
124 #define SHELL "/system/bin/sh"
125 #else
126 #define SHELL "/bin/sh"
127 #endif /* __ANDROID__ */
128 
129 #if defined(DEBUG)
130 #define ERL_BUILD_TYPE_MARKER ".debug"
131 #elif defined(PURIFY)
132 #define ERL_BUILD_TYPE_MARKER ".purify"
133 #elif defined(QUANTIFY)
134 #define ERL_BUILD_TYPE_MARKER ".quantify"
135 #elif defined(PURECOV)
136 #define ERL_BUILD_TYPE_MARKER ".purecov"
137 #elif defined(VALGRIND)
138 #define ERL_BUILD_TYPE_MARKER ".valgrind"
139 #else /* opt */
140 #define ERL_BUILD_TYPE_MARKER
141 #endif
142 
143 #ifdef DEBUG
144 #define close(fd) do { int res = close(fd); ASSERT(res > -1); } while(0)
145 #endif
146 
147 #define CHILD_SETUP_PROG_NAME	"erl_child_setup" ERL_BUILD_TYPE_MARKER
148 
149 // #define HARD_DEBUG
150 #ifdef HARD_DEBUG
151 #define driver_select(port_num, fd, flags, onoff)                       \
152     do {                                                                \
153         if (((flags) & ERL_DRV_READ) && onoff)                          \
154             fprintf(stderr,"%010d %p: read select %d\r\n", __LINE__, port_num, (int)fd); \
155         if (((flags) & ERL_DRV_WRITE) && onoff)                         \
156             fprintf(stderr,"%010d %p: writ select %d\r\n", __LINE__, port_num, (int)fd); \
157         if (((flags) & ERL_DRV_READ) && !onoff)                          \
158             fprintf(stderr,"%010d %p: read unsele %d\r\n", __LINE__, port_num, (int)fd); \
159         if (((flags) & ERL_DRV_WRITE) && !onoff)                         \
160             fprintf(stderr,"%010d %p: writ unsele %d\r\n", __LINE__, port_num, (int)fd); \
161         driver_select_nkp(port_num, fd, flags, onoff);                  \
162     } while(0)
163 #endif
164 
/*
 * Decreasing ERTS_SYS_READ_BUF_SZ below 16384 is not allowed.
 */
168 
169 #define ERTS_SYS_READ_BUF_SZ (64*1024)
170 
171 /* I. Initialization */
172 
173 void
erl_sys_late_init(void)174 erl_sys_late_init(void)
175 {
176     SysDriverOpts opts = {0};
177     Port *port;
178 
179     sys_signal(SIGPIPE, SIG_IGN); /* Ignore - we'll handle the write failure */
180 
181     opts.packet_bytes = 0;
182     opts.use_stdio = 1;
183     opts.redir_stderr = 0;
184     opts.read_write = 0;
185     opts.hide_window = 0;
186     opts.wd = NULL;
187     erts_osenv_init(&opts.envir);
188     opts.exit_status = 0;
189     opts.overlapped_io = 0;
190     opts.spawn_type = ERTS_SPAWN_ANY;
191     opts.argv = NULL;
192     opts.parallelism = erts_port_parallelism;
193 
194     port =
195         erts_open_driver(&forker_driver, make_internal_pid(0), "forker", &opts, NULL, NULL);
196     erts_mtx_unlock(port->lock);
197     erts_sys_unix_later_init(); /* Need to be called after forker has been started */
198 }
199 
200 /* II. Prototypes */
201 
202 /* II.I Spawn prototypes */
203 static ErlDrvData spawn_start(ErlDrvPort, char*, SysDriverOpts*);
204 static ErlDrvSSizeT spawn_control(ErlDrvData, unsigned int, char *,
205                                   ErlDrvSizeT, char **, ErlDrvSizeT);
206 
207 /* II.II Vanilla prototypes */
208 static ErlDrvData vanilla_start(ErlDrvPort, char*, SysDriverOpts*);
209 
210 
211 /* II.III FD prototypes */
212 static ErlDrvData fd_start(ErlDrvPort, char*, SysDriverOpts*);
213 static void fd_async(void *);
214 static void fd_ready_async(ErlDrvData drv_data, ErlDrvThreadData thread_data);
215 static ErlDrvSSizeT fd_control(ErlDrvData, unsigned int, char *, ErlDrvSizeT,
216 			       char **, ErlDrvSizeT);
217 static void fd_stop(ErlDrvData);
218 static void fd_flush(ErlDrvData);
219 
220 /* II.IV Common prototypes */
221 static void stop(ErlDrvData);
222 static void ready_input(ErlDrvData, ErlDrvEvent);
223 static void ready_output(ErlDrvData, ErlDrvEvent);
224 static void output(ErlDrvData, char*, ErlDrvSizeT);
225 static void outputv(ErlDrvData, ErlIOVec*);
226 static void stop_select(ErlDrvEvent, void*);
227 
228 /* II.V Forker prototypes */
229 static ErlDrvData forker_start(ErlDrvPort, char*, SysDriverOpts*);
230 static void forker_stop(ErlDrvData);
231 static void forker_ready_input(ErlDrvData, ErlDrvEvent);
232 static void forker_ready_output(ErlDrvData, ErlDrvEvent);
233 static ErlDrvSSizeT forker_control(ErlDrvData, unsigned int, char *,
234                                    ErlDrvSizeT, char **, ErlDrvSizeT);
235 
236 /* III Driver entries */
237 
238 /* III.I The spawn driver */
/*
 * Driver entry of the "spawn" driver.  The slot labels follow the member
 * order of ErlDrvEntry as given in the erl_driver documentation.
 */
struct erl_drv_entry spawn_driver_entry = {
    NULL, /* init */
    spawn_start, /* start */
    stop, /* stop */
    output, /* output */
    ready_input, /* ready_input */
    ready_output, /* ready_output */
    "spawn", /* driver_name */
    NULL, /* finish */
    NULL, /* handle */
    spawn_control, /* control */
    NULL, /* timeout */
    NULL, /* outputv */
    NULL, /* ready_async */
    NULL, /* flush */
    NULL, /* call */
    NULL, /* event */
    ERL_DRV_EXTENDED_MARKER,
    ERL_DRV_EXTENDED_MAJOR_VERSION,
    ERL_DRV_EXTENDED_MINOR_VERSION,
    ERL_DRV_FLAG_USE_PORT_LOCKING | ERL_DRV_FLAG_USE_INIT_ACK, /* ERL_DRV_FLAGs */
    NULL, NULL, /* handle2, process_exit */
    stop_select /* stop_select */
};
263 
264 /* III.II The fd driver */
/*
 * Driver entry of the "fd" driver.  The slot labels follow the member
 * order of ErlDrvEntry as given in the erl_driver documentation.
 */
struct erl_drv_entry fd_driver_entry = {
    NULL, /* init */
    fd_start, /* start */
    fd_stop, /* stop */
    output, /* output */
    ready_input, /* ready_input */
    ready_output, /* ready_output */
    "fd", /* driver_name */
    NULL, /* finish */
    NULL, /* handle */
    fd_control, /* control */
    NULL, /* timeout */
    outputv, /* outputv */
    fd_ready_async, /* ready_async */
    fd_flush, /* flush */
    NULL, /* call */
    NULL, /* event */
    ERL_DRV_EXTENDED_MARKER,
    ERL_DRV_EXTENDED_MAJOR_VERSION,
    ERL_DRV_EXTENDED_MINOR_VERSION,
    0, /* ERL_DRV_FLAGs */
    NULL, /* handle2 */
    NULL, /* process_exit */
    stop_select /* stop_select */
};
290 
291 /* III.III The vanilla driver */
/*
 * Driver entry of the "vanilla" driver.  The slot labels follow the member
 * order of ErlDrvEntry as given in the erl_driver documentation.
 */
struct erl_drv_entry vanilla_driver_entry = {
    NULL, /* init */
    vanilla_start, /* start */
    stop, /* stop */
    output, /* output */
    ready_input, /* ready_input */
    ready_output, /* ready_output */
    "vanilla", /* driver_name */
    NULL, /* finish */
    NULL, /* handle */
    NULL, /* control */
    NULL, /* timeout */
    NULL, /* outputv */
    NULL, /* ready_async */
    NULL, /* flush */
    NULL, /* call */
    NULL, /* event */
    ERL_DRV_EXTENDED_MARKER,
    ERL_DRV_EXTENDED_MAJOR_VERSION,
    ERL_DRV_EXTENDED_MINOR_VERSION,
    0, /* ERL_DRV_FLAGs */
    NULL, /* handle2 */
    NULL, /* process_exit */
    stop_select /* stop_select */
};
317 
/* III.IV The forker driver */
/*
 * Driver entry of the "spawn_forker" driver.  The slot labels follow the
 * member order of ErlDrvEntry as given in the erl_driver documentation.
 */
struct erl_drv_entry forker_driver_entry = {
    NULL, /* init */
    forker_start, /* start */
    forker_stop, /* stop */
    NULL, /* output */
    forker_ready_input, /* ready_input */
    forker_ready_output, /* ready_output */
    "spawn_forker", /* driver_name */
    NULL, /* finish */
    NULL, /* handle */
    forker_control, /* control */
    NULL, /* timeout */
    NULL, /* outputv */
    NULL, /* ready_async */
    NULL, /* flush */
    NULL, /* call */
    NULL, /* event */
    ERL_DRV_EXTENDED_MARKER,
    ERL_DRV_EXTENDED_MAJOR_VERSION,
    ERL_DRV_EXTENDED_MINOR_VERSION,
    0, /* ERL_DRV_FLAGs */
    NULL, NULL, /* handle2, process_exit */
    stop_select /* stop_select */
};
343 
/* Utility functions */
345 
/*
 * Attach a freshly allocated ErtsSysBlocking record to dd.
 *
 * The allocation is added to sys_misc_mem_sz.  The record gets a port data
 * lock and the async key of dd->port_num; res and err start out as 0.
 *
 * Always returns 1.
 */
static int set_blocking_data(ErtsSysDriverData *dd) {
    ErtsSysBlocking *blk;

    blk = erts_alloc(ERTS_ALC_T_SYS_BLOCKING, sizeof(*blk));
    erts_atomic_add_nob(&sys_misc_mem_sz, sizeof(*blk));
    dd->blocking = blk;

    blk->pdl = driver_pdl_create(dd->port_num);
    blk->err = 0;
    blk->res = 0;
    blk->pkey = driver_async_port_key(dd->port_num);

    return 1;
}
359 
/*
 * Reset fd_data for use with the descriptor fd: no buffer, nothing
 * remaining and no partial packet bytes.  pbuf itself is left untouched.
 */
static void init_fd_data(ErtsSysFdData *fd_data, int fd)
{
    /* buffer bookkeeping */
    fd_data->buf = fd_data->cpos = NULL;
    fd_data->sz = fd_data->remain = 0;

    /* partial packet bookkeeping */
    fd_data->psz = 0;

    fd_data->fd = fd;
}
369 
370 static ErtsSysDriverData *
create_driver_data(ErlDrvPort port_num,int ifd,int ofd,int packet_bytes,int read_write,int exit_status,int pid,int is_blocking,SysDriverOpts * opts)371 create_driver_data(ErlDrvPort port_num,
372                    int ifd,
373                    int ofd,
374                    int packet_bytes,
375                    int read_write,
376                    int exit_status,
377                    int pid,
378                    int is_blocking,
379                    SysDriverOpts* opts)
380 {
381     Port *prt;
382     ErtsSysDriverData *driver_data;
383     char *data;
384     int size = sizeof(ErtsSysDriverData);
385 
386     if (read_write & DO_READ)
387         size += sizeof(ErtsSysFdData);
388 
389     if ((read_write & DO_WRITE) &&
390         ((ifd != ofd || ofd == -1) || !(read_write & DO_READ)))
391         size += sizeof(ErtsSysFdData);
392 
393     data = erts_alloc(ERTS_ALC_T_DRV_TAB,size);
394     erts_atomic_add_nob(&sys_misc_mem_sz, size);
395 
396     driver_data = (ErtsSysDriverData*)data;
397     data += sizeof(*driver_data);
398 
399     prt = erts_drvport2port(port_num);
400     if (prt != ERTS_INVALID_ERL_DRV_PORT)
401 	prt->os_pid = pid;
402 
403     driver_data->packet_bytes = packet_bytes;
404     driver_data->port_num = port_num;
405     driver_data->pid = pid;
406     driver_data->alive = exit_status ? 1 : 0;
407     driver_data->status = 0;
408     driver_data->terminating = 0;
409     driver_data->blocking = NULL;
410 
411     if (read_write & DO_READ) {
412         driver_data->ifd = (ErtsSysFdData*)data;
413         data += sizeof(*driver_data->ifd);
414         init_fd_data(driver_data->ifd, ifd);
415         driver_select(port_num, ifd, (ERL_DRV_READ|ERL_DRV_USE), 1);
416     } else {
417         driver_data->ifd = NULL;
418     }
419 
420     if (read_write & DO_WRITE) {
421         if (ofd != -1 && ifd == ofd && read_write & DO_READ) {
422             /* This is for when ifd and ofd are the same fd */
423             driver_data->ofd = driver_data->ifd;
424         } else {
425             driver_data->ofd = (ErtsSysFdData*)data;
426             data += sizeof(*driver_data->ofd);
427             init_fd_data(driver_data->ofd, ofd);
428         }
429         if (is_blocking)
430             if (!set_blocking_data(driver_data)) {
431                 erts_free(ERTS_ALC_T_DRV_TAB, driver_data);
432                 return NULL;
433             }
434     } else {
435         driver_data->ofd = NULL;
436     }
437 
438     driver_data->busy = 0;
439     driver_data->high_watermark = opts->high_watermark;
440     driver_data->low_watermark = opts->low_watermark;
441 
442     return driver_data;
443 }
444 
445 /* Spawn driver */
446 
/*
 * Close both ends of the two pipes: first ifd[0] and ifd[1], then ofd[0]
 * and ofd[1].
 */
static void close_pipes(int ifd[2], int ofd[2])
{
    int end;

    for (end = 0; end < 2; end++)
        close(ifd[end]);

    for (end = 0; end < 2; end++)
        close(ofd[end]);
}
454 
/*
 * State handed to add_spawn_env_block_foreach() while the environment of a
 * spawned program is turned into I/O vector entries.
 */
struct __add_spawn_env_state {
    struct iovec *iov;      /* vector that gets one entry per variable */
    int *iov_index;         /* index of the next entry to fill; bumped for
                             * every variable */

    Sint32 *payload_size;   /* grows by the length of every entry added */
    char *env_block;        /* write position inside the environment buffer */
};
462 
add_spawn_env_block_foreach(void * _state,const erts_osenv_data_t * key,const erts_osenv_data_t * value)463 static void add_spawn_env_block_foreach(void *_state,
464                                         const erts_osenv_data_t *key,
465                                         const erts_osenv_data_t *value)
466 {
467     struct __add_spawn_env_state *state;
468     struct iovec *iov;
469 
470     state = (struct __add_spawn_env_state*)(_state);
471     iov = &state->iov[*state->iov_index];
472 
473     iov->iov_base = state->env_block;
474 
475     sys_memcpy(state->env_block, key->data, key->length);
476     state->env_block += key->length;
477     *state->env_block++ = '=';
478     sys_memcpy(state->env_block, value->data, value->length);
479     state->env_block += value->length;
480     *state->env_block++ = '\0';
481 
482     iov->iov_len = state->env_block - (char*)iov->iov_base;
483 
484     (*state->payload_size) += iov->iov_len;
485     (*state->iov_index)++;
486 }
487 
/*
 * Append all variables of env to the I/O vector iov.
 *
 * A temporary buffer is allocated and filled with one "key=value" string
 * per variable; every string gets its own vector entry, starting at
 * *iov_index.  Both *iov_index and *payload_size are updated as entries
 * are added.
 *
 * Returns the buffer the new entries point into.  It is allocated with
 * ERTS_ALC_T_TMP and has to stay around for as long as the entries are
 * in use.
 */
static void *add_spawn_env_block(const erts_osenv_t *env, struct iovec *iov,
                                  int *iov_index, Sint32 *payload_size) {
    struct __add_spawn_env_state cursor;
    char *block;

    /* sizeof("=\0") adds three bytes per variable; the callback writes
     * two of them, the '=' and the terminating NUL */
    block = erts_alloc(ERTS_ALC_T_TMP,
                       env->content_size
                       + env->variable_count * sizeof("=\0"));

    cursor.env_block = block;
    cursor.payload_size = payload_size;
    cursor.iov_index = iov_index;
    cursor.iov = iov;

    erts_osenv_foreach_native(env, &cursor, add_spawn_env_block_foreach);

    return block;
}
505 
spawn_start(ErlDrvPort port_num,char * name,SysDriverOpts * opts)506 static ErlDrvData spawn_start(ErlDrvPort port_num, char* name,
507                               SysDriverOpts* opts)
508 {
509 #define CMD_LINE_PREFIX_STR "exec "
510 #define CMD_LINE_PREFIX_STR_SZ (sizeof(CMD_LINE_PREFIX_STR) - 1)
511 
512     int len;
513     ErtsSysDriverData *dd;
514     char *cmd_line;
515     char wd_buff[MAXPATHLEN+1];
516     char *wd, *cwd;
517     int ifd[2], ofd[2], stderrfd;
518 
519     if (pipe(ifd) < 0) return ERL_DRV_ERROR_ERRNO;
520     errno = EMFILE;		/* default for next three conditions */
521     if (ifd[0] >= sys_max_files() || pipe(ofd) < 0) {
522         close(ifd[0]);
523         close(ifd[1]);
524         return ERL_DRV_ERROR_ERRNO;
525     }
526     if (ofd[1] >= sys_max_files()) {
527         close_pipes(ifd, ofd);
528         errno = EMFILE;
529         return ERL_DRV_ERROR_ERRNO;
530     }
531 
532     SET_NONBLOCKING(ifd[0]);
533     SET_NONBLOCKING(ofd[1]);
534 
535     stderrfd = opts->redir_stderr ? ifd[1] : dup(2);
536 
537     if (stderrfd >= sys_max_files() || stderrfd < 0) {
538         close_pipes(ifd, ofd);
539         if (stderrfd > -1)
540             close(stderrfd);
541         return ERL_DRV_ERROR_ERRNO;
542     }
543 
544     if (opts->spawn_type == ERTS_SPAWN_EXECUTABLE) {
545 	/* started with spawn_executable, not with spawn */
546 	len = strlen(name);
547 	cmd_line = (char *) erts_alloc_fnf(ERTS_ALC_T_TMP, len + 1);
548 	if (!cmd_line) {
549             close_pipes(ifd, ofd);
550 	    errno = ENOMEM;
551 	    return ERL_DRV_ERROR_ERRNO;
552 	}
553 	memcpy((void *) cmd_line,(void *) name, len);
554 	cmd_line[len] = '\0';
555 	len = len + 1;
556 	if (access(cmd_line,X_OK) != 0) {
557 	    int save_errno = errno;
558 	    erts_free(ERTS_ALC_T_TMP, cmd_line);
559             close_pipes(ifd, ofd);
560 	    errno = save_errno;
561 	    return ERL_DRV_ERROR_ERRNO;
562 	}
563     } else {
564 	/* make the string suitable for giving to "sh" */
565 	len = strlen(name);
566 	cmd_line = (char *) erts_alloc_fnf(ERTS_ALC_T_TMP,
567 					   CMD_LINE_PREFIX_STR_SZ + len + 1);
568 	if (!cmd_line) {
569             close_pipes(ifd, ofd);
570 	    errno = ENOMEM;
571 	    return ERL_DRV_ERROR_ERRNO;
572 	}
573 	memcpy((void *) cmd_line,
574 	       (void *) CMD_LINE_PREFIX_STR,
575 	       CMD_LINE_PREFIX_STR_SZ);
576 	memcpy((void *) (cmd_line + CMD_LINE_PREFIX_STR_SZ), (void *) name, len);
577 	cmd_line[CMD_LINE_PREFIX_STR_SZ + len] = '\0';
578 	len = CMD_LINE_PREFIX_STR_SZ + len + 1;
579 }
580 
581     if ((cwd = getcwd(wd_buff, MAXPATHLEN+1)) == NULL) {
582         /* on some OSs this call opens a fd in the
583            background which means that this can
584            return EMFILE */
585         int err = errno;
586         close_pipes(ifd, ofd);
587         erts_free(ERTS_ALC_T_TMP, (void *) cmd_line);
588         errno = err;
589         return ERL_DRV_ERROR_ERRNO;
590     }
591 
592     wd = opts->wd;
593 
594     {
595         void *environment_block;
596         struct iovec *io_vector;
597         int iov_len = 5;
598         char nullbuff[] = "\0";
599         int j, i = 0, res;
600         Sint32 buffsz = 0, env_len = 0, argv_len = 0,
601             flags = (opts->use_stdio ? FORKER_FLAG_USE_STDIO : 0)
602             | (opts->exit_status ? FORKER_FLAG_EXIT_STATUS : 0)
603             | (opts->read_write & DO_READ ? FORKER_FLAG_DO_READ : 0)
604             | (opts->read_write & DO_WRITE ? FORKER_FLAG_DO_WRITE : 0);
605 
606         if (wd) iov_len++;
607 
608         /* num envs including size int */
609         iov_len += 1 + opts->envir.variable_count;
610 
611         /* count number of element in argument list */
612         if (opts->spawn_type == ERTS_SPAWN_EXECUTABLE) {
613             if (opts->argv != NULL) {
614                 while(opts->argv[argv_len] != NULL)
615                     argv_len++;
616             } else {
617                 argv_len++;
618             }
619             iov_len += 1 + argv_len; /* num argvs including size int */
620         }
621 
622         io_vector = erts_alloc_fnf(ERTS_ALC_T_TMP, sizeof(struct iovec) * iov_len);
623 
624         if (!io_vector) {
625             close_pipes(ifd, ofd);
626             erts_free(ERTS_ALC_T_TMP, (void *) cmd_line);
627             errno = ENOMEM;
628             return ERL_DRV_ERROR_ERRNO;
629         }
630 
631         /*
632          * Whitebox test port_SUITE:pipe_limit_env
633          * assumes this command payload format.
634          */
635         io_vector[i].iov_base = (void*)&buffsz;
636         io_vector[i++].iov_len = sizeof(buffsz);
637 
638         io_vector[i].iov_base = (void*)&flags;
639         flags = htonl(flags);
640         io_vector[i++].iov_len = sizeof(flags);
641         buffsz += sizeof(flags);
642 
643         io_vector[i].iov_base = cmd_line;
644         io_vector[i++].iov_len = len;
645         buffsz += len;
646 
647         io_vector[i].iov_base = cwd;
648         io_vector[i].iov_len = strlen(io_vector[i].iov_base) + 1;
649         buffsz += io_vector[i++].iov_len;
650 
651         if (wd) {
652             io_vector[i].iov_base = wd;
653             io_vector[i].iov_len = strlen(io_vector[i].iov_base) + 1;
654             buffsz += io_vector[i++].iov_len;
655         }
656 
657         io_vector[i].iov_base = nullbuff;
658         io_vector[i++].iov_len = 1;
659         buffsz += io_vector[i-1].iov_len;
660 
661         env_len = htonl(opts->envir.variable_count);
662         io_vector[i].iov_base = (void*)&env_len;
663         io_vector[i++].iov_len = sizeof(env_len);
664         buffsz += io_vector[i-1].iov_len;
665 
666         environment_block = add_spawn_env_block(&opts->envir, io_vector, &i,
667             &buffsz);
668 
669         /* only append arguments if this was a spawn_executable */
670         if (opts->spawn_type == ERTS_SPAWN_EXECUTABLE) {
671 
672             io_vector[i].iov_base = (void*)&argv_len;
673             argv_len = htonl(argv_len);
674             io_vector[i++].iov_len = sizeof(argv_len);
675             buffsz += io_vector[i-1].iov_len;
676 
677             if (opts->argv) {
678                 /* If there are arguments we copy in the references to
679                    them into the iov */
680                 for (j = 0; opts->argv[j]; j++) {
681                     if (opts->argv[j] == erts_default_arg0)
682                         io_vector[i].iov_base = cmd_line;
683                     else
684                         io_vector[i].iov_base = opts->argv[j];
685                     io_vector[i].iov_len = strlen(io_vector[i].iov_base) + 1;
686                     buffsz += io_vector[i++].iov_len;
687                 }
688             } else {
689                 io_vector[i].iov_base = cmd_line;
690                 io_vector[i].iov_len = strlen(io_vector[i].iov_base) + 1;
691                 buffsz += io_vector[i++].iov_len;
692             }
693         }
694 
695         /* we send the request to do the fork */
696         if ((res = writev(ofd[1], io_vector, iov_len > MAXIOV ? MAXIOV : iov_len)) < 0) {
697             if (errno == ERRNO_BLOCK || errno == EINTR) {
698                 res = 0;
699             } else {
700                 int err = errno;
701                 close_pipes(ifd, ofd);
702                 erts_free(ERTS_ALC_T_TMP, io_vector);
703                 erts_free(ERTS_ALC_T_TMP, (void *) cmd_line);
704                 errno = err;
705                 return ERL_DRV_ERROR_ERRNO;
706             }
707         }
708 
709         if (res < (buffsz + sizeof(buffsz))) {
710             /* we only wrote part of the command payload. Enqueue the rest. */
711             for (i = 0; i < iov_len; i++) {
712                 if (res >= io_vector[i].iov_len)
713                     res -= io_vector[i].iov_len;
714                 else {
715                     driver_enq(port_num, io_vector[i].iov_base + res,
716                                io_vector[i].iov_len - res);
717                     res = 0;
718                 }
719             }
720             driver_select(port_num, ofd[1], ERL_DRV_WRITE|ERL_DRV_USE, 1);
721         }
722 
723         erts_free(ERTS_ALC_T_TMP, environment_block);
724         erts_free(ERTS_ALC_T_TMP, io_vector);
725     }
726 
727     erts_free(ERTS_ALC_T_TMP, (void *) cmd_line);
728 
729     dd = create_driver_data(port_num, ifd[0], ofd[1], opts->packet_bytes,
730                              DO_WRITE | DO_READ, opts->exit_status,
731                             0, 0, opts);
732 
733     {
734         /* send ofd[0] + ifd[1] + stderrfd to forker port */
735         ErtsSysForkerProto *proto =
736             erts_alloc(ERTS_ALC_T_DRV_CTRL_DATA,
737                        sizeof(ErtsSysForkerProto));
738         memset(proto, 0, sizeof(ErtsSysForkerProto));
739         proto->action = ErtsSysForkerProtoAction_Start;
740         proto->u.start.fds[0] = ofd[0];
741         proto->u.start.fds[1] = ifd[1];
742         proto->u.start.fds[2] = stderrfd;
743         proto->u.start.port_id = opts->exit_status ? erts_drvport2id(port_num) : THE_NON_VALUE;
744         if (erl_drv_port_control(forker_port, ERTS_FORKER_DRV_CONTROL_MAGIC_NUMBER,
745                                  (char*)proto, sizeof(*proto))) {
746             /* The forker port has been killed, we close both fd's which will
747                make open_port throw an epipe error */
748             close(ofd[0]);
749             close(ifd[1]);
750         }
751     }
752 
753     /* we set these fds to negative to mark if
754        they should be closed after the handshake */
755     if (!(opts->read_write & DO_READ))
756         dd->ifd->fd *= -1;
757 
758     if (!(opts->read_write & DO_WRITE))
759         dd->ofd->fd *= -1;
760 
761     return (ErlDrvData)dd;
762 #undef CMD_LINE_PREFIX_STR
763 #undef CMD_LINE_PREFIX_STR_SZ
764 }
765 
spawn_control(ErlDrvData e,unsigned int cmd,char * buf,ErlDrvSizeT len,char ** rbuf,ErlDrvSizeT rlen)766 static ErlDrvSSizeT spawn_control(ErlDrvData e, unsigned int cmd, char *buf,
767                                   ErlDrvSizeT len, char **rbuf, ErlDrvSizeT rlen)
768 {
769     ErtsSysDriverData *dd = (ErtsSysDriverData*)e;
770     ErtsSysForkerProto *proto = (ErtsSysForkerProto *)buf;
771 
772     if (cmd != ERTS_SPAWN_DRV_CONTROL_MAGIC_NUMBER)
773         return -1;
774 
775     ASSERT(len == sizeof(*proto));
776     ASSERT(proto->action == ErtsSysForkerProtoAction_SigChld);
777 
778     dd->status = proto->u.sigchld.error_number;
779     dd->alive = -1;
780 
781     if (dd->ifd)
782         driver_select(dd->port_num, abs(dd->ifd->fd), ERL_DRV_READ | ERL_DRV_USE, 1);
783 
784     if (dd->ofd)
785         driver_select(dd->port_num, abs(dd->ofd->fd), ERL_DRV_WRITE | ERL_DRV_USE, 1);
786 
787     return 0;
788 }
789 
790 #define FD_DEF_HEIGHT 24
791 #define FD_DEF_WIDTH 80
792 /* Control op */
793 #define FD_CTRL_OP_GET_WINSIZE 100
794 
/*
 * Ask the terminal behind fd for its size.
 *
 * Returns 1 and stores the number of columns in *width and the number of
 * rows in *height when the TIOCGWINSZ ioctl succeeds.  Returns 0, leaving
 * both untouched, when it fails or when TIOCGWINSZ is not available.
 */
static int fd_get_window_size(int fd, Uint32 *width, Uint32 *height)
{
#ifdef TIOCGWINSZ
    struct winsize size;

    if (ioctl(fd, TIOCGWINSZ, &size) != 0)
        return 0;

    *width = (Uint32) size.ws_col;
    *height = (Uint32) size.ws_row;
    return 1;
#else
    return 0;
#endif
}
807 
fd_control(ErlDrvData drv_data,unsigned int command,char * buf,ErlDrvSizeT len,char ** rbuf,ErlDrvSizeT rlen)808 static ErlDrvSSizeT fd_control(ErlDrvData drv_data,
809 			       unsigned int command,
810 			       char *buf, ErlDrvSizeT len,
811 			       char **rbuf, ErlDrvSizeT rlen)
812 {
813     char resbuff[2*sizeof(Uint32)];
814     ErtsSysDriverData* dd = (ErtsSysDriverData*)drv_data;
815     command -= ERTS_TTYSL_DRV_CONTROL_MAGIC_NUMBER;
816     switch (command) {
817     case FD_CTRL_OP_GET_WINSIZE:
818 	{
819 	    Uint32 w,h;
820             int success = 0;
821             if (dd->ofd != NULL) {
822                 /* Try with output file descriptor */
823                 int out_fd = dd->ofd->fd;
824                 success = fd_get_window_size(out_fd,&w,&h);
825             }
826             if (!success && dd->ifd != NULL) {
827                 /* Try with input file descriptor */
828                 int in_fd = dd->ifd->fd;
829                 success = fd_get_window_size(in_fd,&w,&h);
830             }
831             if (!success) {
832                 return -1;
833             }
834             /* Succeeded */
835 	    memcpy(resbuff,&w,sizeof(Uint32));
836 	    memcpy(resbuff+sizeof(Uint32),&h,sizeof(Uint32));
837 	}
838 	break;
839     default:
840 	return -1;
841     }
842     if (rlen < 2*sizeof(Uint32)) {
843 	*rbuf = driver_alloc(2*sizeof(Uint32));
844     }
845     memcpy(*rbuf,resbuff,2*sizeof(Uint32));
846     return 2*sizeof(Uint32);
847 }
848 
fd_start(ErlDrvPort port_num,char * name,SysDriverOpts * opts)849 static ErlDrvData fd_start(ErlDrvPort port_num, char* name,
850 			   SysDriverOpts* opts)
851 {
852     int non_blocking = 0;
853 
854     if (((opts->read_write & DO_READ) && opts->ifd >= sys_max_files()) ||
855 	((opts->read_write & DO_WRITE) && opts->ofd >= sys_max_files()))
856 	return ERL_DRV_ERROR_GENERAL;
857 
858     /*
859      * Historical:
860      *
861      * "Note about nonblocking I/O.
862      *
863      * At least on Solaris, setting the write end of a TTY to nonblocking,
864      * will set the input end to nonblocking as well (and vice-versa).
865      * If erl is run in a pipeline like this:  cat | erl
866      * the input end of the TTY will be the standard input of cat.
867      * And cat is not prepared to handle nonblocking I/O."
868      *
869      * Actually, the reason for this is not that the tty itself gets set
870      * in non-blocking mode, but that the "input end" (cat's stdin) and
871      * the "output end" (erlang's stdout) are typically the "same" file
872      * descriptor, dup()'ed from a single fd by one of this process'
873      * ancestors.
874      *
875      * The workaround for this problem used to be a rather bad kludge,
876      * interposing an extra process ("internal cat") between erlang's
877      * stdout and the original stdout, allowing erlang to set its stdout
878      * in non-blocking mode without affecting the stdin of the preceding
879      * process in the pipeline - and being a kludge, it caused all kinds
880      * of weird problems.
881      *
882      * So, this is the current logic:
883      *
884      * The only reason to set non-blocking mode on the output fd at all is
885      * if it's something that can cause a write() to block, of course,
886      * i.e. primarily if it points to a tty, socket, pipe, or fifo.
887      *
888      * If we don't set non-blocking mode when we "should" have, and output
889      * becomes blocked, the entire runtime system will be suspended - this
890      * is normally bad of course, and can happen fairly "easily" - e.g. user
891      * hits ^S on tty - but doesn't necessarily happen.
892      *
893      * If we do set non-blocking mode when we "shouldn't" have, the runtime
894      * system will end up seeing EOF on the input fd (due to the preceding
895      * process dying), which typically will cause the entire runtime system
896      * to terminate immediately (due to whatever erlang process is seeing
897      * the EOF taking it as a signal to halt the system). This is *very* bad.
898      *
899      * I.e. we should take a conservative approach, and only set non-
900      * blocking mode when we a) need to, and b) are reasonably certain
901      * that it won't be a problem. And as in the example above, the problem
902      * occurs when input fd and output fd point to different "things".
903      *
904      * However, determining that they are not just the same "type" of
905      * "thing", but actually the same instance of that type of thing, is
906      * unreasonably complex in many/most cases.
907      *
908      * Also, with pipes, sockets, and fifos it's far from obvious that the
909      * user *wants* non-blocking output: If you're running erlang inside
910      * some complex pipeline, you're probably not running a real-time system
911      * that must never stop, but rather *want* it to suspend if the output
912      * channel is "full".
913      *
914      * So, the bottom line: We will only set the output fd non-blocking if
915      * it points to a tty, and either a) the input fd also points to a tty,
916      * or b) we can make sure that setting the output fd non-blocking
917      * doesn't interfere with someone else's input, via a somewhat milder
918      * kludge than the above.
919      *
920      * Also keep in mind that while this code is almost exclusively run as
921      * a result of an erlang open_port({fd,0,1}, ...), that isn't the only
922      * case - it can be called with any old pre-existing file descriptors,
923      * the relations between which (if they're even two) we can only guess
924      * at - still, we try our best...
925      *
926      * Added note OTP 18: Some systems seem to use stdout/stderr to log data
927      * using unix pipes, so we cannot allow the system to block on a write.
928      * Therefore we use an async thread to write the data to fd's that could
929      * not be set to non-blocking. When no async threads are available we
930      * fall back on the old behaviour.
931      *
932      * Also the guarantee about what is delivered to the OS has changed.
933      * Pre 18 the fd driver did no flushing of data before terminating.
934      * Now it does. This is because we want to be able to guarantee that things
935      * such as escripts and friends really have outputted all data before
936      * terminating. This could potentially block the termination of the system
937      * for a very long time, but if the user wants to terminate fast she should
938      * use erlang:halt with flush=false.
939      */
940 
941     /* Try to figure out if we can use non-blocking writes */
942     if (opts->read_write & DO_WRITE) {
943 
944 	/* If we don't have a read end, all bets are off - no non-blocking. */
945 	if (opts->read_write & DO_READ) {
946 
947 	    if (isatty(opts->ofd)) { /* output fd is a tty:-) */
948 
949 		if (isatty(opts->ifd)) { /* input fd is also a tty */
950 
951 		    /* To really do this "right", we should also check that
952 		       input and output fd point to the *same* tty - but
953 		       this seems like overkill; ttyname() isn't for free,
954 		       and this is a very common case - and it's hard to
955 		       imagine a scenario where setting non-blocking mode
956 		       here would cause problems - go ahead and do it. */
957 
958                     non_blocking = 1;
959 		    SET_NONBLOCKING(opts->ofd);
960 
961 		} else {	/* output fd is a tty, input fd isn't */
962 
963 		    /* This is a "problem case", but also common (see the
964 		       example above) - i.e. it makes sense to try a bit
965 		       harder before giving up on non-blocking mode: Try to
966 		       re-open the tty that the output fd points to, and if
967 		       successful replace the original one with the "new" fd
968 		       obtained this way, and set *that* one in non-blocking
969 		       mode. (Yes, this is a kludge.)
970 
971 		       However, re-opening the tty may fail in a couple of
972 		       (unusual) cases:
973 
974 		       1) The name of the tty (or an equivalent one, i.e.
975 			  same major/minor number) can't be found, because
976 			  it actually lives somewhere other than /dev (or
977 			  wherever ttyname() looks for it), and isn't
978 			  equivalent to any of those that do live in the
979 			  "standard" place - this should be *very* unusual.
980 
981 		       2) Permissions on the tty don't allow us to open it -
982 			  it's perfectly possible to have an fd open to an
983 			  object whose permissions wouldn't allow us to open
984 			  it. This is not as unusual as it sounds, one case
985 			  is if the user has su'ed to someone else (not
986 			  root) - we have a read/write fd open to the tty
987 			  (because it has been inherited all the way down
988 			  here), but we have neither read nor write
989 			  permission for the tty.
990 
991 		       In these cases, we finally give up, and don't set the
992 		       output fd in non-blocking mode. */
993 
994 		    char *tty;
995 		    int nfd;
996 
997 		    if ((tty = ttyname(opts->ofd)) != NULL &&
998 			(nfd = open(tty, O_WRONLY)) != -1) {
999 			dup2(nfd, opts->ofd);
1000 			close(nfd);
1001                         non_blocking = 1;
1002 			SET_NONBLOCKING(opts->ofd);
1003 		    }
1004 		}
1005 	    }
1006 	}
1007     }
1008     return (ErlDrvData)create_driver_data(port_num, opts->ifd, opts->ofd,
1009                                           opts->packet_bytes,
1010                                           opts->read_write, 0, -1,
1011                                           !non_blocking, opts);
1012 }
1013 
/*
 * Reset the packet re-assembly state kept for one descriptor.
 *
 * When a buffer is attached (sz > 0) it is returned to the
 * ERTS_ALC_T_FD_ENTRY_BUF allocator and its size is subtracted from
 * sys_misc_mem_sz.  All bookkeeping fields are then zeroed; the
 * descriptor number itself (fdd->fd) is left alone.
 */
static void clear_fd_data(ErtsSysFdData *fdd)
{
    if (fdd->sz > 0) {
        erts_free(ERTS_ALC_T_FD_ENTRY_BUF, (void *) fdd->buf);
        ASSERT(erts_atomic_read_nob(&sys_misc_mem_sz) >= fdd->sz);
        erts_atomic_add_nob(&sys_misc_mem_sz, -fdd->sz);
    }

    /* Partial-header counter first, then the payload buffer state. */
    fdd->psz = 0;
    fdd->remain = 0;
    fdd->sz = 0;
    fdd->cpos = NULL;
    fdd->buf = NULL;
}
1027 
/*
 * Discard any buffered input state of fdd and put its descriptor back into
 * blocking mode.  fdd->fd may be stored negated (ready_input() tests for
 * fd < 0), hence the abs().  Neither prt nor use is consulted here.
 */
static void nbio_stop_fd(ErlDrvPort prt, ErtsSysFdData *fdd, int use)
{
    int os_fd;

    clear_fd_data(fdd);

    os_fd = abs(fdd->fd);
    SET_BLOCKING(os_fd);
}
1033 
/*
 * Stop callback of the fd driver.  Does not close the fds: they are
 * deselected with ERL_DRV_USE_NO_CALLBACK and switched back to blocking
 * mode, after which the driver data is freed and its size (driver data,
 * optional blocking state, one fd entry per distinct descriptor) is
 * subtracted from sys_misc_mem_sz.
 */
static void fd_stop(ErlDrvData ev)
{
    ErtsSysDriverData *dd = (ErtsSysDriverData *) ev;
    ErlDrvPort prt = dd->port_num;
    int released = sizeof(ErtsSysDriverData);

    if (dd->blocking != NULL) {
        erts_free(ERTS_ALC_T_SYS_BLOCKING, dd->blocking);
        dd->blocking = NULL;
        released += sizeof(ErtsSysBlocking);
    }

    if (dd->ifd != NULL) {
        released += sizeof(ErtsSysFdData);
        driver_select(prt, abs(dd->ifd->fd),
                      ERL_DRV_USE_NO_CALLBACK|DO_READ|DO_WRITE, 0);
        nbio_stop_fd(prt, dd->ifd, 1);
    }

    /* Input and output may share one entry; handle it only once. */
    if (dd->ofd != NULL && dd->ofd != dd->ifd) {
        released += sizeof(ErtsSysFdData);
        driver_select(prt, abs(dd->ofd->fd),
                      ERL_DRV_USE_NO_CALLBACK|DO_WRITE, 0);
        nbio_stop_fd(prt, dd->ofd, 1);
    }

    erts_free(ERTS_ALC_T_DRV_TAB, dd);
    erts_atomic_add_nob(&sys_misc_mem_sz, -released);
}
1060 
/*
 * Flush callback of the fd driver: record that the port is terminating
 * (state 1) unless some terminating state has already been stored, e.g.
 * the values 2 or -errno that port_inp_failure() sets.
 */
static void fd_flush(ErlDrvData ev)
{
    ErtsSysDriverData *dd = (ErtsSysDriverData *) ev;

    if (dd->terminating == 0) {
        dd->terminating = 1;
    }
}
1067 
vanilla_start(ErlDrvPort port_num,char * name,SysDriverOpts * opts)1068 static ErlDrvData vanilla_start(ErlDrvPort port_num, char* name,
1069 				SysDriverOpts* opts)
1070 {
1071     int flags, fd;
1072     ErlDrvData res;
1073 
1074     flags = (opts->read_write == DO_READ ? O_RDONLY :
1075 	     opts->read_write == DO_WRITE ? O_WRONLY|O_CREAT|O_TRUNC :
1076 	     O_RDWR|O_CREAT);
1077     if ((fd = open(name, flags, 0666)) < 0)
1078 	return ERL_DRV_ERROR_GENERAL;
1079     if (fd >= sys_max_files()) {
1080 	close(fd);
1081 	return ERL_DRV_ERROR_GENERAL;
1082     }
1083     SET_NONBLOCKING(fd);
1084 
1085     res = (ErlDrvData)(long)create_driver_data(port_num, fd, fd,
1086                                                opts->packet_bytes,
1087                                                opts->read_write, 0, -1, 0,
1088                                                opts);
1089     return res;
1090 }
1091 
1092 /* Note that driver_data[fd].ifd == fd if the port was opened for reading, */
1093 /* otherwise (i.e. write only) driver_data[fd].ofd = fd.  */
1094 
/*
 * Stop callback that gives up the port's descriptors.  For each fd entry
 * the buffered state is dropped and blocking mode restored, then the
 * descriptor is deselected with ERL_DRV_USE; the original notes mark these
 * calls as the point where the fd gets closed.  Finally the driver data
 * itself is freed.
 */
static void stop(ErlDrvData ev)
{
    ErtsSysDriverData *dd = (ErtsSysDriverData *) ev;
    ErlDrvPort prt = dd->port_num;
    ErtsSysFdData *in;
    ErtsSysFdData *out;

    in = dd->ifd;
    if (in != NULL) {
        nbio_stop_fd(prt, in, 0);
        driver_select(prt, abs(in->fd), ERL_DRV_USE, 0);   /* close(ifd); */
    }

    out = dd->ofd;
    /* A shared in/out entry was already dealt with above. */
    if (out != NULL && out != dd->ifd) {
        nbio_stop_fd(prt, out, 0);
        driver_select(prt, abs(out->fd), ERL_DRV_USE, 0);  /* close(ofd); */
    }

    erts_free(ERTS_ALC_T_DRV_TAB, dd);
}
1112 
/*
 * outputv - vectored output callback, used by fd_driver.
 *
 * e:  driver data (ErtsSysDriverData *) of the port.
 * ev: I/O vector to send; ev->size is the payload length.
 *
 * A length header of dd->packet_bytes bytes is placed in ev->iov[0] and
 * added to ev->size (this overwrites slot 0 - assumes the caller leaves
 * it free for the header; TODO confirm).  Then one of three things happens:
 *
 *  - the driver queue already holds data: the vector is appended to it;
 *  - queue empty and no blocking state (dd->blocking == NULL): a direct
 *    writev() is attempted, whatever was not written is queued and the
 *    fd is selected for write;
 *  - queue empty and blocking state present: the vector is queued and an
 *    fd_async job is scheduled with driver_async().
 *
 * Whenever data is queued, the port is marked busy once the queue size
 * reaches dd->high_watermark.  Errors are reported through
 * driver_failure_posix(); nothing is returned.
 */
static void outputv(ErlDrvData e, ErlIOVec* ev)
{
    ErtsSysDriverData *dd = (ErtsSysDriverData*)e;
    ErlDrvPort ix = dd->port_num;
    int pb = dd->packet_bytes;
    int ofd = dd->ofd ? dd->ofd->fd : -1; /* -1 when there is no output entry */
    ssize_t n;
    char lb[4];
    char* lbp;
    ErlDrvSizeT len = ev->size;
    ErlDrvSizeT qsz;

    /* (len > ((unsigned long)-1 >> (4-pb)*8)) */
    /*    if (pb >= 0 && (len & (((ErlDrvSizeT)1 << (pb*8))) - 1) != len) {*/
    /* Reject payloads whose length does not fit in a 1- or 2-byte header. */
    if (((pb == 2) && (len > 0xffff)) || (pb == 1 && len > 0xff)) {
	driver_failure_posix(ix, EINVAL);
	return; /* -1; */
    }
    /* Handles 0 <= pb <= 4 only */
    put_int32((Uint32) len, lb);
    /* The header is the trailing pb bytes of the 4-byte encoding
       (presumably put_int32 stores the most significant byte first). */
    lbp = lb + (4-pb);

    ev->iov[0].iov_base = lbp;
    ev->iov[0].iov_len = pb;
    ev->size += pb;

    /* With blocking state present the queue is shared with fd_async,
       which takes the same pdl lock before peeking at it. */
    if (dd->blocking)
        driver_pdl_lock(dd->blocking->pdl);

    qsz = driver_sizeq(ix);
    if (qsz) {
        /* (ErlDrvSizeT) -1 is treated as an error from driver_sizeq() */
        if (qsz == (ErlDrvSizeT) -1) {
            if (dd->blocking)
                driver_pdl_unlock(dd->blocking->pdl);
            driver_failure_posix(ix, EINVAL);
            return;
        }
        /* Data is already pending: append behind it. */
        driver_enqv(ix, ev, 0);
        qsz += ev->size;
        if (!dd->busy && qsz >= dd->high_watermark)
            set_busy_port(ix, (dd->busy = !0));
        if (dd->blocking)
            driver_pdl_unlock(dd->blocking->pdl);
    }
    else if (!dd->blocking) {
        /* We try to write directly if the fd is non-blocking */
	int vsize = ev->vsize > MAX_VSIZE ? MAX_VSIZE : ev->vsize;

	n = writev(ofd, (const void *) (ev->iov), vsize);
	if (n == ev->size)
	    return; /* 0;*/
	if (n < 0) {
	    if ((errno != EINTR) && (errno != ERRNO_BLOCK)) {
		driver_failure_posix(ix, errno);
		return; /* -1;*/
	    }
	    /* interrupted or would block: nothing was written */
	    n = 0;
	}
	driver_enqv(ix, ev, n);  /* n is the skip value */
        qsz = ev->size - n;
        if (!dd->busy && qsz >= dd->high_watermark)
            set_busy_port(ix, (dd->busy = !0));
	/* ready_output() will be asked to drain the rest */
	driver_select(ix, ofd, ERL_DRV_WRITE|ERL_DRV_USE, 1);
    }
    else {
        /* Blocking state present and queue empty: the lock taken above is
           released on both paths below. */
        if (ev->size != 0) {
            driver_enqv(ix, ev, 0);
            qsz = ev->size;
            if (!dd->busy && qsz >= dd->high_watermark)
                set_busy_port(ix, (dd->busy = !0));
            driver_pdl_unlock(dd->blocking->pdl);
            /* hand the actual write over to fd_async */
            driver_async(ix, &dd->blocking->pkey,
                         fd_async, dd, NULL);
        } else {
            driver_pdl_unlock(dd->blocking->pdl);
        }
    }

    /* return 0;*/
}
1194 
1195 /* Used by spawn_driver and vanilla driver */
output(ErlDrvData e,char * buf,ErlDrvSizeT len)1196 static void output(ErlDrvData e, char* buf, ErlDrvSizeT len)
1197 {
1198     ErtsSysDriverData *dd = (ErtsSysDriverData*)e;
1199     ErlDrvPort ix = dd->port_num;
1200     int pb = dd->packet_bytes;
1201     int ofd = dd->ofd ? dd->ofd->fd : -1;
1202     ssize_t n;
1203     ErlDrvSizeT qsz;
1204     char lb[4];
1205     char* lbp;
1206     struct iovec iv[2];
1207 
1208     /* (len > ((unsigned long)-1 >> (4-pb)*8)) */
1209     if (((pb == 2) && (len > 0xffff))
1210         || (pb == 1 && len > 0xff)
1211         || dd->pid == 0 /* Attempt at output before port is ready */) {
1212 	driver_failure_posix(ix, EINVAL);
1213 	return; /* -1; */
1214     }
1215     put_int32(len, lb);
1216     lbp = lb + (4-pb);
1217 
1218     qsz = driver_sizeq(ix);
1219     if (qsz) {
1220         if (qsz == (ErlDrvSizeT) -1) {
1221             driver_failure_posix(ix, EINVAL);
1222             return;
1223         }
1224 	driver_enq(ix, lbp, pb);
1225 	driver_enq(ix, buf, len);
1226         qsz += len + pb;
1227     }
1228     else {
1229 	iv[0].iov_base = lbp;
1230 	iv[0].iov_len = pb;  /* should work for pb=0 */
1231 	iv[1].iov_base = buf;
1232 	iv[1].iov_len = len;
1233 	n = writev(ofd, iv, 2);
1234 	if (n == pb+len)
1235 	    return; /* 0; */
1236 	if (n < 0) {
1237 	    if ((errno != EINTR) && (errno != ERRNO_BLOCK)) {
1238 		driver_failure_posix(ix, errno);
1239 		return; /* -1; */
1240 	    }
1241 	    n = 0;
1242 	}
1243         qsz = pb + len - n;
1244 	if (n < pb) {
1245 	    driver_enq(ix, lbp+n, pb-n);
1246 	    driver_enq(ix, buf, len);
1247 	}
1248 	else {
1249 	    n -= pb;
1250 	    driver_enq(ix, buf+n, len-n);
1251 	}
1252 	driver_select(ix, ofd, ERL_DRV_WRITE|ERL_DRV_USE, 1);
1253     }
1254 
1255     if (!dd->busy && qsz >= dd->high_watermark)
1256         set_busy_port(ix, (dd->busy = !0));
1257 
1258     return; /* 0; */
1259 }
1260 
/*
 * port_inp_failure - handle end-of-file or an error on a port's input side.
 *
 * dd:  driver data of the port.
 * res: result of the failed operation: 0 (eof) or -1 (error).  For errors
 *      the caller must leave the error code in errno; it is captured on
 *      entry.
 *
 * The input fd is deselected and its buffered state cleared.  If blocking
 * state is present and output is still queued, only dd->terminating is set
 * (2 for eof, -errno for an error) and termination is left to
 * fd_ready_async().  Otherwise:
 *   - eof: nothing more is done while dd->alive == 1 (exit status wanted,
 *     process not yet reaped); with dd->alive == -1 the exit status is
 *     reported first; then the port is failed with eof;
 *   - error with an input entry: reported via erl_drv_init_ack() using
 *     ERL_DRV_ERROR_ERRNO;
 *   - error without an input entry: driver_failure_posix() with the
 *     captured errno.
 *
 * Always returns 0.
 */
static int port_inp_failure(ErtsSysDriverData *dd, int res)
{
    int err = errno;

    ASSERT(res <= 0);
    if (dd->ifd) {
        /* The fd may be stored negated while the spawn handshake is still
           pending, so use abs() like every other driver_select() call. */
        driver_select(dd->port_num, abs(dd->ifd->fd),
                      ERL_DRV_READ|ERL_DRV_WRITE, 0);
        clear_fd_data(dd->ifd);
    }

    if (dd->blocking) {
        driver_pdl_lock(dd->blocking->pdl);
        if (driver_sizeq(dd->port_num) > 0) {
            driver_pdl_unlock(dd->blocking->pdl);
            /* We have stuff in the output queue, so we just
               set the state to terminating and wait for fd_ready_async
               to terminate the port */
            if (res == 0)
                dd->terminating = 2;
            else
                dd->terminating = -err;
            return 0;
        }
        driver_pdl_unlock(dd->blocking->pdl);
    }

    if (res == 0) {
        if (dd->alive == 1) {
            /*
             * We have eof and want to report exit status, but the process
             * hasn't exited yet. When it does ready_input will
             * driver_select() this fd which will make sure that we get
             * back here with dd->alive == -1 and dd->status set.
             */
            return 0;
        }
        else if (dd->alive == -1) {
            int status = dd->status;

            /* We need not be prepared for stopped/continued processes. */
            if (WIFSIGNALED(status))
                status = 128 + WTERMSIG(status);
            else
                status = WEXITSTATUS(status);
            driver_report_exit(dd->port_num, status);
        }
        driver_failure_eof(dd->port_num);
    } else if (dd->ifd) {
        /* ERL_DRV_ERROR_ERRNO presumably makes erl_drv_init_ack() read
           errno, and the calls above may have changed it, so put the
           intended value back explicitly. */
        if (dd->alive == -1)
            errno = dd->status;
        else
            errno = err;
        erl_drv_init_ack(dd->port_num, ERL_DRV_ERROR_ERRNO);
    } else {
        driver_failure_posix(dd->port_num, err);
    }
    return 0;
}
1318 
1319 /* fd is the drv_data that is returned from the */
1320 /* initial start routine                        */
1321 /* ready_fd is the descriptor that is ready to read */
1322 
ready_input(ErlDrvData e,ErlDrvEvent ready_fd)1323 static void ready_input(ErlDrvData e, ErlDrvEvent ready_fd)
1324 {
1325     ErtsSysDriverData *dd = (ErtsSysDriverData*)e;
1326     ErlDrvPort port_num;
1327     int packet_bytes;
1328     int res;
1329     Uint h;
1330 
1331     port_num = dd->port_num;
1332     packet_bytes = dd->packet_bytes;
1333 
1334     ASSERT(abs(dd->ifd->fd) == ready_fd);
1335 
1336     if (dd->pid == 0) {
1337         /* the pid is sent from erl_child_setup. spawn driver only. */
1338         ErtsSysForkerProto proto;
1339         int res;
1340 
1341         if((res = read(ready_fd, &proto, sizeof(proto))) <= 0) {
1342             if (res < 0 && (errno == ERRNO_BLOCK || errno == EINTR))
1343                 return;
1344             /* hmm, child setup seems to have closed the pipe too early...
1345                we close the port as there is not much else we can do */
1346             driver_select(port_num, ready_fd, ERL_DRV_READ, 0);
1347             if (res == 0)
1348                 errno = EPIPE;
1349             port_inp_failure(dd, -1);
1350             return;
1351         }
1352 
1353         ASSERT(proto.action == ErtsSysForkerProtoAction_Go);
1354         dd->pid = proto.u.go.os_pid;
1355 
1356         if (dd->pid == -1) {
1357             /* Setup failed! The only reason why this should happen is if
1358                the fork fails. */
1359             errno = proto.u.go.error_number;
1360             port_inp_failure(dd, -1);
1361             return;
1362         }
1363 
1364         proto.action = ErtsSysForkerProtoAction_Ack;
1365 
1366         if (driver_sizeq(port_num) > 0) {
1367             driver_enq(port_num, (char*)&proto, sizeof(proto));
1368             } else {
1369                 if (write(abs(dd->ofd->fd), &proto, sizeof(proto)) < 0)
1370                     if (errno == ERRNO_BLOCK || errno == EINTR)
1371                         driver_enq(port_num, (char*)&proto, sizeof(proto));
1372                 /* do nothing on failure here. If the ofd is broken, then
1373                    the ifd will probably also be broken and trigger
1374                    a port_inp_failure */
1375             }
1376 
1377             if (dd->ifd->fd < 0) {
1378                 driver_select(port_num, abs(dd->ifd->fd), ERL_DRV_READ|ERL_DRV_USE, 0);
1379                 erts_atomic_add_nob(&sys_misc_mem_sz, -sizeof(ErtsSysFdData));
1380                 dd->ifd = NULL;
1381             }
1382 
1383             if (dd->ofd->fd < 0  || driver_sizeq(port_num) > 0)
1384                 /* we select in order to close fd or write to queue,
1385                    child setup will close this fd if fd < 0 */
1386                 driver_select(port_num, abs(dd->ofd->fd), ERL_DRV_WRITE|ERL_DRV_USE, 1);
1387 
1388             erl_drv_set_os_pid(port_num, dd->pid);
1389             erl_drv_init_ack(port_num, e);
1390             return;
1391     }
1392 
1393     if (packet_bytes == 0) {
1394 	byte *read_buf = (byte *) erts_alloc(ERTS_ALC_T_SYS_READ_BUF,
1395 					     ERTS_SYS_READ_BUF_SZ);
1396 	res = read(ready_fd, read_buf, ERTS_SYS_READ_BUF_SZ);
1397 	if (res < 0) {
1398 	    if ((errno != EINTR) && (errno != ERRNO_BLOCK))
1399 		port_inp_failure(dd, res);
1400 	}
1401 	else if (res == 0)
1402 	    port_inp_failure(dd, res);
1403 	else
1404 	    driver_output(port_num, (char*) read_buf, res);
1405 	erts_free(ERTS_ALC_T_SYS_READ_BUF, (void *) read_buf);
1406     }
1407     else if (dd->ifd->remain > 0) { /* We try to read the remainder */
1408 	/* space is allocated in buf */
1409 	res = read(ready_fd, dd->ifd->cpos,
1410 		   dd->ifd->remain);
1411 	if (res < 0) {
1412 	    if ((errno != EINTR) && (errno != ERRNO_BLOCK))
1413 		port_inp_failure(dd, res);
1414 	}
1415 	else if (res == 0) {
1416 	    port_inp_failure(dd, res);
1417 	}
1418 	else if (res == dd->ifd->remain) { /* we're done  */
1419 	    driver_output(port_num, dd->ifd->buf,
1420 			  dd->ifd->sz);
1421 	    clear_fd_data(dd->ifd);
1422 	}
1423 	else { /*  if (res < dd->ifd->remain) */
1424 	    dd->ifd->cpos += res;
1425 	    dd->ifd->remain -= res;
1426 	}
1427     }
1428     else if (dd->ifd->remain == 0) { /* clean fd */
1429 	byte *read_buf = (byte *) erts_alloc(ERTS_ALC_T_SYS_READ_BUF,
1430 					     ERTS_SYS_READ_BUF_SZ);
1431 	/* We make one read attempt and see what happens */
1432 	res = read(ready_fd, read_buf, ERTS_SYS_READ_BUF_SZ);
1433 	if (res < 0) {
1434 	    if ((errno != EINTR) && (errno != ERRNO_BLOCK))
1435 		port_inp_failure(dd, res);
1436 	}
1437 	else if (res == 0) {     	/* eof */
1438 	    port_inp_failure(dd, res);
1439 	}
1440 	else if (res < packet_bytes - dd->ifd->psz) {
1441 	    memcpy(dd->ifd->pbuf+dd->ifd->psz,
1442 		   read_buf, res);
1443 	    dd->ifd->psz += res;
1444 	}
1445 	else  { /* if (res >= packet_bytes) */
1446 	    unsigned char* cpos = read_buf;
1447 	    int bytes_left = res;
1448 
1449 	    while (1) {
1450 		int psz = dd->ifd->psz;
1451 		char* pbp = dd->ifd->pbuf + psz;
1452 
1453 		while(bytes_left && (psz < packet_bytes)) {
1454 		    *pbp++ = *cpos++;
1455 		    bytes_left--;
1456 		    psz++;
1457 		}
1458 
1459 		if (psz < packet_bytes) {
1460 		    dd->ifd->psz = psz;
1461 		    break;
1462 		}
1463 		dd->ifd->psz = 0;
1464 
1465 		switch (packet_bytes) {
1466 		case 1: h = get_int8(dd->ifd->pbuf);  break;
1467 		case 2: h = get_int16(dd->ifd->pbuf); break;
1468 		case 4: h = get_int32(dd->ifd->pbuf); break;
1469 		default: ASSERT(0); return; /* -1; */
1470 		}
1471 
1472 		if (h <= (bytes_left)) {
1473 		    driver_output(port_num, (char*) cpos, h);
1474 		    cpos += h;
1475 		    bytes_left -= h;
1476 		    continue;
1477 		}
1478 		else {		/* The last message we got was split */
1479                     char *buf = erts_alloc_fnf(ERTS_ALC_T_FD_ENTRY_BUF, h);
1480 		    if (!buf) {
1481 			errno = ENOMEM;
1482 			port_inp_failure(dd, -1);
1483 		    }
1484 		    else {
1485 			erts_atomic_add_nob(&sys_misc_mem_sz, h);
1486 			sys_memcpy(buf, cpos, bytes_left);
1487 			dd->ifd->buf = buf;
1488 			dd->ifd->sz = h;
1489 			dd->ifd->remain = h - bytes_left;
1490 			dd->ifd->cpos = buf + bytes_left;
1491 		    }
1492 		    break;
1493 		}
1494 	    }
1495 	}
1496 	erts_free(ERTS_ALC_T_SYS_READ_BUF, (void *) read_buf);
1497     }
1498 }
1499 
1500 
1501 /* fd is the drv_data that is returned from the */
1502 /* initial start routine                        */
1503 /* ready_fd is the descriptor that is ready to read */
1504 
ready_output(ErlDrvData e,ErlDrvEvent ready_fd)1505 static void ready_output(ErlDrvData e, ErlDrvEvent ready_fd)
1506 {
1507     ErtsSysDriverData *dd = (ErtsSysDriverData*)e;
1508     ErlDrvPort ix = dd->port_num;
1509     int n;
1510     struct iovec* iv;
1511     int vsize;
1512 
1513     if ((iv = (struct iovec*) driver_peekq(ix, &vsize)) == NULL) {
1514         if (dd->busy)
1515             set_busy_port(ix, (dd->busy = 0));
1516 	driver_select(ix, ready_fd, ERL_DRV_WRITE, 0);
1517         if (dd->pid > 0 && dd->ofd->fd < 0) {
1518             /* The port was opened with 'in' option, which means we
1519                should close the output fd as soon as the command has
1520                been sent. */
1521             driver_select(ix, ready_fd, ERL_DRV_WRITE|ERL_DRV_USE, 0);
1522             erts_atomic_add_nob(&sys_misc_mem_sz, -sizeof(ErtsSysFdData));
1523             dd->ofd = NULL;
1524         }
1525         if (dd->terminating)
1526             driver_failure_atom(dd->port_num,"normal");
1527 	return; /* 0; */
1528     }
1529     vsize = vsize > MAX_VSIZE ? MAX_VSIZE : vsize;
1530     if ((n = writev(ready_fd, iv, vsize)) > 0) {
1531         ErlDrvSizeT qsz = driver_deq(ix, n);
1532         if (qsz == (ErlDrvSizeT) -1) {
1533             driver_failure_posix(ix, EINVAL);
1534             return;
1535         }
1536         if (dd->busy && qsz < dd->low_watermark)
1537             set_busy_port(ix, (dd->busy = 0));
1538     }
1539     else if (n < 0) {
1540 	if (errno == ERRNO_BLOCK || errno == EINTR)
1541 	    return; /* 0; */
1542 	else {
1543 	    int res = errno;
1544 	    driver_select(ix, ready_fd, ERL_DRV_WRITE, 0);
1545 	    driver_failure_posix(ix, res);
1546 	    return; /* -1; */
1547 	}
1548     }
1549     return; /* 0; */
1550 }
1551 
/*
 * Close the OS descriptor carried by the driver event `fd`; the second
 * argument is ignored.  Presumably installed as the drivers' stop_select
 * callback, which would be what the close(...) remarks beside the
 * driver_select(..., ERL_DRV_USE, 0) calls in stop() refer to - confirm
 * against the driver entry tables.
 */
static void stop_select(ErlDrvEvent fd, void* _)
{
    int os_fd = (int) fd;

    close(os_fd);
}
1556 
1557 
/*
 * fd_async - job function handed to driver_async() by outputv() and
 * fd_ready_async(); performs one write of queued output for a port that
 * has blocking state.
 *
 * async_data: the port's ErtsSysDriverData.
 *
 * The head of the driver queue (at most MAXIOV entries) is copied while
 * holding the pdl lock, then written with writev() after the lock has been
 * released; EINTR is retried.  The outcome is stored in dd->blocking->res
 * (bytes written, or -1) and dd->blocking->err (errno of the failure,
 * ENOMEM if the copy could not be allocated, otherwise 0) for
 * fd_ready_async() to act on.  Nothing is dequeued here.
 */
static void
fd_async(void *async_data)
{
    ErlDrvSSizeT res;
    ErtsSysDriverData *dd = (ErtsSysDriverData *)async_data;
    SysIOVec      *iov0;
    SysIOVec      *iov;
    int            iovlen;
    int            err = 0;
    /* much of this code is stolen from efile_drv:invoke_writev */
    driver_pdl_lock(dd->blocking->pdl);
    iov0 = driver_peekq(dd->port_num, &iovlen);
    iovlen = iovlen < MAXIOV ? iovlen : MAXIOV;
    /* _fnf: a failed allocation yields NULL, which is checked below */
    iov = erts_alloc_fnf(ERTS_ALC_T_SYS_WRITE_BUF,
                         sizeof(SysIOVec)*iovlen);
    if (!iov) {
        res = -1;
        err = ENOMEM;
        driver_pdl_unlock(dd->blocking->pdl);
    } else {
        /* work on a private copy of the vector so the lock need not be
           held across the write */
        memcpy(iov,iov0,iovlen*sizeof(SysIOVec));
        driver_pdl_unlock(dd->blocking->pdl);

        do {
            res = writev(dd->ofd->fd, iov, iovlen);
        } while (res < 0 && errno == EINTR);
        if (res < 0)
            err = errno;

        erts_free(ERTS_ALC_T_SYS_WRITE_BUF, iov);
    }
    dd->blocking->res = res;
    dd->blocking->err = err;
}
1592 
/*
 * fd_ready_async - completion handler for an fd_async job.
 *
 * drv_data:    not used.
 * thread_data: the ErtsSysDriverData that was given to driver_async().
 *
 * Acts on the result left in dd->blocking by fd_async():
 *   res > 0:  that many bytes are dequeued (under the pdl lock) and the
 *             busy state is cleared once the queue is below
 *             dd->low_watermark.  If data remains, another fd_async job
 *             is scheduled; if the queue is empty and the port is
 *             terminating, it is failed according to dd->terminating
 *             (1: "normal", 2: eof, negative: posix error -terminating).
 *   res < 0:  ERRNO_BLOCK schedules another attempt, any other error
 *             fails the port with that error.
 *   res == 0: nothing is done.
 */
void fd_ready_async(ErlDrvData drv_data,
                    ErlDrvThreadData thread_data) {
    ErtsSysDriverData *dd = (ErtsSysDriverData *)thread_data;
    ErlDrvPort port_num = dd->port_num;

    ASSERT(dd->blocking);

    if (dd->blocking->res > 0) {
        ErlDrvSizeT qsz;
        driver_pdl_lock(dd->blocking->pdl);
        qsz = driver_deq(port_num, dd->blocking->res);
        /* (ErlDrvSizeT) -1 is treated as an error from driver_deq() */
        if (qsz == (ErlDrvSizeT) -1) {
            driver_pdl_unlock(dd->blocking->pdl);
            driver_failure_posix(port_num, EINVAL);
            return;
        }
        if (dd->busy && qsz < dd->low_watermark)
            set_busy_port(port_num, (dd->busy = 0));
        driver_pdl_unlock(dd->blocking->pdl);
        if (qsz == 0) {
            if (dd->terminating) {
                /* The port has been ordered to terminate
                   from either fd_flush or port_inp_failure */
                if (dd->terminating == 1)
                    driver_failure_atom(port_num, "normal");
                else if (dd->terminating == 2)
                    driver_failure_eof(port_num);
                else if (dd->terminating < 0)
                    driver_failure_posix(port_num, -dd->terminating);
                return; /* -1; */
            }
        } else {
            /* still data left to write in queue */
            driver_async(port_num, &dd->blocking->pkey, fd_async, dd, NULL);
            return /* 0; */;
        }
    } else if (dd->blocking->res < 0) {
        if (dd->blocking->err == ERRNO_BLOCK) {
            /* still data left to write in queue */
            driver_async(port_num, &dd->blocking->pkey, fd_async, dd, NULL);
        } else
            driver_failure_posix(port_num, dd->blocking->err);
        return; /* -1; */
    }
    return; /* 0; */
}
1639 
1640 
1641 /* Forker driver */
1642 
/* This process's end of the socketpair shared with erl_child_setup;
   assigned and made non-blocking by forker_start(). */
static int forker_fd;
1644 
forker_start(ErlDrvPort port_num,char * name,SysDriverOpts * opts)1645 static ErlDrvData forker_start(ErlDrvPort port_num, char* name,
1646                                SysDriverOpts* opts)
1647 {
1648 
1649     int i;
1650     int fds[2];
1651     int res, unbind;
1652     char bindir[MAXPATHLEN];
1653     size_t bindirsz = sizeof(bindir);
1654     Uint csp_path_sz;
1655     char *child_setup_prog;
1656 
1657     forker_port = erts_drvport2id(port_num);
1658 
1659     res = erts_sys_explicit_8bit_getenv("BINDIR", bindir, &bindirsz);
1660     if (res == 0) {
1661         erts_exit(1, "Environment variable BINDIR is not set\n");
1662     } else if(res < 0) {
1663         erts_exit(1, "Value of environment variable BINDIR is too large\n");
1664     }
1665 
1666     if (bindir[0] != DIR_SEPARATOR_CHAR)
1667         erts_exit(1,
1668                  "Environment variable BINDIR does not contain an"
1669                  " absolute path\n");
1670     csp_path_sz = (strlen(bindir)
1671                    + 1 /* DIR_SEPARATOR_CHAR */
1672                    + sizeof(CHILD_SETUP_PROG_NAME)
1673                    + 1);
1674     child_setup_prog = erts_alloc(ERTS_ALC_T_CS_PROG_PATH, csp_path_sz);
1675     erts_snprintf(child_setup_prog, csp_path_sz,
1676                   "%s%c%s",
1677                   bindir,
1678                   DIR_SEPARATOR_CHAR,
1679                   CHILD_SETUP_PROG_NAME);
1680     if (socketpair(AF_UNIX, SOCK_STREAM, 0, fds) < 0) {
1681         erts_exit(ERTS_ABORT_EXIT,
1682                  "Could not open unix domain socket in spawn_init: %d\n",
1683                  errno);
1684     }
1685 
1686     forker_fd = fds[0];
1687 
1688     unbind = erts_sched_bind_atfork_prepare();
1689 
1690     i = fork();
1691 
1692     if (i == 0) {
1693         /* The child */
1694         char *cs_argv[FORKER_ARGV_NO_OF_ARGS] =
1695             {CHILD_SETUP_PROG_NAME, NULL, NULL};
1696         char buff[128];
1697 
1698         erts_sched_bind_atfork_child(unbind);
1699 
1700         snprintf(buff, 128, "%d", sys_max_files());
1701         cs_argv[FORKER_ARGV_MAX_FILES] = buff;
1702 
1703         /* We preallocate fd 3 for the uds fd */
1704         if (fds[1] != 3) {
1705             dup2(fds[1], 3);
1706         }
1707 
1708 #if defined(USE_SETPGRP_NOARGS)		/* SysV */
1709     (void) setpgrp();
1710 #elif defined(USE_SETPGRP)		/* BSD */
1711     (void) setpgrp(0, getpid());
1712 #else					/* POSIX */
1713     (void) setsid();
1714 #endif
1715 
1716         execv(child_setup_prog, cs_argv);
1717         _exit(1);
1718     }
1719 
1720     erts_sched_bind_atfork_parent(unbind);
1721 
1722     erts_free(ERTS_ALC_T_CS_PROG_PATH, child_setup_prog);
1723 
1724     close(fds[1]);
1725 
1726     SET_NONBLOCKING(forker_fd);
1727 
1728     return (ErlDrvData)port_num;
1729 }
1730 
/*
 * forker_stop - stop callback of the forker driver.
 *
 * Currently does nothing: no descriptor is closed and no memory is
 * released here.  e is not used.
 */
static void forker_stop(ErlDrvData e)
{
    /* we probably should do something here,
       the port has been closed by the user. */
}
1736 
forker_deq(ErlDrvPort port_num,ErtsSysForkerProto * proto)1737 static ErlDrvSizeT forker_deq(ErlDrvPort port_num, ErtsSysForkerProto *proto)
1738 {
1739     close(proto->u.start.fds[0]);
1740     close(proto->u.start.fds[1]);
1741     if (proto->u.start.fds[1] != proto->u.start.fds[2])
1742         close(proto->u.start.fds[2]);
1743 
1744     return driver_deq(port_num, sizeof(*proto));
1745 }
1746 
forker_sigchld(Eterm port_id,int error)1747 static void forker_sigchld(Eterm port_id, int error)
1748 {
1749     ErtsSysForkerProto *proto = erts_alloc(ERTS_ALC_T_DRV_CTRL_DATA, sizeof(*proto));
1750     proto->action = ErtsSysForkerProtoAction_SigChld;
1751     proto->u.sigchld.error_number = error;
1752     proto->u.sigchld.port_id = port_id;
1753 
1754     /* ideally this would be a port_command call, but as command is
1755        already used by the spawn_driver, we use control instead.
1756        Note that when using erl_drv_port_control it is an asynchronous
1757        control. */
1758     erl_drv_port_control(port_id, ERTS_SPAWN_DRV_CONTROL_MAGIC_NUMBER,
1759                          (char*)proto, sizeof(*proto));
1760 }
1761 
/*
 * Input-ready callback of the forker driver: read one protocol message
 * from erl_child_setup and act on it.
 *
 * A read that would block or was interrupted is ignored.  Any other read
 * error, or end-of-file, terminates the emulator through erts_exit().
 * With FORKER_PROTO_START_ACK defined, a StartAck message retires the
 * oldest queued start request and, if more remain queued, selects
 * forker_fd for writing again.  Every other message is expected to be a
 * SigChld and is forwarded to the port it names.
 */
static void forker_ready_input(ErlDrvData e, ErlDrvEvent fd)
{
    ErtsSysForkerProto msg;
    int nread;

    nread = read(fd, &msg, sizeof(msg));

    if (nread < 0) {
        if (errno == ERRNO_BLOCK || errno == EINTR)
            return;
        erts_exit(ERTS_DUMP_EXIT, "Failed to read from erl_child_setup: %d\n", errno);
    }

    if (nread == 0)
        erts_exit(ERTS_DUMP_EXIT, "erl_child_setup closed\n");

    ASSERT(nread == sizeof(msg));

#ifdef FORKER_PROTO_START_ACK
    if (msg.action == ErtsSysForkerProtoAction_StartAck) {
        /* Ideally we would like to not have to ack each Start
           command being sent over the uds, but it would seem
           that some operating systems (only observed on FreeBSD)
           throw away data on the uds when the socket becomes full,
           so we have to.
        */
        ErlDrvPort port_num = (ErlDrvPort)e;
        int vlen;
        SysIOVec *head = driver_peekq(port_num, &vlen);
        ErtsSysForkerProto *acked = (ErtsSysForkerProto *)head[0].iov_base;

        if (forker_deq(port_num, acked))
            driver_select(port_num, forker_fd, ERL_DRV_WRITE|ERL_DRV_USE, 1);
    } else
#endif
    {
        ASSERT(msg.action == ErtsSysForkerProtoAction_SigChld);
        forker_sigchld(msg.u.sigchld.port_id, msg.u.sigchld.error_number);
    }

}
1801 
/*
 * Output callback for the forker port: write queued start requests to
 * erl_child_setup over forker_fd.
 *
 * e  - driver data; cast to the ErlDrvPort of the forker port
 * fd - not used; all writes go to forker_fd
 *
 * Without FORKER_PROTO_START_ACK, requests are written and dequeued in a
 * loop that runs while the queue is non-empty, for at most nine
 * iterations per call (bounded by the `--loops` test). With
 * FORKER_PROTO_START_ACK, a single request is written, it is left in the
 * queue, and write selection is turned off.
 *
 * A write failing with ERRNO_BLOCK or EINTR returns without changing the
 * queue. EMFILE is reported to the requesting port through
 * forker_sigchld() and the request is dropped. Any other error terminates
 * the emulator through erts_exit(ERTS_DUMP_EXIT, ...).
 */
static void forker_ready_output(ErlDrvData e, ErlDrvEvent fd)
{
    ErlDrvPort port_num = (ErlDrvPort)e;

#ifndef FORKER_PROTO_START_ACK
    int loops = 10;
    while (driver_sizeq(port_num) > 0 && --loops) {
#endif
        int vlen;
        SysIOVec *queue = driver_peekq(port_num, &vlen);
        ErtsSysForkerProto *req = (ErtsSysForkerProto *)queue[0].iov_base;

        /* The first queue element must hold a whole request. */
        ASSERT(queue[0].iov_len >= (sizeof(*req)));

        if (sys_uds_write(forker_fd, (char *)req, sizeof(*req),
                          req->u.start.fds, 3, 0) < 0) {
            if (errno == ERRNO_BLOCK || errno == EINTR)
                return;

            if (errno == EMFILE) {
                /* Report the failure to the port and discard its request. */
                forker_sigchld(req->u.start.port_id, errno);
                if (forker_deq(port_num, req) == 0)
                    driver_select(port_num, forker_fd, ERL_DRV_WRITE, 0);
                return;
            }

            erts_exit(ERTS_DUMP_EXIT, "Failed to write to erl_child_setup: %d\n", errno);
        }
#ifndef FORKER_PROTO_START_ACK
        /* Written: drop it, and stop selecting for write once drained. */
        if (forker_deq(port_num, req) == 0)
            driver_select(port_num, forker_fd, ERL_DRV_WRITE, 0);
    }
#else
    /* The request stays queued; only write selection is switched off. */
    driver_select(port_num, forker_fd, ERL_DRV_WRITE, 0);
#endif
}
1835 
forker_control(ErlDrvData e,unsigned int cmd,char * buf,ErlDrvSizeT len,char ** rbuf,ErlDrvSizeT rlen)1836 static ErlDrvSSizeT forker_control(ErlDrvData e, unsigned int cmd, char *buf,
1837                                    ErlDrvSizeT len, char **rbuf, ErlDrvSizeT rlen)
1838 {
1839     static int first_call = 1;
1840     ErtsSysForkerProto *proto = (ErtsSysForkerProto *)buf;
1841     ErlDrvPort port_num = (ErlDrvPort)e;
1842     int res;
1843 
1844     if (cmd != ERTS_FORKER_DRV_CONTROL_MAGIC_NUMBER)
1845         return -1;
1846 
1847     if (first_call) {
1848         /*
1849          * Do driver_select here when schedulers and their pollsets have started.
1850          */
1851         driver_select(port_num, forker_fd, ERL_DRV_READ|ERL_DRV_USE, 1);
1852         first_call = 0;
1853     }
1854 
1855     driver_enq(port_num, buf, len);
1856     if (driver_sizeq(port_num) > sizeof(*proto)) {
1857         return 0;
1858     }
1859 
1860     if ((res = sys_uds_write(forker_fd, (char*)proto, sizeof(*proto),
1861                              proto->u.start.fds, 3, 0)) < 0) {
1862         if (errno == ERRNO_BLOCK || errno == EINTR) {
1863             driver_select(port_num, forker_fd, ERL_DRV_WRITE|ERL_DRV_USE, 1);
1864             return 0;
1865         } else if (errno == EMFILE) {
1866             forker_sigchld(proto->u.start.port_id, errno);
1867             forker_deq(port_num, proto);
1868             return 0;
1869         } else {
1870             erts_exit(ERTS_DUMP_EXIT, "Failed to write to erl_child_setup: %d\n", errno);
1871         }
1872     }
1873 
1874 #ifndef FORKER_PROTO_START_ACK
1875     ASSERT(res == sizeof(*proto));
1876     forker_deq(port_num, proto);
1877 #endif
1878 
1879     return 0;
1880 }
1881