1 /*
2  * %CopyrightBegin%
3  *
4  * Copyright Ericsson AB 1996-2020. All Rights Reserved.
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  *     http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  *
18  * %CopyrightEnd%
19  */
20 
21 #ifdef HAVE_CONFIG_H
22 #  include "config.h"
23 #endif
24 
25 #ifdef ISC32
26 #define _POSIX_SOURCE
27 #define _XOPEN_SOURCE
28 #endif
29 
30 #include <sys/times.h>		/* ! */
31 #include <time.h>
32 #include <signal.h>
33 #include <sys/wait.h>
34 #include <sys/uio.h>
35 #include <termios.h>
36 #include <ctype.h>
37 #include <sys/utsname.h>
38 #include <sys/select.h>
39 #include <arpa/inet.h>
40 
41 #ifdef ISC32
42 #include <sys/bsdtypes.h>
43 #endif
44 
45 #include <termios.h>
46 #ifdef HAVE_FCNTL_H
47 #include <fcntl.h>
48 #endif
49 #ifdef HAVE_SYS_IOCTL_H
50 #include <sys/ioctl.h>
51 #endif
52 
53 #include <sys/types.h>
54 #include <sys/socket.h>
55 
56 #define WANT_NONBLOCKING    /* must define this to pull in defs from sys.h */
57 #include "sys.h"
58 #include "erl_osenv.h"
59 
60 #include "erl_threads.h"
61 
62 extern erts_atomic_t sys_misc_mem_sz;
63 
64 static Eterm forker_port;
65 
66 #define MAX_VSIZE 16		/* Max number of entries allowed in an I/O
67 				 * vector sock_sendv().
68 				 */
69 /*
70  * Don't need global.h, but erl_cpu_topology.h won't compile otherwise
71  */
72 #include "global.h"
73 #include "erl_cpu_topology.h"
74 
75 #include "erl_sys_driver.h"
76 #include "sys_uds.h"
77 
78 #include "erl_child_setup.h"
79 
80 #if defined IOV_MAX
81 #define MAXIOV IOV_MAX
82 #elif defined UIO_MAXIOV
83 #define MAXIOV UIO_MAXIOV
84 #else
85 #define MAXIOV 16
86 #endif
87 
88 /* Used by the fd driver iff the fd could not be set to non-blocking */
/*
 * Extra per-port state attached to ErtsSysDriverData.blocking by
 * set_blocking_data().
 */
typedef struct ErtsSysBlocking_ {
    ErlDrvPDL pdl;      /* obtained from driver_pdl_create() for the port */
    ErlDrvSSizeT res;   /* starts at 0; presumably the result of the last
                         * blocking write - TODO confirm */
    int err;            /* starts at 0; presumably the errno of the last
                         * blocking write - TODO confirm */
    unsigned int pkey;  /* obtained from driver_async_port_key() for the port */
} ErtsSysBlocking;
95 
/*
 * State kept for one direction (input or output) of a port.
 * init_fd_data() stores the descriptor, clears buf and cpos and zeroes
 * psz, sz and remain; pbuf is left untouched.
 */
typedef struct fd_data {
    int   fd;        /* file descriptor; spawn_start() negates it when the
                      * direction was not requested, to mark that it should
                      * be closed after the handshake */
    char  pbuf[4];   /* hold partial packet bytes */
    int   psz;       /* size of pbuf */
    char  *buf;      /* NULL until a buffer is attached */
    char  *cpos;     /* presumably the current position in buf - TODO confirm */
    int   sz;        /* presumably the size of buf - TODO confirm */
    int   remain;  /* for input on fd */
} ErtsSysFdData;
105 
/*
 * Per-port driver state. It is allocated by create_driver_data() in one
 * chunk together with the ErtsSysFdData structures it points to.
 */
typedef struct driver_data {
    ErlDrvPort port_num;        /* port this state belongs to */
    ErtsSysFdData *ofd;         /* output side; NULL when the port is not
                                 * opened for writing, same pointer as ifd
                                 * when both directions use one fd */
    ErtsSysFdData *ifd;         /* input side; NULL when the port is not
                                 * opened for reading */
    int packet_bytes;           /* packet header size given when opening */
    int pid;                    /* OS pid given to create_driver_data() */
    int alive;                  /* 1 if exit status was requested, else 0;
                                 * spawn_control() sets it to -1 */
    int status;                 /* error_number reported through
                                 * spawn_control(); initially 0 */
    int terminating;            /* initially 0 */
    ErtsSysBlocking *blocking;  /* NULL unless set_blocking_data() was used */
    int busy;                   /* initially 0 */
    ErlDrvSizeT high_watermark; /* copied from SysDriverOpts */
    ErlDrvSizeT low_watermark;  /* copied from SysDriverOpts */
} ErtsSysDriverData;
120 
121 #define DIR_SEPARATOR_CHAR    '/'
122 
123 #if defined(__ANDROID__)
124 #define SHELL "/system/bin/sh"
125 #else
126 #define SHELL "/bin/sh"
127 #endif /* __ANDROID__ */
128 
129 #if defined(DEBUG)
130 #define ERL_BUILD_TYPE_MARKER ".debug"
131 #elif defined(VALGRIND)
132 #define ERL_BUILD_TYPE_MARKER ".valgrind"
133 #else /* opt */
134 #define ERL_BUILD_TYPE_MARKER
135 #endif
136 
137 #ifdef DEBUG
138 #define close(fd) do { int res = close(fd); ASSERT(res > -1); } while(0)
139 #endif
140 
141 #define CHILD_SETUP_PROG_NAME	"erl_child_setup" ERL_BUILD_TYPE_MARKER
142 
143 // #define HARD_DEBUG
#ifdef HARD_DEBUG
/*
 * Tracing replacement for driver_select(): prints one line on stderr for
 * every read/write select or unselect (source line, port and fd) and then
 * forwards the call to driver_select_nkp().
 */
#define driver_select(port_num, fd, flags, onoff)                       \
    do {                                                                \
        if (((flags) & ERL_DRV_READ) && onoff)                          \
            fprintf(stderr,"%010d %p: read select %d\r\n", __LINE__, port_num, (int)fd); \
        if (((flags) & ERL_DRV_WRITE) && onoff)                         \
            fprintf(stderr,"%010d %p: writ select %d\r\n", __LINE__, port_num, (int)fd); \
        if (((flags) & ERL_DRV_READ) && !onoff)                          \
            fprintf(stderr,"%010d %p: read unsele %d\r\n", __LINE__, port_num, (int)fd); \
        if (((flags) & ERL_DRV_WRITE) && !onoff)                         \
            fprintf(stderr,"%010d %p: writ unsele %d\r\n", __LINE__, port_num, (int)fd); \
        driver_select_nkp(port_num, fd, flags, onoff);                  \
    } while(0)
#endif
158 
159 /*
 * Decreasing ERTS_SYS_READ_BUF_SZ below 16384 is not allowed.
161  */
162 
163 #define ERTS_SYS_READ_BUF_SZ (64*1024)
164 
165 /* I. Initialization */
166 
167 void
erl_sys_late_init(void)168 erl_sys_late_init(void)
169 {
170     SysDriverOpts opts = {0};
171     Port *port;
172 
173     sys_signal(SIGPIPE, SIG_IGN); /* Ignore - we'll handle the write failure */
174 
175     opts.packet_bytes = 0;
176     opts.use_stdio = 1;
177     opts.redir_stderr = 0;
178     opts.read_write = 0;
179     opts.hide_window = 0;
180     opts.wd = NULL;
181     erts_osenv_init(&opts.envir);
182     opts.exit_status = 0;
183     opts.overlapped_io = 0;
184     opts.spawn_type = ERTS_SPAWN_ANY;
185     opts.argv = NULL;
186     opts.parallelism = erts_port_parallelism;
187 
188     port =
189         erts_open_driver(&forker_driver, make_internal_pid(0), "forker", &opts, NULL, NULL);
190     erts_mtx_unlock(port->lock);
191     erts_sys_unix_later_init(); /* Need to be called after forker has been started */
192 }
193 
194 /* II. Prototypes */
195 
196 /* II.I Spawn prototypes */
197 static ErlDrvData spawn_start(ErlDrvPort, char*, SysDriverOpts*);
198 static ErlDrvSSizeT spawn_control(ErlDrvData, unsigned int, char *,
199                                   ErlDrvSizeT, char **, ErlDrvSizeT);
200 
201 /* II.II Vanilla prototypes */
202 static ErlDrvData vanilla_start(ErlDrvPort, char*, SysDriverOpts*);
203 
204 
205 /* II.III FD prototypes */
206 static ErlDrvData fd_start(ErlDrvPort, char*, SysDriverOpts*);
207 static void fd_async(void *);
208 static void fd_ready_async(ErlDrvData drv_data, ErlDrvThreadData thread_data);
209 static ErlDrvSSizeT fd_control(ErlDrvData, unsigned int, char *, ErlDrvSizeT,
210 			       char **, ErlDrvSizeT);
211 static void fd_stop(ErlDrvData);
212 static void fd_flush(ErlDrvData);
213 
214 /* II.IV Common prototypes */
215 static void stop(ErlDrvData);
216 static void ready_input(ErlDrvData, ErlDrvEvent);
217 static void ready_output(ErlDrvData, ErlDrvEvent);
218 static void output(ErlDrvData, char*, ErlDrvSizeT);
219 static void outputv(ErlDrvData, ErlIOVec*);
220 static void stop_select(ErlDrvEvent, void*);
221 
222 /* II.V Forker prototypes */
223 static ErlDrvData forker_start(ErlDrvPort, char*, SysDriverOpts*);
224 static void forker_stop(ErlDrvData);
225 static void forker_ready_input(ErlDrvData, ErlDrvEvent);
226 static void forker_ready_output(ErlDrvData, ErlDrvEvent);
227 static ErlDrvSSizeT forker_control(ErlDrvData, unsigned int, char *,
228                                    ErlDrvSizeT, char **, ErlDrvSizeT);
229 
230 /* III Driver entries */
231 
232 /* III.I The spawn driver */
struct erl_drv_entry spawn_driver_entry = {
    NULL, /* init */
    spawn_start, /* start */
    stop, /* stop */
    output, /* output */
    ready_input, /* ready_input */
    ready_output, /* ready_output */
    "spawn", /* driver_name */
    NULL, /* finish */
    NULL, /* handle */
    spawn_control, /* control */
    NULL, /* timeout */
    NULL, /* outputv */
    NULL, /* ready_async */
    NULL, /* flush */
    NULL, /* call */
    NULL, /* event */
    ERL_DRV_EXTENDED_MARKER,
    ERL_DRV_EXTENDED_MAJOR_VERSION,
    ERL_DRV_EXTENDED_MINOR_VERSION,
    ERL_DRV_FLAG_USE_PORT_LOCKING | ERL_DRV_FLAG_USE_INIT_ACK, /* ERL_DRV_FLAGs */
    NULL, NULL, /* handle2, process_exit */
    stop_select
};
257 
258 /* III.II The fd driver */
struct erl_drv_entry fd_driver_entry = {
    NULL, /* init */
    fd_start, /* start */
    fd_stop, /* stop */
    output, /* output */
    ready_input, /* ready_input */
    ready_output, /* ready_output */
    "fd", /* driver_name */
    NULL, /* finish */
    NULL, /* handle */
    fd_control, /* control */
    NULL, /* timeout */
    outputv, /* outputv */
    fd_ready_async, /* ready_async */
    fd_flush, /* flush */
    NULL, /* call */
    NULL, /* event */
    ERL_DRV_EXTENDED_MARKER,
    ERL_DRV_EXTENDED_MAJOR_VERSION,
    ERL_DRV_EXTENDED_MINOR_VERSION,
    0, /* ERL_DRV_FLAGs */
    NULL, /* handle2 */
    NULL, /* process_exit */
    stop_select
};
284 
285 /* III.III The vanilla driver */
struct erl_drv_entry vanilla_driver_entry = {
    NULL, /* init */
    vanilla_start, /* start */
    stop, /* stop */
    output, /* output */
    ready_input, /* ready_input */
    ready_output, /* ready_output */
    "vanilla", /* driver_name */
    NULL, /* finish */
    NULL, /* handle */
    NULL, /* control */
    NULL, /* timeout */
    NULL, /* outputv */
    NULL, /* ready_async */
    NULL, /* flush */
    NULL, /* call */
    NULL, /* event */
    ERL_DRV_EXTENDED_MARKER,
    ERL_DRV_EXTENDED_MAJOR_VERSION,
    ERL_DRV_EXTENDED_MINOR_VERSION,
    0, /* ERL_DRV_FLAGs */
    NULL, /* handle2 */
    NULL, /* process_exit */
    stop_select
};
311 
/* III.IV The forker driver */
struct erl_drv_entry forker_driver_entry = {
    NULL, /* init */
    forker_start, /* start */
    forker_stop, /* stop */
    NULL, /* output */
    forker_ready_input, /* ready_input */
    forker_ready_output, /* ready_output */
    "spawn_forker", /* driver_name */
    NULL, /* finish */
    NULL, /* handle */
    forker_control, /* control */
    NULL, /* timeout */
    NULL, /* outputv */
    NULL, /* ready_async */
    NULL, /* flush */
    NULL, /* call */
    NULL, /* event */
    ERL_DRV_EXTENDED_MARKER,
    ERL_DRV_EXTENDED_MAJOR_VERSION,
    ERL_DRV_EXTENDED_MINOR_VERSION,
    0, /* ERL_DRV_FLAGs */
    NULL, NULL, /* handle2, process_exit */
    stop_select
};
337 
/* Utility functions */
339 
/*
 * Allocate the ErtsSysBlocking state for dd, account for it in
 * sys_misc_mem_sz and fill it in with a fresh PDL and the async port key
 * of dd->port_num. Always returns 1.
 */
static int set_blocking_data(ErtsSysDriverData *dd)
{
    ErtsSysBlocking *blk;

    blk = erts_alloc(ERTS_ALC_T_SYS_BLOCKING, sizeof(ErtsSysBlocking));
    dd->blocking = blk;

    erts_atomic_add_nob(&sys_misc_mem_sz, sizeof(ErtsSysBlocking));

    blk->pdl = driver_pdl_create(dd->port_num);
    blk->res = 0;
    blk->err = 0;
    blk->pkey = driver_async_port_key(dd->port_num);

    return 1;
}
353 
/*
 * Reset fd_data to describe descriptor fd with no buffer attached and
 * no pending packet bytes. The contents of pbuf are not touched.
 */
static void init_fd_data(ErtsSysFdData *fd_data, int fd)
{
    /* no buffer yet */
    fd_data->buf = fd_data->cpos = NULL;

    /* all counters start from zero */
    fd_data->psz = 0;
    fd_data->sz = 0;
    fd_data->remain = 0;

    fd_data->fd = fd;
}
363 
364 static ErtsSysDriverData *
create_driver_data(ErlDrvPort port_num,int ifd,int ofd,int packet_bytes,int read_write,int exit_status,int pid,int is_blocking,SysDriverOpts * opts)365 create_driver_data(ErlDrvPort port_num,
366                    int ifd,
367                    int ofd,
368                    int packet_bytes,
369                    int read_write,
370                    int exit_status,
371                    int pid,
372                    int is_blocking,
373                    SysDriverOpts* opts)
374 {
375     Port *prt;
376     ErtsSysDriverData *driver_data;
377     char *data;
378     int size = sizeof(ErtsSysDriverData);
379 
380     if (read_write & DO_READ)
381         size += sizeof(ErtsSysFdData);
382 
383     if ((read_write & DO_WRITE) &&
384         ((ifd != ofd || ofd == -1) || !(read_write & DO_READ)))
385         size += sizeof(ErtsSysFdData);
386 
387     data = erts_alloc(ERTS_ALC_T_DRV_TAB,size);
388     erts_atomic_add_nob(&sys_misc_mem_sz, size);
389 
390     driver_data = (ErtsSysDriverData*)data;
391     data += sizeof(*driver_data);
392 
393     prt = erts_drvport2port(port_num);
394     if (prt != ERTS_INVALID_ERL_DRV_PORT)
395 	prt->os_pid = pid;
396 
397     driver_data->packet_bytes = packet_bytes;
398     driver_data->port_num = port_num;
399     driver_data->pid = pid;
400     driver_data->alive = exit_status ? 1 : 0;
401     driver_data->status = 0;
402     driver_data->terminating = 0;
403     driver_data->blocking = NULL;
404 
405     if (read_write & DO_READ) {
406         driver_data->ifd = (ErtsSysFdData*)data;
407         data += sizeof(*driver_data->ifd);
408         init_fd_data(driver_data->ifd, ifd);
409         driver_select(port_num, ifd, (ERL_DRV_READ|ERL_DRV_USE), 1);
410     } else {
411         driver_data->ifd = NULL;
412     }
413 
414     if (read_write & DO_WRITE) {
415         if (ofd != -1 && ifd == ofd && read_write & DO_READ) {
416             /* This is for when ifd and ofd are the same fd */
417             driver_data->ofd = driver_data->ifd;
418         } else {
419             driver_data->ofd = (ErtsSysFdData*)data;
420             data += sizeof(*driver_data->ofd);
421             init_fd_data(driver_data->ofd, ofd);
422         }
423         if (is_blocking)
424             if (!set_blocking_data(driver_data)) {
425                 erts_free(ERTS_ALC_T_DRV_TAB, driver_data);
426                 return NULL;
427             }
428     } else {
429         driver_data->ofd = NULL;
430     }
431 
432     driver_data->busy = 0;
433     driver_data->high_watermark = opts->high_watermark;
434     driver_data->low_watermark = opts->low_watermark;
435 
436     return driver_data;
437 }
438 
439 /* Spawn driver */
440 
/*
 * Close both ends of the input pipe and then both ends of the output pipe.
 */
static void close_pipes(int ifd[2], int ofd[2])
{
    int end;

    for (end = 0; end < 2; end++) {
        close(ifd[end]);
    }
    for (end = 0; end < 2; end++) {
        close(ofd[end]);
    }
}
448 
/*
 * Cursor handed to add_spawn_env_block_foreach() while the environment
 * is appended to the command payload of spawn_start().
 */
struct __add_spawn_env_state {
    struct iovec *iov;      /* I/O vector that receives one entry per variable */
    int *iov_index;         /* next free slot in iov; advanced per variable */

    Sint32 *payload_size;   /* running payload size; grows by each entry length */
    char *env_block;        /* write position in the "key=value\0" block */
};
456 
add_spawn_env_block_foreach(void * _state,const erts_osenv_data_t * key,const erts_osenv_data_t * value)457 static void add_spawn_env_block_foreach(void *_state,
458                                         const erts_osenv_data_t *key,
459                                         const erts_osenv_data_t *value)
460 {
461     struct __add_spawn_env_state *state;
462     struct iovec *iov;
463 
464     state = (struct __add_spawn_env_state*)(_state);
465     iov = &state->iov[*state->iov_index];
466 
467     iov->iov_base = state->env_block;
468 
469     sys_memcpy(state->env_block, key->data, key->length);
470     state->env_block += key->length;
471     *state->env_block++ = '=';
472     sys_memcpy(state->env_block, value->data, value->length);
473     state->env_block += value->length;
474     *state->env_block++ = '\0';
475 
476     iov->iov_len = state->env_block - (char*)iov->iov_base;
477 
478     (*state->payload_size) += iov->iov_len;
479     (*state->iov_index)++;
480 }
481 
/*
 * Build a temporary block holding every variable of env as "key=value\0"
 * and add one I/O vector entry per variable, starting at *iov_index.
 * *iov_index and *payload_size are advanced by the entries added.
 *
 * Returns the block, which is allocated with ERTS_ALC_T_TMP.
 */
static void *add_spawn_env_block(const erts_osenv_t *env, struct iovec *iov,
                                  int *iov_index, Sint32 *payload_size)
{
    struct __add_spawn_env_state st;
    char *block;

    /* sizeof("=\0") leaves room for the '=' and the NUL of each variable */
    block = erts_alloc(ERTS_ALC_T_TMP,
                       env->content_size
                       + env->variable_count * sizeof("=\0"));

    st.env_block = block;
    st.payload_size = payload_size;
    st.iov_index = iov_index;
    st.iov = iov;

    erts_osenv_foreach_native(env, &st, add_spawn_env_block_foreach);

    return block;
}
499 
spawn_start(ErlDrvPort port_num,char * name,SysDriverOpts * opts)500 static ErlDrvData spawn_start(ErlDrvPort port_num, char* name,
501                               SysDriverOpts* opts)
502 {
503 #define CMD_LINE_PREFIX_STR "exec "
504 #define CMD_LINE_PREFIX_STR_SZ (sizeof(CMD_LINE_PREFIX_STR) - 1)
505 
506     int len;
507     ErtsSysDriverData *dd;
508     char *cmd_line;
509     char wd_buff[MAXPATHLEN+1];
510     char *wd, *cwd;
511     int ifd[2], ofd[2], stderrfd;
512 
513     if (pipe(ifd) < 0) return ERL_DRV_ERROR_ERRNO;
514     errno = EMFILE;		/* default for next three conditions */
515     if (ifd[0] >= sys_max_files() || pipe(ofd) < 0) {
516         close(ifd[0]);
517         close(ifd[1]);
518         return ERL_DRV_ERROR_ERRNO;
519     }
520     if (ofd[1] >= sys_max_files()) {
521         close_pipes(ifd, ofd);
522         errno = EMFILE;
523         return ERL_DRV_ERROR_ERRNO;
524     }
525 
526     SET_NONBLOCKING(ifd[0]);
527     SET_NONBLOCKING(ofd[1]);
528 
529     stderrfd = opts->redir_stderr ? ifd[1] : dup(2);
530 
531     if (stderrfd >= sys_max_files() || stderrfd < 0) {
532         close_pipes(ifd, ofd);
533         if (stderrfd > -1)
534             close(stderrfd);
535         return ERL_DRV_ERROR_ERRNO;
536     }
537 
538     if (opts->spawn_type == ERTS_SPAWN_EXECUTABLE) {
539 	/* started with spawn_executable, not with spawn */
540 	len = strlen(name);
541 	cmd_line = (char *) erts_alloc_fnf(ERTS_ALC_T_TMP, len + 1);
542 	if (!cmd_line) {
543             close_pipes(ifd, ofd);
544 	    errno = ENOMEM;
545 	    return ERL_DRV_ERROR_ERRNO;
546 	}
547 	memcpy((void *) cmd_line,(void *) name, len);
548 	cmd_line[len] = '\0';
549 	len = len + 1;
550 	if (access(cmd_line,X_OK) != 0) {
551 	    int save_errno = errno;
552 	    erts_free(ERTS_ALC_T_TMP, cmd_line);
553             close_pipes(ifd, ofd);
554 	    errno = save_errno;
555 	    return ERL_DRV_ERROR_ERRNO;
556 	}
557     } else {
558 	/* make the string suitable for giving to "sh" */
559 	len = strlen(name);
560 	cmd_line = (char *) erts_alloc_fnf(ERTS_ALC_T_TMP,
561 					   CMD_LINE_PREFIX_STR_SZ + len + 1);
562 	if (!cmd_line) {
563             close_pipes(ifd, ofd);
564 	    errno = ENOMEM;
565 	    return ERL_DRV_ERROR_ERRNO;
566 	}
567 	memcpy((void *) cmd_line,
568 	       (void *) CMD_LINE_PREFIX_STR,
569 	       CMD_LINE_PREFIX_STR_SZ);
570 	memcpy((void *) (cmd_line + CMD_LINE_PREFIX_STR_SZ), (void *) name, len);
571 	cmd_line[CMD_LINE_PREFIX_STR_SZ + len] = '\0';
572 	len = CMD_LINE_PREFIX_STR_SZ + len + 1;
573 }
574 
575     if ((cwd = getcwd(wd_buff, MAXPATHLEN+1)) == NULL) {
576         /* on some OSs this call opens a fd in the
577            background which means that this can
578            return EMFILE */
579         int err = errno;
580         close_pipes(ifd, ofd);
581         erts_free(ERTS_ALC_T_TMP, (void *) cmd_line);
582         errno = err;
583         return ERL_DRV_ERROR_ERRNO;
584     }
585 
586     wd = opts->wd;
587 
588     {
589         void *environment_block;
590         struct iovec *io_vector;
591         int iov_len = 5;
592         char nullbuff[] = "\0";
593         int j, i = 0, res;
594         Sint32 buffsz = 0, env_len = 0, argv_len = 0,
595             flags = (opts->use_stdio ? FORKER_FLAG_USE_STDIO : 0)
596             | (opts->exit_status ? FORKER_FLAG_EXIT_STATUS : 0)
597             | (opts->read_write & DO_READ ? FORKER_FLAG_DO_READ : 0)
598             | (opts->read_write & DO_WRITE ? FORKER_FLAG_DO_WRITE : 0);
599 
600         if (wd) iov_len++;
601 
602         /* num envs including size int */
603         iov_len += 1 + opts->envir.variable_count;
604 
605         /* count number of element in argument list */
606         if (opts->spawn_type == ERTS_SPAWN_EXECUTABLE) {
607             if (opts->argv != NULL) {
608                 while(opts->argv[argv_len] != NULL)
609                     argv_len++;
610             } else {
611                 argv_len++;
612             }
613             iov_len += 1 + argv_len; /* num argvs including size int */
614         }
615 
616         io_vector = erts_alloc_fnf(ERTS_ALC_T_TMP, sizeof(struct iovec) * iov_len);
617 
618         if (!io_vector) {
619             close_pipes(ifd, ofd);
620             erts_free(ERTS_ALC_T_TMP, (void *) cmd_line);
621             errno = ENOMEM;
622             return ERL_DRV_ERROR_ERRNO;
623         }
624 
625         /*
626          * Whitebox test port_SUITE:pipe_limit_env
627          * assumes this command payload format.
628          */
629         io_vector[i].iov_base = (void*)&buffsz;
630         io_vector[i++].iov_len = sizeof(buffsz);
631 
632         io_vector[i].iov_base = (void*)&flags;
633         flags = htonl(flags);
634         io_vector[i++].iov_len = sizeof(flags);
635         buffsz += sizeof(flags);
636 
637         io_vector[i].iov_base = cmd_line;
638         io_vector[i++].iov_len = len;
639         buffsz += len;
640 
641         io_vector[i].iov_base = cwd;
642         io_vector[i].iov_len = strlen(io_vector[i].iov_base) + 1;
643         buffsz += io_vector[i++].iov_len;
644 
645         if (wd) {
646             io_vector[i].iov_base = wd;
647             io_vector[i].iov_len = strlen(io_vector[i].iov_base) + 1;
648             buffsz += io_vector[i++].iov_len;
649         }
650 
651         io_vector[i].iov_base = nullbuff;
652         io_vector[i++].iov_len = 1;
653         buffsz += io_vector[i-1].iov_len;
654 
655         env_len = htonl(opts->envir.variable_count);
656         io_vector[i].iov_base = (void*)&env_len;
657         io_vector[i++].iov_len = sizeof(env_len);
658         buffsz += io_vector[i-1].iov_len;
659 
660         environment_block = add_spawn_env_block(&opts->envir, io_vector, &i,
661             &buffsz);
662 
663         /* only append arguments if this was a spawn_executable */
664         if (opts->spawn_type == ERTS_SPAWN_EXECUTABLE) {
665 
666             io_vector[i].iov_base = (void*)&argv_len;
667             argv_len = htonl(argv_len);
668             io_vector[i++].iov_len = sizeof(argv_len);
669             buffsz += io_vector[i-1].iov_len;
670 
671             if (opts->argv) {
672                 /* If there are arguments we copy in the references to
673                    them into the iov */
674                 for (j = 0; opts->argv[j]; j++) {
675                     if (opts->argv[j] == erts_default_arg0)
676                         io_vector[i].iov_base = cmd_line;
677                     else
678                         io_vector[i].iov_base = opts->argv[j];
679                     io_vector[i].iov_len = strlen(io_vector[i].iov_base) + 1;
680                     buffsz += io_vector[i++].iov_len;
681                 }
682             } else {
683                 io_vector[i].iov_base = cmd_line;
684                 io_vector[i].iov_len = strlen(io_vector[i].iov_base) + 1;
685                 buffsz += io_vector[i++].iov_len;
686             }
687         }
688 
689         /* we send the request to do the fork */
690         if ((res = writev(ofd[1], io_vector, iov_len > MAXIOV ? MAXIOV : iov_len)) < 0) {
691             if (errno == ERRNO_BLOCK || errno == EINTR) {
692                 res = 0;
693             } else {
694                 int err = errno;
695                 close_pipes(ifd, ofd);
696                 erts_free(ERTS_ALC_T_TMP, io_vector);
697                 erts_free(ERTS_ALC_T_TMP, (void *) cmd_line);
698                 errno = err;
699                 return ERL_DRV_ERROR_ERRNO;
700             }
701         }
702 
703         if (res < (buffsz + sizeof(buffsz))) {
704             /* we only wrote part of the command payload. Enqueue the rest. */
705             for (i = 0; i < iov_len; i++) {
706                 if (res >= io_vector[i].iov_len)
707                     res -= io_vector[i].iov_len;
708                 else {
709                     driver_enq(port_num, &((char*)io_vector[i].iov_base)[res],
710                                io_vector[i].iov_len - res);
711                     res = 0;
712                 }
713             }
714             driver_select(port_num, ofd[1], ERL_DRV_WRITE|ERL_DRV_USE, 1);
715         }
716 
717         erts_free(ERTS_ALC_T_TMP, environment_block);
718         erts_free(ERTS_ALC_T_TMP, io_vector);
719     }
720 
721     erts_free(ERTS_ALC_T_TMP, (void *) cmd_line);
722 
723     dd = create_driver_data(port_num, ifd[0], ofd[1], opts->packet_bytes,
724                              DO_WRITE | DO_READ, opts->exit_status,
725                             0, 0, opts);
726 
727     {
728         /* send ofd[0] + ifd[1] + stderrfd to forker port */
729         ErtsSysForkerProto *proto =
730             erts_alloc(ERTS_ALC_T_DRV_CTRL_DATA,
731                        sizeof(ErtsSysForkerProto));
732         memset(proto, 0, sizeof(ErtsSysForkerProto));
733         proto->action = ErtsSysForkerProtoAction_Start;
734         proto->u.start.fds[0] = ofd[0];
735         proto->u.start.fds[1] = ifd[1];
736         proto->u.start.fds[2] = stderrfd;
737         proto->u.start.port_id = opts->exit_status ? erts_drvport2id(port_num) : THE_NON_VALUE;
738         if (erl_drv_port_control(forker_port, ERTS_FORKER_DRV_CONTROL_MAGIC_NUMBER,
739                                  (char*)proto, sizeof(*proto))) {
740             /* The forker port has been killed, we close both fd's which will
741                make open_port throw an epipe error */
742             close(ofd[0]);
743             close(ifd[1]);
744         }
745     }
746 
747     /* we set these fds to negative to mark if
748        they should be closed after the handshake */
749     if (!(opts->read_write & DO_READ))
750         dd->ifd->fd *= -1;
751 
752     if (!(opts->read_write & DO_WRITE))
753         dd->ofd->fd *= -1;
754 
755     return (ErlDrvData)dd;
756 #undef CMD_LINE_PREFIX_STR
757 #undef CMD_LINE_PREFIX_STR_SZ
758 }
759 
spawn_control(ErlDrvData e,unsigned int cmd,char * buf,ErlDrvSizeT len,char ** rbuf,ErlDrvSizeT rlen)760 static ErlDrvSSizeT spawn_control(ErlDrvData e, unsigned int cmd, char *buf,
761                                   ErlDrvSizeT len, char **rbuf, ErlDrvSizeT rlen)
762 {
763     ErtsSysDriverData *dd = (ErtsSysDriverData*)e;
764     ErtsSysForkerProto *proto = (ErtsSysForkerProto *)buf;
765 
766     if (cmd != ERTS_SPAWN_DRV_CONTROL_MAGIC_NUMBER)
767         return -1;
768 
769     ASSERT(len == sizeof(*proto));
770     ASSERT(proto->action == ErtsSysForkerProtoAction_SigChld);
771 
772     dd->status = proto->u.sigchld.error_number;
773     dd->alive = -1;
774 
775     if (dd->ifd)
776         driver_select(dd->port_num, abs(dd->ifd->fd), ERL_DRV_READ | ERL_DRV_USE, 1);
777 
778     if (dd->ofd)
779         driver_select(dd->port_num, abs(dd->ofd->fd), ERL_DRV_WRITE | ERL_DRV_USE, 1);
780 
781     return 0;
782 }
783 
784 #define FD_DEF_HEIGHT 24
785 #define FD_DEF_WIDTH 80
786 /* Control op */
787 #define FD_CTRL_OP_GET_WINSIZE 100
788 
/*
 * Ask the terminal behind fd for its size using TIOCGWINSZ.
 *
 * On success the number of columns and rows is stored in *width and
 * *height and 1 is returned. Returns 0, leaving both untouched, when the
 * ioctl fails or TIOCGWINSZ is not available.
 */
static int fd_get_window_size(int fd, Uint32 *width, Uint32 *height)
{
#ifdef TIOCGWINSZ
    struct winsize ws;

    if (ioctl(fd, TIOCGWINSZ, &ws) != 0) {
        return 0;
    }

    *width = (Uint32) ws.ws_col;
    *height = (Uint32) ws.ws_row;
    return 1;
#else
    return 0;
#endif
}
801 
fd_control(ErlDrvData drv_data,unsigned int command,char * buf,ErlDrvSizeT len,char ** rbuf,ErlDrvSizeT rlen)802 static ErlDrvSSizeT fd_control(ErlDrvData drv_data,
803 			       unsigned int command,
804 			       char *buf, ErlDrvSizeT len,
805 			       char **rbuf, ErlDrvSizeT rlen)
806 {
807     char resbuff[2*sizeof(Uint32)];
808     ErtsSysDriverData* dd = (ErtsSysDriverData*)drv_data;
809     command -= ERTS_TTYSL_DRV_CONTROL_MAGIC_NUMBER;
810     switch (command) {
811     case FD_CTRL_OP_GET_WINSIZE:
812 	{
813 	    Uint32 w,h;
814             int success = 0;
815             if (dd->ofd != NULL) {
816                 /* Try with output file descriptor */
817                 int out_fd = dd->ofd->fd;
818                 success = fd_get_window_size(out_fd,&w,&h);
819             }
820             if (!success && dd->ifd != NULL) {
821                 /* Try with input file descriptor */
822                 int in_fd = dd->ifd->fd;
823                 success = fd_get_window_size(in_fd,&w,&h);
824             }
825             if (!success) {
826                 return -1;
827             }
828             /* Succeeded */
829 	    memcpy(resbuff,&w,sizeof(Uint32));
830 	    memcpy(resbuff+sizeof(Uint32),&h,sizeof(Uint32));
831 	}
832 	break;
833     default:
834 	return -1;
835     }
836     if (rlen < 2*sizeof(Uint32)) {
837 	*rbuf = driver_alloc(2*sizeof(Uint32));
838     }
839     memcpy(*rbuf,resbuff,2*sizeof(Uint32));
840     return 2*sizeof(Uint32);
841 }
842 
fd_start(ErlDrvPort port_num,char * name,SysDriverOpts * opts)843 static ErlDrvData fd_start(ErlDrvPort port_num, char* name,
844 			   SysDriverOpts* opts)
845 {
846     int non_blocking = 0;
847 
848     if (((opts->read_write & DO_READ) && opts->ifd >= sys_max_files()) ||
849 	((opts->read_write & DO_WRITE) && opts->ofd >= sys_max_files()))
850 	return ERL_DRV_ERROR_GENERAL;
851 
852     /*
853      * Historical:
854      *
855      * "Note about nonblocking I/O.
856      *
857      * At least on Solaris, setting the write end of a TTY to nonblocking,
858      * will set the input end to nonblocking as well (and vice-versa).
859      * If erl is run in a pipeline like this:  cat | erl
860      * the input end of the TTY will be the standard input of cat.
861      * And cat is not prepared to handle nonblocking I/O."
862      *
863      * Actually, the reason for this is not that the tty itself gets set
864      * in non-blocking mode, but that the "input end" (cat's stdin) and
865      * the "output end" (erlang's stdout) are typically the "same" file
866      * descriptor, dup()'ed from a single fd by one of this process'
867      * ancestors.
868      *
869      * The workaround for this problem used to be a rather bad kludge,
870      * interposing an extra process ("internal cat") between erlang's
871      * stdout and the original stdout, allowing erlang to set its stdout
872      * in non-blocking mode without affecting the stdin of the preceding
873      * process in the pipeline - and being a kludge, it caused all kinds
874      * of weird problems.
875      *
876      * So, this is the current logic:
877      *
878      * The only reason to set non-blocking mode on the output fd at all is
879      * if it's something that can cause a write() to block, of course,
880      * i.e. primarily if it points to a tty, socket, pipe, or fifo.
881      *
882      * If we don't set non-blocking mode when we "should" have, and output
883      * becomes blocked, the entire runtime system will be suspended - this
884      * is normally bad of course, and can happen fairly "easily" - e.g. user
885      * hits ^S on tty - but doesn't necessarily happen.
886      *
887      * If we do set non-blocking mode when we "shouldn't" have, the runtime
888      * system will end up seeing EOF on the input fd (due to the preceding
889      * process dying), which typically will cause the entire runtime system
890      * to terminate immediately (due to whatever erlang process is seeing
891      * the EOF taking it as a signal to halt the system). This is *very* bad.
892      *
893      * I.e. we should take a conservative approach, and only set non-
894      * blocking mode when we a) need to, and b) are reasonably certain
895      * that it won't be a problem. And as in the example above, the problem
896      * occurs when input fd and output fd point to different "things".
897      *
898      * However, determining that they are not just the same "type" of
899      * "thing", but actually the same instance of that type of thing, is
900      * unreasonably complex in many/most cases.
901      *
902      * Also, with pipes, sockets, and fifos it's far from obvious that the
903      * user *wants* non-blocking output: If you're running erlang inside
904      * some complex pipeline, you're probably not running a real-time system
905      * that must never stop, but rather *want* it to suspend if the output
906      * channel is "full".
907      *
908      * So, the bottom line: We will only set the output fd non-blocking if
909      * it points to a tty, and either a) the input fd also points to a tty,
910      * or b) we can make sure that setting the output fd non-blocking
911      * doesn't interfere with someone else's input, via a somewhat milder
912      * kludge than the above.
913      *
914      * Also keep in mind that while this code is almost exclusively run as
915      * a result of an erlang open_port({fd,0,1}, ...), that isn't the only
916      * case - it can be called with any old pre-existing file descriptors,
917      * the relations between which (if they're even two) we can only guess
918      * at - still, we try our best...
919      *
920      * Added note OTP 18: Some systems seem to use stdout/stderr to log data
921      * using unix pipes, so we cannot allow the system to block on a write.
922      * Therefore we use an async thread to write the data to fd's that could
923      * not be set to non-blocking. When no async threads are available we
924      * fall back on the old behaviour.
925      *
926      * Also the guarantee about what is delivered to the OS has changed.
927      * Pre 18 the fd driver did no flushing of data before terminating.
928      * Now it does. This is because we want to be able to guarantee that things
929      * such as escripts and friends really have outputted all data before
930      * terminating. This could potentially block the termination of the system
931      * for a very long time, but if the user wants to terminate fast she should
932      * use erlang:halt with flush=false.
933      */
934 
935     /* Try to figure out if we can use non-blocking writes */
936     if (opts->read_write & DO_WRITE) {
937 
938 	/* If we don't have a read end, all bets are off - no non-blocking. */
939 	if (opts->read_write & DO_READ) {
940 
941 	    if (isatty(opts->ofd)) { /* output fd is a tty:-) */
942 
943 		if (isatty(opts->ifd)) { /* input fd is also a tty */
944 
945 		    /* To really do this "right", we should also check that
946 		       input and output fd point to the *same* tty - but
947 		       this seems like overkill; ttyname() isn't for free,
948 		       and this is a very common case - and it's hard to
949 		       imagine a scenario where setting non-blocking mode
950 		       here would cause problems - go ahead and do it. */
951 
952                     non_blocking = 1;
953 		    SET_NONBLOCKING(opts->ofd);
954 
955 		} else {	/* output fd is a tty, input fd isn't */
956 
957 		    /* This is a "problem case", but also common (see the
958 		       example above) - i.e. it makes sense to try a bit
959 		       harder before giving up on non-blocking mode: Try to
960 		       re-open the tty that the output fd points to, and if
961 		       successful replace the original one with the "new" fd
962 		       obtained this way, and set *that* one in non-blocking
963 		       mode. (Yes, this is a kludge.)
964 
965 		       However, re-opening the tty may fail in a couple of
966 		       (unusual) cases:
967 
968 		       1) The name of the tty (or an equivalent one, i.e.
969 			  same major/minor number) can't be found, because
970 			  it actually lives somewhere other than /dev (or
971 			  wherever ttyname() looks for it), and isn't
972 			  equivalent to any of those that do live in the
973 			  "standard" place - this should be *very* unusual.
974 
975 		       2) Permissions on the tty don't allow us to open it -
976 			  it's perfectly possible to have an fd open to an
977 			  object whose permissions wouldn't allow us to open
978 			  it. This is not as unusual as it sounds, one case
979 			  is if the user has su'ed to someone else (not
980 			  root) - we have a read/write fd open to the tty
981 			  (because it has been inherited all the way down
982 			  here), but we have neither read nor write
983 			  permission for the tty.
984 
985 		       In these cases, we finally give up, and don't set the
986 		       output fd in non-blocking mode. */
987 
988 		    char *tty;
989 		    int nfd;
990 
991 		    if ((tty = ttyname(opts->ofd)) != NULL &&
992 			(nfd = open(tty, O_WRONLY)) != -1) {
993 			dup2(nfd, opts->ofd);
994 			close(nfd);
995                         non_blocking = 1;
996 			SET_NONBLOCKING(opts->ofd);
997 		    }
998 		}
999 	    }
1000 	}
1001     }
1002     return (ErlDrvData)create_driver_data(port_num, opts->ifd, opts->ofd,
1003                                           opts->packet_bytes,
1004                                           opts->read_write, 0, -1,
1005                                           !non_blocking, opts);
1006 }
1007 
/*
 * Reset the packet reassembly state kept for one file descriptor.
 *
 * If a message buffer is currently attached (sz > 0) it is freed and its
 * size is subtracted from the sys_misc_mem_sz accounting counter. All
 * bookkeeping fields are then returned to their "empty" values.
 */
static void clear_fd_data(ErtsSysFdData *fdd)
{
    const int has_buffer = (fdd->sz > 0);

    if (has_buffer) {
        /* The counter must cover at least what we are about to give back. */
        ASSERT(erts_atomic_read_nob(&sys_misc_mem_sz) >= fdd->sz);
        erts_free(ERTS_ALC_T_FD_ENTRY_BUF, (void *) fdd->buf);
        erts_atomic_add_nob(&sys_misc_mem_sz, -1*fdd->sz);
    }

    /* Pointers first, then the counters. */
    fdd->buf = NULL;
    fdd->cpos = NULL;
    fdd->sz = 0;
    fdd->remain = 0;
    fdd->psz = 0;
}
1021 
/*
 * Drop any buffered input state for fdd and put the underlying descriptor
 * back into blocking mode.
 *
 * The descriptor number may be stored negated, hence the abs().
 * The prt and use arguments are not used by this implementation.
 */
static void nbio_stop_fd(ErlDrvPort prt, ErtsSysFdData *fdd, int use)
{
    int os_fd;

    clear_fd_data(fdd);

    os_fd = abs(fdd->fd);
    SET_BLOCKING(os_fd);
}
1027 
/*
 * Stop callback for the fd driver. Does not close the fds.
 *
 * Frees the optional blocking-write state, deselects both descriptors
 * without requesting the stop_select callback (ERL_DRV_USE_NO_CALLBACK),
 * restores blocking mode on them, frees the driver data and finally
 * subtracts everything that was released from sys_misc_mem_sz.
 */
static void fd_stop(ErlDrvData ev)
{
    ErtsSysDriverData *drv = (ErtsSysDriverData *) ev;
    ErlDrvPort port = drv->port_num;
    ErtsSysFdData *in = drv->ifd;
    ErtsSysFdData *out = drv->ofd;
    int released = sizeof(ErtsSysDriverData);

    if (drv->blocking != NULL) {
        erts_free(ERTS_ALC_T_SYS_BLOCKING, drv->blocking);
        drv->blocking = NULL;
        released += sizeof(ErtsSysBlocking);
    }

    if (in != NULL) {
        released += sizeof(ErtsSysFdData);
        driver_select(port, abs(in->fd),
                      ERL_DRV_USE_NO_CALLBACK|DO_READ|DO_WRITE, 0);
        nbio_stop_fd(port, in, 1);
    }

    /* Only handle the output side separately when it is a distinct entry. */
    if (out != NULL && out != in) {
        released += sizeof(ErtsSysFdData);
        driver_select(port, abs(out->fd),
                      ERL_DRV_USE_NO_CALLBACK|DO_WRITE, 0);
        nbio_stop_fd(port, out, 1);
    }

    erts_free(ERTS_ALC_T_DRV_TAB, drv);
    erts_atomic_add_nob(&sys_misc_mem_sz, -released);
}
1054 
/*
 * Flush callback for the fd driver: mark the port as terminating (state 1)
 * unless a terminating state has already been recorded.
 */
static void fd_flush(ErlDrvData ev)
{
    ErtsSysDriverData *drv = (ErtsSysDriverData *) ev;

    if (drv->terminating)
        return;         /* keep whatever state was set earlier */

    drv->terminating = 1;
}
1061 
vanilla_start(ErlDrvPort port_num,char * name,SysDriverOpts * opts)1062 static ErlDrvData vanilla_start(ErlDrvPort port_num, char* name,
1063 				SysDriverOpts* opts)
1064 {
1065     int flags, fd;
1066     ErlDrvData res;
1067 
1068     flags = (opts->read_write == DO_READ ? O_RDONLY :
1069 	     opts->read_write == DO_WRITE ? O_WRONLY|O_CREAT|O_TRUNC :
1070 	     O_RDWR|O_CREAT);
1071     if ((fd = open(name, flags, 0666)) < 0)
1072 	return ERL_DRV_ERROR_GENERAL;
1073     if (fd >= sys_max_files()) {
1074 	close(fd);
1075 	return ERL_DRV_ERROR_GENERAL;
1076     }
1077     SET_NONBLOCKING(fd);
1078 
1079     res = (ErlDrvData)(long)create_driver_data(port_num, fd, fd,
1080                                                opts->packet_bytes,
1081                                                opts->read_write, 0, -1, 0,
1082                                                opts);
1083     return res;
1084 }
1085 
1086 /* Note that driver_data[fd].ifd == fd if the port was opened for reading, */
1087 /* otherwise (i.e. write only) driver_data[fd].ofd = fd.  */
1088 
/*
 * Generic stop callback.
 *
 * For the input side, and for the output side when it is a distinct entry,
 * the buffered state is cleared, blocking mode is restored and the
 * descriptor is deselected with ERL_DRV_USE (the original code annotates
 * this as the equivalent of close()). The driver data is freed last.
 */
static void stop(ErlDrvData ev)
{
    ErtsSysDriverData *drv = (ErtsSysDriverData *) ev;
    ErlDrvPort port = drv->port_num;
    ErtsSysFdData *in = drv->ifd;
    ErtsSysFdData *out = drv->ofd;

    if (in != NULL) {
        nbio_stop_fd(port, in, 0);
        driver_select(port, abs(in->fd), ERL_DRV_USE, 0);   /* close(ifd); */
    }

    if (out != NULL && out != in) {
        nbio_stop_fd(port, out, 0);
        driver_select(port, abs(out->fd), ERL_DRV_USE, 0);  /* close(ofd); */
    }

    erts_free(ERTS_ALC_T_DRV_TAB, drv);
}
1106 
/*
 * outputv - vectored output callback, used by fd_driver.
 *
 * Prepends a packet_bytes-wide big-endian length header to the I/O vector
 * and then either
 *   - appends the data to the driver queue if something is already queued,
 *   - tries a direct writev() when the port has no blocking state
 *     (dd->blocking == NULL) and queues whatever was not written, or
 *   - queues the data and schedules fd_async() when the port does have
 *     blocking state.
 * The port is marked busy once the queued amount reaches
 * dd->high_watermark. When dd->blocking is set, queue access is done with
 * dd->blocking->pdl held.
 *
 * Fails the port with EINVAL if the payload does not fit in a 1 or 2 byte
 * header, or if driver_sizeq() reports an error.
 */
static void outputv(ErlDrvData e, ErlIOVec* ev)
{
    ErtsSysDriverData *dd = (ErtsSysDriverData*)e;
    ErlDrvPort ix = dd->port_num;
    int pb = dd->packet_bytes;
    int ofd = dd->ofd ? dd->ofd->fd : -1;  /* -1 when there is no output side */
    ssize_t n;
    char lb[4];
    char* lbp;
    ErlDrvSizeT len = ev->size;
    ErlDrvSizeT qsz;

    /* (len > ((unsigned long)-1 >> (4-pb)*8)) */
    /*    if (pb >= 0 && (len & (((ErlDrvSizeT)1 << (pb*8))) - 1) != len) {*/
    /* Reject payloads whose length cannot be expressed in the header. */
    if (((pb == 2) && (len > 0xffff)) || (pb == 1 && len > 0xff)) {
	driver_failure_posix(ix, EINVAL);
	return; /* -1; */
    }
    /* Handles 0 <= pb <= 4 only */
    /* Encode the length in 4 bytes and point at its last pb bytes. */
    put_int32((Uint32) len, lb);
    lbp = lb + (4-pb);

    /*
     * The first vector element is overwritten with the header.
     * NOTE(review): this presumes the caller leaves iov[0] free for the
     * driver - confirm against the code that builds the ErlIOVec.
     */
    ev->iov[0].iov_base = lbp;
    ev->iov[0].iov_len = pb;
    ev->size += pb;

    /* Take the queue lock only when there is blocking state. */
    if (dd->blocking)
        driver_pdl_lock(dd->blocking->pdl);

    qsz = driver_sizeq(ix);
    if (qsz) {
        /* Either an error from driver_sizeq() or data already queued. */
        if (qsz == (ErlDrvSizeT) -1) {
            if (dd->blocking)
                driver_pdl_unlock(dd->blocking->pdl);
            driver_failure_posix(ix, EINVAL);
            return;
        }
        /* Keep ordering: append behind what is already waiting. */
        driver_enqv(ix, ev, 0);
        qsz += ev->size;
        if (!dd->busy && qsz >= dd->high_watermark)
            set_busy_port(ix, (dd->busy = !0));
        if (dd->blocking)
            driver_pdl_unlock(dd->blocking->pdl);
    }
    else if (!dd->blocking) {
        /* We try to write directly if the fd is non-blocking */
	/* At most MAX_VSIZE vector elements are handed to writev(). */
	int vsize = ev->vsize > MAX_VSIZE ? MAX_VSIZE : ev->vsize;

	n = writev(ofd, (const void *) (ev->iov), vsize);
	if (n == ev->size)
	    return; /* 0;*/
	if (n < 0) {
	    /* EINTR and ERRNO_BLOCK are treated as "nothing written". */
	    if ((errno != EINTR) && (errno != ERRNO_BLOCK)) {
		driver_failure_posix(ix, errno);
		return; /* -1;*/
	    }
	    n = 0;
	}
	driver_enqv(ix, ev, n);  /* n is the skip value */
        qsz = ev->size - n;
        if (!dd->busy && qsz >= dd->high_watermark)
            set_busy_port(ix, (dd->busy = !0));
	/* Ask to be called back when the fd becomes writable again. */
	driver_select(ix, ofd, ERL_DRV_WRITE|ERL_DRV_USE, 1);
    }
    else {
        /* Blocking state present and the queue is empty (lock is held). */
        if (ev->size != 0) {
            driver_enqv(ix, ev, 0);
            qsz = ev->size;
            if (!dd->busy && qsz >= dd->high_watermark)
                set_busy_port(ix, (dd->busy = !0));
            /* Release the lock before scheduling the async write job. */
            driver_pdl_unlock(dd->blocking->pdl);
            driver_async(ix, &dd->blocking->pkey,
                         fd_async, dd, NULL);
        } else {
            /* Nothing to write at all. */
            driver_pdl_unlock(dd->blocking->pdl);
        }
    }

    /* return 0;*/
}
1188 
1189 /* Used by spawn_driver and vanilla driver */
output(ErlDrvData e,char * buf,ErlDrvSizeT len)1190 static void output(ErlDrvData e, char* buf, ErlDrvSizeT len)
1191 {
1192     ErtsSysDriverData *dd = (ErtsSysDriverData*)e;
1193     ErlDrvPort ix = dd->port_num;
1194     int pb = dd->packet_bytes;
1195     int ofd = dd->ofd ? dd->ofd->fd : -1;
1196     ssize_t n;
1197     ErlDrvSizeT qsz;
1198     char lb[4];
1199     char* lbp;
1200     struct iovec iv[2];
1201 
1202     /* (len > ((unsigned long)-1 >> (4-pb)*8)) */
1203     if (((pb == 2) && (len > 0xffff))
1204         || (pb == 1 && len > 0xff)
1205         || dd->pid == 0 /* Attempt at output before port is ready */) {
1206 	driver_failure_posix(ix, EINVAL);
1207 	return; /* -1; */
1208     }
1209     put_int32(len, lb);
1210     lbp = lb + (4-pb);
1211 
1212     qsz = driver_sizeq(ix);
1213     if (qsz) {
1214         if (qsz == (ErlDrvSizeT) -1) {
1215             driver_failure_posix(ix, EINVAL);
1216             return;
1217         }
1218 	driver_enq(ix, lbp, pb);
1219 	driver_enq(ix, buf, len);
1220         qsz += len + pb;
1221     }
1222     else {
1223 	iv[0].iov_base = lbp;
1224 	iv[0].iov_len = pb;  /* should work for pb=0 */
1225 	iv[1].iov_base = buf;
1226 	iv[1].iov_len = len;
1227 	n = writev(ofd, iv, 2);
1228 	if (n == pb+len)
1229 	    return; /* 0; */
1230 	if (n < 0) {
1231 	    if ((errno != EINTR) && (errno != ERRNO_BLOCK)) {
1232 		driver_failure_posix(ix, errno);
1233 		return; /* -1; */
1234 	    }
1235 	    n = 0;
1236 	}
1237         qsz = pb + len - n;
1238 	if (n < pb) {
1239 	    driver_enq(ix, lbp+n, pb-n);
1240 	    driver_enq(ix, buf, len);
1241 	}
1242 	else {
1243 	    n -= pb;
1244 	    driver_enq(ix, buf+n, len-n);
1245 	}
1246 	driver_select(ix, ofd, ERL_DRV_WRITE|ERL_DRV_USE, 1);
1247     }
1248 
1249     if (!dd->busy && qsz >= dd->high_watermark)
1250         set_busy_port(ix, (dd->busy = !0));
1251 
1252     return; /* 0; */
1253 }
1254 
/*
 * Handle end-of-file or a read error on the input side of a port.
 *
 * `res` is 0 for eof and negative for an error; for errors the error code
 * is taken from errno as it was on entry. The input fd is deselected and
 * its reassembly state cleared. If the port has blocking state and output
 * is still queued, termination is deferred: dd->terminating is set to 2
 * (eof) or to the negated error code and the function returns. Otherwise
 * the exit status / eof / error is reported right away.
 *
 * Always returns 0.
 */
static int port_inp_failure(ErtsSysDriverData *dd, int res)
				/* Result: 0 (eof) or -1 (error) */
{
    /* Sample errno first; the calls below may overwrite it. */
    int err = errno;

    ASSERT(res <= 0);
    if (dd->ifd) {
        /* The fd may be stored negated, so deselect its absolute value
           like every other driver_select() call on ifd does. */
        driver_select(dd->port_num, abs(dd->ifd->fd),
                      ERL_DRV_READ|ERL_DRV_WRITE, 0);
        clear_fd_data(dd->ifd);
    }

    if (dd->blocking) {
        driver_pdl_lock(dd->blocking->pdl);
        if (driver_sizeq(dd->port_num) > 0) {
            driver_pdl_unlock(dd->blocking->pdl);
            /* We have stuff in the output queue, so we just
               set the state to terminating and wait for fd_ready_async
               to terminate the port */
            if (res == 0)
                dd->terminating = 2;
            else
                dd->terminating = -err;
            return 0;
        }
        driver_pdl_unlock(dd->blocking->pdl);
    }

    if (res == 0) {
        if (dd->alive == 1) {
            /*
             * We have eof and want to report exit status, but the process
             * hasn't exited yet. When it does ready_input will
             * driver_select() this fd which will make sure that we get
             * back here with dd->alive == -1 and dd->status set.
             */
            return 0;
        }
        else if (dd->alive == -1) {
            int status = dd->status;

            /* We need not be prepared for stopped/continued processes. */
            if (WIFSIGNALED(status))
                status = 128 + WTERMSIG(status);
            else
                status = WEXITSTATUS(status);
            driver_report_exit(dd->port_num, status);
        }
        driver_failure_eof(dd->port_num);
    } else if (dd->ifd) {
        /*
         * ERL_DRV_ERROR_ERRNO makes erl_drv_init_ack() read errno, so it
         * must be set explicitly: driver_select() and clear_fd_data()
         * above may have changed it since entry.
         */
        if (dd->alive == -1)
            errno = dd->status;
        else
            errno = err;
        erl_drv_init_ack(dd->port_num, ERL_DRV_ERROR_ERRNO);
    } else {
        driver_failure_posix(dd->port_num, err);
    }
    return 0;
}
1312 
/* e is the drv_data that is returned from the  */
/* initial start routine                        */
/* ready_fd is the descriptor that is ready to read */
1316 
/*
 * Input-ready callback.
 *
 * While dd->pid == 0 (spawn driver only) the descriptor carries the
 * handshake from erl_child_setup: the Go message with the OS pid is read,
 * an Ack is written (or queued), descriptors stored negated are released,
 * and the port start is acknowledged.
 *
 * Afterwards data is read according to dd->packet_bytes:
 *   0      - raw mode, whatever was read is delivered as is;
 *   1,2,4  - length-prefixed packets; complete packets are delivered one
 *            by one, a packet split across reads is collected in a heap
 *            buffer tracked by dd->ifd (buf/sz/remain/cpos), and a partial
 *            header is kept in dd->ifd->pbuf/psz.
 * Eof and hard read errors are passed on to port_inp_failure().
 */
static void ready_input(ErlDrvData e, ErlDrvEvent ready_fd)
{
    ErtsSysDriverData *dd = (ErtsSysDriverData*)e;
    ErlDrvPort port_num;
    int packet_bytes;
    int res;
    Uint h;

    port_num = dd->port_num;
    packet_bytes = dd->packet_bytes;

    ASSERT(abs(dd->ifd->fd) == ready_fd);

    if (dd->pid == 0) {
        /* the pid is sent from erl_child_setup. spawn driver only. */
        ErtsSysForkerProto proto;
        int saved_errno;

        if ((res = read(ready_fd, &proto, sizeof(proto))) <= 0) {
            if (res < 0 && (errno == ERRNO_BLOCK || errno == EINTR))
                return;
            /* hmm, child setup seems to have closed the pipe too early...
               we close the port as there is not much else we can do */
            /* Pick the error to report before driver_select() gets a
               chance to modify errno; port_inp_failure() reads errno. */
            saved_errno = (res == 0) ? EPIPE : errno;
            driver_select(port_num, ready_fd, ERL_DRV_READ, 0);
            errno = saved_errno;
            port_inp_failure(dd, -1);
            return;
        }

        ASSERT(proto.action == ErtsSysForkerProtoAction_Go);
        dd->pid = proto.u.go.os_pid;

        if (dd->pid == -1) {
            /* Setup failed! The only reason why this should happen is if
               the fork fails. */
            errno = proto.u.go.error_number;
            port_inp_failure(dd, -1);
            return;
        }

        /* Reuse the received message as the Ack sent back to the child. */
        proto.action = ErtsSysForkerProtoAction_Ack;

        if (driver_sizeq(port_num) > 0) {
            driver_enq(port_num, (char*)&proto, sizeof(proto));
        } else {
            if (write(abs(dd->ofd->fd), &proto, sizeof(proto)) < 0)
                if (errno == ERRNO_BLOCK || errno == EINTR)
                    driver_enq(port_num, (char*)&proto, sizeof(proto));
            /* do nothing on failure here. If the ofd is broken, then
               the ifd will probably also be broken and trigger
               a port_inp_failure */
        }

        if (dd->ifd->fd < 0) {
            /* Negated input fd: release it now and forget the entry. */
            driver_select(port_num, abs(dd->ifd->fd), ERL_DRV_READ|ERL_DRV_USE, 0);
            erts_atomic_add_nob(&sys_misc_mem_sz, -sizeof(ErtsSysFdData));
            dd->ifd = NULL;
        }

        if (dd->ofd->fd < 0  || driver_sizeq(port_num) > 0)
            /* we select in order to close fd or write to queue,
               child setup will close this fd if fd < 0 */
            driver_select(port_num, abs(dd->ofd->fd), ERL_DRV_WRITE|ERL_DRV_USE, 1);

        erl_drv_set_os_pid(port_num, dd->pid);
        erl_drv_init_ack(port_num, e);
        return;
    }

    if (packet_bytes == 0) {
        /* Raw mode: deliver whatever one read() returns. */
        byte *read_buf = (byte *) erts_alloc(ERTS_ALC_T_SYS_READ_BUF,
                                             ERTS_SYS_READ_BUF_SZ);
        res = read(ready_fd, read_buf, ERTS_SYS_READ_BUF_SZ);
        if (res < 0) {
            if ((errno != EINTR) && (errno != ERRNO_BLOCK))
                port_inp_failure(dd, res);
        }
        else if (res == 0)
            port_inp_failure(dd, res);
        else
            driver_output(port_num, (char*) read_buf, res);
        erts_free(ERTS_ALC_T_SYS_READ_BUF, (void *) read_buf);
    }
    else if (dd->ifd->remain > 0) { /* We try to read the remainder */
        /* space is allocated in buf */
        res = read(ready_fd, dd->ifd->cpos,
                   dd->ifd->remain);
        if (res < 0) {
            if ((errno != EINTR) && (errno != ERRNO_BLOCK))
                port_inp_failure(dd, res);
        }
        else if (res == 0) {
            port_inp_failure(dd, res);
        }
        else if (res == dd->ifd->remain) { /* we're done  */
            driver_output(port_num, dd->ifd->buf,
                          dd->ifd->sz);
            clear_fd_data(dd->ifd);
        }
        else { /*  if (res < dd->ifd->remain) */
            dd->ifd->cpos += res;
            dd->ifd->remain -= res;
        }
    }
    else if (dd->ifd->remain == 0) { /* clean fd */
        byte *read_buf = (byte *) erts_alloc(ERTS_ALC_T_SYS_READ_BUF,
                                             ERTS_SYS_READ_BUF_SZ);
        /* We make one read attempt and see what happens */
        res = read(ready_fd, read_buf, ERTS_SYS_READ_BUF_SZ);
        if (res < 0) {
            if ((errno != EINTR) && (errno != ERRNO_BLOCK))
                port_inp_failure(dd, res);
        }
        else if (res == 0) {            /* eof */
            port_inp_failure(dd, res);
        }
        else if (res < packet_bytes - dd->ifd->psz) {
            /* Not even a full header yet: stash what we got. */
            memcpy(dd->ifd->pbuf+dd->ifd->psz,
                   read_buf, res);
            dd->ifd->psz += res;
        }
        else  { /* if (res >= packet_bytes) */
            unsigned char* cpos = read_buf;
            int bytes_left = res;

            while (1) {
                int psz = dd->ifd->psz;
                char* pbp = dd->ifd->pbuf + psz;

                /* Complete the header from the bytes just read. */
                while(bytes_left && (psz < packet_bytes)) {
                    *pbp++ = *cpos++;
                    bytes_left--;
                    psz++;
                }

                if (psz < packet_bytes) {
                    /* Ran out of data in the middle of a header. */
                    dd->ifd->psz = psz;
                    break;
                }
                dd->ifd->psz = 0;

                switch (packet_bytes) {
                case 1: h = get_int8(dd->ifd->pbuf);  break;
                case 2: h = get_int16(dd->ifd->pbuf); break;
                case 4: h = get_uint32(dd->ifd->pbuf); break;
                default:
                    ASSERT(0);
                    /* Do not leak the read buffer on this path. */
                    erts_free(ERTS_ALC_T_SYS_READ_BUF, (void *) read_buf);
                    return; /* -1; */
                }

                if (h <= (bytes_left)) {
                    /* The whole packet is in the read buffer. */
                    driver_output(port_num, (char*) cpos, h);
                    cpos += h;
                    bytes_left -= h;
                    continue;
                }
                else {          /* The last message we got was split */
                    char *buf = erts_alloc_fnf(ERTS_ALC_T_FD_ENTRY_BUF, h);
                    if (!buf) {
                        errno = ENOMEM;
                        port_inp_failure(dd, -1);
                    }
                    else {
                        /* Keep the first part; later calls read the rest. */
                        erts_atomic_add_nob(&sys_misc_mem_sz, h);
                        sys_memcpy(buf, cpos, bytes_left);
                        dd->ifd->buf = buf;
                        dd->ifd->sz = h;
                        dd->ifd->remain = h - bytes_left;
                        dd->ifd->cpos = buf + bytes_left;
                    }
                    break;
                }
            }
        }
        erts_free(ERTS_ALC_T_SYS_READ_BUF, (void *) read_buf);
    }
}
1493 
1494 
/* e is the drv_data that is returned from the  */
/* initial start routine                        */
/* ready_fd is the descriptor that is ready to write */
1498 
/*
 * Output-ready callback: push queued data to ready_fd.
 *
 * With an empty queue the busy flag is cleared, write selection is turned
 * off, a negated output fd of a started port (pid > 0) is released, and a
 * port that is terminating is failed with reason "normal".
 *
 * Otherwise at most MAX_VSIZE queue elements are written with writev();
 * the written amount is dequeued and the busy flag is cleared once the
 * queue drops below dd->low_watermark. EINTR/ERRNO_BLOCK are ignored, any
 * other write error fails the port.
 */
static void ready_output(ErlDrvData e, ErlDrvEvent ready_fd)
{
    ErtsSysDriverData *dd = (ErtsSysDriverData*)e;
    ErlDrvPort ix = dd->port_num;
    ssize_t n;                  /* writev() returns ssize_t, not int */
    struct iovec* iv;
    int vsize;

    if ((iv = (struct iovec*) driver_peekq(ix, &vsize)) == NULL) {
        if (dd->busy)
            set_busy_port(ix, (dd->busy = 0));
        driver_select(ix, ready_fd, ERL_DRV_WRITE, 0);
        if (dd->pid > 0 && dd->ofd->fd < 0) {
            /* The port was opened with 'in' option, which means we
               should close the output fd as soon as the command has
               been sent. */
            driver_select(ix, ready_fd, ERL_DRV_WRITE|ERL_DRV_USE, 0);
            erts_atomic_add_nob(&sys_misc_mem_sz, -sizeof(ErtsSysFdData));
            dd->ofd = NULL;
        }
        if (dd->terminating)
            driver_failure_atom(dd->port_num,"normal");
        return; /* 0; */
    }
    vsize = vsize > MAX_VSIZE ? MAX_VSIZE : vsize;
    if ((n = writev(ready_fd, iv, vsize)) > 0) {
        ErlDrvSizeT qsz = driver_deq(ix, n);
        if (qsz == (ErlDrvSizeT) -1) {
            driver_failure_posix(ix, EINVAL);
            return;
        }
        if (dd->busy && qsz < dd->low_watermark)
            set_busy_port(ix, (dd->busy = 0));
    }
    else if (n < 0) {
        if (errno == ERRNO_BLOCK || errno == EINTR)
            return; /* 0; */
        else {
            /* Save errno: driver_select() may change it. */
            int res = errno;
            driver_select(ix, ready_fd, ERL_DRV_WRITE, 0);
            driver_failure_posix(ix, res);
            return; /* -1; */
        }
    }
    return; /* 0; */
}
1545 
/*
 * stop_select callback: close the descriptor behind the given event.
 * The second argument is unused.
 */
static void stop_select(ErlDrvEvent fd, void* _)
{
    int os_fd = (int)fd;

    close(os_fd);
}
1550 
1551 
1552 static void
fd_async(void * async_data)1553 fd_async(void *async_data)
1554 {
1555     ErlDrvSSizeT res;
1556     ErtsSysDriverData *dd = (ErtsSysDriverData *)async_data;
1557     SysIOVec      *iov0;
1558     SysIOVec      *iov;
1559     int            iovlen;
1560     int            err = 0;
1561     /* much of this code is stolen from efile_drv:invoke_writev */
1562     driver_pdl_lock(dd->blocking->pdl);
1563     iov0 = driver_peekq(dd->port_num, &iovlen);
1564     iovlen = iovlen < MAXIOV ? iovlen : MAXIOV;
1565     iov = erts_alloc_fnf(ERTS_ALC_T_SYS_WRITE_BUF,
1566                          sizeof(SysIOVec)*iovlen);
1567     if (!iov) {
1568         res = -1;
1569         err = ENOMEM;
1570         driver_pdl_unlock(dd->blocking->pdl);
1571     } else {
1572         memcpy(iov,iov0,iovlen*sizeof(SysIOVec));
1573         driver_pdl_unlock(dd->blocking->pdl);
1574 
1575         do {
1576             res = writev(dd->ofd->fd, iov, iovlen);
1577         } while (res < 0 && errno == EINTR);
1578         if (res < 0)
1579             err = errno;
1580 
1581         erts_free(ERTS_ALC_T_SYS_WRITE_BUF, iov);
1582     }
1583     dd->blocking->res = res;
1584     dd->blocking->err = err;
1585 }
1586 
/*
 * Completion callback for fd_async().
 *
 * res < 0:  reschedule the job if the error was ERRNO_BLOCK, otherwise
 *           fail the port with the recorded error.
 * res == 0: nothing to do.
 * res > 0:  dequeue the written bytes (under dd->blocking->pdl), clear the
 *           busy flag if the queue dropped below dd->low_watermark, then
 *           either reschedule the job (data left) or, if the port has been
 *           ordered to terminate by fd_flush or port_inp_failure, finish
 *           it according to dd->terminating: 1 -> "normal", 2 -> eof,
 *           negative -> posix error.
 */
void fd_ready_async(ErlDrvData drv_data,
                    ErlDrvThreadData thread_data)
{
    ErtsSysDriverData *dd = (ErtsSysDriverData *) thread_data;
    ErlDrvPort port = dd->port_num;
    ErlDrvSizeT remaining;

    ASSERT(dd->blocking);

    if (dd->blocking->res < 0) {
        if (dd->blocking->err != ERRNO_BLOCK)
            driver_failure_posix(port, dd->blocking->err);
        else
            /* still data left to write in queue */
            driver_async(port, &dd->blocking->pkey, fd_async, dd, NULL);
        return; /* -1; */
    }

    if (dd->blocking->res == 0)
        return; /* 0; */

    driver_pdl_lock(dd->blocking->pdl);
    remaining = driver_deq(port, dd->blocking->res);
    if (remaining == (ErlDrvSizeT) -1) {
        driver_pdl_unlock(dd->blocking->pdl);
        driver_failure_posix(port, EINVAL);
        return;
    }
    if (dd->busy && remaining < dd->low_watermark)
        set_busy_port(port, (dd->busy = 0));
    driver_pdl_unlock(dd->blocking->pdl);

    if (remaining != 0) {
        /* still data left to write in queue */
        driver_async(port, &dd->blocking->pkey, fd_async, dd, NULL);
        return; /* 0; */
    }

    /* The queue is drained. If the port has been ordered to terminate
       from either fd_flush or port_inp_failure, do so now. */
    if (dd->terminating == 1)
        driver_failure_atom(port, "normal");
    else if (dd->terminating == 2)
        driver_failure_eof(port);
    else if (dd->terminating < 0)
        driver_failure_posix(port, -dd->terminating);
}
1633 
1634 
1635 /* Forker driver */
1636 
/* Emulator side of the unix domain socket pair shared with the
   erl_child_setup process; assigned in forker_start(). */
static int forker_fd;
1638 
forker_start(ErlDrvPort port_num,char * name,SysDriverOpts * opts)1639 static ErlDrvData forker_start(ErlDrvPort port_num, char* name,
1640                                SysDriverOpts* opts)
1641 {
1642 
1643     int i;
1644     int fds[2];
1645     int res, unbind;
1646     char bindir[MAXPATHLEN];
1647     size_t bindirsz = sizeof(bindir);
1648     Uint csp_path_sz;
1649     char *child_setup_prog;
1650 
1651     forker_port = erts_drvport2id(port_num);
1652 
1653     res = erts_sys_explicit_8bit_getenv("BINDIR", bindir, &bindirsz);
1654     if (res == 0) {
1655         erts_exit(1, "Environment variable BINDIR is not set\n");
1656     } else if(res < 0) {
1657         erts_exit(1, "Value of environment variable BINDIR is too large\n");
1658     }
1659 
1660     if (bindir[0] != DIR_SEPARATOR_CHAR)
1661         erts_exit(1,
1662                  "Environment variable BINDIR does not contain an"
1663                  " absolute path\n");
1664     csp_path_sz = (strlen(bindir)
1665                    + 1 /* DIR_SEPARATOR_CHAR */
1666                    + sizeof(CHILD_SETUP_PROG_NAME)
1667                    + 1);
1668     child_setup_prog = erts_alloc(ERTS_ALC_T_CS_PROG_PATH, csp_path_sz);
1669     erts_snprintf(child_setup_prog, csp_path_sz,
1670                   "%s%c%s",
1671                   bindir,
1672                   DIR_SEPARATOR_CHAR,
1673                   CHILD_SETUP_PROG_NAME);
1674     if (socketpair(AF_UNIX, SOCK_STREAM, 0, fds) < 0) {
1675         erts_exit(ERTS_ABORT_EXIT,
1676                  "Could not open unix domain socket in spawn_init: %d\n",
1677                  errno);
1678     }
1679 
1680     forker_fd = fds[0];
1681 
1682     unbind = erts_sched_bind_atfork_prepare();
1683 
1684     i = fork();
1685 
1686     if (i == 0) {
1687         /* The child */
1688         char *cs_argv[FORKER_ARGV_NO_OF_ARGS] =
1689             {CHILD_SETUP_PROG_NAME, NULL, NULL};
1690         char buff[128];
1691 
1692         erts_sched_bind_atfork_child(unbind);
1693 
1694         snprintf(buff, 128, "%d", sys_max_files());
1695         cs_argv[FORKER_ARGV_MAX_FILES] = buff;
1696 
1697         /* We preallocate fd 3 for the uds fd */
1698         if (fds[1] != 3) {
1699             dup2(fds[1], 3);
1700         }
1701 
1702 #if defined(USE_SETPGRP_NOARGS)		/* SysV */
1703     (void) setpgrp();
1704 #elif defined(USE_SETPGRP)		/* BSD */
1705     (void) setpgrp(0, getpid());
1706 #else					/* POSIX */
1707     (void) setsid();
1708 #endif
1709 
1710         execv(child_setup_prog, cs_argv);
1711         _exit(1);
1712     }
1713 
1714     erts_sched_bind_atfork_parent(unbind);
1715 
1716     erts_free(ERTS_ALC_T_CS_PROG_PATH, child_setup_prog);
1717 
1718     close(fds[1]);
1719 
1720     SET_NONBLOCKING(forker_fd);
1721 
1722     return (ErlDrvData)port_num;
1723 }
1724 
/*
 * Stop callback for the forker driver. Currently a no-op: nothing is
 * closed or released here.
 */
static void forker_stop(ErlDrvData e)
{
    /* we probably should do something here,
       the port has been closed by the user. */
}
1730 
forker_deq(ErlDrvPort port_num,ErtsSysForkerProto * proto)1731 static ErlDrvSizeT forker_deq(ErlDrvPort port_num, ErtsSysForkerProto *proto)
1732 {
1733     close(proto->u.start.fds[0]);
1734     close(proto->u.start.fds[1]);
1735     if (proto->u.start.fds[1] != proto->u.start.fds[2])
1736         close(proto->u.start.fds[2]);
1737 
1738     return driver_deq(port_num, sizeof(*proto));
1739 }
1740 
forker_sigchld(Eterm port_id,int error)1741 static void forker_sigchld(Eterm port_id, int error)
1742 {
1743     ErtsSysForkerProto *proto = erts_alloc(ERTS_ALC_T_DRV_CTRL_DATA, sizeof(*proto));
1744     proto->action = ErtsSysForkerProtoAction_SigChld;
1745     proto->u.sigchld.error_number = error;
1746     proto->u.sigchld.port_id = port_id;
1747 
1748     /* ideally this would be a port_command call, but as command is
1749        already used by the spawn_driver, we use control instead.
1750        Note that when using erl_drv_port_control it is an asynchronous
1751        control. */
1752     erl_drv_port_control(port_id, ERTS_SPAWN_DRV_CONTROL_MAGIC_NUMBER,
1753                          (char*)proto, sizeof(*proto));
1754 }
1755 
/*
 * Input-ready callback for the forker port: read one protocol message
 * from erl_child_setup and act on it.
 *
 * A read error other than ERRNO_BLOCK/EINTR, or eof, terminates the
 * emulator. A SigChld message is forwarded with forker_sigchld(). When
 * FORKER_PROTO_START_ACK is defined, a StartAck message retires the Start
 * message at the head of the queue and re-enables write selection if more
 * data is queued.
 */
static void forker_ready_input(ErlDrvData e, ErlDrvEvent fd)
{
    ErtsSysForkerProto msg;
    int nread;

    nread = read(fd, &msg, sizeof(msg));
    if (nread < 0) {
        if (errno == ERRNO_BLOCK || errno == EINTR)
            return;
        erts_exit(ERTS_DUMP_EXIT, "Failed to read from erl_child_setup: %d\n", errno);
    }

    if (nread == 0)
        erts_exit(ERTS_DUMP_EXIT, "erl_child_setup closed\n");

    ASSERT(nread == sizeof(msg));

#ifdef FORKER_PROTO_START_ACK
    if (msg.action == ErtsSysForkerProtoAction_StartAck) {
        /* Ideally we would like to not have to ack each Start
           command being sent over the uds, but it would seem
           that some operating systems (only observed on FreeBSD)
           throw away data on the uds when the socket becomes full,
           so we have to.
        */
        ErlDrvPort port_num = (ErlDrvPort)e;
        int vlen;
        SysIOVec *iov = driver_peekq(port_num, &vlen);
        ErtsSysForkerProto *qproto = (ErtsSysForkerProto *)iov[0].iov_base;

        if (forker_deq(port_num, qproto))
            driver_select(port_num, forker_fd, ERL_DRV_WRITE|ERL_DRV_USE, 1);
        return;
    }
#endif

    ASSERT(msg.action == ErtsSysForkerProtoAction_SigChld);
    forker_sigchld(msg.u.sigchld.port_id, msg.u.sigchld.error_number);
}
1795 
/*
 * Write queued start requests from the driver queue to erl_child_setup.
 *
 *   e  - driver data; cast to the ErlDrvPort whose queue is drained
 *   fd - not used; all writes go to the file-level forker_fd
 *
 * Without FORKER_PROTO_START_ACK, requests are written and dequeued in
 * a loop that runs at most 9 times per call (the counter starts at 10
 * and is pre-decremented before each pass). The write select is turned
 * off once forker_deq() returns 0.
 *
 * With FORKER_PROTO_START_ACK, only the head request is written. It
 * stays in the queue and the write select is turned off.
 *
 * Write errors:
 *   ERRNO_BLOCK / EINTR - return, leaving the request queued
 *   EMFILE              - report the error to the requesting port with
 *                         forker_sigchld() and drop the request
 *   anything else       - terminate the emulator via erts_exit()
 */
static void forker_ready_output(ErlDrvData e, ErlDrvEvent fd)
{
    ErlDrvPort port_num = (ErlDrvPort)e;

#ifndef FORKER_PROTO_START_ACK
    int budget = 10;
    while (driver_sizeq(port_num) > 0 && --budget) {
#endif
        int vlen;
        SysIOVec *queue = driver_peekq(port_num, &vlen);
        ErtsSysForkerProto *head = (ErtsSysForkerProto *)queue[0].iov_base;
        ASSERT(queue[0].iov_len >= (sizeof(*head)));
        /* The message is sent together with the 3 entries of u.start.fds. */
        if (sys_uds_write(forker_fd, (char*)head, sizeof(*head),
                          head->u.start.fds, 3, 0) < 0) {
            const int err = errno;

            if (err == ERRNO_BLOCK || err == EINTR)
                return;

            if (err == EMFILE) {
                forker_sigchld(head->u.start.port_id, err);
                if (forker_deq(port_num, head) == 0)
                    driver_select(port_num, forker_fd, ERL_DRV_WRITE, 0);
                return;
            }

            erts_exit(ERTS_DUMP_EXIT, "Failed to write to erl_child_setup: %d\n", err);
        }
#ifndef FORKER_PROTO_START_ACK
        if (forker_deq(port_num, head) == 0)
            driver_select(port_num, forker_fd, ERL_DRV_WRITE, 0);
    }
#else
    driver_select(port_num, forker_fd, ERL_DRV_WRITE, 0);
#endif
}
1829 
forker_control(ErlDrvData e,unsigned int cmd,char * buf,ErlDrvSizeT len,char ** rbuf,ErlDrvSizeT rlen)1830 static ErlDrvSSizeT forker_control(ErlDrvData e, unsigned int cmd, char *buf,
1831                                    ErlDrvSizeT len, char **rbuf, ErlDrvSizeT rlen)
1832 {
1833     static int first_call = 1;
1834     ErtsSysForkerProto *proto = (ErtsSysForkerProto *)buf;
1835     ErlDrvPort port_num = (ErlDrvPort)e;
1836     int res;
1837 
1838     if (cmd != ERTS_FORKER_DRV_CONTROL_MAGIC_NUMBER)
1839         return -1;
1840 
1841     if (first_call) {
1842         /*
1843          * Do driver_select here when schedulers and their pollsets have started.
1844          */
1845         driver_select(port_num, forker_fd, ERL_DRV_READ|ERL_DRV_USE, 1);
1846         first_call = 0;
1847     }
1848 
1849     driver_enq(port_num, buf, len);
1850     if (driver_sizeq(port_num) > sizeof(*proto)) {
1851         return 0;
1852     }
1853 
1854     if ((res = sys_uds_write(forker_fd, (char*)proto, sizeof(*proto),
1855                              proto->u.start.fds, 3, 0)) < 0) {
1856         if (errno == ERRNO_BLOCK || errno == EINTR) {
1857             driver_select(port_num, forker_fd, ERL_DRV_WRITE|ERL_DRV_USE, 1);
1858             return 0;
1859         } else if (errno == EMFILE) {
1860             forker_sigchld(proto->u.start.port_id, errno);
1861             forker_deq(port_num, proto);
1862             return 0;
1863         } else {
1864             erts_exit(ERTS_DUMP_EXIT, "Failed to write to erl_child_setup: %d\n", errno);
1865         }
1866     }
1867 
1868 #ifndef FORKER_PROTO_START_ACK
1869     ASSERT(res == sizeof(*proto));
1870     forker_deq(port_num, proto);
1871 #endif
1872 
1873     return 0;
1874 }
1875