/*****************************************************************************\
 *  srun.c - user interface to allocate resources, submit jobs, and execute
 *	parallel jobs.
 *****************************************************************************
 *  Copyright (C) 2002-2007 The Regents of the University of California.
 *  Copyright (C) 2008-2010 Lawrence Livermore National Security.
 *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
 *  Written by Mark Grondona <grondona@llnl.gov>, et al.
 *  CODE-OCEC-09-009. All rights reserved.
 *
 *  This file is part of Slurm, a resource management program.
 *  For details, see <https://slurm.schedmd.com/>.
 *  Please also read the included file: DISCLAIMER.
 *
 *  Slurm is free software; you can redistribute it and/or modify it under
 *  the terms of the GNU General Public License as published by the Free
 *  Software Foundation; either version 2 of the License, or (at your option)
 *  any later version.
 *
 *  In addition, as a special exception, the copyright holders give permission
 *  to link the code of portions of this program with the OpenSSL library under
 *  certain conditions as described in each individual source file, and
 *  distribute linked combinations including the two. You must obey the GNU
 *  General Public License in all respects for all of the code used other than
 *  OpenSSL. If you modify file(s) with this exception, you may extend this
 *  exception to your version of the file(s), but you are not obligated to do
 *  so. If you do not wish to do so, delete this exception statement from your
 *  version. If you delete this exception statement from all source files in
 *  the program, then also delete it here.
 *
 *  Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
 *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 *  FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
 *  details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with Slurm; if not, write to the Free Software Foundation, Inc.,
 *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
\*****************************************************************************/

#include "config.h"

#include <ctype.h>
#include <fcntl.h>
#include <grp.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <signal.h>
#include <sys/param.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/utsname.h>
#include <sys/wait.h>
#include <termios.h>
#include <unistd.h>

#include "src/common/fd.h"

#include "src/common/hostlist.h"
#include "src/common/log.h"
#include "src/common/net.h"
#include "src/common/plugstack.h"
#include "src/common/read_config.h"
#include "src/common/slurm_auth.h"
#include "src/common/slurm_jobacct_gather.h"
#include "src/common/slurm_opt.h"
#include "src/common/slurm_protocol_api.h"
#include "src/common/slurm_rlimits_info.h"
#include "src/common/switch.h"
#include "src/common/uid.h"
#include "src/common/xmalloc.h"
#include "src/common/xsignal.h"
#include "src/common/xstring.h"

#include "src/bcast/file_bcast.h"

#include "launch.h"
#include "allocate.h"
#include "srun_job.h"
#include "opt.h"
#include "debugger.h"
#include "src/srun/srun_pty.h"
#include "multi_prog.h"
#include "src/api/pmi_server.h"
#include "src/api/step_ctx.h"
#include "src/api/step_launch.h"

#ifndef OPEN_MPI_PORT_ERROR
/* This exit code indicates the launched Open MPI tasks could
 * not open the reserved port. It was already open by some
 * other process. */
#define OPEN_MPI_PORT_ERROR 108
#endif

static struct termios termdefaults;
static uint32_t global_rc = 0;
static uint32_t mpi_plugin_rc = 0;
static srun_job_t *job = NULL;

extern char **environ;	/* job environment */
bool srun_max_timer = false;
bool srun_shutdown = false;
int sig_array[] = {
	SIGINT, SIGQUIT, SIGCONT, SIGTERM, SIGHUP,
	SIGALRM, SIGUSR1, SIGUSR2, SIGPIPE, 0 };
bitstr_t *g_het_grp_bits = NULL;

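/* Arguments handed to each _launch_one_app() thread */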
typedef struct _launch_app_data {
	bool got_alloc;
	srun_job_t *job;
	slurm_opt_t *opt_local;
	int *step_cnt;
	pthread_cond_t *step_cond;
	pthread_mutex_t *step_mutex;
} _launch_app_data_t;

/*
 * forward declaration of static funcs
 */
static int _file_bcast(slurm_opt_t *opt_local, srun_job_t *job);
static void _launch_app(srun_job_t *job, List srun_job_list, bool got_alloc);
static void *_launch_one_app(void *data);
static void _pty_restore(void);
static void _set_exit_code(void);
static void _set_node_alias(void);
static void _setup_env_working_cluster(void);
static void _setup_job_env(srun_job_t *job, List srun_job_list,
			   bool got_alloc);
static void _setup_one_job_env(slurm_opt_t *opt_local, srun_job_t *job,
			       bool got_alloc);
static int _slurm_debug_env_val(void);
static char *_uint16_array_to_str(int count, const uint16_t *array);

/*
 * from libvirt-0.6.2 GPL2
 *
 * console.c: A dumb serial console client
 *
 * Copyright (C) 2007, 2008 Red Hat, Inc.
 *
 */
#ifndef HAVE_CFMAKERAW
void cfmakeraw(struct termios *attr)
{
	attr->c_iflag &= ~(IGNBRK | BRKINT | PARMRK | ISTRIP
			   | INLCR | IGNCR | ICRNL | IXON);
	attr->c_oflag &= ~OPOST;
	attr->c_lflag &= ~(ECHO | ECHONL | ICANON | ISIG | IEXTEN);
	attr->c_cflag &= ~(CSIZE | PARENB);
	attr->c_cflag |= CS8;
}
#endif

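/*
 * Return true unless SchedulerParameters contains "disable_hetero_steps" or
 * "disable_hetjob_steps", which disable job steps that span heterogeneous
 * job components.
 */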
static bool _enable_het_job_steps(void)
{
	bool enabled = true;
	char *sched_params = slurm_get_sched_params();

	/* Continue supporting old terminology */
	if (xstrcasestr(sched_params, "disable_hetero_steps") ||
	    xstrcasestr(sched_params, "disable_hetjob_steps"))
		enabled = false;
	else if (xstrcasestr(sched_params, "enable_hetero_steps") ||
		 xstrcasestr(sched_params, "enable_hetjob_steps"))
		enabled = true;

	xfree(sched_params);
	return enabled;
}

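/*
 * srun - command entry point: initialize plugins and logging, create the
 * job allocation and/or job step(s), launch the application(s), and return
 * the aggregated exit code.
 */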
int srun(int ac, char **av)
{
	int debug_level;
	log_options_t logopt = LOG_OPTS_STDERR_ONLY;
	bool got_alloc = false;
	List srun_job_list = NULL;

	slurm_conf_init(NULL);
	debug_level = _slurm_debug_env_val();
	logopt.stderr_level += debug_level;
	log_init(xbasename(av[0]), logopt, 0, NULL);
	_set_exit_code();

	if (slurm_select_init(0) != SLURM_SUCCESS)
		fatal("failed to initialize node selection plugin");

	if (switch_init(0) != SLURM_SUCCESS)
		fatal("failed to initialize switch plugins");

	_setup_env_working_cluster();

	init_srun(ac, av, &logopt, debug_level, 1);
	if (opt_list) {
		if (!_enable_het_job_steps())
			fatal("Job steps that span multiple components of a heterogeneous job are not currently supported");
		create_srun_job((void **) &srun_job_list, &got_alloc, 0, 1);
	} else
		create_srun_job((void **) &job, &got_alloc, 0, 1);

	_setup_job_env(job, srun_job_list, got_alloc);
	_set_node_alias();
	_launch_app(job, srun_job_list, got_alloc);

	if ((global_rc & 0xff) == SIG_OOM)
		global_rc = 1;	/* Exit code 1 */
	else if (mpi_plugin_rc) {
		/*
		 * The MPI plugin may have more precise information in some
		 * cases. For example, if a PMI abort was issued by task X
		 * with return code RC, the expectation is that srun will
		 * return RC as its own return code. However, to ensure
		 * proper cleanup, the plugin kills the job with SIGKILL,
		 * which obscures the original reason for the job exit.
		 */
		global_rc = mpi_plugin_rc;
	}

#ifdef MEMORY_LEAK_DEBUG
	slurm_select_fini();
	switch_fini();
	slurm_reset_all_options(&opt, false);
	slurm_auth_fini();
	slurm_conf_destroy();
	log_fini();
#endif /* MEMORY_LEAK_DEBUG */

	return (int)global_rc;
}

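/*
 * Thread function to launch one application (job step). The first thread to
 * arrive runs the common pre-launch logic once for the entire (het)job while
 * the others wait, then each thread launches its step, waits for completion,
 * and relaunches if the wait indicates it should.
 */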
static void *_launch_one_app(void *data)
{
	static pthread_mutex_t launch_mutex = PTHREAD_MUTEX_INITIALIZER;
	static pthread_cond_t launch_cond = PTHREAD_COND_INITIALIZER;
	static bool launch_begin = false;
	static bool launch_fini = false;
	_launch_app_data_t *opts = (_launch_app_data_t *) data;
	slurm_opt_t *opt_local = opts->opt_local;
	srun_job_t *job = opts->job;
	bool got_alloc = opts->got_alloc;
	slurm_step_io_fds_t cio_fds = SLURM_STEP_IO_FDS_INITIALIZER;
	slurm_step_launch_callbacks_t step_callbacks;

	memset(&step_callbacks, 0, sizeof(step_callbacks));
	step_callbacks.step_signal = launch_g_fwd_signal;

	/*
	 * Run pre-launch once for entire hetjob
	 */
	slurm_mutex_lock(&launch_mutex);
	if (!launch_begin) {
		launch_begin = true;
		slurm_mutex_unlock(&launch_mutex);

		pre_launch_srun_job(job, 0, 1, opt_local);

		slurm_mutex_lock(&launch_mutex);
		launch_fini = true;
		slurm_cond_broadcast(&launch_cond);
	} else {
		while (!launch_fini)
			slurm_cond_wait(&launch_cond, &launch_mutex);
	}
	slurm_mutex_unlock(&launch_mutex);

relaunch:
	launch_common_set_stdio_fds(job, &cio_fds, opt_local);

	if (!launch_g_step_launch(job, &cio_fds, &global_rc, &step_callbacks,
				  opt_local)) {
		if (launch_g_step_wait(job, got_alloc, opt_local) == -1)
			goto relaunch;
		if (job->step_ctx->launch_state->mpi_rc > mpi_plugin_rc)
			mpi_plugin_rc = job->step_ctx->launch_state->mpi_rc;
	}

	if (opts->step_mutex) {
		slurm_mutex_lock(opts->step_mutex);
		(*opts->step_cnt)--;
		slurm_cond_broadcast(opts->step_cond);
		slurm_mutex_unlock(opts->step_mutex);
	}
	xfree(data);
	return NULL;
}

/*
 * The het_job_node_list may not be ordered across multiple components, which
 * can cause problems for some MPI implementations. Put the het_job_node_list
 * records in alphabetic order and reorder het_job_task_cnts and het_job_tids
 * to match.
 */
static void _reorder_het_job_recs(char **in_node_list, uint16_t **in_task_cnts,
				  uint32_t ***in_tids, int total_nnodes)
{
	hostlist_t in_hl, out_hl;
	uint16_t *out_task_cnts = NULL;
	uint32_t **out_tids = NULL;
	char *hostname;
	int i, j;

	in_hl = hostlist_create(*in_node_list);
	if (!in_hl) {
		error("%s: Invalid hostlist(%s)", __func__, *in_node_list);
		return;
	}
	out_hl = hostlist_copy(in_hl);
	hostlist_sort(out_hl);
	hostlist_uniq(out_hl);
	i = hostlist_count(out_hl);
	if (i != total_nnodes) {
		error("%s: Invalid hostlist(%s) count(%d)", __func__,
		      *in_node_list, total_nnodes);
		goto fini;
	}

	out_task_cnts = xmalloc(sizeof(uint16_t) * total_nnodes);
	out_tids = xmalloc(sizeof(uint32_t *) * total_nnodes);
	for (i = 0; i < total_nnodes; i++) {
		hostname = hostlist_nth(out_hl, i);
		if (!hostname) {
			error("%s: Invalid hostlist(%s) count(%d)", __func__,
			      *in_node_list, total_nnodes);
			break;
		}
		j = hostlist_find(in_hl, hostname);
		if (j == -1) {
			error("%s: Invalid hostlist(%s) parsing", __func__,
			      *in_node_list);
			free(hostname);
			break;
		}
		out_task_cnts[i] = in_task_cnts[0][j];
		out_tids[i] = in_tids[0][j];
		free(hostname);
	}

	if (i >= total_nnodes) {	/* Success */
		xfree(*in_node_list);
		*in_node_list = hostlist_ranged_string_xmalloc(out_hl);
		xfree(*in_task_cnts);
		*in_task_cnts = out_task_cnts;
		out_task_cnts = NULL;
		xfree(*in_tids);
		*in_tids = out_tids;
		out_tids = NULL;
	}

#if 0
	info("NODE_LIST[%d]:%s", total_nnodes, *in_node_list);
	for (i = 0; i < total_nnodes; i++) {
		info("TASK_CNT[%d]:%u", i, in_task_cnts[0][i]);
		for (j = 0; j < in_task_cnts[0][i]; j++) {
			info("TIDS[%d][%d]: %u", i, j, in_tids[0][i][j]);
		}
	}
#endif

fini:	hostlist_destroy(in_hl);
	hostlist_destroy(out_hl);
	xfree(out_task_cnts);
	xfree(out_tids);
}

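/*
 * Launch the application(s). For a heterogeneous job, aggregate the node
 * list, task counts and task IDs of all components, copy that information
 * back into each component's job record, then spawn one _launch_one_app()
 * thread per component and wait for all of them to finish.
 */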
static void _launch_app(srun_job_t *job, List srun_job_list, bool got_alloc)
{
	ListIterator opt_iter, job_iter;
	slurm_opt_t *opt_local = NULL;
	_launch_app_data_t *opts;
	int total_ntasks = 0, total_nnodes = 0, step_cnt = 0, node_offset = 0;
	pthread_mutex_t step_mutex = PTHREAD_MUTEX_INITIALIZER;
	pthread_cond_t step_cond = PTHREAD_COND_INITIALIZER;
	srun_job_t *first_job = NULL;
	char *launch_type, *het_job_node_list = NULL;
	bool need_mpir = false;
	uint16_t *tmp_task_cnt = NULL, *het_job_task_cnts = NULL;
	uint32_t **tmp_tids = NULL, **het_job_tids = NULL;
	uint32_t *het_job_tid_offsets = NULL;

	launch_type = slurm_get_launch_type();
	if (launch_type && strstr(launch_type, "slurm"))
		need_mpir = true;
	xfree(launch_type);

	if (srun_job_list) {
		int het_job_step_cnt = list_count(srun_job_list);
		first_job = (srun_job_t *) list_peek(srun_job_list);
		if (!opt_list) {
			if (first_job)
				fini_srun(first_job, got_alloc, &global_rc, 0);
			fatal("%s: have srun_job_list, but no opt_list",
			      __func__);
		}

		job_iter = list_iterator_create(srun_job_list);
		while ((job = list_next(job_iter))) {
			char *node_list = NULL;
			int i, node_inx;
			total_ntasks += job->ntasks;
			total_nnodes += job->nhosts;

			xrealloc(het_job_task_cnts,
				 sizeof(uint16_t) * total_nnodes);
			(void) slurm_step_ctx_get(job->step_ctx,
						  SLURM_STEP_CTX_TASKS,
						  &tmp_task_cnt);
			xrealloc(het_job_tid_offsets,
				 sizeof(uint32_t) * total_ntasks);

			for (i = total_ntasks - job->ntasks;
			     i < total_ntasks; i++)
				het_job_tid_offsets[i] = job->het_job_offset;

			if (!tmp_task_cnt) {
				fatal("%s: job %u has NULL task array",
				      __func__, job->jobid);
				break;	/* To eliminate CLANG error */
			}
			memcpy(het_job_task_cnts + node_offset, tmp_task_cnt,
			       sizeof(uint16_t) * job->nhosts);

			xrealloc(het_job_tids,
				 sizeof(uint32_t *) * total_nnodes);
			(void) slurm_step_ctx_get(job->step_ctx,
						  SLURM_STEP_CTX_TIDS,
						  &tmp_tids);
			if (!tmp_tids) {
				fatal("%s: job %u has NULL task ID array",
				      __func__, job->jobid);
				break;	/* To eliminate CLANG error */
			}
			for (node_inx = 0; node_inx < job->nhosts;
			     node_inx++) {
				uint32_t *node_tids;
				node_tids = xmalloc(sizeof(uint32_t) *
						    tmp_task_cnt[node_inx]);
				for (i = 0; i < tmp_task_cnt[node_inx]; i++) {
					node_tids[i] = tmp_tids[node_inx][i] +
						job->het_job_task_offset;
				}
				het_job_tids[node_offset + node_inx] =
					node_tids;
			}

			(void) slurm_step_ctx_get(job->step_ctx,
						  SLURM_STEP_CTX_NODE_LIST,
						  &node_list);
			if (!node_list) {
				fatal("%s: job %u has NULL hostname",
				      __func__, job->jobid);
			}
			if (het_job_node_list)
				xstrfmtcat(het_job_node_list, ",%s",
					   node_list);
			else
				het_job_node_list = xstrdup(node_list);
			xfree(node_list);
			node_offset += job->nhosts;
		}
		list_iterator_reset(job_iter);
		_reorder_het_job_recs(&het_job_node_list, &het_job_task_cnts,
				      &het_job_tids, total_nnodes);

		if (need_mpir)
			mpir_init(total_ntasks);

		opt_iter = list_iterator_create(opt_list);

		/* copy aggregated hetjob data back into each sub-job */
		while ((opt_local = list_next(opt_iter))) {
			srun_opt_t *srun_opt = opt_local->srun_opt;
			xassert(srun_opt);
			job = list_next(job_iter);
			if (!job) {
				slurm_mutex_lock(&step_mutex);
				while (step_cnt > 0)
					slurm_cond_wait(&step_cond,
							&step_mutex);
				slurm_mutex_unlock(&step_mutex);
				if (first_job) {
					fini_srun(first_job, got_alloc,
						  &global_rc, 0);
				}
				fatal("%s: job allocation count does not match request count (%d != %d)",
				      __func__, list_count(srun_job_list),
				      list_count(opt_list));
				break;	/* To eliminate CLANG error */
			}

			slurm_mutex_lock(&step_mutex);
			step_cnt++;
			slurm_mutex_unlock(&step_mutex);
			job->het_job_node_list = xstrdup(het_job_node_list);
			if ((het_job_step_cnt > 1) && het_job_task_cnts &&
			    het_job_tid_offsets) {
				xassert(node_offset == job->het_job_nnodes);
				job->het_job_task_cnts =
					xcalloc(job->het_job_nnodes,
						sizeof(uint16_t));
				memcpy(job->het_job_task_cnts,
				       het_job_task_cnts,
				       sizeof(uint16_t) * job->het_job_nnodes);
				job->het_job_tids =
					xcalloc(job->het_job_nnodes,
						sizeof(uint32_t *));
				memcpy(job->het_job_tids, het_job_tids,
				       sizeof(uint32_t *) *
				       job->het_job_nnodes);

				job->het_job_tid_offsets =
					xcalloc(total_ntasks,
						sizeof(uint32_t));
				memcpy(job->het_job_tid_offsets,
				       het_job_tid_offsets,
				       sizeof(uint32_t) * total_ntasks);
			}

			opts = xmalloc(sizeof(_launch_app_data_t));
			opts->got_alloc = got_alloc;
			opts->job = job;
			opts->opt_local = opt_local;
			opts->step_cond = &step_cond;
			opts->step_cnt = &step_cnt;
			opts->step_mutex = &step_mutex;
			srun_opt->het_step_cnt = het_job_step_cnt;

			slurm_thread_create_detached(NULL, _launch_one_app,
						     opts);
		}
		xfree(het_job_node_list);
		xfree(het_job_task_cnts);
		xfree(het_job_tid_offsets);
		list_iterator_destroy(job_iter);
		list_iterator_destroy(opt_iter);
		slurm_mutex_lock(&step_mutex);
		while (step_cnt > 0)
			slurm_cond_wait(&step_cond, &step_mutex);
		slurm_mutex_unlock(&step_mutex);

		if (first_job)
			fini_srun(first_job, got_alloc, &global_rc, 0);
	} else {
		int i;
		if (need_mpir)
			mpir_init(job->ntasks);
		if (job->het_job_id && (job->het_job_id != NO_VAL)) {
			(void) slurm_step_ctx_get(job->step_ctx,
						  SLURM_STEP_CTX_TASKS,
						  &tmp_task_cnt);
			job->het_job_task_cnts = xcalloc(job->het_job_nnodes,
							 sizeof(uint16_t));
			memcpy(job->het_job_task_cnts, tmp_task_cnt,
			       sizeof(uint16_t) * job->het_job_nnodes);
			(void) slurm_step_ctx_get(job->step_ctx,
						  SLURM_STEP_CTX_TIDS,
						  &tmp_tids);
			job->het_job_tids = xcalloc(job->het_job_nnodes,
						    sizeof(uint32_t *));
			memcpy(job->het_job_tids, tmp_tids,
			       sizeof(uint32_t *) * job->het_job_nnodes);

			(void) slurm_step_ctx_get(job->step_ctx,
						  SLURM_STEP_CTX_NODE_LIST,
						  &job->het_job_node_list);
			if (!job->het_job_node_list)
				fatal("%s: job %u has NULL hostname",
				      __func__, job->jobid);

			job->het_job_tid_offsets = xcalloc(job->ntasks,
							   sizeof(uint32_t));
			if (job->het_job_offset) {
				/*
				 * Only starting one hetjob component,
				 * het_job_offset should be zero
				 */
				for (i = 0; i < job->ntasks; i++) {
					job->het_job_tid_offsets[i] =
						job->het_job_offset;
				}
			}
		}
		opts = xmalloc(sizeof(_launch_app_data_t));
		opts->got_alloc = got_alloc;
		opts->job = job;
		opts->opt_local = &opt;
		sropt.het_step_cnt = 1;
		_launch_one_app(opts);
		fini_srun(job, got_alloc, &global_rc, 0);
	}
}

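/*
 * Build the environment for one job component: broadcast the executable if
 * requested, fill in an env_t record from the local options and step
 * context, configure the pseudo-terminal when --pty is in effect, and
 * export the result into the job's environment.
 */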
static void _setup_one_job_env(slurm_opt_t *opt_local, srun_job_t *job,
			       bool got_alloc)
{
	env_t *env = xmalloc(sizeof(env_t));
	uint16_t *tasks = NULL;
	srun_opt_t *srun_opt = opt_local->srun_opt;
	xassert(srun_opt);

	xassert(job);

	env->localid = -1;
	env->nodeid = -1;
	env->procid = -1;
	env->stepid = -1;

	if (srun_opt->bcast_flag)
		_file_bcast(opt_local, job);
	if (opt_local->cpus_set)
		env->cpus_per_task = opt_local->cpus_per_task;
	if (opt_local->ntasks_per_node != NO_VAL)
		env->ntasks_per_node = opt_local->ntasks_per_node;
	if (opt_local->ntasks_per_socket != NO_VAL)
		env->ntasks_per_socket = opt_local->ntasks_per_socket;
	if (opt_local->ntasks_per_core != NO_VAL)
		env->ntasks_per_core = opt_local->ntasks_per_core;
	env->distribution = opt_local->distribution;
	if (opt_local->plane_size != NO_VAL)
		env->plane_size = opt_local->plane_size;
	env->cpu_bind_type = srun_opt->cpu_bind_type;
	env->cpu_bind = srun_opt->cpu_bind;

	env->cpu_freq_min = opt_local->cpu_freq_min;
	env->cpu_freq_max = opt_local->cpu_freq_max;
	env->cpu_freq_gov = opt_local->cpu_freq_gov;
	env->mem_bind_type = opt_local->mem_bind_type;
	env->mem_bind = opt_local->mem_bind;
	env->overcommit = opt_local->overcommit;
	env->slurmd_debug = srun_opt->slurmd_debug;
	env->labelio = srun_opt->labelio;
	env->comm_port = slurmctld_comm_port;
	if (opt_local->job_name)
		env->job_name = opt_local->job_name;

	slurm_step_ctx_get(job->step_ctx, SLURM_STEP_CTX_TASKS, &tasks);

	env->select_jobinfo = job->select_jobinfo;
	if (job->het_job_node_list)
		env->nodelist = job->het_job_node_list;
	else
		env->nodelist = job->nodelist;
	env->partition = job->partition;
	if (job->het_job_nnodes != NO_VAL)
		env->nhosts = job->het_job_nnodes;
	else if (got_alloc)	/* Don't overwrite unless we got allocation */
		env->nhosts = job->nhosts;
	if (job->het_job_ntasks != NO_VAL)
		env->ntasks = job->het_job_ntasks;
	else
		env->ntasks = job->ntasks;
	env->task_count = _uint16_array_to_str(job->nhosts, tasks);
	if (job->het_job_id != NO_VAL)
		env->jobid = job->het_job_id;
	else
		env->jobid = job->jobid;
	env->stepid = job->stepid;
	env->account = job->account;
	env->qos = job->qos;
	env->resv_name = job->resv_name;
	env->uid = getuid();
	env->user_name = uid_to_string(env->uid);

	if (srun_opt->pty && (set_winsize(job) < 0)) {
		error("Not using a pseudo-terminal, disregarding --pty option");
		srun_opt->pty = false;
	}
	if (srun_opt->pty) {
		struct termios term;
		int fd = STDIN_FILENO;

		/* Save terminal settings for restore */
		tcgetattr(fd, &termdefaults);
		tcgetattr(fd, &term);
		/* Set raw mode on local tty */
		cfmakeraw(&term);
		/* Re-enable output processing such that debug() and
		 * error() work properly. */
		term.c_oflag |= OPOST;
		tcsetattr(fd, TCSANOW, &term);
		atexit(&_pty_restore);

		block_sigwinch();
		pty_thread_create(job);
		env->pty_port = job->pty_port;
		env->ws_col = job->ws_col;
		env->ws_row = job->ws_row;
	}

	setup_env(env, srun_opt->preserve_env);
	env_array_merge(&job->env, (const char **)environ);
	xfree(env->task_count);
	xfree(env->user_name);
	xfree(env);
}

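/*
 * Set up the environment for every component of the job: iterate over the
 * per-component option structures of a heterogeneous job, or handle the
 * single job directly.
 */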
static void _setup_job_env(srun_job_t *job, List srun_job_list, bool got_alloc)
{
	ListIterator opt_iter, job_iter;
	slurm_opt_t *opt_local;

	if (srun_job_list) {
		srun_job_t *first_job = list_peek(srun_job_list);
		if (!opt_list) {
			if (first_job)
				fini_srun(first_job, got_alloc, &global_rc, 0);
			fatal("%s: have srun_job_list, but no opt_list",
			      __func__);
		}
		job_iter = list_iterator_create(srun_job_list);
		opt_iter = list_iterator_create(opt_list);
		while ((opt_local = list_next(opt_iter))) {
			job = list_next(job_iter);
			if (!job) {
				if (first_job) {
					fini_srun(first_job, got_alloc,
						  &global_rc, 0);
				}
				fatal("%s: job allocation count does not match request count (%d != %d)",
				      __func__, list_count(srun_job_list),
				      list_count(opt_list));
			}
			_setup_one_job_env(opt_local, job, got_alloc);
		}
		list_iterator_destroy(job_iter);
		list_iterator_destroy(opt_iter);
	} else if (job) {
		_setup_one_job_env(&opt, job, got_alloc);
	} else {
		fatal("%s: No job information", __func__);
	}
}

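/*
 * Broadcast the command's executable file to the allocated nodes, then
 * point argv[0] at the broadcast destination path.
 */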
static int _file_bcast(slurm_opt_t *opt_local, srun_job_t *job)
{
	srun_opt_t *srun_opt = opt_local->srun_opt;
	struct bcast_parameters *params;
	int rc;
	xassert(srun_opt);

	if ((srun_opt->argc == 0) || (srun_opt->argv[0] == NULL)) {
		error("No command name to broadcast");
		return SLURM_ERROR;
	}
	params = xmalloc(sizeof(struct bcast_parameters));
	params->block_size = 8 * 1024 * 1024;
	params->compress = srun_opt->compress;
	if (srun_opt->bcast_file) {
		params->dst_fname = xstrdup(srun_opt->bcast_file);
	} else {
		xstrfmtcat(params->dst_fname, "%s/slurm_bcast_%u.%u",
			   opt_local->chdir, job->jobid, job->stepid);
	}
	params->fanout = 0;
	params->job_id = job->jobid;
	params->force = true;
	if (srun_opt->het_grp_bits)
		params->het_job_offset = bit_ffs(srun_opt->het_grp_bits);
	else
		params->het_job_offset = NO_VAL;
	params->preserve = true;
	params->src_fname = srun_opt->argv[0];
	params->step_id = job->stepid;
	params->timeout = 0;
	params->verbose = 0;

	rc = bcast_file(params);
	if (rc == SLURM_SUCCESS) {
		xfree(srun_opt->argv[0]);
		srun_opt->argv[0] = params->dst_fname;
	} else {
		xfree(params->dst_fname);
	}
	xfree(params);

	return rc;
}

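/*
 * Return the numeric value of the SLURM_DEBUG environment variable, used as
 * an adjustment to the stderr log level. Values below -LOG_LEVEL_INFO are
 * clamped and malformed values yield 0.
 */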
static int _slurm_debug_env_val(void)
{
	long int level = 0;
	const char *val;

	if ((val = getenv("SLURM_DEBUG"))) {
		char *p;
		if ((level = strtol(val, &p, 10)) < -LOG_LEVEL_INFO)
			level = -LOG_LEVEL_INFO;
		if (p && *p != '\0')
			level = 0;
	}
	return ((int) level);
}

/*
 * Return a string representation of an array of uint16_t elements.
 * Each value in the array is printed in decimal notation and elements
 * are separated by a comma. If sequential elements in the array
 * contain the same value, the value is written out just once followed
 * by "(xN)", where "N" is the number of times the value is repeated.
 *
 * Example:
 *   The array "1, 2, 1, 1, 1, 3, 2" becomes the string "1,2,1(x3),3,2"
 *
 * Returns an xmalloc'ed string. Free with xfree().
 */
static char *_uint16_array_to_str(int array_len, const uint16_t *array)
{
	int i;
	int previous = 0;
	char *sep = ",";	/* separator */
	char *str = xstrdup("");

	if (array == NULL)
		return str;

	for (i = 0; i < array_len; i++) {
		if ((i + 1 < array_len) &&
		    (array[i] == array[i + 1])) {
			previous++;
			continue;
		}

		if (i == array_len - 1)	/* last time through loop */
			sep = "";
		if (previous > 0) {
			xstrfmtcat(str, "%u(x%u)%s",
				   array[i], previous + 1, sep);
		} else {
			xstrfmtcat(str, "%u%s", array[i], sep);
		}
		previous = 0;
	}

	return str;
}

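/*
 * Allow the exit codes used for errors and for --immediate failures to be
 * overridden via the SLURM_EXIT_ERROR and SLURM_EXIT_IMMEDIATE environment
 * variables.
 */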
static void _set_exit_code(void)
{
	int i;
	char *val;

	if ((val = getenv("SLURM_EXIT_ERROR"))) {
		i = atoi(val);
		if (i == 0)
			error("SLURM_EXIT_ERROR has zero value");
		else
			error_exit = i;
	}

	if ((val = getenv("SLURM_EXIT_IMMEDIATE"))) {
		i = atoi(val);
		if (i == 0)
			error("SLURM_EXIT_IMMEDIATE has zero value");
		else
			immediate_exit = i;
	}
}

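/*
 * Parse the SLURM_NODE_ALIASES environment variable (formatted as
 * "name:address:hostname[,...]") and register each name/address pair with
 * slurm_reset_alias().
 */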
static void _set_node_alias(void)
{
	char *aliases, *save_ptr = NULL, *tmp;
	char *addr, *hostname, *slurm_name;

	tmp = getenv("SLURM_NODE_ALIASES");
	if (!tmp)
		return;
	aliases = xstrdup(tmp);
	slurm_name = strtok_r(aliases, ":", &save_ptr);
	while (slurm_name) {
		addr = strtok_r(NULL, ":", &save_ptr);
		if (!addr)
			break;
		slurm_reset_alias(slurm_name, addr, addr);
		hostname = strtok_r(NULL, ",", &save_ptr);
		if (!hostname)
			break;
		slurm_name = strtok_r(NULL, ":", &save_ptr);
	}
	xfree(aliases);
}

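/*
 * atexit() handler: restore the terminal settings saved before --pty
 * switched the local tty into raw mode.
 */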
static void _pty_restore(void)
{
	/* STDIN is probably closed by now */
	if (tcsetattr(STDOUT_FILENO, TCSANOW, &termdefaults) < 0)
		fprintf(stderr, "tcsetattr: %s\n", strerror(errno));
}

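/*
 * If SLURM_WORKING_CLUSTER is set and names a cluster other than the local
 * one, build a working_cluster_rec so that subsequent RPCs are directed to
 * that cluster's controller.
 */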
static void _setup_env_working_cluster(void)
{
	char *working_env, *addr_ptr, *port_ptr, *rpc_ptr, *select_ptr;

	if ((working_env = xstrdup(getenv("SLURM_WORKING_CLUSTER"))) == NULL)
		return;

	/* Format is cluster_name:address:port:rpc[:plugin_id_select] */
	if (!(addr_ptr = strchr(working_env, ':')) ||
	    !(port_ptr = strchr(addr_ptr + 1, ':')) ||
	    !(rpc_ptr = strchr(port_ptr + 1, ':'))) {
		error("malformed cluster addr and port in SLURM_WORKING_CLUSTER env var: '%s'",
		      working_env);
		exit(1);
	}

	*addr_ptr++ = '\0';
	*port_ptr++ = '\0';
	*rpc_ptr++ = '\0';

	if ((select_ptr = strchr(rpc_ptr, ':')))
		*select_ptr++ = '\0';

	if (xstrcmp(slurmctld_conf.cluster_name, working_env)) {
		working_cluster_rec = xmalloc(sizeof(slurmdb_cluster_rec_t));
		slurmdb_init_cluster_rec(working_cluster_rec, false);

		working_cluster_rec->name = xstrdup(working_env);
		working_cluster_rec->control_host = xstrdup(addr_ptr);
		working_cluster_rec->control_port = strtol(port_ptr, NULL, 10);
		working_cluster_rec->rpc_version = strtol(rpc_ptr, NULL, 10);
		slurm_set_addr(&working_cluster_rec->control_addr,
			       working_cluster_rec->control_port,
			       working_cluster_rec->control_host);

		if (select_ptr)
			working_cluster_rec->plugin_id_select =
				select_get_plugin_id_pos(strtol(select_ptr,
								NULL, 10));
	}
	xfree(working_env);
}