1 /*****************************************************************************\
2 * src/common/env.c - add an environment variable to environment vector
3 *****************************************************************************
4 * Copyright (C) 2002-2007 The Regents of the University of California.
5 * Copyright (C) 2008-2009 Lawrence Livermore National Security.
6 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
7 * Written by Mark Grondona <mgrondona@llnl.gov>, Danny Auble <da@llnl.gov>.
8 * CODE-OCEC-09-009. All rights reserved.
9 *
10 * This file is part of Slurm, a resource management program.
11 * For details, see <https://slurm.schedmd.com/>.
12 * Please also read the included file: DISCLAIMER.
13 *
14 * Slurm is free software; you can redistribute it and/or modify it under
15 * the terms of the GNU General Public License as published by the Free
16 * Software Foundation; either version 2 of the License, or (at your option)
17 * any later version.
18 *
19 * In addition, as a special exception, the copyright holders give permission
20 * to link the code of portions of this program with the OpenSSL library under
21 * certain conditions as described in each individual source file, and
22 * distribute linked combinations including the two. You must obey the GNU
23 * General Public License in all respects for all of the code used other than
24 * OpenSSL. If you modify file(s) with this exception, you may extend this
25 * exception to your version of the file(s), but you are not obligated to do
26 * so. If you do not wish to do so, delete this exception statement from your
27 * version. If you delete this exception statement from all source files in
28 * the program, then also delete it here.
29 *
30 * Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
31 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
32 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
33 * details.
34 *
35 * You should have received a copy of the GNU General Public License along
36 * with Slurm; if not, write to the Free Software Foundation, Inc.,
37 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
38 \*****************************************************************************/
39
40 #include "config.h"
41
42 #include <fcntl.h>
43 #include <poll.h>
44 #include <signal.h>
45 #include <stdarg.h>
46 #include <stdio.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #include <sys/stat.h>
50 #include <sys/types.h>
51 #include <sys/param.h> /* MAXPATHLEN */
52 #include <unistd.h>
53
54 #include "slurm/slurm.h"
55 #include "src/common/cpu_frequency.h"
56 #include "src/common/log.h"
57 #include "src/common/env.h"
58 #include "src/common/fd.h"
59 #include "src/common/node_select.h"
60 #include "src/common/macros.h"
61 #include "src/common/proc_args.h"
62 #include "src/common/read_config.h"
63 #include "src/common/slurm_opt.h"
64 #include "src/common/slurm_protocol_api.h"
65 #include "src/common/slurm_protocol_defs.h"
66 #include "src/common/slurm_step_layout.h"
67 #include "src/common/slurmdb_defs.h"
68 #include "src/common/strlcpy.h"
69 #include "src/common/xassert.h"
70 #include "src/common/xmalloc.h"
71 #include "src/common/xstring.h"
72
/*
 * Define slurm-specific aliases for use by plugins, see slurm_xlator.h
 * for details.
 *
 * Each line names a function from this file followed by its alias.
 * NOTE(review): setenvf is aliased as slurm_setenvpf (with a 'p') while
 * every other alias is simply the slurm_-prefixed function name --
 * presumably this matches slurm_xlator.h; confirm before changing.
 */
strong_alias(setenvf,			slurm_setenvpf);
strong_alias(unsetenvp,			slurm_unsetenvp);
strong_alias(getenvp,			slurm_getenvp);
strong_alias(env_array_create,		slurm_env_array_create);
strong_alias(env_array_merge,		slurm_env_array_merge);
strong_alias(env_array_copy,		slurm_env_array_copy);
strong_alias(env_array_free,		slurm_env_array_free);
strong_alias(env_array_append,		slurm_env_array_append);
strong_alias(env_array_append_fmt,	slurm_env_array_append_fmt);
strong_alias(env_array_overwrite,	slurm_env_array_overwrite);
strong_alias(env_array_overwrite_fmt,	slurm_env_array_overwrite_fmt);
strong_alias(env_array_overwrite_het_fmt, slurm_env_array_overwrite_het_fmt);
strong_alias(env_unset_environment,	slurm_env_unset_environment);
90
/*
 * ENV_BUFSIZE:    size in bytes of the scratch buffer that setenvfs() and
 *                 setenvf() allocate to format a variable.
 * MAX_ENV_STRLEN: length limit applied to a formatted variable; setenvfs()
 *                 and setenvf() reject anything that reaches it.
 */
#define ENV_BUFSIZE (256 * 1024)
#define MAX_ENV_STRLEN (32 * 4096)	/* Needed for CPU_BIND and MEM_BIND on
					 * SGI systems with huge CPU counts */
94
/*
 * Locate the entry for variable `name' in the NULL-terminated vector `env'.
 *
 * An entry matches when it begins with `name' immediately followed by '='.
 * Returns a pointer to the matching slot, or a pointer to the terminating
 * NULL slot when `name' is not currently set in `env'.
 * Neither `env' nor `name' may be NULL.
 */
static char **
_find_name_in_env(char **env, const char *name)
{
	const size_t name_len = strlen(name);
	char **slot;

	for (slot = env; *slot != NULL; slot++) {
		if ((strncmp(*slot, name, name_len) == 0) &&
		    ((*slot)[name_len] == '='))
			return slot;
	}

	/* not found: `slot' now addresses the NULL terminator */
	return slot;
}
123
/*
 * Grow the environment vector `*envp' by one slot.
 *
 * The vector is reallocated to hold one more pointer than its current
 * allocation (as reported by xsize()) and the new final slot is set to
 * NULL.
 *
 * Returns a pointer to the first free slot, i.e. the slot directly after
 * the last non-NULL entry (slot 0 when the vector holds no entries). The
 * caller is expected to store the new entry there.
 */
static char **
_extend_env(char ***envp)
{
	char **ep;
	size_t newcnt = (xsize(*envp) / sizeof(char *)) + 1;

	*envp = xrealloc(*envp, newcnt * sizeof(char *));
	(*envp)[newcnt - 1] = NULL;

	/*
	 * Walk back over the trailing NULL slots, but never step in front of
	 * the vector: an empty vector has no non-NULL entry to stop on, and
	 * scanning for one would read before the start of the allocation.
	 */
	ep = &((*envp)[newcnt - 1]);
	while ((ep > *envp) && (ep[-1] == NULL))
		--ep;

	return ep;
}
147
/*
 * Report whether variable `name' should not be set for srun's
 * --get-user-env option. `value' is accepted but not examined.
 */
static bool _discard_env(char *name, char *value)
{
	static const char *const skipped[] = {
		"DISPLAY",
		"ENVIRONMENT",
		"HOSTNAME",
	};
	size_t i;

	for (i = 0; i < (sizeof(skipped) / sizeof(skipped[0])); i++) {
		if (!xstrcmp(name, skipped[i]))
			return true;
	}

	return false;
}
159
/*
 * Count the entries of the NULL-terminated environment vector `env'.
 * A NULL `env' is treated as empty and yields 0.
 */
int
envcount(char **env)
{
	char **cur;
	int total = 0;

	if (env == NULL)
		return 0;

	for (cur = env; *cur != NULL; cur++)
		total++;

	return total;
}
171
172 /*
173 * _setenvfs() (stolen from pdsh)
174 *
175 * Set a variable in the callers environment. Args are printf style.
176 * XXX Space is allocated on the heap and will never be reclaimed.
177 * Example: setenvfs("RMS_RANK=%d", rank);
178 */
179 int
setenvfs(const char * fmt,...)180 setenvfs(const char *fmt, ...)
181 {
182 va_list ap;
183 char *buf, *bufcpy, *loc;
184 int rc, size;
185
186 buf = xmalloc(ENV_BUFSIZE);
187 va_start(ap, fmt);
188 vsnprintf(buf, ENV_BUFSIZE, fmt, ap);
189 va_end(ap);
190
191 size = strlen(buf);
192 bufcpy = xstrdup(buf);
193 xfree(buf);
194
195 if (size >= MAX_ENV_STRLEN) {
196 if ((loc = strchr(bufcpy, '=')))
197 loc[0] = '\0';
198 error("environment variable %s is too long", bufcpy);
199 xfree(bufcpy);
200 rc = ENOMEM;
201 } else {
202 rc = putenv(bufcpy);
203 }
204
205 return rc;
206 }
207
setenvf(char *** envp,const char * name,const char * fmt,...)208 int setenvf(char ***envp, const char *name, const char *fmt, ...)
209 {
210 char *value;
211 va_list ap;
212 int size, rc;
213
214 if (!name || name[0] == '\0')
215 return EINVAL;
216
217 value = xmalloc(ENV_BUFSIZE);
218 va_start(ap, fmt);
219 vsnprintf(value, ENV_BUFSIZE, fmt, ap);
220 va_end(ap);
221
222 size = strlen(name) + strlen(value) + 2;
223 if (size >= MAX_ENV_STRLEN) {
224 error("environment variable %s is too long", name);
225 return ENOMEM;
226 }
227
228 if (envp && *envp) {
229 if (env_array_overwrite(envp, name, value) == 1)
230 rc = 0;
231 else
232 rc = 1;
233 } else {
234 rc = setenv(name, value, 1);
235 }
236
237 xfree(value);
238 return rc;
239 }
240
/*
 * Remove every entry for environment variable `name' from the
 * "environment" contained in `env'.
 *
 * Each matching string is released with xfree() and the entries behind it
 * are moved down one slot, so the vector stays NULL-terminated. A NULL
 * `env' is ignored.
 *
 * [ This was taken almost verbatim from glibc's
 *   unsetenv() code. ]
 */
void unsetenvp(char **env, const char *name)
{
	char **ep;

	if (env == NULL)
		return;

	ep = env;
	while ((ep = _find_name_in_env(ep, name)) && (*ep != NULL)) {
		char **dp = ep;

		xfree(*ep);
		/* close the gap; the terminating NULL is copied down too */
		do
			dp[0] = dp[1];
		while (*dp++);

		/*
		 * Continue the loop in case `name' appears again. `ep' is
		 * deliberately not advanced: the slot now holds the entry
		 * that followed the removed one, and it has to be examined
		 * as well or an adjacent duplicate would be left behind.
		 */
	}
}
268
/*
 * Look up variable `name' in the environment vector `env'.
 *
 * Returns a pointer to the value (the text after "name=") inside the
 * stored entry itself, not a copy. Returns NULL if `name' is NULL, if
 * `env' is NULL or has no entries, or if the variable is not present.
 */
char *getenvp(char **env, const char *name)
{
	char **match;

	if ((name == NULL) || (env == NULL) || (env[0] == NULL))
		return NULL;

	match = _find_name_in_env(env, name);
	if (*match == NULL)
		return NULL;

	/* step over "name" and the '=' that follows it */
	return *match + strlen(name) + 1;
}
285
/*
 * Populate an environment with the SLURM_* variables (and, for batch jobs,
 * the SBATCH_MEM_BIND* variables) derived from the fields of `env'.
 *
 * Variables are written with setenvf(): into the env->env vector when it is
 * non-NULL, otherwise into the calling process's environment.
 *
 * env          IN/OUT - description of the job/step and target environment
 * preserve_env IN     - if true, SLURM_NTASKS, SLURM_NPROCS, SLURM_NNODES
 *                       and SLURM_TASKS_PER_NODE are not set
 *
 * Returns SLURM_SUCCESS, or SLURM_ERROR if `env' is NULL or if any variable
 * could not be set. A failure does not stop processing; the remaining
 * variables are still attempted.
 */
int setup_env(env_t *env, bool preserve_env)
{
	int rc = SLURM_SUCCESS;
	char *addr, *dist = NULL, *lllp_dist = NULL;
	char *slurm_conf, *colon;
	char addrbuf[INET_ADDRSTRLEN];

	if (env == NULL)
		return SLURM_ERROR;

	/*
	 * Always force SLURM_CONF into the environment. This ensures the
	 * "configless" operation is working, and prevents the client commands
	 * from falling back to separate RPC requests in case the cache dir
	 * is unresponsive.
	 *
	 * getenv() returns NULL when SLURM_CONF is not set in this process;
	 * handing NULL to "%s" is undefined behaviour, so in that case there
	 * is no value to forward and the variable is left alone.
	 */
	slurm_conf = getenv("SLURM_CONF");
	if (slurm_conf &&
	    setenvf(&env->env, "SLURM_CONF", "%s", slurm_conf)) {
		error("Unable to set SLURM_CONF environment variable");
		rc = SLURM_ERROR;
	}
	/*
	 * Similarly, prevent this option from leaking in. SLURM_CONF would
	 * always take precedence, but tidy it up anyways.
	 */
	unsetenvp(env->env, "SLURM_CONF_SERVER");

	if (!preserve_env && env->ntasks) {
		if (setenvf(&env->env, "SLURM_NTASKS", "%d", env->ntasks)) {
			error("Unable to set SLURM_NTASKS environment variable");
			rc = SLURM_ERROR;
		}
		if (setenvf(&env->env, "SLURM_NPROCS", "%d", env->ntasks)) {
			error("Unable to set SLURM_NPROCS environment variable");
			rc = SLURM_ERROR;
		}
	}

	if (env->cpus_per_task &&
	    setenvf(&env->env, "SLURM_CPUS_PER_TASK", "%d",
		    env->cpus_per_task)) {
		error("Unable to set SLURM_CPUS_PER_TASK");
		rc = SLURM_ERROR;
	}

	if (env->ntasks_per_node &&
	    setenvf(&env->env, "SLURM_NTASKS_PER_NODE", "%d",
		    env->ntasks_per_node)) {
		error("Unable to set SLURM_NTASKS_PER_NODE");
		rc = SLURM_ERROR;
	}

	if (env->ntasks_per_socket &&
	    setenvf(&env->env, "SLURM_NTASKS_PER_SOCKET", "%d",
		    env->ntasks_per_socket)) {
		error("Unable to set SLURM_NTASKS_PER_SOCKET");
		rc = SLURM_ERROR;
	}

	if (env->ntasks_per_core &&
	    setenvf(&env->env, "SLURM_NTASKS_PER_CORE", "%d",
		    env->ntasks_per_core)) {
		error("Unable to set SLURM_NTASKS_PER_CORE");
		rc = SLURM_ERROR;
	}

	if (env->cpus_on_node &&
	    setenvf(&env->env, "SLURM_CPUS_ON_NODE", "%d",
		    env->cpus_on_node)) {
		error("Unable to set SLURM_CPUS_ON_NODE");
		rc = SLURM_ERROR;
	}

	/* task distribution: base method, plane size and lllp method */
	set_distribution(env->distribution, &dist, &lllp_dist);
	if (dist) {
		if (setenvf(&env->env, "SLURM_DISTRIBUTION", "%s", dist)) {
			error("Can't set SLURM_DISTRIBUTION env variable");
			rc = SLURM_ERROR;
		}
	}

	if ((env->distribution & SLURM_DIST_STATE_BASE) == SLURM_DIST_PLANE) {
		if (setenvf(&env->env, "SLURM_DIST_PLANESIZE", "%u",
			    env->plane_size)) {
			error("Can't set SLURM_DIST_PLANESIZE env variable");
			rc = SLURM_ERROR;
		}
	}

	if (lllp_dist) {
		if (setenvf(&env->env, "SLURM_DIST_LLLP", "%s", lllp_dist)) {
			error("Can't set SLURM_DIST_LLLP env variable");
			rc = SLURM_ERROR;
		}
	}

	if (env->cpu_bind_type) {
		char *str_verbose, *str_bind1 = NULL, *str_bind2 = NULL;
		char *str_bind_list, *str_bind_type = NULL, *str_bind = NULL;

		if (!env->batch_flag) {
			unsetenvp(env->env, "SLURM_CPU_BIND");
			unsetenvp(env->env, "SLURM_CPU_BIND_LIST");
			unsetenvp(env->env, "SLURM_CPU_BIND_TYPE");
			unsetenvp(env->env, "SLURM_CPU_BIND_VERBOSE");
		}

		if (env->cpu_bind_type & CPU_BIND_VERBOSE)
			str_verbose = "verbose";
		else
			str_verbose = "quiet";

		/* first part: the unit tasks are bound to */
		if (env->cpu_bind_type & CPU_BIND_TO_THREADS) {
			str_bind1 = "threads";
		} else if (env->cpu_bind_type & CPU_BIND_TO_CORES) {
			str_bind1 = "cores";
		} else if (env->cpu_bind_type & CPU_BIND_TO_SOCKETS) {
			str_bind1 = "sockets";
		} else if (env->cpu_bind_type & CPU_BIND_TO_LDOMS) {
			str_bind1 = "ldoms";
		} else if (env->cpu_bind_type & CPU_BIND_TO_BOARDS) {
			str_bind1 = "boards";
		}

		/* second part: the binding method */
		if (env->cpu_bind_type & CPU_BIND_NONE) {
			str_bind2 = "none";
		} else if (env->cpu_bind_type & CPU_BIND_RANK) {
			str_bind2 = "rank";
		} else if (env->cpu_bind_type & CPU_BIND_MAP) {
			str_bind2 = "map_cpu:";
		} else if (env->cpu_bind_type & CPU_BIND_MASK) {
			str_bind2 = "mask_cpu:";
		} else if (env->cpu_bind_type & CPU_BIND_LDRANK) {
			str_bind2 = "rank_ldom";
		} else if (env->cpu_bind_type & CPU_BIND_LDMAP) {
			str_bind2 = "map_ldom:";
		} else if (env->cpu_bind_type & CPU_BIND_LDMASK) {
			str_bind2 = "mask_ldom:";
		}

		if (env->cpu_bind)
			str_bind_list = env->cpu_bind;
		else
			str_bind_list = "";

		/* combine first and second part with a comma if needed */
		if (str_bind1)
			xstrcat(str_bind_type, str_bind1);
		if (str_bind1 && str_bind2)
			xstrcatchar(str_bind_type, ',');
		if (str_bind2)
			xstrcat(str_bind_type, str_bind2);

		xstrcat(str_bind, str_verbose);
		if (str_bind_type) {
			xstrcatchar(str_bind, ',');
			xstrcat(str_bind, str_bind_type);
			xstrcat(str_bind, str_bind_list);
		} else {
			/* heap copy so the xfree() below is valid either way */
			str_bind_type = xstrdup("");
		}

		if (!env->batch_flag) {
			if (setenvf(&env->env, "SLURM_CPU_BIND", "%s",
				    str_bind)) {
				error("Unable to set SLURM_CPU_BIND");
				rc = SLURM_ERROR;
			}
			if (setenvf(&env->env, "SLURM_CPU_BIND_LIST", "%s",
				    str_bind_list)) {
				error("Unable to set SLURM_CPU_BIND_LIST");
				rc = SLURM_ERROR;
			}
			if (setenvf(&env->env, "SLURM_CPU_BIND_TYPE", "%s",
				    str_bind_type)) {
				error("Unable to set SLURM_CPU_BIND_TYPE");
				rc = SLURM_ERROR;
			}
			if (setenvf(&env->env, "SLURM_CPU_BIND_VERBOSE", "%s",
				    str_verbose)) {
				error("Unable to set SLURM_CPU_BIND_VERBOSE");
				rc = SLURM_ERROR;
			}
		}

		xfree(str_bind);
		xfree(str_bind_type);
	}

	if (env->mem_bind_type) {
		char *str_verbose, *str_bind_type = NULL, *str_bind_list;
		char *str_prefer = NULL, *str_bind = NULL;
		char *str_bind_sort = NULL;

		if (env->batch_flag) {
			unsetenvp(env->env, "SBATCH_MEM_BIND");
			unsetenvp(env->env, "SBATCH_MEM_BIND_LIST");
			unsetenvp(env->env, "SBATCH_MEM_BIND_PREFER");
			/*
			 * SBATCH_MEM_BIND_SORT is only set below when the sort
			 * flag is present, so clear any previous value here,
			 * as is done for SLURM_MEM_BIND_SORT.
			 */
			unsetenvp(env->env, "SBATCH_MEM_BIND_SORT");
			unsetenvp(env->env, "SBATCH_MEM_BIND_TYPE");
			unsetenvp(env->env, "SBATCH_MEM_BIND_VERBOSE");
		} else {
			unsetenvp(env->env, "SLURM_MEM_BIND");
			unsetenvp(env->env, "SLURM_MEM_BIND_LIST");
			unsetenvp(env->env, "SLURM_MEM_BIND_PREFER");
			unsetenvp(env->env, "SLURM_MEM_BIND_SORT");
			unsetenvp(env->env, "SLURM_MEM_BIND_TYPE");
			unsetenvp(env->env, "SLURM_MEM_BIND_VERBOSE");
		}

		if (env->mem_bind_type & MEM_BIND_VERBOSE)
			str_verbose = "verbose";
		else
			str_verbose = "quiet";
		if (env->mem_bind_type & MEM_BIND_PREFER)
			str_prefer = "prefer";
		if (env->mem_bind_type & MEM_BIND_NONE) {
			str_bind_type = "none";
		} else if (env->mem_bind_type & MEM_BIND_RANK) {
			str_bind_type = "rank";
		} else if (env->mem_bind_type & MEM_BIND_MAP) {
			str_bind_type = "map_mem:";
		} else if (env->mem_bind_type & MEM_BIND_MASK) {
			str_bind_type = "mask_mem:";
		} else if (env->mem_bind_type & MEM_BIND_LOCAL) {
			str_bind_type = "local";
		}

		if (env->mem_bind_type & MEM_BIND_SORT)
			str_bind_sort = "sort";

		if (env->mem_bind)
			str_bind_list = env->mem_bind;
		else
			str_bind_list = "";

		xstrcat(str_bind, str_verbose);
		if (str_prefer) {
			xstrcatchar(str_bind, ',');
			xstrcat(str_bind, str_prefer);
		}
		if (str_bind_type) {
			xstrcatchar(str_bind, ',');
			xstrcat(str_bind, str_bind_type);
			xstrcat(str_bind, str_bind_list);
		} else {
			/* literal, not heap: str_bind_type is never freed here */
			str_bind_type = "";
		}

		if (env->batch_flag) {
			if (setenvf(&env->env, "SBATCH_MEM_BIND", "%s",
				    str_bind)) {
				error("Unable to set SBATCH_MEM_BIND");
				rc = SLURM_ERROR;
			}
			if (setenvf(&env->env, "SBATCH_MEM_BIND_LIST", "%s",
				    str_bind_list)) {
				error("Unable to set SBATCH_MEM_BIND_LIST");
				rc = SLURM_ERROR;
			}
			if (str_prefer &&
			    setenvf(&env->env, "SBATCH_MEM_BIND_PREFER", "%s",
				    str_prefer)) {
				error("Unable to set SBATCH_MEM_BIND_PREFER");
				rc = SLURM_ERROR;
			}
			if (str_bind_sort &&
			    setenvf(&env->env, "SBATCH_MEM_BIND_SORT", "%s",
				    str_bind_sort)) {
				error("Unable to set SBATCH_MEM_BIND_SORT");
				rc = SLURM_ERROR;
			}
			if (setenvf(&env->env, "SBATCH_MEM_BIND_TYPE", "%s",
				    str_bind_type)) {
				error("Unable to set SBATCH_MEM_BIND_TYPE");
				rc = SLURM_ERROR;
			}
			if (setenvf(&env->env, "SBATCH_MEM_BIND_VERBOSE", "%s",
				    str_verbose)) {
				error("Unable to set SBATCH_MEM_BIND_VERBOSE");
				rc = SLURM_ERROR;
			}
		} else {
			if (setenvf(&env->env, "SLURM_MEM_BIND", "%s",
				    str_bind)) {
				error("Unable to set SLURM_MEM_BIND");
				rc = SLURM_ERROR;
			}
			if (setenvf(&env->env, "SLURM_MEM_BIND_LIST", "%s",
				    str_bind_list)) {
				error("Unable to set SLURM_MEM_BIND_LIST");
				rc = SLURM_ERROR;
			}
			if (str_prefer &&
			    setenvf(&env->env, "SLURM_MEM_BIND_PREFER", "%s",
				    str_prefer)) {
				error("Unable to set SLURM_MEM_BIND_PREFER");
				rc = SLURM_ERROR;
			}
			if (str_bind_sort &&
			    setenvf(&env->env, "SLURM_MEM_BIND_SORT", "%s",
				    str_bind_sort)) {
				error("Unable to set SLURM_MEM_BIND_SORT");
				rc = SLURM_ERROR;
			}
			if (setenvf(&env->env, "SLURM_MEM_BIND_TYPE", "%s",
				    str_bind_type)) {
				error("Unable to set SLURM_MEM_BIND_TYPE");
				rc = SLURM_ERROR;
			}
			if (setenvf(&env->env, "SLURM_MEM_BIND_VERBOSE", "%s",
				    str_verbose)) {
				error("Unable to set SLURM_MEM_BIND_VERBOSE");
				rc = SLURM_ERROR;
			}
		}

		xfree(str_bind);
	}

	if (cpu_freq_set_env("SLURM_CPU_FREQ_REQ", env->cpu_freq_min,
			     env->cpu_freq_max, env->cpu_freq_gov)
	    != SLURM_SUCCESS)
		rc = SLURM_ERROR;

	if (env->overcommit &&
	    (setenvf(&env->env, "SLURM_OVERCOMMIT", "%s", "1"))) {
		error("Unable to set SLURM_OVERCOMMIT environment variable");
		rc = SLURM_ERROR;
	}

	if (env->slurmd_debug &&
	    setenvf(&env->env, "SLURMD_DEBUG", "%d", env->slurmd_debug)) {
		error("Can't set SLURMD_DEBUG environment variable");
		rc = SLURM_ERROR;
	}

	if (env->labelio &&
	    setenvf(&env->env, "SLURM_LABELIO", "1")) {
		error("Unable to set SLURM_LABELIO environment variable");
		rc = SLURM_ERROR;
	}

	if (env->jobid >= 0) {
		if (setenvf(&env->env, "SLURM_JOB_ID", "%d", env->jobid)) {
			error("Unable to set SLURM_JOB_ID environment");
			rc = SLURM_ERROR;
		}
		/* and for backwards compatibility... */
		if (setenvf(&env->env, "SLURM_JOBID", "%d", env->jobid)) {
			error("Unable to set SLURM_JOBID environment");
			rc = SLURM_ERROR;
		}
	}

	if (env->job_name) {
		if (setenvf(&env->env, "SLURM_JOB_NAME", "%s",
			    env->job_name)) {
			error("Unable to set SLURM_JOB_NAME environment");
			rc = SLURM_ERROR;
		}
	}

	/*
	 * These aren't relevant to a system not using Slurm as the
	 * launcher. Since there isn't a flag for that we check for
	 * the flags we do have.
	 */
	if (env->task_pid &&
	    setenvf(&env->env, "SLURM_TASK_PID", "%d",
		    (int)env->task_pid)) {
		error("Unable to set SLURM_TASK_PID environment "
		      "variable");
		rc = SLURM_ERROR;
	}
	if ((env->nodeid >= 0) &&
	    setenvf(&env->env, "SLURM_NODEID", "%d", env->nodeid)) {
		error("Unable to set SLURM_NODEID environment");
		rc = SLURM_ERROR;
	}

	if ((env->procid >= 0) &&
	    setenvf(&env->env, "SLURM_PROCID", "%d", env->procid)) {
		error("Unable to set SLURM_PROCID environment");
		rc = SLURM_ERROR;
	}

	if ((env->localid >= 0) &&
	    setenvf(&env->env, "SLURM_LOCALID", "%d", env->localid)) {
		error("Unable to set SLURM_LOCALID environment");
		rc = SLURM_ERROR;
	}

	if (env->stepid >= 0) {
		if (setenvf(&env->env, "SLURM_STEP_ID", "%d", env->stepid)) {
			error("Unable to set SLURM_STEP_ID environment");
			rc = SLURM_ERROR;
		}
		/* and for backwards compatibility... */
		if (setenvf(&env->env, "SLURM_STEPID", "%d", env->stepid)) {
			error("Unable to set SLURM_STEPID environment");
			rc = SLURM_ERROR;
		}
	}

	if (!preserve_env && env->nhosts &&
	    setenvf(&env->env, "SLURM_NNODES", "%d", env->nhosts)) {
		error("Unable to set SLURM_NNODES environment var");
		rc = SLURM_ERROR;
	}

	if (env->nhosts &&
	    setenvf(&env->env, "SLURM_JOB_NUM_NODES", "%d", env->nhosts)) {
		error("Unable to set SLURM_JOB_NUM_NODES environment var");
		rc = SLURM_ERROR;
	}

	if (env->nodelist &&
	    setenvf(&env->env, "SLURM_NODELIST", "%s", env->nodelist)) {
		error("Unable to set SLURM_NODELIST environment var.");
		rc = SLURM_ERROR;
	}

	if (env->partition &&
	    setenvf(&env->env, "SLURM_JOB_PARTITION", "%s", env->partition)) {
		error("Unable to set SLURM_JOB_PARTITION environment var.");
		rc = SLURM_ERROR;
	}

	if (!preserve_env && env->task_count &&
	    setenvf(&env->env, "SLURM_TASKS_PER_NODE", "%s",
		    env->task_count)) {
		error("Can't set SLURM_TASKS_PER_NODE env variable");
		rc = SLURM_ERROR;
	}

	if (env->comm_port &&
	    setenvf(&env->env, "SLURM_SRUN_COMM_PORT", "%u",
		    env->comm_port)) {
		error("Can't set SLURM_SRUN_COMM_PORT env variable");
		rc = SLURM_ERROR;
	}

	if (env->cli) {
		slurm_print_slurm_addr(env->cli, addrbuf, INET_ADDRSTRLEN);

		/*
		 * XXX: Eventually, need a function for slurm_addrs that
		 * returns just the IP address (not addr:port)
		 */
		if ((colon = strchr(addrbuf, ':')) != NULL)
			*colon = '\0';
		if (setenvf(&env->env, "SLURM_LAUNCH_NODE_IPADDR", "%s",
			    addrbuf)) {
			error("Unable to set SLURM_LAUNCH_NODE_IPADDR environment variable");
			rc = SLURM_ERROR;
		}
	}

	if (env->sgtids &&
	    setenvf(&env->env, "SLURM_GTIDS", "%s", env->sgtids)) {
		error("Unable to set SLURM_GTIDS environment variable");
		rc = SLURM_ERROR;
	}

	if (env->pty_port &&
	    setenvf(&env->env, "SLURM_PTY_PORT", "%hu", env->pty_port)) {
		error("Can't set SLURM_PTY_PORT env variable");
		rc = SLURM_ERROR;
	}
	if (env->ws_col &&
	    setenvf(&env->env, "SLURM_PTY_WIN_COL", "%hu", env->ws_col)) {
		error("Can't set SLURM_PTY_WIN_COL env variable");
		rc = SLURM_ERROR;
	}
	if (env->ws_row &&
	    setenvf(&env->env, "SLURM_PTY_WIN_ROW", "%hu", env->ws_row)) {
		error("Can't set SLURM_PTY_WIN_ROW env variable");
		rc = SLURM_ERROR;
	}

	if (env->restart_cnt &&
	    setenvf(&env->env, "SLURM_RESTART_COUNT", "%u",
		    env->restart_cnt)) {
		error("Can't set SLURM_RESTART_COUNT env variable");
		rc = SLURM_ERROR;
	}

	if (env->user_name) {
		if (setenvf(&env->env, "SLURM_JOB_UID", "%u",
			    (unsigned int) env->uid)) {
			error("Can't set SLURM_JOB_UID env variable");
			rc = SLURM_ERROR;
		}
		if (setenvf(&env->env, "SLURM_JOB_USER", "%s",
			    env->user_name)) {
			error("Can't set SLURM_JOB_USER env variable");
			rc = SLURM_ERROR;
		}
	}

	if (env->account) {
		if (setenvf(&env->env, "SLURM_JOB_ACCOUNT", "%s",
			    env->account)) {
			error("%s: can't set SLURM_JOB_ACCOUNT env variable",
			      __func__);
			rc = SLURM_ERROR;
		}
	}
	if (env->qos) {
		if (setenvf(&env->env, "SLURM_JOB_QOS", "%s", env->qos)) {
			error("%s: can't set SLURM_JOB_QOS env variable",
			      __func__);
			rc = SLURM_ERROR;
		}
	}
	if (env->resv_name) {
		if (setenvf(&env->env, "SLURM_JOB_RESERVATION", "%s",
			    env->resv_name)) {
			error("%s: can't set SLURM_JOB_RESERVATION env variable",
			      __func__);
			rc = SLURM_ERROR;
		}
	}

	/* prefer slurmctld_addr when configured, else the first control_addr */
	if (slurmctld_conf.slurmctld_addr)
		addr = slurmctld_conf.slurmctld_addr;
	else
		addr = slurmctld_conf.control_addr[0];
	if (setenvf(&env->env, "SLURM_WORKING_CLUSTER", "%s:%s:%d:%d:%d",
		    slurmctld_conf.cluster_name, addr,
		    slurmctld_conf.slurmctld_port, SLURM_PROTOCOL_VERSION,
		    select_get_plugin_id())) {
		error("Unable to set SLURM_WORKING_CLUSTER environment variable");
		rc = SLURM_ERROR;
	}

	return rc;
}
813
814 /**********************************************************************
815 * From here on are the new environment variable management functions,
816 * used by the "new" commands: salloc, sbatch, and the step launch APIs.
817 **********************************************************************/
818
/*
 * Build a string representation of an array of uint16_t elements.
 * Values are printed in decimal and separated by commas. A run of
 * consecutive equal values is written once, followed by "(xN)" where "N"
 * is the length of the run.
 *
 * Example:
 *   The array "1, 2, 1, 1, 1, 3, 2" becomes the string "1,2,1(x3),3,2"
 *
 * A NULL array (or an array_len below 1) yields an empty string.
 * Returns an xmalloc'ed string. Free with xfree().
 */
extern char *uint16_array_to_str(int array_len, const uint16_t *array)
{
	char *out = xstrdup("");
	int run_start = 0;

	if (!array)
		return out;

	while (run_start < array_len) {
		int run_end = run_start + 1;
		int run_len;
		char *delim;

		/* extend the run while the next element repeats the value */
		while ((run_end < array_len) &&
		       (array[run_end] == array[run_start]))
			run_end++;
		run_len = run_end - run_start;

		/* no separator after the final run */
		delim = (run_end < array_len) ? "," : "";

		if (run_len > 1) {
			xstrfmtcat(out, "%u(x%u)%s",
				   array[run_start], run_len, delim);
		} else {
			xstrfmtcat(out, "%u%s", array[run_start], delim);
		}

		run_start = run_end;
	}

	return out;
}
860
861
/*
 * The cpus-per-node representation in Slurm (and perhaps tasks-per-node
 * in the future) is stored in a compressed format comprised of two
 * equal-length arrays, and an integer holding the array length. In one
 * array an element represents a count (number of cpus, number of tasks,
 * etc.), and the corresponding element in the other array contains the
 * number of times the count is repeated sequentially in the uncompressed
 * something-per-node array.
 *
 * This function returns the string representation of the compressed
 * array: counts separated by commas, each followed by "(xN)" when its
 * repetition count N is greater than 1.
 *
 * array_len  IN - number of elements in both arrays
 * array      IN - the counts
 * array_reps IN - repetition count for each element of `array'
 *
 * Returns an xmalloc'ed string (empty if either array is NULL or
 * array_len is 0). Free with xfree().
 */
extern char *uint32_compressed_to_str(uint32_t array_len,
				      const uint16_t *array,
				      const uint32_t *array_reps)
{
	uint32_t i;	/* unsigned, to match the type of array_len */
	char *sep = ",";	/* separator */
	char *str = xstrdup("");

	if (!array || !array_reps)
		return str;

	for (i = 0; i < array_len; i++) {
		if ((i + 1) == array_len)	/* last time through loop */
			sep = "";
		if (array_reps[i] > 1) {
			xstrfmtcat(str, "%u(x%u)%s",
				   array[i], array_reps[i], sep);
		} else {
			xstrfmtcat(str, "%u%s", array[i], sep);
		}
	}

	return str;
}
898
899 /*
900 * Set in "dest" the environment variables relevant to a Slurm job
901 * allocation, overwriting any environment variables of the same name.
902 * If the address pointed to by "dest" is NULL, memory will automatically be
903 * xmalloc'ed. The array is terminated by a NULL pointer, and thus is
904 * suitable for use by execle() and other env_array_* functions.
905 *
906 * Sets the variables:
907 * SLURM_JOB_ID
908 * SLURM_JOB_NAME
909 * SLURM_JOB_NUM_NODES
910 * SLURM_JOB_NODELIST
911 * SLURM_JOB_CPUS_PER_NODE
912 * SLURM_NODE_ALIASES
913 * SLURM_NTASKS_PER_NODE
914 *
 * dest OUT - array in which to set the environment variables
916 * alloc IN - resource allocation response
917 * desc IN - job allocation request
918 * het_job_offset IN - component offset into hetjob, -1 if not hetjob
919 *
920 * Sets OBSOLETE variables (needed for MPI, do not remove):
921 * SLURM_JOBID
922 * SLURM_NNODES
923 * SLURM_NODELIST
924 * SLURM_TASKS_PER_NODE
925 */
env_array_for_job(char *** dest,const resource_allocation_response_msg_t * alloc,const job_desc_msg_t * desc,int het_job_offset)926 extern int env_array_for_job(char ***dest,
927 const resource_allocation_response_msg_t *alloc,
928 const job_desc_msg_t *desc, int het_job_offset)
929 {
930 char *tmp = NULL;
931 char *dist = NULL, *lllp_dist = NULL;
932 char *key, *value;
933 slurm_step_layout_t *step_layout = NULL;
934 int i, rc = SLURM_SUCCESS;
935 slurm_step_layout_req_t step_layout_req;
936 uint16_t cpus_per_task_array[1];
937 uint32_t cpus_task_reps[1];
938
939 if (!alloc || !desc)
940 return SLURM_ERROR;
941
942 memset(&step_layout_req, 0, sizeof(slurm_step_layout_req_t));
943 step_layout_req.num_tasks = desc->num_tasks;
944 step_layout_req.num_hosts = alloc->node_cnt;
945 cpus_per_task_array[0] = desc->cpus_per_task;
946 cpus_task_reps[0] = alloc->node_cnt;
947
948 if (het_job_offset < 1) {
949 env_array_overwrite_fmt(dest, "SLURM_JOB_ID", "%u",
950 alloc->job_id);
951 }
952 env_array_overwrite_het_fmt(dest, "SLURM_JOB_ID", het_job_offset,
953 "%u", alloc->job_id);
954 env_array_overwrite_het_fmt(dest, "SLURM_JOB_NAME", het_job_offset,
955 "%s", desc->name);
956 env_array_overwrite_het_fmt(dest, "SLURM_JOB_NUM_NODES", het_job_offset,
957 "%u", step_layout_req.num_hosts);
958 env_array_overwrite_het_fmt(dest, "SLURM_JOB_NODELIST", het_job_offset,
959 "%s", alloc->node_list);
960 env_array_overwrite_het_fmt(dest, "SLURM_NODE_ALIASES", het_job_offset,
961 "%s", alloc->alias_list);
962 env_array_overwrite_het_fmt(dest, "SLURM_JOB_PARTITION", het_job_offset,
963 "%s", alloc->partition);
964
965 set_distribution(desc->task_dist, &dist, &lllp_dist);
966 if (dist) {
967 env_array_overwrite_het_fmt(dest, "SLURM_DISTRIBUTION",
968 het_job_offset, "%s", dist);
969 }
970 if ((desc->task_dist & SLURM_DIST_STATE_BASE) == SLURM_DIST_PLANE) {
971 env_array_overwrite_het_fmt(dest, "SLURM_DIST_PLANESIZE",
972 het_job_offset, "%u",
973 desc->plane_size);
974 }
975 if (lllp_dist) {
976 env_array_overwrite_het_fmt(dest, "SLURM_DIST_LLLP",
977 het_job_offset, "%s", lllp_dist);
978 }
979 tmp = uint32_compressed_to_str(alloc->num_cpu_groups,
980 alloc->cpus_per_node,
981 alloc->cpu_count_reps);
982 env_array_overwrite_het_fmt(dest, "SLURM_JOB_CPUS_PER_NODE",
983 het_job_offset, "%s", tmp);
984 xfree(tmp);
985
986 if (alloc->pn_min_memory & MEM_PER_CPU) {
987 uint64_t tmp_mem = alloc->pn_min_memory & (~MEM_PER_CPU);
988 env_array_overwrite_het_fmt(dest, "SLURM_MEM_PER_CPU",
989 het_job_offset, "%"PRIu64"",
990 tmp_mem);
991 } else if (alloc->pn_min_memory) {
992 uint64_t tmp_mem = alloc->pn_min_memory;
993 env_array_overwrite_het_fmt(dest, "SLURM_MEM_PER_NODE",
994 het_job_offset, "%"PRIu64"",
995 tmp_mem);
996 }
997
998 /* OBSOLETE, but needed by MPI, do not remove */
999 env_array_overwrite_het_fmt(dest, "SLURM_JOBID", het_job_offset, "%u",
1000 alloc->job_id);
1001 env_array_overwrite_het_fmt(dest, "SLURM_NNODES", het_job_offset, "%u",
1002 step_layout_req.num_hosts);
1003 env_array_overwrite_het_fmt(dest, "SLURM_NODELIST", het_job_offset, "%s",
1004 alloc->node_list);
1005
1006 if (step_layout_req.num_tasks == NO_VAL) {
1007 /* If we know how many tasks we are going to do then
1008 we set SLURM_TASKS_PER_NODE */
1009 int i = 0;
1010 /* If no tasks were given we can figure it out here
1011 * by totalling up the cpus and then dividing by the
1012 * number of cpus per task */
1013
1014 step_layout_req.num_tasks = 0;
1015 for (i = 0; i < alloc->num_cpu_groups; i++) {
1016 step_layout_req.num_tasks += alloc->cpu_count_reps[i]
1017 * alloc->cpus_per_node[i];
1018 }
1019 if ((int)desc->cpus_per_task > 1
1020 && desc->cpus_per_task != NO_VAL16)
1021 step_layout_req.num_tasks /= desc->cpus_per_task;
1022 //num_tasks = desc->min_cpus;
1023 }
1024
1025 if ((desc->task_dist & SLURM_DIST_STATE_BASE) == SLURM_DIST_ARBITRARY) {
1026 step_layout_req.node_list = desc->req_nodes;
1027 env_array_overwrite_het_fmt(dest, "SLURM_ARBITRARY_NODELIST",
1028 het_job_offset, "%s",
1029 step_layout_req.node_list);
1030 } else
1031 step_layout_req.node_list = alloc->node_list;
1032
1033 step_layout_req.cpus_per_node = alloc->cpus_per_node;
1034 step_layout_req.cpu_count_reps = alloc->cpu_count_reps;
1035 step_layout_req.cpus_per_task = cpus_per_task_array;
1036 step_layout_req.cpus_task_reps = cpus_task_reps;
1037 step_layout_req.task_dist = desc->task_dist;
1038 step_layout_req.plane_size = desc->plane_size;
1039
1040 if (!(step_layout = slurm_step_layout_create(&step_layout_req)))
1041 return SLURM_ERROR;
1042
1043 tmp = uint16_array_to_str(step_layout->node_cnt, step_layout->tasks);
1044 slurm_step_layout_destroy(step_layout);
1045 env_array_overwrite_het_fmt(dest, "SLURM_TASKS_PER_NODE",
1046 het_job_offset,
1047 "%s", tmp);
1048 xfree(tmp);
1049
1050 if (alloc->account) {
1051 env_array_overwrite_het_fmt(dest, "SLURM_JOB_ACCOUNT",
1052 het_job_offset, "%s",
1053 alloc->account);
1054 }
1055 if (alloc->qos) {
1056 env_array_overwrite_het_fmt(dest, "SLURM_JOB_QOS",
1057 het_job_offset,
1058 "%s", alloc->qos);
1059 }
1060 if (alloc->resv_name) {
1061 env_array_overwrite_het_fmt(dest, "SLURM_JOB_RESERVATION",
1062 het_job_offset, "%s",
1063 alloc->resv_name);
1064 }
1065
1066 if (alloc->env_size) { /* Used to set Burst Buffer environment */
1067 for (i = 0; i < alloc->env_size; i++) {
1068 tmp = xstrdup(alloc->environment[i]);
1069 key = tmp;
1070 value = strchr(tmp, '=');
1071 if (value) {
1072 value[0] = '\0';
1073 value++;
1074 env_array_overwrite_het_fmt(dest, key,
1075 het_job_offset,
1076 "%s",
1077 value);
1078 }
1079 xfree(tmp);
1080 }
1081 }
1082
1083 if (desc->acctg_freq) {
1084 env_array_overwrite_het_fmt(dest, "SLURM_ACCTG_FREQ",
1085 het_job_offset, "%s",
1086 desc->acctg_freq);
1087 };
1088
1089 if (desc->network) {
1090 env_array_overwrite_het_fmt(dest, "SLURM_NETWORK",
1091 het_job_offset, "%s",
1092 desc->network);
1093 }
1094
1095 if (desc->overcommit != NO_VAL8) {
1096 env_array_overwrite_het_fmt(dest, "SLURM_OVERCOMMIT",
1097 het_job_offset, "%u",
1098 desc->overcommit);
1099 }
1100
1101 /* Add default task counts for srun, if not already set */
1102 if (desc->bitflags & JOB_NTASKS_SET) {
1103 env_array_overwrite_het_fmt(dest, "SLURM_NTASKS",
1104 het_job_offset,
1105 "%d", desc->num_tasks);
1106 /* maintain for old scripts */
1107 env_array_overwrite_het_fmt(dest, "SLURM_NPROCS",
1108 het_job_offset,
1109 "%d", desc->num_tasks);
1110 }
1111 if (desc->bitflags & JOB_CPUS_SET) {
1112 env_array_overwrite_het_fmt(dest, "SLURM_CPUS_PER_TASK",
1113 het_job_offset, "%d",
1114 desc->cpus_per_task);
1115 }
1116 if (desc->ntasks_per_node && (desc->ntasks_per_node != NO_VAL16)) {
1117 env_array_overwrite_het_fmt(dest, "SLURM_NTASKS_PER_NODE",
1118 het_job_offset, "%d",
1119 desc->ntasks_per_node);
1120 }
1121
1122 return rc;
1123 }
1124
1125 /*
1126 * Set in "dest" the environment variables strings relevant to a Slurm batch
1127 * job allocation, overwriting any environment variables of the same name.
1128 * If the address pointed to by "dest" is NULL, memory will automatically be
1129 * xmalloc'ed. The array is terminated by a NULL pointer, and thus is
1130 * suitable for use by execle() and other env_array_* functions.
1131 *
1132 * Sets the variables:
1133 * SLURM_CLUSTER_NAME
1134 * SLURM_JOB_ID
1135 * SLURM_JOB_NUM_NODES
1136 * SLURM_JOB_NODELIST
1137 * SLURM_JOB_CPUS_PER_NODE
1138 * SLURM_NODE_ALIASES
1139 * ENVIRONMENT=BATCH
1140 * HOSTNAME
1141 *
1142 * Sets OBSOLETE variables (needed for MPI, do not remove):
1143 * SLURM_JOBID
1144 * SLURM_NNODES
1145 * SLURM_NODELIST
1146 * SLURM_NTASKS
1147 * SLURM_TASKS_PER_NODE
1148 */
1149 extern int
env_array_for_batch_job(char *** dest,const batch_job_launch_msg_t * batch,const char * node_name)1150 env_array_for_batch_job(char ***dest, const batch_job_launch_msg_t *batch,
1151 const char *node_name)
1152 {
1153 char *tmp = NULL, *cluster_name;
1154 uint32_t num_cpus = 0;
1155 int i;
1156 slurm_step_layout_t *step_layout = NULL;
1157 uint16_t cpus_per_task;
1158 uint32_t task_dist;
1159 slurm_step_layout_req_t step_layout_req;
1160 uint16_t cpus_per_task_array[1];
1161 uint32_t cpus_task_reps[1];
1162
1163 if (!batch)
1164 return SLURM_ERROR;
1165
1166 memset(&step_layout_req, 0, sizeof(slurm_step_layout_req_t));
1167 step_layout_req.num_tasks = batch->ntasks;
1168
1169 /*
1170 * There is no explicit node count in the batch structure,
1171 * so we need to calculate the node count.
1172 */
1173 for (i = 0; i < batch->num_cpu_groups; i++) {
1174 step_layout_req.num_hosts += batch->cpu_count_reps[i];
1175 num_cpus += batch->cpu_count_reps[i] * batch->cpus_per_node[i];
1176 }
1177
1178 cluster_name = slurm_get_cluster_name();
1179 if (cluster_name) {
1180 env_array_overwrite_fmt(dest, "SLURM_CLUSTER_NAME", "%s",
1181 cluster_name);
1182 xfree(cluster_name);
1183 }
1184
1185 env_array_overwrite_fmt(dest, "SLURM_JOB_ID", "%u", batch->job_id);
1186 env_array_overwrite_fmt(dest, "SLURM_JOB_NUM_NODES", "%u",
1187 step_layout_req.num_hosts);
1188 if (batch->array_task_id != NO_VAL) {
1189 env_array_overwrite_fmt(dest, "SLURM_ARRAY_JOB_ID", "%u",
1190 batch->array_job_id);
1191 env_array_overwrite_fmt(dest, "SLURM_ARRAY_TASK_ID", "%u",
1192 batch->array_task_id);
1193 }
1194 env_array_overwrite_fmt(dest, "SLURM_JOB_NODELIST", "%s", batch->nodes);
1195 env_array_overwrite_fmt(dest, "SLURM_JOB_PARTITION", "%s",
1196 batch->partition);
1197 env_array_overwrite_fmt(dest, "SLURM_NODE_ALIASES", "%s",
1198 batch->alias_list);
1199
1200 tmp = uint32_compressed_to_str(batch->num_cpu_groups,
1201 batch->cpus_per_node,
1202 batch->cpu_count_reps);
1203 env_array_overwrite_fmt(dest, "SLURM_JOB_CPUS_PER_NODE", "%s", tmp);
1204 xfree(tmp);
1205
1206 env_array_overwrite_fmt(dest, "ENVIRONMENT", "BATCH");
1207 if (node_name)
1208 env_array_overwrite_fmt(dest, "HOSTNAME", "%s", node_name);
1209
1210 /* OBSOLETE, but needed by MPI, do not remove */
1211 env_array_overwrite_fmt(dest, "SLURM_JOBID", "%u", batch->job_id);
1212 env_array_overwrite_fmt(dest, "SLURM_NNODES", "%u",
1213 step_layout_req.num_hosts);
1214 env_array_overwrite_fmt(dest, "SLURM_NODELIST", "%s", batch->nodes);
1215
1216 if ((batch->cpus_per_task != 0) &&
1217 (batch->cpus_per_task != NO_VAL16))
1218 cpus_per_task = batch->cpus_per_task;
1219 else
1220 cpus_per_task = 1; /* default value */
1221 cpus_per_task_array[0] = cpus_per_task;
1222 cpus_task_reps[0] = step_layout_req.num_hosts;
1223
1224 /* Only overwrite this if it is set. They are set in
1225 * sbatch directly and could have changed. */
1226 if (getenvp(*dest, "SLURM_CPUS_PER_TASK"))
1227 env_array_overwrite_fmt(dest, "SLURM_CPUS_PER_TASK", "%u",
1228 cpus_per_task);
1229
1230 if (step_layout_req.num_tasks) {
1231 env_array_append_fmt(dest, "SLURM_NTASKS", "%u",
1232 step_layout_req.num_tasks);
1233 /* keep around for old scripts */
1234 env_array_append_fmt(dest, "SLURM_NPROCS", "%u",
1235 step_layout_req.num_tasks);
1236 } else {
1237 step_layout_req.num_tasks = num_cpus / cpus_per_task;
1238 }
1239
1240 if ((step_layout_req.node_list =
1241 getenvp(*dest, "SLURM_ARBITRARY_NODELIST"))) {
1242 task_dist = SLURM_DIST_ARBITRARY;
1243 } else {
1244 step_layout_req.node_list = batch->nodes;
1245 task_dist = SLURM_DIST_BLOCK;
1246 }
1247
1248 step_layout_req.cpus_per_node = batch->cpus_per_node;
1249 step_layout_req.cpu_count_reps = batch->cpu_count_reps;
1250 step_layout_req.cpus_per_task = cpus_per_task_array;
1251 step_layout_req.cpus_task_reps = cpus_task_reps;
1252 step_layout_req.task_dist = task_dist;
1253 step_layout_req.plane_size = NO_VAL16;
1254
1255 if (!(step_layout = slurm_step_layout_create(&step_layout_req)))
1256 return SLURM_ERROR;
1257
1258 tmp = uint16_array_to_str(step_layout->node_cnt, step_layout->tasks);
1259 slurm_step_layout_destroy(step_layout);
1260 env_array_overwrite_fmt(dest, "SLURM_TASKS_PER_NODE", "%s", tmp);
1261 xfree(tmp);
1262
1263 if (batch->pn_min_memory & MEM_PER_CPU) {
1264 uint64_t tmp_mem = batch->pn_min_memory & (~MEM_PER_CPU);
1265 env_array_overwrite_fmt(dest, "SLURM_MEM_PER_CPU", "%"PRIu64"",
1266 tmp_mem);
1267 } else if (batch->pn_min_memory) {
1268 uint64_t tmp_mem = batch->pn_min_memory;
1269 env_array_overwrite_fmt(dest, "SLURM_MEM_PER_NODE", "%"PRIu64"",
1270 tmp_mem);
1271 }
1272
1273 /* Set the SLURM_JOB_ACCOUNT, SLURM_JOB_QOS
1274 * and SLURM_JOB_RESERVATION if set by
1275 * the controller.
1276 */
1277 if (batch->account) {
1278 env_array_overwrite_fmt(dest,
1279 "SLURM_JOB_ACCOUNT",
1280 "%s",
1281 batch->account);
1282 }
1283
1284 if (batch->qos) {
1285 env_array_overwrite_fmt(dest,
1286 "SLURM_JOB_QOS",
1287 "%s",
1288 batch->qos);
1289 }
1290
1291 if (batch->resv_name) {
1292 env_array_overwrite_fmt(dest,
1293 "SLURM_JOB_RESERVATION",
1294 "%s",
1295 batch->resv_name);
1296 }
1297
1298 return SLURM_SUCCESS;
1299 }
1300
1301 /*
1302 * Set in "dest" the environment variables relevant to a Slurm job step,
1303 * overwriting any environment variables of the same name. If the address
1304 * pointed to by "dest" is NULL, memory will automatically be xmalloc'ed.
1305 * The array is terminated by a NULL pointer, and thus is suitable for
1306 * use by execle() and other env_array_* functions. If preserve_env is
1307 * true, the variables SLURM_NNODES, SLURM_NTASKS and SLURM_TASKS_PER_NODE
1308 * remain unchanged.
1309 *
1310 * Sets variables:
1311 * SLURM_STEP_ID
1312 * SLURM_STEP_NUM_NODES
1313 * SLURM_STEP_NUM_TASKS
1314 * SLURM_STEP_TASKS_PER_NODE
1315 * SLURM_STEP_LAUNCHER_PORT
1316 * SLURM_STEP_LAUNCHER_IPADDR
1317 * SLURM_STEP_RESV_PORTS
1318 * SLURM_STEP_SUB_MP
1319 *
1320 * Sets OBSOLETE variables:
1321 * SLURM_STEPID
1322 * SLURM_NNODES
1323 * SLURM_NTASKS
1324 * SLURM_NODELIST
1325 * SLURM_TASKS_PER_NODE
1326 * SLURM_SRUN_COMM_PORT
1327 * SLURM_LAUNCH_NODE_IPADDR
1328 *
1329 */
1330 extern void
env_array_for_step(char *** dest,const job_step_create_response_msg_t * step,launch_tasks_request_msg_t * launch,uint16_t launcher_port,bool preserve_env)1331 env_array_for_step(char ***dest,
1332 const job_step_create_response_msg_t *step,
1333 launch_tasks_request_msg_t *launch,
1334 uint16_t launcher_port,
1335 bool preserve_env)
1336 {
1337 char *tmp, *tpn;
1338 uint32_t node_cnt, task_cnt;
1339
1340 if (!step || !launch)
1341 return;
1342
1343 node_cnt = step->step_layout->node_cnt;
1344 env_array_overwrite_fmt(dest, "SLURM_STEP_ID", "%u", step->job_step_id);
1345
1346 if (launch->het_job_node_list) {
1347 tmp = launch->het_job_node_list;
1348 env_array_overwrite_fmt(dest, "SLURM_NODELIST", "%s", tmp);
1349 env_array_overwrite_fmt(dest, "SLURM_JOB_NODELIST", "%s", tmp);
1350 } else {
1351 tmp = step->step_layout->node_list;
1352 env_array_append_fmt(dest, "SLURM_JOB_NODELIST", "%s", tmp);
1353 }
1354 env_array_overwrite_fmt(dest, "SLURM_STEP_NODELIST", "%s", tmp);
1355
1356 if (launch->het_job_nnodes && (launch->het_job_nnodes != NO_VAL))
1357 node_cnt = launch->het_job_nnodes;
1358 env_array_overwrite_fmt(dest, "SLURM_STEP_NUM_NODES", "%u", node_cnt);
1359
1360 if (launch->het_job_ntasks && (launch->het_job_ntasks != NO_VAL))
1361 task_cnt = launch->het_job_ntasks;
1362 else
1363 task_cnt = step->step_layout->task_cnt;
1364 env_array_overwrite_fmt(dest, "SLURM_STEP_NUM_TASKS", "%u", task_cnt);
1365
1366 if (launch->het_job_task_cnts) {
1367 tpn = uint16_array_to_str(launch->het_job_nnodes,
1368 launch->het_job_task_cnts);
1369 env_array_overwrite_fmt(dest, "SLURM_TASKS_PER_NODE", "%s",
1370 tpn);
1371 env_array_overwrite_fmt(dest, "SLURM_NNODES", "%u",
1372 launch->het_job_nnodes);
1373 } else {
1374 tpn = uint16_array_to_str(step->step_layout->node_cnt,
1375 step->step_layout->tasks);
1376 if (!preserve_env) {
1377 env_array_overwrite_fmt(dest, "SLURM_TASKS_PER_NODE",
1378 "%s", tpn);
1379 }
1380 }
1381 env_array_overwrite_fmt(dest, "SLURM_STEP_TASKS_PER_NODE", "%s", tpn);
1382
1383 env_array_overwrite_fmt(dest, "SLURM_STEP_LAUNCHER_PORT",
1384 "%hu", launcher_port);
1385 if (step->resv_ports) {
1386 env_array_overwrite_fmt(dest, "SLURM_STEP_RESV_PORTS",
1387 "%s", step->resv_ports);
1388 }
1389
1390 /* OBSOLETE, but needed by some MPI implementations, do not remove */
1391 env_array_overwrite_fmt(dest, "SLURM_STEPID", "%u", step->job_step_id);
1392 if (!preserve_env) {
1393 env_array_overwrite_fmt(dest, "SLURM_NNODES", "%u", node_cnt);
1394 env_array_overwrite_fmt(dest, "SLURM_NTASKS", "%u", task_cnt);
1395 /* keep around for old scripts */
1396 env_array_overwrite_fmt(dest, "SLURM_NPROCS",
1397 "%u", step->step_layout->task_cnt);
1398 }
1399 env_array_overwrite_fmt(dest, "SLURM_SRUN_COMM_PORT",
1400 "%hu", launcher_port);
1401
1402 xfree(tpn);
1403 }
1404
1405 /*
 * Environment variables set elsewhere
1407 * ----------------------------------
1408 *
1409 * Set by slurmstepd:
1410 * SLURM_STEP_NODEID
1411 * SLURM_STEP_PROCID
1412 * SLURM_STEP_LOCALID
1413 *
1414 * OBSOLETE set by slurmstepd:
1415 * SLURM_NODEID
1416 * SLURM_PROCID
1417 * SLURM_LOCALID
1418 */
1419
1420 /***********************************************************************
1421 * Environment variable array support functions
1422 ***********************************************************************/
1423
/*
 * Allocate and return an empty environment variable array, i.e. an array
 * whose only element is the terminating NULL pointer.
 */
char **env_array_create(void)
{
	char **empty_env = xmalloc(sizeof(char *));

	empty_env[0] = NULL;
	return empty_env;
}
1437
/*
 * Store "name=value" in the environment array referenced by array_ptr,
 * creating the array first when *array_ptr is NULL.
 *
 * IN/OUT array_ptr - address of the environment array
 * IN name - variable name
 * IN value - variable value
 * IN over_write - if true, replace an existing entry of the same name;
 *		   if false, leave an existing entry untouched
 *
 * Return 1 if the entry was stored, 0 if array_ptr is NULL or the variable
 * already exists and over_write is false.
 */
static int _env_array_update(char ***array_ptr, const char *name,
			     const char *value, bool over_write)
{
	char **slot;
	char *entry = NULL;

	if (!array_ptr)
		return 0;

	if (!*array_ptr)
		*array_ptr = env_array_create();

	/* A slot holding NULL is treated as "name not found". */
	slot = _find_name_in_env(*array_ptr, name);
	if (*slot == NULL) {
		slot = _extend_env(array_ptr);
	} else if (over_write) {
		xfree(*slot);
	} else {
		return 0;
	}

	xstrfmtcat(entry, "%s=%s", name, value);
	*slot = entry;

	return 1;
}
1464
1465 /*
1466 * Append a single environment variable to an environment variable array,
1467 * if and only if a variable by that name does not already exist in the
1468 * array.
1469 *
1470 * "value_fmt" supports printf-style formatting.
1471 *
1472 * Return 1 on success, and 0 on error.
1473 */
env_array_append_fmt(char *** array_ptr,const char * name,const char * value_fmt,...)1474 int env_array_append_fmt(char ***array_ptr, const char *name,
1475 const char *value_fmt, ...)
1476 {
1477 int rc;
1478 char *value;
1479 va_list ap;
1480
1481 value = xmalloc(ENV_BUFSIZE);
1482 va_start(ap, value_fmt);
1483 vsnprintf (value, ENV_BUFSIZE, value_fmt, ap);
1484 va_end(ap);
1485 rc = env_array_append(array_ptr, name, value);
1486 xfree(value);
1487
1488 return rc;
1489 }
1490
/*
 * Add a single environment variable to an environment variable array, but
 * only when no variable of that name is present yet (an existing entry is
 * never modified).
 *
 * Return 1 when the variable was added, otherwise 0.
 */
int env_array_append(char ***array_ptr, const char *name,
		     const char *value)
{
	const bool replace_existing = false;

	return _env_array_update(array_ptr, name, value, replace_existing);
}
1503
1504 /*
1505 * Append a single environment variable to an environment variable array
1506 * if a variable by that name does not already exist. If a variable
1507 * by the same name is found in the array, it is overwritten with the
1508 * new value.
1509 *
1510 * "value_fmt" supports printf-style formatting.
1511 *
1512 * Return 1 on success, and 0 on error.
1513 */
env_array_overwrite_fmt(char *** array_ptr,const char * name,const char * value_fmt,...)1514 int env_array_overwrite_fmt(char ***array_ptr, const char *name,
1515 const char *value_fmt, ...)
1516 {
1517 int rc;
1518 char *value;
1519 va_list ap;
1520
1521 value = xmalloc(ENV_BUFSIZE);
1522 va_start(ap, value_fmt);
1523 vsnprintf (value, ENV_BUFSIZE, value_fmt, ap);
1524 va_end(ap);
1525 rc = env_array_overwrite(array_ptr, name, value);
1526 xfree(value);
1527
1528 return rc;
1529 }
1530
1531 /*
1532 * Append a single environment variable to an environment variable array
1533 * if a variable by that name does not already exist. If a variable
1534 * by the same name is found in the array, it is overwritten with the
1535 * new value.
1536 *
1537 * "value_fmt" supports printf-style formatting.
1538 *
1539 * Return 1 on success, and 0 on error.
1540 */
env_array_overwrite_het_fmt(char *** array_ptr,const char * name,int het_job_offset,const char * value_fmt,...)1541 int env_array_overwrite_het_fmt(char ***array_ptr, const char *name,
1542 int het_job_offset,
1543 const char *value_fmt, ...)
1544 {
1545 int rc;
1546 char *value;
1547 va_list ap;
1548
1549 value = xmalloc(ENV_BUFSIZE);
1550 va_start(ap, value_fmt);
1551 vsnprintf (value, ENV_BUFSIZE, value_fmt, ap);
1552 va_end(ap);
1553 if (het_job_offset != -1) {
1554 char *het_comp_name = NULL;
1555 /* Continue support for old hetjob terminology. */
1556 xstrfmtcat(het_comp_name, "%s_PACK_GROUP_%d", name,
1557 het_job_offset);
1558 rc = env_array_overwrite(array_ptr, het_comp_name, value);
1559 xfree(het_comp_name);
1560 xstrfmtcat(het_comp_name, "%s_HET_GROUP_%d", name,
1561 het_job_offset);
1562 rc = env_array_overwrite(array_ptr, het_comp_name, value);
1563 xfree(het_comp_name);
1564 } else
1565 rc = env_array_overwrite(array_ptr, name, value);
1566 xfree(value);
1567
1568 return rc;
1569 }
1570
/*
 * Set a single environment variable in an environment variable array.
 * The variable is added when absent and replaced with the new value when a
 * variable of the same name already exists.
 *
 * Return 1 on success, and 0 on error.
 */
int env_array_overwrite(char ***array_ptr, const char *name,
			const char *value)
{
	const bool replace_existing = true;

	return _env_array_update(array_ptr, name, value, replace_existing);
}
1584
/*
 * Return a copy of "array", built by merging it into a new, initially NULL
 * array. The result must be released with env_array_free().
 */
char **env_array_copy(const char **array)
{
	char **duplicate = NULL;

	env_array_merge(&duplicate, array);
	return duplicate;
}
1596
/*
 * Release an environment variable array: every string up to the
 * terminating NULL pointer, then the array itself. A NULL array is
 * ignored.
 */
void env_array_free(char **env_array)
{
	size_t idx;

	if (!env_array)
		return;

	for (idx = 0; env_array[idx] != NULL; idx++)
		xfree(env_array[idx]);
	xfree(env_array);
}
1612
/*
 * Split an environment "name=value" string at its first '='.
 *
 * IN entry - string to split
 * OUT name - buffer receiving the text before the '='
 * IN name_len - size of the "name" buffer in bytes
 * OUT value - buffer receiving the text after the '='
 * IN value_len - size of the "value" buffer in bytes
 *
 * Return 1 on success, 0 when no '=' is found or either part (including
 * its terminating '\0') does not fit in its buffer. Note that "name" may
 * already have been written when the value turns out to be too long.
 */
static int _env_array_entry_splitter(const char *entry,
				     char *name, int name_len,
				     char *value, int value_len)
{
	char *sep = xstrchr(entry, '=');
	char *val_start;
	int need;

	if (!sep)	/* Bad parsing, no '=' found */
		return 0;

	/*
	 * Bytes needed for the name: the characters before '=' plus one for
	 * the terminating '\0'. E.g. for "a=b", sep - entry is 1 and two
	 * bytes are needed to hold "a\0".
	 */
	need = sep - entry + 1;
	if (need > name_len)
		return 0;
	strlcpy(name, entry, need);

	/* Everything after the '=' is the value; again count the '\0'. */
	val_start = sep + 1;
	need = strlen(val_start) + 1;
	if (need > value_len)
		return 0;
	strlcpy(value, val_start, need);

	return 1;
}
1651
1652 /*
1653 * Work similarly to putenv() (from C stdlib), but uses setenv()
1654 * under the covers. This avoids having pointers from the global
1655 * array "environ" into "string".
1656 *
1657 * Return 1 on success, 0 on failure.
1658 */
_env_array_putenv(const char * string)1659 static int _env_array_putenv(const char *string)
1660 {
1661 int rc = 0;
1662 char name[256], *value;
1663
1664 value = xmalloc(ENV_BUFSIZE);
1665 if ((_env_array_entry_splitter(string, name, sizeof(name),
1666 value, ENV_BUFSIZE)) &&
1667 (setenv(name, value, 1) != -1))
1668 rc = 1;
1669
1670 xfree(value);
1671 return rc;
1672 }
1673
/*
 * Export every entry of the supplied environment variable array into the
 * environment of the current process. A NULL array is ignored, and entries
 * that fail to be set are silently skipped.
 */
void env_array_set_environment(char **env_array)
{
	size_t idx;

	if (!env_array)
		return;

	for (idx = 0; env_array[idx] != NULL; idx++)
		_env_array_putenv(env_array[idx]);
}
1689
1690 /*
1691 * Unset all of the environment variables in a user's current
1692 * environment.
1693 *
1694 * (Note: becuae the environ array is decrementing with each
1695 * unsetenv, only increment the ptr on a failure to unset.)
1696 */
env_unset_environment(void)1697 void env_unset_environment(void)
1698 {
1699 extern char **environ;
1700 char **ptr;
1701 char name[256], *value;
1702
1703 value = xmalloc(ENV_BUFSIZE);
1704 for (ptr = (char **)environ; *ptr != NULL; ) {
1705 if ((_env_array_entry_splitter(*ptr, name, sizeof(name),
1706 value, ENV_BUFSIZE)) &&
1707 (unsetenv(name) != -1))
1708 ;
1709 else
1710 ptr++;
1711 }
1712 xfree(value);
1713 }
1714
1715 /*
1716 * Merge all of the environment variables in src_array into the
1717 * array dest_array. Any variables already found in dest_array
1718 * will be overwritten with the value from src_array.
1719 */
env_array_merge(char *** dest_array,const char ** src_array)1720 void env_array_merge(char ***dest_array, const char **src_array)
1721 {
1722 char **ptr;
1723 char name[256], *value;
1724
1725 if (src_array == NULL)
1726 return;
1727
1728 value = xmalloc(ENV_BUFSIZE);
1729 for (ptr = (char **)src_array; *ptr != NULL; ptr++) {
1730 if (_env_array_entry_splitter(*ptr, name, sizeof(name),
1731 value, ENV_BUFSIZE))
1732 env_array_overwrite(dest_array, name, value);
1733 }
1734 xfree(value);
1735 }
1736
1737 /*
1738 * Merge the environment variables in src_array beginning with "SLURM" into the
1739 * array dest_array. Any variables already found in dest_array will be
1740 * overwritten with the value from src_array.
1741 */
env_array_merge_slurm(char *** dest_array,const char ** src_array)1742 void env_array_merge_slurm(char ***dest_array, const char **src_array)
1743 {
1744 char **ptr;
1745 char name[256], *value;
1746
1747 if (src_array == NULL)
1748 return;
1749
1750 value = xmalloc(ENV_BUFSIZE);
1751 for (ptr = (char **)src_array; *ptr != NULL; ptr++) {
1752 if (_env_array_entry_splitter(*ptr, name, sizeof(name),
1753 value, ENV_BUFSIZE) &&
1754 (xstrncmp(name, "SLURM", 5) == 0))
1755 env_array_overwrite(dest_array, name, value);
1756 }
1757 xfree(value);
1758 }
1759
/*
 * Remove every trailing carriage return and newline from "line", in place.
 * Characters before the last non-CR/NL character are left untouched.
 */
static void _strip_cr_nl(char *line)
{
	size_t end = strlen(line);

	while (end > 0) {
		char last = line[end - 1];

		if ((last != '\r') && (last != '\n'))
			break;
		line[--end] = '\0';
	}
}
1776
/* Return the net count of curly brackets in a string:
 * each '{' adds one and each '}' subtracts one. Zero means the brackets
 * are balanced (or that the string contains none); a negative result means
 * there are more closing than opening brackets. */
static int _bracket_cnt(char *value)
{
	int net = 0;
	char *cur;

	for (cur = value; *cur != '\0'; cur++) {
		switch (*cur) {
		case '{':
			net++;
			break;
		case '}':
			net--;
			break;
		default:
			break;
		}
	}
	return net;
}
1791
1792 /*
1793 * Load user environment from a specified file or file descriptor.
1794 *
1795 * This will read in a user specified file or fd, that is invoked
1796 * via the --export-file option in sbatch. The NAME=value entries must
1797 * be NULL separated to support special characters in the environment
1798 * definitions.
1799 *
1800 * (Note: This is being added to a minor release. For the
1801 * next major release, it might be a consideration to merge
1802 * this functionality with that of load_env_cache and update
1803 * env_cache_builder to use the NULL character.)
1804 */
env_array_from_file(const char * fname)1805 char **env_array_from_file(const char *fname)
1806 {
1807 char *buf = NULL, *ptr = NULL, *eptr = NULL;
1808 char *value, *p;
1809 char **env = NULL;
1810 char name[256];
1811 int buf_size = BUFSIZ, buf_left;
1812 int file_size = 0, tmp_size;
1813 int separator = '\0';
1814 int fd;
1815
1816 if (!fname)
1817 return NULL;
1818
1819 /*
1820 * If file name is a numeric value, then it is assumed to be a
1821 * file descriptor.
1822 */
1823 fd = (int)strtol(fname, &p, 10);
1824 if ((*p != '\0') || (fd < 3) || (fd > sysconf(_SC_OPEN_MAX)) ||
1825 (fcntl(fd, F_GETFL) < 0)) {
1826 fd = open(fname, O_RDONLY);
1827 if (fd == -1) {
1828 error("Could not open user environment file %s", fname);
1829 return NULL;
1830 }
1831 verbose("Getting environment variables from %s", fname);
1832 } else
1833 verbose("Getting environment variables from fd %d", fd);
1834
1835 /*
1836 * Read in the user's environment data.
1837 */
1838 buf = ptr = xmalloc(buf_size);
1839 buf_left = buf_size;
1840 while ((tmp_size = read(fd, ptr, buf_left))) {
1841 if (tmp_size < 0) {
1842 if (errno == EINTR)
1843 continue;
1844 error("read(environment_file): %m");
1845 break;
1846 }
1847 buf_left -= tmp_size;
1848 file_size += tmp_size;
1849 if (buf_left == 0) {
1850 buf_size += BUFSIZ;
1851 xrealloc(buf, buf_size);
1852 }
1853 ptr = buf + file_size;
1854 buf_left = buf_size - file_size;
1855 }
1856 close(fd);
1857
1858 /*
1859 * Parse the buffer into individual environment variable names
1860 * and build the environment.
1861 */
1862 env = env_array_create();
1863 value = xmalloc(ENV_BUFSIZE);
1864 for (ptr = buf; ; ptr = eptr+1) {
1865 eptr = strchr(ptr, separator);
1866 if ((ptr == eptr) || (eptr == NULL))
1867 break;
1868 if (_env_array_entry_splitter(ptr, name, sizeof(name),
1869 value, ENV_BUFSIZE) &&
1870 (!_discard_env(name, value))) {
1871 /*
1872 * Unset the SLURM_SUBMIT_DIR if it is defined so
1873 * that this new value does not get overwritten
1874 * in the subsequent call to env_array_merge().
1875 */
1876 if (xstrcmp(name, "SLURM_SUBMIT_DIR") == 0)
1877 unsetenv(name);
1878 env_array_overwrite(&env, name, value);
1879 }
1880 }
1881 xfree(buf);
1882 xfree(value);
1883
1884 return env;
1885 }
1886
1887 /*
1888 * Load user environment from a cache file located in
1889 * <state_save_location>/env_username
1890 */
_load_env_cache(const char * username)1891 static char **_load_env_cache(const char *username)
1892 {
1893 char *state_save_loc, fname[MAXPATHLEN];
1894 char *line, name[256], *value;
1895 char **env = NULL;
1896 FILE *fp;
1897 int i;
1898
1899 state_save_loc = slurm_get_state_save_location();
1900 i = snprintf(fname, sizeof(fname), "%s/env_cache/%s", state_save_loc,
1901 username);
1902 xfree(state_save_loc);
1903 if (i < 0) {
1904 error("Environment cache filename overflow");
1905 return NULL;
1906 }
1907 if (!(fp = fopen(fname, "r"))) {
1908 error("Could not open user environment cache at %s: %m",
1909 fname);
1910 return NULL;
1911 }
1912
1913 verbose("Getting cached environment variables at %s", fname);
1914 env = env_array_create();
1915 line = xmalloc(ENV_BUFSIZE);
1916 value = xmalloc(ENV_BUFSIZE);
1917 while (1) {
1918 if (!fgets(line, ENV_BUFSIZE, fp))
1919 break;
1920 _strip_cr_nl(line);
1921 if (_env_array_entry_splitter(line, name, sizeof(name),
1922 value, ENV_BUFSIZE) &&
1923 (!_discard_env(name, value))) {
1924 if (value[0] == '(') {
1925 /* This is a bash function.
1926 * It may span multiple lines */
1927 while (_bracket_cnt(value) > 0) {
1928 if (!fgets(line, ENV_BUFSIZE, fp))
1929 break;
1930 _strip_cr_nl(line);
1931 if ((strlen(value) + strlen(line)) >
1932 (ENV_BUFSIZE - 2))
1933 break;
1934 strcat(value, "\n");
1935 strcat(value, line);
1936 }
1937 }
1938 env_array_overwrite(&env, name, value);
1939 }
1940 }
1941 xfree(line);
1942 xfree(value);
1943
1944 fclose(fp);
1945 return env;
1946 }
1947
1948 /*
1949 * Return an array of strings representing the specified user's default
 * environment variables following a two-pronged approach.
1951 * 1. Execute (more or less): "/bin/su - <username> -c /usr/bin/env"
1952 * Depending upon the user's login scripts, this may take a very
1953 * long time to complete or possibly never return
1954 * 2. Load the user environment from a cache file. This is used
1955 * in the event that option 1 times out. This only happens if no_cache isn't
1956 * set. If it is set then NULL will be returned if the normal load fails.
1957 *
1958 * timeout value is in seconds or zero for default (2 secs)
1959 * mode is 1 for short ("su <user>"), 2 for long ("su - <user>")
1960 * On error, returns NULL.
1961 *
1962 * NOTE: The calling process must have an effective uid of root for
1963 * this function to succeed.
1964 */
env_array_user_default(const char * username,int timeout,int mode,bool no_cache)1965 char **env_array_user_default(const char *username, int timeout, int mode,
1966 bool no_cache)
1967 {
1968 char *line = NULL, *last = NULL, name[MAXPATHLEN], *value, *buffer;
1969 char **env = NULL;
1970 char *starttoken = "XXXXSLURMSTARTPARSINGHEREXXXX";
1971 char *stoptoken = "XXXXSLURMSTOPPARSINGHEREXXXXX";
1972 char cmdstr[256], *env_loc = NULL;
1973 char *stepd_path = NULL;
1974 int fd1, fd2, fildes[2], found, fval, len, rc, timeleft;
1975 int buf_read, buf_rem, config_timeout;
1976 pid_t child;
1977 struct timeval begin, now;
1978 struct pollfd ufds;
1979 struct stat buf;
1980
1981 if (geteuid() != (uid_t)0) {
1982 error("SlurmdUser must be root to use --get-user-env");
1983 return NULL;
1984 }
1985
1986 config_timeout = slurm_get_env_timeout();
1987
1988 if (config_timeout == 0) /* just read directly from cache */
1989 return _load_env_cache(username);
1990
1991 if (stat(SUCMD, &buf))
1992 fatal("Could not locate command: "SUCMD);
1993 if (stat("/bin/echo", &buf))
1994 fatal("Could not locate command: /bin/echo");
1995 stepd_path = slurm_get_stepd_loc();
1996 if (stat(stepd_path, &buf) == 0) {
1997 xstrcat(stepd_path, " getenv");
1998 env_loc = stepd_path;
1999 } else if (stat("/bin/env", &buf) == 0)
2000 env_loc = "/bin/env";
2001 else if (stat("/usr/bin/env", &buf) == 0)
2002 env_loc = "/usr/bin/env";
2003 else
2004 fatal("Could not location command: env");
2005 snprintf(cmdstr, sizeof(cmdstr),
2006 "/bin/echo; /bin/echo; /bin/echo; "
2007 "/bin/echo %s; %s; /bin/echo %s",
2008 starttoken, env_loc, stoptoken);
2009 xfree(stepd_path);
2010
2011 if (pipe(fildes) < 0) {
2012 fatal("pipe: %m");
2013 return NULL;
2014 }
2015
2016 child = fork();
2017 if (child == -1) {
2018 fatal("fork: %m");
2019 return NULL;
2020 }
2021 if (child == 0) {
2022 setenv("ENVIRONMENT", "BATCH", 1);
2023 setpgid(0, 0);
2024 close(0);
2025 if ((fd1 = open("/dev/null", O_RDONLY)) == -1)
2026 error("%s: open(/dev/null): %m", __func__);
2027 dup2(fildes[1], 1);
2028 close(2);
2029 if ((fd2 = open("/dev/null", O_WRONLY)) == -1)
2030 error("%s: open(/dev/null): %m", __func__);
2031 if (mode == 1)
2032 execl(SUCMD, "su", username, "-c", cmdstr, NULL);
2033 else if (mode == 2)
2034 execl(SUCMD, "su", "-", username, "-c", cmdstr, NULL);
2035 else { /* Default system configuration */
2036 #ifdef LOAD_ENV_NO_LOGIN
2037 execl(SUCMD, "su", username, "-c", cmdstr, NULL);
2038 #else
2039 execl(SUCMD, "su", "-", username, "-c", cmdstr, NULL);
2040 #endif
2041 }
2042 if (fd1 >= 0) /* Avoid Coverity resource leak notification */
2043 (void) close(fd1);
2044 if (fd2 >= 0) /* Avoid Coverity resource leak notification */
2045 (void) close(fd2);
2046 _exit(1);
2047 }
2048
2049 close(fildes[1]);
2050 if ((fval = fcntl(fildes[0], F_GETFL, 0)) < 0)
2051 error("fcntl(F_GETFL) failed: %m");
2052 else if (fcntl(fildes[0], F_SETFL, fval | O_NONBLOCK) < 0)
2053 error("fcntl(F_SETFL) failed: %m");
2054
2055 gettimeofday(&begin, NULL);
2056 ufds.fd = fildes[0];
2057 ufds.events = POLLIN;
2058
2059 /* Read all of the output from /bin/su into buffer */
2060 if (timeout == 0)
2061 timeout = config_timeout; /* != 0 test above */
2062 found = 0;
2063 buf_read = 0;
2064 buffer = xmalloc(ENV_BUFSIZE);
2065 while (1) {
2066 gettimeofday(&now, NULL);
2067 timeleft = timeout * 1000;
2068 timeleft -= (now.tv_sec - begin.tv_sec) * 1000;
2069 timeleft -= (now.tv_usec - begin.tv_usec) / 1000;
2070 if (timeleft <= 0) {
2071 verbose("timeout waiting for "SUCMD" to complete");
2072 kill(-child, 9);
2073 break;
2074 }
2075 if ((rc = poll(&ufds, 1, timeleft)) <= 0) {
2076 if (rc == 0) {
2077 verbose("timeout waiting for "SUCMD" to complete");
2078 break;
2079 }
2080 if ((errno == EINTR) || (errno == EAGAIN))
2081 continue;
2082 error("poll(): %m");
2083 break;
2084 }
2085 if (!(ufds.revents & POLLIN)) {
2086 if (ufds.revents & POLLHUP) { /* EOF */
2087 found = 1; /* success */
2088 } else if (ufds.revents & POLLERR) {
2089 error("POLLERR");
2090 } else {
2091 error("poll() revents=%d", ufds.revents);
2092 }
2093 break;
2094 }
2095 buf_rem = ENV_BUFSIZE - buf_read;
2096 if (buf_rem == 0) {
2097 error("buffer overflow loading env vars");
2098 break;
2099 }
2100 rc = read(fildes[0], &buffer[buf_read], buf_rem);
2101 if (rc > 0)
2102 buf_read += rc;
2103 else if (rc == 0) { /* EOF */
2104 found = 1; /* success */
2105 break;
2106 } else { /* error */
2107 error("read(env pipe): %m");
2108 break;
2109 }
2110 }
2111 close(fildes[0]);
2112 for (config_timeout=0; ; config_timeout++) {
2113 kill(-child, SIGKILL); /* Typically a no-op */
2114 if (config_timeout)
2115 sleep(1);
2116 if (waitpid(child, &rc, WNOHANG) > 0)
2117 break;
2118 if (config_timeout >= 2) {
2119 /*
2120 * Non-killable processes are indicative of file system
2121 * problems. The process will remain as a zombie, but
2122 * slurmd/salloc will not otherwise be affected.
2123 */
2124 error("Failed to kill program loading user environment");
2125 break;
2126 }
2127 }
2128
2129 if (!found) {
2130 error("Failed to load current user environment variables");
2131 xfree(buffer);
2132 return no_cache ? _load_env_cache(username) : NULL;
2133 }
2134
2135 /* First look for the start token in the output */
2136 len = strlen(starttoken);
2137 found = 0;
2138 line = strtok_r(buffer, "\n", &last);
2139 while (!found && line) {
2140 if (!xstrncmp(line, starttoken, len)) {
2141 found = 1;
2142 break;
2143 }
2144 line = strtok_r(NULL, "\n", &last);
2145 }
2146 if (!found) {
2147 error("Failed to get current user environment variables");
2148 xfree(buffer);
2149 return no_cache ? _load_env_cache(username) : NULL;
2150 }
2151
2152 /* Process environment variables until we find the stop token */
2153 len = strlen(stoptoken);
2154 found = 0;
2155 env = env_array_create();
2156 line = strtok_r(NULL, "\n", &last);
2157 value = xmalloc(ENV_BUFSIZE);
2158 while (!found && line) {
2159 if (!xstrncmp(line, stoptoken, len)) {
2160 found = 1;
2161 break;
2162 }
2163 if (_env_array_entry_splitter(line, name, sizeof(name),
2164 value, ENV_BUFSIZE) &&
2165 (!_discard_env(name, value))) {
2166 if (value[0] == '(') {
2167 /* This is a bash function.
2168 * It may span multiple lines */
2169 while (_bracket_cnt(value) > 0) {
2170 line = strtok_r(NULL, "\n", &last);
2171 if (!line)
2172 break;
2173 if ((strlen(value) + strlen(line)) >
2174 (ENV_BUFSIZE - 2))
2175 break;
2176 strcat(value, "\n");
2177 strcat(value, line);
2178 }
2179 }
2180 env_array_overwrite(&env, name, value);
2181 }
2182 line = strtok_r(NULL, "\n", &last);
2183 }
2184 xfree(value);
2185 xfree(buffer);
2186 if (!found) {
2187 error("Failed to get all user environment variables");
2188 env_array_free(env);
2189 return no_cache ? _load_env_cache(username) : NULL;
2190 }
2191
2192 return env;
2193 }
2194
2195 /*
2196 * Set TRES related env vars. Set here rather than env_array_for_job() since
2197 * we don't have array of opt values and the raw values are not stored in the
2198 * job_desc_msg_t structure (only the strings with possibly combined TRES)
2199 *
2200 * opt IN - options set by command parsing
2201 * dest IN/OUT - location to write environment variables
2202 * het_job_offset IN - component offset into hetjob, -1 if not hetjob
2203 */
set_env_from_opts(slurm_opt_t * opt,char *** dest,int het_job_offset)2204 extern void set_env_from_opts(slurm_opt_t *opt, char ***dest,
2205 int het_job_offset)
2206 {
2207 if (opt->cpus_per_gpu) {
2208 env_array_overwrite_het_fmt(dest, "SLURM_CPUS_PER_GPU",
2209 het_job_offset, "%d",
2210 opt->cpus_per_gpu);
2211 }
2212 if (opt->gpus) {
2213 env_array_overwrite_het_fmt(dest, "SLURM_GPUS",
2214 het_job_offset, "%s",
2215 opt->gpus);
2216 }
2217 if (opt->gpu_bind) {
2218 env_array_overwrite_het_fmt(dest, "SLURM_GPU_BIND",
2219 het_job_offset, "%s",
2220 opt->gpu_bind);
2221 }
2222 if (opt->gpu_freq) {
2223 env_array_overwrite_het_fmt(dest, "SLURM_GPU_FREQ",
2224 het_job_offset, "%s",
2225 opt->gpu_freq);
2226 }
2227 if (opt->gpus_per_node) {
2228 env_array_overwrite_het_fmt(dest, "SLURM_GPUS_PER_NODE",
2229 het_job_offset, "%s",
2230 opt->gpus_per_node);
2231 }
2232 if (opt->gpus_per_socket) {
2233 env_array_overwrite_het_fmt(dest, "SLURM_GPUS_PER_SOCKET",
2234 het_job_offset, "%s",
2235 opt->gpus_per_socket);
2236 }
2237 if (opt->gpus_per_task) {
2238 env_array_overwrite_het_fmt(dest, "SLURM_GPUS_PER_TASK",
2239 het_job_offset, "%s",
2240 opt->gpus_per_task);
2241 }
2242 if (opt->mem_per_gpu != NO_VAL64) {
2243 env_array_overwrite_het_fmt(dest, "SLURM_MEM_PER_GPU",
2244 het_job_offset, "%"PRIu64,
2245 opt->mem_per_gpu);
2246 }
2247 }
2248
/*
 * Return the next token of a separator-delimited string, treating any
 * separator found inside single or double quotes as part of the token.
 * Works like strtok_r(): the input string is modified in place and the
 * scan position is kept in *last between calls.
 *
 * tmp IN - string to tokenize; only read when *last is NULL
 * sep IN - separator; only its first character is used
 * last IN/OUT - scan position; must point to NULL on the first call
 * RET the next token, or NULL when no input remains. A token that is
 *     entirely enclosed in one pair of matching quotes is returned without
 *     those quotes. If a quote is left open, an error is logged and the
 *     remainder of the string is returned unmodified.
 */
extern char *find_quote_token(char *tmp, char *sep, char **last)
{
	char *start, *end;
	int i, quote_single = 0, quote_double = 0;

	xassert(last);
	start = *last ? *last : tmp;
	if (!start || (start[0] == '\0'))
		return NULL;

	for (i = 0; ; i++) {
		if (start[i] == '\'') {
			quote_single = !quote_single;
		} else if (start[i] == '\"') {
			quote_double = !quote_double;
		} else if (((start[i] == sep[0]) || (start[i] == '\0')) &&
			   !quote_single && !quote_double) {
			end = &start[i];
			/*
			 * Record where the next call resumes BEFORE stripping
			 * quotes: just past the separator, or on the NUL at
			 * end of input. Deriving this from the shifted
			 * start/index left *last on the separator itself, so
			 * the following call returned a spurious empty token.
			 */
			*last = (*end == '\0') ? end : end + 1;
			if ((i >= 2) &&
			    (((start[0] == '\'') && (end[-1] == '\'')) ||
			     ((start[0] == '\"') && (end[-1] == '\"')))) {
				/* Drop the enclosing pair of quotes */
				start++;
				end--;
			}
			*end = '\0';
			return start;
		} else if (start[i] == '\0') {
			/* End of input reached with a quote still open */
			error("Improperly formed environment variable (%s)",
			      start);
			*last = &start[i];
			return start;
		}
	}
}
2294