/*****************************************************************************\
 * src/slurmd/slurmstepd/slurmstepd_job.c - stepd_step_rec_t routines
 *****************************************************************************
 * Copyright (C) 2002-2007 The Regents of the University of California.
 * Copyright (C) 2008-2010 Lawrence Livermore National Security.
 * Copyright (C) 2013 Intel, Inc.
 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
 * Written by Mark Grondona <mgrondona@llnl.gov>.
 * CODE-OCEC-09-009. All rights reserved.
 *
 * This file is part of Slurm, a resource management program.
 * For details, see <https://slurm.schedmd.com/>.
 * Please also read the included file: DISCLAIMER.
 *
 * Slurm is free software; you can redistribute it and/or modify it under
 * the terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 2 of the License, or (at your option)
 * any later version.
 *
 * In addition, as a special exception, the copyright holders give permission
 * to link the code of portions of this program with the OpenSSL library under
 * certain conditions as described in each individual source file, and
 * distribute linked combinations including the two. You must obey the GNU
 * General Public License in all respects for all of the code used other than
 * OpenSSL. If you modify file(s) with this exception, you may extend this
 * exception to your version of the file(s), but you are not obligated to do
 * so. If you do not wish to do so, delete this exception statement from your
 * version. If you delete this exception statement from all source files in
 * the program, then also delete it here.
 *
 * Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License along
 * with Slurm; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
\*****************************************************************************/

#include "config.h"

#include <grp.h>
#include <signal.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>

#include "src/common/eio.h"
#include "src/common/fd.h"
#include "src/common/gres.h"
#include "src/common/group_cache.h"
#include "src/common/log.h"
#include "src/common/macros.h"
#include "src/common/node_select.h"
#include "src/common/slurm_jobacct_gather.h"
#include "src/common/slurm_acct_gather_profile.h"
#include "src/common/slurm_protocol_api.h"
#include "src/common/uid.h"
#include "src/common/xassert.h"
#include "src/common/xmalloc.h"
#include "src/common/xstring.h"

#include "src/slurmd/common/fname.h"
#include "src/slurmd/common/xcpuinfo.h"
#include "src/slurmd/slurmd/slurmd.h"
#include "src/slurmd/slurmstepd/io.h"
#include "src/slurmd/slurmstepd/multi_prog.h"
#include "src/slurmd/slurmstepd/slurmstepd_job.h"

static char **_array_copy(int n, char **src);
static void _array_free(char ***array);
static void _job_init_task_info(stepd_step_rec_t *job, uint32_t **gtid,
                                char *ifname, char *ofname, char *efname);
static void _srun_info_destructor(void *arg);
static stepd_step_task_info_t *_task_info_create(int taskid, int gtaskid,
                                                 char *ifname, char *ofname,
                                                 char *efname);
static void _task_info_destroy(stepd_step_task_info_t *t, uint16_t multi_prog);

/*
 * return the default output filename for a batch job
 */
static char *
_batchfilename(stepd_step_rec_t *job, const char *name)
{
        if (name == NULL) {
                if (job->array_task_id == NO_VAL)
                        return fname_create(job, "slurm-%J.out", 0);
                else
                        return fname_create(job, "slurm-%A_%a.out", 0);
        } else
                return fname_create(job, name, 0);
}

/*
 * Expand a stdio file name.
 *
 * If "filename" is NULL, an eio object should be created for that
 * stdio file rather than connecting it directly to a file.
 *
 * If "filename" is a valid task number in string form and the number
 * matches "gtaskid", then NULL is returned so that an eio object will
 * be used. If it is a valid number but does not match "gtaskid", then
 * the file descriptor will be connected to /dev/null.
 */
static char *
_expand_stdio_filename(char *filename, int gtaskid, stepd_step_rec_t *job)
{
        int id;

        if (filename == NULL)
                return NULL;

        id = fname_single_task_io(filename);

        if (id < 0)
                return fname_create(job, filename, gtaskid);
        if (id >= job->ntasks) {
                error("Task ID in filename is invalid");
                return NULL;
        }

        if (id == gtaskid)
                return NULL;
        else
                return xstrdup("/dev/null");
}

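/*
 * Build the per-task info array for this node's tasks: expand the
 * stdio file names for each local task and, when LAUNCH_MULTI_PROG is
 * not set, point each task at the step's argv. For MULTI_PROG launches
 * the per-task argv is parsed from the config file named in
 * job->argv[1].
 */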
static void
_job_init_task_info(stepd_step_rec_t *job, uint32_t **gtid,
                    char *ifname, char *ofname, char *efname)
{
        int i, node_id = job->nodeid;
        char *in, *out, *err;
        uint32_t het_job_offset = 0;

        if (job->node_tasks == 0) {
                error("User requested launch of zero tasks!");
                job->task = NULL;
                return;
        }

        if (job->het_job_offset != NO_VAL)
                het_job_offset = job->het_job_offset;

#if defined(HAVE_NATIVE_CRAY)
        for (i = 0; i < job->nnodes; i++) {
                int j;
                for (j = 1; j < job->task_cnts[i]; j++) {
                        if (gtid[i][j] != gtid[i][j-1] + 1) {
                                job->non_smp = 1;
                                break;
                        }
                }
        }
#endif

        job->task = (stepd_step_task_info_t **)
                xmalloc(job->node_tasks * sizeof(stepd_step_task_info_t *));

        for (i = 0; i < job->node_tasks; i++) {
                in = _expand_stdio_filename(ifname,
                                            gtid[node_id][i] + het_job_offset,
                                            job);
                out = _expand_stdio_filename(ofname,
                                             gtid[node_id][i] + het_job_offset,
                                             job);
                err = _expand_stdio_filename(efname,
                                             gtid[node_id][i] + het_job_offset,
                                             job);
                job->task[i] = _task_info_create(i, gtid[node_id][i], in, out,
                                                 err);
                if ((job->flags & LAUNCH_MULTI_PROG) == 0) {
                        job->task[i]->argc = job->argc;
                        job->task[i]->argv = job->argv;
                }
        }

        if (job->flags & LAUNCH_MULTI_PROG) {
                char *switch_type = slurm_get_switch_type();
                if (!xstrcmp(switch_type, "switch/cray_aries"))
                        multi_prog_parse(job, gtid);
                xfree(switch_type);
                for (i = 0; i < job->node_tasks; i++) {
                        multi_prog_get_argv(job->argv[1], job->env,
                                            gtid[node_id][i],
                                            &job->task[i]->argc,
                                            &job->task[i]->argv,
                                            job->argc, job->argv);
                }
        }
}

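/* return a deep copy of the n-element string array "src",
 * NULL-terminated so it can later be released with _array_free() */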
static char **
_array_copy(int n, char **src)
{
        char **dst = xmalloc((n+1) * sizeof(char *));
        int i;

        for (i = 0; i < n; i++) {
                dst[i] = xstrdup(src[i]);
        }
        dst[n] = NULL;

        return dst;
}

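/* free a NULL-terminated string array built by _array_copy() and
 * set the caller's pointer to NULL */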
static void
_array_free(char ***array)
{
        int i = 0;
        while ((*array)[i] != NULL)
                xfree((*array)[i++]);
        xfree(*array);
        *array = NULL;
}

/* destructor for list routines */
static void
_srun_info_destructor(void *arg)
{
        srun_info_t *srun = (srun_info_t *)arg;
        srun_info_destroy(srun);
}

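/*
 * Free a task info record. The lock/unlock cycle ensures no other
 * thread still holds the task mutex before it is destroyed. t->argv is
 * only freed for multi_prog tasks; otherwise it aliases job->argv.
 */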
static void
_task_info_destroy(stepd_step_task_info_t *t, uint16_t multi_prog)
{
        slurm_mutex_lock(&t->mutex);
        slurm_mutex_unlock(&t->mutex);
        slurm_mutex_destroy(&t->mutex);
        if (multi_prog) {
                xfree(t->argv);
        } /* otherwise, t->argv is a pointer to job->argv */
        xfree(t);
}

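/*
 * Copy the user and group info from the launch credential into the
 * step record, stealing the strings and arrays from cred_arg so that
 * slurm_cred_free_args() does not release them.
 */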
static void _slurm_cred_to_step_rec(slurm_cred_t *cred, stepd_step_rec_t *job)
{
        slurm_cred_arg_t cred_arg;
        slurm_cred_get_args(cred, &cred_arg);

        /*
         * This may have been filled in already from batch_job_launch_msg_t
         * or launch_tasks_request_msg_t.
         */
        if (!job->user_name) {
                job->user_name = cred_arg.pw_name;
                cred_arg.pw_name = NULL;
        }

        job->pw_gecos = cred_arg.pw_gecos;
        cred_arg.pw_gecos = NULL;
        job->pw_dir = cred_arg.pw_dir;
        cred_arg.pw_dir = NULL;
        job->pw_shell = cred_arg.pw_shell;
        cred_arg.pw_shell = NULL;

        job->ngids = cred_arg.ngids;
        job->gids = cred_arg.gids;
        cred_arg.gids = NULL;
        job->gr_names = cred_arg.gr_names;
        cred_arg.gr_names = NULL;

        slurm_cred_free_args(&cred_arg);
}

/* create a slurmd job structure from a launch tasks message */
extern stepd_step_rec_t *stepd_step_rec_create(launch_tasks_request_msg_t *msg,
                                               uint16_t protocol_version)
{
        stepd_step_rec_t *job = NULL;
        srun_info_t *srun = NULL;
        slurm_addr_t resp_addr;
        slurm_addr_t io_addr;
        int i, nodeid = NO_VAL;

        xassert(msg != NULL);
        xassert(msg->complete_nodelist != NULL);
        debug3("entering stepd_step_rec_create");

        if (acct_gather_check_acct_freq_task(msg->job_mem_lim, msg->acctg_freq))
                return NULL;

        job = xmalloc(sizeof(stepd_step_rec_t));
        job->msg = msg;
#ifndef HAVE_FRONT_END
        nodeid = nodelist_find(msg->complete_nodelist, conf->node_name);
        job->node_name = xstrdup(conf->node_name);
#else
        nodeid = 0;
        job->node_name = xstrdup(msg->complete_nodelist);
#endif
        if (nodeid < 0) {
                error("couldn't find node %s in %s",
                      job->node_name, msg->complete_nodelist);
                stepd_step_rec_destroy(job);
                return NULL;
        }

        job->state = SLURMSTEPD_STEP_STARTING;
        slurm_cond_init(&job->state_cond, NULL);
        slurm_mutex_init(&job->state_mutex);
        job->node_tasks = msg->tasks_to_launch[nodeid];
        job->task_cnts = xcalloc(msg->nnodes, sizeof(uint16_t));
        memcpy(job->task_cnts, msg->tasks_to_launch,
               sizeof(uint16_t) * msg->nnodes);
        job->ntasks = msg->ntasks;
        job->jobid = msg->job_id;
        job->stepid = msg->job_step_id;

        job->uid = (uid_t) msg->uid;
        job->gid = (gid_t) msg->gid;
        job->user_name = xstrdup(msg->user_name);
        _slurm_cred_to_step_rec(msg->cred, job);
        /*
         * Favor the group info in the launch cred if available - for 19.05+
         * this is where it is managed, not in launch_tasks_request_msg_t.
         * For older versions, or for when send_gids is disabled, fall back
         * to the launch_tasks_request_msg_t info if necessary.
         */
        if (!job->ngids) {
                job->ngids = (int) msg->ngids;
                job->gids = copy_gids(msg->ngids, msg->gids);
        }

        job->cwd = xstrdup(msg->cwd);
        job->task_dist = msg->task_dist;

        job->cpu_bind_type = msg->cpu_bind_type;
        job->cpu_bind = xstrdup(msg->cpu_bind);
        job->mem_bind_type = msg->mem_bind_type;
        job->mem_bind = xstrdup(msg->mem_bind);
        job->tres_bind = xstrdup(msg->tres_bind);
        job->tres_freq = xstrdup(msg->tres_freq);
        job->cpu_freq_min = msg->cpu_freq_min;
        job->cpu_freq_max = msg->cpu_freq_max;
        job->cpu_freq_gov = msg->cpu_freq_gov;
        job->cpus_per_task = msg->cpus_per_task;

        job->env = _array_copy(msg->envc, msg->env);
        job->array_job_id = msg->job_id;
        job->array_task_id = NO_VAL;
        /* Used for env vars */
        job->het_job_node_offset = msg->het_job_node_offset;
        job->het_job_step_cnt = msg->het_job_step_cnt;
        job->het_job_id = msg->het_job_id;         /* Used for env vars */
        job->het_job_nnodes = msg->het_job_nnodes; /* Used for env vars */
        if (msg->het_job_nnodes && msg->het_job_ntasks &&
            msg->het_job_task_cnts) {
                job->het_job_ntasks = msg->het_job_ntasks;/* Used for env vars*/
                job->het_job_task_cnts = xcalloc(msg->het_job_nnodes,
                                                 sizeof(uint16_t));
                memcpy(job->het_job_task_cnts, msg->het_job_task_cnts,
                       sizeof(uint16_t) * msg->het_job_nnodes);
                if (msg->het_job_tids) {
                        /*
                         * het_job_tids == NULL if request from pre-v19.05
                         * srun
                         */
                        job->het_job_tids = xcalloc(msg->het_job_nnodes,
                                                    sizeof(uint32_t *));
                        for (i = 0; i < msg->het_job_nnodes; i++) {
                                job->het_job_tids[i] =
                                        xcalloc(job->het_job_task_cnts[i],
                                                sizeof(uint32_t));
                                memcpy(job->het_job_tids[i],
                                       msg->het_job_tids[i],
                                       sizeof(uint32_t) *
                                       job->het_job_task_cnts[i]);
                        }
                }
                if (msg->het_job_tid_offsets) {
                        job->het_job_tid_offsets = xcalloc(job->het_job_ntasks,
                                                           sizeof(uint32_t));
                        memcpy(job->het_job_tid_offsets,
                               msg->het_job_tid_offsets,
                               job->het_job_ntasks * sizeof(uint32_t));
                }
        }
        /* Used for env vars & labels */
        job->het_job_offset = msg->het_job_offset;
        /* Used for env vars & labels */
        job->het_job_task_offset = msg->het_job_task_offset;
        job->het_job_node_list = xstrdup(msg->het_job_node_list);
        for (i = 0; i < msg->envc; i++) {
                /* 1234567890123456789 */
                if (!xstrncmp(msg->env[i], "SLURM_ARRAY_JOB_ID=", 19))
                        job->array_job_id = atoi(msg->env[i] + 19);
                /* 12345678901234567890 */
                if (!xstrncmp(msg->env[i], "SLURM_ARRAY_TASK_ID=", 20))
                        job->array_task_id = atoi(msg->env[i] + 20);
        }

        job->eio = eio_handle_create(0);
        job->sruns = list_create((ListDelF) _srun_info_destructor);

        /*
         * Based on testing, the next three lists could use eio_obj_destroy
         * as their destructor, but doing so can trigger an invalid read.
         * Since these objects persist until the end of the job, it isn't
         * that big of a deal.
         */
        job->clients = list_create(NULL); /* FIXME! Needs destructor */
        job->stdout_eio_objs = list_create(NULL); /* FIXME! Needs destructor */
        job->stderr_eio_objs = list_create(NULL); /* FIXME! Needs destructor */
        job->free_incoming = list_create(NULL); /* FIXME! Needs destructor */
        job->incoming_count = 0;
        job->free_outgoing = list_create(NULL); /* FIXME! Needs destructor */
        job->outgoing_count = 0;
        job->outgoing_cache = list_create(NULL); /* FIXME! Needs destructor */

        job->envtp = xmalloc(sizeof(env_t));
        job->envtp->jobid = -1;
        job->envtp->stepid = -1;
        job->envtp->procid = -1;
        job->envtp->localid = -1;
        job->envtp->nodeid = -1;

        job->envtp->distribution = 0;
        job->envtp->cpu_bind_type = 0;
        job->envtp->cpu_bind = NULL;
        job->envtp->mem_bind_type = 0;
        job->envtp->mem_bind = NULL;
        if (!msg->resp_port)
                msg->num_resp_port = 0;
        if (msg->num_resp_port) {
                job->envtp->comm_port =
                        msg->resp_port[nodeid % msg->num_resp_port];
                memcpy(&resp_addr, &msg->orig_addr, sizeof(slurm_addr_t));
                slurm_set_addr(&resp_addr,
                               msg->resp_port[nodeid % msg->num_resp_port],
                               NULL);
        } else {
                memset(&resp_addr, 0, sizeof(slurm_addr_t));
        }
        if (!msg->io_port)
                msg->flags |= LAUNCH_USER_MANAGED_IO;
        if ((msg->flags & LAUNCH_USER_MANAGED_IO) == 0) {
                memcpy(&io_addr, &msg->orig_addr, sizeof(slurm_addr_t));
                slurm_set_addr(&io_addr,
                               msg->io_port[nodeid % msg->num_io_port],
                               NULL);
        } else {
                memset(&io_addr, 0, sizeof(slurm_addr_t));
        }

        srun = srun_info_create(msg->cred, &resp_addr, &io_addr,
                                protocol_version);

        job->profile = msg->profile;
        job->task_prolog = xstrdup(msg->task_prolog);
        job->task_epilog = xstrdup(msg->task_epilog);

        job->argc = msg->argc;
        job->argv = _array_copy(job->argc, msg->argv);

        job->nnodes = msg->nnodes;
        job->nodeid = nodeid;
        job->debug = msg->slurmd_debug;
        job->cpus = msg->node_cpus;
        job->job_core_spec = msg->job_core_spec;

        /*
         * This needs to happen before acct_gather_profile_startpoll()
         * and only really looks at the profile in the job.
         */
        acct_gather_profile_g_node_step_start(job);

        acct_gather_profile_startpoll(msg->acctg_freq,
                                      conf->job_acct_gather_freq);

        job->timelimit = (time_t) -1;
        job->flags = msg->flags;
        job->switch_job = msg->switch_job;
        job->open_mode = msg->open_mode;
        job->options = msg->options;
        format_core_allocs(msg->cred, conf->node_name, conf->cpus,
                           &job->job_alloc_cores, &job->step_alloc_cores,
                           &job->job_mem, &job->step_mem);

        if (job->step_mem && conf->job_acct_oom_kill) {
                jobacct_gather_set_mem_limit(job->jobid, job->stepid,
                                             job->step_mem);
        } else if (job->job_mem && conf->job_acct_oom_kill) {
                jobacct_gather_set_mem_limit(job->jobid, job->stepid,
                                             job->job_mem);
        }

        /* only need these values on the extern step, don't copy otherwise */
        if ((msg->job_step_id == SLURM_EXTERN_CONT) && msg->x11) {
                job->x11 = msg->x11;
                job->x11_alloc_host = xstrdup(msg->x11_alloc_host);
                job->x11_alloc_port = msg->x11_alloc_port;
                job->x11_magic_cookie = xstrdup(msg->x11_magic_cookie);
                job->x11_target = xstrdup(msg->x11_target);
                job->x11_target_port = msg->x11_target_port;
        }

        get_cred_gres(msg->cred, conf->node_name,
                      &job->job_gres_list, &job->step_gres_list);

        list_append(job->sruns, (void *) srun);

        _job_init_task_info(job, msg->global_task_ids,
                            msg->ifname, msg->ofname, msg->efname);

        return job;
}

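/*
 * create a slurmd job structure from a batch job launch message
 *
 * A minimal usage sketch (illustrative only -- the actual slurmstepd
 * caller performs additional setup and error handling):
 *
 *	batch_job_launch_msg_t *msg = ...;  // decoded batch launch RPC
 *	stepd_step_rec_t *job = batch_stepd_step_rec_create(msg);
 *	if (!job)
 *		return SLURM_ERROR;
 */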
extern stepd_step_rec_t *
batch_stepd_step_rec_create(batch_job_launch_msg_t *msg)
{
        stepd_step_rec_t *job;
        srun_info_t *srun = NULL;
        char *in_name;

        xassert(msg != NULL);

        debug3("entering batch_stepd_step_rec_create");

        if (acct_gather_check_acct_freq_task(msg->job_mem, msg->acctg_freq))
                return NULL;

        job = xmalloc(sizeof(stepd_step_rec_t));

        job->state = SLURMSTEPD_STEP_STARTING;
        slurm_cond_init(&job->state_cond, NULL);
        slurm_mutex_init(&job->state_mutex);
        if (msg->cpus_per_node)
                job->cpus = msg->cpus_per_node[0];
        job->node_tasks = 1;
        job->ntasks = msg->ntasks;
        job->jobid = msg->job_id;
        job->stepid = msg->step_id;
        job->array_job_id = msg->array_job_id;
        job->array_task_id = msg->array_task_id;
        job->het_job_step_cnt = NO_VAL;
        job->het_job_id = NO_VAL;      /* Used to set env vars */
        job->het_job_nnodes = NO_VAL;  /* Used to set env vars */
        job->het_job_ntasks = NO_VAL;  /* Used to set env vars */
        job->het_job_offset = NO_VAL;  /* Used to set labels and env vars */
        job->job_core_spec = msg->job_core_spec;

        job->batch = true;
        job->node_name = xstrdup(conf->node_name);

        job->uid = (uid_t) msg->uid;
        job->gid = (gid_t) msg->gid;
        job->user_name = xstrdup(msg->user_name);
        _slurm_cred_to_step_rec(msg->cred, job);
        /*
         * Favor the group info in the launch cred if available - for 19.05+
         * this is where it is managed, not in batch_job_launch_msg_t.
         * For older versions, or for when send_gids is disabled, fall back
         * to the batch_job_launch_msg_t info if necessary.
         */
        if (!job->ngids) {
                job->ngids = (int) msg->ngids;
                job->gids = copy_gids(msg->ngids, msg->gids);
        }

        job->profile = msg->profile;

        /* give them all to the 1 task */
        job->cpus_per_task = job->cpus;

        /*
         * This needs to happen before acct_gather_profile_startpoll()
         * and only really looks at the profile in the job.
         */
        acct_gather_profile_g_node_step_start(job);
        /* needed for the jobacct_gather plugin to start */
        acct_gather_profile_startpoll(msg->acctg_freq,
                                      conf->job_acct_gather_freq);

        job->open_mode = msg->open_mode;
        job->overcommit = (bool) msg->overcommit;

        job->cwd = xstrdup(msg->work_dir);

        job->env = _array_copy(msg->envc, msg->environment);
        job->eio = eio_handle_create(0);
        job->sruns = list_create((ListDelF) _srun_info_destructor);
        job->envtp = xmalloc(sizeof(env_t));
        job->envtp->jobid = -1;
        job->envtp->stepid = -1;
        job->envtp->procid = -1;
        job->envtp->localid = -1;
        job->envtp->nodeid = -1;

        job->envtp->distribution = 0;
        job->cpu_bind_type = msg->cpu_bind_type;
        job->cpu_bind = xstrdup(msg->cpu_bind);
        job->envtp->mem_bind_type = 0;
        job->envtp->mem_bind = NULL;
        job->envtp->restart_cnt = msg->restart_cnt;

        if (msg->cpus_per_node)
                job->cpus = msg->cpus_per_node[0];

        format_core_allocs(msg->cred, conf->node_name, conf->cpus,
                           &job->job_alloc_cores, &job->step_alloc_cores,
                           &job->job_mem, &job->step_mem);
        if (job->step_mem && conf->job_acct_oom_kill)
                jobacct_gather_set_mem_limit(job->jobid, NO_VAL, job->step_mem);
        else if (job->job_mem && conf->job_acct_oom_kill)
                jobacct_gather_set_mem_limit(job->jobid, NO_VAL, job->job_mem);

        get_cred_gres(msg->cred, conf->node_name,
                      &job->job_gres_list, &job->step_gres_list);

        srun = srun_info_create(NULL, NULL, NULL, NO_VAL16);

        list_append(job->sruns, (void *) srun);

        if (msg->argc) {
                job->argc = msg->argc;
                job->argv = _array_copy(job->argc, msg->argv);
        } else {
                job->argc = 1;
                /*
                 * job script has not yet been written out to disk --
                 * argv will be filled in later by _make_batch_script()
                 */
                job->argv = (char **) xmalloc(2 * sizeof(char *));
        }

        job->task = xmalloc(sizeof(stepd_step_task_info_t *));
        if (msg->std_err == NULL)
                msg->std_err = xstrdup(msg->std_out);

        if (msg->std_in == NULL)
                in_name = xstrdup("/dev/null");
        else
                in_name = fname_create(job, msg->std_in, 0);

        job->task[0] = _task_info_create(0, 0, in_name,
                                         _batchfilename(job, msg->std_out),
                                         _batchfilename(job, msg->std_err));
        job->task[0]->argc = job->argc;
        job->task[0]->argv = job->argv;

        return job;
}

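/* release all resources held by a step record, including the record
 * itself */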
extern void
stepd_step_rec_destroy(stepd_step_rec_t *job)
{
        uint16_t multi_prog = 0;
        int i;

        _array_free(&job->env);
        _array_free(&job->argv);

        if (job->flags & LAUNCH_MULTI_PROG)
                multi_prog = 1;
        for (i = 0; i < job->node_tasks; i++)
                _task_info_destroy(job->task[i], multi_prog);
        xfree(job->task);
        eio_handle_destroy(job->eio);
        FREE_NULL_LIST(job->sruns);
        FREE_NULL_LIST(job->clients);
        FREE_NULL_LIST(job->stdout_eio_objs);
        FREE_NULL_LIST(job->stderr_eio_objs);
        FREE_NULL_LIST(job->free_incoming);
        FREE_NULL_LIST(job->free_outgoing);
        FREE_NULL_LIST(job->outgoing_cache);
        FREE_NULL_LIST(job->job_gres_list);
        FREE_NULL_LIST(job->step_gres_list);
        xfree(job->cpu_bind);
        xfree(job->cwd);
        xfree(job->envtp);
        xfree(job->pw_gecos);
        xfree(job->pw_dir);
        xfree(job->pw_shell);
        xfree(job->gids);
        xfree(job->mem_bind);
        eio_handle_destroy(job->msg_handle);
        xfree(job->node_name);
        mpmd_free(job);
        xfree(job->het_job_task_cnts);
        if ((job->het_job_nnodes != NO_VAL) && job->het_job_tids) {
                /* het_job_tids == NULL if request from pre-v19.05 srun */
                for (i = 0; i < job->het_job_nnodes; i++)
                        xfree(job->het_job_tids[i]);
                xfree(job->het_job_tids);
        }
        xfree(job->het_job_tid_offsets);
        xfree(job->task_prolog);
        xfree(job->task_epilog);
        xfree(job->job_alloc_cores);
        xfree(job->step_alloc_cores);
        xfree(job->task_cnts);
        xfree(job->tres_bind);
        xfree(job->tres_freq);
        xfree(job->user_name);
        xfree(job->x11_xauthority);
        xfree(job);
}

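/*
 * Create an srun_info_t describing the srun to contact for this step.
 * The I/O key is copied from the credential signature, truncated or
 * zero-padded to SLURM_IO_KEY_SIZE. A NULL cred yields an otherwise
 * empty record (used, for example, for batch job structures).
 */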
extern srun_info_t *
srun_info_create(slurm_cred_t *cred, slurm_addr_t *resp_addr,
                 slurm_addr_t *ioaddr, uint16_t protocol_version)
{
        char *data = NULL;
        uint32_t len = 0;
        srun_info_t *srun = xmalloc(sizeof(srun_info_t));
        srun_key_t *key = xmalloc(sizeof(srun_key_t));

        srun->key = key;
        if (!protocol_version || (protocol_version == NO_VAL16))
                protocol_version = SLURM_PROTOCOL_VERSION;
        srun->protocol_version = protocol_version;
        /*
         * If no credential was provided, return the empty
         * srun info object. (This is used, for example, when
         * creating a batch job structure.)
         */
        if (!cred)
                return srun;

        slurm_cred_get_signature(cred, &data, &len);

        len = len > SLURM_IO_KEY_SIZE ? SLURM_IO_KEY_SIZE : len;

        if (data != NULL) {
                memcpy((void *) key->data, data, len);

                if (len < SLURM_IO_KEY_SIZE)
                        memset((void *) (key->data + len), 0,
                               SLURM_IO_KEY_SIZE - len);
        }

        if (ioaddr != NULL)
                srun->ioaddr = *ioaddr;
        if (resp_addr != NULL)
                srun->resp_addr = *resp_addr;
        return srun;
}

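/* free an srun_info_t created by srun_info_create() */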
extern void
srun_info_destroy(srun_info_t *srun)
{
        xfree(srun->key);
        xfree(srun);
}

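/*
 * Allocate and initialize a task info record for local task "taskid"
 * (global task "gtaskid"). All descriptors start out at -1, the state
 * is STEPD_STEP_TASK_INIT, and the stdio names are stored as given
 * (not copied).
 */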
static stepd_step_task_info_t *_task_info_create(int taskid, int gtaskid,
                                                 char *ifname, char *ofname,
                                                 char *efname)
{
        stepd_step_task_info_t *t = xmalloc(sizeof(stepd_step_task_info_t));

        xassert(taskid >= 0);
        xassert(gtaskid >= 0);

        slurm_mutex_init(&t->mutex);
        slurm_mutex_lock(&t->mutex);
        t->state = STEPD_STEP_TASK_INIT;
        t->id = taskid;
        t->gtid = gtaskid;
        t->pid = (pid_t) -1;
        t->ifname = ifname;
        t->ofname = ofname;
        t->efname = efname;
        t->stdin_fd = -1;
        t->to_stdin = -1;
        t->stdout_fd = -1;
        t->from_stdout = -1;
        t->stderr_fd = -1;
        t->from_stderr = -1;
        t->in = NULL;
        t->out = NULL;
        t->err = NULL;
        t->killed_by_cmd = false;
        t->aborted = false;
        t->esent = false;
        t->exited = false;
        t->estatus = -1;
        t->argc = 0;
        t->argv = NULL;
        slurm_mutex_unlock(&t->mutex);
        return t;
}