1 /*****************************************************************************\
2 * src/common/stepd_api.c - slurmstepd message API
3 *****************************************************************************
4 * Copyright (C) 2005-2007 The Regents of the University of California.
5 * Copyright (C) 2008-2010 Lawrence Livermore National Security.
6 * Portions Copyright (C) 2008 Vijay Ramasubramanian
7 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
8 * Written by Christopher Morrone <morrone2@llnl.gov>
9 * CODE-OCEC-09-009. All rights reserved.
10 *
11 * This file is part of Slurm, a resource management program.
12 * For details, see <https://slurm.schedmd.com/>.
13 * Please also read the included file: DISCLAIMER.
14 *
15 * Slurm is free software; you can redistribute it and/or modify it under
16 * the terms of the GNU General Public License as published by the Free
17 * Software Foundation; either version 2 of the License, or (at your option)
18 * any later version.
19 *
20 * In addition, as a special exception, the copyright holders give permission
21 * to link the code of portions of this program with the OpenSSL library under
22 * certain conditions as described in each individual source file, and
23 * distribute linked combinations including the two. You must obey the GNU
24 * General Public License in all respects for all of the code used other than
25 * OpenSSL. If you modify file(s) with this exception, you may extend this
26 * exception to your version of the file(s), but you are not obligated to do
27 * so. If you do not wish to do so, delete this exception statement from your
28 * version. If you delete this exception statement from all source files in
29 * the program, then also delete it here.
30 *
31 * Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
32 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
33 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
34 * details.
35 *
36 * You should have received a copy of the GNU General Public License along
37 * with Slurm; if not, write to the Free Software Foundation, Inc.,
38 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
39 \*****************************************************************************/
40
41 #define _GNU_SOURCE
42
43 #include <dirent.h>
44 #include <grp.h>
45 #include <inttypes.h>
46 #include <regex.h>
47 #include <signal.h>
48 #include <stdlib.h>
49 #include <string.h>
50 #include <sys/param.h> /* MAXPATHLEN */
51 #include <sys/socket.h>
52 #include <sys/stat.h>
53 #include <sys/time.h>
54 #include <sys/types.h>
55 #include <sys/un.h>
56 #include <unistd.h>
57
58 #include "src/common/fd.h"
59 #include "src/common/list.h"
60 #include "src/common/macros.h"
61 #include "src/common/pack.h"
62 #include "src/common/read_config.h"
63 #include "src/common/slurm_auth.h"
64 #include "src/common/slurm_cred.h"
65 #include "src/common/slurm_jobacct_gather.h"
66 #include "src/common/slurm_protocol_api.h"
67 #include "src/common/stepd_api.h"
68 #include "src/common/strlcpy.h"
69 #include "src/common/xmalloc.h"
70 #include "src/common/xstring.h"
71
72 strong_alias(stepd_available, slurm_stepd_available);
73 strong_alias(stepd_connect, slurm_stepd_connect);
74 strong_alias(stepd_connect_nss, slurm_stepd_connect_nss);
75 strong_alias(stepd_get_uid, slurm_stepd_get_uid);
76 strong_alias(stepd_add_extern_pid, slurm_stepd_add_extern_pid);
77 strong_alias(stepd_get_x11_display, slurm_stepd_get_x11_display);
78 strong_alias(stepd_get_info, slurm_stepd_get_info);
79 strong_alias(stepd_getpw, slurm_stepd_getpw);
80 strong_alias(xfree_struct_passwd, slurm_xfree_struct_passwd);
81 strong_alias(stepd_getgr, slurm_stepd_getgr);
82 strong_alias(xfree_struct_group_array, slurm_xfree_struct_group_array);
83
84 static bool
_slurm_authorized_user()85 _slurm_authorized_user()
86 {
87 uid_t uid, slurm_user_id;
88 slurm_ctl_conf_t *conf;
89
90 conf = slurm_conf_lock();
91 slurm_user_id = (uid_t)conf->slurm_user_id;
92 slurm_conf_unlock();
93
94 uid = getuid();
95
96 return ((uid == (uid_t)0) || (uid == slurm_user_id));
97 }
98
/*
 * Should be called when a connect() to a socket returns ECONNREFUSED.
 * Presumably the ECONNREFUSED means that nothing is attached to the listening
 * side of the unix domain socket.
 * If the socket is at least 10 minutes old, then unlink it.
 */
static void
_handle_stray_socket(const char *socket_name)
{
	struct stat sbuf;
	uid_t my_uid;

	/* Only attempt to remove the stale socket if process is running
	   as root or the SlurmUser. */
	if (!_slurm_authorized_user())
		return;

	if (stat(socket_name, &sbuf) == -1) {
		debug3("_handle_stray_socket: unable to stat %s: %m",
		       socket_name);
		return;
	}

	my_uid = getuid();
	if (my_uid != sbuf.st_uid) {
		debug3("_handle_stray_socket: socket %s is not owned by uid %d",
		       socket_name, (int)my_uid);
		return;
	}

	/* Leave sockets younger than 10 minutes alone. */
	if ((time(NULL) - sbuf.st_mtime) <= 600)
		return;

	if (unlink(socket_name) == 0) {
		debug("Cleaned up stray socket %s", socket_name);
	} else if (errno != ENOENT) {
		error("_handle_stray_socket: unable to clean up"
		      " stray socket %s: %m", socket_name);
	}
}
142
/*
 * Remove a leftover batch-script file and its jobNNNNN directory from
 * the spool "directory" for job "job_id".  Errors are deliberately
 * ignored (best-effort cleanup).
 */
static void _handle_stray_script(const char *directory, uint32_t job_id)
{
	char *job_dir = NULL;
	char *script = NULL;

	xstrfmtcat(job_dir, "%s/job%05u", directory, job_id);
	xstrfmtcat(script, "%s/slurm_script", job_dir);

	info("%s: Purging vestigial job script %s", __func__, script);
	(void) unlink(script);
	(void) rmdir(job_dir);

	xfree(script);
	xfree(job_dir);
}
156
/*
 * Open and connect a unix-domain stream socket to the stepd for
 * jobid.stepid under "directory", named "<directory>/<nodename>_<job>.<step>".
 * Returns the connected fd, or -1 on any failure.
 */
static int
_step_connect(const char *directory, const char *nodename,
	      uint32_t jobid, uint32_t stepid)
{
	int fd = -1;
	int len;
	struct sockaddr_un addr;
	char *path = xstrdup_printf("%s/%s_%u.%u",
				    directory, nodename, jobid, stepid);

	/* Refuse to build a socket address that would be truncated. */
	if (strlen(path) >= sizeof(addr.sun_path)) {
		error("%s: Unix socket path '%s' is too long. (%ld > %ld)",
		      __func__, path, (long int)(strlen(path) + 1),
		      (long int)sizeof(addr.sun_path));
		goto cleanup;
	}

	if ((fd = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) {
		error("%s: socket() failed dir %s node %s job %u step %u %m",
		      __func__, directory, nodename, jobid, stepid);
		goto cleanup;
	}

	memset(&addr, 0, sizeof(addr));
	addr.sun_family = AF_UNIX;
	strlcpy(addr.sun_path, path, sizeof(addr.sun_path));
	len = strlen(addr.sun_path) + 1 + sizeof(addr.sun_family);

	if (connect(fd, (struct sockaddr *) &addr, len) < 0) {
		/* Can indicate race condition at step termination */
		debug("%s: connect() failed dir %s node %s step %u.%u %m",
		      __func__, directory, nodename, jobid, stepid);
		if (errno == ECONNREFUSED && running_in_slurmd()) {
			_handle_stray_socket(path);
			if (stepid == SLURM_BATCH_SCRIPT)
				_handle_stray_script(directory, jobid);
		}
		close(fd);
		fd = -1;
	}

cleanup:
	xfree(path);
	return fd;
}
207
208
/*
 * Determine this host's Slurm NodeName by looking up the short hostname,
 * falling back to the aliased name and finally "localhost".
 * Returns an allocated string (caller frees) or NULL if no match.
 */
static char *
_guess_nodename(void)
{
	char host[256];
	char *nodename = NULL;

	/* Use sizeof(host) so the buffer size is stated exactly once. */
	if (gethostname_short(host, sizeof(host)) != 0)
		return NULL;

	nodename = slurm_conf_get_nodename(host);
	if (nodename == NULL)
		nodename = slurm_conf_get_aliased_nodename();
	if (nodename == NULL) /* if no match, try localhost */
		nodename = slurm_conf_get_nodename("localhost");

	return nodename;
}
226
227 /*
228 * Legacy version for connecting to pre-19.05 stepds.
229 * Remove this two versions after 19.05 is released.
230 */
_stepd_connect_legacy(const char * directory,const char * nodename,uint32_t jobid,uint32_t stepid,uint16_t * protocol_version)231 static int _stepd_connect_legacy(const char *directory, const char *nodename,
232 uint32_t jobid, uint32_t stepid,
233 uint16_t *protocol_version)
234 {
235 int req = REQUEST_CONNECT;
236 int fd = -1;
237 int rc;
238 void *auth_cred;
239 char *auth_info;
240 char *local_nodename = NULL;
241 Buf buffer;
242 int len;
243
244 buffer = init_buf(0);
245 /* Create an auth credential */
246 auth_info = slurm_get_auth_info();
247 auth_cred = g_slurm_auth_create(AUTH_DEFAULT_INDEX, auth_info);
248 xfree(auth_info);
249 if (auth_cred == NULL) {
250 error("Creating authentication credential: %m");
251 slurm_seterrno(SLURM_PROTOCOL_AUTHENTICATION_ERROR);
252 goto fail1;
253 }
254
255 /*
256 * Pack the auth credential.
257 * Always send SLURM_MIN_PROTOCOL_VERSION since we don't know the
258 * version at the moment.
259 */
260 rc = g_slurm_auth_pack(auth_cred, buffer, SLURM_MIN_PROTOCOL_VERSION);
261 (void) g_slurm_auth_destroy(auth_cred);
262 if (rc) {
263 error("Packing authentication credential: %m");
264 slurm_seterrno(SLURM_PROTOCOL_AUTHENTICATION_ERROR);
265 goto fail1;
266 }
267
268 /* Connect to the step */
269 fd = _step_connect(directory, nodename, jobid, stepid);
270 if (fd == -1)
271 goto fail1;
272
273 safe_write(fd, &req, sizeof(int));
274 len = size_buf(buffer);
275 safe_write(fd, &len, sizeof(int));
276 safe_write(fd, get_buf_data(buffer), len);
277
278 safe_read(fd, &rc, sizeof(int));
279 if (rc < 0) {
280 error("slurmstepd refused authentication: %m");
281 slurm_seterrno(SLURM_PROTOCOL_AUTHENTICATION_ERROR);
282 goto rwfail;
283 } else if (rc) {
284 *protocol_version = rc;
285 }
286
287 free_buf(buffer);
288 xfree(local_nodename);
289 return fd;
290
291 rwfail:
292 close(fd);
293 fail1:
294 free_buf(buffer);
295 xfree(local_nodename);
296 return -1;
297 }
298
/*
 * Connect to a slurmstepd process by way of its unix domain socket.
 *
 * Both "directory" and "nodename" may be null, in which case stepd_connect
 * will attempt to determine them on its own. If you are using multiple
 * slurmd on one node (unusual outside of development environments), you
 * will get one of the local NodeNames more-or-less at random.
 *
 * Returns a file descriptor for the opened socket on success alongside the
 * protocol_version for the stepd, or -1 on error.
 */
extern int stepd_connect(const char *directory, const char *nodename,
			 uint32_t jobid, uint32_t stepid,
			 uint16_t *protocol_version)
{
	int req = SLURM_PROTOCOL_VERSION;
	int fd = -1;
	int rc;
	char *local_nodename = NULL;

	*protocol_version = 0;

	/* Fill in any parameter the caller left NULL. */
	if (nodename == NULL) {
		if (!(local_nodename = _guess_nodename()))
			return -1;
		nodename = local_nodename;
	}
	if (directory == NULL) {
		slurm_ctl_conf_t *cf = slurm_conf_lock();
		directory = slurm_conf_expand_slurmd_path(cf->slurmd_spooldir,
							  nodename);
		slurm_conf_unlock();
	}

	/* Connect to the step */
	fd = _step_connect(directory, nodename, jobid, stepid);
	if (fd == -1)
		goto fail1;

	/*
	 * Handshake: send our protocol version; a non-negative, non-zero
	 * reply is the stepd's own version.
	 */
	safe_write(fd, &req, sizeof(int));
	safe_read(fd, &rc, sizeof(int));
	if (rc < 0)
		goto rwfail;
	else if (rc)
		*protocol_version = rc;

	xfree(local_nodename);
	return fd;

rwfail:
	close(fd);
	/*
	 * Most likely case for ending up here is when connecting to a
	 * pre-19.05 stepd. Assume that the stepd shut the connection down
	 * since we sent SLURM_PROTOCOL_VERSION instead of SOCKET_CONNECT,
	 * and retry with the older connection style. Remove this fallback
	 * 2 versions after 19.05.
	 */
	fd = _stepd_connect_legacy(directory, nodename, jobid, stepid,
				   protocol_version);
fail1:
	xfree(local_nodename);
	return fd;
}
363
364 /*
365 * Connect to a slurmstepd proccess by way of its unix domain socket.
366 *
367 * This is specifically intended to be used with nss_slurm to prevent possible
368 * deadlocks. Neither "directory" or "nodename" may be null, and will result
369 * in an error. Remove this function in 20.11.
370 *
371 * Returns a file descriptor for the opened socket on success alongside the
372 * protocol_version for the stepd, or -1 on error.
373 */
stepd_connect_nss(const char * directory,const char * nodename,uint32_t jobid,uint32_t stepid,uint16_t * protocol_version)374 extern int stepd_connect_nss(const char *directory, const char *nodename,
375 uint32_t jobid, uint32_t stepid,
376 uint16_t *protocol_version)
377 {
378 int req = SLURM_PROTOCOL_VERSION;
379 int fd = -1;
380 int rc;
381
382 *protocol_version = 0;
383
384 if (!nodename || !directory) {
385 error("directory or nodename invalid");
386 return -1;
387 }
388
389 /* Connect to the step */
390 fd = _step_connect(directory, nodename, jobid, stepid);
391 if (fd == -1)
392 goto fail1;
393
394 safe_write(fd, &req, sizeof(int));
395 safe_read(fd, &rc, sizeof(int));
396 if (rc < 0)
397 goto rwfail;
398 else if (rc)
399 *protocol_version = rc;
400
401 return fd;
402
403 rwfail:
404 close(fd);
405 fail1:
406 return fd;
407 }
408
409 /*
410 * Retrieve a job step's current state.
411 */
412 slurmstepd_state_t
stepd_state(int fd,uint16_t protocol_version)413 stepd_state(int fd, uint16_t protocol_version)
414 {
415 int req = REQUEST_STATE;
416 slurmstepd_state_t status = SLURMSTEPD_NOT_RUNNING;
417
418 safe_write(fd, &req, sizeof(int));
419 safe_read(fd, &status, sizeof(slurmstepd_state_t));
420 rwfail:
421 return status;
422 }
423
424 /*
425 * Retrieve slurmstepd_info_t structure for a job step.
426 *
427 * Must be xfree'd by the caller.
428 */
stepd_get_info(int fd)429 slurmstepd_info_t *stepd_get_info(int fd)
430 {
431 int req = REQUEST_INFO;
432 slurmstepd_info_t *step_info = xmalloc(sizeof(*step_info));
433
434 safe_write(fd, &req, sizeof(int));
435
436 safe_read(fd, &step_info->uid, sizeof(uid_t));
437 safe_read(fd, &step_info->jobid, sizeof(uint32_t));
438 safe_read(fd, &step_info->stepid, sizeof(uint32_t));
439
440 safe_read(fd, &step_info->protocol_version, sizeof(uint16_t));
441 if (step_info->protocol_version >= SLURM_MIN_PROTOCOL_VERSION) {
442 safe_read(fd, &step_info->nodeid, sizeof(uint32_t));
443 safe_read(fd, &step_info->job_mem_limit, sizeof(uint64_t));
444 safe_read(fd, &step_info->step_mem_limit, sizeof(uint64_t));
445 } else {
446 error("%s: protocol_version %hu not supported",
447 __func__, step_info->protocol_version);
448 goto rwfail;
449 }
450
451 return step_info;
452
453 rwfail:
454 xfree(step_info);
455 return NULL;
456 }
457
458 /*
459 * Send job notification message to a batch job
460 */
461 int
stepd_notify_job(int fd,uint16_t protocol_version,char * message)462 stepd_notify_job(int fd, uint16_t protocol_version, char *message)
463 {
464 int req = REQUEST_JOB_NOTIFY;
465 int rc;
466
467 safe_write(fd, &req, sizeof(int));
468 if (message) {
469 rc = strlen(message) + 1;
470 safe_write(fd, &rc, sizeof(int));
471 safe_write(fd, message, rc);
472 } else {
473 rc = 0;
474 safe_write(fd, &rc, sizeof(int));
475 }
476
477 /* Receive the return code */
478 safe_read(fd, &rc, sizeof(int));
479 return rc;
480 rwfail:
481 return -1;
482 }
483
484 /*
485 * Send a signal to the proctrack container of a job step.
486 */
487 int
stepd_signal_container(int fd,uint16_t protocol_version,int signal,int flags,uid_t req_uid)488 stepd_signal_container(int fd, uint16_t protocol_version, int signal, int flags,
489 uid_t req_uid)
490 {
491 int req = REQUEST_SIGNAL_CONTAINER;
492 int rc;
493 int errnum = 0;
494
495 safe_write(fd, &req, sizeof(int));
496 if (protocol_version >= SLURM_MIN_PROTOCOL_VERSION) {
497 safe_write(fd, &signal, sizeof(int));
498 safe_write(fd, &flags, sizeof(int));
499 safe_write(fd, &req_uid, sizeof(uid_t));
500 } else {
501 error("%s: invalid protocol_version %u",
502 __func__, protocol_version);
503 goto rwfail;
504 }
505
506 /* Receive the return code and errno */
507 safe_read(fd, &rc, sizeof(int));
508 safe_read(fd, &errnum, sizeof(int));
509
510 errno = errnum;
511 return rc;
512 rwfail:
513 return -1;
514 }
515
516
/*
 * Attach a client to a running job step.
 *
 * On success returns SLURM_SUCCESS and fills in resp->local_pids,
 * resp->gtids, resp->ntasks, and resp->executable.
 *
 * Wire format sent: request type, the two addresses, SLURM_IO_KEY_SIZE
 * bytes of credential signature, then our protocol version.  All
 * allocations in resp become owned by the caller.
 */
int
stepd_attach(int fd, uint16_t protocol_version,
	     slurm_addr_t *ioaddr, slurm_addr_t *respaddr,
	     void *job_cred_sig, reattach_tasks_response_msg_t *resp)
{
	int req = REQUEST_ATTACH;
	int rc = SLURM_SUCCESS;

	if (protocol_version >= SLURM_MIN_PROTOCOL_VERSION) {
		safe_write(fd, &req, sizeof(int));
		safe_write(fd, ioaddr, sizeof(slurm_addr_t));
		safe_write(fd, respaddr, sizeof(slurm_addr_t));
		safe_write(fd, job_cred_sig, SLURM_IO_KEY_SIZE);
		safe_write(fd, &protocol_version, sizeof(uint16_t));
	} else
		goto rwfail;

	/* Receive the return code */
	safe_read(fd, &rc, sizeof(int));

	if (rc == SLURM_SUCCESS) {
		/* Receive response info */
		uint32_t ntasks;
		int len, i;

		safe_read(fd, &ntasks, sizeof(uint32_t));
		resp->ntasks = ntasks;
		len = ntasks * sizeof(uint32_t);

		/* ntasks uint32_t pids, then ntasks uint32_t gtids */
		resp->local_pids = xcalloc(ntasks, sizeof(uint32_t));
		safe_read(fd, resp->local_pids, len);

		resp->gtids = xcalloc(ntasks, sizeof(uint32_t));
		safe_read(fd, resp->gtids, len);

		/* Per task: a length (assumed to include the NUL —
		 * TODO confirm against the stepd sender) then the name. */
		resp->executable_names = xcalloc(ntasks, sizeof(char *));
		for (i = 0; i < ntasks; i++) {
			safe_read(fd, &len, sizeof(int));
			resp->executable_names[i] = xmalloc(len);
			safe_read(fd, resp->executable_names[i], len);
		}
	}

	return rc;
rwfail:
	return SLURM_ERROR;
}
570
571 static void
_free_step_loc_t(step_loc_t * loc)572 _free_step_loc_t(step_loc_t *loc)
573 {
574 if (loc->directory)
575 xfree(loc->directory);
576 if (loc->nodename)
577 xfree(loc->nodename);
578 xfree(loc);
579 }
580
581 static int
_sockname_regex_init(regex_t * re,const char * nodename)582 _sockname_regex_init(regex_t *re, const char *nodename)
583 {
584 char *pattern = NULL;
585
586 xstrcat(pattern, "^");
587 xstrcat(pattern, nodename);
588 xstrcat(pattern, "_([[:digit:]]*)\\.([[:digit:]]*)$");
589
590 if (regcomp(re, pattern, REG_EXTENDED) != 0) {
591 error("sockname regex compilation failed");
592 return -1;
593 }
594
595 xfree(pattern);
596
597 return 0;
598 }
599
/*
 * Match "filename" against the socket-name regex and extract the two
 * captured numeric fields into *jobid and *stepid.
 * Returns 0 on a match, -1 otherwise.
 */
static int
_sockname_regex(regex_t *re, const char *filename,
		uint32_t *jobid, uint32_t *stepid)
{
	size_t nmatch = 5;
	regmatch_t pmatch[5];
	char *capture;

	memset(pmatch, 0, sizeof(regmatch_t) * nmatch);
	if (regexec(re, filename, nmatch, pmatch, 0) == REG_NOMATCH)
		return -1;

	/* First capture group: the job id. */
	capture = xstrndup(filename + pmatch[1].rm_so,
			   (size_t)(pmatch[1].rm_eo - pmatch[1].rm_so));
	*jobid = (uint32_t)atoll(capture);
	xfree(capture);

	/* Second capture group: the step id. */
	capture = xstrndup(filename + pmatch[2].rm_so,
			   (size_t)(pmatch[2].rm_eo - pmatch[2].rm_so));
	*stepid = (uint32_t)atoll(capture);
	xfree(capture);

	return 0;
}
625
626 /*
627 * Scan for available running slurm step daemons by checking
628 * "directory" for unix domain sockets with names beginning in "nodename".
629 *
630 * Both "directory" and "nodename" may be null, in which case stepd_available
631 * will attempt to determine them on its own. If you are using multiple
632 * slurmd on one node (unusual outside of development environments), you
633 * will get one of the local NodeNames more-or-less at random.
634 *
635 * Returns a List of pointers to step_loc_t structures.
636 */
637 extern List
stepd_available(const char * directory,const char * nodename)638 stepd_available(const char *directory, const char *nodename)
639 {
640 List l;
641 DIR *dp;
642 struct dirent *ent;
643 regex_t re;
644 struct stat stat_buf;
645
646 if (nodename == NULL) {
647 if (!(nodename = _guess_nodename())) {
648 error("%s: Couldn't find nodename", __func__);
649 return NULL;
650 }
651 }
652 if (directory == NULL) {
653 slurm_ctl_conf_t *cf;
654
655 cf = slurm_conf_lock();
656 directory = slurm_conf_expand_slurmd_path(
657 cf->slurmd_spooldir, nodename);
658 slurm_conf_unlock();
659 }
660
661 l = list_create((ListDelF) _free_step_loc_t);
662 if (_sockname_regex_init(&re, nodename) == -1)
663 goto done;
664
665 /*
666 * Make sure that "directory" exists and is a directory.
667 */
668 if (stat(directory, &stat_buf) < 0) {
669 error("Domain socket directory %s: %m", directory);
670 goto done;
671 } else if (!S_ISDIR(stat_buf.st_mode)) {
672 error("%s is not a directory", directory);
673 goto done;
674 }
675
676 if ((dp = opendir(directory)) == NULL) {
677 error("Unable to open directory: %m");
678 goto done;
679 }
680
681 while ((ent = readdir(dp)) != NULL) {
682 step_loc_t *loc;
683 uint32_t jobid, stepid;
684
685 if (_sockname_regex(&re, ent->d_name, &jobid, &stepid) == 0) {
686 debug4("found jobid = %u, stepid = %u", jobid, stepid);
687 loc = xmalloc(sizeof(step_loc_t));
688 loc->directory = xstrdup(directory);
689 loc->nodename = xstrdup(nodename);
690 loc->jobid = jobid;
691 loc->stepid = stepid;
692 list_append(l, (void *)loc);
693 }
694 }
695
696 closedir(dp);
697 done:
698 regfree(&re);
699 return l;
700 }
701
702 /*
703 * Send the termination signal to all of the unix domain socket files
704 * for a given directory and nodename, and then unlink the files.
705 * Returns SLURM_ERROR if any sockets could not be unlinked.
706 */
707 int
stepd_cleanup_sockets(const char * directory,const char * nodename)708 stepd_cleanup_sockets(const char *directory, const char *nodename)
709 {
710 DIR *dp;
711 struct dirent *ent;
712 regex_t re;
713 struct stat stat_buf;
714 int rc = SLURM_SUCCESS;
715
716 _sockname_regex_init(&re, nodename);
717
718 /*
719 * Make sure that "directory" exists and is a directory.
720 */
721 if (stat(directory, &stat_buf) < 0) {
722 error("Domain socket directory %s: %m", directory);
723 goto done;
724 } else if (!S_ISDIR(stat_buf.st_mode)) {
725 error("%s is not a directory", directory);
726 goto done;
727 }
728
729 if ((dp = opendir(directory)) == NULL) {
730 error("Unable to open directory: %m");
731 goto done;
732 }
733
734 while ((ent = readdir(dp)) != NULL) {
735 uint32_t jobid, stepid;
736 if (_sockname_regex(&re, ent->d_name, &jobid, &stepid) == 0) {
737 char *path;
738 int fd;
739 uint16_t protocol_version;
740
741 path = NULL;
742 xstrfmtcat(path, "%s/%s", directory, ent->d_name);
743 verbose("Cleaning up stray job step %u.%u",
744 jobid, stepid);
745
746 /* signal the slurmstepd to terminate its step */
747 fd = stepd_connect((char *) directory,
748 (char *) nodename,
749 jobid, stepid, &protocol_version);
750 if (fd == -1) {
751 debug("Unable to connect to socket %s", path);
752 } else {
753 if (stepd_signal_container(
754 fd, protocol_version, SIGKILL, 0,
755 getuid())
756 == -1) {
757 debug("Error sending SIGKILL to job step %u.%u",
758 jobid, stepid);
759 }
760 close(fd);
761 }
762
763 /* make sure that the socket has been removed */
764 if (unlink(path) == -1 && errno != ENOENT) {
765 error("Unable to clean up stray socket %s: %m",
766 path);
767 rc = SLURM_ERROR;
768 }
769 xfree(path);
770 }
771 }
772
773 closedir(dp);
774 done:
775 regfree(&re);
776 return rc;
777 }
778
779 /*
780 * Return true if the process with process ID "pid" is found in
781 * the proctrack container of the slurmstepd "step".
782 */
783 bool
stepd_pid_in_container(int fd,uint16_t protocol_version,pid_t pid)784 stepd_pid_in_container(int fd, uint16_t protocol_version, pid_t pid)
785 {
786 int req = REQUEST_PID_IN_CONTAINER;
787 bool rc;
788
789 safe_write(fd, &req, sizeof(int));
790 safe_write(fd, &pid, sizeof(pid_t));
791
792 /* Receive the return code */
793 safe_read(fd, &rc, sizeof(bool));
794
795 debug("Leaving stepd_pid_in_container");
796 return rc;
797 rwfail:
798 return false;
799 }
800
801 /*
802 * Add a pid to the "extern" step of a job, meaning add it to the
803 * jobacct_gather and proctrack plugins.
804 */
stepd_add_extern_pid(int fd,uint16_t protocol_version,pid_t pid)805 extern int stepd_add_extern_pid(int fd, uint16_t protocol_version, pid_t pid)
806 {
807 int req = REQUEST_ADD_EXTERN_PID;
808 int rc;
809
810 safe_write(fd, &req, sizeof(int));
811 safe_write(fd, &pid, sizeof(pid_t));
812
813 /* Receive the return code */
814 safe_read(fd, &rc, sizeof(int));
815
816 debug("Leaving stepd_add_extern_pid");
817 return rc;
818 rwfail:
819 return SLURM_ERROR;
820 }
821
stepd_get_x11_display(int fd,uint16_t protocol_version,char ** xauthority)822 extern int stepd_get_x11_display(int fd, uint16_t protocol_version,
823 char **xauthority)
824 {
825 int req = REQUEST_X11_DISPLAY;
826 int display = 0, len = 0;
827
828 *xauthority = NULL;
829
830 safe_write(fd, &req, sizeof(int));
831
832 /*
833 * Receive the display number,
834 * or zero if x11 forwarding is not setup
835 */
836 safe_read(fd, &display, sizeof(int));
837
838 if (protocol_version >= SLURM_MIN_PROTOCOL_VERSION) {
839 safe_read(fd, &len, sizeof(int));
840 if (len) {
841 *xauthority = xmalloc(len);
842 safe_read(fd, *xauthority, len);
843 }
844 }
845
846 debug("Leaving stepd_get_x11_display");
847 return display;
848
849 rwfail:
850 return 0;
851 }
852
/*
 * Request a serialized struct passwd from the stepd (REQUEST_GETPW),
 * looked up by "mode" with "uid" and/or "name" as the key.
 * Returns an allocated struct passwd that the caller must release with
 * xfree_struct_passwd(), or NULL if not found or on I/O failure.
 *
 * NOTE(review): the name length is sent WITHOUT the trailing NUL
 * (strlen, not strlen+1), and each received field length is assumed to
 * exclude the NUL as well — every field buffer is allocated len + 1 and
 * the final byte is left zeroed by xmalloc.
 */
extern struct passwd *stepd_getpw(int fd, uint16_t protocol_version,
				  int mode, uid_t uid, const char *name)
{
	int req = REQUEST_GETPW;
	int found = 0;
	int len = 0;
	struct passwd *pwd = xmalloc(sizeof(struct passwd));

	safe_write(fd, &req, sizeof(int));

	safe_write(fd, &mode, sizeof(int));

	/* Key: always the uid, then an optional (length, bytes) name. */
	safe_write(fd, &uid, sizeof(uid_t));
	if (name) {
		len = strlen(name);
		safe_write(fd, &len, sizeof(int));
		safe_write(fd, name, len);
	} else {
		safe_write(fd, &len, sizeof(int));
	}

	safe_read(fd, &found, sizeof(int));

	if (!found) {
		xfree(pwd);
		return NULL;
	}

	/* Fields arrive in fixed order; each string as (length, bytes). */
	safe_read(fd, &len, sizeof(int));
	pwd->pw_name = xmalloc(len + 1);
	safe_read(fd, pwd->pw_name, len);

	safe_read(fd, &len, sizeof(int));
	pwd->pw_passwd = xmalloc(len + 1);
	safe_read(fd, pwd->pw_passwd, len);

	safe_read(fd, &pwd->pw_uid, sizeof(uid_t));
	safe_read(fd, &pwd->pw_gid, sizeof(gid_t));

	safe_read(fd, &len, sizeof(int));
	pwd->pw_gecos = xmalloc(len + 1);
	safe_read(fd, pwd->pw_gecos, len);

	safe_read(fd, &len, sizeof(int));
	pwd->pw_dir = xmalloc(len + 1);
	safe_read(fd, pwd->pw_dir, len);

	safe_read(fd, &len, sizeof(int));
	pwd->pw_shell = xmalloc(len + 1);
	safe_read(fd, pwd->pw_shell, len);

	debug("Leaving %s", __func__);
	return pwd;

rwfail:
	/* Frees whatever fields were populated before the failure. */
	xfree_struct_passwd(pwd);
	return NULL;
}
914
/*
 * Release a struct passwd allocated by stepd_getpw(), including every
 * string field.  Accepts NULL.
 */
extern void xfree_struct_passwd(struct passwd *pwd)
{
	if (pwd) {
		xfree(pwd->pw_name);
		xfree(pwd->pw_passwd);
		xfree(pwd->pw_gecos);
		xfree(pwd->pw_dir);
		xfree(pwd->pw_shell);
		xfree(pwd);
	}
}
927
/*
 * Request serialized group entries from the stepd (REQUEST_GETGR),
 * looked up by "mode" with "gid" and/or "name" as the key.
 * Returns a NULL-terminated array of struct group pointers that the
 * caller must release with xfree_struct_group_array(), or NULL if
 * nothing was found or on I/O failure.
 */
extern struct group **stepd_getgr(int fd, uint16_t protocol_version,
				  int mode, gid_t gid, const char *name)
{
	int req = REQUEST_GETGR;
	int found = 0;
	int len = 0;
	struct group **grps = NULL;

	safe_write(fd, &req, sizeof(int));

	safe_write(fd, &mode, sizeof(int));

	/* Key: always the gid, then an optional (length, bytes) name. */
	safe_write(fd, &gid, sizeof(gid_t));
	if (name) {
		len = strlen(name);
		safe_write(fd, &len, sizeof(int));
		safe_write(fd, name, len);
	} else {
		safe_write(fd, &len, sizeof(int));
	}

	/* "found" doubles as the number of group records that follow. */
	safe_read(fd, &found, sizeof(int));

	if (!found)
		return NULL;

	/* Add space for NULL termination of the array */
	grps = xcalloc(found + 1, sizeof(struct group *));

	for (int i = 0; i < found; i++) {
		grps[i] = xmalloc(sizeof(struct group));

		safe_read(fd, &len, sizeof(int));
		grps[i]->gr_name = xmalloc(len + 1);
		safe_read(fd, grps[i]->gr_name, len);

		safe_read(fd, &len, sizeof(int));
		grps[i]->gr_passwd = xmalloc(len + 1);
		safe_read(fd, grps[i]->gr_passwd, len);

		safe_read(fd, &grps[i]->gr_gid, sizeof(gid_t));

		/*
		 * In the current implementation, we define each group to
		 * only have a single member - that of the user running the
		 * job. (Since gr_mem is a NULL terminated array, allocate
		 * space for two elements.)
		 */
		grps[i]->gr_mem = xcalloc(2, sizeof(char *));
		safe_read(fd, &len, sizeof(int));
		grps[i]->gr_mem[0] = xmalloc(len + 1);
		safe_read(fd, grps[i]->gr_mem[0], len);
	}
	debug("Leaving %s", __func__);
	return grps;

rwfail:
	/* Frees however much of the array was populated before failure. */
	xfree_struct_group_array(grps);
	return NULL;
}
988
/*
 * Release a NULL-terminated array of struct group as returned by
 * stepd_getgr(), including all string fields.  Accepts NULL.
 */
extern void xfree_struct_group_array(struct group **grps)
{
	for (int i = 0; grps && grps[i]; i++) {
		xfree(grps[i]->gr_name);
		xfree(grps[i]->gr_passwd);
		/*
		 * Bug fix: on stepd_getgr()'s rwfail path an entry may have
		 * been allocated before gr_mem was; the old unconditional
		 * gr_mem[0] access dereferenced NULL.
		 */
		if (grps[i]->gr_mem) {
			xfree(grps[i]->gr_mem[0]);
			xfree(grps[i]->gr_mem);
		}
		xfree(grps[i]);
	}
	xfree(grps);
}
1000
1001 /*
1002 * Return the process ID of the slurmstepd.
1003 */
1004 pid_t
stepd_daemon_pid(int fd,uint16_t protocol_version)1005 stepd_daemon_pid(int fd, uint16_t protocol_version)
1006 {
1007 int req = REQUEST_DAEMON_PID;
1008 pid_t pid;
1009
1010 safe_write(fd, &req, sizeof(int));
1011 safe_read(fd, &pid, sizeof(pid_t));
1012
1013 return pid;
1014 rwfail:
1015 return (pid_t)-1;
1016 }
1017
1018 /*
1019 * Suspend execution of the job step. Only root or SlurmUser is
1020 * authorized to use this call. Since this activity includes a 'sleep 1'
1021 * in the slurmstepd, initiate the "suspend" in parallel.
1022 *
1023 * Returns SLURM_SUCCESS is successful. On error returns SLURM_ERROR
1024 * and sets errno.
1025 */
1026 extern int
stepd_suspend(int fd,uint16_t protocol_version,suspend_int_msg_t * susp_req,int phase)1027 stepd_suspend(int fd, uint16_t protocol_version,
1028 suspend_int_msg_t *susp_req, int phase)
1029 {
1030 int req = REQUEST_STEP_SUSPEND;
1031 int rc = 0;
1032 int errnum = 0;
1033
1034 if (phase == 0) {
1035 safe_write(fd, &req, sizeof(int));
1036 safe_write(fd, &susp_req->job_core_spec, sizeof(uint16_t));
1037 } else {
1038 /* Receive the return code and errno */
1039 safe_read(fd, &rc, sizeof(int));
1040 safe_read(fd, &errnum, sizeof(int));
1041 errno = errnum;
1042 }
1043
1044 return rc;
1045 rwfail:
1046 return -1;
1047 }
1048
1049 /*
1050 * Resume execution of the job step that has been suspended by a
1051 * call to stepd_suspend(). Only root or SlurmUser is
1052 * authorized to use this call.
1053 *
1054 * Returns SLURM_SUCCESS is successful. On error returns SLURM_ERROR
1055 * and sets errno.
1056 */
1057 extern int
stepd_resume(int fd,uint16_t protocol_version,suspend_int_msg_t * susp_req,int phase)1058 stepd_resume(int fd, uint16_t protocol_version,
1059 suspend_int_msg_t *susp_req, int phase)
1060 {
1061 int req = REQUEST_STEP_RESUME;
1062 int rc = 0;
1063 int errnum = 0;
1064
1065 if (phase == 0) {
1066 safe_write(fd, &req, sizeof(int));
1067 safe_write(fd, &susp_req->job_core_spec, sizeof(uint16_t));
1068 } else {
1069 /* Receive the return code and errno */
1070 safe_read(fd, &rc, sizeof(int));
1071 safe_read(fd, &errnum, sizeof(int));
1072 errno = errnum;
1073 }
1074
1075 return rc;
1076 rwfail:
1077 return -1;
1078 }
1079
/*
 * Reconfigure the job step (primarily to allow the stepd to refresh
 * its log file pointer).
 *
 * Returns SLURM_SUCCESS if successful.  On error returns SLURM_ERROR
 * and sets errno.
 */
1087 int
stepd_reconfig(int fd,uint16_t protocol_version)1088 stepd_reconfig(int fd, uint16_t protocol_version)
1089 {
1090 int req = REQUEST_STEP_RECONFIGURE;
1091 int rc;
1092 int errnum = 0;
1093
1094 safe_write(fd, &req, sizeof(int));
1095
1096 /* Receive the return code and errno */
1097 safe_read(fd, &rc, sizeof(int));
1098 safe_read(fd, &errnum, sizeof(int));
1099
1100 errno = errnum;
1101 return rc;
1102 rwfail:
1103 return -1;
1104 }
1105
1106 /*
1107 * Terminate the job step.
1108 *
 * Returns SLURM_SUCCESS if successful.  On error returns SLURM_ERROR
1110 * and sets errno.
1111 */
1112 int
stepd_terminate(int fd,uint16_t protocol_version)1113 stepd_terminate(int fd, uint16_t protocol_version)
1114 {
1115 int req = REQUEST_STEP_TERMINATE;
1116 int rc;
1117 int errnum = 0;
1118
1119 safe_write(fd, &req, sizeof(int));
1120
1121 /* Receive the return code and errno */
1122 safe_read(fd, &rc, sizeof(int));
1123 safe_read(fd, &errnum, sizeof(int));
1124
1125 errno = errnum;
1126 return rc;
1127 rwfail:
1128 return -1;
1129 }
1130
1131 /*
1132 *
1133 * Returns SLURM_SUCCESS if successful. On error returns SLURM_ERROR
1134 * and sets errno.
1135 */
1136 int
stepd_completion(int fd,uint16_t protocol_version,step_complete_msg_t * sent)1137 stepd_completion(int fd, uint16_t protocol_version, step_complete_msg_t *sent)
1138 {
1139 int req = REQUEST_STEP_COMPLETION_V2;
1140 int rc;
1141 int errnum = 0;
1142 Buf buffer;
1143 int len = 0;
1144
1145 buffer = init_buf(0);
1146
1147 debug("Entering stepd_completion for %u.%u, range_first = %d, range_last = %d",
1148 sent->job_id, sent->job_step_id,
1149 sent->range_first, sent->range_last);
1150
1151 if (protocol_version >= SLURM_MIN_PROTOCOL_VERSION) {
1152 safe_write(fd, &req, sizeof(int));
1153 safe_write(fd, &sent->range_first, sizeof(int));
1154 safe_write(fd, &sent->range_last, sizeof(int));
1155 safe_write(fd, &sent->step_rc, sizeof(int));
1156
1157 /*
1158 * We must not use setinfo over a pipe with slurmstepd here
1159 * Indeed, slurmd does a large use of getinfo over a pipe
1160 * with slurmstepd and doing the reverse can result in
1161 * a deadlock scenario with slurmstepd :
1162 * slurmd(lockforread,write)/slurmstepd(write,lockforread)
1163 * Do pack/unpack instead to be sure of independances of
1164 * slurmd and slurmstepd
1165 */
1166 jobacctinfo_pack(sent->jobacct, protocol_version,
1167 PROTOCOL_TYPE_SLURM, buffer);
1168 len = get_buf_offset(buffer);
1169 safe_write(fd, &len, sizeof(int));
1170 safe_write(fd, get_buf_data(buffer), len);
1171 FREE_NULL_BUFFER(buffer);
1172
1173 /* Receive the return code and errno */
1174 safe_read(fd, &rc, sizeof(int));
1175 safe_read(fd, &errnum, sizeof(int));
1176 } else {
1177 error("%s: bad protocol version %hu",
1178 __func__, protocol_version);
1179 rc = SLURM_ERROR;
1180 }
1181
1182 errno = errnum;
1183 return rc;
1184
1185 rwfail:
1186 FREE_NULL_BUFFER(buffer);
1187 return -1;
1188 }
1189
/*
 * Gather accounting data for the step into resp->jobacct.
 *
 * Returns SLURM_SUCCESS on success; on success with accounting enabled,
 * resp->jobacct is allocated and must be freed by the caller.  On I/O
 * failure resp->jobacct is set to NULL.
 */
1195 int
stepd_stat_jobacct(int fd,uint16_t protocol_version,job_step_id_msg_t * sent,job_step_stat_t * resp)1196 stepd_stat_jobacct(int fd, uint16_t protocol_version,
1197 job_step_id_msg_t *sent, job_step_stat_t *resp)
1198 {
1199 int req = REQUEST_STEP_STAT;
1200 int rc = SLURM_SUCCESS;
1201 int tasks = 0;
1202
1203 /* NULL return indicates that accounting is disabled */
1204 if (!(resp->jobacct = jobacctinfo_create(NULL)))
1205 return rc;
1206
1207 debug("Entering stepd_stat_jobacct for job %u.%u",
1208 sent->job_id, sent->step_id);
1209
1210 safe_write(fd, &req, sizeof(int));
1211
1212 /* Do not attempt reading data until there is something to read.
1213 * Avoid locking the jobacct_gather plugin early and creating
1214 * possible deadlock. */
1215 if (wait_fd_readable(fd, 300))
1216 goto rwfail;
1217
1218 /* Fill in the jobacct struct and return */
1219 rc = jobacctinfo_getinfo(resp->jobacct, JOBACCT_DATA_PIPE, &fd,
1220 protocol_version);
1221
1222 safe_read(fd, &tasks, sizeof(int));
1223 resp->num_tasks = tasks;
1224
1225 return rc;
1226 rwfail:
1227 error("gathering job accounting: %d", rc);
1228 jobacctinfo_destroy(resp->jobacct);
1229 resp->jobacct = NULL;
1230 return rc;
1231 }
1232
1233 /*
1234 * List all of task process IDs and their local and global Slurm IDs.
1235 *
1236 * Returns SLURM_SUCCESS on success. On error returns SLURM_ERROR
1237 * and sets errno.
1238 */
1239 int
stepd_task_info(int fd,uint16_t protocol_version,slurmstepd_task_info_t ** task_info,uint32_t * task_info_count)1240 stepd_task_info(int fd, uint16_t protocol_version,
1241 slurmstepd_task_info_t **task_info,
1242 uint32_t *task_info_count)
1243 {
1244 int req = REQUEST_STEP_TASK_INFO;
1245 slurmstepd_task_info_t *task = NULL;
1246 uint32_t ntasks;
1247 int i;
1248
1249 safe_write(fd, &req, sizeof(int));
1250
1251 safe_read(fd, &ntasks, sizeof(uint32_t));
1252 task = xcalloc(ntasks, sizeof(slurmstepd_task_info_t));
1253 for (i = 0; i < ntasks; i++) {
1254 safe_read(fd, &(task[i].id), sizeof(int));
1255 safe_read(fd, &(task[i].gtid), sizeof(uint32_t));
1256 safe_read(fd, &(task[i].pid), sizeof(pid_t));
1257 safe_read(fd, &(task[i].exited), sizeof(bool));
1258 safe_read(fd, &(task[i].estatus), sizeof(int));
1259 }
1260
1261 if (ntasks == 0) {
1262 xfree(task);
1263 *task_info_count = 0;
1264 *task_info = NULL;
1265 } else {
1266 *task_info_count = ntasks;
1267 *task_info = task;
1268 }
1269
1270 return SLURM_SUCCESS;
1271 rwfail:
1272 xfree(task);
1273 *task_info_count = 0;
1274 *task_info = NULL;
1275 xfree(task);
1276 return SLURM_ERROR;
1277 }
1278
1279 /*
1280 * List all of process IDs in the proctrack container.
1281 *
 * Returns SLURM_SUCCESS if successful.  On error returns SLURM_ERROR
1283 * and sets errno.
1284 */
1285 int
stepd_list_pids(int fd,uint16_t protocol_version,uint32_t ** pids_array,uint32_t * pids_count)1286 stepd_list_pids(int fd, uint16_t protocol_version,
1287 uint32_t **pids_array, uint32_t *pids_count)
1288 {
1289 int req = REQUEST_STEP_LIST_PIDS;
1290 uint32_t npids;
1291 uint32_t *pids = NULL;
1292 int i;
1293
1294 safe_write(fd, &req, sizeof(int));
1295
1296 /* read the pid list */
1297 safe_read(fd, &npids, sizeof(uint32_t));
1298 pids = xcalloc(npids, sizeof(uint32_t));
1299 for (i = 0; i < npids; i++) {
1300 safe_read(fd, &pids[i], sizeof(uint32_t));
1301 }
1302
1303 if (npids == 0)
1304 xfree(pids);
1305
1306 *pids_count = npids;
1307 *pids_array = pids;
1308 return SLURM_SUCCESS;
1309
1310 rwfail:
1311 xfree(pids);
1312 *pids_count = 0;
1313 *pids_array = NULL;
1314 return SLURM_ERROR;
1315 }
1316
/*
 * Get the memory limits of the step.
 * Returns SLURM_SUCCESS if successful.  On read/write error returns
 * SLURM_ERROR.
 */
extern int stepd_get_mem_limits(int fd, uint16_t protocol_version,
				slurmstepd_mem_info_t *stepd_mem_info)
{
	int msg_type = REQUEST_STEP_MEM_LIMITS;

	xassert(stepd_mem_info);
	memset(stepd_mem_info, 0, sizeof(slurmstepd_mem_info_t));

	if (protocol_version >= SLURM_MIN_PROTOCOL_VERSION) {
		safe_write(fd, &msg_type, sizeof(int));

		safe_read(fd, &stepd_mem_info->job_mem_limit,
			  sizeof(uint32_t));
		safe_read(fd, &stepd_mem_info->step_mem_limit,
			  sizeof(uint32_t));
	}

	/* Unsupported protocol versions leave the limits zeroed */
	return SLURM_SUCCESS;
rwfail:
	return SLURM_ERROR;
}
1341
1342 /*
1343 * Get the uid of the step
1344 * Returns uid of the running step if successful. On error returns -1.
1345 *
1346 * FIXME: BUG: On Linux, uid_t is uint32_t but this can return -1.
1347 */
stepd_get_uid(int fd,uint16_t protocol_version)1348 extern uid_t stepd_get_uid(int fd, uint16_t protocol_version)
1349 {
1350 int req = REQUEST_STEP_UID;
1351 uid_t uid = -1;
1352
1353 if (protocol_version >= SLURM_MIN_PROTOCOL_VERSION) {
1354 safe_write(fd, &req, sizeof(int));
1355
1356 safe_read(fd, &uid, sizeof(uid_t));
1357 }
1358
1359 return uid;
1360 rwfail:
1361 return -1;
1362 }
1363
1364 /*
1365 * Get the nodeid of the stepd
1366 * Returns nodeid of the running stepd if successful. On error returns NO_VAL.
1367 */
stepd_get_nodeid(int fd,uint16_t protocol_version)1368 extern uint32_t stepd_get_nodeid(int fd, uint16_t protocol_version)
1369 {
1370 int req = REQUEST_STEP_NODEID;
1371 uint32_t nodeid = NO_VAL;
1372
1373 if (protocol_version >= SLURM_MIN_PROTOCOL_VERSION) {
1374 safe_write(fd, &req, sizeof(int));
1375
1376 safe_read(fd, &nodeid, sizeof(uid_t));
1377 }
1378
1379 return nodeid;
1380 rwfail:
1381 return NO_VAL;
1382 }
1383