1 /*****************************************************************************\
2  *  capmc_suspend.c - Power down identified nodes
3  *
4  *  Usage: "capmc_suspend <hostlist>"
5  *****************************************************************************
6  *  Copyright (C) 2016-2017 SchedMD LLC.
7  *
8  *  This file is part of Slurm, a resource management program.
9  *  For details, see <https://slurm.schedmd.com/>.
10  *  Please also read the included file: DISCLAIMER.
11  *
12  *  Slurm is free software; you can redistribute it and/or modify it under
13  *  the terms of the GNU General Public License as published by the Free
14  *  Software Foundation; either version 2 of the License, or (at your option)
15  *  any later version.
16  *
17  *  In addition, as a special exception, the copyright holders give permission
18  *  to link the code of portions of this program with the OpenSSL library under
19  *  certain conditions as described in each individual source file, and
20  *  distribute linked combinations including the two. You must obey the GNU
21  *  General Public License in all respects for all of the code used other than
22  *  OpenSSL. If you modify file(s) with this exception, you may extend this
23  *  exception to your version of the file(s), but you are not obligated to do
24  *  so. If you do not wish to do so, delete this exception statement from your
25  *  version.  If you delete this exception statement from all source files in
26  *  the program, then also delete it here.
27  *
28  *  Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
29  *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
30  *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
31  *  details.
32  *
33  *  You should have received a copy of the GNU General Public License along
34  *  with Slurm; if not, write to the Free Software Foundation, Inc.,
35  *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
36 \*****************************************************************************/
37 
38 #if HAVE_CONFIG_H
39 #  include "config.h"
40 #endif
41 
42 #define _GNU_SOURCE	/* For POLLRDHUP */
43 
44 #if HAVE_JSON_C_INC
45 #  include <json-c/json.h>
46 #elif HAVE_JSON_INC
47 #  include <json/json.h>
48 #endif
49 
#include <errno.h>
#include <poll.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
55 
56 #include "slurm/slurm.h"
57 #include "slurm/slurm_errno.h"
58 #include "src/common/hostlist.h"
59 #include "src/common/log.h"
60 #include "src/common/macros.h"
61 #include "src/common/parse_config.h"
62 #include "src/common/read_config.h"
63 #include "src/common/slurm_protocol_api.h"
64 #include "src/common/xassert.h"
65 #include "src/common/xmalloc.h"
66 #include "src/common/xstring.h"
67 
/* NOTE(review): MAX_THREADS is not referenced anywhere in this file as
 * shown; confirm whether it is still needed. */
#define MAX_THREADS 256

/* Maximum poll wait time for child processes, in milliseconds.
 * Each poll() call in _run_script() waits at most this long before the
 * overall capmc timeout is re-evaluated. */
#define MAX_POLL_WAIT 500

/* Default and minimum timeout parameters for the capmc command.
 * The timeout values are in milliseconds. */
#define DEFAULT_CAPMC_RETRIES 4
#define DEFAULT_CAPMC_TIMEOUT 60000	/* 60 seconds */
#define MIN_CAPMC_TIMEOUT 1000		/* 1 second */

/* Number of times to try performing "node_off" operation.
 * NOTE(review): not referenced in this file as shown. */
#define NODE_OFF_RETRIES 10

/* How long to wait for a node to enter "off" state, in seconds.
 * NOTE(review): not referenced in this file as shown. */
#define NODE_OFF_STATE_WAIT (30 * 60)

/* Static variables.
 * All of these are filled in by _read_config() (from knl_cray.conf or from
 * built-in defaults), except prog_name which is built in main(). */
static char *capmc_path = NULL;		/* path of the capmc executable */
static uint32_t capmc_poll_freq = 45;   /* capmc state polling frequency */
static uint32_t capmc_retries = DEFAULT_CAPMC_RETRIES;
static uint32_t capmc_timeout = DEFAULT_CAPMC_TIMEOUT;	/* milliseconds */
static char *log_file = NULL;		/* file handed to log_init() */
static char *prog_name = NULL;		/* "argv[0][pid]" prefix for log messages */
91 
/* Keys recognized when parsing knl_cray.conf.
 * This program itself only reads CapmcPath, CapmcPollFreq, CapmcRetries,
 * CapmcTimeout and LogFile (see _read_config()); the other keys are not
 * read here and are presumably listed so that a configuration file shared
 * with the plugin is accepted by the parser -- confirm.
 *
 * NOTE: Keep this table synchronized with the table in
 * src/plugins/node_features/knl_cray/node_features_knl_cray.c */
static s_p_options_t knl_conf_file_options[] = {
	{"AllowMCDRAM", S_P_STRING},
	{"AllowNUMA", S_P_STRING},
	{"AllowUserBoot", S_P_STRING},
	{"BootTime", S_P_UINT32},
	{"CapmcPath", S_P_STRING},
	{"CapmcPollFreq", S_P_UINT32},
	{"CapmcRetries", S_P_UINT32},
	{"CapmcTimeout", S_P_UINT32},
	{"CnselectPath", S_P_STRING},
	{"DefaultMCDRAM", S_P_STRING},
	{"DefaultNUMA", S_P_STRING},
	{"LogFile", S_P_STRING},
	{"McPath", S_P_STRING},
	{"SyscfgPath", S_P_STRING},
	{"UmeCheckInterval", S_P_UINT32},
	{NULL}	/* end-of-table marker */
};
112 
113 static s_p_hashtbl_t *_config_make_tbl(char *filename);
114 static void _read_config(void);
115 static char *_run_script(char **script_argv, int *status);
116 static int _tot_wait(struct timeval *start_time);
117 static int _update_all_nodes(char *node_names);
118 
_config_make_tbl(char * filename)119 static s_p_hashtbl_t *_config_make_tbl(char *filename)
120 {
121 	s_p_hashtbl_t *tbl = NULL;
122 
123 	xassert(filename);
124 
125 	if (!(tbl = s_p_hashtbl_create(knl_conf_file_options))) {
126 		error("%s: s_p_hashtbl_create error: %s", prog_name,
127 		      slurm_strerror(slurm_get_errno()));
128 		return tbl;
129 	}
130 
131 	if (s_p_parse_file(tbl, NULL, filename, false) == SLURM_ERROR) {
132 		error("%s: s_p_parse_file error: %s", prog_name,
133 		      slurm_strerror(slurm_get_errno()));
134 		s_p_hashtbl_destroy(tbl);
135 		tbl = NULL;
136 	}
137 
138 	return tbl;
139 }
140 
_read_config(void)141 static void _read_config(void)
142 {
143 	char *knl_conf_file;
144 	s_p_hashtbl_t *tbl;
145 
146 	capmc_timeout = DEFAULT_CAPMC_TIMEOUT;
147 	knl_conf_file = get_extra_conf_path("knl_cray.conf");
148 	if ((tbl = _config_make_tbl(knl_conf_file))) {
149 		(void) s_p_get_string(&capmc_path, "CapmcPath", tbl);
150 		(void) s_p_get_uint32(&capmc_poll_freq, "CapmcPollFreq", tbl);
151 		(void) s_p_get_uint32(&capmc_retries, "CapmcRetries", tbl);
152 		(void) s_p_get_uint32(&capmc_timeout, "CapmcTimeout", tbl);
153 		(void) s_p_get_string(&log_file, "LogFile", tbl);
154 	}
155 	xfree(knl_conf_file);
156 	s_p_hashtbl_destroy(tbl);
157 	if (!capmc_path)
158 		capmc_path = xstrdup("/opt/cray/capmc/default/bin/capmc");
159 	capmc_timeout = MAX(capmc_timeout, MIN_CAPMC_TIMEOUT);
160 	if (!log_file)
161 		log_file = slurm_get_job_slurmctld_logfile();
162 }
163 
164 /*
165  * Return time in msec since "start time"
166  */
_tot_wait(struct timeval * start_time)167 static int _tot_wait(struct timeval *start_time)
168 {
169 	struct timeval end_time;
170 	int msec_delay;
171 
172 	gettimeofday(&end_time, NULL);
173 	msec_delay =   (end_time.tv_sec  - start_time->tv_sec ) * 1000;
174 	msec_delay += ((end_time.tv_usec - start_time->tv_usec + 500) / 1000);
175 	return msec_delay;
176 }
177 
178 /* Run a script and return its stdout plus exit status */
_run_script(char ** script_argv,int * status)179 static char *_run_script(char **script_argv, int *status)
180 {
181 	int cc, i, new_wait, resp_size = 0, resp_offset = 0;
182 	pid_t cpid;
183 	char *resp = NULL;
184 	int pfd[2] = { -1, -1 };
185 
186 	if (access(capmc_path, R_OK | X_OK) < 0) {
187 		error("%s: Can not execute: %s", prog_name, capmc_path);
188 		*status = 127;
189 		resp = xstrdup("Slurm node_features/knl_cray configuration error");
190 		return resp;
191 	}
192 	if (pipe(pfd) != 0) {
193 		error("%s: pipe(): %s", prog_name,
194 		      slurm_strerror(slurm_get_errno()));
195 		*status = 127;
196 		resp = xstrdup("System error");
197 		return resp;
198 	}
199 
200 	if ((cpid = fork()) == 0) {
201 		cc = sysconf(_SC_OPEN_MAX);
202 		dup2(pfd[1], STDERR_FILENO);
203 		dup2(pfd[1], STDOUT_FILENO);
204 		for (i = 0; i < cc; i++) {
205 			if ((i != STDERR_FILENO) && (i != STDOUT_FILENO))
206 				close(i);
207 		}
208 		setpgid(0, 0);
209 		execv(capmc_path, script_argv);
210 		error("%s: execv(): %s", prog_name,
211 		      slurm_strerror(slurm_get_errno()));
212 		exit(127);
213 	} else if (cpid < 0) {
214 		close(pfd[0]);
215 		close(pfd[1]);
216 		error("%s: fork(): %s", prog_name,
217 		      slurm_strerror(slurm_get_errno()));
218 		*status = 127;
219 		resp = xstrdup("System error");
220 		return resp;
221 	} else {
222 		struct pollfd fds;
223 		struct timeval tstart;
224 		resp_size = 1024;
225 		resp = xmalloc(resp_size);
226 		close(pfd[1]);
227 		gettimeofday(&tstart, NULL);
228 		while (1) {
229 			fds.fd = pfd[0];
230 			fds.events = POLLIN | POLLHUP | POLLRDHUP;
231 			fds.revents = 0;
232 			new_wait = capmc_timeout - _tot_wait(&tstart);
233 			if (new_wait <= 0) {
234 				error("%s: poll() timeout @ %d msec", prog_name,
235 				      capmc_timeout);
236 				break;
237 			}
238 			new_wait = MIN(new_wait, MAX_POLL_WAIT);
239 			i = poll(&fds, 1, new_wait);
240 			if (i == 0) {
241 				continue;
242 			} else if (i < 0) {
243 				error("%s: poll(): %s", prog_name,
244 				      slurm_strerror(slurm_get_errno()));
245 				break;
246 			}
247 			if ((fds.revents & POLLIN) == 0)
248 				break;
249 			i = read(pfd[0], resp + resp_offset,
250 				 resp_size - resp_offset);
251 			if (i == 0) {
252 				break;
253 			} else if (i < 0) {
254 				if (errno == EAGAIN)
255 					continue;
256 				error("%s: read(): %s", prog_name,
257 				      slurm_strerror(slurm_get_errno()));
258 				break;
259 			} else {
260 				resp_offset += i;
261 				if (resp_offset + 1024 >= resp_size) {
262 					resp_size *= 2;
263 					resp = xrealloc(resp, resp_size);
264 				}
265 			}
266 		}
267 		killpg(cpid, SIGTERM);
268 		usleep(10000);
269 		killpg(cpid, SIGKILL);
270 		waitpid(cpid, status, 0);
271 		close(pfd[0]);
272 	}
273 	return resp;
274 }
275 
276 /* Convert node name string to equivalent nid string */
_node_names_2_nid_list(char * node_names)277 static char *_node_names_2_nid_list(char *node_names)
278 {
279 	char *nid_list = NULL;
280 	int i, last_nid_index = -1;
281 	bool is_dash = false;
282 	bitstr_t *node_bitmap;
283 
284 	node_bitmap = bit_alloc(100000);
285 	for (i = 0; node_names[i]; i++) {
286 		int nid_index = 0;
287 		/* skip "nid[" */
288 		if ((node_names[i] < '0') || (node_names[i] > '9'))
289 			continue;
290 		/* skip leading zeros */
291 		while (node_names[i] == '0')
292 			i++;
293 		if (node_names[i] == '[')
294 			i++;
295 		while ((node_names[i] >= '0') && (node_names[i] <= '9')) {
296 			nid_index *= 10;
297 			nid_index += (node_names[i++] - '0');
298 		}
299 		if (is_dash && (nid_index >= last_nid_index)) {
300 			bit_nset(node_bitmap, last_nid_index, nid_index);
301 		} else {
302 			bit_set(node_bitmap, nid_index);
303 		}
304 		if ((is_dash = (node_names[i] == '-')))
305 			last_nid_index = nid_index;
306 		else if (node_names[i] == '\0')
307 			break;
308 	}
309 
310 	i = strlen(node_names) + 1;
311 	nid_list = xmalloc(i);
312 	bit_fmt(nid_list, i, node_bitmap);
313 	bit_free(node_bitmap);
314 
315 	return nid_list;
316 }
317 
318 /* Attempt to shutdown all nodes in a single capmc call.
319  * RET 0 on success, -1 on failure */
_update_all_nodes(char * node_names)320 static int _update_all_nodes(char *node_names)
321 {
322 	char *argv[10], *nid_list, *resp_msg;
323 	int rc = 0, retry, status = 0;
324 
325 	nid_list = _node_names_2_nid_list(node_names);
326 	if (nid_list == NULL)
327 		return -1;
328 
329 	/* Request node power down.
330 	 * Example: "capmc node_off –n 43" */
331 	argv[0] = "capmc";
332 	argv[1] = "node_off";
333 	argv[2] = "-n";
334 	argv[3] = nid_list;
335 	argv[4] = NULL;
336 	for (retry = 0; ; retry++) {
337 		resp_msg = _run_script(argv, &status);
338 		if ((status == 0) ||
339 		    (resp_msg && strcasestr(resp_msg, "Success"))) {
340 			debug("%s: node_off sent to %s", prog_name, argv[3]);
341 			xfree(resp_msg);
342 			break;
343 		}
344 		error("%s: capmc(%s,%s,%s): %d %s", prog_name,
345 		      argv[1], argv[2], argv[3], status, resp_msg);
346 		if (resp_msg && strstr(resp_msg, "Could not lookup") &&
347 		    (retry <= capmc_retries)) {
348 			/* State Manager is down. Sleep and retry */
349 			error("Cray State Manager is down, retrying request");
350 			sleep(1);
351 			xfree(resp_msg);
352 		} else {
353 			/* Non-recoverable error */
354 			error("Aborting capmc_suspend for %s", nid_list);
355 			rc = -1;
356 			xfree(resp_msg);
357 			break;
358 		}
359 	}
360 
361 	xfree(resp_msg);
362 	xfree(nid_list);
363 	return rc;
364 }
365 
main(int argc,char * argv[])366 int main(int argc, char *argv[])
367 {
368 	log_options_t log_opts = LOG_OPTS_INITIALIZER;
369 
370 	xstrfmtcat(prog_name, "%s[%u]", argv[0], (uint32_t) getpid());
371 	_read_config();
372 	log_opts.stderr_level = LOG_LEVEL_QUIET;
373 	log_opts.syslog_level = LOG_LEVEL_QUIET;
374 	if (slurm_get_debug_flags() & DEBUG_FLAG_NODE_FEATURES)
375 		log_opts.logfile_level = LOG_LEVEL_DEBUG;
376 	else
377 		log_opts.logfile_level = LOG_LEVEL_ERROR;
378 	(void) log_init(argv[0], log_opts, LOG_DAEMON, log_file);
379 
380 	/* Attempt to shutdown all nodes in a single capmc call. */
381 	if (_update_all_nodes(argv[1]) != 0)
382 		exit(1);
383 
384 	xfree(prog_name);
385 	exit(0);
386 }
387