1 /*****************************************************************************\
2 * node_features_knl_generic.c - Plugin for managing Intel KNL state
3 * information on a generic Linux cluster
4 *****************************************************************************
5 * Copyright (C) 2016-2017 SchedMD LLC.
6 * Written by Morris Jette <jette@schedmd.com>
7 * Danny Auble <da@schedmd.com>
8 *
9 * This file is part of Slurm, a resource management program.
10 * For details, see <https://slurm.schedmd.com/>.
11 * Please also read the included file: DISCLAIMER.
12 *
13 * Slurm is free software; you can redistribute it and/or modify it under
14 * the terms of the GNU General Public License as published by the Free
15 * Software Foundation; either version 2 of the License, or (at your option)
16 * any later version.
17 *
18 * In addition, as a special exception, the copyright holders give permission
19 * to link the code of portions of this program with the OpenSSL library under
20 * certain conditions as described in each individual source file, and
21 * distribute linked combinations including the two. You must obey the GNU
22 * General Public License in all respects for all of the code used other than
23 * OpenSSL. If you modify file(s) with this exception, you may extend this
24 * exception to your version of the file(s), but you are not obligated to do
25 * so. If you do not wish to do so, delete this exception statement from your
26 * version. If you delete this exception statement from all source files in
27 * the program, then also delete it here.
28 *
29 * Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
30 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
31 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
32 * details.
33 *
34 * You should have received a copy of the GNU General Public License along
35 * with Slurm; if not, write to the Free Software Foundation, Inc.,
36 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
37 \*****************************************************************************/
38
39 #include "config.h"
40
41 #define _GNU_SOURCE /* For POLLRDHUP */
42 #include <ctype.h>
43 #include <fcntl.h>
44 #ifdef HAVE_NUMA
45 #undef NUMA_VERSION1_COMPATIBILITY
46 #include <numa.h>
47 #endif
48 #include <poll.h>
49 #include <pthread.h>
50 #include <signal.h>
51 #include <stdio.h>
52 #include <stdlib.h>
53 #include <sys/stat.h>
54 #include <sys/types.h>
55 #include <time.h>
56 #include <unistd.h>
57
58 #if defined(__APPLE__) || defined(__DragonFly__) || defined(__NetBSD__)
59 #define POLLRDHUP POLLHUP
60 #endif
61
62 #include "slurm/slurm.h"
63
64 #include "src/common/assoc_mgr.h"
65 #include "src/common/bitstring.h"
66 #include "src/common/fd.h"
67 #include "src/common/gres.h"
68 #include "src/common/list.h"
69 #include "src/common/macros.h"
70 #include "src/common/pack.h"
71 #include "src/common/parse_config.h"
72 #include "src/common/read_config.h"
73 #include "src/common/slurm_protocol_api.h"
74 #include "src/common/slurm_resource_info.h"
75 #include "src/common/timers.h"
76 #include "src/common/uid.h"
77 #include "src/common/xmalloc.h"
78 #include "src/common/xstring.h"
79 #include "src/slurmctld/job_scheduler.h"
80 #include "src/slurmctld/locks.h"
81 #include "src/slurmctld/node_scheduler.h"
82 #include "src/slurmctld/reservation.h"
83 #include "src/slurmctld/slurmctld.h"
84 #include "src/slurmctld/state_save.h"
85 #include "src/slurmd/slurmd/req.h"
86
/* Maximum poll wait time for child processes, in milliseconds */
#define MAX_POLL_WAIT 500
/* Default limit on the total run time of a syscfg command, in milliseconds.
 * init() applies it before reading SyscfgTimeout from knl_generic.conf. */
#define DEFAULT_SYSCFG_TIMEOUT 1000

/* Intel Knights Landing Configuration Modes */
/* Number of NUMA and MCDRAM modes; each mode owns one flag bit below */
#define KNL_NUMA_CNT 5
#define KNL_MCDRAM_CNT 5
/* Mask covering the NUMA mode bits (low byte of a mode bitmask) */
#define KNL_NUMA_FLAG 0x00ff
#define KNL_ALL2ALL 0x0001	/* "a2a" */
#define KNL_SNC2 0x0002	/* "snc2" */
#define KNL_SNC4 0x0004	/* "snc4" */
#define KNL_HEMI 0x0008	/* "hemi" */
#define KNL_QUAD 0x0010	/* "quad" */
/* Mask covering the MCDRAM mode bits (high byte of a mode bitmask) */
#define KNL_MCDRAM_FLAG 0xff00
#define KNL_CACHE 0x0100	/* "cache" */
#define KNL_EQUAL 0x0200	/* "equal" */
#define KNL_HYBRID 0x0400	/* "hybrid" */
#define KNL_FLAT 0x0800	/* "flat" (also accepted as "memory") */
#define KNL_AUTO 0x1000	/* "auto" */

/* Location of modprobe; may be overridden at build time */
#ifndef MODPROBE_PATH
#define MODPROBE_PATH "/sbin/modprobe"
#endif
#define ZONE_SORT_PATH "/sys/kernel/zone_sort_free_pages/nodeid"
//#define ZONE_SORT_PATH "/tmp/nodeid" /* For testing */

/* 16 GiB; may be overridden at build time */
#ifndef DEFAULT_MCDRAM_SIZE
#define DEFAULT_MCDRAM_SIZE ((uint64_t) 16 * 1024 * 1024 * 1024)
#endif

/* These are defined here so when we link with something other than
 * the slurmctld we will have these symbols defined. They will get
 * overwritten when linking with the slurmctld.
 */
#if defined (__APPLE__)
extern slurmctld_config_t slurmctld_config __attribute__((weak_import));
#else
slurmctld_config_t slurmctld_config;
#endif

/*
 * Kind of KNL system, set from the SystemType configuration parameter.
 * It selects which syscfg command line and response format are used.
 */
typedef enum {
	KNL_SYSTEM_TYPE_NOT_SET,	/* Unrecognized SystemType value */
	KNL_SYSTEM_TYPE_INTEL,		/* SystemType=intel */
	KNL_SYSTEM_TYPE_DELL,		/* SystemType=dell */
} knl_system_type_t;
132
/*
 * These variables are required by the node_features plugin interface. If they
 * are not found in the plugin, the plugin loader will ignore it.
 *
 * plugin_name - a string giving a human-readable description of the
 * plugin. There is no maximum length, but the symbol must refer to
 * a valid string.
 *
 * plugin_type - a string suggesting the type of the plugin or its
 * applicability to a particular form of data or method of data handling.
 * If the low-level plugin API is used, the contents of this string are
 * unimportant and may be anything. Slurm uses the higher-level plugin
 * interface which requires this string to be of the form
 *
 * <application>/<method>
 *
 * where <application> is a description of the intended application of
 * the plugin (e.g., "node_features" for Slurm node_features) and <method> is a
 * description of how this plugin satisfies that application. Slurm will only
 * load a node_features plugin if the plugin_type string has a prefix of
 * "node_features/".
 *
 * plugin_version - an unsigned 32-bit integer containing the Slurm version
 * (major.minor.micro combined into a single number).
 */
const char plugin_name[] = "node_features knl_generic plugin";
const char plugin_type[] = "node_features/knl_generic";
const uint32_t plugin_version = SLURM_VERSION_NUMBER;
161
/* Configuration Parameters */
/* AllowMCDRAM / AllowNUMA: bitmasks of permitted modes (default: all) */
static uint16_t allow_mcdram = KNL_MCDRAM_FLAG;
static uint16_t allow_numa = KNL_NUMA_FLAG;
/* AllowUserBoot: array of UIDs and its length; a zero count is reported
 * as "ALL" when the configuration is logged */
static uid_t *allowed_uid = NULL;
static int allowed_uid_cnt = 0;
static uint32_t boot_time = (5 * 60); /* 5 minute estimated boot time */
/* Serializes access to the "reconfig" flag and the reload it triggers */
static pthread_mutex_t config_mutex = PTHREAD_MUTEX_INITIALIZER;
static uint32_t cpu_bind[KNL_NUMA_CNT]; /* Derived from numa_cpu_bind */
/* True when DEBUG_FLAG_NODE_FEATURES is set in Slurm's debug flags */
static bool debug_flag = false;
/* DefaultMCDRAM / DefaultNUMA: exactly one mode bit each */
static uint16_t default_mcdram = KNL_CACHE;
static uint16_t default_numa = KNL_ALL2ALL;
/* McPath; init() falls back to "/sys/devices/system/edac/mc" */
static char *mc_path = NULL;
static uint32_t node_reboot_weight = (INFINITE - 1); /* NodeRebootWeight */
static char *numa_cpu_bind = NULL; /* NumaCpuBind, as configured */
static uint32_t syscfg_timeout = 0; /* SyscfgTimeout, in milliseconds */
/* Set by node_features_p_reconfig(), consumed by node_features_p_get_node() */
static bool reconfig = false;
/* Set non-zero by fini(); polled by the UME agent thread to stop */
static time_t shutdown_time = 0;
/* -1 until init() runs, then 1 if syscfg_path is executable, else 0 */
static int syscfg_found = -1;
/* SyscfgPath; init() falls back to "/usr/bin/syscfg" */
static char *syscfg_path = NULL;
static knl_system_type_t knl_system_type = KNL_SYSTEM_TYPE_INTEL;
/* UmeCheckInterval: delay between UME scans, in microseconds; 0 disables
 * the UME agent thread */
static uint32_t ume_check_interval = 0;
/* Protects creation and join of ume_thread */
static pthread_mutex_t ume_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_t ume_thread = 0;
/* Force: when non-zero, query node state even if the CPU is not a KNL */
static uint32_t force_load = 0;
/* -1 until init() runs, then 1 if /proc/cpuinfo mentions "Xeon Phi", else 0 */
static int hw_is_knl = -1;

/* Percentage of MCDRAM used for cache by type, updated from syscfg */
static int mcdram_pct[KNL_MCDRAM_CNT];
static uint64_t *mcdram_per_node = NULL;
static bitstr_t *knl_node_bitmap = NULL; /* KNL nodes found by syscfg */
192
/*
 * Keywords recognized in knl_generic.conf and the type of value each takes.
 * The table is terminated by an entry with a NULL keyword.
 */
static s_p_options_t knl_conf_file_options[] = {
	{"AllowMCDRAM", S_P_STRING},
	{"AllowNUMA", S_P_STRING},
	{"AllowUserBoot", S_P_STRING},
	{"BootTime", S_P_UINT32},
	{"DefaultMCDRAM", S_P_STRING},
	{"DefaultNUMA", S_P_STRING},
	{"Force", S_P_UINT32},
	{"LogFile", S_P_STRING},
	{"McPath", S_P_STRING},
	{"NodeRebootWeight", S_P_UINT32},
	{"NumaCpuBind", S_P_STRING},
	{"SyscfgPath", S_P_STRING},
	{"SyscfgTimeout", S_P_UINT32},
	{"SystemType", S_P_STRING},
	{"UmeCheckInterval", S_P_UINT32},
	{NULL}
};
211
/* Forward declarations of file-local helper functions */
static s_p_hashtbl_t *_config_make_tbl(char *filename);
static int _knl_mcdram_bits_cnt(uint16_t mcdram_num);
static uint16_t _knl_mcdram_parse(char *mcdram_str, char *sep);
static char *_knl_mcdram_str(uint16_t mcdram_num);
static uint16_t _knl_mcdram_token(char *token);
static int _knl_numa_bits_cnt(uint16_t numa_num);
static uint16_t _knl_numa_parse(char *numa_str, char *sep);
static char *_knl_numa_str(uint16_t numa_num);
static int _knl_numa_inx(char *token);
static uint16_t _knl_numa_token(char *token);
static void _log_script_argv(char **script_argv, char *resp_msg);
static char *_run_script(char *cmd_path, char **script_argv, int *status);
static int _tot_wait (struct timeval *start_time);
static void _update_cpu_bind(void);

_config_make_tbl(char * filename)227 static s_p_hashtbl_t *_config_make_tbl(char *filename)
228 {
229 s_p_hashtbl_t *tbl = NULL;
230
231 xassert(filename);
232
233 if (!(tbl = s_p_hashtbl_create(knl_conf_file_options))) {
234 error("knl.conf: %s: s_p_hashtbl_create error: %m", __func__);
235 return tbl;
236 }
237
238 if (s_p_parse_file(tbl, NULL, filename, false) == SLURM_ERROR) {
239 error("knl.conf: %s: s_p_parse_file error: %m", __func__);
240 s_p_hashtbl_destroy(tbl);
241 tbl = NULL;
242 }
243
244 return tbl;
245 }
246
247 /*
248 * Return the count of MCDRAM bits set
249 */
_knl_mcdram_bits_cnt(uint16_t mcdram_num)250 static int _knl_mcdram_bits_cnt(uint16_t mcdram_num)
251 {
252 int cnt = 0, i;
253 uint16_t tmp = 1;
254
255 for (i = 0; i < 16; i++) {
256 if ((mcdram_num & KNL_MCDRAM_FLAG) & tmp)
257 cnt++;
258 tmp = tmp << 1;
259 }
260 return cnt;
261 }
262
/*
 * Convert a list of KNL MCDRAM mode names into a mode bitmask
 * mcdram_str IN - list of mode names, may be NULL
 * sep IN - set of characters separating the names
 * RET bitwise OR of the modes named; 0 if mcdram_str is NULL or no name is
 *     recognized
 */
static uint16_t _knl_mcdram_parse(char *mcdram_str, char *sep)
{
	char *copy, *name, *state = NULL;
	uint16_t modes = 0;

	if (mcdram_str) {
		/* strtok_r() modifies its input, so tokenize a private copy */
		copy = xstrdup(mcdram_str);
		for (name = strtok_r(copy, sep, &state); name;
		     name = strtok_r(NULL, sep, &state))
			modes |= _knl_mcdram_token(name);
		xfree(copy);
	}

	return modes;
}
287
288 /*
289 * Translate KNL MCDRAM number to equivalent string value
290 * Caller must free return value
291 */
_knl_mcdram_str(uint16_t mcdram_num)292 static char *_knl_mcdram_str(uint16_t mcdram_num)
293 {
294 char *mcdram_str = NULL, *sep = "";
295
296 if (mcdram_num & KNL_CACHE) {
297 xstrfmtcat(mcdram_str, "%scache", sep);
298 sep = ",";
299 }
300 if (mcdram_num & KNL_HYBRID) {
301 xstrfmtcat(mcdram_str, "%shybrid", sep);
302 sep = ",";
303 }
304 if (mcdram_num & KNL_FLAT) {
305 xstrfmtcat(mcdram_str, "%sflat", sep);
306 sep = ",";
307 }
308 if (mcdram_num & KNL_EQUAL) {
309 xstrfmtcat(mcdram_str, "%sequal", sep);
310 sep = ",";
311 }
312 if (mcdram_num & KNL_AUTO) {
313 xstrfmtcat(mcdram_str, "%sauto", sep);
314 // sep = ","; /* Remove to avoid CLANG error */
315 }
316
317 return mcdram_str;
318 }
319
320 /*
321 * Given a KNL MCDRAM token, return its equivalent numeric value
322 * token IN - String to scan
323 * RET MCDRAM numeric value
324 */
_knl_mcdram_token(char * token)325 static uint16_t _knl_mcdram_token(char *token)
326 {
327 uint16_t mcdram_num = 0;
328
329 if (!xstrcasecmp(token, "cache"))
330 mcdram_num = KNL_CACHE;
331 else if (!xstrcasecmp(token, "hybrid"))
332 mcdram_num = KNL_HYBRID;
333 else if (!xstrcasecmp(token, "flat") ||
334 !xstrcasecmp(token, "memory"))
335 mcdram_num = KNL_FLAT;
336 else if (!xstrcasecmp(token, "equal"))
337 mcdram_num = KNL_EQUAL;
338 else if (!xstrcasecmp(token, "auto"))
339 mcdram_num = KNL_AUTO;
340
341 return mcdram_num;
342 }
343
344 /*
345 * Return the count of NUMA bits set
346 */
_knl_numa_bits_cnt(uint16_t numa_num)347 static int _knl_numa_bits_cnt(uint16_t numa_num)
348 {
349 int cnt = 0, i;
350 uint16_t tmp = 1;
351
352 for (i = 0; i < 16; i++) {
353 if ((numa_num & KNL_NUMA_FLAG) & tmp)
354 cnt++;
355 tmp = tmp << 1;
356 }
357 return cnt;
358 }
359
/*
 * Convert a list of KNL NUMA mode names into a mode bitmask
 * numa_str IN - list of mode names, may be NULL
 * sep IN - set of characters separating the names
 * RET bitwise OR of the modes named; 0 if numa_str is NULL or no name is
 *     recognized
 */
static uint16_t _knl_numa_parse(char *numa_str, char *sep)
{
	char *copy, *name, *state = NULL;
	uint16_t modes = 0;

	if (numa_str == NULL)
		return 0;

	/* strtok_r() modifies its input, so tokenize a private copy */
	copy = xstrdup(numa_str);
	name = strtok_r(copy, sep, &state);
	while (name != NULL) {
		modes |= _knl_numa_token(name);
		name = strtok_r(NULL, sep, &state);
	}
	xfree(copy);

	return modes;
}
384
385 /*
386 * Translate KNL NUMA number to equivalent string value
387 * Caller must free return value
388 */
_knl_numa_str(uint16_t numa_num)389 static char *_knl_numa_str(uint16_t numa_num)
390 {
391 char *numa_str = NULL, *sep = "";
392
393 if (numa_num & KNL_ALL2ALL) {
394 xstrfmtcat(numa_str, "%sa2a", sep);
395 sep = ",";
396 }
397 if (numa_num & KNL_SNC2) {
398 xstrfmtcat(numa_str, "%ssnc2", sep);
399 sep = ",";
400 }
401 if (numa_num & KNL_SNC4) {
402 xstrfmtcat(numa_str, "%ssnc4", sep);
403 sep = ",";
404 }
405 if (numa_num & KNL_HEMI) {
406 xstrfmtcat(numa_str, "%shemi", sep);
407 sep = ",";
408 }
409 if (numa_num & KNL_QUAD) {
410 xstrfmtcat(numa_str, "%squad", sep);
411 // sep = ","; /* Remove to avoid CLANG error */
412 }
413
414 return numa_str;
415
416 }
417
418 /*
419 * Given a KNL NUMA token, return its equivalent numeric value
420 * token IN - String to scan
421 * RET NUMA numeric value
422 */
_knl_numa_token(char * token)423 static uint16_t _knl_numa_token(char *token)
424 {
425 uint16_t numa_num = 0;
426
427 if (!xstrcasecmp(token, "a2a"))
428 numa_num |= KNL_ALL2ALL;
429 else if (!xstrcasecmp(token, "snc2"))
430 numa_num |= KNL_SNC2;
431 else if (!xstrcasecmp(token, "snc4"))
432 numa_num |= KNL_SNC4;
433 else if (!xstrcasecmp(token, "hemi"))
434 numa_num |= KNL_HEMI;
435 else if (!xstrcasecmp(token, "quad"))
436 numa_num |= KNL_QUAD;
437
438 return numa_num;
439 }
440
441 /*
442 * Given a KNL NUMA token, return its cpu_bind offset
443 * token IN - String to scan
444 * RET NUMA offset or -1 if not found
445 */
_knl_numa_inx(char * token)446 static int _knl_numa_inx(char *token)
447 {
448 uint16_t numa_num;
449 int i;
450
451 numa_num = _knl_numa_token(token);
452 for (i = 0; i < KNL_NUMA_CNT; i++) {
453 if ((0x01 << i) == numa_num)
454 return i;
455 }
456 return -1;
457 }
458
459 /*
460 * Translate KNL System enum to equivalent string value
461 */
_knl_system_type_str(knl_system_type_t system_type)462 static char *_knl_system_type_str(knl_system_type_t system_type)
463 {
464 switch (system_type) {
465 case KNL_SYSTEM_TYPE_INTEL:
466 return "Intel";
467 case KNL_SYSTEM_TYPE_DELL:
468 return "Dell";
469 case KNL_SYSTEM_TYPE_NOT_SET:
470 default:
471 return "Unknown";
472 }
473 }
474
475 /*
476 * Given a KNL System token, return its equivalent enum value
477 * token IN - String to scan
478 * RET System enum value
479 */
_knl_system_type_token(char * token)480 static knl_system_type_t _knl_system_type_token(char *token)
481 {
482 knl_system_type_t system_type;
483
484 if (!xstrcasecmp("intel", token))
485 system_type = KNL_SYSTEM_TYPE_INTEL;
486 else if (!xstrcasecmp("dell", token))
487 system_type = KNL_SYSTEM_TYPE_DELL;
488 else
489 system_type = KNL_SYSTEM_TYPE_NOT_SET;
490
491 return system_type;
492 }
493
/*
 * Compute the time elapsed since a recorded start time
 * start_time IN - time recorded earlier with gettimeofday()
 * RET elapsed time in milliseconds
 */
static int _tot_wait (struct timeval *start_time)
{
	struct timeval now;
	int elapsed_msec;

	gettimeofday(&now, NULL);
	/* Sub-second part first (the + 500 rounds it), then whole seconds */
	elapsed_msec = (now.tv_usec - start_time->tv_usec + 500) / 1000;
	elapsed_msec += (now.tv_sec - start_time->tv_sec) * 1000;

	return elapsed_msec;
}
507
508 /*
509 * Update cpu_bind array from current numa_cpu_bind configuration parameter
510 */
_update_cpu_bind(void)511 static void _update_cpu_bind(void)
512 {
513 char *save_ptr = NULL, *sep, *tok, *tmp;
514 int rc = SLURM_SUCCESS;
515 int i, numa_inx, numa_def;
516 uint32_t cpu_bind_val = 0;
517
518 for (i = 0; i < KNL_NUMA_CNT; i++)
519 cpu_bind[0] = 0;
520
521 if (!numa_cpu_bind)
522 return;
523
524 tmp = xstrdup(numa_cpu_bind);
525 tok = strtok_r(tmp, ";", &save_ptr);
526 while (tok) {
527 sep = strchr(tok, '=');
528 if (!sep) {
529 rc = SLURM_ERROR;
530 break;
531 }
532 sep[0] = '\0';
533 numa_def = _knl_numa_token(tok);
534 if (numa_def == 0) {
535 rc = SLURM_ERROR;
536 break;
537 }
538 if (xlate_cpu_bind_str(sep + 1, &cpu_bind_val) !=
539 SLURM_SUCCESS) {
540 rc = SLURM_ERROR;
541 break;
542 }
543 numa_inx = -1;
544 for (i = 0; i < KNL_NUMA_CNT; i++) {
545 if ((0x1 << i) == numa_def) {
546 numa_inx = i;
547 break;
548 }
549 }
550 if (numa_inx > -1)
551 cpu_bind[numa_inx] = cpu_bind_val;
552 tok = strtok_r(NULL, ";", &save_ptr);
553 }
554 xfree(tmp);
555
556 if (rc != SLURM_SUCCESS) {
557 error("%s: Invalid NumaCpuBind (%s), ignored",
558 plugin_type, numa_cpu_bind);
559 }
560
561 if (debug_flag) {
562 for (i = 0; i < KNL_NUMA_CNT; i++) {
563 char cpu_bind_str[128], *numa_str;
564 if (cpu_bind[i] == 0)
565 continue;
566 numa_str = _knl_numa_str(0x1 << i);
567 slurm_sprint_cpu_bind_type(cpu_bind_str, cpu_bind[i]);
568 info("CpuBind[%s] = %s", numa_str, cpu_bind_str);
569 xfree(numa_str);
570 }
571 }
572 }
573
574 /* Log a command's arguments. */
_log_script_argv(char ** script_argv,char * resp_msg)575 static void _log_script_argv(char **script_argv, char *resp_msg)
576 {
577 char *cmd_line = NULL;
578 int i;
579
580 if (!debug_flag)
581 return;
582
583 for (i = 0; script_argv[i]; i++) {
584 if (i)
585 xstrcat(cmd_line, " ");
586 xstrcat(cmd_line, script_argv[i]);
587 }
588 info("%s", cmd_line);
589 if (resp_msg && resp_msg[0])
590 info("%s", resp_msg);
591 xfree(cmd_line);
592 }
593
594 /* Run a script and return its stdout plus exit status */
_run_script(char * cmd_path,char ** script_argv,int * status)595 static char *_run_script(char *cmd_path, char **script_argv, int *status)
596 {
597 int cc, i, new_wait, resp_size = 0, resp_offset = 0;
598 pid_t cpid;
599 char *resp = NULL;
600 int pfd[2] = { -1, -1 };
601
602 if (access(cmd_path, R_OK | X_OK) < 0) {
603 error("%s: %s can not be executed: %m", __func__, cmd_path);
604 *status = 127;
605 resp = xstrdup("Slurm node_features/knl_generic configuration error");
606 return resp;
607 }
608 if (pipe(pfd) != 0) {
609 error("%s: pipe(): %m", __func__);
610 *status = 127;
611 resp = xstrdup("System error");
612 return resp;
613 }
614
615 if ((cpid = fork()) == 0) {
616 cc = sysconf(_SC_OPEN_MAX);
617 dup2(pfd[1], STDERR_FILENO);
618 dup2(pfd[1], STDOUT_FILENO);
619 for (i = 0; i < cc; i++) {
620 if ((i != STDERR_FILENO) && (i != STDOUT_FILENO))
621 close(i);
622 }
623 setpgid(0, 0);
624 execv(cmd_path, script_argv);
625 error("%s: execv(%s): %m", __func__, cmd_path);
626 _exit(127);
627 } else if (cpid < 0) {
628 close(pfd[0]);
629 close(pfd[1]);
630 error("%s: fork(): %m", __func__);
631 } else {
632 struct pollfd fds;
633 struct timeval tstart;
634 resp_size = 1024;
635 resp = xmalloc(resp_size);
636 close(pfd[1]);
637 gettimeofday(&tstart, NULL);
638 while (1) {
639 if (slurmctld_config.shutdown_time) {
640 error("%s: killing %s operation on shutdown",
641 __func__, script_argv[1]);
642 break;
643 }
644 fds.fd = pfd[0];
645 fds.events = POLLIN | POLLHUP | POLLRDHUP;
646 fds.revents = 0;
647 new_wait = syscfg_timeout - _tot_wait(&tstart);
648 if (new_wait <= 0) {
649 error("%s: %s poll timeout @ %d msec",
650 __func__, script_argv[1], syscfg_timeout);
651 break;
652 }
653 new_wait = MIN(new_wait, MAX_POLL_WAIT);
654 i = poll(&fds, 1, new_wait);
655 if (i == 0) {
656 continue;
657 } else if (i < 0) {
658 error("%s: %s poll:%m", __func__,
659 script_argv[1]);
660 break;
661 }
662 if ((fds.revents & POLLIN) == 0)
663 break;
664 i = read(pfd[0], resp + resp_offset,
665 resp_size - resp_offset);
666 if (i == 0) {
667 break;
668 } else if (i < 0) {
669 if (errno == EAGAIN)
670 continue;
671 error("%s: read(%s): %m", __func__, syscfg_path);
672 break;
673 } else {
674 resp_offset += i;
675 if (resp_offset + 1024 >= resp_size) {
676 resp_size *= 2;
677 resp = xrealloc(resp, resp_size);
678 }
679 }
680 }
681 killpg(cpid, SIGTERM);
682 usleep(10000);
683 killpg(cpid, SIGKILL);
684 waitpid(cpid, status, 0);
685 close(pfd[0]);
686 }
687 return resp;
688 }
689
_make_uid_array(char * uid_str)690 static void _make_uid_array(char *uid_str)
691 {
692 char *save_ptr = NULL, *tmp_str, *tok;
693 int i, uid_cnt = 0;
694
695 if (!uid_str)
696 return;
697
698 /* Count the number of users */
699 for (i = 0; uid_str[i]; i++) {
700 if (uid_str[i] == ',')
701 uid_cnt++;
702 }
703 uid_cnt++;
704
705 allowed_uid = xmalloc(sizeof(uid_t) * uid_cnt);
706 allowed_uid_cnt = 0;
707 tmp_str = xstrdup(uid_str);
708 tok = strtok_r(tmp_str, ",", &save_ptr);
709 while (tok) {
710 if (uid_from_string(tok, &allowed_uid[allowed_uid_cnt++]) < 0)
711 error("knl_generic.conf: Invalid AllowUserBoot: %s", tok);
712 tok = strtok_r(NULL, ",", &save_ptr);
713 }
714 xfree(tmp_str);
715 }
716
/*
 * Build a printable, comma separated list of users from a UID array.
 * uid_array IN - UIDs to describe
 * uid_cnt IN - number of entries in uid_array
 * RET xmalloc'ed string of "name(uid)" entries, or "ALL" if the array is
 *     empty; caller must xfree
 */
static char *_make_uid_str(uid_t *uid_array, int uid_cnt)
{
	char *sep = "", *tmp_str = NULL, *uid_str = NULL;
	int i;

	/* Decide from the arguments rather than from the global count, so
	 * that the result always matches the array that was passed in */
	if (!uid_array || (uid_cnt <= 0)) {
		uid_str = xstrdup("ALL");
		return uid_str;
	}

	for (i = 0; i < uid_cnt; i++) {
		tmp_str = uid_to_string(uid_array[i]);
		/* Print as unsigned: large UIDs must not show as negative */
		xstrfmtcat(uid_str, "%s%s(%u)", sep, tmp_str,
			   (unsigned int) uid_array[i]);
		xfree(tmp_str);
		sep = ",";
	}

	return uid_str;
}
736
737 /* Watch for Uncorrectable Memory Errors. Notify jobs if any detected */
_ume_agent(void * args)738 static void *_ume_agent(void *args)
739 {
740 struct timespec req;
741 int i, mc_num, csrow_num, ue_count, last_ue_count = -1;
742 int *fd = NULL, fd_cnt = 0, fd_size = 0, ume_path_size;
743 char buf[8], *ume_path;
744 ssize_t rd_size;
745
746 /* Identify and open array of UME file descriptors */
747 ume_path_size = strlen(mc_path) + 32;
748 ume_path = xmalloc(ume_path_size);
749 for (mc_num = 0; ; mc_num++) {
750 for (csrow_num = 0; ; csrow_num++) {
751 if (fd_cnt == fd_size) {
752 fd_size += 64;
753 fd = xrealloc(fd, sizeof(int) * fd_size);
754 }
755 snprintf(ume_path, ume_path_size,
756 "%s/mc%d/csrow%d/ue_count",
757 mc_path, mc_num, csrow_num);
758 if ((fd[fd_cnt] = open(ume_path, 0)) >= 0)
759 fd_cnt++;
760 else
761 break;
762 }
763 if (csrow_num == 0)
764 break;
765 }
766 xfree(ume_path);
767
768 while (!shutdown_time) {
769 /* Get current UME count */
770 ue_count = 0;
771 for (i = 0; i < fd_cnt; i++) {
772 (void) lseek(fd[i], 0, SEEK_SET);
773 rd_size = read(fd[i], buf, 7);
774 if (rd_size <= 0)
775 continue;
776 buf[rd_size] = '\0';
777 ue_count += atoi(buf);
778 }
779
780 if (shutdown_time)
781 break;
782 /* If UME count changed, notify all steps */
783 if ((last_ue_count < ue_count) && (last_ue_count != -1)) {
784 i = ume_notify();
785 error("UME error detected. Notified %d job steps", i);
786 }
787 last_ue_count = ue_count;
788
789 if (shutdown_time)
790 break;
791 /* Sleep before retry */
792 req.tv_sec = ume_check_interval / USEC_IN_SEC;
793 req.tv_nsec = (ume_check_interval % USEC_IN_SEC) *
794 NSEC_IN_USEC;
795 (void) nanosleep(&req, NULL);
796 }
797
798 for (i = 0; i < fd_cnt; i++)
799 (void) close(fd[i]);
800 xfree(fd);
801
802 return NULL;
803 }
804
805 /* Load configuration */
init(void)806 extern int init(void)
807 {
808 char *allow_mcdram_str, *allow_numa_str, *allow_user_str;
809 char *default_mcdram_str, *default_numa_str;
810 char *knl_conf_file, *tmp_str = NULL, *resume_program;
811 s_p_hashtbl_t *tbl;
812 struct stat stat_buf;
813 int i, rc = SLURM_SUCCESS;
814 char *cpuinfo_path = "/proc/cpuinfo";
815 FILE *cpu_info_file;
816 char buf[1024];
817
818 /* Set default values */
819 allow_mcdram = KNL_MCDRAM_FLAG;
820 allow_numa = KNL_NUMA_FLAG;
821 xfree(allowed_uid);
822 xfree(mc_path);
823 xfree(syscfg_path);
824 allowed_uid_cnt = 0;
825 for (i = 0; i < KNL_NUMA_CNT; i++)
826 cpu_bind[i] = 0;
827 syscfg_timeout = DEFAULT_SYSCFG_TIMEOUT;
828 debug_flag = false;
829 default_mcdram = KNL_CACHE;
830 default_numa = KNL_ALL2ALL;
831 //FIXME: Need better mechanism to get MCDRAM percentages
832 // for (i = 0; i < KNL_MCDRAM_CNT; i++)
833 // mcdram_pct[i] = -1;
834 mcdram_pct[0] = 100; // KNL_CACHE
835 mcdram_pct[1] = 50; // KNL_EQUAL
836 mcdram_pct[2] = 50; // KNL_HYBRID
837 mcdram_pct[3] = 0; // KNL_FLAT
838 mcdram_pct[4] = 0; // KNL_AUTO
839 xfree(numa_cpu_bind);
840
841 if (slurm_get_debug_flags() & DEBUG_FLAG_NODE_FEATURES)
842 debug_flag = true;
843
844 knl_conf_file = get_extra_conf_path("knl_generic.conf");
845 if ((stat(knl_conf_file, &stat_buf) == 0) &&
846 (tbl = _config_make_tbl(knl_conf_file))) {
847 if (s_p_get_string(&tmp_str, "AllowMCDRAM", tbl)) {
848 allow_mcdram = _knl_mcdram_parse(tmp_str, ",");
849 if (_knl_mcdram_bits_cnt(allow_mcdram) < 1) {
850 fatal("knl_generic.conf: Invalid AllowMCDRAM=%s",
851 tmp_str);
852 }
853 xfree(tmp_str);
854 }
855 if (s_p_get_string(&tmp_str, "AllowNUMA", tbl)) {
856 allow_numa = _knl_numa_parse(tmp_str, ",");
857 if (_knl_numa_bits_cnt(allow_numa) < 1) {
858 fatal("knl_generic.conf: Invalid AllowNUMA=%s",
859 tmp_str);
860 }
861 xfree(tmp_str);
862 }
863 if (s_p_get_string(&tmp_str, "AllowUserBoot", tbl)) {
864 _make_uid_array(tmp_str);
865 xfree(tmp_str);
866 }
867 (void) s_p_get_uint32(&boot_time, "BootTime", tbl);
868 if (s_p_get_string(&tmp_str, "DefaultMCDRAM", tbl)) {
869 default_mcdram = _knl_mcdram_parse(tmp_str, ",");
870 if (_knl_mcdram_bits_cnt(default_mcdram) != 1) {
871 fatal("knl_generic.conf: Invalid DefaultMCDRAM=%s",
872 tmp_str);
873 }
874 xfree(tmp_str);
875 }
876 if (s_p_get_string(&tmp_str, "DefaultNUMA", tbl)) {
877 default_numa = _knl_numa_parse(tmp_str, ",");
878 if (_knl_numa_bits_cnt(default_numa) != 1) {
879 fatal("knl_generic.conf: Invalid DefaultNUMA=%s",
880 tmp_str);
881 }
882 xfree(tmp_str);
883 }
884 (void) s_p_get_uint32(&force_load, "Force", tbl);
885 (void) s_p_get_string(&mc_path, "McPath", tbl);
886 (void) s_p_get_uint32(&node_reboot_weight, "NodeRebootWeight",
887 tbl);
888 if (s_p_get_string(&numa_cpu_bind, "NumaCpuBind", tbl))
889 _update_cpu_bind();
890 (void) s_p_get_string(&syscfg_path, "SyscfgPath", tbl);
891 if (s_p_get_string(&tmp_str, "SystemType", tbl)) {
892 if ((knl_system_type = _knl_system_type_token(tmp_str))
893 == KNL_SYSTEM_TYPE_NOT_SET)
894 fatal("knl_generic.conf: Invalid SystemType=%s.",
895 tmp_str);
896 xfree(tmp_str);
897 }
898 (void) s_p_get_uint32(&syscfg_timeout, "SyscfgTimeout", tbl);
899 (void) s_p_get_uint32(&ume_check_interval, "UmeCheckInterval",
900 tbl);
901
902 s_p_hashtbl_destroy(tbl);
903 } else if (errno != ENOENT) {
904 error("Error opening/reading knl_generic.conf: %m");
905 rc = SLURM_ERROR;
906 }
907 xfree(knl_conf_file);
908 if (!mc_path)
909 mc_path = xstrdup("/sys/devices/system/edac/mc");
910 if (!syscfg_path)
911 syscfg_path = xstrdup("/usr/bin/syscfg");
912 if (access(syscfg_path, X_OK) == 0)
913 syscfg_found = 1;
914 else
915 syscfg_found = 0;
916
917 hw_is_knl = 0;
918 cpu_info_file = fopen(cpuinfo_path, "r");
919 if (cpu_info_file == NULL) {
920 error("Error opening/reading %s: %m", cpuinfo_path);
921 } else {
922 while (fgets(buf, sizeof(buf), cpu_info_file)) {
923 if (strstr(buf, "Xeon Phi")) {
924 hw_is_knl = 1;
925 break;
926 }
927 }
928 fclose(cpu_info_file);
929 }
930
931 if ((resume_program = slurm_get_resume_program())) {
932 error("Use of ResumeProgram with %s not currently supported",
933 plugin_name);
934 xfree(resume_program);
935 rc = SLURM_ERROR;
936 }
937
938 if (slurm_get_debug_flags() & DEBUG_FLAG_NODE_FEATURES) {
939 allow_mcdram_str = _knl_mcdram_str(allow_mcdram);
940 allow_numa_str = _knl_numa_str(allow_numa);
941 allow_user_str = _make_uid_str(allowed_uid, allowed_uid_cnt);
942 default_mcdram_str = _knl_mcdram_str(default_mcdram);
943 default_numa_str = _knl_numa_str(default_numa);
944 info("AllowMCDRAM=%s AllowNUMA=%s",
945 allow_mcdram_str, allow_numa_str);
946 info("AllowUserBoot=%s", allow_user_str);
947 info("BootTIme=%u", boot_time);
948 info("DefaultMCDRAM=%s DefaultNUMA=%s",
949 default_mcdram_str, default_numa_str);
950 info("Force=%u", force_load);
951 info("McPath=%s", mc_path);
952 info("NodeRebootWeight=%u", node_reboot_weight);
953 info("NumaCpuBind=%s", numa_cpu_bind);
954 info("SyscfgPath=%s (Found=%d)", syscfg_path, syscfg_found);
955 info("SyscfgTimeout=%u msec", syscfg_timeout);
956 info("SystemType=%s", _knl_system_type_str(knl_system_type));
957 info("UmeCheckInterval=%u", ume_check_interval);
958 xfree(allow_mcdram_str);
959 xfree(allow_numa_str);
960 xfree(allow_user_str);
961 xfree(default_mcdram_str);
962 xfree(default_numa_str);
963 }
964 gres_plugin_add("hbm");
965
966 if ((rc == SLURM_SUCCESS) &&
967 ume_check_interval && running_in_slurmd()) {
968 slurm_mutex_lock(&ume_mutex);
969 slurm_thread_create(&ume_thread, _ume_agent, NULL);
970 slurm_mutex_unlock(&ume_mutex);
971 }
972
973 return rc;
974 }
975
976 /* Release allocated memory */
fini(void)977 extern int fini(void)
978 {
979 shutdown_time = time(NULL);
980 slurm_mutex_lock(&ume_mutex);
981 if (ume_thread) {
982 pthread_join(ume_thread, NULL);
983 ume_thread = 0;
984 }
985 slurm_mutex_unlock(&ume_mutex);
986 xfree(allowed_uid);
987 allowed_uid_cnt = 0;
988 debug_flag = false;
989 xfree(mcdram_per_node);
990 xfree(mc_path);
991 xfree(numa_cpu_bind);
992 xfree(syscfg_path);
993 FREE_NULL_BITMAP(knl_node_bitmap);
994
995 return SLURM_SUCCESS;
996 }
997
998 /* Reload configuration */
node_features_p_reconfig(void)999 extern int node_features_p_reconfig(void)
1000 {
1001 slurm_mutex_lock(&config_mutex);
1002 reconfig = true;
1003 slurm_mutex_unlock(&config_mutex);
1004 return SLURM_SUCCESS;
1005 }
1006
1007 /* Update active and available features on specified nodes,
1008 * sets features on all nodes if node_list is NULL */
node_features_p_get_node(char * node_list)1009 extern int node_features_p_get_node(char *node_list)
1010 {
1011 slurm_mutex_lock(&config_mutex);
1012 if (reconfig) {
1013 (void) init();
1014 reconfig = false;
1015 }
1016 slurm_mutex_unlock(&config_mutex);
1017 return SLURM_SUCCESS;
1018 }
1019
1020 /* Get this node's current and available MCDRAM and NUMA settings from BIOS.
1021 * avail_modes IN/OUT - append available modes, must be xfreed
1022 * current_mode IN/OUT - append current modes, must be xfreed
1023 *
1024 * NOTE: Not applicable on Cray systems; can be used on other systems.
1025 *
1026 * NOTES about syscfg (from Intel):
1027 * To display the BIOS Parameters:
1028 * >> syscfg /d biossettings <"BIOS variable Name">
1029 *
1030 * To Set the BIOS Parameters:
1031 * >> syscfg /bcs <AdminPw> <"BIOS variable Name"> <Value>
1032 * Note: If AdminPw is not set use ""
1033 */
node_features_p_node_state(char ** avail_modes,char ** current_mode)1034 extern void node_features_p_node_state(char **avail_modes, char **current_mode)
1035 {
1036 char *avail_states = NULL, *cur_state = NULL;
1037 char *resp_msg, *argv[10], *avail_sep = "", *cur_sep = "", *tok;
1038 int status = 0;
1039 int len = 0;
1040
1041 if (!syscfg_path || !avail_modes || !current_mode)
1042 return;
1043 if ((syscfg_found == 0) || (!hw_is_knl && !force_load)) {
1044 /* This node on cluster lacks syscfg; should not be KNL */
1045 static bool log_event = true;
1046 if (log_event) {
1047 info("%s: syscfg program not found or node isn't KNL, can not get KNL modes",
1048 __func__);
1049 log_event = false;
1050 }
1051 *avail_modes = NULL;
1052 *current_mode = NULL;
1053 return;
1054 }
1055
1056 switch (knl_system_type) {
1057 case KNL_SYSTEM_TYPE_INTEL:
1058 argv[0] = "syscfg";
1059 argv[1] = "/d";
1060 argv[2] = "BIOSSETTINGS";
1061 argv[3] = "Cluster Mode";
1062 argv[4] = NULL;
1063 break;
1064 case KNL_SYSTEM_TYPE_DELL:
1065 argv[0] = "syscfg";
1066 argv[1] = "--SystemMemoryModel";
1067 argv[2] = NULL;
1068 break;
1069 default:
1070 /* This should never happen */
1071 error("%s: Unknown SystemType. %d", __func__, knl_system_type);
1072 *avail_modes = NULL;
1073 *current_mode = NULL;
1074 return;
1075 }
1076 resp_msg = _run_script(syscfg_path, argv, &status);
1077 if (!WIFEXITED(status) || (WEXITSTATUS(status) != 0)) {
1078 error("%s: syscfg (get cluster mode) status:%u response:%s",
1079 __func__, status, resp_msg);
1080 }
1081 if (resp_msg == NULL) {
1082 info("%s: syscfg returned no information", __func__);
1083 } else {
1084 tok = NULL;
1085 _log_script_argv(argv, resp_msg);
1086 switch (knl_system_type) {
1087 case KNL_SYSTEM_TYPE_INTEL:
1088 tok = strstr(resp_msg, "Current Value : ");
1089 len = 16;
1090 break;
1091 case KNL_SYSTEM_TYPE_DELL:
1092 tok = strstr(resp_msg, "SystemMemoryModel=");
1093 len = 18;
1094 break;
1095 default:
1096 /* already handled above, should never get here */
1097 break;
1098 }
1099 if (tok) {
1100 tok += len;
1101 if (!xstrncasecmp(tok, "All2All", 3)) {
1102 cur_state = xstrdup("a2a");
1103 cur_sep = ",";
1104 } else if (!xstrncasecmp(tok, "Hemisphere", 3)) {
1105 cur_state = xstrdup("hemi");
1106 cur_sep = ",";
1107 } else if (!xstrncasecmp(tok, "Quadrant", 3)) {
1108 cur_state = xstrdup("quad");
1109 cur_sep = ",";
1110 } else if (!xstrncasecmp(tok, "SNC-2", 5)) {
1111 cur_state = xstrdup("snc2");
1112 cur_sep = ",";
1113 } else if (!xstrncasecmp(tok, "SNC-4", 5)) {
1114 cur_state = xstrdup("snc4");
1115 cur_sep = ",";
1116 }
1117 }
1118
1119 switch (knl_system_type) {
1120 case KNL_SYSTEM_TYPE_DELL:
1121 argv[0] = "syscfg";
1122 argv[1] = "-h";
1123 argv[2] = "--SystemMemoryModel";
1124 argv[3] = NULL;
1125
1126 xfree(resp_msg);
1127 resp_msg = _run_script(syscfg_path, argv, &status);
1128 if (!WIFEXITED(status) || (WEXITSTATUS(status) != 0)) {
1129 error("%s: syscfg (get cluster mode) status:%u response:%s",
1130 __func__, status, resp_msg);
1131 }
1132 if (resp_msg == NULL)
1133 info("%s: syscfg -h --SystemMemoryModel returned no information", __func__);
1134 break;
1135 default:
1136 break;
1137 }
1138
1139 if (xstrcasestr(resp_msg, "All2All")) {
1140 xstrfmtcat(avail_states, "%s%s", avail_sep, "a2a");
1141 avail_sep = ",";
1142 }
1143 if (xstrcasestr(resp_msg, "Hemisphere")) {
1144 xstrfmtcat(avail_states, "%s%s", avail_sep, "hemi");
1145 avail_sep = ",";
1146 }
1147 if (xstrcasestr(resp_msg, "Quadrant")) {
1148 xstrfmtcat(avail_states, "%s%s", avail_sep, "quad");
1149 avail_sep = ",";
1150 }
1151 if (xstrcasestr(resp_msg, "SNC-2")) {
1152 xstrfmtcat(avail_states, "%s%s", avail_sep, "snc2");
1153 avail_sep = ",";
1154 }
1155 if (xstrcasestr(resp_msg, "SNC-4")) {
1156 xstrfmtcat(avail_states, "%s%s", avail_sep, "snc4");
1157 avail_sep = ",";
1158 }
1159 xfree(resp_msg);
1160 }
1161
1162 switch (knl_system_type) {
1163 case KNL_SYSTEM_TYPE_INTEL:
1164 argv[0] = "syscfg";
1165 argv[1] = "/d";
1166 argv[2] = "BIOSSETTINGS";
1167 argv[3] = "Memory Mode";
1168 argv[4] = NULL;
1169 break;
1170 case KNL_SYSTEM_TYPE_DELL:
1171 argv[0] = "syscfg";
1172 argv[1] = "--ProcEmbMemMode";
1173 argv[2] = NULL;
1174 break;
1175 default:
1176 /* already handled above, should never get here */
1177 break;
1178 }
1179 resp_msg = _run_script(syscfg_path, argv, &status);
1180 if (!WIFEXITED(status) || (WEXITSTATUS(status) != 0)) {
1181 error("%s: syscfg (get memory mode) status:%u response:%s",
1182 __func__, status, resp_msg);
1183 }
1184 if (resp_msg == NULL) {
1185 info("%s: syscfg returned no information", __func__);
1186 } else {
1187 tok = NULL;
1188 _log_script_argv(argv, resp_msg);
1189 switch (knl_system_type) {
1190 case KNL_SYSTEM_TYPE_INTEL:
1191 tok = strstr(resp_msg, "Current Value : ");
1192 len = 16;
1193 break;
1194 case KNL_SYSTEM_TYPE_DELL:
1195 tok = strstr(resp_msg, "ProcEmbMemMode=");
1196 len = 15;
1197 break;
1198 default:
1199 /* already handled above, should never get here */
1200 break;
1201 }
1202 if (tok) {
1203 tok += len;
1204 if (!xstrncasecmp(tok, "Cache", 3)) {
1205 xstrfmtcat(cur_state, "%s%s", cur_sep, "cache");
1206 } else if (!xstrncasecmp(tok, "Flat", 3) ||
1207 !xstrncasecmp(tok, "Memory", 3)) {
1208 xstrfmtcat(cur_state, "%s%s", cur_sep, "flat");
1209 } else if (!xstrncasecmp(tok, "Hybrid", 3)) {
1210 xstrfmtcat(cur_state, "%s%s", cur_sep, "hybrid");
1211 } else if (!xstrncasecmp(tok, "Equal", 3)) {
1212 xstrfmtcat(cur_state, "%s%s", cur_sep, "equal");
1213 } else if (!xstrncasecmp(tok, "Auto", 3)) {
1214 xstrfmtcat(cur_state, "%s%s", cur_sep, "auto");
1215 }
1216 }
1217
1218 switch (knl_system_type) {
1219 case KNL_SYSTEM_TYPE_DELL:
1220 argv[0] = "syscfg";
1221 argv[1] = "-h";
1222 argv[2] = "--ProcEmbMemMode";
1223 argv[3] = NULL;
1224
1225 xfree(resp_msg);
1226 resp_msg = _run_script(syscfg_path, argv, &status);
1227 if (!WIFEXITED(status) || (WEXITSTATUS(status) != 0)) {
1228 error("%s: syscfg (get memory mode) status help:%u response:%s",
1229 __func__, status, resp_msg);
1230 }
1231 if (resp_msg == NULL)
1232 info("%s: syscfg -h returned no information", __func__);
1233 break;
1234 default:
1235 break;
1236 }
1237
1238 if (xstrcasestr(resp_msg, "Cache")) {
1239 xstrfmtcat(avail_states, "%s%s", avail_sep, "cache");
1240 avail_sep = ",";
1241 }
1242 if (xstrcasestr(resp_msg, "Flat") ||
1243 xstrcasestr(resp_msg, "Memory")) {
1244 xstrfmtcat(avail_states, "%s%s", avail_sep, "flat");
1245 avail_sep = ",";
1246 }
1247 if (xstrcasestr(resp_msg, "Hybrid")) {
1248 xstrfmtcat(avail_states, "%s%s", avail_sep, "hybrid");
1249 avail_sep = ",";
1250 }
1251 if (xstrcasestr(resp_msg, "Equal")) {
1252 xstrfmtcat(avail_states, "%s%s", avail_sep, "equal");
1253 avail_sep = ",";
1254 }
1255 if (xstrcasestr(resp_msg, "Auto")) {
1256 xstrfmtcat(avail_states, "%s%s", avail_sep, "auto");
1257 /* avail_sep = ","; CLANG error: Dead assignment */
1258 }
1259 xfree(resp_msg);
1260 }
1261
1262 if (*avail_modes) { /* Append for multiple node_features plugins */
1263 if (*avail_modes[0])
1264 avail_sep = ",";
1265 else
1266 avail_sep = "";
1267 xstrfmtcat(*avail_modes, "%s%s", avail_sep, avail_states);
1268 xfree(avail_states);
1269 } else {
1270 *avail_modes = avail_states;
1271 }
1272
1273 if (*current_mode) { /* Append for multiple node_features plugins */
1274 if (*current_mode[0])
1275 cur_sep = ",";
1276 else
1277 cur_sep = "";
1278 xstrfmtcat(*current_mode, "%s%s", cur_sep, cur_state);
1279 xfree(cur_state);
1280 } else {
1281 *current_mode = cur_state;
1282 }
1283 }
1284
1285 /* Test if a job's feature specification is valid */
node_features_p_job_valid(char * job_features)1286 extern int node_features_p_job_valid(char *job_features)
1287 {
1288 uint16_t job_mcdram, job_numa;
1289 int mcdram_cnt, numa_cnt;
1290 int last_mcdram_cnt = 0, last_numa_cnt = 0;
1291 int rc = SLURM_SUCCESS;
1292 char last_sep = '\0', *tmp, *tok, *save_ptr = NULL;
1293
1294 if ((job_features == NULL) || (job_features[0] == '\0'))
1295 return SLURM_SUCCESS;
1296
1297 tmp = xstrdup(job_features);
1298 tok = strtok_r(tmp, "[]()|", &save_ptr);
1299 while (tok) {
1300 last_sep = tok[strlen(tok) - 1];
1301 job_mcdram = _knl_mcdram_parse(tok, "&,*");
1302 mcdram_cnt = _knl_mcdram_bits_cnt(job_mcdram) + last_mcdram_cnt;
1303 if (mcdram_cnt > 1) { /* Multiple ANDed MCDRAM options */
1304 rc = ESLURM_INVALID_KNL;
1305 break;
1306 }
1307
1308 job_numa = _knl_numa_parse(tok, "&,*");
1309 numa_cnt = _knl_numa_bits_cnt(job_numa) + last_numa_cnt;
1310 if (numa_cnt > 1) { /* Multiple ANDed NUMA options */
1311 rc = ESLURM_INVALID_KNL;
1312 break;
1313 }
1314 tok = strtok_r(NULL, "[]()|", &save_ptr);
1315 if (tok &&
1316 ((last_sep == '&') || /* e.g. "equal&(flat|cache)" */
1317 (tok[0] == '&'))) { /* e.g. "(flat|cache)&equal" */
1318 last_mcdram_cnt += mcdram_cnt;
1319 last_numa_cnt += numa_cnt;
1320 } else {
1321 last_mcdram_cnt = 0;
1322 last_numa_cnt = 0;
1323 }
1324 }
1325 xfree(tmp);
1326
1327 return rc;
1328 }
1329
/*
 * Translate a job's feature request to the node features needed at boot time.
 * If multiple MCDRAM or NUMA values are ORed, pick the first ones.
 *
 * The request is split at any of "[]()|&" and a trailing "*count" is removed
 * from each name. Only the first MCDRAM name and the first NUMA name are kept,
 * in the order in which they appear.
 *
 * IN job_features - job's --constraint specification
 * RET features required on node reboot. Must xfree to release memory
 */
extern char *node_features_p_job_xlate(char *job_features)
{
	const char *delims = "[]()|&";
	char *boot_features = NULL;
	char *work, *last = NULL, *star, *name, *join = "";
	bool seen_mcdram = false, seen_numa = false;

	if (!job_features || !job_features[0])
		return boot_features;

	work = xstrdup(job_features);
	for (name = strtok_r(work, delims, &last); name;
	     name = strtok_r(NULL, delims, &last)) {
		bool take = false;

		/* Drop any "*count" suffix before classifying the name */
		star = strchr(name, '*');
		if (star)
			*star = '\0';

		if (_knl_mcdram_token(name) && !seen_mcdram) {
			seen_mcdram = true;
			take = true;
		}
		if (_knl_numa_token(name) && !seen_numa) {
			seen_numa = true;
			take = true;
		}
		if (!take)
			continue;

		xstrfmtcat(boot_features, "%s%s", join, name);
		join = ",";
	}
	xfree(work);

	return boot_features;
}
1373
/*
 * Extract the numeric value associated with a key in a syscfg response.
 *
 * The search for "key" starts after the text "Possible Values" when that text
 * is present, otherwise at the start of the response. The value is the first
 * run of decimal digits found after the first ':' that follows the key.
 *
 * IN key - name to search for, may be NULL
 * IN resp_msg - syscfg response text, may be NULL
 * RET copy of the digit string (must be xfreed) or NULL if not found
 */
static char *_find_key_val(char *key, char *resp_msg)
{
	char *sep = NULL, *tok, *val = NULL;
	int i;

	if ((key == NULL) || (resp_msg == NULL))
		return NULL;

	if ((tok = strstr(resp_msg, "Possible Values")))
		tok += 15;	/* strlen("Possible Values") */
	else
		tok = resp_msg;
	if ((tok = strstr(tok, key)))
		sep = strchr(tok, ':');
	if (sep) {
		sep++;
		/*
		 * Cast to unsigned char: passing a negative char value (any
		 * non-ASCII byte) to isdigit() is undefined behavior.
		 */
		while ((sep[0] != '\0') && !isdigit((unsigned char) sep[0]))
			sep++;
		if (isdigit((unsigned char) sep[0])) {
			val = xstrdup(sep);
			/* Truncate the copy at the first non-digit */
			for (i = 1; val[i]; i++) {
				if (!isdigit((unsigned char) val[i])) {
					val[i] = '\0';
					break;
				}
			}
		}
	}

	return val;
}
1405
/*
 * Set the node's active features based upon job constraints.
 * NOTE: Executed by the slurmd daemon.
 *
 * Runs the configured syscfg program up to four times, in this order:
 *  1. query the Cluster (NUMA) mode settings
 *  2. set the Cluster mode, if active_features names a NUMA mode
 *  3. query the Memory (MCDRAM) mode settings
 *  4. set the Memory mode, if active_features names an MCDRAM mode
 * A failing syscfg call is logged and recorded in the return code, but the
 * remaining steps are still attempted.
 *
 * IN/OUT active_features - New active features; truncated to an empty string
 *                          before returning, except on the early returns
 * RET SLURM_SUCCESS or SLURM_ERROR
 */
extern int node_features_p_node_set(char *active_features)
{
	char *resp_msg, *argv[10], tmp[100];
	char *key;
	int error_code = SLURM_SUCCESS, status = 0;
	char *mcdram_mode = NULL, *numa_mode = NULL;

	/* Nothing requested, nothing to do */
	if ((active_features == NULL) || (active_features[0] == '\0'))
		return SLURM_SUCCESS;

	if (!syscfg_path) {
		error("%s: SyscfgPath not configured", __func__);
		return SLURM_ERROR;
	}
	if ((syscfg_found == 0) || (!hw_is_knl && !force_load)) {
		/* This node on cluster lacks syscfg; should not be KNL */
		static bool log_event = true;	/* log only once */
		if (log_event) {
			error("%s: syscfg program not found or node isn't KNL; can not set KNL modes",
			      __func__);
			log_event = false;
		}
		return SLURM_ERROR;
	}

	/* Identify available Cluster/NUMA modes */
	switch (knl_system_type) {
	case KNL_SYSTEM_TYPE_INTEL:
		argv[0] = "syscfg";
		argv[1] = "/d";
		argv[2] = "BIOSSETTINGS";
		argv[3] = "Cluster Mode";
		argv[4] = NULL;
		break;
	case KNL_SYSTEM_TYPE_DELL:
		argv[0] = "syscfg";
		argv[1] = "--SystemMemoryModel";
		argv[2] = NULL;
		break;
	default:
		/* This should never happen */
		error("%s: Unknown SystemType. %d", __func__, knl_system_type);
		return SLURM_ERROR;
	}
	resp_msg = _run_script(syscfg_path, argv, &status);
	if (!WIFEXITED(status) || (WEXITSTATUS(status) != 0)) {
		error("%s: syscfg (get cluster mode) status:%u response:%s",
		      __func__, status, resp_msg);
		error_code = SLURM_ERROR;
	}
	if (resp_msg == NULL) {
		info("%s: syscfg returned no information", __func__);
	} else {
		_log_script_argv(argv, resp_msg);
		/*
		 * Substring match against the feature string; the first
		 * match in this fixed order wins.
		 */
		if (strstr(active_features, "a2a"))
			key = "All2All";
		else if (strstr(active_features, "hemi"))
			key = "Hemisphere";
		else if (strstr(active_features, "quad"))
			key = "Quadrant";
		else if (strstr(active_features, "snc2"))
			key = "SNC-2";
		else if (strstr(active_features, "snc4"))
			key = "SNC-4";
		else
			key = NULL;
		/*
		 * Intel: the value passed to syscfg is the number listed for
		 * the key in the response. Dell: the key name itself is used.
		 */
		switch (knl_system_type) {
		case KNL_SYSTEM_TYPE_INTEL:
			numa_mode = _find_key_val(key, resp_msg);
			break;
		case KNL_SYSTEM_TYPE_DELL:
			numa_mode = xstrdup(key);
			/* fall through */
		default:
			break;
		}
		xfree(resp_msg);
	}

	/* Reset current Cluster/NUMA mode */
	if (numa_mode) {
		switch (knl_system_type) {
		case KNL_SYSTEM_TYPE_INTEL:
			argv[0] = "syscfg";
			argv[1] = "/bcs";
			argv[2] = "";	/* empty admin password */
			argv[3] = "BIOSSETTINGS";
			argv[4] = "Cluster Mode";
			argv[5] = numa_mode;
			argv[6] = NULL;
			break;
		case KNL_SYSTEM_TYPE_DELL:
			snprintf(tmp, sizeof(tmp),
				 "--SystemMemoryModel=%s", numa_mode);
			argv[0] = "syscfg";
			argv[1] = tmp;
			argv[2] = NULL;
			break;
		default:
			/* already handled above, should never get here */
			break;
		}
		resp_msg = _run_script(syscfg_path, argv, &status);
		if (!WIFEXITED(status) || (WEXITSTATUS(status) != 0)) {
			error("%s: syscfg (set cluster mode) status:%u response:%s",
			      __func__, status, resp_msg);
			error_code = SLURM_ERROR;
		} else {
			_log_script_argv(argv, resp_msg);
		}
		xfree(resp_msg);
		xfree(numa_mode);
	}

	/* Identify available Memory/MCDRAM modes */
	switch (knl_system_type) {
	case KNL_SYSTEM_TYPE_INTEL:
		argv[0] = "syscfg";
		argv[1] = "/d";
		argv[2] = "BIOSSETTINGS";
		argv[3] = "Memory Mode";
		argv[4] = NULL;
		break;
	case KNL_SYSTEM_TYPE_DELL:
		argv[0] = "syscfg";
		argv[1] = "--ProcEmbMemMode";
		argv[2] = NULL;
		break;
	default:
		/* already handled above, should never get here */
		break;
	}
	resp_msg = _run_script(syscfg_path, argv, &status);
	if (!WIFEXITED(status) || (WEXITSTATUS(status) != 0)) {
		error("%s: syscfg (get memory mode) status:%u response:%s",
		      __func__, status, resp_msg);
		error_code = SLURM_ERROR;
	}
	if (resp_msg == NULL) {
		info("%s: syscfg returned no information", __func__);
	} else {
		_log_script_argv(argv, resp_msg);
		if (strstr(active_features, "cache"))
			key = "Cache";
		else if (strstr(active_features, "flat"))
			/* The "flat" mode has a different name per vendor */
			switch (knl_system_type) {
			case KNL_SYSTEM_TYPE_INTEL:
				key = "Flat";
				break;
			case KNL_SYSTEM_TYPE_DELL:
				key = "Memory";
				break;
			default:
				key = NULL;
				break;
			}
		else if (strstr(active_features, "hybrid"))
			key = "Hybrid";
		else if (strstr(active_features, "equal"))
			key = "Equal";
		else if (strstr(active_features, "auto"))
			key = "Auto";
		else
			key = NULL;

		/* Same vendor-specific value lookup as for the cluster mode */
		switch (knl_system_type) {
		case KNL_SYSTEM_TYPE_INTEL:
			mcdram_mode = _find_key_val(key, resp_msg);
			break;
		case KNL_SYSTEM_TYPE_DELL:
			mcdram_mode = xstrdup(key);
			/* fall through */
		default:
			break;
		}
		xfree(resp_msg);
	}

	/* Reset current Memory/MCDRAM mode */
	if (mcdram_mode) {
		switch (knl_system_type) {
		case KNL_SYSTEM_TYPE_INTEL:
			argv[0] = "syscfg";
			argv[1] = "/bcs";
			argv[2] = "";	/* empty admin password */
			argv[3] = "BIOSSETTINGS";
			argv[4] = "Memory Mode";
			argv[5] = mcdram_mode;
			argv[6] = NULL;
			break;
		case KNL_SYSTEM_TYPE_DELL:
			snprintf(tmp, sizeof(tmp),
				 "--ProcEmbMemMode=%s", mcdram_mode);
			argv[0] = "syscfg";
			argv[1] = tmp;
			argv[2] = NULL;
			break;
		default:
			/* already handled above, should never get here */
			break;
		}

		resp_msg = _run_script(syscfg_path, argv, &status);
		if (!WIFEXITED(status) || (WEXITSTATUS(status) != 0)) {
			error("%s: syscfg (set memory mode) status:%u response:%s",
			      __func__, status, resp_msg);
			error_code = SLURM_ERROR;
		} else {
			_log_script_argv(argv, resp_msg);
		}
		xfree(resp_msg);
		xfree(mcdram_mode);
	}

	/* Clear features, do not pass as argument to reboot program
	 * (assuming we are calling /sbin/reboot). */
	active_features[0] = '\0';

	return error_code;
}
1628
1629 /* Return bitmap of KNL nodes, NULL if none identified */
node_features_p_get_node_bitmap(void)1630 extern bitstr_t *node_features_p_get_node_bitmap(void)
1631 {
1632 if (knl_node_bitmap)
1633 return bit_copy(knl_node_bitmap);
1634 return NULL;
1635 }
1636
1637 /* Return count of overlaping bits in active_bitmap and knl_node_bitmap */
node_features_p_overlap(bitstr_t * active_bitmap)1638 extern int node_features_p_overlap(bitstr_t *active_bitmap)
1639 {
1640 int cnt = 0;
1641
1642 if (!knl_node_bitmap || !active_bitmap ||
1643 !(cnt = bit_overlap(active_bitmap, knl_node_bitmap)))
1644 return 0;
1645
1646 return cnt;
1647 }
/*
 * Return true if the plugin requires PowerSave mode for booting nodes.
 * This plugin never does.
 */
extern bool node_features_p_node_power(void)
{
	return false;
}
1653
1654 /*
1655 * Note the active features associated with a set of nodes have been updated.
1656 * Specifically update the node's "hbm" GRES and "CpuBind" values as needed.
1657 * IN active_features - New active features
1658 * IN node_bitmap - bitmap of nodes changed
1659 * RET error code
1660 */
node_features_p_node_update(char * active_features,bitstr_t * node_bitmap)1661 extern int node_features_p_node_update(char *active_features,
1662 bitstr_t *node_bitmap)
1663 {
1664 int i, i_first, i_last;
1665 int rc = SLURM_SUCCESS, numa_inx = -1;
1666 int mcdram_inx = 0;
1667 uint64_t mcdram_size;
1668 node_record_t *node_ptr;
1669 char *save_ptr = NULL, *tmp, *tok;
1670
1671 if (mcdram_per_node == NULL) {
1672 //FIXME: Additional logic is needed to determine the available MCDRAM space
1673 //FIXME: Additional logic will also be required to handle heterogeneous sizes
1674 mcdram_per_node = xmalloc(sizeof(uint64_t) * node_record_count);
1675 for (i = 0; i < node_record_count; i++)
1676 mcdram_per_node[i] = DEFAULT_MCDRAM_SIZE;
1677 }
1678
1679 if (active_features) {
1680 tmp = xstrdup(active_features);
1681 tok = strtok_r(tmp, ",", &save_ptr);
1682 while (tok) {
1683 if (numa_inx == -1)
1684 numa_inx = _knl_numa_inx(tok);
1685 mcdram_inx |= _knl_mcdram_token(tok);
1686 tok = strtok_r(NULL, ",", &save_ptr);
1687 }
1688 xfree(tmp);
1689 }
1690
1691 if (mcdram_inx >= 0) {
1692 for (i = 0; i < KNL_MCDRAM_CNT; i++) {
1693 if ((KNL_CACHE << i) == mcdram_inx)
1694 break;
1695 }
1696 if ((i >= KNL_MCDRAM_CNT) || (mcdram_pct[i] == -1))
1697 mcdram_inx = -1;
1698 else
1699 mcdram_inx = i;
1700 } else {
1701 mcdram_inx = -1;
1702 }
1703
1704 xassert(node_bitmap);
1705 i_first = bit_ffs(node_bitmap);
1706 if (i_first >= 0)
1707 i_last = bit_fls(node_bitmap);
1708 else
1709 i_last = i_first - 1;
1710 for (i = i_first; i <= i_last; i++) {
1711 if (!bit_test(node_bitmap, i))
1712 continue;
1713 if (i >= node_record_count) {
1714 error("%s: Invalid node index (%d >= %d)",
1715 __func__, i, node_record_count);
1716 rc = SLURM_ERROR;
1717 break;
1718 }
1719 node_ptr = node_record_table_ptr + i;
1720 if ((numa_inx >= 0) && cpu_bind[numa_inx])
1721 node_ptr->cpu_bind = cpu_bind[numa_inx];
1722 if (mcdram_per_node && (mcdram_inx >= 0)) {
1723 mcdram_size = mcdram_per_node[i] *
1724 (100 - mcdram_pct[mcdram_inx]) / 100;
1725 if (!node_ptr->gres)
1726 node_ptr->gres =
1727 xstrdup(node_ptr->config_ptr->gres);
1728 gres_plugin_node_feature(node_ptr->name, "hbm",
1729 mcdram_size, &node_ptr->gres,
1730 &node_ptr->gres_list);
1731 }
1732 }
1733
1734 return rc;
1735 }
1736
1737 /*
1738 * Return TRUE if the specified node update request is valid with respect
1739 * to features changes (i.e. don't permit a non-KNL node to set KNL features).
1740 *
1741 * arg IN - Pointer to node_record_t record
1742 * update_node_msg IN - Pointer to update request
1743 */
node_features_p_node_update_valid(void * arg,update_node_msg_t * update_node_msg)1744 extern bool node_features_p_node_update_valid(void *arg,
1745 update_node_msg_t *update_node_msg)
1746 {
1747 node_record_t *node_ptr = (node_record_t *) arg;
1748 char *tmp, *save_ptr = NULL, *tok;
1749 bool is_knl = false, invalid_feature = false;
1750
1751 /* No feature changes */
1752 if (!update_node_msg->features && !update_node_msg->features_act)
1753 return true;
1754
1755 /* Determine if this is KNL node based upon current features */
1756 if (node_ptr->features && node_ptr->features[0]) {
1757 tmp = xstrdup(node_ptr->features);
1758 tok = strtok_r(tmp, ",", &save_ptr);
1759 while (tok) {
1760 if (_knl_mcdram_token(tok) || _knl_numa_token(tok)) {
1761 is_knl = true;
1762 break;
1763 }
1764 tok = strtok_r(NULL, ",", &save_ptr);
1765 }
1766 xfree(tmp);
1767 }
1768 if (is_knl)
1769 return true;
1770
1771 /* Validate that AvailableFeatures update request has no KNL modes */
1772 if (update_node_msg->features) {
1773 tmp = xstrdup(update_node_msg->features);
1774 tok = strtok_r(tmp, ",", &save_ptr);
1775 while (tok) {
1776 if (_knl_mcdram_token(tok) || _knl_numa_token(tok)) {
1777 invalid_feature = true;
1778 break;
1779 }
1780 tok = strtok_r(NULL, ",", &save_ptr);
1781 }
1782 xfree(tmp);
1783 if (invalid_feature) {
1784 info("Invalid AvailableFeatures update request (%s) for non-KNL node %s",
1785 update_node_msg->features, node_ptr->name);
1786 return false;
1787 }
1788 }
1789
1790 /* Validate that ActiveFeatures update request has no KNL modes */
1791 if (update_node_msg->features_act) {
1792 tmp = xstrdup(update_node_msg->features_act);
1793 tok = strtok_r(tmp, ",", &save_ptr);
1794 while (tok) {
1795 if (_knl_mcdram_token(tok) || _knl_numa_token(tok)) {
1796 invalid_feature = true;
1797 break;
1798 }
1799 tok = strtok_r(NULL, ",", &save_ptr);
1800 }
1801 xfree(tmp);
1802 if (invalid_feature) {
1803 info("Invalid ActiveFeatures update request (%s) for non-KNL node %s",
1804 update_node_msg->features_act, node_ptr->name);
1805 return false;
1806 }
1807 }
1808
1809 /*
1810 * For non-KNL node, active and available features must match
1811 */
1812 if (!update_node_msg->features) {
1813 update_node_msg->features =
1814 xstrdup(update_node_msg->features_act);
1815 } else if (!update_node_msg->features_act) {
1816 update_node_msg->features_act =
1817 xstrdup(update_node_msg->features);
1818 } else if (xstrcmp(update_node_msg->features,
1819 update_node_msg->features_act)) {
1820 info("Invalid ActiveFeatures != AvailableFeatures (%s != %s) for non-KNL node %s",
1821 update_node_msg->features, update_node_msg->features_act,
1822 node_ptr->name);
1823 return false;
1824 }
1825
1826 return true;
1827 }
1828
/*
 * Return TRUE if this (one) feature name is under this plugin's control,
 * i.e. it is recognized as an MCDRAM or a NUMA mode name.
 */
extern bool node_features_p_changeable_feature(char *feature)
{
	return (_knl_mcdram_token(feature) || _knl_numa_token(feature));
}
1836
1837 /*
1838 * Translate a node's feature specification by replacing any features associated
1839 * with this plugin in the original value with the new values, preserving
1840 * any features that are not associated with this plugin
1841 * IN new_features - newly active features
1842 * IN orig_features - original active features
1843 * IN avail_features - original available features
1844 * IN node_inx - index of node in node table
1845 * RET node's new merged features, must be xfreed
1846 */
node_features_p_node_xlate(char * new_features,char * orig_features,char * avail_features,int node_inx)1847 extern char *node_features_p_node_xlate(char *new_features, char *orig_features,
1848 char *avail_features, int node_inx)
1849 {
1850 char *node_features = NULL;
1851 char *tmp, *save_ptr = NULL, *sep = "", *tok;
1852 uint16_t new_mcdram = 0, new_numa = 0;
1853 uint16_t tmp_mcdram, tmp_numa;
1854 bool is_knl = false;
1855
1856 if (avail_features) {
1857 tmp = xstrdup(avail_features);
1858 tok = strtok_r(tmp, ",", &save_ptr);
1859 while (tok) {
1860 if (_knl_mcdram_token(tok) || _knl_numa_token(tok)) {
1861 is_knl = true;
1862 } else {
1863 xstrfmtcat(node_features, "%s%s", sep, tok);
1864 sep = ",";
1865 }
1866 tok = strtok_r(NULL, ",", &save_ptr);
1867 }
1868 xfree(tmp);
1869 if (!is_knl) {
1870 xfree(node_features);
1871 sep = "";
1872 }
1873 }
1874
1875 if (new_features) {
1876 /* Copy non-KNL features */
1877 if (!is_knl && new_features) {
1878 tmp = xstrdup(new_features);
1879 tok = strtok_r(tmp, ",", &save_ptr);
1880 while (tok) {
1881 if ((_knl_mcdram_token(tok) == 0) &&
1882 (_knl_numa_token(tok) == 0)) {
1883 xstrfmtcat(node_features, "%s%s", sep,
1884 tok);
1885 sep = ",";
1886 }
1887 tok = strtok_r(NULL, ",", &save_ptr);
1888 }
1889 xfree(tmp);
1890 }
1891
1892 /* Copy new KNL features in MCDRAM/NUMA order */
1893 tmp = xstrdup(new_features);
1894 tok = strtok_r(tmp, ",", &save_ptr);
1895 while (tok) {
1896 if ((tmp_mcdram = _knl_mcdram_token(tok)))
1897 new_mcdram |= tmp_mcdram;
1898 else if ((tmp_numa = _knl_numa_token(tok)))
1899 new_numa |= tmp_numa;
1900 tok = strtok_r(NULL, ",", &save_ptr);
1901 }
1902 xfree(tmp);
1903
1904 if (is_knl && ((new_mcdram == 0) || (new_numa == 0))) {
1905 /*
1906 * New active features lacks current MCDRAM or NUMA,
1907 * copy values from original
1908 */
1909 tmp = xstrdup(orig_features);
1910 tok = strtok_r(tmp, ",", &save_ptr);
1911 while (tok) {
1912 if ((new_mcdram == 0) &&
1913 (tmp_mcdram = _knl_mcdram_token(tok)))
1914 new_mcdram |= tmp_mcdram;
1915 else if ((new_numa == 0) &&
1916 (tmp_numa = _knl_numa_token(tok)))
1917 new_numa |= tmp_numa;
1918 tok = strtok_r(NULL, ",", &save_ptr);
1919 }
1920 xfree(tmp);
1921 }
1922 if (new_mcdram) {
1923 tmp = _knl_mcdram_str(new_mcdram);
1924 xstrfmtcat(node_features, "%s%s", sep, tmp);
1925 xfree(tmp);
1926 sep = ",";
1927 }
1928 if (new_numa) {
1929 tmp = _knl_numa_str(new_numa);
1930 xstrfmtcat(node_features, "%s%s", sep, tmp);
1931 xfree(tmp);
1932 }
1933 }
1934
1935 if (is_knl) {
1936 if (!knl_node_bitmap)
1937 knl_node_bitmap = bit_alloc(node_record_count);
1938 bit_set(knl_node_bitmap, node_inx);
1939 }
1940
1941 return node_features;
1942 }
1943
/*
 * Translate a node's new feature specification into a "standard" ordering:
 * features not recognized as KNL modes first, in their original order, then
 * the MCDRAM modes, then the NUMA modes.
 * RET node's new merged features, must be xfreed. NULL if new_features is
 *     NULL or empty.
 */
extern char *node_features_p_node_xlate2(char *new_features)
{
	char *ordered = NULL;
	char *work, *last = NULL, *join = "", *name, *mode_str;
	uint16_t mcdram_bits = 0, numa_bits = 0;
	uint16_t bits;

	if (!new_features || !new_features[0])
		return ordered;

	work = xstrdup(new_features);
	for (name = strtok_r(work, ",", &last); name;
	     name = strtok_r(NULL, ",", &last)) {
		if ((bits = _knl_mcdram_token(name))) {
			mcdram_bits |= bits;
			continue;
		}
		if ((bits = _knl_numa_token(name))) {
			numa_bits |= bits;
			continue;
		}
		/* Not a KNL mode: keep it in place */
		xstrfmtcat(ordered, "%s%s", join, name);
		join = ",";
	}
	xfree(work);

	if (mcdram_bits) {
		mode_str = _knl_mcdram_str(mcdram_bits);
		xstrfmtcat(ordered, "%s%s", join, mode_str);
		xfree(mode_str);
		join = ",";
	}
	if (numa_bits) {
		mode_str = _knl_numa_str(numa_bits);
		xstrfmtcat(ordered, "%s%s", join, mode_str);
		xfree(mode_str);
	}

	return ordered;
}
1983
1984 /* Perform set up for step launch
1985 * mem_sort IN - Trigger sort of memory pages (KNL zonesort)
1986 * numa_bitmap IN - NUMA nodes allocated to this job */
node_features_p_step_config(bool mem_sort,bitstr_t * numa_bitmap)1987 extern void node_features_p_step_config(bool mem_sort, bitstr_t *numa_bitmap)
1988 {
1989 #ifdef HAVE_NUMA
1990 if (mem_sort && (numa_available() != -1)) {
1991 struct stat sb;
1992 int buf_len, fd, i, len;
1993 char buf[16];
1994
1995 if (stat(ZONE_SORT_PATH, &sb) == -1)
1996 if (system(MODPROBE_PATH " zonesort_module")) {
1997 /*
1998 * NOOP - compiling with optimizations throws
1999 * out a (void) cast and warns about ignoring
2000 * the return value
2001 */
2002 }
2003 if ((fd = open(ZONE_SORT_PATH, O_WRONLY | O_SYNC)) == -1) {
2004 error("%s: Could not open file %s: %m",
2005 __func__, ZONE_SORT_PATH);
2006 } else {
2007 len = numa_max_node() + 1;
2008 for (i = 0; i < len; i++) {
2009 if (numa_bitmap && !bit_test(numa_bitmap, i))
2010 continue;
2011 snprintf(buf, sizeof(buf), "%d", i);
2012 buf_len = strlen(buf) + 1;
2013 // info("SORT NUMA %s", buf);
2014 if (write(fd, buf, buf_len) != buf_len) {
2015 error("%s: Could not write file %s: %m",
2016 __func__, ZONE_SORT_PATH);
2017 }
2018 }
2019 (void) close(fd);
2020 }
2021 }
2022 #endif
2023 }
2024
2025 /* Determine if the specified user can modify the currently available node
2026 * features */
node_features_p_user_update(uid_t uid)2027 extern bool node_features_p_user_update(uid_t uid)
2028 {
2029 static int reboot_allowed = -1;
2030 int i;
2031
2032 if (reboot_allowed == -1) {
2033 char *reboot_program = slurm_get_reboot_program();
2034 if (reboot_program && reboot_program[0])
2035 reboot_allowed = 1;
2036 else
2037 reboot_allowed = 0;
2038 xfree(reboot_program);
2039 }
2040
2041 if (reboot_allowed != 1) {
2042 info("Change in KNL mode not supported. No RebootProgram configured");
2043 return false;
2044 }
2045
2046 if (allowed_uid_cnt == 0) /* Default is ALL users allowed to update */
2047 return true;
2048
2049 for (i = 0; i < allowed_uid_cnt; i++) {
2050 if (allowed_uid[i] == uid)
2051 return true;
2052 }
2053
2054 return false;
2055 }
2056
2057 /* Return estimated reboot time, in seconds */
node_features_p_boot_time(void)2058 extern uint32_t node_features_p_boot_time(void)
2059 {
2060 return boot_time;
2061 }
2062
2063 /* Get node features plugin configuration */
node_features_p_get_config(config_plugin_params_t * p)2064 extern void node_features_p_get_config(config_plugin_params_t *p)
2065 {
2066 config_key_pair_t *key_pair;
2067 List data;
2068
2069 xassert(p);
2070 xstrcat(p->name, plugin_type);
2071 data = p->key_pairs;
2072
2073 key_pair = xmalloc(sizeof(config_key_pair_t));
2074 key_pair->name = xstrdup("AllowMCDRAM");
2075 key_pair->value = _knl_mcdram_str(allow_mcdram);
2076 list_append(data, key_pair);
2077
2078 key_pair = xmalloc(sizeof(config_key_pair_t));
2079 key_pair->name = xstrdup("AllowNUMA");
2080 key_pair->value = _knl_numa_str(allow_numa);
2081 list_append(data, key_pair);
2082
2083 key_pair = xmalloc(sizeof(config_key_pair_t));
2084 key_pair->name = xstrdup("AllowUserBoot");
2085 key_pair->value = _make_uid_str(allowed_uid, allowed_uid_cnt);
2086 list_append(data, key_pair);
2087
2088 key_pair = xmalloc(sizeof(config_key_pair_t));
2089 key_pair->name = xstrdup("BootTime");
2090 key_pair->value = xstrdup_printf("%u", boot_time);
2091 list_append(data, key_pair);
2092
2093 key_pair = xmalloc(sizeof(config_key_pair_t));
2094 key_pair->name = xstrdup("DefaultMCDRAM");
2095 key_pair->value = _knl_mcdram_str(default_mcdram);
2096 list_append(data, key_pair);
2097
2098 key_pair = xmalloc(sizeof(config_key_pair_t));
2099 key_pair->name = xstrdup("DefaultNUMA");
2100 key_pair->value = _knl_numa_str(default_numa);
2101 list_append(data, key_pair);
2102
2103 key_pair = xmalloc(sizeof(config_key_pair_t));
2104 key_pair->name = xstrdup("Force");
2105 key_pair->value = xstrdup_printf("%u", force_load);
2106 list_append(data, key_pair);
2107
2108 key_pair = xmalloc(sizeof(config_key_pair_t));
2109 key_pair->name = xstrdup("McPath");
2110 key_pair->value = xstrdup(mc_path);
2111 list_append(data, key_pair);
2112
2113 key_pair = xmalloc(sizeof(config_key_pair_t));
2114 key_pair->name = xstrdup("NodeRebootWeight");
2115 key_pair->value = xstrdup_printf("%u", node_reboot_weight);
2116 list_append(data, key_pair);
2117
2118 key_pair = xmalloc(sizeof(config_key_pair_t));
2119 key_pair->name = xstrdup("SyscfgPath");
2120 key_pair->value = xstrdup(syscfg_path);
2121 list_append(data, key_pair);
2122
2123 key_pair = xmalloc(sizeof(config_key_pair_t));
2124 key_pair->name = xstrdup("SyscfgTimeout");
2125 key_pair->value = xstrdup_printf("%u", syscfg_timeout);
2126 list_append(data, key_pair);
2127
2128 key_pair = xmalloc(sizeof(config_key_pair_t));
2129 key_pair->name = xstrdup("SystemType");
2130 key_pair->value = xstrdup(_knl_system_type_str(knl_system_type));
2131 list_append(data, key_pair);
2132
2133 key_pair = xmalloc(sizeof(config_key_pair_t));
2134 key_pair->name = xstrdup("UmeCheckInterval");
2135 key_pair->value = xstrdup_printf("%u", ume_check_interval);
2136 list_append(data, key_pair);
2137
2138 list_sort(data, (ListCmpF) sort_key_pairs);
2139
2140 return;
2141 }
2142
2143 /*
2144 * Return node "weight" field if reboot required to change mode
2145 */
node_features_p_reboot_weight(void)2146 extern uint32_t node_features_p_reboot_weight(void)
2147 {
2148 return node_reboot_weight;
2149 }
2150