1 /*****************************************************************************\
2 * node_features.c - Infrastructure for changing a node's features on user
3 * demand
4 *****************************************************************************
5 * Copyright (C) 2015 SchedMD LLC.
6 * Written by Morris Jette <jette@schedmd.com>
7 *
8 * This file is part of Slurm, a resource management program.
9 * For details, see <https://slurm.schedmd.com/>.
10 * Please also read the included file: DISCLAIMER.
11 *
12 * Slurm is free software; you can redistribute it and/or modify it under
13 * the terms of the GNU General Public License as published by the Free
14 * Software Foundation; either version 2 of the License, or (at your option)
15 * any later version.
16 *
17 * In addition, as a special exception, the copyright holders give permission
18 * to link the code of portions of this program with the OpenSSL library under
19 * certain conditions as described in each individual source file, and
20 * distribute linked combinations including the two. You must obey the GNU
21 * General Public License in all respects for all of the code used other than
22 * OpenSSL. If you modify file(s) with this exception, you may extend this
23 * exception to your version of the file(s), but you are not obligated to do
24 * so. If you do not wish to do so, delete this exception statement from your
25 * version. If you delete this exception statement from all source files in
26 * the program, then also delete it here.
27 *
28 * Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
29 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
30 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
31 * details.
32 *
33 * You should have received a copy of the GNU General Public License along
34 * with Slurm; if not, write to the Free Software Foundation, Inc.,
35 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
36 \*****************************************************************************/
37
38 #include <inttypes.h>
39 #include <string.h>
40 #include <sys/types.h>
41 #include <unistd.h>
42
43 #include "slurm/slurm.h"
44 #include "src/common/macros.h"
45 #include "src/common/node_features.h"
46 #include "src/common/plugin.h"
47 #include "src/common/read_config.h"
48 #include "src/common/slurm_protocol_api.h"
49 #include "src/common/timers.h"
50 #include "src/common/xmalloc.h"
51 #include "src/common/xstring.h"
52
53 typedef struct node_features_ops {
54 uint32_t(*boot_time) (void);
55 bool (*changeable_feature) (char *feature);
56 int (*get_node) (char *node_list);
57 int (*job_valid) (char *job_features);
58 char * (*job_xlate) (char *job_features);
59 bitstr_t * (*get_node_bitmap) (void);
60 int (*overlap) (bitstr_t *active_bitmap);
61 bool (*node_power) (void);
62 int (*node_set) (char *active_features);
63 void (*node_state) (char **avail_modes, char **current_mode);
64 int (*node_update) (char *active_features, bitstr_t *node_bitmap);
65 bool (*node_update_valid) (void *node_ptr,
66 update_node_msg_t *update_node_msg);
67 char * (*node_xlate) (char *new_features, char *orig_features,
68 char *avail_features, int node_inx);
69 char * (*node_xlate2) (char *new_features);
70 void (*step_config) (bool mem_sort, bitstr_t *numa_bitmap);
71 uint32_t(*reboot_weight)(void);
72 int (*reconfig) (void);
73 bool (*user_update) (uid_t uid);
74 void (*get_config) (config_plugin_params_t *p);
75 } node_features_ops_t;
76
/*
 * Plugin symbol names to resolve, one per member of node_features_ops_t.
 * The order here must be kept identical to the order of the fields declared
 * in node_features_ops_t.
 */
static const char *syms[] = {
	"node_features_p_boot_time",
	"node_features_p_changeable_feature",
	"node_features_p_get_node",
	"node_features_p_job_valid",
	"node_features_p_job_xlate",
	"node_features_p_get_node_bitmap",
	"node_features_p_overlap",
	"node_features_p_node_power",
	"node_features_p_node_set",
	"node_features_p_node_state",
	"node_features_p_node_update",
	"node_features_p_node_update_valid",
	"node_features_p_node_xlate",
	"node_features_p_node_xlate2",
	"node_features_p_step_config",
	"node_features_p_reboot_weight",
	"node_features_p_reconfig",
	"node_features_p_user_update",
	"node_features_p_get_config",
};
102
103 static int g_context_cnt = -1;
104 static node_features_ops_t *ops = NULL;
105 static plugin_context_t **g_context = NULL;
106 static pthread_mutex_t g_context_lock = PTHREAD_MUTEX_INITIALIZER;
107 static char *node_features_plugin_list = NULL;
108 static bool init_run = false;
109
110 /* Perform plugin initialization: read configuration files, etc. */
node_features_g_init(void)111 extern int node_features_g_init(void)
112 {
113 int rc = SLURM_SUCCESS;
114 char *last = NULL, *names;
115 char *plugin_type = "node_features";
116 char *type;
117
118 if (init_run && (g_context_cnt >= 0))
119 return rc;
120
121 slurm_mutex_lock(&g_context_lock);
122 if (g_context_cnt >= 0)
123 goto fini;
124
125 node_features_plugin_list = slurm_get_node_features_plugins();
126 g_context_cnt = 0;
127 if ((node_features_plugin_list == NULL) ||
128 (node_features_plugin_list[0] == '\0'))
129 goto fini;
130
131 names = node_features_plugin_list;
132 while ((type = strtok_r(names, ",", &last))) {
133 xrealloc(ops,
134 (sizeof(node_features_ops_t) * (g_context_cnt + 1)));
135 xrealloc(g_context,
136 (sizeof(plugin_context_t *) * (g_context_cnt + 1)));
137 if (xstrncmp(type, "node_features/", 14) == 0)
138 type += 14; /* backward compatibility */
139 type = xstrdup_printf("node_features/%s", type);
140 g_context[g_context_cnt] = plugin_context_create(
141 plugin_type, type, (void **)&ops[g_context_cnt],
142 syms, sizeof(syms));
143 if (!g_context[g_context_cnt]) {
144 error("cannot create %s context for %s",
145 plugin_type, type);
146 rc = SLURM_ERROR;
147 xfree(type);
148 break;
149 }
150
151 xfree(type);
152 g_context_cnt++;
153 names = NULL; /* for next strtok_r() iteration */
154 }
155 init_run = true;
156
157 fini:
158 slurm_mutex_unlock(&g_context_lock);
159
160 if (rc != SLURM_SUCCESS)
161 node_features_g_fini();
162
163 return rc;
164 }
165
166 /* Perform plugin termination: save state, free memory, etc. */
node_features_g_fini(void)167 extern int node_features_g_fini(void)
168 {
169 int i, j, rc = SLURM_SUCCESS;
170
171 slurm_mutex_lock(&g_context_lock);
172 if (g_context_cnt < 0)
173 goto fini;
174
175 init_run = false;
176 for (i = 0; i < g_context_cnt; i++) {
177 if (g_context[i]) {
178 j = plugin_context_destroy(g_context[i]);
179 if (j != SLURM_SUCCESS)
180 rc = j;
181 }
182 }
183 xfree(ops);
184 xfree(g_context);
185 xfree(node_features_plugin_list);
186 g_context_cnt = -1;
187
188 fini: slurm_mutex_unlock(&g_context_lock);
189 return rc;
190 }
191
192 /* Return count of node_feature plugins configured */
node_features_g_count(void)193 extern int node_features_g_count(void)
194 {
195 int rc;
196
197 (void) node_features_g_init();
198 slurm_mutex_lock(&g_context_lock);
199 rc = g_context_cnt;
200 slurm_mutex_unlock(&g_context_lock);
201
202 return rc;
203 }
204
205 /* Perform set up for step launch
206 * mem_sort IN - Trigger sort of memory pages (KNL zonesort)
207 * numa_bitmap IN - NUMA nodes allocated to this job */
node_features_g_step_config(bool mem_sort,bitstr_t * numa_bitmap)208 extern void node_features_g_step_config(bool mem_sort, bitstr_t *numa_bitmap)
209 {
210 DEF_TIMERS;
211 int i;
212
213 START_TIMER;
214 if (node_features_g_init() != SLURM_SUCCESS)
215 return;
216 slurm_mutex_lock(&g_context_lock);
217 for (i = 0; i < g_context_cnt; i++)
218 (*(ops[i].step_config))(mem_sort, numa_bitmap);
219 slurm_mutex_unlock(&g_context_lock);
220 END_TIMER2("node_features_g_step_config");
221 }
222
223 /* Reset plugin configuration information */
node_features_g_reconfig(void)224 extern int node_features_g_reconfig(void)
225 {
226 DEF_TIMERS;
227 int i, rc;
228
229 START_TIMER;
230 rc = node_features_g_init();
231 slurm_mutex_lock(&g_context_lock);
232 for (i = 0; ((i < g_context_cnt) && (rc == SLURM_SUCCESS)); i++)
233 rc = (*(ops[i].reconfig))();
234 slurm_mutex_unlock(&g_context_lock);
235 END_TIMER2("node_features_g_reconfig");
236
237 return rc;
238 }
239
240 /* Return TRUE if this (one) feature name is under this plugin's control */
node_features_g_changeable_feature(char * feature)241 extern bool node_features_g_changeable_feature(char *feature)
242 {
243 DEF_TIMERS;
244 int i;
245 bool changeable = false;
246
247 START_TIMER;
248 (void) node_features_g_init();
249 slurm_mutex_lock(&g_context_lock);
250 for (i = 0; ((i < g_context_cnt) && !changeable); i++)
251 changeable = (*(ops[i].changeable_feature))(feature);
252 slurm_mutex_unlock(&g_context_lock);
253 END_TIMER2("node_features_g_reconfig");
254
255 return changeable;
256 }
257
258 /* Update active and available features on specified nodes, sets features on
259 * all nodes is node_list is NULL */
node_features_g_get_node(char * node_list)260 extern int node_features_g_get_node(char *node_list)
261 {
262 DEF_TIMERS;
263 int i, rc;
264
265 START_TIMER;
266 rc = node_features_g_init();
267 slurm_mutex_lock(&g_context_lock);
268 for (i = 0; ((i < g_context_cnt) && (rc == SLURM_SUCCESS)); i++)
269 rc = (*(ops[i].get_node))(node_list);
270 slurm_mutex_unlock(&g_context_lock);
271 END_TIMER2("node_features_g_get_node");
272
273 return rc;
274 }
275
276 /* Test if a job's feature specification is valid */
node_features_g_job_valid(char * job_features)277 extern int node_features_g_job_valid(char *job_features)
278 {
279 DEF_TIMERS;
280 int i, rc;
281
282 START_TIMER;
283 rc = node_features_g_init();
284 slurm_mutex_lock(&g_context_lock);
285 for (i = 0; ((i < g_context_cnt) && (rc == SLURM_SUCCESS)); i++)
286 rc = (*(ops[i].job_valid))(job_features);
287 slurm_mutex_unlock(&g_context_lock);
288 END_TIMER2("node_features_g_job_valid");
289
290 return rc;
291 }
292
293 /*
294 * Translate a job's feature request to the node features needed at boot time.
295 * If multiple MCDRAM or NUMA values are ORed, pick the first ones.
296 * IN job_features - job's --constraint specification
297 * RET features required on node reboot. Must xfree to release memory
298 */
node_features_g_job_xlate(char * job_features)299 extern char *node_features_g_job_xlate(char *job_features)
300 {
301 DEF_TIMERS;
302 char *node_features = NULL, *tmp_str;
303 int i;
304
305 START_TIMER;
306 (void) node_features_g_init();
307 slurm_mutex_lock(&g_context_lock);
308 for (i = 0; i < g_context_cnt; i++) {
309 tmp_str = (*(ops[i].job_xlate))(job_features);
310 if (tmp_str) {
311 if (node_features) {
312 xstrfmtcat(node_features, ",%s", tmp_str);
313 xfree(tmp_str);
314 } else {
315 node_features = tmp_str;
316 }
317 }
318 }
319 slurm_mutex_unlock(&g_context_lock);
320 END_TIMER2("node_features_g_job_xlate");
321
322 return node_features;
323 }
324
325 /* Return bitmap of KNL nodes, NULL if none identified */
node_features_g_get_node_bitmap(void)326 extern bitstr_t *node_features_g_get_node_bitmap(void)
327 {
328 DEF_TIMERS;
329 bitstr_t *node_bitmap = NULL;
330 int i;
331
332 START_TIMER;
333 (void) node_features_g_init();
334 slurm_mutex_lock(&g_context_lock);
335 for (i = 0; i < g_context_cnt; i++) {
336 node_bitmap = (*(ops[i].get_node_bitmap))();
337 if (node_bitmap)
338 break;
339 }
340 slurm_mutex_unlock(&g_context_lock);
341 END_TIMER2("node_features_g_get_node_bitmap");
342
343 return node_bitmap;
344 }
345
346 /* Return count of bits in active_bitmap that are in the features bitmap */
node_features_g_overlap(bitstr_t * active_bitmap)347 extern int node_features_g_overlap(bitstr_t *active_bitmap)
348 {
349 DEF_TIMERS;
350 int cnt = 0;
351 int i;
352
353 START_TIMER;
354 (void) node_features_g_init();
355 slurm_mutex_lock(&g_context_lock);
356 for (i = 0; i < g_context_cnt; i++)
357 cnt += (*(ops[i].overlap))(active_bitmap);
358 slurm_mutex_unlock(&g_context_lock);
359 END_TIMER2("node_features_g_overlap");
360
361 return cnt;
362 }
363
364 /* Return true if the plugin requires PowerSave mode for booting nodes */
node_features_g_node_power(void)365 extern bool node_features_g_node_power(void)
366 {
367 DEF_TIMERS;
368 bool node_power = false;
369 int i;
370
371 START_TIMER;
372 (void) node_features_g_init();
373 slurm_mutex_lock(&g_context_lock);
374 for (i = 0; i < g_context_cnt; i++) {
375 node_power = (*(ops[i].node_power))();
376 if (node_power)
377 break;
378 }
379 slurm_mutex_unlock(&g_context_lock);
380 END_TIMER2("node_features_g_node_power");
381
382 return node_power;
383 }
384
385 /* Set's the node's active features based upon job constraints.
386 * NOTE: Executed by the slurmd daemon.
387 * IN active_features - New active features
388 * RET error code */
node_features_g_node_set(char * active_features)389 extern int node_features_g_node_set(char *active_features)
390 {
391 DEF_TIMERS;
392 int i, rc = SLURM_SUCCESS;
393
394 START_TIMER;
395 (void) node_features_g_init();
396 slurm_mutex_lock(&g_context_lock);
397 for (i = 0; ((i < g_context_cnt) && (rc == SLURM_SUCCESS)); i++) {
398 rc = (*(ops[i].node_set))(active_features);
399 }
400 slurm_mutex_unlock(&g_context_lock);
401 END_TIMER2("node_features_g_node_set");
402
403 return rc;
404 }
405
406 /* Get this node's current and available MCDRAM and NUMA settings from BIOS.
407 * avail_modes IN/OUT - available modes, must be xfreed
408 * current_mode IN/OUT - current modes, must be xfreed */
node_features_g_node_state(char ** avail_modes,char ** current_mode)409 extern void node_features_g_node_state(char **avail_modes, char **current_mode)
410 {
411 DEF_TIMERS;
412 int i;
413
414 START_TIMER;
415 (void) node_features_g_init();
416 slurm_mutex_lock(&g_context_lock);
417 for (i = 0; i < g_context_cnt; i++) {
418 (*(ops[i].node_state))(avail_modes, current_mode);
419 }
420 slurm_mutex_unlock(&g_context_lock);
421 END_TIMER2("node_features_g_node_state");
422 }
423
424 /* Note the active features associated with a set of nodes have been updated.
425 * Specifically update the node's "hbm" GRES and "CpuBind" values as needed.
426 * IN active_features - New active features
427 * IN node_bitmap - bitmap of nodes changed
428 * RET error code */
node_features_g_node_update(char * active_features,bitstr_t * node_bitmap)429 extern int node_features_g_node_update(char *active_features,
430 bitstr_t *node_bitmap)
431 {
432 DEF_TIMERS;
433 int i, rc = SLURM_SUCCESS;
434
435 START_TIMER;
436 (void) node_features_g_init();
437 slurm_mutex_lock(&g_context_lock);
438 for (i = 0; ((i < g_context_cnt) && (rc == SLURM_SUCCESS)); i++) {
439 rc = (*(ops[i].node_update))(active_features, node_bitmap);
440 }
441 slurm_mutex_unlock(&g_context_lock);
442 END_TIMER2("node_features_g_node_update");
443
444 return rc;
445 }
446
447
448 /*
449 * Return TRUE if the specified node update request is valid with respect
450 * to features changes (i.e. don't permit a non-KNL node to set KNL features).
451 *
452 * node_ptr IN - Pointer to node_record_t record
453 * update_node_msg IN - Pointer to update request
454 */
node_features_g_node_update_valid(void * node_ptr,update_node_msg_t * update_node_msg)455 extern bool node_features_g_node_update_valid(void *node_ptr,
456 update_node_msg_t *update_node_msg)
457 {
458 DEF_TIMERS;
459 bool update_valid = true;
460 int i;
461
462 START_TIMER;
463 (void) node_features_g_init();
464 slurm_mutex_lock(&g_context_lock);
465 for (i = 0; i < g_context_cnt; i++) {
466 update_valid = (*(ops[i].node_update_valid))(node_ptr,
467 update_node_msg);
468 if (!update_valid)
469 break;
470 }
471 slurm_mutex_unlock(&g_context_lock);
472 END_TIMER2("node_features_g_node_update_valid");
473
474 return update_valid;
475 }
476
477 /*
478 * Translate a node's feature specification by replacing any features associated
479 * with this plugin in the original value with the new values, preserving
480 * any features that are not associated with this plugin
481 * IN new_features - newly active features
482 * IN orig_features - original active features
483 * IN avail_features - original available features
484 * IN node_inx - index of node in node table
485 * RET node's new merged features, must be xfreed
486 */
node_features_g_node_xlate(char * new_features,char * orig_features,char * avail_features,int node_inx)487 extern char *node_features_g_node_xlate(char *new_features, char *orig_features,
488 char *avail_features, int node_inx)
489 {
490 DEF_TIMERS;
491 char *new_value = NULL, *tmp_str;
492 int i;
493
494 START_TIMER;
495 (void) node_features_g_init();
496 slurm_mutex_lock(&g_context_lock);
497
498 if (!g_context_cnt)
499 new_value = xstrdup(new_features);
500
501 for (i = 0; i < g_context_cnt; i++) {
502 if (new_value)
503 tmp_str = new_value;
504 else if (orig_features)
505 tmp_str = xstrdup(orig_features);
506 else
507 tmp_str = NULL;
508 new_value = (*(ops[i].node_xlate))(new_features, tmp_str,
509 avail_features, node_inx);
510 xfree(tmp_str);
511
512 }
513 slurm_mutex_unlock(&g_context_lock);
514 END_TIMER2("node_features_g_node_xlate");
515
516 return new_value;
517 }
518
519 /* Translate a node's new feature specification into a "standard" ordering
520 * RET node's new merged features, must be xfreed */
node_features_g_node_xlate2(char * new_features)521 extern char *node_features_g_node_xlate2(char *new_features)
522 {
523 DEF_TIMERS;
524 char *new_value = NULL, *tmp_str;
525 int i;
526
527 START_TIMER;
528 (void) node_features_g_init();
529 slurm_mutex_lock(&g_context_lock);
530
531 if (!g_context_cnt)
532 new_value = xstrdup(new_features);
533
534 for (i = 0; i < g_context_cnt; i++) {
535 if (new_value)
536 tmp_str = xstrdup(new_value);
537 else
538 tmp_str = xstrdup(new_features);
539 new_value = (*(ops[i].node_xlate2))(tmp_str);
540 xfree(tmp_str);
541
542 }
543 slurm_mutex_unlock(&g_context_lock);
544 END_TIMER2("node_features_g_node_xlate2");
545
546 return new_value;
547 }
548
549 /* Determine if the specified user can modify the currently available node
550 * features */
node_features_g_user_update(uid_t uid)551 extern bool node_features_g_user_update(uid_t uid)
552 {
553 DEF_TIMERS;
554 bool result = true;
555 int i;
556
557 START_TIMER;
558 (void) node_features_g_init();
559 slurm_mutex_lock(&g_context_lock);
560 for (i = 0; ((i < g_context_cnt) && (result == true)); i++) {
561 result = (*(ops[i].user_update))(uid);
562 }
563 slurm_mutex_unlock(&g_context_lock);
564 END_TIMER2("node_features_g_user_update");
565
566 return result;
567 }
568
569 /* Return estimated reboot time, in seconds */
node_features_g_boot_time(void)570 extern uint32_t node_features_g_boot_time(void)
571 {
572 DEF_TIMERS;
573 uint32_t boot_time = 0;
574 int i;
575
576 START_TIMER;
577 (void) node_features_g_init();
578 slurm_mutex_lock(&g_context_lock);
579 for (i = 0; i < g_context_cnt; i++) {
580 boot_time = MAX(boot_time, (*(ops[i].boot_time))());
581 }
582 slurm_mutex_unlock(&g_context_lock);
583 END_TIMER2("node_features_g_user_update");
584
585 return boot_time;
586 }
587
588 /* Get node features plugin configuration */
node_features_g_get_config(void)589 extern List node_features_g_get_config(void)
590 {
591 DEF_TIMERS;
592 int i, rc;
593 List conf_list = NULL;
594 config_plugin_params_t *p;
595
596 START_TIMER;
597 rc = node_features_g_init();
598
599 if (g_context_cnt > 0)
600 conf_list = list_create(destroy_config_plugin_params);
601
602 slurm_mutex_lock(&g_context_lock);
603 for (i = 0; ((i < g_context_cnt) && (rc == SLURM_SUCCESS)); i++) {
604 p = xmalloc(sizeof(config_plugin_params_t));
605 p->key_pairs = list_create(destroy_config_key_pair);
606
607 (*(ops[i].get_config))(p);
608
609 if (!p->name)
610 destroy_config_plugin_params(p);
611 else
612 list_append(conf_list, p);
613 }
614 slurm_mutex_unlock(&g_context_lock);
615
616 END_TIMER2("node_features_g_get_config");
617
618 return conf_list;
619 }
620
621 /*
622 * Return node "weight" field if reboot required to change mode
623 */
node_features_g_reboot_weight(void)624 extern uint32_t node_features_g_reboot_weight(void)
625 {
626 DEF_TIMERS;
627 int weight = INFINITE - 1;
628
629 START_TIMER;
630 (void) node_features_g_init();
631 slurm_mutex_lock(&g_context_lock);
632 if (g_context_cnt > 0)
633 weight = (*(ops[0].reboot_weight))();
634 slurm_mutex_unlock(&g_context_lock);
635 END_TIMER2("node_features_g_reboot_weight");
636
637 return weight;
638 }
639