1 /*****************************************************************************\
2  *  node_features.c - Infrastructure for changing a node's features on user
3  *	demand
4  *****************************************************************************
5  *  Copyright (C) 2015 SchedMD LLC.
6  *  Written by Morris Jette <jette@schedmd.com>
7  *
8  *  This file is part of Slurm, a resource management program.
9  *  For details, see <https://slurm.schedmd.com/>.
10  *  Please also read the included file: DISCLAIMER.
11  *
12  *  Slurm is free software; you can redistribute it and/or modify it under
13  *  the terms of the GNU General Public License as published by the Free
14  *  Software Foundation; either version 2 of the License, or (at your option)
15  *  any later version.
16  *
17  *  In addition, as a special exception, the copyright holders give permission
18  *  to link the code of portions of this program with the OpenSSL library under
19  *  certain conditions as described in each individual source file, and
20  *  distribute linked combinations including the two. You must obey the GNU
21  *  General Public License in all respects for all of the code used other than
22  *  OpenSSL. If you modify file(s) with this exception, you may extend this
23  *  exception to your version of the file(s), but you are not obligated to do
24  *  so. If you do not wish to do so, delete this exception statement from your
25  *  version.  If you delete this exception statement from all source files in
26  *  the program, then also delete it here.
27  *
28  *  Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
29  *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
30  *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
31  *  details.
32  *
33  *  You should have received a copy of the GNU General Public License along
34  *  with Slurm; if not, write to the Free Software Foundation, Inc.,
35  *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
36 \*****************************************************************************/
37 
38 #include <inttypes.h>
39 #include <string.h>
40 #include <sys/types.h>
41 #include <unistd.h>
42 
43 #include "slurm/slurm.h"
44 #include "src/common/macros.h"
45 #include "src/common/node_features.h"
46 #include "src/common/plugin.h"
47 #include "src/common/read_config.h"
48 #include "src/common/slurm_protocol_api.h"
49 #include "src/common/timers.h"
50 #include "src/common/xmalloc.h"
51 #include "src/common/xstring.h"
52 
/*
 * Table of entry points resolved from a single node_features plugin.
 * One table is kept per loaded plugin in the "ops" array; each
 * node_features_g_*() wrapper in this file dispatches through the member
 * of the same name.
 *
 * NOTE: The member order must match the order of the symbol names in
 * syms[], since both are handed to plugin_context_create() together.
 */
typedef struct node_features_ops {
	uint32_t(*boot_time)	(void);
	bool    (*changeable_feature) (char *feature);
	int	(*get_node)	(char *node_list);
	int	(*job_valid)	(char *job_features);
	char *	(*job_xlate)	(char *job_features);
	bitstr_t * (*get_node_bitmap) (void);
	int     (*overlap)      (bitstr_t *active_bitmap);
	bool	(*node_power)	(void);
	int	(*node_set)	(char *active_features);
	void	(*node_state)	(char **avail_modes, char **current_mode);
	int	(*node_update)	(char *active_features, bitstr_t *node_bitmap);
	bool	(*node_update_valid) (void *node_ptr,
				      update_node_msg_t *update_node_msg);
	char *	(*node_xlate)	(char *new_features, char *orig_features,
				 char *avail_features, int node_inx);
	char *	(*node_xlate2)	(char *new_features);
	void	(*step_config)	(bool mem_sort, bitstr_t *numa_bitmap);
	uint32_t(*reboot_weight)(void);
	int	(*reconfig)	(void);
	bool	(*user_update)	(uid_t uid);
	void	(*get_config)	(config_plugin_params_t *p);
} node_features_ops_t;
76 
/*
 * Names of the symbols looked up in each plugin. This table is passed to
 * plugin_context_create() together with a node_features_ops_t to fill in.
 *
 * These strings must be kept in the same order as the fields
 * declared for node_features_ops_t.
 */
static const char *syms[] = {
	"node_features_p_boot_time",
	"node_features_p_changeable_feature",
	"node_features_p_get_node",
	"node_features_p_job_valid",
	"node_features_p_job_xlate",
	"node_features_p_get_node_bitmap",
	"node_features_p_overlap",
	"node_features_p_node_power",
	"node_features_p_node_set",
	"node_features_p_node_state",
	"node_features_p_node_update",
	"node_features_p_node_update_valid",
	"node_features_p_node_xlate",
	"node_features_p_node_xlate2",
	"node_features_p_step_config",
	"node_features_p_reboot_weight",
	"node_features_p_reconfig",
	"node_features_p_user_update",
	"node_features_p_get_config"
};
102 
/* Number of loaded plugins; -1 means the plugin list has not been set up */
static int g_context_cnt = -1;
/* Per-plugin function tables, indexed in step with g_context[] */
static node_features_ops_t *ops = NULL;
/* Per-plugin contexts returned by plugin_context_create() */
static plugin_context_t **g_context = NULL;
/* Held while the plugin arrays are built, torn down or dispatched through */
static pthread_mutex_t g_context_lock = PTHREAD_MUTEX_INITIALIZER;
/* Plugin list string from slurm_get_node_features_plugins(); it is
 * tokenized in place by node_features_g_init() */
static char *node_features_plugin_list = NULL;
/* Set once node_features_g_init() has walked the plugin list, cleared by
 * node_features_g_fini() */
static bool init_run = false;
109 
110 /* Perform plugin initialization: read configuration files, etc. */
node_features_g_init(void)111 extern int node_features_g_init(void)
112 {
113 	int rc = SLURM_SUCCESS;
114 	char *last = NULL, *names;
115 	char *plugin_type = "node_features";
116 	char *type;
117 
118 	if (init_run && (g_context_cnt >= 0))
119 		return rc;
120 
121 	slurm_mutex_lock(&g_context_lock);
122 	if (g_context_cnt >= 0)
123 		goto fini;
124 
125 	node_features_plugin_list = slurm_get_node_features_plugins();
126 	g_context_cnt = 0;
127 	if ((node_features_plugin_list == NULL) ||
128 	    (node_features_plugin_list[0] == '\0'))
129 		goto fini;
130 
131 	names = node_features_plugin_list;
132 	while ((type = strtok_r(names, ",", &last))) {
133 		xrealloc(ops,
134 			 (sizeof(node_features_ops_t) * (g_context_cnt + 1)));
135 		xrealloc(g_context,
136 			 (sizeof(plugin_context_t *) * (g_context_cnt + 1)));
137 		if (xstrncmp(type, "node_features/", 14) == 0)
138 			type += 14; /* backward compatibility */
139 		type = xstrdup_printf("node_features/%s", type);
140 		g_context[g_context_cnt] = plugin_context_create(
141 			plugin_type, type, (void **)&ops[g_context_cnt],
142 			syms, sizeof(syms));
143 		if (!g_context[g_context_cnt]) {
144 			error("cannot create %s context for %s",
145 			      plugin_type, type);
146 			rc = SLURM_ERROR;
147 			xfree(type);
148 			break;
149 		}
150 
151 		xfree(type);
152 		g_context_cnt++;
153 		names = NULL; /* for next strtok_r() iteration */
154 	}
155 	init_run = true;
156 
157 fini:
158 	slurm_mutex_unlock(&g_context_lock);
159 
160 	if (rc != SLURM_SUCCESS)
161 		node_features_g_fini();
162 
163 	return rc;
164 }
165 
166 /* Perform plugin termination: save state, free memory, etc. */
node_features_g_fini(void)167 extern int node_features_g_fini(void)
168 {
169 	int i, j, rc = SLURM_SUCCESS;
170 
171 	slurm_mutex_lock(&g_context_lock);
172 	if (g_context_cnt < 0)
173 		goto fini;
174 
175 	init_run = false;
176 	for (i = 0; i < g_context_cnt; i++) {
177 		if (g_context[i]) {
178 			j = plugin_context_destroy(g_context[i]);
179 			if (j != SLURM_SUCCESS)
180 				rc = j;
181 		}
182 	}
183 	xfree(ops);
184 	xfree(g_context);
185 	xfree(node_features_plugin_list);
186 	g_context_cnt = -1;
187 
188 fini:	slurm_mutex_unlock(&g_context_lock);
189 	return rc;
190 }
191 
192 /* Return count of node_feature plugins configured */
node_features_g_count(void)193 extern int node_features_g_count(void)
194 {
195 	int rc;
196 
197 	(void) node_features_g_init();
198 	slurm_mutex_lock(&g_context_lock);
199 	rc = g_context_cnt;
200 	slurm_mutex_unlock(&g_context_lock);
201 
202 	return rc;
203 }
204 
205 /* Perform set up for step launch
206  * mem_sort IN - Trigger sort of memory pages (KNL zonesort)
207  * numa_bitmap IN - NUMA nodes allocated to this job */
node_features_g_step_config(bool mem_sort,bitstr_t * numa_bitmap)208 extern void node_features_g_step_config(bool mem_sort, bitstr_t *numa_bitmap)
209 {
210 	DEF_TIMERS;
211 	int i;
212 
213 	START_TIMER;
214 	if (node_features_g_init() != SLURM_SUCCESS)
215 		return;
216 	slurm_mutex_lock(&g_context_lock);
217 	for (i = 0; i < g_context_cnt; i++)
218 		(*(ops[i].step_config))(mem_sort, numa_bitmap);
219 	slurm_mutex_unlock(&g_context_lock);
220 	END_TIMER2("node_features_g_step_config");
221 }
222 
223 /* Reset plugin configuration information */
node_features_g_reconfig(void)224 extern int node_features_g_reconfig(void)
225 {
226 	DEF_TIMERS;
227 	int i, rc;
228 
229 	START_TIMER;
230 	rc = node_features_g_init();
231 	slurm_mutex_lock(&g_context_lock);
232 	for (i = 0; ((i < g_context_cnt) && (rc == SLURM_SUCCESS)); i++)
233 		rc = (*(ops[i].reconfig))();
234 	slurm_mutex_unlock(&g_context_lock);
235 	END_TIMER2("node_features_g_reconfig");
236 
237 	return rc;
238 }
239 
240 /* Return TRUE if this (one) feature name is under this plugin's control */
node_features_g_changeable_feature(char * feature)241 extern bool node_features_g_changeable_feature(char *feature)
242 {
243 	DEF_TIMERS;
244 	int i;
245 	bool changeable = false;
246 
247 	START_TIMER;
248 	(void) node_features_g_init();
249 	slurm_mutex_lock(&g_context_lock);
250 	for (i = 0; ((i < g_context_cnt) && !changeable); i++)
251 		changeable = (*(ops[i].changeable_feature))(feature);
252 	slurm_mutex_unlock(&g_context_lock);
253 	END_TIMER2("node_features_g_reconfig");
254 
255 	return changeable;
256 }
257 
258 /* Update active and available features on specified nodes, sets features on
259  * all nodes is node_list is NULL */
node_features_g_get_node(char * node_list)260 extern int node_features_g_get_node(char *node_list)
261 {
262 	DEF_TIMERS;
263 	int i, rc;
264 
265 	START_TIMER;
266 	rc = node_features_g_init();
267 	slurm_mutex_lock(&g_context_lock);
268 	for (i = 0; ((i < g_context_cnt) && (rc == SLURM_SUCCESS)); i++)
269 		rc = (*(ops[i].get_node))(node_list);
270 	slurm_mutex_unlock(&g_context_lock);
271 	END_TIMER2("node_features_g_get_node");
272 
273 	return rc;
274 }
275 
276 /* Test if a job's feature specification is valid */
node_features_g_job_valid(char * job_features)277 extern int node_features_g_job_valid(char *job_features)
278 {
279 	DEF_TIMERS;
280 	int i, rc;
281 
282 	START_TIMER;
283 	rc = node_features_g_init();
284 	slurm_mutex_lock(&g_context_lock);
285 	for (i = 0; ((i < g_context_cnt) && (rc == SLURM_SUCCESS)); i++)
286 		rc = (*(ops[i].job_valid))(job_features);
287 	slurm_mutex_unlock(&g_context_lock);
288 	END_TIMER2("node_features_g_job_valid");
289 
290 	return rc;
291 }
292 
293 /*
294  * Translate a job's feature request to the node features needed at boot time.
295  *	If multiple MCDRAM or NUMA values are ORed, pick the first ones.
296  * IN job_features - job's --constraint specification
297  * RET features required on node reboot. Must xfree to release memory
298  */
node_features_g_job_xlate(char * job_features)299 extern char *node_features_g_job_xlate(char *job_features)
300 {
301 	DEF_TIMERS;
302 	char *node_features = NULL, *tmp_str;
303 	int i;
304 
305 	START_TIMER;
306 	(void) node_features_g_init();
307 	slurm_mutex_lock(&g_context_lock);
308 	for (i = 0; i < g_context_cnt; i++) {
309 		tmp_str = (*(ops[i].job_xlate))(job_features);
310 		if (tmp_str) {
311 			if (node_features) {
312 				xstrfmtcat(node_features, ",%s", tmp_str);
313 				xfree(tmp_str);
314 			} else {
315 				node_features = tmp_str;
316 			}
317 		}
318 	}
319 	slurm_mutex_unlock(&g_context_lock);
320 	END_TIMER2("node_features_g_job_xlate");
321 
322 	return node_features;
323 }
324 
325 /* Return bitmap of KNL nodes, NULL if none identified */
node_features_g_get_node_bitmap(void)326 extern bitstr_t *node_features_g_get_node_bitmap(void)
327 {
328 	DEF_TIMERS;
329 	bitstr_t *node_bitmap = NULL;
330 	int i;
331 
332 	START_TIMER;
333 	(void) node_features_g_init();
334 	slurm_mutex_lock(&g_context_lock);
335 	for (i = 0; i < g_context_cnt; i++) {
336 		node_bitmap = (*(ops[i].get_node_bitmap))();
337 		if (node_bitmap)
338 			break;
339 	}
340 	slurm_mutex_unlock(&g_context_lock);
341 	END_TIMER2("node_features_g_get_node_bitmap");
342 
343 	return node_bitmap;
344 }
345 
346 /* Return count of bits in active_bitmap that are in the features bitmap */
node_features_g_overlap(bitstr_t * active_bitmap)347 extern int node_features_g_overlap(bitstr_t *active_bitmap)
348 {
349 	DEF_TIMERS;
350 	int cnt = 0;
351 	int i;
352 
353 	START_TIMER;
354 	(void) node_features_g_init();
355 	slurm_mutex_lock(&g_context_lock);
356 	for (i = 0; i < g_context_cnt; i++)
357 		cnt += (*(ops[i].overlap))(active_bitmap);
358 	slurm_mutex_unlock(&g_context_lock);
359 	END_TIMER2("node_features_g_overlap");
360 
361 	return cnt;
362 }
363 
364 /* Return true if the plugin requires PowerSave mode for booting nodes */
node_features_g_node_power(void)365 extern bool node_features_g_node_power(void)
366 {
367 	DEF_TIMERS;
368 	bool node_power = false;
369 	int i;
370 
371 	START_TIMER;
372 	(void) node_features_g_init();
373 	slurm_mutex_lock(&g_context_lock);
374 	for (i = 0; i < g_context_cnt; i++) {
375 		node_power = (*(ops[i].node_power))();
376 		if (node_power)
377 			break;
378 	}
379 	slurm_mutex_unlock(&g_context_lock);
380 	END_TIMER2("node_features_g_node_power");
381 
382 	return node_power;
383 }
384 
385 /* Set's the node's active features based upon job constraints.
386  * NOTE: Executed by the slurmd daemon.
387  * IN active_features - New active features
388  * RET error code */
node_features_g_node_set(char * active_features)389 extern int node_features_g_node_set(char *active_features)
390 {
391 	DEF_TIMERS;
392 	int i, rc = SLURM_SUCCESS;
393 
394 	START_TIMER;
395 	(void) node_features_g_init();
396 	slurm_mutex_lock(&g_context_lock);
397 	for (i = 0; ((i < g_context_cnt) && (rc == SLURM_SUCCESS)); i++) {
398 		rc = (*(ops[i].node_set))(active_features);
399 	}
400 	slurm_mutex_unlock(&g_context_lock);
401 	END_TIMER2("node_features_g_node_set");
402 
403 	return rc;
404 }
405 
406 /* Get this node's current and available MCDRAM and NUMA settings from BIOS.
407  * avail_modes IN/OUT - available modes, must be xfreed
408  * current_mode IN/OUT - current modes, must be xfreed */
node_features_g_node_state(char ** avail_modes,char ** current_mode)409 extern void node_features_g_node_state(char **avail_modes, char **current_mode)
410 {
411 	DEF_TIMERS;
412 	int i;
413 
414 	START_TIMER;
415 	(void) node_features_g_init();
416 	slurm_mutex_lock(&g_context_lock);
417 	for (i = 0; i < g_context_cnt; i++) {
418 		(*(ops[i].node_state))(avail_modes, current_mode);
419 	}
420 	slurm_mutex_unlock(&g_context_lock);
421 	END_TIMER2("node_features_g_node_state");
422 }
423 
424 /* Note the active features associated with a set of nodes have been updated.
425  * Specifically update the node's "hbm" GRES and "CpuBind" values as needed.
426  * IN active_features - New active features
427  * IN node_bitmap - bitmap of nodes changed
428  * RET error code */
node_features_g_node_update(char * active_features,bitstr_t * node_bitmap)429 extern int node_features_g_node_update(char *active_features,
430 				       bitstr_t *node_bitmap)
431 {
432 	DEF_TIMERS;
433 	int i, rc = SLURM_SUCCESS;
434 
435 	START_TIMER;
436 	(void) node_features_g_init();
437 	slurm_mutex_lock(&g_context_lock);
438 	for (i = 0; ((i < g_context_cnt) && (rc == SLURM_SUCCESS)); i++) {
439 		rc = (*(ops[i].node_update))(active_features, node_bitmap);
440 	}
441 	slurm_mutex_unlock(&g_context_lock);
442 	END_TIMER2("node_features_g_node_update");
443 
444 	return rc;
445 }
446 
447 
448 /*
449  * Return TRUE if the specified node update request is valid with respect
450  * to features changes (i.e. don't permit a non-KNL node to set KNL features).
451  *
452  * node_ptr IN - Pointer to node_record_t record
453  * update_node_msg IN - Pointer to update request
454  */
node_features_g_node_update_valid(void * node_ptr,update_node_msg_t * update_node_msg)455 extern bool node_features_g_node_update_valid(void *node_ptr,
456 					update_node_msg_t *update_node_msg)
457 {
458 	DEF_TIMERS;
459 	bool update_valid = true;
460 	int i;
461 
462 	START_TIMER;
463 	(void) node_features_g_init();
464 	slurm_mutex_lock(&g_context_lock);
465 	for (i = 0; i < g_context_cnt; i++) {
466 		update_valid = (*(ops[i].node_update_valid))(node_ptr,
467 							     update_node_msg);
468 		if (!update_valid)
469 			break;
470 	}
471 	slurm_mutex_unlock(&g_context_lock);
472 	END_TIMER2("node_features_g_node_update_valid");
473 
474 	return update_valid;
475 }
476 
477 /*
478  * Translate a node's feature specification by replacing any features associated
479  *	with this plugin in the original value with the new values, preserving
480  *	any features that are not associated with this plugin
481  * IN new_features - newly active features
482  * IN orig_features - original active features
483  * IN avail_features - original available features
484  * IN node_inx - index of node in node table
485  * RET node's new merged features, must be xfreed
486  */
node_features_g_node_xlate(char * new_features,char * orig_features,char * avail_features,int node_inx)487 extern char *node_features_g_node_xlate(char *new_features, char *orig_features,
488 					char *avail_features, int node_inx)
489 {
490 	DEF_TIMERS;
491 	char *new_value = NULL, *tmp_str;
492 	int i;
493 
494 	START_TIMER;
495 	(void) node_features_g_init();
496 	slurm_mutex_lock(&g_context_lock);
497 
498 	if (!g_context_cnt)
499 		new_value = xstrdup(new_features);
500 
501 	for (i = 0; i < g_context_cnt; i++) {
502 		if (new_value)
503 			tmp_str = new_value;
504 		else if (orig_features)
505 			tmp_str = xstrdup(orig_features);
506 		else
507 			tmp_str = NULL;
508 		new_value = (*(ops[i].node_xlate))(new_features, tmp_str,
509 						   avail_features, node_inx);
510 		xfree(tmp_str);
511 
512 	}
513 	slurm_mutex_unlock(&g_context_lock);
514 	END_TIMER2("node_features_g_node_xlate");
515 
516 	return new_value;
517 }
518 
519 /* Translate a node's new feature specification into a "standard" ordering
520  * RET node's new merged features, must be xfreed */
node_features_g_node_xlate2(char * new_features)521 extern char *node_features_g_node_xlate2(char *new_features)
522 {
523 	DEF_TIMERS;
524 	char *new_value = NULL, *tmp_str;
525 	int i;
526 
527 	START_TIMER;
528 	(void) node_features_g_init();
529 	slurm_mutex_lock(&g_context_lock);
530 
531 	if (!g_context_cnt)
532 		new_value = xstrdup(new_features);
533 
534 	for (i = 0; i < g_context_cnt; i++) {
535 		if (new_value)
536 			tmp_str = xstrdup(new_value);
537 		else
538 			tmp_str = xstrdup(new_features);
539 		new_value = (*(ops[i].node_xlate2))(tmp_str);
540 		xfree(tmp_str);
541 
542 	}
543 	slurm_mutex_unlock(&g_context_lock);
544 	END_TIMER2("node_features_g_node_xlate2");
545 
546 	return new_value;
547 }
548 
549 /* Determine if the specified user can modify the currently available node
550  * features */
node_features_g_user_update(uid_t uid)551 extern bool node_features_g_user_update(uid_t uid)
552 {
553 	DEF_TIMERS;
554 	bool result = true;
555 	int i;
556 
557 	START_TIMER;
558 	(void) node_features_g_init();
559 	slurm_mutex_lock(&g_context_lock);
560 	for (i = 0; ((i < g_context_cnt) && (result == true)); i++) {
561 		result = (*(ops[i].user_update))(uid);
562 	}
563 	slurm_mutex_unlock(&g_context_lock);
564 	END_TIMER2("node_features_g_user_update");
565 
566 	return result;
567 }
568 
569 /* Return estimated reboot time, in seconds */
node_features_g_boot_time(void)570 extern uint32_t node_features_g_boot_time(void)
571 {
572 	DEF_TIMERS;
573 	uint32_t boot_time = 0;
574 	int i;
575 
576 	START_TIMER;
577 	(void) node_features_g_init();
578 	slurm_mutex_lock(&g_context_lock);
579 	for (i = 0; i < g_context_cnt; i++) {
580 		boot_time = MAX(boot_time, (*(ops[i].boot_time))());
581 	}
582 	slurm_mutex_unlock(&g_context_lock);
583 	END_TIMER2("node_features_g_user_update");
584 
585 	return boot_time;
586 }
587 
588 /* Get node features plugin configuration */
node_features_g_get_config(void)589 extern List node_features_g_get_config(void)
590 {
591 	DEF_TIMERS;
592 	int i, rc;
593 	List conf_list = NULL;
594 	config_plugin_params_t *p;
595 
596 	START_TIMER;
597 	rc = node_features_g_init();
598 
599 	if (g_context_cnt > 0)
600 		conf_list = list_create(destroy_config_plugin_params);
601 
602 	slurm_mutex_lock(&g_context_lock);
603 	for (i = 0; ((i < g_context_cnt) && (rc == SLURM_SUCCESS)); i++) {
604 		p = xmalloc(sizeof(config_plugin_params_t));
605 		p->key_pairs = list_create(destroy_config_key_pair);
606 
607 		(*(ops[i].get_config))(p);
608 
609 		if (!p->name)
610 			destroy_config_plugin_params(p);
611 		else
612 			list_append(conf_list, p);
613 	}
614 	slurm_mutex_unlock(&g_context_lock);
615 
616 	END_TIMER2("node_features_g_get_config");
617 
618 	return conf_list;
619 }
620 
621 /*
622  * Return node "weight" field if reboot required to change mode
623  */
node_features_g_reboot_weight(void)624 extern uint32_t node_features_g_reboot_weight(void)
625 {
626 	DEF_TIMERS;
627 	int weight = INFINITE - 1;
628 
629 	START_TIMER;
630 	(void) node_features_g_init();
631 	slurm_mutex_lock(&g_context_lock);
632 	if (g_context_cnt > 0)
633 		weight = (*(ops[0].reboot_weight))();
634 	slurm_mutex_unlock(&g_context_lock);
635 	END_TIMER2("node_features_g_reboot_weight");
636 
637 	return weight;
638 }
639