1 /*****************************************************************************\
2  *  switch_generic.c - Library for managing a generic switch resources.
3  *                     Can be used to optimize network communications for
4  *                     parallel jobs.
5  *****************************************************************************
6  *  Copyright (C) 2013 SchedMD LLC
7  *  Written by Morris Jette <jette@schedmd.com>
8  *
9  *  This file is part of Slurm, a resource management program.
10  *  For details, see <https://slurm.schedmd.com/>.
11  *  Please also read the included file: DISCLAIMER.
12  *
13  *  Slurm is free software; you can redistribute it and/or modify it under
14  *  the terms of the GNU General Public License as published by the Free
15  *  Software Foundation; either version 2 of the License, or (at your option)
16  *  any later version.
17  *
18  *  In addition, as a special exception, the copyright holders give permission
19  *  to link the code of portions of this program with the OpenSSL library under
20  *  certain conditions as described in each individual source file, and
21  *  distribute linked combinations including the two. You must obey the GNU
22  *  General Public License in all respects for all of the code used other than
23  *  OpenSSL. If you modify file(s) with this exception, you may extend this
24  *  exception to your version of the file(s), but you are not obligated to do
25  *  so. If you do not wish to do so, delete this exception statement from your
26  *  version.  If you delete this exception statement from all source files in
27  *  the program, then also delete it here.
28  *
29  *  Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
30  *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
31  *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
32  *  details.
33  *
34  *  You should have received a copy of the GNU General Public License along
35  *  with Slurm; if not, write to the Free Software Foundation, Inc.,
36  *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
37 \*****************************************************************************/
38 
39 #include <arpa/inet.h>
40 #include <netdb.h>
41 #include <netinet/in.h>
42 #include <signal.h>
43 #include <stdio.h>
44 #include <stdlib.h>
45 #include <string.h>
46 #include <sys/types.h>
47 #include <ifaddrs.h>
48 #include <sys/socket.h>
49 
50 /* net/if.h must come after sys/types.h on NetBSD */
51 #if !defined(__DragonFly__)
52 #include <net/if.h>
53 #endif
54 
55 #include "slurm/slurm_errno.h"
56 #include "src/common/slurm_xlator.h"
57 #include "src/common/xmalloc.h"
58 
59 #define SW_GEN_HASH_MAX		1000
60 #define SW_GEN_LIBSTATE_MAGIC	0x3b287d0c
61 #define SW_GEN_NODE_INFO_MAGIC	0x3b38ac0c
62 #define SW_GEN_STEP_INFO_MAGIC	0x58ae93cb
63 
64 /* Change GEN_STATE_VERSION value when changing the state save format */
65 #define GEN_STATE_VERSION      "NRT001"
66 
67 typedef struct sw_gen_ifa {
68 	char *ifa_name;		/* "eth0", "ib1", etc. */
69 	char *ifa_family;	/* "AF_INET" or "AF_INET6" */
70 	char *ifa_addr;		/* output from inet_ntop */
71 } sw_gen_ifa_t;
72 typedef struct sw_gen_node_info {
73 	uint32_t magic;
74 	uint16_t ifa_cnt;
75 	sw_gen_ifa_t **ifa_array;
76 	char *node_name;
77 	struct sw_gen_node_info *next;	/* used for hash table */
78 } sw_gen_node_info_t;
79 
80 typedef struct sw_gen_node {
81 	char *node_name;
82 	uint16_t ifa_cnt;
83 	sw_gen_ifa_t **ifa_array;
84 } sw_gen_node_t;
85 typedef struct sw_gen_step_info {
86 	uint32_t magic;
87 	uint32_t node_cnt;
88 	sw_gen_node_t **node_array;
89 } sw_gen_step_info_t;
90 
91 typedef struct sw_gen_libstate {
92 	uint32_t magic;
93 	uint32_t node_count;
94 	uint32_t hash_max;
95 	sw_gen_node_info_t **hash_table;
96 } sw_gen_libstate_t;
97 
98 /*
99  * These variables are required by the generic plugin interface.  If they
100  * are not found in the plugin, the plugin loader will ignore it.
101  *
102  * plugin_name - a string giving a human-readable description of the
103  * plugin.  There is no maximum length, but the symbol must refer to
104  * a valid string.
105  *
106  * plugin_type - a string suggesting the type of the plugin or its
107  * applicability to a particular form of data or method of data handling.
108  * If the low-level plugin API is used, the contents of this string are
109  * unimportant and may be anything.  Slurm uses the higher-level plugin
110  * interface which requires this string to be of the form
111  *
112  *      <application>/<method>
113  *
114  * where <application> is a description of the intended application of
115  * the plugin (e.g., "switch" for Slurm switch) and <method> is a description
116  * of how this plugin satisfies that application.  Slurm will only load
117  * a switch plugin if the plugin_type string has a prefix of "switch/".
118  *
119  * plugin_version - an unsigned 32-bit integer containing the Slurm version
120  * (major.minor.micro combined into a single number).
121  */
122 const char plugin_name[]        = "switch generic plugin";
123 const char plugin_type[]        = "switch/generic";
124 const uint32_t plugin_version   = SLURM_VERSION_NUMBER;
125 const uint32_t plugin_id	= SWITCH_PLUGIN_GENERIC;
126 
127 uint64_t debug_flags = 0;
128 pthread_mutex_t	global_lock = PTHREAD_MUTEX_INITIALIZER;
129 sw_gen_libstate_t *libstate = NULL;
130 
131 extern int switch_p_free_node_info(switch_node_info_t **switch_node);
132 extern int switch_p_alloc_node_info(switch_node_info_t **switch_node);
133 
134 static void
_alloc_libstate(void)135 _alloc_libstate(void)
136 {
137 	xassert(!libstate);
138 
139 	libstate = xmalloc(sizeof(sw_gen_libstate_t));
140 	libstate->magic = SW_GEN_LIBSTATE_MAGIC;
141 	libstate->node_count = 0;
142 	libstate->hash_max = SW_GEN_HASH_MAX;
143 	libstate->hash_table = xcalloc(libstate->hash_max,
144 				       sizeof(sw_gen_node_info_t *));
145 }
146 
147 static void
_free_libstate(void)148 _free_libstate(void)
149 {
150 	sw_gen_node_info_t *node_ptr, *next_node_ptr;
151 	int i;
152 
153 	if (!libstate)
154 		return;
155 	xassert(libstate->magic == SW_GEN_LIBSTATE_MAGIC);
156 	for (i = 0; i < libstate->hash_max; i++) {
157 		node_ptr = libstate->hash_table[i];
158 		while (node_ptr) {
159 			next_node_ptr = node_ptr->next;
160 			(void) switch_p_free_node_info((switch_node_info_t **)
161 						       &node_ptr);
162 			node_ptr = next_node_ptr;
163 		}
164 	}
165 	libstate->magic = 0;
166 	xfree(libstate->hash_table);
167 	xfree(libstate);
168 }
169 
170 /* The idea behind keeping the hash table was to avoid a linear
171  * search of the node list each time we want to retrieve or
172  * modify a node's data.  The _hash_index function translates
173  * a node name to an index into the hash table.
174  *
175  * Used by: slurmctld
176  */
177 static int
_hash_index(char * name)178 _hash_index(char *name)
179 {
180 	int index = 0;
181 	int j;
182 
183 	assert(name);
184 
185 	/* Multiply each character by its numerical position in the
186 	 * name string to add a bit of entropy, because host names such
187 	 * as cluster[0001-1000] can cause excessive index collisions.
188 	 */
189 	for (j = 1; *name; name++, j++)
190 		index += (int)*name * j;
191 	index %= libstate->hash_max;
192 
193 	return index;
194 }
195 
196 /* Tries to find a node fast using the hash table
197  *
198  * Used by: slurmctld
199  */
200 static sw_gen_node_info_t *
_find_node(char * node_name)201 _find_node(char *node_name)
202 {
203 	int i;
204 	sw_gen_node_info_t *n;
205 	node_record_t *node_ptr;
206 
207 	if (node_name == NULL) {
208 		error("%s: _find_node node name is NULL", plugin_type);
209 		return NULL;
210 	}
211 	if (libstate->node_count == 0)
212 		return NULL;
213 	xassert(libstate->magic == SW_GEN_LIBSTATE_MAGIC);
214 	if (libstate->hash_table) {
215 		i = _hash_index(node_name);
216 		n = libstate->hash_table[i];
217 		while (n) {
218 			xassert(n->magic == SW_GEN_NODE_INFO_MAGIC);
219 			if (!xstrcmp(n->node_name, node_name))
220 				return n;
221 			n = n->next;
222 		}
223 	}
224 
225 	/* This code is only needed if NodeName and NodeHostName differ */
226 	node_ptr = find_node_record(node_name);
227 	if (node_ptr && libstate->hash_table) {
228 		i = _hash_index(node_ptr->node_hostname);
229 		n = libstate->hash_table[i];
230 		while (n) {
231 			xassert(n->magic == SW_GEN_NODE_INFO_MAGIC);
232 			if (!xstrcmp(n->node_name, node_name))
233 				return n;
234 			n = n->next;
235 		}
236 	}
237 
238 	return NULL;
239 }
240 
241 /* Add the hash entry for a newly created node record */
242 static void
_hash_add_nodeinfo(sw_gen_node_info_t * new_node_info)243 _hash_add_nodeinfo(sw_gen_node_info_t *new_node_info)
244 {
245 	int index;
246 
247 	xassert(libstate);
248 	xassert(libstate->hash_table);
249 	xassert(libstate->hash_max >= libstate->node_count);
250 	xassert(libstate->magic == SW_GEN_LIBSTATE_MAGIC);
251 	if (!new_node_info->node_name || !new_node_info->node_name[0])
252 		return;
253 	index = _hash_index(new_node_info->node_name);
254 	new_node_info->next = libstate->hash_table[index];
255 	libstate->hash_table[index] = new_node_info;
256 	libstate->node_count++;
257 }
258 
259 /* Add the new node information to our libstate cache, making a copy if
260  * information is new. Otherwise, swap the data and return to the user old
261  * data, which is fine in this case since it is only deleted by slurmctld */
_cache_node_info(sw_gen_node_info_t * new_node_info)262 static void _cache_node_info(sw_gen_node_info_t *new_node_info)
263 {
264 	sw_gen_node_info_t *old_node_info;
265 	uint16_t ifa_cnt;
266 	sw_gen_ifa_t **ifa_array;
267 	struct sw_gen_node_info *next;
268 	bool new_alloc;      /* True if this is new node to be added to cache */
269 
270 	slurm_mutex_lock(&global_lock);
271 	old_node_info = _find_node(new_node_info->node_name);
272 	new_alloc = (old_node_info == NULL);
273 	if (new_alloc) {
274 		(void) switch_p_alloc_node_info((switch_node_info_t **)
275 						&old_node_info);
276 		old_node_info->node_name = xstrdup(new_node_info->node_name);
277 	}
278 
279 	/* Swap contents */
280 	ifa_cnt   = old_node_info->ifa_cnt;
281 	ifa_array = old_node_info->ifa_array;
282 	next      = old_node_info->next;
283 	old_node_info->ifa_cnt   = new_node_info->ifa_cnt;
284 	old_node_info->ifa_array = new_node_info->ifa_array;
285 	old_node_info->next      = new_node_info->next;
286 	new_node_info->ifa_cnt   = ifa_cnt;
287 	new_node_info->ifa_array = ifa_array;
288 	new_node_info->next      = next;
289 
290 	if (new_alloc)
291 		_hash_add_nodeinfo(old_node_info);
292 	slurm_mutex_unlock(&global_lock);
293 }
294 
295 /*
296  * init() is called when the plugin is loaded, before any other functions
297  * are called.  Put global initialization here.
298  */
init(void)299 int init(void)
300 {
301 	debug("%s loaded", plugin_name);
302 	debug_flags = slurm_get_debug_flags();
303 	return SLURM_SUCCESS;
304 }
305 
fini(void)306 int fini(void)
307 {
308 	slurm_mutex_lock(&global_lock);
309 	_free_libstate();
310 	slurm_mutex_unlock(&global_lock);
311 	return SLURM_SUCCESS;
312 }
313 
switch_p_reconfig(void)314 extern int switch_p_reconfig(void)
315 {
316 	debug_flags = slurm_get_debug_flags();
317 	return SLURM_SUCCESS;
318 }
319 
320 /*
321  * switch functions for global state save/restore
322  */
switch_p_libstate_save(char * dir_name)323 int switch_p_libstate_save(char * dir_name)
324 {
325 	if (debug_flags & DEBUG_FLAG_SWITCH)
326 		info("switch_p_libstate_save() starting");
327 	/* No state saved or restored for this plugin */
328 	return SLURM_SUCCESS;
329 }
330 
switch_p_libstate_restore(char * dir_name,bool recover)331 int switch_p_libstate_restore(char * dir_name, bool recover)
332 {
333 	if (debug_flags & DEBUG_FLAG_SWITCH)
334 		info("switch_p_libstate_restore() starting");
335 	/* No state saved or restored for this plugin, just initialize */
336 	slurm_mutex_lock(&global_lock);
337 	_alloc_libstate();
338 	slurm_mutex_unlock(&global_lock);
339 
340 	return SLURM_SUCCESS;
341 }
342 
switch_p_libstate_clear(void)343 int switch_p_libstate_clear(void)
344 {
345 	if (debug_flags & DEBUG_FLAG_SWITCH)
346 		info("switch_p_libstate_clear() starting");
347 	return SLURM_SUCCESS;
348 }
349 
350 /*
351  * switch functions for job step specific credential
352  */
/*
 * Allocate an empty step record and hand it back through switch_job.
 * job_id and step_id are not used.
 */
int switch_p_alloc_jobinfo(switch_jobinfo_t **switch_job,
			   uint32_t job_id, uint32_t step_id)
{
	sw_gen_step_info_t *step;

	if (debug_flags & DEBUG_FLAG_SWITCH) {
		info("switch_p_alloc_jobinfo() starting");
	}
	xassert(switch_job);

	step = xmalloc(sizeof(*step));
	step->magic = SW_GEN_STEP_INFO_MAGIC;
	*switch_job = (switch_jobinfo_t *) step;

	return SLURM_SUCCESS;
}
367 
/*
 * Fill a step record with one entry per node of the step layout, copying
 * the cached interface data of every node found in the libstate cache.
 * Nodes without cached data get an entry with no interfaces.
 *
 * switch_job IN/OUT - step record from switch_p_alloc_jobinfo()
 * step_layout IN - its node_list names the nodes of the step
 * network IN - not used
 * RET SLURM_SUCCESS (an unparsable node list is fatal)
 */
int switch_p_build_jobinfo(switch_jobinfo_t *switch_job,
			   slurm_step_layout_t *step_layout, char *network)
{
	sw_gen_step_info_t *gen_step_info = (sw_gen_step_info_t *) switch_job;
	sw_gen_node_info_t *gen_node_info;
	sw_gen_node_t *node_ptr;
	sw_gen_ifa_t *src_ifa, *dst_ifa;
	hostlist_t hl = NULL;
	hostlist_iterator_t hi;
	char *host = NULL;
	int i, j;

	if (debug_flags & DEBUG_FLAG_SWITCH)
		info("switch_p_build_jobinfo() starting");
	xassert(gen_step_info);
	xassert(gen_step_info->magic == SW_GEN_STEP_INFO_MAGIC);
	hl = hostlist_create(step_layout->node_list);
	if (!hl)
		fatal("hostlist_create(%s): %m", step_layout->node_list);
	gen_step_info->node_cnt = hostlist_count(hl);
	gen_step_info->node_array = xcalloc(gen_step_info->node_cnt,
					    sizeof(sw_gen_node_t *));
	hi = hostlist_iterator_create(hl);
	for (i = 0; (host = hostlist_next(hi)); i++) {
		node_ptr = xmalloc(sizeof(*node_ptr));
		gen_step_info->node_array[i] = node_ptr;
		node_ptr->node_name = xstrdup(host);

		/*
		 * _cache_node_info() exchanges a cached record's interface
		 * data under global_lock, so the lookup and the copy are
		 * done under the same lock.
		 */
		slurm_mutex_lock(&global_lock);
		gen_node_info = _find_node(host);
		if (gen_node_info) {	/* Copy node info to this step */
			node_ptr->ifa_cnt = gen_node_info->ifa_cnt;
			/* array of, and elements of type, sw_gen_ifa_t */
			node_ptr->ifa_array = xcalloc(node_ptr->ifa_cnt,
						      sizeof(sw_gen_ifa_t *));
			for (j = 0; j < node_ptr->ifa_cnt; j++) {
				src_ifa = gen_node_info->ifa_array[j];
				dst_ifa = xmalloc(sizeof(*dst_ifa));
				dst_ifa->ifa_addr = xstrdup(src_ifa->ifa_addr);
				dst_ifa->ifa_family =
					xstrdup(src_ifa->ifa_family);
				dst_ifa->ifa_name = xstrdup(src_ifa->ifa_name);
				node_ptr->ifa_array[j] = dst_ifa;
			}
		}
		slurm_mutex_unlock(&global_lock);

		/* released with free(), as in the original code */
		free(host);
	}
	hostlist_iterator_destroy(hi);
	hostlist_destroy(hl);

	return SLURM_SUCCESS;
}
417 
/*
 * Hand back a new step record through dest.
 *
 * NOTE: nothing is copied from source yet; dest receives an empty record.
 *
 * source IN - record to duplicate (only checked for being non-NULL)
 * dest OUT - newly allocated record
 * RET return code of switch_p_alloc_jobinfo()
 */
int switch_p_duplicate_jobinfo(switch_jobinfo_t *source,
			       switch_jobinfo_t **dest)
{
	if (debug_flags & DEBUG_FLAG_SWITCH)
		info("switch_p_duplicate_jobinfo() starting");
	/* FIXME: If this is ever needed please flesh this out! */

	xassert(source);
	xassert(dest);

	return switch_p_alloc_jobinfo(dest, NO_VAL, NO_VAL);
}
434 
/*
 * Release a step record together with all of its node and interface data.
 *
 * Partially built records are accepted: switch_p_unpack_jobinfo() calls
 * this function from its error path, where node_array, a node entry, an
 * ifa_array or an interface entry may still be NULL although the matching
 * counter is already set.
 *
 * switch_job IN - record to free; NULL is ignored
 */
void switch_p_free_jobinfo(switch_jobinfo_t *switch_job)
{
	sw_gen_step_info_t *gen_step_info = (sw_gen_step_info_t *) switch_job;
	sw_gen_node_t *node_ptr;
	sw_gen_ifa_t *ifa_ptr;
	uint32_t i;
	int j;

	if (debug_flags & DEBUG_FLAG_SWITCH)
		info("switch_p_free_jobinfo() starting");
	if (!gen_step_info)
		return;
	xassert(gen_step_info->magic == SW_GEN_STEP_INFO_MAGIC);

	for (i = 0; gen_step_info->node_array &&
		    (i < gen_step_info->node_cnt); i++) {
		node_ptr = gen_step_info->node_array[i];
		if (!node_ptr)
			continue;
		xfree(node_ptr->node_name);
		for (j = 0; node_ptr->ifa_array &&
			    (j < node_ptr->ifa_cnt); j++) {
			ifa_ptr = node_ptr->ifa_array[j];
			if (!ifa_ptr)
				continue;
			xfree(ifa_ptr->ifa_addr);
			xfree(ifa_ptr->ifa_family);
			xfree(ifa_ptr->ifa_name);
			xfree(ifa_ptr);
		}
		/* the pointer array itself was previously leaked */
		xfree(node_ptr->ifa_array);
		xfree(node_ptr);
	}
	xfree(gen_step_info->node_array);
	xfree(gen_step_info);
}
463 
/*
 * Pack a step record: the node count, then for each node its name, its
 * interface count and, per interface, address, family and name.
 * protocol_version is not used.
 */
int switch_p_pack_jobinfo(switch_jobinfo_t *switch_job, Buf buffer,
			  uint16_t protocol_version)
{
	sw_gen_step_info_t *step = (sw_gen_step_info_t *) switch_job;
	int n, k;

	if (debug_flags & DEBUG_FLAG_SWITCH) {
		info("switch_p_pack_jobinfo() starting");
	}
	xassert(step);
	xassert(step->magic == SW_GEN_STEP_INFO_MAGIC);

	pack32(step->node_cnt, buffer);
	for (n = 0; n < step->node_cnt; n++) {
		sw_gen_node_t *node = step->node_array[n];

		packstr(node->node_name, buffer);
		pack16(node->ifa_cnt, buffer);
		for (k = 0; k < node->ifa_cnt; k++) {
			sw_gen_ifa_t *ifa = node->ifa_array[k];

			if (debug_flags & DEBUG_FLAG_SWITCH) {
				info("node=%s name=%s family=%s addr=%s",
				     node->node_name, ifa->ifa_name,
				     ifa->ifa_family, ifa->ifa_addr);
			}
			packstr(ifa->ifa_addr, buffer);
			packstr(ifa->ifa_family, buffer);
			packstr(ifa->ifa_name, buffer);
		}
	}

	return SLURM_SUCCESS;
}
497 
/*
 * Allocate a step record and fill it from buffer, reading the fields in
 * the order switch_p_pack_jobinfo() writes them. On an unpack error the
 * record is freed, *switch_job is set to NULL and SLURM_ERROR is returned.
 * protocol_version is not used.
 */
int switch_p_unpack_jobinfo(switch_jobinfo_t **switch_job, Buf buffer,
			    uint16_t protocol_version)
{
	sw_gen_step_info_t *step;
	sw_gen_node_t *node;
	sw_gen_ifa_t *ifa;
	uint32_t len;
	int n, k;

	switch_p_alloc_jobinfo(switch_job, 0, 0);
	step = (sw_gen_step_info_t *) *switch_job;

	if (debug_flags & DEBUG_FLAG_SWITCH) {
		info("switch_p_unpack_jobinfo() starting");
	}

	safe_unpack32(&step->node_cnt, buffer);
	safe_xcalloc(step->node_array, step->node_cnt,
		     sizeof(sw_gen_node_t *));
	for (n = 0; n < step->node_cnt; n++) {
		/* Linked in before unpacking so the error path can free it */
		node = xmalloc(sizeof(sw_gen_node_t));
		step->node_array[n] = node;
		safe_unpackstr_xmalloc(&node->node_name, &len, buffer);
		safe_unpack16(&node->ifa_cnt, buffer);
		safe_xcalloc(node->ifa_array, node->ifa_cnt,
			     sizeof(sw_gen_ifa_t *));
		for (k = 0; k < node->ifa_cnt; k++) {
			ifa = xmalloc(sizeof(sw_gen_ifa_t));
			node->ifa_array[k] = ifa;
			safe_unpackstr_xmalloc(&ifa->ifa_addr, &len, buffer);
			safe_unpackstr_xmalloc(&ifa->ifa_family, &len, buffer);
			safe_unpackstr_xmalloc(&ifa->ifa_name, &len, buffer);
			if (debug_flags & DEBUG_FLAG_SWITCH) {
				info("node=%s name=%s family=%s addr=%s",
				     node->node_name, ifa->ifa_name,
				     ifa->ifa_family, ifa->ifa_addr);
			}
		}
	}

	return SLURM_SUCCESS;

unpack_error:

	switch_p_free_jobinfo((switch_jobinfo_t *) step);
	*switch_job = NULL;

	return SLURM_ERROR;
}
549 
/* Only traces the call; nothing is written to fp. */
void switch_p_print_jobinfo(FILE *fp, switch_jobinfo_t *jobinfo)
{
	if (debug_flags & DEBUG_FLAG_SWITCH) {
		info("switch_p_print_jobinfo() starting");
	}
}
556 
/*
 * Store an empty string in buf and return buf, or return NULL when buf is
 * NULL or size is zero. switch_jobinfo is not used.
 */
char *switch_p_sprint_jobinfo(switch_jobinfo_t *switch_jobinfo, char *buf,
			      size_t size)
{
	if (debug_flags & DEBUG_FLAG_SWITCH) {
		info("switch_p_sprint_jobinfo() starting");
	}

	if ((buf == NULL) || !size)
		return NULL;

	buf[0] = '\0';
	return buf;
}
568 
569 /*
570  * switch functions for job initiation
571  */
switch_p_node_init(void)572 int switch_p_node_init(void)
573 {
574 	if (debug_flags & DEBUG_FLAG_SWITCH)
575 		info("switch_p_node_init() starting");
576 	return SLURM_SUCCESS;
577 }
578 
switch_p_node_fini(void)579 int switch_p_node_fini(void)
580 {
581 	if (debug_flags & DEBUG_FLAG_SWITCH)
582 		info("switch_p_node_fini() starting");
583 	return SLURM_SUCCESS;
584 }
585 
/*
 * When switch debugging is enabled, log every interface of every node in
 * the step record; otherwise do nothing. Always returns SLURM_SUCCESS.
 */
int switch_p_job_preinit(switch_jobinfo_t *switch_job)
{
	sw_gen_step_info_t *step = (sw_gen_step_info_t *) switch_job;
	int n, k;

	if (!(debug_flags & DEBUG_FLAG_SWITCH))
		return SLURM_SUCCESS;

	info("switch_p_job_preinit() starting");

	for (n = 0; n < step->node_cnt; n++) {
		sw_gen_node_t *node = step->node_array[n];

		for (k = 0; k < node->ifa_cnt; k++) {
			sw_gen_ifa_t *ifa = node->ifa_array[k];

			info("node=%s name=%s family=%s addr=%s",
			     node->node_name, ifa->ifa_name,
			     ifa->ifa_family, ifa->ifa_addr);
		}
	}

	return SLURM_SUCCESS;
}
609 
/* Only traces the call; job is not used. */
extern int switch_p_job_init(stepd_step_rec_t *job)
{
	if (debug_flags & DEBUG_FLAG_SWITCH) {
		info("switch_p_job_init() starting");
	}

	return SLURM_SUCCESS;
}
616 
/* Only traces the call; always reports SLURM_SUCCESS. */
extern int switch_p_job_suspend_test(switch_jobinfo_t *jobinfo)
{
	if (debug_flags & DEBUG_FLAG_SWITCH) {
		info("switch_p_job_suspend_test() starting");
	}

	return SLURM_SUCCESS;
}
623 
/* Only traces the call; *suspend_info is left untouched. */
extern void switch_p_job_suspend_info_get(switch_jobinfo_t *jobinfo,
					  void **suspend_info)
{
	if (debug_flags & DEBUG_FLAG_SWITCH) {
		info("switch_p_job_suspend_info_get() starting");
	}
}
631 
/* Only traces the call; nothing is packed into buffer. */
extern void switch_p_job_suspend_info_pack(void *suspend_info, Buf buffer,
					   uint16_t protocol_version)
{
	if (debug_flags & DEBUG_FLAG_SWITCH) {
		info("switch_p_job_suspend_info_pack() starting");
	}
}
639 
/* Only traces the call; nothing is read from buffer. */
extern int switch_p_job_suspend_info_unpack(void **suspend_info, Buf buffer,
					    uint16_t protocol_version)
{
	if (debug_flags & DEBUG_FLAG_SWITCH) {
		info("switch_p_job_suspend_info_unpack() starting");
	}

	return SLURM_SUCCESS;
}
647 
switch_p_job_suspend_info_free(void * suspend_info)648 extern void switch_p_job_suspend_info_free(void *suspend_info)
649 {
650 	if (debug_flags & DEBUG_FLAG_SWITCH)
651 		info("switch_p_job_suspend_info_free() starting");
652 	return;
653 }
654 
switch_p_job_suspend(void * suspend_info,int max_wait)655 extern int switch_p_job_suspend(void *suspend_info, int max_wait)
656 {
657 	if (debug_flags & DEBUG_FLAG_SWITCH)
658 		info("switch_p_job_suspend() starting");
659 	return SLURM_SUCCESS;
660 }
661 
switch_p_job_resume(void * suspend_info,int max_wait)662 extern int switch_p_job_resume(void *suspend_info, int max_wait)
663 {
664 	if (debug_flags & DEBUG_FLAG_SWITCH)
665 		info("switch_p_job_resume() starting");
666 	return SLURM_SUCCESS;
667 }
668 
/* Only traces the call; jobinfo is not used. */
int switch_p_job_fini(switch_jobinfo_t *jobinfo)
{
	if (debug_flags & DEBUG_FLAG_SWITCH) {
		info("switch_p_job_fini() starting");
	}

	return SLURM_SUCCESS;
}
675 
/*
 * Send SIGKILL to the process group whose id is job->jmgr_pid.
 *
 * The id is kept in a pid_t (it was a uid_t, which made "-pgid" an
 * unsigned negation). Only ids greater than 1 are signalled: 0 and
 * negative values are not usable process group ids, and 1 would turn the
 * call into kill(-1, ...), which addresses every process the caller is
 * allowed to signal.
 *
 * job IN - step record providing jmgr_pid, jobid and stepid
 * RET SLURM_SUCCESS, whether or not a signal was sent
 */
int switch_p_job_postfini(stepd_step_rec_t *job)
{
	pid_t pgid = job->jmgr_pid;

	if (debug_flags & DEBUG_FLAG_SWITCH)
		info("switch_p_job_postfini() starting");
	/*
	 *  Kill all processes in the job's session
	 */
	if (pgid > 1) {
		debug2("Sending SIGKILL to pgid %lu",
			(unsigned long) pgid);
		/* result ignored on purpose, as before */
		(void) kill(-pgid, SIGKILL);
	} else {
		debug("Job %u.%u: Bad pid value %ld", job->jobid,
		      job->stepid, (long) pgid);
	}

	return SLURM_SUCCESS;
}
694 
/* Only traces the call; none of the arguments are used. */
int switch_p_job_attach(switch_jobinfo_t *jobinfo, char ***env,
			uint32_t nodeid, uint32_t procid, uint32_t nnodes,
			uint32_t nprocs, uint32_t rank)
{
	if (debug_flags & DEBUG_FLAG_SWITCH) {
		info("switch_p_job_attach() starting");
	}

	return SLURM_SUCCESS;
}
703 
704 /*
705  * Allocates network information in resulting_data with xmalloc
706  * String result of format : (nodename,(iface,IP_V{4,6},address)*)
707  */
switch_p_get_jobinfo(switch_jobinfo_t * switch_job,int key,void * resulting_data)708 extern int switch_p_get_jobinfo(switch_jobinfo_t *switch_job,
709 								int key, void *resulting_data)
710 {
711 	int node_id = key;
712 	sw_gen_step_info_t *stepinfo = (sw_gen_step_info_t*) switch_job;
713 	sw_gen_node_t *node_ptr = stepinfo->node_array[node_id];
714 	sw_gen_ifa_t *ifa_ptr;
715 	int i, s;
716 	int bufsize = 1024;
717 	char *buf;
718 
719 #if defined(__DragonFly__)
720 #define IFNAMSIZ 16
721 #endif
722 	int triplet_len_max = IFNAMSIZ + INET6_ADDRSTRLEN + 5 + 5 + 1;
723 
724 	if (debug_flags & DEBUG_FLAG_SWITCH)
725 		info("switch_p_get_jobinfo() starting");
726 
727 	if (!resulting_data) {
728 		error("no pointer for resulting_data");
729 		return SLURM_ERROR;
730 	}
731 
732 	*(char **) resulting_data = NULL;
733 
734 	if (node_id < 0 || node_id >= stepinfo->node_cnt) {
735 		error("node_id out of range");
736 		return SLURM_ERROR;
737 	}
738 
739 	buf = xmalloc(bufsize);
740 	s = snprintf(buf, bufsize, "(%s", node_ptr->node_name);
741 	/* appends in buf triplets (ifname,ipversion,address) */
742 	for (i = 0; i < node_ptr->ifa_cnt; i++) {
743 		ifa_ptr = node_ptr->ifa_array[i];
744 		if (s + triplet_len_max > bufsize) {
745 			bufsize *= 2;
746 			xrealloc(buf, bufsize);
747 		}
748 		s += snprintf(buf+s, bufsize-s, ",(%s,%s,%s)",
749 			      ifa_ptr->ifa_name, ifa_ptr->ifa_family,
750 			      ifa_ptr->ifa_addr);
751 	}
752 	snprintf(buf+s, bufsize-s, ")");
753 
754 	*(char **)resulting_data = buf; /* return x-alloc'ed data */
755 
756 	return SLURM_SUCCESS;
757 }
758 
759 /*
760  * node switch state monitoring functions
761  * required for IBM Federation switch
762  */
switch_p_clear_node_state(void)763 extern int switch_p_clear_node_state(void)
764 {
765 	if (debug_flags & DEBUG_FLAG_SWITCH)
766 		info("switch_p_clear_node_state() starting");
767 	return SLURM_SUCCESS;
768 }
769 
/* Allocate an empty node record and hand it back through switch_node. */
extern int switch_p_alloc_node_info(switch_node_info_t **switch_node)
{
	sw_gen_node_info_t *node_info;

	if (debug_flags & DEBUG_FLAG_SWITCH) {
		info("switch_p_alloc_node_info() starting");
	}
	xassert(switch_node);

	node_info = xmalloc(sizeof(*node_info));
	node_info->magic = SW_GEN_NODE_INFO_MAGIC;
	*switch_node = (switch_node_info_t *) node_info;

	return SLURM_SUCCESS;
}
783 
/*
 * Fill a node record with the local short host name and one entry for
 * every non-loopback IPv4/IPv6 interface address reported by getifaddrs().
 *
 * switch_node IN/OUT - record from switch_p_alloc_node_info()
 * RET SLURM_ERROR if the host name cannot be read, else SLURM_SUCCESS
 *     (a getifaddrs() failure leaves the record without interfaces)
 */
extern int switch_p_build_node_info(switch_node_info_t *switch_node)
{
	sw_gen_node_info_t *gen_node_info = (sw_gen_node_info_t *) switch_node;
	struct ifaddrs *if_array = NULL, *if_rec;
	sw_gen_ifa_t *ifa_ptr;
	void *addr_ptr = NULL;
	char addr_str[INET6_ADDRSTRLEN], *ip_family;
	char hostname[256], *tmp;

	if (debug_flags & DEBUG_FLAG_SWITCH)
		info("switch_p_build_node_info() starting");
	xassert(gen_node_info);
	xassert(gen_node_info->magic == SW_GEN_NODE_INFO_MAGIC);
	if (gethostname(hostname, sizeof(hostname)) < 0)
		return SLURM_ERROR;
	/* gethostname() need not terminate a truncated name */
	hostname[sizeof(hostname) - 1] = '\0';
	/* remove the domain portion, if necessary */
	tmp = strchr(hostname, '.');
	if (tmp)
		*tmp = '\0';
	gen_node_info->node_name = xstrdup(hostname);

	/* Nothing to walk, and nothing to free, if the query failed */
	if (getifaddrs(&if_array) != 0)
		return SLURM_SUCCESS;

	for (if_rec = if_array; if_rec; if_rec = if_rec->ifa_next) {
		/* ifa_addr may be NULL for interfaces without an address */
		if (!if_rec->ifa_addr)
			continue;
#if !defined(__DragonFly__)
		if (if_rec->ifa_flags & IFF_LOOPBACK)
			continue;
#endif
		if (if_rec->ifa_addr->sa_family == AF_INET) {
			addr_ptr = &((struct sockaddr_in *)
					if_rec->ifa_addr)->sin_addr;
			ip_family = "IP_V4";
		} else if (if_rec->ifa_addr->sa_family == AF_INET6) {
			addr_ptr = &((struct sockaddr_in6 *)
					if_rec->ifa_addr)->sin6_addr;
			ip_family = "IP_V6";
		} else {
			/* AF_PACKET (statistics) and others ignored */
			continue;
		}
		/* Skip the address rather than record an unset buffer */
		if (!inet_ntop(if_rec->ifa_addr->sa_family, addr_ptr,
			       addr_str, sizeof(addr_str)))
			continue;
		xrealloc(gen_node_info->ifa_array,
			 sizeof(sw_gen_ifa_t *) *
				(gen_node_info->ifa_cnt + 1));
		ifa_ptr = xmalloc(sizeof(sw_gen_ifa_t));
		ifa_ptr->ifa_addr   = xstrdup(addr_str);
		ifa_ptr->ifa_family = xstrdup(ip_family);
		ifa_ptr->ifa_name   = xstrdup(if_rec->ifa_name);
		gen_node_info->ifa_array[gen_node_info->ifa_cnt++] = ifa_ptr;
		if (debug_flags & DEBUG_FLAG_SWITCH) {
			info("%s: name=%s ip_family=%s address=%s",
			     plugin_type, if_rec->ifa_name, ip_family,
			     addr_str);
		}
	}
	freeifaddrs(if_array);

	return SLURM_SUCCESS;
}
844 
/*
 * Pack a node record: the interface count, the node name and then, per
 * interface, address, family and name. protocol_version is not used.
 */
extern int switch_p_pack_node_info(switch_node_info_t *switch_node,
				   Buf buffer, uint16_t protocol_version)
{
	sw_gen_node_info_t *node_info = (sw_gen_node_info_t *) switch_node;
	int k;

	if (debug_flags & DEBUG_FLAG_SWITCH) {
		info("switch_p_pack_node_info() starting");
	}
	xassert(node_info);
	xassert(node_info->magic == SW_GEN_NODE_INFO_MAGIC);

	pack16(node_info->ifa_cnt, buffer);
	packstr(node_info->node_name, buffer);
	for (k = 0; k < node_info->ifa_cnt; k++) {
		sw_gen_ifa_t *ifa = node_info->ifa_array[k];

		packstr(ifa->ifa_addr, buffer);
		packstr(ifa->ifa_family, buffer);
		packstr(ifa->ifa_name, buffer);
	}

	return SLURM_SUCCESS;
}
867 
/*
 * Release a node record together with its interface data and clear the
 * caller's pointer.
 *
 * Partially built records are accepted: switch_p_unpack_node_info() calls
 * this function from its error path, where ifa_array or some of its
 * entries may still be NULL although ifa_cnt is already set.
 *
 * switch_node IN/OUT - address of the record pointer; a NULL record is
 *	ignored, and the pointer is NULL on return
 * RET SLURM_SUCCESS
 */
extern int switch_p_free_node_info(switch_node_info_t **switch_node)
{
	sw_gen_node_info_t *gen_node_info = (sw_gen_node_info_t *) *switch_node;
	sw_gen_ifa_t *ifa_ptr;
	int i;

	if (debug_flags & DEBUG_FLAG_SWITCH)
		info("switch_p_free_node_info() starting");
	if (!gen_node_info)
		return SLURM_SUCCESS;
	xassert(gen_node_info->magic == SW_GEN_NODE_INFO_MAGIC);

	for (i = 0; gen_node_info->ifa_array &&
		    (i < gen_node_info->ifa_cnt); i++) {
		ifa_ptr = gen_node_info->ifa_array[i];
		if (!ifa_ptr)
			continue;
		xfree(ifa_ptr->ifa_addr);
		xfree(ifa_ptr->ifa_family);
		xfree(ifa_ptr->ifa_name);
		xfree(gen_node_info->ifa_array[i]);
	}
	xfree(gen_node_info->ifa_array);
	xfree(gen_node_info->node_name);
	xfree(gen_node_info);
	/* Do not leave the caller holding a pointer to freed memory */
	*switch_node = NULL;

	return SLURM_SUCCESS;
}
889 
/*
 * Allocate a node record, fill it from buffer in the order
 * switch_p_pack_node_info() writes the fields, and pass it to
 * _cache_node_info().
 *
 * switch_node OUT - the unpacked record; NULL after an unpack error
 * buffer IN - packed data
 * protocol_version IN - not used
 * RET SLURM_SUCCESS, or SLURM_ERROR on an unpack error
 */
extern int switch_p_unpack_node_info(switch_node_info_t **switch_node,
				     Buf buffer, uint16_t protocol_version)
{
	sw_gen_node_info_t *gen_node_info;
	sw_gen_ifa_t *ifa_ptr;
	uint32_t uint32_tmp;
	int i;

	if (debug_flags & DEBUG_FLAG_SWITCH)
		info("switch_p_unpack_node_info() starting");

	switch_p_alloc_node_info(switch_node);
	gen_node_info = (sw_gen_node_info_t *) *switch_node;

	safe_unpack16(&gen_node_info->ifa_cnt, buffer);
	safe_xcalloc(gen_node_info->ifa_array, gen_node_info->ifa_cnt,
		     sizeof(sw_gen_ifa_t *));
	safe_unpackstr_xmalloc(&gen_node_info->node_name, &uint32_tmp,
			       buffer);
	for (i = 0; i < gen_node_info->ifa_cnt; i++) {
		ifa_ptr = xmalloc(sizeof(sw_gen_ifa_t));
		gen_node_info->ifa_array[i] = ifa_ptr;
		safe_unpackstr_xmalloc(&ifa_ptr->ifa_addr, &uint32_tmp, buffer);
		safe_unpackstr_xmalloc(&ifa_ptr->ifa_family, &uint32_tmp,
				       buffer);
		safe_unpackstr_xmalloc(&ifa_ptr->ifa_name, &uint32_tmp, buffer);
		if (debug_flags & DEBUG_FLAG_SWITCH) {
			info("%s: node=%s name=%s ip_family=%s address=%s",
			     plugin_type, gen_node_info->node_name,
			     ifa_ptr->ifa_name, ifa_ptr->ifa_family,
			     ifa_ptr->ifa_addr);
		}
	}

	_cache_node_info(gen_node_info);

	return SLURM_SUCCESS;

unpack_error:

	switch_p_free_node_info(switch_node);
	/*
	 * As in switch_p_unpack_jobinfo(): the record is gone, so the
	 * caller must not be left with a pointer it might free again.
	 */
	*switch_node = NULL;

	return SLURM_ERROR;
}
934 
/*
 * Format node info as text into a caller-supplied buffer.
 *
 * Incomplete: no node data is rendered. When a usable buffer is supplied
 * it is set to the empty string.
 *
 * switch_node - not consulted by this implementation
 * buf         - destination buffer, may be NULL
 * size        - size of buf in bytes
 *
 * Returns buf (holding an empty string) when buf is non-NULL and size is
 * non-zero, otherwise NULL.
 */
extern char *switch_p_sprintf_node_info(switch_node_info_t *switch_node,
				        char *buf, size_t size)
{
	if (debug_flags & DEBUG_FLAG_SWITCH)
		info("switch_p_sprintf_node_info() starting");

	/* Nothing can be written without a buffer that has room */
	if (!buf || !size)
		return NULL;

	buf[0] = '\0';

	return buf;
}
949 
/*
 * Job step completion hook. This implementation takes no action beyond
 * optional debug logging; both arguments are unused.
 *
 * Always returns SLURM_SUCCESS.
 */
extern int switch_p_job_step_complete(switch_jobinfo_t *jobinfo,
				      char *nodelist)
{
	int rc = SLURM_SUCCESS;

	if (debug_flags & DEBUG_FLAG_SWITCH)
		info("switch_p_job_step_complete() starting");

	return rc;
}
957 
/*
 * Partial job step completion hook. This implementation takes no action
 * beyond optional debug logging; both arguments are unused.
 *
 * Always returns SLURM_SUCCESS.
 */
extern int switch_p_job_step_part_comp(switch_jobinfo_t *jobinfo,
				       char *nodelist)
{
	int rc = SLURM_SUCCESS;

	if (debug_flags & DEBUG_FLAG_SWITCH)
		info("switch_p_job_step_part_comp() starting");

	return rc;
}
965 
switch_p_part_comp(void)966 extern bool switch_p_part_comp(void)
967 {
968 	if (debug_flags & DEBUG_FLAG_SWITCH)
969 		info("switch_p_part_comp() starting");
970 	return false;
971 }
972 
/*
 * Job step allocation hook. This implementation takes no action beyond
 * optional debug logging; both arguments are unused.
 *
 * Always returns SLURM_SUCCESS.
 */
extern int switch_p_job_step_allocated(switch_jobinfo_t *jobinfo,
				       char *nodelist)
{
	int rc = SLURM_SUCCESS;

	if (debug_flags & DEBUG_FLAG_SWITCH)
		info("switch_p_job_step_allocated() starting");

	return rc;
}
980 
switch_p_slurmctld_init(void)981 extern int switch_p_slurmctld_init(void)
982 {
983 	if (debug_flags & DEBUG_FLAG_SWITCH)
984 		info("switch_p_slurmctld_init() starting");
985 	return SLURM_SUCCESS;
986 }
987 
switch_p_slurmd_init(void)988 extern int switch_p_slurmd_init(void)
989 {
990 	if (debug_flags & DEBUG_FLAG_SWITCH)
991 		info("switch_p_slurmd_init() starting");
992 	return SLURM_SUCCESS;
993 }
994 
switch_p_slurmd_step_init(void)995 extern int switch_p_slurmd_step_init(void)
996 {
997 	if (debug_flags & DEBUG_FLAG_SWITCH)
998 		info("switch_p_slurmd_step_init() starting");
999 	return SLURM_SUCCESS;
1000 }
1001 
/*
 * Pre-suspend hook for a job step. This implementation takes no action
 * beyond optional debug logging; the job argument is unused.
 *
 * Always returns SLURM_SUCCESS.
 */
extern int switch_p_job_step_pre_suspend(stepd_step_rec_t *job)
{
	int rc = SLURM_SUCCESS;

	if (debug_flags & DEBUG_FLAG_SWITCH)
		info("switch_p_job_step_pre_suspend() starting");

	return rc;
}
1008 
/*
 * Post-suspend hook for a job step. This implementation takes no action
 * beyond optional debug logging; the job argument is unused.
 *
 * Always returns SLURM_SUCCESS.
 */
extern int switch_p_job_step_post_suspend(stepd_step_rec_t *job)
{
	int rc = SLURM_SUCCESS;

	if (debug_flags & DEBUG_FLAG_SWITCH)
		info("switch_p_job_step_post_suspend() starting");

	return rc;
}
1015 
/*
 * Pre-resume hook for a job step. This implementation takes no action
 * beyond optional debug logging; the job argument is unused.
 *
 * Always returns SLURM_SUCCESS.
 */
extern int switch_p_job_step_pre_resume(stepd_step_rec_t *job)
{
	int rc = SLURM_SUCCESS;

	if (debug_flags & DEBUG_FLAG_SWITCH)
		info("switch_p_job_step_pre_resume() starting");

	return rc;
}
1022 
/*
 * Post-resume hook for a job step. This implementation takes no action
 * beyond optional debug logging; the job argument is unused.
 *
 * Always returns SLURM_SUCCESS.
 */
extern int switch_p_job_step_post_resume(stepd_step_rec_t *job)
{
	int rc = SLURM_SUCCESS;

	if (debug_flags & DEBUG_FLAG_SWITCH)
		info("switch_p_job_step_post_resume() starting");

	return rc;
}
1029