1 /*****************************************************************************\
2  *  gres.h - driver for gres plugin
3  *****************************************************************************
4  *  Copyright (C) 2010 Lawrence Livermore National Security.
5  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
6  *  Written by Morris Jette <jette1@llnl.gov>
7  *  CODE-OCEC-09-009. All rights reserved.
8  *
9  *  This file is part of Slurm, a resource management program.
10  *  For details, see <https://slurm.schedmd.com/>.
11  *  Please also read the included file: DISCLAIMER.
12  *
13  *  Slurm is free software; you can redistribute it and/or modify it under
14  *  the terms of the GNU General Public License as published by the Free
15  *  Software Foundation; either version 2 of the License, or (at your option)
16  *  any later version.
17  *
18  *  In addition, as a special exception, the copyright holders give permission
19  *  to link the code of portions of this program with the OpenSSL library under
20  *  certain conditions as described in each individual source file, and
21  *  distribute linked combinations including the two. You must obey the GNU
22  *  General Public License in all respects for all of the code used other than
23  *  OpenSSL. If you modify file(s) with this exception, you may extend this
24  *  exception to your version of the file(s), but you are not obligated to do
25  *  so. If you do not wish to do so, delete this exception statement from your
26  *  version.  If you delete this exception statement from all source files in
27  *  the program, then also delete it here.
28  *
29  *  Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
30  *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
31  *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
32  *  details.
33  *
34  *  You should have received a copy of the GNU General Public License along
35  *  with Slurm; if not, write to the Free Software Foundation, Inc.,
36  *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
37 \*****************************************************************************/
38 
39 #ifndef _GRES_H
40 #define _GRES_H
41 
42 #include "slurm/slurm.h"
43 #include "slurm/slurmdb.h"
44 #include "src/common/bitstring.h"
45 #include "src/common/job_resources.h"
46 #include "src/common/node_conf.h"
47 #include "src/common/pack.h"
48 
/* NOTE(review): presumably a signature used to sanity-check GRES records —
 * confirm against gres.c */
#define GRES_MAGIC 0x438a34d4
/* NOTE(review): presumably the upper bound accepted for a link value in
 * gres.conf "Links=" — confirm against gres.c */
#define GRES_MAX_LINK 1023
51 
/*
 * Selects which GRES count is wanted; passed as the val_type argument of
 * gres_plugin_node_count() ("see GRES_VAL_TYPE_*").
 * NOTE(review): the names suggest they map onto gres_cnt_found,
 * gres_cnt_config, gres_cnt_avail and gres_cnt_alloc of gres_node_state_t —
 * confirm against gres.c.
 */
enum {
	GRES_VAL_TYPE_FOUND  = 0,
	GRES_VAL_TYPE_CONFIG = 1,
	GRES_VAL_TYPE_AVAIL  = 2,
	GRES_VAL_TYPE_ALLOC  = 3
};
58 
/*
 * One GRES device, as returned in the List built by
 * gres_plugin_get_allocated_devices().
 */
typedef struct {
	int alloc;	/* Set if the device is allocated to the job/step */
	int dev_num;	/* NOTE(review): presumably the device index/number —
			 * confirm */
	char *major;	/* NOTE(review): presumably the device major (and
			 * minor) identification string — confirm */
	char *path;	/* NOTE(review): presumably the device file path —
			 * confirm */
} gres_device_t;
65 
/* Values for gres_slurmd_conf_t.config_flags */
#define GRES_CONF_HAS_FILE	0x02	/* File= is configured */
#define GRES_CONF_HAS_TYPE	0x04	/* Type= is configured */
#define GRES_CONF_COUNT_ONLY	0x08	/* GRES lacks plugin to load */
#define GRES_CONF_LOADED	0x10	/* used to avoid loading a plugin
					 * multiple times */

/* Values for the flags field of gres_job_state_t and gres_step_state_t */
#define GRES_NO_CONSUME		0x0001	/* Requesting no consume of resources */

/* GRES AutoDetect options */
#define GRES_AUTODETECT_NONE    0x00000000
#define GRES_AUTODETECT_NVML    0x00000001
#define GRES_AUTODETECT_RSMI    0x00000002
78 
79 /* Gres state information gathered by slurmd daemon */
80 typedef struct gres_slurmd_conf {
81 	uint8_t config_flags;	/* See GRES_CONF_* values above */
82 
83 	/* Count of gres available in this configuration record */
84 	uint64_t count;
85 
86 	/* Specific CPUs associated with this configuration record */
87 	uint32_t cpu_cnt;
88 	char *cpus;
89 	bitstr_t *cpus_bitmap;	/* Using LOCAL mapping */
90 
91 	/* Device file associated with this configuration record */
92 	char *file;
93 
94 	/* Comma-separated list of communication link IDs (numbers) */
95 	char *links;
96 
97 	/* Name of this gres */
98 	char *name;
99 
100 	/* Type of this GRES (e.g. model name) */
101 	char *type_name;
102 
103 	/* GRES ID number */
104 	uint32_t plugin_id;
105 } gres_slurmd_conf_t;
106 
107 
/* Extra data and functions to be passed in to the node_config_load() */
typedef struct node_config_load {
	/* How many CPUs there are configured on the node */
	uint32_t cpu_cnt;
	/*
	 * A pointer to the mac_to_abs function.
	 * NOTE(review): presumably converts a machine-local CPU map string
	 * (mac) into an abstract one returned through *abs — confirm.
	 */
	int (*xcpuinfo_mac_to_abs) (char *mac, char **abs);
} node_config_load_t;
115 
116 /* Current GRES state information managed by slurmctld daemon */
117 typedef struct gres_node_state {
118 	/* Actual hardware found */
119 	uint64_t gres_cnt_found;
120 
121 	/* Configured resources via "Gres" parameter */
122 	uint64_t gres_cnt_config;
123 
124 	/* Non-consumable: Do not track resources allocated to jobs */
125 	bool no_consume;
126 
127 	/* True if set by node_feature plugin, ignore info from compute node */
128 	bool node_feature;
129 
130 	/*
131 	 * Total resources available for allocation to jobs.
132 	 * gres_cnt_found or gres_cnt_config, depending upon config_overrides
133 	 */
134 	uint64_t gres_cnt_avail;
135 
136 	/* List of GRES in current use. Set NULL if needs to be rebuilt. */
137 	char *gres_used;
138 
139 	/* Resources currently allocated to jobs */
140 	uint64_t  gres_cnt_alloc;
141 	bitstr_t *gres_bit_alloc;	/* If gres.conf contains File field */
142 
143 	/*
144 	 * Topology specific information. In the case of gres/mps, there is one
145 	 * topo record per file (GPU) and the size of the GRES bitmaps (i.e.
146 	 * gres_bit_alloc and topo_gres_bitmap[#]) is equal to the number of
147 	 * GPUs on the node while the count is a site-configurable value.
148 	 */
149 	uint16_t topo_cnt;		/* Size of topo_ arrays */
150 	int link_len;			/* Size of link_cnt */
151 	int **links_cnt;		/* Count of links between GRES */
152 	bitstr_t **topo_core_bitmap;
153 	bitstr_t **topo_gres_bitmap;
154 	uint64_t *topo_gres_cnt_alloc;
155 	uint64_t *topo_gres_cnt_avail;
156 	uint32_t *topo_type_id;		/* GRES type (e.g. model ID) */
157 	char **topo_type_name;		/* GRES type (e.g. model name) */
158 
159 	/*
160 	 * GRES type specific information (if gres.conf contains type option)
161 	 *
162 	 * NOTE: If a job requests GRES without a type specification, these
163 	 * type_cnt_alloc will not be incremented. Only the gres_cnt_alloc
164 	 * will be incremented.
165 	 */
166 	uint16_t type_cnt;		/* Size of type_ arrays */
167 	uint64_t *type_cnt_alloc;
168 	uint64_t *type_cnt_avail;
169 	uint32_t *type_id;		/* GRES type (e.g. model ID) */
170 	char **type_name;		/* GRES type (e.g. model name) */
171 } gres_node_state_t;
172 
173 /* Gres job state as used by slurmctld daemon */
174 typedef struct gres_job_state {
175 	char *gres_name;		/* GRES name (e.g. "gpu") */
176 	uint32_t type_id;		/* GRES type (e.g. model ID) */
177 	char *type_name;		/* GRES type (e.g. model name) */
178 	uint16_t flags;			/* GRES_NO_CONSUME, etc. */
179 
180 	/* Count of required GRES resources plus associated CPUs and memory */
181 	uint16_t cpus_per_gres;
182 	uint64_t gres_per_job;
183 	uint64_t gres_per_node;
184 	uint64_t gres_per_socket;
185 	uint64_t gres_per_task;
186 	uint64_t mem_per_gres;
187 
188 	/*
189 	 * Default GRES configuration parameters. These values are subject to
190 	 * change depending upon which partition the job is currently being
191 	 * considered for scheduling in.
192 	 */
193 	uint16_t def_cpus_per_gres;
194 	uint64_t def_mem_per_gres;
195 
196 	/*
197 	 * Selected resource details. One entry per node on the cluster.
198 	 * Used by select/cons_tres to identify which resources would be
199 	 * allocated on a node IF that node is included in the job allocation.
200 	 * Once specific nodes are selected for the job allocation, select
201 	 * portions of these arrays are copied to gres_bit_alloc and
202 	 * gres_cnt_node_alloc. The fields can then be cleared.
203 	 */
204 	uint32_t total_node_cnt;	/* cluster total node count */
205 	bitstr_t **gres_bit_select;	/* Per node GRES selected,
206 					 * Used with GRES files */
207 	uint64_t *gres_cnt_node_select;	/* Per node GRES selected,
208 					 * Used without GRES files */
209 
210 	/* Allocated resources details */
211 	uint64_t total_gres;		/* Count of allocated GRES to job */
212 	uint32_t node_cnt;		/* 0 if no_consume */
213 	bitstr_t **gres_bit_alloc;	/* Per node GRES allocated,
214 					 * Used with GRES files */
215 	uint64_t *gres_cnt_node_alloc;	/* Per node GRES allocated,
216 					 * Used with and without GRES files */
217 
218 	/*
219 	 * Resources currently allocated to job steps on each node.
220 	 * This will be a subset of resources allocated to the job.
221 	 * gres_bit_step_alloc is a subset of gres_bit_alloc
222 	 */
223 	bitstr_t **gres_bit_step_alloc;
224 	uint64_t  *gres_cnt_step_alloc;
225 } gres_job_state_t;
226 
227 /* Used to set Prolog and Epilog env var. Currently designed for gres/mps. */
228 typedef struct gres_epilog_info {
229 	uint32_t plugin_id;	/* GRES ID number */
230 	uint32_t node_cnt;	/* Count of all hosts allocated to job */
231 	char *node_list;	/* List of all hosts allocated to job */
232 	bitstr_t **gres_bit_alloc; /* Per-node bitmap of allocated resources */
233 	uint64_t *gres_cnt_node_alloc;	/* Per node GRES allocated,
234 					 * Used with and without GRES files */
235 } gres_epilog_info_t;
236 
237 /* Gres job step state as used by slurmctld daemon */
238 typedef struct gres_step_state {
239 	uint32_t type_id;		/* GRES type (e.g. model ID) */
240 	char *type_name;		/* GRES type (e.g. model name) */
241 	uint16_t flags;			/* GRES_NO_CONSUME, etc. */
242 
243 	/* Count of required GRES resources plus associated CPUs and memory */
244 	uint16_t cpus_per_gres;
245 	uint64_t gres_per_step;
246 	uint64_t gres_per_node;
247 	uint64_t gres_per_socket;
248 	uint64_t gres_per_task;
249 	uint64_t mem_per_gres;
250 
251 	/*
252 	 * Allocated resources details
253 	 *
254 	 * NOTE: node_cnt and the size of node_in_use and gres_bit_alloc are
255 	 * identical to that of the job for simplicity. Bits in node_in_use
256 	 * are set for those node of the job that are used by this step and
257 	 * gres_bit_alloc are also set if the job's gres_bit_alloc is set
258 	 */
259 	uint64_t total_gres;		/* allocated GRES for this step */
260 	uint64_t gross_gres;		/* used during the scheduling phase,
261 					 * GRES that could be available for this
262 					 * step if no other steps active */
263 	uint64_t *gres_cnt_node_alloc;	/* Per node GRES allocated,
264 					 * Used without GRES files */
265 	uint32_t node_cnt;
266 	bitstr_t *node_in_use;
267 	bitstr_t **gres_bit_alloc;	/* Used with GRES files */
268 } gres_step_state_t;
269 
270 /* Per-socket GRES availability information for scheduling purposes */
271 typedef struct sock_gres {	/* GRES availability by socket */
272 	bitstr_t *bits_any_sock;/* Per-socket GRES bitmap of this name & type */
273 	bitstr_t **bits_by_sock;/* Per-socket GRES bitmap of this name & type */
274 	uint64_t cnt_any_sock;	/* GRES count unconstrained by cores */
275 	uint64_t *cnt_by_sock;	/* Per-socket GRES count of this name & type */
276 	char *gres_name;	/* GRES name */
277 	gres_job_state_t *job_specs;	/* Pointer to job info, for limits */
278 	uint64_t max_node_gres;	/* Maximum GRES permitted on this node */
279 	gres_node_state_t *node_specs;	/* Pointer to node info, for state */
280 	uint32_t plugin_id;	/* Plugin ID (for quick search) */
281 	int sock_cnt;		/* Socket count, size of bits_by_sock and
282 				 * cnt_by_sock arrays */
283 	uint64_t total_cnt;	/* Total GRES count of this name & type */
284 	uint32_t type_id;	/* GRES type (e.g. model ID) */
285 	char *type_name;	/* GRES type (e.g. model name) */
286 } sock_gres_t;
287 
/*
 * Job multi-core and task layout requirements.
 * Similar to multi_core_data_t in slurm_protocol_defs.h
 */
typedef struct gres_mc_data {
	uint16_t boards_per_node;   /* boards per node required by job */
	uint16_t sockets_per_board; /* sockets per board required by job */
	uint16_t sockets_per_node;  /* sockets per node required by job */
	uint16_t cores_per_socket;  /* cores per cpu required by job */
	uint16_t threads_per_core;  /* threads per core required by job */

	uint16_t cpus_per_task;     /* Count of CPUs per task */
	uint32_t ntasks_per_job;    /* number of tasks to invoke for job or NO_VAL */
	uint16_t ntasks_per_node;   /* number of tasks to invoke on each node */
	uint16_t ntasks_per_board;  /* number of tasks to invoke on each board */
	uint16_t ntasks_per_socket; /* number of tasks to invoke on each socket */
	uint16_t ntasks_per_core;   /* number of tasks to invoke on each core */
	uint8_t overcommit;         /* processors being over subscribed */
	uint16_t plane_size;        /* plane size for SLURM_DIST_PLANE */
	uint32_t task_dist;         /* task distribution directives */
	uint8_t whole_node;         /* allocate entire node */
} gres_mc_data_t;
307 
/*
 * Identifies which kind of GRES state record is meant.
 * NOTE(review): presumably NODE/JOB/STEP correspond to gres_node_state_t,
 * gres_job_state_t and gres_step_state_t respectively — confirm.
 */
typedef enum {
	GRES_STATE_TYPE_NODE = 0,
	GRES_STATE_TYPE_JOB,
	GRES_STATE_TYPE_STEP
} gres_state_type_enum_t;
313 
/*
 * Initialize the GRES plugins.
 *
 * Returns a Slurm errno.
 */
extern int gres_plugin_init(void);

/*
 * Terminate the GRES plugins. Free memory.
 *
 * Returns a Slurm errno.
 */
extern int gres_plugin_fini(void);
327 
/*
 **************************************************************************
 *                          P L U G I N   C A L L S                       *
 **************************************************************************
 */

/*
 * Perform reconfig, re-read any configuration files
 * RET NOTE(review): presumably SLURM_SUCCESS or a Slurm errno — confirm.
 *     (The function takes no arguments; it has no "did_change" output.)
 */
extern int gres_plugin_reconfig(void);

/*
 * Return a plugin-specific help message for salloc, sbatch and srun
 * Result must be xfree()'d
 */
extern char *gres_plugin_help_msg(void);

/*
 * Convert a GRES name or model into a number for faster comparison operations
 * IN name - GRES name or model
 * RET - An int representing a custom hash of the name
 */
extern uint32_t gres_plugin_build_id(char *name);

/*
 * Takes a GRES config line (typically from slurm.conf) and remove any
 * records for GRES which are not defined in GresTypes.
 * IN orig_gres - the GRES config line to filter
 * IN nodes - NOTE(review): presumably the node name(s) the line belongs to,
 *            used for logging — confirm
 * RET string of valid GRES, Release memory using xfree()
 */
extern char *gres_plugin_name_filter(char *orig_gres, char *nodes);
359 
360 /*
361  **************************************************************************
362  *                 PLUGIN CALLS FOR SLURMD DAEMON                         *
363  **************************************************************************
364  */
365 /*
366  * Load this node's configuration (how many resources it has, topology, etc.)
367  * IN cpu_cnt - Number of CPUs configured on this node
368  * IN node_name - Name of this node
369  * IN gres_list - Node's GRES information as loaded from slurm.conf by slurmd
370  * IN xcpuinfo_abs_to_mac - Pointer to xcpuinfo_abs_to_mac() funct, if available
371  * IN xcpuinfo_mac_to_abs - Pointer to xcpuinfo_mac_to_abs() funct, if available
372  * NOTE: Called from slurmd and slurmstepd
373  */
374 extern int gres_plugin_node_config_load(uint32_t cpu_cnt, char *node_name,
375 					List gres_list,
376 					void *xcpuinfo_abs_to_mac,
377 					void *xcpuinfo_mac_to_abs);
378 
379 /*
380  * Pack this node's gres configuration into a buffer
381  * IN/OUT buffer - message buffer to pack
382  */
383 extern int gres_plugin_node_config_pack(Buf buffer);
384 
385 /*
386  * Set GRES devices as allocated or not for a particular job
387  * IN gres_list - allocated gres devices
388  * IN is_job - if is job function expects gres_job_state_t's else
389  *             gres_step_state_t's
390  * RET - List of gres_device_t containing all devices from all GRES with alloc
391  *       set correctly if the device is allocated to the job/step.
392  */
393 extern List gres_plugin_get_allocated_devices(List gres_list, bool is_job);
394 
/* Send GRES information to slurmstepd on the specified file descriptor */
extern void gres_plugin_send_stepd(int fd);

/* Receive GRES information from slurmd on the specified file descriptor */
extern void gres_plugin_recv_stepd(int fd);
400 
401 /*
402  **************************************************************************
403  *                 PLUGIN CALLS FOR SLURMCTLD DAEMON                      *
404  **************************************************************************
405  */
406 /*
407  * Build a node's gres record based only upon the slurm.conf contents
408  * IN node_name - name of the node for which the gres information applies
409  * IN orig_config - Gres information supplied from slurm.conf
410  * IN/OUT gres_list - List of Gres records for this node to track usage
411  */
412 extern int gres_plugin_init_node_config(char *node_name, char *orig_config,
413 					List *gres_list);
414 
415 /*
416  * Return how many gres Names are on the system.
417  */
418 extern int gres_plugin_get_gres_cnt(void);
419 
420 /* Add a GRES record. This is used by the node_features plugin after the
421  * slurm.conf file is read and the initial GRES records are built by
422  * gres_plugin_init(). */
423 extern void gres_plugin_add(char *gres_name);
424 
425 /*
426  * Unpack this node's configuration from a buffer (built/packed by slurmd)
427  * IN/OUT buffer - message buffer to unpack
428  * IN node_name - name of node whose data is being unpacked
429  */
430 extern int gres_plugin_node_config_unpack(Buf buffer, char *node_name);
431 
432 /*
433  * Validate a node's configuration and put a gres record onto a list
434  * Called immediately after gres_plugin_node_config_unpack().
435  * IN node_name - name of the node for which the gres information applies
436  * IN orig_config - Gres information supplied from merged slurm.conf/gres.conf
437  * IN/OUT new_config - Updated gres info from slurm.conf
438  * IN/OUT gres_list - List of Gres records for this node to track usage
439  * IN threads_per_core - Count of CPUs (threads) per core on this node
440  * IN cores_per_sock - Count of cores per socket on this node
441  * IN sock_cnt - Count of sockets on this node
442  * IN config_overrides - true: Don't validate hardware, use slurm.conf
443  *                             configuration
444  *		         false: Validate hardware config, but use slurm.conf
445  *                              config
446  * OUT reason_down - set to an explanation of failure, if any, don't set if NULL
447  */
448 extern int gres_plugin_node_config_validate(char *node_name,
449 					    char *orig_config,
450 					    char **new_config,
451 					    List *gres_list,
452 					    int threads_per_core,
453 					    int cores_per_sock, int sock_cnt,
454 					    bool config_overrides,
455 					    char **reason_down);
456 
457 /*
458  * Add a GRES from node_feature plugin
459  * IN node_name - name of the node for which the gres information applies
460  * IN gres_name - name of the GRES being added or updated from the plugin
461  * IN gres_size - count of this GRES on this node
462  * IN/OUT new_config - Updated GRES info from slurm.conf
463  * IN/OUT gres_list - List of GRES records for this node to track usage
464  */
465 extern void gres_plugin_node_feature(char *node_name,
466 				     char *gres_name, uint64_t gres_size,
467 				     char **new_config, List *gres_list);
468 
469 /*
470  * Note that a node's configuration has been modified (e.g. "scontol update ..")
471  * IN node_name - name of the node for which the gres information applies
472  * IN new_gres - Updated GRES information supplied from slurm.conf or scontrol
473  * IN/OUT gres_str - Node's current GRES string, updated as needed
474  * IN/OUT gres_list - List of Gres records for this node to track usage
475  * IN config_overrides - true: Don't validate hardware, use slurm.conf
476  *                             configuration
477  *		         false: Validate hardware config, but use slurm.conf
478  *                              config
479  * IN cores_per_sock - Number of cores per socket on this node
480  * IN sock_per_node - Total count of sockets on this node (on any board)
481  */
482 extern int gres_plugin_node_reconfig(char *node_name,
483 				     char *new_gres,
484 				     char **gres_str,
485 				     List *gres_list,
486 				     bool config_overrides,
487 				     int cores_per_sock,
488 				     int sock_per_node);
489 
490 /*
491  * Pack a node's current gres status, called from slurmctld for save/restore
492  * IN gres_list - generated by gres_plugin_node_config_validate()
493  * IN/OUT buffer - location to write state to
494  * IN node_name - name of the node for which the gres information applies
495  */
496 extern int gres_plugin_node_state_pack(List gres_list, Buf buffer,
497 				       char *node_name);
498 /*
499  * Unpack a node's current gres status, called from slurmctld for save/restore
500  * OUT gres_list - restored state stored by gres_plugin_node_state_pack()
501  * IN/OUT buffer - location to read state from
502  * IN node_name - name of the node for which the gres information applies
503  */
504 extern int gres_plugin_node_state_unpack(List *gres_list, Buf buffer,
505 					 char *node_name,
506 					 uint16_t protocol_version);
507 
508 /*
509  * Duplicate a node gres status (used for will-run logic)
510  * IN gres_list - node gres state information
511  * RET a copy of gres_list or NULL on failure
512  */
513 extern List gres_plugin_node_state_dup(List gres_list);
514 
515 /*
516  * Deallocate all resources on this node previous allocated to any jobs.
517  *	This function isused to synchronize state after slurmctld restarts or
518  *	is reconfigured.
519  * IN gres_list - node gres state information
520  */
521 extern void gres_plugin_node_state_dealloc_all(List gres_list);
522 
523 /*
524  * Log a node's current gres state
525  * IN gres_list - generated by gres_plugin_node_config_validate()
526  * IN node_name - name of the node for which the gres information applies
527  */
528 extern void gres_plugin_node_state_log(List gres_list, char *node_name);
529 
530 /*
531  * Build a string indicating a node's drained GRES
532  * IN gres_list - generated by gres_plugin_node_config_validate()
533  * RET - string, must be xfreed by caller
534  */
535 extern char *gres_get_node_drain(List gres_list);
536 
537 /*
538  * Build a string indicating a node's used GRES
539  * IN gres_list - generated by gres_plugin_node_config_validate()
540  * RET - string, must be xfreed by caller
541  */
542 extern char *gres_get_node_used(List gres_list);
543 
544 /*
545  * Give the total system count of a given GRES
546  * Returns NO_VAL64 if name not found
547  */
548 extern uint64_t gres_get_system_cnt(char *name);
549 
550 /*
551  * Get the count of a node's GRES
552  * IN gres_list - List of Gres records for this node to track usage
553  * IN name - name of gres
554  */
555 extern uint64_t gres_plugin_node_config_cnt(List gres_list, char *name);
556 
557 /*
558  * Fill in an array of GRES type ids contained within the given node gres_list
559  *		and an array of corresponding counts of those GRES types.
560  * IN gres_list - a List of GRES types found on a node.
561  * IN arr_len - Length of the arrays (the number of elements in the gres_list).
562  * IN gres_count_ids, gres_count_vals - the GRES type ID's and values found
563  *	 	in the gres_list.
564  * IN val_type - Type of value desired, see GRES_VAL_TYPE_*
565  * RET SLURM_SUCCESS or error code
566  */
567 extern int gres_plugin_node_count(List gres_list, int arr_len,
568 				  uint32_t *gres_count_ids,
569 				  uint64_t *gres_count_vals,
570 				  int val_type);
571 
572 /*
573  * Fill in an array of GRES type ids contained within the given job gres_list
574  *		and an array of corresponding counts of those GRES types.
575  * IN gres_list - a List of GRES types allocated to a job.
576  * IN arr_len - Length of the arrays (the number of elements in the gres_list).
577  * IN gres_count_ids, gres_count_vals - the GRES type ID's and values found
578  *	 	in the gres_list.
579  * RET SLURM_SUCCESS or error code
580  */
581 extern int gres_plugin_job_count(List gres_list, int arr_len,
582 				 uint32_t *gres_count_ids,
583 				 uint64_t *gres_count_vals);
584 
585 /*
586  * Build a string identifying total GRES counts of each type
587  * IN gres_list - a List of GRES types allocated to a job.
588  * RET string containing comma-separated list of gres type:model:count
589  *     must release memory using xfree()
590  */
591 extern char *gres_plugin_job_alloc_count(List gres_list);
592 
593 /*
594  * Pack a job's allocated gres information for use by prolog/epilog
595  * IN gres_list - generated by gres_plugin_job_config_validate()
596  * IN/OUT buffer - location to write state to
597  */
598 extern int gres_plugin_job_alloc_pack(List gres_list, Buf buffer,
599 				      uint16_t protocol_version);
600 
601 /*
602  * Unpack a job's allocated gres information for use by prolog/epilog
603  * OUT gres_list - restored state stored by gres_plugin_job_alloc_pack()
604  * IN/OUT buffer - location to read state from
605  */
606 extern int gres_plugin_job_alloc_unpack(List *gres_list, Buf buffer,
607 					uint16_t protocol_version);
608 
609 /*
610  * Build List of information needed to set job's Prolog or Epilog environment
611  * variables
612  *
613  * IN job_gres_list - job's GRES allocation info
614  * IN hostlist - list of nodes associated with the job
615  * RET information about the job's GRES allocation needed by Prolog or Epilog
616  */
617 extern List gres_plugin_epilog_build_env(List job_gres_list, char *node_list);
618 
619 /*
620  * Set environment variables as appropriate for a job's prolog or epilog based
621  * GRES allocated to the job.
622  *
623  * IN/OUT epilog_env_ptr - environment variable array
624  * IN epilog_gres_list - generated by TBD
625  * IN node_inx - zero origin node index
626  */
627 extern void gres_plugin_epilog_set_env(char ***epilog_env_ptr,
628 				       List epilog_gres_list, int node_inx);
629 
630 
631 /*
632  * Given a job's requested GRES configuration, validate it and build a GRES list
633  * Note: This function can be used for a new request with gres_list==NULL or
634  *	 used to update an existing job, in which case gres_list is a copy
635  *	 of the job's original value (so we can clear fields as needed)
636  * IN *tres* - job requested gres input string
637  * IN/OUT num_tasks - requested task count, may be reset to provide
638  *		      consistent gres_per_node/task values
639  * IN/OUT min_nodes - requested minimum node count, may be reset to provide
640  *		      consistent gres_per_node/task values
641  * IN/OUT max_nodes - requested maximum node count, may be reset to provide
642  *		      consistent gres_per_node/task values
643  * IN/OUT ntasks_per_node - requested tasks_per_node count, may be reset to
644  *		      provide consistent gres_per_node/task values
645  * IN/OUT ntasks_per_socket - requested ntasks_per_socket count, may be reset to
646  *		      provide consistent gres_per_node/task values
647  * IN/OUT sockets_per_node - requested sockets_per_node count, may be reset to
648  *		      provide consistent gres_per_socket/node values
649  * IN/OUT cpus_per_task - requested ntasks_per_socket count, may be reset to
650  *		      provide consistent gres_per_task/cpus_per_gres values
651  * OUT gres_list - List of GRES records for this job to track usage
652  * RET SLURM_SUCCESS or ESLURM_INVALID_GRES
653  */
654 extern int gres_plugin_job_state_validate(char *cpus_per_tres,
655 					  char *tres_freq,
656 					  char *tres_per_job,
657 					  char *tres_per_node,
658 					  char *tres_per_socket,
659 					  char *tres_per_task,
660 					  char *mem_per_tres,
661 					  uint32_t *num_tasks,
662 					  uint32_t *min_nodes,
663 					  uint32_t *max_nodes,
664 					  uint16_t *ntasks_per_node,
665 					  uint16_t *ntasks_per_socket,
666 					  uint16_t *sockets_per_node,
667 					  uint16_t *cpus_per_task,
668 					  List *gres_list);
669 
670 /*
671  * Determine if a job's specified GRES can be supported. This is designed to
672  * prevent the running of a job using the GRES options only supported by the
673  * select/cons_tres plugin when switching (on slurmctld restart) from the
674  * cons_tres plugin to any other select plugin.
675  *
676  * IN gres_list - List of GRES records for this job to track usage
677  * RET SLURM_SUCCESS or ESLURM_INVALID_GRES
678  */
679 extern int gres_plugin_job_revalidate(List gres_list);
680 
681 /*
682  * Determine if a job's specified GRES are currently valid. This is designed to
683  * manage jobs allocated GRES which are either no longer supported or a GRES
684  * configured with the "File" option in gres.conf where the count has changed,
685  * in which case we don't know how to map the job's old GRES bitmap onto the
686  * current GRES bitmaps.
687  *
688  * IN job_id - ID of job being validated (used for logging)
689  * IN job_gres_list - List of GRES records for this job to track usage
690  * RET SLURM_SUCCESS or ESLURM_INVALID_GRES
691  */
692 extern int gres_plugin_job_revalidate2(uint32_t job_id, List job_gres_list,
693 				       bitstr_t *node_bitmap);
694 
695 /*
696  * Clear GRES allocation info for all job GRES at start of scheduling cycle
697  * Return TRUE if any gres_per_job constraints to satisfy
698  */
699 extern bool gres_plugin_job_sched_init(List job_gres_list);
700 
701 /*
702  * Return TRUE if all gres_per_job specifications are satisfied
703  */
704 extern bool gres_plugin_job_sched_test(List job_gres_list, uint32_t job_id);
705 
706 /*
707  * Return TRUE if all gres_per_job specifications will be satisfied with
708  *	the addtitional resources provided by a single node
709  * IN job_gres_list - List of job's GRES requirements (job_gres_state_t)
710  * IN sock_gres_list - Per socket GRES availability on this node (sock_gres_t)
711  * IN job_id - The job being tested
712  */
713 extern bool gres_plugin_job_sched_test2(List job_gres_list, List sock_gres_list,
714 					uint32_t job_id);
715 
716 /*
717  * Update a job's total_gres counter as we add a node to potential allocation
718  * IN job_gres_list - List of job's GRES requirements (job_gres_state_t)
719  * IN sock_gres_list - Per socket GRES availability on this node (sock_gres_t)
720  * IN avail_cpus - CPUs currently available on this node
721  */
722 extern void gres_plugin_job_sched_add(List job_gres_list, List sock_gres_list,
723 				      uint16_t avail_cpus);
724 
725 /*
726  * Create/update List GRES that can be made available on the specified node
727  * IN/OUT consec_gres - List of sock_gres_t that can be made available on
728  *			a set of nodes
729  * IN job_gres_list - List of job's GRES requirements (gres_job_state_t)
730  * IN sock_gres_list - Per socket GRES availability on this node (sock_gres_t)
731  */
732 extern void gres_plugin_job_sched_consec(List *consec_gres, List job_gres_list,
733 					 List sock_gres_list);
734 
735 /*
736  * Determine if the additional sock_gres_list resources will result in
737  * satisfying the job's gres_per_job constraints
738  * IN job_gres_list - job's GRES requirements
739  * IN sock_gres_list - available GRES in a set of nodes, data structure built
740  *		       by gres_plugin_job_sched_consec()
741  */
742 extern bool gres_plugin_job_sched_sufficient(List job_gres_list,
743 					     List sock_gres_list);
744 
745 /*
746  * Given a List of sock_gres_t entries, return a string identifying the
747  * count of each GRES available on this set of nodes
748  * IN sock_gres_list - count of GRES available in this group of nodes
749  * IN job_gres_list - job GRES specification, used only to get GRES name/type
750  * RET string describing the GRES counts; caller must xfree() it
751  */
752 extern char *gres_plugin_job_sched_str(List sock_gres_list, List job_gres_list);
753 
754 /*
755  * Create a (partial) copy of a job's gres state for job binding
756  * IN gres_list - List of GRES records for this job to track usage
757  * RET The copy or NULL on failure
758  * NOTE: Only gres_cnt_alloc, node_cnt and gres_bit_alloc are copied.
759  *	 Job step details are NOT copied.
760  */
761 extern List gres_plugin_job_state_dup(List gres_list);
762 
763 /*
764  * Create a (partial) copy of a job's gres state for a particular node index
765  * IN gres_list - List of GRES records for this job to track usage
766  * IN node_index - zero-origin index to the node
767  * RET The copy or NULL on failure
768  */
769 extern List gres_plugin_job_state_extract(List gres_list, int node_index);
770 
771 /*
772  * Pack a job's current gres status, called from slurmctld for save/restore
773  * IN gres_list - generated by gres_plugin_job_state_validate()
774  * IN/OUT buffer - location to write state to
775  * IN job_id - job's ID
776  * IN details - if set then pack job step allocation details (only needed to
777  *		save/restore job state, not needed in job credential for
778  *		slurmd task binding)
779  *
780  * NOTE: A job's allocation to steps is not recorded here, but recovered with
781  *	 the job step state information upon slurmctld restart.
782  */
783 extern int gres_plugin_job_state_pack(List gres_list, Buf buffer,
784 				      uint32_t job_id, bool details,
785 				      uint16_t protocol_version);
786 
787 /*
788  * Unpack a job's current gres status, called from slurmctld for save/restore
789  * OUT gres_list - state previously saved by gres_plugin_job_state_pack()
790  * IN/OUT buffer - location to read state from
791  * IN job_id - job's ID
792  */
793 extern int gres_plugin_job_state_unpack(List *gres_list, Buf buffer,
794 					uint32_t job_id,
795 					uint16_t protocol_version);
796 
797 /*
798  * Clear the core_bitmap for cores which are not usable by this job (i.e. for
799  *	cores which are already bound to other jobs or lack GRES)
800  * IN job_gres_list   - job's gres_list built by gres_plugin_job_state_validate()
801  * IN node_gres_list  - node's gres_list built by gres_plugin_node_config_validate()
802  * IN use_total_gres  - if set then consider all GRES resources as available,
803  *		        and none are committed to running jobs
804  * IN/OUT core_bitmap - Identification of available cores (NULL if no restriction)
805  * IN core_start_bit  - index into core_bitmap for this node's first core
806  * IN core_end_bit    - index into core_bitmap for this node's last core
807  * IN node_name       - name of the node
808  */
809 extern void gres_plugin_job_core_filter(List job_gres_list, List node_gres_list,
810 					bool use_total_gres,
811 					bitstr_t *core_bitmap,
812 					int core_start_bit, int core_end_bit,
813 					char *node_name);
814 
815 /*
816  * Determine how many cores on the node can be used by this job
817  * IN job_gres_list  - job's gres_list built by gres_plugin_job_state_validate()
818  * IN node_gres_list - node's gres_list built by gres_plugin_node_config_validate()
819  * IN use_total_gres - if set then consider all gres resources as available,
820  *		       and none are committed to running jobs
821  * IN core_bitmap    - Identification of available cores (NULL if no restriction)
822  * IN core_start_bit - index into core_bitmap for this node's first core
823  * IN core_end_bit   - index into core_bitmap for this node's last core
824  * IN job_id         - job's ID (for logging)
825  * IN node_name      - name of the node (for logging)
826  * IN disable_binding - --gres-flags=disable-binding
827  * RET: NO_VAL    - All cores on node are available
828  *      otherwise - Count of available cores
829  */
830 extern uint32_t gres_plugin_job_test(List job_gres_list, List node_gres_list,
831 				     bool use_total_gres, bitstr_t *core_bitmap,
832 				     int core_start_bit, int core_end_bit,
833 				     uint32_t job_id, char *node_name,
834 				     bool disable_binding);
835 
836 /*
837  * Determine how many cores on each socket of a node can be used by this job
838  * IN job_gres_list   - job's gres_list built by gres_plugin_job_state_validate()
839  * IN node_gres_list  - node's gres_list built by gres_plugin_node_config_validate()
840  * IN use_total_gres  - if set then consider all gres resources as available,
841  *		        and none are committed to running jobs
842  * IN/OUT core_bitmap - Identification of available cores on this node
843  * IN sockets         - Count of sockets on the node
844  * IN cores_per_sock  - Count of cores per socket on this node
845  * IN job_id          - job's ID (for logging)
846  * IN node_name       - name of the node (for logging)
847  * IN enforce_binding - if true then only use GRES with direct access to cores
848  * IN s_p_n           - Expected sockets_per_node (NO_VAL if not limited)
849  * OUT req_sock_map   - bitmap of specific required sockets
850  * IN user_id         - job's user ID
851  * IN node_inx        - index of node to be evaluated
852  * RET: List of sock_gres_t entries identifying what resources are available on
853  *	each core. Returns NULL if none available. Call FREE_NULL_LIST() to
854  *	release memory.
855  */
856 extern List gres_plugin_job_test2(List job_gres_list, List node_gres_list,
857 				  bool use_total_gres, bitstr_t *core_bitmap,
858 				  uint16_t sockets, uint16_t cores_per_sock,
859 				  uint32_t job_id, char *node_name,
860 				  bool enforce_binding, uint32_t s_p_n,
861 				  bitstr_t **req_sock_map, uint32_t user_id,
862 				  const uint32_t node_inx);
863 
864 /*
865  * Determine which GRES can be used on this node given the available cores.
866  *	Filter out unusable GRES.
867  * IN sock_gres_list  - list of sock_gres_t entries built by gres_plugin_job_test2()
868  * IN avail_mem       - memory available for the job
869  * IN max_cpus        - maximum CPUs available on this node (limited by
870  *                      specialized cores and partition CPUs-per-node)
871  * IN enforce_binding - GRES must be co-allocated with cores
872  * IN core_bitmap     - Identification of available cores on this node
873  * IN sockets         - Count of sockets on the node
874  * IN cores_per_sock  - Count of cores per socket on this node
875  * IN cpus_per_core   - Count of CPUs per core on this node
876  * IN sock_per_node   - sockets requested by job per node or NO_VAL
877  * IN task_per_node   - tasks requested by job per node or NO_VAL16
878  * IN whole_node      - whether the whole node is being requested
879  * OUT avail_gpus     - Count of available GPUs on this node
880  * OUT near_gpus      - Count of GPUs available on sockets with available CPUs
881  * RET - 0 if job can use this node, -1 otherwise (some GRES limit prevents use)
882  */
883 extern int gres_plugin_job_core_filter2(List sock_gres_list, uint64_t avail_mem,
884 					uint16_t max_cpus,
885 					bool enforce_binding,
886 					bitstr_t *core_bitmap,
887 					uint16_t sockets,
888 					uint16_t cores_per_sock,
889 					uint16_t cpus_per_core,
890 					uint32_t sock_per_node,
891 					uint16_t task_per_node,
892 					bool whole_node,
893 					uint16_t *avail_gpus,
894 					uint16_t *near_gpus);
895 
896 /*
897  * Determine how many tasks can be started on a given node and which
898  *	sockets/cores are required
899  * IN mc_ptr - job's multi-core specs, NO_VAL and INFINITE mapped to zero
900  * IN sock_gres_list - list of sock_gres_t entries built by gres_plugin_job_test2()
901  * IN sockets - Count of sockets on the node
902  * IN cores_per_socket - Count of cores per socket on the node
903  * IN cpus_per_core - Count of CPUs per core on the node
904  * IN/OUT avail_cpus - Count of available CPUs on the node, UPDATED
905  * IN/OUT min_tasks_this_node - Minimum count of tasks that can be started on
906  *                          this node, UPDATED
907  * IN/OUT max_tasks_this_node - Maximum count of tasks that can be started on
908  *                          this node or NO_VAL, UPDATED
909  * IN rem_nodes - desired additional node count to allocate, including this node
910  * IN enforce_binding - GRES must be co-allocated with cores
911  * IN first_pass - set if first scheduling attempt for this job, use
912  *		   co-located GRES and cores if possible
913  * IN/OUT avail_core - cores available on this node, UPDATED
914  */
915 extern void gres_plugin_job_core_filter3(gres_mc_data_t *mc_ptr,
916 					 List sock_gres_list,
917 					 uint16_t sockets,
918 					 uint16_t cores_per_socket,
919 					 uint16_t cpus_per_core,
920 					 uint16_t *avail_cpus,
921 					 uint32_t *min_tasks_this_node,
922 					 uint32_t *max_tasks_this_node,
923 					 int rem_nodes,
924 					 bool enforce_binding,
925 					 bool first_pass,
926 					 bitstr_t *avail_core);
927 
928 /*
929  * Return the maximum number of tasks that can be started on a node
930  * IN sock_gres_list - per-socket GRES details for some node
931  */
932 extern uint32_t gres_plugin_get_task_limit(List sock_gres_list);
933 
934 /*
935  * Make final GRES selection for the job
936  * IN sock_gres_list - per-socket GRES details, one record per allocated node
937  * IN job_id - job ID for logging
938  * IN job_res - job resource allocation
939  * IN overcommit - job's ability to overcommit resources
940  * IN tres_mc_ptr - job's multi-core options
941  * IN node_table_ptr - slurmctld's node records
942  * RET SLURM_SUCCESS or error code
943  */
944 extern int gres_plugin_job_core_filter4(List *sock_gres_list, uint32_t job_id,
945 					struct job_resources *job_res,
946 					uint8_t overcommit,
947 					gres_mc_data_t *tres_mc_ptr,
948 					node_record_t *node_table_ptr);
949 
950 /*
951  * Determine if job GRES specification includes a tres-per-task specification
952  * RET true if any GRES requested by the job includes a tres-per-task option
953  */
954 extern bool gres_plugin_job_tres_per_task(List job_gres_list);
955 
956 /*
957  * Find the largest mem-per-tres specification in the job GRES specification
958  * RET largest mem-per-tres specification found
959  */
960 extern uint64_t gres_plugin_job_mem_max(List job_gres_list);
961 
962 /*
963  * Set per-node memory limits based upon GRES assignments
964  * RET true if a mem-per-tres specification was used to set memory limits
965  */
966 extern bool gres_plugin_job_mem_set(List job_gres_list,
967 				    job_resources_t *job_res);
968 
969 /*
970  * Determine the minimum number of CPUs required to satisfy the job's GRES
971  *	request (based upon total GRES times cpus_per_gres value)
972  * IN node_count - count of nodes in job allocation
973  * IN sockets_per_node - count of sockets per node in job allocation
974  * IN task_count - count of tasks in job allocation
975  * IN job_gres_list - job GRES specification
976  * RET count of required CPUs for the job
977  */
978 extern int gres_plugin_job_min_cpus(uint32_t node_count,
979 				    uint32_t sockets_per_node,
980 				    uint32_t task_count,
981 				    List job_gres_list);
982 
983 /*
984  * Determine the minimum number of CPUs required to satisfy the job's GRES
985  *	request on one node
986  * IN sockets_per_node - count of sockets per node in job allocation
987  * IN tasks_per_node - count of tasks per node in job allocation
988  * IN job_gres_list - job GRES specification
989  * RET count of required CPUs for the job
990  */
991 extern int gres_plugin_job_min_cpu_node(uint32_t sockets_per_node,
992 					uint32_t tasks_per_node,
993 					List job_gres_list);
994 
995 /*
996  * Fill in job_gres_list with the total amount of GRES on a node.
997  * IN/OUT job_gres_list - This list will be destroyed and remade with all GRES
998  *                     on node.
999  * IN node_gres_list - node's gres_list built by
1000  *		       gres_plugin_node_config_validate()
1001  * IN job_id      - job's ID (for logging)
1002  * IN node_name   - name of the node (for logging)
1003  * RET SLURM_SUCCESS or error code
1004  */
1005 extern int gres_plugin_job_select_whole_node(
1006 	List *job_gres_list, List node_gres_list,
1007 	uint32_t job_id, char *node_name);
1008 
1009 /*
1010  * Select and allocate all GRES on a node to a job and update node and job GRES
1011  * information
1012  * IN job_gres_list - job's gres_list built by gres_plugin_job_select_whole_node().
1013  * IN node_gres_list - node's gres_list built by
1014  *		       gres_plugin_node_config_validate()
1015  * IN node_cnt    - total number of nodes originally allocated to the job
1016  * IN node_index  - zero-origin global node index
1017  * IN node_offset - zero-origin index in job allocation to the node of interest
1018  * IN job_id      - job's ID (for logging)
1019  * IN node_name   - name of the node (for logging)
1020  * IN core_bitmap - cores allocated to this job on this node (NULL if not
1021  *                  available)
1022  * IN user_id     - job's user ID
1023  * RET SLURM_SUCCESS or error code
1024  */
1025 extern int gres_plugin_job_alloc_whole_node(
1026 	List job_gres_list, List node_gres_list,
1027 	int node_cnt, int node_index, int node_offset,
1028 	uint32_t job_id, char *node_name,
1029 	bitstr_t *core_bitmap, uint32_t user_id);
1030 
1031 /*
1032  * Select and allocate GRES to a job and update node and job GRES information
1033  * IN job_gres_list - job's gres_list built by gres_plugin_job_state_validate()
1034  * IN node_gres_list - node's gres_list built by
1035  *		       gres_plugin_node_config_validate()
1036  * IN node_cnt    - total number of nodes originally allocated to the job
1037  * IN node_index  - zero-origin global node index
1038  * IN node_offset - zero-origin index in job allocation to the node of interest
1039  * IN job_id      - job's ID (for logging)
1040  * IN node_name   - name of the node (for logging)
1041  * IN core_bitmap - cores allocated to this job on this node (NULL if not
1042  *                  available)
1043  * IN user_id     - job's user ID
1044  * RET SLURM_SUCCESS or error code
1045  */
1046 extern int gres_plugin_job_alloc(List job_gres_list, List node_gres_list,
1047 				 int node_cnt, int node_index, int node_offset,
1048 				 uint32_t job_id, char *node_name,
1049 				 bitstr_t *core_bitmap, uint32_t user_id);
1050 
1051 /* Clear any vestigial job gres state. This may be needed on job requeue. */
1052 extern void gres_plugin_job_clear(List job_gres_list);
1053 
1054 /*
1055  * Deallocate resource from a job and update node and job gres information
1056  * IN job_gres_list - job's gres_list built by gres_plugin_job_state_validate()
1057  * IN node_gres_list - node's gres_list built by
1058  *		gres_plugin_node_config_validate()
1059  * IN node_offset - zero-origin index to the node of interest
1060  * IN job_id      - job's ID (for logging)
1061  * IN node_name   - name of the node (for logging)
1062  * IN old_job     - true if job started before last slurmctld reboot.
1063  *		    Immediately after slurmctld restart and before the node's
1064  *		    registration, the GRES type and topology are not yet
1065  *		    known. This results in some incorrect internal bookkeeping,
1066  *		    but does not cause failures in allocating GRES to jobs.
1067  * IN user_id     - job's user ID
1068  * IN job_fini    - job fully terminating on this node (not just a test)
1069  * RET SLURM_SUCCESS or error code
1070  */
1071 extern int gres_plugin_job_dealloc(List job_gres_list, List node_gres_list,
1072 				   int node_offset, uint32_t job_id,
1073 				   char *node_name, bool old_job,
1074 				   uint32_t user_id, bool job_fini);
1075 
1076 /*
1077  * Merge one job's gres allocation into another job's gres allocation.
1078  * IN from_job_gres_list - List of gres records for the job being merged
1079  *			into another job
1080  * IN from_job_node_bitmap - bitmap of nodes for the job being merged into
1081  *			another job
1082  * IN/OUT to_job_gres_list - List of gres records for the job that the other
1083  *			job is being merged into
1084  * IN to_job_node_bitmap - bitmap of nodes for the job being merged into
1085  */
1086 extern void gres_plugin_job_merge(List from_job_gres_list,
1087 				  bitstr_t *from_job_node_bitmap,
1088 				  List to_job_gres_list,
1089 				  bitstr_t *to_job_node_bitmap);
1090 
1091 /*
1092  * Set environment variables as required for a batch job
1093  * IN/OUT job_env_ptr - environment variable array
1094  * IN job_gres_list - generated by gres_plugin_job_alloc()
1095  * IN node_inx - zero origin node index
1096  */
1097 extern void gres_plugin_job_set_env(char ***job_env_ptr, List job_gres_list,
1098 				    int node_inx);
1099 
1100 /*
1101  * Set job default parameters in a given element of a list
1102  * IN job_gres_list - job's gres_list built by gres_plugin_job_state_validate()
1103  * IN gres_name - name of gres, apply defaults to all elements (e.g. updates to
1104  *		  gres_name="gpu" would apply to "gpu:tesla", "gpu:volta", etc.)
1105  * IN cpu_per_gpu - value to set as default
1106  * IN mem_per_gpu - value to set as default
1107  */
1108 extern void gres_plugin_job_set_defs(List job_gres_list, char *gres_name,
1109 				     uint64_t cpu_per_gpu,
1110 				     uint64_t mem_per_gpu);
1111 
1112 /*
1113  * Extract from the job record's gres_list the count of allocated resources of
1114  * 	the named gres type.
1115  * IN job_gres_list  - job record's gres_list.
1116  * IN gres_name_type - the name of the gres type to retrieve the associated
1117  *	value from.
1118  * RET The value associated with the gres type or NO_VAL if not found.
1119  */
1120 extern uint64_t gres_plugin_get_job_value_by_type(List job_gres_list,
1121 						  char *gres_name_type);
1122 
1123 /*
1124  * Log a job's current gres state
1125  * IN gres_list - generated by gres_plugin_job_state_validate()
1126  * IN job_id    - job's ID
1127  */
1128 extern void gres_plugin_job_state_log(List gres_list, uint32_t job_id);
1129 
1130 /*
1131  * Given a step's requested gres configuration, validate it and build gres list
1132  * IN *tres* - step's requested GRES input strings
1133  * OUT step_gres_list - List of Gres records for this step to track usage
1134  * IN job_gres_list - List of Gres records for this job
1135  * IN job_id, step_id - ID of the step being allocated.
1136  * RET SLURM_SUCCESS or ESLURM_INVALID_GRES
1137  */
1138 extern int gres_plugin_step_state_validate(char *cpus_per_tres,
1139 					   char *tres_per_step,
1140 					   char *tres_per_node,
1141 					   char *tres_per_socket,
1142 					   char *tres_per_task,
1143 					   char *mem_per_tres,
1144 					   List *step_gres_list,
1145 					   List job_gres_list, uint32_t job_id,
1146 					   uint32_t step_id);
1147 
1148 /*
1149  * Create a copy of a step's gres state
1150  * IN gres_list - List of GRES records for this step to track usage
1151  * RET The copy or NULL on failure
1152  */
1153 extern List gres_plugin_step_state_dup(List gres_list);
1154 
1155 /*
1156  * Create a copy of a step's gres state for a particular node index
1157  * IN gres_list - List of GRES records for this step to track usage
1158  * IN node_index - zero-origin index to the node
1159  * RET The copy or NULL on failure
1160  */
1161 extern List gres_plugin_step_state_extract(List gres_list, int node_index);
1162 
1163 /*
1164  * A job allocation size has changed. Update the job step gres information
1165  * bitmaps and other data structures.
1166  * IN gres_list - List of GRES records for this step to track usage
1167  * IN orig_job_node_bitmap - bitmap of nodes in the original job allocation
1168  * IN new_job_node_bitmap - bitmap of nodes in the new job allocation
1169  */
1170 extern void gres_plugin_step_state_rebase(List gres_list,
1171 					  bitstr_t *orig_job_node_bitmap,
1172 					  bitstr_t *new_job_node_bitmap);
1173 
1174 /*
1175  * Pack a step's current gres status, called from slurmctld for save/restore
1176  * IN gres_list - generated by gres_plugin_step_alloc()
1177  * IN/OUT buffer - location to write state to
1178  * IN job_id, step_id - job and step ID for logging
1179  */
1180 extern int gres_plugin_step_state_pack(List gres_list, Buf buffer,
1181 				       uint32_t job_id, uint32_t step_id,
1182 				       uint16_t protocol_version);
1183 
1184 /*
1185  * Unpack a step's current gres status, called from slurmctld for save/restore
1186  * OUT gres_list - state previously saved by gres_plugin_step_state_pack()
1187  * IN/OUT buffer - location to read state from
1188  * IN job_id, step_id - job and step ID for logging
1189  */
1190 extern int gres_plugin_step_state_unpack(List *gres_list, Buf buffer,
1191 					 uint32_t job_id, uint32_t step_id,
1192 					 uint16_t protocol_version);
1193 
1194 /* Return the count of GRES of a specific name on this machine
1195  * IN step_gres_list - generated by gres_plugin_step_alloc()
1196  * IN gres_name - name of the GRES to match
1197  * RET count of GRES of this specific name available to the job or NO_VAL64
1198  */
1199 extern uint64_t gres_plugin_step_count(List step_gres_list, char *gres_name);
1200 
1201 /*
1202  * Configure the GRES hardware allocated to the current step while privileged
1203  *
1204  * IN step_gres_list - Step's GRES specification
1205  * IN node_id        - relative position of this node in step
1206  * IN settings       - string containing configuration settings for the hardware
1207  */
1208 extern void gres_plugin_step_hardware_init(List step_gres_list,
1209 					   uint32_t node_id, char *settings);
1210 
1211 /*
1212  * Optionally undo GRES hardware configuration while privileged
1213  */
1214 extern void gres_plugin_step_hardware_fini(void);
1215 
1216 /*
1217  * Set environment as required for all tasks of a job step
1218  * IN/OUT job_env_ptr - environment variable array
1219  * IN step_gres_list - generated by gres_plugin_step_alloc()
1220  * IN accel_bind_type - GRES binding options (old format, a bitmap)
1221  * IN tres_bind - TRES binding directives (new format, a string)
1222  * IN local_proc_id - task rank, local to this compute node only
1223  */
1224 extern void gres_plugin_step_set_env(char ***job_env_ptr, List step_gres_list,
1225 				     uint16_t accel_bind_type, char *tres_bind,
1226 				     int local_proc_id);
1227 
1228 /*
1229  * Log a step's current gres state
1230  * IN gres_list - generated by gres_plugin_step_alloc()
1231  * IN job_id, step_id - job and step ID
1232  */
1233 extern void gres_plugin_step_state_log(List gres_list, uint32_t job_id,
1234 				       uint32_t step_id);
1235 
1236 /*
1237  * Determine how many cores of a job's allocation can be allocated to a step
1238  *	on a specific node
1239  * IN/OUT step_gres_list - a pending job step's gres requirements
1240  * IN job_gres_list - a running job's gres info
1241  * IN node_offset - index into the job's node allocation
1242  * IN first_step_node - true if this is node zero of the step (do initialization)
1243  * IN cpus_per_task - number of CPUs required per task
1244  * IN max_rem_nodes - maximum nodes remaining for step (including this one)
1245  * IN ignore_alloc - if set ignore resources already allocated to running steps
1246  * IN job_id, step_id - ID of the step being allocated.
1247  * RET Count of available cores on this node (sort of):
1248  *     NO_VAL64 if no limit or 0 if node is not usable
1249  */
1250 extern uint64_t gres_plugin_step_test(List step_gres_list, List job_gres_list,
1251 				      int node_offset, bool first_step_node,
1252 				      uint16_t cpus_per_task, int max_rem_nodes,
1253 				      bool ignore_alloc,
1254 				      uint32_t job_id, uint32_t step_id);
1255 
1256 /*
1257  * Allocate resource to a step and update job and step gres information
1258  * IN step_gres_list - step's gres_list built by
1259  *		gres_plugin_step_state_validate()
1260  * IN job_gres_list - job's gres_list built by gres_plugin_job_state_validate()
1261  * IN node_offset - job's zero-origin index to the node of interest
1262  * IN first_step_node - true if this is node zero of the step (do initialization)
1263  * IN tasks_on_node - number of tasks to be launched on this node
1264  * IN rem_nodes - desired additional node count to allocate, including this node
1265  * IN job_id, step_id - ID of the step being allocated.
1266  * RET SLURM_SUCCESS or error code
1267  */
1268 extern int gres_plugin_step_alloc(List step_gres_list, List job_gres_list,
1269 				  int node_offset, bool first_step_node,
1270 				  uint16_t tasks_on_node, uint32_t rem_nodes,
1271 				  uint32_t job_id, uint32_t step_id);
1272 
1273 /*
1274  * Deallocate resource from a step and update job and step gres information
1275  * IN step_gres_list - step's gres_list built by
1276  *		gres_plugin_step_state_validate()
1277  * IN job_gres_list - job's gres_list built by gres_plugin_job_state_validate()
1278  * IN job_id, step_id - ID of the step being deallocated.
1279  * RET SLURM_SUCCESS or error code
1280  */
1281 extern int gres_plugin_step_dealloc(List step_gres_list, List job_gres_list,
1282 				    uint32_t job_id, uint32_t step_id);
1283 
1284 /*
1285  * Build a string containing the GRES details for a given node and socket
1286  * IN sock_gres_list - List of sock_gres_t entries
1287  * IN sock_inx - zero-origin socket for which information is to be returned
1288  * RET string, must call xfree() to release memory
1289  */
1290 extern char *gres_plugin_sock_str(List sock_gres_list, int sock_inx);
1291 
1292 /*
1293  * Map a given GRES type ID back to a GRES type name.
1294  * IN gres_id - GRES type ID to search for.
1295  * OUT gres_name - Pre-allocated buffer in which to store the GRES type name.
1296  * IN gres_name_len - Size of gres_name in bytes
1297  * RET - error code (currently not used--always return SLURM_SUCCESS)
1298  */
1299 extern int gres_gresid_to_gresname(uint32_t gres_id, char* gres_name,
1300 				   int gres_name_len);
1301 
1302 /*
1303  * Determine the total count of GRES of a given type allocated to a job across
1304  * all nodes
1305  * IN job_gres_list - job's gres_list built by gres_plugin_job_state_validate()
1306  * IN gres_name - name of a GRES type
1307  * RET count of this GRES allocated to this job
1308  */
1309 extern uint64_t gres_get_value_by_type(List job_gres_list, char *gres_name);
1310 
1311 enum gres_job_data_type {
1312 	GRES_JOB_DATA_COUNT,	/* data-> uint64_t  */
1313 	GRES_JOB_DATA_BITMAP,	/* data-> bitstr_t* */
1314 };
1315 
1316 /*
1317  * Get data from a job's GRES data structure
1318  * IN job_gres_list  - job's GRES data structure
1319  * IN gres_name - name of a GRES type
1320  * IN node_inx - zero-origin index of the node within the job's allocation
1321  *	for which data is desired
1322  * IN data_type - type of data to get from the job's data
1323  * OUT data - pointer to the data from job's GRES data structure
1324  *            DO NOT FREE: This is a pointer into the job's data structure
1325  * RET - SLURM_SUCCESS or error code
1326  */
1327 extern int gres_get_job_info(List job_gres_list, char *gres_name,
1328 			     uint32_t node_inx,
1329 			     enum gres_job_data_type data_type, void *data);
1330 
1331 /* Given a job's GRES data structure, return the indices for selected elements
1332  * IN job_gres_list  - job's GRES data structure
1333  * OUT gres_detail_cnt - Number of elements (nodes) in gres_detail_str
1334  * OUT gres_detail_str - Description of GRES on each node
1335  * OUT total_gres_str - String containing all gres in the job and counts.
1336  */
1337 extern void gres_build_job_details(List job_gres_list,
1338 				   uint32_t *gres_detail_cnt,
1339 				   char ***gres_detail_str,
1340 				   char **total_gres_str);
1341 
1342 enum gres_step_data_type {
1343 	GRES_STEP_DATA_COUNT,	/* data-> uint64_t  */
1344 	GRES_STEP_DATA_BITMAP,	/* data-> bitstr_t* */
1345 };
1346 
1347 /*
1348  * Get data from a step's GRES data structure
1349  * IN step_gres_list  - step's GRES data structure
1350  * IN gres_name - name of a GRES type
1351  * IN node_inx - zero-origin index of the node within the job's allocation
1352  *	for which data is desired. Note this can differ from the step's
1353  *	node allocation index.
1354  * IN data_type - type of data to get from the step's data
1355  * OUT data - pointer to the data from step's GRES data structure
1356  *            DO NOT FREE: This is a pointer into the step's data structure
1357  * RET - SLURM_SUCCESS or error code
1358  */
1359 extern int gres_get_step_info(List step_gres_list, char *gres_name,
1360 			      uint32_t node_inx,
1361 			      enum gres_step_data_type data_type, void *data);
1362 
1363 extern gres_job_state_t *gres_get_job_state(List gres_list, char *name);
1364 extern gres_step_state_t *gres_get_step_state(List gres_list, char *name);
1365 
1366 extern uint32_t gres_get_autodetect_types(void);
1367 
1368 /*
1369  * Translate a gres_list into a tres_str
1370  * IN gres_list - filled in with gres_job_state_t or gres_step_state_t's
1371  * IN is_job - if is job function expects gres_job_state_t's else
1372  *             gres_step_state_t's
1373  * IN locked - whether the assoc_mgr tres read lock is already held
1374  * RET char * in a simple TRES format
1375  */
1376 extern char *gres_2_tres_str(List gres_list, bool is_job, bool locked);
1377 
1378 /* Fill in the job allocated tres_cnt based off the gres_list and node_cnt
1379  * IN gres_list - filled in with gres_job_state_t's
1380  * IN node_cnt - number of nodes in the job
1381  * OUT tres_cnt - gres spots filled in with total number of TRES
1382  *                requested for job that are requested in gres_list
1383  * IN locked - whether the assoc_mgr tres read lock is already held
1384  */
1385 extern void gres_set_job_tres_cnt(List gres_list,
1386 				  uint32_t node_cnt,
1387 				  uint64_t *tres_cnt,
1388 				  bool locked);
1389 
1390 /* Fill in the node allocated tres_cnt based off the gres_list
1391  * IN gres_list - filled in with gres_node_state_t's gres_alloc_cnt
1392  * OUT tres_cnt - gres spots filled in with total number of TRES
1393  *                allocated on node
1394  * IN locked - whether the assoc_mgr tres read lock is already held
1395  */
1396 extern void gres_set_node_tres_cnt(List gres_list, uint64_t *tres_cnt,
1397 				   bool locked);
1398 
1399 /* return the major info from a given path of a device */
1400 extern char *gres_device_major(char *dev_path);
1401 
1402 /* Free memory for gres_device_t record */
1403 extern void destroy_gres_device(void *gres_device_ptr);
1404 
1405 /* Destroy a gres_slurmd_conf_t record, free its memory */
1406 extern void destroy_gres_slurmd_conf(void *x);
1407 
1408 /*
1409  * Convert GRES config_flags to a string. The pointer returned references local
1410  * storage in this function, which is not re-entrant.
1411  */
1412 extern char *gres_flags2str(uint8_t config_flags);
1413 
1414 /*
1415  * Creates a gres_slurmd_conf_t record to add to a list of gres_slurmd_conf_t
1416  * records
1417  */
1418 extern void add_gres_to_list(List gres_list, char *name, uint64_t device_cnt,
1419 			     int cpu_cnt, char *cpu_aff_abs_range,
1420 			     bitstr_t *cpu_aff_mac_bitstr, char *device_file,
1421 			     char *type, char *links);
1422 
1423 #endif /* !_GRES_H */
1424