1 /*****************************************************************************\
 *  slurmctld.h - definitions of functions and structures for slurmctld use
3  *****************************************************************************
4  *  Copyright (C) 2002-2007 The Regents of the University of California.
5  *  Copyright (C) 2008-2010 Lawrence Livermore National Security.
6  *  Portions Copyright (C) 2010-2014 SchedMD <https://www.schedmd.com>.
7  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
8  *  Written by Morris Jette <jette1@llnl.gov> et. al.
9  *  CODE-OCEC-09-009. All rights reserved.
10  *
11  *  This file is part of Slurm, a resource management program.
12  *  For details, see <https://slurm.schedmd.com/>.
13  *  Please also read the included file: DISCLAIMER.
14  *
15  *  Slurm is free software; you can redistribute it and/or modify it under
16  *  the terms of the GNU General Public License as published by the Free
17  *  Software Foundation; either version 2 of the License, or (at your option)
18  *  any later version.
19  *
20  *  In addition, as a special exception, the copyright holders give permission
21  *  to link the code of portions of this program with the OpenSSL library under
22  *  certain conditions as described in each individual source file, and
23  *  distribute linked combinations including the two. You must obey the GNU
24  *  General Public License in all respects for all of the code used other than
25  *  OpenSSL. If you modify file(s) with this exception, you may extend this
26  *  exception to your version of the file(s), but you are not obligated to do
27  *  so. If you do not wish to do so, delete this exception statement from your
28  *  version.  If you delete this exception statement from all source files in
29  *  the program, then also delete it here.
30  *
31  *  Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
32  *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
33  *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
34  *  details.
35  *
36  *  You should have received a copy of the GNU General Public License along
37  *  with Slurm; if not, write to the Free Software Foundation, Inc.,
38  *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
39 \*****************************************************************************/
40 
41 #ifndef _HAVE_SLURMCTLD_H
42 #define _HAVE_SLURMCTLD_H
43 
44 #include "config.h"
45 
46 #include <inttypes.h>
47 #include <pthread.h>
48 #include <string.h>
49 #include <sys/types.h>
50 #include <time.h>
51 #include <unistd.h>
52 
53 #include "slurm/slurm.h"
54 
55 #include "src/common/bitstring.h"
56 #include "src/common/list.h"
57 #include "src/common/log.h"
58 #include "src/common/macros.h"
59 #include "src/common/node_conf.h"
60 #include "src/common/pack.h"
61 #include "src/common/read_config.h" /* location of slurmctld_conf */
62 #include "src/common/job_resources.h"
63 #include "src/common/slurm_cred.h"
64 #include "src/common/slurm_protocol_api.h"
65 #include "src/common/slurm_protocol_defs.h"
66 #include "src/common/switch.h"
67 #include "src/common/timers.h"
68 #include "src/common/xmalloc.h"
69 
70 /*****************************************************************************\
71  *  GENERAL CONFIGURATION parameters and data structures
72 \*****************************************************************************/
/* Maximum number of parallel threads to service incoming RPCs.
 * Also the maximum number of parallel threads to service outgoing RPCs
 * (tracked by a separate counter).
 * Since some systems schedule pthreads on a first-in, last-out basis,
 * increasing this value is strongly discouraged. */
77 #ifndef MAX_SERVER_THREADS
78 #define MAX_SERVER_THREADS 256
79 #endif
80 
81 /* Maximum number of threads to service emails (see MailProg) */
82 #ifndef MAX_MAIL_THREADS
83 #define MAX_MAIL_THREADS 64
84 #endif
85 
/* Save the full slurmctld state every PERIODIC_CHECKPOINT seconds */
87 #ifndef PERIODIC_CHECKPOINT
88 #define	PERIODIC_CHECKPOINT	300
89 #endif
90 
91 /* Retry an incomplete RPC agent request every RPC_RETRY_INTERVAL seconds */
92 #ifndef RPC_RETRY_INTERVAL
93 #define	RPC_RETRY_INTERVAL	60
94 #endif
95 
96 /* Check for jobs reaching their time limit every PERIODIC_TIMEOUT seconds */
97 #ifndef PERIODIC_TIMEOUT
98 #define	PERIODIC_TIMEOUT	30
99 #endif
100 
101 /* Attempt to purge defunct job records and resend job kill requests
102  * every PURGE_JOB_INTERVAL seconds */
103 #ifndef PURGE_JOB_INTERVAL
104 #define PURGE_JOB_INTERVAL 60
105 #endif
106 
107 /* Process pending trigger events every TRIGGER_INTERVAL seconds */
108 #ifndef TRIGGER_INTERVAL
109 #define TRIGGER_INTERVAL 15
110 #endif
111 
112 /* Report current node accounting state every PERIODIC_NODE_ACCT seconds */
113 #ifndef PERIODIC_NODE_ACCT
114 #define PERIODIC_NODE_ACCT 300
115 #endif
116 
117 /* Pathname of group file record for checking update times */
118 #ifndef GROUP_FILE
119 #define GROUP_FILE	"/etc/group"
120 #endif
121 
122 /* Seconds to wait for backup controller response to REQUEST_CONTROL RPC */
123 #ifndef CONTROL_TIMEOUT
124 #define CONTROL_TIMEOUT 30	/* seconds */
125 #endif
126 
/* Maximum number of requeue attempts before the job is placed in the
 * JOB_REQUEUE_HOLD state with reason JobHeldUser.
 */
130 #ifndef MAX_BATCH_REQUEUE
131 #define MAX_BATCH_REQUEUE 5
132 #endif
133 
134 /*****************************************************************************\
135  *  General configuration parameters and data structures
136 \*****************************************************************************/
137 
138 typedef struct slurmctld_config {
139 	char *	auth_info;
140 	pthread_cond_t backup_finish_cond; /* use thread_count_lock */
141 	time_t	boot_time;
142 	int	daemonize;
143 	char    node_name_long[MAX_SLURM_NAME];
144 	char    node_name_short[MAX_SLURM_NAME];
145 	bool	resume_backup;
146 	bool    scheduling_disabled;
147 	int	server_thread_count;
148 	time_t	shutdown_time;
149 	bool    submissions_disabled;
150 
151 	slurm_cred_ctx_t cred_ctx;
152 	pthread_cond_t thread_count_cond;
153 	pthread_mutex_t thread_count_lock;
154 	pthread_t thread_id_main;
155 	pthread_t thread_id_save;
156 	pthread_t thread_id_sig;
157 	pthread_t thread_id_power;
158 	pthread_t thread_id_purge_files;
159 	pthread_t thread_id_rpc;
160 } slurmctld_config_t;
161 
162 /* Job scheduling statistics */
163 typedef struct diag_stats {
164 	int proc_req_threads;
165 	int proc_req_raw;
166 
167 	uint32_t schedule_cycle_max;
168 	uint32_t schedule_cycle_last;
169 	uint32_t schedule_cycle_sum;
170 	uint32_t schedule_cycle_counter;
171 	uint32_t schedule_cycle_depth;
172 	uint32_t schedule_queue_len;
173 
174 	uint32_t jobs_submitted;
175 	uint32_t jobs_started;
176 	uint32_t jobs_completed;
177 	uint32_t jobs_canceled;
178 	uint32_t jobs_failed;
179 
180 	uint32_t job_states_ts;
181 	uint32_t jobs_pending;
182 	uint32_t jobs_running;
183 
184 	uint32_t backfilled_jobs;
185 	uint32_t last_backfilled_jobs;
186 	uint32_t backfilled_het_jobs;
187 	uint32_t bf_active;
188 	uint32_t bf_cycle_counter;
189 	uint32_t bf_cycle_last;
190 	uint32_t bf_cycle_max;
191 	uint64_t bf_cycle_sum;
192 	uint32_t bf_depth_sum;
193 	uint32_t bf_depth_try_sum;
194 	uint32_t bf_last_depth;
195 	uint32_t bf_last_depth_try;
196 	uint32_t bf_queue_len;
197 	uint32_t bf_queue_len_sum;
198 	uint32_t bf_table_size;
199 	uint32_t bf_table_size_sum;
200 	time_t   bf_when_last_cycle;
201 
202 	uint32_t latency;
203 } diag_stats_t;
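/*
 * Illustrative sketch (not an existing helper): the mean scheduling cycle
 * time reported by tools such as sdiag can be derived from the counters
 * above, e.g.:
 *
 *	uint32_t mean_cycle = 0;
 *	if (slurmctld_diag_stats.schedule_cycle_counter)
 *		mean_cycle = slurmctld_diag_stats.schedule_cycle_sum /
 *			     slurmctld_diag_stats.schedule_cycle_counter;
 */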
204 
/* Indexes of well-known TRES entries in the curr_tres_array of tres_info_t.
 * These must remain in the same order as the tres_types_t enum defined in
 * src/common/slurmdb_defs.h.
 */
209 enum {
210 	TRES_ARRAY_CPU = 0,
211 	TRES_ARRAY_MEM,
212 	TRES_ARRAY_ENERGY,
213 	TRES_ARRAY_NODE,
214 	TRES_ARRAY_BILLING,
215 	TRES_ARRAY_FS_DISK,
216 	TRES_ARRAY_VMEM,
217 	TRES_ARRAY_PAGES,
218 	TRES_ARRAY_TOTAL_CNT
219 };
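/*
 * Illustrative example (assumption about usage): the indexes above are used
 * with the per-TRES count arrays declared later in this file, such as
 * part_record_t.tres_cnt and job_record_t.tres_alloc_cnt, e.g.:
 *
 *	uint64_t part_cpus = part_ptr->tres_cnt[TRES_ARRAY_CPU];
 *	uint64_t job_mem   = job_ptr->tres_alloc_cnt[TRES_ARRAY_MEM];
 *
 * where "part_ptr" and "job_ptr" are hypothetical part_record_t and
 * job_record_t pointers used only for illustration.
 */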
220 
221 extern bool  preempt_send_user_signal;
222 extern time_t	last_proc_req_start;
223 extern diag_stats_t slurmctld_diag_stats;
224 extern slurmctld_config_t slurmctld_config;
225 extern void *acct_db_conn;
226 extern uint16_t accounting_enforce;
227 extern int   association_based_accounting;
228 extern int   backup_inx;		/* BackupController# index */
229 extern int   batch_sched_delay;
230 extern time_t control_time;		/* Time when became primary controller */
231 extern uint32_t   cluster_cpus;
232 extern bool disable_remote_singleton;
233 extern int max_depend_depth;
234 extern bool node_features_updated;
235 extern pthread_cond_t purge_thread_cond;
236 extern pthread_mutex_t purge_thread_lock;
237 extern pthread_mutex_t check_bf_running_lock;
238 extern int   sched_interval;
239 extern bool  slurmctld_init_db;
240 extern int   slurmctld_primary;
241 extern int   slurmctld_tres_cnt;
242 extern slurmdb_cluster_rec_t *response_cluster_rec;
243 extern bool   test_config;
244 extern int    test_config_rc;
245 
246 /*****************************************************************************\
247  *  NODE parameters and data structures, mostly in src/common/node_conf.h
248 \*****************************************************************************/
249 extern bool ping_nodes_now;		/* if set, ping nodes immediately */
250 extern bool want_nodes_reboot;		/* if set, check for idle nodes */
251 extern bool ignore_state_errors;
252 
253 typedef struct node_features {
254 	uint32_t magic;		/* magic cookie to test data integrity */
255 	char *name;		/* name of a feature */
256 	bitstr_t *node_bitmap;	/* bitmap of nodes with this feature */
257 } node_feature_t;
258 
259 extern List active_feature_list;/* list of currently active node features */
260 extern List avail_feature_list;	/* list of available node features */
261 
262 /*****************************************************************************\
263  *  NODE states and bitmaps
264  *
265  *  avail_node_bitmap       Set if node's state is not DOWN, DRAINING/DRAINED,
266  *                          FAILING or NO_RESPOND (i.e. available to run a job)
267  *  booting_node_bitmap     Set if node in process of booting
268  *  cg_node_bitmap          Set if node in completing state
269  *  future_node_bitmap      Set if node in FUTURE state
270  *  idle_node_bitmap        Set if node has no jobs allocated to it
271  *  power_node_bitmap       Set for nodes which are powered down
272  *  share_node_bitmap       Set if no jobs allocated exclusive access to
273  *                          resources on that node (cleared if --exclusive
274  *                          option specified by job or Shared=NO configured for
275  *                          the job's partition)
276  *  up_node_bitmap          Set if the node's state is not DOWN
277 \*****************************************************************************/
278 extern bitstr_t *avail_node_bitmap;	/* bitmap of available nodes,
279 					 * state not DOWN, DRAIN or FAILING */
280 extern bitstr_t *bf_ignore_node_bitmap;	/* bitmap of nodes made available during
281 					 * backfill cycle */
282 extern bitstr_t *booting_node_bitmap;	/* bitmap of booting nodes */
283 extern bitstr_t *cg_node_bitmap;	/* bitmap of completing nodes */
284 extern bitstr_t *future_node_bitmap;	/* bitmap of FUTURE nodes */
285 extern bitstr_t *idle_node_bitmap;	/* bitmap of idle nodes */
286 extern bitstr_t *power_node_bitmap;	/* Powered down nodes */
287 extern bitstr_t *share_node_bitmap;	/* bitmap of sharable nodes */
288 extern bitstr_t *up_node_bitmap;	/* bitmap of up nodes, not DOWN */
289 extern bitstr_t *rs_node_bitmap;	/* next_state=resume nodes */
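/*
 * Illustrative sketch (not an existing helper): these bitmaps are indexed by
 * node index and are normally consulted with bit_test() from
 * src/common/bitstring.h, e.g. to check whether a node could be scheduled
 * immediately:
 *
 *	if (bit_test(avail_node_bitmap, node_inx) &&
 *	    bit_test(idle_node_bitmap, node_inx)) {
 *		... node "node_inx" is up, not draining, and has no jobs ...
 *	}
 *
 * "node_inx" is a hypothetical node index used only for illustration.
 */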
290 
291 /*****************************************************************************\
292  *  FRONT_END parameters and data structures
293 \*****************************************************************************/
294 #define FRONT_END_MAGIC 0xfe9b82fe
295 
296 typedef struct {
297 	uint32_t magic;			/* magic cookie to test data integrity */
298 					/* DO NOT ALPHABETIZE */
299 	gid_t *allow_gids;		/* zero terminated list of allowed groups */
300 	char *allow_groups;		/* allowed group string */
301 	uid_t *allow_uids;		/* zero terminated list of allowed users */
302 	char *allow_users;		/* allowed user string */
303 	time_t boot_time;		/* Time of node boot,
304 					 * computed from up_time */
305 	char *comm_name;		/* communications path name to node */
306 	gid_t *deny_gids;		/* zero terminated list of denied groups */
307 	char *deny_groups;		/* denied group string */
308 	uid_t *deny_uids;		/* zero terminated list of denied users */
309 	char *deny_users;		/* denied user string */
310 	uint32_t job_cnt_comp;		/* count of completing jobs on node */
311 	uint16_t job_cnt_run;		/* count of running or suspended jobs */
312 	time_t last_response;		/* Time of last communication */
313 	char *name;			/* frontend node name */
314 	uint32_t node_state;		/* enum node_states, ORed with
315 					 * NODE_STATE_NO_RESPOND if not
316 					 * responding */
317 	bool not_responding;		/* set if fails to respond,
318 					 * clear after logging this */
319 	slurm_addr_t slurm_addr;	/* network address */
320 	uint16_t port;			/* frontend specific port */
321 	uint16_t protocol_version;	/* Slurm version number */
322 	char *reason;			/* reason for down frontend node */
323 	time_t reason_time;		/* Time stamp when reason was set,
324 					 * ignore if no reason is set. */
325 	uint32_t reason_uid;   		/* User that set the reason, ignore if
326 					 * no reason is set. */
327 	time_t slurmd_start_time;	/* Time of slurmd startup */
328 	char *version;			/* Slurm version */
329 } front_end_record_t;
330 
331 extern front_end_record_t *front_end_nodes;
332 extern uint16_t front_end_node_cnt;
333 extern time_t last_front_end_update;	/* time of last front_end update */
334 
335 /*****************************************************************************\
336  *  PARTITION parameters and data structures
337 \*****************************************************************************/
338 #define PART_MAGIC 0xaefe8495
339 
340 typedef struct {
341 	slurmdb_bf_usage_t *job_usage;
342 	slurmdb_bf_usage_t *resv_usage;
343 	xhash_t *user_usage;
344 } bf_part_data_t;
345 
346 typedef struct {
347 	uint32_t magic;		/* magic cookie to test data integrity */
348 				/* DO NOT ALPHABETIZE */
349 	char *allow_accounts;	/* comma delimited list of accounts,
350 				 * NULL indicates all */
351 	char **allow_account_array; /* NULL terminated list of allowed
352 				 * accounts */
353 	char *allow_alloc_nodes;/* comma delimited list of allowed
354 				 * allocating nodes
355 				 * NULL indicates all */
356 	char *allow_groups;	/* comma delimited list of groups,
357 				 * NULL indicates all */
358 	uid_t *allow_uids;	/* zero terminated list of allowed user IDs */
359 	char *allow_qos;	/* comma delimited list of qos,
360 				 * NULL indicates all */
	bitstr_t *allow_qos_bitstr; /* (DON'T PACK) associated with
362 				 * char *allow_qos but used internally */
363 	char *alternate; 	/* name of alternate partition */
364 	double *billing_weights;    /* array of TRES billing weights */
365 	char   *billing_weights_str;/* per TRES billing weight string */
366 	uint32_t cpu_bind;	/* default CPU binding type */
367 	uint64_t def_mem_per_cpu; /* default MB memory per allocated CPU */
368 	uint32_t default_time;	/* minutes, NO_VAL or INFINITE */
369 	char *deny_accounts;	/* comma delimited list of denied accounts */
370 	char **deny_account_array; /* NULL terminated list of denied accounts */
371 	char *deny_qos;		/* comma delimited list of denied qos */
372 	bitstr_t *deny_qos_bitstr; /* (DON'T PACK) associated with
				 * char *deny_qos but used internally */
374 	uint16_t flags;		/* see PART_FLAG_* in slurm.h */
375 	uint32_t grace_time;	/* default preempt grace time in seconds */
376 	List job_defaults_list;	/* List of job_defaults_t elements */
377 	uint32_t max_cpus_per_node; /* maximum allocated CPUs per node */
378 	uint64_t max_mem_per_cpu; /* maximum MB memory per allocated CPU */
379 	uint32_t max_nodes;	/* per job or INFINITE */
380 	uint32_t max_nodes_orig;/* unscaled value (c-nodes on BlueGene) */
381 	uint16_t max_share;	/* number of jobs to gang schedule */
382 	uint32_t max_time;	/* minutes or INFINITE */
383 	uint32_t min_nodes;	/* per job */
384 	uint32_t min_nodes_orig;/* unscaled value (c-nodes on BlueGene) */
385 	char *name;		/* name of the partition */
386 	bitstr_t *node_bitmap;	/* bitmap of nodes in partition */
387 	char *nodes;		/* comma delimited list names of nodes */
388 	double   norm_priority;	/* normalized scheduling priority for
389 				 * jobs (DON'T PACK) */
390 	uint16_t over_time_limit; /* job's time limit can be exceeded by this
391 				   * number of minutes before cancellation */
392 	uint16_t preempt_mode;	/* See PREEMPT_MODE_* in slurm/slurm.h */
393 	uint16_t priority_job_factor;	/* job priority weight factor */
394 	uint16_t priority_tier;	/* tier for scheduling and preemption */
395 	char *qos_char;         /* requested QOS from slurm.conf */
	slurmdb_qos_rec_t *qos_ptr; /* pointer to the quality of service
				     * record attached to this partition;
				     * confirm the value before use */
399 	uint16_t state_up;	/* See PARTITION_* states in slurm.h */
400 	uint32_t total_nodes;	/* total number of nodes in the partition */
401 	uint32_t total_cpus;	/* total number of cpus in the partition */
402 	uint32_t max_cpu_cnt;	/* max # of cpus on a node in the partition */
403 	uint32_t max_core_cnt;	/* max # of cores on a node in the partition */
404 	uint16_t cr_type;	/* Custom CR values for partition (if supported by select plugin) */
405 	uint64_t *tres_cnt;	/* array of total TRES in partition. NO_PACK */
406 	char     *tres_fmt_str;	/* str of configured TRES in partition */
407 	bf_part_data_t *bf_data;/* backfill data, NO PACK */
408 } part_record_t;
409 
410 extern List part_list;			/* list of part_record entries */
411 extern time_t last_part_update;		/* time of last part_list update */
412 extern part_record_t default_part;	/* default configuration values */
413 extern char *default_part_name;		/* name of default partition */
414 extern part_record_t *default_part_loc;	/* default partition ptr */
415 
416 #define DEF_PART_MAX_PRIORITY   1
417 extern uint16_t part_max_priority;      /* max priority_job_factor in all parts */
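/*
 * Illustrative note (assumption, not taken from this file): the normalized
 * partition priority referenced by part_record_t.norm_priority is typically
 * derived from these values, e.g.:
 *
 *	part_ptr->norm_priority = (double)part_ptr->priority_job_factor /
 *				  (double)part_max_priority;
 *
 * "part_ptr" is a hypothetical part_record_t pointer used only for
 * illustration.
 */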
418 
419 /*****************************************************************************\
420  *  RESERVATION parameters and data structures
421 \*****************************************************************************/
422 
423 typedef struct slurmctld_resv {
424 	uint16_t magic;		/* magic cookie, RESV_MAGIC		*/
425 				/* DO NOT ALPHABETIZE			*/
426 	char *accounts;		/* names of accounts permitted to use	*/
427 	int account_cnt;	/* count of accounts permitted to use	*/
428 	char **account_list;	/* list of accounts permitted to use	*/
	bool account_not;	/* accounts in account_list NOT permitted to use */
430 	char *assoc_list;	/* list of associations			*/
431 	uint32_t boot_time;	/* time it would take to reboot a node	*/
432 	char *burst_buffer;	/* burst buffer resources		*/
433 	bitstr_t *core_bitmap;	/* bitmap of reserved cores		*/
434 	uint32_t core_cnt;	/* number of reserved cores		*/
435 	job_resources_t *core_resrcs;	/* details of allocated cores	*/
436 	uint32_t duration;	/* time in seconds for this
437 				 * reservation to last                  */
438 	time_t end_time;	/* end time of reservation		*/
439 	time_t idle_start_time;	/* first time when reservation had no jobs
440 				 * running on it */
441 	char *features;		/* required node features		*/
442 	uint64_t flags;		/* see RESERVE_FLAG_* in slurm.h	*/
	bool full_nodes;	/* true if reservation uses full nodes	*/
444 	uint32_t job_pend_cnt;	/* number of pending jobs		*/
445 	uint32_t job_run_cnt;	/* number of running jobs		*/
446 	List license_list;	/* structure with license info		*/
447 	char *licenses;		/* required system licenses		*/
448 	bool flags_set_node;	/* flags (i.e. NODE_STATE_MAINT |
449 				 * NODE_STATE_RES) set for nodes	*/
450 	uint32_t max_start_delay;/* Maximum delay in which jobs outside of the
451 				  * reservation will be permitted to overlap
452 				  * once any jobs are queued for the
453 				  * reservation */
454 	char *name;		/* name of reservation			*/
455 	bitstr_t *node_bitmap;	/* bitmap of reserved nodes		*/
456 	uint32_t node_cnt;	/* count of nodes required		*/
457 	char *node_list;	/* list of reserved nodes or ALL	*/
458 	char *partition;	/* name of partition to be used		*/
459 	part_record_t *part_ptr;/* pointer to partition used		*/
460 	uint32_t purge_comp_time; /* If PURGE_COMP flag is set the amount of
461 				   * minutes this reservation will sit idle
462 				   * until it is revoked.
463 				   */
464 	uint32_t resv_id;	/* unique reservation ID, internal use	*/
465 	uint32_t resv_watts;	/* amount of power to reserve */
466 	bool run_epilog;	/* set if epilog has been executed	*/
467 	bool run_prolog;	/* set if prolog has been executed	*/
468 	time_t start_time;	/* start time of reservation		*/
469 	time_t start_time_first;/* when the reservation first started	*/
470 	time_t start_time_prev;	/* If start time was changed this is
				 * the previous start time.  Needed
472 				 * for accounting */
473 	char *tres_fmt_str;     /* formatted string of tres to deal with */
474 	char *tres_str;         /* simple string of tres to deal with */
475 	char *users;		/* names of users permitted to use	*/
476 	int user_cnt;		/* count of users permitted to use	*/
477 	uid_t *user_list;	/* array of users permitted to use	*/
478 	bool user_not;		/* user_list users NOT permitted to use	*/
479 } slurmctld_resv_t;
480 
481 extern List resv_list;		/* list of slurmctld_resv entries */
482 extern time_t last_resv_update;	/* time of last resv_list update */
483 
484 /*****************************************************************************\
485  *  JOB parameters and data structures
486 \*****************************************************************************/
487 extern time_t last_job_update;	/* time of last update to job records */
488 
489 #define DETAILS_MAGIC	0xdea84e7
490 #define JOB_MAGIC	0xf0b7392c
491 
492 #define FEATURE_OP_OR   0
493 #define FEATURE_OP_AND  1
494 #define FEATURE_OP_XOR  2
495 #define FEATURE_OP_XAND 3
496 #define FEATURE_OP_END  4		/* last entry lacks separator */
497 typedef struct job_feature {
498 	char *name;			/* name of feature */
499 	bool changeable;		/* return value of
500 					 * node_features_g_changeable_feature */
501 	uint16_t count;			/* count of nodes with this feature */
502 	uint8_t op_code;		/* separator, see FEATURE_OP_ above */
503 	bitstr_t *node_bitmap_active;	/* nodes with this feature active */
504 	bitstr_t *node_bitmap_avail;	/* nodes with this feature available */
505 	uint16_t paren;			/* count of enclosing parenthesis */
506 } job_feature_t;
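/*
 * Illustrative example (assumption about typical contents): a job constraint
 * string such as "a&b" is represented by a feature_list with two
 * job_feature_t entries: the first with name "a" and op_code FEATURE_OP_AND,
 * the second with name "b" and op_code FEATURE_OP_END (the last entry
 * carries no separator).
 */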
507 
508 /*
 * These relate to the JOB_SHARED_* macros in slurm.h,
 * but with the logic for zero vs one inverted.
511  */
512 #define WHOLE_NODE_REQUIRED	0x01
513 #define WHOLE_NODE_USER		0x02
514 #define WHOLE_NODE_MCS		0x03
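/*
 * Illustrative example (assumption): job_details.whole_node holds one of the
 * values above, so an "--exclusive=user" request can be recognized with a
 * simple equality test, e.g.:
 *
 *	if (job_ptr->details &&
 *	    (job_ptr->details->whole_node == WHOLE_NODE_USER))
 *		... job wants whole nodes, shared only with the same user ...
 *
 * "job_ptr" is a hypothetical job_record_t pointer used only for
 * illustration.
 */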
515 
516 /* job_details - specification of a job's constraints,
517  * can be purged after initiation */
518 struct job_details {
519 	uint32_t magic;			/* magic cookie for data integrity */
520 					/* DO NOT ALPHABETIZE */
521 	char *acctg_freq;		/* accounting polling interval */
522 	time_t accrue_time;             /* Time when we start accruing time for
					 * priority */
524 	uint32_t argc;			/* count of argv elements */
525 	char **argv;			/* arguments for a batch job script */
526 	time_t begin_time;		/* start at this time (srun --begin),
527 					 * resets to time first eligible
528 					 * (all dependencies satisfied) */
529 	char *cluster_features;		/* required cluster_features */
530 	uint16_t contiguous;		/* set if requires contiguous nodes */
531 	uint16_t core_spec;		/* specialized core/thread count,
532 					 * threads if CORE_SPEC_THREAD flag set */
533 	char *cpu_bind;			/* binding map for map/mask_cpu - This
534 					 * currently does not matter to the
535 					 * job allocation, setting this does
536 					 * not do anything for steps. */
537 	uint16_t cpu_bind_type;		/* Default CPU bind type for steps,
538 					 * see cpu_bind_type_t */
539 	uint32_t cpu_freq_min;  	/* Minimum cpu frequency  */
540 	uint32_t cpu_freq_max;  	/* Maximum cpu frequency  */
541 	uint32_t cpu_freq_gov;  	/* cpu frequency governor */
542 	uint16_t cpus_per_task;		/* number of processors required for
543 					 * each task */
544 	uint16_t orig_cpus_per_task;	/* requested value of cpus_per_task */
545 	List depend_list;		/* list of job_ptr:state pairs */
546 	char *dependency;		/* wait for other jobs */
547 	char *orig_dependency;		/* original value (for archiving) */
548 	uint16_t env_cnt;		/* size of env_sup (see below) */
549 	char **env_sup;			/* supplemental environment variables */
550 	bitstr_t *exc_node_bitmap;	/* bitmap of excluded nodes */
551 	char *exc_nodes;		/* excluded nodes */
552 	uint32_t expanding_jobid;	/* ID of job to be expanded */
553 	char *extra;			/* extra field, unused */
554 	List feature_list;		/* required features with node counts */
555 	char *features;			/* required features */
556 	uint32_t max_cpus;		/* maximum number of cpus */
557 	uint32_t orig_max_cpus;		/* requested value of max_cpus */
558 	uint32_t max_nodes;		/* maximum number of nodes */
559 	multi_core_data_t *mc_ptr;	/* multi-core specific data */
560 	char *mem_bind;			/* binding map for map/mask_cpu */
561 	uint16_t mem_bind_type;		/* see mem_bind_type_t */
562 	uint32_t min_cpus;		/* minimum number of cpus */
563 	uint32_t orig_min_cpus;		/* requested value of min_cpus */
564 	int min_gres_cpu;		/* Minimum CPU count per node required
565 					 * to satisfy GRES requirements,
566 					 * not saved/restored, but rebuilt */
567 	uint32_t min_nodes;		/* minimum number of nodes */
568 	uint32_t nice;			/* requested priority change,
569 					 * NICE_OFFSET == no change */
570 	uint16_t ntasks_per_node;	/* number of tasks on each node */
571 	uint32_t num_tasks;		/* number of tasks to start */
572 	uint8_t open_mode;		/* stdout/err append or truncate */
573 	uint8_t overcommit;		/* processors being over subscribed */
574 	uint16_t plane_size;		/* plane size when task_dist =
575 					 * SLURM_DIST_PLANE */
576 	/* job constraints: */
577 	uint32_t pn_min_cpus;		/* minimum processors per node */
578 	uint32_t orig_pn_min_cpus;	/* requested value of pn_min_cpus */
579 	uint64_t pn_min_memory;		/* minimum memory per node (MB) OR
580 					 * memory per allocated
581 					 * CPU | MEM_PER_CPU */
582 	uint64_t orig_pn_min_memory;	/* requested value of pn_min_memory */
583 	uint32_t pn_min_tmp_disk;	/* minimum tempdisk per node, MB */
584 	uint8_t prolog_running;		/* set while prolog_slurmctld is
585 					 * running */
586 	uint32_t reserved_resources;	/* CPU minutes of resources reserved
587 					 * for this job while it was pending */
588 	bitstr_t *req_node_bitmap;	/* bitmap of required nodes */
	time_t preempt_start_time;	/* time that preemption began to start
590 					 * this job */
591 	char *req_nodes;		/* required nodes */
	uint16_t requeue;		/* controls ability to requeue job */
593 	uint8_t share_res;		/* set if job can share resources with
594 					 * other jobs */
595 	char *std_err;			/* pathname of job's stderr file */
596 	char *std_in;			/* pathname of job's stdin file */
597 	char *std_out;			/* pathname of job's stdout file */
598 	time_t submit_time;		/* time of submission */
599 	uint32_t task_dist;		/* task layout for this job. Only
600 					 * useful when Consumable Resources
601 					 * is enabled */
602 	uint32_t usable_nodes;		/* node count needed by preemption */
603 	uint8_t whole_node;		/* WHOLE_NODE_REQUIRED: 1: --exclusive
604 					 * WHOLE_NODE_USER: 2: --exclusive=user
605 					 * WHOLE_NODE_MCS:  3: --exclusive=mcs */
606 	char *work_dir;			/* pathname of working directory */
607 	uint16_t x11;			/* --x11 flags */
608 	char *x11_magic_cookie;		/* x11 magic cookie */
609 	char *x11_target;		/* target host, or socket if port == 0 */
610 	uint16_t x11_target_port;	/* target TCP port on alloc_node */
611 };
612 
613 typedef struct job_array_struct {
614 	uint32_t task_cnt;		/* count of remaining task IDs */
615 	bitstr_t *task_id_bitmap;	/* bitmap of remaining task IDs */
616 	char *task_id_str;		/* string describing remaining task IDs,
617 					 * needs to be recalculated if NULL */
618 	uint32_t array_flags;		/* Flags to control behavior (FUTURE) */
619 	uint32_t max_run_tasks;		/* Maximum number of running tasks */
620 	uint32_t tot_run_tasks;		/* Current running task count */
621 	uint32_t min_exit_code;		/* Minimum exit code from any task */
622 	uint32_t max_exit_code;		/* Maximum exit code from any task */
623 	uint32_t pend_run_tasks;	/* Number of tasks ready to run due to
624 					 * preempting other jobs */
625 	uint32_t tot_comp_tasks;	/* Completed task count */
626 } job_array_struct_t;
627 
628 #define ADMIN_SET_LIMIT 0xffff
629 
630 typedef struct {
631 	uint16_t qos;
632 	uint16_t time;
633 	uint16_t *tres;
634 } acct_policy_limit_set_t;
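/*
 * Illustrative example (assumption): a value of ADMIN_SET_LIMIT in
 * limit_set.time or limit_set.tres[i] records that an administrator, rather
 * than the user request or an accounting limit, set the corresponding value,
 * e.g.:
 *
 *	if (job_ptr->limit_set.time == ADMIN_SET_LIMIT)
 *		... time limit was set by an administrator, do not adjust ...
 *
 * "job_ptr" is a hypothetical job_record_t pointer used only for
 * illustration.
 */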
635 
636 typedef struct {
637 	uint32_t cluster_lock;		/* sibling that has lock on job */
638 	char    *origin_str;		/* origin cluster name */
639 	uint64_t siblings_active;	/* bitmap of active sibling ids. */
640 	char    *siblings_active_str;	/* comma separated list of actual
641 					   sibling names */
642 	uint64_t siblings_viable;	/* bitmap of viable sibling ids. */
643 	char    *siblings_viable_str;	/* comma separated list of viable
644 					   sibling names */
645 } job_fed_details_t;
646 
#define HETJOB_PRIO_MIN	0x0001	/* Sort by minimum component priority[tier] */
648 #define HETJOB_PRIO_MAX	0x0002	/* Sort by maximum component priority[tier] */
649 #define HETJOB_PRIO_AVG	0x0004	/* Sort by average component priority[tier] */
650 
651 typedef struct {
652 	bool any_resv;			/* at least one component with resv */
653 	uint32_t priority_tier;		/* whole hetjob calculated tier */
654 	uint32_t priority;		/* whole hetjob calculated priority */
655 } het_job_details_t;
656 
657 /*
658  * NOTE: When adding fields to the job_record, or any underlying structures,
659  * be sure to sync with job_array_split.
660  */
661 typedef struct job_record job_record_t;
662 struct job_record {
663 	uint32_t magic;			/* magic cookie for data integrity */
664 					/* DO NOT ALPHABETIZE */
665 	char    *account;		/* account number to charge */
666 	char    *admin_comment;		/* administrator's arbitrary comment */
667 	char	*alias_list;		/* node name to address aliases */
668 	char    *alloc_node;		/* local node making resource alloc */
669 	uint16_t alloc_resp_port;	/* RESPONSE_RESOURCE_ALLOCATION port */
670 	uint32_t alloc_sid;		/* local sid making resource alloc */
671 	uint32_t array_job_id;		/* job_id of a job array or 0 if N/A */
672 	uint32_t array_task_id;		/* task_id of a job array */
673 	job_array_struct_t *array_recs;	/* job array details,
674 					 * only in meta-job record */
675 	uint32_t assoc_id;              /* used for accounting plugins */
676 	slurmdb_assoc_rec_t *assoc_ptr; /* job's assoc record ptr confirm the
677 					 * value before use */
678 	char *batch_features;		/* features required for batch script */
679 	uint16_t batch_flag;		/* 1 or 2 if batch job (with script),
680 					 * 2 indicates retry mode (one retry) */
681 	char *batch_host;		/* host executing batch script */
682 	double billable_tres;		/* calculated billable tres for the
683 					 * job, as defined by the partition's
684 					 * billing weight. Recalculated upon job
685 					 * resize.  Cannot be calculated until
					 * the job is allocated resources. */
687 	uint32_t bit_flags;             /* various job flags */
688 	char *burst_buffer;		/* burst buffer specification */
689 	char *burst_buffer_state;	/* burst buffer state */
690 	char *clusters;			/* clusters job is submitted to with -M
691 					   option */
692 	char *comment;			/* arbitrary comment */
693 	uint32_t cpu_cnt;		/* current count of CPUs held
694 					 * by the job, decremented while job is
695 					 * completing */
696 	char *cpus_per_tres;		/* semicolon delimited list of TRES=# values */
697 	uint16_t cr_enabled;            /* specify if Consumable Resources
698 					 * is enabled. Needed since CR deals
699 					 * with a finer granularity in its
700 					 * node/cpu scheduling (available cpus
701 					 * instead of available nodes) than the
702 					 * linear plugin
703 					 * 0 if cr is NOT enabled,
704 					 * 1 if cr is enabled */
705 	uint32_t db_flags;              /* Flags to send to the database
706 					 * record */
707 	uint64_t db_index;              /* used only for database plugins */
708 	time_t deadline;		/* deadline */
709 	uint32_t delay_boot;		/* Delay boot for desired node mode */
710 	uint32_t derived_ec;		/* highest exit code of all job steps */
711 	struct job_details *details;	/* job details */
	uint16_t direct_set_prio;	/* Priority set directly; if set,
					 * the system will not change the
					 * priority any further. */
715 	time_t end_time;		/* time execution ended, actual or
716 					 * expected. if terminated from suspend
717 					 * state, this is time suspend began */
718 	time_t end_time_exp;		/* when we believe the job is
719 					   going to end. */
	bool epilog_running;		/* true if EpilogSlurmctld is running */
721 	uint32_t exit_code;		/* exit code for job (status from
722 					 * wait call) */
723 	job_fed_details_t *fed_details;	/* details for federated jobs. */
724 	front_end_record_t *front_end_ptr; /* Pointer to front-end node running
725 					 * this job */
726 	List gres_list;			/* generic resource allocation detail */
727 	char *gres_alloc;		/* Allocated GRES added over all nodes
728 					 * to be passed to slurmdbd */
729 	uint32_t gres_detail_cnt;	/* Count of gres_detail_str records,
730 					 * one per allocated node */
731 	char **gres_detail_str;		/* Details of GRES index alloc per node */
732 	char *gres_req;			/* Requested GRES added over all nodes
733 					 * to be passed to slurmdbd */
734 	char *gres_used;		/* Actual GRES use added over all nodes
735 					 * to be passed to slurmdbd */
736 	uint32_t group_id;		/* group submitted under */
737 	het_job_details_t *het_details;	/* HetJob details */
738 	uint32_t het_job_id;		/* job ID of HetJob leader */
739 	char *het_job_id_set;		/* job IDs for all components */
740 	uint32_t het_job_offset;	/* HetJob component index */
741 	List het_job_list;		/* List of job pointers to all
742 					 * components */
743 	uint32_t job_id;		/* job ID */
744 	job_record_t *job_next;		/* next entry with same hash index */
745 	job_record_t *job_array_next_j;	/* job array linked list by job_id */
746 	job_record_t *job_array_next_t;	/* job array linked list by task_id */
747 	job_record_t *job_preempt_comp; /* het job preempt component */
748 	job_resources_t *job_resrcs;	/* details of allocated cores */
749 	uint32_t job_state;		/* state of the job */
750 	uint16_t kill_on_node_fail;	/* 1 if job should be killed on
751 					 * node failure */
752 	time_t last_sched_eval;		/* last time job was evaluated for scheduling */
753 	char *licenses;			/* licenses required by the job */
754 	List license_list;		/* structure with license info */
	acct_policy_limit_set_t limit_set; /* flags indicating whether an
					    * associated limit was set from
					    * a limit instead of from
					    * the request, or if the
					    * limit was set by an admin */
760 	uint16_t mail_type;		/* see MAIL_JOB_* in slurm.h */
761 	char *mail_user;		/* user to get e-mail notification */
762 	char *mem_per_tres;		/* semicolon delimited list of TRES=# values */
763 	char *mcs_label;		/* mcs_label if mcs plugin in use */
764 	char *name;			/* name of the job */
765 	char *network;			/* network/switch requirement spec */
766 	uint32_t next_step_id;		/* next step id to be used */
767 	char *nodes;			/* list of nodes allocated to job */
768 	slurm_addr_t *node_addr;	/* addresses of the nodes allocated to
769 					 * job */
770 	bitstr_t *node_bitmap;		/* bitmap of nodes allocated to job */
771 	bitstr_t *node_bitmap_cg;	/* bitmap of nodes completing job */
772 	uint32_t node_cnt;		/* count of nodes currently
773 					 * allocated to job */
774 	uint32_t node_cnt_wag;		/* count of nodes Slurm thinks
775 					 * will be allocated when the
776 					 * job is pending and node_cnt
777 					 * wasn't given by the user.
778 					 * This is packed in total_nodes
779 					 * when dumping state.  When
780 					 * state is read in check for
781 					 * pending state and set this
782 					 * instead of total_nodes */
783 	char *nodes_completing;		/* nodes still in completing state
784 					 * for this job, used to ensure
785 					 * epilog is not re-run for job */
786 	char *origin_cluster;		/* cluster name that the job was
787 					 * submitted from */
788 	uint16_t other_port;		/* port for client communications */
789 	char *partition;		/* name of job partition(s) */
790 	List part_ptr_list;		/* list of pointers to partition recs */
791 	bool part_nodes_missing;	/* set if job's nodes removed from this
792 					 * partition */
793 	part_record_t *part_ptr;	/* pointer to the partition record */
794 	uint8_t power_flags;		/* power management flags,
795 					 * see SLURM_POWER_FLAGS_ */
796 	time_t pre_sus_time;		/* time job ran prior to last suspend */
797 	time_t preempt_time;		/* job preemption signal time */
	bool preempt_in_progress;	/* Preemption of other jobs in progress
799 					 * in order to start this job,
800 					 * (Internal use only, don't save) */
801 	uint32_t prep_epilog_cnt;	/* count of epilog async tasks left */
802 	uint32_t prep_prolog_cnt;	/* count of prolog async tasks left */
803 	bool prep_prolog_failed;	/* any prolog_slurmctld failed */
804 	uint32_t priority;		/* relative priority of the job,
805 					 * zero == held (don't initiate) */
806 	uint32_t *priority_array;	/* partition based priority */
807 	priority_factors_object_t *prio_factors; /* cached value used
808 						  * by sprio command */
809 	uint32_t profile;		/* Acct_gather_profile option */
810 	uint32_t qos_id;		/* quality of service id */
811 	slurmdb_qos_rec_t *qos_ptr;	/* pointer to the quality of
812 					 * service record used for
813 					 * this job, confirm the
814 					 * value before use */
815 	void *qos_blocking_ptr;		/* internal use only, DON'T PACK */
816 	uint8_t reboot;			/* node reboot requested before start */
817 	uint16_t restart_cnt;		/* count of restarts */
818 	time_t resize_time;		/* time of latest size change */
819 	uint32_t resv_id;		/* reservation ID */
820 	char *resv_name;		/* reservation name */
821 	struct slurmctld_resv *resv_ptr;/* reservation structure pointer */
822 	uint32_t requid;	    	/* requester user ID */
823 	char *resp_host;		/* host for srun communications */
824 	char *sched_nodes;		/* list of nodes scheduled for job */
825 	dynamic_plugin_data_t *select_jobinfo;/* opaque data, BlueGene */
826 	uint32_t site_factor;		/* factor to consider in priority */
827 	char **spank_job_env;		/* environment variables for job prolog
828 					 * and epilog scripts as set by SPANK
829 					 * plugins */
	uint32_t spank_job_env_size;	/* element count in spank_job_env */
831 	uint16_t start_protocol_ver;	/* Slurm version job was
832 					 * started with either the
833 					 * creating message or the
834 					 * lowest slurmd in the
835 					 * allocation */
836 	time_t start_time;		/* time execution begins,
837 					 * actual or expected */
838 	char *state_desc;		/* optional details for state_reason */
839 	uint32_t state_reason;		/* reason job still pending or failed
840 					 * see slurm.h:enum job_state_reason */
841 	uint32_t state_reason_prev;	/* Previous state_reason, needed to
842 					 * return valid job information during
843 					 * scheduling cycle (state_reason is
844 					 * cleared at start of cycle) */
845 	uint32_t state_reason_prev_db;	/* Previous state_reason that isn't
846 					 * priority or resources, only stored in
847 					 * the database. */
848 	List step_list;			/* list of job's steps */
849 	time_t suspend_time;		/* time job last suspended or resumed */
850 	char *system_comment;		/* slurmctld's arbitrary comment */
851 	time_t time_last_active;	/* time of last job activity */
852 	uint32_t time_limit;		/* time_limit minutes or INFINITE,
853 					 * NO_VAL implies partition max_time */
854 	uint32_t time_min;		/* minimum time_limit minutes or
855 					 * INFINITE,
856 					 * zero implies same as time_limit */
857 	time_t tot_sus_time;		/* total time in suspend state */
858 	uint32_t total_cpus;		/* number of allocated cpus,
859 					 * for accounting */
860 	uint32_t total_nodes;		/* number of allocated nodes
861 					 * for accounting */
862 	char *tres_bind;		/* Task to TRES binding directives */
863 	char *tres_freq;		/* TRES frequency directives */
864 	char *tres_per_job;		/* comma delimited list of TRES values */
865 	char *tres_per_node;		/* comma delimited list of TRES values */
866 	char *tres_per_socket;		/* comma delimited list of TRES values */
867 	char *tres_per_task;		/* comma delimited list of TRES values */
868 	uint64_t *tres_req_cnt;         /* array of tres counts requested
869 					 * based off g_tres_count in
870 					 * assoc_mgr */
871 	char *tres_req_str;             /* string format of
872 					 * tres_req_cnt primarily
873 					 * used for state */
874 	char *tres_fmt_req_str;         /* formatted req tres string for job */
875 	uint64_t *tres_alloc_cnt;       /* array of tres counts allocated
876 					 * based off g_tres_count in
877 					 * assoc_mgr */
878 	char *tres_alloc_str;           /* simple tres string for job */
879 	char *tres_fmt_alloc_str;       /* formatted tres string for job */
880 	uint32_t user_id;		/* user the job runs as */
881 	char *user_name;		/* string version of user */
882 	uint16_t wait_all_nodes;	/* if set, wait for all nodes to boot
883 					 * before starting the job */
884 	uint16_t warn_flags;		/* flags for signal to send */
885 	uint16_t warn_signal;		/* signal to send before end_time */
886 	uint16_t warn_time;		/* when to send signal before
887 					 * end_time (secs) */
888 	char *wckey;			/* optional wckey */
889 
890 	/* Request number of switches support */
891 	uint32_t req_switch;  /* Minimum number of switches                */
892 	uint32_t wait4switch; /* Maximum time to wait for minimum switches */
893 	bool     best_switch; /* true=min number of switches met           */
894 	time_t wait4switch_start; /* Time started waiting for switch       */
895 };
896 
897 /* Job dependency specification, used in "depend_list" within job_record */
898 typedef enum {
899 	SLURM_DEPEND_AFTER = 1,	/* After job begins */
900 	SLURM_DEPEND_AFTER_ANY,	/* After job completes */
901 	SLURM_DEPEND_AFTER_NOT_OK, /* After job fails */
902 	SLURM_DEPEND_AFTER_OK,	/* After job completes successfully */
903 	SLURM_DEPEND_SINGLETON,	/* Only one job for this
904 				 * user/name at a time */
905 	SLURM_DEPEND_EXPAND,	/* Expand running job */
906 	SLURM_DEPEND_AFTER_CORRESPOND, /* After corresponding job array
					* element completes */
908 	SLURM_DEPEND_BURST_BUFFER, /* After job burst buffer
909 				    * stage-out completes */
910 } slurm_depend_types_t;
911 
912 #define SLURM_FLAGS_OR		0x0001	/* OR job dependencies */
913 #define SLURM_FLAGS_REMOTE      0x0002  /* Is a remote dependency */
914 
915 /* Used as values for depend_state in depend_spec_t */
916 enum {
917 	DEPEND_NOT_FULFILLED = 0,
918 	DEPEND_FULFILLED,
919 	DEPEND_FAILED
920 };
921 
922 typedef struct depend_spec {
923 	uint32_t	array_task_id;	/* INFINITE for all array tasks */
924 	uint16_t	depend_type;	/* SLURM_DEPEND_* type */
925 	uint16_t	depend_flags;	/* SLURM_FLAGS_* type */
926 	uint32_t        depend_state;   /* Status of the dependency */
927 	uint32_t        depend_time;    /* time to wait (mins) */
928 	uint32_t	job_id;		/* Slurm job_id */
929 	job_record_t   *job_ptr;	/* pointer to this job */
930 	uint64_t 	singleton_bits; /* which clusters have satisfied the
931 					   singleton dependency */
932 } depend_spec_t;
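/*
 * Illustrative example (assumption about typical contents): a dependency
 * specification of "afterok:1234" would be stored as a single depend_spec_t
 * with depend_type = SLURM_DEPEND_AFTER_OK, job_id = 1234 and
 * depend_state = DEPEND_NOT_FULFILLED until job 1234 completes successfully.
 * The job ID is hypothetical and used only for illustration.
 */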
933 
934 #define STEP_FLAG 0xbbbb
935 #define STEP_MAGIC 0xcafecafe
936 
937 typedef struct {
938 	uint32_t magic;			/* magic cookie to test data integrity */
939 					/* DO NOT ALPHABETIZE */
940 	uint16_t batch_step;		/* 1 if batch job step, 0 otherwise */
941 	bitstr_t *core_bitmap_job;	/* bitmap of cores allocated to this
942 					 * step relative to job's nodes,
943 					 * see src/common/job_resources.h */
944 	uint32_t cpu_count;		/* count of step's CPUs */
945 	uint32_t cpu_freq_min; 		/* Minimum cpu frequency  */
946 	uint32_t cpu_freq_max; 		/* Maximum cpu frequency  */
947 	uint32_t cpu_freq_gov; 		/* cpu frequency governor */
948 	uint16_t cpus_per_task;		/* cpus per task initiated */
949 	char *cpus_per_tres;		/* semicolon delimited list of TRES=# values */
950 	uint16_t cyclic_alloc;		/* set for cyclic task allocation
951 					 * across nodes */
952 	uint16_t exclusive;		/* dedicated resources for the step */
953 	uint32_t exit_code;		/* highest exit code from any task */
954 	bitstr_t *exit_node_bitmap;	/* bitmap of exited nodes */
955 	ext_sensors_data_t *ext_sensors; /* external sensors plugin data */
956 	List gres_list;			/* generic resource allocation detail */
957 	char *host;			/* host for srun communications */
958 	job_record_t *job_ptr;		/* ptr to the job that owns the step */
959 	jobacctinfo_t *jobacct;         /* keep track of process info in the
960 					 * step */
961 	char *mem_per_tres;		/* semicolon delimited list of TRES=# values */
962 	char *name;			/* name of job step */
963 	char *network;			/* step's network specification */
964 	uint8_t no_kill;		/* 1 if no kill on node failure */
965 	uint64_t pn_min_memory;		/* minimum real memory per node OR
966 					 * real memory per CPU | MEM_PER_CPU,
967 					 * default=0 (use job limit) */
968 	uint16_t port;			/* port for srun communications */
969 	time_t pre_sus_time;		/* time step ran prior to last suspend */
970 	uint16_t start_protocol_ver;	/* Slurm version step was
971 					 * started with either srun
972 					 * or the lowest slurmd
973 					 * version it is talking to */
974 	int *resv_port_array;		/* reserved port indexes */
975 	uint16_t resv_port_cnt;		/* count of ports reserved per node */
976 	char *resv_ports;		/* ports reserved for job */
977 	uint32_t requid;	    	/* requester user ID */
978 	time_t start_time;		/* step allocation start time */
979 	uint32_t time_limit;	  	/* step allocation time limit */
980 	dynamic_plugin_data_t *select_jobinfo;/* opaque data, BlueGene */
981 	uint32_t srun_pid;		/* PID of srun (also see host/port) */
982 	uint32_t state;			/* state of the step. See job_states */
983 	uint32_t step_id;		/* step number */
984 	slurm_step_layout_t *step_layout;/* info about how tasks are laid out
985 					  * in the step */
986 	bitstr_t *step_node_bitmap;	/* bitmap of nodes allocated to job
987 					 * step */
988 /*	time_t suspend_time;		 * time step last suspended or resumed
989 					 * implicitly the same as suspend_time
990 					 * in the job record */
991 	dynamic_plugin_data_t *switch_job; /* switch context, opaque */
992 	time_t time_last_active;	/* time step was last found on node */
993 	time_t tot_sus_time;		/* total time in suspended state */
994 	char *tres_alloc_str;           /* simple TRES string for step */
995 	char *tres_bind;		/* Task to TRES binding directives */
996 	char *tres_fmt_alloc_str;       /* formatted tres string for step */
997 	char *tres_freq;		/* TRES frequency directives */
998 	char *tres_per_step;		/* semicolon delimited list of TRES=# values */
999 	char *tres_per_node;		/* semicolon delimited list of TRES=# values */
1000 	char *tres_per_socket;		/* semicolon delimited list of TRES=# values */
1001 	char *tres_per_task;		/* semicolon delimited list of TRES=# values */
1002 } step_record_t;
1003 
1004 typedef struct {
1005 	job_record_t *job_ptr;
1006 	List job_queue;
1007 	part_record_t *part_ptr;
1008 	uint32_t prio;
1009 	slurmctld_resv_t *resv_ptr;
1010 } job_queue_req_t;
1011 
1012 extern List job_list;			/* list of job_record entries */
1013 extern List purge_files_list;		/* list of job ids to purge files of */
1014 
1015 /*****************************************************************************\
1016  *  Consumable Resources parameters and data structures
1017 \*****************************************************************************/
1018 
/*
 * Define the type of update and of data retrieval that can happen
 * from the "select/cons_res" plugin. This information is needed to
 * support processors as consumable resources.  This structure will be
 * useful when updating other types of consumable resources as well.
 */
1025 enum select_plugindata_info {
1026 	SELECT_CR_PLUGIN,    /* data-> uint32 See SELECT_TYPE_* below */
1027 	SELECT_BITMAP,       /* Unused since version 2.0 */
1028 	SELECT_ALLOC_CPUS,   /* data-> uint16 alloc cpus (CR support) */
1029 	SELECT_ALLOC_LPS,    /* data-> uint32 alloc lps  (CR support) */
1030 	SELECT_AVAIL_MEMORY, /* data-> uint64 avail mem  (CR support) */
1031 	SELECT_STATIC_PART,  /* data-> uint16, 1 if static partitioning
1032 			      * BlueGene support */
1033 	SELECT_CONFIG_INFO,  /* data-> List get .conf info from select
1034 			      * plugin */
1035 	SELECT_SINGLE_JOB_TEST	/* data-> uint16 1 if one select_g_job_test()
1036 				 * call per job, node weights in node data
1037 				 * structure, 0 otherwise, for cons_tres */
1038 };
1039 #define SELECT_TYPE_CONS_RES	1
1040 #define SELECT_TYPE_CONS_TRES	2
1041 
1042 
1043 /*****************************************************************************\
1044  *  Global assoc_cache variables
1045 \*****************************************************************************/
1046 
1047 /* flag to let us know if we are running on cache or from the actual
1048  * database */
1049 extern uint16_t running_cache;
1050 /* mutex and signal to let us know if associations have been reset so we need to
1051  * redo all the pointers to the associations */
1052 extern pthread_mutex_t assoc_cache_mutex; /* assoc cache mutex */
1053 extern pthread_cond_t assoc_cache_cond; /* assoc cache condition */
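/*
 * Illustrative sketch (not an existing helper): code that must wait until
 * the association cache has been replaced by live database data typically
 * uses a condition-variable wait loop built from the objects above, e.g.:
 *
 *	slurm_mutex_lock(&assoc_cache_mutex);
 *	while (running_cache)
 *		slurm_cond_wait(&assoc_cache_cond, &assoc_cache_mutex);
 *	slurm_mutex_unlock(&assoc_cache_mutex);
 *
 * slurm_mutex_lock()/slurm_cond_wait() are the pthread wrappers from
 * src/common/macros.h.
 */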
1054 
1055 /*****************************************************************************\
1056  *  Global slurmctld functions
1057 \*****************************************************************************/
1058 
1059 /*
1060  * abort_job_on_node - Kill the specific job_id on a specific node,
1061  *	the request is not processed immediately, but queued.
1062  *	This is to prevent a flood of pthreads if slurmctld restarts
1063  *	without saved state and slurmd daemons register with a
1064  *	multitude of running jobs. Slurmctld will not recognize
1065  *	these jobs and use this function to kill them - one
1066  *	agent request per node as they register.
1067  * IN job_id - id of the job to be killed
1068  * IN job_ptr - pointer to terminating job (NULL if unknown, e.g. orphaned)
1069  * IN node_name - name of the node on which the job resides
1070  */
1071 extern void abort_job_on_node(uint32_t job_id, job_record_t *job_ptr,
1072 			      char *node_name);
1073 
1074 /*
 * abort_job_on_nodes - Kill the specific job on the specific nodes,
1076  *	the request is not processed immediately, but queued.
1077  *	This is to prevent a flood of pthreads if slurmctld restarts
1078  *	without saved state and slurmd daemons register with a
1079  *	multitude of running jobs. Slurmctld will not recognize
1080  *	these jobs and use this function to kill them - one
1081  *	agent request per node as they register.
1082  * IN job_ptr - pointer to terminating job
 * IN node_bitmap - bitmap of the nodes on which the job resides
1084  */
1085 extern void abort_job_on_nodes(job_record_t *job_ptr, bitstr_t *node_bitmap);
1086 
1087 /*
1088  * If a job has a FAIL_ACCOUNT or FAIL_QOS start_reason check and set pointers
1089  * if they are now valid.
1090  */
1091 extern void set_job_failed_assoc_qos_ptr(job_record_t *job_ptr);
1092 
1093 /* set the tres_req_str and tres_req_fmt_str for the job.  assoc_mgr_locked
1094  * is set if the assoc_mgr read lock is already set.
1095  */
1096 extern void set_job_tres_req_str(job_record_t *job_ptr, bool assoc_mgr_locked);
1097 
1098 /* set the tres_alloc_str and tres_alloc_fmt_str for the job.  assoc_mgr_locked
1099  * is set if the assoc_mgr read lock is already set.
1100  */
1101 extern void set_job_tres_alloc_str(job_record_t *job_ptr,
1102 				   bool assoc_mgr_locked);
1103 
1104 /* Note that the backup slurmctld has assumed primary control.
1105  * This function can be called multiple times. */
1106 extern void backup_slurmctld_restart(void);
1107 
1108 /* Complete a batch job requeue logic after all steps complete so that
1109  * subsequent jobs appear in a separate accounting record. */
1110 extern void batch_requeue_fini(job_record_t *job_ptr);
1111 
1112 /* Build a bitmap of nodes completing this job */
1113 extern void build_cg_bitmap(job_record_t *job_ptr);
1114 
1115 /* Build structure with job allocation details */
1116 extern resource_allocation_response_msg_t *build_job_info_resp(
1117 	job_record_t *job_ptr);
1118 
1119 /*
1120  * create_part_record - create a partition record
1121  * IN name - name will be xstrdup()'d into the part_record
1122  * RET a pointer to the record or NULL if error
1123  * global: default_part - default partition parameters
1124  *         part_list - global partition list
1125  * NOTE: the record's values are initialized to those of default_part
1126  * NOTE: allocates memory that should be xfreed with delete_part_record
1127  */
1128 extern part_record_t *create_part_record(const char *name);
1129 
1130 /*
1131  * build_part_bitmap - update the total_cpus, total_nodes, and node_bitmap
1132  *	for the specified partition, also reset the partition pointers in
1133  *	the node back to this partition.
1134  * IN part_ptr - pointer to the partition
1135  * RET 0 if no error, errno otherwise
1136  * global: node_record_table_ptr - pointer to global node table
 * NOTE: this does not report nodes defined in more than one partition. This
1138  *	is checked only upon reading the configuration file, not on an update
1139  */
1140 extern int build_part_bitmap(part_record_t *part_ptr);
1141 
1142 /*
1143  * job_limits_check - check the limits specified for the job.
 * IN job_pptr - pointer to a pointer to the job table entry.
1145  * IN check_min_time - if true test job's minimum time limit,
1146  *		otherwise test maximum time limit
1147  * RET WAIT_NO_REASON on success, fail status otherwise.
1148  */
1149 extern int job_limits_check(job_record_t **job_pptr, bool check_min_time);
1150 
1151 /*
1152  * delete_partition - delete the specified partition
 * IN part_desc_ptr - partition specification from RPC
1154  * RET 0 on success, errno otherwise
1155  */
1156 extern int delete_partition(delete_part_msg_t *part_desc_ptr);
1157 
1158 /*
1159  * delete_step_record - delete record for job step for specified job_ptr
1160  *	and step_id
1161  * IN job_ptr - pointer to job table entry to have step record removed
1162  * IN step_id - id of the desired job step
1163  * RET 0 on success, errno otherwise
1164  */
1165 extern int delete_step_record(job_record_t *job_ptr, uint32_t step_id);
1166 
1167 /*
1168  * delete_step_records - delete step record for specified job_ptr
1169  * IN job_ptr - pointer to job table entry to have step records removed
1170  */
1171 extern void delete_step_records(job_record_t *job_ptr);
1172 
1173 /*
1174  * Copy a job's dependency list
 * IN depend_list_src - a job's depend_list
 * RET copy of depend_list_src, must be freed by caller
1177  */
1178 extern List depended_list_copy(List depend_list_src);
1179 
1180 /*
1181  * drain_nodes - drain one or more nodes,
1182  *  no-op for nodes already drained or draining
1183  * IN nodes - nodes to drain
1184  * IN reason - reason to drain the nodes
1185  * IN reason_uid - who set the reason
1186  * RET SLURM_SUCCESS or error code
1187  * global: node_record_table_ptr - pointer to global node table
1188  */
1189 extern int drain_nodes ( char *nodes, char *reason, uint32_t reason_uid );
1190 
1191 /* dump_all_job_state - save the state of all jobs to file
1192  * RET 0 or error code */
1193 extern int dump_all_job_state ( void );
1194 
1195 /* dump_all_node_state - save the state of all nodes to file */
1196 extern int dump_all_node_state ( void );
1197 
1198 /* dump_all_part_state - save the state of all partitions to file */
1199 extern int dump_all_part_state ( void );
1200 
1201 /*
1202  * dump_job_desc - dump the incoming job submit request message
1203  * IN job_specs - job specification from RPC
1204  */
1205 extern void dump_job_desc(job_desc_msg_t * job_specs);
1206 
1207 /*
1208  * dump_job_step_state - dump the state of a specific job step to a buffer,
1209  *	load with load_step_state
 * IN x - pointer to the job step (step_record_t *) to be dumped
 * IN/OUT arg - buffer (Buf) in which to store data, pointers automatically
 *	advanced
1212  */
1213 extern int dump_job_step_state(void *x, void *arg);
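
/*
 * Usage sketch (an assumption about typical use, not lifted from the Slurm
 * sources): the (void *x, void *arg) signature matches a list_for_each()
 * callback, with x being a step_record_t * from the job's step_list and arg
 * the Buf being filled.  BUF_SIZE is the default initial size from pack.h.
 *
 *	Buf buffer = init_buf(BUF_SIZE);
 *	(void) list_for_each(job_ptr->step_list, dump_job_step_state, buffer);
 */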
1214 
1215 /*
1216  * dump_step_desc - dump the incoming step initiate request message
1217  * IN step_spec - job step request specification from RPC
1218  */
1219 extern void dump_step_desc(job_step_create_request_msg_t *step_spec);
1220 
1221 /* Remove one node from a job's allocation */
1222 extern void excise_node_from_job(job_record_t *job_ptr,
1223 				 node_record_t *node_ptr);
1224 
1225 /* make_node_avail - flag specified node as available */
1226 extern void make_node_avail(int node_inx);
1227 
1228 /*
1229  * Copy a job's feature list
 * IN feature_list_src - a job's feature_list
 * RET copy of feature_list_src, must be freed by caller
1232  */
1233 extern List feature_list_copy(List feature_list_src);
1234 
1235 /*
1236  * find_job_array_rec - return a pointer to the job record with the given
1237  *	array_job_id/array_task_id
1238  * IN job_id - requested job's id
1239  * IN array_task_id - requested job's task id,
1240  *		      NO_VAL if none specified (i.e. not a job array)
1241  *		      INFINITE return any task for specified job id
1242  * RET pointer to the job's record, NULL on error
1243  */
1244 extern job_record_t *find_job_array_rec(uint32_t array_job_id,
1245 					uint32_t array_task_id);
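
/*
 * Illustrative calls showing the array_task_id conventions above (job and
 * task IDs are hypothetical):
 *
 *	job_record_t *job, *task, *any;
 *	job  = find_job_array_rec(1234, NO_VAL);	// job 1234, no task id
 *	task = find_job_array_rec(1234, 7);		// array task 1234_7
 *	any  = find_job_array_rec(1234, INFINITE);	// any task of job 1234
 */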
1246 
1247 /*
1248  * find_het_job_record - return a pointer to the job record with the given ID
1249  * IN job_id - requested job's ID
 * IN het_job_id - hetjob component ID
1251  * RET pointer to the job's record, NULL on error
1252  */
1253 extern job_record_t *find_het_job_record(uint32_t job_id, uint32_t het_job_id);
1254 
1255 /*
1256  * find_job_record - return a pointer to the job record with the given job_id
1257  * IN job_id - requested job's id
1258  * RET pointer to the job's record, NULL on error
1259  */
1260 extern job_record_t *find_job_record(uint32_t job_id);
1261 
1262 /*
1263  * find_first_node_record - find a record for first node in the bitmap
1264  * IN node_bitmap
1265  */
1266 extern node_record_t *find_first_node_record(bitstr_t *node_bitmap);
1267 
1268 /*
1269  * find_part_record - find a record for partition with specified name
1270  * IN name - name of the desired partition
1271  * RET pointer to partition or NULL if not found
1272  */
1273 extern part_record_t *find_part_record(char *name);
1274 
1275 /*
1276  * find_step_record - return a pointer to the step record with the given
1277  *	job_id and step_id
1278  * IN job_ptr - pointer to job table entry to have step record added
1279  * IN step_id - id of the desired job step
1280  * RET pointer to the job step's record, NULL on error
1281  */
1282 extern step_record_t *find_step_record(job_record_t *job_ptr, uint32_t step_id);
1283 
1284 /*
1285  * free_null_array_recs - free an xmalloc'd job_array_struct_t structure inside
1286  *                        of a job_record_t and set job_ptr->array_recs to NULL.
1287  */
1288 extern void free_null_array_recs(job_record_t *array_recs);
1289 
1290 /*
1291  * get_job_env - return the environment variables and their count for a
1292  *	given job
1293  * IN job_ptr - pointer to job for which data is required
1294  * OUT env_size - number of elements to read
 * RET pointer to array of string pointers containing environment variables
1296  */
1297 extern char **get_job_env(job_record_t *job_ptr, uint32_t *env_size);
1298 
1299 /*
1300  * get_job_script - return the script for a given job
1301  * IN job_ptr - pointer to job for which data is required
1302  * RET Buf containing job script
1303  */
1304 extern Buf get_job_script(const job_record_t *job_ptr);
1305 
1306 /*
1307  * Return the next available job_id to be used.
1308  * IN test_only - if true, doesn't advance the job_id sequence, just returns
1309  * 	what the next job id will be.
1310  * RET a valid job_id or SLURM_ERROR if all job_ids are exhausted.
1311  */
1312 extern uint32_t get_next_job_id(bool test_only);
1313 
1314 /*
1315  * get_part_list - find record for named partition(s)
1316  * IN name - partition name(s) in a comma separated list
1317  * OUT err_part - The first invalid partition name.
1318  * RET List of pointers to the partitions or NULL if not found
1319  * NOTE: Caller must free the returned list
1320  * NOTE: Caller must free err_part
1321  */
1322 extern List get_part_list(char *name, char **err_part);
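
/*
 * Usage sketch of the ownership rules noted above (partition names are
 * hypothetical); both the returned List and err_part belong to the caller:
 *
 *	char *err_part = NULL;
 *	List parts = get_part_list("debug,batch", &err_part);
 *	if (!parts) {
 *		error("invalid partition name: %s", err_part);
 *		xfree(err_part);
 *	} else {
 *		// ... use the part_record_t pointers in parts ...
 *		FREE_NULL_LIST(parts);
 *	}
 */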
1323 
1324 /*
1325  * init_depend_policy()
1326  * Initialize variables from DependencyParameters
1327  */
1328 extern void init_depend_policy(void);
1329 
1330 /*
1331  * init_job_conf - initialize the job configuration tables and values.
1332  *	this should be called after creating node information, but
1333  *	before creating any job entries.
1334  * RET 0 if no error, otherwise an error code
1335  * global: last_job_update - time of last job table update
1336  *	job_list - pointer to global job list
1337  */
1338 extern int init_job_conf (void);
1339 
1340 /*
1341  * init_node_conf - initialize the node configuration tables and values.
1342  *	this should be called before creating any node or configuration
1343  *	entries.
1344  * RET 0 if no error, otherwise an error code
1345  * global: node_record_table_ptr - pointer to global node table
1346  *         default_node_record - default values for node records
1347  *         default_config_record - default values for configuration records
1348  *         hash_table - table of hash indexes
1349  *         last_node_update - time of last node table update
1350  */
1351 extern int init_node_conf (void);
1352 
1353 /*
1354  * init_part_conf - initialize the default partition configuration values
1355  *	and create a (global) partition list.
1356  * this should be called before creating any partition entries.
1357  * RET 0 if no error, otherwise an error code
1358  * global: default_part - default partition values
1359  *         part_list - global partition list
1360  */
1361 extern int init_part_conf (void);
1362 
1363 /* init_requeue_policy()
1364  * Initialize the requeue exit/hold bitmaps.
1365  */
1366 extern void init_requeue_policy(void);
1367 
1368 /*
1369  * is_node_down - determine if the specified node's state is DOWN
1370  * IN name - name of the node
1371  * RET true if node exists and is down, otherwise false
1372  */
1373 extern bool is_node_down (char *name);
1374 
1375 /*
1376  * is_node_resp - determine if the specified node's state is responding
1377  * IN name - name of the node
1378  * RET true if node exists and is responding, otherwise false
1379  */
1380 extern bool is_node_resp (char *name);
1381 
1382 /* Fail a job because the qos is no longer valid */
1383 extern int job_fail_qos(job_record_t *job_ptr, const char *func_name);
1384 
1385 /*
1386  * delete_job_desc_files - remove the state files and directory
1387  * for a given job_id from SlurmStateSaveLocation
1388  */
1389 extern void delete_job_desc_files(uint32_t job_id);
1390 
1391 /*
1392  * job_alloc_info - get details about an existing job allocation
 * IN uid - uid of the user issuing the request
1394  * IN job_id - ID of job for which info is requested
1395  * OUT job_pptr - set to pointer to job record
1396  * NOTE: See job_alloc_info_ptr() if job pointer is known
1397  */
1398 extern int job_alloc_info(uint32_t uid, uint32_t job_id,
1399 			  job_record_t **job_pptr);
1400 
1401 /*
1402  * job_alloc_info_ptr - get details about an existing job allocation
 * IN uid - uid of the user issuing the request
1404  * IN job_ptr - pointer to job record
1405  * NOTE: See job_alloc_info() if job pointer not known
1406  */
1407 extern int job_alloc_info_ptr(uint32_t uid, job_record_t *job_ptr);
1408 
1409 /*
1410  * job_allocate - create job_records for the supplied job specification and
1411  *	allocate nodes for it.
1412  * IN job_specs - job specifications
1413  * IN immediate - if set then either initiate the job immediately or fail
1414  * IN will_run - don't initiate the job if set, just test if it could run
1415  *	now or later
1416  * OUT resp - will run response (includes start location, time, etc.)
1417  * IN allocate - resource allocation request only if set, batch job if zero
 * IN submit_uid - uid of user issuing the request
1419  * OUT job_pptr - set to pointer to job record
1420  * OUT err_msg - Custom error message to the user, caller to xfree results
1421  * IN protocol_version - version of the code the caller is using
1422  * RET 0 or an error code. If the job would only be able to execute with
1423  *	some change in partition configuration then
1424  *	ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE is returned
1425  * NOTE: If allocating nodes lx[0-7] to a job and those nodes have cpu counts
1426  *	of 4, 4, 4, 4, 8, 8, 4, 4 then num_cpu_groups=3, cpus_per_node={4,8,4}
1427  *	and cpu_count_reps={4,2,2}
1428  * globals: job_list - pointer to global job list
1429  *	list_part - global list of partition info
1430  *	default_part_loc - pointer to default partition
1431  * NOTE: lock_slurmctld on entry: Read config Write job, Write node, Read part
1432  */
1433 extern int job_allocate(job_desc_msg_t * job_specs, int immediate,
1434 			int will_run, will_run_response_msg_t **resp,
1435 			int allocate, uid_t submit_uid,
1436 			job_record_t **job_pptr,
1437 			char **err_msg, uint16_t protocol_version);
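
/*
 * Worked example of the cpus_per_node/cpu_count_reps run-length encoding
 * described in the NOTE above.  The helper below is only an illustrative
 * sketch (it is not part of slurmctld); it compresses per-node CPU counts
 * in allocation order, so {4,4,4,4,8,8,4,4} yields num_cpu_groups=3,
 * cpus_per_node={4,8,4} and cpu_count_reps={4,2,2}.
 *
 *	static int _compress_cpus(const uint16_t *cpus, int node_cnt,
 *				  uint16_t *cpus_per_node,
 *				  uint32_t *cpu_count_reps)
 *	{
 *		int groups = 0;
 *		for (int i = 0; i < node_cnt; i++) {
 *			if (groups && (cpus_per_node[groups - 1] == cpus[i])) {
 *				cpu_count_reps[groups - 1]++;
 *			} else {
 *				cpus_per_node[groups] = cpus[i];
 *				cpu_count_reps[groups] = 1;
 *				groups++;
 *			}
 *		}
 *		return groups;	// num_cpu_groups
 *	}
 */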
1438 
1439 /* If this is a job array meta-job, prepare it for being scheduled */
1440 extern void job_array_pre_sched(job_record_t *job_ptr);
1441 
1442 /* If this is a job array meta-job, clean up after scheduling attempt */
1443 extern job_record_t *job_array_post_sched(job_record_t *job_ptr);
1444 
1445 /* Create an exact copy of an existing job record for a job array.
 * IN job_ptr - META job record for a job array, which is to become an
 *		individual task of the job array.
1448  *		Set the job's array_task_id to the task to be split out.
1449  * RET - The new job record, which is the new META job record. */
1450 extern job_record_t *job_array_split(job_record_t *job_ptr);
1451 
1452 /* Record the start of one job array task */
1453 extern void job_array_start(job_record_t *job_ptr);
1454 
1455 /* Return true if a job array task can be started */
1456 extern bool job_array_start_test(job_record_t *job_ptr);
1457 
1458 /* Clear job's CONFIGURING flag and advance end time as needed */
1459 extern void job_config_fini(job_record_t *job_ptr);
1460 
/* Reset a job's end_time based upon its start_time and time_limit.
1462  * NOTE: Do not reset the end_time if already being preempted */
1463 extern void job_end_time_reset(job_record_t *job_ptr);
1464 /*
1465  * job_hold_by_assoc_id - Hold all pending jobs with a given
1466  *	association ID. This happens when an association is deleted (e.g. when
1467  *	a user is removed from the association database).
1468  * RET count of held jobs
1469  */
1470 extern int job_hold_by_assoc_id(uint32_t assoc_id);
1471 
1472 /*
1473  * job_hold_by_qos_id - Hold all pending jobs with a given
1474  *	QOS ID. This happens when a QOS is deleted (e.g. when
1475  *	a QOS is removed from the association database).
1476  * RET count of held jobs
1477  */
1478 extern int job_hold_by_qos_id(uint32_t qos_id);
1479 
1480 /* log the completion of the specified job */
1481 extern void job_completion_logger(job_record_t *job_ptr, bool requeue);
1482 
1483 /*
1484  * Return total amount of memory allocated to a job. This can be based upon
1485  * a GRES specification with various GRES/memory allocations on each node.
1486  * If current allocation information is not available, estimate memory based
1487  * upon pn_min_memory and either CPU or node count.
1488  */
1489 extern uint64_t job_get_tres_mem(struct job_resources *job_res,
1490 				 uint64_t pn_min_memory, uint32_t cpu_cnt,
1491 				 uint32_t node_cnt);
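
/*
 * Sketch of the fallback estimate described above (an assumption about the
 * pn_min_memory convention, where the MEM_PER_CPU flag from slurm.h marks a
 * per-CPU rather than per-node value; this is not lifted from the
 * implementation):
 *
 *	uint64_t mem_est;
 *	if (pn_min_memory & MEM_PER_CPU)
 *		mem_est = (pn_min_memory & (~MEM_PER_CPU)) * cpu_cnt;
 *	else
 *		mem_est = pn_min_memory * node_cnt;
 */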
1492 
1493 /*
1494  * job_epilog_complete - Note the completion of the epilog script for a
1495  *	given job
1496  * IN job_id      - id of the job for which the epilog was executed
1497  * IN node_name   - name of the node on which the epilog was executed
1498  * IN return_code - return code from epilog script
1499  * RET true if job is COMPLETED, otherwise false
1500  */
1501 extern bool job_epilog_complete(uint32_t job_id, char *node_name,
1502 		uint32_t return_code);
1503 
1504 /*
1505  * job_end_time - Process JOB_END_TIME
1506  * IN time_req_msg - job end time request
1507  * OUT timeout_msg - job timeout response to be sent
1508  * RET SLURM_SUCCESS or an error code
1509  */
1510 extern int job_end_time(job_alloc_info_msg_t *time_req_msg,
1511 			srun_timeout_msg_t *timeout_msg);
1512 
1513 /* job_fini - free all memory associated with job records */
1514 extern void job_fini (void);
1515 
1516 /*
1517  * job_fail - terminate a job due to initiation failure
1518  * IN job_id - id of the job to be killed
1519  * IN job_state - desired job state (JOB_BOOT_FAIL, JOB_NODE_FAIL, etc.)
 * RET 0 on success, otherwise ESLURM error code
1521  */
1522 extern int job_fail(uint32_t job_id, uint32_t job_state);
1523 
1524 
1525 /* job_hold_requeue()
1526  *
1527  * Requeue the job based upon its current state.
1528  * If JOB_SPECIAL_EXIT then requeue and hold with JOB_SPECIAL_EXIT state.
1529  * If JOB_REQUEUE_HOLD then requeue and hold.
1530  * If JOB_REQUEUE then requeue and let it run again.
1531  * The requeue can happen directly from job_requeue() or from
1532  * job_epilog_complete() after the last component has finished.
1533  */
1534 extern bool job_hold_requeue(job_record_t *job_ptr);
1535 
1536 /*
1537  * determine if job is ready to execute per the node select plugin
1538  * IN job_id - job to test
 * OUT ready - 1 if job is ready to execute, 0 otherwise
1540  * RET Slurm error code
1541  */
1542 extern int job_node_ready(uint32_t job_id, int *ready);
1543 
1544 /* Record accounting information for a job immediately before changing size */
1545 extern void job_pre_resize_acctg(job_record_t *job_ptr);
1546 
1547 /* Record accounting information for a job immediately after changing size */
1548 extern void job_post_resize_acctg(job_record_t *job_ptr);
1549 
1550 /*
1551  * job_signal - signal the specified job, access checks already done
1552  * IN job_ptr - job to be signaled
1553  * IN signal - signal to send, SIGKILL == cancel the job
1554  * IN flags  - see KILL_JOB_* flags in slurm.h
1555  * IN uid - uid of requesting user
1556  * IN preempt - true if job being preempted
1557  * RET 0 on success, otherwise ESLURM error code
1558  */
1559 extern int job_signal(job_record_t *job_ptr, uint16_t signal,
1560 		      uint16_t flags, uid_t uid, bool preempt);
1561 
1562 /*
1563  * job_signal_id - signal the specified job
1564  * IN job_id - id of the job to be signaled
1565  * IN signal - signal to send, SIGKILL == cancel the job
1566  * IN flags  - see KILL_JOB_* flags in slurm.h
1567  * IN uid - uid of requesting user
1568  * IN preempt - true if job being preempted
1569  * RET 0 on success, otherwise ESLURM error code
1570  */
1571 extern int job_signal_id(uint32_t job_id, uint16_t signal, uint16_t flags,
1572 			 uid_t uid, bool preempt);
1573 /*
1574  * het_job_signal - signal all components of a hetjob
 * IN het_job_leader - job record of the hetjob leader
1576  * IN signal - signal to send, SIGKILL == cancel the job
1577  * IN flags  - see KILL_JOB_* flags in slurm.h
1578  * IN uid - uid of requesting user
1579  * IN preempt - true if job being preempted
1580  * RET 0 on success, otherwise ESLURM error code
1581  */
1582 extern int het_job_signal(job_record_t *het_job_leader, uint16_t signal,
1583 			   uint16_t flags, uid_t uid, bool preempt);
1584 
1585 /*
1586  * job_str_signal - signal the specified job
 * IN job_id_str - id of the job to be signaled, valid formats include "#",
 *	"#_#" and "#_[expr]"
1589  * IN signal - signal to send, SIGKILL == cancel the job
1590  * IN flags  - see KILL_JOB_* flags in slurm.h
1591  * IN uid - uid of requesting user
1592  * IN preempt - true if job being preempted
1593  * RET 0 on success, otherwise ESLURM error code
1594  */
1595 extern int job_str_signal(char *job_id_str, uint16_t signal, uint16_t flags,
1596 			  uid_t uid, bool preempt);
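
/*
 * Illustrative job_id_str formats (IDs, uid and signals are hypothetical):
 *
 *	rc = job_str_signal("1234",       SIGKILL, 0, uid, false); // whole job
 *	rc = job_str_signal("1234_7",     SIGTERM, 0, uid, false); // one task
 *	rc = job_str_signal("1234_[2-5]", SIGKILL, 0, uid, false); // task range
 */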
1597 
1598 /*
1599  * job_suspend/job_suspend2 - perform some suspend/resume operation
1600  * NB job_suspend  - Uses the job_id field and ignores job_id_str
1601  * NB job_suspend2 - Ignores the job_id field and uses job_id_str
1602  *
1603  * IN sus_ptr - suspend/resume request message
1604  * IN uid - user id of the user issuing the RPC
1605  * IN conn_fd - file descriptor on which to send reply,
1606  *              -1 if none
 * indf_susp IN - set if job is being suspended indefinitely by user or admin
 *                and we should clear its priority, otherwise suspended
1609  *		  temporarily for gang scheduling
1610  * IN protocol_version - slurm protocol version of client
1611  * RET 0 on success, otherwise ESLURM error code
1612  */
1613 extern int job_suspend(suspend_msg_t *sus_ptr, uid_t uid,
1614 		       int conn_fd, bool indf_susp,
1615 		       uint16_t protocol_version);
1616 extern int job_suspend2(suspend_msg_t *sus_ptr, uid_t uid,
1617 			int conn_fd, bool indf_susp,
1618 			uint16_t protocol_version);
1619 
1620 /*
1621  * job_complete - note the normal termination the specified job
1622  * IN job_id - id of the job which completed
1623  * IN uid - user id of user issuing the RPC
1624  * IN requeue - job should be run again if possible
1625  * IN node_fail - true if job terminated due to node failure
1626  * IN job_return_code - job's return code, if set then set state to JOB_FAILED
1627  * RET - 0 on success, otherwise ESLURM error code
1628  * global: job_list - pointer global job list
1629  *	last_job_update - time of last job table update
1630  */
1631 extern int job_complete(uint32_t job_id, uid_t uid, bool requeue,
1632 			bool node_fail, uint32_t job_return_code);
1633 
1634 /*
1635  * job_independent - determine if this job has a dependent job pending
1636  *	or if the job's scheduled begin time is in the future
1637  * IN job_ptr - pointer to job being tested
 * RET - true if job no longer must be deferred for another job
1639  */
1640 extern bool job_independent(job_record_t *job_ptr);
1641 
1642 /*
 * job_req_node_filter - job request node filter.
1644  *	clear from a bitmap the nodes which can not be used for a job
1645  *	test memory size, required features, processor count, etc.
1646  * NOTE: Does not support exclusive OR of features.
1647  *	It just matches first element of XOR and ignores count.
 * IN job_ptr - pointer to the job to be scheduled
 * IN/OUT avail_bitmap - set of nodes being considered for use
1650  * RET SLURM_SUCCESS or EINVAL if can't filter (exclusive OR of features)
1651  */
1652 extern int job_req_node_filter(job_record_t *job_ptr, bitstr_t *avail_bitmap,
1653 			       bool test_only);
1654 
1655 /*
1656  * job_requeue - Requeue a running or pending batch job
1657  * IN uid - user id of user issuing the RPC
1658  * IN job_id - id of the job to be requeued
1659  * IN msg - slurm_msg to send response back on
1660  * IN preempt - true if job being preempted
1661  * IN flags - JobExitRequeue | Hold | JobFailed | etc.
1662  * RET 0 on success, otherwise ESLURM error code
1663  */
1664 extern int job_requeue(uid_t uid, uint32_t job_id, slurm_msg_t *msg,
1665 		       bool preempt, uint32_t flags);
1666 
1667 /*
1668  * job_requeue2 - Requeue a running or pending batch job
1669  * IN uid - user id of user issuing the RPC
1670  * IN req_ptr - request including ID of the job to be requeued
1671  * IN msg - slurm_msg to send response back on
1672  * IN preempt - true if job being preempted
1673  * RET 0 on success, otherwise ESLURM error code
1674  */
1675 extern int job_requeue2(uid_t uid, requeue_msg_t *req_ptr, slurm_msg_t *msg,
1676 			bool preempt);
1677 
1678 /*
1679  * job_set_top - Move the specified job to the top of the queue (at least
1680  *	for that user ID, partition, account, and QOS).
1681  *
1682  * IN top_ptr - user request
1683  * IN uid - user id of the user issuing the RPC
1684  * IN conn_fd - file descriptor on which to send reply,
1685  *              -1 if none
1686  * IN protocol_version - slurm protocol version of client
1687  * RET 0 on success, otherwise ESLURM error code
1688  */
1689 extern int job_set_top(top_job_msg_t *top_ptr, uid_t uid, int conn_fd,
1690 		       uint16_t protocol_version);
1691 
1692 /*
1693  * job_step_complete - note normal completion the specified job step
1694  * IN job_id - id of the job to be completed
1695  * IN step_id - id of the job step to be completed
1696  * IN uid - user id of user issuing the RPC
1697  * IN requeue - job should be run again if possible
1698  * IN job_return_code - job's return code, if set then set state to JOB_FAILED
1699  * RET 0 on success, otherwise ESLURM error code
1700  * global: job_list - pointer global job list
1701  *	last_job_update - time of last job table update
1702  */
1703 extern int job_step_complete (uint32_t job_id, uint32_t job_step_id,
1704 			uid_t uid, bool requeue, uint32_t job_return_code);
1705 
1706 /*
1707  * job_step_signal - signal the specified job step
1708  * IN job_id - id of the job to be cancelled
1709  * IN step_id - id of the job step to be cancelled
 * IN signal - signal to send, SIGKILL == cancel the job step
1711  * IN flags - RPC flags
1712  * IN uid - user id of user issuing the RPC
1713  * RET 0 on success, otherwise ESLURM error code
1714  * global: job_list - pointer global job list
1715  *	last_job_update - time of last job table update
1716  */
1717 int job_step_signal(uint32_t job_id, uint32_t step_id,
1718 		    uint16_t signal, uint16_t flags, uid_t uid);
1719 
1720 /*
1721  * job_time_limit - terminate jobs which have exceeded their time limit
1722  * global: job_list - pointer global job list
1723  *	last_job_update - time of last job table update
1724  */
1725 extern void job_time_limit (void);
1726 
1727 /* Builds the tres_req_cnt and tres_req_str of a job.
1728  * Only set when job is pending.
1729  * NOTE: job write lock must be locked before calling this */
1730 extern void job_set_req_tres(job_record_t *job_ptr, bool assoc_mgr_locked);
1731 
1732 /*
1733  * job_set_tres - set the tres up when allocating the job.
1734  * Only set when job is running.
1735  * NOTE: job write lock must be locked before calling this */
1736 extern void job_set_alloc_tres(job_record_t *job_ptr, bool assoc_mgr_locked);
1737 
1738 /*
1739  * job_update_tres_cnt - when job is completing remove allocated tres
1740  *                      from count.
1741  * IN/OUT job_ptr - job structure to be updated
1742  * IN node_inx    - node bit that is finished with job.
 * RET SLURM_SUCCESS on success, SLURM_ERROR on cpu_cnt underflow
1744  */
1745 extern int job_update_tres_cnt(job_record_t *job_ptr, int node_inx);
1746 
1747 /*
1748  * Modify a job's memory limit if allocated all memory on a node and that node
1749  * reboots, possibly with a different memory size (e.g. KNL MCDRAM mode changed)
1750  */
1751 extern void job_validate_mem(job_record_t *job_ptr);
1752 
1753 /*
 * check_job_step_time_limit - terminate job steps which have exceeded
1755  * their time limit
1756  * IN job_ptr - pointer to job containing steps to check
1757  * IN now - current time to use for the limit check
1758  */
1759 extern void check_job_step_time_limit(job_record_t *job_ptr, time_t now);
1760 
1761 /*
1762  * Kill job or job step
1763  *
1764  * IN job_step_kill_msg - msg with specs on which job/step to cancel.
1765  * IN uid               - uid of user requesting job/step cancel.
1766  */
1767 extern int kill_job_step(job_step_kill_msg_t *job_step_kill_msg, uint32_t uid);
1768 
1769 /*
1770  * kill_job_by_part_name - Given a partition name, deallocate resource for
1771  *	its jobs and kill them
1772  * IN part_name - name of a partition
1773  * RET number of killed jobs
1774  */
1775 extern int kill_job_by_part_name(char *part_name);
1776 
1777 /*
1778  * kill_job_on_node - Kill the specific job on a specific node.
1779  * IN job_ptr - pointer to terminating job
1780  * IN node_ptr - pointer to the node on which the job resides
1781  */
1782 extern void kill_job_on_node(job_record_t *job_ptr, node_record_t *node_ptr);
1783 
1784 /*
1785  * kill_job_by_front_end_name - Given a front end node name, deallocate
1786  *	resource for its jobs and kill them.
1787  * IN node_name - name of a front end node
1788  * RET number of jobs associated with this front end node
1789  */
1790 extern int kill_job_by_front_end_name(char *node_name);
1791 
1792 /*
1793  * kill_running_job_by_node_name - Given a node name, deallocate RUNNING
1794  *	or COMPLETING jobs from the node or kill them
1795  * IN node_name - name of a node
1796  * RET number of killed jobs
1797  */
1798 extern int kill_running_job_by_node_name(char *node_name);
1799 
1800 /*
1801  * kill_step_on_node - determine if the specified job has any job steps
1802  *	allocated to the specified node and kill them unless no_kill flag
1803  *	is set on the step
1804  * IN job_ptr - pointer to an active job record
1805  * IN node_ptr - pointer to a node record
 * IN node_fail - true if removed node has failed
1807  * RET count of killed job steps
1808  */
1809 extern int kill_step_on_node(job_record_t *job_ptr, node_record_t *node_ptr,
1810 			     bool node_fail);
1811 
/* list_compare_config - compare two entries from the config list based upon
1813  *	weight, see common/list.h for documentation */
1814 int list_compare_config (void *config_entry1, void *config_entry2);
1815 
1816 /*
1817  * list_find_feature - find an entry in the feature list, see list.h for
1818  *	documentation
 * IN key - feature name or NULL for all features
1820  * RET 1 if found, 0 otherwise
1821  */
1822 extern int list_find_feature(void *feature_entry, void *key);
1823 
1824 /*
1825  * list_find_part - find an entry in the partition list, see common/list.h
1826  *	for documentation
1827  * IN key - partition name or "universal_key" for all partitions
1828  * RET 1 if matches key, 0 otherwise
1829  * global- part_list - the global partition list
1830  */
1831 extern int list_find_part (void *part_entry, void *key);
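
/*
 * Usage sketch (assumed typical use, not from the Slurm sources):
 * list_find_part() has the ListFindF shape expected by list_find_first(),
 * so a named partition can be looked up in the global part_list:
 *
 *	char *name = "debug";	// hypothetical partition name
 *	part_record_t *part_ptr = list_find_first(part_list,
 *						   list_find_part, name);
 */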
1832 
1833 /*
1834  * load_all_job_state - load the job state from file, recover from last
1835  *	checkpoint. Execute this after loading the configuration file data.
1836  * RET 0 or error code
1837  */
1838 extern int load_all_job_state ( void );
1839 
1840 /*
1841  * load_all_node_state - Load the node state from file, recover on slurmctld
1842  *	restart. Execute this after loading the configuration file data.
1843  *	Data goes into common storage.
1844  * IN state_only - if true over-write only node state, features, gres and reason
1845  * RET 0 or error code
1846  */
1847 extern int load_all_node_state ( bool state_only );
1848 
1849 /*
1850  * load_last_job_id - load only the last job ID from state save file.
1851  * RET 0 or error code
1852  */
1853 extern int load_last_job_id( void );
1854 
1855 /*
1856  * load_part_uid_allow_list - reload the allow_uid list of partitions
1857  *	if required (updated group file or force set)
1858  * IN force - if set then always reload the allow_uid list
1859  */
1860 extern void load_part_uid_allow_list ( int force );
1861 
1862 /*
1863  * load_all_part_state - load the partition state from file, recover from
1864  *	slurmctld restart. execute this after loading the configuration
1865  *	file data.
1866  */
1867 extern int load_all_part_state ( void );
1868 
1869 /*
1870  * Create a new job step from data in a buffer (as created by
 * dump_job_step_state)
 * IN/OUT job_ptr - pointer to the job for which the step is to be loaded.
1873  * IN/OUT buffer - location from which to get data, pointers
1874  *                 automatically advanced
1875  */
1876 extern int load_step_state(job_record_t *job_ptr, Buf buffer,
1877 			   uint16_t protocol_version);
1878 
1879 /*
1880  * Log contents of avail_feature_list and active_feature_list
1881  */
1882 extern void log_feature_lists(void);
1883 
1884 /* make_node_alloc - flag specified node as allocated to a job
1885  * IN node_ptr - pointer to node being allocated
1886  * IN job_ptr  - pointer to job that is starting
1887  */
1888 extern void make_node_alloc(node_record_t *node_ptr, job_record_t *job_ptr);
1889 
1890 /* make_node_comp - flag specified node as completing a job
1891  * IN node_ptr - pointer to node marked for completion of job
1892  * IN job_ptr  - pointer to job that is completing
1893  * IN suspended - true if job was previously suspended
1894  */
1895 extern void make_node_comp(node_record_t *node_ptr, job_record_t *job_ptr,
1896 			   bool suspended);
1897 
1898 /*
1899  * make_node_idle - flag specified node as having finished with a job
1900  * IN node_ptr - pointer to node reporting job completion
1901  * IN job_ptr - pointer to job that just completed or NULL if not applicable
1902  */
1903 extern void make_node_idle(node_record_t *node_ptr, job_record_t *job_ptr);
1904 
1905 /*
 * Determine if the specified job can execute right now or is currently
1907  * blocked by a partition state or limit. These job states should match the
1908  * reason values returned by job_limits_check().
1909  */
1910 extern bool misc_policy_job_runnable_state(job_record_t *job_ptr);
1911 
/* msg_to_slurmd - send given msg_type to every slurmd, no args */
1913 extern void msg_to_slurmd (slurm_msg_type_t msg_type);
1914 
/* request a "configless" RPC be sent to all slurmd nodes */
1916 void push_reconfig_to_slurmd(void);
1917 
1918 /* node_fini - free all memory associated with node records */
1919 extern void node_fini (void);
1920 
1921 /* node_did_resp - record that the specified node is responding
1922  * IN name - name of the node */
1923 extern void node_did_resp (char *name);
1924 
1925 /*
1926  * node_not_resp - record that the specified node is not responding
1927  * IN name - name of the node
1928  * IN msg_time - time message was sent
1929  * IN resp_type - what kind of response came back from the node
1930  */
1931 extern void node_not_resp (char *name, time_t msg_time,
1932 			   slurm_msg_type_t resp_type);
1933 
1934 /* For every node with the "not_responding" flag set, clear the flag
1935  * and log that the node is not responding using a hostlist expression */
1936 extern void node_no_resp_msg(void);
1937 
1938 /* For a given job ID return the number of PENDING tasks which have their
1939  * own separate job_record (do not count tasks in pending META job record) */
1940 extern int num_pending_job_array_tasks(uint32_t array_job_id);
1941 
1942 /*
1943  * pack_all_jobs - dump all job information for all jobs in
1944  *	machine independent form (for network transmission)
1945  * OUT buffer_ptr - the pointer is set to the allocated buffer.
1946  * OUT buffer_size - set to size of the buffer in bytes
1947  * IN show_flags - job filtering options
1948  * IN uid - uid of user making request (for partition filtering)
1949  * IN filter_uid - pack only jobs belonging to this user if not NO_VAL
1950  * IN protocol_version - slurm protocol version of client
1951  * global: job_list - global list of job records
1952  * NOTE: the buffer at *buffer_ptr must be xfreed by the caller
1953  * NOTE: change _unpack_job_desc_msg() in common/slurm_protocol_pack.c
1954  *	whenever the data format changes
1955  */
1956 extern void pack_all_jobs(char **buffer_ptr, int *buffer_size,
1957 			  uint16_t show_flags, uid_t uid, uint32_t filter_uid,
1958 			  uint16_t protocol_version);
1959 
1960 /*
1961  * pack_spec_jobs - dump job information for specified jobs in
1962  *	machine independent form (for network transmission)
1963  * OUT buffer_ptr - the pointer is set to the allocated buffer.
1964  * OUT buffer_size - set to size of the buffer in bytes
1965  * IN show_flags - job filtering options
1966  * IN job_ids - list of job_ids to pack
1967  * IN uid - uid of user making request (for partition filtering)
1968  * IN filter_uid - pack only jobs belonging to this user if not NO_VAL
1969  * global: job_list - global list of job records
1970  * NOTE: the buffer at *buffer_ptr must be xfreed by the caller
1971  * NOTE: change _unpack_job_desc_msg() in common/slurm_protocol_pack.c
1972  *	whenever the data format changes
1973  */
1974 extern void pack_spec_jobs(char **buffer_ptr, int *buffer_size, List job_ids,
1975 			   uint16_t show_flags, uid_t uid, uint32_t filter_uid,
1976 			   uint16_t protocol_version);
1977 
1978 /*
1979  * pack_all_node - dump all configuration and node information for all nodes
1980  *	in machine independent form (for network transmission)
1981  * OUT buffer_ptr - pointer to the stored data
1982  * OUT buffer_size - set to size of the buffer in bytes
1983  * IN show_flags - node filtering options
1984  * IN uid - uid of user making request (for partition filtering)
1985  * IN protocol_version - slurm protocol version of client
1986  * global: node_record_table_ptr - pointer to global node table
1987  * NOTE: the caller must xfree the buffer at *buffer_ptr
1988  * NOTE: change slurm_load_node() in api/node_info.c when data format changes
1989  * NOTE: READ lock_slurmctld config before entry
1990  */
1991 extern void pack_all_node (char **buffer_ptr, int *buffer_size,
1992 			   uint16_t show_flags, uid_t uid,
1993 			   uint16_t protocol_version);
1994 
1995 /* Pack all scheduling statistics */
1996 extern void pack_all_stat(int resp, char **buffer_ptr, int *buffer_size,
1997 			  uint16_t protocol_version);
1998 
1999 /*
2000  * pack_ctld_job_step_info_response_msg - packs job step info
2001  * IN job_id - specific id or NO_VAL for all
2002  * IN step_id - specific id or NO_VAL for all
2003  * IN uid - user issuing request
2004  * IN show_flags - job step filtering options
2005  * OUT buffer - location to store data, pointers automatically advanced
2006  * IN protocol_version - slurm protocol version of client
2007  * RET - 0 or error code
2008  * NOTE: MUST free_buf buffer
2009  */
2010 extern int pack_ctld_job_step_info_response_msg(
2011 	uint32_t job_id, uint32_t step_id, uid_t uid,
2012 	uint16_t show_flags, Buf buffer, uint16_t protocol_version);
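
/*
 * Buffer handling sketch for the NOTE above (job_id, uid, show_flags and
 * protocol_version are assumed to be in scope; BUF_SIZE is the initial
 * buffer size from pack.h):
 *
 *	Buf buffer = init_buf(BUF_SIZE);
 *	int rc = pack_ctld_job_step_info_response_msg(job_id, NO_VAL, uid,
 *						      show_flags, buffer,
 *						      protocol_version);
 *	// ... send or inspect the packed data ...
 *	free_buf(buffer);
 */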
2013 
2014 /*
2015  * pack_all_part - dump all partition information for all partitions in
2016  *	machine independent form (for network transmission)
2017  * OUT buffer_ptr - the pointer is set to the allocated buffer.
2018  * OUT buffer_size - set to size of the buffer in bytes
2019  * IN show_flags - partition filtering options
2020  * IN uid - uid of user making request (for partition filtering)
2021  * IN protocol_version - slurm protocol version of client
2022  * global: part_list - global list of partition records
2023  * NOTE: the buffer at *buffer_ptr must be xfreed by the caller
2024  * NOTE: change slurm_load_part() in api/part_info.c if data format changes
2025  */
2026 extern void pack_all_part(char **buffer_ptr, int *buffer_size,
2027 			  uint16_t show_flags, uid_t uid,
2028 			  uint16_t protocol_version);
2029 
2030 /*
2031  * pack_job - dump all configuration information about a specific job in
2032  *	machine independent form (for network transmission)
2033  * IN dump_job_ptr - pointer to job for which information is requested
2034  * IN show_flags - job filtering options
2035  * IN/OUT buffer - buffer in which data is placed, pointers automatically
2036  *	updated
2037  * IN uid - user requesting the data
2038  * NOTE: change _unpack_job_desc_msg() in common/slurm_protocol_pack.c
2039  *	  whenever the data format changes
2040  */
2041 extern void pack_job(job_record_t *dump_job_ptr, uint16_t show_flags,
2042 		     Buf buffer, uint16_t protocol_version, uid_t uid);
2043 
2044 /*
2045  * pack_part - dump all configuration information about a specific partition
2046  *	in machine independent form (for network transmission)
2047  * IN part_ptr - pointer to partition for which information is requested
2048  * IN/OUT buffer - buffer in which data is placed, pointers automatically
2049  *	updated
2050  * global: default_part_loc - pointer to the default partition
2051  * NOTE: if you make any changes here be sure to make the corresponding
2052  *	changes to load_part_config in api/partition_info.c
2053  */
2054 extern void pack_part(part_record_t *part_ptr, Buf buffer,
2055 		      uint16_t protocol_version);
2056 
2057 /*
 * pack_one_job - dump information for one job in
2059  *	machine independent form (for network transmission)
2060  * OUT buffer_ptr - the pointer is set to the allocated buffer.
2061  * OUT buffer_size - set to size of the buffer in bytes
2062  * IN job_id - ID of job that we want info for
2063  * IN show_flags - job filtering options
2064  * IN uid - uid of user making request (for partition filtering)
2065  * NOTE: the buffer at *buffer_ptr must be xfreed by the caller
2066  * NOTE: change _unpack_job_desc_msg() in common/slurm_protocol_pack.c
2067  *	whenever the data format changes
2068  */
2069 extern int pack_one_job(char **buffer_ptr, int *buffer_size,
2070 			uint32_t job_id, uint16_t show_flags, uid_t uid,
2071 			uint16_t protocol_version);
2072 
2073 /*
2074  * pack_one_node - dump all configuration and node information for one node
2075  *	in machine independent form (for network transmission)
2076  * OUT buffer_ptr - pointer to the stored data
2077  * OUT buffer_size - set to size of the buffer in bytes
2078  * IN show_flags - node filtering options
2079  * IN uid - uid of user making request (for partition filtering)
2080  * IN node_name - name of node for which information is desired,
2081  *		  use first node if name is NULL
2082  * IN protocol_version - slurm protocol version of client
2083  * global: node_record_table_ptr - pointer to global node table
2084  * NOTE: the caller must xfree the buffer at *buffer_ptr
2085  * NOTE: change slurm_load_node() in api/node_info.c when data format changes
2086  * NOTE: READ lock_slurmctld config before entry
2087  */
2088 extern void pack_one_node (char **buffer_ptr, int *buffer_size,
2089 			   uint16_t show_flags, uid_t uid, char *node_name,
2090 			   uint16_t protocol_version);
2091 
2092 /* part_is_visible - should user be able to see this partition */
2093 extern bool part_is_visible(part_record_t *part_ptr, uid_t uid);
2094 
2095 /* part_fini - free all memory associated with partition records */
2096 extern void part_fini (void);
2097 
2098 /*
 * Create a copy of a job's part_list (partition list)
2100  * IN part_list_src - a job's part_list
2101  * RET copy of part_list_src, must be freed by caller
2102  */
2103 extern List part_list_copy(List part_list_src);
2104 
2105 /*
 * Determine if the specified job can execute right now or is currently
2107  * blocked by a partition state or limit. Execute job_limits_check() to
2108  * re-validate job state.
2109  */
2110 extern bool part_policy_job_runnable_state(job_record_t *job_ptr);
2111 
2112 /*
2113  * Validate a job's account against the partition's AllowAccounts or
2114  *	DenyAccounts parameters.
2115  * IN part_ptr - Partition pointer
2116  * IN acct - account name
 * IN job_ptr - Job pointer or NULL. If set and job cannot run, then set the
2118  *		job's state_desc and state_reason fields
2119  * RET SLURM_SUCCESS or error code
2120  */
2121 extern int part_policy_valid_acct(part_record_t *part_ptr, char *acct,
2122 				  job_record_t *job_ptr);
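
/*
 * Usage sketch (part_ptr and job_ptr are assumed to be in scope, and the
 * error code below is only illustrative): passing the job pointer lets the
 * function record why the job cannot run in state_desc/state_reason.
 *
 *	if (part_policy_valid_acct(part_ptr, job_ptr->account, job_ptr) !=
 *	    SLURM_SUCCESS)
 *		return ESLURM_INVALID_ACCOUNT;
 */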
2123 
2124 /*
2125  * Validate a job's QOS against the partition's AllowQOS or DenyQOS parameters.
2126  * IN part_ptr - Partition pointer
2127  * IN qos_ptr - QOS pointer
 * IN job_ptr - Job pointer or NULL. If set and job cannot run, then set the
2129  *		job's state_desc and state_reason fields
2130  * RET SLURM_SUCCESS or error code
2131  */
2132 extern int part_policy_valid_qos(part_record_t *part_ptr,
2133 				 slurmdb_qos_rec_t *qos_ptr,
2134 				 job_record_t *job_ptr);
2135 
2136 /*
 * partition_in_use - determine whether a partition is in use by a RUNNING,
 *	PENDING or SUSPENDED job
2139  * IN part_name - name of a partition
2140  * RET true if the partition is in use, else false
2141  */
2142 extern bool partition_in_use(char *part_name);
2143 
2144 /*
2145  * Set "batch_host" for this job based upon it's "batch_features" and
2146  * "node_bitmap". The selection is deferred in case a node's "active_features"
2147  * is changed by a reboot.
2148  * Return SLURM_SUCCESS or error code
2149  */
2150 extern int pick_batch_host(job_record_t *job_ptr);
2151 
2152 /*
2153  * prolog_complete - note the normal termination of the prolog
2154  * IN job_id - id of the job which completed
2155  * IN prolog_return_code - prolog's return code,
2156  *    if set then set job state to FAILED
2157  * RET - 0 on success, otherwise ESLURM error code
2158  * global: job_list - pointer global job list
2159  *	last_job_update - time of last job table update
2160  */
2161 extern int prolog_complete(uint32_t job_id, uint32_t prolog_return_code);
2162 
2163 /*
2164  * If the job or slurm.conf requests to not kill on invalid dependency,
2165  * then set the job state reason to WAIT_DEP_INVALID. Otherwise, kill the
2166  * job.
2167  */
2168 extern void handle_invalid_dependency(job_record_t *job_ptr);
2169 
2170 /*
2171  * purge_old_job - purge old job records.
2172  *	The jobs must have completed at least MIN_JOB_AGE minutes ago.
2173  *	Test job dependencies, handle after_ok, after_not_ok before
2174  *	purging any jobs.
2175  * NOTE: READ lock slurmctld config and WRITE lock jobs before entry
2176  */
2177 void purge_old_job(void);
2178 
2179 /* Convert a comma delimited list of QOS names into a bitmap */
2180 extern void qos_list_build(char *qos, bitstr_t **qos_bits);
2181 
2182 /* Request that the job scheduler execute soon (typically within seconds) */
2183 extern void queue_job_scheduler(void);
2184 
2185 /*
2186  * rehash_jobs - Create or rebuild the job hash table.
2187  * NOTE: run lock_slurmctld before entry: Read config, write job
2188  */
2189 extern void rehash_jobs(void);
2190 
2191 /*
2192  * Rebuild a job step's core_bitmap_job after a job has just changed size
2193  * job_ptr IN - job that was just re-sized
2194  * orig_job_node_bitmap IN - The job's original node bitmap
2195  */
2196 extern void rebuild_step_bitmaps(job_record_t *job_ptr,
2197 				 bitstr_t *orig_job_node_bitmap);
2198 
2199 /*
 * After a job has fully completed, run this to release the resources
2201  * and remove it from the system.
2202  */
2203 extern int post_job_step(step_record_t *step_ptr);
2204 
2205 /*
2206  * Create the extern step and add it to the job.
2207  */
2208 extern step_record_t *build_extern_step(job_record_t *job_ptr);
2209 
2210 /*
2211  * Create the batch step and add it to the job.
2212  */
2213 extern step_record_t *build_batch_step(job_record_t *job_ptr_in);
2214 
2215 /* update first assigned job id as needed on reconfigure */
2216 extern void reset_first_job_id(void);
2217 
2218 /*
2219  * reset_job_bitmaps - reestablish bitmaps for existing jobs.
2220  *	this should be called after rebuilding node information,
2221  *	but before using any job entries.
2222  * global: last_job_update - time of last job table update
2223  *	job_list - pointer to global job list
2224  */
2225 extern void reset_job_bitmaps (void);
2226 
2227 /* Reset a node's CPU load value */
2228 extern void reset_node_load(char *node_name, uint32_t cpu_load);
2229 
2230 /* Reset a node's free memory value */
2231 extern void reset_node_free_mem(char *node_name, uint64_t free_mem);
2232 
2233 /* Reset all scheduling statistics
2234  * level IN - clear backfilled_jobs count if set */
2235 extern void reset_stats(int level);
2236 
2237 /*
2238  * restore_node_features - Make node and config (from slurm.conf) fields
2239  *	consistent for Features, Gres and Weight
2240  * IN recover -
2241  *              0, 1 - use data from config record, built using slurm.conf
2242  *              2 = use data from node record, built from saved state
2243  */
2244 extern void restore_node_features(int recover);
2245 
2246 /* Update time stamps for job step resume */
2247 extern void resume_job_step(job_record_t *job_ptr);
2248 
2249 /* run_backup - this is the backup controller, it should run in standby
2250  *	mode, assuming control when the primary controller stops responding */
2251 extern void run_backup(slurm_trigger_callbacks_t *callbacks);
2252 
2253 /*
2254  * ping_controllers - ping other controllers in HA configuration.
2255  * IN active_controller - true if active controller, false if backup
2256  */
2257 extern int ping_controllers(bool active_controller);
2258 
2259 /* Spawn health check function for every node that is not DOWN */
2260 extern void run_health_check(void);
2261 
2262 /* save_all_state - save entire slurmctld state for later recovery */
2263 extern void save_all_state(void);
2264 
2265 /* make sure the assoc_mgr lists are up and running and state is
2266  * restored */
2267 extern void ctld_assoc_mgr_init(slurm_trigger_callbacks_t *callbacks);
2268 
2269 /* send all info for the controller to accounting */
2270 extern void send_all_to_accounting(time_t event_time, int db_rc);
2271 
2272 /* A slurmctld lock needs to at least have a node read lock set before
2273  * this is called */
2274 extern void set_cluster_tres(bool assoc_mgr_locked);
2275 
2276 /* sends all jobs in eligible state to accounting.  Only needed at
2277  * first registration
2278  */
2279 extern int send_jobs_to_accounting(void);
2280 
2281 /* send all nodes in a down like state to accounting.  Only needed at
2282  * first registration
2283  */
2284 extern int send_nodes_to_accounting(time_t event_time);
2285 
2286 /* Decrement slurmctld thread count (as applies to thread limit) */
2287 extern void server_thread_decr(void);
2288 
2289 /* Increment slurmctld thread count (as applies to thread limit) */
2290 extern void server_thread_incr(void);
2291 
2292 /* Set a job's alias_list string */
2293 extern void set_job_alias_list(job_record_t *job_ptr);
2294 
2295 /*
2296  * set_job_prio - set a default job priority
2297  * IN job_ptr - pointer to the job_record
2298  */
2299 extern void set_job_prio(job_record_t *job_ptr);
2300 
2301 /*
2302  * set_node_down - make the specified node's state DOWN if possible
2303  *	(not in a DRAIN state), kill jobs as needed
2304  * IN name - name of the node
2305  * IN reason - why the node is DOWN
2306  */
2307 extern void set_node_down (char *name, char *reason);
2308 
2309 /*
2310  * set_node_down_ptr - make the specified compute node's state DOWN and
2311  *	kill jobs as needed
2312  * IN node_ptr - node_ptr to the node
2313  * IN reason - why the node is DOWN
2314  */
2315 void set_node_down_ptr(node_record_t *node_ptr, char *reason);
2316 
2317 /*
2318  * set_slurmctld_state_loc - create state directory as needed and "cd" to it
2319  */
2320 extern void set_slurmctld_state_loc(void);
2321 
2322 /*
2323  * signal_step_tasks - send specific signal to specific job step
2324  * IN step_ptr - step record pointer
2325  * IN signal - signal to send
2326  * IN msg_type - message type to send
2327  */
2328 void signal_step_tasks(step_record_t *step_ptr, uint16_t signal,
2329 		       slurm_msg_type_t msg_type);
2330 
2331 /*
2332  * signal_step_tasks_on_node - send specific signal to specific job step
2333  *                             on a specific node.
2334  * IN node_name - name of node on which to signal tasks
2335  * IN step_ptr - step record pointer
2336  * IN signal - signal to send
2337  * IN msg_type - message type to send
2338  */
2339 void signal_step_tasks_on_node(char* node_name, step_record_t *step_ptr,
2340 			       uint16_t signal, slurm_msg_type_t msg_type);
2341 
2342 /*
2343  * slurmctld_shutdown - wake up slurm_rpc_mgr thread via signal
2344  * RET 0 or error code
2345  */
2346 extern int slurmctld_shutdown(void);
2347 
2348 /* Update a job's record of allocated CPUs when a job step gets scheduled */
2349 extern void step_alloc_lps(step_record_t *step_ptr);
2350 
2351 /*
 * step_create - creates a step_record for the job given by step_specs->job_id
 *	and sets it up according to the step_specs.
2354  * IN step_specs - job step specifications
2355  * OUT new_step_record - pointer to the new step_record (NULL on error)
2356  * IN protocol_version - slurm protocol version of client
 * RET - 0 or error code
2358  * NOTE: don't free the returned step_record because that is managed through
2359  * 	the job.
2360  */
2361 extern int step_create(job_step_create_request_msg_t *step_specs,
2362 		       step_record_t **new_step_record,
2363 		       uint16_t protocol_version);
2364 
2365 /*
2366  * step_layout_create - creates a step_layout according to the inputs.
 * IN step_ptr - step having tasks laid out
2368  * IN step_node_list - node list of hosts in step
2369  * IN node_count - count of nodes in step allocation
2370  * IN num_tasks - number of tasks in step
2371  * IN cpus_per_task - number of cpus per task
2372  * IN task_dist - type of task distribution
2373  * IN plane_size - size of plane (only needed for the plane distribution)
2374  * RET - NULL or slurm_step_layout_t *
2375  * NOTE: you need to free the returned step_layout usually when the
2376  *       step is freed.
2377  */
2378 extern slurm_step_layout_t *step_layout_create(step_record_t *step_ptr,
2379 					       char *step_node_list,
2380 					       uint32_t node_count,
2381 					       uint32_t num_tasks,
2382 					       uint16_t cpus_per_task,
2383 					       uint32_t task_dist,
2384 					       uint16_t plane_size);
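
/*
 * Illustrative call (arguments are assumed to be in scope and hypothetical;
 * SLURM_DIST_BLOCK is one of the task_dist values from slurm.h and the
 * plane_size argument only matters for the plane distribution):
 *
 *	slurm_step_layout_t *layout =
 *		step_layout_create(step_ptr, step_node_list, node_count,
 *				   num_tasks, cpus_per_task,
 *				   SLURM_DIST_BLOCK, 0);
 *	if (!layout)
 *		error("step_layout_create failed");
 */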
2385 
2386 /*
2387  * step_list_purge - Simple purge of a job's step list records.
2388  * IN job_ptr - pointer to job table entry to have step records removed
2389  */
2390 extern void step_list_purge(job_record_t *job_ptr);
2391 
2392 /*
 * step_epilog_complete - note completion of epilog on some node and
 *	release its switch windows if appropriate. Can perform partition
 *	switch window releases.
2396  * IN job_ptr - pointer to job which has completed epilog
2397  * IN node_name - name of node which has completed epilog
2398  */
2399 extern int step_epilog_complete(job_record_t *job_ptr, char *node_name);
2400 
2401 /*
2402  * step_partial_comp - Note the completion of a job step on at least
2403  *	some of its nodes
2404  * IN req     - step_completion_msg RPC from slurmstepd
2405  * IN uid     - UID issuing the request
2406  * OUT rem    - count of nodes for which responses are still pending
2407  * OUT max_rc - highest return code for any step thus far
2408  * RET 0 on success, otherwise ESLURM error code
2409  */
2410 extern int step_partial_comp(step_complete_msg_t *req, uid_t uid,
2411 			     int *rem, uint32_t *max_rc);
2412 
2413 /*
2414  * step_set_alloc_tres - set the tres up when allocating the step.
2415  * Only set when job is running.
2416  * NOTE: job write lock must be locked before calling this */
2417 extern void step_set_alloc_tres(step_record_t *step_ptr, uint32_t node_count,
2418 				bool assoc_mgr_locked, bool make_formatted);
2419 
2420 /* Update time stamps for job step suspend */
2421 extern void suspend_job_step(job_record_t *job_ptr);
2422 
2423 /* For the job array data structure, build the string representation of the
2424  * bitmap.
2425  * NOTE: bit_fmt_hexmask() is far more scalable than bit_fmt(). */
2426 extern void build_array_str(job_record_t *job_ptr);
2427 
2428 /* Return true if ALL tasks of specific array job ID are complete */
2429 extern bool test_job_array_complete(uint32_t array_job_id);
2430 
2431 /* Return true if ALL tasks of specific array job ID are completed */
2432 extern bool test_job_array_completed(uint32_t array_job_id);
2433 
2434 /* Return true if ALL tasks of specific array job ID are finished */
2435 extern bool test_job_array_finished(uint32_t array_job_id);
2436 
2437 /* Return true if ANY tasks of specific array job ID are pending */
2438 extern bool test_job_array_pending(uint32_t array_job_id);
2439 
/* Determine if the nodes are ready to run a job
2441  * RET true if ready */
2442 extern bool test_job_nodes_ready(job_record_t *job_ptr);
2443 
2444 /*
 * Synchronize the batch jobs in the system with their files.
2446  * All pending batch jobs must have script and environment files
2447  * No other jobs should have such files
2448  */
2449 extern int sync_job_files(void);
2450 
2451 /* After recovering job state, if using priority/basic then we increment the
2452  * priorities of all jobs to avoid decrementing the base down to zero */
2453 extern void sync_job_priorities(void);
2454 
2455 /* True if running jobs are allowed to expand, false otherwise. */
2456 extern bool permit_job_expansion(void);
2457 
2458 /* True if running jobs are allowed to shrink, false otherwise. */
2459 extern bool permit_job_shrink(void);
2460 
2461 /*
2462  * update_job - update a job's parameters per the supplied specifications
2463  * IN msg - RPC to update job, including change specification
2464  * IN uid - uid of user issuing RPC
2465  * IN send_msg - whether to send msg back or not
2466  * RET returns an error code from slurm_errno.h
2467  * global: job_list - global list of job entries
2468  *	last_job_update - time of last job table update
2469  */
2470 extern int update_job(slurm_msg_t *msg, uid_t uid, bool send_msg);
2471 
2472 /*
 * update_job_str - update a job's parameters per the supplied specification
 * IN msg - RPC to update job, including change specification
2475  * IN uid - uid of user issuing RPC
2476  * RET returns an error code from slurm_errno.h
2477  * global: job_list - global list of job entries
2478  *	last_job_update - time of last job table update
2479  */
2480 extern int update_job_str(slurm_msg_t *msg, uid_t uid);
2481 
2482 /*
2483  * Modify the wckey associated with a pending job
2484  * IN module - where this is called from
2485  * IN job_ptr - pointer to job which should be modified
2486  * IN new_wckey - desired wckey name
2487  * RET SLURM_SUCCESS or error code
2488  */
2489 extern int update_job_wckey(char *module, job_record_t *job_ptr,
2490 			    char *new_wckey);
2491 
2492 /* Reset nodes_completing field for all jobs */
2493 extern void update_job_nodes_completing(void);
2494 
2495 /* Reset slurmctld logging based upon configuration parameters
2496  * uses common slurmctld_conf data structure */
2497 extern void update_logging(void);
2498 
2499 /*
2500  * update_node - update the configuration data for one or more nodes
2501  * IN update_node_msg - update node request
2502  * RET 0 or error code
2503  * global: node_record_table_ptr - pointer to global node table
2504  */
extern int update_node ( update_node_msg_t * update_node_msg );
2506 
2507 /* Update nodes accounting usage data */
2508 extern void update_nodes_acct_gather_data(void);
2509 
2510 /*
2511  * update_node_record_acct_gather_data - update the energy data in the
2512  * node_record
2513  * IN msg - node energy data message
2514  * RET 0 if no error, ENOENT if no such node
2515  */
2516 extern int update_node_record_acct_gather_data(
2517 	acct_gather_node_resp_msg_t *msg);
2518 
2519 /*
2520  * Process string and set partition fields to appropriate values if valid
2521  *
2522  * IN billing_weights_str - suggested billing weights
2523  * IN part_ptr - pointer to partition
2524  * IN fail - whether the inner function should fatal if the string is invalid.
 * RET SLURM_ERROR on error, SLURM_SUCCESS otherwise.
2526  */
2527 extern int set_partition_billing_weights(char *billing_weights_str,
2528 					 part_record_t *part_ptr, bool fail);
2529 
2530 /*
2531  * update_part - create or update a partition's configuration data
2532  * IN part_desc - description of partition changes
2533  * IN create_flag - create a new partition
2534  * RET 0 or an error code
2535  * global: part_list - list of partition entries
2536  *	last_part_update - update time of partition records
2537  */
2538 extern int update_part (update_part_msg_t * part_desc, bool create_flag);
2539 
/* Process a job step update request from the specified user,
 * RET - 0 or error code */
extern int update_step(step_update_request_msg_t *req, uid_t uid);

/*
 * validate_alloc_node - validate that the allocating node
 * is allowed to use this partition
 * IN part_ptr - pointer to a partition
 * IN alloc_node - allocating node of the request
 * RET 1 if permitted to run, 0 otherwise
 */
extern int validate_alloc_node(part_record_t *part_ptr, char *alloc_node);

/*
 * validate_group - validate that the submit uid is authorized to run in
 *	this partition
 * IN part_ptr - pointer to a partition
 * IN run_uid - user to run the job as
 * RET 1 if permitted to run, 0 otherwise
 */
extern int validate_group(part_record_t *part_ptr, uid_t run_uid);

/* Perform some size checks on strings we store to prevent
 * a malicious user from filling slurmctld's memory
 * IN job_desc   - user job submit request
 * IN submit_uid - UID making job submit request
 * OUT err_msg   - custom error message to return
 * RET 0 or error code */
extern int validate_job_create_req(job_desc_msg_t *job_desc, uid_t submit_uid,
				   char **err_msg);

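/*
 * Example (illustrative sketch, not taken from the Slurm sources): reject a
 * submission that fails the size checks. This assumes err_msg, when set, is
 * an allocated string owned by the caller.
 *
 *	char *err_msg = NULL;
 *	int rc = validate_job_create_req(job_desc, submit_uid, &err_msg);
 *
 *	if (rc != SLURM_SUCCESS)
 *		error("job rejected: %s",
 *		      err_msg ? err_msg : "size check failed");
 *	xfree(err_msg);
 */
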
/*
 * validate_jobs_on_node - validate that any jobs that should be on the node
 *	are actually running; if not, clean up the job and/or node records.
 *	Call this function after validate_node_specs() has set the node
 *	state properly.
 * IN reg_msg - node registration message
 */
extern void validate_jobs_on_node(slurm_node_registration_status_msg_t *reg_msg);

/*
 * validate_node_specs - validate that the node's specifications are valid;
 *	if not, set its state to DOWN. In any case, update last_response.
 * IN reg_msg - node registration message
 * IN protocol_version - Version of Slurm on this node
 * OUT newly_up - set if node newly brought into service
 * RET 0 if no error, ENOENT if no such node, EINVAL if values too low
 * NOTE: READ lock_slurmctld config before entry
 */
extern int validate_node_specs(slurm_node_registration_status_msg_t *reg_msg,
			       uint16_t protocol_version, bool *newly_up);

/*
 * validate_nodes_via_front_end - validate all nodes on a cluster as having
 *	a valid configuration as soon as the front-end registers. Individual
 *	nodes will not register with this configuration
 * IN reg_msg - node registration message
 * IN protocol_version - Version of Slurm on this node
 * OUT newly_up - set if node newly brought into service
 * RET 0 if no error, Slurm error code otherwise
 * NOTE: READ lock_slurmctld config before entry
 */
extern int validate_nodes_via_front_end(
		slurm_node_registration_status_msg_t *reg_msg,
		uint16_t protocol_version, bool *newly_up);

/*
 * validate_slurm_user - validate that the uid is authorized to see
 *      privileged data (either user root or SlurmUser)
 * IN uid - user to validate
 * RET true if permitted to run, false otherwise
 */
extern bool validate_slurm_user(uid_t uid);

/*
 * validate_super_user - validate that the uid is authorized at the
 *      root, SlurmUser, or SLURMDB_ADMIN_SUPER_USER level
 * IN uid - user to validate
 * RET true if permitted to run, false otherwise
 */
extern bool validate_super_user(uid_t uid);

/*
 * validate_operator - validate that the uid is authorized at the
 *      root, SlurmUser, or SLURMDB_ADMIN_OPERATOR level
 * IN uid - user to validate
 * RET true if permitted to run, false otherwise
 */
extern bool validate_operator(uid_t uid);

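/*
 * Example (illustrative sketch, not taken from the Slurm sources): typical
 * privilege check in an RPC handler, where uid is the authenticated user
 * that issued the request and the log message is hypothetical.
 *
 *	if (!validate_operator(uid)) {
 *		error("Security violation, UPDATE RPC from uid=%u", uid);
 *		return ESLURM_ACCESS_DENIED;
 *	}
 */
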
/* cleanup_completing()
 *
 * Clean up the JOB_COMPLETING flag and eventually requeue the job if
 * there is a pending request for it. This function assumes the caller
 * holds the appropriate locks on the job_record. It is called when a
 * job completes, after either the slurmd epilog or the slurmctld epilog
 * finishes, whichever comes last.
 */
extern void cleanup_completing(job_record_t *job_ptr);

/* trace_job() - print the job details if
 *               the DEBUG_FLAG_TRACE_JOBS debug flag is set
 */
extern void trace_job(job_record_t *job_ptr, const char *, const char *);

/*
 * Determine if slurmctld will respond to "configless" RPCs. If so,
 * load the internal cached config values to avoid regenerating on each
 * RPC.
 */
extern void configless_setup(void);
/* Free cached values to avoid memory leak. */
extern void configless_clear(void);

/*
 * Wait for the specified child process to terminate, with a timeout
 * (arguments: tag used for logging, pid of the child, pointer in which to
 * return its exit status, timeout in seconds).
 * Note: this description is inferred from the signature and typical usage.
 */
extern int waitpid_timeout(const char *, pid_t, int *, int);

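/*
 * Example (illustrative sketch, not taken from the Slurm sources): run a
 * helper program and wait for it for at most 10 seconds. The program path
 * and tag are hypothetical; WIFEXITED/WEXITSTATUS require <sys/wait.h>.
 *
 *	int status = 0;
 *	pid_t cpid = fork();
 *
 *	if (cpid == 0) {
 *		execl("/bin/true", "true", (char *) NULL);
 *		_exit(127);
 *	}
 *	waitpid_timeout("example_prog", cpid, &status, 10);
 *	if (!WIFEXITED(status) || WEXITSTATUS(status))
 *		error("example_prog failed or timed out");
 */
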
/*
 * Calculate and populate the TRES counts for all partitions.
 */
extern void set_partition_tres();

/*
 * Update job's federated siblings strings.
 *
 * IN job_ptr - job_ptr to update
 */
extern void update_job_fed_details(job_record_t *job_ptr);

/*
 * purge_job_record - purge a specific job record. No testing is performed to
 *	ensure the job record has no active references. Use only for job
 *	records that were never fully operational (e.g. WILL_RUN test, failed
 *	job load, failed job create, etc.).
 * IN job_id - job_id of job record to be purged
 * RET int - count of jobs purged
 * global: job_list - global job table
 */
extern int purge_job_record(uint32_t job_id);

/*
 * Remove job from job hashes so that it can't be found, but leave job in
 * job_table so that it can be deleted by _list_delete_job().
 *
 * IN job_ptr - job_ptr to be unlinked
 */
extern void unlink_job_record(job_record_t *job_ptr);

/*
 * copy_job_record_to_job_desc - construct a job_desc_msg_t for a job.
 * IN job_ptr - the job record
 * RET the job_desc_msg_t, NULL on error
 */
extern job_desc_msg_t *copy_job_record_to_job_desc(job_record_t *job_ptr);

/*
 * Set the allocation response with the current cluster's information and the
 * job's allocated nodes' addresses if the allocation is being filled by a
 * cluster other than the one that submitted the job.
 *
 * Note: make sure that the resp's working_cluster_rec is NULLed out before
 * the resp is freed, since it points to global memory.
 *
 * IN resp - allocation response being sent back to client.
 * IN job_ptr - allocated job
 * IN req_cluster - the cluster requesting the allocation info.
 */
extern void
set_remote_working_response(resource_allocation_response_msg_t *resp,
			    job_record_t *job_ptr,
			    const char *req_cluster);

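/*
 * Example (illustrative sketch, not taken from the Slurm sources) of the
 * note above: clear the borrowed pointer before freeing the response.
 *
 *	set_remote_working_response(resp, job_ptr, req_cluster);
 *	(... pack and send resp to the client ...)
 *	resp->working_cluster_rec = NULL;
 *	slurm_free_resource_allocation_response_msg(resp);
 */
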
/*
 * Free job's fed_details ptr.
 */
extern void free_job_fed_details(job_fed_details_t **fed_details_pptr);

/*
 * Calculate billable TRES based on the partition's defined BillingWeights. If
 * none are defined, return total_cpus. The value is cached on
 * job_ptr->billable_tres and is updated if the job was resized since the last
 * iteration.
 *
 * IN job_ptr          - job to calculate billable TRES for
 * IN start_time       - time the job started or was resized
 * IN assoc_mgr_locked - whether the TRES assoc_mgr lock is held or not
 */
extern double calc_job_billable_tres(job_record_t *job_ptr, time_t start_time,
				     bool assoc_mgr_locked);

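/*
 * Illustrative arithmetic (an assumption about a common configuration, not
 * taken from this code): with TRESBillingWeights="CPU=1.0,Mem=0.25G", a job
 * allocated 8 CPUs and 16 GB of memory would typically be billed
 * 8 * 1.0 + 16 * 0.25 = 12.0 billable TRES; with no weights defined on the
 * partition, the job's total_cpus (8) would be returned instead.
 */
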
/*
 * Realloc and possibly update a job_ptr->limit_set->tres array.
 *
 * If a new TRES is added, the TRES positions in the array could have been
 * moved around. The array either needs to be grown and/or the values need to
 * be put in their new positions.
 *
 * IN: tres_limits - job_ptr->limit_set->tres array.
 */
extern void update_job_limit_set_tres(uint16_t **tres_limits);

/*
 * Validate TRES specification of the form:
 * "name=[type:]#[,[type:]#][;name=[type:]#]"
 * For example: "gpu:kepler:2,craynetwork=1"
 */
extern bool valid_tres_cnt(char *tres);

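/*
 * Example (illustrative sketch, not taken from the Slurm sources): reject a
 * request whose TRES specification does not match the format above; the
 * field checked here is hypothetical.
 *
 *	if (job_desc->tres_per_node && !valid_tres_cnt(job_desc->tres_per_node))
 *		return ESLURM_INVALID_TRES;
 */
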
/*
 * Validate the named TRES is valid for scheduling parameters.
 * This is currently a subset of all defined TRES.
 */
extern bool valid_tres_name(char *name);

/*
 * Check for nodes that haven't rebooted yet.
 *
 * If the node hasn't booted by ResumeTimeout, mark the node as down.
 */
extern void check_reboot_nodes();

/*
 * Send warning signal to job before end time.
 *
 * IN job_ptr - job to send warn signal to.
 * IN ignore_time - If set, ignore the warn time and just send it.
 */
extern void send_job_warn_signal(job_record_t *job_ptr, bool ignore_time);

/*
 * Check if we are still waiting for a node to boot.
 *
 * IN node_ptr - node to check if still waiting for boot.
 *
 * RET true if still expecting the node to boot, false otherwise.
 */
extern bool waiting_for_node_boot(struct node_record *node_ptr);

/*
 * Check if any part of job_ptr is overlapping node_map.
 * IN node_map - bitstr of nodes set.
 * IN job_ptr - job (hetjob or not) to check.
 *
 * RET true if we overlap, false otherwise
 */
extern bool job_overlap_and_running(bitstr_t *node_map, job_record_t *job_ptr);

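/*
 * Example (illustrative sketch, not taken from the Slurm sources) using the
 * bitstring API from src/common/bitstring.h; the node index is hypothetical.
 *
 *	bitstr_t *node_map = bit_alloc(node_record_count);
 *
 *	bit_set(node_map, 42);
 *	if (job_overlap_and_running(node_map, job_ptr))
 *		debug("job %u is running on a node in the map",
 *		      job_ptr->job_id);
 *	FREE_NULL_BITMAP(node_map);
 */
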
/*
 * Respond to request for backup slurmctld status
 */
extern void slurm_rpc_control_status(slurm_msg_t *msg, time_t control_time);

/*
 * Callbacks to let the PrEp plugins signal completion if running async.
 */
extern void prep_prolog_slurmctld_callback(int rc, uint32_t job_id);
extern void prep_epilog_slurmctld_callback(int rc, uint32_t job_id);

#endif /* !_HAVE_SLURMCTLD_H */