/*****************************************************************************\
 *  node_scheduler.h - definitions of functions in node_scheduler.c
 *****************************************************************************
 *  Copyright (C) 2004-2007 The Regents of the University of California.
 *  Copyright (C) 2008 Lawrence Livermore National Security.
 *  Produced at Lawrence Livermore National Laboratory (cf. DISCLAIMER).
 *  Written by Morris Jette <jette@llnl.gov> et al.
 *  CODE-OCEC-09-009. All rights reserved.
 *
 *  This file is part of Slurm, a resource management program.
 *  For details, see <https://slurm.schedmd.com/>.
 *  Please also read the included file: DISCLAIMER.
 *
 *  Slurm is free software; you can redistribute it and/or modify it under
 *  the terms of the GNU General Public License as published by the Free
 *  Software Foundation; either version 2 of the License, or (at your option)
 *  any later version.
 *
 *  In addition, as a special exception, the copyright holders give permission
 *  to link the code of portions of this program with the OpenSSL library under
 *  certain conditions as described in each individual source file, and
 *  distribute linked combinations including the two. You must obey the GNU
 *  General Public License in all respects for all of the code used other than
 *  OpenSSL. If you modify file(s) with this exception, you may extend this
 *  exception to your version of the file(s), but you are not obligated to do
 *  so. If you do not wish to do so, delete this exception statement from your
 *  version.  If you delete this exception statement from all source files in
 *  the program, then also delete it here.
 *
 *  Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
 *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
 *  details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with Slurm; if not, write to the Free Software Foundation, Inc.,
 *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
\*****************************************************************************/

#ifndef _HAVE_NODE_SCHEDULER_H
#define _HAVE_NODE_SCHEDULER_H

/*
 * allocate_nodes - change state of specified nodes to NODE_STATE_ALLOCATED
 *	also claim required licenses
 * IN job_ptr - job being allocated resources
 */
extern void allocate_nodes(job_record_t *job_ptr);

/* For a given job, if the available nodes differ from those with currently
 *	active features, return a bitmap of nodes with the job's required
 *	features currently active
 * IN job_ptr - job requesting resource allocation
 * IN avail_bitmap - nodes currently available for this job
 * OUT active_bitmap - nodes with job's features currently active, NULL if
 *	identical to avail_bitmap
 * NOTE: Currently supports only simple AND of features
 */
extern void build_active_feature_bitmap(job_record_t *job_ptr,
					bitstr_t *avail_bitmap,
					bitstr_t **active_bitmap);

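/*
 * Illustrative usage sketch (not part of the API): an active-feature bitmap
 * is only handed back when it differs from avail_bitmap, so a NULL result
 * means every available node already has the required features active.
 * FREE_NULL_BITMAP() is Slurm's bitmap cleanup macro; job_ptr and
 * avail_bitmap are assumed to be supplied by the caller.
 *
 *	bitstr_t *active_bitmap = NULL;
 *	build_active_feature_bitmap(job_ptr, avail_bitmap, &active_bitmap);
 *	if (active_bitmap) {
 *		// Some available nodes lack the required active features;
 *		// schedule against active_bitmap or plan a node reboot.
 *		FREE_NULL_BITMAP(active_bitmap);
 *	}
 */
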
/* Return bitmap of nodes with all specified features currently active */
extern bitstr_t *build_active_feature_bitmap2(char *reboot_features);

/*
 * build_node_details - sets addresses for allocated nodes
 * IN job_ptr - pointer to a job record
 * IN new_alloc - set if new job allocation, cleared if state recovery
 */
extern void build_node_details(job_record_t *job_ptr, bool new_alloc);

/*
 * deallocate_nodes - for a given job, deallocate its nodes and make
 *	their state NODE_STATE_COMPLETING
 *	also release the job's licenses
 * IN job_ptr - pointer to terminating job (already in some COMPLETING state)
 * IN timeout - true if job exhausted time limit, send REQUEST_KILL_TIMELIMIT
 *	RPC instead of REQUEST_TERMINATE_JOB
 * IN suspended - true if job was already suspended (node's run_job_cnt
 *	already decremented)
 * IN preempted - true if job is being preempted
 */
extern void deallocate_nodes(job_record_t *job_ptr, bool timeout,
			     bool suspended, bool preempted);

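/*
 * Illustrative call sketch (assumes the caller has already moved job_ptr
 * into a COMPLETING state): release the nodes and licenses of a job that
 * hit its time limit and was neither suspended nor preempted.
 *
 *	deallocate_nodes(job_ptr, true, false, false);
 */
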
/* Remove nodes from consideration for allocation based upon "mcs" labels
 * in use by other users' jobs
 * job_ptr IN - Job to be scheduled
 * mcs_select IN - MCS filtering flag for this job
 * usable_node_mask IN/OUT - Nodes available for use by this job's mcs
 */
extern void filter_by_node_mcs(job_record_t *job_ptr, int mcs_select,
			       bitstr_t *usable_node_mask);

/* Remove nodes from consideration for allocation based upon "ownership" by
 * other users
 * job_ptr IN - Job to be scheduled
 * usable_node_mask IN/OUT - Nodes available for use by this job's user
 */
extern void filter_by_node_owner(job_record_t *job_ptr,
				 bitstr_t *usable_node_mask);

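/*
 * Illustrative filtering sketch: usable_node_mask is assumed to start as a
 * copy of the nodes otherwise eligible for job_ptr, and mcs_select as
 * whatever MCS flag the caller derived for this job. Both filters clear
 * bits in place.
 *
 *	filter_by_node_owner(job_ptr, usable_node_mask);
 *	filter_by_node_mcs(job_ptr, mcs_select, usable_node_mask);
 *	// usable_node_mask now excludes nodes claimed by other users through
 *	// node ownership or MCS labels.
 */
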
/*
 * For every element in the feature_list, identify the nodes with that feature
 * either active or available and set the feature_list's node_bitmap_active and
 * node_bitmap_avail fields accordingly.
 */
extern void find_feature_nodes(List feature_list, bool can_reboot);

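/*
 * Illustrative sketch: populate the per-feature node bitmaps for a job's
 * parsed feature list before evaluating its feature expression. The
 * job_ptr->details->feature_list access is an assumption about the caller's
 * context; can_reboot indicates whether features that are available but not
 * active (i.e. reachable via node reboot) should also be mapped.
 *
 *	find_feature_nodes(job_ptr->details->feature_list, can_reboot);
 */
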
/*
 * re_kill_job - for a given job, deallocate its nodes for a second time,
 *	basically a cleanup for failed deallocate_nodes() calls
 * IN job_ptr - pointer to terminating job (already in some COMPLETING state)
 * globals: node_record_count - number of nodes in the system
 *	node_record_table_ptr - pointer to global node table
 */
extern void re_kill_job(job_record_t *job_ptr);

/*
 * select_nodes - select and allocate nodes to a specific job
 * IN job_ptr - pointer to the job record
 * IN test_only - if set do not allocate nodes, just confirm they
 *	could be allocated now
 * OUT select_node_bitmap - bitmap of nodes selected for the job's resource
 *	allocation (not returned if NULL), caller must free
 * OUT err_msg - if not NULL set to error message for job, caller must xfree
 * IN submission - if set ignore reservations
 * IN scheduler_type - which scheduler is calling this
 *      (e.g. SLURMDB_JOB_FLAG_BACKFILL, SLURMDB_JOB_FLAG_SCHED, etc.)
 * RET 0 on success, ESLURM code from slurm_errno.h otherwise
 * globals: list_part - global list of partition info
 *	default_part_loc - pointer to default partition
 *	config_list - global list of node configuration info
 * Notes: The algorithm is
 *	1) Build a table (node_set_ptr) of nodes with the requisite
 *	   configuration. Each table entry includes their weight,
 *	   node_list, features, etc.
 *	2) Call _pick_best_nodes() to select the nodes best satisfying
 *	   the request (e.g. best-fit or other criterion)
 *	3) Call allocate_nodes() to perform the actual allocation
 */
extern int select_nodes(job_record_t *job_ptr, bool test_only,
			bitstr_t **select_node_bitmap, char **err_msg,
			bool submission, uint32_t scheduler_type);

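/*
 * Illustrative usage sketch (the scheduler-type flag and the error handling
 * shown are assumptions about the caller's context, not a fixed recipe):
 *
 *	char *err_msg = NULL;
 *	bitstr_t *alloc_bitmap = NULL;
 *	int rc = select_nodes(job_ptr, false, &alloc_bitmap, &err_msg,
 *			      false, SLURMDB_JOB_FLAG_SCHED);
 *	if (rc != SLURM_SUCCESS) {
 *		// The job could not be allocated now; err_msg, if set,
 *		// explains why.
 *		xfree(err_msg);
 *	}
 *	FREE_NULL_BITMAP(alloc_bitmap);
 */
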
/*
 * get_node_cnts - determine the number of nodes for the requested job.
 * IN job_ptr - pointer to the job record.
 * IN qos_flags - Flags of the job's QOS, passed directly so the caller need
 *                not supply a QOS pointer or hold the QOS read lock.
 * IN part_ptr - pointer to the job's partition.
 * OUT min_nodes - The minimum number of nodes for the job.
 * OUT req_nodes - The number of nodes the select plugin should target.
 * OUT max_nodes - The maximum number of nodes for the job.
 * RET SLURM_SUCCESS on success, ESLURM code from slurm_errno.h otherwise.
 */
extern int get_node_cnts(job_record_t *job_ptr, uint32_t qos_flags,
			 part_record_t *part_ptr, uint32_t *min_nodes,
			 uint32_t *req_nodes, uint32_t *max_nodes);

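/*
 * Illustrative sketch: query the node-count bounds the scheduler will work
 * with for a job. The qos_flags value and the job_ptr->part_ptr partition
 * pointer are assumed to have been obtained by the caller.
 *
 *	uint32_t min_nodes = 0, req_nodes = 0, max_nodes = 0;
 *	int rc = get_node_cnts(job_ptr, qos_flags, job_ptr->part_ptr,
 *			       &min_nodes, &req_nodes, &max_nodes);
 *	if (rc == SLURM_SUCCESS) {
 *		// Node selection targets req_nodes within
 *		// [min_nodes, max_nodes].
 *	}
 */
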
/* launch_prolog - launch job prolog script by slurmd on allocated nodes
 * IN job_ptr - pointer to the job record
 */
extern void launch_prolog(job_record_t *job_ptr);

/*
 * valid_feature_counts - validate that a job's feature requests can be
 *	satisfied by the selected nodes (NOTE: does not process XOR or XAND
 *	operators)
 * IN job_ptr - job to operate on
 * IN use_active - if set, only consider nodes with the identified features
 *	active; otherwise use available features
 * IN/OUT node_bitmap - nodes available for use; bits for unusable nodes are
 *	cleared
 * OUT has_xor - set if XOR/XAND found in the feature expression
 * RET true if valid, false otherwise
 */
extern bool valid_feature_counts(job_record_t *job_ptr, bool use_active,
				 bitstr_t *node_bitmap, bool *has_xor);

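/*
 * Illustrative sketch (node_bitmap is assumed to be a working copy that the
 * caller can afford to have pruned in place):
 *
 *	bool has_xor = false;
 *	bool ok = valid_feature_counts(job_ptr, false, node_bitmap, &has_xor);
 *	if (!ok) {
 *		// The job's feature request cannot be satisfied by these nodes.
 *	} else if (has_xor) {
 *		// XOR/XAND present; the caller must evaluate those separately.
 *	}
 */
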
#endif /* !_HAVE_NODE_SCHEDULER_H */