1 /*****************************************************************************\
2 * builtin.c - Simple builtin (FIFO) scheduler plugin.
3 * Periodically computes when pending jobs can start.
4 * This is a minimal implementation of the logic found in
5 * src/plugins/sched/backfill/backfill.c and disregards
6 * how jobs are scheduled sequentially.
7 *****************************************************************************
8 * Copyright (C) 2003-2007 The Regents of the University of California.
9 * Copyright (C) 2008-2010 Lawrence Livermore National Security.
10 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
11 * Written by Morris Jette <jette1@llnl.gov>
12 * CODE-OCEC-09-009. All rights reserved.
13 *
14 * This file is part of Slurm, a resource management program.
15 * For details, see <https://slurm.schedmd.com/>.
16 * Please also read the included file: DISCLAIMER.
17 *
18 * Slurm is free software; you can redistribute it and/or modify it under
19 * the terms of the GNU General Public License as published by the Free
20 * Software Foundation; either version 2 of the License, or (at your option)
21 * any later version.
22 *
23 * In addition, as a special exception, the copyright holders give permission
24 * to link the code of portions of this program with the OpenSSL library under
25 * certain conditions as described in each individual source file, and
26 * distribute linked combinations including the two. You must obey the GNU
27 * General Public License in all respects for all of the code used other than
28 * OpenSSL. If you modify file(s) with this exception, you may extend this
29 * exception to your version of the file(s), but you are not obligated to do
30 * so. If you do not wish to do so, delete this exception statement from your
31 * version. If you delete this exception statement from all source files in
32 * the program, then also delete it here.
33 *
34 * Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
35 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
36 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
37 * details.
38 *
39 * You should have received a copy of the GNU General Public License along
40 * with Slurm; if not, write to the Free Software Foundation, Inc.,
41 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
42 \*****************************************************************************/
43
44 #include <pthread.h>
45 #include <stdio.h>
46 #include <stdlib.h>
47 #include <string.h>
48 #include <time.h>
49 #include <unistd.h>
50
51 #include "slurm/slurm.h"
52 #include "slurm/slurm_errno.h"
53
54 #include "src/common/list.h"
55 #include "src/common/macros.h"
56 #include "src/common/node_select.h"
57 #include "src/common/parse_time.h"
58 #include "src/common/slurm_protocol_api.h"
59 #include "src/common/xmalloc.h"
60 #include "src/common/xstring.h"
61
62 #include "src/slurmctld/burst_buffer.h"
63 #include "src/slurmctld/locks.h"
64 #include "src/slurmctld/preempt.h"
65 #include "src/slurmctld/reservation.h"
66 #include "src/slurmctld/slurmctld.h"
67 #include "src/plugins/sched/builtin/builtin.h"
68
69 #ifndef BACKFILL_INTERVAL
70 # define BACKFILL_INTERVAL 30 /* default builtin_interval, in seconds */
71 #endif
72
73 /*********************** local variables *********************/
74 static bool stop_builtin = false; /* set by stop_builtin_agent(); ends the builtin_agent() loop */
75 static pthread_mutex_t term_lock = PTHREAD_MUTEX_INITIALIZER; /* held while setting stop_builtin and while waiting on term_cond */
76 static pthread_cond_t term_cond = PTHREAD_COND_INITIALIZER; /* signaled by stop_builtin_agent() to cut _my_sleep() short */
77 static bool config_flag = false; /* set by builtin_reconfig(); cleared by builtin_agent() before it calls _load_config() */
78 static int builtin_interval = BACKFILL_INTERVAL; /* seconds passed to _my_sleep() between passes; SchedulerParameters "interval=" */
79 static int max_sched_job_cnt = 50; /* job-count limit for one _compute_start_times() pass; "max_job_bf=" / "bf_max_job_test=" */
80 static int sched_timeout = 0; /* seconds after which one _compute_start_times() pass gives up; set by _load_config() */
81
82 /*********************** local functions *********************/
83 static void _compute_start_times(void); /* one pass over the pending job queue */
84 static void _load_config(void); /* (re)load the tunables above */
85 static void _my_sleep(int secs); /* timed wait on term_cond */
86
87 /* Terminate builtin_agent */
stop_builtin_agent(void)88 extern void stop_builtin_agent(void)
89 {
90 slurm_mutex_lock(&term_lock);
91 stop_builtin = true;
92 slurm_cond_signal(&term_cond);
93 slurm_mutex_unlock(&term_lock);
94 }
95
_my_sleep(int secs)96 static void _my_sleep(int secs)
97 {
98 struct timespec ts = {0, 0};
99 struct timeval now;
100
101 gettimeofday(&now, NULL);
102 ts.tv_sec = now.tv_sec + secs;
103 ts.tv_nsec = now.tv_usec * 1000;
104 slurm_mutex_lock(&term_lock);
105 if (!stop_builtin)
106 slurm_cond_timedwait(&term_cond, &term_lock, &ts);
107 slurm_mutex_unlock(&term_lock);
108 }
109
_load_config(void)110 static void _load_config(void)
111 {
112 char *sched_params = slurm_get_sched_params();
113 char *tmp_ptr;
114
115 sched_timeout = slurm_get_msg_timeout() / 2;
116 sched_timeout = MAX(sched_timeout, 1);
117 sched_timeout = MIN(sched_timeout, 10);
118
119 if ((tmp_ptr = xstrcasestr(sched_params, "interval=")))
120 builtin_interval = atoi(tmp_ptr + 9);
121 if (builtin_interval < 1) {
122 error("Invalid SchedulerParameters interval: %d",
123 builtin_interval);
124 builtin_interval = BACKFILL_INTERVAL;
125 }
126
127 if ((tmp_ptr = xstrcasestr(sched_params, "max_job_bf=")))
128 max_sched_job_cnt = atoi(tmp_ptr + 11);
129 if ((tmp_ptr = xstrcasestr(sched_params, "bf_max_job_test=")))
130 max_sched_job_cnt = atoi(tmp_ptr + 16);
131 if (max_sched_job_cnt < 1) {
132 error("Invalid SchedulerParameters bf_max_job_test: %d",
133 max_sched_job_cnt);
134 max_sched_job_cnt = 50;
135 }
136 xfree(sched_params);
137 }
138
_compute_start_times(void)139 static void _compute_start_times(void)
140 {
141 int j, rc = SLURM_SUCCESS, job_cnt = 0;
142 List job_queue;
143 job_queue_rec_t *job_queue_rec;
144 job_record_t *job_ptr;
145 part_record_t *part_ptr;
146 bitstr_t *alloc_bitmap = NULL, *avail_bitmap = NULL;
147 bitstr_t *exc_core_bitmap = NULL;
148 uint32_t max_nodes, min_nodes, req_nodes, time_limit;
149 time_t now = time(NULL), sched_start, last_job_alloc;
150 bool resv_overlap = false;
151
152 sched_start = now;
153 last_job_alloc = now - 1;
154 alloc_bitmap = bit_alloc(node_record_count);
155 job_queue = build_job_queue(true, false);
156 sort_job_queue(job_queue);
157 while ((job_queue_rec = (job_queue_rec_t *) list_pop(job_queue))) {
158 job_ptr = job_queue_rec->job_ptr;
159 part_ptr = job_queue_rec->part_ptr;
160 xfree(job_queue_rec);
161 if (part_ptr != job_ptr->part_ptr)
162 continue; /* Only test one partition */
163
164 if (job_cnt++ > max_sched_job_cnt) {
165 debug2("scheduling loop exiting after %d jobs",
166 max_sched_job_cnt);
167 break;
168 }
169
170 /* Determine minimum and maximum node counts */
171 /* On BlueGene systems don't adjust the min/max node limits
172 here. We are working on midplane values. */
173 min_nodes = MAX(job_ptr->details->min_nodes,
174 part_ptr->min_nodes);
175
176 if (job_ptr->details->max_nodes == 0)
177 max_nodes = part_ptr->max_nodes;
178 else
179 max_nodes = MIN(job_ptr->details->max_nodes,
180 part_ptr->max_nodes);
181
182 max_nodes = MIN(max_nodes, 500000); /* prevent overflows */
183
184 if (job_ptr->details->max_nodes)
185 req_nodes = max_nodes;
186 else
187 req_nodes = min_nodes;
188
189 if (min_nodes > max_nodes) {
190 /* job's min_nodes exceeds partition's max_nodes */
191 continue;
192 }
193
194 j = job_test_resv(job_ptr, &now, true, &avail_bitmap,
195 &exc_core_bitmap, &resv_overlap, false);
196 if (j != SLURM_SUCCESS) {
197 FREE_NULL_BITMAP(avail_bitmap);
198 FREE_NULL_BITMAP(exc_core_bitmap);
199 continue;
200 }
201
202 rc = select_g_job_test(job_ptr, avail_bitmap,
203 min_nodes, max_nodes, req_nodes,
204 SELECT_MODE_WILL_RUN,
205 NULL, NULL,
206 exc_core_bitmap);
207 if (rc == SLURM_SUCCESS) {
208 last_job_update = now;
209 if (job_ptr->time_limit == INFINITE)
210 time_limit = 365 * 24 * 60 * 60;
211 else if (job_ptr->time_limit != NO_VAL)
212 time_limit = job_ptr->time_limit * 60;
213 else if (job_ptr->part_ptr &&
214 (job_ptr->part_ptr->max_time != INFINITE))
215 time_limit = job_ptr->part_ptr->max_time * 60;
216 else
217 time_limit = 365 * 24 * 60 * 60;
218 if (bit_overlap_any(alloc_bitmap, avail_bitmap) &&
219 (job_ptr->start_time <= last_job_alloc)) {
220 job_ptr->start_time = last_job_alloc;
221 }
222 bit_or(alloc_bitmap, avail_bitmap);
223 last_job_alloc = job_ptr->start_time + time_limit;
224 }
225 FREE_NULL_BITMAP(avail_bitmap);
226 FREE_NULL_BITMAP(exc_core_bitmap);
227
228 if ((time(NULL) - sched_start) >= sched_timeout) {
229 debug2("scheduling loop exiting after %d jobs",
230 max_sched_job_cnt);
231 break;
232 }
233 }
234 FREE_NULL_LIST(job_queue);
235 FREE_NULL_BITMAP(alloc_bitmap);
236 }
237
238 /* Note that slurm.conf has changed */
builtin_reconfig(void)239 extern void builtin_reconfig(void)
240 {
241 config_flag = true;
242 }
243
244 /* builtin_agent - detached thread periodically when pending jobs can start */
builtin_agent(void * args)245 extern void *builtin_agent(void *args)
246 {
247 time_t now;
248 double wait_time;
249 static time_t last_sched_time = 0;
250 /* Read config, nodes and partitions; Write jobs */
251 slurmctld_lock_t all_locks = {
252 READ_LOCK, WRITE_LOCK, READ_LOCK, READ_LOCK, READ_LOCK };
253
254 _load_config();
255 last_sched_time = time(NULL);
256 while (!stop_builtin) {
257 _my_sleep(builtin_interval);
258 if (stop_builtin)
259 break;
260 if (config_flag) {
261 config_flag = false;
262 _load_config();
263 }
264 now = time(NULL);
265 wait_time = difftime(now, last_sched_time);
266 if ((wait_time < builtin_interval))
267 continue;
268
269 lock_slurmctld(all_locks);
270 _compute_start_times();
271 last_sched_time = time(NULL);
272 (void) bb_g_job_try_stage_in();
273 unlock_slurmctld(all_locks);
274 }
275 return NULL;
276 }
277