1 /*****************************************************************************\
2  *  builtin.c - Simple builtin (FIFO) scheduler plugin.
3  *		Periodically compute when pending jobs can start.
4  *		This is a minimal implementation of the logic found in
5  *		src/plugins/sched/backfill/backfill.c and disregards
6  *		how jobs are scheduled sequentially.
7  *****************************************************************************
8  *  Copyright (C) 2003-2007 The Regents of the University of California.
9  *  Copyright (C) 2008-2010 Lawrence Livermore National Security.
10  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
11  *  Written by Morris Jette <jette1@llnl.gov>
12  *  CODE-OCEC-09-009. All rights reserved.
13  *
14  *  This file is part of Slurm, a resource management program.
15  *  For details, see <https://slurm.schedmd.com/>.
16  *  Please also read the included file: DISCLAIMER.
17  *
18  *  Slurm is free software; you can redistribute it and/or modify it under
19  *  the terms of the GNU General Public License as published by the Free
20  *  Software Foundation; either version 2 of the License, or (at your option)
21  *  any later version.
22  *
23  *  In addition, as a special exception, the copyright holders give permission
24  *  to link the code of portions of this program with the OpenSSL library under
25  *  certain conditions as described in each individual source file, and
26  *  distribute linked combinations including the two. You must obey the GNU
27  *  General Public License in all respects for all of the code used other than
28  *  OpenSSL. If you modify file(s) with this exception, you may extend this
29  *  exception to your version of the file(s), but you are not obligated to do
30  *  so. If you do not wish to do so, delete this exception statement from your
31  *  version.  If you delete this exception statement from all source files in
32  *  the program, then also delete it here.
33  *
34  *  Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
35  *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
36  *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
37  *  details.
38  *
39  *  You should have received a copy of the GNU General Public License along
40  *  with Slurm; if not, write to the Free Software Foundation, Inc.,
41  *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
42 \*****************************************************************************/
43 
44 #include <pthread.h>
45 #include <stdio.h>
46 #include <stdlib.h>
47 #include <string.h>
48 #include <time.h>
49 #include <unistd.h>
50 
51 #include "slurm/slurm.h"
52 #include "slurm/slurm_errno.h"
53 
54 #include "src/common/list.h"
55 #include "src/common/macros.h"
56 #include "src/common/node_select.h"
57 #include "src/common/parse_time.h"
58 #include "src/common/slurm_protocol_api.h"
59 #include "src/common/xmalloc.h"
60 #include "src/common/xstring.h"
61 
62 #include "src/slurmctld/burst_buffer.h"
63 #include "src/slurmctld/locks.h"
64 #include "src/slurmctld/preempt.h"
65 #include "src/slurmctld/reservation.h"
66 #include "src/slurmctld/slurmctld.h"
67 #include "src/plugins/sched/builtin/builtin.h"
68 
/*
 * Default number of seconds between scheduling passes.
 * Only defined here when no earlier definition is in effect.
 */
#if !defined(BACKFILL_INTERVAL)
#  define BACKFILL_INTERVAL	30
#endif

/*********************** local variables *********************/

/*
 * Agent termination: stop_builtin is set (under term_lock) by
 * stop_builtin_agent(), which also signals term_cond to wake _my_sleep().
 */
static pthread_mutex_t term_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  term_cond = PTHREAD_COND_INITIALIZER;
static bool stop_builtin = false;

/* Set by builtin_reconfig(); cleared by builtin_agent() before reloading */
static bool config_flag = false;

/* Tunables refreshed by _load_config() */
static int builtin_interval = BACKFILL_INTERVAL; /* secs between passes */
static int max_sched_job_cnt = 50;	/* job limit for one pass */
static int sched_timeout = 0;		/* time limit (secs) for one pass */

/*********************** local functions *********************/
static void _my_sleep(int secs);
static void _load_config(void);
static void _compute_start_times(void);
86 
87 /* Terminate builtin_agent */
stop_builtin_agent(void)88 extern void stop_builtin_agent(void)
89 {
90 	slurm_mutex_lock(&term_lock);
91 	stop_builtin = true;
92 	slurm_cond_signal(&term_cond);
93 	slurm_mutex_unlock(&term_lock);
94 }
95 
_my_sleep(int secs)96 static void _my_sleep(int secs)
97 {
98 	struct timespec ts = {0, 0};
99 	struct timeval now;
100 
101 	gettimeofday(&now, NULL);
102 	ts.tv_sec = now.tv_sec + secs;
103 	ts.tv_nsec = now.tv_usec * 1000;
104 	slurm_mutex_lock(&term_lock);
105 	if (!stop_builtin)
106 		slurm_cond_timedwait(&term_cond, &term_lock, &ts);
107 	slurm_mutex_unlock(&term_lock);
108 }
109 
_load_config(void)110 static void _load_config(void)
111 {
112 	char *sched_params = slurm_get_sched_params();
113 	char *tmp_ptr;
114 
115 	sched_timeout = slurm_get_msg_timeout() / 2;
116 	sched_timeout = MAX(sched_timeout, 1);
117 	sched_timeout = MIN(sched_timeout, 10);
118 
119 	if ((tmp_ptr = xstrcasestr(sched_params, "interval=")))
120 		builtin_interval = atoi(tmp_ptr + 9);
121 	if (builtin_interval < 1) {
122 		error("Invalid SchedulerParameters interval: %d",
123 		      builtin_interval);
124 		builtin_interval = BACKFILL_INTERVAL;
125 	}
126 
127 	if ((tmp_ptr = xstrcasestr(sched_params, "max_job_bf=")))
128 		max_sched_job_cnt = atoi(tmp_ptr + 11);
129 	if ((tmp_ptr = xstrcasestr(sched_params, "bf_max_job_test=")))
130 		max_sched_job_cnt = atoi(tmp_ptr + 16);
131 	if (max_sched_job_cnt < 1) {
132 		error("Invalid SchedulerParameters bf_max_job_test: %d",
133 		      max_sched_job_cnt);
134 		max_sched_job_cnt = 50;
135 	}
136 	xfree(sched_params);
137 }
138 
_compute_start_times(void)139 static void _compute_start_times(void)
140 {
141 	int j, rc = SLURM_SUCCESS, job_cnt = 0;
142 	List job_queue;
143 	job_queue_rec_t *job_queue_rec;
144 	job_record_t *job_ptr;
145 	part_record_t *part_ptr;
146 	bitstr_t *alloc_bitmap = NULL, *avail_bitmap = NULL;
147 	bitstr_t *exc_core_bitmap = NULL;
148 	uint32_t max_nodes, min_nodes, req_nodes, time_limit;
149 	time_t now = time(NULL), sched_start, last_job_alloc;
150 	bool resv_overlap = false;
151 
152 	sched_start = now;
153 	last_job_alloc = now - 1;
154 	alloc_bitmap = bit_alloc(node_record_count);
155 	job_queue = build_job_queue(true, false);
156 	sort_job_queue(job_queue);
157 	while ((job_queue_rec = (job_queue_rec_t *) list_pop(job_queue))) {
158 		job_ptr  = job_queue_rec->job_ptr;
159 		part_ptr = job_queue_rec->part_ptr;
160 		xfree(job_queue_rec);
161 		if (part_ptr != job_ptr->part_ptr)
162 			continue;	/* Only test one partition */
163 
164 		if (job_cnt++ > max_sched_job_cnt) {
165 			debug2("scheduling loop exiting after %d jobs",
166 			       max_sched_job_cnt);
167 			break;
168 		}
169 
170 		/* Determine minimum and maximum node counts */
171 		/* On BlueGene systems don't adjust the min/max node limits
172 		   here.  We are working on midplane values. */
173 		min_nodes = MAX(job_ptr->details->min_nodes,
174 				part_ptr->min_nodes);
175 
176 		if (job_ptr->details->max_nodes == 0)
177 			max_nodes = part_ptr->max_nodes;
178 		else
179 			max_nodes = MIN(job_ptr->details->max_nodes,
180 					part_ptr->max_nodes);
181 
182 		max_nodes = MIN(max_nodes, 500000);     /* prevent overflows */
183 
184 		if (job_ptr->details->max_nodes)
185 			req_nodes = max_nodes;
186 		else
187 			req_nodes = min_nodes;
188 
189 		if (min_nodes > max_nodes) {
190 			/* job's min_nodes exceeds partition's max_nodes */
191 			continue;
192 		}
193 
194 		j = job_test_resv(job_ptr, &now, true, &avail_bitmap,
195 				  &exc_core_bitmap, &resv_overlap, false);
196 		if (j != SLURM_SUCCESS) {
197 			FREE_NULL_BITMAP(avail_bitmap);
198 			FREE_NULL_BITMAP(exc_core_bitmap);
199 			continue;
200 		}
201 
202 		rc = select_g_job_test(job_ptr, avail_bitmap,
203 				       min_nodes, max_nodes, req_nodes,
204 				       SELECT_MODE_WILL_RUN,
205 				       NULL, NULL,
206 				       exc_core_bitmap);
207 		if (rc == SLURM_SUCCESS) {
208 			last_job_update = now;
209 			if (job_ptr->time_limit == INFINITE)
210 				time_limit = 365 * 24 * 60 * 60;
211 			else if (job_ptr->time_limit != NO_VAL)
212 				time_limit = job_ptr->time_limit * 60;
213 			else if (job_ptr->part_ptr &&
214 				 (job_ptr->part_ptr->max_time != INFINITE))
215 				time_limit = job_ptr->part_ptr->max_time * 60;
216 			else
217 				time_limit = 365 * 24 * 60 * 60;
218 			if (bit_overlap_any(alloc_bitmap, avail_bitmap) &&
219 			    (job_ptr->start_time <= last_job_alloc)) {
220 				job_ptr->start_time = last_job_alloc;
221 			}
222 			bit_or(alloc_bitmap, avail_bitmap);
223 			last_job_alloc = job_ptr->start_time + time_limit;
224 		}
225 		FREE_NULL_BITMAP(avail_bitmap);
226 		FREE_NULL_BITMAP(exc_core_bitmap);
227 
228 		if ((time(NULL) - sched_start) >= sched_timeout) {
229 			debug2("scheduling loop exiting after %d jobs",
230 			       max_sched_job_cnt);
231 			break;
232 		}
233 	}
234 	FREE_NULL_LIST(job_queue);
235 	FREE_NULL_BITMAP(alloc_bitmap);
236 }
237 
238 /* Note that slurm.conf has changed */
builtin_reconfig(void)239 extern void builtin_reconfig(void)
240 {
241 	config_flag = true;
242 }
243 
244 /* builtin_agent - detached thread periodically when pending jobs can start */
builtin_agent(void * args)245 extern void *builtin_agent(void *args)
246 {
247 	time_t now;
248 	double wait_time;
249 	static time_t last_sched_time = 0;
250 	/* Read config, nodes and partitions; Write jobs */
251 	slurmctld_lock_t all_locks = {
252 		READ_LOCK, WRITE_LOCK, READ_LOCK, READ_LOCK, READ_LOCK };
253 
254 	_load_config();
255 	last_sched_time = time(NULL);
256 	while (!stop_builtin) {
257 		_my_sleep(builtin_interval);
258 		if (stop_builtin)
259 			break;
260 		if (config_flag) {
261 			config_flag = false;
262 			_load_config();
263 		}
264 		now = time(NULL);
265 		wait_time = difftime(now, last_sched_time);
266 		if ((wait_time < builtin_interval))
267 			continue;
268 
269 		lock_slurmctld(all_locks);
270 		_compute_start_times();
271 		last_sched_time = time(NULL);
272 		(void) bb_g_job_try_stage_in();
273 		unlock_slurmctld(all_locks);
274 	}
275 	return NULL;
276 }
277