/*****************************************************************************\
 *  read_config.c - read the overall slurm configuration file
 *****************************************************************************
 *  Copyright (C) 2002-2007 The Regents of the University of California.
 *  Copyright (C) 2008-2010 Lawrence Livermore National Security.
 *  Portions Copyright (C) 2010-2016 SchedMD <https://www.schedmd.com>.
 *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
 *  Written by Morris Jette <jette1@llnl.gov>.
 *  CODE-OCEC-09-009. All rights reserved.
 *
 *  This file is part of Slurm, a resource management program.
 *  For details, see <https://slurm.schedmd.com/>.
 *  Please also read the included file: DISCLAIMER.
 *
 *  Slurm is free software; you can redistribute it and/or modify it under
 *  the terms of the GNU General Public License as published by the Free
 *  Software Foundation; either version 2 of the License, or (at your option)
 *  any later version.
 *
 *  In addition, as a special exception, the copyright holders give permission
 *  to link the code of portions of this program with the OpenSSL library under
 *  certain conditions as described in each individual source file, and
 *  distribute linked combinations including the two. You must obey the GNU
 *  General Public License in all respects for all of the code used other than
 *  OpenSSL. If you modify file(s) with this exception, you may extend this
 *  exception to your version of the file(s), but you are not obligated to do
 *  so. If you do not wish to do so, delete this exception statement from your
 *  version. If you delete this exception statement from all source files in
 *  the program, then also delete it here.
 *
 *  Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
 *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 *  FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
 *  details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with Slurm; if not, write to the Free Software Foundation, Inc.,
 *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
\*****************************************************************************/

#include "config.h"

#include <ctype.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <syslog.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <time.h>
#include <unistd.h>

#include "src/common/assoc_mgr.h"
#include "src/common/cpu_frequency.h"
#include "src/common/gres.h"
#include "src/common/hostlist.h"
#include "src/common/layouts_mgr.h"
#include "src/common/list.h"
#include "src/common/macros.h"
#include "src/common/node_features.h"
#include "src/common/node_select.h"
#include "src/common/power.h"
#include "src/common/prep.h"
#include "src/common/read_config.h"
#include "src/common/slurm_jobcomp.h"
#include "src/common/slurm_mcs.h"
#include "src/common/slurm_topology.h"
#include "src/common/slurm_rlimits_info.h"
#include "src/common/slurm_route.h"
#include "src/common/strnatcmp.h"
#include "src/common/switch.h"
#include "src/common/xstring.h"
#include "src/common/xcgroup_read_config.h"

#include "src/slurmctld/acct_policy.h"
#include "src/slurmctld/burst_buffer.h"
#include "src/slurmctld/fed_mgr.h"
#include "src/slurmctld/front_end.h"
#include "src/slurmctld/gang.h"
#include "src/slurmctld/job_scheduler.h"
#include "src/slurmctld/job_submit.h"
#include "src/slurmctld/licenses.h"
#include "src/slurmctld/locks.h"
#include "src/slurmctld/node_scheduler.h"
#include "src/slurmctld/port_mgr.h"
#include "src/slurmctld/preempt.h"
#include "src/slurmctld/proc_req.h"
#include "src/slurmctld/read_config.h"
#include "src/slurmctld/reservation.h"
#include "src/slurmctld/sched_plugin.h"
#include "src/slurmctld/slurmctld.h"
#include "src/slurmctld/srun_comm.h"
#include "src/slurmctld/trigger_mgr.h"

#define FEATURE_MAGIC	0x34dfd8b5

/* Global variables */
List active_feature_list;	/* list of currently active features_records */
List avail_feature_list;	/* list of available features_records */
bool node_features_updated = true;
bool slurmctld_init_db = true;

static void _acct_restore_active_jobs(void);
static void _add_config_feature(List feature_list, char *feature,
				bitstr_t *node_bitmap);
static void _add_config_feature_inx(List feature_list, char *feature,
				    int node_inx);
static void _build_bitmaps(void);
static void _build_bitmaps_pre_select(void);
static int  _compare_hostnames(node_record_t *old_node_table,
			       int old_node_count, node_record_t *node_table,
			       int node_count);
static void _gres_reconfig(bool reconfig);
static int  _init_all_slurm_conf(void);
static void _list_delete_feature(void *feature_entry);
static int  _preserve_select_type_param(slurm_ctl_conf_t *ctl_conf_ptr,
					uint16_t old_select_type_p);
static void _purge_old_node_state(node_record_t *old_node_table_ptr,
				  int old_node_record_count);
static void _purge_old_part_state(List old_part_list, char *old_def_part_name);
static int  _reset_node_bitmaps(void *x, void *arg);
static void _restore_job_accounting(void);

static int  _restore_node_state(int recover, node_record_t *old_node_table_ptr,
				int old_node_record_count);
static int  _restore_part_state(List old_part_list, char *old_def_part_name,
				uint16_t flags);
static void _set_features(node_record_t *old_node_table_ptr,
			  int old_node_record_count, int recover);
static void _stat_slurm_dirs(void);
static int  _sync_nodes_to_comp_job(void);
static int  _sync_nodes_to_jobs(bool reconfig);
static int  _sync_nodes_to_active_job(job_record_t *job_ptr);
static void _sync_nodes_to_suspended_job(job_record_t *job_ptr);
static void _sync_part_prio(void);
static int  _update_preempt(uint16_t old_enable_preempt);


/*
 * Setup the global response_cluster_rec
 */
static void _set_response_cluster_rec(void)
{
	if (response_cluster_rec)
		return;

	response_cluster_rec = xmalloc(sizeof(slurmdb_cluster_rec_t));
	response_cluster_rec->name = xstrdup(slurmctld_conf.cluster_name);
	if (slurmctld_conf.slurmctld_addr) {
		response_cluster_rec->control_host =
			xstrdup(slurmctld_conf.slurmctld_addr);
	} else {
		response_cluster_rec->control_host =
			xstrdup(slurmctld_conf.control_addr[0]);
	}
	response_cluster_rec->control_port = slurmctld_conf.slurmctld_port;
	response_cluster_rec->rpc_version = SLURM_PROTOCOL_VERSION;
	response_cluster_rec->plugin_id_select = select_get_plugin_id();
}

/*
 * Free the global response_cluster_rec
 */
extern void cluster_rec_free(void)
{
	if (response_cluster_rec) {
		xfree(response_cluster_rec->control_host);
		xfree(response_cluster_rec->name);
		xfree(response_cluster_rec);
	}
}

/* Verify that Slurm directories are secure, not world writable */
static void _stat_slurm_dirs(void)
{
	struct stat stat_buf;
	char *problem_dir = NULL;

	/*
	 * PluginDir may have multiple values, and is checked by
	 * _is_valid_path() instead
	 */

	if (slurmctld_conf.plugstack &&
	    (stat(slurmctld_conf.plugstack, &stat_buf) == 0) &&
	    (stat_buf.st_mode & S_IWOTH)) {
		problem_dir = "PlugStack";
	}
	if ((stat(slurmctld_conf.slurmd_spooldir, &stat_buf) == 0) &&
	    (stat_buf.st_mode & S_IWOTH)) {
		problem_dir = "SlurmdSpoolDir";
	}
	if ((stat(slurmctld_conf.state_save_location, &stat_buf) == 0) &&
	    (stat_buf.st_mode & S_IWOTH)) {
		problem_dir = "StateSaveLocation";
	}

	if (problem_dir) {
		error("################################################");
		error("###     SEVERE SECURITY VULNERABILITY        ###");
		error("### %s DIRECTORY IS WORLD WRITABLE ###", problem_dir);
		error("###         CORRECT FILE PERMISSIONS         ###");
		error("################################################");
	}
}

/*
 * _reorder_nodes_by_name - order node table in ascending order of name
 */
static void _reorder_nodes_by_name(void)
{
	node_record_t *node_ptr, *node_ptr2;
	int i, j, min_inx;

	/* Now we need to sort the node records */
	for (i = 0; i < node_record_count; i++) {
		min_inx = i;
		for (j = i + 1; j < node_record_count; j++) {
			if (strnatcmp(node_record_table_ptr[j].name,
				      node_record_table_ptr[min_inx].name) < 0)
				min_inx = j;
		}

		if (min_inx != i) {	/* swap records */
			node_record_t node_record_tmp;

			j = sizeof(node_record_t);
			node_ptr = node_record_table_ptr + i;
			node_ptr2 = node_record_table_ptr + min_inx;

			memcpy(&node_record_tmp, node_ptr, j);
			memcpy(node_ptr, node_ptr2, j);
			memcpy(node_ptr2, &node_record_tmp, j);
		}
	}

#if _DEBUG
	/* Log the results */
	for (i = 0, node_ptr = node_record_table_ptr; i < node_record_count;
	     i++, node_ptr++) {
		info("node_rank[%d]: %s", i, node_ptr->name);
	}
#endif
}

/*
 * _reorder_nodes_by_rank - order node table in ascending order of node_rank
 * This depends on the TopologyPlugin and/or SelectPlugin, which may generate
 * such a ranking.
 */
static void _reorder_nodes_by_rank(void)
{
	node_record_t *node_ptr, *node_ptr2;
	int i, j, min_inx;
	uint32_t min_val;

	/* Now we need to sort the node records */
	for (i = 0; i < node_record_count; i++) {
		min_val = node_record_table_ptr[i].node_rank;
		min_inx = i;
		for (j = i + 1; j < node_record_count; j++) {
			if (node_record_table_ptr[j].node_rank < min_val) {
				min_val = node_record_table_ptr[j].node_rank;
				min_inx = j;
			}
		}

		if (min_inx != i) {	/* swap records */
			node_record_t node_record_tmp;

			j = sizeof(node_record_t);
			node_ptr = node_record_table_ptr + i;
			node_ptr2 = node_record_table_ptr + min_inx;

			memcpy(&node_record_tmp, node_ptr, j);
			memcpy(node_ptr, node_ptr2, j);
			memcpy(node_ptr2, &node_record_tmp, j);
		}
	}

#if _DEBUG
	/* Log the results */
	for (i = 0, node_ptr = node_record_table_ptr; i < node_record_count;
	     i++, node_ptr++) {
		info("node_rank[%u]: %s", node_ptr->node_rank, node_ptr->name);
	}
#endif
}

/*
 * Unfortunately the global feature bitmaps have not been set up at this point,
 * so we'll have to scan through the node_record_table directly to locate
 * the appropriate records.
 */
static void _add_nodes_with_feature(hostlist_t hl, char *feature)
{
	for (int i = 0; i < node_record_count; i++) {
		char *features, *tmp, *tok, *last = NULL;

		if (!node_record_table_ptr[i].features)
			continue;

		features = tmp = xstrdup(node_record_table_ptr[i].features);

		while ((tok = strtok_r(tmp, ",", &last))) {
			if (!xstrcmp(tok, feature)) {
				hostlist_push_host(
					hl, node_record_table_ptr[i].name);
				break;
			}
			tmp = NULL;
		}
		xfree(features);
	}
}

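/*
 * Replace any NodeSet names in *nodeline with the nodes they resolve to,
 * either by an explicit node list or by a feature they share, and rewrite
 * *nodeline as a newly allocated ranged host string.
 */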
static void _handle_nodesets(char **nodeline)
{
	int count;
	slurm_conf_nodeset_t *ptr, **ptr_array;
	hostlist_t hl;

	count = slurm_conf_nodeset_array(&ptr_array);

	hl = hostlist_create(*nodeline);

	for (int i = 0; i < count; i++) {
		ptr = ptr_array[i];

		/* swap the nodeset entry with the applicable nodes */
		if (hostlist_delete_host(hl, ptr->name)) {
			if (ptr->feature) {
				_add_nodes_with_feature(hl, ptr->feature);
			}

			if (ptr->nodes)
				hostlist_push_host(hl, ptr->nodes);
		}
	}

	xfree(*nodeline);
	*nodeline = hostlist_ranged_string_xmalloc(hl);
	hostlist_destroy(hl);
}

/*
 * _build_bitmaps_pre_select - recover some state for jobs and nodes prior to
 *	calling the select_* functions
 */
static void _build_bitmaps_pre_select(void)
{
	part_record_t *part_ptr;
	node_record_t *node_ptr;
	ListIterator part_iterator;
	int i;

	/* scan partition table and identify nodes in each */
	part_iterator = list_iterator_create(part_list);
	while ((part_ptr = list_next(part_iterator))) {
		_handle_nodesets(&part_ptr->nodes);
		if (build_part_bitmap(part_ptr) == ESLURM_INVALID_NODE_NAME)
			fatal("Invalid node names in partition %s",
			      part_ptr->name);
	}
	list_iterator_destroy(part_iterator);

	/* initialize the configuration bitmaps */
	list_for_each(config_list, _reset_node_bitmaps, NULL);

	for (i = 0, node_ptr = node_record_table_ptr;
	     i < node_record_count; i++, node_ptr++) {
		if (node_ptr->config_ptr)
			bit_set(node_ptr->config_ptr->node_bitmap, i);
	}

	return;
}

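/* Allocate a fresh, empty node_bitmap for a config record
 * (list_for_each() callback over config_list) */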
static int _reset_node_bitmaps(void *x, void *arg)
{
	config_record_t *config_ptr = (config_record_t *) x;

	FREE_NULL_BITMAP(config_ptr->node_bitmap);
	config_ptr->node_bitmap = (bitstr_t *) bit_alloc(node_record_count);

	return 0;
}

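/* Clear the nodes of a running, non-sharing job from share_node_bitmap
 * (list_for_each() callback over job_list) */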
static int _set_share_node_bitmap(void *x, void *arg)
{
	job_record_t *job_ptr = (job_record_t *) x;

	if (!IS_JOB_RUNNING(job_ptr) ||
	    (job_ptr->node_bitmap == NULL) ||
	    (job_ptr->details == NULL) ||
	    (job_ptr->details->share_res != 0))
		return 0;

	bit_and_not(share_node_bitmap, job_ptr->node_bitmap);

	return 0;
}

/*
 * _set_slurmd_addr - establish the slurm_addr_t for the slurmd on each node
 *	Uses common data structures.
 */
static void _set_slurmd_addr(void)
{
#ifndef HAVE_FRONT_END
	int i;
	node_record_t *node_ptr = node_record_table_ptr;
	DEF_TIMERS;

	xassert(verify_lock(CONF_LOCK, READ_LOCK));

	START_TIMER;
	for (i = 0; i < node_record_count; i++, node_ptr++) {
		if ((node_ptr->name == NULL) ||
		    (node_ptr->name[0] == '\0'))
			continue;
		if (IS_NODE_FUTURE(node_ptr))
			continue;
		if (IS_NODE_CLOUD(node_ptr)) {
			if (slurmctld_conf.suspend_time < 1 ||
			    slurmctld_conf.resume_program == NULL ||
			    slurmctld_conf.suspend_program == NULL)
				error("%s: Node %s configured with CLOUD state but "
				      "missing any of SuspendTime, SuspendProgram "
				      "or ResumeProgram options", __func__,
				      node_ptr->name);
			if (IS_NODE_POWER_SAVE(node_ptr))
				continue;
		}
		if (node_ptr->port == 0)
			node_ptr->port = slurmctld_conf.slurmd_port;
		slurm_set_addr(&node_ptr->slurm_addr, node_ptr->port,
			       node_ptr->comm_name);
		if (node_ptr->slurm_addr.sin_port)
			continue;
		error("%s: failure on %s", __func__, node_ptr->comm_name);
		node_ptr->node_state = NODE_STATE_FUTURE;
		node_ptr->port = 0;
		xfree(node_ptr->reason);
		node_ptr->reason = xstrdup("NO NETWORK ADDRESS FOUND");
		node_ptr->reason_time = time(NULL);
		node_ptr->reason_uid = slurmctld_conf.slurm_user_id;
	}

	END_TIMER2("_set_slurmd_addr");
#endif
}

/*
 * _build_bitmaps - build node bitmaps to define which nodes are in which
 *	1) partition  2) configuration record  3) up state  4) idle state
 *	also sets values of total_nodes and total_cpus for every partition.
 * Note: Operates on common variables, no arguments
 *	node_record_count - number of nodes in the system
 *	node_record_table_ptr - pointer to global node table
 *	part_list - pointer to global partition list
 */
static void _build_bitmaps(void)
{
	int i;
	node_record_t *node_ptr;

	last_node_update = time(NULL);
	last_part_update = time(NULL);

	/* initialize the idle and up bitmaps */
	FREE_NULL_BITMAP(avail_node_bitmap);
	FREE_NULL_BITMAP(bf_ignore_node_bitmap);
	FREE_NULL_BITMAP(booting_node_bitmap);
	FREE_NULL_BITMAP(cg_node_bitmap);
	FREE_NULL_BITMAP(future_node_bitmap);
	FREE_NULL_BITMAP(idle_node_bitmap);
	FREE_NULL_BITMAP(power_node_bitmap);
	FREE_NULL_BITMAP(share_node_bitmap);
	FREE_NULL_BITMAP(up_node_bitmap);
	FREE_NULL_BITMAP(rs_node_bitmap);
	avail_node_bitmap = (bitstr_t *) bit_alloc(node_record_count);
	bf_ignore_node_bitmap = bit_alloc(node_record_count);
	booting_node_bitmap = (bitstr_t *) bit_alloc(node_record_count);
	cg_node_bitmap = (bitstr_t *) bit_alloc(node_record_count);
	future_node_bitmap = (bitstr_t *) bit_alloc(node_record_count);
	idle_node_bitmap = (bitstr_t *) bit_alloc(node_record_count);
	power_node_bitmap = (bitstr_t *) bit_alloc(node_record_count);
	share_node_bitmap = (bitstr_t *) bit_alloc(node_record_count);
	up_node_bitmap = (bitstr_t *) bit_alloc(node_record_count);
	rs_node_bitmap = (bitstr_t *) bit_alloc(node_record_count);

	/* Set all bits, all nodes initially available for sharing */
	bit_set_all(share_node_bitmap);

	/* identify all nodes non-sharable due to non-sharing jobs */
	list_for_each(job_list, _set_share_node_bitmap, NULL);

	/* scan all nodes and identify which are up, idle and
	 * their configuration, resync DRAINED vs. DRAINING state */
	for (i = 0, node_ptr = node_record_table_ptr;
	     i < node_record_count; i++, node_ptr++) {
		uint32_t drain_flag, job_cnt;

		if (node_ptr->name[0] == '\0')
			continue;	/* defunct */
		drain_flag = IS_NODE_DRAIN(node_ptr) |
			     IS_NODE_FAIL(node_ptr);
		job_cnt = node_ptr->run_job_cnt + node_ptr->comp_job_cnt;

		if ((IS_NODE_IDLE(node_ptr) && (job_cnt == 0)) ||
		    IS_NODE_DOWN(node_ptr))
			bit_set(idle_node_bitmap, i);
		if (IS_NODE_POWER_UP(node_ptr))
			bit_set(booting_node_bitmap, i);
		if (IS_NODE_COMPLETING(node_ptr))
			bit_set(cg_node_bitmap, i);
		if (IS_NODE_IDLE(node_ptr) ||
		    IS_NODE_ALLOCATED(node_ptr) ||
		    (IS_NODE_REBOOT(node_ptr) &&
		     (node_ptr->next_state == NODE_RESUME))) {
			if ((drain_flag == 0) &&
			    (!IS_NODE_NO_RESPOND(node_ptr)))
				make_node_avail(i);
			bit_set(up_node_bitmap, i);
		}
		if (IS_NODE_POWER_SAVE(node_ptr))
			bit_set(power_node_bitmap, i);
		if (IS_NODE_POWERING_DOWN(node_ptr))
			bit_clear(avail_node_bitmap, i);
		if (IS_NODE_FUTURE(node_ptr))
			bit_set(future_node_bitmap, i);

		if (IS_NODE_REBOOT(node_ptr) &&
		    (node_ptr->next_state == NODE_RESUME))
			bit_set(rs_node_bitmap, i);
	}
}


/*
 * _init_all_slurm_conf - initialize or re-initialize the slurm
 *	configuration values.
 * RET 0 if no error, otherwise an error code.
 * NOTE: We leave the job table intact
 * NOTE: Operates on common variables, no arguments
 */
static int _init_all_slurm_conf(void)
{
	int error_code;
	char *conf_name = xstrdup(slurmctld_conf.slurm_conf);

	slurm_conf_reinit(conf_name);
	xfree(conf_name);

	if ((error_code = init_node_conf()))
		return error_code;

	if ((error_code = init_part_conf()))
		return error_code;

	if ((error_code = init_job_conf()))
		return error_code;

	return 0;
}

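/*
 * Apply one DownNodes line from slurm.conf: set the configured state
 * (default DOWN) and reason on every node the line names
 */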
static int _handle_downnodes_line(slurm_conf_downnodes_t *down)
{
	int error_code = 0;
	node_record_t *node_rec = NULL;
	hostlist_t alias_list = NULL;
	char *alias = NULL;
	int state_val = NODE_STATE_DOWN;

	if (down->state != NULL) {
		state_val = state_str2int(down->state, down->nodenames);
		if (state_val == NO_VAL) {
			error("Invalid State \"%s\"", down->state);
			goto cleanup;
		}
	}

	if ((alias_list = hostlist_create(down->nodenames)) == NULL) {
		error("Unable to create NodeName list from %s",
		      down->nodenames);
		error_code = errno;
		goto cleanup;
	}

	while ((alias = hostlist_shift(alias_list))) {
		node_rec = find_node_record(alias);
		if (node_rec == NULL) {
			error("DownNode \"%s\" does not exist!", alias);
			free(alias);
			continue;
		}

		if ((state_val != NO_VAL) &&
		    (state_val != NODE_STATE_UNKNOWN))
			node_rec->node_state = state_val;
		if (down->reason) {
			xfree(node_rec->reason);
			node_rec->reason = xstrdup(down->reason);
			node_rec->reason_time = time(NULL);
			node_rec->reason_uid = slurmctld_conf.slurm_user_id;
		}
		free(alias);
	}

cleanup:
	if (alias_list)
		hostlist_destroy(alias_list);
	return error_code;
}

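/* Process every DownNodes line in slurm.conf */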
static void _handle_all_downnodes(void)
{
	slurm_conf_downnodes_t *ptr, **ptr_array;
	int count;
	int i;

	count = slurm_conf_downnodes_array(&ptr_array);
	if (count == 0) {
		debug("No DownNodes");
		return;
	}

	for (i = 0; i < count; i++) {
		ptr = ptr_array[i];

		_handle_downnodes_line(ptr);
	}
}

/*
 * _build_all_nodeline_info - get an array of slurm_conf_node_t structures
 *	from the slurm.conf reader, build table, and set values
 * RET 0 if no error, error code otherwise
 * Note: Operates on common variables
 *	default_node_record - default node configuration values
 */
static int _build_all_nodeline_info(void)
{
	int rc, rc2;

	/* Load the node table here */
	rc = build_all_nodeline_info(false, slurmctld_tres_cnt);
	(void) acct_storage_g_reconfig(acct_db_conn, 0);
	rc2 = build_all_frontend_info(false);
	rc = MAX(rc, rc2);

	return rc;
}

/* Convert a comma delimited list of account names into a NULL terminated
 * array of pointers to strings. Call accounts_list_free() to release memory */
extern void accounts_list_build(char *accounts, char ***accounts_array)
{
	char *tmp_accts, *one_acct_name, *name_ptr = NULL, **tmp_array = NULL;
	int array_len = 0, array_used = 0;

	if (!accounts) {
		accounts_list_free(accounts_array);
		*accounts_array = NULL;
		return;
	}

	tmp_accts = xstrdup(accounts);
	one_acct_name = strtok_r(tmp_accts, ",", &name_ptr);
	while (one_acct_name) {
		if (array_len < array_used + 2) {
			array_len += 10;
			xrealloc(tmp_array, sizeof(char *) * array_len);
		}
		tmp_array[array_used++] = xstrdup(one_acct_name);
		one_acct_name = strtok_r(NULL, ",", &name_ptr);
	}
	xfree(tmp_accts);
	accounts_list_free(accounts_array);
	*accounts_array = tmp_array;
}

/* Free memory allocated for an account array by accounts_list_build() */
extern void accounts_list_free(char ***accounts_array)
{
	int i;

	if (*accounts_array == NULL)
		return;
	for (i = 0; accounts_array[0][i]; i++)
		xfree(accounts_array[0][i]);
	xfree(*accounts_array);
}
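
/*
 * Illustrative usage sketch only (not called anywhere; the account names
 * below are hypothetical):
 *
 *	char **accts = NULL;
 *	accounts_list_build("acct_a,acct_b", &accts);
 *	for (int i = 0; accts && accts[i]; i++)
 *		debug2("account[%d]: %s", i, accts[i]);
 *	accounts_list_free(&accts);
 */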

/* Convert a comma delimited list of QOS names into a bitmap */
extern void qos_list_build(char *qos, bitstr_t **qos_bits)
{
	char *tmp_qos, *one_qos_name, *name_ptr = NULL;
	slurmdb_qos_rec_t qos_rec, *qos_ptr = NULL;
	bitstr_t *tmp_qos_bitstr;
	int rc;
	assoc_mgr_lock_t locks = { .qos = READ_LOCK };

	if (!qos) {
		FREE_NULL_BITMAP(*qos_bits);
		return;
	}

	/* Lock here to avoid g_qos_count changing under us */
	assoc_mgr_lock(&locks);
	if (!g_qos_count) {
		error("We have no QOS on the system. Ignoring invalid "
		      "Allow/DenyQOS value(s) %s",
		      qos);
		assoc_mgr_unlock(&locks);
		FREE_NULL_BITMAP(*qos_bits);
		*qos_bits = NULL;
		return;
	}

	tmp_qos_bitstr = bit_alloc(g_qos_count);
	tmp_qos = xstrdup(qos);
	one_qos_name = strtok_r(tmp_qos, ",", &name_ptr);
	while (one_qos_name) {
		memset(&qos_rec, 0, sizeof(slurmdb_qos_rec_t));
		qos_rec.name = one_qos_name;
		rc = assoc_mgr_fill_in_qos(acct_db_conn, &qos_rec,
					   accounting_enforce,
					   &qos_ptr, 1);
		if ((rc != SLURM_SUCCESS) || (qos_rec.id >= g_qos_count)) {
			error("Ignoring invalid Allow/DenyQOS value: %s",
			      one_qos_name);
		} else {
			bit_set(tmp_qos_bitstr, qos_rec.id);
		}
		one_qos_name = strtok_r(NULL, ",", &name_ptr);
	}
	assoc_mgr_unlock(&locks);
	xfree(tmp_qos);
	FREE_NULL_BITMAP(*qos_bits);
	*qos_bits = tmp_qos_bitstr;
}
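
/*
 * Illustrative usage sketch only (the QOS names are hypothetical). The
 * resulting bitmap is indexed by QOS id, as resolved above through
 * assoc_mgr_fill_in_qos():
 *
 *	bitstr_t *qos_bits = NULL;
 *	qos_list_build("normal,high", &qos_bits);
 *	if (qos_bits && bit_test(qos_bits, qos_id))
 *		... this QOS id was named in the list ...
 *	FREE_NULL_BITMAP(qos_bits);
 */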

/*
 * _build_single_partitionline_info - get an array of slurm_conf_partition_t
 *	structures from the slurm.conf reader, build table, and set values
 * RET 0 if no error, error code otherwise
 * Note: Operates on common variables
 * global: part_list - global partition list pointer
 *	default_part - default parameters for a partition
 */
static int _build_single_partitionline_info(slurm_conf_partition_t *part)
{
	part_record_t *part_ptr;

	if (list_find_first(part_list, &list_find_part, part->name))
		fatal("%s: duplicate entry for partition %s",
		      __func__, part->name);

	part_ptr = create_part_record(part->name);

	if (part->default_flag) {
		if (default_part_name &&
		    xstrcmp(default_part_name, part->name)) {
			info("%s: changing default partition "
			     "from %s to %s", __func__,
			     default_part_name, part->name);
			default_part_loc->flags &= (~PART_FLAG_DEFAULT);
		}
		xfree(default_part_name);
		default_part_name = xstrdup(part->name);
		default_part_loc = part_ptr;
		part_ptr->flags |= PART_FLAG_DEFAULT;
	}

	part_ptr->cpu_bind = part->cpu_bind;

	if (part->preempt_mode != NO_VAL16)
		part_ptr->preempt_mode = part->preempt_mode;

	if (part->disable_root_jobs == NO_VAL16) {
		if (slurmctld_conf.conf_flags & CTL_CONF_DRJ)
			part_ptr->flags |= PART_FLAG_NO_ROOT;
	} else if (part->disable_root_jobs) {
		part_ptr->flags |= PART_FLAG_NO_ROOT;
	} else {
		part_ptr->flags &= (~PART_FLAG_NO_ROOT);
	}
	if (part_ptr->flags & PART_FLAG_NO_ROOT)
		debug2("partition %s does not allow root jobs", part_ptr->name);

	if ((part->default_time != NO_VAL) &&
	    (part->default_time > part->max_time)) {
		info("partition %s DefaultTime exceeds MaxTime (%u > %u)",
		     part->name, part->default_time, part->max_time);
		part->default_time = NO_VAL;
	}

	if (part->exclusive_user)
		part_ptr->flags |= PART_FLAG_EXCLUSIVE_USER;
	if (part->hidden_flag)
		part_ptr->flags |= PART_FLAG_HIDDEN;
	if (part->root_only_flag)
		part_ptr->flags |= PART_FLAG_ROOT_ONLY;
	if (part->req_resv_flag)
		part_ptr->flags |= PART_FLAG_REQ_RESV;
	if (part->lln_flag)
		part_ptr->flags |= PART_FLAG_LLN;
	part_ptr->max_time = part->max_time;
	part_ptr->def_mem_per_cpu = part->def_mem_per_cpu;
	part_ptr->default_time = part->default_time;
	FREE_NULL_LIST(part_ptr->job_defaults_list);
	part_ptr->job_defaults_list =
		job_defaults_copy(part->job_defaults_list);
	part_ptr->max_cpus_per_node = part->max_cpus_per_node;
	part_ptr->max_share = part->max_share;
	part_ptr->max_mem_per_cpu = part->max_mem_per_cpu;
	part_ptr->max_nodes = part->max_nodes;
	part_ptr->max_nodes_orig = part->max_nodes;
	part_ptr->min_nodes = part->min_nodes;
	part_ptr->min_nodes_orig = part->min_nodes;
	part_ptr->over_time_limit = part->over_time_limit;
	part_ptr->preempt_mode = part->preempt_mode;
	part_ptr->priority_job_factor = part->priority_job_factor;
	part_ptr->priority_tier = part->priority_tier;
	part_ptr->qos_char = xstrdup(part->qos_char);
	part_ptr->state_up = part->state_up;
	part_ptr->grace_time = part->grace_time;
	part_ptr->cr_type = part->cr_type;

	part_ptr->allow_alloc_nodes = xstrdup(part->allow_alloc_nodes);
	part_ptr->allow_groups = xstrdup(part->allow_groups);
	part_ptr->alternate = xstrdup(part->alternate);
	part_ptr->nodes = xstrdup(part->nodes);

	if (part->billing_weights_str) {
		set_partition_billing_weights(part->billing_weights_str,
					      part_ptr, true);
	}

	if (part->allow_accounts) {
		part_ptr->allow_accounts = xstrdup(part->allow_accounts);
		accounts_list_build(part_ptr->allow_accounts,
				    &part_ptr->allow_account_array);
	}

	if (part->allow_qos) {
		part_ptr->allow_qos = xstrdup(part->allow_qos);
		qos_list_build(part_ptr->allow_qos,
			       &part_ptr->allow_qos_bitstr);
	}

	if (part->deny_accounts) {
		part_ptr->deny_accounts = xstrdup(part->deny_accounts);
		accounts_list_build(part_ptr->deny_accounts,
				    &part_ptr->deny_account_array);
	}

	if (part->deny_qos) {
		part_ptr->deny_qos = xstrdup(part->deny_qos);
		qos_list_build(part_ptr->deny_qos, &part_ptr->deny_qos_bitstr);
	}

	if (part->qos_char) {
		slurmdb_qos_rec_t qos_rec;
		/* part_ptr->qos_char was already copied from part above */

		memset(&qos_rec, 0, sizeof(slurmdb_qos_rec_t));
		qos_rec.name = part_ptr->qos_char;
		if (assoc_mgr_fill_in_qos(
			    acct_db_conn, &qos_rec, accounting_enforce,
			    (slurmdb_qos_rec_t **)&part_ptr->qos_ptr, 0)
		    != SLURM_SUCCESS) {
			fatal("Partition %s has an invalid qos (%s), "
			      "please check your configuration",
			      part_ptr->name, qos_rec.name);
		}
	}

	return 0;
}

/*
 * _build_all_partitionline_info - get an array of slurm_conf_partition_t
 *	structures from the slurm.conf reader, build table, and set values
 * RET 0 if no error, error code otherwise
 * Note: Operates on common variables
 * global: part_list - global partition list pointer
 *	default_part - default parameters for a partition
 */
static int _build_all_partitionline_info(void)
{
	slurm_conf_partition_t **ptr_array;
	int count;
	int i;

	count = slurm_conf_partition_array(&ptr_array);
	if (count == 0)
		fatal("No PartitionName information available!");

	for (i = 0; i < count; i++)
		_build_single_partitionline_info(ptr_array[i]);

	return SLURM_SUCCESS;
}

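/* Record the largest priority_job_factor in part_max_priority
 * (list_for_each() callback over part_list) */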
static int _set_max_part_prio(void *x, void *arg)
{
	part_record_t *part_ptr = (part_record_t *) x;

	if (part_ptr->priority_job_factor > part_max_priority)
		part_max_priority = part_ptr->priority_job_factor;

	return 0;
}

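/* Recompute a partition's norm_priority from part_max_priority
 * (list_for_each() callback over part_list) */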
static int _reset_part_prio(void *x, void *arg)
{
	part_record_t *part_ptr = (part_record_t *) x;

	/* protect against div0 if all partition priorities are zero */
	if (part_max_priority == 0) {
		part_ptr->norm_priority = 0;
		return 0;
	}

	part_ptr->norm_priority = (double)part_ptr->priority_job_factor /
				  (double)part_max_priority;

	return 0;
}

/* _sync_part_prio - Set normalized partition priorities */
static void _sync_part_prio(void)
{
	/* reset global value from part list */
	part_max_priority = DEF_PART_MAX_PRIORITY;
	list_for_each(part_list, _set_max_part_prio, NULL);
	/* renormalize values after finding new max */
	list_for_each(part_list, _reset_part_prio, NULL);
}

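/*
 * Abort a job: mark it completing in the given state with the given reason,
 * log its completion, and on node failure run the slurmctld epilog
 */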
static void _abort_job(job_record_t *job_ptr, uint32_t job_state,
		       uint16_t state_reason, char *reason_string)
{
	time_t now = time(NULL);

	job_ptr->job_state = job_state | JOB_COMPLETING;
	build_cg_bitmap(job_ptr);
	job_ptr->end_time = MIN(job_ptr->end_time, now);
	job_ptr->state_reason = state_reason;
	xfree(job_ptr->state_desc);
	job_ptr->state_desc = xstrdup(reason_string);
	job_completion_logger(job_ptr, false);
	if (job_ptr->job_state == JOB_NODE_FAIL) {
		/* build_cg_bitmap() may clear JOB_COMPLETING */
		epilog_slurmctld(job_ptr);
	}
}

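/* Clear HET_JOB_FLAG on a job (list_for_each() callback over job_list) */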
static int _mark_het_job_unused(void *x, void *arg)
{
	job_record_t *job_ptr = (job_record_t *) x;
	job_ptr->bit_flags &= (~HET_JOB_FLAG);
	return 0;
}

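/* Set HET_JOB_FLAG on a job (list_for_each() callback over a leader's
 * het_job_list) */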
static int _mark_het_job_used(void *x, void *arg)
{
	job_record_t *job_ptr = (job_record_t *) x;
	job_ptr->bit_flags |= HET_JOB_FLAG;
	return 0;
}

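/*
 * Abort any active hetjob component left unmarked by _validate_het_jobs(),
 * i.e. one missing from its leader's component list
 * (list_for_each() callback over job_list)
 */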
static int _test_het_job_used(void *x, void *arg)
{
	job_record_t *job_ptr = (job_record_t *) x;

	if ((job_ptr->het_job_id == 0) || IS_JOB_FINISHED(job_ptr))
		return 0;
	if (job_ptr->bit_flags & HET_JOB_FLAG)
		return 0;

	error("Incomplete hetjob being aborted %pJ", job_ptr);
	_abort_job(job_ptr, JOB_FAILED, FAIL_SYSTEM, "incomplete hetjob");

	return 0;
}

/*
 * Validate heterogeneous jobs
 *
 * Make sure that every active (not yet complete) job has all of its components
 * and they are all in the same state. Also rebuild het_job_list.
 * If a hetjob is corrupted, abort it and remove it from job_list.
 */
static void _validate_het_jobs(void)
{
	ListIterator job_iterator;
	job_record_t *job_ptr, *het_job_ptr;
	hostset_t hs;
	char *job_id_str;
	uint32_t job_id;
	bool het_job_valid;

	list_for_each(job_list, _mark_het_job_unused, NULL);

	job_iterator = list_iterator_create(job_list);
	while ((job_ptr = list_next(job_iterator))) {
		/* Checking for corrupted hetjob components */
		if (job_ptr->het_job_offset != 0) {
			het_job_ptr = find_job_record(job_ptr->het_job_id);
			if (!het_job_ptr) {
				error("Could not find hetjob leader (JobId=%u) of %pJ. Aborting and removing job as it is corrupted.",
				      job_ptr->het_job_id, job_ptr);
				_abort_job(job_ptr, JOB_FAILED, FAIL_SYSTEM,
					   "invalid het_job_id_set");
				if (list_delete_item(job_iterator) != 1)
					error("Not able to remove the job.");
				continue;
			}
		}

		if ((job_ptr->het_job_id == 0) ||
		    (job_ptr->het_job_offset != 0))
			continue;
		/* active het job leader found */
		FREE_NULL_LIST(job_ptr->het_job_list);
		job_id_str = NULL;
		/* Need to wrap numbers with brackets for hostset functions */
		xstrfmtcat(job_id_str, "[%s]", job_ptr->het_job_id_set);
		hs = hostset_create(job_id_str);
		xfree(job_id_str);
		if (!hs) {
			error("%pJ has invalid het_job_id_set(%s). Aborting and removing job as it is corrupted.",
			      job_ptr, job_ptr->het_job_id_set);
			_abort_job(job_ptr, JOB_FAILED, FAIL_SYSTEM,
				   "invalid het_job_id_set");
			if (list_delete_item(job_iterator) != 1)
				error("Not able to remove the job.");
			continue;
		}
		job_ptr->het_job_list = list_create(NULL);
		het_job_valid = true;	/* assume valid for now */
		while (het_job_valid && (job_id_str = hostset_shift(hs))) {
			job_id = (uint32_t) strtoll(job_id_str, NULL, 10);
			het_job_ptr = find_job_record(job_id);
			if (!het_job_ptr) {
				error("Could not find JobId=%u, part of hetjob JobId=%u",
				      job_id, job_ptr->job_id);
				het_job_valid = false;
			} else if (het_job_ptr->het_job_id !=
				   job_ptr->job_id) {
				error("Invalid state of JobId=%u, part of hetjob JobId=%u",
				      job_id, job_ptr->job_id);
				het_job_valid = false;
			} else {
				list_append(job_ptr->het_job_list,
					    het_job_ptr);
			}
			free(job_id_str);
		}
		hostset_destroy(hs);
		if (het_job_valid) {
			list_for_each(job_ptr->het_job_list, _mark_het_job_used,
				      NULL);
		}
	}
	list_iterator_destroy(job_iterator);

	list_for_each(job_list, _test_het_job_used, NULL);
}

/* Log an error if SlurmdUser is not root and any cgroup plugin is used */
static void _test_cgroup_plugin_use(void)
{
	char *plugins;

	plugins = slurm_get_task_plugin();
	if (xstrstr(plugins, "cgroup"))
		error("task/cgroup plugin will not work unless SlurmdUser is root");
	xfree(plugins);

	plugins = slurm_get_proctrack_type();
	if (xstrstr(plugins, "cgroup"))
		error("proctrack/cgroup plugin will not work unless SlurmdUser is root");
	xfree(plugins);
}

/*
 * read_slurm_conf - load the slurm configuration from the configured file.
 *	read_slurm_conf can be called more than once if so desired.
 * IN recover - replace job, node and/or partition data with latest
 *	available information depending upon value
 *	0 = use no saved state information, rebuild everything from
 *	    slurm.conf contents
 *	1 = recover saved job and trigger state,
 *	    node DOWN/DRAIN/FAIL state and reason information
 *	2 = recover all saved state
 * IN reconfig - true if SIGHUP or "scontrol reconfig" and there is state in
 *	memory to preserve, otherwise recover state from disk
 * RET SLURM_SUCCESS if no error, otherwise an error code
 * Note: Operates on common variables only
 */
int read_slurm_conf(int recover, bool reconfig)
{
	DEF_TIMERS;
	int error_code, i, rc = 0, load_job_ret = SLURM_SUCCESS;
	int old_node_record_count = 0;
	node_record_t *old_node_table_ptr = NULL, *node_ptr;
	bool do_reorder_nodes = false;
	List old_part_list = NULL;
	char *old_def_part_name = NULL;
	char *old_auth_type = xstrdup(slurmctld_conf.authtype);
	char *old_bb_type = xstrdup(slurmctld_conf.bb_type);
	char *old_cred_type = xstrdup(slurmctld_conf.cred_type);
	uint16_t old_preempt_mode = slurmctld_conf.preempt_mode;
	char *old_preempt_type = xstrdup(slurmctld_conf.preempt_type);
	char *old_sched_type = xstrdup(slurmctld_conf.schedtype);
	char *old_select_type = xstrdup(slurmctld_conf.select_type);
	char *old_switch_type = xstrdup(slurmctld_conf.switch_type);
	char *state_save_dir = xstrdup(slurmctld_conf.state_save_location);
	char *mpi_params;
	uint16_t old_select_type_p = slurmctld_conf.select_type_param;
	bool cgroup_mem_confinement = false;

	/* initialization */
	START_TIMER;

	xfree(slurmctld_config.auth_info);
	slurmctld_config.auth_info = slurm_get_auth_info();
	if (reconfig) {
		/*
		 * In order to re-use job state information,
		 * update nodes_completing string (based on node bitmaps)
		 */
		update_job_nodes_completing();

		/* save node and partition states for reconfig RPC */
		old_node_record_count = node_record_count;
		old_node_table_ptr = node_record_table_ptr;

		for (i = 0, node_ptr = old_node_table_ptr;
		     i < node_record_count; i++, node_ptr++) {
			/*
			 * Store the original configured CPU count somewhere
			 * (port is reused here for that purpose) so we can
			 * report changes in its configuration.
			 */
			node_ptr->port = node_ptr->config_ptr->cpus;
			node_ptr->weight = node_ptr->config_ptr->weight;
		}
		node_record_table_ptr = NULL;
		node_record_count = 0;
		xhash_free(node_hash_table);
		old_part_list = part_list;
		part_list = NULL;
		old_def_part_name = default_part_name;
		default_part_name = NULL;
	}

	if ((error_code = _init_all_slurm_conf())) {
		node_record_table_ptr = old_node_table_ptr;
		node_record_count = old_node_record_count;
		part_list = old_part_list;
		default_part_name = old_def_part_name;
		old_def_part_name = NULL;
		goto end_it;
	}

	if (reconfig)
		xcgroup_reconfig_slurm_cgroup_conf();

	cgroup_mem_confinement = xcgroup_mem_cgroup_job_confinement();

	if (slurmctld_conf.job_acct_oom_kill && cgroup_mem_confinement)
		fatal("Jobs memory is being constrained by both TaskPlugin cgroup and JobAcctGather plugin. This enables two incompatible memory enforcement mechanisms, one of them must be disabled.");
	else if (slurmctld_conf.job_acct_oom_kill)
		info("Memory enforcing by using JobAcctGather's mechanism is discouraged, task/cgroup is recommended where available.");
	else if (!cgroup_mem_confinement)
		info("No memory enforcing mechanism configured.");

	if (slurm_get_slurmd_user_id() != 0)
		_test_cgroup_plugin_use();

	if (layouts_init() != SLURM_SUCCESS) {
		if (test_config) {
			error("Failed to initialize the layouts framework");
			test_config_rc = 1;
		} else {
			fatal("Failed to initialize the layouts framework");
		}
	}

	if (slurm_topo_init() != SLURM_SUCCESS) {
		if (test_config) {
			error("Failed to initialize topology plugin");
			test_config_rc = 1;
		} else {
			fatal("Failed to initialize topology plugin");
		}
	}

	/* Build node and partition information based upon slurm.conf file */
	_build_all_nodeline_info();
	if (reconfig) {
		if (_compare_hostnames(old_node_table_ptr,
				       old_node_record_count,
				       node_record_table_ptr,
				       node_record_count) < 0) {
			fatal("%s: hostnames inconsistency detected", __func__);
		}
	}
	_handle_all_downnodes();
	_build_all_partitionline_info();
	if (!reconfig) {
		restore_front_end_state(recover);

		/* currently load/dump_state_lite has to run before
		 * load_all_job_state. */

		/* load old config */
		load_config_state_lite();

		/* store new config */
		if (!test_config)
			dump_config_state_lite();
	}
	update_logging();
	g_slurm_jobcomp_init(slurmctld_conf.job_comp_loc);
	if (slurm_sched_init() != SLURM_SUCCESS) {
		if (test_config) {
			error("Failed to initialize sched plugin");
			test_config_rc = 1;
		} else {
			fatal("Failed to initialize sched plugin");
		}
	}
	if (!reconfig && (old_preempt_mode & PREEMPT_MODE_GANG)) {
		/* gs_init() must immediately follow slurm_sched_init() */
		gs_init();
	}
	if (switch_init(1) != SLURM_SUCCESS) {
		if (test_config) {
			error("Failed to initialize switch plugin");
			test_config_rc = 1;
		} else {
			fatal("Failed to initialize switch plugin");
		}
	}

	if (default_part_loc == NULL)
		error("read_slurm_conf: default partition not set.");

	if (node_record_count < 1) {
		error("read_slurm_conf: no nodes configured.");
		test_config_rc = 1;
		_purge_old_node_state(old_node_table_ptr,
				      old_node_record_count);
		_purge_old_part_state(old_part_list, old_def_part_name);
		error_code = EINVAL;
		goto end_it;
	}

	/*
	 * Node reordering needs to be done by the topology and/or select
	 * plugin. Reordering the table must be done before hashing the
	 * nodes, and before any position-relative bitmaps are created.
	 */
	do_reorder_nodes |= slurm_topo_generate_node_ranking();
	do_reorder_nodes |= select_g_node_ranking(node_record_table_ptr,
						  node_record_count);
	if (do_reorder_nodes)
		_reorder_nodes_by_rank();
	else
		_reorder_nodes_by_name();

	rehash_node();
	slurm_topo_build_config();
	route_g_reconfigure();
	if (reconfig)
		power_g_reconfig();
	cpu_freq_reconfig();

	rehash_jobs();
	_set_slurmd_addr();

	_stat_slurm_dirs();

	/*
	 * Load the layouts configuration.
	 * Only load it at init time, not during reconfiguration stages.
	 * It requires a full restart to switch to a new configuration for now.
	 */
	if (!reconfig && (layouts_load_config(recover) != SLURM_SUCCESS)) {
		if (test_config) {
			error("Failed to load the layouts framework configuration");
			test_config_rc = 1;
		} else {
			fatal("Failed to load the layouts framework configuration");
		}
	}

	/*
	 * Set standard features and preserve the plugin controlled ones.
	 * A reconfig always implies loading the state from slurm.conf.
	 */
	if (reconfig) {		/* Preserve state from memory */
		if (old_node_table_ptr) {
			info("restoring original state of nodes");
			_set_features(old_node_table_ptr,
				      old_node_record_count, recover);
			rc = _restore_node_state(recover, old_node_table_ptr,
						 old_node_record_count);
			error_code = MAX(error_code, rc);	/* not fatal */
		}
		if (old_part_list && ((recover > 1) ||
		    (slurmctld_conf.reconfig_flags & RECONFIG_KEEP_PART_INFO))) {
			info("restoring original partition state");
			rc = _restore_part_state(old_part_list,
						 old_def_part_name,
						 slurmctld_conf.reconfig_flags);
			error_code = MAX(error_code, rc);	/* not fatal */
		} else if (old_part_list && (slurmctld_conf.reconfig_flags &
					     RECONFIG_KEEP_PART_STAT)) {
			info("restoring original partition state only (up/down)");
			rc = _restore_part_state(old_part_list,
						 old_def_part_name,
						 slurmctld_conf.reconfig_flags);
			error_code = MAX(error_code, rc);	/* not fatal */
		}
		load_last_job_id();
		reset_first_job_id();
		(void) slurm_sched_g_reconfig();
	} else if (recover == 0) {	/* Build everything from slurm.conf */
		_set_features(node_record_table_ptr, node_record_count,
			      recover);
		load_last_job_id();
		reset_first_job_id();
		(void) slurm_sched_g_reconfig();
	} else if (recover == 1) {	/* Load job & node state files */
		(void) load_all_node_state(true);
		_set_features(node_record_table_ptr, node_record_count,
			      recover);
		(void) load_all_front_end_state(true);
		load_job_ret = load_all_job_state();
		sync_job_priorities();
	} else if (recover > 1) {	/* Load node, part & job state files */
		(void) load_all_node_state(false);
		_set_features(old_node_table_ptr, old_node_record_count,
			      recover);
		(void) load_all_front_end_state(false);
		(void) load_all_part_state();
		load_job_ret = load_all_job_state();
		sync_job_priorities();
	}

	_sync_part_prio();
	_build_bitmaps_pre_select();
	if ((select_g_node_init(node_record_table_ptr, node_record_count)
	     != SLURM_SUCCESS) ||
	    (select_g_state_restore(state_save_dir) != SLURM_SUCCESS) ||
	    (select_g_job_init(job_list) != SLURM_SUCCESS)) {
		if (test_config) {
			error("Failed to initialize node selection plugin state");
			test_config_rc = 1;
		} else {
			fatal("Failed to initialize node selection plugin state, "
			      "Clean start required.");
		}
	}

	_gres_reconfig(reconfig);
	reset_job_bitmaps();		/* must follow select_g_job_init() */

	(void) _sync_nodes_to_jobs(reconfig);
	(void) sync_job_files();
	_purge_old_node_state(old_node_table_ptr, old_node_record_count);
	_purge_old_part_state(old_part_list, old_def_part_name);

	mpi_params = slurm_get_mpi_params();
	reserve_port_config(mpi_params);
	xfree(mpi_params);

	if (license_update(slurmctld_conf.licenses) != SLURM_SUCCESS) {
		if (test_config) {
			error("Invalid Licenses value: %s",
			      slurmctld_conf.licenses);
			test_config_rc = 1;
		} else {
			fatal("Invalid Licenses value: %s",
			      slurmctld_conf.licenses);
		}
	}

	init_requeue_policy();
	init_depend_policy();

	/* NOTE: Run restore_node_features before _restore_job_accounting */
	restore_node_features(recover);

	if ((node_features_g_count() > 0) &&
	    (node_features_g_get_node(NULL) != SLURM_SUCCESS)) {
		error("failed to initialize node features");
		test_config_rc = 1;
	}

	/*
	 * _build_bitmaps() must follow node_features_g_get_node() and
	 * precede build_feature_list_*()
	 */
	_build_bitmaps();

	/* Active and available features can be different on -R */
	if ((node_features_g_count() == 0) && (recover != 2))
		build_feature_list_eq();
	else
		build_feature_list_ne();

	/*
	 * Must be called after nodes and partitions (e.g.
	 * _build_bitmaps_pre_select()) have been created and before
	 * _sync_nodes_to_comp_job().
	 */
	if (!test_config)
		set_cluster_tres(false);

	_validate_het_jobs();
	(void) _sync_nodes_to_comp_job();/* must follow select_g_node_init() */
	load_part_uid_allow_list(1);

	/* NOTE: Run load_all_resv_state() before _restore_job_accounting */
	if (reconfig) {
		load_all_resv_state(0);
	} else {
		load_all_resv_state(recover);
		if (recover >= 1) {
			trigger_state_restore();
			(void) slurm_sched_g_reconfig();
		}
	}
	if (test_config)
		goto end_it;

	_restore_job_accounting();

	/* sort config_list by weight for scheduling */
	list_sort(config_list, &list_compare_config);

	/* Update plugins as possible */
	if (xstrcmp(old_auth_type, slurmctld_conf.authtype)) {
		xfree(slurmctld_conf.authtype);
		slurmctld_conf.authtype = old_auth_type;
		rc = ESLURM_INVALID_AUTHTYPE_CHANGE;
	}

	if (xstrcmp(old_bb_type, slurmctld_conf.bb_type)) {
		xfree(slurmctld_conf.bb_type);
		slurmctld_conf.bb_type = old_bb_type;
		old_bb_type = NULL;
		rc = ESLURM_INVALID_BURST_BUFFER_CHANGE;
	}

	if (xstrcmp(old_cred_type, slurmctld_conf.cred_type)) {
		xfree(slurmctld_conf.cred_type);
		slurmctld_conf.cred_type = old_cred_type;
		old_cred_type = NULL;
		rc = ESLURM_INVALID_CRED_TYPE_CHANGE;
	}

	if (xstrcmp(old_sched_type, slurmctld_conf.schedtype)) {
		xfree(slurmctld_conf.schedtype);
		slurmctld_conf.schedtype = old_sched_type;
		old_sched_type = NULL;
		rc = ESLURM_INVALID_SCHEDTYPE_CHANGE;
	}

	if (xstrcmp(old_select_type, slurmctld_conf.select_type)) {
		xfree(slurmctld_conf.select_type);
		slurmctld_conf.select_type = old_select_type;
		old_select_type = NULL;
		rc = ESLURM_INVALID_SELECTTYPE_CHANGE;
	}

	if (xstrcmp(old_switch_type, slurmctld_conf.switch_type)) {
		xfree(slurmctld_conf.switch_type);
		slurmctld_conf.switch_type = old_switch_type;
		old_switch_type = NULL;
		rc = ESLURM_INVALID_SWITCHTYPE_CHANGE;
	}

	if ((slurmctld_conf.control_cnt < 2) ||
	    (slurmctld_conf.control_machine[1] == NULL))
		info("%s: backup_controller not specified", __func__);

	error_code = MAX(error_code, rc);	/* not fatal */

	if (xstrcmp(old_preempt_type, slurmctld_conf.preempt_type)) {
		info("Changing PreemptType from %s to %s",
		     old_preempt_type, slurmctld_conf.preempt_type);
		(void) slurm_preempt_fini();
		if (slurm_preempt_init() != SLURM_SUCCESS) {
			if (test_config) {
				error("failed to initialize preempt plugin");
				test_config_rc = 1;
			} else {
				fatal("failed to initialize preempt plugin");
			}
		}
	}
	rc = _update_preempt(old_preempt_mode);
	error_code = MAX(error_code, rc);	/* not fatal */

	/* Update plugin parameters as possible */
	rc = job_submit_plugin_reconfig();
	error_code = MAX(error_code, rc);	/* not fatal */
	rc = prep_plugin_reconfig();
	error_code = MAX(error_code, rc);	/* not fatal */
	rc = switch_g_reconfig();
	error_code = MAX(error_code, rc);	/* not fatal */
	if (reconfig) {
		rc = node_features_g_reconfig();
		error_code = MAX(error_code, rc);	/* not fatal */
	}
	rc = _preserve_select_type_param(&slurmctld_conf, old_select_type_p);
	error_code = MAX(error_code, rc);	/* not fatal */
	if (reconfig)
		rc = bb_g_reconfig();
	else
		rc = bb_g_load_state(true);
	error_code = MAX(error_code, rc);	/* not fatal */

	/*
	 * Restore job accounting info if file missing or corrupted,
	 * an extremely rare situation
	 */
	if (load_job_ret)
		_acct_restore_active_jobs();

	/* Sync select plugin with synchronized job/node/part data */
	gres_plugin_reconfig();		/* Clear gres/mps counters */
	select_g_reconfigure();
	if (reconfig && (slurm_mcs_reconfig() != SLURM_SUCCESS))
		fatal("Failed to reconfigure mcs plugin");

	_set_response_cluster_rec();

	slurmctld_conf.last_update = time(NULL);
end_it:
	xfree(old_auth_type);
	xfree(old_bb_type);
	xfree(old_cred_type);
	xfree(old_preempt_type);
	xfree(old_sched_type);
	xfree(old_select_type);
	xfree(old_switch_type);
	xfree(state_save_dir);

	END_TIMER2("read_slurm_conf");
	return error_code;
}
1562
1563 /* Add feature to list
1564 * feature_list IN - destination list, either active_feature_list or
1565 * avail_feature_list
1566 * feature IN - name of the feature to add
1567 * node_bitmap IN - bitmap of nodes with named feature */
_add_config_feature(List feature_list,char * feature,bitstr_t * node_bitmap)1568 static void _add_config_feature(List feature_list, char *feature,
1569 bitstr_t *node_bitmap)
1570 {
1571 node_feature_t *feature_ptr;
1572 ListIterator feature_iter;
1573 bool match = false;
1574
1575 /* If feature already in avail_feature_list, just update the bitmap */
1576 feature_iter = list_iterator_create(feature_list);
1577 while ((feature_ptr = list_next(feature_iter))) {
1578 if (xstrcmp(feature, feature_ptr->name))
1579 continue;
1580 bit_or(feature_ptr->node_bitmap, node_bitmap);
1581 match = true;
1582 break;
1583 }
1584 list_iterator_destroy(feature_iter);
1585
1586 if (!match) { /* Need to create new avail_feature_list record */
1587 feature_ptr = xmalloc(sizeof(node_feature_t));
1588 feature_ptr->magic = FEATURE_MAGIC;
1589 feature_ptr->name = xstrdup(feature);
1590 feature_ptr->node_bitmap = bit_copy(node_bitmap);
1591 list_append(feature_list, feature_ptr);
1592 }
1593 }
1594
1595 /* Add feature to list
1596 * feature_list IN - destination list, either active_feature_list or
1597 * avail_feature_list
1598 * feature IN - name of the feature to add
1599 * node_inx IN - index of the node with named feature */
_add_config_feature_inx(List feature_list,char * feature,int node_inx)1600 static void _add_config_feature_inx(List feature_list, char *feature,
1601 int node_inx)
1602 {
1603 node_feature_t *feature_ptr;
1604 ListIterator feature_iter;
1605 bool match = false;
1606
1607 /* If feature already in avail_feature_list, just update the bitmap */
1608 feature_iter = list_iterator_create(feature_list);
1609 while ((feature_ptr = list_next(feature_iter))) {
1610 if (xstrcmp(feature, feature_ptr->name))
1611 continue;
1612 bit_set(feature_ptr->node_bitmap, node_inx);
1613 match = true;
1614 break;
1615 }
1616 list_iterator_destroy(feature_iter);
1617
1618 if (!match) { /* Need to create new avail_feature_list record */
1619 feature_ptr = xmalloc(sizeof(node_feature_t));
1620 feature_ptr->magic = FEATURE_MAGIC;
1621 feature_ptr->name = xstrdup(feature);
1622 feature_ptr->node_bitmap = bit_alloc(node_record_count);
1623 bit_set(feature_ptr->node_bitmap, node_inx);
1624 list_append(feature_list, feature_ptr);
1625 }
1626 }

/* _list_delete_feature - delete an entry from the feature list,
 *	see list.h for documentation */
static void _list_delete_feature(void *feature_entry)
{
	node_feature_t *feature_ptr = (node_feature_t *) feature_entry;

	xassert(feature_ptr);
	xassert(feature_ptr->magic == FEATURE_MAGIC);
	xfree(feature_ptr->name);
	FREE_NULL_BITMAP(feature_ptr->node_bitmap);
	xfree(feature_ptr);
}

/*
 * For a configuration where available_features == active_features,
 * build new active and available feature lists
 */
extern void build_feature_list_eq(void)
{
	ListIterator config_iterator;
	config_record_t *config_ptr;
	node_feature_t *active_feature_ptr, *avail_feature_ptr;
	ListIterator feature_iter;
	char *tmp_str, *token, *last = NULL;

	FREE_NULL_LIST(active_feature_list);
	FREE_NULL_LIST(avail_feature_list);
	active_feature_list = list_create(_list_delete_feature);
	avail_feature_list = list_create(_list_delete_feature);

	config_iterator = list_iterator_create(config_list);
	while ((config_ptr = list_next(config_iterator))) {
		if (config_ptr->feature) {
			tmp_str = xstrdup(config_ptr->feature);
			token = strtok_r(tmp_str, ",", &last);
			while (token) {
				_add_config_feature(avail_feature_list, token,
						    config_ptr->node_bitmap);
				token = strtok_r(NULL, ",", &last);
			}
			xfree(tmp_str);
		}
	}
	list_iterator_destroy(config_iterator);

	/* Copy avail_feature_list to active_feature_list */
	feature_iter = list_iterator_create(avail_feature_list);
	while ((avail_feature_ptr = list_next(feature_iter))) {
		active_feature_ptr = xmalloc(sizeof(node_feature_t));
		active_feature_ptr->magic = FEATURE_MAGIC;
		active_feature_ptr->name = xstrdup(avail_feature_ptr->name);
		active_feature_ptr->node_bitmap =
			bit_copy(avail_feature_ptr->node_bitmap);
		list_append(active_feature_list, active_feature_ptr);
	}
	list_iterator_destroy(feature_iter);
}
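
/*
 * Illustrative sketch (hypothetical slurm.conf entry): given
 *
 *	NodeName=tux[0-3] Features=knl,snc4
 *
 * build_feature_list_eq() yields avail_feature_list entries
 *
 *	{ name = "knl",  node_bitmap = tux[0-3] }
 *	{ name = "snc4", node_bitmap = tux[0-3] }
 *
 * and active_feature_list becomes a deep copy of the same records, since
 * the available and active feature sets are identical on this code path.
 */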

/*
 * Log contents of avail_feature_list and active_feature_list
 */
extern void log_feature_lists(void)
{
	node_feature_t *feature_ptr;
	char *node_str;
	ListIterator feature_iter;

	feature_iter = list_iterator_create(avail_feature_list);
	while ((feature_ptr = list_next(feature_iter))) {
		node_str = bitmap2node_name(feature_ptr->node_bitmap);
		info("AVAIL FEATURE:%s NODES:%s", feature_ptr->name, node_str);
		xfree(node_str);
	}
	list_iterator_destroy(feature_iter);

	feature_iter = list_iterator_create(active_feature_list);
	while ((feature_ptr = list_next(feature_iter))) {
		node_str = bitmap2node_name(feature_ptr->node_bitmap);
		info("ACTIVE FEATURE:%s NODES:%s", feature_ptr->name, node_str);
		xfree(node_str);
	}
	list_iterator_destroy(feature_iter);
}

/*
 * For a configuration where available_features != active_features,
 * build new active and available feature lists
 */
extern void build_feature_list_ne(void)
{
	node_record_t *node_ptr;
	char *tmp_str, *token, *last = NULL;
	int i;

	FREE_NULL_LIST(active_feature_list);
	FREE_NULL_LIST(avail_feature_list);
	active_feature_list = list_create(_list_delete_feature);
	avail_feature_list = list_create(_list_delete_feature);

	for (i = 0, node_ptr = node_record_table_ptr; i < node_record_count;
	     i++, node_ptr++) {
		if (node_ptr->features_act) {
			tmp_str = xstrdup(node_ptr->features_act);
			token = strtok_r(tmp_str, ",", &last);
			while (token) {
				_add_config_feature_inx(active_feature_list,
							token, i);
				token = strtok_r(NULL, ",", &last);
			}
			xfree(tmp_str);
		}
		if (node_ptr->features) {
			tmp_str = xstrdup(node_ptr->features);
			token = strtok_r(tmp_str, ",", &last);
			while (token) {
				_add_config_feature_inx(avail_feature_list,
							token, i);
				if (!node_ptr->features_act) {
					_add_config_feature_inx(
						active_feature_list,
						token, i);
				}
				token = strtok_r(NULL, ",", &last);
			}
			xfree(tmp_str);
		}
	}
}
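
/*
 * Illustrative walk-through (assumed node and feature names): if node tux0
 * has features "knl,snc4" but features_act "knl,quad", the per-node pass
 * above records "knl" and "snc4" in avail_feature_list and "knl" and
 * "quad" in active_feature_list. A node with no features_act string falls
 * back to treating every available feature as active.
 */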

/*
 * Update active_feature_list or avail_feature_list
 * feature_list IN - list to update: active_feature_list or avail_feature_list
 * new_features IN - new active_features
 * node_bitmap IN - nodes with the new active_features value
 */
extern void update_feature_list(List feature_list, char *new_features,
				bitstr_t *node_bitmap)
{
	node_feature_t *feature_ptr;
	ListIterator feature_iter;
	char *tmp_str, *token, *last = NULL;

	/*
	 * Clear these nodes from every feature_list record,
	 * then restore as needed
	 */
	feature_iter = list_iterator_create(feature_list);
	while ((feature_ptr = list_next(feature_iter))) {
		bit_and_not(feature_ptr->node_bitmap, node_bitmap);
	}
	list_iterator_destroy(feature_iter);

	if (new_features) {
		tmp_str = xstrdup(new_features);
		token = strtok_r(tmp_str, ",", &last);
		while (token) {
			_add_config_feature(feature_list, token, node_bitmap);
			token = strtok_r(NULL, ",", &last);
		}
		xfree(tmp_str);
	}
	node_features_updated = true;
}
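
/*
 * Usage sketch (hypothetical values): changing the active features of the
 * nodes in node_bitmap to "quad,flat" amounts to
 *
 *	update_feature_list(active_feature_list, "quad,flat", node_bitmap);
 *
 * Because every existing record first has these nodes cleared, any feature
 * the nodes no longer report drops out of its bitmap automatically.
 */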

static void _gres_reconfig(bool reconfig)
{
	node_record_t *node_ptr;
	char *gres_name;
	int i;

	if (reconfig) {
		gres_plugin_reconfig();
	} else {
		for (i = 0, node_ptr = node_record_table_ptr;
		     i < node_record_count; i++, node_ptr++) {
			if (node_ptr->gres)
				gres_name = node_ptr->gres;
			else
				gres_name = node_ptr->config_ptr->gres;
			gres_plugin_init_node_config(node_ptr->name, gres_name,
						     &node_ptr->gres_list);
			if (!IS_NODE_CLOUD(node_ptr))
				continue;

			/*
			 * Load in GRES for node now. By default Slurm gets
			 * this information when the node registers for the
			 * first time, which can take a while for a node in
			 * the cloud to boot.
			 */
			gres_plugin_node_config_load(
				node_ptr->config_ptr->cpus, node_ptr->name,
				node_ptr->gres_list, NULL, NULL);
			gres_plugin_node_config_validate(
				node_ptr->name, node_ptr->config_ptr->gres,
				&node_ptr->gres, &node_ptr->gres_list,
				node_ptr->config_ptr->threads,
				node_ptr->config_ptr->cores,
				node_ptr->config_ptr->sockets,
				slurmctld_conf.conf_flags & CTL_CONF_OR, NULL);
		}
	}
}
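
/*
 * Sketch of the two entry paths (restating the branch above, not extra
 * behavior): on a reconfigure, the GRES plugin re-reads its own
 * configuration via gres_plugin_reconfig(); on a cold start, each node's
 * GRES string is seeded from the node record or, failing that, from its
 * config record:
 *
 *	gres_name = node_ptr->gres ? node_ptr->gres
 *				   : node_ptr->config_ptr->gres;
 *
 * Cloud nodes additionally get their GRES loaded and validated up front,
 * rather than waiting for the (possibly slow) first registration.
 */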

/*
 * Configure node features
 * old_node_table_ptr IN - previous node information
 * old_node_record_count IN - count of previous node records
 * recover IN - replace node features data depending upon value:
 *	0, 1 - use data from config record, built using slurm.conf
 *	2 - use data from node record, built from saved state
 */
static void _set_features(node_record_t *old_node_table_ptr,
			  int old_node_record_count, int recover)
{
	node_record_t *node_ptr, *old_node_ptr;
	char *tmp, *tok, *sep;
	int i, node_features_cnt = node_features_g_count();

	for (i = 0, old_node_ptr = old_node_table_ptr;
	     i < old_node_record_count;
	     i++, old_node_ptr++) {

		node_ptr = find_node_record(old_node_ptr->name);

		if (node_ptr == NULL)
			continue;

		/*
		 * Load everything from state and ignore what was read from
		 * slurm.conf. The features in the node record are just a
		 * placeholder for restore_node_features() to set up new
		 * config records.
		 */
		if (recover == 2) {
			xfree(node_ptr->features);
			xfree(node_ptr->features_act);
			node_ptr->features = old_node_ptr->features;
			node_ptr->features_act = old_node_ptr->features_act;
			old_node_ptr->features = NULL;
			old_node_ptr->features_act = NULL;
			continue;
		}

		xfree(node_ptr->features_act);
		node_ptr->features_act = xstrdup(node_ptr->features);

		if (node_features_cnt == 0)
			continue;

		/* If we are here, there's a node_features plugin active */

		/*
		 * Copy the subset of plugin-controlled features_available
		 * and features_active found in the old node record into the
		 * corresponding fields of the new node record. This ensures
		 * that KNL modes are preserved across a reconfigure;
		 * otherwise we would have to wait until the node registers
		 * to learn its available and active features.
		 */
		if (old_node_ptr->features != NULL) {
			char *save_ptr = NULL;
			if (node_ptr->features)
				sep = ",";
			else
				sep = "";
			tmp = xstrdup(old_node_ptr->features);
			tok = strtok_r(tmp, ",", &save_ptr);
			while (tok) {
				if (node_features_g_changeable_feature(tok)) {
					xstrfmtcat(node_ptr->features,
						   "%s%s", sep, tok);
					sep = ",";
				}
				tok = strtok_r(NULL, ",", &save_ptr);
			}
			xfree(tmp);
		}

		if (old_node_ptr->features_act != NULL) {
			char *save_ptr = NULL;
			if (node_ptr->features_act)
				sep = ",";
			else
				sep = "";
			tmp = xstrdup(old_node_ptr->features_act);
			tok = strtok_r(tmp, ",", &save_ptr);
			while (tok) {
				if (node_features_g_changeable_feature(tok)) {
					xstrfmtcat(node_ptr->features_act,
						   "%s%s", sep, tok);
					sep = ",";
				}
				tok = strtok_r(NULL, ",", &save_ptr);
			}
			xfree(tmp);
		}
	}
}
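
/*
 * Illustrative example (hypothetical feature names): suppose slurm.conf
 * lists Features=big_mem for a KNL node and the old record carried the
 * plugin-changeable features "snc4,flat". After the merge above:
 *
 *	node_ptr->features     == "big_mem,snc4,flat"
 *	node_ptr->features_act == "big_mem" plus whichever of the old
 *				   active modes the plugin reports as
 *				   changeable
 *
 * Static features always come from slurm.conf; only features that
 * node_features_g_changeable_feature() recognizes are carried over.
 */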

/*
 * Restore node state and size information from saved records which match
 * the node registration message. If a node was re-configured to be down or
 * drained, we set those states. We only recover a node's Features if
 * recover == 2.
 */
static int _restore_node_state(int recover,
			       node_record_t *old_node_table_ptr,
			       int old_node_record_count)
{
	node_record_t *node_ptr, *old_node_ptr;
	int i, rc = SLURM_SUCCESS;
	hostset_t hs = NULL;
	bool power_save_mode = false;

	if (slurmctld_conf.suspend_program && slurmctld_conf.resume_program)
		power_save_mode = true;

	for (i = 0, node_ptr = node_record_table_ptr; i < node_record_count;
	     i++, node_ptr++) {
		node_ptr->not_responding = true;
	}

	for (i = 0, old_node_ptr = old_node_table_ptr;
	     i < old_node_record_count; i++, old_node_ptr++) {
		bool drain_flag = false, down_flag = false;
		dynamic_plugin_data_t *tmp_select_nodeinfo;

		node_ptr = find_node_record(old_node_ptr->name);
		if (node_ptr == NULL)
			continue;

		node_ptr->not_responding = false;
		if (IS_NODE_DOWN(node_ptr))
			down_flag = true;
		if (IS_NODE_DRAIN(node_ptr))
			drain_flag = true;
		if (IS_NODE_FUTURE(old_node_ptr) &&
		    !IS_NODE_FUTURE(node_ptr)) {
			/* Replace FUTURE state with new state, but preserve
			 * state flags (e.g. POWER) */
			node_ptr->node_state =
				(node_ptr->node_state & NODE_STATE_BASE) |
				(old_node_ptr->node_state & NODE_STATE_FLAGS);
		} else {
			node_ptr->node_state = old_node_ptr->node_state;
		}

		if (down_flag) {
			node_ptr->node_state &= NODE_STATE_FLAGS;
			node_ptr->node_state |= NODE_STATE_DOWN;
		}
		if (drain_flag)
			node_ptr->node_state |= NODE_STATE_DRAIN;
		if ((!power_save_mode) &&
		    (IS_NODE_POWER_SAVE(node_ptr) ||
		     IS_NODE_POWER_UP(node_ptr))) {
			node_ptr->node_state &= (~NODE_STATE_POWER_SAVE);
			node_ptr->node_state &= (~NODE_STATE_POWER_UP);
			if (hs)
				hostset_insert(hs, node_ptr->name);
			else
				hs = hostset_create(node_ptr->name);
		}

		if (IS_NODE_CLOUD(node_ptr) && !IS_NODE_POWER_SAVE(node_ptr)) {
			/* Preserve NodeHostname + NodeAddr set by scontrol */
			xfree(node_ptr->comm_name);
			node_ptr->comm_name = old_node_ptr->comm_name;
			old_node_ptr->comm_name = NULL;
			xfree(node_ptr->node_hostname);
			node_ptr->node_hostname = old_node_ptr->node_hostname;
			old_node_ptr->node_hostname = NULL;
			slurm_reset_alias(node_ptr->name, node_ptr->comm_name,
					  node_ptr->node_hostname);
		}

		node_ptr->last_response = old_node_ptr->last_response;
		node_ptr->protocol_version = old_node_ptr->protocol_version;
		node_ptr->cpu_load = old_node_ptr->cpu_load;

		/* Make sure we get the old state from the select plugin;
		 * just swap it out to avoid a possible memory leak */
		tmp_select_nodeinfo = node_ptr->select_nodeinfo;
		node_ptr->select_nodeinfo = old_node_ptr->select_nodeinfo;
		old_node_ptr->select_nodeinfo = tmp_select_nodeinfo;

		if (old_node_ptr->port != node_ptr->config_ptr->cpus) {
			rc = ESLURM_NEED_RESTART;
			error("Configured cpu count change on %s (%u to %u)",
			      node_ptr->name, old_node_ptr->port,
			      node_ptr->config_ptr->cpus);
		}

		node_ptr->boot_time = old_node_ptr->boot_time;
		node_ptr->cpus = old_node_ptr->cpus;
		node_ptr->cores = old_node_ptr->cores;
		xfree(node_ptr->cpu_spec_list);
		node_ptr->cpu_spec_list = old_node_ptr->cpu_spec_list;
		old_node_ptr->cpu_spec_list = NULL;
		node_ptr->core_spec_cnt = old_node_ptr->core_spec_cnt;
		node_ptr->last_idle = old_node_ptr->last_idle;
		node_ptr->boards = old_node_ptr->boards;
		node_ptr->sockets = old_node_ptr->sockets;
		node_ptr->threads = old_node_ptr->threads;
		node_ptr->real_memory = old_node_ptr->real_memory;
		node_ptr->mem_spec_limit = old_node_ptr->mem_spec_limit;
		node_ptr->slurmd_start_time = old_node_ptr->slurmd_start_time;
		node_ptr->tmp_disk = old_node_ptr->tmp_disk;
		node_ptr->weight = old_node_ptr->weight;

		node_ptr->sus_job_cnt = old_node_ptr->sus_job_cnt;

		FREE_NULL_LIST(node_ptr->gres_list);
		node_ptr->gres_list = old_node_ptr->gres_list;
		old_node_ptr->gres_list = NULL;

		if (node_ptr->reason == NULL) {
			/* Recover only if not explicitly set in slurm.conf */
			node_ptr->reason = old_node_ptr->reason;
			node_ptr->reason_time = old_node_ptr->reason_time;
			old_node_ptr->reason = NULL;
		}
		if (recover == 2) {
			xfree(node_ptr->gres);
			node_ptr->gres = old_node_ptr->gres;
			old_node_ptr->gres = NULL;
		}
		if (old_node_ptr->arch) {
			xfree(node_ptr->arch);
			node_ptr->arch = old_node_ptr->arch;
			old_node_ptr->arch = NULL;
		}
		if (old_node_ptr->os) {
			xfree(node_ptr->os);
			node_ptr->os = old_node_ptr->os;
			old_node_ptr->os = NULL;
		}
		if (old_node_ptr->node_spec_bitmap) {
			FREE_NULL_BITMAP(node_ptr->node_spec_bitmap);
			node_ptr->node_spec_bitmap =
				old_node_ptr->node_spec_bitmap;
			old_node_ptr->node_spec_bitmap = NULL;
		}
	}

	if (hs) {
		char node_names[128];
		hostset_ranged_string(hs, sizeof(node_names), node_names);
		info("Cleared POWER_SAVE flag from nodes %s", node_names);
		hostset_destroy(hs);
		hs = NULL;
	}

	for (i = 0, node_ptr = node_record_table_ptr; i < node_record_count;
	     i++, node_ptr++) {
		if (!node_ptr->not_responding)
			continue;
		node_ptr->not_responding = false;
		if (hs)
			hostset_insert(hs, node_ptr->name);
		else
			hs = hostset_create(node_ptr->name);
	}
	if (hs) {
		char node_names[128];
		hostset_ranged_string(hs, sizeof(node_names), node_names);
		error("Nodes added to configuration (%s)", node_names);
		error("Reboot of all slurm daemons is recommended");
		hostset_destroy(hs);
	}

	return rc;
}
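
/*
 * Note on the node_state bit layout used above (simplified sketch): the
 * low-order bits hold the base state (DOWN, IDLE, ALLOCATED, FUTURE, ...)
 * while the high-order bits hold flags (DRAIN, POWER_SAVE, ...), so
 *
 *	uint32_t base  = node_ptr->node_state & NODE_STATE_BASE;
 *	uint32_t flags = node_ptr->node_state & NODE_STATE_FLAGS;
 *
 * lets the code replace one half while preserving the other, e.g. keeping
 * POWER flags while dropping a stale FUTURE base state.
 */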

/* Purge old node state information */
static void _purge_old_node_state(node_record_t *old_node_table_ptr,
				  int old_node_record_count)
{
	int i;
	node_record_t *node_ptr;

	node_ptr = old_node_table_ptr;
	if (old_node_table_ptr) {
		for (i = 0; i < old_node_record_count; i++, node_ptr++)
			purge_node_rec(node_ptr);
		xfree(old_node_table_ptr);
	}
}

/* Restore partition information from saved records */
static int _restore_part_state(List old_part_list, char *old_def_part_name,
			       uint16_t flags)
{
	int rc = SLURM_SUCCESS;
	ListIterator part_iterator;
	part_record_t *old_part_ptr, *part_ptr;

	if (!old_part_list)
		return rc;

	/* For each part in list, find and update recs */
	part_iterator = list_iterator_create(old_part_list);
	while ((old_part_ptr = list_next(part_iterator))) {
		xassert(old_part_ptr->magic == PART_MAGIC);
		part_ptr = find_part_record(old_part_ptr->name);
		if (part_ptr) {
			if (!(flags & RECONFIG_KEEP_PART_INFO) &&
			    (flags & RECONFIG_KEEP_PART_STAT)) {
				if (part_ptr->state_up !=
				    old_part_ptr->state_up) {
					info("Partition %s State differs from slurm.conf",
					     part_ptr->name);
					part_ptr->state_up =
						old_part_ptr->state_up;
				}
				continue;
			}
			/* Current partition found in slurm.conf,
			 * report differences from slurm.conf configuration */
			if (xstrcmp(part_ptr->allow_accounts,
				    old_part_ptr->allow_accounts)) {
				error("Partition %s AllowAccounts differs from slurm.conf",
				      part_ptr->name);
				xfree(part_ptr->allow_accounts);
				part_ptr->allow_accounts =
					xstrdup(old_part_ptr->allow_accounts);
				accounts_list_build(part_ptr->allow_accounts,
					&part_ptr->allow_account_array);
			}
			if (xstrcmp(part_ptr->allow_alloc_nodes,
				    old_part_ptr->allow_alloc_nodes)) {
				error("Partition %s AllowNodes differs from slurm.conf",
				      part_ptr->name);
				xfree(part_ptr->allow_alloc_nodes);
				part_ptr->allow_alloc_nodes =
					xstrdup(old_part_ptr->allow_alloc_nodes);
			}
			if (xstrcmp(part_ptr->allow_groups,
				    old_part_ptr->allow_groups)) {
				error("Partition %s AllowGroups differs from slurm.conf",
				      part_ptr->name);
				xfree(part_ptr->allow_groups);
				part_ptr->allow_groups =
					xstrdup(old_part_ptr->allow_groups);
			}
			if (xstrcmp(part_ptr->allow_qos,
				    old_part_ptr->allow_qos)) {
				error("Partition %s AllowQos differs from slurm.conf",
				      part_ptr->name);
				xfree(part_ptr->allow_qos);
				part_ptr->allow_qos =
					xstrdup(old_part_ptr->allow_qos);
				qos_list_build(part_ptr->allow_qos,
					       &part_ptr->allow_qos_bitstr);
			}
			if (xstrcmp(part_ptr->alternate,
				    old_part_ptr->alternate)) {
				error("Partition %s Alternate differs from slurm.conf",
				      part_ptr->name);
				xfree(part_ptr->alternate);
				part_ptr->alternate =
					xstrdup(old_part_ptr->alternate);
			}
			if (part_ptr->def_mem_per_cpu !=
			    old_part_ptr->def_mem_per_cpu) {
				error("Partition %s DefMemPerCPU differs from slurm.conf",
				      part_ptr->name);
				part_ptr->def_mem_per_cpu =
					old_part_ptr->def_mem_per_cpu;
			}
			if (part_ptr->default_time !=
			    old_part_ptr->default_time) {
				error("Partition %s DefaultTime differs from slurm.conf",
				      part_ptr->name);
				part_ptr->default_time =
					old_part_ptr->default_time;
			}
			if (xstrcmp(part_ptr->deny_accounts,
				    old_part_ptr->deny_accounts)) {
				error("Partition %s DenyAccounts differs from slurm.conf",
				      part_ptr->name);
				xfree(part_ptr->deny_accounts);
				part_ptr->deny_accounts =
					xstrdup(old_part_ptr->deny_accounts);
				accounts_list_build(part_ptr->deny_accounts,
					&part_ptr->deny_account_array);
			}
			if (xstrcmp(part_ptr->deny_qos,
				    old_part_ptr->deny_qos)) {
				error("Partition %s DenyQos differs from slurm.conf",
				      part_ptr->name);
				xfree(part_ptr->deny_qos);
				part_ptr->deny_qos =
					xstrdup(old_part_ptr->deny_qos);
				qos_list_build(part_ptr->deny_qos,
					       &part_ptr->deny_qos_bitstr);
			}
			if ((part_ptr->flags & PART_FLAG_HIDDEN) !=
			    (old_part_ptr->flags & PART_FLAG_HIDDEN)) {
				error("Partition %s Hidden differs from slurm.conf",
				      part_ptr->name);
				if (old_part_ptr->flags & PART_FLAG_HIDDEN)
					part_ptr->flags |= PART_FLAG_HIDDEN;
				else
					part_ptr->flags &= (~PART_FLAG_HIDDEN);
			}
			if ((part_ptr->flags & PART_FLAG_NO_ROOT) !=
			    (old_part_ptr->flags & PART_FLAG_NO_ROOT)) {
				error("Partition %s DisableRootJobs differs from slurm.conf",
				      part_ptr->name);
				if (old_part_ptr->flags & PART_FLAG_NO_ROOT)
					part_ptr->flags |= PART_FLAG_NO_ROOT;
				else
					part_ptr->flags &= (~PART_FLAG_NO_ROOT);
			}
			if ((part_ptr->flags & PART_FLAG_EXCLUSIVE_USER) !=
			    (old_part_ptr->flags & PART_FLAG_EXCLUSIVE_USER)) {
				error("Partition %s ExclusiveUser differs from slurm.conf",
				      part_ptr->name);
				if (old_part_ptr->flags &
				    PART_FLAG_EXCLUSIVE_USER) {
					part_ptr->flags |=
						PART_FLAG_EXCLUSIVE_USER;
				} else {
					part_ptr->flags &=
						(~PART_FLAG_EXCLUSIVE_USER);
				}
			}
			if ((part_ptr->flags & PART_FLAG_ROOT_ONLY) !=
			    (old_part_ptr->flags & PART_FLAG_ROOT_ONLY)) {
				error("Partition %s RootOnly differs from slurm.conf",
				      part_ptr->name);
				if (old_part_ptr->flags & PART_FLAG_ROOT_ONLY)
					part_ptr->flags |= PART_FLAG_ROOT_ONLY;
				else
					part_ptr->flags &=
						(~PART_FLAG_ROOT_ONLY);
			}
			if ((part_ptr->flags & PART_FLAG_REQ_RESV) !=
			    (old_part_ptr->flags & PART_FLAG_REQ_RESV)) {
				error("Partition %s ReqResv differs from slurm.conf",
				      part_ptr->name);
				if (old_part_ptr->flags & PART_FLAG_REQ_RESV)
					part_ptr->flags |= PART_FLAG_REQ_RESV;
				else
					part_ptr->flags &=
						(~PART_FLAG_REQ_RESV);
			}
			if ((part_ptr->flags & PART_FLAG_LLN) !=
			    (old_part_ptr->flags & PART_FLAG_LLN)) {
				error("Partition %s LLN differs from slurm.conf",
				      part_ptr->name);
				if (old_part_ptr->flags & PART_FLAG_LLN)
					part_ptr->flags |= PART_FLAG_LLN;
				else
					part_ptr->flags &= (~PART_FLAG_LLN);
			}
			if (part_ptr->grace_time != old_part_ptr->grace_time) {
				error("Partition %s GraceTime differs from slurm.conf",
				      part_ptr->name);
				part_ptr->grace_time =
					old_part_ptr->grace_time;
			}
			if (part_ptr->max_cpus_per_node !=
			    old_part_ptr->max_cpus_per_node) {
				error("Partition %s MaxCPUsPerNode differs from slurm.conf (%u != %u)",
				      part_ptr->name,
				      part_ptr->max_cpus_per_node,
				      old_part_ptr->max_cpus_per_node);
				part_ptr->max_cpus_per_node =
					old_part_ptr->max_cpus_per_node;
			}
			if (part_ptr->max_mem_per_cpu !=
			    old_part_ptr->max_mem_per_cpu) {
				error("Partition %s MaxMemPerNode/MaxMemPerCPU differs from slurm.conf (%"PRIu64" != %"PRIu64")",
				      part_ptr->name,
				      part_ptr->max_mem_per_cpu,
				      old_part_ptr->max_mem_per_cpu);
				part_ptr->max_mem_per_cpu =
					old_part_ptr->max_mem_per_cpu;
			}
			if (part_ptr->max_nodes_orig !=
			    old_part_ptr->max_nodes_orig) {
				error("Partition %s MaxNodes differs from slurm.conf (%u != %u)",
				      part_ptr->name,
				      part_ptr->max_nodes_orig,
				      old_part_ptr->max_nodes_orig);
				part_ptr->max_nodes =
					old_part_ptr->max_nodes_orig;
				part_ptr->max_nodes_orig =
					old_part_ptr->max_nodes_orig;
			}
			if (part_ptr->max_share != old_part_ptr->max_share) {
				error("Partition %s Shared differs from slurm.conf",
				      part_ptr->name);
				part_ptr->max_share = old_part_ptr->max_share;
			}
			if (part_ptr->max_time != old_part_ptr->max_time) {
				error("Partition %s MaxTime differs from slurm.conf",
				      part_ptr->name);
				part_ptr->max_time = old_part_ptr->max_time;
			}
			if (part_ptr->min_nodes_orig !=
			    old_part_ptr->min_nodes_orig) {
				error("Partition %s MinNodes differs from slurm.conf (%u != %u)",
				      part_ptr->name,
				      part_ptr->min_nodes_orig,
				      old_part_ptr->min_nodes_orig);
				part_ptr->min_nodes =
					old_part_ptr->min_nodes_orig;
				part_ptr->min_nodes_orig =
					old_part_ptr->min_nodes_orig;
			}
			if (xstrcmp(part_ptr->nodes, old_part_ptr->nodes)) {
				error("Partition %s Nodes differs from slurm.conf",
				      part_ptr->name);
				xfree(part_ptr->nodes);
				part_ptr->nodes = xstrdup(old_part_ptr->nodes);
			}
			if (part_ptr->over_time_limit !=
			    old_part_ptr->over_time_limit) {
				error("Partition %s OverTimeLimit differs from slurm.conf",
				      part_ptr->name);
				part_ptr->over_time_limit =
					old_part_ptr->over_time_limit;
			}
			if (part_ptr->preempt_mode !=
			    old_part_ptr->preempt_mode) {
				error("Partition %s PreemptMode differs from slurm.conf",
				      part_ptr->name);
				part_ptr->preempt_mode =
					old_part_ptr->preempt_mode;
			}
			if (part_ptr->priority_job_factor !=
			    old_part_ptr->priority_job_factor) {
				error("Partition %s PriorityJobFactor differs from slurm.conf",
				      part_ptr->name);
				part_ptr->priority_job_factor =
					old_part_ptr->priority_job_factor;
			}
			if (part_ptr->priority_tier !=
			    old_part_ptr->priority_tier) {
				error("Partition %s PriorityTier differs from slurm.conf",
				      part_ptr->name);
				part_ptr->priority_tier =
					old_part_ptr->priority_tier;
			}
			if (xstrcmp(part_ptr->qos_char,
				    old_part_ptr->qos_char)) {
				error("Partition %s QOS differs from slurm.conf",
				      part_ptr->name);
				xfree(part_ptr->qos_char);
				part_ptr->qos_char =
					xstrdup(old_part_ptr->qos_char);
				part_ptr->qos_ptr = old_part_ptr->qos_ptr;
			}
			if (part_ptr->state_up != old_part_ptr->state_up) {
				error("Partition %s State differs from slurm.conf",
				      part_ptr->name);
				part_ptr->state_up = old_part_ptr->state_up;
			}
		} else {
			if (!(flags & RECONFIG_KEEP_PART_INFO) &&
			    (flags & RECONFIG_KEEP_PART_STAT)) {
				info("Partition %s missing from slurm.conf, not restoring it",
				     old_part_ptr->name);
				continue;
			}
			error("Partition %s missing from slurm.conf, restoring it",
			      old_part_ptr->name);
			part_ptr = create_part_record(old_part_ptr->name);

			part_ptr->allow_accounts =
				xstrdup(old_part_ptr->allow_accounts);
			accounts_list_build(part_ptr->allow_accounts,
					    &part_ptr->allow_account_array);
			part_ptr->allow_alloc_nodes =
				xstrdup(old_part_ptr->allow_alloc_nodes);
			part_ptr->allow_groups =
				xstrdup(old_part_ptr->allow_groups);
			part_ptr->allow_qos =
				xstrdup(old_part_ptr->allow_qos);
			qos_list_build(part_ptr->allow_qos,
				       &part_ptr->allow_qos_bitstr);
			part_ptr->def_mem_per_cpu =
				old_part_ptr->def_mem_per_cpu;
			part_ptr->default_time = old_part_ptr->default_time;
			part_ptr->deny_accounts =
				xstrdup(old_part_ptr->deny_accounts);
			accounts_list_build(part_ptr->deny_accounts,
					    &part_ptr->deny_account_array);
			part_ptr->deny_qos = xstrdup(old_part_ptr->deny_qos);
			qos_list_build(part_ptr->deny_qos,
				       &part_ptr->deny_qos_bitstr);
			part_ptr->flags = old_part_ptr->flags;
			part_ptr->grace_time = old_part_ptr->grace_time;
			part_ptr->job_defaults_list =
				job_defaults_copy(old_part_ptr->job_defaults_list);
			part_ptr->max_cpus_per_node =
				old_part_ptr->max_cpus_per_node;
			part_ptr->max_mem_per_cpu =
				old_part_ptr->max_mem_per_cpu;
			part_ptr->max_nodes = old_part_ptr->max_nodes;
			part_ptr->max_nodes_orig =
				old_part_ptr->max_nodes_orig;
			part_ptr->max_share = old_part_ptr->max_share;
			part_ptr->max_time = old_part_ptr->max_time;
			part_ptr->min_nodes = old_part_ptr->min_nodes;
			part_ptr->min_nodes_orig =
				old_part_ptr->min_nodes_orig;
			part_ptr->nodes = xstrdup(old_part_ptr->nodes);
			part_ptr->over_time_limit =
				old_part_ptr->over_time_limit;
			part_ptr->preempt_mode = old_part_ptr->preempt_mode;
			part_ptr->priority_job_factor =
				old_part_ptr->priority_job_factor;
			part_ptr->priority_tier = old_part_ptr->priority_tier;
			part_ptr->qos_char = xstrdup(old_part_ptr->qos_char);
			part_ptr->qos_ptr = old_part_ptr->qos_ptr;
			part_ptr->state_up = old_part_ptr->state_up;
		}
	}
	list_iterator_destroy(part_iterator);

	if (old_def_part_name &&
	    ((default_part_name == NULL) ||
	     xstrcmp(old_def_part_name, default_part_name))) {
		part_ptr = find_part_record(old_def_part_name);
		if (part_ptr) {
			error("Default partition reset to %s",
			      old_def_part_name);
			default_part_loc = part_ptr;
			xfree(default_part_name);
			default_part_name = xstrdup(old_def_part_name);
		}
	}

	return rc;
}
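
/*
 * Summary of the flag handling above (behavior as implemented, restated
 * for clarity): when RECONFIG_KEEP_PART_STAT is set without
 * RECONFIG_KEEP_PART_INFO, only the State= setting is carried over and
 * partitions absent from slurm.conf are dropped; in every other case each
 * saved setting overrides slurm.conf and missing partitions are recreated
 * from state. For example:
 *
 *	_restore_part_state(old_part_list, old_def_part_name,
 *			    RECONFIG_KEEP_PART_STAT);
 *
 * preserves scontrol-driven UP/DOWN/DRAIN changes across a reconfigure
 * while taking everything else from the new configuration file.
 */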

/* Purge old partition state information */
static void _purge_old_part_state(List old_part_list, char *old_def_part_name)
{
	xfree(old_def_part_name);

	if (!old_part_list)
		return;
	FREE_NULL_LIST(old_part_list);
}

/*
 * _preserve_select_type_param - preserve original plugin parameters.
 *	Daemons and/or commands must be restarted for some
 *	select plugin value changes to take effect.
 * RET zero or error code
 */
static int _preserve_select_type_param(slurm_ctl_conf_t *ctl_conf_ptr,
				       uint16_t old_select_type_p)
{
	int rc = SLURM_SUCCESS;

	/* SelectTypeParameters cannot change */
	if (old_select_type_p) {
		if (old_select_type_p != ctl_conf_ptr->select_type_param) {
			ctl_conf_ptr->select_type_param = old_select_type_p;
			rc = ESLURM_INVALID_SELECTTYPE_CHANGE;
		}
	}
	return rc;
}

/* Start or stop the gang scheduler module as needed based upon changes in
 * configuration */
static int _update_preempt(uint16_t old_preempt_mode)
{
	uint16_t new_preempt_mode = slurm_get_preempt_mode();

	if ((old_preempt_mode & PREEMPT_MODE_GANG) ==
	    (new_preempt_mode & PREEMPT_MODE_GANG))
		return SLURM_SUCCESS;

	if (new_preempt_mode & PREEMPT_MODE_GANG) {
		info("Enabling gang scheduling");
		gs_init();
		return SLURM_SUCCESS;
	}

	if (old_preempt_mode == PREEMPT_MODE_GANG) {
		info("Disabling gang scheduling");
		gs_wake_jobs();
		gs_fini();
		return SLURM_SUCCESS;
	}

	error("Invalid gang scheduling mode change");
	return EINVAL;
}
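
/*
 * Sketch of the mode comparison above (flag values from slurm.h): gang
 * behavior is a bit ORed into the preemption mode, so a change from
 * PREEMPT_MODE_SUSPEND to (PREEMPT_MODE_SUSPEND | PREEMPT_MODE_GANG)
 * starts the gs module, while dropping the bit stops it:
 *
 *	bool was_gang = (old_preempt_mode & PREEMPT_MODE_GANG) != 0;
 *	bool is_gang  = (new_preempt_mode & PREEMPT_MODE_GANG) != 0;
 *	if (was_gang == is_gang)
 *		return SLURM_SUCCESS;	// nothing to start or stop
 */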

/*
 * _sync_nodes_to_jobs - sync node state to job states on slurmctld restart.
 *	This routine marks nodes allocated to a job as busy no matter what
 *	the node's last saved state
 * RET count of nodes having state changed
 * Note: Operates on common variables, no arguments
 */
static int _sync_nodes_to_jobs(bool reconfig)
{
	job_record_t *job_ptr;
	ListIterator job_iterator;
	int update_cnt = 0;

	job_iterator = list_iterator_create(job_list);
	while ((job_ptr = list_next(job_iterator))) {
		if (!reconfig &&
		    job_ptr->details && job_ptr->details->prolog_running) {
			job_ptr->details->prolog_running = 0;
			if (IS_JOB_CONFIGURING(job_ptr)) {
				prolog_slurmctld(job_ptr);
				(void) bb_g_job_begin(job_ptr);
			}
		}

		if (job_ptr->node_bitmap == NULL)
			;
		else if (IS_JOB_RUNNING(job_ptr) || IS_JOB_COMPLETING(job_ptr))
			update_cnt += _sync_nodes_to_active_job(job_ptr);
		else if (IS_JOB_SUSPENDED(job_ptr))
			_sync_nodes_to_suspended_job(job_ptr);
	}
	list_iterator_destroy(job_iterator);

	if (update_cnt) {
		info("_sync_nodes_to_jobs updated state of %d nodes",
		     update_cnt);
	}
	sync_front_end_state();
	return update_cnt;
}

/* For jobs which are in state COMPLETING, deallocate the nodes and
 * issue the RPC to kill the job */
static int _sync_nodes_to_comp_job(void)
{
	job_record_t *job_ptr;
	ListIterator job_iterator;
	int update_cnt = 0;

	job_iterator = list_iterator_create(job_list);
	while ((job_ptr = list_next(job_iterator))) {
		if ((job_ptr->node_bitmap) && IS_JOB_COMPLETING(job_ptr)) {
			/*
			 * If the controller is reconfiguring, the job is in
			 * completing state and the slurmctld epilog is
			 * already running, then deallocate_nodes() was
			 * already called; do not invoke it again and do not
			 * start another epilog.
			 */
			if (job_ptr->epilog_running == true)
				continue;

			update_cnt++;
			info("%s: %pJ in completing state", __func__, job_ptr);
			if (!job_ptr->node_bitmap_cg)
				build_cg_bitmap(job_ptr);

			/*
			 * deallocate_nodes() will remove this job from the
			 * accounting policy counters even though it was
			 * never added after the restart, so add it now
			 */
			if (accounting_enforce & ACCOUNTING_ENFORCE_LIMITS)
				acct_policy_job_begin(job_ptr);

			if (job_ptr->front_end_ptr)
				job_ptr->front_end_ptr->job_cnt_run++;
			deallocate_nodes(job_ptr, false, false, false);
			/* The job was in completing state at slurmctld
			 * restart or reconfiguration, so do not log its
			 * completion again.
			 * job_completion_logger(job_ptr, false); */
		}
	}
	list_iterator_destroy(job_iterator);
	if (update_cnt)
		info("%s: completing %d jobs", __func__, update_cnt);
	return update_cnt;
}

/* Synchronize states of nodes and active jobs (RUNNING or COMPLETING state)
 * RET count of jobs with state changes */
static int _sync_nodes_to_active_job(job_record_t *job_ptr)
{
	int i, cnt = 0;
	uint32_t node_flags;
	node_record_t *node_ptr = node_record_table_ptr;

	if (job_ptr->node_bitmap_cg)	/* job completing */
		job_ptr->node_cnt = bit_set_count(job_ptr->node_bitmap_cg);
	else
		job_ptr->node_cnt = bit_set_count(job_ptr->node_bitmap);
	for (i = 0; i < node_record_count; i++, node_ptr++) {
		if (job_ptr->node_bitmap_cg) {	/* job completing */
			if (bit_test(job_ptr->node_bitmap_cg, i) == 0)
				continue;
		} else if (bit_test(job_ptr->node_bitmap, i) == 0)
			continue;

		if ((job_ptr->details &&
		     (job_ptr->details->whole_node == WHOLE_NODE_USER)) ||
		    (job_ptr->part_ptr &&
		     (job_ptr->part_ptr->flags & PART_FLAG_EXCLUSIVE_USER))) {
			node_ptr->owner_job_cnt++;
			node_ptr->owner = job_ptr->user_id;
		}

		if (slurm_mcs_get_select(job_ptr) == 1) {
			xfree(node_ptr->mcs_label);
			node_ptr->mcs_label = xstrdup(job_ptr->mcs_label);
		}

		node_flags = node_ptr->node_state & NODE_STATE_FLAGS;

		node_ptr->run_job_cnt++; /* NOTE: This counter moved to
					  * comp_job_cnt by
					  * _sync_nodes_to_comp_job() */
		if ((job_ptr->details) && (job_ptr->details->share_res == 0))
			node_ptr->no_share_job_cnt++;

		if (IS_NODE_DOWN(node_ptr) &&
		    IS_JOB_RUNNING(job_ptr) &&
		    (job_ptr->kill_on_node_fail == 0) &&
		    (job_ptr->node_cnt > 1)) {
			/* This should only happen if a job was running
			 * on a node that was newly configured DOWN */
			int save_accounting_enforce;
			info("Removing failed node %s from %pJ",
			     node_ptr->name, job_ptr);
			/*
			 * Disable accounting here. Accounting is reset for
			 * all jobs in _restore_job_accounting()
			 */
			save_accounting_enforce = accounting_enforce;
			accounting_enforce &= (~ACCOUNTING_ENFORCE_LIMITS);
			job_pre_resize_acctg(job_ptr);
			srun_node_fail(job_ptr, node_ptr->name);
			kill_step_on_node(job_ptr, node_ptr, true);
			excise_node_from_job(job_ptr, node_ptr);
			job_post_resize_acctg(job_ptr);
			accounting_enforce = save_accounting_enforce;
		} else if (IS_NODE_DOWN(node_ptr) && IS_JOB_RUNNING(job_ptr)) {
			info("Killing %pJ on DOWN node %s",
			     job_ptr, node_ptr->name);
			_abort_job(job_ptr, JOB_NODE_FAIL, FAIL_DOWN_NODE,
				   NULL);
			cnt++;
		} else if (IS_NODE_IDLE(node_ptr)) {
			cnt++;
			node_ptr->node_state = NODE_STATE_ALLOCATED |
					       node_flags;
		}
	}

	if ((IS_JOB_RUNNING(job_ptr) || IS_JOB_SUSPENDED(job_ptr)) &&
	    (job_ptr->front_end_ptr != NULL))
		job_ptr->front_end_ptr->job_cnt_run++;

	return cnt;
}

/* Synchronize states of nodes and suspended jobs */
static void _sync_nodes_to_suspended_job(job_record_t *job_ptr)
{
	int i;
	node_record_t *node_ptr = node_record_table_ptr;

	for (i = 0; i < node_record_count; i++, node_ptr++) {
		if (bit_test(job_ptr->node_bitmap, i) == 0)
			continue;

		node_ptr->sus_job_cnt++;
	}
}

/*
 * Build license_list for every job.
 * Reset accounting for every job.
 * Reset the running job count for scheduling policy.
 * This must be called after load_all_resv_state() and restore_node_features().
 */
static void _restore_job_accounting(void)
{
	job_record_t *job_ptr;
	ListIterator job_iterator;
	bool valid = true;
	List license_list;

	assoc_mgr_clear_used_info();

	job_iterator = list_iterator_create(job_list);
	while ((job_ptr = list_next(job_iterator))) {
		if (job_ptr->array_recs)
			job_ptr->array_recs->tot_run_tasks = 0;
	}

	list_iterator_reset(job_iterator);
	while ((job_ptr = list_next(job_iterator))) {
		(void) build_feature_list(job_ptr);

		if (IS_JOB_RUNNING(job_ptr) || IS_JOB_SUSPENDED(job_ptr))
			job_array_start(job_ptr);

		if (accounting_enforce & ACCOUNTING_ENFORCE_LIMITS) {
			if (!IS_JOB_FINISHED(job_ptr))
				acct_policy_add_job_submit(job_ptr);
			if (IS_JOB_RUNNING(job_ptr) ||
			    IS_JOB_SUSPENDED(job_ptr)) {
				acct_policy_job_begin(job_ptr);
				job_claim_resv(job_ptr);
			} else if (IS_JOB_PENDING(job_ptr) &&
				   job_ptr->details &&
				   job_ptr->details->accrue_time)
				acct_policy_add_accrue_time(job_ptr, true);
		}

		license_list = license_validate(job_ptr->licenses, false,
						false, job_ptr->tres_req_cnt,
						&valid);
		FREE_NULL_LIST(job_ptr->license_list);
		if (valid) {
			job_ptr->license_list = license_list;
			xfree(job_ptr->licenses);
			job_ptr->licenses =
				license_list_to_string(license_list);
		}

		if (IS_JOB_RUNNING(job_ptr) || IS_JOB_SUSPENDED(job_ptr))
			license_job_get(job_ptr);
	}
	list_iterator_destroy(job_iterator);
}

/*
 * NOTE: Can be removed in/after 21.08 because the controller won't need to
 * build details->depend_list from the dependency string anymore: as of
 * 20.02 the depend_list is saved as part of job state and no longer relies
 * on the dependency string.
 */
extern int restore_job_dependencies(void)
{
	job_record_t *job_ptr;
	ListIterator job_iterator;
	int error_code = SLURM_SUCCESS, rc;
	char *new_depend;
	slurmctld_lock_t job_fed_lock = {.job = WRITE_LOCK, .fed = READ_LOCK};

	lock_slurmctld(job_fed_lock);

	job_iterator = list_iterator_create(job_list);
	while ((job_ptr = list_next(job_iterator))) {
		if ((job_ptr->details == NULL) ||
		    (job_ptr->details->dependency == NULL) ||
		    job_ptr->details->depend_list)
			continue;
		new_depend = job_ptr->details->dependency;
		job_ptr->details->dependency = NULL;
		rc = update_job_dependency(job_ptr, new_depend);
		if (rc != SLURM_SUCCESS) {
			error("Invalid dependencies discarded for %pJ: %s",
			      job_ptr, new_depend);
			error_code = rc;
		}
		xfree(new_depend);
	}
	list_iterator_destroy(job_iterator);
	unlock_slurmctld(job_fed_lock);

	return error_code;
}

/* Flush accounting information on this cluster, then for each running or
 * suspended job, restore its state in the accounting system */
static void _acct_restore_active_jobs(void)
{
	job_record_t *job_ptr;
	ListIterator job_iterator;
	step_record_t *step_ptr;
	ListIterator step_iterator;

	info("Reinitializing job accounting state");
	acct_storage_g_flush_jobs_on_cluster(acct_db_conn, time(NULL));
	job_iterator = list_iterator_create(job_list);
	while ((job_ptr = list_next(job_iterator))) {
		if (IS_JOB_SUSPENDED(job_ptr))
			jobacct_storage_g_job_suspend(acct_db_conn, job_ptr);
		if (IS_JOB_SUSPENDED(job_ptr) || IS_JOB_RUNNING(job_ptr)) {
			if (!with_slurmdbd)
				jobacct_storage_g_job_start(acct_db_conn,
							    job_ptr);
			else if (job_ptr->db_index != NO_VAL64)
				job_ptr->db_index = 0;
			step_iterator =
				list_iterator_create(job_ptr->step_list);
			while ((step_ptr = list_next(step_iterator))) {
				jobacct_storage_g_step_start(acct_db_conn,
							     step_ptr);
			}
			list_iterator_destroy(step_iterator);
		}
	}
	list_iterator_destroy(job_iterator);
}

/* _compare_hostnames() - compare the node sets before and after
 * reconfiguration; any change requires a slurmctld restart */
static int _compare_hostnames(node_record_t *old_node_table,
			      int old_node_count, node_record_t *node_table,
			      int node_count)
{
	int cc;
	int set_size;
	char *old_ranged;
	char *ranged;
	hostset_t old_set;
	hostset_t set;

	if (old_node_count != node_count) {
		error("%s: node count has changed before reconfiguration from %d to %d. You have to restart slurmctld.",
		      __func__, old_node_count, node_count);
		return -1;
	}

	old_set = hostset_create("");
	for (cc = 0; cc < old_node_count; cc++)
		hostset_insert(old_set, old_node_table[cc].name);

	set = hostset_create("");
	for (cc = 0; cc < node_count; cc++)
		hostset_insert(set, node_table[cc].name);

	set_size = MAXHOSTNAMELEN * node_count + node_count + 1;

	old_ranged = xmalloc(set_size);
	ranged = xmalloc(set_size);

	hostset_ranged_string(old_set, set_size, old_ranged);
	hostset_ranged_string(set, set_size, ranged);

	cc = 0;
	if (xstrcmp(old_ranged, ranged) != 0) {
		error("%s: node names changed before reconfiguration. You have to restart slurmctld.",
		      __func__);
		cc = -1;
	}

	hostset_destroy(old_set);
	hostset_destroy(set);
	xfree(old_ranged);
	xfree(ranged);

	return cc;
}
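
/*
 * Illustrative example (hypothetical node names): for an old table of
 * tux0, tux1, tux2 and a new table of tux0, tux2, tux3, the two sets
 * collapse to ranged strings "tux[0-2]" and "tux[0,2-3]", the xstrcmp()
 * above fails, and the reconfiguration is rejected. Insertion order does
 * not matter since hostset_ranged_string() emits a canonical form.
 */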

extern int dump_config_state_lite(void)
{
	static int high_buffer_size = (1024 * 1024);
	int error_code = 0, log_fd;
	char *old_file = NULL, *new_file = NULL, *reg_file = NULL;
	Buf buffer = init_buf(high_buffer_size);

	DEF_TIMERS;

	START_TIMER;
	/* write header: version, time */
	pack16(SLURM_PROTOCOL_VERSION, buffer);
	pack_time(time(NULL), buffer);
	packstr(slurmctld_conf.accounting_storage_type, buffer);

	/* write the buffer to file */
	reg_file = xstrdup_printf("%s/last_config_lite",
				  slurmctld_conf.state_save_location);
	old_file = xstrdup_printf("%s.old", reg_file);
	new_file = xstrdup_printf("%s.new", reg_file);

	log_fd = creat(new_file, 0600);
	if (log_fd < 0) {
		error("Can't save state, create file %s error %m", new_file);
		error_code = errno;
	} else {
		int pos = 0, nwrite = get_buf_offset(buffer), amount;
		char *data = (char *) get_buf_data(buffer);
		high_buffer_size = MAX(nwrite, high_buffer_size);
		while (nwrite > 0) {
			amount = write(log_fd, &data[pos], nwrite);
			if (amount < 0) {
				if (errno == EINTR)
					continue; /* retry interrupted write */
				error("Error writing file %s, %m", new_file);
				error_code = errno;
				break;
			}
			nwrite -= amount;
			pos += amount;
		}
		fsync(log_fd);
		close(log_fd);
	}
	if (error_code)
		(void) unlink(new_file);
	else {	/* file shuffle */
		(void) unlink(old_file);
		if (link(reg_file, old_file))
			debug4("unable to create link for %s -> %s: %m",
			       reg_file, old_file);
		(void) unlink(reg_file);
		if (link(new_file, reg_file))
			debug4("unable to create link for %s -> %s: %m",
			       new_file, reg_file);
		(void) unlink(new_file);
	}
	xfree(old_file);
	xfree(reg_file);
	xfree(new_file);

	free_buf(buffer);

	END_TIMER2("dump_config_state_lite");
	return error_code;
}
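
/*
 * Note on the "file shuffle" above (a sketch of the invariant, not extra
 * behavior): the state is written to last_config_lite.new first, then
 * rotated via hard links so that last_config_lite is replaced only after
 * a complete write:
 *
 *	unlink(old);  link(reg, old);	// keep previous copy as .old
 *	unlink(reg);  link(new, reg);	// promote the freshly written file
 *	unlink(new);
 *
 * A crash mid-shuffle therefore leaves at least one intact copy on disk.
 */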

extern int load_config_state_lite(void)
{
	uint32_t uint32_tmp = 0;
	uint16_t ver = 0;
	char *state_file;
	Buf buffer;
	time_t buf_time;
	char *last_accounting_storage_type = NULL;

	/* Always ignore the .old file */
	state_file = xstrdup_printf("%s/last_config_lite",
				    slurmctld_conf.state_save_location);

	if (!(buffer = create_mmap_buf(state_file))) {
		debug2("No last_config_lite file (%s) to recover", state_file);
		xfree(state_file);
		return ENOENT;
	}
	xfree(state_file);

	safe_unpack16(&ver, buffer);
	debug3("Version in last_conf_lite header is %u", ver);
	if (ver > SLURM_PROTOCOL_VERSION || ver < SLURM_MIN_PROTOCOL_VERSION) {
		if (!ignore_state_errors)
			fatal("Can not recover last_conf_lite, incompatible version (%u not between %d and %d), start with '-i' to ignore this. Warning: using -i will lose the data that can't be recovered.",
			      ver, SLURM_MIN_PROTOCOL_VERSION,
			      SLURM_PROTOCOL_VERSION);
		error("***********************************************");
		error("Can not recover last_conf_lite, incompatible version (%u not between %d and %d)",
		      ver, SLURM_MIN_PROTOCOL_VERSION, SLURM_PROTOCOL_VERSION);
		error("***********************************************");
		free_buf(buffer);
		return EFAULT;
	} else {
		safe_unpack_time(&buf_time, buffer);
		safe_unpackstr_xmalloc(&last_accounting_storage_type,
				       &uint32_tmp, buffer);
	}
	xassert(slurmctld_conf.accounting_storage_type);

	if (last_accounting_storage_type &&
	    !xstrcmp(last_accounting_storage_type,
		     slurmctld_conf.accounting_storage_type))
		slurmctld_init_db = 0;
	xfree(last_accounting_storage_type);

	free_buf(buffer);
	return SLURM_SUCCESS;

unpack_error:
	if (!ignore_state_errors)
		fatal("Incomplete last_config_lite checkpoint file, start with '-i' to ignore this. Warning: using -i will lose the data that can't be recovered.");
	error("Incomplete last_config_lite checkpoint file");
	free_buf(buffer);

	return SLURM_ERROR;
}