1 /*****************************************************************************\
2 * backup.c - backup slurm controller
3 *****************************************************************************
4 * Copyright (C) 2002-2007 The Regents of the University of California.
5 * Copyright (C) 2008-2010 Lawrence Livermore National Security.
6 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
7 * Written by Morris Jette <jette@llnl.gov>, et. al.
8 * CODE-OCEC-09-009. All rights reserved.
9 *
10 * This file is part of Slurm, a resource management program.
11 * For details, see <https://slurm.schedmd.com/>.
12 * Please also read the included file: DISCLAIMER.
13 *
14 * Slurm is free software; you can redistribute it and/or modify it under
15 * the terms of the GNU General Public License as published by the Free
16 * Software Foundation; either version 2 of the License, or (at your option)
17 * any later version.
18 *
19 * In addition, as a special exception, the copyright holders give permission
20 * to link the code of portions of this program with the OpenSSL library under
21 * certain conditions as described in each individual source file, and
22 * distribute linked combinations including the two. You must obey the GNU
23 * General Public License in all respects for all of the code used other than
24 * OpenSSL. If you modify file(s) with this exception, you may extend this
25 * exception to your version of the file(s), but you are not obligated to do
26 * so. If you do not wish to do so, delete this exception statement from your
27 * version. If you delete this exception statement from all source files in
28 * the program, then also delete it here.
29 *
30 * Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
31 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
32 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
33 * details.
34 *
35 * You should have received a copy of the GNU General Public License along
36 * with Slurm; if not, write to the Free Software Foundation, Inc.,
37 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
38 \*****************************************************************************/
39
40 #include "config.h"
41
42 #include <errno.h>
43 #include <pthread.h>
44 #include <signal.h>
45 #include <stdio.h>
46 #include <stdlib.h>
47 #include <string.h>
48 #include <sys/resource.h>
49 #include <sys/stat.h>
50
51 #include "slurm/slurm_errno.h"
52
53 #include "src/common/daemonize.h"
54 #include "src/common/log.h"
55 #include "src/common/macros.h"
56 #include "src/common/node_select.h"
57 #include "src/common/slurm_auth.h"
58 #include "src/common/slurm_accounting_storage.h"
59 #include "src/common/switch.h"
60 #include "src/common/xsignal.h"
61 #include "src/common/xstring.h"
62
63 #include "src/slurmctld/heartbeat.h"
64 #include "src/slurmctld/locks.h"
65 #include "src/slurmctld/proc_req.h"
66 #include "src/slurmctld/read_config.h"
67 #include "src/slurmctld/slurmctld.h"
68 #include "src/slurmctld/trigger_mgr.h"
69
70 #define _DEBUG 0
71 #define SHUTDOWN_WAIT 2 /* Time to wait for primary server shutdown */
72
73 static int _background_process_msg(slurm_msg_t * msg);
74 static void * _background_rpc_mgr(void *no_data);
75 static void * _background_signal_hand(void *no_data);
76 static void _backup_reconfig(void);
77 static int _shutdown_primary_controller(int wait_time);
78 static void * _trigger_slurmctld_event(void *arg);
79 inline static void _update_cred_key(void);
80
/*
 * Argument handed to each _ping_ctld_thread(). The thread owns the record
 * and releases both strings and the record itself before it returns.
 */
typedef struct ping_struct {
	int backup_inx;		/* index of the controller being pinged */
	char *control_addr;	/* address the request is sent to */
	char *control_machine;	/* host name, only used in log messages */
	uint32_t slurmctld_port;	/* port the request is sent to */
} ping_struct_t;
87
/* Result of pinging one controller, filled in by _ping_ctld_thread() */
typedef struct {
	time_t control_time;	/* control_time reported by that controller,
				 * zero if it did not answer */
	bool responding;	/* true once a valid status reply arrived */
} ctld_ping_t;
92
93 /* Local variables */
94 static ctld_ping_t * ctld_ping = NULL;
95 static bool dump_core = false;
96 static time_t last_controller_response;
97 static pthread_mutex_t ping_mutex = PTHREAD_MUTEX_INITIALIZER;
98 static volatile bool takeover = false;
99 static pthread_cond_t shutdown_cond = PTHREAD_COND_INITIALIZER;
100 static pthread_mutex_t shutdown_mutex = PTHREAD_MUTEX_INITIALIZER;
101 static int shutdown_rc = SLURM_SUCCESS;
102 static int shutdown_thread_cnt = 0;
103 static int shutdown_timeout = 0;
104
/*
 * Signals blocked process-wide by run_backup() and waited for by
 * _background_signal_hand().
 * The list *must* stay zero-terminated.
 */
static int backup_sigarray[] = {
	SIGINT,
	SIGTERM,
	SIGCHLD,
	SIGUSR1,
	SIGUSR2,
	SIGTSTP,
	SIGXCPU,
	SIGQUIT,
	SIGPIPE,
	SIGALRM,
	SIGABRT,
	SIGHUP,
	0	/* terminator */
};
114
115 /*
116 * run_backup - this is the backup controller, it should run in standby
117 * mode, assuming control when the primary controller stops responding
118 */
run_backup(slurm_trigger_callbacks_t * callbacks)119 void run_backup(slurm_trigger_callbacks_t *callbacks)
120 {
121 int i;
122 time_t last_ping = 0;
123 slurmctld_lock_t config_read_lock = {
124 READ_LOCK, NO_LOCK, NO_LOCK, NO_LOCK, NO_LOCK };
125 slurmctld_lock_t config_write_lock = {
126 WRITE_LOCK, WRITE_LOCK, WRITE_LOCK, WRITE_LOCK, NO_LOCK };
127
128 info("slurmctld running in background mode");
129 takeover = false;
130 last_controller_response = time(NULL);
131
132 /* default: don't resume if shutdown */
133 slurmctld_config.resume_backup = false;
134
135 /* It is now ok to tell the primary I am done (if I ever had control) */
136 slurm_mutex_lock(&slurmctld_config.thread_count_lock);
137 slurm_cond_broadcast(&slurmctld_config.backup_finish_cond);
138 slurm_mutex_unlock(&slurmctld_config.thread_count_lock);
139
140 if (xsignal_block(backup_sigarray) < 0)
141 error("Unable to block signals");
142
143 /*
144 * create attached thread to process RPCs
145 */
146 slurm_thread_create(&slurmctld_config.thread_id_rpc,
147 _background_rpc_mgr, NULL);
148
149 /*
150 * create attached thread for signal handling
151 */
152 slurm_thread_create(&slurmctld_config.thread_id_sig,
153 _background_signal_hand, NULL);
154
155 slurm_thread_create_detached(NULL, _trigger_slurmctld_event, NULL);
156
157 for (i = 0; ((i < 5) && (slurmctld_config.shutdown_time == 0)); i++) {
158 sleep(1); /* Give the primary slurmctld set-up time */
159 }
160
161 /* repeatedly ping ControlMachine */
162 while (slurmctld_config.shutdown_time == 0) {
163 sleep(1);
164 /* Lock of slurmctld_conf below not important */
165 if (slurmctld_conf.slurmctld_timeout &&
166 (takeover == false) &&
167 ((time(NULL) - last_ping) <
168 (slurmctld_conf.slurmctld_timeout / 3)))
169 continue;
170
171 last_ping = time(NULL);
172 if (ping_controllers(false) == SLURM_SUCCESS)
173 last_controller_response = time(NULL);
174 else if (takeover) {
175 /*
176 * in takeover mode, take control as soon as
177 * primary no longer respond
178 */
179 break;
180 } else {
181 time_t use_time, last_heartbeat;
182 int server_inx = -1;
183 last_heartbeat = get_last_heartbeat(&server_inx);
184 debug("%s: last_heartbeat %ld from server %d",
185 __func__, last_heartbeat, server_inx);
186
187 use_time = last_controller_response;
188 if (server_inx > backup_inx) {
189 info("Lower priority slurmctld is currently primary (%d > %d)",
190 server_inx, backup_inx);
191 } else if (last_heartbeat > last_controller_response) {
192 /* Race condition for time stamps */
193 debug("Last message to the controller was at %ld,"
194 " but the last heartbeat was written at %ld,"
195 " trusting the filesystem instead of the network"
196 " and not asserting control at this time.",
197 last_controller_response, last_heartbeat);
198 use_time = last_heartbeat;
199 }
200
201 if ((time(NULL) - use_time) >
202 slurmctld_conf.slurmctld_timeout)
203 break;
204 }
205 }
206
207 if (slurmctld_config.shutdown_time != 0) {
208 /*
209 * Since pidfile is created as user root (its owner is
210 * changed to SlurmUser) SlurmUser may not be able to
211 * remove it, so this is not necessarily an error.
212 * No longer need slurmctld_conf lock after above join.
213 */
214 if (unlink(slurmctld_conf.slurmctld_pidfile) < 0)
215 verbose("Unable to remove pidfile '%s': %m",
216 slurmctld_conf.slurmctld_pidfile);
217
218 info("BackupController terminating");
219 pthread_join(slurmctld_config.thread_id_sig, NULL);
220 log_fini();
221 if (dump_core)
222 abort();
223 else
224 exit(0);
225 }
226
227 lock_slurmctld(config_read_lock);
228 error("ControlMachine %s not responding, BackupController%d %s taking over",
229 slurmctld_conf.control_machine[0], backup_inx,
230 slurmctld_config.node_name_short);
231 unlock_slurmctld(config_read_lock);
232
233 backup_slurmctld_restart();
234 trigger_primary_ctld_fail();
235 trigger_backup_ctld_as_ctrl();
236
237 pthread_kill(slurmctld_config.thread_id_sig, SIGTERM);
238 pthread_join(slurmctld_config.thread_id_sig, NULL);
239 pthread_join(slurmctld_config.thread_id_rpc, NULL);
240
241 /*
242 * The job list needs to be freed before we run
243 * ctld_assoc_mgr_init, it should be empty here in the first place.
244 */
245 lock_slurmctld(config_write_lock);
246 job_fini();
247 init_job_conf();
248 unlock_slurmctld(config_write_lock);
249
250 ctld_assoc_mgr_init(callbacks);
251
252 /* clear old state and read new state */
253 lock_slurmctld(config_write_lock);
254 if (switch_g_restore(slurmctld_conf.state_save_location, true)) {
255 error("failed to restore switch state");
256 abort();
257 }
258 if (read_slurm_conf(2, false)) { /* Recover all state */
259 error("Unable to recover slurm state");
260 abort();
261 }
262 slurmctld_config.shutdown_time = (time_t) 0;
263 unlock_slurmctld(config_write_lock);
264 select_g_select_nodeinfo_set_all();
265
266 return;
267 }
268
269 /*
270 * _background_signal_hand - Process daemon-wide signals for the
271 * backup controller
272 */
_background_signal_hand(void * no_data)273 static void *_background_signal_hand(void *no_data)
274 {
275 int sig, rc;
276 sigset_t set;
277 /* Locks: Write configuration, job, node, and partition */
278 slurmctld_lock_t config_write_lock = {
279 WRITE_LOCK, WRITE_LOCK, WRITE_LOCK, WRITE_LOCK, NO_LOCK };
280
281 (void) pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
282 (void) pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);
283
284 while (slurmctld_config.shutdown_time == 0) {
285 xsignal_sigset_create(backup_sigarray, &set);
286 rc = sigwait(&set, &sig);
287 if (rc == EINTR)
288 continue;
289 switch (sig) {
290 case SIGINT: /* kill -2 or <CTRL-C> */
291 case SIGTERM: /* kill -15 */
292 info("Terminate signal (SIGINT or SIGTERM) received");
293 slurmctld_config.shutdown_time = time(NULL);
294 slurmctld_shutdown();
295 return NULL; /* Normal termination */
296 break;
297 case SIGHUP: /* kill -1 */
298 info("Reconfigure signal (SIGHUP) received");
299 /*
300 * XXX - need to shut down the scheduler
301 * plugin, re-read the configuration, and then
302 * restart the (possibly new) plugin.
303 */
304 lock_slurmctld(config_write_lock);
305 _backup_reconfig();
306 /* Leave config lock set through this */
307 _update_cred_key();
308 unlock_slurmctld(config_write_lock);
309 break;
310 case SIGABRT: /* abort */
311 info("SIGABRT received");
312 slurmctld_config.shutdown_time = time(NULL);
313 slurmctld_shutdown();
314 dump_core = true;
315 return NULL; /* Normal termination */
316 break;
317 case SIGUSR2:
318 info("Logrotate signal (SIGUSR2) received");
319 update_logging();
320 break;
321 default:
322 error("Invalid signal (%d) received", sig);
323 }
324 }
325 return NULL;
326 }
327
328 /*
329 * Reset the job credential key based upon configuration parameters.
330 * slurmctld_conf is locked on entry.
331 */
_update_cred_key(void)332 static void _update_cred_key(void)
333 {
334 slurm_cred_ctx_key_update(slurmctld_config.cred_ctx,
335 slurmctld_conf.job_credential_private_key);
336 }
337
/*
 * Deliberately empty handler. _background_rpc_mgr() installs it for
 * SIGUSR1 so that the signal interrupts its accept call.
 */
static void _sig_handler(int signal)
{
	/* nothing to do */
}
341
342 /*
343 * _background_rpc_mgr - Read and process incoming RPCs to the background
344 * controller (that's us)
345 */
_background_rpc_mgr(void * no_data)346 static void *_background_rpc_mgr(void *no_data)
347 {
348 int newsockfd, sockfd;
349 slurm_addr_t cli_addr;
350 slurm_msg_t msg;
351 int error_code;
352
353 /* Read configuration only */
354 slurmctld_lock_t config_read_lock = {
355 READ_LOCK, NO_LOCK, NO_LOCK, NO_LOCK, NO_LOCK };
356 int sigarray[] = {SIGUSR1, 0};
357
358 (void) pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
359 (void) pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);
360 debug3("_background_rpc_mgr pid = %lu", (unsigned long) getpid());
361
362 /* initialize port for RPCs */
363 lock_slurmctld(config_read_lock);
364
365 if ((sockfd = slurm_init_msg_engine_port(slurmctld_conf.slurmctld_port))
366 == SLURM_ERROR)
367 fatal("slurm_init_msg_engine_port error %m");
368 unlock_slurmctld(config_read_lock);
369
370 /*
371 * Prepare to catch SIGUSR1 to interrupt accept(). This signal is
372 * generated by the slurmctld signal handler thread upon receipt of
373 * SIGABRT, SIGINT, or SIGTERM. That thread does all processing of
374 * all signals.
375 */
376 xsignal(SIGUSR1, _sig_handler);
377 xsignal_unblock(sigarray);
378
379 /*
380 * Process incoming RPCs indefinitely
381 */
382 while (slurmctld_config.shutdown_time == 0) {
383 /*
384 * accept needed for stream implementation is a no-op in
385 * message implementation that just passes sockfd to newsockfd
386 */
387 if ((newsockfd = slurm_accept_msg_conn(sockfd, &cli_addr))
388 == SLURM_ERROR) {
389 if (errno != EINTR)
390 error("slurm_accept_msg_conn: %m");
391 continue;
392 }
393
394 slurm_msg_t_init(&msg);
395 if (slurm_receive_msg(newsockfd, &msg, 0) != 0)
396 error("slurm_receive_msg: %m");
397
398 error_code = _background_process_msg(&msg);
399 if ((error_code == SLURM_SUCCESS) &&
400 (msg.msg_type == REQUEST_SHUTDOWN_IMMEDIATE) &&
401 (slurmctld_config.shutdown_time == 0))
402 slurmctld_config.shutdown_time = time(NULL);
403
404 slurm_free_msg_members(&msg);
405
406 close(newsockfd); /* close new socket */
407 }
408
409 debug3("_background_rpc_mgr shutting down");
410 close(sockfd); /* close the main socket */
411 pthread_exit((void *) 0);
412 return NULL;
413 }
414
415 /*
416 * _background_process_msg - process an RPC to the backup_controller
417 */
_background_process_msg(slurm_msg_t * msg)418 static int _background_process_msg(slurm_msg_t *msg)
419 {
420 int error_code = SLURM_SUCCESS;
421 bool send_rc = true;
422
423 if (msg->msg_type != REQUEST_PING) {
424 bool super_user = false;
425 uid_t uid = g_slurm_auth_get_uid(msg->auth_cred);
426
427 if (validate_slurm_user(uid))
428 super_user = true;
429
430 if (super_user &&
431 (msg->msg_type == REQUEST_SHUTDOWN_IMMEDIATE)) {
432 info("Performing RPC: REQUEST_SHUTDOWN_IMMEDIATE");
433 send_rc = false;
434 } else if (super_user &&
435 (msg->msg_type == REQUEST_SHUTDOWN)) {
436 info("Performing RPC: REQUEST_SHUTDOWN");
437 pthread_kill(slurmctld_config.thread_id_sig, SIGTERM);
438 } else if (super_user &&
439 (msg->msg_type == REQUEST_TAKEOVER)) {
440 info("Performing RPC: REQUEST_TAKEOVER");
441 (void) _shutdown_primary_controller(SHUTDOWN_WAIT);
442 takeover = true;
443 error_code = SLURM_SUCCESS;
444 } else if (super_user &&
445 (msg->msg_type == REQUEST_CONTROL)) {
446 debug3("Ignoring RPC: REQUEST_CONTROL");
447 error_code = ESLURM_DISABLED;
448 last_controller_response = time(NULL);
449 } else if (msg->msg_type == REQUEST_CONTROL_STATUS) {
450 slurm_rpc_control_status(msg, 0);
451 send_rc = false;
452 } else {
453 error("Invalid RPC received %d while in standby mode",
454 msg->msg_type);
455 error_code = ESLURM_IN_STANDBY_MODE;
456 }
457 }
458 if (send_rc)
459 slurm_send_rc_msg(msg, error_code);
460 return error_code;
461 }
462
_ping_ctld_thread(void * arg)463 static void *_ping_ctld_thread(void *arg)
464 {
465 ping_struct_t *ping = (ping_struct_t *) arg;
466 slurm_msg_t req, resp;
467 control_status_msg_t *control_msg;
468 time_t control_time = (time_t) 0;
469 bool responding = false;
470
471 slurm_msg_t_init(&req);
472 slurm_set_addr(&req.address, ping->slurmctld_port, ping->control_addr);
473 req.msg_type = REQUEST_CONTROL_STATUS;
474 if (slurm_send_recv_node_msg(&req, &resp, 0) == SLURM_SUCCESS) {
475 switch (resp.msg_type) {
476 case RESPONSE_CONTROL_STATUS:
477 control_msg = (control_status_msg_t *) resp.data;
478 if (ping->backup_inx != control_msg->backup_inx) {
479 error("%s: BackupController# index mismatch (%d != %u) from host %s",
480 __func__, ping->backup_inx,
481 control_msg->backup_inx,
482 ping->control_machine);
483 }
484 control_time = control_msg->control_time;
485 responding = true;
486 break;
487 default:
488 error("%s:, Unknown response message %u from host %s",
489 __func__, resp.msg_type, ping->control_machine);
490 break;
491 }
492 slurm_free_msg_data(resp.msg_type, resp.data);
493 if (resp.auth_cred)
494 g_slurm_auth_destroy(resp.auth_cred);
495 }
496
497 slurm_mutex_lock(&ping_mutex);
498 if (responding) {
499 ctld_ping[ping->backup_inx].control_time = control_time;
500 ctld_ping[ping->backup_inx].responding = true;
501 }
502 slurm_mutex_unlock(&ping_mutex);
503
504 xfree(ping->control_addr);
505 xfree(ping->control_machine);
506 xfree(ping);
507
508 return NULL;
509 }
510
511 /*
512 * Ping all higher-priority control nodes.
513 * RET SLURM_SUCCESS if a currently active controller is found
514 */
ping_controllers(bool active_controller)515 extern int ping_controllers(bool active_controller)
516 {
517 int i, ping_target_cnt;
518 ping_struct_t *ping;
519 pthread_t *ping_tids;
520 /* Locks: Read configuration */
521 slurmctld_lock_t config_read_lock = {
522 READ_LOCK, NO_LOCK, NO_LOCK, NO_LOCK, NO_LOCK };
523 bool active_ctld = false, avail_ctld = false;
524
525 if (active_controller)
526 ping_target_cnt = slurmctld_conf.control_cnt;
527 else
528 ping_target_cnt = backup_inx;
529
530 ctld_ping = xcalloc(ping_target_cnt, sizeof(ctld_ping_t));
531 ping_tids = xcalloc(ping_target_cnt, sizeof(pthread_t));
532
533 for (i = 0; i < ping_target_cnt; i++) {
534 ctld_ping[i].control_time = (time_t) 0;
535 ctld_ping[i].responding = false;
536 }
537
538 lock_slurmctld(config_read_lock);
539 for (i = 0; i < ping_target_cnt; i++) {
540 if (i == backup_inx) /* Avoid pinging ourselves */
541 continue;
542
543 ping = xmalloc(sizeof(ping_struct_t));
544 ping->backup_inx = i;
545 ping->control_addr = xstrdup(slurmctld_conf.control_addr[i]);
546 ping->control_machine = xstrdup(slurmctld_conf.control_machine[i]);
547 ping->slurmctld_port = slurmctld_conf.slurmctld_port;
548 slurm_thread_create(&ping_tids[i], _ping_ctld_thread, ping);
549 }
550 unlock_slurmctld(config_read_lock);
551
552 for (i = 0; i < ping_target_cnt; i++) {
553 if (i == backup_inx) /* Avoid pinging ourselves */
554 continue;
555 pthread_join(ping_tids[i], NULL);
556 }
557 xfree(ping_tids);
558
559 for (i = 0; i < ping_target_cnt; i++) {
560 if (i == backup_inx) /* Avoid pinging ourselves */
561 continue;
562 if (ctld_ping[i].control_time) {
563 /*
564 * Higher priority slurmctld is already in
565 * primary mode
566 */
567 active_ctld = true;
568 }
569 if (ctld_ping[i].responding) {
570 /*
571 * Higher priority slurmctld is available to
572 * enter primary mode
573 */
574 avail_ctld = true;
575 } else if (active_controller) {
576 trigger_backup_ctld_fail(i);
577 }
578 }
579
580 xfree(ctld_ping);
581 if (active_ctld || avail_ctld)
582 return SLURM_SUCCESS;
583 return SLURM_ERROR;
584 }
585
586 /*
587 * Reload the slurm.conf parameters without any processing
588 * of the node, partition, or state information.
589 * Specifically, we don't want to purge batch scripts based
590 * upon old job state information.
591 * This is a stripped down version of read_slurm_conf(0).
592 */
_backup_reconfig(void)593 static void _backup_reconfig(void)
594 {
595 slurm_conf_reinit(NULL);
596 update_logging();
597 slurmctld_conf.last_update = time(NULL);
598 return;
599 }
600
_shutdown_controller(void * arg)601 static void *_shutdown_controller(void *arg)
602 {
603 int shutdown_inx, rc = SLURM_SUCCESS, rc2 = SLURM_SUCCESS;
604 slurm_msg_t req;
605
606 shutdown_inx = *((int *) arg);
607 xfree(arg);
608
609 slurm_msg_t_init(&req);
610 slurm_set_addr(&req.address, slurmctld_conf.slurmctld_port,
611 slurmctld_conf.control_addr[shutdown_inx]);
612 req.msg_type = REQUEST_CONTROL;
613 if (slurm_send_recv_rc_msg_only_one(&req, &rc2, shutdown_timeout) < 0) {
614 error("%s: send/recv(%s): %m", __func__,
615 slurmctld_conf.control_machine[shutdown_inx]);
616 rc = SLURM_ERROR;
617 } else if (rc2 == ESLURM_DISABLED) {
618 debug("primary controller responding");
619 } else if (rc2 == SLURM_SUCCESS) {
620 debug("primary controller has relinquished control");
621 } else {
622 error("%s(%s): %s", __func__,
623 slurmctld_conf.control_machine[shutdown_inx],
624 slurm_strerror(rc2));
625 rc = SLURM_ERROR;
626 }
627
628 slurm_mutex_lock(&shutdown_mutex);
629 if (rc != SLURM_SUCCESS)
630 shutdown_rc = rc;
631 shutdown_thread_cnt--;
632 slurm_cond_signal(&shutdown_cond);
633 slurm_mutex_unlock(&shutdown_mutex);
634 return NULL;
635 }
636
637 /*
638 * Tell the primary controller and all other possible controller daemons to
639 * relinquish control, primary control_machine has to suspend operation
640 * Based on _shutdown_backup_controller from controller.c
641 * wait_time - How long to wait for primary controller to write state, seconds.
642 * RET 0 or an error code
643 * NOTE: READ lock_slurmctld config before entry (or be single-threaded)
644 */
_shutdown_primary_controller(int wait_time)645 static int _shutdown_primary_controller(int wait_time)
646 {
647 int i, *arg;
648
649 if (shutdown_timeout == 0) {
650 shutdown_timeout = slurm_get_msg_timeout() / 2;
651 shutdown_timeout = MAX(shutdown_timeout, 2); /* 2 sec min */
652 shutdown_timeout = MIN(shutdown_timeout, CONTROL_TIMEOUT);
653 shutdown_timeout *= 1000; /* sec to msec */
654 }
655
656 if ((slurmctld_conf.control_addr[0] == NULL) ||
657 (slurmctld_conf.control_addr[0][0] == '\0')) {
658 error("%s: no primary controller to shutdown", __func__);
659 return SLURM_ERROR;
660 }
661
662 shutdown_rc = SLURM_SUCCESS;
663 for (i = 0; i < slurmctld_conf.control_cnt; i++) {
664 if (i == backup_inx)
665 continue; /* No message to self */
666
667 arg = xmalloc(sizeof(int));
668 *arg = i;
669 slurm_thread_create_detached(NULL, _shutdown_controller, arg);
670 slurm_mutex_lock(&shutdown_mutex);
671 shutdown_thread_cnt++;
672 slurm_mutex_unlock(&shutdown_mutex);
673 }
674
675 slurm_mutex_lock(&shutdown_mutex);
676 while (shutdown_thread_cnt != 0) {
677 slurm_cond_wait(&shutdown_cond, &shutdown_mutex);
678 }
679 slurm_mutex_unlock(&shutdown_mutex);
680
681 /*
682 * FIXME: Ideally the REQUEST_CONTROL RPC does not return until all
683 * other activity has ceased and the state has been saved. That is
684 * not presently the case (it returns when no other work is pending,
685 * so the state save should occur right away). We sleep for a while
686 * here and give the primary controller time to shutdown
687 */
688 if (wait_time)
689 sleep(wait_time);
690
691 return shutdown_rc;
692 }
693
_trigger_slurmctld_event(void * arg)694 static void *_trigger_slurmctld_event(void *arg)
695 {
696 trigger_info_t ti;
697
698 memset(&ti, 0, sizeof(ti));
699 ti.res_id = "*";
700 ti.res_type = TRIGGER_RES_TYPE_SLURMCTLD;
701 ti.trig_type = TRIGGER_TYPE_BU_CTLD_RES_OP;
702 ti.control_inx = backup_inx;
703 if (slurm_pull_trigger(&ti)) {
704 error("%s: TRIGGER_TYPE_BU_CTLD_RES_OP send failure: %m",
705 __func__);
706 } else {
707 verbose("%s: TRIGGER_TYPE_BU_CTLD_RES_OP sent", __func__);
708 }
709 return NULL;
710 }
711