1 /* Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License, version 2.0,
5 as published by the Free Software Foundation.
6
7 This program is also distributed with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the program and your derivative works with the
12 separately licensed software that they have included with MySQL.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License, version 2.0, for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */
22
23
24 #include <ndb_global.h>
25 #include <ndb_version.h>
26
27 #include "angel.hpp"
28 #include "ndbd.hpp"
29
30 #include <NdbConfig.h>
31 #include <NdbAutoPtr.hpp>
32 #include <portlib/ndb_daemon.h>
33 #include <portlib/NdbSleep.h>
34 #include <portlib/NdbDir.hpp>
35
36 #include <ConfigRetriever.hpp>
37
38 #include <EventLogger.hpp>
39 extern EventLogger * g_eventLogger;
40
41 static void
angel_exit(int code)42 angel_exit(int code)
43 {
44 ndb_daemon_exit(code);
45 }
46
47 #include "../mgmapi/mgmapi_configuration.hpp"
48
49 static void
reportShutdown(const ndb_mgm_configuration * config,NodeId nodeid,int error_exit,bool restart,bool nostart,bool initial,Uint32 error,Uint32 signum,Uint32 sphase)50 reportShutdown(const ndb_mgm_configuration* config,
51 NodeId nodeid, int error_exit,
52 bool restart, bool nostart, bool initial,
53 Uint32 error, Uint32 signum, Uint32 sphase)
54 {
55 // Only allow "initial" and "nostart" to be set if "restart" is set
56 assert(restart ||
57 (!restart && !initial && !nostart));
58
59 Uint32 length, theData[25];
60 EventReport *rep= CAST_PTR(EventReport, &theData[0]);
61
62 rep->setNodeId(nodeid);
63 if (restart)
64 theData[1]=1 |
65 (nostart ? 2 : 0) |
66 (initial ? 4 : 0);
67 else
68 theData[1]=0;
69
70 if (error_exit == 0)
71 {
72 rep->setEventType(NDB_LE_NDBStopCompleted);
73 theData[2]=signum;
74 length=3;
75 } else
76 {
77 rep->setEventType(NDB_LE_NDBStopForced);
78 theData[2]=signum;
79 theData[3]=error;
80 theData[4]=sphase;
81 theData[5]=0; // extra
82 length=6;
83 }
84
85 // Log event locally
86 g_eventLogger->log(rep->getEventType(), theData, length,
87 rep->getNodeId(), 0);
88
89 // Log event to cluster log
90 ndb_mgm_configuration_iterator iter(*config, CFG_SECTION_NODE);
91 for (iter.first(); iter.valid(); iter.next())
92 {
93 Uint32 type;
94 if (iter.get(CFG_TYPE_OF_SECTION, &type) ||
95 type != NODE_TYPE_MGM)
96 continue;
97
98 Uint32 port;
99 if (iter.get(CFG_MGM_PORT, &port))
100 continue;
101
102 const char* hostname;
103 if (iter.get(CFG_NODE_HOST, &hostname))
104 continue;
105
106 BaseString connect_str;
107 connect_str.assfmt("%s:%d", hostname, port);
108
109
110 NdbMgmHandle h = ndb_mgm_create_handle();
111 if (h == 0)
112 {
113 g_eventLogger->warning("Unable to report shutdown reason "
114 "to '%s'(failed to create mgm handle)",
115 connect_str.c_str());
116 continue;
117 }
118
119 if (ndb_mgm_set_connectstring(h, connect_str.c_str()) ||
120 ndb_mgm_connect(h, 1, 0, 0) ||
121 ndb_mgm_report_event(h, theData, length))
122 {
123 g_eventLogger->warning("Unable to report shutdown reason "
124 "to '%s'(error: %s - %s)",
125 connect_str.c_str(),
126 ndb_mgm_get_latest_error_msg(h),
127 ndb_mgm_get_latest_error_desc(h));
128 }
129
130 ndb_mgm_destroy_handle(&h);
131 }
132 }
133
134
/*
  Install SIG_IGN as handler for every signal in the list below.

  Signals which do not exist on all platforms are only listed
  when the corresponding macro is defined.
*/
static void
ignore_signals(void)
{
  static const int signals_to_ignore[] = {
#ifdef SIGBREAK
    SIGBREAK,
#endif
#ifdef SIGHUP
    SIGHUP,
#endif
    SIGINT,
    /* SIGPWR is preferred, SIGINFO is only used when SIGPWR is missing */
#if defined SIGPWR
    SIGPWR,
#elif defined SIGINFO
    SIGINFO,
#endif
    SIGQUIT,
    SIGTERM,
#ifdef SIGTSTP
    SIGTSTP,
#endif
#ifdef SIGTTIN
    SIGTTIN,
#endif
#ifdef SIGTTOU
    SIGTTOU,
#endif
    SIGABRT,
#ifdef SIGALRM
    SIGALRM,
#endif
#ifdef SIGBUS
    SIGBUS,
#endif
    SIGFPE,
    SIGILL,
#ifdef SIGIO
    SIGIO,
#endif
#ifdef SIGPOLL
    SIGPOLL,
#endif
    SIGSEGV,
    SIGPIPE,
#ifdef SIGTRAP
    SIGTRAP
#endif
  };

  const size_t count =
    sizeof(signals_to_ignore) / sizeof(signals_to_ignore[0]);
  const int* const list_end = signals_to_ignore + count;

  for (const int* sig = signals_to_ignore; sig != list_end; ++sig)
    signal(*sig, SIG_IGN);
}
187
188 #ifdef _WIN32
189 static inline
pipe(int pipefd[2])190 int pipe(int pipefd[2]){
191 const unsigned int buffer_size = 4096;
192 const int flags = 0;
193 return _pipe(pipefd, buffer_size, flags);
194 }
195
196 #undef getpid
197 #include <process.h>
198
199 typedef DWORD pid_t;
200
201 static const int WNOHANG = 37;
202
203 static inline
waitpid(pid_t pid,int * stat_loc,int options)204 pid_t waitpid(pid_t pid, int *stat_loc, int options)
205 {
206 /* Only support waitpid(,,WNOHANG) */
207 assert(options == WNOHANG);
208 assert(stat_loc);
209
210 HANDLE handle = OpenProcess(PROCESS_ALL_ACCESS, FALSE, pid);
211 if (handle == NULL)
212 {
213 g_eventLogger->error("waitpid: Could not open handle for pid %d, "
214 "error: %d", pid, GetLastError());
215 return -1;
216 }
217
218 DWORD exit_code;
219 if (!GetExitCodeProcess(handle, &exit_code))
220 {
221 g_eventLogger->error("waitpid: GetExitCodeProcess failed, pid: %d, "
222 "error: %d", pid, GetLastError());
223 CloseHandle(handle);
224 return -1;
225 }
226 CloseHandle(handle);
227
228 if (exit_code == STILL_ACTIVE)
229 {
230 /* Still alive */
231 return 0;
232 }
233
234 *stat_loc = exit_code;
235
236 return pid;
237 }
238
/*
  Windows replacement for WIFEXITED: always reports a normal exit,
  the status value itself is not inspected.
*/
static inline
bool WIFEXITED(int status)
{
  (void)status;
  return true;
}
244
/*
  Windows replacement for WEXITSTATUS: the status is returned unchanged.
*/
static inline
int WEXITSTATUS(int status)
{
  const int exit_status = status;
  return exit_status;
}
250
/*
  Windows replacement for WIFSIGNALED: never reports termination
  by signal, the status value itself is not inspected.
*/
static inline
bool WIFSIGNALED(int status)
{
  (void)status;
  return false;
}
256
/*
  Windows replacement for WTERMSIG: always returns 0,
  the status value itself is not inspected.
*/
static inline
int WTERMSIG(int status)
{
  (void)status;
  return 0;
}
262
263 static int
kill(pid_t pid,int sig)264 kill(pid_t pid, int sig)
265 {
266 int retry_open_event = 10;
267
268 char shutdown_event_name[32];
269 _snprintf(shutdown_event_name, sizeof(shutdown_event_name),
270 "ndbd_shutdown_%d", pid);
271
272 /* Open the event to signal */
273 HANDLE shutdown_event;
274 while ((shutdown_event =
275 OpenEvent(EVENT_MODIFY_STATE, FALSE, shutdown_event_name)) == NULL)
276 {
277 /*
278 Check if the process is alive, otherwise there is really
279 no sense to retry the open of the event
280 */
281 DWORD exit_code;
282 HANDLE process = OpenProcess(SYNCHRONIZE | PROCESS_QUERY_INFORMATION,
283 FALSE, pid);
284 if (!process)
285 {
286 /* Already died */
287 return -1;
288 }
289
290 if (!GetExitCodeProcess(process,&exit_code))
291 {
292 g_eventLogger->error("GetExitCodeProcess failed, pid: %d, error: %d",
293 pid, GetLastError());
294 CloseHandle(process);
295 return -1;
296 }
297 CloseHandle(process);
298
299 if (exit_code != STILL_ACTIVE)
300 {
301 /* Already died */
302 return -1;
303 }
304
305 if (retry_open_event--)
306 Sleep(100);
307 else
308 {
309 g_eventLogger->error("Failed to open shutdown_event '%s', error: %d",
310 shutdown_event_name, GetLastError());
311 return -1;
312 }
313 }
314
315 if (SetEvent(shutdown_event) == 0)
316 {
317 g_eventLogger->error("Failed to signal shutdown_event '%s', error: %d",
318 shutdown_event_name, GetLastError());
319 }
320 CloseHandle(shutdown_event);
321 return pid;
322 }
323 #endif
324
325 extern int real_main(int, char**);
326
327
328 static
create_argv(const Vector<BaseString> & args)329 char** create_argv(const Vector<BaseString>& args)
330 {
331 char **argv = (char **)malloc(sizeof(char*) * (args.size() + 1));
332 if(argv == NULL)
333 return NULL;
334
335 for(unsigned i = 0; i < args.size(); i++)
336 argv[i] = strdup(args[i].c_str());
337 argv[args.size()] = NULL;
338 return argv;
339 }
340
341
342 static
free_argv(char ** argv)343 void free_argv(char** argv)
344 {
345 char** argp = argv;
346 while(*argp)
347 {
348 free((void*)*argp);
349 argp++;
350 }
351 free((void*)argv);
352 }
353
354
355 static pid_t
spawn_process(const char * progname,const Vector<BaseString> & args)356 spawn_process(const char* progname, const Vector<BaseString>& args)
357 {
358 #ifdef _WIN32
359 // Get full path name of this executeble
360 char path[MAX_PATH];
361 DWORD len = GetModuleFileName(NULL, path, sizeof(path));
362 if (len == 0 || len == sizeof(path))
363 {
364 g_eventLogger->warning("spawn_process: Could not extract full path, "
365 "len: %u, error: %u\n",
366 len, GetLastError());
367 // Fall through and try with progname as it was supplied
368 }
369 else
370 {
371 progname = path;
372 }
373 #endif
374
375 char** argv = create_argv(args);
376 if (!argv)
377 {
378 g_eventLogger->error("spawn_process: Failed to create argv, errno: %d",
379 errno);
380 return -1;
381 }
382
383 #ifdef _WIN32
384
385 intptr_t spawn_handle = _spawnv(P_NOWAIT, progname, argv);
386 if (spawn_handle == -1)
387 {
388 g_eventLogger->error("spawn_process: Failed to spawn process, errno: %d",
389 errno);
390 // Print the _spawnv arguments to aid debugging
391 g_eventLogger->error(" progname: '%s'", progname);
392 char** argp = argv;
393 while(*argp)
394 g_eventLogger->error("argv: '%s'", *argp++);
395
396 free_argv(argv);
397 return -1;
398 }
399 free_argv(argv);
400
401 // Convert the handle returned from spawnv_ to a pid
402 DWORD pid = GetProcessId((HANDLE)spawn_handle);
403 if (pid == 0)
404 {
405 g_eventLogger->error("spawn_process: Failed to convert handle %d "
406 "to pid, error: %d", spawn_handle, GetLastError());
407 CloseHandle((HANDLE)spawn_handle);
408 return -1;
409 }
410 CloseHandle((HANDLE)spawn_handle);
411 return pid;
412 #else
413 pid_t pid = fork();
414 if (pid == -1)
415 {
416 g_eventLogger->error("Failed to fork, errno: %d", errno);
417 free_argv(argv);
418 return -1;
419 }
420
421 if (pid)
422 {
423 free_argv(argv);
424 // Parent
425 return pid;
426 }
427
428 // Count number of arguments
429 int argc = 0;
430 while(argv[argc])
431 argc++;
432
433 // Calling 'main' to start program from beginning
434 // without loading (possibly new version) from disk
435 (void)real_main(argc, argv);
436 assert(false); // main should never return
437 exit(1);
438 return -1; // Never reached
439 #endif
440 }
441
442 /*
443 retry failed spawn after sleep until fork suceeds or
444 max number of retries occurs
445 */
446
447 static pid_t
retry_spawn_process(const char * progname,const Vector<BaseString> & args)448 retry_spawn_process(const char* progname, const Vector<BaseString>& args)
449 {
450 const unsigned max_retries = 10;
451 unsigned retry_counter = 0;
452 while(true)
453 {
454 pid_t pid = spawn_process(progname, args);
455 if (pid == -1)
456 {
457 if (retry_counter++ == max_retries)
458 {
459 g_eventLogger->error("Angel failed to spawn %d times, giving up",
460 retry_counter);
461 angel_exit(1);
462 }
463
464 g_eventLogger->warning("Angel failed to spawn, sleep and retry");
465
466 NdbSleep_SecSleep(1);
467 continue;
468 }
469 return pid;
470 }
471 }
472
473 static Uint32 stop_on_error;
474 static Uint32 config_max_start_fail_retries;
475 static Uint32 config_restart_delay_secs;
476
477
478 /*
479 Extract the config parameters that concerns angel
480 */
481
482 static bool
configure(const ndb_mgm_configuration * conf,NodeId nodeid)483 configure(const ndb_mgm_configuration* conf, NodeId nodeid)
484 {
485 Uint32 generation = 0;
486 ndb_mgm_configuration_iterator sys_iter(*conf, CFG_SECTION_SYSTEM);
487 if (sys_iter.get(CFG_SYS_CONFIG_GENERATION, &generation))
488 {
489 g_eventLogger->warning("Configuration didn't contain generation "
490 "(likely old ndb_mgmd");
491 }
492 g_eventLogger->debug("Using configuration with generation %u", generation);
493
494 ndb_mgm_configuration_iterator iter(*conf, CFG_SECTION_NODE);
495 if (iter.find(CFG_NODE_ID, nodeid))
496 {
497 g_eventLogger->error("Invalid configuration fetched, could not "
498 "find own node id %d", nodeid);
499 return false;
500 }
501
502 if (iter.get(CFG_DB_STOP_ON_ERROR, &stop_on_error))
503 {
504 g_eventLogger->error("Invalid configuration fetched, could not "
505 "find StopOnError");
506 return false;
507 }
508 g_eventLogger->debug("Using StopOnError: %u", stop_on_error);
509
510 if (iter.get(CFG_DB_MAX_START_FAIL, &config_max_start_fail_retries))
511 {
512 /* Old Management node, use default value */
513 config_max_start_fail_retries = 3;
514 }
515
516 if (iter.get(CFG_DB_START_FAIL_DELAY_SECS, &config_restart_delay_secs))
517 {
518 /* Old Management node, use default value */
519 config_restart_delay_secs = 0;
520 }
521
522 const char * datadir;
523 if (iter.get(CFG_NODE_DATADIR, &datadir))
524 {
525 g_eventLogger->error("Invalid configuration fetched, could not "
526 "find DataDir");
527 return false;
528 }
529 g_eventLogger->debug("Using DataDir: %s", datadir);
530
531 NdbConfig_SetPath(datadir);
532
533 if (NdbDir::chdir(NdbConfig_get_path(NULL)) != 0)
534 {
535 g_eventLogger->warning("Cannot change directory to '%s', error: %d",
536 NdbConfig_get_path(NULL), errno);
537 // Ignore error
538 }
539
540 return true;
541 }
542
543 bool stop_child = false;
544
545 void
angel_run(const char * progname,const Vector<BaseString> & original_args,const char * connect_str,int force_nodeid,const char * bind_address,bool initial,bool no_start,bool daemon)546 angel_run(const char* progname,
547 const Vector<BaseString>& original_args,
548 const char* connect_str,
549 int force_nodeid,
550 const char* bind_address,
551 bool initial,
552 bool no_start,
553 bool daemon)
554 {
555 ConfigRetriever retriever(connect_str,
556 force_nodeid,
557 NDB_VERSION,
558 NDB_MGM_NODE_TYPE_NDB,
559 bind_address);
560 if (retriever.hasError())
561 {
562 g_eventLogger->error("Could not initialize connection to management "
563 "server, error: '%s'", retriever.getErrorString());
564 angel_exit(1);
565 }
566
567 const int connnect_retries = 12;
568 const int connect_delay = 5;
569 const int verbose = 1;
570 if (retriever.do_connect(connnect_retries, connect_delay, verbose) != 0)
571 {
572 g_eventLogger->error("Could not connect to management server, "
573 "error: '%s'", retriever.getErrorString());
574 angel_exit(1);
575 }
576 g_eventLogger->info("Angel connected to '%s:%d'",
577 retriever.get_mgmd_host(),
578 retriever.get_mgmd_port());
579
580 const int alloc_retries = 2;
581 const int alloc_delay = 3;
582 const Uint32 nodeid = retriever.allocNodeId(alloc_retries, alloc_delay);
583 if (nodeid == 0)
584 {
585 g_eventLogger->error("Failed to allocate nodeid, error: '%s'",
586 retriever.getErrorString());
587 angel_exit(1);
588 }
589 g_eventLogger->info("Angel allocated nodeid: %u", nodeid);
590
591 ndb_mgm_configuration * config = retriever.getConfig(nodeid);
592 NdbAutoPtr<ndb_mgm_configuration> config_autoptr(config);
593 if (config == 0)
594 {
595 g_eventLogger->error("Could not fetch configuration/invalid "
596 "configuration, error: '%s'",
597 retriever.getErrorString());
598 angel_exit(1);
599 }
600
601 if (!configure(config, nodeid))
602 {
603 // Failed to configure, error already printed
604 angel_exit(1);
605 }
606
607 if (daemon)
608 {
609 // Become a daemon
610 char *lockfile = NdbConfig_PidFileName(nodeid);
611 char *logfile = NdbConfig_StdoutFileName(nodeid);
612 NdbAutoPtr<char> tmp_aptr1(lockfile), tmp_aptr2(logfile);
613
614 if (ndb_daemonize(lockfile, logfile) != 0)
615 {
616 g_eventLogger->error("Couldn't start as daemon, error: '%s'",
617 ndb_daemon_error);
618 angel_exit(1);
619 }
620 }
621
622 // Counter for consecutive failed startups
623 Uint32 failed_startups_counter = 0;
624 while (true)
625 {
626
627 // Create pipe where ndbd process will report extra shutdown status
628 int fds[2];
629 if (pipe(fds))
630 {
631 g_eventLogger->error("Failed to create pipe, errno: %d (%s)",
632 errno, strerror(errno));
633 angel_exit(1);
634 }
635
636 FILE *child_info_r;
637 if (!(child_info_r = fdopen(fds[0], "r")))
638 {
639 g_eventLogger->error("Failed to open stream for pipe, errno: %d (%s)",
640 errno, strerror(errno));
641 angel_exit(1);
642 }
643
644 // Build the args used to start ndbd by appending
645 // the arguments that may have changed at the end
646 // of original argument list
647 BaseString one_arg;
648 Vector<BaseString> args;
649 args = original_args;
650
651 // Pass fd number of the pipe which ndbd should use
652 // for sending extra status to angel
653 one_arg.assfmt("--report-fd=%d", fds[1]);
654 args.push_back(one_arg);
655
656 // The nodeid which has been allocated by angel
657 one_arg.assfmt("--allocated-nodeid=%d", nodeid);
658 args.push_back(one_arg);
659
660 one_arg.assfmt("--initial=%d", initial);
661 args.push_back(one_arg);
662
663 one_arg.assfmt("--nostart=%d", no_start);
664 args.push_back(one_arg);
665
666 pid_t child = retry_spawn_process(progname, args);
667 if (child <= 0)
668 {
669 // safety, retry_spawn_process returns valid child or give up
670 g_eventLogger->error("retry_spawn_process, child: %d", child);
671 angel_exit(1);
672 }
673
674 /**
675 * Parent
676 */
677 g_eventLogger->info("Angel pid: %d started child: %d",
678 getpid(), child);
679
680 ignore_signals();
681
682 int status=0, error_exit=0;
683 while(true)
684 {
685 pid_t ret_pid = waitpid(child, &status, WNOHANG);
686 if (ret_pid == child)
687 {
688 g_eventLogger->debug("Angel got child %d", child);
689 break;
690 }
691 if (ret_pid > 0)
692 {
693 g_eventLogger->warning("Angel got unexpected pid %d "
694 "when waiting for %d",
695 ret_pid, child);
696 }
697
698 if (stop_child)
699 {
700 g_eventLogger->info("Angel shutting down ndbd with pid %d", child);
701 kill(child, SIGINT);
702 }
703 NdbSleep_MilliSleep(100);
704 }
705
706 // Close the write end of pipe
707 close(fds[1]);
708
709 // Read info from the child's pipe
710 char buf[128];
711 Uint32 child_error = 0, child_signal = 0, child_sphase = 0;
712 while (fgets(buf, sizeof (buf), child_info_r))
713 {
714 int value;
715 if (sscanf(buf, "error=%d\n", &value) == 1)
716 child_error = value;
717 else if (sscanf(buf, "signal=%d\n", &value) == 1)
718 child_signal = value;
719 else if (sscanf(buf, "sphase=%d\n", &value) == 1)
720 child_sphase = value;
721 else if (strcmp(buf, "\n") != 0)
722 fprintf(stderr, "unknown info from child: '%s'\n", buf);
723 }
724 g_eventLogger->debug("error: %u, signal: %u, sphase: %u",
725 child_error, child_signal, child_sphase);
726 // Close read end of pipe in parent
727 fclose(child_info_r);
728
729 if (WIFEXITED(status))
730 {
731 switch (WEXITSTATUS(status)) {
732 case NRT_Default:
733 g_eventLogger->info("Angel shutting down");
734 reportShutdown(config, nodeid, 0, 0, false, false,
735 child_error, child_signal, child_sphase);
736 angel_exit(0);
737 break;
738 case NRT_NoStart_Restart:
739 initial = false;
740 no_start = true;
741 break;
742 case NRT_NoStart_InitialStart:
743 initial = true;
744 no_start = true;
745 break;
746 case NRT_DoStart_InitialStart:
747 initial = true;
748 no_start = false;
749 break;
750 default:
751 error_exit=1;
752 if (stop_on_error)
753 {
754 /**
755 * Error shutdown && stopOnError()
756 */
757 reportShutdown(config, nodeid,
758 error_exit, 0, false, false,
759 child_error, child_signal, child_sphase);
760 angel_exit(0);
761 }
762 // Fall-through
763 case NRT_DoStart_Restart:
764 initial = false;
765 no_start = false;
766 break;
767 }
768 } else
769 {
770 error_exit=1;
771 if (WIFSIGNALED(status))
772 {
773 child_signal = WTERMSIG(status);
774 }
775 else
776 {
777 child_signal = 127;
778 g_eventLogger->info("Unknown exit reason. Stopped.");
779 }
780 if (stop_on_error)
781 {
782 /**
783 * Error shutdown && stopOnError()
784 */
785 reportShutdown(config, nodeid,
786 error_exit, 0, false, false,
787 child_error, child_signal, child_sphase);
788 angel_exit(0);
789 }
790 }
791
792 // Check startup failure
793 const Uint32 STARTUP_FAILURE_SPHASE = 6;
794 Uint32 restart_delay_secs = 0;
795 if (error_exit && // Only check startup failure if ndbd exited uncontrolled
796 child_sphase <= STARTUP_FAILURE_SPHASE)
797 {
798 if (++failed_startups_counter >= config_max_start_fail_retries)
799 {
800 g_eventLogger->alert("Angel detected too many startup failures(%d), "
801 "not restarting again", failed_startups_counter);
802 reportShutdown(config, nodeid,
803 error_exit, 0, false, false,
804 child_error, child_signal, child_sphase);
805 angel_exit(0);
806 }
807 g_eventLogger->info("Angel detected startup failure, count: %u",
808 failed_startups_counter);
809
810 restart_delay_secs = config_restart_delay_secs;
811 }
812 else
813 {
814 // Reset the counter for consecutive failed startups
815 failed_startups_counter = 0;
816 }
817
818 reportShutdown(config, nodeid,
819 error_exit, 1,
820 no_start,
821 initial,
822 child_error, child_signal, child_sphase);
823 g_eventLogger->info("Ndb has terminated (pid %d) restarting", child);
824
825 g_eventLogger->debug("Angel reconnecting to management server");
826 (void)retriever.disconnect();
827
828 if (restart_delay_secs > 0)
829 {
830 g_eventLogger->info("Delaying Ndb restart for %u seconds.",
831 restart_delay_secs);
832 NdbSleep_SecSleep(restart_delay_secs);
833 };
834
835 const int connnect_retries = 12;
836 const int connect_delay = 5;
837 const int verbose = 1;
838 if (retriever.do_connect(connnect_retries, connect_delay, verbose) != 0)
839 {
840 g_eventLogger->error("Could not connect to management server, "
841 "error: '%s'", retriever.getErrorString());
842 angel_exit(1);
843 }
844 g_eventLogger->info("Angel reconnected to '%s:%d'",
845 retriever.get_mgmd_host(),
846 retriever.get_mgmd_port());
847
848 // Tell retriver to allocate the same nodeid again
849 retriever.setNodeId(nodeid);
850
851 g_eventLogger->debug("Angel reallocating nodeid %d", nodeid);
852 const int alloc_retries = 10;
853 const int alloc_delay = 3;
854 const Uint32 realloced = retriever.allocNodeId(alloc_retries, alloc_delay);
855 if (realloced == 0)
856 {
857 g_eventLogger->error("Angel failed to allocate nodeid, error: '%s'",
858 retriever.getErrorString());
859 angel_exit(1);
860 }
861 if (realloced != nodeid)
862 {
863 g_eventLogger->error("Angel failed to reallocate nodeid %d, got %d",
864 nodeid, realloced);
865 angel_exit(1);
866 }
867 g_eventLogger->info("Angel reallocated nodeid: %u", nodeid);
868
869 }
870
871 abort(); // Never reached
872 }
873
874
875 /*
876 Order angel to shutdown it's ndbd
877 */
angel_stop(void)878 void angel_stop(void)
879 {
880 stop_child = true;
881 }
882