1 /* Copyright (c) 2009, 2021, Oracle and/or its affiliates.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License, version 2.0,
5 as published by the Free Software Foundation.
6
7 This program is also distributed with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the program and your derivative works with the
12 separately licensed software that they have included with MySQL.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License, version 2.0, for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */
22
23
24 #include <ndb_global.h>
25 #include <ndb_version.h>
26
27 #include "angel.hpp"
28 #include "ndbd.hpp"
29
30 #include <NdbConfig.h>
31 #include <NdbAutoPtr.hpp>
32 #include <portlib/ndb_daemon.h>
33 #include <portlib/NdbSleep.h>
34 #include <portlib/NdbDir.hpp>
35
36 #include <ConfigRetriever.hpp>
37
38 #include <EventLogger.hpp>
39 extern EventLogger * g_eventLogger;
40
41 static void
angel_exit(int code)42 angel_exit(int code)
43 {
44 ndb_daemon_exit(code);
45 }
46
47 #include "../mgmapi/mgmapi_configuration.hpp"
48
49 static void
reportShutdown(const ndb_mgm_configuration * config,NodeId nodeid,int error_exit,bool restart,bool nostart,bool initial,Uint32 error,Uint32 signum,Uint32 sphase)50 reportShutdown(const ndb_mgm_configuration* config,
51 NodeId nodeid, int error_exit,
52 bool restart, bool nostart, bool initial,
53 Uint32 error, Uint32 signum, Uint32 sphase)
54 {
55 // Only allow "initial" and "nostart" to be set if "restart" is set
56 assert(restart ||
57 (!restart && !initial && !nostart));
58
59 Uint32 length, theData[25];
60 EventReport *rep= CAST_PTR(EventReport, &theData[0]);
61 rep->eventType = 0; /* Ensure it's initialised */
62
63 rep->setNodeId(nodeid);
64 if (restart)
65 theData[1]=1 |
66 (nostart ? 2 : 0) |
67 (initial ? 4 : 0);
68 else
69 theData[1]=0;
70
71 if (error_exit == 0)
72 {
73 rep->setEventType(NDB_LE_NDBStopCompleted);
74 theData[2]=signum;
75 length=3;
76 } else
77 {
78 rep->setEventType(NDB_LE_NDBStopForced);
79 theData[2]=signum;
80 theData[3]=error;
81 theData[4]=sphase;
82 theData[5]=0; // extra
83 length=6;
84 }
85
86 // Log event locally
87 g_eventLogger->log(rep->getEventType(), theData, length,
88 rep->getNodeId(), 0);
89
90 // Log event to cluster log
91 ndb_mgm_configuration_iterator iter(*config, CFG_SECTION_NODE);
92 for (iter.first(); iter.valid(); iter.next())
93 {
94 Uint32 type;
95 if (iter.get(CFG_TYPE_OF_SECTION, &type) ||
96 type != NODE_TYPE_MGM)
97 continue;
98
99 Uint32 port;
100 if (iter.get(CFG_MGM_PORT, &port))
101 continue;
102
103 const char* hostname;
104 if (iter.get(CFG_NODE_HOST, &hostname))
105 continue;
106
107 BaseString connect_str;
108 connect_str.assfmt("%s:%d", hostname, port);
109
110
111 NdbMgmHandle h = ndb_mgm_create_handle();
112 if (h == 0)
113 {
114 g_eventLogger->warning("Unable to report shutdown reason "
115 "to '%s'(failed to create mgm handle)",
116 connect_str.c_str());
117 continue;
118 }
119
120 if (ndb_mgm_set_connectstring(h, connect_str.c_str()) ||
121 ndb_mgm_connect(h, 1, 0, 0) ||
122 ndb_mgm_report_event(h, theData, length))
123 {
124 g_eventLogger->warning("Unable to report shutdown reason "
125 "to '%s'(error: %s - %s)",
126 connect_str.c_str(),
127 ndb_mgm_get_latest_error_msg(h),
128 ndb_mgm_get_latest_error_desc(h));
129 }
130
131 ndb_mgm_destroy_handle(&h);
132 }
133 }
134
135
/**
  Set the disposition of every signal in the list below to SIG_IGN.

  The list is assembled by the preprocessor, signals which do not exist on
  the platform are left out. Some entries can occur twice on _WIN32 (SIGTERM,
  SIGINT), which only means signal() is called twice for them.
*/
static void
ignore_signals(void)
{
  static const int signals_to_ignore[] = {
#ifdef SIGBREAK
    SIGBREAK,
#endif
#ifdef SIGHUP
    SIGHUP,
#endif
    SIGINT,
#if defined SIGPWR
    SIGPWR,
#elif defined SIGINFO
    SIGINFO,
#endif
#ifdef _WIN32
    SIGTERM,
#else
    SIGQUIT,
#endif
    SIGTERM,
#ifdef SIGTSTP
    SIGTSTP,
#endif
#ifdef SIGTTIN
    SIGTTIN,
#endif
#ifdef SIGTTOU
    SIGTTOU,
#endif
    SIGABRT,
#ifdef SIGALRM
    SIGALRM,
#endif
#ifdef SIGBUS
    SIGBUS,
#endif
    SIGFPE,
    SIGILL,
#ifdef SIGIO
    SIGIO,
#endif
#ifdef SIGPOLL
    SIGPOLL,
#endif
    SIGSEGV,
#ifdef _WIN32
    SIGINT,
#else
    SIGPIPE,
#endif
#ifdef SIGTRAP
    SIGTRAP
#endif
  };

  const size_t count = sizeof(signals_to_ignore) / sizeof(signals_to_ignore[0]);
  const int* const end = signals_to_ignore + count;
  for (const int* sig = signals_to_ignore; sig != end; ++sig)
  {
    signal(*sig, SIG_IGN);
  }
}
196
197 #ifdef _WIN32
198 static inline
pipe(int pipefd[2])199 int pipe(int pipefd[2]){
200 const unsigned int buffer_size = 4096;
201 const int flags = 0;
202 return _pipe(pipefd, buffer_size, flags);
203 }
204
205 #undef getpid
206 #include <process.h>
207
// Windows process ids are DWORD values, expose them under the POSIX name
typedef DWORD pid_t;

// Option value accepted by the waitpid() emulation below. The number itself
// has no meaning, waitpid() only asserts that this exact value is passed.
static const int WNOHANG = 37;
211
212 static inline
waitpid(pid_t pid,int * stat_loc,int options)213 pid_t waitpid(pid_t pid, int *stat_loc, int options)
214 {
215 /* Only support waitpid(,,WNOHANG) */
216 assert(options == WNOHANG);
217 assert(stat_loc);
218
219 HANDLE handle = OpenProcess(PROCESS_ALL_ACCESS, FALSE, pid);
220 if (handle == NULL)
221 {
222 g_eventLogger->error("waitpid: Could not open handle for pid %d, "
223 "error: %d", pid, GetLastError());
224 return -1;
225 }
226
227 DWORD exit_code;
228 if (!GetExitCodeProcess(handle, &exit_code))
229 {
230 g_eventLogger->error("waitpid: GetExitCodeProcess failed, pid: %d, "
231 "error: %d", pid, GetLastError());
232 CloseHandle(handle);
233 return -1;
234 }
235 CloseHandle(handle);
236
237 if (exit_code == STILL_ACTIVE)
238 {
239 /* Still alive */
240 return 0;
241 }
242
243 *stat_loc = exit_code;
244
245 return pid;
246 }
247
/**
  Windows stand-in for WIFEXITED: every status counts as a normal exit.

  @param status  ignored
  @return always true
*/
static inline
bool WIFEXITED(int status)
{
  (void)status;
  return true;
}
253
/**
  Windows stand-in for WEXITSTATUS: the status is used as exit code as is.

  @param status  status value filled in by waitpid()
  @return status unchanged
*/
static inline
int WEXITSTATUS(int status)
{
  const int exit_code = status;
  return exit_code;
}
259
/**
  Windows stand-in for WIFSIGNALED: termination by signal is never reported.

  @param status  ignored
  @return always false
*/
static inline
bool WIFSIGNALED(int status)
{
  (void)status;
  return false;
}
265
/**
  Windows stand-in for WTERMSIG: there is never a terminating signal.

  @param status  ignored
  @return always 0
*/
static inline
int WTERMSIG(int status)
{
  (void)status;
  return 0;
}
271
272 static int
kill(pid_t pid,int sig)273 kill(pid_t pid, int sig)
274 {
275 int retry_open_event = 10;
276
277 char shutdown_event_name[32];
278 _snprintf(shutdown_event_name, sizeof(shutdown_event_name),
279 "ndbd_shutdown_%d", pid);
280
281 /* Open the event to signal */
282 HANDLE shutdown_event;
283 while ((shutdown_event =
284 OpenEvent(EVENT_MODIFY_STATE, FALSE, shutdown_event_name)) == NULL)
285 {
286 /*
287 Check if the process is alive, otherwise there is really
288 no sense to retry the open of the event
289 */
290 DWORD exit_code;
291 HANDLE process = OpenProcess(SYNCHRONIZE | PROCESS_QUERY_INFORMATION,
292 FALSE, pid);
293 if (!process)
294 {
295 /* Already died */
296 return -1;
297 }
298
299 if (!GetExitCodeProcess(process,&exit_code))
300 {
301 g_eventLogger->error("GetExitCodeProcess failed, pid: %d, error: %d",
302 pid, GetLastError());
303 CloseHandle(process);
304 return -1;
305 }
306 CloseHandle(process);
307
308 if (exit_code != STILL_ACTIVE)
309 {
310 /* Already died */
311 return -1;
312 }
313
314 if (retry_open_event--)
315 Sleep(100);
316 else
317 {
318 g_eventLogger->error("Failed to open shutdown_event '%s', error: %d",
319 shutdown_event_name, GetLastError());
320 return -1;
321 }
322 }
323
324 if (SetEvent(shutdown_event) == 0)
325 {
326 g_eventLogger->error("Failed to signal shutdown_event '%s', error: %d",
327 shutdown_event_name, GetLastError());
328 }
329 CloseHandle(shutdown_event);
330 return pid;
331 }
332 #endif
333
// Numeric identifier of this source file. Nothing in the visible part of
// this file reads it; presumably consumed by the NDB jam tracing macros
// -- TODO confirm
#define JAM_FILE_ID 333


// Entry point of the program, defined elsewhere. spawn_process() calls it
// directly in the forked child instead of loading the binary from disk again.
extern int real_main(int, char**);
338
339
340 static
create_argv(const Vector<BaseString> & args)341 char** create_argv(const Vector<BaseString>& args)
342 {
343 char **argv = (char **)malloc(sizeof(char*) * (args.size() + 1));
344 if(argv == NULL)
345 return NULL;
346
347 for(unsigned i = 0; i < args.size(); i++)
348 argv[i] = strdup(args[i].c_str());
349 argv[args.size()] = NULL;
350 return argv;
351 }
352
353
354 static
free_argv(char ** argv)355 void free_argv(char** argv)
356 {
357 char** argp = argv;
358 while(*argp)
359 {
360 free((void*)*argp);
361 argp++;
362 }
363 free((void*)argv);
364 }
365
366
367 static pid_t
spawn_process(const char * progname,const Vector<BaseString> & args)368 spawn_process(const char* progname, const Vector<BaseString>& args)
369 {
370 #ifdef _WIN32
371 // Get full path name of this executeble
372 char path[MAX_PATH];
373 DWORD len = GetModuleFileName(NULL, path, sizeof(path));
374 if (len == 0 || len == sizeof(path))
375 {
376 g_eventLogger->warning("spawn_process: Could not extract full path, "
377 "len: %u, error: %u\n",
378 len, GetLastError());
379 // Fall through and try with progname as it was supplied
380 }
381 else
382 {
383 progname = path;
384 }
385 #endif
386
387 char** argv = create_argv(args);
388 if (!argv)
389 {
390 g_eventLogger->error("spawn_process: Failed to create argv, errno: %d",
391 errno);
392 return -1;
393 }
394
395 #ifdef _WIN32
396
397 intptr_t spawn_handle = _spawnv(P_NOWAIT, progname, argv);
398 if (spawn_handle == -1)
399 {
400 g_eventLogger->error("spawn_process: Failed to spawn process, errno: %d",
401 errno);
402 // Print the _spawnv arguments to aid debugging
403 g_eventLogger->error(" progname: '%s'", progname);
404 char** argp = argv;
405 while(*argp)
406 g_eventLogger->error("argv: '%s'", *argp++);
407
408 free_argv(argv);
409 return -1;
410 }
411 free_argv(argv);
412
413 // Convert the handle returned from spawnv_ to a pid
414 DWORD pid = GetProcessId((HANDLE)spawn_handle);
415 if (pid == 0)
416 {
417 g_eventLogger->error("spawn_process: Failed to convert handle %d "
418 "to pid, error: %d", spawn_handle, GetLastError());
419 CloseHandle((HANDLE)spawn_handle);
420 return -1;
421 }
422 CloseHandle((HANDLE)spawn_handle);
423 return pid;
424 #else
425 pid_t pid = fork();
426 if (pid == -1)
427 {
428 g_eventLogger->error("Failed to fork, errno: %d", errno);
429 free_argv(argv);
430 return -1;
431 }
432
433 if (pid)
434 {
435 free_argv(argv);
436 // Parent
437 return pid;
438 }
439
440 // Count number of arguments
441 int argc = 0;
442 while(argv[argc])
443 argc++;
444
445 // Calling 'main' to start program from beginning
446 // without loading (possibly new version) from disk
447 (void)real_main(argc, argv);
448 assert(false); // main should never return
449 exit(1);
450 return -1; // Never reached
451 #endif
452 }
453
454 /*
455 retry failed spawn after sleep until fork suceeds or
456 max number of retries occurs
457 */
458
459 static pid_t
retry_spawn_process(const char * progname,const Vector<BaseString> & args)460 retry_spawn_process(const char* progname, const Vector<BaseString>& args)
461 {
462 const unsigned max_retries = 10;
463 unsigned retry_counter = 0;
464 while(true)
465 {
466 pid_t pid = spawn_process(progname, args);
467 if (pid == -1)
468 {
469 if (retry_counter++ == max_retries)
470 {
471 g_eventLogger->error("Angel failed to spawn %d times, giving up",
472 retry_counter);
473 angel_exit(1);
474 }
475
476 g_eventLogger->warning("Angel failed to spawn, sleep and retry");
477
478 NdbSleep_SecSleep(1);
479 continue;
480 }
481 return pid;
482 }
483 }
484
// Angel settings, filled in by configure() from the fetched configuration

// CFG_DB_STOP_ON_ERROR: when non-zero angel_run() reports the shutdown and
// exits instead of restarting a child which stopped with an error
static Uint32 stop_on_error;
// CFG_DB_MAX_START_FAIL: angel_run() gives up once this many consecutive
// startup failures have been counted (3 when the parameter is missing)
static Uint32 config_max_start_fail_retries;
// CFG_DB_START_FAIL_DELAY_SECS: seconds to wait before restarting after a
// startup failure (0 when the parameter is missing)
static Uint32 config_restart_delay_secs;
488
489
490 /*
491 Extract the config parameters that concerns angel
492 */
493
494 static bool
configure(const ndb_mgm_configuration * conf,NodeId nodeid)495 configure(const ndb_mgm_configuration* conf, NodeId nodeid)
496 {
497 Uint32 generation = 0;
498 ndb_mgm_configuration_iterator sys_iter(*conf, CFG_SECTION_SYSTEM);
499 if (sys_iter.get(CFG_SYS_CONFIG_GENERATION, &generation))
500 {
501 g_eventLogger->warning("Configuration didn't contain generation "
502 "(likely old ndb_mgmd");
503 }
504 g_eventLogger->debug("Using configuration with generation %u", generation);
505
506 ndb_mgm_configuration_iterator iter(*conf, CFG_SECTION_NODE);
507 if (iter.find(CFG_NODE_ID, nodeid))
508 {
509 g_eventLogger->error("Invalid configuration fetched, could not "
510 "find own node id %d", nodeid);
511 return false;
512 }
513
514 if (iter.get(CFG_DB_STOP_ON_ERROR, &stop_on_error))
515 {
516 g_eventLogger->error("Invalid configuration fetched, could not "
517 "find StopOnError");
518 return false;
519 }
520 g_eventLogger->debug("Using StopOnError: %u", stop_on_error);
521
522 if (iter.get(CFG_DB_MAX_START_FAIL, &config_max_start_fail_retries))
523 {
524 /* Old Management node, use default value */
525 config_max_start_fail_retries = 3;
526 }
527
528 if (iter.get(CFG_DB_START_FAIL_DELAY_SECS, &config_restart_delay_secs))
529 {
530 /* Old Management node, use default value */
531 config_restart_delay_secs = 0;
532 }
533
534 const char * datadir;
535 if (iter.get(CFG_NODE_DATADIR, &datadir))
536 {
537 g_eventLogger->error("Invalid configuration fetched, could not "
538 "find DataDir");
539 return false;
540 }
541 g_eventLogger->debug("Using DataDir: %s", datadir);
542
543 NdbConfig_SetPath(datadir);
544
545 if (NdbDir::chdir(NdbConfig_get_path(NULL)) != 0)
546 {
547 g_eventLogger->warning("Cannot change directory to '%s', error: %d",
548 NdbConfig_get_path(NULL), errno);
549 // Ignore error
550 }
551
552 return true;
553 }
554
// Shutdown request flag: set to true by angel_stop() and polled by the wait
// loop in angel_run(), which then sends SIGINT to the child on every
// iteration. It is never reset in this file.
bool stop_child = false;
556
/**
  Run the angel: fetch the configuration, optionally become a daemon and
  then start and supervise the ndbd child, restarting it as requested by
  its exit code or, when StopOnError is off, after an error.

  One iteration of the main loop:
   1. create a pipe which the child uses to report error, signal and
      start phase
   2. spawn the child with the original arguments plus --report-fd,
      --allocated-nodeid, --initial and --nostart
   3. poll for child exit every 100 ms, forwarding a stop request to the
      child as SIGINT
   4. read the report from the pipe and decide between shutdown and
      restart from the exit status
   5. report the shutdown, reconnect to the management server and
      allocate the same nodeid again

  The function has no normal return path: it either loops or leaves
  through angel_exit().

  @param progname          program name passed to the spawn function
  @param original_args     arguments the child is always started with
  @param connect_str       connect string for the management server
  @param force_nodeid      node id passed to the ConfigRetriever
  @param bind_address      bind address passed to the ConfigRetriever
  @param initial           value of --initial for the first child start
  @param no_start          value of --nostart for the first child start
  @param daemon            call ndb_daemonize() before starting the child
  @param connnect_retries  retries passed to ConfigRetriever::do_connect()
  @param connect_delay     delay passed to ConfigRetriever::do_connect()
*/
void
angel_run(const char* progname,
          const Vector<BaseString>& original_args,
          const char* connect_str,
          int force_nodeid,
          const char* bind_address,
          bool initial,
          bool no_start,
          bool daemon,
          int connnect_retries,
          int connect_delay)
{
  ConfigRetriever retriever(connect_str,
                            force_nodeid,
                            NDB_VERSION,
                            NDB_MGM_NODE_TYPE_NDB,
                            bind_address);
  if (retriever.hasError())
  {
    g_eventLogger->error("Could not initialize connection to management "
                         "server, error: '%s'", retriever.getErrorString());
    angel_exit(1);
  }

  const int verbose = 1;
  if (retriever.do_connect(connnect_retries, connect_delay, verbose) != 0)
  {
    g_eventLogger->error("Could not connect to management server, "
                         "error: '%s'", retriever.getErrorString());
    angel_exit(1);
  }
  g_eventLogger->info("Angel connected to '%s:%d'",
                      retriever.get_mgmd_host(),
                      retriever.get_mgmd_port());

  const int alloc_retries = 10;
  const int alloc_delay = 3;
  const Uint32 nodeid = retriever.allocNodeId(alloc_retries, alloc_delay);
  if (nodeid == 0)
  {
    g_eventLogger->error("Failed to allocate nodeid, error: '%s'",
                         retriever.getErrorString());
    angel_exit(1);
  }
  g_eventLogger->info("Angel allocated nodeid: %u", nodeid);

  // The configuration is fetched once and then used for every
  // reportShutdown() call below
  ndb_mgm_configuration * config = retriever.getConfig(nodeid);
  NdbAutoPtr<ndb_mgm_configuration> config_autoptr(config);
  if (config == 0)
  {
    g_eventLogger->error("Could not fetch configuration/invalid "
                         "configuration, error: '%s'",
                         retriever.getErrorString());
    angel_exit(1);
  }

  if (!configure(config, nodeid))
  {
    // Failed to configure, error already printed
    angel_exit(1);
  }

  if (daemon)
  {
    // Become a daemon
    char *lockfile = NdbConfig_PidFileName(nodeid);
    char *logfile = NdbConfig_StdoutFileName(nodeid);
    NdbAutoPtr<char> tmp_aptr1(lockfile), tmp_aptr2(logfile);

    if (ndb_daemonize(lockfile, logfile) != 0)
    {
      g_eventLogger->error("Couldn't start as daemon, error: '%s'",
                           ndb_daemon_error);
      angel_exit(1);
    }
  }

  // Counter for consecutive failed startups
  Uint32 failed_startups_counter = 0;
  while (true)
  {

    // Create pipe where ndbd process will report extra shutdown status
    int fds[2];
    if (pipe(fds))
    {
      g_eventLogger->error("Failed to create pipe, errno: %d (%s)",
                           errno, strerror(errno));
      angel_exit(1);
    }

    // Wrap the read end in a stream so the report can be read line by line
    FILE *child_info_r;
    if (!(child_info_r = fdopen(fds[0], "r")))
    {
      g_eventLogger->error("Failed to open stream for pipe, errno: %d (%s)",
                           errno, strerror(errno));
      angel_exit(1);
    }

    // Build the args used to start ndbd by appending
    // the arguments that may have changed at the end
    // of original argument list
    BaseString one_arg;
    Vector<BaseString> args;
    args = original_args;

    // Pass fd number of the pipe which ndbd should use
    // for sending extra status to angel
    one_arg.assfmt("--report-fd=%d", fds[1]);
    args.push_back(one_arg);

    // The nodeid which has been allocated by angel
    one_arg.assfmt("--allocated-nodeid=%d", nodeid);
    args.push_back(one_arg);

    // initial and no_start are updated from the child's exit code at the
    // end of each iteration, so a restart uses the requested start type
    one_arg.assfmt("--initial=%d", initial);
    args.push_back(one_arg);

    one_arg.assfmt("--nostart=%d", no_start);
    args.push_back(one_arg);

    pid_t child = retry_spawn_process(progname, args);
    if (child <= 0)
    {
      // safety, retry_spawn_process returns valid child or give up
      g_eventLogger->error("retry_spawn_process, child: %d", child);
      angel_exit(1);
    }

    /**
     * Parent
     */
    g_eventLogger->info("Angel pid: %d started child: %d",
                        getpid(), child);

    // Done after the spawn, i.e. after the child has been created
    ignore_signals();

    // Poll for child exit. Any other return value from waitpid (0 or an
    // error) just leads to another round after a 100 ms sleep.
    int status=0, error_exit=0;
    while(true)
    {
      pid_t ret_pid = waitpid(child, &status, WNOHANG);
      if (ret_pid == child)
      {
        g_eventLogger->debug("Angel got child %d", child);
        break;
      }
      if (ret_pid > 0)
      {
        g_eventLogger->warning("Angel got unexpected pid %d "
                               "when waiting for %d",
                               ret_pid, child);
      }

      // stop_child is never reset, so the request is repeated on every
      // round until the child is gone
      if (stop_child)
      {
        g_eventLogger->info("Angel shutting down ndbd with pid %d", child);
        kill(child, SIGINT);
      }
      NdbSleep_MilliSleep(100);
    }

    // Close the write end of pipe
    close(fds[1]);

    // Read info from the child's pipe
    // Lines have the form "error=N", "signal=N" or "sphase=N"; empty lines
    // are skipped and anything else is echoed to stderr
    char buf[128];
    Uint32 child_error = 0, child_signal = 0, child_sphase = 0;
    while (fgets(buf, sizeof (buf), child_info_r))
    {
      int value;
      if (sscanf(buf, "error=%d\n", &value) == 1)
        child_error = value;
      else if (sscanf(buf, "signal=%d\n", &value) == 1)
        child_signal = value;
      else if (sscanf(buf, "sphase=%d\n", &value) == 1)
        child_sphase = value;
      else if (strcmp(buf, "\n") != 0)
        fprintf(stderr, "unknown info from child: '%s'\n", buf);
    }
    g_eventLogger->debug("error: %u, signal: %u, sphase: %u",
                         child_error, child_signal, child_sphase);
    // Close read end of pipe in parent
    fclose(child_info_r);

    if (WIFEXITED(status))
    {
      // The exit code of the child selects the next action
      switch (WEXITSTATUS(status)) {
      case NRT_Default:
        g_eventLogger->info("Angel shutting down");
        reportShutdown(config, nodeid, 0, 0, false, false,
                       child_error, child_signal, child_sphase);
        angel_exit(0);
        break;
      case NRT_NoStart_Restart:
        initial = false;
        no_start = true;
        break;
      case NRT_NoStart_InitialStart:
        initial = true;
        no_start = true;
        break;
      case NRT_DoStart_InitialStart:
        initial = true;
        no_start = false;
        break;
      default:
        // Placed before NRT_DoStart_Restart on purpose: any other exit
        // code is an error and, unless StopOnError is set, continues
        // into the normal restart case below
        error_exit=1;
        if (stop_on_error)
        {
          /**
           * Error shutdown && stopOnError()
           */
          reportShutdown(config, nodeid,
                         error_exit, 0, false, false,
                         child_error, child_signal, child_sphase);
          angel_exit(0);
        }
        // Fall-through
      case NRT_DoStart_Restart:
        initial = false;
        no_start = false;
        break;
      }
    } else
    {
      // Child did not exit by itself; initial and no_start keep the values
      // used for the start that just ended
      error_exit=1;
      if (WIFSIGNALED(status))
      {
        child_signal = WTERMSIG(status);
      }
      else
      {
        child_signal = 127;
        g_eventLogger->info("Unknown exit reason. Stopped.");
      }
      if (stop_on_error)
      {
        /**
         * Error shutdown && stopOnError()
         */
        reportShutdown(config, nodeid,
                       error_exit, 0, false, false,
                       child_error, child_signal, child_sphase);
        angel_exit(0);
      }
    }

    // Check startup failure
    // An error exit with a reported start phase of at most
    // STARTUP_FAILURE_SPHASE counts as a failed startup. A child which
    // reported no start phase has child_sphase 0 and is counted as well.
    const Uint32 STARTUP_FAILURE_SPHASE = 6;
    Uint32 restart_delay_secs = 0;
    if (error_exit && // Only check startup failure if ndbd exited uncontrolled
        child_sphase <= STARTUP_FAILURE_SPHASE)
    {
      if (++failed_startups_counter >= config_max_start_fail_retries)
      {
        g_eventLogger->alert("Angel detected too many startup failures(%d), "
                             "not restarting again", failed_startups_counter);
        reportShutdown(config, nodeid,
                       error_exit, 0, false, false,
                       child_error, child_signal, child_sphase);
        angel_exit(0);
      }
      g_eventLogger->info("Angel detected startup failure, count: %u",
                          failed_startups_counter);

      // Only a failed startup delays the restart
      restart_delay_secs = config_restart_delay_secs;
    }
    else
    {
      // Reset the counter for consecutive failed startups
      failed_startups_counter = 0;
    }

    // From here on the child is going to be restarted
    reportShutdown(config, nodeid,
                   error_exit, 1,
                   no_start,
                   initial,
                   child_error, child_signal, child_sphase);
    g_eventLogger->info("Ndb has terminated (pid %d) restarting", child);

    g_eventLogger->debug("Angel reconnecting to management server");
    (void)retriever.disconnect();

    if (restart_delay_secs > 0)
    {
      g_eventLogger->info("Delaying Ndb restart for %u seconds.",
                          restart_delay_secs);
      NdbSleep_SecSleep(restart_delay_secs);
    };

    // Note: verbose, alloc_retries and alloc_delay below shadow the
    // variables with the same names declared before the loop
    const int verbose = 1;
    if (retriever.do_connect(connnect_retries, connect_delay, verbose) != 0)
    {
      g_eventLogger->error("Could not connect to management server, "
                           "error: '%s'", retriever.getErrorString());
      angel_exit(1);
    }
    g_eventLogger->info("Angel reconnected to '%s:%d'",
                        retriever.get_mgmd_host(),
                        retriever.get_mgmd_port());

    // Tell retriever to allocate the same nodeid again
    retriever.setNodeId(nodeid);

    g_eventLogger->debug("Angel reallocating nodeid %d", nodeid);
    const int alloc_retries = 20;
    const int alloc_delay = 3;
    const Uint32 realloced = retriever.allocNodeId(alloc_retries, alloc_delay);
    if (realloced == 0)
    {
      g_eventLogger->error("Angel failed to allocate nodeid, error: '%s'",
                           retriever.getErrorString());
      angel_exit(1);
    }
    if (realloced != nodeid)
    {
      g_eventLogger->error("Angel failed to reallocate nodeid %d, got %d",
                           nodeid, realloced);
      angel_exit(1);
    }
    g_eventLogger->info("Angel reallocated nodeid: %u", nodeid);

  }

  abort(); // Never reached
}
883
884
885 /*
886 Order angel to shutdown it's ndbd
887 */
angel_stop(void)888 void angel_stop(void)
889 {
890 stop_child = true;
891 }
892