/* Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License, version 2.0,
   as published by the Free Software Foundation.

   This program is also distributed with certain software (including
   but not limited to OpenSSL) that is licensed under separate terms,
   as designated in a particular file or component or in included license
   documentation.  The authors of MySQL hereby grant you an additional
   permission to link the program and your derivative works with the
   separately licensed software that they have included with MySQL.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License, version 2.0, for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */
22 
23 
24 #include <ndb_global.h>
25 #include <ndb_version.h>
26 
27 #include "angel.hpp"
28 #include "ndbd.hpp"
29 
30 #include <NdbConfig.h>
31 #include <NdbAutoPtr.hpp>
32 #include <portlib/ndb_daemon.h>
33 #include <portlib/NdbSleep.h>
34 #include <portlib/NdbDir.hpp>
35 
36 #include <ConfigRetriever.hpp>
37 
38 #include <EventLogger.hpp>
39 extern EventLogger * g_eventLogger;
40 
41 static void
angel_exit(int code)42 angel_exit(int code)
43 {
44   ndb_daemon_exit(code);
45 }
46 
47 #include "../mgmapi/mgmapi_configuration.hpp"
48 
49 static void
reportShutdown(const ndb_mgm_configuration * config,NodeId nodeid,int error_exit,bool restart,bool nostart,bool initial,Uint32 error,Uint32 signum,Uint32 sphase)50 reportShutdown(const ndb_mgm_configuration* config,
51                NodeId nodeid, int error_exit,
52                bool restart, bool nostart, bool initial,
53                Uint32 error, Uint32 signum, Uint32 sphase)
54 {
55   // Only allow "initial" and "nostart" to be set if "restart" is set
56   assert(restart ||
57          (!restart && !initial && !nostart));
58 
59   Uint32 length, theData[25];
60   EventReport *rep= CAST_PTR(EventReport, &theData[0]);
61 
62   rep->setNodeId(nodeid);
63   if (restart)
64     theData[1]=1 |
65       (nostart ? 2 : 0) |
66       (initial ? 4 : 0);
67   else
68     theData[1]=0;
69 
70   if (error_exit == 0)
71   {
72     rep->setEventType(NDB_LE_NDBStopCompleted);
73     theData[2]=signum;
74     length=3;
75   } else
76   {
77     rep->setEventType(NDB_LE_NDBStopForced);
78     theData[2]=signum;
79     theData[3]=error;
80     theData[4]=sphase;
81     theData[5]=0; // extra
82     length=6;
83   }
84 
85   // Log event locally
86   g_eventLogger->log(rep->getEventType(), theData, length,
87                      rep->getNodeId(), 0);
88 
89   // Log event to cluster log
90   ndb_mgm_configuration_iterator iter(*config, CFG_SECTION_NODE);
91   for (iter.first(); iter.valid(); iter.next())
92   {
93     Uint32 type;
94     if (iter.get(CFG_TYPE_OF_SECTION, &type) ||
95        type != NODE_TYPE_MGM)
96       continue;
97 
98     Uint32 port;
99     if (iter.get(CFG_MGM_PORT, &port))
100       continue;
101 
102     const char* hostname;
103     if (iter.get(CFG_NODE_HOST, &hostname))
104       continue;
105 
106     BaseString connect_str;
107     connect_str.assfmt("%s:%d", hostname, port);
108 
109 
110     NdbMgmHandle h = ndb_mgm_create_handle();
111     if (h == 0)
112     {
113       g_eventLogger->warning("Unable to report shutdown reason "
114                              "to '%s'(failed to create mgm handle)",
115                              connect_str.c_str());
116       continue;
117     }
118 
119     if (ndb_mgm_set_connectstring(h, connect_str.c_str()) ||
120         ndb_mgm_connect(h, 1, 0, 0) ||
121         ndb_mgm_report_event(h, theData, length))
122     {
123       g_eventLogger->warning("Unable to report shutdown reason "
124                              "to '%s'(error: %s - %s)",
125                              connect_str.c_str(),
126                              ndb_mgm_get_latest_error_msg(h),
127                              ndb_mgm_get_latest_error_desc(h));
128     }
129 
130     ndb_mgm_destroy_handle(&h);
131   }
132 }
133 
134 
/*
  Install SIG_IGN as the disposition of every signal in the list below.

  Signals which do not exist on every platform are guarded by #ifdef so
  the list only names what the current platform defines.
*/
static void
ignore_signals(void)
{
  static const int ignore_list[] = {
#ifdef SIGBREAK
    SIGBREAK,
#endif
#ifdef SIGHUP
    SIGHUP,
#endif
    SIGINT,
#if defined SIGPWR
    SIGPWR,
#elif defined SIGINFO
    SIGINFO,
#endif
    SIGQUIT,
    SIGTERM,
#ifdef SIGTSTP
    SIGTSTP,
#endif
#ifdef SIGTTIN
    SIGTTIN,
#endif
#ifdef SIGTTOU
    SIGTTOU,
#endif
    SIGABRT,
#ifdef SIGALRM
    SIGALRM,
#endif
#ifdef SIGBUS
    SIGBUS,
#endif
    SIGFPE,
    SIGILL,
#ifdef SIGIO
    SIGIO,
#endif
#ifdef SIGPOLL
    SIGPOLL,
#endif
    SIGSEGV,
    SIGPIPE,
#ifdef SIGTRAP
    SIGTRAP
#endif
  };

  const size_t count = sizeof(ignore_list) / sizeof(ignore_list[0]);
  for (size_t i = 0; i < count; i++)
    signal(ignore_list[i], SIG_IGN);
}
187 
#ifdef _WIN32
/*
  Windows-only replacements for the POSIX calls used further down in
  this file: pipe(), waitpid(), the W* status helpers and kill().
*/

/* pipe() built on _pipe(), using a 4096 byte buffer and no flags */
static inline
int pipe(int pipefd[2]){
  const unsigned int buffer_size = 4096;
  const int flags = 0;
  return _pipe(pipefd, buffer_size, flags);
}

#undef getpid
#include <process.h>

typedef DWORD pid_t;

/* Marker value; waitpid() below only checks that it was passed */
static const int WNOHANG = 37;

/*
  Non-blocking check of whether process 'pid' has exited.

  Returns 'pid' and stores the exit code in '*stat_loc' when the process
  has exited, 0 when it is still running and -1 on failure.

  NOTE: pid_t is an unsigned DWORD here, so the -1 failure value reaches
  the caller as a large positive number.
*/
static inline
pid_t waitpid(pid_t pid, int *stat_loc, int options)
{
  /* Only support waitpid(,,WNOHANG) */
  assert(options == WNOHANG);
  assert(stat_loc);

  HANDLE handle = OpenProcess(PROCESS_ALL_ACCESS, FALSE, pid);
  if (handle == NULL)
  {
    g_eventLogger->error("waitpid: Could not open handle for pid %d, "
                         "error: %d", pid, GetLastError());
    return -1;
  }

  DWORD exit_code;
  if (!GetExitCodeProcess(handle, &exit_code))
  {
    g_eventLogger->error("waitpid: GetExitCodeProcess failed, pid: %d, "
                         "error: %d", pid, GetLastError());
    CloseHandle(handle);
    return -1;
  }
  CloseHandle(handle);

  if (exit_code == STILL_ACTIVE)
  {
    /* Still alive */
    return 0;
  }

  *stat_loc = exit_code;

  return pid;
}

/* Every termination is reported as a normal exit on Windows */
static inline
bool WIFEXITED(int status)
{
  return true;
}

/* The status stored by waitpid() above is the exit code itself */
static inline
int WEXITSTATUS(int status)
{
  return status;
}

/* Termination by signal is never reported on Windows */
static inline
bool WIFSIGNALED(int status)
{
  return false;
}

/* Counterpart of WIFSIGNALED(), always 0 */
static inline
int WTERMSIG(int status)
{
  return 0;
}

/*
  Ask process 'pid' to shut down by signalling the named event
  "ndbd_shutdown_<pid>".  'sig' is not used.

  If the event can not be opened, the open is retried (up to 10 times,
  100 ms apart) for as long as the process is still alive.

  Returns 'pid' once the event has been opened, -1 if the process is
  already gone or the event could not be opened.
*/
static int
kill(pid_t pid, int sig)
{
  int retry_open_event = 10;

  char shutdown_event_name[32];
  _snprintf(shutdown_event_name, sizeof(shutdown_event_name),
            "ndbd_shutdown_%d", pid);
  /* _snprintf does not terminate the buffer when the output is truncated */
  shutdown_event_name[sizeof(shutdown_event_name) - 1] = 0;

  /* Open the event to signal */
  HANDLE shutdown_event;
  while ((shutdown_event =
          OpenEvent(EVENT_MODIFY_STATE, FALSE, shutdown_event_name)) == NULL)
  {
    /*
      Remember why OpenEvent failed before any of the Win32 calls below
      gets a chance to overwrite the thread's last-error value
    */
    const DWORD open_error = GetLastError();

    /*
      Check if the process is alive, otherwise there is really
      no sense to retry the open of the event
    */
    DWORD exit_code;
    HANDLE process = OpenProcess(SYNCHRONIZE | PROCESS_QUERY_INFORMATION,
                                 FALSE, pid);
    if (!process)
    {
      /* Already died */
      return -1;
    }

    if (!GetExitCodeProcess(process,&exit_code))
    {
      g_eventLogger->error("GetExitCodeProcess failed, pid: %d, error: %d",
                           pid, GetLastError());
      CloseHandle(process);
      return -1;
    }
    CloseHandle(process);

    if (exit_code != STILL_ACTIVE)
    {
      /* Already died */
      return -1;
    }

    if (retry_open_event--)
      Sleep(100);
    else
    {
      g_eventLogger->error("Failed to open shutdown_event '%s', error: %d",
                           shutdown_event_name, open_error);
      return -1;
    }
  }

  if (SetEvent(shutdown_event) == 0)
  {
    g_eventLogger->error("Failed to signal shutdown_event '%s', error: %d",
                         shutdown_event_name, GetLastError());
  }
  CloseHandle(shutdown_event);
  return pid;
}
#endif
324 
325 extern int real_main(int, char**);
326 
327 
328 static
create_argv(const Vector<BaseString> & args)329 char** create_argv(const Vector<BaseString>& args)
330 {
331   char **argv = (char **)malloc(sizeof(char*) * (args.size() + 1));
332   if(argv == NULL)
333     return NULL;
334 
335   for(unsigned i = 0; i < args.size(); i++)
336     argv[i] = strdup(args[i].c_str());
337   argv[args.size()] = NULL;
338   return argv;
339 }
340 
341 
/*
  Release a NULL-terminated argv array, both the strings and the array
  itself.  Passing NULL is allowed and does nothing.
*/
static
void free_argv(char** argv)
{
  if (argv == NULL)
    return;

  for (char **argp = argv; *argp != NULL; argp++)
    free(*argp);
  free(argv);
}
353 
354 
355 static pid_t
spawn_process(const char * progname,const Vector<BaseString> & args)356 spawn_process(const char* progname, const Vector<BaseString>& args)
357 {
358 #ifdef _WIN32
359   // Get full path name of this executeble
360   char path[MAX_PATH];
361   DWORD len = GetModuleFileName(NULL, path, sizeof(path));
362   if (len == 0 || len == sizeof(path))
363   {
364     g_eventLogger->warning("spawn_process: Could not extract full path, "
365                            "len: %u, error: %u\n",
366                            len, GetLastError());
367     // Fall through and try with progname as it was supplied
368   }
369   else
370   {
371     progname = path;
372   }
373 #endif
374 
375   char** argv = create_argv(args);
376   if (!argv)
377   {
378     g_eventLogger->error("spawn_process: Failed to create argv, errno: %d",
379                          errno);
380     return -1;
381   }
382 
383 #ifdef _WIN32
384 
385   intptr_t spawn_handle = _spawnv(P_NOWAIT, progname, argv);
386   if (spawn_handle == -1)
387   {
388     g_eventLogger->error("spawn_process: Failed to spawn process, errno: %d",
389                          errno);
390     // Print the _spawnv arguments to aid debugging
391     g_eventLogger->error(" progname: '%s'", progname);
392     char** argp = argv;
393     while(*argp)
394       g_eventLogger->error("argv: '%s'", *argp++);
395 
396     free_argv(argv);
397     return -1;
398   }
399   free_argv(argv);
400 
401   // Convert the handle returned from spawnv_ to a pid
402   DWORD pid = GetProcessId((HANDLE)spawn_handle);
403   if (pid == 0)
404   {
405     g_eventLogger->error("spawn_process: Failed to convert handle %d "
406                          "to pid, error: %d", spawn_handle, GetLastError());
407     CloseHandle((HANDLE)spawn_handle);
408     return -1;
409   }
410   CloseHandle((HANDLE)spawn_handle);
411   return pid;
412 #else
413   pid_t pid = fork();
414   if (pid == -1)
415   {
416     g_eventLogger->error("Failed to fork, errno: %d", errno);
417     free_argv(argv);
418     return -1;
419   }
420 
421   if (pid)
422   {
423     free_argv(argv);
424     // Parent
425     return pid;
426   }
427 
428   // Count number of arguments
429   int argc = 0;
430   while(argv[argc])
431     argc++;
432 
433   // Calling 'main' to start program from beginning
434   // without loading (possibly new version) from disk
435   (void)real_main(argc, argv);
436   assert(false); // main should never return
437   exit(1);
438   return -1; // Never reached
439 #endif
440 }
441 
442 /*
443   retry failed spawn after sleep until fork suceeds or
444   max number of retries occurs
445 */
446 
447 static pid_t
retry_spawn_process(const char * progname,const Vector<BaseString> & args)448 retry_spawn_process(const char* progname, const Vector<BaseString>& args)
449 {
450   const unsigned max_retries = 10;
451   unsigned retry_counter = 0;
452   while(true)
453   {
454     pid_t pid = spawn_process(progname, args);
455     if (pid == -1)
456     {
457       if (retry_counter++ == max_retries)
458       {
459         g_eventLogger->error("Angel failed to spawn %d times, giving up",
460                              retry_counter);
461         angel_exit(1);
462       }
463 
464       g_eventLogger->warning("Angel failed to spawn, sleep and retry");
465 
466       NdbSleep_SecSleep(1);
467       continue;
468     }
469     return pid;
470   }
471 }
472 
473 static Uint32 stop_on_error;
474 static Uint32 config_max_start_fail_retries;
475 static Uint32 config_restart_delay_secs;
476 
477 
478 /*
479   Extract the config parameters that concerns angel
480 */
481 
482 static bool
configure(const ndb_mgm_configuration * conf,NodeId nodeid)483 configure(const ndb_mgm_configuration* conf, NodeId nodeid)
484 {
485   Uint32 generation = 0;
486   ndb_mgm_configuration_iterator sys_iter(*conf, CFG_SECTION_SYSTEM);
487   if (sys_iter.get(CFG_SYS_CONFIG_GENERATION, &generation))
488   {
489     g_eventLogger->warning("Configuration didn't contain generation "
490                            "(likely old ndb_mgmd");
491   }
492   g_eventLogger->debug("Using configuration with generation %u", generation);
493 
494   ndb_mgm_configuration_iterator iter(*conf, CFG_SECTION_NODE);
495   if (iter.find(CFG_NODE_ID, nodeid))
496   {
497     g_eventLogger->error("Invalid configuration fetched, could not "
498                          "find own node id %d", nodeid);
499     return false;
500   }
501 
502   if (iter.get(CFG_DB_STOP_ON_ERROR, &stop_on_error))
503   {
504     g_eventLogger->error("Invalid configuration fetched, could not "
505                          "find StopOnError");
506     return false;
507   }
508   g_eventLogger->debug("Using StopOnError: %u", stop_on_error);
509 
510   if (iter.get(CFG_DB_MAX_START_FAIL, &config_max_start_fail_retries))
511   {
512     /* Old Management node, use default value */
513     config_max_start_fail_retries = 3;
514   }
515 
516   if (iter.get(CFG_DB_START_FAIL_DELAY_SECS, &config_restart_delay_secs))
517   {
518     /* Old Management node, use default value */
519     config_restart_delay_secs = 0;
520   }
521 
522   const char * datadir;
523   if (iter.get(CFG_NODE_DATADIR, &datadir))
524   {
525     g_eventLogger->error("Invalid configuration fetched, could not "
526                          "find DataDir");
527     return false;
528   }
529   g_eventLogger->debug("Using DataDir: %s", datadir);
530 
531   NdbConfig_SetPath(datadir);
532 
533   if (NdbDir::chdir(NdbConfig_get_path(NULL)) != 0)
534   {
535     g_eventLogger->warning("Cannot change directory to '%s', error: %d",
536                            NdbConfig_get_path(NULL), errno);
537     // Ignore error
538   }
539 
540   return true;
541 }
542 
/*
  Set by angel_stop() and polled by angel_run() while it waits for the
  child; when set, angel_run() signals the child to shut down.

  NOTE(review): this is a plain bool.  If angel_stop() can be called
  from a signal handler or from another thread this should be a
  volatile sig_atomic_t or an atomic - confirm against the callers.
*/
bool stop_child = false;
544 
545 void
angel_run(const char * progname,const Vector<BaseString> & original_args,const char * connect_str,int force_nodeid,const char * bind_address,bool initial,bool no_start,bool daemon)546 angel_run(const char* progname,
547           const Vector<BaseString>& original_args,
548           const char* connect_str,
549           int force_nodeid,
550           const char* bind_address,
551           bool initial,
552           bool no_start,
553           bool daemon)
554 {
555   ConfigRetriever retriever(connect_str,
556                             force_nodeid,
557                             NDB_VERSION,
558                             NDB_MGM_NODE_TYPE_NDB,
559                             bind_address);
560   if (retriever.hasError())
561   {
562     g_eventLogger->error("Could not initialize connection to management "
563                          "server, error: '%s'", retriever.getErrorString());
564     angel_exit(1);
565   }
566 
567   const int connnect_retries = 12;
568   const int connect_delay = 5;
569   const int verbose = 1;
570   if (retriever.do_connect(connnect_retries, connect_delay, verbose) != 0)
571   {
572     g_eventLogger->error("Could not connect to management server, "
573                          "error: '%s'", retriever.getErrorString());
574     angel_exit(1);
575   }
576   g_eventLogger->info("Angel connected to '%s:%d'",
577                       retriever.get_mgmd_host(),
578                       retriever.get_mgmd_port());
579 
580   const int alloc_retries = 2;
581   const int alloc_delay = 3;
582   const Uint32 nodeid = retriever.allocNodeId(alloc_retries, alloc_delay);
583   if (nodeid == 0)
584   {
585     g_eventLogger->error("Failed to allocate nodeid, error: '%s'",
586                          retriever.getErrorString());
587     angel_exit(1);
588   }
589   g_eventLogger->info("Angel allocated nodeid: %u", nodeid);
590 
591   ndb_mgm_configuration * config = retriever.getConfig(nodeid);
592   NdbAutoPtr<ndb_mgm_configuration> config_autoptr(config);
593   if (config == 0)
594   {
595     g_eventLogger->error("Could not fetch configuration/invalid "
596                          "configuration, error: '%s'",
597                          retriever.getErrorString());
598     angel_exit(1);
599   }
600 
601   if (!configure(config, nodeid))
602   {
603     // Failed to configure, error already printed
604     angel_exit(1);
605   }
606 
607   if (daemon)
608   {
609     // Become a daemon
610     char *lockfile = NdbConfig_PidFileName(nodeid);
611     char *logfile = NdbConfig_StdoutFileName(nodeid);
612     NdbAutoPtr<char> tmp_aptr1(lockfile), tmp_aptr2(logfile);
613 
614     if (ndb_daemonize(lockfile, logfile) != 0)
615     {
616       g_eventLogger->error("Couldn't start as daemon, error: '%s'",
617                            ndb_daemon_error);
618       angel_exit(1);
619     }
620   }
621 
622   // Counter for consecutive failed startups
623   Uint32 failed_startups_counter = 0;
624   while (true)
625   {
626 
627     // Create pipe where ndbd process will report extra shutdown status
628     int fds[2];
629     if (pipe(fds))
630     {
631       g_eventLogger->error("Failed to create pipe, errno: %d (%s)",
632                            errno, strerror(errno));
633       angel_exit(1);
634     }
635 
636     FILE *child_info_r;
637     if (!(child_info_r = fdopen(fds[0], "r")))
638     {
639       g_eventLogger->error("Failed to open stream for pipe, errno: %d (%s)",
640                            errno, strerror(errno));
641       angel_exit(1);
642     }
643 
644     // Build the args used to start ndbd by appending
645     // the arguments that may have changed at the end
646     // of original argument list
647     BaseString one_arg;
648     Vector<BaseString> args;
649     args = original_args;
650 
651     // Pass fd number of the pipe which ndbd should use
652     // for sending extra status to angel
653     one_arg.assfmt("--report-fd=%d", fds[1]);
654     args.push_back(one_arg);
655 
656     // The nodeid which has been allocated by angel
657     one_arg.assfmt("--allocated-nodeid=%d", nodeid);
658     args.push_back(one_arg);
659 
660     one_arg.assfmt("--initial=%d", initial);
661     args.push_back(one_arg);
662 
663     one_arg.assfmt("--nostart=%d", no_start);
664     args.push_back(one_arg);
665 
666     pid_t child = retry_spawn_process(progname, args);
667     if (child <= 0)
668     {
669       // safety, retry_spawn_process returns valid child or give up
670       g_eventLogger->error("retry_spawn_process, child: %d", child);
671       angel_exit(1);
672     }
673 
674     /**
675      * Parent
676      */
677     g_eventLogger->info("Angel pid: %d started child: %d",
678                         getpid(), child);
679 
680     ignore_signals();
681 
682     int status=0, error_exit=0;
683     while(true)
684     {
685       pid_t ret_pid = waitpid(child, &status, WNOHANG);
686       if (ret_pid == child)
687       {
688         g_eventLogger->debug("Angel got child %d", child);
689         break;
690       }
691       if (ret_pid > 0)
692       {
693         g_eventLogger->warning("Angel got unexpected pid %d "
694                                "when waiting for %d",
695                                ret_pid, child);
696       }
697 
698       if (stop_child)
699       {
700         g_eventLogger->info("Angel shutting down ndbd with pid %d", child);
701         kill(child, SIGINT);
702        }
703       NdbSleep_MilliSleep(100);
704     }
705 
706     // Close the write end of pipe
707     close(fds[1]);
708 
709     // Read info from the child's pipe
710     char buf[128];
711     Uint32 child_error = 0, child_signal = 0, child_sphase = 0;
712     while (fgets(buf, sizeof (buf), child_info_r))
713     {
714       int value;
715       if (sscanf(buf, "error=%d\n", &value) == 1)
716         child_error = value;
717       else if (sscanf(buf, "signal=%d\n", &value) == 1)
718         child_signal = value;
719       else if (sscanf(buf, "sphase=%d\n", &value) == 1)
720         child_sphase = value;
721       else if (strcmp(buf, "\n") != 0)
722         fprintf(stderr, "unknown info from child: '%s'\n", buf);
723     }
724     g_eventLogger->debug("error: %u, signal: %u, sphase: %u",
725                          child_error, child_signal, child_sphase);
726     // Close read end of pipe in parent
727     fclose(child_info_r);
728 
729     if (WIFEXITED(status))
730     {
731       switch (WEXITSTATUS(status)) {
732       case NRT_Default:
733         g_eventLogger->info("Angel shutting down");
734         reportShutdown(config, nodeid, 0, 0, false, false,
735                        child_error, child_signal, child_sphase);
736         angel_exit(0);
737         break;
738       case NRT_NoStart_Restart:
739         initial = false;
740         no_start = true;
741         break;
742       case NRT_NoStart_InitialStart:
743         initial = true;
744         no_start = true;
745         break;
746       case NRT_DoStart_InitialStart:
747         initial = true;
748         no_start = false;
749         break;
750       default:
751         error_exit=1;
752         if (stop_on_error)
753         {
754           /**
755            * Error shutdown && stopOnError()
756            */
757           reportShutdown(config, nodeid,
758                          error_exit, 0, false, false,
759                          child_error, child_signal, child_sphase);
760           angel_exit(0);
761         }
762         // Fall-through
763       case NRT_DoStart_Restart:
764         initial = false;
765         no_start = false;
766         break;
767       }
768     } else
769     {
770       error_exit=1;
771       if (WIFSIGNALED(status))
772       {
773         child_signal = WTERMSIG(status);
774       }
775       else
776       {
777         child_signal = 127;
778         g_eventLogger->info("Unknown exit reason. Stopped.");
779       }
780       if (stop_on_error)
781       {
782         /**
783          * Error shutdown && stopOnError()
784          */
785         reportShutdown(config, nodeid,
786                        error_exit, 0, false, false,
787                        child_error, child_signal, child_sphase);
788         angel_exit(0);
789       }
790     }
791 
792     // Check startup failure
793     const Uint32 STARTUP_FAILURE_SPHASE = 6;
794     Uint32 restart_delay_secs = 0;
795     if (error_exit && // Only check startup failure if ndbd exited uncontrolled
796         child_sphase <= STARTUP_FAILURE_SPHASE)
797     {
798       if (++failed_startups_counter >= config_max_start_fail_retries)
799       {
800         g_eventLogger->alert("Angel detected too many startup failures(%d), "
801                              "not restarting again", failed_startups_counter);
802         reportShutdown(config, nodeid,
803                        error_exit, 0, false, false,
804                        child_error, child_signal, child_sphase);
805         angel_exit(0);
806       }
807       g_eventLogger->info("Angel detected startup failure, count: %u",
808                           failed_startups_counter);
809 
810       restart_delay_secs = config_restart_delay_secs;
811     }
812     else
813     {
814       // Reset the counter for consecutive failed startups
815       failed_startups_counter = 0;
816     }
817 
818     reportShutdown(config, nodeid,
819                    error_exit, 1,
820                    no_start,
821                    initial,
822                    child_error, child_signal, child_sphase);
823     g_eventLogger->info("Ndb has terminated (pid %d) restarting", child);
824 
825     g_eventLogger->debug("Angel reconnecting to management server");
826     (void)retriever.disconnect();
827 
828     if (restart_delay_secs > 0)
829     {
830       g_eventLogger->info("Delaying Ndb restart for %u seconds.",
831                           restart_delay_secs);
832       NdbSleep_SecSleep(restart_delay_secs);
833     };
834 
835     const int connnect_retries = 12;
836     const int connect_delay = 5;
837     const int verbose = 1;
838     if (retriever.do_connect(connnect_retries, connect_delay, verbose) != 0)
839     {
840       g_eventLogger->error("Could not connect to management server, "
841                            "error: '%s'", retriever.getErrorString());
842       angel_exit(1);
843     }
844     g_eventLogger->info("Angel reconnected to '%s:%d'",
845                         retriever.get_mgmd_host(),
846                         retriever.get_mgmd_port());
847 
848     // Tell retriver to allocate the same nodeid again
849     retriever.setNodeId(nodeid);
850 
851     g_eventLogger->debug("Angel reallocating nodeid %d", nodeid);
852     const int alloc_retries = 10;
853     const int alloc_delay = 3;
854     const Uint32 realloced = retriever.allocNodeId(alloc_retries, alloc_delay);
855     if (realloced == 0)
856     {
857       g_eventLogger->error("Angel failed to allocate nodeid, error: '%s'",
858                            retriever.getErrorString());
859       angel_exit(1);
860     }
861     if (realloced != nodeid)
862     {
863       g_eventLogger->error("Angel failed to reallocate nodeid %d, got %d",
864                            nodeid, realloced);
865       angel_exit(1);
866     }
867     g_eventLogger->info("Angel reallocated nodeid: %u", nodeid);
868 
869   }
870 
871   abort(); // Never reached
872 }
873 
874 
875 /*
876   Order angel to shutdown it's ndbd
877 */
angel_stop(void)878 void angel_stop(void)
879 {
880   stop_child = true;
881 }
882