1 /* Copyright (c) 2009, 2021, Oracle and/or its affiliates.
2 
3    This program is free software; you can redistribute it and/or modify
4    it under the terms of the GNU General Public License, version 2.0,
5    as published by the Free Software Foundation.
6 
7    This program is also distributed with certain software (including
8    but not limited to OpenSSL) that is licensed under separate terms,
9    as designated in a particular file or component or in included license
10    documentation.  The authors of MySQL hereby grant you an additional
11    permission to link the program and your derivative works with the
12    separately licensed software that they have included with MySQL.
13 
14    This program is distributed in the hope that it will be useful,
15    but WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17    GNU General Public License, version 2.0, for more details.
18 
19    You should have received a copy of the GNU General Public License
20    along with this program; if not, write to the Free Software
21    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */
22 
23 
24 #include <ndb_global.h>
25 #include <ndb_version.h>
26 
27 #include "angel.hpp"
28 #include "ndbd.hpp"
29 
30 #include <NdbConfig.h>
31 #include <NdbAutoPtr.hpp>
32 #include <portlib/ndb_daemon.h>
33 #include <portlib/NdbSleep.h>
34 #include <portlib/NdbDir.hpp>
35 
36 #include <ConfigRetriever.hpp>
37 
38 #include <EventLogger.hpp>
39 extern EventLogger * g_eventLogger;
40 
41 static void
angel_exit(int code)42 angel_exit(int code)
43 {
44   ndb_daemon_exit(code);
45 }
46 
47 #include "../mgmapi/mgmapi_configuration.hpp"
48 
49 static void
reportShutdown(const ndb_mgm_configuration * config,NodeId nodeid,int error_exit,bool restart,bool nostart,bool initial,Uint32 error,Uint32 signum,Uint32 sphase)50 reportShutdown(const ndb_mgm_configuration* config,
51                NodeId nodeid, int error_exit,
52                bool restart, bool nostart, bool initial,
53                Uint32 error, Uint32 signum, Uint32 sphase)
54 {
55   // Only allow "initial" and "nostart" to be set if "restart" is set
56   assert(restart ||
57          (!restart && !initial && !nostart));
58 
59   Uint32 length, theData[25];
60   EventReport *rep= CAST_PTR(EventReport, &theData[0]);
61   rep->eventType = 0; /* Ensure it's initialised */
62 
63   rep->setNodeId(nodeid);
64   if (restart)
65     theData[1]=1 |
66       (nostart ? 2 : 0) |
67       (initial ? 4 : 0);
68   else
69     theData[1]=0;
70 
71   if (error_exit == 0)
72   {
73     rep->setEventType(NDB_LE_NDBStopCompleted);
74     theData[2]=signum;
75     length=3;
76   } else
77   {
78     rep->setEventType(NDB_LE_NDBStopForced);
79     theData[2]=signum;
80     theData[3]=error;
81     theData[4]=sphase;
82     theData[5]=0; // extra
83     length=6;
84   }
85 
86   // Log event locally
87   g_eventLogger->log(rep->getEventType(), theData, length,
88                      rep->getNodeId(), 0);
89 
90   // Log event to cluster log
91   ndb_mgm_configuration_iterator iter(*config, CFG_SECTION_NODE);
92   for (iter.first(); iter.valid(); iter.next())
93   {
94     Uint32 type;
95     if (iter.get(CFG_TYPE_OF_SECTION, &type) ||
96        type != NODE_TYPE_MGM)
97       continue;
98 
99     Uint32 port;
100     if (iter.get(CFG_MGM_PORT, &port))
101       continue;
102 
103     const char* hostname;
104     if (iter.get(CFG_NODE_HOST, &hostname))
105       continue;
106 
107     BaseString connect_str;
108     connect_str.assfmt("%s:%d", hostname, port);
109 
110 
111     NdbMgmHandle h = ndb_mgm_create_handle();
112     if (h == 0)
113     {
114       g_eventLogger->warning("Unable to report shutdown reason "
115                              "to '%s'(failed to create mgm handle)",
116                              connect_str.c_str());
117       continue;
118     }
119 
120     if (ndb_mgm_set_connectstring(h, connect_str.c_str()) ||
121         ndb_mgm_connect(h, 1, 0, 0) ||
122         ndb_mgm_report_event(h, theData, length))
123     {
124       g_eventLogger->warning("Unable to report shutdown reason "
125                              "to '%s'(error: %s - %s)",
126                              connect_str.c_str(),
127                              ndb_mgm_get_latest_error_msg(h),
128                              ndb_mgm_get_latest_error_desc(h));
129     }
130 
131     ndb_mgm_destroy_handle(&h);
132   }
133 }
134 
135 
/*
  Set the disposition of every signal in the table below to SIG_IGN.

  Entries that do not exist on all platforms are guarded by the
  preprocessor. A signal may appear more than once in the table on some
  platforms; ignoring it twice has no additional effect.
*/
static void
ignore_signals(void)
{
  static const int ignore_list[] = {
#ifdef SIGBREAK
    SIGBREAK,
#endif
#ifdef SIGHUP
    SIGHUP,
#endif
    SIGINT,
#if defined(SIGPWR)
    SIGPWR,
#elif defined(SIGINFO)
    SIGINFO,
#endif
#ifdef _WIN32
    SIGTERM,
#else
    SIGQUIT,
#endif
    SIGTERM,
#ifdef SIGTSTP
    SIGTSTP,
#endif
#ifdef SIGTTIN
    SIGTTIN,
#endif
#ifdef SIGTTOU
    SIGTTOU,
#endif
    SIGABRT,
#ifdef SIGALRM
    SIGALRM,
#endif
#ifdef SIGBUS
    SIGBUS,
#endif
    SIGFPE,
    SIGILL,
#ifdef SIGIO
    SIGIO,
#endif
#ifdef SIGPOLL
    SIGPOLL,
#endif
    SIGSEGV,
#ifdef _WIN32
    SIGINT,
#else
    SIGPIPE,
#endif
#ifdef SIGTRAP
    SIGTRAP
#endif
  };

  // Walk the table from first to last entry
  const int* const list_end =
    ignore_list + sizeof(ignore_list)/sizeof(ignore_list[0]);
  for (const int* signo = ignore_list; signo != list_end; ++signo)
  {
    signal(*signo, SIG_IGN);
  }
}
196 
197 #ifdef _WIN32
198 static inline
pipe(int pipefd[2])199 int pipe(int pipefd[2]){
200   const unsigned int buffer_size = 4096;
201   const int flags = 0;
202   return _pipe(pipefd, buffer_size, flags);
203 }
204 
205 #undef getpid
206 #include <process.h>
207 
208 typedef DWORD pid_t;
209 
210 static const int WNOHANG = 37;
211 
212 static inline
waitpid(pid_t pid,int * stat_loc,int options)213 pid_t waitpid(pid_t pid, int *stat_loc, int options)
214 {
215   /* Only support waitpid(,,WNOHANG) */
216   assert(options == WNOHANG);
217   assert(stat_loc);
218 
219   HANDLE handle = OpenProcess(PROCESS_ALL_ACCESS, FALSE, pid);
220   if (handle == NULL)
221   {
222     g_eventLogger->error("waitpid: Could not open handle for pid %d, "
223                          "error: %d", pid, GetLastError());
224     return -1;
225   }
226 
227   DWORD exit_code;
228   if (!GetExitCodeProcess(handle, &exit_code))
229   {
230     g_eventLogger->error("waitpid: GetExitCodeProcess failed, pid: %d, "
231                          "error: %d", pid, GetLastError());
232     CloseHandle(handle);
233     return -1;
234   }
235   CloseHandle(handle);
236 
237   if (exit_code == STILL_ACTIVE)
238   {
239     /* Still alive */
240     return 0;
241   }
242 
243   *stat_loc = exit_code;
244 
245   return pid;
246 }
247 
/*
  _WIN32 stand-in for WIFEXITED: every status is treated as a
  normal exit, the argument is not inspected.
*/
static inline
bool WIFEXITED(int status)
{
  (void)status;
  return true;
}
253 
/*
  _WIN32 stand-in for WEXITSTATUS: the status value is
  returned unchanged as the exit code.
*/
static inline
int WEXITSTATUS(int status)
{
  const int exit_code = status;
  return exit_code;
}
259 
/*
  _WIN32 stand-in for WIFSIGNALED: never reports termination
  by signal, the argument is not inspected.
*/
static inline
bool WIFSIGNALED(int status)
{
  (void)status;
  return false;
}
265 
/*
  _WIN32 stand-in for WTERMSIG: always yields signal number 0,
  the argument is not inspected.
*/
static inline
int WTERMSIG(int status)
{
  (void)status;
  return 0;
}
271 
272 static int
kill(pid_t pid,int sig)273 kill(pid_t pid, int sig)
274 {
275   int retry_open_event = 10;
276 
277   char shutdown_event_name[32];
278   _snprintf(shutdown_event_name, sizeof(shutdown_event_name),
279             "ndbd_shutdown_%d", pid);
280 
281   /* Open the event to signal */
282   HANDLE shutdown_event;
283   while ((shutdown_event =
284           OpenEvent(EVENT_MODIFY_STATE, FALSE, shutdown_event_name)) == NULL)
285   {
286      /*
287       Check if the process is alive, otherwise there is really
288       no sense to retry the open of the event
289      */
290     DWORD exit_code;
291     HANDLE process = OpenProcess(SYNCHRONIZE | PROCESS_QUERY_INFORMATION,
292                                   FALSE, pid);
293     if (!process)
294     {
295       /* Already died */
296       return -1;
297     }
298 
299     if (!GetExitCodeProcess(process,&exit_code))
300     {
301       g_eventLogger->error("GetExitCodeProcess failed, pid: %d, error: %d",
302                            pid, GetLastError());
303       CloseHandle(process);
304       return -1;
305     }
306     CloseHandle(process);
307 
308     if (exit_code != STILL_ACTIVE)
309     {
310       /* Already died */
311       return -1;
312     }
313 
314     if (retry_open_event--)
315       Sleep(100);
316     else
317     {
318       g_eventLogger->error("Failed to open shutdown_event '%s', error: %d",
319                             shutdown_event_name, GetLastError());
320       return -1;
321     }
322   }
323 
324   if (SetEvent(shutdown_event) == 0)
325   {
326     g_eventLogger->error("Failed to signal shutdown_event '%s', error: %d",
327                          shutdown_event_name, GetLastError());
328   }
329   CloseHandle(shutdown_event);
330   return pid;
331 }
332 #endif
333 
334 #define JAM_FILE_ID 333
335 
336 
337 extern int real_main(int, char**);
338 
339 
340 static
create_argv(const Vector<BaseString> & args)341 char** create_argv(const Vector<BaseString>& args)
342 {
343   char **argv = (char **)malloc(sizeof(char*) * (args.size() + 1));
344   if(argv == NULL)
345     return NULL;
346 
347   for(unsigned i = 0; i < args.size(); i++)
348     argv[i] = strdup(args[i].c_str());
349   argv[args.size()] = NULL;
350   return argv;
351 }
352 
353 
354 static
free_argv(char ** argv)355 void free_argv(char** argv)
356 {
357   char** argp = argv;
358   while(*argp)
359   {
360     free((void*)*argp);
361     argp++;
362   }
363   free((void*)argv);
364 }
365 
366 
367 static pid_t
spawn_process(const char * progname,const Vector<BaseString> & args)368 spawn_process(const char* progname, const Vector<BaseString>& args)
369 {
370 #ifdef _WIN32
371   // Get full path name of this executeble
372   char path[MAX_PATH];
373   DWORD len = GetModuleFileName(NULL, path, sizeof(path));
374   if (len == 0 || len == sizeof(path))
375   {
376     g_eventLogger->warning("spawn_process: Could not extract full path, "
377                            "len: %u, error: %u\n",
378                            len, GetLastError());
379     // Fall through and try with progname as it was supplied
380   }
381   else
382   {
383     progname = path;
384   }
385 #endif
386 
387   char** argv = create_argv(args);
388   if (!argv)
389   {
390     g_eventLogger->error("spawn_process: Failed to create argv, errno: %d",
391                          errno);
392     return -1;
393   }
394 
395 #ifdef _WIN32
396 
397   intptr_t spawn_handle = _spawnv(P_NOWAIT, progname, argv);
398   if (spawn_handle == -1)
399   {
400     g_eventLogger->error("spawn_process: Failed to spawn process, errno: %d",
401                          errno);
402     // Print the _spawnv arguments to aid debugging
403     g_eventLogger->error(" progname: '%s'", progname);
404     char** argp = argv;
405     while(*argp)
406       g_eventLogger->error("argv: '%s'", *argp++);
407 
408     free_argv(argv);
409     return -1;
410   }
411   free_argv(argv);
412 
413   // Convert the handle returned from spawnv_ to a pid
414   DWORD pid = GetProcessId((HANDLE)spawn_handle);
415   if (pid == 0)
416   {
417     g_eventLogger->error("spawn_process: Failed to convert handle %d "
418                          "to pid, error: %d", spawn_handle, GetLastError());
419     CloseHandle((HANDLE)spawn_handle);
420     return -1;
421   }
422   CloseHandle((HANDLE)spawn_handle);
423   return pid;
424 #else
425   pid_t pid = fork();
426   if (pid == -1)
427   {
428     g_eventLogger->error("Failed to fork, errno: %d", errno);
429     free_argv(argv);
430     return -1;
431   }
432 
433   if (pid)
434   {
435     free_argv(argv);
436     // Parent
437     return pid;
438   }
439 
440   // Count number of arguments
441   int argc = 0;
442   while(argv[argc])
443     argc++;
444 
445   // Calling 'main' to start program from beginning
446   // without loading (possibly new version) from disk
447   (void)real_main(argc, argv);
448   assert(false); // main should never return
449   exit(1);
450   return -1; // Never reached
451 #endif
452 }
453 
/*
  Retry a failed spawn, sleeping between attempts, until the spawn
  succeeds or the maximum number of retries has been reached
*/
458 
459 static pid_t
retry_spawn_process(const char * progname,const Vector<BaseString> & args)460 retry_spawn_process(const char* progname, const Vector<BaseString>& args)
461 {
462   const unsigned max_retries = 10;
463   unsigned retry_counter = 0;
464   while(true)
465   {
466     pid_t pid = spawn_process(progname, args);
467     if (pid == -1)
468     {
469       if (retry_counter++ == max_retries)
470       {
471         g_eventLogger->error("Angel failed to spawn %d times, giving up",
472                              retry_counter);
473         angel_exit(1);
474       }
475 
476       g_eventLogger->warning("Angel failed to spawn, sleep and retry");
477 
478       NdbSleep_SecSleep(1);
479       continue;
480     }
481     return pid;
482   }
483 }
484 
/*
  Settings of interest to the angel, filled in by configure() from this
  node's section of the fetched configuration and read by angel_run().
*/

// Value of CFG_DB_STOP_ON_ERROR; when non-zero angel_run() reports the
// shutdown and exits instead of restarting the child after an error exit
static Uint32 stop_on_error;
// Value of CFG_DB_MAX_START_FAIL (3 if the parameter is not available);
// angel_run() gives up once this many consecutive startups have failed
static Uint32 config_max_start_fail_retries;
// Value of CFG_DB_START_FAIL_DELAY_SECS (0 if the parameter is not
// available); seconds angel_run() sleeps before restarting the child
// after a startup failure
static Uint32 config_restart_delay_secs;
488 
489 
/*
  Extract the configuration parameters that concern the angel
*/
493 
494 static bool
configure(const ndb_mgm_configuration * conf,NodeId nodeid)495 configure(const ndb_mgm_configuration* conf, NodeId nodeid)
496 {
497   Uint32 generation = 0;
498   ndb_mgm_configuration_iterator sys_iter(*conf, CFG_SECTION_SYSTEM);
499   if (sys_iter.get(CFG_SYS_CONFIG_GENERATION, &generation))
500   {
501     g_eventLogger->warning("Configuration didn't contain generation "
502                            "(likely old ndb_mgmd");
503   }
504   g_eventLogger->debug("Using configuration with generation %u", generation);
505 
506   ndb_mgm_configuration_iterator iter(*conf, CFG_SECTION_NODE);
507   if (iter.find(CFG_NODE_ID, nodeid))
508   {
509     g_eventLogger->error("Invalid configuration fetched, could not "
510                          "find own node id %d", nodeid);
511     return false;
512   }
513 
514   if (iter.get(CFG_DB_STOP_ON_ERROR, &stop_on_error))
515   {
516     g_eventLogger->error("Invalid configuration fetched, could not "
517                          "find StopOnError");
518     return false;
519   }
520   g_eventLogger->debug("Using StopOnError: %u", stop_on_error);
521 
522   if (iter.get(CFG_DB_MAX_START_FAIL, &config_max_start_fail_retries))
523   {
524     /* Old Management node, use default value */
525     config_max_start_fail_retries = 3;
526   }
527 
528   if (iter.get(CFG_DB_START_FAIL_DELAY_SECS, &config_restart_delay_secs))
529   {
530     /* Old Management node, use default value */
531     config_restart_delay_secs = 0;
532   }
533 
534   const char * datadir;
535   if (iter.get(CFG_NODE_DATADIR, &datadir))
536   {
537     g_eventLogger->error("Invalid configuration fetched, could not "
538                          "find DataDir");
539     return false;
540   }
541   g_eventLogger->debug("Using DataDir: %s", datadir);
542 
543   NdbConfig_SetPath(datadir);
544 
545   if (NdbDir::chdir(NdbConfig_get_path(NULL)) != 0)
546   {
547     g_eventLogger->warning("Cannot change directory to '%s', error: %d",
548                            NdbConfig_get_path(NULL), errno);
549     // Ignore error
550   }
551 
552   return true;
553 }
554 
// Shutdown request flag: set to true by angel_stop() and polled by the
// wait loop in angel_run(), which then asks the child to stop.
// NOTE(review): plain bool without synchronization - confirm from which
// context angel_stop() is called.
bool stop_child = false;
556 
/*
  Main function of the angel: supervise a child ndbd process and restart
  it when needed.

  Startup: connect to the management server, allocate a node id, fetch
  the configuration, apply it with configure() and optionally daemonize.

  Then, in an endless loop:
   - create a pipe on which the child reports extra shutdown status
   - spawn the child with the original arguments plus --report-fd,
     --allocated-nodeid, --initial and --nostart
   - poll for the child's termination every 100 ms, asking it to stop
     when stop_child has been set
   - read "error=", "signal=" and "sphase=" lines from the pipe
   - decide from the exit status whether to exit or how to restart
   - report the shutdown, reconnect to the management server and
     reallocate the same node id before spawning again

  The function does not return; every way out goes through angel_exit().

  @param progname          program name passed to retry_spawn_process()
  @param original_args     arguments the child is started with, the
                           angel's own arguments are appended to a copy
  @param connect_str       connect string for the ConfigRetriever
  @param force_nodeid      node id passed to the ConfigRetriever
  @param bind_address      bind address passed to the ConfigRetriever
  @param initial           value for the child's --initial argument,
                           updated from the child's exit status
  @param no_start          value for the child's --nostart argument,
                           updated from the child's exit status
  @param daemon            if true, call ndb_daemonize() before spawning
  @param connnect_retries  retries passed to ConfigRetriever::do_connect()
  @param connect_delay     delay passed to ConfigRetriever::do_connect()
*/
void
angel_run(const char* progname,
          const Vector<BaseString>& original_args,
          const char* connect_str,
          int force_nodeid,
          const char* bind_address,
          bool initial,
          bool no_start,
          bool daemon,
          int connnect_retries,
          int connect_delay)
{
  ConfigRetriever retriever(connect_str,
                            force_nodeid,
                            NDB_VERSION,
                            NDB_MGM_NODE_TYPE_NDB,
                            bind_address);
  if (retriever.hasError())
  {
    g_eventLogger->error("Could not initialize connection to management "
                         "server, error: '%s'", retriever.getErrorString());
    angel_exit(1);
  }

  const int verbose = 1;
  if (retriever.do_connect(connnect_retries, connect_delay, verbose) != 0)
  {
    g_eventLogger->error("Could not connect to management server, "
                         "error: '%s'", retriever.getErrorString());
    angel_exit(1);
  }
  g_eventLogger->info("Angel connected to '%s:%d'",
                      retriever.get_mgmd_host(),
                      retriever.get_mgmd_port());

  // Allocate the node id which is later passed on to every child
  const int alloc_retries = 10;
  const int alloc_delay = 3;
  const Uint32 nodeid = retriever.allocNodeId(alloc_retries, alloc_delay);
  if (nodeid == 0)
  {
    g_eventLogger->error("Failed to allocate nodeid, error: '%s'",
                         retriever.getErrorString());
    angel_exit(1);
  }
  g_eventLogger->info("Angel allocated nodeid: %u", nodeid);

  // The configuration is fetched once and used for the angel's own
  // settings and for finding the management nodes in reportShutdown()
  ndb_mgm_configuration * config = retriever.getConfig(nodeid);
  NdbAutoPtr<ndb_mgm_configuration> config_autoptr(config);
  if (config == 0)
  {
    g_eventLogger->error("Could not fetch configuration/invalid "
                         "configuration, error: '%s'",
                         retriever.getErrorString());
    angel_exit(1);
  }

  if (!configure(config, nodeid))
  {
    // Failed to configure, error already printed
    angel_exit(1);
  }

  if (daemon)
  {
    // Become a daemon
    char *lockfile = NdbConfig_PidFileName(nodeid);
    char *logfile = NdbConfig_StdoutFileName(nodeid);
    NdbAutoPtr<char> tmp_aptr1(lockfile), tmp_aptr2(logfile);

    if (ndb_daemonize(lockfile, logfile) != 0)
    {
      g_eventLogger->error("Couldn't start as daemon, error: '%s'",
                           ndb_daemon_error);
      angel_exit(1);
    }
  }

  // Counter for consecutive failed startups
  Uint32 failed_startups_counter = 0;
  while (true)
  {

    // Create pipe where ndbd process will report extra shutdown status
    int fds[2];
    if (pipe(fds))
    {
      g_eventLogger->error("Failed to create pipe, errno: %d (%s)",
                           errno, strerror(errno));
      angel_exit(1);
    }

    // Wrap the read end in a stream so it can be read line by line
    FILE *child_info_r;
    if (!(child_info_r = fdopen(fds[0], "r")))
    {
      g_eventLogger->error("Failed to open stream for pipe, errno: %d (%s)",
                           errno, strerror(errno));
      angel_exit(1);
    }

    // Build the args used to start ndbd by appending
    // the arguments that may have changed at the end
    // of original argument list
    BaseString one_arg;
    Vector<BaseString> args;
    args = original_args;

    // Pass fd number of the pipe which ndbd should use
    // for sending extra status to angel
    one_arg.assfmt("--report-fd=%d", fds[1]);
    args.push_back(one_arg);

    // The nodeid which has been allocated by angel
    one_arg.assfmt("--allocated-nodeid=%d", nodeid);
    args.push_back(one_arg);

    // Current restart mode, may have been changed by the exit
    // status of the previous child
    one_arg.assfmt("--initial=%d", initial);
    args.push_back(one_arg);

    one_arg.assfmt("--nostart=%d", no_start);
    args.push_back(one_arg);

    pid_t child = retry_spawn_process(progname, args);
    if (child <= 0)
    {
      // safety, retry_spawn_process returns valid child or give up
      g_eventLogger->error("retry_spawn_process, child: %d", child);
      angel_exit(1);
    }

    /**
     * Parent
     */
    g_eventLogger->info("Angel pid: %d started child: %d",
                        getpid(), child);

    ignore_signals();

    // Poll for termination of the child, non-blocking so that a
    // stop request can be forwarded while waiting
    int status=0, error_exit=0;
    while(true)
    {
      pid_t ret_pid = waitpid(child, &status, WNOHANG);
      if (ret_pid == child)
      {
        g_eventLogger->debug("Angel got child %d", child);
        break;
      }
      if (ret_pid > 0)
      {
        g_eventLogger->warning("Angel got unexpected pid %d "
                               "when waiting for %d",
                               ret_pid, child);
      }

      // The stop request is repeated on every iteration until
      // the child has terminated
      if (stop_child)
      {
        g_eventLogger->info("Angel shutting down ndbd with pid %d", child);
        kill(child, SIGINT);
       }
      NdbSleep_MilliSleep(100);
    }

    // Close the write end of pipe
    close(fds[1]);

    // Read info from the child's pipe, one "key=value" per line
    char buf[128];
    Uint32 child_error = 0, child_signal = 0, child_sphase = 0;
    while (fgets(buf, sizeof (buf), child_info_r))
    {
      int value;
      if (sscanf(buf, "error=%d\n", &value) == 1)
        child_error = value;
      else if (sscanf(buf, "signal=%d\n", &value) == 1)
        child_signal = value;
      else if (sscanf(buf, "sphase=%d\n", &value) == 1)
        child_sphase = value;
      else if (strcmp(buf, "\n") != 0)
        fprintf(stderr, "unknown info from child: '%s'\n", buf);
    }
    g_eventLogger->debug("error: %u, signal: %u, sphase: %u",
                         child_error, child_signal, child_sphase);
    // Close read end of pipe in parent
    fclose(child_info_r);

    // Decide what to do next from the way the child terminated
    if (WIFEXITED(status))
    {
      switch (WEXITSTATUS(status)) {
      case NRT_Default:
        // Child exited without requesting a restart, angel exits as well
        g_eventLogger->info("Angel shutting down");
        reportShutdown(config, nodeid, 0, 0, false, false,
                       child_error, child_signal, child_sphase);
        angel_exit(0);
        break;
      case NRT_NoStart_Restart:
        initial = false;
        no_start = true;
        break;
      case NRT_NoStart_InitialStart:
        initial = true;
        no_start = true;
        break;
      case NRT_DoStart_InitialStart:
        initial = true;
        no_start = false;
        break;
      default:
        // Any other exit code is treated as an error exit
        error_exit=1;
        if (stop_on_error)
        {
          /**
           * Error shutdown && stopOnError()
           */
          reportShutdown(config, nodeid,
                         error_exit, 0, false, false,
                         child_error, child_signal, child_sphase);
          angel_exit(0);
        }
        // Fall-through (intentional): restart the same way as
        // NRT_DoStart_Restart
      case NRT_DoStart_Restart:
        initial = false;
        no_start = false;
        break;
      }
    } else
    {
      // Child did not exit normally, initial and no_start are left as is
      error_exit=1;
      if (WIFSIGNALED(status))
      {
        child_signal = WTERMSIG(status);
      }
      else
      {
        child_signal = 127;
        g_eventLogger->info("Unknown exit reason. Stopped.");
      }
      if (stop_on_error)
      {
        /**
         * Error shutdown && stopOnError()
         */
        reportShutdown(config, nodeid,
                       error_exit, 0, false, false,
                       child_error, child_signal, child_sphase);
        angel_exit(0);
      }
    }

    // Check startup failure: an error exit with a reported start phase
    // of at most STARTUP_FAILURE_SPHASE counts as a failed startup
    const Uint32 STARTUP_FAILURE_SPHASE = 6;
    Uint32 restart_delay_secs = 0;
    if (error_exit && // Only check startup failure if ndbd exited uncontrolled
        child_sphase <= STARTUP_FAILURE_SPHASE)
    {
      if (++failed_startups_counter >= config_max_start_fail_retries)
      {
        g_eventLogger->alert("Angel detected too many startup failures(%d), "
                             "not restarting again", failed_startups_counter);
        reportShutdown(config, nodeid,
                       error_exit, 0, false, false,
                       child_error, child_signal, child_sphase);
        angel_exit(0);
      }
      g_eventLogger->info("Angel detected startup failure, count: %u",
                          failed_startups_counter);

      // Only failed startups are delayed before the next restart
      restart_delay_secs = config_restart_delay_secs;
    }
    else
    {
      // Reset the counter for consecutive failed startups
      failed_startups_counter = 0;
    }

    // Report that the node stopped and will be restarted
    reportShutdown(config, nodeid,
                   error_exit, 1,
                   no_start,
                   initial,
                   child_error, child_signal, child_sphase);
    g_eventLogger->info("Ndb has terminated (pid %d) restarting", child);

    g_eventLogger->debug("Angel reconnecting to management server");
    (void)retriever.disconnect();

    if (restart_delay_secs > 0)
    {
      g_eventLogger->info("Delaying Ndb restart for %u seconds.",
                          restart_delay_secs);
      NdbSleep_SecSleep(restart_delay_secs);
    };

    // NOTE: verbose, alloc_retries and alloc_delay below shadow the
    // variables with the same names declared before the loop
    const int verbose = 1;
    if (retriever.do_connect(connnect_retries, connect_delay, verbose) != 0)
    {
      g_eventLogger->error("Could not connect to management server, "
                           "error: '%s'", retriever.getErrorString());
      angel_exit(1);
    }
    g_eventLogger->info("Angel reconnected to '%s:%d'",
                        retriever.get_mgmd_host(),
                        retriever.get_mgmd_port());

    // Tell retriever to allocate the same nodeid again
    retriever.setNodeId(nodeid);

    g_eventLogger->debug("Angel reallocating nodeid %d", nodeid);
    const int alloc_retries = 20;
    const int alloc_delay = 3;
    const Uint32 realloced = retriever.allocNodeId(alloc_retries, alloc_delay);
    if (realloced == 0)
    {
      g_eventLogger->error("Angel failed to allocate nodeid, error: '%s'",
                           retriever.getErrorString());
      angel_exit(1);
    }
    if (realloced != nodeid)
    {
      g_eventLogger->error("Angel failed to reallocate nodeid %d, got %d",
                           nodeid, realloced);
      angel_exit(1);
    }
    g_eventLogger->info("Angel reallocated nodeid: %u", nodeid);

  }

  abort(); // Never reached
}
883 
884 
/*
  Order the angel to shut down its ndbd
*/
angel_stop(void)888 void angel_stop(void)
889 {
890   stop_child = true;
891 }
892