1 /*
2    Copyright (c) 2003, 2021, Oracle and/or its affiliates.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License, version 2.0,
6    as published by the Free Software Foundation.
7 
8    This program is also distributed with certain software (including
9    but not limited to OpenSSL) that is licensed under separate terms,
10    as designated in a particular file or component or in included license
11    documentation.  The authors of MySQL hereby grant you an additional
12    permission to link the program and your derivative works with the
13    separately licensed software that they have included with MySQL.
14 
15    This program is distributed in the hope that it will be useful,
16    but WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18    GNU General Public License, version 2.0, for more details.
19 
20    You should have received a copy of the GNU General Public License
21    along with this program; if not, write to the Free Software
22    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
23 */
24 
25 #include <ndb_global.h>
26 
27 #include "MgmtSrvr.hpp"
28 #include "ndb_mgmd_error.h"
29 #include "Services.hpp"
30 #include "ConfigManager.hpp"
31 #include "Defragger.hpp"
32 
33 #include <NdbOut.hpp>
34 #include <NdbApiSignal.hpp>
35 #include <kernel_types.h>
36 #include <GlobalSignalNumbers.h>
37 #include <signaldata/TestOrd.hpp>
38 #include <signaldata/TamperOrd.hpp>
39 #include <signaldata/StartOrd.hpp>
40 #include <signaldata/ApiVersion.hpp>
41 #include <signaldata/ResumeReq.hpp>
42 #include <signaldata/SetLogLevelOrd.hpp>
43 #include <signaldata/EventSubscribeReq.hpp>
44 #include <signaldata/EventReport.hpp>
45 #include <signaldata/DumpStateOrd.hpp>
46 #include <signaldata/BackupSignalData.hpp>
47 #include <signaldata/NFCompleteRep.hpp>
48 #include <signaldata/NodeFailRep.hpp>
49 #include <signaldata/AllocNodeId.hpp>
50 #include <signaldata/SchemaTrans.hpp>
51 #include <signaldata/CreateNodegroup.hpp>
52 #include <signaldata/DropNodegroup.hpp>
53 #include <signaldata/Sync.hpp>
54 #include <signaldata/GetConfig.hpp>
55 #include <NdbSleep.h>
56 #include <portlib/NdbDir.hpp>
57 #include <EventLogger.hpp>
58 #include <logger/FileLogHandler.hpp>
59 #include <logger/ConsoleLogHandler.hpp>
60 #include <logger/SysLogHandler.hpp>
61 #include <DebuggerNames.hpp>
62 #include <ndb_version.h>
63 
64 #include <SocketServer.hpp>
65 #include <NdbConfig.h>
66 
67 #include <NdbAutoPtr.hpp>
68 #include <NdbDir.hpp>
69 #include <ndberror.h>
70 
71 #include <mgmapi.h>
72 #include <mgmapi_configuration.hpp>
73 #include <mgmapi_config_parameters.h>
74 
75 #include <SignalSender.hpp>
76 
/* Currently selected error-insert code, tested through ERROR_INSERTED(). */
int g_errorInsert = 0;
#define ERROR_INSERTED(x) (g_errorInsert == x)

/*
  Declare a SignalSender named 'ss' on theFacade, lock it and check with
  okToSendTo(nodeId, true) that the node can be sent to.
  If that check yields a non-zero code, the enclosing function returns
  that code immediately.
*/
#define INIT_SIGNAL_SENDER(ss,nodeId) \
  SignalSender ss(theFacade); \
  ss.lock(); /* lock will be released on exit */ \
  { \
    const int okToSend_rc = okToSendTo(nodeId, true); \
    if (okToSend_rc != 0) \
    { \
      return okToSend_rc; \
    } \
  }
89 
90 extern "C" my_bool opt_core;
91 
92 void *
logLevelThread_C(void * m)93 MgmtSrvr::logLevelThread_C(void* m)
94 {
95   MgmtSrvr *mgm = (MgmtSrvr*)m;
96   mgm->logLevelThreadRun();
97   return 0;
98 }
99 
100 extern EventLogger * g_eventLogger;
101 
#ifdef NOT_USED
/*
  Print the level of every log category in 'll' on 'out', formatted as
  "[LogLevel: <level> <level> ... ]".
*/
static NdbOut&
operator<<(NdbOut& out, const LogLevel & ll)
{
  out << "[LogLevel: ";
  size_t category = 0;
  while (category < LogLevel::LOGLEVEL_CATEGORIES)
  {
    out << ll.getLogLevel((LogLevel::EventCategory)category) << " ";
    category++;
  }
  out << "]";
  return out;
}
#endif
113 
114 void
logLevelThreadRun()115 MgmtSrvr::logLevelThreadRun()
116 {
117   while (!_isStopThread)
118   {
119     Vector<NodeId> failed_started_nodes;
120     Vector<EventSubscribeReq> failed_log_level_requests;
121 
122     /**
123      * Handle started nodes
124      */
125     m_started_nodes.lock();
126     if (m_started_nodes.size() > 0)
127     {
128       // calculate max log level
129       EventSubscribeReq req;
130       {
131         LogLevel tmp;
132         m_event_listner.lock();
133         for(int i = m_event_listner.m_clients.size() - 1; i >= 0; i--)
134           tmp.set_max(m_event_listner[i].m_logLevel);
135         m_event_listner.unlock();
136         req.assign(tmp);
137       }
138       req.blockRef = _ownReference;
139       while (m_started_nodes.size() > 0)
140       {
141         Uint32 node = m_started_nodes[0];
142         m_started_nodes.erase(0, false);
143         m_started_nodes.unlock();
144 
145         if (setEventReportingLevelImpl(node, req))
146         {
147           failed_started_nodes.push_back(node);
148         }
149         else
150         {
151           SetLogLevelOrd ord;
152           ord.assign(m_nodeLogLevel[node]);
153           setNodeLogLevelImpl(node, ord);
154         }
155         m_started_nodes.lock();
156       }
157     }
158     m_started_nodes.unlock();
159 
160     m_log_level_requests.lock();
161     while (m_log_level_requests.size() > 0)
162     {
163       EventSubscribeReq req = m_log_level_requests[0];
164       m_log_level_requests.erase(0, false);
165       m_log_level_requests.unlock();
166 
167       if(req.blockRef == 0)
168       {
169         req.blockRef = _ownReference;
170         if (setEventReportingLevelImpl(0, req))
171         {
172           failed_log_level_requests.push_back(req);
173         }
174       }
175       else
176       {
177         SetLogLevelOrd ord;
178         ord.assign(req);
179         if (setNodeLogLevelImpl(req.blockRef, ord))
180         {
181           failed_log_level_requests.push_back(req);
182         }
183       }
184       m_log_level_requests.lock();
185     }
186     m_log_level_requests.unlock();
187 
188     if(!ERROR_INSERTED(10000))
189       m_event_listner.check_listeners();
190 
191     Uint32 sleeptime = _logLevelThreadSleep;
192     if (failed_started_nodes.size())
193     {
194       m_started_nodes.lock();
195       for (Uint32 i = 0; i<failed_started_nodes.size(); i++)
196         m_started_nodes.push_back(failed_started_nodes[i], false);
197       m_started_nodes.unlock();
198       failed_started_nodes.clear();
199       sleeptime = 100;
200     }
201 
202     if (failed_log_level_requests.size())
203     {
204       m_log_level_requests.lock();
205       for (Uint32 i = 0; i<failed_log_level_requests.size(); i++)
206         m_log_level_requests.push_back(failed_log_level_requests[i], false);
207       m_log_level_requests.unlock();
208       failed_log_level_requests.clear();
209       sleeptime = 100;
210     }
211 
212     NdbSleep_MilliSleep(sleeptime);
213   }
214 }
215 
216 
217 static int
translateStopRef(Uint32 errCode)218 translateStopRef(Uint32 errCode)
219 {
220   switch(errCode){
221   case StopRef::NodeShutdownInProgress:
222     return NODE_SHUTDOWN_IN_PROGESS;
223     break;
224   case StopRef::SystemShutdownInProgress:
225     return SYSTEM_SHUTDOWN_IN_PROGRESS;
226     break;
227   case StopRef::NodeShutdownWouldCauseSystemCrash:
228     return NODE_SHUTDOWN_WOULD_CAUSE_SYSTEM_CRASH;
229     break;
230   case StopRef::UnsupportedNodeShutdown:
231     return UNSUPPORTED_NODE_SHUTDOWN;
232     break;
233   }
234   return 4999;
235 }
236 
237 
MgmtSrvr(const MgmtOpts & opts)238 MgmtSrvr::MgmtSrvr(const MgmtOpts& opts) :
239   m_opts(opts),
240   _blockNumber(-1),
241   _ownNodeId(0),
242   m_port(0),
243   m_local_config(NULL),
244   _ownReference(0),
245   m_config_manager(NULL),
246   m_need_restart(false),
247   theFacade(NULL),
248   _isStopThread(false),
249   _logLevelThreadSleep(500),
250   m_event_listner(this),
251   m_master_node(0),
252   _logLevelThread(NULL),
253   m_version_string(ndbGetOwnVersionString())
254 {
255   DBUG_ENTER("MgmtSrvr::MgmtSrvr");
256 
257   m_local_config_mutex= NdbMutex_Create();
258   m_reserved_nodes_mutex= NdbMutex_Create();
259   if (!m_local_config_mutex || !m_reserved_nodes_mutex)
260   {
261     g_eventLogger->error("Failed to create MgmtSrvr mutexes");
262     require(false);
263   }
264 
265   /* Init node arrays */
266   for(Uint32 i = 0; i<MAX_NODES; i++) {
267     nodeTypes[i] = (enum ndb_mgm_node_type)-1;
268     clear_connect_address_cache(i);
269   }
270 
271   /* Setup clusterlog as client[0] in m_event_listner */
272   {
273     Ndb_mgmd_event_service::Event_listener se;
274     my_socket_invalidate(&(se.m_socket));
275     for(size_t t = 0; t<LogLevel::LOGLEVEL_CATEGORIES; t++){
276       se.m_logLevel.setLogLevel((LogLevel::EventCategory)t, 7);
277     }
278     se.m_logLevel.setLogLevel(LogLevel::llError, 15);
279     se.m_logLevel.setLogLevel(LogLevel::llConnection, 8);
280     se.m_logLevel.setLogLevel(LogLevel::llBackup, 15);
281     m_event_listner.m_clients.push_back(se);
282     m_event_listner.m_logLevel = se.m_logLevel;
283   }
284 
285   DBUG_VOID_RETURN;
286 }
287 
288 
289 /*
290   check_configdir
291 
292   Make sure configdir exist and try to create it if not
293 
294 */
295 
296 const char*
check_configdir() const297 MgmtSrvr::check_configdir() const
298 {
299   if (m_opts.configdir &&
300       strcmp(m_opts.configdir, MYSQLCLUSTERDIR) != 0)
301   {
302     // Specified on commmand line
303     if (access(m_opts.configdir, F_OK))
304     {
305       g_eventLogger->error("Directory '%s' specified with --configdir " \
306                            "does not exist. Either create it or pass " \
307                            "the path to an already existing directory.",
308                            m_opts.configdir);
309       return NULL;
310     }
311     return m_opts.configdir;
312   }
313   else
314   {
315     // Compiled in path MYSQLCLUSTERDIR
316     if (access(MYSQLCLUSTERDIR, F_OK))
317     {
318       g_eventLogger->info("The default config directory '%s' "            \
319                           "does not exist. Trying to create it...",
320                           MYSQLCLUSTERDIR);
321 
322       if (!NdbDir::create(MYSQLCLUSTERDIR) ||
323           access(MYSQLCLUSTERDIR, F_OK))
324       {
325         g_eventLogger->error("Could not create directory '%s'. "        \
326                              "Either create it manually or "            \
327                              "specify a different directory with "      \
328                              "--configdir=<path>",
329                              MYSQLCLUSTERDIR);
330         return NULL;
331       }
332 
333       g_eventLogger->info("Sucessfully created config directory");
334     }
335     return MYSQLCLUSTERDIR;
336   }
337 }
338 
339 
340 bool
init()341 MgmtSrvr::init()
342 {
343   DBUG_ENTER("MgmtSrvr::init");
344 
345   const char* configdir;
346 
347   if (!m_opts.config_cache)
348   {
349     g_eventLogger->info("Skipping check of config directory since "
350                         "config cache is disabled.");
351     configdir = NULL;
352   }
353   else
354   {
355     if (!(configdir= check_configdir()))
356       DBUG_RETURN(false);
357   }
358 
359   if (!(m_config_manager= new ConfigManager(m_opts, configdir)))
360   {
361     g_eventLogger->error("Failed to create ConfigManager");
362     DBUG_RETURN(false);
363   }
364 
365   if (m_config_manager->add_config_change_subscriber(this) < 0)
366   {
367     g_eventLogger->error("Failed to add MgmtSrvr as config change subscriber");
368     DBUG_RETURN(false);
369   }
370 
371   if (!m_config_manager->init())
372   {
373     DBUG_RETURN(false);
374   }
375 
376   /* 'config_changed' should have been called from 'init' */
377   require(m_local_config != 0);
378 
379   if (m_opts.print_full_config)
380   {
381     print_config();
382     DBUG_RETURN(false);
383   }
384 
385   assert(_ownNodeId);
386 
387   DBUG_RETURN(true);
388 }
389 
390 
391 bool
start_transporter(const Config * config)392 MgmtSrvr::start_transporter(const Config* config)
393 {
394   DBUG_ENTER("MgmtSrvr::start_transporter");
395 
396   theFacade= new TransporterFacade(0);
397   if (theFacade == 0)
398   {
399     g_eventLogger->error("Could not create TransporterFacade.");
400     DBUG_RETURN(false);
401   }
402 
403   assert(_blockNumber == -1); // Blocknumber shouldn't been allocated yet
404 
405   /*
406     Register ourself at TransporterFacade to be able to receive signals
407     and to be notified when a database process has died.
408   */
409   Uint32 res;
410   if ((res = open(theFacade)) == 0)
411   {
412     g_eventLogger->error("Failed to open block in TransporterFacade");
413     theFacade->stop_instance();
414     delete theFacade;
415     theFacade = 0;
416     DBUG_RETURN(false);
417   }
418   _blockNumber = refToBlock(res);
419 
420   /**
421    * Need to call ->open() prior to actually starting TF
422    */
423   m_config_manager->set_facade(theFacade);
424 
425   if (theFacade->start_instance(_ownNodeId,
426                                 config->m_configValues) < 0)
427   {
428     g_eventLogger->error("Failed to start transporter");
429     delete theFacade;
430     theFacade = 0;
431     DBUG_RETURN(false);
432   }
433 
434   _ownReference = numberToRef(_blockNumber, _ownNodeId);
435 
436   /*
437     set api reg req frequency quite high:
438 
439     100 ms interval to make sure we have fairly up-to-date
440     info from the nodes.  This to make sure that this info
441     is not dependent on heartbeat settings in the
442     configuration
443   */
444   theFacade->ext_set_max_api_reg_req_interval(100);
445 
446   DBUG_RETURN(true);
447 }
448 
449 
450 bool
start_mgm_service(const Config * config)451 MgmtSrvr::start_mgm_service(const Config* config)
452 {
453   DBUG_ENTER("MgmtSrvr::start_mgm_service");
454 
455   assert(m_port == 0);
456   {
457     // Find the portnumber to use for mgm service
458     ConfigIter iter(config, CFG_SECTION_NODE);
459 
460     if(iter.find(CFG_NODE_ID, _ownNodeId) != 0){
461       g_eventLogger->error("Could not find node %d in config", _ownNodeId);
462       DBUG_RETURN(false);
463     }
464 
465     unsigned type;
466     if(iter.get(CFG_TYPE_OF_SECTION, &type) != 0 ||
467        type != NODE_TYPE_MGM){
468       g_eventLogger->error("Node %d is not defined as management server",
469                            _ownNodeId);
470       DBUG_RETURN(false);
471     }
472 
473     if(iter.get(CFG_MGM_PORT, &m_port) != 0){
474       g_eventLogger->error("PortNumber not defined for node %d", _ownNodeId);
475       DBUG_RETURN(false);
476     }
477   }
478 
479   unsigned short port= m_port;
480   DBUG_PRINT("info", ("Using port %d", port));
481   if (port == 0)
482   {
483     g_eventLogger->error("Could not find out which port to use"\
484                         " for management service");
485     DBUG_RETURN(false);
486   }
487 
488   {
489     int count= 5; // no of retries for tryBind
490     while(!m_socket_server.tryBind(port, m_opts.bind_address))
491     {
492       if (--count > 0)
493       {
494 	NdbSleep_SecSleep(1);
495 	continue;
496       }
497       g_eventLogger->error("Unable to bind management service port: %s:%d!\n"
498                            "Please check if the port is already used,\n"
499                            "(perhaps a ndb_mgmd is already running),\n"
500                            "and if you are executing on the correct computer",
501                            (m_opts.bind_address ? m_opts.bind_address : "*"),
502                            port);
503       DBUG_RETURN(false);
504     }
505   }
506 
507   {
508     MgmApiService * mapi = new MgmApiService(*this);
509     if (mapi == NULL)
510     {
511       g_eventLogger->error("Could not allocate MgmApiService");
512       DBUG_RETURN(false);
513     }
514 
515     if(!m_socket_server.setup(mapi, &port, m_opts.bind_address))
516     {
517       delete mapi; // Will be deleted by SocketServer in all other cases
518       g_eventLogger->error("Unable to setup management service port: %s:%d!\n"
519                            "Please check if the port is already used,\n"
520                            "(perhaps a ndb_mgmd is already running),\n"
521                            "and if you are executing on the correct computer",
522                            (m_opts.bind_address ? m_opts.bind_address : "*"),
523                            port);
524       DBUG_RETURN(false);
525     }
526 
527     if (port != m_port)
528     {
529       g_eventLogger->error("Couldn't start management service on the "\
530                            "requested port: %d. Got port: %d instead",
531                           m_port, port);
532       DBUG_RETURN(false);
533     }
534   }
535 
536   m_socket_server.startServer();
537 
538   g_eventLogger->info("Id: %d, Command port: %s:%d",
539                       _ownNodeId,
540                       m_opts.bind_address ? m_opts.bind_address : "*",
541                       port);
542   DBUG_RETURN(true);
543 }
544 
545 
546 bool
start()547 MgmtSrvr::start()
548 {
549   DBUG_ENTER("MgmtSrvr::start");
550 
551   /* Start transporter */
552   if(!start_transporter(m_local_config))
553   {
554     g_eventLogger->error("Failed to start transporter!");
555     DBUG_RETURN(false);
556   }
557 
558   /* Start mgm service */
559   if (!start_mgm_service(m_local_config))
560   {
561     g_eventLogger->error("Failed to start mangement service!");
562     DBUG_RETURN(false);
563   }
564 
565   /* Use local MGM port for TransporterRegistry */
566   if(!connect_to_self())
567   {
568     g_eventLogger->error("Failed to connect to ourself!");
569     DBUG_RETURN(false);
570   }
571 
572   /* Start config manager */
573   if (!m_config_manager->start())
574   {
575     g_eventLogger->error("Failed to start ConfigManager");
576     DBUG_RETURN(false);
577   }
578 
579   /* Loglevel thread */
580   assert(_isStopThread == false);
581   _logLevelThread = NdbThread_Create(logLevelThread_C,
582 				     (void**)this,
583                                      0, // default stack size
584 				     "MgmtSrvr_Loglevel",
585 				     NDB_THREAD_PRIO_LOW);
586 
587   DBUG_RETURN(true);
588 }
589 
590 
591 void
configure_eventlogger(const BaseString & logdestination) const592 MgmtSrvr::configure_eventlogger(const BaseString& logdestination) const
593 {
594   // Close old log handlers before creating the new
595   g_eventLogger->close();
596 
597   Vector<BaseString> logdestinations;
598   logdestination.split(logdestinations, ";");
599 
600   for(unsigned i = 0; i < logdestinations.size(); i++)
601   {
602     // Extract type(everything left of colon)
603     Vector<BaseString> v_type_params;
604     logdestinations[i].split(v_type_params, ":", 2);
605     BaseString type(v_type_params[0]);
606 
607     // Extract params(everything right of colon)
608     BaseString params;
609     if(v_type_params.size() >= 2)
610       params = v_type_params[1];
611 
612     LogHandler *handler = NULL;
613     if(type == "FILE")
614     {
615       char *default_file_name= NdbConfig_ClusterLogFileName(_ownNodeId);
616       handler = new FileLogHandler(default_file_name);
617       free(default_file_name);
618     }
619     else if(type == "CONSOLE")
620     {
621       handler = new ConsoleLogHandler();
622     }
623 #ifndef _WIN32
624     else if(type == "SYSLOG")
625     {
626       handler = new SysLogHandler();
627     }
628 #endif
629     if(handler == NULL)
630     {
631       ndbout_c("INTERNAL ERROR: Could not create log handler for: '%s'",
632                logdestinations[i].c_str());
633       continue;
634     }
635 
636     if(!handler->parseParams(params))
637     {
638       ndbout_c("Failed to parse parameters for log handler: '%s', error: %d '%s'",
639                logdestinations[i].c_str(), handler->getErrorCode(), handler->getErrorStr());
640       delete handler;
641       continue;
642     }
643 
644     if (!g_eventLogger->addHandler(handler))
645     {
646       ndbout_c("INTERNAL ERROR: Could not add %s log handler", handler->handler_type());
647       g_eventLogger->error("INTERNAL ERROR: Could not add %s log handler",
648                            handler->handler_type());
649       delete handler;
650       continue;
651     }
652   }
653 }
654 
655 
656 void
setClusterLog(const Config * config)657 MgmtSrvr::setClusterLog(const Config* config)
658 {
659   assert(_ownNodeId);
660 
661   ConfigIter iter(config, CFG_SECTION_NODE);
662   require(iter.find(CFG_NODE_ID, _ownNodeId) == 0);
663 
664   // Update DataDir from config
665   const char *datadir;
666   require(iter.get(CFG_NODE_DATADIR, &datadir) == 0);
667   NdbConfig_SetPath(datadir);
668 
669   if (NdbDir::chdir(NdbConfig_get_path(NULL)) != 0)
670   {
671     g_eventLogger->warning("Cannot change directory to '%s', error: %d",
672                            NdbConfig_get_path(NULL), errno);
673     // Ignore error
674   }
675 
676   // Get log destination from config
677   BaseString logdest;
678   const char *value;
679   if(iter.get(CFG_LOG_DESTINATION, &value) == 0){
680     logdest.assign(value);
681   }
682 
683   bool logdest_configured = true;
684   if(logdest.length() == 0 || logdest == "") {
685     // No LogDestination set, use default settings
686     char *clusterLog= NdbConfig_ClusterLogFileName(_ownNodeId);
687     logdest.assfmt("FILE:filename=%s,maxsize=1000000,maxfiles=6",
688 		   clusterLog);
689     free(clusterLog);
690     logdest_configured = false;
691   }
692 
693   configure_eventlogger(logdest);
694 
695   if (logdest_configured == false &&
696       m_opts.non_interactive)
697   {
698     g_eventLogger->createConsoleHandler();
699   }
700 
701 #ifdef _WIN32
702   /* Output to Windows event log */
703   g_eventLogger->createEventLogHandler("MySQL Cluster Management Server");
704 #endif
705 
706   if (m_opts.verbose)
707     g_eventLogger->enable(Logger::LL_DEBUG);
708 }
709 
710 
711 void
config_changed(NodeId node_id,const Config * new_config)712 MgmtSrvr::config_changed(NodeId node_id, const Config* new_config)
713 {
714   DBUG_ENTER("MgmtSrvr::config_changed");
715 
716   Guard g(m_local_config_mutex);
717 
718   // Don't allow nodeid to change, once it's been set
719   require(_ownNodeId == 0 || _ownNodeId == node_id);
720 
721   _ownNodeId= node_id;
722 
723   if (m_local_config)
724     delete m_local_config;
725 
726   m_local_config= new Config(new_config); // Copy
727   require(m_local_config != 0);
728 
729   /* Rebuild node arrays */
730   ConfigIter iter(m_local_config, CFG_SECTION_NODE);
731   for(Uint32 i = 0; i<MAX_NODES; i++) {
732 
733     clear_connect_address_cache(i);
734 
735     if (iter.first())
736       continue;
737 
738     if (iter.find(CFG_NODE_ID, i) == 0){
739       unsigned type;
740       require(iter.get(CFG_TYPE_OF_SECTION, &type) == 0);
741 
742       switch(type){
743       case NODE_TYPE_DB:
744         nodeTypes[i] = NDB_MGM_NODE_TYPE_NDB;
745         break;
746       case NODE_TYPE_API:
747         nodeTypes[i] = NDB_MGM_NODE_TYPE_API;
748         break;
749       case NODE_TYPE_MGM:
750         nodeTypes[i] = NDB_MGM_NODE_TYPE_MGM;
751         break;
752       default:
753         break;
754       }
755     }
756     else
757     {
758       nodeTypes[i] = (enum ndb_mgm_node_type)-1;
759     }
760 
761   }
762 
763   // Setup cluster log
764   setClusterLog(m_local_config);
765 
766   if (theFacade)
767   {
768     if (!theFacade->configure(_ownNodeId,
769                               m_local_config->m_configValues))
770     {
771       g_eventLogger->warning("Could not reconfigure everything online, "
772                              "this node need a restart");
773       m_need_restart= true;
774     }
775   }
776 
777   DBUG_VOID_RETURN;
778 }
779 
780 
781 bool
get_packed_config(ndb_mgm_node_type node_type,BaseString & buf64,BaseString & error)782 MgmtSrvr::get_packed_config(ndb_mgm_node_type node_type,
783                             BaseString& buf64, BaseString& error)
784 {
785   return m_config_manager->get_packed_config(node_type, &buf64, error);
786 }
787 
788 bool
get_packed_config_from_node(NodeId nodeId,BaseString & buf64,BaseString & error)789 MgmtSrvr::get_packed_config_from_node(NodeId nodeId,
790                             BaseString& buf64, BaseString& error)
791 {
792   DBUG_ENTER("get_packed_config_from_node");
793 
794   if (nodeId >= MAX_NODES_ID)
795   {
796     error.assfmt("Nodeid %d is greater than max nodeid %d. ",
797                  nodeId, MAX_NODES_ID);
798     DBUG_RETURN(false);
799   }
800 
801   if (getNodeType(nodeId) == NDB_MGM_NODE_TYPE_UNKNOWN)
802   {
803     error.assfmt("Nodeid %d does not exist. ", nodeId);
804     DBUG_RETURN(false);
805   }
806 
807   if (getNodeType(nodeId) != NDB_MGM_NODE_TYPE_NDB)
808   {
809     error.assfmt("Node %d is not a data node. ", nodeId);
810     DBUG_RETURN(false);
811   }
812 
813   trp_node node = getNodeInfo(nodeId);
814 
815   if (!node.m_alive)
816   {
817     error.assfmt("Data node %d is not alive. ", nodeId);
818     DBUG_RETURN(false);
819   }
820 
821   const Uint32 version = node.m_info.m_version;
822 
823   if (!ndbd_get_config_supported(version))
824   {
825     error.assfmt("Data node %d (version %d.%d.%d) does not support getting config. ",
826                  nodeId, ndbGetMajor(version),
827                  ndbGetMinor(version), ndbGetBuild(version));
828     DBUG_RETURN(false);
829   }
830 
831   INIT_SIGNAL_SENDER(ss,nodeId);
832 
833   SimpleSignal ssig;
834   GetConfigReq* req = CAST_PTR(GetConfigReq, ssig.getDataPtrSend());
835   req->senderRef = ss.getOwnRef();
836   req->nodeId = nodeId;
837 
838   g_eventLogger->debug("Sending GET_CONFIG_REQ to %d", nodeId);
839 
840   ssig.set(ss, TestOrd::TraceAPI, CMVMI, GSN_GET_CONFIG_REQ,
841            GetConfigReq::SignalLength);
842   if ((ss.sendSignal(nodeId, &ssig)) != SEND_OK)
843   {
844     DBUG_RETURN(false);
845   }
846 
847   Defragger defragger;
848   while (true)
849   {
850     SimpleSignal *signal = ss.waitFor();
851     int gsn = signal->readSignalNumber();
852 
853     switch (gsn)
854     {
855     case GSN_GET_CONFIG_CONF:
856     {
857       if (refToNode(signal->header.theSendersBlockRef) != nodeId)
858       {
859         error.assfmt("Internal Error: Reply from wrong node %d, expected from %d. ",
860                      refToNode(signal->header.theSendersBlockRef),
861                      nodeId);
862         DBUG_RETURN(false);
863       }
864 
865       const GetConfigConf * const conf =
866 	CAST_CONSTPTR(GetConfigConf, signal->getDataPtr());
867 
868       if (signal->header.m_noOfSections != 1)
869       {
870         error.assfmt("Internal Error: Wrong number of sections %d received, expected %d. ",
871                      signal->header.m_noOfSections, 1);
872         DBUG_RETURN(false);
873       }
874 
875       if (defragger.defragment(signal))
876       {
877         ConfigValuesFactory cf;
878         require(cf.unpack(signal->ptr[0].p, conf->configLength));
879 
880         Config received_config(cf.getConfigValues());
881         if (!received_config.pack64(buf64))
882         {
883           error.assign("Failed to pack64");
884           DBUG_RETURN(false);
885         }
886         DBUG_RETURN(true);
887       }
888       // wait until all fragments are received
889       continue;
890     }
891 
892     case GSN_GET_CONFIG_REF:
893     {
894       if (refToNode(ssig.header.theSendersBlockRef) != nodeId)
895       {
896         error.assfmt("Internal Error: Reply from wrong node %d, expected from %d. ",
897                      refToNode(signal->header.theSendersBlockRef),
898                      nodeId);
899         DBUG_RETURN(false);
900       }
901       const GetConfigRef * const ref =
902 	CAST_CONSTPTR(GetConfigRef, signal->getDataPtr());
903       error.assfmt("Error in retrieving config from node %d: Internal error: %d",
904                    nodeId, ref->error);
905 
906       DBUG_RETURN(false);
907     }
908 
909     case GSN_NF_COMPLETEREP:
910     {
911       const NFCompleteRep * rep = CAST_CONSTPTR(NFCompleteRep,
912                                                 signal->getDataPtr());
913       if (rep->failedNodeId == nodeId)
914       {
915         error.assfmt("Node %d is not available", nodeId);
916         DBUG_RETURN(false);
917       }
918       continue;
919     }
920 
921     case GSN_NODE_FAILREP:
922     {
923       // Wait until GSN_NODE_COMPLETEREP is received.
924       continue;
925     }
926 
927     case GSN_API_REGCONF:
928     case GSN_TAKE_OVERTCCONF:
929     case GSN_CONNECT_REP:
930       // Ignore
931       continue;
932 
933     default:
934       report_unknown_signal(signal);
935       DBUG_RETURN(false);
936     }
937   }
938   // Should never come here
939   require(false);
940   DBUG_RETURN(false);
941 }
942 
~MgmtSrvr()943 MgmtSrvr::~MgmtSrvr()
944 {
945   /* Stop log level thread */
946   void* res = 0;
947   _isStopThread = true;
948 
949   if (_logLevelThread != NULL) {
950     NdbThread_WaitFor(_logLevelThread, &res);
951     NdbThread_Destroy(&_logLevelThread);
952   }
953 
954   /* Stop mgm service, don't allow new connections */
955   m_socket_server.stopServer();
956 
957   /* Stop all active session */
958   if (!m_socket_server.stopSessions(true,
959                                     2 * MgmApiSession::SOCKET_TIMEOUT))
960   {
961     g_eventLogger->error("Failed to wait for all sessions to stop, "
962                          "continuing with shutdown anyway.");
963   }
964 
965   /* Stop config manager */
966   if (m_config_manager != 0)
967   {
968     m_config_manager->stop();
969     delete m_config_manager;
970     m_config_manager= 0;
971   }
972 
973   this->close(); // close trp_client before stopping TransporterFacade
974 
975   // Stop transporter
976   if(theFacade != 0){
977     theFacade->stop_instance();
978     delete theFacade;
979     theFacade = 0;
980   }
981 
982   delete m_local_config;
983 
984   NdbMutex_Destroy(m_local_config_mutex);
985   NdbMutex_Destroy(m_reserved_nodes_mutex);
986 }
987 
988 
989 //****************************************************************************
990 //****************************************************************************
991 
okToSendTo(NodeId nodeId,bool unCond)992 int MgmtSrvr::okToSendTo(NodeId nodeId, bool unCond)
993 {
994   if(nodeId == 0 || getNodeType(nodeId) != NDB_MGM_NODE_TYPE_NDB)
995     return WRONG_PROCESS_TYPE;
996   // Check if we have contact with it
997   if(unCond){
998     if (getNodeInfo(nodeId).is_confirmed())
999       return 0;
1000   }
1001   else if (getNodeInfo(nodeId).m_alive == true)
1002     return 0;
1003   return NO_CONTACT_WITH_PROCESS;
1004 }
1005 
1006 void
report_unknown_signal(SimpleSignal * signal)1007 MgmtSrvr::report_unknown_signal(SimpleSignal *signal)
1008 {
1009   signal->print();
1010   g_eventLogger->error("Unknown signal received. SignalNumber: "
1011                        "%i from (%d, 0x%x)",
1012                        signal->readSignalNumber(),
1013                        refToNode(signal->header.theSendersBlockRef),
1014                        refToBlock(signal->header.theSendersBlockRef));
1015   assert(false);
1016 }
1017 
1018 /*****************************************************************************
1019  * Starting and stopping database nodes
1020  ****************************************************************************/
1021 
1022 int
sendSTART_ORD(int nodeId)1023 MgmtSrvr::sendSTART_ORD(int nodeId)
1024 {
1025   INIT_SIGNAL_SENDER(ss,nodeId);
1026 
1027   SimpleSignal ssig;
1028   StartOrd* const startOrd = CAST_PTR(StartOrd, ssig.getDataPtrSend());
1029   ssig.set(ss,TestOrd::TraceAPI, CMVMI, GSN_START_ORD, StartOrd::SignalLength);
1030   startOrd->restartInfo = 0;
1031 
1032   return ss.sendSignal(nodeId, &ssig) == SEND_OK ? 0 : SEND_OR_RECEIVE_FAILED;
1033 }
1034 
1035 /*****************************************************************************
1036  * Version handling
1037  *****************************************************************************/
1038 
1039 void
status_api(int nodeId,ndb_mgm_node_status & node_status,Uint32 & version,Uint32 & mysql_version,const char ** address,char * addr_buf,size_t addr_buf_size)1040 MgmtSrvr::status_api(int nodeId,
1041                      ndb_mgm_node_status& node_status,
1042                      Uint32& version, Uint32& mysql_version,
1043                      const char **address,
1044                      char *addr_buf,
1045                      size_t addr_buf_size)
1046 {
1047   assert(getNodeType(nodeId) == NDB_MGM_NODE_TYPE_API);
1048   assert(version == 0 && mysql_version == 0);
1049 
1050   if (sendVersionReq(nodeId,
1051                      version,
1052                      mysql_version,
1053                      address,
1054                      addr_buf,
1055                      addr_buf_size) != 0)
1056   {
1057     // Couldn't get version from any NDB node.
1058     assert(version == 0);
1059     node_status = NDB_MGM_NODE_STATUS_UNKNOWN;
1060     return;
1061   }
1062 
1063   if (version)
1064   {
1065     assert(mysql_version);
1066     node_status = NDB_MGM_NODE_STATUS_CONNECTED;
1067   }
1068   else
1069   {
1070     assert(mysql_version == 0);
1071     node_status = NDB_MGM_NODE_STATUS_NO_CONTACT;
1072   }
1073   return;
1074 }
1075 
1076 
/**
 * Ask a data node for the version and address of node v_nodeId.
 *
 * An API_VERSION_REQ is sent to QMGR in an alive data node picked by
 * the SignalSender. If that data node is reported as failed before it
 * has answered, the request is sent again to another alive data node.
 * The out parameters are only written when API_VERSION_CONF arrives.
 *
 * @param v_nodeId       node to ask about
 * @param version        out: version from API_VERSION_CONF
 * @param mysql_version  out: MySQL version from API_VERSION_CONF, forced
 *                       to 0 when version is older than NDBD_SPLIT_VERSION
 * @param address        out: result of Ndb_inet_ntop() formatting the
 *                       reported IPv4 address into addr_buf
 * @param addr_buf       buffer receiving the formatted address
 * @param addr_buf_size  size of addr_buf
 *
 * @return 0 on success,
 *         NO_CONTACT_WITH_DB_NODES when no alive data node is available,
 *         SEND_OR_RECEIVE_FAILED when the request could not be sent or
 *         an unexpected signal was received
 */
int
MgmtSrvr::sendVersionReq(int v_nodeId,
			 Uint32 &version,
			 Uint32& mysql_version,
			 const char **address,
                         char *addr_buf,
                         size_t addr_buf_size)
{
  SignalSender ss(theFacade);
  ss.lock();

  SimpleSignal ssig;
  ApiVersionReq* req = CAST_PTR(ApiVersionReq, ssig.getDataPtrSend());
  req->senderRef = ss.getOwnRef();
  req->nodeId = v_nodeId;
  ssig.set(ss, TestOrd::TraceAPI, QMGR,
           GSN_API_VERSION_REQ, ApiVersionReq::SignalLength);

  // nodeId is the data node the request was last sent to
  NodeId nodeId = 0;
  // do_send is set whenever the request must be (re)sent
  bool do_send = true;
  while(true)
  {
    if (do_send)
    {
      nodeId = ss.get_an_alive_node();
      if (nodeId == 0)
      {
        return NO_CONTACT_WITH_DB_NODES;
      }

      if (ss.sendSignal(nodeId, &ssig) != SEND_OK)
      {
        return SEND_OR_RECEIVE_FAILED;
      }

      do_send = false;
    }

    SimpleSignal *signal = ss.waitFor();

    switch (signal->readSignalNumber()) {
    case GSN_API_VERSION_CONF: {
      const ApiVersionConf * const conf =
	CAST_CONSTPTR(ApiVersionConf, signal->getDataPtr());

      assert((int) conf->nodeId == v_nodeId);

      version = conf->version;
      mysql_version = conf->mysql_version;
      // Versions before NDBD_SPLIT_VERSION get mysql_version 0
      if (version < NDBD_SPLIT_VERSION)
	mysql_version = 0;
      // Format the reported IPv4 address as text into addr_buf
      struct in_addr in;
      in.s_addr= conf->m_inet_addr;
      *address= Ndb_inet_ntop(AF_INET,
                              static_cast<void*>(&in),
                              addr_buf,
                              (socklen_t)addr_buf_size);

      return 0;
    }

    case GSN_NF_COMPLETEREP:{
      const NFCompleteRep * const rep =
	CAST_CONSTPTR(NFCompleteRep, signal->getDataPtr());
      // Only a failure of the node we asked requires a resend
      if (rep->failedNodeId == nodeId)
	do_send = true; // retry with other node
      continue;
    }

    case GSN_NODE_FAILREP:{
      const NodeFailRep * const rep =
	CAST_CONSTPTR(NodeFailRep, signal->getDataPtr());
      Uint32 len = NodeFailRep::getNodeMaskLength(signal->getLength());
      assert(len == NodeBitmask::Size); // only full length in ndbapi
      // Resend if the node we asked is among the failed nodes
      if (BitmaskImpl::safe_get(len, rep->theAllNodes, nodeId))
      {
	do_send = true; // retry with other node
      }
      continue;
    }
    case GSN_API_REGCONF:
    case GSN_TAKE_OVERTCCONF:
    case GSN_CONNECT_REP:
      // Ignore
      continue;
    default:
      report_unknown_signal(signal);
      return SEND_OR_RECEIVE_FAILED;
    }
  }

  // Should never come here
  require(false);
  return -1;
}
1172 
1173 
sendStopMgmd(NodeId nodeId,bool abort,bool stop,bool restart,bool nostart,bool initialStart)1174 int MgmtSrvr::sendStopMgmd(NodeId nodeId,
1175 			   bool abort,
1176 			   bool stop,
1177 			   bool restart,
1178 			   bool nostart,
1179 			   bool initialStart)
1180 {
1181   const char* hostname;
1182   Uint32 port;
1183   BaseString connect_string;
1184 
1185   {
1186     Guard g(m_local_config_mutex);
1187     {
1188       ConfigIter iter(m_local_config, CFG_SECTION_NODE);
1189 
1190       if(iter.first())                       return SEND_OR_RECEIVE_FAILED;
1191       if(iter.find(CFG_NODE_ID, nodeId))     return SEND_OR_RECEIVE_FAILED;
1192       if(iter.get(CFG_NODE_HOST, &hostname)) return SEND_OR_RECEIVE_FAILED;
1193     }
1194     {
1195       ConfigIter iter(m_local_config, CFG_SECTION_NODE);
1196 
1197       if(iter.first())                   return SEND_OR_RECEIVE_FAILED;
1198       if(iter.find(CFG_NODE_ID, nodeId)) return SEND_OR_RECEIVE_FAILED;
1199       if(iter.get(CFG_MGM_PORT, &port))  return SEND_OR_RECEIVE_FAILED;
1200     }
1201     if( strlen(hostname) == 0 )
1202       return SEND_OR_RECEIVE_FAILED;
1203 
1204   }
1205   connect_string.assfmt("%s:%u",hostname,port);
1206 
1207   DBUG_PRINT("info",("connect string: %s",connect_string.c_str()));
1208 
1209   NdbMgmHandle h= ndb_mgm_create_handle();
1210   if ( h && connect_string.length() > 0 )
1211   {
1212     ndb_mgm_set_connectstring(h,connect_string.c_str());
1213     if(ndb_mgm_connect(h,1,0,0))
1214     {
1215       DBUG_PRINT("info",("failed ndb_mgm_connect"));
1216       ndb_mgm_destroy_handle(&h);
1217       return SEND_OR_RECEIVE_FAILED;
1218     }
1219     if(!restart)
1220     {
1221       int nodes[1];
1222       nodes[0]= (int)nodeId;
1223       if(ndb_mgm_stop(h, 1, nodes) < 0)
1224       {
1225         ndb_mgm_destroy_handle(&h);
1226         return SEND_OR_RECEIVE_FAILED;
1227       }
1228     }
1229     else
1230     {
1231       int nodes[1];
1232       nodes[0]= (int)nodeId;
1233       if(ndb_mgm_restart2(h, 1, nodes, initialStart, nostart, abort) < 0)
1234       {
1235         ndb_mgm_destroy_handle(&h);
1236         return SEND_OR_RECEIVE_FAILED;
1237       }
1238     }
1239   }
1240   ndb_mgm_destroy_handle(&h);
1241 
1242   return 0;
1243 }
1244 
/**
 * send STOP_REQ to all DB-nodes
 *   and wait for them to stop or refuse
 *
 * The request is sent to NDBCNTR in every NDB node for which
 * okToSendTo(nodeId, true) succeeds. The function then waits until
 * every node the request was sent to has answered with STOP_CONF or
 * STOP_REF, or has been reported as failed.
 *
 * @param stoppedNodes  out: cleared on entry; nodes reported through
 *                      NF_COMPLETEREP or NODE_FAILREP while waiting are
 *                      set. Nodes answering STOP_CONF are not set here.
 * @param abort         stored in requestInfo with setStopAbort()
 * @param stop          stored in requestInfo with setSystemStop()
 * @param restart       stored in requestInfo with setPerformRestart()
 * @param nostart       stored in requestInfo with setNoStart()
 * @param initialStart  stored in requestInfo with setInitialStart()
 *
 * @return 0 when no node refused,
 *         the translated error code of the last STOP_REF received,
 *         SEND_OR_RECEIVE_FAILED when no signal could be sent although
 *         at least one send was attempted, or when an unexpected signal
 *         was received
 */
int
MgmtSrvr::sendall_STOP_REQ(NodeBitmask &stoppedNodes,
                           bool abort,
                           bool stop,
                           bool restart,
                           bool nostart,
                           bool initialStart)
{
  int error = 0;
  DBUG_ENTER("MgmtSrvr::sendall_STOP_REQ");
  DBUG_PRINT("enter", ("abort: %d  stop: %d  restart: %d  "
                       "nostart: %d  initialStart: %d",
                       abort, stop, restart, nostart, initialStart));

  stoppedNodes.clear();

  SignalSender ss(theFacade);
  ss.lock(); // lock will be released on exit

  // Build the STOP_REQ once, the same signal is sent to every node
  SimpleSignal ssig;
  StopReq* const stopReq = CAST_PTR(StopReq, ssig.getDataPtrSend());
  ssig.set(ss, TestOrd::TraceAPI, NDBCNTR, GSN_STOP_REQ, StopReq::SignalLength);

  stopReq->requestInfo = 0;
  stopReq->apiTimeout = 5000;
  stopReq->transactionTimeout = 1000;
  stopReq->readOperationTimeout = 1000;
  stopReq->operationTimeout = 1000;
  stopReq->senderData = 12;
  stopReq->senderRef = ss.getOwnRef();
  stopReq->singleuser = 0;
  StopReq::setSystemStop(stopReq->requestInfo, stop);
  StopReq::setPerformRestart(stopReq->requestInfo, restart);
  StopReq::setStopAbort(stopReq->requestInfo, abort);
  StopReq::setNoStart(stopReq->requestInfo, nostart);
  StopReq::setInitialStart(stopReq->requestInfo, initialStart);

  // send the signals
  int failed = 0;
  // nodes holds the nodes we still expect an answer from
  NodeBitmask nodes;
  {
    NodeId nodeId = 0;
    while(getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB))
    {
      if (okToSendTo(nodeId, true) == 0)
      {
	SendStatus result = ss.sendSignal(nodeId, &ssig);
	if (result == SEND_OK)
	  nodes.set(nodeId);
        else
          failed++;
      }
    }
  }

  // Every attempted send failed, there is nothing to wait for
  if (nodes.isclear() && failed > 0)
  {
    DBUG_RETURN(SEND_OR_RECEIVE_FAILED);
  }

  // now wait for the replies
  while (!nodes.isclear())
  {
    SimpleSignal *signal = ss.waitFor();
    int gsn = signal->readSignalNumber();
    switch (gsn) {
    case GSN_STOP_REF:
    {
      // The node refused to stop, remember the (last) reason
      const StopRef * const ref = CAST_CONSTPTR(StopRef, signal->getDataPtr());
      const NodeId nodeId = refToNode(signal->header.theSendersBlockRef);
#ifdef VM_TRACE
      ndbout_c("Node %d refused stop", nodeId);
#endif
      assert(nodes.get(nodeId));
      nodes.clear(nodeId);
      error = translateStopRef(ref->errorCode);
      break;
    }
    case GSN_STOP_CONF:
    {
      // The node accepted the request, stop waiting for it
      const NodeId nodeId = refToNode(signal->header.theSendersBlockRef);
      assert(nodes.get(nodeId));
      nodes.clear(nodeId);
      break;
    }
    case GSN_NF_COMPLETEREP:
    {
      const NFCompleteRep * rep = CAST_CONSTPTR(NFCompleteRep,
                                                signal->getDataPtr());
      // Guard the bitmask operations against out of range node ids
      if (rep->failedNodeId <= nodes.max_size())
        nodes.clear(rep->failedNodeId); // clear the failed node

      if (rep->failedNodeId <= stoppedNodes.max_size())
        stoppedNodes.set(rep->failedNodeId);
      break;
    }
    case GSN_NODE_FAILREP:
    {
      const NodeFailRep * rep = CAST_CONSTPTR(NodeFailRep,
                                              signal->getDataPtr());
      Uint32 len = NodeFailRep::getNodeMaskLength(signal->getLength());
      assert(len == NodeBitmask::Size); // only full length in ndbapi
      // All failed nodes are done waiting for and count as stopped
      NodeBitmask mask;
      mask.assign(len, rep->theAllNodes);
      nodes.bitANDC(mask);
      stoppedNodes.bitOR(mask);
      break;
    }
    case GSN_API_REGCONF:
    case GSN_TAKE_OVERTCCONF:
    case GSN_CONNECT_REP:
      // Not relevant for the stop protocol, keep waiting
      continue;
    default:
      report_unknown_signal(signal);
      DBUG_RETURN(SEND_OR_RECEIVE_FAILED);
    }
  }

  DBUG_RETURN(error);
}
1370 
1371 int
guess_master_node(SignalSender & ss)1372 MgmtSrvr::guess_master_node(SignalSender& ss)
1373 {
1374   /**
1375    * First check if m_master_node is started
1376    */
1377   NodeId guess = m_master_node;
1378   if (guess != 0)
1379   {
1380     trp_node node = ss.getNodeInfo(guess);
1381     if (node.m_state.startLevel == NodeState::SL_STARTED)
1382       return guess;
1383   }
1384 
1385   /**
1386    * Check for any started node
1387    */
1388   guess = 0;
1389   while(getNextNodeId(&guess, NDB_MGM_NODE_TYPE_NDB))
1390   {
1391     trp_node node = ss.getNodeInfo(guess);
1392     if (node.m_state.startLevel == NodeState::SL_STARTED)
1393     {
1394       return guess;
1395     }
1396   }
1397 
1398   /**
1399    * Check any confirmed node
1400    */
1401   guess = 0;
1402   while(getNextNodeId(&guess, NDB_MGM_NODE_TYPE_NDB))
1403   {
1404     trp_node node = ss.getNodeInfo(guess);
1405     if (node.is_confirmed())
1406     {
1407       return guess;
1408     }
1409   }
1410 
1411   /**
1412    * Check any connected node
1413    */
1414   guess = 0;
1415   while(getNextNodeId(&guess, NDB_MGM_NODE_TYPE_NDB))
1416   {
1417     trp_node node = ss.getNodeInfo(guess);
1418     if (node.is_connected())
1419     {
1420       return guess;
1421     }
1422   }
1423 
1424   return 0; // give up
1425 }
1426 
/*
 * Common method for handling all STOP_REQ signalling that
 * is used by Stopping, Restarting and Single user commands
 *
 * In the event that we need to stop a mgmd, we create a mgm
 * client connection to that mgmd and stop it that way.
 * This allows us to stop mgm servers when there isn't any real
 * distributed communication up.
 *
 * node_ids.size()==0 means to stop all DB nodes.
 *                    MGM nodes will *NOT* be stopped.
 * NOTE(review): with an empty node_ids this function itself sends
 * nothing; the callers visible in this file use sendall_STOP_REQ()
 * for that case.
 *
 * If we work out we should be stopping or restarting ourselves,
 * we return <0 in stopSelf for restart, >0 for stop
 * and 0 for do nothing.
 *
 * A single NDB node gets the STOP_REQ directly. When several NDB nodes
 * are to be stopped, their bitmask is put in the STOP_REQ which is sent
 * to the node returned by guess_master_node(); if that node answers
 * STOP_REF with MultiNodeShutdownNotMaster, the master node from the
 * STOP_REF is remembered in m_master_node and the request is sent again.
 *
 * Returns 0 or the result of the last sendStopMgmd() call when the
 * wait for the NDB nodes completes, WRONG_PROCESS_TYPE when node_ids
 * contains a node that is neither MGM nor NDB, the translated error
 * code of a STOP_REF, or SEND_OR_RECEIVE_FAILED when a STOP_REQ could
 * not be sent or an unexpected signal was received.
 */

int MgmtSrvr::sendSTOP_REQ(const Vector<NodeId> &node_ids,
			   NodeBitmask &stoppedNodes,
			   bool abort,
			   bool stop,
			   bool restart,
			   bool nostart,
			   bool initialStart,
                           int* stopSelf)
{
  int error = 0;
  DBUG_ENTER("MgmtSrvr::sendSTOP_REQ");
  DBUG_PRINT("enter", ("no of nodes: %d "
                       "abort: %d  stop: %d  restart: %d  "
                       "nostart: %d  initialStart: %d",
                       node_ids.size(),
                       abort, stop, restart, nostart, initialStart));

  stoppedNodes.clear();
  *stopSelf= 0;

  NodeBitmask ndb_nodes_to_stop;
  NodeBitmask mgm_nodes_to_stop;

  SignalSender ss(theFacade);
  ss.lock(); // lock will be released on exit

  /**
   * First verify arguments
   */
  for (unsigned i = 0; i < node_ids.size(); i++)
  {
    switch(getNodeType(node_ids[i])){
    case NDB_MGM_NODE_TYPE_MGM:
      mgm_nodes_to_stop.set(node_ids[i]);
      break;
    case NDB_MGM_NODE_TYPE_NDB:
      ndb_nodes_to_stop.set(node_ids[i]);
      break;
    default:
      DBUG_RETURN(WRONG_PROCESS_TYPE);
    }
  }

  /**
   * Process ndb_mgmd
   */
  for (Uint32 i = mgm_nodes_to_stop.find(0);
       i != mgm_nodes_to_stop.NotFound;
       i = mgm_nodes_to_stop.find(i + 1))
  {
    if (i != getOwnNodeId())
    {
      // Other mgmd, stop it through a mgm client connection.
      // Note that error only keeps the result of the last call.
      error= sendStopMgmd(i, abort, stop, restart,
                          nostart, initialStart);
      if (error == 0)
      {
        stoppedNodes.set(i);
      }
    }
    else
    {
      // This mgmd, only tell the caller to stop/restart it
      g_eventLogger->info("Stopping this node");
      * stopSelf = (restart)? -1 : 1;
      stoppedNodes.set(i);
    }
  }

  /**
   * Process ndbd
   */
  SimpleSignal ssig;
  StopReq* const stopReq = CAST_PTR(StopReq, ssig.getDataPtrSend());
  ssig.set(ss, TestOrd::TraceAPI, NDBCNTR, GSN_STOP_REQ, StopReq::SignalLength);

  stopReq->requestInfo = 0;
  stopReq->apiTimeout = 5000;
  stopReq->transactionTimeout = 1000;
  stopReq->readOperationTimeout = 1000;
  stopReq->operationTimeout = 1000;
  stopReq->senderData = 12;
  stopReq->senderRef = ss.getOwnRef();
  stopReq->singleuser = 0;
  StopReq::setSystemStop(stopReq->requestInfo, stop);
  StopReq::setPerformRestart(stopReq->requestInfo, restart);
  StopReq::setStopAbort(stopReq->requestInfo, abort);
  StopReq::setNoStart(stopReq->requestInfo, nostart);
  StopReq::setInitialStart(stopReq->requestInfo, initialStart);

  // use_master_node: the request is a multi node stop sent to the master
  int use_master_node = 0;
  // do_send: the request must be (re)sent from within the wait loop
  int do_send = 0;
  if (ndb_nodes_to_stop.count() > 1)
  {
    // Several nodes, send the list of nodes in one request (in wait loop)
    do_send = 1;
    use_master_node = 1;
    ndb_nodes_to_stop.copyto(NdbNodeBitmask::Size, stopReq->nodes);
    StopReq::setStopNodes(stopReq->requestInfo, 1);
  }
  else if (ndb_nodes_to_stop.count() == 1)
  {
    // Exactly one node, send the request directly to that node
    Uint32 nodeId = ndb_nodes_to_stop.find(0);
    if (okToSendTo(nodeId, true) == 0)
    {
      SendStatus result = ss.sendSignal(nodeId, &ssig);
      if (result != SEND_OK)
      {
        DBUG_RETURN(SEND_OR_RECEIVE_FAILED);
      }
    }
    else
    {
      DBUG_RETURN(SEND_OR_RECEIVE_FAILED);
    }
  }


  // now wait for the replies
  // sendNodeId is the node that STOP_CONF/STOP_REF is expected from
  Uint32 sendNodeId = ndb_nodes_to_stop.find(0);
  while (!stoppedNodes.contains(ndb_nodes_to_stop))
  {
    if (do_send)
    {
      assert(use_master_node);
      sendNodeId = guess_master_node(ss);
      if (okToSendTo(sendNodeId, true) != 0)
      {
        DBUG_RETURN(SEND_OR_RECEIVE_FAILED);
      }

      if (ss.sendSignal(sendNodeId, &ssig) != SEND_OK)
      {
        DBUG_RETURN(SEND_OR_RECEIVE_FAILED);
      }
      do_send = 0;
    }

    SimpleSignal *signal = ss.waitFor();
    int gsn = signal->readSignalNumber();
    switch (gsn) {
    case GSN_STOP_REF:{
      const StopRef * const ref = CAST_CONSTPTR(StopRef, signal->getDataPtr());
      const NodeId nodeId = refToNode(signal->header.theSendersBlockRef);
      require(nodeId == sendNodeId);
      if (ref->errorCode == StopRef::MultiNodeShutdownNotMaster)
      {
        // Wrong guess, remember the master reported and send again
        assert(use_master_node);
        m_master_node= ref->masterNodeId;
        do_send = 1;
        continue;
      }
      DBUG_RETURN(translateStopRef(ref->errorCode));
      break;
    }
    case GSN_STOP_CONF:{
#ifdef NOT_USED
      const StopConf * const ref = CAST_CONSTPTR(StopConf, signal->getDataPtr());
#endif
      const NodeId nodeId = refToNode(signal->header.theSendersBlockRef);
      require(nodeId == sendNodeId);
      // One STOP_CONF covers all the requested NDB nodes
      stoppedNodes.bitOR(ndb_nodes_to_stop);
      break;
    }
    case GSN_NF_COMPLETEREP:{
      const NFCompleteRep * const rep =
	CAST_CONSTPTR(NFCompleteRep, signal->getDataPtr());
      // Guard the bitmask operation against out of range node ids
      if (rep->failedNodeId <= stoppedNodes.max_size())
        stoppedNodes.set(rep->failedNodeId);
      break;
    }
    case GSN_NODE_FAILREP:{
      const NodeFailRep * const rep =
	CAST_CONSTPTR(NodeFailRep, signal->getDataPtr());
      Uint32 len = NodeFailRep::getNodeMaskLength(signal->getLength());
      require(len == NodeBitmask::Size); // only full length in ndbapi
      // All failed nodes count as stopped
      NodeBitmask mask;
      mask.assign(len, rep->theAllNodes);
      stoppedNodes.bitOR(mask);
      break;
    }
    case GSN_API_REGCONF:
    case GSN_TAKE_OVERTCCONF:
    case GSN_CONNECT_REP:
      // Not relevant for the stop protocol, keep waiting
      continue;
    default:
      report_unknown_signal(signal);
      DBUG_RETURN(SEND_OR_RECEIVE_FAILED);
    }
  }
  // Do not ask the caller to stop this node when an error is returned
  if (error && *stopSelf)
  {
    *stopSelf= 0;
  }
  DBUG_RETURN(error);
}
1637 
1638 /*
1639  * Stop one nodes
1640  */
1641 
stopNodes(const Vector<NodeId> & node_ids,int * stopCount,bool abort,bool force,int * stopSelf)1642 int MgmtSrvr::stopNodes(const Vector<NodeId> &node_ids,
1643                         int *stopCount, bool abort, bool force,
1644                         int* stopSelf)
1645 {
1646   if (force || abort)
1647     ; // Skip node state checks
1648   else if (is_any_node_starting())
1649   {
1650     /* Refuse to stop since some node(s) are starting */
1651     return OPERATION_NOT_ALLOWED_START_STOP;
1652   }
1653 
1654   NodeBitmask nodes;
1655   int ret = 0;
1656   if (node_ids.size() > 0)
1657   {
1658     ret = sendSTOP_REQ(node_ids, nodes,
1659                        abort, false, false, false, false,
1660                        stopSelf);
1661   }
1662   else
1663   {
1664     ret = sendall_STOP_REQ(nodes,
1665                            abort, false, false, false, false);
1666   }
1667 
1668   if (stopCount)
1669     *stopCount= nodes.count();
1670   return ret;
1671 }
1672 
shutdownMGM(int * stopCount,bool abort,int * stopSelf)1673 int MgmtSrvr::shutdownMGM(int *stopCount, bool abort, int *stopSelf)
1674 {
1675   NodeId nodeId = 0;
1676   int error;
1677 
1678   while(getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_MGM))
1679   {
1680     if(nodeId==getOwnNodeId())
1681       continue;
1682     error= sendStopMgmd(nodeId, abort, true, false,
1683                         false, false);
1684     if (error == 0)
1685       (*stopCount)++;
1686   }
1687 
1688   *stopSelf= 1;
1689   (*stopCount)++;
1690 
1691   return 0;
1692 }
1693 
1694 /*
1695  * Perform DB nodes shutdown.
1696  * MGM servers are left in their current state
1697  */
1698 
shutdownDB(int * stopCount,bool abort)1699 int MgmtSrvr::shutdownDB(int * stopCount, bool abort)
1700 {
1701   NodeBitmask nodes;
1702 
1703   int ret = sendall_STOP_REQ(nodes,
1704                              abort,
1705                              true,
1706                              false,
1707                              false,
1708                              false);
1709 
1710   if (stopCount)
1711     *stopCount = nodes.count();
1712   return ret;
1713 }
1714 
/*
 * Enter single user mode on all live nodes
 *
 * A STOP_REQ with singleuser set and singleUserApi = apiNodeId is sent
 * to NDBCNTR in every NDB node for which okToSendTo(nodeId, true)
 * succeeds. The function then waits until every node the request was
 * sent to has answered or has been reported as failed.
 *
 * stopCount (optional) is set to 0 on entry and to the number of
 * STOP_CONF received when the wait completes.
 *
 * Returns 0 when no node refused,
 *         NODE_NOT_API_NODE when apiNodeId is not an API node,
 *         SEND_OR_RECEIVE_FAILED when no request could be sent although
 *         at least one node was tried, or an unexpected signal arrived,
 *         NO_CONTACT_WITH_DB_NODES when there was no node to try,
 *         the translated error code of the last STOP_REF received.
 */

int MgmtSrvr::enterSingleUser(int * stopCount, Uint32 apiNodeId)
{
  if (getNodeType(apiNodeId) != NDB_MGM_NODE_TYPE_API)
    return NODE_NOT_API_NODE;

  // Init
  if (stopCount)
  {
    * stopCount = 0;
  }

  SignalSender ss(theFacade);
  ss.lock(); // lock will be released on exit

  // Build the STOP_REQ once, the same signal is sent to every node
  SimpleSignal ssig;
  StopReq* const stopReq = CAST_PTR(StopReq, ssig.getDataPtrSend());
  ssig.set(ss, TestOrd::TraceAPI, NDBCNTR, GSN_STOP_REQ, StopReq::SignalLength);

  stopReq->requestInfo = 0;
  stopReq->apiTimeout = 5000;
  stopReq->transactionTimeout = 1000;
  stopReq->readOperationTimeout = 1000;
  stopReq->operationTimeout = 1000;
  stopReq->senderData = 12;
  stopReq->senderRef = ss.getOwnRef();
  stopReq->singleuser = 1;
  stopReq->singleUserApi = apiNodeId;
  StopReq::setSystemStop(stopReq->requestInfo, false);
  StopReq::setPerformRestart(stopReq->requestInfo, false);
  StopReq::setStopAbort(stopReq->requestInfo, false);

  // nodes holds the nodes we still expect an answer from
  NodeBitmask nodes;
  {
    NodeId nodeId = 0;
    // failed counts nodes that were not ok to send to or where send failed
    Uint32 failed = 0;
    while (getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB))
    {
      if (okToSendTo(nodeId, true) == 0)
      {
	SendStatus result = ss.sendSignal(nodeId, &ssig);
	if (result == SEND_OK)
	  nodes.set(nodeId);
        else
          failed++;
      }
      else
      {
        failed++;
      }
    }
    // Nothing was sent, there is nothing to wait for
    if (nodes.isclear())
    {
      if (failed)
      {
        return SEND_OR_RECEIVE_FAILED;
      }
      return NO_CONTACT_WITH_DB_NODES;
    }
  }

  int error = 0;
  // ok counts the nodes that confirmed the request
  int ok = 0;
  while (!nodes.isclear())
  {
    SimpleSignal *signal = ss.waitFor();
    int gsn = signal->readSignalNumber();
    switch (gsn) {
    case GSN_STOP_REF:
    {
      // The node refused, remember the (last) reason
      const StopRef * const ref = CAST_CONSTPTR(StopRef, signal->getDataPtr());
      nodes.clear(refToNode(signal->header.theSendersBlockRef));
      error = translateStopRef(ref->errorCode);
      break;
    }
    case GSN_STOP_CONF:
    {
      ok++;
      nodes.clear(refToNode(signal->header.theSendersBlockRef));
      break;
    }
    case GSN_NF_COMPLETEREP:
    {
      const NFCompleteRep * rep = CAST_CONSTPTR(NFCompleteRep,
                                                signal->getDataPtr());
      // Guard the bitmask operation against out of range node ids
      if (rep->failedNodeId <= nodes.max_size())
        nodes.clear(rep->failedNodeId);
      break;
    }

    case GSN_NODE_FAILREP:
    {
      const NodeFailRep * rep = CAST_CONSTPTR(NodeFailRep,
                                              signal->getDataPtr());
      Uint32 len = NodeFailRep::getNodeMaskLength(signal->getLength());
      assert(len == NodeBitmask::Size); // only full length in ndbapi
      // Stop waiting for all the failed nodes
      NodeBitmask mask;
      mask.assign(len, rep->theAllNodes);
      nodes.bitANDC(mask);
      break;
    }
    case GSN_API_REGCONF:
    case GSN_TAKE_OVERTCCONF:
    case GSN_CONNECT_REP:
      // Not relevant here, keep waiting
      continue;

    default:
      report_unknown_signal(signal);
      return SEND_OR_RECEIVE_FAILED;
    }
  }

  if (stopCount)
  {
    * stopCount = ok;
  }

  return error;
}
1837 
1838 /*
1839  * Perform node restart
1840  */
1841 
is_any_node_stopping()1842 bool MgmtSrvr::is_any_node_stopping()
1843 {
1844   NodeId nodeId = 0;
1845   trp_node node;
1846   while(getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB))
1847   {
1848     node = getNodeInfo(nodeId);
1849     if((node.m_state.startLevel == NodeState::SL_STOPPING_1) ||
1850        (node.m_state.startLevel == NodeState::SL_STOPPING_2) ||
1851        (node.m_state.startLevel == NodeState::SL_STOPPING_3) ||
1852        (node.m_state.startLevel == NodeState::SL_STOPPING_4))
1853       return true; // At least one node was stopping
1854   }
1855   return false; // No node was stopping
1856 }
1857 
is_any_node_starting()1858 bool MgmtSrvr::is_any_node_starting()
1859 {
1860   NodeId nodeId = 0;
1861   trp_node node;
1862   while(getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB))
1863   {
1864     node = getNodeInfo(nodeId);
1865     if (node.m_state.startLevel == NodeState::SL_STARTING)
1866       return true; // At least one node was starting
1867   }
1868   return false; // No node was starting
1869 }
1870 
is_cluster_single_user()1871 bool MgmtSrvr::is_cluster_single_user()
1872 {
1873   NodeId nodeId = 0;
1874   trp_node node;
1875   while(getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB))
1876   {
1877     node = getNodeInfo(nodeId);
1878     if (node.m_state.startLevel == NodeState::SL_SINGLEUSER)
1879       return true; // Cluster is in single user modes
1880   }
1881   return false; // Cluster is not in single user mode
1882 }
1883 
restartNodes(const Vector<NodeId> & node_ids,int * stopCount,bool nostart,bool initialStart,bool abort,bool force,int * stopSelf,unsigned int num_secs_to_wait_for_node)1884 int MgmtSrvr::restartNodes(const Vector<NodeId> &node_ids,
1885                            int * stopCount, bool nostart,
1886                            bool initialStart, bool abort,
1887                            bool force,
1888                            int *stopSelf,
1889                            unsigned int num_secs_to_wait_for_node)
1890 {
1891   if (is_cluster_single_user())
1892   {
1893     /*
1894       Refuse to restart since cluster is in single user mode
1895       and when the node is restarting it would not be allowed to
1896       join cluster, see BUG#31056
1897     */
1898     return OPERATION_NOT_ALLOWED_START_STOP;
1899   }
1900 
1901   if (force || abort)
1902     ; // Skip node state checks
1903   else if (is_any_node_starting())
1904   {
1905     /* Refuse to restart since some node(s) are starting */
1906     return OPERATION_NOT_ALLOWED_START_STOP;
1907   }
1908 
1909   NodeBitmask nodes;
1910   int ret = 0;
1911   if (node_ids.size() > 0)
1912   {
1913     ret = sendSTOP_REQ(node_ids, nodes,
1914                        abort, false, true, true, initialStart,
1915                        stopSelf);
1916   }
1917   else
1918   {
1919     ret = sendall_STOP_REQ(nodes,
1920                            abort, false, true, true, initialStart);
1921   }
1922 
1923   if (ret)
1924     return ret;
1925 
1926   if (stopCount)
1927     *stopCount = nodes.count();
1928 
1929   // start up the nodes again
1930   const Uint64 waitTime = 12000;
1931   const NDB_TICKS startTime = NdbTick_getCurrentTicks();
1932   for (unsigned i = 0; i < node_ids.size(); i++)
1933   {
1934     NodeId nodeId= node_ids[i];
1935     enum ndb_mgm_node_status s;
1936     s = NDB_MGM_NODE_STATUS_NO_CONTACT;
1937 #ifdef VM_TRACE
1938     ndbout_c("Waiting for %d not started", nodeId);
1939 #endif
1940     while (s != NDB_MGM_NODE_STATUS_NOT_STARTED &&
1941            NdbTick_Elapsed(startTime,NdbTick_getCurrentTicks()).milliSec() < waitTime)
1942     {
1943       Uint32 startPhase = 0, version = 0, dynamicId = 0, nodeGroup = 0;
1944       Uint32 mysql_version = 0;
1945       Uint32 connectCount = 0;
1946       bool system;
1947       const char *address= NULL;
1948       char addr_buf[NDB_ADDR_STRLEN];
1949       status(nodeId, &s, &version, &mysql_version, &startPhase,
1950              &system, &dynamicId, &nodeGroup, &connectCount,
1951              &address, addr_buf, sizeof(addr_buf));
1952       NdbSleep_MilliSleep(100);
1953     }
1954   }
1955 
1956   if (nostart)
1957     return 0;
1958 
1959   /*
1960     verify that no nodes are stopping before starting as this would cause
1961     the starting node to shutdown
1962   */
1963   int retry= 600*10;
1964   for (;is_any_node_stopping();)
1965   {
1966     if (--retry)
1967       break;
1968     NdbSleep_MilliSleep(100);
1969   }
1970 
1971   /*
1972     start the nodes
1973   */
1974   for (unsigned i = 0; i < node_ids.size(); i++)
1975   {
1976     unsigned int loop_count = 0;
1977     do
1978     {
1979       int result = sendSTART_ORD(node_ids[i]);
1980       if (result == SEND_OR_RECEIVE_FAILED ||
1981           result == NO_CONTACT_WITH_PROCESS)
1982       {
1983         if (loop_count >= num_secs_to_wait_for_node)
1984           break;
1985         loop_count++;
1986         NdbSleep_MilliSleep(1000);
1987       }
1988       else
1989       {
1990         break;
1991       }
1992     } while (1);
1993   }
1994   return 0;
1995 }
1996 
1997 /*
1998  * Perform restart of all DB nodes
1999  */
2000 
restartDB(bool nostart,bool initialStart,bool abort,int * stopCount,unsigned int num_secs_to_wait_for_node)2001 int MgmtSrvr::restartDB(bool nostart, bool initialStart,
2002                         bool abort, int * stopCount,
2003                         unsigned int num_secs_to_wait_for_node)
2004 {
2005   NodeBitmask nodes;
2006 
2007   int ret = sendall_STOP_REQ(nodes,
2008                              abort,
2009                              true,
2010                              true,
2011                              true,
2012                              initialStart);
2013 
2014   if (ret)
2015     return ret;
2016 
2017   if (stopCount)
2018     *stopCount = nodes.count();
2019 
2020 #ifdef VM_TRACE
2021     ndbout_c("Stopped %d nodes", nodes.count());
2022 #endif
2023   /**
2024    * Here all nodes were correctly stopped,
2025    * so we wait for all nodes to be contactable
2026    */
2027   NodeId nodeId = 0;
2028   const Uint64 waitTime = 12000;
2029   const NDB_TICKS startTime = NdbTick_getCurrentTicks();
2030 
2031 
2032   while(getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB)) {
2033     if (!nodes.get(nodeId))
2034       continue;
2035     enum ndb_mgm_node_status s;
2036     s = NDB_MGM_NODE_STATUS_NO_CONTACT;
2037 #ifdef VM_TRACE
2038     ndbout_c("Waiting for %d not started", nodeId);
2039 #endif
2040     while (s != NDB_MGM_NODE_STATUS_NOT_STARTED &&
2041            NdbTick_Elapsed(startTime,NdbTick_getCurrentTicks()).milliSec() < waitTime)
2042     {
2043       Uint32 startPhase = 0, version = 0, dynamicId = 0, nodeGroup = 0;
2044       Uint32 mysql_version = 0;
2045       Uint32 connectCount = 0;
2046       bool system;
2047       const char *address;
2048       char addr_buf[NDB_ADDR_STRLEN];
2049       status(nodeId, &s, &version, &mysql_version, &startPhase,
2050 	     &system, &dynamicId, &nodeGroup, &connectCount,
2051              &address, addr_buf, sizeof(addr_buf));
2052       NdbSleep_MilliSleep(100);
2053     }
2054   }
2055 
2056   if(nostart)
2057     return 0;
2058 
2059   /**
2060    * Now we start all database nodes (i.e. we make them non-idle)
2061    * We ignore the result we get from the start command.
2062    */
2063   nodeId = 0;
2064   while(getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB)) {
2065     if (!nodes.get(nodeId))
2066       continue;
2067     int result;
2068     unsigned int loop_count = 0;
2069     do
2070     {
2071       result = sendSTART_ORD(nodeId);
2072       if (result != SEND_OR_RECEIVE_FAILED &&
2073           result != NO_CONTACT_WITH_PROCESS)
2074         break;
2075       if (loop_count >= num_secs_to_wait_for_node)
2076         break;
2077       NdbSleep_MilliSleep(1000);
2078       loop_count++;
2079     } while (1);
2080     g_eventLogger->debug("Started node %d with result %d", nodeId, result);
2081     /**
2082      * Errors from this call are deliberately ignored.
2083      * Maybe the user only wanted to restart a subset of the nodes.
2084      * It is also easy for the user to check which nodes have
2085      * started and which nodes have not.
2086      */
2087   }
2088 
2089   return 0;
2090 }
2091 
2092 int
exitSingleUser(int * stopCount,bool abort)2093 MgmtSrvr::exitSingleUser(int * stopCount, bool abort)
2094 {
2095   NodeId nodeId = 0;
2096   int count = 0;
2097 
2098   SignalSender ss(theFacade);
2099   ss.lock(); // lock will be released on exit
2100 
2101   SimpleSignal ssig;
2102   ResumeReq* const resumeReq =
2103     CAST_PTR(ResumeReq, ssig.getDataPtrSend());
2104 
2105   ssig.set(ss,TestOrd::TraceAPI, NDBCNTR, GSN_RESUME_REQ,
2106 	   ResumeReq::SignalLength);
2107   resumeReq->senderData = 12;
2108   resumeReq->senderRef = ss.getOwnRef();
2109 
2110   while(getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB)){
2111     if(okToSendTo(nodeId, true) == 0){
2112       SendStatus result = ss.sendSignal(nodeId, &ssig);
2113       if (result == SEND_OK)
2114 	count++;
2115     }
2116   }
2117 
2118   if(stopCount != 0)
2119     * stopCount = count;
2120 
2121   return 0;
2122 }
2123 
2124 /*****************************************************************************
2125  * Status
2126  ****************************************************************************/
2127 
2128 void
status_mgmd(NodeId node_id,ndb_mgm_node_status & node_status,Uint32 & version,Uint32 & mysql_version,const char ** address,char * addr_buf,size_t addr_buf_size)2129 MgmtSrvr::status_mgmd(NodeId node_id,
2130                       ndb_mgm_node_status& node_status,
2131                       Uint32& version, Uint32& mysql_version,
2132                       const char **address,
2133                       char *addr_buf,
2134                       size_t addr_buf_size)
2135 {
2136   assert(getNodeType(node_id) == NDB_MGM_NODE_TYPE_MGM);
2137 
2138   if (node_id == getOwnNodeId())
2139   {
2140     /*
2141       Special case to get version of own node
2142       - version and mysql_version is hardcoded
2143       - address should be the address seen from ndbd(if it's connected)
2144         else use HostName from config
2145     */
2146     Uint32 tmp_version = 0, tmp_mysql_version = 0;
2147     sendVersionReq(node_id,
2148                    tmp_version,
2149                    tmp_mysql_version,
2150                    address,
2151                    addr_buf,
2152                    addr_buf_size);
2153     // Check that the version returned is equal to compiled in version
2154     assert(tmp_version == 0 ||
2155            (tmp_version == NDB_VERSION &&
2156             tmp_mysql_version == NDB_MYSQL_VERSION_D));
2157 
2158     version = NDB_VERSION;
2159     mysql_version = NDB_MYSQL_VERSION_D;
2160     if(!*address)
2161     {
2162       // No address returned from ndbd -> get HostName from config
2163       Guard g(m_local_config_mutex);
2164       ConfigIter iter(m_local_config, CFG_SECTION_NODE);
2165       require(iter.find(CFG_NODE_ID, node_id) == 0);
2166       require(iter.get(CFG_NODE_HOST, address) == 0);
2167 
2168       /*
2169         Try to convert HostName to numerical ip address
2170         (to get same output as if ndbd had replied)
2171       */
2172       struct in_addr addr;
2173       if (Ndb_getInAddr(&addr, *address) == 0)
2174       {
2175         *address = Ndb_inet_ntop(AF_INET,
2176                                  static_cast<void*>(&addr),
2177                                  addr_buf,
2178                                  (socklen_t)addr_buf_size);
2179       }
2180     }
2181 
2182     node_status = NDB_MGM_NODE_STATUS_CONNECTED;
2183     return;
2184   }
2185 
2186   /*
2187     MGM nodes are connected directly to all other MGM
2188     node(s), return status as seen by ClusterMgr
2189   */
2190   const trp_node node = getNodeInfo(node_id);
2191   if(node.is_connected())
2192   {
2193     version = node.m_info.m_version;
2194     mysql_version = node.m_info.m_mysql_version;
2195     node_status = NDB_MGM_NODE_STATUS_CONNECTED;
2196     *address= get_connect_address(node_id,
2197                                   addr_buf,
2198                                   addr_buf_size);
2199   }
2200   else
2201   {
2202     version = 0;
2203     mysql_version = 0;
2204     node_status = NDB_MGM_NODE_STATUS_NO_CONTACT;
2205   }
2206 
2207   return;
2208 }
2209 
2210 int
status(int nodeId,ndb_mgm_node_status * _status,Uint32 * version,Uint32 * mysql_version,Uint32 * _phase,bool * _system,Uint32 * dynamic,Uint32 * nodegroup,Uint32 * connectCount,const char ** address,char * addr_buf,size_t addr_buf_size)2211 MgmtSrvr::status(int nodeId,
2212                  ndb_mgm_node_status * _status,
2213 		 Uint32 * version,
2214 		 Uint32 * mysql_version,
2215 		 Uint32 * _phase,
2216 		 bool * _system,
2217 		 Uint32 * dynamic,
2218 		 Uint32 * nodegroup,
2219 		 Uint32 * connectCount,
2220 		 const char **address,
2221                  char *addr_buf,
2222                  size_t addr_buf_size)
2223 {
2224   switch(getNodeType(nodeId)){
2225   case NDB_MGM_NODE_TYPE_API:
2226     status_api(nodeId,
2227                *_status,
2228                *version,
2229                *mysql_version,
2230                address,
2231                addr_buf,
2232                addr_buf_size);
2233     return 0;
2234     break;
2235 
2236   case NDB_MGM_NODE_TYPE_MGM:
2237     status_mgmd(nodeId,
2238                 *_status,
2239                 *version,
2240                 *mysql_version,
2241                 address,
2242                 addr_buf,
2243                 addr_buf_size);
2244     return 0;
2245     break;
2246 
2247   case NDB_MGM_NODE_TYPE_NDB:
2248     break;
2249 
2250   default:
2251     abort();
2252     break;
2253   }
2254 
2255   const trp_node node = getNodeInfo(nodeId);
2256   assert(getNodeType(nodeId) == NDB_MGM_NODE_TYPE_NDB &&
2257          node.m_info.getType() == NodeInfo::DB);
2258 
2259   if(!node.is_connected()){
2260     * _status = NDB_MGM_NODE_STATUS_NO_CONTACT;
2261     return 0;
2262   }
2263 
2264   * version = node.m_info.m_version;
2265   * mysql_version = node.m_info.m_mysql_version;
2266 
2267   *address= get_connect_address(nodeId, addr_buf, addr_buf_size);
2268 
2269   * dynamic = node.m_state.dynamicId;
2270   * nodegroup = node.m_state.nodeGroup;
2271   * connectCount = node.m_info.m_connectCount;
2272 
2273   switch(node.m_state.startLevel){
2274   case NodeState::SL_CMVMI:
2275     * _status = NDB_MGM_NODE_STATUS_NOT_STARTED;
2276     * _phase = 0;
2277     return 0;
2278     break;
2279   case NodeState::SL_STARTING:
2280     * _status     = NDB_MGM_NODE_STATUS_STARTING;
2281     * _phase = node.m_state.starting.startPhase;
2282     return 0;
2283     break;
2284   case NodeState::SL_STARTED:
2285     * _status = NDB_MGM_NODE_STATUS_STARTED;
2286     * _phase = 0;
2287     return 0;
2288     break;
2289   case NodeState::SL_STOPPING_1:
2290     * _status = NDB_MGM_NODE_STATUS_SHUTTING_DOWN;
2291     * _phase = 1;
2292     * _system = node.m_state.stopping.systemShutdown != 0;
2293     return 0;
2294     break;
2295   case NodeState::SL_STOPPING_2:
2296     * _status = NDB_MGM_NODE_STATUS_SHUTTING_DOWN;
2297     * _phase = 2;
2298     * _system = node.m_state.stopping.systemShutdown != 0;
2299     return 0;
2300     break;
2301   case NodeState::SL_STOPPING_3:
2302     * _status = NDB_MGM_NODE_STATUS_SHUTTING_DOWN;
2303     * _phase = 3;
2304     * _system = node.m_state.stopping.systemShutdown != 0;
2305     return 0;
2306     break;
2307   case NodeState::SL_STOPPING_4:
2308     * _status = NDB_MGM_NODE_STATUS_SHUTTING_DOWN;
2309     * _phase = 4;
2310     * _system = node.m_state.stopping.systemShutdown != 0;
2311     return 0;
2312     break;
2313   case NodeState::SL_SINGLEUSER:
2314     * _status = NDB_MGM_NODE_STATUS_SINGLEUSER;
2315     * _phase  = 0;
2316     return 0;
2317     break;
2318   default:
2319     * _status = NDB_MGM_NODE_STATUS_UNKNOWN;
2320     * _phase = 0;
2321     return 0;
2322   }
2323 
2324   return -1;
2325 }
2326 
2327 int
setEventReportingLevelImpl(int nodeId_arg,const EventSubscribeReq & ll)2328 MgmtSrvr::setEventReportingLevelImpl(int nodeId_arg,
2329 				     const EventSubscribeReq& ll)
2330 {
2331   SignalSender ss(theFacade);
2332   NdbNodeBitmask nodes;
2333   nodes.clear();
2334   while (1)
2335   {
2336     Uint32 nodeId, max;
2337     ss.lock();
2338     SimpleSignal ssig;
2339     EventSubscribeReq * dst =
2340       CAST_PTR(EventSubscribeReq, ssig.getDataPtrSend());
2341     ssig.set(ss,TestOrd::TraceAPI, CMVMI, GSN_EVENT_SUBSCRIBE_REQ,
2342              EventSubscribeReq::SignalLength);
2343     *dst = ll;
2344 
2345     if (nodeId_arg == 0)
2346     {
2347       // all nodes
2348       nodeId = 1;
2349       max = MAX_NDB_NODES;
2350     }
2351     else
2352     {
2353       // only one node
2354       max = nodeId = nodeId_arg;
2355     }
2356     // first make sure nodes are sendable
2357     for(; nodeId <= max; nodeId++)
2358     {
2359       if (nodeTypes[nodeId] != NODE_TYPE_DB)
2360         continue;
2361       if (okToSendTo(nodeId, true))
2362       {
2363         if (getNodeInfo(nodeId).is_connected()  == false)
2364         {
2365           // node not connected we can safely skip this one
2366           continue;
2367         }
2368         // api_reg_conf not recevied yet, need to retry
2369         return SEND_OR_RECEIVE_FAILED;
2370       }
2371     }
2372 
2373     if (nodeId_arg == 0)
2374     {
2375       // all nodes
2376       nodeId = 1;
2377       max = MAX_NDB_NODES;
2378     }
2379     else
2380     {
2381       // only one node
2382       max = nodeId = nodeId_arg;
2383     }
2384     // now send to all sendable nodes nodes
2385     // note, lock is held, so states have not changed
2386     for(; (Uint32) nodeId <= max; nodeId++)
2387     {
2388       if (nodeTypes[nodeId] != NODE_TYPE_DB)
2389         continue;
2390       if (getNodeInfo(nodeId).is_connected()  == false)
2391         continue; // node is not connected, skip
2392       if (ss.sendSignal(nodeId, &ssig) == SEND_OK)
2393         nodes.set(nodeId);
2394       else if (max == nodeId)
2395       {
2396         return SEND_OR_RECEIVE_FAILED;
2397       }
2398     }
2399     break;
2400   }
2401 
2402   if (nodes.isclear())
2403   {
2404     return SEND_OR_RECEIVE_FAILED;
2405   }
2406 
2407   int error = 0;
2408   while (!nodes.isclear())
2409   {
2410     Uint32 nodeId;
2411     SimpleSignal *signal = ss.waitFor();
2412     int gsn = signal->readSignalNumber();
2413     nodeId = refToNode(signal->header.theSendersBlockRef);
2414     switch (gsn) {
2415     case GSN_EVENT_SUBSCRIBE_CONF:{
2416       nodes.clear(nodeId);
2417       break;
2418     }
2419     case GSN_EVENT_SUBSCRIBE_REF:{
2420       nodes.clear(nodeId);
2421       error = 1;
2422       break;
2423     }
2424       // Since sending okToSend(true),
2425       // there is no guarantee that NF_COMPLETEREP will come
2426       // i.e listen also to NODE_FAILREP
2427     case GSN_NODE_FAILREP: {
2428       const NodeFailRep * const rep =
2429 	CAST_CONSTPTR(NodeFailRep, signal->getDataPtr());
2430       Uint32 len = NodeFailRep::getNodeMaskLength(signal->getLength());
2431       require(len == NodeBitmask::Size); // only full length in ndbapi
2432       NdbNodeBitmask mask;
2433       // only care about data nodes
2434       mask.assign(NdbNodeBitmask::Size, rep->theNodes);
2435       nodes.bitANDC(mask);
2436       break;
2437     }
2438 
2439     case GSN_NF_COMPLETEREP:{
2440       const NFCompleteRep * const rep =
2441 	CAST_CONSTPTR(NFCompleteRep, signal->getDataPtr());
2442       if (rep->failedNodeId <= nodes.max_size())
2443         nodes.clear(rep->failedNodeId);
2444       break;
2445     }
2446     case GSN_API_REGCONF:
2447     case GSN_TAKE_OVERTCCONF:
2448     case GSN_CONNECT_REP:
2449       continue;
2450     default:
2451       report_unknown_signal(signal);
2452       return SEND_OR_RECEIVE_FAILED;
2453     }
2454   }
2455   if (error)
2456     return SEND_OR_RECEIVE_FAILED;
2457   return 0;
2458 }
2459 
2460 //****************************************************************************
2461 //****************************************************************************
2462 int
setNodeLogLevelImpl(int nodeId,const SetLogLevelOrd & ll)2463 MgmtSrvr::setNodeLogLevelImpl(int nodeId, const SetLogLevelOrd & ll)
2464 {
2465   INIT_SIGNAL_SENDER(ss,nodeId);
2466 
2467   SimpleSignal ssig;
2468   ssig.set(ss,TestOrd::TraceAPI, CMVMI, GSN_SET_LOGLEVELORD,
2469 	   SetLogLevelOrd::SignalLength);
2470   SetLogLevelOrd* const dst = CAST_PTR(SetLogLevelOrd, ssig.getDataPtrSend());
2471   *dst = ll;
2472 
2473   return ss.sendSignal(nodeId, &ssig) == SEND_OK ? 0 : SEND_OR_RECEIVE_FAILED;
2474 }
2475 
2476 //****************************************************************************
2477 //****************************************************************************
2478 
2479 int
insertError(int nodeId,int errorNo,Uint32 * extra)2480 MgmtSrvr::insertError(int nodeId, int errorNo, Uint32 * extra)
2481 {
2482   int block;
2483 
2484   if (errorNo < 0) {
2485     return INVALID_ERROR_NUMBER;
2486   }
2487 
2488   SignalSender ss(theFacade);
2489   ss.lock(); /* lock will be released on exit */
2490 
2491   if(getNodeType(nodeId) == NDB_MGM_NODE_TYPE_NDB)
2492   {
2493     block= CMVMI;
2494   }
2495   else if(nodeId == _ownNodeId)
2496   {
2497     g_errorInsert= errorNo;
2498     return 0;
2499   }
2500   else if(getNodeType(nodeId) == NDB_MGM_NODE_TYPE_MGM)
2501     block= _blockNumber;
2502   else
2503     return WRONG_PROCESS_TYPE;
2504 
2505   SimpleSignal ssig;
2506   ssig.set(ss,TestOrd::TraceAPI, block, GSN_TAMPER_ORD,
2507 	   TamperOrd::SignalLength);
2508   TamperOrd* const tamperOrd = CAST_PTR(TamperOrd, ssig.getDataPtrSend());
2509   tamperOrd->errorNo = errorNo;
2510 
2511   if (extra)
2512   {
2513     ssig.getDataPtrSend()[1] = * extra;
2514     ssig.header.theLength++;
2515   }
2516 
2517   int res = ss.sendSignal(nodeId, &ssig) == SEND_OK ? 0 :SEND_OR_RECEIVE_FAILED;
2518 
2519   if (res == 0)
2520   {
2521     /**
2522      * In order to make NDB_TAMPER (almost) syncronous,
2523      *   make a syncronous request *after* the NDB_TAMPER
2524      */
2525     make_sync_req(ss, Uint32(nodeId));
2526   }
2527 
2528   return res;
2529 }
2530 
2531 
2532 int
startSchemaTrans(SignalSender & ss,NodeId & out_nodeId,Uint32 transId,Uint32 & out_transKey)2533 MgmtSrvr::startSchemaTrans(SignalSender& ss, NodeId & out_nodeId,
2534                            Uint32 transId, Uint32 & out_transKey)
2535 {
2536   SimpleSignal ssig;
2537 
2538   ssig.set(ss, 0, DBDICT, GSN_SCHEMA_TRANS_BEGIN_REQ,
2539            SchemaTransBeginReq::SignalLength);
2540 
2541   SchemaTransBeginReq* req =
2542     CAST_PTR(SchemaTransBeginReq, ssig.getDataPtrSend());
2543 
2544   req->clientRef =  ss.getOwnRef();
2545   req->transId = transId;
2546   req->requestInfo = 0;
2547 
2548   NodeId nodeId = ss.get_an_alive_node();
2549 
2550 retry:
2551   if (ss.get_node_alive(nodeId) == false)
2552   {
2553     nodeId = ss.get_an_alive_node();
2554   }
2555 
2556   if (ss.sendSignal(nodeId, &ssig) != SEND_OK)
2557   {
2558     return SEND_OR_RECEIVE_FAILED;
2559   }
2560 
2561   while (true)
2562   {
2563     SimpleSignal *signal = ss.waitFor();
2564     int gsn = signal->readSignalNumber();
2565     switch (gsn) {
2566     case GSN_SCHEMA_TRANS_BEGIN_CONF: {
2567       const SchemaTransBeginConf * conf =
2568         CAST_CONSTPTR(SchemaTransBeginConf, signal->getDataPtr());
2569       out_transKey = conf->transKey;
2570       out_nodeId = nodeId;
2571       return 0;
2572     }
2573     case GSN_SCHEMA_TRANS_BEGIN_REF: {
2574       const SchemaTransBeginRef * ref =
2575         CAST_CONSTPTR(SchemaTransBeginRef, signal->getDataPtr());
2576 
2577       switch(ref->errorCode){
2578       case SchemaTransBeginRef::NotMaster:
2579         nodeId = ref->masterNodeId;
2580         // Fall-through
2581       case SchemaTransBeginRef::Busy:
2582       case SchemaTransBeginRef::BusyWithNR:
2583         goto retry;
2584       default:
2585         return ref->errorCode;
2586       }
2587     }
2588     case GSN_NF_COMPLETEREP:
2589       // ignore
2590       break;
2591     case GSN_NODE_FAILREP:{
2592       const NodeFailRep * const rep =
2593         CAST_CONSTPTR(NodeFailRep, signal->getDataPtr());
2594       Uint32 len = NodeFailRep::getNodeMaskLength(signal->getLength());
2595       assert(len == NodeBitmask::Size); // only full length in ndbapi
2596       if (BitmaskImpl::safe_get(len, rep->theAllNodes, nodeId))
2597       {
2598         nodeId++;
2599         goto retry;
2600       }
2601       break;
2602     }
2603     case GSN_API_REGCONF:
2604     case GSN_TAKE_OVERTCCONF:
2605     case GSN_CONNECT_REP:
2606       break;
2607     default:
2608       report_unknown_signal(signal);
2609       return SEND_OR_RECEIVE_FAILED;
2610     }
2611   }
2612 }
2613 
2614 int
endSchemaTrans(SignalSender & ss,NodeId nodeId,Uint32 transId,Uint32 transKey,Uint32 flags)2615 MgmtSrvr::endSchemaTrans(SignalSender& ss, NodeId nodeId,
2616                          Uint32 transId, Uint32 transKey,
2617                          Uint32 flags)
2618 {
2619   SimpleSignal ssig;
2620 
2621   ssig.set(ss, 0, DBDICT, GSN_SCHEMA_TRANS_END_REQ,
2622            SchemaTransEndReq::SignalLength);
2623 
2624   SchemaTransEndReq* req =
2625     CAST_PTR(SchemaTransEndReq, ssig.getDataPtrSend());
2626 
2627   req->clientRef =  ss.getOwnRef();
2628   req->transId = transId;
2629   req->requestInfo = 0;
2630   req->transKey = transKey;
2631   req->flags = flags;
2632 
2633   if (ss.sendSignal(nodeId, &ssig) != SEND_OK)
2634   {
2635     return SEND_OR_RECEIVE_FAILED;
2636   }
2637 
2638   while (true)
2639   {
2640     SimpleSignal *signal = ss.waitFor();
2641     int gsn = signal->readSignalNumber();
2642     switch (gsn) {
2643     case GSN_SCHEMA_TRANS_END_CONF: {
2644       return 0;
2645     }
2646     case GSN_SCHEMA_TRANS_END_REF: {
2647       const SchemaTransEndRef * ref =
2648         CAST_CONSTPTR(SchemaTransEndRef, signal->getDataPtr());
2649       return ref->errorCode;
2650     }
2651     case GSN_NF_COMPLETEREP:
2652       // ignore
2653       break;
2654     case GSN_NODE_FAILREP:{
2655       const NodeFailRep * const rep =
2656         CAST_CONSTPTR(NodeFailRep, signal->getDataPtr());
2657       Uint32 len = NodeFailRep::getNodeMaskLength(signal->getLength());
2658       assert(len == NodeBitmask::Size); // only full length in ndbapi
2659       if (BitmaskImpl::safe_get(len, rep->theAllNodes, nodeId))
2660       {
2661         return -1;
2662       }
2663       break;
2664     }
2665     case GSN_API_REGCONF:
2666     case GSN_TAKE_OVERTCCONF:
2667     case GSN_CONNECT_REP:
2668       break;
2669     default:
2670       report_unknown_signal(signal);
2671       return SEND_OR_RECEIVE_FAILED;
2672     }
2673   }
2674 }
2675 
2676 int
createNodegroup(int * nodes,int count,int * ng)2677 MgmtSrvr::createNodegroup(int *nodes, int count, int *ng)
2678 {
2679   int res;
2680   SignalSender ss(theFacade);
2681   ss.lock();
2682 
2683   Uint32 transId = rand();
2684   Uint32 transKey;
2685   NodeId nodeId;
2686 
2687   if ((res = startSchemaTrans(ss, nodeId, transId, transKey)))
2688   {
2689     return res;
2690   }
2691 
2692   SimpleSignal ssig;
2693   ssig.set(ss, 0, DBDICT, GSN_CREATE_NODEGROUP_REQ,
2694            CreateNodegroupReq::SignalLength);
2695 
2696   CreateNodegroupReq* req =
2697     CAST_PTR(CreateNodegroupReq, ssig.getDataPtrSend());
2698 
2699   req->transId = transId;
2700   req->transKey = transKey;
2701   req->nodegroupId = RNIL;
2702   req->senderData = 77;
2703   req->senderRef = ss.getOwnRef();
2704   bzero(req->nodes, sizeof(req->nodes));
2705 
2706   if (ng)
2707   {
2708     if (* ng != -1)
2709     {
2710       req->nodegroupId = * ng;
2711     }
2712   }
2713   for (int i = 0; i<count && i<(int)NDB_ARRAY_SIZE(req->nodes); i++)
2714   {
2715     req->nodes[i] = nodes[i];
2716   }
2717 
2718   if (ss.sendSignal(nodeId, &ssig) != SEND_OK)
2719   {
2720     return SEND_OR_RECEIVE_FAILED;
2721   }
2722 
2723   bool wait = true;
2724   while (wait)
2725   {
2726     SimpleSignal *signal = ss.waitFor();
2727     int gsn = signal->readSignalNumber();
2728     switch (gsn) {
2729     case GSN_CREATE_NODEGROUP_CONF: {
2730       const CreateNodegroupConf * conf =
2731         CAST_CONSTPTR(CreateNodegroupConf, signal->getDataPtr());
2732 
2733       if (ng)
2734       {
2735         * ng = conf->nodegroupId;
2736       }
2737 
2738       wait = false;
2739       break;
2740     }
2741     case GSN_CREATE_NODEGROUP_REF:{
2742       const CreateNodegroupRef * ref =
2743         CAST_CONSTPTR(CreateNodegroupRef, signal->getDataPtr());
2744       Uint32 err = ref->errorCode;
2745       endSchemaTrans(ss, nodeId, transId, transKey,
2746                      SchemaTransEndReq::SchemaTransAbort);
2747       return err;
2748     }
2749     case GSN_NF_COMPLETEREP:
2750       // ignore
2751       break;
2752     case GSN_NODE_FAILREP:{
2753       const NodeFailRep * const rep =
2754         CAST_CONSTPTR(NodeFailRep, signal->getDataPtr());
2755       Uint32 len = NodeFailRep::getNodeMaskLength(signal->getLength());
2756       assert(len == NodeBitmask::Size); // only full length in ndbapi
2757       if (BitmaskImpl::safe_get(len, rep->theAllNodes, nodeId))
2758       {
2759         return SchemaTransBeginRef::Nodefailure;
2760       }
2761       break;
2762     }
2763     case GSN_API_REGCONF:
2764     case GSN_TAKE_OVERTCCONF:
2765     case GSN_CONNECT_REP:
2766       break;
2767     default:
2768       report_unknown_signal(signal);
2769       return SEND_OR_RECEIVE_FAILED;
2770     }
2771   }
2772 
2773   return endSchemaTrans(ss, nodeId, transId, transKey, 0);
2774 }
2775 
2776 int
dropNodegroup(int ng)2777 MgmtSrvr::dropNodegroup(int ng)
2778 {
2779   int res;
2780   SignalSender ss(theFacade);
2781   ss.lock();
2782 
2783   Uint32 transId = rand();
2784   Uint32 transKey;
2785   NodeId nodeId;
2786 
2787   if ((res = startSchemaTrans(ss, nodeId, transId, transKey)))
2788   {
2789     return res;
2790   }
2791 
2792   SimpleSignal ssig;
2793   ssig.set(ss, 0, DBDICT, GSN_DROP_NODEGROUP_REQ, DropNodegroupReq::SignalLength);
2794 
2795   DropNodegroupReq* req =
2796     CAST_PTR(DropNodegroupReq, ssig.getDataPtrSend());
2797 
2798   req->transId = transId;
2799   req->transKey = transKey;
2800   req->nodegroupId = ng;
2801   req->senderData = 77;
2802   req->senderRef = ss.getOwnRef();
2803 
2804   if (ss.sendSignal(nodeId, &ssig) != SEND_OK)
2805   {
2806     return SEND_OR_RECEIVE_FAILED;
2807   }
2808 
2809   bool wait = true;
2810   while (wait)
2811   {
2812     SimpleSignal *signal = ss.waitFor();
2813     int gsn = signal->readSignalNumber();
2814     switch (gsn) {
2815     case GSN_DROP_NODEGROUP_CONF: {
2816       wait = false;
2817       break;
2818     }
2819     case GSN_DROP_NODEGROUP_REF:
2820     {
2821       const DropNodegroupRef * ref =
2822         CAST_CONSTPTR(DropNodegroupRef, signal->getDataPtr());
2823       endSchemaTrans(ss, nodeId, transId, transKey,
2824                      SchemaTransEndReq::SchemaTransAbort);
2825       return ref->errorCode;
2826     }
2827     case GSN_NF_COMPLETEREP:
2828       // ignore
2829       break;
2830     case GSN_NODE_FAILREP:{
2831       const NodeFailRep * const rep =
2832         CAST_CONSTPTR(NodeFailRep, signal->getDataPtr());
2833       Uint32 len = NodeFailRep::getNodeMaskLength(signal->getLength());
2834       assert(len == NodeBitmask::Size); // only full length in ndbapi
2835       if (BitmaskImpl::safe_get(len, rep->theAllNodes, nodeId))
2836       {
2837         return SchemaTransBeginRef::Nodefailure;
2838       }
2839       break;
2840     }
2841     case GSN_API_REGCONF:
2842     case GSN_TAKE_OVERTCCONF:
2843     case GSN_CONNECT_REP:
2844       break;
2845     default:
2846       report_unknown_signal(signal);
2847       return SEND_OR_RECEIVE_FAILED;
2848     }
2849   }
2850 
2851   return endSchemaTrans(ss, nodeId, transId, transKey, 0);
2852 }
2853 
2854 
2855 //****************************************************************************
2856 //****************************************************************************
2857 
2858 int
setTraceNo(int nodeId,int traceNo)2859 MgmtSrvr::setTraceNo(int nodeId, int traceNo)
2860 {
2861   if (traceNo < 0) {
2862     return INVALID_TRACE_NUMBER;
2863   }
2864 
2865   INIT_SIGNAL_SENDER(ss,nodeId);
2866 
2867   SimpleSignal ssig;
2868   ssig.set(ss,TestOrd::TraceAPI, CMVMI, GSN_TEST_ORD, TestOrd::SignalLength);
2869   TestOrd* const testOrd = CAST_PTR(TestOrd, ssig.getDataPtrSend());
2870   testOrd->clear();
2871   // Assume TRACE command causes toggling. Not really defined... ? TODO
2872   testOrd->setTraceCommand(TestOrd::Toggle,
2873 			   (TestOrd::TraceSpecification)traceNo);
2874 
2875   return ss.sendSignal(nodeId, &ssig) == SEND_OK ? 0 : SEND_OR_RECEIVE_FAILED;
2876 }
2877 
2878 //****************************************************************************
2879 //****************************************************************************
2880 
2881 int
setSignalLoggingMode(int nodeId,LogMode mode,const Vector<BaseString> & blocks)2882 MgmtSrvr::setSignalLoggingMode(int nodeId, LogMode mode,
2883 			       const Vector<BaseString>& blocks)
2884 {
2885   INIT_SIGNAL_SENDER(ss,nodeId);
2886 
2887   // Convert from MgmtSrvr format...
2888 
2889   TestOrd::Command command;
2890   if (mode == Off) {
2891     command = TestOrd::Off;
2892   }
2893   else {
2894     command = TestOrd::On;
2895   }
2896 
2897   TestOrd::SignalLoggerSpecification logSpec;
2898   switch (mode) {
2899   case In:
2900     logSpec = TestOrd::InputSignals;
2901     break;
2902   case Out:
2903     logSpec = TestOrd::OutputSignals;
2904     break;
2905   case InOut:
2906     logSpec = TestOrd::InputOutputSignals;
2907     break;
2908   case Off:
2909     // In MgmtSrvr interface it's just possible to switch off all logging, both
2910     // "in" and "out" (this should probably be changed).
2911     logSpec = TestOrd::InputOutputSignals;
2912     break;
2913   default:
2914     ndbout_c("Unexpected value %d, MgmtSrvr::setSignalLoggingMode, line %d",
2915 	     (unsigned)mode, __LINE__);
2916     assert(false);
2917     return -1;
2918   }
2919 
2920   SimpleSignal ssig;
2921   ssig.set(ss,TestOrd::TraceAPI, CMVMI, GSN_TEST_ORD, TestOrd::SignalLength);
2922 
2923   TestOrd* const testOrd = CAST_PTR(TestOrd, ssig.getDataPtrSend());
2924   testOrd->clear();
2925 
2926   if (blocks.size() == 0 || blocks[0] == "ALL") {
2927     // Logg command for all blocks
2928     testOrd->addSignalLoggerCommand(command, logSpec);
2929   } else {
2930     for(unsigned i = 0; i < blocks.size(); i++)
2931     {
2932       BlockNumber blockNumber = getBlockNo(blocks[i].c_str());
2933       if (blockNumber == 0)
2934         return INVALID_BLOCK_NAME;
2935       testOrd->addSignalLoggerCommand(blockNumber, command, logSpec);
2936     }
2937   }
2938 
2939   return ss.sendSignal(nodeId, &ssig) == SEND_OK ? 0 : SEND_OR_RECEIVE_FAILED;
2940 }
2941 
2942 /*****************************************************************************
2943  * Signal tracing
2944  *****************************************************************************/
startSignalTracing(int nodeId)2945 int MgmtSrvr::startSignalTracing(int nodeId)
2946 {
2947   INIT_SIGNAL_SENDER(ss,nodeId);
2948 
2949   SimpleSignal ssig;
2950   ssig.set(ss,TestOrd::TraceAPI, CMVMI, GSN_TEST_ORD, TestOrd::SignalLength);
2951 
2952   TestOrd* const testOrd = CAST_PTR(TestOrd, ssig.getDataPtrSend());
2953   testOrd->clear();
2954   testOrd->setTestCommand(TestOrd::On);
2955 
2956   return ss.sendSignal(nodeId, &ssig) == SEND_OK ? 0 : SEND_OR_RECEIVE_FAILED;
2957 }
2958 
2959 int
stopSignalTracing(int nodeId)2960 MgmtSrvr::stopSignalTracing(int nodeId)
2961 {
2962   INIT_SIGNAL_SENDER(ss,nodeId);
2963 
2964   SimpleSignal ssig;
2965   ssig.set(ss,TestOrd::TraceAPI, CMVMI, GSN_TEST_ORD, TestOrd::SignalLength);
2966   TestOrd* const testOrd = CAST_PTR(TestOrd, ssig.getDataPtrSend());
2967   testOrd->clear();
2968   testOrd->setTestCommand(TestOrd::Off);
2969 
2970   return ss.sendSignal(nodeId, &ssig) == SEND_OK ? 0 : SEND_OR_RECEIVE_FAILED;
2971 }
2972 
2973 
2974 /*****************************************************************************
2975  * Dump state
2976  *****************************************************************************/
2977 
2978 int
dumpState(int nodeId,const char * args)2979 MgmtSrvr::dumpState(int nodeId, const char* args)
2980 {
2981   // Convert the space separeted args
2982   // string to an int array
2983   Uint32 args_array[25];
2984   Uint32 numArgs = 0;
2985 
2986   char buf[10];
2987   int b  = 0;
2988   memset(buf, 0, 10);
2989   for (size_t i = 0; i <= strlen(args); i++){
2990     if (args[i] == ' ' || args[i] == 0){
2991       args_array[numArgs] = atoi(buf);
2992       numArgs++;
2993       memset(buf, 0, 10);
2994       b = 0;
2995     } else {
2996       buf[b] = args[i];
2997       b++;
2998     }
2999   }
3000 
3001   return dumpState(nodeId, args_array, numArgs);
3002 }
3003 
3004 int
dumpState(int nodeId,const Uint32 args[],Uint32 no)3005 MgmtSrvr::dumpState(int nodeId, const Uint32 args[], Uint32 no)
3006 {
3007   INIT_SIGNAL_SENDER(ss,nodeId);
3008 
3009   const Uint32 len = no > 25 ? 25 : no;
3010 
3011   SimpleSignal ssig;
3012   DumpStateOrd * const dumpOrd =
3013     CAST_PTR(DumpStateOrd, ssig.getDataPtrSend());
3014   ssig.set(ss,TestOrd::TraceAPI, CMVMI, GSN_DUMP_STATE_ORD, len);
3015   for(Uint32 i = 0; i<25; i++){
3016     if (i < len)
3017       dumpOrd->args[i] = args[i];
3018     else
3019       dumpOrd->args[i] = 0;
3020   }
3021 
3022   int res = ss.sendSignal(nodeId, &ssig) == SEND_OK ? 0 :SEND_OR_RECEIVE_FAILED;
3023 
3024   if (res == 0)
3025   {
3026     /**
3027      * In order to make DUMP (almost) syncronous,
3028      *   make a syncronous request *after* the NDB_TAMPER
3029      */
3030     make_sync_req(ss, Uint32(nodeId));
3031   }
3032 
3033   return res;
3034 
3035 }
3036 
3037 
3038 //****************************************************************************
3039 //****************************************************************************
3040 
getErrorText(int errorCode,char * buf,int buf_sz)3041 const char* MgmtSrvr::getErrorText(int errorCode, char *buf, int buf_sz)
3042 {
3043   ndb_error_string(errorCode, buf, buf_sz);
3044   buf[buf_sz-1]= 0;
3045   return buf;
3046 }
3047 
3048 
3049 void
trp_deliver_signal(const NdbApiSignal * signal,const LinearSectionPtr ptr[3])3050 MgmtSrvr::trp_deliver_signal(const NdbApiSignal* signal,
3051                              const LinearSectionPtr ptr[3])
3052 {
3053   int gsn = signal->readSignalNumber();
3054 
3055   switch (gsn) {
3056   case GSN_EVENT_REP:
3057   {
3058     eventReport(signal->getDataPtr(), signal->getLength());
3059     break;
3060   }
3061 
3062   case GSN_NF_COMPLETEREP:{
3063     const NFCompleteRep * rep = CAST_CONSTPTR(NFCompleteRep,
3064                                                signal->getDataPtr());
3065     /* Clear local nodeid reservation(if any) */
3066     release_local_nodeid_reservation(rep->failedNodeId);
3067 
3068      clear_connect_address_cache(rep->failedNodeId);
3069     break;
3070   }
3071   case GSN_TAMPER_ORD:
3072     ndbout << "TAMPER ORD" << endl;
3073     break;
3074   case GSN_API_REGCONF:
3075   case GSN_TAKE_OVERTCCONF:
3076     break;
3077   case GSN_CONNECT_REP:{
3078     const Uint32 nodeId = signal->getDataPtr()[0];
3079 
3080     /*
3081       Clear local nodeid reservation since nodeid is
3082       now reserved by a connected transporter
3083     */
3084     release_local_nodeid_reservation(nodeId);
3085 
3086     union {
3087       Uint32 theData[25];
3088       EventReport repData;
3089     };
3090     EventReport * rep = &repData;
3091     theData[1] = nodeId;
3092     rep->setEventType(NDB_LE_Connected);
3093 
3094     if (nodeTypes[nodeId] == NODE_TYPE_DB)
3095     {
3096       m_started_nodes.push_back(nodeId);
3097     }
3098     rep->setEventType(NDB_LE_Connected);
3099     rep->setNodeId(_ownNodeId);
3100     eventReport(theData, 1);
3101     return;
3102   }
3103   case GSN_NODE_FAILREP:
3104   {
3105     union {
3106       Uint32 theData[25];
3107       EventReport repData;
3108     };
3109     bzero(theData, sizeof(theData));
3110     EventReport * event = &repData;
3111     event->setEventType(NDB_LE_Disconnected);
3112     event->setNodeId(_ownNodeId);
3113 
3114     const NodeFailRep *rep = CAST_CONSTPTR(NodeFailRep,
3115                                            signal->getDataPtr());
3116     Uint32 len = NodeFailRep::getNodeMaskLength(signal->getLength());
3117     assert(len == NodeBitmask::Size); // only full length in ndbapi
3118     for (Uint32 i = BitmaskImpl::find_first(len, rep->theAllNodes);
3119          i != BitmaskImpl::NotFound;
3120          i = BitmaskImpl::find_next(len, rep->theAllNodes, i + 1))
3121     {
3122       theData[1] = i;
3123       eventReport(theData, 1);
3124 
3125       /* Clear local nodeid reservation(if any) */
3126       release_local_nodeid_reservation(i);
3127 
3128       clear_connect_address_cache(i);
3129     }
3130     return;
3131   }
3132   case GSN_CLOSE_COMREQ:
3133   {
3134     theFacade->perform_close_clnt(this);
3135     break;
3136   }
3137   default:
3138     g_eventLogger->error("Unknown signal received. SignalNumber: "
3139                          "%i from (%d, 0x%x)",
3140                          gsn,
3141                          refToNode(signal->theSendersBlockRef),
3142                          refToBlock(signal->theSendersBlockRef));
3143     assert(false);
3144   }
3145 }
3146 
3147 
3148 void
trp_node_status(Uint32 nodeId,Uint32 _event)3149 MgmtSrvr::trp_node_status(Uint32 nodeId, Uint32 _event)
3150 {
3151 }
3152 
3153 enum ndb_mgm_node_type
getNodeType(NodeId nodeId) const3154 MgmtSrvr::getNodeType(NodeId nodeId) const
3155 {
3156   if(nodeId >= MAX_NODES)
3157     return (enum ndb_mgm_node_type)-1;
3158 
3159   return nodeTypes[nodeId];
3160 }
3161 
3162 
3163 const char*
get_connect_address(NodeId node_id,char * addr_buf,size_t addr_buf_size)3164 MgmtSrvr::get_connect_address(NodeId node_id,
3165                               char *addr_buf,
3166                               size_t addr_buf_size)
3167 {
3168   assert(node_id < NDB_ARRAY_SIZE(m_connect_address));
3169 
3170   if (m_connect_address[node_id].s_addr == 0)
3171   {
3172     // No cached connect address available
3173     const trp_node &node= getNodeInfo(node_id);
3174     if (node.is_connected())
3175     {
3176       // Cache the connect address, it's valid until
3177       // node disconnects
3178       m_connect_address[node_id] = theFacade->ext_get_connect_address(node_id);
3179     }
3180   }
3181 
3182   // Return the cached connect address
3183   return Ndb_inet_ntop(AF_INET,
3184                        static_cast<void*>(&m_connect_address[node_id]),
3185                        addr_buf,
3186                        (socklen_t)addr_buf_size);
3187 }
3188 
3189 
3190 void
clear_connect_address_cache(NodeId nodeid)3191 MgmtSrvr::clear_connect_address_cache(NodeId nodeid)
3192 {
3193   assert(nodeid < NDB_ARRAY_SIZE(m_connect_address));
3194   if (nodeid < NDB_ARRAY_SIZE(m_connect_address))
3195   {
3196     m_connect_address[nodeid].s_addr = 0;
3197   }
3198 }
3199 
3200 /***************************************************************************
3201  * Alloc nodeid
3202  ***************************************************************************/
3203 
NodeIdReservations()3204 MgmtSrvr::NodeIdReservations::NodeIdReservations()
3205 {
3206   memset(m_reservations, 0, sizeof(m_reservations));
3207 }
3208 
3209 
3210 void
check_array(NodeId n) const3211 MgmtSrvr::NodeIdReservations::check_array(NodeId n) const
3212 {
3213   assert( n < NDB_ARRAY_SIZE(m_reservations));
3214 }
3215 
3216 
3217 bool
get(NodeId n) const3218 MgmtSrvr::NodeIdReservations::get(NodeId n) const
3219 {
3220   check_array(n);
3221 
3222   return (m_reservations[n].m_timeout != 0);
3223 }
3224 
3225 
3226 void
set(NodeId n,unsigned timeout)3227 MgmtSrvr::NodeIdReservations::set(NodeId n, unsigned timeout)
3228 {
3229   check_array(n);
3230 
3231   Reservation& r = m_reservations[n];
3232   // Dont't allow double set
3233   assert(r.m_timeout == 0 && !NdbTick_IsValid(r.m_start));
3234 
3235   r.m_timeout = timeout;
3236   r.m_start = NdbTick_getCurrentTicks();
3237 }
3238 
3239 
3240 BaseString
pretty_str() const3241 MgmtSrvr::NodeIdReservations::pretty_str() const
3242 {
3243   const char* sep = "";
3244   BaseString str;
3245   for (size_t i = 0; i < NDB_ARRAY_SIZE(m_reservations); i++)
3246   {
3247     const Reservation& r = m_reservations[i];
3248     if (r.m_timeout)
3249     {
3250       str.appfmt("%s%u", sep, (unsigned)i);
3251       sep = ",";
3252     }
3253   }
3254   return str;
3255 }
3256 
3257 
3258 void
clear(NodeId n)3259 MgmtSrvr::NodeIdReservations::clear(NodeId n)
3260 {
3261   check_array(n);
3262 
3263   Reservation& r = m_reservations[n];
3264   // Dont't allow double clear
3265   assert(r.m_timeout != 0 && NdbTick_IsValid(r.m_start));
3266 
3267   r.m_timeout = 0;
3268   NdbTick_Invalidate(&r.m_start);
3269 }
3270 
3271 
3272 bool
has_timedout(NodeId n,NDB_TICKS now) const3273 MgmtSrvr::NodeIdReservations::has_timedout(NodeId n, NDB_TICKS now) const
3274 {
3275   check_array(n);
3276 
3277   const Reservation& r = m_reservations[n];
3278   if (r.m_timeout &&
3279       NdbTick_Elapsed(r.m_start,now).milliSec() > r.m_timeout)
3280     return true;
3281   return false;
3282 }
3283 
3284 
3285 void
release_local_nodeid_reservation(NodeId nodeid)3286 MgmtSrvr::release_local_nodeid_reservation(NodeId nodeid)
3287 {
3288   NdbMutex_Lock(m_reserved_nodes_mutex);
3289   if (m_reserved_nodes.get(nodeid))
3290   {
3291     g_eventLogger->debug("Releasing local reservation for nodeid %d", nodeid);
3292     m_reserved_nodes.clear(nodeid);
3293   }
3294   NdbMutex_Unlock(m_reserved_nodes_mutex);
3295 }
3296 
3297 
3298 int
alloc_node_id_req(NodeId free_node_id,enum ndb_mgm_node_type type,Uint32 timeout_ms)3299 MgmtSrvr::alloc_node_id_req(NodeId free_node_id,
3300                             enum ndb_mgm_node_type type,
3301                             Uint32 timeout_ms)
3302 {
3303   bool first_attempt = true;
3304   SignalSender ss(theFacade);
3305   ss.lock(); // lock will be released on exit
3306 
3307   SimpleSignal ssig;
3308   AllocNodeIdReq* req = CAST_PTR(AllocNodeIdReq, ssig.getDataPtrSend());
3309   ssig.set(ss, TestOrd::TraceAPI, QMGR, GSN_ALLOC_NODEID_REQ,
3310            AllocNodeIdReq::SignalLength);
3311 
3312   req->senderRef = ss.getOwnRef();
3313   req->senderData = 19;
3314   req->nodeId = free_node_id;
3315   req->nodeType = type;
3316   req->timeout = timeout_ms;
3317 
3318   int do_send = 1;
3319   NodeId nodeId = 0;
3320   while (1)
3321   {
3322     if (nodeId == 0)
3323     {
3324       bool next;
3325       while((next = getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB)) == true &&
3326             getNodeInfo(nodeId).is_confirmed() == false)
3327         ;
3328       if (!next)
3329         return NO_CONTACT_WITH_DB_NODES;
3330       do_send = 1;
3331     }
3332     if (do_send)
3333     {
3334       if (ss.sendSignal(nodeId, &ssig) != SEND_OK)
3335         return SEND_OR_RECEIVE_FAILED;
3336       do_send = 0;
3337     }
3338 
3339     SimpleSignal *signal = ss.waitFor();
3340 
3341     int gsn = signal->readSignalNumber();
3342     switch (gsn) {
3343     case GSN_ALLOC_NODEID_CONF:
3344     {
3345 #ifdef NOT_USED
3346       const AllocNodeIdConf * const conf =
3347         CAST_CONSTPTR(AllocNodeIdConf, signal->getDataPtr());
3348 #endif
3349       g_eventLogger->info("Alloc node id %u succeeded", free_node_id);
3350       return 0;
3351     }
3352     case GSN_ALLOC_NODEID_REF:
3353     {
3354       const AllocNodeIdRef * const ref =
3355         CAST_CONSTPTR(AllocNodeIdRef, signal->getDataPtr());
3356       if (ref->errorCode == AllocNodeIdRef::NotMaster &&
3357           refToNode(ref->masterRef) == 0xFFFF)
3358       {
3359         /*
3360           The data nodes haven't decided who is the president (yet)
3361           and thus can't allocate nodeids -> return "no contact"
3362         */
3363         g_eventLogger->info("Alloc node id %u failed, no new president yet",
3364                             free_node_id);
3365         return NO_CONTACT_WITH_DB_NODES;
3366       }
3367 
3368       if (ref->errorCode == AllocNodeIdRef::NotMaster ||
3369           ref->errorCode == AllocNodeIdRef::Busy ||
3370           ref->errorCode == AllocNodeIdRef::NodeFailureHandlingNotCompleted)
3371       {
3372         do_send = 1;
3373         nodeId = refToNode(ref->masterRef);
3374 	if (!getNodeInfo(nodeId).is_confirmed())
3375 	  nodeId = 0;
3376         if (ref->errorCode != AllocNodeIdRef::NotMaster)
3377         {
3378           if (first_attempt)
3379           {
3380             first_attempt = false;
3381             g_eventLogger->info("Alloc node id %u failed with error code %u, will retry",
3382                                 free_node_id,
3383                                 ref->errorCode);
3384           }
3385           /* sleep for a while (100ms) before retrying */
3386           ss.unlock();
3387           NdbSleep_MilliSleep(100);
3388           ss.lock();
3389         }
3390         continue;
3391       }
3392       return ref->errorCode;
3393     }
3394     case GSN_NF_COMPLETEREP:
3395     {
3396       continue;
3397     }
3398     case GSN_NODE_FAILREP:{
3399       /**
3400        * ok to trap using NODE_FAILREP
3401        *   as we don't really wait on anything interesting
3402        */
3403       const NodeFailRep * const rep =
3404 	CAST_CONSTPTR(NodeFailRep, signal->getDataPtr());
3405       Uint32 len = NodeFailRep::getNodeMaskLength(signal->getLength());
3406       assert(len == NodeBitmask::Size); // only full length in ndbapi
3407       if (BitmaskImpl::safe_get(len, rep->theAllNodes, nodeId))
3408       {
3409         do_send = 1;
3410         nodeId = 0;
3411       }
3412       continue;
3413     }
3414     case GSN_API_REGCONF:
3415     case GSN_TAKE_OVERTCCONF:
3416     case GSN_CONNECT_REP:
3417       continue;
3418     default:
3419       report_unknown_signal(signal);
3420       return SEND_OR_RECEIVE_FAILED;
3421     }
3422   }
3423   return 0;
3424 }
3425 
3426 static int
match_hostname(const struct sockaddr * clnt_addr,const char * config_hostname)3427 match_hostname(const struct sockaddr *clnt_addr,
3428                const char *config_hostname)
3429 {
3430   struct in_addr config_addr= {0};
3431   if (clnt_addr)
3432   {
3433     const struct in_addr *clnt_in_addr = &((sockaddr_in*)clnt_addr)->sin_addr;
3434 
3435     if (Ndb_getInAddr(&config_addr, config_hostname) != 0
3436         || memcmp(&config_addr, clnt_in_addr, sizeof(config_addr)) != 0)
3437     {
3438       struct in_addr tmp_addr;
3439       if (Ndb_getInAddr(&tmp_addr, "localhost") != 0
3440           || memcmp(&tmp_addr, clnt_in_addr, sizeof(config_addr)) != 0)
3441       {
3442         // not localhost
3443         return -1;
3444       }
3445 
3446       // connecting through localhost
3447       // check if config_hostname is local
3448       if (!SocketServer::tryBind(0, config_hostname))
3449         return -1;
3450     }
3451   }
3452   else
3453   {
3454     if (!SocketServer::tryBind(0, config_hostname))
3455       return -1;
3456   }
3457   return 0;
3458 }
3459 
3460 int
find_node_type(NodeId node_id,ndb_mgm_node_type type,const struct sockaddr * client_addr,Vector<PossibleNode> & nodes,int & error_code,BaseString & error_string)3461 MgmtSrvr::find_node_type(NodeId node_id,
3462                          ndb_mgm_node_type type,
3463                          const struct sockaddr* client_addr,
3464                          Vector<PossibleNode>& nodes,
3465                          int& error_code, BaseString& error_string)
3466 {
3467   const char* found_config_hostname= 0;
3468   unsigned type_c= (unsigned)type;
3469 
3470   Guard g(m_local_config_mutex);
3471 
3472   ConfigIter iter(m_local_config, CFG_SECTION_NODE);
3473   for(iter.first(); iter.valid(); iter.next())
3474   {
3475     unsigned id;
3476     if (iter.get(CFG_NODE_ID, &id))
3477       require(false);
3478     if (node_id && node_id != id)
3479       continue;
3480     if (iter.get(CFG_TYPE_OF_SECTION, &type_c))
3481       require(false);
3482     if (type_c != (unsigned)type)
3483     {
3484       if (!node_id)
3485         continue;
3486       goto error;
3487     }
3488     bool exact_match = false;
3489     const char *config_hostname= 0;
3490     if (iter.get(CFG_NODE_HOST, &config_hostname))
3491       require(false);
3492     if (config_hostname == 0 || config_hostname[0] == 0)
3493     {
3494       config_hostname= "";
3495     }
3496     else
3497     {
3498       found_config_hostname= config_hostname;
3499       if (match_hostname(client_addr, config_hostname))
3500       {
3501         if (!node_id)
3502           continue;
3503         goto error;
3504       }
3505       exact_match = true;
3506     }
3507     /*
3508       Insert this node in the nodes list sorted with the
3509       exact matches ahead of the open nodes
3510     */
3511     PossibleNode possible_node= {id, config_hostname, exact_match};
3512     if (exact_match)
3513     {
3514       // Find the position of first !exact match
3515       unsigned position = 0;
3516       for (unsigned j = 0; j < nodes.size(); j++)
3517       {
3518         if (nodes[j].exact_match)
3519           position++;
3520       }
3521       nodes.push(possible_node, position);
3522     }
3523     else
3524     {
3525       nodes.push_back(possible_node);
3526     }
3527 
3528     if (node_id)
3529       break;
3530   }
3531   if (nodes.size() != 0)
3532   {
3533     return 0;
3534   }
3535 
3536  error:
3537   /*
3538     lock on m_configMutex held because found_config_hostname may have
3539     reference inot config structure
3540   */
3541   error_code= NDB_MGM_ALLOCID_CONFIG_MISMATCH;
3542   if (node_id)
3543   {
3544     if (type_c != (unsigned) type)
3545     {
3546       BaseString type_string, type_c_string;
3547       const char *alias, *str;
3548       alias= ndb_mgm_get_node_type_alias_string(type, &str);
3549       type_string.assfmt("%s(%s)", alias, str);
3550       alias= ndb_mgm_get_node_type_alias_string((enum ndb_mgm_node_type)type_c,
3551                                                 &str);
3552       type_c_string.assfmt("%s(%s)", alias, str);
3553       error_string.appfmt("Id %d configured as %s, connect attempted as %s.",
3554                           node_id, type_c_string.c_str(),
3555                           type_string.c_str());
3556       return -1;
3557     }
3558     if (found_config_hostname)
3559     {
3560       char addr_buf[NDB_ADDR_STRLEN];
3561       char *addr_str;
3562       struct in_addr config_addr= {0};
3563       struct in_addr conn_addr =
3564         ((struct sockaddr_in*)(client_addr))->sin_addr;
3565       int r_config_addr= Ndb_getInAddr(&config_addr, found_config_hostname);
3566       addr_str = Ndb_inet_ntop(AF_INET,
3567                                static_cast<void*>(&conn_addr),
3568                                addr_buf,
3569                                (socklen_t)sizeof(addr_buf));
3570       error_string.appfmt("Connection with id %d done from wrong host ip %s,",
3571                           node_id, addr_str);
3572       addr_str = Ndb_inet_ntop(AF_INET,
3573                                static_cast<void*>(&config_addr),
3574                                addr_buf,
3575                                (socklen_t)sizeof(addr_buf));
3576       error_string.appfmt(" expected %s(%s).", found_config_hostname,
3577                           r_config_addr ?
3578                           "lookup failed" : addr_str);
3579       return -1;
3580     }
3581     error_string.appfmt("No node defined with id=%d in config file.", node_id);
3582     return -1;
3583   }
3584 
3585   // node_id == 0 and nodes.size() == 0
3586   if (found_config_hostname)
3587   {
3588     char addr_buf[NDB_ADDR_STRLEN];
3589     struct in_addr conn_addr =
3590       ((struct sockaddr_in*)(client_addr))->sin_addr;
3591     char *addr_str = Ndb_inet_ntop(AF_INET,
3592                                    static_cast<void*>(&conn_addr),
3593                                    addr_buf,
3594                                    (socklen_t)sizeof(addr_buf));
3595     error_string.appfmt("Connection done from wrong host ip %s.",
3596                         (client_addr) ? addr_str : "");
3597     return -1;
3598   }
3599 
3600   error_string.append("No nodes defined in config file.");
3601   return -1;
3602 }
3603 
3604 
3605 int
try_alloc(NodeId id,ndb_mgm_node_type type,Uint32 timeout_ms)3606 MgmtSrvr::try_alloc(NodeId id,
3607                     ndb_mgm_node_type type,
3608                     Uint32 timeout_ms)
3609 {
3610   assert(type == NDB_MGM_NODE_TYPE_NDB ||
3611          type == NDB_MGM_NODE_TYPE_API);
3612 
3613   const NDB_TICKS start = NdbTick_getCurrentTicks();
3614   while (true)
3615   {
3616     int res = alloc_node_id_req(id, type, timeout_ms);
3617     if (res == 0)
3618     {
3619       /* Node id allocation suceeded */
3620       g_eventLogger->debug("Allocated nodeid %u in cluster", id);
3621       assert(id > 0);
3622       return id;
3623     }
3624 
3625     if (res == NO_CONTACT_WITH_DB_NODES &&
3626         type == NDB_MGM_NODE_TYPE_API)
3627     {
3628       const Uint64 retry_timeout = 3000; // milliseconds
3629       const NDB_TICKS now = NdbTick_getCurrentTicks();
3630       const Uint64 elapsed = NdbTick_Elapsed(start,now).milliSec();
3631       if (elapsed > retry_timeout)
3632       {
3633         /*
3634           Have waited long enough time for data nodes to
3635           decide on a master, return error
3636         */
3637         g_eventLogger->debug("Failed to allocate nodeid %u for API node " \
3638                              "in cluster (retried during %u milliseconds)",
3639                              id, (unsigned)elapsed);
3640         return -1;
3641       }
3642 
3643       g_eventLogger->debug("Retrying allocation of nodeid %u...", id);
3644       NdbSleep_MilliSleep(100);
3645       continue;
3646     }
3647 
3648     if (res == NO_CONTACT_WITH_DB_NODES &&
3649         type == NDB_MGM_NODE_TYPE_NDB)
3650     {
3651       /*
3652         No reply from data node(s) -> use the requested nodeid
3653         so that data node can start
3654       */
3655       g_eventLogger->debug("Nodeid %u for data node reserved locally "  \
3656                            "since cluster was not available ", id);
3657       return id;
3658     }
3659 
3660     /* Unspecified error */
3661     return 0;
3662   }
3663 
3664   assert(false); // Never reached
3665   return 0;
3666 }
3667 
3668 
3669 bool
try_alloc_from_list(NodeId & nodeid,ndb_mgm_node_type type,Uint32 timeout_ms,Vector<PossibleNode> & nodes)3670 MgmtSrvr::try_alloc_from_list(NodeId& nodeid,
3671                               ndb_mgm_node_type type,
3672                               Uint32 timeout_ms,
3673                               Vector<PossibleNode>& nodes)
3674 {
3675   for (unsigned i = 0; i < nodes.size(); i++)
3676   {
3677     const unsigned id= nodes[i].id;
3678     if (theFacade->ext_isConnected(id))
3679     {
3680       // Node is already reserved(connected via transporter)
3681       continue;
3682     }
3683 
3684     NdbMutex_Lock(m_reserved_nodes_mutex);
3685     if (m_reserved_nodes.get(id))
3686     {
3687       // Node is already reserved(locally in this node)
3688       NdbMutex_Unlock(m_reserved_nodes_mutex);
3689       continue;
3690     }
3691 
3692     /*
3693       Reserve the nodeid locally while checking if it can
3694       be allocated in the data nodes
3695     */
3696     m_reserved_nodes.set(id, timeout_ms);
3697 
3698     NdbMutex_Unlock(m_reserved_nodes_mutex);
3699     int res = try_alloc(id, type, timeout_ms);
3700     if (res > 0)
3701     {
3702       // Nodeid allocation succeeded
3703       nodeid= id;
3704 
3705       if (type == NDB_MGM_NODE_TYPE_API)
3706       {
3707         /*
3708           Release the local reservation(which was set to avoid that
3709           more than one thread asked for same nodeid) since it's
3710           now reserved in data node
3711         */
3712         release_local_nodeid_reservation(id);
3713       }
3714 
3715       return true;
3716     }
3717 
3718     /* Release the local reservation */
3719     release_local_nodeid_reservation(id);
3720 
3721     if (res < 0)
3722     {
3723       // Don't try any more nodes from the list
3724       return false;
3725     }
3726   }
3727   return false;
3728 }
3729 
3730 
3731 bool
alloc_node_id_impl(NodeId & nodeid,enum ndb_mgm_node_type type,const struct sockaddr * client_addr,int & error_code,BaseString & error_string,Uint32 timeout_s)3732 MgmtSrvr::alloc_node_id_impl(NodeId& nodeid,
3733                              enum ndb_mgm_node_type type,
3734                              const struct sockaddr* client_addr,
3735                              int& error_code, BaseString& error_string,
3736                              Uint32 timeout_s)
3737 {
3738   if (m_opts.no_nodeid_checks)
3739   {
3740     if (nodeid == 0)
3741     {
3742       error_string.appfmt("no-nodeid-checks set in management server. "
3743 			  "node id must be set explicitly in connectstring");
3744       error_code = NDB_MGM_ALLOCID_CONFIG_MISMATCH;
3745       return false;
3746     }
3747     return true;
3748   }
3749 
3750   /* Don't allow allocation of this ndb_mgmd's nodeid */
3751   assert(_ownNodeId);
3752   if (nodeid == _ownNodeId)
3753   {
3754     // Fatal error
3755     error_code= NDB_MGM_ALLOCID_CONFIG_MISMATCH;
3756     if (type != NDB_MGM_NODE_TYPE_MGM)
3757     {
3758       /**
3759        * be backwards compatile wrt error messages
3760        */
3761       BaseString type_string, type_c_string;
3762       const char *alias, *str;
3763       alias= ndb_mgm_get_node_type_alias_string(type, &str);
3764       type_string.assfmt("%s(%s)", alias, str);
3765       alias= ndb_mgm_get_node_type_alias_string(NDB_MGM_NODE_TYPE_MGM, &str);
3766       type_c_string.assfmt("%s(%s)", alias, str);
3767       error_string.appfmt("Id %d configured as %s, connect attempted as %s.",
3768                           nodeid, type_c_string.c_str(),
3769                           type_string.c_str());
3770     }
3771     else
3772     {
3773       error_string.appfmt("Id %d is already allocated by this ndb_mgmd",
3774                           nodeid);
3775     }
3776     return false;
3777   }
3778 
3779   /* Make sure that config is confirmed before allocating nodeid */
3780   Uint32 timeout_ms = timeout_s * 1000;
3781   {
3782     const NDB_TICKS start = NdbTick_getCurrentTicks();
3783     BaseString getconfig_message;
3784     while (!m_config_manager->get_packed_config(type, 0, getconfig_message))
3785     {
3786       const NDB_TICKS now = NdbTick_getCurrentTicks();
3787       if (NdbTick_Elapsed(start,now).milliSec() > timeout_ms)
3788       {
3789         error_code = NDB_MGM_ALLOCID_ERROR;
3790         error_string.append("Unable to allocate nodeid as configuration"
3791                             " not yet confirmed");
3792         return false;
3793       }
3794 
3795       NdbSleep_MilliSleep(20);
3796     }
3797   }
3798 
3799   /* Find possible nodeids */
3800   Vector<PossibleNode> nodes;
3801   if (find_node_type(nodeid, type, client_addr,
3802                      nodes, error_code, error_string))
3803     return false;
3804 
3805   // Print list of possible nodes
3806   for (unsigned i = 0; i < nodes.size(); i++)
3807   {
3808     const PossibleNode& node = nodes[i];
3809     g_eventLogger->debug(" [%u]: %u, '%s', %d",
3810                          (unsigned)i, node.id,
3811                          node.host.c_str(),
3812                          node.exact_match);
3813   }
3814 
3815   // nodes.size() == 0 handled inside find_node_type
3816   assert(nodes.size() != 0);
3817 
3818   if (type == NDB_MGM_NODE_TYPE_MGM && nodes.size() > 1)
3819   {
3820     // mgmt server may only have one match
3821     error_string.appfmt("Ambiguous node id's %d and %d. "
3822                         "Suggest specifying node id in connectstring, "
3823                         "or specifying unique host names in config file.",
3824                         nodes[0].id, nodes[1].id);
3825     error_code= NDB_MGM_ALLOCID_CONFIG_MISMATCH;
3826     return false;
3827   }
3828 
3829   /* Check timeout of nodeid reservations for NDB */
3830   if (type == NDB_MGM_NODE_TYPE_NDB)
3831   {
3832     const NDB_TICKS now = NdbTick_getCurrentTicks();
3833     for (unsigned i = 0; i < nodes.size(); i++)
3834     {
3835       const NodeId ndb_nodeid = nodes[i].id;
3836       {
3837         Guard g(m_reserved_nodes_mutex);
3838         if (!m_reserved_nodes.has_timedout(ndb_nodeid, now))
3839           continue;
3840       }
3841 
3842       // Found a timedout reservation
3843       if (theFacade->ext_isConnected(ndb_nodeid))
3844         continue; // Still connected, ignore the timeout
3845 
3846       g_eventLogger->warning("Found timedout nodeid reservation for %u, " \
3847                              "releasing it", ndb_nodeid);
3848 
3849       // Clear the reservation
3850       release_local_nodeid_reservation(ndb_nodeid);
3851     }
3852   }
3853 
3854   if (try_alloc_from_list(nodeid, type, timeout_ms, nodes))
3855   {
3856     if (type == NDB_MGM_NODE_TYPE_NDB)
3857     {
3858       /* Be ready to accept connections from this node */
3859       theFacade->ext_doConnect(nodeid);
3860     }
3861 
3862     return true;
3863   }
3864 
3865   /*
3866     there are nodes with correct type available but
3867     allocation failed for some reason
3868   */
3869   if (nodeid)
3870   {
3871     error_string.appfmt("Id %d already allocated by another node.",
3872                         nodeid);
3873   }
3874   else
3875   {
3876     const char *alias, *str;
3877     alias= ndb_mgm_get_node_type_alias_string(type, &str);
3878     error_string.appfmt("No free node id found for %s(%s).",
3879                         alias, str);
3880   }
3881   error_code = NDB_MGM_ALLOCID_ERROR;
3882   return false;
3883 }
3884 
3885 
3886 bool
alloc_node_id(NodeId & nodeid,enum ndb_mgm_node_type type,const struct sockaddr * client_addr,int & error_code,BaseString & error_string,bool log_event,Uint32 timeout_s)3887 MgmtSrvr::alloc_node_id(NodeId& nodeid,
3888 			enum ndb_mgm_node_type type,
3889 			const struct sockaddr* client_addr,
3890 			int& error_code, BaseString& error_string,
3891                         bool log_event,
3892                         Uint32 timeout_s)
3893 {
3894   char addr_buf[NDB_ADDR_STRLEN];
3895   struct in_addr conn_addr = ((sockaddr_in*)client_addr)->sin_addr;
3896   const char* type_str = ndb_mgm_get_node_type_string(type);
3897   char* addr_str = Ndb_inet_ntop(AF_INET,
3898                                  static_cast<void*>(&conn_addr),
3899                                  addr_buf,
3900                                  (socklen_t)sizeof(addr_buf));
3901 
3902   g_eventLogger->debug("Trying to allocate nodeid for %s" \
3903                        "(nodeid: %u, type: %s)",
3904                        addr_str, (unsigned)nodeid, type_str);
3905 
3906 
3907   if (alloc_node_id_impl(nodeid, type, client_addr,
3908                          error_code, error_string,
3909                          timeout_s))
3910   {
3911     g_eventLogger->info("Nodeid %u allocated for %s at %s",
3912                         (unsigned)nodeid, type_str, addr_str);
3913     return true;
3914   }
3915 
3916   if (!log_event)
3917     return false;
3918 
3919   g_eventLogger->warning("Failed to allocate nodeid for %s at %s. "
3920                          "Returned error: '%s'",
3921                          type_str, addr_str, error_string.c_str());
3922 
3923   return false;
3924 }
3925 
3926 
3927 bool
getNextNodeId(NodeId * nodeId,enum ndb_mgm_node_type type) const3928 MgmtSrvr::getNextNodeId(NodeId * nodeId, enum ndb_mgm_node_type type) const
3929 {
3930   NodeId tmp = * nodeId;
3931 
3932   tmp++;
3933   while(nodeTypes[tmp] != type && tmp < MAX_NODES)
3934     tmp++;
3935 
3936   if(tmp == MAX_NODES){
3937     return false;
3938   }
3939 
3940   * nodeId = tmp;
3941   return true;
3942 }
3943 
3944 #include "Services.hpp"
3945 
3946 void
eventReport(const Uint32 * theData,Uint32 len)3947 MgmtSrvr::eventReport(const Uint32 * theData, Uint32 len)
3948 {
3949   const EventReport * const eventReport = (EventReport *)&theData[0];
3950 
3951   NodeId nodeId = eventReport->getNodeId();
3952   Ndb_logevent_type type = eventReport->getEventType();
3953   // Log event
3954   g_eventLogger->log(type, theData, len, nodeId,
3955                      &m_event_listner[0].m_logLevel);
3956   m_event_listner.log(type, theData, len, nodeId);
3957 }
3958 
3959 /***************************************************************************
3960  * Backup
3961  ***************************************************************************/
3962 
3963 int
startBackup(Uint32 & backupId,int waitCompleted,Uint32 input_backupId,Uint32 backuppoint)3964 MgmtSrvr::startBackup(Uint32& backupId, int waitCompleted, Uint32 input_backupId, Uint32 backuppoint)
3965 {
3966   SignalSender ss(theFacade);
3967   ss.lock(); // lock will be released on exit
3968 
3969   NodeId nodeId = m_master_node;
3970   if (okToSendTo(nodeId, false) != 0)
3971   {
3972     bool next;
3973     nodeId = m_master_node = 0;
3974     while((next = getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB)) == true &&
3975           okToSendTo(nodeId, false) != 0);
3976     if(!next)
3977       return NO_CONTACT_WITH_DB_NODES;
3978   }
3979 
3980   SimpleSignal ssig;
3981   BackupReq* req = CAST_PTR(BackupReq, ssig.getDataPtrSend());
3982   /*
3983    * Single-threaded backup.  Set instance key 1.  In the kernel
3984    * this maps to main instance 0 or worker instance 1 (if MT LQH).
3985    */
3986   BlockNumber backupBlockNo = numberToBlock(BACKUP, 1);
3987   if(input_backupId > 0)
3988   {
3989     ssig.set(ss, TestOrd::TraceAPI, backupBlockNo, GSN_BACKUP_REQ,
3990 	     BackupReq::SignalLength);
3991     req->inputBackupId = input_backupId;
3992   }
3993   else
3994     ssig.set(ss, TestOrd::TraceAPI, backupBlockNo, GSN_BACKUP_REQ,
3995 	     BackupReq::SignalLength - 1);
3996 
3997   req->senderData = 19;
3998   req->backupDataLen = 0;
3999   assert(waitCompleted < 3);
4000   req->flags = waitCompleted & 0x3;
4001   if(backuppoint == 1)
4002     req->flags |= BackupReq::USE_UNDO_LOG;
4003 
4004   int do_send = 1;
4005   while (1) {
4006     if (do_send)
4007     {
4008       if (ss.sendSignal(nodeId, &ssig) != SEND_OK) {
4009 	return SEND_OR_RECEIVE_FAILED;
4010       }
4011       if (waitCompleted == 0)
4012 	return 0;
4013       do_send = 0;
4014     }
4015     SimpleSignal *signal = ss.waitFor();
4016 
4017     int gsn = signal->readSignalNumber();
4018     switch (gsn) {
4019     case GSN_BACKUP_CONF:{
4020       const BackupConf * const conf =
4021 	CAST_CONSTPTR(BackupConf, signal->getDataPtr());
4022 #ifdef VM_TRACE
4023       ndbout_c("Backup(%d) master is %d", conf->backupId,
4024 	       refToNode(signal->header.theSendersBlockRef));
4025 #endif
4026       backupId = conf->backupId;
4027       if (waitCompleted == 1)
4028 	return 0;
4029       // wait for next signal
4030       break;
4031     }
4032     case GSN_BACKUP_COMPLETE_REP:{
4033       const BackupCompleteRep * const rep =
4034 	CAST_CONSTPTR(BackupCompleteRep, signal->getDataPtr());
4035 #ifdef VM_TRACE
4036       ndbout_c("Backup(%d) completed", rep->backupId);
4037 #endif
4038       backupId = rep->backupId;
4039       return 0;
4040     }
4041     case GSN_BACKUP_REF:{
4042       const BackupRef * const ref =
4043 	CAST_CONSTPTR(BackupRef, signal->getDataPtr());
4044       if(ref->errorCode == BackupRef::IAmNotMaster){
4045 	m_master_node = nodeId = refToNode(ref->masterRef);
4046 #ifdef VM_TRACE
4047 	ndbout_c("I'm not master resending to %d", nodeId);
4048 #endif
4049 	do_send = 1; // try again
4050 	if (!getNodeInfo(nodeId).m_alive)
4051 	  m_master_node = nodeId = 0;
4052 	continue;
4053       }
4054       return ref->errorCode;
4055     }
4056     case GSN_BACKUP_ABORT_REP:{
4057       const BackupAbortRep * const rep =
4058 	CAST_CONSTPTR(BackupAbortRep, signal->getDataPtr());
4059 #ifdef VM_TRACE
4060       ndbout_c("Backup %d aborted", rep->backupId);
4061 #endif
4062       return rep->reason;
4063     }
4064     case GSN_NF_COMPLETEREP:{
4065       const NFCompleteRep * const rep =
4066 	CAST_CONSTPTR(NFCompleteRep, signal->getDataPtr());
4067 #ifdef VM_TRACE
4068       ndbout_c("Node %d fail completed", rep->failedNodeId);
4069 #endif
4070       if (rep->failedNodeId == nodeId ||
4071 	  waitCompleted == 1)
4072 	return 1326;
4073       // wait for next signal
4074       // master node will report aborted backup
4075       break;
4076     }
4077     case GSN_NODE_FAILREP:{
4078       const NodeFailRep * const rep =
4079 	CAST_CONSTPTR(NodeFailRep, signal->getDataPtr());
4080       Uint32 len = NodeFailRep::getNodeMaskLength(signal->getLength());
4081       assert(len == NodeBitmask::Size); // only full length in ndbapi
4082       if (BitmaskImpl::safe_get(len, rep->theAllNodes,nodeId) ||
4083 	  waitCompleted == 1)
4084 	return 1326;
4085       // wait for next signal
4086       // master node will report aborted backup
4087       break;
4088     }
4089     case GSN_API_REGCONF:
4090     case GSN_TAKE_OVERTCCONF:
4091     case GSN_CONNECT_REP:
4092       continue;
4093     default:
4094       report_unknown_signal(signal);
4095       return SEND_OR_RECEIVE_FAILED;
4096     }
4097   }
4098 }
4099 
4100 int
abortBackup(Uint32 backupId)4101 MgmtSrvr::abortBackup(Uint32 backupId)
4102 {
4103   SignalSender ss(theFacade);
4104   ss.lock(); // lock will be released on exit
4105 
4106   bool next;
4107   NodeId nodeId = 0;
4108   while((next = getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB)) == true &&
4109 	getNodeInfo(nodeId).m_alive == false);
4110 
4111   if(!next){
4112     return NO_CONTACT_WITH_DB_NODES;
4113   }
4114 
4115   SimpleSignal ssig;
4116 
4117   AbortBackupOrd* ord = CAST_PTR(AbortBackupOrd, ssig.getDataPtrSend());
4118   /*
4119    * Single-threaded backup.  Set instance key 1.  In the kernel
4120    * this maps to main instance 0 or worker instance 1 (if MT LQH).
4121    */
4122   BlockNumber backupBlockNo = numberToBlock(BACKUP, 1);
4123   ssig.set(ss, TestOrd::TraceAPI, backupBlockNo, GSN_ABORT_BACKUP_ORD,
4124 	   AbortBackupOrd::SignalLength);
4125 
4126   ord->requestType = AbortBackupOrd::ClientAbort;
4127   ord->senderData = 19;
4128   ord->backupId = backupId;
4129 
4130   return ss.sendSignal(nodeId, &ssig) == SEND_OK ? 0 : SEND_OR_RECEIVE_FAILED;
4131 }
4132 
4133 
4134 int
setDbParameter(int node,int param,const char * value,BaseString & msg)4135 MgmtSrvr::setDbParameter(int node, int param, const char * value,
4136 			 BaseString& msg)
4137 {
4138 
4139   Guard g(m_local_config_mutex);
4140 
4141   /**
4142    * Check parameter
4143    */
4144   ConfigIter iter(m_local_config, CFG_SECTION_NODE);
4145   if(iter.first() != 0){
4146     msg.assign("Unable to find node section (iter.first())");
4147     return -1;
4148   }
4149 
4150   Uint32 type = NODE_TYPE_DB + 1;
4151   if(node != 0){
4152     // Set parameter only in the specified node
4153     if(iter.find(CFG_NODE_ID, node) != 0){
4154       msg.assign("Unable to find node (iter.find())");
4155       return -1;
4156     }
4157     if(iter.get(CFG_TYPE_OF_SECTION, &type) != 0){
4158       msg.assign("Unable to get node type(iter.get(CFG_TYPE_OF_SECTION))");
4159       return -1;
4160     }
4161   } else {
4162     // Set parameter in all DB nodes
4163     do {
4164       if(iter.get(CFG_TYPE_OF_SECTION, &type) != 0){
4165 	msg.assign("Unable to get node type(iter.get(CFG_TYPE_OF_SECTION))");
4166 	return -1;
4167       }
4168       if(type == NODE_TYPE_DB)
4169 	break;
4170     } while(iter.next() == 0);
4171   }
4172 
4173   if(type != NODE_TYPE_DB){
4174     msg.assfmt("Invalid node type or no such node (%d %d)",
4175 	       type, NODE_TYPE_DB);
4176     return -1;
4177   }
4178 
4179   int p_type;
4180   unsigned val_32;
4181   Uint64 val_64;
4182   const char * val_char;
4183   do {
4184     p_type = 0;
4185     if(iter.get(param, &val_32) == 0){
4186       val_32 = atoi(value);
4187       break;
4188     }
4189 
4190     p_type++;
4191     if(iter.get(param, &val_64) == 0){
4192       val_64 = my_strtoll(value, 0, 10);
4193       break;
4194     }
4195     p_type++;
4196     if(iter.get(param, &val_char) == 0){
4197       val_char = value;
4198       break;
4199     }
4200     msg.assign("Could not get parameter");
4201     return -1;
4202   } while(0);
4203 
4204   bool res = false;
4205   do {
4206     int ret = iter.get(CFG_TYPE_OF_SECTION, &type);
4207     assert(ret == 0);
4208 
4209     if(type != NODE_TYPE_DB)
4210       continue;
4211 
4212     Uint32 node;
4213     ret = iter.get(CFG_NODE_ID, &node);
4214     assert(ret == 0);
4215 
4216     ConfigValues::Iterator i2(m_local_config->m_configValues->m_config,
4217 			      iter.m_config);
4218     switch(p_type){
4219     case 0:
4220       res = i2.set(param, val_32);
4221       ndbout_c("Updating node %d param: %d to %d",  node, param, val_32);
4222       break;
4223     case 1:
4224       res = i2.set(param, val_64);
4225       ndbout_c("Updating node %d param: %d to %u",  node, param, val_32);
4226       break;
4227     case 2:
4228       res = i2.set(param, val_char);
4229       ndbout_c("Updating node %d param: %d to %s",  node, param, val_char);
4230       break;
4231     default:
4232       require(false);
4233     }
4234     assert(res);
4235   } while(node == 0 && iter.next() == 0);
4236 
4237   msg.assign("Success");
4238   return 0;
4239 }
4240 
4241 
4242 int
setConnectionDbParameter(int node1,int node2,int param,int value,BaseString & msg)4243 MgmtSrvr::setConnectionDbParameter(int node1, int node2,
4244                                    int param, int value,
4245                                    BaseString& msg)
4246 {
4247   DBUG_ENTER("MgmtSrvr::setConnectionDbParameter");
4248   DBUG_PRINT("enter", ("node1: %d, node2: %d, param: %d, value: %d",
4249                        node1, node2, param, value));
4250 
4251   // This function only supports setting dynamic ports
4252   if (param != CFG_CONNECTION_SERVER_PORT)
4253   {
4254     msg.assign("Only param CFG_CONNECTION_SERVER_PORT can be set");
4255     DBUG_RETURN(-1);
4256   }
4257 
4258   if (!m_config_manager->set_dynamic_port(node1, node2, value, msg))
4259     DBUG_RETURN(-1);
4260 
4261   DBUG_PRINT("exit", ("Set parameter(%d) to %d for %d -> %d",
4262                       param, value, node1, node2));
4263   DBUG_RETURN(1);
4264 }
4265 
4266 
setDynamicPorts(int node,DynPortSpec ports[],unsigned num_ports,BaseString & msg)4267 bool MgmtSrvr::setDynamicPorts(int node, DynPortSpec ports[],
4268                                unsigned num_ports, BaseString& msg)
4269 {
4270   return m_config_manager->set_dynamic_ports(node, ports, num_ports, msg);
4271 }
4272 
4273 
4274 int
getConnectionDbParameter(int node1,int node2,int param,int * value,BaseString & msg)4275 MgmtSrvr::getConnectionDbParameter(int node1, int node2,
4276                                    int param, int *value,
4277                                    BaseString& msg)
4278 {
4279   DBUG_ENTER("MgmtSrvr::getConnectionDbParameter");
4280   DBUG_PRINT("enter", ("node1: %d, node2: %d, param: %d",
4281                        node1, node2, param));
4282 
4283   // This function only supports asking about dynamic ports
4284   if (param != CFG_CONNECTION_SERVER_PORT)
4285   {
4286     msg.assign("Only param CFG_CONNECTION_SERVER_PORT can be retrieved");
4287     DBUG_RETURN(-1);
4288   }
4289 
4290   if (!m_config_manager->get_dynamic_port(node1, node2, value, msg))
4291     DBUG_RETURN(-1);
4292 
4293   DBUG_PRINT("exit", ("Return parameter(%d): %u for %d -> %d, msg: %s",
4294                       param, *value, node1, node2, msg.c_str()));
4295   DBUG_RETURN(1);
4296 }
4297 
4298 
4299 bool
transporter_connect(NDB_SOCKET_TYPE sockfd,BaseString & msg,bool & close_with_reset)4300 MgmtSrvr::transporter_connect(NDB_SOCKET_TYPE sockfd,
4301                               BaseString& msg,
4302                               bool& close_with_reset)
4303 {
4304   DBUG_ENTER("MgmtSrvr::transporter_connect");
4305   TransporterRegistry* tr= theFacade->get_registry();
4306   if (!tr->connect_server(sockfd, msg, close_with_reset))
4307     DBUG_RETURN(false);
4308 
4309   /**
4310    * TransporterRegistry::update_connections() is responsible
4311    * for doing the final step of bringing the connection into
4312    * CONNECTED state when it detects it 'isConnected()'.
4313    * This is required due to all such state changes has to
4314    * be synchroniced with ::performReceive().
4315    * To speed up CONNECTED detection, we request it to
4316    * happen ASAP. (There is no guarantee when it happen though)
4317    */
4318   theFacade->request_connection_check();
4319   DBUG_RETURN(true);
4320 }
4321 
4322 
connect_to_self()4323 bool MgmtSrvr::connect_to_self()
4324 {
4325   BaseString buf;
4326   NdbMgmHandle mgm_handle= ndb_mgm_create_handle();
4327 
4328   buf.assfmt("%s:%u",
4329              m_opts.bind_address ? m_opts.bind_address : "localhost",
4330              m_port);
4331   ndb_mgm_set_connectstring(mgm_handle, buf.c_str());
4332 
4333   if(ndb_mgm_connect(mgm_handle, 0, 0, 0) < 0)
4334   {
4335     g_eventLogger->warning("%d %s",
4336                            ndb_mgm_get_latest_error(mgm_handle),
4337                            ndb_mgm_get_latest_error_desc(mgm_handle));
4338     ndb_mgm_destroy_handle(&mgm_handle);
4339     return false;
4340   }
4341   // TransporterRegistry now owns the handle and will destroy it.
4342   theFacade->get_registry()->set_mgm_handle(mgm_handle);
4343 
4344   return true;
4345 }
4346 
4347 
4348 bool
change_config(Config & new_config,BaseString & msg)4349 MgmtSrvr::change_config(Config& new_config, BaseString& msg)
4350 {
4351   SignalSender ss(theFacade);
4352   ss.lock();
4353 
4354   SimpleSignal ssig;
4355   UtilBuffer buf;
4356   new_config.pack(buf);
4357   ssig.ptr[0].p = (Uint32*)buf.get_data();
4358   ssig.ptr[0].sz = (buf.length() + 3) / 4;
4359   ssig.header.m_noOfSections = 1;
4360 
4361   ConfigChangeReq *req= CAST_PTR(ConfigChangeReq, ssig.getDataPtrSend());
4362   req->length = buf.length();
4363 
4364   NodeBitmask mgm_nodes;
4365   {
4366     Guard g(m_local_config_mutex);
4367     m_local_config->get_nodemask(mgm_nodes, NDB_MGM_NODE_TYPE_MGM);
4368   }
4369 
4370   NodeId nodeId= ss.find_confirmed_node(mgm_nodes);
4371   if (nodeId == 0)
4372   {
4373     msg = "INTERNAL ERROR Could not find any mgmd!";
4374     return false;
4375   }
4376 
4377   if (ss.sendFragmentedSignal(nodeId, ssig,
4378                               MGM_CONFIG_MAN, GSN_CONFIG_CHANGE_REQ,
4379                               ConfigChangeReq::SignalLength) != 0)
4380   {
4381     msg.assfmt("Could not start configuration change, send to "
4382                "node %d failed", nodeId);
4383     return false;
4384   }
4385   mgm_nodes.clear(nodeId);
4386 
4387   bool done = false;
4388   while(!done)
4389   {
4390     SimpleSignal *signal= ss.waitFor();
4391 
4392     switch(signal->readSignalNumber()){
4393     case GSN_CONFIG_CHANGE_CONF:
4394       done= true;
4395       break;
4396     case GSN_CONFIG_CHANGE_REF:
4397     {
4398       const ConfigChangeRef * const ref =
4399         CAST_CONSTPTR(ConfigChangeRef, signal->getDataPtr());
4400       g_eventLogger->debug("Got CONFIG_CHANGE_REF, error: %d", ref->errorCode);
4401       switch(ref->errorCode)
4402       {
4403       case ConfigChangeRef::NotMaster:{
4404         // Retry with next node if any
4405         NodeId nodeId= ss.find_confirmed_node(mgm_nodes);
4406         if (nodeId == 0)
4407         {
4408           msg = "INTERNAL ERROR Could not find any mgmd!";
4409           return false;
4410         }
4411 
4412         if (ss.sendFragmentedSignal(nodeId, ssig,
4413                                     MGM_CONFIG_MAN, GSN_CONFIG_CHANGE_REQ,
4414                                     ConfigChangeReq::SignalLength) != 0)
4415         {
4416           msg.assfmt("Could not start configuration change, send to "
4417                      "node %d failed", nodeId);
4418           return false;
4419         }
4420         mgm_nodes.clear(nodeId);
4421         break;
4422       }
4423 
4424       default:
4425         msg = ConfigChangeRef::errorMessage(ref->errorCode);
4426         return false;
4427       }
4428 
4429       break;
4430     }
4431 
4432     case GSN_API_REGCONF:
4433     case GSN_TAKE_OVERTCCONF:
4434     case GSN_CONNECT_REP:
4435       // Ignore;
4436       break;
4437 
4438 
4439     case GSN_NODE_FAILREP:
4440       // ignore, NF_COMPLETEREP will come
4441       break;
4442 
4443     case GSN_NF_COMPLETEREP:
4444     {
4445       NodeId nodeId = refToNode(signal->header.theSendersBlockRef);
4446       msg.assign("Node %d failed during configuration change", nodeId);
4447       return false;
4448       break;
4449     }
4450 
4451     default:
4452       report_unknown_signal(signal);
4453       return false;
4454 
4455     }
4456   }
4457 
4458   g_eventLogger->info("Config change completed");
4459 
4460   return true;
4461 }
4462 
4463 
4464 void
print_config(const char * section_filter,NodeId nodeid_filter,const char * param_filter,NdbOut & out)4465 MgmtSrvr::print_config(const char* section_filter, NodeId nodeid_filter,
4466                        const char* param_filter,
4467                        NdbOut& out)
4468 {
4469   Guard g(m_local_config_mutex);
4470   m_local_config->print(section_filter, nodeid_filter,
4471                         param_filter, out);
4472 }
4473 
4474 
4475 bool
reload_config(const char * config_filename,bool mycnf,BaseString & msg)4476 MgmtSrvr::reload_config(const char* config_filename, bool mycnf,
4477                         BaseString& msg)
4478 {
4479   if (config_filename && mycnf)
4480   {
4481     msg = "ERROR: Both mycnf and config_filename is not supported";
4482     return false;
4483   }
4484 
4485   if (config_filename)
4486   {
4487     if (m_opts.mycnf)
4488     {
4489       msg.assfmt("ERROR: Can't switch to use config.ini '%s' when "
4490                  "node was started from my.cnf", config_filename);
4491       return false;
4492     }
4493   }
4494   else
4495   {
4496     if (mycnf)
4497     {
4498       // Reload from my.cnf
4499       if (!m_opts.mycnf)
4500       {
4501         if (m_opts.config_filename)
4502         {
4503           msg.assfmt("ERROR: Can't switch to use my.cnf when "
4504                      "node was started from '%s'", m_opts.config_filename);
4505           return false;
4506         }
4507       }
4508     }
4509     else
4510     {
4511       /* No config file name supplied and not told to use mycnf */
4512       if (m_opts.config_filename)
4513       {
4514         g_eventLogger->info("No config file name supplied, using '%s'",
4515                             m_opts.config_filename);
4516         config_filename = m_opts.config_filename;
4517       }
4518       else
4519       {
4520         msg = "ERROR: Neither config file name or mycnf available";
4521         return false;
4522       }
4523     }
4524   }
4525 
4526   Config* new_conf_ptr;
4527   if ((new_conf_ptr= ConfigManager::load_config(config_filename,
4528                                                 mycnf, msg)) == NULL)
4529     return false;
4530   Config new_conf(new_conf_ptr);
4531 
4532   {
4533     Guard g(m_local_config_mutex);
4534 
4535     /* Copy the necessary values from old to new config */
4536     if (!new_conf.setGeneration(m_local_config->getGeneration()) ||
4537         !new_conf.setName(m_local_config->getName()) ||
4538         !new_conf.setPrimaryMgmNode(m_local_config->getPrimaryMgmNode()))
4539     {
4540       msg = "Failed to initialize reloaded config";
4541       return false;
4542     }
4543   }
4544 
4545   if (!change_config(new_conf, msg))
4546     return false;
4547   return true;
4548 }
4549 
4550 void
show_variables(NdbOut & out)4551 MgmtSrvr::show_variables(NdbOut& out)
4552 {
4553   out << "daemon: " << yes_no(m_opts.daemon) << endl;
4554   out << "non_interactive: " << yes_no(m_opts.non_interactive) << endl;
4555   out << "interactive: " << yes_no(m_opts.interactive) << endl;
4556   out << "config_filename: " << str_null(m_opts.config_filename) << endl;
4557   out << "mycnf: " << yes_no(m_opts.mycnf) << endl;
4558   out << "bind_address: " << str_null(m_opts.bind_address) << endl;
4559   out << "no_nodeid_checks: " << yes_no(m_opts.no_nodeid_checks) << endl;
4560   out << "print_full_config: " << yes_no(m_opts.print_full_config) << endl;
4561   out << "configdir: " << str_null(m_opts.configdir) << endl;
4562   out << "config_cache: " << yes_no(m_opts.config_cache) << endl;
4563   out << "verbose: " << yes_no(m_opts.verbose) << endl;
4564   out << "reload: " << yes_no(m_opts.reload) << endl;
4565 
4566   out << "nodeid: " << _ownNodeId << endl;
4567   out << "blocknumber: " << hex <<_blockNumber << endl;
4568   out << "own_reference: " << hex << _ownReference << endl;
4569   out << "port: " << m_port << endl;
4570   out << "need_restart: " << m_need_restart << endl;
4571   out << "is_stop_thread: " << _isStopThread << endl;
4572   out << "log_level_thread_sleep: " << _logLevelThreadSleep << endl;
4573   out << "master_node: " << m_master_node << endl;
4574 }
4575 
4576 void
make_sync_req(SignalSender & ss,Uint32 nodeId)4577 MgmtSrvr::make_sync_req(SignalSender& ss, Uint32 nodeId)
4578 {
4579   const trp_node node = ss.getNodeInfo(nodeId);
4580   if (!ndbd_sync_req_support(node.m_info.m_version))
4581   {
4582     /* The node hasn't got SYNC_REQ support */
4583     return;
4584   }
4585 
4586   /**
4587    * This subroutine is used to make a async request(error insert/dump)
4588    *   "more" syncronous, i.e increasing the likelyhood that
4589    *   the async request has really reached the destination
4590    *   before returning to the api
4591    *
4592    * I.e it's a work-around...
4593    *
4594    */
4595   SimpleSignal ssig;
4596   SyncReq* req = CAST_PTR(SyncReq, ssig.getDataPtrSend());
4597   req->senderRef = ss.getOwnRef();
4598   req->senderData = 12;
4599   req->prio = 1; // prio b
4600   ssig.set(ss,TestOrd::TraceAPI, CMVMI, GSN_SYNC_REQ, SyncReq::SignalLength);
4601 
4602   if (ss.sendSignal(nodeId, &ssig) != SEND_OK)
4603   {
4604     return;
4605   }
4606 
4607   while (true)
4608   {
4609     SimpleSignal *signal = ss.waitFor();
4610 
4611     int gsn = signal->readSignalNumber();
4612     switch (gsn) {
4613     case GSN_SYNC_REF:
4614     case GSN_SYNC_CONF:
4615       return;
4616 
4617     case GSN_NF_COMPLETEREP:{
4618       const NFCompleteRep * const rep =
4619         CAST_CONSTPTR(NFCompleteRep, signal->getDataPtr());
4620       if (rep->failedNodeId == nodeId)
4621         return;
4622       break;
4623     }
4624 
4625     case GSN_NODE_FAILREP:{
4626       const NodeFailRep * const rep =
4627 	CAST_CONSTPTR(NodeFailRep, signal->getDataPtr());
4628       Uint32 len = NodeFailRep::getNodeMaskLength(signal->getLength());
4629       assert(len == NodeBitmask::Size); // only full length in ndbapi
4630       if (BitmaskImpl::safe_get(len, rep->theAllNodes,nodeId))
4631 	return;
4632       break;
4633     }
4634     case GSN_API_REGCONF:
4635     case GSN_TAKE_OVERTCCONF:
4636     case GSN_CONNECT_REP:
4637       break;
4638     default:
4639       return;
4640     }
4641   }
4642 }
4643 
4644 
4645 bool
request_events(NdbNodeBitmask nodes,Uint32 reports_per_node,Uint32 dump_type,Vector<SimpleSignal> & events)4646 MgmtSrvr::request_events(NdbNodeBitmask nodes, Uint32 reports_per_node,
4647                          Uint32 dump_type,
4648                          Vector<SimpleSignal>& events)
4649 {
4650   int nodes_counter[MAX_NDB_NODES];
4651 #ifndef NDEBUG
4652   NdbNodeBitmask save = nodes;
4653 #endif
4654   SignalSender ss(theFacade);
4655   ss.lock();
4656 
4657   // Send the dump command to all requested NDB nodes
4658   const bool all = nodes.isclear();
4659   for (int i = 1; i < MAX_NDB_NODES; i++)
4660   {
4661     // Check if node should be involved
4662     if (!all && !nodes.get(i))
4663       continue;
4664 
4665     // Only request from confirmed DB nodes
4666     const trp_node node = ss.getNodeInfo(i);
4667     if (node.m_info.getType() != NodeInfo::DB ||
4668         !node.is_confirmed())
4669     {
4670       nodes.clear(i);
4671       continue;
4672     }
4673 
4674     SimpleSignal ssig;
4675     DumpStateOrd * const dumpOrd = (DumpStateOrd*)ssig.getDataPtrSend();
4676 
4677     dumpOrd->args[0] = dump_type;
4678     dumpOrd->args[1] = ss.getOwnRef(); // Return to sender
4679 
4680     if (ss.sendSignal(i, ssig, CMVMI, GSN_DUMP_STATE_ORD, 2) == SEND_OK)
4681     {
4682       nodes.set(i);
4683       nodes_counter[i] = (int)reports_per_node;
4684     }
4685   }
4686 
4687 
4688   while (true)
4689   {
4690     // Check if all nodes are done
4691     if (nodes.isclear())
4692       break;
4693 
4694     SimpleSignal *signal = ss.waitFor();
4695     switch (signal->readSignalNumber()) {
4696     case GSN_EVENT_REP:{
4697       const NodeId nodeid = refToNode(signal->header.theSendersBlockRef);
4698       const EventReport * const event =
4699         (const EventReport*)signal->getDataPtr();
4700 
4701       if (!nodes.get(nodeid))
4702       {
4703         // The reporting node was not expected
4704 #ifndef NDEBUG
4705         ndbout_c("nodeid: %u", nodeid);
4706         ndbout_c("save: %s", BaseString::getPrettyText(save).c_str());
4707 #endif
4708         assert(false);
4709         return false;
4710       }
4711 
4712       if (event->getEventType() == NDB_LE_SavedEvent &&
4713           signal->getDataPtr()[1] == 0)
4714       {
4715         nodes_counter[nodeid] = 1;
4716       }
4717       else
4718       {
4719         // Save signal
4720         events.push_back(SimpleSignal(*signal));
4721       }
4722 
4723       // Check if node is done
4724       nodes_counter[nodeid]--;
4725       if (nodes_counter[nodeid] == 0)
4726         nodes.clear(nodeid);
4727 
4728       break;
4729     }
4730 
4731     case GSN_NODE_FAILREP:{
4732       const NodeFailRep * const rep =
4733         (const NodeFailRep*)signal->getDataPtr();
4734       // only care about data-nodes
4735       for (NodeId i = 1; i < MAX_NDB_NODES; i++)
4736       {
4737         if (NdbNodeBitmask::get(rep->theNodes, i))
4738         {
4739           nodes.clear(i);
4740 
4741           // Remove any previous reports from this node
4742           // it should not be reported
4743           for (unsigned j = 0; j < events.size(); j++)
4744           {
4745             const SimpleSignal& ssig = events[j];
4746             const NodeId nodeid = refToNode(ssig.header.theSendersBlockRef);
4747             if (nodeid == i)
4748             {
4749               events.erase(j);
4750               j--;
4751             }
4752           }
4753         }
4754       }
4755       break;
4756     }
4757 
4758     default:
4759       // Ignore all other signals
4760       break;
4761     }
4762   }
4763   ss.unlock();
4764 
4765   return true;
4766 }
4767 
/*
 * Explicit instantiations of the Vector and MutexVector class templates
 * for the element types listed below.
 */
template class MutexVector<NodeId>;
template class MutexVector<Ndb_mgmd_event_service::Event_listener>;
template class Vector<EventSubscribeReq>;
template class MutexVector<EventSubscribeReq>;
template class Vector< Vector<BaseString> >;
template class Vector<MgmtSrvr::PossibleNode>;
template class Vector<Defragger::DefragBuffer*>;
4775