1 /*
2    Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License, version 2.0,
6    as published by the Free Software Foundation.
7 
8    This program is also distributed with certain software (including
9    but not limited to OpenSSL) that is licensed under separate terms,
10    as designated in a particular file or component or in included license
11    documentation.  The authors of MySQL hereby grant you an additional
12    permission to link the program and your derivative works with the
13    separately licensed software that they have included with MySQL.
14 
15    This program is distributed in the hope that it will be useful,
16    but WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18    GNU General Public License, version 2.0, for more details.
19 
20    You should have received a copy of the GNU General Public License
21    along with this program; if not, write to the Free Software
22    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
23 */
24 
25 #include <ndb_global.h>
26 
27 #include "MgmtSrvr.hpp"
28 #include "ndb_mgmd_error.h"
29 #include "Services.hpp"
30 #include "ConfigManager.hpp"
31 #include "Defragger.hpp"
32 
33 #include <NdbOut.hpp>
34 #include <NdbApiSignal.hpp>
35 #include <kernel_types.h>
36 #include <GlobalSignalNumbers.h>
37 #include <signaldata/TestOrd.hpp>
38 #include <signaldata/TamperOrd.hpp>
39 #include <signaldata/StartOrd.hpp>
40 #include <signaldata/ApiVersion.hpp>
41 #include <signaldata/ResumeReq.hpp>
42 #include <signaldata/SetLogLevelOrd.hpp>
43 #include <signaldata/EventSubscribeReq.hpp>
44 #include <signaldata/EventReport.hpp>
45 #include <signaldata/DumpStateOrd.hpp>
46 #include <signaldata/BackupSignalData.hpp>
47 #include <signaldata/NFCompleteRep.hpp>
48 #include <signaldata/NodeFailRep.hpp>
49 #include <signaldata/AllocNodeId.hpp>
50 #include <signaldata/SchemaTrans.hpp>
51 #include <signaldata/CreateNodegroup.hpp>
52 #include <signaldata/DropNodegroup.hpp>
53 #include <signaldata/Sync.hpp>
54 #include <signaldata/GetConfig.hpp>
55 #include <NdbSleep.h>
56 #include <portlib/NdbDir.hpp>
57 #include <EventLogger.hpp>
58 #include <logger/FileLogHandler.hpp>
59 #include <logger/ConsoleLogHandler.hpp>
60 #include <logger/SysLogHandler.hpp>
61 #include <DebuggerNames.hpp>
62 #include <ndb_version.h>
63 #include <OwnProcessInfo.hpp>
64 
65 #include <SocketServer.hpp>
66 #include <NdbConfig.h>
67 
68 #include <NdbAutoPtr.hpp>
69 #include <NdbDir.hpp>
70 #include <ndberror.h>
71 
72 #include <mgmapi.h>
73 #include <mgmapi_configuration.hpp>
74 #include <mgmapi_config_parameters.h>
75 
76 #include <SignalSender.hpp>
77 
78 #include <LogBuffer.hpp>
79 #include <BufferedLogHandler.hpp>
80 
/* Currently selected error insert code, compared by ERROR_INSERTED(). */
int g_errorInsert = 0;

/*
  True when error insert code 'x' is the currently selected one.
  The argument is parenthesized so that a compound expression is
  compared as a whole instead of binding to '==' piecewise.
*/
#define ERROR_INSERTED(x) (g_errorInsert == (x))

/*
  Declare a SignalSender named 'ss' on theFacade, lock it and check with
  okToSendTo(nodeId, true) that the node can be sent to.

  NOTE: when the check fails the macro does 'return result;' with the int
  error code, so it should only be used in functions returning int.
*/
#define INIT_SIGNAL_SENDER(ss,nodeId) \
  SignalSender ss(theFacade); \
  ss.lock(); /* lock will be released on exit */ \
  {\
    int result = okToSendTo(nodeId, true);\
    if (result != 0) {\
      return result;\
    }\
  }
93 
94 extern "C" bool opt_core;
95 
96 void *
logLevelThread_C(void * m)97 MgmtSrvr::logLevelThread_C(void* m)
98 {
99   MgmtSrvr *mgm = (MgmtSrvr*)m;
100   mgm->logLevelThreadRun();
101   return 0;
102 }
103 
104 extern EventLogger * g_eventLogger;
105 
#ifdef NOT_USED
/* Print the level of every category of 'll' as "[LogLevel: l0 l1 ... ]" */
static NdbOut&
operator<<(NdbOut& out, const LogLevel & ll)
{
  out << "[LogLevel: ";
  size_t category = 0;
  while (category < LogLevel::LOGLEVEL_CATEGORIES)
  {
    out << ll.getLogLevel((LogLevel::EventCategory)category) << " ";
    category++;
  }
  out << "]";
  return out;
}
#endif
117 
118 void
logLevelThreadRun()119 MgmtSrvr::logLevelThreadRun()
120 {
121   while (!_isStopThread)
122   {
123     Vector<NodeId> failed_started_nodes;
124     Vector<EventSubscribeReq> failed_log_level_requests;
125 
126     /**
127      * Handle started nodes
128      */
129     m_started_nodes.lock();
130     if (m_started_nodes.size() > 0)
131     {
132       // calculate max log level
133       EventSubscribeReq req;
134       {
135         LogLevel tmp;
136         m_event_listner.lock();
137         for(int i = m_event_listner.m_clients.size() - 1; i >= 0; i--)
138           tmp.set_max(m_event_listner[i].m_logLevel);
139         m_event_listner.unlock();
140         req.assign(tmp);
141       }
142       req.blockRef = _ownReference;
143       while (m_started_nodes.size() > 0)
144       {
145         Uint32 node = m_started_nodes[0];
146         m_started_nodes.erase(0, false);
147         m_started_nodes.unlock();
148 
149         if (setEventReportingLevelImpl(node, req))
150         {
151           failed_started_nodes.push_back(node);
152         }
153         else
154         {
155           SetLogLevelOrd ord;
156           ord.assign(m_nodeLogLevel[node]);
157           setNodeLogLevelImpl(node, ord);
158         }
159         m_started_nodes.lock();
160       }
161     }
162     m_started_nodes.unlock();
163 
164     m_log_level_requests.lock();
165     while (m_log_level_requests.size() > 0)
166     {
167       EventSubscribeReq req = m_log_level_requests[0];
168       m_log_level_requests.erase(0, false);
169       m_log_level_requests.unlock();
170 
171       if(req.blockRef == 0)
172       {
173         req.blockRef = _ownReference;
174         if (setEventReportingLevelImpl(0, req))
175         {
176           failed_log_level_requests.push_back(req);
177         }
178       }
179       else
180       {
181         SetLogLevelOrd ord;
182         ord.assign(req);
183         if (setNodeLogLevelImpl(req.blockRef, ord))
184         {
185           failed_log_level_requests.push_back(req);
186         }
187       }
188       m_log_level_requests.lock();
189     }
190     m_log_level_requests.unlock();
191 
192     if(!ERROR_INSERTED(10000))
193       m_event_listner.check_listeners();
194 
195     Uint32 sleeptime = _logLevelThreadSleep;
196     if (failed_started_nodes.size())
197     {
198       m_started_nodes.lock();
199       for (Uint32 i = 0; i<failed_started_nodes.size(); i++)
200         m_started_nodes.push_back(failed_started_nodes[i], false);
201       m_started_nodes.unlock();
202       failed_started_nodes.clear();
203       sleeptime = 100;
204     }
205 
206     if (failed_log_level_requests.size())
207     {
208       m_log_level_requests.lock();
209       for (Uint32 i = 0; i<failed_log_level_requests.size(); i++)
210         m_log_level_requests.push_back(failed_log_level_requests[i], false);
211       m_log_level_requests.unlock();
212       failed_log_level_requests.clear();
213       sleeptime = 100;
214     }
215 
216     NdbSleep_MilliSleep(sleeptime);
217   }
218 }
219 
220 
221 static int
translateStopRef(Uint32 errCode)222 translateStopRef(Uint32 errCode)
223 {
224   switch(errCode){
225   case StopRef::NodeShutdownInProgress:
226     return NODE_SHUTDOWN_IN_PROGESS;
227     break;
228   case StopRef::SystemShutdownInProgress:
229     return SYSTEM_SHUTDOWN_IN_PROGRESS;
230     break;
231   case StopRef::NodeShutdownWouldCauseSystemCrash:
232     return NODE_SHUTDOWN_WOULD_CAUSE_SYSTEM_CRASH;
233     break;
234   case StopRef::UnsupportedNodeShutdown:
235     return UNSUPPORTED_NODE_SHUTDOWN;
236     break;
237   }
238   return 4999;
239 }
240 
241 
MgmtSrvr(const MgmtOpts & opts)242 MgmtSrvr::MgmtSrvr(const MgmtOpts& opts) :
243   m_opts(opts),
244   _blockNumber(0),
245   _ownNodeId(0),
246   m_port(0),
247   m_local_config(NULL),
248   _ownReference(0),
249   m_config_manager(NULL),
250   m_need_restart(false),
251   theFacade(NULL),
252   _isStopThread(false),
253   _logLevelThreadSleep(500),
254   m_event_listner(this),
255   m_master_node(0),
256   _logLevelThread(NULL),
257   m_version_string(ndbGetOwnVersionString()),
258   m_async_cluster_logging(false)
259 {
260   DBUG_ENTER("MgmtSrvr::MgmtSrvr");
261 
262   m_local_config_mutex= NdbMutex_Create();
263   m_reserved_nodes_mutex= NdbMutex_Create();
264   if (!m_local_config_mutex || !m_reserved_nodes_mutex)
265   {
266     g_eventLogger->error("Failed to create MgmtSrvr mutexes");
267     require(false);
268   }
269 
270   /* Init node arrays */
271   for(Uint32 i = 0; i<MAX_NODES; i++) {
272     nodeTypes[i] = (enum ndb_mgm_node_type)-1;
273     clear_connect_address_cache(i);
274   }
275 
276   /* Setup clusterlog as client[0] in m_event_listner */
277   {
278     Ndb_mgmd_event_service::Event_listener se;
279     ndb_socket_invalidate(&(se.m_socket));
280     for(size_t t = 0; t<LogLevel::LOGLEVEL_CATEGORIES; t++){
281       se.m_logLevel.setLogLevel((LogLevel::EventCategory)t, 7);
282     }
283     se.m_logLevel.setLogLevel(LogLevel::llError, 15);
284     se.m_logLevel.setLogLevel(LogLevel::llConnection, 8);
285     se.m_logLevel.setLogLevel(LogLevel::llBackup, 15);
286     m_event_listner.m_clients.push_back(se);
287     m_event_listner.m_logLevel = se.m_logLevel;
288   }
289 
290   DBUG_VOID_RETURN;
291 }
292 
293 
294 /*
295   check_configdir
296 
297   Make sure configdir exist and try to create it if not
298 
299 */
300 
301 const char*
check_configdir() const302 MgmtSrvr::check_configdir() const
303 {
304   if (m_opts.configdir &&
305       strcmp(m_opts.configdir, MYSQLCLUSTERDIR) != 0)
306   {
307     // Specified on commmand line
308     if (access(m_opts.configdir, F_OK))
309     {
310       g_eventLogger->error("Directory '%s' specified with --configdir " \
311                            "does not exist. Either create it or pass " \
312                            "the path to an already existing directory.",
313                            m_opts.configdir);
314       return NULL;
315     }
316     return m_opts.configdir;
317   }
318   else
319   {
320     // Compiled in path MYSQLCLUSTERDIR
321     if (access(MYSQLCLUSTERDIR, F_OK))
322     {
323       g_eventLogger->info("The default config directory '%s' "            \
324                           "does not exist. Trying to create it...",
325                           MYSQLCLUSTERDIR);
326 
327       if (!NdbDir::create(MYSQLCLUSTERDIR) ||
328           access(MYSQLCLUSTERDIR, F_OK))
329       {
330         g_eventLogger->error("Could not create directory '%s'. "        \
331                              "Either create it manually or "            \
332                              "specify a different directory with "      \
333                              "--configdir=<path>",
334                              MYSQLCLUSTERDIR);
335         return NULL;
336       }
337 
338       g_eventLogger->info("Sucessfully created config directory");
339     }
340     return MYSQLCLUSTERDIR;
341   }
342 }
343 
344 
345 bool
init()346 MgmtSrvr::init()
347 {
348   DBUG_ENTER("MgmtSrvr::init");
349 
350   const char* configdir;
351 
352   if (!m_opts.config_cache)
353   {
354     g_eventLogger->info("Skipping check of config directory since "
355                         "config cache is disabled.");
356     configdir = NULL;
357   }
358   else
359   {
360     if (!(configdir= check_configdir()))
361       DBUG_RETURN(false);
362   }
363 
364   if (!(m_config_manager= new ConfigManager(m_opts, configdir)))
365   {
366     g_eventLogger->error("Failed to create ConfigManager");
367     DBUG_RETURN(false);
368   }
369 
370   if (m_config_manager->add_config_change_subscriber(this) < 0)
371   {
372     g_eventLogger->error("Failed to add MgmtSrvr as config change subscriber");
373     DBUG_RETURN(false);
374   }
375 
376   if (!m_config_manager->init())
377   {
378     DBUG_RETURN(false);
379   }
380 
381   /* 'config_changed' should have been called from 'init' */
382   require(m_local_config != 0);
383 
384   if (m_opts.print_full_config)
385   {
386     print_config();
387     DBUG_RETURN(false);
388   }
389 
390   assert(_ownNodeId);
391 
392   DBUG_RETURN(true);
393 }
394 
395 
396 bool
start_transporter(const Config * config)397 MgmtSrvr::start_transporter(const Config* config)
398 {
399   DBUG_ENTER("MgmtSrvr::start_transporter");
400 
401   theFacade= new TransporterFacade(0);
402   if (theFacade == 0)
403   {
404     g_eventLogger->error("Could not create TransporterFacade.");
405     DBUG_RETURN(false);
406   }
407 
408   assert(_blockNumber == 0); // Blocknumber shouldn't been allocated yet
409 
410   /*
411     Register ourself at TransporterFacade to be able to receive signals
412     and to be notified when a database process has died.
413   */
414   Uint32 res;
415   if ((res = open(theFacade)) == 0)
416   {
417     g_eventLogger->error("Failed to open block in TransporterFacade");
418     theFacade->stop_instance();
419     delete theFacade;
420     theFacade = 0;
421     DBUG_RETURN(false);
422   }
423   _blockNumber = refToBlock(res);
424   assert(_blockNumber > 0);
425 
426   /**
427    * Need to call ->open() prior to actually starting TF
428    */
429   m_config_manager->set_facade(theFacade);
430 
431   if (theFacade->start_instance(_ownNodeId,
432                                 config->m_configValues) < 0)
433   {
434     g_eventLogger->error("Failed to start transporter");
435     delete theFacade;
436     theFacade = 0;
437     DBUG_RETURN(false);
438   }
439 
440   _ownReference = numberToRef(_blockNumber, _ownNodeId);
441 
442   /*
443     set api reg req frequency quite high:
444 
445     100 ms interval to make sure we have fairly up-to-date
446     info from the nodes.  This to make sure that this info
447     is not dependent on heartbeat settings in the
448     configuration
449   */
450   theFacade->ext_set_max_api_reg_req_interval(100);
451 
452   DBUG_RETURN(true);
453 }
454 
455 
456 bool
start_mgm_service(const Config * config)457 MgmtSrvr::start_mgm_service(const Config* config)
458 {
459   DBUG_ENTER("MgmtSrvr::start_mgm_service");
460 
461   assert(m_port == 0);
462   {
463     // Find the portnumber to use for mgm service
464     ConfigIter iter(config, CFG_SECTION_NODE);
465 
466     if(iter.find(CFG_NODE_ID, _ownNodeId) != 0){
467       g_eventLogger->error("Could not find node %d in config", _ownNodeId);
468       DBUG_RETURN(false);
469     }
470 
471     unsigned type;
472     if(iter.get(CFG_TYPE_OF_SECTION, &type) != 0 ||
473        type != NODE_TYPE_MGM){
474       g_eventLogger->error("Node %d is not defined as management server",
475                            _ownNodeId);
476       DBUG_RETURN(false);
477     }
478 
479     if(iter.get(CFG_MGM_PORT, &m_port) != 0){
480       g_eventLogger->error("PortNumber not defined for node %d", _ownNodeId);
481       DBUG_RETURN(false);
482     }
483   }
484 
485   unsigned short port= m_port;
486   DBUG_PRINT("info", ("Using port %d", port));
487   if (port == 0)
488   {
489     g_eventLogger->error("Could not find out which port to use"\
490                         " for management service");
491     DBUG_RETURN(false);
492   }
493 
494   {
495     int count= 5; // no of retries for tryBind
496     while(!m_socket_server.tryBind(port, m_opts.bind_address))
497     {
498       if (--count > 0)
499       {
500 	NdbSleep_SecSleep(1);
501 	continue;
502       }
503       g_eventLogger->error("Unable to bind management service port: %s:%d!\n"
504                            "Please check if the port is already used,\n"
505                            "(perhaps a ndb_mgmd is already running),\n"
506                            "and if you are executing on the correct computer",
507                            (m_opts.bind_address ? m_opts.bind_address : "*"),
508                            port);
509       DBUG_RETURN(false);
510     }
511   }
512 
513   {
514     MgmApiService * mapi = new MgmApiService(*this);
515     if (mapi == NULL)
516     {
517       g_eventLogger->error("Could not allocate MgmApiService");
518       DBUG_RETURN(false);
519     }
520 
521     if(!m_socket_server.setup(mapi, &port, m_opts.bind_address))
522     {
523       delete mapi; // Will be deleted by SocketServer in all other cases
524       g_eventLogger->error("Unable to setup management service port: %s:%d!\n"
525                            "Please check if the port is already used,\n"
526                            "(perhaps a ndb_mgmd is already running),\n"
527                            "and if you are executing on the correct computer",
528                            (m_opts.bind_address ? m_opts.bind_address : "*"),
529                            port);
530       DBUG_RETURN(false);
531     }
532 
533     if (port != m_port)
534     {
535       g_eventLogger->error("Couldn't start management service on the "\
536                            "requested port: %d. Got port: %d instead",
537                           m_port, port);
538       DBUG_RETURN(false);
539     }
540   }
541   setOwnProcessInfoPort(port);
542 
543   m_socket_server.startServer();
544 
545   g_eventLogger->info("Id: %d, Command port: %s:%d",
546                       _ownNodeId,
547                       m_opts.bind_address ? m_opts.bind_address : "*",
548                       port);
549   DBUG_RETURN(true);
550 }
551 
552 
553 bool
start()554 MgmtSrvr::start()
555 {
556   DBUG_ENTER("MgmtSrvr::start");
557 
558   /* Start transporter */
559   if(!start_transporter(m_local_config))
560   {
561     g_eventLogger->error("Failed to start transporter!");
562     DBUG_RETURN(false);
563   }
564 
565   /* Start mgm service */
566   if (!start_mgm_service(m_local_config))
567   {
568     g_eventLogger->error("Failed to start mangement service!");
569     DBUG_RETURN(false);
570   }
571 
572   /* Use local MGM port for TransporterRegistry */
573   if(!connect_to_self())
574   {
575     g_eventLogger->error("Failed to connect to ourself!");
576     DBUG_RETURN(false);
577   }
578 
579   set_async_cluster_logging(true);
580   /* Start config manager */
581   if (!m_config_manager->start())
582   {
583     g_eventLogger->error("Failed to start ConfigManager");
584     DBUG_RETURN(false);
585   }
586 
587   /* Loglevel thread */
588   assert(_isStopThread == false);
589   _logLevelThread = NdbThread_Create(logLevelThread_C,
590 				     (void**)this,
591                                      0, // default stack size
592 				     "MgmtSrvr_Loglevel",
593 				     NDB_THREAD_PRIO_LOW);
594 
595   DBUG_RETURN(true);
596 }
597 
598 void
set_async_cluster_logging(bool async_cluster_logging)599 MgmtSrvr::set_async_cluster_logging(bool async_cluster_logging)
600 {
601   m_async_cluster_logging = true;
602 }
603 
604 void
configure_eventlogger(const BaseString & logdestination) const605 MgmtSrvr::configure_eventlogger(const BaseString& logdestination) const
606 {
607   // Close old log handlers before creating the new
608   g_eventLogger->close();
609 
610   Vector<BaseString> logdestinations;
611   logdestination.split(logdestinations, ";");
612 
613   for(unsigned i = 0; i < logdestinations.size(); i++)
614   {
615     // Extract type(everything left of colon)
616     Vector<BaseString> v_type_params;
617     logdestinations[i].split(v_type_params, ":", 2);
618     BaseString type(v_type_params[0]);
619 
620     // Extract params(everything right of colon)
621     BaseString params;
622     if(v_type_params.size() >= 2)
623       params = v_type_params[1];
624 
625     LogHandler *handler = NULL;
626     if(type == "FILE")
627     {
628       char *default_file_name= NdbConfig_ClusterLogFileName(_ownNodeId);
629       FileLogHandler* file_handler = new FileLogHandler(default_file_name);
630       free(default_file_name);
631 
632       if(m_async_cluster_logging)
633       {
634         /**
635          *  Log to a buffered log handler, and pass the file log handler
636          *  as the destination log handler.
637          */
638         file_handler->parseParams(params);
639         if (!file_handler->is_open() &&
640             !file_handler->open())
641         {
642           ndbout_c("INTERNAL ERROR: Could not create log handler for: '%s'",
643                    logdestinations[i].c_str());
644           continue;
645         }
646 
647         handler = new BufferedLogHandler(file_handler);
648       }
649       else
650       {
651         handler = file_handler;
652       }
653     }
654     else if(type == "CONSOLE")
655     {
656       handler = new ConsoleLogHandler();
657     }
658 #ifndef _WIN32
659     else if(type == "SYSLOG")
660     {
661       handler = new SysLogHandler();
662     }
663 #endif
664     if(handler == NULL)
665     {
666       ndbout_c("INTERNAL ERROR: Could not create log handler for: '%s'",
667                logdestinations[i].c_str());
668       continue;
669     }
670 
671     if(!handler->parseParams(params))
672     {
673       ndbout_c("Failed to parse parameters for log handler: '%s', error: %d '%s'",
674                logdestinations[i].c_str(), handler->getErrorCode(), handler->getErrorStr());
675       delete handler;
676       continue;
677     }
678 
679     if (!g_eventLogger->addHandler(handler))
680     {
681       ndbout_c("INTERNAL ERROR: Could not add %s log handler", handler->handler_type());
682       g_eventLogger->error("INTERNAL ERROR: Could not add %s log handler",
683                            handler->handler_type());
684       delete handler;
685       continue;
686     }
687   }
688 }
689 
690 
691 void
setClusterLog(const Config * config)692 MgmtSrvr::setClusterLog(const Config* config)
693 {
694   DBUG_ASSERT(_ownNodeId);
695 
696   ConfigIter iter(config, CFG_SECTION_NODE);
697   require(iter.find(CFG_NODE_ID, _ownNodeId) == 0);
698 
699   // Update DataDir from config
700   const char *datadir;
701   require(iter.get(CFG_NODE_DATADIR, &datadir) == 0);
702   NdbConfig_SetPath(datadir);
703 
704   if (NdbDir::chdir(NdbConfig_get_path(NULL)) != 0)
705   {
706     g_eventLogger->warning("Cannot change directory to '%s', error: %d",
707                            NdbConfig_get_path(NULL), errno);
708     // Ignore error
709   }
710 
711   // Get log destination from config
712   BaseString logdest;
713   const char *value;
714   if(iter.get(CFG_LOG_DESTINATION, &value) == 0){
715     logdest.assign(value);
716   }
717 
718   bool logdest_configured = true;
719   if(logdest.length() == 0 || logdest == "") {
720     // No LogDestination set, use default settings
721     char *clusterLog= NdbConfig_ClusterLogFileName(_ownNodeId);
722     logdest.assfmt("FILE:filename=%s,maxsize=1000000,maxfiles=6",
723 		   clusterLog);
724     free(clusterLog);
725     logdest_configured = false;
726   }
727 
728   configure_eventlogger(logdest);
729 
730   if (logdest_configured == false &&
731       m_opts.non_interactive)
732   {
733     g_eventLogger->createConsoleHandler();
734   }
735 
736 #ifdef _WIN32
737   /* Output to Windows event log */
738   g_eventLogger->createEventLogHandler("MySQL Cluster Management Server");
739 #endif
740 
741   if (m_opts.verbose)
742     g_eventLogger->enable(Logger::LL_DEBUG);
743 }
744 
745 
746 void
config_changed(NodeId node_id,const Config * new_config)747 MgmtSrvr::config_changed(NodeId node_id, const Config* new_config)
748 {
749   DBUG_ENTER("MgmtSrvr::config_changed");
750 
751   Guard g(m_local_config_mutex);
752 
753   // Don't allow nodeid to change, once it's been set
754   require(_ownNodeId == 0 || _ownNodeId == node_id);
755 
756   _ownNodeId= node_id;
757 
758   if (m_local_config)
759     delete m_local_config;
760 
761   m_local_config= new Config(new_config); // Copy
762   require(m_local_config != 0);
763 
764   /* Rebuild node arrays */
765   ConfigIter iter(m_local_config, CFG_SECTION_NODE);
766   for(Uint32 i = 0; i<MAX_NODES; i++) {
767 
768     clear_connect_address_cache(i);
769 
770     if (iter.first())
771       continue;
772 
773     if (iter.find(CFG_NODE_ID, i) == 0){
774       unsigned type;
775       require(iter.get(CFG_TYPE_OF_SECTION, &type) == 0);
776 
777       switch(type){
778       case NODE_TYPE_DB:
779         nodeTypes[i] = NDB_MGM_NODE_TYPE_NDB;
780         break;
781       case NODE_TYPE_API:
782         nodeTypes[i] = NDB_MGM_NODE_TYPE_API;
783         break;
784       case NODE_TYPE_MGM:
785         nodeTypes[i] = NDB_MGM_NODE_TYPE_MGM;
786         break;
787       default:
788         break;
789       }
790     }
791     else
792     {
793       nodeTypes[i] = (enum ndb_mgm_node_type)-1;
794     }
795 
796   }
797 
798   // Setup cluster log
799   setClusterLog(m_local_config);
800 
801   if (theFacade)
802   {
803     if (!theFacade->configure(_ownNodeId,
804                               m_local_config->m_configValues))
805     {
806       g_eventLogger->warning("Could not reconfigure everything online, "
807                              "this node need a restart");
808       m_need_restart= true;
809     }
810   }
811 
812   DBUG_VOID_RETURN;
813 }
814 
815 
816 bool
get_packed_config(ndb_mgm_node_type node_type,BaseString & buf64,BaseString & error,bool v2,Uint32 node_id)817 MgmtSrvr::get_packed_config(ndb_mgm_node_type node_type,
818                             BaseString& buf64,
819                             BaseString& error,
820                             bool v2,
821                             Uint32 node_id)
822 {
823   return m_config_manager->get_packed_config(node_type,
824                                              &buf64,
825                                              error,
826                                              v2,
827                                              node_id);
828 }
829 
830 bool
get_packed_config_from_node(NodeId nodeId,BaseString & buf64,BaseString & error,bool v2_requester)831 MgmtSrvr::get_packed_config_from_node(NodeId nodeId,
832                             BaseString& buf64,
833                             BaseString& error,
834                             bool v2_requester)
835 {
836   DBUG_ENTER("get_packed_config_from_node");
837 
838   if (nodeId >= MAX_NODES_ID)
839   {
840     error.assfmt("Nodeid %d is greater than max nodeid %d. ",
841                  nodeId, MAX_NODES_ID);
842     DBUG_RETURN(false);
843   }
844 
845   if (getNodeType(nodeId) == NDB_MGM_NODE_TYPE_UNKNOWN)
846   {
847     error.assfmt("Nodeid %d does not exist. ", nodeId);
848     DBUG_RETURN(false);
849   }
850 
851   if (getNodeType(nodeId) != NDB_MGM_NODE_TYPE_NDB)
852   {
853     error.assfmt("Node %d is not a data node. ", nodeId);
854     DBUG_RETURN(false);
855   }
856 
857   trp_node node = getNodeInfo(nodeId);
858 
859   if (!node.m_alive)
860   {
861     error.assfmt("Data node %d is not alive. ", nodeId);
862     DBUG_RETURN(false);
863   }
864 
865   const Uint32 version = node.m_info.m_version;
866   bool v2_data_node = ndb_config_version_v2(version);
867   INIT_SIGNAL_SENDER(ss,nodeId);
868 
869   SimpleSignal ssig;
870   GetConfigReq* req = CAST_PTR(GetConfigReq, ssig.getDataPtrSend());
871   req->senderRef = ss.getOwnRef();
872   req->nodeId = nodeId;
873 
874   g_eventLogger->debug("Sending GET_CONFIG_REQ to %d", nodeId);
875 
876   ssig.set(ss, TestOrd::TraceAPI, CMVMI, GSN_GET_CONFIG_REQ,
877            GetConfigReq::SignalLength);
878   if ((ss.sendSignal(nodeId, &ssig)) != SEND_OK)
879   {
880     DBUG_RETURN(false);
881   }
882 
883   Defragger defragger;
884   while (true)
885   {
886     SimpleSignal *signal = ss.waitFor();
887     int gsn = signal->readSignalNumber();
888 
889     switch (gsn)
890     {
891     case GSN_GET_CONFIG_CONF:
892     {
893       if (refToNode(signal->header.theSendersBlockRef) != nodeId)
894       {
895         error.assfmt("Internal Error: Reply from wrong node %d, expected from %d. ",
896                      refToNode(signal->header.theSendersBlockRef),
897                      nodeId);
898         DBUG_RETURN(false);
899       }
900 
901       const GetConfigConf * const conf =
902 	CAST_CONSTPTR(GetConfigConf, signal->getDataPtr());
903 
904       if (signal->header.m_noOfSections != 1)
905       {
906         error.assfmt("Internal Error: Wrong number of sections %d received, expected %d. ",
907                      signal->header.m_noOfSections, 1);
908         DBUG_RETURN(false);
909       }
910 
911       if (defragger.defragment(signal))
912       {
913         ConfigValuesFactory cf;
914         if (v2_data_node)
915           require(cf.unpack_v2(signal->ptr[0].p, conf->configLength));
916         else
917           require(cf.unpack_v1(signal->ptr[0].p, conf->configLength));
918 
919         Config received_config(cf.getConfigValues());
920         bool ret;
921         if (v2_requester)
922           ret = received_config.pack64_v2(buf64);
923         else
924           ret = received_config.pack64_v1(buf64);
925         if (!ret)
926         {
927           error.assign("Failed to pack64");
928           DBUG_RETURN(false);
929         }
930         DBUG_RETURN(true);
931       }
932       // wait until all fragments are received
933       continue;
934     }
935 
936     case GSN_GET_CONFIG_REF:
937     {
938       if (refToNode(ssig.header.theSendersBlockRef) != nodeId)
939       {
940         error.assfmt("Internal Error: Reply from wrong node %d, expected from %d. ",
941                      refToNode(signal->header.theSendersBlockRef),
942                      nodeId);
943         DBUG_RETURN(false);
944       }
945       const GetConfigRef * const ref =
946 	CAST_CONSTPTR(GetConfigRef, signal->getDataPtr());
947       error.assfmt("Error in retrieving config from node %d: Internal error: %d",
948                    nodeId, ref->error);
949 
950       DBUG_RETURN(false);
951     }
952 
953     case GSN_NF_COMPLETEREP:
954     {
955       const NFCompleteRep * rep = CAST_CONSTPTR(NFCompleteRep,
956                                                 signal->getDataPtr());
957       if (rep->failedNodeId == nodeId)
958       {
959         error.assfmt("Node %d is not available", nodeId);
960         DBUG_RETURN(false);
961       }
962       continue;
963     }
964 
965     case GSN_NODE_FAILREP:
966     {
967       // Wait until GSN_NODE_COMPLETEREP is received.
968       continue;
969     }
970 
971     case GSN_API_REGCONF:
972     case GSN_TAKE_OVERTCCONF:
973     case GSN_CONNECT_REP:
974       // Ignore
975       continue;
976 
977     default:
978       report_unknown_signal(signal);
979       DBUG_RETURN(false);
980     }
981   }
982   // Should never come here
983   require(false);
984   DBUG_RETURN(false);
985 }
986 
~MgmtSrvr()987 MgmtSrvr::~MgmtSrvr()
988 {
989   /* Stop log level thread */
990   void* res = 0;
991   _isStopThread = true;
992 
993   if (_logLevelThread != NULL) {
994     NdbThread_WaitFor(_logLevelThread, &res);
995     NdbThread_Destroy(&_logLevelThread);
996   }
997 
998   /* Stop mgm service, don't allow new connections */
999   m_socket_server.stopServer();
1000 
1001   /* Stop all active session */
1002   if (!m_socket_server.stopSessions(true,
1003                                     2 * MgmApiSession::SOCKET_TIMEOUT))
1004   {
1005     g_eventLogger->error("Failed to wait for all sessions to stop, "
1006                          "continuing with shutdown anyway.");
1007   }
1008 
1009   /* Stop config manager */
1010   if (m_config_manager != 0)
1011   {
1012     m_config_manager->stop();
1013     delete m_config_manager;
1014     m_config_manager= 0;
1015   }
1016 
1017   this->close(); // close trp_client before stopping TransporterFacade
1018 
1019   // Stop transporter
1020   if(theFacade != 0){
1021     theFacade->stop_instance();
1022     delete theFacade;
1023     theFacade = 0;
1024   }
1025 
1026   delete m_local_config;
1027 
1028   NdbMutex_Destroy(m_local_config_mutex);
1029   NdbMutex_Destroy(m_reserved_nodes_mutex);
1030 }
1031 
1032 
1033 //****************************************************************************
1034 //****************************************************************************
1035 
okToSendTo(NodeId nodeId,bool unCond)1036 int MgmtSrvr::okToSendTo(NodeId nodeId, bool unCond)
1037 {
1038   if(nodeId == 0 || getNodeType(nodeId) != NDB_MGM_NODE_TYPE_NDB)
1039     return WRONG_PROCESS_TYPE;
1040   // Check if we have contact with it
1041   if(unCond){
1042     if (getNodeInfo(nodeId).is_confirmed())
1043       return 0;
1044   }
1045   else if (getNodeInfo(nodeId).m_alive == true)
1046     return 0;
1047   return NO_CONTACT_WITH_PROCESS;
1048 }
1049 
1050 void
report_unknown_signal(SimpleSignal * signal)1051 MgmtSrvr::report_unknown_signal(SimpleSignal *signal)
1052 {
1053   signal->print();
1054   g_eventLogger->error("Unknown signal received. SignalNumber: "
1055                        "%i from (%d, 0x%x)",
1056                        signal->readSignalNumber(),
1057                        refToNode(signal->header.theSendersBlockRef),
1058                        refToBlock(signal->header.theSendersBlockRef));
1059   assert(false);
1060 }
1061 
1062 /*****************************************************************************
1063  * Starting and stopping database nodes
1064  ****************************************************************************/
1065 
1066 int
sendSTART_ORD(int nodeId)1067 MgmtSrvr::sendSTART_ORD(int nodeId)
1068 {
1069   INIT_SIGNAL_SENDER(ss,nodeId);
1070 
1071   SimpleSignal ssig;
1072   StartOrd* const startOrd = CAST_PTR(StartOrd, ssig.getDataPtrSend());
1073   ssig.set(ss,TestOrd::TraceAPI, CMVMI, GSN_START_ORD, StartOrd::SignalLength);
1074   startOrd->restartInfo = 0;
1075 
1076   return ss.sendSignal(nodeId, &ssig) == SEND_OK ? 0 : SEND_OR_RECEIVE_FAILED;
1077 }
1078 
1079 /*****************************************************************************
1080  * Version handling
1081  *****************************************************************************/
1082 
1083 void
status_api(int nodeId,ndb_mgm_node_status & node_status,Uint32 & version,Uint32 & mysql_version,const char ** address,char * addr_buf,size_t addr_buf_size,bool & is_single_user)1084 MgmtSrvr::status_api(int nodeId,
1085                      ndb_mgm_node_status& node_status,
1086                      Uint32& version, Uint32& mysql_version,
1087                      const char **address,
1088                      char *addr_buf,
1089                      size_t addr_buf_size,
1090                      bool& is_single_user)
1091 {
1092   assert(getNodeType(nodeId) == NDB_MGM_NODE_TYPE_API);
1093   assert(version == 0 && mysql_version == 0);
1094 
1095   if (sendVersionReq(nodeId,
1096                      version,
1097                      mysql_version,
1098                      address,
1099                      addr_buf,
1100                      addr_buf_size,
1101                      is_single_user) != 0)
1102   {
1103     // Couldn't get version from any NDB node.
1104     assert(version == 0);
1105     node_status = NDB_MGM_NODE_STATUS_UNKNOWN;
1106     return;
1107   }
1108 
1109   if (version)
1110   {
1111     assert(mysql_version);
1112     node_status = NDB_MGM_NODE_STATUS_CONNECTED;
1113   }
1114   else
1115   {
1116     assert(mysql_version == 0);
1117     node_status = NDB_MGM_NODE_STATUS_NO_CONTACT;
1118   }
1119   return;
1120 }
1121 
1122 
1123 int
sendVersionReq(int v_nodeId,Uint32 & version,Uint32 & mysql_version,const char ** address,char * addr_buf,size_t addr_buf_size,bool & is_single_user)1124 MgmtSrvr::sendVersionReq(int v_nodeId,
1125 			 Uint32 &version,
1126 			 Uint32& mysql_version,
1127 			 const char **address,
1128                          char *addr_buf,
1129                          size_t addr_buf_size,
1130                          bool& is_single_user)
1131 {
1132   SignalSender ss(theFacade);
1133   ss.lock();
1134 
1135   SimpleSignal ssig;
1136   ApiVersionReq* req = CAST_PTR(ApiVersionReq, ssig.getDataPtrSend());
1137   req->senderRef = ss.getOwnRef();
1138   req->nodeId = v_nodeId;
1139   ssig.set(ss, TestOrd::TraceAPI, QMGR,
1140            GSN_API_VERSION_REQ, ApiVersionReq::SignalLength);
1141 
1142   NodeId nodeId = 0;
1143   bool do_send = true;
1144   while(true)
1145   {
1146     if (do_send)
1147     {
1148       nodeId = ss.get_an_alive_node();
1149       if (nodeId == 0)
1150       {
1151         return NO_CONTACT_WITH_DB_NODES;
1152       }
1153 
1154       if (ss.sendSignal(nodeId, &ssig) != SEND_OK)
1155       {
1156         return SEND_OR_RECEIVE_FAILED;
1157       }
1158 
1159       do_send = false;
1160     }
1161 
1162     SimpleSignal *signal = ss.waitFor();
1163 
1164     switch (signal->readSignalNumber()) {
1165     case GSN_API_VERSION_CONF: {
1166       const ApiVersionConf * const conf =
1167 	CAST_CONSTPTR(ApiVersionConf, signal->getDataPtr());
1168 
1169       assert((int) conf->nodeId == v_nodeId);
1170 
1171       version = conf->version;
1172       mysql_version = conf->mysql_version;
1173       struct in_addr in;
1174       in.s_addr= conf->m_inet_addr;
1175       *address= Ndb_inet_ntop(AF_INET,
1176                               static_cast<void*>(&in),
1177                               addr_buf,
1178                               addr_buf_size);
1179       is_single_user = false;
1180       if (signal->getLength() > ApiVersionConf::SignalLengthWithoutSingleUser) {
1181         // New nodes will return info about single user
1182         is_single_user = conf->isSingleUser;
1183       }
1184       return 0;
1185     }
1186 
1187     case GSN_NF_COMPLETEREP:{
1188       const NFCompleteRep * const rep =
1189 	CAST_CONSTPTR(NFCompleteRep, signal->getDataPtr());
1190       if (rep->failedNodeId == nodeId)
1191 	do_send = true; // retry with other node
1192       continue;
1193     }
1194 
1195     case GSN_NODE_FAILREP:{
1196       const NodeFailRep * const rep =
1197 	CAST_CONSTPTR(NodeFailRep, signal->getDataPtr());
1198       Uint32 len = NodeFailRep::getNodeMaskLength(signal->getLength());
1199       assert(len == NodeBitmask::Size ||
1200              len == 0); // only full length in ndbapi
1201       if (signal->header.m_noOfSections >= 1)
1202       {
1203         len = signal->ptr[0].sz;
1204         if (BitmaskImpl::safe_get(len, signal->ptr[0].p, nodeId))
1205         {
1206           do_send = true;
1207         }
1208       }
1209       else
1210       {
1211         assert(len > 0);
1212         if (BitmaskImpl::safe_get(len, rep->theAllNodes, nodeId))
1213         {
1214 	  do_send = true; // retry with other node
1215         }
1216       }
1217       continue;
1218     }
1219     case GSN_API_REGCONF:
1220     case GSN_TAKE_OVERTCCONF:
1221     case GSN_CONNECT_REP:
1222       // Ignore
1223       continue;
1224     default:
1225       report_unknown_signal(signal);
1226       return SEND_OR_RECEIVE_FAILED;
1227     }
1228   }
1229 
1230   // Should never come here
1231   require(false);
1232   return -1;
1233 }
1234 
1235 
sendStopMgmd(NodeId nodeId,bool abort,bool stop,bool restart,bool nostart,bool initialStart)1236 int MgmtSrvr::sendStopMgmd(NodeId nodeId,
1237 			   bool abort,
1238 			   bool stop,
1239 			   bool restart,
1240 			   bool nostart,
1241 			   bool initialStart)
1242 {
1243   const char* hostname;
1244   Uint32 port;
1245   BaseString connect_string;
1246 
1247   {
1248     Guard g(m_local_config_mutex);
1249     {
1250       ConfigIter iter(m_local_config, CFG_SECTION_NODE);
1251 
1252       if(iter.first())                       return SEND_OR_RECEIVE_FAILED;
1253       if(iter.find(CFG_NODE_ID, nodeId))     return SEND_OR_RECEIVE_FAILED;
1254       if(iter.get(CFG_NODE_HOST, &hostname)) return SEND_OR_RECEIVE_FAILED;
1255     }
1256     {
1257       ConfigIter iter(m_local_config, CFG_SECTION_NODE);
1258 
1259       if(iter.first())                   return SEND_OR_RECEIVE_FAILED;
1260       if(iter.find(CFG_NODE_ID, nodeId)) return SEND_OR_RECEIVE_FAILED;
1261       if(iter.get(CFG_MGM_PORT, &port))  return SEND_OR_RECEIVE_FAILED;
1262     }
1263     if( strlen(hostname) == 0 )
1264       return SEND_OR_RECEIVE_FAILED;
1265 
1266   }
1267   connect_string.assfmt("%s:%u",hostname,port);
1268 
1269   DBUG_PRINT("info",("connect string: %s",connect_string.c_str()));
1270 
1271   NdbMgmHandle h= ndb_mgm_create_handle();
1272   if ( h && connect_string.length() > 0 )
1273   {
1274     ndb_mgm_set_connectstring(h,connect_string.c_str());
1275     if(ndb_mgm_connect(h,1,0,0))
1276     {
1277       DBUG_PRINT("info",("failed ndb_mgm_connect"));
1278       ndb_mgm_destroy_handle(&h);
1279       return SEND_OR_RECEIVE_FAILED;
1280     }
1281     if(!restart)
1282     {
1283       int nodes[1];
1284       nodes[0]= (int)nodeId;
1285       if(ndb_mgm_stop(h, 1, nodes) < 0)
1286       {
1287         ndb_mgm_destroy_handle(&h);
1288         return SEND_OR_RECEIVE_FAILED;
1289       }
1290     }
1291     else
1292     {
1293       int nodes[1];
1294       nodes[0]= (int)nodeId;
1295       if(ndb_mgm_restart2(h, 1, nodes, initialStart, nostart, abort) < 0)
1296       {
1297         ndb_mgm_destroy_handle(&h);
1298         return SEND_OR_RECEIVE_FAILED;
1299       }
1300     }
1301   }
1302   ndb_mgm_destroy_handle(&h);
1303 
1304   return 0;
1305 }
1306 
1307 /**
1308  * send STOP_REQ to all DB-nodes
1309  *   and wait for them to stop or refuse
1310  *
1311  */
1312 int
sendall_STOP_REQ(NodeBitmask & stoppedNodes,bool abort,bool stop,bool restart,bool nostart,bool initialStart)1313 MgmtSrvr::sendall_STOP_REQ(NodeBitmask &stoppedNodes,
1314                            bool abort,
1315                            bool stop,
1316                            bool restart,
1317                            bool nostart,
1318                            bool initialStart)
1319 {
1320   int error = 0;
1321   DBUG_ENTER("MgmtSrvr::sendall_STOP_REQ");
1322   DBUG_PRINT("enter", ("abort: %d  stop: %d  restart: %d  "
1323                        "nostart: %d  initialStart: %d",
1324                        abort, stop, restart, nostart, initialStart));
1325 
1326   if (ERROR_INSERTED(10006))
1327   {
1328     /*
1329      * This error insert is for Bug #11757421. Error
1330      * 10006 is used to skip the STOP_REQ call sent by
1331      * the restart command thus ensuring that the nodes
1332      * do not start the shut down process.
1333      */
1334     DBUG_RETURN(error);
1335   }
1336 
1337   stoppedNodes.clear();
1338 
1339   SignalSender ss(theFacade);
1340   ss.lock(); // lock will be released on exit
1341 
1342   SimpleSignal ssig;
1343   StopReq* const stopReq = CAST_PTR(StopReq, ssig.getDataPtrSend());
1344   ssig.set(ss, TestOrd::TraceAPI, NDBCNTR, GSN_STOP_REQ, StopReq::SignalLength);
1345 
1346   stopReq->requestInfo = 0;
1347   stopReq->apiTimeout = 5000;
1348   stopReq->transactionTimeout = 1000;
1349   stopReq->readOperationTimeout = 1000;
1350   stopReq->operationTimeout = 1000;
1351   stopReq->senderData = 12;
1352   stopReq->senderRef = ss.getOwnRef();
1353   stopReq->singleuser = 0;
1354   StopReq::setSystemStop(stopReq->requestInfo, stop);
1355   StopReq::setPerformRestart(stopReq->requestInfo, restart);
1356   StopReq::setStopAbort(stopReq->requestInfo, abort);
1357   StopReq::setNoStart(stopReq->requestInfo, nostart);
1358   StopReq::setInitialStart(stopReq->requestInfo, initialStart);
1359 
1360   if (ERROR_INSERTED(10007))
1361   {
1362     /*
1363      * This error insert is for Bug #11757421. Error
1364      * 10007 is used to hard code a value of false to
1365      * the nostart flag in the signal. This ensures
1366      * that the nodes do not reach NOT_STARTED state.
1367      */
1368     StopReq::setNoStart(stopReq->requestInfo, false);
1369   }
1370 
1371   // send the signals
1372   int failed = 0;
1373   NodeBitmask nodes;
1374   {
1375     NodeId nodeId = 0;
1376     while(getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB))
1377     {
1378       if (okToSendTo(nodeId, true) == 0)
1379       {
1380 	SendStatus result = ss.sendSignal(nodeId, &ssig);
1381 	if (result == SEND_OK)
1382 	  nodes.set(nodeId);
1383         else
1384           failed++;
1385       }
1386     }
1387   }
1388 
1389   if (nodes.isclear() && failed > 0)
1390   {
1391     DBUG_RETURN(SEND_OR_RECEIVE_FAILED);
1392   }
1393 
1394   // now wait for the replies
1395   while (!nodes.isclear())
1396   {
1397     SimpleSignal *signal = ss.waitFor();
1398     int gsn = signal->readSignalNumber();
1399     switch (gsn) {
1400     case GSN_STOP_REF:
1401     {
1402       const StopRef * const ref = CAST_CONSTPTR(StopRef, signal->getDataPtr());
1403       const NodeId nodeId = refToNode(signal->header.theSendersBlockRef);
1404 #ifdef VM_TRACE
1405       ndbout_c("Node %d refused stop", nodeId);
1406 #endif
1407       assert(nodes.get(nodeId));
1408       nodes.clear(nodeId);
1409       error = translateStopRef(ref->errorCode);
1410       break;
1411     }
1412     case GSN_STOP_CONF:
1413     {
1414       const NodeId nodeId = refToNode(signal->header.theSendersBlockRef);
1415       assert(nodes.get(nodeId));
1416       nodes.clear(nodeId);
1417       break;
1418     }
1419     case GSN_NF_COMPLETEREP:
1420     {
1421       const NFCompleteRep * rep = CAST_CONSTPTR(NFCompleteRep,
1422                                                 signal->getDataPtr());
1423       if (rep->failedNodeId <= nodes.max_size())
1424         nodes.clear(rep->failedNodeId); // clear the failed node
1425 
1426       if (rep->failedNodeId <= stoppedNodes.max_size())
1427         stoppedNodes.set(rep->failedNodeId);
1428       break;
1429     }
1430     case GSN_NODE_FAILREP:
1431     {
1432       const NodeFailRep * rep = CAST_CONSTPTR(NodeFailRep,
1433                                               signal->getDataPtr());
1434       Uint32 len = NodeFailRep::getNodeMaskLength(signal->getLength());
1435       assert(len == NodeBitmask::Size || // only full length in ndbapi
1436              len == 0);
1437       NodeBitmask mask;
1438       if (signal->header.m_noOfSections >= 1)
1439       {
1440         mask.assign(signal->ptr[0].sz, signal->ptr[0].p);
1441       }
1442       else
1443       {
1444         mask.assign(len, rep->theAllNodes);
1445       }
1446       nodes.bitANDC(mask);
1447       stoppedNodes.bitOR(mask);
1448       break;
1449     }
1450     case GSN_API_REGCONF:
1451     case GSN_TAKE_OVERTCCONF:
1452     case GSN_CONNECT_REP:
1453       continue;
1454     default:
1455       report_unknown_signal(signal);
1456       DBUG_RETURN(SEND_OR_RECEIVE_FAILED);
1457     }
1458   }
1459 
1460   DBUG_RETURN(error);
1461 }
1462 
1463 int
guess_master_node(SignalSender & ss)1464 MgmtSrvr::guess_master_node(SignalSender& ss)
1465 {
1466   /**
1467    * First check if m_master_node is started
1468    */
1469   NodeId guess = m_master_node;
1470   if (guess != 0)
1471   {
1472     trp_node node = ss.getNodeInfo(guess);
1473     if (node.m_state.startLevel == NodeState::SL_STARTED)
1474       return guess;
1475   }
1476 
1477   /**
1478    * Check for any started node
1479    */
1480   guess = 0;
1481   while(getNextNodeId(&guess, NDB_MGM_NODE_TYPE_NDB))
1482   {
1483     trp_node node = ss.getNodeInfo(guess);
1484     if (node.m_state.startLevel == NodeState::SL_STARTED)
1485     {
1486       return guess;
1487     }
1488   }
1489 
1490   /**
1491    * Check any confirmed node
1492    */
1493   guess = 0;
1494   while(getNextNodeId(&guess, NDB_MGM_NODE_TYPE_NDB))
1495   {
1496     trp_node node = ss.getNodeInfo(guess);
1497     if (node.is_confirmed())
1498     {
1499       return guess;
1500     }
1501   }
1502 
1503   /**
1504    * Check any connected node
1505    */
1506   guess = 0;
1507   while(getNextNodeId(&guess, NDB_MGM_NODE_TYPE_NDB))
1508   {
1509     trp_node node = ss.getNodeInfo(guess);
1510     if (node.is_connected())
1511     {
1512       return guess;
1513     }
1514   }
1515 
1516   return 0; // give up
1517 }
1518 
1519 /*
1520  * Common method for handeling all STOP_REQ signalling that
1521  * is used by Stopping, Restarting and Single user commands
1522  *
1523  * In the event that we need to stop a mgmd, we create a mgm
1524  * client connection to that mgmd and stop it that way.
1525  * This allows us to stop mgm servers when there isn't any real
1526  * distributed communication up.
1527  *
1528  * node_ids.size()==0 means to stop all DB nodes.
1529  *                    MGM nodes will *NOT* be stopped.
1530  *
1531  * If we work out we should be stopping or restarting ourselves,
1532  * we return <0 in stopSelf for restart, >0 for stop
1533  * and 0 for do nothing.
1534  */
1535 
sendSTOP_REQ(const Vector<NodeId> & node_ids,NodeBitmask & stoppedNodes,bool abort,bool stop,bool restart,bool nostart,bool initialStart,int * stopSelf)1536 int MgmtSrvr::sendSTOP_REQ(const Vector<NodeId> &node_ids,
1537 			   NodeBitmask &stoppedNodes,
1538 			   bool abort,
1539 			   bool stop,
1540 			   bool restart,
1541 			   bool nostart,
1542 			   bool initialStart,
1543                            int* stopSelf)
1544 {
1545   int error = 0;
1546   DBUG_ENTER("MgmtSrvr::sendSTOP_REQ");
1547   DBUG_PRINT("enter", ("no of nodes: %d "
1548                        "abort: %d  stop: %d  restart: %d  "
1549                        "nostart: %d  initialStart: %d",
1550                        node_ids.size(),
1551                        abort, stop, restart, nostart, initialStart));
1552 
1553   if (ERROR_INSERTED(10006))
1554   {
1555     /*
1556      * This error insert is for Bug #11757421. Error
1557      * 10006 is used to skip the STOP_REQ call sent by
1558      * the restart command thus ensuring that the node
1559      * does not start the shut down process.
1560      */
1561     DBUG_RETURN(error);
1562   }
1563 
1564   stoppedNodes.clear();
1565   *stopSelf= 0;
1566 
1567   NodeBitmask ndb_nodes_to_stop;
1568   NodeBitmask mgm_nodes_to_stop;
1569 
1570   SignalSender ss(theFacade);
1571   ss.lock(); // lock will be released on exit
1572 
1573   /**
1574    * First verify arguments
1575    */
1576   for (unsigned i = 0; i < node_ids.size(); i++)
1577   {
1578     switch(getNodeType(node_ids[i])){
1579     case NDB_MGM_NODE_TYPE_MGM:
1580       mgm_nodes_to_stop.set(node_ids[i]);
1581       break;
1582     case NDB_MGM_NODE_TYPE_NDB:
1583       ndb_nodes_to_stop.set(node_ids[i]);
1584       break;
1585     default:
1586       DBUG_RETURN(WRONG_PROCESS_TYPE);
1587     }
1588   }
1589 
1590   /**
1591    * Process ndb_mgmd
1592    */
1593   for (Uint32 i = mgm_nodes_to_stop.find(0);
1594        i != mgm_nodes_to_stop.NotFound;
1595        i = mgm_nodes_to_stop.find(i + 1))
1596   {
1597     if (i != getOwnNodeId())
1598     {
1599       error= sendStopMgmd(i, abort, stop, restart,
1600                           nostart, initialStart);
1601       if (error == 0)
1602       {
1603         stoppedNodes.set(i);
1604       }
1605     }
1606     else
1607     {
1608       g_eventLogger->info("Stopping this node");
1609       * stopSelf = (restart)? -1 : 1;
1610       stoppedNodes.set(i);
1611     }
1612   }
1613 
1614   /**
1615    * Process ndbd
1616    */
1617   SimpleSignal ssig;
1618   StopReq* const stopReq = CAST_PTR(StopReq, ssig.getDataPtrSend());
1619   ssig.set(ss, TestOrd::TraceAPI, NDBCNTR, GSN_STOP_REQ, StopReq::SignalLength_v1);
1620 
1621   stopReq->requestInfo = 0;
1622   stopReq->apiTimeout = 5000;
1623   stopReq->transactionTimeout = 1000;
1624   stopReq->readOperationTimeout = 1000;
1625   stopReq->operationTimeout = 1000;
1626   stopReq->senderData = 12;
1627   stopReq->senderRef = ss.getOwnRef();
1628   stopReq->singleuser = 0;
1629   StopReq::setSystemStop(stopReq->requestInfo, stop);
1630   StopReq::setPerformRestart(stopReq->requestInfo, restart);
1631   StopReq::setStopAbort(stopReq->requestInfo, abort);
1632   StopReq::setNoStart(stopReq->requestInfo, nostart);
1633   StopReq::setInitialStart(stopReq->requestInfo, initialStart);
1634 
1635   if (ERROR_INSERTED(10007))
1636   {
1637     /*
1638      * This error insert is for Bug #11757421. Error
1639      * 10007 is used to hard code a value of false to
1640      * the nostart flag in the signal. This ensures
1641      * that the node does not reach NOT_STARTED state.
1642      */
1643     StopReq::setNoStart(stopReq->requestInfo, false);
1644   }
1645 
1646   int use_master_node = 0;
1647   int do_send = 0;
1648   Uint32 packed_length = 0;
1649   if (ndb_nodes_to_stop.count() > 1)
1650   {
1651     do_send = 1;
1652     use_master_node = 1;
1653     ndb_nodes_to_stop.copyto(NdbNodeBitmask::Size, stopReq->nodes);
1654     packed_length = ndb_nodes_to_stop.getPackedLengthInWords();
1655     StopReq::setStopNodes(stopReq->requestInfo, 1);
1656   }
1657   else if (ndb_nodes_to_stop.count() == 1)
1658   {
1659     Uint32 nodeId = ndb_nodes_to_stop.find(0);
1660     if (okToSendTo(nodeId, true) == 0)
1661     {
1662       if (ndbd_send_node_bitmask_in_section(getNodeInfo(nodeId).m_info.m_version))
1663       {
1664         ssig.ptr[0].p = stopReq->nodes;
1665         ssig.ptr[0].sz = packed_length;
1666         ssig.header.m_noOfSections = 1;
1667         ssig.header.theLength = StopReq::SignalLength;
1668       }
1669       else
1670       {
1671         assert(packed_length <= NdbNodeBitmask48::Size);
1672       }
1673 
1674       SendStatus result = ss.sendSignal(nodeId, &ssig);
1675       if (result != SEND_OK)
1676       {
1677         DBUG_RETURN(SEND_OR_RECEIVE_FAILED);
1678       }
1679     }
1680     else
1681     {
1682       DBUG_RETURN(SEND_OR_RECEIVE_FAILED);
1683     }
1684   }
1685 
1686 
1687   // now wait for the replies
1688   Uint32 sendNodeId = ndb_nodes_to_stop.find(0);
1689   while (!stoppedNodes.contains(ndb_nodes_to_stop))
1690   {
1691     if (do_send)
1692     {
1693       assert(use_master_node);
1694       sendNodeId = guess_master_node(ss);
1695       if (okToSendTo(sendNodeId, true) != 0)
1696       {
1697         DBUG_RETURN(SEND_OR_RECEIVE_FAILED);
1698       }
1699 
1700       if (ndbd_send_node_bitmask_in_section(getNodeInfo(sendNodeId).m_info.m_version))
1701       {
1702         ssig.ptr[0].p = stopReq->nodes;
1703         ssig.ptr[0].sz = packed_length;
1704         ssig.header.m_noOfSections = 1;
1705         ssig.header.theLength = StopReq::SignalLength;
1706       }
1707       else
1708       {
1709         assert(packed_length <= NdbNodeBitmask48::Size);
1710       }
1711 
1712       if (ss.sendSignal(sendNodeId, &ssig) != SEND_OK)
1713       {
1714         DBUG_RETURN(SEND_OR_RECEIVE_FAILED);
1715       }
1716       do_send = 0;
1717     }
1718 
1719     SimpleSignal *signal = ss.waitFor();
1720     int gsn = signal->readSignalNumber();
1721     switch (gsn) {
1722     case GSN_STOP_REF:{
1723       const StopRef * const ref = CAST_CONSTPTR(StopRef, signal->getDataPtr());
1724       const NodeId nodeId = refToNode(signal->header.theSendersBlockRef);
1725       require(nodeId == sendNodeId);
1726       if (ref->errorCode == StopRef::MultiNodeShutdownNotMaster)
1727       {
1728         assert(use_master_node);
1729         m_master_node= ref->masterNodeId;
1730         do_send = 1;
1731         continue;
1732       }
1733       DBUG_RETURN(translateStopRef(ref->errorCode));
1734       break;
1735     }
1736     case GSN_STOP_CONF:{
1737 #ifdef NOT_USED
1738       const StopConf * const ref = CAST_CONSTPTR(StopConf, signal->getDataPtr());
1739 #endif
1740       const NodeId nodeId = refToNode(signal->header.theSendersBlockRef);
1741       require(nodeId == sendNodeId);
1742       stoppedNodes.bitOR(ndb_nodes_to_stop);
1743       break;
1744     }
1745     case GSN_NF_COMPLETEREP:{
1746       const NFCompleteRep * const rep =
1747 	CAST_CONSTPTR(NFCompleteRep, signal->getDataPtr());
1748       if (rep->failedNodeId <= stoppedNodes.max_size())
1749         stoppedNodes.set(rep->failedNodeId);
1750       break;
1751     }
1752     case GSN_NODE_FAILREP:{
1753       const NodeFailRep * const rep =
1754 	CAST_CONSTPTR(NodeFailRep, signal->getDataPtr());
1755       Uint32 len = NodeFailRep::getNodeMaskLength(signal->getLength());
1756       require(len == NodeBitmask::Size || // only full length in ndbapi
1757               len == 0); // bitmask sent in signal section
1758       NodeBitmask mask;
1759       if (len == 0)
1760       {
1761         mask.assign(signal->ptr[0].sz, signal->ptr[0].p);
1762       }
1763       else
1764       {
1765         mask.assign(len, rep->theAllNodes);
1766       }
1767       stoppedNodes.bitOR(mask);
1768       break;
1769     }
1770     case GSN_API_REGCONF:
1771     case GSN_TAKE_OVERTCCONF:
1772     case GSN_CONNECT_REP:
1773       continue;
1774     default:
1775       report_unknown_signal(signal);
1776       DBUG_RETURN(SEND_OR_RECEIVE_FAILED);
1777     }
1778   }
1779   if (error && *stopSelf)
1780   {
1781     *stopSelf= 0;
1782   }
1783   DBUG_RETURN(error);
1784 }
1785 
1786 /*
1787  * Stop one nodes
1788  */
1789 
stopNodes(const Vector<NodeId> & node_ids,int * stopCount,bool abort,bool force,int * stopSelf)1790 int MgmtSrvr::stopNodes(const Vector<NodeId> &node_ids,
1791                         int *stopCount, bool abort, bool force,
1792                         int* stopSelf)
1793 {
1794   if (force || abort)
1795     ; // Skip node state checks
1796   else if (is_any_node_starting())
1797   {
1798     /* Refuse to stop since some node(s) are starting */
1799     return OPERATION_NOT_ALLOWED_START_STOP;
1800   }
1801 
1802   NodeBitmask nodes;
1803   int ret = 0;
1804   if (node_ids.size() > 0)
1805   {
1806     ret = sendSTOP_REQ(node_ids, nodes,
1807                        abort, false, false, false, false,
1808                        stopSelf);
1809   }
1810   else
1811   {
1812     ret = sendall_STOP_REQ(nodes,
1813                            abort, false, false, false, false);
1814   }
1815 
1816   if (stopCount)
1817     *stopCount= nodes.count();
1818   return ret;
1819 }
1820 
shutdownMGM(int * stopCount,bool abort,int * stopSelf)1821 int MgmtSrvr::shutdownMGM(int *stopCount, bool abort, int *stopSelf)
1822 {
1823   NodeId nodeId = 0;
1824   int error;
1825 
1826   while(getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_MGM))
1827   {
1828     if(nodeId==getOwnNodeId())
1829       continue;
1830     error= sendStopMgmd(nodeId, abort, true, false,
1831                         false, false);
1832     if (error == 0)
1833       (*stopCount)++;
1834   }
1835 
1836   *stopSelf= 1;
1837   (*stopCount)++;
1838 
1839   return 0;
1840 }
1841 
1842 /*
1843  * Perform DB nodes shutdown.
1844  * MGM servers are left in their current state
1845  */
1846 
shutdownDB(int * stopCount,bool abort)1847 int MgmtSrvr::shutdownDB(int * stopCount, bool abort)
1848 {
1849   NodeBitmask nodes;
1850 
1851   int ret = sendall_STOP_REQ(nodes,
1852                              abort,
1853                              true,
1854                              false,
1855                              false,
1856                              false);
1857 
1858   if (stopCount)
1859     *stopCount = nodes.count();
1860   return ret;
1861 }
1862 
1863 /*
1864  * Enter single user mode on all live nodes
1865  */
1866 
enterSingleUser(int * stopCount,Uint32 apiNodeId)1867 int MgmtSrvr::enterSingleUser(int * stopCount, Uint32 apiNodeId)
1868 {
1869   if (getNodeType(apiNodeId) != NDB_MGM_NODE_TYPE_API)
1870     return NODE_NOT_API_NODE;
1871 
1872   // Init
1873   if (stopCount)
1874   {
1875     * stopCount = 0;
1876   }
1877 
1878   SignalSender ss(theFacade);
1879   ss.lock(); // lock will be released on exit
1880 
1881   SimpleSignal ssig;
1882   StopReq* const stopReq = CAST_PTR(StopReq, ssig.getDataPtrSend());
1883   ssig.set(ss, TestOrd::TraceAPI, NDBCNTR, GSN_STOP_REQ, StopReq::SignalLength);
1884 
1885   stopReq->requestInfo = 0;
1886   stopReq->apiTimeout = 5000;
1887   stopReq->transactionTimeout = 1000;
1888   stopReq->readOperationTimeout = 1000;
1889   stopReq->operationTimeout = 1000;
1890   stopReq->senderData = 12;
1891   stopReq->senderRef = ss.getOwnRef();
1892   stopReq->singleuser = 1;
1893   stopReq->singleUserApi = apiNodeId;
1894   StopReq::setSystemStop(stopReq->requestInfo, false);
1895   StopReq::setPerformRestart(stopReq->requestInfo, false);
1896   StopReq::setStopAbort(stopReq->requestInfo, false);
1897 
1898   NodeBitmask nodes;
1899   {
1900     NodeId nodeId = 0;
1901     Uint32 failed = 0;
1902     while (getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB))
1903     {
1904       if (okToSendTo(nodeId, true) == 0)
1905       {
1906 	SendStatus result = ss.sendSignal(nodeId, &ssig);
1907 	if (result == SEND_OK)
1908 	  nodes.set(nodeId);
1909         else
1910           failed++;
1911       }
1912       else
1913       {
1914         failed++;
1915       }
1916     }
1917     if (nodes.isclear())
1918     {
1919       if (failed)
1920       {
1921         return SEND_OR_RECEIVE_FAILED;
1922       }
1923       return NO_CONTACT_WITH_DB_NODES;
1924     }
1925   }
1926 
1927   int error = 0;
1928   int ok = 0;
1929   while (!nodes.isclear())
1930   {
1931     SimpleSignal *signal = ss.waitFor();
1932     int gsn = signal->readSignalNumber();
1933     switch (gsn) {
1934     case GSN_STOP_REF:
1935     {
1936       const StopRef * const ref = CAST_CONSTPTR(StopRef, signal->getDataPtr());
1937       nodes.clear(refToNode(signal->header.theSendersBlockRef));
1938       error = translateStopRef(ref->errorCode);
1939       break;
1940     }
1941     case GSN_STOP_CONF:
1942     {
1943       ok++;
1944       nodes.clear(refToNode(signal->header.theSendersBlockRef));
1945       break;
1946     }
1947     case GSN_NF_COMPLETEREP:
1948     {
1949       const NFCompleteRep * rep = CAST_CONSTPTR(NFCompleteRep,
1950                                                 signal->getDataPtr());
1951       if (rep->failedNodeId <= nodes.max_size())
1952         nodes.clear(rep->failedNodeId);
1953       break;
1954     }
1955 
1956     case GSN_NODE_FAILREP:
1957     {
1958       const NodeFailRep * rep = CAST_CONSTPTR(NodeFailRep,
1959                                               signal->getDataPtr());
1960       Uint32 len = NodeFailRep::getNodeMaskLength(signal->getLength());
1961       assert(len == NodeBitmask::Size || // only full length in ndbapi
1962              len == 0);
1963       NodeBitmask mask;
1964 
1965       if (signal->header.m_noOfSections >= 1)
1966       {
1967         mask.assign(signal->ptr[0].sz, signal->ptr[0].p);
1968       }
1969       else
1970       {
1971         mask.assign(len, rep->theAllNodes);
1972       }
1973       nodes.bitANDC(mask);
1974       break;
1975     }
1976     case GSN_API_REGCONF:
1977     case GSN_TAKE_OVERTCCONF:
1978     case GSN_CONNECT_REP:
1979       continue;
1980 
1981     default:
1982       report_unknown_signal(signal);
1983       return SEND_OR_RECEIVE_FAILED;
1984     }
1985   }
1986 
1987   if (stopCount)
1988   {
1989     * stopCount = ok;
1990   }
1991 
1992   return error;
1993 }
1994 
1995 /*
1996  * Perform node restart
1997  */
1998 
is_any_node_stopping()1999 bool MgmtSrvr::is_any_node_stopping()
2000 {
2001   NodeId nodeId = 0;
2002   trp_node node;
2003   while(getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB))
2004   {
2005     node = getNodeInfo(nodeId);
2006     if((node.m_state.startLevel == NodeState::SL_STOPPING_1) ||
2007        (node.m_state.startLevel == NodeState::SL_STOPPING_2) ||
2008        (node.m_state.startLevel == NodeState::SL_STOPPING_3) ||
2009        (node.m_state.startLevel == NodeState::SL_STOPPING_4))
2010       return true; // At least one node was stopping
2011   }
2012   return false; // No node was stopping
2013 }
2014 
is_any_node_starting()2015 bool MgmtSrvr::is_any_node_starting()
2016 {
2017   NodeId nodeId = 0;
2018   trp_node node;
2019   while(getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB))
2020   {
2021     node = getNodeInfo(nodeId);
2022     if (node.m_state.startLevel == NodeState::SL_STARTING)
2023       return true; // At least one node was starting
2024   }
2025   return false; // No node was starting
2026 }
2027 
is_any_node_alive()2028 bool MgmtSrvr::is_any_node_alive()
2029 {
2030   NodeId nodeId = 0;
2031   while (getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB))
2032   {
2033     if (getNodeInfo(nodeId).m_alive == true)
2034       return true; // At least one node in alive state
2035   }
2036   return false; // No node in alive state
2037 }
2038 
is_any_node_in_started_state()2039 bool MgmtSrvr::is_any_node_in_started_state()
2040 {
2041   NodeId nodeId = 0;
2042   trp_node node;
2043   while(getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB))
2044   {
2045     node = getNodeInfo(nodeId);
2046     if (node.m_state.startLevel == NodeState::SL_STARTED)
2047       return true; // At least one node is in started state
2048   }
2049   return false; // No node is in started state
2050 }
2051 
are_all_nodes_in_cmvmi_state()2052 bool MgmtSrvr::are_all_nodes_in_cmvmi_state()
2053 {
2054   NodeId nodeId = 0;
2055   trp_node node;
2056   while(getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB))
2057   {
2058     node = getNodeInfo(nodeId);
2059     if (node.m_state.startLevel != NodeState::SL_CMVMI)
2060       return false; // At least one node is not in CMVMI state
2061   }
2062   return true; // All nodes are in CMVMI state
2063 }
2064 
isTimeUp(const NDB_TICKS startTime,const Uint64 delay,const Uint64 sleepInterval)2065 bool MgmtSrvr::isTimeUp(const NDB_TICKS startTime,
2066                         const Uint64 delay,
2067                         const Uint64 sleepInterval)
2068 {
2069   if(NdbTick_Elapsed(startTime, NdbTick_getCurrentTicks()).milliSec()
2070       < delay)
2071   {
2072     NdbSleep_MilliSleep(sleepInterval);
2073     return false;
2074   }
2075   return true;
2076 }
2077 
is_cluster_single_user()2078 bool MgmtSrvr::is_cluster_single_user()
2079 {
2080   NodeId nodeId = 0;
2081   trp_node node;
2082   while(getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB))
2083   {
2084     node = getNodeInfo(nodeId);
2085     if (node.m_state.startLevel == NodeState::SL_SINGLEUSER)
2086       return true; // Cluster is in single user modes
2087   }
2088   return false; // Cluster is not in single user mode
2089 }
2090 
restartNodes(const Vector<NodeId> & node_ids,int * stopCount,bool nostart,bool initialStart,bool abort,bool force,int * stopSelf,unsigned int num_secs_to_wait_for_node)2091 int MgmtSrvr::restartNodes(const Vector<NodeId> &node_ids,
2092                            int * stopCount, bool nostart,
2093                            bool initialStart, bool abort,
2094                            bool force,
2095                            int *stopSelf,
2096                            unsigned int num_secs_to_wait_for_node)
2097 {
2098   if (is_cluster_single_user())
2099   {
2100     /*
2101       Refuse to restart since cluster is in single user mode
2102       and when the node is restarting it would not be allowed to
2103       join cluster, see BUG#31056
2104     */
2105     return OPERATION_NOT_ALLOWED_START_STOP;
2106   }
2107 
2108   if (force || abort)
2109     ; // Skip node state checks
2110   else if (is_any_node_starting())
2111   {
2112     /* Refuse to restart since some node(s) are starting */
2113     return OPERATION_NOT_ALLOWED_START_STOP;
2114   }
2115 
2116   NodeBitmask nodes;
2117   int ret = 0;
2118   if (node_ids.size() > 0)
2119   {
2120     ret = sendSTOP_REQ(node_ids, nodes,
2121                        abort, false, true, true, initialStart,
2122                        stopSelf);
2123   }
2124   else
2125   {
2126     ret = sendall_STOP_REQ(nodes,
2127                            abort, false, true, true, initialStart);
2128   }
2129 
2130   if (ret)
2131     return ret;
2132 
2133   if (stopCount)
2134     *stopCount = nodes.count();
2135 
2136   // start up the nodes again
2137 
2138   /*
2139    * The wait for all nodes to reach NOT_STARTED state is
2140    * split into 2 separate checks:
2141    * 1. Wait for ndbd to start shutting down
2142    * 2. Wait for ndbd to shutdown and reach NOT_STARTED
2143    *    state
2144    *
2145    * Wait 1: Wait for ndbd to start shutting down. A short
2146    * wait duration of 12 seconds is being used.
2147    *
2148    * During shutdown the nodes traverse the 4 stopping
2149    * levels namely, SL_STOPPING_1 through SL_STOPPING_4.
2150    *
2151    * Thus, waiting for all the nodes to enter one of these
2152    * levels would be the obvious and intuitive approach for
2153    * this wait. However, the nodes pass these levels in
2154    * exec_STOP_REQ before the flow of execution reaches
2155    * here. An alternate approach adopted here is to check if
2156    * the nodes leave the SL_STARTED state in the first place.
2157    * A failure to leave this state would indicate that for
2158    * some reason the shutdown process failed to start and
2159    * can be considered the equivalent of checking if the
2160    * nodes have transitioned to any of the stopping levels.
2161    *
2162    * The immediate question that arises is how can one be sure
2163    * that the nodes have not gone from STARTED -> STOPPED ->
2164    * STARTED. This scenario is not an issue since we are waiting
2165    * for NOT_STARTED state and only once that state is reached is
2166    * the START_ORD fired which makes the node transition from
2167    * SL_NOTHING to further states.
2168    *
2169    * To summarize, the first of the two waits will wait a short
2170    * (12s) time to check if the shutdown process has been initiated
2171    * and exit in case any of the nodes have not left the
2172    * SL_STARTED state.
2173    */
2174   Uint64 waitTime = 12000;
2175   NDB_TICKS startTime = NdbTick_getCurrentTicks();
2176   bool any_node_in_started_state;
2177   do
2178   {
2179     /*
2180      * Check if any of the data nodes are still
2181      * stuck in STARTED state
2182      */
2183     any_node_in_started_state = false;
2184     for (unsigned i = 0; i < node_ids.size(); i++)
2185     {
2186       NodeId nodeId = node_ids[i];
2187       /*
2188        * Check performed only for data nodes
2189        */
2190       if(getNodeType(nodeId) == NDB_MGM_NODE_TYPE_NDB)
2191       {
2192         trp_node node = getNodeInfo(nodeId);
2193         any_node_in_started_state |= (node.m_state.startLevel ==
2194                   NodeState::SL_STARTED);
2195       }
2196     }
2197   } while(any_node_in_started_state && !isTimeUp(startTime,waitTime,100));
2198 
2199   if(any_node_in_started_state)
2200   {
2201     return WAIT_FOR_NDBD_TO_START_SHUTDOWN_FAILED;
2202   }
2203 
2204   /*
2205    * Wait 2: Wait for ndbd to shutdown and reach NOT_STARTED state
2206    *
2207    * Having confirmed that the shutdown is on its way, the
2208    * second wait involves simply waiting for the shutdown to complete
2209    * and the nodes to enter the NOT_STARTED state.
2210    *
2211    * Once the nodes reach the NOT_STARTED state, they are ready for the
2212    * START_ORD signal. It must be noted that while NOT_STARTED state has
2213    * been mentioned throughout the comments since it is better known from
2214    * a user's perspective, since we are dealing with data nodes, it is
2215    * quicker and more efficient to check if the state is SL_CMVMI which is
2216    * the equivalent of the MGMAPI state of NOT_STARTED.
2217    *
2218    * The wait time in this case is the value of num_secs_to_wait_for_node
2219    */
2220 
2221   startTime = NdbTick_getCurrentTicks();
2222   waitTime = num_secs_to_wait_for_node * 1000;
2223   bool all_nodes_in_cmvmi_state;
2224   do
2225   {
2226     /*
2227      * Check if all the data nodes are in
2228      * SL_CMVMI state
2229      */
2230     all_nodes_in_cmvmi_state = true;
2231     for (unsigned i = 0; i < node_ids.size(); i++)
2232     {
2233       NodeId nodeId= node_ids[i];
2234       if(getNodeType(nodeId) == NDB_MGM_NODE_TYPE_NDB)
2235       {
2236         trp_node node = getNodeInfo(nodeId);
2237         all_nodes_in_cmvmi_state &= (node.m_state.startLevel ==
2238                   NodeState::SL_CMVMI);
2239       }
2240     }
2241   } while(!all_nodes_in_cmvmi_state &&
2242           !isTimeUp(startTime,waitTime,1000));
2243 
2244   if(!all_nodes_in_cmvmi_state)
2245   {
2246     return WAIT_FOR_NDBD_SHUTDOWN_FAILED;
2247   }
2248 
2249   if (nostart)
2250     return 0;
2251 
2252   /*
2253     verify that no nodes are stopping before starting as this would cause
2254     the starting node to shutdown
2255   */
2256   int retry= 600*10;
2257   for (;is_any_node_stopping();)
2258   {
2259     if (--retry)
2260       break;
2261     NdbSleep_MilliSleep(100);
2262   }
2263 
2264   /*
2265     start the nodes
2266   */
2267   for (unsigned i = 0; i < node_ids.size(); i++)
2268   {
2269     unsigned int loop_count = 0;
2270     do
2271     {
2272       int result = sendSTART_ORD(node_ids[i]);
2273       if (result == SEND_OR_RECEIVE_FAILED ||
2274           result == NO_CONTACT_WITH_PROCESS)
2275       {
2276         if (loop_count >= num_secs_to_wait_for_node)
2277           break;
2278         loop_count++;
2279         NdbSleep_MilliSleep(1000);
2280       }
2281       else
2282       {
2283         break;
2284       }
2285     } while (1);
2286   }
2287   return 0;
2288 }
2289 
2290 /*
2291  * Perform restart of all DB nodes
2292  */
2293 
restartDB(bool nostart,bool initialStart,bool abort,int * stopCount,unsigned int num_secs_to_wait_for_node)2294 int MgmtSrvr::restartDB(bool nostart, bool initialStart,
2295                         bool abort, int * stopCount,
2296                         unsigned int num_secs_to_wait_for_node)
2297 {
2298   NodeBitmask nodes;
2299 
2300   /*
2301   * Restart cannot be performed without any data nodes being started.
2302   */
2303   if (!is_any_node_alive())
2304   {
2305     return 0;
2306   }
2307 
2308   int ret = sendall_STOP_REQ(nodes,
2309                              abort,
2310                              true,
2311                              true,
2312                              true,
2313                              initialStart);
2314 
2315   if (ret)
2316     return ret;
2317 
2318   if (stopCount)
2319     *stopCount = nodes.count();
2320 
2321 #ifdef VM_TRACE
2322     ndbout_c("Stopped %d nodes", nodes.count());
2323 #endif
2324 
2325 
2326   /*
2327    * The wait for all nodes to reach NOT_STARTED state is
2328    * split into 2 separate checks:
2329    * 1. Wait for ndbd to start shutting down
2330    * 2. Wait for ndbd to shutdown and reach NOT_STARTED
2331    *    state
2332    *
2333    * Wait 1: Wait for ndbd to start shutting down. A short
2334    * wait duration of 12 seconds is being used.
2335    *
2336    * During shutdown the nodes traverse the 4 stopping
2337    * levels namely, SL_STOPPING_1 through SL_STOPPING_4.
2338    *
2339    * Thus, waiting for all the nodes to enter one of these
2340    * levels would be the obvious and intuitive approach for
2341    * this wait. However, the nodes pass these levels in
2342    * exec_STOP_REQ before the flow of execution reaches
2343    * here. An alternate approach adopted here is to check if
2344    * the nodes leave the SL_STARTED state in the first place.
2345    * A failure to leave this state would indicate that for
2346    * some reason the shutdown process failed to start and
2347    * can be considered the equivalent of checking if the
2348    * nodes have transitioned to any of the stopping levels.
2349    *
2350    * The immediate question that arises is how can one be sure
2351    * that the nodes have not gone from STARTED -> STOPPED ->
2352    * STARTED. This scenario is not an issue since we are waiting
2353    * for NOT_STARTED state and only once that state is reached is
2354    * the START_ORD fired which makes the node transition from
2355    * SL_NOTHING to further states.
2356    *
2357    * To summarize, the first of the two waits will wait a short
2358    * (12s) time to check if the shutdown process has been initiated
2359    * and exit in case any of the nodes have not left the
2360    * SL_STARTED state.
2361    */
2362   Uint64 waitTime = 12000;
2363   NDB_TICKS startTime = NdbTick_getCurrentTicks();
2364 
2365   /*
2366    * Check if any of the data nodes are still
2367    * stuck in STARTED state
2368    */
2369   while(is_any_node_in_started_state() &&
2370       !isTimeUp(startTime,waitTime,100));
2371 
2372   if(is_any_node_in_started_state())
2373   {
2374     return WAIT_FOR_NDBD_TO_START_SHUTDOWN_FAILED;
2375   }
2376 
2377   /*
2378    * Wait 2: Wait for ndbd to shutdown and reach NOT_STARTED state
2379    *
2380    * Having confirmed that the shutdown is on its way, the
2381    * second wait involves simply waiting for the shutdown to complete
2382    * and the nodes to enter the NOT_STARTED state.
2383    *
2384    * Once the nodes reach the NOT_STARTED state, they are ready for the
2385    * START_ORD signal. It must be noted that while NOT_STARTED state has
2386    * been mentioned throughout the comments since it is better known from
2387    * a user's perspective, since we are dealing with data nodes, it is
2388    * quicker and more efficient to check if the state is SL_CMVMI which is
2389    * the equivalent of the MGMAPI state of NOT_STARTED.
2390    *
2391    * The wait time in this case is the value of num_secs_to_wait_for_node
2392    */
2393   startTime = NdbTick_getCurrentTicks();
2394   waitTime = num_secs_to_wait_for_node * 1000;
2395   while(!are_all_nodes_in_cmvmi_state() &&
2396           !isTimeUp(startTime,waitTime,1000));
2397 
2398   if(!are_all_nodes_in_cmvmi_state())
2399   {
2400     return WAIT_FOR_NDBD_SHUTDOWN_FAILED;
2401   }
2402 
2403   if(nostart)
2404     return 0;
2405 
2406   /**
2407    * Now we start all database nodes (i.e. we make them non-idle)
2408    * We ignore the result we get from the start command.
2409    */
2410   NodeId nodeId = 0;
2411   while(getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB)) {
2412     if (!nodes.get(nodeId))
2413       continue;
2414     int result;
2415     unsigned int loop_count = 0;
2416     do
2417     {
2418       result = sendSTART_ORD(nodeId);
2419       if (result != SEND_OR_RECEIVE_FAILED &&
2420           result != NO_CONTACT_WITH_PROCESS)
2421         break;
2422       if (loop_count >= num_secs_to_wait_for_node)
2423         break;
2424       NdbSleep_MilliSleep(1000);
2425       loop_count++;
2426     } while (1);
2427     g_eventLogger->debug("Started node %d with result %d", nodeId, result);
2428     /**
2429      * Errors from this call are deliberately ignored.
2430      * Maybe the user only wanted to restart a subset of the nodes.
2431      * It is also easy for the user to check which nodes have
2432      * started and which nodes have not.
2433      */
2434   }
2435 
2436   return 0;
2437 }
2438 
2439 int
exitSingleUser(int * stopCount,bool abort)2440 MgmtSrvr::exitSingleUser(int * stopCount, bool abort)
2441 {
2442   NodeId nodeId = 0;
2443   int count = 0;
2444 
2445   SignalSender ss(theFacade);
2446   ss.lock(); // lock will be released on exit
2447 
2448   SimpleSignal ssig;
2449   ResumeReq* const resumeReq =
2450     CAST_PTR(ResumeReq, ssig.getDataPtrSend());
2451 
2452   ssig.set(ss,TestOrd::TraceAPI, NDBCNTR, GSN_RESUME_REQ,
2453 	   ResumeReq::SignalLength);
2454   resumeReq->senderData = 12;
2455   resumeReq->senderRef = ss.getOwnRef();
2456 
2457   while(getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB)){
2458     if(okToSendTo(nodeId, true) == 0){
2459       SendStatus result = ss.sendSignal(nodeId, &ssig);
2460       if (result == SEND_OK)
2461 	count++;
2462     }
2463   }
2464 
2465   if(stopCount != 0)
2466     * stopCount = count;
2467 
2468   return 0;
2469 }
2470 
2471 /*****************************************************************************
2472  * Status
2473  ****************************************************************************/
2474 
2475 void
status_mgmd(NodeId node_id,ndb_mgm_node_status & node_status,Uint32 & version,Uint32 & mysql_version,const char ** address,char * addr_buf,size_t addr_buf_size,bool & is_single_user)2476 MgmtSrvr::status_mgmd(NodeId node_id,
2477                       ndb_mgm_node_status& node_status,
2478                       Uint32& version, Uint32& mysql_version,
2479                       const char **address,
2480                       char *addr_buf,
2481                       size_t addr_buf_size,
2482                       bool& is_single_user)
2483 {
2484   assert(getNodeType(node_id) == NDB_MGM_NODE_TYPE_MGM);
2485 
2486   if (node_id == getOwnNodeId())
2487   {
2488     /*
2489       Special case to get version of own node
2490       - version and mysql_version is hardcoded
2491       - address should be the address seen from ndbd(if it's connected)
2492         else use HostName from config
2493     */
2494     Uint32 tmp_version = 0, tmp_mysql_version = 0;
2495     sendVersionReq(node_id,
2496                    tmp_version,
2497                    tmp_mysql_version,
2498                    address,
2499                    addr_buf,
2500                    addr_buf_size,
2501                    is_single_user);
2502     // Check that the version returned is equal to compiled in version
2503     assert(tmp_version == 0 ||
2504            (tmp_version == NDB_VERSION &&
2505             tmp_mysql_version == NDB_MYSQL_VERSION_D));
2506 
2507     version = NDB_VERSION;
2508     mysql_version = NDB_MYSQL_VERSION_D;
2509     if(!*address)
2510     {
2511       // No address returned from ndbd -> get HostName from config
2512       Guard g(m_local_config_mutex);
2513       ConfigIter iter(m_local_config, CFG_SECTION_NODE);
2514       require(iter.find(CFG_NODE_ID, node_id) == 0);
2515       require(iter.get(CFG_NODE_HOST, address) == 0);
2516 
2517       /*
2518         Try to convert HostName to numerical ip address
2519         (to get same output as if ndbd had replied)
2520       */
2521       struct in_addr addr;
2522       if (Ndb_getInAddr(&addr, *address) == 0)
2523       {
2524         *address = Ndb_inet_ntop(AF_INET,
2525                                  static_cast<void*>(&addr),
2526                                  addr_buf,
2527                                  addr_buf_size);
2528       }
2529     }
2530 
2531     node_status = NDB_MGM_NODE_STATUS_CONNECTED;
2532     return;
2533   }
2534 
2535   /*
2536     MGM nodes are connected directly to all other MGM
2537     node(s), return status as seen by ClusterMgr
2538   */
2539   const trp_node node = getNodeInfo(node_id);
2540   if(node.is_connected())
2541   {
2542     version = node.m_info.m_version;
2543     mysql_version = node.m_info.m_mysql_version;
2544     node_status = NDB_MGM_NODE_STATUS_CONNECTED;
2545     *address= get_connect_address(node_id,
2546                                   addr_buf,
2547                                   addr_buf_size);
2548   }
2549   else
2550   {
2551     version = 0;
2552     mysql_version = 0;
2553     node_status = NDB_MGM_NODE_STATUS_NO_CONTACT;
2554   }
2555 
2556   return;
2557 }
2558 
2559 int
status(int nodeId,ndb_mgm_node_status * _status,Uint32 * version,Uint32 * mysql_version,Uint32 * _phase,bool * _system,Uint32 * dynamic,Uint32 * nodegroup,Uint32 * connectCount,const char ** address,char * addr_buf,size_t addr_buf_size,bool * is_single_user)2560 MgmtSrvr::status(int nodeId,
2561                  ndb_mgm_node_status * _status,
2562 		 Uint32 * version,
2563 		 Uint32 * mysql_version,
2564 		 Uint32 * _phase,
2565 		 bool * _system,
2566 		 Uint32 * dynamic,
2567 		 Uint32 * nodegroup,
2568 		 Uint32 * connectCount,
2569 		 const char **address,
2570                  char *addr_buf,
2571                  size_t addr_buf_size,
2572                  bool* is_single_user)
2573 {
2574   switch(getNodeType(nodeId)){
2575   case NDB_MGM_NODE_TYPE_API:
2576     status_api(nodeId,
2577                *_status,
2578                *version,
2579                *mysql_version,
2580                address,
2581                addr_buf,
2582                addr_buf_size,
2583                *is_single_user);
2584     return 0;
2585     break;
2586 
2587   case NDB_MGM_NODE_TYPE_MGM:
2588     status_mgmd(nodeId,
2589                 *_status,
2590                 *version,
2591                 *mysql_version,
2592                 address,
2593                 addr_buf,
2594                 addr_buf_size,
2595                 *is_single_user);
2596     return 0;
2597     break;
2598 
2599   case NDB_MGM_NODE_TYPE_NDB:
2600     break;
2601 
2602   default:
2603     abort();
2604     break;
2605   }
2606 
2607   const trp_node node = getNodeInfo(nodeId);
2608   assert(getNodeType(nodeId) == NDB_MGM_NODE_TYPE_NDB &&
2609          node.m_info.getType() == NodeInfo::DB);
2610 
2611   if(!node.is_connected()){
2612     * _status = NDB_MGM_NODE_STATUS_NO_CONTACT;
2613     return 0;
2614   }
2615 
2616   * version = node.m_info.m_version;
2617   * mysql_version = node.m_info.m_mysql_version;
2618 
2619   *address= get_connect_address(nodeId, addr_buf, addr_buf_size);
2620 
2621   * dynamic = node.m_state.dynamicId;
2622   * nodegroup = node.m_state.nodeGroup;
2623   * connectCount = node.m_info.m_connectCount;
2624 
2625   switch(node.m_state.startLevel){
2626   case NodeState::SL_CMVMI:
2627     * _status = NDB_MGM_NODE_STATUS_NOT_STARTED;
2628     * _phase = 0;
2629     return 0;
2630     break;
2631   case NodeState::SL_STARTING:
2632     * _status     = NDB_MGM_NODE_STATUS_STARTING;
2633     * _phase = node.m_state.starting.startPhase;
2634     return 0;
2635     break;
2636   case NodeState::SL_STARTED:
2637     * _status = NDB_MGM_NODE_STATUS_STARTED;
2638     * _phase = 0;
2639     return 0;
2640     break;
2641   case NodeState::SL_STOPPING_1:
2642     * _status = NDB_MGM_NODE_STATUS_SHUTTING_DOWN;
2643     * _phase = 1;
2644     * _system = node.m_state.stopping.systemShutdown != 0;
2645     return 0;
2646     break;
2647   case NodeState::SL_STOPPING_2:
2648     * _status = NDB_MGM_NODE_STATUS_SHUTTING_DOWN;
2649     * _phase = 2;
2650     * _system = node.m_state.stopping.systemShutdown != 0;
2651     return 0;
2652     break;
2653   case NodeState::SL_STOPPING_3:
2654     * _status = NDB_MGM_NODE_STATUS_SHUTTING_DOWN;
2655     * _phase = 3;
2656     * _system = node.m_state.stopping.systemShutdown != 0;
2657     return 0;
2658     break;
2659   case NodeState::SL_STOPPING_4:
2660     * _status = NDB_MGM_NODE_STATUS_SHUTTING_DOWN;
2661     * _phase = 4;
2662     * _system = node.m_state.stopping.systemShutdown != 0;
2663     return 0;
2664     break;
2665   case NodeState::SL_SINGLEUSER:
2666     * _status = NDB_MGM_NODE_STATUS_SINGLEUSER;
2667     * _phase  = 0;
2668     return 0;
2669     break;
2670   default:
2671     * _status = NDB_MGM_NODE_STATUS_UNKNOWN;
2672     * _phase = 0;
2673     return 0;
2674   }
2675 
2676   return -1;
2677 }
2678 
/*
 * Send EVENT_SUBSCRIBE_REQ carrying 'll' to the CMVMI block of one data
 * node (nodeId_arg != 0) or of all data nodes (nodeId_arg == 0), then
 * wait until each node that was sent to has answered or has been
 * reported as failed.
 *
 * Returns 0 when all answers were EVENT_SUBSCRIBE_CONF, otherwise
 * SEND_OR_RECEIVE_FAILED: a connected node was not ok to send to, the
 * send to the last node in the range failed, nothing could be sent at
 * all, some node answered EVENT_SUBSCRIBE_REF, or an unknown signal was
 * received.
 */
int
MgmtSrvr::setEventReportingLevelImpl(int nodeId_arg,
				     const EventSubscribeReq& ll)
{
  SignalSender ss(theFacade);
  // Nodes the request was sent to and that have not yet been accounted for
  NdbNodeBitmask nodes;
  nodes.clear();
  // This loop runs exactly once (it ends with an unconditional break)
  while (1)
  {
    Uint32 nodeId, max;
    ss.lock();
    SimpleSignal ssig;
    EventSubscribeReq * dst =
      CAST_PTR(EventSubscribeReq, ssig.getDataPtrSend());
    ssig.set(ss,TestOrd::TraceAPI, CMVMI, GSN_EVENT_SUBSCRIBE_REQ,
             EventSubscribeReq::SignalLength);
    *dst = ll;

    if (nodeId_arg == 0)
    {
      // all nodes
      nodeId = 1;
      max = MAX_NDB_NODES;
    }
    else
    {
      // only one node
      max = nodeId = nodeId_arg;
    }
    // first make sure nodes are sendable
    for(; nodeId <= max; nodeId++)
    {
      if (nodeTypes[nodeId] != NODE_TYPE_DB)
        continue;
      if (okToSendTo(nodeId, true))
      {
        if (getNodeInfo(nodeId).is_connected()  == false)
        {
          // node not connected, we can safely skip this one
          continue;
        }
        // api_reg_conf not received yet, need to retry
        return SEND_OR_RECEIVE_FAILED;
      }
    }

    // Reset the node range, the loop above has advanced nodeId past max
    if (nodeId_arg == 0)
    {
      // all nodes
      nodeId = 1;
      max = MAX_NDB_NODES;
    }
    else
    {
      // only one node
      max = nodeId = nodeId_arg;
    }
    // now send to all sendable nodes
    // note, lock is held, so states have not changed
    for(; (Uint32) nodeId <= max; nodeId++)
    {
      if (nodeTypes[nodeId] != NODE_TYPE_DB)
        continue;
      if (getNodeInfo(nodeId).is_connected()  == false)
        continue; // node is not connected, skip
      if (ss.sendSignal(nodeId, &ssig) == SEND_OK)
        nodes.set(nodeId);
      else if (max == nodeId)
      {
        // Send failure is only reported for the last node in the range
        // (which is the only node when a single node was requested)
        return SEND_OR_RECEIVE_FAILED;
      }
    }
    break;
  }

  if (nodes.isclear())
  {
    // The request was not sent to any node
    return SEND_OR_RECEIVE_FAILED;
  }

  // Wait until every node in 'nodes' has answered or been reported failed
  int error = 0;
  while (!nodes.isclear())
  {
    Uint32 nodeId;
    SimpleSignal *signal = ss.waitFor();
    int gsn = signal->readSignalNumber();
    nodeId = refToNode(signal->header.theSendersBlockRef);
    switch (gsn) {
    case GSN_EVENT_SUBSCRIBE_CONF:{
      nodes.clear(nodeId);
      break;
    }
    case GSN_EVENT_SUBSCRIBE_REF:{
      // Remember the refusal but keep waiting for the remaining nodes
      nodes.clear(nodeId);
      error = 1;
      break;
    }
      // Since sending with okToSend(true),
      // there is no guarantee that NF_COMPLETEREP will come,
      // i.e. listen also to NODE_FAILREP
    case GSN_NODE_FAILREP: {
      const NodeFailRep * const rep =
	CAST_CONSTPTR(NodeFailRep, signal->getDataPtr());
      Uint32 len = NodeFailRep::getNodeMaskLength(signal->getLength());
      require(len == NodeBitmask::Size || // only full length in ndbapi
              len == 0);
      NdbNodeBitmask mask;
      // only care about data nodes
      // The failed node mask is read from the first section when the
      // signal has one, otherwise from the signal data itself
      if (signal->header.m_noOfSections >= 1)
      {
        mask.assign(signal->ptr[0].sz, signal->ptr[0].p);
      }
      else
      {
        mask.assign(NdbNodeBitmask::Size, rep->theNodes);
      }
      // Stop waiting for all nodes reported as failed
      nodes.bitANDC(mask);
      break;
    }

    case GSN_NF_COMPLETEREP:{
      const NFCompleteRep * const rep =
	CAST_CONSTPTR(NFCompleteRep, signal->getDataPtr());
      // Ignore node ids that do not fit in the bitmask
      if (rep->failedNodeId <= nodes.max_size())
        nodes.clear(rep->failedNodeId);
      break;
    }
    case GSN_API_REGCONF:
    case GSN_TAKE_OVERTCCONF:
    case GSN_CONNECT_REP:
      // Not relevant for this request, keep waiting
      continue;
    default:
      report_unknown_signal(signal);
      return SEND_OR_RECEIVE_FAILED;
    }
  }
  if (error)
    return SEND_OR_RECEIVE_FAILED;
  return 0;
}
2819 
2820 //****************************************************************************
2821 //****************************************************************************
2822 int
setNodeLogLevelImpl(int nodeId,const SetLogLevelOrd & ll)2823 MgmtSrvr::setNodeLogLevelImpl(int nodeId, const SetLogLevelOrd & ll)
2824 {
2825   INIT_SIGNAL_SENDER(ss,nodeId);
2826 
2827   SimpleSignal ssig;
2828   ssig.set(ss,TestOrd::TraceAPI, CMVMI, GSN_SET_LOGLEVELORD,
2829 	   SetLogLevelOrd::SignalLength);
2830   SetLogLevelOrd* const dst = CAST_PTR(SetLogLevelOrd, ssig.getDataPtrSend());
2831   *dst = ll;
2832 
2833   return ss.sendSignal(nodeId, &ssig) == SEND_OK ? 0 : SEND_OR_RECEIVE_FAILED;
2834 }
2835 
2836 //****************************************************************************
2837 //****************************************************************************
2838 
2839 int
insertError(int nodeId,int errorNo,Uint32 * extra)2840 MgmtSrvr::insertError(int nodeId, int errorNo, Uint32 * extra)
2841 {
2842   BlockNumber block;
2843 
2844   if (errorNo < 0) {
2845     return INVALID_ERROR_NUMBER;
2846   }
2847 
2848   SignalSender ss(theFacade);
2849   ss.lock(); /* lock will be released on exit */
2850 
2851   if(getNodeType(nodeId) == NDB_MGM_NODE_TYPE_NDB)
2852   {
2853     block= CMVMI;
2854   }
2855   else if(nodeId == _ownNodeId)
2856   {
2857     g_errorInsert= errorNo;
2858     return 0;
2859   }
2860   else if(getNodeType(nodeId) == NDB_MGM_NODE_TYPE_MGM)
2861     block= _blockNumber;
2862   else
2863     return WRONG_PROCESS_TYPE;
2864 
2865   SimpleSignal ssig;
2866   ssig.set(ss,TestOrd::TraceAPI, block, GSN_TAMPER_ORD,
2867 	   TamperOrd::SignalLength);
2868   TamperOrd* const tamperOrd = CAST_PTR(TamperOrd, ssig.getDataPtrSend());
2869   tamperOrd->errorNo = errorNo;
2870 
2871   if (extra)
2872   {
2873     ssig.getDataPtrSend()[1] = * extra;
2874     ssig.header.theLength++;
2875   }
2876 
2877   int res = ss.sendSignal(nodeId, &ssig) == SEND_OK ? 0 :SEND_OR_RECEIVE_FAILED;
2878 
2879   if (res == 0)
2880   {
2881     /**
2882      * In order to make NDB_TAMPER (almost) syncronous,
2883      *   make a syncronous request *after* the NDB_TAMPER
2884      */
2885     make_sync_req(ss, Uint32(nodeId));
2886   }
2887 
2888   return res;
2889 }
2890 
2891 
/*
 * Begin a schema transaction by sending SCHEMA_TRANS_BEGIN_REQ to the
 * DBDICT block of an alive node and waiting for the answer.
 *
 * ss            signal sender used for sending and waiting
 * out_nodeId    set to the node that confirmed the transaction
 * transId       transaction id placed in the request
 * out_transKey  set to the transKey from the confirmation
 *
 * Returns 0 on SCHEMA_TRANS_BEGIN_CONF, the errorCode of a
 * SCHEMA_TRANS_BEGIN_REF that is not retried, or SEND_OR_RECEIVE_FAILED
 * when sending fails or an unknown signal arrives. The out parameters
 * are only written on success.
 */
int
MgmtSrvr::startSchemaTrans(SignalSender& ss, NodeId & out_nodeId,
                           Uint32 transId, Uint32 & out_transKey)
{
  SimpleSignal ssig;

  ssig.set(ss, 0, DBDICT, GSN_SCHEMA_TRANS_BEGIN_REQ,
           SchemaTransBeginReq::SignalLength);

  SchemaTransBeginReq* req =
    CAST_PTR(SchemaTransBeginReq, ssig.getDataPtrSend());

  req->clientRef =  ss.getOwnRef();
  req->transId = transId;
  req->requestInfo = 0;

  NodeId nodeId = ss.get_an_alive_node();

  // Entry point for every (re)send of the request
retry:
  // Pick another alive node when the current candidate is not alive
  if (ss.get_node_alive(nodeId) == false)
  {
    nodeId = ss.get_an_alive_node();
  }

  if (ss.sendSignal(nodeId, &ssig) != SEND_OK)
  {
    return SEND_OR_RECEIVE_FAILED;
  }

  while (true)
  {
    SimpleSignal *signal = ss.waitFor();
    int gsn = signal->readSignalNumber();
    switch (gsn) {
    case GSN_SCHEMA_TRANS_BEGIN_CONF: {
      const SchemaTransBeginConf * conf =
        CAST_CONSTPTR(SchemaTransBeginConf, signal->getDataPtr());
      out_transKey = conf->transKey;
      out_nodeId = nodeId;
      return 0;
    }
    case GSN_SCHEMA_TRANS_BEGIN_REF: {
      const SchemaTransBeginRef * ref =
        CAST_CONSTPTR(SchemaTransBeginRef, signal->getDataPtr());

      switch(ref->errorCode){
      case SchemaTransBeginRef::NotMaster:
        // Resend to the node that the refusal points out as master
        nodeId = ref->masterNodeId;
        // Fall-through
      case SchemaTransBeginRef::Busy:
      case SchemaTransBeginRef::BusyWithNR:
        // Resend immediately, there is no delay between the attempts
        goto retry;
      default:
        return ref->errorCode;
      }
    }
    case GSN_NF_COMPLETEREP:
      // ignore
      break;
    case GSN_NODE_FAILREP:{
      const NodeFailRep * const rep =
        CAST_CONSTPTR(NodeFailRep, signal->getDataPtr());
      Uint32 len = NodeFailRep::getNodeMaskLength(signal->getLength());
      assert(len == NodeBitmask::Size || // only full length in ndbapi
             len == 0);
      /*
       * When the node the request was sent to is among the failed nodes,
       * move on to the next node id and resend. The failed node mask is
       * read from the first section when the signal has one, otherwise
       * from the signal data itself.
       */
      if (signal->header.m_noOfSections >= 1)
      {
        if (BitmaskImpl::safe_get(NodeBitmask::getPackedLengthInWords(signal->ptr[0].p),
                                  signal->ptr[0].p, nodeId))
        {
          nodeId++;
          goto retry;
        }
      }
      else if (BitmaskImpl::safe_get(len, rep->theAllNodes, nodeId))
      {
        nodeId++;
        goto retry;
      }
      break;
    }
    case GSN_API_REGCONF:
    case GSN_TAKE_OVERTCCONF:
    case GSN_CONNECT_REP:
      // Not relevant for this request, keep waiting
      break;
    default:
      report_unknown_signal(signal);
      return SEND_OR_RECEIVE_FAILED;
    }
  }
}
2983 
2984 int
endSchemaTrans(SignalSender & ss,NodeId nodeId,Uint32 transId,Uint32 transKey,Uint32 flags)2985 MgmtSrvr::endSchemaTrans(SignalSender& ss, NodeId nodeId,
2986                          Uint32 transId, Uint32 transKey,
2987                          Uint32 flags)
2988 {
2989   SimpleSignal ssig;
2990 
2991   ssig.set(ss, 0, DBDICT, GSN_SCHEMA_TRANS_END_REQ,
2992            SchemaTransEndReq::SignalLength);
2993 
2994   SchemaTransEndReq* req =
2995     CAST_PTR(SchemaTransEndReq, ssig.getDataPtrSend());
2996 
2997   req->clientRef =  ss.getOwnRef();
2998   req->transId = transId;
2999   req->requestInfo = 0;
3000   req->transKey = transKey;
3001   req->flags = flags;
3002 
3003   if (ss.sendSignal(nodeId, &ssig) != SEND_OK)
3004   {
3005     return SEND_OR_RECEIVE_FAILED;
3006   }
3007 
3008   while (true)
3009   {
3010     SimpleSignal *signal = ss.waitFor();
3011     int gsn = signal->readSignalNumber();
3012     switch (gsn) {
3013     case GSN_SCHEMA_TRANS_END_CONF: {
3014       return 0;
3015     }
3016     case GSN_SCHEMA_TRANS_END_REF: {
3017       const SchemaTransEndRef * ref =
3018         CAST_CONSTPTR(SchemaTransEndRef, signal->getDataPtr());
3019       return ref->errorCode;
3020     }
3021     case GSN_NF_COMPLETEREP:
3022       // ignore
3023       break;
3024     case GSN_NODE_FAILREP:{
3025       const NodeFailRep * const rep =
3026         CAST_CONSTPTR(NodeFailRep, signal->getDataPtr());
3027       Uint32 len = NodeFailRep::getNodeMaskLength(signal->getLength());
3028       assert(len == NodeBitmask::Size || // only full length in ndbapi
3029              len == 0);
3030 
3031       if (signal->header.m_noOfSections >= 1)
3032       {
3033         if (BitmaskImpl::safe_get(NodeBitmask::getPackedLengthInWords(signal->ptr[0].p),
3034                                   signal->ptr[0].p, nodeId))
3035         {
3036           return -1;
3037         }
3038       }
3039       else if (BitmaskImpl::safe_get(len, rep->theAllNodes, nodeId))
3040       {
3041         return -1;
3042       }
3043       break;
3044     }
3045     case GSN_API_REGCONF:
3046     case GSN_TAKE_OVERTCCONF:
3047     case GSN_CONNECT_REP:
3048       break;
3049     default:
3050       report_unknown_signal(signal);
3051       return SEND_OR_RECEIVE_FAILED;
3052     }
3053   }
3054 }
3055 
3056 int
createNodegroup(int * nodes,int count,int * ng)3057 MgmtSrvr::createNodegroup(int *nodes, int count, int *ng)
3058 {
3059   int res;
3060   SignalSender ss(theFacade);
3061   ss.lock();
3062 
3063   Uint32 transId = rand();
3064   Uint32 transKey;
3065   NodeId nodeId;
3066 
3067   if ((res = startSchemaTrans(ss, nodeId, transId, transKey)))
3068   {
3069     return res;
3070   }
3071 
3072   SimpleSignal ssig;
3073   ssig.set(ss, 0, DBDICT, GSN_CREATE_NODEGROUP_REQ,
3074            CreateNodegroupReq::SignalLength);
3075 
3076   CreateNodegroupReq* req =
3077     CAST_PTR(CreateNodegroupReq, ssig.getDataPtrSend());
3078 
3079   req->transId = transId;
3080   req->transKey = transKey;
3081   req->nodegroupId = RNIL;
3082   req->senderData = 77;
3083   req->senderRef = ss.getOwnRef();
3084   bzero(req->nodes, sizeof(req->nodes));
3085 
3086   if (ng)
3087   {
3088     if (* ng != -1)
3089     {
3090       req->nodegroupId = * ng;
3091     }
3092   }
3093   for (int i = 0; i<count && i<(int)NDB_ARRAY_SIZE(req->nodes); i++)
3094   {
3095     req->nodes[i] = nodes[i];
3096   }
3097 
3098   if (ss.sendSignal(nodeId, &ssig) != SEND_OK)
3099   {
3100     return SEND_OR_RECEIVE_FAILED;
3101   }
3102 
3103   bool wait = true;
3104   while (wait)
3105   {
3106     SimpleSignal *signal = ss.waitFor();
3107     int gsn = signal->readSignalNumber();
3108     switch (gsn) {
3109     case GSN_CREATE_NODEGROUP_CONF: {
3110       const CreateNodegroupConf * conf =
3111         CAST_CONSTPTR(CreateNodegroupConf, signal->getDataPtr());
3112 
3113       if (ng)
3114       {
3115         * ng = conf->nodegroupId;
3116       }
3117 
3118       wait = false;
3119       break;
3120     }
3121     case GSN_CREATE_NODEGROUP_REF:{
3122       const CreateNodegroupRef * ref =
3123         CAST_CONSTPTR(CreateNodegroupRef, signal->getDataPtr());
3124       Uint32 err = ref->errorCode;
3125       endSchemaTrans(ss, nodeId, transId, transKey,
3126                      SchemaTransEndReq::SchemaTransAbort);
3127       return err;
3128     }
3129     case GSN_NF_COMPLETEREP:
3130       // ignore
3131       break;
3132     case GSN_NODE_FAILREP:{
3133       const NodeFailRep * const rep =
3134         CAST_CONSTPTR(NodeFailRep, signal->getDataPtr());
3135       Uint32 len = NodeFailRep::getNodeMaskLength(signal->getLength());
3136       assert(len == NodeBitmask::Size || // only full length in ndbapi
3137              len == 0);
3138 
3139       if (signal->header.m_noOfSections >= 1)
3140       {
3141         if (BitmaskImpl::safe_get(NodeBitmask::getPackedLengthInWords(signal->ptr[0].p),
3142                                   signal->ptr[0].p, nodeId))
3143         {
3144           return SchemaTransBeginRef::Nodefailure;
3145         }
3146       }
3147       else if (BitmaskImpl::safe_get(len, rep->theAllNodes, nodeId))
3148       {
3149         return SchemaTransBeginRef::Nodefailure;
3150       }
3151       break;
3152     }
3153     case GSN_API_REGCONF:
3154     case GSN_TAKE_OVERTCCONF:
3155     case GSN_CONNECT_REP:
3156       break;
3157     default:
3158       report_unknown_signal(signal);
3159       return SEND_OR_RECEIVE_FAILED;
3160     }
3161   }
3162 
3163   return endSchemaTrans(ss, nodeId, transId, transKey, 0);
3164 }
3165 
3166 int
dropNodegroup(int ng)3167 MgmtSrvr::dropNodegroup(int ng)
3168 {
3169   int res;
3170   SignalSender ss(theFacade);
3171   ss.lock();
3172 
3173   Uint32 transId = rand();
3174   Uint32 transKey;
3175   NodeId nodeId;
3176 
3177   if ((res = startSchemaTrans(ss, nodeId, transId, transKey)))
3178   {
3179     return res;
3180   }
3181 
3182   SimpleSignal ssig;
3183   ssig.set(ss, 0, DBDICT, GSN_DROP_NODEGROUP_REQ, DropNodegroupReq::SignalLength);
3184 
3185   DropNodegroupReq* req =
3186     CAST_PTR(DropNodegroupReq, ssig.getDataPtrSend());
3187 
3188   req->transId = transId;
3189   req->transKey = transKey;
3190   req->nodegroupId = ng;
3191   req->senderData = 77;
3192   req->senderRef = ss.getOwnRef();
3193 
3194   if (ss.sendSignal(nodeId, &ssig) != SEND_OK)
3195   {
3196     return SEND_OR_RECEIVE_FAILED;
3197   }
3198 
3199   bool wait = true;
3200   while (wait)
3201   {
3202     SimpleSignal *signal = ss.waitFor();
3203     int gsn = signal->readSignalNumber();
3204     switch (gsn) {
3205     case GSN_DROP_NODEGROUP_CONF: {
3206       wait = false;
3207       break;
3208     }
3209     case GSN_DROP_NODEGROUP_REF:
3210     {
3211       const DropNodegroupRef * ref =
3212         CAST_CONSTPTR(DropNodegroupRef, signal->getDataPtr());
3213       endSchemaTrans(ss, nodeId, transId, transKey,
3214                      SchemaTransEndReq::SchemaTransAbort);
3215       return ref->errorCode;
3216     }
3217     case GSN_NF_COMPLETEREP:
3218       // ignore
3219       break;
3220     case GSN_NODE_FAILREP:{
3221       const NodeFailRep * const rep =
3222         CAST_CONSTPTR(NodeFailRep, signal->getDataPtr());
3223       Uint32 len = NodeFailRep::getNodeMaskLength(signal->getLength());
3224       assert(len == NodeBitmask::Size || // only full length in ndbapi
3225              len == 0);
3226 
3227       if (signal->header.m_noOfSections >= 1)
3228       {
3229         if (BitmaskImpl::safe_get(NodeBitmask::getPackedLengthInWords(signal->ptr[0].p),
3230                                   signal->ptr[0].p, nodeId))
3231         {
3232           return SchemaTransBeginRef::Nodefailure;
3233         }
3234       }
3235       else if (BitmaskImpl::safe_get(len, rep->theAllNodes, nodeId))
3236       {
3237         return SchemaTransBeginRef::Nodefailure;
3238       }
3239       break;
3240     }
3241     case GSN_API_REGCONF:
3242     case GSN_TAKE_OVERTCCONF:
3243     case GSN_CONNECT_REP:
3244       break;
3245     default:
3246       report_unknown_signal(signal);
3247       return SEND_OR_RECEIVE_FAILED;
3248     }
3249   }
3250 
3251   return endSchemaTrans(ss, nodeId, transId, transKey, 0);
3252 }
3253 
3254 
3255 //****************************************************************************
3256 //****************************************************************************
3257 
3258 int
setTraceNo(int nodeId,int traceNo)3259 MgmtSrvr::setTraceNo(int nodeId, int traceNo)
3260 {
3261   if (traceNo < 0) {
3262     return INVALID_TRACE_NUMBER;
3263   }
3264 
3265   INIT_SIGNAL_SENDER(ss,nodeId);
3266 
3267   SimpleSignal ssig;
3268   ssig.set(ss,TestOrd::TraceAPI, CMVMI, GSN_TEST_ORD, TestOrd::SignalLength);
3269   TestOrd* const testOrd = CAST_PTR(TestOrd, ssig.getDataPtrSend());
3270   testOrd->clear();
3271   // Assume TRACE command causes toggling. Not really defined... ? TODO
3272   testOrd->setTraceCommand(TestOrd::Toggle,
3273 			   (TestOrd::TraceSpecification)traceNo);
3274 
3275   return ss.sendSignal(nodeId, &ssig) == SEND_OK ? 0 : SEND_OR_RECEIVE_FAILED;
3276 }
3277 
3278 //****************************************************************************
3279 //****************************************************************************
3280 
3281 int
setSignalLoggingMode(int nodeId,LogMode mode,const Vector<BaseString> & blocks)3282 MgmtSrvr::setSignalLoggingMode(int nodeId, LogMode mode,
3283 			       const Vector<BaseString>& blocks)
3284 {
3285   INIT_SIGNAL_SENDER(ss,nodeId);
3286 
3287   // Convert from MgmtSrvr format...
3288 
3289   TestOrd::Command command;
3290   if (mode == Off) {
3291     command = TestOrd::Off;
3292   }
3293   else {
3294     command = TestOrd::On;
3295   }
3296 
3297   TestOrd::SignalLoggerSpecification logSpec;
3298   switch (mode) {
3299   case In:
3300     logSpec = TestOrd::InputSignals;
3301     break;
3302   case Out:
3303     logSpec = TestOrd::OutputSignals;
3304     break;
3305   case InOut:
3306     logSpec = TestOrd::InputOutputSignals;
3307     break;
3308   case Off:
3309     // In MgmtSrvr interface it's just possible to switch off all logging, both
3310     // "in" and "out" (this should probably be changed).
3311     logSpec = TestOrd::InputOutputSignals;
3312     break;
3313   default:
3314     ndbout_c("Unexpected value %d, MgmtSrvr::setSignalLoggingMode, line %d",
3315 	     (unsigned)mode, __LINE__);
3316     assert(false);
3317     return -1;
3318   }
3319 
3320   SimpleSignal ssig;
3321   ssig.set(ss,TestOrd::TraceAPI, CMVMI, GSN_TEST_ORD, TestOrd::SignalLength);
3322 
3323   TestOrd* const testOrd = CAST_PTR(TestOrd, ssig.getDataPtrSend());
3324   testOrd->clear();
3325 
3326   if (blocks.size() == 0 || blocks[0] == "ALL") {
3327     // Logg command for all blocks
3328     testOrd->addSignalLoggerCommand(command, logSpec);
3329   } else {
3330     for(unsigned i = 0; i < blocks.size(); i++)
3331     {
3332       BlockNumber blockNumber = getBlockNo(blocks[i].c_str());
3333       if (blockNumber == 0)
3334         return INVALID_BLOCK_NAME;
3335       testOrd->addSignalLoggerCommand(blockNumber, command, logSpec);
3336     }
3337   }
3338 
3339   return ss.sendSignal(nodeId, &ssig) == SEND_OK ? 0 : SEND_OR_RECEIVE_FAILED;
3340 }
3341 
3342 /*****************************************************************************
3343  * Signal tracing
3344  *****************************************************************************/
startSignalTracing(int nodeId)3345 int MgmtSrvr::startSignalTracing(int nodeId)
3346 {
3347   INIT_SIGNAL_SENDER(ss,nodeId);
3348 
3349   SimpleSignal ssig;
3350   ssig.set(ss,TestOrd::TraceAPI, CMVMI, GSN_TEST_ORD, TestOrd::SignalLength);
3351 
3352   TestOrd* const testOrd = CAST_PTR(TestOrd, ssig.getDataPtrSend());
3353   testOrd->clear();
3354   testOrd->setTestCommand(TestOrd::On);
3355 
3356   return ss.sendSignal(nodeId, &ssig) == SEND_OK ? 0 : SEND_OR_RECEIVE_FAILED;
3357 }
3358 
3359 int
stopSignalTracing(int nodeId)3360 MgmtSrvr::stopSignalTracing(int nodeId)
3361 {
3362   INIT_SIGNAL_SENDER(ss,nodeId);
3363 
3364   SimpleSignal ssig;
3365   ssig.set(ss,TestOrd::TraceAPI, CMVMI, GSN_TEST_ORD, TestOrd::SignalLength);
3366   TestOrd* const testOrd = CAST_PTR(TestOrd, ssig.getDataPtrSend());
3367   testOrd->clear();
3368   testOrd->setTestCommand(TestOrd::Off);
3369 
3370   return ss.sendSignal(nodeId, &ssig) == SEND_OK ? 0 : SEND_OR_RECEIVE_FAILED;
3371 }
3372 
3373 
3374 /*****************************************************************************
3375  * Dump state
3376  *****************************************************************************/
3377 
3378 int
dumpState(int nodeId,const char * args)3379 MgmtSrvr::dumpState(int nodeId, const char* args)
3380 {
3381   // Convert the space separeted args
3382   // string to an int array
3383   Uint32 args_array[25];
3384   Uint32 numArgs = 0;
3385 
3386   const int BufSz = 12; /* 32 bit signed = 10 digits + sign + trailing \0 */
3387   char buf[BufSz];
3388   int b  = 0;
3389   memset(buf, 0, BufSz);
3390   for (size_t i = 0; i <= strlen(args); i++){
3391     if (args[i] == ' ' || args[i] == 0){
3392       assert(b < BufSz);
3393       assert(buf[b] == 0);
3394       args_array[numArgs] = atoi(buf);
3395       numArgs++;
3396       memset(buf, 0, BufSz);
3397       b = 0;
3398     } else {
3399       buf[b] = args[i];
3400       b++;
3401     }
3402   }
3403 
3404   return dumpState(nodeId, args_array, numArgs);
3405 }
3406 
3407 int
dumpState(int nodeId,const Uint32 args[],Uint32 no)3408 MgmtSrvr::dumpState(int nodeId, const Uint32 args[], Uint32 no)
3409 {
3410   if (nodeId == _ownNodeId)
3411   {
3412     return dumpStateSelf(args, no);
3413   }
3414 
3415   INIT_SIGNAL_SENDER(ss,nodeId);
3416 
3417   const Uint32 len = no > 25 ? 25 : no;
3418 
3419   SimpleSignal ssig;
3420   DumpStateOrd * const dumpOrd =
3421     CAST_PTR(DumpStateOrd, ssig.getDataPtrSend());
3422   ssig.set(ss,TestOrd::TraceAPI, CMVMI, GSN_DUMP_STATE_ORD, len);
3423   for(Uint32 i = 0; i<25; i++){
3424     if (i < len)
3425       dumpOrd->args[i] = args[i];
3426     else
3427       dumpOrd->args[i] = 0;
3428   }
3429 
3430   int res = ss.sendSignal(nodeId, &ssig) == SEND_OK ? 0 :SEND_OR_RECEIVE_FAILED;
3431 
3432   if (res == 0)
3433   {
3434     /**
3435      * In order to make DUMP (almost) syncronous,
3436      *   make a syncronous request *after* the NDB_TAMPER
3437      */
3438     make_sync_req(ss, Uint32(nodeId));
3439   }
3440 
3441   return res;
3442 
3443 }
3444 
3445 int
dumpStateSelf(const Uint32 args[],Uint32 no)3446 MgmtSrvr::dumpStateSelf(const Uint32 args[], Uint32 no)
3447 {
3448   if (no < 1)
3449     return -1;
3450 
3451   switch(args[0])
3452   {
3453 #ifdef ERROR_INSERT
3454   case 9994:
3455   {
3456     /* Transporter send blocking */
3457     if (no >= 2)
3458     {
3459       Uint32 nodeId = args[1];
3460       ndbout_c("Blocking send to node %u",
3461                nodeId);
3462       TransporterRegistry* tr = theFacade->get_registry();
3463       tr->blockSend(*theFacade, nodeId);
3464     }
3465     break;
3466   }
3467   case 9995:
3468   {
3469     /* Transporter send unblocking */
3470     if (no >= 2)
3471     {
3472       Uint32 nodeId = args[1];
3473       ndbout_c("Unblocking send to node %u",
3474                nodeId);
3475       TransporterRegistry* tr = theFacade->get_registry();
3476       tr->unblockSend(*theFacade, nodeId);
3477     }
3478     break;
3479   }
3480 
3481   case 9996:
3482   {
3483     /* Sendbuffer consumption */
3484     if (no >= 2)
3485     {
3486       Uint64 remain_bytes = args[1];
3487       ndbout_c("Consuming sendbuffer except for %llu bytes",
3488                remain_bytes);
3489       theFacade->consume_sendbuffer(remain_bytes);
3490     }
3491     break;
3492   }
3493   case 9997:
3494   {
3495     /* Sendbuffer release */
3496     ndbout_c("Releasing consumed sendbuffer");
3497     theFacade->release_consumed_sendbuffer();
3498     break;
3499   }
3500 #endif
3501   default:
3502     ;
3503   }
3504 
3505   return 0;
3506 }
3507 
3508 
3509 
3510 //****************************************************************************
3511 //****************************************************************************
3512 
getErrorText(int errorCode,char * buf,int buf_sz)3513 const char* MgmtSrvr::getErrorText(int errorCode, char *buf, int buf_sz)
3514 {
3515   ndb_error_string(errorCode, buf, buf_sz);
3516   buf[buf_sz-1]= 0;
3517   return buf;
3518 }
3519 
3520 
3521 void
trp_deliver_signal(const NdbApiSignal * signal,const LinearSectionPtr ptr[3])3522 MgmtSrvr::trp_deliver_signal(const NdbApiSignal* signal,
3523                              const LinearSectionPtr ptr[3])
3524 {
3525   int gsn = signal->readSignalNumber();
3526 
3527   switch (gsn) {
3528   case GSN_EVENT_REP:
3529   {
3530     /**
3531      * This EVENT_REP receives all infoEvent and eventLog messages that
3532      * are NOT generated through a DUMP command.
3533      */
3534     const Uint32 *data = signal->getDataPtr();
3535     Uint32 sz = signal->getLength();
3536     if (signal->getNoOfSections() > 0)
3537     {
3538       /**
3539        * Data comes in segmented part.
3540        */
3541       data = ptr[0].p;
3542       sz = ptr[0].sz;
3543     }
3544     eventReport(signal->getDataPtr(), sz, data);
3545     break;
3546   }
3547 
3548   case GSN_NF_COMPLETEREP:{
3549     const NFCompleteRep * rep = CAST_CONSTPTR(NFCompleteRep,
3550                                                signal->getDataPtr());
3551     /* Clear local nodeid reservation(if any) */
3552     release_local_nodeid_reservation(rep->failedNodeId);
3553 
3554      clear_connect_address_cache(rep->failedNodeId);
3555     break;
3556   }
3557   case GSN_TAMPER_ORD:
3558     ndbout << "TAMPER ORD" << endl;
3559     break;
3560   case GSN_API_REGCONF:
3561   case GSN_TAKE_OVERTCCONF:
3562     break;
3563   case GSN_CONNECT_REP:{
3564     const Uint32 nodeId = signal->getDataPtr()[0];
3565 
3566     /*
3567       Clear local nodeid reservation since nodeid is
3568       now reserved by a connected transporter
3569     */
3570     release_local_nodeid_reservation(nodeId);
3571 
3572     union {
3573       Uint32 theData[25];
3574       EventReport repData;
3575     };
3576     EventReport * rep = &repData;
3577     theData[1] = nodeId;
3578     rep->setEventType(NDB_LE_Connected);
3579 
3580     if (nodeTypes[nodeId] == NODE_TYPE_DB)
3581     {
3582       m_started_nodes.push_back(nodeId);
3583     }
3584     rep->setEventType(NDB_LE_Connected);
3585     rep->setNodeId(_ownNodeId);
3586     eventReport(theData, 1, theData);
3587     return;
3588   }
3589   case GSN_NODE_FAILREP:
3590   {
3591     union {
3592       Uint32 theData[25];
3593       EventReport repData;
3594     };
3595     bzero(theData, sizeof(theData));
3596     EventReport * event = &repData;
3597     event->setEventType(NDB_LE_Disconnected);
3598     event->setNodeId(_ownNodeId);
3599 
3600     const NodeFailRep *rep = CAST_CONSTPTR(NodeFailRep,
3601                                            signal->getDataPtr());
3602     Uint32 len = NodeFailRep::getNodeMaskLength(signal->getLength());
3603 
3604     const Uint32* nbm;
3605     if (signal->m_noOfSections >= 1)
3606     {
3607       assert (len == 0);
3608       nbm = ptr[0].p;
3609       len = ptr[0].sz;
3610     }
3611     else
3612     {
3613       assert(len == NodeBitmask::Size); // only full length in ndbapi
3614       nbm = rep->theAllNodes;
3615     }
3616 
3617     for (Uint32 i = BitmaskImpl::find_first(len, nbm);
3618          i != BitmaskImpl::NotFound;
3619          i = BitmaskImpl::find_next(len, nbm, i + 1))
3620     {
3621       theData[1] = i;
3622       eventReport(theData, 1, theData);
3623 
3624       /* Clear local nodeid reservation(if any) */
3625       release_local_nodeid_reservation(i);
3626 
3627       clear_connect_address_cache(i);
3628     }
3629     return;
3630   }
3631   case GSN_CLOSE_COMREQ:
3632   {
3633     theFacade->perform_close_clnt(this);
3634     break;
3635   }
3636   default:
3637     g_eventLogger->error("Unknown signal received. SignalNumber: "
3638                          "%i from (%d, 0x%x)",
3639                          gsn,
3640                          refToNode(signal->theSendersBlockRef),
3641                          refToBlock(signal->theSendersBlockRef));
3642     assert(false);
3643   }
3644 }
3645 
3646 
3647 void
trp_node_status(Uint32 nodeId,Uint32 _event)3648 MgmtSrvr::trp_node_status(Uint32 nodeId, Uint32 _event)
3649 {
3650 }
3651 
3652 enum ndb_mgm_node_type
getNodeType(NodeId nodeId) const3653 MgmtSrvr::getNodeType(NodeId nodeId) const
3654 {
3655   if(nodeId >= MAX_NODES)
3656     return (enum ndb_mgm_node_type)-1;
3657 
3658   return nodeTypes[nodeId];
3659 }
3660 
3661 
3662 const char*
get_connect_address(NodeId node_id,char * addr_buf,size_t addr_buf_size)3663 MgmtSrvr::get_connect_address(NodeId node_id,
3664                               char *addr_buf,
3665                               size_t addr_buf_size)
3666 {
3667   assert(node_id < NDB_ARRAY_SIZE(m_connect_address));
3668 
3669   if (m_connect_address[node_id].s_addr == 0)
3670   {
3671     // No cached connect address available
3672     const trp_node &node= getNodeInfo(node_id);
3673     if (node.is_connected())
3674     {
3675       // Cache the connect address, it's valid until
3676       // node disconnects
3677       m_connect_address[node_id] = theFacade->ext_get_connect_address(node_id);
3678     }
3679   }
3680 
3681   // Return the cached connect address
3682   return Ndb_inet_ntop(AF_INET,
3683                        static_cast<void*>(&m_connect_address[node_id]),
3684                        addr_buf,
3685                        addr_buf_size);
3686 }
3687 
3688 
3689 void
clear_connect_address_cache(NodeId nodeid)3690 MgmtSrvr::clear_connect_address_cache(NodeId nodeid)
3691 {
3692   assert(nodeid < NDB_ARRAY_SIZE(m_connect_address));
3693   if (nodeid < NDB_ARRAY_SIZE(m_connect_address))
3694   {
3695     m_connect_address[nodeid].s_addr = 0;
3696   }
3697 }
3698 
3699 /***************************************************************************
3700  * Alloc nodeid
3701  ***************************************************************************/
3702 
NodeIdReservations()3703 MgmtSrvr::NodeIdReservations::NodeIdReservations()
3704 {
3705   memset(m_reservations, 0, sizeof(m_reservations));
3706 }
3707 
3708 
3709 void
check_array(NodeId n) const3710 MgmtSrvr::NodeIdReservations::check_array(NodeId n) const
3711 {
3712   assert( n < NDB_ARRAY_SIZE(m_reservations));
3713 }
3714 
3715 
3716 bool
get(NodeId n) const3717 MgmtSrvr::NodeIdReservations::get(NodeId n) const
3718 {
3719   check_array(n);
3720 
3721   return (m_reservations[n].m_timeout != 0);
3722 }
3723 
3724 
3725 void
set(NodeId n,unsigned timeout)3726 MgmtSrvr::NodeIdReservations::set(NodeId n, unsigned timeout)
3727 {
3728   check_array(n);
3729 
3730   Reservation& r = m_reservations[n];
3731   // Dont't allow double set
3732   assert(r.m_timeout == 0 && !NdbTick_IsValid(r.m_start));
3733 
3734   r.m_timeout = timeout;
3735   r.m_start = NdbTick_getCurrentTicks();
3736 }
3737 
3738 
3739 BaseString
pretty_str() const3740 MgmtSrvr::NodeIdReservations::pretty_str() const
3741 {
3742   const char* sep = "";
3743   BaseString str;
3744   for (size_t i = 0; i < NDB_ARRAY_SIZE(m_reservations); i++)
3745   {
3746     const Reservation& r = m_reservations[i];
3747     if (r.m_timeout)
3748     {
3749       str.appfmt("%s%u", sep, (unsigned)i);
3750       sep = ",";
3751     }
3752   }
3753   return str;
3754 }
3755 
3756 
3757 void
clear(NodeId n)3758 MgmtSrvr::NodeIdReservations::clear(NodeId n)
3759 {
3760   check_array(n);
3761 
3762   Reservation& r = m_reservations[n];
3763   // Dont't allow double clear
3764   assert(r.m_timeout != 0 && NdbTick_IsValid(r.m_start));
3765 
3766   r.m_timeout = 0;
3767   NdbTick_Invalidate(&r.m_start);
3768 }
3769 
3770 
3771 bool
has_timedout(NodeId n,NDB_TICKS now) const3772 MgmtSrvr::NodeIdReservations::has_timedout(NodeId n, NDB_TICKS now) const
3773 {
3774   check_array(n);
3775 
3776   const Reservation& r = m_reservations[n];
3777   if (r.m_timeout &&
3778       NdbTick_Elapsed(r.m_start,now).milliSec() > r.m_timeout)
3779     return true;
3780   return false;
3781 }
3782 
3783 
3784 void
release_local_nodeid_reservation(NodeId nodeid)3785 MgmtSrvr::release_local_nodeid_reservation(NodeId nodeid)
3786 {
3787   NdbMutex_Lock(m_reserved_nodes_mutex);
3788   if (m_reserved_nodes.get(nodeid))
3789   {
3790     g_eventLogger->debug("Releasing local reservation for nodeid %d", nodeid);
3791     m_reserved_nodes.clear(nodeid);
3792   }
3793   NdbMutex_Unlock(m_reserved_nodes_mutex);
3794 }
3795 
3796 
3797 int
alloc_node_id_req(NodeId free_node_id,enum ndb_mgm_node_type type,Uint32 timeout_ms)3798 MgmtSrvr::alloc_node_id_req(NodeId free_node_id,
3799                             enum ndb_mgm_node_type type,
3800                             Uint32 timeout_ms)
3801 {
3802   bool first_attempt = true;
3803   SignalSender ss(theFacade);
3804   ss.lock(); // lock will be released on exit
3805 
3806   SimpleSignal ssig;
3807   AllocNodeIdReq* req = CAST_PTR(AllocNodeIdReq, ssig.getDataPtrSend());
3808   ssig.set(ss, TestOrd::TraceAPI, QMGR, GSN_ALLOC_NODEID_REQ,
3809            AllocNodeIdReq::SignalLength);
3810 
3811   req->senderRef = ss.getOwnRef();
3812   req->senderData = 19;
3813   req->nodeId = free_node_id;
3814   req->nodeType = type;
3815   req->timeout = timeout_ms;
3816 
3817   int do_send = 1;
3818   NodeId nodeId = 0;
3819   while (1)
3820   {
3821     if (nodeId == 0)
3822     {
3823       bool next;
3824       while((next = getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB)) == true &&
3825             getNodeInfo(nodeId).is_confirmed() == false)
3826         ;
3827       if (!next)
3828         return NO_CONTACT_WITH_DB_NODES;
3829       do_send = 1;
3830     }
3831     if (do_send)
3832     {
3833       if (ss.sendSignal(nodeId, &ssig) != SEND_OK)
3834         return SEND_OR_RECEIVE_FAILED;
3835       do_send = 0;
3836     }
3837 
3838     SimpleSignal *signal = ss.waitFor();
3839 
3840     int gsn = signal->readSignalNumber();
3841     switch (gsn) {
3842     case GSN_ALLOC_NODEID_CONF:
3843     {
3844 #ifdef NOT_USED
3845       const AllocNodeIdConf * const conf =
3846         CAST_CONSTPTR(AllocNodeIdConf, signal->getDataPtr());
3847 #endif
3848       g_eventLogger->info("Alloc node id %u succeeded", free_node_id);
3849       return 0;
3850     }
3851     case GSN_ALLOC_NODEID_REF:
3852     {
3853       const AllocNodeIdRef * const ref =
3854         CAST_CONSTPTR(AllocNodeIdRef, signal->getDataPtr());
3855       if (ref->errorCode == AllocNodeIdRef::NotMaster &&
3856           refToNode(ref->masterRef) == 0xFFFF)
3857       {
3858         /*
3859           This data node is not aware of who is the president (yet)
3860           and thus cannot allocate nodeids.
3861           If all data nodes are in the same state, then there's
3862           effectively 'no contact'.
3863           However, some other data nodes might be 'up' (node(s) in
3864           NOT_STARTED state).
3865         */
3866         bool next;
3867         while((next = getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB)) == true &&
3868               getNodeInfo(nodeId).is_confirmed() == false)
3869           ;
3870         if (!next)
3871         {
3872           /* No viable node(s) */
3873           g_eventLogger->info("Alloc node id %u rejected, no new president yet",
3874                               free_node_id);
3875           return NO_CONTACT_WITH_DB_NODES;
3876         }
3877 
3878         /* Found another node, try to allocate a nodeid from it */
3879         do_send = 1;
3880         continue;
3881       }
3882 
3883       if (ref->errorCode == AllocNodeIdRef::NotReady)
3884       {
3885         g_eventLogger->info("Alloc node id %u request rejected, cluster not ready yet",
3886                             free_node_id);
3887         return NO_CONTACT_WITH_DB_NODES;
3888       }
3889 
3890       const bool refFromMaster = (refToNode(ref->masterRef) == nodeId);
3891       if (ref->errorCode == AllocNodeIdRef::NotMaster ||
3892           ref->errorCode == AllocNodeIdRef::Busy ||
3893           ref->errorCode == AllocNodeIdRef::NodeFailureHandlingNotCompleted)
3894       {
3895         do_send = 1;
3896         nodeId = refToNode(ref->masterRef);
3897 	if (!getNodeInfo(nodeId).is_confirmed())
3898 	  nodeId = 0;
3899         if (first_attempt && (ref->errorCode != AllocNodeIdRef::NotMaster))
3900         {
3901           first_attempt = false;
3902           g_eventLogger->info("Alloc node id %u rejected with error code %u, will retry",
3903                               free_node_id,
3904                               ref->errorCode);
3905         }
3906         /* sleep for a while before retrying */
3907         ss.unlock();
3908         if (ref->errorCode == AllocNodeIdRef::Busy)
3909         {
3910           NdbSleep_MilliSleep(100);
3911         }
3912         else if (ref->errorCode == AllocNodeIdRef::NotMaster)
3913         {
3914           if (refFromMaster)
3915           {
3916             /* AllocNodeIdReq sent to master node, but master not ready
3917              * to alloc node ID. Sleep before retrying. */
3918             NdbSleep_SecSleep(1);
3919           }
3920           else
3921           {
3922             /* AllocNodeIdReq sent to non-master node, retry by sending
3923              * AllocNodeIdReq to ref->masterRef. No sleep before retrying */
3924           }
3925         }
3926         else /* AllocNodeIdRef::NodeFailureHandlingNotCompleted */
3927         {
3928           NdbSleep_SecSleep(1);
3929         }
3930         ss.lock();
3931         continue;
3932       }
3933       return ref->errorCode;
3934     }
3935     case GSN_NF_COMPLETEREP:
3936     {
3937       continue;
3938     }
3939     case GSN_NODE_FAILREP:{
3940       /**
3941        * ok to trap using NODE_FAILREP
3942        *   as we don't really wait on anything interesting
3943        */
3944       const NodeFailRep * const rep =
3945 	CAST_CONSTPTR(NodeFailRep, signal->getDataPtr());
3946       Uint32 len = NodeFailRep::getNodeMaskLength(signal->getLength());
3947       const Uint32* nbm;
3948       if (signal->header.m_noOfSections >= 1)
3949       {
3950         assert (len == 0);
3951         nbm = signal->ptr[0].p;
3952         len = signal->ptr[0].sz;
3953       }
3954       else
3955       {
3956         assert(len == NodeBitmask::Size); // only full length in ndbapi
3957         nbm = rep->theAllNodes;
3958       }
3959 
3960       if (BitmaskImpl::safe_get(len, nbm, nodeId))
3961       {
3962         do_send = 1;
3963         nodeId = 0;
3964       }
3965       continue;
3966     }
3967     case GSN_API_REGCONF:
3968     case GSN_TAKE_OVERTCCONF:
3969     case GSN_CONNECT_REP:
3970       continue;
3971     default:
3972       report_unknown_signal(signal);
3973       return SEND_OR_RECEIVE_FAILED;
3974     }
3975   }
3976   return 0;
3977 }
3978 
3979 static int
match_hostname(const struct sockaddr * clnt_addr,const char * config_hostname)3980 match_hostname(const struct sockaddr *clnt_addr,
3981                const char *config_hostname)
3982 {
3983   if (clnt_addr)
3984   {
3985     const struct in_addr *clnt_in_addr = &((sockaddr_in*)clnt_addr)->sin_addr;
3986 
3987     struct in_addr config_addr;
3988     if (Ndb_getInAddr(&config_addr, config_hostname) != 0
3989         || memcmp(&config_addr, clnt_in_addr, sizeof(config_addr)) != 0)
3990     {
3991       struct in_addr tmp_addr;
3992       if (Ndb_getInAddr(&tmp_addr, "localhost") != 0
3993           || memcmp(&tmp_addr, clnt_in_addr, sizeof(config_addr)) != 0)
3994       {
3995         // not localhost
3996         return -1;
3997       }
3998 
3999       // connecting through localhost
4000       // check if config_hostname is local
4001       if (!SocketServer::tryBind(0, config_hostname))
4002         return -1;
4003     }
4004   }
4005   else
4006   {
4007     if (!SocketServer::tryBind(0, config_hostname))
4008       return -1;
4009   }
4010   return 0;
4011 }
4012 
4013 int
find_node_type(NodeId node_id,ndb_mgm_node_type type,const struct sockaddr * client_addr,Vector<PossibleNode> & nodes,int & error_code,BaseString & error_string)4014 MgmtSrvr::find_node_type(NodeId node_id,
4015                          ndb_mgm_node_type type,
4016                          const struct sockaddr* client_addr,
4017                          Vector<PossibleNode>& nodes,
4018                          int& error_code, BaseString& error_string)
4019 {
4020   const char* found_config_hostname= 0;
4021   unsigned type_c= (unsigned)type;
4022 
4023   Guard g(m_local_config_mutex);
4024 
4025   ConfigIter iter(m_local_config, CFG_SECTION_NODE);
4026   for(iter.first(); iter.valid(); iter.next())
4027   {
4028     unsigned id;
4029     if (iter.get(CFG_NODE_ID, &id))
4030       require(false);
4031     if (node_id && node_id != id)
4032       continue;
4033     if (iter.get(CFG_TYPE_OF_SECTION, &type_c))
4034       require(false);
4035     if (type_c != (unsigned)type)
4036     {
4037       if (!node_id)
4038         continue;
4039       goto error;
4040     }
4041     bool exact_match = false;
4042     const char *config_hostname= 0;
4043     if (iter.get(CFG_NODE_HOST, &config_hostname))
4044       require(false);
4045     if (config_hostname == 0 || config_hostname[0] == 0)
4046     {
4047       config_hostname= "";
4048     }
4049     else
4050     {
4051       found_config_hostname= config_hostname;
4052       if (match_hostname(client_addr, config_hostname))
4053       {
4054         if (!node_id)
4055           continue;
4056         goto error;
4057       }
4058       exact_match = true;
4059     }
4060     unsigned dedicated_node = 0;
4061     iter.get(CFG_NODE_DEDICATED, &dedicated_node);
4062     if (dedicated_node && id != node_id)
4063     {
4064       // id is only handed out if explicitly requested.
4065       continue;
4066     }
4067     /*
4068       Insert this node in the nodes list sorted with the
4069       exact matches ahead of the open nodes
4070     */
4071     PossibleNode possible_node= {id, config_hostname, exact_match};
4072     if (exact_match)
4073     {
4074       // Find the position of first !exact match
4075       unsigned position = 0;
4076       for (unsigned j = 0; j < nodes.size(); j++)
4077       {
4078         if (nodes[j].exact_match)
4079           position++;
4080       }
4081       nodes.push(possible_node, position);
4082     }
4083     else
4084     {
4085       nodes.push_back(possible_node);
4086     }
4087 
4088     if (node_id)
4089       break;
4090   }
4091   if (nodes.size() != 0)
4092   {
4093     return 0;
4094   }
4095 
4096  error:
4097   /*
4098     lock on m_configMutex held because found_config_hostname may have
4099     reference inot config structure
4100   */
4101   error_code= NDB_MGM_ALLOCID_CONFIG_MISMATCH;
4102   if (node_id)
4103   {
4104     if (type_c != (unsigned) type)
4105     {
4106       BaseString type_string, type_c_string;
4107       const char *alias, *str;
4108       alias= ndb_mgm_get_node_type_alias_string(type, &str);
4109       type_string.assfmt("%s(%s)", alias, str);
4110       alias= ndb_mgm_get_node_type_alias_string((enum ndb_mgm_node_type)type_c,
4111                                                 &str);
4112       type_c_string.assfmt("%s(%s)", alias, str);
4113       error_string.appfmt("Id %d configured as %s, connect attempted as %s.",
4114                           node_id, type_c_string.c_str(),
4115                           type_string.c_str());
4116       return -1;
4117     }
4118     if (found_config_hostname)
4119     {
4120       char addr_buf[NDB_ADDR_STRLEN];
4121       {
4122         // Append error describing which host the faulty connection was from
4123         struct in_addr conn_addr =
4124           ((struct sockaddr_in*)(client_addr))->sin_addr;
4125         char* addr_str =
4126             Ndb_inet_ntop(AF_INET,
4127                           static_cast<void*>(&conn_addr),
4128                           addr_buf,
4129                           sizeof(addr_buf));
4130         error_string.appfmt("Connection with id %d done from wrong host ip %s,",
4131                             node_id, addr_str);
4132       }
4133       {
4134         // Append error describing which was the expected host
4135         struct in_addr config_addr;
4136         int r_config_addr= Ndb_getInAddr(&config_addr, found_config_hostname);
4137         char* addr_str =
4138             Ndb_inet_ntop(AF_INET,
4139                           static_cast<void*>(&config_addr),
4140                           addr_buf,
4141                           sizeof(addr_buf));
4142         error_string.appfmt(" expected %s(%s).", found_config_hostname,
4143                             r_config_addr ?
4144                             "lookup failed" : addr_str);
4145       }
4146       return -1;
4147     }
4148     error_string.appfmt("No node defined with id=%d in config file.", node_id);
4149     return -1;
4150   }
4151 
4152   // node_id == 0 and nodes.size() == 0
4153   if (found_config_hostname)
4154   {
4155     char addr_buf[NDB_ADDR_STRLEN];
4156     struct in_addr conn_addr =
4157       ((struct sockaddr_in*)(client_addr))->sin_addr;
4158     char *addr_str = Ndb_inet_ntop(AF_INET,
4159                                    static_cast<void*>(&conn_addr),
4160                                    addr_buf,
4161                                    sizeof(addr_buf));
4162     error_string.appfmt("Connection done from wrong host ip %s.",
4163                         (client_addr) ? addr_str : "");
4164     return -1;
4165   }
4166 
4167   error_string.append("No nodes defined in config file.");
4168   return -1;
4169 }
4170 
4171 
4172 int
try_alloc(NodeId id,ndb_mgm_node_type type,Uint32 timeout_ms,int & error_code,BaseString & error_string)4173 MgmtSrvr::try_alloc(NodeId id,
4174                     ndb_mgm_node_type type,
4175                     Uint32 timeout_ms,
4176                     int& error_code,
4177                     BaseString& error_string)
4178 {
4179   assert(type == NDB_MGM_NODE_TYPE_NDB ||
4180          type == NDB_MGM_NODE_TYPE_API);
4181 
4182   const NDB_TICKS start = NdbTick_getCurrentTicks();
4183   while (true)
4184   {
4185     int res = alloc_node_id_req(id, type, timeout_ms);
4186     if (res == 0)
4187     {
4188       /* Node id allocation suceeded */
4189       g_eventLogger->debug("Allocated nodeid %u in cluster", id);
4190       assert(id > 0);
4191       return id;
4192     }
4193 
4194     if (res == NO_CONTACT_WITH_DB_NODES &&
4195         type == NDB_MGM_NODE_TYPE_API)
4196     {
4197       const Uint64 retry_timeout = 3000; // milliseconds
4198       const NDB_TICKS now = NdbTick_getCurrentTicks();
4199       const Uint64 elapsed = NdbTick_Elapsed(start,now).milliSec();
4200       if (elapsed > retry_timeout)
4201       {
4202         /*
4203           Have waited long enough time for data nodes to
4204           decide on a master, return error
4205         */
4206         g_eventLogger->debug("Unable to allocate nodeid %u for API node " \
4207                              "in cluster (retried during %u milliseconds)",
4208                              id, (unsigned)elapsed);
4209         error_string.appfmt("No contact with data nodes to get node id %u",
4210                             id);
4211         error_code = NDB_MGM_ALLOCID_ERROR;
4212         return -1;
4213       }
4214 
4215       g_eventLogger->debug("Retrying allocation of nodeid %u...", id);
4216       NdbSleep_MilliSleep(1000);
4217       continue;
4218     }
4219 
4220     if (res == NO_CONTACT_WITH_DB_NODES &&
4221         type == NDB_MGM_NODE_TYPE_NDB)
4222     {
4223       /*
4224         No reply from data node(s) -> use the requested nodeid
4225         so that data node can start
4226       */
4227       g_eventLogger->debug("Nodeid %u for data node reserved locally "  \
4228                            "since cluster was not available ", id);
4229       return id;
4230     }
4231 
4232     /* Unspecified error */
4233     return 0;
4234   }
4235 
4236   assert(false); // Never reached
4237   return 0;
4238 }
4239 
4240 /**
4241  * try_alloc_from_list
4242  *
4243  * returns :
4244  *    0 : Nodeid allocated
4245  *   -1 : Nodeid not available
4246  *   -2 : No contact with cluster
4247  */
4248 int
try_alloc_from_list(NodeId & nodeid,ndb_mgm_node_type type,Uint32 timeout_ms,Vector<PossibleNode> & nodes,int & error_code,BaseString & error_string)4249 MgmtSrvr::try_alloc_from_list(NodeId& nodeid,
4250                               ndb_mgm_node_type type,
4251                               Uint32 timeout_ms,
4252                               Vector<PossibleNode>& nodes,
4253                               int& error_code,
4254                               BaseString& error_string)
4255 {
4256   for (unsigned i = 0; i < nodes.size(); i++)
4257   {
4258     const unsigned id= nodes[i].id;
4259     if (theFacade->ext_isConnected(id))
4260     {
4261       // Node is already reserved(connected via transporter)
4262       continue;
4263     }
4264 
4265     NdbMutex_Lock(m_reserved_nodes_mutex);
4266     if (m_reserved_nodes.get(id))
4267     {
4268       // Node is already reserved(locally in this node)
4269       NdbMutex_Unlock(m_reserved_nodes_mutex);
4270       continue;
4271     }
4272 
4273     /*
4274       Reserve the nodeid locally while checking if it can
4275       be allocated in the data nodes
4276     */
4277     m_reserved_nodes.set(id, timeout_ms);
4278 
4279     NdbMutex_Unlock(m_reserved_nodes_mutex);
4280     int res = try_alloc(id,
4281                         type,
4282                         timeout_ms,
4283                         error_code,
4284                         error_string);
4285     if (res > 0)
4286     {
4287       // Nodeid allocation succeeded
4288       nodeid= id;
4289 
4290       if (type == NDB_MGM_NODE_TYPE_API)
4291       {
4292         /*
4293           Release the local reservation(which was set to avoid that
4294           more than one thread asked for same nodeid) since it's
4295           now reserved in data node
4296         */
4297         release_local_nodeid_reservation(id);
4298       }
4299 
4300       return 0; /* Nodeid allocated */
4301     }
4302 
4303     /* Release the local reservation */
4304     release_local_nodeid_reservation(id);
4305 
4306     if (res < 0)
4307     {
4308       // Don't try any more nodes from the list
4309       return -2; /* No contact with cluster */
4310     }
4311   }
4312   return -1; /* Nodeid not available */
4313 }
4314 
4315 
4316 bool
alloc_node_id_impl(NodeId & nodeid,enum ndb_mgm_node_type type,const struct sockaddr * client_addr,int & error_code,BaseString & error_string,Uint32 timeout_s)4317 MgmtSrvr::alloc_node_id_impl(NodeId& nodeid,
4318                              enum ndb_mgm_node_type type,
4319                              const struct sockaddr* client_addr,
4320                              int& error_code, BaseString& error_string,
4321                              Uint32 timeout_s)
4322 {
4323   if (m_opts.no_nodeid_checks)
4324   {
4325     if (nodeid == 0)
4326     {
4327       error_string.appfmt("no-nodeid-checks set in management server. "
4328 			  "node id must be set explicitly in connectstring");
4329       error_code = NDB_MGM_ALLOCID_CONFIG_MISMATCH;
4330       return false;
4331     }
4332     return true;
4333   }
4334   /* Don't allow allocation of this ndb_mgmd's nodeid */
4335   assert(_ownNodeId);
4336   if (nodeid == _ownNodeId)
4337   {
4338     // Fatal error
4339     error_code= NDB_MGM_ALLOCID_CONFIG_MISMATCH;
4340     if (type != NDB_MGM_NODE_TYPE_MGM)
4341     {
4342       /**
4343        * be backwards compatile wrt error messages
4344        */
4345       BaseString type_string, type_c_string;
4346       const char *alias, *str;
4347       alias= ndb_mgm_get_node_type_alias_string(type, &str);
4348       type_string.assfmt("%s(%s)", alias, str);
4349       alias= ndb_mgm_get_node_type_alias_string(NDB_MGM_NODE_TYPE_MGM, &str);
4350       type_c_string.assfmt("%s(%s)", alias, str);
4351       error_string.appfmt("Id %d configured as %s, connect attempted as %s.",
4352                           nodeid, type_c_string.c_str(),
4353                           type_string.c_str());
4354     }
4355     else
4356     {
4357       error_string.appfmt("Id %d is already allocated by this ndb_mgmd",
4358                           nodeid);
4359     }
4360     return false;
4361   }
4362 
4363   /* Make sure that config is confirmed before allocating nodeid */
4364   Uint32 timeout_ms = timeout_s * 1000;
4365   {
4366     const NDB_TICKS start = NdbTick_getCurrentTicks();
4367     BaseString getconfig_message;
4368     while (!m_config_manager->get_packed_config(type,
4369                                                 0,
4370                                                 getconfig_message,
4371                                                 true,
4372                                                 nodeid))
4373     {
4374       const NDB_TICKS now = NdbTick_getCurrentTicks();
4375       if (NdbTick_Elapsed(start,now).milliSec() > timeout_ms)
4376       {
4377         error_code = NDB_MGM_ALLOCID_ERROR;
4378         error_string.append("Unable to allocate nodeid as configuration"
4379                             " not yet confirmed");
4380         return false;
4381       }
4382 
4383       NdbSleep_MilliSleep(20);
4384     }
4385   }
4386 
4387   /* Find possible nodeids */
4388   Vector<PossibleNode> nodes;
4389   if (find_node_type(nodeid, type, client_addr,
4390                      nodes, error_code, error_string))
4391     return false;
4392 
4393   // Print list of possible nodes
4394   for (unsigned i = 0; i < nodes.size(); i++)
4395   {
4396     const PossibleNode& node = nodes[i];
4397     g_eventLogger->debug(" [%u]: %u, '%s', %d",
4398                          (unsigned)i, node.id,
4399                          node.host.c_str(),
4400                          node.exact_match);
4401   }
4402 
4403   // nodes.size() == 0 handled inside find_node_type
4404   DBUG_ASSERT(nodes.size() != 0);
4405 
4406   if (type == NDB_MGM_NODE_TYPE_MGM && nodes.size() > 1)
4407   {
4408     // mgmt server may only have one match
4409     error_string.appfmt("Ambiguous node id's %d and %d. "
4410                         "Suggest specifying node id in connectstring, "
4411                         "or specifying unique host names in config file.",
4412                         nodes[0].id, nodes[1].id);
4413     error_code= NDB_MGM_ALLOCID_CONFIG_MISMATCH;
4414     return false;
4415   }
4416 
4417   /* Check timeout of nodeid reservations for NDB */
4418   if (type == NDB_MGM_NODE_TYPE_NDB)
4419   {
4420     const NDB_TICKS now = NdbTick_getCurrentTicks();
4421     for (unsigned i = 0; i < nodes.size(); i++)
4422     {
4423       const NodeId ndb_nodeid = nodes[i].id;
4424       {
4425         Guard g(m_reserved_nodes_mutex);
4426         if (!m_reserved_nodes.has_timedout(ndb_nodeid, now))
4427           continue;
4428       }
4429 
4430       // Found a timedout reservation
4431       if (theFacade->ext_isConnected(ndb_nodeid))
4432         continue; // Still connected, ignore the timeout
4433 
4434       g_eventLogger->warning("Found timedout nodeid reservation for %u, " \
4435                              "releasing it", ndb_nodeid);
4436 
4437       // Clear the reservation
4438       release_local_nodeid_reservation(ndb_nodeid);
4439     }
4440   }
4441 
4442   const int try_alloc_rc =
4443     try_alloc_from_list(nodeid,
4444                         type,
4445                         timeout_ms,
4446                         nodes,
4447                         error_code,
4448                         error_string);
4449   if (try_alloc_rc == 0)
4450   {
4451     if (type == NDB_MGM_NODE_TYPE_NDB)
4452     {
4453       /* Be ready to accept connections from this node */
4454       theFacade->ext_doConnect(nodeid);
4455     }
4456 
4457     return true;
4458   }
4459 
4460 
4461   if (try_alloc_rc == -1)
4462   {
4463     /*
4464       there are nodes with correct type available but
4465       allocation failed for some reason
4466     */
4467     if (nodeid)
4468     {
4469       if (error_code == 0)
4470       {
4471         error_string.appfmt("Id %d already allocated by another node.",
4472                             nodeid);
4473       }
4474     }
4475     else
4476     {
4477       if (error_code == 0)
4478       {
4479         const char *alias, *str;
4480         alias = ndb_mgm_get_node_type_alias_string(type, &str);
4481         error_string.appfmt("No free node id found for %s(%s).",
4482                             alias,
4483                             str);
4484       }
4485     }
4486     error_code = NDB_MGM_ALLOCID_ERROR;
4487   }
4488   else
4489   {
4490     assert(try_alloc_rc == -2); /* No contact with cluster */
4491     error_string.assfmt("Cluster not ready for nodeid allocation.");
4492   }
4493   return false;
4494 }
4495 
4496 
4497 bool
alloc_node_id(NodeId & nodeid,enum ndb_mgm_node_type type,const struct sockaddr * client_addr,int & error_code,BaseString & error_string,bool log_event,Uint32 timeout_s)4498 MgmtSrvr::alloc_node_id(NodeId& nodeid,
4499 			enum ndb_mgm_node_type type,
4500 			const struct sockaddr* client_addr,
4501 			int& error_code, BaseString& error_string,
4502                         bool log_event,
4503                         Uint32 timeout_s)
4504 {
4505   char addr_buf[NDB_ADDR_STRLEN];
4506   struct in_addr conn_addr = ((sockaddr_in*)client_addr)->sin_addr;
4507   const char* type_str = ndb_mgm_get_node_type_string(type);
4508   char* addr_str = Ndb_inet_ntop(AF_INET,
4509                                  static_cast<void*>(&conn_addr),
4510                                  addr_buf,
4511                                  sizeof(addr_buf));
4512 
4513   error_code = 0;
4514   g_eventLogger->debug("Trying to allocate nodeid for %s" \
4515                        "(nodeid: %u, type: %s)",
4516                        addr_str, (unsigned)nodeid, type_str);
4517 
4518 
4519   if (alloc_node_id_impl(nodeid, type, client_addr,
4520                          error_code, error_string,
4521                          timeout_s))
4522   {
4523     g_eventLogger->info("Nodeid %u allocated for %s at %s",
4524                         (unsigned)nodeid, type_str, addr_str);
4525     return true;
4526   }
4527 
4528   if (!log_event)
4529     return false;
4530 
4531   g_eventLogger->warning("Unable to allocate nodeid for %s at %s. "
4532                          "Returned error: '%s'",
4533                          type_str, addr_str, error_string.c_str());
4534 
4535   return false;
4536 }
4537 
4538 
4539 bool
getNextNodeId(NodeId * nodeId,enum ndb_mgm_node_type type) const4540 MgmtSrvr::getNextNodeId(NodeId * nodeId, enum ndb_mgm_node_type type) const
4541 {
4542   NodeId tmp = * nodeId;
4543 
4544   tmp++;
4545   while(nodeTypes[tmp] != type && tmp < MAX_NODES)
4546     tmp++;
4547 
4548   if(tmp == MAX_NODES){
4549     return false;
4550   }
4551 
4552   * nodeId = tmp;
4553   return true;
4554 }
4555 
4556 #include "Services.hpp"
4557 
4558 void
eventReport(const Uint32 * theSignalData,Uint32 len,const Uint32 * theData)4559 MgmtSrvr::eventReport(const Uint32 *theSignalData,
4560                       Uint32 len,
4561                       const Uint32 *theData)
4562 {
4563   const EventReport * const eventReport = (EventReport *)&theSignalData[0];
4564 
4565   NodeId nodeId = eventReport->getNodeId();
4566   Ndb_logevent_type type = eventReport->getEventType();
4567   // Log event
4568   g_eventLogger->log(type, theData, len, nodeId,
4569                      &m_event_listner[0].m_logLevel);
4570   m_event_listner.log(type, theData, len, nodeId);
4571 }
4572 
4573 /***************************************************************************
4574  * Backup
4575  ***************************************************************************/
4576 
4577 int
startBackup(Uint32 & backupId,int waitCompleted,Uint32 input_backupId,Uint32 backuppoint)4578 MgmtSrvr::startBackup(Uint32& backupId, int waitCompleted, Uint32 input_backupId, Uint32 backuppoint)
4579 {
4580   SignalSender ss(theFacade);
4581   ss.lock(); // lock will be released on exit
4582 
4583   NodeId nodeId = m_master_node;
4584   if (okToSendTo(nodeId, false) != 0)
4585   {
4586     bool next;
4587     nodeId = m_master_node = 0;
4588     while((next = getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB)) == true &&
4589           okToSendTo(nodeId, false) != 0);
4590     if(!next)
4591       return NO_CONTACT_WITH_DB_NODES;
4592   }
4593 
4594   SimpleSignal ssig;
4595   BackupReq* req = CAST_PTR(BackupReq, ssig.getDataPtrSend());
4596   /*
4597    * Single-threaded backup.  Set instance key 1.  In the kernel
4598    * this maps to main instance 0 or worker instance 1 (if MT LQH).
4599    */
4600   BlockNumber backupBlockNo = numberToBlock(BACKUP, 1);
4601   if(input_backupId > 0)
4602   {
4603     ssig.set(ss, TestOrd::TraceAPI, backupBlockNo, GSN_BACKUP_REQ,
4604 	     BackupReq::SignalLength);
4605     req->inputBackupId = input_backupId;
4606   }
4607   else
4608     ssig.set(ss, TestOrd::TraceAPI, backupBlockNo, GSN_BACKUP_REQ,
4609 	     BackupReq::SignalLength - 1);
4610 
4611   req->senderData = 19;
4612   req->backupDataLen = 0;
4613   assert(waitCompleted < 3);
4614   req->flags = waitCompleted & 0x3;
4615   if(backuppoint == 1)
4616     req->flags |= BackupReq::USE_UNDO_LOG;
4617 
4618   int do_send = 1;
4619   while (1) {
4620     if (do_send)
4621     {
4622       if (ss.sendSignal(nodeId, &ssig) != SEND_OK) {
4623 	return SEND_OR_RECEIVE_FAILED;
4624       }
4625       if (waitCompleted == 0)
4626 	return 0;
4627       do_send = 0;
4628     }
4629     SimpleSignal *signal = ss.waitFor();
4630 
4631     int gsn = signal->readSignalNumber();
4632     switch (gsn) {
4633     case GSN_BACKUP_CONF:{
4634       const BackupConf * const conf =
4635 	CAST_CONSTPTR(BackupConf, signal->getDataPtr());
4636 #ifdef VM_TRACE
4637       ndbout_c("Backup(%d) master is %d", conf->backupId,
4638 	       refToNode(signal->header.theSendersBlockRef));
4639 #endif
4640       backupId = conf->backupId;
4641       if (waitCompleted == 1)
4642 	return 0;
4643       // wait for next signal
4644       break;
4645     }
4646     case GSN_BACKUP_COMPLETE_REP:{
4647       const BackupCompleteRep * const rep =
4648 	CAST_CONSTPTR(BackupCompleteRep, signal->getDataPtr());
4649 #ifdef VM_TRACE
4650       ndbout_c("Backup(%d) completed", rep->backupId);
4651 #endif
4652       backupId = rep->backupId;
4653       return 0;
4654     }
4655     case GSN_BACKUP_REF:{
4656       const BackupRef * const ref =
4657 	CAST_CONSTPTR(BackupRef, signal->getDataPtr());
4658       if(ref->errorCode == BackupRef::IAmNotMaster){
4659 	m_master_node = nodeId = refToNode(ref->masterRef);
4660 #ifdef VM_TRACE
4661 	ndbout_c("I'm not master resending to %d", nodeId);
4662 #endif
4663 	do_send = 1; // try again
4664 	if (!getNodeInfo(nodeId).m_alive)
4665 	  m_master_node = nodeId = 0;
4666 	continue;
4667       }
4668       return ref->errorCode;
4669     }
4670     case GSN_BACKUP_ABORT_REP:{
4671       const BackupAbortRep * const rep =
4672 	CAST_CONSTPTR(BackupAbortRep, signal->getDataPtr());
4673 #ifdef VM_TRACE
4674       ndbout_c("Backup %d aborted", rep->backupId);
4675 #endif
4676       return rep->reason;
4677     }
4678     case GSN_NF_COMPLETEREP:{
4679       const NFCompleteRep * const rep =
4680 	CAST_CONSTPTR(NFCompleteRep, signal->getDataPtr());
4681 #ifdef VM_TRACE
4682       ndbout_c("Node %d fail completed", rep->failedNodeId);
4683 #endif
4684       if (rep->failedNodeId == nodeId ||
4685 	  waitCompleted == 1)
4686 	return 1326;
4687       // wait for next signal
4688       // master node will report aborted backup
4689       break;
4690     }
4691     case GSN_NODE_FAILREP:{
4692       const NodeFailRep * const rep =
4693 	CAST_CONSTPTR(NodeFailRep, signal->getDataPtr());
4694       Uint32 len = NodeFailRep::getNodeMaskLength(signal->getLength());
4695       const Uint32* nbm;
4696        if (signal->header.m_noOfSections >= 1)
4697        {
4698          assert (len == 0);
4699          nbm = signal->ptr[0].p;
4700          len = signal->ptr[0].sz;
4701        }
4702        else
4703        {
4704          assert(len == NodeBitmask::Size); // only full length in ndbapi
4705          nbm = rep->theAllNodes;
4706        }
4707 
4708       if (BitmaskImpl::safe_get(len, nbm, nodeId) ||
4709 	  waitCompleted == 1)
4710 	return 1326;
4711       // wait for next signal
4712       // master node will report aborted backup
4713       break;
4714     }
4715     case GSN_API_REGCONF:
4716     case GSN_TAKE_OVERTCCONF:
4717     case GSN_CONNECT_REP:
4718       continue;
4719     default:
4720       report_unknown_signal(signal);
4721       return SEND_OR_RECEIVE_FAILED;
4722     }
4723   }
4724 }
4725 
4726 int
abortBackup(Uint32 backupId)4727 MgmtSrvr::abortBackup(Uint32 backupId)
4728 {
4729   SignalSender ss(theFacade);
4730   ss.lock(); // lock will be released on exit
4731 
4732   bool next;
4733   NodeId nodeId = 0;
4734   while((next = getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB)) == true &&
4735 	getNodeInfo(nodeId).m_alive == false);
4736 
4737   if(!next){
4738     return NO_CONTACT_WITH_DB_NODES;
4739   }
4740 
4741   SimpleSignal ssig;
4742 
4743   AbortBackupOrd* ord = CAST_PTR(AbortBackupOrd, ssig.getDataPtrSend());
4744   /*
4745    * Single-threaded backup.  Set instance key 1.  In the kernel
4746    * this maps to main instance 0 or worker instance 1 (if MT LQH).
4747    */
4748   BlockNumber backupBlockNo = numberToBlock(BACKUP, 1);
4749   ssig.set(ss, TestOrd::TraceAPI, backupBlockNo, GSN_ABORT_BACKUP_ORD,
4750 	   AbortBackupOrd::SignalLength);
4751 
4752   ord->requestType = AbortBackupOrd::ClientAbort;
4753   ord->senderData = 19;
4754   ord->backupId = backupId;
4755 
4756   return ss.sendSignal(nodeId, &ssig) == SEND_OK ? 0 : SEND_OR_RECEIVE_FAILED;
4757 }
4758 
4759 
4760 int
setDbParameter(int node,int param,const char * value,BaseString & msg)4761 MgmtSrvr::setDbParameter(int node, int param, const char * value,
4762 			 BaseString& msg)
4763 {
4764 
4765   Guard g(m_local_config_mutex);
4766 
4767   /**
4768    * Check parameter
4769    */
4770   ConfigIter iter(m_local_config, CFG_SECTION_NODE);
4771   if(iter.first() != 0){
4772     msg.assign("Unable to find node section (iter.first())");
4773     return -1;
4774   }
4775 
4776   Uint32 type = NODE_TYPE_DB + 1;
4777   if(node != 0){
4778     // Set parameter only in the specified node
4779     if(iter.find(CFG_NODE_ID, node) != 0){
4780       msg.assign("Unable to find node (iter.find())");
4781       return -1;
4782     }
4783     if(iter.get(CFG_TYPE_OF_SECTION, &type) != 0){
4784       msg.assign("Unable to get node type(iter.get(CFG_TYPE_OF_SECTION))");
4785       return -1;
4786     }
4787   } else {
4788     // Set parameter in all DB nodes
4789     do {
4790       if(iter.get(CFG_TYPE_OF_SECTION, &type) != 0){
4791 	msg.assign("Unable to get node type(iter.get(CFG_TYPE_OF_SECTION))");
4792 	return -1;
4793       }
4794       if(type == NODE_TYPE_DB)
4795 	break;
4796     } while(iter.next() == 0);
4797   }
4798 
4799   if(type != NODE_TYPE_DB){
4800     msg.assfmt("Invalid node type or no such node (%d %d)",
4801 	       type, NODE_TYPE_DB);
4802     return -1;
4803   }
4804 
4805   int p_type;
4806   unsigned val_32;
4807   Uint64 val_64 = 0;
4808   const char * val_char;
4809   do {
4810     p_type = 0;
4811     if(iter.get(param, &val_32) == 0){
4812       val_32 = atoi(value);
4813       break;
4814     }
4815 
4816     p_type++;
4817     if(iter.get(param, &val_64) == 0){
4818       val_64 = my_strtoll(value, 0, 10);
4819       break;
4820     }
4821     p_type++;
4822     if(iter.get(param, &val_char) == 0){
4823       val_char = value;
4824       break;
4825     }
4826     msg.assign("Could not get parameter");
4827     return -1;
4828   } while(0);
4829 
4830   bool res = false;
4831   do {
4832     int ret = iter.get(CFG_TYPE_OF_SECTION, &type);
4833     assert(ret == 0);
4834 
4835     if(type != NODE_TYPE_DB)
4836       continue;
4837 
4838     Uint32 node;
4839     ret = iter.get(CFG_NODE_ID, &node);
4840     assert(ret == 0);
4841 
4842     ConfigValues::Iterator i2(m_local_config->m_configValues->m_config,
4843 			      iter.m_config);
4844     switch(p_type){
4845     case 0:
4846       res = i2.set(param, val_32);
4847       ndbout_c("Updating node %d param: %d to %d",  node, param, val_32);
4848       break;
4849     case 1:
4850       res = i2.set(param, val_64);
4851       ndbout_c("Updating node %d param: %d to %u",  node, param, val_32);
4852       break;
4853     case 2:
4854       res = i2.set(param, val_char);
4855       ndbout_c("Updating node %d param: %d to %s",  node, param, val_char);
4856       break;
4857     default:
4858       require(false);
4859     }
4860     require(res);
4861   } while(node == 0 && iter.next() == 0);
4862 
4863   msg.assign("Success");
4864   return 0;
4865 }
4866 
4867 
4868 int
setConnectionDbParameter(int node1,int node2,int param,int value,BaseString & msg)4869 MgmtSrvr::setConnectionDbParameter(int node1, int node2,
4870                                    int param, int value,
4871                                    BaseString& msg)
4872 {
4873   DBUG_ENTER("MgmtSrvr::setConnectionDbParameter");
4874   DBUG_PRINT("enter", ("node1: %d, node2: %d, param: %d, value: %d",
4875                        node1, node2, param, value));
4876 
4877   // This function only supports setting dynamic ports
4878   if (param != CFG_CONNECTION_SERVER_PORT)
4879   {
4880     msg.assign("Only param CFG_CONNECTION_SERVER_PORT can be set");
4881     DBUG_RETURN(-1);
4882   }
4883 
4884   if (!m_config_manager->set_dynamic_port(node1, node2, value, msg))
4885     DBUG_RETURN(-1);
4886 
4887   DBUG_PRINT("exit", ("Set parameter(%d) to %d for %d -> %d",
4888                       param, value, node1, node2));
4889   DBUG_RETURN(1);
4890 }
4891 
4892 
setDynamicPorts(int node,DynPortSpec ports[],unsigned num_ports,BaseString & msg)4893 bool MgmtSrvr::setDynamicPorts(int node, DynPortSpec ports[],
4894                                unsigned num_ports, BaseString& msg)
4895 {
4896   return m_config_manager->set_dynamic_ports(node, ports, num_ports, msg);
4897 }
4898 
4899 
4900 int
getConnectionDbParameter(int node1,int node2,int param,int * value,BaseString & msg)4901 MgmtSrvr::getConnectionDbParameter(int node1, int node2,
4902                                    int param, int *value,
4903                                    BaseString& msg)
4904 {
4905   DBUG_ENTER("MgmtSrvr::getConnectionDbParameter");
4906   DBUG_PRINT("enter", ("node1: %d, node2: %d, param: %d",
4907                        node1, node2, param));
4908 
4909   // This function only supports asking about dynamic ports
4910   if (param != CFG_CONNECTION_SERVER_PORT)
4911   {
4912     msg.assign("Only param CFG_CONNECTION_SERVER_PORT can be retrieved");
4913     DBUG_RETURN(-1);
4914   }
4915 
4916   if (!m_config_manager->get_dynamic_port(node1, node2, value, msg))
4917     DBUG_RETURN(-1);
4918 
4919   DBUG_PRINT("exit", ("Return parameter(%d): %u for %d -> %d, msg: %s",
4920                       param, *value, node1, node2, msg.c_str()));
4921   DBUG_RETURN(1);
4922 }
4923 
4924 
4925 bool
transporter_connect(NDB_SOCKET_TYPE sockfd,BaseString & msg,bool & close_with_reset)4926 MgmtSrvr::transporter_connect(NDB_SOCKET_TYPE sockfd,
4927                               BaseString& msg,
4928                               bool& close_with_reset)
4929 {
4930   DBUG_ENTER("MgmtSrvr::transporter_connect");
4931   TransporterRegistry* tr= theFacade->get_registry();
4932   bool dummy_log_failure = false;
4933   if (!tr->connect_server(sockfd, msg, close_with_reset, dummy_log_failure))
4934     DBUG_RETURN(false);
4935 
4936   /**
4937    * TransporterRegistry::update_connections() is responsible
4938    * for doing the final step of bringing the connection into
4939    * CONNECTED state when it detects it 'isConnected()'.
4940    * This is required due to all such state changes has to
4941    * be synchroniced with ::performReceive().
4942    * To speed up CONNECTED detection, we request it to
4943    * happen ASAP. (There is no guarantee when it happen though)
4944    */
4945   theFacade->request_connection_check();
4946   DBUG_RETURN(true);
4947 }
4948 
4949 
connect_to_self()4950 bool MgmtSrvr::connect_to_self()
4951 {
4952   BaseString buf;
4953   NdbMgmHandle mgm_handle= ndb_mgm_create_handle();
4954 
4955   buf.assfmt("%s:%u",
4956              m_opts.bind_address ? m_opts.bind_address : "localhost",
4957              m_port);
4958   ndb_mgm_set_connectstring(mgm_handle, buf.c_str());
4959 
4960   if(ndb_mgm_connect(mgm_handle, 0, 0, 0) < 0)
4961   {
4962     g_eventLogger->warning("%d %s",
4963                            ndb_mgm_get_latest_error(mgm_handle),
4964                            ndb_mgm_get_latest_error_desc(mgm_handle));
4965     ndb_mgm_destroy_handle(&mgm_handle);
4966     return false;
4967   }
4968   // TransporterRegistry now owns the handle and will destroy it.
4969   theFacade->get_registry()->set_mgm_handle(mgm_handle);
4970 
4971   return true;
4972 }
4973 
4974 
4975 bool
change_config(Config & new_config,BaseString & msg)4976 MgmtSrvr::change_config(Config& new_config, BaseString& msg)
4977 {
4978   SignalSender ss(theFacade);
4979   ss.lock();
4980 
4981   NodeBitmask mgm_nodes;
4982   {
4983     Guard g(m_local_config_mutex);
4984     m_local_config->get_nodemask(mgm_nodes, NDB_MGM_NODE_TYPE_MGM);
4985   }
4986 
4987   NodeId nodeId= ss.find_confirmed_node(mgm_nodes);
4988   if (nodeId == 0)
4989   {
4990     msg = "INTERNAL ERROR Could not find any mgmd!";
4991     return false;
4992   }
4993 
4994   bool v2;
4995   {
4996     const trp_node node = ss.getNodeInfo(nodeId);
4997     v2 = ndb_config_version_v2(node.m_info.m_version);
4998   }
4999   SimpleSignal ssig;
5000   UtilBuffer buf;
5001   UtilBuffer *buf_ptr = &buf;
5002   new_config.pack(buf, v2);
5003   ssig.ptr[0].p = (Uint32*)buf.get_data();
5004   ssig.ptr[0].sz = (buf.length() + 3) / 4;
5005   ssig.header.m_noOfSections = 1;
5006 
5007   ConfigChangeReq *req= CAST_PTR(ConfigChangeReq, ssig.getDataPtrSend());
5008   req->length = buf.length();
5009 
5010   if (ss.sendFragmentedSignal(nodeId, ssig,
5011                               MGM_CONFIG_MAN, GSN_CONFIG_CHANGE_REQ,
5012                               ConfigChangeReq::SignalLength) != 0)
5013   {
5014     msg.assfmt("Could not start configuration change, send to "
5015                "node %d failed", nodeId);
5016     return false;
5017   }
5018   mgm_nodes.clear(nodeId);
5019 
5020   bool done = false;
5021   while(!done)
5022   {
5023     SimpleSignal *signal= ss.waitFor();
5024 
5025     switch(signal->readSignalNumber()){
5026     case GSN_CONFIG_CHANGE_CONF:
5027       done= true;
5028       break;
5029     case GSN_CONFIG_CHANGE_REF:
5030     {
5031       const ConfigChangeRef * const ref =
5032         CAST_CONSTPTR(ConfigChangeRef, signal->getDataPtr());
5033       g_eventLogger->debug("Got CONFIG_CHANGE_REF, error: %d", ref->errorCode);
5034       switch(ref->errorCode)
5035       {
5036       case ConfigChangeRef::NotMaster:
5037       {
5038         // Retry with next node if any
5039         NodeId nodeId= ss.find_confirmed_node(mgm_nodes);
5040         if (nodeId == 0)
5041         {
5042           msg = "INTERNAL ERROR Could not find any mgmd!";
5043           return false;
5044         }
5045         {
5046           const trp_node node = ss.getNodeInfo(nodeId);
5047           bool v2_new = ndb_config_version_v2(node.m_info.m_version);
5048           if (v2 != v2_new)
5049           {
5050             /**
5051              * Free old buffer and create a new one.
5052              */
5053             delete buf_ptr;
5054             buf_ptr = new (buf_ptr) UtilBuffer;
5055             require(new_config.pack(buf, v2_new));
5056             v2 = v2_new;
5057           }
5058         }
5059         req->length = buf.length();
5060         ssig.ptr[0].p = (Uint32*)buf.get_data();
5061         ssig.ptr[0].sz = (buf.length() + 3) / 4;
5062         ssig.header.m_noOfSections = 1;
5063         if (ss.sendFragmentedSignal(nodeId, ssig,
5064                                     MGM_CONFIG_MAN, GSN_CONFIG_CHANGE_REQ,
5065                                     ConfigChangeReq::SignalLength) != 0)
5066         {
5067           msg.assfmt("Could not start configuration change, send to "
5068                      "node %d failed", nodeId);
5069           return false;
5070         }
5071         mgm_nodes.clear(nodeId);
5072         break;
5073       }
5074 
5075       default:
5076         msg = ConfigChangeRef::errorMessage(ref->errorCode);
5077         return false;
5078       }
5079 
5080       break;
5081     }
5082 
5083     case GSN_API_REGCONF:
5084     case GSN_TAKE_OVERTCCONF:
5085     case GSN_CONNECT_REP:
5086       // Ignore;
5087       break;
5088 
5089 
5090     case GSN_NODE_FAILREP:
5091       // ignore, NF_COMPLETEREP will come
5092       break;
5093 
5094     case GSN_NF_COMPLETEREP:
5095     {
5096       NodeId nodeId = refToNode(signal->header.theSendersBlockRef);
5097       msg.assign("Node %d failed during configuration change", nodeId);
5098       return false;
5099       break;
5100     }
5101 
5102     default:
5103       report_unknown_signal(signal);
5104       return false;
5105 
5106     }
5107   }
5108 
5109   g_eventLogger->info("Config change completed");
5110 
5111   return true;
5112 }
5113 
5114 
5115 void
print_config(const char * section_filter,NodeId nodeid_filter,const char * param_filter,NdbOut & out)5116 MgmtSrvr::print_config(const char* section_filter, NodeId nodeid_filter,
5117                        const char* param_filter,
5118                        NdbOut& out)
5119 {
5120   Guard g(m_local_config_mutex);
5121   m_local_config->print(section_filter, nodeid_filter,
5122                         param_filter, out);
5123 }
5124 
5125 
5126 bool
reload_config(const char * config_filename,bool mycnf,BaseString & msg)5127 MgmtSrvr::reload_config(const char* config_filename, bool mycnf,
5128                         BaseString& msg)
5129 {
5130   if (config_filename && mycnf)
5131   {
5132     msg = "ERROR: Both mycnf and config_filename is not supported";
5133     return false;
5134   }
5135 
5136   if (config_filename)
5137   {
5138     if (m_opts.mycnf)
5139     {
5140       msg.assfmt("ERROR: Can't switch to use config.ini '%s' when "
5141                  "node was started from my.cnf", config_filename);
5142       return false;
5143     }
5144   }
5145   else
5146   {
5147     if (mycnf)
5148     {
5149       // Reload from my.cnf
5150       if (!m_opts.mycnf)
5151       {
5152         if (m_opts.config_filename)
5153         {
5154           msg.assfmt("ERROR: Can't switch to use my.cnf when "
5155                      "node was started from '%s'", m_opts.config_filename);
5156           return false;
5157         }
5158       }
5159     }
5160     else
5161     {
5162       /* No config file name supplied and not told to use mycnf */
5163       if (m_opts.config_filename)
5164       {
5165         g_eventLogger->info("No config file name supplied, using '%s'",
5166                             m_opts.config_filename);
5167         config_filename = m_opts.config_filename;
5168       }
5169       else
5170       {
5171         msg = "ERROR: Neither config file name or mycnf available";
5172         return false;
5173       }
5174     }
5175   }
5176 
5177   Config* new_conf_ptr;
5178   if ((new_conf_ptr= ConfigManager::load_config(config_filename,
5179                                                 mycnf, msg)) == NULL)
5180     return false;
5181   Config new_conf(new_conf_ptr);
5182 
5183   {
5184     Guard g(m_local_config_mutex);
5185 
5186     /* Copy the necessary values from old to new config */
5187     if (!new_conf.setGeneration(m_local_config->getGeneration()) ||
5188         !new_conf.setName(m_local_config->getName()) ||
5189         !new_conf.setPrimaryMgmNode(m_local_config->getPrimaryMgmNode()))
5190     {
5191       msg = "Failed to initialize reloaded config";
5192       return false;
5193     }
5194   }
5195 
5196   if (!change_config(new_conf, msg))
5197     return false;
5198   return true;
5199 }
5200 
5201 void
show_variables(NdbOut & out)5202 MgmtSrvr::show_variables(NdbOut& out)
5203 {
5204   out << "daemon: " << yes_no(m_opts.daemon) << endl;
5205   out << "non_interactive: " << yes_no(m_opts.non_interactive) << endl;
5206   out << "interactive: " << yes_no(m_opts.interactive) << endl;
5207   out << "config_filename: " << str_null(m_opts.config_filename) << endl;
5208   out << "mycnf: " << yes_no(m_opts.mycnf) << endl;
5209   out << "bind_address: " << str_null(m_opts.bind_address) << endl;
5210   out << "no_nodeid_checks: " << yes_no(m_opts.no_nodeid_checks) << endl;
5211   out << "print_full_config: " << yes_no(m_opts.print_full_config) << endl;
5212   out << "configdir: " << str_null(m_opts.configdir) << endl;
5213   out << "config_cache: " << yes_no(m_opts.config_cache) << endl;
5214   out << "verbose: " << yes_no(m_opts.verbose) << endl;
5215   out << "reload: " << yes_no(m_opts.reload) << endl;
5216 
5217   out << "nodeid: " << _ownNodeId << endl;
5218   out << "blocknumber: " << hex <<_blockNumber << endl;
5219   out << "own_reference: " << hex << _ownReference << endl;
5220   out << "port: " << m_port << endl;
5221   out << "need_restart: " << m_need_restart << endl;
5222   out << "is_stop_thread: " << _isStopThread << endl;
5223   out << "log_level_thread_sleep: " << _logLevelThreadSleep << endl;
5224   out << "master_node: " << m_master_node << endl;
5225 }
5226 
5227 void
make_sync_req(SignalSender & ss,Uint32 nodeId)5228 MgmtSrvr::make_sync_req(SignalSender& ss, Uint32 nodeId)
5229 {
5230   /**
5231    * This subroutine is used to make a async request(error insert/dump)
5232    *   "more" syncronous, i.e increasing the likelyhood that
5233    *   the async request has really reached the destination
5234    *   before returning to the api
5235    *
5236    * I.e it's a work-around...
5237    *
5238    */
5239   SimpleSignal ssig;
5240   SyncReq* req = CAST_PTR(SyncReq, ssig.getDataPtrSend());
5241   req->senderRef = ss.getOwnRef();
5242   req->senderData = 12;
5243   req->prio = 1; // prio b
5244   ssig.set(ss,TestOrd::TraceAPI, CMVMI, GSN_SYNC_REQ, SyncReq::SignalLength);
5245 
5246   if (ss.sendSignal(nodeId, &ssig) != SEND_OK)
5247   {
5248     return;
5249   }
5250 
5251   while (true)
5252   {
5253     SimpleSignal *signal = ss.waitFor();
5254 
5255     int gsn = signal->readSignalNumber();
5256     switch (gsn) {
5257     case GSN_SYNC_REF:
5258     case GSN_SYNC_CONF:
5259       return;
5260 
5261     case GSN_NF_COMPLETEREP:{
5262       const NFCompleteRep * const rep =
5263         CAST_CONSTPTR(NFCompleteRep, signal->getDataPtr());
5264       if (rep->failedNodeId == nodeId)
5265         return;
5266       break;
5267     }
5268 
5269     case GSN_NODE_FAILREP:{
5270       const NodeFailRep * const rep =
5271 	CAST_CONSTPTR(NodeFailRep, signal->getDataPtr());
5272       Uint32 len = NodeFailRep::getNodeMaskLength(signal->getLength());
5273       const Uint32* nbm;
5274        if (signal->header.m_noOfSections >= 1)
5275        {
5276          assert (len == 0);
5277          nbm = signal->ptr[0].p;
5278          len = signal->ptr[0].sz;
5279        }
5280        else
5281        {
5282          assert(len == NodeBitmask::Size); // only full length in ndbapi
5283          nbm = rep->theAllNodes;
5284        }
5285 
5286       if (BitmaskImpl::safe_get(len, nbm, nodeId))
5287 	return;
5288       break;
5289     }
5290     case GSN_API_REGCONF:
5291     case GSN_TAKE_OVERTCCONF:
5292     case GSN_CONNECT_REP:
5293       break;
5294     default:
5295       return;
5296     }
5297   }
5298 }
5299 
5300 
5301 bool
request_events(NdbNodeBitmask nodes,Uint32 reports_per_node,Uint32 dump_type,Vector<SimpleSignal> & events)5302 MgmtSrvr::request_events(NdbNodeBitmask nodes, Uint32 reports_per_node,
5303                          Uint32 dump_type,
5304                          Vector<SimpleSignal>& events)
5305 {
5306   int nodes_counter[MAX_NDB_NODES];
5307 #ifndef NDEBUG
5308   NdbNodeBitmask save = nodes;
5309 #endif
5310   SignalSender ss(theFacade);
5311   ss.lock();
5312 
5313   // Send the dump command to all requested NDB nodes
5314   const bool all = nodes.isclear();
5315   for (int i = 1; i < MAX_NDB_NODES; i++)
5316   {
5317     // Check if node should be involved
5318     if (!all && !nodes.get(i))
5319       continue;
5320 
5321     // Only request from confirmed DB nodes
5322     const trp_node node = ss.getNodeInfo(i);
5323     if (node.m_info.getType() != NodeInfo::DB ||
5324         !node.is_confirmed())
5325     {
5326       nodes.clear(i);
5327       continue;
5328     }
5329 
5330     SimpleSignal ssig;
5331     DumpStateOrd * const dumpOrd = (DumpStateOrd*)ssig.getDataPtrSend();
5332 
5333     dumpOrd->args[0] = dump_type;
5334     dumpOrd->args[1] = ss.getOwnRef(); // Return to sender
5335 
5336     if (ss.sendSignal(i, ssig, CMVMI, GSN_DUMP_STATE_ORD, 2) == SEND_OK)
5337     {
5338       nodes.set(i);
5339       nodes_counter[i] = (int)reports_per_node;
5340     }
5341   }
5342 
5343 
5344   while (true)
5345   {
5346     // Check if all nodes are done
5347     if (nodes.isclear())
5348       break;
5349 
5350     SimpleSignal *signal = ss.waitFor();
5351     switch (signal->readSignalNumber()) {
5352     case GSN_EVENT_REP:{
5353       /**
5354        * This EVENT_REP receives all infoEvent and eventLog messages that
5355        * ARE generated through a DUMP command.
5356        */
5357       const NodeId nodeid = refToNode(signal->header.theSendersBlockRef);
5358       const EventReport * const event =
5359         (const EventReport*)signal->getDataPtr();
5360 
5361       if (!nodes.get(nodeid))
5362       {
5363         // The reporting node was not expected
5364 #ifndef NDEBUG
5365         ndbout_c("nodeid: %u", nodeid);
5366         ndbout_c("save: %s", BaseString::getPrettyText(save).c_str());
5367 #endif
5368         assert(false);
5369         return false;
5370       }
5371 
5372       if (event->getEventType() == NDB_LE_SavedEvent &&
5373           signal->getDataPtr()[1] == 0)
5374       {
5375         nodes_counter[nodeid] = 1;
5376       }
5377       else
5378       {
5379         // Save signal
5380         events.push_back(SimpleSignal(*signal));
5381       }
5382 
5383       // Check if node is done
5384       nodes_counter[nodeid]--;
5385       if (nodes_counter[nodeid] == 0)
5386         nodes.clear(nodeid);
5387 
5388       break;
5389     }
5390 
5391     case GSN_NODE_FAILREP:{
5392       const NodeFailRep * const rep =
5393         (const NodeFailRep*)signal->getDataPtr();
5394       const Uint32* theNodes = NULL;
5395       if (signal->header.m_noOfSections >= 1)
5396       {
5397         theNodes = signal->ptr[0].p;
5398       }
5399       else
5400       {
5401         theNodes = rep->theNodes;
5402       }
5403       // only care about data-nodes
5404       for (NodeId i = 1; i < MAX_NDB_NODES; i++)
5405       {
5406         if (NdbNodeBitmask::get(theNodes, i))
5407         {
5408           nodes.clear(i);
5409 
5410           // Remove any previous reports from this node
5411           // it should not be reported
5412           for (unsigned j = 0; j < events.size(); j++)
5413           {
5414             const SimpleSignal& ssig = events[j];
5415             const NodeId nodeid = refToNode(ssig.header.theSendersBlockRef);
5416             if (nodeid == i)
5417             {
5418               events.erase(j);
5419               j--;
5420             }
5421           }
5422         }
5423       }
5424       break;
5425     }
5426 
5427     default:
5428       // Ignore all other signals
5429       break;
5430     }
5431   }
5432   ss.unlock();
5433 
5434   return true;
5435 }
5436 
// Explicit instantiations of the container class templates for the
// element types listed below.
template class MutexVector<NodeId>;
template class MutexVector<Ndb_mgmd_event_service::Event_listener>;
template class Vector<EventSubscribeReq>;
template class MutexVector<EventSubscribeReq>;
template class Vector< Vector<BaseString> >;
template class Vector<MgmtSrvr::PossibleNode>;
template class Vector<Defragger::DefragBuffer*>;
5444