1 /*
2 Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License, version 2.0,
6 as published by the Free Software Foundation.
7
8 This program is also distributed with certain software (including
9 but not limited to OpenSSL) that is licensed under separate terms,
10 as designated in a particular file or component or in included license
11 documentation. The authors of MySQL hereby grant you an additional
12 permission to link the program and your derivative works with the
13 separately licensed software that they have included with MySQL.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License, version 2.0, for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
23 */
24
25 #include <ndb_global.h>
26
27 #include "MgmtSrvr.hpp"
28 #include "ndb_mgmd_error.h"
29 #include "Services.hpp"
30 #include "ConfigManager.hpp"
31 #include "Defragger.hpp"
32
33 #include <NdbOut.hpp>
34 #include <NdbApiSignal.hpp>
35 #include <kernel_types.h>
36 #include <GlobalSignalNumbers.h>
37 #include <signaldata/TestOrd.hpp>
38 #include <signaldata/TamperOrd.hpp>
39 #include <signaldata/StartOrd.hpp>
40 #include <signaldata/ApiVersion.hpp>
41 #include <signaldata/ResumeReq.hpp>
42 #include <signaldata/SetLogLevelOrd.hpp>
43 #include <signaldata/EventSubscribeReq.hpp>
44 #include <signaldata/EventReport.hpp>
45 #include <signaldata/DumpStateOrd.hpp>
46 #include <signaldata/BackupSignalData.hpp>
47 #include <signaldata/NFCompleteRep.hpp>
48 #include <signaldata/NodeFailRep.hpp>
49 #include <signaldata/AllocNodeId.hpp>
50 #include <signaldata/SchemaTrans.hpp>
51 #include <signaldata/CreateNodegroup.hpp>
52 #include <signaldata/DropNodegroup.hpp>
53 #include <signaldata/Sync.hpp>
54 #include <signaldata/GetConfig.hpp>
55 #include <NdbSleep.h>
56 #include <portlib/NdbDir.hpp>
57 #include <EventLogger.hpp>
58 #include <DebuggerNames.hpp>
59 #include <ndb_version.h>
60
61 #include <SocketServer.hpp>
62 #include <NdbConfig.h>
63
64 #include <NdbAutoPtr.hpp>
65 #include <NdbDir.hpp>
66 #include <ndberror.h>
67
68 #include <mgmapi.h>
69 #include <mgmapi_configuration.hpp>
70 #include <mgmapi_config_parameters.h>
71
72 #include <SignalSender.hpp>
73
/*
  Global error-insert code. Code paths compare against it through
  ERROR_INSERTED to enable or disable behaviour.
*/
int g_errorInsert;

/*
  True when the error-insert code equals 'x'. The argument is parenthesized
  so that an arbitrary expression (e.g. 'a & b') is compared as a whole.
*/
#define ERROR_INSERTED(x) (g_errorInsert == (x))
76
/*
  Declare a SignalSender named 'ss' on theFacade and lock it. If
  okToSendTo(nodeId, true) reports an error, return that error code from
  the enclosing function.
*/
#define INIT_SIGNAL_SENDER(ss,nodeId) \
  SignalSender ss(theFacade); \
  ss.lock(); /* lock will be released on exit */ \
  { \
    const int init_sender_rc = okToSendTo(nodeId, true); \
    if (init_sender_rc != 0) \
      return init_sender_rc; \
  }
86
87 extern "C" my_bool opt_core;
88
89 void *
logLevelThread_C(void * m)90 MgmtSrvr::logLevelThread_C(void* m)
91 {
92 MgmtSrvr *mgm = (MgmtSrvr*)m;
93 mgm->logLevelThreadRun();
94 return 0;
95 }
96
97 extern EventLogger * g_eventLogger;
98
#ifdef NOT_USED
/*
  Print the level of every event category in 'll' as
  "[LogLevel: <l0> <l1> ... ]"
*/
static NdbOut&
operator<<(NdbOut& out, const LogLevel & ll)
{
  out << "[LogLevel: ";
  size_t category = 0;
  while (category < LogLevel::LOGLEVEL_CATEGORIES)
  {
    out << ll.getLogLevel((LogLevel::EventCategory)category) << " ";
    category++;
  }
  out << "]";
  return out;
}
#endif
110
111 void
logLevelThreadRun()112 MgmtSrvr::logLevelThreadRun()
113 {
114 while (!_isStopThread)
115 {
116 Vector<NodeId> failed_started_nodes;
117 Vector<EventSubscribeReq> failed_log_level_requests;
118
119 /**
120 * Handle started nodes
121 */
122 m_started_nodes.lock();
123 if (m_started_nodes.size() > 0)
124 {
125 // calculate max log level
126 EventSubscribeReq req;
127 {
128 LogLevel tmp;
129 m_event_listner.lock();
130 for(int i = m_event_listner.m_clients.size() - 1; i >= 0; i--)
131 tmp.set_max(m_event_listner[i].m_logLevel);
132 m_event_listner.unlock();
133 req.assign(tmp);
134 }
135 req.blockRef = _ownReference;
136 while (m_started_nodes.size() > 0)
137 {
138 Uint32 node = m_started_nodes[0];
139 m_started_nodes.erase(0, false);
140 m_started_nodes.unlock();
141
142 if (setEventReportingLevelImpl(node, req))
143 {
144 failed_started_nodes.push_back(node);
145 }
146 else
147 {
148 SetLogLevelOrd ord;
149 ord.assign(m_nodeLogLevel[node]);
150 setNodeLogLevelImpl(node, ord);
151 }
152 m_started_nodes.lock();
153 }
154 }
155 m_started_nodes.unlock();
156
157 m_log_level_requests.lock();
158 while (m_log_level_requests.size() > 0)
159 {
160 EventSubscribeReq req = m_log_level_requests[0];
161 m_log_level_requests.erase(0, false);
162 m_log_level_requests.unlock();
163
164 if(req.blockRef == 0)
165 {
166 req.blockRef = _ownReference;
167 if (setEventReportingLevelImpl(0, req))
168 {
169 failed_log_level_requests.push_back(req);
170 }
171 }
172 else
173 {
174 SetLogLevelOrd ord;
175 ord.assign(req);
176 if (setNodeLogLevelImpl(req.blockRef, ord))
177 {
178 failed_log_level_requests.push_back(req);
179 }
180 }
181 m_log_level_requests.lock();
182 }
183 m_log_level_requests.unlock();
184
185 if(!ERROR_INSERTED(10000))
186 m_event_listner.check_listeners();
187
188 Uint32 sleeptime = _logLevelThreadSleep;
189 if (failed_started_nodes.size())
190 {
191 m_started_nodes.lock();
192 for (Uint32 i = 0; i<failed_started_nodes.size(); i++)
193 m_started_nodes.push_back(failed_started_nodes[i], false);
194 m_started_nodes.unlock();
195 failed_started_nodes.clear();
196 sleeptime = 100;
197 }
198
199 if (failed_log_level_requests.size())
200 {
201 m_log_level_requests.lock();
202 for (Uint32 i = 0; i<failed_log_level_requests.size(); i++)
203 m_log_level_requests.push_back(failed_log_level_requests[i], false);
204 m_log_level_requests.unlock();
205 failed_log_level_requests.clear();
206 sleeptime = 100;
207 }
208
209 NdbSleep_MilliSleep(sleeptime);
210 }
211 }
212
213
214 static int
translateStopRef(Uint32 errCode)215 translateStopRef(Uint32 errCode)
216 {
217 switch(errCode){
218 case StopRef::NodeShutdownInProgress:
219 return NODE_SHUTDOWN_IN_PROGESS;
220 break;
221 case StopRef::SystemShutdownInProgress:
222 return SYSTEM_SHUTDOWN_IN_PROGRESS;
223 break;
224 case StopRef::NodeShutdownWouldCauseSystemCrash:
225 return NODE_SHUTDOWN_WOULD_CAUSE_SYSTEM_CRASH;
226 break;
227 case StopRef::UnsupportedNodeShutdown:
228 return UNSUPPORTED_NODE_SHUTDOWN;
229 break;
230 }
231 return 4999;
232 }
233
234
MgmtSrvr(const MgmtOpts & opts)235 MgmtSrvr::MgmtSrvr(const MgmtOpts& opts) :
236 m_opts(opts),
237 _blockNumber(-1),
238 _ownNodeId(0),
239 m_port(0),
240 m_local_config(NULL),
241 _ownReference(0),
242 m_config_manager(NULL),
243 m_need_restart(false),
244 theFacade(NULL),
245 _isStopThread(false),
246 _logLevelThreadSleep(500),
247 m_event_listner(this),
248 m_master_node(0),
249 _logLevelThread(NULL),
250 m_version_string(ndbGetOwnVersionString())
251 {
252 DBUG_ENTER("MgmtSrvr::MgmtSrvr");
253
254 m_local_config_mutex= NdbMutex_Create();
255 m_node_id_mutex = NdbMutex_Create();
256 if (!m_local_config_mutex || !m_node_id_mutex)
257 {
258 g_eventLogger->error("Failed to create MgmtSrvr mutexes");
259 require(false);
260 }
261
262 /* Init node arrays */
263 for(Uint32 i = 0; i<MAX_NODES; i++) {
264 nodeTypes[i] = (enum ndb_mgm_node_type)-1;
265 m_connect_address[i].s_addr= 0;
266 }
267
268 /* Setup clusterlog as client[0] in m_event_listner */
269 {
270 Ndb_mgmd_event_service::Event_listener se;
271 my_socket_invalidate(&(se.m_socket));
272 for(size_t t = 0; t<LogLevel::LOGLEVEL_CATEGORIES; t++){
273 se.m_logLevel.setLogLevel((LogLevel::EventCategory)t, 7);
274 }
275 se.m_logLevel.setLogLevel(LogLevel::llError, 15);
276 se.m_logLevel.setLogLevel(LogLevel::llConnection, 8);
277 se.m_logLevel.setLogLevel(LogLevel::llBackup, 15);
278 m_event_listner.m_clients.push_back(se);
279 m_event_listner.m_logLevel = se.m_logLevel;
280 }
281
282 DBUG_VOID_RETURN;
283 }
284
285
286 /*
287 check_configdir
288
289 Make sure configdir exist and try to create it if not
290
291 */
292
293 const char*
check_configdir() const294 MgmtSrvr::check_configdir() const
295 {
296 if (m_opts.configdir &&
297 strcmp(m_opts.configdir, MYSQLCLUSTERDIR) != 0)
298 {
299 // Specified on commmand line
300 if (access(m_opts.configdir, F_OK))
301 {
302 g_eventLogger->error("Directory '%s' specified with --configdir " \
303 "does not exist. Either create it or pass " \
304 "the path to an already existing directory.",
305 m_opts.configdir);
306 return NULL;
307 }
308 return m_opts.configdir;
309 }
310 else
311 {
312 // Compiled in path MYSQLCLUSTERDIR
313 if (access(MYSQLCLUSTERDIR, F_OK))
314 {
315 g_eventLogger->info("The default config directory '%s' " \
316 "does not exist. Trying to create it...",
317 MYSQLCLUSTERDIR);
318
319 if (!NdbDir::create(MYSQLCLUSTERDIR) ||
320 access(MYSQLCLUSTERDIR, F_OK))
321 {
322 g_eventLogger->error("Could not create directory '%s'. " \
323 "Either create it manually or " \
324 "specify a different directory with " \
325 "--configdir=<path>",
326 MYSQLCLUSTERDIR);
327 return NULL;
328 }
329
330 g_eventLogger->info("Sucessfully created config directory");
331 }
332 return MYSQLCLUSTERDIR;
333 }
334 }
335
336
337 bool
init()338 MgmtSrvr::init()
339 {
340 DBUG_ENTER("MgmtSrvr::init");
341
342 const char* configdir;
343 if (!(configdir= check_configdir()))
344 DBUG_RETURN(false);
345
346 if (!(m_config_manager= new ConfigManager(m_opts, configdir)))
347 {
348 g_eventLogger->error("Failed to create ConfigManager");
349 DBUG_RETURN(false);
350 }
351
352 if (m_config_manager->add_config_change_subscriber(this) < 0)
353 {
354 g_eventLogger->error("Failed to add MgmtSrvr as config change subscriber");
355 DBUG_RETURN(false);
356 }
357
358 if (!m_config_manager->init())
359 {
360 DBUG_RETURN(false);
361 }
362
363 /* 'config_changed' should have been called from 'init' */
364 require(m_local_config != 0);
365
366 if (m_opts.print_full_config)
367 {
368 print_config();
369 DBUG_RETURN(false);
370 }
371
372 assert(_ownNodeId);
373
374 /* Reserve the node id with ourself */
375 NodeId nodeId= _ownNodeId;
376 int error_code;
377 BaseString error_string;
378 if (!alloc_node_id(&nodeId, NDB_MGM_NODE_TYPE_MGM,
379 0, 0, /* client_addr, len */
380 error_code, error_string,
381 0 /* log_event */ ))
382 {
383 g_eventLogger->error("INTERNAL ERROR: Could not allocate nodeid: %d, " \
384 "error: %d, '%s'",
385 _ownNodeId, error_code, error_string.c_str());
386 DBUG_RETURN(false);
387 }
388
389 if (nodeId != _ownNodeId)
390 {
391 g_eventLogger->error("INTERNAL ERROR: Nodeid %d allocated " \
392 "when %d was requested",
393 nodeId, _ownNodeId);
394 DBUG_RETURN(false);
395 }
396
397 DBUG_RETURN(true);
398 }
399
400
401 bool
start_transporter(const Config * config)402 MgmtSrvr::start_transporter(const Config* config)
403 {
404 DBUG_ENTER("MgmtSrvr::start_transporter");
405
406 theFacade= new TransporterFacade(0);
407 if (theFacade == 0)
408 {
409 g_eventLogger->error("Could not create TransporterFacade.");
410 DBUG_RETURN(false);
411 }
412
413 assert(_blockNumber == -1); // Blocknumber shouldn't been allocated yet
414
415 /*
416 Register ourself at TransporterFacade to be able to receive signals
417 and to be notified when a database process has died.
418 */
419 Uint32 res;
420 if ((res = open(theFacade)) == 0)
421 {
422 g_eventLogger->error("Failed to open block in TransporterFacade");
423 theFacade->stop_instance();
424 delete theFacade;
425 theFacade = 0;
426 DBUG_RETURN(false);
427 }
428 _blockNumber = refToBlock(res);
429
430 /**
431 * Need to call ->open() prior to actually starting TF
432 */
433 m_config_manager->set_facade(theFacade);
434
435 if (theFacade->start_instance(_ownNodeId,
436 config->m_configValues) < 0)
437 {
438 g_eventLogger->error("Failed to start transporter");
439 delete theFacade;
440 theFacade = 0;
441 DBUG_RETURN(false);
442 }
443
444 _ownReference = numberToRef(_blockNumber, _ownNodeId);
445
446 /*
447 set api reg req frequency quite high:
448
449 100 ms interval to make sure we have fairly up-to-date
450 info from the nodes. This to make sure that this info
451 is not dependent on heartbeat settings in the
452 configuration
453 */
454 theFacade->ext_set_max_api_reg_req_interval(100);
455
456 DBUG_RETURN(true);
457 }
458
459
460 bool
start_mgm_service(const Config * config)461 MgmtSrvr::start_mgm_service(const Config* config)
462 {
463 DBUG_ENTER("MgmtSrvr::start_mgm_service");
464
465 assert(m_port == 0);
466 {
467 // Find the portnumber to use for mgm service
468 ConfigIter iter(config, CFG_SECTION_NODE);
469
470 if(iter.find(CFG_NODE_ID, _ownNodeId) != 0){
471 g_eventLogger->error("Could not find node %d in config", _ownNodeId);
472 DBUG_RETURN(false);
473 }
474
475 unsigned type;
476 if(iter.get(CFG_TYPE_OF_SECTION, &type) != 0 ||
477 type != NODE_TYPE_MGM){
478 g_eventLogger->error("Node %d is not defined as management server",
479 _ownNodeId);
480 DBUG_RETURN(false);
481 }
482
483 if(iter.get(CFG_MGM_PORT, &m_port) != 0){
484 g_eventLogger->error("PortNumber not defined for node %d", _ownNodeId);
485 DBUG_RETURN(false);
486 }
487 }
488
489 unsigned short port= m_port;
490 DBUG_PRINT("info", ("Using port %d", port));
491 if (port == 0)
492 {
493 g_eventLogger->error("Could not find out which port to use"\
494 " for management service");
495 DBUG_RETURN(false);
496 }
497
498 {
499 int count= 5; // no of retries for tryBind
500 while(!m_socket_server.tryBind(port, m_opts.bind_address))
501 {
502 if (--count > 0)
503 {
504 NdbSleep_SecSleep(1);
505 continue;
506 }
507 g_eventLogger->error("Unable to bind management service port: %s:%d!\n"
508 "Please check if the port is already used,\n"
509 "(perhaps a ndb_mgmd is already running),\n"
510 "and if you are executing on the correct computer",
511 (m_opts.bind_address ? m_opts.bind_address : "*"),
512 port);
513 DBUG_RETURN(false);
514 }
515 }
516
517 {
518 MgmApiService * mapi = new MgmApiService(*this);
519 if (mapi == NULL)
520 {
521 g_eventLogger->error("Could not allocate MgmApiService");
522 DBUG_RETURN(false);
523 }
524
525 if(!m_socket_server.setup(mapi, &port, m_opts.bind_address))
526 {
527 delete mapi; // Will be deleted by SocketServer in all other cases
528 g_eventLogger->error("Unable to setup management service port: %s:%d!\n"
529 "Please check if the port is already used,\n"
530 "(perhaps a ndb_mgmd is already running),\n"
531 "and if you are executing on the correct computer",
532 (m_opts.bind_address ? m_opts.bind_address : "*"),
533 port);
534 DBUG_RETURN(false);
535 }
536
537 if (port != m_port)
538 {
539 g_eventLogger->error("Couldn't start management service on the "\
540 "requested port: %d. Got port: %d instead",
541 m_port, port);
542 DBUG_RETURN(false);
543 }
544 }
545
546 m_socket_server.startServer();
547
548 g_eventLogger->info("Id: %d, Command port: %s:%d",
549 _ownNodeId,
550 m_opts.bind_address ? m_opts.bind_address : "*",
551 port);
552 DBUG_RETURN(true);
553 }
554
555
556 bool
start()557 MgmtSrvr::start()
558 {
559 DBUG_ENTER("MgmtSrvr::start");
560
561 Guard g(m_local_config_mutex);
562
563 /* Start transporter */
564 if(!start_transporter(m_local_config))
565 {
566 g_eventLogger->error("Failed to start transporter!");
567 DBUG_RETURN(false);
568 }
569
570 /* Start mgm service */
571 if (!start_mgm_service(m_local_config))
572 {
573 g_eventLogger->error("Failed to start mangement service!");
574 DBUG_RETURN(false);
575 }
576
577 /* Use local MGM port for TransporterRegistry */
578 if(!connect_to_self())
579 {
580 g_eventLogger->error("Failed to connect to ourself!");
581 DBUG_RETURN(false);
582 }
583
584 /* Start config manager */
585 if (!m_config_manager->start())
586 {
587 g_eventLogger->error("Failed to start ConfigManager");
588 DBUG_RETURN(false);
589 }
590
591 /* Loglevel thread */
592 assert(_isStopThread == false);
593 _logLevelThread = NdbThread_Create(logLevelThread_C,
594 (void**)this,
595 0, // default stack size
596 "MgmtSrvr_Loglevel",
597 NDB_THREAD_PRIO_LOW);
598
599 DBUG_RETURN(true);
600 }
601
602
603 void
setClusterLog(const Config * config)604 MgmtSrvr::setClusterLog(const Config* config)
605 {
606 DBUG_ASSERT(_ownNodeId);
607
608 ConfigIter iter(config, CFG_SECTION_NODE);
609 require(iter.find(CFG_NODE_ID, _ownNodeId) == 0);
610
611 // Update DataDir from config
612 const char *datadir;
613 require(iter.get(CFG_NODE_DATADIR, &datadir) == 0);
614 NdbConfig_SetPath(datadir);
615
616 if (NdbDir::chdir(NdbConfig_get_path(NULL)) != 0)
617 {
618 g_eventLogger->warning("Cannot change directory to '%s', error: %d",
619 NdbConfig_get_path(NULL), errno);
620 // Ignore error
621 }
622
623 // Get log destination from config
624 BaseString logdest;
625 const char *value;
626 if(iter.get(CFG_LOG_DESTINATION, &value) == 0){
627 logdest.assign(value);
628 }
629
630 bool logdest_configured = true;
631 if(logdest.length() == 0 || logdest == "") {
632 // No LogDestination set, use default settings
633 char *clusterLog= NdbConfig_ClusterLogFileName(_ownNodeId);
634 logdest.assfmt("FILE:filename=%s,maxsize=1000000,maxfiles=6",
635 clusterLog);
636 free(clusterLog);
637 logdest_configured = false;
638 }
639
640 g_eventLogger->close();
641
642 int err= 0;
643 char errStr[100]= {0};
644 if(!g_eventLogger->addHandler(logdest, &err, sizeof(errStr), errStr)) {
645 ndbout << "Warning: could not add log destination '"
646 << logdest.c_str() << "'. Reason: ";
647 if(err)
648 ndbout << strerror(err);
649 if(err && errStr[0]!='\0')
650 ndbout << ", ";
651 if(errStr[0]!='\0')
652 ndbout << errStr;
653 ndbout << endl;
654 }
655
656 if (logdest_configured == false &&
657 m_opts.non_interactive)
658 {
659 g_eventLogger->createConsoleHandler();
660 }
661
662 #ifdef _WIN32
663 /* Output to Windows event log */
664 g_eventLogger->createEventLogHandler("MySQL Cluster Management Server");
665 #endif
666
667 if (m_opts.verbose)
668 g_eventLogger->enable(Logger::LL_DEBUG);
669 }
670
671
672 void
config_changed(NodeId node_id,const Config * new_config)673 MgmtSrvr::config_changed(NodeId node_id, const Config* new_config)
674 {
675 DBUG_ENTER("MgmtSrvr::config_changed");
676
677 Guard g(m_local_config_mutex);
678
679 // Don't allow nodeid to change, once it's been set
680 require(_ownNodeId == 0 || _ownNodeId == node_id);
681
682 _ownNodeId= node_id;
683
684 if (m_local_config)
685 delete m_local_config;
686
687 m_local_config= new Config(new_config); // Copy
688 require(m_local_config != 0);
689
690 /* Rebuild node arrays */
691 ConfigIter iter(m_local_config, CFG_SECTION_NODE);
692 for(Uint32 i = 0; i<MAX_NODES; i++) {
693
694 m_connect_address[i].s_addr= 0;
695
696 if (iter.first())
697 continue;
698
699 if (iter.find(CFG_NODE_ID, i) == 0){
700 unsigned type;
701 require(iter.get(CFG_TYPE_OF_SECTION, &type) == 0);
702
703 switch(type){
704 case NODE_TYPE_DB:
705 nodeTypes[i] = NDB_MGM_NODE_TYPE_NDB;
706 break;
707 case NODE_TYPE_API:
708 nodeTypes[i] = NDB_MGM_NODE_TYPE_API;
709 break;
710 case NODE_TYPE_MGM:
711 nodeTypes[i] = NDB_MGM_NODE_TYPE_MGM;
712 break;
713 default:
714 break;
715 }
716 }
717 else
718 {
719 nodeTypes[i] = (enum ndb_mgm_node_type)-1;
720 }
721
722 }
723
724 // Setup cluster log
725 setClusterLog(m_local_config);
726
727 if (theFacade)
728 {
729 if (!theFacade->configure(_ownNodeId,
730 m_local_config->m_configValues))
731 {
732 g_eventLogger->warning("Could not reconfigure everything online, "
733 "this node need a restart");
734 m_need_restart= true;
735 }
736 }
737
738 DBUG_VOID_RETURN;
739 }
740
741
742 bool
get_packed_config(ndb_mgm_node_type node_type,BaseString & buf64,BaseString & error)743 MgmtSrvr::get_packed_config(ndb_mgm_node_type node_type,
744 BaseString& buf64, BaseString& error)
745 {
746 return m_config_manager->get_packed_config(node_type, &buf64, error);
747 }
748
749 bool
get_packed_config_from_node(NodeId nodeId,BaseString & buf64,BaseString & error)750 MgmtSrvr::get_packed_config_from_node(NodeId nodeId,
751 BaseString& buf64, BaseString& error)
752 {
753 DBUG_ENTER("get_packed_config_from_node");
754
755 if (nodeId >= MAX_NODES_ID)
756 {
757 error.assfmt("Nodeid %d is greater than max nodeid %d. ",
758 nodeId, MAX_NODES_ID);
759 DBUG_RETURN(false);
760 }
761
762 if (getNodeType(nodeId) == NDB_MGM_NODE_TYPE_UNKNOWN)
763 {
764 error.assfmt("Nodeid %d does not exist. ", nodeId);
765 DBUG_RETURN(false);
766 }
767
768 if (getNodeType(nodeId) != NDB_MGM_NODE_TYPE_NDB)
769 {
770 error.assfmt("Node %d is not a data node. ", nodeId);
771 DBUG_RETURN(false);
772 }
773
774 trp_node node = getNodeInfo(nodeId);
775
776 if (!node.m_alive)
777 {
778 error.assfmt("Data node %d is not alive. ", nodeId);
779 DBUG_RETURN(false);
780 }
781
782 const Uint32 version = node.m_info.m_version;
783
784 if (!ndbd_get_config_supported(version))
785 {
786 error.assfmt("Data node %d (version %d.%d.%d) does not support getting config. ",
787 nodeId, ndbGetMajor(version),
788 ndbGetMinor(version), ndbGetBuild(version));
789 DBUG_RETURN(false);
790 }
791
792 INIT_SIGNAL_SENDER(ss,nodeId);
793
794 SimpleSignal ssig;
795 GetConfigReq* req = CAST_PTR(GetConfigReq, ssig.getDataPtrSend());
796 req->senderRef = ss.getOwnRef();
797 req->nodeId = nodeId;
798
799 g_eventLogger->debug("Sending GET_CONFIG_REQ to %d", nodeId);
800
801 ssig.set(ss, TestOrd::TraceAPI, CMVMI, GSN_GET_CONFIG_REQ,
802 GetConfigReq::SignalLength);
803 if ((ss.sendSignal(nodeId, &ssig)) != SEND_OK)
804 {
805 DBUG_RETURN(false);
806 }
807
808 Defragger defragger;
809 while (true)
810 {
811 SimpleSignal *signal = ss.waitFor();
812 int gsn = signal->readSignalNumber();
813
814 switch (gsn)
815 {
816 case GSN_GET_CONFIG_CONF:
817 {
818 if (refToNode(signal->header.theSendersBlockRef) != nodeId)
819 {
820 error.assfmt("Internal Error: Reply from wrong node %d, expected from %d. ",
821 refToNode(signal->header.theSendersBlockRef),
822 nodeId);
823 DBUG_RETURN(false);
824 }
825
826 const GetConfigConf * const conf =
827 CAST_CONSTPTR(GetConfigConf, signal->getDataPtr());
828
829 if (signal->header.m_noOfSections != 1)
830 {
831 error.assfmt("Internal Error: Wrong number of sections %d received, expected %d. ",
832 signal->header.m_noOfSections, 1);
833 DBUG_RETURN(false);
834 }
835
836 if (defragger.defragment(signal))
837 {
838 ConfigValuesFactory cf;
839 require(cf.unpack(signal->ptr[0].p, conf->configLength));
840
841 Config received_config(cf.getConfigValues());
842 if (!received_config.pack64(buf64))
843 {
844 error.assign("Failed to pack64");
845 DBUG_RETURN(false);
846 }
847 DBUG_RETURN(true);
848 }
849 // wait until all fragments are received
850 continue;
851 }
852
853 case GSN_GET_CONFIG_REF:
854 {
855 if (refToNode(ssig.header.theSendersBlockRef) != nodeId)
856 {
857 error.assfmt("Internal Error: Reply from wrong node %d, expected from %d. ",
858 refToNode(signal->header.theSendersBlockRef),
859 nodeId);
860 DBUG_RETURN(false);
861 }
862 const GetConfigRef * const ref =
863 CAST_CONSTPTR(GetConfigRef, signal->getDataPtr());
864 error.assfmt("Error in retrieving config from node %d: Internal error: %d",
865 nodeId, ref->error);
866
867 DBUG_RETURN(false);
868 }
869
870 case GSN_NF_COMPLETEREP:
871 {
872 const NFCompleteRep * rep = CAST_CONSTPTR(NFCompleteRep,
873 signal->getDataPtr());
874 if (rep->failedNodeId == nodeId)
875 {
876 error.assfmt("Node %d is not available", nodeId);
877 DBUG_RETURN(false);
878 }
879 continue;
880 }
881
882 case GSN_NODE_FAILREP:
883 {
884 // Wait until GSN_NODE_COMPLETEREP is received.
885 continue;
886 }
887
888 case GSN_API_REGCONF:
889 case GSN_TAKE_OVERTCCONF:
890 case GSN_CONNECT_REP:
891 // Ignore
892 continue;
893
894 default:
895 report_unknown_signal(signal);
896 DBUG_RETURN(false);
897 }
898 }
899 // Should never come here
900 require(false);
901 DBUG_RETURN(false);
902 }
903
~MgmtSrvr()904 MgmtSrvr::~MgmtSrvr()
905 {
906 /* Stop log level thread */
907 void* res = 0;
908 _isStopThread = true;
909
910 if (_logLevelThread != NULL) {
911 NdbThread_WaitFor(_logLevelThread, &res);
912 NdbThread_Destroy(&_logLevelThread);
913 }
914
915 /* Stop mgm service, don't allow new connections */
916 m_socket_server.stopServer();
917
918 /* Stop all active session */
919 if (!m_socket_server.stopSessions(true,
920 2 * MgmApiSession::SOCKET_TIMEOUT))
921 {
922 g_eventLogger->error("Failed to wait for all sessions to stop, "
923 "continuing with shutdown anyway.");
924 }
925
926 /* Stop config manager */
927 if (m_config_manager != 0)
928 {
929 m_config_manager->stop();
930 delete m_config_manager;
931 m_config_manager= 0;
932 }
933
934 this->close(); // close trp_client before stopping TransporterFacade
935
936 // Stop transporter
937 if(theFacade != 0){
938 theFacade->stop_instance();
939 delete theFacade;
940 theFacade = 0;
941 }
942
943 delete m_local_config;
944
945 NdbMutex_Destroy(m_local_config_mutex);
946 NdbMutex_Destroy(m_node_id_mutex);
947 }
948
949
950 //****************************************************************************
951 //****************************************************************************
952
okToSendTo(NodeId nodeId,bool unCond)953 int MgmtSrvr::okToSendTo(NodeId nodeId, bool unCond)
954 {
955 if(nodeId == 0 || getNodeType(nodeId) != NDB_MGM_NODE_TYPE_NDB)
956 return WRONG_PROCESS_TYPE;
957 // Check if we have contact with it
958 if(unCond){
959 if (getNodeInfo(nodeId).is_confirmed())
960 return 0;
961 }
962 else if (getNodeInfo(nodeId).m_alive == true)
963 return 0;
964 return NO_CONTACT_WITH_PROCESS;
965 }
966
967 void
report_unknown_signal(SimpleSignal * signal)968 MgmtSrvr::report_unknown_signal(SimpleSignal *signal)
969 {
970 signal->print();
971 g_eventLogger->error("Unknown signal received. SignalNumber: "
972 "%i from (%d, 0x%x)",
973 signal->readSignalNumber(),
974 refToNode(signal->header.theSendersBlockRef),
975 refToBlock(signal->header.theSendersBlockRef));
976 assert(false);
977 }
978
979 /*****************************************************************************
980 * Starting and stopping database nodes
981 ****************************************************************************/
982
983 int
start(int nodeId)984 MgmtSrvr::start(int nodeId)
985 {
986 INIT_SIGNAL_SENDER(ss,nodeId);
987
988 SimpleSignal ssig;
989 StartOrd* const startOrd = CAST_PTR(StartOrd, ssig.getDataPtrSend());
990 ssig.set(ss,TestOrd::TraceAPI, CMVMI, GSN_START_ORD, StartOrd::SignalLength);
991 startOrd->restartInfo = 0;
992
993 return ss.sendSignal(nodeId, &ssig) == SEND_OK ? 0 : SEND_OR_RECEIVE_FAILED;
994 }
995
996 /*****************************************************************************
997 * Version handling
998 *****************************************************************************/
999
1000 void
status_api(int nodeId,ndb_mgm_node_status & node_status,Uint32 & version,Uint32 & mysql_version,const char ** address)1001 MgmtSrvr::status_api(int nodeId,
1002 ndb_mgm_node_status& node_status,
1003 Uint32& version, Uint32& mysql_version,
1004 const char **address)
1005 {
1006 assert(getNodeType(nodeId) == NDB_MGM_NODE_TYPE_API);
1007 assert(version == 0 && mysql_version == 0);
1008
1009 if (sendVersionReq(nodeId, version, mysql_version, address) != 0)
1010 {
1011 // Couldn't get version from any NDB node.
1012 assert(version == 0);
1013 node_status = NDB_MGM_NODE_STATUS_UNKNOWN;
1014 return;
1015 }
1016
1017 if (version)
1018 {
1019 assert(mysql_version);
1020 node_status = NDB_MGM_NODE_STATUS_CONNECTED;
1021 }
1022 else
1023 {
1024 assert(mysql_version == 0);
1025 node_status = NDB_MGM_NODE_STATUS_NO_CONTACT;
1026 }
1027 return;
1028 }
1029
1030
1031 int
sendVersionReq(int v_nodeId,Uint32 & version,Uint32 & mysql_version,const char ** address)1032 MgmtSrvr::sendVersionReq(int v_nodeId,
1033 Uint32 &version,
1034 Uint32& mysql_version,
1035 const char **address)
1036 {
1037 SignalSender ss(theFacade);
1038 ss.lock();
1039
1040 SimpleSignal ssig;
1041 ApiVersionReq* req = CAST_PTR(ApiVersionReq, ssig.getDataPtrSend());
1042 req->senderRef = ss.getOwnRef();
1043 req->nodeId = v_nodeId;
1044 ssig.set(ss, TestOrd::TraceAPI, QMGR,
1045 GSN_API_VERSION_REQ, ApiVersionReq::SignalLength);
1046
1047 NodeId nodeId = 0;
1048 bool do_send = true;
1049 while(true)
1050 {
1051 if (do_send)
1052 {
1053 nodeId = ss.get_an_alive_node();
1054 if (nodeId == 0)
1055 {
1056 return NO_CONTACT_WITH_DB_NODES;
1057 }
1058
1059 if (ss.sendSignal(nodeId, &ssig) != SEND_OK)
1060 {
1061 return SEND_OR_RECEIVE_FAILED;
1062 }
1063
1064 do_send = false;
1065 }
1066
1067 SimpleSignal *signal = ss.waitFor();
1068
1069 switch (signal->readSignalNumber()) {
1070 case GSN_API_VERSION_CONF: {
1071 const ApiVersionConf * const conf =
1072 CAST_CONSTPTR(ApiVersionConf, signal->getDataPtr());
1073
1074 assert((int) conf->nodeId == v_nodeId);
1075
1076 version = conf->version;
1077 mysql_version = conf->mysql_version;
1078 if (version < NDBD_SPLIT_VERSION)
1079 mysql_version = 0;
1080 struct in_addr in;
1081 in.s_addr= conf->inet_addr;
1082 *address= inet_ntoa(in);
1083
1084 return 0;
1085 }
1086
1087 case GSN_NF_COMPLETEREP:{
1088 const NFCompleteRep * const rep =
1089 CAST_CONSTPTR(NFCompleteRep, signal->getDataPtr());
1090 if (rep->failedNodeId == nodeId)
1091 do_send = true; // retry with other node
1092 continue;
1093 }
1094
1095 case GSN_NODE_FAILREP:{
1096 const NodeFailRep * const rep =
1097 CAST_CONSTPTR(NodeFailRep, signal->getDataPtr());
1098 if (NdbNodeBitmask::get(rep->theNodes,nodeId))
1099 do_send = true; // retry with other node
1100 continue;
1101 }
1102 case GSN_API_REGCONF:
1103 case GSN_TAKE_OVERTCCONF:
1104 case GSN_CONNECT_REP:
1105 // Ignore
1106 continue;
1107 default:
1108 report_unknown_signal(signal);
1109 return SEND_OR_RECEIVE_FAILED;
1110 }
1111 }
1112
1113 // Should never come here
1114 require(false);
1115 return -1;
1116 }
1117
1118
sendStopMgmd(NodeId nodeId,bool abort,bool stop,bool restart,bool nostart,bool initialStart)1119 int MgmtSrvr::sendStopMgmd(NodeId nodeId,
1120 bool abort,
1121 bool stop,
1122 bool restart,
1123 bool nostart,
1124 bool initialStart)
1125 {
1126 const char* hostname;
1127 Uint32 port;
1128 BaseString connect_string;
1129
1130 {
1131 Guard g(m_local_config_mutex);
1132 {
1133 ConfigIter iter(m_local_config, CFG_SECTION_NODE);
1134
1135 if(iter.first()) return SEND_OR_RECEIVE_FAILED;
1136 if(iter.find(CFG_NODE_ID, nodeId)) return SEND_OR_RECEIVE_FAILED;
1137 if(iter.get(CFG_NODE_HOST, &hostname)) return SEND_OR_RECEIVE_FAILED;
1138 }
1139 {
1140 ConfigIter iter(m_local_config, CFG_SECTION_NODE);
1141
1142 if(iter.first()) return SEND_OR_RECEIVE_FAILED;
1143 if(iter.find(CFG_NODE_ID, nodeId)) return SEND_OR_RECEIVE_FAILED;
1144 if(iter.get(CFG_MGM_PORT, &port)) return SEND_OR_RECEIVE_FAILED;
1145 }
1146 if( strlen(hostname) == 0 )
1147 return SEND_OR_RECEIVE_FAILED;
1148
1149 }
1150 connect_string.assfmt("%s:%u",hostname,port);
1151
1152 DBUG_PRINT("info",("connect string: %s",connect_string.c_str()));
1153
1154 NdbMgmHandle h= ndb_mgm_create_handle();
1155 if ( h && connect_string.length() > 0 )
1156 {
1157 ndb_mgm_set_connectstring(h,connect_string.c_str());
1158 if(ndb_mgm_connect(h,1,0,0))
1159 {
1160 DBUG_PRINT("info",("failed ndb_mgm_connect"));
1161 ndb_mgm_destroy_handle(&h);
1162 return SEND_OR_RECEIVE_FAILED;
1163 }
1164 if(!restart)
1165 {
1166 int nodes[1];
1167 nodes[0]= (int)nodeId;
1168 if(ndb_mgm_stop(h, 1, nodes) < 0)
1169 {
1170 ndb_mgm_destroy_handle(&h);
1171 return SEND_OR_RECEIVE_FAILED;
1172 }
1173 }
1174 else
1175 {
1176 int nodes[1];
1177 nodes[0]= (int)nodeId;
1178 if(ndb_mgm_restart2(h, 1, nodes, initialStart, nostart, abort) < 0)
1179 {
1180 ndb_mgm_destroy_handle(&h);
1181 return SEND_OR_RECEIVE_FAILED;
1182 }
1183 }
1184 }
1185 ndb_mgm_destroy_handle(&h);
1186
1187 return 0;
1188 }
1189
1190 /**
1191 * send STOP_REQ to all DB-nodes
1192 * and wait for them to stop or refuse
1193 *
1194 */
1195 int
sendall_STOP_REQ(NodeBitmask & stoppedNodes,bool abort,bool stop,bool restart,bool nostart,bool initialStart)1196 MgmtSrvr::sendall_STOP_REQ(NodeBitmask &stoppedNodes,
1197 bool abort,
1198 bool stop,
1199 bool restart,
1200 bool nostart,
1201 bool initialStart)
1202 {
1203 int error = 0;
1204 DBUG_ENTER("MgmtSrvr::sendall_STOP_REQ");
1205 DBUG_PRINT("enter", ("abort: %d stop: %d restart: %d "
1206 "nostart: %d initialStart: %d",
1207 abort, stop, restart, nostart, initialStart));
1208
1209 stoppedNodes.clear();
1210
1211 SignalSender ss(theFacade);
1212 ss.lock(); // lock will be released on exit
1213
1214 SimpleSignal ssig;
1215 StopReq* const stopReq = CAST_PTR(StopReq, ssig.getDataPtrSend());
1216 ssig.set(ss, TestOrd::TraceAPI, NDBCNTR, GSN_STOP_REQ, StopReq::SignalLength);
1217
1218 stopReq->requestInfo = 0;
1219 stopReq->apiTimeout = 5000;
1220 stopReq->transactionTimeout = 1000;
1221 stopReq->readOperationTimeout = 1000;
1222 stopReq->operationTimeout = 1000;
1223 stopReq->senderData = 12;
1224 stopReq->senderRef = ss.getOwnRef();
1225 stopReq->singleuser = 0;
1226 StopReq::setSystemStop(stopReq->requestInfo, stop);
1227 StopReq::setPerformRestart(stopReq->requestInfo, restart);
1228 StopReq::setStopAbort(stopReq->requestInfo, abort);
1229 StopReq::setNoStart(stopReq->requestInfo, nostart);
1230 StopReq::setInitialStart(stopReq->requestInfo, initialStart);
1231
1232 // send the signals
1233 int failed = 0;
1234 NodeBitmask nodes;
1235 {
1236 NodeId nodeId = 0;
1237 while(getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB))
1238 {
1239 if (okToSendTo(nodeId, true) == 0)
1240 {
1241 SendStatus result = ss.sendSignal(nodeId, &ssig);
1242 if (result == SEND_OK)
1243 nodes.set(nodeId);
1244 else
1245 failed++;
1246 }
1247 }
1248 }
1249
1250 if (nodes.isclear() && failed > 0)
1251 {
1252 DBUG_RETURN(SEND_OR_RECEIVE_FAILED);
1253 }
1254
1255 // now wait for the replies
1256 while (!nodes.isclear())
1257 {
1258 SimpleSignal *signal = ss.waitFor();
1259 int gsn = signal->readSignalNumber();
1260 switch (gsn) {
1261 case GSN_STOP_REF:
1262 {
1263 const StopRef * const ref = CAST_CONSTPTR(StopRef, signal->getDataPtr());
1264 const NodeId nodeId = refToNode(signal->header.theSendersBlockRef);
1265 #ifdef VM_TRACE
1266 ndbout_c("Node %d refused stop", nodeId);
1267 #endif
1268 assert(nodes.get(nodeId));
1269 nodes.clear(nodeId);
1270 error = translateStopRef(ref->errorCode);
1271 break;
1272 }
1273 case GSN_STOP_CONF:
1274 {
1275 const NodeId nodeId = refToNode(signal->header.theSendersBlockRef);
1276 assert(nodes.get(nodeId));
1277 nodes.clear(nodeId);
1278 break;
1279 }
1280 case GSN_NF_COMPLETEREP:
1281 {
1282 const NFCompleteRep * rep = CAST_CONSTPTR(NFCompleteRep,
1283 signal->getDataPtr());
1284 nodes.clear(rep->failedNodeId); // clear the failed node
1285 stoppedNodes.set(rep->failedNodeId);
1286 break;
1287 }
1288 case GSN_NODE_FAILREP:
1289 {
1290 const NodeFailRep * rep = CAST_CONSTPTR(NodeFailRep,
1291 signal->getDataPtr());
1292 NodeBitmask mask;
1293 mask.assign(NdbNodeBitmask::Size, rep->theNodes);
1294 nodes.bitANDC(mask);
1295 stoppedNodes.bitOR(mask);
1296 break;
1297 }
1298 case GSN_API_REGCONF:
1299 case GSN_TAKE_OVERTCCONF:
1300 case GSN_CONNECT_REP:
1301 continue;
1302 default:
1303 report_unknown_signal(signal);
1304 DBUG_RETURN(SEND_OR_RECEIVE_FAILED);
1305 }
1306 }
1307
1308 DBUG_RETURN(error);
1309 }
1310
1311 int
guess_master_node(SignalSender & ss)1312 MgmtSrvr::guess_master_node(SignalSender& ss)
1313 {
1314 /**
1315 * First check if m_master_node is started
1316 */
1317 NodeId guess = m_master_node;
1318 if (guess != 0)
1319 {
1320 trp_node node = ss.getNodeInfo(guess);
1321 if (node.m_state.startLevel == NodeState::SL_STARTED)
1322 return guess;
1323 }
1324
1325 /**
1326 * Check for any started node
1327 */
1328 guess = 0;
1329 while(getNextNodeId(&guess, NDB_MGM_NODE_TYPE_NDB))
1330 {
1331 trp_node node = ss.getNodeInfo(guess);
1332 if (node.m_state.startLevel == NodeState::SL_STARTED)
1333 {
1334 return guess;
1335 }
1336 }
1337
1338 /**
1339 * Check any confirmed node
1340 */
1341 guess = 0;
1342 while(getNextNodeId(&guess, NDB_MGM_NODE_TYPE_NDB))
1343 {
1344 trp_node node = ss.getNodeInfo(guess);
1345 if (node.is_confirmed())
1346 {
1347 return guess;
1348 }
1349 }
1350
1351 /**
1352 * Check any connected node
1353 */
1354 guess = 0;
1355 while(getNextNodeId(&guess, NDB_MGM_NODE_TYPE_NDB))
1356 {
1357 trp_node node = ss.getNodeInfo(guess);
1358 if (node.is_connected())
1359 {
1360 return guess;
1361 }
1362 }
1363
1364 return 0; // give up
1365 }
1366
1367 /*
1368 * Common method for handeling all STOP_REQ signalling that
1369 * is used by Stopping, Restarting and Single user commands
1370 *
1371 * In the event that we need to stop a mgmd, we create a mgm
1372 * client connection to that mgmd and stop it that way.
1373 * This allows us to stop mgm servers when there isn't any real
1374 * distributed communication up.
1375 *
1376 * node_ids.size()==0 means to stop all DB nodes.
1377 * MGM nodes will *NOT* be stopped.
1378 *
1379 * If we work out we should be stopping or restarting ourselves,
1380 * we return <0 in stopSelf for restart, >0 for stop
1381 * and 0 for do nothing.
1382 */
1383
sendSTOP_REQ(const Vector<NodeId> & node_ids,NodeBitmask & stoppedNodes,bool abort,bool stop,bool restart,bool nostart,bool initialStart,int * stopSelf)1384 int MgmtSrvr::sendSTOP_REQ(const Vector<NodeId> &node_ids,
1385 NodeBitmask &stoppedNodes,
1386 bool abort,
1387 bool stop,
1388 bool restart,
1389 bool nostart,
1390 bool initialStart,
1391 int* stopSelf)
1392 {
1393 int error = 0;
1394 DBUG_ENTER("MgmtSrvr::sendSTOP_REQ");
1395 DBUG_PRINT("enter", ("no of nodes: %d "
1396 "abort: %d stop: %d restart: %d "
1397 "nostart: %d initialStart: %d",
1398 node_ids.size(),
1399 abort, stop, restart, nostart, initialStart));
1400
1401 stoppedNodes.clear();
1402 *stopSelf= 0;
1403
1404 NodeBitmask ndb_nodes_to_stop;
1405 NodeBitmask mgm_nodes_to_stop;
1406
1407 SignalSender ss(theFacade);
1408 ss.lock(); // lock will be released on exit
1409
1410 /**
1411 * First verify arguments
1412 */
1413 for (unsigned i = 0; i < node_ids.size(); i++)
1414 {
1415 switch(getNodeType(node_ids[i])){
1416 case NDB_MGM_NODE_TYPE_MGM:
1417 mgm_nodes_to_stop.set(node_ids[i]);
1418 break;
1419 case NDB_MGM_NODE_TYPE_NDB:
1420 ndb_nodes_to_stop.set(node_ids[i]);
1421 break;
1422 default:
1423 DBUG_RETURN(WRONG_PROCESS_TYPE);
1424 }
1425 }
1426
1427 /**
1428 * Process ndb_mgmd
1429 */
1430 for (Uint32 i = mgm_nodes_to_stop.find(0);
1431 i != mgm_nodes_to_stop.NotFound;
1432 i = mgm_nodes_to_stop.find(i + 1))
1433 {
1434 if (i != getOwnNodeId())
1435 {
1436 error= sendStopMgmd(i, abort, stop, restart,
1437 nostart, initialStart);
1438 if (error == 0)
1439 {
1440 stoppedNodes.set(i);
1441 }
1442 }
1443 else
1444 {
1445 g_eventLogger->info("Stopping this node");
1446 * stopSelf = (restart)? -1 : 1;
1447 stoppedNodes.set(i);
1448 }
1449 }
1450
1451 /**
1452 * Process ndbd
1453 */
1454 SimpleSignal ssig;
1455 StopReq* const stopReq = CAST_PTR(StopReq, ssig.getDataPtrSend());
1456 ssig.set(ss, TestOrd::TraceAPI, NDBCNTR, GSN_STOP_REQ, StopReq::SignalLength);
1457
1458 stopReq->requestInfo = 0;
1459 stopReq->apiTimeout = 5000;
1460 stopReq->transactionTimeout = 1000;
1461 stopReq->readOperationTimeout = 1000;
1462 stopReq->operationTimeout = 1000;
1463 stopReq->senderData = 12;
1464 stopReq->senderRef = ss.getOwnRef();
1465 stopReq->singleuser = 0;
1466 StopReq::setSystemStop(stopReq->requestInfo, stop);
1467 StopReq::setPerformRestart(stopReq->requestInfo, restart);
1468 StopReq::setStopAbort(stopReq->requestInfo, abort);
1469 StopReq::setNoStart(stopReq->requestInfo, nostart);
1470 StopReq::setInitialStart(stopReq->requestInfo, initialStart);
1471
1472 int use_master_node = 0;
1473 int do_send = 0;
1474 if (ndb_nodes_to_stop.count() > 1)
1475 {
1476 do_send = 1;
1477 use_master_node = 1;
1478 ndb_nodes_to_stop.copyto(NdbNodeBitmask::Size, stopReq->nodes);
1479 StopReq::setStopNodes(stopReq->requestInfo, 1);
1480 }
1481 else if (ndb_nodes_to_stop.count() == 1)
1482 {
1483 Uint32 nodeId = ndb_nodes_to_stop.find(0);
1484 if (okToSendTo(nodeId, true) == 0)
1485 {
1486 SendStatus result = ss.sendSignal(nodeId, &ssig);
1487 if (result != SEND_OK)
1488 {
1489 DBUG_RETURN(SEND_OR_RECEIVE_FAILED);
1490 }
1491 }
1492 else
1493 {
1494 DBUG_RETURN(SEND_OR_RECEIVE_FAILED);
1495 }
1496 }
1497
1498
1499 // now wait for the replies
1500 Uint32 sendNodeId = ndb_nodes_to_stop.find(0);
1501 while (!stoppedNodes.contains(ndb_nodes_to_stop))
1502 {
1503 if (do_send)
1504 {
1505 assert(use_master_node);
1506 sendNodeId = guess_master_node(ss);
1507 if (okToSendTo(sendNodeId, true) != 0)
1508 {
1509 DBUG_RETURN(SEND_OR_RECEIVE_FAILED);
1510 }
1511
1512 if (ss.sendSignal(sendNodeId, &ssig) != SEND_OK)
1513 {
1514 DBUG_RETURN(SEND_OR_RECEIVE_FAILED);
1515 }
1516 do_send = 0;
1517 }
1518
1519 SimpleSignal *signal = ss.waitFor();
1520 int gsn = signal->readSignalNumber();
1521 switch (gsn) {
1522 case GSN_STOP_REF:{
1523 const StopRef * const ref = CAST_CONSTPTR(StopRef, signal->getDataPtr());
1524 const NodeId nodeId = refToNode(signal->header.theSendersBlockRef);
1525 assert(nodeId == sendNodeId);
1526 if (ref->errorCode == StopRef::MultiNodeShutdownNotMaster)
1527 {
1528 assert(use_master_node);
1529 m_master_node= ref->masterNodeId;
1530 do_send = 1;
1531 continue;
1532 }
1533 DBUG_RETURN(translateStopRef(ref->errorCode));
1534 break;
1535 }
1536 case GSN_STOP_CONF:{
1537 #ifdef NOT_USED
1538 const StopConf * const ref = CAST_CONSTPTR(StopConf, signal->getDataPtr());
1539 #endif
1540 const NodeId nodeId = refToNode(signal->header.theSendersBlockRef);
1541 assert(nodeId == sendNodeId);
1542 stoppedNodes.bitOR(ndb_nodes_to_stop);
1543 break;
1544 }
1545 case GSN_NF_COMPLETEREP:{
1546 const NFCompleteRep * const rep =
1547 CAST_CONSTPTR(NFCompleteRep, signal->getDataPtr());
1548 stoppedNodes.set(rep->failedNodeId);
1549 break;
1550 }
1551 case GSN_NODE_FAILREP:{
1552 const NodeFailRep * const rep =
1553 CAST_CONSTPTR(NodeFailRep, signal->getDataPtr());
1554 NodeBitmask mask;
1555 mask.assign(NdbNodeBitmask::Size, rep->theNodes);
1556 stoppedNodes.bitOR(mask);
1557 break;
1558 }
1559 case GSN_API_REGCONF:
1560 case GSN_TAKE_OVERTCCONF:
1561 case GSN_CONNECT_REP:
1562 continue;
1563 default:
1564 report_unknown_signal(signal);
1565 DBUG_RETURN(SEND_OR_RECEIVE_FAILED);
1566 }
1567 }
1568 if (error && *stopSelf)
1569 {
1570 *stopSelf= 0;
1571 }
1572 DBUG_RETURN(error);
1573 }
1574
1575 /*
1576 * Stop one nodes
1577 */
1578
stopNodes(const Vector<NodeId> & node_ids,int * stopCount,bool abort,bool force,int * stopSelf)1579 int MgmtSrvr::stopNodes(const Vector<NodeId> &node_ids,
1580 int *stopCount, bool abort, bool force,
1581 int* stopSelf)
1582 {
1583 if (force || abort)
1584 ; // Skip node state checks
1585 else if (is_any_node_starting())
1586 {
1587 /* Refuse to stop since some node(s) are starting */
1588 return OPERATION_NOT_ALLOWED_START_STOP;
1589 }
1590
1591 NodeBitmask nodes;
1592 int ret = 0;
1593 if (node_ids.size() > 0)
1594 {
1595 ret = sendSTOP_REQ(node_ids, nodes,
1596 abort, false, false, false, false,
1597 stopSelf);
1598 }
1599 else
1600 {
1601 ret = sendall_STOP_REQ(nodes,
1602 abort, false, false, false, false);
1603 }
1604
1605 if (stopCount)
1606 *stopCount= nodes.count();
1607 return ret;
1608 }
1609
shutdownMGM(int * stopCount,bool abort,int * stopSelf)1610 int MgmtSrvr::shutdownMGM(int *stopCount, bool abort, int *stopSelf)
1611 {
1612 NodeId nodeId = 0;
1613 int error;
1614
1615 while(getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_MGM))
1616 {
1617 if(nodeId==getOwnNodeId())
1618 continue;
1619 error= sendStopMgmd(nodeId, abort, true, false,
1620 false, false);
1621 if (error == 0)
1622 (*stopCount)++;
1623 }
1624
1625 *stopSelf= 1;
1626 (*stopCount)++;
1627
1628 return 0;
1629 }
1630
1631 /*
1632 * Perform DB nodes shutdown.
1633 * MGM servers are left in their current state
1634 */
1635
shutdownDB(int * stopCount,bool abort)1636 int MgmtSrvr::shutdownDB(int * stopCount, bool abort)
1637 {
1638 NodeBitmask nodes;
1639
1640 int ret = sendall_STOP_REQ(nodes,
1641 abort,
1642 true,
1643 false,
1644 false,
1645 false);
1646
1647 if (stopCount)
1648 *stopCount = nodes.count();
1649 return ret;
1650 }
1651
1652 /*
1653 * Enter single user mode on all live nodes
1654 */
1655
enterSingleUser(int * stopCount,Uint32 apiNodeId)1656 int MgmtSrvr::enterSingleUser(int * stopCount, Uint32 apiNodeId)
1657 {
1658 if (getNodeType(apiNodeId) != NDB_MGM_NODE_TYPE_API)
1659 return NODE_NOT_API_NODE;
1660
1661 // Init
1662 if (stopCount)
1663 {
1664 * stopCount = 0;
1665 }
1666
1667 SignalSender ss(theFacade);
1668 ss.lock(); // lock will be released on exit
1669
1670 SimpleSignal ssig;
1671 StopReq* const stopReq = CAST_PTR(StopReq, ssig.getDataPtrSend());
1672 ssig.set(ss, TestOrd::TraceAPI, NDBCNTR, GSN_STOP_REQ, StopReq::SignalLength);
1673
1674 stopReq->requestInfo = 0;
1675 stopReq->apiTimeout = 5000;
1676 stopReq->transactionTimeout = 1000;
1677 stopReq->readOperationTimeout = 1000;
1678 stopReq->operationTimeout = 1000;
1679 stopReq->senderData = 12;
1680 stopReq->senderRef = ss.getOwnRef();
1681 stopReq->singleuser = 1;
1682 stopReq->singleUserApi = apiNodeId;
1683 StopReq::setSystemStop(stopReq->requestInfo, false);
1684 StopReq::setPerformRestart(stopReq->requestInfo, false);
1685 StopReq::setStopAbort(stopReq->requestInfo, false);
1686
1687 NodeBitmask nodes;
1688 {
1689 NodeId nodeId = 0;
1690 Uint32 failed = 0;
1691 while (getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB))
1692 {
1693 if (okToSendTo(nodeId, true) == 0)
1694 {
1695 SendStatus result = ss.sendSignal(nodeId, &ssig);
1696 if (result == SEND_OK)
1697 nodes.set(nodeId);
1698 else
1699 failed++;
1700 }
1701 else
1702 {
1703 failed++;
1704 }
1705 }
1706 if (nodes.isclear())
1707 {
1708 if (failed)
1709 {
1710 return SEND_OR_RECEIVE_FAILED;
1711 }
1712 return NO_CONTACT_WITH_DB_NODES;
1713 }
1714 }
1715
1716 int error = 0;
1717 int ok = 0;
1718 while (!nodes.isclear())
1719 {
1720 SimpleSignal *signal = ss.waitFor();
1721 int gsn = signal->readSignalNumber();
1722 switch (gsn) {
1723 case GSN_STOP_REF:
1724 {
1725 const StopRef * const ref = CAST_CONSTPTR(StopRef, signal->getDataPtr());
1726 nodes.clear(refToNode(signal->header.theSendersBlockRef));
1727 error = translateStopRef(ref->errorCode);
1728 break;
1729 }
1730 case GSN_STOP_CONF:
1731 {
1732 ok++;
1733 nodes.clear(refToNode(signal->header.theSendersBlockRef));
1734 break;
1735 }
1736 case GSN_NF_COMPLETEREP:
1737 {
1738 const NFCompleteRep * rep = CAST_CONSTPTR(NFCompleteRep,
1739 signal->getDataPtr());
1740 nodes.clear(rep->failedNodeId);
1741 break;
1742 }
1743
1744 case GSN_NODE_FAILREP:
1745 {
1746 const NodeFailRep * rep = CAST_CONSTPTR(NodeFailRep,
1747 signal->getDataPtr());
1748 NodeBitmask mask;
1749 mask.assign(NdbNodeBitmask::Size, rep->theNodes);
1750 nodes.bitANDC(mask);
1751 break;
1752 }
1753 case GSN_API_REGCONF:
1754 case GSN_TAKE_OVERTCCONF:
1755 case GSN_CONNECT_REP:
1756 continue;
1757
1758 default:
1759 report_unknown_signal(signal);
1760 return SEND_OR_RECEIVE_FAILED;
1761 }
1762 }
1763
1764 if (stopCount)
1765 {
1766 * stopCount = ok;
1767 }
1768
1769 return error;
1770 }
1771
1772 /*
1773 * Perform node restart
1774 */
1775
is_any_node_stopping()1776 bool MgmtSrvr::is_any_node_stopping()
1777 {
1778 NodeId nodeId = 0;
1779 trp_node node;
1780 while(getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB))
1781 {
1782 node = getNodeInfo(nodeId);
1783 if((node.m_state.startLevel == NodeState::SL_STOPPING_1) ||
1784 (node.m_state.startLevel == NodeState::SL_STOPPING_2) ||
1785 (node.m_state.startLevel == NodeState::SL_STOPPING_3) ||
1786 (node.m_state.startLevel == NodeState::SL_STOPPING_4))
1787 return true; // At least one node was stopping
1788 }
1789 return false; // No node was stopping
1790 }
1791
is_any_node_starting()1792 bool MgmtSrvr::is_any_node_starting()
1793 {
1794 NodeId nodeId = 0;
1795 trp_node node;
1796 while(getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB))
1797 {
1798 node = getNodeInfo(nodeId);
1799 if((node.m_state.startLevel == NodeState::SL_STARTING))
1800 return true; // At least one node was starting
1801 }
1802 return false; // No node was starting
1803 }
1804
is_cluster_single_user()1805 bool MgmtSrvr::is_cluster_single_user()
1806 {
1807 NodeId nodeId = 0;
1808 trp_node node;
1809 while(getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB))
1810 {
1811 node = getNodeInfo(nodeId);
1812 if((node.m_state.startLevel == NodeState::SL_SINGLEUSER))
1813 return true; // Cluster is in single user modes
1814 }
1815 return false; // Cluster is not in single user mode
1816 }
1817
restartNodes(const Vector<NodeId> & node_ids,int * stopCount,bool nostart,bool initialStart,bool abort,bool force,int * stopSelf)1818 int MgmtSrvr::restartNodes(const Vector<NodeId> &node_ids,
1819 int * stopCount, bool nostart,
1820 bool initialStart, bool abort,
1821 bool force,
1822 int *stopSelf)
1823 {
1824 if (is_cluster_single_user())
1825 {
1826 /*
1827 Refuse to restart since cluster is in single user mode
1828 and when the node is restarting it would not be allowed to
1829 join cluster, see BUG#31056
1830 */
1831 return OPERATION_NOT_ALLOWED_START_STOP;
1832 }
1833
1834 if (force || abort)
1835 ; // Skip node state checks
1836 else if (is_any_node_starting())
1837 {
1838 /* Refuse to restart since some node(s) are starting */
1839 return OPERATION_NOT_ALLOWED_START_STOP;
1840 }
1841
1842 NodeBitmask nodes;
1843 int ret = 0;
1844 if (node_ids.size() > 0)
1845 {
1846 ret = sendSTOP_REQ(node_ids, nodes,
1847 abort, false, true, true, initialStart,
1848 stopSelf);
1849 }
1850 else
1851 {
1852 ret = sendall_STOP_REQ(nodes,
1853 abort, false, true, true, initialStart);
1854 }
1855
1856 if (ret)
1857 return ret;
1858
1859 if (stopCount)
1860 *stopCount = nodes.count();
1861
1862 // start up the nodes again
1863 const NDB_TICKS waitTime = 12000;
1864 const NDB_TICKS startTime = NdbTick_CurrentMillisecond();
1865 for (unsigned i = 0; i < node_ids.size(); i++)
1866 {
1867 NodeId nodeId= node_ids[i];
1868 enum ndb_mgm_node_status s;
1869 s = NDB_MGM_NODE_STATUS_NO_CONTACT;
1870 #ifdef VM_TRACE
1871 ndbout_c("Waiting for %d not started", nodeId);
1872 #endif
1873 while (s != NDB_MGM_NODE_STATUS_NOT_STARTED &&
1874 (NdbTick_CurrentMillisecond() - startTime) < waitTime)
1875 {
1876 Uint32 startPhase = 0, version = 0, dynamicId = 0, nodeGroup = 0;
1877 Uint32 mysql_version = 0;
1878 Uint32 connectCount = 0;
1879 bool system;
1880 const char *address= NULL;
1881 status(nodeId, &s, &version, &mysql_version, &startPhase,
1882 &system, &dynamicId, &nodeGroup, &connectCount, &address);
1883 NdbSleep_MilliSleep(100);
1884 }
1885 }
1886
1887 if (nostart)
1888 return 0;
1889
1890 /*
1891 verify that no nodes are stopping before starting as this would cause
1892 the starting node to shutdown
1893 */
1894 int retry= 600*10;
1895 for (;is_any_node_stopping();)
1896 {
1897 if (--retry)
1898 break;
1899 NdbSleep_MilliSleep(100);
1900 }
1901
1902 /*
1903 start the nodes
1904 */
1905 for (unsigned i = 0; i < node_ids.size(); i++)
1906 {
1907 (void) start(node_ids[i]);
1908 }
1909 return 0;
1910 }
1911
1912 /*
1913 * Perform restart of all DB nodes
1914 */
1915
restartDB(bool nostart,bool initialStart,bool abort,int * stopCount)1916 int MgmtSrvr::restartDB(bool nostart, bool initialStart,
1917 bool abort, int * stopCount)
1918 {
1919 NodeBitmask nodes;
1920
1921 int ret = sendall_STOP_REQ(nodes,
1922 abort,
1923 true,
1924 true,
1925 true,
1926 initialStart);
1927
1928 if (ret)
1929 return ret;
1930
1931 if (stopCount)
1932 *stopCount = nodes.count();
1933
1934 #ifdef VM_TRACE
1935 ndbout_c("Stopped %d nodes", nodes.count());
1936 #endif
1937 /**
1938 * Here all nodes were correctly stopped,
1939 * so we wait for all nodes to be contactable
1940 */
1941 NodeId nodeId = 0;
1942 const NDB_TICKS waitTime = 12000;
1943 const NDB_TICKS startTime = NdbTick_CurrentMillisecond();
1944
1945 while(getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB)) {
1946 if (!nodes.get(nodeId))
1947 continue;
1948 enum ndb_mgm_node_status s;
1949 s = NDB_MGM_NODE_STATUS_NO_CONTACT;
1950 #ifdef VM_TRACE
1951 ndbout_c("Waiting for %d not started", nodeId);
1952 #endif
1953 while (s != NDB_MGM_NODE_STATUS_NOT_STARTED &&
1954 (NdbTick_CurrentMillisecond() - startTime) < waitTime)
1955 {
1956 Uint32 startPhase = 0, version = 0, dynamicId = 0, nodeGroup = 0;
1957 Uint32 mysql_version = 0;
1958 Uint32 connectCount = 0;
1959 bool system;
1960 const char *address;
1961 status(nodeId, &s, &version, &mysql_version, &startPhase,
1962 &system, &dynamicId, &nodeGroup, &connectCount, &address);
1963 NdbSleep_MilliSleep(100);
1964 }
1965 }
1966
1967 if(nostart)
1968 return 0;
1969
1970 /**
1971 * Now we start all database nodes (i.e. we make them non-idle)
1972 * We ignore the result we get from the start command.
1973 */
1974 nodeId = 0;
1975 while(getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB)) {
1976 if (!nodes.get(nodeId))
1977 continue;
1978 int result;
1979 result = start(nodeId);
1980 g_eventLogger->debug("Started node %d with result %d", nodeId, result);
1981 /**
1982 * Errors from this call are deliberately ignored.
1983 * Maybe the user only wanted to restart a subset of the nodes.
1984 * It is also easy for the user to check which nodes have
1985 * started and which nodes have not.
1986 */
1987 }
1988
1989 return 0;
1990 }
1991
1992 int
exitSingleUser(int * stopCount,bool abort)1993 MgmtSrvr::exitSingleUser(int * stopCount, bool abort)
1994 {
1995 NodeId nodeId = 0;
1996 int count = 0;
1997
1998 SignalSender ss(theFacade);
1999 ss.lock(); // lock will be released on exit
2000
2001 SimpleSignal ssig;
2002 ResumeReq* const resumeReq =
2003 CAST_PTR(ResumeReq, ssig.getDataPtrSend());
2004
2005 ssig.set(ss,TestOrd::TraceAPI, NDBCNTR, GSN_RESUME_REQ,
2006 ResumeReq::SignalLength);
2007 resumeReq->senderData = 12;
2008 resumeReq->senderRef = ss.getOwnRef();
2009
2010 while(getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB)){
2011 if(okToSendTo(nodeId, true) == 0){
2012 SendStatus result = ss.sendSignal(nodeId, &ssig);
2013 if (result == SEND_OK)
2014 count++;
2015 }
2016 }
2017
2018 if(stopCount != 0)
2019 * stopCount = count;
2020
2021 return 0;
2022 }
2023
2024 /*****************************************************************************
2025 * Status
2026 ****************************************************************************/
2027
2028 void
updateStatus()2029 MgmtSrvr::updateStatus()
2030 {
2031 theFacade->ext_forceHB();
2032 }
2033
2034
2035 void
status_mgmd(NodeId node_id,ndb_mgm_node_status & node_status,Uint32 & version,Uint32 & mysql_version,const char ** address)2036 MgmtSrvr::status_mgmd(NodeId node_id,
2037 ndb_mgm_node_status& node_status,
2038 Uint32& version, Uint32& mysql_version,
2039 const char **address)
2040 {
2041 assert(getNodeType(node_id) == NDB_MGM_NODE_TYPE_MGM);
2042
2043 if (node_id == getOwnNodeId())
2044 {
2045 /*
2046 Special case to get version of own node
2047 - version and mysql_version is hardcoded
2048 - address should be the address seen from ndbd(if it's connected)
2049 else use HostName from config
2050 */
2051 Uint32 tmp_version = 0, tmp_mysql_version = 0;
2052 sendVersionReq(node_id, tmp_version, tmp_mysql_version, address);
2053 // Check that the version returned is equal to compiled in version
2054 assert(tmp_version == 0 ||
2055 (tmp_version == NDB_VERSION &&
2056 tmp_mysql_version == NDB_MYSQL_VERSION_D));
2057
2058 version = NDB_VERSION;
2059 mysql_version = NDB_MYSQL_VERSION_D;
2060 if(!*address)
2061 {
2062 // No address returned from ndbd -> get HostName from config
2063 Guard g(m_local_config_mutex);
2064 ConfigIter iter(m_local_config, CFG_SECTION_NODE);
2065 require(iter.find(CFG_NODE_ID, node_id) == 0);
2066 require(iter.get(CFG_NODE_HOST, address) == 0);
2067
2068 /*
2069 Try to convert HostName to numerical ip address
2070 (to get same output as if ndbd had replied)
2071 */
2072 struct in_addr addr;
2073 if (Ndb_getInAddr(&addr, *address) == 0)
2074 *address = inet_ntoa(addr);
2075 }
2076
2077 node_status = NDB_MGM_NODE_STATUS_CONNECTED;
2078 return;
2079 }
2080
2081 /*
2082 MGM nodes are connected directly to all other MGM
2083 node(s), return status as seen by ClusterMgr
2084 */
2085 const trp_node node = getNodeInfo(node_id);
2086 if(node.is_connected())
2087 {
2088 version = node.m_info.m_version;
2089 mysql_version = node.m_info.m_mysql_version;
2090 node_status = NDB_MGM_NODE_STATUS_CONNECTED;
2091 *address= get_connect_address(node_id);
2092 }
2093 else
2094 {
2095 version = 0;
2096 mysql_version = 0;
2097 node_status = NDB_MGM_NODE_STATUS_NO_CONTACT;
2098 }
2099
2100 return;
2101 }
2102
2103 int
status(int nodeId,ndb_mgm_node_status * _status,Uint32 * version,Uint32 * mysql_version,Uint32 * _phase,bool * _system,Uint32 * dynamic,Uint32 * nodegroup,Uint32 * connectCount,const char ** address)2104 MgmtSrvr::status(int nodeId,
2105 ndb_mgm_node_status * _status,
2106 Uint32 * version,
2107 Uint32 * mysql_version,
2108 Uint32 * _phase,
2109 bool * _system,
2110 Uint32 * dynamic,
2111 Uint32 * nodegroup,
2112 Uint32 * connectCount,
2113 const char **address)
2114 {
2115 switch(getNodeType(nodeId)){
2116 case NDB_MGM_NODE_TYPE_API:
2117 status_api(nodeId, *_status, *version, *mysql_version, address);
2118 return 0;
2119 break;
2120
2121 case NDB_MGM_NODE_TYPE_MGM:
2122 status_mgmd(nodeId, *_status, *version, *mysql_version, address);
2123 return 0;
2124 break;
2125
2126 case NDB_MGM_NODE_TYPE_NDB:
2127 break;
2128
2129 default:
2130 abort();
2131 break;
2132 }
2133
2134 const trp_node node = getNodeInfo(nodeId);
2135 assert(getNodeType(nodeId) == NDB_MGM_NODE_TYPE_NDB &&
2136 node.m_info.getType() == NodeInfo::DB);
2137
2138 if(!node.is_connected()){
2139 * _status = NDB_MGM_NODE_STATUS_NO_CONTACT;
2140 return 0;
2141 }
2142
2143 * version = node.m_info.m_version;
2144 * mysql_version = node.m_info.m_mysql_version;
2145
2146 *address= get_connect_address(nodeId);
2147
2148 * dynamic = node.m_state.dynamicId;
2149 * nodegroup = node.m_state.nodeGroup;
2150 * connectCount = node.m_info.m_connectCount;
2151
2152 switch(node.m_state.startLevel){
2153 case NodeState::SL_CMVMI:
2154 * _status = NDB_MGM_NODE_STATUS_NOT_STARTED;
2155 * _phase = 0;
2156 return 0;
2157 break;
2158 case NodeState::SL_STARTING:
2159 * _status = NDB_MGM_NODE_STATUS_STARTING;
2160 * _phase = node.m_state.starting.startPhase;
2161 return 0;
2162 break;
2163 case NodeState::SL_STARTED:
2164 * _status = NDB_MGM_NODE_STATUS_STARTED;
2165 * _phase = 0;
2166 return 0;
2167 break;
2168 case NodeState::SL_STOPPING_1:
2169 * _status = NDB_MGM_NODE_STATUS_SHUTTING_DOWN;
2170 * _phase = 1;
2171 * _system = node.m_state.stopping.systemShutdown != 0;
2172 return 0;
2173 break;
2174 case NodeState::SL_STOPPING_2:
2175 * _status = NDB_MGM_NODE_STATUS_SHUTTING_DOWN;
2176 * _phase = 2;
2177 * _system = node.m_state.stopping.systemShutdown != 0;
2178 return 0;
2179 break;
2180 case NodeState::SL_STOPPING_3:
2181 * _status = NDB_MGM_NODE_STATUS_SHUTTING_DOWN;
2182 * _phase = 3;
2183 * _system = node.m_state.stopping.systemShutdown != 0;
2184 return 0;
2185 break;
2186 case NodeState::SL_STOPPING_4:
2187 * _status = NDB_MGM_NODE_STATUS_SHUTTING_DOWN;
2188 * _phase = 4;
2189 * _system = node.m_state.stopping.systemShutdown != 0;
2190 return 0;
2191 break;
2192 case NodeState::SL_SINGLEUSER:
2193 * _status = NDB_MGM_NODE_STATUS_SINGLEUSER;
2194 * _phase = 0;
2195 return 0;
2196 break;
2197 default:
2198 * _status = NDB_MGM_NODE_STATUS_UNKNOWN;
2199 * _phase = 0;
2200 return 0;
2201 }
2202
2203 return -1;
2204 }
2205
2206 int
setEventReportingLevelImpl(int nodeId_arg,const EventSubscribeReq & ll)2207 MgmtSrvr::setEventReportingLevelImpl(int nodeId_arg,
2208 const EventSubscribeReq& ll)
2209 {
2210 SignalSender ss(theFacade);
2211 NdbNodeBitmask nodes;
2212 nodes.clear();
2213 while (1)
2214 {
2215 Uint32 nodeId, max;
2216 ss.lock();
2217 SimpleSignal ssig;
2218 EventSubscribeReq * dst =
2219 CAST_PTR(EventSubscribeReq, ssig.getDataPtrSend());
2220 ssig.set(ss,TestOrd::TraceAPI, CMVMI, GSN_EVENT_SUBSCRIBE_REQ,
2221 EventSubscribeReq::SignalLength);
2222 *dst = ll;
2223
2224 if (nodeId_arg == 0)
2225 {
2226 // all nodes
2227 nodeId = 1;
2228 max = MAX_NDB_NODES;
2229 }
2230 else
2231 {
2232 // only one node
2233 max = nodeId = nodeId_arg;
2234 }
2235 // first make sure nodes are sendable
2236 for(; nodeId <= max; nodeId++)
2237 {
2238 if (nodeTypes[nodeId] != NODE_TYPE_DB)
2239 continue;
2240 if (okToSendTo(nodeId, true))
2241 {
2242 if (getNodeInfo(nodeId).is_connected() == false)
2243 {
2244 // node not connected we can safely skip this one
2245 continue;
2246 }
2247 // api_reg_conf not recevied yet, need to retry
2248 return SEND_OR_RECEIVE_FAILED;
2249 }
2250 }
2251
2252 if (nodeId_arg == 0)
2253 {
2254 // all nodes
2255 nodeId = 1;
2256 max = MAX_NDB_NODES;
2257 }
2258 else
2259 {
2260 // only one node
2261 max = nodeId = nodeId_arg;
2262 }
2263 // now send to all sendable nodes nodes
2264 // note, lock is held, so states have not changed
2265 for(; (Uint32) nodeId <= max; nodeId++)
2266 {
2267 if (nodeTypes[nodeId] != NODE_TYPE_DB)
2268 continue;
2269 if (getNodeInfo(nodeId).is_connected() == false)
2270 continue; // node is not connected, skip
2271 if (ss.sendSignal(nodeId, &ssig) == SEND_OK)
2272 nodes.set(nodeId);
2273 else if (max == nodeId)
2274 {
2275 return SEND_OR_RECEIVE_FAILED;
2276 }
2277 }
2278 break;
2279 }
2280
2281 if (nodes.isclear())
2282 {
2283 return SEND_OR_RECEIVE_FAILED;
2284 }
2285
2286 int error = 0;
2287 while (!nodes.isclear())
2288 {
2289 Uint32 nodeId;
2290 SimpleSignal *signal = ss.waitFor();
2291 int gsn = signal->readSignalNumber();
2292 nodeId = refToNode(signal->header.theSendersBlockRef);
2293 switch (gsn) {
2294 case GSN_EVENT_SUBSCRIBE_CONF:{
2295 nodes.clear(nodeId);
2296 break;
2297 }
2298 case GSN_EVENT_SUBSCRIBE_REF:{
2299 nodes.clear(nodeId);
2300 error = 1;
2301 break;
2302 }
2303 // Since sending okToSend(true),
2304 // there is no guarantee that NF_COMPLETEREP will come
2305 // i.e listen also to NODE_FAILREP
2306 case GSN_NODE_FAILREP: {
2307 const NodeFailRep * const rep =
2308 CAST_CONSTPTR(NodeFailRep, signal->getDataPtr());
2309 NdbNodeBitmask mask;
2310 mask.assign(NdbNodeBitmask::Size, rep->theNodes);
2311 nodes.bitANDC(mask);
2312 break;
2313 }
2314
2315 case GSN_NF_COMPLETEREP:{
2316 const NFCompleteRep * const rep =
2317 CAST_CONSTPTR(NFCompleteRep, signal->getDataPtr());
2318 nodes.clear(rep->failedNodeId);
2319 break;
2320 }
2321 case GSN_API_REGCONF:
2322 case GSN_TAKE_OVERTCCONF:
2323 case GSN_CONNECT_REP:
2324 continue;
2325 default:
2326 report_unknown_signal(signal);
2327 return SEND_OR_RECEIVE_FAILED;
2328 }
2329 }
2330 if (error)
2331 return SEND_OR_RECEIVE_FAILED;
2332 return 0;
2333 }
2334
2335 //****************************************************************************
2336 //****************************************************************************
2337 int
setNodeLogLevelImpl(int nodeId,const SetLogLevelOrd & ll)2338 MgmtSrvr::setNodeLogLevelImpl(int nodeId, const SetLogLevelOrd & ll)
2339 {
2340 INIT_SIGNAL_SENDER(ss,nodeId);
2341
2342 SimpleSignal ssig;
2343 ssig.set(ss,TestOrd::TraceAPI, CMVMI, GSN_SET_LOGLEVELORD,
2344 SetLogLevelOrd::SignalLength);
2345 SetLogLevelOrd* const dst = CAST_PTR(SetLogLevelOrd, ssig.getDataPtrSend());
2346 *dst = ll;
2347
2348 return ss.sendSignal(nodeId, &ssig) == SEND_OK ? 0 : SEND_OR_RECEIVE_FAILED;
2349 }
2350
2351 //****************************************************************************
2352 //****************************************************************************
2353
2354 int
insertError(int nodeId,int errorNo)2355 MgmtSrvr::insertError(int nodeId, int errorNo)
2356 {
2357 int block;
2358
2359 if (errorNo < 0) {
2360 return INVALID_ERROR_NUMBER;
2361 }
2362
2363 SignalSender ss(theFacade);
2364 ss.lock(); /* lock will be released on exit */
2365
2366 if(getNodeType(nodeId) == NDB_MGM_NODE_TYPE_NDB)
2367 {
2368 block= CMVMI;
2369 }
2370 else if(nodeId == _ownNodeId)
2371 {
2372 g_errorInsert= errorNo;
2373 return 0;
2374 }
2375 else if(getNodeType(nodeId) == NDB_MGM_NODE_TYPE_MGM)
2376 block= _blockNumber;
2377 else
2378 return WRONG_PROCESS_TYPE;
2379
2380 SimpleSignal ssig;
2381 ssig.set(ss,TestOrd::TraceAPI, block, GSN_TAMPER_ORD,
2382 TamperOrd::SignalLength);
2383 TamperOrd* const tamperOrd = CAST_PTR(TamperOrd, ssig.getDataPtrSend());
2384 tamperOrd->errorNo = errorNo;
2385
2386 int res = ss.sendSignal(nodeId, &ssig) == SEND_OK ? 0 :SEND_OR_RECEIVE_FAILED;
2387
2388 if (res == 0)
2389 {
2390 /**
2391 * In order to make NDB_TAMPER (almost) syncronous,
2392 * make a syncronous request *after* the NDB_TAMPER
2393 */
2394 make_sync_req(ss, Uint32(nodeId));
2395 }
2396
2397 return res;
2398 }
2399
2400
2401 int
startSchemaTrans(SignalSender & ss,NodeId & out_nodeId,Uint32 transId,Uint32 & out_transKey)2402 MgmtSrvr::startSchemaTrans(SignalSender& ss, NodeId & out_nodeId,
2403 Uint32 transId, Uint32 & out_transKey)
2404 {
2405 SimpleSignal ssig;
2406
2407 ssig.set(ss, 0, DBDICT, GSN_SCHEMA_TRANS_BEGIN_REQ,
2408 SchemaTransBeginReq::SignalLength);
2409
2410 SchemaTransBeginReq* req =
2411 CAST_PTR(SchemaTransBeginReq, ssig.getDataPtrSend());
2412
2413 req->clientRef = ss.getOwnRef();
2414 req->transId = transId;
2415 req->requestInfo = 0;
2416
2417 NodeId nodeId = ss.get_an_alive_node();
2418
2419 retry:
2420 if (ss.get_node_alive(nodeId) == false)
2421 {
2422 nodeId = ss.get_an_alive_node();
2423 }
2424
2425 if (ss.sendSignal(nodeId, &ssig) != SEND_OK)
2426 {
2427 return SEND_OR_RECEIVE_FAILED;
2428 }
2429
2430 while (true)
2431 {
2432 SimpleSignal *signal = ss.waitFor();
2433 int gsn = signal->readSignalNumber();
2434 switch (gsn) {
2435 case GSN_SCHEMA_TRANS_BEGIN_CONF: {
2436 const SchemaTransBeginConf * conf =
2437 CAST_CONSTPTR(SchemaTransBeginConf, signal->getDataPtr());
2438 out_transKey = conf->transKey;
2439 out_nodeId = nodeId;
2440 return 0;
2441 }
2442 case GSN_SCHEMA_TRANS_BEGIN_REF: {
2443 const SchemaTransBeginRef * ref =
2444 CAST_CONSTPTR(SchemaTransBeginRef, signal->getDataPtr());
2445
2446 switch(ref->errorCode){
2447 case SchemaTransBeginRef::NotMaster:
2448 nodeId = ref->masterNodeId;
2449 // Fall-through
2450 case SchemaTransBeginRef::Busy:
2451 case SchemaTransBeginRef::BusyWithNR:
2452 goto retry;
2453 default:
2454 return ref->errorCode;
2455 }
2456 }
2457 case GSN_NF_COMPLETEREP:
2458 // ignore
2459 break;
2460 case GSN_NODE_FAILREP:{
2461 const NodeFailRep * const rep =
2462 CAST_CONSTPTR(NodeFailRep, signal->getDataPtr());
2463 if (NdbNodeBitmask::get(rep->theNodes, nodeId))
2464 {
2465 nodeId++;
2466 goto retry;
2467 }
2468 break;
2469 }
2470 case GSN_API_REGCONF:
2471 case GSN_TAKE_OVERTCCONF:
2472 case GSN_CONNECT_REP:
2473 break;
2474 default:
2475 report_unknown_signal(signal);
2476 return SEND_OR_RECEIVE_FAILED;
2477 }
2478 }
2479 }
2480
2481 int
endSchemaTrans(SignalSender & ss,NodeId nodeId,Uint32 transId,Uint32 transKey,Uint32 flags)2482 MgmtSrvr::endSchemaTrans(SignalSender& ss, NodeId nodeId,
2483 Uint32 transId, Uint32 transKey,
2484 Uint32 flags)
2485 {
2486 SimpleSignal ssig;
2487
2488 ssig.set(ss, 0, DBDICT, GSN_SCHEMA_TRANS_END_REQ,
2489 SchemaTransEndReq::SignalLength);
2490
2491 SchemaTransEndReq* req =
2492 CAST_PTR(SchemaTransEndReq, ssig.getDataPtrSend());
2493
2494 req->clientRef = ss.getOwnRef();
2495 req->transId = transId;
2496 req->requestInfo = 0;
2497 req->transKey = transKey;
2498 req->flags = flags;
2499
2500 if (ss.sendSignal(nodeId, &ssig) != SEND_OK)
2501 {
2502 return SEND_OR_RECEIVE_FAILED;
2503 }
2504
2505 while (true)
2506 {
2507 SimpleSignal *signal = ss.waitFor();
2508 int gsn = signal->readSignalNumber();
2509 switch (gsn) {
2510 case GSN_SCHEMA_TRANS_END_CONF: {
2511 return 0;
2512 }
2513 case GSN_SCHEMA_TRANS_END_REF: {
2514 const SchemaTransEndRef * ref =
2515 CAST_CONSTPTR(SchemaTransEndRef, signal->getDataPtr());
2516 return ref->errorCode;
2517 }
2518 case GSN_NF_COMPLETEREP:
2519 // ignore
2520 break;
2521 case GSN_NODE_FAILREP:{
2522 const NodeFailRep * const rep =
2523 CAST_CONSTPTR(NodeFailRep, signal->getDataPtr());
2524 if (NdbNodeBitmask::get(rep->theNodes, nodeId))
2525 {
2526 return -1;
2527 }
2528 break;
2529 }
2530 case GSN_API_REGCONF:
2531 case GSN_TAKE_OVERTCCONF:
2532 case GSN_CONNECT_REP:
2533 break;
2534 default:
2535 report_unknown_signal(signal);
2536 return SEND_OR_RECEIVE_FAILED;
2537 }
2538 }
2539 }
2540
2541 int
createNodegroup(int * nodes,int count,int * ng)2542 MgmtSrvr::createNodegroup(int *nodes, int count, int *ng)
2543 {
2544 int res;
2545 SignalSender ss(theFacade);
2546 ss.lock();
2547
2548 Uint32 transId = rand();
2549 Uint32 transKey;
2550 NodeId nodeId;
2551
2552 if ((res = startSchemaTrans(ss, nodeId, transId, transKey)))
2553 {
2554 return res;
2555 }
2556
2557 SimpleSignal ssig;
2558 ssig.set(ss, 0, DBDICT, GSN_CREATE_NODEGROUP_REQ,
2559 CreateNodegroupReq::SignalLength);
2560
2561 CreateNodegroupReq* req =
2562 CAST_PTR(CreateNodegroupReq, ssig.getDataPtrSend());
2563
2564 req->transId = transId;
2565 req->transKey = transKey;
2566 req->nodegroupId = RNIL;
2567 req->senderData = 77;
2568 req->senderRef = ss.getOwnRef();
2569 bzero(req->nodes, sizeof(req->nodes));
2570
2571 if (ng)
2572 {
2573 if (* ng != -1)
2574 {
2575 req->nodegroupId = * ng;
2576 }
2577 }
2578 for (int i = 0; i<count && i<(int)NDB_ARRAY_SIZE(req->nodes); i++)
2579 {
2580 req->nodes[i] = nodes[i];
2581 }
2582
2583 if (ss.sendSignal(nodeId, &ssig) != SEND_OK)
2584 {
2585 return SEND_OR_RECEIVE_FAILED;
2586 }
2587
2588 bool wait = true;
2589 while (wait)
2590 {
2591 SimpleSignal *signal = ss.waitFor();
2592 int gsn = signal->readSignalNumber();
2593 switch (gsn) {
2594 case GSN_CREATE_NODEGROUP_CONF: {
2595 const CreateNodegroupConf * conf =
2596 CAST_CONSTPTR(CreateNodegroupConf, signal->getDataPtr());
2597
2598 if (ng)
2599 {
2600 * ng = conf->nodegroupId;
2601 }
2602
2603 wait = false;
2604 break;
2605 }
2606 case GSN_CREATE_NODEGROUP_REF:{
2607 const CreateNodegroupRef * ref =
2608 CAST_CONSTPTR(CreateNodegroupRef, signal->getDataPtr());
2609 Uint32 err = ref->errorCode;
2610 endSchemaTrans(ss, nodeId, transId, transKey,
2611 SchemaTransEndReq::SchemaTransAbort);
2612 return err;
2613 }
2614 case GSN_NF_COMPLETEREP:
2615 // ignore
2616 break;
2617 case GSN_NODE_FAILREP:{
2618 const NodeFailRep * const rep =
2619 CAST_CONSTPTR(NodeFailRep, signal->getDataPtr());
2620 if (NdbNodeBitmask::get(rep->theNodes, nodeId))
2621 {
2622 return SchemaTransBeginRef::Nodefailure;
2623 }
2624 break;
2625 }
2626 case GSN_API_REGCONF:
2627 case GSN_TAKE_OVERTCCONF:
2628 case GSN_CONNECT_REP:
2629 break;
2630 default:
2631 report_unknown_signal(signal);
2632 return SEND_OR_RECEIVE_FAILED;
2633 }
2634 }
2635
2636 return endSchemaTrans(ss, nodeId, transId, transKey, 0);
2637 }
2638
2639 int
dropNodegroup(int ng)2640 MgmtSrvr::dropNodegroup(int ng)
2641 {
2642 int res;
2643 SignalSender ss(theFacade);
2644 ss.lock();
2645
2646 Uint32 transId = rand();
2647 Uint32 transKey;
2648 NodeId nodeId;
2649
2650 if ((res = startSchemaTrans(ss, nodeId, transId, transKey)))
2651 {
2652 return res;
2653 }
2654
2655 SimpleSignal ssig;
2656 ssig.set(ss, 0, DBDICT, GSN_DROP_NODEGROUP_REQ, DropNodegroupReq::SignalLength);
2657
2658 DropNodegroupReq* req =
2659 CAST_PTR(DropNodegroupReq, ssig.getDataPtrSend());
2660
2661 req->transId = transId;
2662 req->transKey = transKey;
2663 req->nodegroupId = ng;
2664 req->senderData = 77;
2665 req->senderRef = ss.getOwnRef();
2666
2667 if (ss.sendSignal(nodeId, &ssig) != SEND_OK)
2668 {
2669 return SEND_OR_RECEIVE_FAILED;
2670 }
2671
2672 bool wait = true;
2673 while (wait)
2674 {
2675 SimpleSignal *signal = ss.waitFor();
2676 int gsn = signal->readSignalNumber();
2677 switch (gsn) {
2678 case GSN_DROP_NODEGROUP_CONF: {
2679 wait = false;
2680 break;
2681 }
2682 case GSN_DROP_NODEGROUP_REF:
2683 {
2684 const DropNodegroupRef * ref =
2685 CAST_CONSTPTR(DropNodegroupRef, signal->getDataPtr());
2686 endSchemaTrans(ss, nodeId, transId, transKey,
2687 SchemaTransEndReq::SchemaTransAbort);
2688 return ref->errorCode;
2689 }
2690 case GSN_NF_COMPLETEREP:
2691 // ignore
2692 break;
2693 case GSN_NODE_FAILREP:{
2694 const NodeFailRep * const rep =
2695 CAST_CONSTPTR(NodeFailRep, signal->getDataPtr());
2696 if (NdbNodeBitmask::get(rep->theNodes, nodeId))
2697 {
2698 return SchemaTransBeginRef::Nodefailure;
2699 }
2700 break;
2701 }
2702 case GSN_API_REGCONF:
2703 case GSN_TAKE_OVERTCCONF:
2704 case GSN_CONNECT_REP:
2705 break;
2706 default:
2707 report_unknown_signal(signal);
2708 return SEND_OR_RECEIVE_FAILED;
2709 }
2710 }
2711
2712 return endSchemaTrans(ss, nodeId, transId, transKey, 0);
2713 }
2714
2715
2716 //****************************************************************************
2717 //****************************************************************************
2718
2719 int
setTraceNo(int nodeId,int traceNo)2720 MgmtSrvr::setTraceNo(int nodeId, int traceNo)
2721 {
2722 if (traceNo < 0) {
2723 return INVALID_TRACE_NUMBER;
2724 }
2725
2726 INIT_SIGNAL_SENDER(ss,nodeId);
2727
2728 SimpleSignal ssig;
2729 ssig.set(ss,TestOrd::TraceAPI, CMVMI, GSN_TEST_ORD, TestOrd::SignalLength);
2730 TestOrd* const testOrd = CAST_PTR(TestOrd, ssig.getDataPtrSend());
2731 testOrd->clear();
2732 // Assume TRACE command causes toggling. Not really defined... ? TODO
2733 testOrd->setTraceCommand(TestOrd::Toggle,
2734 (TestOrd::TraceSpecification)traceNo);
2735
2736 return ss.sendSignal(nodeId, &ssig) == SEND_OK ? 0 : SEND_OR_RECEIVE_FAILED;
2737 }
2738
2739 //****************************************************************************
2740 //****************************************************************************
2741
2742 int
getBlockNumber(const BaseString & blockName)2743 MgmtSrvr::getBlockNumber(const BaseString &blockName)
2744 {
2745 short bno = getBlockNo(blockName.c_str());
2746 if(bno != 0)
2747 return bno;
2748 return -1;
2749 }
2750
2751 //****************************************************************************
2752 //****************************************************************************
2753
2754 int
setSignalLoggingMode(int nodeId,LogMode mode,const Vector<BaseString> & blocks)2755 MgmtSrvr::setSignalLoggingMode(int nodeId, LogMode mode,
2756 const Vector<BaseString>& blocks)
2757 {
2758 INIT_SIGNAL_SENDER(ss,nodeId);
2759
2760 // Convert from MgmtSrvr format...
2761
2762 TestOrd::Command command;
2763 if (mode == Off) {
2764 command = TestOrd::Off;
2765 }
2766 else {
2767 command = TestOrd::On;
2768 }
2769
2770 TestOrd::SignalLoggerSpecification logSpec;
2771 switch (mode) {
2772 case In:
2773 logSpec = TestOrd::InputSignals;
2774 break;
2775 case Out:
2776 logSpec = TestOrd::OutputSignals;
2777 break;
2778 case InOut:
2779 logSpec = TestOrd::InputOutputSignals;
2780 break;
2781 case Off:
2782 // In MgmtSrvr interface it's just possible to switch off all logging, both
2783 // "in" and "out" (this should probably be changed).
2784 logSpec = TestOrd::InputOutputSignals;
2785 break;
2786 default:
2787 ndbout_c("Unexpected value %d, MgmtSrvr::setSignalLoggingMode, line %d",
2788 (unsigned)mode, __LINE__);
2789 assert(false);
2790 return -1;
2791 }
2792
2793 SimpleSignal ssig;
2794 ssig.set(ss,TestOrd::TraceAPI, CMVMI, GSN_TEST_ORD, TestOrd::SignalLength);
2795
2796 TestOrd* const testOrd = CAST_PTR(TestOrd, ssig.getDataPtrSend());
2797 testOrd->clear();
2798
2799 if (blocks.size() == 0 || blocks[0] == "ALL") {
2800 // Logg command for all blocks
2801 testOrd->addSignalLoggerCommand(command, logSpec);
2802 } else {
2803 for(unsigned i = 0; i < blocks.size(); i++){
2804 int blockNumber = getBlockNumber(blocks[i]);
2805 if (blockNumber == -1) {
2806 return INVALID_BLOCK_NAME;
2807 }
2808 testOrd->addSignalLoggerCommand(blockNumber, command, logSpec);
2809 } // for
2810 } // else
2811
2812 return ss.sendSignal(nodeId, &ssig) == SEND_OK ? 0 : SEND_OR_RECEIVE_FAILED;
2813 }
2814
2815 /*****************************************************************************
2816 * Signal tracing
2817 *****************************************************************************/
startSignalTracing(int nodeId)2818 int MgmtSrvr::startSignalTracing(int nodeId)
2819 {
2820 INIT_SIGNAL_SENDER(ss,nodeId);
2821
2822 SimpleSignal ssig;
2823 ssig.set(ss,TestOrd::TraceAPI, CMVMI, GSN_TEST_ORD, TestOrd::SignalLength);
2824
2825 TestOrd* const testOrd = CAST_PTR(TestOrd, ssig.getDataPtrSend());
2826 testOrd->clear();
2827 testOrd->setTestCommand(TestOrd::On);
2828
2829 return ss.sendSignal(nodeId, &ssig) == SEND_OK ? 0 : SEND_OR_RECEIVE_FAILED;
2830 }
2831
2832 int
stopSignalTracing(int nodeId)2833 MgmtSrvr::stopSignalTracing(int nodeId)
2834 {
2835 INIT_SIGNAL_SENDER(ss,nodeId);
2836
2837 SimpleSignal ssig;
2838 ssig.set(ss,TestOrd::TraceAPI, CMVMI, GSN_TEST_ORD, TestOrd::SignalLength);
2839 TestOrd* const testOrd = CAST_PTR(TestOrd, ssig.getDataPtrSend());
2840 testOrd->clear();
2841 testOrd->setTestCommand(TestOrd::Off);
2842
2843 return ss.sendSignal(nodeId, &ssig) == SEND_OK ? 0 : SEND_OR_RECEIVE_FAILED;
2844 }
2845
2846
2847 /*****************************************************************************
2848 * Dump state
2849 *****************************************************************************/
2850
2851 int
dumpState(int nodeId,const char * args)2852 MgmtSrvr::dumpState(int nodeId, const char* args)
2853 {
2854 // Convert the space separeted args
2855 // string to an int array
2856 Uint32 args_array[25];
2857 Uint32 numArgs = 0;
2858
2859 char buf[10];
2860 int b = 0;
2861 memset(buf, 0, 10);
2862 for (size_t i = 0; i <= strlen(args); i++){
2863 if (args[i] == ' ' || args[i] == 0){
2864 args_array[numArgs] = atoi(buf);
2865 numArgs++;
2866 memset(buf, 0, 10);
2867 b = 0;
2868 } else {
2869 buf[b] = args[i];
2870 b++;
2871 }
2872 }
2873
2874 return dumpState(nodeId, args_array, numArgs);
2875 }
2876
2877 int
dumpState(int nodeId,const Uint32 args[],Uint32 no)2878 MgmtSrvr::dumpState(int nodeId, const Uint32 args[], Uint32 no)
2879 {
2880 INIT_SIGNAL_SENDER(ss,nodeId);
2881
2882 const Uint32 len = no > 25 ? 25 : no;
2883
2884 SimpleSignal ssig;
2885 DumpStateOrd * const dumpOrd =
2886 CAST_PTR(DumpStateOrd, ssig.getDataPtrSend());
2887 ssig.set(ss,TestOrd::TraceAPI, CMVMI, GSN_DUMP_STATE_ORD, len);
2888 for(Uint32 i = 0; i<25; i++){
2889 if (i < len)
2890 dumpOrd->args[i] = args[i];
2891 else
2892 dumpOrd->args[i] = 0;
2893 }
2894
2895 int res = ss.sendSignal(nodeId, &ssig) == SEND_OK ? 0 :SEND_OR_RECEIVE_FAILED;
2896
2897 if (res == 0)
2898 {
2899 /**
2900 * In order to make DUMP (almost) syncronous,
2901 * make a syncronous request *after* the NDB_TAMPER
2902 */
2903 make_sync_req(ss, Uint32(nodeId));
2904 }
2905
2906 return res;
2907
2908 }
2909
2910
2911 //****************************************************************************
2912 //****************************************************************************
2913
getErrorText(int errorCode,char * buf,int buf_sz)2914 const char* MgmtSrvr::getErrorText(int errorCode, char *buf, int buf_sz)
2915 {
2916 ndb_error_string(errorCode, buf, buf_sz);
2917 buf[buf_sz-1]= 0;
2918 return buf;
2919 }
2920
2921
2922 void
trp_deliver_signal(const NdbApiSignal * signal,const LinearSectionPtr ptr[3])2923 MgmtSrvr::trp_deliver_signal(const NdbApiSignal* signal,
2924 const LinearSectionPtr ptr[3])
2925 {
2926 int gsn = signal->readSignalNumber();
2927
2928 switch (gsn) {
2929 case GSN_EVENT_REP:
2930 {
2931 eventReport(signal->getDataPtr(), signal->getLength());
2932 break;
2933 }
2934
2935 case GSN_NF_COMPLETEREP:
2936 break;
2937 case GSN_TAMPER_ORD:
2938 ndbout << "TAMPER ORD" << endl;
2939 break;
2940 case GSN_API_REGCONF:
2941 case GSN_TAKE_OVERTCCONF:
2942 break;
2943 case GSN_CONNECT_REP:{
2944 Uint32 nodeId = signal->getDataPtr()[0];
2945
2946 union {
2947 Uint32 theData[25];
2948 EventReport repData;
2949 };
2950 EventReport * rep = &repData;
2951 theData[1] = nodeId;
2952 rep->setEventType(NDB_LE_Connected);
2953
2954 if (nodeTypes[nodeId] == NODE_TYPE_DB)
2955 {
2956 m_started_nodes.push_back(nodeId);
2957 }
2958 rep->setEventType(NDB_LE_Connected);
2959 rep->setNodeId(_ownNodeId);
2960 eventReport(theData, 1);
2961 return;
2962 }
2963 case GSN_NODE_FAILREP:
2964 {
2965 union {
2966 Uint32 theData[25];
2967 EventReport repData;
2968 };
2969 bzero(theData, sizeof(theData));
2970 EventReport * event = &repData;
2971 event->setEventType(NDB_LE_Disconnected);
2972 event->setNodeId(_ownNodeId);
2973
2974 const NodeFailRep *rep = CAST_CONSTPTR(NodeFailRep,
2975 signal->getDataPtr());
2976 for (Uint32 i = NdbNodeBitmask::find_first(rep->theNodes);
2977 i != NdbNodeBitmask::NotFound;
2978 i = NdbNodeBitmask::find_next(rep->theNodes, i + 1))
2979 {
2980 theData[1] = i;
2981 eventReport(theData, 1);
2982 }
2983 return;
2984 }
2985 default:
2986 g_eventLogger->error("Unknown signal received. SignalNumber: "
2987 "%i from (%d, 0x%x)",
2988 gsn,
2989 refToNode(signal->theSendersBlockRef),
2990 refToBlock(signal->theSendersBlockRef));
2991 assert(false);
2992 }
2993 }
2994
2995
2996 void
trp_node_status(Uint32 nodeId,Uint32 _event)2997 MgmtSrvr::trp_node_status(Uint32 nodeId, Uint32 _event)
2998 {
2999 }
3000
3001 enum ndb_mgm_node_type
getNodeType(NodeId nodeId) const3002 MgmtSrvr::getNodeType(NodeId nodeId) const
3003 {
3004 if(nodeId >= MAX_NODES)
3005 return (enum ndb_mgm_node_type)-1;
3006
3007 return nodeTypes[nodeId];
3008 }
3009
get_connect_address(Uint32 node_id)3010 const char *MgmtSrvr::get_connect_address(Uint32 node_id)
3011 {
3012 if (theFacade &&
3013 m_connect_address[node_id].s_addr == 0 &&
3014 (getNodeType(node_id) == NDB_MGM_NODE_TYPE_MGM ||
3015 getNodeType(node_id) == NDB_MGM_NODE_TYPE_NDB))
3016 {
3017 const trp_node &node= getNodeInfo(node_id);
3018 if (node.is_connected())
3019 {
3020 m_connect_address[node_id] = theFacade->ext_get_connect_address(node_id);
3021 }
3022 }
3023 return inet_ntoa(m_connect_address[node_id]);
3024 }
3025
3026 void
get_connected_nodes(NodeBitmask & connected_nodes) const3027 MgmtSrvr::get_connected_nodes(NodeBitmask &connected_nodes) const
3028 {
3029 if (theFacade)
3030 {
3031 for(Uint32 i = 0; i < MAX_NDB_NODES; i++)
3032 {
3033 if (getNodeType(i) == NDB_MGM_NODE_TYPE_NDB)
3034 {
3035 const trp_node &node= getNodeInfo(i);
3036 connected_nodes.bitOR(node.m_state.m_connected_nodes);
3037 }
3038 }
3039 }
3040 }
3041
3042 int
alloc_node_id_req(NodeId free_node_id,enum ndb_mgm_node_type type,Uint32 timeout_ms)3043 MgmtSrvr::alloc_node_id_req(NodeId free_node_id,
3044 enum ndb_mgm_node_type type,
3045 Uint32 timeout_ms)
3046 {
3047 SignalSender ss(theFacade);
3048 ss.lock(); // lock will be released on exit
3049
3050 SimpleSignal ssig;
3051 AllocNodeIdReq* req = CAST_PTR(AllocNodeIdReq, ssig.getDataPtrSend());
3052 ssig.set(ss, TestOrd::TraceAPI, QMGR, GSN_ALLOC_NODEID_REQ,
3053 AllocNodeIdReq::SignalLength);
3054
3055 req->senderRef = ss.getOwnRef();
3056 req->senderData = 19;
3057 req->nodeId = free_node_id;
3058 req->nodeType = type;
3059 req->timeout = timeout_ms;
3060
3061 int do_send = 1;
3062 NodeId nodeId = 0;
3063 while (1)
3064 {
3065 if (nodeId == 0)
3066 {
3067 bool next;
3068 while((next = getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB)) == true &&
3069 getNodeInfo(nodeId).m_alive == false);
3070 if (!next)
3071 return NO_CONTACT_WITH_DB_NODES;
3072 do_send = 1;
3073 }
3074 if (do_send)
3075 {
3076 if (ss.sendSignal(nodeId, &ssig) != SEND_OK) {
3077 return SEND_OR_RECEIVE_FAILED;
3078 }
3079 do_send = 0;
3080 }
3081
3082 SimpleSignal *signal = ss.waitFor();
3083
3084 int gsn = signal->readSignalNumber();
3085 switch (gsn) {
3086 case GSN_ALLOC_NODEID_CONF:
3087 {
3088 #ifdef NOT_USED
3089 const AllocNodeIdConf * const conf =
3090 CAST_CONSTPTR(AllocNodeIdConf, signal->getDataPtr());
3091 #endif
3092 return 0;
3093 }
3094 case GSN_ALLOC_NODEID_REF:
3095 {
3096 const AllocNodeIdRef * const ref =
3097 CAST_CONSTPTR(AllocNodeIdRef, signal->getDataPtr());
3098 if (ref->errorCode == AllocNodeIdRef::NotMaster ||
3099 ref->errorCode == AllocNodeIdRef::Busy ||
3100 ref->errorCode == AllocNodeIdRef::NodeFailureHandlingNotCompleted)
3101 {
3102 do_send = 1;
3103 nodeId = refToNode(ref->masterRef);
3104 if (!getNodeInfo(nodeId).m_alive)
3105 nodeId = 0;
3106 if (ref->errorCode != AllocNodeIdRef::NotMaster)
3107 {
3108 /* sleep for a while (100ms) before retrying */
3109 ss.unlock();
3110 NdbSleep_MilliSleep(100);
3111 ss.lock();
3112 }
3113 continue;
3114 }
3115 return ref->errorCode;
3116 }
3117 case GSN_NF_COMPLETEREP:
3118 {
3119 continue;
3120 }
3121 case GSN_NODE_FAILREP:{
3122 /**
3123 * ok to trap using NODE_FAILREP
3124 * as we don't really wait on anything interesting
3125 */
3126 const NodeFailRep * const rep =
3127 CAST_CONSTPTR(NodeFailRep, signal->getDataPtr());
3128 if (NdbNodeBitmask::get(rep->theNodes, nodeId))
3129 {
3130 do_send = 1;
3131 nodeId = 0;
3132 }
3133 continue;
3134 }
3135 case GSN_API_REGCONF:
3136 case GSN_TAKE_OVERTCCONF:
3137 case GSN_CONNECT_REP:
3138 continue;
3139 default:
3140 report_unknown_signal(signal);
3141 return SEND_OR_RECEIVE_FAILED;
3142 }
3143 }
3144 return 0;
3145 }
3146
3147 static int
match_hostname(const struct sockaddr * clnt_addr,const char * config_hostname)3148 match_hostname(const struct sockaddr *clnt_addr,
3149 const char *config_hostname)
3150 {
3151 struct in_addr config_addr= {0};
3152 if (clnt_addr)
3153 {
3154 const struct in_addr *clnt_in_addr = &((sockaddr_in*)clnt_addr)->sin_addr;
3155
3156 if (Ndb_getInAddr(&config_addr, config_hostname) != 0
3157 || memcmp(&config_addr, clnt_in_addr, sizeof(config_addr)) != 0)
3158 {
3159 struct in_addr tmp_addr;
3160 if (Ndb_getInAddr(&tmp_addr, "localhost") != 0
3161 || memcmp(&tmp_addr, clnt_in_addr, sizeof(config_addr)) != 0)
3162 {
3163 // not localhost
3164 return -1;
3165 }
3166
3167 // connecting through localhost
3168 // check if config_hostname is local
3169 if (!SocketServer::tryBind(0, config_hostname))
3170 return -1;
3171 }
3172 }
3173 else
3174 {
3175 if (!SocketServer::tryBind(0, config_hostname))
3176 return -1;
3177 }
3178 return 0;
3179 }
3180
3181 int
find_node_type(unsigned node_id,enum ndb_mgm_node_type type,const struct sockaddr * client_addr,NodeBitmask & nodes,NodeBitmask & exact_nodes,Vector<struct nodeid_and_host> & nodes_info,int & error_code,BaseString & error_string)3182 MgmtSrvr::find_node_type(unsigned node_id, enum ndb_mgm_node_type type,
3183 const struct sockaddr *client_addr,
3184 NodeBitmask &nodes,
3185 NodeBitmask &exact_nodes,
3186 Vector<struct nodeid_and_host> &nodes_info,
3187 int &error_code, BaseString &error_string)
3188 {
3189 const char *found_config_hostname= 0;
3190 unsigned type_c= (unsigned)type;
3191
3192 Guard g(m_local_config_mutex);
3193
3194 ConfigIter iter(m_local_config, CFG_SECTION_NODE);
3195 for(iter.first(); iter.valid(); iter.next())
3196 {
3197 unsigned id;
3198 if (iter.get(CFG_NODE_ID, &id))
3199 require(false);
3200 if (node_id && node_id != id)
3201 continue;
3202 if (iter.get(CFG_TYPE_OF_SECTION, &type_c))
3203 require(false);
3204 if (type_c != (unsigned)type)
3205 {
3206 if (!node_id)
3207 continue;
3208 goto error;
3209 }
3210 const char *config_hostname= 0;
3211 if (iter.get(CFG_NODE_HOST, &config_hostname))
3212 require(false);
3213 if (config_hostname == 0 || config_hostname[0] == 0)
3214 {
3215 config_hostname= "";
3216 }
3217 else
3218 {
3219 found_config_hostname= config_hostname;
3220 if (match_hostname(client_addr, config_hostname))
3221 {
3222 if (!node_id)
3223 continue;
3224 goto error;
3225 }
3226 exact_nodes.set(id);
3227 }
3228 nodes.set(id);
3229 struct nodeid_and_host a= {id, config_hostname};
3230 nodes_info.push_back(a);
3231 if (node_id)
3232 break;
3233 }
3234 if (nodes_info.size() != 0)
3235 {
3236 return 0;
3237 }
3238
3239 error:
3240 /*
3241 lock on m_configMutex held because found_config_hostname may have
3242 reference inot config structure
3243 */
3244 error_code= NDB_MGM_ALLOCID_CONFIG_MISMATCH;
3245 if (node_id)
3246 {
3247 if (type_c != (unsigned) type)
3248 {
3249 BaseString type_string, type_c_string;
3250 const char *alias, *str;
3251 alias= ndb_mgm_get_node_type_alias_string(type, &str);
3252 type_string.assfmt("%s(%s)", alias, str);
3253 alias= ndb_mgm_get_node_type_alias_string((enum ndb_mgm_node_type)type_c,
3254 &str);
3255 type_c_string.assfmt("%s(%s)", alias, str);
3256 error_string.appfmt("Id %d configured as %s, connect attempted as %s.",
3257 node_id, type_c_string.c_str(),
3258 type_string.c_str());
3259 return -1;
3260 }
3261 if (found_config_hostname)
3262 {
3263 struct in_addr config_addr= {0};
3264 int r_config_addr= Ndb_getInAddr(&config_addr, found_config_hostname);
3265 error_string.appfmt("Connection with id %d done from wrong host ip %s,",
3266 node_id, inet_ntoa(((struct sockaddr_in *)
3267 (client_addr))->sin_addr));
3268 error_string.appfmt(" expected %s(%s).", found_config_hostname,
3269 r_config_addr ?
3270 "lookup failed" : inet_ntoa(config_addr));
3271 return -1;
3272 }
3273 error_string.appfmt("No node defined with id=%d in config file.", node_id);
3274 return -1;
3275 }
3276
3277 // node_id == 0 and nodes_info.size() == 0
3278 if (found_config_hostname)
3279 {
3280 error_string.appfmt("Connection done from wrong host ip %s.",
3281 (client_addr)?
3282 inet_ntoa(((struct sockaddr_in *)
3283 (client_addr))->sin_addr):"");
3284 return -1;
3285 }
3286
3287 error_string.append("No nodes defined in config file.");
3288 return -1;
3289 }
3290
3291 int
try_alloc(unsigned id,const char * config_hostname,enum ndb_mgm_node_type type,const struct sockaddr * client_addr,Uint32 timeout_ms)3292 MgmtSrvr::try_alloc(unsigned id, const char *config_hostname,
3293 enum ndb_mgm_node_type type,
3294 const struct sockaddr *client_addr,
3295 Uint32 timeout_ms)
3296 {
3297 if (theFacade && theFacade->ext_isConnected(id))
3298 {
3299 return -1;
3300 }
3301 if (client_addr != 0)
3302 {
3303 int res = alloc_node_id_req(id, type, timeout_ms);
3304 switch (res)
3305 {
3306 case 0:
3307 // ok continue
3308 break;
3309 case NO_CONTACT_WITH_DB_NODES:
3310 // ok continue
3311 break;
3312 default:
3313 // something wrong
3314 return -1;
3315 }
3316 }
3317
3318 DBUG_PRINT("info", ("allocating node id %d",id));
3319 {
3320 int r= 0;
3321 if (client_addr)
3322 {
3323 m_connect_address[id]= ((struct sockaddr_in *)client_addr)->sin_addr;
3324 }
3325 else if (config_hostname)
3326 {
3327 r= Ndb_getInAddr(&(m_connect_address[id]), config_hostname);
3328 }
3329 else
3330 {
3331 char name[256];
3332 r= gethostname(name, sizeof(name));
3333 if (r == 0)
3334 {
3335 name[sizeof(name)-1]= 0;
3336 r= Ndb_getInAddr(&(m_connect_address[id]), name);
3337 }
3338 }
3339 if (r)
3340 {
3341 m_connect_address[id].s_addr= 0;
3342 }
3343 }
3344 if (theFacade && id != theFacade->ownId())
3345 {
3346 /**
3347 * Make sure we're ready to accept connections from this node
3348 */
3349 theFacade->ext_doConnect(id);
3350 }
3351
3352 g_eventLogger->info("Mgmt server state: nodeid %d reserved for ip %s, "
3353 "m_reserved_nodes %s.",
3354 id, get_connect_address(id),
3355 BaseString::getPrettyText(m_reserved_nodes).c_str());
3356
3357 return 0;
3358 }
3359
3360 bool
alloc_node_id(NodeId * nodeId,enum ndb_mgm_node_type type,const struct sockaddr * client_addr,SOCKET_SIZE_TYPE * client_addr_len,int & error_code,BaseString & error_string,int log_event,int timeout_s)3361 MgmtSrvr::alloc_node_id(NodeId * nodeId,
3362 enum ndb_mgm_node_type type,
3363 const struct sockaddr *client_addr,
3364 SOCKET_SIZE_TYPE *client_addr_len,
3365 int &error_code, BaseString &error_string,
3366 int log_event,
3367 int timeout_s)
3368 {
3369 DBUG_ENTER("MgmtSrvr::alloc_node_id");
3370 DBUG_PRINT("enter", ("nodeid: %d type: %d client_addr: 0x%ld",
3371 *nodeId, type, (long) client_addr));
3372 if (m_opts.no_nodeid_checks) {
3373 if (*nodeId == 0) {
3374 error_string.appfmt("no-nodeid-checks set in management server. "
3375 "node id must be set explicitly in connectstring");
3376 error_code = NDB_MGM_ALLOCID_CONFIG_MISMATCH;
3377 DBUG_RETURN(false);
3378 }
3379 DBUG_RETURN(true);
3380 }
3381
3382 Uint32 timeout_ms = Uint32(1000 * timeout_s);
3383 Uint64 stop = NdbTick_CurrentMillisecond() + timeout_ms;
3384 BaseString getconfig_message;
3385 while (!m_config_manager->get_packed_config(type, 0, getconfig_message))
3386 {
3387 /**
3388 * Wait for config to get confirmed before allocating node id
3389 */
3390 if (NdbTick_CurrentMillisecond() > stop)
3391 {
3392 error_code = NDB_MGM_ALLOCID_ERROR;
3393 error_string.append("Unable to allocate nodeid as configuration"
3394 " not yet confirmed");
3395 DBUG_RETURN(false);
3396 }
3397
3398 NdbSleep_MilliSleep(20);
3399 }
3400
3401 Guard g(m_node_id_mutex);
3402
3403 NodeBitmask connected_nodes;
3404 get_connected_nodes(connected_nodes);
3405
3406 NodeBitmask nodes, exact_nodes;
3407 Vector<struct nodeid_and_host> nodes_info;
3408
3409 /* find all nodes with correct type */
3410 if (find_node_type(*nodeId, type, client_addr, nodes, exact_nodes, nodes_info,
3411 error_code, error_string))
3412 goto error;
3413
3414 // nodes_info.size() == 0 handled inside find_node_type
3415 DBUG_ASSERT(nodes_info.size() != 0);
3416
3417 if (type == NDB_MGM_NODE_TYPE_MGM && nodes_info.size() > 1)
3418 {
3419 // mgmt server may only have one match
3420 error_string.appfmt("Ambiguous node id's %d and %d. "
3421 "Suggest specifying node id in connectstring, "
3422 "or specifying unique host names in config file.",
3423 nodes_info[0].id, nodes_info[1].id);
3424 error_code= NDB_MGM_ALLOCID_CONFIG_MISMATCH;
3425 goto error;
3426 }
3427
3428 /* remove connected and reserved nodes from possible nodes to allocate */
3429 nodes.bitANDC(connected_nodes);
3430 nodes.bitANDC(m_reserved_nodes);
3431
3432 /* first try all nodes with exact match of hostname */
3433 for (Uint32 i = 0; i < nodes_info.size(); i++)
3434 {
3435 unsigned id= nodes_info[i].id;
3436 if (!nodes.get(id))
3437 continue;
3438
3439 if (!exact_nodes.get(id))
3440 continue;
3441
3442 const char *config_hostname= nodes_info[i].host.c_str();
3443 /**
3444 * set bit as reserved, release mutex, try-alloc reaquire mutex
3445 * and clear bit if alloc failed
3446 */
3447 m_reserved_nodes.set(id);
3448 NdbMutex_Unlock(m_node_id_mutex);
3449 if (!try_alloc(id, config_hostname, type, client_addr, timeout_ms))
3450 {
3451 NdbMutex_Lock(m_node_id_mutex);
3452 // success
3453 *nodeId= id;
3454 DBUG_RETURN(true);
3455 }
3456 NdbMutex_Lock(m_node_id_mutex);
3457 m_reserved_nodes.clear(id);
3458 }
3459
3460 /* now try the open nodes */
3461 for (Uint32 i = 0; i < nodes_info.size(); i++)
3462 {
3463 unsigned id= nodes_info[i].id;
3464 if (!nodes.get(id))
3465 continue;
3466
3467 /**
3468 * exact node tried in loop above
3469 */
3470 if (exact_nodes.get(id))
3471 continue;
3472
3473 /**
3474 * set bit as reserved, release mutex, try-alloc reaquire mutex
3475 * and clear bit if alloc failed
3476 */
3477 m_reserved_nodes.set(id);
3478 NdbMutex_Unlock(m_node_id_mutex);
3479 if (!try_alloc(id, NULL, type, client_addr, timeout_ms))
3480 {
3481 NdbMutex_Lock(m_node_id_mutex);
3482 // success
3483 *nodeId= id;
3484 DBUG_RETURN(true);
3485 }
3486 NdbMutex_Lock(m_node_id_mutex);
3487 m_reserved_nodes.clear(id);
3488 }
3489
3490 /*
3491 there are nodes with correct type available but
3492 allocation failed for some reason
3493 */
3494 if (*nodeId)
3495 {
3496 error_string.appfmt("Id %d already allocated by another node.",
3497 *nodeId);
3498 }
3499 else
3500 {
3501 const char *alias, *str;
3502 alias= ndb_mgm_get_node_type_alias_string(type, &str);
3503 error_string.appfmt("No free node id found for %s(%s).",
3504 alias, str);
3505 }
3506 error_code = NDB_MGM_ALLOCID_ERROR;
3507
3508 error:
3509 if (error_code != NDB_MGM_ALLOCID_CONFIG_MISMATCH)
3510 {
3511 // we have a temporary error which might be due to that
3512 // we have got the latest connect status from db-nodes. Force update.
3513 updateStatus();
3514 }
3515
3516 if (log_event || error_code == NDB_MGM_ALLOCID_CONFIG_MISMATCH)
3517 {
3518 g_eventLogger->warning("Allocate nodeid (%d) failed. Connection from ip %s."
3519 " Returned error string \"%s\"",
3520 *nodeId,
3521 client_addr != 0
3522 ? inet_ntoa(((struct sockaddr_in *)
3523 (client_addr))->sin_addr)
3524 : "<none>",
3525 error_string.c_str());
3526
3527 BaseString tmp_connected, tmp_not_connected;
3528 for(Uint32 i = 0; i < MAX_NODES; i++)
3529 {
3530 if (connected_nodes.get(i))
3531 {
3532 if (!m_reserved_nodes.get(i))
3533 {
3534 tmp_connected.appfmt("%d ", i);
3535 }
3536 }
3537 else if (m_reserved_nodes.get(i))
3538 {
3539 tmp_not_connected.appfmt("%d ", i);
3540 }
3541 }
3542
3543 if (tmp_connected.length() > 0)
3544 {
3545 g_eventLogger->info
3546 ("Mgmt server state: node id's %sconnected but not reserved",
3547 tmp_connected.c_str());
3548 }
3549
3550 if (tmp_not_connected.length() > 0)
3551 {
3552 g_eventLogger->info
3553 ("Mgmt server state: node id's %snot connected but reserved",
3554 tmp_not_connected.c_str());
3555 }
3556 }
3557 DBUG_RETURN(false);
3558 }
3559
3560
3561 bool
getNextNodeId(NodeId * nodeId,enum ndb_mgm_node_type type) const3562 MgmtSrvr::getNextNodeId(NodeId * nodeId, enum ndb_mgm_node_type type) const
3563 {
3564 NodeId tmp = * nodeId;
3565
3566 tmp++;
3567 while(nodeTypes[tmp] != type && tmp < MAX_NODES)
3568 tmp++;
3569
3570 if(tmp == MAX_NODES){
3571 return false;
3572 }
3573
3574 * nodeId = tmp;
3575 return true;
3576 }
3577
3578 #include "Services.hpp"
3579
3580 void
eventReport(const Uint32 * theData,Uint32 len)3581 MgmtSrvr::eventReport(const Uint32 * theData, Uint32 len)
3582 {
3583 const EventReport * const eventReport = (EventReport *)&theData[0];
3584
3585 NodeId nodeId = eventReport->getNodeId();
3586 Ndb_logevent_type type = eventReport->getEventType();
3587 // Log event
3588 g_eventLogger->log(type, theData, len, nodeId,
3589 &m_event_listner[0].m_logLevel);
3590 m_event_listner.log(type, theData, len, nodeId);
3591 }
3592
3593 /***************************************************************************
3594 * Backup
3595 ***************************************************************************/
3596
3597 int
startBackup(Uint32 & backupId,int waitCompleted,Uint32 input_backupId,Uint32 backuppoint)3598 MgmtSrvr::startBackup(Uint32& backupId, int waitCompleted, Uint32 input_backupId, Uint32 backuppoint)
3599 {
3600 SignalSender ss(theFacade);
3601 ss.lock(); // lock will be released on exit
3602
3603 NodeId nodeId = m_master_node;
3604 if (okToSendTo(nodeId, false) != 0)
3605 {
3606 bool next;
3607 nodeId = m_master_node = 0;
3608 while((next = getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB)) == true &&
3609 okToSendTo(nodeId, false) != 0);
3610 if(!next)
3611 return NO_CONTACT_WITH_DB_NODES;
3612 }
3613
3614 SimpleSignal ssig;
3615 BackupReq* req = CAST_PTR(BackupReq, ssig.getDataPtrSend());
3616 /*
3617 * Single-threaded backup. Set instance key 1. In the kernel
3618 * this maps to main instance 0 or worker instance 1 (if MT LQH).
3619 */
3620 BlockNumber backupBlockNo = numberToBlock(BACKUP, 1);
3621 if(input_backupId > 0)
3622 {
3623 ssig.set(ss, TestOrd::TraceAPI, backupBlockNo, GSN_BACKUP_REQ,
3624 BackupReq::SignalLength);
3625 req->inputBackupId = input_backupId;
3626 }
3627 else
3628 ssig.set(ss, TestOrd::TraceAPI, backupBlockNo, GSN_BACKUP_REQ,
3629 BackupReq::SignalLength - 1);
3630
3631 req->senderData = 19;
3632 req->backupDataLen = 0;
3633 assert(waitCompleted < 3);
3634 req->flags = waitCompleted & 0x3;
3635 if(backuppoint == 1)
3636 req->flags |= BackupReq::USE_UNDO_LOG;
3637
3638 int do_send = 1;
3639 while (1) {
3640 if (do_send)
3641 {
3642 if (ss.sendSignal(nodeId, &ssig) != SEND_OK) {
3643 return SEND_OR_RECEIVE_FAILED;
3644 }
3645 if (waitCompleted == 0)
3646 return 0;
3647 do_send = 0;
3648 }
3649 SimpleSignal *signal = ss.waitFor();
3650
3651 int gsn = signal->readSignalNumber();
3652 switch (gsn) {
3653 case GSN_BACKUP_CONF:{
3654 const BackupConf * const conf =
3655 CAST_CONSTPTR(BackupConf, signal->getDataPtr());
3656 #ifdef VM_TRACE
3657 ndbout_c("Backup(%d) master is %d", conf->backupId,
3658 refToNode(signal->header.theSendersBlockRef));
3659 #endif
3660 backupId = conf->backupId;
3661 if (waitCompleted == 1)
3662 return 0;
3663 // wait for next signal
3664 break;
3665 }
3666 case GSN_BACKUP_COMPLETE_REP:{
3667 const BackupCompleteRep * const rep =
3668 CAST_CONSTPTR(BackupCompleteRep, signal->getDataPtr());
3669 #ifdef VM_TRACE
3670 ndbout_c("Backup(%d) completed", rep->backupId);
3671 #endif
3672 backupId = rep->backupId;
3673 return 0;
3674 }
3675 case GSN_BACKUP_REF:{
3676 const BackupRef * const ref =
3677 CAST_CONSTPTR(BackupRef, signal->getDataPtr());
3678 if(ref->errorCode == BackupRef::IAmNotMaster){
3679 m_master_node = nodeId = refToNode(ref->masterRef);
3680 #ifdef VM_TRACE
3681 ndbout_c("I'm not master resending to %d", nodeId);
3682 #endif
3683 do_send = 1; // try again
3684 if (!getNodeInfo(nodeId).m_alive)
3685 m_master_node = nodeId = 0;
3686 continue;
3687 }
3688 return ref->errorCode;
3689 }
3690 case GSN_BACKUP_ABORT_REP:{
3691 const BackupAbortRep * const rep =
3692 CAST_CONSTPTR(BackupAbortRep, signal->getDataPtr());
3693 #ifdef VM_TRACE
3694 ndbout_c("Backup %d aborted", rep->backupId);
3695 #endif
3696 return rep->reason;
3697 }
3698 case GSN_NF_COMPLETEREP:{
3699 const NFCompleteRep * const rep =
3700 CAST_CONSTPTR(NFCompleteRep, signal->getDataPtr());
3701 #ifdef VM_TRACE
3702 ndbout_c("Node %d fail completed", rep->failedNodeId);
3703 #endif
3704 if (rep->failedNodeId == nodeId ||
3705 waitCompleted == 1)
3706 return 1326;
3707 // wait for next signal
3708 // master node will report aborted backup
3709 break;
3710 }
3711 case GSN_NODE_FAILREP:{
3712 const NodeFailRep * const rep =
3713 CAST_CONSTPTR(NodeFailRep, signal->getDataPtr());
3714 if (NdbNodeBitmask::get(rep->theNodes,nodeId) ||
3715 waitCompleted == 1)
3716 return 1326;
3717 // wait for next signal
3718 // master node will report aborted backup
3719 break;
3720 }
3721 case GSN_API_REGCONF:
3722 case GSN_TAKE_OVERTCCONF:
3723 case GSN_CONNECT_REP:
3724 continue;
3725 default:
3726 report_unknown_signal(signal);
3727 return SEND_OR_RECEIVE_FAILED;
3728 }
3729 }
3730 }
3731
3732 int
abortBackup(Uint32 backupId)3733 MgmtSrvr::abortBackup(Uint32 backupId)
3734 {
3735 SignalSender ss(theFacade);
3736 ss.lock(); // lock will be released on exit
3737
3738 bool next;
3739 NodeId nodeId = 0;
3740 while((next = getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB)) == true &&
3741 getNodeInfo(nodeId).m_alive == false);
3742
3743 if(!next){
3744 return NO_CONTACT_WITH_DB_NODES;
3745 }
3746
3747 SimpleSignal ssig;
3748
3749 AbortBackupOrd* ord = CAST_PTR(AbortBackupOrd, ssig.getDataPtrSend());
3750 /*
3751 * Single-threaded backup. Set instance key 1. In the kernel
3752 * this maps to main instance 0 or worker instance 1 (if MT LQH).
3753 */
3754 BlockNumber backupBlockNo = numberToBlock(BACKUP, 1);
3755 ssig.set(ss, TestOrd::TraceAPI, backupBlockNo, GSN_ABORT_BACKUP_ORD,
3756 AbortBackupOrd::SignalLength);
3757
3758 ord->requestType = AbortBackupOrd::ClientAbort;
3759 ord->senderData = 19;
3760 ord->backupId = backupId;
3761
3762 return ss.sendSignal(nodeId, &ssig) == SEND_OK ? 0 : SEND_OR_RECEIVE_FAILED;
3763 }
3764
3765
Allocated_resources(MgmtSrvr & m)3766 MgmtSrvr::Allocated_resources::Allocated_resources(MgmtSrvr &m)
3767 : m_mgmsrv(m)
3768 {
3769 m_reserved_nodes.clear();
3770 m_alloc_timeout= 0;
3771 }
3772
~Allocated_resources()3773 MgmtSrvr::Allocated_resources::~Allocated_resources()
3774 {
3775 if (m_reserved_nodes.isclear())
3776 {
3777 /**
3778 * No need to aquire mutex if we didn't have any reservation in
3779 * our sesssion
3780 */
3781 return;
3782 }
3783
3784 Guard g(m_mgmsrv.m_node_id_mutex);
3785 m_mgmsrv.m_reserved_nodes.bitANDC(m_reserved_nodes);
3786
3787 // node has been reserved, force update signal to ndb nodes
3788 m_mgmsrv.updateStatus();
3789
3790 g_eventLogger->
3791 info("Mgmt server state: nodeid %d freed, m_reserved_nodes %s.",
3792 get_nodeid(),
3793 BaseString::getPrettyText(m_mgmsrv.m_reserved_nodes).c_str());
3794 }
3795
3796 void
reserve_node(NodeId id,NDB_TICKS timeout)3797 MgmtSrvr::Allocated_resources::reserve_node(NodeId id, NDB_TICKS timeout)
3798 {
3799 m_reserved_nodes.set(id);
3800 m_alloc_timeout= NdbTick_CurrentMillisecond() + timeout;
3801 }
3802
3803 bool
is_timed_out(NDB_TICKS tick)3804 MgmtSrvr::Allocated_resources::is_timed_out(NDB_TICKS tick)
3805 {
3806 if (m_alloc_timeout && tick > m_alloc_timeout)
3807 {
3808 g_eventLogger->info("Mgmt server state: nodeid %d timed out.",
3809 get_nodeid());
3810 return true;
3811 }
3812 return false;
3813 }
3814
3815 NodeId
get_nodeid() const3816 MgmtSrvr::Allocated_resources::get_nodeid() const
3817 {
3818 for(Uint32 i = 0; i < MAX_NODES; i++)
3819 {
3820 if (m_reserved_nodes.get(i))
3821 return i;
3822 }
3823 return 0;
3824 }
3825
3826 int
setDbParameter(int node,int param,const char * value,BaseString & msg)3827 MgmtSrvr::setDbParameter(int node, int param, const char * value,
3828 BaseString& msg)
3829 {
3830
3831 Guard g(m_local_config_mutex);
3832
3833 /**
3834 * Check parameter
3835 */
3836 ConfigIter iter(m_local_config, CFG_SECTION_NODE);
3837 if(iter.first() != 0){
3838 msg.assign("Unable to find node section (iter.first())");
3839 return -1;
3840 }
3841
3842 Uint32 type = NODE_TYPE_DB + 1;
3843 if(node != 0){
3844 // Set parameter only in the specified node
3845 if(iter.find(CFG_NODE_ID, node) != 0){
3846 msg.assign("Unable to find node (iter.find())");
3847 return -1;
3848 }
3849 if(iter.get(CFG_TYPE_OF_SECTION, &type) != 0){
3850 msg.assign("Unable to get node type(iter.get(CFG_TYPE_OF_SECTION))");
3851 return -1;
3852 }
3853 } else {
3854 // Set parameter in all DB nodes
3855 do {
3856 if(iter.get(CFG_TYPE_OF_SECTION, &type) != 0){
3857 msg.assign("Unable to get node type(iter.get(CFG_TYPE_OF_SECTION))");
3858 return -1;
3859 }
3860 if(type == NODE_TYPE_DB)
3861 break;
3862 } while(iter.next() == 0);
3863 }
3864
3865 if(type != NODE_TYPE_DB){
3866 msg.assfmt("Invalid node type or no such node (%d %d)",
3867 type, NODE_TYPE_DB);
3868 return -1;
3869 }
3870
3871 int p_type;
3872 unsigned val_32;
3873 Uint64 val_64;
3874 const char * val_char;
3875 do {
3876 p_type = 0;
3877 if(iter.get(param, &val_32) == 0){
3878 val_32 = atoi(value);
3879 break;
3880 }
3881
3882 p_type++;
3883 if(iter.get(param, &val_64) == 0){
3884 val_64 = strtoll(value, 0, 10);
3885 break;
3886 }
3887 p_type++;
3888 if(iter.get(param, &val_char) == 0){
3889 val_char = value;
3890 break;
3891 }
3892 msg.assign("Could not get parameter");
3893 return -1;
3894 } while(0);
3895
3896 bool res = false;
3897 do {
3898 int ret = iter.get(CFG_TYPE_OF_SECTION, &type);
3899 assert(ret == 0);
3900
3901 if(type != NODE_TYPE_DB)
3902 continue;
3903
3904 Uint32 node;
3905 ret = iter.get(CFG_NODE_ID, &node);
3906 assert(ret == 0);
3907
3908 ConfigValues::Iterator i2(m_local_config->m_configValues->m_config,
3909 iter.m_config);
3910 switch(p_type){
3911 case 0:
3912 res = i2.set(param, val_32);
3913 ndbout_c("Updating node %d param: %d to %d", node, param, val_32);
3914 break;
3915 case 1:
3916 res = i2.set(param, val_64);
3917 ndbout_c("Updating node %d param: %d to %u", node, param, val_32);
3918 break;
3919 case 2:
3920 res = i2.set(param, val_char);
3921 ndbout_c("Updating node %d param: %d to %s", node, param, val_char);
3922 break;
3923 default:
3924 require(false);
3925 }
3926 assert(res);
3927 } while(node == 0 && iter.next() == 0);
3928
3929 msg.assign("Success");
3930 return 0;
3931 }
3932
3933
3934 int
setConnectionDbParameter(int node1,int node2,int param,int value,BaseString & msg)3935 MgmtSrvr::setConnectionDbParameter(int node1, int node2,
3936 int param, int value,
3937 BaseString& msg)
3938 {
3939 DBUG_ENTER("MgmtSrvr::setConnectionDbParameter");
3940 DBUG_PRINT("enter", ("node1: %d, node2: %d, param: %d, value: %d",
3941 node1, node2, param, value));
3942
3943 // This function only supports setting dynamic ports
3944 if (param != CFG_CONNECTION_SERVER_PORT)
3945 {
3946 msg.assign("Only param CFG_CONNECTION_SERVER_PORT can be set");
3947 DBUG_RETURN(-1);
3948 }
3949
3950 if (!m_config_manager->set_dynamic_port(node1, node2, value, msg))
3951 DBUG_RETURN(-1);
3952
3953 DBUG_PRINT("exit", ("Set parameter(%d) to %d for %d -> %d",
3954 param, value, node1, node2));
3955 DBUG_RETURN(1);
3956 }
3957
3958
3959 int
getConnectionDbParameter(int node1,int node2,int param,int * value,BaseString & msg)3960 MgmtSrvr::getConnectionDbParameter(int node1, int node2,
3961 int param, int *value,
3962 BaseString& msg)
3963 {
3964 DBUG_ENTER("MgmtSrvr::getConnectionDbParameter");
3965 DBUG_PRINT("enter", ("node1: %d, node2: %d, param: %d",
3966 node1, node2, param));
3967
3968 // This function only supports asking about dynamic ports
3969 if (param != CFG_CONNECTION_SERVER_PORT)
3970 {
3971 msg.assign("Only param CFG_CONNECTION_SERVER_PORT can be retrieved");
3972 DBUG_RETURN(-1);
3973 }
3974
3975 if (!m_config_manager->get_dynamic_port(node1, node2, value, msg))
3976 DBUG_RETURN(-1);
3977
3978 DBUG_PRINT("exit", ("Return parameter(%d): %u for %d -> %d, msg: %s",
3979 param, *value, node1, node2, msg.c_str()));
3980 DBUG_RETURN(1);
3981 }
3982
3983
3984 bool
transporter_connect(NDB_SOCKET_TYPE sockfd,BaseString & msg)3985 MgmtSrvr::transporter_connect(NDB_SOCKET_TYPE sockfd, BaseString& msg)
3986 {
3987 DBUG_ENTER("MgmtSrvr::transporter_connect");
3988 TransporterRegistry* tr= theFacade->get_registry();
3989 if (!tr->connect_server(sockfd, msg))
3990 DBUG_RETURN(false);
3991
3992 /*
3993 Force an update_connections() so that the
3994 ClusterMgr and TransporterFacade is up to date
3995 with the new connection.
3996 Important for correct node id reservation handling
3997 */
3998 theFacade->ext_update_connections();
3999
4000 DBUG_RETURN(true);
4001 }
4002
4003
connect_to_self()4004 bool MgmtSrvr::connect_to_self()
4005 {
4006 BaseString buf;
4007 NdbMgmHandle mgm_handle= ndb_mgm_create_handle();
4008
4009 buf.assfmt("%s:%u",
4010 m_opts.bind_address ? m_opts.bind_address : "localhost",
4011 m_port);
4012 ndb_mgm_set_connectstring(mgm_handle, buf.c_str());
4013
4014 if(ndb_mgm_connect(mgm_handle, 0, 0, 0) < 0)
4015 {
4016 g_eventLogger->warning("%d %s",
4017 ndb_mgm_get_latest_error(mgm_handle),
4018 ndb_mgm_get_latest_error_desc(mgm_handle));
4019 ndb_mgm_destroy_handle(&mgm_handle);
4020 return false;
4021 }
4022 // TransporterRegistry now owns the handle and will destroy it.
4023 theFacade->get_registry()->set_mgm_handle(mgm_handle);
4024
4025 return true;
4026 }
4027
4028
4029 bool
change_config(Config & new_config,BaseString & msg)4030 MgmtSrvr::change_config(Config& new_config, BaseString& msg)
4031 {
4032 SignalSender ss(theFacade);
4033 ss.lock();
4034
4035 SimpleSignal ssig;
4036 UtilBuffer buf;
4037 new_config.pack(buf);
4038 ssig.ptr[0].p = (Uint32*)buf.get_data();
4039 ssig.ptr[0].sz = (buf.length() + 3) / 4;
4040 ssig.header.m_noOfSections = 1;
4041
4042 ConfigChangeReq *req= CAST_PTR(ConfigChangeReq, ssig.getDataPtrSend());
4043 req->length = buf.length();
4044
4045 NodeBitmask mgm_nodes;
4046 m_local_config->get_nodemask(mgm_nodes, NDB_MGM_NODE_TYPE_MGM);
4047
4048 NodeId nodeId= ss.find_confirmed_node(mgm_nodes);
4049 if (nodeId == 0)
4050 {
4051 msg = "INTERNAL ERROR Could not find any mgmd!";
4052 return false;
4053 }
4054
4055 if (ss.sendFragmentedSignal(nodeId, ssig,
4056 MGM_CONFIG_MAN, GSN_CONFIG_CHANGE_REQ,
4057 ConfigChangeReq::SignalLength) != 0)
4058 {
4059 msg.assfmt("Could not start configuration change, send to "
4060 "node %d failed", nodeId);
4061 return false;
4062 }
4063 mgm_nodes.clear(nodeId);
4064
4065 bool done = false;
4066 while(!done)
4067 {
4068 SimpleSignal *signal= ss.waitFor();
4069
4070 switch(signal->readSignalNumber()){
4071 case GSN_CONFIG_CHANGE_CONF:
4072 done= true;
4073 break;
4074 case GSN_CONFIG_CHANGE_REF:
4075 {
4076 const ConfigChangeRef * const ref =
4077 CAST_CONSTPTR(ConfigChangeRef, signal->getDataPtr());
4078 g_eventLogger->debug("Got CONFIG_CHANGE_REF, error: %d", ref->errorCode);
4079 switch(ref->errorCode)
4080 {
4081 case ConfigChangeRef::NotMaster:{
4082 // Retry with next node if any
4083 NodeId nodeId= ss.find_confirmed_node(mgm_nodes);
4084 if (nodeId == 0)
4085 {
4086 msg = "INTERNAL ERROR Could not find any mgmd!";
4087 return false;
4088 }
4089
4090 if (ss.sendFragmentedSignal(nodeId, ssig,
4091 MGM_CONFIG_MAN, GSN_CONFIG_CHANGE_REQ,
4092 ConfigChangeReq::SignalLength) != 0)
4093 {
4094 msg.assfmt("Could not start configuration change, send to "
4095 "node %d failed", nodeId);
4096 return false;
4097 }
4098 mgm_nodes.clear(nodeId);
4099 break;
4100 }
4101
4102 default:
4103 msg = ConfigChangeRef::errorMessage(ref->errorCode);
4104 return false;
4105 }
4106
4107 break;
4108 }
4109
4110 case GSN_API_REGCONF:
4111 case GSN_TAKE_OVERTCCONF:
4112 case GSN_CONNECT_REP:
4113 // Ignore;
4114 break;
4115
4116
4117 case GSN_NODE_FAILREP:
4118 // ignore, NF_COMPLETEREP will come
4119 break;
4120
4121 case GSN_NF_COMPLETEREP:
4122 {
4123 NodeId nodeId = refToNode(signal->header.theSendersBlockRef);
4124 msg.assign("Node %d failed during configuration change", nodeId);
4125 return false;
4126 break;
4127 }
4128
4129 default:
4130 report_unknown_signal(signal);
4131 return false;
4132
4133 }
4134 }
4135
4136 g_eventLogger->info("Config change completed");
4137
4138 return true;
4139 }
4140
4141
4142 void
print_config(const char * section_filter,NodeId nodeid_filter,const char * param_filter,NdbOut & out)4143 MgmtSrvr::print_config(const char* section_filter, NodeId nodeid_filter,
4144 const char* param_filter,
4145 NdbOut& out)
4146 {
4147 Guard g(m_local_config_mutex);
4148 m_local_config->print(section_filter, nodeid_filter,
4149 param_filter, out);
4150 }
4151
4152
4153 bool
reload_config(const char * config_filename,bool mycnf,BaseString & msg)4154 MgmtSrvr::reload_config(const char* config_filename, bool mycnf,
4155 BaseString& msg)
4156 {
4157 if (config_filename && mycnf)
4158 {
4159 msg = "ERROR: Both mycnf and config_filename is not supported";
4160 return false;
4161 }
4162
4163 if (config_filename)
4164 {
4165 if (m_opts.mycnf)
4166 {
4167 msg.assfmt("ERROR: Can't switch to use config.ini '%s' when "
4168 "node was started from my.cnf", config_filename);
4169 return false;
4170 }
4171 }
4172 else
4173 {
4174 if (mycnf)
4175 {
4176 // Reload from my.cnf
4177 if (!m_opts.mycnf)
4178 {
4179 if (m_opts.config_filename)
4180 {
4181 msg.assfmt("ERROR: Can't switch to use my.cnf when "
4182 "node was started from '%s'", m_opts.config_filename);
4183 return false;
4184 }
4185 }
4186 }
4187 else
4188 {
4189 /* No config file name supplied and not told to use mycnf */
4190 if (m_opts.config_filename)
4191 {
4192 g_eventLogger->info("No config file name supplied, using '%s'",
4193 m_opts.config_filename);
4194 config_filename = m_opts.config_filename;
4195 }
4196 else
4197 {
4198 msg = "ERROR: Neither config file name or mycnf available";
4199 return false;
4200 }
4201 }
4202 }
4203
4204 Config* new_conf_ptr;
4205 if ((new_conf_ptr= ConfigManager::load_config(config_filename,
4206 mycnf, msg)) == NULL)
4207 return false;
4208 Config new_conf(new_conf_ptr);
4209
4210 {
4211 Guard g(m_local_config_mutex);
4212
4213 /* Copy the necessary values from old to new config */
4214 if (!new_conf.setGeneration(m_local_config->getGeneration()) ||
4215 !new_conf.setName(m_local_config->getName()) ||
4216 !new_conf.setPrimaryMgmNode(m_local_config->getPrimaryMgmNode()))
4217 {
4218 msg = "Failed to initialize reloaded config";
4219 return false;
4220 }
4221 }
4222
4223 if (!change_config(new_conf, msg))
4224 return false;
4225 return true;
4226 }
4227
4228 void
show_variables(NdbOut & out)4229 MgmtSrvr::show_variables(NdbOut& out)
4230 {
4231 out << "daemon: " << yes_no(m_opts.daemon) << endl;
4232 out << "non_interactive: " << yes_no(m_opts.non_interactive) << endl;
4233 out << "interactive: " << yes_no(m_opts.interactive) << endl;
4234 out << "config_filename: " << str_null(m_opts.config_filename) << endl;
4235 out << "mycnf: " << yes_no(m_opts.mycnf) << endl;
4236 out << "bind_address: " << str_null(m_opts.bind_address) << endl;
4237 out << "no_nodeid_checks: " << yes_no(m_opts.no_nodeid_checks) << endl;
4238 out << "print_full_config: " << yes_no(m_opts.print_full_config) << endl;
4239 out << "configdir: " << str_null(m_opts.configdir) << endl;
4240 out << "verbose: " << yes_no(m_opts.verbose) << endl;
4241 out << "reload: " << yes_no(m_opts.reload) << endl;
4242
4243 out << "nodeid: " << _ownNodeId << endl;
4244 out << "blocknumber: " << hex <<_blockNumber << endl;
4245 out << "own_reference: " << hex << _ownReference << endl;
4246 out << "port: " << m_port << endl;
4247 out << "need_restart: " << m_need_restart << endl;
4248 out << "is_stop_thread: " << _isStopThread << endl;
4249 out << "log_level_thread_sleep: " << _logLevelThreadSleep << endl;
4250 out << "master_node: " << m_master_node << endl;
4251 }
4252
4253 void
make_sync_req(SignalSender & ss,Uint32 nodeId)4254 MgmtSrvr::make_sync_req(SignalSender& ss, Uint32 nodeId)
4255 {
4256 const trp_node node = ss.getNodeInfo(nodeId);
4257 if (!ndbd_sync_req_support(node.m_info.m_version))
4258 {
4259 /* The node hasn't got SYNC_REQ support */
4260 return;
4261 }
4262
4263 /**
4264 * This subroutine is used to make a async request(error insert/dump)
4265 * "more" syncronous, i.e increasing the likelyhood that
4266 * the async request has really reached the destination
4267 * before returning to the api
4268 *
4269 * I.e it's a work-around...
4270 *
4271 */
4272 SimpleSignal ssig;
4273 SyncReq* req = CAST_PTR(SyncReq, ssig.getDataPtrSend());
4274 req->senderRef = ss.getOwnRef();
4275 req->senderData = 12;
4276 req->prio = 1; // prio b
4277 ssig.set(ss,TestOrd::TraceAPI, CMVMI, GSN_SYNC_REQ, SyncReq::SignalLength);
4278
4279 if (ss.sendSignal(nodeId, &ssig) != SEND_OK)
4280 {
4281 return;
4282 }
4283
4284 while (true)
4285 {
4286 SimpleSignal *signal = ss.waitFor();
4287
4288 int gsn = signal->readSignalNumber();
4289 switch (gsn) {
4290 case GSN_SYNC_REF:
4291 case GSN_SYNC_CONF:
4292 return;
4293
4294 case GSN_NF_COMPLETEREP:{
4295 const NFCompleteRep * const rep =
4296 CAST_CONSTPTR(NFCompleteRep, signal->getDataPtr());
4297 if (rep->failedNodeId == nodeId)
4298 return;
4299 break;
4300 }
4301
4302 case GSN_NODE_FAILREP:{
4303 const NodeFailRep * const rep =
4304 CAST_CONSTPTR(NodeFailRep, signal->getDataPtr());
4305 if (NdbNodeBitmask::get(rep->theNodes,nodeId))
4306 return;
4307 break;
4308 }
4309 case GSN_API_REGCONF:
4310 case GSN_TAKE_OVERTCCONF:
4311 case GSN_CONNECT_REP:
4312 break;
4313 default:
4314 return;
4315 }
4316 }
4317 }
4318
4319
4320 bool
request_events(NdbNodeBitmask nodes,Uint32 reports_per_node,Uint32 dump_type,Vector<SimpleSignal> & events)4321 MgmtSrvr::request_events(NdbNodeBitmask nodes, Uint32 reports_per_node,
4322 Uint32 dump_type,
4323 Vector<SimpleSignal>& events)
4324 {
4325 int nodes_counter[MAX_NDB_NODES];
4326 SignalSender ss(theFacade);
4327 ss.lock();
4328
4329 // Send the dump command to all requested NDB nodes
4330 const bool all = nodes.isclear();
4331 for (int i = 1; i < MAX_NDB_NODES; i++)
4332 {
4333 // Check if node should be involved
4334 if (!all && !nodes.get(i))
4335 continue;
4336
4337 // Only request from confirmed DB nodes
4338 const trp_node node = ss.getNodeInfo(i);
4339 if (node.m_info.getType() != NodeInfo::DB ||
4340 !node.is_confirmed())
4341 {
4342 nodes.clear(i);
4343 continue;
4344 }
4345
4346 SimpleSignal ssig;
4347 DumpStateOrd * const dumpOrd = (DumpStateOrd*)ssig.getDataPtrSend();
4348
4349 dumpOrd->args[0] = dump_type;
4350 dumpOrd->args[1] = ss.getOwnRef(); // Return to sender
4351
4352 if (ss.sendSignal(i, ssig, CMVMI, GSN_DUMP_STATE_ORD, 2) == SEND_OK)
4353 {
4354 nodes.set(i);
4355 nodes_counter[i] = (int)reports_per_node;
4356 }
4357 }
4358
4359
4360 while (true)
4361 {
4362 // Check if all nodes are done
4363 if (nodes.isclear())
4364 break;
4365
4366 SimpleSignal *signal = ss.waitFor();
4367 switch (signal->readSignalNumber()) {
4368 case GSN_EVENT_REP:{
4369 const NodeId nodeid = refToNode(signal->header.theSendersBlockRef);
4370 const EventReport * const event =
4371 (const EventReport*)signal->getDataPtr();
4372
4373 if (!nodes.get(nodeid))
4374 {
4375 // The reporting node was not expected
4376 assert(false);
4377 return false;
4378 }
4379
4380 if (event->getEventType() == NDB_LE_SavedEvent &&
4381 signal->getDataPtr()[1] == 0)
4382 {
4383 nodes_counter[nodeid] = 1;
4384 }
4385 else
4386 {
4387 // Save signal
4388 events.push_back(SimpleSignal(*signal));
4389 }
4390
4391 // Check if node is done
4392 nodes_counter[nodeid]--;
4393 if (nodes_counter[nodeid] == 0)
4394 nodes.clear(nodeid);
4395
4396 break;
4397 }
4398
4399 case GSN_NODE_FAILREP:{
4400 const NodeFailRep * const rep =
4401 (const NodeFailRep*)signal->getDataPtr();
4402 for (NodeId i = 1; i < MAX_NDB_NODES; i++)
4403 {
4404 if (NdbNodeBitmask::get(rep->theNodes, i))
4405 {
4406 nodes.clear(i);
4407
4408 // Remove any previous reports from this node
4409 // it should not be reported
4410 for (unsigned j = 0; j < events.size(); j++)
4411 {
4412 const SimpleSignal& ssig = events[j];
4413 const NodeId nodeid = refToNode(ssig.header.theSendersBlockRef);
4414 if (nodeid == i)
4415 {
4416 events.erase(j);
4417 j--;
4418 }
4419 }
4420 }
4421 }
4422 break;
4423 }
4424
4425 default:
4426 // Ignore all other signals
4427 break;
4428 }
4429 }
4430 ss.unlock();
4431
4432 return true;
4433 }
4434
4435 template class MutexVector<NodeId>;
4436 template class MutexVector<Ndb_mgmd_event_service::Event_listener>;
4437 template class Vector<EventSubscribeReq>;
4438 template class MutexVector<EventSubscribeReq>;
4439 template class Vector< Vector<BaseString> >;
4440 template class Vector<MgmtSrvr::nodeid_and_host>;
4441 template class Vector<Defragger::DefragBuffer*>;
4442