1 /*
2 Copyright (c) 2003, 2021, Oracle and/or its affiliates.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License, version 2.0,
6 as published by the Free Software Foundation.
7
8 This program is also distributed with certain software (including
9 but not limited to OpenSSL) that is licensed under separate terms,
10 as designated in a particular file or component or in included license
11 documentation. The authors of MySQL hereby grant you an additional
12 permission to link the program and your derivative works with the
13 separately licensed software that they have included with MySQL.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License, version 2.0, for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
23 */
24
25 #include <ndb_global.h>
26
27 #include "MgmtSrvr.hpp"
28 #include "ndb_mgmd_error.h"
29 #include "Services.hpp"
30 #include "ConfigManager.hpp"
31 #include "Defragger.hpp"
32
33 #include <NdbOut.hpp>
34 #include <NdbApiSignal.hpp>
35 #include <kernel_types.h>
36 #include <GlobalSignalNumbers.h>
37 #include <signaldata/TestOrd.hpp>
38 #include <signaldata/TamperOrd.hpp>
39 #include <signaldata/StartOrd.hpp>
40 #include <signaldata/ApiVersion.hpp>
41 #include <signaldata/ResumeReq.hpp>
42 #include <signaldata/SetLogLevelOrd.hpp>
43 #include <signaldata/EventSubscribeReq.hpp>
44 #include <signaldata/EventReport.hpp>
45 #include <signaldata/DumpStateOrd.hpp>
46 #include <signaldata/BackupSignalData.hpp>
47 #include <signaldata/NFCompleteRep.hpp>
48 #include <signaldata/NodeFailRep.hpp>
49 #include <signaldata/AllocNodeId.hpp>
50 #include <signaldata/SchemaTrans.hpp>
51 #include <signaldata/CreateNodegroup.hpp>
52 #include <signaldata/DropNodegroup.hpp>
53 #include <signaldata/Sync.hpp>
54 #include <signaldata/GetConfig.hpp>
55 #include <NdbSleep.h>
56 #include <portlib/NdbDir.hpp>
57 #include <EventLogger.hpp>
58 #include <logger/FileLogHandler.hpp>
59 #include <logger/ConsoleLogHandler.hpp>
60 #include <logger/SysLogHandler.hpp>
61 #include <DebuggerNames.hpp>
62 #include <ndb_version.h>
63
64 #include <SocketServer.hpp>
65 #include <NdbConfig.h>
66
67 #include <NdbAutoPtr.hpp>
68 #include <NdbDir.hpp>
69 #include <ndberror.h>
70
71 #include <mgmapi.h>
72 #include <mgmapi_configuration.hpp>
73 #include <mgmapi_config_parameters.h>
74
75 #include <SignalSender.hpp>
76
/* Error-insert code that ERROR_INSERTED() compares against; starts at 0. */
int g_errorInsert = 0;
/*
  True when 'x' equals g_errorInsert. The argument is parenthesized so
  that an expression argument (e.g. "a | b") is compared as a whole.
*/
#define ERROR_INSERTED(x) (g_errorInsert == (x))
79
/*
  Declare a SignalSender named 'ss' on theFacade and lock it, then call
  okToSendTo(nodeId, true). A non-zero answer is returned from the
  enclosing function as is, so the macro only fits functions whose
  return value can carry that code.
*/
#define INIT_SIGNAL_SENDER(ss,nodeId) \
  SignalSender ss(theFacade); \
  ss.lock(); /* lock will be released on exit */ \
  { \
    const int okToSendResult = okToSendTo(nodeId, true); \
    if (okToSendResult != 0) \
      return okToSendResult; \
  }
89
90 extern "C" my_bool opt_core;
91
92 void *
logLevelThread_C(void * m)93 MgmtSrvr::logLevelThread_C(void* m)
94 {
95 MgmtSrvr *mgm = (MgmtSrvr*)m;
96 mgm->logLevelThreadRun();
97 return 0;
98 }
99
100 extern EventLogger * g_eventLogger;
101
102 #ifdef NOT_USED
103 static NdbOut&
operator <<(NdbOut & out,const LogLevel & ll)104 operator<<(NdbOut& out, const LogLevel & ll)
105 {
106 out << "[LogLevel: ";
107 for(size_t i = 0; i<LogLevel::LOGLEVEL_CATEGORIES; i++)
108 out << ll.getLogLevel((LogLevel::EventCategory)i) << " ";
109 out << "]";
110 return out;
111 }
112 #endif
113
114 void
logLevelThreadRun()115 MgmtSrvr::logLevelThreadRun()
116 {
117 while (!_isStopThread)
118 {
119 Vector<NodeId> failed_started_nodes;
120 Vector<EventSubscribeReq> failed_log_level_requests;
121
122 /**
123 * Handle started nodes
124 */
125 m_started_nodes.lock();
126 if (m_started_nodes.size() > 0)
127 {
128 // calculate max log level
129 EventSubscribeReq req;
130 {
131 LogLevel tmp;
132 m_event_listner.lock();
133 for(int i = m_event_listner.m_clients.size() - 1; i >= 0; i--)
134 tmp.set_max(m_event_listner[i].m_logLevel);
135 m_event_listner.unlock();
136 req.assign(tmp);
137 }
138 req.blockRef = _ownReference;
139 while (m_started_nodes.size() > 0)
140 {
141 Uint32 node = m_started_nodes[0];
142 m_started_nodes.erase(0, false);
143 m_started_nodes.unlock();
144
145 if (setEventReportingLevelImpl(node, req))
146 {
147 failed_started_nodes.push_back(node);
148 }
149 else
150 {
151 SetLogLevelOrd ord;
152 ord.assign(m_nodeLogLevel[node]);
153 setNodeLogLevelImpl(node, ord);
154 }
155 m_started_nodes.lock();
156 }
157 }
158 m_started_nodes.unlock();
159
160 m_log_level_requests.lock();
161 while (m_log_level_requests.size() > 0)
162 {
163 EventSubscribeReq req = m_log_level_requests[0];
164 m_log_level_requests.erase(0, false);
165 m_log_level_requests.unlock();
166
167 if(req.blockRef == 0)
168 {
169 req.blockRef = _ownReference;
170 if (setEventReportingLevelImpl(0, req))
171 {
172 failed_log_level_requests.push_back(req);
173 }
174 }
175 else
176 {
177 SetLogLevelOrd ord;
178 ord.assign(req);
179 if (setNodeLogLevelImpl(req.blockRef, ord))
180 {
181 failed_log_level_requests.push_back(req);
182 }
183 }
184 m_log_level_requests.lock();
185 }
186 m_log_level_requests.unlock();
187
188 if(!ERROR_INSERTED(10000))
189 m_event_listner.check_listeners();
190
191 Uint32 sleeptime = _logLevelThreadSleep;
192 if (failed_started_nodes.size())
193 {
194 m_started_nodes.lock();
195 for (Uint32 i = 0; i<failed_started_nodes.size(); i++)
196 m_started_nodes.push_back(failed_started_nodes[i], false);
197 m_started_nodes.unlock();
198 failed_started_nodes.clear();
199 sleeptime = 100;
200 }
201
202 if (failed_log_level_requests.size())
203 {
204 m_log_level_requests.lock();
205 for (Uint32 i = 0; i<failed_log_level_requests.size(); i++)
206 m_log_level_requests.push_back(failed_log_level_requests[i], false);
207 m_log_level_requests.unlock();
208 failed_log_level_requests.clear();
209 sleeptime = 100;
210 }
211
212 NdbSleep_MilliSleep(sleeptime);
213 }
214 }
215
216
217 static int
translateStopRef(Uint32 errCode)218 translateStopRef(Uint32 errCode)
219 {
220 switch(errCode){
221 case StopRef::NodeShutdownInProgress:
222 return NODE_SHUTDOWN_IN_PROGESS;
223 break;
224 case StopRef::SystemShutdownInProgress:
225 return SYSTEM_SHUTDOWN_IN_PROGRESS;
226 break;
227 case StopRef::NodeShutdownWouldCauseSystemCrash:
228 return NODE_SHUTDOWN_WOULD_CAUSE_SYSTEM_CRASH;
229 break;
230 case StopRef::UnsupportedNodeShutdown:
231 return UNSUPPORTED_NODE_SHUTDOWN;
232 break;
233 }
234 return 4999;
235 }
236
237
MgmtSrvr(const MgmtOpts & opts)238 MgmtSrvr::MgmtSrvr(const MgmtOpts& opts) :
239 m_opts(opts),
240 _blockNumber(-1),
241 _ownNodeId(0),
242 m_port(0),
243 m_local_config(NULL),
244 _ownReference(0),
245 m_config_manager(NULL),
246 m_need_restart(false),
247 theFacade(NULL),
248 _isStopThread(false),
249 _logLevelThreadSleep(500),
250 m_event_listner(this),
251 m_master_node(0),
252 _logLevelThread(NULL),
253 m_version_string(ndbGetOwnVersionString())
254 {
255 DBUG_ENTER("MgmtSrvr::MgmtSrvr");
256
257 m_local_config_mutex= NdbMutex_Create();
258 m_reserved_nodes_mutex= NdbMutex_Create();
259 if (!m_local_config_mutex || !m_reserved_nodes_mutex)
260 {
261 g_eventLogger->error("Failed to create MgmtSrvr mutexes");
262 require(false);
263 }
264
265 /* Init node arrays */
266 for(Uint32 i = 0; i<MAX_NODES; i++) {
267 nodeTypes[i] = (enum ndb_mgm_node_type)-1;
268 clear_connect_address_cache(i);
269 }
270
271 /* Setup clusterlog as client[0] in m_event_listner */
272 {
273 Ndb_mgmd_event_service::Event_listener se;
274 my_socket_invalidate(&(se.m_socket));
275 for(size_t t = 0; t<LogLevel::LOGLEVEL_CATEGORIES; t++){
276 se.m_logLevel.setLogLevel((LogLevel::EventCategory)t, 7);
277 }
278 se.m_logLevel.setLogLevel(LogLevel::llError, 15);
279 se.m_logLevel.setLogLevel(LogLevel::llConnection, 8);
280 se.m_logLevel.setLogLevel(LogLevel::llBackup, 15);
281 m_event_listner.m_clients.push_back(se);
282 m_event_listner.m_logLevel = se.m_logLevel;
283 }
284
285 DBUG_VOID_RETURN;
286 }
287
288
289 /*
290 check_configdir
291
292 Make sure configdir exist and try to create it if not
293
294 */
295
296 const char*
check_configdir() const297 MgmtSrvr::check_configdir() const
298 {
299 if (m_opts.configdir &&
300 strcmp(m_opts.configdir, MYSQLCLUSTERDIR) != 0)
301 {
302 // Specified on commmand line
303 if (access(m_opts.configdir, F_OK))
304 {
305 g_eventLogger->error("Directory '%s' specified with --configdir " \
306 "does not exist. Either create it or pass " \
307 "the path to an already existing directory.",
308 m_opts.configdir);
309 return NULL;
310 }
311 return m_opts.configdir;
312 }
313 else
314 {
315 // Compiled in path MYSQLCLUSTERDIR
316 if (access(MYSQLCLUSTERDIR, F_OK))
317 {
318 g_eventLogger->info("The default config directory '%s' " \
319 "does not exist. Trying to create it...",
320 MYSQLCLUSTERDIR);
321
322 if (!NdbDir::create(MYSQLCLUSTERDIR) ||
323 access(MYSQLCLUSTERDIR, F_OK))
324 {
325 g_eventLogger->error("Could not create directory '%s'. " \
326 "Either create it manually or " \
327 "specify a different directory with " \
328 "--configdir=<path>",
329 MYSQLCLUSTERDIR);
330 return NULL;
331 }
332
333 g_eventLogger->info("Sucessfully created config directory");
334 }
335 return MYSQLCLUSTERDIR;
336 }
337 }
338
339
340 bool
init()341 MgmtSrvr::init()
342 {
343 DBUG_ENTER("MgmtSrvr::init");
344
345 const char* configdir;
346
347 if (!m_opts.config_cache)
348 {
349 g_eventLogger->info("Skipping check of config directory since "
350 "config cache is disabled.");
351 configdir = NULL;
352 }
353 else
354 {
355 if (!(configdir= check_configdir()))
356 DBUG_RETURN(false);
357 }
358
359 if (!(m_config_manager= new ConfigManager(m_opts, configdir)))
360 {
361 g_eventLogger->error("Failed to create ConfigManager");
362 DBUG_RETURN(false);
363 }
364
365 if (m_config_manager->add_config_change_subscriber(this) < 0)
366 {
367 g_eventLogger->error("Failed to add MgmtSrvr as config change subscriber");
368 DBUG_RETURN(false);
369 }
370
371 if (!m_config_manager->init())
372 {
373 DBUG_RETURN(false);
374 }
375
376 /* 'config_changed' should have been called from 'init' */
377 require(m_local_config != 0);
378
379 if (m_opts.print_full_config)
380 {
381 print_config();
382 DBUG_RETURN(false);
383 }
384
385 assert(_ownNodeId);
386
387 DBUG_RETURN(true);
388 }
389
390
391 bool
start_transporter(const Config * config)392 MgmtSrvr::start_transporter(const Config* config)
393 {
394 DBUG_ENTER("MgmtSrvr::start_transporter");
395
396 theFacade= new TransporterFacade(0);
397 if (theFacade == 0)
398 {
399 g_eventLogger->error("Could not create TransporterFacade.");
400 DBUG_RETURN(false);
401 }
402
403 assert(_blockNumber == -1); // Blocknumber shouldn't been allocated yet
404
405 /*
406 Register ourself at TransporterFacade to be able to receive signals
407 and to be notified when a database process has died.
408 */
409 Uint32 res;
410 if ((res = open(theFacade)) == 0)
411 {
412 g_eventLogger->error("Failed to open block in TransporterFacade");
413 theFacade->stop_instance();
414 delete theFacade;
415 theFacade = 0;
416 DBUG_RETURN(false);
417 }
418 _blockNumber = refToBlock(res);
419
420 /**
421 * Need to call ->open() prior to actually starting TF
422 */
423 m_config_manager->set_facade(theFacade);
424
425 if (theFacade->start_instance(_ownNodeId,
426 config->m_configValues) < 0)
427 {
428 g_eventLogger->error("Failed to start transporter");
429 delete theFacade;
430 theFacade = 0;
431 DBUG_RETURN(false);
432 }
433
434 _ownReference = numberToRef(_blockNumber, _ownNodeId);
435
436 /*
437 set api reg req frequency quite high:
438
439 100 ms interval to make sure we have fairly up-to-date
440 info from the nodes. This to make sure that this info
441 is not dependent on heartbeat settings in the
442 configuration
443 */
444 theFacade->ext_set_max_api_reg_req_interval(100);
445
446 DBUG_RETURN(true);
447 }
448
449
450 bool
start_mgm_service(const Config * config)451 MgmtSrvr::start_mgm_service(const Config* config)
452 {
453 DBUG_ENTER("MgmtSrvr::start_mgm_service");
454
455 assert(m_port == 0);
456 {
457 // Find the portnumber to use for mgm service
458 ConfigIter iter(config, CFG_SECTION_NODE);
459
460 if(iter.find(CFG_NODE_ID, _ownNodeId) != 0){
461 g_eventLogger->error("Could not find node %d in config", _ownNodeId);
462 DBUG_RETURN(false);
463 }
464
465 unsigned type;
466 if(iter.get(CFG_TYPE_OF_SECTION, &type) != 0 ||
467 type != NODE_TYPE_MGM){
468 g_eventLogger->error("Node %d is not defined as management server",
469 _ownNodeId);
470 DBUG_RETURN(false);
471 }
472
473 if(iter.get(CFG_MGM_PORT, &m_port) != 0){
474 g_eventLogger->error("PortNumber not defined for node %d", _ownNodeId);
475 DBUG_RETURN(false);
476 }
477 }
478
479 unsigned short port= m_port;
480 DBUG_PRINT("info", ("Using port %d", port));
481 if (port == 0)
482 {
483 g_eventLogger->error("Could not find out which port to use"\
484 " for management service");
485 DBUG_RETURN(false);
486 }
487
488 {
489 int count= 5; // no of retries for tryBind
490 while(!m_socket_server.tryBind(port, m_opts.bind_address))
491 {
492 if (--count > 0)
493 {
494 NdbSleep_SecSleep(1);
495 continue;
496 }
497 g_eventLogger->error("Unable to bind management service port: %s:%d!\n"
498 "Please check if the port is already used,\n"
499 "(perhaps a ndb_mgmd is already running),\n"
500 "and if you are executing on the correct computer",
501 (m_opts.bind_address ? m_opts.bind_address : "*"),
502 port);
503 DBUG_RETURN(false);
504 }
505 }
506
507 {
508 MgmApiService * mapi = new MgmApiService(*this);
509 if (mapi == NULL)
510 {
511 g_eventLogger->error("Could not allocate MgmApiService");
512 DBUG_RETURN(false);
513 }
514
515 if(!m_socket_server.setup(mapi, &port, m_opts.bind_address))
516 {
517 delete mapi; // Will be deleted by SocketServer in all other cases
518 g_eventLogger->error("Unable to setup management service port: %s:%d!\n"
519 "Please check if the port is already used,\n"
520 "(perhaps a ndb_mgmd is already running),\n"
521 "and if you are executing on the correct computer",
522 (m_opts.bind_address ? m_opts.bind_address : "*"),
523 port);
524 DBUG_RETURN(false);
525 }
526
527 if (port != m_port)
528 {
529 g_eventLogger->error("Couldn't start management service on the "\
530 "requested port: %d. Got port: %d instead",
531 m_port, port);
532 DBUG_RETURN(false);
533 }
534 }
535
536 m_socket_server.startServer();
537
538 g_eventLogger->info("Id: %d, Command port: %s:%d",
539 _ownNodeId,
540 m_opts.bind_address ? m_opts.bind_address : "*",
541 port);
542 DBUG_RETURN(true);
543 }
544
545
546 bool
start()547 MgmtSrvr::start()
548 {
549 DBUG_ENTER("MgmtSrvr::start");
550
551 /* Start transporter */
552 if(!start_transporter(m_local_config))
553 {
554 g_eventLogger->error("Failed to start transporter!");
555 DBUG_RETURN(false);
556 }
557
558 /* Start mgm service */
559 if (!start_mgm_service(m_local_config))
560 {
561 g_eventLogger->error("Failed to start mangement service!");
562 DBUG_RETURN(false);
563 }
564
565 /* Use local MGM port for TransporterRegistry */
566 if(!connect_to_self())
567 {
568 g_eventLogger->error("Failed to connect to ourself!");
569 DBUG_RETURN(false);
570 }
571
572 /* Start config manager */
573 if (!m_config_manager->start())
574 {
575 g_eventLogger->error("Failed to start ConfigManager");
576 DBUG_RETURN(false);
577 }
578
579 /* Loglevel thread */
580 assert(_isStopThread == false);
581 _logLevelThread = NdbThread_Create(logLevelThread_C,
582 (void**)this,
583 0, // default stack size
584 "MgmtSrvr_Loglevel",
585 NDB_THREAD_PRIO_LOW);
586
587 DBUG_RETURN(true);
588 }
589
590
591 void
configure_eventlogger(const BaseString & logdestination) const592 MgmtSrvr::configure_eventlogger(const BaseString& logdestination) const
593 {
594 // Close old log handlers before creating the new
595 g_eventLogger->close();
596
597 Vector<BaseString> logdestinations;
598 logdestination.split(logdestinations, ";");
599
600 for(unsigned i = 0; i < logdestinations.size(); i++)
601 {
602 // Extract type(everything left of colon)
603 Vector<BaseString> v_type_params;
604 logdestinations[i].split(v_type_params, ":", 2);
605 BaseString type(v_type_params[0]);
606
607 // Extract params(everything right of colon)
608 BaseString params;
609 if(v_type_params.size() >= 2)
610 params = v_type_params[1];
611
612 LogHandler *handler = NULL;
613 if(type == "FILE")
614 {
615 char *default_file_name= NdbConfig_ClusterLogFileName(_ownNodeId);
616 handler = new FileLogHandler(default_file_name);
617 free(default_file_name);
618 }
619 else if(type == "CONSOLE")
620 {
621 handler = new ConsoleLogHandler();
622 }
623 #ifndef _WIN32
624 else if(type == "SYSLOG")
625 {
626 handler = new SysLogHandler();
627 }
628 #endif
629 if(handler == NULL)
630 {
631 ndbout_c("INTERNAL ERROR: Could not create log handler for: '%s'",
632 logdestinations[i].c_str());
633 continue;
634 }
635
636 if(!handler->parseParams(params))
637 {
638 ndbout_c("Failed to parse parameters for log handler: '%s', error: %d '%s'",
639 logdestinations[i].c_str(), handler->getErrorCode(), handler->getErrorStr());
640 delete handler;
641 continue;
642 }
643
644 if (!g_eventLogger->addHandler(handler))
645 {
646 ndbout_c("INTERNAL ERROR: Could not add %s log handler", handler->handler_type());
647 g_eventLogger->error("INTERNAL ERROR: Could not add %s log handler",
648 handler->handler_type());
649 delete handler;
650 continue;
651 }
652 }
653 }
654
655
656 void
setClusterLog(const Config * config)657 MgmtSrvr::setClusterLog(const Config* config)
658 {
659 assert(_ownNodeId);
660
661 ConfigIter iter(config, CFG_SECTION_NODE);
662 require(iter.find(CFG_NODE_ID, _ownNodeId) == 0);
663
664 // Update DataDir from config
665 const char *datadir;
666 require(iter.get(CFG_NODE_DATADIR, &datadir) == 0);
667 NdbConfig_SetPath(datadir);
668
669 if (NdbDir::chdir(NdbConfig_get_path(NULL)) != 0)
670 {
671 g_eventLogger->warning("Cannot change directory to '%s', error: %d",
672 NdbConfig_get_path(NULL), errno);
673 // Ignore error
674 }
675
676 // Get log destination from config
677 BaseString logdest;
678 const char *value;
679 if(iter.get(CFG_LOG_DESTINATION, &value) == 0){
680 logdest.assign(value);
681 }
682
683 bool logdest_configured = true;
684 if(logdest.length() == 0 || logdest == "") {
685 // No LogDestination set, use default settings
686 char *clusterLog= NdbConfig_ClusterLogFileName(_ownNodeId);
687 logdest.assfmt("FILE:filename=%s,maxsize=1000000,maxfiles=6",
688 clusterLog);
689 free(clusterLog);
690 logdest_configured = false;
691 }
692
693 configure_eventlogger(logdest);
694
695 if (logdest_configured == false &&
696 m_opts.non_interactive)
697 {
698 g_eventLogger->createConsoleHandler();
699 }
700
701 #ifdef _WIN32
702 /* Output to Windows event log */
703 g_eventLogger->createEventLogHandler("MySQL Cluster Management Server");
704 #endif
705
706 if (m_opts.verbose)
707 g_eventLogger->enable(Logger::LL_DEBUG);
708 }
709
710
711 void
config_changed(NodeId node_id,const Config * new_config)712 MgmtSrvr::config_changed(NodeId node_id, const Config* new_config)
713 {
714 DBUG_ENTER("MgmtSrvr::config_changed");
715
716 Guard g(m_local_config_mutex);
717
718 // Don't allow nodeid to change, once it's been set
719 require(_ownNodeId == 0 || _ownNodeId == node_id);
720
721 _ownNodeId= node_id;
722
723 if (m_local_config)
724 delete m_local_config;
725
726 m_local_config= new Config(new_config); // Copy
727 require(m_local_config != 0);
728
729 /* Rebuild node arrays */
730 ConfigIter iter(m_local_config, CFG_SECTION_NODE);
731 for(Uint32 i = 0; i<MAX_NODES; i++) {
732
733 clear_connect_address_cache(i);
734
735 if (iter.first())
736 continue;
737
738 if (iter.find(CFG_NODE_ID, i) == 0){
739 unsigned type;
740 require(iter.get(CFG_TYPE_OF_SECTION, &type) == 0);
741
742 switch(type){
743 case NODE_TYPE_DB:
744 nodeTypes[i] = NDB_MGM_NODE_TYPE_NDB;
745 break;
746 case NODE_TYPE_API:
747 nodeTypes[i] = NDB_MGM_NODE_TYPE_API;
748 break;
749 case NODE_TYPE_MGM:
750 nodeTypes[i] = NDB_MGM_NODE_TYPE_MGM;
751 break;
752 default:
753 break;
754 }
755 }
756 else
757 {
758 nodeTypes[i] = (enum ndb_mgm_node_type)-1;
759 }
760
761 }
762
763 // Setup cluster log
764 setClusterLog(m_local_config);
765
766 if (theFacade)
767 {
768 if (!theFacade->configure(_ownNodeId,
769 m_local_config->m_configValues))
770 {
771 g_eventLogger->warning("Could not reconfigure everything online, "
772 "this node need a restart");
773 m_need_restart= true;
774 }
775 }
776
777 DBUG_VOID_RETURN;
778 }
779
780
781 bool
get_packed_config(ndb_mgm_node_type node_type,BaseString & buf64,BaseString & error)782 MgmtSrvr::get_packed_config(ndb_mgm_node_type node_type,
783 BaseString& buf64, BaseString& error)
784 {
785 return m_config_manager->get_packed_config(node_type, &buf64, error);
786 }
787
788 bool
get_packed_config_from_node(NodeId nodeId,BaseString & buf64,BaseString & error)789 MgmtSrvr::get_packed_config_from_node(NodeId nodeId,
790 BaseString& buf64, BaseString& error)
791 {
792 DBUG_ENTER("get_packed_config_from_node");
793
794 if (nodeId >= MAX_NODES_ID)
795 {
796 error.assfmt("Nodeid %d is greater than max nodeid %d. ",
797 nodeId, MAX_NODES_ID);
798 DBUG_RETURN(false);
799 }
800
801 if (getNodeType(nodeId) == NDB_MGM_NODE_TYPE_UNKNOWN)
802 {
803 error.assfmt("Nodeid %d does not exist. ", nodeId);
804 DBUG_RETURN(false);
805 }
806
807 if (getNodeType(nodeId) != NDB_MGM_NODE_TYPE_NDB)
808 {
809 error.assfmt("Node %d is not a data node. ", nodeId);
810 DBUG_RETURN(false);
811 }
812
813 trp_node node = getNodeInfo(nodeId);
814
815 if (!node.m_alive)
816 {
817 error.assfmt("Data node %d is not alive. ", nodeId);
818 DBUG_RETURN(false);
819 }
820
821 const Uint32 version = node.m_info.m_version;
822
823 if (!ndbd_get_config_supported(version))
824 {
825 error.assfmt("Data node %d (version %d.%d.%d) does not support getting config. ",
826 nodeId, ndbGetMajor(version),
827 ndbGetMinor(version), ndbGetBuild(version));
828 DBUG_RETURN(false);
829 }
830
831 INIT_SIGNAL_SENDER(ss,nodeId);
832
833 SimpleSignal ssig;
834 GetConfigReq* req = CAST_PTR(GetConfigReq, ssig.getDataPtrSend());
835 req->senderRef = ss.getOwnRef();
836 req->nodeId = nodeId;
837
838 g_eventLogger->debug("Sending GET_CONFIG_REQ to %d", nodeId);
839
840 ssig.set(ss, TestOrd::TraceAPI, CMVMI, GSN_GET_CONFIG_REQ,
841 GetConfigReq::SignalLength);
842 if ((ss.sendSignal(nodeId, &ssig)) != SEND_OK)
843 {
844 DBUG_RETURN(false);
845 }
846
847 Defragger defragger;
848 while (true)
849 {
850 SimpleSignal *signal = ss.waitFor();
851 int gsn = signal->readSignalNumber();
852
853 switch (gsn)
854 {
855 case GSN_GET_CONFIG_CONF:
856 {
857 if (refToNode(signal->header.theSendersBlockRef) != nodeId)
858 {
859 error.assfmt("Internal Error: Reply from wrong node %d, expected from %d. ",
860 refToNode(signal->header.theSendersBlockRef),
861 nodeId);
862 DBUG_RETURN(false);
863 }
864
865 const GetConfigConf * const conf =
866 CAST_CONSTPTR(GetConfigConf, signal->getDataPtr());
867
868 if (signal->header.m_noOfSections != 1)
869 {
870 error.assfmt("Internal Error: Wrong number of sections %d received, expected %d. ",
871 signal->header.m_noOfSections, 1);
872 DBUG_RETURN(false);
873 }
874
875 if (defragger.defragment(signal))
876 {
877 ConfigValuesFactory cf;
878 require(cf.unpack(signal->ptr[0].p, conf->configLength));
879
880 Config received_config(cf.getConfigValues());
881 if (!received_config.pack64(buf64))
882 {
883 error.assign("Failed to pack64");
884 DBUG_RETURN(false);
885 }
886 DBUG_RETURN(true);
887 }
888 // wait until all fragments are received
889 continue;
890 }
891
892 case GSN_GET_CONFIG_REF:
893 {
894 if (refToNode(ssig.header.theSendersBlockRef) != nodeId)
895 {
896 error.assfmt("Internal Error: Reply from wrong node %d, expected from %d. ",
897 refToNode(signal->header.theSendersBlockRef),
898 nodeId);
899 DBUG_RETURN(false);
900 }
901 const GetConfigRef * const ref =
902 CAST_CONSTPTR(GetConfigRef, signal->getDataPtr());
903 error.assfmt("Error in retrieving config from node %d: Internal error: %d",
904 nodeId, ref->error);
905
906 DBUG_RETURN(false);
907 }
908
909 case GSN_NF_COMPLETEREP:
910 {
911 const NFCompleteRep * rep = CAST_CONSTPTR(NFCompleteRep,
912 signal->getDataPtr());
913 if (rep->failedNodeId == nodeId)
914 {
915 error.assfmt("Node %d is not available", nodeId);
916 DBUG_RETURN(false);
917 }
918 continue;
919 }
920
921 case GSN_NODE_FAILREP:
922 {
923 // Wait until GSN_NODE_COMPLETEREP is received.
924 continue;
925 }
926
927 case GSN_API_REGCONF:
928 case GSN_TAKE_OVERTCCONF:
929 case GSN_CONNECT_REP:
930 // Ignore
931 continue;
932
933 default:
934 report_unknown_signal(signal);
935 DBUG_RETURN(false);
936 }
937 }
938 // Should never come here
939 require(false);
940 DBUG_RETURN(false);
941 }
942
~MgmtSrvr()943 MgmtSrvr::~MgmtSrvr()
944 {
945 /* Stop log level thread */
946 void* res = 0;
947 _isStopThread = true;
948
949 if (_logLevelThread != NULL) {
950 NdbThread_WaitFor(_logLevelThread, &res);
951 NdbThread_Destroy(&_logLevelThread);
952 }
953
954 /* Stop mgm service, don't allow new connections */
955 m_socket_server.stopServer();
956
957 /* Stop all active session */
958 if (!m_socket_server.stopSessions(true,
959 2 * MgmApiSession::SOCKET_TIMEOUT))
960 {
961 g_eventLogger->error("Failed to wait for all sessions to stop, "
962 "continuing with shutdown anyway.");
963 }
964
965 /* Stop config manager */
966 if (m_config_manager != 0)
967 {
968 m_config_manager->stop();
969 delete m_config_manager;
970 m_config_manager= 0;
971 }
972
973 this->close(); // close trp_client before stopping TransporterFacade
974
975 // Stop transporter
976 if(theFacade != 0){
977 theFacade->stop_instance();
978 delete theFacade;
979 theFacade = 0;
980 }
981
982 delete m_local_config;
983
984 NdbMutex_Destroy(m_local_config_mutex);
985 NdbMutex_Destroy(m_reserved_nodes_mutex);
986 }
987
988
989 //****************************************************************************
990 //****************************************************************************
991
okToSendTo(NodeId nodeId,bool unCond)992 int MgmtSrvr::okToSendTo(NodeId nodeId, bool unCond)
993 {
994 if(nodeId == 0 || getNodeType(nodeId) != NDB_MGM_NODE_TYPE_NDB)
995 return WRONG_PROCESS_TYPE;
996 // Check if we have contact with it
997 if(unCond){
998 if (getNodeInfo(nodeId).is_confirmed())
999 return 0;
1000 }
1001 else if (getNodeInfo(nodeId).m_alive == true)
1002 return 0;
1003 return NO_CONTACT_WITH_PROCESS;
1004 }
1005
1006 void
report_unknown_signal(SimpleSignal * signal)1007 MgmtSrvr::report_unknown_signal(SimpleSignal *signal)
1008 {
1009 signal->print();
1010 g_eventLogger->error("Unknown signal received. SignalNumber: "
1011 "%i from (%d, 0x%x)",
1012 signal->readSignalNumber(),
1013 refToNode(signal->header.theSendersBlockRef),
1014 refToBlock(signal->header.theSendersBlockRef));
1015 assert(false);
1016 }
1017
1018 /*****************************************************************************
1019 * Starting and stopping database nodes
1020 ****************************************************************************/
1021
1022 int
sendSTART_ORD(int nodeId)1023 MgmtSrvr::sendSTART_ORD(int nodeId)
1024 {
1025 INIT_SIGNAL_SENDER(ss,nodeId);
1026
1027 SimpleSignal ssig;
1028 StartOrd* const startOrd = CAST_PTR(StartOrd, ssig.getDataPtrSend());
1029 ssig.set(ss,TestOrd::TraceAPI, CMVMI, GSN_START_ORD, StartOrd::SignalLength);
1030 startOrd->restartInfo = 0;
1031
1032 return ss.sendSignal(nodeId, &ssig) == SEND_OK ? 0 : SEND_OR_RECEIVE_FAILED;
1033 }
1034
1035 /*****************************************************************************
1036 * Version handling
1037 *****************************************************************************/
1038
1039 void
status_api(int nodeId,ndb_mgm_node_status & node_status,Uint32 & version,Uint32 & mysql_version,const char ** address,char * addr_buf,size_t addr_buf_size)1040 MgmtSrvr::status_api(int nodeId,
1041 ndb_mgm_node_status& node_status,
1042 Uint32& version, Uint32& mysql_version,
1043 const char **address,
1044 char *addr_buf,
1045 size_t addr_buf_size)
1046 {
1047 assert(getNodeType(nodeId) == NDB_MGM_NODE_TYPE_API);
1048 assert(version == 0 && mysql_version == 0);
1049
1050 if (sendVersionReq(nodeId,
1051 version,
1052 mysql_version,
1053 address,
1054 addr_buf,
1055 addr_buf_size) != 0)
1056 {
1057 // Couldn't get version from any NDB node.
1058 assert(version == 0);
1059 node_status = NDB_MGM_NODE_STATUS_UNKNOWN;
1060 return;
1061 }
1062
1063 if (version)
1064 {
1065 assert(mysql_version);
1066 node_status = NDB_MGM_NODE_STATUS_CONNECTED;
1067 }
1068 else
1069 {
1070 assert(mysql_version == 0);
1071 node_status = NDB_MGM_NODE_STATUS_NO_CONTACT;
1072 }
1073 return;
1074 }
1075
1076
1077 int
sendVersionReq(int v_nodeId,Uint32 & version,Uint32 & mysql_version,const char ** address,char * addr_buf,size_t addr_buf_size)1078 MgmtSrvr::sendVersionReq(int v_nodeId,
1079 Uint32 &version,
1080 Uint32& mysql_version,
1081 const char **address,
1082 char *addr_buf,
1083 size_t addr_buf_size)
1084 {
1085 SignalSender ss(theFacade);
1086 ss.lock();
1087
1088 SimpleSignal ssig;
1089 ApiVersionReq* req = CAST_PTR(ApiVersionReq, ssig.getDataPtrSend());
1090 req->senderRef = ss.getOwnRef();
1091 req->nodeId = v_nodeId;
1092 ssig.set(ss, TestOrd::TraceAPI, QMGR,
1093 GSN_API_VERSION_REQ, ApiVersionReq::SignalLength);
1094
1095 NodeId nodeId = 0;
1096 bool do_send = true;
1097 while(true)
1098 {
1099 if (do_send)
1100 {
1101 nodeId = ss.get_an_alive_node();
1102 if (nodeId == 0)
1103 {
1104 return NO_CONTACT_WITH_DB_NODES;
1105 }
1106
1107 if (ss.sendSignal(nodeId, &ssig) != SEND_OK)
1108 {
1109 return SEND_OR_RECEIVE_FAILED;
1110 }
1111
1112 do_send = false;
1113 }
1114
1115 SimpleSignal *signal = ss.waitFor();
1116
1117 switch (signal->readSignalNumber()) {
1118 case GSN_API_VERSION_CONF: {
1119 const ApiVersionConf * const conf =
1120 CAST_CONSTPTR(ApiVersionConf, signal->getDataPtr());
1121
1122 assert((int) conf->nodeId == v_nodeId);
1123
1124 version = conf->version;
1125 mysql_version = conf->mysql_version;
1126 if (version < NDBD_SPLIT_VERSION)
1127 mysql_version = 0;
1128 struct in_addr in;
1129 in.s_addr= conf->m_inet_addr;
1130 *address= Ndb_inet_ntop(AF_INET,
1131 static_cast<void*>(&in),
1132 addr_buf,
1133 (socklen_t)addr_buf_size);
1134
1135 return 0;
1136 }
1137
1138 case GSN_NF_COMPLETEREP:{
1139 const NFCompleteRep * const rep =
1140 CAST_CONSTPTR(NFCompleteRep, signal->getDataPtr());
1141 if (rep->failedNodeId == nodeId)
1142 do_send = true; // retry with other node
1143 continue;
1144 }
1145
1146 case GSN_NODE_FAILREP:{
1147 const NodeFailRep * const rep =
1148 CAST_CONSTPTR(NodeFailRep, signal->getDataPtr());
1149 Uint32 len = NodeFailRep::getNodeMaskLength(signal->getLength());
1150 assert(len == NodeBitmask::Size); // only full length in ndbapi
1151 if (BitmaskImpl::safe_get(len, rep->theAllNodes, nodeId))
1152 {
1153 do_send = true; // retry with other node
1154 }
1155 continue;
1156 }
1157 case GSN_API_REGCONF:
1158 case GSN_TAKE_OVERTCCONF:
1159 case GSN_CONNECT_REP:
1160 // Ignore
1161 continue;
1162 default:
1163 report_unknown_signal(signal);
1164 return SEND_OR_RECEIVE_FAILED;
1165 }
1166 }
1167
1168 // Should never come here
1169 require(false);
1170 return -1;
1171 }
1172
1173
sendStopMgmd(NodeId nodeId,bool abort,bool stop,bool restart,bool nostart,bool initialStart)1174 int MgmtSrvr::sendStopMgmd(NodeId nodeId,
1175 bool abort,
1176 bool stop,
1177 bool restart,
1178 bool nostart,
1179 bool initialStart)
1180 {
1181 const char* hostname;
1182 Uint32 port;
1183 BaseString connect_string;
1184
1185 {
1186 Guard g(m_local_config_mutex);
1187 {
1188 ConfigIter iter(m_local_config, CFG_SECTION_NODE);
1189
1190 if(iter.first()) return SEND_OR_RECEIVE_FAILED;
1191 if(iter.find(CFG_NODE_ID, nodeId)) return SEND_OR_RECEIVE_FAILED;
1192 if(iter.get(CFG_NODE_HOST, &hostname)) return SEND_OR_RECEIVE_FAILED;
1193 }
1194 {
1195 ConfigIter iter(m_local_config, CFG_SECTION_NODE);
1196
1197 if(iter.first()) return SEND_OR_RECEIVE_FAILED;
1198 if(iter.find(CFG_NODE_ID, nodeId)) return SEND_OR_RECEIVE_FAILED;
1199 if(iter.get(CFG_MGM_PORT, &port)) return SEND_OR_RECEIVE_FAILED;
1200 }
1201 if( strlen(hostname) == 0 )
1202 return SEND_OR_RECEIVE_FAILED;
1203
1204 }
1205 connect_string.assfmt("%s:%u",hostname,port);
1206
1207 DBUG_PRINT("info",("connect string: %s",connect_string.c_str()));
1208
1209 NdbMgmHandle h= ndb_mgm_create_handle();
1210 if ( h && connect_string.length() > 0 )
1211 {
1212 ndb_mgm_set_connectstring(h,connect_string.c_str());
1213 if(ndb_mgm_connect(h,1,0,0))
1214 {
1215 DBUG_PRINT("info",("failed ndb_mgm_connect"));
1216 ndb_mgm_destroy_handle(&h);
1217 return SEND_OR_RECEIVE_FAILED;
1218 }
1219 if(!restart)
1220 {
1221 int nodes[1];
1222 nodes[0]= (int)nodeId;
1223 if(ndb_mgm_stop(h, 1, nodes) < 0)
1224 {
1225 ndb_mgm_destroy_handle(&h);
1226 return SEND_OR_RECEIVE_FAILED;
1227 }
1228 }
1229 else
1230 {
1231 int nodes[1];
1232 nodes[0]= (int)nodeId;
1233 if(ndb_mgm_restart2(h, 1, nodes, initialStart, nostart, abort) < 0)
1234 {
1235 ndb_mgm_destroy_handle(&h);
1236 return SEND_OR_RECEIVE_FAILED;
1237 }
1238 }
1239 }
1240 ndb_mgm_destroy_handle(&h);
1241
1242 return 0;
1243 }
1244
/**
 * Send STOP_REQ to all DB nodes and wait for them to stop or refuse.
 *
 * One STOP_REQ (addressed to NDBCNTR) is sent to every data node for which
 * okToSendTo() succeeds. The function then waits until every node that was
 * sent a request has either answered with STOP_CONF / STOP_REF or has been
 * reported as failed.
 *
 * @param stoppedNodes  [out] cleared on entry; on return holds the nodes
 *                      that were reported through NF_COMPLETEREP or
 *                      NODE_FAILREP while waiting. A plain STOP_CONF does
 *                      not set a bit here.
 * @param abort         stored in the request with StopReq::setStopAbort
 * @param stop          stored in the request with StopReq::setSystemStop
 * @param restart       stored in the request with StopReq::setPerformRestart
 * @param nostart       stored in the request with StopReq::setNoStart
 * @param initialStart  stored in the request with StopReq::setInitialStart
 *
 * @return SEND_OR_RECEIVE_FAILED if no request could be sent although at
 *         least one send failed, or if an unexpected signal arrives;
 *         otherwise the translated error code of the last STOP_REF
 *         received, or 0 if no node refused.
 */
int
MgmtSrvr::sendall_STOP_REQ(NodeBitmask &stoppedNodes,
                           bool abort,
                           bool stop,
                           bool restart,
                           bool nostart,
                           bool initialStart)
{
  int error = 0;
  DBUG_ENTER("MgmtSrvr::sendall_STOP_REQ");
  DBUG_PRINT("enter", ("abort: %d  stop: %d  restart: %d  "
                       "nostart: %d  initialStart: %d",
                       abort, stop, restart, nostart, initialStart));

  stoppedNodes.clear();

  SignalSender ss(theFacade);
  ss.lock(); // lock will be released on exit

  // Build the STOP_REQ once; the same signal is sent to every node
  SimpleSignal ssig;
  StopReq* const stopReq = CAST_PTR(StopReq, ssig.getDataPtrSend());
  ssig.set(ss, TestOrd::TraceAPI, NDBCNTR, GSN_STOP_REQ, StopReq::SignalLength);

  stopReq->requestInfo = 0;
  stopReq->apiTimeout = 5000;
  stopReq->transactionTimeout = 1000;
  stopReq->readOperationTimeout = 1000;
  stopReq->operationTimeout = 1000;
  stopReq->senderData = 12;
  stopReq->senderRef = ss.getOwnRef();
  stopReq->singleuser = 0;
  StopReq::setSystemStop(stopReq->requestInfo, stop);
  StopReq::setPerformRestart(stopReq->requestInfo, restart);
  StopReq::setStopAbort(stopReq->requestInfo, abort);
  StopReq::setNoStart(stopReq->requestInfo, nostart);
  StopReq::setInitialStart(stopReq->requestInfo, initialStart);

  // send the signals
  int failed = 0;     // number of nodes where sendSignal() itself failed
  NodeBitmask nodes;  // nodes we still expect an answer from
  {
    NodeId nodeId = 0;
    while(getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB))
    {
      // Nodes that are not ok to send to are skipped without being counted
      if (okToSendTo(nodeId, true) == 0)
      {
        SendStatus result = ss.sendSignal(nodeId, &ssig);
        if (result == SEND_OK)
          nodes.set(nodeId);
        else
          failed++;
      }
    }
  }

  // Nothing was sent and at least one send attempt failed -> report failure
  if (nodes.isclear() && failed > 0)
  {
    DBUG_RETURN(SEND_OR_RECEIVE_FAILED);
  }

  // now wait for the replies
  while (!nodes.isclear())
  {
    SimpleSignal *signal = ss.waitFor();
    int gsn = signal->readSignalNumber();
    switch (gsn) {
    case GSN_STOP_REF:
    {
      // Node refused: stop waiting for it and remember the (last) reason
      const StopRef * const ref = CAST_CONSTPTR(StopRef, signal->getDataPtr());
      const NodeId nodeId = refToNode(signal->header.theSendersBlockRef);
#ifdef VM_TRACE
      ndbout_c("Node %d refused stop", nodeId);
#endif
      assert(nodes.get(nodeId));
      nodes.clear(nodeId);
      error = translateStopRef(ref->errorCode);
      break;
    }
    case GSN_STOP_CONF:
    {
      // Node accepted the request: only stop waiting for it
      const NodeId nodeId = refToNode(signal->header.theSendersBlockRef);
      assert(nodes.get(nodeId));
      nodes.clear(nodeId);
      break;
    }
    case GSN_NF_COMPLETEREP:
    {
      // A single node failed: stop waiting for it and report it as stopped
      const NFCompleteRep * rep = CAST_CONSTPTR(NFCompleteRep,
                                                signal->getDataPtr());
      if (rep->failedNodeId <= nodes.max_size())
        nodes.clear(rep->failedNodeId); // clear the failed node

      if (rep->failedNodeId <= stoppedNodes.max_size())
        stoppedNodes.set(rep->failedNodeId);
      break;
    }
    case GSN_NODE_FAILREP:
    {
      // A set of nodes failed: remove all of them from the wait set and
      // report all of them as stopped
      const NodeFailRep * rep = CAST_CONSTPTR(NodeFailRep,
                                              signal->getDataPtr());
      Uint32 len = NodeFailRep::getNodeMaskLength(signal->getLength());
      assert(len == NodeBitmask::Size); // only full length in ndbapi
      NodeBitmask mask;
      mask.assign(len, rep->theAllNodes);
      nodes.bitANDC(mask);
      stoppedNodes.bitOR(mask);
      break;
    }
    case GSN_API_REGCONF:
    case GSN_TAKE_OVERTCCONF:
    case GSN_CONNECT_REP:
      // Not related to the stop request, ignore and keep waiting
      continue;
    default:
      report_unknown_signal(signal);
      DBUG_RETURN(SEND_OR_RECEIVE_FAILED);
    }
  }

  DBUG_RETURN(error);
}
1370
1371 int
guess_master_node(SignalSender & ss)1372 MgmtSrvr::guess_master_node(SignalSender& ss)
1373 {
1374 /**
1375 * First check if m_master_node is started
1376 */
1377 NodeId guess = m_master_node;
1378 if (guess != 0)
1379 {
1380 trp_node node = ss.getNodeInfo(guess);
1381 if (node.m_state.startLevel == NodeState::SL_STARTED)
1382 return guess;
1383 }
1384
1385 /**
1386 * Check for any started node
1387 */
1388 guess = 0;
1389 while(getNextNodeId(&guess, NDB_MGM_NODE_TYPE_NDB))
1390 {
1391 trp_node node = ss.getNodeInfo(guess);
1392 if (node.m_state.startLevel == NodeState::SL_STARTED)
1393 {
1394 return guess;
1395 }
1396 }
1397
1398 /**
1399 * Check any confirmed node
1400 */
1401 guess = 0;
1402 while(getNextNodeId(&guess, NDB_MGM_NODE_TYPE_NDB))
1403 {
1404 trp_node node = ss.getNodeInfo(guess);
1405 if (node.is_confirmed())
1406 {
1407 return guess;
1408 }
1409 }
1410
1411 /**
1412 * Check any connected node
1413 */
1414 guess = 0;
1415 while(getNextNodeId(&guess, NDB_MGM_NODE_TYPE_NDB))
1416 {
1417 trp_node node = ss.getNodeInfo(guess);
1418 if (node.is_connected())
1419 {
1420 return guess;
1421 }
1422 }
1423
1424 return 0; // give up
1425 }
1426
/*
 * Common method for handling all STOP_REQ signalling for an explicit
 * list of nodes; used by the stop and restart commands.
 *
 * In the event that we need to stop a mgmd, we create a mgm
 * client connection to that mgmd and stop it that way.
 * This allows us to stop mgm servers when there isn't any real
 * distributed communication up.
 *
 * Data nodes are handled in one of two ways:
 *  - exactly one data node: STOP_REQ is sent directly to that node
 *  - more than one data node: a single STOP_REQ carrying the node bitmask
 *    is sent to the node returned by guess_master_node(); if that node
 *    answers STOP_REF with MultiNodeShutdownNotMaster, the master node id
 *    from the reply is remembered in m_master_node and the request is
 *    sent again.
 *
 * NOTE(review): with an empty node_ids no STOP_REQ is built for sending
 * here and the wait loop is presumably skipped; the callers visible in
 * this file use sendall_STOP_REQ() for the "all DB nodes" case - confirm.
 *
 * If we work out we should be stopping or restarting ourselves,
 * we return <0 in stopSelf for restart, >0 for stop
 * and 0 for do nothing.
 *
 * Parameters:
 *   node_ids      nodes to stop; each must be a MGM or an NDB node
 *   stoppedNodes  [out] cleared on entry, then filled with the nodes
 *                 considered stopped
 *   abort, stop, restart, nostart, initialStart
 *                 copied into the STOP_REQ request info and passed on to
 *                 sendStopMgmd() for management nodes
 *   stopSelf      [out] see above; reset to 0 if the function reaches its
 *                 end with an error
 *
 * Returns 0 on success, WRONG_PROCESS_TYPE if node_ids contains a node
 * that is neither MGM nor NDB, SEND_OR_RECEIVE_FAILED on send problems or
 * unexpected signals, the translated STOP_REF error code if a data node
 * refuses, or the result of the last sendStopMgmd() call.
 */

int MgmtSrvr::sendSTOP_REQ(const Vector<NodeId> &node_ids,
                           NodeBitmask &stoppedNodes,
                           bool abort,
                           bool stop,
                           bool restart,
                           bool nostart,
                           bool initialStart,
                           int* stopSelf)
{
  int error = 0;
  DBUG_ENTER("MgmtSrvr::sendSTOP_REQ");
  DBUG_PRINT("enter", ("no of nodes: %d "
                       "abort: %d  stop: %d  restart: %d  "
                       "nostart: %d  initialStart: %d",
                       node_ids.size(),
                       abort, stop, restart, nostart, initialStart));

  stoppedNodes.clear();
  *stopSelf= 0;

  NodeBitmask ndb_nodes_to_stop;
  NodeBitmask mgm_nodes_to_stop;

  SignalSender ss(theFacade);
  ss.lock(); // lock will be released on exit

  /**
   * First verify arguments
   * (and sort the requested nodes into management and data nodes)
   */
  for (unsigned i = 0; i < node_ids.size(); i++)
  {
    switch(getNodeType(node_ids[i])){
    case NDB_MGM_NODE_TYPE_MGM:
      mgm_nodes_to_stop.set(node_ids[i]);
      break;
    case NDB_MGM_NODE_TYPE_NDB:
      ndb_nodes_to_stop.set(node_ids[i]);
      break;
    default:
      DBUG_RETURN(WRONG_PROCESS_TYPE);
    }
  }

  /**
   * Process ndb_mgmd
   */
  for (Uint32 i = mgm_nodes_to_stop.find(0);
       i != mgm_nodes_to_stop.NotFound;
       i = mgm_nodes_to_stop.find(i + 1))
  {
    if (i != getOwnNodeId())
    {
      // Another mgmd: stop it through a management client connection.
      // Note that 'error' is overwritten for every remote mgmd.
      error= sendStopMgmd(i, abort, stop, restart,
                          nostart, initialStart);
      if (error == 0)
      {
        stoppedNodes.set(i);
      }
    }
    else
    {
      // This mgmd: only tell the caller, it has to stop/restart us
      g_eventLogger->info("Stopping this node");
      * stopSelf = (restart)? -1 : 1;
      stoppedNodes.set(i);
    }
  }

  /**
   * Process ndbd
   */
  SimpleSignal ssig;
  StopReq* const stopReq = CAST_PTR(StopReq, ssig.getDataPtrSend());
  ssig.set(ss, TestOrd::TraceAPI, NDBCNTR, GSN_STOP_REQ, StopReq::SignalLength);

  stopReq->requestInfo = 0;
  stopReq->apiTimeout = 5000;
  stopReq->transactionTimeout = 1000;
  stopReq->readOperationTimeout = 1000;
  stopReq->operationTimeout = 1000;
  stopReq->senderData = 12;
  stopReq->senderRef = ss.getOwnRef();
  stopReq->singleuser = 0;
  StopReq::setSystemStop(stopReq->requestInfo, stop);
  StopReq::setPerformRestart(stopReq->requestInfo, restart);
  StopReq::setStopAbort(stopReq->requestInfo, abort);
  StopReq::setNoStart(stopReq->requestInfo, nostart);
  StopReq::setInitialStart(stopReq->requestInfo, initialStart);

  int use_master_node = 0; // set when the request goes via the master
  int do_send = 0;         // set when the wait loop must (re)send
  if (ndb_nodes_to_stop.count() > 1)
  {
    // Several data nodes: one request with a node bitmask, sent from
    // inside the wait loop to the guessed master node
    do_send = 1;
    use_master_node = 1;
    ndb_nodes_to_stop.copyto(NdbNodeBitmask::Size, stopReq->nodes);
    StopReq::setStopNodes(stopReq->requestInfo, 1);
  }
  else if (ndb_nodes_to_stop.count() == 1)
  {
    // Exactly one data node: send directly to that node
    Uint32 nodeId = ndb_nodes_to_stop.find(0);
    if (okToSendTo(nodeId, true) == 0)
    {
      SendStatus result = ss.sendSignal(nodeId, &ssig);
      if (result != SEND_OK)
      {
        DBUG_RETURN(SEND_OR_RECEIVE_FAILED);
      }
    }
    else
    {
      DBUG_RETURN(SEND_OR_RECEIVE_FAILED);
    }
  }


  // now wait for the replies
  // sendNodeId is the node the request was last sent to; STOP_REF and
  // STOP_CONF are required to come from that node
  Uint32 sendNodeId = ndb_nodes_to_stop.find(0);
  while (!stoppedNodes.contains(ndb_nodes_to_stop))
  {
    if (do_send)
    {
      assert(use_master_node);
      sendNodeId = guess_master_node(ss);
      if (okToSendTo(sendNodeId, true) != 0)
      {
        DBUG_RETURN(SEND_OR_RECEIVE_FAILED);
      }

      if (ss.sendSignal(sendNodeId, &ssig) != SEND_OK)
      {
        DBUG_RETURN(SEND_OR_RECEIVE_FAILED);
      }
      do_send = 0;
    }

    SimpleSignal *signal = ss.waitFor();
    int gsn = signal->readSignalNumber();
    switch (gsn) {
    case GSN_STOP_REF:{
      const StopRef * const ref = CAST_CONSTPTR(StopRef, signal->getDataPtr());
      const NodeId nodeId = refToNode(signal->header.theSendersBlockRef);
      require(nodeId == sendNodeId);
      if (ref->errorCode == StopRef::MultiNodeShutdownNotMaster)
      {
        // We asked a non-master node: remember the master named in the
        // reply and send the request again on the next loop iteration
        assert(use_master_node);
        m_master_node= ref->masterNodeId;
        do_send = 1;
        continue;
      }
      // Any other refusal ends the operation with the translated error
      DBUG_RETURN(translateStopRef(ref->errorCode));
      break;
    }
    case GSN_STOP_CONF:{
#ifdef NOT_USED
      const StopConf * const ref = CAST_CONSTPTR(StopConf, signal->getDataPtr());
#endif
      // The request was accepted: all requested data nodes count as stopped
      const NodeId nodeId = refToNode(signal->header.theSendersBlockRef);
      require(nodeId == sendNodeId);
      stoppedNodes.bitOR(ndb_nodes_to_stop);
      break;
    }
    case GSN_NF_COMPLETEREP:{
      // A single node failed: it counts as stopped
      const NFCompleteRep * const rep =
	CAST_CONSTPTR(NFCompleteRep, signal->getDataPtr());
      if (rep->failedNodeId <= stoppedNodes.max_size())
        stoppedNodes.set(rep->failedNodeId);
      break;
    }
    case GSN_NODE_FAILREP:{
      // A set of nodes failed: all of them count as stopped
      const NodeFailRep * const rep =
	CAST_CONSTPTR(NodeFailRep, signal->getDataPtr());
      Uint32 len = NodeFailRep::getNodeMaskLength(signal->getLength());
      require(len == NodeBitmask::Size); // only full length in ndbapi
      NodeBitmask mask;
      mask.assign(len, rep->theAllNodes);
      stoppedNodes.bitOR(mask);
      break;
    }
    case GSN_API_REGCONF:
    case GSN_TAKE_OVERTCCONF:
    case GSN_CONNECT_REP:
      // Not related to the stop request, ignore and keep waiting
      continue;
    default:
      report_unknown_signal(signal);
      DBUG_RETURN(SEND_OR_RECEIVE_FAILED);
    }
  }
  // Do not ask the caller to stop this mgmd when an error is returned
  if (error && *stopSelf)
  {
    *stopSelf= 0;
  }
  DBUG_RETURN(error);
}
1637
1638 /*
1639 * Stop one nodes
1640 */
1641
stopNodes(const Vector<NodeId> & node_ids,int * stopCount,bool abort,bool force,int * stopSelf)1642 int MgmtSrvr::stopNodes(const Vector<NodeId> &node_ids,
1643 int *stopCount, bool abort, bool force,
1644 int* stopSelf)
1645 {
1646 if (force || abort)
1647 ; // Skip node state checks
1648 else if (is_any_node_starting())
1649 {
1650 /* Refuse to stop since some node(s) are starting */
1651 return OPERATION_NOT_ALLOWED_START_STOP;
1652 }
1653
1654 NodeBitmask nodes;
1655 int ret = 0;
1656 if (node_ids.size() > 0)
1657 {
1658 ret = sendSTOP_REQ(node_ids, nodes,
1659 abort, false, false, false, false,
1660 stopSelf);
1661 }
1662 else
1663 {
1664 ret = sendall_STOP_REQ(nodes,
1665 abort, false, false, false, false);
1666 }
1667
1668 if (stopCount)
1669 *stopCount= nodes.count();
1670 return ret;
1671 }
1672
shutdownMGM(int * stopCount,bool abort,int * stopSelf)1673 int MgmtSrvr::shutdownMGM(int *stopCount, bool abort, int *stopSelf)
1674 {
1675 NodeId nodeId = 0;
1676 int error;
1677
1678 while(getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_MGM))
1679 {
1680 if(nodeId==getOwnNodeId())
1681 continue;
1682 error= sendStopMgmd(nodeId, abort, true, false,
1683 false, false);
1684 if (error == 0)
1685 (*stopCount)++;
1686 }
1687
1688 *stopSelf= 1;
1689 (*stopCount)++;
1690
1691 return 0;
1692 }
1693
1694 /*
1695 * Perform DB nodes shutdown.
1696 * MGM servers are left in their current state
1697 */
1698
shutdownDB(int * stopCount,bool abort)1699 int MgmtSrvr::shutdownDB(int * stopCount, bool abort)
1700 {
1701 NodeBitmask nodes;
1702
1703 int ret = sendall_STOP_REQ(nodes,
1704 abort,
1705 true,
1706 false,
1707 false,
1708 false);
1709
1710 if (stopCount)
1711 *stopCount = nodes.count();
1712 return ret;
1713 }
1714
1715 /*
1716 * Enter single user mode on all live nodes
1717 */
1718
enterSingleUser(int * stopCount,Uint32 apiNodeId)1719 int MgmtSrvr::enterSingleUser(int * stopCount, Uint32 apiNodeId)
1720 {
1721 if (getNodeType(apiNodeId) != NDB_MGM_NODE_TYPE_API)
1722 return NODE_NOT_API_NODE;
1723
1724 // Init
1725 if (stopCount)
1726 {
1727 * stopCount = 0;
1728 }
1729
1730 SignalSender ss(theFacade);
1731 ss.lock(); // lock will be released on exit
1732
1733 SimpleSignal ssig;
1734 StopReq* const stopReq = CAST_PTR(StopReq, ssig.getDataPtrSend());
1735 ssig.set(ss, TestOrd::TraceAPI, NDBCNTR, GSN_STOP_REQ, StopReq::SignalLength);
1736
1737 stopReq->requestInfo = 0;
1738 stopReq->apiTimeout = 5000;
1739 stopReq->transactionTimeout = 1000;
1740 stopReq->readOperationTimeout = 1000;
1741 stopReq->operationTimeout = 1000;
1742 stopReq->senderData = 12;
1743 stopReq->senderRef = ss.getOwnRef();
1744 stopReq->singleuser = 1;
1745 stopReq->singleUserApi = apiNodeId;
1746 StopReq::setSystemStop(stopReq->requestInfo, false);
1747 StopReq::setPerformRestart(stopReq->requestInfo, false);
1748 StopReq::setStopAbort(stopReq->requestInfo, false);
1749
1750 NodeBitmask nodes;
1751 {
1752 NodeId nodeId = 0;
1753 Uint32 failed = 0;
1754 while (getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB))
1755 {
1756 if (okToSendTo(nodeId, true) == 0)
1757 {
1758 SendStatus result = ss.sendSignal(nodeId, &ssig);
1759 if (result == SEND_OK)
1760 nodes.set(nodeId);
1761 else
1762 failed++;
1763 }
1764 else
1765 {
1766 failed++;
1767 }
1768 }
1769 if (nodes.isclear())
1770 {
1771 if (failed)
1772 {
1773 return SEND_OR_RECEIVE_FAILED;
1774 }
1775 return NO_CONTACT_WITH_DB_NODES;
1776 }
1777 }
1778
1779 int error = 0;
1780 int ok = 0;
1781 while (!nodes.isclear())
1782 {
1783 SimpleSignal *signal = ss.waitFor();
1784 int gsn = signal->readSignalNumber();
1785 switch (gsn) {
1786 case GSN_STOP_REF:
1787 {
1788 const StopRef * const ref = CAST_CONSTPTR(StopRef, signal->getDataPtr());
1789 nodes.clear(refToNode(signal->header.theSendersBlockRef));
1790 error = translateStopRef(ref->errorCode);
1791 break;
1792 }
1793 case GSN_STOP_CONF:
1794 {
1795 ok++;
1796 nodes.clear(refToNode(signal->header.theSendersBlockRef));
1797 break;
1798 }
1799 case GSN_NF_COMPLETEREP:
1800 {
1801 const NFCompleteRep * rep = CAST_CONSTPTR(NFCompleteRep,
1802 signal->getDataPtr());
1803 if (rep->failedNodeId <= nodes.max_size())
1804 nodes.clear(rep->failedNodeId);
1805 break;
1806 }
1807
1808 case GSN_NODE_FAILREP:
1809 {
1810 const NodeFailRep * rep = CAST_CONSTPTR(NodeFailRep,
1811 signal->getDataPtr());
1812 Uint32 len = NodeFailRep::getNodeMaskLength(signal->getLength());
1813 assert(len == NodeBitmask::Size); // only full length in ndbapi
1814 NodeBitmask mask;
1815 mask.assign(len, rep->theAllNodes);
1816 nodes.bitANDC(mask);
1817 break;
1818 }
1819 case GSN_API_REGCONF:
1820 case GSN_TAKE_OVERTCCONF:
1821 case GSN_CONNECT_REP:
1822 continue;
1823
1824 default:
1825 report_unknown_signal(signal);
1826 return SEND_OR_RECEIVE_FAILED;
1827 }
1828 }
1829
1830 if (stopCount)
1831 {
1832 * stopCount = ok;
1833 }
1834
1835 return error;
1836 }
1837
1838 /*
1839 * Perform node restart
1840 */
1841
is_any_node_stopping()1842 bool MgmtSrvr::is_any_node_stopping()
1843 {
1844 NodeId nodeId = 0;
1845 trp_node node;
1846 while(getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB))
1847 {
1848 node = getNodeInfo(nodeId);
1849 if((node.m_state.startLevel == NodeState::SL_STOPPING_1) ||
1850 (node.m_state.startLevel == NodeState::SL_STOPPING_2) ||
1851 (node.m_state.startLevel == NodeState::SL_STOPPING_3) ||
1852 (node.m_state.startLevel == NodeState::SL_STOPPING_4))
1853 return true; // At least one node was stopping
1854 }
1855 return false; // No node was stopping
1856 }
1857
is_any_node_starting()1858 bool MgmtSrvr::is_any_node_starting()
1859 {
1860 NodeId nodeId = 0;
1861 trp_node node;
1862 while(getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB))
1863 {
1864 node = getNodeInfo(nodeId);
1865 if (node.m_state.startLevel == NodeState::SL_STARTING)
1866 return true; // At least one node was starting
1867 }
1868 return false; // No node was starting
1869 }
1870
is_cluster_single_user()1871 bool MgmtSrvr::is_cluster_single_user()
1872 {
1873 NodeId nodeId = 0;
1874 trp_node node;
1875 while(getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB))
1876 {
1877 node = getNodeInfo(nodeId);
1878 if (node.m_state.startLevel == NodeState::SL_SINGLEUSER)
1879 return true; // Cluster is in single user modes
1880 }
1881 return false; // Cluster is not in single user mode
1882 }
1883
restartNodes(const Vector<NodeId> & node_ids,int * stopCount,bool nostart,bool initialStart,bool abort,bool force,int * stopSelf,unsigned int num_secs_to_wait_for_node)1884 int MgmtSrvr::restartNodes(const Vector<NodeId> &node_ids,
1885 int * stopCount, bool nostart,
1886 bool initialStart, bool abort,
1887 bool force,
1888 int *stopSelf,
1889 unsigned int num_secs_to_wait_for_node)
1890 {
1891 if (is_cluster_single_user())
1892 {
1893 /*
1894 Refuse to restart since cluster is in single user mode
1895 and when the node is restarting it would not be allowed to
1896 join cluster, see BUG#31056
1897 */
1898 return OPERATION_NOT_ALLOWED_START_STOP;
1899 }
1900
1901 if (force || abort)
1902 ; // Skip node state checks
1903 else if (is_any_node_starting())
1904 {
1905 /* Refuse to restart since some node(s) are starting */
1906 return OPERATION_NOT_ALLOWED_START_STOP;
1907 }
1908
1909 NodeBitmask nodes;
1910 int ret = 0;
1911 if (node_ids.size() > 0)
1912 {
1913 ret = sendSTOP_REQ(node_ids, nodes,
1914 abort, false, true, true, initialStart,
1915 stopSelf);
1916 }
1917 else
1918 {
1919 ret = sendall_STOP_REQ(nodes,
1920 abort, false, true, true, initialStart);
1921 }
1922
1923 if (ret)
1924 return ret;
1925
1926 if (stopCount)
1927 *stopCount = nodes.count();
1928
1929 // start up the nodes again
1930 const Uint64 waitTime = 12000;
1931 const NDB_TICKS startTime = NdbTick_getCurrentTicks();
1932 for (unsigned i = 0; i < node_ids.size(); i++)
1933 {
1934 NodeId nodeId= node_ids[i];
1935 enum ndb_mgm_node_status s;
1936 s = NDB_MGM_NODE_STATUS_NO_CONTACT;
1937 #ifdef VM_TRACE
1938 ndbout_c("Waiting for %d not started", nodeId);
1939 #endif
1940 while (s != NDB_MGM_NODE_STATUS_NOT_STARTED &&
1941 NdbTick_Elapsed(startTime,NdbTick_getCurrentTicks()).milliSec() < waitTime)
1942 {
1943 Uint32 startPhase = 0, version = 0, dynamicId = 0, nodeGroup = 0;
1944 Uint32 mysql_version = 0;
1945 Uint32 connectCount = 0;
1946 bool system;
1947 const char *address= NULL;
1948 char addr_buf[NDB_ADDR_STRLEN];
1949 status(nodeId, &s, &version, &mysql_version, &startPhase,
1950 &system, &dynamicId, &nodeGroup, &connectCount,
1951 &address, addr_buf, sizeof(addr_buf));
1952 NdbSleep_MilliSleep(100);
1953 }
1954 }
1955
1956 if (nostart)
1957 return 0;
1958
1959 /*
1960 verify that no nodes are stopping before starting as this would cause
1961 the starting node to shutdown
1962 */
1963 int retry= 600*10;
1964 for (;is_any_node_stopping();)
1965 {
1966 if (--retry)
1967 break;
1968 NdbSleep_MilliSleep(100);
1969 }
1970
1971 /*
1972 start the nodes
1973 */
1974 for (unsigned i = 0; i < node_ids.size(); i++)
1975 {
1976 unsigned int loop_count = 0;
1977 do
1978 {
1979 int result = sendSTART_ORD(node_ids[i]);
1980 if (result == SEND_OR_RECEIVE_FAILED ||
1981 result == NO_CONTACT_WITH_PROCESS)
1982 {
1983 if (loop_count >= num_secs_to_wait_for_node)
1984 break;
1985 loop_count++;
1986 NdbSleep_MilliSleep(1000);
1987 }
1988 else
1989 {
1990 break;
1991 }
1992 } while (1);
1993 }
1994 return 0;
1995 }
1996
1997 /*
1998 * Perform restart of all DB nodes
1999 */
2000
restartDB(bool nostart,bool initialStart,bool abort,int * stopCount,unsigned int num_secs_to_wait_for_node)2001 int MgmtSrvr::restartDB(bool nostart, bool initialStart,
2002 bool abort, int * stopCount,
2003 unsigned int num_secs_to_wait_for_node)
2004 {
2005 NodeBitmask nodes;
2006
2007 int ret = sendall_STOP_REQ(nodes,
2008 abort,
2009 true,
2010 true,
2011 true,
2012 initialStart);
2013
2014 if (ret)
2015 return ret;
2016
2017 if (stopCount)
2018 *stopCount = nodes.count();
2019
2020 #ifdef VM_TRACE
2021 ndbout_c("Stopped %d nodes", nodes.count());
2022 #endif
2023 /**
2024 * Here all nodes were correctly stopped,
2025 * so we wait for all nodes to be contactable
2026 */
2027 NodeId nodeId = 0;
2028 const Uint64 waitTime = 12000;
2029 const NDB_TICKS startTime = NdbTick_getCurrentTicks();
2030
2031
2032 while(getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB)) {
2033 if (!nodes.get(nodeId))
2034 continue;
2035 enum ndb_mgm_node_status s;
2036 s = NDB_MGM_NODE_STATUS_NO_CONTACT;
2037 #ifdef VM_TRACE
2038 ndbout_c("Waiting for %d not started", nodeId);
2039 #endif
2040 while (s != NDB_MGM_NODE_STATUS_NOT_STARTED &&
2041 NdbTick_Elapsed(startTime,NdbTick_getCurrentTicks()).milliSec() < waitTime)
2042 {
2043 Uint32 startPhase = 0, version = 0, dynamicId = 0, nodeGroup = 0;
2044 Uint32 mysql_version = 0;
2045 Uint32 connectCount = 0;
2046 bool system;
2047 const char *address;
2048 char addr_buf[NDB_ADDR_STRLEN];
2049 status(nodeId, &s, &version, &mysql_version, &startPhase,
2050 &system, &dynamicId, &nodeGroup, &connectCount,
2051 &address, addr_buf, sizeof(addr_buf));
2052 NdbSleep_MilliSleep(100);
2053 }
2054 }
2055
2056 if(nostart)
2057 return 0;
2058
2059 /**
2060 * Now we start all database nodes (i.e. we make them non-idle)
2061 * We ignore the result we get from the start command.
2062 */
2063 nodeId = 0;
2064 while(getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB)) {
2065 if (!nodes.get(nodeId))
2066 continue;
2067 int result;
2068 unsigned int loop_count = 0;
2069 do
2070 {
2071 result = sendSTART_ORD(nodeId);
2072 if (result != SEND_OR_RECEIVE_FAILED &&
2073 result != NO_CONTACT_WITH_PROCESS)
2074 break;
2075 if (loop_count >= num_secs_to_wait_for_node)
2076 break;
2077 NdbSleep_MilliSleep(1000);
2078 loop_count++;
2079 } while (1);
2080 g_eventLogger->debug("Started node %d with result %d", nodeId, result);
2081 /**
2082 * Errors from this call are deliberately ignored.
2083 * Maybe the user only wanted to restart a subset of the nodes.
2084 * It is also easy for the user to check which nodes have
2085 * started and which nodes have not.
2086 */
2087 }
2088
2089 return 0;
2090 }
2091
2092 int
exitSingleUser(int * stopCount,bool abort)2093 MgmtSrvr::exitSingleUser(int * stopCount, bool abort)
2094 {
2095 NodeId nodeId = 0;
2096 int count = 0;
2097
2098 SignalSender ss(theFacade);
2099 ss.lock(); // lock will be released on exit
2100
2101 SimpleSignal ssig;
2102 ResumeReq* const resumeReq =
2103 CAST_PTR(ResumeReq, ssig.getDataPtrSend());
2104
2105 ssig.set(ss,TestOrd::TraceAPI, NDBCNTR, GSN_RESUME_REQ,
2106 ResumeReq::SignalLength);
2107 resumeReq->senderData = 12;
2108 resumeReq->senderRef = ss.getOwnRef();
2109
2110 while(getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB)){
2111 if(okToSendTo(nodeId, true) == 0){
2112 SendStatus result = ss.sendSignal(nodeId, &ssig);
2113 if (result == SEND_OK)
2114 count++;
2115 }
2116 }
2117
2118 if(stopCount != 0)
2119 * stopCount = count;
2120
2121 return 0;
2122 }
2123
2124 /*****************************************************************************
2125 * Status
2126 ****************************************************************************/
2127
2128 void
status_mgmd(NodeId node_id,ndb_mgm_node_status & node_status,Uint32 & version,Uint32 & mysql_version,const char ** address,char * addr_buf,size_t addr_buf_size)2129 MgmtSrvr::status_mgmd(NodeId node_id,
2130 ndb_mgm_node_status& node_status,
2131 Uint32& version, Uint32& mysql_version,
2132 const char **address,
2133 char *addr_buf,
2134 size_t addr_buf_size)
2135 {
2136 assert(getNodeType(node_id) == NDB_MGM_NODE_TYPE_MGM);
2137
2138 if (node_id == getOwnNodeId())
2139 {
2140 /*
2141 Special case to get version of own node
2142 - version and mysql_version is hardcoded
2143 - address should be the address seen from ndbd(if it's connected)
2144 else use HostName from config
2145 */
2146 Uint32 tmp_version = 0, tmp_mysql_version = 0;
2147 sendVersionReq(node_id,
2148 tmp_version,
2149 tmp_mysql_version,
2150 address,
2151 addr_buf,
2152 addr_buf_size);
2153 // Check that the version returned is equal to compiled in version
2154 assert(tmp_version == 0 ||
2155 (tmp_version == NDB_VERSION &&
2156 tmp_mysql_version == NDB_MYSQL_VERSION_D));
2157
2158 version = NDB_VERSION;
2159 mysql_version = NDB_MYSQL_VERSION_D;
2160 if(!*address)
2161 {
2162 // No address returned from ndbd -> get HostName from config
2163 Guard g(m_local_config_mutex);
2164 ConfigIter iter(m_local_config, CFG_SECTION_NODE);
2165 require(iter.find(CFG_NODE_ID, node_id) == 0);
2166 require(iter.get(CFG_NODE_HOST, address) == 0);
2167
2168 /*
2169 Try to convert HostName to numerical ip address
2170 (to get same output as if ndbd had replied)
2171 */
2172 struct in_addr addr;
2173 if (Ndb_getInAddr(&addr, *address) == 0)
2174 {
2175 *address = Ndb_inet_ntop(AF_INET,
2176 static_cast<void*>(&addr),
2177 addr_buf,
2178 (socklen_t)addr_buf_size);
2179 }
2180 }
2181
2182 node_status = NDB_MGM_NODE_STATUS_CONNECTED;
2183 return;
2184 }
2185
2186 /*
2187 MGM nodes are connected directly to all other MGM
2188 node(s), return status as seen by ClusterMgr
2189 */
2190 const trp_node node = getNodeInfo(node_id);
2191 if(node.is_connected())
2192 {
2193 version = node.m_info.m_version;
2194 mysql_version = node.m_info.m_mysql_version;
2195 node_status = NDB_MGM_NODE_STATUS_CONNECTED;
2196 *address= get_connect_address(node_id,
2197 addr_buf,
2198 addr_buf_size);
2199 }
2200 else
2201 {
2202 version = 0;
2203 mysql_version = 0;
2204 node_status = NDB_MGM_NODE_STATUS_NO_CONTACT;
2205 }
2206
2207 return;
2208 }
2209
2210 int
status(int nodeId,ndb_mgm_node_status * _status,Uint32 * version,Uint32 * mysql_version,Uint32 * _phase,bool * _system,Uint32 * dynamic,Uint32 * nodegroup,Uint32 * connectCount,const char ** address,char * addr_buf,size_t addr_buf_size)2211 MgmtSrvr::status(int nodeId,
2212 ndb_mgm_node_status * _status,
2213 Uint32 * version,
2214 Uint32 * mysql_version,
2215 Uint32 * _phase,
2216 bool * _system,
2217 Uint32 * dynamic,
2218 Uint32 * nodegroup,
2219 Uint32 * connectCount,
2220 const char **address,
2221 char *addr_buf,
2222 size_t addr_buf_size)
2223 {
2224 switch(getNodeType(nodeId)){
2225 case NDB_MGM_NODE_TYPE_API:
2226 status_api(nodeId,
2227 *_status,
2228 *version,
2229 *mysql_version,
2230 address,
2231 addr_buf,
2232 addr_buf_size);
2233 return 0;
2234 break;
2235
2236 case NDB_MGM_NODE_TYPE_MGM:
2237 status_mgmd(nodeId,
2238 *_status,
2239 *version,
2240 *mysql_version,
2241 address,
2242 addr_buf,
2243 addr_buf_size);
2244 return 0;
2245 break;
2246
2247 case NDB_MGM_NODE_TYPE_NDB:
2248 break;
2249
2250 default:
2251 abort();
2252 break;
2253 }
2254
2255 const trp_node node = getNodeInfo(nodeId);
2256 assert(getNodeType(nodeId) == NDB_MGM_NODE_TYPE_NDB &&
2257 node.m_info.getType() == NodeInfo::DB);
2258
2259 if(!node.is_connected()){
2260 * _status = NDB_MGM_NODE_STATUS_NO_CONTACT;
2261 return 0;
2262 }
2263
2264 * version = node.m_info.m_version;
2265 * mysql_version = node.m_info.m_mysql_version;
2266
2267 *address= get_connect_address(nodeId, addr_buf, addr_buf_size);
2268
2269 * dynamic = node.m_state.dynamicId;
2270 * nodegroup = node.m_state.nodeGroup;
2271 * connectCount = node.m_info.m_connectCount;
2272
2273 switch(node.m_state.startLevel){
2274 case NodeState::SL_CMVMI:
2275 * _status = NDB_MGM_NODE_STATUS_NOT_STARTED;
2276 * _phase = 0;
2277 return 0;
2278 break;
2279 case NodeState::SL_STARTING:
2280 * _status = NDB_MGM_NODE_STATUS_STARTING;
2281 * _phase = node.m_state.starting.startPhase;
2282 return 0;
2283 break;
2284 case NodeState::SL_STARTED:
2285 * _status = NDB_MGM_NODE_STATUS_STARTED;
2286 * _phase = 0;
2287 return 0;
2288 break;
2289 case NodeState::SL_STOPPING_1:
2290 * _status = NDB_MGM_NODE_STATUS_SHUTTING_DOWN;
2291 * _phase = 1;
2292 * _system = node.m_state.stopping.systemShutdown != 0;
2293 return 0;
2294 break;
2295 case NodeState::SL_STOPPING_2:
2296 * _status = NDB_MGM_NODE_STATUS_SHUTTING_DOWN;
2297 * _phase = 2;
2298 * _system = node.m_state.stopping.systemShutdown != 0;
2299 return 0;
2300 break;
2301 case NodeState::SL_STOPPING_3:
2302 * _status = NDB_MGM_NODE_STATUS_SHUTTING_DOWN;
2303 * _phase = 3;
2304 * _system = node.m_state.stopping.systemShutdown != 0;
2305 return 0;
2306 break;
2307 case NodeState::SL_STOPPING_4:
2308 * _status = NDB_MGM_NODE_STATUS_SHUTTING_DOWN;
2309 * _phase = 4;
2310 * _system = node.m_state.stopping.systemShutdown != 0;
2311 return 0;
2312 break;
2313 case NodeState::SL_SINGLEUSER:
2314 * _status = NDB_MGM_NODE_STATUS_SINGLEUSER;
2315 * _phase = 0;
2316 return 0;
2317 break;
2318 default:
2319 * _status = NDB_MGM_NODE_STATUS_UNKNOWN;
2320 * _phase = 0;
2321 return 0;
2322 }
2323
2324 return -1;
2325 }
2326
/**
 * Send an EVENT_SUBSCRIBE_REQ carrying a copy of `ll` to the CMVMI block of
 * either one node (nodeId_arg != 0) or every node id in 1..MAX_NDB_NODES
 * (nodeId_arg == 0), then wait until each node that was sent to has either
 * answered or been reported as failed.
 *
 * Only node ids whose nodeTypes[] entry is NODE_TYPE_DB are considered.
 *
 * @param nodeId_arg  target node id, or 0 for all data nodes
 * @param ll          request payload copied into the outgoing signal
 * @return 0 when every contacted node answered with CONF (or failed);
 *         SEND_OR_RECEIVE_FAILED when a connected node fails the
 *         okToSendTo() check, when the send to the last node id in the range
 *         fails, when no node could be sent to, when any node answers with
 *         REF, or when an unexpected signal is received.
 */
int
MgmtSrvr::setEventReportingLevelImpl(int nodeId_arg,
                                     const EventSubscribeReq& ll)
{
  SignalSender ss(theFacade);
  NdbNodeBitmask nodes;
  nodes.clear();
  // Single-pass "loop": the body always ends in break or return
  while (1)
  {
    Uint32 nodeId, max;
    ss.lock();
    SimpleSignal ssig;
    EventSubscribeReq * dst =
      CAST_PTR(EventSubscribeReq, ssig.getDataPtrSend());
    ssig.set(ss,TestOrd::TraceAPI, CMVMI, GSN_EVENT_SUBSCRIBE_REQ,
             EventSubscribeReq::SignalLength);
    *dst = ll;

    if (nodeId_arg == 0)
    {
      // all nodes
      nodeId = 1;
      max = MAX_NDB_NODES;
    }
    else
    {
      // only one node
      max = nodeId = nodeId_arg;
    }
    // first make sure nodes are sendable
    for(; nodeId <= max; nodeId++)
    {
      if (nodeTypes[nodeId] != NODE_TYPE_DB)
        continue;
      // A non-zero result from okToSendTo() takes the "not sendable" path
      if (okToSendTo(nodeId, true))
      {
        if (getNodeInfo(nodeId).is_connected()  == false)
        {
          // node not connected we can safely skip this one
          continue;
        }
        // api_reg_conf not received yet, need to retry
        return SEND_OR_RECEIVE_FAILED;
      }
    }

    // Reset the iteration range for the sending pass
    if (nodeId_arg == 0)
    {
      // all nodes
      nodeId = 1;
      max = MAX_NDB_NODES;
    }
    else
    {
      // only one node
      max = nodeId = nodeId_arg;
    }
    // now send to all sendable nodes
    // note, lock is held, so states have not changed
    for(; (Uint32) nodeId <= max; nodeId++)
    {
      if (nodeTypes[nodeId] != NODE_TYPE_DB)
        continue;
      if (getNodeInfo(nodeId).is_connected()  == false)
        continue; // node is not connected, skip
      if (ss.sendSignal(nodeId, &ssig) == SEND_OK)
        nodes.set(nodeId);  // remember that a reply is expected from this node
      else if (max == nodeId)
      {
        // Send failure is only reported for the last node id of the range
        // (which is the only node when a single node was requested)
        return SEND_OR_RECEIVE_FAILED;
      }
    }
    break;
  }

  // Nothing was sent, so there is nothing to wait for
  if (nodes.isclear())
  {
    return SEND_OR_RECEIVE_FAILED;
  }

  int error = 0;
  // Wait until every node in `nodes` has replied or been reported failed
  while (!nodes.isclear())
  {
    Uint32 nodeId;
    SimpleSignal *signal = ss.waitFor();
    int gsn = signal->readSignalNumber();
    nodeId = refToNode(signal->header.theSendersBlockRef);
    switch (gsn) {
    case GSN_EVENT_SUBSCRIBE_CONF:{
      nodes.clear(nodeId);
      break;
    }
    case GSN_EVENT_SUBSCRIBE_REF:{
      // Keep waiting for the remaining nodes, but remember the failure
      nodes.clear(nodeId);
      error = 1;
      break;
    }
      // Since sending okToSend(true),
      // there is no guarantee that NF_COMPLETEREP will come
      // i.e listen also to NODE_FAILREP
    case GSN_NODE_FAILREP: {
      const NodeFailRep * const rep =
        CAST_CONSTPTR(NodeFailRep, signal->getDataPtr());
      Uint32 len = NodeFailRep::getNodeMaskLength(signal->getLength());
      require(len == NodeBitmask::Size); // only full length in ndbapi
      NdbNodeBitmask mask;
      // only care about data nodes
      mask.assign(NdbNodeBitmask::Size, rep->theNodes);
      // Stop waiting for every node listed as failed
      nodes.bitANDC(mask);
      break;
    }

    case GSN_NF_COMPLETEREP:{
      const NFCompleteRep * const rep =
        CAST_CONSTPTR(NFCompleteRep, signal->getDataPtr());
      // Ignore node ids that do not fit in the bitmask
      if (rep->failedNodeId <= nodes.max_size())
        nodes.clear(rep->failedNodeId);
      break;
    }
    case GSN_API_REGCONF:
    case GSN_TAKE_OVERTCCONF:
    case GSN_CONNECT_REP:
      // Unrelated signals, keep waiting
      continue;
    default:
      report_unknown_signal(signal);
      return SEND_OR_RECEIVE_FAILED;
    }
  }
  if (error)
    return SEND_OR_RECEIVE_FAILED;
  return 0;
}
2459
2460 //****************************************************************************
2461 //****************************************************************************
2462 int
setNodeLogLevelImpl(int nodeId,const SetLogLevelOrd & ll)2463 MgmtSrvr::setNodeLogLevelImpl(int nodeId, const SetLogLevelOrd & ll)
2464 {
2465 INIT_SIGNAL_SENDER(ss,nodeId);
2466
2467 SimpleSignal ssig;
2468 ssig.set(ss,TestOrd::TraceAPI, CMVMI, GSN_SET_LOGLEVELORD,
2469 SetLogLevelOrd::SignalLength);
2470 SetLogLevelOrd* const dst = CAST_PTR(SetLogLevelOrd, ssig.getDataPtrSend());
2471 *dst = ll;
2472
2473 return ss.sendSignal(nodeId, &ssig) == SEND_OK ? 0 : SEND_OR_RECEIVE_FAILED;
2474 }
2475
2476 //****************************************************************************
2477 //****************************************************************************
2478
2479 int
insertError(int nodeId,int errorNo,Uint32 * extra)2480 MgmtSrvr::insertError(int nodeId, int errorNo, Uint32 * extra)
2481 {
2482 int block;
2483
2484 if (errorNo < 0) {
2485 return INVALID_ERROR_NUMBER;
2486 }
2487
2488 SignalSender ss(theFacade);
2489 ss.lock(); /* lock will be released on exit */
2490
2491 if(getNodeType(nodeId) == NDB_MGM_NODE_TYPE_NDB)
2492 {
2493 block= CMVMI;
2494 }
2495 else if(nodeId == _ownNodeId)
2496 {
2497 g_errorInsert= errorNo;
2498 return 0;
2499 }
2500 else if(getNodeType(nodeId) == NDB_MGM_NODE_TYPE_MGM)
2501 block= _blockNumber;
2502 else
2503 return WRONG_PROCESS_TYPE;
2504
2505 SimpleSignal ssig;
2506 ssig.set(ss,TestOrd::TraceAPI, block, GSN_TAMPER_ORD,
2507 TamperOrd::SignalLength);
2508 TamperOrd* const tamperOrd = CAST_PTR(TamperOrd, ssig.getDataPtrSend());
2509 tamperOrd->errorNo = errorNo;
2510
2511 if (extra)
2512 {
2513 ssig.getDataPtrSend()[1] = * extra;
2514 ssig.header.theLength++;
2515 }
2516
2517 int res = ss.sendSignal(nodeId, &ssig) == SEND_OK ? 0 :SEND_OR_RECEIVE_FAILED;
2518
2519 if (res == 0)
2520 {
2521 /**
2522 * In order to make NDB_TAMPER (almost) syncronous,
2523 * make a syncronous request *after* the NDB_TAMPER
2524 */
2525 make_sync_req(ss, Uint32(nodeId));
2526 }
2527
2528 return res;
2529 }
2530
2531
/**
 * Begin a schema transaction by sending SCHEMA_TRANS_BEGIN_REQ to DBDICT.
 *
 * The request is first sent to a node picked with ss.get_an_alive_node().
 * It is re-sent (via the `retry` label) when DBDICT answers NotMaster, Busy
 * or BusyWithNR, or when a NODE_FAILREP lists the node currently in use.
 *
 * NOTE(review): the callers visible in this file lock `ss` before calling.
 *
 * @param ss            signal sender used both for sending and for waiting
 * @param out_nodeId    on success, the node that confirmed the transaction
 * @param transId       transaction id placed in the request
 * @param out_transKey  on success, the transKey returned in the CONF
 * @return 0 on CONF; SEND_OR_RECEIVE_FAILED when sending fails or an
 *         unexpected signal arrives; otherwise the errorCode of the REF.
 */
int
MgmtSrvr::startSchemaTrans(SignalSender& ss, NodeId & out_nodeId,
                           Uint32 transId, Uint32 & out_transKey)
{
  SimpleSignal ssig;

  ssig.set(ss, 0, DBDICT, GSN_SCHEMA_TRANS_BEGIN_REQ,
           SchemaTransBeginReq::SignalLength);

  SchemaTransBeginReq* req =
    CAST_PTR(SchemaTransBeginReq, ssig.getDataPtrSend());

  req->clientRef =  ss.getOwnRef();
  req->transId = transId;
  req->requestInfo = 0;

  NodeId nodeId = ss.get_an_alive_node();

retry:
  // The candidate may have been changed by a REF or a node failure;
  // fall back to any alive node when it is not alive
  if (ss.get_node_alive(nodeId) == false)
  {
    nodeId = ss.get_an_alive_node();
  }

  if (ss.sendSignal(nodeId, &ssig) != SEND_OK)
  {
    return SEND_OR_RECEIVE_FAILED;
  }

  while (true)
  {
    SimpleSignal *signal = ss.waitFor();
    int gsn = signal->readSignalNumber();
    switch (gsn) {
    case GSN_SCHEMA_TRANS_BEGIN_CONF: {
      const SchemaTransBeginConf * conf =
        CAST_CONSTPTR(SchemaTransBeginConf, signal->getDataPtr());
      out_transKey = conf->transKey;
      out_nodeId = nodeId;
      return 0;
    }
    case GSN_SCHEMA_TRANS_BEGIN_REF: {
      const SchemaTransBeginRef * ref =
        CAST_CONSTPTR(SchemaTransBeginRef, signal->getDataPtr());

      switch(ref->errorCode){
      case SchemaTransBeginRef::NotMaster:
        // Redirect the request to the master named in the REF
        nodeId = ref->masterNodeId;
        // Fall-through
      case SchemaTransBeginRef::Busy:
      case SchemaTransBeginRef::BusyWithNR:
        goto retry;
      default:
        return ref->errorCode;
      }
    }
    case GSN_NF_COMPLETEREP:
      // ignore
      break;
    case GSN_NODE_FAILREP:{
      const NodeFailRep * const rep =
        CAST_CONSTPTR(NodeFailRep, signal->getDataPtr());
      Uint32 len = NodeFailRep::getNodeMaskLength(signal->getLength());
      assert(len == NodeBitmask::Size); // only full length in ndbapi
      // The node the request was sent to has failed: move on and resend
      if (BitmaskImpl::safe_get(len, rep->theAllNodes, nodeId))
      {
        nodeId++;
        goto retry;
      }
      break;
    }
    case GSN_API_REGCONF:
    case GSN_TAKE_OVERTCCONF:
    case GSN_CONNECT_REP:
      // Unrelated signals, keep waiting
      break;
    default:
      report_unknown_signal(signal);
      return SEND_OR_RECEIVE_FAILED;
    }
  }
}
2613
2614 int
endSchemaTrans(SignalSender & ss,NodeId nodeId,Uint32 transId,Uint32 transKey,Uint32 flags)2615 MgmtSrvr::endSchemaTrans(SignalSender& ss, NodeId nodeId,
2616 Uint32 transId, Uint32 transKey,
2617 Uint32 flags)
2618 {
2619 SimpleSignal ssig;
2620
2621 ssig.set(ss, 0, DBDICT, GSN_SCHEMA_TRANS_END_REQ,
2622 SchemaTransEndReq::SignalLength);
2623
2624 SchemaTransEndReq* req =
2625 CAST_PTR(SchemaTransEndReq, ssig.getDataPtrSend());
2626
2627 req->clientRef = ss.getOwnRef();
2628 req->transId = transId;
2629 req->requestInfo = 0;
2630 req->transKey = transKey;
2631 req->flags = flags;
2632
2633 if (ss.sendSignal(nodeId, &ssig) != SEND_OK)
2634 {
2635 return SEND_OR_RECEIVE_FAILED;
2636 }
2637
2638 while (true)
2639 {
2640 SimpleSignal *signal = ss.waitFor();
2641 int gsn = signal->readSignalNumber();
2642 switch (gsn) {
2643 case GSN_SCHEMA_TRANS_END_CONF: {
2644 return 0;
2645 }
2646 case GSN_SCHEMA_TRANS_END_REF: {
2647 const SchemaTransEndRef * ref =
2648 CAST_CONSTPTR(SchemaTransEndRef, signal->getDataPtr());
2649 return ref->errorCode;
2650 }
2651 case GSN_NF_COMPLETEREP:
2652 // ignore
2653 break;
2654 case GSN_NODE_FAILREP:{
2655 const NodeFailRep * const rep =
2656 CAST_CONSTPTR(NodeFailRep, signal->getDataPtr());
2657 Uint32 len = NodeFailRep::getNodeMaskLength(signal->getLength());
2658 assert(len == NodeBitmask::Size); // only full length in ndbapi
2659 if (BitmaskImpl::safe_get(len, rep->theAllNodes, nodeId))
2660 {
2661 return -1;
2662 }
2663 break;
2664 }
2665 case GSN_API_REGCONF:
2666 case GSN_TAKE_OVERTCCONF:
2667 case GSN_CONNECT_REP:
2668 break;
2669 default:
2670 report_unknown_signal(signal);
2671 return SEND_OR_RECEIVE_FAILED;
2672 }
2673 }
2674 }
2675
2676 int
createNodegroup(int * nodes,int count,int * ng)2677 MgmtSrvr::createNodegroup(int *nodes, int count, int *ng)
2678 {
2679 int res;
2680 SignalSender ss(theFacade);
2681 ss.lock();
2682
2683 Uint32 transId = rand();
2684 Uint32 transKey;
2685 NodeId nodeId;
2686
2687 if ((res = startSchemaTrans(ss, nodeId, transId, transKey)))
2688 {
2689 return res;
2690 }
2691
2692 SimpleSignal ssig;
2693 ssig.set(ss, 0, DBDICT, GSN_CREATE_NODEGROUP_REQ,
2694 CreateNodegroupReq::SignalLength);
2695
2696 CreateNodegroupReq* req =
2697 CAST_PTR(CreateNodegroupReq, ssig.getDataPtrSend());
2698
2699 req->transId = transId;
2700 req->transKey = transKey;
2701 req->nodegroupId = RNIL;
2702 req->senderData = 77;
2703 req->senderRef = ss.getOwnRef();
2704 bzero(req->nodes, sizeof(req->nodes));
2705
2706 if (ng)
2707 {
2708 if (* ng != -1)
2709 {
2710 req->nodegroupId = * ng;
2711 }
2712 }
2713 for (int i = 0; i<count && i<(int)NDB_ARRAY_SIZE(req->nodes); i++)
2714 {
2715 req->nodes[i] = nodes[i];
2716 }
2717
2718 if (ss.sendSignal(nodeId, &ssig) != SEND_OK)
2719 {
2720 return SEND_OR_RECEIVE_FAILED;
2721 }
2722
2723 bool wait = true;
2724 while (wait)
2725 {
2726 SimpleSignal *signal = ss.waitFor();
2727 int gsn = signal->readSignalNumber();
2728 switch (gsn) {
2729 case GSN_CREATE_NODEGROUP_CONF: {
2730 const CreateNodegroupConf * conf =
2731 CAST_CONSTPTR(CreateNodegroupConf, signal->getDataPtr());
2732
2733 if (ng)
2734 {
2735 * ng = conf->nodegroupId;
2736 }
2737
2738 wait = false;
2739 break;
2740 }
2741 case GSN_CREATE_NODEGROUP_REF:{
2742 const CreateNodegroupRef * ref =
2743 CAST_CONSTPTR(CreateNodegroupRef, signal->getDataPtr());
2744 Uint32 err = ref->errorCode;
2745 endSchemaTrans(ss, nodeId, transId, transKey,
2746 SchemaTransEndReq::SchemaTransAbort);
2747 return err;
2748 }
2749 case GSN_NF_COMPLETEREP:
2750 // ignore
2751 break;
2752 case GSN_NODE_FAILREP:{
2753 const NodeFailRep * const rep =
2754 CAST_CONSTPTR(NodeFailRep, signal->getDataPtr());
2755 Uint32 len = NodeFailRep::getNodeMaskLength(signal->getLength());
2756 assert(len == NodeBitmask::Size); // only full length in ndbapi
2757 if (BitmaskImpl::safe_get(len, rep->theAllNodes, nodeId))
2758 {
2759 return SchemaTransBeginRef::Nodefailure;
2760 }
2761 break;
2762 }
2763 case GSN_API_REGCONF:
2764 case GSN_TAKE_OVERTCCONF:
2765 case GSN_CONNECT_REP:
2766 break;
2767 default:
2768 report_unknown_signal(signal);
2769 return SEND_OR_RECEIVE_FAILED;
2770 }
2771 }
2772
2773 return endSchemaTrans(ss, nodeId, transId, transKey, 0);
2774 }
2775
2776 int
dropNodegroup(int ng)2777 MgmtSrvr::dropNodegroup(int ng)
2778 {
2779 int res;
2780 SignalSender ss(theFacade);
2781 ss.lock();
2782
2783 Uint32 transId = rand();
2784 Uint32 transKey;
2785 NodeId nodeId;
2786
2787 if ((res = startSchemaTrans(ss, nodeId, transId, transKey)))
2788 {
2789 return res;
2790 }
2791
2792 SimpleSignal ssig;
2793 ssig.set(ss, 0, DBDICT, GSN_DROP_NODEGROUP_REQ, DropNodegroupReq::SignalLength);
2794
2795 DropNodegroupReq* req =
2796 CAST_PTR(DropNodegroupReq, ssig.getDataPtrSend());
2797
2798 req->transId = transId;
2799 req->transKey = transKey;
2800 req->nodegroupId = ng;
2801 req->senderData = 77;
2802 req->senderRef = ss.getOwnRef();
2803
2804 if (ss.sendSignal(nodeId, &ssig) != SEND_OK)
2805 {
2806 return SEND_OR_RECEIVE_FAILED;
2807 }
2808
2809 bool wait = true;
2810 while (wait)
2811 {
2812 SimpleSignal *signal = ss.waitFor();
2813 int gsn = signal->readSignalNumber();
2814 switch (gsn) {
2815 case GSN_DROP_NODEGROUP_CONF: {
2816 wait = false;
2817 break;
2818 }
2819 case GSN_DROP_NODEGROUP_REF:
2820 {
2821 const DropNodegroupRef * ref =
2822 CAST_CONSTPTR(DropNodegroupRef, signal->getDataPtr());
2823 endSchemaTrans(ss, nodeId, transId, transKey,
2824 SchemaTransEndReq::SchemaTransAbort);
2825 return ref->errorCode;
2826 }
2827 case GSN_NF_COMPLETEREP:
2828 // ignore
2829 break;
2830 case GSN_NODE_FAILREP:{
2831 const NodeFailRep * const rep =
2832 CAST_CONSTPTR(NodeFailRep, signal->getDataPtr());
2833 Uint32 len = NodeFailRep::getNodeMaskLength(signal->getLength());
2834 assert(len == NodeBitmask::Size); // only full length in ndbapi
2835 if (BitmaskImpl::safe_get(len, rep->theAllNodes, nodeId))
2836 {
2837 return SchemaTransBeginRef::Nodefailure;
2838 }
2839 break;
2840 }
2841 case GSN_API_REGCONF:
2842 case GSN_TAKE_OVERTCCONF:
2843 case GSN_CONNECT_REP:
2844 break;
2845 default:
2846 report_unknown_signal(signal);
2847 return SEND_OR_RECEIVE_FAILED;
2848 }
2849 }
2850
2851 return endSchemaTrans(ss, nodeId, transId, transKey, 0);
2852 }
2853
2854
2855 //****************************************************************************
2856 //****************************************************************************
2857
2858 int
setTraceNo(int nodeId,int traceNo)2859 MgmtSrvr::setTraceNo(int nodeId, int traceNo)
2860 {
2861 if (traceNo < 0) {
2862 return INVALID_TRACE_NUMBER;
2863 }
2864
2865 INIT_SIGNAL_SENDER(ss,nodeId);
2866
2867 SimpleSignal ssig;
2868 ssig.set(ss,TestOrd::TraceAPI, CMVMI, GSN_TEST_ORD, TestOrd::SignalLength);
2869 TestOrd* const testOrd = CAST_PTR(TestOrd, ssig.getDataPtrSend());
2870 testOrd->clear();
2871 // Assume TRACE command causes toggling. Not really defined... ? TODO
2872 testOrd->setTraceCommand(TestOrd::Toggle,
2873 (TestOrd::TraceSpecification)traceNo);
2874
2875 return ss.sendSignal(nodeId, &ssig) == SEND_OK ? 0 : SEND_OR_RECEIVE_FAILED;
2876 }
2877
2878 //****************************************************************************
2879 //****************************************************************************
2880
2881 int
setSignalLoggingMode(int nodeId,LogMode mode,const Vector<BaseString> & blocks)2882 MgmtSrvr::setSignalLoggingMode(int nodeId, LogMode mode,
2883 const Vector<BaseString>& blocks)
2884 {
2885 INIT_SIGNAL_SENDER(ss,nodeId);
2886
2887 // Convert from MgmtSrvr format...
2888
2889 TestOrd::Command command;
2890 if (mode == Off) {
2891 command = TestOrd::Off;
2892 }
2893 else {
2894 command = TestOrd::On;
2895 }
2896
2897 TestOrd::SignalLoggerSpecification logSpec;
2898 switch (mode) {
2899 case In:
2900 logSpec = TestOrd::InputSignals;
2901 break;
2902 case Out:
2903 logSpec = TestOrd::OutputSignals;
2904 break;
2905 case InOut:
2906 logSpec = TestOrd::InputOutputSignals;
2907 break;
2908 case Off:
2909 // In MgmtSrvr interface it's just possible to switch off all logging, both
2910 // "in" and "out" (this should probably be changed).
2911 logSpec = TestOrd::InputOutputSignals;
2912 break;
2913 default:
2914 ndbout_c("Unexpected value %d, MgmtSrvr::setSignalLoggingMode, line %d",
2915 (unsigned)mode, __LINE__);
2916 assert(false);
2917 return -1;
2918 }
2919
2920 SimpleSignal ssig;
2921 ssig.set(ss,TestOrd::TraceAPI, CMVMI, GSN_TEST_ORD, TestOrd::SignalLength);
2922
2923 TestOrd* const testOrd = CAST_PTR(TestOrd, ssig.getDataPtrSend());
2924 testOrd->clear();
2925
2926 if (blocks.size() == 0 || blocks[0] == "ALL") {
2927 // Logg command for all blocks
2928 testOrd->addSignalLoggerCommand(command, logSpec);
2929 } else {
2930 for(unsigned i = 0; i < blocks.size(); i++)
2931 {
2932 BlockNumber blockNumber = getBlockNo(blocks[i].c_str());
2933 if (blockNumber == 0)
2934 return INVALID_BLOCK_NAME;
2935 testOrd->addSignalLoggerCommand(blockNumber, command, logSpec);
2936 }
2937 }
2938
2939 return ss.sendSignal(nodeId, &ssig) == SEND_OK ? 0 : SEND_OR_RECEIVE_FAILED;
2940 }
2941
2942 /*****************************************************************************
2943 * Signal tracing
2944 *****************************************************************************/
startSignalTracing(int nodeId)2945 int MgmtSrvr::startSignalTracing(int nodeId)
2946 {
2947 INIT_SIGNAL_SENDER(ss,nodeId);
2948
2949 SimpleSignal ssig;
2950 ssig.set(ss,TestOrd::TraceAPI, CMVMI, GSN_TEST_ORD, TestOrd::SignalLength);
2951
2952 TestOrd* const testOrd = CAST_PTR(TestOrd, ssig.getDataPtrSend());
2953 testOrd->clear();
2954 testOrd->setTestCommand(TestOrd::On);
2955
2956 return ss.sendSignal(nodeId, &ssig) == SEND_OK ? 0 : SEND_OR_RECEIVE_FAILED;
2957 }
2958
2959 int
stopSignalTracing(int nodeId)2960 MgmtSrvr::stopSignalTracing(int nodeId)
2961 {
2962 INIT_SIGNAL_SENDER(ss,nodeId);
2963
2964 SimpleSignal ssig;
2965 ssig.set(ss,TestOrd::TraceAPI, CMVMI, GSN_TEST_ORD, TestOrd::SignalLength);
2966 TestOrd* const testOrd = CAST_PTR(TestOrd, ssig.getDataPtrSend());
2967 testOrd->clear();
2968 testOrd->setTestCommand(TestOrd::Off);
2969
2970 return ss.sendSignal(nodeId, &ssig) == SEND_OK ? 0 : SEND_OR_RECEIVE_FAILED;
2971 }
2972
2973
2974 /*****************************************************************************
2975 * Dump state
2976 *****************************************************************************/
2977
2978 int
dumpState(int nodeId,const char * args)2979 MgmtSrvr::dumpState(int nodeId, const char* args)
2980 {
2981 // Convert the space separeted args
2982 // string to an int array
2983 Uint32 args_array[25];
2984 Uint32 numArgs = 0;
2985
2986 char buf[10];
2987 int b = 0;
2988 memset(buf, 0, 10);
2989 for (size_t i = 0; i <= strlen(args); i++){
2990 if (args[i] == ' ' || args[i] == 0){
2991 args_array[numArgs] = atoi(buf);
2992 numArgs++;
2993 memset(buf, 0, 10);
2994 b = 0;
2995 } else {
2996 buf[b] = args[i];
2997 b++;
2998 }
2999 }
3000
3001 return dumpState(nodeId, args_array, numArgs);
3002 }
3003
3004 int
dumpState(int nodeId,const Uint32 args[],Uint32 no)3005 MgmtSrvr::dumpState(int nodeId, const Uint32 args[], Uint32 no)
3006 {
3007 INIT_SIGNAL_SENDER(ss,nodeId);
3008
3009 const Uint32 len = no > 25 ? 25 : no;
3010
3011 SimpleSignal ssig;
3012 DumpStateOrd * const dumpOrd =
3013 CAST_PTR(DumpStateOrd, ssig.getDataPtrSend());
3014 ssig.set(ss,TestOrd::TraceAPI, CMVMI, GSN_DUMP_STATE_ORD, len);
3015 for(Uint32 i = 0; i<25; i++){
3016 if (i < len)
3017 dumpOrd->args[i] = args[i];
3018 else
3019 dumpOrd->args[i] = 0;
3020 }
3021
3022 int res = ss.sendSignal(nodeId, &ssig) == SEND_OK ? 0 :SEND_OR_RECEIVE_FAILED;
3023
3024 if (res == 0)
3025 {
3026 /**
3027 * In order to make DUMP (almost) syncronous,
3028 * make a syncronous request *after* the NDB_TAMPER
3029 */
3030 make_sync_req(ss, Uint32(nodeId));
3031 }
3032
3033 return res;
3034
3035 }
3036
3037
3038 //****************************************************************************
3039 //****************************************************************************
3040
getErrorText(int errorCode,char * buf,int buf_sz)3041 const char* MgmtSrvr::getErrorText(int errorCode, char *buf, int buf_sz)
3042 {
3043 ndb_error_string(errorCode, buf, buf_sz);
3044 buf[buf_sz-1]= 0;
3045 return buf;
3046 }
3047
3048
/**
 * Handle a signal delivered to the management server, dispatching on its
 * signal number:
 *  - EVENT_REP:      forwarded to eventReport()
 *  - NF_COMPLETEREP: releases the local node id reservation and clears the
 *                    cached connect address of the failed node
 *  - CONNECT_REP:    releases the reservation, records connecting data
 *                    nodes in m_started_nodes and reports NDB_LE_Connected
 *  - NODE_FAILREP:   reports NDB_LE_Disconnected for every node in the
 *                    mask and releases its reservation / cached address
 *  - CLOSE_COMREQ:   asks the facade to close this client
 * Unknown signals are logged as an error and trip an assert.
 *
 * @param signal  the received signal
 * @param ptr     signal sections; not used by this function
 */
void
MgmtSrvr::trp_deliver_signal(const NdbApiSignal* signal,
                             const LinearSectionPtr ptr[3])
{
  int gsn = signal->readSignalNumber();

  switch (gsn) {
  case GSN_EVENT_REP:
  {
    eventReport(signal->getDataPtr(), signal->getLength());
    break;
  }

  case GSN_NF_COMPLETEREP:{
    const NFCompleteRep * rep = CAST_CONSTPTR(NFCompleteRep,
                                               signal->getDataPtr());
    /* Clear local nodeid reservation(if any) */
    release_local_nodeid_reservation(rep->failedNodeId);

    clear_connect_address_cache(rep->failedNodeId);
    break;
  }
  case GSN_TAMPER_ORD:
    ndbout << "TAMPER ORD" << endl;
    break;
  case GSN_API_REGCONF:
  case GSN_TAKE_OVERTCCONF:
    // Nothing to do for these signals
    break;
  case GSN_CONNECT_REP:{
    // First signal word carries the id of the node that connected
    const Uint32 nodeId = signal->getDataPtr()[0];

    /*
      Clear local nodeid reservation since nodeid is
      now reserved by a connected transporter
    */
    release_local_nodeid_reservation(nodeId);

    // theData and repData alias the same storage: the event report is
    // built through `rep` and handed over as raw words
    union {
      Uint32 theData[25];
      EventReport repData;
    };
    EventReport * rep = &repData;
    theData[1] = nodeId;
    rep->setEventType(NDB_LE_Connected);

    if (nodeTypes[nodeId] == NODE_TYPE_DB)
    {
      m_started_nodes.push_back(nodeId);
    }
    rep->setEventType(NDB_LE_Connected);
    rep->setNodeId(_ownNodeId);
    eventReport(theData, 1);
    return;
  }
  case GSN_NODE_FAILREP:
  {
    // Same aliasing as in CONNECT_REP, but zero-filled first
    union {
      Uint32 theData[25];
      EventReport repData;
    };
    bzero(theData, sizeof(theData));
    EventReport * event = &repData;
    event->setEventType(NDB_LE_Disconnected);
    event->setNodeId(_ownNodeId);

    const NodeFailRep *rep = CAST_CONSTPTR(NodeFailRep,
                                           signal->getDataPtr());
    Uint32 len = NodeFailRep::getNodeMaskLength(signal->getLength());
    assert(len == NodeBitmask::Size); // only full length in ndbapi
    // One Disconnected report per node set in the failure mask
    for (Uint32 i = BitmaskImpl::find_first(len, rep->theAllNodes);
         i != BitmaskImpl::NotFound;
         i = BitmaskImpl::find_next(len, rep->theAllNodes, i + 1))
    {
      theData[1] = i;
      eventReport(theData, 1);

      /* Clear local nodeid reservation(if any) */
      release_local_nodeid_reservation(i);

      clear_connect_address_cache(i);
    }
    return;
  }
  case GSN_CLOSE_COMREQ:
  {
    theFacade->perform_close_clnt(this);
    break;
  }
  default:
    g_eventLogger->error("Unknown signal received. SignalNumber: "
                         "%i from (%d, 0x%x)",
                         gsn,
                         refToNode(signal->theSendersBlockRef),
                         refToBlock(signal->theSendersBlockRef));
    assert(false);
  }
}
3146
3147
/**
 * Node status callback. Intentionally empty: neither `nodeId` nor `_event`
 * is used and no action is taken.
 */
void
MgmtSrvr::trp_node_status(Uint32 nodeId, Uint32 _event)
{
}
3152
3153 enum ndb_mgm_node_type
getNodeType(NodeId nodeId) const3154 MgmtSrvr::getNodeType(NodeId nodeId) const
3155 {
3156 if(nodeId >= MAX_NODES)
3157 return (enum ndb_mgm_node_type)-1;
3158
3159 return nodeTypes[nodeId];
3160 }
3161
3162
3163 const char*
get_connect_address(NodeId node_id,char * addr_buf,size_t addr_buf_size)3164 MgmtSrvr::get_connect_address(NodeId node_id,
3165 char *addr_buf,
3166 size_t addr_buf_size)
3167 {
3168 assert(node_id < NDB_ARRAY_SIZE(m_connect_address));
3169
3170 if (m_connect_address[node_id].s_addr == 0)
3171 {
3172 // No cached connect address available
3173 const trp_node &node= getNodeInfo(node_id);
3174 if (node.is_connected())
3175 {
3176 // Cache the connect address, it's valid until
3177 // node disconnects
3178 m_connect_address[node_id] = theFacade->ext_get_connect_address(node_id);
3179 }
3180 }
3181
3182 // Return the cached connect address
3183 return Ndb_inet_ntop(AF_INET,
3184 static_cast<void*>(&m_connect_address[node_id]),
3185 addr_buf,
3186 (socklen_t)addr_buf_size);
3187 }
3188
3189
3190 void
clear_connect_address_cache(NodeId nodeid)3191 MgmtSrvr::clear_connect_address_cache(NodeId nodeid)
3192 {
3193 assert(nodeid < NDB_ARRAY_SIZE(m_connect_address));
3194 if (nodeid < NDB_ARRAY_SIZE(m_connect_address))
3195 {
3196 m_connect_address[nodeid].s_addr = 0;
3197 }
3198 }
3199
3200 /***************************************************************************
3201 * Alloc nodeid
3202 ***************************************************************************/
3203
/**
 * Start with no reservations: every slot of m_reservations is zero-filled,
 * which get() and pretty_str() interpret as "not reserved"
 * (m_timeout == 0).
 */
MgmtSrvr::NodeIdReservations::NodeIdReservations()
{
  memset(m_reservations, 0, sizeof(m_reservations));
}
3208
3209
/**
 * Debug-only bounds check: asserts that `n` is a valid index into
 * m_reservations.
 */
void
MgmtSrvr::NodeIdReservations::check_array(NodeId n) const
{
  assert( n < NDB_ARRAY_SIZE(m_reservations));
}
3215
3216
3217 bool
get(NodeId n) const3218 MgmtSrvr::NodeIdReservations::get(NodeId n) const
3219 {
3220 check_array(n);
3221
3222 return (m_reservations[n].m_timeout != 0);
3223 }
3224
3225
3226 void
set(NodeId n,unsigned timeout)3227 MgmtSrvr::NodeIdReservations::set(NodeId n, unsigned timeout)
3228 {
3229 check_array(n);
3230
3231 Reservation& r = m_reservations[n];
3232 // Dont't allow double set
3233 assert(r.m_timeout == 0 && !NdbTick_IsValid(r.m_start));
3234
3235 r.m_timeout = timeout;
3236 r.m_start = NdbTick_getCurrentTicks();
3237 }
3238
3239
3240 BaseString
pretty_str() const3241 MgmtSrvr::NodeIdReservations::pretty_str() const
3242 {
3243 const char* sep = "";
3244 BaseString str;
3245 for (size_t i = 0; i < NDB_ARRAY_SIZE(m_reservations); i++)
3246 {
3247 const Reservation& r = m_reservations[i];
3248 if (r.m_timeout)
3249 {
3250 str.appfmt("%s%u", sep, (unsigned)i);
3251 sep = ",";
3252 }
3253 }
3254 return str;
3255 }
3256
3257
3258 void
clear(NodeId n)3259 MgmtSrvr::NodeIdReservations::clear(NodeId n)
3260 {
3261 check_array(n);
3262
3263 Reservation& r = m_reservations[n];
3264 // Dont't allow double clear
3265 assert(r.m_timeout != 0 && NdbTick_IsValid(r.m_start));
3266
3267 r.m_timeout = 0;
3268 NdbTick_Invalidate(&r.m_start);
3269 }
3270
3271
3272 bool
has_timedout(NodeId n,NDB_TICKS now) const3273 MgmtSrvr::NodeIdReservations::has_timedout(NodeId n, NDB_TICKS now) const
3274 {
3275 check_array(n);
3276
3277 const Reservation& r = m_reservations[n];
3278 if (r.m_timeout &&
3279 NdbTick_Elapsed(r.m_start,now).milliSec() > r.m_timeout)
3280 return true;
3281 return false;
3282 }
3283
3284
3285 void
release_local_nodeid_reservation(NodeId nodeid)3286 MgmtSrvr::release_local_nodeid_reservation(NodeId nodeid)
3287 {
3288 NdbMutex_Lock(m_reserved_nodes_mutex);
3289 if (m_reserved_nodes.get(nodeid))
3290 {
3291 g_eventLogger->debug("Releasing local reservation for nodeid %d", nodeid);
3292 m_reserved_nodes.clear(nodeid);
3293 }
3294 NdbMutex_Unlock(m_reserved_nodes_mutex);
3295 }
3296
3297
/**
 * Ask the data nodes (QMGR) to allocate node id `free_node_id` by sending
 * ALLOC_NODEID_REQ and waiting for the answer.
 *
 * The request is sent to the first confirmed data node found with
 * getNextNodeId(). It is re-sent when the answer is NotMaster (to the
 * master named in the REF), Busy or NodeFailureHandlingNotCompleted (after
 * a 100ms pause), or when the node it was sent to is reported failed.
 *
 * @param free_node_id  node id to allocate
 * @param type          node type placed in the request
 * @param timeout_ms    timeout value placed in the request
 * @return 0 on CONF; NO_CONTACT_WITH_DB_NODES when no confirmed data node
 *         exists or no president has been elected yet;
 *         SEND_OR_RECEIVE_FAILED when sending fails or an unexpected signal
 *         arrives; otherwise the errorCode of the REF.
 */
int
MgmtSrvr::alloc_node_id_req(NodeId free_node_id,
                            enum ndb_mgm_node_type type,
                            Uint32 timeout_ms)
{
  bool first_attempt = true;
  SignalSender ss(theFacade);
  ss.lock(); // lock will be released on exit

  SimpleSignal ssig;
  AllocNodeIdReq* req = CAST_PTR(AllocNodeIdReq, ssig.getDataPtrSend());
  ssig.set(ss, TestOrd::TraceAPI, QMGR, GSN_ALLOC_NODEID_REQ,
           AllocNodeIdReq::SignalLength);

  req->senderRef = ss.getOwnRef();
  req->senderData = 19;
  req->nodeId = free_node_id;
  req->nodeType = type;
  req->timeout = timeout_ms;

  int do_send = 1;
  // nodeId == 0 means "no target chosen yet"
  NodeId nodeId = 0;
  while (1)
  {
    if (nodeId == 0)
    {
      // Pick the first data node that is confirmed
      bool next;
      while((next = getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB)) == true &&
            getNodeInfo(nodeId).is_confirmed() == false)
        ;
      if (!next)
        return NO_CONTACT_WITH_DB_NODES;
      do_send = 1;
    }
    if (do_send)
    {
      if (ss.sendSignal(nodeId, &ssig) != SEND_OK)
        return SEND_OR_RECEIVE_FAILED;
      do_send = 0;
    }

    SimpleSignal *signal = ss.waitFor();

    int gsn = signal->readSignalNumber();
    switch (gsn) {
    case GSN_ALLOC_NODEID_CONF:
    {
#ifdef NOT_USED
      const AllocNodeIdConf * const conf =
        CAST_CONSTPTR(AllocNodeIdConf, signal->getDataPtr());
#endif
      g_eventLogger->info("Alloc node id %u succeeded", free_node_id);
      return 0;
    }
    case GSN_ALLOC_NODEID_REF:
    {
      const AllocNodeIdRef * const ref =
        CAST_CONSTPTR(AllocNodeIdRef, signal->getDataPtr());
      if (ref->errorCode == AllocNodeIdRef::NotMaster &&
          refToNode(ref->masterRef) == 0xFFFF)
      {
        /*
          The data nodes haven't decided who is the president (yet)
          and thus can't allocate nodeids -> return "no contact"
        */
        g_eventLogger->info("Alloc node id %u failed, no new president yet",
                            free_node_id);
        return NO_CONTACT_WITH_DB_NODES;
      }

      if (ref->errorCode == AllocNodeIdRef::NotMaster ||
          ref->errorCode == AllocNodeIdRef::Busy ||
          ref->errorCode == AllocNodeIdRef::NodeFailureHandlingNotCompleted)
      {
        // Temporary condition: resend, preferably to the reported master
        do_send = 1;
        nodeId = refToNode(ref->masterRef);
        if (!getNodeInfo(nodeId).is_confirmed())
          nodeId = 0;  // master not usable, pick a new target next round
        if (ref->errorCode != AllocNodeIdRef::NotMaster)
        {
          // Log only the first retry
          if (first_attempt)
          {
            first_attempt = false;
            g_eventLogger->info("Alloc node id %u failed with error code %u, will retry",
                                free_node_id,
                                ref->errorCode);
          }
          /* sleep for a while (100ms) before retrying */
          ss.unlock();
          NdbSleep_MilliSleep(100);
          ss.lock();
        }
        continue;
      }
      return ref->errorCode;
    }
    case GSN_NF_COMPLETEREP:
    {
      continue;
    }
    case GSN_NODE_FAILREP:{
      /**
       * ok to trap using NODE_FAILREP
       *   as we don't really wait on anything interesting
       */
      const NodeFailRep * const rep =
        CAST_CONSTPTR(NodeFailRep, signal->getDataPtr());
      Uint32 len = NodeFailRep::getNodeMaskLength(signal->getLength());
      assert(len == NodeBitmask::Size); // only full length in ndbapi
      // The node the request was sent to failed: choose another and resend
      if (BitmaskImpl::safe_get(len, rep->theAllNodes, nodeId))
      {
        do_send = 1;
        nodeId = 0;
      }
      continue;
    }
    case GSN_API_REGCONF:
    case GSN_TAKE_OVERTCCONF:
    case GSN_CONNECT_REP:
      // Unrelated signals, keep waiting
      continue;
    default:
      report_unknown_signal(signal);
      return SEND_OR_RECEIVE_FAILED;
    }
  }
  // Not reached: the loop above only exits through return
  return 0;
}
3425
3426 static int
match_hostname(const struct sockaddr * clnt_addr,const char * config_hostname)3427 match_hostname(const struct sockaddr *clnt_addr,
3428 const char *config_hostname)
3429 {
3430 struct in_addr config_addr= {0};
3431 if (clnt_addr)
3432 {
3433 const struct in_addr *clnt_in_addr = &((sockaddr_in*)clnt_addr)->sin_addr;
3434
3435 if (Ndb_getInAddr(&config_addr, config_hostname) != 0
3436 || memcmp(&config_addr, clnt_in_addr, sizeof(config_addr)) != 0)
3437 {
3438 struct in_addr tmp_addr;
3439 if (Ndb_getInAddr(&tmp_addr, "localhost") != 0
3440 || memcmp(&tmp_addr, clnt_in_addr, sizeof(config_addr)) != 0)
3441 {
3442 // not localhost
3443 return -1;
3444 }
3445
3446 // connecting through localhost
3447 // check if config_hostname is local
3448 if (!SocketServer::tryBind(0, config_hostname))
3449 return -1;
3450 }
3451 }
3452 else
3453 {
3454 if (!SocketServer::tryBind(0, config_hostname))
3455 return -1;
3456 }
3457 return 0;
3458 }
3459
3460 int
find_node_type(NodeId node_id,ndb_mgm_node_type type,const struct sockaddr * client_addr,Vector<PossibleNode> & nodes,int & error_code,BaseString & error_string)3461 MgmtSrvr::find_node_type(NodeId node_id,
3462 ndb_mgm_node_type type,
3463 const struct sockaddr* client_addr,
3464 Vector<PossibleNode>& nodes,
3465 int& error_code, BaseString& error_string)
3466 {
3467 const char* found_config_hostname= 0;
3468 unsigned type_c= (unsigned)type;
3469
3470 Guard g(m_local_config_mutex);
3471
3472 ConfigIter iter(m_local_config, CFG_SECTION_NODE);
3473 for(iter.first(); iter.valid(); iter.next())
3474 {
3475 unsigned id;
3476 if (iter.get(CFG_NODE_ID, &id))
3477 require(false);
3478 if (node_id && node_id != id)
3479 continue;
3480 if (iter.get(CFG_TYPE_OF_SECTION, &type_c))
3481 require(false);
3482 if (type_c != (unsigned)type)
3483 {
3484 if (!node_id)
3485 continue;
3486 goto error;
3487 }
3488 bool exact_match = false;
3489 const char *config_hostname= 0;
3490 if (iter.get(CFG_NODE_HOST, &config_hostname))
3491 require(false);
3492 if (config_hostname == 0 || config_hostname[0] == 0)
3493 {
3494 config_hostname= "";
3495 }
3496 else
3497 {
3498 found_config_hostname= config_hostname;
3499 if (match_hostname(client_addr, config_hostname))
3500 {
3501 if (!node_id)
3502 continue;
3503 goto error;
3504 }
3505 exact_match = true;
3506 }
3507 /*
3508 Insert this node in the nodes list sorted with the
3509 exact matches ahead of the open nodes
3510 */
3511 PossibleNode possible_node= {id, config_hostname, exact_match};
3512 if (exact_match)
3513 {
3514 // Find the position of first !exact match
3515 unsigned position = 0;
3516 for (unsigned j = 0; j < nodes.size(); j++)
3517 {
3518 if (nodes[j].exact_match)
3519 position++;
3520 }
3521 nodes.push(possible_node, position);
3522 }
3523 else
3524 {
3525 nodes.push_back(possible_node);
3526 }
3527
3528 if (node_id)
3529 break;
3530 }
3531 if (nodes.size() != 0)
3532 {
3533 return 0;
3534 }
3535
3536 error:
3537 /*
3538 lock on m_configMutex held because found_config_hostname may have
3539 reference inot config structure
3540 */
3541 error_code= NDB_MGM_ALLOCID_CONFIG_MISMATCH;
3542 if (node_id)
3543 {
3544 if (type_c != (unsigned) type)
3545 {
3546 BaseString type_string, type_c_string;
3547 const char *alias, *str;
3548 alias= ndb_mgm_get_node_type_alias_string(type, &str);
3549 type_string.assfmt("%s(%s)", alias, str);
3550 alias= ndb_mgm_get_node_type_alias_string((enum ndb_mgm_node_type)type_c,
3551 &str);
3552 type_c_string.assfmt("%s(%s)", alias, str);
3553 error_string.appfmt("Id %d configured as %s, connect attempted as %s.",
3554 node_id, type_c_string.c_str(),
3555 type_string.c_str());
3556 return -1;
3557 }
3558 if (found_config_hostname)
3559 {
3560 char addr_buf[NDB_ADDR_STRLEN];
3561 char *addr_str;
3562 struct in_addr config_addr= {0};
3563 struct in_addr conn_addr =
3564 ((struct sockaddr_in*)(client_addr))->sin_addr;
3565 int r_config_addr= Ndb_getInAddr(&config_addr, found_config_hostname);
3566 addr_str = Ndb_inet_ntop(AF_INET,
3567 static_cast<void*>(&conn_addr),
3568 addr_buf,
3569 (socklen_t)sizeof(addr_buf));
3570 error_string.appfmt("Connection with id %d done from wrong host ip %s,",
3571 node_id, addr_str);
3572 addr_str = Ndb_inet_ntop(AF_INET,
3573 static_cast<void*>(&config_addr),
3574 addr_buf,
3575 (socklen_t)sizeof(addr_buf));
3576 error_string.appfmt(" expected %s(%s).", found_config_hostname,
3577 r_config_addr ?
3578 "lookup failed" : addr_str);
3579 return -1;
3580 }
3581 error_string.appfmt("No node defined with id=%d in config file.", node_id);
3582 return -1;
3583 }
3584
3585 // node_id == 0 and nodes.size() == 0
3586 if (found_config_hostname)
3587 {
3588 char addr_buf[NDB_ADDR_STRLEN];
3589 struct in_addr conn_addr =
3590 ((struct sockaddr_in*)(client_addr))->sin_addr;
3591 char *addr_str = Ndb_inet_ntop(AF_INET,
3592 static_cast<void*>(&conn_addr),
3593 addr_buf,
3594 (socklen_t)sizeof(addr_buf));
3595 error_string.appfmt("Connection done from wrong host ip %s.",
3596 (client_addr) ? addr_str : "");
3597 return -1;
3598 }
3599
3600 error_string.append("No nodes defined in config file.");
3601 return -1;
3602 }
3603
3604
3605 int
try_alloc(NodeId id,ndb_mgm_node_type type,Uint32 timeout_ms)3606 MgmtSrvr::try_alloc(NodeId id,
3607 ndb_mgm_node_type type,
3608 Uint32 timeout_ms)
3609 {
3610 assert(type == NDB_MGM_NODE_TYPE_NDB ||
3611 type == NDB_MGM_NODE_TYPE_API);
3612
3613 const NDB_TICKS start = NdbTick_getCurrentTicks();
3614 while (true)
3615 {
3616 int res = alloc_node_id_req(id, type, timeout_ms);
3617 if (res == 0)
3618 {
3619 /* Node id allocation suceeded */
3620 g_eventLogger->debug("Allocated nodeid %u in cluster", id);
3621 assert(id > 0);
3622 return id;
3623 }
3624
3625 if (res == NO_CONTACT_WITH_DB_NODES &&
3626 type == NDB_MGM_NODE_TYPE_API)
3627 {
3628 const Uint64 retry_timeout = 3000; // milliseconds
3629 const NDB_TICKS now = NdbTick_getCurrentTicks();
3630 const Uint64 elapsed = NdbTick_Elapsed(start,now).milliSec();
3631 if (elapsed > retry_timeout)
3632 {
3633 /*
3634 Have waited long enough time for data nodes to
3635 decide on a master, return error
3636 */
3637 g_eventLogger->debug("Failed to allocate nodeid %u for API node " \
3638 "in cluster (retried during %u milliseconds)",
3639 id, (unsigned)elapsed);
3640 return -1;
3641 }
3642
3643 g_eventLogger->debug("Retrying allocation of nodeid %u...", id);
3644 NdbSleep_MilliSleep(100);
3645 continue;
3646 }
3647
3648 if (res == NO_CONTACT_WITH_DB_NODES &&
3649 type == NDB_MGM_NODE_TYPE_NDB)
3650 {
3651 /*
3652 No reply from data node(s) -> use the requested nodeid
3653 so that data node can start
3654 */
3655 g_eventLogger->debug("Nodeid %u for data node reserved locally " \
3656 "since cluster was not available ", id);
3657 return id;
3658 }
3659
3660 /* Unspecified error */
3661 return 0;
3662 }
3663
3664 assert(false); // Never reached
3665 return 0;
3666 }
3667
3668
3669 bool
try_alloc_from_list(NodeId & nodeid,ndb_mgm_node_type type,Uint32 timeout_ms,Vector<PossibleNode> & nodes)3670 MgmtSrvr::try_alloc_from_list(NodeId& nodeid,
3671 ndb_mgm_node_type type,
3672 Uint32 timeout_ms,
3673 Vector<PossibleNode>& nodes)
3674 {
3675 for (unsigned i = 0; i < nodes.size(); i++)
3676 {
3677 const unsigned id= nodes[i].id;
3678 if (theFacade->ext_isConnected(id))
3679 {
3680 // Node is already reserved(connected via transporter)
3681 continue;
3682 }
3683
3684 NdbMutex_Lock(m_reserved_nodes_mutex);
3685 if (m_reserved_nodes.get(id))
3686 {
3687 // Node is already reserved(locally in this node)
3688 NdbMutex_Unlock(m_reserved_nodes_mutex);
3689 continue;
3690 }
3691
3692 /*
3693 Reserve the nodeid locally while checking if it can
3694 be allocated in the data nodes
3695 */
3696 m_reserved_nodes.set(id, timeout_ms);
3697
3698 NdbMutex_Unlock(m_reserved_nodes_mutex);
3699 int res = try_alloc(id, type, timeout_ms);
3700 if (res > 0)
3701 {
3702 // Nodeid allocation succeeded
3703 nodeid= id;
3704
3705 if (type == NDB_MGM_NODE_TYPE_API)
3706 {
3707 /*
3708 Release the local reservation(which was set to avoid that
3709 more than one thread asked for same nodeid) since it's
3710 now reserved in data node
3711 */
3712 release_local_nodeid_reservation(id);
3713 }
3714
3715 return true;
3716 }
3717
3718 /* Release the local reservation */
3719 release_local_nodeid_reservation(id);
3720
3721 if (res < 0)
3722 {
3723 // Don't try any more nodes from the list
3724 return false;
3725 }
3726 }
3727 return false;
3728 }
3729
3730
3731 bool
alloc_node_id_impl(NodeId & nodeid,enum ndb_mgm_node_type type,const struct sockaddr * client_addr,int & error_code,BaseString & error_string,Uint32 timeout_s)3732 MgmtSrvr::alloc_node_id_impl(NodeId& nodeid,
3733 enum ndb_mgm_node_type type,
3734 const struct sockaddr* client_addr,
3735 int& error_code, BaseString& error_string,
3736 Uint32 timeout_s)
3737 {
3738 if (m_opts.no_nodeid_checks)
3739 {
3740 if (nodeid == 0)
3741 {
3742 error_string.appfmt("no-nodeid-checks set in management server. "
3743 "node id must be set explicitly in connectstring");
3744 error_code = NDB_MGM_ALLOCID_CONFIG_MISMATCH;
3745 return false;
3746 }
3747 return true;
3748 }
3749
3750 /* Don't allow allocation of this ndb_mgmd's nodeid */
3751 assert(_ownNodeId);
3752 if (nodeid == _ownNodeId)
3753 {
3754 // Fatal error
3755 error_code= NDB_MGM_ALLOCID_CONFIG_MISMATCH;
3756 if (type != NDB_MGM_NODE_TYPE_MGM)
3757 {
3758 /**
3759 * be backwards compatile wrt error messages
3760 */
3761 BaseString type_string, type_c_string;
3762 const char *alias, *str;
3763 alias= ndb_mgm_get_node_type_alias_string(type, &str);
3764 type_string.assfmt("%s(%s)", alias, str);
3765 alias= ndb_mgm_get_node_type_alias_string(NDB_MGM_NODE_TYPE_MGM, &str);
3766 type_c_string.assfmt("%s(%s)", alias, str);
3767 error_string.appfmt("Id %d configured as %s, connect attempted as %s.",
3768 nodeid, type_c_string.c_str(),
3769 type_string.c_str());
3770 }
3771 else
3772 {
3773 error_string.appfmt("Id %d is already allocated by this ndb_mgmd",
3774 nodeid);
3775 }
3776 return false;
3777 }
3778
3779 /* Make sure that config is confirmed before allocating nodeid */
3780 Uint32 timeout_ms = timeout_s * 1000;
3781 {
3782 const NDB_TICKS start = NdbTick_getCurrentTicks();
3783 BaseString getconfig_message;
3784 while (!m_config_manager->get_packed_config(type, 0, getconfig_message))
3785 {
3786 const NDB_TICKS now = NdbTick_getCurrentTicks();
3787 if (NdbTick_Elapsed(start,now).milliSec() > timeout_ms)
3788 {
3789 error_code = NDB_MGM_ALLOCID_ERROR;
3790 error_string.append("Unable to allocate nodeid as configuration"
3791 " not yet confirmed");
3792 return false;
3793 }
3794
3795 NdbSleep_MilliSleep(20);
3796 }
3797 }
3798
3799 /* Find possible nodeids */
3800 Vector<PossibleNode> nodes;
3801 if (find_node_type(nodeid, type, client_addr,
3802 nodes, error_code, error_string))
3803 return false;
3804
3805 // Print list of possible nodes
3806 for (unsigned i = 0; i < nodes.size(); i++)
3807 {
3808 const PossibleNode& node = nodes[i];
3809 g_eventLogger->debug(" [%u]: %u, '%s', %d",
3810 (unsigned)i, node.id,
3811 node.host.c_str(),
3812 node.exact_match);
3813 }
3814
3815 // nodes.size() == 0 handled inside find_node_type
3816 assert(nodes.size() != 0);
3817
3818 if (type == NDB_MGM_NODE_TYPE_MGM && nodes.size() > 1)
3819 {
3820 // mgmt server may only have one match
3821 error_string.appfmt("Ambiguous node id's %d and %d. "
3822 "Suggest specifying node id in connectstring, "
3823 "or specifying unique host names in config file.",
3824 nodes[0].id, nodes[1].id);
3825 error_code= NDB_MGM_ALLOCID_CONFIG_MISMATCH;
3826 return false;
3827 }
3828
3829 /* Check timeout of nodeid reservations for NDB */
3830 if (type == NDB_MGM_NODE_TYPE_NDB)
3831 {
3832 const NDB_TICKS now = NdbTick_getCurrentTicks();
3833 for (unsigned i = 0; i < nodes.size(); i++)
3834 {
3835 const NodeId ndb_nodeid = nodes[i].id;
3836 {
3837 Guard g(m_reserved_nodes_mutex);
3838 if (!m_reserved_nodes.has_timedout(ndb_nodeid, now))
3839 continue;
3840 }
3841
3842 // Found a timedout reservation
3843 if (theFacade->ext_isConnected(ndb_nodeid))
3844 continue; // Still connected, ignore the timeout
3845
3846 g_eventLogger->warning("Found timedout nodeid reservation for %u, " \
3847 "releasing it", ndb_nodeid);
3848
3849 // Clear the reservation
3850 release_local_nodeid_reservation(ndb_nodeid);
3851 }
3852 }
3853
3854 if (try_alloc_from_list(nodeid, type, timeout_ms, nodes))
3855 {
3856 if (type == NDB_MGM_NODE_TYPE_NDB)
3857 {
3858 /* Be ready to accept connections from this node */
3859 theFacade->ext_doConnect(nodeid);
3860 }
3861
3862 return true;
3863 }
3864
3865 /*
3866 there are nodes with correct type available but
3867 allocation failed for some reason
3868 */
3869 if (nodeid)
3870 {
3871 error_string.appfmt("Id %d already allocated by another node.",
3872 nodeid);
3873 }
3874 else
3875 {
3876 const char *alias, *str;
3877 alias= ndb_mgm_get_node_type_alias_string(type, &str);
3878 error_string.appfmt("No free node id found for %s(%s).",
3879 alias, str);
3880 }
3881 error_code = NDB_MGM_ALLOCID_ERROR;
3882 return false;
3883 }
3884
3885
3886 bool
alloc_node_id(NodeId & nodeid,enum ndb_mgm_node_type type,const struct sockaddr * client_addr,int & error_code,BaseString & error_string,bool log_event,Uint32 timeout_s)3887 MgmtSrvr::alloc_node_id(NodeId& nodeid,
3888 enum ndb_mgm_node_type type,
3889 const struct sockaddr* client_addr,
3890 int& error_code, BaseString& error_string,
3891 bool log_event,
3892 Uint32 timeout_s)
3893 {
3894 char addr_buf[NDB_ADDR_STRLEN];
3895 struct in_addr conn_addr = ((sockaddr_in*)client_addr)->sin_addr;
3896 const char* type_str = ndb_mgm_get_node_type_string(type);
3897 char* addr_str = Ndb_inet_ntop(AF_INET,
3898 static_cast<void*>(&conn_addr),
3899 addr_buf,
3900 (socklen_t)sizeof(addr_buf));
3901
3902 g_eventLogger->debug("Trying to allocate nodeid for %s" \
3903 "(nodeid: %u, type: %s)",
3904 addr_str, (unsigned)nodeid, type_str);
3905
3906
3907 if (alloc_node_id_impl(nodeid, type, client_addr,
3908 error_code, error_string,
3909 timeout_s))
3910 {
3911 g_eventLogger->info("Nodeid %u allocated for %s at %s",
3912 (unsigned)nodeid, type_str, addr_str);
3913 return true;
3914 }
3915
3916 if (!log_event)
3917 return false;
3918
3919 g_eventLogger->warning("Failed to allocate nodeid for %s at %s. "
3920 "Returned error: '%s'",
3921 type_str, addr_str, error_string.c_str());
3922
3923 return false;
3924 }
3925
3926
3927 bool
getNextNodeId(NodeId * nodeId,enum ndb_mgm_node_type type) const3928 MgmtSrvr::getNextNodeId(NodeId * nodeId, enum ndb_mgm_node_type type) const
3929 {
3930 NodeId tmp = * nodeId;
3931
3932 tmp++;
3933 while(nodeTypes[tmp] != type && tmp < MAX_NODES)
3934 tmp++;
3935
3936 if(tmp == MAX_NODES){
3937 return false;
3938 }
3939
3940 * nodeId = tmp;
3941 return true;
3942 }
3943
3944 #include "Services.hpp"
3945
3946 void
eventReport(const Uint32 * theData,Uint32 len)3947 MgmtSrvr::eventReport(const Uint32 * theData, Uint32 len)
3948 {
3949 const EventReport * const eventReport = (EventReport *)&theData[0];
3950
3951 NodeId nodeId = eventReport->getNodeId();
3952 Ndb_logevent_type type = eventReport->getEventType();
3953 // Log event
3954 g_eventLogger->log(type, theData, len, nodeId,
3955 &m_event_listner[0].m_logLevel);
3956 m_event_listner.log(type, theData, len, nodeId);
3957 }
3958
3959 /***************************************************************************
3960 * Backup
3961 ***************************************************************************/
3962
3963 int
startBackup(Uint32 & backupId,int waitCompleted,Uint32 input_backupId,Uint32 backuppoint)3964 MgmtSrvr::startBackup(Uint32& backupId, int waitCompleted, Uint32 input_backupId, Uint32 backuppoint)
3965 {
3966 SignalSender ss(theFacade);
3967 ss.lock(); // lock will be released on exit
3968
3969 NodeId nodeId = m_master_node;
3970 if (okToSendTo(nodeId, false) != 0)
3971 {
3972 bool next;
3973 nodeId = m_master_node = 0;
3974 while((next = getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB)) == true &&
3975 okToSendTo(nodeId, false) != 0);
3976 if(!next)
3977 return NO_CONTACT_WITH_DB_NODES;
3978 }
3979
3980 SimpleSignal ssig;
3981 BackupReq* req = CAST_PTR(BackupReq, ssig.getDataPtrSend());
3982 /*
3983 * Single-threaded backup. Set instance key 1. In the kernel
3984 * this maps to main instance 0 or worker instance 1 (if MT LQH).
3985 */
3986 BlockNumber backupBlockNo = numberToBlock(BACKUP, 1);
3987 if(input_backupId > 0)
3988 {
3989 ssig.set(ss, TestOrd::TraceAPI, backupBlockNo, GSN_BACKUP_REQ,
3990 BackupReq::SignalLength);
3991 req->inputBackupId = input_backupId;
3992 }
3993 else
3994 ssig.set(ss, TestOrd::TraceAPI, backupBlockNo, GSN_BACKUP_REQ,
3995 BackupReq::SignalLength - 1);
3996
3997 req->senderData = 19;
3998 req->backupDataLen = 0;
3999 assert(waitCompleted < 3);
4000 req->flags = waitCompleted & 0x3;
4001 if(backuppoint == 1)
4002 req->flags |= BackupReq::USE_UNDO_LOG;
4003
4004 int do_send = 1;
4005 while (1) {
4006 if (do_send)
4007 {
4008 if (ss.sendSignal(nodeId, &ssig) != SEND_OK) {
4009 return SEND_OR_RECEIVE_FAILED;
4010 }
4011 if (waitCompleted == 0)
4012 return 0;
4013 do_send = 0;
4014 }
4015 SimpleSignal *signal = ss.waitFor();
4016
4017 int gsn = signal->readSignalNumber();
4018 switch (gsn) {
4019 case GSN_BACKUP_CONF:{
4020 const BackupConf * const conf =
4021 CAST_CONSTPTR(BackupConf, signal->getDataPtr());
4022 #ifdef VM_TRACE
4023 ndbout_c("Backup(%d) master is %d", conf->backupId,
4024 refToNode(signal->header.theSendersBlockRef));
4025 #endif
4026 backupId = conf->backupId;
4027 if (waitCompleted == 1)
4028 return 0;
4029 // wait for next signal
4030 break;
4031 }
4032 case GSN_BACKUP_COMPLETE_REP:{
4033 const BackupCompleteRep * const rep =
4034 CAST_CONSTPTR(BackupCompleteRep, signal->getDataPtr());
4035 #ifdef VM_TRACE
4036 ndbout_c("Backup(%d) completed", rep->backupId);
4037 #endif
4038 backupId = rep->backupId;
4039 return 0;
4040 }
4041 case GSN_BACKUP_REF:{
4042 const BackupRef * const ref =
4043 CAST_CONSTPTR(BackupRef, signal->getDataPtr());
4044 if(ref->errorCode == BackupRef::IAmNotMaster){
4045 m_master_node = nodeId = refToNode(ref->masterRef);
4046 #ifdef VM_TRACE
4047 ndbout_c("I'm not master resending to %d", nodeId);
4048 #endif
4049 do_send = 1; // try again
4050 if (!getNodeInfo(nodeId).m_alive)
4051 m_master_node = nodeId = 0;
4052 continue;
4053 }
4054 return ref->errorCode;
4055 }
4056 case GSN_BACKUP_ABORT_REP:{
4057 const BackupAbortRep * const rep =
4058 CAST_CONSTPTR(BackupAbortRep, signal->getDataPtr());
4059 #ifdef VM_TRACE
4060 ndbout_c("Backup %d aborted", rep->backupId);
4061 #endif
4062 return rep->reason;
4063 }
4064 case GSN_NF_COMPLETEREP:{
4065 const NFCompleteRep * const rep =
4066 CAST_CONSTPTR(NFCompleteRep, signal->getDataPtr());
4067 #ifdef VM_TRACE
4068 ndbout_c("Node %d fail completed", rep->failedNodeId);
4069 #endif
4070 if (rep->failedNodeId == nodeId ||
4071 waitCompleted == 1)
4072 return 1326;
4073 // wait for next signal
4074 // master node will report aborted backup
4075 break;
4076 }
4077 case GSN_NODE_FAILREP:{
4078 const NodeFailRep * const rep =
4079 CAST_CONSTPTR(NodeFailRep, signal->getDataPtr());
4080 Uint32 len = NodeFailRep::getNodeMaskLength(signal->getLength());
4081 assert(len == NodeBitmask::Size); // only full length in ndbapi
4082 if (BitmaskImpl::safe_get(len, rep->theAllNodes,nodeId) ||
4083 waitCompleted == 1)
4084 return 1326;
4085 // wait for next signal
4086 // master node will report aborted backup
4087 break;
4088 }
4089 case GSN_API_REGCONF:
4090 case GSN_TAKE_OVERTCCONF:
4091 case GSN_CONNECT_REP:
4092 continue;
4093 default:
4094 report_unknown_signal(signal);
4095 return SEND_OR_RECEIVE_FAILED;
4096 }
4097 }
4098 }
4099
4100 int
abortBackup(Uint32 backupId)4101 MgmtSrvr::abortBackup(Uint32 backupId)
4102 {
4103 SignalSender ss(theFacade);
4104 ss.lock(); // lock will be released on exit
4105
4106 bool next;
4107 NodeId nodeId = 0;
4108 while((next = getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB)) == true &&
4109 getNodeInfo(nodeId).m_alive == false);
4110
4111 if(!next){
4112 return NO_CONTACT_WITH_DB_NODES;
4113 }
4114
4115 SimpleSignal ssig;
4116
4117 AbortBackupOrd* ord = CAST_PTR(AbortBackupOrd, ssig.getDataPtrSend());
4118 /*
4119 * Single-threaded backup. Set instance key 1. In the kernel
4120 * this maps to main instance 0 or worker instance 1 (if MT LQH).
4121 */
4122 BlockNumber backupBlockNo = numberToBlock(BACKUP, 1);
4123 ssig.set(ss, TestOrd::TraceAPI, backupBlockNo, GSN_ABORT_BACKUP_ORD,
4124 AbortBackupOrd::SignalLength);
4125
4126 ord->requestType = AbortBackupOrd::ClientAbort;
4127 ord->senderData = 19;
4128 ord->backupId = backupId;
4129
4130 return ss.sendSignal(nodeId, &ssig) == SEND_OK ? 0 : SEND_OR_RECEIVE_FAILED;
4131 }
4132
4133
4134 int
setDbParameter(int node,int param,const char * value,BaseString & msg)4135 MgmtSrvr::setDbParameter(int node, int param, const char * value,
4136 BaseString& msg)
4137 {
4138
4139 Guard g(m_local_config_mutex);
4140
4141 /**
4142 * Check parameter
4143 */
4144 ConfigIter iter(m_local_config, CFG_SECTION_NODE);
4145 if(iter.first() != 0){
4146 msg.assign("Unable to find node section (iter.first())");
4147 return -1;
4148 }
4149
4150 Uint32 type = NODE_TYPE_DB + 1;
4151 if(node != 0){
4152 // Set parameter only in the specified node
4153 if(iter.find(CFG_NODE_ID, node) != 0){
4154 msg.assign("Unable to find node (iter.find())");
4155 return -1;
4156 }
4157 if(iter.get(CFG_TYPE_OF_SECTION, &type) != 0){
4158 msg.assign("Unable to get node type(iter.get(CFG_TYPE_OF_SECTION))");
4159 return -1;
4160 }
4161 } else {
4162 // Set parameter in all DB nodes
4163 do {
4164 if(iter.get(CFG_TYPE_OF_SECTION, &type) != 0){
4165 msg.assign("Unable to get node type(iter.get(CFG_TYPE_OF_SECTION))");
4166 return -1;
4167 }
4168 if(type == NODE_TYPE_DB)
4169 break;
4170 } while(iter.next() == 0);
4171 }
4172
4173 if(type != NODE_TYPE_DB){
4174 msg.assfmt("Invalid node type or no such node (%d %d)",
4175 type, NODE_TYPE_DB);
4176 return -1;
4177 }
4178
4179 int p_type;
4180 unsigned val_32;
4181 Uint64 val_64;
4182 const char * val_char;
4183 do {
4184 p_type = 0;
4185 if(iter.get(param, &val_32) == 0){
4186 val_32 = atoi(value);
4187 break;
4188 }
4189
4190 p_type++;
4191 if(iter.get(param, &val_64) == 0){
4192 val_64 = my_strtoll(value, 0, 10);
4193 break;
4194 }
4195 p_type++;
4196 if(iter.get(param, &val_char) == 0){
4197 val_char = value;
4198 break;
4199 }
4200 msg.assign("Could not get parameter");
4201 return -1;
4202 } while(0);
4203
4204 bool res = false;
4205 do {
4206 int ret = iter.get(CFG_TYPE_OF_SECTION, &type);
4207 assert(ret == 0);
4208
4209 if(type != NODE_TYPE_DB)
4210 continue;
4211
4212 Uint32 node;
4213 ret = iter.get(CFG_NODE_ID, &node);
4214 assert(ret == 0);
4215
4216 ConfigValues::Iterator i2(m_local_config->m_configValues->m_config,
4217 iter.m_config);
4218 switch(p_type){
4219 case 0:
4220 res = i2.set(param, val_32);
4221 ndbout_c("Updating node %d param: %d to %d", node, param, val_32);
4222 break;
4223 case 1:
4224 res = i2.set(param, val_64);
4225 ndbout_c("Updating node %d param: %d to %u", node, param, val_32);
4226 break;
4227 case 2:
4228 res = i2.set(param, val_char);
4229 ndbout_c("Updating node %d param: %d to %s", node, param, val_char);
4230 break;
4231 default:
4232 require(false);
4233 }
4234 assert(res);
4235 } while(node == 0 && iter.next() == 0);
4236
4237 msg.assign("Success");
4238 return 0;
4239 }
4240
4241
4242 int
setConnectionDbParameter(int node1,int node2,int param,int value,BaseString & msg)4243 MgmtSrvr::setConnectionDbParameter(int node1, int node2,
4244 int param, int value,
4245 BaseString& msg)
4246 {
4247 DBUG_ENTER("MgmtSrvr::setConnectionDbParameter");
4248 DBUG_PRINT("enter", ("node1: %d, node2: %d, param: %d, value: %d",
4249 node1, node2, param, value));
4250
4251 // This function only supports setting dynamic ports
4252 if (param != CFG_CONNECTION_SERVER_PORT)
4253 {
4254 msg.assign("Only param CFG_CONNECTION_SERVER_PORT can be set");
4255 DBUG_RETURN(-1);
4256 }
4257
4258 if (!m_config_manager->set_dynamic_port(node1, node2, value, msg))
4259 DBUG_RETURN(-1);
4260
4261 DBUG_PRINT("exit", ("Set parameter(%d) to %d for %d -> %d",
4262 param, value, node1, node2));
4263 DBUG_RETURN(1);
4264 }
4265
4266
setDynamicPorts(int node,DynPortSpec ports[],unsigned num_ports,BaseString & msg)4267 bool MgmtSrvr::setDynamicPorts(int node, DynPortSpec ports[],
4268 unsigned num_ports, BaseString& msg)
4269 {
4270 return m_config_manager->set_dynamic_ports(node, ports, num_ports, msg);
4271 }
4272
4273
4274 int
getConnectionDbParameter(int node1,int node2,int param,int * value,BaseString & msg)4275 MgmtSrvr::getConnectionDbParameter(int node1, int node2,
4276 int param, int *value,
4277 BaseString& msg)
4278 {
4279 DBUG_ENTER("MgmtSrvr::getConnectionDbParameter");
4280 DBUG_PRINT("enter", ("node1: %d, node2: %d, param: %d",
4281 node1, node2, param));
4282
4283 // This function only supports asking about dynamic ports
4284 if (param != CFG_CONNECTION_SERVER_PORT)
4285 {
4286 msg.assign("Only param CFG_CONNECTION_SERVER_PORT can be retrieved");
4287 DBUG_RETURN(-1);
4288 }
4289
4290 if (!m_config_manager->get_dynamic_port(node1, node2, value, msg))
4291 DBUG_RETURN(-1);
4292
4293 DBUG_PRINT("exit", ("Return parameter(%d): %u for %d -> %d, msg: %s",
4294 param, *value, node1, node2, msg.c_str()));
4295 DBUG_RETURN(1);
4296 }
4297
4298
4299 bool
transporter_connect(NDB_SOCKET_TYPE sockfd,BaseString & msg,bool & close_with_reset)4300 MgmtSrvr::transporter_connect(NDB_SOCKET_TYPE sockfd,
4301 BaseString& msg,
4302 bool& close_with_reset)
4303 {
4304 DBUG_ENTER("MgmtSrvr::transporter_connect");
4305 TransporterRegistry* tr= theFacade->get_registry();
4306 if (!tr->connect_server(sockfd, msg, close_with_reset))
4307 DBUG_RETURN(false);
4308
4309 /**
4310 * TransporterRegistry::update_connections() is responsible
4311 * for doing the final step of bringing the connection into
4312 * CONNECTED state when it detects it 'isConnected()'.
4313 * This is required due to all such state changes has to
4314 * be synchroniced with ::performReceive().
4315 * To speed up CONNECTED detection, we request it to
4316 * happen ASAP. (There is no guarantee when it happen though)
4317 */
4318 theFacade->request_connection_check();
4319 DBUG_RETURN(true);
4320 }
4321
4322
connect_to_self()4323 bool MgmtSrvr::connect_to_self()
4324 {
4325 BaseString buf;
4326 NdbMgmHandle mgm_handle= ndb_mgm_create_handle();
4327
4328 buf.assfmt("%s:%u",
4329 m_opts.bind_address ? m_opts.bind_address : "localhost",
4330 m_port);
4331 ndb_mgm_set_connectstring(mgm_handle, buf.c_str());
4332
4333 if(ndb_mgm_connect(mgm_handle, 0, 0, 0) < 0)
4334 {
4335 g_eventLogger->warning("%d %s",
4336 ndb_mgm_get_latest_error(mgm_handle),
4337 ndb_mgm_get_latest_error_desc(mgm_handle));
4338 ndb_mgm_destroy_handle(&mgm_handle);
4339 return false;
4340 }
4341 // TransporterRegistry now owns the handle and will destroy it.
4342 theFacade->get_registry()->set_mgm_handle(mgm_handle);
4343
4344 return true;
4345 }
4346
4347
4348 bool
change_config(Config & new_config,BaseString & msg)4349 MgmtSrvr::change_config(Config& new_config, BaseString& msg)
4350 {
4351 SignalSender ss(theFacade);
4352 ss.lock();
4353
4354 SimpleSignal ssig;
4355 UtilBuffer buf;
4356 new_config.pack(buf);
4357 ssig.ptr[0].p = (Uint32*)buf.get_data();
4358 ssig.ptr[0].sz = (buf.length() + 3) / 4;
4359 ssig.header.m_noOfSections = 1;
4360
4361 ConfigChangeReq *req= CAST_PTR(ConfigChangeReq, ssig.getDataPtrSend());
4362 req->length = buf.length();
4363
4364 NodeBitmask mgm_nodes;
4365 {
4366 Guard g(m_local_config_mutex);
4367 m_local_config->get_nodemask(mgm_nodes, NDB_MGM_NODE_TYPE_MGM);
4368 }
4369
4370 NodeId nodeId= ss.find_confirmed_node(mgm_nodes);
4371 if (nodeId == 0)
4372 {
4373 msg = "INTERNAL ERROR Could not find any mgmd!";
4374 return false;
4375 }
4376
4377 if (ss.sendFragmentedSignal(nodeId, ssig,
4378 MGM_CONFIG_MAN, GSN_CONFIG_CHANGE_REQ,
4379 ConfigChangeReq::SignalLength) != 0)
4380 {
4381 msg.assfmt("Could not start configuration change, send to "
4382 "node %d failed", nodeId);
4383 return false;
4384 }
4385 mgm_nodes.clear(nodeId);
4386
4387 bool done = false;
4388 while(!done)
4389 {
4390 SimpleSignal *signal= ss.waitFor();
4391
4392 switch(signal->readSignalNumber()){
4393 case GSN_CONFIG_CHANGE_CONF:
4394 done= true;
4395 break;
4396 case GSN_CONFIG_CHANGE_REF:
4397 {
4398 const ConfigChangeRef * const ref =
4399 CAST_CONSTPTR(ConfigChangeRef, signal->getDataPtr());
4400 g_eventLogger->debug("Got CONFIG_CHANGE_REF, error: %d", ref->errorCode);
4401 switch(ref->errorCode)
4402 {
4403 case ConfigChangeRef::NotMaster:{
4404 // Retry with next node if any
4405 NodeId nodeId= ss.find_confirmed_node(mgm_nodes);
4406 if (nodeId == 0)
4407 {
4408 msg = "INTERNAL ERROR Could not find any mgmd!";
4409 return false;
4410 }
4411
4412 if (ss.sendFragmentedSignal(nodeId, ssig,
4413 MGM_CONFIG_MAN, GSN_CONFIG_CHANGE_REQ,
4414 ConfigChangeReq::SignalLength) != 0)
4415 {
4416 msg.assfmt("Could not start configuration change, send to "
4417 "node %d failed", nodeId);
4418 return false;
4419 }
4420 mgm_nodes.clear(nodeId);
4421 break;
4422 }
4423
4424 default:
4425 msg = ConfigChangeRef::errorMessage(ref->errorCode);
4426 return false;
4427 }
4428
4429 break;
4430 }
4431
4432 case GSN_API_REGCONF:
4433 case GSN_TAKE_OVERTCCONF:
4434 case GSN_CONNECT_REP:
4435 // Ignore;
4436 break;
4437
4438
4439 case GSN_NODE_FAILREP:
4440 // ignore, NF_COMPLETEREP will come
4441 break;
4442
4443 case GSN_NF_COMPLETEREP:
4444 {
4445 NodeId nodeId = refToNode(signal->header.theSendersBlockRef);
4446 msg.assign("Node %d failed during configuration change", nodeId);
4447 return false;
4448 break;
4449 }
4450
4451 default:
4452 report_unknown_signal(signal);
4453 return false;
4454
4455 }
4456 }
4457
4458 g_eventLogger->info("Config change completed");
4459
4460 return true;
4461 }
4462
4463
4464 void
print_config(const char * section_filter,NodeId nodeid_filter,const char * param_filter,NdbOut & out)4465 MgmtSrvr::print_config(const char* section_filter, NodeId nodeid_filter,
4466 const char* param_filter,
4467 NdbOut& out)
4468 {
4469 Guard g(m_local_config_mutex);
4470 m_local_config->print(section_filter, nodeid_filter,
4471 param_filter, out);
4472 }
4473
4474
4475 bool
reload_config(const char * config_filename,bool mycnf,BaseString & msg)4476 MgmtSrvr::reload_config(const char* config_filename, bool mycnf,
4477 BaseString& msg)
4478 {
4479 if (config_filename && mycnf)
4480 {
4481 msg = "ERROR: Both mycnf and config_filename is not supported";
4482 return false;
4483 }
4484
4485 if (config_filename)
4486 {
4487 if (m_opts.mycnf)
4488 {
4489 msg.assfmt("ERROR: Can't switch to use config.ini '%s' when "
4490 "node was started from my.cnf", config_filename);
4491 return false;
4492 }
4493 }
4494 else
4495 {
4496 if (mycnf)
4497 {
4498 // Reload from my.cnf
4499 if (!m_opts.mycnf)
4500 {
4501 if (m_opts.config_filename)
4502 {
4503 msg.assfmt("ERROR: Can't switch to use my.cnf when "
4504 "node was started from '%s'", m_opts.config_filename);
4505 return false;
4506 }
4507 }
4508 }
4509 else
4510 {
4511 /* No config file name supplied and not told to use mycnf */
4512 if (m_opts.config_filename)
4513 {
4514 g_eventLogger->info("No config file name supplied, using '%s'",
4515 m_opts.config_filename);
4516 config_filename = m_opts.config_filename;
4517 }
4518 else
4519 {
4520 msg = "ERROR: Neither config file name or mycnf available";
4521 return false;
4522 }
4523 }
4524 }
4525
4526 Config* new_conf_ptr;
4527 if ((new_conf_ptr= ConfigManager::load_config(config_filename,
4528 mycnf, msg)) == NULL)
4529 return false;
4530 Config new_conf(new_conf_ptr);
4531
4532 {
4533 Guard g(m_local_config_mutex);
4534
4535 /* Copy the necessary values from old to new config */
4536 if (!new_conf.setGeneration(m_local_config->getGeneration()) ||
4537 !new_conf.setName(m_local_config->getName()) ||
4538 !new_conf.setPrimaryMgmNode(m_local_config->getPrimaryMgmNode()))
4539 {
4540 msg = "Failed to initialize reloaded config";
4541 return false;
4542 }
4543 }
4544
4545 if (!change_config(new_conf, msg))
4546 return false;
4547 return true;
4548 }
4549
4550 void
show_variables(NdbOut & out)4551 MgmtSrvr::show_variables(NdbOut& out)
4552 {
4553 out << "daemon: " << yes_no(m_opts.daemon) << endl;
4554 out << "non_interactive: " << yes_no(m_opts.non_interactive) << endl;
4555 out << "interactive: " << yes_no(m_opts.interactive) << endl;
4556 out << "config_filename: " << str_null(m_opts.config_filename) << endl;
4557 out << "mycnf: " << yes_no(m_opts.mycnf) << endl;
4558 out << "bind_address: " << str_null(m_opts.bind_address) << endl;
4559 out << "no_nodeid_checks: " << yes_no(m_opts.no_nodeid_checks) << endl;
4560 out << "print_full_config: " << yes_no(m_opts.print_full_config) << endl;
4561 out << "configdir: " << str_null(m_opts.configdir) << endl;
4562 out << "config_cache: " << yes_no(m_opts.config_cache) << endl;
4563 out << "verbose: " << yes_no(m_opts.verbose) << endl;
4564 out << "reload: " << yes_no(m_opts.reload) << endl;
4565
4566 out << "nodeid: " << _ownNodeId << endl;
4567 out << "blocknumber: " << hex <<_blockNumber << endl;
4568 out << "own_reference: " << hex << _ownReference << endl;
4569 out << "port: " << m_port << endl;
4570 out << "need_restart: " << m_need_restart << endl;
4571 out << "is_stop_thread: " << _isStopThread << endl;
4572 out << "log_level_thread_sleep: " << _logLevelThreadSleep << endl;
4573 out << "master_node: " << m_master_node << endl;
4574 }
4575
4576 void
make_sync_req(SignalSender & ss,Uint32 nodeId)4577 MgmtSrvr::make_sync_req(SignalSender& ss, Uint32 nodeId)
4578 {
4579 const trp_node node = ss.getNodeInfo(nodeId);
4580 if (!ndbd_sync_req_support(node.m_info.m_version))
4581 {
4582 /* The node hasn't got SYNC_REQ support */
4583 return;
4584 }
4585
4586 /**
4587 * This subroutine is used to make a async request(error insert/dump)
4588 * "more" syncronous, i.e increasing the likelyhood that
4589 * the async request has really reached the destination
4590 * before returning to the api
4591 *
4592 * I.e it's a work-around...
4593 *
4594 */
4595 SimpleSignal ssig;
4596 SyncReq* req = CAST_PTR(SyncReq, ssig.getDataPtrSend());
4597 req->senderRef = ss.getOwnRef();
4598 req->senderData = 12;
4599 req->prio = 1; // prio b
4600 ssig.set(ss,TestOrd::TraceAPI, CMVMI, GSN_SYNC_REQ, SyncReq::SignalLength);
4601
4602 if (ss.sendSignal(nodeId, &ssig) != SEND_OK)
4603 {
4604 return;
4605 }
4606
4607 while (true)
4608 {
4609 SimpleSignal *signal = ss.waitFor();
4610
4611 int gsn = signal->readSignalNumber();
4612 switch (gsn) {
4613 case GSN_SYNC_REF:
4614 case GSN_SYNC_CONF:
4615 return;
4616
4617 case GSN_NF_COMPLETEREP:{
4618 const NFCompleteRep * const rep =
4619 CAST_CONSTPTR(NFCompleteRep, signal->getDataPtr());
4620 if (rep->failedNodeId == nodeId)
4621 return;
4622 break;
4623 }
4624
4625 case GSN_NODE_FAILREP:{
4626 const NodeFailRep * const rep =
4627 CAST_CONSTPTR(NodeFailRep, signal->getDataPtr());
4628 Uint32 len = NodeFailRep::getNodeMaskLength(signal->getLength());
4629 assert(len == NodeBitmask::Size); // only full length in ndbapi
4630 if (BitmaskImpl::safe_get(len, rep->theAllNodes,nodeId))
4631 return;
4632 break;
4633 }
4634 case GSN_API_REGCONF:
4635 case GSN_TAKE_OVERTCCONF:
4636 case GSN_CONNECT_REP:
4637 break;
4638 default:
4639 return;
4640 }
4641 }
4642 }
4643
4644
4645 bool
request_events(NdbNodeBitmask nodes,Uint32 reports_per_node,Uint32 dump_type,Vector<SimpleSignal> & events)4646 MgmtSrvr::request_events(NdbNodeBitmask nodes, Uint32 reports_per_node,
4647 Uint32 dump_type,
4648 Vector<SimpleSignal>& events)
4649 {
4650 int nodes_counter[MAX_NDB_NODES];
4651 #ifndef NDEBUG
4652 NdbNodeBitmask save = nodes;
4653 #endif
4654 SignalSender ss(theFacade);
4655 ss.lock();
4656
4657 // Send the dump command to all requested NDB nodes
4658 const bool all = nodes.isclear();
4659 for (int i = 1; i < MAX_NDB_NODES; i++)
4660 {
4661 // Check if node should be involved
4662 if (!all && !nodes.get(i))
4663 continue;
4664
4665 // Only request from confirmed DB nodes
4666 const trp_node node = ss.getNodeInfo(i);
4667 if (node.m_info.getType() != NodeInfo::DB ||
4668 !node.is_confirmed())
4669 {
4670 nodes.clear(i);
4671 continue;
4672 }
4673
4674 SimpleSignal ssig;
4675 DumpStateOrd * const dumpOrd = (DumpStateOrd*)ssig.getDataPtrSend();
4676
4677 dumpOrd->args[0] = dump_type;
4678 dumpOrd->args[1] = ss.getOwnRef(); // Return to sender
4679
4680 if (ss.sendSignal(i, ssig, CMVMI, GSN_DUMP_STATE_ORD, 2) == SEND_OK)
4681 {
4682 nodes.set(i);
4683 nodes_counter[i] = (int)reports_per_node;
4684 }
4685 }
4686
4687
4688 while (true)
4689 {
4690 // Check if all nodes are done
4691 if (nodes.isclear())
4692 break;
4693
4694 SimpleSignal *signal = ss.waitFor();
4695 switch (signal->readSignalNumber()) {
4696 case GSN_EVENT_REP:{
4697 const NodeId nodeid = refToNode(signal->header.theSendersBlockRef);
4698 const EventReport * const event =
4699 (const EventReport*)signal->getDataPtr();
4700
4701 if (!nodes.get(nodeid))
4702 {
4703 // The reporting node was not expected
4704 #ifndef NDEBUG
4705 ndbout_c("nodeid: %u", nodeid);
4706 ndbout_c("save: %s", BaseString::getPrettyText(save).c_str());
4707 #endif
4708 assert(false);
4709 return false;
4710 }
4711
4712 if (event->getEventType() == NDB_LE_SavedEvent &&
4713 signal->getDataPtr()[1] == 0)
4714 {
4715 nodes_counter[nodeid] = 1;
4716 }
4717 else
4718 {
4719 // Save signal
4720 events.push_back(SimpleSignal(*signal));
4721 }
4722
4723 // Check if node is done
4724 nodes_counter[nodeid]--;
4725 if (nodes_counter[nodeid] == 0)
4726 nodes.clear(nodeid);
4727
4728 break;
4729 }
4730
4731 case GSN_NODE_FAILREP:{
4732 const NodeFailRep * const rep =
4733 (const NodeFailRep*)signal->getDataPtr();
4734 // only care about data-nodes
4735 for (NodeId i = 1; i < MAX_NDB_NODES; i++)
4736 {
4737 if (NdbNodeBitmask::get(rep->theNodes, i))
4738 {
4739 nodes.clear(i);
4740
4741 // Remove any previous reports from this node
4742 // it should not be reported
4743 for (unsigned j = 0; j < events.size(); j++)
4744 {
4745 const SimpleSignal& ssig = events[j];
4746 const NodeId nodeid = refToNode(ssig.header.theSendersBlockRef);
4747 if (nodeid == i)
4748 {
4749 events.erase(j);
4750 j--;
4751 }
4752 }
4753 }
4754 }
4755 break;
4756 }
4757
4758 default:
4759 // Ignore all other signals
4760 break;
4761 }
4762 }
4763 ss.unlock();
4764
4765 return true;
4766 }
4767
// Explicit template instantiation definitions for the Vector / MutexVector
// element types listed below.
4768 template class MutexVector<NodeId>;
4769 template class MutexVector<Ndb_mgmd_event_service::Event_listener>;
4770 template class Vector<EventSubscribeReq>;
4771 template class MutexVector<EventSubscribeReq>;
4772 template class Vector< Vector<BaseString> >;
4773 template class Vector<MgmtSrvr::PossibleNode>;
4774 template class Vector<Defragger::DefragBuffer*>;
4775