1 /*
2 Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License, version 2.0,
6 as published by the Free Software Foundation.
7
8 This program is also distributed with certain software (including
9 but not limited to OpenSSL) that is licensed under separate terms,
10 as designated in a particular file or component or in included license
11 documentation. The authors of MySQL hereby grant you an additional
12 permission to link the program and your derivative works with the
13 separately licensed software that they have included with MySQL.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License, version 2.0, for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
23 */
24
25 #include <ndb_global.h>
26
27 #include "MgmtSrvr.hpp"
28 #include "ndb_mgmd_error.h"
29 #include "Services.hpp"
30 #include "ConfigManager.hpp"
31 #include "Defragger.hpp"
32
33 #include <NdbOut.hpp>
34 #include <NdbApiSignal.hpp>
35 #include <kernel_types.h>
36 #include <GlobalSignalNumbers.h>
37 #include <signaldata/TestOrd.hpp>
38 #include <signaldata/TamperOrd.hpp>
39 #include <signaldata/StartOrd.hpp>
40 #include <signaldata/ApiVersion.hpp>
41 #include <signaldata/ResumeReq.hpp>
42 #include <signaldata/SetLogLevelOrd.hpp>
43 #include <signaldata/EventSubscribeReq.hpp>
44 #include <signaldata/EventReport.hpp>
45 #include <signaldata/DumpStateOrd.hpp>
46 #include <signaldata/BackupSignalData.hpp>
47 #include <signaldata/NFCompleteRep.hpp>
48 #include <signaldata/NodeFailRep.hpp>
49 #include <signaldata/AllocNodeId.hpp>
50 #include <signaldata/SchemaTrans.hpp>
51 #include <signaldata/CreateNodegroup.hpp>
52 #include <signaldata/DropNodegroup.hpp>
53 #include <signaldata/Sync.hpp>
54 #include <signaldata/GetConfig.hpp>
55 #include <NdbSleep.h>
56 #include <portlib/NdbDir.hpp>
57 #include <EventLogger.hpp>
58 #include <logger/FileLogHandler.hpp>
59 #include <logger/ConsoleLogHandler.hpp>
60 #include <logger/SysLogHandler.hpp>
61 #include <DebuggerNames.hpp>
62 #include <ndb_version.h>
63 #include <OwnProcessInfo.hpp>
64
65 #include <SocketServer.hpp>
66 #include <NdbConfig.h>
67
68 #include <NdbAutoPtr.hpp>
69 #include <NdbDir.hpp>
70 #include <ndberror.h>
71
72 #include <mgmapi.h>
73 #include <mgmapi_configuration.hpp>
74 #include <mgmapi_config_parameters.h>
75
76 #include <SignalSender.hpp>
77
78 #include <LogBuffer.hpp>
79 #include <BufferedLogHandler.hpp>
80
// Currently active error-insert code; 0 is the initial value.
int g_errorInsert = 0;
// True when the error-insert code 'x' is the one currently set.
#define ERROR_INSERTED(x) (g_errorInsert == x)

/*
  Declare a SignalSender named 'ss' on theFacade, lock it and check with
  okToSendTo(nodeId, true) that the node may be sent to.

  NOTE: on failure the macro executes 'return result;' in the calling
  function, i.e. it returns the int error code from okToSendTo(). It is
  therefore only suitable in functions that return an int error code.
*/
#define INIT_SIGNAL_SENDER(ss,nodeId) \
  SignalSender ss(theFacade); \
  ss.lock(); /* lock will be released on exit */ \
  {\
    int result = okToSendTo(nodeId, true);\
    if (result != 0) {\
      return result;\
    }\
  }

// Defined elsewhere with C linkage.
extern "C" bool opt_core;
95
96 void *
logLevelThread_C(void * m)97 MgmtSrvr::logLevelThread_C(void* m)
98 {
99 MgmtSrvr *mgm = (MgmtSrvr*)m;
100 mgm->logLevelThreadRun();
101 return 0;
102 }
103
104 extern EventLogger * g_eventLogger;
105
#ifdef NOT_USED
/**
 * Print the level of every log category of 'll' to 'out' in the form
 * "[LogLevel: <l0> <l1> ... ]". Only compiled when NOT_USED is defined.
 */
static NdbOut&
operator<<(NdbOut& out, const LogLevel & ll)
{
  out << "[LogLevel: ";
  size_t category = 0;
  while (category < LogLevel::LOGLEVEL_CATEGORIES)
  {
    out << ll.getLogLevel((LogLevel::EventCategory)category) << " ";
    category++;
  }
  out << "]";
  return out;
}
#endif
117
118 void
logLevelThreadRun()119 MgmtSrvr::logLevelThreadRun()
120 {
121 while (!_isStopThread)
122 {
123 Vector<NodeId> failed_started_nodes;
124 Vector<EventSubscribeReq> failed_log_level_requests;
125
126 /**
127 * Handle started nodes
128 */
129 m_started_nodes.lock();
130 if (m_started_nodes.size() > 0)
131 {
132 // calculate max log level
133 EventSubscribeReq req;
134 {
135 LogLevel tmp;
136 m_event_listner.lock();
137 for(int i = m_event_listner.m_clients.size() - 1; i >= 0; i--)
138 tmp.set_max(m_event_listner[i].m_logLevel);
139 m_event_listner.unlock();
140 req.assign(tmp);
141 }
142 req.blockRef = _ownReference;
143 while (m_started_nodes.size() > 0)
144 {
145 Uint32 node = m_started_nodes[0];
146 m_started_nodes.erase(0, false);
147 m_started_nodes.unlock();
148
149 if (setEventReportingLevelImpl(node, req))
150 {
151 failed_started_nodes.push_back(node);
152 }
153 else
154 {
155 SetLogLevelOrd ord;
156 ord.assign(m_nodeLogLevel[node]);
157 setNodeLogLevelImpl(node, ord);
158 }
159 m_started_nodes.lock();
160 }
161 }
162 m_started_nodes.unlock();
163
164 m_log_level_requests.lock();
165 while (m_log_level_requests.size() > 0)
166 {
167 EventSubscribeReq req = m_log_level_requests[0];
168 m_log_level_requests.erase(0, false);
169 m_log_level_requests.unlock();
170
171 if(req.blockRef == 0)
172 {
173 req.blockRef = _ownReference;
174 if (setEventReportingLevelImpl(0, req))
175 {
176 failed_log_level_requests.push_back(req);
177 }
178 }
179 else
180 {
181 SetLogLevelOrd ord;
182 ord.assign(req);
183 if (setNodeLogLevelImpl(req.blockRef, ord))
184 {
185 failed_log_level_requests.push_back(req);
186 }
187 }
188 m_log_level_requests.lock();
189 }
190 m_log_level_requests.unlock();
191
192 if(!ERROR_INSERTED(10000))
193 m_event_listner.check_listeners();
194
195 Uint32 sleeptime = _logLevelThreadSleep;
196 if (failed_started_nodes.size())
197 {
198 m_started_nodes.lock();
199 for (Uint32 i = 0; i<failed_started_nodes.size(); i++)
200 m_started_nodes.push_back(failed_started_nodes[i], false);
201 m_started_nodes.unlock();
202 failed_started_nodes.clear();
203 sleeptime = 100;
204 }
205
206 if (failed_log_level_requests.size())
207 {
208 m_log_level_requests.lock();
209 for (Uint32 i = 0; i<failed_log_level_requests.size(); i++)
210 m_log_level_requests.push_back(failed_log_level_requests[i], false);
211 m_log_level_requests.unlock();
212 failed_log_level_requests.clear();
213 sleeptime = 100;
214 }
215
216 NdbSleep_MilliSleep(sleeptime);
217 }
218 }
219
220
221 static int
translateStopRef(Uint32 errCode)222 translateStopRef(Uint32 errCode)
223 {
224 switch(errCode){
225 case StopRef::NodeShutdownInProgress:
226 return NODE_SHUTDOWN_IN_PROGESS;
227 break;
228 case StopRef::SystemShutdownInProgress:
229 return SYSTEM_SHUTDOWN_IN_PROGRESS;
230 break;
231 case StopRef::NodeShutdownWouldCauseSystemCrash:
232 return NODE_SHUTDOWN_WOULD_CAUSE_SYSTEM_CRASH;
233 break;
234 case StopRef::UnsupportedNodeShutdown:
235 return UNSUPPORTED_NODE_SHUTDOWN;
236 break;
237 }
238 return 4999;
239 }
240
241
MgmtSrvr(const MgmtOpts & opts)242 MgmtSrvr::MgmtSrvr(const MgmtOpts& opts) :
243 m_opts(opts),
244 _blockNumber(0),
245 _ownNodeId(0),
246 m_port(0),
247 m_local_config(NULL),
248 _ownReference(0),
249 m_config_manager(NULL),
250 m_need_restart(false),
251 theFacade(NULL),
252 _isStopThread(false),
253 _logLevelThreadSleep(500),
254 m_event_listner(this),
255 m_master_node(0),
256 _logLevelThread(NULL),
257 m_version_string(ndbGetOwnVersionString()),
258 m_async_cluster_logging(false)
259 {
260 DBUG_ENTER("MgmtSrvr::MgmtSrvr");
261
262 m_local_config_mutex= NdbMutex_Create();
263 m_reserved_nodes_mutex= NdbMutex_Create();
264 if (!m_local_config_mutex || !m_reserved_nodes_mutex)
265 {
266 g_eventLogger->error("Failed to create MgmtSrvr mutexes");
267 require(false);
268 }
269
270 /* Init node arrays */
271 for(Uint32 i = 0; i<MAX_NODES; i++) {
272 nodeTypes[i] = (enum ndb_mgm_node_type)-1;
273 clear_connect_address_cache(i);
274 }
275
276 /* Setup clusterlog as client[0] in m_event_listner */
277 {
278 Ndb_mgmd_event_service::Event_listener se;
279 ndb_socket_invalidate(&(se.m_socket));
280 for(size_t t = 0; t<LogLevel::LOGLEVEL_CATEGORIES; t++){
281 se.m_logLevel.setLogLevel((LogLevel::EventCategory)t, 7);
282 }
283 se.m_logLevel.setLogLevel(LogLevel::llError, 15);
284 se.m_logLevel.setLogLevel(LogLevel::llConnection, 8);
285 se.m_logLevel.setLogLevel(LogLevel::llBackup, 15);
286 m_event_listner.m_clients.push_back(se);
287 m_event_listner.m_logLevel = se.m_logLevel;
288 }
289
290 DBUG_VOID_RETURN;
291 }
292
293
/*
  check_configdir

  Make sure the configdir exists. A directory given with --configdir
  must already exist, the compiled-in default directory is created
  if it is missing.

*/
300
301 const char*
check_configdir() const302 MgmtSrvr::check_configdir() const
303 {
304 if (m_opts.configdir &&
305 strcmp(m_opts.configdir, MYSQLCLUSTERDIR) != 0)
306 {
307 // Specified on commmand line
308 if (access(m_opts.configdir, F_OK))
309 {
310 g_eventLogger->error("Directory '%s' specified with --configdir " \
311 "does not exist. Either create it or pass " \
312 "the path to an already existing directory.",
313 m_opts.configdir);
314 return NULL;
315 }
316 return m_opts.configdir;
317 }
318 else
319 {
320 // Compiled in path MYSQLCLUSTERDIR
321 if (access(MYSQLCLUSTERDIR, F_OK))
322 {
323 g_eventLogger->info("The default config directory '%s' " \
324 "does not exist. Trying to create it...",
325 MYSQLCLUSTERDIR);
326
327 if (!NdbDir::create(MYSQLCLUSTERDIR) ||
328 access(MYSQLCLUSTERDIR, F_OK))
329 {
330 g_eventLogger->error("Could not create directory '%s'. " \
331 "Either create it manually or " \
332 "specify a different directory with " \
333 "--configdir=<path>",
334 MYSQLCLUSTERDIR);
335 return NULL;
336 }
337
338 g_eventLogger->info("Sucessfully created config directory");
339 }
340 return MYSQLCLUSTERDIR;
341 }
342 }
343
344
345 bool
init()346 MgmtSrvr::init()
347 {
348 DBUG_ENTER("MgmtSrvr::init");
349
350 const char* configdir;
351
352 if (!m_opts.config_cache)
353 {
354 g_eventLogger->info("Skipping check of config directory since "
355 "config cache is disabled.");
356 configdir = NULL;
357 }
358 else
359 {
360 if (!(configdir= check_configdir()))
361 DBUG_RETURN(false);
362 }
363
364 if (!(m_config_manager= new ConfigManager(m_opts, configdir)))
365 {
366 g_eventLogger->error("Failed to create ConfigManager");
367 DBUG_RETURN(false);
368 }
369
370 if (m_config_manager->add_config_change_subscriber(this) < 0)
371 {
372 g_eventLogger->error("Failed to add MgmtSrvr as config change subscriber");
373 DBUG_RETURN(false);
374 }
375
376 if (!m_config_manager->init())
377 {
378 DBUG_RETURN(false);
379 }
380
381 /* 'config_changed' should have been called from 'init' */
382 require(m_local_config != 0);
383
384 if (m_opts.print_full_config)
385 {
386 print_config();
387 DBUG_RETURN(false);
388 }
389
390 assert(_ownNodeId);
391
392 DBUG_RETURN(true);
393 }
394
395
396 bool
start_transporter(const Config * config)397 MgmtSrvr::start_transporter(const Config* config)
398 {
399 DBUG_ENTER("MgmtSrvr::start_transporter");
400
401 theFacade= new TransporterFacade(0);
402 if (theFacade == 0)
403 {
404 g_eventLogger->error("Could not create TransporterFacade.");
405 DBUG_RETURN(false);
406 }
407
408 assert(_blockNumber == 0); // Blocknumber shouldn't been allocated yet
409
410 /*
411 Register ourself at TransporterFacade to be able to receive signals
412 and to be notified when a database process has died.
413 */
414 Uint32 res;
415 if ((res = open(theFacade)) == 0)
416 {
417 g_eventLogger->error("Failed to open block in TransporterFacade");
418 theFacade->stop_instance();
419 delete theFacade;
420 theFacade = 0;
421 DBUG_RETURN(false);
422 }
423 _blockNumber = refToBlock(res);
424 assert(_blockNumber > 0);
425
426 /**
427 * Need to call ->open() prior to actually starting TF
428 */
429 m_config_manager->set_facade(theFacade);
430
431 if (theFacade->start_instance(_ownNodeId,
432 config->m_configValues) < 0)
433 {
434 g_eventLogger->error("Failed to start transporter");
435 delete theFacade;
436 theFacade = 0;
437 DBUG_RETURN(false);
438 }
439
440 _ownReference = numberToRef(_blockNumber, _ownNodeId);
441
442 /*
443 set api reg req frequency quite high:
444
445 100 ms interval to make sure we have fairly up-to-date
446 info from the nodes. This to make sure that this info
447 is not dependent on heartbeat settings in the
448 configuration
449 */
450 theFacade->ext_set_max_api_reg_req_interval(100);
451
452 DBUG_RETURN(true);
453 }
454
455
456 bool
start_mgm_service(const Config * config)457 MgmtSrvr::start_mgm_service(const Config* config)
458 {
459 DBUG_ENTER("MgmtSrvr::start_mgm_service");
460
461 assert(m_port == 0);
462 {
463 // Find the portnumber to use for mgm service
464 ConfigIter iter(config, CFG_SECTION_NODE);
465
466 if(iter.find(CFG_NODE_ID, _ownNodeId) != 0){
467 g_eventLogger->error("Could not find node %d in config", _ownNodeId);
468 DBUG_RETURN(false);
469 }
470
471 unsigned type;
472 if(iter.get(CFG_TYPE_OF_SECTION, &type) != 0 ||
473 type != NODE_TYPE_MGM){
474 g_eventLogger->error("Node %d is not defined as management server",
475 _ownNodeId);
476 DBUG_RETURN(false);
477 }
478
479 if(iter.get(CFG_MGM_PORT, &m_port) != 0){
480 g_eventLogger->error("PortNumber not defined for node %d", _ownNodeId);
481 DBUG_RETURN(false);
482 }
483 }
484
485 unsigned short port= m_port;
486 DBUG_PRINT("info", ("Using port %d", port));
487 if (port == 0)
488 {
489 g_eventLogger->error("Could not find out which port to use"\
490 " for management service");
491 DBUG_RETURN(false);
492 }
493
494 {
495 int count= 5; // no of retries for tryBind
496 while(!m_socket_server.tryBind(port, m_opts.bind_address))
497 {
498 if (--count > 0)
499 {
500 NdbSleep_SecSleep(1);
501 continue;
502 }
503 g_eventLogger->error("Unable to bind management service port: %s:%d!\n"
504 "Please check if the port is already used,\n"
505 "(perhaps a ndb_mgmd is already running),\n"
506 "and if you are executing on the correct computer",
507 (m_opts.bind_address ? m_opts.bind_address : "*"),
508 port);
509 DBUG_RETURN(false);
510 }
511 }
512
513 {
514 MgmApiService * mapi = new MgmApiService(*this);
515 if (mapi == NULL)
516 {
517 g_eventLogger->error("Could not allocate MgmApiService");
518 DBUG_RETURN(false);
519 }
520
521 if(!m_socket_server.setup(mapi, &port, m_opts.bind_address))
522 {
523 delete mapi; // Will be deleted by SocketServer in all other cases
524 g_eventLogger->error("Unable to setup management service port: %s:%d!\n"
525 "Please check if the port is already used,\n"
526 "(perhaps a ndb_mgmd is already running),\n"
527 "and if you are executing on the correct computer",
528 (m_opts.bind_address ? m_opts.bind_address : "*"),
529 port);
530 DBUG_RETURN(false);
531 }
532
533 if (port != m_port)
534 {
535 g_eventLogger->error("Couldn't start management service on the "\
536 "requested port: %d. Got port: %d instead",
537 m_port, port);
538 DBUG_RETURN(false);
539 }
540 }
541 setOwnProcessInfoPort(port);
542
543 m_socket_server.startServer();
544
545 g_eventLogger->info("Id: %d, Command port: %s:%d",
546 _ownNodeId,
547 m_opts.bind_address ? m_opts.bind_address : "*",
548 port);
549 DBUG_RETURN(true);
550 }
551
552
553 bool
start()554 MgmtSrvr::start()
555 {
556 DBUG_ENTER("MgmtSrvr::start");
557
558 /* Start transporter */
559 if(!start_transporter(m_local_config))
560 {
561 g_eventLogger->error("Failed to start transporter!");
562 DBUG_RETURN(false);
563 }
564
565 /* Start mgm service */
566 if (!start_mgm_service(m_local_config))
567 {
568 g_eventLogger->error("Failed to start mangement service!");
569 DBUG_RETURN(false);
570 }
571
572 /* Use local MGM port for TransporterRegistry */
573 if(!connect_to_self())
574 {
575 g_eventLogger->error("Failed to connect to ourself!");
576 DBUG_RETURN(false);
577 }
578
579 set_async_cluster_logging(true);
580 /* Start config manager */
581 if (!m_config_manager->start())
582 {
583 g_eventLogger->error("Failed to start ConfigManager");
584 DBUG_RETURN(false);
585 }
586
587 /* Loglevel thread */
588 assert(_isStopThread == false);
589 _logLevelThread = NdbThread_Create(logLevelThread_C,
590 (void**)this,
591 0, // default stack size
592 "MgmtSrvr_Loglevel",
593 NDB_THREAD_PRIO_LOW);
594
595 DBUG_RETURN(true);
596 }
597
598 void
set_async_cluster_logging(bool async_cluster_logging)599 MgmtSrvr::set_async_cluster_logging(bool async_cluster_logging)
600 {
601 m_async_cluster_logging = true;
602 }
603
604 void
configure_eventlogger(const BaseString & logdestination) const605 MgmtSrvr::configure_eventlogger(const BaseString& logdestination) const
606 {
607 // Close old log handlers before creating the new
608 g_eventLogger->close();
609
610 Vector<BaseString> logdestinations;
611 logdestination.split(logdestinations, ";");
612
613 for(unsigned i = 0; i < logdestinations.size(); i++)
614 {
615 // Extract type(everything left of colon)
616 Vector<BaseString> v_type_params;
617 logdestinations[i].split(v_type_params, ":", 2);
618 BaseString type(v_type_params[0]);
619
620 // Extract params(everything right of colon)
621 BaseString params;
622 if(v_type_params.size() >= 2)
623 params = v_type_params[1];
624
625 LogHandler *handler = NULL;
626 if(type == "FILE")
627 {
628 char *default_file_name= NdbConfig_ClusterLogFileName(_ownNodeId);
629 FileLogHandler* file_handler = new FileLogHandler(default_file_name);
630 free(default_file_name);
631
632 if(m_async_cluster_logging)
633 {
634 /**
635 * Log to a buffered log handler, and pass the file log handler
636 * as the destination log handler.
637 */
638 file_handler->parseParams(params);
639 if (!file_handler->is_open() &&
640 !file_handler->open())
641 {
642 ndbout_c("INTERNAL ERROR: Could not create log handler for: '%s'",
643 logdestinations[i].c_str());
644 continue;
645 }
646
647 handler = new BufferedLogHandler(file_handler);
648 }
649 else
650 {
651 handler = file_handler;
652 }
653 }
654 else if(type == "CONSOLE")
655 {
656 handler = new ConsoleLogHandler();
657 }
658 #ifndef _WIN32
659 else if(type == "SYSLOG")
660 {
661 handler = new SysLogHandler();
662 }
663 #endif
664 if(handler == NULL)
665 {
666 ndbout_c("INTERNAL ERROR: Could not create log handler for: '%s'",
667 logdestinations[i].c_str());
668 continue;
669 }
670
671 if(!handler->parseParams(params))
672 {
673 ndbout_c("Failed to parse parameters for log handler: '%s', error: %d '%s'",
674 logdestinations[i].c_str(), handler->getErrorCode(), handler->getErrorStr());
675 delete handler;
676 continue;
677 }
678
679 if (!g_eventLogger->addHandler(handler))
680 {
681 ndbout_c("INTERNAL ERROR: Could not add %s log handler", handler->handler_type());
682 g_eventLogger->error("INTERNAL ERROR: Could not add %s log handler",
683 handler->handler_type());
684 delete handler;
685 continue;
686 }
687 }
688 }
689
690
691 void
setClusterLog(const Config * config)692 MgmtSrvr::setClusterLog(const Config* config)
693 {
694 DBUG_ASSERT(_ownNodeId);
695
696 ConfigIter iter(config, CFG_SECTION_NODE);
697 require(iter.find(CFG_NODE_ID, _ownNodeId) == 0);
698
699 // Update DataDir from config
700 const char *datadir;
701 require(iter.get(CFG_NODE_DATADIR, &datadir) == 0);
702 NdbConfig_SetPath(datadir);
703
704 if (NdbDir::chdir(NdbConfig_get_path(NULL)) != 0)
705 {
706 g_eventLogger->warning("Cannot change directory to '%s', error: %d",
707 NdbConfig_get_path(NULL), errno);
708 // Ignore error
709 }
710
711 // Get log destination from config
712 BaseString logdest;
713 const char *value;
714 if(iter.get(CFG_LOG_DESTINATION, &value) == 0){
715 logdest.assign(value);
716 }
717
718 bool logdest_configured = true;
719 if(logdest.length() == 0 || logdest == "") {
720 // No LogDestination set, use default settings
721 char *clusterLog= NdbConfig_ClusterLogFileName(_ownNodeId);
722 logdest.assfmt("FILE:filename=%s,maxsize=1000000,maxfiles=6",
723 clusterLog);
724 free(clusterLog);
725 logdest_configured = false;
726 }
727
728 configure_eventlogger(logdest);
729
730 if (logdest_configured == false &&
731 m_opts.non_interactive)
732 {
733 g_eventLogger->createConsoleHandler();
734 }
735
736 #ifdef _WIN32
737 /* Output to Windows event log */
738 g_eventLogger->createEventLogHandler("MySQL Cluster Management Server");
739 #endif
740
741 if (m_opts.verbose)
742 g_eventLogger->enable(Logger::LL_DEBUG);
743 }
744
745
746 void
config_changed(NodeId node_id,const Config * new_config)747 MgmtSrvr::config_changed(NodeId node_id, const Config* new_config)
748 {
749 DBUG_ENTER("MgmtSrvr::config_changed");
750
751 Guard g(m_local_config_mutex);
752
753 // Don't allow nodeid to change, once it's been set
754 require(_ownNodeId == 0 || _ownNodeId == node_id);
755
756 _ownNodeId= node_id;
757
758 if (m_local_config)
759 delete m_local_config;
760
761 m_local_config= new Config(new_config); // Copy
762 require(m_local_config != 0);
763
764 /* Rebuild node arrays */
765 ConfigIter iter(m_local_config, CFG_SECTION_NODE);
766 for(Uint32 i = 0; i<MAX_NODES; i++) {
767
768 clear_connect_address_cache(i);
769
770 if (iter.first())
771 continue;
772
773 if (iter.find(CFG_NODE_ID, i) == 0){
774 unsigned type;
775 require(iter.get(CFG_TYPE_OF_SECTION, &type) == 0);
776
777 switch(type){
778 case NODE_TYPE_DB:
779 nodeTypes[i] = NDB_MGM_NODE_TYPE_NDB;
780 break;
781 case NODE_TYPE_API:
782 nodeTypes[i] = NDB_MGM_NODE_TYPE_API;
783 break;
784 case NODE_TYPE_MGM:
785 nodeTypes[i] = NDB_MGM_NODE_TYPE_MGM;
786 break;
787 default:
788 break;
789 }
790 }
791 else
792 {
793 nodeTypes[i] = (enum ndb_mgm_node_type)-1;
794 }
795
796 }
797
798 // Setup cluster log
799 setClusterLog(m_local_config);
800
801 if (theFacade)
802 {
803 if (!theFacade->configure(_ownNodeId,
804 m_local_config->m_configValues))
805 {
806 g_eventLogger->warning("Could not reconfigure everything online, "
807 "this node need a restart");
808 m_need_restart= true;
809 }
810 }
811
812 DBUG_VOID_RETURN;
813 }
814
815
816 bool
get_packed_config(ndb_mgm_node_type node_type,BaseString & buf64,BaseString & error,bool v2,Uint32 node_id)817 MgmtSrvr::get_packed_config(ndb_mgm_node_type node_type,
818 BaseString& buf64,
819 BaseString& error,
820 bool v2,
821 Uint32 node_id)
822 {
823 return m_config_manager->get_packed_config(node_type,
824 &buf64,
825 error,
826 v2,
827 node_id);
828 }
829
830 bool
get_packed_config_from_node(NodeId nodeId,BaseString & buf64,BaseString & error,bool v2_requester)831 MgmtSrvr::get_packed_config_from_node(NodeId nodeId,
832 BaseString& buf64,
833 BaseString& error,
834 bool v2_requester)
835 {
836 DBUG_ENTER("get_packed_config_from_node");
837
838 if (nodeId >= MAX_NODES_ID)
839 {
840 error.assfmt("Nodeid %d is greater than max nodeid %d. ",
841 nodeId, MAX_NODES_ID);
842 DBUG_RETURN(false);
843 }
844
845 if (getNodeType(nodeId) == NDB_MGM_NODE_TYPE_UNKNOWN)
846 {
847 error.assfmt("Nodeid %d does not exist. ", nodeId);
848 DBUG_RETURN(false);
849 }
850
851 if (getNodeType(nodeId) != NDB_MGM_NODE_TYPE_NDB)
852 {
853 error.assfmt("Node %d is not a data node. ", nodeId);
854 DBUG_RETURN(false);
855 }
856
857 trp_node node = getNodeInfo(nodeId);
858
859 if (!node.m_alive)
860 {
861 error.assfmt("Data node %d is not alive. ", nodeId);
862 DBUG_RETURN(false);
863 }
864
865 const Uint32 version = node.m_info.m_version;
866 bool v2_data_node = ndb_config_version_v2(version);
867 INIT_SIGNAL_SENDER(ss,nodeId);
868
869 SimpleSignal ssig;
870 GetConfigReq* req = CAST_PTR(GetConfigReq, ssig.getDataPtrSend());
871 req->senderRef = ss.getOwnRef();
872 req->nodeId = nodeId;
873
874 g_eventLogger->debug("Sending GET_CONFIG_REQ to %d", nodeId);
875
876 ssig.set(ss, TestOrd::TraceAPI, CMVMI, GSN_GET_CONFIG_REQ,
877 GetConfigReq::SignalLength);
878 if ((ss.sendSignal(nodeId, &ssig)) != SEND_OK)
879 {
880 DBUG_RETURN(false);
881 }
882
883 Defragger defragger;
884 while (true)
885 {
886 SimpleSignal *signal = ss.waitFor();
887 int gsn = signal->readSignalNumber();
888
889 switch (gsn)
890 {
891 case GSN_GET_CONFIG_CONF:
892 {
893 if (refToNode(signal->header.theSendersBlockRef) != nodeId)
894 {
895 error.assfmt("Internal Error: Reply from wrong node %d, expected from %d. ",
896 refToNode(signal->header.theSendersBlockRef),
897 nodeId);
898 DBUG_RETURN(false);
899 }
900
901 const GetConfigConf * const conf =
902 CAST_CONSTPTR(GetConfigConf, signal->getDataPtr());
903
904 if (signal->header.m_noOfSections != 1)
905 {
906 error.assfmt("Internal Error: Wrong number of sections %d received, expected %d. ",
907 signal->header.m_noOfSections, 1);
908 DBUG_RETURN(false);
909 }
910
911 if (defragger.defragment(signal))
912 {
913 ConfigValuesFactory cf;
914 if (v2_data_node)
915 require(cf.unpack_v2(signal->ptr[0].p, conf->configLength));
916 else
917 require(cf.unpack_v1(signal->ptr[0].p, conf->configLength));
918
919 Config received_config(cf.getConfigValues());
920 bool ret;
921 if (v2_requester)
922 ret = received_config.pack64_v2(buf64);
923 else
924 ret = received_config.pack64_v1(buf64);
925 if (!ret)
926 {
927 error.assign("Failed to pack64");
928 DBUG_RETURN(false);
929 }
930 DBUG_RETURN(true);
931 }
932 // wait until all fragments are received
933 continue;
934 }
935
936 case GSN_GET_CONFIG_REF:
937 {
938 if (refToNode(ssig.header.theSendersBlockRef) != nodeId)
939 {
940 error.assfmt("Internal Error: Reply from wrong node %d, expected from %d. ",
941 refToNode(signal->header.theSendersBlockRef),
942 nodeId);
943 DBUG_RETURN(false);
944 }
945 const GetConfigRef * const ref =
946 CAST_CONSTPTR(GetConfigRef, signal->getDataPtr());
947 error.assfmt("Error in retrieving config from node %d: Internal error: %d",
948 nodeId, ref->error);
949
950 DBUG_RETURN(false);
951 }
952
953 case GSN_NF_COMPLETEREP:
954 {
955 const NFCompleteRep * rep = CAST_CONSTPTR(NFCompleteRep,
956 signal->getDataPtr());
957 if (rep->failedNodeId == nodeId)
958 {
959 error.assfmt("Node %d is not available", nodeId);
960 DBUG_RETURN(false);
961 }
962 continue;
963 }
964
965 case GSN_NODE_FAILREP:
966 {
967 // Wait until GSN_NODE_COMPLETEREP is received.
968 continue;
969 }
970
971 case GSN_API_REGCONF:
972 case GSN_TAKE_OVERTCCONF:
973 case GSN_CONNECT_REP:
974 // Ignore
975 continue;
976
977 default:
978 report_unknown_signal(signal);
979 DBUG_RETURN(false);
980 }
981 }
982 // Should never come here
983 require(false);
984 DBUG_RETURN(false);
985 }
986
~MgmtSrvr()987 MgmtSrvr::~MgmtSrvr()
988 {
989 /* Stop log level thread */
990 void* res = 0;
991 _isStopThread = true;
992
993 if (_logLevelThread != NULL) {
994 NdbThread_WaitFor(_logLevelThread, &res);
995 NdbThread_Destroy(&_logLevelThread);
996 }
997
998 /* Stop mgm service, don't allow new connections */
999 m_socket_server.stopServer();
1000
1001 /* Stop all active session */
1002 if (!m_socket_server.stopSessions(true,
1003 2 * MgmApiSession::SOCKET_TIMEOUT))
1004 {
1005 g_eventLogger->error("Failed to wait for all sessions to stop, "
1006 "continuing with shutdown anyway.");
1007 }
1008
1009 /* Stop config manager */
1010 if (m_config_manager != 0)
1011 {
1012 m_config_manager->stop();
1013 delete m_config_manager;
1014 m_config_manager= 0;
1015 }
1016
1017 this->close(); // close trp_client before stopping TransporterFacade
1018
1019 // Stop transporter
1020 if(theFacade != 0){
1021 theFacade->stop_instance();
1022 delete theFacade;
1023 theFacade = 0;
1024 }
1025
1026 delete m_local_config;
1027
1028 NdbMutex_Destroy(m_local_config_mutex);
1029 NdbMutex_Destroy(m_reserved_nodes_mutex);
1030 }
1031
1032
1033 //****************************************************************************
1034 //****************************************************************************
1035
okToSendTo(NodeId nodeId,bool unCond)1036 int MgmtSrvr::okToSendTo(NodeId nodeId, bool unCond)
1037 {
1038 if(nodeId == 0 || getNodeType(nodeId) != NDB_MGM_NODE_TYPE_NDB)
1039 return WRONG_PROCESS_TYPE;
1040 // Check if we have contact with it
1041 if(unCond){
1042 if (getNodeInfo(nodeId).is_confirmed())
1043 return 0;
1044 }
1045 else if (getNodeInfo(nodeId).m_alive == true)
1046 return 0;
1047 return NO_CONTACT_WITH_PROCESS;
1048 }
1049
1050 void
report_unknown_signal(SimpleSignal * signal)1051 MgmtSrvr::report_unknown_signal(SimpleSignal *signal)
1052 {
1053 signal->print();
1054 g_eventLogger->error("Unknown signal received. SignalNumber: "
1055 "%i from (%d, 0x%x)",
1056 signal->readSignalNumber(),
1057 refToNode(signal->header.theSendersBlockRef),
1058 refToBlock(signal->header.theSendersBlockRef));
1059 assert(false);
1060 }
1061
1062 /*****************************************************************************
1063 * Starting and stopping database nodes
1064 ****************************************************************************/
1065
1066 int
sendSTART_ORD(int nodeId)1067 MgmtSrvr::sendSTART_ORD(int nodeId)
1068 {
1069 INIT_SIGNAL_SENDER(ss,nodeId);
1070
1071 SimpleSignal ssig;
1072 StartOrd* const startOrd = CAST_PTR(StartOrd, ssig.getDataPtrSend());
1073 ssig.set(ss,TestOrd::TraceAPI, CMVMI, GSN_START_ORD, StartOrd::SignalLength);
1074 startOrd->restartInfo = 0;
1075
1076 return ss.sendSignal(nodeId, &ssig) == SEND_OK ? 0 : SEND_OR_RECEIVE_FAILED;
1077 }
1078
1079 /*****************************************************************************
1080 * Version handling
1081 *****************************************************************************/
1082
1083 void
status_api(int nodeId,ndb_mgm_node_status & node_status,Uint32 & version,Uint32 & mysql_version,const char ** address,char * addr_buf,size_t addr_buf_size,bool & is_single_user)1084 MgmtSrvr::status_api(int nodeId,
1085 ndb_mgm_node_status& node_status,
1086 Uint32& version, Uint32& mysql_version,
1087 const char **address,
1088 char *addr_buf,
1089 size_t addr_buf_size,
1090 bool& is_single_user)
1091 {
1092 assert(getNodeType(nodeId) == NDB_MGM_NODE_TYPE_API);
1093 assert(version == 0 && mysql_version == 0);
1094
1095 if (sendVersionReq(nodeId,
1096 version,
1097 mysql_version,
1098 address,
1099 addr_buf,
1100 addr_buf_size,
1101 is_single_user) != 0)
1102 {
1103 // Couldn't get version from any NDB node.
1104 assert(version == 0);
1105 node_status = NDB_MGM_NODE_STATUS_UNKNOWN;
1106 return;
1107 }
1108
1109 if (version)
1110 {
1111 assert(mysql_version);
1112 node_status = NDB_MGM_NODE_STATUS_CONNECTED;
1113 }
1114 else
1115 {
1116 assert(mysql_version == 0);
1117 node_status = NDB_MGM_NODE_STATUS_NO_CONTACT;
1118 }
1119 return;
1120 }
1121
1122
1123 int
sendVersionReq(int v_nodeId,Uint32 & version,Uint32 & mysql_version,const char ** address,char * addr_buf,size_t addr_buf_size,bool & is_single_user)1124 MgmtSrvr::sendVersionReq(int v_nodeId,
1125 Uint32 &version,
1126 Uint32& mysql_version,
1127 const char **address,
1128 char *addr_buf,
1129 size_t addr_buf_size,
1130 bool& is_single_user)
1131 {
1132 SignalSender ss(theFacade);
1133 ss.lock();
1134
1135 SimpleSignal ssig;
1136 ApiVersionReq* req = CAST_PTR(ApiVersionReq, ssig.getDataPtrSend());
1137 req->senderRef = ss.getOwnRef();
1138 req->nodeId = v_nodeId;
1139 ssig.set(ss, TestOrd::TraceAPI, QMGR,
1140 GSN_API_VERSION_REQ, ApiVersionReq::SignalLength);
1141
1142 NodeId nodeId = 0;
1143 bool do_send = true;
1144 while(true)
1145 {
1146 if (do_send)
1147 {
1148 nodeId = ss.get_an_alive_node();
1149 if (nodeId == 0)
1150 {
1151 return NO_CONTACT_WITH_DB_NODES;
1152 }
1153
1154 if (ss.sendSignal(nodeId, &ssig) != SEND_OK)
1155 {
1156 return SEND_OR_RECEIVE_FAILED;
1157 }
1158
1159 do_send = false;
1160 }
1161
1162 SimpleSignal *signal = ss.waitFor();
1163
1164 switch (signal->readSignalNumber()) {
1165 case GSN_API_VERSION_CONF: {
1166 const ApiVersionConf * const conf =
1167 CAST_CONSTPTR(ApiVersionConf, signal->getDataPtr());
1168
1169 assert((int) conf->nodeId == v_nodeId);
1170
1171 version = conf->version;
1172 mysql_version = conf->mysql_version;
1173 struct in_addr in;
1174 in.s_addr= conf->m_inet_addr;
1175 *address= Ndb_inet_ntop(AF_INET,
1176 static_cast<void*>(&in),
1177 addr_buf,
1178 addr_buf_size);
1179 is_single_user = false;
1180 if (signal->getLength() > ApiVersionConf::SignalLengthWithoutSingleUser) {
1181 // New nodes will return info about single user
1182 is_single_user = conf->isSingleUser;
1183 }
1184 return 0;
1185 }
1186
1187 case GSN_NF_COMPLETEREP:{
1188 const NFCompleteRep * const rep =
1189 CAST_CONSTPTR(NFCompleteRep, signal->getDataPtr());
1190 if (rep->failedNodeId == nodeId)
1191 do_send = true; // retry with other node
1192 continue;
1193 }
1194
1195 case GSN_NODE_FAILREP:{
1196 const NodeFailRep * const rep =
1197 CAST_CONSTPTR(NodeFailRep, signal->getDataPtr());
1198 Uint32 len = NodeFailRep::getNodeMaskLength(signal->getLength());
1199 assert(len == NodeBitmask::Size ||
1200 len == 0); // only full length in ndbapi
1201 if (signal->header.m_noOfSections >= 1)
1202 {
1203 len = signal->ptr[0].sz;
1204 if (BitmaskImpl::safe_get(len, signal->ptr[0].p, nodeId))
1205 {
1206 do_send = true;
1207 }
1208 }
1209 else
1210 {
1211 assert(len > 0);
1212 if (BitmaskImpl::safe_get(len, rep->theAllNodes, nodeId))
1213 {
1214 do_send = true; // retry with other node
1215 }
1216 }
1217 continue;
1218 }
1219 case GSN_API_REGCONF:
1220 case GSN_TAKE_OVERTCCONF:
1221 case GSN_CONNECT_REP:
1222 // Ignore
1223 continue;
1224 default:
1225 report_unknown_signal(signal);
1226 return SEND_OR_RECEIVE_FAILED;
1227 }
1228 }
1229
1230 // Should never come here
1231 require(false);
1232 return -1;
1233 }
1234
1235
sendStopMgmd(NodeId nodeId,bool abort,bool stop,bool restart,bool nostart,bool initialStart)1236 int MgmtSrvr::sendStopMgmd(NodeId nodeId,
1237 bool abort,
1238 bool stop,
1239 bool restart,
1240 bool nostart,
1241 bool initialStart)
1242 {
1243 const char* hostname;
1244 Uint32 port;
1245 BaseString connect_string;
1246
1247 {
1248 Guard g(m_local_config_mutex);
1249 {
1250 ConfigIter iter(m_local_config, CFG_SECTION_NODE);
1251
1252 if(iter.first()) return SEND_OR_RECEIVE_FAILED;
1253 if(iter.find(CFG_NODE_ID, nodeId)) return SEND_OR_RECEIVE_FAILED;
1254 if(iter.get(CFG_NODE_HOST, &hostname)) return SEND_OR_RECEIVE_FAILED;
1255 }
1256 {
1257 ConfigIter iter(m_local_config, CFG_SECTION_NODE);
1258
1259 if(iter.first()) return SEND_OR_RECEIVE_FAILED;
1260 if(iter.find(CFG_NODE_ID, nodeId)) return SEND_OR_RECEIVE_FAILED;
1261 if(iter.get(CFG_MGM_PORT, &port)) return SEND_OR_RECEIVE_FAILED;
1262 }
1263 if( strlen(hostname) == 0 )
1264 return SEND_OR_RECEIVE_FAILED;
1265
1266 }
1267 connect_string.assfmt("%s:%u",hostname,port);
1268
1269 DBUG_PRINT("info",("connect string: %s",connect_string.c_str()));
1270
1271 NdbMgmHandle h= ndb_mgm_create_handle();
1272 if ( h && connect_string.length() > 0 )
1273 {
1274 ndb_mgm_set_connectstring(h,connect_string.c_str());
1275 if(ndb_mgm_connect(h,1,0,0))
1276 {
1277 DBUG_PRINT("info",("failed ndb_mgm_connect"));
1278 ndb_mgm_destroy_handle(&h);
1279 return SEND_OR_RECEIVE_FAILED;
1280 }
1281 if(!restart)
1282 {
1283 int nodes[1];
1284 nodes[0]= (int)nodeId;
1285 if(ndb_mgm_stop(h, 1, nodes) < 0)
1286 {
1287 ndb_mgm_destroy_handle(&h);
1288 return SEND_OR_RECEIVE_FAILED;
1289 }
1290 }
1291 else
1292 {
1293 int nodes[1];
1294 nodes[0]= (int)nodeId;
1295 if(ndb_mgm_restart2(h, 1, nodes, initialStart, nostart, abort) < 0)
1296 {
1297 ndb_mgm_destroy_handle(&h);
1298 return SEND_OR_RECEIVE_FAILED;
1299 }
1300 }
1301 }
1302 ndb_mgm_destroy_handle(&h);
1303
1304 return 0;
1305 }
1306
1307 /**
1308 * send STOP_REQ to all DB-nodes
1309 * and wait for them to stop or refuse
1310 *
1311 */
1312 int
sendall_STOP_REQ(NodeBitmask & stoppedNodes,bool abort,bool stop,bool restart,bool nostart,bool initialStart)1313 MgmtSrvr::sendall_STOP_REQ(NodeBitmask &stoppedNodes,
1314 bool abort,
1315 bool stop,
1316 bool restart,
1317 bool nostart,
1318 bool initialStart)
1319 {
1320 int error = 0;
1321 DBUG_ENTER("MgmtSrvr::sendall_STOP_REQ");
1322 DBUG_PRINT("enter", ("abort: %d stop: %d restart: %d "
1323 "nostart: %d initialStart: %d",
1324 abort, stop, restart, nostart, initialStart));
1325
1326 if (ERROR_INSERTED(10006))
1327 {
1328 /*
1329 * This error insert is for Bug #11757421. Error
1330 * 10006 is used to skip the STOP_REQ call sent by
1331 * the restart command thus ensuring that the nodes
1332 * do not start the shut down process.
1333 */
1334 DBUG_RETURN(error);
1335 }
1336
1337 stoppedNodes.clear();
1338
1339 SignalSender ss(theFacade);
1340 ss.lock(); // lock will be released on exit
1341
1342 SimpleSignal ssig;
1343 StopReq* const stopReq = CAST_PTR(StopReq, ssig.getDataPtrSend());
1344 ssig.set(ss, TestOrd::TraceAPI, NDBCNTR, GSN_STOP_REQ, StopReq::SignalLength);
1345
1346 stopReq->requestInfo = 0;
1347 stopReq->apiTimeout = 5000;
1348 stopReq->transactionTimeout = 1000;
1349 stopReq->readOperationTimeout = 1000;
1350 stopReq->operationTimeout = 1000;
1351 stopReq->senderData = 12;
1352 stopReq->senderRef = ss.getOwnRef();
1353 stopReq->singleuser = 0;
1354 StopReq::setSystemStop(stopReq->requestInfo, stop);
1355 StopReq::setPerformRestart(stopReq->requestInfo, restart);
1356 StopReq::setStopAbort(stopReq->requestInfo, abort);
1357 StopReq::setNoStart(stopReq->requestInfo, nostart);
1358 StopReq::setInitialStart(stopReq->requestInfo, initialStart);
1359
1360 if (ERROR_INSERTED(10007))
1361 {
1362 /*
1363 * This error insert is for Bug #11757421. Error
1364 * 10007 is used to hard code a value of false to
1365 * the nostart flag in the signal. This ensures
1366 * that the nodes do not reach NOT_STARTED state.
1367 */
1368 StopReq::setNoStart(stopReq->requestInfo, false);
1369 }
1370
1371 // send the signals
1372 int failed = 0;
1373 NodeBitmask nodes;
1374 {
1375 NodeId nodeId = 0;
1376 while(getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB))
1377 {
1378 if (okToSendTo(nodeId, true) == 0)
1379 {
1380 SendStatus result = ss.sendSignal(nodeId, &ssig);
1381 if (result == SEND_OK)
1382 nodes.set(nodeId);
1383 else
1384 failed++;
1385 }
1386 }
1387 }
1388
1389 if (nodes.isclear() && failed > 0)
1390 {
1391 DBUG_RETURN(SEND_OR_RECEIVE_FAILED);
1392 }
1393
1394 // now wait for the replies
1395 while (!nodes.isclear())
1396 {
1397 SimpleSignal *signal = ss.waitFor();
1398 int gsn = signal->readSignalNumber();
1399 switch (gsn) {
1400 case GSN_STOP_REF:
1401 {
1402 const StopRef * const ref = CAST_CONSTPTR(StopRef, signal->getDataPtr());
1403 const NodeId nodeId = refToNode(signal->header.theSendersBlockRef);
1404 #ifdef VM_TRACE
1405 ndbout_c("Node %d refused stop", nodeId);
1406 #endif
1407 assert(nodes.get(nodeId));
1408 nodes.clear(nodeId);
1409 error = translateStopRef(ref->errorCode);
1410 break;
1411 }
1412 case GSN_STOP_CONF:
1413 {
1414 const NodeId nodeId = refToNode(signal->header.theSendersBlockRef);
1415 assert(nodes.get(nodeId));
1416 nodes.clear(nodeId);
1417 break;
1418 }
1419 case GSN_NF_COMPLETEREP:
1420 {
1421 const NFCompleteRep * rep = CAST_CONSTPTR(NFCompleteRep,
1422 signal->getDataPtr());
1423 if (rep->failedNodeId <= nodes.max_size())
1424 nodes.clear(rep->failedNodeId); // clear the failed node
1425
1426 if (rep->failedNodeId <= stoppedNodes.max_size())
1427 stoppedNodes.set(rep->failedNodeId);
1428 break;
1429 }
1430 case GSN_NODE_FAILREP:
1431 {
1432 const NodeFailRep * rep = CAST_CONSTPTR(NodeFailRep,
1433 signal->getDataPtr());
1434 Uint32 len = NodeFailRep::getNodeMaskLength(signal->getLength());
1435 assert(len == NodeBitmask::Size || // only full length in ndbapi
1436 len == 0);
1437 NodeBitmask mask;
1438 if (signal->header.m_noOfSections >= 1)
1439 {
1440 mask.assign(signal->ptr[0].sz, signal->ptr[0].p);
1441 }
1442 else
1443 {
1444 mask.assign(len, rep->theAllNodes);
1445 }
1446 nodes.bitANDC(mask);
1447 stoppedNodes.bitOR(mask);
1448 break;
1449 }
1450 case GSN_API_REGCONF:
1451 case GSN_TAKE_OVERTCCONF:
1452 case GSN_CONNECT_REP:
1453 continue;
1454 default:
1455 report_unknown_signal(signal);
1456 DBUG_RETURN(SEND_OR_RECEIVE_FAILED);
1457 }
1458 }
1459
1460 DBUG_RETURN(error);
1461 }
1462
1463 int
guess_master_node(SignalSender & ss)1464 MgmtSrvr::guess_master_node(SignalSender& ss)
1465 {
1466 /**
1467 * First check if m_master_node is started
1468 */
1469 NodeId guess = m_master_node;
1470 if (guess != 0)
1471 {
1472 trp_node node = ss.getNodeInfo(guess);
1473 if (node.m_state.startLevel == NodeState::SL_STARTED)
1474 return guess;
1475 }
1476
1477 /**
1478 * Check for any started node
1479 */
1480 guess = 0;
1481 while(getNextNodeId(&guess, NDB_MGM_NODE_TYPE_NDB))
1482 {
1483 trp_node node = ss.getNodeInfo(guess);
1484 if (node.m_state.startLevel == NodeState::SL_STARTED)
1485 {
1486 return guess;
1487 }
1488 }
1489
1490 /**
1491 * Check any confirmed node
1492 */
1493 guess = 0;
1494 while(getNextNodeId(&guess, NDB_MGM_NODE_TYPE_NDB))
1495 {
1496 trp_node node = ss.getNodeInfo(guess);
1497 if (node.is_confirmed())
1498 {
1499 return guess;
1500 }
1501 }
1502
1503 /**
1504 * Check any connected node
1505 */
1506 guess = 0;
1507 while(getNextNodeId(&guess, NDB_MGM_NODE_TYPE_NDB))
1508 {
1509 trp_node node = ss.getNodeInfo(guess);
1510 if (node.is_connected())
1511 {
1512 return guess;
1513 }
1514 }
1515
1516 return 0; // give up
1517 }
1518
1519 /*
1520 * Common method for handeling all STOP_REQ signalling that
1521 * is used by Stopping, Restarting and Single user commands
1522 *
1523 * In the event that we need to stop a mgmd, we create a mgm
1524 * client connection to that mgmd and stop it that way.
1525 * This allows us to stop mgm servers when there isn't any real
1526 * distributed communication up.
1527 *
1528 * node_ids.size()==0 means to stop all DB nodes.
1529 * MGM nodes will *NOT* be stopped.
1530 *
1531 * If we work out we should be stopping or restarting ourselves,
1532 * we return <0 in stopSelf for restart, >0 for stop
1533 * and 0 for do nothing.
1534 */
1535
sendSTOP_REQ(const Vector<NodeId> & node_ids,NodeBitmask & stoppedNodes,bool abort,bool stop,bool restart,bool nostart,bool initialStart,int * stopSelf)1536 int MgmtSrvr::sendSTOP_REQ(const Vector<NodeId> &node_ids,
1537 NodeBitmask &stoppedNodes,
1538 bool abort,
1539 bool stop,
1540 bool restart,
1541 bool nostart,
1542 bool initialStart,
1543 int* stopSelf)
1544 {
1545 int error = 0;
1546 DBUG_ENTER("MgmtSrvr::sendSTOP_REQ");
1547 DBUG_PRINT("enter", ("no of nodes: %d "
1548 "abort: %d stop: %d restart: %d "
1549 "nostart: %d initialStart: %d",
1550 node_ids.size(),
1551 abort, stop, restart, nostart, initialStart));
1552
1553 if (ERROR_INSERTED(10006))
1554 {
1555 /*
1556 * This error insert is for Bug #11757421. Error
1557 * 10006 is used to skip the STOP_REQ call sent by
1558 * the restart command thus ensuring that the node
1559 * does not start the shut down process.
1560 */
1561 DBUG_RETURN(error);
1562 }
1563
1564 stoppedNodes.clear();
1565 *stopSelf= 0;
1566
1567 NodeBitmask ndb_nodes_to_stop;
1568 NodeBitmask mgm_nodes_to_stop;
1569
1570 SignalSender ss(theFacade);
1571 ss.lock(); // lock will be released on exit
1572
1573 /**
1574 * First verify arguments
1575 */
1576 for (unsigned i = 0; i < node_ids.size(); i++)
1577 {
1578 switch(getNodeType(node_ids[i])){
1579 case NDB_MGM_NODE_TYPE_MGM:
1580 mgm_nodes_to_stop.set(node_ids[i]);
1581 break;
1582 case NDB_MGM_NODE_TYPE_NDB:
1583 ndb_nodes_to_stop.set(node_ids[i]);
1584 break;
1585 default:
1586 DBUG_RETURN(WRONG_PROCESS_TYPE);
1587 }
1588 }
1589
1590 /**
1591 * Process ndb_mgmd
1592 */
1593 for (Uint32 i = mgm_nodes_to_stop.find(0);
1594 i != mgm_nodes_to_stop.NotFound;
1595 i = mgm_nodes_to_stop.find(i + 1))
1596 {
1597 if (i != getOwnNodeId())
1598 {
1599 error= sendStopMgmd(i, abort, stop, restart,
1600 nostart, initialStart);
1601 if (error == 0)
1602 {
1603 stoppedNodes.set(i);
1604 }
1605 }
1606 else
1607 {
1608 g_eventLogger->info("Stopping this node");
1609 * stopSelf = (restart)? -1 : 1;
1610 stoppedNodes.set(i);
1611 }
1612 }
1613
1614 /**
1615 * Process ndbd
1616 */
1617 SimpleSignal ssig;
1618 StopReq* const stopReq = CAST_PTR(StopReq, ssig.getDataPtrSend());
1619 ssig.set(ss, TestOrd::TraceAPI, NDBCNTR, GSN_STOP_REQ, StopReq::SignalLength_v1);
1620
1621 stopReq->requestInfo = 0;
1622 stopReq->apiTimeout = 5000;
1623 stopReq->transactionTimeout = 1000;
1624 stopReq->readOperationTimeout = 1000;
1625 stopReq->operationTimeout = 1000;
1626 stopReq->senderData = 12;
1627 stopReq->senderRef = ss.getOwnRef();
1628 stopReq->singleuser = 0;
1629 StopReq::setSystemStop(stopReq->requestInfo, stop);
1630 StopReq::setPerformRestart(stopReq->requestInfo, restart);
1631 StopReq::setStopAbort(stopReq->requestInfo, abort);
1632 StopReq::setNoStart(stopReq->requestInfo, nostart);
1633 StopReq::setInitialStart(stopReq->requestInfo, initialStart);
1634
1635 if (ERROR_INSERTED(10007))
1636 {
1637 /*
1638 * This error insert is for Bug #11757421. Error
1639 * 10007 is used to hard code a value of false to
1640 * the nostart flag in the signal. This ensures
1641 * that the node does not reach NOT_STARTED state.
1642 */
1643 StopReq::setNoStart(stopReq->requestInfo, false);
1644 }
1645
1646 int use_master_node = 0;
1647 int do_send = 0;
1648 Uint32 packed_length = 0;
1649 if (ndb_nodes_to_stop.count() > 1)
1650 {
1651 do_send = 1;
1652 use_master_node = 1;
1653 ndb_nodes_to_stop.copyto(NdbNodeBitmask::Size, stopReq->nodes);
1654 packed_length = ndb_nodes_to_stop.getPackedLengthInWords();
1655 StopReq::setStopNodes(stopReq->requestInfo, 1);
1656 }
1657 else if (ndb_nodes_to_stop.count() == 1)
1658 {
1659 Uint32 nodeId = ndb_nodes_to_stop.find(0);
1660 if (okToSendTo(nodeId, true) == 0)
1661 {
1662 if (ndbd_send_node_bitmask_in_section(getNodeInfo(nodeId).m_info.m_version))
1663 {
1664 ssig.ptr[0].p = stopReq->nodes;
1665 ssig.ptr[0].sz = packed_length;
1666 ssig.header.m_noOfSections = 1;
1667 ssig.header.theLength = StopReq::SignalLength;
1668 }
1669 else
1670 {
1671 assert(packed_length <= NdbNodeBitmask48::Size);
1672 }
1673
1674 SendStatus result = ss.sendSignal(nodeId, &ssig);
1675 if (result != SEND_OK)
1676 {
1677 DBUG_RETURN(SEND_OR_RECEIVE_FAILED);
1678 }
1679 }
1680 else
1681 {
1682 DBUG_RETURN(SEND_OR_RECEIVE_FAILED);
1683 }
1684 }
1685
1686
1687 // now wait for the replies
1688 Uint32 sendNodeId = ndb_nodes_to_stop.find(0);
1689 while (!stoppedNodes.contains(ndb_nodes_to_stop))
1690 {
1691 if (do_send)
1692 {
1693 assert(use_master_node);
1694 sendNodeId = guess_master_node(ss);
1695 if (okToSendTo(sendNodeId, true) != 0)
1696 {
1697 DBUG_RETURN(SEND_OR_RECEIVE_FAILED);
1698 }
1699
1700 if (ndbd_send_node_bitmask_in_section(getNodeInfo(sendNodeId).m_info.m_version))
1701 {
1702 ssig.ptr[0].p = stopReq->nodes;
1703 ssig.ptr[0].sz = packed_length;
1704 ssig.header.m_noOfSections = 1;
1705 ssig.header.theLength = StopReq::SignalLength;
1706 }
1707 else
1708 {
1709 assert(packed_length <= NdbNodeBitmask48::Size);
1710 }
1711
1712 if (ss.sendSignal(sendNodeId, &ssig) != SEND_OK)
1713 {
1714 DBUG_RETURN(SEND_OR_RECEIVE_FAILED);
1715 }
1716 do_send = 0;
1717 }
1718
1719 SimpleSignal *signal = ss.waitFor();
1720 int gsn = signal->readSignalNumber();
1721 switch (gsn) {
1722 case GSN_STOP_REF:{
1723 const StopRef * const ref = CAST_CONSTPTR(StopRef, signal->getDataPtr());
1724 const NodeId nodeId = refToNode(signal->header.theSendersBlockRef);
1725 require(nodeId == sendNodeId);
1726 if (ref->errorCode == StopRef::MultiNodeShutdownNotMaster)
1727 {
1728 assert(use_master_node);
1729 m_master_node= ref->masterNodeId;
1730 do_send = 1;
1731 continue;
1732 }
1733 DBUG_RETURN(translateStopRef(ref->errorCode));
1734 break;
1735 }
1736 case GSN_STOP_CONF:{
1737 #ifdef NOT_USED
1738 const StopConf * const ref = CAST_CONSTPTR(StopConf, signal->getDataPtr());
1739 #endif
1740 const NodeId nodeId = refToNode(signal->header.theSendersBlockRef);
1741 require(nodeId == sendNodeId);
1742 stoppedNodes.bitOR(ndb_nodes_to_stop);
1743 break;
1744 }
1745 case GSN_NF_COMPLETEREP:{
1746 const NFCompleteRep * const rep =
1747 CAST_CONSTPTR(NFCompleteRep, signal->getDataPtr());
1748 if (rep->failedNodeId <= stoppedNodes.max_size())
1749 stoppedNodes.set(rep->failedNodeId);
1750 break;
1751 }
1752 case GSN_NODE_FAILREP:{
1753 const NodeFailRep * const rep =
1754 CAST_CONSTPTR(NodeFailRep, signal->getDataPtr());
1755 Uint32 len = NodeFailRep::getNodeMaskLength(signal->getLength());
1756 require(len == NodeBitmask::Size || // only full length in ndbapi
1757 len == 0); // bitmask sent in signal section
1758 NodeBitmask mask;
1759 if (len == 0)
1760 {
1761 mask.assign(signal->ptr[0].sz, signal->ptr[0].p);
1762 }
1763 else
1764 {
1765 mask.assign(len, rep->theAllNodes);
1766 }
1767 stoppedNodes.bitOR(mask);
1768 break;
1769 }
1770 case GSN_API_REGCONF:
1771 case GSN_TAKE_OVERTCCONF:
1772 case GSN_CONNECT_REP:
1773 continue;
1774 default:
1775 report_unknown_signal(signal);
1776 DBUG_RETURN(SEND_OR_RECEIVE_FAILED);
1777 }
1778 }
1779 if (error && *stopSelf)
1780 {
1781 *stopSelf= 0;
1782 }
1783 DBUG_RETURN(error);
1784 }
1785
1786 /*
1787 * Stop one nodes
1788 */
1789
stopNodes(const Vector<NodeId> & node_ids,int * stopCount,bool abort,bool force,int * stopSelf)1790 int MgmtSrvr::stopNodes(const Vector<NodeId> &node_ids,
1791 int *stopCount, bool abort, bool force,
1792 int* stopSelf)
1793 {
1794 if (force || abort)
1795 ; // Skip node state checks
1796 else if (is_any_node_starting())
1797 {
1798 /* Refuse to stop since some node(s) are starting */
1799 return OPERATION_NOT_ALLOWED_START_STOP;
1800 }
1801
1802 NodeBitmask nodes;
1803 int ret = 0;
1804 if (node_ids.size() > 0)
1805 {
1806 ret = sendSTOP_REQ(node_ids, nodes,
1807 abort, false, false, false, false,
1808 stopSelf);
1809 }
1810 else
1811 {
1812 ret = sendall_STOP_REQ(nodes,
1813 abort, false, false, false, false);
1814 }
1815
1816 if (stopCount)
1817 *stopCount= nodes.count();
1818 return ret;
1819 }
1820
shutdownMGM(int * stopCount,bool abort,int * stopSelf)1821 int MgmtSrvr::shutdownMGM(int *stopCount, bool abort, int *stopSelf)
1822 {
1823 NodeId nodeId = 0;
1824 int error;
1825
1826 while(getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_MGM))
1827 {
1828 if(nodeId==getOwnNodeId())
1829 continue;
1830 error= sendStopMgmd(nodeId, abort, true, false,
1831 false, false);
1832 if (error == 0)
1833 (*stopCount)++;
1834 }
1835
1836 *stopSelf= 1;
1837 (*stopCount)++;
1838
1839 return 0;
1840 }
1841
1842 /*
1843 * Perform DB nodes shutdown.
1844 * MGM servers are left in their current state
1845 */
1846
shutdownDB(int * stopCount,bool abort)1847 int MgmtSrvr::shutdownDB(int * stopCount, bool abort)
1848 {
1849 NodeBitmask nodes;
1850
1851 int ret = sendall_STOP_REQ(nodes,
1852 abort,
1853 true,
1854 false,
1855 false,
1856 false);
1857
1858 if (stopCount)
1859 *stopCount = nodes.count();
1860 return ret;
1861 }
1862
1863 /*
1864 * Enter single user mode on all live nodes
1865 */
1866
enterSingleUser(int * stopCount,Uint32 apiNodeId)1867 int MgmtSrvr::enterSingleUser(int * stopCount, Uint32 apiNodeId)
1868 {
1869 if (getNodeType(apiNodeId) != NDB_MGM_NODE_TYPE_API)
1870 return NODE_NOT_API_NODE;
1871
1872 // Init
1873 if (stopCount)
1874 {
1875 * stopCount = 0;
1876 }
1877
1878 SignalSender ss(theFacade);
1879 ss.lock(); // lock will be released on exit
1880
1881 SimpleSignal ssig;
1882 StopReq* const stopReq = CAST_PTR(StopReq, ssig.getDataPtrSend());
1883 ssig.set(ss, TestOrd::TraceAPI, NDBCNTR, GSN_STOP_REQ, StopReq::SignalLength);
1884
1885 stopReq->requestInfo = 0;
1886 stopReq->apiTimeout = 5000;
1887 stopReq->transactionTimeout = 1000;
1888 stopReq->readOperationTimeout = 1000;
1889 stopReq->operationTimeout = 1000;
1890 stopReq->senderData = 12;
1891 stopReq->senderRef = ss.getOwnRef();
1892 stopReq->singleuser = 1;
1893 stopReq->singleUserApi = apiNodeId;
1894 StopReq::setSystemStop(stopReq->requestInfo, false);
1895 StopReq::setPerformRestart(stopReq->requestInfo, false);
1896 StopReq::setStopAbort(stopReq->requestInfo, false);
1897
1898 NodeBitmask nodes;
1899 {
1900 NodeId nodeId = 0;
1901 Uint32 failed = 0;
1902 while (getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB))
1903 {
1904 if (okToSendTo(nodeId, true) == 0)
1905 {
1906 SendStatus result = ss.sendSignal(nodeId, &ssig);
1907 if (result == SEND_OK)
1908 nodes.set(nodeId);
1909 else
1910 failed++;
1911 }
1912 else
1913 {
1914 failed++;
1915 }
1916 }
1917 if (nodes.isclear())
1918 {
1919 if (failed)
1920 {
1921 return SEND_OR_RECEIVE_FAILED;
1922 }
1923 return NO_CONTACT_WITH_DB_NODES;
1924 }
1925 }
1926
1927 int error = 0;
1928 int ok = 0;
1929 while (!nodes.isclear())
1930 {
1931 SimpleSignal *signal = ss.waitFor();
1932 int gsn = signal->readSignalNumber();
1933 switch (gsn) {
1934 case GSN_STOP_REF:
1935 {
1936 const StopRef * const ref = CAST_CONSTPTR(StopRef, signal->getDataPtr());
1937 nodes.clear(refToNode(signal->header.theSendersBlockRef));
1938 error = translateStopRef(ref->errorCode);
1939 break;
1940 }
1941 case GSN_STOP_CONF:
1942 {
1943 ok++;
1944 nodes.clear(refToNode(signal->header.theSendersBlockRef));
1945 break;
1946 }
1947 case GSN_NF_COMPLETEREP:
1948 {
1949 const NFCompleteRep * rep = CAST_CONSTPTR(NFCompleteRep,
1950 signal->getDataPtr());
1951 if (rep->failedNodeId <= nodes.max_size())
1952 nodes.clear(rep->failedNodeId);
1953 break;
1954 }
1955
1956 case GSN_NODE_FAILREP:
1957 {
1958 const NodeFailRep * rep = CAST_CONSTPTR(NodeFailRep,
1959 signal->getDataPtr());
1960 Uint32 len = NodeFailRep::getNodeMaskLength(signal->getLength());
1961 assert(len == NodeBitmask::Size || // only full length in ndbapi
1962 len == 0);
1963 NodeBitmask mask;
1964
1965 if (signal->header.m_noOfSections >= 1)
1966 {
1967 mask.assign(signal->ptr[0].sz, signal->ptr[0].p);
1968 }
1969 else
1970 {
1971 mask.assign(len, rep->theAllNodes);
1972 }
1973 nodes.bitANDC(mask);
1974 break;
1975 }
1976 case GSN_API_REGCONF:
1977 case GSN_TAKE_OVERTCCONF:
1978 case GSN_CONNECT_REP:
1979 continue;
1980
1981 default:
1982 report_unknown_signal(signal);
1983 return SEND_OR_RECEIVE_FAILED;
1984 }
1985 }
1986
1987 if (stopCount)
1988 {
1989 * stopCount = ok;
1990 }
1991
1992 return error;
1993 }
1994
1995 /*
1996 * Perform node restart
1997 */
1998
is_any_node_stopping()1999 bool MgmtSrvr::is_any_node_stopping()
2000 {
2001 NodeId nodeId = 0;
2002 trp_node node;
2003 while(getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB))
2004 {
2005 node = getNodeInfo(nodeId);
2006 if((node.m_state.startLevel == NodeState::SL_STOPPING_1) ||
2007 (node.m_state.startLevel == NodeState::SL_STOPPING_2) ||
2008 (node.m_state.startLevel == NodeState::SL_STOPPING_3) ||
2009 (node.m_state.startLevel == NodeState::SL_STOPPING_4))
2010 return true; // At least one node was stopping
2011 }
2012 return false; // No node was stopping
2013 }
2014
is_any_node_starting()2015 bool MgmtSrvr::is_any_node_starting()
2016 {
2017 NodeId nodeId = 0;
2018 trp_node node;
2019 while(getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB))
2020 {
2021 node = getNodeInfo(nodeId);
2022 if (node.m_state.startLevel == NodeState::SL_STARTING)
2023 return true; // At least one node was starting
2024 }
2025 return false; // No node was starting
2026 }
2027
is_any_node_alive()2028 bool MgmtSrvr::is_any_node_alive()
2029 {
2030 NodeId nodeId = 0;
2031 while (getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB))
2032 {
2033 if (getNodeInfo(nodeId).m_alive == true)
2034 return true; // At least one node in alive state
2035 }
2036 return false; // No node in alive state
2037 }
2038
is_any_node_in_started_state()2039 bool MgmtSrvr::is_any_node_in_started_state()
2040 {
2041 NodeId nodeId = 0;
2042 trp_node node;
2043 while(getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB))
2044 {
2045 node = getNodeInfo(nodeId);
2046 if (node.m_state.startLevel == NodeState::SL_STARTED)
2047 return true; // At least one node is in started state
2048 }
2049 return false; // No node is in started state
2050 }
2051
are_all_nodes_in_cmvmi_state()2052 bool MgmtSrvr::are_all_nodes_in_cmvmi_state()
2053 {
2054 NodeId nodeId = 0;
2055 trp_node node;
2056 while(getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB))
2057 {
2058 node = getNodeInfo(nodeId);
2059 if (node.m_state.startLevel != NodeState::SL_CMVMI)
2060 return false; // At least one node is not in CMVMI state
2061 }
2062 return true; // All nodes are in CMVMI state
2063 }
2064
isTimeUp(const NDB_TICKS startTime,const Uint64 delay,const Uint64 sleepInterval)2065 bool MgmtSrvr::isTimeUp(const NDB_TICKS startTime,
2066 const Uint64 delay,
2067 const Uint64 sleepInterval)
2068 {
2069 if(NdbTick_Elapsed(startTime, NdbTick_getCurrentTicks()).milliSec()
2070 < delay)
2071 {
2072 NdbSleep_MilliSleep(sleepInterval);
2073 return false;
2074 }
2075 return true;
2076 }
2077
is_cluster_single_user()2078 bool MgmtSrvr::is_cluster_single_user()
2079 {
2080 NodeId nodeId = 0;
2081 trp_node node;
2082 while(getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB))
2083 {
2084 node = getNodeInfo(nodeId);
2085 if (node.m_state.startLevel == NodeState::SL_SINGLEUSER)
2086 return true; // Cluster is in single user modes
2087 }
2088 return false; // Cluster is not in single user mode
2089 }
2090
restartNodes(const Vector<NodeId> & node_ids,int * stopCount,bool nostart,bool initialStart,bool abort,bool force,int * stopSelf,unsigned int num_secs_to_wait_for_node)2091 int MgmtSrvr::restartNodes(const Vector<NodeId> &node_ids,
2092 int * stopCount, bool nostart,
2093 bool initialStart, bool abort,
2094 bool force,
2095 int *stopSelf,
2096 unsigned int num_secs_to_wait_for_node)
2097 {
2098 if (is_cluster_single_user())
2099 {
2100 /*
2101 Refuse to restart since cluster is in single user mode
2102 and when the node is restarting it would not be allowed to
2103 join cluster, see BUG#31056
2104 */
2105 return OPERATION_NOT_ALLOWED_START_STOP;
2106 }
2107
2108 if (force || abort)
2109 ; // Skip node state checks
2110 else if (is_any_node_starting())
2111 {
2112 /* Refuse to restart since some node(s) are starting */
2113 return OPERATION_NOT_ALLOWED_START_STOP;
2114 }
2115
2116 NodeBitmask nodes;
2117 int ret = 0;
2118 if (node_ids.size() > 0)
2119 {
2120 ret = sendSTOP_REQ(node_ids, nodes,
2121 abort, false, true, true, initialStart,
2122 stopSelf);
2123 }
2124 else
2125 {
2126 ret = sendall_STOP_REQ(nodes,
2127 abort, false, true, true, initialStart);
2128 }
2129
2130 if (ret)
2131 return ret;
2132
2133 if (stopCount)
2134 *stopCount = nodes.count();
2135
2136 // start up the nodes again
2137
2138 /*
2139 * The wait for all nodes to reach NOT_STARTED state is
2140 * split into 2 separate checks:
2141 * 1. Wait for ndbd to start shutting down
2142 * 2. Wait for ndbd to shutdown and reach NOT_STARTED
2143 * state
2144 *
2145 * Wait 1: Wait for ndbd to start shutting down. A short
2146 * wait duration of 12 seconds is being used.
2147 *
2148 * During shutdown the nodes traverse the 4 stopping
2149 * levels namely, SL_STOPPING_1 through SL_STOPPING_4.
2150 *
2151 * Thus, waiting for all the nodes to enter one of these
2152 * levels would be the obvious and intuitive approach for
2153 * this wait. However, the nodes pass these levels in
2154 * exec_STOP_REQ before the flow of execution reaches
2155 * here. An alternate approach adopted here is to check if
2156 * the nodes leave the SL_STARTED state in the first place.
2157 * A failure to leave this state would indicate that for
2158 * some reason the shutdown process failed to start and
2159 * can be considered the equivalent of checking if the
2160 * nodes have transitioned to any of the stopping levels.
2161 *
2162 * The immediate question that arises is how can one be sure
2163 * that the nodes have not gone from STARTED -> STOPPED ->
2164 * STARTED. This scenario is not an issue since we are waiting
2165 * for NOT_STARTED state and only once that state is reached is
2166 * the START_ORD fired which makes the node transition from
2167 * SL_NOTHING to further states.
2168 *
2169 * To summarize, the first of the two waits will wait a short
2170 * (12s) time to check if the shutdown process has been initiated
2171 * and exit in case any of the nodes have not left the
2172 * SL_STARTED state.
2173 */
2174 Uint64 waitTime = 12000;
2175 NDB_TICKS startTime = NdbTick_getCurrentTicks();
2176 bool any_node_in_started_state;
2177 do
2178 {
2179 /*
2180 * Check if any of the data nodes are still
2181 * stuck in STARTED state
2182 */
2183 any_node_in_started_state = false;
2184 for (unsigned i = 0; i < node_ids.size(); i++)
2185 {
2186 NodeId nodeId = node_ids[i];
2187 /*
2188 * Check performed only for data nodes
2189 */
2190 if(getNodeType(nodeId) == NDB_MGM_NODE_TYPE_NDB)
2191 {
2192 trp_node node = getNodeInfo(nodeId);
2193 any_node_in_started_state |= (node.m_state.startLevel ==
2194 NodeState::SL_STARTED);
2195 }
2196 }
2197 } while(any_node_in_started_state && !isTimeUp(startTime,waitTime,100));
2198
2199 if(any_node_in_started_state)
2200 {
2201 return WAIT_FOR_NDBD_TO_START_SHUTDOWN_FAILED;
2202 }
2203
2204 /*
2205 * Wait 2: Wait for ndbd to shutdown and reach NOT_STARTED state
2206 *
2207 * Having confirmed that the shutdown is on its way, the
2208 * second wait involves simply waiting for the shutdown to complete
2209 * and the nodes to enter the NOT_STARTED state.
2210 *
2211 * Once the nodes reach the NOT_STARTED state, they are ready for the
2212 * START_ORD signal. It must be noted that while NOT_STARTED state has
2213 * been mentioned throughout the comments since it is better known from
2214 * a user's perspective, since we are dealing with data nodes, it is
2215 * quicker and more efficient to check if the state is SL_CMVMI which is
2216 * the equivalent of the MGMAPI state of NOT_STARTED.
2217 *
2218 * The wait time in this case is the value of num_secs_to_wait_for_node
2219 */
2220
2221 startTime = NdbTick_getCurrentTicks();
2222 waitTime = num_secs_to_wait_for_node * 1000;
2223 bool all_nodes_in_cmvmi_state;
2224 do
2225 {
2226 /*
2227 * Check if all the data nodes are in
2228 * SL_CMVMI state
2229 */
2230 all_nodes_in_cmvmi_state = true;
2231 for (unsigned i = 0; i < node_ids.size(); i++)
2232 {
2233 NodeId nodeId= node_ids[i];
2234 if(getNodeType(nodeId) == NDB_MGM_NODE_TYPE_NDB)
2235 {
2236 trp_node node = getNodeInfo(nodeId);
2237 all_nodes_in_cmvmi_state &= (node.m_state.startLevel ==
2238 NodeState::SL_CMVMI);
2239 }
2240 }
2241 } while(!all_nodes_in_cmvmi_state &&
2242 !isTimeUp(startTime,waitTime,1000));
2243
2244 if(!all_nodes_in_cmvmi_state)
2245 {
2246 return WAIT_FOR_NDBD_SHUTDOWN_FAILED;
2247 }
2248
2249 if (nostart)
2250 return 0;
2251
2252 /*
2253 verify that no nodes are stopping before starting as this would cause
2254 the starting node to shutdown
2255 */
2256 int retry= 600*10;
2257 for (;is_any_node_stopping();)
2258 {
2259 if (--retry)
2260 break;
2261 NdbSleep_MilliSleep(100);
2262 }
2263
2264 /*
2265 start the nodes
2266 */
2267 for (unsigned i = 0; i < node_ids.size(); i++)
2268 {
2269 unsigned int loop_count = 0;
2270 do
2271 {
2272 int result = sendSTART_ORD(node_ids[i]);
2273 if (result == SEND_OR_RECEIVE_FAILED ||
2274 result == NO_CONTACT_WITH_PROCESS)
2275 {
2276 if (loop_count >= num_secs_to_wait_for_node)
2277 break;
2278 loop_count++;
2279 NdbSleep_MilliSleep(1000);
2280 }
2281 else
2282 {
2283 break;
2284 }
2285 } while (1);
2286 }
2287 return 0;
2288 }
2289
2290 /*
2291 * Perform restart of all DB nodes
2292 */
2293
restartDB(bool nostart,bool initialStart,bool abort,int * stopCount,unsigned int num_secs_to_wait_for_node)2294 int MgmtSrvr::restartDB(bool nostart, bool initialStart,
2295 bool abort, int * stopCount,
2296 unsigned int num_secs_to_wait_for_node)
2297 {
2298 NodeBitmask nodes;
2299
2300 /*
2301 * Restart cannot be performed without any data nodes being started.
2302 */
2303 if (!is_any_node_alive())
2304 {
2305 return 0;
2306 }
2307
2308 int ret = sendall_STOP_REQ(nodes,
2309 abort,
2310 true,
2311 true,
2312 true,
2313 initialStart);
2314
2315 if (ret)
2316 return ret;
2317
2318 if (stopCount)
2319 *stopCount = nodes.count();
2320
2321 #ifdef VM_TRACE
2322 ndbout_c("Stopped %d nodes", nodes.count());
2323 #endif
2324
2325
2326 /*
2327 * The wait for all nodes to reach NOT_STARTED state is
2328 * split into 2 separate checks:
2329 * 1. Wait for ndbd to start shutting down
2330 * 2. Wait for ndbd to shutdown and reach NOT_STARTED
2331 * state
2332 *
2333 * Wait 1: Wait for ndbd to start shutting down. A short
2334 * wait duration of 12 seconds is being used.
2335 *
2336 * During shutdown the nodes traverse the 4 stopping
2337 * levels namely, SL_STOPPING_1 through SL_STOPPING_4.
2338 *
2339 * Thus, waiting for all the nodes to enter one of these
2340 * levels would be the obvious and intuitive approach for
2341 * this wait. However, the nodes pass these levels in
2342 * exec_STOP_REQ before the flow of execution reaches
2343 * here. An alternate approach adopted here is to check if
2344 * the nodes leave the SL_STARTED state in the first place.
2345 * A failure to leave this state would indicate that for
2346 * some reason the shutdown process failed to start and
2347 * can be considered the equivalent of checking if the
2348 * nodes have transitioned to any of the stopping levels.
2349 *
2350 * The immediate question that arises is how can one be sure
2351 * that the nodes have not gone from STARTED -> STOPPED ->
2352 * STARTED. This scenario is not an issue since we are waiting
2353 * for NOT_STARTED state and only once that state is reached is
2354 * the START_ORD fired which makes the node transition from
2355 * SL_NOTHING to further states.
2356 *
2357 * To summarize, the first of the two waits will wait a short
2358 * (12s) time to check if the shutdown process has been initiated
2359 * and exit in case any of the nodes have not left the
2360 * SL_STARTED state.
2361 */
2362 Uint64 waitTime = 12000;
2363 NDB_TICKS startTime = NdbTick_getCurrentTicks();
2364
2365 /*
2366 * Check if any of the data nodes are still
2367 * stuck in STARTED state
2368 */
2369 while(is_any_node_in_started_state() &&
2370 !isTimeUp(startTime,waitTime,100));
2371
2372 if(is_any_node_in_started_state())
2373 {
2374 return WAIT_FOR_NDBD_TO_START_SHUTDOWN_FAILED;
2375 }
2376
2377 /*
2378 * Wait 2: Wait for ndbd to shutdown and reach NOT_STARTED state
2379 *
2380 * Having confirmed that the shutdown is on its way, the
2381 * second wait involves simply waiting for the shutdown to complete
2382 * and the nodes to enter the NOT_STARTED state.
2383 *
2384 * Once the nodes reach the NOT_STARTED state, they are ready for the
2385 * START_ORD signal. It must be noted that while NOT_STARTED state has
2386 * been mentioned throughout the comments since it is better known from
2387 * a user's perspective, since we are dealing with data nodes, it is
2388 * quicker and more efficient to check if the state is SL_CMVMI which is
2389 * the equivalent of the MGMAPI state of NOT_STARTED.
2390 *
2391 * The wait time in this case is the value of num_secs_to_wait_for_node
2392 */
2393 startTime = NdbTick_getCurrentTicks();
2394 waitTime = num_secs_to_wait_for_node * 1000;
2395 while(!are_all_nodes_in_cmvmi_state() &&
2396 !isTimeUp(startTime,waitTime,1000));
2397
2398 if(!are_all_nodes_in_cmvmi_state())
2399 {
2400 return WAIT_FOR_NDBD_SHUTDOWN_FAILED;
2401 }
2402
2403 if(nostart)
2404 return 0;
2405
2406 /**
2407 * Now we start all database nodes (i.e. we make them non-idle)
2408 * We ignore the result we get from the start command.
2409 */
2410 NodeId nodeId = 0;
2411 while(getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB)) {
2412 if (!nodes.get(nodeId))
2413 continue;
2414 int result;
2415 unsigned int loop_count = 0;
2416 do
2417 {
2418 result = sendSTART_ORD(nodeId);
2419 if (result != SEND_OR_RECEIVE_FAILED &&
2420 result != NO_CONTACT_WITH_PROCESS)
2421 break;
2422 if (loop_count >= num_secs_to_wait_for_node)
2423 break;
2424 NdbSleep_MilliSleep(1000);
2425 loop_count++;
2426 } while (1);
2427 g_eventLogger->debug("Started node %d with result %d", nodeId, result);
2428 /**
2429 * Errors from this call are deliberately ignored.
2430 * Maybe the user only wanted to restart a subset of the nodes.
2431 * It is also easy for the user to check which nodes have
2432 * started and which nodes have not.
2433 */
2434 }
2435
2436 return 0;
2437 }
2438
2439 int
exitSingleUser(int * stopCount,bool abort)2440 MgmtSrvr::exitSingleUser(int * stopCount, bool abort)
2441 {
2442 NodeId nodeId = 0;
2443 int count = 0;
2444
2445 SignalSender ss(theFacade);
2446 ss.lock(); // lock will be released on exit
2447
2448 SimpleSignal ssig;
2449 ResumeReq* const resumeReq =
2450 CAST_PTR(ResumeReq, ssig.getDataPtrSend());
2451
2452 ssig.set(ss,TestOrd::TraceAPI, NDBCNTR, GSN_RESUME_REQ,
2453 ResumeReq::SignalLength);
2454 resumeReq->senderData = 12;
2455 resumeReq->senderRef = ss.getOwnRef();
2456
2457 while(getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB)){
2458 if(okToSendTo(nodeId, true) == 0){
2459 SendStatus result = ss.sendSignal(nodeId, &ssig);
2460 if (result == SEND_OK)
2461 count++;
2462 }
2463 }
2464
2465 if(stopCount != 0)
2466 * stopCount = count;
2467
2468 return 0;
2469 }
2470
2471 /*****************************************************************************
2472 * Status
2473 ****************************************************************************/
2474
2475 void
status_mgmd(NodeId node_id,ndb_mgm_node_status & node_status,Uint32 & version,Uint32 & mysql_version,const char ** address,char * addr_buf,size_t addr_buf_size,bool & is_single_user)2476 MgmtSrvr::status_mgmd(NodeId node_id,
2477 ndb_mgm_node_status& node_status,
2478 Uint32& version, Uint32& mysql_version,
2479 const char **address,
2480 char *addr_buf,
2481 size_t addr_buf_size,
2482 bool& is_single_user)
2483 {
2484 assert(getNodeType(node_id) == NDB_MGM_NODE_TYPE_MGM);
2485
2486 if (node_id == getOwnNodeId())
2487 {
2488 /*
2489 Special case to get version of own node
2490 - version and mysql_version is hardcoded
2491 - address should be the address seen from ndbd(if it's connected)
2492 else use HostName from config
2493 */
2494 Uint32 tmp_version = 0, tmp_mysql_version = 0;
2495 sendVersionReq(node_id,
2496 tmp_version,
2497 tmp_mysql_version,
2498 address,
2499 addr_buf,
2500 addr_buf_size,
2501 is_single_user);
2502 // Check that the version returned is equal to compiled in version
2503 assert(tmp_version == 0 ||
2504 (tmp_version == NDB_VERSION &&
2505 tmp_mysql_version == NDB_MYSQL_VERSION_D));
2506
2507 version = NDB_VERSION;
2508 mysql_version = NDB_MYSQL_VERSION_D;
2509 if(!*address)
2510 {
2511 // No address returned from ndbd -> get HostName from config
2512 Guard g(m_local_config_mutex);
2513 ConfigIter iter(m_local_config, CFG_SECTION_NODE);
2514 require(iter.find(CFG_NODE_ID, node_id) == 0);
2515 require(iter.get(CFG_NODE_HOST, address) == 0);
2516
2517 /*
2518 Try to convert HostName to numerical ip address
2519 (to get same output as if ndbd had replied)
2520 */
2521 struct in_addr addr;
2522 if (Ndb_getInAddr(&addr, *address) == 0)
2523 {
2524 *address = Ndb_inet_ntop(AF_INET,
2525 static_cast<void*>(&addr),
2526 addr_buf,
2527 addr_buf_size);
2528 }
2529 }
2530
2531 node_status = NDB_MGM_NODE_STATUS_CONNECTED;
2532 return;
2533 }
2534
2535 /*
2536 MGM nodes are connected directly to all other MGM
2537 node(s), return status as seen by ClusterMgr
2538 */
2539 const trp_node node = getNodeInfo(node_id);
2540 if(node.is_connected())
2541 {
2542 version = node.m_info.m_version;
2543 mysql_version = node.m_info.m_mysql_version;
2544 node_status = NDB_MGM_NODE_STATUS_CONNECTED;
2545 *address= get_connect_address(node_id,
2546 addr_buf,
2547 addr_buf_size);
2548 }
2549 else
2550 {
2551 version = 0;
2552 mysql_version = 0;
2553 node_status = NDB_MGM_NODE_STATUS_NO_CONTACT;
2554 }
2555
2556 return;
2557 }
2558
2559 int
status(int nodeId,ndb_mgm_node_status * _status,Uint32 * version,Uint32 * mysql_version,Uint32 * _phase,bool * _system,Uint32 * dynamic,Uint32 * nodegroup,Uint32 * connectCount,const char ** address,char * addr_buf,size_t addr_buf_size,bool * is_single_user)2560 MgmtSrvr::status(int nodeId,
2561 ndb_mgm_node_status * _status,
2562 Uint32 * version,
2563 Uint32 * mysql_version,
2564 Uint32 * _phase,
2565 bool * _system,
2566 Uint32 * dynamic,
2567 Uint32 * nodegroup,
2568 Uint32 * connectCount,
2569 const char **address,
2570 char *addr_buf,
2571 size_t addr_buf_size,
2572 bool* is_single_user)
2573 {
2574 switch(getNodeType(nodeId)){
2575 case NDB_MGM_NODE_TYPE_API:
2576 status_api(nodeId,
2577 *_status,
2578 *version,
2579 *mysql_version,
2580 address,
2581 addr_buf,
2582 addr_buf_size,
2583 *is_single_user);
2584 return 0;
2585 break;
2586
2587 case NDB_MGM_NODE_TYPE_MGM:
2588 status_mgmd(nodeId,
2589 *_status,
2590 *version,
2591 *mysql_version,
2592 address,
2593 addr_buf,
2594 addr_buf_size,
2595 *is_single_user);
2596 return 0;
2597 break;
2598
2599 case NDB_MGM_NODE_TYPE_NDB:
2600 break;
2601
2602 default:
2603 abort();
2604 break;
2605 }
2606
2607 const trp_node node = getNodeInfo(nodeId);
2608 assert(getNodeType(nodeId) == NDB_MGM_NODE_TYPE_NDB &&
2609 node.m_info.getType() == NodeInfo::DB);
2610
2611 if(!node.is_connected()){
2612 * _status = NDB_MGM_NODE_STATUS_NO_CONTACT;
2613 return 0;
2614 }
2615
2616 * version = node.m_info.m_version;
2617 * mysql_version = node.m_info.m_mysql_version;
2618
2619 *address= get_connect_address(nodeId, addr_buf, addr_buf_size);
2620
2621 * dynamic = node.m_state.dynamicId;
2622 * nodegroup = node.m_state.nodeGroup;
2623 * connectCount = node.m_info.m_connectCount;
2624
2625 switch(node.m_state.startLevel){
2626 case NodeState::SL_CMVMI:
2627 * _status = NDB_MGM_NODE_STATUS_NOT_STARTED;
2628 * _phase = 0;
2629 return 0;
2630 break;
2631 case NodeState::SL_STARTING:
2632 * _status = NDB_MGM_NODE_STATUS_STARTING;
2633 * _phase = node.m_state.starting.startPhase;
2634 return 0;
2635 break;
2636 case NodeState::SL_STARTED:
2637 * _status = NDB_MGM_NODE_STATUS_STARTED;
2638 * _phase = 0;
2639 return 0;
2640 break;
2641 case NodeState::SL_STOPPING_1:
2642 * _status = NDB_MGM_NODE_STATUS_SHUTTING_DOWN;
2643 * _phase = 1;
2644 * _system = node.m_state.stopping.systemShutdown != 0;
2645 return 0;
2646 break;
2647 case NodeState::SL_STOPPING_2:
2648 * _status = NDB_MGM_NODE_STATUS_SHUTTING_DOWN;
2649 * _phase = 2;
2650 * _system = node.m_state.stopping.systemShutdown != 0;
2651 return 0;
2652 break;
2653 case NodeState::SL_STOPPING_3:
2654 * _status = NDB_MGM_NODE_STATUS_SHUTTING_DOWN;
2655 * _phase = 3;
2656 * _system = node.m_state.stopping.systemShutdown != 0;
2657 return 0;
2658 break;
2659 case NodeState::SL_STOPPING_4:
2660 * _status = NDB_MGM_NODE_STATUS_SHUTTING_DOWN;
2661 * _phase = 4;
2662 * _system = node.m_state.stopping.systemShutdown != 0;
2663 return 0;
2664 break;
2665 case NodeState::SL_SINGLEUSER:
2666 * _status = NDB_MGM_NODE_STATUS_SINGLEUSER;
2667 * _phase = 0;
2668 return 0;
2669 break;
2670 default:
2671 * _status = NDB_MGM_NODE_STATUS_UNKNOWN;
2672 * _phase = 0;
2673 return 0;
2674 }
2675
2676 return -1;
2677 }
2678
2679 int
setEventReportingLevelImpl(int nodeId_arg,const EventSubscribeReq & ll)2680 MgmtSrvr::setEventReportingLevelImpl(int nodeId_arg,
2681 const EventSubscribeReq& ll)
2682 {
2683 SignalSender ss(theFacade);
2684 NdbNodeBitmask nodes;
2685 nodes.clear();
2686 while (1)
2687 {
2688 Uint32 nodeId, max;
2689 ss.lock();
2690 SimpleSignal ssig;
2691 EventSubscribeReq * dst =
2692 CAST_PTR(EventSubscribeReq, ssig.getDataPtrSend());
2693 ssig.set(ss,TestOrd::TraceAPI, CMVMI, GSN_EVENT_SUBSCRIBE_REQ,
2694 EventSubscribeReq::SignalLength);
2695 *dst = ll;
2696
2697 if (nodeId_arg == 0)
2698 {
2699 // all nodes
2700 nodeId = 1;
2701 max = MAX_NDB_NODES;
2702 }
2703 else
2704 {
2705 // only one node
2706 max = nodeId = nodeId_arg;
2707 }
2708 // first make sure nodes are sendable
2709 for(; nodeId <= max; nodeId++)
2710 {
2711 if (nodeTypes[nodeId] != NODE_TYPE_DB)
2712 continue;
2713 if (okToSendTo(nodeId, true))
2714 {
2715 if (getNodeInfo(nodeId).is_connected() == false)
2716 {
2717 // node not connected we can safely skip this one
2718 continue;
2719 }
2720 // api_reg_conf not recevied yet, need to retry
2721 return SEND_OR_RECEIVE_FAILED;
2722 }
2723 }
2724
2725 if (nodeId_arg == 0)
2726 {
2727 // all nodes
2728 nodeId = 1;
2729 max = MAX_NDB_NODES;
2730 }
2731 else
2732 {
2733 // only one node
2734 max = nodeId = nodeId_arg;
2735 }
2736 // now send to all sendable nodes nodes
2737 // note, lock is held, so states have not changed
2738 for(; (Uint32) nodeId <= max; nodeId++)
2739 {
2740 if (nodeTypes[nodeId] != NODE_TYPE_DB)
2741 continue;
2742 if (getNodeInfo(nodeId).is_connected() == false)
2743 continue; // node is not connected, skip
2744 if (ss.sendSignal(nodeId, &ssig) == SEND_OK)
2745 nodes.set(nodeId);
2746 else if (max == nodeId)
2747 {
2748 return SEND_OR_RECEIVE_FAILED;
2749 }
2750 }
2751 break;
2752 }
2753
2754 if (nodes.isclear())
2755 {
2756 return SEND_OR_RECEIVE_FAILED;
2757 }
2758
2759 int error = 0;
2760 while (!nodes.isclear())
2761 {
2762 Uint32 nodeId;
2763 SimpleSignal *signal = ss.waitFor();
2764 int gsn = signal->readSignalNumber();
2765 nodeId = refToNode(signal->header.theSendersBlockRef);
2766 switch (gsn) {
2767 case GSN_EVENT_SUBSCRIBE_CONF:{
2768 nodes.clear(nodeId);
2769 break;
2770 }
2771 case GSN_EVENT_SUBSCRIBE_REF:{
2772 nodes.clear(nodeId);
2773 error = 1;
2774 break;
2775 }
2776 // Since sending okToSend(true),
2777 // there is no guarantee that NF_COMPLETEREP will come
2778 // i.e listen also to NODE_FAILREP
2779 case GSN_NODE_FAILREP: {
2780 const NodeFailRep * const rep =
2781 CAST_CONSTPTR(NodeFailRep, signal->getDataPtr());
2782 Uint32 len = NodeFailRep::getNodeMaskLength(signal->getLength());
2783 require(len == NodeBitmask::Size || // only full length in ndbapi
2784 len == 0);
2785 NdbNodeBitmask mask;
2786 // only care about data nodes
2787 if (signal->header.m_noOfSections >= 1)
2788 {
2789 mask.assign(signal->ptr[0].sz, signal->ptr[0].p);
2790 }
2791 else
2792 {
2793 mask.assign(NdbNodeBitmask::Size, rep->theNodes);
2794 }
2795 nodes.bitANDC(mask);
2796 break;
2797 }
2798
2799 case GSN_NF_COMPLETEREP:{
2800 const NFCompleteRep * const rep =
2801 CAST_CONSTPTR(NFCompleteRep, signal->getDataPtr());
2802 if (rep->failedNodeId <= nodes.max_size())
2803 nodes.clear(rep->failedNodeId);
2804 break;
2805 }
2806 case GSN_API_REGCONF:
2807 case GSN_TAKE_OVERTCCONF:
2808 case GSN_CONNECT_REP:
2809 continue;
2810 default:
2811 report_unknown_signal(signal);
2812 return SEND_OR_RECEIVE_FAILED;
2813 }
2814 }
2815 if (error)
2816 return SEND_OR_RECEIVE_FAILED;
2817 return 0;
2818 }
2819
2820 //****************************************************************************
2821 //****************************************************************************
2822 int
setNodeLogLevelImpl(int nodeId,const SetLogLevelOrd & ll)2823 MgmtSrvr::setNodeLogLevelImpl(int nodeId, const SetLogLevelOrd & ll)
2824 {
2825 INIT_SIGNAL_SENDER(ss,nodeId);
2826
2827 SimpleSignal ssig;
2828 ssig.set(ss,TestOrd::TraceAPI, CMVMI, GSN_SET_LOGLEVELORD,
2829 SetLogLevelOrd::SignalLength);
2830 SetLogLevelOrd* const dst = CAST_PTR(SetLogLevelOrd, ssig.getDataPtrSend());
2831 *dst = ll;
2832
2833 return ss.sendSignal(nodeId, &ssig) == SEND_OK ? 0 : SEND_OR_RECEIVE_FAILED;
2834 }
2835
2836 //****************************************************************************
2837 //****************************************************************************
2838
2839 int
insertError(int nodeId,int errorNo,Uint32 * extra)2840 MgmtSrvr::insertError(int nodeId, int errorNo, Uint32 * extra)
2841 {
2842 BlockNumber block;
2843
2844 if (errorNo < 0) {
2845 return INVALID_ERROR_NUMBER;
2846 }
2847
2848 SignalSender ss(theFacade);
2849 ss.lock(); /* lock will be released on exit */
2850
2851 if(getNodeType(nodeId) == NDB_MGM_NODE_TYPE_NDB)
2852 {
2853 block= CMVMI;
2854 }
2855 else if(nodeId == _ownNodeId)
2856 {
2857 g_errorInsert= errorNo;
2858 return 0;
2859 }
2860 else if(getNodeType(nodeId) == NDB_MGM_NODE_TYPE_MGM)
2861 block= _blockNumber;
2862 else
2863 return WRONG_PROCESS_TYPE;
2864
2865 SimpleSignal ssig;
2866 ssig.set(ss,TestOrd::TraceAPI, block, GSN_TAMPER_ORD,
2867 TamperOrd::SignalLength);
2868 TamperOrd* const tamperOrd = CAST_PTR(TamperOrd, ssig.getDataPtrSend());
2869 tamperOrd->errorNo = errorNo;
2870
2871 if (extra)
2872 {
2873 ssig.getDataPtrSend()[1] = * extra;
2874 ssig.header.theLength++;
2875 }
2876
2877 int res = ss.sendSignal(nodeId, &ssig) == SEND_OK ? 0 :SEND_OR_RECEIVE_FAILED;
2878
2879 if (res == 0)
2880 {
2881 /**
2882 * In order to make NDB_TAMPER (almost) syncronous,
2883 * make a syncronous request *after* the NDB_TAMPER
2884 */
2885 make_sync_req(ss, Uint32(nodeId));
2886 }
2887
2888 return res;
2889 }
2890
2891
/**
 * Begin a schema transaction by sending SCHEMA_TRANS_BEGIN_REQ to DBDICT
 * on a data node and waiting for the answer.
 *
 * The request is re-sent (label `retry`) when the receiver answers
 * NotMaster, Busy or BusyWithNR, or when a NODE_FAILREP lists the node the
 * request was sent to.
 *
 * @param ss            Signal sender used for sending and receiving.
 * @param out_nodeId    Out: node that confirmed the transaction.
 * @param transId       Transaction id placed in the request.
 * @param out_transKey  Out: transKey taken from the confirmation.
 * @return 0 on success, SEND_OR_RECEIVE_FAILED when sending fails or an
 *         unexpected signal arrives, otherwise the errorCode of the REF.
 */
int
MgmtSrvr::startSchemaTrans(SignalSender& ss, NodeId & out_nodeId,
                           Uint32 transId, Uint32 & out_transKey)
{
  SimpleSignal ssig;

  ssig.set(ss, 0, DBDICT, GSN_SCHEMA_TRANS_BEGIN_REQ,
           SchemaTransBeginReq::SignalLength);

  SchemaTransBeginReq* req =
    CAST_PTR(SchemaTransBeginReq, ssig.getDataPtrSend());

  req->clientRef = ss.getOwnRef();
  req->transId = transId;
  req->requestInfo = 0;

  NodeId nodeId = ss.get_an_alive_node();

retry:
  // The candidate may have been changed by a REF or a node failure below;
  // fall back to any alive node when it is not alive.
  if (ss.get_node_alive(nodeId) == false)
  {
    nodeId = ss.get_an_alive_node();
  }

  if (ss.sendSignal(nodeId, &ssig) != SEND_OK)
  {
    return SEND_OR_RECEIVE_FAILED;
  }

  while (true)
  {
    SimpleSignal *signal = ss.waitFor();
    int gsn = signal->readSignalNumber();
    switch (gsn) {
    case GSN_SCHEMA_TRANS_BEGIN_CONF: {
      const SchemaTransBeginConf * conf =
        CAST_CONSTPTR(SchemaTransBeginConf, signal->getDataPtr());
      out_transKey = conf->transKey;
      out_nodeId = nodeId;
      return 0;
    }
    case GSN_SCHEMA_TRANS_BEGIN_REF: {
      const SchemaTransBeginRef * ref =
        CAST_CONSTPTR(SchemaTransBeginRef, signal->getDataPtr());

      switch(ref->errorCode){
      case SchemaTransBeginRef::NotMaster:
        // Redirect the next attempt to the node named in the REF
        nodeId = ref->masterNodeId;
        // Fall-through
      case SchemaTransBeginRef::Busy:
      case SchemaTransBeginRef::BusyWithNR:
        goto retry;
      default:
        return ref->errorCode;
      }
    }
    case GSN_NF_COMPLETEREP:
      // ignore
      break;
    case GSN_NODE_FAILREP:{
      const NodeFailRep * const rep =
        CAST_CONSTPTR(NodeFailRep, signal->getDataPtr());
      Uint32 len = NodeFailRep::getNodeMaskLength(signal->getLength());
      assert(len == NodeBitmask::Size || // only full length in ndbapi
             len == 0);
      // The failed-node mask is read from the first section when one is
      // attached, otherwise from the signal body.
      if (signal->header.m_noOfSections >= 1)
      {
        if (BitmaskImpl::safe_get(NodeBitmask::getPackedLengthInWords(signal->ptr[0].p),
                                  signal->ptr[0].p, nodeId))
        {
          // The node we sent to has failed: try again with another node
          nodeId++;
          goto retry;
        }
      }
      else if (BitmaskImpl::safe_get(len, rep->theAllNodes, nodeId))
      {
        // The node we sent to has failed: try again with another node
        nodeId++;
        goto retry;
      }
      break;
    }
    case GSN_API_REGCONF:
    case GSN_TAKE_OVERTCCONF:
    case GSN_CONNECT_REP:
      // Not relevant for this request, keep waiting
      break;
    default:
      report_unknown_signal(signal);
      return SEND_OR_RECEIVE_FAILED;
    }
  }
}
2983
2984 int
endSchemaTrans(SignalSender & ss,NodeId nodeId,Uint32 transId,Uint32 transKey,Uint32 flags)2985 MgmtSrvr::endSchemaTrans(SignalSender& ss, NodeId nodeId,
2986 Uint32 transId, Uint32 transKey,
2987 Uint32 flags)
2988 {
2989 SimpleSignal ssig;
2990
2991 ssig.set(ss, 0, DBDICT, GSN_SCHEMA_TRANS_END_REQ,
2992 SchemaTransEndReq::SignalLength);
2993
2994 SchemaTransEndReq* req =
2995 CAST_PTR(SchemaTransEndReq, ssig.getDataPtrSend());
2996
2997 req->clientRef = ss.getOwnRef();
2998 req->transId = transId;
2999 req->requestInfo = 0;
3000 req->transKey = transKey;
3001 req->flags = flags;
3002
3003 if (ss.sendSignal(nodeId, &ssig) != SEND_OK)
3004 {
3005 return SEND_OR_RECEIVE_FAILED;
3006 }
3007
3008 while (true)
3009 {
3010 SimpleSignal *signal = ss.waitFor();
3011 int gsn = signal->readSignalNumber();
3012 switch (gsn) {
3013 case GSN_SCHEMA_TRANS_END_CONF: {
3014 return 0;
3015 }
3016 case GSN_SCHEMA_TRANS_END_REF: {
3017 const SchemaTransEndRef * ref =
3018 CAST_CONSTPTR(SchemaTransEndRef, signal->getDataPtr());
3019 return ref->errorCode;
3020 }
3021 case GSN_NF_COMPLETEREP:
3022 // ignore
3023 break;
3024 case GSN_NODE_FAILREP:{
3025 const NodeFailRep * const rep =
3026 CAST_CONSTPTR(NodeFailRep, signal->getDataPtr());
3027 Uint32 len = NodeFailRep::getNodeMaskLength(signal->getLength());
3028 assert(len == NodeBitmask::Size || // only full length in ndbapi
3029 len == 0);
3030
3031 if (signal->header.m_noOfSections >= 1)
3032 {
3033 if (BitmaskImpl::safe_get(NodeBitmask::getPackedLengthInWords(signal->ptr[0].p),
3034 signal->ptr[0].p, nodeId))
3035 {
3036 return -1;
3037 }
3038 }
3039 else if (BitmaskImpl::safe_get(len, rep->theAllNodes, nodeId))
3040 {
3041 return -1;
3042 }
3043 break;
3044 }
3045 case GSN_API_REGCONF:
3046 case GSN_TAKE_OVERTCCONF:
3047 case GSN_CONNECT_REP:
3048 break;
3049 default:
3050 report_unknown_signal(signal);
3051 return SEND_OR_RECEIVE_FAILED;
3052 }
3053 }
3054 }
3055
3056 int
createNodegroup(int * nodes,int count,int * ng)3057 MgmtSrvr::createNodegroup(int *nodes, int count, int *ng)
3058 {
3059 int res;
3060 SignalSender ss(theFacade);
3061 ss.lock();
3062
3063 Uint32 transId = rand();
3064 Uint32 transKey;
3065 NodeId nodeId;
3066
3067 if ((res = startSchemaTrans(ss, nodeId, transId, transKey)))
3068 {
3069 return res;
3070 }
3071
3072 SimpleSignal ssig;
3073 ssig.set(ss, 0, DBDICT, GSN_CREATE_NODEGROUP_REQ,
3074 CreateNodegroupReq::SignalLength);
3075
3076 CreateNodegroupReq* req =
3077 CAST_PTR(CreateNodegroupReq, ssig.getDataPtrSend());
3078
3079 req->transId = transId;
3080 req->transKey = transKey;
3081 req->nodegroupId = RNIL;
3082 req->senderData = 77;
3083 req->senderRef = ss.getOwnRef();
3084 bzero(req->nodes, sizeof(req->nodes));
3085
3086 if (ng)
3087 {
3088 if (* ng != -1)
3089 {
3090 req->nodegroupId = * ng;
3091 }
3092 }
3093 for (int i = 0; i<count && i<(int)NDB_ARRAY_SIZE(req->nodes); i++)
3094 {
3095 req->nodes[i] = nodes[i];
3096 }
3097
3098 if (ss.sendSignal(nodeId, &ssig) != SEND_OK)
3099 {
3100 return SEND_OR_RECEIVE_FAILED;
3101 }
3102
3103 bool wait = true;
3104 while (wait)
3105 {
3106 SimpleSignal *signal = ss.waitFor();
3107 int gsn = signal->readSignalNumber();
3108 switch (gsn) {
3109 case GSN_CREATE_NODEGROUP_CONF: {
3110 const CreateNodegroupConf * conf =
3111 CAST_CONSTPTR(CreateNodegroupConf, signal->getDataPtr());
3112
3113 if (ng)
3114 {
3115 * ng = conf->nodegroupId;
3116 }
3117
3118 wait = false;
3119 break;
3120 }
3121 case GSN_CREATE_NODEGROUP_REF:{
3122 const CreateNodegroupRef * ref =
3123 CAST_CONSTPTR(CreateNodegroupRef, signal->getDataPtr());
3124 Uint32 err = ref->errorCode;
3125 endSchemaTrans(ss, nodeId, transId, transKey,
3126 SchemaTransEndReq::SchemaTransAbort);
3127 return err;
3128 }
3129 case GSN_NF_COMPLETEREP:
3130 // ignore
3131 break;
3132 case GSN_NODE_FAILREP:{
3133 const NodeFailRep * const rep =
3134 CAST_CONSTPTR(NodeFailRep, signal->getDataPtr());
3135 Uint32 len = NodeFailRep::getNodeMaskLength(signal->getLength());
3136 assert(len == NodeBitmask::Size || // only full length in ndbapi
3137 len == 0);
3138
3139 if (signal->header.m_noOfSections >= 1)
3140 {
3141 if (BitmaskImpl::safe_get(NodeBitmask::getPackedLengthInWords(signal->ptr[0].p),
3142 signal->ptr[0].p, nodeId))
3143 {
3144 return SchemaTransBeginRef::Nodefailure;
3145 }
3146 }
3147 else if (BitmaskImpl::safe_get(len, rep->theAllNodes, nodeId))
3148 {
3149 return SchemaTransBeginRef::Nodefailure;
3150 }
3151 break;
3152 }
3153 case GSN_API_REGCONF:
3154 case GSN_TAKE_OVERTCCONF:
3155 case GSN_CONNECT_REP:
3156 break;
3157 default:
3158 report_unknown_signal(signal);
3159 return SEND_OR_RECEIVE_FAILED;
3160 }
3161 }
3162
3163 return endSchemaTrans(ss, nodeId, transId, transKey, 0);
3164 }
3165
3166 int
dropNodegroup(int ng)3167 MgmtSrvr::dropNodegroup(int ng)
3168 {
3169 int res;
3170 SignalSender ss(theFacade);
3171 ss.lock();
3172
3173 Uint32 transId = rand();
3174 Uint32 transKey;
3175 NodeId nodeId;
3176
3177 if ((res = startSchemaTrans(ss, nodeId, transId, transKey)))
3178 {
3179 return res;
3180 }
3181
3182 SimpleSignal ssig;
3183 ssig.set(ss, 0, DBDICT, GSN_DROP_NODEGROUP_REQ, DropNodegroupReq::SignalLength);
3184
3185 DropNodegroupReq* req =
3186 CAST_PTR(DropNodegroupReq, ssig.getDataPtrSend());
3187
3188 req->transId = transId;
3189 req->transKey = transKey;
3190 req->nodegroupId = ng;
3191 req->senderData = 77;
3192 req->senderRef = ss.getOwnRef();
3193
3194 if (ss.sendSignal(nodeId, &ssig) != SEND_OK)
3195 {
3196 return SEND_OR_RECEIVE_FAILED;
3197 }
3198
3199 bool wait = true;
3200 while (wait)
3201 {
3202 SimpleSignal *signal = ss.waitFor();
3203 int gsn = signal->readSignalNumber();
3204 switch (gsn) {
3205 case GSN_DROP_NODEGROUP_CONF: {
3206 wait = false;
3207 break;
3208 }
3209 case GSN_DROP_NODEGROUP_REF:
3210 {
3211 const DropNodegroupRef * ref =
3212 CAST_CONSTPTR(DropNodegroupRef, signal->getDataPtr());
3213 endSchemaTrans(ss, nodeId, transId, transKey,
3214 SchemaTransEndReq::SchemaTransAbort);
3215 return ref->errorCode;
3216 }
3217 case GSN_NF_COMPLETEREP:
3218 // ignore
3219 break;
3220 case GSN_NODE_FAILREP:{
3221 const NodeFailRep * const rep =
3222 CAST_CONSTPTR(NodeFailRep, signal->getDataPtr());
3223 Uint32 len = NodeFailRep::getNodeMaskLength(signal->getLength());
3224 assert(len == NodeBitmask::Size || // only full length in ndbapi
3225 len == 0);
3226
3227 if (signal->header.m_noOfSections >= 1)
3228 {
3229 if (BitmaskImpl::safe_get(NodeBitmask::getPackedLengthInWords(signal->ptr[0].p),
3230 signal->ptr[0].p, nodeId))
3231 {
3232 return SchemaTransBeginRef::Nodefailure;
3233 }
3234 }
3235 else if (BitmaskImpl::safe_get(len, rep->theAllNodes, nodeId))
3236 {
3237 return SchemaTransBeginRef::Nodefailure;
3238 }
3239 break;
3240 }
3241 case GSN_API_REGCONF:
3242 case GSN_TAKE_OVERTCCONF:
3243 case GSN_CONNECT_REP:
3244 break;
3245 default:
3246 report_unknown_signal(signal);
3247 return SEND_OR_RECEIVE_FAILED;
3248 }
3249 }
3250
3251 return endSchemaTrans(ss, nodeId, transId, transKey, 0);
3252 }
3253
3254
3255 //****************************************************************************
3256 //****************************************************************************
3257
3258 int
setTraceNo(int nodeId,int traceNo)3259 MgmtSrvr::setTraceNo(int nodeId, int traceNo)
3260 {
3261 if (traceNo < 0) {
3262 return INVALID_TRACE_NUMBER;
3263 }
3264
3265 INIT_SIGNAL_SENDER(ss,nodeId);
3266
3267 SimpleSignal ssig;
3268 ssig.set(ss,TestOrd::TraceAPI, CMVMI, GSN_TEST_ORD, TestOrd::SignalLength);
3269 TestOrd* const testOrd = CAST_PTR(TestOrd, ssig.getDataPtrSend());
3270 testOrd->clear();
3271 // Assume TRACE command causes toggling. Not really defined... ? TODO
3272 testOrd->setTraceCommand(TestOrd::Toggle,
3273 (TestOrd::TraceSpecification)traceNo);
3274
3275 return ss.sendSignal(nodeId, &ssig) == SEND_OK ? 0 : SEND_OR_RECEIVE_FAILED;
3276 }
3277
3278 //****************************************************************************
3279 //****************************************************************************
3280
3281 int
setSignalLoggingMode(int nodeId,LogMode mode,const Vector<BaseString> & blocks)3282 MgmtSrvr::setSignalLoggingMode(int nodeId, LogMode mode,
3283 const Vector<BaseString>& blocks)
3284 {
3285 INIT_SIGNAL_SENDER(ss,nodeId);
3286
3287 // Convert from MgmtSrvr format...
3288
3289 TestOrd::Command command;
3290 if (mode == Off) {
3291 command = TestOrd::Off;
3292 }
3293 else {
3294 command = TestOrd::On;
3295 }
3296
3297 TestOrd::SignalLoggerSpecification logSpec;
3298 switch (mode) {
3299 case In:
3300 logSpec = TestOrd::InputSignals;
3301 break;
3302 case Out:
3303 logSpec = TestOrd::OutputSignals;
3304 break;
3305 case InOut:
3306 logSpec = TestOrd::InputOutputSignals;
3307 break;
3308 case Off:
3309 // In MgmtSrvr interface it's just possible to switch off all logging, both
3310 // "in" and "out" (this should probably be changed).
3311 logSpec = TestOrd::InputOutputSignals;
3312 break;
3313 default:
3314 ndbout_c("Unexpected value %d, MgmtSrvr::setSignalLoggingMode, line %d",
3315 (unsigned)mode, __LINE__);
3316 assert(false);
3317 return -1;
3318 }
3319
3320 SimpleSignal ssig;
3321 ssig.set(ss,TestOrd::TraceAPI, CMVMI, GSN_TEST_ORD, TestOrd::SignalLength);
3322
3323 TestOrd* const testOrd = CAST_PTR(TestOrd, ssig.getDataPtrSend());
3324 testOrd->clear();
3325
3326 if (blocks.size() == 0 || blocks[0] == "ALL") {
3327 // Logg command for all blocks
3328 testOrd->addSignalLoggerCommand(command, logSpec);
3329 } else {
3330 for(unsigned i = 0; i < blocks.size(); i++)
3331 {
3332 BlockNumber blockNumber = getBlockNo(blocks[i].c_str());
3333 if (blockNumber == 0)
3334 return INVALID_BLOCK_NAME;
3335 testOrd->addSignalLoggerCommand(blockNumber, command, logSpec);
3336 }
3337 }
3338
3339 return ss.sendSignal(nodeId, &ssig) == SEND_OK ? 0 : SEND_OR_RECEIVE_FAILED;
3340 }
3341
3342 /*****************************************************************************
3343 * Signal tracing
3344 *****************************************************************************/
startSignalTracing(int nodeId)3345 int MgmtSrvr::startSignalTracing(int nodeId)
3346 {
3347 INIT_SIGNAL_SENDER(ss,nodeId);
3348
3349 SimpleSignal ssig;
3350 ssig.set(ss,TestOrd::TraceAPI, CMVMI, GSN_TEST_ORD, TestOrd::SignalLength);
3351
3352 TestOrd* const testOrd = CAST_PTR(TestOrd, ssig.getDataPtrSend());
3353 testOrd->clear();
3354 testOrd->setTestCommand(TestOrd::On);
3355
3356 return ss.sendSignal(nodeId, &ssig) == SEND_OK ? 0 : SEND_OR_RECEIVE_FAILED;
3357 }
3358
3359 int
stopSignalTracing(int nodeId)3360 MgmtSrvr::stopSignalTracing(int nodeId)
3361 {
3362 INIT_SIGNAL_SENDER(ss,nodeId);
3363
3364 SimpleSignal ssig;
3365 ssig.set(ss,TestOrd::TraceAPI, CMVMI, GSN_TEST_ORD, TestOrd::SignalLength);
3366 TestOrd* const testOrd = CAST_PTR(TestOrd, ssig.getDataPtrSend());
3367 testOrd->clear();
3368 testOrd->setTestCommand(TestOrd::Off);
3369
3370 return ss.sendSignal(nodeId, &ssig) == SEND_OK ? 0 : SEND_OR_RECEIVE_FAILED;
3371 }
3372
3373
3374 /*****************************************************************************
3375 * Dump state
3376 *****************************************************************************/
3377
3378 int
dumpState(int nodeId,const char * args)3379 MgmtSrvr::dumpState(int nodeId, const char* args)
3380 {
3381 // Convert the space separeted args
3382 // string to an int array
3383 Uint32 args_array[25];
3384 Uint32 numArgs = 0;
3385
3386 const int BufSz = 12; /* 32 bit signed = 10 digits + sign + trailing \0 */
3387 char buf[BufSz];
3388 int b = 0;
3389 memset(buf, 0, BufSz);
3390 for (size_t i = 0; i <= strlen(args); i++){
3391 if (args[i] == ' ' || args[i] == 0){
3392 assert(b < BufSz);
3393 assert(buf[b] == 0);
3394 args_array[numArgs] = atoi(buf);
3395 numArgs++;
3396 memset(buf, 0, BufSz);
3397 b = 0;
3398 } else {
3399 buf[b] = args[i];
3400 b++;
3401 }
3402 }
3403
3404 return dumpState(nodeId, args_array, numArgs);
3405 }
3406
3407 int
dumpState(int nodeId,const Uint32 args[],Uint32 no)3408 MgmtSrvr::dumpState(int nodeId, const Uint32 args[], Uint32 no)
3409 {
3410 if (nodeId == _ownNodeId)
3411 {
3412 return dumpStateSelf(args, no);
3413 }
3414
3415 INIT_SIGNAL_SENDER(ss,nodeId);
3416
3417 const Uint32 len = no > 25 ? 25 : no;
3418
3419 SimpleSignal ssig;
3420 DumpStateOrd * const dumpOrd =
3421 CAST_PTR(DumpStateOrd, ssig.getDataPtrSend());
3422 ssig.set(ss,TestOrd::TraceAPI, CMVMI, GSN_DUMP_STATE_ORD, len);
3423 for(Uint32 i = 0; i<25; i++){
3424 if (i < len)
3425 dumpOrd->args[i] = args[i];
3426 else
3427 dumpOrd->args[i] = 0;
3428 }
3429
3430 int res = ss.sendSignal(nodeId, &ssig) == SEND_OK ? 0 :SEND_OR_RECEIVE_FAILED;
3431
3432 if (res == 0)
3433 {
3434 /**
3435 * In order to make DUMP (almost) syncronous,
3436 * make a syncronous request *after* the NDB_TAMPER
3437 */
3438 make_sync_req(ss, Uint32(nodeId));
3439 }
3440
3441 return res;
3442
3443 }
3444
3445 int
dumpStateSelf(const Uint32 args[],Uint32 no)3446 MgmtSrvr::dumpStateSelf(const Uint32 args[], Uint32 no)
3447 {
3448 if (no < 1)
3449 return -1;
3450
3451 switch(args[0])
3452 {
3453 #ifdef ERROR_INSERT
3454 case 9994:
3455 {
3456 /* Transporter send blocking */
3457 if (no >= 2)
3458 {
3459 Uint32 nodeId = args[1];
3460 ndbout_c("Blocking send to node %u",
3461 nodeId);
3462 TransporterRegistry* tr = theFacade->get_registry();
3463 tr->blockSend(*theFacade, nodeId);
3464 }
3465 break;
3466 }
3467 case 9995:
3468 {
3469 /* Transporter send unblocking */
3470 if (no >= 2)
3471 {
3472 Uint32 nodeId = args[1];
3473 ndbout_c("Unblocking send to node %u",
3474 nodeId);
3475 TransporterRegistry* tr = theFacade->get_registry();
3476 tr->unblockSend(*theFacade, nodeId);
3477 }
3478 break;
3479 }
3480
3481 case 9996:
3482 {
3483 /* Sendbuffer consumption */
3484 if (no >= 2)
3485 {
3486 Uint64 remain_bytes = args[1];
3487 ndbout_c("Consuming sendbuffer except for %llu bytes",
3488 remain_bytes);
3489 theFacade->consume_sendbuffer(remain_bytes);
3490 }
3491 break;
3492 }
3493 case 9997:
3494 {
3495 /* Sendbuffer release */
3496 ndbout_c("Releasing consumed sendbuffer");
3497 theFacade->release_consumed_sendbuffer();
3498 break;
3499 }
3500 #endif
3501 default:
3502 ;
3503 }
3504
3505 return 0;
3506 }
3507
3508
3509
3510 //****************************************************************************
3511 //****************************************************************************
3512
getErrorText(int errorCode,char * buf,int buf_sz)3513 const char* MgmtSrvr::getErrorText(int errorCode, char *buf, int buf_sz)
3514 {
3515 ndb_error_string(errorCode, buf, buf_sz);
3516 buf[buf_sz-1]= 0;
3517 return buf;
3518 }
3519
3520
3521 void
trp_deliver_signal(const NdbApiSignal * signal,const LinearSectionPtr ptr[3])3522 MgmtSrvr::trp_deliver_signal(const NdbApiSignal* signal,
3523 const LinearSectionPtr ptr[3])
3524 {
3525 int gsn = signal->readSignalNumber();
3526
3527 switch (gsn) {
3528 case GSN_EVENT_REP:
3529 {
3530 /**
3531 * This EVENT_REP receives all infoEvent and eventLog messages that
3532 * are NOT generated through a DUMP command.
3533 */
3534 const Uint32 *data = signal->getDataPtr();
3535 Uint32 sz = signal->getLength();
3536 if (signal->getNoOfSections() > 0)
3537 {
3538 /**
3539 * Data comes in segmented part.
3540 */
3541 data = ptr[0].p;
3542 sz = ptr[0].sz;
3543 }
3544 eventReport(signal->getDataPtr(), sz, data);
3545 break;
3546 }
3547
3548 case GSN_NF_COMPLETEREP:{
3549 const NFCompleteRep * rep = CAST_CONSTPTR(NFCompleteRep,
3550 signal->getDataPtr());
3551 /* Clear local nodeid reservation(if any) */
3552 release_local_nodeid_reservation(rep->failedNodeId);
3553
3554 clear_connect_address_cache(rep->failedNodeId);
3555 break;
3556 }
3557 case GSN_TAMPER_ORD:
3558 ndbout << "TAMPER ORD" << endl;
3559 break;
3560 case GSN_API_REGCONF:
3561 case GSN_TAKE_OVERTCCONF:
3562 break;
3563 case GSN_CONNECT_REP:{
3564 const Uint32 nodeId = signal->getDataPtr()[0];
3565
3566 /*
3567 Clear local nodeid reservation since nodeid is
3568 now reserved by a connected transporter
3569 */
3570 release_local_nodeid_reservation(nodeId);
3571
3572 union {
3573 Uint32 theData[25];
3574 EventReport repData;
3575 };
3576 EventReport * rep = &repData;
3577 theData[1] = nodeId;
3578 rep->setEventType(NDB_LE_Connected);
3579
3580 if (nodeTypes[nodeId] == NODE_TYPE_DB)
3581 {
3582 m_started_nodes.push_back(nodeId);
3583 }
3584 rep->setEventType(NDB_LE_Connected);
3585 rep->setNodeId(_ownNodeId);
3586 eventReport(theData, 1, theData);
3587 return;
3588 }
3589 case GSN_NODE_FAILREP:
3590 {
3591 union {
3592 Uint32 theData[25];
3593 EventReport repData;
3594 };
3595 bzero(theData, sizeof(theData));
3596 EventReport * event = &repData;
3597 event->setEventType(NDB_LE_Disconnected);
3598 event->setNodeId(_ownNodeId);
3599
3600 const NodeFailRep *rep = CAST_CONSTPTR(NodeFailRep,
3601 signal->getDataPtr());
3602 Uint32 len = NodeFailRep::getNodeMaskLength(signal->getLength());
3603
3604 const Uint32* nbm;
3605 if (signal->m_noOfSections >= 1)
3606 {
3607 assert (len == 0);
3608 nbm = ptr[0].p;
3609 len = ptr[0].sz;
3610 }
3611 else
3612 {
3613 assert(len == NodeBitmask::Size); // only full length in ndbapi
3614 nbm = rep->theAllNodes;
3615 }
3616
3617 for (Uint32 i = BitmaskImpl::find_first(len, nbm);
3618 i != BitmaskImpl::NotFound;
3619 i = BitmaskImpl::find_next(len, nbm, i + 1))
3620 {
3621 theData[1] = i;
3622 eventReport(theData, 1, theData);
3623
3624 /* Clear local nodeid reservation(if any) */
3625 release_local_nodeid_reservation(i);
3626
3627 clear_connect_address_cache(i);
3628 }
3629 return;
3630 }
3631 case GSN_CLOSE_COMREQ:
3632 {
3633 theFacade->perform_close_clnt(this);
3634 break;
3635 }
3636 default:
3637 g_eventLogger->error("Unknown signal received. SignalNumber: "
3638 "%i from (%d, 0x%x)",
3639 gsn,
3640 refToNode(signal->theSendersBlockRef),
3641 refToBlock(signal->theSendersBlockRef));
3642 assert(false);
3643 }
3644 }
3645
3646
3647 void
trp_node_status(Uint32 nodeId,Uint32 _event)3648 MgmtSrvr::trp_node_status(Uint32 nodeId, Uint32 _event)
3649 {
3650 }
3651
3652 enum ndb_mgm_node_type
getNodeType(NodeId nodeId) const3653 MgmtSrvr::getNodeType(NodeId nodeId) const
3654 {
3655 if(nodeId >= MAX_NODES)
3656 return (enum ndb_mgm_node_type)-1;
3657
3658 return nodeTypes[nodeId];
3659 }
3660
3661
3662 const char*
get_connect_address(NodeId node_id,char * addr_buf,size_t addr_buf_size)3663 MgmtSrvr::get_connect_address(NodeId node_id,
3664 char *addr_buf,
3665 size_t addr_buf_size)
3666 {
3667 assert(node_id < NDB_ARRAY_SIZE(m_connect_address));
3668
3669 if (m_connect_address[node_id].s_addr == 0)
3670 {
3671 // No cached connect address available
3672 const trp_node &node= getNodeInfo(node_id);
3673 if (node.is_connected())
3674 {
3675 // Cache the connect address, it's valid until
3676 // node disconnects
3677 m_connect_address[node_id] = theFacade->ext_get_connect_address(node_id);
3678 }
3679 }
3680
3681 // Return the cached connect address
3682 return Ndb_inet_ntop(AF_INET,
3683 static_cast<void*>(&m_connect_address[node_id]),
3684 addr_buf,
3685 addr_buf_size);
3686 }
3687
3688
3689 void
clear_connect_address_cache(NodeId nodeid)3690 MgmtSrvr::clear_connect_address_cache(NodeId nodeid)
3691 {
3692 assert(nodeid < NDB_ARRAY_SIZE(m_connect_address));
3693 if (nodeid < NDB_ARRAY_SIZE(m_connect_address))
3694 {
3695 m_connect_address[nodeid].s_addr = 0;
3696 }
3697 }
3698
3699 /***************************************************************************
3700 * Alloc nodeid
3701 ***************************************************************************/
3702
NodeIdReservations()3703 MgmtSrvr::NodeIdReservations::NodeIdReservations()
3704 {
3705 memset(m_reservations, 0, sizeof(m_reservations));
3706 }
3707
3708
3709 void
check_array(NodeId n) const3710 MgmtSrvr::NodeIdReservations::check_array(NodeId n) const
3711 {
3712 assert( n < NDB_ARRAY_SIZE(m_reservations));
3713 }
3714
3715
3716 bool
get(NodeId n) const3717 MgmtSrvr::NodeIdReservations::get(NodeId n) const
3718 {
3719 check_array(n);
3720
3721 return (m_reservations[n].m_timeout != 0);
3722 }
3723
3724
3725 void
set(NodeId n,unsigned timeout)3726 MgmtSrvr::NodeIdReservations::set(NodeId n, unsigned timeout)
3727 {
3728 check_array(n);
3729
3730 Reservation& r = m_reservations[n];
3731 // Dont't allow double set
3732 assert(r.m_timeout == 0 && !NdbTick_IsValid(r.m_start));
3733
3734 r.m_timeout = timeout;
3735 r.m_start = NdbTick_getCurrentTicks();
3736 }
3737
3738
3739 BaseString
pretty_str() const3740 MgmtSrvr::NodeIdReservations::pretty_str() const
3741 {
3742 const char* sep = "";
3743 BaseString str;
3744 for (size_t i = 0; i < NDB_ARRAY_SIZE(m_reservations); i++)
3745 {
3746 const Reservation& r = m_reservations[i];
3747 if (r.m_timeout)
3748 {
3749 str.appfmt("%s%u", sep, (unsigned)i);
3750 sep = ",";
3751 }
3752 }
3753 return str;
3754 }
3755
3756
3757 void
clear(NodeId n)3758 MgmtSrvr::NodeIdReservations::clear(NodeId n)
3759 {
3760 check_array(n);
3761
3762 Reservation& r = m_reservations[n];
3763 // Dont't allow double clear
3764 assert(r.m_timeout != 0 && NdbTick_IsValid(r.m_start));
3765
3766 r.m_timeout = 0;
3767 NdbTick_Invalidate(&r.m_start);
3768 }
3769
3770
3771 bool
has_timedout(NodeId n,NDB_TICKS now) const3772 MgmtSrvr::NodeIdReservations::has_timedout(NodeId n, NDB_TICKS now) const
3773 {
3774 check_array(n);
3775
3776 const Reservation& r = m_reservations[n];
3777 if (r.m_timeout &&
3778 NdbTick_Elapsed(r.m_start,now).milliSec() > r.m_timeout)
3779 return true;
3780 return false;
3781 }
3782
3783
3784 void
release_local_nodeid_reservation(NodeId nodeid)3785 MgmtSrvr::release_local_nodeid_reservation(NodeId nodeid)
3786 {
3787 NdbMutex_Lock(m_reserved_nodes_mutex);
3788 if (m_reserved_nodes.get(nodeid))
3789 {
3790 g_eventLogger->debug("Releasing local reservation for nodeid %d", nodeid);
3791 m_reserved_nodes.clear(nodeid);
3792 }
3793 NdbMutex_Unlock(m_reserved_nodes_mutex);
3794 }
3795
3796
3797 int
alloc_node_id_req(NodeId free_node_id,enum ndb_mgm_node_type type,Uint32 timeout_ms)3798 MgmtSrvr::alloc_node_id_req(NodeId free_node_id,
3799 enum ndb_mgm_node_type type,
3800 Uint32 timeout_ms)
3801 {
3802 bool first_attempt = true;
3803 SignalSender ss(theFacade);
3804 ss.lock(); // lock will be released on exit
3805
3806 SimpleSignal ssig;
3807 AllocNodeIdReq* req = CAST_PTR(AllocNodeIdReq, ssig.getDataPtrSend());
3808 ssig.set(ss, TestOrd::TraceAPI, QMGR, GSN_ALLOC_NODEID_REQ,
3809 AllocNodeIdReq::SignalLength);
3810
3811 req->senderRef = ss.getOwnRef();
3812 req->senderData = 19;
3813 req->nodeId = free_node_id;
3814 req->nodeType = type;
3815 req->timeout = timeout_ms;
3816
3817 int do_send = 1;
3818 NodeId nodeId = 0;
3819 while (1)
3820 {
3821 if (nodeId == 0)
3822 {
3823 bool next;
3824 while((next = getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB)) == true &&
3825 getNodeInfo(nodeId).is_confirmed() == false)
3826 ;
3827 if (!next)
3828 return NO_CONTACT_WITH_DB_NODES;
3829 do_send = 1;
3830 }
3831 if (do_send)
3832 {
3833 if (ss.sendSignal(nodeId, &ssig) != SEND_OK)
3834 return SEND_OR_RECEIVE_FAILED;
3835 do_send = 0;
3836 }
3837
3838 SimpleSignal *signal = ss.waitFor();
3839
3840 int gsn = signal->readSignalNumber();
3841 switch (gsn) {
3842 case GSN_ALLOC_NODEID_CONF:
3843 {
3844 #ifdef NOT_USED
3845 const AllocNodeIdConf * const conf =
3846 CAST_CONSTPTR(AllocNodeIdConf, signal->getDataPtr());
3847 #endif
3848 g_eventLogger->info("Alloc node id %u succeeded", free_node_id);
3849 return 0;
3850 }
3851 case GSN_ALLOC_NODEID_REF:
3852 {
3853 const AllocNodeIdRef * const ref =
3854 CAST_CONSTPTR(AllocNodeIdRef, signal->getDataPtr());
3855 if (ref->errorCode == AllocNodeIdRef::NotMaster &&
3856 refToNode(ref->masterRef) == 0xFFFF)
3857 {
3858 /*
3859 This data node is not aware of who is the president (yet)
3860 and thus cannot allocate nodeids.
3861 If all data nodes are in the same state, then there's
3862 effectively 'no contact'.
3863 However, some other data nodes might be 'up' (node(s) in
3864 NOT_STARTED state).
3865 */
3866 bool next;
3867 while((next = getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB)) == true &&
3868 getNodeInfo(nodeId).is_confirmed() == false)
3869 ;
3870 if (!next)
3871 {
3872 /* No viable node(s) */
3873 g_eventLogger->info("Alloc node id %u rejected, no new president yet",
3874 free_node_id);
3875 return NO_CONTACT_WITH_DB_NODES;
3876 }
3877
3878 /* Found another node, try to allocate a nodeid from it */
3879 do_send = 1;
3880 continue;
3881 }
3882
3883 if (ref->errorCode == AllocNodeIdRef::NotReady)
3884 {
3885 g_eventLogger->info("Alloc node id %u request rejected, cluster not ready yet",
3886 free_node_id);
3887 return NO_CONTACT_WITH_DB_NODES;
3888 }
3889
3890 const bool refFromMaster = (refToNode(ref->masterRef) == nodeId);
3891 if (ref->errorCode == AllocNodeIdRef::NotMaster ||
3892 ref->errorCode == AllocNodeIdRef::Busy ||
3893 ref->errorCode == AllocNodeIdRef::NodeFailureHandlingNotCompleted)
3894 {
3895 do_send = 1;
3896 nodeId = refToNode(ref->masterRef);
3897 if (!getNodeInfo(nodeId).is_confirmed())
3898 nodeId = 0;
3899 if (first_attempt && (ref->errorCode != AllocNodeIdRef::NotMaster))
3900 {
3901 first_attempt = false;
3902 g_eventLogger->info("Alloc node id %u rejected with error code %u, will retry",
3903 free_node_id,
3904 ref->errorCode);
3905 }
3906 /* sleep for a while before retrying */
3907 ss.unlock();
3908 if (ref->errorCode == AllocNodeIdRef::Busy)
3909 {
3910 NdbSleep_MilliSleep(100);
3911 }
3912 else if (ref->errorCode == AllocNodeIdRef::NotMaster)
3913 {
3914 if (refFromMaster)
3915 {
3916 /* AllocNodeIdReq sent to master node, but master not ready
3917 * to alloc node ID. Sleep before retrying. */
3918 NdbSleep_SecSleep(1);
3919 }
3920 else
3921 {
3922 /* AllocNodeIdReq sent to non-master node, retry by sending
3923 * AllocNodeIdReq to ref->masterRef. No sleep before retrying */
3924 }
3925 }
3926 else /* AllocNodeIdRef::NodeFailureHandlingNotCompleted */
3927 {
3928 NdbSleep_SecSleep(1);
3929 }
3930 ss.lock();
3931 continue;
3932 }
3933 return ref->errorCode;
3934 }
3935 case GSN_NF_COMPLETEREP:
3936 {
3937 continue;
3938 }
3939 case GSN_NODE_FAILREP:{
3940 /**
3941 * ok to trap using NODE_FAILREP
3942 * as we don't really wait on anything interesting
3943 */
3944 const NodeFailRep * const rep =
3945 CAST_CONSTPTR(NodeFailRep, signal->getDataPtr());
3946 Uint32 len = NodeFailRep::getNodeMaskLength(signal->getLength());
3947 const Uint32* nbm;
3948 if (signal->header.m_noOfSections >= 1)
3949 {
3950 assert (len == 0);
3951 nbm = signal->ptr[0].p;
3952 len = signal->ptr[0].sz;
3953 }
3954 else
3955 {
3956 assert(len == NodeBitmask::Size); // only full length in ndbapi
3957 nbm = rep->theAllNodes;
3958 }
3959
3960 if (BitmaskImpl::safe_get(len, nbm, nodeId))
3961 {
3962 do_send = 1;
3963 nodeId = 0;
3964 }
3965 continue;
3966 }
3967 case GSN_API_REGCONF:
3968 case GSN_TAKE_OVERTCCONF:
3969 case GSN_CONNECT_REP:
3970 continue;
3971 default:
3972 report_unknown_signal(signal);
3973 return SEND_OR_RECEIVE_FAILED;
3974 }
3975 }
3976 return 0;
3977 }
3978
3979 static int
match_hostname(const struct sockaddr * clnt_addr,const char * config_hostname)3980 match_hostname(const struct sockaddr *clnt_addr,
3981 const char *config_hostname)
3982 {
3983 if (clnt_addr)
3984 {
3985 const struct in_addr *clnt_in_addr = &((sockaddr_in*)clnt_addr)->sin_addr;
3986
3987 struct in_addr config_addr;
3988 if (Ndb_getInAddr(&config_addr, config_hostname) != 0
3989 || memcmp(&config_addr, clnt_in_addr, sizeof(config_addr)) != 0)
3990 {
3991 struct in_addr tmp_addr;
3992 if (Ndb_getInAddr(&tmp_addr, "localhost") != 0
3993 || memcmp(&tmp_addr, clnt_in_addr, sizeof(config_addr)) != 0)
3994 {
3995 // not localhost
3996 return -1;
3997 }
3998
3999 // connecting through localhost
4000 // check if config_hostname is local
4001 if (!SocketServer::tryBind(0, config_hostname))
4002 return -1;
4003 }
4004 }
4005 else
4006 {
4007 if (!SocketServer::tryBind(0, config_hostname))
4008 return -1;
4009 }
4010 return 0;
4011 }
4012
4013 int
find_node_type(NodeId node_id,ndb_mgm_node_type type,const struct sockaddr * client_addr,Vector<PossibleNode> & nodes,int & error_code,BaseString & error_string)4014 MgmtSrvr::find_node_type(NodeId node_id,
4015 ndb_mgm_node_type type,
4016 const struct sockaddr* client_addr,
4017 Vector<PossibleNode>& nodes,
4018 int& error_code, BaseString& error_string)
4019 {
4020 const char* found_config_hostname= 0;
4021 unsigned type_c= (unsigned)type;
4022
4023 Guard g(m_local_config_mutex);
4024
4025 ConfigIter iter(m_local_config, CFG_SECTION_NODE);
4026 for(iter.first(); iter.valid(); iter.next())
4027 {
4028 unsigned id;
4029 if (iter.get(CFG_NODE_ID, &id))
4030 require(false);
4031 if (node_id && node_id != id)
4032 continue;
4033 if (iter.get(CFG_TYPE_OF_SECTION, &type_c))
4034 require(false);
4035 if (type_c != (unsigned)type)
4036 {
4037 if (!node_id)
4038 continue;
4039 goto error;
4040 }
4041 bool exact_match = false;
4042 const char *config_hostname= 0;
4043 if (iter.get(CFG_NODE_HOST, &config_hostname))
4044 require(false);
4045 if (config_hostname == 0 || config_hostname[0] == 0)
4046 {
4047 config_hostname= "";
4048 }
4049 else
4050 {
4051 found_config_hostname= config_hostname;
4052 if (match_hostname(client_addr, config_hostname))
4053 {
4054 if (!node_id)
4055 continue;
4056 goto error;
4057 }
4058 exact_match = true;
4059 }
4060 unsigned dedicated_node = 0;
4061 iter.get(CFG_NODE_DEDICATED, &dedicated_node);
4062 if (dedicated_node && id != node_id)
4063 {
4064 // id is only handed out if explicitly requested.
4065 continue;
4066 }
4067 /*
4068 Insert this node in the nodes list sorted with the
4069 exact matches ahead of the open nodes
4070 */
4071 PossibleNode possible_node= {id, config_hostname, exact_match};
4072 if (exact_match)
4073 {
4074 // Find the position of first !exact match
4075 unsigned position = 0;
4076 for (unsigned j = 0; j < nodes.size(); j++)
4077 {
4078 if (nodes[j].exact_match)
4079 position++;
4080 }
4081 nodes.push(possible_node, position);
4082 }
4083 else
4084 {
4085 nodes.push_back(possible_node);
4086 }
4087
4088 if (node_id)
4089 break;
4090 }
4091 if (nodes.size() != 0)
4092 {
4093 return 0;
4094 }
4095
4096 error:
4097 /*
4098 lock on m_configMutex held because found_config_hostname may have
4099 reference inot config structure
4100 */
4101 error_code= NDB_MGM_ALLOCID_CONFIG_MISMATCH;
4102 if (node_id)
4103 {
4104 if (type_c != (unsigned) type)
4105 {
4106 BaseString type_string, type_c_string;
4107 const char *alias, *str;
4108 alias= ndb_mgm_get_node_type_alias_string(type, &str);
4109 type_string.assfmt("%s(%s)", alias, str);
4110 alias= ndb_mgm_get_node_type_alias_string((enum ndb_mgm_node_type)type_c,
4111 &str);
4112 type_c_string.assfmt("%s(%s)", alias, str);
4113 error_string.appfmt("Id %d configured as %s, connect attempted as %s.",
4114 node_id, type_c_string.c_str(),
4115 type_string.c_str());
4116 return -1;
4117 }
4118 if (found_config_hostname)
4119 {
4120 char addr_buf[NDB_ADDR_STRLEN];
4121 {
4122 // Append error describing which host the faulty connection was from
4123 struct in_addr conn_addr =
4124 ((struct sockaddr_in*)(client_addr))->sin_addr;
4125 char* addr_str =
4126 Ndb_inet_ntop(AF_INET,
4127 static_cast<void*>(&conn_addr),
4128 addr_buf,
4129 sizeof(addr_buf));
4130 error_string.appfmt("Connection with id %d done from wrong host ip %s,",
4131 node_id, addr_str);
4132 }
4133 {
4134 // Append error describing which was the expected host
4135 struct in_addr config_addr;
4136 int r_config_addr= Ndb_getInAddr(&config_addr, found_config_hostname);
4137 char* addr_str =
4138 Ndb_inet_ntop(AF_INET,
4139 static_cast<void*>(&config_addr),
4140 addr_buf,
4141 sizeof(addr_buf));
4142 error_string.appfmt(" expected %s(%s).", found_config_hostname,
4143 r_config_addr ?
4144 "lookup failed" : addr_str);
4145 }
4146 return -1;
4147 }
4148 error_string.appfmt("No node defined with id=%d in config file.", node_id);
4149 return -1;
4150 }
4151
4152 // node_id == 0 and nodes.size() == 0
4153 if (found_config_hostname)
4154 {
4155 char addr_buf[NDB_ADDR_STRLEN];
4156 struct in_addr conn_addr =
4157 ((struct sockaddr_in*)(client_addr))->sin_addr;
4158 char *addr_str = Ndb_inet_ntop(AF_INET,
4159 static_cast<void*>(&conn_addr),
4160 addr_buf,
4161 sizeof(addr_buf));
4162 error_string.appfmt("Connection done from wrong host ip %s.",
4163 (client_addr) ? addr_str : "");
4164 return -1;
4165 }
4166
4167 error_string.append("No nodes defined in config file.");
4168 return -1;
4169 }
4170
4171
4172 int
try_alloc(NodeId id,ndb_mgm_node_type type,Uint32 timeout_ms,int & error_code,BaseString & error_string)4173 MgmtSrvr::try_alloc(NodeId id,
4174 ndb_mgm_node_type type,
4175 Uint32 timeout_ms,
4176 int& error_code,
4177 BaseString& error_string)
4178 {
4179 assert(type == NDB_MGM_NODE_TYPE_NDB ||
4180 type == NDB_MGM_NODE_TYPE_API);
4181
4182 const NDB_TICKS start = NdbTick_getCurrentTicks();
4183 while (true)
4184 {
4185 int res = alloc_node_id_req(id, type, timeout_ms);
4186 if (res == 0)
4187 {
4188 /* Node id allocation suceeded */
4189 g_eventLogger->debug("Allocated nodeid %u in cluster", id);
4190 assert(id > 0);
4191 return id;
4192 }
4193
4194 if (res == NO_CONTACT_WITH_DB_NODES &&
4195 type == NDB_MGM_NODE_TYPE_API)
4196 {
4197 const Uint64 retry_timeout = 3000; // milliseconds
4198 const NDB_TICKS now = NdbTick_getCurrentTicks();
4199 const Uint64 elapsed = NdbTick_Elapsed(start,now).milliSec();
4200 if (elapsed > retry_timeout)
4201 {
4202 /*
4203 Have waited long enough time for data nodes to
4204 decide on a master, return error
4205 */
4206 g_eventLogger->debug("Unable to allocate nodeid %u for API node " \
4207 "in cluster (retried during %u milliseconds)",
4208 id, (unsigned)elapsed);
4209 error_string.appfmt("No contact with data nodes to get node id %u",
4210 id);
4211 error_code = NDB_MGM_ALLOCID_ERROR;
4212 return -1;
4213 }
4214
4215 g_eventLogger->debug("Retrying allocation of nodeid %u...", id);
4216 NdbSleep_MilliSleep(1000);
4217 continue;
4218 }
4219
4220 if (res == NO_CONTACT_WITH_DB_NODES &&
4221 type == NDB_MGM_NODE_TYPE_NDB)
4222 {
4223 /*
4224 No reply from data node(s) -> use the requested nodeid
4225 so that data node can start
4226 */
4227 g_eventLogger->debug("Nodeid %u for data node reserved locally " \
4228 "since cluster was not available ", id);
4229 return id;
4230 }
4231
4232 /* Unspecified error */
4233 return 0;
4234 }
4235
4236 assert(false); // Never reached
4237 return 0;
4238 }
4239
4240 /**
4241 * try_alloc_from_list
4242 *
4243 * returns :
4244 * 0 : Nodeid allocated
4245 * -1 : Nodeid not available
4246 * -2 : No contact with cluster
4247 */
4248 int
try_alloc_from_list(NodeId & nodeid,ndb_mgm_node_type type,Uint32 timeout_ms,Vector<PossibleNode> & nodes,int & error_code,BaseString & error_string)4249 MgmtSrvr::try_alloc_from_list(NodeId& nodeid,
4250 ndb_mgm_node_type type,
4251 Uint32 timeout_ms,
4252 Vector<PossibleNode>& nodes,
4253 int& error_code,
4254 BaseString& error_string)
4255 {
4256 for (unsigned i = 0; i < nodes.size(); i++)
4257 {
4258 const unsigned id= nodes[i].id;
4259 if (theFacade->ext_isConnected(id))
4260 {
4261 // Node is already reserved(connected via transporter)
4262 continue;
4263 }
4264
4265 NdbMutex_Lock(m_reserved_nodes_mutex);
4266 if (m_reserved_nodes.get(id))
4267 {
4268 // Node is already reserved(locally in this node)
4269 NdbMutex_Unlock(m_reserved_nodes_mutex);
4270 continue;
4271 }
4272
4273 /*
4274 Reserve the nodeid locally while checking if it can
4275 be allocated in the data nodes
4276 */
4277 m_reserved_nodes.set(id, timeout_ms);
4278
4279 NdbMutex_Unlock(m_reserved_nodes_mutex);
4280 int res = try_alloc(id,
4281 type,
4282 timeout_ms,
4283 error_code,
4284 error_string);
4285 if (res > 0)
4286 {
4287 // Nodeid allocation succeeded
4288 nodeid= id;
4289
4290 if (type == NDB_MGM_NODE_TYPE_API)
4291 {
4292 /*
4293 Release the local reservation(which was set to avoid that
4294 more than one thread asked for same nodeid) since it's
4295 now reserved in data node
4296 */
4297 release_local_nodeid_reservation(id);
4298 }
4299
4300 return 0; /* Nodeid allocated */
4301 }
4302
4303 /* Release the local reservation */
4304 release_local_nodeid_reservation(id);
4305
4306 if (res < 0)
4307 {
4308 // Don't try any more nodes from the list
4309 return -2; /* No contact with cluster */
4310 }
4311 }
4312 return -1; /* Nodeid not available */
4313 }
4314
4315
4316 bool
alloc_node_id_impl(NodeId & nodeid,enum ndb_mgm_node_type type,const struct sockaddr * client_addr,int & error_code,BaseString & error_string,Uint32 timeout_s)4317 MgmtSrvr::alloc_node_id_impl(NodeId& nodeid,
4318 enum ndb_mgm_node_type type,
4319 const struct sockaddr* client_addr,
4320 int& error_code, BaseString& error_string,
4321 Uint32 timeout_s)
4322 {
4323 if (m_opts.no_nodeid_checks)
4324 {
4325 if (nodeid == 0)
4326 {
4327 error_string.appfmt("no-nodeid-checks set in management server. "
4328 "node id must be set explicitly in connectstring");
4329 error_code = NDB_MGM_ALLOCID_CONFIG_MISMATCH;
4330 return false;
4331 }
4332 return true;
4333 }
4334 /* Don't allow allocation of this ndb_mgmd's nodeid */
4335 assert(_ownNodeId);
4336 if (nodeid == _ownNodeId)
4337 {
4338 // Fatal error
4339 error_code= NDB_MGM_ALLOCID_CONFIG_MISMATCH;
4340 if (type != NDB_MGM_NODE_TYPE_MGM)
4341 {
4342 /**
4343 * be backwards compatile wrt error messages
4344 */
4345 BaseString type_string, type_c_string;
4346 const char *alias, *str;
4347 alias= ndb_mgm_get_node_type_alias_string(type, &str);
4348 type_string.assfmt("%s(%s)", alias, str);
4349 alias= ndb_mgm_get_node_type_alias_string(NDB_MGM_NODE_TYPE_MGM, &str);
4350 type_c_string.assfmt("%s(%s)", alias, str);
4351 error_string.appfmt("Id %d configured as %s, connect attempted as %s.",
4352 nodeid, type_c_string.c_str(),
4353 type_string.c_str());
4354 }
4355 else
4356 {
4357 error_string.appfmt("Id %d is already allocated by this ndb_mgmd",
4358 nodeid);
4359 }
4360 return false;
4361 }
4362
4363 /* Make sure that config is confirmed before allocating nodeid */
4364 Uint32 timeout_ms = timeout_s * 1000;
4365 {
4366 const NDB_TICKS start = NdbTick_getCurrentTicks();
4367 BaseString getconfig_message;
4368 while (!m_config_manager->get_packed_config(type,
4369 0,
4370 getconfig_message,
4371 true,
4372 nodeid))
4373 {
4374 const NDB_TICKS now = NdbTick_getCurrentTicks();
4375 if (NdbTick_Elapsed(start,now).milliSec() > timeout_ms)
4376 {
4377 error_code = NDB_MGM_ALLOCID_ERROR;
4378 error_string.append("Unable to allocate nodeid as configuration"
4379 " not yet confirmed");
4380 return false;
4381 }
4382
4383 NdbSleep_MilliSleep(20);
4384 }
4385 }
4386
4387 /* Find possible nodeids */
4388 Vector<PossibleNode> nodes;
4389 if (find_node_type(nodeid, type, client_addr,
4390 nodes, error_code, error_string))
4391 return false;
4392
4393 // Print list of possible nodes
4394 for (unsigned i = 0; i < nodes.size(); i++)
4395 {
4396 const PossibleNode& node = nodes[i];
4397 g_eventLogger->debug(" [%u]: %u, '%s', %d",
4398 (unsigned)i, node.id,
4399 node.host.c_str(),
4400 node.exact_match);
4401 }
4402
4403 // nodes.size() == 0 handled inside find_node_type
4404 DBUG_ASSERT(nodes.size() != 0);
4405
4406 if (type == NDB_MGM_NODE_TYPE_MGM && nodes.size() > 1)
4407 {
4408 // mgmt server may only have one match
4409 error_string.appfmt("Ambiguous node id's %d and %d. "
4410 "Suggest specifying node id in connectstring, "
4411 "or specifying unique host names in config file.",
4412 nodes[0].id, nodes[1].id);
4413 error_code= NDB_MGM_ALLOCID_CONFIG_MISMATCH;
4414 return false;
4415 }
4416
4417 /* Check timeout of nodeid reservations for NDB */
4418 if (type == NDB_MGM_NODE_TYPE_NDB)
4419 {
4420 const NDB_TICKS now = NdbTick_getCurrentTicks();
4421 for (unsigned i = 0; i < nodes.size(); i++)
4422 {
4423 const NodeId ndb_nodeid = nodes[i].id;
4424 {
4425 Guard g(m_reserved_nodes_mutex);
4426 if (!m_reserved_nodes.has_timedout(ndb_nodeid, now))
4427 continue;
4428 }
4429
4430 // Found a timedout reservation
4431 if (theFacade->ext_isConnected(ndb_nodeid))
4432 continue; // Still connected, ignore the timeout
4433
4434 g_eventLogger->warning("Found timedout nodeid reservation for %u, " \
4435 "releasing it", ndb_nodeid);
4436
4437 // Clear the reservation
4438 release_local_nodeid_reservation(ndb_nodeid);
4439 }
4440 }
4441
4442 const int try_alloc_rc =
4443 try_alloc_from_list(nodeid,
4444 type,
4445 timeout_ms,
4446 nodes,
4447 error_code,
4448 error_string);
4449 if (try_alloc_rc == 0)
4450 {
4451 if (type == NDB_MGM_NODE_TYPE_NDB)
4452 {
4453 /* Be ready to accept connections from this node */
4454 theFacade->ext_doConnect(nodeid);
4455 }
4456
4457 return true;
4458 }
4459
4460
4461 if (try_alloc_rc == -1)
4462 {
4463 /*
4464 there are nodes with correct type available but
4465 allocation failed for some reason
4466 */
4467 if (nodeid)
4468 {
4469 if (error_code == 0)
4470 {
4471 error_string.appfmt("Id %d already allocated by another node.",
4472 nodeid);
4473 }
4474 }
4475 else
4476 {
4477 if (error_code == 0)
4478 {
4479 const char *alias, *str;
4480 alias = ndb_mgm_get_node_type_alias_string(type, &str);
4481 error_string.appfmt("No free node id found for %s(%s).",
4482 alias,
4483 str);
4484 }
4485 }
4486 error_code = NDB_MGM_ALLOCID_ERROR;
4487 }
4488 else
4489 {
4490 assert(try_alloc_rc == -2); /* No contact with cluster */
4491 error_string.assfmt("Cluster not ready for nodeid allocation.");
4492 }
4493 return false;
4494 }
4495
4496
4497 bool
alloc_node_id(NodeId & nodeid,enum ndb_mgm_node_type type,const struct sockaddr * client_addr,int & error_code,BaseString & error_string,bool log_event,Uint32 timeout_s)4498 MgmtSrvr::alloc_node_id(NodeId& nodeid,
4499 enum ndb_mgm_node_type type,
4500 const struct sockaddr* client_addr,
4501 int& error_code, BaseString& error_string,
4502 bool log_event,
4503 Uint32 timeout_s)
4504 {
4505 char addr_buf[NDB_ADDR_STRLEN];
4506 struct in_addr conn_addr = ((sockaddr_in*)client_addr)->sin_addr;
4507 const char* type_str = ndb_mgm_get_node_type_string(type);
4508 char* addr_str = Ndb_inet_ntop(AF_INET,
4509 static_cast<void*>(&conn_addr),
4510 addr_buf,
4511 sizeof(addr_buf));
4512
4513 error_code = 0;
4514 g_eventLogger->debug("Trying to allocate nodeid for %s" \
4515 "(nodeid: %u, type: %s)",
4516 addr_str, (unsigned)nodeid, type_str);
4517
4518
4519 if (alloc_node_id_impl(nodeid, type, client_addr,
4520 error_code, error_string,
4521 timeout_s))
4522 {
4523 g_eventLogger->info("Nodeid %u allocated for %s at %s",
4524 (unsigned)nodeid, type_str, addr_str);
4525 return true;
4526 }
4527
4528 if (!log_event)
4529 return false;
4530
4531 g_eventLogger->warning("Unable to allocate nodeid for %s at %s. "
4532 "Returned error: '%s'",
4533 type_str, addr_str, error_string.c_str());
4534
4535 return false;
4536 }
4537
4538
4539 bool
getNextNodeId(NodeId * nodeId,enum ndb_mgm_node_type type) const4540 MgmtSrvr::getNextNodeId(NodeId * nodeId, enum ndb_mgm_node_type type) const
4541 {
4542 NodeId tmp = * nodeId;
4543
4544 tmp++;
4545 while(nodeTypes[tmp] != type && tmp < MAX_NODES)
4546 tmp++;
4547
4548 if(tmp == MAX_NODES){
4549 return false;
4550 }
4551
4552 * nodeId = tmp;
4553 return true;
4554 }
4555
4556 #include "Services.hpp"
4557
4558 void
eventReport(const Uint32 * theSignalData,Uint32 len,const Uint32 * theData)4559 MgmtSrvr::eventReport(const Uint32 *theSignalData,
4560 Uint32 len,
4561 const Uint32 *theData)
4562 {
4563 const EventReport * const eventReport = (EventReport *)&theSignalData[0];
4564
4565 NodeId nodeId = eventReport->getNodeId();
4566 Ndb_logevent_type type = eventReport->getEventType();
4567 // Log event
4568 g_eventLogger->log(type, theData, len, nodeId,
4569 &m_event_listner[0].m_logLevel);
4570 m_event_listner.log(type, theData, len, nodeId);
4571 }
4572
4573 /***************************************************************************
4574 * Backup
4575 ***************************************************************************/
4576
4577 int
startBackup(Uint32 & backupId,int waitCompleted,Uint32 input_backupId,Uint32 backuppoint)4578 MgmtSrvr::startBackup(Uint32& backupId, int waitCompleted, Uint32 input_backupId, Uint32 backuppoint)
4579 {
4580 SignalSender ss(theFacade);
4581 ss.lock(); // lock will be released on exit
4582
4583 NodeId nodeId = m_master_node;
4584 if (okToSendTo(nodeId, false) != 0)
4585 {
4586 bool next;
4587 nodeId = m_master_node = 0;
4588 while((next = getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB)) == true &&
4589 okToSendTo(nodeId, false) != 0);
4590 if(!next)
4591 return NO_CONTACT_WITH_DB_NODES;
4592 }
4593
4594 SimpleSignal ssig;
4595 BackupReq* req = CAST_PTR(BackupReq, ssig.getDataPtrSend());
4596 /*
4597 * Single-threaded backup. Set instance key 1. In the kernel
4598 * this maps to main instance 0 or worker instance 1 (if MT LQH).
4599 */
4600 BlockNumber backupBlockNo = numberToBlock(BACKUP, 1);
4601 if(input_backupId > 0)
4602 {
4603 ssig.set(ss, TestOrd::TraceAPI, backupBlockNo, GSN_BACKUP_REQ,
4604 BackupReq::SignalLength);
4605 req->inputBackupId = input_backupId;
4606 }
4607 else
4608 ssig.set(ss, TestOrd::TraceAPI, backupBlockNo, GSN_BACKUP_REQ,
4609 BackupReq::SignalLength - 1);
4610
4611 req->senderData = 19;
4612 req->backupDataLen = 0;
4613 assert(waitCompleted < 3);
4614 req->flags = waitCompleted & 0x3;
4615 if(backuppoint == 1)
4616 req->flags |= BackupReq::USE_UNDO_LOG;
4617
4618 int do_send = 1;
4619 while (1) {
4620 if (do_send)
4621 {
4622 if (ss.sendSignal(nodeId, &ssig) != SEND_OK) {
4623 return SEND_OR_RECEIVE_FAILED;
4624 }
4625 if (waitCompleted == 0)
4626 return 0;
4627 do_send = 0;
4628 }
4629 SimpleSignal *signal = ss.waitFor();
4630
4631 int gsn = signal->readSignalNumber();
4632 switch (gsn) {
4633 case GSN_BACKUP_CONF:{
4634 const BackupConf * const conf =
4635 CAST_CONSTPTR(BackupConf, signal->getDataPtr());
4636 #ifdef VM_TRACE
4637 ndbout_c("Backup(%d) master is %d", conf->backupId,
4638 refToNode(signal->header.theSendersBlockRef));
4639 #endif
4640 backupId = conf->backupId;
4641 if (waitCompleted == 1)
4642 return 0;
4643 // wait for next signal
4644 break;
4645 }
4646 case GSN_BACKUP_COMPLETE_REP:{
4647 const BackupCompleteRep * const rep =
4648 CAST_CONSTPTR(BackupCompleteRep, signal->getDataPtr());
4649 #ifdef VM_TRACE
4650 ndbout_c("Backup(%d) completed", rep->backupId);
4651 #endif
4652 backupId = rep->backupId;
4653 return 0;
4654 }
4655 case GSN_BACKUP_REF:{
4656 const BackupRef * const ref =
4657 CAST_CONSTPTR(BackupRef, signal->getDataPtr());
4658 if(ref->errorCode == BackupRef::IAmNotMaster){
4659 m_master_node = nodeId = refToNode(ref->masterRef);
4660 #ifdef VM_TRACE
4661 ndbout_c("I'm not master resending to %d", nodeId);
4662 #endif
4663 do_send = 1; // try again
4664 if (!getNodeInfo(nodeId).m_alive)
4665 m_master_node = nodeId = 0;
4666 continue;
4667 }
4668 return ref->errorCode;
4669 }
4670 case GSN_BACKUP_ABORT_REP:{
4671 const BackupAbortRep * const rep =
4672 CAST_CONSTPTR(BackupAbortRep, signal->getDataPtr());
4673 #ifdef VM_TRACE
4674 ndbout_c("Backup %d aborted", rep->backupId);
4675 #endif
4676 return rep->reason;
4677 }
4678 case GSN_NF_COMPLETEREP:{
4679 const NFCompleteRep * const rep =
4680 CAST_CONSTPTR(NFCompleteRep, signal->getDataPtr());
4681 #ifdef VM_TRACE
4682 ndbout_c("Node %d fail completed", rep->failedNodeId);
4683 #endif
4684 if (rep->failedNodeId == nodeId ||
4685 waitCompleted == 1)
4686 return 1326;
4687 // wait for next signal
4688 // master node will report aborted backup
4689 break;
4690 }
4691 case GSN_NODE_FAILREP:{
4692 const NodeFailRep * const rep =
4693 CAST_CONSTPTR(NodeFailRep, signal->getDataPtr());
4694 Uint32 len = NodeFailRep::getNodeMaskLength(signal->getLength());
4695 const Uint32* nbm;
4696 if (signal->header.m_noOfSections >= 1)
4697 {
4698 assert (len == 0);
4699 nbm = signal->ptr[0].p;
4700 len = signal->ptr[0].sz;
4701 }
4702 else
4703 {
4704 assert(len == NodeBitmask::Size); // only full length in ndbapi
4705 nbm = rep->theAllNodes;
4706 }
4707
4708 if (BitmaskImpl::safe_get(len, nbm, nodeId) ||
4709 waitCompleted == 1)
4710 return 1326;
4711 // wait for next signal
4712 // master node will report aborted backup
4713 break;
4714 }
4715 case GSN_API_REGCONF:
4716 case GSN_TAKE_OVERTCCONF:
4717 case GSN_CONNECT_REP:
4718 continue;
4719 default:
4720 report_unknown_signal(signal);
4721 return SEND_OR_RECEIVE_FAILED;
4722 }
4723 }
4724 }
4725
4726 int
abortBackup(Uint32 backupId)4727 MgmtSrvr::abortBackup(Uint32 backupId)
4728 {
4729 SignalSender ss(theFacade);
4730 ss.lock(); // lock will be released on exit
4731
4732 bool next;
4733 NodeId nodeId = 0;
4734 while((next = getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB)) == true &&
4735 getNodeInfo(nodeId).m_alive == false);
4736
4737 if(!next){
4738 return NO_CONTACT_WITH_DB_NODES;
4739 }
4740
4741 SimpleSignal ssig;
4742
4743 AbortBackupOrd* ord = CAST_PTR(AbortBackupOrd, ssig.getDataPtrSend());
4744 /*
4745 * Single-threaded backup. Set instance key 1. In the kernel
4746 * this maps to main instance 0 or worker instance 1 (if MT LQH).
4747 */
4748 BlockNumber backupBlockNo = numberToBlock(BACKUP, 1);
4749 ssig.set(ss, TestOrd::TraceAPI, backupBlockNo, GSN_ABORT_BACKUP_ORD,
4750 AbortBackupOrd::SignalLength);
4751
4752 ord->requestType = AbortBackupOrd::ClientAbort;
4753 ord->senderData = 19;
4754 ord->backupId = backupId;
4755
4756 return ss.sendSignal(nodeId, &ssig) == SEND_OK ? 0 : SEND_OR_RECEIVE_FAILED;
4757 }
4758
4759
4760 int
setDbParameter(int node,int param,const char * value,BaseString & msg)4761 MgmtSrvr::setDbParameter(int node, int param, const char * value,
4762 BaseString& msg)
4763 {
4764
4765 Guard g(m_local_config_mutex);
4766
4767 /**
4768 * Check parameter
4769 */
4770 ConfigIter iter(m_local_config, CFG_SECTION_NODE);
4771 if(iter.first() != 0){
4772 msg.assign("Unable to find node section (iter.first())");
4773 return -1;
4774 }
4775
4776 Uint32 type = NODE_TYPE_DB + 1;
4777 if(node != 0){
4778 // Set parameter only in the specified node
4779 if(iter.find(CFG_NODE_ID, node) != 0){
4780 msg.assign("Unable to find node (iter.find())");
4781 return -1;
4782 }
4783 if(iter.get(CFG_TYPE_OF_SECTION, &type) != 0){
4784 msg.assign("Unable to get node type(iter.get(CFG_TYPE_OF_SECTION))");
4785 return -1;
4786 }
4787 } else {
4788 // Set parameter in all DB nodes
4789 do {
4790 if(iter.get(CFG_TYPE_OF_SECTION, &type) != 0){
4791 msg.assign("Unable to get node type(iter.get(CFG_TYPE_OF_SECTION))");
4792 return -1;
4793 }
4794 if(type == NODE_TYPE_DB)
4795 break;
4796 } while(iter.next() == 0);
4797 }
4798
4799 if(type != NODE_TYPE_DB){
4800 msg.assfmt("Invalid node type or no such node (%d %d)",
4801 type, NODE_TYPE_DB);
4802 return -1;
4803 }
4804
4805 int p_type;
4806 unsigned val_32;
4807 Uint64 val_64 = 0;
4808 const char * val_char;
4809 do {
4810 p_type = 0;
4811 if(iter.get(param, &val_32) == 0){
4812 val_32 = atoi(value);
4813 break;
4814 }
4815
4816 p_type++;
4817 if(iter.get(param, &val_64) == 0){
4818 val_64 = my_strtoll(value, 0, 10);
4819 break;
4820 }
4821 p_type++;
4822 if(iter.get(param, &val_char) == 0){
4823 val_char = value;
4824 break;
4825 }
4826 msg.assign("Could not get parameter");
4827 return -1;
4828 } while(0);
4829
4830 bool res = false;
4831 do {
4832 int ret = iter.get(CFG_TYPE_OF_SECTION, &type);
4833 assert(ret == 0);
4834
4835 if(type != NODE_TYPE_DB)
4836 continue;
4837
4838 Uint32 node;
4839 ret = iter.get(CFG_NODE_ID, &node);
4840 assert(ret == 0);
4841
4842 ConfigValues::Iterator i2(m_local_config->m_configValues->m_config,
4843 iter.m_config);
4844 switch(p_type){
4845 case 0:
4846 res = i2.set(param, val_32);
4847 ndbout_c("Updating node %d param: %d to %d", node, param, val_32);
4848 break;
4849 case 1:
4850 res = i2.set(param, val_64);
4851 ndbout_c("Updating node %d param: %d to %u", node, param, val_32);
4852 break;
4853 case 2:
4854 res = i2.set(param, val_char);
4855 ndbout_c("Updating node %d param: %d to %s", node, param, val_char);
4856 break;
4857 default:
4858 require(false);
4859 }
4860 require(res);
4861 } while(node == 0 && iter.next() == 0);
4862
4863 msg.assign("Success");
4864 return 0;
4865 }
4866
4867
4868 int
setConnectionDbParameter(int node1,int node2,int param,int value,BaseString & msg)4869 MgmtSrvr::setConnectionDbParameter(int node1, int node2,
4870 int param, int value,
4871 BaseString& msg)
4872 {
4873 DBUG_ENTER("MgmtSrvr::setConnectionDbParameter");
4874 DBUG_PRINT("enter", ("node1: %d, node2: %d, param: %d, value: %d",
4875 node1, node2, param, value));
4876
4877 // This function only supports setting dynamic ports
4878 if (param != CFG_CONNECTION_SERVER_PORT)
4879 {
4880 msg.assign("Only param CFG_CONNECTION_SERVER_PORT can be set");
4881 DBUG_RETURN(-1);
4882 }
4883
4884 if (!m_config_manager->set_dynamic_port(node1, node2, value, msg))
4885 DBUG_RETURN(-1);
4886
4887 DBUG_PRINT("exit", ("Set parameter(%d) to %d for %d -> %d",
4888 param, value, node1, node2));
4889 DBUG_RETURN(1);
4890 }
4891
4892
setDynamicPorts(int node,DynPortSpec ports[],unsigned num_ports,BaseString & msg)4893 bool MgmtSrvr::setDynamicPorts(int node, DynPortSpec ports[],
4894 unsigned num_ports, BaseString& msg)
4895 {
4896 return m_config_manager->set_dynamic_ports(node, ports, num_ports, msg);
4897 }
4898
4899
4900 int
getConnectionDbParameter(int node1,int node2,int param,int * value,BaseString & msg)4901 MgmtSrvr::getConnectionDbParameter(int node1, int node2,
4902 int param, int *value,
4903 BaseString& msg)
4904 {
4905 DBUG_ENTER("MgmtSrvr::getConnectionDbParameter");
4906 DBUG_PRINT("enter", ("node1: %d, node2: %d, param: %d",
4907 node1, node2, param));
4908
4909 // This function only supports asking about dynamic ports
4910 if (param != CFG_CONNECTION_SERVER_PORT)
4911 {
4912 msg.assign("Only param CFG_CONNECTION_SERVER_PORT can be retrieved");
4913 DBUG_RETURN(-1);
4914 }
4915
4916 if (!m_config_manager->get_dynamic_port(node1, node2, value, msg))
4917 DBUG_RETURN(-1);
4918
4919 DBUG_PRINT("exit", ("Return parameter(%d): %u for %d -> %d, msg: %s",
4920 param, *value, node1, node2, msg.c_str()));
4921 DBUG_RETURN(1);
4922 }
4923
4924
4925 bool
transporter_connect(NDB_SOCKET_TYPE sockfd,BaseString & msg,bool & close_with_reset)4926 MgmtSrvr::transporter_connect(NDB_SOCKET_TYPE sockfd,
4927 BaseString& msg,
4928 bool& close_with_reset)
4929 {
4930 DBUG_ENTER("MgmtSrvr::transporter_connect");
4931 TransporterRegistry* tr= theFacade->get_registry();
4932 bool dummy_log_failure = false;
4933 if (!tr->connect_server(sockfd, msg, close_with_reset, dummy_log_failure))
4934 DBUG_RETURN(false);
4935
4936 /**
4937 * TransporterRegistry::update_connections() is responsible
4938 * for doing the final step of bringing the connection into
4939 * CONNECTED state when it detects it 'isConnected()'.
4940 * This is required due to all such state changes has to
4941 * be synchroniced with ::performReceive().
4942 * To speed up CONNECTED detection, we request it to
4943 * happen ASAP. (There is no guarantee when it happen though)
4944 */
4945 theFacade->request_connection_check();
4946 DBUG_RETURN(true);
4947 }
4948
4949
connect_to_self()4950 bool MgmtSrvr::connect_to_self()
4951 {
4952 BaseString buf;
4953 NdbMgmHandle mgm_handle= ndb_mgm_create_handle();
4954
4955 buf.assfmt("%s:%u",
4956 m_opts.bind_address ? m_opts.bind_address : "localhost",
4957 m_port);
4958 ndb_mgm_set_connectstring(mgm_handle, buf.c_str());
4959
4960 if(ndb_mgm_connect(mgm_handle, 0, 0, 0) < 0)
4961 {
4962 g_eventLogger->warning("%d %s",
4963 ndb_mgm_get_latest_error(mgm_handle),
4964 ndb_mgm_get_latest_error_desc(mgm_handle));
4965 ndb_mgm_destroy_handle(&mgm_handle);
4966 return false;
4967 }
4968 // TransporterRegistry now owns the handle and will destroy it.
4969 theFacade->get_registry()->set_mgm_handle(mgm_handle);
4970
4971 return true;
4972 }
4973
4974
4975 bool
change_config(Config & new_config,BaseString & msg)4976 MgmtSrvr::change_config(Config& new_config, BaseString& msg)
4977 {
4978 SignalSender ss(theFacade);
4979 ss.lock();
4980
4981 NodeBitmask mgm_nodes;
4982 {
4983 Guard g(m_local_config_mutex);
4984 m_local_config->get_nodemask(mgm_nodes, NDB_MGM_NODE_TYPE_MGM);
4985 }
4986
4987 NodeId nodeId= ss.find_confirmed_node(mgm_nodes);
4988 if (nodeId == 0)
4989 {
4990 msg = "INTERNAL ERROR Could not find any mgmd!";
4991 return false;
4992 }
4993
4994 bool v2;
4995 {
4996 const trp_node node = ss.getNodeInfo(nodeId);
4997 v2 = ndb_config_version_v2(node.m_info.m_version);
4998 }
4999 SimpleSignal ssig;
5000 UtilBuffer buf;
5001 UtilBuffer *buf_ptr = &buf;
5002 new_config.pack(buf, v2);
5003 ssig.ptr[0].p = (Uint32*)buf.get_data();
5004 ssig.ptr[0].sz = (buf.length() + 3) / 4;
5005 ssig.header.m_noOfSections = 1;
5006
5007 ConfigChangeReq *req= CAST_PTR(ConfigChangeReq, ssig.getDataPtrSend());
5008 req->length = buf.length();
5009
5010 if (ss.sendFragmentedSignal(nodeId, ssig,
5011 MGM_CONFIG_MAN, GSN_CONFIG_CHANGE_REQ,
5012 ConfigChangeReq::SignalLength) != 0)
5013 {
5014 msg.assfmt("Could not start configuration change, send to "
5015 "node %d failed", nodeId);
5016 return false;
5017 }
5018 mgm_nodes.clear(nodeId);
5019
5020 bool done = false;
5021 while(!done)
5022 {
5023 SimpleSignal *signal= ss.waitFor();
5024
5025 switch(signal->readSignalNumber()){
5026 case GSN_CONFIG_CHANGE_CONF:
5027 done= true;
5028 break;
5029 case GSN_CONFIG_CHANGE_REF:
5030 {
5031 const ConfigChangeRef * const ref =
5032 CAST_CONSTPTR(ConfigChangeRef, signal->getDataPtr());
5033 g_eventLogger->debug("Got CONFIG_CHANGE_REF, error: %d", ref->errorCode);
5034 switch(ref->errorCode)
5035 {
5036 case ConfigChangeRef::NotMaster:
5037 {
5038 // Retry with next node if any
5039 NodeId nodeId= ss.find_confirmed_node(mgm_nodes);
5040 if (nodeId == 0)
5041 {
5042 msg = "INTERNAL ERROR Could not find any mgmd!";
5043 return false;
5044 }
5045 {
5046 const trp_node node = ss.getNodeInfo(nodeId);
5047 bool v2_new = ndb_config_version_v2(node.m_info.m_version);
5048 if (v2 != v2_new)
5049 {
5050 /**
5051 * Free old buffer and create a new one.
5052 */
5053 delete buf_ptr;
5054 buf_ptr = new (buf_ptr) UtilBuffer;
5055 require(new_config.pack(buf, v2_new));
5056 v2 = v2_new;
5057 }
5058 }
5059 req->length = buf.length();
5060 ssig.ptr[0].p = (Uint32*)buf.get_data();
5061 ssig.ptr[0].sz = (buf.length() + 3) / 4;
5062 ssig.header.m_noOfSections = 1;
5063 if (ss.sendFragmentedSignal(nodeId, ssig,
5064 MGM_CONFIG_MAN, GSN_CONFIG_CHANGE_REQ,
5065 ConfigChangeReq::SignalLength) != 0)
5066 {
5067 msg.assfmt("Could not start configuration change, send to "
5068 "node %d failed", nodeId);
5069 return false;
5070 }
5071 mgm_nodes.clear(nodeId);
5072 break;
5073 }
5074
5075 default:
5076 msg = ConfigChangeRef::errorMessage(ref->errorCode);
5077 return false;
5078 }
5079
5080 break;
5081 }
5082
5083 case GSN_API_REGCONF:
5084 case GSN_TAKE_OVERTCCONF:
5085 case GSN_CONNECT_REP:
5086 // Ignore;
5087 break;
5088
5089
5090 case GSN_NODE_FAILREP:
5091 // ignore, NF_COMPLETEREP will come
5092 break;
5093
5094 case GSN_NF_COMPLETEREP:
5095 {
5096 NodeId nodeId = refToNode(signal->header.theSendersBlockRef);
5097 msg.assign("Node %d failed during configuration change", nodeId);
5098 return false;
5099 break;
5100 }
5101
5102 default:
5103 report_unknown_signal(signal);
5104 return false;
5105
5106 }
5107 }
5108
5109 g_eventLogger->info("Config change completed");
5110
5111 return true;
5112 }
5113
5114
5115 void
print_config(const char * section_filter,NodeId nodeid_filter,const char * param_filter,NdbOut & out)5116 MgmtSrvr::print_config(const char* section_filter, NodeId nodeid_filter,
5117 const char* param_filter,
5118 NdbOut& out)
5119 {
5120 Guard g(m_local_config_mutex);
5121 m_local_config->print(section_filter, nodeid_filter,
5122 param_filter, out);
5123 }
5124
5125
5126 bool
reload_config(const char * config_filename,bool mycnf,BaseString & msg)5127 MgmtSrvr::reload_config(const char* config_filename, bool mycnf,
5128 BaseString& msg)
5129 {
5130 if (config_filename && mycnf)
5131 {
5132 msg = "ERROR: Both mycnf and config_filename is not supported";
5133 return false;
5134 }
5135
5136 if (config_filename)
5137 {
5138 if (m_opts.mycnf)
5139 {
5140 msg.assfmt("ERROR: Can't switch to use config.ini '%s' when "
5141 "node was started from my.cnf", config_filename);
5142 return false;
5143 }
5144 }
5145 else
5146 {
5147 if (mycnf)
5148 {
5149 // Reload from my.cnf
5150 if (!m_opts.mycnf)
5151 {
5152 if (m_opts.config_filename)
5153 {
5154 msg.assfmt("ERROR: Can't switch to use my.cnf when "
5155 "node was started from '%s'", m_opts.config_filename);
5156 return false;
5157 }
5158 }
5159 }
5160 else
5161 {
5162 /* No config file name supplied and not told to use mycnf */
5163 if (m_opts.config_filename)
5164 {
5165 g_eventLogger->info("No config file name supplied, using '%s'",
5166 m_opts.config_filename);
5167 config_filename = m_opts.config_filename;
5168 }
5169 else
5170 {
5171 msg = "ERROR: Neither config file name or mycnf available";
5172 return false;
5173 }
5174 }
5175 }
5176
5177 Config* new_conf_ptr;
5178 if ((new_conf_ptr= ConfigManager::load_config(config_filename,
5179 mycnf, msg)) == NULL)
5180 return false;
5181 Config new_conf(new_conf_ptr);
5182
5183 {
5184 Guard g(m_local_config_mutex);
5185
5186 /* Copy the necessary values from old to new config */
5187 if (!new_conf.setGeneration(m_local_config->getGeneration()) ||
5188 !new_conf.setName(m_local_config->getName()) ||
5189 !new_conf.setPrimaryMgmNode(m_local_config->getPrimaryMgmNode()))
5190 {
5191 msg = "Failed to initialize reloaded config";
5192 return false;
5193 }
5194 }
5195
5196 if (!change_config(new_conf, msg))
5197 return false;
5198 return true;
5199 }
5200
5201 void
show_variables(NdbOut & out)5202 MgmtSrvr::show_variables(NdbOut& out)
5203 {
5204 out << "daemon: " << yes_no(m_opts.daemon) << endl;
5205 out << "non_interactive: " << yes_no(m_opts.non_interactive) << endl;
5206 out << "interactive: " << yes_no(m_opts.interactive) << endl;
5207 out << "config_filename: " << str_null(m_opts.config_filename) << endl;
5208 out << "mycnf: " << yes_no(m_opts.mycnf) << endl;
5209 out << "bind_address: " << str_null(m_opts.bind_address) << endl;
5210 out << "no_nodeid_checks: " << yes_no(m_opts.no_nodeid_checks) << endl;
5211 out << "print_full_config: " << yes_no(m_opts.print_full_config) << endl;
5212 out << "configdir: " << str_null(m_opts.configdir) << endl;
5213 out << "config_cache: " << yes_no(m_opts.config_cache) << endl;
5214 out << "verbose: " << yes_no(m_opts.verbose) << endl;
5215 out << "reload: " << yes_no(m_opts.reload) << endl;
5216
5217 out << "nodeid: " << _ownNodeId << endl;
5218 out << "blocknumber: " << hex <<_blockNumber << endl;
5219 out << "own_reference: " << hex << _ownReference << endl;
5220 out << "port: " << m_port << endl;
5221 out << "need_restart: " << m_need_restart << endl;
5222 out << "is_stop_thread: " << _isStopThread << endl;
5223 out << "log_level_thread_sleep: " << _logLevelThreadSleep << endl;
5224 out << "master_node: " << m_master_node << endl;
5225 }
5226
5227 void
make_sync_req(SignalSender & ss,Uint32 nodeId)5228 MgmtSrvr::make_sync_req(SignalSender& ss, Uint32 nodeId)
5229 {
5230 /**
5231 * This subroutine is used to make a async request(error insert/dump)
5232 * "more" syncronous, i.e increasing the likelyhood that
5233 * the async request has really reached the destination
5234 * before returning to the api
5235 *
5236 * I.e it's a work-around...
5237 *
5238 */
5239 SimpleSignal ssig;
5240 SyncReq* req = CAST_PTR(SyncReq, ssig.getDataPtrSend());
5241 req->senderRef = ss.getOwnRef();
5242 req->senderData = 12;
5243 req->prio = 1; // prio b
5244 ssig.set(ss,TestOrd::TraceAPI, CMVMI, GSN_SYNC_REQ, SyncReq::SignalLength);
5245
5246 if (ss.sendSignal(nodeId, &ssig) != SEND_OK)
5247 {
5248 return;
5249 }
5250
5251 while (true)
5252 {
5253 SimpleSignal *signal = ss.waitFor();
5254
5255 int gsn = signal->readSignalNumber();
5256 switch (gsn) {
5257 case GSN_SYNC_REF:
5258 case GSN_SYNC_CONF:
5259 return;
5260
5261 case GSN_NF_COMPLETEREP:{
5262 const NFCompleteRep * const rep =
5263 CAST_CONSTPTR(NFCompleteRep, signal->getDataPtr());
5264 if (rep->failedNodeId == nodeId)
5265 return;
5266 break;
5267 }
5268
5269 case GSN_NODE_FAILREP:{
5270 const NodeFailRep * const rep =
5271 CAST_CONSTPTR(NodeFailRep, signal->getDataPtr());
5272 Uint32 len = NodeFailRep::getNodeMaskLength(signal->getLength());
5273 const Uint32* nbm;
5274 if (signal->header.m_noOfSections >= 1)
5275 {
5276 assert (len == 0);
5277 nbm = signal->ptr[0].p;
5278 len = signal->ptr[0].sz;
5279 }
5280 else
5281 {
5282 assert(len == NodeBitmask::Size); // only full length in ndbapi
5283 nbm = rep->theAllNodes;
5284 }
5285
5286 if (BitmaskImpl::safe_get(len, nbm, nodeId))
5287 return;
5288 break;
5289 }
5290 case GSN_API_REGCONF:
5291 case GSN_TAKE_OVERTCCONF:
5292 case GSN_CONNECT_REP:
5293 break;
5294 default:
5295 return;
5296 }
5297 }
5298 }
5299
5300
5301 bool
request_events(NdbNodeBitmask nodes,Uint32 reports_per_node,Uint32 dump_type,Vector<SimpleSignal> & events)5302 MgmtSrvr::request_events(NdbNodeBitmask nodes, Uint32 reports_per_node,
5303 Uint32 dump_type,
5304 Vector<SimpleSignal>& events)
5305 {
5306 int nodes_counter[MAX_NDB_NODES];
5307 #ifndef NDEBUG
5308 NdbNodeBitmask save = nodes;
5309 #endif
5310 SignalSender ss(theFacade);
5311 ss.lock();
5312
5313 // Send the dump command to all requested NDB nodes
5314 const bool all = nodes.isclear();
5315 for (int i = 1; i < MAX_NDB_NODES; i++)
5316 {
5317 // Check if node should be involved
5318 if (!all && !nodes.get(i))
5319 continue;
5320
5321 // Only request from confirmed DB nodes
5322 const trp_node node = ss.getNodeInfo(i);
5323 if (node.m_info.getType() != NodeInfo::DB ||
5324 !node.is_confirmed())
5325 {
5326 nodes.clear(i);
5327 continue;
5328 }
5329
5330 SimpleSignal ssig;
5331 DumpStateOrd * const dumpOrd = (DumpStateOrd*)ssig.getDataPtrSend();
5332
5333 dumpOrd->args[0] = dump_type;
5334 dumpOrd->args[1] = ss.getOwnRef(); // Return to sender
5335
5336 if (ss.sendSignal(i, ssig, CMVMI, GSN_DUMP_STATE_ORD, 2) == SEND_OK)
5337 {
5338 nodes.set(i);
5339 nodes_counter[i] = (int)reports_per_node;
5340 }
5341 }
5342
5343
5344 while (true)
5345 {
5346 // Check if all nodes are done
5347 if (nodes.isclear())
5348 break;
5349
5350 SimpleSignal *signal = ss.waitFor();
5351 switch (signal->readSignalNumber()) {
5352 case GSN_EVENT_REP:{
5353 /**
5354 * This EVENT_REP receives all infoEvent and eventLog messages that
5355 * ARE generated through a DUMP command.
5356 */
5357 const NodeId nodeid = refToNode(signal->header.theSendersBlockRef);
5358 const EventReport * const event =
5359 (const EventReport*)signal->getDataPtr();
5360
5361 if (!nodes.get(nodeid))
5362 {
5363 // The reporting node was not expected
5364 #ifndef NDEBUG
5365 ndbout_c("nodeid: %u", nodeid);
5366 ndbout_c("save: %s", BaseString::getPrettyText(save).c_str());
5367 #endif
5368 assert(false);
5369 return false;
5370 }
5371
5372 if (event->getEventType() == NDB_LE_SavedEvent &&
5373 signal->getDataPtr()[1] == 0)
5374 {
5375 nodes_counter[nodeid] = 1;
5376 }
5377 else
5378 {
5379 // Save signal
5380 events.push_back(SimpleSignal(*signal));
5381 }
5382
5383 // Check if node is done
5384 nodes_counter[nodeid]--;
5385 if (nodes_counter[nodeid] == 0)
5386 nodes.clear(nodeid);
5387
5388 break;
5389 }
5390
5391 case GSN_NODE_FAILREP:{
5392 const NodeFailRep * const rep =
5393 (const NodeFailRep*)signal->getDataPtr();
5394 const Uint32* theNodes = NULL;
5395 if (signal->header.m_noOfSections >= 1)
5396 {
5397 theNodes = signal->ptr[0].p;
5398 }
5399 else
5400 {
5401 theNodes = rep->theNodes;
5402 }
5403 // only care about data-nodes
5404 for (NodeId i = 1; i < MAX_NDB_NODES; i++)
5405 {
5406 if (NdbNodeBitmask::get(theNodes, i))
5407 {
5408 nodes.clear(i);
5409
5410 // Remove any previous reports from this node
5411 // it should not be reported
5412 for (unsigned j = 0; j < events.size(); j++)
5413 {
5414 const SimpleSignal& ssig = events[j];
5415 const NodeId nodeid = refToNode(ssig.header.theSendersBlockRef);
5416 if (nodeid == i)
5417 {
5418 events.erase(j);
5419 j--;
5420 }
5421 }
5422 }
5423 }
5424 break;
5425 }
5426
5427 default:
5428 // Ignore all other signals
5429 break;
5430 }
5431 }
5432 ss.unlock();
5433
5434 return true;
5435 }
5436
// Explicit instantiations of the Vector and MutexVector templates
// for the element types listed below.
template class MutexVector<NodeId>;
template class MutexVector<Ndb_mgmd_event_service::Event_listener>;
template class Vector<EventSubscribeReq>;
template class MutexVector<EventSubscribeReq>;
template class Vector< Vector<BaseString> >;
template class Vector<MgmtSrvr::PossibleNode>;
template class Vector<Defragger::DefragBuffer*>;
5444