1 /* Copyright (c) 2008, 2021, Oracle and/or its affiliates.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License, version 2.0,
5 as published by the Free Software Foundation.
6
7 This program is also distributed with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the program and your derivative works with the
12 separately licensed software that they have included with MySQL.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License, version 2.0, for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */
22
23
24 #include "ConfigManager.hpp"
25 #include "MgmtSrvr.hpp"
26 #include <NdbDir.hpp>
27
28 #include <NdbConfig.h>
29 #include <NdbSleep.h>
30 #include <kernel/GlobalSignalNumbers.h>
31 #include <SignalSender.hpp>
32 #include <NdbApiSignal.hpp>
33 #include <signaldata/NFCompleteRep.hpp>
34 #include <signaldata/NodeFailRep.hpp>
35 #include <signaldata/ApiRegSignalData.hpp>
36 #include <ndb_version.h>
37
38 #include <EventLogger.hpp>
// Event logger used for all logging in this file; defined elsewhere.
extern EventLogger * g_eventLogger;

// Connect string and nodeid options with C linkage, defined elsewhere.
// Both are handed to m_config_retriever in the ConfigManager constructor.
extern "C" const char* opt_ndb_connectstring;
extern "C" int opt_ndb_nodeid;
43
/*
  ERROR_INSERTED(x) tests whether error insert code 'x' is currently set.
  Only builds with VM_TRACE or ERROR_INSERT defined consult g_errorInsert;
  in all other builds the macro is the constant false.
*/
#if defined VM_TRACE || defined ERROR_INSERT
extern int g_errorInsert;
// The argument is parenthesized so that a compound expression such as
// 'a | b' or 'c ? d : e' is compared as a whole.
#define ERROR_INSERTED(x) (g_errorInsert == (x))
#else
#define ERROR_INSERTED(x) false
#endif
50
ConfigManager(const MgmtSrvr::MgmtOpts & opts,const char * configdir)51 ConfigManager::ConfigManager(const MgmtSrvr::MgmtOpts& opts,
52 const char* configdir) :
53 MgmtThread("ConfigManager"),
54 m_opts(opts),
55 m_facade(NULL),
56 m_ss(NULL),
57 m_config_mutex(NULL),
58 m_config(NULL),
59 m_config_retriever(opt_ndb_connectstring,
60 opt_ndb_nodeid,
61 NDB_VERSION,
62 NDB_MGM_NODE_TYPE_MGM,
63 opts.bind_address),
64 m_config_state(CS_UNINITIALIZED),
65 m_previous_state(CS_UNINITIALIZED),
66 m_prepared_config(NULL),
67 m_node_id(0),
68 m_configdir(configdir)
69 {
70 }
71
72
~ConfigManager()73 ConfigManager::~ConfigManager()
74 {
75 delete m_config;
76 delete m_prepared_config;
77 if (m_ss)
78 delete m_ss;
79 NdbMutex_Destroy(m_config_mutex);
80 }
81
82
83 /**
84 alone_on_host
85
86 Check if this is the only node of "type" on
87 this host
88
89 */
90
91 static bool
alone_on_host(Config * conf,Uint32 own_type,Uint32 own_nodeid)92 alone_on_host(Config* conf,
93 Uint32 own_type,
94 Uint32 own_nodeid)
95 {
96 ConfigIter iter(conf, CFG_SECTION_NODE);
97 for (iter.first(); iter.valid(); iter.next())
98 {
99 Uint32 type;
100 if(iter.get(CFG_TYPE_OF_SECTION, &type) ||
101 type != own_type)
102 continue;
103
104 Uint32 nodeid;
105 if(iter.get(CFG_NODE_ID, &nodeid) ||
106 nodeid == own_nodeid)
107 continue;
108
109 const char * hostname;
110 if(iter.get(CFG_NODE_HOST, &hostname))
111 continue;
112
113 if (SocketServer::tryBind(0,hostname))
114 {
115 // Another MGM node was also setup on this host
116 g_eventLogger->debug("Not alone on host %s, node %d " \
117 "will also run here",
118 hostname, nodeid);
119 return false;
120 }
121 }
122 return true;
123 }
124
125
126 /**
127 find_nodeid_from_configdir
128
129 Check if configdir only contains config files
130 with one nodeid -> read the latest and confirm
131 there should only be one mgm node on this host
132 */
133
134 NodeId
find_nodeid_from_configdir(void)135 ConfigManager::find_nodeid_from_configdir(void)
136 {
137 BaseString config_name;
138 NdbDir::Iterator iter;
139
140 if (!m_configdir ||
141 iter.open(m_configdir) != 0)
142 return 0;
143
144 const char* name;
145 unsigned found_nodeid= 0;
146 unsigned nodeid;
147 char extra; // Avoid matching ndb_2_config.bin.2.tmp
148 unsigned version, max_version = 0;
149 while ((name = iter.next_file()) != NULL)
150 {
151 if (sscanf(name,
152 "ndb_%u_config.bin.%u%c",
153 &nodeid, &version, &extra) == 2)
154 {
155 // ndbout_c("match: %s", name);
156
157 if (nodeid != found_nodeid)
158 {
159 if (found_nodeid != 0)
160 return 0; // Found more than one nodeid
161 found_nodeid= nodeid;
162 }
163
164 if (version > max_version)
165 max_version = version;
166 }
167 }
168
169 if (max_version == 0)
170 return 0;
171
172 config_name.assfmt("%s%sndb_%u_config.bin.%u",
173 m_configdir, DIR_SEPARATOR, found_nodeid, max_version);
174
175 Config* conf;
176 if (!(conf = load_saved_config(config_name)))
177 return 0;
178
179 if (!m_config_retriever.verifyConfig(conf->m_configValues,
180 found_nodeid) ||
181 !alone_on_host(conf, NDB_MGM_NODE_TYPE_MGM, found_nodeid))
182 {
183 delete conf;
184 return 0;
185 }
186
187 delete conf;
188 return found_nodeid;
189 }
190
191
192 /**
193 find_own_nodeid
194
195 Return the nodeid of the MGM node
196 defined to run on this host
197
198 Return 0 if more than one node is defined
199 */
200
201 static NodeId
find_own_nodeid(Config * conf)202 find_own_nodeid(Config* conf)
203 {
204 NodeId found_nodeid= 0;
205 ConfigIter iter(conf, CFG_SECTION_NODE);
206 for (iter.first(); iter.valid(); iter.next())
207 {
208 Uint32 type;
209 if(iter.get(CFG_TYPE_OF_SECTION, &type) ||
210 type != NDB_MGM_NODE_TYPE_MGM)
211 continue;
212
213 Uint32 nodeid;
214 require(iter.get(CFG_NODE_ID, &nodeid) == 0);
215
216 const char * hostname;
217 if(iter.get(CFG_NODE_HOST, &hostname))
218 continue;
219
220 if (SocketServer::tryBind(0,hostname))
221 {
222 // This node is setup to run on this host
223 if (found_nodeid == 0)
224 found_nodeid = nodeid;
225 else
226 return 0; // More than one host on this node
227 }
228 }
229 return found_nodeid;
230 }
231
232
233 NodeId
find_nodeid_from_config(void)234 ConfigManager::find_nodeid_from_config(void)
235 {
236 if (!m_opts.mycnf &&
237 !m_opts.config_filename)
238 return 0;
239
240 Config* conf = load_config();
241 if (conf == NULL)
242 return 0;
243
244 NodeId found_nodeid = find_own_nodeid(conf);
245 if (found_nodeid == 0 ||
246 !m_config_retriever.verifyConfig(conf->m_configValues, found_nodeid))
247 {
248 delete conf;
249 return 0;
250 }
251
252 return found_nodeid;
253 }
254
255
256 bool
init_nodeid(void)257 ConfigManager::init_nodeid(void)
258 {
259 DBUG_ENTER("ConfigManager::init_nodeid");
260
261 NodeId nodeid = m_config_retriever.get_configuration_nodeid();
262 if (nodeid)
263 {
264 // Nodeid was specifed on command line or in NDB_CONNECTSTRING
265 g_eventLogger->debug("Got nodeid: %d from command line " \
266 "or NDB_CONNECTSTRING", nodeid);
267 m_node_id = nodeid;
268 DBUG_RETURN(true);
269 }
270
271 nodeid = find_nodeid_from_configdir();
272 if (nodeid)
273 {
274 // Found nodeid by searching in configdir
275 g_eventLogger->debug("Got nodeid: %d from searching in configdir",
276 nodeid);
277 m_node_id = nodeid;
278 DBUG_RETURN(true);
279 }
280
281 nodeid = find_nodeid_from_config();
282 if (nodeid)
283 {
284 // Found nodeid by looking in the config given on command line
285 g_eventLogger->debug("Got nodeid: %d from config file given " \
286 "on command line",
287 nodeid);
288 m_node_id = nodeid;
289 DBUG_RETURN(true);
290 }
291
292 // We _could_ try connecting to other running mgmd(s)
293 // and fetch our nodeid. But, that introduces a dependency
294 // that is not beneficial for a shared nothing cluster, since
295 // it might only work when other mgmd(s) are started. If all
296 // mgmd(s) is down it would require manual intervention.
297 // Better to require the node id to always be specified
298 // on the command line(or the above _local_ magic)
299
300 g_eventLogger->error("Could not determine which nodeid to use for "\
301 "this node. Specify it with --ndb-nodeid=<nodeid> "\
302 "on command line");
303 DBUG_RETURN(false);
304 }
305
306
307 static void
reset_dynamic_ports_in_config(const Config * config)308 reset_dynamic_ports_in_config(const Config* config)
309 {
310 ConfigIter iter(config, CFG_SECTION_CONNECTION);
311
312 for(;iter.valid();iter.next()) {
313 Uint32 port;
314 require(iter.get(CFG_CONNECTION_SERVER_PORT, &port) == 0);
315
316 if ((int)port < 0)
317 {
318 port = 0;
319 ConfigValues::Iterator i2(config->m_configValues->m_config,
320 iter.m_config);
321 require(i2.set(CFG_CONNECTION_SERVER_PORT, port));
322 }
323 }
324 }
325
326
327 bool
init(void)328 ConfigManager::init(void)
329 {
330 DBUG_ENTER("ConfigManager::init");
331
332 m_config_mutex = NdbMutex_Create();
333 if (!m_config_mutex)
334 {
335 g_eventLogger->error("Failed to create mutex in ConfigManager!");
336 DBUG_RETURN(false);
337 }
338
339 require(m_config_state == CS_UNINITIALIZED);
340
341 if (m_config_retriever.hasError())
342 {
343 g_eventLogger->error("%s", m_config_retriever.getErrorString());
344 DBUG_RETURN(false);
345 }
346
347 if (!init_nodeid())
348 DBUG_RETURN(false);
349
350 if (m_opts.initial)
351 {
352 /**
353 * Verify valid -f before delete_saved_configs()
354 */
355 Config* conf = load_config();
356 if (conf == NULL)
357 DBUG_RETURN(false);
358
359 delete conf;
360
361 if (!delete_saved_configs())
362 DBUG_RETURN(false);
363 }
364
365 if (failed_config_change_exists())
366 DBUG_RETURN(false);
367
368 BaseString config_bin_name;
369 if (saved_config_exists(config_bin_name))
370 {
371 Config* conf = NULL;
372 if (!(conf = load_saved_config(config_bin_name)))
373 DBUG_RETURN(false);
374
375 if (!config_ok(conf))
376 DBUG_RETURN(false);
377
378 set_config(conf);
379 m_config_state = CS_CONFIRMED;
380
381 g_eventLogger->info("Loaded config from '%s'", config_bin_name.c_str());
382
383 if (m_opts.reload && // --reload
384 (m_opts.mycnf || m_opts.config_filename))
385 {
386 Config* new_conf = load_config();
387 if (new_conf == NULL)
388 DBUG_RETURN(false);
389
390 /**
391 * Add config to set once ConfigManager is fully started
392 */
393 m_config_change.config_loaded(new_conf);
394 g_eventLogger->info("Loaded configuration from '%s', will try " \
395 "to set it once started",
396 m_opts.mycnf ? "my.cnf" : m_opts.config_filename);
397 }
398 }
399 else
400 {
401 if (m_opts.mycnf || m_opts.config_filename)
402 {
403 Config* conf = load_config();
404 if (conf == NULL)
405 DBUG_RETURN(false);
406
407 if (!config_ok(conf))
408 DBUG_RETURN(false);
409
410 /*
411 Set this node as primary node for config.ini/my.cnf
412 in order to make it possible that make sure an old
413 config.ini is only loaded with --force
414 */
415 if (!conf->setPrimaryMgmNode(m_node_id))
416 {
417 g_eventLogger->error("Failed to set primary MGM node");
418 DBUG_RETURN(false);
419 }
420
421 /* Use the initial config for now */
422 set_config(conf);
423
424 g_eventLogger->info("Got initial configuration from '%s', will try " \
425 "to set it when all ndb_mgmd(s) started",
426 m_opts.mycnf ? "my.cnf" : m_opts.config_filename);
427 m_config_change.m_initial_config = new Config(conf); // Copy config
428 m_config_state = CS_INITIAL;
429
430 if (!init_checkers(m_config_change.m_initial_config))
431 DBUG_RETURN(false);
432 }
433 else
434 {
435 Config* conf = NULL;
436 if (!(conf = fetch_config()))
437 {
438 g_eventLogger->error("Could not fetch config!");
439 DBUG_RETURN(false);
440 }
441
442 /*
443 The fetched config may contain dynamic ports for
444 ndbd(s) which have to be reset to 0 before using
445 the config
446 */
447 reset_dynamic_ports_in_config(conf);
448
449 if (!config_ok(conf))
450 DBUG_RETURN(false);
451
452 /* Use the fetched config for now */
453 set_config(conf);
454
455 if (!m_opts.config_cache)
456 {
457 assert(!m_configdir); // Running without configdir
458 g_eventLogger->info("Fetched configuration, " \
459 "generation: %d, name: '%s'. ",
460 m_config->getGeneration(), m_config->getName());
461 DBUG_RETURN(true);
462 }
463
464 if (m_config->getGeneration() == 0)
465 {
466 g_eventLogger->info("Fetched initial configuration, " \
467 "generation: %d, name: '%s'. "\
468 "Will try to set it when all ndb_mgmd(s) started",
469 m_config->getGeneration(), m_config->getName());
470 m_config_state= CS_INITIAL;
471 m_config_change.m_initial_config = new Config(conf); // Copy config
472
473 if (!init_checkers(m_config_change.m_initial_config))
474 DBUG_RETURN(false);
475 }
476 else
477 {
478 g_eventLogger->info("Fetched confirmed configuration, " \
479 "generation: %d, name: '%s'. " \
480 "Trying to write it to disk...",
481 m_config->getGeneration(), m_config->getName());
482 if (!prepareConfigChange(m_config))
483 {
484 abortConfigChange();
485 g_eventLogger->error("Failed to write the fetched config to disk");
486 DBUG_RETURN(false);
487 }
488 commitConfigChange();
489 m_config_state = CS_CONFIRMED;
490 g_eventLogger->info("The fetched configuration has been saved!");
491 }
492 }
493 }
494
495 require(m_config_state != CS_UNINITIALIZED);
496 DBUG_RETURN(true);
497 }
498
499
500 bool
prepareConfigChange(const Config * config)501 ConfigManager::prepareConfigChange(const Config* config)
502 {
503 if (m_prepared_config)
504 {
505 g_eventLogger->error("Can't prepare configuration change " \
506 "when already prepared");
507 return false;
508 }
509
510 Uint32 generation= config->getGeneration();
511 if (generation == 0)
512 {
513 g_eventLogger->error("Can't prepare configuration change for "\
514 "configuration with generation 0");
515 return false;
516 }
517
518 assert(m_node_id);
519 m_config_name.assfmt("%s%sndb_%u_config.bin.%u",
520 m_configdir, DIR_SEPARATOR, m_node_id, generation);
521 g_eventLogger->debug("Preparing configuration, generation: %d name: %s",
522 generation, m_config_name.c_str());
523
524 /* Check file name is free */
525 if (access(m_config_name.c_str(), F_OK) == 0)
526 {
527 g_eventLogger->error("The file '%s' already exist while preparing",
528 m_config_name.c_str());
529 return false;
530 }
531
532 /* Pack the config */
533 UtilBuffer buf;
534 if(!config->pack(buf))
535 {
536 /* Failed to pack config */
537 g_eventLogger->error("Failed to pack configuration while preparing");
538 return false;
539 }
540
541 /* Write config to temporary file */
542 BaseString prep_config_name(m_config_name);
543 prep_config_name.append(".tmp");
544 FILE * f = fopen(prep_config_name.c_str(), IF_WIN("wbc", "w"));
545 if(f == NULL)
546 {
547 g_eventLogger->error("Failed to open file '%s' while preparing, errno: %d",
548 prep_config_name.c_str(), errno);
549 return false;
550 }
551
552 if(fwrite(buf.get_data(), 1, buf.length(), f) != (size_t)buf.length())
553 {
554 g_eventLogger->error("Failed to write file '%s' while preparing, errno: %d",
555 prep_config_name.c_str(), errno);
556 fclose(f);
557 unlink(prep_config_name.c_str());
558 return false;
559 }
560
561 if (fflush(f))
562 {
563 g_eventLogger->error("Failed to flush file '%s' while preparing, errno: %d",
564 prep_config_name.c_str(), errno);
565 fclose(f);
566 unlink(prep_config_name.c_str());
567 return false;
568 }
569
570 #ifdef _WIN32
571 /*
572 File is opened with the commit flag "c" so
573 that the contents of the file buffer are written
574 directly to disk when fflush is called
575 */
576 #else
577 if (fsync(fileno(f)))
578 {
579 g_eventLogger->error("Failed to sync file '%s' while preparing, errno: %d",
580 prep_config_name.c_str(), errno);
581 fclose(f);
582 unlink(prep_config_name.c_str());
583 return false;
584 }
585 #endif
586 fclose(f);
587
588 m_prepared_config = new Config(config); // Copy
589 g_eventLogger->debug("Configuration prepared");
590
591 return true;
592 }
593
594
595 void
commitConfigChange(void)596 ConfigManager::commitConfigChange(void)
597 {
598 require(m_prepared_config != 0);
599
600 /* Set new config locally and in all subscribers */
601 set_config(m_prepared_config);
602 m_prepared_config= NULL;
603
604 /* Rename file to real name */
605 require(m_config_name.length());
606 BaseString prep_config_name(m_config_name);
607 prep_config_name.append(".tmp");
608 if(rename(prep_config_name.c_str(), m_config_name.c_str()))
609 {
610 g_eventLogger->error("rename from '%s' to '%s' failed while committing, " \
611 "errno: %d",
612 prep_config_name.c_str(), m_config_name.c_str(),
613 errno);
614 // Crash and leave the prepared config file in place
615 abort();
616 }
617 m_config_name.clear();
618
619 g_eventLogger->info("Configuration %d commited", m_config->getGeneration());
620 }
621
622
623 static void
check_no_dynamic_ports_in_config(const Config * config)624 check_no_dynamic_ports_in_config(const Config* config)
625 {
626 bool ok = true;
627 ConfigIter iter(config, CFG_SECTION_CONNECTION);
628
629 for(;iter.valid();iter.next()) {
630 Uint32 n1, n2;
631 require(iter.get(CFG_CONNECTION_NODE_1, &n1) == 0 &&
632 iter.get(CFG_CONNECTION_NODE_2, &n2) == 0);
633
634 Uint32 port_value;
635 require(iter.get(CFG_CONNECTION_SERVER_PORT, &port_value) == 0);
636
637 int port = (int)port_value;
638 if (port < 0)
639 {
640 g_eventLogger->error("INTERNAL ERROR: Found dynamic ports with "
641 "value in config, n1: %d, n2: %d, port: %u",
642 n1, n2, port);
643 ok = false;
644 }
645 }
646 require(ok);
647 }
648
649
650 void
set_config(Config * new_config)651 ConfigManager::set_config(Config* new_config)
652 {
653 // Check that config does not contain any dynamic ports
654 check_no_dynamic_ports_in_config(new_config);
655
656 delete m_config;
657 m_config = new_config;
658
659 // Removed cache of packed config
660 m_packed_config.clear();
661
662 for (unsigned i = 0; i < m_subscribers.size(); i++)
663 m_subscribers[i]->config_changed(m_node_id, new_config);
664 }
665
666
667 int
add_config_change_subscriber(ConfigSubscriber * subscriber)668 ConfigManager::add_config_change_subscriber(ConfigSubscriber* subscriber)
669 {
670 return m_subscribers.push_back(subscriber);
671 }
672
673
674 bool
config_ok(const Config * conf)675 ConfigManager::config_ok(const Config* conf)
676 {
677 assert(m_node_id);
678 if (!m_config_retriever.verifyConfig(conf->m_configValues, m_node_id))
679 {
680 g_eventLogger->error("%s", m_config_retriever.getErrorString());
681 return false;
682 }
683
684 // Check DataDir exist
685 ConfigIter iter(conf, CFG_SECTION_NODE);
686 require(iter.find(CFG_NODE_ID, m_node_id) == 0);
687
688 const char *datadir;
689 require(iter.get(CFG_NODE_DATADIR, &datadir) == 0);
690
691 if (strcmp(datadir, "") != 0 && // datadir != ""
692 access(datadir, F_OK)) // dir exists
693 {
694 g_eventLogger->error("Directory '%s' specified with DataDir " \
695 "in configuration does not exist.", \
696 datadir);
697 return false;
698 }
699 return true;
700 }
701
702
703 void
abortConfigChange(void)704 ConfigManager::abortConfigChange(void)
705 {
706 /* Should always succeed */
707
708 /* Remove the prepared file */
709 BaseString prep_config_name(m_config_name);
710 prep_config_name.append(".tmp");
711 unlink(prep_config_name.c_str());
712 m_config_name.clear();
713
714 delete m_prepared_config;
715 m_prepared_config= NULL;
716 }
717
718
719
720 void
sendConfigChangeImplRef(SignalSender & ss,NodeId nodeId,ConfigChangeRef::ErrorCode error) const721 ConfigManager::sendConfigChangeImplRef(SignalSender& ss, NodeId nodeId,
722 ConfigChangeRef::ErrorCode error) const
723 {
724 SimpleSignal ssig;
725 ConfigChangeImplRef* const ref =
726 CAST_PTR(ConfigChangeImplRef, ssig.getDataPtrSend());
727 ref->errorCode = error;
728
729 g_eventLogger->debug("Send CONFIG_CHANGE_IMPL_REF to node: %d, error: %d",
730 nodeId, error);
731
732 ss.sendSignal(nodeId, ssig,
733 MGM_CONFIG_MAN, GSN_CONFIG_CHANGE_IMPL_REF,
734 ConfigChangeImplRef::SignalLength);
735 }
736
737
738
/**
  Handle CONFIG_CHANGE_IMPL_REQ, one step (Prepare, Commit or Abort)
  of a configuration change.

  Prepare: unpack the new config from the signal section, validate it
  against the current config and state, bump the generation and call
  prepareConfigChange(). Every failed check is answered with
  CONFIG_CHANGE_IMPL_REF and the function returns.
  Commit:  commit the prepared config and mark the state CS_CONFIRMED.
  Abort:   throw away the prepared config.

  When the step succeeded, CONFIG_CHANGE_IMPL_CONF is sent back to the
  requesting node.

  @param ss   signal sender used for the reply
  @param sig  the received signal, may be one fragment of several
*/
void
ConfigManager::execCONFIG_CHANGE_IMPL_REQ(SignalSender& ss, SimpleSignal* sig)
{
  NodeId nodeId = refToNode(sig->header.theSendersBlockRef);
  const ConfigChangeImplReq * const req =
    CAST_CONSTPTR(ConfigChangeImplReq, sig->getDataPtr());

  g_eventLogger->debug("Got CONFIG_CHANGE_IMPL_REQ from node: %d, "\
                       "requestType: %d",
                       nodeId, req->requestType);

  if (!m_defragger.defragment(sig))
    return; // More fragments to come

  // Hold the config mutex for the rest of the function
  Guard g(m_config_mutex);

  switch(req->requestType){
  case ConfigChangeImplReq::Prepare:{
    // The packed config is expected in exactly one section
    if (sig->header.m_noOfSections != 1)
    {
      sendConfigChangeImplRef(ss, nodeId, ConfigChangeRef::NoConfigData);
      return;
    }

    ConfigValuesFactory cf;
    if (!cf.unpack(sig->ptr[0].p, req->length))
    {
      sendConfigChangeImplRef(ss, nodeId, ConfigChangeRef::FailedToUnpack);
      return;
    }

    Config new_config(cf.getConfigValues());
    Uint32 new_generation = new_config.getGeneration();
    Uint32 curr_generation = m_config->getGeneration();
    const char* new_name = new_config.getName();
    const char* curr_name = m_config->getName();

    if (m_config->illegal_change(&new_config))
    {
      sendConfigChangeImplRef(ss, nodeId, ConfigChangeRef::IllegalConfigChange);
      return;
    }

    if (req->initial)
    {
      // Check own state
      if (m_config_state != CS_INITIAL)
      {
        g_eventLogger->warning("Refusing to start initial "             \
                               "configuration change since this node "  \
                               "is not in INITIAL state");
        sendConfigChangeImplRef(ss, nodeId,
                                ConfigChangeRef::IllegalInitialState);
        return;
      }

      // Check generation
      if (new_generation != 0)
      {
        g_eventLogger->warning("Refusing to start initial "             \
                               "configuration change since new "        \
                               "generation is not 0 (new_generation: %d)",
                               new_generation);
        sendConfigChangeImplRef(ss, nodeId,
                                ConfigChangeRef::IllegalInitialGeneration);
        return;
      }
      // An initial config gets generation 1 when it's set
      new_generation = 1;

      // Check config is equal to our initial config
      // but skip check if message is from self...
      if (nodeId != refToNode(ss.getOwnRef()))
      {
        Config new_config_copy(&new_config);
        require(new_config_copy.setName(new_name));
        // The system section is excluded from the comparison
        unsigned exclude[]= {CFG_SECTION_SYSTEM, 0};
        if (!new_config_copy.equal(m_config_change.m_initial_config, exclude))
        {
          BaseString buf;
          g_eventLogger->warning
            ("Refusing to start initial config "                        \
             "change when nodes have different "                        \
             "config\n"                                                 \
             "This is the actual diff:\n%s",
             new_config_copy.diff2str(m_config_change.m_initial_config, buf));
          sendConfigChangeImplRef(ss, nodeId,
                                  ConfigChangeRef::DifferentInitial);
          return;
        }

        /*
          Scrap the own initial config, it's been used to check that
          the other node started from an equal initial config, now
          it's not needed anymore
        */
        delete m_config_change.m_initial_config;
        m_config_change.m_initial_config = NULL;
      }
    }
    else
    {

      // Check that new config has same primary mgm node as current
      Uint32 curr_primary = m_config->getPrimaryMgmNode();
      Uint32 new_primary = new_config.getPrimaryMgmNode();
      if (new_primary != curr_primary)
      {
        g_eventLogger->warning("Refusing to start configuration change " \
                               "requested by node %d, the new config uses " \
                               "different primary mgm node %d. "      \
                               "Current primary mmgm node is %d.",
                               nodeId, new_primary, curr_primary);
        sendConfigChangeImplRef(ss, nodeId,
                                ConfigChangeRef::NotPrimaryMgmNode);
        return;
      }

      // The new config must be based on the generation we have
      if (new_generation == 0 ||
          new_generation != curr_generation)
      {
        BaseString buf;
        g_eventLogger->warning("Refusing to start config change "     \
                               "requested by node with different "    \
                               "generation: %d. Our generation: %d\n"   \
                               "This is the actual diff:\n%s",
                               new_generation, curr_generation,
                               new_config.diff2str(m_config, buf));
        sendConfigChangeImplRef(ss, nodeId, ConfigChangeRef::InvalidGeneration);
        return;
      }
      new_generation++;

      // Check same cluster name
      if (strcmp(new_name, curr_name))
      {
        BaseString buf;
        g_eventLogger->warning("Refusing to start config change "     \
                               "requested by node with different "    \
                               "name: '%s'. Our name: '%s'\n"           \
                               "This is the actual diff:\n%s",
                               new_name, curr_name,
                               new_config.diff2str(m_config, buf));
        sendConfigChangeImplRef(ss, nodeId, ConfigChangeRef::InvalidConfigName);
        return;
      }
    }

    // Set new generation
    if(!new_config.setGeneration(new_generation))
    {
      g_eventLogger->error("Failed to set new generation to %d",
                           new_generation);
      sendConfigChangeImplRef(ss, nodeId, ConfigChangeRef::InternalError);
      return;
    }

    // Write the new config to disk and keep a copy of it as prepared
    if (!prepareConfigChange(&new_config))
    {
      sendConfigChangeImplRef(ss, nodeId, ConfigChangeRef::PrepareFailed);
      return;
    }
    break;
  }

  case ConfigChangeImplReq::Commit:
    commitConfigChange();

    // All nodes have agreed on config -> CONFIRMED
    m_config_state = CS_CONFIRMED;

    break;

  case ConfigChangeImplReq::Abort:
    abortConfigChange();
    break;

  default:
    g_eventLogger->error("execCONFIG_CHANGE_IMPL_REQ: unhandled state");
    abort();
    break;
  }

  /* Send CONF, echoing the request type which was handled */
  SimpleSignal ssig;
  ConfigChangeImplConf* const conf =
    CAST_PTR(ConfigChangeImplConf, ssig.getDataPtrSend());
  conf->requestType = req->requestType;

  g_eventLogger->debug("Sending CONFIG_CHANGE_IMPL_CONF to node: %d",
                       nodeId);

  ss.sendSignal(nodeId, ssig,
                MGM_CONFIG_MAN,
                GSN_CONFIG_CHANGE_IMPL_CONF,
                ConfigChangeImplConf::SignalLength);
}
934
935
set_config_change_state(ConfigChangeState::States state)936 void ConfigManager::set_config_change_state(ConfigChangeState::States state)
937 {
938 if (state == ConfigChangeState::IDLE)
939 {
940 // Rebuild m_all_mgm so that each node in config is included
941 // new mgm nodes might have been added
942 assert(m_config_change.m_error == ConfigChangeRef::OK);
943 m_config->get_nodemask(m_all_mgm, NDB_MGM_NODE_TYPE_MGM);
944 }
945
946 m_config_change.m_state.m_current_state = state;
947 }
948
949
950 void
execCONFIG_CHANGE_IMPL_REF(SignalSender & ss,SimpleSignal * sig)951 ConfigManager::execCONFIG_CHANGE_IMPL_REF(SignalSender& ss, SimpleSignal* sig)
952 {
953 NodeId nodeId = refToNode(sig->header.theSendersBlockRef);
954 g_eventLogger->debug("Got CONFIG_CHANGE_IMPL_REF from node: %d", nodeId);
955
956 const ConfigChangeImplRef * const ref =
957 CAST_CONSTPTR(ConfigChangeImplRef, sig->getDataPtr());
958 g_eventLogger->warning("Node %d refused configuration change, error: %d",
959 nodeId, ref->errorCode);
960
961 /* Remember the original error code */
962 if (m_config_change.m_error == 0)
963 m_config_change.m_error = (ConfigChangeRef::ErrorCode)ref->errorCode;
964
965 switch(m_config_change.m_state){
966 case ConfigChangeState::ABORT:
967 case ConfigChangeState::PREPARING:{
968 /* Got ref while preparing (or already decided to abort) */
969 m_config_change.m_contacted_nodes.clear(nodeId);
970 set_config_change_state(ConfigChangeState::ABORT);
971
972 m_waiting_for.clear(nodeId);
973 if (!m_waiting_for.isclear())
974 return;
975
976 startAbortConfigChange(ss);
977 break;
978 }
979 case ConfigChangeState::COMITTING:
980 /* Got ref while comitting, impossible */
981 abort();
982 break;
983
984 case ConfigChangeState::ABORTING:
985 /* Got ref while aborting, impossible */
986 abort();
987 break;
988
989 default:
990 g_eventLogger->error("execCONFIG_CHANGE_IMPL_REF: unhandled state");
991 abort();
992 break;
993 }
994 }
995
996
997 void
execCONFIG_CHANGE_IMPL_CONF(SignalSender & ss,SimpleSignal * sig)998 ConfigManager::execCONFIG_CHANGE_IMPL_CONF(SignalSender& ss, SimpleSignal* sig)
999 {
1000 NodeId nodeId = refToNode(sig->header.theSendersBlockRef);
1001 const ConfigChangeImplConf * const conf =
1002 CAST_CONSTPTR(ConfigChangeImplConf, sig->getDataPtr());
1003 g_eventLogger->debug("Got CONFIG_CHANGE_IMPL_CONF from node %d", nodeId);
1004
1005 switch(m_config_change.m_state){
1006 case ConfigChangeState::PREPARING:{
1007 require(conf->requestType == ConfigChangeImplReq::Prepare);
1008 m_waiting_for.clear(nodeId);
1009 if (!m_waiting_for.isclear())
1010 return;
1011
1012 // send to next
1013 int res = sendConfigChangeImplReq(ss, m_config_change.m_new_config);
1014 if (res > 0)
1015 {
1016 // sent to new node...
1017 return;
1018 }
1019 else if (res < 0)
1020 {
1021 // send failed, start abort
1022 startAbortConfigChange(ss);
1023 return;
1024 }
1025
1026 /**
1027 * All node has received new config..
1028 * ok to delete it...
1029 */
1030 delete m_config_change.m_new_config;
1031 m_config_change.m_new_config = 0;
1032
1033 /* Send commit to all nodes */
1034 SimpleSignal ssig;
1035 ConfigChangeImplReq* const req =
1036 CAST_PTR(ConfigChangeImplReq, ssig.getDataPtrSend());
1037
1038 req->requestType = ConfigChangeImplReq::Commit;
1039
1040 g_eventLogger->debug("Sending CONFIG_CHANGE_IMPL_REQ(commit)");
1041 require(m_waiting_for.isclear());
1042 m_waiting_for = ss.broadcastSignal(m_config_change.m_contacted_nodes, ssig,
1043 MGM_CONFIG_MAN,
1044 GSN_CONFIG_CHANGE_IMPL_REQ,
1045 ConfigChangeImplReq::SignalLength);
1046 if (m_waiting_for.isclear())
1047 set_config_change_state(ConfigChangeState::IDLE);
1048 else
1049 set_config_change_state(ConfigChangeState::COMITTING);
1050 break;
1051 }
1052
1053 case ConfigChangeState::COMITTING:{
1054 require(conf->requestType == ConfigChangeImplReq::Commit);
1055
1056 m_waiting_for.clear(nodeId);
1057 if (!m_waiting_for.isclear())
1058 return;
1059
1060 require(m_config_change.m_client_ref != RNIL);
1061 require(m_config_change.m_error == 0);
1062 if (m_config_change.m_client_ref == ss.getOwnRef())
1063 {
1064 g_eventLogger->info("Config change completed! New generation: %d",
1065 m_config->getGeneration());
1066 }
1067 else
1068 {
1069 /* Send CONF to requestor */
1070 sendConfigChangeConf(ss, m_config_change.m_client_ref);
1071 }
1072 m_config_change.m_client_ref = RNIL;
1073 set_config_change_state(ConfigChangeState::IDLE);
1074 break;
1075 }
1076
1077 case ConfigChangeState::ABORT:{
1078 m_waiting_for.clear(nodeId);
1079 if (!m_waiting_for.isclear())
1080 return;
1081
1082 startAbortConfigChange(ss);
1083 break;
1084 }
1085
1086 case ConfigChangeState::ABORTING:{
1087 m_waiting_for.clear(nodeId);
1088 if (!m_waiting_for.isclear())
1089 return;
1090
1091 require(m_config_change.m_client_ref != RNIL);
1092 require(m_config_change.m_error);
1093 if (m_config_change.m_client_ref == ss.getOwnRef())
1094 {
1095 g_eventLogger->
1096 error("Configuration change failed! error: %d '%s'",
1097 m_config_change.m_error,
1098 ConfigChangeRef::errorMessage(m_config_change.m_error));
1099 exit(1);
1100 }
1101 else
1102 {
1103 /* Send ref to the requestor */
1104 sendConfigChangeRef(ss, m_config_change.m_client_ref,
1105 m_config_change.m_error);
1106 }
1107 m_config_change.m_error= ConfigChangeRef::OK;
1108 m_config_change.m_client_ref = RNIL;
1109 set_config_change_state(ConfigChangeState::IDLE);
1110 break;
1111 }
1112
1113 default:
1114 g_eventLogger->error("execCONFIG_CHANGE_IMPL_CONF: unhandled state");
1115 abort();
1116 break;
1117 }
1118 }
1119
1120
1121 void
sendConfigChangeRef(SignalSender & ss,BlockReference to,ConfigChangeRef::ErrorCode error) const1122 ConfigManager::sendConfigChangeRef(SignalSender& ss, BlockReference to,
1123 ConfigChangeRef::ErrorCode error) const
1124 {
1125 NodeId nodeId = refToNode(to);
1126 SimpleSignal ssig;
1127 ConfigChangeRef* const ref =
1128 CAST_PTR(ConfigChangeRef, ssig.getDataPtrSend());
1129 ref->errorCode = error;
1130
1131 g_eventLogger->debug("Send CONFIG_CHANGE_REF to node: %d, error: %d",
1132 nodeId, error);
1133
1134 ss.sendSignal(nodeId, ssig, refToBlock(to),
1135 GSN_CONFIG_CHANGE_REF, ConfigChangeRef::SignalLength);
1136 }
1137
1138
1139 void
sendConfigChangeConf(SignalSender & ss,BlockReference to) const1140 ConfigManager::sendConfigChangeConf(SignalSender& ss, BlockReference to) const
1141 {
1142 NodeId nodeId = refToNode(to);
1143 SimpleSignal ssig;
1144
1145 g_eventLogger->debug("Send CONFIG_CHANGE_CONF to node: %d", nodeId);
1146
1147 ss.sendSignal(nodeId, ssig, refToBlock(to),
1148 GSN_CONFIG_CHANGE_CONF, ConfigChangeConf::SignalLength);
1149 }
1150
1151
1152 void
startConfigChange(SignalSender & ss,Uint32 ref)1153 ConfigManager::startConfigChange(SignalSender& ss, Uint32 ref)
1154 {
1155 if (m_config_state == CS_INITIAL)
1156 {
1157 g_eventLogger->info("Starting initial configuration change");
1158 }
1159 else
1160 {
1161 require(m_config_state == CS_CONFIRMED);
1162 g_eventLogger->info("Starting configuration change, generation: %d",
1163 m_config_change.m_new_config->getGeneration());
1164 }
1165 m_config_change.m_contacted_nodes.clear();
1166 m_config_change.m_client_ref = ref;
1167 if (sendConfigChangeImplReq(ss, m_config_change.m_new_config) <= 0)
1168 {
1169 g_eventLogger->error("Failed to start configuration change!");
1170 exit(1);
1171 }
1172 }
1173
1174 void
startAbortConfigChange(SignalSender & ss)1175 ConfigManager::startAbortConfigChange(SignalSender& ss)
1176 {
1177 /* Abort all other nodes */
1178 SimpleSignal ssig;
1179 ConfigChangeImplReq* const req =
1180 CAST_PTR(ConfigChangeImplReq, ssig.getDataPtrSend());
1181 req->requestType = ConfigChangeImplReq::Abort;
1182
1183 g_eventLogger->debug
1184 ("Sending CONFIG_CHANGE_IMPL_REQ(abort) to %s",
1185 BaseString::getPrettyText(m_config_change.m_contacted_nodes).c_str());
1186
1187 require(m_waiting_for.isclear());
1188 m_waiting_for = ss.broadcastSignal(m_config_change.m_contacted_nodes, ssig,
1189 MGM_CONFIG_MAN,
1190 GSN_CONFIG_CHANGE_IMPL_REQ,
1191 ConfigChangeImplReq::SignalLength);
1192
1193 if (m_config_change.m_new_config)
1194 {
1195 delete m_config_change.m_new_config;
1196 m_config_change.m_new_config = 0;
1197 }
1198
1199 if (m_waiting_for.isclear())
1200 {
1201 /**
1202 * Send CONFIG_CHANGE_IMPL_CONF (aborting) to self
1203 */
1204 m_waiting_for.set(ss.getOwnNodeId());
1205 ConfigChangeImplConf* const conf =
1206 CAST_PTR(ConfigChangeImplConf, ssig.getDataPtrSend());
1207 conf->requestType = ConfigChangeImplReq::Abort;
1208
1209 ss.sendSignal(ss.getOwnNodeId(), ssig,
1210 MGM_CONFIG_MAN,
1211 GSN_CONFIG_CHANGE_IMPL_CONF,
1212 ConfigChangeImplConf::SignalLength);
1213 }
1214
1215 set_config_change_state(ConfigChangeState::ABORTING);
1216 }
1217
1218 int
sendConfigChangeImplReq(SignalSender & ss,const Config * conf)1219 ConfigManager::sendConfigChangeImplReq(SignalSender& ss, const Config* conf)
1220 {
1221 require(m_waiting_for.isclear());
1222 require(m_config_change.m_client_ref != RNIL);
1223
1224 if (m_config_change.m_contacted_nodes.isclear())
1225 {
1226 require(m_config_change.m_state == ConfigChangeState::IDLE);
1227 }
1228 else
1229 {
1230 require(m_config_change.m_state == ConfigChangeState::PREPARING);
1231 }
1232
1233 set_config_change_state(ConfigChangeState::PREPARING);
1234
1235 NodeBitmask nodes = m_all_mgm;
1236 nodes.bitANDC(m_config_change.m_contacted_nodes);
1237 if (nodes.isclear())
1238 {
1239 return 0; // all done
1240 }
1241
1242 /**
1243 * Send prepare to all MGM nodes 1 by 1
1244 * keep track of which I sent to in m_contacted_nodes
1245 */
1246 SimpleSignal ssig;
1247
1248 UtilBuffer buf;
1249 conf->pack(buf);
1250 ssig.ptr[0].p = (Uint32*)buf.get_data();
1251 ssig.ptr[0].sz = (buf.length() + 3) / 4;
1252 ssig.header.m_noOfSections = 1;
1253
1254 ConfigChangeImplReq* const req =
1255 CAST_PTR(ConfigChangeImplReq, ssig.getDataPtrSend());
1256 req->requestType = ConfigChangeImplReq::Prepare;
1257 req->initial = (m_config_state == CS_INITIAL);
1258 req->length = buf.length();
1259
1260 Uint32 i = nodes.find(0);
1261 g_eventLogger->debug("Sending CONFIG_CHANGE_IMPL_REQ(prepare) to %u", i);
1262 int result = ss.sendFragmentedSignal(i, ssig, MGM_CONFIG_MAN,
1263 GSN_CONFIG_CHANGE_IMPL_REQ,
1264 ConfigChangeImplReq::SignalLength);
1265 if (result != 0)
1266 {
1267 g_eventLogger->warning("Failed to send configuration change "
1268 "prepare to node: %d, result: %d",
1269 i, result);
1270 return -1;
1271 }
1272
1273 m_waiting_for.set(i);
1274 m_config_change.m_contacted_nodes.set(i);
1275
1276 return 1;
1277 }
1278
1279 void
execCONFIG_CHANGE_REQ(SignalSender & ss,SimpleSignal * sig)1280 ConfigManager::execCONFIG_CHANGE_REQ(SignalSender& ss, SimpleSignal* sig)
1281 {
1282 BlockReference from = sig->header.theSendersBlockRef;
1283 const ConfigChangeReq * const req =
1284 CAST_CONSTPTR(ConfigChangeReq, sig->getDataPtr());
1285
1286 if (!m_defragger.defragment(sig))
1287 return; // More fragments to come
1288
1289 if (!m_started.equal(m_all_mgm)) // Not all started
1290 {
1291 sendConfigChangeRef(ss, from, ConfigChangeRef::NotAllStarted);
1292 return;
1293 }
1294
1295 if (m_all_mgm.find(0) != m_facade->ownId()) // Not the master
1296 {
1297 sendConfigChangeRef(ss, from, ConfigChangeRef::NotMaster);
1298 return;
1299 }
1300
1301 if (m_config_change.m_state != ConfigChangeState::IDLE)
1302 {
1303 sendConfigChangeRef(ss, from, ConfigChangeRef::ConfigChangeOnGoing);
1304 return;
1305 }
1306 require(m_config_change.m_error == ConfigChangeRef::OK);
1307
1308 if (sig->header.m_noOfSections != 1)
1309 {
1310 sendConfigChangeRef(ss, from, ConfigChangeRef::NoConfigData);
1311 return;
1312 }
1313
1314 ConfigValuesFactory cf;
1315 if (!cf.unpack(sig->ptr[0].p, req->length))
1316 {
1317 sendConfigChangeRef(ss, from, ConfigChangeRef::FailedToUnpack);
1318 return;
1319 }
1320
1321 Config * new_config = new Config(cf.getConfigValues());
1322 if (!config_ok(new_config))
1323 {
1324 g_eventLogger->warning("Refusing to start config change, the config "\
1325 "is not ok");
1326 sendConfigChangeRef(ss, from, ConfigChangeRef::ConfigNotOk);
1327 delete new_config;
1328 return;
1329 }
1330
1331 m_config_change.m_new_config = new_config;
1332 startConfigChange(ss, from);
1333
1334 return;
1335 }
1336
1337
1338 static Uint32
config_check_checksum(const Config * config)1339 config_check_checksum(const Config* config)
1340 {
1341 Config copy(config);
1342
1343 // Make constants of a few values in SYSTEM section that are
1344 // not part of the checksum used for "config check"
1345 copy.setName("CHECKSUM");
1346 copy.setPrimaryMgmNode(0);
1347
1348 Uint32 checksum = copy.checksum();
1349
1350 return checksum;
1351 }
1352
1353
1354 void
execCONFIG_CHECK_REQ(SignalSender & ss,SimpleSignal * sig)1355 ConfigManager::execCONFIG_CHECK_REQ(SignalSender& ss, SimpleSignal* sig)
1356 {
1357 Guard g(m_config_mutex);
1358 BlockReference from = sig->header.theSendersBlockRef;
1359 NodeId nodeId = refToNode(from);
1360 const ConfigCheckReq * const req =
1361 CAST_CONSTPTR(ConfigCheckReq, sig->getDataPtr());
1362
1363 Uint32 other_generation = req->generation;
1364 ConfigState other_state = (ConfigState)req->state;
1365
1366 Uint32 generation = m_config->getGeneration();
1367
1368 if (ERROR_INSERTED(100) && nodeId != ss.getOwnNodeId())
1369 {
1370 g_eventLogger->debug("execCONFIG_CHECK_REQ() ERROR_INSERTED(100) => exit()");
1371 exit(0);
1372 }
1373
1374 // checksum
1375 Uint32 checksum = config_check_checksum(m_config);
1376 Uint32 other_checksum = req->checksum;
1377 if (sig->header.theLength == ConfigCheckReq::SignalLengthBeforeChecksum)
1378 {
1379 // Other side uses old version without checksum, use our checksum to
1380 // bypass the checks
1381 g_eventLogger->debug("Other mgmd does not have checksum, using own");
1382 other_checksum = checksum;
1383 }
1384
1385 if (m_prepared_config || m_config_change.m_new_config)
1386 {
1387 g_eventLogger->debug("Got CONFIG_CHECK_REQ from node: %d while "
1388 "config change in progress (m_prepared_config). "
1389 "Returning incorrect state, causing it to be retried",
1390 nodeId);
1391 sendConfigCheckRef(ss, from, ConfigCheckRef::WrongState,
1392 generation, other_generation,
1393 m_config_state, CS_UNINITIALIZED);
1394 return;
1395 }
1396
1397 if (m_config_change.m_loaded_config && ss.getOwnNodeId() < nodeId)
1398 {
1399 g_eventLogger->debug("Got CONFIG_CHECK_REQ from node: %d while "
1400 "having a loaded config (and my node is lower: %d). "
1401 "Returning incorrect state, causing it to be retried",
1402 nodeId,
1403 ss.getOwnNodeId());
1404 sendConfigCheckRef(ss, from, ConfigCheckRef::WrongState,
1405 generation, other_generation,
1406 m_config_state, CS_UNINITIALIZED);
1407 return;
1408 }
1409
1410 g_eventLogger->debug("Got CONFIG_CHECK_REQ from node: %d. "
1411 "Our generation: %d, other generation: %d, "
1412 "our state: %d, other state: %d, "
1413 "our checksum: 0x%.8x, other checksum: 0x%.8x",
1414 nodeId, generation, other_generation,
1415 m_config_state, other_state,
1416 checksum, other_checksum);
1417
1418 switch (m_config_state)
1419 {
1420 default:
1421 case CS_UNINITIALIZED:
1422 g_eventLogger->error("execCONFIG_CHECK_REQ: unhandled state");
1423 abort();
1424 break;
1425
1426 case CS_INITIAL:
1427 if (other_state != CS_INITIAL)
1428 {
1429 g_eventLogger->warning("Refusing CONGIG_CHECK_REQ from %u, "
1430 " it's not CS_INITIAL (I am). "
1431 " Waiting for my check",
1432 nodeId);
1433 sendConfigCheckRef(ss, from, ConfigCheckRef::WrongState,
1434 generation, other_generation,
1435 m_config_state, other_state);
1436 return;
1437 }
1438
1439 require(generation == 0);
1440 if (other_generation != generation)
1441 {
1442 g_eventLogger->warning("Refusing other node, it has different " \
1443 "generation: %d, expected: %d",
1444 other_generation, generation);
1445 sendConfigCheckRef(ss, from, ConfigCheckRef::WrongGeneration,
1446 generation, other_generation,
1447 m_config_state, other_state);
1448 return;
1449 }
1450
1451 if (other_checksum != checksum)
1452 {
1453 g_eventLogger->warning("Refusing other node, it has different "
1454 "checksum: 0x%.8x, expected: 0x%.8x",
1455 other_checksum, checksum);
1456 sendConfigCheckRef(ss, from, ConfigCheckRef::WrongChecksum,
1457 generation, other_generation,
1458 m_config_state, other_state);
1459 return;
1460 }
1461 break;
1462
1463 case CS_CONFIRMED:
1464
1465 if (other_state != CS_CONFIRMED)
1466 {
1467 g_eventLogger->warning("Refusing other node, it's in different " \
1468 "state: %d, expected: %d",
1469 other_state, m_config_state);
1470 sendConfigCheckRef(ss, from, ConfigCheckRef::WrongState,
1471 generation, other_generation,
1472 m_config_state, other_state);
1473 return;
1474 }
1475
1476 if (other_generation == generation)
1477 {
1478 // Same generation, make sure it has same checksum
1479 if (other_checksum != checksum)
1480 {
1481 g_eventLogger->warning("Refusing other node, it has different "
1482 "checksum: 0x%.8x, expected: 0x%.8x",
1483 other_checksum, checksum);
1484 sendConfigCheckRef(ss, from, ConfigCheckRef::WrongChecksum,
1485 generation, other_generation,
1486 m_config_state, other_state);
1487 return;
1488 }
1489 // OK!
1490 }
1491 else if (other_generation < generation)
1492 {
1493 g_eventLogger->warning("Refusing other node, it has lower " \
1494 " generation: %d, expected: %d",
1495 other_generation, generation);
1496 sendConfigCheckRef(ss, from, ConfigCheckRef::WrongGeneration,
1497 generation, other_generation,
1498 m_config_state, other_state);
1499 return;
1500 }
1501 else
1502 {
1503 g_eventLogger->error("Other node has higher generation: %d, this " \
1504 "node is out of sync with generation: %d",
1505 other_generation, generation);
1506 exit(1);
1507 }
1508
1509 break;
1510 }
1511
1512 sendConfigCheckConf(ss, from);
1513 return;
1514 }
1515
1516
1517 void
sendConfigCheckReq(SignalSender & ss,NodeBitmask to)1518 ConfigManager::sendConfigCheckReq(SignalSender& ss, NodeBitmask to)
1519 {
1520 SimpleSignal ssig;
1521 ConfigCheckReq* const req =
1522 CAST_PTR(ConfigCheckReq, ssig.getDataPtrSend());
1523 req->state = m_config_state;
1524 req->generation = m_config->getGeneration();
1525 req->checksum = config_check_checksum(m_config);
1526
1527 g_eventLogger->debug("Sending CONFIG_CHECK_REQ to %s",
1528 BaseString::getPrettyText(to).c_str());
1529
1530 require(m_waiting_for.isclear());
1531 m_waiting_for = ss.broadcastSignal(to, ssig, MGM_CONFIG_MAN,
1532 GSN_CONFIG_CHECK_REQ,
1533 ConfigCheckReq::SignalLength);
1534 }
1535
1536 static bool
send_config_in_check_ref(Uint32 x)1537 send_config_in_check_ref(Uint32 x)
1538 {
1539 if (x >= NDB_MAKE_VERSION(7,0,8))
1540 return true;
1541 return false;
1542 }
1543
1544 void
sendConfigCheckRef(SignalSender & ss,BlockReference to,ConfigCheckRef::ErrorCode error,Uint32 generation,Uint32 other_generation,ConfigState state,ConfigState other_state) const1545 ConfigManager::sendConfigCheckRef(SignalSender& ss, BlockReference to,
1546 ConfigCheckRef::ErrorCode error,
1547 Uint32 generation,
1548 Uint32 other_generation,
1549 ConfigState state,
1550 ConfigState other_state) const
1551 {
1552 int result;
1553 NodeId nodeId = refToNode(to);
1554 SimpleSignal ssig;
1555 ConfigCheckRef* const ref =
1556 CAST_PTR(ConfigCheckRef, ssig.getDataPtrSend());
1557 ref->error = error;
1558 ref->generation = other_generation;
1559 ref->expected_generation = generation;
1560 ref->state = other_state;
1561 ref->expected_state = state;
1562
1563 g_eventLogger->debug("Send CONFIG_CHECK_REF with error: %d to node: %d",
1564 error, nodeId);
1565
1566 if (!send_config_in_check_ref(ss.getNodeInfo(nodeId).m_info.m_version))
1567 {
1568 result = ss.sendSignal(nodeId, ssig, MGM_CONFIG_MAN,
1569 GSN_CONFIG_CHECK_REF, ConfigCheckRef::SignalLength);
1570 }
1571 else
1572 {
1573 UtilBuffer buf;
1574 m_config->pack(buf);
1575 ssig.ptr[0].p = (Uint32*)buf.get_data();
1576 ssig.ptr[0].sz = (buf.length() + 3) / 4;
1577 ssig.header.m_noOfSections = 1;
1578
1579 ref->length = buf.length();
1580
1581 g_eventLogger->debug("Sending CONFIG_CHECK_REF with config");
1582
1583 result = ss.sendFragmentedSignal(nodeId, ssig, MGM_CONFIG_MAN,
1584 GSN_CONFIG_CHECK_REF,
1585 ConfigCheckRef::SignalLengthWithConfig);
1586 }
1587
1588 if (result != 0)
1589 {
1590 g_eventLogger->warning("Failed to send CONFIG_CHECK_REF "
1591 "to node: %d, result: %d",
1592 nodeId, result);
1593 }
1594 }
1595
1596 void
sendConfigCheckConf(SignalSender & ss,BlockReference to) const1597 ConfigManager::sendConfigCheckConf(SignalSender& ss, BlockReference to) const
1598 {
1599 NodeId nodeId = refToNode(to);
1600 SimpleSignal ssig;
1601 ConfigCheckConf* const conf =
1602 CAST_PTR(ConfigCheckConf, ssig.getDataPtrSend());
1603 conf->state = m_config_state;
1604 conf->generation = m_config->getGeneration();
1605
1606 g_eventLogger->debug("Send CONFIG_CHECK_CONF to node: %d", nodeId);
1607
1608 ss.sendSignal(nodeId, ssig, MGM_CONFIG_MAN,
1609 GSN_CONFIG_CHECK_CONF, ConfigCheckConf::SignalLength);
1610 }
1611
1612
1613 void
execCONFIG_CHECK_CONF(SignalSender & ss,SimpleSignal * sig)1614 ConfigManager::execCONFIG_CHECK_CONF(SignalSender& ss, SimpleSignal* sig)
1615 {
1616 BlockReference from = sig->header.theSendersBlockRef;
1617 NodeId nodeId = refToNode(from);
1618 assert(m_waiting_for.get(nodeId));
1619 m_waiting_for.clear(nodeId);
1620 m_checked.set(nodeId);
1621
1622 g_eventLogger->debug("Got CONFIG_CHECK_CONF from node: %d",
1623 nodeId);
1624
1625 return;
1626 }
1627
1628
1629 void
execCONFIG_CHECK_REF(SignalSender & ss,SimpleSignal * sig)1630 ConfigManager::execCONFIG_CHECK_REF(SignalSender& ss, SimpleSignal* sig)
1631 {
1632 BlockReference from = sig->header.theSendersBlockRef;
1633 NodeId nodeId = refToNode(from);
1634 assert(m_waiting_for.get(nodeId));
1635
1636 const ConfigCheckRef* const ref =
1637 CAST_CONSTPTR(ConfigCheckRef, sig->getDataPtr());
1638
1639 if (!m_defragger.defragment(sig))
1640 return; // More fragments to come
1641
1642 g_eventLogger->debug("Got CONFIG_CHECK_REF from node %d, "
1643 "error: %d, message: '%s', "
1644 "generation: %d, expected generation: %d, "
1645 "state: %d, expected state: %d own-state: %u",
1646 nodeId, ref->error,
1647 ConfigCheckRef::errorMessage(ref->error),
1648 ref->generation, ref->expected_generation,
1649 ref->state, ref->expected_state,
1650 m_config_state);
1651
1652 assert(ref->generation != ref->expected_generation ||
1653 ref->state != ref->expected_state ||
1654 ref->error == ConfigCheckRef::WrongChecksum);
1655 if((Uint32)m_config_state != ref->state)
1656 {
1657 // The config state changed while this check was in the air
1658 // drop the signal and thus cause it to run again later
1659 require(!m_checked.get(nodeId));
1660 m_waiting_for.clear(nodeId);
1661 return;
1662 }
1663
1664 switch(m_config_state)
1665 {
1666 default:
1667 case CS_UNINITIALIZED:
1668 g_eventLogger->error("execCONFIG_CHECK_REF: unhandled state");
1669 abort();
1670 break;
1671
1672 case CS_INITIAL:
1673 if (ref->expected_state == CS_CONFIRMED)
1674 {
1675 if (sig->header.theLength != ConfigCheckRef::SignalLengthWithConfig)
1676 break; // No config in the REF -> no action
1677
1678 // The other node has sent it's config in the signal, use it if equal
1679 assert(sig->header.m_noOfSections == 1);
1680
1681 ConfigValuesFactory cf;
1682 require(cf.unpack(sig->ptr[0].p, ref->length));
1683
1684 Config other_config(cf.getConfigValues());
1685 assert(other_config.getGeneration() > 0);
1686
1687 unsigned exclude[]= {CFG_SECTION_SYSTEM, 0};
1688 if (!other_config.equal(m_config, exclude))
1689 {
1690 BaseString buf;
1691 g_eventLogger->error("This node was started --initial with "
1692 "a config which is _not_ equal to the one "
1693 "node %d is using. Refusing to start with "
1694 "different configurations, diff: \n%s",
1695 nodeId,
1696 other_config.diff2str(m_config, buf, exclude));
1697 exit(1);
1698 }
1699
1700 g_eventLogger->info("This node was started --inital with "
1701 "a config equal to the one node %d is using. "
1702 "Will use the config with generation %d "
1703 "from node %d!",
1704 nodeId, other_config.getGeneration(), nodeId);
1705
1706 if (! prepareConfigChange(&other_config))
1707 {
1708 abortConfigChange();
1709 g_eventLogger->error("Failed to write the fetched config to disk");
1710 exit(1);
1711 }
1712 commitConfigChange();
1713 m_config_state = CS_CONFIRMED;
1714 g_eventLogger->info("The fetched configuration has been saved!");
1715 m_waiting_for.clear(nodeId);
1716 m_checked.set(nodeId);
1717 delete m_config_change.m_initial_config;
1718 m_config_change.m_initial_config = NULL;
1719 return;
1720 }
1721 break;
1722
1723 case CS_CONFIRMED:
1724 if (ref->expected_state == CS_INITIAL)
1725 {
1726 g_eventLogger->info("Waiting for peer");
1727 m_waiting_for.clear(nodeId);
1728 return;
1729 }
1730 break;
1731 }
1732
1733 if (ref->error == ConfigCheckRef::WrongChecksum &&
1734 m_node_id < nodeId)
1735 {
1736 g_eventLogger->warning("Ignoring CONFIG_CHECK_REF for wrong checksum "
1737 "other node has higher node id and should "
1738 "shutdown");
1739 return;
1740 }
1741
1742 g_eventLogger->error("Terminating");
1743 exit(1);
1744 }
1745
1746 void
set_facade(TransporterFacade * f)1747 ConfigManager::set_facade(TransporterFacade * f)
1748 {
1749 m_facade = f;
1750 m_ss = new SignalSender(f, MGM_CONFIG_MAN);
1751 require(m_ss != 0);
1752 }
1753
1754 bool
config_loaded(Config * config)1755 ConfigManager::ConfigChange::config_loaded(Config* config)
1756 {
1757 if (m_loaded_config != 0)
1758 return false;
1759 m_loaded_config = config;
1760 return true;
1761 }
1762
1763 Config*
prepareLoadedConfig(Config * new_conf)1764 ConfigManager::prepareLoadedConfig(Config * new_conf)
1765 {
1766 /* Copy the necessary values from old to new config */
1767 if (!new_conf->setGeneration(m_config->getGeneration()))
1768 {
1769 g_eventLogger->error("Failed to copy generation from old config");
1770 delete new_conf;
1771 return 0;
1772 }
1773
1774 if (!new_conf->setName(m_config->getName()))
1775 {
1776 g_eventLogger->error("Failed to copy name from old config");
1777 delete new_conf;
1778 return 0;
1779 }
1780
1781 if (!new_conf->setPrimaryMgmNode(m_config->getPrimaryMgmNode()))
1782 {
1783 g_eventLogger->error("Failed to copy primary mgm node from old config");
1784 delete new_conf;
1785 return 0;
1786 }
1787
1788 /* Check if config has changed */
1789 if (!m_config->equal(new_conf))
1790 {
1791 /* Loaded config is different */
1792 BaseString buf;
1793 g_eventLogger->info("Detected change of %s on disk, will try to "
1794 "set it. "
1795 "This is the actual diff:\n%s",
1796 m_opts.mycnf ? "my.cnf" : m_opts.config_filename,
1797 m_config->diff2str(new_conf, buf));
1798
1799 return new_conf;
1800 }
1801 else
1802 {
1803 /* Loaded config was equal to current */
1804 g_eventLogger->info("Config equal!");
1805 delete new_conf;
1806 }
1807 return 0;
1808 }
1809
1810 void
run()1811 ConfigManager::run()
1812 {
1813 assert(m_facade);
1814 SignalSender & ss = * m_ss;
1815
1816 if (!m_opts.config_cache)
1817 {
1818 /* Stop receiving signals by closing ConfigManager's
1819 block in TransporterFacade */
1820 delete m_ss;
1821 m_ss = NULL;
1822
1823 /* Confirm the present config, free the space that was allocated for a
1824 new one, and terminate the manager thread */
1825 m_config_change.release();
1826 m_config_state = CS_CONFIRMED;
1827 ndbout_c("== ConfigManager disabled -- manager thread will exit ==");
1828 return;
1829 }
1830
1831 ss.lock();
1832
1833 // Build bitmaks of all mgm nodes in config
1834 m_config->get_nodemask(m_all_mgm, NDB_MGM_NODE_TYPE_MGM);
1835
1836 // exclude nowait-nodes from config change protcol
1837 m_all_mgm.bitANDC(m_opts.nowait_nodes);
1838 m_all_mgm.set(m_facade->ownId()); // Never exclude own node
1839
1840 start_checkers();
1841
1842 while (!is_stopped())
1843 {
1844
1845 if (m_config_change.m_state == ConfigChangeState::IDLE)
1846 {
1847 bool print_state = false;
1848 if (m_previous_state != m_config_state)
1849 {
1850 print_state = true;
1851 m_previous_state = m_config_state;
1852 }
1853
1854 /*
1855 Check if it's necessary to start something to get
1856 out of the current state
1857 */
1858 switch (m_config_state){
1859
1860 case CS_UNINITIALIZED:
1861 abort();
1862 break;
1863
1864 case CS_INITIAL:
1865 /*
1866 INITIAL => CONFIRMED
1867 When all mgm nodes has been started and checked that they
1868 are also in INITIAL, the node with the lowest node id
1869 will start an initial config change. When completed
1870 all nodes will be in CONFIRMED
1871 */
1872
1873 if (print_state)
1874 ndbout_c("==INITIAL==");
1875
1876 if (m_config_change.m_initial_config && // Updated config.ini was found
1877 m_started.equal(m_all_mgm) && // All mgmd started
1878 m_checked.equal(m_started) && // All nodes checked
1879 m_all_mgm.find(0) == m_facade->ownId()) // Lowest nodeid
1880 {
1881 Config* new_conf = m_config_change.m_initial_config;
1882 m_config_change.m_initial_config = 0;
1883 m_config_change.m_new_config = new_conf;
1884 startConfigChange(ss, ss.getOwnRef());
1885 }
1886 break;
1887
1888 case CS_CONFIRMED:
1889 if (print_state)
1890 ndbout_c("==CONFIRMED==");
1891
1892 if (m_config_change.m_loaded_config != 0 &&
1893 m_config_change.m_new_config == 0 &&
1894 m_started.equal(m_all_mgm) &&
1895 m_checked.equal(m_started))
1896 {
1897 Config* new_conf = m_config_change.m_loaded_config;
1898 m_config_change.m_loaded_config = 0;
1899 m_config_change.m_new_config = prepareLoadedConfig(new_conf);
1900 }
1901
1902 if (m_config_change.m_new_config && // Updated config.ini was found
1903 m_started.equal(m_all_mgm) && // All mgmd started
1904 m_checked.equal(m_started)) // All nodes checked
1905 {
1906 startConfigChange(ss, ss.getOwnRef());
1907 }
1908
1909 break;
1910
1911 default:
1912 break;
1913 }
1914
1915 // Send CHECK_CONFIG to all nodes not yet checked
1916 if (m_waiting_for.isclear() && // Nothing outstanding
1917 m_prepared_config == 0 && // and no config change ongoing
1918 !m_checked.equal(m_started)) // Some nodes have not been checked
1919 {
1920 NodeBitmask not_checked;
1921 not_checked.assign(m_started);
1922 not_checked.bitANDC(m_checked);
1923 sendConfigCheckReq(ss, not_checked);
1924 }
1925
1926 handle_exclude_nodes();
1927 }
1928
1929 SimpleSignal *sig = ss.waitFor((Uint32)1000);
1930 if (!sig)
1931 continue;
1932
1933 switch (sig->readSignalNumber()) {
1934
1935 case GSN_CONFIG_CHANGE_REQ:
1936 execCONFIG_CHANGE_REQ(ss, sig);
1937 break;
1938
1939 case GSN_CONFIG_CHANGE_IMPL_REQ:
1940 execCONFIG_CHANGE_IMPL_REQ(ss, sig);
1941 break;
1942
1943 case GSN_CONFIG_CHANGE_IMPL_REF:
1944 execCONFIG_CHANGE_IMPL_REF(ss, sig);
1945 break;
1946
1947 case GSN_CONFIG_CHANGE_IMPL_CONF:
1948 execCONFIG_CHANGE_IMPL_CONF(ss, sig);
1949 break;
1950
1951 case GSN_NF_COMPLETEREP:{
1952 const NFCompleteRep * const rep =
1953 CAST_CONSTPTR(NFCompleteRep, sig->getDataPtr());
1954 NodeId nodeId= rep->failedNodeId;
1955
1956 if (!m_all_mgm.get(nodeId)) // Not mgm node
1957 break;
1958
1959 ndbout_c("Node %d failed", nodeId);
1960 m_started.clear(nodeId);
1961 m_checked.clear(nodeId);
1962 m_defragger.node_failed(nodeId);
1963
1964 if (m_config_change.m_state != ConfigChangeState::IDLE)
1965 {
1966 g_eventLogger->info("Node %d failed during config change!!",
1967 nodeId);
1968 g_eventLogger->warning("Node failure handling of config "
1969 "change protocol not yet implemented!! "
1970 "No more configuration changes can occur, "
1971 "but the node will continue to serve the "
1972 "last good configuration");
1973 // TODO start take over of config change protocol
1974 }
1975 break;
1976 }
1977
1978 case GSN_NODE_FAILREP:
1979 // ignore, NF_COMPLETEREP will come
1980 break;
1981
1982 case GSN_API_REGCONF:{
1983 NodeId nodeId = refToNode(sig->header.theSendersBlockRef);
1984 if (m_all_mgm.get(nodeId) && // Is a mgm node
1985 !m_started.get(nodeId)) // Not already marked as started
1986 {
1987 g_eventLogger->info("Node %d connected", nodeId);
1988 m_started.set(nodeId);
1989 }
1990 break;
1991 }
1992
1993 case GSN_CONFIG_CHECK_REQ:
1994 execCONFIG_CHECK_REQ(ss, sig);
1995 break;
1996
1997 case GSN_CONFIG_CHECK_REF:
1998 execCONFIG_CHECK_REF(ss, sig);
1999 break;
2000
2001 case GSN_CONFIG_CHECK_CONF:
2002 execCONFIG_CHECK_CONF(ss, sig);
2003 break;
2004
2005 case GSN_TAKE_OVERTCCONF:
2006 case GSN_CONNECT_REP:
2007 break;
2008
2009 default:
2010 sig->print();
2011 g_eventLogger->error("Unknown signal received. SignalNumber: "
2012 "%i from (%d, 0x%x)",
2013 sig->readSignalNumber(),
2014 refToNode(sig->header.theSendersBlockRef),
2015 refToBlock(sig->header.theSendersBlockRef));
2016 abort();
2017 break;
2018 }
2019 }
2020 stop_checkers();
2021 ss.unlock();
2022 }
2023
2024
2025 #include "InitConfigFileParser.hpp"
2026
2027 Config*
load_init_config(const char * config_filename)2028 ConfigManager::load_init_config(const char* config_filename)
2029 {
2030 InitConfigFileParser parser;
2031 return parser.parseConfig(config_filename);
2032 }
2033
2034
2035 Config*
load_init_mycnf(void)2036 ConfigManager::load_init_mycnf(void)
2037 {
2038 InitConfigFileParser parser;
2039 return parser.parse_mycnf();
2040 }
2041
2042
2043 Config*
load_config(const char * config_filename,bool mycnf,BaseString & msg)2044 ConfigManager::load_config(const char* config_filename, bool mycnf,
2045 BaseString& msg)
2046 {
2047 Config* new_conf = NULL;
2048 if (mycnf && (new_conf = load_init_mycnf()) == NULL)
2049 {
2050 msg.assign("Could not load configuration from 'my.cnf'");
2051 return NULL;
2052 }
2053 else if (config_filename &&
2054 (new_conf = load_init_config(config_filename)) == NULL)
2055 {
2056 msg.assfmt("Could not load configuration from '%s'",
2057 config_filename);
2058 return NULL;
2059 }
2060
2061 return new_conf;
2062 }
2063
2064
2065 Config*
load_config(void) const2066 ConfigManager::load_config(void) const
2067 {
2068 BaseString msg;
2069 Config* new_conf = NULL;
2070 if ((new_conf = load_config(m_opts.config_filename,
2071 m_opts.mycnf, msg)) == NULL)
2072 {
2073 g_eventLogger->error(msg);
2074 return NULL;
2075 }
2076 return new_conf;
2077 }
2078
2079
2080 Config*
fetch_config(void)2081 ConfigManager::fetch_config(void)
2082 {
2083 DBUG_ENTER("ConfigManager::fetch_config");
2084
2085 while(true)
2086 {
2087 /* Loop until config loaded from other mgmd(s) */
2088 char buf[128];
2089 g_eventLogger->info("Trying to get configuration from other mgmd(s) "\
2090 "using '%s'...",
2091 m_config_retriever.get_connectstring(buf, sizeof(buf)));
2092
2093 if (m_config_retriever.is_connected() ||
2094 m_config_retriever.do_connect(30 /* retry */,
2095 1 /* delay */,
2096 0 /* verbose */) == 0)
2097 {
2098 g_eventLogger->info("Connected to '%s:%d'...",
2099 m_config_retriever.get_mgmd_host(),
2100 m_config_retriever.get_mgmd_port());
2101 break;
2102 }
2103 }
2104 // read config from other management server
2105 ndb_mgm_configuration * tmp =
2106 m_config_retriever.getConfig(m_config_retriever.get_mgmHandle());
2107
2108 // Disconnect from other mgmd
2109 m_config_retriever.disconnect();
2110
2111 if (tmp == NULL) {
2112 g_eventLogger->error("%s", m_config_retriever.getErrorString());
2113 DBUG_RETURN(NULL);
2114 }
2115
2116 DBUG_RETURN(new Config(tmp));
2117 }
2118
2119
2120 static bool
delete_file(const char * file_name)2121 delete_file(const char* file_name)
2122 {
2123 #ifdef _WIN32
2124 if (DeleteFile(file_name) == 0)
2125 {
2126 g_eventLogger->error("Failed to delete file '%s', error: %d",
2127 file_name, GetLastError());
2128 return false;
2129 }
2130 #else
2131 if (unlink(file_name) == -1)
2132 {
2133 g_eventLogger->error("Failed to delete file '%s', error: %d",
2134 file_name, errno);
2135 return false;
2136 }
2137 #endif
2138 return true;
2139 }
2140
2141
2142 bool
delete_saved_configs(void) const2143 ConfigManager::delete_saved_configs(void) const
2144 {
2145 NdbDir::Iterator iter;
2146
2147 if (!m_configdir)
2148 {
2149 // No configdir -> no files to delete
2150 return true;
2151 }
2152
2153 if (iter.open(m_configdir) != 0)
2154 return false;
2155
2156 bool result = true;
2157 const char* name;
2158 unsigned nodeid;
2159 char extra; // Avoid matching ndb_2_config.bin.2.tmp
2160 BaseString full_name;
2161 unsigned version;
2162 while ((name= iter.next_file()) != NULL)
2163 {
2164 if (sscanf(name,
2165 "ndb_%u_config.bin.%u%c",
2166 &nodeid, &version, &extra) == 2)
2167 {
2168 // ndbout_c("match: %s", name);
2169
2170 if (nodeid != m_node_id)
2171 continue;
2172
2173 // Delete the file
2174 full_name.assfmt("%s%s%s", m_configdir, DIR_SEPARATOR, name);
2175 g_eventLogger->debug("Deleting binary config file '%s'",
2176 full_name.c_str());
2177 if (!delete_file(full_name.c_str()))
2178 {
2179 // Make function return false, but continue and try
2180 // to delete other files
2181 result = false;
2182 }
2183 }
2184 }
2185
2186 return result;
2187 }
2188
2189
2190 bool
saved_config_exists(BaseString & config_name) const2191 ConfigManager::saved_config_exists(BaseString& config_name) const
2192 {
2193 NdbDir::Iterator iter;
2194
2195 if (!m_configdir ||
2196 iter.open(m_configdir) != 0)
2197 return 0;
2198
2199 const char* name;
2200 unsigned nodeid;
2201 char extra; // Avoid matching ndb_2_config.bin.2.tmp
2202 unsigned version, max_version= 0;
2203 while ((name= iter.next_file()) != NULL)
2204 {
2205 if (sscanf(name,
2206 "ndb_%u_config.bin.%u%c",
2207 &nodeid, &version, &extra) == 2)
2208 {
2209 // ndbout_c("match: %s", name);
2210
2211 if (nodeid != m_node_id)
2212 continue;
2213
2214 if (version>max_version)
2215 max_version= version;
2216 }
2217 }
2218
2219 if (max_version == 0)
2220 return false;
2221
2222 config_name.assfmt("%s%sndb_%u_config.bin.%u",
2223 m_configdir, DIR_SEPARATOR, m_node_id, max_version);
2224 return true;
2225 }
2226
2227
2228
2229 bool
failed_config_change_exists() const2230 ConfigManager::failed_config_change_exists() const
2231 {
2232 NdbDir::Iterator iter;
2233
2234 if (!m_configdir ||
2235 iter.open(m_configdir) != 0)
2236 return 0;
2237
2238 const char* name;
2239 char tmp;
2240 unsigned nodeid;
2241 unsigned version;
2242 while ((name= iter.next_file()) != NULL)
2243 {
2244 // Check for a previously failed config
2245 // change, ie. ndb_<nodeid>_config.bin.X.tmp exist
2246 if (sscanf(name,
2247 "ndb_%u_config.bin.%u.tm%c",
2248 &nodeid, &version, &tmp) == 3 &&
2249 tmp == 'p')
2250 {
2251 if (nodeid != m_node_id)
2252 continue;
2253
2254 g_eventLogger->error("Found binary configuration file '%s%s%s' from "
2255 "previous failed attempt to change config. This "
2256 "error must be manually resolved by removing the "
2257 "file(ie. ROLLBACK) or renaming the file to it's "
2258 "name without the .tmp extension(ie COMMIT). Make "
2259 "sure to check the other nodes so that they all "
2260 "have the same configuration generation.",
2261 m_configdir, DIR_SEPARATOR, name);
2262 return true;
2263 }
2264 }
2265
2266 return false;
2267 }
2268
2269
2270 Config*
load_saved_config(const BaseString & config_name)2271 ConfigManager::load_saved_config(const BaseString& config_name)
2272 {
2273 struct ndb_mgm_configuration * tmp =
2274 m_config_retriever.getConfig(config_name.c_str());
2275 if(tmp == NULL)
2276 {
2277 g_eventLogger->error("Failed to load config from '%s', error: '%s'",
2278 config_name.c_str(),
2279 m_config_retriever.getErrorString());
2280 return NULL;
2281 }
2282
2283 Config* conf = new Config(tmp);
2284 if (conf == NULL)
2285 g_eventLogger->error("Failed to load config, out of memory");
2286 return conf;
2287 }
2288
2289 bool
get_packed_config(ndb_mgm_node_type nodetype,BaseString * buf64,BaseString & error)2290 ConfigManager::get_packed_config(ndb_mgm_node_type nodetype,
2291 BaseString* buf64, BaseString& error)
2292 {
2293 Guard g(m_config_mutex);
2294
2295 /*
2296 Only allow the config to be exported if it's been confirmed
2297 or if another mgmd is asking for it
2298 */
2299 switch(m_config_state)
2300 {
2301 case CS_INITIAL:
2302 if (nodetype == NDB_MGM_NODE_TYPE_MGM)
2303 ; // allow other mgmd to fetch initial configuration
2304 else
2305 {
2306 error.assign("The cluster configuration is not yet confirmed "
2307 "by all defined management servers. ");
2308 if (m_config_change.m_state != ConfigChangeState::IDLE)
2309 {
2310 error.append("Initial configuration change is in progress.");
2311 }
2312 else
2313 {
2314 NodeBitmask not_started(m_all_mgm);
2315 not_started.bitANDC(m_checked);
2316 error.append("This management server is still waiting for node ");
2317 error.append(BaseString::getPrettyText(not_started));
2318 error.append(" to connect.");
2319 }
2320 return false;
2321 }
2322 break;
2323
2324 case CS_CONFIRMED:
2325 // OK
2326 break;
2327
2328 default:
2329 error.assign("get_packed_config, unknown config state: %d",
2330 m_config_state);
2331 return false;
2332 break;
2333
2334 }
2335
2336 require(m_config != 0);
2337 if (buf64)
2338 {
2339 if (!m_packed_config.length())
2340 {
2341 // No packed config exist, generate a new one
2342 Config config_copy(m_config);
2343 if (!m_dynamic_ports.set_in_config(&config_copy))
2344 {
2345 error.assign("get_packed_config, failed to set dynamic ports in config");
2346 return false;
2347 }
2348
2349 if (!config_copy.pack64(m_packed_config))
2350 {
2351 error.assign("get_packed_config, failed to pack config_copy");
2352 return false;
2353 }
2354 }
2355 buf64->assign(m_packed_config, m_packed_config.length());
2356 }
2357 return true;
2358 }
2359
2360
2361 bool
init_checkers(const Config * config)2362 ConfigManager::init_checkers(const Config* config)
2363 {
2364
2365 // Init one thread for each other mgmd
2366 // in the config and check which version it has. If version
2367 // does not have config manager, set this node to ignore
2368 // that node in the config change protocol
2369
2370 BaseString connect_string;
2371 ConfigIter iter(config, CFG_SECTION_NODE);
2372 for (iter.first(); iter.valid(); iter.next())
2373 {
2374
2375 // Only MGM nodes
2376 Uint32 type;
2377 if (iter.get(CFG_TYPE_OF_SECTION, &type) ||
2378 type != NODE_TYPE_MGM)
2379 continue;
2380
2381 // Not this node
2382 Uint32 nodeid;
2383 if(iter.get(CFG_NODE_ID, &nodeid) ||
2384 nodeid == m_node_id)
2385 continue;
2386
2387 const char* hostname;
2388 Uint32 port;
2389 require(!iter.get(CFG_NODE_HOST, &hostname));
2390 require(!iter.get(CFG_MGM_PORT, &port));
2391 connect_string.assfmt("%s:%u",hostname,port);
2392
2393 ConfigChecker* checker =
2394 new ConfigChecker(*this, connect_string.c_str(),
2395 m_opts.bind_address, nodeid);
2396 if (!checker)
2397 {
2398 g_eventLogger->error("Failed to create ConfigChecker");
2399 return false;
2400 }
2401
2402 if (!checker->init())
2403 return false;
2404
2405 m_checkers.push_back(checker);
2406 }
2407 return true;
2408 }
2409
2410
2411 void
start_checkers(void)2412 ConfigManager::start_checkers(void)
2413 {
2414 for (unsigned i = 0; i < m_checkers.size(); i++)
2415 m_checkers[i]->start();
2416 }
2417
2418
2419 void
stop_checkers(void)2420 ConfigManager::stop_checkers(void)
2421 {
2422 for (unsigned i = 0; i < m_checkers.size(); i++)
2423 {
2424 ConfigChecker* checker = m_checkers[i];
2425 ndbout << "stop checker " << i << endl;
2426 checker->stop();
2427 delete checker;
2428 }
2429 }
2430
2431
/**
  Create a checker for one other management server. The checker is
  a MgmtThread named "ConfigChecker".

  @param manager         the ConfigManager owning this checker
  @param connect_string  stored in m_connect_string
  @param bindaddress     passed on to the config retriever
  @param nodeid          node id of the mgmd to check

  NOTE(review): the config retriever is created from the global
  opt_ndb_connectstring and opt_ndb_nodeid, 'connect_string' is only
  stored in m_connect_string. Confirm that this is intended.
*/
ConfigManager::ConfigChecker::ConfigChecker(ConfigManager& manager,
                                            const char* connect_string,
                                            const char * bindaddress,
                                            NodeId nodeid) :
  MgmtThread("ConfigChecker"),
  m_manager(manager),
  m_config_retriever(opt_ndb_connectstring, opt_ndb_nodeid, NDB_VERSION,
                     NDB_MGM_NODE_TYPE_MGM, bindaddress),
  m_connect_string(connect_string),
  m_nodeid(nodeid)
{
}
2444
2445
2446 bool
init()2447 ConfigManager::ConfigChecker::init()
2448 {
2449 if (m_config_retriever.hasError())
2450 {
2451 g_eventLogger->error("%s", m_config_retriever.getErrorString());
2452 return false;
2453 }
2454
2455 return true;
2456 }
2457
2458
2459 void
run()2460 ConfigManager::ConfigChecker::run()
2461 {
2462 // Connect to other mgmd inifintely until thread is stopped
2463 // or connect suceeds
2464 g_eventLogger->debug("ConfigChecker, connecting to '%s'",
2465 m_connect_string.c_str());
2466 while(m_config_retriever.do_connect(0 /* retry */,
2467 1 /* delay */,
2468 0 /* verbose */) != 0)
2469 {
2470 if (is_stopped())
2471 {
2472 g_eventLogger->debug("ConfigChecker, thread is stopped");
2473 return; // Thread is stopped
2474 }
2475
2476 NdbSleep_SecSleep(1);
2477 }
2478
2479 // Connected
2480 g_eventLogger->debug("ConfigChecker, connected to '%s'",
2481 m_connect_string.c_str());
2482
2483 // Check version
2484 int major, minor, build;
2485 char ver_str[50];
2486 if (!ndb_mgm_get_version(m_config_retriever.get_mgmHandle(),
2487 &major, &minor, &build,
2488 sizeof(ver_str), ver_str))
2489 {
2490 g_eventLogger->error("Could not get version from mgmd on '%s'",
2491 m_connect_string.c_str());
2492 return;
2493 }
2494 g_eventLogger->debug("mgmd on '%s' has version %d.%d.%d",
2495 m_connect_string.c_str(), major, minor, build);
2496
2497 // Versions prior to 7 don't have ConfigManager
2498 // exclude it from config change protocol
2499 if (major < 7)
2500 {
2501 g_eventLogger->info("Excluding node %d with version %d.%d.%d from "
2502 "config change protocol",
2503 m_nodeid, major, minor, build);
2504 m_manager.m_exclude_nodes.push_back(m_nodeid);
2505 }
2506
2507 return;
2508 }
2509
2510
2511 void
handle_exclude_nodes(void)2512 ConfigManager::handle_exclude_nodes(void)
2513 {
2514
2515 if (!m_waiting_for.isclear())
2516 return; // Other things going on
2517
2518 switch (m_config_state)
2519 {
2520 case CS_INITIAL:
2521 m_exclude_nodes.lock();
2522 for (unsigned i = 0; i < m_exclude_nodes.size(); i++)
2523 {
2524 NodeId nodeid = m_exclude_nodes[i];
2525 g_eventLogger->debug("Handle exclusion of node %d", nodeid);
2526 m_all_mgm.clear(nodeid);
2527 }
2528 m_exclude_nodes.unlock();
2529 break;
2530
2531 default:
2532 break;
2533 }
2534 m_exclude_nodes.clear();
2535
2536 }
2537
2538
2539 static bool
check_dynamic_port_configured(const Config * config,int node1,int node2,BaseString & msg)2540 check_dynamic_port_configured(const Config* config,
2541 int node1, int node2,
2542 BaseString& msg)
2543 {
2544 ConfigIter iter(config, CFG_SECTION_CONNECTION);
2545
2546 for(;iter.valid();iter.next()) {
2547 Uint32 n1, n2;
2548 if (iter.get(CFG_CONNECTION_NODE_1, &n1) != 0 ||
2549 iter.get(CFG_CONNECTION_NODE_2, &n2) != 0)
2550 {
2551 msg.assign("Could not get node1 or node2 from connection section");
2552 return false;
2553 }
2554
2555 if((n1 == (Uint32)node1 && n2 == (Uint32)node2) ||
2556 (n1 == (Uint32)node2 && n2 == (Uint32)node1))
2557 break;
2558 }
2559 if(!iter.valid()) {
2560 msg.assfmt("Unable to find connection between nodes %d -> %d",
2561 node1, node2);
2562 return false;
2563 }
2564
2565 Uint32 port;
2566 if(iter.get(CFG_CONNECTION_SERVER_PORT, &port) != 0) {
2567 msg.assign("Unable to get current value of CFG_CONNECTION_SERVER_PORT");
2568 return false;
2569 }
2570
2571 if (port != 0)
2572 {
2573 // Dynamic ports is zero in configuration
2574 msg.assfmt("Server port for %d -> %d is not marked as dynamic, value: %u",
2575 node1, node2, port);
2576 return false;
2577 }
2578 return true;
2579 }
2580
2581
2582 bool
set_dynamic_port(int node1,int node2,int value,BaseString & msg)2583 ConfigManager::set_dynamic_port(int node1, int node2, int value,
2584 BaseString& msg)
2585 {
2586 MgmtSrvr::DynPortSpec port = { node2, value };
2587
2588 return set_dynamic_ports(node1, &port, 1, msg);
2589 }
2590
2591
2592 bool
set_dynamic_ports(int node,MgmtSrvr::DynPortSpec ports[],unsigned num_ports,BaseString & msg)2593 ConfigManager::set_dynamic_ports(int node, MgmtSrvr::DynPortSpec ports[],
2594 unsigned num_ports, BaseString &msg)
2595 {
2596 Guard g(m_config_mutex);
2597
2598 // Check that all ports to set are configured as dynamic
2599 for(unsigned i = 0; i < num_ports; i++)
2600 {
2601 const int node2 = ports[i].node;
2602 if (!check_dynamic_port_configured(m_config,
2603 node, node2, msg))
2604 {
2605 return false;
2606 }
2607 }
2608
2609 // Set the dynamic ports
2610 bool result = true;
2611 for(unsigned i = 0; i < num_ports; i++)
2612 {
2613 const int node2 = ports[i].node;
2614 const int value = ports[i].port;
2615 if (!m_dynamic_ports.set(node, node2, value))
2616 {
2617 // Failed to set one port, report problem but since it's very unlikley
2618 // that this step fails, continue and attempt to set remaining ports.
2619 msg.assfmt("Failed to set dynamic port(s)");
2620 result = false;
2621 }
2622 }
2623
2624 // Removed cache of packed config, need to be recreated
2625 // to include the new dynamic port
2626 m_packed_config.clear();
2627
2628 return result;
2629 }
2630
2631
2632 bool
get_dynamic_port(int node1,int node2,int * value,BaseString & msg) const2633 ConfigManager::get_dynamic_port(int node1, int node2, int *value,
2634 BaseString& msg) const {
2635
2636 Guard g(m_config_mutex);
2637 if (!check_dynamic_port_configured(m_config,
2638 node1, node2, msg))
2639 return false;
2640
2641 if (!m_dynamic_ports.get(node1, node2, value))
2642 {
2643 msg.assfmt("Could not get dynamic port for %d -> %d", node1, node2);
2644 return false;
2645 }
2646 return true;
2647 }
2648
2649
check(int & node1,int & node2) const2650 bool ConfigManager::DynamicPorts::check(int& node1, int& node2) const
2651 {
2652 // Always use smaller node first
2653 if (node1 > node2)
2654 {
2655 int tmp = node1;
2656 node1 = node2;
2657 node2 = tmp;
2658 }
2659
2660 // Only NDB nodes can be dynamic port server
2661 if (node1 <= 0 || node1 >= MAX_NDB_NODES)
2662 return false;
2663 if (node2 <= 0 || node2 >= MAX_NODES)
2664 return false;
2665 if (node1 == node2)
2666 return false;
2667
2668 return true;
2669 }
2670
2671
set(int node1,int node2,int port)2672 bool ConfigManager::DynamicPorts::set(int node1, int node2, int port)
2673 {
2674 if (!check(node1, node2))
2675 return false;
2676
2677 if (!m_ports.insert(NodePair(node1, node2), port, true))
2678 return false;
2679
2680 return true;
2681 }
2682
2683
get(int node1,int node2,int * port) const2684 bool ConfigManager::DynamicPorts::get(int node1, int node2, int* port) const
2685 {
2686 if (!check(node1, node2))
2687 return false;
2688
2689 int value = 0; // Return 0 if not found
2690 (void)m_ports.search(NodePair(node1, node2), value);
2691
2692 *port = (int)value;
2693 return true;
2694 }
2695
2696
2697 bool
set_in_config(Config * config)2698 ConfigManager::DynamicPorts::set_in_config(Config* config)
2699 {
2700 bool result = true;
2701 ConfigIter iter(config, CFG_SECTION_CONNECTION);
2702
2703 for(;iter.valid();iter.next()) {
2704 Uint32 port = 0;
2705 if (iter.get(CFG_CONNECTION_SERVER_PORT, &port) != 0 ||
2706 port != 0)
2707 continue; // Not configured as dynamic port
2708
2709 Uint32 n1, n2;
2710 require(iter.get(CFG_CONNECTION_NODE_1, &n1) == 0);
2711 require(iter.get(CFG_CONNECTION_NODE_2, &n2) == 0);
2712
2713 int dyn_port;
2714 if (!get(n1, n2, &dyn_port) || dyn_port == 0)
2715 continue; // No dynamic port registered
2716
2717 // Write the dynamic port to config
2718 port = (Uint32)dyn_port;
2719 ConfigValues::Iterator i2(config->m_configValues->m_config,
2720 iter.m_config);
2721 if(i2.set(CFG_CONNECTION_SERVER_PORT, port) == false)
2722 result = false;
2723 }
2724 return result;
2725 }
2726
2727
// Explicit instantiations of the Vector template for the
// pointer element types listed below
template class Vector<ConfigSubscriber*>;
template class Vector<ConfigManager::ConfigChecker*>;
2730
2731