1 /* Copyright (c) 2008, 2011, Oracle and/or its affiliates. All rights reserved.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License, version 2.0,
5 as published by the Free Software Foundation.
6
7 This program is also distributed with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the program and your derivative works with the
12 separately licensed software that they have included with MySQL.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License, version 2.0, for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */
22
23
24 #include "ConfigManager.hpp"
25 #include "MgmtSrvr.hpp"
26 #include <NdbDir.hpp>
27
28 #include <NdbConfig.h>
29 #include <NdbSleep.h>
30 #include <kernel/GlobalSignalNumbers.h>
31 #include <SignalSender.hpp>
32 #include <NdbApiSignal.hpp>
33 #include <signaldata/NFCompleteRep.hpp>
34 #include <signaldata/NodeFailRep.hpp>
35 #include <signaldata/ApiRegSignalData.hpp>
36 #include <ndb_version.h>
37
38 #include <EventLogger.hpp>
39 extern EventLogger * g_eventLogger;
40
41 extern "C" const char* opt_ndb_connectstring;
42 extern "C" int opt_ndb_nodeid;
43
ConfigManager(const MgmtSrvr::MgmtOpts & opts,const char * configdir)44 ConfigManager::ConfigManager(const MgmtSrvr::MgmtOpts& opts,
45 const char* configdir) :
46 MgmtThread("ConfigManager"),
47 m_opts(opts),
48 m_facade(NULL),
49 m_ss(NULL),
50 m_config_mutex(NULL),
51 m_config(NULL),
52 m_config_retriever(opt_ndb_connectstring,
53 opt_ndb_nodeid,
54 NDB_VERSION,
55 NDB_MGM_NODE_TYPE_MGM,
56 opts.bind_address),
57 m_config_state(CS_UNINITIALIZED),
58 m_previous_state(CS_UNINITIALIZED),
59 m_prepared_config(NULL),
60 m_node_id(0),
61 m_configdir(configdir)
62 {
63 }
64
65
~ConfigManager()66 ConfigManager::~ConfigManager()
67 {
68 delete m_config;
69 delete m_prepared_config;
70 if (m_ss)
71 delete m_ss;
72 NdbMutex_Destroy(m_config_mutex);
73 }
74
75
76 /**
77 alone_on_host
78
79 Check if this is the only node of "type" on
80 this host
81
82 */
83
84 static bool
alone_on_host(Config * conf,Uint32 own_type,Uint32 own_nodeid)85 alone_on_host(Config* conf,
86 Uint32 own_type,
87 Uint32 own_nodeid)
88 {
89 ConfigIter iter(conf, CFG_SECTION_NODE);
90 for (iter.first(); iter.valid(); iter.next())
91 {
92 Uint32 type;
93 if(iter.get(CFG_TYPE_OF_SECTION, &type) ||
94 type != own_type)
95 continue;
96
97 Uint32 nodeid;
98 if(iter.get(CFG_NODE_ID, &nodeid) ||
99 nodeid == own_nodeid)
100 continue;
101
102 const char * hostname;
103 if(iter.get(CFG_NODE_HOST, &hostname))
104 continue;
105
106 if (SocketServer::tryBind(0,hostname))
107 {
108 // Another MGM node was also setup on this host
109 g_eventLogger->debug("Not alone on host %s, node %d " \
110 "will also run here",
111 hostname, nodeid);
112 return false;
113 }
114 }
115 return true;
116 }
117
118
119 /**
120 find_nodeid_from_configdir
121
122 Check if configdir only contains config files
123 with one nodeid -> read the latest and confirm
124 there should only be one mgm node on this host
125 */
126
127 NodeId
find_nodeid_from_configdir(void)128 ConfigManager::find_nodeid_from_configdir(void)
129 {
130 BaseString config_name;
131 NdbDir::Iterator iter;
132
133 if (iter.open(m_configdir) != 0)
134 return 0;
135
136 const char* name;
137 unsigned found_nodeid= 0;
138 unsigned nodeid;
139 char extra; // Avoid matching ndb_2_config.bin.2.tmp
140 unsigned version, max_version = 0;
141 while ((name = iter.next_file()) != NULL)
142 {
143 if (sscanf(name,
144 "ndb_%u_config.bin.%u%c",
145 &nodeid, &version, &extra) == 2)
146 {
147 // ndbout_c("match: %s", name);
148
149 if (nodeid != found_nodeid)
150 {
151 if (found_nodeid != 0)
152 return 0; // Found more than one nodeid
153 found_nodeid= nodeid;
154 }
155
156 if (version > max_version)
157 max_version = version;
158 }
159 }
160
161 if (max_version == 0)
162 return 0;
163
164 config_name.assfmt("%s%sndb_%u_config.bin.%u",
165 m_configdir, DIR_SEPARATOR, found_nodeid, max_version);
166
167 Config* conf;
168 if (!(conf = load_saved_config(config_name)))
169 return 0;
170
171 if (!m_config_retriever.verifyConfig(conf->m_configValues,
172 found_nodeid) ||
173 !alone_on_host(conf, NDB_MGM_NODE_TYPE_MGM, found_nodeid))
174 {
175 delete conf;
176 return 0;
177 }
178
179 delete conf;
180 return found_nodeid;
181 }
182
183
184 /**
185 find_own_nodeid
186
187 Return the nodeid of the MGM node
188 defined to run on this host
189
190 Return 0 if more than one node is defined
191 */
192
193 static NodeId
find_own_nodeid(Config * conf)194 find_own_nodeid(Config* conf)
195 {
196 NodeId found_nodeid= 0;
197 ConfigIter iter(conf, CFG_SECTION_NODE);
198 for (iter.first(); iter.valid(); iter.next())
199 {
200 Uint32 type;
201 if(iter.get(CFG_TYPE_OF_SECTION, &type) ||
202 type != NDB_MGM_NODE_TYPE_MGM)
203 continue;
204
205 Uint32 nodeid;
206 require(iter.get(CFG_NODE_ID, &nodeid) == 0);
207
208 const char * hostname;
209 if(iter.get(CFG_NODE_HOST, &hostname))
210 continue;
211
212 if (SocketServer::tryBind(0,hostname))
213 {
214 // This node is setup to run on this host
215 if (found_nodeid == 0)
216 found_nodeid = nodeid;
217 else
218 return 0; // More than one host on this node
219 }
220 }
221 return found_nodeid;
222 }
223
224
225 NodeId
find_nodeid_from_config(void)226 ConfigManager::find_nodeid_from_config(void)
227 {
228 if (!m_opts.mycnf &&
229 !m_opts.config_filename)
230 return 0;
231
232 Config* conf = load_config();
233 if (conf == NULL)
234 return 0;
235
236 NodeId found_nodeid = find_own_nodeid(conf);
237 if (found_nodeid == 0 ||
238 !m_config_retriever.verifyConfig(conf->m_configValues, found_nodeid))
239 {
240 delete conf;
241 return 0;
242 }
243
244 return found_nodeid;
245 }
246
247
248 bool
init_nodeid(void)249 ConfigManager::init_nodeid(void)
250 {
251 DBUG_ENTER("ConfigManager::init_nodeid");
252
253 NodeId nodeid = m_config_retriever.get_configuration_nodeid();
254 if (nodeid)
255 {
256 // Nodeid was specifed on command line or in NDB_CONNECTSTRING
257 g_eventLogger->debug("Got nodeid: %d from command line " \
258 "or NDB_CONNECTSTRING", nodeid);
259 m_node_id = nodeid;
260 DBUG_RETURN(true);
261 }
262
263 nodeid = find_nodeid_from_configdir();
264 if (nodeid)
265 {
266 // Found nodeid by searching in configdir
267 g_eventLogger->debug("Got nodeid: %d from searching in configdir",
268 nodeid);
269 m_node_id = nodeid;
270 DBUG_RETURN(true);
271 }
272
273 nodeid = find_nodeid_from_config();
274 if (nodeid)
275 {
276 // Found nodeid by looking in the config given on command line
277 g_eventLogger->debug("Got nodeid: %d from config file given " \
278 "on command line",
279 nodeid);
280 m_node_id = nodeid;
281 DBUG_RETURN(true);
282 }
283
284 // We _could_ try connecting to other running mgmd(s)
285 // and fetch our nodeid. But, that introduces a dependency
286 // that is not beneficial for a shared nothing cluster, since
287 // it might only work when other mgmd(s) are started. If all
288 // mgmd(s) is down it would require manual intervention.
289 // Better to require the node id to always be specified
290 // on the command line(or the above _local_ magic)
291
292 g_eventLogger->error("Could not determine which nodeid to use for "\
293 "this node. Specify it with --ndb-nodeid=<nodeid> "\
294 "on command line");
295 DBUG_RETURN(false);
296 }
297
298
299 static void
reset_dynamic_ports_in_config(const Config * config)300 reset_dynamic_ports_in_config(const Config* config)
301 {
302 ConfigIter iter(config, CFG_SECTION_CONNECTION);
303
304 for(;iter.valid();iter.next()) {
305 Uint32 port;
306 require(iter.get(CFG_CONNECTION_SERVER_PORT, &port) == 0);
307
308 if ((int)port < 0)
309 {
310 port = 0;
311 ConfigValues::Iterator i2(config->m_configValues->m_config,
312 iter.m_config);
313 require(i2.set(CFG_CONNECTION_SERVER_PORT, port));
314 }
315 }
316 }
317
318
319 bool
init(void)320 ConfigManager::init(void)
321 {
322 DBUG_ENTER("ConfigManager::init");
323
324 m_config_mutex = NdbMutex_Create();
325 if (!m_config_mutex)
326 {
327 g_eventLogger->error("Failed to create mutex in ConfigManager!");
328 DBUG_RETURN(false);
329 }
330
331 require(m_config_state == CS_UNINITIALIZED);
332
333 if (m_config_retriever.hasError())
334 {
335 g_eventLogger->error("%s", m_config_retriever.getErrorString());
336 DBUG_RETURN(false);
337 }
338
339 if (!init_nodeid())
340 DBUG_RETURN(false);
341
342 if (m_opts.initial && !delete_saved_configs())
343 DBUG_RETURN(false);
344
345 if (failed_config_change_exists())
346 DBUG_RETURN(false);
347
348 BaseString config_bin_name;
349 if (saved_config_exists(config_bin_name))
350 {
351 Config* conf = NULL;
352 if (!(conf = load_saved_config(config_bin_name)))
353 DBUG_RETURN(false);
354
355 if (!config_ok(conf))
356 DBUG_RETURN(false);
357
358 set_config(conf);
359 m_config_state = CS_CONFIRMED;
360
361 g_eventLogger->info("Loaded config from '%s'", config_bin_name.c_str());
362
363 if (m_opts.reload && // --reload
364 (m_opts.mycnf || m_opts.config_filename))
365 {
366 Config* new_conf = load_config();
367 if (new_conf == NULL)
368 DBUG_RETURN(false);
369
370 /**
371 * Add config to set once ConfigManager is fully started
372 */
373 m_config_change.config_loaded(new_conf);
374 g_eventLogger->info("Loaded configuration from '%s', will try " \
375 "to set it once started",
376 m_opts.mycnf ? "my.cnf" : m_opts.config_filename);
377 }
378 }
379 else
380 {
381 if (m_opts.mycnf || m_opts.config_filename)
382 {
383 Config* conf = load_config();
384 if (conf == NULL)
385 DBUG_RETURN(false);
386
387 if (!config_ok(conf))
388 DBUG_RETURN(false);
389
390 /*
391 Set this node as primary node for config.ini/my.cnf
392 in order to make it possible that make sure an old
393 config.ini is only loaded with --force
394 */
395 if (!conf->setPrimaryMgmNode(m_node_id))
396 {
397 g_eventLogger->error("Failed to set primary MGM node");
398 DBUG_RETURN(false);
399 }
400
401 /* Use the initial config for now */
402 set_config(conf);
403
404 g_eventLogger->info("Got initial configuration from '%s', will try " \
405 "to set it when all ndb_mgmd(s) started",
406 m_opts.mycnf ? "my.cnf" : m_opts.config_filename);
407 m_config_change.m_initial_config = new Config(conf); // Copy config
408 m_config_state = CS_INITIAL;
409
410 if (!init_checkers(m_config_change.m_initial_config))
411 DBUG_RETURN(false);
412 }
413 else
414 {
415 Config* conf = NULL;
416 if (!(conf = fetch_config()))
417 {
418 g_eventLogger->error("Could not fetch config!");
419 DBUG_RETURN(false);
420 }
421
422 /*
423 The fetched config may contain dynamic ports for
424 ndbd(s) which have to be reset to 0 before using
425 the config
426 */
427 reset_dynamic_ports_in_config(conf);
428
429 if (!config_ok(conf))
430 DBUG_RETURN(false);
431
432 /* Use the fetched config for now */
433 set_config(conf);
434
435 if (m_config->getGeneration() == 0)
436 {
437 g_eventLogger->info("Fetched initial configuration, " \
438 "generation: %d, name: '%s'. "\
439 "Will try to set it when all ndb_mgmd(s) started",
440 m_config->getGeneration(), m_config->getName());
441 m_config_state= CS_INITIAL;
442 m_config_change.m_initial_config = new Config(conf); // Copy config
443
444 if (!init_checkers(m_config_change.m_initial_config))
445 DBUG_RETURN(false);
446 }
447 else
448 {
449 g_eventLogger->info("Fetched confirmed configuration, " \
450 "generation: %d, name: '%s'. " \
451 "Trying to write it to disk...",
452 m_config->getGeneration(), m_config->getName());
453 if (!prepareConfigChange(m_config))
454 {
455 abortConfigChange();
456 g_eventLogger->error("Failed to write the fetched config to disk");
457 DBUG_RETURN(false);
458 }
459 commitConfigChange();
460 m_config_state = CS_CONFIRMED;
461 g_eventLogger->info("The fetched configuration has been saved!");
462 }
463 }
464 }
465
466 require(m_config_state != CS_UNINITIALIZED);
467 DBUG_RETURN(true);
468 }
469
470
471 bool
prepareConfigChange(const Config * config)472 ConfigManager::prepareConfigChange(const Config* config)
473 {
474 if (m_prepared_config)
475 {
476 g_eventLogger->error("Can't prepare configuration change " \
477 "when already prepared");
478 return false;
479 }
480
481 Uint32 generation= config->getGeneration();
482 if (generation == 0)
483 {
484 g_eventLogger->error("Can't prepare configuration change for "\
485 "configuration with generation 0");
486 return false;
487 }
488
489 assert(m_node_id);
490 m_config_name.assfmt("%s%sndb_%u_config.bin.%u",
491 m_configdir, DIR_SEPARATOR, m_node_id, generation);
492 g_eventLogger->debug("Preparing configuration, generation: %d name: %s",
493 generation, m_config_name.c_str());
494
495 /* Check file name is free */
496 if (access(m_config_name.c_str(), F_OK) == 0)
497 {
498 g_eventLogger->error("The file '%s' already exist while preparing",
499 m_config_name.c_str());
500 return false;
501 }
502
503 /* Pack the config */
504 UtilBuffer buf;
505 if(!config->pack(buf))
506 {
507 /* Failed to pack config */
508 g_eventLogger->error("Failed to pack configuration while preparing");
509 return false;
510 }
511
512 /* Write config to temporary file */
513 BaseString prep_config_name(m_config_name);
514 prep_config_name.append(".tmp");
515 FILE * f = fopen(prep_config_name.c_str(), IF_WIN("wbc", "w"));
516 if(f == NULL)
517 {
518 g_eventLogger->error("Failed to open file '%s' while preparing, errno: %d",
519 prep_config_name.c_str(), errno);
520 return false;
521 }
522
523 if(fwrite(buf.get_data(), 1, buf.length(), f) != (size_t)buf.length())
524 {
525 g_eventLogger->error("Failed to write file '%s' while preparing, errno: %d",
526 prep_config_name.c_str(), errno);
527 fclose(f);
528 unlink(prep_config_name.c_str());
529 return false;
530 }
531
532 if (fflush(f))
533 {
534 g_eventLogger->error("Failed to flush file '%s' while preparing, errno: %d",
535 prep_config_name.c_str(), errno);
536 fclose(f);
537 unlink(prep_config_name.c_str());
538 return false;
539 }
540
541 #ifdef __WIN__
542 /*
543 File is opened with the commit flag "c" so
544 that the contents of the file buffer are written
545 directly to disk when fflush is called
546 */
547 #else
548 if (fsync(fileno(f)))
549 {
550 g_eventLogger->error("Failed to sync file '%s' while preparing, errno: %d",
551 prep_config_name.c_str(), errno);
552 fclose(f);
553 unlink(prep_config_name.c_str());
554 return false;
555 }
556 #endif
557 fclose(f);
558
559 m_prepared_config = new Config(config); // Copy
560 g_eventLogger->debug("Configuration prepared");
561
562 return true;
563 }
564
565
566 void
commitConfigChange(void)567 ConfigManager::commitConfigChange(void)
568 {
569 require(m_prepared_config != 0);
570
571 /* Set new config locally and in all subscribers */
572 set_config(m_prepared_config);
573 m_prepared_config= NULL;
574
575 /* Rename file to real name */
576 require(m_config_name.length());
577 BaseString prep_config_name(m_config_name);
578 prep_config_name.append(".tmp");
579 if(rename(prep_config_name.c_str(), m_config_name.c_str()))
580 {
581 g_eventLogger->error("rename from '%s' to '%s' failed while committing, " \
582 "errno: %d",
583 prep_config_name.c_str(), m_config_name.c_str(),
584 errno);
585 // Crash and leave the prepared config file in place
586 abort();
587 }
588 m_config_name.clear();
589
590 g_eventLogger->info("Configuration %d commited", m_config->getGeneration());
591 }
592
593
594 static void
check_no_dynamic_ports_in_config(const Config * config)595 check_no_dynamic_ports_in_config(const Config* config)
596 {
597 bool ok = true;
598 ConfigIter iter(config, CFG_SECTION_CONNECTION);
599
600 for(;iter.valid();iter.next()) {
601 Uint32 n1, n2;
602 require(iter.get(CFG_CONNECTION_NODE_1, &n1) == 0 &&
603 iter.get(CFG_CONNECTION_NODE_2, &n2) == 0);
604
605 Uint32 port_value;
606 require(iter.get(CFG_CONNECTION_SERVER_PORT, &port_value) == 0);
607
608 int port = (int)port_value;
609 if (port < 0)
610 {
611 g_eventLogger->error("INTERNAL ERROR: Found dynamic ports with "
612 "value in config, n1: %d, n2: %d, port: %u",
613 n1, n2, port);
614 ok = false;
615 }
616 }
617 require(ok);
618 }
619
620
621 void
set_config(Config * new_config)622 ConfigManager::set_config(Config* new_config)
623 {
624 // Check that config does not contain any dynamic ports
625 check_no_dynamic_ports_in_config(new_config);
626
627 delete m_config;
628 m_config = new_config;
629
630 // Removed cache of packed config
631 m_packed_config.clear();
632
633 for (unsigned i = 0; i < m_subscribers.size(); i++)
634 m_subscribers[i]->config_changed(m_node_id, new_config);
635 }
636
637
638 int
add_config_change_subscriber(ConfigSubscriber * subscriber)639 ConfigManager::add_config_change_subscriber(ConfigSubscriber* subscriber)
640 {
641 return m_subscribers.push_back(subscriber);
642 }
643
644
645 bool
config_ok(const Config * conf)646 ConfigManager::config_ok(const Config* conf)
647 {
648 assert(m_node_id);
649 if (!m_config_retriever.verifyConfig(conf->m_configValues, m_node_id))
650 {
651 g_eventLogger->error("%s", m_config_retriever.getErrorString());
652 return false;
653 }
654
655 // Check DataDir exist
656 ConfigIter iter(conf, CFG_SECTION_NODE);
657 require(iter.find(CFG_NODE_ID, m_node_id) == 0);
658
659 const char *datadir;
660 require(iter.get(CFG_NODE_DATADIR, &datadir) == 0);
661
662 if (strcmp(datadir, "") != 0 && // datadir != ""
663 access(datadir, F_OK)) // dir exists
664 {
665 g_eventLogger->error("Directory '%s' specified with DataDir " \
666 "in configuration does not exist.", \
667 datadir);
668 return false;
669 }
670 return true;
671 }
672
673
674 void
abortConfigChange(void)675 ConfigManager::abortConfigChange(void)
676 {
677 /* Should always succeed */
678
679 /* Remove the prepared file */
680 BaseString prep_config_name(m_config_name);
681 prep_config_name.append(".tmp");
682 unlink(prep_config_name.c_str());
683 m_config_name.clear();
684
685 delete m_prepared_config;
686 m_prepared_config= NULL;
687 }
688
689
690
691 void
sendConfigChangeImplRef(SignalSender & ss,NodeId nodeId,ConfigChangeRef::ErrorCode error) const692 ConfigManager::sendConfigChangeImplRef(SignalSender& ss, NodeId nodeId,
693 ConfigChangeRef::ErrorCode error) const
694 {
695 SimpleSignal ssig;
696 ConfigChangeImplRef* const ref =
697 CAST_PTR(ConfigChangeImplRef, ssig.getDataPtrSend());
698 ref->errorCode = error;
699
700 g_eventLogger->debug("Send CONFIG_CHANGE_IMPL_REF to node: %d, error: %d",
701 nodeId, error);
702
703 ss.sendSignal(nodeId, ssig,
704 MGM_CONFIG_MAN, GSN_CONFIG_CHANGE_IMPL_REF,
705 ConfigChangeImplRef::SignalLength);
706 }
707
708
709
710 void
execCONFIG_CHANGE_IMPL_REQ(SignalSender & ss,SimpleSignal * sig)711 ConfigManager::execCONFIG_CHANGE_IMPL_REQ(SignalSender& ss, SimpleSignal* sig)
712 {
713 NodeId nodeId = refToNode(sig->header.theSendersBlockRef);
714 const ConfigChangeImplReq * const req =
715 CAST_CONSTPTR(ConfigChangeImplReq, sig->getDataPtr());
716
717 g_eventLogger->debug("Got CONFIG_CHANGE_IMPL_REQ from node: %d, "\
718 "requestType: %d",
719 nodeId, req->requestType);
720
721 if (!m_defragger.defragment(sig))
722 return; // More fragments to come
723
724 Guard g(m_config_mutex);
725
726 switch(req->requestType){
727 case ConfigChangeImplReq::Prepare:{
728 if (sig->header.m_noOfSections != 1)
729 {
730 sendConfigChangeImplRef(ss, nodeId, ConfigChangeRef::NoConfigData);
731 return;
732 }
733
734 ConfigValuesFactory cf;
735 if (!cf.unpack(sig->ptr[0].p, req->length))
736 {
737 sendConfigChangeImplRef(ss, nodeId, ConfigChangeRef::FailedToUnpack);
738 return;
739 }
740
741 Config new_config(cf.getConfigValues());
742 Uint32 new_generation = new_config.getGeneration();
743 Uint32 curr_generation = m_config->getGeneration();
744 const char* new_name = new_config.getName();
745 const char* curr_name = m_config->getName();
746
747 if (m_config->illegal_change(&new_config))
748 {
749 sendConfigChangeImplRef(ss, nodeId, ConfigChangeRef::IllegalConfigChange);
750 return;
751 }
752
753 if (req->initial)
754 {
755 // Check own state
756 if (m_config_state != CS_INITIAL)
757 {
758 g_eventLogger->warning("Refusing to start initial " \
759 "configuration change since this node " \
760 "is not in INITIAL state");
761 sendConfigChangeImplRef(ss, nodeId,
762 ConfigChangeRef::IllegalInitialState);
763 return;
764 }
765
766 // Check generation
767 if (new_generation != 0)
768 {
769 g_eventLogger->warning("Refusing to start initial " \
770 "configuration change since new " \
771 "generation is not 0 (new_generation: %d)",
772 new_generation);
773 sendConfigChangeImplRef(ss, nodeId,
774 ConfigChangeRef::IllegalInitialGeneration);
775 return;
776 }
777 new_generation = 1;
778
779 // Check config is equal to our initial config
780 // but skip check if message is from self...
781 if (nodeId != refToNode(ss.getOwnRef()))
782 {
783 Config new_config_copy(&new_config);
784 require(new_config_copy.setName(new_name));
785 unsigned exclude[]= {CFG_SECTION_SYSTEM, 0};
786 if (!new_config_copy.equal(m_config_change.m_initial_config, exclude))
787 {
788 BaseString buf;
789 g_eventLogger->warning
790 ("Refusing to start initial config " \
791 "change when nodes have different " \
792 "config\n" \
793 "This is the actual diff:\n%s",
794 new_config_copy.diff2str(m_config_change.m_initial_config, buf));
795 sendConfigChangeImplRef(ss, nodeId,
796 ConfigChangeRef::DifferentInitial);
797 return;
798 }
799
800 /*
801 Scrap the new_config, it's been used to check that other node
802 started from equal initial config, now it's not needed anymore
803 */
804 delete m_config_change.m_initial_config;
805 m_config_change.m_initial_config = NULL;
806 }
807 }
808 else
809 {
810
811 // Check that new config has same primary mgm node as current
812 Uint32 curr_primary = m_config->getPrimaryMgmNode();
813 Uint32 new_primary = new_config.getPrimaryMgmNode();
814 if (new_primary != curr_primary)
815 {
816 g_eventLogger->warning("Refusing to start configuration change " \
817 "requested by node %d, the new config uses " \
818 "different primary mgm node %d. " \
819 "Current primary mmgm node is %d.",
820 nodeId, new_primary, curr_primary);
821 sendConfigChangeImplRef(ss, nodeId,
822 ConfigChangeRef::NotPrimaryMgmNode);
823 return;
824 }
825
826 if (new_generation == 0 ||
827 new_generation != curr_generation)
828 {
829 BaseString buf;
830 g_eventLogger->warning("Refusing to start config change " \
831 "requested by node with different " \
832 "generation: %d. Our generation: %d\n" \
833 "This is the actual diff:\n%s",
834 new_generation, curr_generation,
835 new_config.diff2str(m_config, buf));
836 sendConfigChangeImplRef(ss, nodeId, ConfigChangeRef::InvalidGeneration);
837 return;
838 }
839 new_generation++;
840
841 // Check same cluster name
842 if (strcmp(new_name, curr_name))
843 {
844 BaseString buf;
845 g_eventLogger->warning("Refusing to start config change " \
846 "requested by node with different " \
847 "name: '%s'. Our name: '%s'\n" \
848 "This is the actual diff:\n%s",
849 new_name, curr_name,
850 new_config.diff2str(m_config, buf));
851 sendConfigChangeImplRef(ss, nodeId, ConfigChangeRef::InvalidConfigName);
852 return;
853 }
854 }
855
856 // Set new generation
857 if(!new_config.setGeneration(new_generation))
858 {
859 g_eventLogger->error("Failed to set new generation to %d",
860 new_generation);
861 sendConfigChangeImplRef(ss, nodeId, ConfigChangeRef::InternalError);
862 return;
863 }
864
865 if (!prepareConfigChange(&new_config))
866 {
867 sendConfigChangeImplRef(ss, nodeId, ConfigChangeRef::PrepareFailed);
868 return;
869 }
870 break;
871 }
872
873 case ConfigChangeImplReq::Commit:
874 commitConfigChange();
875
876 // All nodes has agreed on config -> CONFIRMED
877 m_config_state = CS_CONFIRMED;
878
879 break;
880
881 case ConfigChangeImplReq::Abort:
882 abortConfigChange();
883 break;
884
885 default:
886 g_eventLogger->error("execCONFIG_CHANGE_IMPL_REQ: unhandled state");
887 abort();
888 break;
889 }
890
891 /* Send CONF */
892 SimpleSignal ssig;
893 ConfigChangeImplConf* const conf =
894 CAST_PTR(ConfigChangeImplConf, ssig.getDataPtrSend());
895 conf->requestType = req->requestType;
896
897 g_eventLogger->debug("Sending CONFIG_CHANGE_IMPL_CONF to node: %d",
898 nodeId);
899
900 ss.sendSignal(nodeId, ssig,
901 MGM_CONFIG_MAN,
902 GSN_CONFIG_CHANGE_IMPL_CONF,
903 ConfigChangeImplConf::SignalLength);
904 }
905
906
set_config_change_state(ConfigChangeState::States state)907 void ConfigManager::set_config_change_state(ConfigChangeState::States state)
908 {
909 if (state == ConfigChangeState::IDLE)
910 {
911 // Rebuild m_all_mgm so that each node in config is included
912 // new mgm nodes might have been added
913 assert(m_config_change.m_error == ConfigChangeRef::OK);
914 m_config->get_nodemask(m_all_mgm, NDB_MGM_NODE_TYPE_MGM);
915 }
916
917 m_config_change.m_state.m_current_state = state;
918 }
919
920
921 void
execCONFIG_CHANGE_IMPL_REF(SignalSender & ss,SimpleSignal * sig)922 ConfigManager::execCONFIG_CHANGE_IMPL_REF(SignalSender& ss, SimpleSignal* sig)
923 {
924 NodeId nodeId = refToNode(sig->header.theSendersBlockRef);
925 g_eventLogger->debug("Got CONFIG_CHANGE_IMPL_REF from node: %d", nodeId);
926
927 const ConfigChangeImplRef * const ref =
928 CAST_CONSTPTR(ConfigChangeImplRef, sig->getDataPtr());
929 g_eventLogger->warning("Node %d refused configuration change, error: %d",
930 nodeId, ref->errorCode);
931
932 /* Remember the original error code */
933 if (m_config_change.m_error == 0)
934 m_config_change.m_error = (ConfigChangeRef::ErrorCode)ref->errorCode;
935
936 switch(m_config_change.m_state){
937 case ConfigChangeState::ABORT:
938 case ConfigChangeState::PREPARING:{
939 /* Got ref while preparing (or already decided to abort) */
940 m_config_change.m_contacted_nodes.clear(nodeId);
941 set_config_change_state(ConfigChangeState::ABORT);
942
943 m_waiting_for.clear(nodeId);
944 if (!m_waiting_for.isclear())
945 return;
946
947 startAbortConfigChange(ss);
948 break;
949 }
950 case ConfigChangeState::COMITTING:
951 /* Got ref while comitting, impossible */
952 abort();
953 break;
954
955 case ConfigChangeState::ABORTING:
956 /* Got ref while aborting, impossible */
957 abort();
958 break;
959
960 default:
961 g_eventLogger->error("execCONFIG_CHANGE_IMPL_REF: unhandled state");
962 abort();
963 break;
964 }
965 }
966
967
968 void
execCONFIG_CHANGE_IMPL_CONF(SignalSender & ss,SimpleSignal * sig)969 ConfigManager::execCONFIG_CHANGE_IMPL_CONF(SignalSender& ss, SimpleSignal* sig)
970 {
971 NodeId nodeId = refToNode(sig->header.theSendersBlockRef);
972 const ConfigChangeImplConf * const conf =
973 CAST_CONSTPTR(ConfigChangeImplConf, sig->getDataPtr());
974 g_eventLogger->debug("Got CONFIG_CHANGE_IMPL_CONF from node %d", nodeId);
975
976 switch(m_config_change.m_state){
977 case ConfigChangeState::PREPARING:{
978 require(conf->requestType == ConfigChangeImplReq::Prepare);
979 m_waiting_for.clear(nodeId);
980 if (!m_waiting_for.isclear())
981 return;
982
983 // send to next
984 int res = sendConfigChangeImplReq(ss, m_config_change.m_new_config);
985 if (res > 0)
986 {
987 // sent to new node...
988 return;
989 }
990 else if (res < 0)
991 {
992 // send failed, start abort
993 startAbortConfigChange(ss);
994 return;
995 }
996
997 /**
998 * All node has received new config..
999 * ok to delete it...
1000 */
1001 delete m_config_change.m_new_config;
1002 m_config_change.m_new_config = 0;
1003
1004 /* Send commit to all nodes */
1005 SimpleSignal ssig;
1006 ConfigChangeImplReq* const req =
1007 CAST_PTR(ConfigChangeImplReq, ssig.getDataPtrSend());
1008
1009 req->requestType = ConfigChangeImplReq::Commit;
1010
1011 g_eventLogger->debug("Sending CONFIG_CHANGE_IMPL_REQ(commit)");
1012 require(m_waiting_for.isclear());
1013 m_waiting_for = ss.broadcastSignal(m_config_change.m_contacted_nodes, ssig,
1014 MGM_CONFIG_MAN,
1015 GSN_CONFIG_CHANGE_IMPL_REQ,
1016 ConfigChangeImplReq::SignalLength);
1017 if (m_waiting_for.isclear())
1018 set_config_change_state(ConfigChangeState::IDLE);
1019 else
1020 set_config_change_state(ConfigChangeState::COMITTING);
1021 break;
1022 }
1023
1024 case ConfigChangeState::COMITTING:{
1025 require(conf->requestType == ConfigChangeImplReq::Commit);
1026
1027 m_waiting_for.clear(nodeId);
1028 if (!m_waiting_for.isclear())
1029 return;
1030
1031 require(m_config_change.m_client_ref != RNIL);
1032 require(m_config_change.m_error == 0);
1033 if (m_config_change.m_client_ref == ss.getOwnRef())
1034 {
1035 g_eventLogger->info("Config change completed! New generation: %d",
1036 m_config->getGeneration());
1037 }
1038 else
1039 {
1040 /* Send CONF to requestor */
1041 sendConfigChangeConf(ss, m_config_change.m_client_ref);
1042 }
1043 m_config_change.m_client_ref = RNIL;
1044 set_config_change_state(ConfigChangeState::IDLE);
1045 break;
1046 }
1047
1048 case ConfigChangeState::ABORT:{
1049 m_waiting_for.clear(nodeId);
1050 if (!m_waiting_for.isclear())
1051 return;
1052
1053 startAbortConfigChange(ss);
1054 break;
1055 }
1056
1057 case ConfigChangeState::ABORTING:{
1058 m_waiting_for.clear(nodeId);
1059 if (!m_waiting_for.isclear())
1060 return;
1061
1062 require(m_config_change.m_client_ref != RNIL);
1063 require(m_config_change.m_error);
1064 if (m_config_change.m_client_ref == ss.getOwnRef())
1065 {
1066 g_eventLogger->
1067 error("Configuration change failed! error: %d '%s'",
1068 m_config_change.m_error,
1069 ConfigChangeRef::errorMessage(m_config_change.m_error));
1070 exit(1);
1071 }
1072 else
1073 {
1074 /* Send ref to the requestor */
1075 sendConfigChangeRef(ss, m_config_change.m_client_ref,
1076 m_config_change.m_error);
1077 }
1078 m_config_change.m_error= ConfigChangeRef::OK;
1079 m_config_change.m_client_ref = RNIL;
1080 set_config_change_state(ConfigChangeState::IDLE);
1081 break;
1082 }
1083
1084 default:
1085 g_eventLogger->error("execCONFIG_CHANGE_IMPL_CONF: unhandled state");
1086 abort();
1087 break;
1088 }
1089 }
1090
1091
1092 void
sendConfigChangeRef(SignalSender & ss,BlockReference to,ConfigChangeRef::ErrorCode error) const1093 ConfigManager::sendConfigChangeRef(SignalSender& ss, BlockReference to,
1094 ConfigChangeRef::ErrorCode error) const
1095 {
1096 NodeId nodeId = refToNode(to);
1097 SimpleSignal ssig;
1098 ConfigChangeRef* const ref =
1099 CAST_PTR(ConfigChangeRef, ssig.getDataPtrSend());
1100 ref->errorCode = error;
1101
1102 g_eventLogger->debug("Send CONFIG_CHANGE_REF to node: %d, error: %d",
1103 nodeId, error);
1104
1105 ss.sendSignal(nodeId, ssig, refToBlock(to),
1106 GSN_CONFIG_CHANGE_REF, ConfigChangeRef::SignalLength);
1107 }
1108
1109
1110 void
sendConfigChangeConf(SignalSender & ss,BlockReference to) const1111 ConfigManager::sendConfigChangeConf(SignalSender& ss, BlockReference to) const
1112 {
1113 NodeId nodeId = refToNode(to);
1114 SimpleSignal ssig;
1115
1116 g_eventLogger->debug("Send CONFIG_CHANGE_CONF to node: %d", nodeId);
1117
1118 ss.sendSignal(nodeId, ssig, refToBlock(to),
1119 GSN_CONFIG_CHANGE_CONF, ConfigChangeConf::SignalLength);
1120 }
1121
1122
1123 void
startConfigChange(SignalSender & ss,Uint32 ref)1124 ConfigManager::startConfigChange(SignalSender& ss, Uint32 ref)
1125 {
1126 if (m_config_state == CS_INITIAL)
1127 {
1128 g_eventLogger->info("Starting initial configuration change");
1129 }
1130 else
1131 {
1132 require(m_config_state == CS_CONFIRMED);
1133 g_eventLogger->info("Starting configuration change, generation: %d",
1134 m_config_change.m_new_config->getGeneration());
1135 }
1136 m_config_change.m_contacted_nodes.clear();
1137 m_config_change.m_client_ref = ref;
1138 if (sendConfigChangeImplReq(ss, m_config_change.m_new_config) <= 0)
1139 {
1140 g_eventLogger->error("Failed to start configuration change!");
1141 exit(1);
1142 }
1143 }
1144
1145 void
startAbortConfigChange(SignalSender & ss)1146 ConfigManager::startAbortConfigChange(SignalSender& ss)
1147 {
1148 /* Abort all other nodes */
1149 SimpleSignal ssig;
1150 ConfigChangeImplReq* const req =
1151 CAST_PTR(ConfigChangeImplReq, ssig.getDataPtrSend());
1152 req->requestType = ConfigChangeImplReq::Abort;
1153
1154 g_eventLogger->debug
1155 ("Sending CONFIG_CHANGE_IMPL_REQ(abort) to %s",
1156 BaseString::getPrettyText(m_config_change.m_contacted_nodes).c_str());
1157
1158 require(m_waiting_for.isclear());
1159 m_waiting_for = ss.broadcastSignal(m_config_change.m_contacted_nodes, ssig,
1160 MGM_CONFIG_MAN,
1161 GSN_CONFIG_CHANGE_IMPL_REQ,
1162 ConfigChangeImplReq::SignalLength);
1163
1164 if (m_config_change.m_new_config)
1165 {
1166 delete m_config_change.m_new_config;
1167 m_config_change.m_new_config = 0;
1168 }
1169
1170 if (m_waiting_for.isclear())
1171 {
1172 /**
1173 * Send CONFIG_CHANGE_IMPL_CONF (aborting) to self
1174 */
1175 m_waiting_for.set(ss.getOwnNodeId());
1176 ConfigChangeImplConf* const conf =
1177 CAST_PTR(ConfigChangeImplConf, ssig.getDataPtrSend());
1178 conf->requestType = ConfigChangeImplReq::Abort;
1179
1180 ss.sendSignal(ss.getOwnNodeId(), ssig,
1181 MGM_CONFIG_MAN,
1182 GSN_CONFIG_CHANGE_IMPL_CONF,
1183 ConfigChangeImplConf::SignalLength);
1184 }
1185
1186 set_config_change_state(ConfigChangeState::ABORTING);
1187 }
1188
1189 int
sendConfigChangeImplReq(SignalSender & ss,const Config * conf)1190 ConfigManager::sendConfigChangeImplReq(SignalSender& ss, const Config* conf)
1191 {
1192 require(m_waiting_for.isclear());
1193 require(m_config_change.m_client_ref != RNIL);
1194
1195 if (m_config_change.m_contacted_nodes.isclear())
1196 {
1197 require(m_config_change.m_state == ConfigChangeState::IDLE);
1198 }
1199 else
1200 {
1201 require(m_config_change.m_state == ConfigChangeState::PREPARING);
1202 }
1203
1204 set_config_change_state(ConfigChangeState::PREPARING);
1205
1206 NodeBitmask nodes = m_all_mgm;
1207 nodes.bitANDC(m_config_change.m_contacted_nodes);
1208 if (nodes.isclear())
1209 {
1210 return 0; // all done
1211 }
1212
1213 /**
1214 * Send prepare to all MGM nodes 1 by 1
1215 * keep track of which I sent to in m_contacted_nodes
1216 */
1217 SimpleSignal ssig;
1218
1219 UtilBuffer buf;
1220 conf->pack(buf);
1221 ssig.ptr[0].p = (Uint32*)buf.get_data();
1222 ssig.ptr[0].sz = (buf.length() + 3) / 4;
1223 ssig.header.m_noOfSections = 1;
1224
1225 ConfigChangeImplReq* const req =
1226 CAST_PTR(ConfigChangeImplReq, ssig.getDataPtrSend());
1227 req->requestType = ConfigChangeImplReq::Prepare;
1228 req->initial = (m_config_state == CS_INITIAL);
1229 req->length = buf.length();
1230
1231 Uint32 i = nodes.find(0);
1232 g_eventLogger->debug("Sending CONFIG_CHANGE_IMPL_REQ(prepare) to %u", i);
1233 int result = ss.sendFragmentedSignal(i, ssig, MGM_CONFIG_MAN,
1234 GSN_CONFIG_CHANGE_IMPL_REQ,
1235 ConfigChangeImplReq::SignalLength);
1236 if (result != 0)
1237 {
1238 g_eventLogger->warning("Failed to send configuration change "
1239 "prepare to node: %d, result: %d",
1240 i, result);
1241 return -1;
1242 }
1243
1244 m_waiting_for.set(i);
1245 m_config_change.m_contacted_nodes.set(i);
1246
1247 return 1;
1248 }
1249
1250 void
execCONFIG_CHANGE_REQ(SignalSender & ss,SimpleSignal * sig)1251 ConfigManager::execCONFIG_CHANGE_REQ(SignalSender& ss, SimpleSignal* sig)
1252 {
1253 BlockReference from = sig->header.theSendersBlockRef;
1254 const ConfigChangeReq * const req =
1255 CAST_CONSTPTR(ConfigChangeReq, sig->getDataPtr());
1256
1257 if (!m_defragger.defragment(sig))
1258 return; // More fragments to come
1259
1260 if (!m_started.equal(m_all_mgm)) // Not all started
1261 {
1262 sendConfigChangeRef(ss, from, ConfigChangeRef::NotAllStarted);
1263 return;
1264 }
1265
1266 if (m_all_mgm.find(0) != m_facade->ownId()) // Not the master
1267 {
1268 sendConfigChangeRef(ss, from, ConfigChangeRef::NotMaster);
1269 return;
1270 }
1271
1272 if (m_config_change.m_state != ConfigChangeState::IDLE)
1273 {
1274 sendConfigChangeRef(ss, from, ConfigChangeRef::ConfigChangeOnGoing);
1275 return;
1276 }
1277 require(m_config_change.m_error == ConfigChangeRef::OK);
1278
1279 if (sig->header.m_noOfSections != 1)
1280 {
1281 sendConfigChangeRef(ss, from, ConfigChangeRef::NoConfigData);
1282 return;
1283 }
1284
1285 ConfigValuesFactory cf;
1286 if (!cf.unpack(sig->ptr[0].p, req->length))
1287 {
1288 sendConfigChangeRef(ss, from, ConfigChangeRef::FailedToUnpack);
1289 return;
1290 }
1291
1292 Config * new_config = new Config(cf.getConfigValues());
1293 if (!config_ok(new_config))
1294 {
1295 g_eventLogger->warning("Refusing to start config change, the config "\
1296 "is not ok");
1297 sendConfigChangeRef(ss, from, ConfigChangeRef::ConfigNotOk);
1298 delete new_config;
1299 return;
1300 }
1301
1302 m_config_change.m_new_config = new_config;
1303 startConfigChange(ss, from);
1304
1305 return;
1306 }
1307
1308
1309 static Uint32
config_check_checksum(const Config * config)1310 config_check_checksum(const Config* config)
1311 {
1312 Config copy(config);
1313
1314 // Make constants of a few values in SYSTEM section that are
1315 // not part of the checksum used for "config check"
1316 copy.setName("CHECKSUM");
1317 copy.setPrimaryMgmNode(0);
1318
1319 Uint32 checksum = copy.checksum();
1320
1321 return checksum;
1322 }
1323
1324
1325 void
execCONFIG_CHECK_REQ(SignalSender & ss,SimpleSignal * sig)1326 ConfigManager::execCONFIG_CHECK_REQ(SignalSender& ss, SimpleSignal* sig)
1327 {
1328 Guard g(m_config_mutex);
1329 BlockReference from = sig->header.theSendersBlockRef;
1330 NodeId nodeId = refToNode(from);
1331 const ConfigCheckReq * const req =
1332 CAST_CONSTPTR(ConfigCheckReq, sig->getDataPtr());
1333
1334 Uint32 other_generation = req->generation;
1335 ConfigState other_state = (ConfigState)req->state;
1336
1337 Uint32 generation = m_config->getGeneration();
1338
1339 // checksum
1340 Uint32 checksum = config_check_checksum(m_config);
1341 Uint32 other_checksum = req->checksum;
1342 if (sig->header.theLength == ConfigCheckReq::SignalLengthBeforeChecksum)
1343 {
1344 // Other side uses old version without checksum, use our checksum to
1345 // bypass the checks
1346 g_eventLogger->debug("Other mgmd does not have checksum, using own");
1347 other_checksum = checksum;
1348 }
1349
1350 if (m_prepared_config || m_config_change.m_new_config)
1351 {
1352 g_eventLogger->debug("Got CONFIG_CHECK_REQ from node: %d while "
1353 "config change in progress (m_prepared_config). "
1354 "Returning incorrect state, causing it to be retried",
1355 nodeId);
1356 sendConfigCheckRef(ss, from, ConfigCheckRef::WrongState,
1357 generation, other_generation,
1358 m_config_state, CS_UNINITIALIZED);
1359 return;
1360 }
1361
1362 if (m_config_change.m_loaded_config && ss.getOwnNodeId() < nodeId)
1363 {
1364 g_eventLogger->debug("Got CONFIG_CHECK_REQ from node: %d while "
1365 "having a loaded config (and my node is lower: %d). "
1366 "Returning incorrect state, causing it to be retried",
1367 nodeId,
1368 ss.getOwnNodeId());
1369 sendConfigCheckRef(ss, from, ConfigCheckRef::WrongState,
1370 generation, other_generation,
1371 m_config_state, CS_UNINITIALIZED);
1372 return;
1373 }
1374
1375 g_eventLogger->debug("Got CONFIG_CHECK_REQ from node: %d. "
1376 "Our generation: %d, other generation: %d, "
1377 "our state: %d, other state: %d, "
1378 "our checksum: 0x%.8x, other checksum: 0x%.8x",
1379 nodeId, generation, other_generation,
1380 m_config_state, other_state,
1381 checksum, other_checksum);
1382
1383 switch (m_config_state)
1384 {
1385 default:
1386 case CS_UNINITIALIZED:
1387 g_eventLogger->error("execCONFIG_CHECK_REQ: unhandled state");
1388 abort();
1389 break;
1390
1391 case CS_INITIAL:
1392 if (other_state != CS_INITIAL)
1393 {
1394 g_eventLogger->warning("Refusing CONGIG_CHECK_REQ from %u, "
1395 " it's not CS_INITIAL (I am). "
1396 " Waiting for my check",
1397 nodeId);
1398 sendConfigCheckRef(ss, from, ConfigCheckRef::WrongState,
1399 generation, other_generation,
1400 m_config_state, other_state);
1401 return;
1402 }
1403
1404 require(generation == 0);
1405 if (other_generation != generation)
1406 {
1407 g_eventLogger->warning("Refusing other node, it has different " \
1408 "generation: %d, expected: %d",
1409 other_generation, generation);
1410 sendConfigCheckRef(ss, from, ConfigCheckRef::WrongGeneration,
1411 generation, other_generation,
1412 m_config_state, other_state);
1413 return;
1414 }
1415
1416 if (other_checksum != checksum)
1417 {
1418 g_eventLogger->warning("Refusing other node, it has different "
1419 "checksum: 0x%.8x, expected: 0x%.8x",
1420 other_checksum, checksum);
1421 sendConfigCheckRef(ss, from, ConfigCheckRef::WrongChecksum,
1422 generation, other_generation,
1423 m_config_state, other_state);
1424 return;
1425 }
1426 break;
1427
1428 case CS_CONFIRMED:
1429
1430 if (other_state != CS_CONFIRMED)
1431 {
1432 g_eventLogger->warning("Refusing other node, it's in different " \
1433 "state: %d, expected: %d",
1434 other_state, m_config_state);
1435 sendConfigCheckRef(ss, from, ConfigCheckRef::WrongState,
1436 generation, other_generation,
1437 m_config_state, other_state);
1438 return;
1439 }
1440
1441 if (other_generation == generation)
1442 {
1443 // Same generation, make sure it has same checksum
1444 if (other_checksum != checksum)
1445 {
1446 g_eventLogger->warning("Refusing other node, it has different "
1447 "checksum: 0x%.8x, expected: 0x%.8x",
1448 other_checksum, checksum);
1449 sendConfigCheckRef(ss, from, ConfigCheckRef::WrongChecksum,
1450 generation, other_generation,
1451 m_config_state, other_state);
1452 return;
1453 }
1454 // OK!
1455 }
1456 else if (other_generation < generation)
1457 {
1458 g_eventLogger->warning("Refusing other node, it has lower " \
1459 " generation: %d, expected: %d",
1460 other_generation, generation);
1461 sendConfigCheckRef(ss, from, ConfigCheckRef::WrongGeneration,
1462 generation, other_generation,
1463 m_config_state, other_state);
1464 return;
1465 }
1466 else
1467 {
1468 g_eventLogger->error("Other node has higher generation: %d, this " \
1469 "node is out of sync with generation: %d",
1470 other_generation, generation);
1471 exit(1);
1472 }
1473
1474 break;
1475 }
1476
1477 sendConfigCheckConf(ss, from);
1478 return;
1479 }
1480
1481
1482 void
sendConfigCheckReq(SignalSender & ss,NodeBitmask to)1483 ConfigManager::sendConfigCheckReq(SignalSender& ss, NodeBitmask to)
1484 {
1485 SimpleSignal ssig;
1486 ConfigCheckReq* const req =
1487 CAST_PTR(ConfigCheckReq, ssig.getDataPtrSend());
1488 req->state = m_config_state;
1489 req->generation = m_config->getGeneration();
1490 req->checksum = config_check_checksum(m_config);
1491
1492 g_eventLogger->debug("Sending CONFIG_CHECK_REQ to %s",
1493 BaseString::getPrettyText(to).c_str());
1494
1495 require(m_waiting_for.isclear());
1496 m_waiting_for = ss.broadcastSignal(to, ssig, MGM_CONFIG_MAN,
1497 GSN_CONFIG_CHECK_REQ,
1498 ConfigCheckReq::SignalLength);
1499 }
1500
1501 static bool
send_config_in_check_ref(Uint32 x)1502 send_config_in_check_ref(Uint32 x)
1503 {
1504 if (x >= NDB_MAKE_VERSION(7,0,8))
1505 return true;
1506 return false;
1507 }
1508
1509 void
sendConfigCheckRef(SignalSender & ss,BlockReference to,ConfigCheckRef::ErrorCode error,Uint32 generation,Uint32 other_generation,ConfigState state,ConfigState other_state) const1510 ConfigManager::sendConfigCheckRef(SignalSender& ss, BlockReference to,
1511 ConfigCheckRef::ErrorCode error,
1512 Uint32 generation,
1513 Uint32 other_generation,
1514 ConfigState state,
1515 ConfigState other_state) const
1516 {
1517 int result;
1518 NodeId nodeId = refToNode(to);
1519 SimpleSignal ssig;
1520 ConfigCheckRef* const ref =
1521 CAST_PTR(ConfigCheckRef, ssig.getDataPtrSend());
1522 ref->error = error;
1523 ref->generation = other_generation;
1524 ref->expected_generation = generation;
1525 ref->state = other_state;
1526 ref->expected_state = state;
1527
1528 g_eventLogger->debug("Send CONFIG_CHECK_REF with error: %d to node: %d",
1529 error, nodeId);
1530
1531 if (!send_config_in_check_ref(ss.getNodeInfo(nodeId).m_info.m_version))
1532 {
1533 result = ss.sendSignal(nodeId, ssig, MGM_CONFIG_MAN,
1534 GSN_CONFIG_CHECK_REF, ConfigCheckRef::SignalLength);
1535 }
1536 else
1537 {
1538 UtilBuffer buf;
1539 m_config->pack(buf);
1540 ssig.ptr[0].p = (Uint32*)buf.get_data();
1541 ssig.ptr[0].sz = (buf.length() + 3) / 4;
1542 ssig.header.m_noOfSections = 1;
1543
1544 ref->length = buf.length();
1545
1546 g_eventLogger->debug("Sending CONFIG_CHECK_REF with config");
1547
1548 result = ss.sendFragmentedSignal(nodeId, ssig, MGM_CONFIG_MAN,
1549 GSN_CONFIG_CHECK_REF,
1550 ConfigCheckRef::SignalLengthWithConfig);
1551 }
1552
1553 if (result != 0)
1554 {
1555 g_eventLogger->warning("Failed to send CONFIG_CHECK_REF "
1556 "to node: %d, result: %d",
1557 nodeId, result);
1558 }
1559 }
1560
1561 void
sendConfigCheckConf(SignalSender & ss,BlockReference to) const1562 ConfigManager::sendConfigCheckConf(SignalSender& ss, BlockReference to) const
1563 {
1564 NodeId nodeId = refToNode(to);
1565 SimpleSignal ssig;
1566 ConfigCheckConf* const conf =
1567 CAST_PTR(ConfigCheckConf, ssig.getDataPtrSend());
1568 conf->state = m_config_state;
1569 conf->generation = m_config->getGeneration();
1570
1571 g_eventLogger->debug("Send CONFIG_CHECK_CONF to node: %d", nodeId);
1572
1573 ss.sendSignal(nodeId, ssig, MGM_CONFIG_MAN,
1574 GSN_CONFIG_CHECK_CONF, ConfigCheckConf::SignalLength);
1575 }
1576
1577
1578 void
execCONFIG_CHECK_CONF(SignalSender & ss,SimpleSignal * sig)1579 ConfigManager::execCONFIG_CHECK_CONF(SignalSender& ss, SimpleSignal* sig)
1580 {
1581 BlockReference from = sig->header.theSendersBlockRef;
1582 NodeId nodeId = refToNode(from);
1583 assert(m_waiting_for.get(nodeId));
1584 m_waiting_for.clear(nodeId);
1585 m_checked.set(nodeId);
1586
1587 g_eventLogger->debug("Got CONFIG_CHECK_CONF from node: %d",
1588 nodeId);
1589
1590 return;
1591 }
1592
1593
1594 void
execCONFIG_CHECK_REF(SignalSender & ss,SimpleSignal * sig)1595 ConfigManager::execCONFIG_CHECK_REF(SignalSender& ss, SimpleSignal* sig)
1596 {
1597 BlockReference from = sig->header.theSendersBlockRef;
1598 NodeId nodeId = refToNode(from);
1599 assert(m_waiting_for.get(nodeId));
1600
1601 const ConfigCheckRef* const ref =
1602 CAST_CONSTPTR(ConfigCheckRef, sig->getDataPtr());
1603
1604 if (!m_defragger.defragment(sig))
1605 return; // More fragments to come
1606
1607 g_eventLogger->debug("Got CONFIG_CHECK_REF from node %d, "
1608 "error: %d, message: '%s', "
1609 "generation: %d, expected generation: %d, "
1610 "state: %d, expected state: %d own-state: %u",
1611 nodeId, ref->error,
1612 ConfigCheckRef::errorMessage(ref->error),
1613 ref->generation, ref->expected_generation,
1614 ref->state, ref->expected_state,
1615 m_config_state);
1616
1617 assert(ref->generation != ref->expected_generation ||
1618 ref->state != ref->expected_state ||
1619 ref->error == ConfigCheckRef::WrongChecksum);
1620 if((Uint32)m_config_state != ref->state)
1621 {
1622 // The config state changed while this check was in the air
1623 // drop the signal and thus cause it to run again later
1624 require(!m_checked.get(nodeId));
1625 m_waiting_for.clear(nodeId);
1626 return;
1627 }
1628
1629 switch(m_config_state)
1630 {
1631 default:
1632 case CS_UNINITIALIZED:
1633 g_eventLogger->error("execCONFIG_CHECK_REF: unhandled state");
1634 abort();
1635 break;
1636
1637 case CS_INITIAL:
1638 if (ref->expected_state == CS_CONFIRMED)
1639 {
1640 if (sig->header.theLength != ConfigCheckRef::SignalLengthWithConfig)
1641 break; // No config in the REF -> no action
1642
1643 // The other node has sent it's config in the signal, use it if equal
1644 assert(sig->header.m_noOfSections == 1);
1645
1646 ConfigValuesFactory cf;
1647 require(cf.unpack(sig->ptr[0].p, ref->length));
1648
1649 Config other_config(cf.getConfigValues());
1650 assert(other_config.getGeneration() > 0);
1651
1652 unsigned exclude[]= {CFG_SECTION_SYSTEM, 0};
1653 if (!other_config.equal(m_config, exclude))
1654 {
1655 BaseString buf;
1656 g_eventLogger->error("This node was started --initial with "
1657 "a config which is _not_ equal to the one "
1658 "node %d is using. Refusing to start with "
1659 "different configurations, diff: \n%s",
1660 nodeId,
1661 other_config.diff2str(m_config, buf, exclude));
1662 exit(1);
1663 }
1664
1665 g_eventLogger->info("This node was started --inital with "
1666 "a config equal to the one node %d is using. "
1667 "Will use the config with generation %d "
1668 "from node %d!",
1669 nodeId, other_config.getGeneration(), nodeId);
1670
1671 if (! prepareConfigChange(&other_config))
1672 {
1673 abortConfigChange();
1674 g_eventLogger->error("Failed to write the fetched config to disk");
1675 exit(1);
1676 }
1677 commitConfigChange();
1678 m_config_state = CS_CONFIRMED;
1679 g_eventLogger->info("The fetched configuration has been saved!");
1680 m_waiting_for.clear(nodeId);
1681 m_checked.set(nodeId);
1682 delete m_config_change.m_initial_config;
1683 m_config_change.m_initial_config = NULL;
1684 return;
1685 }
1686 break;
1687
1688 case CS_CONFIRMED:
1689 if (ref->expected_state == CS_INITIAL)
1690 {
1691 g_eventLogger->info("Waiting for peer");
1692 m_waiting_for.clear(nodeId);
1693 return;
1694 }
1695 break;
1696 }
1697
1698 if (ref->error == ConfigCheckRef::WrongChecksum &&
1699 m_node_id < nodeId)
1700 {
1701 g_eventLogger->warning("Ignoring CONFIG_CHECK_REF for wrong checksum "
1702 "other node has higher node id and should "
1703 "shutdown");
1704 return;
1705 }
1706
1707 g_eventLogger->error("Terminating");
1708 exit(1);
1709 }
1710
1711 void
set_facade(TransporterFacade * f)1712 ConfigManager::set_facade(TransporterFacade * f)
1713 {
1714 m_facade = f;
1715 m_ss = new SignalSender(f, MGM_CONFIG_MAN);
1716 require(m_ss != 0);
1717 }
1718
1719 bool
config_loaded(Config * config)1720 ConfigManager::ConfigChange::config_loaded(Config* config)
1721 {
1722 if (m_loaded_config != 0)
1723 return false;
1724 m_loaded_config = config;
1725 return true;
1726 }
1727
1728 Config*
prepareLoadedConfig(Config * new_conf)1729 ConfigManager::prepareLoadedConfig(Config * new_conf)
1730 {
1731 /* Copy the necessary values from old to new config */
1732 if (!new_conf->setGeneration(m_config->getGeneration()))
1733 {
1734 g_eventLogger->error("Failed to copy generation from old config");
1735 delete new_conf;
1736 return 0;
1737 }
1738
1739 if (!new_conf->setName(m_config->getName()))
1740 {
1741 g_eventLogger->error("Failed to copy name from old config");
1742 delete new_conf;
1743 return 0;
1744 }
1745
1746 if (!new_conf->setPrimaryMgmNode(m_config->getPrimaryMgmNode()))
1747 {
1748 g_eventLogger->error("Failed to copy primary mgm node from old config");
1749 delete new_conf;
1750 return 0;
1751 }
1752
1753 /* Check if config has changed */
1754 if (!m_config->equal(new_conf))
1755 {
1756 /* Loaded config is different */
1757 BaseString buf;
1758 g_eventLogger->info("Detected change of %s on disk, will try to "
1759 "set it. "
1760 "This is the actual diff:\n%s",
1761 m_opts.mycnf ? "my.cnf" : m_opts.config_filename,
1762 m_config->diff2str(new_conf, buf));
1763
1764 return new_conf;
1765 }
1766 else
1767 {
1768 /* Loaded config was equal to current */
1769 g_eventLogger->info("Config equal!");
1770 delete new_conf;
1771 }
1772 return 0;
1773 }
1774
1775 void
run()1776 ConfigManager::run()
1777 {
1778 assert(m_facade);
1779 SignalSender & ss = * m_ss;
1780
1781 if (!m_opts.config_cache)
1782 {
1783 /* Stop receiving signals by closing ConfigManager's
1784 block in TransporterFacade */
1785 delete m_ss;
1786 m_ss = NULL;
1787
1788 /* Confirm the present config, free the space that was allocated for a
1789 new one, and terminate the manager thread */
1790 m_config_change.release();
1791 m_config_state = CS_CONFIRMED;
1792 ndbout_c("== ConfigManager disabled -- manager thread will exit ==");
1793 return;
1794 }
1795
1796 ss.lock();
1797
1798 // Build bitmaks of all mgm nodes in config
1799 m_config->get_nodemask(m_all_mgm, NDB_MGM_NODE_TYPE_MGM);
1800
1801 // exclude nowait-nodes from config change protcol
1802 m_all_mgm.bitANDC(m_opts.nowait_nodes);
1803 m_all_mgm.set(m_facade->ownId()); // Never exclude own node
1804
1805 start_checkers();
1806
1807 while (!is_stopped())
1808 {
1809
1810 if (m_config_change.m_state == ConfigChangeState::IDLE)
1811 {
1812 bool print_state = false;
1813 if (m_previous_state != m_config_state)
1814 {
1815 print_state = true;
1816 m_previous_state = m_config_state;
1817 }
1818
1819 /*
1820 Check if it's necessary to start something to get
1821 out of the current state
1822 */
1823 switch (m_config_state){
1824
1825 case CS_UNINITIALIZED:
1826 abort();
1827 break;
1828
1829 case CS_INITIAL:
1830 /*
1831 INITIAL => CONFIRMED
1832 When all mgm nodes has been started and checked that they
1833 are also in INITIAL, the node with the lowest node id
1834 will start an initial config change. When completed
1835 all nodes will be in CONFIRMED
1836 */
1837
1838 if (print_state)
1839 ndbout_c("==INITIAL==");
1840
1841 if (m_config_change.m_initial_config && // Updated config.ini was found
1842 m_started.equal(m_all_mgm) && // All mgmd started
1843 m_checked.equal(m_started) && // All nodes checked
1844 m_all_mgm.find(0) == m_facade->ownId()) // Lowest nodeid
1845 {
1846 Config* new_conf = m_config_change.m_initial_config;
1847 m_config_change.m_initial_config = 0;
1848 m_config_change.m_new_config = new_conf;
1849 startConfigChange(ss, ss.getOwnRef());
1850 }
1851 break;
1852
1853 case CS_CONFIRMED:
1854 if (print_state)
1855 ndbout_c("==CONFIRMED==");
1856
1857 if (m_config_change.m_loaded_config != 0 &&
1858 m_config_change.m_new_config == 0 &&
1859 m_started.equal(m_all_mgm) &&
1860 m_checked.equal(m_started))
1861 {
1862 Config* new_conf = m_config_change.m_loaded_config;
1863 m_config_change.m_loaded_config = 0;
1864 m_config_change.m_new_config = prepareLoadedConfig(new_conf);
1865 }
1866
1867 if (m_config_change.m_new_config && // Updated config.ini was found
1868 m_started.equal(m_all_mgm) && // All mgmd started
1869 m_checked.equal(m_started)) // All nodes checked
1870 {
1871 startConfigChange(ss, ss.getOwnRef());
1872 }
1873
1874 break;
1875
1876 default:
1877 break;
1878 }
1879
1880 // Send CHECK_CONFIG to all nodes not yet checked
1881 if (m_waiting_for.isclear() && // Nothing outstanding
1882 m_prepared_config == 0 && // and no config change ongoing
1883 !m_checked.equal(m_started)) // Some nodes have not been checked
1884 {
1885 NodeBitmask not_checked;
1886 not_checked.assign(m_started);
1887 not_checked.bitANDC(m_checked);
1888 sendConfigCheckReq(ss, not_checked);
1889 }
1890
1891 handle_exclude_nodes();
1892 }
1893
1894 SimpleSignal *sig = ss.waitFor((Uint32)1000);
1895 if (!sig)
1896 continue;
1897
1898 switch (sig->readSignalNumber()) {
1899
1900 case GSN_CONFIG_CHANGE_REQ:
1901 execCONFIG_CHANGE_REQ(ss, sig);
1902 break;
1903
1904 case GSN_CONFIG_CHANGE_IMPL_REQ:
1905 execCONFIG_CHANGE_IMPL_REQ(ss, sig);
1906 break;
1907
1908 case GSN_CONFIG_CHANGE_IMPL_REF:
1909 execCONFIG_CHANGE_IMPL_REF(ss, sig);
1910 break;
1911
1912 case GSN_CONFIG_CHANGE_IMPL_CONF:
1913 execCONFIG_CHANGE_IMPL_CONF(ss, sig);
1914 break;
1915
1916 case GSN_NF_COMPLETEREP:{
1917 const NFCompleteRep * const rep =
1918 CAST_CONSTPTR(NFCompleteRep, sig->getDataPtr());
1919 NodeId nodeId= rep->failedNodeId;
1920
1921 if (m_all_mgm.get(nodeId)) // Not mgm node
1922 break;
1923
1924 ndbout_c("Node %d failed", nodeId);
1925 m_started.clear(nodeId);
1926 m_checked.clear(nodeId);
1927 m_defragger.node_failed(nodeId);
1928
1929 if (m_config_change.m_state != ConfigChangeState::IDLE)
1930 {
1931 g_eventLogger->info("Node %d failed during config change!!",
1932 nodeId);
1933 g_eventLogger->warning("Node failure handling of config "
1934 "change protocol not yet implemented!! "
1935 "No more configuration changes can occur, "
1936 "but the node will continue to serve the "
1937 "last good configuration");
1938 // TODO start take over of config change protocol
1939 }
1940 break;
1941 }
1942
1943 case GSN_NODE_FAILREP:
1944 // ignore, NF_COMPLETEREP will come
1945 break;
1946
1947 case GSN_API_REGCONF:{
1948 NodeId nodeId = refToNode(sig->header.theSendersBlockRef);
1949 if (m_all_mgm.get(nodeId) && // Is a mgm node
1950 !m_started.get(nodeId)) // Not already marked as started
1951 {
1952 g_eventLogger->info("Node %d connected", nodeId);
1953 m_started.set(nodeId);
1954 }
1955 break;
1956 }
1957
1958 case GSN_CONFIG_CHECK_REQ:
1959 execCONFIG_CHECK_REQ(ss, sig);
1960 break;
1961
1962 case GSN_CONFIG_CHECK_REF:
1963 execCONFIG_CHECK_REF(ss, sig);
1964 break;
1965
1966 case GSN_CONFIG_CHECK_CONF:
1967 execCONFIG_CHECK_CONF(ss, sig);
1968 break;
1969
1970 case GSN_TAKE_OVERTCCONF:
1971 case GSN_CONNECT_REP:
1972 break;
1973
1974 default:
1975 sig->print();
1976 g_eventLogger->error("Unknown signal received. SignalNumber: "
1977 "%i from (%d, 0x%x)",
1978 sig->readSignalNumber(),
1979 refToNode(sig->header.theSendersBlockRef),
1980 refToBlock(sig->header.theSendersBlockRef));
1981 abort();
1982 break;
1983 }
1984 }
1985 stop_checkers();
1986 ss.unlock();
1987 }
1988
1989
1990 #include "InitConfigFileParser.hpp"
1991
1992 Config*
load_init_config(const char * config_filename)1993 ConfigManager::load_init_config(const char* config_filename)
1994 {
1995 InitConfigFileParser parser;
1996 return parser.parseConfig(config_filename);
1997 }
1998
1999
2000 Config*
load_init_mycnf(void)2001 ConfigManager::load_init_mycnf(void)
2002 {
2003 InitConfigFileParser parser;
2004 return parser.parse_mycnf();
2005 }
2006
2007
2008 Config*
load_config(const char * config_filename,bool mycnf,BaseString & msg)2009 ConfigManager::load_config(const char* config_filename, bool mycnf,
2010 BaseString& msg)
2011 {
2012 Config* new_conf = NULL;
2013 if (mycnf && (new_conf = load_init_mycnf()) == NULL)
2014 {
2015 msg.assign("Could not load configuration from 'my.cnf'");
2016 return NULL;
2017 }
2018 else if (config_filename &&
2019 (new_conf = load_init_config(config_filename)) == NULL)
2020 {
2021 msg.assfmt("Could not load configuration from '%s'",
2022 config_filename);
2023 return NULL;
2024 }
2025
2026 return new_conf;
2027 }
2028
2029
2030 Config*
load_config(void) const2031 ConfigManager::load_config(void) const
2032 {
2033 BaseString msg;
2034 Config* new_conf = NULL;
2035 if ((new_conf = load_config(m_opts.config_filename,
2036 m_opts.mycnf, msg)) == NULL)
2037 {
2038 g_eventLogger->error(msg);
2039 return NULL;
2040 }
2041 return new_conf;
2042 }
2043
2044
2045 Config*
fetch_config(void)2046 ConfigManager::fetch_config(void)
2047 {
2048 DBUG_ENTER("ConfigManager::fetch_config");
2049
2050 while(true)
2051 {
2052 /* Loop until config loaded from other mgmd(s) */
2053 char buf[128];
2054 g_eventLogger->info("Trying to get configuration from other mgmd(s) "\
2055 "using '%s'...",
2056 m_config_retriever.get_connectstring(buf, sizeof(buf)));
2057
2058 if (m_config_retriever.is_connected() ||
2059 m_config_retriever.do_connect(30 /* retry */,
2060 1 /* delay */,
2061 0 /* verbose */) == 0)
2062 {
2063 g_eventLogger->info("Connected to '%s:%d'...",
2064 m_config_retriever.get_mgmd_host(),
2065 m_config_retriever.get_mgmd_port());
2066 break;
2067 }
2068 }
2069 // read config from other management server
2070 ndb_mgm_configuration * tmp =
2071 m_config_retriever.getConfig(m_config_retriever.get_mgmHandle());
2072
2073 // Disconnect from other mgmd
2074 m_config_retriever.disconnect();
2075
2076 if (tmp == NULL) {
2077 g_eventLogger->error("%s", m_config_retriever.getErrorString());
2078 DBUG_RETURN(false);
2079 }
2080
2081 DBUG_RETURN(new Config(tmp));
2082 }
2083
2084
2085 static bool
delete_file(const char * file_name)2086 delete_file(const char* file_name)
2087 {
2088 #ifdef _WIN32
2089 if (DeleteFile(file_name) == 0)
2090 {
2091 g_eventLogger->error("Failed to delete file '%s', error: %d",
2092 file_name, GetLastError());
2093 return false;
2094 }
2095 #else
2096 if (unlink(file_name) == -1)
2097 {
2098 g_eventLogger->error("Failed to delete file '%s', error: %d",
2099 file_name, errno);
2100 return false;
2101 }
2102 #endif
2103 return true;
2104 }
2105
2106
2107 bool
delete_saved_configs(void) const2108 ConfigManager::delete_saved_configs(void) const
2109 {
2110 NdbDir::Iterator iter;
2111
2112 if (iter.open(m_configdir) != 0)
2113 return false;
2114
2115 bool result = true;
2116 const char* name;
2117 unsigned nodeid;
2118 char extra; // Avoid matching ndb_2_config.bin.2.tmp
2119 BaseString full_name;
2120 unsigned version;
2121 while ((name= iter.next_file()) != NULL)
2122 {
2123 if (sscanf(name,
2124 "ndb_%u_config.bin.%u%c",
2125 &nodeid, &version, &extra) == 2)
2126 {
2127 // ndbout_c("match: %s", name);
2128
2129 if (nodeid != m_node_id)
2130 continue;
2131
2132 // Delete the file
2133 full_name.assfmt("%s%s%s", m_configdir, DIR_SEPARATOR, name);
2134 g_eventLogger->debug("Deleting binary config file '%s'",
2135 full_name.c_str());
2136 if (!delete_file(full_name.c_str()))
2137 {
2138 // Make function return false, but continue and try
2139 // to delete other files
2140 result = false;
2141 }
2142 }
2143 }
2144
2145 return result;
2146 }
2147
2148
2149 bool
saved_config_exists(BaseString & config_name) const2150 ConfigManager::saved_config_exists(BaseString& config_name) const
2151 {
2152 NdbDir::Iterator iter;
2153
2154 if (iter.open(m_configdir) != 0)
2155 return false;
2156
2157 const char* name;
2158 unsigned nodeid;
2159 char extra; // Avoid matching ndb_2_config.bin.2.tmp
2160 unsigned version, max_version= 0;
2161 while ((name= iter.next_file()) != NULL)
2162 {
2163 if (sscanf(name,
2164 "ndb_%u_config.bin.%u%c",
2165 &nodeid, &version, &extra) == 2)
2166 {
2167 // ndbout_c("match: %s", name);
2168
2169 if (nodeid != m_node_id)
2170 continue;
2171
2172 if (version>max_version)
2173 max_version= version;
2174 }
2175 }
2176
2177 if (max_version == 0)
2178 return false;
2179
2180 config_name.assfmt("%s%sndb_%u_config.bin.%u",
2181 m_configdir, DIR_SEPARATOR, m_node_id, max_version);
2182 return true;
2183 }
2184
2185
2186
2187 bool
failed_config_change_exists() const2188 ConfigManager::failed_config_change_exists() const
2189 {
2190 NdbDir::Iterator iter;
2191
2192 if (iter.open(m_configdir) != 0)
2193 return false;
2194
2195 const char* name;
2196 char tmp;
2197 unsigned nodeid;
2198 unsigned version;
2199 while ((name= iter.next_file()) != NULL)
2200 {
2201 // Check for a previously failed config
2202 // change, ie. ndb_<nodeid>_config.bin.X.tmp exist
2203 if (sscanf(name,
2204 "ndb_%u_config.bin.%u.tm%c",
2205 &nodeid, &version, &tmp) == 3 &&
2206 tmp == 'p')
2207 {
2208 if (nodeid != m_node_id)
2209 continue;
2210
2211 g_eventLogger->error("Found binary configuration file '%s%s%s' from "
2212 "previous failed attempt to change config. This "
2213 "error must be manually resolved by removing the "
2214 "file(ie. ROLLBACK) or renaming the file to it's "
2215 "name without the .tmp extension(ie COMMIT). Make "
2216 "sure to check the other nodes so that they all "
2217 "have the same configuration generation.",
2218 m_configdir, DIR_SEPARATOR, name);
2219 return true;
2220 }
2221 }
2222
2223 return false;
2224 }
2225
2226
2227 Config*
load_saved_config(const BaseString & config_name)2228 ConfigManager::load_saved_config(const BaseString& config_name)
2229 {
2230 struct ndb_mgm_configuration * tmp =
2231 m_config_retriever.getConfig(config_name.c_str());
2232 if(tmp == NULL)
2233 {
2234 g_eventLogger->error("Failed to load config from '%s', error: '%s'",
2235 config_name.c_str(),
2236 m_config_retriever.getErrorString());
2237 return NULL;
2238 }
2239
2240 Config* conf = new Config(tmp);
2241 if (conf == NULL)
2242 g_eventLogger->error("Failed to load config, out of memory");
2243 return conf;
2244 }
2245
2246 bool
get_packed_config(ndb_mgm_node_type nodetype,BaseString * buf64,BaseString & error)2247 ConfigManager::get_packed_config(ndb_mgm_node_type nodetype,
2248 BaseString* buf64, BaseString& error)
2249 {
2250 Guard g(m_config_mutex);
2251
2252 /*
2253 Only allow the config to be exported if it's been confirmed
2254 or if another mgmd is asking for it
2255 */
2256 switch(m_config_state)
2257 {
2258 case CS_INITIAL:
2259 if (nodetype == NDB_MGM_NODE_TYPE_MGM)
2260 ; // allow other mgmd to fetch initial configuration
2261 else
2262 {
2263 error.assign("The cluster configuration is not yet confirmed "
2264 "by all defined management servers. ");
2265 if (m_config_change.m_state != ConfigChangeState::IDLE)
2266 {
2267 error.append("Initial configuration change is in progress.");
2268 }
2269 else
2270 {
2271 NodeBitmask not_started(m_all_mgm);
2272 not_started.bitANDC(m_checked);
2273 error.append("This management server is still waiting for node ");
2274 error.append(BaseString::getPrettyText(not_started));
2275 error.append(" to connect.");
2276 }
2277 return false;
2278 }
2279 break;
2280
2281 case CS_CONFIRMED:
2282 // OK
2283 break;
2284
2285 default:
2286 error.assign("get_packed_config, unknown config state: %d",
2287 m_config_state);
2288 return false;
2289 break;
2290
2291 }
2292
2293 require(m_config != 0);
2294 if (buf64)
2295 {
2296 if (!m_packed_config.length())
2297 {
2298 // No packed config exist, generate a new one
2299 Config config_copy(m_config);
2300 if (!m_dynamic_ports.set_in_config(&config_copy))
2301 {
2302 error.assign("get_packed_config, failed to set dynamic ports in config");
2303 return false;
2304 }
2305
2306 if (!config_copy.pack64(m_packed_config))
2307 {
2308 error.assign("get_packed_config, failed to pack config_copy");
2309 return false;
2310 }
2311 }
2312 buf64->assign(m_packed_config, m_packed_config.length());
2313 }
2314 return true;
2315 }
2316
2317
2318 bool
init_checkers(const Config * config)2319 ConfigManager::init_checkers(const Config* config)
2320 {
2321
2322 // Init one thread for each other mgmd
2323 // in the config and check which version it has. If version
2324 // does not have config manager, set this node to ignore
2325 // that node in the config change protocol
2326
2327 BaseString connect_string;
2328 ConfigIter iter(config, CFG_SECTION_NODE);
2329 for (iter.first(); iter.valid(); iter.next())
2330 {
2331
2332 // Only MGM nodes
2333 Uint32 type;
2334 if (iter.get(CFG_TYPE_OF_SECTION, &type) ||
2335 type != NODE_TYPE_MGM)
2336 continue;
2337
2338 // Not this node
2339 Uint32 nodeid;
2340 if(iter.get(CFG_NODE_ID, &nodeid) ||
2341 nodeid == m_node_id)
2342 continue;
2343
2344 const char* hostname;
2345 Uint32 port;
2346 require(!iter.get(CFG_NODE_HOST, &hostname));
2347 require(!iter.get(CFG_MGM_PORT, &port));
2348 connect_string.assfmt("%s:%u",hostname,port);
2349
2350 ConfigChecker* checker =
2351 new ConfigChecker(*this, connect_string.c_str(),
2352 m_opts.bind_address, nodeid);
2353 if (!checker)
2354 {
2355 g_eventLogger->error("Failed to create ConfigChecker");
2356 return false;
2357 }
2358
2359 if (!checker->init())
2360 return false;
2361
2362 m_checkers.push_back(checker);
2363 }
2364 return true;
2365 }
2366
2367
2368 void
start_checkers(void)2369 ConfigManager::start_checkers(void)
2370 {
2371 for (unsigned i = 0; i < m_checkers.size(); i++)
2372 m_checkers[i]->start();
2373 }
2374
2375
2376 void
stop_checkers(void)2377 ConfigManager::stop_checkers(void)
2378 {
2379 for (unsigned i = 0; i < m_checkers.size(); i++)
2380 {
2381 ConfigChecker* checker = m_checkers[i];
2382 ndbout << "stop checker " << i << endl;
2383 checker->stop();
2384 delete checker;
2385 }
2386 }
2387
2388
/*
  Construct the checker used for one remote management server.

  manager         the ConfigManager owning the list of excluded nodes
                  which run() may append to
  connect_string  address of the remote mgmd, saved in m_connect_string
  bindaddress     passed on to the ConfigRetriever
  nodeid          node id of the remote mgmd, saved in m_nodeid

  NOTE(review): m_config_retriever is set up with the global
  opt_ndb_connectstring and opt_ndb_nodeid and not with the
  'connect_string' argument, although run() logs that it connects
  to m_connect_string - confirm that this is intended.
*/
ConfigManager::ConfigChecker::ConfigChecker(ConfigManager& manager,
                                            const char* connect_string,
                                            const char * bindaddress,
                                            NodeId nodeid) :
  MgmtThread("ConfigChecker"),
  m_manager(manager),
  m_config_retriever(opt_ndb_connectstring, opt_ndb_nodeid, NDB_VERSION,
                     NDB_MGM_NODE_TYPE_MGM, bindaddress),
  m_connect_string(connect_string),
  m_nodeid(nodeid)
{
}
2401
2402
2403 bool
init()2404 ConfigManager::ConfigChecker::init()
2405 {
2406 if (m_config_retriever.hasError())
2407 {
2408 g_eventLogger->error("%s", m_config_retriever.getErrorString());
2409 return false;
2410 }
2411
2412 return true;
2413 }
2414
2415
2416 void
run()2417 ConfigManager::ConfigChecker::run()
2418 {
2419 // Connect to other mgmd inifintely until thread is stopped
2420 // or connect suceeds
2421 g_eventLogger->debug("ConfigChecker, connecting to '%s'",
2422 m_connect_string.c_str());
2423 while(m_config_retriever.do_connect(0 /* retry */,
2424 1 /* delay */,
2425 0 /* verbose */) != 0)
2426 {
2427 if (is_stopped())
2428 {
2429 g_eventLogger->debug("ConfigChecker, thread is stopped");
2430 return; // Thread is stopped
2431 }
2432
2433 NdbSleep_SecSleep(1);
2434 }
2435
2436 // Connected
2437 g_eventLogger->debug("ConfigChecker, connected to '%s'",
2438 m_connect_string.c_str());
2439
2440 // Check version
2441 int major, minor, build;
2442 char ver_str[50];
2443 if (!ndb_mgm_get_version(m_config_retriever.get_mgmHandle(),
2444 &major, &minor, &build,
2445 sizeof(ver_str), ver_str))
2446 {
2447 g_eventLogger->error("Could not get version from mgmd on '%s'",
2448 m_connect_string.c_str());
2449 return;
2450 }
2451 g_eventLogger->debug("mgmd on '%s' has version %d.%d.%d",
2452 m_connect_string.c_str(), major, minor, build);
2453
2454 // Versions prior to 7 don't have ConfigManager
2455 // exclude it from config change protocol
2456 if (major < 7)
2457 {
2458 g_eventLogger->info("Excluding node %d with version %d.%d.%d from "
2459 "config change protocol",
2460 m_nodeid, major, minor, build);
2461 m_manager.m_exclude_nodes.push_back(m_nodeid);
2462 }
2463
2464 return;
2465 }
2466
2467
2468 void
handle_exclude_nodes(void)2469 ConfigManager::handle_exclude_nodes(void)
2470 {
2471
2472 if (!m_waiting_for.isclear())
2473 return; // Other things going on
2474
2475 switch (m_config_state)
2476 {
2477 case CS_INITIAL:
2478 m_exclude_nodes.lock();
2479 for (unsigned i = 0; i < m_exclude_nodes.size(); i++)
2480 {
2481 NodeId nodeid = m_exclude_nodes[i];
2482 g_eventLogger->debug("Handle exclusion of node %d", nodeid);
2483 m_all_mgm.clear(nodeid);
2484 }
2485 m_exclude_nodes.unlock();
2486 break;
2487
2488 default:
2489 break;
2490 }
2491 m_exclude_nodes.clear();
2492
2493 }
2494
2495
2496 static bool
check_dynamic_port_configured(const Config * config,int node1,int node2,BaseString & msg)2497 check_dynamic_port_configured(const Config* config,
2498 int node1, int node2,
2499 BaseString& msg)
2500 {
2501 ConfigIter iter(config, CFG_SECTION_CONNECTION);
2502
2503 for(;iter.valid();iter.next()) {
2504 Uint32 n1, n2;
2505 if (iter.get(CFG_CONNECTION_NODE_1, &n1) != 0 ||
2506 iter.get(CFG_CONNECTION_NODE_2, &n2) != 0)
2507 {
2508 msg.assign("Could not get node1 or node2 from connection section");
2509 return false;
2510 }
2511
2512 if((n1 == (Uint32)node1 && n2 == (Uint32)node2) ||
2513 (n1 == (Uint32)node2 && n2 == (Uint32)node1))
2514 break;
2515 }
2516 if(!iter.valid()) {
2517 msg.assfmt("Unable to find connection between nodes %d -> %d",
2518 node1, node2);
2519 return false;
2520 }
2521
2522 Uint32 port;
2523 if(iter.get(CFG_CONNECTION_SERVER_PORT, &port) != 0) {
2524 msg.assign("Unable to get current value of CFG_CONNECTION_SERVER_PORT");
2525 return false;
2526 }
2527
2528 if (port != 0)
2529 {
2530 // Dynamic ports is zero in configuration
2531 msg.assfmt("Server port for %d -> %d is not marked as dynamic, value: %u",
2532 node1, node2, port);
2533 return false;
2534 }
2535 return true;
2536 }
2537
2538
2539 bool
set_dynamic_port(int node1,int node2,int value,BaseString & msg)2540 ConfigManager::set_dynamic_port(int node1, int node2, int value,
2541 BaseString& msg){
2542
2543 Guard g(m_config_mutex);
2544 if (!check_dynamic_port_configured(m_config,
2545 node1, node2, msg))
2546 return false;
2547
2548 if (!m_dynamic_ports.set(node1, node2, value))
2549 {
2550 msg.assfmt("Could not set dynamic port for %d -> %d", node1, node2);
2551 return false;
2552 }
2553
2554 // Removed cache of packed config, need to be recreated
2555 // to include the new dynamic port
2556 m_packed_config.clear();
2557
2558 return true;
2559 }
2560
2561
2562 bool
get_dynamic_port(int node1,int node2,int * value,BaseString & msg) const2563 ConfigManager::get_dynamic_port(int node1, int node2, int *value,
2564 BaseString& msg) const {
2565
2566 Guard g(m_config_mutex);
2567 if (!check_dynamic_port_configured(m_config,
2568 node1, node2, msg))
2569 return false;
2570
2571 if (!m_dynamic_ports.get(node1, node2, value))
2572 {
2573 msg.assfmt("Could not get dynamic port for %d -> %d", node1, node2);
2574 return false;
2575 }
2576 return true;
2577 }
2578
2579
check(int & node1,int & node2) const2580 bool ConfigManager::DynamicPorts::check(int& node1, int& node2) const
2581 {
2582 // Always use smaller node first
2583 if (node1 > node2)
2584 {
2585 int tmp = node1;
2586 node1 = node2;
2587 node2 = tmp;
2588 }
2589
2590 // Only NDB nodes can be dynamic port server
2591 if (node1 <= 0 || node1 >= MAX_NDB_NODES)
2592 return false;
2593 if (node2 <= 0 || node2 >= MAX_NODES)
2594 return false;
2595 if (node1 == node2)
2596 return false;
2597
2598 return true;
2599 }
2600
2601
set(int node1,int node2,int port)2602 bool ConfigManager::DynamicPorts::set(int node1, int node2, int port)
2603 {
2604 if (!check(node1, node2))
2605 return false;
2606
2607 if (!m_ports.insert(NodePair(node1, node2), port, true))
2608 return false;
2609
2610 return true;
2611 }
2612
2613
get(int node1,int node2,int * port) const2614 bool ConfigManager::DynamicPorts::get(int node1, int node2, int* port) const
2615 {
2616 if (!check(node1, node2))
2617 return false;
2618
2619 int value = 0; // Return 0 if not found
2620 (void)m_ports.search(NodePair(node1, node2), value);
2621
2622 *port = (int)value;
2623 return true;
2624 }
2625
2626
2627 bool
set_in_config(Config * config)2628 ConfigManager::DynamicPorts::set_in_config(Config* config)
2629 {
2630 bool result = true;
2631 ConfigIter iter(config, CFG_SECTION_CONNECTION);
2632
2633 for(;iter.valid();iter.next()) {
2634 Uint32 port = 0;
2635 if (iter.get(CFG_CONNECTION_SERVER_PORT, &port) != 0 ||
2636 port != 0)
2637 continue; // Not configured as dynamic port
2638
2639 Uint32 n1, n2;
2640 require(iter.get(CFG_CONNECTION_NODE_1, &n1) == 0 &&
2641 iter.get(CFG_CONNECTION_NODE_2, &n2) == 0);
2642
2643 int dyn_port;
2644 if (!get(n1, n2, &dyn_port) || dyn_port == 0)
2645 continue; // No dynamic port registered
2646
2647 // Write the dynamic port to config
2648 port = (Uint32)dyn_port;
2649 ConfigValues::Iterator i2(config->m_configValues->m_config,
2650 iter.m_config);
2651 if(i2.set(CFG_CONNECTION_SERVER_PORT, port) == false)
2652 result = false;
2653 }
2654 return result;
2655 }
2656
2657
// Explicitly instantiate the Vector template for the
// pointer element types named below
template class Vector<ConfigSubscriber*>;
template class Vector<ConfigManager::ConfigChecker*>;
2660
2661