1 /* Copyright (c) 2008, 2011, Oracle and/or its affiliates. All rights reserved.
2 
3    This program is free software; you can redistribute it and/or modify
4    it under the terms of the GNU General Public License, version 2.0,
5    as published by the Free Software Foundation.
6 
7    This program is also distributed with certain software (including
8    but not limited to OpenSSL) that is licensed under separate terms,
9    as designated in a particular file or component or in included license
10    documentation.  The authors of MySQL hereby grant you an additional
11    permission to link the program and your derivative works with the
12    separately licensed software that they have included with MySQL.
13 
14    This program is distributed in the hope that it will be useful,
15    but WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17    GNU General Public License, version 2.0, for more details.
18 
19    You should have received a copy of the GNU General Public License
20    along with this program; if not, write to the Free Software
21    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */
22 
23 
24 #include "ConfigManager.hpp"
25 #include "MgmtSrvr.hpp"
26 #include <NdbDir.hpp>
27 
28 #include <NdbConfig.h>
29 #include <NdbSleep.h>
30 #include <kernel/GlobalSignalNumbers.h>
31 #include <SignalSender.hpp>
32 #include <NdbApiSignal.hpp>
33 #include <signaldata/NFCompleteRep.hpp>
34 #include <signaldata/NodeFailRep.hpp>
35 #include <signaldata/ApiRegSignalData.hpp>
36 #include <ndb_version.h>
37 
38 #include <EventLogger.hpp>
39 extern EventLogger * g_eventLogger;
40 
41 extern "C" const char* opt_ndb_connectstring;
42 extern "C" int opt_ndb_nodeid;
43 
ConfigManager(const MgmtSrvr::MgmtOpts & opts,const char * configdir)44 ConfigManager::ConfigManager(const MgmtSrvr::MgmtOpts& opts,
45                              const char* configdir) :
46   MgmtThread("ConfigManager"),
47   m_opts(opts),
48   m_facade(NULL),
49   m_ss(NULL),
50   m_config_mutex(NULL),
51   m_config(NULL),
52   m_config_retriever(opt_ndb_connectstring,
53                      opt_ndb_nodeid,
54                      NDB_VERSION,
55                      NDB_MGM_NODE_TYPE_MGM,
56                      opts.bind_address),
57   m_config_state(CS_UNINITIALIZED),
58   m_previous_state(CS_UNINITIALIZED),
59   m_prepared_config(NULL),
60   m_node_id(0),
61   m_configdir(configdir)
62 {
63 }
64 
65 
~ConfigManager()66 ConfigManager::~ConfigManager()
67 {
68   delete m_config;
69   delete m_prepared_config;
70   if (m_ss)
71     delete m_ss;
72   NdbMutex_Destroy(m_config_mutex);
73 }
74 
75 
76 /**
77    alone_on_host
78 
79    Check if this is the only node of "type" on
80    this host
81 
82 */
83 
84 static bool
alone_on_host(Config * conf,Uint32 own_type,Uint32 own_nodeid)85 alone_on_host(Config* conf,
86               Uint32 own_type,
87               Uint32 own_nodeid)
88 {
89   ConfigIter iter(conf, CFG_SECTION_NODE);
90   for (iter.first(); iter.valid(); iter.next())
91   {
92     Uint32 type;
93     if(iter.get(CFG_TYPE_OF_SECTION, &type) ||
94        type != own_type)
95       continue;
96 
97     Uint32 nodeid;
98     if(iter.get(CFG_NODE_ID, &nodeid) ||
99        nodeid == own_nodeid)
100       continue;
101 
102     const char * hostname;
103     if(iter.get(CFG_NODE_HOST, &hostname))
104       continue;
105 
106     if (SocketServer::tryBind(0,hostname))
107     {
108       // Another MGM node was also setup on this host
109       g_eventLogger->debug("Not alone on host %s, node %d "     \
110                            "will also run here",
111                            hostname, nodeid);
112       return false;
113     }
114   }
115   return true;
116 }
117 
118 
119 /**
120    find_nodeid_from_configdir
121 
122    Check if configdir only contains config files
123    with one nodeid -> read the latest and confirm
124    there should only be one mgm node on this host
125 */
126 
127 NodeId
find_nodeid_from_configdir(void)128 ConfigManager::find_nodeid_from_configdir(void)
129 {
130   BaseString config_name;
131   NdbDir::Iterator iter;
132 
133   if (iter.open(m_configdir) != 0)
134     return 0;
135 
136   const char* name;
137   unsigned found_nodeid= 0;
138   unsigned nodeid;
139   char extra; // Avoid matching ndb_2_config.bin.2.tmp
140   unsigned version, max_version = 0;
141   while ((name = iter.next_file()) != NULL)
142   {
143     if (sscanf(name,
144                "ndb_%u_config.bin.%u%c",
145                &nodeid, &version, &extra) == 2)
146     {
147       // ndbout_c("match: %s", name);
148 
149       if (nodeid != found_nodeid)
150       {
151         if (found_nodeid != 0)
152           return 0; // Found more than one nodeid
153         found_nodeid= nodeid;
154       }
155 
156       if (version > max_version)
157         max_version = version;
158     }
159   }
160 
161   if (max_version == 0)
162     return 0;
163 
164   config_name.assfmt("%s%sndb_%u_config.bin.%u",
165                      m_configdir, DIR_SEPARATOR, found_nodeid, max_version);
166 
167   Config* conf;
168   if (!(conf = load_saved_config(config_name)))
169     return 0;
170 
171   if (!m_config_retriever.verifyConfig(conf->m_configValues,
172                                        found_nodeid) ||
173       !alone_on_host(conf, NDB_MGM_NODE_TYPE_MGM, found_nodeid))
174   {
175     delete conf;
176     return 0;
177   }
178 
179   delete conf;
180   return found_nodeid;
181 }
182 
183 
184 /**
185    find_own_nodeid
186 
187    Return the nodeid of the MGM node
188    defined to run on this host
189 
190    Return 0 if more than one node is defined
191 */
192 
193 static NodeId
find_own_nodeid(Config * conf)194 find_own_nodeid(Config* conf)
195 {
196   NodeId found_nodeid= 0;
197   ConfigIter iter(conf, CFG_SECTION_NODE);
198   for (iter.first(); iter.valid(); iter.next())
199   {
200     Uint32 type;
201     if(iter.get(CFG_TYPE_OF_SECTION, &type) ||
202        type != NDB_MGM_NODE_TYPE_MGM)
203       continue;
204 
205     Uint32 nodeid;
206     require(iter.get(CFG_NODE_ID, &nodeid) == 0);
207 
208     const char * hostname;
209     if(iter.get(CFG_NODE_HOST, &hostname))
210       continue;
211 
212     if (SocketServer::tryBind(0,hostname))
213     {
214       // This node is setup to run on this host
215       if (found_nodeid == 0)
216         found_nodeid = nodeid;
217       else
218         return 0; // More than one host on this node
219     }
220   }
221   return found_nodeid;
222 }
223 
224 
225 NodeId
find_nodeid_from_config(void)226 ConfigManager::find_nodeid_from_config(void)
227 {
228   if (!m_opts.mycnf &&
229       !m_opts.config_filename)
230     return 0;
231 
232   Config* conf = load_config();
233   if (conf == NULL)
234     return 0;
235 
236   NodeId found_nodeid = find_own_nodeid(conf);
237   if (found_nodeid == 0 ||
238       !m_config_retriever.verifyConfig(conf->m_configValues, found_nodeid))
239   {
240     delete conf;
241     return 0;
242   }
243 
244   return found_nodeid;
245 }
246 
247 
248 bool
init_nodeid(void)249 ConfigManager::init_nodeid(void)
250 {
251   DBUG_ENTER("ConfigManager::init_nodeid");
252 
253   NodeId nodeid = m_config_retriever.get_configuration_nodeid();
254   if (nodeid)
255   {
256     // Nodeid was specifed on command line or in NDB_CONNECTSTRING
257     g_eventLogger->debug("Got nodeid: %d from command line "    \
258                          "or NDB_CONNECTSTRING", nodeid);
259     m_node_id = nodeid;
260     DBUG_RETURN(true);
261   }
262 
263   nodeid = find_nodeid_from_configdir();
264   if (nodeid)
265   {
266     // Found nodeid by searching in configdir
267     g_eventLogger->debug("Got nodeid: %d from searching in configdir",
268                          nodeid);
269     m_node_id = nodeid;
270     DBUG_RETURN(true);
271   }
272 
273   nodeid = find_nodeid_from_config();
274   if (nodeid)
275   {
276     // Found nodeid by looking in the config given on command line
277     g_eventLogger->debug("Got nodeid: %d from config file given "       \
278                          "on command line",
279                          nodeid);
280     m_node_id = nodeid;
281     DBUG_RETURN(true);
282   }
283 
284   // We _could_ try connecting to other running mgmd(s)
285   // and fetch our nodeid. But, that introduces a dependency
286   // that is not beneficial for a shared nothing cluster, since
287   // it might only work when other mgmd(s) are started. If all
288   // mgmd(s) is down it would require manual intervention.
289   // Better to require the node id to always be specified
290   // on the command line(or the above _local_ magic)
291 
292   g_eventLogger->error("Could not determine which nodeid to use for "\
293                        "this node. Specify it with --ndb-nodeid=<nodeid> "\
294                        "on command line");
295   DBUG_RETURN(false);
296 }
297 
298 
299 static void
reset_dynamic_ports_in_config(const Config * config)300 reset_dynamic_ports_in_config(const Config* config)
301 {
302   ConfigIter iter(config, CFG_SECTION_CONNECTION);
303 
304   for(;iter.valid();iter.next()) {
305     Uint32 port;
306     require(iter.get(CFG_CONNECTION_SERVER_PORT, &port) == 0);
307 
308     if ((int)port < 0)
309     {
310       port = 0;
311       ConfigValues::Iterator i2(config->m_configValues->m_config,
312                                 iter.m_config);
313       require(i2.set(CFG_CONNECTION_SERVER_PORT, port));
314     }
315   }
316 }
317 
318 
319 bool
init(void)320 ConfigManager::init(void)
321 {
322   DBUG_ENTER("ConfigManager::init");
323 
324   m_config_mutex = NdbMutex_Create();
325   if (!m_config_mutex)
326   {
327     g_eventLogger->error("Failed to create mutex in ConfigManager!");
328     DBUG_RETURN(false);
329   }
330 
331   require(m_config_state == CS_UNINITIALIZED);
332 
333   if (m_config_retriever.hasError())
334   {
335     g_eventLogger->error("%s", m_config_retriever.getErrorString());
336     DBUG_RETURN(false);
337   }
338 
339   if (!init_nodeid())
340     DBUG_RETURN(false);
341 
342   if (m_opts.initial && !delete_saved_configs())
343     DBUG_RETURN(false);
344 
345   if (failed_config_change_exists())
346     DBUG_RETURN(false);
347 
348   BaseString config_bin_name;
349   if (saved_config_exists(config_bin_name))
350   {
351     Config* conf = NULL;
352     if (!(conf = load_saved_config(config_bin_name)))
353       DBUG_RETURN(false);
354 
355     if (!config_ok(conf))
356       DBUG_RETURN(false);
357 
358     set_config(conf);
359     m_config_state = CS_CONFIRMED;
360 
361     g_eventLogger->info("Loaded config from '%s'", config_bin_name.c_str());
362 
363     if (m_opts.reload && // --reload
364         (m_opts.mycnf || m_opts.config_filename))
365     {
366       Config* new_conf = load_config();
367       if (new_conf == NULL)
368         DBUG_RETURN(false);
369 
370       /**
371        * Add config to set once ConfigManager is fully started
372        */
373       m_config_change.config_loaded(new_conf);
374       g_eventLogger->info("Loaded configuration from '%s', will try "   \
375                           "to set it once started",
376                           m_opts.mycnf ? "my.cnf" : m_opts.config_filename);
377     }
378   }
379   else
380   {
381     if (m_opts.mycnf || m_opts.config_filename)
382     {
383       Config* conf = load_config();
384       if (conf == NULL)
385         DBUG_RETURN(false);
386 
387       if (!config_ok(conf))
388         DBUG_RETURN(false);
389 
390       /*
391         Set this node as primary node for config.ini/my.cnf
392         in order to make it possible that make sure an old
393         config.ini is only loaded with --force
394       */
395       if (!conf->setPrimaryMgmNode(m_node_id))
396       {
397         g_eventLogger->error("Failed to set primary MGM node");
398         DBUG_RETURN(false);
399       }
400 
401       /* Use the initial config for now */
402       set_config(conf);
403 
404       g_eventLogger->info("Got initial configuration from '%s', will try " \
405                           "to set it when all ndb_mgmd(s) started",
406                           m_opts.mycnf ? "my.cnf" : m_opts.config_filename);
407       m_config_change.m_initial_config = new Config(conf); // Copy config
408       m_config_state = CS_INITIAL;
409 
410       if (!init_checkers(m_config_change.m_initial_config))
411         DBUG_RETURN(false);
412     }
413     else
414     {
415       Config* conf = NULL;
416       if (!(conf = fetch_config()))
417       {
418         g_eventLogger->error("Could not fetch config!");
419         DBUG_RETURN(false);
420       }
421 
422       /*
423         The fetched config may contain dynamic ports for
424         ndbd(s) which have to be reset to 0 before using
425         the config
426       */
427       reset_dynamic_ports_in_config(conf);
428 
429       if (!config_ok(conf))
430         DBUG_RETURN(false);
431 
432       /* Use the fetched config for now */
433       set_config(conf);
434 
435       if (m_config->getGeneration() == 0)
436       {
437         g_eventLogger->info("Fetched initial configuration, " \
438                             "generation: %d, name: '%s'. "\
439                             "Will try to set it when all ndb_mgmd(s) started",
440                             m_config->getGeneration(), m_config->getName());
441         m_config_state= CS_INITIAL;
442         m_config_change.m_initial_config = new Config(conf); // Copy config
443 
444         if (!init_checkers(m_config_change.m_initial_config))
445           DBUG_RETURN(false);
446       }
447       else
448       {
449         g_eventLogger->info("Fetched confirmed configuration, " \
450                             "generation: %d, name: '%s'. " \
451                             "Trying to write it to disk...",
452                             m_config->getGeneration(), m_config->getName());
453         if (!prepareConfigChange(m_config))
454         {
455           abortConfigChange();
456           g_eventLogger->error("Failed to write the fetched config to disk");
457           DBUG_RETURN(false);
458         }
459         commitConfigChange();
460         m_config_state = CS_CONFIRMED;
461         g_eventLogger->info("The fetched configuration has been saved!");
462       }
463     }
464   }
465 
466   require(m_config_state != CS_UNINITIALIZED);
467   DBUG_RETURN(true);
468 }
469 
470 
471 bool
prepareConfigChange(const Config * config)472 ConfigManager::prepareConfigChange(const Config* config)
473 {
474   if (m_prepared_config)
475   {
476     g_eventLogger->error("Can't prepare configuration change " \
477                          "when already prepared");
478     return false;
479   }
480 
481   Uint32 generation= config->getGeneration();
482   if (generation == 0)
483   {
484     g_eventLogger->error("Can't prepare configuration change for "\
485                          "configuration with generation 0");
486     return false;
487   }
488 
489   assert(m_node_id);
490   m_config_name.assfmt("%s%sndb_%u_config.bin.%u",
491                        m_configdir, DIR_SEPARATOR, m_node_id, generation);
492   g_eventLogger->debug("Preparing configuration, generation: %d name: %s",
493                        generation, m_config_name.c_str());
494 
495   /* Check file name is free */
496   if (access(m_config_name.c_str(), F_OK) == 0)
497   {
498     g_eventLogger->error("The file '%s' already exist while preparing",
499                          m_config_name.c_str());
500     return false;
501   }
502 
503   /* Pack the config */
504   UtilBuffer buf;
505   if(!config->pack(buf))
506   {
507     /* Failed to pack config */
508     g_eventLogger->error("Failed to pack configuration while preparing");
509     return false;
510   }
511 
512   /* Write config to temporary file */
513   BaseString prep_config_name(m_config_name);
514   prep_config_name.append(".tmp");
515   FILE * f = fopen(prep_config_name.c_str(), IF_WIN("wbc", "w"));
516   if(f == NULL)
517   {
518     g_eventLogger->error("Failed to open file '%s' while preparing, errno: %d",
519                          prep_config_name.c_str(), errno);
520     return false;
521   }
522 
523   if(fwrite(buf.get_data(), 1, buf.length(), f) != (size_t)buf.length())
524   {
525     g_eventLogger->error("Failed to write file '%s' while preparing, errno: %d",
526                          prep_config_name.c_str(), errno);
527     fclose(f);
528     unlink(prep_config_name.c_str());
529     return false;
530   }
531 
532   if (fflush(f))
533   {
534     g_eventLogger->error("Failed to flush file '%s' while preparing, errno: %d",
535                          prep_config_name.c_str(), errno);
536     fclose(f);
537     unlink(prep_config_name.c_str());
538     return false;
539   }
540 
541 #ifdef __WIN__
542   /*
543 	File is opened with the commit flag "c" so
544 	that the contents of the file buffer are written
545 	directly to disk when fflush is called
546   */
547 #else
548   if (fsync(fileno(f)))
549   {
550     g_eventLogger->error("Failed to sync file '%s' while preparing, errno: %d",
551                          prep_config_name.c_str(), errno);
552     fclose(f);
553     unlink(prep_config_name.c_str());
554     return false;
555   }
556 #endif
557   fclose(f);
558 
559   m_prepared_config = new Config(config); // Copy
560   g_eventLogger->debug("Configuration prepared");
561 
562   return true;
563 }
564 
565 
566 void
commitConfigChange(void)567 ConfigManager::commitConfigChange(void)
568 {
569   require(m_prepared_config != 0);
570 
571   /* Set new config locally and in all subscribers */
572   set_config(m_prepared_config);
573   m_prepared_config= NULL;
574 
575   /* Rename file to real name */
576   require(m_config_name.length());
577   BaseString prep_config_name(m_config_name);
578   prep_config_name.append(".tmp");
579   if(rename(prep_config_name.c_str(), m_config_name.c_str()))
580   {
581     g_eventLogger->error("rename from '%s' to '%s' failed while committing, " \
582                          "errno: %d",
583                          prep_config_name.c_str(), m_config_name.c_str(),
584                          errno);
585     // Crash and leave the prepared config file in place
586     abort();
587   }
588   m_config_name.clear();
589 
590   g_eventLogger->info("Configuration %d commited", m_config->getGeneration());
591 }
592 
593 
594 static void
check_no_dynamic_ports_in_config(const Config * config)595 check_no_dynamic_ports_in_config(const Config* config)
596 {
597   bool ok = true;
598   ConfigIter iter(config, CFG_SECTION_CONNECTION);
599 
600   for(;iter.valid();iter.next()) {
601     Uint32 n1, n2;
602     require(iter.get(CFG_CONNECTION_NODE_1, &n1) == 0 &&
603             iter.get(CFG_CONNECTION_NODE_2, &n2) == 0);
604 
605     Uint32 port_value;
606     require(iter.get(CFG_CONNECTION_SERVER_PORT, &port_value) == 0);
607 
608     int port = (int)port_value;
609     if (port < 0)
610     {
611       g_eventLogger->error("INTERNAL ERROR: Found dynamic ports with "
612                            "value in config, n1: %d, n2: %d, port: %u",
613                            n1, n2, port);
614       ok = false;
615     }
616   }
617   require(ok);
618 }
619 
620 
621 void
set_config(Config * new_config)622 ConfigManager::set_config(Config* new_config)
623 {
624   // Check that config does not contain any dynamic ports
625   check_no_dynamic_ports_in_config(new_config);
626 
627   delete m_config;
628   m_config = new_config;
629 
630   // Removed cache of packed config
631   m_packed_config.clear();
632 
633   for (unsigned i = 0; i < m_subscribers.size(); i++)
634     m_subscribers[i]->config_changed(m_node_id, new_config);
635 }
636 
637 
638 int
add_config_change_subscriber(ConfigSubscriber * subscriber)639 ConfigManager::add_config_change_subscriber(ConfigSubscriber* subscriber)
640 {
641   return m_subscribers.push_back(subscriber);
642 }
643 
644 
645 bool
config_ok(const Config * conf)646 ConfigManager::config_ok(const Config* conf)
647 {
648   assert(m_node_id);
649   if (!m_config_retriever.verifyConfig(conf->m_configValues, m_node_id))
650   {
651     g_eventLogger->error("%s", m_config_retriever.getErrorString());
652     return false;
653   }
654 
655   // Check DataDir exist
656   ConfigIter iter(conf, CFG_SECTION_NODE);
657   require(iter.find(CFG_NODE_ID, m_node_id) == 0);
658 
659   const char *datadir;
660   require(iter.get(CFG_NODE_DATADIR, &datadir) == 0);
661 
662   if (strcmp(datadir, "") != 0 && // datadir != ""
663       access(datadir, F_OK))                 // dir exists
664   {
665     g_eventLogger->error("Directory '%s' specified with DataDir "  \
666                          "in configuration does not exist.",       \
667                          datadir);
668     return false;
669   }
670   return true;
671 }
672 
673 
674 void
abortConfigChange(void)675 ConfigManager::abortConfigChange(void)
676 {
677   /* Should always succeed */
678 
679   /* Remove the prepared file */
680   BaseString prep_config_name(m_config_name);
681   prep_config_name.append(".tmp");
682   unlink(prep_config_name.c_str());
683   m_config_name.clear();
684 
685   delete m_prepared_config;
686   m_prepared_config= NULL;
687 }
688 
689 
690 
691 void
sendConfigChangeImplRef(SignalSender & ss,NodeId nodeId,ConfigChangeRef::ErrorCode error) const692 ConfigManager::sendConfigChangeImplRef(SignalSender& ss, NodeId nodeId,
693                                        ConfigChangeRef::ErrorCode error) const
694 {
695   SimpleSignal ssig;
696   ConfigChangeImplRef* const ref =
697     CAST_PTR(ConfigChangeImplRef, ssig.getDataPtrSend());
698   ref->errorCode = error;
699 
700   g_eventLogger->debug("Send CONFIG_CHANGE_IMPL_REF to node: %d, error: %d",
701                        nodeId, error);
702 
703   ss.sendSignal(nodeId, ssig,
704                 MGM_CONFIG_MAN, GSN_CONFIG_CHANGE_IMPL_REF,
705                 ConfigChangeImplRef::SignalLength);
706 }
707 
708 
709 
710 void
execCONFIG_CHANGE_IMPL_REQ(SignalSender & ss,SimpleSignal * sig)711 ConfigManager::execCONFIG_CHANGE_IMPL_REQ(SignalSender& ss, SimpleSignal* sig)
712 {
713   NodeId nodeId = refToNode(sig->header.theSendersBlockRef);
714   const ConfigChangeImplReq * const req =
715     CAST_CONSTPTR(ConfigChangeImplReq, sig->getDataPtr());
716 
717   g_eventLogger->debug("Got CONFIG_CHANGE_IMPL_REQ from node: %d, "\
718                        "requestType: %d",
719                        nodeId, req->requestType);
720 
721   if (!m_defragger.defragment(sig))
722     return; // More fragments to come
723 
724   Guard g(m_config_mutex);
725 
726   switch(req->requestType){
727   case ConfigChangeImplReq::Prepare:{
728     if (sig->header.m_noOfSections != 1)
729     {
730       sendConfigChangeImplRef(ss, nodeId, ConfigChangeRef::NoConfigData);
731       return;
732     }
733 
734     ConfigValuesFactory cf;
735     if (!cf.unpack(sig->ptr[0].p, req->length))
736     {
737       sendConfigChangeImplRef(ss, nodeId, ConfigChangeRef::FailedToUnpack);
738       return;
739     }
740 
741     Config new_config(cf.getConfigValues());
742     Uint32 new_generation = new_config.getGeneration();
743     Uint32 curr_generation = m_config->getGeneration();
744     const char* new_name = new_config.getName();
745     const char* curr_name = m_config->getName();
746 
747     if (m_config->illegal_change(&new_config))
748     {
749       sendConfigChangeImplRef(ss, nodeId, ConfigChangeRef::IllegalConfigChange);
750       return;
751     }
752 
753     if (req->initial)
754     {
755       // Check own state
756       if (m_config_state != CS_INITIAL)
757       {
758         g_eventLogger->warning("Refusing to start initial "             \
759                                "configuration change since this node "  \
760                                "is not in INITIAL state");
761         sendConfigChangeImplRef(ss, nodeId,
762                                 ConfigChangeRef::IllegalInitialState);
763         return;
764       }
765 
766       // Check generation
767       if (new_generation != 0)
768       {
769         g_eventLogger->warning("Refusing to start initial "             \
770                                "configuration change since new "        \
771                                "generation is not 0 (new_generation: %d)",
772                                new_generation);
773         sendConfigChangeImplRef(ss, nodeId,
774                                 ConfigChangeRef::IllegalInitialGeneration);
775         return;
776       }
777       new_generation = 1;
778 
779       // Check config is equal to our initial config
780       // but skip check if message is from self...
781       if (nodeId != refToNode(ss.getOwnRef()))
782       {
783         Config new_config_copy(&new_config);
784         require(new_config_copy.setName(new_name));
785         unsigned exclude[]= {CFG_SECTION_SYSTEM, 0};
786         if (!new_config_copy.equal(m_config_change.m_initial_config, exclude))
787         {
788           BaseString buf;
789           g_eventLogger->warning
790             ("Refusing to start initial config "                        \
791              "change when nodes have different "                        \
792              "config\n"                                                 \
793              "This is the actual diff:\n%s",
794              new_config_copy.diff2str(m_config_change.m_initial_config, buf));
795           sendConfigChangeImplRef(ss, nodeId,
796                                   ConfigChangeRef::DifferentInitial);
797           return;
798         }
799 
800         /*
801           Scrap the new_config, it's been used to check that other node
802           started from equal initial config, now it's not needed anymore
803         */
804         delete m_config_change.m_initial_config;
805         m_config_change.m_initial_config = NULL;
806       }
807     }
808     else
809     {
810 
811       // Check that new config has same primary mgm node as current
812       Uint32 curr_primary = m_config->getPrimaryMgmNode();
813       Uint32 new_primary = new_config.getPrimaryMgmNode();
814       if (new_primary != curr_primary)
815       {
816         g_eventLogger->warning("Refusing to start configuration change " \
817                                "requested by node %d, the new config uses " \
818                                "different primary mgm node %d. "      \
819                                "Current primary mmgm node is %d.",
820                                nodeId, new_primary, curr_primary);
821         sendConfigChangeImplRef(ss, nodeId,
822                                 ConfigChangeRef::NotPrimaryMgmNode);
823         return;
824       }
825 
826       if (new_generation == 0 ||
827           new_generation != curr_generation)
828       {
829         BaseString buf;
830         g_eventLogger->warning("Refusing to start config change "     \
831                                "requested by node with different "    \
832                                "generation: %d. Our generation: %d\n" \
833                                "This is the actual diff:\n%s",
834                                new_generation, curr_generation,
835                                new_config.diff2str(m_config, buf));
836         sendConfigChangeImplRef(ss, nodeId, ConfigChangeRef::InvalidGeneration);
837         return;
838       }
839       new_generation++;
840 
841       // Check same cluster name
842       if (strcmp(new_name, curr_name))
843       {
844         BaseString buf;
845         g_eventLogger->warning("Refusing to start config change "       \
846                                "requested by node with different "      \
847                                "name: '%s'. Our name: '%s'\n"           \
848                                "This is the actual diff:\n%s",
849                                new_name, curr_name,
850                                new_config.diff2str(m_config, buf));
851         sendConfigChangeImplRef(ss, nodeId, ConfigChangeRef::InvalidConfigName);
852         return;
853       }
854     }
855 
856     // Set new generation
857     if(!new_config.setGeneration(new_generation))
858     {
859       g_eventLogger->error("Failed to set new generation to %d",
860                            new_generation);
861       sendConfigChangeImplRef(ss, nodeId, ConfigChangeRef::InternalError);
862       return;
863     }
864 
865     if (!prepareConfigChange(&new_config))
866     {
867       sendConfigChangeImplRef(ss, nodeId, ConfigChangeRef::PrepareFailed);
868       return;
869     }
870     break;
871   }
872 
873   case ConfigChangeImplReq::Commit:
874     commitConfigChange();
875 
876     // All nodes has agreed on config -> CONFIRMED
877     m_config_state = CS_CONFIRMED;
878 
879     break;
880 
881   case ConfigChangeImplReq::Abort:
882     abortConfigChange();
883     break;
884 
885   default:
886     g_eventLogger->error("execCONFIG_CHANGE_IMPL_REQ: unhandled state");
887     abort();
888     break;
889   }
890 
891   /* Send CONF */
892   SimpleSignal ssig;
893   ConfigChangeImplConf* const conf =
894     CAST_PTR(ConfigChangeImplConf, ssig.getDataPtrSend());
895   conf->requestType = req->requestType;
896 
897   g_eventLogger->debug("Sending CONFIG_CHANGE_IMPL_CONF to node: %d",
898                        nodeId);
899 
900   ss.sendSignal(nodeId, ssig,
901                 MGM_CONFIG_MAN,
902                 GSN_CONFIG_CHANGE_IMPL_CONF,
903                 ConfigChangeImplConf::SignalLength);
904 }
905 
906 
set_config_change_state(ConfigChangeState::States state)907 void ConfigManager::set_config_change_state(ConfigChangeState::States state)
908 {
909   if (state == ConfigChangeState::IDLE)
910   {
911     // Rebuild m_all_mgm so that each node in config is included
912     // new mgm nodes might have been added
913     assert(m_config_change.m_error == ConfigChangeRef::OK);
914     m_config->get_nodemask(m_all_mgm, NDB_MGM_NODE_TYPE_MGM);
915   }
916 
917   m_config_change.m_state.m_current_state = state;
918 }
919 
920 
921 void
execCONFIG_CHANGE_IMPL_REF(SignalSender & ss,SimpleSignal * sig)922 ConfigManager::execCONFIG_CHANGE_IMPL_REF(SignalSender& ss, SimpleSignal* sig)
923 {
924   NodeId nodeId = refToNode(sig->header.theSendersBlockRef);
925   g_eventLogger->debug("Got CONFIG_CHANGE_IMPL_REF from node: %d", nodeId);
926 
927   const ConfigChangeImplRef * const ref =
928     CAST_CONSTPTR(ConfigChangeImplRef, sig->getDataPtr());
929   g_eventLogger->warning("Node %d refused configuration change, error: %d",
930                          nodeId, ref->errorCode);
931 
932   /* Remember the original error code */
933   if (m_config_change.m_error == 0)
934     m_config_change.m_error = (ConfigChangeRef::ErrorCode)ref->errorCode;
935 
936   switch(m_config_change.m_state){
937   case ConfigChangeState::ABORT:
938   case ConfigChangeState::PREPARING:{
939     /* Got ref while preparing (or already decided to abort) */
940     m_config_change.m_contacted_nodes.clear(nodeId);
941     set_config_change_state(ConfigChangeState::ABORT);
942 
943     m_waiting_for.clear(nodeId);
944     if (!m_waiting_for.isclear())
945       return;
946 
947     startAbortConfigChange(ss);
948     break;
949   }
950   case ConfigChangeState::COMITTING:
951     /* Got ref while comitting, impossible */
952     abort();
953     break;
954 
955   case ConfigChangeState::ABORTING:
956     /* Got ref while aborting, impossible */
957     abort();
958     break;
959 
960   default:
961     g_eventLogger->error("execCONFIG_CHANGE_IMPL_REF: unhandled state");
962     abort();
963     break;
964   }
965 }
966 
967 
968 void
execCONFIG_CHANGE_IMPL_CONF(SignalSender & ss,SimpleSignal * sig)969 ConfigManager::execCONFIG_CHANGE_IMPL_CONF(SignalSender& ss, SimpleSignal* sig)
970 {
971   NodeId nodeId = refToNode(sig->header.theSendersBlockRef);
972   const ConfigChangeImplConf * const conf =
973     CAST_CONSTPTR(ConfigChangeImplConf, sig->getDataPtr());
974   g_eventLogger->debug("Got CONFIG_CHANGE_IMPL_CONF from node %d", nodeId);
975 
976   switch(m_config_change.m_state){
977   case ConfigChangeState::PREPARING:{
978     require(conf->requestType == ConfigChangeImplReq::Prepare);
979     m_waiting_for.clear(nodeId);
980     if (!m_waiting_for.isclear())
981       return;
982 
983     // send to next
984     int res = sendConfigChangeImplReq(ss, m_config_change.m_new_config);
985     if (res > 0)
986     {
987       // sent to new node...
988       return;
989     }
990     else if (res < 0)
991     {
992       // send failed, start abort
993       startAbortConfigChange(ss);
994       return;
995     }
996 
997     /**
998      * All node has received new config..
999      *   ok to delete it...
1000      */
1001     delete m_config_change.m_new_config;
1002     m_config_change.m_new_config = 0;
1003 
1004     /* Send commit to all nodes */
1005     SimpleSignal ssig;
1006     ConfigChangeImplReq* const req =
1007       CAST_PTR(ConfigChangeImplReq, ssig.getDataPtrSend());
1008 
1009     req->requestType = ConfigChangeImplReq::Commit;
1010 
1011     g_eventLogger->debug("Sending CONFIG_CHANGE_IMPL_REQ(commit)");
1012     require(m_waiting_for.isclear());
1013     m_waiting_for = ss.broadcastSignal(m_config_change.m_contacted_nodes, ssig,
1014                                        MGM_CONFIG_MAN,
1015                                        GSN_CONFIG_CHANGE_IMPL_REQ,
1016                                        ConfigChangeImplReq::SignalLength);
1017     if (m_waiting_for.isclear())
1018       set_config_change_state(ConfigChangeState::IDLE);
1019     else
1020       set_config_change_state(ConfigChangeState::COMITTING);
1021     break;
1022   }
1023 
1024   case ConfigChangeState::COMITTING:{
1025     require(conf->requestType == ConfigChangeImplReq::Commit);
1026 
1027     m_waiting_for.clear(nodeId);
1028     if (!m_waiting_for.isclear())
1029       return;
1030 
1031     require(m_config_change.m_client_ref != RNIL);
1032     require(m_config_change.m_error == 0);
1033     if (m_config_change.m_client_ref == ss.getOwnRef())
1034     {
1035       g_eventLogger->info("Config change completed! New generation: %d",
1036                           m_config->getGeneration());
1037     }
1038     else
1039     {
1040       /* Send CONF to requestor */
1041       sendConfigChangeConf(ss, m_config_change.m_client_ref);
1042     }
1043     m_config_change.m_client_ref = RNIL;
1044     set_config_change_state(ConfigChangeState::IDLE);
1045     break;
1046   }
1047 
1048   case ConfigChangeState::ABORT:{
1049     m_waiting_for.clear(nodeId);
1050     if (!m_waiting_for.isclear())
1051       return;
1052 
1053     startAbortConfigChange(ss);
1054     break;
1055   }
1056 
1057   case ConfigChangeState::ABORTING:{
1058     m_waiting_for.clear(nodeId);
1059     if (!m_waiting_for.isclear())
1060       return;
1061 
1062     require(m_config_change.m_client_ref != RNIL);
1063     require(m_config_change.m_error);
1064     if (m_config_change.m_client_ref == ss.getOwnRef())
1065     {
1066       g_eventLogger->
1067         error("Configuration change failed! error: %d '%s'",
1068               m_config_change.m_error,
1069               ConfigChangeRef::errorMessage(m_config_change.m_error));
1070       exit(1);
1071     }
1072     else
1073     {
1074       /* Send ref to the requestor */
1075       sendConfigChangeRef(ss, m_config_change.m_client_ref,
1076                           m_config_change.m_error);
1077     }
1078     m_config_change.m_error= ConfigChangeRef::OK;
1079     m_config_change.m_client_ref = RNIL;
1080     set_config_change_state(ConfigChangeState::IDLE);
1081     break;
1082   }
1083 
1084   default:
1085     g_eventLogger->error("execCONFIG_CHANGE_IMPL_CONF: unhandled state");
1086     abort();
1087     break;
1088   }
1089 }
1090 
1091 
1092 void
sendConfigChangeRef(SignalSender & ss,BlockReference to,ConfigChangeRef::ErrorCode error) const1093 ConfigManager::sendConfigChangeRef(SignalSender& ss, BlockReference to,
1094                                    ConfigChangeRef::ErrorCode error) const
1095 {
1096   NodeId nodeId = refToNode(to);
1097   SimpleSignal ssig;
1098   ConfigChangeRef* const ref =
1099     CAST_PTR(ConfigChangeRef, ssig.getDataPtrSend());
1100   ref->errorCode = error;
1101 
1102   g_eventLogger->debug("Send CONFIG_CHANGE_REF to node: %d, error: %d",
1103                        nodeId, error);
1104 
1105   ss.sendSignal(nodeId, ssig, refToBlock(to),
1106                 GSN_CONFIG_CHANGE_REF, ConfigChangeRef::SignalLength);
1107 }
1108 
1109 
1110 void
sendConfigChangeConf(SignalSender & ss,BlockReference to) const1111 ConfigManager::sendConfigChangeConf(SignalSender& ss, BlockReference to) const
1112 {
1113   NodeId nodeId = refToNode(to);
1114   SimpleSignal ssig;
1115 
1116   g_eventLogger->debug("Send CONFIG_CHANGE_CONF to node: %d", nodeId);
1117 
1118   ss.sendSignal(nodeId, ssig, refToBlock(to),
1119                 GSN_CONFIG_CHANGE_CONF, ConfigChangeConf::SignalLength);
1120 }
1121 
1122 
1123 void
startConfigChange(SignalSender & ss,Uint32 ref)1124 ConfigManager::startConfigChange(SignalSender& ss, Uint32 ref)
1125 {
1126   if (m_config_state == CS_INITIAL)
1127   {
1128     g_eventLogger->info("Starting initial configuration change");
1129   }
1130   else
1131   {
1132     require(m_config_state == CS_CONFIRMED);
1133     g_eventLogger->info("Starting configuration change, generation: %d",
1134                         m_config_change.m_new_config->getGeneration());
1135   }
1136   m_config_change.m_contacted_nodes.clear();
1137   m_config_change.m_client_ref = ref;
1138   if (sendConfigChangeImplReq(ss, m_config_change.m_new_config) <= 0)
1139   {
1140     g_eventLogger->error("Failed to start configuration change!");
1141     exit(1);
1142   }
1143 }
1144 
1145 void
startAbortConfigChange(SignalSender & ss)1146 ConfigManager::startAbortConfigChange(SignalSender& ss)
1147 {
1148   /* Abort all other nodes */
1149   SimpleSignal ssig;
1150   ConfigChangeImplReq* const req =
1151     CAST_PTR(ConfigChangeImplReq, ssig.getDataPtrSend());
1152   req->requestType = ConfigChangeImplReq::Abort;
1153 
1154   g_eventLogger->debug
1155     ("Sending CONFIG_CHANGE_IMPL_REQ(abort) to %s",
1156      BaseString::getPrettyText(m_config_change.m_contacted_nodes).c_str());
1157 
1158   require(m_waiting_for.isclear());
1159   m_waiting_for = ss.broadcastSignal(m_config_change.m_contacted_nodes, ssig,
1160                                      MGM_CONFIG_MAN,
1161                                      GSN_CONFIG_CHANGE_IMPL_REQ,
1162                                      ConfigChangeImplReq::SignalLength);
1163 
1164   if (m_config_change.m_new_config)
1165   {
1166     delete m_config_change.m_new_config;
1167     m_config_change.m_new_config = 0;
1168   }
1169 
1170   if (m_waiting_for.isclear())
1171   {
1172     /**
1173      * Send CONFIG_CHANGE_IMPL_CONF (aborting) to self
1174      */
1175     m_waiting_for.set(ss.getOwnNodeId());
1176     ConfigChangeImplConf* const conf =
1177       CAST_PTR(ConfigChangeImplConf, ssig.getDataPtrSend());
1178     conf->requestType = ConfigChangeImplReq::Abort;
1179 
1180     ss.sendSignal(ss.getOwnNodeId(), ssig,
1181                   MGM_CONFIG_MAN,
1182                   GSN_CONFIG_CHANGE_IMPL_CONF,
1183                   ConfigChangeImplConf::SignalLength);
1184   }
1185 
1186   set_config_change_state(ConfigChangeState::ABORTING);
1187 }
1188 
1189 int
sendConfigChangeImplReq(SignalSender & ss,const Config * conf)1190 ConfigManager::sendConfigChangeImplReq(SignalSender& ss, const Config* conf)
1191 {
1192   require(m_waiting_for.isclear());
1193   require(m_config_change.m_client_ref != RNIL);
1194 
1195   if (m_config_change.m_contacted_nodes.isclear())
1196   {
1197     require(m_config_change.m_state == ConfigChangeState::IDLE);
1198   }
1199   else
1200   {
1201     require(m_config_change.m_state == ConfigChangeState::PREPARING);
1202   }
1203 
1204   set_config_change_state(ConfigChangeState::PREPARING);
1205 
1206   NodeBitmask nodes = m_all_mgm;
1207   nodes.bitANDC(m_config_change.m_contacted_nodes);
1208   if (nodes.isclear())
1209   {
1210     return 0; // all done
1211   }
1212 
1213   /**
1214    * Send prepare to all MGM nodes 1 by 1
1215    *   keep track of which I sent to in m_contacted_nodes
1216    */
1217   SimpleSignal ssig;
1218 
1219   UtilBuffer buf;
1220   conf->pack(buf);
1221   ssig.ptr[0].p = (Uint32*)buf.get_data();
1222   ssig.ptr[0].sz = (buf.length() + 3) / 4;
1223   ssig.header.m_noOfSections = 1;
1224 
1225   ConfigChangeImplReq* const req =
1226     CAST_PTR(ConfigChangeImplReq, ssig.getDataPtrSend());
1227   req->requestType = ConfigChangeImplReq::Prepare;
1228   req->initial = (m_config_state == CS_INITIAL);
1229   req->length = buf.length();
1230 
1231   Uint32 i = nodes.find(0);
1232   g_eventLogger->debug("Sending CONFIG_CHANGE_IMPL_REQ(prepare) to %u", i);
1233   int result = ss.sendFragmentedSignal(i, ssig, MGM_CONFIG_MAN,
1234                                        GSN_CONFIG_CHANGE_IMPL_REQ,
1235                                        ConfigChangeImplReq::SignalLength);
1236   if (result != 0)
1237   {
1238     g_eventLogger->warning("Failed to send configuration change "
1239                            "prepare to node: %d, result: %d",
1240                            i, result);
1241     return -1;
1242   }
1243 
1244   m_waiting_for.set(i);
1245   m_config_change.m_contacted_nodes.set(i);
1246 
1247   return 1;
1248 }
1249 
1250 void
execCONFIG_CHANGE_REQ(SignalSender & ss,SimpleSignal * sig)1251 ConfigManager::execCONFIG_CHANGE_REQ(SignalSender& ss, SimpleSignal* sig)
1252 {
1253   BlockReference from = sig->header.theSendersBlockRef;
1254   const ConfigChangeReq * const req =
1255     CAST_CONSTPTR(ConfigChangeReq, sig->getDataPtr());
1256 
1257   if (!m_defragger.defragment(sig))
1258     return; // More fragments to come
1259 
1260   if (!m_started.equal(m_all_mgm)) // Not all started
1261   {
1262     sendConfigChangeRef(ss, from, ConfigChangeRef::NotAllStarted);
1263     return;
1264   }
1265 
1266   if (m_all_mgm.find(0) != m_facade->ownId()) // Not the master
1267   {
1268     sendConfigChangeRef(ss, from, ConfigChangeRef::NotMaster);
1269     return;
1270   }
1271 
1272   if (m_config_change.m_state != ConfigChangeState::IDLE)
1273   {
1274     sendConfigChangeRef(ss, from, ConfigChangeRef::ConfigChangeOnGoing);
1275     return;
1276   }
1277   require(m_config_change.m_error == ConfigChangeRef::OK);
1278 
1279   if (sig->header.m_noOfSections != 1)
1280   {
1281     sendConfigChangeRef(ss, from, ConfigChangeRef::NoConfigData);
1282     return;
1283   }
1284 
1285   ConfigValuesFactory cf;
1286   if (!cf.unpack(sig->ptr[0].p, req->length))
1287   {
1288     sendConfigChangeRef(ss, from, ConfigChangeRef::FailedToUnpack);
1289     return;
1290   }
1291 
1292   Config * new_config = new Config(cf.getConfigValues());
1293   if (!config_ok(new_config))
1294   {
1295     g_eventLogger->warning("Refusing to start config change, the config "\
1296                            "is not ok");
1297     sendConfigChangeRef(ss, from, ConfigChangeRef::ConfigNotOk);
1298     delete new_config;
1299     return;
1300   }
1301 
1302   m_config_change.m_new_config = new_config;
1303   startConfigChange(ss, from);
1304 
1305   return;
1306 }
1307 
1308 
1309 static Uint32
config_check_checksum(const Config * config)1310 config_check_checksum(const Config* config)
1311 {
1312   Config copy(config);
1313 
1314   // Make constants of a few values in SYSTEM section that are
1315   // not part of the  checksum used for "config check"
1316   copy.setName("CHECKSUM");
1317   copy.setPrimaryMgmNode(0);
1318 
1319   Uint32 checksum = copy.checksum();
1320 
1321   return checksum;
1322 }
1323 
1324 
1325 void
execCONFIG_CHECK_REQ(SignalSender & ss,SimpleSignal * sig)1326 ConfigManager::execCONFIG_CHECK_REQ(SignalSender& ss, SimpleSignal* sig)
1327 {
1328   Guard g(m_config_mutex);
1329   BlockReference from = sig->header.theSendersBlockRef;
1330   NodeId nodeId = refToNode(from);
1331   const ConfigCheckReq * const req =
1332     CAST_CONSTPTR(ConfigCheckReq, sig->getDataPtr());
1333 
1334   Uint32 other_generation = req->generation;
1335   ConfigState other_state = (ConfigState)req->state;
1336 
1337   Uint32 generation = m_config->getGeneration();
1338 
1339   // checksum
1340   Uint32 checksum = config_check_checksum(m_config);
1341   Uint32 other_checksum = req->checksum;
1342   if (sig->header.theLength == ConfigCheckReq::SignalLengthBeforeChecksum)
1343   {
1344     // Other side uses old version without checksum, use our checksum to
1345     // bypass the checks
1346     g_eventLogger->debug("Other mgmd does not have checksum, using own");
1347     other_checksum = checksum;
1348   }
1349 
1350   if (m_prepared_config || m_config_change.m_new_config)
1351   {
1352     g_eventLogger->debug("Got CONFIG_CHECK_REQ from node: %d while "
1353                          "config change in progress (m_prepared_config). "
1354                          "Returning incorrect state, causing it to be retried",
1355                          nodeId);
1356     sendConfigCheckRef(ss, from, ConfigCheckRef::WrongState,
1357                        generation, other_generation,
1358                        m_config_state, CS_UNINITIALIZED);
1359     return;
1360   }
1361 
1362   if (m_config_change.m_loaded_config && ss.getOwnNodeId() < nodeId)
1363   {
1364     g_eventLogger->debug("Got CONFIG_CHECK_REQ from node: %d while "
1365                          "having a loaded config (and my node is lower: %d). "
1366                          "Returning incorrect state, causing it to be retried",
1367                          nodeId,
1368                          ss.getOwnNodeId());
1369     sendConfigCheckRef(ss, from, ConfigCheckRef::WrongState,
1370                        generation, other_generation,
1371                        m_config_state, CS_UNINITIALIZED);
1372     return;
1373   }
1374 
1375   g_eventLogger->debug("Got CONFIG_CHECK_REQ from node: %d. "
1376                        "Our generation: %d, other generation: %d, "
1377                        "our state: %d, other state: %d, "
1378                        "our checksum: 0x%.8x, other checksum: 0x%.8x",
1379                        nodeId, generation, other_generation,
1380                        m_config_state, other_state,
1381                        checksum, other_checksum);
1382 
1383   switch (m_config_state)
1384   {
1385   default:
1386   case CS_UNINITIALIZED:
1387     g_eventLogger->error("execCONFIG_CHECK_REQ: unhandled state");
1388     abort();
1389     break;
1390 
1391   case CS_INITIAL:
1392     if (other_state != CS_INITIAL)
1393     {
1394       g_eventLogger->warning("Refusing CONGIG_CHECK_REQ from %u, "
1395                              "  it's not CS_INITIAL (I am). "
1396                              " Waiting for my check",
1397                              nodeId);
1398       sendConfigCheckRef(ss, from, ConfigCheckRef::WrongState,
1399                          generation, other_generation,
1400                          m_config_state, other_state);
1401       return;
1402     }
1403 
1404     require(generation == 0);
1405     if (other_generation != generation)
1406     {
1407       g_eventLogger->warning("Refusing other node, it has different "   \
1408                              "generation: %d, expected: %d",
1409                              other_generation, generation);
1410       sendConfigCheckRef(ss, from, ConfigCheckRef::WrongGeneration,
1411                          generation, other_generation,
1412                          m_config_state, other_state);
1413       return;
1414     }
1415 
1416     if (other_checksum != checksum)
1417     {
1418       g_eventLogger->warning("Refusing other node, it has different "
1419                              "checksum: 0x%.8x, expected: 0x%.8x",
1420                              other_checksum, checksum);
1421       sendConfigCheckRef(ss, from, ConfigCheckRef::WrongChecksum,
1422                          generation, other_generation,
1423                          m_config_state, other_state);
1424       return;
1425     }
1426     break;
1427 
1428   case CS_CONFIRMED:
1429 
1430     if (other_state != CS_CONFIRMED)
1431     {
1432       g_eventLogger->warning("Refusing other node, it's in different "  \
1433                              "state: %d, expected: %d",
1434                              other_state, m_config_state);
1435       sendConfigCheckRef(ss, from, ConfigCheckRef::WrongState,
1436                          generation, other_generation,
1437                          m_config_state, other_state);
1438       return;
1439     }
1440 
1441     if (other_generation == generation)
1442     {
1443       // Same generation, make sure it has same checksum
1444       if (other_checksum != checksum)
1445       {
1446         g_eventLogger->warning("Refusing other node, it has different "
1447                                "checksum: 0x%.8x, expected: 0x%.8x",
1448                                other_checksum, checksum);
1449         sendConfigCheckRef(ss, from, ConfigCheckRef::WrongChecksum,
1450                            generation, other_generation,
1451                            m_config_state, other_state);
1452         return;
1453       }
1454       // OK!
1455     }
1456     else if (other_generation < generation)
1457     {
1458       g_eventLogger->warning("Refusing other node, it has lower "       \
1459                              " generation: %d, expected: %d",
1460                              other_generation, generation);
1461       sendConfigCheckRef(ss, from, ConfigCheckRef::WrongGeneration,
1462                          generation, other_generation,
1463                          m_config_state, other_state);
1464       return;
1465     }
1466     else
1467     {
1468       g_eventLogger->error("Other node has higher generation: %d, this " \
1469                            "node is out of sync with generation: %d",
1470                            other_generation, generation);
1471       exit(1);
1472     }
1473 
1474     break;
1475   }
1476 
1477   sendConfigCheckConf(ss, from);
1478   return;
1479 }
1480 
1481 
1482 void
sendConfigCheckReq(SignalSender & ss,NodeBitmask to)1483 ConfigManager::sendConfigCheckReq(SignalSender& ss, NodeBitmask to)
1484 {
1485   SimpleSignal ssig;
1486   ConfigCheckReq* const req =
1487     CAST_PTR(ConfigCheckReq, ssig.getDataPtrSend());
1488   req->state =        m_config_state;
1489   req->generation =   m_config->getGeneration();
1490   req->checksum =     config_check_checksum(m_config);
1491 
1492   g_eventLogger->debug("Sending CONFIG_CHECK_REQ to %s",
1493                        BaseString::getPrettyText(to).c_str());
1494 
1495   require(m_waiting_for.isclear());
1496   m_waiting_for = ss.broadcastSignal(to, ssig, MGM_CONFIG_MAN,
1497                                      GSN_CONFIG_CHECK_REQ,
1498                                      ConfigCheckReq::SignalLength);
1499 }
1500 
1501 static bool
send_config_in_check_ref(Uint32 x)1502 send_config_in_check_ref(Uint32 x)
1503 {
1504   if (x >= NDB_MAKE_VERSION(7,0,8))
1505     return true;
1506   return false;
1507 }
1508 
1509 void
sendConfigCheckRef(SignalSender & ss,BlockReference to,ConfigCheckRef::ErrorCode error,Uint32 generation,Uint32 other_generation,ConfigState state,ConfigState other_state) const1510 ConfigManager::sendConfigCheckRef(SignalSender& ss, BlockReference to,
1511                                   ConfigCheckRef::ErrorCode error,
1512                                   Uint32 generation,
1513                                   Uint32 other_generation,
1514                                   ConfigState state,
1515                                   ConfigState other_state) const
1516 {
1517   int result;
1518   NodeId nodeId = refToNode(to);
1519   SimpleSignal ssig;
1520   ConfigCheckRef* const ref =
1521     CAST_PTR(ConfigCheckRef, ssig.getDataPtrSend());
1522   ref->error = error;
1523   ref->generation = other_generation;
1524   ref->expected_generation = generation;
1525   ref->state = other_state;
1526   ref->expected_state = state;
1527 
1528   g_eventLogger->debug("Send CONFIG_CHECK_REF with error: %d to node: %d",
1529                        error, nodeId);
1530 
1531   if (!send_config_in_check_ref(ss.getNodeInfo(nodeId).m_info.m_version))
1532   {
1533     result = ss.sendSignal(nodeId, ssig, MGM_CONFIG_MAN,
1534                            GSN_CONFIG_CHECK_REF, ConfigCheckRef::SignalLength);
1535   }
1536   else
1537   {
1538     UtilBuffer buf;
1539     m_config->pack(buf);
1540     ssig.ptr[0].p = (Uint32*)buf.get_data();
1541     ssig.ptr[0].sz = (buf.length() + 3) / 4;
1542     ssig.header.m_noOfSections = 1;
1543 
1544     ref->length = buf.length();
1545 
1546     g_eventLogger->debug("Sending CONFIG_CHECK_REF with config");
1547 
1548     result = ss.sendFragmentedSignal(nodeId, ssig, MGM_CONFIG_MAN,
1549                                     GSN_CONFIG_CHECK_REF,
1550                                     ConfigCheckRef::SignalLengthWithConfig);
1551   }
1552 
1553   if (result != 0)
1554   {
1555     g_eventLogger->warning("Failed to send CONFIG_CHECK_REF "
1556                            "to node: %d, result: %d",
1557                            nodeId, result);
1558   }
1559 }
1560 
1561 void
sendConfigCheckConf(SignalSender & ss,BlockReference to) const1562 ConfigManager::sendConfigCheckConf(SignalSender& ss, BlockReference to) const
1563 {
1564   NodeId nodeId = refToNode(to);
1565   SimpleSignal ssig;
1566   ConfigCheckConf* const conf =
1567     CAST_PTR(ConfigCheckConf, ssig.getDataPtrSend());
1568   conf->state = m_config_state;
1569   conf->generation = m_config->getGeneration();
1570 
1571   g_eventLogger->debug("Send CONFIG_CHECK_CONF to node: %d", nodeId);
1572 
1573   ss.sendSignal(nodeId, ssig, MGM_CONFIG_MAN,
1574                 GSN_CONFIG_CHECK_CONF, ConfigCheckConf::SignalLength);
1575 }
1576 
1577 
1578 void
execCONFIG_CHECK_CONF(SignalSender & ss,SimpleSignal * sig)1579 ConfigManager::execCONFIG_CHECK_CONF(SignalSender& ss, SimpleSignal* sig)
1580 {
1581   BlockReference from = sig->header.theSendersBlockRef;
1582   NodeId nodeId = refToNode(from);
1583   assert(m_waiting_for.get(nodeId));
1584   m_waiting_for.clear(nodeId);
1585   m_checked.set(nodeId);
1586 
1587   g_eventLogger->debug("Got CONFIG_CHECK_CONF from node: %d",
1588                        nodeId);
1589 
1590   return;
1591 }
1592 
1593 
1594 void
execCONFIG_CHECK_REF(SignalSender & ss,SimpleSignal * sig)1595 ConfigManager::execCONFIG_CHECK_REF(SignalSender& ss, SimpleSignal* sig)
1596 {
1597   BlockReference from = sig->header.theSendersBlockRef;
1598   NodeId nodeId = refToNode(from);
1599   assert(m_waiting_for.get(nodeId));
1600 
1601   const ConfigCheckRef* const ref =
1602     CAST_CONSTPTR(ConfigCheckRef, sig->getDataPtr());
1603 
1604   if (!m_defragger.defragment(sig))
1605     return; // More fragments to come
1606 
1607   g_eventLogger->debug("Got CONFIG_CHECK_REF from node %d, "
1608                       "error: %d, message: '%s', "
1609                       "generation: %d, expected generation: %d, "
1610                       "state: %d, expected state: %d own-state: %u",
1611                       nodeId, ref->error,
1612                       ConfigCheckRef::errorMessage(ref->error),
1613                       ref->generation, ref->expected_generation,
1614                       ref->state, ref->expected_state,
1615                       m_config_state);
1616 
1617   assert(ref->generation != ref->expected_generation ||
1618          ref->state != ref->expected_state ||
1619          ref->error == ConfigCheckRef::WrongChecksum);
1620   if((Uint32)m_config_state != ref->state)
1621   {
1622     // The config state changed while this check was in the air
1623     // drop the signal and thus cause it to run again later
1624     require(!m_checked.get(nodeId));
1625     m_waiting_for.clear(nodeId);
1626     return;
1627   }
1628 
1629   switch(m_config_state)
1630   {
1631   default:
1632   case CS_UNINITIALIZED:
1633     g_eventLogger->error("execCONFIG_CHECK_REF: unhandled state");
1634     abort();
1635     break;
1636 
1637   case CS_INITIAL:
1638     if (ref->expected_state == CS_CONFIRMED)
1639     {
1640       if (sig->header.theLength != ConfigCheckRef::SignalLengthWithConfig)
1641         break; // No config in the REF -> no action
1642 
1643       // The other node has sent it's config in the signal, use it if equal
1644       assert(sig->header.m_noOfSections == 1);
1645 
1646       ConfigValuesFactory cf;
1647       require(cf.unpack(sig->ptr[0].p, ref->length));
1648 
1649       Config other_config(cf.getConfigValues());
1650       assert(other_config.getGeneration() > 0);
1651 
1652       unsigned exclude[]= {CFG_SECTION_SYSTEM, 0};
1653       if (!other_config.equal(m_config, exclude))
1654       {
1655         BaseString buf;
1656         g_eventLogger->error("This node was started --initial with "
1657                              "a config which is _not_ equal to the one "
1658                              "node %d is using. Refusing to start with "
1659                              "different configurations, diff: \n%s",
1660                              nodeId,
1661                              other_config.diff2str(m_config, buf, exclude));
1662         exit(1);
1663       }
1664 
1665       g_eventLogger->info("This node was started --inital with "
1666                           "a config equal to the one node %d is using. "
1667                           "Will use the config with generation %d "
1668                           "from node %d!",
1669                           nodeId, other_config.getGeneration(), nodeId);
1670 
1671       if (! prepareConfigChange(&other_config))
1672       {
1673         abortConfigChange();
1674         g_eventLogger->error("Failed to write the fetched config to disk");
1675         exit(1);
1676       }
1677       commitConfigChange();
1678       m_config_state = CS_CONFIRMED;
1679       g_eventLogger->info("The fetched configuration has been saved!");
1680       m_waiting_for.clear(nodeId);
1681       m_checked.set(nodeId);
1682       delete m_config_change.m_initial_config;
1683       m_config_change.m_initial_config = NULL;
1684       return;
1685     }
1686     break;
1687 
1688   case CS_CONFIRMED:
1689     if (ref->expected_state == CS_INITIAL)
1690     {
1691       g_eventLogger->info("Waiting for peer");
1692       m_waiting_for.clear(nodeId);
1693       return;
1694     }
1695     break;
1696   }
1697 
1698   if (ref->error == ConfigCheckRef::WrongChecksum &&
1699       m_node_id < nodeId)
1700   {
1701     g_eventLogger->warning("Ignoring CONFIG_CHECK_REF for wrong checksum "
1702                            "other node has higher node id and should "
1703                            "shutdown");
1704     return;
1705   }
1706 
1707   g_eventLogger->error("Terminating");
1708   exit(1);
1709 }
1710 
1711 void
set_facade(TransporterFacade * f)1712 ConfigManager::set_facade(TransporterFacade * f)
1713 {
1714   m_facade = f;
1715   m_ss = new SignalSender(f, MGM_CONFIG_MAN);
1716   require(m_ss != 0);
1717 }
1718 
1719 bool
config_loaded(Config * config)1720 ConfigManager::ConfigChange::config_loaded(Config* config)
1721 {
1722   if (m_loaded_config != 0)
1723     return false;
1724   m_loaded_config = config;
1725   return true;
1726 }
1727 
1728 Config*
prepareLoadedConfig(Config * new_conf)1729 ConfigManager::prepareLoadedConfig(Config * new_conf)
1730 {
1731   /* Copy the necessary values from old to new config */
1732   if (!new_conf->setGeneration(m_config->getGeneration()))
1733   {
1734     g_eventLogger->error("Failed to copy generation from old config");
1735     delete new_conf;
1736     return 0;
1737   }
1738 
1739   if (!new_conf->setName(m_config->getName()))
1740   {
1741     g_eventLogger->error("Failed to copy name from old config");
1742     delete new_conf;
1743     return 0;
1744   }
1745 
1746   if (!new_conf->setPrimaryMgmNode(m_config->getPrimaryMgmNode()))
1747   {
1748     g_eventLogger->error("Failed to copy primary mgm node from old config");
1749     delete new_conf;
1750     return 0;
1751   }
1752 
1753   /* Check if config has changed */
1754   if (!m_config->equal(new_conf))
1755   {
1756     /* Loaded config is different */
1757     BaseString buf;
1758     g_eventLogger->info("Detected change of %s on disk, will try to "
1759                         "set it. "
1760                         "This is the actual diff:\n%s",
1761                         m_opts.mycnf ? "my.cnf" : m_opts.config_filename,
1762                         m_config->diff2str(new_conf, buf));
1763 
1764     return new_conf;
1765   }
1766   else
1767   {
1768     /* Loaded config was equal to current */
1769     g_eventLogger->info("Config equal!");
1770     delete new_conf;
1771   }
1772   return 0;
1773 }
1774 
1775 void
run()1776 ConfigManager::run()
1777 {
1778   assert(m_facade);
1779   SignalSender & ss = * m_ss;
1780 
1781   if (!m_opts.config_cache)
1782   {
1783     /* Stop receiving signals by closing ConfigManager's
1784        block in TransporterFacade */
1785     delete m_ss;
1786     m_ss = NULL;
1787 
1788     /* Confirm the present config, free the space that was allocated for a
1789        new one, and terminate the manager thread */
1790     m_config_change.release();
1791     m_config_state = CS_CONFIRMED;
1792     ndbout_c("== ConfigManager disabled -- manager thread will exit ==");
1793     return;
1794   }
1795 
1796   ss.lock();
1797 
1798   // Build bitmaks of all mgm nodes in config
1799   m_config->get_nodemask(m_all_mgm, NDB_MGM_NODE_TYPE_MGM);
1800 
1801   // exclude nowait-nodes from config change protcol
1802   m_all_mgm.bitANDC(m_opts.nowait_nodes);
1803   m_all_mgm.set(m_facade->ownId()); // Never exclude own node
1804 
1805   start_checkers();
1806 
1807   while (!is_stopped())
1808   {
1809 
1810     if (m_config_change.m_state == ConfigChangeState::IDLE)
1811     {
1812       bool print_state = false;
1813       if (m_previous_state != m_config_state)
1814       {
1815         print_state = true;
1816         m_previous_state = m_config_state;
1817       }
1818 
1819       /*
1820         Check if it's necessary to start something to get
1821         out of the current state
1822       */
1823       switch (m_config_state){
1824 
1825       case CS_UNINITIALIZED:
1826         abort();
1827         break;
1828 
1829       case CS_INITIAL:
1830         /*
1831           INITIAL => CONFIRMED
1832           When all mgm nodes has been started and checked that they
1833           are also in INITIAL, the node with the lowest node id
1834           will start an initial config change. When completed
1835           all nodes will be in CONFIRMED
1836         */
1837 
1838         if (print_state)
1839           ndbout_c("==INITIAL==");
1840 
1841         if (m_config_change.m_initial_config && // Updated config.ini was found
1842             m_started.equal(m_all_mgm) &&       // All mgmd started
1843             m_checked.equal(m_started) &&       // All nodes checked
1844             m_all_mgm.find(0) == m_facade->ownId()) // Lowest nodeid
1845         {
1846           Config* new_conf = m_config_change.m_initial_config;
1847           m_config_change.m_initial_config = 0;
1848           m_config_change.m_new_config = new_conf;
1849           startConfigChange(ss, ss.getOwnRef());
1850         }
1851         break;
1852 
1853       case CS_CONFIRMED:
1854         if (print_state)
1855           ndbout_c("==CONFIRMED==");
1856 
1857         if (m_config_change.m_loaded_config != 0 &&
1858             m_config_change.m_new_config == 0    &&
1859             m_started.equal(m_all_mgm)           &&
1860             m_checked.equal(m_started))
1861         {
1862           Config* new_conf = m_config_change.m_loaded_config;
1863           m_config_change.m_loaded_config = 0;
1864           m_config_change.m_new_config = prepareLoadedConfig(new_conf);
1865         }
1866 
1867         if (m_config_change.m_new_config && // Updated config.ini was found
1868             m_started.equal(m_all_mgm) &&   // All mgmd started
1869             m_checked.equal(m_started))     // All nodes checked
1870         {
1871           startConfigChange(ss, ss.getOwnRef());
1872         }
1873 
1874         break;
1875 
1876       default:
1877         break;
1878       }
1879 
1880       // Send CHECK_CONFIG to all nodes not yet checked
1881       if (m_waiting_for.isclear() &&   // Nothing outstanding
1882           m_prepared_config == 0 &&    //   and no config change ongoing
1883           !m_checked.equal(m_started)) // Some nodes have not been checked
1884       {
1885         NodeBitmask not_checked;
1886         not_checked.assign(m_started);
1887         not_checked.bitANDC(m_checked);
1888         sendConfigCheckReq(ss, not_checked);
1889       }
1890 
1891       handle_exclude_nodes();
1892     }
1893 
1894     SimpleSignal *sig = ss.waitFor((Uint32)1000);
1895     if (!sig)
1896       continue;
1897 
1898     switch (sig->readSignalNumber()) {
1899 
1900     case GSN_CONFIG_CHANGE_REQ:
1901       execCONFIG_CHANGE_REQ(ss, sig);
1902       break;
1903 
1904     case GSN_CONFIG_CHANGE_IMPL_REQ:
1905       execCONFIG_CHANGE_IMPL_REQ(ss, sig);
1906       break;
1907 
1908     case GSN_CONFIG_CHANGE_IMPL_REF:
1909       execCONFIG_CHANGE_IMPL_REF(ss, sig);
1910       break;
1911 
1912     case GSN_CONFIG_CHANGE_IMPL_CONF:
1913       execCONFIG_CHANGE_IMPL_CONF(ss, sig);
1914       break;
1915 
1916     case GSN_NF_COMPLETEREP:{
1917       const NFCompleteRep * const rep =
1918         CAST_CONSTPTR(NFCompleteRep, sig->getDataPtr());
1919       NodeId nodeId= rep->failedNodeId;
1920 
1921       if (m_all_mgm.get(nodeId)) // Not mgm node
1922         break;
1923 
1924       ndbout_c("Node %d failed", nodeId);
1925       m_started.clear(nodeId);
1926       m_checked.clear(nodeId);
1927       m_defragger.node_failed(nodeId);
1928 
1929       if (m_config_change.m_state != ConfigChangeState::IDLE)
1930       {
1931         g_eventLogger->info("Node %d failed during config change!!",
1932                             nodeId);
1933         g_eventLogger->warning("Node failure handling of config "
1934                                "change protocol not yet implemented!! "
1935                                "No more configuration changes can occur, "
1936                                "but the node will continue to serve the "
1937                                "last good configuration");
1938         // TODO start take over of config change protocol
1939       }
1940       break;
1941     }
1942 
1943     case GSN_NODE_FAILREP:
1944       // ignore, NF_COMPLETEREP will come
1945       break;
1946 
1947     case GSN_API_REGCONF:{
1948       NodeId nodeId = refToNode(sig->header.theSendersBlockRef);
1949       if (m_all_mgm.get(nodeId) &&      // Is a mgm node
1950           !m_started.get(nodeId))       // Not already marked as started
1951       {
1952         g_eventLogger->info("Node %d connected", nodeId);
1953         m_started.set(nodeId);
1954       }
1955       break;
1956     }
1957 
1958     case GSN_CONFIG_CHECK_REQ:
1959       execCONFIG_CHECK_REQ(ss, sig);
1960       break;
1961 
1962     case GSN_CONFIG_CHECK_REF:
1963       execCONFIG_CHECK_REF(ss, sig);
1964       break;
1965 
1966     case GSN_CONFIG_CHECK_CONF:
1967       execCONFIG_CHECK_CONF(ss, sig);
1968       break;
1969 
1970     case GSN_TAKE_OVERTCCONF:
1971     case GSN_CONNECT_REP:
1972       break;
1973 
1974     default:
1975       sig->print();
1976       g_eventLogger->error("Unknown signal received. SignalNumber: "
1977                            "%i from (%d, 0x%x)",
1978                            sig->readSignalNumber(),
1979                            refToNode(sig->header.theSendersBlockRef),
1980                            refToBlock(sig->header.theSendersBlockRef));
1981       abort();
1982       break;
1983     }
1984   }
1985   stop_checkers();
1986   ss.unlock();
1987 }
1988 
1989 
1990 #include "InitConfigFileParser.hpp"
1991 
1992 Config*
load_init_config(const char * config_filename)1993 ConfigManager::load_init_config(const char* config_filename)
1994 {
1995    InitConfigFileParser parser;
1996   return parser.parseConfig(config_filename);
1997 }
1998 
1999 
2000 Config*
load_init_mycnf(void)2001 ConfigManager::load_init_mycnf(void)
2002 {
2003   InitConfigFileParser parser;
2004   return parser.parse_mycnf();
2005 }
2006 
2007 
2008 Config*
load_config(const char * config_filename,bool mycnf,BaseString & msg)2009 ConfigManager::load_config(const char* config_filename, bool mycnf,
2010                            BaseString& msg)
2011 {
2012   Config* new_conf = NULL;
2013   if (mycnf && (new_conf = load_init_mycnf()) == NULL)
2014   {
2015     msg.assign("Could not load configuration from 'my.cnf'");
2016     return NULL;
2017   }
2018   else if (config_filename &&
2019            (new_conf = load_init_config(config_filename)) == NULL)
2020   {
2021     msg.assfmt("Could not load configuration from '%s'",
2022                config_filename);
2023     return NULL;
2024   }
2025 
2026   return new_conf;
2027 }
2028 
2029 
2030 Config*
load_config(void) const2031 ConfigManager::load_config(void) const
2032 {
2033   BaseString msg;
2034   Config* new_conf = NULL;
2035   if ((new_conf = load_config(m_opts.config_filename,
2036                               m_opts.mycnf, msg)) == NULL)
2037   {
2038     g_eventLogger->error(msg);
2039     return NULL;
2040   }
2041   return new_conf;
2042 }
2043 
2044 
2045 Config*
fetch_config(void)2046 ConfigManager::fetch_config(void)
2047 {
2048   DBUG_ENTER("ConfigManager::fetch_config");
2049 
2050   while(true)
2051   {
2052     /* Loop until config loaded from other mgmd(s) */
2053     char buf[128];
2054     g_eventLogger->info("Trying to get configuration from other mgmd(s) "\
2055                         "using '%s'...",
2056                         m_config_retriever.get_connectstring(buf, sizeof(buf)));
2057 
2058     if (m_config_retriever.is_connected() ||
2059         m_config_retriever.do_connect(30 /* retry */,
2060                                       1 /* delay */,
2061                                       0 /* verbose */) == 0)
2062     {
2063       g_eventLogger->info("Connected to '%s:%d'...",
2064                           m_config_retriever.get_mgmd_host(),
2065                           m_config_retriever.get_mgmd_port());
2066       break;
2067     }
2068   }
2069   // read config from other management server
2070   ndb_mgm_configuration * tmp =
2071     m_config_retriever.getConfig(m_config_retriever.get_mgmHandle());
2072 
2073   // Disconnect from other mgmd
2074   m_config_retriever.disconnect();
2075 
2076   if (tmp == NULL) {
2077     g_eventLogger->error("%s", m_config_retriever.getErrorString());
2078     DBUG_RETURN(false);
2079   }
2080 
2081   DBUG_RETURN(new Config(tmp));
2082 }
2083 
2084 
2085 static bool
delete_file(const char * file_name)2086 delete_file(const char* file_name)
2087 {
2088 #ifdef _WIN32
2089   if (DeleteFile(file_name) == 0)
2090   {
2091     g_eventLogger->error("Failed to delete file '%s', error: %d",
2092                          file_name, GetLastError());
2093     return false;
2094   }
2095 #else
2096   if (unlink(file_name) == -1)
2097   {
2098     g_eventLogger->error("Failed to delete file '%s', error: %d",
2099                          file_name, errno);
2100     return false;
2101   }
2102 #endif
2103   return true;
2104 }
2105 
2106 
2107 bool
delete_saved_configs(void) const2108 ConfigManager::delete_saved_configs(void) const
2109 {
2110   NdbDir::Iterator iter;
2111 
2112   if (iter.open(m_configdir) != 0)
2113     return false;
2114 
2115   bool result = true;
2116   const char* name;
2117   unsigned nodeid;
2118   char extra; // Avoid matching ndb_2_config.bin.2.tmp
2119   BaseString full_name;
2120   unsigned version;
2121   while ((name= iter.next_file()) != NULL)
2122   {
2123     if (sscanf(name,
2124                "ndb_%u_config.bin.%u%c",
2125                &nodeid, &version, &extra) == 2)
2126     {
2127       // ndbout_c("match: %s", name);
2128 
2129       if (nodeid != m_node_id)
2130         continue;
2131 
2132       // Delete the file
2133       full_name.assfmt("%s%s%s", m_configdir, DIR_SEPARATOR, name);
2134       g_eventLogger->debug("Deleting binary config file '%s'",
2135                            full_name.c_str());
2136       if (!delete_file(full_name.c_str()))
2137       {
2138         // Make function return false, but continue and try
2139         // to delete other files
2140         result = false;
2141       }
2142     }
2143   }
2144 
2145   return result;
2146 }
2147 
2148 
2149 bool
saved_config_exists(BaseString & config_name) const2150 ConfigManager::saved_config_exists(BaseString& config_name) const
2151 {
2152   NdbDir::Iterator iter;
2153 
2154   if (iter.open(m_configdir) != 0)
2155     return false;
2156 
2157   const char* name;
2158   unsigned nodeid;
2159   char extra; // Avoid matching ndb_2_config.bin.2.tmp
2160   unsigned version, max_version= 0;
2161   while ((name= iter.next_file()) != NULL)
2162   {
2163     if (sscanf(name,
2164                "ndb_%u_config.bin.%u%c",
2165                &nodeid, &version, &extra) == 2)
2166     {
2167       // ndbout_c("match: %s", name);
2168 
2169       if (nodeid != m_node_id)
2170         continue;
2171 
2172       if (version>max_version)
2173         max_version= version;
2174     }
2175   }
2176 
2177   if (max_version == 0)
2178     return false;
2179 
2180   config_name.assfmt("%s%sndb_%u_config.bin.%u",
2181                      m_configdir, DIR_SEPARATOR, m_node_id, max_version);
2182   return true;
2183 }
2184 
2185 
2186 
2187 bool
failed_config_change_exists() const2188 ConfigManager::failed_config_change_exists() const
2189 {
2190   NdbDir::Iterator iter;
2191 
2192   if (iter.open(m_configdir) != 0)
2193     return false;
2194 
2195   const char* name;
2196   char tmp;
2197   unsigned nodeid;
2198   unsigned version;
2199   while ((name= iter.next_file()) != NULL)
2200   {
2201     // Check for a previously failed config
2202     // change, ie. ndb_<nodeid>_config.bin.X.tmp exist
2203     if (sscanf(name,
2204                "ndb_%u_config.bin.%u.tm%c",
2205                &nodeid, &version, &tmp) == 3 &&
2206         tmp == 'p')
2207     {
2208       if (nodeid != m_node_id)
2209         continue;
2210 
2211       g_eventLogger->error("Found binary configuration file '%s%s%s' from "
2212                            "previous failed attempt to change config. This "
2213                            "error must be manually resolved by removing the "
2214                            "file(ie. ROLLBACK) or renaming the file to it's "
2215                            "name without the .tmp extension(ie COMMIT). Make "
2216                            "sure to check the other nodes so that they all "
2217                            "have the same configuration generation.",
2218                            m_configdir, DIR_SEPARATOR, name);
2219       return true;
2220     }
2221   }
2222 
2223   return false;
2224 }
2225 
2226 
2227 Config*
load_saved_config(const BaseString & config_name)2228 ConfigManager::load_saved_config(const BaseString& config_name)
2229 {
2230   struct ndb_mgm_configuration * tmp =
2231     m_config_retriever.getConfig(config_name.c_str());
2232   if(tmp == NULL)
2233   {
2234     g_eventLogger->error("Failed to load config from '%s', error: '%s'",
2235                          config_name.c_str(),
2236                          m_config_retriever.getErrorString());
2237     return NULL;
2238   }
2239 
2240   Config* conf = new Config(tmp);
2241   if (conf == NULL)
2242     g_eventLogger->error("Failed to load config, out of memory");
2243   return conf;
2244 }
2245 
2246 bool
get_packed_config(ndb_mgm_node_type nodetype,BaseString * buf64,BaseString & error)2247 ConfigManager::get_packed_config(ndb_mgm_node_type nodetype,
2248                                  BaseString* buf64, BaseString& error)
2249 {
2250   Guard g(m_config_mutex);
2251 
2252   /*
2253     Only allow the config to be exported if it's been confirmed
2254     or if another mgmd is asking for it
2255   */
2256   switch(m_config_state)
2257   {
2258   case CS_INITIAL:
2259     if (nodetype == NDB_MGM_NODE_TYPE_MGM)
2260       ; // allow other mgmd to fetch initial configuration
2261     else
2262     {
2263       error.assign("The cluster configuration is not yet confirmed "
2264                    "by all defined management servers. ");
2265       if (m_config_change.m_state != ConfigChangeState::IDLE)
2266       {
2267         error.append("Initial configuration change is in progress.");
2268       }
2269       else
2270       {
2271         NodeBitmask not_started(m_all_mgm);
2272         not_started.bitANDC(m_checked);
2273         error.append("This management server is still waiting for node ");
2274         error.append(BaseString::getPrettyText(not_started));
2275         error.append(" to connect.");
2276       }
2277       return false;
2278     }
2279     break;
2280 
2281   case CS_CONFIRMED:
2282     // OK
2283     break;
2284 
2285   default:
2286     error.assign("get_packed_config, unknown config state: %d",
2287                  m_config_state);
2288      return false;
2289     break;
2290 
2291   }
2292 
2293   require(m_config != 0);
2294   if (buf64)
2295   {
2296     if (!m_packed_config.length())
2297     {
2298       // No packed config exist, generate a new one
2299       Config config_copy(m_config);
2300       if (!m_dynamic_ports.set_in_config(&config_copy))
2301       {
2302         error.assign("get_packed_config, failed to set dynamic ports in config");
2303         return false;
2304       }
2305 
2306       if (!config_copy.pack64(m_packed_config))
2307       {
2308         error.assign("get_packed_config, failed to pack config_copy");
2309         return false;
2310       }
2311     }
2312     buf64->assign(m_packed_config, m_packed_config.length());
2313   }
2314   return true;
2315 }
2316 
2317 
2318 bool
init_checkers(const Config * config)2319 ConfigManager::init_checkers(const Config* config)
2320 {
2321 
2322   // Init one thread for each other mgmd
2323   // in the config and check which version it has. If version
2324   // does not have config manager, set this node to ignore
2325   // that node in the config change protocol
2326 
2327   BaseString connect_string;
2328   ConfigIter iter(config, CFG_SECTION_NODE);
2329   for (iter.first(); iter.valid(); iter.next())
2330   {
2331 
2332     // Only MGM nodes
2333     Uint32 type;
2334     if (iter.get(CFG_TYPE_OF_SECTION, &type) ||
2335         type != NODE_TYPE_MGM)
2336       continue;
2337 
2338     // Not this node
2339     Uint32 nodeid;
2340     if(iter.get(CFG_NODE_ID, &nodeid) ||
2341        nodeid == m_node_id)
2342       continue;
2343 
2344     const char* hostname;
2345     Uint32 port;
2346     require(!iter.get(CFG_NODE_HOST, &hostname));
2347     require(!iter.get(CFG_MGM_PORT, &port));
2348     connect_string.assfmt("%s:%u",hostname,port);
2349 
2350     ConfigChecker* checker =
2351       new ConfigChecker(*this, connect_string.c_str(),
2352                         m_opts.bind_address, nodeid);
2353     if (!checker)
2354     {
2355       g_eventLogger->error("Failed to create ConfigChecker");
2356       return false;
2357     }
2358 
2359     if (!checker->init())
2360       return false;
2361 
2362     m_checkers.push_back(checker);
2363   }
2364   return true;
2365 }
2366 
2367 
2368 void
start_checkers(void)2369 ConfigManager::start_checkers(void)
2370 {
2371   for (unsigned i = 0; i < m_checkers.size(); i++)
2372     m_checkers[i]->start();
2373 }
2374 
2375 
2376 void
stop_checkers(void)2377 ConfigManager::stop_checkers(void)
2378 {
2379   for (unsigned i = 0; i < m_checkers.size(); i++)
2380   {
2381     ConfigChecker* checker = m_checkers[i];
2382     ndbout << "stop checker " << i << endl;
2383     checker->stop();
2384     delete checker;
2385   }
2386 }
2387 
2388 
ConfigChecker(ConfigManager & manager,const char * connect_string,const char * bindaddress,NodeId nodeid)2389 ConfigManager::ConfigChecker::ConfigChecker(ConfigManager& manager,
2390                                             const char* connect_string,
2391                                             const char * bindaddress,
2392                                             NodeId nodeid) :
2393   MgmtThread("ConfigChecker"),
2394   m_manager(manager),
2395   m_config_retriever(opt_ndb_connectstring, opt_ndb_nodeid, NDB_VERSION,
2396                      NDB_MGM_NODE_TYPE_MGM, bindaddress),
2397   m_connect_string(connect_string),
2398   m_nodeid(nodeid)
2399 {
2400 }
2401 
2402 
2403 bool
init()2404 ConfigManager::ConfigChecker::init()
2405 {
2406   if (m_config_retriever.hasError())
2407   {
2408     g_eventLogger->error("%s", m_config_retriever.getErrorString());
2409     return false;
2410   }
2411 
2412   return true;
2413 }
2414 
2415 
2416 void
run()2417 ConfigManager::ConfigChecker::run()
2418 {
2419   // Connect to other mgmd inifintely until thread is stopped
2420   // or connect suceeds
2421   g_eventLogger->debug("ConfigChecker, connecting to '%s'",
2422                        m_connect_string.c_str());
2423   while(m_config_retriever.do_connect(0 /* retry */,
2424                                       1 /* delay */,
2425                                       0 /* verbose */) != 0)
2426   {
2427     if (is_stopped())
2428     {
2429       g_eventLogger->debug("ConfigChecker, thread is stopped");
2430       return; // Thread is stopped
2431     }
2432 
2433     NdbSleep_SecSleep(1);
2434   }
2435 
2436   // Connected
2437   g_eventLogger->debug("ConfigChecker, connected to '%s'",
2438                        m_connect_string.c_str());
2439 
2440   // Check version
2441   int major, minor, build;
2442   char ver_str[50];
2443   if (!ndb_mgm_get_version(m_config_retriever.get_mgmHandle(),
2444                            &major, &minor, &build,
2445                            sizeof(ver_str), ver_str))
2446   {
2447     g_eventLogger->error("Could not get version from mgmd on '%s'",
2448                          m_connect_string.c_str());
2449     return;
2450   }
2451   g_eventLogger->debug("mgmd on '%s' has version %d.%d.%d",
2452                        m_connect_string.c_str(), major, minor, build);
2453 
2454   // Versions prior to 7 don't have ConfigManager
2455   // exclude it from config change protocol
2456   if (major < 7)
2457   {
2458     g_eventLogger->info("Excluding node %d with version %d.%d.%d from "
2459                         "config change protocol",
2460                         m_nodeid, major, minor, build);
2461     m_manager.m_exclude_nodes.push_back(m_nodeid);
2462   }
2463 
2464   return;
2465 }
2466 
2467 
2468 void
handle_exclude_nodes(void)2469 ConfigManager::handle_exclude_nodes(void)
2470 {
2471 
2472   if (!m_waiting_for.isclear())
2473     return; // Other things going on
2474 
2475   switch (m_config_state)
2476   {
2477   case CS_INITIAL:
2478     m_exclude_nodes.lock();
2479     for (unsigned i = 0; i < m_exclude_nodes.size(); i++)
2480     {
2481       NodeId nodeid = m_exclude_nodes[i];
2482       g_eventLogger->debug("Handle exclusion of node %d", nodeid);
2483       m_all_mgm.clear(nodeid);
2484     }
2485     m_exclude_nodes.unlock();
2486     break;
2487 
2488   default:
2489     break;
2490   }
2491   m_exclude_nodes.clear();
2492 
2493 }
2494 
2495 
2496 static bool
check_dynamic_port_configured(const Config * config,int node1,int node2,BaseString & msg)2497 check_dynamic_port_configured(const Config* config,
2498                               int node1, int node2,
2499                               BaseString& msg)
2500 {
2501   ConfigIter iter(config, CFG_SECTION_CONNECTION);
2502 
2503   for(;iter.valid();iter.next()) {
2504     Uint32 n1, n2;
2505     if (iter.get(CFG_CONNECTION_NODE_1, &n1) != 0 ||
2506         iter.get(CFG_CONNECTION_NODE_2, &n2) != 0)
2507     {
2508       msg.assign("Could not get node1 or node2 from connection section");
2509       return false;
2510     }
2511 
2512     if((n1 == (Uint32)node1 && n2 == (Uint32)node2) ||
2513        (n1 == (Uint32)node2 && n2 == (Uint32)node1))
2514       break;
2515   }
2516   if(!iter.valid()) {
2517     msg.assfmt("Unable to find connection between nodes %d -> %d",
2518                node1, node2);
2519     return false;
2520   }
2521 
2522   Uint32 port;
2523   if(iter.get(CFG_CONNECTION_SERVER_PORT, &port) != 0) {
2524     msg.assign("Unable to get current value of CFG_CONNECTION_SERVER_PORT");
2525     return false;
2526   }
2527 
2528   if (port != 0)
2529   {
2530     // Dynamic ports is zero in configuration
2531     msg.assfmt("Server port for %d -> %d is not marked as dynamic, value: %u",
2532                node1, node2, port);
2533     return false;
2534   }
2535   return true;
2536 }
2537 
2538 
2539 bool
set_dynamic_port(int node1,int node2,int value,BaseString & msg)2540 ConfigManager::set_dynamic_port(int node1, int node2, int value,
2541                                 BaseString& msg){
2542 
2543   Guard g(m_config_mutex);
2544   if (!check_dynamic_port_configured(m_config,
2545                                      node1, node2, msg))
2546     return false;
2547 
2548   if (!m_dynamic_ports.set(node1, node2, value))
2549   {
2550     msg.assfmt("Could not set dynamic port for %d -> %d", node1, node2);
2551     return false;
2552   }
2553 
2554   // Removed cache of packed config, need to be recreated
2555   // to include the new dynamic port
2556   m_packed_config.clear();
2557 
2558   return true;
2559 }
2560 
2561 
2562 bool
get_dynamic_port(int node1,int node2,int * value,BaseString & msg) const2563 ConfigManager::get_dynamic_port(int node1, int node2, int *value,
2564                                 BaseString& msg) const {
2565 
2566   Guard g(m_config_mutex);
2567   if (!check_dynamic_port_configured(m_config,
2568                                      node1, node2, msg))
2569     return false;
2570 
2571   if (!m_dynamic_ports.get(node1, node2, value))
2572   {
2573     msg.assfmt("Could not get dynamic port for %d -> %d", node1, node2);
2574     return false;
2575   }
2576   return true;
2577 }
2578 
2579 
check(int & node1,int & node2) const2580 bool ConfigManager::DynamicPorts::check(int& node1, int& node2) const
2581 {
2582   // Always use smaller node first
2583   if (node1 > node2)
2584   {
2585     int tmp = node1;
2586     node1 = node2;
2587     node2 = tmp;
2588   }
2589 
2590   // Only NDB nodes can be dynamic port server
2591   if (node1 <= 0 || node1 >= MAX_NDB_NODES)
2592     return false;
2593   if (node2 <= 0 || node2 >= MAX_NODES)
2594     return false;
2595   if (node1 == node2)
2596     return false;
2597 
2598   return true;
2599 }
2600 
2601 
set(int node1,int node2,int port)2602 bool ConfigManager::DynamicPorts::set(int node1, int node2, int port)
2603 {
2604   if (!check(node1, node2))
2605     return false;
2606 
2607   if (!m_ports.insert(NodePair(node1, node2), port, true))
2608     return false;
2609 
2610   return true;
2611 }
2612 
2613 
get(int node1,int node2,int * port) const2614 bool ConfigManager::DynamicPorts::get(int node1, int node2, int* port) const
2615 {
2616   if (!check(node1, node2))
2617     return false;
2618 
2619   int value = 0; // Return 0 if not found
2620   (void)m_ports.search(NodePair(node1, node2), value);
2621 
2622   *port = (int)value;
2623   return true;
2624 }
2625 
2626 
2627 bool
set_in_config(Config * config)2628 ConfigManager::DynamicPorts::set_in_config(Config* config)
2629 {
2630   bool result = true;
2631   ConfigIter iter(config, CFG_SECTION_CONNECTION);
2632 
2633   for(;iter.valid();iter.next()) {
2634     Uint32 port = 0;
2635     if (iter.get(CFG_CONNECTION_SERVER_PORT, &port) != 0 ||
2636         port != 0)
2637       continue; // Not configured as dynamic port
2638 
2639     Uint32 n1, n2;
2640     require(iter.get(CFG_CONNECTION_NODE_1, &n1) == 0 &&
2641             iter.get(CFG_CONNECTION_NODE_2, &n2) == 0);
2642 
2643     int dyn_port;
2644     if (!get(n1, n2, &dyn_port) || dyn_port == 0)
2645       continue; // No dynamic port registered
2646 
2647     // Write the dynamic port to config
2648     port = (Uint32)dyn_port;
2649     ConfigValues::Iterator i2(config->m_configValues->m_config,
2650                               iter.m_config);
2651     if(i2.set(CFG_CONNECTION_SERVER_PORT, port) == false)
2652       result = false;
2653   }
2654   return result;
2655 }
2656 
2657 
2658 template class Vector<ConfigSubscriber*>;
2659 template class Vector<ConfigManager::ConfigChecker*>;
2660 
2661