1 /* Copyright (c) 2008, 2021, Oracle and/or its affiliates.
2 
3    This program is free software; you can redistribute it and/or modify
4    it under the terms of the GNU General Public License, version 2.0,
5    as published by the Free Software Foundation.
6 
7    This program is also distributed with certain software (including
8    but not limited to OpenSSL) that is licensed under separate terms,
9    as designated in a particular file or component or in included license
10    documentation.  The authors of MySQL hereby grant you an additional
11    permission to link the program and your derivative works with the
12    separately licensed software that they have included with MySQL.
13 
14    This program is distributed in the hope that it will be useful,
15    but WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17    GNU General Public License, version 2.0, for more details.
18 
19    You should have received a copy of the GNU General Public License
20    along with this program; if not, write to the Free Software
21    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */
22 
23 
24 #include "ConfigManager.hpp"
25 #include "MgmtSrvr.hpp"
26 #include <NdbDir.hpp>
27 
28 #include <NdbConfig.h>
29 #include <NdbSleep.h>
30 #include <kernel/GlobalSignalNumbers.h>
31 #include <SignalSender.hpp>
32 #include <NdbApiSignal.hpp>
33 #include <signaldata/NFCompleteRep.hpp>
34 #include <signaldata/NodeFailRep.hpp>
35 #include <signaldata/ApiRegSignalData.hpp>
36 #include <ndb_version.h>
37 
38 #include <EventLogger.hpp>
39 extern EventLogger * g_eventLogger;
40 
41 extern "C" const char* opt_ndb_connectstring;
42 extern "C" int opt_ndb_nodeid;
43 
/*
  ERROR_INSERTED(x) compares the global g_errorInsert with x.
  The comparison is only compiled in when VM_TRACE or ERROR_INSERT is
  defined, in all other builds the macro is always false.
*/
#if defined VM_TRACE || defined ERROR_INSERT
extern int g_errorInsert;
#define ERROR_INSERTED(x) (g_errorInsert == x)
#else
#define ERROR_INSERTED(x) false
#endif
50 
/**
  Construct a ConfigManager in state CS_UNINITIALIZED.

  Only members are initialised here. The config mutex is left as NULL
  and is created by init(), which thus has to be called before the
  object is used.

  @param opts       Management server options, used to initialise m_opts.
                    opts.bind_address is also passed on to the
                    config retriever.
  @param configdir  Directory holding the saved binary configs, may be
                    NULL (find_nodeid_from_configdir() checks for that)
*/
ConfigManager::ConfigManager(const MgmtSrvr::MgmtOpts& opts,
                             const char* configdir) :
  MgmtThread("ConfigManager"),
  m_opts(opts),
  m_facade(NULL),
  m_ss(NULL),
  m_config_mutex(NULL),
  m_config(NULL),
  m_config_retriever(opt_ndb_connectstring,
                     opt_ndb_nodeid,
                     NDB_VERSION,
                     NDB_MGM_NODE_TYPE_MGM,
                     opts.bind_address),
  m_config_state(CS_UNINITIALIZED),
  m_previous_state(CS_UNINITIALIZED),
  m_prepared_config(NULL),
  m_node_id(0),
  m_configdir(configdir)
{
}
71 
72 
~ConfigManager()73 ConfigManager::~ConfigManager()
74 {
75   delete m_config;
76   delete m_prepared_config;
77   if (m_ss)
78     delete m_ss;
79   NdbMutex_Destroy(m_config_mutex);
80 }
81 
82 
83 /**
84    alone_on_host
85 
86    Check if this is the only node of "type" on
87    this host
88 
89 */
90 
91 static bool
alone_on_host(Config * conf,Uint32 own_type,Uint32 own_nodeid)92 alone_on_host(Config* conf,
93               Uint32 own_type,
94               Uint32 own_nodeid)
95 {
96   ConfigIter iter(conf, CFG_SECTION_NODE);
97   for (iter.first(); iter.valid(); iter.next())
98   {
99     Uint32 type;
100     if(iter.get(CFG_TYPE_OF_SECTION, &type) ||
101        type != own_type)
102       continue;
103 
104     Uint32 nodeid;
105     if(iter.get(CFG_NODE_ID, &nodeid) ||
106        nodeid == own_nodeid)
107       continue;
108 
109     const char * hostname;
110     if(iter.get(CFG_NODE_HOST, &hostname))
111       continue;
112 
113     if (SocketServer::tryBind(0,hostname))
114     {
115       // Another MGM node was also setup on this host
116       g_eventLogger->debug("Not alone on host %s, node %d "     \
117                            "will also run here",
118                            hostname, nodeid);
119       return false;
120     }
121   }
122   return true;
123 }
124 
125 
126 /**
127    find_nodeid_from_configdir
128 
129    Check if configdir only contains config files
130    with one nodeid -> read the latest and confirm
131    there should only be one mgm node on this host
132 */
133 
134 NodeId
find_nodeid_from_configdir(void)135 ConfigManager::find_nodeid_from_configdir(void)
136 {
137   BaseString config_name;
138   NdbDir::Iterator iter;
139 
140   if (!m_configdir ||
141       iter.open(m_configdir) != 0)
142     return 0;
143 
144   const char* name;
145   unsigned found_nodeid= 0;
146   unsigned nodeid;
147   char extra; // Avoid matching ndb_2_config.bin.2.tmp
148   unsigned version, max_version = 0;
149   while ((name = iter.next_file()) != NULL)
150   {
151     if (sscanf(name,
152                "ndb_%u_config.bin.%u%c",
153                &nodeid, &version, &extra) == 2)
154     {
155       // ndbout_c("match: %s", name);
156 
157       if (nodeid != found_nodeid)
158       {
159         if (found_nodeid != 0)
160           return 0; // Found more than one nodeid
161         found_nodeid= nodeid;
162       }
163 
164       if (version > max_version)
165         max_version = version;
166     }
167   }
168 
169   if (max_version == 0)
170     return 0;
171 
172   config_name.assfmt("%s%sndb_%u_config.bin.%u",
173                      m_configdir, DIR_SEPARATOR, found_nodeid, max_version);
174 
175   Config* conf;
176   if (!(conf = load_saved_config(config_name)))
177     return 0;
178 
179   if (!m_config_retriever.verifyConfig(conf->m_configValues,
180                                        found_nodeid) ||
181       !alone_on_host(conf, NDB_MGM_NODE_TYPE_MGM, found_nodeid))
182   {
183     delete conf;
184     return 0;
185   }
186 
187   delete conf;
188   return found_nodeid;
189 }
190 
191 
192 /**
193    find_own_nodeid
194 
195    Return the nodeid of the MGM node
196    defined to run on this host
197 
198    Return 0 if more than one node is defined
199 */
200 
201 static NodeId
find_own_nodeid(Config * conf)202 find_own_nodeid(Config* conf)
203 {
204   NodeId found_nodeid= 0;
205   ConfigIter iter(conf, CFG_SECTION_NODE);
206   for (iter.first(); iter.valid(); iter.next())
207   {
208     Uint32 type;
209     if(iter.get(CFG_TYPE_OF_SECTION, &type) ||
210        type != NDB_MGM_NODE_TYPE_MGM)
211       continue;
212 
213     Uint32 nodeid;
214     require(iter.get(CFG_NODE_ID, &nodeid) == 0);
215 
216     const char * hostname;
217     if(iter.get(CFG_NODE_HOST, &hostname))
218       continue;
219 
220     if (SocketServer::tryBind(0,hostname))
221     {
222       // This node is setup to run on this host
223       if (found_nodeid == 0)
224         found_nodeid = nodeid;
225       else
226         return 0; // More than one host on this node
227     }
228   }
229   return found_nodeid;
230 }
231 
232 
233 NodeId
find_nodeid_from_config(void)234 ConfigManager::find_nodeid_from_config(void)
235 {
236   if (!m_opts.mycnf &&
237       !m_opts.config_filename)
238     return 0;
239 
240   Config* conf = load_config();
241   if (conf == NULL)
242     return 0;
243 
244   NodeId found_nodeid = find_own_nodeid(conf);
245   if (found_nodeid == 0 ||
246       !m_config_retriever.verifyConfig(conf->m_configValues, found_nodeid))
247   {
248     delete conf;
249     return 0;
250   }
251 
252   return found_nodeid;
253 }
254 
255 
256 bool
init_nodeid(void)257 ConfigManager::init_nodeid(void)
258 {
259   DBUG_ENTER("ConfigManager::init_nodeid");
260 
261   NodeId nodeid = m_config_retriever.get_configuration_nodeid();
262   if (nodeid)
263   {
264     // Nodeid was specifed on command line or in NDB_CONNECTSTRING
265     g_eventLogger->debug("Got nodeid: %d from command line "    \
266                          "or NDB_CONNECTSTRING", nodeid);
267     m_node_id = nodeid;
268     DBUG_RETURN(true);
269   }
270 
271   nodeid = find_nodeid_from_configdir();
272   if (nodeid)
273   {
274     // Found nodeid by searching in configdir
275     g_eventLogger->debug("Got nodeid: %d from searching in configdir",
276                          nodeid);
277     m_node_id = nodeid;
278     DBUG_RETURN(true);
279   }
280 
281   nodeid = find_nodeid_from_config();
282   if (nodeid)
283   {
284     // Found nodeid by looking in the config given on command line
285     g_eventLogger->debug("Got nodeid: %d from config file given "       \
286                          "on command line",
287                          nodeid);
288     m_node_id = nodeid;
289     DBUG_RETURN(true);
290   }
291 
292   // We _could_ try connecting to other running mgmd(s)
293   // and fetch our nodeid. But, that introduces a dependency
294   // that is not beneficial for a shared nothing cluster, since
295   // it might only work when other mgmd(s) are started. If all
296   // mgmd(s) is down it would require manual intervention.
297   // Better to require the node id to always be specified
298   // on the command line(or the above _local_ magic)
299 
300   g_eventLogger->error("Could not determine which nodeid to use for "\
301                        "this node. Specify it with --ndb-nodeid=<nodeid> "\
302                        "on command line");
303   DBUG_RETURN(false);
304 }
305 
306 
307 static void
reset_dynamic_ports_in_config(const Config * config)308 reset_dynamic_ports_in_config(const Config* config)
309 {
310   ConfigIter iter(config, CFG_SECTION_CONNECTION);
311 
312   for(;iter.valid();iter.next()) {
313     Uint32 port;
314     require(iter.get(CFG_CONNECTION_SERVER_PORT, &port) == 0);
315 
316     if ((int)port < 0)
317     {
318       port = 0;
319       ConfigValues::Iterator i2(config->m_configValues->m_config,
320                                 iter.m_config);
321       require(i2.set(CFG_CONNECTION_SERVER_PORT, port));
322     }
323   }
324 }
325 
326 
/**
  Initialise the ConfigManager: create the config mutex, determine
  the nodeid to use and get hold of a configuration to start with.

  With --initial, the config file is first loaded to check that it is
  valid and then all saved configs are deleted.

  The configuration is then taken from one of:
   - a saved binary config, if one exists. State becomes CS_CONFIRMED.
     If --reload and a config file were given, that file is loaded as
     well and handed over to m_config_change to be set once started.
   - the config file (config.ini or my.cnf) given on command line.
     State becomes CS_INITIAL.
   - the config returned by fetch_config(). When running without
     config cache the function returns directly after setting the
     config. Otherwise a config with generation 0 gives state
     CS_INITIAL, while any other generation is written to disk and
     gives state CS_CONFIRMED.

  @return true if initialised, false if any of the steps failed
*/
bool
ConfigManager::init(void)
{
  DBUG_ENTER("ConfigManager::init");

  m_config_mutex = NdbMutex_Create();
  if (!m_config_mutex)
  {
    g_eventLogger->error("Failed to create mutex in ConfigManager!");
    DBUG_RETURN(false);
  }

  require(m_config_state == CS_UNINITIALIZED);

  // Report any error the config retriever already has
  if (m_config_retriever.hasError())
  {
    g_eventLogger->error("%s", m_config_retriever.getErrorString());
    DBUG_RETURN(false);
  }

  if (!init_nodeid())
    DBUG_RETURN(false);

  if (m_opts.initial)
  {
    /**
     * Verify valid -f before delete_saved_configs()
     */
    Config* conf = load_config();
    if (conf == NULL)
      DBUG_RETURN(false);

    // Only loaded as a check, not used further
    delete conf;

    if (!delete_saved_configs())
      DBUG_RETURN(false);
  }

  if (failed_config_change_exists())
    DBUG_RETURN(false);

  BaseString config_bin_name;
  if (saved_config_exists(config_bin_name))
  {
    // Start from the saved binary config
    Config* conf = NULL;
    if (!(conf = load_saved_config(config_bin_name)))
      DBUG_RETURN(false);

    if (!config_ok(conf))
      DBUG_RETURN(false);

    set_config(conf);
    m_config_state = CS_CONFIRMED;

    g_eventLogger->info("Loaded config from '%s'", config_bin_name.c_str());

    if (m_opts.reload && // --reload
        (m_opts.mycnf || m_opts.config_filename))
    {
      Config* new_conf = load_config();
      if (new_conf == NULL)
        DBUG_RETURN(false);

      /**
       * Add config to set once ConfigManager is fully started
       */
      m_config_change.config_loaded(new_conf);
      g_eventLogger->info("Loaded configuration from '%s', will try "   \
                          "to set it once started",
                          m_opts.mycnf ? "my.cnf" : m_opts.config_filename);
    }
  }
  else
  {
    if (m_opts.mycnf || m_opts.config_filename)
    {
      // No saved config, start from the config file given on command line
      Config* conf = load_config();
      if (conf == NULL)
        DBUG_RETURN(false);

      if (!config_ok(conf))
        DBUG_RETURN(false);

      /*
        Set this node as primary node for config.ini/my.cnf
        in order to make it possible that make sure an old
        config.ini is only loaded with --force
      */
      if (!conf->setPrimaryMgmNode(m_node_id))
      {
        g_eventLogger->error("Failed to set primary MGM node");
        DBUG_RETURN(false);
      }

      /* Use the initial config for now */
      set_config(conf);

      g_eventLogger->info("Got initial configuration from '%s', will try " \
                          "to set it when all ndb_mgmd(s) started",
                          m_opts.mycnf ? "my.cnf" : m_opts.config_filename);
      m_config_change.m_initial_config = new Config(conf); // Copy config
      m_config_state = CS_INITIAL;

      if (!init_checkers(m_config_change.m_initial_config))
        DBUG_RETURN(false);
    }
    else
    {
      // Neither saved config nor config file, fetch the config
      Config* conf = NULL;
      if (!(conf = fetch_config()))
      {
        g_eventLogger->error("Could not fetch config!");
        DBUG_RETURN(false);
      }

      /*
        The fetched config may contain dynamic ports for
        ndbd(s) which have to be reset to 0 before using
        the config
      */
      reset_dynamic_ports_in_config(conf);

      if (!config_ok(conf))
        DBUG_RETURN(false);

      /* Use the fetched config for now */
      set_config(conf);

      if (!m_opts.config_cache)
      {
        // Returns without changing m_config_state and without
        // passing the state check at the end of the function
        assert(!m_configdir); // Running without configdir
        g_eventLogger->info("Fetched configuration, " \
                            "generation: %d, name: '%s'. ",
                            m_config->getGeneration(), m_config->getName());
        DBUG_RETURN(true);
      }

      if (m_config->getGeneration() == 0)
      {
        g_eventLogger->info("Fetched initial configuration, " \
                            "generation: %d, name: '%s'. "\
                            "Will try to set it when all ndb_mgmd(s) started",
                            m_config->getGeneration(), m_config->getName());
        m_config_state= CS_INITIAL;
        m_config_change.m_initial_config = new Config(conf); // Copy config

        if (!init_checkers(m_config_change.m_initial_config))
          DBUG_RETURN(false);
      }
      else
      {
        g_eventLogger->info("Fetched confirmed configuration, " \
                            "generation: %d, name: '%s'. " \
                            "Trying to write it to disk...",
                            m_config->getGeneration(), m_config->getName());
        // Save the fetched config using the normal prepare + commit steps
        if (!prepareConfigChange(m_config))
        {
          abortConfigChange();
          g_eventLogger->error("Failed to write the fetched config to disk");
          DBUG_RETURN(false);
        }
        commitConfigChange();
        m_config_state = CS_CONFIRMED;
        g_eventLogger->info("The fetched configuration has been saved!");
      }
    }
  }

  require(m_config_state != CS_UNINITIALIZED);
  DBUG_RETURN(true);
}
498 
499 
500 bool
prepareConfigChange(const Config * config)501 ConfigManager::prepareConfigChange(const Config* config)
502 {
503   if (m_prepared_config)
504   {
505     g_eventLogger->error("Can't prepare configuration change " \
506                          "when already prepared");
507     return false;
508   }
509 
510   Uint32 generation= config->getGeneration();
511   if (generation == 0)
512   {
513     g_eventLogger->error("Can't prepare configuration change for "\
514                          "configuration with generation 0");
515     return false;
516   }
517 
518   assert(m_node_id);
519   m_config_name.assfmt("%s%sndb_%u_config.bin.%u",
520                        m_configdir, DIR_SEPARATOR, m_node_id, generation);
521   g_eventLogger->debug("Preparing configuration, generation: %d name: %s",
522                        generation, m_config_name.c_str());
523 
524   /* Check file name is free */
525   if (access(m_config_name.c_str(), F_OK) == 0)
526   {
527     g_eventLogger->error("The file '%s' already exist while preparing",
528                          m_config_name.c_str());
529     return false;
530   }
531 
532   /* Pack the config */
533   UtilBuffer buf;
534   if(!config->pack(buf))
535   {
536     /* Failed to pack config */
537     g_eventLogger->error("Failed to pack configuration while preparing");
538     return false;
539   }
540 
541   /* Write config to temporary file */
542   BaseString prep_config_name(m_config_name);
543   prep_config_name.append(".tmp");
544   FILE * f = fopen(prep_config_name.c_str(), IF_WIN("wbc", "w"));
545   if(f == NULL)
546   {
547     g_eventLogger->error("Failed to open file '%s' while preparing, errno: %d",
548                          prep_config_name.c_str(), errno);
549     return false;
550   }
551 
552   if(fwrite(buf.get_data(), 1, buf.length(), f) != (size_t)buf.length())
553   {
554     g_eventLogger->error("Failed to write file '%s' while preparing, errno: %d",
555                          prep_config_name.c_str(), errno);
556     fclose(f);
557     unlink(prep_config_name.c_str());
558     return false;
559   }
560 
561   if (fflush(f))
562   {
563     g_eventLogger->error("Failed to flush file '%s' while preparing, errno: %d",
564                          prep_config_name.c_str(), errno);
565     fclose(f);
566     unlink(prep_config_name.c_str());
567     return false;
568   }
569 
570 #ifdef _WIN32
571   /*
572 	File is opened with the commit flag "c" so
573 	that the contents of the file buffer are written
574 	directly to disk when fflush is called
575   */
576 #else
577   if (fsync(fileno(f)))
578   {
579     g_eventLogger->error("Failed to sync file '%s' while preparing, errno: %d",
580                          prep_config_name.c_str(), errno);
581     fclose(f);
582     unlink(prep_config_name.c_str());
583     return false;
584   }
585 #endif
586   fclose(f);
587 
588   m_prepared_config = new Config(config); // Copy
589   g_eventLogger->debug("Configuration prepared");
590 
591   return true;
592 }
593 
594 
595 void
commitConfigChange(void)596 ConfigManager::commitConfigChange(void)
597 {
598   require(m_prepared_config != 0);
599 
600   /* Set new config locally and in all subscribers */
601   set_config(m_prepared_config);
602   m_prepared_config= NULL;
603 
604   /* Rename file to real name */
605   require(m_config_name.length());
606   BaseString prep_config_name(m_config_name);
607   prep_config_name.append(".tmp");
608   if(rename(prep_config_name.c_str(), m_config_name.c_str()))
609   {
610     g_eventLogger->error("rename from '%s' to '%s' failed while committing, " \
611                          "errno: %d",
612                          prep_config_name.c_str(), m_config_name.c_str(),
613                          errno);
614     // Crash and leave the prepared config file in place
615     abort();
616   }
617   m_config_name.clear();
618 
619   g_eventLogger->info("Configuration %d commited", m_config->getGeneration());
620 }
621 
622 
623 static void
check_no_dynamic_ports_in_config(const Config * config)624 check_no_dynamic_ports_in_config(const Config* config)
625 {
626   bool ok = true;
627   ConfigIter iter(config, CFG_SECTION_CONNECTION);
628 
629   for(;iter.valid();iter.next()) {
630     Uint32 n1, n2;
631     require(iter.get(CFG_CONNECTION_NODE_1, &n1) == 0 &&
632             iter.get(CFG_CONNECTION_NODE_2, &n2) == 0);
633 
634     Uint32 port_value;
635     require(iter.get(CFG_CONNECTION_SERVER_PORT, &port_value) == 0);
636 
637     int port = (int)port_value;
638     if (port < 0)
639     {
640       g_eventLogger->error("INTERNAL ERROR: Found dynamic ports with "
641                            "value in config, n1: %d, n2: %d, port: %u",
642                            n1, n2, port);
643       ok = false;
644     }
645   }
646   require(ok);
647 }
648 
649 
650 void
set_config(Config * new_config)651 ConfigManager::set_config(Config* new_config)
652 {
653   // Check that config does not contain any dynamic ports
654   check_no_dynamic_ports_in_config(new_config);
655 
656   delete m_config;
657   m_config = new_config;
658 
659   // Removed cache of packed config
660   m_packed_config.clear();
661 
662   for (unsigned i = 0; i < m_subscribers.size(); i++)
663     m_subscribers[i]->config_changed(m_node_id, new_config);
664 }
665 
666 
667 int
add_config_change_subscriber(ConfigSubscriber * subscriber)668 ConfigManager::add_config_change_subscriber(ConfigSubscriber* subscriber)
669 {
670   return m_subscribers.push_back(subscriber);
671 }
672 
673 
674 bool
config_ok(const Config * conf)675 ConfigManager::config_ok(const Config* conf)
676 {
677   assert(m_node_id);
678   if (!m_config_retriever.verifyConfig(conf->m_configValues, m_node_id))
679   {
680     g_eventLogger->error("%s", m_config_retriever.getErrorString());
681     return false;
682   }
683 
684   // Check DataDir exist
685   ConfigIter iter(conf, CFG_SECTION_NODE);
686   require(iter.find(CFG_NODE_ID, m_node_id) == 0);
687 
688   const char *datadir;
689   require(iter.get(CFG_NODE_DATADIR, &datadir) == 0);
690 
691   if (strcmp(datadir, "") != 0 && // datadir != ""
692       access(datadir, F_OK))                 // dir exists
693   {
694     g_eventLogger->error("Directory '%s' specified with DataDir "  \
695                          "in configuration does not exist.",       \
696                          datadir);
697     return false;
698   }
699   return true;
700 }
701 
702 
703 void
abortConfigChange(void)704 ConfigManager::abortConfigChange(void)
705 {
706   /* Should always succeed */
707 
708   /* Remove the prepared file */
709   BaseString prep_config_name(m_config_name);
710   prep_config_name.append(".tmp");
711   unlink(prep_config_name.c_str());
712   m_config_name.clear();
713 
714   delete m_prepared_config;
715   m_prepared_config= NULL;
716 }
717 
718 
719 
720 void
sendConfigChangeImplRef(SignalSender & ss,NodeId nodeId,ConfigChangeRef::ErrorCode error) const721 ConfigManager::sendConfigChangeImplRef(SignalSender& ss, NodeId nodeId,
722                                        ConfigChangeRef::ErrorCode error) const
723 {
724   SimpleSignal ssig;
725   ConfigChangeImplRef* const ref =
726     CAST_PTR(ConfigChangeImplRef, ssig.getDataPtrSend());
727   ref->errorCode = error;
728 
729   g_eventLogger->debug("Send CONFIG_CHANGE_IMPL_REF to node: %d, error: %d",
730                        nodeId, error);
731 
732   ss.sendSignal(nodeId, ssig,
733                 MGM_CONFIG_MAN, GSN_CONFIG_CHANGE_IMPL_REF,
734                 ConfigChangeImplRef::SignalLength);
735 }
736 
737 
738 
/**
  Handle CONFIG_CHANGE_IMPL_REQ, i.e one step of a configuration
  change requested by another node (or by this node itself).

  The request type selects what to do:
   - Prepare: unpack the new config from the single section of the
     signal, check it against the current config and state, set
     its new generation and call prepareConfigChange()
   - Commit: call commitConfigChange() and set state CS_CONFIRMED
   - Abort: call abortConfigChange()

  Each failed check in Prepare is answered with
  CONFIG_CHANGE_IMPL_REF carrying an error code and the function
  returns. Otherwise CONFIG_CHANGE_IMPL_CONF with the same request
  type is sent back to the requesting node.

  @param ss   SignalSender used for sending the reply
  @param sig  The received signal
*/
void
ConfigManager::execCONFIG_CHANGE_IMPL_REQ(SignalSender& ss, SimpleSignal* sig)
{
  NodeId nodeId = refToNode(sig->header.theSendersBlockRef);
  const ConfigChangeImplReq * const req =
    CAST_CONSTPTR(ConfigChangeImplReq, sig->getDataPtr());

  g_eventLogger->debug("Got CONFIG_CHANGE_IMPL_REQ from node: %d, "\
                       "requestType: %d",
                       nodeId, req->requestType);

  if (!m_defragger.defragment(sig))
    return; // More fragments to come

  // Hold the config mutex while handling the request
  Guard g(m_config_mutex);

  switch(req->requestType){
  case ConfigChangeImplReq::Prepare:{
    // The new config is expected in exactly one section
    if (sig->header.m_noOfSections != 1)
    {
      sendConfigChangeImplRef(ss, nodeId, ConfigChangeRef::NoConfigData);
      return;
    }

    ConfigValuesFactory cf;
    if (!cf.unpack(sig->ptr[0].p, req->length))
    {
      sendConfigChangeImplRef(ss, nodeId, ConfigChangeRef::FailedToUnpack);
      return;
    }

    Config new_config(cf.getConfigValues());
    Uint32 new_generation = new_config.getGeneration();
    Uint32 curr_generation = m_config->getGeneration();
    const char* new_name = new_config.getName();
    const char* curr_name = m_config->getName();

    if (m_config->illegal_change(&new_config))
    {
      sendConfigChangeImplRef(ss, nodeId, ConfigChangeRef::IllegalConfigChange);
      return;
    }

    if (req->initial)
    {
      // Check own state
      if (m_config_state != CS_INITIAL)
      {
        g_eventLogger->warning("Refusing to start initial "             \
                               "configuration change since this node "  \
                               "is not in INITIAL state");
        sendConfigChangeImplRef(ss, nodeId,
                                ConfigChangeRef::IllegalInitialState);
        return;
      }

      // Check generation
      if (new_generation != 0)
      {
        g_eventLogger->warning("Refusing to start initial "             \
                               "configuration change since new "        \
                               "generation is not 0 (new_generation: %d)",
                               new_generation);
        sendConfigChangeImplRef(ss, nodeId,
                                ConfigChangeRef::IllegalInitialGeneration);
        return;
      }
      // An initial config is saved with generation 1
      new_generation = 1;

      // Check config is equal to our initial config
      // but skip check if message is from self...
      if (nodeId != refToNode(ss.getOwnRef()))
      {
        Config new_config_copy(&new_config);
        require(new_config_copy.setName(new_name));
        // The system section is excluded from the comparison
        unsigned exclude[]= {CFG_SECTION_SYSTEM, 0};
        if (!new_config_copy.equal(m_config_change.m_initial_config, exclude))
        {
          BaseString buf;
          g_eventLogger->warning
            ("Refusing to start initial config "                        \
             "change when nodes have different "                        \
             "config\n"                                                 \
             "This is the actual diff:\n%s",
             new_config_copy.diff2str(m_config_change.m_initial_config, buf));
          sendConfigChangeImplRef(ss, nodeId,
                                  ConfigChangeRef::DifferentInitial);
          return;
        }

        /*
          Scrap our own initial config, it's been used to check that
          the other node started from an equal initial config, now
          it's not needed anymore
        */
        delete m_config_change.m_initial_config;
        m_config_change.m_initial_config = NULL;
      }
    }
    else
    {

      // Check that new config has same primary mgm node as current
      Uint32 curr_primary = m_config->getPrimaryMgmNode();
      Uint32 new_primary = new_config.getPrimaryMgmNode();
      if (new_primary != curr_primary)
      {
        g_eventLogger->warning("Refusing to start configuration change " \
                               "requested by node %d, the new config uses " \
                               "different primary mgm node %d. "      \
                               "Current primary mmgm node is %d.",
                               nodeId, new_primary, curr_primary);
        sendConfigChangeImplRef(ss, nodeId,
                                ConfigChangeRef::NotPrimaryMgmNode);
        return;
      }

      // The new config must be based on the generation we currently have
      if (new_generation == 0 ||
          new_generation != curr_generation)
      {
        BaseString buf;
        g_eventLogger->warning("Refusing to start config change "     \
                               "requested by node with different "    \
                               "generation: %d. Our generation: %d\n" \
                               "This is the actual diff:\n%s",
                               new_generation, curr_generation,
                               new_config.diff2str(m_config, buf));
        sendConfigChangeImplRef(ss, nodeId, ConfigChangeRef::InvalidGeneration);
        return;
      }
      new_generation++;

      // Check same cluster name
      if (strcmp(new_name, curr_name))
      {
        BaseString buf;
        g_eventLogger->warning("Refusing to start config change "       \
                               "requested by node with different "      \
                               "name: '%s'. Our name: '%s'\n"           \
                               "This is the actual diff:\n%s",
                               new_name, curr_name,
                               new_config.diff2str(m_config, buf));
        sendConfigChangeImplRef(ss, nodeId, ConfigChangeRef::InvalidConfigName);
        return;
      }
    }

    // Set new generation
    if(!new_config.setGeneration(new_generation))
    {
      g_eventLogger->error("Failed to set new generation to %d",
                           new_generation);
      sendConfigChangeImplRef(ss, nodeId, ConfigChangeRef::InternalError);
      return;
    }

    // Write the new config to disk, keeping it as the prepared config
    if (!prepareConfigChange(&new_config))
    {
      sendConfigChangeImplRef(ss, nodeId, ConfigChangeRef::PrepareFailed);
      return;
    }
    break;
  }

  case ConfigChangeImplReq::Commit:
    commitConfigChange();

    // All nodes have agreed on config -> CONFIRMED
    m_config_state = CS_CONFIRMED;

    break;

  case ConfigChangeImplReq::Abort:
    abortConfigChange();
    break;

  default:
    g_eventLogger->error("execCONFIG_CHANGE_IMPL_REQ: unhandled state");
    abort();
    break;
  }

  /* Send CONF */
  SimpleSignal ssig;
  ConfigChangeImplConf* const conf =
    CAST_PTR(ConfigChangeImplConf, ssig.getDataPtrSend());
  conf->requestType = req->requestType;

  g_eventLogger->debug("Sending CONFIG_CHANGE_IMPL_CONF to node: %d",
                       nodeId);

  ss.sendSignal(nodeId, ssig,
                MGM_CONFIG_MAN,
                GSN_CONFIG_CHANGE_IMPL_CONF,
                ConfigChangeImplConf::SignalLength);
}
934 
935 
set_config_change_state(ConfigChangeState::States state)936 void ConfigManager::set_config_change_state(ConfigChangeState::States state)
937 {
938   if (state == ConfigChangeState::IDLE)
939   {
940     // Rebuild m_all_mgm so that each node in config is included
941     // new mgm nodes might have been added
942     assert(m_config_change.m_error == ConfigChangeRef::OK);
943     m_config->get_nodemask(m_all_mgm, NDB_MGM_NODE_TYPE_MGM);
944   }
945 
946   m_config_change.m_state.m_current_state = state;
947 }
948 
949 
950 void
execCONFIG_CHANGE_IMPL_REF(SignalSender & ss,SimpleSignal * sig)951 ConfigManager::execCONFIG_CHANGE_IMPL_REF(SignalSender& ss, SimpleSignal* sig)
952 {
953   NodeId nodeId = refToNode(sig->header.theSendersBlockRef);
954   g_eventLogger->debug("Got CONFIG_CHANGE_IMPL_REF from node: %d", nodeId);
955 
956   const ConfigChangeImplRef * const ref =
957     CAST_CONSTPTR(ConfigChangeImplRef, sig->getDataPtr());
958   g_eventLogger->warning("Node %d refused configuration change, error: %d",
959                          nodeId, ref->errorCode);
960 
961   /* Remember the original error code */
962   if (m_config_change.m_error == 0)
963     m_config_change.m_error = (ConfigChangeRef::ErrorCode)ref->errorCode;
964 
965   switch(m_config_change.m_state){
966   case ConfigChangeState::ABORT:
967   case ConfigChangeState::PREPARING:{
968     /* Got ref while preparing (or already decided to abort) */
969     m_config_change.m_contacted_nodes.clear(nodeId);
970     set_config_change_state(ConfigChangeState::ABORT);
971 
972     m_waiting_for.clear(nodeId);
973     if (!m_waiting_for.isclear())
974       return;
975 
976     startAbortConfigChange(ss);
977     break;
978   }
979   case ConfigChangeState::COMITTING:
980     /* Got ref while comitting, impossible */
981     abort();
982     break;
983 
984   case ConfigChangeState::ABORTING:
985     /* Got ref while aborting, impossible */
986     abort();
987     break;
988 
989   default:
990     g_eventLogger->error("execCONFIG_CHANGE_IMPL_REF: unhandled state");
991     abort();
992     break;
993   }
994 }
995 
996 
997 void
execCONFIG_CHANGE_IMPL_CONF(SignalSender & ss,SimpleSignal * sig)998 ConfigManager::execCONFIG_CHANGE_IMPL_CONF(SignalSender& ss, SimpleSignal* sig)
999 {
1000   NodeId nodeId = refToNode(sig->header.theSendersBlockRef);
1001   const ConfigChangeImplConf * const conf =
1002     CAST_CONSTPTR(ConfigChangeImplConf, sig->getDataPtr());
1003   g_eventLogger->debug("Got CONFIG_CHANGE_IMPL_CONF from node %d", nodeId);
1004 
1005   switch(m_config_change.m_state){
1006   case ConfigChangeState::PREPARING:{
1007     require(conf->requestType == ConfigChangeImplReq::Prepare);
1008     m_waiting_for.clear(nodeId);
1009     if (!m_waiting_for.isclear())
1010       return;
1011 
1012     // send to next
1013     int res = sendConfigChangeImplReq(ss, m_config_change.m_new_config);
1014     if (res > 0)
1015     {
1016       // sent to new node...
1017       return;
1018     }
1019     else if (res < 0)
1020     {
1021       // send failed, start abort
1022       startAbortConfigChange(ss);
1023       return;
1024     }
1025 
1026     /**
1027      * All node has received new config..
1028      *   ok to delete it...
1029      */
1030     delete m_config_change.m_new_config;
1031     m_config_change.m_new_config = 0;
1032 
1033     /* Send commit to all nodes */
1034     SimpleSignal ssig;
1035     ConfigChangeImplReq* const req =
1036       CAST_PTR(ConfigChangeImplReq, ssig.getDataPtrSend());
1037 
1038     req->requestType = ConfigChangeImplReq::Commit;
1039 
1040     g_eventLogger->debug("Sending CONFIG_CHANGE_IMPL_REQ(commit)");
1041     require(m_waiting_for.isclear());
1042     m_waiting_for = ss.broadcastSignal(m_config_change.m_contacted_nodes, ssig,
1043                                        MGM_CONFIG_MAN,
1044                                        GSN_CONFIG_CHANGE_IMPL_REQ,
1045                                        ConfigChangeImplReq::SignalLength);
1046     if (m_waiting_for.isclear())
1047       set_config_change_state(ConfigChangeState::IDLE);
1048     else
1049       set_config_change_state(ConfigChangeState::COMITTING);
1050     break;
1051   }
1052 
1053   case ConfigChangeState::COMITTING:{
1054     require(conf->requestType == ConfigChangeImplReq::Commit);
1055 
1056     m_waiting_for.clear(nodeId);
1057     if (!m_waiting_for.isclear())
1058       return;
1059 
1060     require(m_config_change.m_client_ref != RNIL);
1061     require(m_config_change.m_error == 0);
1062     if (m_config_change.m_client_ref == ss.getOwnRef())
1063     {
1064       g_eventLogger->info("Config change completed! New generation: %d",
1065                           m_config->getGeneration());
1066     }
1067     else
1068     {
1069       /* Send CONF to requestor */
1070       sendConfigChangeConf(ss, m_config_change.m_client_ref);
1071     }
1072     m_config_change.m_client_ref = RNIL;
1073     set_config_change_state(ConfigChangeState::IDLE);
1074     break;
1075   }
1076 
1077   case ConfigChangeState::ABORT:{
1078     m_waiting_for.clear(nodeId);
1079     if (!m_waiting_for.isclear())
1080       return;
1081 
1082     startAbortConfigChange(ss);
1083     break;
1084   }
1085 
1086   case ConfigChangeState::ABORTING:{
1087     m_waiting_for.clear(nodeId);
1088     if (!m_waiting_for.isclear())
1089       return;
1090 
1091     require(m_config_change.m_client_ref != RNIL);
1092     require(m_config_change.m_error);
1093     if (m_config_change.m_client_ref == ss.getOwnRef())
1094     {
1095       g_eventLogger->
1096         error("Configuration change failed! error: %d '%s'",
1097               m_config_change.m_error,
1098               ConfigChangeRef::errorMessage(m_config_change.m_error));
1099       exit(1);
1100     }
1101     else
1102     {
1103       /* Send ref to the requestor */
1104       sendConfigChangeRef(ss, m_config_change.m_client_ref,
1105                           m_config_change.m_error);
1106     }
1107     m_config_change.m_error= ConfigChangeRef::OK;
1108     m_config_change.m_client_ref = RNIL;
1109     set_config_change_state(ConfigChangeState::IDLE);
1110     break;
1111   }
1112 
1113   default:
1114     g_eventLogger->error("execCONFIG_CHANGE_IMPL_CONF: unhandled state");
1115     abort();
1116     break;
1117   }
1118 }
1119 
1120 
1121 void
sendConfigChangeRef(SignalSender & ss,BlockReference to,ConfigChangeRef::ErrorCode error) const1122 ConfigManager::sendConfigChangeRef(SignalSender& ss, BlockReference to,
1123                                    ConfigChangeRef::ErrorCode error) const
1124 {
1125   NodeId nodeId = refToNode(to);
1126   SimpleSignal ssig;
1127   ConfigChangeRef* const ref =
1128     CAST_PTR(ConfigChangeRef, ssig.getDataPtrSend());
1129   ref->errorCode = error;
1130 
1131   g_eventLogger->debug("Send CONFIG_CHANGE_REF to node: %d, error: %d",
1132                        nodeId, error);
1133 
1134   ss.sendSignal(nodeId, ssig, refToBlock(to),
1135                 GSN_CONFIG_CHANGE_REF, ConfigChangeRef::SignalLength);
1136 }
1137 
1138 
1139 void
sendConfigChangeConf(SignalSender & ss,BlockReference to) const1140 ConfigManager::sendConfigChangeConf(SignalSender& ss, BlockReference to) const
1141 {
1142   NodeId nodeId = refToNode(to);
1143   SimpleSignal ssig;
1144 
1145   g_eventLogger->debug("Send CONFIG_CHANGE_CONF to node: %d", nodeId);
1146 
1147   ss.sendSignal(nodeId, ssig, refToBlock(to),
1148                 GSN_CONFIG_CHANGE_CONF, ConfigChangeConf::SignalLength);
1149 }
1150 
1151 
1152 void
startConfigChange(SignalSender & ss,Uint32 ref)1153 ConfigManager::startConfigChange(SignalSender& ss, Uint32 ref)
1154 {
1155   if (m_config_state == CS_INITIAL)
1156   {
1157     g_eventLogger->info("Starting initial configuration change");
1158   }
1159   else
1160   {
1161     require(m_config_state == CS_CONFIRMED);
1162     g_eventLogger->info("Starting configuration change, generation: %d",
1163                         m_config_change.m_new_config->getGeneration());
1164   }
1165   m_config_change.m_contacted_nodes.clear();
1166   m_config_change.m_client_ref = ref;
1167   if (sendConfigChangeImplReq(ss, m_config_change.m_new_config) <= 0)
1168   {
1169     g_eventLogger->error("Failed to start configuration change!");
1170     exit(1);
1171   }
1172 }
1173 
1174 void
startAbortConfigChange(SignalSender & ss)1175 ConfigManager::startAbortConfigChange(SignalSender& ss)
1176 {
1177   /* Abort all other nodes */
1178   SimpleSignal ssig;
1179   ConfigChangeImplReq* const req =
1180     CAST_PTR(ConfigChangeImplReq, ssig.getDataPtrSend());
1181   req->requestType = ConfigChangeImplReq::Abort;
1182 
1183   g_eventLogger->debug
1184     ("Sending CONFIG_CHANGE_IMPL_REQ(abort) to %s",
1185      BaseString::getPrettyText(m_config_change.m_contacted_nodes).c_str());
1186 
1187   require(m_waiting_for.isclear());
1188   m_waiting_for = ss.broadcastSignal(m_config_change.m_contacted_nodes, ssig,
1189                                      MGM_CONFIG_MAN,
1190                                      GSN_CONFIG_CHANGE_IMPL_REQ,
1191                                      ConfigChangeImplReq::SignalLength);
1192 
1193   if (m_config_change.m_new_config)
1194   {
1195     delete m_config_change.m_new_config;
1196     m_config_change.m_new_config = 0;
1197   }
1198 
1199   if (m_waiting_for.isclear())
1200   {
1201     /**
1202      * Send CONFIG_CHANGE_IMPL_CONF (aborting) to self
1203      */
1204     m_waiting_for.set(ss.getOwnNodeId());
1205     ConfigChangeImplConf* const conf =
1206       CAST_PTR(ConfigChangeImplConf, ssig.getDataPtrSend());
1207     conf->requestType = ConfigChangeImplReq::Abort;
1208 
1209     ss.sendSignal(ss.getOwnNodeId(), ssig,
1210                   MGM_CONFIG_MAN,
1211                   GSN_CONFIG_CHANGE_IMPL_CONF,
1212                   ConfigChangeImplConf::SignalLength);
1213   }
1214 
1215   set_config_change_state(ConfigChangeState::ABORTING);
1216 }
1217 
1218 int
sendConfigChangeImplReq(SignalSender & ss,const Config * conf)1219 ConfigManager::sendConfigChangeImplReq(SignalSender& ss, const Config* conf)
1220 {
1221   require(m_waiting_for.isclear());
1222   require(m_config_change.m_client_ref != RNIL);
1223 
1224   if (m_config_change.m_contacted_nodes.isclear())
1225   {
1226     require(m_config_change.m_state == ConfigChangeState::IDLE);
1227   }
1228   else
1229   {
1230     require(m_config_change.m_state == ConfigChangeState::PREPARING);
1231   }
1232 
1233   set_config_change_state(ConfigChangeState::PREPARING);
1234 
1235   NodeBitmask nodes = m_all_mgm;
1236   nodes.bitANDC(m_config_change.m_contacted_nodes);
1237   if (nodes.isclear())
1238   {
1239     return 0; // all done
1240   }
1241 
1242   /**
1243    * Send prepare to all MGM nodes 1 by 1
1244    *   keep track of which I sent to in m_contacted_nodes
1245    */
1246   SimpleSignal ssig;
1247 
1248   UtilBuffer buf;
1249   conf->pack(buf);
1250   ssig.ptr[0].p = (Uint32*)buf.get_data();
1251   ssig.ptr[0].sz = (buf.length() + 3) / 4;
1252   ssig.header.m_noOfSections = 1;
1253 
1254   ConfigChangeImplReq* const req =
1255     CAST_PTR(ConfigChangeImplReq, ssig.getDataPtrSend());
1256   req->requestType = ConfigChangeImplReq::Prepare;
1257   req->initial = (m_config_state == CS_INITIAL);
1258   req->length = buf.length();
1259 
1260   Uint32 i = nodes.find(0);
1261   g_eventLogger->debug("Sending CONFIG_CHANGE_IMPL_REQ(prepare) to %u", i);
1262   int result = ss.sendFragmentedSignal(i, ssig, MGM_CONFIG_MAN,
1263                                        GSN_CONFIG_CHANGE_IMPL_REQ,
1264                                        ConfigChangeImplReq::SignalLength);
1265   if (result != 0)
1266   {
1267     g_eventLogger->warning("Failed to send configuration change "
1268                            "prepare to node: %d, result: %d",
1269                            i, result);
1270     return -1;
1271   }
1272 
1273   m_waiting_for.set(i);
1274   m_config_change.m_contacted_nodes.set(i);
1275 
1276   return 1;
1277 }
1278 
1279 void
execCONFIG_CHANGE_REQ(SignalSender & ss,SimpleSignal * sig)1280 ConfigManager::execCONFIG_CHANGE_REQ(SignalSender& ss, SimpleSignal* sig)
1281 {
1282   BlockReference from = sig->header.theSendersBlockRef;
1283   const ConfigChangeReq * const req =
1284     CAST_CONSTPTR(ConfigChangeReq, sig->getDataPtr());
1285 
1286   if (!m_defragger.defragment(sig))
1287     return; // More fragments to come
1288 
1289   if (!m_started.equal(m_all_mgm)) // Not all started
1290   {
1291     sendConfigChangeRef(ss, from, ConfigChangeRef::NotAllStarted);
1292     return;
1293   }
1294 
1295   if (m_all_mgm.find(0) != m_facade->ownId()) // Not the master
1296   {
1297     sendConfigChangeRef(ss, from, ConfigChangeRef::NotMaster);
1298     return;
1299   }
1300 
1301   if (m_config_change.m_state != ConfigChangeState::IDLE)
1302   {
1303     sendConfigChangeRef(ss, from, ConfigChangeRef::ConfigChangeOnGoing);
1304     return;
1305   }
1306   require(m_config_change.m_error == ConfigChangeRef::OK);
1307 
1308   if (sig->header.m_noOfSections != 1)
1309   {
1310     sendConfigChangeRef(ss, from, ConfigChangeRef::NoConfigData);
1311     return;
1312   }
1313 
1314   ConfigValuesFactory cf;
1315   if (!cf.unpack(sig->ptr[0].p, req->length))
1316   {
1317     sendConfigChangeRef(ss, from, ConfigChangeRef::FailedToUnpack);
1318     return;
1319   }
1320 
1321   Config * new_config = new Config(cf.getConfigValues());
1322   if (!config_ok(new_config))
1323   {
1324     g_eventLogger->warning("Refusing to start config change, the config "\
1325                            "is not ok");
1326     sendConfigChangeRef(ss, from, ConfigChangeRef::ConfigNotOk);
1327     delete new_config;
1328     return;
1329   }
1330 
1331   m_config_change.m_new_config = new_config;
1332   startConfigChange(ss, from);
1333 
1334   return;
1335 }
1336 
1337 
1338 static Uint32
config_check_checksum(const Config * config)1339 config_check_checksum(const Config* config)
1340 {
1341   Config copy(config);
1342 
1343   // Make constants of a few values in SYSTEM section that are
1344   // not part of the  checksum used for "config check"
1345   copy.setName("CHECKSUM");
1346   copy.setPrimaryMgmNode(0);
1347 
1348   Uint32 checksum = copy.checksum();
1349 
1350   return checksum;
1351 }
1352 
1353 
1354 void
execCONFIG_CHECK_REQ(SignalSender & ss,SimpleSignal * sig)1355 ConfigManager::execCONFIG_CHECK_REQ(SignalSender& ss, SimpleSignal* sig)
1356 {
1357   Guard g(m_config_mutex);
1358   BlockReference from = sig->header.theSendersBlockRef;
1359   NodeId nodeId = refToNode(from);
1360   const ConfigCheckReq * const req =
1361     CAST_CONSTPTR(ConfigCheckReq, sig->getDataPtr());
1362 
1363   Uint32 other_generation = req->generation;
1364   ConfigState other_state = (ConfigState)req->state;
1365 
1366   Uint32 generation = m_config->getGeneration();
1367 
1368   if (ERROR_INSERTED(100) && nodeId != ss.getOwnNodeId())
1369   {
1370     g_eventLogger->debug("execCONFIG_CHECK_REQ() ERROR_INSERTED(100) => exit()");
1371     exit(0);
1372   }
1373 
1374   // checksum
1375   Uint32 checksum = config_check_checksum(m_config);
1376   Uint32 other_checksum = req->checksum;
1377   if (sig->header.theLength == ConfigCheckReq::SignalLengthBeforeChecksum)
1378   {
1379     // Other side uses old version without checksum, use our checksum to
1380     // bypass the checks
1381     g_eventLogger->debug("Other mgmd does not have checksum, using own");
1382     other_checksum = checksum;
1383   }
1384 
1385   if (m_prepared_config || m_config_change.m_new_config)
1386   {
1387     g_eventLogger->debug("Got CONFIG_CHECK_REQ from node: %d while "
1388                          "config change in progress (m_prepared_config). "
1389                          "Returning incorrect state, causing it to be retried",
1390                          nodeId);
1391     sendConfigCheckRef(ss, from, ConfigCheckRef::WrongState,
1392                        generation, other_generation,
1393                        m_config_state, CS_UNINITIALIZED);
1394     return;
1395   }
1396 
1397   if (m_config_change.m_loaded_config && ss.getOwnNodeId() < nodeId)
1398   {
1399     g_eventLogger->debug("Got CONFIG_CHECK_REQ from node: %d while "
1400                          "having a loaded config (and my node is lower: %d). "
1401                          "Returning incorrect state, causing it to be retried",
1402                          nodeId,
1403                          ss.getOwnNodeId());
1404     sendConfigCheckRef(ss, from, ConfigCheckRef::WrongState,
1405                        generation, other_generation,
1406                        m_config_state, CS_UNINITIALIZED);
1407     return;
1408   }
1409 
1410   g_eventLogger->debug("Got CONFIG_CHECK_REQ from node: %d. "
1411                        "Our generation: %d, other generation: %d, "
1412                        "our state: %d, other state: %d, "
1413                        "our checksum: 0x%.8x, other checksum: 0x%.8x",
1414                        nodeId, generation, other_generation,
1415                        m_config_state, other_state,
1416                        checksum, other_checksum);
1417 
1418   switch (m_config_state)
1419   {
1420   default:
1421   case CS_UNINITIALIZED:
1422     g_eventLogger->error("execCONFIG_CHECK_REQ: unhandled state");
1423     abort();
1424     break;
1425 
1426   case CS_INITIAL:
1427     if (other_state != CS_INITIAL)
1428     {
1429       g_eventLogger->warning("Refusing CONGIG_CHECK_REQ from %u, "
1430                              "  it's not CS_INITIAL (I am). "
1431                              " Waiting for my check",
1432                              nodeId);
1433       sendConfigCheckRef(ss, from, ConfigCheckRef::WrongState,
1434                          generation, other_generation,
1435                          m_config_state, other_state);
1436       return;
1437     }
1438 
1439     require(generation == 0);
1440     if (other_generation != generation)
1441     {
1442       g_eventLogger->warning("Refusing other node, it has different "   \
1443                              "generation: %d, expected: %d",
1444                              other_generation, generation);
1445       sendConfigCheckRef(ss, from, ConfigCheckRef::WrongGeneration,
1446                          generation, other_generation,
1447                          m_config_state, other_state);
1448       return;
1449     }
1450 
1451     if (other_checksum != checksum)
1452     {
1453       g_eventLogger->warning("Refusing other node, it has different "
1454                              "checksum: 0x%.8x, expected: 0x%.8x",
1455                              other_checksum, checksum);
1456       sendConfigCheckRef(ss, from, ConfigCheckRef::WrongChecksum,
1457                          generation, other_generation,
1458                          m_config_state, other_state);
1459       return;
1460     }
1461     break;
1462 
1463   case CS_CONFIRMED:
1464 
1465     if (other_state != CS_CONFIRMED)
1466     {
1467       g_eventLogger->warning("Refusing other node, it's in different "  \
1468                              "state: %d, expected: %d",
1469                              other_state, m_config_state);
1470       sendConfigCheckRef(ss, from, ConfigCheckRef::WrongState,
1471                          generation, other_generation,
1472                          m_config_state, other_state);
1473       return;
1474     }
1475 
1476     if (other_generation == generation)
1477     {
1478       // Same generation, make sure it has same checksum
1479       if (other_checksum != checksum)
1480       {
1481         g_eventLogger->warning("Refusing other node, it has different "
1482                                "checksum: 0x%.8x, expected: 0x%.8x",
1483                                other_checksum, checksum);
1484         sendConfigCheckRef(ss, from, ConfigCheckRef::WrongChecksum,
1485                            generation, other_generation,
1486                            m_config_state, other_state);
1487         return;
1488       }
1489       // OK!
1490     }
1491     else if (other_generation < generation)
1492     {
1493       g_eventLogger->warning("Refusing other node, it has lower "       \
1494                              " generation: %d, expected: %d",
1495                              other_generation, generation);
1496       sendConfigCheckRef(ss, from, ConfigCheckRef::WrongGeneration,
1497                          generation, other_generation,
1498                          m_config_state, other_state);
1499       return;
1500     }
1501     else
1502     {
1503       g_eventLogger->error("Other node has higher generation: %d, this " \
1504                            "node is out of sync with generation: %d",
1505                            other_generation, generation);
1506       exit(1);
1507     }
1508 
1509     break;
1510   }
1511 
1512   sendConfigCheckConf(ss, from);
1513   return;
1514 }
1515 
1516 
1517 void
sendConfigCheckReq(SignalSender & ss,NodeBitmask to)1518 ConfigManager::sendConfigCheckReq(SignalSender& ss, NodeBitmask to)
1519 {
1520   SimpleSignal ssig;
1521   ConfigCheckReq* const req =
1522     CAST_PTR(ConfigCheckReq, ssig.getDataPtrSend());
1523   req->state =        m_config_state;
1524   req->generation =   m_config->getGeneration();
1525   req->checksum =     config_check_checksum(m_config);
1526 
1527   g_eventLogger->debug("Sending CONFIG_CHECK_REQ to %s",
1528                        BaseString::getPrettyText(to).c_str());
1529 
1530   require(m_waiting_for.isclear());
1531   m_waiting_for = ss.broadcastSignal(to, ssig, MGM_CONFIG_MAN,
1532                                      GSN_CONFIG_CHECK_REQ,
1533                                      ConfigCheckReq::SignalLength);
1534 }
1535 
1536 static bool
send_config_in_check_ref(Uint32 x)1537 send_config_in_check_ref(Uint32 x)
1538 {
1539   if (x >= NDB_MAKE_VERSION(7,0,8))
1540     return true;
1541   return false;
1542 }
1543 
1544 void
sendConfigCheckRef(SignalSender & ss,BlockReference to,ConfigCheckRef::ErrorCode error,Uint32 generation,Uint32 other_generation,ConfigState state,ConfigState other_state) const1545 ConfigManager::sendConfigCheckRef(SignalSender& ss, BlockReference to,
1546                                   ConfigCheckRef::ErrorCode error,
1547                                   Uint32 generation,
1548                                   Uint32 other_generation,
1549                                   ConfigState state,
1550                                   ConfigState other_state) const
1551 {
1552   int result;
1553   NodeId nodeId = refToNode(to);
1554   SimpleSignal ssig;
1555   ConfigCheckRef* const ref =
1556     CAST_PTR(ConfigCheckRef, ssig.getDataPtrSend());
1557   ref->error = error;
1558   ref->generation = other_generation;
1559   ref->expected_generation = generation;
1560   ref->state = other_state;
1561   ref->expected_state = state;
1562 
1563   g_eventLogger->debug("Send CONFIG_CHECK_REF with error: %d to node: %d",
1564                        error, nodeId);
1565 
1566   if (!send_config_in_check_ref(ss.getNodeInfo(nodeId).m_info.m_version))
1567   {
1568     result = ss.sendSignal(nodeId, ssig, MGM_CONFIG_MAN,
1569                            GSN_CONFIG_CHECK_REF, ConfigCheckRef::SignalLength);
1570   }
1571   else
1572   {
1573     UtilBuffer buf;
1574     m_config->pack(buf);
1575     ssig.ptr[0].p = (Uint32*)buf.get_data();
1576     ssig.ptr[0].sz = (buf.length() + 3) / 4;
1577     ssig.header.m_noOfSections = 1;
1578 
1579     ref->length = buf.length();
1580 
1581     g_eventLogger->debug("Sending CONFIG_CHECK_REF with config");
1582 
1583     result = ss.sendFragmentedSignal(nodeId, ssig, MGM_CONFIG_MAN,
1584                                     GSN_CONFIG_CHECK_REF,
1585                                     ConfigCheckRef::SignalLengthWithConfig);
1586   }
1587 
1588   if (result != 0)
1589   {
1590     g_eventLogger->warning("Failed to send CONFIG_CHECK_REF "
1591                            "to node: %d, result: %d",
1592                            nodeId, result);
1593   }
1594 }
1595 
1596 void
sendConfigCheckConf(SignalSender & ss,BlockReference to) const1597 ConfigManager::sendConfigCheckConf(SignalSender& ss, BlockReference to) const
1598 {
1599   NodeId nodeId = refToNode(to);
1600   SimpleSignal ssig;
1601   ConfigCheckConf* const conf =
1602     CAST_PTR(ConfigCheckConf, ssig.getDataPtrSend());
1603   conf->state = m_config_state;
1604   conf->generation = m_config->getGeneration();
1605 
1606   g_eventLogger->debug("Send CONFIG_CHECK_CONF to node: %d", nodeId);
1607 
1608   ss.sendSignal(nodeId, ssig, MGM_CONFIG_MAN,
1609                 GSN_CONFIG_CHECK_CONF, ConfigCheckConf::SignalLength);
1610 }
1611 
1612 
1613 void
execCONFIG_CHECK_CONF(SignalSender & ss,SimpleSignal * sig)1614 ConfigManager::execCONFIG_CHECK_CONF(SignalSender& ss, SimpleSignal* sig)
1615 {
1616   BlockReference from = sig->header.theSendersBlockRef;
1617   NodeId nodeId = refToNode(from);
1618   assert(m_waiting_for.get(nodeId));
1619   m_waiting_for.clear(nodeId);
1620   m_checked.set(nodeId);
1621 
1622   g_eventLogger->debug("Got CONFIG_CHECK_CONF from node: %d",
1623                        nodeId);
1624 
1625   return;
1626 }
1627 
1628 
1629 void
execCONFIG_CHECK_REF(SignalSender & ss,SimpleSignal * sig)1630 ConfigManager::execCONFIG_CHECK_REF(SignalSender& ss, SimpleSignal* sig)
1631 {
1632   BlockReference from = sig->header.theSendersBlockRef;
1633   NodeId nodeId = refToNode(from);
1634   assert(m_waiting_for.get(nodeId));
1635 
1636   const ConfigCheckRef* const ref =
1637     CAST_CONSTPTR(ConfigCheckRef, sig->getDataPtr());
1638 
1639   if (!m_defragger.defragment(sig))
1640     return; // More fragments to come
1641 
1642   g_eventLogger->debug("Got CONFIG_CHECK_REF from node %d, "
1643                       "error: %d, message: '%s', "
1644                       "generation: %d, expected generation: %d, "
1645                       "state: %d, expected state: %d own-state: %u",
1646                       nodeId, ref->error,
1647                       ConfigCheckRef::errorMessage(ref->error),
1648                       ref->generation, ref->expected_generation,
1649                       ref->state, ref->expected_state,
1650                       m_config_state);
1651 
1652   assert(ref->generation != ref->expected_generation ||
1653          ref->state != ref->expected_state ||
1654          ref->error == ConfigCheckRef::WrongChecksum);
1655   if((Uint32)m_config_state != ref->state)
1656   {
1657     // The config state changed while this check was in the air
1658     // drop the signal and thus cause it to run again later
1659     require(!m_checked.get(nodeId));
1660     m_waiting_for.clear(nodeId);
1661     return;
1662   }
1663 
1664   switch(m_config_state)
1665   {
1666   default:
1667   case CS_UNINITIALIZED:
1668     g_eventLogger->error("execCONFIG_CHECK_REF: unhandled state");
1669     abort();
1670     break;
1671 
1672   case CS_INITIAL:
1673     if (ref->expected_state == CS_CONFIRMED)
1674     {
1675       if (sig->header.theLength != ConfigCheckRef::SignalLengthWithConfig)
1676         break; // No config in the REF -> no action
1677 
1678       // The other node has sent it's config in the signal, use it if equal
1679       assert(sig->header.m_noOfSections == 1);
1680 
1681       ConfigValuesFactory cf;
1682       require(cf.unpack(sig->ptr[0].p, ref->length));
1683 
1684       Config other_config(cf.getConfigValues());
1685       assert(other_config.getGeneration() > 0);
1686 
1687       unsigned exclude[]= {CFG_SECTION_SYSTEM, 0};
1688       if (!other_config.equal(m_config, exclude))
1689       {
1690         BaseString buf;
1691         g_eventLogger->error("This node was started --initial with "
1692                              "a config which is _not_ equal to the one "
1693                              "node %d is using. Refusing to start with "
1694                              "different configurations, diff: \n%s",
1695                              nodeId,
1696                              other_config.diff2str(m_config, buf, exclude));
1697         exit(1);
1698       }
1699 
1700       g_eventLogger->info("This node was started --inital with "
1701                           "a config equal to the one node %d is using. "
1702                           "Will use the config with generation %d "
1703                           "from node %d!",
1704                           nodeId, other_config.getGeneration(), nodeId);
1705 
1706       if (! prepareConfigChange(&other_config))
1707       {
1708         abortConfigChange();
1709         g_eventLogger->error("Failed to write the fetched config to disk");
1710         exit(1);
1711       }
1712       commitConfigChange();
1713       m_config_state = CS_CONFIRMED;
1714       g_eventLogger->info("The fetched configuration has been saved!");
1715       m_waiting_for.clear(nodeId);
1716       m_checked.set(nodeId);
1717       delete m_config_change.m_initial_config;
1718       m_config_change.m_initial_config = NULL;
1719       return;
1720     }
1721     break;
1722 
1723   case CS_CONFIRMED:
1724     if (ref->expected_state == CS_INITIAL)
1725     {
1726       g_eventLogger->info("Waiting for peer");
1727       m_waiting_for.clear(nodeId);
1728       return;
1729     }
1730     break;
1731   }
1732 
1733   if (ref->error == ConfigCheckRef::WrongChecksum &&
1734       m_node_id < nodeId)
1735   {
1736     g_eventLogger->warning("Ignoring CONFIG_CHECK_REF for wrong checksum "
1737                            "other node has higher node id and should "
1738                            "shutdown");
1739     return;
1740   }
1741 
1742   g_eventLogger->error("Terminating");
1743   exit(1);
1744 }
1745 
1746 void
set_facade(TransporterFacade * f)1747 ConfigManager::set_facade(TransporterFacade * f)
1748 {
1749   m_facade = f;
1750   m_ss = new SignalSender(f, MGM_CONFIG_MAN);
1751   require(m_ss != 0);
1752 }
1753 
1754 bool
config_loaded(Config * config)1755 ConfigManager::ConfigChange::config_loaded(Config* config)
1756 {
1757   if (m_loaded_config != 0)
1758     return false;
1759   m_loaded_config = config;
1760   return true;
1761 }
1762 
1763 Config*
prepareLoadedConfig(Config * new_conf)1764 ConfigManager::prepareLoadedConfig(Config * new_conf)
1765 {
1766   /* Copy the necessary values from old to new config */
1767   if (!new_conf->setGeneration(m_config->getGeneration()))
1768   {
1769     g_eventLogger->error("Failed to copy generation from old config");
1770     delete new_conf;
1771     return 0;
1772   }
1773 
1774   if (!new_conf->setName(m_config->getName()))
1775   {
1776     g_eventLogger->error("Failed to copy name from old config");
1777     delete new_conf;
1778     return 0;
1779   }
1780 
1781   if (!new_conf->setPrimaryMgmNode(m_config->getPrimaryMgmNode()))
1782   {
1783     g_eventLogger->error("Failed to copy primary mgm node from old config");
1784     delete new_conf;
1785     return 0;
1786   }
1787 
1788   /* Check if config has changed */
1789   if (!m_config->equal(new_conf))
1790   {
1791     /* Loaded config is different */
1792     BaseString buf;
1793     g_eventLogger->info("Detected change of %s on disk, will try to "
1794                         "set it. "
1795                         "This is the actual diff:\n%s",
1796                         m_opts.mycnf ? "my.cnf" : m_opts.config_filename,
1797                         m_config->diff2str(new_conf, buf));
1798 
1799     return new_conf;
1800   }
1801   else
1802   {
1803     /* Loaded config was equal to current */
1804     g_eventLogger->info("Config equal!");
1805     delete new_conf;
1806   }
1807   return 0;
1808 }
1809 
/*
  Main function of the ConfigManager thread.

  When the config cache is turned off (m_opts.config_cache is false) the
  SignalSender is deleted, the config state is forced to CS_CONFIRMED and
  the function returns at once.

  Otherwise the function calls ss.lock(), starts the config checkers and
  loops until is_stopped() returns true. Each iteration:
   - while no config change is in progress (ConfigChangeState::IDLE),
     inspects m_config_state to decide whether a config change should be
     started, sends a config check request to nodes which are started but
     not yet checked and calls handle_exclude_nodes()
   - waits for one signal and dispatches it on its signal number

  After the loop the checkers are stopped and ss.unlock() is called.
*/
void
ConfigManager::run()
{
  assert(m_facade);
  // Reference is taken before the early-exit branch below deletes m_ss,
  // that branch does not use 'ss'
  SignalSender & ss = * m_ss;

  if (!m_opts.config_cache)
  {
    /* Stop receiving signals by closing ConfigManager's
       block in TransporterFacade */
    delete m_ss;
    m_ss = NULL;

    /* Confirm the present config, free the space that was allocated for a
       new one, and terminate the manager thread */
    m_config_change.release();
    m_config_state = CS_CONFIRMED;
    ndbout_c("== ConfigManager disabled -- manager thread will exit ==");
    return;
  }

  ss.lock();

  // Build bitmask of all mgm nodes in config
  m_config->get_nodemask(m_all_mgm, NDB_MGM_NODE_TYPE_MGM);

  // exclude nowait-nodes from config change protocol
  m_all_mgm.bitANDC(m_opts.nowait_nodes);
  m_all_mgm.set(m_facade->ownId()); // Never exclude own node

  start_checkers();

  while (!is_stopped())
  {

    // Only look for new work when no config change is in progress
    if (m_config_change.m_state == ConfigChangeState::IDLE)
    {
      // Print the state banner only when the state differs from
      // the one seen the previous time
      bool print_state = false;
      if (m_previous_state != m_config_state)
      {
        print_state = true;
        m_previous_state = m_config_state;
      }

      /*
        Check if it's necessary to start something to get
        out of the current state
      */
      switch (m_config_state){

      case CS_UNINITIALIZED:
        // The thread is not expected to run in this state
        abort();
        break;

      case CS_INITIAL:
        /*
          INITIAL => CONFIRMED
          When all mgm nodes has been started and checked that they
          are also in INITIAL, the node with the lowest node id
          will start an initial config change. When completed
          all nodes will be in CONFIRMED
        */

        if (print_state)
          ndbout_c("==INITIAL==");

        if (m_config_change.m_initial_config && // Updated config.ini was found
            m_started.equal(m_all_mgm) &&       // All mgmd started
            m_checked.equal(m_started) &&       // All nodes checked
            m_all_mgm.find(0) == m_facade->ownId()) // Lowest nodeid
        {
          // Move the initial config over to be the new config
          // of the config change which is started
          Config* new_conf = m_config_change.m_initial_config;
          m_config_change.m_initial_config = 0;
          m_config_change.m_new_config = new_conf;
          startConfigChange(ss, ss.getOwnRef());
        }
        break;

      case CS_CONFIRMED:
        if (print_state)
          ndbout_c("==CONFIRMED==");

        // A loaded config is waiting and no new config is pending,
        // let prepareLoadedConfig() decide what (if anything) becomes
        // the new config
        if (m_config_change.m_loaded_config != 0 &&
            m_config_change.m_new_config == 0    &&
            m_started.equal(m_all_mgm)           &&
            m_checked.equal(m_started))
        {
          Config* new_conf = m_config_change.m_loaded_config;
          m_config_change.m_loaded_config = 0;
          m_config_change.m_new_config = prepareLoadedConfig(new_conf);
        }

        if (m_config_change.m_new_config && // Updated config.ini was found
            m_started.equal(m_all_mgm) &&   // All mgmd started
            m_checked.equal(m_started))     // All nodes checked
        {
          startConfigChange(ss, ss.getOwnRef());
        }

        break;

      default:
        break;
      }

      // Send CHECK_CONFIG to all nodes not yet checked
      if (m_waiting_for.isclear() &&   // Nothing outstanding
          m_prepared_config == 0 &&    //   and no config change ongoing
          !m_checked.equal(m_started)) // Some nodes have not been checked
      {
        // not_checked = started nodes minus already checked nodes
        NodeBitmask not_checked;
        not_checked.assign(m_started);
        not_checked.bitANDC(m_checked);
        sendConfigCheckReq(ss, not_checked);
      }

      handle_exclude_nodes();
    }

    // Wait for next signal, the argument is a timeout (presumably
    // milliseconds -- confirm against SignalSender). No signal means
    // another turn of the loop so the state is re-evaluated
    SimpleSignal *sig = ss.waitFor((Uint32)1000);
    if (!sig)
      continue;

    switch (sig->readSignalNumber()) {

    case GSN_CONFIG_CHANGE_REQ:
      execCONFIG_CHANGE_REQ(ss, sig);
      break;

    case GSN_CONFIG_CHANGE_IMPL_REQ:
      execCONFIG_CHANGE_IMPL_REQ(ss, sig);
      break;

    case GSN_CONFIG_CHANGE_IMPL_REF:
      execCONFIG_CHANGE_IMPL_REF(ss, sig);
      break;

    case GSN_CONFIG_CHANGE_IMPL_CONF:
      execCONFIG_CHANGE_IMPL_CONF(ss, sig);
      break;

    case GSN_NF_COMPLETEREP:{
      const NFCompleteRep * const rep =
        CAST_CONSTPTR(NFCompleteRep, sig->getDataPtr());
      NodeId nodeId= rep->failedNodeId;

      if (!m_all_mgm.get(nodeId)) // Not mgm node
        break;

      // Forget everything known about the failed mgm node
      ndbout_c("Node %d failed", nodeId);
      m_started.clear(nodeId);
      m_checked.clear(nodeId);
      m_defragger.node_failed(nodeId);

      if (m_config_change.m_state != ConfigChangeState::IDLE)
      {
        g_eventLogger->info("Node %d failed during config change!!",
                            nodeId);
        g_eventLogger->warning("Node failure handling of config "
                               "change protocol not yet implemented!! "
                               "No more configuration changes can occur, "
                               "but the node will continue to serve the "
                               "last good configuration");
        // TODO start take over of config change protocol
      }
      break;
    }

    case GSN_NODE_FAILREP:
      // ignore, NF_COMPLETEREP will come
      break;

    case GSN_API_REGCONF:{
      NodeId nodeId = refToNode(sig->header.theSendersBlockRef);
      if (m_all_mgm.get(nodeId) &&      // Is a mgm node
          !m_started.get(nodeId))       // Not already marked as started
      {
        g_eventLogger->info("Node %d connected", nodeId);
        m_started.set(nodeId);
      }
      break;
    }

    case GSN_CONFIG_CHECK_REQ:
      execCONFIG_CHECK_REQ(ss, sig);
      break;

    case GSN_CONFIG_CHECK_REF:
      execCONFIG_CHECK_REF(ss, sig);
      break;

    case GSN_CONFIG_CHECK_CONF:
      execCONFIG_CHECK_CONF(ss, sig);
      break;

    case GSN_TAKE_OVERTCCONF:
    case GSN_CONNECT_REP:
      // Signals which are received but need no action here
      break;

    default:
      // Any other signal is treated as a fatal error: print it,
      // log where it came from and abort
      sig->print();
      g_eventLogger->error("Unknown signal received. SignalNumber: "
                           "%i from (%d, 0x%x)",
                           sig->readSignalNumber(),
                           refToNode(sig->header.theSendersBlockRef),
                           refToBlock(sig->header.theSendersBlockRef));
      abort();
      break;
    }
  }
  stop_checkers();
  ss.unlock();
}
2023 
2024 
2025 #include "InitConfigFileParser.hpp"
2026 
2027 Config*
load_init_config(const char * config_filename)2028 ConfigManager::load_init_config(const char* config_filename)
2029 {
2030    InitConfigFileParser parser;
2031   return parser.parseConfig(config_filename);
2032 }
2033 
2034 
2035 Config*
load_init_mycnf(void)2036 ConfigManager::load_init_mycnf(void)
2037 {
2038   InitConfigFileParser parser;
2039   return parser.parse_mycnf();
2040 }
2041 
2042 
2043 Config*
load_config(const char * config_filename,bool mycnf,BaseString & msg)2044 ConfigManager::load_config(const char* config_filename, bool mycnf,
2045                            BaseString& msg)
2046 {
2047   Config* new_conf = NULL;
2048   if (mycnf && (new_conf = load_init_mycnf()) == NULL)
2049   {
2050     msg.assign("Could not load configuration from 'my.cnf'");
2051     return NULL;
2052   }
2053   else if (config_filename &&
2054            (new_conf = load_init_config(config_filename)) == NULL)
2055   {
2056     msg.assfmt("Could not load configuration from '%s'",
2057                config_filename);
2058     return NULL;
2059   }
2060 
2061   return new_conf;
2062 }
2063 
2064 
2065 Config*
load_config(void) const2066 ConfigManager::load_config(void) const
2067 {
2068   BaseString msg;
2069   Config* new_conf = NULL;
2070   if ((new_conf = load_config(m_opts.config_filename,
2071                               m_opts.mycnf, msg)) == NULL)
2072   {
2073     g_eventLogger->error(msg);
2074     return NULL;
2075   }
2076   return new_conf;
2077 }
2078 
2079 
2080 Config*
fetch_config(void)2081 ConfigManager::fetch_config(void)
2082 {
2083   DBUG_ENTER("ConfigManager::fetch_config");
2084 
2085   while(true)
2086   {
2087     /* Loop until config loaded from other mgmd(s) */
2088     char buf[128];
2089     g_eventLogger->info("Trying to get configuration from other mgmd(s) "\
2090                         "using '%s'...",
2091                         m_config_retriever.get_connectstring(buf, sizeof(buf)));
2092 
2093     if (m_config_retriever.is_connected() ||
2094         m_config_retriever.do_connect(30 /* retry */,
2095                                       1 /* delay */,
2096                                       0 /* verbose */) == 0)
2097     {
2098       g_eventLogger->info("Connected to '%s:%d'...",
2099                           m_config_retriever.get_mgmd_host(),
2100                           m_config_retriever.get_mgmd_port());
2101       break;
2102     }
2103   }
2104   // read config from other management server
2105   ndb_mgm_configuration * tmp =
2106     m_config_retriever.getConfig(m_config_retriever.get_mgmHandle());
2107 
2108   // Disconnect from other mgmd
2109   m_config_retriever.disconnect();
2110 
2111   if (tmp == NULL) {
2112     g_eventLogger->error("%s", m_config_retriever.getErrorString());
2113     DBUG_RETURN(NULL);
2114   }
2115 
2116   DBUG_RETURN(new Config(tmp));
2117 }
2118 
2119 
2120 static bool
delete_file(const char * file_name)2121 delete_file(const char* file_name)
2122 {
2123 #ifdef _WIN32
2124   if (DeleteFile(file_name) == 0)
2125   {
2126     g_eventLogger->error("Failed to delete file '%s', error: %d",
2127                          file_name, GetLastError());
2128     return false;
2129   }
2130 #else
2131   if (unlink(file_name) == -1)
2132   {
2133     g_eventLogger->error("Failed to delete file '%s', error: %d",
2134                          file_name, errno);
2135     return false;
2136   }
2137 #endif
2138   return true;
2139 }
2140 
2141 
2142 bool
delete_saved_configs(void) const2143 ConfigManager::delete_saved_configs(void) const
2144 {
2145   NdbDir::Iterator iter;
2146 
2147   if (!m_configdir)
2148   {
2149     // No configdir -> no files to delete
2150     return true;
2151   }
2152 
2153   if (iter.open(m_configdir) != 0)
2154     return false;
2155 
2156   bool result = true;
2157   const char* name;
2158   unsigned nodeid;
2159   char extra; // Avoid matching ndb_2_config.bin.2.tmp
2160   BaseString full_name;
2161   unsigned version;
2162   while ((name= iter.next_file()) != NULL)
2163   {
2164     if (sscanf(name,
2165                "ndb_%u_config.bin.%u%c",
2166                &nodeid, &version, &extra) == 2)
2167     {
2168       // ndbout_c("match: %s", name);
2169 
2170       if (nodeid != m_node_id)
2171         continue;
2172 
2173       // Delete the file
2174       full_name.assfmt("%s%s%s", m_configdir, DIR_SEPARATOR, name);
2175       g_eventLogger->debug("Deleting binary config file '%s'",
2176                            full_name.c_str());
2177       if (!delete_file(full_name.c_str()))
2178       {
2179         // Make function return false, but continue and try
2180         // to delete other files
2181         result = false;
2182       }
2183     }
2184   }
2185 
2186   return result;
2187 }
2188 
2189 
2190 bool
saved_config_exists(BaseString & config_name) const2191 ConfigManager::saved_config_exists(BaseString& config_name) const
2192 {
2193   NdbDir::Iterator iter;
2194 
2195   if (!m_configdir ||
2196       iter.open(m_configdir) != 0)
2197     return 0;
2198 
2199   const char* name;
2200   unsigned nodeid;
2201   char extra; // Avoid matching ndb_2_config.bin.2.tmp
2202   unsigned version, max_version= 0;
2203   while ((name= iter.next_file()) != NULL)
2204   {
2205     if (sscanf(name,
2206                "ndb_%u_config.bin.%u%c",
2207                &nodeid, &version, &extra) == 2)
2208     {
2209       // ndbout_c("match: %s", name);
2210 
2211       if (nodeid != m_node_id)
2212         continue;
2213 
2214       if (version>max_version)
2215         max_version= version;
2216     }
2217   }
2218 
2219   if (max_version == 0)
2220     return false;
2221 
2222   config_name.assfmt("%s%sndb_%u_config.bin.%u",
2223                      m_configdir, DIR_SEPARATOR, m_node_id, max_version);
2224   return true;
2225 }
2226 
2227 
2228 
2229 bool
failed_config_change_exists() const2230 ConfigManager::failed_config_change_exists() const
2231 {
2232   NdbDir::Iterator iter;
2233 
2234   if (!m_configdir ||
2235       iter.open(m_configdir) != 0)
2236     return 0;
2237 
2238   const char* name;
2239   char tmp;
2240   unsigned nodeid;
2241   unsigned version;
2242   while ((name= iter.next_file()) != NULL)
2243   {
2244     // Check for a previously failed config
2245     // change, ie. ndb_<nodeid>_config.bin.X.tmp exist
2246     if (sscanf(name,
2247                "ndb_%u_config.bin.%u.tm%c",
2248                &nodeid, &version, &tmp) == 3 &&
2249         tmp == 'p')
2250     {
2251       if (nodeid != m_node_id)
2252         continue;
2253 
2254       g_eventLogger->error("Found binary configuration file '%s%s%s' from "
2255                            "previous failed attempt to change config. This "
2256                            "error must be manually resolved by removing the "
2257                            "file(ie. ROLLBACK) or renaming the file to it's "
2258                            "name without the .tmp extension(ie COMMIT). Make "
2259                            "sure to check the other nodes so that they all "
2260                            "have the same configuration generation.",
2261                            m_configdir, DIR_SEPARATOR, name);
2262       return true;
2263     }
2264   }
2265 
2266   return false;
2267 }
2268 
2269 
2270 Config*
load_saved_config(const BaseString & config_name)2271 ConfigManager::load_saved_config(const BaseString& config_name)
2272 {
2273   struct ndb_mgm_configuration * tmp =
2274     m_config_retriever.getConfig(config_name.c_str());
2275   if(tmp == NULL)
2276   {
2277     g_eventLogger->error("Failed to load config from '%s', error: '%s'",
2278                          config_name.c_str(),
2279                          m_config_retriever.getErrorString());
2280     return NULL;
2281   }
2282 
2283   Config* conf = new Config(tmp);
2284   if (conf == NULL)
2285     g_eventLogger->error("Failed to load config, out of memory");
2286   return conf;
2287 }
2288 
2289 bool
get_packed_config(ndb_mgm_node_type nodetype,BaseString * buf64,BaseString & error)2290 ConfigManager::get_packed_config(ndb_mgm_node_type nodetype,
2291                                  BaseString* buf64, BaseString& error)
2292 {
2293   Guard g(m_config_mutex);
2294 
2295   /*
2296     Only allow the config to be exported if it's been confirmed
2297     or if another mgmd is asking for it
2298   */
2299   switch(m_config_state)
2300   {
2301   case CS_INITIAL:
2302     if (nodetype == NDB_MGM_NODE_TYPE_MGM)
2303       ; // allow other mgmd to fetch initial configuration
2304     else
2305     {
2306       error.assign("The cluster configuration is not yet confirmed "
2307                    "by all defined management servers. ");
2308       if (m_config_change.m_state != ConfigChangeState::IDLE)
2309       {
2310         error.append("Initial configuration change is in progress.");
2311       }
2312       else
2313       {
2314         NodeBitmask not_started(m_all_mgm);
2315         not_started.bitANDC(m_checked);
2316         error.append("This management server is still waiting for node ");
2317         error.append(BaseString::getPrettyText(not_started));
2318         error.append(" to connect.");
2319       }
2320       return false;
2321     }
2322     break;
2323 
2324   case CS_CONFIRMED:
2325     // OK
2326     break;
2327 
2328   default:
2329     error.assign("get_packed_config, unknown config state: %d",
2330                  m_config_state);
2331      return false;
2332     break;
2333 
2334   }
2335 
2336   require(m_config != 0);
2337   if (buf64)
2338   {
2339     if (!m_packed_config.length())
2340     {
2341       // No packed config exist, generate a new one
2342       Config config_copy(m_config);
2343       if (!m_dynamic_ports.set_in_config(&config_copy))
2344       {
2345         error.assign("get_packed_config, failed to set dynamic ports in config");
2346         return false;
2347       }
2348 
2349       if (!config_copy.pack64(m_packed_config))
2350       {
2351         error.assign("get_packed_config, failed to pack config_copy");
2352         return false;
2353       }
2354     }
2355     buf64->assign(m_packed_config, m_packed_config.length());
2356   }
2357   return true;
2358 }
2359 
2360 
2361 bool
init_checkers(const Config * config)2362 ConfigManager::init_checkers(const Config* config)
2363 {
2364 
2365   // Init one thread for each other mgmd
2366   // in the config and check which version it has. If version
2367   // does not have config manager, set this node to ignore
2368   // that node in the config change protocol
2369 
2370   BaseString connect_string;
2371   ConfigIter iter(config, CFG_SECTION_NODE);
2372   for (iter.first(); iter.valid(); iter.next())
2373   {
2374 
2375     // Only MGM nodes
2376     Uint32 type;
2377     if (iter.get(CFG_TYPE_OF_SECTION, &type) ||
2378         type != NODE_TYPE_MGM)
2379       continue;
2380 
2381     // Not this node
2382     Uint32 nodeid;
2383     if(iter.get(CFG_NODE_ID, &nodeid) ||
2384        nodeid == m_node_id)
2385       continue;
2386 
2387     const char* hostname;
2388     Uint32 port;
2389     require(!iter.get(CFG_NODE_HOST, &hostname));
2390     require(!iter.get(CFG_MGM_PORT, &port));
2391     connect_string.assfmt("%s:%u",hostname,port);
2392 
2393     ConfigChecker* checker =
2394       new ConfigChecker(*this, connect_string.c_str(),
2395                         m_opts.bind_address, nodeid);
2396     if (!checker)
2397     {
2398       g_eventLogger->error("Failed to create ConfigChecker");
2399       return false;
2400     }
2401 
2402     if (!checker->init())
2403       return false;
2404 
2405     m_checkers.push_back(checker);
2406   }
2407   return true;
2408 }
2409 
2410 
2411 void
start_checkers(void)2412 ConfigManager::start_checkers(void)
2413 {
2414   for (unsigned i = 0; i < m_checkers.size(); i++)
2415     m_checkers[i]->start();
2416 }
2417 
2418 
2419 void
stop_checkers(void)2420 ConfigManager::stop_checkers(void)
2421 {
2422   for (unsigned i = 0; i < m_checkers.size(); i++)
2423   {
2424     ConfigChecker* checker = m_checkers[i];
2425     ndbout << "stop checker " << i << endl;
2426     checker->stop();
2427     delete checker;
2428   }
2429 }
2430 
2431 
ConfigChecker(ConfigManager & manager,const char * connect_string,const char * bindaddress,NodeId nodeid)2432 ConfigManager::ConfigChecker::ConfigChecker(ConfigManager& manager,
2433                                             const char* connect_string,
2434                                             const char * bindaddress,
2435                                             NodeId nodeid) :
2436   MgmtThread("ConfigChecker"),
2437   m_manager(manager),
2438   m_config_retriever(opt_ndb_connectstring, opt_ndb_nodeid, NDB_VERSION,
2439                      NDB_MGM_NODE_TYPE_MGM, bindaddress),
2440   m_connect_string(connect_string),
2441   m_nodeid(nodeid)
2442 {
2443 }
2444 
2445 
2446 bool
init()2447 ConfigManager::ConfigChecker::init()
2448 {
2449   if (m_config_retriever.hasError())
2450   {
2451     g_eventLogger->error("%s", m_config_retriever.getErrorString());
2452     return false;
2453   }
2454 
2455   return true;
2456 }
2457 
2458 
2459 void
run()2460 ConfigManager::ConfigChecker::run()
2461 {
2462   // Connect to other mgmd inifintely until thread is stopped
2463   // or connect suceeds
2464   g_eventLogger->debug("ConfigChecker, connecting to '%s'",
2465                        m_connect_string.c_str());
2466   while(m_config_retriever.do_connect(0 /* retry */,
2467                                       1 /* delay */,
2468                                       0 /* verbose */) != 0)
2469   {
2470     if (is_stopped())
2471     {
2472       g_eventLogger->debug("ConfigChecker, thread is stopped");
2473       return; // Thread is stopped
2474     }
2475 
2476     NdbSleep_SecSleep(1);
2477   }
2478 
2479   // Connected
2480   g_eventLogger->debug("ConfigChecker, connected to '%s'",
2481                        m_connect_string.c_str());
2482 
2483   // Check version
2484   int major, minor, build;
2485   char ver_str[50];
2486   if (!ndb_mgm_get_version(m_config_retriever.get_mgmHandle(),
2487                            &major, &minor, &build,
2488                            sizeof(ver_str), ver_str))
2489   {
2490     g_eventLogger->error("Could not get version from mgmd on '%s'",
2491                          m_connect_string.c_str());
2492     return;
2493   }
2494   g_eventLogger->debug("mgmd on '%s' has version %d.%d.%d",
2495                        m_connect_string.c_str(), major, minor, build);
2496 
2497   // Versions prior to 7 don't have ConfigManager
2498   // exclude it from config change protocol
2499   if (major < 7)
2500   {
2501     g_eventLogger->info("Excluding node %d with version %d.%d.%d from "
2502                         "config change protocol",
2503                         m_nodeid, major, minor, build);
2504     m_manager.m_exclude_nodes.push_back(m_nodeid);
2505   }
2506 
2507   return;
2508 }
2509 
2510 
2511 void
handle_exclude_nodes(void)2512 ConfigManager::handle_exclude_nodes(void)
2513 {
2514 
2515   if (!m_waiting_for.isclear())
2516     return; // Other things going on
2517 
2518   switch (m_config_state)
2519   {
2520   case CS_INITIAL:
2521     m_exclude_nodes.lock();
2522     for (unsigned i = 0; i < m_exclude_nodes.size(); i++)
2523     {
2524       NodeId nodeid = m_exclude_nodes[i];
2525       g_eventLogger->debug("Handle exclusion of node %d", nodeid);
2526       m_all_mgm.clear(nodeid);
2527     }
2528     m_exclude_nodes.unlock();
2529     break;
2530 
2531   default:
2532     break;
2533   }
2534   m_exclude_nodes.clear();
2535 
2536 }
2537 
2538 
2539 static bool
check_dynamic_port_configured(const Config * config,int node1,int node2,BaseString & msg)2540 check_dynamic_port_configured(const Config* config,
2541                               int node1, int node2,
2542                               BaseString& msg)
2543 {
2544   ConfigIter iter(config, CFG_SECTION_CONNECTION);
2545 
2546   for(;iter.valid();iter.next()) {
2547     Uint32 n1, n2;
2548     if (iter.get(CFG_CONNECTION_NODE_1, &n1) != 0 ||
2549         iter.get(CFG_CONNECTION_NODE_2, &n2) != 0)
2550     {
2551       msg.assign("Could not get node1 or node2 from connection section");
2552       return false;
2553     }
2554 
2555     if((n1 == (Uint32)node1 && n2 == (Uint32)node2) ||
2556        (n1 == (Uint32)node2 && n2 == (Uint32)node1))
2557       break;
2558   }
2559   if(!iter.valid()) {
2560     msg.assfmt("Unable to find connection between nodes %d -> %d",
2561                node1, node2);
2562     return false;
2563   }
2564 
2565   Uint32 port;
2566   if(iter.get(CFG_CONNECTION_SERVER_PORT, &port) != 0) {
2567     msg.assign("Unable to get current value of CFG_CONNECTION_SERVER_PORT");
2568     return false;
2569   }
2570 
2571   if (port != 0)
2572   {
2573     // Dynamic ports is zero in configuration
2574     msg.assfmt("Server port for %d -> %d is not marked as dynamic, value: %u",
2575                node1, node2, port);
2576     return false;
2577   }
2578   return true;
2579 }
2580 
2581 
2582 bool
set_dynamic_port(int node1,int node2,int value,BaseString & msg)2583 ConfigManager::set_dynamic_port(int node1, int node2, int value,
2584                                 BaseString& msg)
2585 {
2586   MgmtSrvr::DynPortSpec port = { node2, value };
2587 
2588   return set_dynamic_ports(node1, &port, 1, msg);
2589 }
2590 
2591 
2592 bool
set_dynamic_ports(int node,MgmtSrvr::DynPortSpec ports[],unsigned num_ports,BaseString & msg)2593 ConfigManager::set_dynamic_ports(int node, MgmtSrvr::DynPortSpec ports[],
2594                                  unsigned num_ports, BaseString &msg)
2595 {
2596   Guard g(m_config_mutex);
2597 
2598   // Check that all ports to set are configured as dynamic
2599   for(unsigned i = 0; i < num_ports; i++)
2600   {
2601     const int node2 = ports[i].node;
2602     if (!check_dynamic_port_configured(m_config,
2603                                        node, node2, msg))
2604     {
2605       return false;
2606     }
2607   }
2608 
2609   // Set the dynamic ports
2610   bool result = true;
2611   for(unsigned i = 0; i < num_ports; i++)
2612   {
2613     const int node2 = ports[i].node;
2614     const int value = ports[i].port;
2615     if (!m_dynamic_ports.set(node, node2, value))
2616     {
2617       // Failed to set one port, report problem but since it's very unlikley
2618       // that this step fails, continue and attempt to set remaining ports.
2619       msg.assfmt("Failed to set dynamic port(s)");
2620       result =  false;
2621     }
2622   }
2623 
2624   // Removed cache of packed config, need to be recreated
2625   // to include the new dynamic port
2626   m_packed_config.clear();
2627 
2628   return result;
2629 }
2630 
2631 
2632 bool
get_dynamic_port(int node1,int node2,int * value,BaseString & msg) const2633 ConfigManager::get_dynamic_port(int node1, int node2, int *value,
2634                                 BaseString& msg) const {
2635 
2636   Guard g(m_config_mutex);
2637   if (!check_dynamic_port_configured(m_config,
2638                                      node1, node2, msg))
2639     return false;
2640 
2641   if (!m_dynamic_ports.get(node1, node2, value))
2642   {
2643     msg.assfmt("Could not get dynamic port for %d -> %d", node1, node2);
2644     return false;
2645   }
2646   return true;
2647 }
2648 
2649 
check(int & node1,int & node2) const2650 bool ConfigManager::DynamicPorts::check(int& node1, int& node2) const
2651 {
2652   // Always use smaller node first
2653   if (node1 > node2)
2654   {
2655     int tmp = node1;
2656     node1 = node2;
2657     node2 = tmp;
2658   }
2659 
2660   // Only NDB nodes can be dynamic port server
2661   if (node1 <= 0 || node1 >= MAX_NDB_NODES)
2662     return false;
2663   if (node2 <= 0 || node2 >= MAX_NODES)
2664     return false;
2665   if (node1 == node2)
2666     return false;
2667 
2668   return true;
2669 }
2670 
2671 
set(int node1,int node2,int port)2672 bool ConfigManager::DynamicPorts::set(int node1, int node2, int port)
2673 {
2674   if (!check(node1, node2))
2675     return false;
2676 
2677   if (!m_ports.insert(NodePair(node1, node2), port, true))
2678     return false;
2679 
2680   return true;
2681 }
2682 
2683 
get(int node1,int node2,int * port) const2684 bool ConfigManager::DynamicPorts::get(int node1, int node2, int* port) const
2685 {
2686   if (!check(node1, node2))
2687     return false;
2688 
2689   int value = 0; // Return 0 if not found
2690   (void)m_ports.search(NodePair(node1, node2), value);
2691 
2692   *port = (int)value;
2693   return true;
2694 }
2695 
2696 
2697 bool
set_in_config(Config * config)2698 ConfigManager::DynamicPorts::set_in_config(Config* config)
2699 {
2700   bool result = true;
2701   ConfigIter iter(config, CFG_SECTION_CONNECTION);
2702 
2703   for(;iter.valid();iter.next()) {
2704     Uint32 port = 0;
2705     if (iter.get(CFG_CONNECTION_SERVER_PORT, &port) != 0 ||
2706         port != 0)
2707       continue; // Not configured as dynamic port
2708 
2709     Uint32 n1, n2;
2710     require(iter.get(CFG_CONNECTION_NODE_1, &n1) == 0);
2711     require(iter.get(CFG_CONNECTION_NODE_2, &n2) == 0);
2712 
2713     int dyn_port;
2714     if (!get(n1, n2, &dyn_port) || dyn_port == 0)
2715       continue; // No dynamic port registered
2716 
2717     // Write the dynamic port to config
2718     port = (Uint32)dyn_port;
2719     ConfigValues::Iterator i2(config->m_configValues->m_config,
2720                               iter.m_config);
2721     if(i2.set(CFG_CONNECTION_SERVER_PORT, port) == false)
2722       result = false;
2723   }
2724   return result;
2725 }
2726 
2727 
// Explicit instantiations of the Vector template for the pointer
// element types below
template class Vector<ConfigSubscriber*>;
template class Vector<ConfigManager::ConfigChecker*>;
2730 
2731