1 /*
2    Copyright (c) 2003, 2021, Oracle and/or its affiliates.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License, version 2.0,
6    as published by the Free Software Foundation.
7 
8    This program is also distributed with certain software (including
9    but not limited to OpenSSL) that is licensed under separate terms,
10    as designated in a particular file or component or in included license
11    documentation.  The authors of MySQL hereby grant you an additional
12    permission to link the program and your derivative works with the
13    separately licensed software that they have included with MySQL.
14 
15    This program is distributed in the hope that it will be useful,
16    but WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18    GNU General Public License, version 2.0, for more details.
19 
20    You should have received a copy of the GNU General Public License
21    along with this program; if not, write to the Free Software
22    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
23 */
24 
25 #include <ndb_global.h>
26 #include <NdbRestarter.hpp>
27 #include <NdbOut.hpp>
28 #include <NdbSleep.h>
29 #include <NdbTick.h>
30 #include <mgmapi_debug.h>
31 #include <NDBT_Output.hpp>
32 #include <random.h>
33 #include <kernel/ndb_limits.h>
34 #include <ndb_version.h>
35 #include <NodeBitmask.hpp>
36 #include <ndb_cluster_connection.hpp>
37 
/**
 * Print the latest error recorded on management handle `h` (error code,
 * line, message and description) to ndbout.
 *
 * The body is wrapped in do { } while (0) so that `MGMERR(h);` expands to
 * exactly one statement and stays safe inside an un-braced if/else.
 */
#define MGMERR(h) \
  do { \
    ndbout << "latest_error="<<ndb_mgm_get_latest_error(h) \
           << ", line="<<ndb_mgm_get_latest_error_line(h) \
           << ", mesg="<<ndb_mgm_get_latest_error_msg(h) \
           << ", desc="<<ndb_mgm_get_latest_error_desc(h) \
           << endl; \
  } while (0)
44 
45 
NdbRestarter(const char * _addr,Ndb_cluster_connection * con)46 NdbRestarter::NdbRestarter(const char* _addr, Ndb_cluster_connection * con):
47   handle(NULL),
48   connected(false),
49   m_config(0),
50   m_reconnect(false),
51   m_cluster_connection(con)
52 {
53   if (_addr == NULL){
54     addr.assign("");
55   } else {
56     addr.assign(_addr);
57   }
58 }
59 
~NdbRestarter()60 NdbRestarter::~NdbRestarter(){
61   disconnect();
62 }
63 
64 
getDbNodeId(int _i)65 int NdbRestarter::getDbNodeId(int _i){
66   if (!isConnected())
67     return -1;
68 
69   if (getStatus() != 0)
70     return -1;
71 
72   for(unsigned i = 0; i < ndbNodes.size(); i++){
73     if (i == (unsigned)_i){
74       return ndbNodes[i].node_id;
75     }
76   }
77   return -1;
78 }
79 
80 
81 int
restartOneDbNode(int _nodeId,bool inital,bool nostart,bool abort,bool force)82 NdbRestarter::restartOneDbNode(int _nodeId,
83 			       bool inital,
84 			       bool nostart,
85 			       bool abort,
86                                bool force)
87 {
88   return restartNodes(&_nodeId, 1,
89                       (inital ? NRRF_INITIAL : 0) |
90                       (nostart ? NRRF_NOSTART : 0) |
91                       (abort ? NRRF_ABORT : 0) |
92                       (force ? NRRF_FORCE : 0));
93 }
94 
95 int
restartNodes(int * nodes,int cnt,Uint32 flags)96 NdbRestarter::restartNodes(int * nodes, int cnt,
97                            Uint32 flags)
98 {
99   if (!isConnected())
100     return -1;
101 
102   int ret = 0;
103   int unused;
104   if ((ret = ndb_mgm_restart4(handle, cnt, nodes,
105                               (flags & NRRF_INITIAL),
106                               (flags & NRRF_NOSTART),
107                               (flags & NRRF_ABORT),
108                               (flags & NRRF_FORCE),
109                               &unused)) <= 0)
110   {
111     /**
112      * ndb_mgm_restart4 returned error, one reason could
113      * be that the node have not stopped fast enough!
114      * Check status of the node to see if it's on the
115      * way down. If that's the case ignore the error
116      */
117 
118     if (getStatus() != 0)
119       return -1;
120 
121     g_info << "ndb_mgm_restart4 returned with error, checking node state"
122            << endl;
123 
124     for (int j = 0; j<cnt; j++)
125     {
126       int _nodeId = nodes[j];
127       for(unsigned i = 0; i < ndbNodes.size(); i++)
128       {
129         if(ndbNodes[i].node_id == _nodeId)
130         {
131           g_info <<_nodeId<<": status="<<ndbNodes[i].node_status<<endl;
132           /* Node found check state */
133           switch(ndbNodes[i].node_status){
134           case NDB_MGM_NODE_STATUS_RESTARTING:
135           case NDB_MGM_NODE_STATUS_SHUTTING_DOWN:
136             break;
137           default:
138             MGMERR(handle);
139             g_err  << "Could not stop node with id = "<< _nodeId << endl;
140             return -1;
141           }
142         }
143       }
144     }
145   }
146 
147   if ((flags & NRRF_NOSTART) == 0)
148   {
149     wait_until_ready(nodes, cnt);
150   }
151 
152   return 0;
153 }
154 
155 int
getMasterNodeId()156 NdbRestarter::getMasterNodeId(){
157   if (!isConnected())
158     return -1;
159 
160   if (getStatus() != 0)
161     return -1;
162 
163   int min = 0;
164   int node = -1;
165   for(unsigned i = 0; i < ndbNodes.size(); i++){
166     if(min == 0 || ndbNodes[i].dynamic_id < min){
167       min = ndbNodes[i].dynamic_id;
168       node = ndbNodes[i].node_id;
169     }
170   }
171 
172   return node;
173 }
174 
175 int
getNodeGroup(int nodeId)176 NdbRestarter::getNodeGroup(int nodeId){
177   if (!isConnected())
178     return -1;
179 
180   if (getStatus() != 0)
181     return -1;
182 
183   for(unsigned i = 0; i < ndbNodes.size(); i++)
184   {
185     if(ndbNodes[i].node_id == nodeId)
186     {
187       return ndbNodes[i].node_group;
188     }
189   }
190 
191   return -1;
192 }
193 
194 int
getNextMasterNodeId(int nodeId)195 NdbRestarter::getNextMasterNodeId(int nodeId){
196   if (!isConnected())
197     return -1;
198 
199   if (getStatus() != 0)
200     return -1;
201 
202   unsigned i;
203   for(i = 0; i < ndbNodes.size(); i++)
204   {
205     if(ndbNodes[i].node_id == nodeId)
206     {
207       break;
208     }
209   }
210   require(i < ndbNodes.size());
211   if (i == ndbNodes.size())
212     return -1;
213 
214   int dynid = ndbNodes[i].dynamic_id;
215   int minid = dynid;
216   for (i = 0; i<ndbNodes.size(); i++)
217     if (ndbNodes[i].dynamic_id > minid)
218       minid = ndbNodes[i].dynamic_id;
219 
220   for (i = 0; i<ndbNodes.size(); i++)
221     if (ndbNodes[i].dynamic_id > dynid &&
222 	ndbNodes[i].dynamic_id < minid)
223     {
224       minid = ndbNodes[i].dynamic_id;
225     }
226 
227   if (minid != ~0)
228   {
229     for (i = 0; i<ndbNodes.size(); i++)
230       if (ndbNodes[i].dynamic_id == minid)
231 	return ndbNodes[i].node_id;
232   }
233 
234   return getMasterNodeId();
235 }
236 
237 int
getRandomNotMasterNodeId(int rand)238 NdbRestarter::getRandomNotMasterNodeId(int rand){
239   int master = getMasterNodeId();
240   if(master == -1)
241     return -1;
242 
243   Uint32 counter = 0;
244   rand = rand % ndbNodes.size();
245   while(counter++ < ndbNodes.size() && ndbNodes[rand].node_id == master)
246     rand = (rand + 1) % ndbNodes.size();
247 
248   if(ndbNodes[rand].node_id != master)
249     return ndbNodes[rand].node_id;
250   return -1;
251 }
252 
253 int
getRandomNodeOtherNodeGroup(int nodeId,int rand)254 NdbRestarter::getRandomNodeOtherNodeGroup(int nodeId, int rand){
255   if (!isConnected())
256     return -1;
257 
258   if (getStatus() != 0)
259     return -1;
260 
261   int node_group = -1;
262   for(unsigned i = 0; i < ndbNodes.size(); i++){
263     if(ndbNodes[i].node_id == nodeId){
264       node_group = ndbNodes[i].node_group;
265       break;
266     }
267   }
268   if(node_group == -1){
269     return -1;
270   }
271 
272   Uint32 counter = 0;
273   rand = rand % ndbNodes.size();
274   while(counter++ < ndbNodes.size() && ndbNodes[rand].node_group == node_group)
275     rand = (rand + 1) % ndbNodes.size();
276 
277   if(ndbNodes[rand].node_group != node_group)
278     return ndbNodes[rand].node_id;
279 
280   return -1;
281 }
282 
283 int
getRandomNodeSameNodeGroup(int nodeId,int rand)284 NdbRestarter::getRandomNodeSameNodeGroup(int nodeId, int rand){
285   if (!isConnected())
286     return -1;
287 
288   if (getStatus() != 0)
289     return -1;
290 
291   int node_group = -1;
292   for(unsigned i = 0; i < ndbNodes.size(); i++){
293     if(ndbNodes[i].node_id == nodeId){
294       node_group = ndbNodes[i].node_group;
295       break;
296     }
297   }
298   if(node_group == -1){
299     return -1;
300   }
301 
302   Uint32 counter = 0;
303   rand = rand % ndbNodes.size();
304   while(counter++ < ndbNodes.size() &&
305 	(ndbNodes[rand].node_id == nodeId ||
306 	 ndbNodes[rand].node_group != node_group))
307     rand = (rand + 1) % ndbNodes.size();
308 
309   if(ndbNodes[rand].node_group == node_group &&
310      ndbNodes[rand].node_id != nodeId)
311     return ndbNodes[rand].node_id;
312 
313   return -1;
314 }
315 
316 
317 // Wait until connected to ndb_mgmd
318 int
waitConnected(unsigned int _timeout)319 NdbRestarter::waitConnected(unsigned int _timeout){
320   _timeout*= 10;
321   while (isConnected() && getStatus() != 0){
322     if (_timeout-- == 0){
323       ndbout << "NdbRestarter::waitConnected failed" << endl;
324       return -1;
325     }
326     NdbSleep_MilliSleep(100);
327   }
328   return 0;
329 }
330 
331 int
waitClusterStarted(unsigned int _timeout)332 NdbRestarter::waitClusterStarted(unsigned int _timeout){
333   int res = waitClusterState(NDB_MGM_NODE_STATUS_STARTED, _timeout);
334   if (res == 0)
335   {
336     wait_until_ready();
337   }
338   return res;
339 }
340 
341 int
waitClusterStartPhase(int _startphase,unsigned int _timeout)342 NdbRestarter::waitClusterStartPhase(int _startphase, unsigned int _timeout){
343   return waitClusterState(NDB_MGM_NODE_STATUS_STARTING, _timeout, _startphase);
344 }
345 
346 int
waitClusterSingleUser(unsigned int _timeout)347 NdbRestarter::waitClusterSingleUser(unsigned int _timeout){
348   return waitClusterState(NDB_MGM_NODE_STATUS_SINGLEUSER, _timeout);
349 }
350 
351 int
waitClusterNoStart(unsigned int _timeout)352 NdbRestarter::waitClusterNoStart(unsigned int _timeout){
353   return waitClusterState(NDB_MGM_NODE_STATUS_NOT_STARTED, _timeout);
354 }
355 
356 int
waitClusterState(ndb_mgm_node_status _status,unsigned int _timeout,int _startphase)357 NdbRestarter::waitClusterState(ndb_mgm_node_status _status,
358 			       unsigned int _timeout,
359 			       int _startphase){
360 
361   int nodes[MAX_NDB_NODES];
362   int numNodes = 0;
363 
364   if (getStatus() != 0){
365     g_err << "waitClusterStat: getStatus != 0" << endl;
366     return -1;
367   }
368 
369   // Collect all nodes into nodes
370   for (unsigned i = 0; i < ndbNodes.size(); i++){
371     nodes[i] = ndbNodes[i].node_id;
372     numNodes++;
373   }
374 
375   return waitNodesState(nodes, numNodes, _status, _timeout, _startphase);
376 }
377 
378 
379 int
waitNodesState(const int * _nodes,int _num_nodes,ndb_mgm_node_status _status,unsigned int _timeout,int _startphase)380 NdbRestarter::waitNodesState(const int * _nodes, int _num_nodes,
381 			     ndb_mgm_node_status _status,
382 			     unsigned int _timeout,
383 			     int _startphase){
384 
385   if (!isConnected()){
386     g_err << "!isConnected"<<endl;
387     return -1;
388   }
389 
390   unsigned int attempts = 0;
391   unsigned int resetAttempts = 0;
392   const unsigned int MAX_RESET_ATTEMPTS = 10;
393   bool allInState = false;
394   while (allInState == false){
395     if (_timeout > 0 && attempts > _timeout){
396       /**
397        * Timeout has expired waiting for the nodes to enter
398        * the state we want
399        */
400       bool waitMore = false;
401       /**
402        * Make special check if we are waiting for
403        * cluster to become started
404        */
405       if(_status == NDB_MGM_NODE_STATUS_STARTED){
406 	waitMore = true;
407 	/**
408 	 * First check if any node is not starting
409 	 * then it's no idea to wait anymore
410 	 */
411 	for (unsigned n = 0; n < ndbNodes.size(); n++){
412 	  if (ndbNodes[n].node_status != NDB_MGM_NODE_STATUS_STARTED &&
413 	      ndbNodes[n].node_status != NDB_MGM_NODE_STATUS_STARTING)
414 	  {
415             // Found one not starting node, don't wait anymore
416 	    waitMore = false;
417             break;
418           }
419 
420 	}
421       }
422 
423       if (!waitMore || resetAttempts > MAX_RESET_ATTEMPTS){
424 	g_err << "waitNodesState("
425 	      << ndb_mgm_get_node_status_string(_status)
426 	      <<", "<<_startphase<<")"
427 	      << " timeout after " << attempts <<" attemps" << endl;
428 	return -1;
429       }
430 
431       g_err << "waitNodesState("
432 	    << ndb_mgm_get_node_status_string(_status)
433 	    <<", "<<_startphase<<")"
434 	    << " resetting number of attempts "
435 	    << resetAttempts << endl;
436       attempts = 0;
437       resetAttempts++;
438 
439     }
440 
441     allInState = true;
442     if (getStatus() != 0){
443       g_err << "waitNodesState: getStatus != 0" << endl;
444       return -1;
445     }
446 
447     for (int i = 0; i < _num_nodes; i++)
448     {
449       // Find node with given node id
450       ndb_mgm_node_state* ndbNode = NULL;
451       for (unsigned n = 0; n < ndbNodes.size(); n++)
452       {
453         if (ndbNodes[n].node_id == _nodes[i])
454         {
455           ndbNode = &ndbNodes[n];
456           break;
457         }
458       }
459 
460       if(ndbNode == NULL){
461 	allInState = false;
462 	continue;
463       }
464 
465       g_info << "State node " << ndbNode->node_id << " "
466 	     << ndb_mgm_get_node_status_string(ndbNode->node_status);
467       if (ndbNode->node_status == NDB_MGM_NODE_STATUS_STARTING)
468         g_info<< ", start_phase=" << ndbNode->start_phase;
469       g_info << endl;
470 
471       require(ndbNode != NULL);
472 
473       if(_status == NDB_MGM_NODE_STATUS_STARTING &&
474 	 ((ndbNode->node_status == NDB_MGM_NODE_STATUS_STARTING &&
475 	   ndbNode->start_phase >= _startphase) ||
476 	  (ndbNode->node_status == NDB_MGM_NODE_STATUS_STARTED)))
477 	continue;
478 
479       if (_status == NDB_MGM_NODE_STATUS_STARTING){
480 	g_info << "status = "
481 	       << ndb_mgm_get_node_status_string(ndbNode->node_status)
482 	       <<", start_phase="<<ndbNode->start_phase<<endl;
483 	if (ndbNode->node_status !=  _status) {
484 	  if (ndbNode->node_status < _status)
485 	    allInState = false;
486 	  else
487 	    g_info << "node_status(" << ndbNode->node_status
488 		   <<") != _status("<<_status<<")"<<endl;
489 	} else if (ndbNode->start_phase < _startphase)
490 	  allInState = false;
491       } else {
492 	if (ndbNode->node_status !=  _status)
493 	  allInState = false;
494       }
495     }
496     g_info << "Waiting for cluster enter state"
497 	    << ndb_mgm_get_node_status_string(_status)<< endl;
498     NdbSleep_SecSleep(1);
499     attempts++;
500   }
501   return 0;
502 }
503 
waitNodesStarted(const int * _nodes,int _num_nodes,unsigned int _timeout)504 int NdbRestarter::waitNodesStarted(const int * _nodes, int _num_nodes,
505 		     unsigned int _timeout){
506   int res = waitNodesState(_nodes, _num_nodes,
507                            NDB_MGM_NODE_STATUS_STARTED, _timeout);
508   if (res == 0)
509   {
510     wait_until_ready(_nodes, _num_nodes);
511   }
512 
513   return res;
514 }
515 
waitNodesStartPhase(const int * _nodes,int _num_nodes,int _startphase,unsigned int _timeout)516 int NdbRestarter::waitNodesStartPhase(const int * _nodes, int _num_nodes,
517 			int _startphase, unsigned int _timeout){
518   return waitNodesState(_nodes, _num_nodes,
519 			  NDB_MGM_NODE_STATUS_STARTING, _timeout,
520 			  _startphase);
521 }
522 
waitNodesNoStart(const int * _nodes,int _num_nodes,unsigned int _timeout)523 int NdbRestarter::waitNodesNoStart(const int * _nodes, int _num_nodes,
524 		     unsigned int _timeout){
525   return waitNodesState(_nodes, _num_nodes,
526 			  NDB_MGM_NODE_STATUS_NOT_STARTED, _timeout);
527 }
528 
529 bool
isConnected()530 NdbRestarter::isConnected(){
531   if (connected == true)
532     return true;
533   return connect() == 0;
534 }
535 
536 int
connect()537 NdbRestarter::connect(){
538   disconnect();
539   handle = ndb_mgm_create_handle();
540   if (handle == NULL){
541     g_err << "handle == NULL" << endl;
542     return -1;
543   }
544   g_info << "Connecting to mgmsrv at " << addr.c_str() << endl;
545   if (ndb_mgm_set_connectstring(handle,addr.c_str()))
546   {
547     MGMERR(handle);
548     g_err  << "Connection to " << addr.c_str() << " failed" << endl;
549     return -1;
550   }
551 
552   if (ndb_mgm_connect(handle, 0, 0, 0) == -1)
553   {
554     MGMERR(handle);
555     g_err  << "Connection to " << addr.c_str() << " failed" << endl;
556     return -1;
557   }
558 
559   connected = true;
560   return 0;
561 }
562 
563 void
disconnect()564 NdbRestarter::disconnect(){
565   if (handle != NULL){
566     ndb_mgm_disconnect(handle);
567     ndb_mgm_destroy_handle(&handle);
568   }
569   connected = false;
570 }
571 
572 int
getStatus()573 NdbRestarter::getStatus(){
574   int retries = 0;
575   struct ndb_mgm_cluster_state * status;
576   struct ndb_mgm_node_state * node;
577 
578   ndbNodes.clear();
579   mgmNodes.clear();
580   apiNodes.clear();
581 
582   if (!isConnected())
583     return -1;
584 
585   while(retries < 10){
586     status = ndb_mgm_get_status(handle);
587     if (status == NULL){
588       if (m_reconnect){
589         if (connect() == 0){
590           g_err << "Reconnected..." << endl;
591           continue;
592         }
593         const int err = ndb_mgm_get_latest_error(handle);
594         if (err == NDB_MGM_COULD_NOT_CONNECT_TO_SOCKET){
595           g_err << "Could not connect to socket, sleep and retry" << endl;
596           retries= 0;
597           NdbSleep_SecSleep(1);
598           continue;
599         }
600       }
601       const int err = ndb_mgm_get_latest_error(handle);
602       ndbout << "status==NULL, retries="<<retries<< " err=" << err << endl;
603       MGMERR(handle);
604       retries++;
605       continue;
606     }
607     for (int i = 0; i < status->no_of_nodes; i++){
608       node = &status->node_states[i];
609       switch(node->node_type){
610       case NDB_MGM_NODE_TYPE_NDB:
611 	ndbNodes.push_back(*node);
612 	break;
613       case NDB_MGM_NODE_TYPE_MGM:
614 	mgmNodes.push_back(*node);
615 	break;
616       case NDB_MGM_NODE_TYPE_API:
617 	apiNodes.push_back(*node);
618 	break;
619       default:
620 	if(node->node_status == NDB_MGM_NODE_STATUS_UNKNOWN ||
621 	   node->node_status == NDB_MGM_NODE_STATUS_NO_CONTACT){
622 	  retries++;
623 	  ndbNodes.clear();
624 	  mgmNodes.clear();
625 	  apiNodes.clear();
626 	  free(status);
627 	  status = NULL;
628 	  i = status->no_of_nodes;
629 
630 	  ndbout << "kalle"<< endl;
631 	  break;
632 	}
633 	abort();
634 	break;
635       }
636     }
637     if(status == 0){
638       ndbout << "status == 0" << endl;
639       continue;
640     }
641     free(status);
642     return 0;
643   }
644 
645   g_err  << "getStatus failed" << endl;
646   return -1;
647 }
648 
649 
getNumDbNodes()650 int NdbRestarter::getNumDbNodes(){
651   if (!isConnected())
652     return -1;
653 
654   if (getStatus() != 0)
655     return -1;
656 
657   return ndbNodes.size();
658 }
659 
restartAll(bool initial,bool nostart,bool abort,bool force)660 int NdbRestarter::restartAll(bool initial,
661 			     bool nostart,
662 			     bool abort,
663                              bool force)
664 {
665   if (!isConnected())
666     return -1;
667 
668   int unused;
669   if (ndb_mgm_restart4(handle, 0, NULL, initial, 1, abort,
670                        force, &unused) == -1) {
671     MGMERR(handle);
672     g_err  << "Could not restart(stop) all nodes " << endl;
673     // return -1; Continue anyway - Magnus
674   }
675 
676   if (waitClusterNoStart(60) != 0){
677     g_err << "Cluster didnt enter STATUS_NOT_STARTED within 60s" << endl;
678     return -1;
679   }
680 
681   if(nostart){
682     g_debug << "restartAll: nostart == true" << endl;
683     return 0;
684   }
685 
686   if (ndb_mgm_start(handle, 0, NULL) == -1) {
687     MGMERR(handle);
688     g_err  << "Could not restart(start) all nodes " << endl;
689     return -1;
690   }
691 
692   return 0;
693 }
694 
startAll()695 int NdbRestarter::startAll(){
696   if (!isConnected())
697     return -1;
698 
699   if (ndb_mgm_start(handle, 0, NULL) == -1) {
700     MGMERR(handle);
701     g_err  << "Could not start all nodes " << endl;
702     return -1;
703   }
704 
705   return 0;
706 
707 }
708 
startNodes(const int * nodes,int num_nodes)709 int NdbRestarter::startNodes(const int * nodes, int num_nodes){
710   if (!isConnected())
711     return -1;
712 
713   if (ndb_mgm_start(handle, num_nodes, nodes) != num_nodes) {
714     MGMERR(handle);
715     g_err  << "Could not start all nodes " << endl;
716     return -1;
717   }
718 
719   return 0;
720 }
721 
insertErrorInNode(int _nodeId,int _error)722 int NdbRestarter::insertErrorInNode(int _nodeId, int _error){
723   if (!isConnected())
724     return -1;
725 
726   ndb_mgm_reply reply;
727   reply.return_code = 0;
728 
729   if (ndb_mgm_insert_error(handle, _nodeId, _error, &reply) == -1){
730     MGMERR(handle);
731     g_err << "Could not insert error in node with id = "<< _nodeId << endl;
732   }
733   if(reply.return_code != 0){
734     g_err << "Error: " << reply.message << endl;
735   }
736   return 0;
737 }
738 
insertErrorInAllNodes(int _error)739 int NdbRestarter::insertErrorInAllNodes(int _error){
740   if (!isConnected())
741     return -1;
742 
743   if (getStatus() != 0)
744     return -1;
745 
746   int result = 0;
747 
748   for(unsigned i = 0; i < ndbNodes.size(); i++){
749     g_debug << "inserting error in node " << ndbNodes[i].node_id << endl;
750     if (insertErrorInNode(ndbNodes[i].node_id, _error) == -1)
751       result = -1;
752   }
753   return result;
754 
755 }
756 
757 int
insertError2InNode(int _nodeId,int _error,int extra)758 NdbRestarter::insertError2InNode(int _nodeId, int _error, int extra){
759   if (!isConnected())
760     return -1;
761 
762   ndb_mgm_reply reply;
763   reply.return_code = 0;
764 
765   if (ndb_mgm_insert_error2(handle, _nodeId, _error, extra, &reply) == -1){
766     MGMERR(handle);
767     g_err << "Could not insert error in node with id = "<< _nodeId << endl;
768   }
769   if(reply.return_code != 0){
770     g_err << "Error: " << reply.message << endl;
771   }
772   return 0;
773 }
774 
insertError2InAllNodes(int _error,int extra)775 int NdbRestarter::insertError2InAllNodes(int _error, int extra){
776   if (!isConnected())
777     return -1;
778 
779   if (getStatus() != 0)
780     return -1;
781 
782   int result = 0;
783 
784   for(unsigned i = 0; i < ndbNodes.size(); i++){
785     g_debug << "inserting error in node " << ndbNodes[i].node_id << endl;
786     if (insertError2InNode(ndbNodes[i].node_id, _error, extra) == -1)
787       result = -1;
788   }
789   return result;
790 
791 }
792 
793 
794 
dumpStateOneNode(int _nodeId,const int * _args,int _num_args)795 int NdbRestarter::dumpStateOneNode(int _nodeId, const int * _args, int _num_args){
796  if (!isConnected())
797     return -1;
798 
799   ndb_mgm_reply reply;
800   reply.return_code = 0;
801 
802   if (ndb_mgm_dump_state(handle, _nodeId, _args, _num_args, &reply) == -1){
803     MGMERR(handle);
804     g_err << "Could not dump state in node with id = "<< _nodeId << endl;
805   }
806 
807   if(reply.return_code != 0){
808     g_err << "Error: " << reply.message << endl;
809   }
810   return reply.return_code;
811 }
812 
dumpStateAllNodes(const int * _args,int _num_args)813 int NdbRestarter::dumpStateAllNodes(const int * _args, int _num_args){
814  if (!isConnected())
815     return -1;
816 
817  if (getStatus() != 0)
818    return -1;
819 
820  int result = 0;
821 
822  for(unsigned i = 0; i < ndbNodes.size(); i++){
823    g_debug << "dumping state in node " << ndbNodes[i].node_id << endl;
824    if (dumpStateOneNode(ndbNodes[i].node_id, _args, _num_args) == -1)
825      result = -1;
826  }
827  return result;
828 
829 }
830 
831 
enterSingleUserMode(int _nodeId)832 int NdbRestarter::enterSingleUserMode(int _nodeId){
833   if (!isConnected())
834     return -1;
835 
836   ndb_mgm_reply reply;
837   reply.return_code = 0;
838 
839   if (ndb_mgm_enter_single_user(handle, _nodeId, &reply) == -1){
840     MGMERR(handle);
841     g_err << "Could not enter single user mode api node = "<< _nodeId << endl;
842   }
843 
844   if(reply.return_code != 0){
845     g_err << "Error: " << reply.message << endl;
846   }
847 
848   return reply.return_code;
849 }
850 
851 
exitSingleUserMode()852 int NdbRestarter::exitSingleUserMode(){
853   if (!isConnected())
854     return -1;
855 
856   ndb_mgm_reply reply;
857   reply.return_code = 0;
858 
859   if (ndb_mgm_exit_single_user(handle, &reply) == -1){
860     MGMERR(handle);
861     g_err << "Could not exit single user mode " << endl;
862   }
863 
864   if(reply.return_code != 0){
865     g_err << "Error: " << reply.message << endl;
866   }
867   return reply.return_code;
868 }
869 
870 ndb_mgm_configuration*
getConfig()871 NdbRestarter::getConfig(){
872   if(m_config) return m_config;
873 
874   if (!isConnected())
875     return 0;
876   m_config = ndb_mgm_get_configuration(handle, 0);
877   return m_config;
878 }
879 
880 int
getNode(NodeSelector type)881 NdbRestarter::getNode(NodeSelector type)
882 {
883   switch(type){
884   case NS_RANDOM:
885     return getDbNodeId(rand() % getNumDbNodes());
886   case NS_MASTER:
887     return getMasterNodeId();
888   case NS_NON_MASTER:
889     return getRandomNotMasterNodeId(rand());
890   default:
891     abort();
892   }
893   return -1;
894 }
895 
896 
897 void
setReconnect(bool val)898 NdbRestarter::setReconnect(bool val){
899   m_reconnect= val;
900 }
901 
902 int
checkClusterAlive(const int * deadnodes,int num_nodes)903 NdbRestarter::checkClusterAlive(const int * deadnodes, int num_nodes)
904 {
905   if (getStatus() != 0)
906     return -1;
907 
908   NdbNodeBitmask mask;
909   for (int i = 0; i<num_nodes; i++)
910     mask.set(deadnodes[i]);
911 
912   for (unsigned n = 0; n < ndbNodes.size(); n++)
913   {
914     if (mask.get(ndbNodes[n].node_id))
915       continue;
916 
917     if (ndbNodes[n].node_status != NDB_MGM_NODE_STATUS_STARTED)
918       return ndbNodes[n].node_id;
919   }
920 
921   return 0;
922 }
923 
924 int
rollingRestart(Uint32 flags)925 NdbRestarter::rollingRestart(Uint32 flags)
926 {
927   if (getStatus() != 0)
928     return -1;
929 
930   NdbNodeBitmask ng_mask;
931   NdbNodeBitmask restart_nodes;
932   Vector<int> nodes;
933   for(unsigned i = 0; i < ndbNodes.size(); i++)
934   {
935     if (ng_mask.get(ndbNodes[i].node_group) == false)
936     {
937       ng_mask.set(ndbNodes[i].node_group);
938       nodes.push_back(ndbNodes[i].node_id);
939       restart_nodes.set(ndbNodes[i].node_id);
940     }
941   }
942 
943 loop:
944   if (ndb_mgm_restart2(handle, nodes.size(), nodes.getBase(),
945                        (flags & NRRF_INITIAL) != 0,
946                        (flags & NRRF_NOSTART) != 0,
947                        (flags & NRRF_ABORT) != 0 || true) <= 0)
948   {
949     return -1;
950   }
951 
952   if (waitNodesNoStart(nodes.getBase(), nodes.size()))
953     return -1;
954 
955   if (startNodes(nodes.getBase(), nodes.size()))
956     return -1;
957 
958   if (waitClusterStarted())
959     return -1;
960 
961   nodes.clear();
962   for (Uint32 i = 0; i<ndbNodes.size(); i++)
963   {
964     if (restart_nodes.get(ndbNodes[i].node_id) == false)
965     {
966       nodes.push_back(ndbNodes[i].node_id);
967       restart_nodes.set(ndbNodes[i].node_id);
968     }
969   }
970   if (nodes.size())
971     goto loop;
972 
973   return 0;
974 }
975 
976 int
getMasterNodeVersion(int & version)977 NdbRestarter::getMasterNodeVersion(int& version)
978 {
979   int masterNodeId = getMasterNodeId();
980   if (masterNodeId != -1)
981   {
982     for(unsigned i = 0; i < ndbNodes.size(); i++)
983     {
984       if (ndbNodes[i].node_id == masterNodeId)
985       {
986         version =  ndbNodes[i].version;
987         return 0;
988       }
989     }
990   }
991 
992   g_err << "Could not find node info for master node id "
993         << masterNodeId << endl;
994   return -1;
995 }
996 
997 int
getNodeTypeVersionRange(ndb_mgm_node_type type,int & minVer,int & maxVer)998 NdbRestarter::getNodeTypeVersionRange(ndb_mgm_node_type type,
999                                       int& minVer,
1000                                       int& maxVer)
1001 {
1002   if (!isConnected())
1003     return -1;
1004 
1005   if (getStatus() != 0)
1006     return -1;
1007 
1008   Vector<ndb_mgm_node_state>* nodeVec = NULL;
1009 
1010   switch (type)
1011   {
1012   case NDB_MGM_NODE_TYPE_API:
1013     nodeVec = &apiNodes;
1014     break;
1015   case NDB_MGM_NODE_TYPE_NDB:
1016     nodeVec = &ndbNodes;
1017     break;
1018   case NDB_MGM_NODE_TYPE_MGM:
1019     nodeVec = &mgmNodes;
1020     break;
1021   default:
1022     g_err << "Bad node type : " << type << endl;
1023     return -1;
1024   }
1025 
1026   if (nodeVec->size() == 0)
1027   {
1028     g_err << "No nodes of type " << type << " online" << endl;
1029     return -1;
1030   }
1031 
1032   minVer = 0;
1033   maxVer = 0;
1034 
1035   for(unsigned i = 0; i < nodeVec->size(); i++)
1036   {
1037     int nodeVer = (*nodeVec)[i].version;
1038     if ((minVer == 0) ||
1039         (nodeVer < minVer))
1040       minVer = nodeVer;
1041 
1042     if (nodeVer > maxVer)
1043       maxVer = nodeVer;
1044   }
1045 
1046   return 0;
1047 }
1048 
1049 int
getNodeStatus(int nodeid)1050 NdbRestarter::getNodeStatus(int nodeid)
1051 {
1052   if (getStatus() != 0)
1053     return -1;
1054 
1055   for (unsigned n = 0; n < ndbNodes.size(); n++)
1056   {
1057     if (ndbNodes[n].node_id == nodeid)
1058       return ndbNodes[n].node_status;
1059   }
1060   return -1;
1061 }
1062 
1063 Vector<Vector<int> >
splitNodes()1064 NdbRestarter::splitNodes()
1065 {
1066   Vector<int> part0;
1067   Vector<int> part1;
1068   Bitmask<255> ngmask;
1069   for (int i = 0; i < getNumDbNodes(); i++)
1070   {
1071     int nodeId = getDbNodeId(i);
1072     int ng = getNodeGroup(nodeId);
1073     if (ngmask.get(ng))
1074     {
1075       part1.push_back(nodeId);
1076     }
1077     else
1078     {
1079       ngmask.set(ng);
1080       part0.push_back(nodeId);
1081     }
1082   }
1083   Vector<Vector<int> > result;
1084   if ((rand() % 100) > 50)
1085   {
1086     result.push_back(part0);
1087     result.push_back(part1);
1088   }
1089   else
1090   {
1091     result.push_back(part1);
1092     result.push_back(part0);
1093   }
1094   return result;
1095 }
1096 
1097 int
wait_until_ready(const int * nodes,int cnt,int timeout)1098 NdbRestarter::wait_until_ready(const int * nodes, int cnt, int timeout)
1099 {
1100   if (m_cluster_connection == 0)
1101   {
1102     // no cluster connection, skip wait
1103     return 0;
1104   }
1105 
1106   Vector<int> allNodes;
1107   if (cnt == 0)
1108   {
1109     if (!isConnected())
1110       return -1;
1111 
1112     if (getStatus() != 0)
1113       return -1;
1114 
1115     for(unsigned i = 0; i < ndbNodes.size(); i++)
1116     {
1117       allNodes.push_back(ndbNodes[i].node_id);
1118     }
1119     cnt = (int)allNodes.size();
1120     nodes = allNodes.getBase();
1121   }
1122 
1123   return m_cluster_connection->wait_until_ready(nodes, cnt, timeout);
1124 }
1125 
1126 template class Vector<ndb_mgm_node_state>;
1127 template class Vector<Vector<int> >;
1128