1 /*
2 Copyright (c) 2003, 2021, Oracle and/or its affiliates.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License, version 2.0,
6 as published by the Free Software Foundation.
7
8 This program is also distributed with certain software (including
9 but not limited to OpenSSL) that is licensed under separate terms,
10 as designated in a particular file or component or in included license
11 documentation. The authors of MySQL hereby grant you an additional
12 permission to link the program and your derivative works with the
13 separately licensed software that they have included with MySQL.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License, version 2.0, for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
23 */
24
25 #include <ndb_global.h>
26 #include <NdbRestarter.hpp>
27 #include <NdbOut.hpp>
28 #include <NdbSleep.h>
29 #include <NdbTick.h>
30 #include <mgmapi_debug.h>
31 #include <NDBT_Output.hpp>
32 #include <random.h>
33 #include <kernel/ndb_limits.h>
34 #include <ndb_version.h>
35 #include <NodeBitmask.hpp>
36 #include <ndb_cluster_connection.hpp>
37
/**
 * Print the latest error recorded on management handle `h` to ndbout:
 * error code, line, message and description, terminated by endl.
 *
 * The body is wrapped in do { } while (0) and has no trailing semicolon,
 * so that `MGMERR(h);` expands to exactly one statement and can be used
 * safely in an unbraced if/else. Note that `h` is evaluated four times.
 */
#define MGMERR(h) \
  do { \
    ndbout << "latest_error=" << ndb_mgm_get_latest_error(h) \
           << ", line=" << ndb_mgm_get_latest_error_line(h) \
           << ", mesg=" << ndb_mgm_get_latest_error_msg(h) \
           << ", desc=" << ndb_mgm_get_latest_error_desc(h) \
           << endl; \
  } while (0)
44
45
NdbRestarter(const char * _addr,Ndb_cluster_connection * con)46 NdbRestarter::NdbRestarter(const char* _addr, Ndb_cluster_connection * con):
47 handle(NULL),
48 connected(false),
49 m_config(0),
50 m_reconnect(false),
51 m_cluster_connection(con)
52 {
53 if (_addr == NULL){
54 addr.assign("");
55 } else {
56 addr.assign(_addr);
57 }
58 }
59
~NdbRestarter()60 NdbRestarter::~NdbRestarter(){
61 disconnect();
62 }
63
64
getDbNodeId(int _i)65 int NdbRestarter::getDbNodeId(int _i){
66 if (!isConnected())
67 return -1;
68
69 if (getStatus() != 0)
70 return -1;
71
72 for(unsigned i = 0; i < ndbNodes.size(); i++){
73 if (i == (unsigned)_i){
74 return ndbNodes[i].node_id;
75 }
76 }
77 return -1;
78 }
79
80
81 int
restartOneDbNode(int _nodeId,bool inital,bool nostart,bool abort,bool force)82 NdbRestarter::restartOneDbNode(int _nodeId,
83 bool inital,
84 bool nostart,
85 bool abort,
86 bool force)
87 {
88 return restartNodes(&_nodeId, 1,
89 (inital ? NRRF_INITIAL : 0) |
90 (nostart ? NRRF_NOSTART : 0) |
91 (abort ? NRRF_ABORT : 0) |
92 (force ? NRRF_FORCE : 0));
93 }
94
95 int
restartNodes(int * nodes,int cnt,Uint32 flags)96 NdbRestarter::restartNodes(int * nodes, int cnt,
97 Uint32 flags)
98 {
99 if (!isConnected())
100 return -1;
101
102 int ret = 0;
103 int unused;
104 if ((ret = ndb_mgm_restart4(handle, cnt, nodes,
105 (flags & NRRF_INITIAL),
106 (flags & NRRF_NOSTART),
107 (flags & NRRF_ABORT),
108 (flags & NRRF_FORCE),
109 &unused)) <= 0)
110 {
111 /**
112 * ndb_mgm_restart4 returned error, one reason could
113 * be that the node have not stopped fast enough!
114 * Check status of the node to see if it's on the
115 * way down. If that's the case ignore the error
116 */
117
118 if (getStatus() != 0)
119 return -1;
120
121 g_info << "ndb_mgm_restart4 returned with error, checking node state"
122 << endl;
123
124 for (int j = 0; j<cnt; j++)
125 {
126 int _nodeId = nodes[j];
127 for(unsigned i = 0; i < ndbNodes.size(); i++)
128 {
129 if(ndbNodes[i].node_id == _nodeId)
130 {
131 g_info <<_nodeId<<": status="<<ndbNodes[i].node_status<<endl;
132 /* Node found check state */
133 switch(ndbNodes[i].node_status){
134 case NDB_MGM_NODE_STATUS_RESTARTING:
135 case NDB_MGM_NODE_STATUS_SHUTTING_DOWN:
136 break;
137 default:
138 MGMERR(handle);
139 g_err << "Could not stop node with id = "<< _nodeId << endl;
140 return -1;
141 }
142 }
143 }
144 }
145 }
146
147 if ((flags & NRRF_NOSTART) == 0)
148 {
149 wait_until_ready(nodes, cnt);
150 }
151
152 return 0;
153 }
154
155 int
getMasterNodeId()156 NdbRestarter::getMasterNodeId(){
157 if (!isConnected())
158 return -1;
159
160 if (getStatus() != 0)
161 return -1;
162
163 int min = 0;
164 int node = -1;
165 for(unsigned i = 0; i < ndbNodes.size(); i++){
166 if(min == 0 || ndbNodes[i].dynamic_id < min){
167 min = ndbNodes[i].dynamic_id;
168 node = ndbNodes[i].node_id;
169 }
170 }
171
172 return node;
173 }
174
175 int
getNodeGroup(int nodeId)176 NdbRestarter::getNodeGroup(int nodeId){
177 if (!isConnected())
178 return -1;
179
180 if (getStatus() != 0)
181 return -1;
182
183 for(unsigned i = 0; i < ndbNodes.size(); i++)
184 {
185 if(ndbNodes[i].node_id == nodeId)
186 {
187 return ndbNodes[i].node_group;
188 }
189 }
190
191 return -1;
192 }
193
194 int
getNextMasterNodeId(int nodeId)195 NdbRestarter::getNextMasterNodeId(int nodeId){
196 if (!isConnected())
197 return -1;
198
199 if (getStatus() != 0)
200 return -1;
201
202 unsigned i;
203 for(i = 0; i < ndbNodes.size(); i++)
204 {
205 if(ndbNodes[i].node_id == nodeId)
206 {
207 break;
208 }
209 }
210 require(i < ndbNodes.size());
211 if (i == ndbNodes.size())
212 return -1;
213
214 int dynid = ndbNodes[i].dynamic_id;
215 int minid = dynid;
216 for (i = 0; i<ndbNodes.size(); i++)
217 if (ndbNodes[i].dynamic_id > minid)
218 minid = ndbNodes[i].dynamic_id;
219
220 for (i = 0; i<ndbNodes.size(); i++)
221 if (ndbNodes[i].dynamic_id > dynid &&
222 ndbNodes[i].dynamic_id < minid)
223 {
224 minid = ndbNodes[i].dynamic_id;
225 }
226
227 if (minid != ~0)
228 {
229 for (i = 0; i<ndbNodes.size(); i++)
230 if (ndbNodes[i].dynamic_id == minid)
231 return ndbNodes[i].node_id;
232 }
233
234 return getMasterNodeId();
235 }
236
237 int
getRandomNotMasterNodeId(int rand)238 NdbRestarter::getRandomNotMasterNodeId(int rand){
239 int master = getMasterNodeId();
240 if(master == -1)
241 return -1;
242
243 Uint32 counter = 0;
244 rand = rand % ndbNodes.size();
245 while(counter++ < ndbNodes.size() && ndbNodes[rand].node_id == master)
246 rand = (rand + 1) % ndbNodes.size();
247
248 if(ndbNodes[rand].node_id != master)
249 return ndbNodes[rand].node_id;
250 return -1;
251 }
252
253 int
getRandomNodeOtherNodeGroup(int nodeId,int rand)254 NdbRestarter::getRandomNodeOtherNodeGroup(int nodeId, int rand){
255 if (!isConnected())
256 return -1;
257
258 if (getStatus() != 0)
259 return -1;
260
261 int node_group = -1;
262 for(unsigned i = 0; i < ndbNodes.size(); i++){
263 if(ndbNodes[i].node_id == nodeId){
264 node_group = ndbNodes[i].node_group;
265 break;
266 }
267 }
268 if(node_group == -1){
269 return -1;
270 }
271
272 Uint32 counter = 0;
273 rand = rand % ndbNodes.size();
274 while(counter++ < ndbNodes.size() && ndbNodes[rand].node_group == node_group)
275 rand = (rand + 1) % ndbNodes.size();
276
277 if(ndbNodes[rand].node_group != node_group)
278 return ndbNodes[rand].node_id;
279
280 return -1;
281 }
282
283 int
getRandomNodeSameNodeGroup(int nodeId,int rand)284 NdbRestarter::getRandomNodeSameNodeGroup(int nodeId, int rand){
285 if (!isConnected())
286 return -1;
287
288 if (getStatus() != 0)
289 return -1;
290
291 int node_group = -1;
292 for(unsigned i = 0; i < ndbNodes.size(); i++){
293 if(ndbNodes[i].node_id == nodeId){
294 node_group = ndbNodes[i].node_group;
295 break;
296 }
297 }
298 if(node_group == -1){
299 return -1;
300 }
301
302 Uint32 counter = 0;
303 rand = rand % ndbNodes.size();
304 while(counter++ < ndbNodes.size() &&
305 (ndbNodes[rand].node_id == nodeId ||
306 ndbNodes[rand].node_group != node_group))
307 rand = (rand + 1) % ndbNodes.size();
308
309 if(ndbNodes[rand].node_group == node_group &&
310 ndbNodes[rand].node_id != nodeId)
311 return ndbNodes[rand].node_id;
312
313 return -1;
314 }
315
316
317 // Wait until connected to ndb_mgmd
318 int
waitConnected(unsigned int _timeout)319 NdbRestarter::waitConnected(unsigned int _timeout){
320 _timeout*= 10;
321 while (isConnected() && getStatus() != 0){
322 if (_timeout-- == 0){
323 ndbout << "NdbRestarter::waitConnected failed" << endl;
324 return -1;
325 }
326 NdbSleep_MilliSleep(100);
327 }
328 return 0;
329 }
330
331 int
waitClusterStarted(unsigned int _timeout)332 NdbRestarter::waitClusterStarted(unsigned int _timeout){
333 int res = waitClusterState(NDB_MGM_NODE_STATUS_STARTED, _timeout);
334 if (res == 0)
335 {
336 wait_until_ready();
337 }
338 return res;
339 }
340
341 int
waitClusterStartPhase(int _startphase,unsigned int _timeout)342 NdbRestarter::waitClusterStartPhase(int _startphase, unsigned int _timeout){
343 return waitClusterState(NDB_MGM_NODE_STATUS_STARTING, _timeout, _startphase);
344 }
345
346 int
waitClusterSingleUser(unsigned int _timeout)347 NdbRestarter::waitClusterSingleUser(unsigned int _timeout){
348 return waitClusterState(NDB_MGM_NODE_STATUS_SINGLEUSER, _timeout);
349 }
350
351 int
waitClusterNoStart(unsigned int _timeout)352 NdbRestarter::waitClusterNoStart(unsigned int _timeout){
353 return waitClusterState(NDB_MGM_NODE_STATUS_NOT_STARTED, _timeout);
354 }
355
356 int
waitClusterState(ndb_mgm_node_status _status,unsigned int _timeout,int _startphase)357 NdbRestarter::waitClusterState(ndb_mgm_node_status _status,
358 unsigned int _timeout,
359 int _startphase){
360
361 int nodes[MAX_NDB_NODES];
362 int numNodes = 0;
363
364 if (getStatus() != 0){
365 g_err << "waitClusterStat: getStatus != 0" << endl;
366 return -1;
367 }
368
369 // Collect all nodes into nodes
370 for (unsigned i = 0; i < ndbNodes.size(); i++){
371 nodes[i] = ndbNodes[i].node_id;
372 numNodes++;
373 }
374
375 return waitNodesState(nodes, numNodes, _status, _timeout, _startphase);
376 }
377
378
379 int
waitNodesState(const int * _nodes,int _num_nodes,ndb_mgm_node_status _status,unsigned int _timeout,int _startphase)380 NdbRestarter::waitNodesState(const int * _nodes, int _num_nodes,
381 ndb_mgm_node_status _status,
382 unsigned int _timeout,
383 int _startphase){
384
385 if (!isConnected()){
386 g_err << "!isConnected"<<endl;
387 return -1;
388 }
389
390 unsigned int attempts = 0;
391 unsigned int resetAttempts = 0;
392 const unsigned int MAX_RESET_ATTEMPTS = 10;
393 bool allInState = false;
394 while (allInState == false){
395 if (_timeout > 0 && attempts > _timeout){
396 /**
397 * Timeout has expired waiting for the nodes to enter
398 * the state we want
399 */
400 bool waitMore = false;
401 /**
402 * Make special check if we are waiting for
403 * cluster to become started
404 */
405 if(_status == NDB_MGM_NODE_STATUS_STARTED){
406 waitMore = true;
407 /**
408 * First check if any node is not starting
409 * then it's no idea to wait anymore
410 */
411 for (unsigned n = 0; n < ndbNodes.size(); n++){
412 if (ndbNodes[n].node_status != NDB_MGM_NODE_STATUS_STARTED &&
413 ndbNodes[n].node_status != NDB_MGM_NODE_STATUS_STARTING)
414 {
415 // Found one not starting node, don't wait anymore
416 waitMore = false;
417 break;
418 }
419
420 }
421 }
422
423 if (!waitMore || resetAttempts > MAX_RESET_ATTEMPTS){
424 g_err << "waitNodesState("
425 << ndb_mgm_get_node_status_string(_status)
426 <<", "<<_startphase<<")"
427 << " timeout after " << attempts <<" attemps" << endl;
428 return -1;
429 }
430
431 g_err << "waitNodesState("
432 << ndb_mgm_get_node_status_string(_status)
433 <<", "<<_startphase<<")"
434 << " resetting number of attempts "
435 << resetAttempts << endl;
436 attempts = 0;
437 resetAttempts++;
438
439 }
440
441 allInState = true;
442 if (getStatus() != 0){
443 g_err << "waitNodesState: getStatus != 0" << endl;
444 return -1;
445 }
446
447 for (int i = 0; i < _num_nodes; i++)
448 {
449 // Find node with given node id
450 ndb_mgm_node_state* ndbNode = NULL;
451 for (unsigned n = 0; n < ndbNodes.size(); n++)
452 {
453 if (ndbNodes[n].node_id == _nodes[i])
454 {
455 ndbNode = &ndbNodes[n];
456 break;
457 }
458 }
459
460 if(ndbNode == NULL){
461 allInState = false;
462 continue;
463 }
464
465 g_info << "State node " << ndbNode->node_id << " "
466 << ndb_mgm_get_node_status_string(ndbNode->node_status);
467 if (ndbNode->node_status == NDB_MGM_NODE_STATUS_STARTING)
468 g_info<< ", start_phase=" << ndbNode->start_phase;
469 g_info << endl;
470
471 require(ndbNode != NULL);
472
473 if(_status == NDB_MGM_NODE_STATUS_STARTING &&
474 ((ndbNode->node_status == NDB_MGM_NODE_STATUS_STARTING &&
475 ndbNode->start_phase >= _startphase) ||
476 (ndbNode->node_status == NDB_MGM_NODE_STATUS_STARTED)))
477 continue;
478
479 if (_status == NDB_MGM_NODE_STATUS_STARTING){
480 g_info << "status = "
481 << ndb_mgm_get_node_status_string(ndbNode->node_status)
482 <<", start_phase="<<ndbNode->start_phase<<endl;
483 if (ndbNode->node_status != _status) {
484 if (ndbNode->node_status < _status)
485 allInState = false;
486 else
487 g_info << "node_status(" << ndbNode->node_status
488 <<") != _status("<<_status<<")"<<endl;
489 } else if (ndbNode->start_phase < _startphase)
490 allInState = false;
491 } else {
492 if (ndbNode->node_status != _status)
493 allInState = false;
494 }
495 }
496 g_info << "Waiting for cluster enter state"
497 << ndb_mgm_get_node_status_string(_status)<< endl;
498 NdbSleep_SecSleep(1);
499 attempts++;
500 }
501 return 0;
502 }
503
waitNodesStarted(const int * _nodes,int _num_nodes,unsigned int _timeout)504 int NdbRestarter::waitNodesStarted(const int * _nodes, int _num_nodes,
505 unsigned int _timeout){
506 int res = waitNodesState(_nodes, _num_nodes,
507 NDB_MGM_NODE_STATUS_STARTED, _timeout);
508 if (res == 0)
509 {
510 wait_until_ready(_nodes, _num_nodes);
511 }
512
513 return res;
514 }
515
waitNodesStartPhase(const int * _nodes,int _num_nodes,int _startphase,unsigned int _timeout)516 int NdbRestarter::waitNodesStartPhase(const int * _nodes, int _num_nodes,
517 int _startphase, unsigned int _timeout){
518 return waitNodesState(_nodes, _num_nodes,
519 NDB_MGM_NODE_STATUS_STARTING, _timeout,
520 _startphase);
521 }
522
waitNodesNoStart(const int * _nodes,int _num_nodes,unsigned int _timeout)523 int NdbRestarter::waitNodesNoStart(const int * _nodes, int _num_nodes,
524 unsigned int _timeout){
525 return waitNodesState(_nodes, _num_nodes,
526 NDB_MGM_NODE_STATUS_NOT_STARTED, _timeout);
527 }
528
529 bool
isConnected()530 NdbRestarter::isConnected(){
531 if (connected == true)
532 return true;
533 return connect() == 0;
534 }
535
536 int
connect()537 NdbRestarter::connect(){
538 disconnect();
539 handle = ndb_mgm_create_handle();
540 if (handle == NULL){
541 g_err << "handle == NULL" << endl;
542 return -1;
543 }
544 g_info << "Connecting to mgmsrv at " << addr.c_str() << endl;
545 if (ndb_mgm_set_connectstring(handle,addr.c_str()))
546 {
547 MGMERR(handle);
548 g_err << "Connection to " << addr.c_str() << " failed" << endl;
549 return -1;
550 }
551
552 if (ndb_mgm_connect(handle, 0, 0, 0) == -1)
553 {
554 MGMERR(handle);
555 g_err << "Connection to " << addr.c_str() << " failed" << endl;
556 return -1;
557 }
558
559 connected = true;
560 return 0;
561 }
562
563 void
disconnect()564 NdbRestarter::disconnect(){
565 if (handle != NULL){
566 ndb_mgm_disconnect(handle);
567 ndb_mgm_destroy_handle(&handle);
568 }
569 connected = false;
570 }
571
572 int
getStatus()573 NdbRestarter::getStatus(){
574 int retries = 0;
575 struct ndb_mgm_cluster_state * status;
576 struct ndb_mgm_node_state * node;
577
578 ndbNodes.clear();
579 mgmNodes.clear();
580 apiNodes.clear();
581
582 if (!isConnected())
583 return -1;
584
585 while(retries < 10){
586 status = ndb_mgm_get_status(handle);
587 if (status == NULL){
588 if (m_reconnect){
589 if (connect() == 0){
590 g_err << "Reconnected..." << endl;
591 continue;
592 }
593 const int err = ndb_mgm_get_latest_error(handle);
594 if (err == NDB_MGM_COULD_NOT_CONNECT_TO_SOCKET){
595 g_err << "Could not connect to socket, sleep and retry" << endl;
596 retries= 0;
597 NdbSleep_SecSleep(1);
598 continue;
599 }
600 }
601 const int err = ndb_mgm_get_latest_error(handle);
602 ndbout << "status==NULL, retries="<<retries<< " err=" << err << endl;
603 MGMERR(handle);
604 retries++;
605 continue;
606 }
607 for (int i = 0; i < status->no_of_nodes; i++){
608 node = &status->node_states[i];
609 switch(node->node_type){
610 case NDB_MGM_NODE_TYPE_NDB:
611 ndbNodes.push_back(*node);
612 break;
613 case NDB_MGM_NODE_TYPE_MGM:
614 mgmNodes.push_back(*node);
615 break;
616 case NDB_MGM_NODE_TYPE_API:
617 apiNodes.push_back(*node);
618 break;
619 default:
620 if(node->node_status == NDB_MGM_NODE_STATUS_UNKNOWN ||
621 node->node_status == NDB_MGM_NODE_STATUS_NO_CONTACT){
622 retries++;
623 ndbNodes.clear();
624 mgmNodes.clear();
625 apiNodes.clear();
626 free(status);
627 status = NULL;
628 i = status->no_of_nodes;
629
630 ndbout << "kalle"<< endl;
631 break;
632 }
633 abort();
634 break;
635 }
636 }
637 if(status == 0){
638 ndbout << "status == 0" << endl;
639 continue;
640 }
641 free(status);
642 return 0;
643 }
644
645 g_err << "getStatus failed" << endl;
646 return -1;
647 }
648
649
getNumDbNodes()650 int NdbRestarter::getNumDbNodes(){
651 if (!isConnected())
652 return -1;
653
654 if (getStatus() != 0)
655 return -1;
656
657 return ndbNodes.size();
658 }
659
restartAll(bool initial,bool nostart,bool abort,bool force)660 int NdbRestarter::restartAll(bool initial,
661 bool nostart,
662 bool abort,
663 bool force)
664 {
665 if (!isConnected())
666 return -1;
667
668 int unused;
669 if (ndb_mgm_restart4(handle, 0, NULL, initial, 1, abort,
670 force, &unused) == -1) {
671 MGMERR(handle);
672 g_err << "Could not restart(stop) all nodes " << endl;
673 // return -1; Continue anyway - Magnus
674 }
675
676 if (waitClusterNoStart(60) != 0){
677 g_err << "Cluster didnt enter STATUS_NOT_STARTED within 60s" << endl;
678 return -1;
679 }
680
681 if(nostart){
682 g_debug << "restartAll: nostart == true" << endl;
683 return 0;
684 }
685
686 if (ndb_mgm_start(handle, 0, NULL) == -1) {
687 MGMERR(handle);
688 g_err << "Could not restart(start) all nodes " << endl;
689 return -1;
690 }
691
692 return 0;
693 }
694
startAll()695 int NdbRestarter::startAll(){
696 if (!isConnected())
697 return -1;
698
699 if (ndb_mgm_start(handle, 0, NULL) == -1) {
700 MGMERR(handle);
701 g_err << "Could not start all nodes " << endl;
702 return -1;
703 }
704
705 return 0;
706
707 }
708
startNodes(const int * nodes,int num_nodes)709 int NdbRestarter::startNodes(const int * nodes, int num_nodes){
710 if (!isConnected())
711 return -1;
712
713 if (ndb_mgm_start(handle, num_nodes, nodes) != num_nodes) {
714 MGMERR(handle);
715 g_err << "Could not start all nodes " << endl;
716 return -1;
717 }
718
719 return 0;
720 }
721
insertErrorInNode(int _nodeId,int _error)722 int NdbRestarter::insertErrorInNode(int _nodeId, int _error){
723 if (!isConnected())
724 return -1;
725
726 ndb_mgm_reply reply;
727 reply.return_code = 0;
728
729 if (ndb_mgm_insert_error(handle, _nodeId, _error, &reply) == -1){
730 MGMERR(handle);
731 g_err << "Could not insert error in node with id = "<< _nodeId << endl;
732 }
733 if(reply.return_code != 0){
734 g_err << "Error: " << reply.message << endl;
735 }
736 return 0;
737 }
738
insertErrorInAllNodes(int _error)739 int NdbRestarter::insertErrorInAllNodes(int _error){
740 if (!isConnected())
741 return -1;
742
743 if (getStatus() != 0)
744 return -1;
745
746 int result = 0;
747
748 for(unsigned i = 0; i < ndbNodes.size(); i++){
749 g_debug << "inserting error in node " << ndbNodes[i].node_id << endl;
750 if (insertErrorInNode(ndbNodes[i].node_id, _error) == -1)
751 result = -1;
752 }
753 return result;
754
755 }
756
757 int
insertError2InNode(int _nodeId,int _error,int extra)758 NdbRestarter::insertError2InNode(int _nodeId, int _error, int extra){
759 if (!isConnected())
760 return -1;
761
762 ndb_mgm_reply reply;
763 reply.return_code = 0;
764
765 if (ndb_mgm_insert_error2(handle, _nodeId, _error, extra, &reply) == -1){
766 MGMERR(handle);
767 g_err << "Could not insert error in node with id = "<< _nodeId << endl;
768 }
769 if(reply.return_code != 0){
770 g_err << "Error: " << reply.message << endl;
771 }
772 return 0;
773 }
774
insertError2InAllNodes(int _error,int extra)775 int NdbRestarter::insertError2InAllNodes(int _error, int extra){
776 if (!isConnected())
777 return -1;
778
779 if (getStatus() != 0)
780 return -1;
781
782 int result = 0;
783
784 for(unsigned i = 0; i < ndbNodes.size(); i++){
785 g_debug << "inserting error in node " << ndbNodes[i].node_id << endl;
786 if (insertError2InNode(ndbNodes[i].node_id, _error, extra) == -1)
787 result = -1;
788 }
789 return result;
790
791 }
792
793
794
dumpStateOneNode(int _nodeId,const int * _args,int _num_args)795 int NdbRestarter::dumpStateOneNode(int _nodeId, const int * _args, int _num_args){
796 if (!isConnected())
797 return -1;
798
799 ndb_mgm_reply reply;
800 reply.return_code = 0;
801
802 if (ndb_mgm_dump_state(handle, _nodeId, _args, _num_args, &reply) == -1){
803 MGMERR(handle);
804 g_err << "Could not dump state in node with id = "<< _nodeId << endl;
805 }
806
807 if(reply.return_code != 0){
808 g_err << "Error: " << reply.message << endl;
809 }
810 return reply.return_code;
811 }
812
dumpStateAllNodes(const int * _args,int _num_args)813 int NdbRestarter::dumpStateAllNodes(const int * _args, int _num_args){
814 if (!isConnected())
815 return -1;
816
817 if (getStatus() != 0)
818 return -1;
819
820 int result = 0;
821
822 for(unsigned i = 0; i < ndbNodes.size(); i++){
823 g_debug << "dumping state in node " << ndbNodes[i].node_id << endl;
824 if (dumpStateOneNode(ndbNodes[i].node_id, _args, _num_args) == -1)
825 result = -1;
826 }
827 return result;
828
829 }
830
831
enterSingleUserMode(int _nodeId)832 int NdbRestarter::enterSingleUserMode(int _nodeId){
833 if (!isConnected())
834 return -1;
835
836 ndb_mgm_reply reply;
837 reply.return_code = 0;
838
839 if (ndb_mgm_enter_single_user(handle, _nodeId, &reply) == -1){
840 MGMERR(handle);
841 g_err << "Could not enter single user mode api node = "<< _nodeId << endl;
842 }
843
844 if(reply.return_code != 0){
845 g_err << "Error: " << reply.message << endl;
846 }
847
848 return reply.return_code;
849 }
850
851
exitSingleUserMode()852 int NdbRestarter::exitSingleUserMode(){
853 if (!isConnected())
854 return -1;
855
856 ndb_mgm_reply reply;
857 reply.return_code = 0;
858
859 if (ndb_mgm_exit_single_user(handle, &reply) == -1){
860 MGMERR(handle);
861 g_err << "Could not exit single user mode " << endl;
862 }
863
864 if(reply.return_code != 0){
865 g_err << "Error: " << reply.message << endl;
866 }
867 return reply.return_code;
868 }
869
870 ndb_mgm_configuration*
getConfig()871 NdbRestarter::getConfig(){
872 if(m_config) return m_config;
873
874 if (!isConnected())
875 return 0;
876 m_config = ndb_mgm_get_configuration(handle, 0);
877 return m_config;
878 }
879
880 int
getNode(NodeSelector type)881 NdbRestarter::getNode(NodeSelector type)
882 {
883 switch(type){
884 case NS_RANDOM:
885 return getDbNodeId(rand() % getNumDbNodes());
886 case NS_MASTER:
887 return getMasterNodeId();
888 case NS_NON_MASTER:
889 return getRandomNotMasterNodeId(rand());
890 default:
891 abort();
892 }
893 return -1;
894 }
895
896
897 void
setReconnect(bool val)898 NdbRestarter::setReconnect(bool val){
899 m_reconnect= val;
900 }
901
902 int
checkClusterAlive(const int * deadnodes,int num_nodes)903 NdbRestarter::checkClusterAlive(const int * deadnodes, int num_nodes)
904 {
905 if (getStatus() != 0)
906 return -1;
907
908 NdbNodeBitmask mask;
909 for (int i = 0; i<num_nodes; i++)
910 mask.set(deadnodes[i]);
911
912 for (unsigned n = 0; n < ndbNodes.size(); n++)
913 {
914 if (mask.get(ndbNodes[n].node_id))
915 continue;
916
917 if (ndbNodes[n].node_status != NDB_MGM_NODE_STATUS_STARTED)
918 return ndbNodes[n].node_id;
919 }
920
921 return 0;
922 }
923
924 int
rollingRestart(Uint32 flags)925 NdbRestarter::rollingRestart(Uint32 flags)
926 {
927 if (getStatus() != 0)
928 return -1;
929
930 NdbNodeBitmask ng_mask;
931 NdbNodeBitmask restart_nodes;
932 Vector<int> nodes;
933 for(unsigned i = 0; i < ndbNodes.size(); i++)
934 {
935 if (ng_mask.get(ndbNodes[i].node_group) == false)
936 {
937 ng_mask.set(ndbNodes[i].node_group);
938 nodes.push_back(ndbNodes[i].node_id);
939 restart_nodes.set(ndbNodes[i].node_id);
940 }
941 }
942
943 loop:
944 if (ndb_mgm_restart2(handle, nodes.size(), nodes.getBase(),
945 (flags & NRRF_INITIAL) != 0,
946 (flags & NRRF_NOSTART) != 0,
947 (flags & NRRF_ABORT) != 0 || true) <= 0)
948 {
949 return -1;
950 }
951
952 if (waitNodesNoStart(nodes.getBase(), nodes.size()))
953 return -1;
954
955 if (startNodes(nodes.getBase(), nodes.size()))
956 return -1;
957
958 if (waitClusterStarted())
959 return -1;
960
961 nodes.clear();
962 for (Uint32 i = 0; i<ndbNodes.size(); i++)
963 {
964 if (restart_nodes.get(ndbNodes[i].node_id) == false)
965 {
966 nodes.push_back(ndbNodes[i].node_id);
967 restart_nodes.set(ndbNodes[i].node_id);
968 }
969 }
970 if (nodes.size())
971 goto loop;
972
973 return 0;
974 }
975
976 int
getMasterNodeVersion(int & version)977 NdbRestarter::getMasterNodeVersion(int& version)
978 {
979 int masterNodeId = getMasterNodeId();
980 if (masterNodeId != -1)
981 {
982 for(unsigned i = 0; i < ndbNodes.size(); i++)
983 {
984 if (ndbNodes[i].node_id == masterNodeId)
985 {
986 version = ndbNodes[i].version;
987 return 0;
988 }
989 }
990 }
991
992 g_err << "Could not find node info for master node id "
993 << masterNodeId << endl;
994 return -1;
995 }
996
997 int
getNodeTypeVersionRange(ndb_mgm_node_type type,int & minVer,int & maxVer)998 NdbRestarter::getNodeTypeVersionRange(ndb_mgm_node_type type,
999 int& minVer,
1000 int& maxVer)
1001 {
1002 if (!isConnected())
1003 return -1;
1004
1005 if (getStatus() != 0)
1006 return -1;
1007
1008 Vector<ndb_mgm_node_state>* nodeVec = NULL;
1009
1010 switch (type)
1011 {
1012 case NDB_MGM_NODE_TYPE_API:
1013 nodeVec = &apiNodes;
1014 break;
1015 case NDB_MGM_NODE_TYPE_NDB:
1016 nodeVec = &ndbNodes;
1017 break;
1018 case NDB_MGM_NODE_TYPE_MGM:
1019 nodeVec = &mgmNodes;
1020 break;
1021 default:
1022 g_err << "Bad node type : " << type << endl;
1023 return -1;
1024 }
1025
1026 if (nodeVec->size() == 0)
1027 {
1028 g_err << "No nodes of type " << type << " online" << endl;
1029 return -1;
1030 }
1031
1032 minVer = 0;
1033 maxVer = 0;
1034
1035 for(unsigned i = 0; i < nodeVec->size(); i++)
1036 {
1037 int nodeVer = (*nodeVec)[i].version;
1038 if ((minVer == 0) ||
1039 (nodeVer < minVer))
1040 minVer = nodeVer;
1041
1042 if (nodeVer > maxVer)
1043 maxVer = nodeVer;
1044 }
1045
1046 return 0;
1047 }
1048
1049 int
getNodeStatus(int nodeid)1050 NdbRestarter::getNodeStatus(int nodeid)
1051 {
1052 if (getStatus() != 0)
1053 return -1;
1054
1055 for (unsigned n = 0; n < ndbNodes.size(); n++)
1056 {
1057 if (ndbNodes[n].node_id == nodeid)
1058 return ndbNodes[n].node_status;
1059 }
1060 return -1;
1061 }
1062
1063 Vector<Vector<int> >
splitNodes()1064 NdbRestarter::splitNodes()
1065 {
1066 Vector<int> part0;
1067 Vector<int> part1;
1068 Bitmask<255> ngmask;
1069 for (int i = 0; i < getNumDbNodes(); i++)
1070 {
1071 int nodeId = getDbNodeId(i);
1072 int ng = getNodeGroup(nodeId);
1073 if (ngmask.get(ng))
1074 {
1075 part1.push_back(nodeId);
1076 }
1077 else
1078 {
1079 ngmask.set(ng);
1080 part0.push_back(nodeId);
1081 }
1082 }
1083 Vector<Vector<int> > result;
1084 if ((rand() % 100) > 50)
1085 {
1086 result.push_back(part0);
1087 result.push_back(part1);
1088 }
1089 else
1090 {
1091 result.push_back(part1);
1092 result.push_back(part0);
1093 }
1094 return result;
1095 }
1096
1097 int
wait_until_ready(const int * nodes,int cnt,int timeout)1098 NdbRestarter::wait_until_ready(const int * nodes, int cnt, int timeout)
1099 {
1100 if (m_cluster_connection == 0)
1101 {
1102 // no cluster connection, skip wait
1103 return 0;
1104 }
1105
1106 Vector<int> allNodes;
1107 if (cnt == 0)
1108 {
1109 if (!isConnected())
1110 return -1;
1111
1112 if (getStatus() != 0)
1113 return -1;
1114
1115 for(unsigned i = 0; i < ndbNodes.size(); i++)
1116 {
1117 allNodes.push_back(ndbNodes[i].node_id);
1118 }
1119 cnt = (int)allNodes.size();
1120 nodes = allNodes.getBase();
1121 }
1122
1123 return m_cluster_connection->wait_until_ready(nodes, cnt, timeout);
1124 }
1125
1126 template class Vector<ndb_mgm_node_state>;
1127 template class Vector<Vector<int> >;
1128