1 /*
2 Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License, version 2.0,
6 as published by the Free Software Foundation.
7
8 This program is also distributed with certain software (including
9 but not limited to OpenSSL) that is licensed under separate terms,
10 as designated in a particular file or component or in included license
11 documentation. The authors of MySQL hereby grant you an additional
12 permission to link the program and your derivative works with the
13 separately licensed software that they have included with MySQL.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License, version 2.0, for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
23 */
24
25
26 #define QMGR_C
27 #include "Qmgr.hpp"
28 #include <pc.hpp>
29 #include <NdbTick.h>
30 #include <signaldata/EventReport.hpp>
31 #include <signaldata/StartOrd.hpp>
32 #include <signaldata/CloseComReqConf.hpp>
33 #include <signaldata/PrepFailReqRef.hpp>
34 #include <signaldata/NodeFailRep.hpp>
35 #include <signaldata/ReadNodesConf.hpp>
36 #include <signaldata/NFCompleteRep.hpp>
37 #include <signaldata/CheckNodeGroups.hpp>
38 #include <signaldata/ArbitSignalData.hpp>
39 #include <signaldata/ApiRegSignalData.hpp>
40 #include <signaldata/ApiVersion.hpp>
41 #include <signaldata/BlockCommitOrd.hpp>
42 #include <signaldata/FailRep.hpp>
43 #include <signaldata/DisconnectRep.hpp>
44 #include <signaldata/ApiBroadcast.hpp>
45 #include <signaldata/Upgrade.hpp>
46 #include <signaldata/EnableCom.hpp>
47 #include <signaldata/RouteOrd.hpp>
48 #include <signaldata/NodePing.hpp>
49 #include <signaldata/DihRestart.hpp>
50 #include <ndb_version.h>
51
52 #include <EventLogger.hpp>
53 extern EventLogger * g_eventLogger;
54
55 //#define DEBUG_QMGR_START
56 #ifdef DEBUG_QMGR_START
57 #include <DebuggerNames.hpp>
58 #define DEBUG(x) ndbout << "QMGR " << __LINE__ << ": " << x << endl
59 #define DEBUG_START(gsn, node, msg) DEBUG(getSignalName(gsn) << " to: " << node << " - " << msg)
60 #define DEBUG_START2(gsn, rg, msg) { char nodes[255]; DEBUG(getSignalName(gsn) << " to: " << rg.m_nodes.getText(nodes) << " - " << msg); }
61 #define DEBUG_START3(signal, msg) DEBUG(getSignalName(signal->header.theVerId_signalNumber) << " from " << refToNode(signal->getSendersBlockRef()) << " - " << msg);
62 #else
63 #define DEBUG(x)
64 #define DEBUG_START(gsn, node, msg)
65 #define DEBUG_START2(gsn, rg, msg)
66 #define DEBUG_START3(signal, msg)
67 #endif
68
/**
 * c_start.m_gsn = GSN_CM_REGREQ
 *   Possible for all nodes
 *   c_start.m_nodes contains all nodes in config
 *
 * c_start.m_gsn = GSN_CM_NODEINFOREQ;
 *   Set when receiving CM_REGCONF
 *   State possible for starting node only (not in cluster)
 *
 *   c_start.m_nodes contains all nodes in the alive cluster
 *                   that have not replied to GSN_CM_NODEINFOREQ
 *                   (passed by the president in GSN_CM_REGCONF)
 *
 * c_start.m_gsn = GSN_CM_ADD
 *   Possible for president only
 *   Set when receiving and accepting CM_REGREQ (to include node)
 *
 *   c_start.m_nodes contains all nodes in the alive cluster + the starting
 *                   node that have not replied to GSN_CM_ADD
 *                   by sending GSN_CM_ACKADD
 *
 * c_start.m_gsn = GSN_CM_NODEINFOCONF
 *   Possible for non presidents only
 *   c_start.m_nodes contains a node that has been accepted by the president
 *                   but has not connected to us yet
 */
95
96 // Signal entries and statement blocks
97 /* 4 P R O G R A M */
98 /*******************************/
99 /* CMHEART_BEAT */
100 /*******************************/
execCM_HEARTBEAT(Signal * signal)101 void Qmgr::execCM_HEARTBEAT(Signal* signal)
102 {
103 NodeRecPtr hbNodePtr;
104 jamEntry();
105 hbNodePtr.i = signal->theData[0];
106 ptrCheckGuard(hbNodePtr, MAX_NDB_NODES, nodeRec);
107 setNodeInfo(hbNodePtr.i).m_heartbeat_cnt= 0;
108 return;
109 }//Qmgr::execCM_HEARTBEAT()
110
111 /*******************************/
112 /* CM_NODEINFOREF */
113 /*******************************/
execCM_NODEINFOREF(Signal * signal)114 void Qmgr::execCM_NODEINFOREF(Signal* signal)
115 {
116 jamEntry();
117 systemErrorLab(signal, __LINE__);
118 return;
119 }//Qmgr::execCM_NODEINFOREF()
120
121 /*******************************/
122 /* CONTINUEB */
123 /*******************************/
execCONTINUEB(Signal * signal)124 void Qmgr::execCONTINUEB(Signal* signal)
125 {
126 jamEntry();
127 const Uint32 tcontinuebType = signal->theData[0];
128 const Uint32 tdata0 = signal->theData[1];
129 const Uint32 tdata1 = signal->theData[2];
130 switch (tcontinuebType) {
131 case ZREGREQ_TIMELIMIT:
132 jam();
133 if (c_start.m_startKey != tdata0 || c_start.m_startNode != tdata1) {
134 jam();
135 return;
136 }//if
137 regreqTimeLimitLab(signal);
138 break;
139 case ZREGREQ_MASTER_TIMELIMIT:
140 jam();
141 if (c_start.m_startKey != tdata0 || c_start.m_startNode != tdata1) {
142 jam();
143 return;
144 }//if
145 //regreqMasterTimeLimitLab(signal);
146 failReportLab(signal, c_start.m_startNode, FailRep::ZSTART_IN_REGREQ, getOwnNodeId());
147 return;
148 break;
149 case ZTIMER_HANDLING:
150 jam();
151 timerHandlingLab(signal);
152 return;
153 break;
154 case ZARBIT_HANDLING:
155 jam();
156 runArbitThread(signal);
157 return;
158 break;
159 case ZSTART_FAILURE_LIMIT:{
160 if (cpresident != ZNIL)
161 {
162 jam();
163 return;
164 }
165 Uint64 now = NdbTick_CurrentMillisecond();
166
167 if (now > (c_start_election_time + c_restartFailureTimeout))
168 {
169 jam();
170 BaseString tmp;
171 tmp.append("Shutting down node as total restart time exceeds "
172 " StartFailureTimeout as set in config file ");
173 if(c_restartFailureTimeout == (Uint32) ~0)
174 tmp.append(" 0 (inifinite)");
175 else
176 tmp.appfmt(" %d", c_restartFailureTimeout);
177
178 progError(__LINE__, NDBD_EXIT_SYSTEM_ERROR, tmp.c_str());
179 }
180 signal->theData[0] = ZSTART_FAILURE_LIMIT;
181 sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 3000, 1);
182 return;
183 }
184 default:
185 jam();
186 // ZCOULD_NOT_OCCUR_ERROR;
187 systemErrorLab(signal, __LINE__);
188 return;
189 break;
190 }//switch
191 return;
192 }//Qmgr::execCONTINUEB()
193
194
execDEBUG_SIG(Signal * signal)195 void Qmgr::execDEBUG_SIG(Signal* signal)
196 {
197 NodeRecPtr debugNodePtr;
198 jamEntry();
199 debugNodePtr.i = signal->theData[0];
200 ptrCheckGuard(debugNodePtr, MAX_NODES, nodeRec);
201 return;
202 }//Qmgr::execDEBUG_SIG()
203
204 /*******************************/
205 /* FAIL_REP */
206 /*******************************/
execFAIL_REP(Signal * signal)207 void Qmgr::execFAIL_REP(Signal* signal)
208 {
209 const FailRep * const failRep = (FailRep *)&signal->theData[0];
210 const NodeId failNodeId = failRep->failNodeId;
211 const FailRep::FailCause failCause = (FailRep::FailCause)failRep->failCause;
212 Uint32 failSource = failRep->getFailSourceNodeId(signal->length());
213 if (!failSource)
214 {
215 /* Failure source not included, use sender of signal as 'source' */
216 failSource = refToNode(signal->getSendersBlockRef());
217 }
218
219 jamEntry();
220 failReportLab(signal, failNodeId, failCause, failSource);
221 return;
222 }//Qmgr::execFAIL_REP()
223
224 /*******************************/
225 /* PRES_TOREQ */
226 /*******************************/
execPRES_TOREQ(Signal * signal)227 void Qmgr::execPRES_TOREQ(Signal* signal)
228 {
229 jamEntry();
230 BlockReference Tblockref = signal->theData[0];
231 signal->theData[0] = getOwnNodeId();
232 signal->theData[1] = ccommitFailureNr;
233 sendSignal(Tblockref, GSN_PRES_TOCONF, signal, 2, JBA);
234 return;
235 }//Qmgr::execPRES_TOREQ()
236
237 void
execREAD_CONFIG_REQ(Signal * signal)238 Qmgr::execREAD_CONFIG_REQ(Signal* signal)
239 {
240 jamEntry();
241
242 const ReadConfigReq * req = (ReadConfigReq*)signal->getDataPtr();
243
244 Uint32 ref = req->senderRef;
245 Uint32 senderData = req->senderData;
246
247 const ndb_mgm_configuration_iterator * p =
248 m_ctx.m_config.getOwnConfigIterator();
249 ndbrequire(p != 0);
250
251 ReadConfigConf * conf = (ReadConfigConf*)signal->getDataPtrSend();
252 conf->senderRef = reference();
253 conf->senderData = senderData;
254 sendSignal(ref, GSN_READ_CONFIG_CONF, signal,
255 ReadConfigConf::SignalLength, JBB);
256 }
257
258 void
execSTART_ORD(Signal * signal)259 Qmgr::execSTART_ORD(Signal* signal)
260 {
261 /**
262 * Start timer handling
263 */
264 Uint64 now = NdbTick_CurrentMillisecond();
265 signal->theData[0] = ZTIMER_HANDLING;
266 signal->theData[1] = Uint32(now >> 32);
267 signal->theData[2] = Uint32(now);
268 sendSignal(QMGR_REF, GSN_CONTINUEB, signal, 3, JBB);
269
270 NodeRecPtr nodePtr;
271 for (nodePtr.i = 1; nodePtr.i < MAX_NODES; nodePtr.i++)
272 {
273 ptrAss(nodePtr, nodeRec);
274 nodePtr.p->ndynamicId = 0;
275 nodePtr.p->hbOrder = 0;
276 Uint32 cnt = 0;
277 Uint32 type = getNodeInfo(nodePtr.i).m_type;
278 switch(type){
279 case NodeInfo::DB:
280 jam();
281 nodePtr.p->phase = ZINIT;
282 c_definedNodes.set(nodePtr.i);
283 break;
284 case NodeInfo::API:
285 jam();
286 nodePtr.p->phase = ZAPI_INACTIVE;
287 break;
288 case NodeInfo::MGM:
289 jam();
290 /**
291 * cmvmi allows ndb_mgmd to connect directly
292 */
293 nodePtr.p->phase = ZAPI_INACTIVE;
294 break;
295 default:
296 jam();
297 nodePtr.p->phase = ZAPI_INACTIVE;
298 }
299
300 setNodeInfo(nodePtr.i).m_heartbeat_cnt = cnt;
301 nodePtr.p->sendPrepFailReqStatus = Q_NOT_ACTIVE;
302 nodePtr.p->sendCommitFailReqStatus = Q_NOT_ACTIVE;
303 nodePtr.p->sendPresToStatus = Q_NOT_ACTIVE;
304 nodePtr.p->failState = NORMAL;
305 }//for
306 }
307
308 /*
309 4.2 ADD NODE MODULE*/
310 /*##########################################################################*/
311 /*
312 4.2.1 STTOR */
313 /**--------------------------------------------------------------------------
314 * Start phase signal, must be handled by all blocks.
315 * QMGR is only interested in the first phase.
316 * During phase one we clear all registered applications.
317 *---------------------------------------------------------------------------*/
318 /*******************************/
319 /* STTOR */
320 /*******************************/
execSTTOR(Signal * signal)321 void Qmgr::execSTTOR(Signal* signal)
322 {
323 jamEntry();
324
325 switch(signal->theData[1]){
326 case 1:
327 initData(signal);
328 startphase1(signal);
329 recompute_version_info(NodeInfo::DB);
330 recompute_version_info(NodeInfo::API);
331 recompute_version_info(NodeInfo::MGM);
332 return;
333 case 7:
334 cactivateApiCheck = 1;
335 if (cpresident == getOwnNodeId())
336 {
337 switch(arbitRec.method){
338 case ArbitRec::DISABLED:
339 break;
340
341 case ArbitRec::METHOD_EXTERNAL:
342 case ArbitRec::METHOD_DEFAULT:
343 /**
344 * Start arbitration thread. This could be done as soon as
345 * we have all nodes (or a winning majority).
346 */
347 jam();
348 handleArbitStart(signal);
349 break;
350 }
351 }
352 break;
353 case 8:{
354 /**
355 * Enable communication to all API nodes by setting state
356 * to ZFAIL_CLOSING (which will make it auto-open in checkStartInterface)
357 */
358 c_allow_api_connect = 1;
359 NodeRecPtr nodePtr;
360 for (nodePtr.i = 1; nodePtr.i < MAX_NODES; nodePtr.i++)
361 {
362 jam();
363 Uint32 type = getNodeInfo(nodePtr.i).m_type;
364 if (type != NodeInfo::API)
365 continue;
366
367 ptrAss(nodePtr, nodeRec);
368 if (nodePtr.p->phase == ZAPI_INACTIVE)
369 {
370 jam();
371 setNodeInfo(nodePtr.i).m_heartbeat_cnt = 3;
372 nodePtr.p->phase = ZFAIL_CLOSING;
373 nodePtr.p->failState = NORMAL;
374 }
375 }
376 }
377 }
378
379 sendSttorryLab(signal);
380 return;
381 }//Qmgr::execSTTOR()
382
sendSttorryLab(Signal * signal)383 void Qmgr::sendSttorryLab(Signal* signal)
384 {
385 /****************************<*/
386 /*< STTORRY <*/
387 /****************************<*/
388 signal->theData[3] = 7;
389 signal->theData[4] = 8;
390 signal->theData[5] = 255;
391 sendSignal(NDBCNTR_REF, GSN_STTORRY, signal, 6, JBB);
392 return;
393 }//Qmgr::sendSttorryLab()
394
startphase1(Signal * signal)395 void Qmgr::startphase1(Signal* signal)
396 {
397 jamEntry();
398
399 NodeRecPtr nodePtr;
400 nodePtr.i = getOwnNodeId();
401 ptrAss(nodePtr, nodeRec);
402 nodePtr.p->phase = ZSTARTING;
403
404 DihRestartReq * req = CAST_PTR(DihRestartReq, signal->getDataPtrSend());
405 req->senderRef = reference();
406 sendSignal(DBDIH_REF, GSN_DIH_RESTARTREQ, signal,
407 DihRestartReq::SignalLength, JBB);
408 return;
409 }
410
411 void
execDIH_RESTARTREF(Signal * signal)412 Qmgr::execDIH_RESTARTREF(Signal*signal)
413 {
414 jamEntry();
415
416 const DihRestartRef * ref = CAST_CONSTPTR(DihRestartRef,
417 signal->getDataPtr());
418 c_start.m_latest_gci = 0;
419 c_start.m_no_nodegroup_nodes.assign(NdbNodeBitmask::Size,
420 ref->no_nodegroup_mask);
421 execCM_INFOCONF(signal);
422 }
423
424 void
execDIH_RESTARTCONF(Signal * signal)425 Qmgr::execDIH_RESTARTCONF(Signal*signal)
426 {
427 jamEntry();
428
429 const DihRestartConf * conf = CAST_CONSTPTR(DihRestartConf,
430 signal->getDataPtr());
431
432 c_start.m_latest_gci = conf->latest_gci;
433 c_start.m_no_nodegroup_nodes.assign(NdbNodeBitmask::Size,
434 conf->no_nodegroup_mask);
435 execCM_INFOCONF(signal);
436 }
437
setHbDelay(UintR aHbDelay)438 void Qmgr::setHbDelay(UintR aHbDelay)
439 {
440 NDB_TICKS now = NdbTick_CurrentMillisecond();
441 hb_send_timer.setDelay(aHbDelay < 10 ? 10 : aHbDelay);
442 hb_send_timer.reset(now);
443 hb_check_timer.setDelay(aHbDelay < 10 ? 10 : aHbDelay);
444 hb_check_timer.reset(now);
445 }
446
setHbApiDelay(UintR aHbApiDelay)447 void Qmgr::setHbApiDelay(UintR aHbApiDelay)
448 {
449 NDB_TICKS now = NdbTick_CurrentMillisecond();
450 chbApiDelay = (aHbApiDelay < 100 ? 100 : aHbApiDelay);
451 hb_api_timer.setDelay(chbApiDelay);
452 hb_api_timer.reset(now);
453 }
454
setArbitTimeout(UintR aArbitTimeout)455 void Qmgr::setArbitTimeout(UintR aArbitTimeout)
456 {
457 arbitRec.timeout = (aArbitTimeout < 10 ? 10 : aArbitTimeout);
458 }
459
setCCDelay(UintR aCCDelay)460 void Qmgr::setCCDelay(UintR aCCDelay)
461 {
462 NDB_TICKS now = NdbTick_CurrentMillisecond();
463 if (aCCDelay == 0)
464 {
465 /* Connectivity check disabled */
466 m_connectivity_check.m_enabled = false;
467 m_connectivity_check.m_timer.setDelay(0);
468 }
469 else
470 {
471 m_connectivity_check.m_enabled = true;
472 m_connectivity_check.m_timer.setDelay(aCCDelay < 10 ? 10 : aCCDelay);
473 m_connectivity_check.m_timer.reset(now);
474 }
475 }
476
execCONNECT_REP(Signal * signal)477 void Qmgr::execCONNECT_REP(Signal* signal)
478 {
479 jamEntry();
480 const Uint32 nodeId = signal->theData[0];
481
482 if (ERROR_INSERTED(931))
483 {
484 jam();
485 ndbout_c("Discarding CONNECT_REP(%d)", nodeId);
486 infoEvent("Discarding CONNECT_REP(%d)", nodeId);
487 return;
488 }
489
490 c_connectedNodes.set(nodeId);
491
492 NodeRecPtr nodePtr;
493 nodePtr.i = nodeId;
494 ptrCheckGuard(nodePtr, MAX_NODES, nodeRec);
495 nodePtr.p->m_secret = 0;
496
497 nodePtr.i = getOwnNodeId();
498 ptrCheckGuard(nodePtr, MAX_NODES, nodeRec);
499 NodeInfo nodeInfo = getNodeInfo(nodeId);
500 switch(nodePtr.p->phase){
501 case ZRUNNING:
502 if (nodeInfo.getType() == NodeInfo::DB)
503 {
504 ndbrequire(!c_clusterNodes.get(nodeId));
505 }
506 case ZSTARTING:
507 jam();
508 break;
509 case ZPREPARE_FAIL:
510 case ZFAIL_CLOSING:
511 jam();
512 return;
513 case ZAPI_ACTIVE:
514 case ZAPI_INACTIVE:
515 return;
516 case ZINIT:
517 ndbrequire(getNodeInfo(nodeId).m_type == NodeInfo::MGM);
518 break;
519 default:
520 ndbrequire(false);
521 }
522
523 if (nodeInfo.getType() != NodeInfo::DB)
524 {
525 jam();
526 return;
527 }
528
529 switch(c_start.m_gsn){
530 case GSN_CM_REGREQ:
531 jam();
532 sendCmRegReq(signal, nodeId);
533
534 /**
535 * We're waiting for CM_REGCONF c_start.m_nodes contains all configured
536 * nodes
537 */
538 ndbrequire(nodePtr.p->phase == ZSTARTING);
539 ndbrequire(c_start.m_nodes.isWaitingFor(nodeId));
540 return;
541 case GSN_CM_NODEINFOREQ:
542 jam();
543
544 if (c_start.m_nodes.isWaitingFor(nodeId))
545 {
546 jam();
547 ndbrequire(getOwnNodeId() != cpresident);
548 ndbrequire(nodePtr.p->phase == ZSTARTING);
549 sendCmNodeInfoReq(signal, nodeId, nodePtr.p);
550 return;
551 }
552 return;
553 case GSN_CM_NODEINFOCONF:{
554 jam();
555
556 ndbrequire(getOwnNodeId() != cpresident);
557 ndbrequire(nodePtr.p->phase == ZRUNNING);
558 if (c_start.m_nodes.isWaitingFor(nodeId))
559 {
560 jam();
561 c_start.m_nodes.clearWaitingFor(nodeId);
562 c_start.m_gsn = RNIL;
563
564 NodeRecPtr addNodePtr;
565 addNodePtr.i = nodeId;
566 ptrCheckGuard(addNodePtr, MAX_NDB_NODES, nodeRec);
567 cmAddPrepare(signal, addNodePtr, nodePtr.p);
568 return;
569 }
570 }
571 default:
572 (void)1;
573 }
574
575 ndbrequire(!c_start.m_nodes.isWaitingFor(nodeId));
576 ndbrequire(!c_readnodes_nodes.get(nodeId));
577 c_readnodes_nodes.set(nodeId);
578 signal->theData[0] = reference();
579 sendSignal(calcQmgrBlockRef(nodeId), GSN_READ_NODESREQ, signal, 1, JBA);
580 return;
581 }//Qmgr::execCONNECT_REP()
582
583 void
execREAD_NODESCONF(Signal * signal)584 Qmgr::execREAD_NODESCONF(Signal* signal)
585 {
586 jamEntry();
587 check_readnodes_reply(signal,
588 refToNode(signal->getSendersBlockRef()),
589 GSN_READ_NODESCONF);
590 }
591
592 void
execREAD_NODESREF(Signal * signal)593 Qmgr::execREAD_NODESREF(Signal* signal)
594 {
595 jamEntry();
596 check_readnodes_reply(signal,
597 refToNode(signal->getSendersBlockRef()),
598 GSN_READ_NODESREF);
599 }
600
601 /*******************************/
602 /* CM_INFOCONF */
603 /*******************************/
execCM_INFOCONF(Signal * signal)604 void Qmgr::execCM_INFOCONF(Signal* signal)
605 {
606 /**
607 * Open communcation to all DB nodes
608 */
609 signal->theData[0] = 0; // no answer
610 signal->theData[1] = 0; // no id
611 signal->theData[2] = NodeInfo::DB;
612 sendSignal(CMVMI_REF, GSN_OPEN_COMREQ, signal, 3, JBB);
613
614 cpresident = ZNIL;
615 cpresidentAlive = ZFALSE;
616 c_start_election_time = NdbTick_CurrentMillisecond();
617
618 signal->theData[0] = ZSTART_FAILURE_LIMIT;
619 sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 3000, 1);
620
621 cmInfoconf010Lab(signal);
622
623 return;
624 }//Qmgr::execCM_INFOCONF()
625
// Start type value copied into c_start.m_start_type by cmInfoconf010Lab().
Uint32 g_start_type = 0;
// Nodes copied (masked with c_definedNodes) into c_start.m_skip_nodes by
// cmInfoconf010Lab().
NdbNodeBitmask g_nowait_nodes; // Set by clo (presumably a command line option -- confirm)
628
cmInfoconf010Lab(Signal * signal)629 void Qmgr::cmInfoconf010Lab(Signal* signal)
630 {
631 c_start.m_startKey = 0;
632 c_start.m_startNode = getOwnNodeId();
633 c_start.m_nodes.clearWaitingFor();
634 c_start.m_gsn = GSN_CM_REGREQ;
635 c_start.m_starting_nodes.clear();
636 c_start.m_starting_nodes_w_log.clear();
637 c_start.m_regReqReqSent = 0;
638 c_start.m_regReqReqRecv = 0;
639 c_start.m_skip_nodes = g_nowait_nodes;
640 c_start.m_skip_nodes.bitAND(c_definedNodes);
641 c_start.m_start_type = g_start_type;
642
643 NodeRecPtr nodePtr;
644 cnoOfNodes = 0;
645 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
646 jam();
647 ptrAss(nodePtr, nodeRec);
648
649 if(getNodeInfo(nodePtr.i).getType() != NodeInfo::DB)
650 continue;
651
652 c_start.m_nodes.setWaitingFor(nodePtr.i);
653 cnoOfNodes++;
654
655 if(!c_connectedNodes.get(nodePtr.i))
656 continue;
657
658 sendCmRegReq(signal, nodePtr.i);
659 }
660
661 //----------------------------------------
662 /* Wait for a while. When it returns */
663 /* we will check if we got any CM_REGREF*/
664 /* or CM_REGREQ (lower nodeid than our */
665 /* own). */
666 //----------------------------------------
667 signal->theData[0] = ZREGREQ_TIMELIMIT;
668 signal->theData[1] = c_start.m_startKey;
669 signal->theData[2] = c_start.m_startNode;
670 sendSignalWithDelay(QMGR_REF, GSN_CONTINUEB, signal, 3000, 3);
671
672 creadyDistCom = ZTRUE;
673 return;
674 }//Qmgr::cmInfoconf010Lab()
675
676 void
sendCmRegReq(Signal * signal,Uint32 nodeId)677 Qmgr::sendCmRegReq(Signal * signal, Uint32 nodeId){
678 CmRegReq * req = (CmRegReq *)&signal->theData[0];
679 req->blockRef = reference();
680 req->nodeId = getOwnNodeId();
681 req->version = NDB_VERSION;
682 req->mysql_version = NDB_MYSQL_VERSION_D;
683 req->latest_gci = c_start.m_latest_gci;
684 req->start_type = c_start.m_start_type;
685 c_start.m_skip_nodes.copyto(NdbNodeBitmask::Size, req->skip_nodes);
686 const Uint32 ref = calcQmgrBlockRef(nodeId);
687 sendSignal(ref, GSN_CM_REGREQ, signal, CmRegReq::SignalLength, JBB);
688 DEBUG_START(GSN_CM_REGREQ, nodeId, "");
689
690 c_start.m_regReqReqSent++;
691 }
692
693 /*
694 4.4.11 CM_REGREQ */
/**--------------------------------------------------------------------------
 * If this signal is received, some node is trying to get registered.
 * Only the president has the authority to make decisions about new nodes,
 * so only a president or a node that claims to be the president may send a
 * reply to this signal.
 * This signal can occur at any time after STTOR was received.
 * CPRESIDENT:           Timelimit has expired and someone has
 *                       decided to enter the president role
 * CPRESIDENT_CANDIDATE:
 *     Assigned when we receive a CM_REGREF, if we got more than one REF
 *     then we always keep the lowest nodenumber.
 *     We accept this nodeno as president when our timelimit expires
 * We should consider the following cases:
 * 1- We are the president. If we are busy adding new nodes to the cluster,
 *    then we have to refuse this node to be added.
 *    The refused node will try again in ZREFUSE_ADD_TIME seconds.
 *    If we are not busy then we confirm
 *
 * 2- We know the president, so we do not bother about this REQ.
 *    The president has also got this REQ and will take care of it.
 *
 * 3- The president is not known. We have received CM_INIT, so we compare the
 *    sender's node number to GETOWNNODEID().
 *    If we have a lower number than the sender then we will claim
 *    that we are the president so we send him a refuse signal back.
 *    We have to wait for the CONTINUEB signal before we can enter the
 *    president role. If our GETOWNNODEID() is larger than the sender's node
 *    number, we are not the president and just have to wait for the
 *    reply signal (REF) to our CM_REGREQ_2.
 * 4- We haven't received the CM_INIT signal so we don't know who we are.
 *    Ignore the request.
 *--------------------------------------------------------------------------*/
727 /*******************************/
728 /* CM_REGREQ */
729 /*******************************/
730 static
731 int
check_start_type(Uint32 starting,Uint32 own)732 check_start_type(Uint32 starting, Uint32 own)
733 {
734 if (starting == (1 << NodeState::ST_INITIAL_START) &&
735 ((own & (1 << NodeState::ST_INITIAL_START)) == 0))
736 {
737 return 1;
738 }
739 return 0;
740 }
741
execCM_REGREQ(Signal * signal)742 void Qmgr::execCM_REGREQ(Signal* signal)
743 {
744 DEBUG_START3(signal, "");
745
746 NodeRecPtr addNodePtr;
747 jamEntry();
748
749 CmRegReq * const cmRegReq = (CmRegReq *)&signal->theData[0];
750 const BlockReference Tblockref = cmRegReq->blockRef;
751 const Uint32 startingVersion = cmRegReq->version;
752 Uint32 startingMysqlVersion = cmRegReq->mysql_version;
753 addNodePtr.i = cmRegReq->nodeId;
754 Uint32 gci = 1;
755 Uint32 start_type = ~0;
756 NdbNodeBitmask skip_nodes;
757
758 if (!c_connectedNodes.get(cmRegReq->nodeId))
759 {
760 jam();
761
762 /**
763 * With ndbmtd, there is a race condition such that
764 * CM_REGREQ can arrive prior to CONNECT_REP
765 * since CONNECT_REP is sent from CMVMI
766 *
767 * In such cases, ignore the CM_REGREQ which is safe
768 * as it will anyway be resent by starting node
769 */
770 g_eventLogger->info("discarding CM_REGREQ from %u "
771 "as we're not yet connected (isNdbMt: %u)",
772 cmRegReq->nodeId,
773 (unsigned)isNdbMt());
774
775 ndbrequire(isNdbMt());
776 return;
777 }
778
779 if (signal->getLength() == CmRegReq::SignalLength)
780 {
781 jam();
782 gci = cmRegReq->latest_gci;
783 start_type = cmRegReq->start_type;
784 skip_nodes.assign(NdbNodeBitmask::Size, cmRegReq->skip_nodes);
785 }
786
787 if (startingVersion < NDBD_SPLIT_VERSION)
788 {
789 startingMysqlVersion = 0;
790 }
791
792 if (creadyDistCom == ZFALSE) {
793 jam();
794 /* NOT READY FOR DISTRIBUTED COMMUNICATION.*/
795 return;
796 }//if
797
798 if (!ndbCompatible_ndb_ndb(NDB_VERSION, startingVersion)) {
799 jam();
800 sendCmRegrefLab(signal, Tblockref, CmRegRef::ZINCOMPATIBLE_VERSION);
801 return;
802 }
803
804 if (!ndb_check_micro_gcp(startingVersion))
805 {
806 jam();
807 infoEvent("Connection from node %u refused as it's not micro GCP enabled",
808 addNodePtr.i);
809 sendCmRegrefLab(signal, Tblockref, CmRegRef::ZINCOMPATIBLE_VERSION);
810 return;
811 }
812
813 if (!ndb_pnr(startingVersion))
814 {
815 jam();
816 infoEvent("Connection from node %u refused as it's not does not support "
817 "parallel node recovery",
818 addNodePtr.i);
819 sendCmRegrefLab(signal, Tblockref, CmRegRef::ZINCOMPATIBLE_VERSION);
820 return;
821 }
822
823 if (!ndb_check_hb_order_version(startingVersion) &&
824 m_hb_order_config_used)
825 {
826 jam();
827 infoEvent("Connection from node %u refused as it does not support "
828 "user-defined HeartbeatOrder",
829 addNodePtr.i);
830 sendCmRegrefLab(signal, Tblockref, CmRegRef::ZINCOMPATIBLE_VERSION);
831 return;
832 }
833
834 if (m_connectivity_check.m_enabled &&
835 !ndbd_connectivity_check(startingVersion))
836 {
837 jam();
838 infoEvent("Connection from node %u refused as it does not support "
839 "ConnectCheckIntervalDelay",
840 addNodePtr.i);
841 sendCmRegrefLab(signal, Tblockref, CmRegRef::ZINCOMPATIBLE_VERSION);
842 return;
843 }
844
845 if (check_start_type(start_type, c_start.m_start_type))
846 {
847 jam();
848 sendCmRegrefLab(signal, Tblockref, CmRegRef::ZINCOMPATIBLE_START_TYPE);
849 return;
850 }
851
852 if (cpresident != getOwnNodeId())
853 {
854 jam();
855
856 if (cpresident == ZNIL)
857 {
858 /***
859 * We don't know the president.
860 * If the node to be added has lower node id
861 * than our president cancidate. Set it as
862 * candidate
863 */
864 jam();
865 if (gci > c_start.m_president_candidate_gci ||
866 (gci == c_start.m_president_candidate_gci &&
867 addNodePtr.i < c_start.m_president_candidate))
868 {
869 jam();
870 c_start.m_president_candidate = addNodePtr.i;
871 c_start.m_president_candidate_gci = gci;
872 }
873 sendCmRegrefLab(signal, Tblockref, CmRegRef::ZELECTION);
874 return;
875 }
876
877 /**
878 * We are not the president.
879 * We know the president.
880 * President will answer.
881 */
882 sendCmRegrefLab(signal, Tblockref, CmRegRef::ZNOT_PRESIDENT);
883 return;
884 }//if
885
886 if (c_start.m_startNode != 0)
887 {
888 jam();
889 /**
890 * President busy by adding another node
891 */
892 sendCmRegrefLab(signal, Tblockref, CmRegRef::ZBUSY_PRESIDENT);
893 return;
894 }//if
895
896 if (ctoStatus == Q_ACTIVE)
897 {
898 jam();
899 /**
900 * Active taking over as president
901 */
902 sendCmRegrefLab(signal, Tblockref, CmRegRef::ZBUSY_TO_PRES);
903 return;
904 }//if
905
906 if (getNodeInfo(addNodePtr.i).m_type != NodeInfo::DB)
907 {
908 jam();
909 /**
910 * The new node is not in config file
911 */
912 sendCmRegrefLab(signal, Tblockref, CmRegRef::ZNOT_IN_CFG);
913 return;
914 }
915
916 if (getNodeState().getSingleUserMode())
917 {
918 /**
919 * The cluster is in single user mode.
920 * Data node is not allowed to get added in the cluster
921 * while in single user mode.
922 */
923 // handle rolling upgrade
924 {
925 unsigned int get_major = getMajor(startingVersion);
926 unsigned int get_minor = getMinor(startingVersion);
927 unsigned int get_build = getBuild(startingVersion);
928
929 if (startingVersion < NDBD_QMGR_SINGLEUSER_VERSION_5) {
930 jam();
931
932 infoEvent("QMGR: detect upgrade: new node %u old version %u.%u.%u",
933 (unsigned int)addNodePtr.i, get_major, get_minor, get_build);
934 /**
935 * The new node is old version, send ZINCOMPATIBLE_VERSION instead
936 * of ZSINGLE_USER_MODE.
937 */
938 sendCmRegrefLab(signal, Tblockref, CmRegRef::ZINCOMPATIBLE_VERSION);
939 } else {
940 jam();
941
942 sendCmRegrefLab(signal, Tblockref, CmRegRef::ZSINGLE_USER_MODE);
943 }//if
944 }
945
946 return;
947 }//if
948
949 ptrCheckGuard(addNodePtr, MAX_NDB_NODES, nodeRec);
950 Phase phase = addNodePtr.p->phase;
951 if (phase != ZINIT)
952 {
953 jam();
954 DEBUG("phase = " << phase);
955 sendCmRegrefLab(signal, Tblockref, CmRegRef::ZNOT_DEAD);
956 return;
957 }
958
959 jam();
960 /**
961 * WE ARE PRESIDENT AND WE ARE NOT BUSY ADDING ANOTHER NODE.
962 * WE WILL TAKE CARE OF THE INCLUSION OF THIS NODE INTO THE CLUSTER.
963 * WE NEED TO START TIME SUPERVISION OF THIS. SINCE WE CANNOT STOP
964 * TIMED SIGNAL IF THE INCLUSION IS INTERRUPTED WE IDENTIFY
965 * EACH INCLUSION WITH A UNIQUE IDENTITY. THIS IS CHECKED WHEN
966 * THE SIGNAL ARRIVES. IF IT HAS CHANGED THEN WE SIMPLY IGNORE
967 * THE TIMED SIGNAL.
968 */
969
970 /**
971 * Update start record
972 */
973 c_start.m_startKey++;
974 c_start.m_startNode = addNodePtr.i;
975
976 /**
977 * Assign dynamic id
978 */
979 UintR TdynId = (++c_maxDynamicId) & 0xFFFF;
980 TdynId |= (addNodePtr.p->hbOrder << 16);
981 setNodeInfo(addNodePtr.i).m_version = startingVersion;
982 setNodeInfo(addNodePtr.i).m_mysql_version = startingMysqlVersion;
983 recompute_version_info(NodeInfo::DB, startingVersion);
984 addNodePtr.p->ndynamicId = TdynId;
985
986 /**
987 * Reply with CM_REGCONF
988 */
989 CmRegConf * const cmRegConf = (CmRegConf *)&signal->theData[0];
990 cmRegConf->presidentBlockRef = reference();
991 cmRegConf->presidentNodeId = getOwnNodeId();
992 cmRegConf->presidentVersion = getNodeInfo(getOwnNodeId()).m_version;
993 cmRegConf->presidentMysqlVersion = getNodeInfo(getOwnNodeId()).m_mysql_version;
994 cmRegConf->dynamicId = TdynId;
995 c_clusterNodes.copyto(NdbNodeBitmask::Size, cmRegConf->allNdbNodes);
996 sendSignal(Tblockref, GSN_CM_REGCONF, signal,
997 CmRegConf::SignalLength, JBA);
998 DEBUG_START(GSN_CM_REGCONF, refToNode(Tblockref), "");
999
1000 /**
1001 * Send CmAdd to all nodes (including starting)
1002 */
1003 c_start.m_nodes = c_clusterNodes;
1004 c_start.m_nodes.setWaitingFor(addNodePtr.i);
1005 c_start.m_gsn = GSN_CM_ADD;
1006
1007 NodeReceiverGroup rg(QMGR, c_start.m_nodes);
1008 CmAdd * const cmAdd = (CmAdd*)signal->getDataPtrSend();
1009 cmAdd->requestType = CmAdd::Prepare;
1010 cmAdd->startingNodeId = addNodePtr.i;
1011 cmAdd->startingVersion = startingVersion;
1012 cmAdd->startingMysqlVersion = startingMysqlVersion;
1013 sendSignal(rg, GSN_CM_ADD, signal, CmAdd::SignalLength, JBA);
1014 DEBUG_START2(GSN_CM_ADD, rg, "Prepare");
1015
1016 /**
1017 * Set timer
1018 */
1019 return;
1020 signal->theData[0] = ZREGREQ_MASTER_TIMELIMIT;
1021 signal->theData[1] = c_start.m_startKey;
1022 sendSignalWithDelay(QMGR_REF, GSN_CONTINUEB, signal, 30000, 2);
1023
1024 return;
1025 }//Qmgr::execCM_REGREQ()
1026
sendCmRegrefLab(Signal * signal,BlockReference TBRef,CmRegRef::ErrorCode Terror)1027 void Qmgr::sendCmRegrefLab(Signal* signal, BlockReference TBRef,
1028 CmRegRef::ErrorCode Terror)
1029 {
1030 CmRegRef* ref = (CmRegRef*)signal->getDataPtrSend();
1031 ref->blockRef = reference();
1032 ref->nodeId = getOwnNodeId();
1033 ref->errorCode = Terror;
1034 ref->presidentCandidate =
1035 (cpresident == ZNIL ? c_start.m_president_candidate : cpresident);
1036 ref->candidate_latest_gci = c_start.m_president_candidate_gci;
1037 ref->latest_gci = c_start.m_latest_gci;
1038 ref->start_type = c_start.m_start_type;
1039 c_start.m_skip_nodes.copyto(NdbNodeBitmask::Size, ref->skip_nodes);
1040 sendSignal(TBRef, GSN_CM_REGREF, signal,
1041 CmRegRef::SignalLength, JBB);
1042 DEBUG_START(GSN_CM_REGREF, refToNode(TBRef), "");
1043 return;
1044 }//Qmgr::sendCmRegrefLab()
1045
1046 /*
1047 4.4.11 CM_REGCONF */
/**--------------------------------------------------------------------------
 * The president gives permission to a node that wants to join the cluster.
 * The president prepares the cluster for the addition of the new node.
 * When the new node has set up all connections to the cluster,
 * the president will send commit to all cluster nodes so the phase of the
 * new node can be changed to ZRUNNING.
 *--------------------------------------------------------------------------*/
1055 /*******************************/
1056 /* CM_REGCONF */
1057 /*******************************/
execCM_REGCONF(Signal * signal)1058 void Qmgr::execCM_REGCONF(Signal* signal)
1059 {
1060 DEBUG_START3(signal, "");
1061
1062 NodeRecPtr myNodePtr;
1063 NodeRecPtr nodePtr;
1064 jamEntry();
1065
1066 const CmRegConf * const cmRegConf = (CmRegConf *)&signal->theData[0];
1067
1068 if (!ndbCompatible_ndb_ndb(NDB_VERSION, cmRegConf->presidentVersion)) {
1069 jam();
1070 char buf[128];
1071 BaseString::snprintf(buf,sizeof(buf),
1072 "incompatible version own=0x%x other=0x%x, "
1073 " shutting down",
1074 NDB_VERSION, cmRegConf->presidentVersion);
1075 progError(__LINE__, NDBD_EXIT_UNSUPPORTED_VERSION, buf);
1076 return;
1077 }
1078
1079 if (!ndb_check_hb_order_version(cmRegConf->presidentVersion) &&
1080 m_hb_order_config_used) {
1081 jam();
1082 char buf[128];
1083 BaseString::snprintf(buf,sizeof(buf),
1084 "incompatible version own=0x%x other=0x%x, "
1085 "due to user-defined HeartbeatOrder, shutting down",
1086 NDB_VERSION, cmRegConf->presidentVersion);
1087 progError(__LINE__, NDBD_EXIT_UNSUPPORTED_VERSION, buf);
1088 return;
1089 }
1090
1091 if (m_connectivity_check.m_enabled &&
1092 !ndbd_connectivity_check(cmRegConf->presidentVersion))
1093 {
1094 jam();
1095 m_connectivity_check.m_enabled = false;
1096 ndbout_c("Disabling ConnectCheckIntervalDelay as president "
1097 " does not support it");
1098 infoEvent("Disabling ConnectCheckIntervalDelay as president "
1099 " does not support it");
1100 }
1101
1102 myNodePtr.i = getOwnNodeId();
1103 ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);
1104
1105 ndbrequire(c_start.m_gsn == GSN_CM_REGREQ);
1106 ndbrequire(myNodePtr.p->phase == ZSTARTING);
1107
1108 cpdistref = cmRegConf->presidentBlockRef;
1109 cpresident = cmRegConf->presidentNodeId;
1110 UintR TdynamicId = cmRegConf->dynamicId;
1111 c_maxDynamicId = TdynamicId & 0xFFFF;
1112 c_clusterNodes.assign(NdbNodeBitmask::Size, cmRegConf->allNdbNodes);
1113
1114 myNodePtr.p->ndynamicId = TdynamicId;
1115
1116 // set own MT config here or in REF, and others in CM_NODEINFOREQ/CONF
1117 setNodeInfo(getOwnNodeId()).m_lqh_workers = globalData.ndbMtLqhWorkers;
1118
1119 /*--------------------------------------------------------------*/
1120 // Send this as an EVENT REPORT to inform about hearing about
1121 // other NDB node proclaiming to be president.
1122 /*--------------------------------------------------------------*/
1123 signal->theData[0] = NDB_LE_CM_REGCONF;
1124 signal->theData[1] = getOwnNodeId();
1125 signal->theData[2] = cpresident;
1126 signal->theData[3] = TdynamicId;
1127 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);
1128
1129 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
1130 jam();
1131 if (c_clusterNodes.get(nodePtr.i)){
1132 jam();
1133 ptrAss(nodePtr, nodeRec);
1134
1135 ndbrequire(nodePtr.p->phase == ZINIT);
1136 nodePtr.p->phase = ZRUNNING;
1137
1138 if(c_connectedNodes.get(nodePtr.i)){
1139 jam();
1140 sendCmNodeInfoReq(signal, nodePtr.i, myNodePtr.p);
1141 }
1142 }
1143 }
1144
1145 c_start.m_gsn = GSN_CM_NODEINFOREQ;
1146 c_start.m_nodes = c_clusterNodes;
1147
1148 if (ERROR_INSERTED(937))
1149 {
1150 CLEAR_ERROR_INSERT_VALUE;
1151 signal->theData[0] = 9999;
1152 sendSignalWithDelay(CMVMI_REF, GSN_NDB_TAMPER, signal, 500, 1);
1153 }
1154
1155 return;
1156 }//Qmgr::execCM_REGCONF()
1157
1158 void
check_readnodes_reply(Signal * signal,Uint32 nodeId,Uint32 gsn)1159 Qmgr::check_readnodes_reply(Signal* signal, Uint32 nodeId, Uint32 gsn)
1160 {
1161 NodeRecPtr myNodePtr;
1162 myNodePtr.i = getOwnNodeId();
1163 ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);
1164
1165 NodeRecPtr nodePtr;
1166 nodePtr.i = nodeId;
1167 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
1168
1169 ndbrequire(c_readnodes_nodes.get(nodeId));
1170 ReadNodesConf* conf = (ReadNodesConf*)signal->getDataPtr();
1171 if (gsn == GSN_READ_NODESREF)
1172 {
1173 jam();
1174 retry:
1175 signal->theData[0] = reference();
1176 sendSignal(calcQmgrBlockRef(nodeId), GSN_READ_NODESREQ, signal, 1, JBA);
1177 return;
1178 }
1179
1180 if (conf->masterNodeId == ZNIL)
1181 {
1182 jam();
1183 goto retry;
1184 }
1185
1186 Uint32 president = conf->masterNodeId;
1187 if (president == cpresident)
1188 {
1189 jam();
1190 c_readnodes_nodes.clear(nodeId);
1191 return;
1192 }
1193
1194 char buf[255];
1195 BaseString::snprintf(buf, sizeof(buf),
1196 "check StartPartialTimeout, "
1197 "node %d thinks %d is president, "
1198 "I think president is: %d",
1199 nodeId, president, cpresident);
1200
1201 ndbout_c("%s", buf);
1202 CRASH_INSERTION(933);
1203
1204 if (getNodeState().startLevel == NodeState::SL_STARTED)
1205 {
1206 jam();
1207 NdbNodeBitmask part;
1208 part.assign(NdbNodeBitmask::Size, conf->clusterNodes);
1209 FailRep* rep = (FailRep*)signal->getDataPtrSend();
1210 rep->failCause = FailRep::ZPARTITIONED_CLUSTER;
1211 rep->partitioned.president = cpresident;
1212 c_clusterNodes.copyto(NdbNodeBitmask::Size, rep->partitioned.partition);
1213 rep->partitioned.partitionFailSourceNodeId = getOwnNodeId();
1214 Uint32 ref = calcQmgrBlockRef(nodeId);
1215 Uint32 i = 0;
1216 /* Send source of event info if a node supports it */
1217 Uint32 length = FailRep::OrigSignalLength + FailRep::PartitionedExtraLength;
1218 while((i = part.find(i + 1)) != NdbNodeBitmask::NotFound)
1219 {
1220 if (i == nodeId)
1221 continue;
1222 rep->failNodeId = i;
1223 bool sendSourceId = ndbd_fail_rep_source_node((getNodeInfo(i)).m_version);
1224 sendSignal(ref, GSN_FAIL_REP, signal,
1225 length + (sendSourceId ? FailRep::SourceExtraLength : 0),
1226 JBA);
1227 }
1228 rep->failNodeId = nodeId;
1229 bool sendSourceId = ndbd_fail_rep_source_node((getNodeInfo(nodeId)).m_version);
1230
1231 sendSignal(ref, GSN_FAIL_REP, signal,
1232 length + (sendSourceId ? FailRep::SourceExtraLength : 0),
1233 JBB);
1234 return;
1235 }
1236
1237 CRASH_INSERTION(932);
1238 CRASH_INSERTION(938);
1239
1240 progError(__LINE__,
1241 NDBD_EXIT_PARTITIONED_SHUTDOWN,
1242 buf);
1243
1244 ndbrequire(false);
1245 }
1246
1247 void
sendCmNodeInfoReq(Signal * signal,Uint32 nodeId,const NodeRec * self)1248 Qmgr::sendCmNodeInfoReq(Signal* signal, Uint32 nodeId, const NodeRec * self){
1249 CmNodeInfoReq * const req = (CmNodeInfoReq*)signal->getDataPtrSend();
1250 req->nodeId = getOwnNodeId();
1251 req->dynamicId = self->ndynamicId;
1252 req->version = getNodeInfo(getOwnNodeId()).m_version;
1253 req->mysql_version = getNodeInfo(getOwnNodeId()).m_mysql_version;
1254 req->lqh_workers = getNodeInfo(getOwnNodeId()).m_lqh_workers;
1255 const Uint32 ref = calcQmgrBlockRef(nodeId);
1256 sendSignal(ref,GSN_CM_NODEINFOREQ, signal, CmNodeInfoReq::SignalLength, JBB);
1257 DEBUG_START(GSN_CM_NODEINFOREQ, nodeId, "");
1258 }
1259
1260 /*
1261 4.4.11 CM_REGREF */
1262 /**--------------------------------------------------------------------------
1263 * Only a president or a president candidate can refuse a node to get added to
1264 * the cluster.
1265 * Refuse reasons:
1266 * ZBUSY We know that the sender is the president and we have to
1267 * make a new CM_REGREQ.
1268 * ZNOT_IN_CFG This node number is not specified in the configfile,
1269 * SYSTEM ERROR
1270 * ZELECTION Sender is a president candidate, his timelimit
1271 * hasn't expired so maybe someone else will show up.
1272 * Update the CPRESIDENT_CANDIDATE, then wait for our
1273 * timelimit to expire.
1274 *---------------------------------------------------------------------------*/
1275 /*******************************/
1276 /* CM_REGREF */
1277 /*******************************/
1278 static
1279 const char *
get_start_type_string(Uint32 st)1280 get_start_type_string(Uint32 st)
1281 {
1282 static char buf[256];
1283
1284 if (st == 0)
1285 {
1286 return "<ANY>";
1287 }
1288 else
1289 {
1290 buf[0] = 0;
1291 for(Uint32 i = 0; i<NodeState::ST_ILLEGAL_TYPE; i++)
1292 {
1293 if (st & (1 << i))
1294 {
1295 if (buf[0])
1296 strcat(buf, "/");
1297 switch(i){
1298 case NodeState::ST_INITIAL_START:
1299 strcat(buf, "inital start");
1300 break;
1301 case NodeState::ST_SYSTEM_RESTART:
1302 strcat(buf, "system restart");
1303 break;
1304 case NodeState::ST_NODE_RESTART:
1305 strcat(buf, "node restart");
1306 break;
1307 case NodeState::ST_INITIAL_NODE_RESTART:
1308 strcat(buf, "initial node restart");
1309 break;
1310 }
1311 }
1312 }
1313 return buf;
1314 }
1315 }
1316
execCM_REGREF(Signal * signal)1317 void Qmgr::execCM_REGREF(Signal* signal)
1318 {
1319 jamEntry();
1320
1321 CmRegRef* ref = (CmRegRef*)signal->getDataPtr();
1322 UintR TaddNodeno = ref->nodeId;
1323 UintR TrefuseReason = ref->errorCode;
1324 Uint32 candidate = ref->presidentCandidate;
1325 Uint32 node_gci = 1;
1326 Uint32 candidate_gci = 1;
1327 Uint32 start_type = ~0;
1328 NdbNodeBitmask skip_nodes;
1329 DEBUG_START3(signal, TrefuseReason);
1330
1331 if (signal->getLength() == CmRegRef::SignalLength)
1332 {
1333 jam();
1334 node_gci = ref->latest_gci;
1335 candidate_gci = ref->candidate_latest_gci;
1336 start_type = ref->start_type;
1337 skip_nodes.assign(NdbNodeBitmask::Size, ref->skip_nodes);
1338 }
1339
1340 c_start.m_regReqReqRecv++;
1341
1342 // Ignore block reference in data[0]
1343
1344 if(candidate != c_start.m_president_candidate)
1345 {
1346 jam();
1347 c_start.m_regReqReqRecv = ~0;
1348 }
1349
1350 c_start.m_starting_nodes.set(TaddNodeno);
1351 if (node_gci)
1352 {
1353 jam();
1354 c_start.m_starting_nodes_w_log.set(TaddNodeno);
1355 }
1356 c_start.m_node_gci[TaddNodeno] = node_gci;
1357
1358 skip_nodes.bitAND(c_definedNodes);
1359 c_start.m_skip_nodes.bitOR(skip_nodes);
1360
1361 // set own MT config here or in CONF, and others in CM_NODEINFOREQ/CONF
1362 setNodeInfo(getOwnNodeId()).m_lqh_workers = globalData.ndbMtLqhWorkers;
1363
1364 char buf[100];
1365 switch (TrefuseReason) {
1366 case CmRegRef::ZINCOMPATIBLE_VERSION:
1367 jam();
1368 progError(__LINE__, NDBD_EXIT_UNSUPPORTED_VERSION,
1369 "incompatible version, "
1370 "connection refused by running ndb node");
1371 case CmRegRef::ZINCOMPATIBLE_START_TYPE:
1372 jam();
1373 BaseString::snprintf(buf, sizeof(buf),
1374 "incompatible start type detected: node %d"
1375 " reports %s(%d) my start type: %s(%d)",
1376 TaddNodeno,
1377 get_start_type_string(start_type), start_type,
1378 get_start_type_string(c_start.m_start_type),
1379 c_start.m_start_type);
1380 progError(__LINE__, NDBD_EXIT_SR_RESTARTCONFLICT, buf);
1381 break;
1382 case CmRegRef::ZBUSY:
1383 case CmRegRef::ZBUSY_TO_PRES:
1384 case CmRegRef::ZBUSY_PRESIDENT:
1385 jam();
1386 cpresidentAlive = ZTRUE;
1387 signal->theData[3] = 0;
1388 break;
1389 case CmRegRef::ZNOT_IN_CFG:
1390 jam();
1391 progError(__LINE__, NDBD_EXIT_NODE_NOT_IN_CONFIG);
1392 break;
1393 case CmRegRef::ZNOT_DEAD:
1394 jam();
1395 progError(__LINE__, NDBD_EXIT_NODE_NOT_DEAD);
1396 break;
1397 case CmRegRef::ZSINGLE_USER_MODE:
1398 jam();
1399 progError(__LINE__, NDBD_EXIT_SINGLE_USER_MODE);
1400 break;
1401 /**
1402 * For generic refuse error.
1403 * e.g. in online upgrade, we can use this error code instead
1404 * of the incompatible error code.
1405 */
1406 case CmRegRef::ZGENERIC:
1407 jam();
1408 progError(__LINE__, NDBD_EXIT_GENERIC);
1409 break;
1410 case CmRegRef::ZELECTION:
1411 jam();
1412 if (candidate_gci > c_start.m_president_candidate_gci ||
1413 (candidate_gci == c_start.m_president_candidate_gci &&
1414 candidate < c_start.m_president_candidate))
1415 {
1416 jam();
1417 //----------------------------------------
1418 /* We may already have a candidate */
1419 /* choose the lowest nodeno */
1420 //----------------------------------------
1421 signal->theData[3] = 2;
1422 c_start.m_president_candidate = candidate;
1423 c_start.m_president_candidate_gci = candidate_gci;
1424 } else {
1425 signal->theData[3] = 4;
1426 }//if
1427 break;
1428 case CmRegRef::ZNOT_PRESIDENT:
1429 jam();
1430 cpresidentAlive = ZTRUE;
1431 signal->theData[3] = 3;
1432 break;
1433 default:
1434 jam();
1435 signal->theData[3] = 5;
1436 /*empty*/;
1437 break;
1438 }//switch
1439 /*--------------------------------------------------------------*/
1440 // Send this as an EVENT REPORT to inform about hearing about
1441 // other NDB node proclaiming not to be president.
1442 /*--------------------------------------------------------------*/
1443 signal->theData[0] = NDB_LE_CM_REGREF;
1444 signal->theData[1] = getOwnNodeId();
1445 signal->theData[2] = TaddNodeno;
1446 //-----------------------------------------
1447 // signal->theData[3] filled in above
1448 //-----------------------------------------
1449 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);
1450
1451 if(cpresidentAlive == ZTRUE)
1452 {
1453 jam();
1454 DEBUG("cpresidentAlive");
1455 return;
1456 }
1457
1458 if(c_start.m_regReqReqSent != c_start.m_regReqReqRecv)
1459 {
1460 jam();
1461 DEBUG(c_start.m_regReqReqSent << " != " << c_start.m_regReqReqRecv);
1462 return;
1463 }
1464
1465 if(c_start.m_president_candidate != getOwnNodeId())
1466 {
1467 jam();
1468 DEBUG("i'm not the candidate");
1469 return;
1470 }
1471
1472 /**
1473 * All connected nodes has agreed
1474 */
1475 if(check_startup(signal))
1476 {
1477 jam();
1478 electionWon(signal);
1479 }
1480
1481 return;
1482 }//Qmgr::execCM_REGREF()
1483
1484 Uint32
check_startup(Signal * signal)1485 Qmgr::check_startup(Signal* signal)
1486 {
1487 Uint64 now = NdbTick_CurrentMillisecond();
1488 Uint64 partial_timeout = c_start_election_time + c_restartPartialTimeout;
1489 Uint64 partitioned_timeout = partial_timeout + c_restartPartionedTimeout;
1490 Uint64 no_nodegroup_timeout = c_start_election_time +
1491 c_restartNoNodegroupTimeout;
1492
1493 const bool no_nodegroup_active =
1494 (c_restartNoNodegroupTimeout != ~Uint32(0)) &&
1495 (! c_start.m_no_nodegroup_nodes.isclear());
1496
1497 /**
1498 * First see if we should wait more...
1499 */
1500 NdbNodeBitmask tmp;
1501 tmp.bitOR(c_start.m_skip_nodes);
1502 tmp.bitOR(c_start.m_starting_nodes);
1503
1504 NdbNodeBitmask wait;
1505 wait.assign(c_definedNodes);
1506 wait.bitANDC(tmp);
1507
1508 Uint32 retVal = 0;
1509 Uint32 incompleteng = MAX_NDB_NODES; // Illegal value
1510 NdbNodeBitmask report_mask;
1511
1512 if ((c_start.m_latest_gci == 0) ||
1513 (c_start.m_start_type == (1 << NodeState::ST_INITIAL_START)))
1514 {
1515 if (tmp.equal(c_definedNodes))
1516 {
1517 jam();
1518 signal->theData[1] = 0x8000;
1519 report_mask.assign(c_definedNodes);
1520 report_mask.bitANDC(c_start.m_starting_nodes);
1521 retVal = 1;
1522 goto start_report;
1523 }
1524 else if (no_nodegroup_active)
1525 {
1526 if (now < no_nodegroup_timeout)
1527 {
1528 signal->theData[1] = 6;
1529 signal->theData[2] = Uint32((no_nodegroup_timeout - now + 500) / 1000);
1530 report_mask.assign(wait);
1531 retVal = 0;
1532 goto start_report;
1533 }
1534 tmp.bitOR(c_start.m_no_nodegroup_nodes);
1535 if (tmp.equal(c_definedNodes))
1536 {
1537 signal->theData[1] = 0x8000;
1538 report_mask.assign(c_definedNodes);
1539 report_mask.bitANDC(c_start.m_starting_nodes);
1540 retVal = 1;
1541 goto start_report;
1542 }
1543 else
1544 {
1545 jam();
1546 signal->theData[1] = 1;
1547 signal->theData[2] = ~0;
1548 report_mask.assign(wait);
1549 retVal = 0;
1550 goto start_report;
1551 }
1552 }
1553 else
1554 {
1555 jam();
1556 signal->theData[1] = 1;
1557 signal->theData[2] = ~0;
1558 report_mask.assign(wait);
1559 retVal = 0;
1560 goto start_report;
1561 }
1562 }
1563
1564 if (now >= no_nodegroup_timeout)
1565 {
1566 tmp.bitOR(c_start.m_no_nodegroup_nodes);
1567 }
1568
1569 {
1570 const bool all = c_start.m_starting_nodes.equal(c_definedNodes);
1571 CheckNodeGroups* sd = (CheckNodeGroups*)&signal->theData[0];
1572
1573 {
1574 /**
1575 * Check for missing node group directly
1576 */
1577 NdbNodeBitmask check;
1578 check.assign(c_definedNodes);
1579 check.bitANDC(c_start.m_starting_nodes); // Not connected nodes
1580 check.bitOR(c_start.m_starting_nodes_w_log);
1581
1582 sd->blockRef = reference();
1583 sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck;
1584 sd->mask = check;
1585 EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal,
1586 CheckNodeGroups::SignalLength);
1587
1588 if (sd->output == CheckNodeGroups::Lose)
1589 {
1590 jam();
1591 goto missing_nodegroup;
1592 }
1593 }
1594
1595 sd->blockRef = reference();
1596 sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck;
1597 sd->mask = c_start.m_starting_nodes;
1598 EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal,
1599 CheckNodeGroups::SignalLength);
1600
1601 const Uint32 result = sd->output;
1602
1603 sd->blockRef = reference();
1604 sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck;
1605 sd->mask = c_start.m_starting_nodes_w_log;
1606 EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal,
1607 CheckNodeGroups::SignalLength);
1608
1609 const Uint32 result_w_log = sd->output;
1610
1611 if (tmp.equal(c_definedNodes))
1612 {
1613 /**
1614 * All nodes (wrt no-wait nodes) has connected...
1615 * this means that we will now start or die
1616 */
1617 jam();
1618 switch(result_w_log){
1619 case CheckNodeGroups::Lose:
1620 {
1621 jam();
1622 goto missing_nodegroup;
1623 }
1624 case CheckNodeGroups::Win:
1625 signal->theData[1] = all ? 0x8001 : 0x8002;
1626 report_mask.assign(c_definedNodes);
1627 report_mask.bitANDC(c_start.m_starting_nodes);
1628 retVal = 1;
1629 goto check_log;
1630 case CheckNodeGroups::Partitioning:
1631 ndbrequire(result != CheckNodeGroups::Lose);
1632 signal->theData[1] =
1633 all ? 0x8001 : (result == CheckNodeGroups::Win ? 0x8002 : 0x8003);
1634 report_mask.assign(c_definedNodes);
1635 report_mask.bitANDC(c_start.m_starting_nodes);
1636 retVal = 1;
1637 goto check_log;
1638 }
1639 }
1640
1641 if (now < partial_timeout)
1642 {
1643 jam();
1644
1645 signal->theData[1] = c_restartPartialTimeout == (Uint32) ~0 ? 2 : 3;
1646 signal->theData[2] = Uint32((partial_timeout - now + 500) / 1000);
1647 report_mask.assign(wait);
1648 retVal = 0;
1649
1650 if (no_nodegroup_active && now < no_nodegroup_timeout)
1651 {
1652 signal->theData[1] = 7;
1653 signal->theData[2] = Uint32((no_nodegroup_timeout - now + 500) / 1000);
1654 }
1655 else if (no_nodegroup_active && now >= no_nodegroup_timeout)
1656 {
1657 report_mask.bitANDC(c_start.m_no_nodegroup_nodes);
1658 }
1659
1660 goto start_report;
1661 }
1662
1663 /**
1664 * Start partial has passed...check for partitioning...
1665 */
1666 switch(result_w_log){
1667 case CheckNodeGroups::Lose:
1668 jam();
1669 goto missing_nodegroup;
1670 case CheckNodeGroups::Partitioning:
1671 if (now < partitioned_timeout && result != CheckNodeGroups::Win)
1672 {
1673 goto missinglog;
1674 }
1675 // Fall through...
1676 case CheckNodeGroups::Win:
1677 signal->theData[1] =
1678 all ? 0x8001 : (result == CheckNodeGroups::Win ? 0x8002 : 0x8003);
1679 report_mask.assign(c_definedNodes);
1680 report_mask.bitANDC(c_start.m_starting_nodes);
1681 retVal = 2;
1682 goto check_log;
1683 }
1684 }
1685 ndbrequire(false);
1686
1687 check_log:
1688 jam();
1689 {
1690 Uint32 save[4+4*NdbNodeBitmask::Size];
1691 memcpy(save, signal->theData, sizeof(save));
1692
1693 DihRestartReq * req = CAST_PTR(DihRestartReq, signal->getDataPtrSend());
1694 req->senderRef = 0;
1695 c_start.m_starting_nodes.copyto(NdbNodeBitmask::Size, req->nodemask);
1696 memcpy(req->node_gcis, c_start.m_node_gci, 4*MAX_NDB_NODES);
1697 EXECUTE_DIRECT(DBDIH, GSN_DIH_RESTARTREQ, signal,
1698 DihRestartReq::CheckLength);
1699
1700 incompleteng = signal->theData[0];
1701 memcpy(signal->theData, save, sizeof(save));
1702
1703 if (incompleteng != MAX_NDB_NODES)
1704 {
1705 jam();
1706 if (retVal == 1)
1707 {
1708 jam();
1709 goto incomplete_log;
1710 }
1711 else if (retVal == 2)
1712 {
1713 if (now <= partitioned_timeout)
1714 {
1715 jam();
1716 goto missinglog;
1717 }
1718 else
1719 {
1720 goto incomplete_log;
1721 }
1722 }
1723 ndbrequire(false);
1724 }
1725 }
1726 goto start_report;
1727
1728 missinglog:
1729 signal->theData[1] = c_restartPartionedTimeout == (Uint32) ~0 ? 4 : 5;
1730 signal->theData[2] = Uint32((partitioned_timeout - now + 500) / 1000);
1731 report_mask.assign(c_definedNodes);
1732 report_mask.bitANDC(c_start.m_starting_nodes);
1733 retVal = 0;
1734 goto start_report;
1735
1736 start_report:
1737 jam();
1738 {
1739 Uint32 sz = NdbNodeBitmask::Size;
1740 signal->theData[0] = NDB_LE_StartReport;
1741 signal->theData[3] = sz;
1742 Uint32* ptr = signal->theData+4;
1743 c_definedNodes.copyto(sz, ptr); ptr += sz;
1744 c_start.m_starting_nodes.copyto(sz, ptr); ptr += sz;
1745 c_start.m_skip_nodes.copyto(sz, ptr); ptr += sz;
1746 report_mask.copyto(sz, ptr); ptr+= sz;
1747 c_start.m_no_nodegroup_nodes.copyto(sz, ptr); ptr += sz;
1748 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal,
1749 4+5*NdbNodeBitmask::Size, JBB);
1750 }
1751 return retVal;
1752
1753 missing_nodegroup:
1754 jam();
1755 {
1756 char buf[100], mask1[100], mask2[100];
1757 c_start.m_starting_nodes.getText(mask1);
1758 tmp.assign(c_start.m_starting_nodes);
1759 tmp.bitANDC(c_start.m_starting_nodes_w_log);
1760 tmp.getText(mask2);
1761 BaseString::snprintf(buf, sizeof(buf),
1762 "Unable to start missing node group! "
1763 " starting: %s (missing fs for: %s)",
1764 mask1, mask2);
1765 progError(__LINE__, NDBD_EXIT_INSUFFICENT_NODES, buf);
1766 return 0; // Deadcode
1767 }
1768
1769 incomplete_log:
1770 jam();
1771 {
1772 char buf[100], mask1[100];
1773 c_start.m_starting_nodes.getText(mask1);
1774 BaseString::snprintf(buf, sizeof(buf),
1775 "Incomplete log for node group: %d! "
1776 " starting nodes: %s",
1777 incompleteng, mask1);
1778 progError(__LINE__, NDBD_EXIT_INSUFFICENT_NODES, buf);
1779 return 0; // Deadcode
1780 }
1781 }
1782
1783 void
electionWon(Signal * signal)1784 Qmgr::electionWon(Signal* signal){
1785 NodeRecPtr myNodePtr;
1786 cpresident = getOwnNodeId(); /* This node becomes president. */
1787 myNodePtr.i = getOwnNodeId();
1788 ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);
1789
1790 myNodePtr.p->phase = ZRUNNING;
1791
1792 cpdistref = reference();
1793 cneighbourl = ZNIL;
1794 cneighbourh = ZNIL;
1795 myNodePtr.p->ndynamicId = 1 | (myNodePtr.p->hbOrder << 16);
1796 c_maxDynamicId = 1;
1797 c_clusterNodes.clear();
1798 c_clusterNodes.set(getOwnNodeId());
1799
1800 cpresidentAlive = ZTRUE;
1801 c_start_election_time = ~0;
1802 c_start.reset();
1803
1804 signal->theData[0] = NDB_LE_CM_REGCONF;
1805 signal->theData[1] = getOwnNodeId();
1806 signal->theData[2] = cpresident;
1807 signal->theData[3] = myNodePtr.p->ndynamicId;
1808 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);
1809
1810 c_start.m_starting_nodes.clear(getOwnNodeId());
1811 if (c_start.m_starting_nodes.isclear())
1812 {
1813 jam();
1814 sendSttorryLab(signal);
1815 }
1816 }
1817
1818 /*
1819 4.4.11 CONTINUEB */
1820 /*--------------------------------------------------------------------------*/
1821 /* */
1822 /*--------------------------------------------------------------------------*/
1823 /****************************>---------------------------------------------*/
1824 /* CONTINUEB > SENDER: Own block, Own node */
1825 /****************************>-------+INPUT : TCONTINUEB_TYPE */
1826 /*--------------------------------------------------------------*/
regreqTimeLimitLab(Signal * signal)1827 void Qmgr::regreqTimeLimitLab(Signal* signal)
1828 {
1829 if(cpresident == ZNIL)
1830 {
1831 if (c_start.m_president_candidate == ZNIL)
1832 {
1833 jam();
1834 c_start.m_president_candidate = getOwnNodeId();
1835 }
1836
1837 cmInfoconf010Lab(signal);
1838 }
1839 }//Qmgr::regreqTimelimitLab()
1840
1841 /**---------------------------------------------------------------------------
 * The new node takes care of giving information about itself and asks
 * all other nodes for their nodeinfo. The new node uses CM_NODEINFOREQ for
 * that purpose. When connections to all running nodes have been set up, the
 * president will send a commit to all running nodes + the new node
1846 * INPUT: NODE_PTR1, must be set as ZNIL if we don't enter CONNECT_NODES)
1847 * from signal CM_NODEINFOCONF.
1848 *---------------------------------------------------------------------------*/
1849 /*******************************/
1850 /* CM_NODEINFOCONF */
1851 /*******************************/
execCM_NODEINFOCONF(Signal * signal)1852 void Qmgr::execCM_NODEINFOCONF(Signal* signal)
1853 {
1854 DEBUG_START3(signal, "");
1855
1856 jamEntry();
1857
1858 CmNodeInfoConf * const conf = (CmNodeInfoConf*)signal->getDataPtr();
1859
1860 const Uint32 nodeId = conf->nodeId;
1861 const Uint32 dynamicId = conf->dynamicId;
1862 const Uint32 version = conf->version;
1863 Uint32 mysql_version = conf->mysql_version;
1864 Uint32 lqh_workers = conf->lqh_workers;
1865 if (version < NDBD_SPLIT_VERSION)
1866 {
1867 jam();
1868 mysql_version = 0;
1869 }
1870 if (version < NDBD_MT_LQH_VERSION)
1871 {
1872 jam();
1873 lqh_workers = 0;
1874 }
1875
1876 NodeRecPtr nodePtr;
1877 nodePtr.i = getOwnNodeId();
1878 ptrAss(nodePtr, nodeRec);
1879 ndbrequire(nodePtr.p->phase == ZSTARTING);
1880 ndbrequire(c_start.m_gsn == GSN_CM_NODEINFOREQ);
1881 c_start.m_nodes.clearWaitingFor(nodeId);
1882
1883 /**
1884 * Update node info
1885 */
1886 NodeRecPtr replyNodePtr;
1887 replyNodePtr.i = nodeId;
1888 ptrCheckGuard(replyNodePtr, MAX_NDB_NODES, nodeRec);
1889 replyNodePtr.p->ndynamicId = dynamicId;
1890 replyNodePtr.p->blockRef = signal->getSendersBlockRef();
1891 setNodeInfo(replyNodePtr.i).m_version = version;
1892 setNodeInfo(replyNodePtr.i).m_mysql_version = mysql_version;
1893 setNodeInfo(replyNodePtr.i).m_lqh_workers = lqh_workers;
1894
1895 recompute_version_info(NodeInfo::DB, version);
1896
1897 if(!c_start.m_nodes.done()){
1898 jam();
1899 return;
1900 }
1901
1902 /**********************************************<*/
1903 /* Send an ack. back to the president. */
1904 /* CM_ACKADD */
1905 /* The new node has been registered by all */
1906 /* running nodes and has stored nodeinfo about */
1907 /* all running nodes. The new node has to wait */
1908 /* for CM_ADD (commit) from president to become */
1909 /* a running node in the cluster. */
1910 /**********************************************<*/
1911 sendCmAckAdd(signal, getOwnNodeId(), CmAdd::Prepare);
1912 return;
1913 }//Qmgr::execCM_NODEINFOCONF()
1914
1915 /**---------------------------------------------------------------------------
1916 * A new node sends nodeinfo about himself. The new node asks for
1917 * corresponding nodeinfo back in the CM_NODEINFOCONF.
1918 *---------------------------------------------------------------------------*/
1919 /*******************************/
1920 /* CM_NODEINFOREQ */
1921 /*******************************/
execCM_NODEINFOREQ(Signal * signal)1922 void Qmgr::execCM_NODEINFOREQ(Signal* signal)
1923 {
1924 jamEntry();
1925
1926 const Uint32 Tblockref = signal->getSendersBlockRef();
1927
1928 NodeRecPtr nodePtr;
1929 nodePtr.i = getOwnNodeId();
1930 ptrAss(nodePtr, nodeRec);
1931 if(nodePtr.p->phase != ZRUNNING){
1932 jam();
1933 signal->theData[0] = reference();
1934 signal->theData[1] = getOwnNodeId();
1935 signal->theData[2] = ZNOT_RUNNING;
1936 sendSignal(Tblockref, GSN_CM_NODEINFOREF, signal, 3, JBB);
1937 return;
1938 }
1939
1940 NodeRecPtr addNodePtr;
1941 CmNodeInfoReq * const req = (CmNodeInfoReq*)signal->getDataPtr();
1942 addNodePtr.i = req->nodeId;
1943 ptrCheckGuard(addNodePtr, MAX_NDB_NODES, nodeRec);
1944 addNodePtr.p->ndynamicId = req->dynamicId;
1945 addNodePtr.p->blockRef = signal->getSendersBlockRef();
1946 setNodeInfo(addNodePtr.i).m_version = req->version;
1947
1948 Uint32 mysql_version = req->mysql_version;
1949 if (req->version < NDBD_SPLIT_VERSION)
1950 mysql_version = 0;
1951 setNodeInfo(addNodePtr.i).m_mysql_version = mysql_version;
1952
1953 Uint32 lqh_workers = req->lqh_workers;
1954 if (req->version < NDBD_MT_LQH_VERSION)
1955 lqh_workers = 0;
1956 setNodeInfo(addNodePtr.i).m_lqh_workers = lqh_workers;
1957
1958 c_maxDynamicId = req->dynamicId & 0xFFFF;
1959
1960 cmAddPrepare(signal, addNodePtr, nodePtr.p);
1961 }//Qmgr::execCM_NODEINFOREQ()
1962
1963 void
cmAddPrepare(Signal * signal,NodeRecPtr nodePtr,const NodeRec * self)1964 Qmgr::cmAddPrepare(Signal* signal, NodeRecPtr nodePtr, const NodeRec * self){
1965 jam();
1966
1967 switch(nodePtr.p->phase){
1968 case ZINIT:
1969 jam();
1970 nodePtr.p->phase = ZSTARTING;
1971 return;
1972 case ZFAIL_CLOSING:
1973 jam();
1974
1975 #if 1
1976 warningEvent("Recieved request to incorperate node %u, "
1977 "while error handling has not yet completed",
1978 nodePtr.i);
1979
1980 ndbrequire(getOwnNodeId() != cpresident);
1981 ndbrequire(signal->header.theVerId_signalNumber == GSN_CM_ADD);
1982 c_start.m_nodes.clearWaitingFor();
1983 c_start.m_nodes.setWaitingFor(nodePtr.i);
1984 c_start.m_gsn = GSN_CM_NODEINFOCONF;
1985 #else
1986 warningEvent("Enabling communication to CM_ADD node %u state=%d",
1987 nodePtr.i,
1988 nodePtr.p->phase);
1989 nodePtr.p->phase = ZSTARTING;
1990 nodePtr.p->failState = NORMAL;
1991 signal->theData[0] = 0;
1992 signal->theData[1] = nodePtr.i;
1993 sendSignal(CMVMI_REF, GSN_OPEN_COMREQ, signal, 2, JBA);
1994 #endif
1995 return;
1996 case ZSTARTING:
1997 break;
1998 case ZRUNNING:
1999 case ZPREPARE_FAIL:
2000 case ZAPI_ACTIVE:
2001 case ZAPI_INACTIVE:
2002 ndbrequire(false);
2003 }
2004
2005 sendCmAckAdd(signal, nodePtr.i, CmAdd::Prepare);
2006 sendApiVersionRep(signal, nodePtr);
2007
2008 /* President have prepared us */
2009 CmNodeInfoConf * conf = (CmNodeInfoConf*)signal->getDataPtrSend();
2010 conf->nodeId = getOwnNodeId();
2011 conf->dynamicId = self->ndynamicId;
2012 conf->version = getNodeInfo(getOwnNodeId()).m_version;
2013 conf->mysql_version = getNodeInfo(getOwnNodeId()).m_mysql_version;
2014 conf->lqh_workers = getNodeInfo(getOwnNodeId()).m_lqh_workers;
2015 sendSignal(nodePtr.p->blockRef, GSN_CM_NODEINFOCONF, signal,
2016 CmNodeInfoConf::SignalLength, JBB);
2017 DEBUG_START(GSN_CM_NODEINFOCONF, refToNode(nodePtr.p->blockRef), "");
2018 }
2019
2020 void
sendApiVersionRep(Signal * signal,NodeRecPtr nodePtr)2021 Qmgr::sendApiVersionRep(Signal* signal, NodeRecPtr nodePtr)
2022 {
2023 if (getNodeInfo(nodePtr.i).m_version >= NDBD_NODE_VERSION_REP)
2024 {
2025 jam();
2026 Uint32 ref = calcQmgrBlockRef(nodePtr.i);
2027 for(Uint32 i = 1; i<MAX_NODES; i++)
2028 {
2029 jam();
2030 Uint32 version = getNodeInfo(i).m_version;
2031 Uint32 type = getNodeInfo(i).m_type;
2032 if (type != NodeInfo::DB && version)
2033 {
2034 jam();
2035 signal->theData[0] = i;
2036 signal->theData[1] = version;
2037 sendSignal(ref, GSN_NODE_VERSION_REP, signal, 2, JBB);
2038 }
2039 }
2040 }
2041 }
2042
2043 void
sendCmAckAdd(Signal * signal,Uint32 nodeId,CmAdd::RequestType type)2044 Qmgr::sendCmAckAdd(Signal * signal, Uint32 nodeId, CmAdd::RequestType type){
2045
2046 CmAckAdd * cmAckAdd = (CmAckAdd*)signal->getDataPtrSend();
2047 cmAckAdd->requestType = type;
2048 cmAckAdd->startingNodeId = nodeId;
2049 cmAckAdd->senderNodeId = getOwnNodeId();
2050 sendSignal(cpdistref, GSN_CM_ACKADD, signal, CmAckAdd::SignalLength, JBA);
2051 DEBUG_START(GSN_CM_ACKADD, cpresident, "");
2052
2053 switch(type){
2054 case CmAdd::Prepare:
2055 return;
2056 case CmAdd::AddCommit:
2057 case CmAdd::CommitNew:
2058 break;
2059 }
2060
2061 signal->theData[0] = nodeId;
2062 EXECUTE_DIRECT(NDBCNTR, GSN_CM_ADD_REP, signal, 1);
2063 jamEntry();
2064 }
2065
2066 /*
2067 4.4.11 CM_ADD */
2068 /**--------------------------------------------------------------------------
 * Prepare a running node to add a new node to the cluster. The running node
 * will change phase of the new node from ZINIT to ZSTARTING. The running node
 * will also mark that we have received a prepare. When the new node has sent
 * us nodeinfo we can send an acknowledgement back to the president. When all
 * running nodes have acknowledged the new node, the president will send a
 * commit and we can change phase of the new node to ZRUNNING. The president
 * will also send CM_ADD to himself.
2076 *---------------------------------------------------------------------------*/
2077 /*******************************/
2078 /* CM_ADD */
2079 /*******************************/
execCM_ADD(Signal * signal)2080 void Qmgr::execCM_ADD(Signal* signal)
2081 {
2082 NodeRecPtr addNodePtr;
2083 jamEntry();
2084
2085 NodeRecPtr nodePtr;
2086 nodePtr.i = getOwnNodeId();
2087 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
2088
2089 CmAdd * const cmAdd = (CmAdd*)signal->getDataPtr();
2090 const CmAdd::RequestType type = (CmAdd::RequestType)cmAdd->requestType;
2091 addNodePtr.i = cmAdd->startingNodeId;
2092 //const Uint32 startingVersion = cmAdd->startingVersion;
2093 ptrCheckGuard(addNodePtr, MAX_NDB_NODES, nodeRec);
2094
2095 DEBUG_START3(signal, type);
2096
2097 if(nodePtr.p->phase == ZSTARTING){
2098 jam();
2099 /**
2100 * We are joining...
2101 */
2102 ndbrequire(addNodePtr.i == nodePtr.i);
2103 switch(type){
2104 case CmAdd::Prepare:
2105 ndbrequire(c_start.m_gsn == GSN_CM_NODEINFOREQ);
2106 /**
2107 * Wait for CM_NODEINFO_CONF
2108 */
2109 return;
2110 case CmAdd::CommitNew:
2111 /**
2112 * Tata. we're in the cluster
2113 */
2114 joinedCluster(signal, addNodePtr);
2115 return;
2116 case CmAdd::AddCommit:
2117 ndbrequire(false);
2118 }
2119 }
2120
2121 switch (type) {
2122 case CmAdd::Prepare:
2123 cmAddPrepare(signal, addNodePtr, nodePtr.p);
2124 break;
2125 case CmAdd::AddCommit:{
2126 jam();
2127 ndbrequire(addNodePtr.p->phase == ZSTARTING);
2128 addNodePtr.p->phase = ZRUNNING;
2129 m_connectivity_check.reportNodeConnect(addNodePtr.i);
2130 setNodeInfo(addNodePtr.i).m_heartbeat_cnt= 0;
2131 c_clusterNodes.set(addNodePtr.i);
2132 findNeighbours(signal, __LINE__);
2133
2134 /**
2135 * SEND A HEARTBEAT IMMEDIATELY TO DECREASE THE RISK THAT WE MISS EARLY
2136 * HEARTBEATS.
2137 */
2138 sendHeartbeat(signal);
2139 hb_send_timer.reset(0);
2140
2141 /**
2142 * ENABLE COMMUNICATION WITH ALL BLOCKS WITH THE NEWLY ADDED NODE
2143 */
2144 EnableComReq *enableComReq = (EnableComReq *)signal->getDataPtrSend();
2145 enableComReq->m_senderRef = reference();
2146 enableComReq->m_senderData = ENABLE_COM_CM_ADD_COMMIT;
2147 NodeBitmask::clear(enableComReq->m_nodeIds);
2148 NodeBitmask::set(enableComReq->m_nodeIds, addNodePtr.i);
2149 sendSignal(CMVMI_REF, GSN_ENABLE_COMREQ, signal,
2150 EnableComReq::SignalLength, JBA);
2151 break;
2152 }
2153 case CmAdd::CommitNew:
2154 jam();
2155 ndbrequire(false);
2156 }
2157
2158 }//Qmgr::execCM_ADD()
2159
2160 void
handleEnableComAddCommit(Signal * signal,Uint32 node)2161 Qmgr::handleEnableComAddCommit(Signal *signal, Uint32 node)
2162 {
2163 sendCmAckAdd(signal, node, CmAdd::AddCommit);
2164 if(getOwnNodeId() != cpresident){
2165 jam();
2166 c_start.reset();
2167 }
2168 }
2169
2170 void
execENABLE_COMCONF(Signal * signal)2171 Qmgr::execENABLE_COMCONF(Signal *signal)
2172 {
2173 const EnableComConf *enableComConf =
2174 (const EnableComConf *)signal->getDataPtr();
2175 Uint32 state = enableComConf->m_senderData;
2176 Uint32 node = NodeBitmask::find(enableComConf->m_nodeIds, 0);
2177
2178 jamEntry();
2179
2180 switch (state)
2181 {
2182 case ENABLE_COM_CM_ADD_COMMIT:
2183 jam();
2184 /* Only exactly one node possible here. */
2185 ndbrequire(node != NodeBitmask::NotFound);
2186 ndbrequire(NodeBitmask::find(enableComConf->m_nodeIds, node + 1) ==
2187 NodeBitmask::NotFound);
2188 handleEnableComAddCommit(signal, node);
2189 break;
2190
2191 case ENABLE_COM_CM_COMMIT_NEW:
2192 jam();
2193 handleEnableComCommitNew(signal);
2194 break;
2195
2196 case ENABLE_COM_API_REGREQ:
2197 jam();
2198 /* Only exactly one node possible here. */
2199 ndbrequire(node != NodeBitmask::NotFound);
2200 ndbrequire(NodeBitmask::find(enableComConf->m_nodeIds, node + 1) ==
2201 NodeBitmask::NotFound);
2202 handleEnableComApiRegreq(signal, node);
2203 break;
2204
2205 default:
2206 jam();
2207 ndbrequire(false);
2208 }
2209 }
2210
2211 void
joinedCluster(Signal * signal,NodeRecPtr nodePtr)2212 Qmgr::joinedCluster(Signal* signal, NodeRecPtr nodePtr){
2213 /**
2214 * WE HAVE BEEN INCLUDED IN THE CLUSTER WE CAN START BEING PART OF THE
2215 * HEARTBEAT PROTOCOL AND WE WILL ALSO ENABLE COMMUNICATION WITH ALL
2216 * NODES IN THE CLUSTER.
2217 */
2218 nodePtr.p->phase = ZRUNNING;
2219 setNodeInfo(nodePtr.i).m_heartbeat_cnt= 0;
2220 findNeighbours(signal, __LINE__);
2221 c_clusterNodes.set(nodePtr.i);
2222 c_start.reset();
2223
2224 /**
2225 * SEND A HEARTBEAT IMMEDIATELY TO DECREASE THE RISK
2226 * THAT WE MISS EARLY HEARTBEATS.
2227 */
2228 sendHeartbeat(signal);
2229 hb_send_timer.reset(0);
2230
2231 /**
2232 * ENABLE COMMUNICATION WITH ALL BLOCKS IN THE CURRENT CLUSTER AND SET
2233 * THE NODES IN THE CLUSTER TO BE RUNNING.
2234 */
2235 EnableComReq *enableComReq = (EnableComReq *)signal->getDataPtrSend();
2236 enableComReq->m_senderRef = reference();
2237 enableComReq->m_senderData = ENABLE_COM_CM_COMMIT_NEW;
2238 NodeBitmask::clear(enableComReq->m_nodeIds);
2239 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
2240 jam();
2241 ptrAss(nodePtr, nodeRec);
2242 if ((nodePtr.p->phase == ZRUNNING) && (nodePtr.i != getOwnNodeId())) {
2243 /*-------------------------------------------------------------------*/
2244 // Enable full communication to all other nodes. Not really necessary
2245 // to open communication to ourself.
2246 /*-------------------------------------------------------------------*/
2247 jam();
2248 NodeBitmask::set(enableComReq->m_nodeIds, nodePtr.i);
2249 }//if
2250 }//for
2251
2252 if (!NodeBitmask::isclear(enableComReq->m_nodeIds))
2253 {
2254 jam();
2255 sendSignal(CMVMI_REF, GSN_ENABLE_COMREQ, signal,
2256 EnableComReq::SignalLength, JBA);
2257 }
2258 else
2259 {
2260 handleEnableComCommitNew(signal);
2261 }
2262 }
2263
2264 void
handleEnableComCommitNew(Signal * signal)2265 Qmgr::handleEnableComCommitNew(Signal *signal)
2266 {
2267 sendSttorryLab(signal);
2268
2269 sendCmAckAdd(signal, getOwnNodeId(), CmAdd::CommitNew);
2270 }
2271
2272 /* 4.10.7 CM_ACKADD - PRESIDENT IS RECEIVER - */
2273 /*---------------------------------------------------------------------------*/
2274 /* Entry point for an ack add signal.
2275 * The TTYPE defines if it is a prepare or a commit. */
2276 /*---------------------------------------------------------------------------*/
execCM_ACKADD(Signal * signal)2277 void Qmgr::execCM_ACKADD(Signal* signal)
2278 {
2279 NodeRecPtr addNodePtr;
2280 NodeRecPtr senderNodePtr;
2281 jamEntry();
2282
2283 CmAckAdd * const cmAckAdd = (CmAckAdd*)signal->getDataPtr();
2284 const CmAdd::RequestType type = (CmAdd::RequestType)cmAckAdd->requestType;
2285 addNodePtr.i = cmAckAdd->startingNodeId;
2286 senderNodePtr.i = cmAckAdd->senderNodeId;
2287
2288 DEBUG_START3(signal, type);
2289
2290 if (cpresident != getOwnNodeId()) {
2291 jam();
2292 /*-----------------------------------------------------------------------*/
2293 /* IF WE ARE NOT PRESIDENT THEN WE SHOULD NOT RECEIVE THIS MESSAGE. */
2294 /*------------------------------------------------------------_----------*/
2295 warningEvent("Received CM_ACKADD from %d president=%d",
2296 senderNodePtr.i, cpresident);
2297 return;
2298 }//if
2299
2300 if (addNodePtr.i != c_start.m_startNode) {
2301 jam();
2302 /*----------------------------------------------------------------------*/
2303 /* THIS IS NOT THE STARTING NODE. WE ARE ACTIVE NOW WITH ANOTHER START. */
2304 /*----------------------------------------------------------------------*/
2305 warningEvent("Received CM_ACKADD from %d with startNode=%d != own %d",
2306 senderNodePtr.i, addNodePtr.i, c_start.m_startNode);
2307 return;
2308 }//if
2309
2310 ndbrequire(c_start.m_gsn == GSN_CM_ADD);
2311 c_start.m_nodes.clearWaitingFor(senderNodePtr.i);
2312 if(!c_start.m_nodes.done()){
2313 jam();
2314 return;
2315 }
2316
2317 switch (type) {
2318 case CmAdd::Prepare:{
2319 jam();
2320
2321 /*----------------------------------------------------------------------*/
2322 /* ALL RUNNING NODES HAVE PREPARED THE INCLUSION OF THIS NEW NODE. */
2323 /*----------------------------------------------------------------------*/
2324 c_start.m_gsn = GSN_CM_ADD;
2325 c_start.m_nodes = c_clusterNodes;
2326
2327 CmAdd * const cmAdd = (CmAdd*)signal->getDataPtrSend();
2328 cmAdd->requestType = CmAdd::AddCommit;
2329 cmAdd->startingNodeId = addNodePtr.i;
2330 cmAdd->startingVersion = getNodeInfo(addNodePtr.i).m_version;
2331 cmAdd->startingMysqlVersion = getNodeInfo(addNodePtr.i).m_mysql_version;
2332 NodeReceiverGroup rg(QMGR, c_clusterNodes);
2333 sendSignal(rg, GSN_CM_ADD, signal, CmAdd::SignalLength, JBA);
2334 DEBUG_START2(GSN_CM_ADD, rg, "AddCommit");
2335 return;
2336 }
2337 case CmAdd::AddCommit:{
2338 jam();
2339
2340 /****************************************/
2341 /* Send commit to the new node so he */
2342 /* will change PHASE into ZRUNNING */
2343 /****************************************/
2344 c_start.m_gsn = GSN_CM_ADD;
2345 c_start.m_nodes.clearWaitingFor();
2346 c_start.m_nodes.setWaitingFor(addNodePtr.i);
2347
2348 CmAdd * const cmAdd = (CmAdd*)signal->getDataPtrSend();
2349 cmAdd->requestType = CmAdd::CommitNew;
2350 cmAdd->startingNodeId = addNodePtr.i;
2351 cmAdd->startingVersion = getNodeInfo(addNodePtr.i).m_version;
2352 cmAdd->startingMysqlVersion = getNodeInfo(addNodePtr.i).m_mysql_version;
2353 sendSignal(calcQmgrBlockRef(addNodePtr.i), GSN_CM_ADD, signal,
2354 CmAdd::SignalLength, JBA);
2355 DEBUG_START(GSN_CM_ADD, addNodePtr.i, "CommitNew");
2356 return;
2357 }
2358 case CmAdd::CommitNew:
2359 jam();
2360 /**
2361 * Tell arbitration about new node.
2362 */
2363 handleArbitNdbAdd(signal, addNodePtr.i);
2364 c_start.reset();
2365
2366 if (c_start.m_starting_nodes.get(addNodePtr.i))
2367 {
2368 jam();
2369 c_start.m_starting_nodes.clear(addNodePtr.i);
2370 if (c_start.m_starting_nodes.isclear())
2371 {
2372 jam();
2373 sendSttorryLab(signal);
2374 }
2375 }
2376 return;
2377 }//switch
2378 ndbrequire(false);
2379 }//Qmgr::execCM_ACKADD()
2380
2381 /**-------------------------------------------------------------------------
2382 * WE HAVE BEEN INCLUDED INTO THE CLUSTER. IT IS NOW TIME TO CALCULATE WHICH
2383 * ARE OUR LEFT AND RIGHT NEIGHBOURS FOR THE HEARTBEAT PROTOCOL.
2384 *--------------------------------------------------------------------------*/
findNeighbours(Signal * signal,Uint32 from)2385 void Qmgr::findNeighbours(Signal* signal, Uint32 from)
2386 {
2387 UintR toldLeftNeighbour;
2388 UintR tfnLeftFound;
2389 UintR tfnMaxFound;
2390 UintR tfnMinFound;
2391 UintR tfnRightFound;
2392 NodeRecPtr fnNodePtr;
2393 NodeRecPtr fnOwnNodePtr;
2394
2395 Uint32 toldRightNeighbour = cneighbourh;
2396 toldLeftNeighbour = cneighbourl;
2397 tfnLeftFound = 0;
2398 tfnMaxFound = 0;
2399 tfnMinFound = (UintR)-1;
2400 tfnRightFound = (UintR)-1;
2401 fnOwnNodePtr.i = getOwnNodeId();
2402 ptrCheckGuard(fnOwnNodePtr, MAX_NDB_NODES, nodeRec);
2403 for (fnNodePtr.i = 1; fnNodePtr.i < MAX_NDB_NODES; fnNodePtr.i++) {
2404 jam();
2405 ptrAss(fnNodePtr, nodeRec);
2406 if (fnNodePtr.i != fnOwnNodePtr.i) {
2407 if (fnNodePtr.p->phase == ZRUNNING) {
2408 if (tfnMinFound > fnNodePtr.p->ndynamicId) {
2409 jam();
2410 tfnMinFound = fnNodePtr.p->ndynamicId;
2411 }//if
2412 if (tfnMaxFound < fnNodePtr.p->ndynamicId) {
2413 jam();
2414 tfnMaxFound = fnNodePtr.p->ndynamicId;
2415 }//if
2416 if (fnOwnNodePtr.p->ndynamicId > fnNodePtr.p->ndynamicId) {
2417 jam();
2418 if (fnNodePtr.p->ndynamicId > tfnLeftFound) {
2419 jam();
2420 tfnLeftFound = fnNodePtr.p->ndynamicId;
2421 }//if
2422 } else {
2423 jam();
2424 if (fnNodePtr.p->ndynamicId < tfnRightFound) {
2425 jam();
2426 tfnRightFound = fnNodePtr.p->ndynamicId;
2427 }//if
2428 }//if
2429 }//if
2430 }//if
2431 }//for
2432 if (tfnLeftFound == 0) {
2433 if (tfnMinFound == (UintR)-1) {
2434 jam();
2435 cneighbourl = ZNIL;
2436 } else {
2437 jam();
2438 cneighbourl = translateDynamicIdToNodeId(signal, tfnMaxFound);
2439 }//if
2440 } else {
2441 jam();
2442 cneighbourl = translateDynamicIdToNodeId(signal, tfnLeftFound);
2443 }//if
2444 if (tfnRightFound == (UintR)-1) {
2445 if (tfnMaxFound == 0) {
2446 jam();
2447 cneighbourh = ZNIL;
2448 } else {
2449 jam();
2450 cneighbourh = translateDynamicIdToNodeId(signal, tfnMinFound);
2451 }//if
2452 } else {
2453 jam();
2454 cneighbourh = translateDynamicIdToNodeId(signal, tfnRightFound);
2455 }//if
2456 if (toldLeftNeighbour != cneighbourl) {
2457 jam();
2458 if (cneighbourl != ZNIL) {
2459 jam();
2460 /**-------------------------------------------------------------------*/
2461 /* WE ARE SUPERVISING A NEW LEFT NEIGHBOUR. WE START WITH ALARM COUNT
2462 * EQUAL TO ZERO.
2463 *---------------------------------------------------------------------*/
2464 fnNodePtr.i = cneighbourl;
2465 ptrCheckGuard(fnNodePtr, MAX_NDB_NODES, nodeRec);
2466 setNodeInfo(fnNodePtr.i).m_heartbeat_cnt= 0;
2467 }//if
2468 }//if
2469
2470 signal->theData[0] = NDB_LE_FIND_NEIGHBOURS;
2471 signal->theData[1] = getOwnNodeId();
2472 signal->theData[2] = cneighbourl;
2473 signal->theData[3] = cneighbourh;
2474 signal->theData[4] = fnOwnNodePtr.p->ndynamicId;
2475 UintR Tlen = 5;
2476 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, Tlen, JBB);
2477 g_eventLogger->info("findNeighbours from: %u old (left: %u right: %u) new (%u %u)",
2478 from,
2479 toldLeftNeighbour,
2480 toldRightNeighbour,
2481 cneighbourl,
2482 cneighbourh);
2483 }//Qmgr::findNeighbours()
2484
2485 /*
2486 4.10.7 INIT_DATA */
2487 /*---------------------------------------------------------------------------*/
2488 /*---------------------------------------------------------------------------*/
initData(Signal * signal)2489 void Qmgr::initData(Signal* signal)
2490 {
2491 NDB_TICKS now = NdbTick_CurrentMillisecond();
2492 interface_check_timer.setDelay(1000);
2493 interface_check_timer.reset(now);
2494
2495 // catch-all for missing initializations
2496 memset(&arbitRec, 0, sizeof(arbitRec));
2497
2498 /**
2499 * Timeouts
2500 */
2501 const ndb_mgm_configuration_iterator * p =
2502 m_ctx.m_config.getOwnConfigIterator();
2503 ndbrequire(p != 0);
2504
2505 Uint32 hbDBDB = 1500;
2506 Uint32 arbitTimeout = 1000;
2507 Uint32 arbitMethod = ARBIT_METHOD_DEFAULT;
2508 Uint32 ccInterval = 0;
2509 c_restartPartialTimeout = 30000;
2510 c_restartPartionedTimeout = 60000;
2511 c_restartFailureTimeout = ~0;
2512 c_restartNoNodegroupTimeout = 15000;
2513 ndb_mgm_get_int_parameter(p, CFG_DB_HEARTBEAT_INTERVAL, &hbDBDB);
2514 ndb_mgm_get_int_parameter(p, CFG_DB_ARBIT_TIMEOUT, &arbitTimeout);
2515 ndb_mgm_get_int_parameter(p, CFG_DB_ARBIT_METHOD, &arbitMethod);
2516 ndb_mgm_get_int_parameter(p, CFG_DB_START_PARTIAL_TIMEOUT,
2517 &c_restartPartialTimeout);
2518 ndb_mgm_get_int_parameter(p, CFG_DB_START_PARTITION_TIMEOUT,
2519 &c_restartPartionedTimeout);
2520 ndb_mgm_get_int_parameter(p, CFG_DB_START_NO_NODEGROUP_TIMEOUT,
2521 &c_restartNoNodegroupTimeout);
2522 ndb_mgm_get_int_parameter(p, CFG_DB_START_FAILURE_TIMEOUT,
2523 &c_restartFailureTimeout);
2524 ndb_mgm_get_int_parameter(p, CFG_DB_CONNECT_CHECK_DELAY,
2525 &ccInterval);
2526
2527 if(c_restartPartialTimeout == 0)
2528 {
2529 c_restartPartialTimeout = ~0;
2530 }
2531
2532 if (c_restartPartionedTimeout ==0)
2533 {
2534 c_restartPartionedTimeout = ~0;
2535 }
2536
2537 if (c_restartFailureTimeout == 0)
2538 {
2539 c_restartFailureTimeout = ~0;
2540 }
2541
2542 if (c_restartNoNodegroupTimeout == 0)
2543 {
2544 c_restartNoNodegroupTimeout = ~0;
2545 }
2546
2547 setHbDelay(hbDBDB);
2548 setCCDelay(ccInterval);
2549 setArbitTimeout(arbitTimeout);
2550
2551 arbitRec.method = (ArbitRec::Method)arbitMethod;
2552 arbitRec.state = ARBIT_NULL; // start state for all nodes
2553 arbitRec.apiMask[0].clear(); // prepare for ARBIT_CFG
2554
2555 Uint32 sum = 0;
2556 ArbitSignalData* const sd = (ArbitSignalData*)&signal->theData[0];
2557 for (unsigned rank = 1; rank <= 2; rank++) {
2558 sd->sender = getOwnNodeId();
2559 sd->code = rank;
2560 sd->node = 0;
2561 sd->ticket.clear();
2562 sd->mask.clear();
2563 ndb_mgm_configuration_iterator * iter =
2564 m_ctx.m_config.getClusterConfigIterator();
2565 for (ndb_mgm_first(iter); ndb_mgm_valid(iter); ndb_mgm_next(iter)) {
2566 Uint32 tmp = 0;
2567 if (ndb_mgm_get_int_parameter(iter, CFG_NODE_ARBIT_RANK, &tmp) == 0 &&
2568 tmp == rank){
2569 Uint32 nodeId = 0;
2570 ndbrequire(!ndb_mgm_get_int_parameter(iter, CFG_NODE_ID, &nodeId));
2571 sd->mask.set(nodeId);
2572 }
2573 }
2574 sum += sd->mask.count();
2575 execARBIT_CFG(signal);
2576 }
2577
2578 if (arbitRec.method == ArbitRec::METHOD_DEFAULT &&
2579 sum == 0)
2580 {
2581 jam();
2582 infoEvent("Arbitration disabled, all API nodes have rank 0");
2583 arbitRec.method = ArbitRec::DISABLED;
2584 }
2585
2586 setNodeInfo(getOwnNodeId()).m_mysql_version = NDB_MYSQL_VERSION_D;
2587
2588 ndb_mgm_configuration_iterator * iter =
2589 m_ctx.m_config.getClusterConfigIterator();
2590 for (ndb_mgm_first(iter); ndb_mgm_valid(iter); ndb_mgm_next(iter))
2591 {
2592 jam();
2593 Uint32 nodeId = 0;
2594 if (ndb_mgm_get_int_parameter(iter, CFG_NODE_ID, &nodeId) == 0)
2595 {
2596 jam();
2597 if (nodeId < MAX_NDB_NODES && getNodeInfo(nodeId).m_type == NodeInfo::DB)
2598 {
2599 Uint32 hbOrder = 0;
2600 ndb_mgm_get_int_parameter(iter, CFG_DB_HB_ORDER, &hbOrder);
2601
2602 NodeRecPtr nodePtr;
2603 nodePtr.i = nodeId;
2604 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
2605 nodePtr.p->hbOrder = hbOrder;
2606 }
2607 }
2608 }
2609 int hb_order_error = check_hb_order_config();
2610 if (hb_order_error == -1)
2611 {
2612 char msg[] = "Illegal HeartbeatOrder config, "
2613 "all nodes must have non-zero config value";
2614 progError(__LINE__, NDBD_EXIT_INVALID_CONFIG, msg);
2615 return;
2616 }
2617 if (hb_order_error == -2)
2618 {
2619 char msg[] = "Illegal HeartbeatOrder config, "
2620 "the nodes must have distinct config values";
2621 progError(__LINE__, NDBD_EXIT_INVALID_CONFIG, msg);
2622 return;
2623 }
2624 ndbrequire(hb_order_error == 0);
2625 }//Qmgr::initData()
2626
2627
2628 /**---------------------------------------------------------------------------
2629 * HERE WE RECEIVE THE JOB TABLE SIGNAL EVERY 10 MILLISECONDS.
2630 * WE WILL USE THIS TO CHECK IF IT IS TIME TO CHECK THE NEIGHBOUR NODE.
2631 * WE WILL ALSO SEND A SIGNAL TO BLOCKS THAT NEED A TIME SIGNAL AND
2632 * DO NOT WANT TO USE JOB TABLE SIGNALS.
2633 *---------------------------------------------------------------------------*/
timerHandlingLab(Signal * signal)2634 void Qmgr::timerHandlingLab(Signal* signal)
2635 {
2636 NDB_TICKS TcurrentTime = NdbTick_CurrentMillisecond();
2637 NodeRecPtr myNodePtr;
2638 myNodePtr.i = getOwnNodeId();
2639 ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);
2640
2641 Uint32 sentHi = signal->theData[1];
2642 Uint32 sentLo = signal->theData[2];
2643 Uint64 sent = (Uint64(sentHi) << 32) + sentLo;
2644
2645 if (TcurrentTime >= sent + 1000 || (TcurrentTime < sent))
2646 {
2647 jam();
2648 g_eventLogger->warning("timerHandlingLab now: %llu sent: %llu diff: %d",
2649 TcurrentTime, sent, int(TcurrentTime - sent));
2650 }
2651 else if (TcurrentTime >= sent + 150)
2652 {
2653 g_eventLogger->info("timerHandlingLab now: %llu sent: %llu diff: %d",
2654 TcurrentTime, sent, int(TcurrentTime - sent));
2655 }
2656
2657 if (myNodePtr.p->phase == ZRUNNING) {
2658 jam();
2659 /**---------------------------------------------------------------------
2660 * WE ARE ONLY PART OF HEARTBEAT CLUSTER IF WE ARE UP AND RUNNING.
2661 *---------------------------------------------------------------------*/
2662 if (hb_send_timer.check(TcurrentTime)) {
2663 jam();
2664 sendHeartbeat(signal);
2665 hb_send_timer.reset(TcurrentTime);
2666 }
2667 if (likely(! m_connectivity_check.m_active))
2668 {
2669 if (hb_check_timer.check(TcurrentTime)) {
2670 jam();
2671 checkHeartbeat(signal);
2672 hb_check_timer.reset(TcurrentTime);
2673 }
2674 }
2675 else
2676 {
2677 /* Connectivity check */
2678 if (m_connectivity_check.m_timer.check(TcurrentTime)) {
2679 jam();
2680 checkConnectivityTimeSignal(signal);
2681 m_connectivity_check.m_timer.reset(TcurrentTime);
2682 }
2683 }
2684 }
2685
2686 if (interface_check_timer.check(TcurrentTime)) {
2687 jam();
2688 interface_check_timer.reset(TcurrentTime);
2689 checkStartInterface(signal, TcurrentTime);
2690 }
2691
2692 if (hb_api_timer.check(TcurrentTime))
2693 {
2694 jam();
2695 hb_api_timer.reset(TcurrentTime);
2696 apiHbHandlingLab(signal, TcurrentTime);
2697 }
2698
2699 if (cactivateApiCheck != 0) {
2700 jam();
2701 if (clatestTransactionCheck == 0) {
2702 //-------------------------------------------------------------
2703 // Initialise the Transaction check timer.
2704 //-------------------------------------------------------------
2705 clatestTransactionCheck = TcurrentTime;
2706 }//if
2707 int counter = 0;
2708 while (TcurrentTime > ((NDB_TICKS)10 + clatestTransactionCheck)) {
2709 jam();
2710 clatestTransactionCheck += (NDB_TICKS)10;
2711 sendSignal(DBTC_REF, GSN_TIME_SIGNAL, signal, 1, JBB);
2712 sendSignal(DBLQH_REF, GSN_TIME_SIGNAL, signal, 1, JBB);
2713 counter++;
2714 if (counter > 1) {
2715 jam();
2716 break;
2717 } else {
2718 ;
2719 }//if
2720 }//while
2721 }//if
2722
2723 //--------------------------------------------------
2724 // Resend this signal with 10 milliseconds delay.
2725 //--------------------------------------------------
2726 signal->theData[0] = ZTIMER_HANDLING;
2727 signal->theData[1] = Uint32(TcurrentTime >> 32);
2728 signal->theData[2] = Uint32(TcurrentTime);
2729 sendSignalWithDelay(QMGR_REF, GSN_CONTINUEB, signal, 10, 3);
2730 return;
2731 }//Qmgr::timerHandlingLab()
2732
2733 /*---------------------------------------------------------------------------*/
2734 /* THIS MODULE HANDLES THE SENDING AND RECEIVING OF HEARTBEATS. */
2735 /*---------------------------------------------------------------------------*/
sendHeartbeat(Signal * signal)2736 void Qmgr::sendHeartbeat(Signal* signal)
2737 {
2738 NodeRecPtr localNodePtr;
2739 localNodePtr.i = cneighbourh;
2740 if (localNodePtr.i == ZNIL) {
2741 jam();
2742 /**---------------------------------------------------------------------
2743 * THERE ARE NO NEIGHBOURS. THIS IS POSSIBLE IF WE ARE THE ONLY NODE IN
2744 * THE CLUSTER.IN THIS CASE WE DO NOT NEED TO SEND ANY HEARTBEAT SIGNALS.
2745 *-----------------------------------------------------------------------*/
2746 return;
2747 }//if
2748 ptrCheckGuard(localNodePtr, MAX_NDB_NODES, nodeRec);
2749 signal->theData[0] = getOwnNodeId();
2750
2751 sendSignal(localNodePtr.p->blockRef, GSN_CM_HEARTBEAT, signal, 1, JBA);
2752 #ifdef VM_TRACE
2753 signal->theData[0] = NDB_LE_SentHeartbeat;
2754 signal->theData[1] = localNodePtr.i;
2755 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
2756 #endif
2757 }//Qmgr::sendHeartbeat()
2758
checkHeartbeat(Signal * signal)2759 void Qmgr::checkHeartbeat(Signal* signal)
2760 {
2761 NodeRecPtr nodePtr;
2762
2763 nodePtr.i = cneighbourl;
2764 if (nodePtr.i == ZNIL) {
2765 jam();
2766 /**---------------------------------------------------------------------
2767 * THERE ARE NO NEIGHBOURS. THIS IS POSSIBLE IF WE ARE THE ONLY NODE IN
2768 * THE CLUSTER. IN THIS CASE WE DO NOT NEED TO CHECK ANY HEARTBEATS.
2769 *-----------------------------------------------------------------------*/
2770 return;
2771 }//if
2772 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
2773
2774 setNodeInfo(nodePtr.i).m_heartbeat_cnt++;
2775 ndbrequire(nodePtr.p->phase == ZRUNNING);
2776 ndbrequire(getNodeInfo(nodePtr.i).m_type == NodeInfo::DB);
2777
2778 if(getNodeInfo(nodePtr.i).m_heartbeat_cnt > 2){
2779 signal->theData[0] = NDB_LE_MissedHeartbeat;
2780 signal->theData[1] = nodePtr.i;
2781 signal->theData[2] = getNodeInfo(nodePtr.i).m_heartbeat_cnt - 1;
2782 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3, JBB);
2783 }
2784
2785 if (getNodeInfo(nodePtr.i).m_heartbeat_cnt > 4) {
2786 jam();
2787 if (m_connectivity_check.getEnabled())
2788 {
2789 jam();
2790 /* Start connectivity check, indicating the cause */
2791 startConnectivityCheck(signal, FailRep::ZHEARTBEAT_FAILURE, nodePtr.i);
2792 return;
2793 }
2794 else
2795 {
2796 /**----------------------------------------------------------------------
2797 * OUR LEFT NEIGHBOUR HAVE KEPT QUIET FOR THREE CONSECUTIVE HEARTBEAT
2798 * PERIODS. THUS WE DECLARE HIM DOWN.
2799 *----------------------------------------------------------------------*/
2800 signal->theData[0] = NDB_LE_DeadDueToHeartbeat;
2801 signal->theData[1] = nodePtr.i;
2802 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
2803
2804 failReportLab(signal, nodePtr.i, FailRep::ZHEARTBEAT_FAILURE, getOwnNodeId());
2805 return;
2806 }
2807 }//if
2808 }//Qmgr::checkHeartbeat()
2809
apiHbHandlingLab(Signal * signal,Uint64 now)2810 void Qmgr::apiHbHandlingLab(Signal* signal, Uint64 now)
2811 {
2812 NodeRecPtr TnodePtr;
2813
2814 for (TnodePtr.i = 1; TnodePtr.i < MAX_NODES; TnodePtr.i++) {
2815 const Uint32 nodeId = TnodePtr.i;
2816 ptrAss(TnodePtr, nodeRec);
2817
2818 const NodeInfo::NodeType type = getNodeInfo(nodeId).getType();
2819 if(type == NodeInfo::DB)
2820 continue;
2821
2822 if(type == NodeInfo::INVALID)
2823 continue;
2824
2825 if (c_connectedNodes.get(nodeId))
2826 {
2827 jam();
2828 setNodeInfo(TnodePtr.i).m_heartbeat_cnt++;
2829
2830 if(getNodeInfo(TnodePtr.i).m_heartbeat_cnt > 2)
2831 {
2832 signal->theData[0] = NDB_LE_MissedHeartbeat;
2833 signal->theData[1] = nodeId;
2834 signal->theData[2] = getNodeInfo(TnodePtr.i).m_heartbeat_cnt - 1;
2835 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3, JBB);
2836 }
2837
2838 if (getNodeInfo(TnodePtr.i).m_heartbeat_cnt > 4)
2839 {
2840 jam();
2841 /*------------------------------------------------------------------*/
2842 /* THE API NODE HAS NOT SENT ANY HEARTBEAT FOR THREE SECONDS.
2843 * WE WILL DISCONNECT FROM IT NOW.
2844 *------------------------------------------------------------------*/
2845 /*------------------------------------------------------------------*/
2846 /* We call node_failed to release all connections for this api node */
2847 /*------------------------------------------------------------------*/
2848 signal->theData[0] = NDB_LE_DeadDueToHeartbeat;
2849 signal->theData[1] = nodeId;
2850 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
2851
2852 api_failed(signal, nodeId);
2853 }//if
2854 }//if
2855 else if (TnodePtr.p->phase == ZAPI_INACTIVE &&
2856 TnodePtr.p->m_secret != 0 && now > TnodePtr.p->m_alloc_timeout)
2857 {
2858 jam();
2859 TnodePtr.p->m_secret = 0;
2860 warningEvent("Releasing node id allocation for node %u",
2861 TnodePtr.i);
2862 }
2863 }//for
2864 return;
2865 }//Qmgr::apiHbHandlingLab()
2866
checkStartInterface(Signal * signal,Uint64 now)2867 void Qmgr::checkStartInterface(Signal* signal, Uint64 now)
2868 {
2869 NodeRecPtr nodePtr;
2870 /*------------------------------------------------------------------------*/
2871 // This method is called once per second. After a disconnect we wait at
2872 // least three seconds before allowing new connects. We will also ensure
2873 // that handling of the failure is completed before we allow new connections.
2874 /*------------------------------------------------------------------------*/
2875 for (nodePtr.i = 1; nodePtr.i < MAX_NODES; nodePtr.i++) {
2876 ptrAss(nodePtr, nodeRec);
2877 Uint32 type = getNodeInfo(nodePtr.i).m_type;
2878 if (nodePtr.p->phase == ZFAIL_CLOSING) {
2879 jam();
2880 setNodeInfo(nodePtr.i).m_heartbeat_cnt++;
2881 if (c_connectedNodes.get(nodePtr.i)){
2882 jam();
2883 /*-------------------------------------------------------------------*/
2884 // We need to ensure that the connection is not restored until it has
2885 // been disconnected for at least three seconds.
2886 /*-------------------------------------------------------------------*/
2887 setNodeInfo(nodePtr.i).m_heartbeat_cnt= 0;
2888 }//if
2889 if ((getNodeInfo(nodePtr.i).m_heartbeat_cnt > 3)
2890 && (nodePtr.p->failState == NORMAL)) {
2891 /**------------------------------------------------------------------
2892 * WE HAVE DISCONNECTED THREE SECONDS AGO. WE ARE NOW READY TO
2893 * CONNECT AGAIN AND ACCEPT NEW REGISTRATIONS FROM THIS NODE.
2894 * WE WILL NOT ALLOW CONNECTIONS OF API NODES UNTIL API FAIL HANDLING
2895 * IS COMPLETE.
2896 *-------------------------------------------------------------------*/
2897 nodePtr.p->failState = NORMAL;
2898 nodePtr.p->m_secret = 0;
2899 switch(type){
2900 case NodeInfo::DB:
2901 jam();
2902 nodePtr.p->phase = ZINIT;
2903 break;
2904 case NodeInfo::MGM:
2905 jam();
2906 nodePtr.p->phase = ZAPI_INACTIVE;
2907 break;
2908 case NodeInfo::API:
2909 jam();
2910 if (c_allow_api_connect)
2911 {
2912 jam();
2913 nodePtr.p->phase = ZAPI_INACTIVE;
2914 break;
2915 }
2916 else
2917 {
2918 /**
2919 * Dont allow API node to connect before c_allow_api_connect
2920 */
2921 jam();
2922 setNodeInfo(nodePtr.i).m_heartbeat_cnt = 3;
2923 continue;
2924 }
2925 }
2926
2927 setNodeInfo(nodePtr.i).m_heartbeat_cnt= 0;
2928 signal->theData[0] = 0;
2929 signal->theData[1] = nodePtr.i;
2930 sendSignal(CMVMI_REF, GSN_OPEN_COMREQ, signal, 2, JBA);
2931 }
2932 else
2933 {
2934 jam();
2935 if(((getNodeInfo(nodePtr.i).m_heartbeat_cnt + 1) % 60) == 0)
2936 {
2937 jam();
2938 char buf[256];
2939 if (getNodeInfo(nodePtr.i).m_type == NodeInfo::DB)
2940 {
2941 jam();
2942 BaseString::snprintf(buf, sizeof(buf),
2943 "Failure handling of node %d has not completed"
2944 " in %d min - state = %d",
2945 nodePtr.i,
2946 (getNodeInfo(nodePtr.i).m_heartbeat_cnt+1)/60,
2947 nodePtr.p->failState);
2948 warningEvent("%s", buf);
2949 if (((getNodeInfo(nodePtr.i).m_heartbeat_cnt + 1) % 300) == 0)
2950 {
2951 jam();
2952 /**
2953 * Also dump DIH nf-state
2954 */
2955 signal->theData[0] = 7019;
2956 signal->theData[1] = nodePtr.i;
2957 sendSignal(DBDIH_REF, GSN_DUMP_STATE_ORD, signal, 2, JBB);
2958 }
2959 }
2960 else
2961 {
2962 jam();
2963 BaseString::snprintf(buf, sizeof(buf),
2964 "Failure handling of api %u has not completed"
2965 " in %d min - state = %d",
2966 nodePtr.i,
2967 (getNodeInfo(nodePtr.i).m_heartbeat_cnt+1)/60,
2968 nodePtr.p->failState);
2969 warningEvent("%s", buf);
2970 if (nodePtr.p->failState == WAITING_FOR_API_FAILCONF)
2971 {
2972 jam();
2973 compile_time_assert(NDB_ARRAY_SIZE(nodePtr.p->m_failconf_blocks) == 5);
2974 BaseString::snprintf(buf, sizeof(buf),
2975 " Waiting for blocks: %u %u %u %u %u",
2976 nodePtr.p->m_failconf_blocks[0],
2977 nodePtr.p->m_failconf_blocks[1],
2978 nodePtr.p->m_failconf_blocks[2],
2979 nodePtr.p->m_failconf_blocks[3],
2980 nodePtr.p->m_failconf_blocks[4]);
2981 warningEvent("%s", buf);
2982 }
2983 }
2984 }
2985 }
2986 }
2987 else if (type == NodeInfo::DB && nodePtr.p->phase == ZINIT &&
2988 nodePtr.p->m_secret != 0 && now > nodePtr.p->m_alloc_timeout)
2989 {
2990 jam();
2991 nodePtr.p->m_secret = 0;
2992 warningEvent("Releasing node id allocation for node %u",
2993 nodePtr.i);
2994 }
2995 }//for
2996 return;
2997 }//Qmgr::checkStartInterface()
2998
2999 /**-------------------------------------------------------------------------
 * This method is called when a DISCONNECT_REP signal has arrived, which means
 * that the API node is gone and we want to release its resources in the
 * TC/DICT/SPJ/SUMA blocks.
3002 *---------------------------------------------------------------------------*/
sendApiFailReq(Signal * signal,Uint16 failedNodeNo,bool sumaOnly)3003 void Qmgr::sendApiFailReq(Signal* signal, Uint16 failedNodeNo, bool sumaOnly)
3004 {
3005 jamEntry();
3006 signal->theData[0] = failedNodeNo;
3007 signal->theData[1] = QMGR_REF;
3008
3009 /* We route the ApiFailReq signals via CMVMI
3010 * This is done to ensure that they are received after
3011 * any pending signals from the failed Api node when
3012 * running ndbmtd, as these signals would be enqueued from
3013 * the thread running CMVMI
3014 */
3015 Uint32 routedSignalSectionI = RNIL;
3016 ndbrequire(appendToSection(routedSignalSectionI,
3017 &signal->theData[0],
3018 2));
3019 SectionHandle handle(this, routedSignalSectionI);
3020
3021 /* RouteOrd data */
3022 RouteOrd* routeOrd = (RouteOrd*) &signal->theData[0];
3023 routeOrd->srcRef = reference();
3024 routeOrd->gsn = GSN_API_FAILREQ;
3025
3026 NodeRecPtr failedNodePtr;
3027 failedNodePtr.i = failedNodeNo;
3028 ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec);
3029 failedNodePtr.p->failState = WAITING_FOR_API_FAILCONF;
3030
3031
3032 /* Send ROUTE_ORD signals to CMVMI via JBA
3033 * CMVMI will then immediately send the API_FAILREQ
3034 * signals to the destination block(s) using JBB
3035 * These API_FAILREQ signals will be sent *after*
3036 * any JBB signals enqueued from the failed API
3037 * by the CMVMI thread.
3038 */
3039 if (!sumaOnly)
3040 {
3041 jam();
3042 add_failconf_block(failedNodePtr, DBTC);
3043 routeOrd->dstRef = DBTC_REF;
3044 sendSignalNoRelease(CMVMI_REF, GSN_ROUTE_ORD, signal,
3045 RouteOrd::SignalLength,
3046 JBA, &handle);
3047
3048 add_failconf_block(failedNodePtr, DBDICT);
3049 routeOrd->dstRef = DBDICT_REF;
3050 sendSignalNoRelease(CMVMI_REF, GSN_ROUTE_ORD, signal,
3051 RouteOrd::SignalLength,
3052 JBA, &handle);
3053
3054 add_failconf_block(failedNodePtr, DBSPJ);
3055 routeOrd->dstRef = DBSPJ_REF;
3056 sendSignalNoRelease(CMVMI_REF, GSN_ROUTE_ORD, signal,
3057 RouteOrd::SignalLength,
3058 JBA, &handle);
3059 }
3060
3061 /* Suma always notified */
3062 add_failconf_block(failedNodePtr, SUMA);
3063 routeOrd->dstRef = SUMA_REF;
3064 sendSignal(CMVMI_REF, GSN_ROUTE_ORD, signal,
3065 RouteOrd::SignalLength,
3066 JBA, &handle);
3067 }//Qmgr::sendApiFailReq()
3068
execAPI_FAILREQ(Signal * signal)3069 void Qmgr::execAPI_FAILREQ(Signal* signal)
3070 {
3071 jamEntry();
3072 NodeRecPtr failedNodePtr;
3073 failedNodePtr.i = signal->theData[0];
3074 // signal->theData[1] == QMGR_REF
3075 ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec);
3076
3077 ndbrequire(getNodeInfo(failedNodePtr.i).getType() != NodeInfo::DB);
3078
3079 api_failed(signal, signal->theData[0]);
3080 }
3081
execAPI_FAILCONF(Signal * signal)3082 void Qmgr::execAPI_FAILCONF(Signal* signal)
3083 {
3084 NodeRecPtr failedNodePtr;
3085
3086 jamEntry();
3087 failedNodePtr.i = signal->theData[0];
3088 ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec);
3089
3090 Uint32 block = refToMain(signal->theData[1]);
3091 if (failedNodePtr.p->failState != WAITING_FOR_API_FAILCONF ||
3092 !remove_failconf_block(failedNodePtr, block))
3093 {
3094 jam();
3095 ndbout << "execAPI_FAILCONF from " << block
3096 << " failedNodePtr.p->failState = "
3097 << (Uint32)(failedNodePtr.p->failState)
3098 << " blocks: ";
3099 for (Uint32 i = 0;i<NDB_ARRAY_SIZE(failedNodePtr.p->m_failconf_blocks);i++)
3100 {
3101 printf("%u ", failedNodePtr.p->m_failconf_blocks[i]);
3102 }
3103 ndbout << endl;
3104 systemErrorLab(signal, __LINE__);
3105 }//if
3106
3107 if (is_empty_failconf_block(failedNodePtr))
3108 {
3109 jam();
3110 failedNodePtr.p->failState = NORMAL;
3111
3112 /**
3113 * When we set this state, connection will later be opened
3114 * in checkStartInterface
3115 */
3116 }
3117 return;
3118 }//Qmgr::execAPI_FAILCONF()
3119
3120 void
add_failconf_block(NodeRecPtr nodePtr,Uint32 block)3121 Qmgr::add_failconf_block(NodeRecPtr nodePtr, Uint32 block)
3122 {
3123 // Check that it does not already exists!!
3124 Uint32 pos = 0;
3125 for (; pos < NDB_ARRAY_SIZE(nodePtr.p->m_failconf_blocks); pos++)
3126 {
3127 jam();
3128 if (nodePtr.p->m_failconf_blocks[pos] == 0)
3129 {
3130 jam();
3131 break;
3132 }
3133 else if (nodePtr.p->m_failconf_blocks[pos] == block)
3134 {
3135 jam();
3136 break;
3137 }
3138 }
3139
3140 ndbrequire(pos != NDB_ARRAY_SIZE(nodePtr.p->m_failconf_blocks));
3141 ndbassert(nodePtr.p->m_failconf_blocks[pos] != block);
3142 if (nodePtr.p->m_failconf_blocks[pos] == block)
3143 {
3144 jam();
3145 /**
3146 * Already in list!!
3147 */
3148 #ifdef ERROR_INSERT
3149 ndbrequire(false);
3150 #endif
3151 return;
3152 }
3153 ndbrequire(nodePtr.p->m_failconf_blocks[pos] == 0);
3154 nodePtr.p->m_failconf_blocks[pos] = block;
3155 }
3156
3157 bool
remove_failconf_block(NodeRecPtr nodePtr,Uint32 block)3158 Qmgr::remove_failconf_block(NodeRecPtr nodePtr, Uint32 block)
3159 {
3160 // Check that it does exists!!
3161 Uint32 pos = 0;
3162 for (; pos < NDB_ARRAY_SIZE(nodePtr.p->m_failconf_blocks); pos++)
3163 {
3164 jam();
3165 if (nodePtr.p->m_failconf_blocks[pos] == 0)
3166 {
3167 jam();
3168 break;
3169 }
3170 else if (nodePtr.p->m_failconf_blocks[pos] == block)
3171 {
3172 jam();
3173 break;
3174 }
3175 }
3176
3177 if (pos == NDB_ARRAY_SIZE(nodePtr.p->m_failconf_blocks) ||
3178 nodePtr.p->m_failconf_blocks[pos] != block)
3179 {
3180 jam();
3181 /**
3182 * Not found!!
3183 */
3184 return false;
3185 }
3186
3187 nodePtr.p->m_failconf_blocks[pos] = 0;
3188 for (pos++; pos < NDB_ARRAY_SIZE(nodePtr.p->m_failconf_blocks); pos++)
3189 {
3190 jam();
3191 nodePtr.p->m_failconf_blocks[pos - 1] = nodePtr.p->m_failconf_blocks[pos];
3192 }
3193
3194 return true;
3195 }
3196
3197 bool
is_empty_failconf_block(NodeRecPtr nodePtr) const3198 Qmgr::is_empty_failconf_block(NodeRecPtr nodePtr) const
3199 {
3200 return nodePtr.p->m_failconf_blocks[0] == 0;
3201 }
3202
execNDB_FAILCONF(Signal * signal)3203 void Qmgr::execNDB_FAILCONF(Signal* signal)
3204 {
3205 NodeRecPtr failedNodePtr;
3206 NodeRecPtr nodePtr;
3207
3208 jamEntry();
3209 failedNodePtr.i = signal->theData[0];
3210
3211 if (ERROR_INSERTED(930))
3212 {
3213 CLEAR_ERROR_INSERT_VALUE;
3214 infoEvent("Discarding NDB_FAILCONF for %u", failedNodePtr.i);
3215 return;
3216 }
3217
3218 ptrCheckGuard(failedNodePtr, MAX_NDB_NODES, nodeRec);
3219 if (failedNodePtr.p->failState == WAITING_FOR_NDB_FAILCONF){
3220 failedNodePtr.p->failState = NORMAL;
3221 } else {
3222 jam();
3223
3224 char buf[100];
3225 BaseString::snprintf(buf, 100,
3226 "Received NDB_FAILCONF for node %u with state: %d %d",
3227 failedNodePtr.i,
3228 failedNodePtr.p->phase,
3229 failedNodePtr.p->failState);
3230 progError(__LINE__, 0, buf);
3231 systemErrorLab(signal, __LINE__);
3232 }//if
3233
3234 if (cpresident == getOwnNodeId())
3235 {
3236 jam();
3237
3238 CRASH_INSERTION(936);
3239 }
3240
3241 /**
3242 * Prepare a NFCompleteRep and send to all connected API's
3243 * They can then abort all transaction waiting for response from
3244 * the failed node
3245 *
3246 * NOTE: This is sent from all nodes, as otherwise we would need
3247 * take-over if cpresident dies befor sending this
3248 */
3249 NFCompleteRep * const nfComp = (NFCompleteRep *)&signal->theData[0];
3250 nfComp->blockNo = QMGR_REF;
3251 nfComp->nodeId = getOwnNodeId();
3252 nfComp->failedNodeId = failedNodePtr.i;
3253
3254 for (nodePtr.i = 1; nodePtr.i < MAX_NODES; nodePtr.i++)
3255 {
3256 jam();
3257 ptrAss(nodePtr, nodeRec);
3258 if (nodePtr.p->phase == ZAPI_ACTIVE){
3259 jam();
3260 sendSignal(nodePtr.p->blockRef, GSN_NF_COMPLETEREP, signal,
3261 NFCompleteRep::SignalLength, JBB);
3262 }//if
3263 }//for
3264 return;
3265 }//Qmgr::execNDB_FAILCONF()
3266
3267 void
execNF_COMPLETEREP(Signal * signal)3268 Qmgr::execNF_COMPLETEREP(Signal* signal)
3269 {
3270 jamEntry();
3271 NFCompleteRep rep = *(NFCompleteRep*)signal->getDataPtr();
3272 if (rep.blockNo != DBTC)
3273 {
3274 jam();
3275 ndbassert(false);
3276 return;
3277 }
3278
3279 /**
3280 * This is a disgrace...but execNF_COMPLETEREP in ndbapi is a mess
3281 * actually equally messy as it is in ndbd...
3282 * this is therefore a simple way of having ndbapi to get
3283 * earlier information that transactions can be aborted
3284 */
3285 signal->theData[0] = rep.failedNodeId;
3286 NodeRecPtr nodePtr;
3287 for (nodePtr.i = 1; nodePtr.i < MAX_NODES; nodePtr.i++)
3288 {
3289 jam();
3290 ptrAss(nodePtr, nodeRec);
3291 if (nodePtr.p->phase == ZAPI_ACTIVE &&
3292 ndb_takeovertc(getNodeInfo(nodePtr.i).m_version))
3293 {
3294 jam();
3295 sendSignal(nodePtr.p->blockRef, GSN_TAKE_OVERTCCONF, signal,
3296 NFCompleteRep::SignalLength, JBB);
3297 }//if
3298 }//for
3299 return;
3300 }
3301
3302 /*******************************/
3303 /* DISCONNECT_REP */
3304 /*******************************/
3305 const char *lookupConnectionError(Uint32 err);
3306
execDISCONNECT_REP(Signal * signal)3307 void Qmgr::execDISCONNECT_REP(Signal* signal)
3308 {
3309 jamEntry();
3310 const DisconnectRep * const rep = (DisconnectRep *)&signal->theData[0];
3311 const Uint32 nodeId = rep->nodeId;
3312 const Uint32 err = rep->err;
3313 const NodeInfo nodeInfo = getNodeInfo(nodeId);
3314 c_connectedNodes.clear(nodeId);
3315
3316 if (nodeInfo.getType() == NodeInfo::DB)
3317 {
3318 c_readnodes_nodes.clear(nodeId);
3319 }
3320
3321 NodeRecPtr nodePtr;
3322 nodePtr.i = getOwnNodeId();
3323 ptrCheckGuard(nodePtr, MAX_NODES, nodeRec);
3324
3325 char buf[100];
3326 if (nodeInfo.getType() == NodeInfo::DB &&
3327 getNodeState().startLevel < NodeState::SL_STARTED)
3328 {
3329 jam();
3330 CRASH_INSERTION(932);
3331 CRASH_INSERTION(938);
3332 BaseString::snprintf(buf, 100, "Node %u disconnected", nodeId);
3333 progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf);
3334 ndbrequire(false);
3335 }
3336
3337 if (getNodeInfo(nodeId).getType() != NodeInfo::DB)
3338 {
3339 jam();
3340 api_failed(signal, nodeId);
3341 return;
3342 }
3343
3344 switch(nodePtr.p->phase){
3345 case ZRUNNING:
3346 jam();
3347 break;
3348 case ZINIT:
3349 ndbrequire(false);
3350 case ZSTARTING:
3351 progError(__LINE__, NDBD_EXIT_CONNECTION_SETUP_FAILED,
3352 lookupConnectionError(err));
3353 ndbrequire(false);
3354 case ZPREPARE_FAIL:
3355 ndbrequire(false);
3356 case ZFAIL_CLOSING:
3357 ndbrequire(false);
3358 case ZAPI_ACTIVE:
3359 ndbrequire(false);
3360 case ZAPI_INACTIVE:
3361 {
3362 BaseString::snprintf(buf, 100, "Node %u disconnected", nodeId);
3363 progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf);
3364 ndbrequire(false);
3365 }
3366 }
3367 node_failed(signal, nodeId);
3368 }//DISCONNECT_REP
3369
node_failed(Signal * signal,Uint16 aFailedNode)3370 void Qmgr::node_failed(Signal* signal, Uint16 aFailedNode)
3371 {
3372 NodeRecPtr failedNodePtr;
3373 /**------------------------------------------------------------------------
3374 * A COMMUNICATION LINK HAS BEEN DISCONNECTED. WE MUST TAKE SOME ACTION
3375 * DUE TO THIS.
3376 *-----------------------------------------------------------------------*/
3377 failedNodePtr.i = aFailedNode;
3378 ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec);
3379 failedNodePtr.p->m_secret = 0; // Not yet Uint64(rand()) << 32 + rand();
3380
3381 ndbrequire(getNodeInfo(failedNodePtr.i).getType() == NodeInfo::DB);
3382
3383 /**---------------------------------------------------------------------
3384 * THE OTHER NODE IS AN NDB NODE, WE HANDLE IT AS IF A HEARTBEAT
3385 * FAILURE WAS DISCOVERED.
3386 *---------------------------------------------------------------------*/
3387 switch(failedNodePtr.p->phase){
3388 case ZRUNNING:
3389 jam();
3390 failReportLab(signal, aFailedNode, FailRep::ZLINK_FAILURE, getOwnNodeId());
3391 return;
3392 case ZFAIL_CLOSING:
3393 jam();
3394 return;
3395 case ZSTARTING:
3396 /**
3397 * bug#42422
3398 * Force "real" failure handling
3399 */
3400 failedNodePtr.p->phase = ZRUNNING;
3401 failReportLab(signal, aFailedNode, FailRep::ZLINK_FAILURE, getOwnNodeId());
3402 return;
3403 // Fall-through
3404 default:
3405 jam();
3406 /*---------------------------------------------------------------------*/
3407 // The other node is still not in the cluster but disconnected.
3408 // We must restart communication in three seconds.
3409 /*---------------------------------------------------------------------*/
3410 failedNodePtr.p->failState = NORMAL;
3411 failedNodePtr.p->phase = ZFAIL_CLOSING;
3412 setNodeInfo(failedNodePtr.i).m_heartbeat_cnt= 0;
3413
3414 CloseComReqConf * const closeCom =
3415 (CloseComReqConf *)&signal->theData[0];
3416
3417 closeCom->xxxBlockRef = reference();
3418 closeCom->requestType = CloseComReqConf::RT_NO_REPLY;
3419 closeCom->failNo = 0;
3420 closeCom->noOfNodes = 1;
3421 NodeBitmask::clear(closeCom->theNodes);
3422 NodeBitmask::set(closeCom->theNodes, failedNodePtr.i);
3423 sendSignal(CMVMI_REF, GSN_CLOSE_COMREQ, signal,
3424 CloseComReqConf::SignalLength, JBA);
3425 }//if
3426 return;
3427 }
3428
3429 void
execUPGRADE_PROTOCOL_ORD(Signal * signal)3430 Qmgr::execUPGRADE_PROTOCOL_ORD(Signal* signal)
3431 {
3432 const UpgradeProtocolOrd* ord = (UpgradeProtocolOrd*)signal->getDataPtr();
3433 switch(ord->type){
3434 case UpgradeProtocolOrd::UPO_ENABLE_MICRO_GCP:
3435 jam();
3436 m_micro_gcp_enabled = true;
3437 return;
3438 }
3439 }
3440
3441 void
api_failed(Signal * signal,Uint32 nodeId)3442 Qmgr::api_failed(Signal* signal, Uint32 nodeId)
3443 {
3444 NodeRecPtr failedNodePtr;
3445 /**------------------------------------------------------------------------
3446 * A COMMUNICATION LINK HAS BEEN DISCONNECTED. WE MUST TAKE SOME ACTION
3447 * DUE TO THIS.
3448 *-----------------------------------------------------------------------*/
3449 failedNodePtr.i = nodeId;
3450 ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec);
3451 failedNodePtr.p->m_secret = 0; // Not yet Uint64(rand()) << 32 + rand();
3452
3453 if (failedNodePtr.p->phase == ZFAIL_CLOSING)
3454 {
3455 /**
3456 * Failure handling already in progress
3457 */
3458 jam();
3459 return;
3460 }
3461
3462 ndbrequire(failedNodePtr.p->failState == NORMAL);
3463
3464 /* Send API_FAILREQ to peer QMGR blocks to allow them to disconnect
3465 * quickly
3466 * Local application blocks get API_FAILREQ once all pending signals
3467 * from the failed API have been processed.
3468 */
3469 signal->theData[0] = failedNodePtr.i;
3470 signal->theData[1] = QMGR_REF;
3471 NodeReceiverGroup rg(QMGR, c_clusterNodes);
3472 sendSignal(rg, GSN_API_FAILREQ, signal, 2, JBA);
3473
3474 /* Now ask CMVMI to disconnect the node */
3475 FailState initialState = (failedNodePtr.p->phase == ZAPI_ACTIVE) ?
3476 WAITING_FOR_CLOSECOMCONF_ACTIVE :
3477 WAITING_FOR_CLOSECOMCONF_NOTACTIVE;
3478
3479 failedNodePtr.p->failState = initialState;
3480 failedNodePtr.p->phase = ZFAIL_CLOSING;
3481 setNodeInfo(failedNodePtr.i).m_heartbeat_cnt= 0;
3482 setNodeInfo(failedNodePtr.i).m_version = 0;
3483 recompute_version_info(getNodeInfo(failedNodePtr.i).m_type);
3484
3485 CloseComReqConf * const closeCom = (CloseComReqConf *)&signal->theData[0];
3486 closeCom->xxxBlockRef = reference();
3487 closeCom->requestType = CloseComReqConf::RT_API_FAILURE;
3488 closeCom->failNo = 0;
3489 closeCom->noOfNodes = 1;
3490 NodeBitmask::clear(closeCom->theNodes);
3491 NodeBitmask::set(closeCom->theNodes, failedNodePtr.i);
3492 sendSignal(CMVMI_REF, GSN_CLOSE_COMREQ, signal,
3493 CloseComReqConf::SignalLength, JBA);
3494 } // api_failed
3495
3496 /**--------------------------------------------------------------------------
3497 * AN API NODE IS REGISTERING. IF FOR THE FIRST TIME WE WILL ENABLE
3498 * COMMUNICATION WITH ALL NDB BLOCKS.
3499 *---------------------------------------------------------------------------*/
3500 /*******************************/
3501 /* API_REGREQ */
3502 /*******************************/
execAPI_REGREQ(Signal * signal)3503 void Qmgr::execAPI_REGREQ(Signal* signal)
3504 {
3505 jamEntry();
3506
3507 ApiRegReq* req = (ApiRegReq*)signal->getDataPtr();
3508 const Uint32 version = req->version;
3509 const BlockReference ref = req->ref;
3510
3511 Uint32 mysql_version = req->mysql_version;
3512 if (version < NDBD_SPLIT_VERSION)
3513 mysql_version = 0;
3514
3515 NodeRecPtr apiNodePtr;
3516 apiNodePtr.i = refToNode(ref);
3517 ptrCheckGuard(apiNodePtr, MAX_NODES, nodeRec);
3518
3519 if (apiNodePtr.p->phase == ZFAIL_CLOSING)
3520 {
3521 jam();
3522 /**
3523 * This node is pending CLOSE_COM_CONF
3524 * ignore API_REGREQ
3525 */
3526 return;
3527 }
3528
3529 #if 0
3530 ndbout_c("Qmgr::execAPI_REGREQ: Recd API_REGREQ (NodeId=%d)", apiNodePtr.i);
3531 #endif
3532
3533 bool compatability_check;
3534 const char * extra = 0;
3535 NodeInfo::NodeType type= getNodeInfo(apiNodePtr.i).getType();
3536 switch(type){
3537 case NodeInfo::API:
3538 if (m_micro_gcp_enabled && !ndb_check_micro_gcp(version))
3539 {
3540 jam();
3541 compatability_check = false;
3542 extra = ": micro gcp enabled";
3543 }
3544 else
3545 {
3546 jam();
3547 compatability_check = ndbCompatible_ndb_api(NDB_VERSION, version);
3548 }
3549 break;
3550 case NodeInfo::MGM:
3551 compatability_check = ndbCompatible_ndb_mgmt(NDB_VERSION, version);
3552 break;
3553 case NodeInfo::DB:
3554 case NodeInfo::INVALID:
3555 default:
3556 sendApiRegRef(signal, ref, ApiRegRef::WrongType);
3557 infoEvent("Invalid connection attempt with type %d", type);
3558 return;
3559 }
3560
3561 if (!compatability_check) {
3562 jam();
3563 char buf[NDB_VERSION_STRING_BUF_SZ];
3564 infoEvent("Connection attempt from %s id=%d with %s "
3565 "incompatible with %s%s",
3566 type == NodeInfo::API ? "api or mysqld" : "management server",
3567 apiNodePtr.i,
3568 ndbGetVersionString(version, mysql_version, 0,
3569 buf,
3570 sizeof(buf)),
3571 NDB_VERSION_STRING,
3572 extra ? extra : "");
3573 apiNodePtr.p->phase = ZAPI_INACTIVE;
3574 sendApiRegRef(signal, ref, ApiRegRef::UnsupportedVersion);
3575 return;
3576 }
3577
3578 setNodeInfo(apiNodePtr.i).m_version = version;
3579 setNodeInfo(apiNodePtr.i).m_mysql_version = mysql_version;
3580 setNodeInfo(apiNodePtr.i).m_heartbeat_cnt= 0;
3581
3582 NodeState state = getNodeState();
3583 if (apiNodePtr.p->phase == ZAPI_INACTIVE)
3584 {
3585 apiNodePtr.p->blockRef = ref;
3586 if ((state.startLevel == NodeState::SL_STARTED ||
3587 state.getSingleUserMode() ||
3588 (state.startLevel == NodeState::SL_STARTING &&
3589 state.starting.startPhase >= 100)))
3590 {
3591 jam();
3592 /**----------------------------------------------------------------------
3593 * THE API NODE IS REGISTERING. WE WILL ACCEPT IT BY CHANGING STATE AND
3594 * SENDING A CONFIRM.
3595 *----------------------------------------------------------------------*/
3596 apiNodePtr.p->phase = ZAPI_ACTIVE;
3597 EnableComReq *enableComReq = (EnableComReq *)signal->getDataPtrSend();
3598 enableComReq->m_senderRef = reference();
3599 enableComReq->m_senderData = ENABLE_COM_API_REGREQ;
3600 NodeBitmask::clear(enableComReq->m_nodeIds);
3601 NodeBitmask::set(enableComReq->m_nodeIds, apiNodePtr.i);
3602 sendSignal(CMVMI_REF, GSN_ENABLE_COMREQ, signal,
3603 EnableComReq::SignalLength, JBA);
3604 return;
3605 }
3606 }
3607
3608 sendApiRegConf(signal, apiNodePtr.i);
3609 }//Qmgr::execAPI_REGREQ()
3610
3611 void
handleEnableComApiRegreq(Signal * signal,Uint32 node)3612 Qmgr::handleEnableComApiRegreq(Signal *signal, Uint32 node)
3613 {
3614 NodeInfo::NodeType type = getNodeInfo(node).getType();
3615 Uint32 version = getNodeInfo(node).m_version;
3616 recompute_version_info(type, version);
3617
3618 signal->theData[0] = node;
3619 signal->theData[1] = version;
3620 NodeReceiverGroup rg(QMGR, c_clusterNodes);
3621 rg.m_nodes.clear(getOwnNodeId());
3622 sendVersionedDb(rg, GSN_NODE_VERSION_REP, signal, 2, JBB,
3623 NDBD_NODE_VERSION_REP);
3624
3625 signal->theData[0] = node;
3626 EXECUTE_DIRECT(NDBCNTR, GSN_API_START_REP, signal, 1);
3627
3628 sendApiRegConf(signal, node);
3629 }
3630
3631 void
sendApiRegConf(Signal * signal,Uint32 node)3632 Qmgr::sendApiRegConf(Signal *signal, Uint32 node)
3633 {
3634 NodeRecPtr apiNodePtr;
3635 apiNodePtr.i = node;
3636 ptrCheckGuard(apiNodePtr, MAX_NODES, nodeRec);
3637 const BlockReference ref = apiNodePtr.p->blockRef;
3638 ndbassert(ref != 0);
3639
3640 ApiRegConf * const apiRegConf = (ApiRegConf *)&signal->theData[0];
3641 apiRegConf->qmgrRef = reference();
3642 apiRegConf->apiHeartbeatFrequency = (chbApiDelay / 10);
3643 apiRegConf->version = NDB_VERSION;
3644 apiRegConf->mysql_version = NDB_MYSQL_VERSION_D;
3645 apiRegConf->nodeState = getNodeState();
3646 {
3647 NodeRecPtr nodePtr;
3648 nodePtr.i = getOwnNodeId();
3649 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
3650 Uint32 dynamicId = nodePtr.p->ndynamicId;
3651
3652 if(apiRegConf->nodeState.masterNodeId != getOwnNodeId()){
3653 jam();
3654 apiRegConf->nodeState.dynamicId = dynamicId;
3655 } else {
3656 apiRegConf->nodeState.dynamicId = (Uint32)(-(Int32)dynamicId);
3657 }
3658 }
3659 NodeVersionInfo info = getNodeVersionInfo();
3660 apiRegConf->minDbVersion = info.m_type[NodeInfo::DB].m_min_version;
3661 apiRegConf->nodeState.m_connected_nodes.assign(c_connectedNodes);
3662 sendSignal(ref, GSN_API_REGCONF, signal, ApiRegConf::SignalLength, JBB);
3663 }
3664
3665 void
sendVersionedDb(NodeReceiverGroup rg,GlobalSignalNumber gsn,Signal * signal,Uint32 length,JobBufferLevel jbuf,Uint32 minversion)3666 Qmgr::sendVersionedDb(NodeReceiverGroup rg,
3667 GlobalSignalNumber gsn,
3668 Signal* signal,
3669 Uint32 length,
3670 JobBufferLevel jbuf,
3671 Uint32 minversion)
3672 {
3673 jam();
3674 NodeVersionInfo info = getNodeVersionInfo();
3675 if (info.m_type[NodeInfo::DB].m_min_version >= minversion)
3676 {
3677 jam();
3678 sendSignal(rg, gsn, signal, length, jbuf);
3679 }
3680 else
3681 {
3682 jam();
3683 Uint32 i = 0, cnt = 0;
3684 while((i = rg.m_nodes.find(i + 1)) != NodeBitmask::NotFound)
3685 {
3686 jam();
3687 if (getNodeInfo(i).m_version >= minversion)
3688 {
3689 jam();
3690 cnt++;
3691 sendSignal(numberToRef(rg.m_block, i), gsn, signal, length, jbuf);
3692 }
3693 }
3694 ndbassert((cnt == 0 && rg.m_nodes.count() == 0) ||
3695 (cnt < rg.m_nodes.count()));
3696 }
3697 }
3698
3699 void
execAPI_VERSION_REQ(Signal * signal)3700 Qmgr::execAPI_VERSION_REQ(Signal * signal) {
3701 jamEntry();
3702 ApiVersionReq * const req = (ApiVersionReq *)signal->getDataPtr();
3703
3704 Uint32 senderRef = req->senderRef;
3705 Uint32 nodeId = req->nodeId;
3706
3707 ApiVersionConf * conf = (ApiVersionConf *)req;
3708 if(getNodeInfo(nodeId).m_connected)
3709 {
3710 conf->version = getNodeInfo(nodeId).m_version;
3711 conf->mysql_version = getNodeInfo(nodeId).m_mysql_version;
3712 struct in_addr in= globalTransporterRegistry.get_connect_address(nodeId);
3713 conf->inet_addr= in.s_addr;
3714 }
3715 else
3716 {
3717 conf->version = 0;
3718 conf->mysql_version = 0;
3719 conf->inet_addr= 0;
3720 }
3721 conf->nodeId = nodeId;
3722
3723 sendSignal(senderRef,
3724 GSN_API_VERSION_CONF,
3725 signal,
3726 ApiVersionConf::SignalLength, JBB);
3727 }
3728
3729 void
execNODE_VERSION_REP(Signal * signal)3730 Qmgr::execNODE_VERSION_REP(Signal* signal)
3731 {
3732 jamEntry();
3733 Uint32 nodeId = signal->theData[0];
3734 Uint32 version = signal->theData[1];
3735
3736 if (nodeId < MAX_NODES)
3737 {
3738 jam();
3739 Uint32 type = getNodeInfo(nodeId).m_type;
3740 setNodeInfo(nodeId).m_version = version;
3741 recompute_version_info(type, version);
3742 }
3743 }
3744
3745 void
recompute_version_info(Uint32 type,Uint32 version)3746 Qmgr::recompute_version_info(Uint32 type, Uint32 version)
3747 {
3748 NodeVersionInfo& info = setNodeVersionInfo();
3749 switch(type){
3750 case NodeInfo::DB:
3751 case NodeInfo::API:
3752 case NodeInfo::MGM:
3753 break;
3754 default:
3755 return;
3756 }
3757
3758 if (info.m_type[type].m_min_version == 0 ||
3759 version < info.m_type[type].m_min_version)
3760 info.m_type[type].m_min_version = version;
3761 if (version > info.m_type[type].m_max_version)
3762 info.m_type[type].m_max_version = version;
3763 }
3764
3765 void
recompute_version_info(Uint32 type)3766 Qmgr::recompute_version_info(Uint32 type)
3767 {
3768 switch(type){
3769 case NodeInfo::DB:
3770 case NodeInfo::API:
3771 case NodeInfo::MGM:
3772 break;
3773 default:
3774 return;
3775 }
3776
3777 Uint32 min = ~0, max = 0;
3778 Uint32 cnt = type == NodeInfo::DB ? MAX_NDB_NODES : MAX_NODES;
3779 for (Uint32 i = 1; i<cnt; i++)
3780 {
3781 if (getNodeInfo(i).m_type == type)
3782 {
3783 Uint32 version = getNodeInfo(i).m_version;
3784
3785 if (version)
3786 {
3787 if (version < min)
3788 min = version;
3789 if (version > max)
3790 max = version;
3791 }
3792 }
3793 }
3794
3795 NodeVersionInfo& info = setNodeVersionInfo();
3796 info.m_type[type].m_min_version = min == ~(Uint32)0 ? 0 : min;
3797 info.m_type[type].m_max_version = max;
3798 }
3799
#if 0
/**
 * NOTE: this function is compiled out by the enclosing #if 0.
 *
 * Version check for a connecting API/MGM node.
 *
 * Returns false when apiVersion has a lower major or a lower minor number
 * than ownVersion, apiVersion is at least API_UPGRADE_VERSION, and the
 * node is not a management server. Returns true in every other case
 * (for a management server the decision is deliberately left to it).
 */
bool
Qmgr::checkAPIVersion(NodeId nodeId,
                      Uint32 apiVersion, Uint32 ownVersion) const {
  bool ret=true;
  /**
   * First implementation...
   */
  if ((getMajor(apiVersion) < getMajor(ownVersion) ||
       getMinor(apiVersion) < getMinor(ownVersion)) &&
      apiVersion >= API_UPGRADE_VERSION) {
    jam();
    if ( getNodeInfo(nodeId).getType() != NodeInfo::MGM ) {
      jam();
      ret = false;
    } else {
      jam();
      /* we have a software upgrade situation, mgmtsrvr should be
       * the highest, let him decide what to do
       */
      ;
    }
  }
  return ret;
}
#endif
3826
3827 void
sendApiRegRef(Signal * signal,Uint32 Tref,ApiRegRef::ErrorCode err)3828 Qmgr::sendApiRegRef(Signal* signal, Uint32 Tref, ApiRegRef::ErrorCode err){
3829 ApiRegRef* ref = (ApiRegRef*)signal->getDataPtrSend();
3830 ref->ref = reference();
3831 ref->version = NDB_VERSION;
3832 ref->mysql_version = NDB_MYSQL_VERSION_D;
3833 ref->errorCode = err;
3834 sendSignal(Tref, GSN_API_REGREF, signal, ApiRegRef::SignalLength, JBB);
3835 }
3836
/**--------------------------------------------------------------------------
 * A NODE HAS BEEN DECLARED AS DOWN. WE WILL CLOSE THE COMMUNICATION TO THIS
 * NODE IF NOT ALREADY DONE. IF WE ARE PRESIDENT, OR BECOME PRESIDENT BECAUSE
 * THE PREVIOUS PRESIDENT FAILED, THEN WE WILL TAKE FURTHER ACTION.
 *---------------------------------------------------------------------------*/
/**
 * Entry point for handling a report that data node aFailedNode has failed.
 *
 * @param signal       the signal being executed; for the
 *                     ZPARTITIONED_CLUSTER cause its data is read as a
 *                     FailRep
 * @param aFailedNode  id of the node reported as failed
 * @param aFailCause   reason for the failure report
 * @param sourceNode   id of the node that reported the failure
 *
 * Outline:
 *  1. Return at once if check_multi_node_shutdown() says so.
 *  2. Connectivity related reports coming from a node whose own
 *     connectivity is suspect are diverted to handleFailFromSuspect().
 *  3. If the failed node is this node, build a descriptive message and
 *     terminate through progError().
 *  4. Our own node must be in phase ZRUNNING and started (SL_STARTED),
 *     otherwise the situation is treated as fatal.
 *  5. Record the failure with failReport(); if we are president, no
 *     exclusion round is active and the failed-node count changed, start a
 *     new round by sending PREP_FAILREQ to all running nodes.
 */
void Qmgr::failReportLab(Signal* signal, Uint16 aFailedNode,
                         FailRep::FailCause aFailCause,
                         Uint16 sourceNode)
{
  NodeRecPtr nodePtr;
  NodeRecPtr failedNodePtr;
  NodeRecPtr myNodePtr;
  UintR TnoFailedNodes;

  failedNodePtr.i = aFailedNode;
  ptrCheckGuard(failedNodePtr, MAX_NDB_NODES, nodeRec);
  // Only dereferenced in the ZPARTITIONED_CLUSTER case below
  FailRep* rep = (FailRep*)signal->getDataPtr();

  if (check_multi_node_shutdown(signal))
  {
    jam();
    return;
  }

  if (isNodeConnectivitySuspect(sourceNode) &&
      // (! isNodeConnectivitySuspect(aFailedNode)) && // TODO : Required?
      ((aFailCause == FailRep::ZCONNECT_CHECK_FAILURE) ||
       (aFailCause == FailRep::ZLINK_FAILURE)))
  {
    jam();
    /* Connectivity related failure report from a node with suspect
     * connectivity, handle differently
     */
    ndbrequire(sourceNode != getOwnNodeId());

    handleFailFromSuspect(signal,
                          aFailCause,
                          aFailedNode,
                          sourceNode);
    return;
  }

  if (failedNodePtr.i == getOwnNodeId()) {
    jam();

    // We are the node declared dead: translate the cause into an exit
    // code and a human readable message, then shut down.
    Uint32 code = NDBD_EXIT_NODE_DECLARED_DEAD;
    const char * msg = 0;
    char extra[100];
    switch(aFailCause){
    case FailRep::ZOWN_FAILURE:
      msg = "Own failure";
      break;
    case FailRep::ZOTHER_NODE_WHEN_WE_START:
    case FailRep::ZOTHERNODE_FAILED_DURING_START:
      msg = "Other node died during start";
      break;
    case FailRep::ZIN_PREP_FAIL_REQ:
      msg = "Prep fail";
      break;
    case FailRep::ZSTART_IN_REGREQ:
      msg = "Start timeout";
      break;
    case FailRep::ZHEARTBEAT_FAILURE:
      msg = "Heartbeat failure";
      break;
    case FailRep::ZLINK_FAILURE:
      msg = "Connection failure";
      break;
    case FailRep::ZPARTITIONED_CLUSTER:
    {
      code = NDBD_EXIT_PARTITIONED_SHUTDOWN;
      char buf1[100], buf2[100];
      c_clusterNodes.getText(buf1);
      // The other partition is only available when the signal really is a
      // FAIL_REP carrying the partitioned-cluster extra words (checked via
      // its length, for both the original and the current base length).
      if (((signal->getLength()== FailRep::OrigSignalLength + FailRep::PartitionedExtraLength) ||
           (signal->getLength()== FailRep::SignalLength + FailRep::PartitionedExtraLength)) &&
          signal->header.theVerId_signalNumber == GSN_FAIL_REP)
      {
        jam();
        NdbNodeBitmask part;
        part.assign(NdbNodeBitmask::Size, rep->partitioned.partition);
        part.getText(buf2);
        BaseString::snprintf(extra, sizeof(extra),
                             "Our cluster: %s other cluster: %s",
                             buf1, buf2);
      }
      else
      {
        jam();
        BaseString::snprintf(extra, sizeof(extra),
                             "Our cluster: %s", buf1);
      }
      msg = extra;
      break;
    }
    case FailRep::ZMULTI_NODE_SHUTDOWN:
      msg = "Multi node shutdown";
      break;
    case FailRep::ZCONNECT_CHECK_FAILURE:
      msg = "Connectivity check failure";
      break;
    default:
      msg = "<UNKNOWN>";
    }

    CRASH_INSERTION(932);
    CRASH_INSERTION(938);

    char buf[255];
    BaseString::snprintf(buf, sizeof(buf),
                         "We(%u) have been declared dead by %u (via %u) reason: %s(%u)",
                         getOwnNodeId(),
                         sourceNode,
                         refToNode(signal->getSendersBlockRef()),
                         msg ? msg : "<Unknown>",
                         aFailCause);

    progError(__LINE__, code, buf);
    return;
  }//if

  myNodePtr.i = getOwnNodeId();
  ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);
  if (myNodePtr.p->phase != ZRUNNING) {
    jam();
    systemErrorLab(signal, __LINE__);
    return;
  }//if

  // A node failure while we ourselves are still starting is fatal
  if (getNodeState().startLevel < NodeState::SL_STARTED)
  {
    jam();
    CRASH_INSERTION(932);
    CRASH_INSERTION(938);
    char buf[100];
    BaseString::snprintf(buf, 100, "Node failure during restart");
    progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf);
    ndbrequire(false);
  }

  // Snapshot the failed-node count so that we can tell afterwards whether
  // failReport() changed it (presumably by registering a new failed
  // node - confirm in failReport()).
  TnoFailedNodes = cnoFailedNodes;
  failReport(signal, failedNodePtr.i, (UintR)ZTRUE, aFailCause, sourceNode);
  if (cpresident == getOwnNodeId()) {
    jam();
    if (ctoStatus == Q_NOT_ACTIVE) {
      jam();
      /**--------------------------------------------------------------------
       * AS PRESIDENT WE ARE REQUIRED TO START THE EXCLUSION PROCESS SUCH THAT
       * THE APPLICATION SEE NODE FAILURES IN A CONSISTENT ORDER.
       * IF WE HAVE BECOME PRESIDENT NOW (CTO_STATUS = ACTIVE) THEN WE HAVE
       * TO COMPLETE THE PREVIOUS COMMIT FAILED NODE PROCESS BEFORE STARTING
       * A NEW.
       * CTO_STATUS = ACTIVE CAN ALSO MEAN THAT WE ARE PRESIDENT AND ARE
       * CURRENTLY COMMITTING A SET OF NODE CRASHES. IN THIS CASE IT IS NOT
       * ALLOWED TO START PREPARING NEW NODE CRASHES.
       *---------------------------------------------------------------------*/
      if (TnoFailedNodes != cnoFailedNodes) {
        jam();
        // New failure round: bump the failure number and ask every running
        // node to prepare
        cfailureNr = cfailureNr + 1;
        for (nodePtr.i = 1;
             nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
          jam();
          ptrAss(nodePtr, nodeRec);
          if (nodePtr.p->phase == ZRUNNING) {
            jam();
            sendPrepFailReq(signal, nodePtr.i);
          }//if
        }//for
      }//if
    }//if
  }//if
  return;
}//Qmgr::failReportLab()
4009
4010 /**-------------------------------------------------------------------------
4011 * WE HAVE RECEIVED A PREPARE TO EXCLUDE A NUMBER OF NODES FROM THE CLUSTER.
4012 * WE WILL FIRST CHECK THAT WE HAVE NOT ANY MORE NODES THAT
4013 * WE ALSO HAVE EXCLUDED
4014 *--------------------------------------------------------------------------*/
4015 /*******************************/
4016 /* PREP_FAILREQ */
4017 /*******************************/
execPREP_FAILREQ(Signal * signal)4018 void Qmgr::execPREP_FAILREQ(Signal* signal)
4019 {
4020 NodeRecPtr myNodePtr;
4021 jamEntry();
4022
4023 c_start.reset();
4024
4025 if (check_multi_node_shutdown(signal))
4026 {
4027 jam();
4028 return;
4029 }
4030
4031 PrepFailReqRef * const prepFail = (PrepFailReqRef *)&signal->theData[0];
4032
4033 BlockReference Tblockref = prepFail->xxxBlockRef;
4034 Uint16 TfailureNr = prepFail->failNo;
4035 cnoPrepFailedNodes = prepFail->noOfNodes;
4036 UintR arrayIndex = 0;
4037 Uint32 Tindex;
4038 for (Tindex = 0; Tindex < MAX_NDB_NODES; Tindex++) {
4039 if (NdbNodeBitmask::get(prepFail->theNodes, Tindex)){
4040 cprepFailedNodes[arrayIndex] = Tindex;
4041 arrayIndex++;
4042 }//if
4043 }//for
4044 UintR guard0;
4045
4046 /**
4047 * Block commit until node failures has stabilized
4048 *
4049 * @See RT352
4050 */
4051 BlockCommitOrd* const block = (BlockCommitOrd *)&signal->theData[0];
4052 block->failNo = TfailureNr;
4053 EXECUTE_DIRECT(DBDIH, GSN_BLOCK_COMMIT_ORD, signal,
4054 BlockCommitOrd::SignalLength);
4055
4056 myNodePtr.i = getOwnNodeId();
4057 ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);
4058 if (myNodePtr.p->phase != ZRUNNING) {
4059 jam();
4060 systemErrorLab(signal, __LINE__);
4061 return;
4062 }//if
4063
4064 if (getNodeState().startLevel < NodeState::SL_STARTED)
4065 {
4066 jam();
4067 CRASH_INSERTION(932);
4068 CRASH_INSERTION(938);
4069 char buf[100];
4070 BaseString::snprintf(buf, 100, "Node failure during restart");
4071 progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf);
4072 ndbrequire(false);
4073 }
4074
4075 guard0 = cnoPrepFailedNodes - 1;
4076 arrGuard(guard0, MAX_NDB_NODES);
4077 for (Tindex = 0; Tindex <= guard0; Tindex++) {
4078 jam();
4079 failReport(signal,
4080 cprepFailedNodes[Tindex],
4081 (UintR)ZFALSE,
4082 FailRep::ZIN_PREP_FAIL_REQ,
4083 0); /* Source node not required (or known) here */
4084 }//for
4085 sendCloseComReq(signal, Tblockref, TfailureNr);
4086 cnoCommitFailedNodes = 0;
4087 cprepareFailureNr = TfailureNr;
4088 return;
4089 }//Qmgr::execPREP_FAILREQ()
4090
4091
handleApiCloseComConf(Signal * signal)4092 void Qmgr::handleApiCloseComConf(Signal* signal)
4093 {
4094 jam();
4095 CloseComReqConf * const closeCom = (CloseComReqConf *)&signal->theData[0];
4096
4097 /* Api failure special case */
4098 for(Uint32 nodeId = 0; nodeId < MAX_NODES; nodeId ++)
4099 {
4100 if (NodeBitmask::get(closeCom->theNodes, nodeId))
4101 {
4102 jam();
4103 /* Check that *only* 1 *API* node is included in
4104 * this CLOSE_COM_CONF
4105 */
4106 ndbrequire(getNodeInfo(nodeId).getType() != NodeInfo::DB);
4107 ndbrequire(closeCom->noOfNodes == 1);
4108 NodeBitmask::clear(closeCom->theNodes, nodeId);
4109 ndbrequire(NodeBitmask::isclear(closeCom->theNodes));
4110
4111 /* Now that we know communication from the failed Api has
4112 * ceased, we can send the required API_FAILREQ signals
4113 * and continue API failure handling
4114 */
4115 NodeRecPtr failedNodePtr;
4116 failedNodePtr.i = nodeId;
4117 ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec);
4118
4119 ndbrequire((failedNodePtr.p->failState ==
4120 WAITING_FOR_CLOSECOMCONF_ACTIVE) ||
4121 (failedNodePtr.p->failState ==
4122 WAITING_FOR_CLOSECOMCONF_NOTACTIVE));
4123
4124 if (failedNodePtr.p->failState == WAITING_FOR_CLOSECOMCONF_ACTIVE)
4125 {
4126 /**
4127 * Inform application blocks TC, DICT, SUMA etc.
4128 */
4129 jam();
4130 sendApiFailReq(signal, nodeId, false); // !sumaOnly
4131 arbitRec.code = ArbitCode::ApiFail;
4132 handleArbitApiFail(signal, nodeId);
4133 }
4134 else
4135 {
4136 /**
4137 * Always inform SUMA
4138 */
4139 jam();
4140 sendApiFailReq(signal, nodeId, true); // sumaOnly
4141 }
4142
4143 if (getNodeInfo(failedNodePtr.i).getType() == NodeInfo::MGM)
4144 {
4145 /**
4146 * Allow MGM do reconnect "directly"
4147 */
4148 jam();
4149 setNodeInfo(failedNodePtr.i).m_heartbeat_cnt = 3;
4150 }
4151
4152 /* Handled the single API node failure */
4153 return;
4154 }
4155 }
4156 /* Never get here */
4157 ndbrequire(false);
4158 }
4159
4160 /**---------------------------------------------------------------------------
4161 * THE CRASHED NODES HAS BEEN EXCLUDED FROM COMMUNICATION.
4162 * WE WILL CHECK WHETHER ANY MORE NODES HAVE FAILED DURING THE PREPARE PROCESS.
4163 * IF SO WE WILL REFUSE THE PREPARE PHASE AND EXPECT A NEW PREPARE MESSAGE
4164 * WITH ALL FAILED NODES INCLUDED.
4165 *---------------------------------------------------------------------------*/
4166 /*******************************/
4167 /* CLOSE_COMCONF */
4168 /*******************************/
execCLOSE_COMCONF(Signal * signal)4169 void Qmgr::execCLOSE_COMCONF(Signal* signal)
4170 {
4171 jamEntry();
4172
4173 CloseComReqConf * const closeCom = (CloseComReqConf *)&signal->theData[0];
4174
4175 Uint32 requestType = closeCom->requestType;
4176
4177 if (requestType == CloseComReqConf::RT_API_FAILURE)
4178 {
4179 jam();
4180 handleApiCloseComConf(signal);
4181 return;
4182 }
4183
4184 /* Normal node failure preparation path */
4185 ndbassert(requestType == CloseComReqConf::RT_NODE_FAILURE);
4186 BlockReference Tblockref = closeCom->xxxBlockRef;
4187 Uint16 TfailureNr = closeCom->failNo;
4188
4189 cnoPrepFailedNodes = closeCom->noOfNodes;
4190 UintR arrayIndex = 0;
4191 UintR Tindex = 0;
4192 for(Tindex = 0; Tindex < MAX_NDB_NODES; Tindex++){
4193 if(NdbNodeBitmask::get(closeCom->theNodes, Tindex)){
4194 cprepFailedNodes[arrayIndex] = Tindex;
4195 arrayIndex++;
4196 }
4197 }
4198 ndbassert(arrayIndex == cnoPrepFailedNodes);
4199 UintR tprepFailConf;
4200 UintR Tindex2;
4201 UintR guard0;
4202 UintR guard1;
4203 UintR Tfound;
4204 Uint16 TfailedNodeNo;
4205
4206 tprepFailConf = ZTRUE;
4207 if (cnoFailedNodes > 0) {
4208 jam();
4209 /* Check whether the set of nodes which have had communications
4210 * closed is the same as the set of failed nodes.
4211 * If it is, we can confirm the PREP_FAIL phase for this set
4212 * of nodes to the President.
4213 * If it is not, we Refuse the PREP_FAIL phase for this set
4214 * of nodes, the President will start a new PREP_FAIL phase
4215 * for the new set.
4216 */
4217 guard0 = cnoFailedNodes - 1;
4218 arrGuard(guard0, MAX_NDB_NODES);
4219 for (Tindex = 0; Tindex <= guard0; Tindex++) {
4220 jam();
4221 TfailedNodeNo = cfailedNodes[Tindex];
4222 Tfound = ZFALSE;
4223 guard1 = cnoPrepFailedNodes - 1;
4224 arrGuard(guard1, MAX_NDB_NODES);
4225 for (Tindex2 = 0; Tindex2 <= guard1; Tindex2++) {
4226 jam();
4227 if (TfailedNodeNo == cprepFailedNodes[Tindex2]) {
4228 jam();
4229 Tfound = ZTRUE;
4230 }//if
4231 }//for
4232 if (Tfound == ZFALSE) {
4233 jam();
4234 /* A failed node is missing from the set, we will not
4235 * confirm this Prepare_Fail phase.
4236 * Store the node id in the array for later.
4237 */
4238 tprepFailConf = ZFALSE;
4239 arrGuard(cnoPrepFailedNodes, MAX_NDB_NODES);
4240 cprepFailedNodes[cnoPrepFailedNodes] = TfailedNodeNo;
4241 cnoPrepFailedNodes = cnoPrepFailedNodes + 1;
4242 }//if
4243 }//for
4244 }//if
4245 if (tprepFailConf == ZFALSE) {
4246 jam();
4247 /* Inform President that we cannot confirm the PREP_FAIL
4248 * phase as we are aware of at least one other node
4249 * failure
4250 */
4251 for (Tindex = 0; Tindex < MAX_NDB_NODES; Tindex++) {
4252 cfailedNodes[Tindex] = cprepFailedNodes[Tindex];
4253 }//for
4254 cnoFailedNodes = cnoPrepFailedNodes;
4255 sendPrepFailReqRef(signal,
4256 Tblockref,
4257 GSN_PREP_FAILREF,
4258 reference(),
4259 cfailureNr,
4260 cnoPrepFailedNodes,
4261 cprepFailedNodes);
4262 } else {
4263 /* We have prepared the failure of the requested nodes
4264 * send confirmation to the president
4265 */
4266 jam();
4267 cnoCommitFailedNodes = cnoPrepFailedNodes;
4268 guard0 = cnoPrepFailedNodes - 1;
4269 arrGuard(guard0, MAX_NDB_NODES);
4270 for (Tindex = 0; Tindex <= guard0; Tindex++) {
4271 jam();
4272 arrGuard(Tindex, MAX_NDB_NODES);
4273 ccommitFailedNodes[Tindex] = cprepFailedNodes[Tindex];
4274 }//for
4275 signal->theData[0] = getOwnNodeId();
4276 signal->theData[1] = TfailureNr;
4277 sendSignal(Tblockref, GSN_PREP_FAILCONF, signal, 2, JBA);
4278 }//if
4279 return;
4280 }//Qmgr::execCLOSE_COMCONF()
4281
4282 /*---------------------------------------------------------------------------*/
4283 /* WE HAVE RECEIVED A CONFIRM OF THAT THIS NODE HAVE PREPARED THE FAILURE. */
4284 /*---------------------------------------------------------------------------*/
4285 /*******************************/
4286 /* PREP_FAILCONF */
4287 /*******************************/
execPREP_FAILCONF(Signal * signal)4288 void Qmgr::execPREP_FAILCONF(Signal* signal)
4289 {
4290 NodeRecPtr nodePtr;
4291 NodeRecPtr replyNodePtr;
4292 jamEntry();
4293 replyNodePtr.i = signal->theData[0];
4294 Uint16 TfailureNr = signal->theData[1];
4295 if (TfailureNr != cfailureNr) {
4296 jam();
4297 /**----------------------------------------------------------------------
4298 * WE HAVE ALREADY STARTING A NEW ATTEMPT TO EXCLUDE A NUMBER OF NODES.
4299 * IGNORE
4300 *----------------------------------------------------------------------*/
4301 return;
4302 }//if
4303 ptrCheckGuard(replyNodePtr, MAX_NDB_NODES, nodeRec);
4304 replyNodePtr.p->sendPrepFailReqStatus = Q_NOT_ACTIVE;
4305 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
4306 jam();
4307 ptrAss(nodePtr, nodeRec);
4308 if (nodePtr.p->phase == ZRUNNING) {
4309 if (nodePtr.p->sendPrepFailReqStatus == Q_ACTIVE) {
4310 jam();
4311 return;
4312 }//if
4313 }//if
4314 }//for
4315 /**
4316 * Check node count and groups and invoke arbitrator if necessary.
4317 * Continues via sendCommitFailReq() if successful.
4318 */
4319 arbitRec.failureNr = cfailureNr;
4320 const NodeState & s = getNodeState();
4321 if(s.startLevel == NodeState::SL_STOPPING_3 && s.stopping.systemShutdown){
4322 jam();
4323 /**
4324 * We're performing a system shutdown,
4325 * don't let artibtrator shut us down
4326 */
4327 return;
4328 }
4329
4330 switch(arbitRec.method){
4331 case ArbitRec::DISABLED:
4332 jam();
4333 // No arbitration -> immediately commit the failed nodes
4334 sendCommitFailReq(signal);
4335 break;
4336
4337 case ArbitRec::METHOD_EXTERNAL:
4338 case ArbitRec::METHOD_DEFAULT:
4339 jam();
4340 handleArbitCheck(signal);
4341 break;
4342
4343 }
4344 return;
4345 }//Qmgr::execPREP_FAILCONF()
4346
4347 void
sendCommitFailReq(Signal * signal)4348 Qmgr::sendCommitFailReq(Signal* signal)
4349 {
4350 NodeRecPtr nodePtr;
4351 jam();
4352 if (arbitRec.failureNr != cfailureNr) {
4353 jam();
4354 /**----------------------------------------------------------------------
4355 * WE HAVE ALREADY STARTING A NEW ATTEMPT TO EXCLUDE A NUMBER OF NODES.
4356 * IGNORE
4357 *----------------------------------------------------------------------*/
4358 return;
4359 }//if
4360 /**-----------------------------------------------------------------------
4361 * WE HAVE SUCCESSFULLY PREPARED A SET OF NODE FAILURES. WE WILL NOW COMMIT
4362 * THESE NODE FAILURES.
4363 *-------------------------------------------------------------------------*/
4364 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
4365 jam();
4366 ptrAss(nodePtr, nodeRec);
4367
4368 #ifdef ERROR_INSERT
4369 if (false && ERROR_INSERTED(935) && nodePtr.i == c_error_insert_extra)
4370 {
4371 ndbout_c("skipping node %d", c_error_insert_extra);
4372 CLEAR_ERROR_INSERT_VALUE;
4373 signal->theData[0] = 9999;
4374 sendSignalWithDelay(CMVMI_REF, GSN_NDB_TAMPER, signal, 1000, 1);
4375 continue;
4376 }
4377 #endif
4378
4379 if (nodePtr.p->phase == ZRUNNING) {
4380 jam();
4381 nodePtr.p->sendCommitFailReqStatus = Q_ACTIVE;
4382 signal->theData[0] = cpdistref;
4383 signal->theData[1] = cfailureNr;
4384 sendSignal(nodePtr.p->blockRef, GSN_COMMIT_FAILREQ, signal, 2, JBA);
4385 }//if
4386 }//for
4387 ctoStatus = Q_ACTIVE;
4388 cnoFailedNodes = 0;
4389 return;
4390 }//sendCommitFailReq()
4391
4392 /*---------------------------------------------------------------------------*/
4393 /* SOME NODE HAVE DISCOVERED A NODE FAILURE THAT WE HAVE NOT YET DISCOVERED. */
4394 /* WE WILL START ANOTHER ROUND OF PREPARING A SET OF NODE FAILURES. */
4395 /*---------------------------------------------------------------------------*/
4396 /*******************************/
4397 /* PREP_FAILREF */
4398 /*******************************/
execPREP_FAILREF(Signal * signal)4399 void Qmgr::execPREP_FAILREF(Signal* signal)
4400 {
4401 NodeRecPtr nodePtr;
4402 jamEntry();
4403
4404 PrepFailReqRef * const prepFail = (PrepFailReqRef *)&signal->theData[0];
4405
4406 Uint16 TfailureNr = prepFail->failNo;
4407 cnoPrepFailedNodes = prepFail->noOfNodes;
4408
4409 UintR arrayIndex = 0;
4410 UintR Tindex = 0;
4411 for(Tindex = 0; Tindex < MAX_NDB_NODES; Tindex++) {
4412 jam();
4413 if(NdbNodeBitmask::get(prepFail->theNodes, Tindex)){
4414 jam();
4415 cprepFailedNodes[arrayIndex] = Tindex;
4416 arrayIndex++;
4417 }//if
4418 }//for
4419 if (TfailureNr != cfailureNr) {
4420 jam();
4421 /**---------------------------------------------------------------------
4422 * WE HAVE ALREADY STARTING A NEW ATTEMPT TO EXCLUDE A NUMBER OF NODES.
4423 * IGNORE
4424 *----------------------------------------------------------------------*/
4425 return;
4426 }//if
4427 UintR guard0;
4428 UintR Ti;
4429
4430 cnoFailedNodes = cnoPrepFailedNodes;
4431 guard0 = cnoPrepFailedNodes - 1;
4432 arrGuard(guard0, MAX_NDB_NODES);
4433 for (Ti = 0; Ti <= guard0; Ti++) {
4434 jam();
4435 cfailedNodes[Ti] = cprepFailedNodes[Ti];
4436 }//for
4437 cfailureNr = cfailureNr + 1;
4438 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
4439 jam();
4440 ptrAss(nodePtr, nodeRec);
4441 if (nodePtr.p->phase == ZRUNNING) {
4442 jam();
4443 sendPrepFailReq(signal, nodePtr.i);
4444 }//if
4445 }//for
4446 return;
4447 }//Qmgr::execPREP_FAILREF()
4448
/**
 * Remove from dst[0..dstcnt) every node id that also occurs in
 * src[0..srccnt). The remaining ids are compacted to the front of dst in
 * their original order; entries at and beyond the returned count are left
 * with whatever they held before.
 *
 * @param dstcnt  number of valid entries in dst
 * @param dst     array to filter in place
 * @param srccnt  number of valid entries in src
 * @param src     node ids to remove from dst
 * @return        number of entries remaining in dst
 */
static
Uint32
clear_nodes(Uint32 dstcnt, Uint16 dst[], Uint32 srccnt, const Uint16 src[])
{
  if (srccnt == 0)
    return dstcnt;

  Uint32 kept = 0;
  for (Uint32 i = 0; i < dstcnt; i++)
  {
    const Uint16 node = dst[i];
    bool removed = false;
    for (Uint32 j = 0; j < srccnt; j++)
    {
      /* Compare against the removal list (src), not against dst itself. */
      if (node == src[j])
      {
        removed = true;
        break;
      }
    }
    if (!removed)
    {
      dst[kept++] = node;
    }
  }
  return kept;
}
4475
4476 /*---------------------------------------------------------------------------*/
4477 /* THE PRESIDENT IS NOW COMMITTING THE PREVIOUSLY PREPARED NODE FAILURE. */
4478 /*---------------------------------------------------------------------------*/
4479 /***********************/
4480 /* COMMIT_FAILREQ */
4481 /***********************/
execCOMMIT_FAILREQ(Signal * signal)4482 void Qmgr::execCOMMIT_FAILREQ(Signal* signal)
4483 {
4484 NodeRecPtr nodePtr;
4485 jamEntry();
4486
4487 CRASH_INSERTION(935);
4488
4489 BlockReference Tblockref = signal->theData[0];
4490 UintR TfailureNr = signal->theData[1];
4491 if (Tblockref != cpdistref) {
4492 jam();
4493 return;
4494 }//if
4495 UintR guard0;
4496 UintR Tj;
4497
4498 /**
4499 * Block commit until node failures has stabilized
4500 *
4501 * @See RT352
4502 */
4503 UnblockCommitOrd* const unblock = (UnblockCommitOrd *)&signal->theData[0];
4504 unblock->failNo = TfailureNr;
4505 EXECUTE_DIRECT(DBDIH, GSN_UNBLOCK_COMMIT_ORD, signal,
4506 UnblockCommitOrd::SignalLength);
4507
4508 if ((ccommitFailureNr != TfailureNr) &&
4509 (cnoCommitFailedNodes > 0)) {
4510 jam();
4511 /**-----------------------------------------------------------------------
4512 * WE ONLY DO THIS PART OF THE COMMIT HANDLING THE FIRST TIME WE HEAR THIS
4513 * SIGNAL. WE CAN HEAR IT SEVERAL TIMES IF THE PRESIDENTS KEEP FAILING.
4514 *-----------------------------------------------------------------------*/
4515 ccommitFailureNr = TfailureNr;
4516 NodeFailRep * const nodeFail = (NodeFailRep *)&signal->theData[0];
4517
4518 nodeFail->failNo = ccommitFailureNr;
4519 nodeFail->noOfNodes = cnoCommitFailedNodes;
4520 nodeFail->masterNodeId = cpresident;
4521 NdbNodeBitmask::clear(nodeFail->theNodes);
4522 for(unsigned i = 0; i < cnoCommitFailedNodes; i++) {
4523 jam();
4524 NdbNodeBitmask::set(nodeFail->theNodes, ccommitFailedNodes[i]);
4525 }//if
4526
4527 if (ERROR_INSERTED(936))
4528 {
4529 sendSignalWithDelay(NDBCNTR_REF, GSN_NODE_FAILREP, signal,
4530 200, NodeFailRep::SignalLength);
4531 }
4532 else
4533 {
4534 sendSignal(NDBCNTR_REF, GSN_NODE_FAILREP, signal,
4535 NodeFailRep::SignalLength, JBB);
4536 }
4537
4538 guard0 = cnoCommitFailedNodes - 1;
4539 arrGuard(guard0, MAX_NDB_NODES);
4540 /**--------------------------------------------------------------------
4541 * WE MUST PREPARE TO ACCEPT THE CRASHED NODE INTO THE CLUSTER AGAIN BY
4542 * SETTING UP CONNECTIONS AGAIN AFTER THREE SECONDS OF DELAY.
4543 *--------------------------------------------------------------------*/
4544 for (Tj = 0; Tj <= guard0; Tj++) {
4545 jam();
4546 nodePtr.i = ccommitFailedNodes[Tj];
4547 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
4548 nodePtr.p->phase = ZFAIL_CLOSING;
4549 nodePtr.p->failState = WAITING_FOR_NDB_FAILCONF;
4550 setNodeInfo(nodePtr.i).m_heartbeat_cnt= 0;
4551 setNodeInfo(nodePtr.i).m_version = 0;
4552 c_clusterNodes.clear(nodePtr.i);
4553 }//for
4554 recompute_version_info(NodeInfo::DB);
4555 /*----------------------------------------------------------------------*/
4556 /* WE INFORM THE API'S WE HAVE CONNECTED ABOUT THE FAILED NODES. */
4557 /*----------------------------------------------------------------------*/
4558 for (nodePtr.i = 1; nodePtr.i < MAX_NODES; nodePtr.i++) {
4559 jam();
4560 ptrAss(nodePtr, nodeRec);
4561 if (nodePtr.p->phase == ZAPI_ACTIVE) {
4562 jam();
4563
4564 NodeFailRep * const nodeFail = (NodeFailRep *)&signal->theData[0];
4565
4566 nodeFail->failNo = ccommitFailureNr;
4567 nodeFail->noOfNodes = cnoCommitFailedNodes;
4568 NdbNodeBitmask::clear(nodeFail->theNodes);
4569 for(unsigned i = 0; i < cnoCommitFailedNodes; i++) {
4570 jam();
4571 NdbNodeBitmask::set(nodeFail->theNodes, ccommitFailedNodes[i]);
4572 }//for
4573 sendSignal(nodePtr.p->blockRef, GSN_NODE_FAILREP, signal,
4574 NodeFailRep::SignalLength, JBB);
4575 }//if
4576 }//for
4577
4578 /**
4579 * Remove committed nodes from failed/prepared
4580 */
4581 cnoFailedNodes = clear_nodes(cnoFailedNodes,
4582 cfailedNodes,
4583 cnoCommitFailedNodes,
4584 ccommitFailedNodes);
4585 cnoPrepFailedNodes = clear_nodes(cnoPrepFailedNodes,
4586 cprepFailedNodes,
4587 cnoCommitFailedNodes,
4588 ccommitFailedNodes);
4589 cnoCommitFailedNodes = 0;
4590 }//if
4591 /**-----------------------------------------------------------------------
4592 * WE WILL ALWAYS ACKNOWLEDGE THE COMMIT EVEN WHEN RECEIVING IT MULTIPLE
4593 * TIMES SINCE IT WILL ALWAYS COME FROM A NEW PRESIDENT.
4594 *------------------------------------------------------------------------*/
4595 signal->theData[0] = getOwnNodeId();
4596 sendSignal(Tblockref, GSN_COMMIT_FAILCONF, signal, 1, JBA);
4597 return;
4598 }//Qmgr::execCOMMIT_FAILREQ()
4599
4600 /*--------------------------------------------------------------------------*/
4601 /* WE HAVE RECEIVED A CONFIRM OF THAT THIS NODE HAVE COMMITTED THE FAILURES.*/
4602 /*--------------------------------------------------------------------------*/
4603 /*******************************/
4604 /* COMMIT_FAILCONF */
4605 /*******************************/
execCOMMIT_FAILCONF(Signal * signal)4606 void Qmgr::execCOMMIT_FAILCONF(Signal* signal)
4607 {
4608 NodeRecPtr nodePtr;
4609 NodeRecPtr replyNodePtr;
4610 jamEntry();
4611 replyNodePtr.i = signal->theData[0];
4612
4613 ptrCheckGuard(replyNodePtr, MAX_NDB_NODES, nodeRec);
4614 replyNodePtr.p->sendCommitFailReqStatus = Q_NOT_ACTIVE;
4615 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
4616 jam();
4617 ptrAss(nodePtr, nodeRec);
4618 if (nodePtr.p->phase == ZRUNNING) {
4619 if (nodePtr.p->sendCommitFailReqStatus == Q_ACTIVE) {
4620 jam();
4621 return;
4622 }//if
4623 }//if
4624 }//for
4625 /*-----------------------------------------------------------------------*/
4626 /* WE HAVE SUCCESSFULLY COMMITTED A SET OF NODE FAILURES. */
4627 /*-----------------------------------------------------------------------*/
4628 ctoStatus = Q_NOT_ACTIVE;
4629 if (cnoFailedNodes != 0) {
4630 jam();
4631 /**----------------------------------------------------------------------
4632 * A FAILURE OCCURRED IN THE MIDDLE OF THE COMMIT PROCESS. WE ARE NOW
4633 * READY TO START THE FAILED NODE PROCESS FOR THIS NODE.
4634 *----------------------------------------------------------------------*/
4635 cfailureNr = cfailureNr + 1;
4636 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
4637 jam();
4638 ptrAss(nodePtr, nodeRec);
4639 if (nodePtr.p->phase == ZRUNNING) {
4640 jam();
4641 sendPrepFailReq(signal, nodePtr.i);
4642 }//if
4643 }//for
4644 }//if
4645 return;
4646 }//Qmgr::execCOMMIT_FAILCONF()
4647
4648 /**--------------------------------------------------------------------------
4649 * IF THE PRESIDENT FAILS IN THE MIDDLE OF THE COMMIT OF A FAILED NODE THEN
4650 * THE NEW PRESIDENT NEEDS TO QUERY THE COMMIT STATUS IN THE RUNNING NODES.
4651 *---------------------------------------------------------------------------*/
4652 /*******************************/
4653 /* PRES_TOCONF */
4654 /*******************************/
execPRES_TOCONF(Signal * signal)4655 void Qmgr::execPRES_TOCONF(Signal* signal)
4656 {
4657 NodeRecPtr nodePtr;
4658 NodeRecPtr replyNodePtr;
4659 jamEntry();
4660 replyNodePtr.i = signal->theData[0];
4661 UintR TfailureNr = signal->theData[1];
4662 if (ctoFailureNr < TfailureNr) {
4663 jam();
4664 ctoFailureNr = TfailureNr;
4665 }//if
4666 ptrCheckGuard(replyNodePtr, MAX_NDB_NODES, nodeRec);
4667 replyNodePtr.p->sendPresToStatus = Q_NOT_ACTIVE;
4668 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
4669 jam();
4670 ptrAss(nodePtr, nodeRec);
4671 if (nodePtr.p->sendPresToStatus == Q_ACTIVE) {
4672 jam();
4673 return;
4674 }//if
4675 }//for
4676 /*-------------------------------------------------------------------------*/
4677 /* WE ARE NOW READY TO DISCOVER WHETHER THE FAILURE WAS COMMITTED OR NOT. */
4678 /*-------------------------------------------------------------------------*/
4679 if (ctoFailureNr > ccommitFailureNr) {
4680 jam();
4681 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
4682 jam();
4683 ptrAss(nodePtr, nodeRec);
4684 if (nodePtr.p->phase == ZRUNNING) {
4685 jam();
4686 nodePtr.p->sendCommitFailReqStatus = Q_ACTIVE;
4687 signal->theData[0] = cpdistref;
4688 signal->theData[1] = ctoFailureNr;
4689 sendSignal(nodePtr.p->blockRef, GSN_COMMIT_FAILREQ, signal, 2, JBA);
4690 }//if
4691 }//for
4692 return;
4693 }//if
4694 /*-------------------------------------------------------------------------*/
4695 /* WE ARE NOW READY TO START THE NEW NODE FAILURE PROCESS. */
4696 /*-------------------------------------------------------------------------*/
4697 ctoStatus = Q_NOT_ACTIVE;
4698 cfailureNr = cfailureNr + 1;
4699 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
4700 jam();
4701 ptrAss(nodePtr, nodeRec);
4702 if (nodePtr.p->phase == ZRUNNING) {
4703 jam();
4704 sendPrepFailReq(signal, nodePtr.i);
4705 }//if
4706 }//for
4707 return;
4708 }//Qmgr::execPRES_TOCONF()
4709
4710 /*--------------------------------------------------------------------------*/
4711 // Provide information about the configured NDB nodes in the system.
4712 /*--------------------------------------------------------------------------*/
execREAD_NODESREQ(Signal * signal)4713 void Qmgr::execREAD_NODESREQ(Signal* signal)
4714 {
4715 jamEntry();
4716
4717 BlockReference TBref = signal->theData[0];
4718
4719 ReadNodesConf * const readNodes = (ReadNodesConf *)&signal->theData[0];
4720
4721 NodeRecPtr nodePtr;
4722 nodePtr.i = getOwnNodeId();
4723 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
4724
4725 NdbNodeBitmask tmp = c_definedNodes;
4726 tmp.bitANDC(c_clusterNodes);
4727
4728 readNodes->noOfNodes = c_definedNodes.count();
4729 readNodes->masterNodeId = cpresident;
4730 readNodes->ndynamicId = nodePtr.p->ndynamicId;
4731 c_definedNodes.copyto(NdbNodeBitmask::Size, readNodes->definedNodes);
4732 c_clusterNodes.copyto(NdbNodeBitmask::Size, readNodes->clusterNodes);
4733 tmp.copyto(NdbNodeBitmask::Size, readNodes->inactiveNodes);
4734 NdbNodeBitmask::clear(readNodes->startingNodes);
4735 NdbNodeBitmask::clear(readNodes->startedNodes);
4736
4737 sendSignal(TBref, GSN_READ_NODESCONF, signal,
4738 ReadNodesConf::SignalLength, JBB);
4739 }//Qmgr::execREAD_NODESREQ()
4740
systemErrorBecauseOtherNodeFailed(Signal * signal,Uint32 line,NodeId failedNodeId)4741 void Qmgr::systemErrorBecauseOtherNodeFailed(Signal* signal, Uint32 line,
4742 NodeId failedNodeId) {
4743 jam();
4744
4745 // Broadcast that this node is failing to other nodes
4746 failReport(signal, getOwnNodeId(), (UintR)ZTRUE, FailRep::ZOWN_FAILURE, getOwnNodeId());
4747
4748 char buf[100];
4749 BaseString::snprintf(buf, 100,
4750 "Node was shutdown during startup because node %d failed",
4751 failedNodeId);
4752
4753 progError(line, NDBD_EXIT_SR_OTHERNODEFAILED, buf);
4754 }
4755
4756
systemErrorLab(Signal * signal,Uint32 line,const char * message)4757 void Qmgr::systemErrorLab(Signal* signal, Uint32 line, const char * message)
4758 {
4759 jam();
4760 // Broadcast that this node is failing to other nodes
4761 failReport(signal, getOwnNodeId(), (UintR)ZTRUE, FailRep::ZOWN_FAILURE, getOwnNodeId());
4762
4763 // If it's known why shutdown occured
4764 // an error message has been passed to this function
4765 progError(line, NDBD_EXIT_NDBREQUIRE, message);
4766
4767 return;
4768 }//Qmgr::systemErrorLab()
4769
4770
4771 /**---------------------------------------------------------------------------
4772 * A FAILURE HAVE BEEN DISCOVERED ON A NODE. WE NEED TO CLEAR A
4773 * NUMBER OF VARIABLES.
4774 *---------------------------------------------------------------------------*/
failReport(Signal * signal,Uint16 aFailedNode,UintR aSendFailRep,FailRep::FailCause aFailCause,Uint16 sourceNode)4775 void Qmgr::failReport(Signal* signal,
4776 Uint16 aFailedNode,
4777 UintR aSendFailRep,
4778 FailRep::FailCause aFailCause,
4779 Uint16 sourceNode)
4780 {
4781 UintR tfrMinDynamicId;
4782 NodeRecPtr failedNodePtr;
4783 NodeRecPtr nodePtr;
4784 NodeRecPtr presidentNodePtr;
4785
4786
4787 ndbassert((! aSendFailRep) || (sourceNode != 0));
4788
4789 failedNodePtr.i = aFailedNode;
4790 ptrCheckGuard(failedNodePtr, MAX_NDB_NODES, nodeRec);
4791 if (failedNodePtr.p->phase == ZRUNNING) {
4792 jam();
4793
4794 #ifdef ERROR_INSERT
4795 if (ERROR_INSERTED(938))
4796 {
4797 nodeFailCount++;
4798 ndbout_c("QMGR : execFAIL_REP : %u nodes have failed", nodeFailCount);
4799 /* Count DB nodes */
4800 Uint32 nodeCount = 0;
4801 for (Uint32 i = 1; i < MAX_NDB_NODES; i++)
4802 {
4803 if (getNodeInfo(i).getType() == NODE_TYPE_DB)
4804 nodeCount++;
4805 }
4806
4807 /* When > 25% of cluster has failed, resume communications */
4808 if (nodeFailCount > (nodeCount / 4))
4809 {
4810 ndbout_c("QMGR : execFAIL_REP > 25%% nodes failed, resuming comms");
4811 Signal save = *signal;
4812 signal->theData[0] = 9991;
4813 sendSignal(CMVMI_REF, GSN_DUMP_STATE_ORD, signal, 1, JBB);
4814 *signal = save;
4815 nodeFailCount = 0;
4816 SET_ERROR_INSERT_VALUE(932);
4817 }
4818 }
4819 #endif
4820
4821 /* WE ALSO NEED TO ADD HERE SOME CODE THAT GETS OUR NEW NEIGHBOURS. */
4822 if (cpresident == getOwnNodeId()) {
4823 jam();
4824 if (failedNodePtr.p->sendCommitFailReqStatus == Q_ACTIVE) {
4825 jam();
4826 signal->theData[0] = failedNodePtr.i;
4827 sendSignal(QMGR_REF, GSN_COMMIT_FAILCONF, signal, 1, JBA);
4828 }//if
4829 if (failedNodePtr.p->sendPresToStatus == Q_ACTIVE) {
4830 jam();
4831 signal->theData[0] = failedNodePtr.i;
4832 signal->theData[1] = ccommitFailureNr;
4833 sendSignal(QMGR_REF, GSN_PRES_TOCONF, signal, 2, JBA);
4834 }//if
4835 }//if
4836 failedNodePtr.p->phase = ZPREPARE_FAIL;
4837 failedNodePtr.p->sendPrepFailReqStatus = Q_NOT_ACTIVE;
4838 failedNodePtr.p->sendCommitFailReqStatus = Q_NOT_ACTIVE;
4839 failedNodePtr.p->sendPresToStatus = Q_NOT_ACTIVE;
4840 setNodeInfo(failedNodePtr.i).m_heartbeat_cnt= 0;
4841 if (aSendFailRep == ZTRUE) {
4842 jam();
4843 if (failedNodePtr.i != getOwnNodeId()) {
4844 jam();
4845 FailRep * const failRep = (FailRep *)&signal->theData[0];
4846 failRep->failNodeId = failedNodePtr.i;
4847 failRep->failCause = aFailCause;
4848 failRep->failSourceNodeId = sourceNode;
4849 sendSignal(failedNodePtr.p->blockRef, GSN_FAIL_REP, signal,
4850 FailRep::SignalLength, JBA);
4851 }//if
4852 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
4853 jam();
4854 ptrAss(nodePtr, nodeRec);
4855 if (nodePtr.p->phase == ZRUNNING) {
4856 jam();
4857 FailRep * const failRep = (FailRep *)&signal->theData[0];
4858 failRep->failNodeId = failedNodePtr.i;
4859 failRep->failCause = aFailCause;
4860 failRep->failSourceNodeId = sourceNode;
4861 sendSignal(nodePtr.p->blockRef, GSN_FAIL_REP, signal,
4862 FailRep::SignalLength, JBA);
4863 }//if
4864 }//for
4865 }//if
4866 if (failedNodePtr.i == getOwnNodeId()) {
4867 jam();
4868 return;
4869 }//if
4870
4871 if (unlikely(m_connectivity_check.reportNodeFailure(failedNodePtr.i)))
4872 {
4873 jam();
4874 connectivityCheckCompleted(signal);
4875 }
4876
4877 failedNodePtr.p->ndynamicId = 0;
4878 findNeighbours(signal, __LINE__);
4879 if (failedNodePtr.i == cpresident) {
4880 jam();
4881 /**--------------------------------------------------------------------
4882 * IF PRESIDENT HAVE FAILED WE MUST CALCULATE THE NEW PRESIDENT BY
4883 * FINDING THE NODE WITH THE MINIMUM DYNAMIC IDENTITY.
4884 *---------------------------------------------------------------------*/
4885 tfrMinDynamicId = (UintR)-1;
4886 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
4887 jam();
4888 ptrAss(nodePtr, nodeRec);
4889 if (nodePtr.p->phase == ZRUNNING) {
4890 if ((nodePtr.p->ndynamicId & 0xFFFF) < tfrMinDynamicId) {
4891 jam();
4892 tfrMinDynamicId = (nodePtr.p->ndynamicId & 0xFFFF);
4893 cpresident = nodePtr.i;
4894 }//if
4895 }//if
4896 }//for
4897 presidentNodePtr.i = cpresident;
4898 ptrCheckGuard(presidentNodePtr, MAX_NDB_NODES, nodeRec);
4899 cpdistref = presidentNodePtr.p->blockRef;
4900 if (cpresident == getOwnNodeId()) {
4901 CRASH_INSERTION(920);
4902 cfailureNr = cprepareFailureNr;
4903 ctoFailureNr = 0;
4904 ctoStatus = Q_ACTIVE;
4905 c_start.reset(); // Don't take over nodes being started
4906 if (cnoCommitFailedNodes > 0) {
4907 jam();
4908 /**-----------------------------------------------------------------
4909 * IN THIS SITUATION WE ARE UNCERTAIN OF WHETHER THE NODE FAILURE
4910 * PROCESS WAS COMMITTED. WE NEED TO QUERY THE OTHER NODES ABOUT
4911 * THEIR STATUS.
4912 *-----------------------------------------------------------------*/
4913 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES;
4914 nodePtr.i++) {
4915 jam();
4916 ptrAss(nodePtr, nodeRec);
4917 if (nodePtr.p->phase == ZRUNNING) {
4918 jam();
4919 nodePtr.p->sendPresToStatus = Q_ACTIVE;
4920 signal->theData[0] = cpdistref;
4921 signal->theData[1] = cprepareFailureNr;
4922 sendSignal(nodePtr.p->blockRef, GSN_PRES_TOREQ,
4923 signal, 1, JBA);
4924 }//if
4925 }//for
4926 } else {
4927 jam();
4928 /*-----------------------------------------------------------------*/
4929 // In this case it could be that a commit process is still ongoing.
4930 // If so we must conclude it as the new master.
4931 /*-----------------------------------------------------------------*/
4932 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES;
4933 nodePtr.i++) {
4934 jam();
4935 ptrAss(nodePtr, nodeRec);
4936 if (nodePtr.p->phase == ZRUNNING) {
4937 jam();
4938 nodePtr.p->sendCommitFailReqStatus = Q_ACTIVE;
4939 signal->theData[0] = cpdistref;
4940 signal->theData[1] = ccommitFailureNr;
4941 sendSignal(nodePtr.p->blockRef, GSN_COMMIT_FAILREQ, signal,
4942 2, JBA);
4943 }//if
4944 }//for
4945 }//if
4946 }//if
4947 }//if
4948 arrGuard(cnoFailedNodes, MAX_NDB_NODES);
4949 cfailedNodes[cnoFailedNodes] = failedNodePtr.i;
4950 cnoFailedNodes = cnoFailedNodes + 1;
4951 }//if
4952 }//Qmgr::failReport()
4953
4954 /*---------------------------------------------------------------------------*/
4955 /* INPUT: TTDI_DYN_ID */
4956 /* OUTPUT: TTDI_NODE_ID */
4957 /*---------------------------------------------------------------------------*/
translateDynamicIdToNodeId(Signal * signal,UintR TdynamicId)4958 Uint16 Qmgr::translateDynamicIdToNodeId(Signal* signal, UintR TdynamicId)
4959 {
4960 NodeRecPtr tdiNodePtr;
4961 Uint16 TtdiNodeId = ZNIL;
4962
4963 for (tdiNodePtr.i = 1; tdiNodePtr.i < MAX_NDB_NODES; tdiNodePtr.i++) {
4964 jam();
4965 ptrAss(tdiNodePtr, nodeRec);
4966 if (tdiNodePtr.p->ndynamicId == TdynamicId) {
4967 jam();
4968 TtdiNodeId = tdiNodePtr.i;
4969 break;
4970 }//if
4971 }//for
4972 if (TtdiNodeId == ZNIL) {
4973 jam();
4974 systemErrorLab(signal, __LINE__);
4975 }//if
4976 return TtdiNodeId;
4977 }//Qmgr::translateDynamicIdToNodeId()
4978
4979 /**--------------------------------------------------------------------------
4980 * WHEN RECEIVING PREPARE FAILURE REQUEST WE WILL IMMEDIATELY CLOSE
4981 * COMMUNICATION WITH ALL THOSE NODES.
4982 *--------------------------------------------------------------------------*/
sendCloseComReq(Signal * signal,BlockReference TBRef,Uint16 aFailNo)4983 void Qmgr::sendCloseComReq(Signal* signal, BlockReference TBRef, Uint16 aFailNo)
4984 {
4985 CloseComReqConf * const closeCom = (CloseComReqConf *)&signal->theData[0];
4986
4987 closeCom->xxxBlockRef = TBRef;
4988 closeCom->requestType = CloseComReqConf::RT_NODE_FAILURE;
4989 closeCom->failNo = aFailNo;
4990 closeCom->noOfNodes = cnoPrepFailedNodes;
4991
4992 NodeBitmask::clear(closeCom->theNodes);
4993
4994 for(int i = 0; i < cnoPrepFailedNodes; i++) {
4995 const NodeId nodeId = cprepFailedNodes[i];
4996 jam();
4997 NodeBitmask::set(closeCom->theNodes, nodeId);
4998 }
4999
5000 sendSignal(CMVMI_REF, GSN_CLOSE_COMREQ, signal,
5001 CloseComReqConf::SignalLength, JBA);
5002
5003 }//Qmgr::sendCloseComReq()
5004
5005 void
sendPrepFailReqRef(Signal * signal,Uint32 dstBlockRef,GlobalSignalNumber gsn,Uint32 blockRef,Uint32 failNo,Uint32 noOfNodes,const NodeId theNodes[])5006 Qmgr::sendPrepFailReqRef(Signal* signal,
5007 Uint32 dstBlockRef,
5008 GlobalSignalNumber gsn,
5009 Uint32 blockRef,
5010 Uint32 failNo,
5011 Uint32 noOfNodes,
5012 const NodeId theNodes[]){
5013
5014 PrepFailReqRef * const prepFail = (PrepFailReqRef *)&signal->theData[0];
5015 prepFail->xxxBlockRef = blockRef;
5016 prepFail->failNo = failNo;
5017 prepFail->noOfNodes = noOfNodes;
5018
5019 NdbNodeBitmask::clear(prepFail->theNodes);
5020
5021 for(Uint32 i = 0; i<noOfNodes; i++){
5022 const NodeId nodeId = theNodes[i];
5023 NdbNodeBitmask::set(prepFail->theNodes, nodeId);
5024 }
5025
5026 sendSignal(dstBlockRef, gsn, signal, PrepFailReqRef::SignalLength, JBA);
5027 }
5028
5029
5030 /**--------------------------------------------------------------------------
5031 * SEND PREPARE FAIL REQUEST FROM PRESIDENT.
5032 *---------------------------------------------------------------------------*/
sendPrepFailReq(Signal * signal,Uint16 aNode)5033 void Qmgr::sendPrepFailReq(Signal* signal, Uint16 aNode)
5034 {
5035 NodeRecPtr sendNodePtr;
5036 sendNodePtr.i = aNode;
5037 ptrCheckGuard(sendNodePtr, MAX_NDB_NODES, nodeRec);
5038 sendNodePtr.p->sendPrepFailReqStatus = Q_ACTIVE;
5039
5040 sendPrepFailReqRef(signal,
5041 sendNodePtr.p->blockRef,
5042 GSN_PREP_FAILREQ,
5043 reference(),
5044 cfailureNr,
5045 cnoFailedNodes,
5046 cfailedNodes);
5047 }//Qmgr::sendPrepFailReq()
5048
5049 /**
5050 * Arbitration module. Rest of QMGR calls us only via
5051 * the "handle" routines.
5052 */
5053
/**
 * Should a node set holding fewer than half of the nodes die
 * unconditionally?  Affects only >= 3-way replication.
 *
 * Consulted by Qmgr::handleArbitCheck(), which (when this is true)
 * compares twice the number of running DB nodes against cnoOfNodes.
 * Currently compiled in as disabled.
 */
static const bool g_ndb_arbit_one_half_rule = false;
5059
5060 /**
5061 * Config signals are logically part of CM_INIT.
5062 */
5063 void
execARBIT_CFG(Signal * signal)5064 Qmgr::execARBIT_CFG(Signal* signal)
5065 {
5066 jamEntry();
5067 ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
5068 unsigned rank = sd->code;
5069 ndbrequire(1 <= rank && rank <= 2);
5070 arbitRec.apiMask[0].bitOR(sd->mask);
5071 arbitRec.apiMask[rank].assign(sd->mask);
5072 }
5073
5074 /**
5075 * ContinueB delay (0=JBA 1=JBB)
5076 */
getArbitDelay()5077 Uint32 Qmgr::getArbitDelay()
5078 {
5079 switch (arbitRec.state) {
5080 case ARBIT_NULL:
5081 jam();
5082 break;
5083 case ARBIT_INIT:
5084 jam();
5085 case ARBIT_FIND:
5086 jam();
5087 case ARBIT_PREP1:
5088 jam();
5089 case ARBIT_PREP2:
5090 jam();
5091 case ARBIT_START:
5092 jam();
5093 return 100;
5094 case ARBIT_RUN:
5095 jam();
5096 return 1000;
5097 case ARBIT_CHOOSE:
5098 jam();
5099 return 10;
5100 case ARBIT_CRASH: // if we could wait
5101 jam();
5102 return 100;
5103 }
5104 ndbrequire(false);
5105 return (Uint32)-1;
5106 }
5107
5108 /**
5109 * Time to wait for reply. There is only 1 config parameter
5110 * (timeout for CHOOSE). XXX The rest are guesses.
5111 */
getArbitTimeout()5112 Uint32 Qmgr::getArbitTimeout()
5113 {
5114 switch (arbitRec.state) {
5115 case ARBIT_NULL:
5116 jam();
5117 break;
5118 case ARBIT_INIT: // not used
5119 jam();
5120 case ARBIT_FIND: // not used
5121 jam();
5122 return 1000;
5123 case ARBIT_PREP1:
5124 jam();
5125 case ARBIT_PREP2:
5126 jam();
5127 return 1000 + cnoOfNodes * Uint32(hb_send_timer.getDelay());
5128 case ARBIT_START:
5129 jam();
5130 return 1000 + arbitRec.timeout;
5131 case ARBIT_RUN: // not used (yet)
5132 jam();
5133 return 1000;
5134 case ARBIT_CHOOSE:
5135 jam();
5136 return arbitRec.timeout;
5137 case ARBIT_CRASH: // if we could wait
5138 jam();
5139 return 100;
5140 }
5141 ndbrequire(false);
5142 return (Uint32)-1;
5143 }
5144
5145 /**
5146 * Start arbitration thread when we are president and database
5147 * is opened for the first time.
5148 *
5149 * XXX Do arbitration check just like on node failure. Since
5150 * there is no arbitrator yet, must win on counts alone.
5151 */
5152 void
handleArbitStart(Signal * signal)5153 Qmgr::handleArbitStart(Signal* signal)
5154 {
5155 jam();
5156 ndbrequire(cpresident == getOwnNodeId());
5157 ndbrequire(arbitRec.state == ARBIT_NULL);
5158 arbitRec.state = ARBIT_INIT;
5159 arbitRec.newstate = true;
5160 startArbitThread(signal);
5161 }
5162
5163 /**
5164 * Handle API node failure. Called also by non-president nodes.
5165 * If we are president go back to INIT state, otherwise to NULL.
5166 * Start new thread to save time.
5167 */
5168 void
handleArbitApiFail(Signal * signal,Uint16 nodeId)5169 Qmgr::handleArbitApiFail(Signal* signal, Uint16 nodeId)
5170 {
5171 if (arbitRec.node != nodeId) {
5172 jam();
5173 return;
5174 }
5175 reportArbitEvent(signal, NDB_LE_ArbitState);
5176 arbitRec.node = 0;
5177 switch (arbitRec.state) {
5178 case ARBIT_NULL: // should not happen
5179 jam();
5180 case ARBIT_INIT:
5181 jam();
5182 case ARBIT_FIND:
5183 jam();
5184 break;
5185 case ARBIT_PREP1: // start from beginning
5186 jam();
5187 case ARBIT_PREP2:
5188 jam();
5189 case ARBIT_START:
5190 jam();
5191 case ARBIT_RUN:
5192 if (cpresident == getOwnNodeId()) {
5193 jam();
5194 arbitRec.state = ARBIT_INIT;
5195 arbitRec.newstate = true;
5196 startArbitThread(signal);
5197 } else {
5198 jam();
5199 arbitRec.state = ARBIT_NULL;
5200 }
5201 break;
5202 case ARBIT_CHOOSE: // XXX too late
5203 jam();
5204 case ARBIT_CRASH:
5205 jam();
5206 break;
5207 default:
5208 ndbrequire(false);
5209 break;
5210 }
5211 }
5212
5213 /**
5214 * Handle NDB node add. Ignore if arbitration thread not yet
5215 * started. If PREP is not ready, go back to INIT. Otherwise
5216 * the new node gets arbitrator and ticket once we reach RUN state.
5217 * Start new thread to save time.
5218 */
5219 void
handleArbitNdbAdd(Signal * signal,Uint16 nodeId)5220 Qmgr::handleArbitNdbAdd(Signal* signal, Uint16 nodeId)
5221 {
5222 jam();
5223 ndbrequire(cpresident == getOwnNodeId());
5224 switch (arbitRec.state) {
5225 case ARBIT_NULL: // before db opened
5226 jam();
5227 break;
5228 case ARBIT_INIT: // start from beginning
5229 jam();
5230 case ARBIT_FIND:
5231 jam();
5232 case ARBIT_PREP1:
5233 jam();
5234 case ARBIT_PREP2:
5235 jam();
5236 arbitRec.state = ARBIT_INIT;
5237 arbitRec.newstate = true;
5238 startArbitThread(signal);
5239 break;
5240 case ARBIT_START: // process in RUN state
5241 jam();
5242 case ARBIT_RUN:
5243 jam();
5244 arbitRec.newMask.set(nodeId);
5245 break;
5246 case ARBIT_CHOOSE: // XXX too late
5247 jam();
5248 case ARBIT_CRASH:
5249 jam();
5250 break;
5251 default:
5252 ndbrequire(false);
5253 break;
5254 }
5255 }
5256
5257 /**
5258 * Check if current nodeset can survive. The decision is
5259 * based on node count, node groups, and on external arbitrator
5260 * (if we have one). Always starts a new thread because
5261 * 1) CHOOSE cannot wait 2) if we are new president we need
5262 * a thread 3) if we are old president it does no harm.
5263 */
5264 void
handleArbitCheck(Signal * signal)5265 Qmgr::handleArbitCheck(Signal* signal)
5266 {
5267 jam();
5268 ndbrequire(cpresident == getOwnNodeId());
5269 NdbNodeBitmask ndbMask;
5270 computeArbitNdbMask(ndbMask);
5271 if (g_ndb_arbit_one_half_rule &&
5272 2 * ndbMask.count() < cnoOfNodes) {
5273 jam();
5274 arbitRec.code = ArbitCode::LoseNodes;
5275 } else {
5276 jam();
5277 CheckNodeGroups* sd = (CheckNodeGroups*)&signal->theData[0];
5278 sd->blockRef = reference();
5279 sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck;
5280 sd->mask = ndbMask;
5281 EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal,
5282 CheckNodeGroups::SignalLength);
5283 jamEntry();
5284 switch (sd->output) {
5285 case CheckNodeGroups::Win:
5286 jam();
5287 arbitRec.code = ArbitCode::WinGroups;
5288 break;
5289 case CheckNodeGroups::Lose:
5290 jam();
5291 arbitRec.code = ArbitCode::LoseGroups;
5292 break;
5293 case CheckNodeGroups::Partitioning:
5294 jam();
5295 arbitRec.code = ArbitCode::Partitioning;
5296 if (g_ndb_arbit_one_half_rule &&
5297 2 * ndbMask.count() > cnoOfNodes) {
5298 jam();
5299 arbitRec.code = ArbitCode::WinNodes;
5300 }
5301 break;
5302 default:
5303 ndbrequire(false);
5304 break;
5305 }
5306 }
5307 switch (arbitRec.code) {
5308 case ArbitCode::LoseNodes:
5309 jam();
5310 case ArbitCode::LoseGroups:
5311 jam();
5312 goto crashme;
5313 case ArbitCode::WinNodes:
5314 jam();
5315 case ArbitCode::WinGroups:
5316 jam();
5317 if (arbitRec.state == ARBIT_RUN) {
5318 jam();
5319 break;
5320 }
5321 arbitRec.state = ARBIT_INIT;
5322 arbitRec.newstate = true;
5323 break;
5324 case ArbitCode::Partitioning:
5325 if (arbitRec.state == ARBIT_RUN) {
5326 jam();
5327 arbitRec.state = ARBIT_CHOOSE;
5328 arbitRec.newstate = true;
5329 break;
5330 }
5331 if (arbitRec.apiMask[0].count() != 0) {
5332 jam();
5333 arbitRec.code = ArbitCode::LoseNorun;
5334 } else {
5335 jam();
5336 arbitRec.code = ArbitCode::LoseNocfg;
5337 }
5338 goto crashme;
5339 default:
5340 crashme:
5341 jam();
5342 arbitRec.state = ARBIT_CRASH;
5343 arbitRec.newstate = true;
5344 break;
5345 }
5346 reportArbitEvent(signal, NDB_LE_ArbitResult);
5347 switch (arbitRec.state) {
5348 default:
5349 jam();
5350 arbitRec.newMask.bitAND(ndbMask); // delete failed nodes
5351 arbitRec.recvMask.bitAND(ndbMask);
5352 sendCommitFailReq(signal); // start commit of failed nodes
5353 break;
5354 case ARBIT_CHOOSE:
5355 jam();
5356 case ARBIT_CRASH:
5357 jam();
5358 break;
5359 }
5360 startArbitThread(signal);
5361 }
5362
5363 /**
5364 * Start a new continueB thread. The thread id is incremented
5365 * so that any old thread will exit.
5366 */
5367 void
startArbitThread(Signal * signal)5368 Qmgr::startArbitThread(Signal* signal)
5369 {
5370 jam();
5371 ndbrequire(cpresident == getOwnNodeId());
5372 arbitRec.code = ArbitCode::ThreadStart;
5373 reportArbitEvent(signal, NDB_LE_ArbitState);
5374 signal->theData[1] = ++arbitRec.thread;
5375 runArbitThread(signal);
5376 }
5377
5378 /**
5379 * Handle arbitration thread. The initial thread normally ends
5380 * up in RUN state. New thread can be started to save time.
5381 */
5382 void
runArbitThread(Signal * signal)5383 Qmgr::runArbitThread(Signal* signal)
5384 {
5385 #ifdef DEBUG_ARBIT
5386 char buf[256];
5387 NdbNodeBitmask ndbMask;
5388 computeArbitNdbMask(ndbMask);
5389 ndbout << "arbit thread:";
5390 ndbout << " state=" << arbitRec.state;
5391 ndbout << " newstate=" << arbitRec.newstate;
5392 ndbout << " thread=" << arbitRec.thread;
5393 ndbout << " node=" << arbitRec.node;
5394 arbitRec.ticket.getText(buf, sizeof(buf));
5395 ndbout << " ticket=" << buf;
5396 ndbMask.getText(buf);
5397 ndbout << " ndbmask=" << buf;
5398 ndbout << " sendcount=" << arbitRec.sendCount;
5399 ndbout << " recvcount=" << arbitRec.recvCount;
5400 arbitRec.recvMask.getText(buf);
5401 ndbout << " recvmask=" << buf;
5402 ndbout << " code=" << arbitRec.code;
5403 ndbout << endl;
5404 #endif
5405 if (signal->theData[1] != arbitRec.thread) {
5406 jam();
5407 return; // old thread dies
5408 }
5409 switch (arbitRec.state) {
5410 case ARBIT_INIT: // main thread
5411 jam();
5412 stateArbitInit(signal);
5413 break;
5414 case ARBIT_FIND:
5415 jam();
5416 stateArbitFind(signal);
5417 break;
5418 case ARBIT_PREP1:
5419 jam();
5420 case ARBIT_PREP2:
5421 jam();
5422 stateArbitPrep(signal);
5423 break;
5424 case ARBIT_START:
5425 jam();
5426 stateArbitStart(signal);
5427 break;
5428 case ARBIT_RUN:
5429 jam();
5430 stateArbitRun(signal);
5431 break;
5432 case ARBIT_CHOOSE: // partitition thread
5433 jam();
5434 stateArbitChoose(signal);
5435 break;
5436 case ARBIT_CRASH:
5437 jam();
5438 stateArbitCrash(signal);
5439 break;
5440 default:
5441 ndbrequire(false);
5442 break;
5443 }
5444 signal->theData[0] = ZARBIT_HANDLING;
5445 signal->theData[1] = arbitRec.thread;
5446 signal->theData[2] = arbitRec.state; // just for signal log
5447 Uint32 delay = getArbitDelay();
5448 if (delay == 0) {
5449 jam();
5450 sendSignal(QMGR_REF, GSN_CONTINUEB, signal, 3, JBA);
5451 } else if (delay == 1) {
5452 jam();
5453 sendSignal(QMGR_REF, GSN_CONTINUEB, signal, 3, JBB);
5454 } else {
5455 jam();
5456 sendSignalWithDelay(QMGR_REF, GSN_CONTINUEB, signal, delay, 3);
5457 }//if
5458 }
5459
5460 /**
5461 * Handle INIT state. Generate next ticket. Switch to FIND
5462 * state without delay.
5463 */
5464 void
stateArbitInit(Signal * signal)5465 Qmgr::stateArbitInit(Signal* signal)
5466 {
5467 if (arbitRec.newstate) {
5468 jam();
5469 CRASH_INSERTION((Uint32)910 + arbitRec.state);
5470
5471 arbitRec.node = 0;
5472 arbitRec.ticket.update();
5473 arbitRec.newMask.clear();
5474 arbitRec.code = 0;
5475 arbitRec.newstate = false;
5476 }
5477 arbitRec.state = ARBIT_FIND;
5478 arbitRec.newstate = true;
5479 stateArbitFind(signal);
5480 }
5481
5482 /**
5483 * Handle FIND state. Find first arbitrator which is alive
5484 * and invoke PREP state without delay. If none are found,
5485 * loop in FIND state. This is forever if no arbitrators
5486 * are configured (not the normal case).
5487 *
5488 * XXX Add adaptive behaviour to avoid getting stuck on API
5489 * nodes which are alive but do not respond or die too soon.
5490 */
5491 void
stateArbitFind(Signal * signal)5492 Qmgr::stateArbitFind(Signal* signal)
5493 {
5494 if (arbitRec.newstate) {
5495 jam();
5496 CRASH_INSERTION((Uint32)910 + arbitRec.state);
5497
5498 arbitRec.code = 0;
5499 arbitRec.newstate = false;
5500 }
5501
5502 switch (arbitRec.method){
5503 case ArbitRec::METHOD_EXTERNAL:
5504 {
5505 // Don't select any API node as arbitrator
5506 arbitRec.node = 0;
5507 arbitRec.state = ARBIT_PREP1;
5508 arbitRec.newstate = true;
5509 stateArbitPrep(signal);
5510 return;
5511 break;
5512 }
5513
5514 case ArbitRec::METHOD_DEFAULT:
5515 {
5516 NodeRecPtr aPtr;
5517 // Select the best available API node as arbitrator
5518 for (unsigned rank = 1; rank <= 2; rank++) {
5519 jam();
5520 aPtr.i = 0;
5521 const unsigned stop = NodeBitmask::NotFound;
5522 while ((aPtr.i = arbitRec.apiMask[rank].find(aPtr.i + 1)) != stop) {
5523 jam();
5524 ptrAss(aPtr, nodeRec);
5525 if (aPtr.p->phase != ZAPI_ACTIVE)
5526 continue;
5527 arbitRec.node = aPtr.i;
5528 arbitRec.state = ARBIT_PREP1;
5529 arbitRec.newstate = true;
5530 stateArbitPrep(signal);
5531 return;
5532 }
5533 }
5534 return;
5535 break;
5536 }
5537
5538 default:
5539 ndbrequire(false);
5540 }
5541 }
5542
5543 /**
5544 * Handle PREP states. First round nulls any existing tickets.
5545 * Second round sends new ticket. When all confirms have been
5546 * received invoke START state immediately.
5547 */
5548 void
stateArbitPrep(Signal * signal)5549 Qmgr::stateArbitPrep(Signal* signal)
5550 {
5551 if (arbitRec.newstate) {
5552 jam();
5553 CRASH_INSERTION((Uint32)910 + arbitRec.state);
5554
5555 arbitRec.sendCount = 0; // send all at once
5556 computeArbitNdbMask(arbitRec.recvMask); // to send and recv
5557 arbitRec.recvMask.clear(getOwnNodeId());
5558 arbitRec.code = 0;
5559 arbitRec.newstate = false;
5560 }
5561 if (! arbitRec.sendCount) {
5562 jam();
5563 NodeRecPtr aPtr;
5564 aPtr.i = 0;
5565 const unsigned stop = NodeBitmask::NotFound;
5566 while ((aPtr.i = arbitRec.recvMask.find(aPtr.i + 1)) != stop) {
5567 jam();
5568 ptrAss(aPtr, nodeRec);
5569 ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
5570 sd->sender = getOwnNodeId();
5571 if (arbitRec.state == ARBIT_PREP1) {
5572 jam();
5573 sd->code = ArbitCode::PrepPart1;
5574 } else {
5575 jam();
5576 sd->code = ArbitCode::PrepPart2;
5577 }
5578 sd->node = arbitRec.node;
5579 sd->ticket = arbitRec.ticket;
5580 sd->mask.clear();
5581 sendSignal(aPtr.p->blockRef, GSN_ARBIT_PREPREQ, signal,
5582 ArbitSignalData::SignalLength, JBB);
5583 }
5584 arbitRec.setTimestamp(); // send time
5585 arbitRec.sendCount = 1;
5586 return;
5587 }
5588 if (arbitRec.code != 0) { // error
5589 jam();
5590 arbitRec.state = ARBIT_INIT;
5591 arbitRec.newstate = true;
5592 return;
5593 }
5594 if (arbitRec.recvMask.count() == 0) { // recv all
5595 if (arbitRec.state == ARBIT_PREP1) {
5596 jam();
5597 arbitRec.state = ARBIT_PREP2;
5598 arbitRec.newstate = true;
5599 } else {
5600 jam();
5601 arbitRec.state = ARBIT_START;
5602 arbitRec.newstate = true;
5603 stateArbitStart(signal);
5604 }
5605 return;
5606 }
5607 if (arbitRec.getTimediff() > getArbitTimeout()) {
5608 jam();
5609 arbitRec.state = ARBIT_INIT;
5610 arbitRec.newstate = true;
5611 return;
5612 }
5613 }
5614
5615 void
execARBIT_PREPREQ(Signal * signal)5616 Qmgr::execARBIT_PREPREQ(Signal* signal)
5617 {
5618 jamEntry();
5619 ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
5620 if (getOwnNodeId() == cpresident) {
5621 jam();
5622 return; // wrong state
5623 }
5624 if (sd->sender != cpresident) {
5625 jam();
5626 return; // wrong state
5627 }
5628 NodeRecPtr aPtr;
5629 aPtr.i = sd->sender;
5630 ptrAss(aPtr, nodeRec);
5631 switch (sd->code) {
5632 case ArbitCode::PrepPart1: // zero them just to be sure
5633 jam();
5634 arbitRec.node = 0;
5635 arbitRec.ticket.clear();
5636 break;
5637 case ArbitCode::PrepPart2: // non-president enters RUN state
5638 jam();
5639 case ArbitCode::PrepAtrun:
5640 jam();
5641 arbitRec.node = sd->node;
5642 arbitRec.ticket = sd->ticket;
5643 arbitRec.code = sd->code;
5644 reportArbitEvent(signal, NDB_LE_ArbitState);
5645 arbitRec.state = ARBIT_RUN;
5646 arbitRec.newstate = true;
5647 if (sd->code == ArbitCode::PrepAtrun) {
5648 jam();
5649 return;
5650 }
5651 break;
5652 default:
5653 jam();
5654 ndbrequire(false);
5655 }
5656 sd->sender = getOwnNodeId();
5657 sd->code = 0;
5658 sendSignal(aPtr.p->blockRef, GSN_ARBIT_PREPCONF, signal,
5659 ArbitSignalData::SignalLength, JBB);
5660 }
5661
5662 void
execARBIT_PREPCONF(Signal * signal)5663 Qmgr::execARBIT_PREPCONF(Signal* signal)
5664 {
5665 jamEntry();
5666 ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
5667 if (! arbitRec.match(sd)) {
5668 jam();
5669 return; // stray signal
5670 }
5671 if (arbitRec.state != ARBIT_PREP1 && arbitRec.state != ARBIT_PREP2) {
5672 jam();
5673 return; // wrong state
5674 }
5675 if (! arbitRec.recvMask.get(sd->sender)) {
5676 jam();
5677 return; // wrong state
5678 }
5679 arbitRec.recvMask.clear(sd->sender);
5680 if (arbitRec.code == 0 && sd->code != 0) {
5681 jam();
5682 arbitRec.code = sd->code;
5683 }//if
5684 }
5685
5686 void
execARBIT_PREPREF(Signal * signal)5687 Qmgr::execARBIT_PREPREF(Signal* signal)
5688 {
5689 jamEntry();
5690 ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
5691 if (sd->code == 0) {
5692 jam();
5693 sd->code = ArbitCode::ErrUnknown;
5694 }
5695 execARBIT_PREPCONF(signal);
5696 }
5697
5698 /**
5699 * Handle START state. On first call send start request to
5700 * the chosen arbitrator. Then wait for a CONF.
5701 */
5702 void
stateArbitStart(Signal * signal)5703 Qmgr::stateArbitStart(Signal* signal)
5704 {
5705 if (arbitRec.newstate) {
5706 jam();
5707 CRASH_INSERTION((Uint32)910 + arbitRec.state);
5708
5709 arbitRec.sendCount = 0;
5710 arbitRec.recvCount = 0;
5711 arbitRec.code = 0;
5712 arbitRec.newstate = false;
5713 }
5714
5715 switch (arbitRec.method){
5716 case ArbitRec::METHOD_EXTERNAL:
5717 jam();
5718 ndbrequire(arbitRec.node == 0); // No arbitrator selected
5719
5720 // Don't start arbitrator in API node => ARBIT_RUN
5721 arbitRec.state = ARBIT_RUN;
5722 arbitRec.newstate = true;
5723 return;
5724 break;
5725
5726 case ArbitRec::METHOD_DEFAULT:
5727 if (! arbitRec.sendCount) {
5728 jam();
5729 BlockReference blockRef = calcApiClusterMgrBlockRef(arbitRec.node);
5730 ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
5731 sd->sender = getOwnNodeId();
5732 sd->code = 0;
5733 sd->node = arbitRec.node;
5734 sd->ticket = arbitRec.ticket;
5735 sd->mask.clear();
5736 sendSignal(blockRef, GSN_ARBIT_STARTREQ, signal,
5737 ArbitSignalData::SignalLength, JBB);
5738 arbitRec.sendCount = 1;
5739 arbitRec.setTimestamp(); // send time
5740 return;
5741 }
5742 if (arbitRec.recvCount) {
5743 jam();
5744 reportArbitEvent(signal, NDB_LE_ArbitState);
5745 if (arbitRec.code == ArbitCode::ApiStart) {
5746 jam();
5747 arbitRec.state = ARBIT_RUN;
5748 arbitRec.newstate = true;
5749 return;
5750 }
5751 arbitRec.state = ARBIT_INIT;
5752 arbitRec.newstate = true;
5753 return;
5754 }
5755 if (arbitRec.getTimediff() > getArbitTimeout()) {
5756 jam();
5757 arbitRec.code = ArbitCode::ErrTimeout;
5758 reportArbitEvent(signal, NDB_LE_ArbitState);
5759 arbitRec.state = ARBIT_INIT;
5760 arbitRec.newstate = true;
5761 return;
5762 }
5763 break;
5764
5765 default:
5766 ndbrequire(false);
5767 break;
5768 }
5769 }
5770
5771 void
execARBIT_STARTCONF(Signal * signal)5772 Qmgr::execARBIT_STARTCONF(Signal* signal)
5773 {
5774 jamEntry();
5775 ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
5776 if (! arbitRec.match(sd)) {
5777 jam();
5778 return; // stray signal
5779 }
5780 if (arbitRec.state != ARBIT_START) {
5781 jam();
5782 return; // wrong state
5783 }
5784 if (arbitRec.recvCount) {
5785 jam();
5786 return; // wrong state
5787 }
5788 arbitRec.code = sd->code;
5789 arbitRec.recvCount = 1;
5790 }
5791
5792 void
execARBIT_STARTREF(Signal * signal)5793 Qmgr::execARBIT_STARTREF(Signal* signal)
5794 {
5795 jamEntry();
5796 ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
5797 if (sd->code == 0) {
5798 jam();
5799 sd->code = ArbitCode::ErrUnknown;
5800 }
5801 execARBIT_STARTCONF(signal);
5802 }
5803
5804 /**
5805 * Handle RUN state. Send ticket to any new nodes which have
5806 * appeared after PREP state. We don't care about a CONF.
5807 */
5808 void
stateArbitRun(Signal * signal)5809 Qmgr::stateArbitRun(Signal* signal)
5810 {
5811 if (arbitRec.newstate) {
5812 jam();
5813 CRASH_INSERTION((Uint32)910 + arbitRec.state);
5814
5815 arbitRec.code = 0;
5816 arbitRec.newstate = false;
5817 }
5818 NodeRecPtr aPtr;
5819 aPtr.i = 0;
5820 const unsigned stop = NodeBitmask::NotFound;
5821 while ((aPtr.i = arbitRec.newMask.find(aPtr.i + 1)) != stop) {
5822 jam();
5823 arbitRec.newMask.clear(aPtr.i);
5824 ptrAss(aPtr, nodeRec);
5825 ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
5826 sd->sender = getOwnNodeId();
5827 sd->code = ArbitCode::PrepAtrun;
5828 sd->node = arbitRec.node;
5829 sd->ticket = arbitRec.ticket;
5830 sd->mask.clear();
5831 sendSignal(aPtr.p->blockRef, GSN_ARBIT_PREPREQ, signal,
5832 ArbitSignalData::SignalLength, JBB);
5833 }
5834 }
5835
5836 /**
5837 * Handle CHOOSE state. Entered only from RUN state when
5838 * there is a possible network partitioning. Send CHOOSE to
5839 * the arbitrator. On win switch to INIT state because a new
5840 * ticket must be created.
5841 */
5842 void
stateArbitChoose(Signal * signal)5843 Qmgr::stateArbitChoose(Signal* signal)
5844 {
5845 if (arbitRec.newstate) {
5846 jam();
5847 CRASH_INSERTION((Uint32)910 + arbitRec.state);
5848
5849 arbitRec.sendCount = 0;
5850 arbitRec.recvCount = 0;
5851 arbitRec.code = 0;
5852 arbitRec.newstate = false;
5853 }
5854
5855 switch(arbitRec.method){
5856 case ArbitRec::METHOD_EXTERNAL:
5857 {
5858 if (! arbitRec.sendCount) {
5859 jam();
5860 ndbrequire(arbitRec.node == 0); // No arbitrator selected
5861 // Don't send CHOOSE to anyone, just wait for timeout to expire
5862 arbitRec.sendCount = 1;
5863 arbitRec.setTimestamp();
5864 return;
5865 }
5866
5867 if (arbitRec.getTimediff() > getArbitTimeout()) {
5868 jam();
5869 // Arbitration timeout has expired
5870 ndbrequire(arbitRec.node == 0); // No arbitrator selected
5871
5872 NodeBitmask nodes;
5873 computeArbitNdbMask(nodes);
5874 arbitRec.code = ArbitCode::WinWaitExternal;
5875 reportArbitEvent(signal, NDB_LE_ArbitResult, nodes);
5876
5877 sendCommitFailReq(signal); // start commit of failed nodes
5878 arbitRec.state = ARBIT_INIT;
5879 arbitRec.newstate = true;
5880 return;
5881 }
5882 break;
5883 }
5884
5885 case ArbitRec::METHOD_DEFAULT:
5886 {
5887 if (! arbitRec.sendCount) {
5888 jam();
5889 const BlockReference blockRef = calcApiClusterMgrBlockRef(arbitRec.node);
5890 ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
5891 sd->sender = getOwnNodeId();
5892 sd->code = 0;
5893 sd->node = arbitRec.node;
5894 sd->ticket = arbitRec.ticket;
5895 computeArbitNdbMask(sd->mask);
5896 sendSignal(blockRef, GSN_ARBIT_CHOOSEREQ, signal,
5897 ArbitSignalData::SignalLength, JBA);
5898 arbitRec.sendCount = 1;
5899 arbitRec.setTimestamp(); // send time
5900 return;
5901 }
5902
5903 if (arbitRec.recvCount) {
5904 jam();
5905 reportArbitEvent(signal, NDB_LE_ArbitResult);
5906 if (arbitRec.code == ArbitCode::WinChoose) {
5907 jam();
5908 sendCommitFailReq(signal); // start commit of failed nodes
5909 arbitRec.state = ARBIT_INIT;
5910 arbitRec.newstate = true;
5911 return;
5912 }
5913 arbitRec.state = ARBIT_CRASH;
5914 arbitRec.newstate = true;
5915 stateArbitCrash(signal); // do it at once
5916 return;
5917 }
5918
5919 if (arbitRec.getTimediff() > getArbitTimeout()) {
5920 jam();
5921 // Arbitration timeout has expired
5922 arbitRec.code = ArbitCode::ErrTimeout;
5923 reportArbitEvent(signal, NDB_LE_ArbitState);
5924 arbitRec.state = ARBIT_CRASH;
5925 arbitRec.newstate = true;
5926 stateArbitCrash(signal); // do it at once
5927 return;
5928 }
5929 break;
5930 }
5931
5932 default:
5933 ndbrequire(false);
5934 break;
5935 }
5936 }
5937
5938 void
execARBIT_CHOOSECONF(Signal * signal)5939 Qmgr::execARBIT_CHOOSECONF(Signal* signal)
5940 {
5941 jamEntry();
5942 ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
5943 if (!arbitRec.match(sd)) {
5944 jam();
5945 return; // stray signal
5946 }
5947 if (arbitRec.state != ARBIT_CHOOSE) {
5948 jam();
5949 return; // wrong state
5950 }
5951 if (arbitRec.recvCount) {
5952 jam();
5953 return; // wrong state
5954 }
5955 arbitRec.recvCount = 1;
5956 arbitRec.code = sd->code;
5957 }
5958
5959 void
execARBIT_CHOOSEREF(Signal * signal)5960 Qmgr::execARBIT_CHOOSEREF(Signal* signal)
5961 {
5962 jamEntry();
5963 ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
5964 if (sd->code == 0) {
5965 jam();
5966 sd->code = ArbitCode::ErrUnknown;
5967 }
5968 execARBIT_CHOOSECONF(signal);
5969 }
5970
5971 /**
5972 * Handle CRASH state. We must crash immediately.
5973 * XXX tell other nodes in our party to crash too.
5974 */
5975 void
stateArbitCrash(Signal * signal)5976 Qmgr::stateArbitCrash(Signal* signal)
5977 {
5978 jam();
5979 if (arbitRec.newstate) {
5980 jam();
5981 CRASH_INSERTION((Uint32)910 + arbitRec.state);
5982 arbitRec.setTimestamp();
5983 arbitRec.code = 0;
5984 arbitRec.newstate = false;
5985 }
5986 #ifdef ndb_arbit_crash_wait_for_event_report_to_get_out
5987 if (! (arbitRec.getTimediff() > getArbitTimeout()))
5988 return;
5989 #endif
5990 CRASH_INSERTION(932);
5991 CRASH_INSERTION(938);
5992 progError(__LINE__, NDBD_EXIT_ARBIT_SHUTDOWN,
5993 "Arbitrator decided to shutdown this node");
5994 }
5995
5996 /**
5997 * Arbitrator may inform us that it will exit. This lets us
5998 * start looking sooner for a new one. Handle it like API node
5999 * failure.
6000 */
6001 void
execARBIT_STOPREP(Signal * signal)6002 Qmgr::execARBIT_STOPREP(Signal* signal)
6003 {
6004 jamEntry();
6005 ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
6006 if (! arbitRec.match(sd)) {
6007 jam();
6008 return; // stray signal
6009 }
6010 arbitRec.code = ArbitCode::ApiExit;
6011 handleArbitApiFail(signal, arbitRec.node);
6012 }
6013
6014 void
computeArbitNdbMask(NodeBitmaskPOD & aMask)6015 Qmgr::computeArbitNdbMask(NodeBitmaskPOD& aMask)
6016 {
6017 NodeRecPtr aPtr;
6018 aMask.clear();
6019 for (aPtr.i = 1; aPtr.i < MAX_NDB_NODES; aPtr.i++) {
6020 jam();
6021 ptrAss(aPtr, nodeRec);
6022 if (getNodeInfo(aPtr.i).getType() == NodeInfo::DB && aPtr.p->phase == ZRUNNING){
6023 jam();
6024 aMask.set(aPtr.i);
6025 }
6026 }
6027 }
6028
6029 void
computeArbitNdbMask(NdbNodeBitmaskPOD & aMask)6030 Qmgr::computeArbitNdbMask(NdbNodeBitmaskPOD& aMask)
6031 {
6032 NodeRecPtr aPtr;
6033 aMask.clear();
6034 for (aPtr.i = 1; aPtr.i < MAX_NDB_NODES; aPtr.i++) {
6035 jam();
6036 ptrAss(aPtr, nodeRec);
6037 if (getNodeInfo(aPtr.i).getType() == NodeInfo::DB && aPtr.p->phase == ZRUNNING){
6038 jam();
6039 aMask.set(aPtr.i);
6040 }
6041 }
6042 }
6043
6044 /**
6045 * Report arbitration event. We use arbitration signal format
6046 * where sender (word 0) is event type.
6047 */
6048 void
reportArbitEvent(Signal * signal,Ndb_logevent_type type,const NodeBitmask mask)6049 Qmgr::reportArbitEvent(Signal* signal, Ndb_logevent_type type,
6050 const NodeBitmask mask)
6051 {
6052 ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
6053 sd->sender = type;
6054 sd->code = arbitRec.code | (arbitRec.state << 16);
6055 sd->node = arbitRec.node;
6056 sd->ticket = arbitRec.ticket;
6057 sd->mask = mask;
6058 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal,
6059 ArbitSignalData::SignalLength, JBB);
6060 }
6061
6062 // end of arbitration module
6063
6064 void
execDUMP_STATE_ORD(Signal * signal)6065 Qmgr::execDUMP_STATE_ORD(Signal* signal)
6066 {
6067 switch (signal->theData[0]) {
6068 case 1:
6069 infoEvent("creadyDistCom = %d, cpresident = %d\n",
6070 creadyDistCom, cpresident);
6071 infoEvent("cpresidentAlive = %d, cpresidentCand = %d (gci: %d)\n",
6072 cpresidentAlive,
6073 c_start.m_president_candidate,
6074 c_start.m_president_candidate_gci);
6075 infoEvent("ctoStatus = %d\n", ctoStatus);
6076 for(Uint32 i = 1; i<MAX_NDB_NODES; i++){
6077 NodeRecPtr nodePtr;
6078 nodePtr.i = i;
6079 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
6080 char buf[100];
6081 switch(nodePtr.p->phase){
6082 case ZINIT:
6083 sprintf(buf, "Node %d: ZINIT(%d)", i, nodePtr.p->phase);
6084 break;
6085 case ZSTARTING:
6086 sprintf(buf, "Node %d: ZSTARTING(%d)", i, nodePtr.p->phase);
6087 break;
6088 case ZRUNNING:
6089 sprintf(buf, "Node %d: ZRUNNING(%d)", i, nodePtr.p->phase);
6090 break;
6091 case ZPREPARE_FAIL:
6092 sprintf(buf, "Node %d: ZPREPARE_FAIL(%d)", i, nodePtr.p->phase);
6093 break;
6094 case ZFAIL_CLOSING:
6095 sprintf(buf, "Node %d: ZFAIL_CLOSING(%d)", i, nodePtr.p->phase);
6096 break;
6097 case ZAPI_INACTIVE:
6098 sprintf(buf, "Node %d: ZAPI_INACTIVE(%d)", i, nodePtr.p->phase);
6099 break;
6100 case ZAPI_ACTIVE:
6101 sprintf(buf, "Node %d: ZAPI_ACTIVE(%d)", i, nodePtr.p->phase);
6102 break;
6103 default:
6104 sprintf(buf, "Node %d: <UNKNOWN>(%d)", i, nodePtr.p->phase);
6105 break;
6106 }
6107 infoEvent("%s", buf);
6108 }
6109 }
6110
6111 #ifdef ERROR_INSERT
6112 if (signal->theData[0] == 935 && signal->getLength() == 2)
6113 {
6114 SET_ERROR_INSERT_VALUE(935);
6115 c_error_insert_extra = signal->theData[1];
6116 }
6117 #endif
6118
6119 if (signal->theData[0] == 900 && signal->getLength() == 2)
6120 {
6121 ndbout_c("disconnecting %u", signal->theData[1]);
6122 api_failed(signal, signal->theData[1]);
6123 }
6124
6125 if (signal->theData[0] == 908)
6126 {
6127 int tag = signal->getLength() < 2 ? -1 : signal->theData[1];
6128 char buf[8192];
6129 // for easy grepping in *out.log ...
6130 strcpy(buf, "HB:");
6131 if (tag >= 0)
6132 sprintf(buf+strlen(buf), "%d:", tag);
6133 sprintf(buf+strlen(buf), " pres:%u", cpresident);
6134 sprintf(buf+strlen(buf), " own:%u", getOwnNodeId());
6135 NodeRecPtr myNodePtr;
6136 myNodePtr.i = getOwnNodeId();
6137 ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);
6138 sprintf(buf+strlen(buf), " dyn:%u-%u", myNodePtr.p->ndynamicId & 0xFFFF, myNodePtr.p->ndynamicId >> 16);
6139 sprintf(buf+strlen(buf), " mxdyn:%u", c_maxDynamicId);
6140 sprintf(buf+strlen(buf), " hb:%u->%u->%u", cneighbourl, getOwnNodeId(), cneighbourh);
6141 sprintf(buf+strlen(buf), " node:dyn-hi,cfg:");
6142 NodeRecPtr nodePtr;
6143 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++)
6144 {
6145 ptrAss(nodePtr, nodeRec);
6146 Uint32 type = getNodeInfo(nodePtr.i).m_type;
6147 if (type == NodeInfo::DB)
6148 {
6149 sprintf(buf+strlen(buf), " %u:%u-%u,%u", nodePtr.i, nodePtr.p->ndynamicId & 0xFFFF, nodePtr.p->ndynamicId >> 16, nodePtr.p->hbOrder);
6150 }
6151 }
6152 ndbout << buf << endl;
6153 }
6154
6155 #ifdef ERROR_INSERT
6156 Uint32 dumpCode = signal->theData[0];
6157 if ((dumpCode == 9992) ||
6158 (dumpCode == 9993))
6159 {
6160 if (signal->getLength() == 2)
6161 {
6162 Uint32 nodeId = signal->theData[1];
6163 Uint32& newNodeId = signal->theData[1];
6164 Uint32 length = 2;
6165 assert(257 > MAX_NODES);
6166 if (nodeId > MAX_NODES)
6167 {
6168 const char* type = "None";
6169 switch (nodeId)
6170 {
6171 case 257:
6172 {
6173 /* Left (lower) neighbour */
6174 newNodeId = cneighbourl;
6175 type = "Left neighbour";
6176 break;
6177 }
6178 case 258:
6179 {
6180 /* Right (higher) neighbour */
6181 newNodeId = cneighbourh;
6182 type = "Right neighbour";
6183 break;
6184 }
6185 case 259:
6186 {
6187 /* President */
6188 newNodeId = cpresident;
6189 type = "President";
6190 break;
6191 }
6192 }
6193 ndbout_c("QMGR : Mapping request on node id %u to node id %u (%s)",
6194 nodeId, newNodeId, type);
6195 if (newNodeId != nodeId)
6196 {
6197 sendSignal(CMVMI_REF, GSN_DUMP_STATE_ORD, signal, length, JBB);
6198 }
6199 }
6200 }
6201 }
6202
6203 if (dumpCode == 9994)
6204 {
6205 ndbout_c("setCCDelay(%u)", signal->theData[1]);
6206 setCCDelay(signal->theData[1]);
6207 m_connectivity_check.m_enabled = true;
6208 }
6209 #endif
6210 }//Qmgr::execDUMP_STATE_ORD()
6211
6212
6213 void
execAPI_BROADCAST_REP(Signal * signal)6214 Qmgr::execAPI_BROADCAST_REP(Signal* signal)
6215 {
6216 jamEntry();
6217 ApiBroadcastRep api= *(const ApiBroadcastRep*)signal->getDataPtr();
6218
6219 SectionHandle handle(this, signal);
6220 Uint32 len = signal->getLength() - ApiBroadcastRep::SignalLength;
6221 memmove(signal->theData, signal->theData+ApiBroadcastRep::SignalLength,
6222 4*len);
6223
6224 NodeBitmask mask;
6225 NodeRecPtr nodePtr;
6226 for (nodePtr.i = 1; nodePtr.i < MAX_NODES; nodePtr.i++)
6227 {
6228 jam();
6229 ptrAss(nodePtr, nodeRec);
6230 if (nodePtr.p->phase == ZAPI_ACTIVE &&
6231 getNodeInfo(nodePtr.i).m_version >= api.minVersion)
6232 {
6233 jam();
6234 mask.set(nodePtr.i);
6235 }
6236 }
6237
6238 if (mask.isclear())
6239 {
6240 jam();
6241 releaseSections(handle);
6242 return;
6243 }
6244
6245 NodeReceiverGroup rg(API_CLUSTERMGR, mask);
6246 sendSignal(rg, api.gsn, signal, len, JBB,
6247 &handle);
6248 }
6249
/**
 * NODE_FAILREP handler.
 *
 * Passes the signal on to the signal counter manager (c_counterMgr);
 * no other processing is done here.
 */
void
Qmgr::execNODE_FAILREP(Signal * signal)
{
  jamEntry();
  // Make sure any distributed signals get acknowledged.
  // Note: the call below is destructive of the signal, so its contents
  // must not be relied upon afterwards.
  c_counterMgr.execNODE_FAILREP(signal);
}
6258
6259 void
execALLOC_NODEID_REQ(Signal * signal)6260 Qmgr::execALLOC_NODEID_REQ(Signal * signal)
6261 {
6262 jamEntry();
6263 AllocNodeIdReq req = *(AllocNodeIdReq*)signal->getDataPtr();
6264 Uint32 error = 0;
6265
6266 NodeRecPtr nodePtr;
6267 nodePtr.i = req.nodeId;
6268 ptrAss(nodePtr, nodeRec);
6269
6270 if (refToBlock(req.senderRef) != QMGR) // request from management server
6271 {
6272 /* master */
6273
6274 if (getOwnNodeId() != cpresident)
6275 {
6276 jam();
6277 error = AllocNodeIdRef::NotMaster;
6278 }
6279 else if (!opAllocNodeIdReq.m_tracker.done())
6280 {
6281 jam();
6282 error = AllocNodeIdRef::Busy;
6283 }
6284 else if (c_connectedNodes.get(req.nodeId))
6285 {
6286 jam();
6287 error = AllocNodeIdRef::NodeConnected;
6288 }
6289 else if (nodePtr.p->m_secret != 0)
6290 {
6291 jam();
6292 error = AllocNodeIdRef::NodeReserved;
6293 }
6294
6295 if (error)
6296 {
6297 jam();
6298 AllocNodeIdRef * ref = (AllocNodeIdRef*)signal->getDataPtrSend();
6299 ref->senderRef = reference();
6300 ref->errorCode = error;
6301 ref->masterRef = numberToRef(QMGR, cpresident);
6302 ref->senderData = req.senderData;
6303 ref->nodeId = req.nodeId;
6304 sendSignal(req.senderRef, GSN_ALLOC_NODEID_REF, signal,
6305 AllocNodeIdRef::SignalLength, JBB);
6306 return;
6307 }
6308
6309 if (ERROR_INSERTED(934) && req.nodeId != getOwnNodeId())
6310 {
6311 CRASH_INSERTION(934);
6312 }
6313
6314 /**
6315 * generate secret
6316 */
6317 Uint64 now = NdbTick_CurrentMillisecond();
6318 Uint32 secret_hi = Uint32(now >> 24);
6319 Uint32 secret_lo = Uint32(now << 8) + getOwnNodeId();
6320 req.secret_hi = secret_hi;
6321 req.secret_lo = secret_lo;
6322
6323 if (req.timeout > 60000)
6324 req.timeout = 60000;
6325
6326 nodePtr.p->m_secret = (Uint64(secret_hi) << 32) + secret_lo;
6327 nodePtr.p->m_alloc_timeout = now + req.timeout;
6328
6329 opAllocNodeIdReq.m_req = req;
6330 opAllocNodeIdReq.m_error = 0;
6331 opAllocNodeIdReq.m_connectCount =
6332 getNodeInfo(refToNode(req.senderRef)).m_connectCount;
6333
6334 jam();
6335 AllocNodeIdReq * req2 = (AllocNodeIdReq*)signal->getDataPtrSend();
6336 * req2 = req;
6337 req2->senderRef = reference();
6338 NodeReceiverGroup rg(QMGR, c_clusterNodes);
6339 RequestTracker & p = opAllocNodeIdReq.m_tracker;
6340 p.init<AllocNodeIdRef>(c_counterMgr, rg, GSN_ALLOC_NODEID_REF, 0);
6341
6342 sendSignal(rg, GSN_ALLOC_NODEID_REQ, signal,
6343 AllocNodeIdReq::SignalLengthQMGR, JBB);
6344 return;
6345 }
6346
6347 /* participant */
6348 if (c_connectedNodes.get(req.nodeId))
6349 {
6350 jam();
6351 error = AllocNodeIdRef::NodeConnected;
6352 }
6353 else if (req.nodeType != getNodeInfo(req.nodeId).m_type)
6354 {
6355 jam();
6356 error = AllocNodeIdRef::NodeTypeMismatch;
6357 }
6358 else if (nodePtr.p->failState != NORMAL)
6359 {
6360 jam();
6361 error = AllocNodeIdRef::NodeFailureHandlingNotCompleted;
6362 }
6363 #if 0
6364 /**
6365 * For now only make "time/secret" based reservation on master
6366 * as we otherwise also need to clear it on failure + handle
6367 * master failure
6368 */
6369 else if (nodePtr.p->m_secret != 0)
6370 {
6371 jam();
6372 error = AllocNodeIdRef::NodeReserved;
6373 }
6374 #endif
6375
6376 if (error)
6377 {
6378 jam();
6379 AllocNodeIdRef * ref = (AllocNodeIdRef*)signal->getDataPtrSend();
6380 ref->senderRef = reference();
6381 ref->errorCode = error;
6382 ref->senderData = req.senderData;
6383 ref->nodeId = req.nodeId;
6384 ref->masterRef = numberToRef(QMGR, cpresident);
6385 sendSignal(req.senderRef, GSN_ALLOC_NODEID_REF, signal,
6386 AllocNodeIdRef::SignalLength, JBB);
6387 return;
6388 }
6389
6390 AllocNodeIdConf * conf = (AllocNodeIdConf*)signal->getDataPtrSend();
6391 conf->senderRef = reference();
6392 conf->secret_hi = req.secret_hi;
6393 conf->secret_lo = req.secret_lo;
6394 sendSignal(req.senderRef, GSN_ALLOC_NODEID_CONF, signal,
6395 AllocNodeIdConf::SignalLength, JBB);
6396 }
6397
6398 void
execALLOC_NODEID_CONF(Signal * signal)6399 Qmgr::execALLOC_NODEID_CONF(Signal * signal)
6400 {
6401 /* master */
6402
6403 jamEntry();
6404 const AllocNodeIdConf * conf = (AllocNodeIdConf*)signal->getDataPtr();
6405 opAllocNodeIdReq.m_tracker.reportConf(c_counterMgr,
6406 refToNode(conf->senderRef));
6407
6408 if (signal->getLength() >= AllocNodeIdConf::SignalLength)
6409 {
6410 jam();
6411 if (opAllocNodeIdReq.m_req.secret_hi != conf->secret_hi ||
6412 opAllocNodeIdReq.m_req.secret_lo != conf->secret_lo)
6413 {
6414 jam();
6415 if (opAllocNodeIdReq.m_error == 0)
6416 {
6417 jam();
6418 opAllocNodeIdReq.m_error = AllocNodeIdRef::Undefined;
6419 }
6420 }
6421 }
6422
6423 completeAllocNodeIdReq(signal);
6424 }
6425
6426
6427 void
execALLOC_NODEID_REF(Signal * signal)6428 Qmgr::execALLOC_NODEID_REF(Signal * signal)
6429 {
6430 /* master */
6431
6432 jamEntry();
6433 const AllocNodeIdRef * ref = (AllocNodeIdRef*)signal->getDataPtr();
6434 if (ref->errorCode == AllocNodeIdRef::NF_FakeErrorREF)
6435 {
6436 jam();
6437 opAllocNodeIdReq.m_tracker.ignoreRef(c_counterMgr,
6438 refToNode(ref->senderRef));
6439 }
6440 else
6441 {
6442 jam();
6443 opAllocNodeIdReq.m_tracker.reportRef(c_counterMgr,
6444 refToNode(ref->senderRef));
6445 if (opAllocNodeIdReq.m_error == 0)
6446 {
6447 jam();
6448 opAllocNodeIdReq.m_error = ref->errorCode;
6449 }
6450 }
6451 completeAllocNodeIdReq(signal);
6452 }
6453
6454 void
completeAllocNodeIdReq(Signal * signal)6455 Qmgr::completeAllocNodeIdReq(Signal *signal)
6456 {
6457 /* master */
6458
6459 if (!opAllocNodeIdReq.m_tracker.done())
6460 {
6461 jam();
6462 return;
6463 }
6464
6465 if (opAllocNodeIdReq.m_connectCount !=
6466 getNodeInfo(refToNode(opAllocNodeIdReq.m_req.senderRef)).m_connectCount)
6467 {
6468 // management server not same version as the original requester
6469 jam();
6470 return;
6471 }
6472
6473 if (opAllocNodeIdReq.m_tracker.hasRef())
6474 {
6475 jam();
6476
6477 {
6478 /**
6479 * Clear reservation
6480 */
6481 NodeRecPtr nodePtr;
6482 nodePtr.i = opAllocNodeIdReq.m_req.nodeId;
6483 ptrAss(nodePtr, nodeRec);
6484 nodePtr.p->m_secret = 0;
6485 }
6486
6487 AllocNodeIdRef * ref = (AllocNodeIdRef*)signal->getDataPtrSend();
6488 ref->senderRef = reference();
6489 ref->senderData = opAllocNodeIdReq.m_req.senderData;
6490 ref->nodeId = opAllocNodeIdReq.m_req.nodeId;
6491 ref->errorCode = opAllocNodeIdReq.m_error;
6492 ref->masterRef = numberToRef(QMGR, cpresident);
6493 ndbassert(AllocNodeIdRef::SignalLength == 5);
6494 sendSignal(opAllocNodeIdReq.m_req.senderRef, GSN_ALLOC_NODEID_REF, signal,
6495 AllocNodeIdRef::SignalLength, JBB);
6496 return;
6497 }
6498
6499 jam();
6500
6501 AllocNodeIdConf * conf = (AllocNodeIdConf*)signal->getDataPtrSend();
6502 conf->senderRef = reference();
6503 conf->senderData = opAllocNodeIdReq.m_req.senderData;
6504 conf->nodeId = opAllocNodeIdReq.m_req.nodeId;
6505 conf->secret_lo = opAllocNodeIdReq.m_req.secret_lo;
6506 conf->secret_hi = opAllocNodeIdReq.m_req.secret_hi;
6507 sendSignal(opAllocNodeIdReq.m_req.senderRef, GSN_ALLOC_NODEID_CONF, signal,
6508 AllocNodeIdConf::SignalLength, JBB);
6509 }
6510
6511 void
execSTOP_REQ(Signal * signal)6512 Qmgr::execSTOP_REQ(Signal* signal)
6513 {
6514 jamEntry();
6515 c_stopReq = * (StopReq*)signal->getDataPtr();
6516
6517 if (c_stopReq.senderRef)
6518 {
6519 jam();
6520 ndbrequire(NdbNodeBitmask::get(c_stopReq.nodes, getOwnNodeId()));
6521
6522 StopConf *conf = (StopConf*)signal->getDataPtrSend();
6523 conf->senderData = c_stopReq.senderData;
6524 conf->nodeState = getOwnNodeId();
6525 sendSignal(c_stopReq.senderRef,
6526 GSN_STOP_CONF, signal, StopConf::SignalLength, JBA);
6527 }
6528 }
6529
6530 bool
check_multi_node_shutdown(Signal * signal)6531 Qmgr::check_multi_node_shutdown(Signal* signal)
6532 {
6533 if (c_stopReq.senderRef &&
6534 NdbNodeBitmask::get(c_stopReq.nodes, getOwnNodeId()))
6535 {
6536 jam();
6537 if(StopReq::getPerformRestart(c_stopReq.requestInfo))
6538 {
6539 jam();
6540 StartOrd * startOrd = (StartOrd *)&signal->theData[0];
6541 startOrd->restartInfo = c_stopReq.requestInfo;
6542 sendSignal(CMVMI_REF, GSN_START_ORD, signal, 2, JBA);
6543 } else {
6544 sendSignal(CMVMI_REF, GSN_STOP_ORD, signal, 1, JBA);
6545 }
6546 return true;
6547 }
6548 return false;
6549 }
6550
6551 int
check_hb_order_config()6552 Qmgr::check_hb_order_config()
6553 {
6554 m_hb_order_config_used = false;
6555 Uint32 count = 0;
6556 Uint32 count_zero = 0;
6557 NodeRecPtr nodePtr;
6558 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++)
6559 {
6560 ptrAss(nodePtr, nodeRec);
6561 const NodeInfo& nodeInfo = getNodeInfo(nodePtr.i);
6562 if (nodeInfo.m_type == NodeInfo::DB)
6563 {
6564 count++;
6565 if (nodePtr.p->hbOrder == 0)
6566 count_zero++;
6567 }
6568 }
6569 ndbrequire(count != 0); // must have node info
6570 if (count_zero == count)
6571 {
6572 jam();
6573 return 0; // no hbOrder defined
6574 }
6575 if (count_zero != 0)
6576 {
6577 jam();
6578 return -1; // error: not all zero or all nonzero
6579 }
6580 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++)
6581 {
6582 ptrAss(nodePtr, nodeRec);
6583 const NodeInfo& nodeInfo = getNodeInfo(nodePtr.i);
6584 if (nodeInfo.m_type == NodeInfo::DB)
6585 {
6586 NodeRecPtr nodePtr2;
6587 for (nodePtr2.i = nodePtr.i + 1; nodePtr2.i < MAX_NDB_NODES; nodePtr2.i++)
6588 {
6589 ptrAss(nodePtr2, nodeRec);
6590 const NodeInfo& nodeInfo2 = getNodeInfo(nodePtr2.i);
6591 if (nodeInfo2.m_type == NodeInfo::DB)
6592 {
6593 if (nodePtr.i != nodePtr2.i &&
6594 nodePtr.p->hbOrder == nodePtr2.p->hbOrder)
6595 {
6596 jam();
6597 return -2; // error: duplicate nonzero value
6598 }
6599 }
6600 }
6601 }
6602 }
6603 m_hb_order_config_used = true;
6604 return 0;
6605 }
6606
/* Tick thresholds for a connectivity check round.  They are compared
 * against m_connectivity_check.m_tick, which is advanced by
 * checkConnectivityTimeSignal() while a check is underway:
 *  - at CC_SuspectTicks, nodes that have not responded become suspect
 *  - at CC_FailedTicks, nodes that have not responded are failed
 */
static const Uint32 CC_SuspectTicks = 1;
static const Uint32 CC_FailedTicks = 2;
6609
6610 void
startConnectivityCheck(Signal * signal,Uint32 reason,Uint32 causingNode)6611 Qmgr::startConnectivityCheck(Signal* signal, Uint32 reason, Uint32 causingNode)
6612 {
6613 jam();
6614 ndbrequire(m_connectivity_check.getEnabled());
6615
6616 if (m_connectivity_check.m_active)
6617 {
6618 jam();
6619 /* Connectivity check underway already
6620 * do nothing
6621 */
6622 return;
6623 }
6624
6625
6626 m_connectivity_check.m_nodesPinged.clear();
6627
6628 /* Send NODE_PINGREQ signal to all other running nodes, and
6629 * initialise connectivity check bitmasks.
6630 * Note that nodes may already be considered suspect due to
6631 * a previous connectivity check round.
6632 */
6633 Uint32 ownId = getOwnNodeId();
6634 NodePingReq* pingReq = CAST_PTR(NodePingReq, &signal->theData[0]);
6635 pingReq->senderData = ++m_connectivity_check.m_currentRound;
6636 pingReq->senderRef = reference();
6637
6638 for (Uint32 i=1; i < MAX_NDB_NODES; i++)
6639 {
6640 if (i != ownId)
6641 {
6642 NodeRec& node = nodeRec[i];
6643 if (node.phase == ZRUNNING)
6644 {
6645 /* If connection was considered ok, treat as unknown,
6646 * If it was considered slow, continue to treat
6647 * as slow
6648 */
6649 sendSignal(node.blockRef,
6650 GSN_NODE_PING_REQ,
6651 signal,
6652 NodePingReq::SignalLength,
6653 JBA);
6654
6655 m_connectivity_check.m_nodesPinged.set(i);
6656 }
6657 }
6658 }
6659
6660 /* Initialise result bitmasks */
6661 m_connectivity_check.m_nodesWaiting.assign(m_connectivity_check.m_nodesPinged);
6662 m_connectivity_check.m_nodesFailedDuring.clear();
6663
6664 /* Ensure only live nodes are considered suspect */
6665 m_connectivity_check.m_nodesSuspect.bitAND(m_connectivity_check.m_nodesPinged);
6666
6667 const char* reasonText = "Unknown";
6668 bool firstTime = true;
6669
6670 switch(reason)
6671 {
6672 case FailRep::ZHEARTBEAT_FAILURE:
6673 reasonText = "Heartbeat failure";
6674 break;
6675 case FailRep::ZCONNECT_CHECK_FAILURE:
6676 reasonText = "Connectivity check request";
6677 break;
6678 default:
6679 firstTime = false;
6680 ndbrequire(m_connectivity_check.m_nodesSuspect.count() > 0);
6681 break;
6682 }
6683
6684 if (!m_connectivity_check.m_nodesPinged.isclear())
6685 {
6686 jam();
6687 {
6688 char buff[100];
6689 m_connectivity_check.m_nodesPinged.getText(buff);
6690 if (firstTime)
6691 {
6692 g_eventLogger->info("QMGR : Starting connectivity check of %u other nodes (%s) due to %s from node %u.",
6693 m_connectivity_check.m_nodesPinged.count(),
6694 buff,
6695 reasonText,
6696 causingNode);
6697 }
6698 else
6699 {
6700 char buff2[100];
6701 m_connectivity_check.m_nodesSuspect.getText(buff2);
6702 g_eventLogger->info("QMGR : Restarting connectivity check of %u other nodes (%s) due to %u syspect nodes (%s)",
6703 m_connectivity_check.m_nodesPinged.count(),
6704 buff,
6705 m_connectivity_check.m_nodesSuspect.count(),
6706 buff2);
6707 }
6708 }
6709
6710 /* Generate cluster log event */
6711 Uint32 bitmaskSz = NdbNodeBitmask::Size;
6712 signal->theData[0] = NDB_LE_ConnectCheckStarted;
6713 signal->theData[1] = m_connectivity_check.m_nodesPinged.count();
6714 signal->theData[2] = reason;
6715 signal->theData[3] = causingNode;
6716 signal->theData[4] = bitmaskSz;
6717 Uint32* sigPtr = &signal->theData[5];
6718 m_connectivity_check.m_nodesPinged.copyto(bitmaskSz, sigPtr); sigPtr+= bitmaskSz;
6719 m_connectivity_check.m_nodesSuspect.copyto(bitmaskSz, sigPtr);
6720 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 5 + (2 * bitmaskSz), JBB);
6721
6722 m_connectivity_check.m_active = true;
6723 m_connectivity_check.m_tick = 0;
6724 NDB_TICKS now = NdbTick_CurrentMillisecond();
6725 m_connectivity_check.m_timer.reset(now);
6726 }
6727 else
6728 {
6729 g_eventLogger->info("QMGR : Connectivity check requested due to %s (from %u) not started as no other running nodes.",
6730 reasonText,
6731 causingNode);
6732 }
6733 }
6734
6735 void
execNODE_PINGREQ(Signal * signal)6736 Qmgr::execNODE_PINGREQ(Signal* signal)
6737 {
6738 jamEntry();
6739 Uint32 ownId = getOwnNodeId();
6740 const NodePingReq* pingReq = CAST_CONSTPTR(NodePingReq, &signal->theData[0]);
6741 Uint32 sendersRef = signal->getSendersBlockRef();
6742 Uint32 sendersNodeId = refToNode(sendersRef);
6743 Uint32 senderData = pingReq->senderData;
6744
6745 ndbrequire(sendersNodeId != ownId);
6746
6747 /* We will start our own connectivity check if necessary
6748 * before responding with PING_CONF to the requestor.
6749 * This means that the sending node will receive our PING_REQ
6750 * before our PING_CONF, which should avoid them starting an
6751 * unnecessary extra connectivity check round in some cases.
6752 */
6753 if (likely(m_connectivity_check.getEnabled()))
6754 {
6755 jam();
6756 /* We have connectivity checking configured */
6757 if (! m_connectivity_check.m_active)
6758 {
6759 jam();
6760
6761 {
6762 /* Don't start a new connectivity check if the requesting
6763 * node has failed from our point of view
6764 */
6765 NodeRecPtr nodePtr;
6766 nodePtr.i = sendersNodeId;
6767 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
6768 if (unlikely(nodePtr.p->phase != ZRUNNING))
6769 {
6770 jam();
6771
6772 g_eventLogger->warning("QMGR : Discarding NODE_PINGREQ from non-running node %u (%u)",
6773 sendersNodeId, nodePtr.p->phase);
6774 return;
6775 }
6776 }
6777
6778 /* Start our own Connectivity Check now indicating reason and causing node */
6779 startConnectivityCheck(signal, FailRep::ZCONNECT_CHECK_FAILURE, sendersNodeId);
6780 }
6781 }
6782 else
6783 {
6784 jam();
6785 g_eventLogger->warning("QMGR : NODE_PINGREQ received from node %u, but connectivity "
6786 "checking not configured on this node. Ensure all "
6787 "nodes have the same configuration for parameter "
6788 "ConnectCheckIntervalMillis.",
6789 sendersNodeId);
6790 }
6791
6792 /* Now respond with NODE_PINGCONF */
6793 NodePingConf* pingConf = CAST_PTR(NodePingConf, &signal->theData[0]);
6794
6795 pingConf->senderData = senderData;
6796 pingConf->senderRef = reference();
6797
6798 sendSignal(sendersRef,
6799 GSN_NODE_PING_CONF,
6800 signal,
6801 NodePingConf::SignalLength,
6802 JBA);
6803 }
6804
/**
 * Record that a node has connected: the node is removed from the set
 * of nodes with suspect connectivity.
 *
 * @param nodeId id of the node that connected
 */
void
Qmgr::ConnectCheckRec::reportNodeConnect(Uint32 nodeId)
{
  /* Clear any suspicion */
  m_nodesSuspect.clear(nodeId);
}
6811
6812 bool
reportNodeFailure(Uint32 nodeId)6813 Qmgr::ConnectCheckRec::reportNodeFailure(Uint32 nodeId)
6814 {
6815 if (unlikely(m_active))
6816 {
6817 m_nodesFailedDuring.set(nodeId);
6818
6819 if (m_nodesWaiting.get(nodeId))
6820 {
6821 /* We were waiting for a NODE_PING_CONF from this node,
6822 * remove it from the set
6823 */
6824 m_nodesWaiting.clear(nodeId);
6825
6826 return m_nodesWaiting.isclear();
6827 }
6828 }
6829 return false;
6830 }
6831
6832 void
execNODE_PINGCONF(Signal * signal)6833 Qmgr::execNODE_PINGCONF(Signal* signal)
6834 {
6835 jamEntry();
6836
6837 ndbrequire(m_connectivity_check.getEnabled());
6838
6839 const NodePingConf* pingConf = CAST_CONSTPTR(NodePingConf, &signal->theData[0]);
6840 Uint32 sendersBlockRef = signal->getSendersBlockRef();
6841 Uint32 sendersNodeId = refToNode(sendersBlockRef);
6842 Uint32 roundNumber = pingConf->senderData;
6843
6844 ndbrequire(sendersNodeId != getOwnNodeId());
6845 ndbrequire((m_connectivity_check.m_active) || /* Normal */
6846 (m_connectivity_check.m_nodesWaiting.get(sendersNodeId) || /* We killed last round */
6847 m_connectivity_check.m_nodesFailedDuring.get(sendersNodeId))); /* Someone killed */
6848
6849 if (unlikely((! m_connectivity_check.m_active) ||
6850 (roundNumber != m_connectivity_check.m_currentRound)))
6851 {
6852 g_eventLogger->warning("QMGR : Received NODEPING_CONF from node %u for round %u, "
6853 "but we are %sactive on round %u. Discarding.",
6854 sendersNodeId,
6855 roundNumber,
6856 ((m_connectivity_check.m_active)?"":"in"),
6857 m_connectivity_check.m_currentRound);
6858 return;
6859 }
6860
6861 /* Node must have been pinged, we must be waiting for the response,
6862 * or the node must have already failed
6863 */
6864 ndbrequire(m_connectivity_check.m_nodesPinged.get(sendersNodeId));
6865 ndbrequire(m_connectivity_check.m_nodesWaiting.get(sendersNodeId) ||
6866 m_connectivity_check.m_nodesFailedDuring.get(sendersNodeId));
6867
6868 m_connectivity_check.m_nodesWaiting.clear(sendersNodeId);
6869
6870 if (likely(m_connectivity_check.m_tick < CC_SuspectTicks))
6871 {
6872 jam();
6873 /* Node responded on time, clear any suspicion about it */
6874 m_connectivity_check.m_nodesSuspect.clear(sendersNodeId);
6875 }
6876
6877 if (m_connectivity_check.m_nodesWaiting.isclear())
6878 {
6879 jam();
6880 /* Connectivity check round is now finished */
6881 connectivityCheckCompleted(signal);
6882 }
6883 }
6884
6885 void
connectivityCheckCompleted(Signal * signal)6886 Qmgr::connectivityCheckCompleted(Signal* signal)
6887 {
6888 jam();
6889
6890 m_connectivity_check.m_active = false;
6891
6892 /* Log the following :
6893 * Nodes checked
6894 * Nodes responded ok
6895 * Nodes responded late (now suspect)
6896 * Nodes failed to respond.
6897 * Nodes failed during
6898 */
6899 char pinged[100];
6900 char late[100];
6901 char silent[100];
6902 char failed[100];
6903
6904 /* Any 'waiting' nodes have been killed
6905 * Surviving suspects do not include them.
6906 */
6907 NdbNodeBitmask survivingSuspects(m_connectivity_check.m_nodesSuspect);
6908 survivingSuspects.bitANDC(m_connectivity_check.m_nodesWaiting);
6909
6910 /* Nodes that failed during the check are also excluded */
6911 survivingSuspects.bitANDC(m_connectivity_check.m_nodesFailedDuring);
6912
6913 m_connectivity_check.m_nodesPinged.getText(pinged);
6914 survivingSuspects.getText(late);
6915 m_connectivity_check.m_nodesWaiting.getText(silent);
6916 m_connectivity_check.m_nodesFailedDuring.getText(failed);
6917
6918 g_eventLogger->info("QMGR : Connectivity check completed, "
6919 "%u other nodes checked (%s), "
6920 "%u responded on time, "
6921 "%u responded late (%s), "
6922 "%u no response will be failed (%s), "
6923 "%u failed during check (%s)\n",
6924 m_connectivity_check.m_nodesPinged.count(),
6925 pinged,
6926 m_connectivity_check.m_nodesPinged.count() -
6927 m_connectivity_check.m_nodesSuspect.count(),
6928 survivingSuspects.count(),
6929 late,
6930 m_connectivity_check.m_nodesWaiting.count(),
6931 silent,
6932 m_connectivity_check.m_nodesFailedDuring.count(),
6933 failed);
6934
6935 /* Log in Cluster log */
6936 signal->theData[0] = NDB_LE_ConnectCheckCompleted;
6937 signal->theData[1] = m_connectivity_check.m_nodesPinged.count();
6938 signal->theData[2] = survivingSuspects.count();
6939 signal->theData[3] = m_connectivity_check.m_nodesWaiting.count() +
6940 m_connectivity_check.m_nodesFailedDuring.count();
6941
6942 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);
6943
6944 if (survivingSuspects.count() > 0)
6945 {
6946 jam();
6947 /* Still suspect nodes, start another round */
6948 g_eventLogger->info("QMGR : Starting new connectivity check due to suspect nodes.");
6949 /* Restart connectivity check, no external reason or cause */
6950 startConnectivityCheck(signal, 0, 0);
6951 }
6952 else
6953 {
6954 jam();
6955 /* No suspect nodes, stop the protocol now */
6956
6957 g_eventLogger->info("QMGR : All other nodes (%u) connectivity ok.",
6958 m_connectivity_check.m_nodesPinged.count() -
6959 (m_connectivity_check.m_nodesWaiting.count() +
6960 m_connectivity_check.m_nodesFailedDuring.count()));
6961
6962 /* Send a heartbeat to our right neighbour at this point as a gesture
6963 * of goodwill
6964 */
6965 sendHeartbeat(signal);
6966 hb_send_timer.reset(NdbTick_CurrentMillisecond());
6967 };
6968 }
6969
6970 void
checkConnectivityTimeSignal(Signal * signal)6971 Qmgr::checkConnectivityTimeSignal(Signal* signal)
6972 {
6973 /* Executed periodically when a connectivity check is
6974 * underway.
6975 * After CC_SuspectTicks have elapsed, any nodes
6976 * which have not responded are considered
6977 * 'Suspect'.
6978 * After CC_FailedTicks have elapsed, any nodes
6979 * which have not responded are considered
6980 * to have failed, and failure handling
6981 * begins.
6982 */
6983 jam();
6984
6985 /* Preconditions, otherwise we shouldn't have been called */
6986 ndbrequire(m_connectivity_check.getEnabled());
6987 ndbrequire(m_connectivity_check.m_active);
6988 ndbrequire(!m_connectivity_check.m_nodesWaiting.isclear());
6989
6990 m_connectivity_check.m_tick++;
6991
6992 switch (m_connectivity_check.m_tick)
6993 {
6994 case CC_SuspectTicks:
6995 {
6996 jam();
6997 /* Still waiting to hear from some nodes, they are now
6998 * suspect
6999 */
7000 m_connectivity_check.m_nodesSuspect.bitOR(m_connectivity_check.m_nodesWaiting);
7001 return;
7002 }
7003 case CC_FailedTicks:
7004 {
7005 jam();
7006 /* Still waiting to hear from some nodes, they will now
7007 * be failed
7008 */
7009 m_connectivity_check.m_active = false;
7010 Uint32 nodeId = 0;
7011
7012 while ((nodeId = m_connectivity_check.m_nodesWaiting.find(nodeId))
7013 != BitmaskImpl::NotFound)
7014 {
7015 jam();
7016 /* Log failure reason */
7017 /* Todo : Connectivity Check specific failure log? */
7018 signal->theData[0] = NDB_LE_DeadDueToHeartbeat;
7019 signal->theData[1] = nodeId;
7020
7021 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
7022
7023 /* Fail the node */
7024 /* TODO : Consider real time break here */
7025 failReportLab(signal, nodeId, FailRep::ZCONNECT_CHECK_FAILURE, getOwnNodeId());
7026 nodeId++;
7027 }
7028
7029 /* Now handle the end of the Connectivity Check */
7030 connectivityCheckCompleted(signal);
7031 }
7032 }
7033 }
7034
/**
 * Tell whether a node is currently in the set of nodes with suspect
 * connectivity kept by the connectivity check.
 *
 * @param nodeId id of the node to look up
 * @return true if the node is marked suspect
 */
bool
Qmgr::isNodeConnectivitySuspect(Uint32 nodeId) const
{
  return m_connectivity_check.m_nodesSuspect.get(nodeId);
}
7040
7041 void
handleFailFromSuspect(Signal * signal,Uint32 reason,Uint16 aFailedNode,Uint16 sourceNode)7042 Qmgr::handleFailFromSuspect(Signal* signal,
7043 Uint32 reason,
7044 Uint16 aFailedNode,
7045 Uint16 sourceNode)
7046 {
7047 jam();
7048
7049 const char* reasonText = "Unknown";
7050
7051 /* We have received a failure report about some node X from
7052 * some other node that we consider to have suspect connectivity
7053 * which may have caused the report.
7054 *
7055 * We will 'invert' the sense of this, and handle it as
7056 * a failure report of the sender, with the same cause.
7057 */
7058 switch(reason)
7059 {
7060 case FailRep::ZCONNECT_CHECK_FAILURE:
7061 jam();
7062 /* Suspect says that connectivity check failed for another node.
7063 * As suspect has bad connectivity from our point of view, we
7064 * blame him.
7065 */
7066 reasonText = "ZCONNECT_CHECK_FAILURE";
7067 break;
7068 case FailRep::ZLINK_FAILURE:
7069 jam();
7070 /* Suspect says that link failed for another node.
7071 * As suspect has bad connectivity from our point of view, we
7072 * blame her.
7073 */
7074 reasonText = "ZLINK_FAILURE";
7075 break;
7076 default:
7077 ndbrequire(false);
7078 }
7079
7080 g_eventLogger->warning("QMGR : Received Connectivity failure notification about "
7081 "%u from suspect node %u with reason %s. "
7082 "Mapping to failure of %u sourced by me.",
7083 aFailedNode, sourceNode, reasonText, sourceNode);
7084
7085 signal->theData[0] = NDB_LE_NodeFailRejected;
7086 signal->theData[1] = reason;
7087 signal->theData[2] = aFailedNode;
7088 signal->theData[3] = sourceNode;
7089
7090 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);
7091
7092 failReportLab(signal, sourceNode, (FailRep::FailCause) reason, getOwnNodeId());
7093 }
7094