1 /*
2 Copyright (c) 2003, 2021, Oracle and/or its affiliates.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License, version 2.0,
6 as published by the Free Software Foundation.
7
8 This program is also distributed with certain software (including
9 but not limited to OpenSSL) that is licensed under separate terms,
10 as designated in a particular file or component or in included license
11 documentation. The authors of MySQL hereby grant you an additional
12 permission to link the program and your derivative works with the
13 separately licensed software that they have included with MySQL.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License, version 2.0, for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
23 */
24
25
26 #define QMGR_C
27 #include "Qmgr.hpp"
28 #include <pc.hpp>
29 #include <NdbTick.h>
30 #include <signaldata/NodeRecoveryStatusRep.hpp>
31 #include <signaldata/EventReport.hpp>
32 #include <signaldata/StartOrd.hpp>
33 #include <signaldata/CloseComReqConf.hpp>
34 #include <signaldata/PrepFailReqRef.hpp>
35 #include <signaldata/NodeFailRep.hpp>
36 #include <signaldata/ReadNodesConf.hpp>
37 #include <signaldata/NFCompleteRep.hpp>
38 #include <signaldata/CheckNodeGroups.hpp>
39 #include <signaldata/ArbitSignalData.hpp>
40 #include <signaldata/ApiRegSignalData.hpp>
41 #include <signaldata/ApiVersion.hpp>
42 #include <signaldata/BlockCommitOrd.hpp>
43 #include <signaldata/FailRep.hpp>
44 #include <signaldata/DisconnectRep.hpp>
45 #include <signaldata/ApiBroadcast.hpp>
46 #include <signaldata/Upgrade.hpp>
47 #include <signaldata/EnableCom.hpp>
48 #include <signaldata/RouteOrd.hpp>
49 #include <signaldata/NodePing.hpp>
50 #include <signaldata/DihRestart.hpp>
51 #include <signaldata/DumpStateOrd.hpp>
52 #include <signaldata/IsolateOrd.hpp>
53 #include <ndb_version.h>
54
55 #include <TransporterRegistry.hpp> // Get connect address
56
57 #include <EventLogger.hpp>
58 extern EventLogger * g_eventLogger;
59
/**
 * Start-protocol trace macros.
 *
 * Uncomment the DEBUG_QMGR_START define below to have the macros print to
 * ndbout; otherwise they all expand to nothing.
 *
 *   DEBUG(x)                     - prints "QMGR <line>: " followed by x
 *   DEBUG_START(gsn, node, msg)  - prints the signal name and a destination
 *                                  node
 *   DEBUG_START2(gsn, rg, msg)   - same, but prints the text form of
 *                                  rg.m_nodes; expands to a brace block
 *   DEBUG_START3(signal, msg)    - prints the signal name and the sending
 *                                  node; NOTE: the enabled expansion already
 *                                  ends with ';'
 */
//#define DEBUG_QMGR_START
#ifdef DEBUG_QMGR_START
#include <DebuggerNames.hpp>
#define DEBUG(x) ndbout << "QMGR " << __LINE__ << ": " << x << endl
#define DEBUG_START(gsn, node, msg) DEBUG(getSignalName(gsn) << " to: " << node << " - " << msg)
#define DEBUG_START2(gsn, rg, msg) { char nodes[255]; DEBUG(getSignalName(gsn) << " to: " << rg.m_nodes.getText(nodes) << " - " << msg); }
#define DEBUG_START3(signal, msg) DEBUG(getSignalName(signal->header.theVerId_signalNumber) << " from " << refToNode(signal->getSendersBlockRef()) << " - " << msg);
#else
/* Tracing disabled: every macro expands to nothing. */
#define DEBUG(x)
#define DEBUG_START(gsn, node, msg)
#define DEBUG_START2(gsn, rg, msg)
#define DEBUG_START3(signal, msg)
#endif
73
74 #define JAM_FILE_ID 360
75
76
77 /**
78 * c_start.m_gsn = GSN_CM_REGREQ
79 * Possible for all nodes
80 * c_start.m_nodes contains all nodes in config
81 *
82 * c_start.m_gsn = GSN_CM_NODEINFOREQ;
83 * Set when receiving CM_REGCONF
84 * State possible for starting node only (not in cluster)
85 *
 *   c_start.m_nodes contains all nodes in the alive cluster that
 *     have not replied to GSN_CM_NODEINFOREQ
 *     passed by president in GSN_CM_REGCONF
89 *
90 * c_start.m_gsn = GSN_CM_ADD
91 * Possible for president only
92 * Set when receiving and accepting CM_REGREQ (to include node)
93 *
94 * c_start.m_nodes contains all nodes in alive cluster + starting node
95 * that has not replied to GSN_CM_ADD
96 * by sending GSN_CM_ACKADD
97 *
98 * c_start.m_gsn = GSN_CM_NODEINFOCONF
99 * Possible for non presidents only
100 * c_start.m_nodes contains a node that has been accepted by president
101 * but has not connected to us yet
102 */
103
104 // Signal entries and statement blocks
105 /* 4 P R O G R A M */
106 /*******************************/
107 /* CMHEART_BEAT */
108 /*******************************/
execCM_HEARTBEAT(Signal * signal)109 void Qmgr::execCM_HEARTBEAT(Signal* signal)
110 {
111 NodeRecPtr hbNodePtr;
112 jamEntry();
113 hbNodePtr.i = signal->theData[0];
114 ptrCheckGuard(hbNodePtr, MAX_NDB_NODES, nodeRec);
115 set_hb_count(hbNodePtr.i) = 0;
116 return;
117 }//Qmgr::execCM_HEARTBEAT()
118
119 /*******************************/
120 /* CM_NODEINFOREF */
121 /*******************************/
execCM_NODEINFOREF(Signal * signal)122 void Qmgr::execCM_NODEINFOREF(Signal* signal)
123 {
124 jamEntry();
125 systemErrorLab(signal, __LINE__);
126 return;
127 }//Qmgr::execCM_NODEINFOREF()
128
129 /*******************************/
130 /* CONTINUEB */
131 /*******************************/
execCONTINUEB(Signal * signal)132 void Qmgr::execCONTINUEB(Signal* signal)
133 {
134 jamEntry();
135 const Uint32 tcontinuebType = signal->theData[0];
136 const Uint32 tdata0 = signal->theData[1];
137 const Uint32 tdata1 = signal->theData[2];
138 switch (tcontinuebType) {
139 case ZREGREQ_TIMELIMIT:
140 jam();
141 if (c_start.m_startKey != tdata0 || c_start.m_startNode != tdata1) {
142 jam();
143 return;
144 }//if
145 regreqTimeLimitLab(signal);
146 break;
147 case ZREGREQ_MASTER_TIMELIMIT:
148 jam();
149 if (c_start.m_startKey != tdata0 || c_start.m_startNode != tdata1) {
150 jam();
151 return;
152 }//if
153 //regreqMasterTimeLimitLab(signal);
154 failReportLab(signal, c_start.m_startNode, FailRep::ZSTART_IN_REGREQ, getOwnNodeId());
155 return;
156 break;
157 case ZTIMER_HANDLING:
158 jam();
159 timerHandlingLab(signal);
160 return;
161 break;
162 case ZARBIT_HANDLING:
163 jam();
164 runArbitThread(signal);
165 return;
166 break;
167 case ZSTART_FAILURE_LIMIT:{
168 if (cpresident != ZNIL)
169 {
170 jam();
171 return;
172 }
173 const NDB_TICKS now = NdbTick_getCurrentTicks();
174 const Uint64 elapsed = NdbTick_Elapsed(c_start_election_time,now).milliSec();
175 if (elapsed > c_restartFailureTimeout)
176 {
177 jam();
178 BaseString tmp;
179 tmp.append("Shutting down node as total restart time exceeds "
180 " StartFailureTimeout as set in config file ");
181 if(c_restartFailureTimeout == (Uint32) ~0)
182 tmp.append(" 0 (inifinite)");
183 else
184 tmp.appfmt(" %d", c_restartFailureTimeout);
185
186 progError(__LINE__, NDBD_EXIT_SYSTEM_ERROR, tmp.c_str());
187 }
188 signal->theData[0] = ZSTART_FAILURE_LIMIT;
189 sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 3000, 1);
190 return;
191 }
192 default:
193 jam();
194 // ZCOULD_NOT_OCCUR_ERROR;
195 systemErrorLab(signal, __LINE__);
196 return;
197 break;
198 }//switch
199 return;
200 }//Qmgr::execCONTINUEB()
201
202
execDEBUG_SIG(Signal * signal)203 void Qmgr::execDEBUG_SIG(Signal* signal)
204 {
205 NodeRecPtr debugNodePtr;
206 jamEntry();
207 debugNodePtr.i = signal->theData[0];
208 ptrCheckGuard(debugNodePtr, MAX_NODES, nodeRec);
209 return;
210 }//Qmgr::execDEBUG_SIG()
211
212 /*******************************/
213 /* FAIL_REP */
214 /*******************************/
execFAIL_REP(Signal * signal)215 void Qmgr::execFAIL_REP(Signal* signal)
216 {
217 const FailRep * const failRep = (FailRep *)&signal->theData[0];
218 const NodeId failNodeId = failRep->failNodeId;
219 const FailRep::FailCause failCause = (FailRep::FailCause)failRep->failCause;
220 Uint32 failSource = failRep->getFailSourceNodeId(signal->length());
221 if (!failSource)
222 {
223 /* Failure source not included, use sender of signal as 'source' */
224 failSource = refToNode(signal->getSendersBlockRef());
225 }
226
227 jamEntry();
228 failReportLab(signal, failNodeId, failCause, failSource);
229 return;
230 }//Qmgr::execFAIL_REP()
231
232 /*******************************/
233 /* PRES_TOREQ */
234 /*******************************/
execPRES_TOREQ(Signal * signal)235 void Qmgr::execPRES_TOREQ(Signal* signal)
236 {
237 jamEntry();
238 BlockReference Tblockref = signal->theData[0];
239 signal->theData[0] = getOwnNodeId();
240 signal->theData[1] = ccommitFailureNr;
241 sendSignal(Tblockref, GSN_PRES_TOCONF, signal, 2, JBA);
242 return;
243 }//Qmgr::execPRES_TOREQ()
244
245 void
execREAD_CONFIG_REQ(Signal * signal)246 Qmgr::execREAD_CONFIG_REQ(Signal* signal)
247 {
248 jamEntry();
249
250 const ReadConfigReq * req = (ReadConfigReq*)signal->getDataPtr();
251
252 Uint32 ref = req->senderRef;
253 Uint32 senderData = req->senderData;
254
255 const ndb_mgm_configuration_iterator * p =
256 m_ctx.m_config.getOwnConfigIterator();
257 ndbrequire(p != 0);
258
259 ReadConfigConf * conf = (ReadConfigConf*)signal->getDataPtrSend();
260 conf->senderRef = reference();
261 conf->senderData = senderData;
262 sendSignal(ref, GSN_READ_CONFIG_CONF, signal,
263 ReadConfigConf::SignalLength, JBB);
264 }
265
266 void
execSTART_ORD(Signal * signal)267 Qmgr::execSTART_ORD(Signal* signal)
268 {
269 /**
270 * Start timer handling
271 */
272 const NDB_TICKS now = NdbTick_getCurrentTicks();
273 signal->theData[0] = ZTIMER_HANDLING;
274 signal->theData[1] = Uint32(now.getUint64() >> 32);
275 signal->theData[2] = Uint32(now.getUint64());
276 sendSignal(QMGR_REF, GSN_CONTINUEB, signal, 3, JBB);
277 }
278
279 /*
280 4.2 ADD NODE MODULE*/
281 /*##########################################################################*/
282 /*
283 4.2.1 STTOR */
284 /**--------------------------------------------------------------------------
285 * Start phase signal, must be handled by all blocks.
286 * QMGR is only interested in the first phase.
287 * During phase one we clear all registered applications.
288 *---------------------------------------------------------------------------*/
289 /*******************************/
290 /* STTOR */
291 /*******************************/
execSTTOR(Signal * signal)292 void Qmgr::execSTTOR(Signal* signal)
293 {
294 jamEntry();
295
296 switch(signal->theData[1]){
297 case 1:
298 initData(signal);
299 g_eventLogger->info("Starting QMGR phase 1");
300 startphase1(signal);
301 recompute_version_info(NodeInfo::DB);
302 recompute_version_info(NodeInfo::API);
303 recompute_version_info(NodeInfo::MGM);
304 return;
305 case 7:
306 if (cpresident == getOwnNodeId())
307 {
308 switch(arbitRec.method){
309 case ArbitRec::DISABLED:
310 break;
311
312 case ArbitRec::METHOD_EXTERNAL:
313 case ArbitRec::METHOD_DEFAULT:
314 /**
315 * Start arbitration thread. This could be done as soon as
316 * we have all nodes (or a winning majority).
317 */
318 jam();
319 handleArbitStart(signal);
320 break;
321 }
322 }
323 break;
324 case 9:{
325 /**
326 * Enable communication to all API nodes by setting state
327 * to ZFAIL_CLOSING (which will make it auto-open in checkStartInterface)
328 */
329 c_allow_api_connect = 1;
330 NodeRecPtr nodePtr;
331 for (nodePtr.i = 1; nodePtr.i < MAX_NODES; nodePtr.i++)
332 {
333 jam();
334 Uint32 type = getNodeInfo(nodePtr.i).m_type;
335 if (type != NodeInfo::API)
336 continue;
337
338 ptrAss(nodePtr, nodeRec);
339 if (nodePtr.p->phase == ZAPI_INACTIVE)
340 {
341 jam();
342 set_hb_count(nodePtr.i) = 3;
343 nodePtr.p->phase = ZFAIL_CLOSING;
344 nodePtr.p->failState = NORMAL;
345 }
346 }
347 }
348 }
349
350 sendSttorryLab(signal, false);
351 return;
352 }//Qmgr::execSTTOR()
353
sendSttorryLab(Signal * signal,bool first_phase)354 void Qmgr::sendSttorryLab(Signal* signal, bool first_phase)
355 {
356 if (first_phase)
357 {
358 g_eventLogger->info("Include node protocol completed, phase 1 in QMGR"
359 " completed");
360 }
361 /****************************<*/
362 /*< STTORRY <*/
363 /****************************<*/
364 signal->theData[3] = 7;
365 signal->theData[4] = 9;
366 signal->theData[5] = 255;
367 sendSignal(NDBCNTR_REF, GSN_STTORRY, signal, 6, JBB);
368 return;
369 }//Qmgr::sendSttorryLab()
370
startphase1(Signal * signal)371 void Qmgr::startphase1(Signal* signal)
372 {
373 jamEntry();
374
375 NodeRecPtr nodePtr;
376 nodePtr.i = getOwnNodeId();
377 ptrAss(nodePtr, nodeRec);
378 nodePtr.p->phase = ZSTARTING;
379
380 DihRestartReq * req = CAST_PTR(DihRestartReq, signal->getDataPtrSend());
381 req->senderRef = reference();
382 sendSignal(DBDIH_REF, GSN_DIH_RESTARTREQ, signal,
383 DihRestartReq::SignalLength, JBB);
384 return;
385 }
386
387 void
execDIH_RESTARTREF(Signal * signal)388 Qmgr::execDIH_RESTARTREF(Signal*signal)
389 {
390 jamEntry();
391
392 g_eventLogger->info("DIH reported initial start, now starting the"
393 " Node Inclusion Protocol");
394 const DihRestartRef * ref = CAST_CONSTPTR(DihRestartRef,
395 signal->getDataPtr());
396 c_start.m_latest_gci = 0;
397 c_start.m_no_nodegroup_nodes.assign(NdbNodeBitmask::Size,
398 ref->no_nodegroup_mask);
399 execCM_INFOCONF(signal);
400 }
401
402 void
execDIH_RESTARTCONF(Signal * signal)403 Qmgr::execDIH_RESTARTCONF(Signal*signal)
404 {
405 jamEntry();
406
407 const DihRestartConf * conf = CAST_CONSTPTR(DihRestartConf,
408 signal->getDataPtr());
409
410 g_eventLogger->info("DIH reported normal start, now starting the"
411 " Node Inclusion Protocol");
412 c_start.m_latest_gci = conf->latest_gci;
413 c_start.m_no_nodegroup_nodes.assign(NdbNodeBitmask::Size,
414 conf->no_nodegroup_mask);
415 execCM_INFOCONF(signal);
416 }
417
setHbDelay(UintR aHbDelay)418 void Qmgr::setHbDelay(UintR aHbDelay)
419 {
420 const NDB_TICKS now = NdbTick_getCurrentTicks();
421 hb_send_timer.setDelay(aHbDelay < 10 ? 10 : aHbDelay);
422 hb_send_timer.reset(now);
423 hb_check_timer.setDelay(aHbDelay < 10 ? 10 : aHbDelay);
424 hb_check_timer.reset(now);
425 }
426
setHbApiDelay(UintR aHbApiDelay)427 void Qmgr::setHbApiDelay(UintR aHbApiDelay)
428 {
429 const NDB_TICKS now = NdbTick_getCurrentTicks();
430 chbApiDelay = (aHbApiDelay < 100 ? 100 : aHbApiDelay);
431 hb_api_timer.setDelay(chbApiDelay);
432 hb_api_timer.reset(now);
433 }
434
setArbitTimeout(UintR aArbitTimeout)435 void Qmgr::setArbitTimeout(UintR aArbitTimeout)
436 {
437 arbitRec.timeout = (aArbitTimeout < 10 ? 10 : aArbitTimeout);
438 }
439
setCCDelay(UintR aCCDelay)440 void Qmgr::setCCDelay(UintR aCCDelay)
441 {
442 const NDB_TICKS now = NdbTick_getCurrentTicks();
443 if (aCCDelay == 0)
444 {
445 /* Connectivity check disabled */
446 m_connectivity_check.m_enabled = false;
447 m_connectivity_check.m_timer.setDelay(0);
448 }
449 else
450 {
451 m_connectivity_check.m_enabled = true;
452 m_connectivity_check.m_timer.setDelay(aCCDelay < 10 ? 10 : aCCDelay);
453 m_connectivity_check.m_timer.reset(now);
454 }
455 }
456
execCONNECT_REP(Signal * signal)457 void Qmgr::execCONNECT_REP(Signal* signal)
458 {
459 jamEntry();
460 const Uint32 connectedNodeId = signal->theData[0];
461
462 if (ERROR_INSERTED(931))
463 {
464 jam();
465 ndbout_c("Discarding CONNECT_REP(%d)", connectedNodeId);
466 infoEvent("Discarding CONNECT_REP(%d)", connectedNodeId);
467 return;
468 }
469
470 if (ERROR_INSERTED(941) &&
471 getNodeInfo(connectedNodeId).getType() == NodeInfo::API)
472 {
473 jam();
474 CLEAR_ERROR_INSERT_VALUE;
475 ndbout_c("Discarding one API CONNECT_REP(%d)", connectedNodeId);
476 infoEvent("Discarding one API CONNECT_REP(%d)", connectedNodeId);
477 return;
478 }
479
480 if (c_connectedNodes.get(connectedNodeId) == false)
481 {
482 jam();
483 setNodeInfo(connectedNodeId).m_version = 0;
484 setNodeInfo(connectedNodeId).m_mysql_version = 0;
485 }
486
487 c_connectedNodes.set(connectedNodeId);
488
489 {
490 NodeRecPtr connectedNodePtr;
491 connectedNodePtr.i = connectedNodeId;
492 ptrCheckGuard(connectedNodePtr, MAX_NODES, nodeRec);
493 connectedNodePtr.p->m_secret = 0;
494 }
495
496 NodeRecPtr myNodePtr;
497 myNodePtr.i = getOwnNodeId();
498 ptrCheckGuard(myNodePtr, MAX_NODES, nodeRec);
499 NodeInfo connectedNodeInfo = getNodeInfo(connectedNodeId);
500 switch(myNodePtr.p->phase){
501 case ZRUNNING:
502 if (connectedNodeInfo.getType() == NodeInfo::DB)
503 {
504 ndbrequire(!c_clusterNodes.get(connectedNodeId));
505 }
506 case ZSTARTING:
507 jam();
508 break;
509 case ZPREPARE_FAIL:
510 case ZFAIL_CLOSING:
511 jam();
512 return;
513 case ZAPI_ACTIVATION_ONGOING:
514 ndbrequire(false);
515 case ZAPI_ACTIVE:
516 ndbrequire(false);
517 case ZAPI_INACTIVE:
518 ndbrequire(false);
519 case ZINIT:
520 ndbrequire(getNodeInfo(connectedNodeId).m_type == NodeInfo::MGM);
521 break;
522 default:
523 ndbrequire(false);
524 }
525
526 if (connectedNodeInfo.getType() != NodeInfo::DB)
527 {
528 jam();
529 return;
530 }
531
532 switch(c_start.m_gsn){
533 case GSN_CM_REGREQ:
534 jam();
535 sendCmRegReq(signal, connectedNodeId);
536
537 /**
538 * We're waiting for CM_REGCONF c_start.m_nodes contains all configured
539 * nodes
540 */
541 ndbrequire(myNodePtr.p->phase == ZSTARTING);
542 ndbrequire(c_start.m_nodes.isWaitingFor(connectedNodeId));
543 return;
544 case GSN_CM_NODEINFOREQ:
545 jam();
546
547 if (c_start.m_nodes.isWaitingFor(connectedNodeId))
548 {
549 jam();
550 ndbrequire(getOwnNodeId() != cpresident);
551 ndbrequire(myNodePtr.p->phase == ZSTARTING);
552 sendCmNodeInfoReq(signal, connectedNodeId, myNodePtr.p);
553 return;
554 }
555 return;
556 case GSN_CM_NODEINFOCONF:{
557 jam();
558
559 ndbrequire(getOwnNodeId() != cpresident);
560 ndbrequire(myNodePtr.p->phase == ZRUNNING);
561 if (c_start.m_nodes.isWaitingFor(connectedNodeId))
562 {
563 jam();
564 c_start.m_nodes.clearWaitingFor(connectedNodeId);
565 c_start.m_gsn = RNIL;
566
567 NodeRecPtr addNodePtr;
568 addNodePtr.i = connectedNodeId;
569 ptrCheckGuard(addNodePtr, MAX_NDB_NODES, nodeRec);
570 cmAddPrepare(signal, addNodePtr, myNodePtr.p);
571 return;
572 }
573 }
574 default:
575 (void)1;
576 }
577
578 ndbrequire(!c_start.m_nodes.isWaitingFor(connectedNodeId));
579 ndbrequire(!c_readnodes_nodes.get(connectedNodeId));
580 c_readnodes_nodes.set(connectedNodeId);
581 signal->theData[0] = reference();
582 sendSignal(calcQmgrBlockRef(connectedNodeId), GSN_READ_NODESREQ, signal, 1, JBA);
583 return;
584 }//Qmgr::execCONNECT_REP()
585
586 void
execREAD_NODESCONF(Signal * signal)587 Qmgr::execREAD_NODESCONF(Signal* signal)
588 {
589 jamEntry();
590 check_readnodes_reply(signal,
591 refToNode(signal->getSendersBlockRef()),
592 GSN_READ_NODESCONF);
593 }
594
595 void
execREAD_NODESREF(Signal * signal)596 Qmgr::execREAD_NODESREF(Signal* signal)
597 {
598 jamEntry();
599 check_readnodes_reply(signal,
600 refToNode(signal->getSendersBlockRef()),
601 GSN_READ_NODESREF);
602 }
603
604 /**
605 * Heartbeat Inclusion Protocol Handling
606 * -------------------------------------
 * The protocol to include our node in the heartbeat protocol starts when
 * we call execCM_INFOCONF. We start by opening communication to all nodes
 * in the cluster. When we start this protocol we don't know anything about
 * which nodes are up and running and we don't know which node is currently
 * the president of the heartbeat protocol.
612 *
613 * For us to be successful with being included in the heartbeat protocol we
614 * need to be connected to all nodes currently in the heartbeat protocol. It
615 * is important to remember that QMGR sees a node as alive if it is included
616 * in the heartbeat protocol. Higher level notions of aliveness is handled
617 * primarily by the DBDIH block, but also to some extent by NDBCNTR.
618 *
 * The protocol starts by the new node sending CM_REGREQ to all nodes it is
 * connected to. Only the president will respond to this message. We could
 * have a situation where there currently isn't a president chosen. In this
 * case an election is held whereby a new president is assigned. In the rest
 * of this comment we assume that a president already exists.
624 *
625 * So if we were connected to the president we will get a response to the
626 * CM_REGREQ from the president with CM_REGCONF. The CM_REGCONF contains
627 * the set of nodes currently included in the heartbeat protocol.
628 *
629 * The president will send in parallel to sending CM_REGCONF a CM_ADD(prepare)
630 * message to all nodes included in the protocol.
631 *
632 * When receiving CM_REGCONF the new node will send CM_NODEINFOREQ with
633 * information about version of the binary, number of LDM workers and
634 * MySQL version of binary.
635 *
 * Each node already included in the heartbeat protocol waits until it has
 * received both the CM_ADD(prepare) from the president and the
 * CM_NODEINFOREQ from the starting node. When it has received those two
 * messages it sends CM_ACKADD(prepare) to the president and
 * CM_NODEINFOCONF to the starting node with its own node information.
641 *
642 * When the president received CM_ACKADD(prepare) from all nodes included
643 * in the heartbeat protocol then it sends CM_ADD(AddCommit) to all nodes
644 * included in the heartbeat protocol.
645 *
646 * When the nodes receives CM_ADD(AddCommit) from the president then
647 * they will enable communication to the new node and immediately start
648 * sending heartbeats to the new node. They will also include the new
649 * node in their view of the nodes included in the heartbeat protocol.
650 * Next they will send CM_ACKADD(AddCommit) back to the president.
651 *
652 * When the president has received CM_ACKADD(AddCommit) from all nodes
653 * included in the heartbeat protocol then it sends CM_ADD(CommitNew)
654 * to the starting node.
655 *
656 * This is also the point where we report the node as included in the
657 * heartbeat protocol to DBDIH as from here the rest of the protocol is
658 * only about informing the new node about the outcome of inclusion
659 * protocol. When we receive the response to this message the new node
660 * can already have proceeded a bit into its restart.
661 *
 * The starting node, after receiving CM_REGCONF, waits for all nodes
 * included in the heartbeat protocol to send CM_NODEINFOCONF and
 * also for the CM_ADD(CommitNew) from the president. When
 * all of this has been received, the new node adds itself and all nodes
 * it has been informed about to its view of the nodes included in
 * the heartbeat protocol and enables communication to all other
 * nodes included therein. Finally it sends CM_ACKADD(CommitNew) to
 * the president.
670 *
671 * When the president has received CM_ACKADD(CommitNew) from the starting
672 * node the inclusion protocol is completed and the president is ready
673 * to receive a new node into the cluster.
674 *
675 * It is the responsibility of the starting nodes to retry after a failed
676 * node inclusion, they will do so with 3 seconds delay. This means that
677 * at most one node per 3 seconds will normally be added to the cluster.
678 * So this phase of adding nodes to the cluster can add up to a little bit
679 * more than a minute of delay in a large cluster starting up.
680 *
681 * We try to depict the above in a graph here as well:
682 *
683 * New node Nodes included in the heartbeat protocol President
684 * ----------------------------------------------------------------------------
685 * ----CM_REGREQ--------------------->>
686 * ----CM_REGREQ---------------------------------------------------------->
687 *
688 * <----------------CM_REGCONF---------------------------------------------
689 * <<------CM_ADD(Prepare)---------------
690 *
691 * -----CM_NODEINFOREQ--------------->>
692 *
693 * Nodes included in heartbeat protocol can receive CM_ADD(Prepare) and
694 * CM_NODEINFOREQ in any order.
695 *
696 * <<---CM_NODEINFOCONF-------------- --------CM_ACKADD(Prepare)--------->>
697 *
698 * <<-------CM_ADD(AddCommit)------------
699 *
700 * Here nodes enables communication to new node and starts sending heartbeats
701 *
702 * ---------CM_ACKADD(AddCommit)------->>
703 *
704 * Here we report to DBDIH about new node included in heartbeat protocol
705 * in master node.
706 *
707 * <----CM_ADD(CommitNew)--------------------------------------------------
708 *
709 * Here new node enables communication to new nodes and starts sending
710 * heartbeat messages.
711 *
712 * -----CM_ACKADD(CommitNew)---------------------------------------------->
713 *
714 * Here the president can complete the inclusion protocol and is ready to
715 * receive new nodes into the heartbeat protocol.
716 */
717 /*******************************/
718 /* CM_INFOCONF */
719 /*******************************/
execCM_INFOCONF(Signal * signal)720 void Qmgr::execCM_INFOCONF(Signal* signal)
721 {
722 /**
723 * Open communcation to all DB nodes
724 */
725 signal->theData[0] = 0; // no answer
726 signal->theData[1] = 0; // no id
727 signal->theData[2] = NodeInfo::DB;
728 sendSignal(TRPMAN_REF, GSN_OPEN_COMORD, signal, 3, JBB);
729
730 cpresident = ZNIL;
731 cpresidentAlive = ZFALSE;
732 c_start_election_time = NdbTick_getCurrentTicks();
733
734 signal->theData[0] = ZSTART_FAILURE_LIMIT;
735 sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 3000, 1);
736
737 cmInfoconf010Lab(signal);
738
739 return;
740 }//Qmgr::execCM_INFOCONF()
741
/**
 * Start type for this node. Qmgr::cmInfoconf010Lab() copies it into
 * c_start.m_start_type, which is then sent in every CM_REGREQ.
 */
Uint32 g_start_type = 0;
/**
 * Nodes to skip at start. Qmgr::cmInfoconf010Lab() copies this mask into
 * c_start.m_skip_nodes and restricts it to c_definedNodes.
 * NOTE(review): "clo" presumably means command line option - confirm.
 */
NdbNodeBitmask g_nowait_nodes; // Set by clo
744
cmInfoconf010Lab(Signal * signal)745 void Qmgr::cmInfoconf010Lab(Signal* signal)
746 {
747 c_start.m_startKey = 0;
748 c_start.m_startNode = getOwnNodeId();
749 c_start.m_nodes.clearWaitingFor();
750 c_start.m_gsn = GSN_CM_REGREQ;
751 c_start.m_starting_nodes.clear();
752 c_start.m_starting_nodes_w_log.clear();
753 c_start.m_regReqReqSent = 0;
754 c_start.m_regReqReqRecv = 0;
755 c_start.m_skip_nodes = g_nowait_nodes;
756 c_start.m_skip_nodes.bitAND(c_definedNodes);
757 c_start.m_start_type = g_start_type;
758
759 NodeRecPtr nodePtr;
760 cnoOfNodes = 0;
761 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
762 jam();
763 ptrAss(nodePtr, nodeRec);
764
765 if(getNodeInfo(nodePtr.i).getType() != NodeInfo::DB)
766 continue;
767
768 c_start.m_nodes.setWaitingFor(nodePtr.i);
769 cnoOfNodes++;
770
771 if(!c_connectedNodes.get(nodePtr.i))
772 continue;
773
774 sendCmRegReq(signal, nodePtr.i);
775 }
776
777 //----------------------------------------
778 /* Wait for a while. When it returns */
779 /* we will check if we got any CM_REGREF*/
780 /* or CM_REGREQ (lower nodeid than our */
781 /* own). */
782 //----------------------------------------
783 signal->theData[0] = ZREGREQ_TIMELIMIT;
784 signal->theData[1] = c_start.m_startKey;
785 signal->theData[2] = c_start.m_startNode;
786 sendSignalWithDelay(QMGR_REF, GSN_CONTINUEB, signal, 3000, 3);
787
788 creadyDistCom = ZTRUE;
789 return;
790 }//Qmgr::cmInfoconf010Lab()
791
792 void
sendCmRegReq(Signal * signal,Uint32 nodeId)793 Qmgr::sendCmRegReq(Signal * signal, Uint32 nodeId){
794 CmRegReq * req = (CmRegReq *)&signal->theData[0];
795 req->blockRef = reference();
796 req->nodeId = getOwnNodeId();
797 req->version = NDB_VERSION;
798 req->mysql_version = NDB_MYSQL_VERSION_D;
799 req->latest_gci = c_start.m_latest_gci;
800 req->start_type = c_start.m_start_type;
801 c_start.m_skip_nodes.copyto(NdbNodeBitmask::Size, req->skip_nodes);
802 const Uint32 ref = calcQmgrBlockRef(nodeId);
803 sendSignal(ref, GSN_CM_REGREQ, signal, CmRegReq::SignalLength, JBB);
804 DEBUG_START(GSN_CM_REGREQ, nodeId, "");
805
806 c_start.m_regReqReqSent++;
807 }
808
809 /*
810 4.4.11 CM_REGREQ */
811 /**--------------------------------------------------------------------------
 * If this signal is received, someone is trying to get registered.
 * Only the president has the authority to make decisions about new nodes,
 * so only a president or a node that claims to be the president may send a
 * reply to this signal.
 * This signal can occur any time after STTOR was received.
817 * CPRESIDENT: Timelimit has expired and someone has
818 * decided to enter the president role
819 * CPRESIDENT_CANDIDATE:
820 * Assigned when we receive a CM_REGREF, if we got more than one REF
821 * then we always keep the lowest nodenumber.
822 * We accept this nodeno as president when our timelimit expires
823 * We should consider the following cases:
824 * 1- We are the president. If we are busy by adding new nodes to cluster,
825 * then we have to refuse this node to be added.
826 * The refused node will try in ZREFUSE_ADD_TIME seconds again.
827 * If we are not busy then we confirm
828 *
 * 2- We know the president, so we do not concern ourselves with this REQ.
 * The president has also got this REQ and will take care of it.
 *
 * 3- The president is not known. We have received CM_INIT, so we compare the
 * sender's node number to GETOWNNODEID().
 * If we have a lower number than the sender then we will claim
 * that we are the president so we send him a refuse signal back.
 * We have to wait for the CONTINUEB signal before we can enter the
 * president role. If our GETOWNNODEID() is larger than the sender's node
 * number, we are not the president and just have to wait for the
 * reply signal (REF) to our CM_REGREQ_2.
 * 4- We haven't received the CM_INIT signal so we don't know who we are.
 * Ignore the request.
842 *--------------------------------------------------------------------------*/
843 /*******************************/
844 /* CM_REGREQ */
845 /*******************************/
846 static
847 int
check_start_type(Uint32 starting,Uint32 own)848 check_start_type(Uint32 starting, Uint32 own)
849 {
850 if (starting == (1 << NodeState::ST_INITIAL_START) &&
851 ((own & (1 << NodeState::ST_INITIAL_START)) == 0))
852 {
853 return 1;
854 }
855 return 0;
856 }
857
execCM_REGREQ(Signal * signal)858 void Qmgr::execCM_REGREQ(Signal* signal)
859 {
860 DEBUG_START3(signal, "");
861
862 NodeRecPtr addNodePtr;
863 jamEntry();
864
865 CmRegReq * const cmRegReq = (CmRegReq *)&signal->theData[0];
866 const BlockReference Tblockref = cmRegReq->blockRef;
867 const Uint32 startingVersion = cmRegReq->version;
868 Uint32 startingMysqlVersion = cmRegReq->mysql_version;
869 addNodePtr.i = cmRegReq->nodeId;
870 Uint32 gci = 1;
871 Uint32 start_type = ~0;
872 NdbNodeBitmask skip_nodes;
873
874 if (!c_connectedNodes.get(cmRegReq->nodeId))
875 {
876 jam();
877
878 /**
879 * With ndbmtd, there is a race condition such that
880 * CM_REGREQ can arrive prior to CONNECT_REP
881 * since CONNECT_REP is sent from CMVMI
882 *
883 * In such cases, ignore the CM_REGREQ which is safe
884 * as it will anyway be resent by starting node
885 */
886 g_eventLogger->info("discarding CM_REGREQ from %u "
887 "as we're not yet connected (isNdbMt: %u)",
888 cmRegReq->nodeId,
889 (unsigned)isNdbMt());
890
891 return;
892 }
893
894 if (signal->getLength() == CmRegReq::SignalLength)
895 {
896 jam();
897 gci = cmRegReq->latest_gci;
898 start_type = cmRegReq->start_type;
899 skip_nodes.assign(NdbNodeBitmask::Size, cmRegReq->skip_nodes);
900 }
901
902 if (startingVersion < NDBD_SPLIT_VERSION)
903 {
904 startingMysqlVersion = 0;
905 }
906
907 if (creadyDistCom == ZFALSE) {
908 jam();
909 /* NOT READY FOR DISTRIBUTED COMMUNICATION.*/
910 return;
911 }//if
912
913 if (!ndbCompatible_ndb_ndb(NDB_VERSION, startingVersion)) {
914 jam();
915 sendCmRegrefLab(signal, Tblockref, CmRegRef::ZINCOMPATIBLE_VERSION);
916 return;
917 }
918
919 if (!ndb_check_micro_gcp(startingVersion))
920 {
921 jam();
922 infoEvent("Connection from node %u refused as it's not micro GCP enabled",
923 addNodePtr.i);
924 sendCmRegrefLab(signal, Tblockref, CmRegRef::ZINCOMPATIBLE_VERSION);
925 return;
926 }
927
928 if (!ndb_pnr(startingVersion))
929 {
930 jam();
931 infoEvent("Connection from node %u refused as it's not does not support "
932 "parallel node recovery",
933 addNodePtr.i);
934 sendCmRegrefLab(signal, Tblockref, CmRegRef::ZINCOMPATIBLE_VERSION);
935 return;
936 }
937
938 if (!ndb_check_hb_order_version(startingVersion) &&
939 m_hb_order_config_used)
940 {
941 jam();
942 infoEvent("Connection from node %u refused as it does not support "
943 "user-defined HeartbeatOrder",
944 addNodePtr.i);
945 sendCmRegrefLab(signal, Tblockref, CmRegRef::ZINCOMPATIBLE_VERSION);
946 return;
947 }
948
949 if (m_connectivity_check.m_enabled &&
950 !ndbd_connectivity_check(startingVersion))
951 {
952 jam();
953 infoEvent("Connection from node %u refused as it does not support "
954 "ConnectCheckIntervalDelay",
955 addNodePtr.i);
956 sendCmRegrefLab(signal, Tblockref, CmRegRef::ZINCOMPATIBLE_VERSION);
957 return;
958 }
959
960 if (check_start_type(start_type, c_start.m_start_type))
961 {
962 jam();
963 sendCmRegrefLab(signal, Tblockref, CmRegRef::ZINCOMPATIBLE_START_TYPE);
964 return;
965 }
966
967 if (cpresident != getOwnNodeId())
968 {
969 jam();
970
971 if (cpresident == ZNIL)
972 {
973 /***
974 * We don't know the president.
975 * If the node to be added has lower node id
976 * than our president cancidate. Set it as
977 * candidate
978 */
979 jam();
980 if (gci > c_start.m_president_candidate_gci ||
981 (gci == c_start.m_president_candidate_gci &&
982 addNodePtr.i < c_start.m_president_candidate))
983 {
984 jam();
985 c_start.m_president_candidate = addNodePtr.i;
986 c_start.m_president_candidate_gci = gci;
987 }
988 sendCmRegrefLab(signal, Tblockref, CmRegRef::ZELECTION);
989 return;
990 }
991
992 /**
993 * We are not the president.
994 * We know the president.
995 * President will answer.
996 */
997 sendCmRegrefLab(signal, Tblockref, CmRegRef::ZNOT_PRESIDENT);
998 return;
999 }//if
1000
1001 if (c_start.m_startNode != 0)
1002 {
1003 jam();
1004 /**
1005 * President busy by adding another node
1006 */
1007 sendCmRegrefLab(signal, Tblockref, CmRegRef::ZBUSY_PRESIDENT);
1008 return;
1009 }//if
1010
1011 if (ctoStatus == Q_ACTIVE)
1012 {
1013 jam();
1014 /**
1015 * Active taking over as president
1016 */
1017 sendCmRegrefLab(signal, Tblockref, CmRegRef::ZBUSY_TO_PRES);
1018 return;
1019 }//if
1020
1021 if (getNodeInfo(addNodePtr.i).m_type != NodeInfo::DB)
1022 {
1023 jam();
1024 /**
1025 * The new node is not in config file
1026 */
1027 sendCmRegrefLab(signal, Tblockref, CmRegRef::ZNOT_IN_CFG);
1028 return;
1029 }
1030
1031 if (getNodeState().getSingleUserMode())
1032 {
1033 /**
1034 * The cluster is in single user mode.
1035 * Data node is not allowed to get added in the cluster
1036 * while in single user mode.
1037 */
1038 // handle rolling upgrade
1039 {
1040 unsigned int get_major = getMajor(startingVersion);
1041 unsigned int get_minor = getMinor(startingVersion);
1042 unsigned int get_build = getBuild(startingVersion);
1043
1044 if (startingVersion < NDBD_QMGR_SINGLEUSER_VERSION_5) {
1045 jam();
1046
1047 infoEvent("QMGR: detect upgrade: new node %u old version %u.%u.%u",
1048 (unsigned int)addNodePtr.i, get_major, get_minor, get_build);
1049 /**
1050 * The new node is old version, send ZINCOMPATIBLE_VERSION instead
1051 * of ZSINGLE_USER_MODE.
1052 */
1053 sendCmRegrefLab(signal, Tblockref, CmRegRef::ZINCOMPATIBLE_VERSION);
1054 } else {
1055 jam();
1056
1057 sendCmRegrefLab(signal, Tblockref, CmRegRef::ZSINGLE_USER_MODE);
1058 }//if
1059 }
1060
1061 return;
1062 }//if
1063
1064 ptrCheckGuard(addNodePtr, MAX_NDB_NODES, nodeRec);
1065 Phase phase = addNodePtr.p->phase;
1066 if (phase != ZINIT)
1067 {
1068 jam();
1069 DEBUG("phase = " << phase);
1070 sendCmRegrefLab(signal, Tblockref, CmRegRef::ZNOT_DEAD);
1071 return;
1072 }
1073
1074 jam();
1075 /**
1076 * WE ARE PRESIDENT AND WE ARE NOT BUSY ADDING ANOTHER NODE.
1077 * WE WILL TAKE CARE OF THE INCLUSION OF THIS NODE INTO THE CLUSTER.
1078 * WE NEED TO START TIME SUPERVISION OF THIS. SINCE WE CANNOT STOP
1079 * TIMED SIGNAL IF THE INCLUSION IS INTERRUPTED WE IDENTIFY
1080 * EACH INCLUSION WITH A UNIQUE IDENTITY. THIS IS CHECKED WHEN
1081 * THE SIGNAL ARRIVES. IF IT HAS CHANGED THEN WE SIMPLY IGNORE
1082 * THE TIMED SIGNAL.
1083 */
1084
1085 /**
1086 * Update start record
1087 */
1088 c_start.m_startKey++;
1089 c_start.m_startNode = addNodePtr.i;
1090
1091 /**
1092 * Assign dynamic id
1093 */
1094 UintR TdynId = (++c_maxDynamicId) & 0xFFFF;
1095 TdynId |= (addNodePtr.p->hbOrder << 16);
1096 setNodeInfo(addNodePtr.i).m_version = startingVersion;
1097 setNodeInfo(addNodePtr.i).m_mysql_version = startingMysqlVersion;
1098 recompute_version_info(NodeInfo::DB, startingVersion);
1099 addNodePtr.p->ndynamicId = TdynId;
1100
1101 /**
1102 * Reply with CM_REGCONF
1103 */
1104 CmRegConf * const cmRegConf = (CmRegConf *)&signal->theData[0];
1105 cmRegConf->presidentBlockRef = reference();
1106 cmRegConf->presidentNodeId = getOwnNodeId();
1107 cmRegConf->presidentVersion = getNodeInfo(getOwnNodeId()).m_version;
1108 cmRegConf->presidentMysqlVersion = getNodeInfo(getOwnNodeId()).m_mysql_version;
1109 cmRegConf->dynamicId = TdynId;
1110 c_clusterNodes.copyto(NdbNodeBitmask::Size, cmRegConf->allNdbNodes);
1111 sendSignal(Tblockref, GSN_CM_REGCONF, signal,
1112 CmRegConf::SignalLength, JBA);
1113 DEBUG_START(GSN_CM_REGCONF, refToNode(Tblockref), "");
1114
1115 /**
1116 * Send CmAdd to all nodes (including starting)
1117 */
1118 c_start.m_nodes = c_clusterNodes;
1119 c_start.m_nodes.setWaitingFor(addNodePtr.i);
1120 c_start.m_gsn = GSN_CM_ADD;
1121
1122 NodeReceiverGroup rg(QMGR, c_start.m_nodes);
1123 CmAdd * const cmAdd = (CmAdd*)signal->getDataPtrSend();
1124 cmAdd->requestType = CmAdd::Prepare;
1125 cmAdd->startingNodeId = addNodePtr.i;
1126 cmAdd->startingVersion = startingVersion;
1127 cmAdd->startingMysqlVersion = startingMysqlVersion;
1128 sendSignal(rg, GSN_CM_ADD, signal, CmAdd::SignalLength, JBA);
1129 DEBUG_START2(GSN_CM_ADD, rg, "Prepare");
1130
1131 /**
1132 * Set timer
1133 */
1134 return;
1135 signal->theData[0] = ZREGREQ_MASTER_TIMELIMIT;
1136 signal->theData[1] = c_start.m_startKey;
1137 sendSignalWithDelay(QMGR_REF, GSN_CONTINUEB, signal, 30000, 2);
1138
1139 return;
1140 }//Qmgr::execCM_REGREQ()
1141
sendCmRegrefLab(Signal * signal,BlockReference TBRef,CmRegRef::ErrorCode Terror)1142 void Qmgr::sendCmRegrefLab(Signal* signal, BlockReference TBRef,
1143 CmRegRef::ErrorCode Terror)
1144 {
1145 CmRegRef* ref = (CmRegRef*)signal->getDataPtrSend();
1146 ref->blockRef = reference();
1147 ref->nodeId = getOwnNodeId();
1148 ref->errorCode = Terror;
1149 ref->presidentCandidate =
1150 (cpresident == ZNIL ? c_start.m_president_candidate : cpresident);
1151 ref->candidate_latest_gci = c_start.m_president_candidate_gci;
1152 ref->latest_gci = c_start.m_latest_gci;
1153 ref->start_type = c_start.m_start_type;
1154 c_start.m_skip_nodes.copyto(NdbNodeBitmask::Size, ref->skip_nodes);
1155 sendSignal(TBRef, GSN_CM_REGREF, signal,
1156 CmRegRef::SignalLength, JBB);
1157 DEBUG_START(GSN_CM_REGREF, refToNode(TBRef), "");
1158 return;
1159 }//Qmgr::sendCmRegrefLab()
1160
1161 /*
1162 4.4.11 CM_REGCONF */
1163 /**--------------------------------------------------------------------------
1164 * President gives permission to a node which wants to join the cluster.
1165 * The president will prepare the cluster that a new node will be added to
1166 * cluster. When the new node has set up all connections to the cluster,
1167 * the president will send commit to all clusternodes so the phase of the
1168 * new node can be changed to ZRUNNING.
1169 *--------------------------------------------------------------------------*/
1170 /*******************************/
1171 /* CM_REGCONF */
1172 /*******************************/
execCM_REGCONF(Signal * signal)1173 void Qmgr::execCM_REGCONF(Signal* signal)
1174 {
1175 DEBUG_START3(signal, "");
1176
1177 NodeRecPtr myNodePtr;
1178 NodeRecPtr nodePtr;
1179 jamEntry();
1180
1181 const CmRegConf * const cmRegConf = (CmRegConf *)&signal->theData[0];
1182
1183 if (!ndbCompatible_ndb_ndb(NDB_VERSION, cmRegConf->presidentVersion)) {
1184 jam();
1185 char buf[128];
1186 BaseString::snprintf(buf,sizeof(buf),
1187 "incompatible version own=0x%x other=0x%x, "
1188 " shutting down",
1189 NDB_VERSION, cmRegConf->presidentVersion);
1190 progError(__LINE__, NDBD_EXIT_UNSUPPORTED_VERSION, buf);
1191 return;
1192 }
1193
1194 if (!ndb_check_hb_order_version(cmRegConf->presidentVersion) &&
1195 m_hb_order_config_used) {
1196 jam();
1197 char buf[128];
1198 BaseString::snprintf(buf,sizeof(buf),
1199 "incompatible version own=0x%x other=0x%x, "
1200 "due to user-defined HeartbeatOrder, shutting down",
1201 NDB_VERSION, cmRegConf->presidentVersion);
1202 progError(__LINE__, NDBD_EXIT_UNSUPPORTED_VERSION, buf);
1203 return;
1204 }
1205
1206 if (m_connectivity_check.m_enabled &&
1207 !ndbd_connectivity_check(cmRegConf->presidentVersion))
1208 {
1209 jam();
1210 m_connectivity_check.m_enabled = false;
1211 ndbout_c("Disabling ConnectCheckIntervalDelay as president "
1212 " does not support it");
1213 infoEvent("Disabling ConnectCheckIntervalDelay as president "
1214 " does not support it");
1215 }
1216
1217 myNodePtr.i = getOwnNodeId();
1218 ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);
1219
1220 ndbrequire(c_start.m_gsn == GSN_CM_REGREQ);
1221 ndbrequire(myNodePtr.p->phase == ZSTARTING);
1222
1223 cpdistref = cmRegConf->presidentBlockRef;
1224 cpresident = cmRegConf->presidentNodeId;
1225 UintR TdynamicId = cmRegConf->dynamicId;
1226 c_maxDynamicId = TdynamicId & 0xFFFF;
1227 c_clusterNodes.assign(NdbNodeBitmask::Size, cmRegConf->allNdbNodes);
1228
1229 myNodePtr.p->ndynamicId = TdynamicId;
1230
1231 // set own MT config here or in REF, and others in CM_NODEINFOREQ/CONF
1232 setNodeInfo(getOwnNodeId()).m_lqh_workers = globalData.ndbMtLqhWorkers;
1233
1234 /*--------------------------------------------------------------*/
1235 // Send this as an EVENT REPORT to inform about hearing about
1236 // other NDB node proclaiming to be president.
1237 /*--------------------------------------------------------------*/
1238 signal->theData[0] = NDB_LE_CM_REGCONF;
1239 signal->theData[1] = getOwnNodeId();
1240 signal->theData[2] = cpresident;
1241 signal->theData[3] = TdynamicId;
1242 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);
1243
1244 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
1245 if (c_clusterNodes.get(nodePtr.i)){
1246 jamLine(nodePtr.i);
1247 ptrAss(nodePtr, nodeRec);
1248
1249 ndbrequire(nodePtr.p->phase == ZINIT);
1250 nodePtr.p->phase = ZRUNNING;
1251
1252 if(c_connectedNodes.get(nodePtr.i)){
1253 jam();
1254 sendCmNodeInfoReq(signal, nodePtr.i, myNodePtr.p);
1255 }
1256 }
1257 }
1258
1259 c_start.m_gsn = GSN_CM_NODEINFOREQ;
1260 c_start.m_nodes = c_clusterNodes;
1261
1262 if (ERROR_INSERTED(937))
1263 {
1264 CLEAR_ERROR_INSERT_VALUE;
1265 signal->theData[0] = 9999;
1266 sendSignalWithDelay(CMVMI_REF, GSN_NDB_TAMPER, signal, 500, 1);
1267 }
1268
1269 return;
1270 }//Qmgr::execCM_REGCONF()
1271
1272 void
check_readnodes_reply(Signal * signal,Uint32 nodeId,Uint32 gsn)1273 Qmgr::check_readnodes_reply(Signal* signal, Uint32 nodeId, Uint32 gsn)
1274 {
1275 NodeRecPtr myNodePtr;
1276 myNodePtr.i = getOwnNodeId();
1277 ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);
1278
1279 NodeRecPtr nodePtr;
1280 nodePtr.i = nodeId;
1281 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
1282
1283 ndbrequire(c_readnodes_nodes.get(nodeId));
1284 ReadNodesConf* conf = (ReadNodesConf*)signal->getDataPtr();
1285 if (gsn == GSN_READ_NODESREF)
1286 {
1287 jam();
1288 retry:
1289 signal->theData[0] = reference();
1290 sendSignal(calcQmgrBlockRef(nodeId), GSN_READ_NODESREQ, signal, 1, JBA);
1291 return;
1292 }
1293
1294 if (conf->masterNodeId == ZNIL)
1295 {
1296 jam();
1297 goto retry;
1298 }
1299
1300 Uint32 president = conf->masterNodeId;
1301 if (president == cpresident)
1302 {
1303 jam();
1304 c_readnodes_nodes.clear(nodeId);
1305 return;
1306 }
1307
1308 char buf[255];
1309 BaseString::snprintf(buf, sizeof(buf),
1310 "check StartPartialTimeout, "
1311 "node %d thinks %d is president, "
1312 "I think president is: %d",
1313 nodeId, president, cpresident);
1314
1315 ndbout_c("%s", buf);
1316 CRASH_INSERTION(933);
1317
1318 if (getNodeState().startLevel == NodeState::SL_STARTED)
1319 {
1320 jam();
1321 NdbNodeBitmask part;
1322 part.assign(NdbNodeBitmask::Size, conf->clusterNodes);
1323 FailRep* rep = (FailRep*)signal->getDataPtrSend();
1324 rep->failCause = FailRep::ZPARTITIONED_CLUSTER;
1325 rep->partitioned.president = cpresident;
1326 c_clusterNodes.copyto(NdbNodeBitmask::Size, rep->partitioned.partition);
1327 rep->partitioned.partitionFailSourceNodeId = getOwnNodeId();
1328 Uint32 ref = calcQmgrBlockRef(nodeId);
1329 Uint32 i = 0;
1330 /* Send source of event info if a node supports it */
1331 Uint32 length = FailRep::OrigSignalLength + FailRep::PartitionedExtraLength;
1332 while((i = part.find(i + 1)) != NdbNodeBitmask::NotFound)
1333 {
1334 if (i == nodeId)
1335 continue;
1336 rep->failNodeId = i;
1337 bool sendSourceId = ndbd_fail_rep_source_node((getNodeInfo(i)).m_version);
1338 sendSignal(ref, GSN_FAIL_REP, signal,
1339 length + (sendSourceId ? FailRep::SourceExtraLength : 0),
1340 JBA);
1341 }
1342 rep->failNodeId = nodeId;
1343 bool sendSourceId = ndbd_fail_rep_source_node((getNodeInfo(nodeId)).m_version);
1344
1345 sendSignal(ref, GSN_FAIL_REP, signal,
1346 length + (sendSourceId ? FailRep::SourceExtraLength : 0),
1347 JBB);
1348 return;
1349 }
1350
1351 CRASH_INSERTION(932);
1352 CRASH_INSERTION(938);
1353
1354 progError(__LINE__,
1355 NDBD_EXIT_PARTITIONED_SHUTDOWN,
1356 buf);
1357
1358 ndbrequire(false);
1359 }
1360
1361 void
sendCmNodeInfoReq(Signal * signal,Uint32 nodeId,const NodeRec * self)1362 Qmgr::sendCmNodeInfoReq(Signal* signal, Uint32 nodeId, const NodeRec * self){
1363 CmNodeInfoReq * const req = (CmNodeInfoReq*)signal->getDataPtrSend();
1364 req->nodeId = getOwnNodeId();
1365 req->dynamicId = self->ndynamicId;
1366 req->version = getNodeInfo(getOwnNodeId()).m_version;
1367 req->mysql_version = getNodeInfo(getOwnNodeId()).m_mysql_version;
1368 req->lqh_workers = getNodeInfo(getOwnNodeId()).m_lqh_workers;
1369 const Uint32 ref = calcQmgrBlockRef(nodeId);
1370 sendSignal(ref,GSN_CM_NODEINFOREQ, signal, CmNodeInfoReq::SignalLength, JBB);
1371 DEBUG_START(GSN_CM_NODEINFOREQ, nodeId, "");
1372 }
1373
1374 /*
1375 4.4.11 CM_REGREF */
1376 /**--------------------------------------------------------------------------
1377 * Only a president or a president candidate can refuse a node to get added to
1378 * the cluster.
1379 * Refuse reasons:
1380 * ZBUSY We know that the sender is the president and we have to
1381 * make a new CM_REGREQ.
1382 * ZNOT_IN_CFG This node number is not specified in the configfile,
1383 * SYSTEM ERROR
1384 * ZELECTION Sender is a president candidate, his timelimit
1385 * hasn't expired so maybe someone else will show up.
1386 * Update the CPRESIDENT_CANDIDATE, then wait for our
1387 * timelimit to expire.
1388 *---------------------------------------------------------------------------*/
1389 /*******************************/
1390 /* CM_REGREF */
1391 /*******************************/
1392 static
1393 const char *
get_start_type_string(Uint32 st)1394 get_start_type_string(Uint32 st)
1395 {
1396 static char buf[256];
1397
1398 if (st == 0)
1399 {
1400 return "<ANY>";
1401 }
1402 else
1403 {
1404 buf[0] = 0;
1405 for(Uint32 i = 0; i<NodeState::ST_ILLEGAL_TYPE; i++)
1406 {
1407 if (st & (1 << i))
1408 {
1409 if (buf[0])
1410 strcat(buf, "/");
1411 switch(i){
1412 case NodeState::ST_INITIAL_START:
1413 strcat(buf, "inital start");
1414 break;
1415 case NodeState::ST_SYSTEM_RESTART:
1416 strcat(buf, "system restart");
1417 break;
1418 case NodeState::ST_NODE_RESTART:
1419 strcat(buf, "node restart");
1420 break;
1421 case NodeState::ST_INITIAL_NODE_RESTART:
1422 strcat(buf, "initial node restart");
1423 break;
1424 }
1425 }
1426 }
1427 return buf;
1428 }
1429 }
1430
execCM_REGREF(Signal * signal)1431 void Qmgr::execCM_REGREF(Signal* signal)
1432 {
1433 jamEntry();
1434
1435 CmRegRef* ref = (CmRegRef*)signal->getDataPtr();
1436 UintR TaddNodeno = ref->nodeId;
1437 UintR TrefuseReason = ref->errorCode;
1438 Uint32 candidate = ref->presidentCandidate;
1439 Uint32 node_gci = 1;
1440 Uint32 candidate_gci = 1;
1441 Uint32 start_type = ~0;
1442 NdbNodeBitmask skip_nodes;
1443 DEBUG_START3(signal, TrefuseReason);
1444
1445 if (signal->getLength() == CmRegRef::SignalLength)
1446 {
1447 jam();
1448 node_gci = ref->latest_gci;
1449 candidate_gci = ref->candidate_latest_gci;
1450 start_type = ref->start_type;
1451 skip_nodes.assign(NdbNodeBitmask::Size, ref->skip_nodes);
1452 }
1453
1454 c_start.m_regReqReqRecv++;
1455
1456 // Ignore block reference in data[0]
1457
1458 if(candidate != c_start.m_president_candidate)
1459 {
1460 jam();
1461 c_start.m_regReqReqRecv = ~0;
1462 }
1463
1464 c_start.m_starting_nodes.set(TaddNodeno);
1465 if (node_gci)
1466 {
1467 jam();
1468 c_start.m_starting_nodes_w_log.set(TaddNodeno);
1469 }
1470 c_start.m_node_gci[TaddNodeno] = node_gci;
1471
1472 skip_nodes.bitAND(c_definedNodes);
1473 c_start.m_skip_nodes.bitOR(skip_nodes);
1474
1475 // set own MT config here or in CONF, and others in CM_NODEINFOREQ/CONF
1476 setNodeInfo(getOwnNodeId()).m_lqh_workers = globalData.ndbMtLqhWorkers;
1477
1478 char buf[100];
1479 switch (TrefuseReason) {
1480 case CmRegRef::ZINCOMPATIBLE_VERSION:
1481 jam();
1482 progError(__LINE__, NDBD_EXIT_UNSUPPORTED_VERSION,
1483 "incompatible version, "
1484 "connection refused by running ndb node");
1485 case CmRegRef::ZINCOMPATIBLE_START_TYPE:
1486 jam();
1487 BaseString::snprintf(buf, sizeof(buf),
1488 "incompatible start type detected: node %d"
1489 " reports %s(%d) my start type: %s(%d)",
1490 TaddNodeno,
1491 get_start_type_string(start_type), start_type,
1492 get_start_type_string(c_start.m_start_type),
1493 c_start.m_start_type);
1494 progError(__LINE__, NDBD_EXIT_SR_RESTARTCONFLICT, buf);
1495 break;
1496 case CmRegRef::ZBUSY:
1497 case CmRegRef::ZBUSY_TO_PRES:
1498 case CmRegRef::ZBUSY_PRESIDENT:
1499 jam();
1500 cpresidentAlive = ZTRUE;
1501 signal->theData[3] = 0;
1502 break;
1503 case CmRegRef::ZNOT_IN_CFG:
1504 jam();
1505 progError(__LINE__, NDBD_EXIT_NODE_NOT_IN_CONFIG);
1506 break;
1507 case CmRegRef::ZNOT_DEAD:
1508 jam();
1509 progError(__LINE__, NDBD_EXIT_NODE_NOT_DEAD);
1510 break;
1511 case CmRegRef::ZSINGLE_USER_MODE:
1512 jam();
1513 progError(__LINE__, NDBD_EXIT_SINGLE_USER_MODE);
1514 break;
1515 /**
1516 * For generic refuse error.
1517 * e.g. in online upgrade, we can use this error code instead
1518 * of the incompatible error code.
1519 */
1520 case CmRegRef::ZGENERIC:
1521 jam();
1522 progError(__LINE__, NDBD_EXIT_GENERIC);
1523 break;
1524 case CmRegRef::ZELECTION:
1525 jam();
1526 if (candidate_gci > c_start.m_president_candidate_gci ||
1527 (candidate_gci == c_start.m_president_candidate_gci &&
1528 candidate < c_start.m_president_candidate))
1529 {
1530 jam();
1531 //----------------------------------------
1532 /* We may already have a candidate */
1533 /* choose the lowest nodeno */
1534 //----------------------------------------
1535 signal->theData[3] = 2;
1536 c_start.m_president_candidate = candidate;
1537 c_start.m_president_candidate_gci = candidate_gci;
1538 } else {
1539 signal->theData[3] = 4;
1540 }//if
1541 break;
1542 case CmRegRef::ZNOT_PRESIDENT:
1543 jam();
1544 cpresidentAlive = ZTRUE;
1545 signal->theData[3] = 3;
1546 break;
1547 default:
1548 jam();
1549 signal->theData[3] = 5;
1550 /*empty*/;
1551 break;
1552 }//switch
1553 /*--------------------------------------------------------------*/
1554 // Send this as an EVENT REPORT to inform about hearing about
1555 // other NDB node proclaiming not to be president.
1556 /*--------------------------------------------------------------*/
1557 signal->theData[0] = NDB_LE_CM_REGREF;
1558 signal->theData[1] = getOwnNodeId();
1559 signal->theData[2] = TaddNodeno;
1560 //-----------------------------------------
1561 // signal->theData[3] filled in above
1562 //-----------------------------------------
1563 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);
1564
1565 if(cpresidentAlive == ZTRUE)
1566 {
1567 jam();
1568 DEBUG("cpresidentAlive");
1569 return;
1570 }
1571
1572 if(c_start.m_regReqReqSent != c_start.m_regReqReqRecv)
1573 {
1574 jam();
1575 DEBUG(c_start.m_regReqReqSent << " != " << c_start.m_regReqReqRecv);
1576 return;
1577 }
1578
1579 if(c_start.m_president_candidate != getOwnNodeId())
1580 {
1581 jam();
1582 DEBUG("i'm not the candidate");
1583 return;
1584 }
1585
1586 /**
1587 * All connected nodes has agreed
1588 */
1589 if(check_startup(signal))
1590 {
1591 jam();
1592 electionWon(signal);
1593 }
1594
1595 return;
1596 }//Qmgr::execCM_REGREF()
1597
1598 Uint32
check_startup(Signal * signal)1599 Qmgr::check_startup(Signal* signal)
1600 {
1601 const NDB_TICKS now = NdbTick_getCurrentTicks();
1602 const Uint64 elapsed = NdbTick_Elapsed(c_start_election_time,now).milliSec();
1603 const Uint64 partitionedTimeout = c_restartPartialTimeout
1604 + c_restartPartionedTimeout;
1605
1606 const bool no_nodegroup_active =
1607 (c_restartNoNodegroupTimeout != ~Uint32(0)) &&
1608 (! c_start.m_no_nodegroup_nodes.isclear());
1609
1610 /**
1611 * First see if we should wait more...
1612 */
1613 NdbNodeBitmask tmp;
1614 tmp.bitOR(c_start.m_skip_nodes);
1615 tmp.bitOR(c_start.m_starting_nodes);
1616
1617 NdbNodeBitmask wait;
1618 wait.assign(c_definedNodes);
1619 wait.bitANDC(tmp);
1620
1621 Uint32 retVal = 0;
1622 Uint32 incompleteng = MAX_NDB_NODES; // Illegal value
1623 NdbNodeBitmask report_mask;
1624
1625 if ((c_start.m_latest_gci == 0) ||
1626 (c_start.m_start_type == (1 << NodeState::ST_INITIAL_START)))
1627 {
1628 if (tmp.equal(c_definedNodes))
1629 {
1630 jam();
1631 signal->theData[1] = 0x8000;
1632 report_mask.assign(c_definedNodes);
1633 report_mask.bitANDC(c_start.m_starting_nodes);
1634 retVal = 1;
1635 goto start_report;
1636 }
1637 else if (no_nodegroup_active)
1638 {
1639 if (elapsed < c_restartNoNodegroupTimeout)
1640 {
1641 signal->theData[1] = 6;
1642 signal->theData[2] = Uint32((c_restartNoNodegroupTimeout - elapsed + 500) / 1000);
1643 report_mask.assign(wait);
1644 retVal = 0;
1645 goto start_report;
1646 }
1647 tmp.bitOR(c_start.m_no_nodegroup_nodes);
1648 if (tmp.equal(c_definedNodes))
1649 {
1650 signal->theData[1] = 0x8000;
1651 report_mask.assign(c_definedNodes);
1652 report_mask.bitANDC(c_start.m_starting_nodes);
1653 retVal = 1;
1654 goto start_report;
1655 }
1656 else
1657 {
1658 jam();
1659 signal->theData[1] = 1;
1660 signal->theData[2] = ~0;
1661 report_mask.assign(wait);
1662 retVal = 0;
1663 goto start_report;
1664 }
1665 }
1666 else
1667 {
1668 jam();
1669 signal->theData[1] = 1;
1670 signal->theData[2] = ~0;
1671 report_mask.assign(wait);
1672 retVal = 0;
1673 goto start_report;
1674 }
1675 }
1676
1677 if (elapsed >= c_restartNoNodegroupTimeout)
1678 {
1679 tmp.bitOR(c_start.m_no_nodegroup_nodes);
1680 }
1681
1682 {
1683 const bool all = c_start.m_starting_nodes.equal(c_definedNodes);
1684 CheckNodeGroups* sd = (CheckNodeGroups*)&signal->theData[0];
1685
1686 {
1687 /**
1688 * Check for missing node group directly
1689 */
1690 NdbNodeBitmask check;
1691 check.assign(c_definedNodes);
1692 check.bitANDC(c_start.m_starting_nodes); // Not connected nodes
1693 check.bitOR(c_start.m_starting_nodes_w_log);
1694
1695 sd->blockRef = reference();
1696 sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck;
1697 sd->mask = check;
1698 EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal,
1699 CheckNodeGroups::SignalLength);
1700
1701 if (sd->output == CheckNodeGroups::Lose)
1702 {
1703 jam();
1704 goto missing_nodegroup;
1705 }
1706 }
1707
1708 sd->blockRef = reference();
1709 sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck;
1710 sd->mask = c_start.m_starting_nodes;
1711 EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal,
1712 CheckNodeGroups::SignalLength);
1713
1714 const Uint32 result = sd->output;
1715
1716 sd->blockRef = reference();
1717 sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck;
1718 sd->mask = c_start.m_starting_nodes_w_log;
1719 EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal,
1720 CheckNodeGroups::SignalLength);
1721
1722 const Uint32 result_w_log = sd->output;
1723
1724 if (tmp.equal(c_definedNodes))
1725 {
1726 /**
1727 * All nodes (wrt no-wait nodes) has connected...
1728 * this means that we will now start or die
1729 */
1730 jam();
1731 switch(result_w_log){
1732 case CheckNodeGroups::Lose:
1733 {
1734 jam();
1735 goto missing_nodegroup;
1736 }
1737 case CheckNodeGroups::Win:
1738 signal->theData[1] = all ? 0x8001 : 0x8002;
1739 report_mask.assign(c_definedNodes);
1740 report_mask.bitANDC(c_start.m_starting_nodes);
1741 retVal = 1;
1742 goto check_log;
1743 case CheckNodeGroups::Partitioning:
1744 ndbrequire(result != CheckNodeGroups::Lose);
1745 signal->theData[1] =
1746 all ? 0x8001 : (result == CheckNodeGroups::Win ? 0x8002 : 0x8003);
1747 report_mask.assign(c_definedNodes);
1748 report_mask.bitANDC(c_start.m_starting_nodes);
1749 retVal = 1;
1750 goto check_log;
1751 }
1752 }
1753
1754 if (elapsed < c_restartPartialTimeout)
1755 {
1756 jam();
1757
1758 signal->theData[1] = c_restartPartialTimeout == (Uint32) ~0 ? 2 : 3;
1759 signal->theData[2] = Uint32((c_restartPartialTimeout - elapsed + 500) / 1000);
1760 report_mask.assign(wait);
1761 retVal = 0;
1762
1763 if (no_nodegroup_active && elapsed < c_restartNoNodegroupTimeout)
1764 {
1765 signal->theData[1] = 7;
1766 signal->theData[2] = Uint32((c_restartNoNodegroupTimeout - elapsed + 500) / 1000);
1767 }
1768 else if (no_nodegroup_active && elapsed >= c_restartNoNodegroupTimeout)
1769 {
1770 report_mask.bitANDC(c_start.m_no_nodegroup_nodes);
1771 }
1772
1773 goto start_report;
1774 }
1775
1776 /**
1777 * Start partial has passed...check for partitioning...
1778 */
1779 switch(result_w_log){
1780 case CheckNodeGroups::Lose:
1781 jam();
1782 goto missing_nodegroup;
1783 case CheckNodeGroups::Partitioning:
1784 if (elapsed < partitionedTimeout && result != CheckNodeGroups::Win)
1785 {
1786 goto missinglog;
1787 }
1788 // Fall through...
1789 case CheckNodeGroups::Win:
1790 signal->theData[1] =
1791 all ? 0x8001 : (result == CheckNodeGroups::Win ? 0x8002 : 0x8003);
1792 report_mask.assign(c_definedNodes);
1793 report_mask.bitANDC(c_start.m_starting_nodes);
1794 retVal = 2;
1795 goto check_log;
1796 }
1797 }
1798 ndbrequire(false);
1799
1800 check_log:
1801 jam();
1802 {
1803 Uint32 save[4+4*NdbNodeBitmask::Size];
1804 memcpy(save, signal->theData, sizeof(save));
1805
1806 DihRestartReq * req = CAST_PTR(DihRestartReq, signal->getDataPtrSend());
1807 req->senderRef = 0;
1808 c_start.m_starting_nodes.copyto(NdbNodeBitmask::Size, req->nodemask);
1809 memcpy(req->node_gcis, c_start.m_node_gci, 4*MAX_NDB_NODES);
1810 EXECUTE_DIRECT(DBDIH, GSN_DIH_RESTARTREQ, signal,
1811 DihRestartReq::CheckLength);
1812
1813 incompleteng = signal->theData[0];
1814 memcpy(signal->theData, save, sizeof(save));
1815
1816 if (incompleteng != MAX_NDB_NODES)
1817 {
1818 jam();
1819 if (retVal == 1)
1820 {
1821 jam();
1822 goto incomplete_log;
1823 }
1824 else if (retVal == 2)
1825 {
1826 if (elapsed <= partitionedTimeout)
1827 {
1828 jam();
1829 goto missinglog;
1830 }
1831 else
1832 {
1833 goto incomplete_log;
1834 }
1835 }
1836 ndbrequire(false);
1837 }
1838 }
1839 goto start_report;
1840
1841 missinglog:
1842 signal->theData[1] = c_restartPartionedTimeout == (Uint32) ~0 ? 4 : 5;
1843 signal->theData[2] = Uint32((partitionedTimeout - elapsed + 500) / 1000);
1844 report_mask.assign(c_definedNodes);
1845 report_mask.bitANDC(c_start.m_starting_nodes);
1846 retVal = 0;
1847 goto start_report;
1848
1849 start_report:
1850 jam();
1851 {
1852 Uint32 sz = NdbNodeBitmask::Size;
1853 signal->theData[0] = NDB_LE_StartReport;
1854 signal->theData[3] = sz;
1855 Uint32* ptr = signal->theData+4;
1856 c_definedNodes.copyto(sz, ptr); ptr += sz;
1857 c_start.m_starting_nodes.copyto(sz, ptr); ptr += sz;
1858 c_start.m_skip_nodes.copyto(sz, ptr); ptr += sz;
1859 report_mask.copyto(sz, ptr); ptr+= sz;
1860 c_start.m_no_nodegroup_nodes.copyto(sz, ptr); ptr += sz;
1861 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal,
1862 4+5*NdbNodeBitmask::Size, JBB);
1863 }
1864 return retVal;
1865
1866 missing_nodegroup:
1867 jam();
1868 {
1869 char buf[100], mask1[100], mask2[100];
1870 c_start.m_starting_nodes.getText(mask1);
1871 tmp.assign(c_start.m_starting_nodes);
1872 tmp.bitANDC(c_start.m_starting_nodes_w_log);
1873 tmp.getText(mask2);
1874 BaseString::snprintf(buf, sizeof(buf),
1875 "Unable to start missing node group! "
1876 " starting: %s (missing fs for: %s)",
1877 mask1, mask2);
1878 progError(__LINE__, NDBD_EXIT_INSUFFICENT_NODES, buf);
1879 return 0; // Deadcode
1880 }
1881
1882 incomplete_log:
1883 jam();
1884 {
1885 char buf[100], mask1[100];
1886 c_start.m_starting_nodes.getText(mask1);
1887 BaseString::snprintf(buf, sizeof(buf),
1888 "Incomplete log for node group: %d! "
1889 " starting nodes: %s",
1890 incompleteng, mask1);
1891 progError(__LINE__, NDBD_EXIT_INSUFFICENT_NODES, buf);
1892 return 0; // Deadcode
1893 }
1894 }
1895
1896 void
electionWon(Signal * signal)1897 Qmgr::electionWon(Signal* signal)
1898 {
1899 NodeRecPtr myNodePtr;
1900 cpresident = getOwnNodeId(); /* This node becomes president. */
1901 myNodePtr.i = getOwnNodeId();
1902 ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);
1903
1904 myNodePtr.p->phase = ZRUNNING;
1905
1906 cpdistref = reference();
1907 cneighbourl = ZNIL;
1908 cneighbourh = ZNIL;
1909 myNodePtr.p->ndynamicId = 1 | (myNodePtr.p->hbOrder << 16);
1910 c_maxDynamicId = 1;
1911 c_clusterNodes.clear();
1912 c_clusterNodes.set(getOwnNodeId());
1913
1914 cpresidentAlive = ZTRUE;
1915 NdbTick_Invalidate(&c_start_election_time);
1916 c_start.reset();
1917
1918 signal->theData[0] = NDB_LE_CM_REGCONF;
1919 signal->theData[1] = getOwnNodeId();
1920 signal->theData[2] = cpresident;
1921 signal->theData[3] = myNodePtr.p->ndynamicId;
1922 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);
1923
1924 c_start.m_starting_nodes.clear(getOwnNodeId());
1925 if (c_start.m_starting_nodes.isclear())
1926 {
1927 jam();
1928 sendSttorryLab(signal, true);
1929 }
1930 }
1931
1932 /*
1933 4.4.11 CONTINUEB */
1934 /*--------------------------------------------------------------------------*/
1935 /* */
1936 /*--------------------------------------------------------------------------*/
1937 /****************************>---------------------------------------------*/
1938 /* CONTINUEB > SENDER: Own block, Own node */
1939 /****************************>-------+INPUT : TCONTINUEB_TYPE */
1940 /*--------------------------------------------------------------*/
regreqTimeLimitLab(Signal * signal)1941 void Qmgr::regreqTimeLimitLab(Signal* signal)
1942 {
1943 if(cpresident == ZNIL)
1944 {
1945 if (c_start.m_president_candidate == ZNIL)
1946 {
1947 jam();
1948 c_start.m_president_candidate = getOwnNodeId();
1949 }
1950
1951 cmInfoconf010Lab(signal);
1952 }
1953 }//Qmgr::regreqTimelimitLab()
1954
1955 /**---------------------------------------------------------------------------
1956 * The new node will take care of giving information about own node and ask
1957 * all other nodes for nodeinfo. The new node will use CM_NODEINFOREQ for
1958 * that purpose. When the setup of connections to all running, the president
1959 * will send a commit to all running nodes + the new node
1960 * INPUT: NODE_PTR1, must be set as ZNIL if we don't enter CONNECT_NODES)
1961 * from signal CM_NODEINFOCONF.
1962 *---------------------------------------------------------------------------*/
1963 /*******************************/
1964 /* CM_NODEINFOCONF */
1965 /*******************************/
execCM_NODEINFOCONF(Signal * signal)1966 void Qmgr::execCM_NODEINFOCONF(Signal* signal)
1967 {
1968 DEBUG_START3(signal, "");
1969
1970 jamEntry();
1971
1972 CmNodeInfoConf * const conf = (CmNodeInfoConf*)signal->getDataPtr();
1973
1974 const Uint32 nodeId = conf->nodeId;
1975 const Uint32 dynamicId = conf->dynamicId;
1976 const Uint32 version = conf->version;
1977 Uint32 mysql_version = conf->mysql_version;
1978 Uint32 lqh_workers = conf->lqh_workers;
1979 if (version < NDBD_SPLIT_VERSION)
1980 {
1981 jam();
1982 mysql_version = 0;
1983 }
1984 if (version < NDBD_MT_LQH_VERSION)
1985 {
1986 jam();
1987 lqh_workers = 0;
1988 }
1989
1990 NodeRecPtr nodePtr;
1991 nodePtr.i = getOwnNodeId();
1992 ptrAss(nodePtr, nodeRec);
1993 ndbrequire(nodePtr.p->phase == ZSTARTING);
1994 ndbrequire(c_start.m_gsn == GSN_CM_NODEINFOREQ);
1995 c_start.m_nodes.clearWaitingFor(nodeId);
1996
1997 /**
1998 * Update node info
1999 */
2000 NodeRecPtr replyNodePtr;
2001 replyNodePtr.i = nodeId;
2002 ptrCheckGuard(replyNodePtr, MAX_NDB_NODES, nodeRec);
2003 replyNodePtr.p->ndynamicId = dynamicId;
2004 replyNodePtr.p->blockRef = signal->getSendersBlockRef();
2005 setNodeInfo(replyNodePtr.i).m_version = version;
2006 setNodeInfo(replyNodePtr.i).m_mysql_version = mysql_version;
2007 setNodeInfo(replyNodePtr.i).m_lqh_workers = lqh_workers;
2008
2009 recompute_version_info(NodeInfo::DB, version);
2010
2011 if(!c_start.m_nodes.done()){
2012 jam();
2013 return;
2014 }
2015
2016 /**********************************************<*/
2017 /* Send an ack. back to the president. */
2018 /* CM_ACKADD */
2019 /* The new node has been registered by all */
2020 /* running nodes and has stored nodeinfo about */
2021 /* all running nodes. The new node has to wait */
2022 /* for CM_ADD (commit) from president to become */
2023 /* a running node in the cluster. */
2024 /**********************************************<*/
2025 sendCmAckAdd(signal, getOwnNodeId(), CmAdd::Prepare);
2026 return;
2027 }//Qmgr::execCM_NODEINFOCONF()
2028
2029 /**---------------------------------------------------------------------------
2030 * A new node sends nodeinfo about himself. The new node asks for
2031 * corresponding nodeinfo back in the CM_NODEINFOCONF.
2032 *---------------------------------------------------------------------------*/
2033 /*******************************/
2034 /* CM_NODEINFOREQ */
2035 /*******************************/
execCM_NODEINFOREQ(Signal * signal)2036 void Qmgr::execCM_NODEINFOREQ(Signal* signal)
2037 {
2038 jamEntry();
2039
2040 const Uint32 Tblockref = signal->getSendersBlockRef();
2041
2042 NodeRecPtr nodePtr;
2043 nodePtr.i = getOwnNodeId();
2044 ptrAss(nodePtr, nodeRec);
2045 if(nodePtr.p->phase != ZRUNNING){
2046 jam();
2047 signal->theData[0] = reference();
2048 signal->theData[1] = getOwnNodeId();
2049 signal->theData[2] = ZNOT_RUNNING;
2050 sendSignal(Tblockref, GSN_CM_NODEINFOREF, signal, 3, JBB);
2051 return;
2052 }
2053
2054 NodeRecPtr addNodePtr;
2055 CmNodeInfoReq * const req = (CmNodeInfoReq*)signal->getDataPtr();
2056 addNodePtr.i = req->nodeId;
2057 ptrCheckGuard(addNodePtr, MAX_NDB_NODES, nodeRec);
2058 addNodePtr.p->ndynamicId = req->dynamicId;
2059 addNodePtr.p->blockRef = signal->getSendersBlockRef();
2060 setNodeInfo(addNodePtr.i).m_version = req->version;
2061
2062 Uint32 mysql_version = req->mysql_version;
2063 if (req->version < NDBD_SPLIT_VERSION)
2064 mysql_version = 0;
2065 setNodeInfo(addNodePtr.i).m_mysql_version = mysql_version;
2066
2067 Uint32 lqh_workers = req->lqh_workers;
2068 if (req->version < NDBD_MT_LQH_VERSION)
2069 lqh_workers = 0;
2070 setNodeInfo(addNodePtr.i).m_lqh_workers = lqh_workers;
2071
2072 c_maxDynamicId = req->dynamicId & 0xFFFF;
2073
2074 cmAddPrepare(signal, addNodePtr, nodePtr.p);
2075 }//Qmgr::execCM_NODEINFOREQ()
2076
2077 void
cmAddPrepare(Signal * signal,NodeRecPtr nodePtr,const NodeRec * self)2078 Qmgr::cmAddPrepare(Signal* signal, NodeRecPtr nodePtr, const NodeRec * self){
2079 jam();
2080
2081 switch(nodePtr.p->phase){
2082 case ZINIT:
2083 jam();
2084 nodePtr.p->phase = ZSTARTING;
2085 return;
2086 case ZFAIL_CLOSING:
2087 jam();
2088
2089 #if 1
2090 warningEvent("Received request to incorporate node %u, "
2091 "while error handling has not yet completed",
2092 nodePtr.i);
2093
2094 ndbrequire(getOwnNodeId() != cpresident);
2095 ndbrequire(signal->header.theVerId_signalNumber == GSN_CM_ADD);
2096 c_start.m_nodes.clearWaitingFor();
2097 c_start.m_nodes.setWaitingFor(nodePtr.i);
2098 c_start.m_gsn = GSN_CM_NODEINFOCONF;
2099 #else
2100 warningEvent("Enabling communication to CM_ADD node %u state=%d",
2101 nodePtr.i,
2102 nodePtr.p->phase);
2103 nodePtr.p->phase = ZSTARTING;
2104 nodePtr.p->failState = NORMAL;
2105 signal->theData[0] = 0;
2106 signal->theData[1] = nodePtr.i;
2107 sendSignal(TRPMAN_REF, GSN_OPEN_COMORD, signal, 2, JBB);
2108 #endif
2109 return;
2110 case ZSTARTING:
2111 break;
2112 case ZRUNNING:
2113 jam();
2114 case ZPREPARE_FAIL:
2115 jam();
2116 case ZAPI_ACTIVATION_ONGOING:
2117 jam();
2118 case ZAPI_ACTIVE:
2119 jam();
2120 case ZAPI_INACTIVE:
2121 ndbrequire(false);
2122 }
2123
2124 sendCmAckAdd(signal, nodePtr.i, CmAdd::Prepare);
2125 sendApiVersionRep(signal, nodePtr);
2126
2127 /* President have prepared us */
2128 CmNodeInfoConf * conf = (CmNodeInfoConf*)signal->getDataPtrSend();
2129 conf->nodeId = getOwnNodeId();
2130 conf->dynamicId = self->ndynamicId;
2131 conf->version = getNodeInfo(getOwnNodeId()).m_version;
2132 conf->mysql_version = getNodeInfo(getOwnNodeId()).m_mysql_version;
2133 conf->lqh_workers = getNodeInfo(getOwnNodeId()).m_lqh_workers;
2134 sendSignal(nodePtr.p->blockRef, GSN_CM_NODEINFOCONF, signal,
2135 CmNodeInfoConf::SignalLength, JBB);
2136 DEBUG_START(GSN_CM_NODEINFOCONF, refToNode(nodePtr.p->blockRef), "");
2137 }
2138
2139 void
sendApiVersionRep(Signal * signal,NodeRecPtr nodePtr)2140 Qmgr::sendApiVersionRep(Signal* signal, NodeRecPtr nodePtr)
2141 {
2142 if (getNodeInfo(nodePtr.i).m_version >= NDBD_NODE_VERSION_REP)
2143 {
2144 jam();
2145 Uint32 ref = calcQmgrBlockRef(nodePtr.i);
2146 for(Uint32 i = 1; i<MAX_NODES; i++)
2147 {
2148 jam();
2149 Uint32 version = getNodeInfo(i).m_version;
2150 Uint32 type = getNodeInfo(i).m_type;
2151 if (type != NodeInfo::DB && version)
2152 {
2153 jam();
2154 signal->theData[0] = i;
2155 signal->theData[1] = version;
2156 sendSignal(ref, GSN_NODE_VERSION_REP, signal, 2, JBB);
2157 }
2158 }
2159 }
2160 }
2161
2162 void
sendCmAckAdd(Signal * signal,Uint32 nodeId,CmAdd::RequestType type)2163 Qmgr::sendCmAckAdd(Signal * signal, Uint32 nodeId, CmAdd::RequestType type){
2164
2165 CmAckAdd * cmAckAdd = (CmAckAdd*)signal->getDataPtrSend();
2166 cmAckAdd->requestType = type;
2167 cmAckAdd->startingNodeId = nodeId;
2168 cmAckAdd->senderNodeId = getOwnNodeId();
2169 sendSignal(cpdistref, GSN_CM_ACKADD, signal, CmAckAdd::SignalLength, JBA);
2170 DEBUG_START(GSN_CM_ACKADD, cpresident, "");
2171
2172 switch(type){
2173 case CmAdd::Prepare:
2174 return;
2175 case CmAdd::AddCommit:
2176 case CmAdd::CommitNew:
2177 break;
2178 }
2179
2180 signal->theData[0] = nodeId;
2181 EXECUTE_DIRECT(NDBCNTR, GSN_CM_ADD_REP, signal, 1);
2182 jamEntry();
2183 }
2184
2185 /*
2186 4.4.11 CM_ADD */
/**--------------------------------------------------------------------------
 * Prepare a running node to add a new node to the cluster. The running node
 * will change phase of the new node from ZINIT to ZSTARTING. The running node
 * will also mark that we have received a prepare. When the new node has sent
 * us nodeinfo we can send an acknowledgement back to the president. When all
 * running nodes have acknowledged the new node, the president will send a
 * commit and we can change phase of the new node to ZRUNNING. The president
 * will also send CM_ADD to himself.
 *---------------------------------------------------------------------------*/
2196 /*******************************/
2197 /* CM_ADD */
2198 /*******************************/
execCM_ADD(Signal * signal)2199 void Qmgr::execCM_ADD(Signal* signal)
2200 {
2201 NodeRecPtr addNodePtr;
2202 jamEntry();
2203
2204 NodeRecPtr nodePtr;
2205 nodePtr.i = getOwnNodeId();
2206 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
2207
2208 CRASH_INSERTION(940);
2209
2210 CmAdd * const cmAdd = (CmAdd*)signal->getDataPtr();
2211 const CmAdd::RequestType type = (CmAdd::RequestType)cmAdd->requestType;
2212 addNodePtr.i = cmAdd->startingNodeId;
2213 //const Uint32 startingVersion = cmAdd->startingVersion;
2214 ptrCheckGuard(addNodePtr, MAX_NDB_NODES, nodeRec);
2215
2216 DEBUG_START3(signal, type);
2217
2218 if(nodePtr.p->phase == ZSTARTING){
2219 jam();
2220 /**
2221 * We are joining...
2222 */
2223 ndbrequire(addNodePtr.i == nodePtr.i);
2224 switch(type){
2225 case CmAdd::Prepare:
2226 ndbrequire(c_start.m_gsn == GSN_CM_NODEINFOREQ);
2227 /**
2228 * Wait for CM_NODEINFO_CONF
2229 */
2230 return;
2231 case CmAdd::CommitNew:
2232 /**
2233 * Tata. we're in the cluster
2234 */
2235 joinedCluster(signal, addNodePtr);
2236 return;
2237 case CmAdd::AddCommit:
2238 ndbrequire(false);
2239 }
2240 }
2241
2242 switch (type) {
2243 case CmAdd::Prepare:
2244 cmAddPrepare(signal, addNodePtr, nodePtr.p);
2245 break;
2246 case CmAdd::AddCommit:{
2247 jam();
2248 ndbrequire(addNodePtr.p->phase == ZSTARTING);
2249 addNodePtr.p->phase = ZRUNNING;
2250 m_connectivity_check.reportNodeConnect(addNodePtr.i);
2251 set_hb_count(addNodePtr.i) = 0;
2252 c_clusterNodes.set(addNodePtr.i);
2253 findNeighbours(signal, __LINE__);
2254
2255 /**
2256 * SEND A HEARTBEAT IMMEDIATELY TO DECREASE THE RISK THAT WE MISS EARLY
2257 * HEARTBEATS.
2258 */
2259 sendHeartbeat(signal);
2260 hb_send_timer.reset(NdbTick_getCurrentTicks());
2261
2262 /**
2263 * ENABLE COMMUNICATION WITH ALL BLOCKS WITH THE NEWLY ADDED NODE
2264 */
2265 EnableComReq *enableComReq = (EnableComReq *)signal->getDataPtrSend();
2266 enableComReq->m_senderRef = reference();
2267 enableComReq->m_senderData = ENABLE_COM_CM_ADD_COMMIT;
2268 NodeBitmask::clear(enableComReq->m_nodeIds);
2269 NodeBitmask::set(enableComReq->m_nodeIds, addNodePtr.i);
2270 sendSignal(TRPMAN_REF, GSN_ENABLE_COMREQ, signal,
2271 EnableComReq::SignalLength, JBB);
2272 break;
2273 }
2274 case CmAdd::CommitNew:
2275 jam();
2276 ndbrequire(false);
2277 }
2278
2279 }//Qmgr::execCM_ADD()
2280
2281 void
handleEnableComAddCommit(Signal * signal,Uint32 node)2282 Qmgr::handleEnableComAddCommit(Signal *signal, Uint32 node)
2283 {
2284 sendCmAckAdd(signal, node, CmAdd::AddCommit);
2285 if(getOwnNodeId() != cpresident){
2286 jam();
2287 c_start.reset();
2288 }
2289 }
2290
2291 void
execENABLE_COMCONF(Signal * signal)2292 Qmgr::execENABLE_COMCONF(Signal *signal)
2293 {
2294 const EnableComConf *enableComConf =
2295 (const EnableComConf *)signal->getDataPtr();
2296 Uint32 state = enableComConf->m_senderData;
2297 Uint32 node = NodeBitmask::find(enableComConf->m_nodeIds, 0);
2298
2299 jamEntry();
2300
2301 switch (state)
2302 {
2303 case ENABLE_COM_CM_ADD_COMMIT:
2304 jam();
2305 /* Only exactly one node possible here. */
2306 ndbrequire(node != NodeBitmask::NotFound);
2307 ndbrequire(NodeBitmask::find(enableComConf->m_nodeIds, node + 1) ==
2308 NodeBitmask::NotFound);
2309 handleEnableComAddCommit(signal, node);
2310 break;
2311
2312 case ENABLE_COM_CM_COMMIT_NEW:
2313 jam();
2314 handleEnableComCommitNew(signal);
2315 break;
2316
2317 case ENABLE_COM_API_REGREQ:
2318 jam();
2319 /* Only exactly one node possible here. */
2320 ndbrequire(node != NodeBitmask::NotFound);
2321 ndbrequire(NodeBitmask::find(enableComConf->m_nodeIds, node + 1) ==
2322 NodeBitmask::NotFound);
2323 handleEnableComApiRegreq(signal, node);
2324 break;
2325
2326 default:
2327 jam();
2328 ndbrequire(false);
2329 }
2330 }
2331
2332 void
joinedCluster(Signal * signal,NodeRecPtr nodePtr)2333 Qmgr::joinedCluster(Signal* signal, NodeRecPtr nodePtr){
2334 /**
2335 * WE HAVE BEEN INCLUDED IN THE CLUSTER WE CAN START BEING PART OF THE
2336 * HEARTBEAT PROTOCOL AND WE WILL ALSO ENABLE COMMUNICATION WITH ALL
2337 * NODES IN THE CLUSTER.
2338 */
2339 nodePtr.p->phase = ZRUNNING;
2340 set_hb_count(nodePtr.i) = 0;
2341 findNeighbours(signal, __LINE__);
2342 c_clusterNodes.set(nodePtr.i);
2343 c_start.reset();
2344
2345 /**
2346 * SEND A HEARTBEAT IMMEDIATELY TO DECREASE THE RISK
2347 * THAT WE MISS EARLY HEARTBEATS.
2348 */
2349 sendHeartbeat(signal);
2350 hb_send_timer.reset(NdbTick_getCurrentTicks());
2351
2352 /**
2353 * ENABLE COMMUNICATION WITH ALL BLOCKS IN THE CURRENT CLUSTER AND SET
2354 * THE NODES IN THE CLUSTER TO BE RUNNING.
2355 */
2356 EnableComReq *enableComReq = (EnableComReq *)signal->getDataPtrSend();
2357 enableComReq->m_senderRef = reference();
2358 enableComReq->m_senderData = ENABLE_COM_CM_COMMIT_NEW;
2359 NodeBitmask::clear(enableComReq->m_nodeIds);
2360 jam();
2361 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
2362 ptrAss(nodePtr, nodeRec);
2363 if ((nodePtr.p->phase == ZRUNNING) && (nodePtr.i != getOwnNodeId())) {
2364 /*-------------------------------------------------------------------*/
2365 // Enable full communication to all other nodes. Not really necessary
2366 // to open communication to ourself.
2367 /*-------------------------------------------------------------------*/
2368 jamLine(nodePtr.i);
2369 NodeBitmask::set(enableComReq->m_nodeIds, nodePtr.i);
2370 }//if
2371 }//for
2372
2373 if (!NodeBitmask::isclear(enableComReq->m_nodeIds))
2374 {
2375 jam();
2376 sendSignal(TRPMAN_REF, GSN_ENABLE_COMREQ, signal,
2377 EnableComReq::SignalLength, JBB);
2378 }
2379 else
2380 {
2381 handleEnableComCommitNew(signal);
2382 }
2383 }
2384
2385 void
handleEnableComCommitNew(Signal * signal)2386 Qmgr::handleEnableComCommitNew(Signal *signal)
2387 {
2388 sendSttorryLab(signal, true);
2389
2390 sendCmAckAdd(signal, getOwnNodeId(), CmAdd::CommitNew);
2391 }
2392
2393 /* 4.10.7 CM_ACKADD - PRESIDENT IS RECEIVER - */
2394 /*---------------------------------------------------------------------------*/
2395 /* Entry point for an ack add signal.
2396 * The TTYPE defines if it is a prepare or a commit. */
2397 /*---------------------------------------------------------------------------*/
execCM_ACKADD(Signal * signal)2398 void Qmgr::execCM_ACKADD(Signal* signal)
2399 {
2400 NodeRecPtr addNodePtr;
2401 NodeRecPtr senderNodePtr;
2402 jamEntry();
2403
2404 CmAckAdd * const cmAckAdd = (CmAckAdd*)signal->getDataPtr();
2405 const CmAdd::RequestType type = (CmAdd::RequestType)cmAckAdd->requestType;
2406 addNodePtr.i = cmAckAdd->startingNodeId;
2407 senderNodePtr.i = cmAckAdd->senderNodeId;
2408
2409 DEBUG_START3(signal, type);
2410
2411 if (cpresident != getOwnNodeId()) {
2412 jam();
2413 /*-----------------------------------------------------------------------*/
2414 /* IF WE ARE NOT PRESIDENT THEN WE SHOULD NOT RECEIVE THIS MESSAGE. */
2415 /*------------------------------------------------------------_----------*/
2416 warningEvent("Received CM_ACKADD from %d president=%d",
2417 senderNodePtr.i, cpresident);
2418 return;
2419 }//if
2420
2421 if (addNodePtr.i != c_start.m_startNode) {
2422 jam();
2423 /*----------------------------------------------------------------------*/
2424 /* THIS IS NOT THE STARTING NODE. WE ARE ACTIVE NOW WITH ANOTHER START. */
2425 /*----------------------------------------------------------------------*/
2426 warningEvent("Received CM_ACKADD from %d with startNode=%d != own %d",
2427 senderNodePtr.i, addNodePtr.i, c_start.m_startNode);
2428 return;
2429 }//if
2430
2431 ndbrequire(c_start.m_gsn == GSN_CM_ADD);
2432 c_start.m_nodes.clearWaitingFor(senderNodePtr.i);
2433 if(!c_start.m_nodes.done()){
2434 jam();
2435 return;
2436 }
2437
2438 switch (type) {
2439 case CmAdd::Prepare:{
2440 jam();
2441
2442 /*----------------------------------------------------------------------*/
2443 /* ALL RUNNING NODES HAVE PREPARED THE INCLUSION OF THIS NEW NODE. */
2444 /*----------------------------------------------------------------------*/
2445 c_start.m_gsn = GSN_CM_ADD;
2446 c_start.m_nodes = c_clusterNodes;
2447
2448 CmAdd * const cmAdd = (CmAdd*)signal->getDataPtrSend();
2449 cmAdd->requestType = CmAdd::AddCommit;
2450 cmAdd->startingNodeId = addNodePtr.i;
2451 cmAdd->startingVersion = getNodeInfo(addNodePtr.i).m_version;
2452 cmAdd->startingMysqlVersion = getNodeInfo(addNodePtr.i).m_mysql_version;
2453 NodeReceiverGroup rg(QMGR, c_clusterNodes);
2454 sendSignal(rg, GSN_CM_ADD, signal, CmAdd::SignalLength, JBA);
2455 DEBUG_START2(GSN_CM_ADD, rg, "AddCommit");
2456 return;
2457 }
2458 case CmAdd::AddCommit:{
2459 jam();
2460
2461 /****************************************/
2462 /* Send commit to the new node so he */
2463 /* will change PHASE into ZRUNNING */
2464 /****************************************/
2465 c_start.m_gsn = GSN_CM_ADD;
2466 c_start.m_nodes.clearWaitingFor();
2467 c_start.m_nodes.setWaitingFor(addNodePtr.i);
2468
2469 CmAdd * const cmAdd = (CmAdd*)signal->getDataPtrSend();
2470 cmAdd->requestType = CmAdd::CommitNew;
2471 cmAdd->startingNodeId = addNodePtr.i;
2472 cmAdd->startingVersion = getNodeInfo(addNodePtr.i).m_version;
2473 cmAdd->startingMysqlVersion = getNodeInfo(addNodePtr.i).m_mysql_version;
2474 sendSignal(calcQmgrBlockRef(addNodePtr.i), GSN_CM_ADD, signal,
2475 CmAdd::SignalLength, JBA);
2476 DEBUG_START(GSN_CM_ADD, addNodePtr.i, "CommitNew");
2477 /**
2478 * Report to DBDIH that a node have been added to the nodes included
2479 * in the heartbeat protocol.
2480 */
2481 InclNodeHBProtocolRep *rep = (InclNodeHBProtocolRep*)signal->getDataPtrSend();
2482 rep->nodeId = addNodePtr.i;
2483 EXECUTE_DIRECT(DBDIH, GSN_INCL_NODE_HB_PROTOCOL_REP, signal,
2484 InclNodeHBProtocolRep::SignalLength);
2485 return;
2486 }
2487 case CmAdd::CommitNew:
2488 jam();
2489 /**
2490 * Tell arbitration about new node.
2491 */
2492 handleArbitNdbAdd(signal, addNodePtr.i);
2493 c_start.reset();
2494
2495 if (c_start.m_starting_nodes.get(addNodePtr.i))
2496 {
2497 jam();
2498 c_start.m_starting_nodes.clear(addNodePtr.i);
2499 if (c_start.m_starting_nodes.isclear())
2500 {
2501 jam();
2502 sendSttorryLab(signal, true);
2503 }
2504 }
2505 return;
2506 }//switch
2507 ndbrequire(false);
2508 }//Qmgr::execCM_ACKADD()
2509
2510 /**-------------------------------------------------------------------------
2511 * WE HAVE BEEN INCLUDED INTO THE CLUSTER. IT IS NOW TIME TO CALCULATE WHICH
2512 * ARE OUR LEFT AND RIGHT NEIGHBOURS FOR THE HEARTBEAT PROTOCOL.
2513 *--------------------------------------------------------------------------*/
findNeighbours(Signal * signal,Uint32 from)2514 void Qmgr::findNeighbours(Signal* signal, Uint32 from)
2515 {
2516 UintR toldLeftNeighbour;
2517 UintR tfnLeftFound;
2518 UintR tfnMaxFound;
2519 UintR tfnMinFound;
2520 UintR tfnRightFound;
2521 NodeRecPtr fnNodePtr;
2522 NodeRecPtr fnOwnNodePtr;
2523
2524 Uint32 toldRightNeighbour = cneighbourh;
2525 toldLeftNeighbour = cneighbourl;
2526 tfnLeftFound = 0;
2527 tfnMaxFound = 0;
2528 tfnMinFound = (UintR)-1;
2529 tfnRightFound = (UintR)-1;
2530 fnOwnNodePtr.i = getOwnNodeId();
2531 ptrCheckGuard(fnOwnNodePtr, MAX_NDB_NODES, nodeRec);
2532 for (fnNodePtr.i = 1; fnNodePtr.i < MAX_NDB_NODES; fnNodePtr.i++) {
2533 ptrAss(fnNodePtr, nodeRec);
2534 if (fnNodePtr.i != fnOwnNodePtr.i) {
2535 jamLine(fnNodePtr.i);
2536 if (fnNodePtr.p->phase == ZRUNNING) {
2537 if (tfnMinFound > fnNodePtr.p->ndynamicId) {
2538 jam();
2539 tfnMinFound = fnNodePtr.p->ndynamicId;
2540 }//if
2541 if (tfnMaxFound < fnNodePtr.p->ndynamicId) {
2542 jam();
2543 tfnMaxFound = fnNodePtr.p->ndynamicId;
2544 }//if
2545 if (fnOwnNodePtr.p->ndynamicId > fnNodePtr.p->ndynamicId) {
2546 jam();
2547 if (fnNodePtr.p->ndynamicId > tfnLeftFound) {
2548 jam();
2549 tfnLeftFound = fnNodePtr.p->ndynamicId;
2550 }//if
2551 } else {
2552 jam();
2553 if (fnNodePtr.p->ndynamicId < tfnRightFound) {
2554 jam();
2555 tfnRightFound = fnNodePtr.p->ndynamicId;
2556 }//if
2557 }//if
2558 }//if
2559 }//if
2560 }//for
2561 if (tfnLeftFound == 0) {
2562 if (tfnMinFound == (UintR)-1) {
2563 jam();
2564 cneighbourl = ZNIL;
2565 } else {
2566 jam();
2567 cneighbourl = translateDynamicIdToNodeId(signal, tfnMaxFound);
2568 }//if
2569 } else {
2570 jam();
2571 cneighbourl = translateDynamicIdToNodeId(signal, tfnLeftFound);
2572 }//if
2573 if (tfnRightFound == (UintR)-1) {
2574 if (tfnMaxFound == 0) {
2575 jam();
2576 cneighbourh = ZNIL;
2577 } else {
2578 jam();
2579 cneighbourh = translateDynamicIdToNodeId(signal, tfnMinFound);
2580 }//if
2581 } else {
2582 jam();
2583 cneighbourh = translateDynamicIdToNodeId(signal, tfnRightFound);
2584 }//if
2585 if (toldLeftNeighbour != cneighbourl) {
2586 jam();
2587 if (cneighbourl != ZNIL) {
2588 jam();
2589 /**-------------------------------------------------------------------*/
2590 /* WE ARE SUPERVISING A NEW LEFT NEIGHBOUR. WE START WITH ALARM COUNT
2591 * EQUAL TO ZERO.
2592 *---------------------------------------------------------------------*/
2593 fnNodePtr.i = cneighbourl;
2594 ptrCheckGuard(fnNodePtr, MAX_NDB_NODES, nodeRec);
2595 set_hb_count(fnNodePtr.i) = 0;
2596 }//if
2597 }//if
2598
2599 signal->theData[0] = NDB_LE_FIND_NEIGHBOURS;
2600 signal->theData[1] = getOwnNodeId();
2601 signal->theData[2] = cneighbourl;
2602 signal->theData[3] = cneighbourh;
2603 signal->theData[4] = fnOwnNodePtr.p->ndynamicId;
2604 UintR Tlen = 5;
2605 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, Tlen, JBB);
2606 g_eventLogger->info("findNeighbours from: %u old (left: %u right: %u) new (%u %u)",
2607 from,
2608 toldLeftNeighbour,
2609 toldRightNeighbour,
2610 cneighbourl,
2611 cneighbourh);
2612 }//Qmgr::findNeighbours()
2613
2614 /*
2615 4.10.7 INIT_DATA */
2616 /*---------------------------------------------------------------------------*/
2617 /*---------------------------------------------------------------------------*/
initData(Signal * signal)2618 void Qmgr::initData(Signal* signal)
2619 {
2620 // catch-all for missing initializations
2621 memset(&arbitRec, 0, sizeof(arbitRec));
2622
2623 /**
2624 * Timeouts
2625 */
2626 const ndb_mgm_configuration_iterator * p =
2627 m_ctx.m_config.getOwnConfigIterator();
2628 ndbrequire(p != 0);
2629
2630 Uint32 hbDBDB = 1500;
2631 Uint32 arbitTimeout = 1000;
2632 Uint32 arbitMethod = ARBIT_METHOD_DEFAULT;
2633 Uint32 ccInterval = 0;
2634 c_restartPartialTimeout = 30000;
2635 c_restartPartionedTimeout = 60000;
2636 c_restartFailureTimeout = ~0;
2637 c_restartNoNodegroupTimeout = 15000;
2638 ndb_mgm_get_int_parameter(p, CFG_DB_HEARTBEAT_INTERVAL, &hbDBDB);
2639 ndb_mgm_get_int_parameter(p, CFG_DB_ARBIT_TIMEOUT, &arbitTimeout);
2640 ndb_mgm_get_int_parameter(p, CFG_DB_ARBIT_METHOD, &arbitMethod);
2641 ndb_mgm_get_int_parameter(p, CFG_DB_START_PARTIAL_TIMEOUT,
2642 &c_restartPartialTimeout);
2643 ndb_mgm_get_int_parameter(p, CFG_DB_START_PARTITION_TIMEOUT,
2644 &c_restartPartionedTimeout);
2645 ndb_mgm_get_int_parameter(p, CFG_DB_START_NO_NODEGROUP_TIMEOUT,
2646 &c_restartNoNodegroupTimeout);
2647 ndb_mgm_get_int_parameter(p, CFG_DB_START_FAILURE_TIMEOUT,
2648 &c_restartFailureTimeout);
2649 ndb_mgm_get_int_parameter(p, CFG_DB_CONNECT_CHECK_DELAY,
2650 &ccInterval);
2651
2652 if(c_restartPartialTimeout == 0)
2653 {
2654 c_restartPartialTimeout = ~0;
2655 }
2656
2657 if (c_restartPartionedTimeout ==0)
2658 {
2659 c_restartPartionedTimeout = ~0;
2660 }
2661
2662 if (c_restartFailureTimeout == 0)
2663 {
2664 c_restartFailureTimeout = ~0;
2665 }
2666
2667 if (c_restartNoNodegroupTimeout == 0)
2668 {
2669 c_restartNoNodegroupTimeout = ~0;
2670 }
2671
2672 setHbDelay(hbDBDB);
2673 setCCDelay(ccInterval);
2674 setArbitTimeout(arbitTimeout);
2675
2676 arbitRec.method = (ArbitRec::Method)arbitMethod;
2677 arbitRec.state = ARBIT_NULL; // start state for all nodes
2678 arbitRec.apiMask[0].clear(); // prepare for ARBIT_CFG
2679
2680 Uint32 sum = 0;
2681 ArbitSignalData* const sd = (ArbitSignalData*)&signal->theData[0];
2682 for (unsigned rank = 1; rank <= 2; rank++) {
2683 sd->sender = getOwnNodeId();
2684 sd->code = rank;
2685 sd->node = 0;
2686 sd->ticket.clear();
2687 sd->mask.clear();
2688 ndb_mgm_configuration_iterator * iter =
2689 m_ctx.m_config.getClusterConfigIterator();
2690 for (ndb_mgm_first(iter); ndb_mgm_valid(iter); ndb_mgm_next(iter)) {
2691 Uint32 tmp = 0;
2692 if (ndb_mgm_get_int_parameter(iter, CFG_NODE_ARBIT_RANK, &tmp) == 0 &&
2693 tmp == rank){
2694 Uint32 nodeId = 0;
2695 ndbrequire(!ndb_mgm_get_int_parameter(iter, CFG_NODE_ID, &nodeId));
2696 sd->mask.set(nodeId);
2697 }
2698 }
2699 sum += sd->mask.count();
2700 execARBIT_CFG(signal);
2701 }
2702
2703 if (arbitRec.method == ArbitRec::METHOD_DEFAULT &&
2704 sum == 0)
2705 {
2706 jam();
2707 infoEvent("Arbitration disabled, all API nodes have rank 0");
2708 arbitRec.method = ArbitRec::DISABLED;
2709 }
2710
2711 setNodeInfo(getOwnNodeId()).m_mysql_version = NDB_MYSQL_VERSION_D;
2712
2713 ndb_mgm_configuration_iterator * iter =
2714 m_ctx.m_config.getClusterConfigIterator();
2715 for (ndb_mgm_first(iter); ndb_mgm_valid(iter); ndb_mgm_next(iter))
2716 {
2717 jam();
2718 Uint32 nodeId = 0;
2719 if (ndb_mgm_get_int_parameter(iter, CFG_NODE_ID, &nodeId) == 0)
2720 {
2721 jam();
2722 if (nodeId < MAX_NDB_NODES && getNodeInfo(nodeId).m_type == NodeInfo::DB)
2723 {
2724 Uint32 hbOrder = 0;
2725 ndb_mgm_get_int_parameter(iter, CFG_DB_HB_ORDER, &hbOrder);
2726
2727 NodeRecPtr nodePtr;
2728 nodePtr.i = nodeId;
2729 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
2730 nodePtr.p->hbOrder = hbOrder;
2731 }
2732 }
2733 }
2734 int hb_order_error = check_hb_order_config();
2735 if (hb_order_error == -1)
2736 {
2737 char msg[] = "Illegal HeartbeatOrder config, "
2738 "all nodes must have non-zero config value";
2739 progError(__LINE__, NDBD_EXIT_INVALID_CONFIG, msg);
2740 return;
2741 }
2742 if (hb_order_error == -2)
2743 {
2744 char msg[] = "Illegal HeartbeatOrder config, "
2745 "the nodes must have distinct config values";
2746 progError(__LINE__, NDBD_EXIT_INVALID_CONFIG, msg);
2747 return;
2748 }
2749 ndbrequire(hb_order_error == 0);
2750 }//Qmgr::initData()
2751
2752
2753 /**---------------------------------------------------------------------------
2754 * HERE WE RECEIVE THE JOB TABLE SIGNAL EVERY 10 MILLISECONDS.
2755 * WE WILL USE THIS TO CHECK IF IT IS TIME TO CHECK THE NEIGHBOUR NODE.
2756 * WE WILL ALSO SEND A SIGNAL TO BLOCKS THAT NEED A TIME SIGNAL AND
2757 * DO NOT WANT TO USE JOB TABLE SIGNALS.
2758 *---------------------------------------------------------------------------*/
timerHandlingLab(Signal * signal)2759 void Qmgr::timerHandlingLab(Signal* signal)
2760 {
2761 const NDB_TICKS TcurrentTime = NdbTick_getCurrentTicks();
2762 NodeRecPtr myNodePtr;
2763 myNodePtr.i = getOwnNodeId();
2764 ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);
2765
2766 const Uint32 sentHi = signal->theData[1];
2767 const Uint32 sentLo = signal->theData[2];
2768 const NDB_TICKS sent((Uint64(sentHi) << 32) | sentLo);
2769
2770 if (NdbTick_Compare(sent,TcurrentTime) > 0)
2771 {
2772 jam();
2773 const Uint64 backwards = NdbTick_Elapsed(TcurrentTime,sent).milliSec();
2774 if (backwards > 0) //Ignore sub millisecond backticks
2775 {
2776 g_eventLogger->warning("timerHandlingLab, clock ticked backwards: %llu (ms)",
2777 backwards);
2778 }
2779 }
2780 else
2781 {
2782 const Uint64 elapsed = NdbTick_Elapsed(sent,TcurrentTime).milliSec();
2783 if (elapsed >= 1000)
2784 {
2785 jam();
2786 g_eventLogger->warning("timerHandlingLab, expected 10ms sleep"
2787 ", not scheduled for: %d (ms)", int(elapsed));
2788 }
2789 else if (elapsed >= 150)
2790 {
2791 g_eventLogger->info("timerHandlingLab, expected 10ms sleep"
2792 ", not scheduled for: %d (ms)", int(elapsed));
2793 }
2794 }
2795
2796 if (myNodePtr.p->phase == ZRUNNING) {
2797 jam();
2798 /**---------------------------------------------------------------------
2799 * WE ARE ONLY PART OF HEARTBEAT CLUSTER IF WE ARE UP AND RUNNING.
2800 *---------------------------------------------------------------------*/
2801 if (hb_send_timer.check(TcurrentTime)) {
2802 jam();
2803 sendHeartbeat(signal);
2804 hb_send_timer.reset(TcurrentTime);
2805 }
2806 if (likely(! m_connectivity_check.m_active))
2807 {
2808 if (hb_check_timer.check(TcurrentTime)) {
2809 jam();
2810 checkHeartbeat(signal);
2811 hb_check_timer.reset(TcurrentTime);
2812 }
2813 }
2814 else
2815 {
2816 /* Connectivity check */
2817 if (m_connectivity_check.m_timer.check(TcurrentTime)) {
2818 jam();
2819 checkConnectivityTimeSignal(signal);
2820 m_connectivity_check.m_timer.reset(TcurrentTime);
2821 }
2822 }
2823 }
2824
2825 if (interface_check_timer.check(TcurrentTime)) {
2826 jam();
2827 interface_check_timer.reset(TcurrentTime);
2828 checkStartInterface(signal, TcurrentTime);
2829 }
2830
2831 if (hb_api_timer.check(TcurrentTime))
2832 {
2833 jam();
2834 hb_api_timer.reset(TcurrentTime);
2835 apiHbHandlingLab(signal, TcurrentTime);
2836 }
2837
2838 //--------------------------------------------------
2839 // Resend this signal with 10 milliseconds delay.
2840 //--------------------------------------------------
2841 signal->theData[0] = ZTIMER_HANDLING;
2842 signal->theData[1] = Uint32(TcurrentTime.getUint64() >> 32);
2843 signal->theData[2] = Uint32(TcurrentTime.getUint64());
2844 sendSignalWithDelay(QMGR_REF, GSN_CONTINUEB, signal, 10, 3);
2845 return;
2846 }//Qmgr::timerHandlingLab()
2847
2848 /*---------------------------------------------------------------------------*/
2849 /* THIS MODULE HANDLES THE SENDING AND RECEIVING OF HEARTBEATS. */
2850 /*---------------------------------------------------------------------------*/
sendHeartbeat(Signal * signal)2851 void Qmgr::sendHeartbeat(Signal* signal)
2852 {
2853 NodeRecPtr localNodePtr;
2854 localNodePtr.i = cneighbourh;
2855 if (localNodePtr.i == ZNIL) {
2856 jam();
2857 /**---------------------------------------------------------------------
2858 * THERE ARE NO NEIGHBOURS. THIS IS POSSIBLE IF WE ARE THE ONLY NODE IN
2859 * THE CLUSTER.IN THIS CASE WE DO NOT NEED TO SEND ANY HEARTBEAT SIGNALS.
2860 *-----------------------------------------------------------------------*/
2861 return;
2862 }//if
2863 ptrCheckGuard(localNodePtr, MAX_NDB_NODES, nodeRec);
2864 signal->theData[0] = getOwnNodeId();
2865
2866 sendSignal(localNodePtr.p->blockRef, GSN_CM_HEARTBEAT, signal, 1, JBA);
2867 #ifdef VM_TRACE
2868 signal->theData[0] = NDB_LE_SentHeartbeat;
2869 signal->theData[1] = localNodePtr.i;
2870 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
2871 #endif
2872 }//Qmgr::sendHeartbeat()
2873
checkHeartbeat(Signal * signal)2874 void Qmgr::checkHeartbeat(Signal* signal)
2875 {
2876 NodeRecPtr nodePtr;
2877
2878 nodePtr.i = cneighbourl;
2879 if (nodePtr.i == ZNIL) {
2880 jam();
2881 /**---------------------------------------------------------------------
2882 * THERE ARE NO NEIGHBOURS. THIS IS POSSIBLE IF WE ARE THE ONLY NODE IN
2883 * THE CLUSTER. IN THIS CASE WE DO NOT NEED TO CHECK ANY HEARTBEATS.
2884 *-----------------------------------------------------------------------*/
2885 return;
2886 }//if
2887 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
2888
2889 set_hb_count(nodePtr.i)++;
2890 ndbrequire(nodePtr.p->phase == ZRUNNING);
2891 ndbrequire(getNodeInfo(nodePtr.i).m_type == NodeInfo::DB);
2892
2893 if (get_hb_count(nodePtr.i) > 2)
2894 {
2895 signal->theData[0] = NDB_LE_MissedHeartbeat;
2896 signal->theData[1] = nodePtr.i;
2897 signal->theData[2] = get_hb_count(nodePtr.i) - 1;
2898 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3, JBB);
2899 }
2900
2901 if (get_hb_count(nodePtr.i) > 4)
2902 {
2903 jam();
2904 if (m_connectivity_check.getEnabled())
2905 {
2906 jam();
2907 /* Start connectivity check, indicating the cause */
2908 startConnectivityCheck(signal, FailRep::ZHEARTBEAT_FAILURE, nodePtr.i);
2909 return;
2910 }
2911 else
2912 {
2913 /**----------------------------------------------------------------------
2914 * OUR LEFT NEIGHBOUR HAVE KEPT QUIET FOR THREE CONSECUTIVE HEARTBEAT
2915 * PERIODS. THUS WE DECLARE HIM DOWN.
2916 *----------------------------------------------------------------------*/
2917 signal->theData[0] = NDB_LE_DeadDueToHeartbeat;
2918 signal->theData[1] = nodePtr.i;
2919 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
2920
2921 failReportLab(signal, nodePtr.i, FailRep::ZHEARTBEAT_FAILURE, getOwnNodeId());
2922 return;
2923 }
2924 }//if
2925 }//Qmgr::checkHeartbeat()
2926
apiHbHandlingLab(Signal * signal,NDB_TICKS now)2927 void Qmgr::apiHbHandlingLab(Signal* signal, NDB_TICKS now)
2928 {
2929 NodeRecPtr TnodePtr;
2930
2931 jam();
2932 for (TnodePtr.i = 1; TnodePtr.i < MAX_NODES; TnodePtr.i++) {
2933 const Uint32 nodeId = TnodePtr.i;
2934 ptrAss(TnodePtr, nodeRec);
2935
2936 const NodeInfo::NodeType type = getNodeInfo(nodeId).getType();
2937 if(type == NodeInfo::DB)
2938 continue;
2939
2940 if(type == NodeInfo::INVALID)
2941 continue;
2942
2943 if (c_connectedNodes.get(nodeId))
2944 {
2945 jamLine(nodeId);
2946 set_hb_count(TnodePtr.i)++;
2947
2948 if (get_hb_count(TnodePtr.i) > 2)
2949 {
2950 signal->theData[0] = NDB_LE_MissedHeartbeat;
2951 signal->theData[1] = nodeId;
2952 signal->theData[2] = get_hb_count(TnodePtr.i) - 1;
2953 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3, JBB);
2954 }
2955
2956 if (get_hb_count(TnodePtr.i) > 4)
2957 {
2958 jam();
2959 /*------------------------------------------------------------------*/
2960 /* THE API NODE HAS NOT SENT ANY HEARTBEAT FOR THREE SECONDS.
2961 * WE WILL DISCONNECT FROM IT NOW.
2962 *------------------------------------------------------------------*/
2963 /*------------------------------------------------------------------*/
2964 /* We call node_failed to release all connections for this api node */
2965 /*------------------------------------------------------------------*/
2966 signal->theData[0] = NDB_LE_DeadDueToHeartbeat;
2967 signal->theData[1] = nodeId;
2968 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
2969
2970 api_failed(signal, nodeId);
2971 }//if
2972 }//if
2973 else if (TnodePtr.p->phase == ZAPI_INACTIVE &&
2974 TnodePtr.p->m_secret != 0 &&
2975 NdbTick_Compare(now,TnodePtr.p->m_alloc_timeout) > 0)
2976 {
2977 jam();
2978 TnodePtr.p->m_secret = 0;
2979 warningEvent("Releasing node id allocation for node %u",
2980 TnodePtr.i);
2981 }
2982 }//for
2983 return;
2984 }//Qmgr::apiHbHandlingLab()
2985
checkStartInterface(Signal * signal,NDB_TICKS now)2986 void Qmgr::checkStartInterface(Signal* signal, NDB_TICKS now)
2987 {
2988 NodeRecPtr nodePtr;
2989 /*------------------------------------------------------------------------*/
2990 // This method is called once per second. After a disconnect we wait at
2991 // least three seconds before allowing new connects. We will also ensure
2992 // that handling of the failure is completed before we allow new connections.
2993 /*------------------------------------------------------------------------*/
2994 jam();
2995 for (nodePtr.i = 1; nodePtr.i < MAX_NODES; nodePtr.i++) {
2996 ptrAss(nodePtr, nodeRec);
2997 Uint32 type = getNodeInfo(nodePtr.i).m_type;
2998 if (nodePtr.p->phase == ZFAIL_CLOSING) {
2999 jamLine(nodePtr.i);
3000 set_hb_count(nodePtr.i)++;
3001 if (c_connectedNodes.get(nodePtr.i)){
3002 jam();
3003 /*-------------------------------------------------------------------*/
3004 // We need to ensure that the connection is not restored until it has
3005 // been disconnected for at least three seconds.
3006 /*-------------------------------------------------------------------*/
3007 set_hb_count(nodePtr.i) = 0;
3008 }//if
3009 if ((get_hb_count(nodePtr.i) > 3)
3010 && (nodePtr.p->failState == NORMAL)) {
3011 /**------------------------------------------------------------------
3012 * WE HAVE DISCONNECTED THREE SECONDS AGO. WE ARE NOW READY TO
3013 * CONNECT AGAIN AND ACCEPT NEW REGISTRATIONS FROM THIS NODE.
3014 * WE WILL NOT ALLOW CONNECTIONS OF API NODES UNTIL API FAIL HANDLING
3015 * IS COMPLETE.
3016 *-------------------------------------------------------------------*/
3017 nodePtr.p->failState = NORMAL;
3018 nodePtr.p->m_secret = 0;
3019 switch(type){
3020 case NodeInfo::DB:
3021 jam();
3022 nodePtr.p->phase = ZINIT;
3023 break;
3024 case NodeInfo::MGM:
3025 jam();
3026 nodePtr.p->phase = ZAPI_INACTIVE;
3027 break;
3028 case NodeInfo::API:
3029 jam();
3030 if (c_allow_api_connect)
3031 {
3032 jam();
3033 nodePtr.p->phase = ZAPI_INACTIVE;
3034 break;
3035 }
3036 else
3037 {
3038 /**
3039 * Dont allow API node to connect before c_allow_api_connect
3040 */
3041 jam();
3042 set_hb_count(nodePtr.i) = 3;
3043 continue;
3044 }
3045 }
3046
3047 set_hb_count(nodePtr.i) = 0;
3048 signal->theData[0] = 0;
3049 signal->theData[1] = nodePtr.i;
3050 sendSignal(TRPMAN_REF, GSN_OPEN_COMORD, signal, 2, JBB);
3051 }
3052 else
3053 {
3054 jam();
3055 if(((get_hb_count(nodePtr.i) + 1) % 60) == 0)
3056 {
3057 jam();
3058 char buf[256];
3059 if (getNodeInfo(nodePtr.i).m_type == NodeInfo::DB)
3060 {
3061 jam();
3062 BaseString::snprintf(buf, sizeof(buf),
3063 "Failure handling of node %d has not completed"
3064 " in %d min - state = %d",
3065 nodePtr.i,
3066 (get_hb_count(nodePtr.i)+1)/60,
3067 nodePtr.p->failState);
3068 warningEvent("%s", buf);
3069 if (((get_hb_count(nodePtr.i) + 1) % 300) == 0)
3070 {
3071 jam();
3072 /**
3073 * Also dump DIH nf-state
3074 */
3075 signal->theData[0] = DumpStateOrd::DihTcSumaNodeFailCompleted;
3076 signal->theData[1] = nodePtr.i;
3077 sendSignal(DBDIH_REF, GSN_DUMP_STATE_ORD, signal, 2, JBB);
3078 }
3079 }
3080 else
3081 {
3082 jam();
3083 BaseString::snprintf(buf, sizeof(buf),
3084 "Failure handling of api %u has not completed"
3085 " in %d min - state = %d",
3086 nodePtr.i,
3087 (get_hb_count(nodePtr.i)+1)/60,
3088 nodePtr.p->failState);
3089 warningEvent("%s", buf);
3090 if (nodePtr.p->failState == WAITING_FOR_API_FAILCONF)
3091 {
3092 jam();
3093 compile_time_assert(NDB_ARRAY_SIZE(nodePtr.p->m_failconf_blocks) == 5);
3094 BaseString::snprintf(buf, sizeof(buf),
3095 " Waiting for blocks: %u %u %u %u %u",
3096 nodePtr.p->m_failconf_blocks[0],
3097 nodePtr.p->m_failconf_blocks[1],
3098 nodePtr.p->m_failconf_blocks[2],
3099 nodePtr.p->m_failconf_blocks[3],
3100 nodePtr.p->m_failconf_blocks[4]);
3101 warningEvent("%s", buf);
3102 }
3103 }
3104 }
3105 }
3106 }
3107 else if (type == NodeInfo::DB && nodePtr.p->phase == ZINIT &&
3108 nodePtr.p->m_secret != 0 &&
3109 NdbTick_Compare(now,nodePtr.p->m_alloc_timeout) > 0)
3110 {
3111 jam();
3112 nodePtr.p->m_secret = 0;
3113 warningEvent("Releasing node id allocation for node %u",
3114 nodePtr.i);
3115 }
3116 }//for
3117 return;
3118 }//Qmgr::checkStartInterface()
3119
3120 /**-------------------------------------------------------------------------
3121 * This method is called when a DISCONNECT_REP signal arrived which means that
3122 * the API node is gone and we want to release resources in TC/DICT blocks.
3123 *---------------------------------------------------------------------------*/
sendApiFailReq(Signal * signal,Uint16 failedNodeNo,bool sumaOnly)3124 void Qmgr::sendApiFailReq(Signal* signal, Uint16 failedNodeNo, bool sumaOnly)
3125 {
3126 jamEntry();
3127 signal->theData[0] = failedNodeNo;
3128 signal->theData[1] = QMGR_REF;
3129
3130 /* We route the ApiFailReq signals via CMVMI
3131 * This is done to ensure that they are received after
3132 * any pending signals from the failed Api node when
3133 * running ndbmtd, as these signals would be enqueued from
3134 * the thread running CMVMI
3135 */
3136 Uint32 routedSignalSectionI = RNIL;
3137 ndbrequire(appendToSection(routedSignalSectionI,
3138 &signal->theData[0],
3139 2));
3140 SectionHandle handle(this, routedSignalSectionI);
3141
3142 /* RouteOrd data */
3143 RouteOrd* routeOrd = (RouteOrd*) &signal->theData[0];
3144 routeOrd->srcRef = reference();
3145 routeOrd->gsn = GSN_API_FAILREQ;
3146 routeOrd->from = failedNodeNo;
3147
3148 NodeRecPtr failedNodePtr;
3149 failedNodePtr.i = failedNodeNo;
3150 ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec);
3151 failedNodePtr.p->failState = WAITING_FOR_API_FAILCONF;
3152
3153
3154 /* Send ROUTE_ORD signals to CMVMI via JBA
3155 * CMVMI will then immediately send the API_FAILREQ
3156 * signals to the destination block(s) using JBB
3157 * These API_FAILREQ signals will be sent *after*
3158 * any JBB signals enqueued from the failed API
3159 * by the CMVMI thread.
3160 */
3161 if (!sumaOnly)
3162 {
3163 jam();
3164 add_failconf_block(failedNodePtr, DBTC);
3165 routeOrd->dstRef = DBTC_REF;
3166 sendSignalNoRelease(TRPMAN_REF, GSN_ROUTE_ORD, signal,
3167 RouteOrd::SignalLength,
3168 JBA, &handle);
3169
3170 add_failconf_block(failedNodePtr, DBDICT);
3171 routeOrd->dstRef = DBDICT_REF;
3172 sendSignalNoRelease(TRPMAN_REF, GSN_ROUTE_ORD, signal,
3173 RouteOrd::SignalLength,
3174 JBA, &handle);
3175
3176 add_failconf_block(failedNodePtr, DBSPJ);
3177 routeOrd->dstRef = DBSPJ_REF;
3178 sendSignalNoRelease(TRPMAN_REF, GSN_ROUTE_ORD, signal,
3179 RouteOrd::SignalLength,
3180 JBA, &handle);
3181 }
3182
3183 /* Suma always notified */
3184 add_failconf_block(failedNodePtr, SUMA);
3185 routeOrd->dstRef = SUMA_REF;
3186 sendSignal(TRPMAN_REF, GSN_ROUTE_ORD, signal,
3187 RouteOrd::SignalLength,
3188 JBA, &handle);
3189 }//Qmgr::sendApiFailReq()
3190
execAPI_FAILREQ(Signal * signal)3191 void Qmgr::execAPI_FAILREQ(Signal* signal)
3192 {
3193 jamEntry();
3194 NodeRecPtr failedNodePtr;
3195 failedNodePtr.i = signal->theData[0];
3196 // signal->theData[1] == QMGR_REF
3197 ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec);
3198
3199 ndbrequire(getNodeInfo(failedNodePtr.i).getType() != NodeInfo::DB);
3200
3201 api_failed(signal, signal->theData[0]);
3202 }
3203
execAPI_FAILCONF(Signal * signal)3204 void Qmgr::execAPI_FAILCONF(Signal* signal)
3205 {
3206 NodeRecPtr failedNodePtr;
3207
3208 jamEntry();
3209 failedNodePtr.i = signal->theData[0];
3210 ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec);
3211
3212 Uint32 block = refToMain(signal->theData[1]);
3213 if (failedNodePtr.p->failState != WAITING_FOR_API_FAILCONF ||
3214 !remove_failconf_block(failedNodePtr, block))
3215 {
3216 jam();
3217 ndbout << "execAPI_FAILCONF from " << block
3218 << " failedNodePtr.p->failState = "
3219 << (Uint32)(failedNodePtr.p->failState)
3220 << " blocks: ";
3221 for (Uint32 i = 0;i<NDB_ARRAY_SIZE(failedNodePtr.p->m_failconf_blocks);i++)
3222 {
3223 printf("%u ", failedNodePtr.p->m_failconf_blocks[i]);
3224 }
3225 ndbout << endl;
3226 systemErrorLab(signal, __LINE__);
3227 }//if
3228
3229 if (is_empty_failconf_block(failedNodePtr))
3230 {
3231 jam();
3232 /**
3233 * When we set this state, connection will later be opened
3234 * in checkStartInterface
3235 */
3236 failedNodePtr.p->failState = NORMAL;
3237
3238 /**
3239 * Reset m_version only after all blocks has responded with API_FAILCONF
3240 * so that no block risks reading 0 as node-version
3241 */
3242 setNodeInfo(failedNodePtr.i).m_version = 0;
3243 recompute_version_info(getNodeInfo(failedNodePtr.i).m_type);
3244 }
3245 return;
3246 }//Qmgr::execAPI_FAILCONF()
3247
3248 void
add_failconf_block(NodeRecPtr nodePtr,Uint32 block)3249 Qmgr::add_failconf_block(NodeRecPtr nodePtr, Uint32 block)
3250 {
3251 // Check that it does not already exists!!
3252 Uint32 pos = 0;
3253 for (; pos < NDB_ARRAY_SIZE(nodePtr.p->m_failconf_blocks); pos++)
3254 {
3255 jam();
3256 if (nodePtr.p->m_failconf_blocks[pos] == 0)
3257 {
3258 jam();
3259 break;
3260 }
3261 else if (nodePtr.p->m_failconf_blocks[pos] == block)
3262 {
3263 jam();
3264 break;
3265 }
3266 }
3267
3268 ndbrequire(pos != NDB_ARRAY_SIZE(nodePtr.p->m_failconf_blocks));
3269 ndbassert(nodePtr.p->m_failconf_blocks[pos] != block);
3270 if (nodePtr.p->m_failconf_blocks[pos] == block)
3271 {
3272 jam();
3273 /**
3274 * Already in list!!
3275 */
3276 #ifdef ERROR_INSERT
3277 ndbrequire(false);
3278 #endif
3279 return;
3280 }
3281 ndbrequire(nodePtr.p->m_failconf_blocks[pos] == 0);
3282 nodePtr.p->m_failconf_blocks[pos] = block;
3283 }
3284
3285 bool
remove_failconf_block(NodeRecPtr nodePtr,Uint32 block)3286 Qmgr::remove_failconf_block(NodeRecPtr nodePtr, Uint32 block)
3287 {
3288 // Check that it does exists!!
3289 Uint32 pos = 0;
3290 for (; pos < NDB_ARRAY_SIZE(nodePtr.p->m_failconf_blocks); pos++)
3291 {
3292 jam();
3293 if (nodePtr.p->m_failconf_blocks[pos] == 0)
3294 {
3295 jam();
3296 break;
3297 }
3298 else if (nodePtr.p->m_failconf_blocks[pos] == block)
3299 {
3300 jam();
3301 break;
3302 }
3303 }
3304
3305 if (pos == NDB_ARRAY_SIZE(nodePtr.p->m_failconf_blocks) ||
3306 nodePtr.p->m_failconf_blocks[pos] != block)
3307 {
3308 jam();
3309 /**
3310 * Not found!!
3311 */
3312 return false;
3313 }
3314
3315 nodePtr.p->m_failconf_blocks[pos] = 0;
3316 for (pos++; pos < NDB_ARRAY_SIZE(nodePtr.p->m_failconf_blocks); pos++)
3317 {
3318 jam();
3319 nodePtr.p->m_failconf_blocks[pos - 1] = nodePtr.p->m_failconf_blocks[pos];
3320 }
3321
3322 return true;
3323 }
3324
3325 bool
is_empty_failconf_block(NodeRecPtr nodePtr) const3326 Qmgr::is_empty_failconf_block(NodeRecPtr nodePtr) const
3327 {
3328 return nodePtr.p->m_failconf_blocks[0] == 0;
3329 }
3330
execNDB_FAILCONF(Signal * signal)3331 void Qmgr::execNDB_FAILCONF(Signal* signal)
3332 {
3333 NodeRecPtr failedNodePtr;
3334 NodeRecPtr nodePtr;
3335
3336 jamEntry();
3337 failedNodePtr.i = signal->theData[0];
3338
3339 if (ERROR_INSERTED(930))
3340 {
3341 CLEAR_ERROR_INSERT_VALUE;
3342 infoEvent("Discarding NDB_FAILCONF for %u", failedNodePtr.i);
3343 return;
3344 }
3345
3346 ptrCheckGuard(failedNodePtr, MAX_NDB_NODES, nodeRec);
3347 if (failedNodePtr.p->failState == WAITING_FOR_NDB_FAILCONF)
3348 {
3349 g_eventLogger->info("Node %u has completed node fail handling",
3350 failedNodePtr.i);
3351 failedNodePtr.p->failState = NORMAL;
3352 }
3353 else
3354 {
3355 jam();
3356
3357 char buf[100];
3358 BaseString::snprintf(buf, 100,
3359 "Received NDB_FAILCONF for node %u with state: %d %d",
3360 failedNodePtr.i,
3361 failedNodePtr.p->phase,
3362 failedNodePtr.p->failState);
3363 progError(__LINE__, 0, buf);
3364 systemErrorLab(signal, __LINE__);
3365 }//if
3366
3367 if (cpresident == getOwnNodeId())
3368 {
3369 jam();
3370
3371 CRASH_INSERTION(936);
3372 }
3373
3374 /**
3375 * Reset node version only after all blocks has handled the failure
3376 * so that no block risks reading 0 as node version
3377 */
3378 setNodeInfo(failedNodePtr.i).m_version = 0;
3379 recompute_version_info(NodeInfo::DB);
3380
3381 /**
3382 * Prepare a NFCompleteRep and send to all connected API's
3383 * They can then abort all transaction waiting for response from
3384 * the failed node
3385 *
3386 * NOTE: This is sent from all nodes, as otherwise we would need
3387 * take-over if cpresident dies befor sending this
3388 */
3389 NFCompleteRep * const nfComp = (NFCompleteRep *)&signal->theData[0];
3390 nfComp->blockNo = QMGR_REF;
3391 nfComp->nodeId = getOwnNodeId();
3392 nfComp->failedNodeId = failedNodePtr.i;
3393
3394 jam();
3395 for (nodePtr.i = 1; nodePtr.i < MAX_NODES; nodePtr.i++)
3396 {
3397 ptrAss(nodePtr, nodeRec);
3398 if (nodePtr.p->phase == ZAPI_ACTIVE){
3399 jamLine(nodePtr.i);
3400 sendSignal(nodePtr.p->blockRef, GSN_NF_COMPLETEREP, signal,
3401 NFCompleteRep::SignalLength, JBB);
3402 }//if
3403 }//for
3404 return;
3405 }//Qmgr::execNDB_FAILCONF()
3406
3407 void
execNF_COMPLETEREP(Signal * signal)3408 Qmgr::execNF_COMPLETEREP(Signal* signal)
3409 {
3410 jamEntry();
3411 NFCompleteRep rep = *(NFCompleteRep*)signal->getDataPtr();
3412 if (rep.blockNo != DBTC)
3413 {
3414 jam();
3415 ndbassert(false);
3416 return;
3417 }
3418
3419 /**
3420 * This is a simple way of having ndbapi to get
3421 * earlier information that transactions can be aborted
3422 */
3423 signal->theData[0] = rep.failedNodeId;
3424 NodeRecPtr nodePtr;
3425 for (nodePtr.i = 1; nodePtr.i < MAX_NODES; nodePtr.i++)
3426 {
3427 ptrAss(nodePtr, nodeRec);
3428 if (nodePtr.p->phase == ZAPI_ACTIVE &&
3429 ndb_takeovertc(getNodeInfo(nodePtr.i).m_version))
3430 {
3431 jamLine(nodePtr.i);
3432 sendSignal(nodePtr.p->blockRef, GSN_TAKE_OVERTCCONF, signal,
3433 NFCompleteRep::SignalLength, JBB);
3434 }//if
3435 }//for
3436 return;
3437 }
3438
3439 /*******************************/
3440 /* DISCONNECT_REP */
3441 /*******************************/
3442 const char *lookupConnectionError(Uint32 err);
3443
execDISCONNECT_REP(Signal * signal)3444 void Qmgr::execDISCONNECT_REP(Signal* signal)
3445 {
3446 jamEntry();
3447 const DisconnectRep * const rep = (DisconnectRep *)&signal->theData[0];
3448 const Uint32 nodeId = rep->nodeId;
3449 const Uint32 err = rep->err;
3450 const NodeInfo nodeInfo = getNodeInfo(nodeId);
3451 c_connectedNodes.clear(nodeId);
3452
3453 if (nodeInfo.getType() == NodeInfo::DB)
3454 {
3455 c_readnodes_nodes.clear(nodeId);
3456
3457 if (ERROR_INSERTED(942))
3458 {
3459 g_eventLogger->info("DISCONNECT_REP received from data node %u - crash insertion",
3460 nodeId);
3461 CRASH_INSERTION(942);
3462 }
3463 }
3464
3465 NodeRecPtr nodePtr;
3466 nodePtr.i = getOwnNodeId();
3467 ptrCheckGuard(nodePtr, MAX_NODES, nodeRec);
3468
3469 char buf[100];
3470 if (nodeInfo.getType() == NodeInfo::DB &&
3471 getNodeState().startLevel < NodeState::SL_STARTED)
3472 {
3473 jam();
3474 CRASH_INSERTION(932);
3475 CRASH_INSERTION(938);
3476 BaseString::snprintf(buf, 100, "Node %u disconnected", nodeId);
3477 progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf);
3478 ndbrequire(false);
3479 }
3480
3481 if (getNodeInfo(nodeId).getType() != NodeInfo::DB)
3482 {
3483 jam();
3484 api_failed(signal, nodeId);
3485 return;
3486 }
3487
3488 switch(nodePtr.p->phase){
3489 case ZRUNNING:
3490 jam();
3491 break;
3492 case ZINIT:
3493 ndbrequire(false);
3494 case ZSTARTING:
3495 progError(__LINE__, NDBD_EXIT_CONNECTION_SETUP_FAILED,
3496 lookupConnectionError(err));
3497 ndbrequire(false);
3498 case ZPREPARE_FAIL:
3499 ndbrequire(false);
3500 case ZFAIL_CLOSING:
3501 ndbrequire(false);
3502 case ZAPI_ACTIVATION_ONGOING:
3503 ndbrequire(false);
3504 case ZAPI_ACTIVE:
3505 ndbrequire(false);
3506 case ZAPI_INACTIVE:
3507 {
3508 BaseString::snprintf(buf, 100, "Node %u disconnected", nodeId);
3509 progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf);
3510 ndbrequire(false);
3511 }
3512 }
3513
3514 if (ERROR_INSERTED(939) && ERROR_INSERT_EXTRA == nodeId)
3515 {
3516 ndbout_c("Ignoring DISCONNECT_REP for node %u that was force disconnected",
3517 nodeId);
3518 CLEAR_ERROR_INSERT_VALUE;
3519 return;
3520 }
3521
3522 node_failed(signal, nodeId);
3523 }//DISCONNECT_REP
3524
node_failed(Signal * signal,Uint16 aFailedNode)3525 void Qmgr::node_failed(Signal* signal, Uint16 aFailedNode)
3526 {
3527 NodeRecPtr failedNodePtr;
3528 /**------------------------------------------------------------------------
3529 * A COMMUNICATION LINK HAS BEEN DISCONNECTED. WE MUST TAKE SOME ACTION
3530 * DUE TO THIS.
3531 *-----------------------------------------------------------------------*/
3532 failedNodePtr.i = aFailedNode;
3533 ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec);
3534 failedNodePtr.p->m_secret = 0; // Not yet Uint64(rand()) << 32 + rand();
3535
3536 ndbrequire(getNodeInfo(failedNodePtr.i).getType() == NodeInfo::DB);
3537
3538 /**---------------------------------------------------------------------
3539 * THE OTHER NODE IS AN NDB NODE, WE HANDLE IT AS IF A HEARTBEAT
3540 * FAILURE WAS DISCOVERED.
3541 *---------------------------------------------------------------------*/
3542 switch(failedNodePtr.p->phase){
3543 case ZRUNNING:
3544 jam();
3545 failReportLab(signal, aFailedNode, FailRep::ZLINK_FAILURE, getOwnNodeId());
3546 return;
3547 case ZSTARTING:
3548 /**
3549 * bug#42422
3550 * Force "real" failure handling
3551 */
3552 jam();
3553 failedNodePtr.p->phase = ZRUNNING;
3554 failReportLab(signal, aFailedNode, FailRep::ZLINK_FAILURE, getOwnNodeId());
3555 return;
3556 case ZFAIL_CLOSING: // Close already in progress
3557 jam();
3558 return;
3559 case ZPREPARE_FAIL: // PREP_FAIL already sent CLOSE_COMREQ
3560 jam();
3561 return;
3562 case ZINIT:
3563 {
3564 jam();
3565 /*---------------------------------------------------------------------*/
3566 // The other node is still not in the cluster but disconnected.
3567 // We must restart communication in three seconds.
3568 /*---------------------------------------------------------------------*/
3569 failedNodePtr.p->failState = NORMAL;
3570 failedNodePtr.p->phase = ZFAIL_CLOSING;
3571 set_hb_count(failedNodePtr.i) = 0;
3572
3573 CloseComReqConf * const closeCom =
3574 (CloseComReqConf *)&signal->theData[0];
3575
3576 closeCom->xxxBlockRef = reference();
3577 closeCom->requestType = CloseComReqConf::RT_NO_REPLY;
3578 closeCom->failNo = 0;
3579 closeCom->noOfNodes = 1;
3580 NodeBitmask::clear(closeCom->theNodes);
3581 NodeBitmask::set(closeCom->theNodes, failedNodePtr.i);
3582 sendSignal(TRPMAN_REF, GSN_CLOSE_COMREQ, signal,
3583 CloseComReqConf::SignalLength, JBB);
3584 return;
3585 }
3586 case ZAPI_ACTIVE: // Unexpected states handled in ::api_failed()
3587 ndbrequire(false);
3588 case ZAPI_INACTIVE:
3589 ndbrequire(false);
3590 case ZAPI_ACTIVATION_ONGOING:
3591 ndbrequire(false);
3592 default:
3593 ndbrequire(false); // Unhandled state
3594 }//switch
3595
3596 return;
3597 }
3598
3599 void
execUPGRADE_PROTOCOL_ORD(Signal * signal)3600 Qmgr::execUPGRADE_PROTOCOL_ORD(Signal* signal)
3601 {
3602 const UpgradeProtocolOrd* ord = (UpgradeProtocolOrd*)signal->getDataPtr();
3603 switch(ord->type){
3604 case UpgradeProtocolOrd::UPO_ENABLE_MICRO_GCP:
3605 jam();
3606 m_micro_gcp_enabled = true;
3607 return;
3608 }
3609 }
3610
3611 void
api_failed(Signal * signal,Uint32 nodeId)3612 Qmgr::api_failed(Signal* signal, Uint32 nodeId)
3613 {
3614 NodeRecPtr failedNodePtr;
3615 /**------------------------------------------------------------------------
3616 * A COMMUNICATION LINK HAS BEEN DISCONNECTED. WE MUST TAKE SOME ACTION
3617 * DUE TO THIS.
3618 *-----------------------------------------------------------------------*/
3619 failedNodePtr.i = nodeId;
3620 ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec);
3621 failedNodePtr.p->m_secret = 0; // Not yet Uint64(rand()) << 32 + rand();
3622
3623 if (failedNodePtr.p->phase == ZFAIL_CLOSING)
3624 {
3625 /**
3626 * Failure handling already in progress
3627 */
3628 jam();
3629 return;
3630 }
3631
3632 ndbrequire(failedNodePtr.p->failState == NORMAL);
3633
3634 /* Send API_FAILREQ to peer QMGR blocks to allow them to disconnect
3635 * quickly
3636 * Local application blocks get API_FAILREQ once all pending signals
3637 * from the failed API have been processed.
3638 */
3639 signal->theData[0] = failedNodePtr.i;
3640 signal->theData[1] = QMGR_REF;
3641 NodeReceiverGroup rg(QMGR, c_clusterNodes);
3642 sendSignal(rg, GSN_API_FAILREQ, signal, 2, JBA);
3643
3644 /* Now ask CMVMI to disconnect the node */
3645 FailState initialState = (failedNodePtr.p->phase == ZAPI_ACTIVE) ?
3646 WAITING_FOR_CLOSECOMCONF_ACTIVE :
3647 WAITING_FOR_CLOSECOMCONF_NOTACTIVE;
3648
3649 failedNodePtr.p->failState = initialState;
3650 failedNodePtr.p->phase = ZFAIL_CLOSING;
3651 set_hb_count(failedNodePtr.i) = 0;
3652
3653 CloseComReqConf * const closeCom = (CloseComReqConf *)&signal->theData[0];
3654 closeCom->xxxBlockRef = reference();
3655 closeCom->requestType = CloseComReqConf::RT_API_FAILURE;
3656 closeCom->failNo = 0;
3657 closeCom->noOfNodes = 1;
3658 NodeBitmask::clear(closeCom->theNodes);
3659 NodeBitmask::set(closeCom->theNodes, failedNodePtr.i);
3660 sendSignal(TRPMAN_REF, GSN_CLOSE_COMREQ, signal,
3661 CloseComReqConf::SignalLength, JBB);
3662 } // api_failed
3663
3664 /**--------------------------------------------------------------------------
3665 * AN API NODE IS REGISTERING. IF FOR THE FIRST TIME WE WILL ENABLE
3666 * COMMUNICATION WITH ALL NDB BLOCKS.
3667 *---------------------------------------------------------------------------*/
3668 /*******************************/
3669 /* API_REGREQ */
3670 /*******************************/
execAPI_REGREQ(Signal * signal)3671 void Qmgr::execAPI_REGREQ(Signal* signal)
3672 {
3673 jamEntry();
3674
3675 ApiRegReq* req = (ApiRegReq*)signal->getDataPtr();
3676 const Uint32 version = req->version;
3677 const BlockReference ref = req->ref;
3678
3679 Uint32 mysql_version = req->mysql_version;
3680 if (version < NDBD_SPLIT_VERSION)
3681 mysql_version = 0;
3682
3683 NodeRecPtr apiNodePtr;
3684 apiNodePtr.i = refToNode(ref);
3685 ptrCheckGuard(apiNodePtr, MAX_NODES, nodeRec);
3686
3687 if (apiNodePtr.p->phase == ZFAIL_CLOSING)
3688 {
3689 jam();
3690 /**
3691 * This node is pending CLOSE_COM_CONF
3692 * ignore API_REGREQ
3693 */
3694 return;
3695 }
3696
3697 if (!c_connectedNodes.get(apiNodePtr.i))
3698 {
3699 jam();
3700 /**
3701 * We have not yet heard execCONNECT_REP
3702 * so ignore this until we do...
3703 */
3704 return;
3705 }
3706
3707 #if 0
3708 ndbout_c("Qmgr::execAPI_REGREQ: Recd API_REGREQ (NodeId=%d)", apiNodePtr.i);
3709 #endif
3710
3711 bool compatability_check;
3712 const char * extra = 0;
3713 NodeInfo::NodeType type= getNodeInfo(apiNodePtr.i).getType();
3714 switch(type){
3715 case NodeInfo::API:
3716 if (m_micro_gcp_enabled && !ndb_check_micro_gcp(version))
3717 {
3718 jam();
3719 compatability_check = false;
3720 extra = ": micro gcp enabled";
3721 }
3722 else
3723 {
3724 jam();
3725 compatability_check = ndbCompatible_ndb_api(NDB_VERSION, version);
3726 }
3727 break;
3728 case NodeInfo::MGM:
3729 compatability_check = ndbCompatible_ndb_mgmt(NDB_VERSION, version);
3730 break;
3731 case NodeInfo::DB:
3732 case NodeInfo::INVALID:
3733 default:
3734 sendApiRegRef(signal, ref, ApiRegRef::WrongType);
3735 infoEvent("Invalid connection attempt with type %d", type);
3736 return;
3737 }
3738
3739 if (!compatability_check) {
3740 jam();
3741 char buf[NDB_VERSION_STRING_BUF_SZ];
3742 infoEvent("Connection attempt from %s id=%d with %s "
3743 "incompatible with %s%s",
3744 type == NodeInfo::API ? "api or mysqld" : "management server",
3745 apiNodePtr.i,
3746 ndbGetVersionString(version, mysql_version, 0,
3747 buf,
3748 sizeof(buf)),
3749 NDB_VERSION_STRING,
3750 extra ? extra : "");
3751 apiNodePtr.p->phase = ZAPI_INACTIVE;
3752 sendApiRegRef(signal, ref, ApiRegRef::UnsupportedVersion);
3753 return;
3754 }
3755
3756 setNodeInfo(apiNodePtr.i).m_version = version;
3757 setNodeInfo(apiNodePtr.i).m_mysql_version = mysql_version;
3758 set_hb_count(apiNodePtr.i) = 0;
3759
3760 NodeState state = getNodeState();
3761 if (apiNodePtr.p->phase == ZAPI_INACTIVE)
3762 {
3763 apiNodePtr.p->blockRef = ref;
3764 if ((state.startLevel == NodeState::SL_STARTED ||
3765 state.getSingleUserMode() ||
3766 (state.startLevel == NodeState::SL_STARTING &&
3767 state.starting.startPhase >= 8)))
3768 {
3769 jam();
3770 /**----------------------------------------------------------------------
3771 * THE API NODE IS REGISTERING. WE WILL ACCEPT IT BY CHANGING STATE AND
3772 * SENDING A CONFIRM. We set state to ZAPI_ACTIVATION_ONGOING to ensure
3773 * that we don't send unsolicited API_REGCONF or other things before we
3774 * actually fully enabled the node for communicating with the new API
3775 * node. It also avoids sending NODE_FAILREP, NF_COMPLETEREP and
3776 * TAKE_OVERTCCONF even before the API_REGCONF is sent. We will get a
3777 * fresh state of the nodes in API_REGCONF which is sufficient, no need
3778 * to update the API before the API got the initial state.
3779 *----------------------------------------------------------------------*/
3780 apiNodePtr.p->phase = ZAPI_ACTIVATION_ONGOING;
3781 EnableComReq *enableComReq = (EnableComReq *)signal->getDataPtrSend();
3782 enableComReq->m_senderRef = reference();
3783 enableComReq->m_senderData = ENABLE_COM_API_REGREQ;
3784 NodeBitmask::clear(enableComReq->m_nodeIds);
3785 NodeBitmask::set(enableComReq->m_nodeIds, apiNodePtr.i);
3786 sendSignal(TRPMAN_REF, GSN_ENABLE_COMREQ, signal,
3787 EnableComReq::SignalLength, JBB);
3788 return;
3789 }
3790 /**
3791 * The node is in some kind of STOPPING state, so we send API_REGCONF even
3792 * though we've not enabled communication, if the API tries to send
3793 * anything to us anyways it will simply be ignored since only QMGR will
3794 * receive signals in this state. The API receives the node states, so it
3795 * should be able to discover what nodes that it is able to actually use.
3796 */
3797 }
3798
3799 sendApiRegConf(signal, apiNodePtr.i);
3800 }//Qmgr::execAPI_REGREQ()
3801
3802 void
handleEnableComApiRegreq(Signal * signal,Uint32 node)3803 Qmgr::handleEnableComApiRegreq(Signal *signal, Uint32 node)
3804 {
3805 NodeRecPtr apiNodePtr;
3806 NodeInfo::NodeType type = getNodeInfo(node).getType();
3807 Uint32 version = getNodeInfo(node).m_version;
3808 recompute_version_info(type, version);
3809
3810 signal->theData[0] = node;
3811 signal->theData[1] = version;
3812 NodeReceiverGroup rg(QMGR, c_clusterNodes);
3813 rg.m_nodes.clear(getOwnNodeId());
3814 sendVersionedDb(rg, GSN_NODE_VERSION_REP, signal, 2, JBB,
3815 NDBD_NODE_VERSION_REP);
3816
3817 signal->theData[0] = node;
3818 EXECUTE_DIRECT(NDBCNTR, GSN_API_START_REP, signal, 1);
3819
3820 apiNodePtr.i = node;
3821 ptrCheckGuard(apiNodePtr, MAX_NODES, nodeRec);
3822 if (apiNodePtr.p->phase == ZAPI_ACTIVATION_ONGOING)
3823 {
3824 /**
3825 * Now we're about to send API_REGCONF to an API node, this means
3826 * that this node can immediately start communicating to TC, SUMA
3827 * and so forth. The state also indicates that the API is ready
3828 * to receive an unsolicited API_REGCONF when the node goes to
3829 * state SL_STARTED.
3830 */
3831 jam();
3832 apiNodePtr.p->phase = ZAPI_ACTIVE;
3833 sendApiRegConf(signal, node);
3834 }
3835 jam();
3836 /**
3837 * Node is no longer in state ZAPI_ACTIVATION_ONGOING, the node must
3838 * have failed, we can ignore sending API_REGCONF to a failed node.
3839 */
3840 }
3841
3842 void
execNODE_STARTED_REP(Signal * signal)3843 Qmgr::execNODE_STARTED_REP(Signal *signal)
3844 {
3845 NodeRecPtr apiNodePtr;
3846 for (apiNodePtr.i = 1;
3847 apiNodePtr.i < MAX_NODES;
3848 apiNodePtr.i++)
3849 {
3850 ptrCheckGuard(apiNodePtr, MAX_NODES, nodeRec);
3851 NodeInfo::NodeType type = getNodeInfo(apiNodePtr.i).getType();
3852 if (type != NodeInfo::API)
3853 {
3854 /* Not an API node */
3855 continue;
3856 }
3857 if (!c_connectedNodes.get(apiNodePtr.i))
3858 {
3859 /* API not connected */
3860 continue;
3861 }
3862 if (apiNodePtr.p->phase != ZAPI_ACTIVE)
3863 {
3864 /**
3865 * The phase variable can be in three states for the API nodes, it can
3866 * be ZAPI_INACTIVE for an API node that hasn't connected, it can be
3867 * ZFAIL_CLOSING for an API node that recently failed and is performing
3868 * failure handling. It can be in the state ZAPI_ACTIVE which it enters
3869 * upon us receiving an API_REGREQ from the API. So at this point the
3870 * API is also able to receive an unsolicited API_REGCONF message.
3871 */
3872 continue;
3873 }
3874 /**
3875 * We will send an unsolicited API_REGCONF to the API node, this makes the
3876 * API node aware of our existence much faster (without it can wait up to
3877 * the lenght of a heartbeat DB-API period. For rolling restarts and other
3878 * similar actions this can easily cause the API to not have any usable
3879 * DB connections at all. This unsolicited response minimises this window
3880 * of unavailability to zero for all practical purposes.
3881 */
3882 sendApiRegConf(signal, apiNodePtr.i);
3883 }
3884 }
3885
3886 void
sendApiRegConf(Signal * signal,Uint32 node)3887 Qmgr::sendApiRegConf(Signal *signal, Uint32 node)
3888 {
3889 NodeRecPtr apiNodePtr;
3890 apiNodePtr.i = node;
3891 ptrCheckGuard(apiNodePtr, MAX_NODES, nodeRec);
3892 const BlockReference ref = apiNodePtr.p->blockRef;
3893 ndbassert(ref != 0);
3894
3895 ApiRegConf * const apiRegConf = (ApiRegConf *)&signal->theData[0];
3896 apiRegConf->qmgrRef = reference();
3897 apiRegConf->apiHeartbeatFrequency = (chbApiDelay / 10);
3898 apiRegConf->version = NDB_VERSION;
3899 apiRegConf->mysql_version = NDB_MYSQL_VERSION_D;
3900 apiRegConf->nodeState = getNodeState();
3901 {
3902 NodeRecPtr nodePtr;
3903 nodePtr.i = getOwnNodeId();
3904 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
3905 Uint32 dynamicId = nodePtr.p->ndynamicId;
3906
3907 if(apiRegConf->nodeState.masterNodeId != getOwnNodeId()){
3908 jam();
3909 apiRegConf->nodeState.dynamicId = dynamicId;
3910 } else {
3911 apiRegConf->nodeState.dynamicId = (Uint32)(-(Int32)dynamicId);
3912 }
3913 }
3914 NodeVersionInfo info = getNodeVersionInfo();
3915 apiRegConf->minDbVersion = info.m_type[NodeInfo::DB].m_min_version;
3916 apiRegConf->nodeState.m_connected_nodes.assign(c_connectedNodes);
3917 sendSignal(ref, GSN_API_REGCONF, signal, ApiRegConf::SignalLength, JBB);
3918 }
3919
3920 void
sendVersionedDb(NodeReceiverGroup rg,GlobalSignalNumber gsn,Signal * signal,Uint32 length,JobBufferLevel jbuf,Uint32 minversion)3921 Qmgr::sendVersionedDb(NodeReceiverGroup rg,
3922 GlobalSignalNumber gsn,
3923 Signal* signal,
3924 Uint32 length,
3925 JobBufferLevel jbuf,
3926 Uint32 minversion)
3927 {
3928 jam();
3929 NodeVersionInfo info = getNodeVersionInfo();
3930 if (info.m_type[NodeInfo::DB].m_min_version >= minversion)
3931 {
3932 jam();
3933 sendSignal(rg, gsn, signal, length, jbuf);
3934 }
3935 else
3936 {
3937 jam();
3938 Uint32 i = 0, cnt = 0;
3939 while((i = rg.m_nodes.find(i + 1)) != NodeBitmask::NotFound)
3940 {
3941 jam();
3942 if (getNodeInfo(i).m_version >= minversion)
3943 {
3944 jam();
3945 cnt++;
3946 sendSignal(numberToRef(rg.m_block, i), gsn, signal, length, jbuf);
3947 }
3948 }
3949 ndbassert((cnt == 0 && rg.m_nodes.count() == 0) ||
3950 (cnt < rg.m_nodes.count()));
3951 }
3952 }
3953
3954 void
execAPI_VERSION_REQ(Signal * signal)3955 Qmgr::execAPI_VERSION_REQ(Signal * signal) {
3956 jamEntry();
3957 ApiVersionReq * const req = (ApiVersionReq *)signal->getDataPtr();
3958
3959 Uint32 senderRef = req->senderRef;
3960 Uint32 nodeId = req->nodeId;
3961
3962 ApiVersionConf * conf = (ApiVersionConf *)req;
3963 if(getNodeInfo(nodeId).m_connected)
3964 {
3965 conf->version = getNodeInfo(nodeId).m_version;
3966 conf->mysql_version = getNodeInfo(nodeId).m_mysql_version;
3967 struct in_addr in= globalTransporterRegistry.get_connect_address(nodeId);
3968 conf->m_inet_addr= in.s_addr;
3969 }
3970 else
3971 {
3972 conf->version = 0;
3973 conf->mysql_version = 0;
3974 conf->m_inet_addr= 0;
3975 }
3976 conf->nodeId = nodeId;
3977
3978 sendSignal(senderRef,
3979 GSN_API_VERSION_CONF,
3980 signal,
3981 ApiVersionConf::SignalLength, JBB);
3982 }
3983
3984 void
execNODE_VERSION_REP(Signal * signal)3985 Qmgr::execNODE_VERSION_REP(Signal* signal)
3986 {
3987 jamEntry();
3988 Uint32 nodeId = signal->theData[0];
3989 Uint32 version = signal->theData[1];
3990
3991 if (nodeId < MAX_NODES)
3992 {
3993 jam();
3994 Uint32 type = getNodeInfo(nodeId).m_type;
3995 setNodeInfo(nodeId).m_version = version;
3996 recompute_version_info(type, version);
3997 }
3998 }
3999
4000 void
recompute_version_info(Uint32 type,Uint32 version)4001 Qmgr::recompute_version_info(Uint32 type, Uint32 version)
4002 {
4003 NodeVersionInfo& info = setNodeVersionInfo();
4004 switch(type){
4005 case NodeInfo::DB:
4006 case NodeInfo::API:
4007 case NodeInfo::MGM:
4008 break;
4009 default:
4010 return;
4011 }
4012
4013 if (info.m_type[type].m_min_version == 0 ||
4014 version < info.m_type[type].m_min_version)
4015 info.m_type[type].m_min_version = version;
4016 if (version > info.m_type[type].m_max_version)
4017 info.m_type[type].m_max_version = version;
4018 }
4019
4020 void
recompute_version_info(Uint32 type)4021 Qmgr::recompute_version_info(Uint32 type)
4022 {
4023 switch(type){
4024 case NodeInfo::DB:
4025 case NodeInfo::API:
4026 case NodeInfo::MGM:
4027 break;
4028 default:
4029 return;
4030 }
4031
4032 Uint32 min = ~0, max = 0;
4033 Uint32 cnt = type == NodeInfo::DB ? MAX_NDB_NODES : MAX_NODES;
4034 for (Uint32 i = 1; i<cnt; i++)
4035 {
4036 if (getNodeInfo(i).m_type == type)
4037 {
4038 Uint32 version = getNodeInfo(i).m_version;
4039
4040 if (version)
4041 {
4042 if (version < min)
4043 min = version;
4044 if (version > max)
4045 max = version;
4046 }
4047 }
4048 }
4049
4050 NodeVersionInfo& info = setNodeVersionInfo();
4051 info.m_type[type].m_min_version = min == ~(Uint32)0 ? 0 : min;
4052 info.m_type[type].m_max_version = max;
4053 }
4054
#if 0
/**
 * Compiled out (#if 0): first implementation of an API version check.
 *
 * Returns false only when the API version has a lower major or minor number
 * than our own version, is at least API_UPGRADE_VERSION, and the node is
 * not a management server. In every other case it returns true.
 */
bool
Qmgr::checkAPIVersion(NodeId nodeId,
                      Uint32 apiVersion, Uint32 ownVersion) const
{
  const bool apiIsOlder = getMajor(apiVersion) < getMajor(ownVersion) ||
                          getMinor(apiVersion) < getMinor(ownVersion);
  if (!(apiIsOlder && apiVersion >= API_UPGRADE_VERSION))
  {
    return true;
  }

  jam();
  if (getNodeInfo(nodeId).getType() == NodeInfo::MGM)
  {
    jam();
    /* We have a software upgrade situation; the management server should
     * be the highest, let it decide what to do.
     */
    return true;
  }

  jam();
  return false;
}
#endif
4081
4082 void
sendApiRegRef(Signal * signal,Uint32 Tref,ApiRegRef::ErrorCode err)4083 Qmgr::sendApiRegRef(Signal* signal, Uint32 Tref, ApiRegRef::ErrorCode err){
4084 ApiRegRef* ref = (ApiRegRef*)signal->getDataPtrSend();
4085 ref->ref = reference();
4086 ref->version = NDB_VERSION;
4087 ref->mysql_version = NDB_MYSQL_VERSION_D;
4088 ref->errorCode = err;
4089 sendSignal(Tref, GSN_API_REGREF, signal, ApiRegRef::SignalLength, JBB);
4090 }
4091
4092 /**--------------------------------------------------------------------------
4093 * A NODE HAS BEEN DECLARED AS DOWN. WE WILL CLOSE THE COMMUNICATION TO THIS
4094 * NODE IF NOT ALREADY DONE. IF WE ARE PRESIDENT OR BECOMES PRESIDENT BECAUSE
4095 * OF A FAILED PRESIDENT THEN WE WILL TAKE FURTHER ACTION.
4096 *---------------------------------------------------------------------------*/
failReportLab(Signal * signal,Uint16 aFailedNode,FailRep::FailCause aFailCause,Uint16 sourceNode)4097 void Qmgr::failReportLab(Signal* signal, Uint16 aFailedNode,
4098 FailRep::FailCause aFailCause,
4099 Uint16 sourceNode)
4100 {
4101 NodeRecPtr nodePtr;
4102 NodeRecPtr failedNodePtr;
4103 NodeRecPtr myNodePtr;
4104
4105 failedNodePtr.i = aFailedNode;
4106 ptrCheckGuard(failedNodePtr, MAX_NDB_NODES, nodeRec);
4107 FailRep* rep = (FailRep*)signal->getDataPtr();
4108
4109 if (check_multi_node_shutdown(signal))
4110 {
4111 jam();
4112 return;
4113 }
4114
4115 if (isNodeConnectivitySuspect(sourceNode) &&
4116 // (! isNodeConnectivitySuspect(aFailedNode)) && // TODO : Required?
4117 ((aFailCause == FailRep::ZCONNECT_CHECK_FAILURE) ||
4118 (aFailCause == FailRep::ZLINK_FAILURE)))
4119 {
4120 jam();
4121 /* Connectivity related failure report from a node with suspect
4122 * connectivity, handle differently
4123 */
4124 ndbrequire(sourceNode != getOwnNodeId());
4125
4126 handleFailFromSuspect(signal,
4127 aFailCause,
4128 aFailedNode,
4129 sourceNode);
4130 return;
4131 }
4132
4133 if (failedNodePtr.i == getOwnNodeId()) {
4134 jam();
4135
4136 Uint32 code = NDBD_EXIT_NODE_DECLARED_DEAD;
4137 const char * msg = 0;
4138 char extra[100];
4139 switch(aFailCause){
4140 case FailRep::ZOWN_FAILURE:
4141 msg = "Own failure";
4142 break;
4143 case FailRep::ZOTHER_NODE_WHEN_WE_START:
4144 case FailRep::ZOTHERNODE_FAILED_DURING_START:
4145 msg = "Other node died during start";
4146 break;
4147 case FailRep::ZIN_PREP_FAIL_REQ:
4148 msg = "Prep fail";
4149 break;
4150 case FailRep::ZSTART_IN_REGREQ:
4151 msg = "Start timeout";
4152 break;
4153 case FailRep::ZHEARTBEAT_FAILURE:
4154 msg = "Heartbeat failure";
4155 break;
4156 case FailRep::ZLINK_FAILURE:
4157 msg = "Connection failure";
4158 break;
4159 case FailRep::ZPARTITIONED_CLUSTER:
4160 {
4161 code = NDBD_EXIT_PARTITIONED_SHUTDOWN;
4162 char buf1[100], buf2[100];
4163 c_clusterNodes.getText(buf1);
4164 if (((signal->getLength()== FailRep::OrigSignalLength + FailRep::PartitionedExtraLength) ||
4165 (signal->getLength()== FailRep::SignalLength + FailRep::PartitionedExtraLength)) &&
4166 signal->header.theVerId_signalNumber == GSN_FAIL_REP)
4167 {
4168 jam();
4169 NdbNodeBitmask part;
4170 part.assign(NdbNodeBitmask::Size, rep->partitioned.partition);
4171 part.getText(buf2);
4172 BaseString::snprintf(extra, sizeof(extra),
4173 "Our cluster: %s other cluster: %s",
4174 buf1, buf2);
4175 }
4176 else
4177 {
4178 jam();
4179 BaseString::snprintf(extra, sizeof(extra),
4180 "Our cluster: %s", buf1);
4181 }
4182 msg = extra;
4183 break;
4184 }
4185 case FailRep::ZMULTI_NODE_SHUTDOWN:
4186 msg = "Multi node shutdown";
4187 break;
4188 case FailRep::ZCONNECT_CHECK_FAILURE:
4189 msg = "Connectivity check failure";
4190 break;
4191 case FailRep::ZFORCED_ISOLATION:
4192 msg = "Forced isolation";
4193 if (ERROR_INSERTED(942))
4194 {
4195 g_eventLogger->info("FAIL_REP FORCED_ISOLATION received from data node %u - ignoring.",
4196 sourceNode);
4197 /* Let's wait for remote disconnection */
4198 return;
4199 }
4200 break;
4201 default:
4202 msg = "<UNKNOWN>";
4203 }
4204
4205 CRASH_INSERTION(932);
4206 CRASH_INSERTION(938);
4207
4208 char buf[255];
4209 BaseString::snprintf(buf, sizeof(buf),
4210 "We(%u) have been declared dead by %u (via %u) reason: %s(%u)",
4211 getOwnNodeId(),
4212 sourceNode,
4213 refToNode(signal->getSendersBlockRef()),
4214 msg ? msg : "<Unknown>",
4215 aFailCause);
4216
4217 progError(__LINE__, code, buf);
4218 return;
4219 }//if
4220
4221 myNodePtr.i = getOwnNodeId();
4222 ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);
4223 if (myNodePtr.p->phase != ZRUNNING) {
4224 jam();
4225 systemErrorLab(signal, __LINE__);
4226 return;
4227 }//if
4228
4229 if (getNodeState().startLevel < NodeState::SL_STARTED)
4230 {
4231 jam();
4232 CRASH_INSERTION(932);
4233 CRASH_INSERTION(938);
4234 char buf[100];
4235 BaseString::snprintf(buf, 100, "Node failure during restart");
4236 progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf);
4237 ndbrequire(false);
4238 }
4239
4240 const NdbNodeBitmask TfailedNodes(cfailedNodes);
4241 failReport(signal, failedNodePtr.i, (UintR)ZTRUE, aFailCause, sourceNode);
4242
4243 /**
4244 * If any node is starting now (c_start.startNode != 0)
4245 * include it in nodes handled by sendPrepFailReq
4246 */
4247 if (c_start.m_startNode != 0)
4248 {
4249 jam();
4250 cfailedNodes.set(c_start.m_startNode);
4251 }
4252
4253 if (cpresident == getOwnNodeId()) {
4254 jam();
4255 if (ctoStatus == Q_NOT_ACTIVE) {
4256 jam();
4257 /**--------------------------------------------------------------------
4258 * AS PRESIDENT WE ARE REQUIRED TO START THE EXCLUSION PROCESS SUCH THAT
4259 * THE APPLICATION SEE NODE FAILURES IN A CONSISTENT ORDER.
4260 * IF WE HAVE BECOME PRESIDENT NOW (CTO_STATUS = ACTIVE) THEN WE HAVE
4261 * TO COMPLETE THE PREVIOUS COMMIT FAILED NODE PROCESS BEFORE STARTING
4262 * A NEW.
4263 * CTO_STATUS = ACTIVE CAN ALSO MEAN THAT WE ARE PRESIDENT AND ARE
4264 * CURRENTLY COMMITTING A SET OF NODE CRASHES. IN THIS CASE IT IS NOT
4265 * ALLOWED TO START PREPARING NEW NODE CRASHES.
4266 *---------------------------------------------------------------------*/
4267 if (!cfailedNodes.equal(TfailedNodes)) {
4268 jam();
4269 cfailureNr = cfailureNr + 1;
4270 for (nodePtr.i = 1;
4271 nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
4272 ptrAss(nodePtr, nodeRec);
4273 if (nodePtr.p->phase == ZRUNNING) {
4274 jamLine(nodePtr.i);
4275 sendPrepFailReq(signal, nodePtr.i);
4276 }//if
4277 }//for
4278 }//if
4279 }//if
4280 }
4281 return;
4282 }//Qmgr::failReportLab()
4283
4284 /**-------------------------------------------------------------------------
4285 * WE HAVE RECEIVED A PREPARE TO EXCLUDE A NUMBER OF NODES FROM THE CLUSTER.
4286 * WE WILL FIRST CHECK THAT WE HAVE NOT ANY MORE NODES THAT
4287 * WE ALSO HAVE EXCLUDED
4288 *--------------------------------------------------------------------------*/
4289 /*******************************/
4290 /* PREP_FAILREQ */
4291 /*******************************/
execPREP_FAILREQ(Signal * signal)4292 void Qmgr::execPREP_FAILREQ(Signal* signal)
4293 {
4294 NodeRecPtr myNodePtr;
4295 PrepFailReqRef * const prepFail = (PrepFailReqRef *)&signal->theData[0];
4296 BlockReference Tblockref = prepFail->xxxBlockRef;
4297 Uint16 TfailureNr = prepFail->failNo;
4298
4299 jamEntry();
4300
4301 // Clear 'c_start.m_startNode' if it failed.
4302 if (NdbNodeBitmask::get(prepFail->theNodes, c_start.m_startNode))
4303 {
4304 jam();
4305 c_start.reset();
4306 }
4307 if (c_start.m_gsn == GSN_CM_NODEINFOCONF)
4308 {
4309 Uint32 nodeId;
4310 jam();
4311 /**
4312 * This is a very unusual event we are looking for, but still required
4313 * to be handled. The starting node has connected to the president and
4314 * managed to start the node inclusion protocol. We received an indication
4315 * of this from the president. The starting node now however fails before
4316 * it connected to us, so we need to clear the indication of that we
4317 * received CM_ADD(Prepare) from president since this belonged to an
4318 * already cancelled node restart.
4319 */
4320 for (nodeId = 1; nodeId < MAX_NDB_NODES; nodeId++)
4321 {
4322 if (c_start.m_nodes.isWaitingFor(nodeId) &&
4323 NdbNodeBitmask::get(prepFail->theNodes, nodeId))
4324 {
4325 jamLine(nodeId);
4326 /* Found such a condition as described above, clear state */
4327 c_start.m_gsn = RNIL;
4328 c_start.m_nodes.clearWaitingFor();
4329 break;
4330 }
4331 }
4332 }
4333
4334
4335 if (check_multi_node_shutdown(signal))
4336 {
4337 jam();
4338 return;
4339 }
4340
4341 if (ERROR_INSERTED(941) &&
4342 getOwnNodeId() == 4 &&
4343 NdbNodeBitmask::get(prepFail->theNodes, 2))
4344 {
4345 /* Insert ERROR_INSERT crash */
4346 CRASH_INSERTION(941);
4347 }
4348
4349 cprepFailedNodes.assign(NdbNodeBitmask::Size, prepFail->theNodes);
4350 ndbassert(prepFail->noOfNodes == cprepFailedNodes.count());
4351
4352 /**
4353 * Block commit until node failures has stabilized
4354 *
4355 * @See RT352
4356 */
4357 BlockCommitOrd* const block = (BlockCommitOrd *)&signal->theData[0];
4358 block->failNo = TfailureNr;
4359 EXECUTE_DIRECT(DBDIH, GSN_BLOCK_COMMIT_ORD, signal,
4360 BlockCommitOrd::SignalLength);
4361
4362 myNodePtr.i = getOwnNodeId();
4363 ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);
4364 if (myNodePtr.p->phase != ZRUNNING) {
4365 jam();
4366 systemErrorLab(signal, __LINE__);
4367 return;
4368 }//if
4369
4370 if (getNodeState().startLevel < NodeState::SL_STARTED)
4371 {
4372 jam();
4373 CRASH_INSERTION(932);
4374 CRASH_INSERTION(938);
4375 char buf[100];
4376 BaseString::snprintf(buf, 100, "Node failure during restart");
4377 progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf);
4378 ndbrequire(false);
4379 }
4380
4381 for (unsigned nodeId = 1; nodeId < MAX_NDB_NODES; nodeId++)
4382 {
4383 if (cprepFailedNodes.get(nodeId))
4384 {
4385 jam();
4386 failReport(signal,
4387 nodeId,
4388 (UintR)ZFALSE,
4389 FailRep::ZIN_PREP_FAIL_REQ,
4390 0); /* Source node not required (or known) here */
4391 }//if
4392 }//for
4393 sendCloseComReq(signal, Tblockref, TfailureNr);
4394 ccommitFailedNodes.clear();
4395 cprepareFailureNr = TfailureNr;
4396 return;
4397 }//Qmgr::execPREP_FAILREQ()
4398
4399
handleApiCloseComConf(Signal * signal)4400 void Qmgr::handleApiCloseComConf(Signal* signal)
4401 {
4402 jam();
4403 CloseComReqConf * const closeCom = (CloseComReqConf *)&signal->theData[0];
4404
4405 /* Api failure special case */
4406 for(Uint32 nodeId = 0; nodeId < MAX_NODES; nodeId ++)
4407 {
4408 if (NodeBitmask::get(closeCom->theNodes, nodeId))
4409 {
4410 jam();
4411 /* Check that *only* 1 *API* node is included in
4412 * this CLOSE_COM_CONF
4413 */
4414 ndbrequire(getNodeInfo(nodeId).getType() != NodeInfo::DB);
4415 ndbrequire(closeCom->noOfNodes == 1);
4416 NodeBitmask::clear(closeCom->theNodes, nodeId);
4417 ndbrequire(NodeBitmask::isclear(closeCom->theNodes));
4418
4419 /* Now that we know communication from the failed Api has
4420 * ceased, we can send the required API_FAILREQ signals
4421 * and continue API failure handling
4422 */
4423 NodeRecPtr failedNodePtr;
4424 failedNodePtr.i = nodeId;
4425 ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec);
4426
4427 ndbrequire((failedNodePtr.p->failState ==
4428 WAITING_FOR_CLOSECOMCONF_ACTIVE) ||
4429 (failedNodePtr.p->failState ==
4430 WAITING_FOR_CLOSECOMCONF_NOTACTIVE));
4431
4432 if (failedNodePtr.p->failState == WAITING_FOR_CLOSECOMCONF_ACTIVE)
4433 {
4434 /**
4435 * Inform application blocks TC, DICT, SUMA etc.
4436 */
4437 jam();
4438 sendApiFailReq(signal, nodeId, false); // !sumaOnly
4439 arbitRec.code = ArbitCode::ApiFail;
4440 handleArbitApiFail(signal, nodeId);
4441 }
4442 else
4443 {
4444 /**
4445 * Always inform SUMA
4446 */
4447 jam();
4448 sendApiFailReq(signal, nodeId, true); // sumaOnly
4449 }
4450
4451 if (getNodeInfo(failedNodePtr.i).getType() == NodeInfo::MGM)
4452 {
4453 /**
4454 * Allow MGM do reconnect "directly"
4455 */
4456 jam();
4457 set_hb_count(failedNodePtr.i) = 3;
4458 }
4459
4460 /* Handled the single API node failure */
4461 return;
4462 }
4463 }
4464 /* Never get here */
4465 ndbrequire(false);
4466 }
4467
4468 /**---------------------------------------------------------------------------
4469 * THE CRASHED NODES HAS BEEN EXCLUDED FROM COMMUNICATION.
4470 * WE WILL CHECK WHETHER ANY MORE NODES HAVE FAILED DURING THE PREPARE PROCESS.
4471 * IF SO WE WILL REFUSE THE PREPARE PHASE AND EXPECT A NEW PREPARE MESSAGE
4472 * WITH ALL FAILED NODES INCLUDED.
4473 *---------------------------------------------------------------------------*/
4474 /*******************************/
4475 /* CLOSE_COMCONF */
4476 /*******************************/
execCLOSE_COMCONF(Signal * signal)4477 void Qmgr::execCLOSE_COMCONF(Signal* signal)
4478 {
4479 jamEntry();
4480
4481 CloseComReqConf * const closeCom = (CloseComReqConf *)&signal->theData[0];
4482
4483 Uint32 requestType = closeCom->requestType;
4484
4485 if (requestType == CloseComReqConf::RT_API_FAILURE)
4486 {
4487 jam();
4488 handleApiCloseComConf(signal);
4489 return;
4490 }
4491
4492 /* Normal node failure preparation path */
4493 ndbassert(requestType == CloseComReqConf::RT_NODE_FAILURE);
4494 BlockReference Tblockref = closeCom->xxxBlockRef;
4495 Uint16 TfailureNr = closeCom->failNo;
4496
4497 cprepFailedNodes.assign(NdbNodeBitmask::Size, closeCom->theNodes);
4498 ndbassert(closeCom->noOfNodes == cprepFailedNodes.count());
4499
4500 UintR tprepFailConf = ZTRUE;
4501
4502 /* Check whether the set of nodes which have had communications
4503 * closed is the same as the set of failed nodes.
4504 * If it is, we can confirm the PREP_FAIL phase for this set
4505 * of nodes to the President.
4506 * If it is not, we Refuse the PREP_FAIL phase for this set
4507 * of nodes, the President will start a new PREP_FAIL phase
4508 * for the new set.
4509 */
4510 if (!cprepFailedNodes.contains(cfailedNodes)) {
4511 /* Failed node(s) is missing from the set, we will not
4512 * confirm this Prepare_Fail phase.
4513 * Store the node id in the array for later.
4514 */
4515 jam();
4516 tprepFailConf = ZFALSE;
4517 cprepFailedNodes.bitOR(cfailedNodes);
4518 }//if
4519 if (tprepFailConf == ZFALSE) {
4520 jam();
4521 /* Inform President that we cannot confirm the PREP_FAIL
4522 * phase as we are aware of at least one other node
4523 * failure
4524 */
4525 cfailedNodes = cprepFailedNodes;
4526
4527 sendPrepFailReqRef(signal,
4528 Tblockref,
4529 GSN_PREP_FAILREF,
4530 reference(),
4531 cfailureNr,
4532 cprepFailedNodes);
4533 } else {
4534 /* We have prepared the failure of the requested nodes
4535 * send confirmation to the president
4536 */
4537 jam();
4538 ccommitFailedNodes = cprepFailedNodes;
4539
4540 signal->theData[0] = getOwnNodeId();
4541 signal->theData[1] = TfailureNr;
4542 sendSignal(Tblockref, GSN_PREP_FAILCONF, signal, 2, JBA);
4543 }//if
4544 return;
4545 }//Qmgr::execCLOSE_COMCONF()
4546
4547 /*---------------------------------------------------------------------------*/
4548 /* WE HAVE RECEIVED A CONFIRM OF THAT THIS NODE HAVE PREPARED THE FAILURE. */
4549 /*---------------------------------------------------------------------------*/
4550 /*******************************/
4551 /* PREP_FAILCONF */
4552 /*******************************/
execPREP_FAILCONF(Signal * signal)4553 void Qmgr::execPREP_FAILCONF(Signal* signal)
4554 {
4555 NodeRecPtr nodePtr;
4556 NodeRecPtr replyNodePtr;
4557 jamEntry();
4558 replyNodePtr.i = signal->theData[0];
4559 Uint16 TfailureNr = signal->theData[1];
4560 if (TfailureNr != cfailureNr) {
4561 jam();
4562 /**----------------------------------------------------------------------
4563 * WE HAVE ALREADY STARTING A NEW ATTEMPT TO EXCLUDE A NUMBER OF NODES.
4564 * IGNORE
4565 *----------------------------------------------------------------------*/
4566 return;
4567 }//if
4568 ptrCheckGuard(replyNodePtr, MAX_NDB_NODES, nodeRec);
4569 replyNodePtr.p->sendPrepFailReqStatus = Q_NOT_ACTIVE;
4570 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
4571 ptrAss(nodePtr, nodeRec);
4572 if (nodePtr.p->phase == ZRUNNING) {
4573 if (nodePtr.p->sendPrepFailReqStatus == Q_ACTIVE) {
4574 jamLine(nodePtr.i);
4575 return;
4576 }//if
4577 }//if
4578 }//for
4579 /**
4580 * Check node count and groups and invoke arbitrator if necessary.
4581 * Continues via sendCommitFailReq() if successful.
4582 */
4583 arbitRec.failureNr = cfailureNr;
4584 const NodeState & s = getNodeState();
4585 if(s.startLevel == NodeState::SL_STOPPING_3 && s.stopping.systemShutdown){
4586 jam();
4587 /**
4588 * We're performing a system shutdown,
4589 * don't let artibtrator shut us down
4590 */
4591 return;
4592 }
4593
4594 switch(arbitRec.method){
4595 case ArbitRec::DISABLED:
4596 jam();
4597 // No arbitration -> immediately commit the failed nodes
4598 sendCommitFailReq(signal);
4599 break;
4600
4601 case ArbitRec::METHOD_EXTERNAL:
4602 case ArbitRec::METHOD_DEFAULT:
4603 jam();
4604 handleArbitCheck(signal);
4605 break;
4606
4607 }
4608 return;
4609 }//Qmgr::execPREP_FAILCONF()
4610
4611 void
sendCommitFailReq(Signal * signal)4612 Qmgr::sendCommitFailReq(Signal* signal)
4613 {
4614 NodeRecPtr nodePtr;
4615 jam();
4616 if (arbitRec.failureNr != cfailureNr) {
4617 jam();
4618 /**----------------------------------------------------------------------
4619 * WE HAVE ALREADY STARTING A NEW ATTEMPT TO EXCLUDE A NUMBER OF NODES.
4620 * IGNORE
4621 *----------------------------------------------------------------------*/
4622 return;
4623 }//if
4624 /**-----------------------------------------------------------------------
4625 * WE HAVE SUCCESSFULLY PREPARED A SET OF NODE FAILURES. WE WILL NOW COMMIT
4626 * THESE NODE FAILURES.
4627 *-------------------------------------------------------------------------*/
4628 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
4629 ptrAss(nodePtr, nodeRec);
4630
4631 #ifdef ERROR_INSERT
4632 if (false && ERROR_INSERTED(935) && nodePtr.i == c_error_insert_extra)
4633 {
4634 ndbout_c("skipping node %d", c_error_insert_extra);
4635 CLEAR_ERROR_INSERT_VALUE;
4636 signal->theData[0] = 9999;
4637 sendSignalWithDelay(CMVMI_REF, GSN_NDB_TAMPER, signal, 1000, 1);
4638 continue;
4639 }
4640 #endif
4641
4642 if (nodePtr.p->phase == ZRUNNING) {
4643 jamLine(nodePtr.i);
4644 nodePtr.p->sendCommitFailReqStatus = Q_ACTIVE;
4645 signal->theData[0] = cpdistref;
4646 signal->theData[1] = cfailureNr;
4647 sendSignal(nodePtr.p->blockRef, GSN_COMMIT_FAILREQ, signal, 2, JBA);
4648 }//if
4649 }//for
4650 ctoStatus = Q_ACTIVE;
4651 cfailedNodes.clear();
4652 return;
4653 }//sendCommitFailReq()
4654
4655 /*---------------------------------------------------------------------------*/
4656 /* SOME NODE HAVE DISCOVERED A NODE FAILURE THAT WE HAVE NOT YET DISCOVERED. */
4657 /* WE WILL START ANOTHER ROUND OF PREPARING A SET OF NODE FAILURES. */
4658 /*---------------------------------------------------------------------------*/
4659 /*******************************/
4660 /* PREP_FAILREF */
4661 /*******************************/
execPREP_FAILREF(Signal * signal)4662 void Qmgr::execPREP_FAILREF(Signal* signal)
4663 {
4664 NodeRecPtr nodePtr;
4665 jamEntry();
4666
4667 PrepFailReqRef * const prepFail = (PrepFailReqRef *)&signal->theData[0];
4668
4669 Uint16 TfailureNr = prepFail->failNo;
4670
4671 cprepFailedNodes.assign(NdbNodeBitmask::Size, prepFail->theNodes);
4672 ndbassert(prepFail->noOfNodes == cprepFailedNodes.count());
4673
4674 if (TfailureNr != cfailureNr) {
4675 jam();
4676 /**---------------------------------------------------------------------
4677 * WE HAVE ALREADY STARTING A NEW ATTEMPT TO EXCLUDE A NUMBER OF NODES.
4678 * IGNORE
4679 *----------------------------------------------------------------------*/
4680 return;
4681 }//if
4682
4683 cfailedNodes = cprepFailedNodes;
4684
4685 cfailureNr = cfailureNr + 1;
4686 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
4687 ptrAss(nodePtr, nodeRec);
4688 if (nodePtr.p->phase == ZRUNNING) {
4689 jamLine(nodePtr.i);
4690 sendPrepFailReq(signal, nodePtr.i);
4691 }//if
4692 }//for
4693 return;
4694 }//Qmgr::execPREP_FAILREF()
4695
4696 /*---------------------------------------------------------------------------*/
4697 /* THE PRESIDENT IS NOW COMMITTING THE PREVIOUSLY PREPARED NODE FAILURE. */
4698 /*---------------------------------------------------------------------------*/
4699 /***********************/
4700 /* COMMIT_FAILREQ */
4701 /***********************/
execCOMMIT_FAILREQ(Signal * signal)4702 void Qmgr::execCOMMIT_FAILREQ(Signal* signal)
4703 {
4704 NodeRecPtr nodePtr;
4705 jamEntry();
4706
4707 CRASH_INSERTION(935);
4708
4709 BlockReference Tblockref = signal->theData[0];
4710 UintR TfailureNr = signal->theData[1];
4711 if (Tblockref != cpdistref) {
4712 jam();
4713 return;
4714 }//if
4715
4716 /**
4717 * Block commit until node failures has stabilized
4718 *
4719 * @See RT352
4720 */
4721 UnblockCommitOrd* const unblock = (UnblockCommitOrd *)&signal->theData[0];
4722 unblock->failNo = TfailureNr;
4723 EXECUTE_DIRECT(DBDIH, GSN_UNBLOCK_COMMIT_ORD, signal,
4724 UnblockCommitOrd::SignalLength);
4725
4726 if ((ccommitFailureNr != TfailureNr) &&
4727 (!ccommitFailedNodes.isclear()))
4728 {
4729 jam();
4730 /**-----------------------------------------------------------------------
4731 * WE ONLY DO THIS PART OF THE COMMIT HANDLING THE FIRST TIME WE HEAR THIS
4732 * SIGNAL. WE CAN HEAR IT SEVERAL TIMES IF THE PRESIDENTS KEEP FAILING.
4733 *-----------------------------------------------------------------------*/
4734 ccommitFailureNr = TfailureNr;
4735 NodeFailRep * const nodeFail = (NodeFailRep *)&signal->theData[0];
4736
4737 nodeFail->failNo = ccommitFailureNr;
4738 nodeFail->masterNodeId = cpresident;
4739 nodeFail->noOfNodes = ccommitFailedNodes.count();
4740 ccommitFailedNodes.copyto(NdbNodeBitmask::Size, nodeFail->theNodes);
4741
4742 if (ERROR_INSERTED(936))
4743 {
4744 sendSignalWithDelay(NDBCNTR_REF, GSN_NODE_FAILREP, signal,
4745 200, NodeFailRep::SignalLength);
4746 }
4747 else
4748 {
4749 sendSignal(NDBCNTR_REF, GSN_NODE_FAILREP, signal,
4750 NodeFailRep::SignalLength, JBB);
4751 }
4752
4753 /**--------------------------------------------------------------------
4754 * WE MUST PREPARE TO ACCEPT THE CRASHED NODE INTO THE CLUSTER AGAIN BY
4755 * SETTING UP CONNECTIONS AGAIN AFTER THREE SECONDS OF DELAY.
4756 *--------------------------------------------------------------------*/
4757 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
4758 if (ccommitFailedNodes.get(nodePtr.i)) {
4759 jamLine(nodePtr.i);
4760 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
4761 nodePtr.p->phase = ZFAIL_CLOSING;
4762 nodePtr.p->failState = WAITING_FOR_NDB_FAILCONF;
4763 set_hb_count(nodePtr.i) = 0;
4764 c_clusterNodes.clear(nodePtr.i);
4765 }//if
4766 }//for
4767
4768 /*----------------------------------------------------------------------*/
4769 /* WE INFORM THE API'S WE HAVE CONNECTED ABOUT THE FAILED NODES. */
4770 /*----------------------------------------------------------------------*/
4771 for (nodePtr.i = 1; nodePtr.i < MAX_NODES; nodePtr.i++) {
4772 ptrAss(nodePtr, nodeRec);
4773 if (nodePtr.p->phase == ZAPI_ACTIVE) {
4774 jamLine(nodePtr.i);
4775
4776 NodeFailRep * const nodeFail = (NodeFailRep *)&signal->theData[0];
4777
4778 nodeFail->failNo = ccommitFailureNr;
4779 nodeFail->noOfNodes = ccommitFailedNodes.count();
4780 ccommitFailedNodes.copyto(NdbNodeBitmask::Size, nodeFail->theNodes);
4781
4782 sendSignal(nodePtr.p->blockRef, GSN_NODE_FAILREP, signal,
4783 NodeFailRep::SignalLength, JBB);
4784 }//if
4785 }//for
4786
4787 /**
4788 * Remove committed nodes from failed/prepared
4789 */
4790 cfailedNodes.bitANDC(ccommitFailedNodes);
4791 cprepFailedNodes.bitANDC(ccommitFailedNodes);
4792 ccommitFailedNodes.clear();
4793 }//if
4794 /**-----------------------------------------------------------------------
4795 * WE WILL ALWAYS ACKNOWLEDGE THE COMMIT EVEN WHEN RECEIVING IT MULTIPLE
4796 * TIMES SINCE IT WILL ALWAYS COME FROM A NEW PRESIDENT.
4797 *------------------------------------------------------------------------*/
4798 signal->theData[0] = getOwnNodeId();
4799 sendSignal(Tblockref, GSN_COMMIT_FAILCONF, signal, 1, JBA);
4800 return;
4801 }//Qmgr::execCOMMIT_FAILREQ()
4802
4803 /*--------------------------------------------------------------------------*/
4804 /* WE HAVE RECEIVED A CONFIRM OF THAT THIS NODE HAVE COMMITTED THE FAILURES.*/
4805 /*--------------------------------------------------------------------------*/
4806 /*******************************/
4807 /* COMMIT_FAILCONF */
4808 /*******************************/
execCOMMIT_FAILCONF(Signal * signal)4809 void Qmgr::execCOMMIT_FAILCONF(Signal* signal)
4810 {
4811 NodeRecPtr nodePtr;
4812 NodeRecPtr replyNodePtr;
4813 jamEntry();
4814 replyNodePtr.i = signal->theData[0];
4815
4816 ptrCheckGuard(replyNodePtr, MAX_NDB_NODES, nodeRec);
4817 replyNodePtr.p->sendCommitFailReqStatus = Q_NOT_ACTIVE;
4818 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
4819 ptrAss(nodePtr, nodeRec);
4820 if (nodePtr.p->phase == ZRUNNING) {
4821 if (nodePtr.p->sendCommitFailReqStatus == Q_ACTIVE) {
4822 jamLine(nodePtr.i);
4823 return;
4824 }//if
4825 }//if
4826 }//for
4827 /*-----------------------------------------------------------------------*/
4828 /* WE HAVE SUCCESSFULLY COMMITTED A SET OF NODE FAILURES. */
4829 /*-----------------------------------------------------------------------*/
4830 ctoStatus = Q_NOT_ACTIVE;
4831 if (!cfailedNodes.isclear()) {
4832 jam();
4833 /**----------------------------------------------------------------------
4834 * A FAILURE OCCURRED IN THE MIDDLE OF THE COMMIT PROCESS. WE ARE NOW
4835 * READY TO START THE FAILED NODE PROCESS FOR THIS NODE.
4836 *----------------------------------------------------------------------*/
4837 cfailureNr = cfailureNr + 1;
4838 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
4839 ptrAss(nodePtr, nodeRec);
4840 if (nodePtr.p->phase == ZRUNNING) {
4841 jamLine(nodePtr.i);
4842 sendPrepFailReq(signal, nodePtr.i);
4843 }//if
4844 }//for
4845 }//if
4846 return;
4847 }//Qmgr::execCOMMIT_FAILCONF()
4848
4849 /**--------------------------------------------------------------------------
4850 * IF THE PRESIDENT FAILS IN THE MIDDLE OF THE COMMIT OF A FAILED NODE THEN
4851 * THE NEW PRESIDENT NEEDS TO QUERY THE COMMIT STATUS IN THE RUNNING NODES.
4852 *---------------------------------------------------------------------------*/
4853 /*******************************/
4854 /* PRES_TOCONF */
4855 /*******************************/
execPRES_TOCONF(Signal * signal)4856 void Qmgr::execPRES_TOCONF(Signal* signal)
4857 {
4858 NodeRecPtr nodePtr;
4859 NodeRecPtr replyNodePtr;
4860 jamEntry();
4861 replyNodePtr.i = signal->theData[0];
4862 UintR TfailureNr = signal->theData[1];
4863 if (ctoFailureNr < TfailureNr) {
4864 jam();
4865 ctoFailureNr = TfailureNr;
4866 }//if
4867 ptrCheckGuard(replyNodePtr, MAX_NDB_NODES, nodeRec);
4868 replyNodePtr.p->sendPresToStatus = Q_NOT_ACTIVE;
4869 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
4870 ptrAss(nodePtr, nodeRec);
4871 if (nodePtr.p->sendPresToStatus == Q_ACTIVE) {
4872 jamLine(nodePtr.i);
4873 return;
4874 }//if
4875 }//for
4876 /*-------------------------------------------------------------------------*/
4877 /* WE ARE NOW READY TO DISCOVER WHETHER THE FAILURE WAS COMMITTED OR NOT. */
4878 /*-------------------------------------------------------------------------*/
4879 if (ctoFailureNr > ccommitFailureNr) {
4880 jam();
4881 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
4882 ptrAss(nodePtr, nodeRec);
4883 if (nodePtr.p->phase == ZRUNNING) {
4884 jamLine(nodePtr.i);
4885 nodePtr.p->sendCommitFailReqStatus = Q_ACTIVE;
4886 signal->theData[0] = cpdistref;
4887 signal->theData[1] = ctoFailureNr;
4888 sendSignal(nodePtr.p->blockRef, GSN_COMMIT_FAILREQ, signal, 2, JBA);
4889 }//if
4890 }//for
4891 return;
4892 }//if
4893 /*-------------------------------------------------------------------------*/
4894 /* WE ARE NOW READY TO START THE NEW NODE FAILURE PROCESS. */
4895 /*-------------------------------------------------------------------------*/
4896 ctoStatus = Q_NOT_ACTIVE;
4897 cfailureNr = cfailureNr + 1;
4898 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
4899 ptrAss(nodePtr, nodeRec);
4900 if (nodePtr.p->phase == ZRUNNING) {
4901 jamLine(nodePtr.i);
4902 sendPrepFailReq(signal, nodePtr.i);
4903 }//if
4904 }//for
4905 return;
4906 }//Qmgr::execPRES_TOCONF()
4907
4908 /*--------------------------------------------------------------------------*/
4909 // Provide information about the configured NDB nodes in the system.
4910 /*--------------------------------------------------------------------------*/
execREAD_NODESREQ(Signal * signal)4911 void Qmgr::execREAD_NODESREQ(Signal* signal)
4912 {
4913 jamEntry();
4914
4915 BlockReference TBref = signal->theData[0];
4916
4917 ReadNodesConf * const readNodes = (ReadNodesConf *)&signal->theData[0];
4918
4919 NodeRecPtr nodePtr;
4920 nodePtr.i = getOwnNodeId();
4921 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
4922
4923 NdbNodeBitmask tmp = c_definedNodes;
4924 tmp.bitANDC(c_clusterNodes);
4925
4926 readNodes->noOfNodes = c_definedNodes.count();
4927 readNodes->masterNodeId = cpresident;
4928 readNodes->ndynamicId = nodePtr.p->ndynamicId;
4929 c_definedNodes.copyto(NdbNodeBitmask::Size, readNodes->definedNodes);
4930 c_clusterNodes.copyto(NdbNodeBitmask::Size, readNodes->clusterNodes);
4931 tmp.copyto(NdbNodeBitmask::Size, readNodes->inactiveNodes);
4932 NdbNodeBitmask::clear(readNodes->startingNodes);
4933 NdbNodeBitmask::clear(readNodes->startedNodes);
4934
4935 sendSignal(TBref, GSN_READ_NODESCONF, signal,
4936 ReadNodesConf::SignalLength, JBB);
4937 }//Qmgr::execREAD_NODESREQ()
4938
systemErrorBecauseOtherNodeFailed(Signal * signal,Uint32 line,NodeId failedNodeId)4939 void Qmgr::systemErrorBecauseOtherNodeFailed(Signal* signal, Uint32 line,
4940 NodeId failedNodeId) {
4941 jam();
4942
4943 // Broadcast that this node is failing to other nodes
4944 failReport(signal, getOwnNodeId(), (UintR)ZTRUE, FailRep::ZOWN_FAILURE, getOwnNodeId());
4945
4946 char buf[100];
4947 BaseString::snprintf(buf, 100,
4948 "Node was shutdown during startup because node %d failed",
4949 failedNodeId);
4950
4951 progError(line, NDBD_EXIT_SR_OTHERNODEFAILED, buf);
4952 }
4953
4954
systemErrorLab(Signal * signal,Uint32 line,const char * message)4955 void Qmgr::systemErrorLab(Signal* signal, Uint32 line, const char * message)
4956 {
4957 jam();
4958 // Broadcast that this node is failing to other nodes
4959 failReport(signal, getOwnNodeId(), (UintR)ZTRUE, FailRep::ZOWN_FAILURE, getOwnNodeId());
4960
4961 // If it's known why shutdown occured
4962 // an error message has been passed to this function
4963 progError(line, NDBD_EXIT_NDBREQUIRE, message);
4964
4965 return;
4966 }//Qmgr::systemErrorLab()
4967
4968
/**---------------------------------------------------------------------------
 * A FAILURE HAS BEEN DISCOVERED ON A NODE. WE NEED TO CLEAR A
 * NUMBER OF VARIABLES.
 *
 * Nothing is done unless the node's record is in phase ZRUNNING.
 * Otherwise, in order:
 *  - as president, answer on behalf of the failed node any outstanding
 *    COMMIT_FAILREQ / PRES_TOREQ by sending the CONF to ourselves;
 *  - move the node to ZPREPARE_FAIL and reset its send statuses and
 *    heartbeat count;
 *  - if aSendFailRep == ZTRUE, send FAIL_REP to the failed node (unless
 *    it is this node) and to every node still in ZRUNNING;
 *  - if the failed node is this node, stop here;
 *  - otherwise update neighbours and, if the president failed, pick the
 *    running node with the lowest dynamic id as new president and, when
 *    that is this node, start the take-over;
 *  - finally record the node in cfailedNodes.
 *
 * @param signal        signal object reused for all outgoing signals
 * @param aFailedNode   id of the node that failed
 * @param aSendFailRep  ZTRUE to send FAIL_REP as described above
 * @param aFailCause    cause copied into the FAIL_REP signals
 * @param sourceNode    node copied into FAIL_REP as failSourceNodeId;
 *                      asserted non-zero whenever aSendFailRep is set
 *---------------------------------------------------------------------------*/
void Qmgr::failReport(Signal* signal,
                      Uint16 aFailedNode,
                      UintR aSendFailRep,
                      FailRep::FailCause aFailCause,
                      Uint16 sourceNode)
{
  UintR tfrMinDynamicId;
  NodeRecPtr failedNodePtr;
  NodeRecPtr nodePtr;
  NodeRecPtr presidentNodePtr;


  ndbassert((! aSendFailRep) || (sourceNode != 0));

  failedNodePtr.i = aFailedNode;
  ptrCheckGuard(failedNodePtr, MAX_NDB_NODES, nodeRec);
  if (failedNodePtr.p->phase == ZRUNNING) {
    jam();

#ifdef ERROR_INSERT
    // Test hook: count failures and, once more than a quarter of the
    // DB nodes have failed, send DUMP_STATE_ORD 9991 to CMVMI and
    // switch the error insert to 932.
    if (ERROR_INSERTED(938))
    {
      nodeFailCount++;
      ndbout_c("QMGR : execFAIL_REP(Failed : %u Source : %u  Cause : %u) : "
               "%u nodes have failed",
               aFailedNode, sourceNode, aFailCause, nodeFailCount);
      /* Count DB nodes */
      Uint32 nodeCount = 0;
      for (Uint32 i = 1; i < MAX_NDB_NODES; i++)
      {
        if (getNodeInfo(i).getType() == NODE_TYPE_DB)
          nodeCount++;
      }

      /* When > 25% of cluster has failed, resume communications */
      if (nodeFailCount > (nodeCount / 4))
      {
        ndbout_c("QMGR : execFAIL_REP > 25%% nodes failed, resuming comms");
        // The signal is saved and restored because it is reused below.
        Signal save = *signal;
        signal->theData[0] = 9991;
        sendSignal(CMVMI_REF, GSN_DUMP_STATE_ORD, signal, 1, JBB);
        *signal = save;
        nodeFailCount = 0;
        SET_ERROR_INSERT_VALUE(932);
      }
    }
#endif

    /* WE ALSO NEED TO ADD HERE SOME CODE THAT GETS OUR NEW NEIGHBOURS. */
    if (cpresident == getOwnNodeId()) {
      jam();
      // The failed node can no longer answer; send the outstanding
      // confirmations to ourselves in its name.
      if (failedNodePtr.p->sendCommitFailReqStatus == Q_ACTIVE) {
        jam();
        signal->theData[0] = failedNodePtr.i;
        sendSignal(QMGR_REF, GSN_COMMIT_FAILCONF, signal, 1, JBA);
      }//if
      if (failedNodePtr.p->sendPresToStatus == Q_ACTIVE) {
        jam();
        signal->theData[0] = failedNodePtr.i;
        signal->theData[1] = ccommitFailureNr;
        sendSignal(QMGR_REF, GSN_PRES_TOCONF, signal, 2, JBA);
      }//if
    }//if
    // From here on the node no longer counts as ZRUNNING, so the loops
    // below skip it.
    failedNodePtr.p->phase = ZPREPARE_FAIL;
    failedNodePtr.p->sendPrepFailReqStatus = Q_NOT_ACTIVE;
    failedNodePtr.p->sendCommitFailReqStatus = Q_NOT_ACTIVE;
    failedNodePtr.p->sendPresToStatus = Q_NOT_ACTIVE;
    set_hb_count(failedNodePtr.i) = 0;
    if (aSendFailRep == ZTRUE) {
      jam();
      // Tell the failed node itself, unless that is this node.
      if (failedNodePtr.i != getOwnNodeId()) {
        jam();
        FailRep * const failRep = (FailRep *)&signal->theData[0];
        failRep->failNodeId = failedNodePtr.i;
        failRep->failCause = aFailCause;
        failRep->failSourceNodeId = sourceNode;
        sendSignal(failedNodePtr.p->blockRef, GSN_FAIL_REP, signal,
                   FailRep::SignalLength, JBA);
      }//if
      // Tell every node that is still running.
      for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
        ptrAss(nodePtr, nodeRec);
        if (nodePtr.p->phase == ZRUNNING) {
          jamLine(nodePtr.i);
          FailRep * const failRep = (FailRep *)&signal->theData[0];
          failRep->failNodeId = failedNodePtr.i;
          failRep->failCause = aFailCause;
          failRep->failSourceNodeId = sourceNode;
          sendSignal(nodePtr.p->blockRef, GSN_FAIL_REP, signal,
                     FailRep::SignalLength, JBA);
        }//if
      }//for
    }//if
    // Our own failure: none of the bookkeeping below is performed.
    if (failedNodePtr.i == getOwnNodeId()) {
      jam();
      return;
    }//if

    if (unlikely(m_connectivity_check.reportNodeFailure(failedNodePtr.i)))
    {
      jam();
      connectivityCheckCompleted(signal);
    }

    failedNodePtr.p->ndynamicId = 0;
    findNeighbours(signal, __LINE__);
    if (failedNodePtr.i == cpresident) {
      jam();
      /**--------------------------------------------------------------------
       * IF PRESIDENT HAS FAILED WE MUST CALCULATE THE NEW PRESIDENT BY
       * FINDING THE NODE WITH THE MINIMUM DYNAMIC IDENTITY.
       * Only the low 16 bits of ndynamicId take part in the comparison.
       *---------------------------------------------------------------------*/
      tfrMinDynamicId = (UintR)-1;
      for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
        ptrAss(nodePtr, nodeRec);
        if (nodePtr.p->phase == ZRUNNING) {
          jamLine(nodePtr.i);
          if ((nodePtr.p->ndynamicId & 0xFFFF) < tfrMinDynamicId) {
            jam();
            tfrMinDynamicId = (nodePtr.p->ndynamicId & 0xFFFF);
            cpresident = nodePtr.i;
          }//if
        }//if
      }//for
      presidentNodePtr.i = cpresident;
      ptrCheckGuard(presidentNodePtr, MAX_NDB_NODES, nodeRec);
      cpdistref = presidentNodePtr.p->blockRef;
      if (cpresident == getOwnNodeId()) {
        // This node is the new president: start the take-over.
        CRASH_INSERTION(920);
        cfailureNr = cprepareFailureNr;
        ctoFailureNr = 0;
        ctoStatus = Q_ACTIVE;
        c_start.reset(); // Don't take over nodes being started
        if (!ccommitFailedNodes.isclear()) {
          jam();
          /**-----------------------------------------------------------------
           * IN THIS SITUATION WE ARE UNCERTAIN OF WHETHER THE NODE FAILURE
           * PROCESS WAS COMMITTED. WE NEED TO QUERY THE OTHER NODES ABOUT
           * THEIR STATUS.
           *-----------------------------------------------------------------*/
          for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES;
               nodePtr.i++) {
            jam();
            ptrAss(nodePtr, nodeRec);
            if (nodePtr.p->phase == ZRUNNING) {
              jam();
              nodePtr.p->sendPresToStatus = Q_ACTIVE;
              signal->theData[0] = cpdistref;
              signal->theData[1] = cprepareFailureNr;
              // NOTE(review): theData[1] is set above but the signal is
              // sent with length 1, so only theData[0] is transmitted.
              // Confirm whether the receiver needs the second word.
              sendSignal(nodePtr.p->blockRef, GSN_PRES_TOREQ,
                         signal, 1, JBA);
            }//if
          }//for
        } else {
          jam();
          /*-----------------------------------------------------------------*/
          // In this case it could be that a commit process is still ongoing.
          // If so we must conclude it as the new master.
          /*-----------------------------------------------------------------*/
          for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES;
               nodePtr.i++) {
            ptrAss(nodePtr, nodeRec);
            if (nodePtr.p->phase == ZRUNNING) {
              jamLine(nodePtr.i);
              nodePtr.p->sendCommitFailReqStatus = Q_ACTIVE;
              signal->theData[0] = cpdistref;
              signal->theData[1] = ccommitFailureNr;
              sendSignal(nodePtr.p->blockRef, GSN_COMMIT_FAILREQ, signal,
                         2, JBA);
            }//if
          }//for
        }//if
      }//if
    }//if
    // Remember the node as failed.
    cfailedNodes.set(failedNodePtr.i);
  }//if
}//Qmgr::failReport()
5149
5150 /*---------------------------------------------------------------------------*/
5151 /* INPUT: TTDI_DYN_ID */
5152 /* OUTPUT: TTDI_NODE_ID */
5153 /*---------------------------------------------------------------------------*/
translateDynamicIdToNodeId(Signal * signal,UintR TdynamicId)5154 Uint16 Qmgr::translateDynamicIdToNodeId(Signal* signal, UintR TdynamicId)
5155 {
5156 NodeRecPtr tdiNodePtr;
5157 Uint16 TtdiNodeId = ZNIL;
5158
5159 for (tdiNodePtr.i = 1; tdiNodePtr.i < MAX_NDB_NODES; tdiNodePtr.i++) {
5160 jam();
5161 ptrAss(tdiNodePtr, nodeRec);
5162 if (tdiNodePtr.p->ndynamicId == TdynamicId) {
5163 jam();
5164 TtdiNodeId = tdiNodePtr.i;
5165 break;
5166 }//if
5167 }//for
5168 if (TtdiNodeId == ZNIL) {
5169 jam();
5170 systemErrorLab(signal, __LINE__);
5171 }//if
5172 return TtdiNodeId;
5173 }//Qmgr::translateDynamicIdToNodeId()
5174
5175 /**--------------------------------------------------------------------------
5176 * WHEN RECEIVING PREPARE FAILURE REQUEST WE WILL IMMEDIATELY CLOSE
5177 * COMMUNICATION WITH ALL THOSE NODES.
5178 *--------------------------------------------------------------------------*/
sendCloseComReq(Signal * signal,BlockReference TBRef,Uint16 aFailNo)5179 void Qmgr::sendCloseComReq(Signal* signal, BlockReference TBRef, Uint16 aFailNo)
5180 {
5181 CloseComReqConf * const closeCom = (CloseComReqConf *)&signal->theData[0];
5182
5183 closeCom->xxxBlockRef = TBRef;
5184 closeCom->requestType = CloseComReqConf::RT_NODE_FAILURE;
5185 closeCom->failNo = aFailNo;
5186 closeCom->noOfNodes = cprepFailedNodes.count();
5187 /**
5188 * We are sending a signal where bitmap is of size NodeBitmask::size and we only
5189 * have a bitmask of NdbNodeBitmask::size, we clear all bits using NodeBitmask
5190 * before assigning the smaller bitmask to ensure we don't send any garbage.
5191 */
5192 NodeBitmask::clear(closeCom->theNodes);
5193 cprepFailedNodes.copyto(NdbNodeBitmask::Size, closeCom->theNodes);
5194
5195 sendSignal(TRPMAN_REF, GSN_CLOSE_COMREQ, signal,
5196 CloseComReqConf::SignalLength, JBB);
5197
5198 }//Qmgr::sendCloseComReq()
5199
5200 void
sendPrepFailReqRef(Signal * signal,Uint32 dstBlockRef,GlobalSignalNumber gsn,Uint32 blockRef,Uint32 failNo,const NdbNodeBitmask & nodes)5201 Qmgr::sendPrepFailReqRef(Signal* signal,
5202 Uint32 dstBlockRef,
5203 GlobalSignalNumber gsn,
5204 Uint32 blockRef,
5205 Uint32 failNo,
5206 const NdbNodeBitmask& nodes)
5207 {
5208 PrepFailReqRef * const prepFail = (PrepFailReqRef *)&signal->theData[0];
5209 prepFail->xxxBlockRef = blockRef;
5210 prepFail->failNo = failNo;
5211 prepFail->noOfNodes = nodes.count();
5212 nodes.copyto(NdbNodeBitmask::Size, prepFail->theNodes);
5213
5214 sendSignal(dstBlockRef, gsn, signal, PrepFailReqRef::SignalLength, JBA);
5215 }
5216
5217
5218 /**--------------------------------------------------------------------------
5219 * SEND PREPARE FAIL REQUEST FROM PRESIDENT.
5220 *---------------------------------------------------------------------------*/
sendPrepFailReq(Signal * signal,Uint16 aNode)5221 void Qmgr::sendPrepFailReq(Signal* signal, Uint16 aNode)
5222 {
5223 NodeRecPtr sendNodePtr;
5224 sendNodePtr.i = aNode;
5225 ptrCheckGuard(sendNodePtr, MAX_NDB_NODES, nodeRec);
5226 sendNodePtr.p->sendPrepFailReqStatus = Q_ACTIVE;
5227
5228 sendPrepFailReqRef(signal,
5229 sendNodePtr.p->blockRef,
5230 GSN_PREP_FAILREQ,
5231 reference(),
5232 cfailureNr,
5233 cfailedNodes);
5234 }//Qmgr::sendPrepFailReq()
5235
5236 /**
5237 * Arbitration module. Rest of QMGR calls us only via
5238 * the "handle" routines.
5239 */
5240
/**
 * Whether a node set holding fewer than half of the nodes should die
 * unconditionally (and one holding more than half should win a
 * partitioning). Affects only >= 3-way replication.
 */
static constexpr bool g_ndb_arbit_one_half_rule = false;
5246
5247 /**
5248 * Config signals are logically part of CM_INIT.
5249 */
5250 void
execARBIT_CFG(Signal * signal)5251 Qmgr::execARBIT_CFG(Signal* signal)
5252 {
5253 jamEntry();
5254 ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
5255 unsigned rank = sd->code;
5256 ndbrequire(1 <= rank && rank <= 2);
5257 arbitRec.apiMask[0].bitOR(sd->mask);
5258 arbitRec.apiMask[rank].assign(sd->mask);
5259 }
5260
5261 /**
5262 * ContinueB delay (0=JBA 1=JBB)
5263 */
getArbitDelay()5264 Uint32 Qmgr::getArbitDelay()
5265 {
5266 switch (arbitRec.state) {
5267 case ARBIT_NULL:
5268 jam();
5269 break;
5270 case ARBIT_INIT:
5271 jam();
5272 case ARBIT_FIND:
5273 jam();
5274 case ARBIT_PREP1:
5275 jam();
5276 case ARBIT_PREP2:
5277 jam();
5278 case ARBIT_START:
5279 jam();
5280 return 100;
5281 case ARBIT_RUN:
5282 jam();
5283 return 1000;
5284 case ARBIT_CHOOSE:
5285 jam();
5286 return 10;
5287 case ARBIT_CRASH: // if we could wait
5288 jam();
5289 return 100;
5290 }
5291 ndbrequire(false);
5292 return (Uint32)-1;
5293 }
5294
5295 /**
5296 * Time to wait for reply. There is only 1 config parameter
5297 * (timeout for CHOOSE). XXX The rest are guesses.
5298 */
getArbitTimeout()5299 Uint32 Qmgr::getArbitTimeout()
5300 {
5301 switch (arbitRec.state) {
5302 case ARBIT_NULL:
5303 jam();
5304 break;
5305 case ARBIT_INIT: // not used
5306 jam();
5307 case ARBIT_FIND:
5308 jam();
5309 /* This timeout will be used only to print out a warning
5310 * when a suitable arbitrator is not found.
5311 */
5312 return 60000;
5313 case ARBIT_PREP1:
5314 jam();
5315 case ARBIT_PREP2:
5316 jam();
5317 return 1000 + cnoOfNodes * Uint32(hb_send_timer.getDelay());
5318 case ARBIT_START:
5319 jam();
5320 return 1000 + arbitRec.timeout;
5321 case ARBIT_RUN: // not used (yet)
5322 jam();
5323 return 1000;
5324 case ARBIT_CHOOSE:
5325 jam();
5326 return arbitRec.timeout;
5327 case ARBIT_CRASH: // if we could wait
5328 jam();
5329 return 100;
5330 }
5331 ndbrequire(false);
5332 return (Uint32)-1;
5333 }
5334
5335 /**
5336 * Start arbitration thread when we are president and database
5337 * is opened for the first time.
5338 *
5339 * XXX Do arbitration check just like on node failure. Since
5340 * there is no arbitrator yet, must win on counts alone.
5341 */
5342 void
handleArbitStart(Signal * signal)5343 Qmgr::handleArbitStart(Signal* signal)
5344 {
5345 jam();
5346 ndbrequire(cpresident == getOwnNodeId());
5347 ndbrequire(arbitRec.state == ARBIT_NULL);
5348 arbitRec.state = ARBIT_INIT;
5349 arbitRec.newstate = true;
5350 startArbitThread(signal);
5351 }
5352
5353 /**
5354 * Handle API node failure. Called also by non-president nodes.
5355 * If we are president go back to INIT state, otherwise to NULL.
5356 * Start new thread to save time.
5357 */
5358 void
handleArbitApiFail(Signal * signal,Uint16 nodeId)5359 Qmgr::handleArbitApiFail(Signal* signal, Uint16 nodeId)
5360 {
5361 if (arbitRec.node != nodeId) {
5362 jam();
5363 return;
5364 }
5365 reportArbitEvent(signal, NDB_LE_ArbitState);
5366 arbitRec.node = 0;
5367 switch (arbitRec.state) {
5368 case ARBIT_NULL: // should not happen
5369 jam();
5370 case ARBIT_INIT:
5371 jam();
5372 case ARBIT_FIND:
5373 jam();
5374 break;
5375 case ARBIT_PREP1: // start from beginning
5376 jam();
5377 case ARBIT_PREP2:
5378 jam();
5379 case ARBIT_START:
5380 jam();
5381 case ARBIT_RUN:
5382 if (cpresident == getOwnNodeId()) {
5383 jam();
5384 arbitRec.state = ARBIT_INIT;
5385 arbitRec.newstate = true;
5386 startArbitThread(signal);
5387 } else {
5388 jam();
5389 arbitRec.state = ARBIT_NULL;
5390 }
5391 break;
5392 case ARBIT_CHOOSE: // XXX too late
5393 jam();
5394 case ARBIT_CRASH:
5395 jam();
5396 break;
5397 default:
5398 ndbrequire(false);
5399 break;
5400 }
5401 }
5402
5403 /**
5404 * Handle NDB node add. Ignore if arbitration thread not yet
5405 * started. If PREP is not ready, go back to INIT. Otherwise
5406 * the new node gets arbitrator and ticket once we reach RUN state.
5407 * Start new thread to save time.
5408 */
5409 void
handleArbitNdbAdd(Signal * signal,Uint16 nodeId)5410 Qmgr::handleArbitNdbAdd(Signal* signal, Uint16 nodeId)
5411 {
5412 jam();
5413 ndbrequire(cpresident == getOwnNodeId());
5414 switch (arbitRec.state) {
5415 case ARBIT_NULL: // before db opened
5416 jam();
5417 break;
5418 case ARBIT_INIT: // start from beginning
5419 jam();
5420 case ARBIT_FIND:
5421 jam();
5422 case ARBIT_PREP1:
5423 jam();
5424 case ARBIT_PREP2:
5425 jam();
5426 arbitRec.state = ARBIT_INIT;
5427 arbitRec.newstate = true;
5428 startArbitThread(signal);
5429 break;
5430 case ARBIT_START: // process in RUN state
5431 jam();
5432 case ARBIT_RUN:
5433 jam();
5434 arbitRec.newMask.set(nodeId);
5435 break;
5436 case ARBIT_CHOOSE: // XXX too late
5437 jam();
5438 case ARBIT_CRASH:
5439 jam();
5440 break;
5441 default:
5442 ndbrequire(false);
5443 break;
5444 }
5445 }
5446
/**
 * Check if current nodeset can survive.  The decision is
 * based on node count, node groups, and on external arbitrator
 * (if we have one).  Always starts a new thread because
 * 1) CHOOSE cannot wait 2) if we are new president we need
 * a thread 3) if we are old president it does no harm.
 *
 * Must be called on the president. The function works in three steps:
 *  1. compute arbitRec.code from the node count (one-half rule) or from
 *     DBDIH's node group check;
 *  2. map that code to the next arbitration state (INIT, RUN, CHOOSE or
 *     CRASH);
 *  3. unless the state is CHOOSE or CRASH, drop failed nodes from the
 *     arbitration masks and start committing the failed nodes.
 */
void
Qmgr::handleArbitCheck(Signal* signal)
{
  jam();
  ndbrequire(cpresident == getOwnNodeId());
  NdbNodeBitmask ndbMask;
  computeArbitNdbMask(ndbMask);
  // Step 1: decide the outcome code.
  if (g_ndb_arbit_one_half_rule && !ERROR_INSERTED(943) &&
      2 * ndbMask.count() < cnoOfNodes) {
    // One-half rule: fewer than half of the nodes remain.
    jam();
    arbitRec.code = ArbitCode::LoseNodes;
  } else {
    jam();
    // Ask DBDIH directly; the answer comes back in the same signal
    // buffer (sd->output).
    CheckNodeGroups* sd = (CheckNodeGroups*)&signal->theData[0];
    sd->blockRef = reference();
    sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck;
    sd->mask = ndbMask;
    EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal,
                   CheckNodeGroups::SignalLength);
    jamEntry();
    if (ERROR_INSERTED(943))
    {
      // Test hook: force the partitioning path from the RUN state.
      ndbout << "Requiring arbitration, even if there is no"
             << " possible split."<< endl;
      sd->output = CheckNodeGroups::Partitioning;
      arbitRec.state = ARBIT_RUN;
    }
    switch (sd->output) {
    case CheckNodeGroups::Win:
      jam();
      arbitRec.code = ArbitCode::WinGroups;
      break;
    case CheckNodeGroups::Lose:
      jam();
      arbitRec.code = ArbitCode::LoseGroups;
      break;
    case CheckNodeGroups::Partitioning:
      jam();
      arbitRec.code = ArbitCode::Partitioning;
      // One-half rule: more than half of the nodes remain, so we win
      // without asking an arbitrator.
      if (g_ndb_arbit_one_half_rule &&
          2 * ndbMask.count() > cnoOfNodes) {
        jam();
        arbitRec.code = ArbitCode::WinNodes;
      }
      break;
    default:
      ndbrequire(false);
      break;
    }
  }
  // Step 2: map the outcome code to the next arbitration state.
  switch (arbitRec.code) {
  case ArbitCode::LoseNodes:
    jam();
  case ArbitCode::LoseGroups:
    jam();
    goto crashme;
  case ArbitCode::WinNodes:
    jam();
  case ArbitCode::WinGroups:
    jam();
    // Already in RUN: stay there. Otherwise start over from INIT.
    if (arbitRec.state == ARBIT_RUN) {
      jam();
      break;
    }
    arbitRec.state = ARBIT_INIT;
    arbitRec.newstate = true;
    break;
  case ArbitCode::Partitioning:
    // In RUN state the decision moves on to the CHOOSE state.
    if (arbitRec.state == ARBIT_RUN) {
      jam();
      arbitRec.state = ARBIT_CHOOSE;
      arbitRec.newstate = true;
      break;
    }
    // Not in RUN state: we lose. The code records whether any
    // arbitrator candidates are configured (apiMask[0] non-empty) or not.
    if (arbitRec.apiMask[0].count() != 0) {
      jam();
      arbitRec.code = ArbitCode::LoseNorun;
    } else {
      jam();
      arbitRec.code = ArbitCode::LoseNocfg;
    }
    goto crashme;
  default:
  crashme:
    // Shared exit for every losing outcome and for unknown codes.
    jam();
    arbitRec.state = ARBIT_CRASH;
    arbitRec.newstate = true;
    break;
  }
  reportArbitEvent(signal, NDB_LE_ArbitResult);
  // Step 3: act on the new state.
  switch (arbitRec.state) {
  default:
    jam();
    arbitRec.newMask.bitAND(ndbMask);   // delete failed nodes
    arbitRec.recvMask.bitAND(ndbMask);
    sendCommitFailReq(signal);          // start commit of failed nodes
    break;
  case ARBIT_CHOOSE:
    jam();
  case ARBIT_CRASH:
    jam();
    break;
  }
  startArbitThread(signal);
}
5559
5560 /**
5561 * Start a new continueB thread. The thread id is incremented
5562 * so that any old thread will exit.
5563 */
5564 void
startArbitThread(Signal * signal)5565 Qmgr::startArbitThread(Signal* signal)
5566 {
5567 jam();
5568 ndbrequire(cpresident == getOwnNodeId());
5569 arbitRec.code = ArbitCode::ThreadStart;
5570 reportArbitEvent(signal, NDB_LE_ArbitState);
5571 signal->theData[1] = ++arbitRec.thread;
5572 runArbitThread(signal);
5573 }
5574
5575 /**
5576 * Handle arbitration thread. The initial thread normally ends
5577 * up in RUN state. New thread can be started to save time.
5578 */
5579 void
runArbitThread(Signal * signal)5580 Qmgr::runArbitThread(Signal* signal)
5581 {
5582 #ifdef DEBUG_ARBIT
5583 char buf[256];
5584 NdbNodeBitmask ndbMask;
5585 computeArbitNdbMask(ndbMask);
5586 ndbout << "arbit thread:";
5587 ndbout << " state=" << arbitRec.state;
5588 ndbout << " newstate=" << arbitRec.newstate;
5589 ndbout << " thread=" << arbitRec.thread;
5590 ndbout << " node=" << arbitRec.node;
5591 arbitRec.ticket.getText(buf, sizeof(buf));
5592 ndbout << " ticket=" << buf;
5593 ndbMask.getText(buf);
5594 ndbout << " ndbmask=" << buf;
5595 ndbout << " sendcount=" << arbitRec.sendCount;
5596 ndbout << " recvcount=" << arbitRec.recvCount;
5597 arbitRec.recvMask.getText(buf);
5598 ndbout << " recvmask=" << buf;
5599 ndbout << " code=" << arbitRec.code;
5600 ndbout << endl;
5601 #endif
5602 if (signal->theData[1] != arbitRec.thread) {
5603 jam();
5604 return; // old thread dies
5605 }
5606 switch (arbitRec.state) {
5607 case ARBIT_INIT: // main thread
5608 jam();
5609 stateArbitInit(signal);
5610 break;
5611 case ARBIT_FIND:
5612 jam();
5613 stateArbitFind(signal);
5614 break;
5615 case ARBIT_PREP1:
5616 jam();
5617 case ARBIT_PREP2:
5618 jam();
5619 stateArbitPrep(signal);
5620 break;
5621 case ARBIT_START:
5622 jam();
5623 stateArbitStart(signal);
5624 break;
5625 case ARBIT_RUN:
5626 jam();
5627 stateArbitRun(signal);
5628 break;
5629 case ARBIT_CHOOSE: // partitition thread
5630 jam();
5631 stateArbitChoose(signal);
5632 break;
5633 case ARBIT_CRASH:
5634 jam();
5635 stateArbitCrash(signal);
5636 break;
5637 default:
5638 ndbrequire(false);
5639 break;
5640 }
5641 signal->theData[0] = ZARBIT_HANDLING;
5642 signal->theData[1] = arbitRec.thread;
5643 signal->theData[2] = arbitRec.state; // just for signal log
5644 Uint32 delay = getArbitDelay();
5645 if (delay == 0) {
5646 jam();
5647 sendSignal(QMGR_REF, GSN_CONTINUEB, signal, 3, JBA);
5648 } else if (delay == 1) {
5649 jam();
5650 sendSignal(QMGR_REF, GSN_CONTINUEB, signal, 3, JBB);
5651 } else {
5652 jam();
5653 sendSignalWithDelay(QMGR_REF, GSN_CONTINUEB, signal, delay, 3);
5654 }//if
5655 }
5656
5657 /**
5658 * Handle INIT state. Generate next ticket. Switch to FIND
5659 * state without delay.
5660 */
5661 void
stateArbitInit(Signal * signal)5662 Qmgr::stateArbitInit(Signal* signal)
5663 {
5664 if (arbitRec.newstate) {
5665 jam();
5666 CRASH_INSERTION((Uint32)910 + arbitRec.state);
5667
5668 arbitRec.node = 0;
5669 arbitRec.ticket.update();
5670 arbitRec.newMask.clear();
5671 arbitRec.code = 0;
5672 arbitRec.newstate = false;
5673 }
5674 arbitRec.setTimestamp(); // Init arbitration timer
5675 arbitRec.state = ARBIT_FIND;
5676 arbitRec.newstate = true;
5677 stateArbitFind(signal);
5678 }
5679
5680 /**
5681 * Handle FIND state. Find first arbitrator which is alive
5682 * and invoke PREP state without delay. If none are found,
5683 * loop in FIND state. This is forever if no arbitrators
5684 * are configured (not the normal case).
5685 *
5686 * XXX Add adaptive behaviour to avoid getting stuck on API
5687 * nodes which are alive but do not respond or die too soon.
5688 */
5689 void
stateArbitFind(Signal * signal)5690 Qmgr::stateArbitFind(Signal* signal)
5691 {
5692 if (arbitRec.newstate) {
5693 jam();
5694 CRASH_INSERTION((Uint32)910 + arbitRec.state);
5695
5696 arbitRec.code = 0;
5697 arbitRec.newstate = false;
5698 }
5699
5700 switch (arbitRec.method){
5701 case ArbitRec::METHOD_EXTERNAL:
5702 {
5703 // Don't select any API node as arbitrator
5704 arbitRec.node = 0;
5705 arbitRec.state = ARBIT_PREP1;
5706 arbitRec.newstate = true;
5707 stateArbitPrep(signal);
5708 return;
5709 break;
5710 }
5711
5712 case ArbitRec::METHOD_DEFAULT:
5713 {
5714 NodeRecPtr aPtr;
5715 // Select the best available API node as arbitrator
5716 for (unsigned rank = 1; rank <= 2; rank++) {
5717 jam();
5718 aPtr.i = 0;
5719 const unsigned stop = NodeBitmask::NotFound;
5720 while ((aPtr.i = arbitRec.apiMask[rank].find(aPtr.i + 1)) != stop) {
5721 jam();
5722 ptrAss(aPtr, nodeRec);
5723 if (aPtr.p->phase != ZAPI_ACTIVE)
5724 continue;
5725 ndbrequire(c_connectedNodes.get(aPtr.i));
5726 arbitRec.node = aPtr.i;
5727 arbitRec.state = ARBIT_PREP1;
5728 arbitRec.newstate = true;
5729 stateArbitPrep(signal);
5730 return;
5731 }
5732 }
5733
5734 /* If the president cannot find a suitable arbitrator then
5735 * it will report this once a minute. Success in finding
5736 * an arbitrator will be notified when the arbitrator
5737 * accepts and acks the offer.
5738 */
5739
5740 if (arbitRec.getTimediff() > getArbitTimeout()) {
5741 jam();
5742 g_eventLogger->warning("Could not find an arbitrator, cluster is not partition-safe");
5743 warningEvent("Could not find an arbitrator, cluster is not partition-safe");
5744 arbitRec.setTimestamp();
5745 }
5746 return;
5747 break;
5748 }
5749
5750 default:
5751 ndbrequire(false);
5752 }
5753 }
5754
5755 /**
5756 * Handle PREP states. First round nulls any existing tickets.
5757 * Second round sends new ticket. When all confirms have been
5758 * received invoke START state immediately.
5759 */
5760 void
stateArbitPrep(Signal * signal)5761 Qmgr::stateArbitPrep(Signal* signal)
5762 {
5763 if (arbitRec.newstate) {
5764 jam();
5765 CRASH_INSERTION((Uint32)910 + arbitRec.state);
5766
5767 arbitRec.sendCount = 0; // send all at once
5768 computeArbitNdbMask(arbitRec.recvMask); // to send and recv
5769 arbitRec.recvMask.clear(getOwnNodeId());
5770 arbitRec.code = 0;
5771 arbitRec.newstate = false;
5772 }
5773 if (! arbitRec.sendCount) {
5774 jam();
5775 NodeRecPtr aPtr;
5776 aPtr.i = 0;
5777 const unsigned stop = NodeBitmask::NotFound;
5778 while ((aPtr.i = arbitRec.recvMask.find(aPtr.i + 1)) != stop) {
5779 jam();
5780 ptrAss(aPtr, nodeRec);
5781 ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
5782 sd->sender = getOwnNodeId();
5783 if (arbitRec.state == ARBIT_PREP1) {
5784 jam();
5785 sd->code = ArbitCode::PrepPart1;
5786 } else {
5787 jam();
5788 sd->code = ArbitCode::PrepPart2;
5789 }
5790 sd->node = arbitRec.node;
5791 sd->ticket = arbitRec.ticket;
5792 sd->mask.clear();
5793 sendSignal(aPtr.p->blockRef, GSN_ARBIT_PREPREQ, signal,
5794 ArbitSignalData::SignalLength, JBB);
5795 }
5796 arbitRec.setTimestamp(); // send time
5797 arbitRec.sendCount = 1;
5798 return;
5799 }
5800 if (arbitRec.code != 0) { // error
5801 jam();
5802 arbitRec.state = ARBIT_INIT;
5803 arbitRec.newstate = true;
5804 return;
5805 }
5806 if (arbitRec.recvMask.count() == 0) { // recv all
5807 if (arbitRec.state == ARBIT_PREP1) {
5808 jam();
5809 arbitRec.state = ARBIT_PREP2;
5810 arbitRec.newstate = true;
5811 } else {
5812 jam();
5813 arbitRec.state = ARBIT_START;
5814 arbitRec.newstate = true;
5815 stateArbitStart(signal);
5816 }
5817 return;
5818 }
5819 if (arbitRec.getTimediff() > getArbitTimeout()) {
5820 jam();
5821 arbitRec.state = ARBIT_INIT;
5822 arbitRec.newstate = true;
5823 return;
5824 }
5825 }
5826
5827 void
execARBIT_PREPREQ(Signal * signal)5828 Qmgr::execARBIT_PREPREQ(Signal* signal)
5829 {
5830 jamEntry();
5831 ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
5832 if (getOwnNodeId() == cpresident) {
5833 jam();
5834 return; // wrong state
5835 }
5836 if (sd->sender != cpresident) {
5837 jam();
5838 return; // wrong state
5839 }
5840 NodeRecPtr aPtr;
5841 aPtr.i = sd->sender;
5842 ptrAss(aPtr, nodeRec);
5843 switch (sd->code) {
5844 case ArbitCode::PrepPart1: // zero them just to be sure
5845 jam();
5846 arbitRec.node = 0;
5847 arbitRec.ticket.clear();
5848 break;
5849 case ArbitCode::PrepPart2: // non-president enters RUN state
5850 jam();
5851 case ArbitCode::PrepAtrun:
5852 jam();
5853 arbitRec.node = sd->node;
5854 arbitRec.ticket = sd->ticket;
5855 arbitRec.code = sd->code;
5856 reportArbitEvent(signal, NDB_LE_ArbitState);
5857 arbitRec.state = ARBIT_RUN;
5858 arbitRec.newstate = true;
5859
5860 // Non-president node logs.
5861 if (!c_connectedNodes.get(arbitRec.node))
5862 {
5863 char buf[20]; // needs 16 + 1 for '\0'
5864 arbitRec.ticket.getText(buf, sizeof(buf));
5865 g_eventLogger->warning("President %u proposed disconnected "
5866 "node %u as arbitrator [ticket=%s]. "
5867 "Cluster may be partially connected. "
5868 "Connected nodes: %s",
5869 cpresident, arbitRec.node, buf,
5870 BaseString::getPrettyTextShort(c_connectedNodes).c_str());
5871
5872 warningEvent("President %u proposed disconnected node %u "
5873 "as arbitrator [ticket %s]",
5874 cpresident, arbitRec.node, buf);
5875 warningEvent("Cluster may be partially connected. Connected nodes: ");
5876
5877 // Split the connected-node list, since warningEvents are
5878 // limited to ~24 words / 96 chars
5879 BaseString tmp(BaseString::getPrettyTextShort(c_connectedNodes).c_str());
5880 Vector<BaseString> split;
5881 tmp.split(split, "", 92);
5882 for(unsigned i = 0; i < split.size(); ++i)
5883 {
5884 warningEvent("%s", split[i].c_str());
5885 }
5886 }
5887
5888 if (sd->code == ArbitCode::PrepAtrun) {
5889 jam();
5890 return;
5891 }
5892 break;
5893 default:
5894 jam();
5895 ndbrequire(false);
5896 }
5897 sd->sender = getOwnNodeId();
5898 sd->code = 0;
5899 sendSignal(aPtr.p->blockRef, GSN_ARBIT_PREPCONF, signal,
5900 ArbitSignalData::SignalLength, JBB);
5901 }
5902
5903 void
execARBIT_PREPCONF(Signal * signal)5904 Qmgr::execARBIT_PREPCONF(Signal* signal)
5905 {
5906 jamEntry();
5907 ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
5908 if (! arbitRec.match(sd)) {
5909 jam();
5910 return; // stray signal
5911 }
5912 if (arbitRec.state != ARBIT_PREP1 && arbitRec.state != ARBIT_PREP2) {
5913 jam();
5914 return; // wrong state
5915 }
5916 if (! arbitRec.recvMask.get(sd->sender)) {
5917 jam();
5918 return; // wrong state
5919 }
5920 arbitRec.recvMask.clear(sd->sender);
5921 if (arbitRec.code == 0 && sd->code != 0) {
5922 jam();
5923 arbitRec.code = sd->code;
5924 }//if
5925 }
5926
5927 void
execARBIT_PREPREF(Signal * signal)5928 Qmgr::execARBIT_PREPREF(Signal* signal)
5929 {
5930 jamEntry();
5931 ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
5932 if (sd->code == 0) {
5933 jam();
5934 sd->code = ArbitCode::ErrUnknown;
5935 }
5936 execARBIT_PREPCONF(signal);
5937 }
5938
5939 /**
5940 * Handle START state. On first call send start request to
5941 * the chosen arbitrator. Then wait for a CONF.
5942 */
5943 void
stateArbitStart(Signal * signal)5944 Qmgr::stateArbitStart(Signal* signal)
5945 {
5946 if (arbitRec.newstate) {
5947 jam();
5948 CRASH_INSERTION((Uint32)910 + arbitRec.state);
5949
5950 arbitRec.sendCount = 0;
5951 arbitRec.recvCount = 0;
5952 arbitRec.code = 0;
5953 arbitRec.newstate = false;
5954 }
5955
5956 switch (arbitRec.method){
5957 case ArbitRec::METHOD_EXTERNAL:
5958 jam();
5959 ndbrequire(arbitRec.node == 0); // No arbitrator selected
5960
5961 // Don't start arbitrator in API node => ARBIT_RUN
5962 arbitRec.state = ARBIT_RUN;
5963 arbitRec.newstate = true;
5964 return;
5965 break;
5966
5967 case ArbitRec::METHOD_DEFAULT:
5968 if (! arbitRec.sendCount) {
5969 jam();
5970 BlockReference blockRef = calcApiClusterMgrBlockRef(arbitRec.node);
5971 ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
5972 sd->sender = getOwnNodeId();
5973 sd->code = 0;
5974 sd->node = arbitRec.node;
5975 sd->ticket = arbitRec.ticket;
5976 sd->mask.clear();
5977 sendSignal(blockRef, GSN_ARBIT_STARTREQ, signal,
5978 ArbitSignalData::SignalLength, JBB);
5979 arbitRec.sendCount = 1;
5980 arbitRec.setTimestamp(); // send time
5981 return;
5982 }
5983 if (arbitRec.recvCount) {
5984 jam();
5985 reportArbitEvent(signal, NDB_LE_ArbitState);
5986 if (arbitRec.code == ArbitCode::ApiStart) {
5987 jam();
5988 arbitRec.state = ARBIT_RUN;
5989 arbitRec.newstate = true;
5990 return;
5991 }
5992 arbitRec.state = ARBIT_INIT;
5993 arbitRec.newstate = true;
5994 return;
5995 }
5996 if (arbitRec.getTimediff() > getArbitTimeout()) {
5997 jam();
5998 arbitRec.code = ArbitCode::ErrTimeout;
5999 reportArbitEvent(signal, NDB_LE_ArbitState);
6000 arbitRec.state = ARBIT_INIT;
6001 arbitRec.newstate = true;
6002 return;
6003 }
6004 break;
6005
6006 default:
6007 ndbrequire(false);
6008 break;
6009 }
6010 }
6011
6012 void
execARBIT_STARTCONF(Signal * signal)6013 Qmgr::execARBIT_STARTCONF(Signal* signal)
6014 {
6015 jamEntry();
6016 ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
6017 if (! arbitRec.match(sd)) {
6018 jam();
6019 return; // stray signal
6020 }
6021 if (arbitRec.state != ARBIT_START) {
6022 jam();
6023 return; // wrong state
6024 }
6025 if (arbitRec.recvCount) {
6026 jam();
6027 return; // wrong state
6028 }
6029 arbitRec.code = sd->code;
6030 arbitRec.recvCount = 1;
6031 }
6032
6033 void
execARBIT_STARTREF(Signal * signal)6034 Qmgr::execARBIT_STARTREF(Signal* signal)
6035 {
6036 jamEntry();
6037 ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
6038 if (sd->code == 0) {
6039 jam();
6040 sd->code = ArbitCode::ErrUnknown;
6041 }
6042 execARBIT_STARTCONF(signal);
6043 }
6044
6045 /**
6046 * Handle RUN state. Send ticket to any new nodes which have
6047 * appeared after PREP state. We don't care about a CONF.
6048 */
6049 void
stateArbitRun(Signal * signal)6050 Qmgr::stateArbitRun(Signal* signal)
6051 {
6052 if (arbitRec.newstate) {
6053 jam();
6054 CRASH_INSERTION((Uint32)910 + arbitRec.state);
6055
6056 arbitRec.code = 0;
6057 arbitRec.newstate = false;
6058 }
6059 NodeRecPtr aPtr;
6060 aPtr.i = 0;
6061 const unsigned stop = NodeBitmask::NotFound;
6062 while ((aPtr.i = arbitRec.newMask.find(aPtr.i + 1)) != stop) {
6063 jam();
6064 arbitRec.newMask.clear(aPtr.i);
6065 ptrAss(aPtr, nodeRec);
6066 ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
6067 sd->sender = getOwnNodeId();
6068 sd->code = ArbitCode::PrepAtrun;
6069 sd->node = arbitRec.node;
6070 sd->ticket = arbitRec.ticket;
6071 sd->mask.clear();
6072 sendSignal(aPtr.p->blockRef, GSN_ARBIT_PREPREQ, signal,
6073 ArbitSignalData::SignalLength, JBB);
6074 }
6075 }
6076
6077 /**
6078 * Handle CHOOSE state. Entered only from RUN state when
6079 * there is a possible network partitioning. Send CHOOSE to
6080 * the arbitrator. On win switch to INIT state because a new
6081 * ticket must be created.
6082 */
6083 void
stateArbitChoose(Signal * signal)6084 Qmgr::stateArbitChoose(Signal* signal)
6085 {
6086 if (arbitRec.newstate) {
6087 jam();
6088 CRASH_INSERTION((Uint32)910 + arbitRec.state);
6089
6090 arbitRec.sendCount = 0;
6091 arbitRec.recvCount = 0;
6092 arbitRec.code = 0;
6093 arbitRec.newstate = false;
6094 }
6095
6096 switch(arbitRec.method){
6097 case ArbitRec::METHOD_EXTERNAL:
6098 {
6099 if (! arbitRec.sendCount) {
6100 jam();
6101 ndbrequire(arbitRec.node == 0); // No arbitrator selected
6102 // Don't send CHOOSE to anyone, just wait for timeout to expire
6103 arbitRec.sendCount = 1;
6104 arbitRec.setTimestamp();
6105 return;
6106 }
6107
6108 if (arbitRec.getTimediff() > getArbitTimeout()) {
6109 jam();
6110 // Arbitration timeout has expired
6111 ndbrequire(arbitRec.node == 0); // No arbitrator selected
6112
6113 NodeBitmask nodes;
6114 computeArbitNdbMask(nodes);
6115 arbitRec.code = ArbitCode::WinWaitExternal;
6116 reportArbitEvent(signal, NDB_LE_ArbitResult, nodes);
6117
6118 sendCommitFailReq(signal); // start commit of failed nodes
6119 arbitRec.state = ARBIT_INIT;
6120 arbitRec.newstate = true;
6121 return;
6122 }
6123 break;
6124 }
6125
6126 case ArbitRec::METHOD_DEFAULT:
6127 {
6128 if (! arbitRec.sendCount) {
6129 jam();
6130 const BlockReference blockRef = calcApiClusterMgrBlockRef(arbitRec.node);
6131 ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
6132 sd->sender = getOwnNodeId();
6133 sd->code = 0;
6134 sd->node = arbitRec.node;
6135 sd->ticket = arbitRec.ticket;
6136 computeArbitNdbMask(sd->mask);
6137 if (ERROR_INSERTED(943))
6138 {
6139 ndbout << "Not sending GSN_ARBIT_CHOOSEREQ, thereby causing"
6140 << " arbitration to time out."<< endl;
6141 }
6142 else
6143 {
6144 sendSignal(blockRef, GSN_ARBIT_CHOOSEREQ, signal,
6145 ArbitSignalData::SignalLength, JBA);
6146 }
6147 arbitRec.sendCount = 1;
6148 arbitRec.setTimestamp(); // send time
6149 return;
6150 }
6151
6152 if (arbitRec.recvCount) {
6153 jam();
6154 reportArbitEvent(signal, NDB_LE_ArbitResult);
6155 if (arbitRec.code == ArbitCode::WinChoose) {
6156 jam();
6157 sendCommitFailReq(signal); // start commit of failed nodes
6158 arbitRec.state = ARBIT_INIT;
6159 arbitRec.newstate = true;
6160 return;
6161 }
6162 arbitRec.state = ARBIT_CRASH;
6163 arbitRec.newstate = true;
6164 stateArbitCrash(signal); // do it at once
6165 return;
6166 }
6167
6168 if (arbitRec.getTimediff() > getArbitTimeout()) {
6169 jam();
6170 // Arbitration timeout has expired
6171 arbitRec.code = ArbitCode::ErrTimeout;
6172 reportArbitEvent(signal, NDB_LE_ArbitState);
6173 arbitRec.state = ARBIT_CRASH;
6174 arbitRec.newstate = true;
6175 stateArbitCrash(signal); // do it at once
6176 return;
6177 }
6178 break;
6179 }
6180
6181 default:
6182 ndbrequire(false);
6183 break;
6184 }
6185 }
6186
6187 void
execARBIT_CHOOSECONF(Signal * signal)6188 Qmgr::execARBIT_CHOOSECONF(Signal* signal)
6189 {
6190 jamEntry();
6191 ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
6192 if (!arbitRec.match(sd)) {
6193 jam();
6194 return; // stray signal
6195 }
6196 if (arbitRec.state != ARBIT_CHOOSE) {
6197 jam();
6198 return; // wrong state
6199 }
6200 if (arbitRec.recvCount) {
6201 jam();
6202 return; // wrong state
6203 }
6204 arbitRec.recvCount = 1;
6205 arbitRec.code = sd->code;
6206 }
6207
6208 void
execARBIT_CHOOSEREF(Signal * signal)6209 Qmgr::execARBIT_CHOOSEREF(Signal* signal)
6210 {
6211 jamEntry();
6212 ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
6213 if (sd->code == 0) {
6214 jam();
6215 sd->code = ArbitCode::ErrUnknown;
6216 }
6217 execARBIT_CHOOSECONF(signal);
6218 }
6219
6220 /**
6221 * Handle CRASH state. We must crash immediately.
6222 * XXX tell other nodes in our party to crash too.
6223 */
6224 void
stateArbitCrash(Signal * signal)6225 Qmgr::stateArbitCrash(Signal* signal)
6226 {
6227 jam();
6228 if (arbitRec.newstate) {
6229 jam();
6230 CRASH_INSERTION((Uint32)910 + arbitRec.state);
6231 arbitRec.setTimestamp();
6232 arbitRec.code = 0;
6233 arbitRec.newstate = false;
6234 }
6235 #ifdef ndb_arbit_crash_wait_for_event_report_to_get_out
6236 if (! (arbitRec.getTimediff() > getArbitTimeout()))
6237 return;
6238 #endif
6239 CRASH_INSERTION(932);
6240 CRASH_INSERTION(938);
6241 CRASH_INSERTION(943);
6242 progError(__LINE__, NDBD_EXIT_ARBIT_SHUTDOWN,
6243 "Arbitrator decided to shutdown this node");
6244 }
6245
6246 /**
6247 * Arbitrator may inform us that it will exit. This lets us
6248 * start looking sooner for a new one. Handle it like API node
6249 * failure.
6250 */
6251 void
execARBIT_STOPREP(Signal * signal)6252 Qmgr::execARBIT_STOPREP(Signal* signal)
6253 {
6254 jamEntry();
6255 ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
6256 if (! arbitRec.match(sd)) {
6257 jam();
6258 return; // stray signal
6259 }
6260 arbitRec.code = ArbitCode::ApiExit;
6261 handleArbitApiFail(signal, arbitRec.node);
6262 }
6263
6264 void
computeArbitNdbMask(NodeBitmaskPOD & aMask)6265 Qmgr::computeArbitNdbMask(NodeBitmaskPOD& aMask)
6266 {
6267 NodeRecPtr aPtr;
6268 aMask.clear();
6269 for (aPtr.i = 1; aPtr.i < MAX_NDB_NODES; aPtr.i++) {
6270 jam();
6271 ptrAss(aPtr, nodeRec);
6272 if (getNodeInfo(aPtr.i).getType() == NodeInfo::DB && aPtr.p->phase == ZRUNNING){
6273 jam();
6274 aMask.set(aPtr.i);
6275 }
6276 }
6277 }
6278
6279 void
computeArbitNdbMask(NdbNodeBitmaskPOD & aMask)6280 Qmgr::computeArbitNdbMask(NdbNodeBitmaskPOD& aMask)
6281 {
6282 NodeRecPtr aPtr;
6283 aMask.clear();
6284 for (aPtr.i = 1; aPtr.i < MAX_NDB_NODES; aPtr.i++) {
6285 jam();
6286 ptrAss(aPtr, nodeRec);
6287 if (getNodeInfo(aPtr.i).getType() == NodeInfo::DB && aPtr.p->phase == ZRUNNING){
6288 jam();
6289 aMask.set(aPtr.i);
6290 }
6291 }
6292 }
6293
6294 /**
6295 * Report arbitration event. We use arbitration signal format
6296 * where sender (word 0) is event type.
6297 */
6298 void
reportArbitEvent(Signal * signal,Ndb_logevent_type type,const NodeBitmask mask)6299 Qmgr::reportArbitEvent(Signal* signal, Ndb_logevent_type type,
6300 const NodeBitmask mask)
6301 {
6302 ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
6303 sd->sender = type;
6304 sd->code = arbitRec.code | (arbitRec.state << 16);
6305 sd->node = arbitRec.node;
6306 sd->ticket = arbitRec.ticket;
6307 sd->mask = mask;
6308
6309 // Log to console/stdout
6310 LogLevel ll;
6311 ll.setLogLevel(LogLevel::llNodeRestart, 15);
6312 g_eventLogger->log(type, &signal->theData[0],
6313 ArbitSignalData::SignalLength, 0, &ll);
6314
6315 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal,
6316 ArbitSignalData::SignalLength, JBB);
6317 }
6318
6319 // end of arbitration module
6320
6321 void
execDUMP_STATE_ORD(Signal * signal)6322 Qmgr::execDUMP_STATE_ORD(Signal* signal)
6323 {
6324 if (signal->theData[0] == 1)
6325 {
6326 unsigned max_nodes = MAX_NDB_NODES;
6327 if (signal->getLength() == 2)
6328 {
6329 max_nodes = signal->theData[1];
6330 if (max_nodes == 0 || max_nodes >= MAX_NODES)
6331 {
6332 max_nodes = MAX_NODES;
6333 }
6334 else
6335 {
6336 max_nodes++; // Include node id argument in loop
6337 }
6338 }
6339 infoEvent("creadyDistCom = %d, cpresident = %d\n",
6340 creadyDistCom, cpresident);
6341 infoEvent("cpresidentAlive = %d, cpresidentCand = %d (gci: %d)\n",
6342 cpresidentAlive,
6343 c_start.m_president_candidate,
6344 c_start.m_president_candidate_gci);
6345 infoEvent("ctoStatus = %d\n", ctoStatus);
6346 for(Uint32 i = 1; i < max_nodes; i++){
6347 NodeRecPtr nodePtr;
6348 nodePtr.i = i;
6349 ptrCheckGuard(nodePtr, MAX_NODES, nodeRec);
6350 char buf[100];
6351 switch(nodePtr.p->phase){
6352 case ZINIT:
6353 sprintf(buf, "Node %d: ZINIT(%d)", i, nodePtr.p->phase);
6354 break;
6355 case ZSTARTING:
6356 sprintf(buf, "Node %d: ZSTARTING(%d)", i, nodePtr.p->phase);
6357 break;
6358 case ZRUNNING:
6359 sprintf(buf, "Node %d: ZRUNNING(%d)", i, nodePtr.p->phase);
6360 break;
6361 case ZPREPARE_FAIL:
6362 sprintf(buf, "Node %d: ZPREPARE_FAIL(%d)", i, nodePtr.p->phase);
6363 break;
6364 case ZFAIL_CLOSING:
6365 sprintf(buf, "Node %d: ZFAIL_CLOSING(%d)", i, nodePtr.p->phase);
6366 break;
6367 case ZAPI_INACTIVE:
6368 sprintf(buf, "Node %d: ZAPI_INACTIVE(%d)", i, nodePtr.p->phase);
6369 break;
6370 case ZAPI_ACTIVE:
6371 sprintf(buf, "Node %d: ZAPI_ACTIVE(%d)", i, nodePtr.p->phase);
6372 break;
6373 case ZAPI_ACTIVATION_ONGOING:
6374 sprintf(buf, "Node %d: ZAPI_ACTIVATION_ONGOING(%d)",
6375 i,
6376 nodePtr.p->phase);
6377 break;
6378 default:
6379 sprintf(buf, "Node %d: <UNKNOWN>(%d)", i, nodePtr.p->phase);
6380 break;
6381 }
6382 infoEvent("%s", buf);
6383 }
6384 }
6385
6386 #ifdef ERROR_INSERT
6387 if (signal->theData[0] == 935 && signal->getLength() == 2)
6388 {
6389 SET_ERROR_INSERT_VALUE(935);
6390 c_error_insert_extra = signal->theData[1];
6391 }
6392 #endif
6393
6394 if (signal->theData[0] == 900 && signal->getLength() == 2)
6395 {
6396 ndbout_c("disconnecting %u", signal->theData[1]);
6397 api_failed(signal, signal->theData[1]);
6398 }
6399
6400 if (signal->theData[0] == 908)
6401 {
6402 int tag = signal->getLength() < 2 ? -1 : signal->theData[1];
6403 char buf[8192];
6404 // for easy grepping in *out.log ...
6405 strcpy(buf, "HB:");
6406 if (tag >= 0)
6407 sprintf(buf+strlen(buf), "%d:", tag);
6408 sprintf(buf+strlen(buf), " pres:%u", cpresident);
6409 sprintf(buf+strlen(buf), " own:%u", getOwnNodeId());
6410 NodeRecPtr myNodePtr;
6411 myNodePtr.i = getOwnNodeId();
6412 ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);
6413 sprintf(buf+strlen(buf), " dyn:%u-%u", myNodePtr.p->ndynamicId & 0xFFFF, myNodePtr.p->ndynamicId >> 16);
6414 sprintf(buf+strlen(buf), " mxdyn:%u", c_maxDynamicId);
6415 sprintf(buf+strlen(buf), " hb:%u->%u->%u", cneighbourl, getOwnNodeId(), cneighbourh);
6416 sprintf(buf+strlen(buf), " node:dyn-hi,cfg:");
6417 NodeRecPtr nodePtr;
6418 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++)
6419 {
6420 ptrAss(nodePtr, nodeRec);
6421 Uint32 type = getNodeInfo(nodePtr.i).m_type;
6422 if (type == NodeInfo::DB)
6423 {
6424 sprintf(buf+strlen(buf), " %u:%u-%u,%u", nodePtr.i, nodePtr.p->ndynamicId & 0xFFFF, nodePtr.p->ndynamicId >> 16, nodePtr.p->hbOrder);
6425 }
6426 }
6427 ndbout << buf << endl;
6428 }
6429
6430 #ifdef ERROR_INSERT
6431 Uint32 dumpCode = signal->theData[0];
6432 if ((dumpCode == 9992) ||
6433 (dumpCode == 9993))
6434 {
6435 if (signal->getLength() == 2)
6436 {
6437 Uint32 nodeId = signal->theData[1];
6438 Uint32& newNodeId = signal->theData[1];
6439 Uint32 length = 2;
6440 assert(257 > MAX_NODES);
6441 if (nodeId > MAX_NODES)
6442 {
6443 const char* type = "None";
6444 switch (nodeId)
6445 {
6446 case 257:
6447 {
6448 /* Left (lower) neighbour */
6449 newNodeId = cneighbourl;
6450 type = "Left neighbour";
6451 break;
6452 }
6453 case 258:
6454 {
6455 /* Right (higher) neighbour */
6456 newNodeId = cneighbourh;
6457 type = "Right neighbour";
6458 break;
6459 }
6460 case 259:
6461 {
6462 /* President */
6463 newNodeId = cpresident;
6464 type = "President";
6465 break;
6466 }
6467 }
6468 ndbout_c("QMGR : Mapping request on node id %u to node id %u (%s)",
6469 nodeId, newNodeId, type);
6470 if (newNodeId != nodeId)
6471 {
6472 sendSignal(CMVMI_REF, GSN_DUMP_STATE_ORD, signal, length, JBB);
6473 }
6474 }
6475 }
6476 }
6477
6478 if (dumpCode == 9994)
6479 {
6480 ndbout_c("setCCDelay(%u)", signal->theData[1]);
6481 setCCDelay(signal->theData[1]);
6482 m_connectivity_check.m_enabled = true;
6483 }
6484 #endif
6485
6486 if (signal->theData[0] == 939 && signal->getLength() == 2)
6487 {
6488 jam();
6489 Uint32 nodeId = signal->theData[1];
6490 ndbout_c("Force close communication to %u", nodeId);
6491 SET_ERROR_INSERT_VALUE2(939, nodeId);
6492 CloseComReqConf * closeCom = CAST_PTR(CloseComReqConf,
6493 signal->getDataPtrSend());
6494
6495 closeCom->xxxBlockRef = reference();
6496 closeCom->requestType = CloseComReqConf::RT_NO_REPLY;
6497 closeCom->failNo = 0;
6498 closeCom->noOfNodes = 1;
6499 NodeBitmask::clear(closeCom->theNodes);
6500 NodeBitmask::set(closeCom->theNodes, nodeId);
6501 sendSignal(TRPMAN_REF, GSN_CLOSE_COMREQ, signal,
6502 CloseComReqConf::SignalLength, JBB);
6503 }
6504 }//Qmgr::execDUMP_STATE_ORD()
6505
6506 void
execAPI_BROADCAST_REP(Signal * signal)6507 Qmgr::execAPI_BROADCAST_REP(Signal* signal)
6508 {
6509 jamEntry();
6510 ApiBroadcastRep api= *(const ApiBroadcastRep*)signal->getDataPtr();
6511
6512 SectionHandle handle(this, signal);
6513 Uint32 len = signal->getLength() - ApiBroadcastRep::SignalLength;
6514 memmove(signal->theData, signal->theData+ApiBroadcastRep::SignalLength,
6515 4*len);
6516
6517 NodeBitmask mask;
6518 NodeRecPtr nodePtr;
6519 for (nodePtr.i = 1; nodePtr.i < MAX_NODES; nodePtr.i++)
6520 {
6521 jam();
6522 ptrAss(nodePtr, nodeRec);
6523 if (nodePtr.p->phase == ZAPI_ACTIVE &&
6524 getNodeInfo(nodePtr.i).m_version >= api.minVersion)
6525 {
6526 jam();
6527 mask.set(nodePtr.i);
6528 }
6529 }
6530
6531 if (mask.isclear())
6532 {
6533 jam();
6534 releaseSections(handle);
6535 return;
6536 }
6537
6538 NodeReceiverGroup rg(API_CLUSTERMGR, mask);
6539 sendSignal(rg, api.gsn, signal, len, JBB,
6540 &handle);
6541 }
6542
6543 void
execNODE_FAILREP(Signal * signal)6544 Qmgr::execNODE_FAILREP(Signal * signal)
6545 {
6546 jamEntry();
6547 // make sure any distributed signals get acknowledged
6548 // destructive of the signal
6549 c_counterMgr.execNODE_FAILREP(signal);
6550 }
6551
6552 void
execALLOC_NODEID_REQ(Signal * signal)6553 Qmgr::execALLOC_NODEID_REQ(Signal * signal)
6554 {
6555 jamEntry();
6556 AllocNodeIdReq req = *(AllocNodeIdReq*)signal->getDataPtr();
6557 Uint32 error = 0;
6558
6559 NodeRecPtr nodePtr;
6560 nodePtr.i = req.nodeId;
6561 if ((nodePtr.i >= MAX_NODES) ||
6562 ((req.nodeType == NodeInfo::DB) &&
6563 (nodePtr.i >= MAX_NDB_NODES)))
6564 {
6565 /* Ignore messages about nodes not even within range */
6566 jam();
6567 return;
6568 }
6569 ptrAss(nodePtr, nodeRec);
6570
6571 if (refToBlock(req.senderRef) != QMGR) // request from management server
6572 {
6573 /* master */
6574
6575 if (getOwnNodeId() != cpresident)
6576 {
6577 jam();
6578 error = AllocNodeIdRef::NotMaster;
6579 }
6580 else if (!opAllocNodeIdReq.m_tracker.done())
6581 {
6582 jam();
6583 error = AllocNodeIdRef::Busy;
6584 }
6585 else if (c_connectedNodes.get(req.nodeId))
6586 {
6587 jam();
6588 error = AllocNodeIdRef::NodeConnected;
6589 }
6590 else if (nodePtr.p->m_secret != 0)
6591 {
6592 jam();
6593 error = AllocNodeIdRef::NodeReserved;
6594 }
6595 else if (req.nodeType != getNodeInfo(req.nodeId).m_type)
6596 {
6597 jam();
6598 error = AllocNodeIdRef::NodeTypeMismatch;
6599 }
6600 else if (req.nodeType == NodeInfo::API && c_allow_api_connect == 0)
6601 {
6602 jam();
6603 error = AllocNodeIdRef::NodeReserved;
6604 }
6605
6606 if (error)
6607 {
6608 jam();
6609 AllocNodeIdRef * ref = (AllocNodeIdRef*)signal->getDataPtrSend();
6610 ref->senderRef = reference();
6611 ref->errorCode = error;
6612 ref->masterRef = numberToRef(QMGR, cpresident);
6613 ref->senderData = req.senderData;
6614 ref->nodeId = req.nodeId;
6615 sendSignal(req.senderRef, GSN_ALLOC_NODEID_REF, signal,
6616 AllocNodeIdRef::SignalLength, JBB);
6617 return;
6618 }
6619
6620 if (ERROR_INSERTED(934) && req.nodeId != getOwnNodeId())
6621 {
6622 CRASH_INSERTION(934);
6623 }
6624
6625 /**
6626 * generate secret
6627 */
6628 const NDB_TICKS now = NdbTick_getCurrentTicks();
6629 const Uint32 secret_hi = Uint32(now.getUint64() >> 24);
6630 const Uint32 secret_lo = Uint32(now.getUint64() << 8) + getOwnNodeId();
6631 req.secret_hi = secret_hi;
6632 req.secret_lo = secret_lo;
6633
6634 if (req.timeout > 60000)
6635 req.timeout = 60000;
6636
6637 nodePtr.p->m_secret = (Uint64(secret_hi) << 32) + secret_lo;
6638 nodePtr.p->m_alloc_timeout = NdbTick_AddMilliseconds(now,req.timeout);
6639
6640 opAllocNodeIdReq.m_req = req;
6641 opAllocNodeIdReq.m_error = 0;
6642 opAllocNodeIdReq.m_connectCount =
6643 getNodeInfo(refToNode(req.senderRef)).m_connectCount;
6644
6645 jam();
6646 AllocNodeIdReq * req2 = (AllocNodeIdReq*)signal->getDataPtrSend();
6647 * req2 = req;
6648 req2->senderRef = reference();
6649 NodeReceiverGroup rg(QMGR, c_clusterNodes);
6650 RequestTracker & p = opAllocNodeIdReq.m_tracker;
6651 p.init<AllocNodeIdRef>(c_counterMgr, rg, GSN_ALLOC_NODEID_REF, 0);
6652
6653 sendSignal(rg, GSN_ALLOC_NODEID_REQ, signal,
6654 AllocNodeIdReq::SignalLengthQMGR, JBB);
6655 return;
6656 }
6657
6658 /* participant */
6659 if (c_connectedNodes.get(req.nodeId))
6660 {
6661 jam();
6662 error = AllocNodeIdRef::NodeConnected;
6663 }
6664 else if (req.nodeType != getNodeInfo(req.nodeId).m_type)
6665 {
6666 jam();
6667 error = AllocNodeIdRef::NodeTypeMismatch;
6668 }
6669 else if ((nodePtr.p->failState != NORMAL) ||
6670 ((req.nodeType == NodeInfo::DB) &&
6671 (cfailedNodes.get(nodePtr.i))))
6672 {
6673 /**
6674 * Either the node has committed its node failure in QMGR but not yet
6675 * completed the node internal node failure handling. Or the node
6676 * failure commit process is still ongoing in QMGR. We should not
6677 * allocate a node id in either case.
6678 */
6679 jam();
6680 error = AllocNodeIdRef::NodeFailureHandlingNotCompleted;
6681 }
6682 else if (req.nodeType == NodeInfo::API && nodePtr.p->phase != ZAPI_INACTIVE)
6683 {
6684 jam();
6685 if (cpresident != getOwnNodeId() && c_allow_api_connect == 0)
6686 {
6687 /**
6688 * Don't block during NR
6689 */
6690 jam();
6691 }
6692 else
6693 {
6694 jam();
6695 error = AllocNodeIdRef::NodeReserved;
6696 }
6697 }
6698 #if 0
6699 /**
6700 * For now only make "time/secret" based reservation on master
6701 * as we otherwise also need to clear it on failure + handle
6702 * master failure
6703 */
6704 else if (nodePtr.p->m_secret != 0)
6705 {
6706 jam();
6707 error = AllocNodeIdRef::NodeReserved;
6708 }
6709 #endif
6710
6711 if (error)
6712 {
6713 jam();
6714 AllocNodeIdRef * ref = (AllocNodeIdRef*)signal->getDataPtrSend();
6715 ref->senderRef = reference();
6716 ref->errorCode = error;
6717 ref->senderData = req.senderData;
6718 ref->nodeId = req.nodeId;
6719 ref->masterRef = numberToRef(QMGR, cpresident);
6720 sendSignal(req.senderRef, GSN_ALLOC_NODEID_REF, signal,
6721 AllocNodeIdRef::SignalLength, JBB);
6722 return;
6723 }
6724
6725 AllocNodeIdConf * conf = (AllocNodeIdConf*)signal->getDataPtrSend();
6726 conf->senderRef = reference();
6727 conf->secret_hi = req.secret_hi;
6728 conf->secret_lo = req.secret_lo;
6729 sendSignal(req.senderRef, GSN_ALLOC_NODEID_CONF, signal,
6730 AllocNodeIdConf::SignalLength, JBB);
6731 }
6732
6733 void
execALLOC_NODEID_CONF(Signal * signal)6734 Qmgr::execALLOC_NODEID_CONF(Signal * signal)
6735 {
6736 /* master */
6737
6738 jamEntry();
6739 const AllocNodeIdConf * conf = (AllocNodeIdConf*)signal->getDataPtr();
6740 opAllocNodeIdReq.m_tracker.reportConf(c_counterMgr,
6741 refToNode(conf->senderRef));
6742
6743 if (signal->getLength() >= AllocNodeIdConf::SignalLength)
6744 {
6745 jam();
6746 if (opAllocNodeIdReq.m_req.secret_hi != conf->secret_hi ||
6747 opAllocNodeIdReq.m_req.secret_lo != conf->secret_lo)
6748 {
6749 jam();
6750 if (opAllocNodeIdReq.m_error == 0)
6751 {
6752 jam();
6753 opAllocNodeIdReq.m_error = AllocNodeIdRef::Undefined;
6754 }
6755 }
6756 }
6757
6758 completeAllocNodeIdReq(signal);
6759 }
6760
6761
6762 void
execALLOC_NODEID_REF(Signal * signal)6763 Qmgr::execALLOC_NODEID_REF(Signal * signal)
6764 {
6765 /* master */
6766
6767 jamEntry();
6768 const AllocNodeIdRef * ref = (AllocNodeIdRef*)signal->getDataPtr();
6769
6770 if (ref->errorCode == AllocNodeIdRef::NF_FakeErrorREF)
6771 {
6772 jam();
6773 if (ref->nodeId == refToNode(ref->senderRef))
6774 {
6775 /**
6776 * The node id we are trying to allocate has responded with a REF,
6777 * this was sent in response to a node failure, so we are most
6778 * likely not ready to allocate this node id yet. Report node
6779 * failure handling not ready yet.
6780 */
6781 jam();
6782 opAllocNodeIdReq.m_tracker.reportRef(c_counterMgr,
6783 refToNode(ref->senderRef));
6784 if (opAllocNodeIdReq.m_error == 0)
6785 {
6786 jam();
6787 opAllocNodeIdReq.m_error =
6788 AllocNodeIdRef::NodeFailureHandlingNotCompleted;
6789 }
6790 }
6791 else
6792 {
6793 jam();
6794 opAllocNodeIdReq.m_tracker.ignoreRef(c_counterMgr,
6795 refToNode(ref->senderRef));
6796 }
6797 }
6798 else
6799 {
6800 jam();
6801 opAllocNodeIdReq.m_tracker.reportRef(c_counterMgr,
6802 refToNode(ref->senderRef));
6803 if (opAllocNodeIdReq.m_error == 0)
6804 {
6805 jam();
6806 opAllocNodeIdReq.m_error = ref->errorCode;
6807 }
6808 }
6809 completeAllocNodeIdReq(signal);
6810 }
6811
6812 void
completeAllocNodeIdReq(Signal * signal)6813 Qmgr::completeAllocNodeIdReq(Signal *signal)
6814 {
6815 /* master */
6816
6817 if (!opAllocNodeIdReq.m_tracker.done())
6818 {
6819 jam();
6820 return;
6821 }
6822
6823 if (opAllocNodeIdReq.m_connectCount !=
6824 getNodeInfo(refToNode(opAllocNodeIdReq.m_req.senderRef)).m_connectCount)
6825 {
6826 // management server not same version as the original requester
6827 jam();
6828 return;
6829 }
6830
6831 if (opAllocNodeIdReq.m_tracker.hasRef())
6832 {
6833 jam();
6834
6835 {
6836 /**
6837 * Clear reservation
6838 */
6839 NodeRecPtr nodePtr;
6840 nodePtr.i = opAllocNodeIdReq.m_req.nodeId;
6841 ptrAss(nodePtr, nodeRec);
6842 nodePtr.p->m_secret = 0;
6843 }
6844
6845 AllocNodeIdRef * ref = (AllocNodeIdRef*)signal->getDataPtrSend();
6846 ref->senderRef = reference();
6847 ref->senderData = opAllocNodeIdReq.m_req.senderData;
6848 ref->nodeId = opAllocNodeIdReq.m_req.nodeId;
6849 ref->errorCode = opAllocNodeIdReq.m_error;
6850 ref->masterRef = numberToRef(QMGR, cpresident);
6851 ndbassert(AllocNodeIdRef::SignalLength == 5);
6852 sendSignal(opAllocNodeIdReq.m_req.senderRef, GSN_ALLOC_NODEID_REF, signal,
6853 AllocNodeIdRef::SignalLength, JBB);
6854 return;
6855 }
6856
6857 jam();
6858
6859 AllocNodeIdConf * conf = (AllocNodeIdConf*)signal->getDataPtrSend();
6860 conf->senderRef = reference();
6861 conf->senderData = opAllocNodeIdReq.m_req.senderData;
6862 conf->nodeId = opAllocNodeIdReq.m_req.nodeId;
6863 conf->secret_lo = opAllocNodeIdReq.m_req.secret_lo;
6864 conf->secret_hi = opAllocNodeIdReq.m_req.secret_hi;
6865 sendSignal(opAllocNodeIdReq.m_req.senderRef, GSN_ALLOC_NODEID_CONF, signal,
6866 AllocNodeIdConf::SignalLength, JBB);
6867
6868 /**
6869 * We are the master and master DIH wants to keep track of node restart
6870 * state to be able to control LCP start and stop and also to be able
6871 * to easily report this state to the user when he asks for it.
6872 */
6873 AllocNodeIdRep *rep = (AllocNodeIdRep*)signal->getDataPtrSend();
6874 rep->nodeId = opAllocNodeIdReq.m_req.nodeId;
6875 EXECUTE_DIRECT(DBDIH, GSN_ALLOC_NODEID_REP, signal,
6876 AllocNodeIdRep::SignalLength);
6877 }
6878
6879 void
execSTOP_REQ(Signal * signal)6880 Qmgr::execSTOP_REQ(Signal* signal)
6881 {
6882 jamEntry();
6883 c_stopReq = * (StopReq*)signal->getDataPtr();
6884
6885 if (c_stopReq.senderRef)
6886 {
6887 jam();
6888 ndbrequire(NdbNodeBitmask::get(c_stopReq.nodes, getOwnNodeId()));
6889
6890 StopConf *conf = (StopConf*)signal->getDataPtrSend();
6891 conf->senderData = c_stopReq.senderData;
6892 conf->nodeState = getOwnNodeId();
6893 sendSignal(c_stopReq.senderRef,
6894 GSN_STOP_CONF, signal, StopConf::SignalLength, JBA);
6895 }
6896 }
6897
6898 bool
check_multi_node_shutdown(Signal * signal)6899 Qmgr::check_multi_node_shutdown(Signal* signal)
6900 {
6901 if (c_stopReq.senderRef &&
6902 NdbNodeBitmask::get(c_stopReq.nodes, getOwnNodeId()))
6903 {
6904 jam();
6905 if(StopReq::getPerformRestart(c_stopReq.requestInfo))
6906 {
6907 jam();
6908 StartOrd * startOrd = (StartOrd *)&signal->theData[0];
6909 startOrd->restartInfo = c_stopReq.requestInfo;
6910 sendSignal(CMVMI_REF, GSN_START_ORD, signal, 2, JBA);
6911 } else {
6912 sendSignal(CMVMI_REF, GSN_STOP_ORD, signal, 1, JBA);
6913 }
6914 return true;
6915 }
6916 return false;
6917 }
6918
6919 int
check_hb_order_config()6920 Qmgr::check_hb_order_config()
6921 {
6922 m_hb_order_config_used = false;
6923 Uint32 count = 0;
6924 Uint32 count_zero = 0;
6925 NodeRecPtr nodePtr;
6926 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++)
6927 {
6928 ptrAss(nodePtr, nodeRec);
6929 const NodeInfo& nodeInfo = getNodeInfo(nodePtr.i);
6930 if (nodeInfo.m_type == NodeInfo::DB)
6931 {
6932 count++;
6933 if (nodePtr.p->hbOrder == 0)
6934 count_zero++;
6935 }
6936 }
6937 ndbrequire(count != 0); // must have node info
6938 if (count_zero == count)
6939 {
6940 jam();
6941 return 0; // no hbOrder defined
6942 }
6943 if (count_zero != 0)
6944 {
6945 jam();
6946 return -1; // error: not all zero or all nonzero
6947 }
6948 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++)
6949 {
6950 ptrAss(nodePtr, nodeRec);
6951 const NodeInfo& nodeInfo = getNodeInfo(nodePtr.i);
6952 if (nodeInfo.m_type == NodeInfo::DB)
6953 {
6954 NodeRecPtr nodePtr2;
6955 for (nodePtr2.i = nodePtr.i + 1; nodePtr2.i < MAX_NDB_NODES; nodePtr2.i++)
6956 {
6957 ptrAss(nodePtr2, nodeRec);
6958 const NodeInfo& nodeInfo2 = getNodeInfo(nodePtr2.i);
6959 if (nodeInfo2.m_type == NodeInfo::DB)
6960 {
6961 if (nodePtr.i != nodePtr2.i &&
6962 nodePtr.p->hbOrder == nodePtr2.p->hbOrder)
6963 {
6964 jam();
6965 return -2; // error: duplicate nonzero value
6966 }
6967 }
6968 }
6969 }
6970 }
6971 m_hb_order_config_used = true;
6972 return 0;
6973 }
6974
6975 static const Uint32 CC_SuspectTicks = 1;
6976 static const Uint32 CC_FailedTicks = 2;
6977
6978 void
startConnectivityCheck(Signal * signal,Uint32 reason,Uint32 causingNode)6979 Qmgr::startConnectivityCheck(Signal* signal, Uint32 reason, Uint32 causingNode)
6980 {
6981 jam();
6982 ndbrequire(m_connectivity_check.getEnabled());
6983
6984 if (m_connectivity_check.m_active)
6985 {
6986 jam();
6987 /* Connectivity check underway already
6988 * do nothing
6989 */
6990 return;
6991 }
6992
6993
6994 m_connectivity_check.m_nodesPinged.clear();
6995
6996 /* Send NODE_PINGREQ signal to all other running nodes, and
6997 * initialise connectivity check bitmasks.
6998 * Note that nodes may already be considered suspect due to
6999 * a previous connectivity check round.
7000 */
7001 Uint32 ownId = getOwnNodeId();
7002 NodePingReq* pingReq = CAST_PTR(NodePingReq, &signal->theData[0]);
7003 pingReq->senderData = ++m_connectivity_check.m_currentRound;
7004 pingReq->senderRef = reference();
7005
7006 for (Uint32 i=1; i < MAX_NDB_NODES; i++)
7007 {
7008 if (i != ownId)
7009 {
7010 NodeRec& node = nodeRec[i];
7011 if (node.phase == ZRUNNING)
7012 {
7013 /* If connection was considered ok, treat as unknown,
7014 * If it was considered slow, continue to treat
7015 * as slow
7016 */
7017 sendSignal(node.blockRef,
7018 GSN_NODE_PING_REQ,
7019 signal,
7020 NodePingReq::SignalLength,
7021 JBA);
7022
7023 m_connectivity_check.m_nodesPinged.set(i);
7024 }
7025 }
7026 }
7027
7028 /* Initialise result bitmasks */
7029 m_connectivity_check.m_nodesWaiting.assign(m_connectivity_check.m_nodesPinged);
7030 m_connectivity_check.m_nodesFailedDuring.clear();
7031
7032 /* Ensure only live nodes are considered suspect */
7033 m_connectivity_check.m_nodesSuspect.bitAND(m_connectivity_check.m_nodesPinged);
7034
7035 const char* reasonText = "Unknown";
7036 bool firstTime = true;
7037
7038 switch(reason)
7039 {
7040 case FailRep::ZHEARTBEAT_FAILURE:
7041 reasonText = "Heartbeat failure";
7042 break;
7043 case FailRep::ZCONNECT_CHECK_FAILURE:
7044 reasonText = "Connectivity check request";
7045 break;
7046 default:
7047 firstTime = false;
7048 ndbrequire(m_connectivity_check.m_nodesSuspect.count() > 0);
7049 break;
7050 }
7051
7052 if (!m_connectivity_check.m_nodesPinged.isclear())
7053 {
7054 jam();
7055 {
7056 char buff[100];
7057 m_connectivity_check.m_nodesPinged.getText(buff);
7058 if (firstTime)
7059 {
7060 g_eventLogger->info("QMGR : Starting connectivity check of %u other nodes (%s) due to %s from node %u.",
7061 m_connectivity_check.m_nodesPinged.count(),
7062 buff,
7063 reasonText,
7064 causingNode);
7065 }
7066 else
7067 {
7068 char buff2[100];
7069 m_connectivity_check.m_nodesSuspect.getText(buff2);
7070 g_eventLogger->info("QMGR : Restarting connectivity check of %u other nodes (%s) due to %u syspect nodes (%s)",
7071 m_connectivity_check.m_nodesPinged.count(),
7072 buff,
7073 m_connectivity_check.m_nodesSuspect.count(),
7074 buff2);
7075 }
7076 }
7077
7078 /* Generate cluster log event */
7079 Uint32 bitmaskSz = NdbNodeBitmask::Size;
7080 signal->theData[0] = NDB_LE_ConnectCheckStarted;
7081 signal->theData[1] = m_connectivity_check.m_nodesPinged.count();
7082 signal->theData[2] = reason;
7083 signal->theData[3] = causingNode;
7084 signal->theData[4] = bitmaskSz;
7085 Uint32* sigPtr = &signal->theData[5];
7086 m_connectivity_check.m_nodesPinged.copyto(bitmaskSz, sigPtr); sigPtr+= bitmaskSz;
7087 m_connectivity_check.m_nodesSuspect.copyto(bitmaskSz, sigPtr);
7088 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 5 + (2 * bitmaskSz), JBB);
7089
7090 m_connectivity_check.m_active = true;
7091 m_connectivity_check.m_tick = 0;
7092 const NDB_TICKS now = NdbTick_getCurrentTicks();
7093 m_connectivity_check.m_timer.reset(now);
7094 }
7095 else
7096 {
7097 g_eventLogger->info("QMGR : Connectivity check requested due to %s (from %u) not started as no other running nodes.",
7098 reasonText,
7099 causingNode);
7100 }
7101 }
7102
7103 void
execNODE_PINGREQ(Signal * signal)7104 Qmgr::execNODE_PINGREQ(Signal* signal)
7105 {
7106 jamEntry();
7107 Uint32 ownId = getOwnNodeId();
7108 const NodePingReq* pingReq = CAST_CONSTPTR(NodePingReq, &signal->theData[0]);
7109 Uint32 sendersRef = signal->getSendersBlockRef();
7110 Uint32 sendersNodeId = refToNode(sendersRef);
7111 Uint32 senderData = pingReq->senderData;
7112
7113 ndbrequire(sendersNodeId != ownId);
7114
7115 /* We will start our own connectivity check if necessary
7116 * before responding with PING_CONF to the requestor.
7117 * This means that the sending node will receive our PING_REQ
7118 * before our PING_CONF, which should avoid them starting an
7119 * unnecessary extra connectivity check round in some cases.
7120 */
7121 if (likely(m_connectivity_check.getEnabled()))
7122 {
7123 jam();
7124 /* We have connectivity checking configured */
7125 if (! m_connectivity_check.m_active)
7126 {
7127 jam();
7128
7129 {
7130 /* Don't start a new connectivity check if the requesting
7131 * node has failed from our point of view
7132 */
7133 NodeRecPtr nodePtr;
7134 nodePtr.i = sendersNodeId;
7135 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
7136 if (unlikely(nodePtr.p->phase != ZRUNNING))
7137 {
7138 jam();
7139
7140 g_eventLogger->warning("QMGR : Discarding NODE_PINGREQ from non-running node %u (%u)",
7141 sendersNodeId, nodePtr.p->phase);
7142 return;
7143 }
7144 }
7145
7146 /* Start our own Connectivity Check now indicating reason and causing node */
7147 startConnectivityCheck(signal, FailRep::ZCONNECT_CHECK_FAILURE, sendersNodeId);
7148 }
7149 }
7150 else
7151 {
7152 jam();
7153 g_eventLogger->warning("QMGR : NODE_PINGREQ received from node %u, but connectivity "
7154 "checking not configured on this node. Ensure all "
7155 "nodes have the same configuration for parameter "
7156 "ConnectCheckIntervalMillis.",
7157 sendersNodeId);
7158 }
7159
7160 /* Now respond with NODE_PINGCONF */
7161 NodePingConf* pingConf = CAST_PTR(NodePingConf, &signal->theData[0]);
7162
7163 pingConf->senderData = senderData;
7164 pingConf->senderRef = reference();
7165
7166 sendSignal(sendersRef,
7167 GSN_NODE_PING_CONF,
7168 signal,
7169 NodePingConf::SignalLength,
7170 JBA);
7171 }
7172
7173 void
reportNodeConnect(Uint32 nodeId)7174 Qmgr::ConnectCheckRec::reportNodeConnect(Uint32 nodeId)
7175 {
7176 /* Clear any suspicion */
7177 m_nodesSuspect.clear(nodeId);
7178 }
7179
7180 bool
reportNodeFailure(Uint32 nodeId)7181 Qmgr::ConnectCheckRec::reportNodeFailure(Uint32 nodeId)
7182 {
7183 if (unlikely(m_active))
7184 {
7185 m_nodesFailedDuring.set(nodeId);
7186
7187 if (m_nodesWaiting.get(nodeId))
7188 {
7189 /* We were waiting for a NODE_PING_CONF from this node,
7190 * remove it from the set
7191 */
7192 m_nodesWaiting.clear(nodeId);
7193
7194 return m_nodesWaiting.isclear();
7195 }
7196 }
7197 return false;
7198 }
7199
7200 void
execNODE_PINGCONF(Signal * signal)7201 Qmgr::execNODE_PINGCONF(Signal* signal)
7202 {
7203 jamEntry();
7204
7205 ndbrequire(m_connectivity_check.getEnabled());
7206
7207 const NodePingConf* pingConf = CAST_CONSTPTR(NodePingConf, &signal->theData[0]);
7208 Uint32 sendersBlockRef = signal->getSendersBlockRef();
7209 Uint32 sendersNodeId = refToNode(sendersBlockRef);
7210 Uint32 roundNumber = pingConf->senderData;
7211
7212 ndbrequire(sendersNodeId != getOwnNodeId());
7213 ndbrequire((m_connectivity_check.m_active) || /* Normal */
7214 (m_connectivity_check.m_nodesWaiting.get(sendersNodeId) || /* We killed last round */
7215 m_connectivity_check.m_nodesFailedDuring.get(sendersNodeId))); /* Someone killed */
7216
7217 if (unlikely((! m_connectivity_check.m_active) ||
7218 (roundNumber != m_connectivity_check.m_currentRound)))
7219 {
7220 g_eventLogger->warning("QMGR : Received NODEPING_CONF from node %u for round %u, "
7221 "but we are %sactive on round %u. Discarding.",
7222 sendersNodeId,
7223 roundNumber,
7224 ((m_connectivity_check.m_active)?"":"in"),
7225 m_connectivity_check.m_currentRound);
7226 return;
7227 }
7228
7229 if (ERROR_INSERTED(938))
7230 {
7231 ndbout_c("QMGR : execNODE_PING_CONF() from %u in tick %u",
7232 sendersNodeId, m_connectivity_check.m_tick);
7233 }
7234
7235 /* Node must have been pinged, we must be waiting for the response,
7236 * or the node must have already failed
7237 */
7238 ndbrequire(m_connectivity_check.m_nodesPinged.get(sendersNodeId));
7239 ndbrequire(m_connectivity_check.m_nodesWaiting.get(sendersNodeId) ||
7240 m_connectivity_check.m_nodesFailedDuring.get(sendersNodeId));
7241
7242 m_connectivity_check.m_nodesWaiting.clear(sendersNodeId);
7243
7244 if (likely(m_connectivity_check.m_tick < CC_SuspectTicks))
7245 {
7246 jam();
7247 /* Node responded on time, clear any suspicion about it */
7248 m_connectivity_check.m_nodesSuspect.clear(sendersNodeId);
7249 }
7250
7251 if (m_connectivity_check.m_nodesWaiting.isclear())
7252 {
7253 jam();
7254 /* Connectivity check round is now finished */
7255 connectivityCheckCompleted(signal);
7256 }
7257 }
7258
7259 void
connectivityCheckCompleted(Signal * signal)7260 Qmgr::connectivityCheckCompleted(Signal* signal)
7261 {
7262 jam();
7263
7264 m_connectivity_check.m_active = false;
7265
7266 /* Log the following :
7267 * Nodes checked
7268 * Nodes responded ok
7269 * Nodes responded late (now suspect)
7270 * Nodes failed to respond.
7271 * Nodes failed during
7272 */
7273 char pinged[100];
7274 char late[100];
7275 char silent[100];
7276 char failed[100];
7277
7278 /* Any 'waiting' nodes have been killed
7279 * Surviving suspects do not include them.
7280 */
7281 NdbNodeBitmask survivingSuspects(m_connectivity_check.m_nodesSuspect);
7282 survivingSuspects.bitANDC(m_connectivity_check.m_nodesWaiting);
7283
7284 /* Nodes that failed during the check are also excluded */
7285 survivingSuspects.bitANDC(m_connectivity_check.m_nodesFailedDuring);
7286
7287 m_connectivity_check.m_nodesPinged.getText(pinged);
7288 survivingSuspects.getText(late);
7289 m_connectivity_check.m_nodesWaiting.getText(silent);
7290 m_connectivity_check.m_nodesFailedDuring.getText(failed);
7291
7292 g_eventLogger->info("QMGR : Connectivity check completed, "
7293 "%u other nodes checked (%s), "
7294 "%u responded on time, "
7295 "%u responded late (%s), "
7296 "%u no response will be failed (%s), "
7297 "%u failed during check (%s)\n",
7298 m_connectivity_check.m_nodesPinged.count(),
7299 pinged,
7300 m_connectivity_check.m_nodesPinged.count() -
7301 m_connectivity_check.m_nodesSuspect.count(),
7302 survivingSuspects.count(),
7303 late,
7304 m_connectivity_check.m_nodesWaiting.count(),
7305 silent,
7306 m_connectivity_check.m_nodesFailedDuring.count(),
7307 failed);
7308
7309 /* Log in Cluster log */
7310 signal->theData[0] = NDB_LE_ConnectCheckCompleted;
7311 signal->theData[1] = m_connectivity_check.m_nodesPinged.count();
7312 signal->theData[2] = survivingSuspects.count();
7313 signal->theData[3] = m_connectivity_check.m_nodesWaiting.count() +
7314 m_connectivity_check.m_nodesFailedDuring.count();
7315
7316 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);
7317
7318 if (survivingSuspects.count() > 0)
7319 {
7320 jam();
7321 /* Still suspect nodes, start another round */
7322 g_eventLogger->info("QMGR : Starting new connectivity check due to suspect nodes.");
7323 /* Restart connectivity check, no external reason or cause */
7324 startConnectivityCheck(signal, 0, 0);
7325 }
7326 else
7327 {
7328 jam();
7329 /* No suspect nodes, stop the protocol now */
7330
7331 g_eventLogger->info("QMGR : All other nodes (%u) connectivity ok.",
7332 m_connectivity_check.m_nodesPinged.count() -
7333 (m_connectivity_check.m_nodesWaiting.count() +
7334 m_connectivity_check.m_nodesFailedDuring.count()));
7335
7336 /* Send a heartbeat to our right neighbour at this point as a gesture
7337 * of goodwill
7338 */
7339 sendHeartbeat(signal);
7340 hb_send_timer.reset(NdbTick_getCurrentTicks());
7341 };
7342 }
7343
7344 void
checkConnectivityTimeSignal(Signal * signal)7345 Qmgr::checkConnectivityTimeSignal(Signal* signal)
7346 {
7347 /* Executed periodically when a connectivity check is
7348 * underway.
7349 * After CC_SuspectTicks have elapsed, any nodes
7350 * which have not responded are considered
7351 * 'Suspect'.
7352 * After CC_FailedTicks have elapsed, any nodes
7353 * which have not responded are considered
7354 * to have failed, and failure handling
7355 * begins.
7356 */
7357 jam();
7358
7359 /* Preconditions, otherwise we shouldn't have been called */
7360 ndbrequire(m_connectivity_check.getEnabled());
7361 ndbrequire(m_connectivity_check.m_active);
7362 ndbrequire(!m_connectivity_check.m_nodesWaiting.isclear());
7363
7364 m_connectivity_check.m_tick++;
7365
7366 switch (m_connectivity_check.m_tick)
7367 {
7368 case CC_SuspectTicks:
7369 {
7370 jam();
7371 /* Still waiting to hear from some nodes, they are now
7372 * suspect
7373 */
7374 m_connectivity_check.m_nodesSuspect.bitOR(m_connectivity_check.m_nodesWaiting);
7375 return;
7376 }
7377 case CC_FailedTicks:
7378 {
7379 jam();
7380 /* Still waiting to hear from some nodes, they will now
7381 * be failed
7382 */
7383 m_connectivity_check.m_active = false;
7384 Uint32 nodeId = 0;
7385
7386 while ((nodeId = m_connectivity_check.m_nodesWaiting.find(nodeId))
7387 != BitmaskImpl::NotFound)
7388 {
7389 jam();
7390 /* Log failure reason */
7391 /* Todo : Connectivity Check specific failure log? */
7392 signal->theData[0] = NDB_LE_DeadDueToHeartbeat;
7393 signal->theData[1] = nodeId;
7394
7395 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
7396
7397 /* Fail the node */
7398 /* TODO : Consider real time break here */
7399 failReportLab(signal, nodeId, FailRep::ZCONNECT_CHECK_FAILURE, getOwnNodeId());
7400 nodeId++;
7401 }
7402
7403 /* Now handle the end of the Connectivity Check */
7404 connectivityCheckCompleted(signal);
7405 }
7406 }
7407 }
7408
7409 bool
isNodeConnectivitySuspect(Uint32 nodeId) const7410 Qmgr::isNodeConnectivitySuspect(Uint32 nodeId) const
7411 {
7412 return m_connectivity_check.m_nodesSuspect.get(nodeId);
7413 }
7414
7415 void
handleFailFromSuspect(Signal * signal,Uint32 reason,Uint16 aFailedNode,Uint16 sourceNode)7416 Qmgr::handleFailFromSuspect(Signal* signal,
7417 Uint32 reason,
7418 Uint16 aFailedNode,
7419 Uint16 sourceNode)
7420 {
7421 jam();
7422
7423 const char* reasonText = "Unknown";
7424
7425 /* We have received a failure report about some node X from
7426 * some other node that we consider to have suspect connectivity
7427 * which may have caused the report.
7428 *
7429 * We will 'invert' the sense of this, and handle it as
7430 * a failure report of the sender, with the same cause.
7431 */
7432 switch(reason)
7433 {
7434 case FailRep::ZCONNECT_CHECK_FAILURE:
7435 jam();
7436 /* Suspect says that connectivity check failed for another node.
7437 * As suspect has bad connectivity from our point of view, we
7438 * blame him.
7439 */
7440 reasonText = "ZCONNECT_CHECK_FAILURE";
7441 break;
7442 case FailRep::ZLINK_FAILURE:
7443 jam();
7444 /* Suspect says that link failed for another node.
7445 * As suspect has bad connectivity from our point of view, we
7446 * blame her.
7447 */
7448 reasonText = "ZLINK_FAILURE";
7449 break;
7450 default:
7451 ndbrequire(false);
7452 }
7453
7454 g_eventLogger->warning("QMGR : Received Connectivity failure notification about "
7455 "%u from suspect node %u with reason %s. "
7456 "Mapping to failure of %u sourced by me.",
7457 aFailedNode, sourceNode, reasonText, sourceNode);
7458
7459 signal->theData[0] = NDB_LE_NodeFailRejected;
7460 signal->theData[1] = reason;
7461 signal->theData[2] = aFailedNode;
7462 signal->theData[3] = sourceNode;
7463
7464 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);
7465
7466 failReportLab(signal, sourceNode, (FailRep::FailCause) reason, getOwnNodeId());
7467 }
7468
7469 void
execDBINFO_SCANREQ(Signal * signal)7470 Qmgr::execDBINFO_SCANREQ(Signal *signal)
7471 {
7472 DbinfoScanReq req= *(DbinfoScanReq*)signal->theData;
7473 Ndbinfo::Ratelimit rl;
7474
7475 jamEntry();
7476 switch(req.tableId) {
7477 case Ndbinfo::MEMBERSHIP_TABLEID:
7478 {
7479 jam();
7480 Ndbinfo::Row row(signal, req);
7481 row.write_uint32(getOwnNodeId());
7482 row.write_uint32(getNodeState().nodeGroup);
7483 row.write_uint32(cneighbourl);
7484 row.write_uint32(cneighbourh);
7485 row.write_uint32(cpresident);
7486
7487 // President successor
7488 Uint32 successor = 0;
7489 {
7490 NodeRecPtr nodePtr;
7491 UintR minDynamicId = (UintR)-1;
7492 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++)
7493 {
7494 jam();
7495 ptrAss(nodePtr, nodeRec);
7496 if (nodePtr.p->phase == ZRUNNING)
7497 {
7498 if ((nodePtr.p->ndynamicId & 0xFFFF) < minDynamicId)
7499 {
7500 jam();
7501 if (cpresident != nodePtr.i)
7502 {
7503 minDynamicId = (nodePtr.p->ndynamicId & 0xFFFF);
7504 successor = nodePtr.i;
7505 }
7506 }
7507 }
7508 }
7509 }
7510 row.write_uint32(successor);
7511
7512 NodeRecPtr myNodePtr;
7513 myNodePtr.i = getOwnNodeId();
7514 ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);
7515 row.write_uint32(myNodePtr.p->ndynamicId);
7516
7517 row.write_uint32(arbitRec.node); // arbitrator
7518
7519 char ticket[20]; // Need 16 characters + 1 for trailing '\0'
7520 arbitRec.ticket.getText(ticket, sizeof(ticket));
7521 row.write_string(ticket);
7522
7523 row.write_uint32(arbitRec.state);
7524
7525 // arbitrator connected
7526 row.write_uint32(c_connectedNodes.get(arbitRec.node));
7527
7528 // Find potential (rank1 and rank2) arbitrators that are connected.
7529 NodeRecPtr aPtr;
7530 // buf_size: Node nr (max 3 chars) and ', ' + trailing '\0'
7531 const int buf_size = 5 * MAX_NODES + 1;
7532 char buf[buf_size];
7533
7534 for (unsigned rank = 1; rank <= 2; rank++)
7535 {
7536 jam();
7537 aPtr.i = 0;
7538 const unsigned stop = NodeBitmask::NotFound;
7539 int buf_offset = 0;
7540 const char* delimiter = "";
7541
7542 while ((aPtr.i = arbitRec.apiMask[rank].find(aPtr.i + 1)) != stop)
7543 {
7544 jam();
7545 ptrAss(aPtr, nodeRec);
7546 if (c_connectedNodes.get(aPtr.i))
7547 {
7548 buf_offset += BaseString::snprintf(buf + buf_offset,
7549 buf_size - buf_offset,
7550 "%s%u", delimiter, aPtr.i);
7551 delimiter = ", ";
7552 }
7553 }
7554
7555 if (buf_offset == 0)
7556 row.write_string("-");
7557 else
7558 row.write_string(buf);
7559 }
7560
7561 ndbinfo_send_row(signal, req, row, rl);
7562 break;
7563 }
7564 default:
7565 break;
7566 }
7567 ndbinfo_send_scan_conf(signal, req, rl);
7568 }
7569
7570
7571 void
execISOLATE_ORD(Signal * signal)7572 Qmgr::execISOLATE_ORD(Signal* signal)
7573 {
7574 jamEntry();
7575
7576 IsolateOrd* sig = (IsolateOrd*) signal->theData;
7577
7578 ndbrequire(sig->senderRef != 0);
7579 NdbNodeBitmask victims;
7580 victims.assign(NdbNodeBitmask::Size, sig->nodesToIsolate);
7581 ndbrequire(!victims.isclear());
7582
7583 switch (sig->isolateStep)
7584 {
7585 case IsolateOrd::IS_REQ:
7586 {
7587 jam();
7588 /* Initial request, broadcast immediately */
7589
7590 /* Need to get the set of live nodes to broadcast to */
7591 NdbNodeBitmask hitmen(c_clusterNodes);
7592
7593 unsigned nodeId = hitmen.find_first();
7594 do
7595 {
7596 jam();
7597 if (!ndbd_isolate_ord(getNodeInfo(nodeId).m_version))
7598 {
7599 jam();
7600 /* Node not able to handle ISOLATE_ORD, skip */
7601 hitmen.clear(nodeId);
7602 }
7603
7604 nodeId = hitmen.find_next(nodeId + 1);
7605 } while (nodeId != BitmaskImpl::NotFound);
7606
7607 ndbrequire(!hitmen.isclear()); /* At least me */
7608
7609 NodeReceiverGroup rg(QMGR, hitmen);
7610
7611 sig->isolateStep = IsolateOrd::IS_BROADCAST;
7612 sendSignal(rg, GSN_ISOLATE_ORD, signal, IsolateOrd::SignalLength, JBA);
7613 return;
7614 }
7615 case IsolateOrd::IS_BROADCAST:
7616 {
7617 jam();
7618 /* Received reqest, delay */
7619 sig->isolateStep = IsolateOrd::IS_DELAY;
7620
7621 if (sig->delayMillis > 0)
7622 {
7623 /* Delay processing until delayMillis passes */
7624 jam();
7625 sendSignalWithDelay(reference(),
7626 GSN_ISOLATE_ORD,
7627 signal,
7628 sig->delayMillis,
7629 IsolateOrd::SignalLength);
7630 return;
7631 }
7632 /* Fall through... */
7633 }
7634 case IsolateOrd::IS_DELAY:
7635 {
7636 jam();
7637
7638 if (ERROR_INSERTED(942))
7639 {
7640 jam();
7641 g_eventLogger->info("QMGR discarding IsolateRequest");
7642 return;
7643 }
7644
7645 /* Map to FAIL_REP signal(s) */
7646 Uint32 failSource = refToNode(sig->senderRef);
7647
7648 unsigned nodeId = victims.find_first();
7649 do
7650 {
7651 jam();
7652
7653 /* TODO : Consider checking node state and skipping if
7654 * failing already
7655 * Consider logging that action is being taken here
7656 */
7657
7658 FailRep* failRep = (FailRep*)&signal->theData[0];
7659 failRep->failNodeId = nodeId;
7660 failRep->failCause = FailRep::ZFORCED_ISOLATION;
7661 failRep->failSourceNodeId = failSource;
7662
7663 sendSignal(reference(), GSN_FAIL_REP, signal, 3, JBA);
7664
7665 nodeId = victims.find_next(nodeId + 1);
7666 } while (nodeId != BitmaskImpl::NotFound);
7667
7668 /* Fail rep signals are en-route... */
7669
7670 return;
7671 }
7672 }
7673
7674 ndbrequire(false);
7675 }
7676