1 /*
2    Copyright (c) 2003, 2021, Oracle and/or its affiliates.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License, version 2.0,
6    as published by the Free Software Foundation.
7 
8    This program is also distributed with certain software (including
9    but not limited to OpenSSL) that is licensed under separate terms,
10    as designated in a particular file or component or in included license
11    documentation.  The authors of MySQL hereby grant you an additional
12    permission to link the program and your derivative works with the
13    separately licensed software that they have included with MySQL.
14 
15    This program is distributed in the hope that it will be useful,
16    but WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18    GNU General Public License, version 2.0, for more details.
19 
20    You should have received a copy of the GNU General Public License
21    along with this program; if not, write to the Free Software
22    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
23 */
24 
25 
26 #define QMGR_C
27 #include "Qmgr.hpp"
28 #include <pc.hpp>
29 #include <NdbTick.h>
30 #include <signaldata/NodeRecoveryStatusRep.hpp>
31 #include <signaldata/EventReport.hpp>
32 #include <signaldata/StartOrd.hpp>
33 #include <signaldata/CloseComReqConf.hpp>
34 #include <signaldata/PrepFailReqRef.hpp>
35 #include <signaldata/NodeFailRep.hpp>
36 #include <signaldata/ReadNodesConf.hpp>
37 #include <signaldata/NFCompleteRep.hpp>
38 #include <signaldata/CheckNodeGroups.hpp>
39 #include <signaldata/ArbitSignalData.hpp>
40 #include <signaldata/ApiRegSignalData.hpp>
41 #include <signaldata/ApiVersion.hpp>
42 #include <signaldata/BlockCommitOrd.hpp>
43 #include <signaldata/FailRep.hpp>
44 #include <signaldata/DisconnectRep.hpp>
45 #include <signaldata/ApiBroadcast.hpp>
46 #include <signaldata/Upgrade.hpp>
47 #include <signaldata/EnableCom.hpp>
48 #include <signaldata/RouteOrd.hpp>
49 #include <signaldata/NodePing.hpp>
50 #include <signaldata/DihRestart.hpp>
51 #include <signaldata/DumpStateOrd.hpp>
52 #include <signaldata/IsolateOrd.hpp>
53 #include <ndb_version.h>
54 
55 #include <TransporterRegistry.hpp> // Get connect address
56 
57 #include <EventLogger.hpp>
58 extern EventLogger * g_eventLogger;
59 
// Uncomment the next line to compile in verbose tracing of the QMGR start
// protocol. When it is left undefined all DEBUG* macros expand to nothing.
//#define DEBUG_QMGR_START
#ifdef DEBUG_QMGR_START
#include <DebuggerNames.hpp>
// DEBUG(x): print "QMGR <line>: <x>" on ndbout.
#define DEBUG(x) ndbout << "QMGR " << __LINE__ << ": " << x << endl
// DEBUG_START(gsn, node, msg): trace a signal (by name) sent to one node.
#define DEBUG_START(gsn, node, msg) DEBUG(getSignalName(gsn) << " to: " << node << " - " << msg)
// DEBUG_START2(gsn, rg, msg): trace a signal sent to a receiver group; the
// group's node mask is rendered as text through a local 255 byte buffer.
#define DEBUG_START2(gsn, rg, msg) { char nodes[255]; DEBUG(getSignalName(gsn) << " to: " << rg.m_nodes.getText(nodes) << " - " << msg); }
// DEBUG_START3(signal, msg): trace a received signal together with the node
// id of its sender. Note that the expansion already ends with ';'.
#define DEBUG_START3(signal, msg) DEBUG(getSignalName(signal->header.theVerId_signalNumber) << " from " << refToNode(signal->getSendersBlockRef()) << " - " << msg);
#else
// Tracing disabled: every macro is a no-op.
#define DEBUG(x)
#define DEBUG_START(gsn, node, msg)
#define DEBUG_START2(gsn, rg, msg)
#define DEBUG_START3(signal, msg)
#endif

// NOTE(review): presumably the id under which this file appears in jam
// traces - confirm against the jam file id registry.
#define JAM_FILE_ID 360
75 
76 
77 /**
78  * c_start.m_gsn = GSN_CM_REGREQ
79  *   Possible for all nodes
80  *   c_start.m_nodes contains all nodes in config
81  *
82  * c_start.m_gsn = GSN_CM_NODEINFOREQ;
83  *   Set when receiving CM_REGCONF
84  *   State possible for starting node only (not in cluster)
85  *
 *   c_start.m_nodes contains all nodes in the alive cluster
 *                   that have not replied to GSN_CM_NODEINFOREQ
 *                   (the set is passed by the president in GSN_CM_REGCONF)
89  *
90  * c_start.m_gsn = GSN_CM_ADD
91  *   Possible for president only
92  *   Set when receiving and accepting CM_REGREQ (to include node)
93  *
94  *   c_start.m_nodes contains all nodes in alive cluster + starting node
95  *                   that has not replied to GSN_CM_ADD
96  *                   by sending GSN_CM_ACKADD
97  *
98  * c_start.m_gsn = GSN_CM_NODEINFOCONF
99  *   Possible for non presidents only
100  *     c_start.m_nodes contains a node that has been accepted by president
101  *     but has not connected to us yet
102  */
103 
104 // Signal entries and statement blocks
105 /* 4  P R O G R A M        */
106 /*******************************/
107 /* CMHEART_BEAT               */
108 /*******************************/
execCM_HEARTBEAT(Signal * signal)109 void Qmgr::execCM_HEARTBEAT(Signal* signal)
110 {
111   NodeRecPtr hbNodePtr;
112   jamEntry();
113   hbNodePtr.i = signal->theData[0];
114   ptrCheckGuard(hbNodePtr, MAX_NDB_NODES, nodeRec);
115   set_hb_count(hbNodePtr.i) = 0;
116   return;
117 }//Qmgr::execCM_HEARTBEAT()
118 
119 /*******************************/
120 /* CM_NODEINFOREF             */
121 /*******************************/
execCM_NODEINFOREF(Signal * signal)122 void Qmgr::execCM_NODEINFOREF(Signal* signal)
123 {
124   jamEntry();
125   systemErrorLab(signal, __LINE__);
126   return;
127 }//Qmgr::execCM_NODEINFOREF()
128 
129 /*******************************/
130 /* CONTINUEB                  */
131 /*******************************/
execCONTINUEB(Signal * signal)132 void Qmgr::execCONTINUEB(Signal* signal)
133 {
134   jamEntry();
135   const Uint32 tcontinuebType = signal->theData[0];
136   const Uint32 tdata0 = signal->theData[1];
137   const Uint32 tdata1 = signal->theData[2];
138   switch (tcontinuebType) {
139   case ZREGREQ_TIMELIMIT:
140     jam();
141     if (c_start.m_startKey != tdata0 || c_start.m_startNode != tdata1) {
142       jam();
143       return;
144     }//if
145     regreqTimeLimitLab(signal);
146     break;
147   case ZREGREQ_MASTER_TIMELIMIT:
148     jam();
149     if (c_start.m_startKey != tdata0 || c_start.m_startNode != tdata1) {
150       jam();
151       return;
152     }//if
153     //regreqMasterTimeLimitLab(signal);
154     failReportLab(signal, c_start.m_startNode, FailRep::ZSTART_IN_REGREQ, getOwnNodeId());
155     return;
156     break;
157   case ZTIMER_HANDLING:
158     jam();
159     timerHandlingLab(signal);
160     return;
161     break;
162   case ZARBIT_HANDLING:
163     jam();
164     runArbitThread(signal);
165     return;
166     break;
167   case ZSTART_FAILURE_LIMIT:{
168     if (cpresident != ZNIL)
169     {
170       jam();
171       return;
172     }
173     const NDB_TICKS now = NdbTick_getCurrentTicks();
174     const Uint64 elapsed = NdbTick_Elapsed(c_start_election_time,now).milliSec();
175     if (elapsed > c_restartFailureTimeout)
176     {
177       jam();
178       BaseString tmp;
179       tmp.append("Shutting down node as total restart time exceeds "
180 		 " StartFailureTimeout as set in config file ");
181       if(c_restartFailureTimeout == (Uint32) ~0)
182 	tmp.append(" 0 (inifinite)");
183       else
184 	tmp.appfmt(" %d", c_restartFailureTimeout);
185 
186       progError(__LINE__, NDBD_EXIT_SYSTEM_ERROR, tmp.c_str());
187     }
188     signal->theData[0] = ZSTART_FAILURE_LIMIT;
189     sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 3000, 1);
190     return;
191   }
192   default:
193     jam();
194     // ZCOULD_NOT_OCCUR_ERROR;
195     systemErrorLab(signal, __LINE__);
196     return;
197     break;
198   }//switch
199   return;
200 }//Qmgr::execCONTINUEB()
201 
202 
execDEBUG_SIG(Signal * signal)203 void Qmgr::execDEBUG_SIG(Signal* signal)
204 {
205   NodeRecPtr debugNodePtr;
206   jamEntry();
207   debugNodePtr.i = signal->theData[0];
208   ptrCheckGuard(debugNodePtr, MAX_NODES, nodeRec);
209   return;
210 }//Qmgr::execDEBUG_SIG()
211 
212 /*******************************/
213 /* FAIL_REP                   */
214 /*******************************/
execFAIL_REP(Signal * signal)215 void Qmgr::execFAIL_REP(Signal* signal)
216 {
217   const FailRep * const failRep = (FailRep *)&signal->theData[0];
218   const NodeId failNodeId = failRep->failNodeId;
219   const FailRep::FailCause failCause = (FailRep::FailCause)failRep->failCause;
220   Uint32 failSource = failRep->getFailSourceNodeId(signal->length());
221   if (!failSource)
222   {
223     /* Failure source not included, use sender of signal as 'source' */
224     failSource = refToNode(signal->getSendersBlockRef());
225   }
226 
227   jamEntry();
228   failReportLab(signal, failNodeId, failCause, failSource);
229   return;
230 }//Qmgr::execFAIL_REP()
231 
232 /*******************************/
233 /* PRES_TOREQ                 */
234 /*******************************/
execPRES_TOREQ(Signal * signal)235 void Qmgr::execPRES_TOREQ(Signal* signal)
236 {
237   jamEntry();
238   BlockReference Tblockref = signal->theData[0];
239   signal->theData[0] = getOwnNodeId();
240   signal->theData[1] = ccommitFailureNr;
241   sendSignal(Tblockref, GSN_PRES_TOCONF, signal, 2, JBA);
242   return;
243 }//Qmgr::execPRES_TOREQ()
244 
245 void
execREAD_CONFIG_REQ(Signal * signal)246 Qmgr::execREAD_CONFIG_REQ(Signal* signal)
247 {
248   jamEntry();
249 
250   const ReadConfigReq * req = (ReadConfigReq*)signal->getDataPtr();
251 
252   Uint32 ref = req->senderRef;
253   Uint32 senderData = req->senderData;
254 
255   const ndb_mgm_configuration_iterator * p =
256     m_ctx.m_config.getOwnConfigIterator();
257   ndbrequire(p != 0);
258 
259   ReadConfigConf * conf = (ReadConfigConf*)signal->getDataPtrSend();
260   conf->senderRef = reference();
261   conf->senderData = senderData;
262   sendSignal(ref, GSN_READ_CONFIG_CONF, signal,
263 	     ReadConfigConf::SignalLength, JBB);
264 }
265 
266 void
execSTART_ORD(Signal * signal)267 Qmgr::execSTART_ORD(Signal* signal)
268 {
269   /**
270    * Start timer handling
271    */
272   const NDB_TICKS now = NdbTick_getCurrentTicks();
273   signal->theData[0] = ZTIMER_HANDLING;
274   signal->theData[1] = Uint32(now.getUint64() >> 32);
275   signal->theData[2] = Uint32(now.getUint64());
276   sendSignal(QMGR_REF, GSN_CONTINUEB, signal, 3, JBB);
277 }
278 
279 /*
280 4.2  ADD NODE MODULE*/
281 /*##########################################################################*/
282 /*
283 4.2.1 STTOR     */
284 /**--------------------------------------------------------------------------
285  * Start phase signal, must be handled by all blocks.
286  * QMGR is only interested in the first phase.
287  * During phase one we clear all registered applications.
288  *---------------------------------------------------------------------------*/
289 /*******************************/
290 /* STTOR                      */
291 /*******************************/
execSTTOR(Signal * signal)292 void Qmgr::execSTTOR(Signal* signal)
293 {
294   jamEntry();
295 
296   switch(signal->theData[1]){
297   case 1:
298     initData(signal);
299     g_eventLogger->info("Starting QMGR phase 1");
300     startphase1(signal);
301     recompute_version_info(NodeInfo::DB);
302     recompute_version_info(NodeInfo::API);
303     recompute_version_info(NodeInfo::MGM);
304     return;
305   case 7:
306     if (cpresident == getOwnNodeId())
307     {
308       switch(arbitRec.method){
309       case ArbitRec::DISABLED:
310         break;
311 
312       case ArbitRec::METHOD_EXTERNAL:
313       case ArbitRec::METHOD_DEFAULT:
314         /**
315          * Start arbitration thread.  This could be done as soon as
316          * we have all nodes (or a winning majority).
317          */
318         jam();
319         handleArbitStart(signal);
320         break;
321       }
322     }
323     break;
324   case 9:{
325     /**
326      * Enable communication to all API nodes by setting state
327      *   to ZFAIL_CLOSING (which will make it auto-open in checkStartInterface)
328      */
329     c_allow_api_connect = 1;
330     NodeRecPtr nodePtr;
331     for (nodePtr.i = 1; nodePtr.i < MAX_NODES; nodePtr.i++)
332     {
333       jam();
334       Uint32 type = getNodeInfo(nodePtr.i).m_type;
335       if (type != NodeInfo::API)
336         continue;
337 
338       ptrAss(nodePtr, nodeRec);
339       if (nodePtr.p->phase == ZAPI_INACTIVE)
340       {
341         jam();
342         set_hb_count(nodePtr.i) = 3;
343         nodePtr.p->phase = ZFAIL_CLOSING;
344         nodePtr.p->failState = NORMAL;
345       }
346     }
347   }
348   }
349 
350   sendSttorryLab(signal, false);
351   return;
352 }//Qmgr::execSTTOR()
353 
sendSttorryLab(Signal * signal,bool first_phase)354 void Qmgr::sendSttorryLab(Signal* signal, bool first_phase)
355 {
356   if (first_phase)
357   {
358     g_eventLogger->info("Include node protocol completed, phase 1 in QMGR"
359                         " completed");
360   }
361 /****************************<*/
362 /*< STTORRY                  <*/
363 /****************************<*/
364   signal->theData[3] = 7;
365   signal->theData[4] = 9;
366   signal->theData[5] = 255;
367   sendSignal(NDBCNTR_REF, GSN_STTORRY, signal, 6, JBB);
368   return;
369 }//Qmgr::sendSttorryLab()
370 
startphase1(Signal * signal)371 void Qmgr::startphase1(Signal* signal)
372 {
373   jamEntry();
374 
375   NodeRecPtr nodePtr;
376   nodePtr.i = getOwnNodeId();
377   ptrAss(nodePtr, nodeRec);
378   nodePtr.p->phase = ZSTARTING;
379 
380   DihRestartReq * req = CAST_PTR(DihRestartReq, signal->getDataPtrSend());
381   req->senderRef = reference();
382   sendSignal(DBDIH_REF, GSN_DIH_RESTARTREQ, signal,
383              DihRestartReq::SignalLength, JBB);
384   return;
385 }
386 
387 void
execDIH_RESTARTREF(Signal * signal)388 Qmgr::execDIH_RESTARTREF(Signal*signal)
389 {
390   jamEntry();
391 
392   g_eventLogger->info("DIH reported initial start, now starting the"
393                       " Node Inclusion Protocol");
394   const DihRestartRef * ref = CAST_CONSTPTR(DihRestartRef,
395                                             signal->getDataPtr());
396   c_start.m_latest_gci = 0;
397   c_start.m_no_nodegroup_nodes.assign(NdbNodeBitmask::Size,
398                                       ref->no_nodegroup_mask);
399   execCM_INFOCONF(signal);
400 }
401 
402 void
execDIH_RESTARTCONF(Signal * signal)403 Qmgr::execDIH_RESTARTCONF(Signal*signal)
404 {
405   jamEntry();
406 
407   const DihRestartConf * conf = CAST_CONSTPTR(DihRestartConf,
408                                               signal->getDataPtr());
409 
410   g_eventLogger->info("DIH reported normal start, now starting the"
411                       " Node Inclusion Protocol");
412   c_start.m_latest_gci = conf->latest_gci;
413   c_start.m_no_nodegroup_nodes.assign(NdbNodeBitmask::Size,
414                                       conf->no_nodegroup_mask);
415   execCM_INFOCONF(signal);
416 }
417 
setHbDelay(UintR aHbDelay)418 void Qmgr::setHbDelay(UintR aHbDelay)
419 {
420   const NDB_TICKS now = NdbTick_getCurrentTicks();
421   hb_send_timer.setDelay(aHbDelay < 10 ? 10 : aHbDelay);
422   hb_send_timer.reset(now);
423   hb_check_timer.setDelay(aHbDelay < 10 ? 10 : aHbDelay);
424   hb_check_timer.reset(now);
425 }
426 
setHbApiDelay(UintR aHbApiDelay)427 void Qmgr::setHbApiDelay(UintR aHbApiDelay)
428 {
429   const NDB_TICKS now = NdbTick_getCurrentTicks();
430   chbApiDelay = (aHbApiDelay < 100 ? 100 : aHbApiDelay);
431   hb_api_timer.setDelay(chbApiDelay);
432   hb_api_timer.reset(now);
433 }
434 
setArbitTimeout(UintR aArbitTimeout)435 void Qmgr::setArbitTimeout(UintR aArbitTimeout)
436 {
437   arbitRec.timeout = (aArbitTimeout < 10 ? 10 : aArbitTimeout);
438 }
439 
setCCDelay(UintR aCCDelay)440 void Qmgr::setCCDelay(UintR aCCDelay)
441 {
442   const NDB_TICKS now = NdbTick_getCurrentTicks();
443   if (aCCDelay == 0)
444   {
445     /* Connectivity check disabled */
446     m_connectivity_check.m_enabled = false;
447     m_connectivity_check.m_timer.setDelay(0);
448   }
449   else
450   {
451     m_connectivity_check.m_enabled = true;
452     m_connectivity_check.m_timer.setDelay(aCCDelay < 10 ? 10 : aCCDelay);
453     m_connectivity_check.m_timer.reset(now);
454   }
455 }
456 
execCONNECT_REP(Signal * signal)457 void Qmgr::execCONNECT_REP(Signal* signal)
458 {
459   jamEntry();
460   const Uint32 connectedNodeId = signal->theData[0];
461 
462   if (ERROR_INSERTED(931))
463   {
464     jam();
465     ndbout_c("Discarding CONNECT_REP(%d)", connectedNodeId);
466     infoEvent("Discarding CONNECT_REP(%d)", connectedNodeId);
467     return;
468   }
469 
470   if (ERROR_INSERTED(941) &&
471       getNodeInfo(connectedNodeId).getType() == NodeInfo::API)
472   {
473     jam();
474     CLEAR_ERROR_INSERT_VALUE;
475     ndbout_c("Discarding one API CONNECT_REP(%d)", connectedNodeId);
476     infoEvent("Discarding one API CONNECT_REP(%d)", connectedNodeId);
477     return;
478   }
479 
480   if (c_connectedNodes.get(connectedNodeId) == false)
481   {
482     jam();
483     setNodeInfo(connectedNodeId).m_version = 0;
484     setNodeInfo(connectedNodeId).m_mysql_version = 0;
485   }
486 
487   c_connectedNodes.set(connectedNodeId);
488 
489   {
490     NodeRecPtr connectedNodePtr;
491     connectedNodePtr.i = connectedNodeId;
492     ptrCheckGuard(connectedNodePtr, MAX_NODES, nodeRec);
493     connectedNodePtr.p->m_secret = 0;
494   }
495 
496   NodeRecPtr myNodePtr;
497   myNodePtr.i = getOwnNodeId();
498   ptrCheckGuard(myNodePtr, MAX_NODES, nodeRec);
499   NodeInfo connectedNodeInfo = getNodeInfo(connectedNodeId);
500   switch(myNodePtr.p->phase){
501   case ZRUNNING:
502     if (connectedNodeInfo.getType() == NodeInfo::DB)
503     {
504       ndbrequire(!c_clusterNodes.get(connectedNodeId));
505     }
506   case ZSTARTING:
507     jam();
508     break;
509   case ZPREPARE_FAIL:
510   case ZFAIL_CLOSING:
511     jam();
512     return;
513   case ZAPI_ACTIVATION_ONGOING:
514     ndbrequire(false);
515   case ZAPI_ACTIVE:
516     ndbrequire(false);
517   case ZAPI_INACTIVE:
518     ndbrequire(false);
519   case ZINIT:
520     ndbrequire(getNodeInfo(connectedNodeId).m_type == NodeInfo::MGM);
521     break;
522   default:
523     ndbrequire(false);
524   }
525 
526   if (connectedNodeInfo.getType() != NodeInfo::DB)
527   {
528     jam();
529     return;
530   }
531 
532   switch(c_start.m_gsn){
533   case GSN_CM_REGREQ:
534     jam();
535     sendCmRegReq(signal, connectedNodeId);
536 
537     /**
538      * We're waiting for CM_REGCONF c_start.m_nodes contains all configured
539      *   nodes
540      */
541     ndbrequire(myNodePtr.p->phase == ZSTARTING);
542     ndbrequire(c_start.m_nodes.isWaitingFor(connectedNodeId));
543     return;
544   case GSN_CM_NODEINFOREQ:
545     jam();
546 
547     if (c_start.m_nodes.isWaitingFor(connectedNodeId))
548     {
549       jam();
550       ndbrequire(getOwnNodeId() != cpresident);
551       ndbrequire(myNodePtr.p->phase == ZSTARTING);
552       sendCmNodeInfoReq(signal, connectedNodeId, myNodePtr.p);
553       return;
554     }
555     return;
556   case GSN_CM_NODEINFOCONF:{
557     jam();
558 
559     ndbrequire(getOwnNodeId() != cpresident);
560     ndbrequire(myNodePtr.p->phase == ZRUNNING);
561     if (c_start.m_nodes.isWaitingFor(connectedNodeId))
562     {
563       jam();
564       c_start.m_nodes.clearWaitingFor(connectedNodeId);
565       c_start.m_gsn = RNIL;
566 
567       NodeRecPtr addNodePtr;
568       addNodePtr.i = connectedNodeId;
569       ptrCheckGuard(addNodePtr, MAX_NDB_NODES, nodeRec);
570       cmAddPrepare(signal, addNodePtr, myNodePtr.p);
571       return;
572     }
573   }
574   default:
575     (void)1;
576   }
577 
578   ndbrequire(!c_start.m_nodes.isWaitingFor(connectedNodeId));
579   ndbrequire(!c_readnodes_nodes.get(connectedNodeId));
580   c_readnodes_nodes.set(connectedNodeId);
581   signal->theData[0] = reference();
582   sendSignal(calcQmgrBlockRef(connectedNodeId), GSN_READ_NODESREQ, signal, 1, JBA);
583   return;
584 }//Qmgr::execCONNECT_REP()
585 
586 void
execREAD_NODESCONF(Signal * signal)587 Qmgr::execREAD_NODESCONF(Signal* signal)
588 {
589   jamEntry();
590   check_readnodes_reply(signal,
591 			refToNode(signal->getSendersBlockRef()),
592 			GSN_READ_NODESCONF);
593 }
594 
595 void
execREAD_NODESREF(Signal * signal)596 Qmgr::execREAD_NODESREF(Signal* signal)
597 {
598   jamEntry();
599   check_readnodes_reply(signal,
600 			refToNode(signal->getSendersBlockRef()),
601 			GSN_READ_NODESREF);
602 }
603 
604 /**
605  * Heartbeat Inclusion Protocol Handling
606  * -------------------------------------
607  * The protocol to include our node in the heartbeat protocol starts when
608  * we call execCM_INFOCONF. We start by opening communication to all nodes
 * in the cluster. When we start this protocol we don't know anything about
 * which nodes are up and running and we don't know which node is currently
 * the president of the heartbeat protocol.
612  *
613  * For us to be successful with being included in the heartbeat protocol we
614  * need to be connected to all nodes currently in the heartbeat protocol. It
615  * is important to remember that QMGR sees a node as alive if it is included
616  * in the heartbeat protocol. Higher level notions of aliveness is handled
617  * primarily by the DBDIH block, but also to some extent by NDBCNTR.
618  *
 * The protocol starts by the new node sending CM_REGREQ to all nodes it is
 * connected to. Only the president will respond to this message. We could
 * have a situation where there currently isn't a president chosen. In this
 * case an election is held whereby a new president is assigned. In the rest
 * of this comment we assume that a president already exists.
624  *
625  * So if we were connected to the president we will get a response to the
626  * CM_REGREQ from the president with CM_REGCONF. The CM_REGCONF contains
627  * the set of nodes currently included in the heartbeat protocol.
628  *
629  * The president will send in parallel to sending CM_REGCONF a CM_ADD(prepare)
630  * message to all nodes included in the protocol.
631  *
632  * When receiving CM_REGCONF the new node will send CM_NODEINFOREQ with
633  * information about version of the binary, number of LDM workers and
634  * MySQL version of binary.
635  *
 * Each node already included in the heartbeat protocol will wait until it
 * has received both the CM_ADD(prepare) from the president and the
 * CM_NODEINFOREQ from the starting node. When it has received those two
 * messages it will send CM_ACKADD(prepare) to the president and
 * CM_NODEINFOCONF to the starting node with its own node information.
641  *
642  * When the president received CM_ACKADD(prepare) from all nodes included
643  * in the heartbeat protocol then it sends CM_ADD(AddCommit) to all nodes
644  * included in the heartbeat protocol.
645  *
 * When the nodes receive CM_ADD(AddCommit) from the president then
 * they will enable communication to the new node and immediately start
 * sending heartbeats to the new node. They will also include the new
 * node in their view of the nodes included in the heartbeat protocol.
 * Next they will send CM_ACKADD(AddCommit) back to the president.
651  *
652  * When the president has received CM_ACKADD(AddCommit) from all nodes
653  * included in the heartbeat protocol then it sends CM_ADD(CommitNew)
654  * to the starting node.
655  *
656  * This is also the point where we report the node as included in the
657  * heartbeat protocol to DBDIH as from here the rest of the protocol is
658  * only about informing the new node about the outcome of inclusion
659  * protocol. When we receive the response to this message the new node
660  * can already have proceeded a bit into its restart.
661  *
 * The starting node, after receiving CM_REGCONF, waits for all nodes
 * included in the heartbeat protocol to send CM_NODEINFOCONF and
 * also for the CM_ADD(CommitNew) from the president. When
 * all of this has been received the new node adds itself and all nodes
 * it has been informed about into its view of the nodes included in
 * the heartbeat protocol and enables communication to all other
 * nodes included therein. Finally it sends CM_ACKADD(CommitNew) to
 * the president.
670  *
671  * When the president has received CM_ACKADD(CommitNew) from the starting
672  * node the inclusion protocol is completed and the president is ready
673  * to receive a new node into the cluster.
674  *
675  * It is the responsibility of the starting nodes to retry after a failed
676  * node inclusion, they will do so with 3 seconds delay. This means that
677  * at most one node per 3 seconds will normally be added to the cluster.
678  * So this phase of adding nodes to the cluster can add up to a little bit
679  * more than a minute of delay in a large cluster starting up.
680  *
681  * We try to depict the above in a graph here as well:
682  *
683  * New node           Nodes included in the heartbeat protocol        President
684  * ----------------------------------------------------------------------------
685  * ----CM_REGREQ--------------------->>
686  * ----CM_REGREQ---------------------------------------------------------->
687  *
688  * <----------------CM_REGCONF---------------------------------------------
689  *                                   <<------CM_ADD(Prepare)---------------
690  *
691  * -----CM_NODEINFOREQ--------------->>
692  *
693  * Nodes included in heartbeat protocol can receive CM_ADD(Prepare) and
694  * CM_NODEINFOREQ in any order.
695  *
696  * <<---CM_NODEINFOCONF-------------- --------CM_ACKADD(Prepare)--------->>
697  *
698  *                                   <<-------CM_ADD(AddCommit)------------
699  *
700  * Here nodes enables communication to new node and starts sending heartbeats
701  *
702  *                                   ---------CM_ACKADD(AddCommit)------->>
703  *
704  * Here we report to DBDIH about new node included in heartbeat protocol
705  * in master node.
706  *
707  * <----CM_ADD(CommitNew)--------------------------------------------------
708  *
 * Here the new node enables communication to the other nodes and starts
 * sending heartbeat messages.
711  *
712  * -----CM_ACKADD(CommitNew)---------------------------------------------->
713  *
714  * Here the president can complete the inclusion protocol and is ready to
715  * receive new nodes into the heartbeat protocol.
716  */
717 /*******************************/
718 /* CM_INFOCONF                */
719 /*******************************/
execCM_INFOCONF(Signal * signal)720 void Qmgr::execCM_INFOCONF(Signal* signal)
721 {
722   /**
723    * Open communcation to all DB nodes
724    */
725   signal->theData[0] = 0; // no answer
726   signal->theData[1] = 0; // no id
727   signal->theData[2] = NodeInfo::DB;
728   sendSignal(TRPMAN_REF, GSN_OPEN_COMORD, signal, 3, JBB);
729 
730   cpresident = ZNIL;
731   cpresidentAlive = ZFALSE;
732   c_start_election_time = NdbTick_getCurrentTicks();
733 
734   signal->theData[0] = ZSTART_FAILURE_LIMIT;
735   sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 3000, 1);
736 
737   cmInfoconf010Lab(signal);
738 
739   return;
740 }//Qmgr::execCM_INFOCONF()
741 
// Process wide start settings. cmInfoconf010Lab() copies them into
// c_start.m_start_type and c_start.m_skip_nodes respectively.
Uint32 g_start_type = 0;
// NOTE(review): "clo" presumably means command line option - confirm.
NdbNodeBitmask g_nowait_nodes; // Set by clo
744 
cmInfoconf010Lab(Signal * signal)745 void Qmgr::cmInfoconf010Lab(Signal* signal)
746 {
747   c_start.m_startKey = 0;
748   c_start.m_startNode = getOwnNodeId();
749   c_start.m_nodes.clearWaitingFor();
750   c_start.m_gsn = GSN_CM_REGREQ;
751   c_start.m_starting_nodes.clear();
752   c_start.m_starting_nodes_w_log.clear();
753   c_start.m_regReqReqSent = 0;
754   c_start.m_regReqReqRecv = 0;
755   c_start.m_skip_nodes = g_nowait_nodes;
756   c_start.m_skip_nodes.bitAND(c_definedNodes);
757   c_start.m_start_type = g_start_type;
758 
759   NodeRecPtr nodePtr;
760   cnoOfNodes = 0;
761   for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
762     jam();
763     ptrAss(nodePtr, nodeRec);
764 
765     if(getNodeInfo(nodePtr.i).getType() != NodeInfo::DB)
766       continue;
767 
768     c_start.m_nodes.setWaitingFor(nodePtr.i);
769     cnoOfNodes++;
770 
771     if(!c_connectedNodes.get(nodePtr.i))
772       continue;
773 
774     sendCmRegReq(signal, nodePtr.i);
775   }
776 
777   //----------------------------------------
778   /* Wait for a while. When it returns    */
779   /* we will check if we got any CM_REGREF*/
780   /* or CM_REGREQ (lower nodeid than our  */
781   /* own).                                */
782   //----------------------------------------
783   signal->theData[0] = ZREGREQ_TIMELIMIT;
784   signal->theData[1] = c_start.m_startKey;
785   signal->theData[2] = c_start.m_startNode;
786   sendSignalWithDelay(QMGR_REF, GSN_CONTINUEB, signal, 3000, 3);
787 
788   creadyDistCom = ZTRUE;
789   return;
790 }//Qmgr::cmInfoconf010Lab()
791 
792 void
sendCmRegReq(Signal * signal,Uint32 nodeId)793 Qmgr::sendCmRegReq(Signal * signal, Uint32 nodeId){
794   CmRegReq * req = (CmRegReq *)&signal->theData[0];
795   req->blockRef = reference();
796   req->nodeId = getOwnNodeId();
797   req->version = NDB_VERSION;
798   req->mysql_version = NDB_MYSQL_VERSION_D;
799   req->latest_gci = c_start.m_latest_gci;
800   req->start_type = c_start.m_start_type;
801   c_start.m_skip_nodes.copyto(NdbNodeBitmask::Size, req->skip_nodes);
802   const Uint32 ref = calcQmgrBlockRef(nodeId);
803   sendSignal(ref, GSN_CM_REGREQ, signal, CmRegReq::SignalLength, JBB);
804   DEBUG_START(GSN_CM_REGREQ, nodeId, "");
805 
806   c_start.m_regReqReqSent++;
807 }
808 
809 /*
810 4.4.11 CM_REGREQ */
811 /**--------------------------------------------------------------------------
 * If this signal is received, some node is trying to get registered.
 * Only the president has the authority to make decisions about new nodes,
 * so only a president or a node that claims to be the president may send a
 * reply to this signal.
 * This signal can occur any time after STTOR was received.
817  * CPRESIDENT:             Timelimit has expired and someone has
818  *                         decided to enter the president role
819  * CPRESIDENT_CANDIDATE:
820  *     Assigned when we receive a CM_REGREF, if we got more than one REF
821  *     then we always keep the lowest nodenumber.
822  *     We accept this nodeno as president when our timelimit expires
823  * We should consider the following cases:
824  * 1- We are the president. If we are busy by adding new nodes to cluster,
825  *    then we have to refuse this node to be added.
826  *    The refused node will try in ZREFUSE_ADD_TIME seconds again.
827  *    If we are not busy then we confirm
828  *
 * 2- We know the president, so we do not bother with this REQ.
 *    The president has also got this REQ and will take care of it.
831  *
 * 3- The president is not known. We have received CM_INIT, so we compare the
 *    sender's node number to GETOWNNODEID().
 *    If we have a lower number than the sender then we will claim
 *    that we are the president so we send him a refuse signal back.
 *    We have to wait for the CONTINUEB signal before we can enter the
 *    president role. If our GETOWNNODEID() is larger than sender node number,
 *    we are not the president and just have to wait for the
 *    reply signal (REF) to our CM_REGREQ_2.
 * 4- We haven't received the CM_INIT signal so we don't know who we are.
 *    Ignore the request.
842  *--------------------------------------------------------------------------*/
843 /*******************************/
844 /* CM_REGREQ                  */
845 /*******************************/
846 static
847 int
check_start_type(Uint32 starting,Uint32 own)848 check_start_type(Uint32 starting, Uint32 own)
849 {
850   if (starting == (1 << NodeState::ST_INITIAL_START) &&
851       ((own & (1 << NodeState::ST_INITIAL_START)) == 0))
852   {
853     return 1;
854   }
855   return 0;
856 }
857 
execCM_REGREQ(Signal * signal)858 void Qmgr::execCM_REGREQ(Signal* signal)
859 {
860   DEBUG_START3(signal, "");
861 
862   NodeRecPtr addNodePtr;
863   jamEntry();
864 
865   CmRegReq * const cmRegReq = (CmRegReq *)&signal->theData[0];
866   const BlockReference Tblockref = cmRegReq->blockRef;
867   const Uint32 startingVersion = cmRegReq->version;
868   Uint32 startingMysqlVersion = cmRegReq->mysql_version;
869   addNodePtr.i = cmRegReq->nodeId;
870   Uint32 gci = 1;
871   Uint32 start_type = ~0;
872   NdbNodeBitmask skip_nodes;
873 
874   if (!c_connectedNodes.get(cmRegReq->nodeId))
875   {
876     jam();
877 
878     /**
879      * With ndbmtd, there is a race condition such that
880      *   CM_REGREQ can arrive prior to CONNECT_REP
881      *   since CONNECT_REP is sent from CMVMI
882      *
883      * In such cases, ignore the CM_REGREQ which is safe
884      *   as it will anyway be resent by starting node
885      */
886     g_eventLogger->info("discarding CM_REGREQ from %u "
887                         "as we're not yet connected (isNdbMt: %u)",
888                         cmRegReq->nodeId,
889                         (unsigned)isNdbMt());
890 
891     return;
892   }
893 
894   if (signal->getLength() == CmRegReq::SignalLength)
895   {
896     jam();
897     gci = cmRegReq->latest_gci;
898     start_type = cmRegReq->start_type;
899     skip_nodes.assign(NdbNodeBitmask::Size, cmRegReq->skip_nodes);
900   }
901 
902   if (startingVersion < NDBD_SPLIT_VERSION)
903   {
904     startingMysqlVersion = 0;
905   }
906 
907   if (creadyDistCom == ZFALSE) {
908     jam();
909     /* NOT READY FOR DISTRIBUTED COMMUNICATION.*/
910     return;
911   }//if
912 
913   if (!ndbCompatible_ndb_ndb(NDB_VERSION, startingVersion)) {
914     jam();
915     sendCmRegrefLab(signal, Tblockref, CmRegRef::ZINCOMPATIBLE_VERSION);
916     return;
917   }
918 
919   if (!ndb_check_micro_gcp(startingVersion))
920   {
921     jam();
922     infoEvent("Connection from node %u refused as it's not micro GCP enabled",
923               addNodePtr.i);
924     sendCmRegrefLab(signal, Tblockref, CmRegRef::ZINCOMPATIBLE_VERSION);
925     return;
926   }
927 
928   if (!ndb_pnr(startingVersion))
929   {
930     jam();
931     infoEvent("Connection from node %u refused as it's not does not support "
932               "parallel node recovery",
933               addNodePtr.i);
934     sendCmRegrefLab(signal, Tblockref, CmRegRef::ZINCOMPATIBLE_VERSION);
935     return;
936   }
937 
938   if (!ndb_check_hb_order_version(startingVersion) &&
939       m_hb_order_config_used)
940   {
941     jam();
942     infoEvent("Connection from node %u refused as it does not support "
943               "user-defined HeartbeatOrder",
944               addNodePtr.i);
945     sendCmRegrefLab(signal, Tblockref, CmRegRef::ZINCOMPATIBLE_VERSION);
946     return;
947   }
948 
949   if (m_connectivity_check.m_enabled &&
950       !ndbd_connectivity_check(startingVersion))
951   {
952     jam();
953     infoEvent("Connection from node %u refused as it does not support "
954               "ConnectCheckIntervalDelay",
955               addNodePtr.i);
956     sendCmRegrefLab(signal, Tblockref, CmRegRef::ZINCOMPATIBLE_VERSION);
957     return;
958   }
959 
960   if (check_start_type(start_type, c_start.m_start_type))
961   {
962     jam();
963     sendCmRegrefLab(signal, Tblockref, CmRegRef::ZINCOMPATIBLE_START_TYPE);
964     return;
965   }
966 
967   if (cpresident != getOwnNodeId())
968   {
969     jam();
970 
971     if (cpresident == ZNIL)
972     {
973       /***
974        * We don't know the president.
975        * If the node to be added has lower node id
976        * than our president cancidate. Set it as
977        * candidate
978        */
979       jam();
980       if (gci > c_start.m_president_candidate_gci ||
981 	  (gci == c_start.m_president_candidate_gci &&
982 	   addNodePtr.i < c_start.m_president_candidate))
983       {
984 	jam();
985 	c_start.m_president_candidate = addNodePtr.i;
986 	c_start.m_president_candidate_gci = gci;
987       }
988       sendCmRegrefLab(signal, Tblockref, CmRegRef::ZELECTION);
989       return;
990     }
991 
992     /**
993      * We are not the president.
994      * We know the president.
995      * President will answer.
996      */
997     sendCmRegrefLab(signal, Tblockref, CmRegRef::ZNOT_PRESIDENT);
998     return;
999   }//if
1000 
1001   if (c_start.m_startNode != 0)
1002   {
1003     jam();
1004     /**
1005      * President busy by adding another node
1006     */
1007     sendCmRegrefLab(signal, Tblockref, CmRegRef::ZBUSY_PRESIDENT);
1008     return;
1009   }//if
1010 
1011   if (ctoStatus == Q_ACTIVE)
1012   {
1013     jam();
1014     /**
1015      * Active taking over as president
1016      */
1017     sendCmRegrefLab(signal, Tblockref, CmRegRef::ZBUSY_TO_PRES);
1018     return;
1019   }//if
1020 
1021   if (getNodeInfo(addNodePtr.i).m_type != NodeInfo::DB)
1022   {
1023     jam();
1024     /**
1025      * The new node is not in config file
1026      */
1027     sendCmRegrefLab(signal, Tblockref, CmRegRef::ZNOT_IN_CFG);
1028     return;
1029   }
1030 
1031   if (getNodeState().getSingleUserMode())
1032   {
1033     /**
1034      * The cluster is in single user mode.
1035      * Data node is not allowed to get added in the cluster
1036      * while in single user mode.
1037      */
1038     // handle rolling upgrade
1039     {
1040       unsigned int get_major = getMajor(startingVersion);
1041       unsigned int get_minor = getMinor(startingVersion);
1042       unsigned int get_build = getBuild(startingVersion);
1043 
1044       if (startingVersion < NDBD_QMGR_SINGLEUSER_VERSION_5) {
1045         jam();
1046 
1047         infoEvent("QMGR: detect upgrade: new node %u old version %u.%u.%u",
1048           (unsigned int)addNodePtr.i, get_major, get_minor, get_build);
1049         /**
1050          * The new node is old version, send ZINCOMPATIBLE_VERSION instead
1051          * of ZSINGLE_USER_MODE.
1052          */
1053         sendCmRegrefLab(signal, Tblockref, CmRegRef::ZINCOMPATIBLE_VERSION);
1054       } else {
1055         jam();
1056 
1057         sendCmRegrefLab(signal, Tblockref, CmRegRef::ZSINGLE_USER_MODE);
1058       }//if
1059     }
1060 
1061     return;
1062   }//if
1063 
1064   ptrCheckGuard(addNodePtr, MAX_NDB_NODES, nodeRec);
1065   Phase phase = addNodePtr.p->phase;
1066   if (phase != ZINIT)
1067   {
1068     jam();
1069     DEBUG("phase = " << phase);
1070     sendCmRegrefLab(signal, Tblockref, CmRegRef::ZNOT_DEAD);
1071     return;
1072   }
1073 
1074   jam();
1075   /**
1076    * WE ARE PRESIDENT AND WE ARE NOT BUSY ADDING ANOTHER NODE.
1077    * WE WILL TAKE CARE OF THE INCLUSION OF THIS NODE INTO THE CLUSTER.
1078    * WE NEED TO START TIME SUPERVISION OF THIS. SINCE WE CANNOT STOP
1079    * TIMED SIGNAL IF THE INCLUSION IS INTERRUPTED WE IDENTIFY
1080    * EACH INCLUSION WITH A UNIQUE IDENTITY. THIS IS CHECKED WHEN
1081    * THE SIGNAL ARRIVES. IF IT HAS CHANGED THEN WE SIMPLY IGNORE
1082    * THE TIMED SIGNAL.
1083    */
1084 
1085   /**
1086    * Update start record
1087    */
1088   c_start.m_startKey++;
1089   c_start.m_startNode = addNodePtr.i;
1090 
1091   /**
1092    * Assign dynamic id
1093    */
1094   UintR TdynId = (++c_maxDynamicId) & 0xFFFF;
1095   TdynId |= (addNodePtr.p->hbOrder << 16);
1096   setNodeInfo(addNodePtr.i).m_version = startingVersion;
1097   setNodeInfo(addNodePtr.i).m_mysql_version = startingMysqlVersion;
1098   recompute_version_info(NodeInfo::DB, startingVersion);
1099   addNodePtr.p->ndynamicId = TdynId;
1100 
1101   /**
1102    * Reply with CM_REGCONF
1103    */
1104   CmRegConf * const cmRegConf = (CmRegConf *)&signal->theData[0];
1105   cmRegConf->presidentBlockRef = reference();
1106   cmRegConf->presidentNodeId   = getOwnNodeId();
1107   cmRegConf->presidentVersion  = getNodeInfo(getOwnNodeId()).m_version;
1108   cmRegConf->presidentMysqlVersion = getNodeInfo(getOwnNodeId()).m_mysql_version;
1109   cmRegConf->dynamicId         = TdynId;
1110   c_clusterNodes.copyto(NdbNodeBitmask::Size, cmRegConf->allNdbNodes);
1111   sendSignal(Tblockref, GSN_CM_REGCONF, signal,
1112 	     CmRegConf::SignalLength, JBA);
1113   DEBUG_START(GSN_CM_REGCONF, refToNode(Tblockref), "");
1114 
1115   /**
1116    * Send CmAdd to all nodes (including starting)
1117    */
1118   c_start.m_nodes = c_clusterNodes;
1119   c_start.m_nodes.setWaitingFor(addNodePtr.i);
1120   c_start.m_gsn = GSN_CM_ADD;
1121 
1122   NodeReceiverGroup rg(QMGR, c_start.m_nodes);
1123   CmAdd * const cmAdd = (CmAdd*)signal->getDataPtrSend();
1124   cmAdd->requestType = CmAdd::Prepare;
1125   cmAdd->startingNodeId = addNodePtr.i;
1126   cmAdd->startingVersion = startingVersion;
1127   cmAdd->startingMysqlVersion = startingMysqlVersion;
1128   sendSignal(rg, GSN_CM_ADD, signal, CmAdd::SignalLength, JBA);
1129   DEBUG_START2(GSN_CM_ADD, rg, "Prepare");
1130 
1131   /**
1132    * Set timer
1133    */
1134   return;
1135   signal->theData[0] = ZREGREQ_MASTER_TIMELIMIT;
1136   signal->theData[1] = c_start.m_startKey;
1137   sendSignalWithDelay(QMGR_REF, GSN_CONTINUEB, signal, 30000, 2);
1138 
1139   return;
1140 }//Qmgr::execCM_REGREQ()
1141 
sendCmRegrefLab(Signal * signal,BlockReference TBRef,CmRegRef::ErrorCode Terror)1142 void Qmgr::sendCmRegrefLab(Signal* signal, BlockReference TBRef,
1143 			   CmRegRef::ErrorCode Terror)
1144 {
1145   CmRegRef* ref = (CmRegRef*)signal->getDataPtrSend();
1146   ref->blockRef = reference();
1147   ref->nodeId = getOwnNodeId();
1148   ref->errorCode = Terror;
1149   ref->presidentCandidate =
1150     (cpresident == ZNIL ? c_start.m_president_candidate : cpresident);
1151   ref->candidate_latest_gci = c_start.m_president_candidate_gci;
1152   ref->latest_gci = c_start.m_latest_gci;
1153   ref->start_type = c_start.m_start_type;
1154   c_start.m_skip_nodes.copyto(NdbNodeBitmask::Size, ref->skip_nodes);
1155   sendSignal(TBRef, GSN_CM_REGREF, signal,
1156 	     CmRegRef::SignalLength, JBB);
1157   DEBUG_START(GSN_CM_REGREF, refToNode(TBRef), "");
1158   return;
1159 }//Qmgr::sendCmRegrefLab()
1160 
1161 /*
1162 4.4.11 CM_REGCONF */
1163 /**--------------------------------------------------------------------------
1164  * President gives permission to a node which wants to join the cluster.
1165  * The president will prepare the cluster that a new node will be added to
1166  * cluster. When the new node has set up all connections to the cluster,
1167  * the president will send commit to all clusternodes so the phase of the
1168  * new node can be changed to ZRUNNING.
1169  *--------------------------------------------------------------------------*/
1170 /*******************************/
1171 /* CM_REGCONF                 */
1172 /*******************************/
execCM_REGCONF(Signal * signal)1173 void Qmgr::execCM_REGCONF(Signal* signal)
1174 {
1175   DEBUG_START3(signal, "");
1176 
1177   NodeRecPtr myNodePtr;
1178   NodeRecPtr nodePtr;
1179   jamEntry();
1180 
1181   const CmRegConf * const cmRegConf = (CmRegConf *)&signal->theData[0];
1182 
1183   if (!ndbCompatible_ndb_ndb(NDB_VERSION, cmRegConf->presidentVersion)) {
1184     jam();
1185     char buf[128];
1186     BaseString::snprintf(buf,sizeof(buf),
1187 			 "incompatible version own=0x%x other=0x%x, "
1188 			 " shutting down",
1189 			 NDB_VERSION, cmRegConf->presidentVersion);
1190     progError(__LINE__, NDBD_EXIT_UNSUPPORTED_VERSION, buf);
1191     return;
1192   }
1193 
1194   if (!ndb_check_hb_order_version(cmRegConf->presidentVersion) &&
1195       m_hb_order_config_used) {
1196     jam();
1197     char buf[128];
1198     BaseString::snprintf(buf,sizeof(buf),
1199 			 "incompatible version own=0x%x other=0x%x, "
1200 			 "due to user-defined HeartbeatOrder, shutting down",
1201 			 NDB_VERSION, cmRegConf->presidentVersion);
1202     progError(__LINE__, NDBD_EXIT_UNSUPPORTED_VERSION, buf);
1203     return;
1204   }
1205 
1206   if (m_connectivity_check.m_enabled &&
1207       !ndbd_connectivity_check(cmRegConf->presidentVersion))
1208   {
1209     jam();
1210     m_connectivity_check.m_enabled = false;
1211     ndbout_c("Disabling ConnectCheckIntervalDelay as president "
1212              " does not support it");
1213     infoEvent("Disabling ConnectCheckIntervalDelay as president "
1214               " does not support it");
1215   }
1216 
1217   myNodePtr.i = getOwnNodeId();
1218   ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);
1219 
1220   ndbrequire(c_start.m_gsn == GSN_CM_REGREQ);
1221   ndbrequire(myNodePtr.p->phase == ZSTARTING);
1222 
1223   cpdistref    = cmRegConf->presidentBlockRef;
1224   cpresident   = cmRegConf->presidentNodeId;
1225   UintR TdynamicId   = cmRegConf->dynamicId;
1226   c_maxDynamicId = TdynamicId & 0xFFFF;
1227   c_clusterNodes.assign(NdbNodeBitmask::Size, cmRegConf->allNdbNodes);
1228 
1229   myNodePtr.p->ndynamicId = TdynamicId;
1230 
1231   // set own MT config here or in REF, and others in CM_NODEINFOREQ/CONF
1232   setNodeInfo(getOwnNodeId()).m_lqh_workers = globalData.ndbMtLqhWorkers;
1233 
1234 /*--------------------------------------------------------------*/
1235 // Send this as an EVENT REPORT to inform about hearing about
1236 // other NDB node proclaiming to be president.
1237 /*--------------------------------------------------------------*/
1238   signal->theData[0] = NDB_LE_CM_REGCONF;
1239   signal->theData[1] = getOwnNodeId();
1240   signal->theData[2] = cpresident;
1241   signal->theData[3] = TdynamicId;
1242   sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);
1243 
1244   for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
1245     if (c_clusterNodes.get(nodePtr.i)){
1246       jamLine(nodePtr.i);
1247       ptrAss(nodePtr, nodeRec);
1248 
1249       ndbrequire(nodePtr.p->phase == ZINIT);
1250       nodePtr.p->phase = ZRUNNING;
1251 
1252       if(c_connectedNodes.get(nodePtr.i)){
1253 	jam();
1254 	sendCmNodeInfoReq(signal, nodePtr.i, myNodePtr.p);
1255       }
1256     }
1257   }
1258 
1259   c_start.m_gsn = GSN_CM_NODEINFOREQ;
1260   c_start.m_nodes = c_clusterNodes;
1261 
1262   if (ERROR_INSERTED(937))
1263   {
1264     CLEAR_ERROR_INSERT_VALUE;
1265     signal->theData[0] = 9999;
1266     sendSignalWithDelay(CMVMI_REF, GSN_NDB_TAMPER, signal, 500, 1);
1267   }
1268 
1269   return;
1270 }//Qmgr::execCM_REGCONF()
1271 
1272 void
check_readnodes_reply(Signal * signal,Uint32 nodeId,Uint32 gsn)1273 Qmgr::check_readnodes_reply(Signal* signal, Uint32 nodeId, Uint32 gsn)
1274 {
1275   NodeRecPtr myNodePtr;
1276   myNodePtr.i = getOwnNodeId();
1277   ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);
1278 
1279   NodeRecPtr nodePtr;
1280   nodePtr.i = nodeId;
1281   ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
1282 
1283   ndbrequire(c_readnodes_nodes.get(nodeId));
1284   ReadNodesConf* conf = (ReadNodesConf*)signal->getDataPtr();
1285   if (gsn == GSN_READ_NODESREF)
1286   {
1287     jam();
1288 retry:
1289     signal->theData[0] = reference();
1290     sendSignal(calcQmgrBlockRef(nodeId), GSN_READ_NODESREQ, signal, 1, JBA);
1291     return;
1292   }
1293 
1294   if (conf->masterNodeId == ZNIL)
1295   {
1296     jam();
1297     goto retry;
1298   }
1299 
1300   Uint32 president = conf->masterNodeId;
1301   if (president == cpresident)
1302   {
1303     jam();
1304     c_readnodes_nodes.clear(nodeId);
1305     return;
1306   }
1307 
1308   char buf[255];
1309   BaseString::snprintf(buf, sizeof(buf),
1310 		       "check StartPartialTimeout, "
1311 		       "node %d thinks %d is president, "
1312 		       "I think president is: %d",
1313 		       nodeId, president, cpresident);
1314 
1315   ndbout_c("%s", buf);
1316   CRASH_INSERTION(933);
1317 
1318   if (getNodeState().startLevel == NodeState::SL_STARTED)
1319   {
1320     jam();
1321     NdbNodeBitmask part;
1322     part.assign(NdbNodeBitmask::Size, conf->clusterNodes);
1323     FailRep* rep = (FailRep*)signal->getDataPtrSend();
1324     rep->failCause = FailRep::ZPARTITIONED_CLUSTER;
1325     rep->partitioned.president = cpresident;
1326     c_clusterNodes.copyto(NdbNodeBitmask::Size, rep->partitioned.partition);
1327     rep->partitioned.partitionFailSourceNodeId = getOwnNodeId();
1328     Uint32 ref = calcQmgrBlockRef(nodeId);
1329     Uint32 i = 0;
1330     /* Send source of event info if a node supports it */
1331     Uint32 length = FailRep::OrigSignalLength + FailRep::PartitionedExtraLength;
1332     while((i = part.find(i + 1)) != NdbNodeBitmask::NotFound)
1333     {
1334       if (i == nodeId)
1335 	continue;
1336       rep->failNodeId = i;
1337       bool sendSourceId = ndbd_fail_rep_source_node((getNodeInfo(i)).m_version);
1338       sendSignal(ref, GSN_FAIL_REP, signal,
1339                  length + (sendSourceId ? FailRep::SourceExtraLength : 0),
1340                  JBA);
1341     }
1342     rep->failNodeId = nodeId;
1343     bool sendSourceId = ndbd_fail_rep_source_node((getNodeInfo(nodeId)).m_version);
1344 
1345     sendSignal(ref, GSN_FAIL_REP, signal,
1346                length + (sendSourceId ? FailRep::SourceExtraLength : 0),
1347                JBB);
1348     return;
1349   }
1350 
1351   CRASH_INSERTION(932);
1352   CRASH_INSERTION(938);
1353 
1354   progError(__LINE__,
1355 	    NDBD_EXIT_PARTITIONED_SHUTDOWN,
1356 	    buf);
1357 
1358   ndbrequire(false);
1359 }
1360 
1361 void
sendCmNodeInfoReq(Signal * signal,Uint32 nodeId,const NodeRec * self)1362 Qmgr::sendCmNodeInfoReq(Signal* signal, Uint32 nodeId, const NodeRec * self){
1363   CmNodeInfoReq * const req = (CmNodeInfoReq*)signal->getDataPtrSend();
1364   req->nodeId = getOwnNodeId();
1365   req->dynamicId = self->ndynamicId;
1366   req->version = getNodeInfo(getOwnNodeId()).m_version;
1367   req->mysql_version = getNodeInfo(getOwnNodeId()).m_mysql_version;
1368   req->lqh_workers = getNodeInfo(getOwnNodeId()).m_lqh_workers;
1369   const Uint32 ref = calcQmgrBlockRef(nodeId);
1370   sendSignal(ref,GSN_CM_NODEINFOREQ, signal, CmNodeInfoReq::SignalLength, JBB);
1371   DEBUG_START(GSN_CM_NODEINFOREQ, nodeId, "");
1372 }
1373 
1374 /*
1375 4.4.11 CM_REGREF */
1376 /**--------------------------------------------------------------------------
1377  * Only a president or a president candidate can refuse a node to get added to
1378  * the cluster.
1379  * Refuse reasons:
1380  * ZBUSY         We know that the sender is the president and we have to
1381  *               make a new CM_REGREQ.
1382  * ZNOT_IN_CFG   This node number is not specified in the configfile,
1383  *               SYSTEM ERROR
1384  * ZELECTION     Sender is a president candidate, his timelimit
1385  *               hasn't expired so maybe someone else will show up.
1386  *               Update the CPRESIDENT_CANDIDATE, then wait for our
1387  *               timelimit to expire.
1388  *---------------------------------------------------------------------------*/
1389 /*******************************/
1390 /* CM_REGREF                  */
1391 /*******************************/
1392 static
1393 const char *
get_start_type_string(Uint32 st)1394 get_start_type_string(Uint32 st)
1395 {
1396   static char buf[256];
1397 
1398   if (st == 0)
1399   {
1400     return "<ANY>";
1401   }
1402   else
1403   {
1404     buf[0] = 0;
1405     for(Uint32 i = 0; i<NodeState::ST_ILLEGAL_TYPE; i++)
1406     {
1407       if (st & (1 << i))
1408       {
1409 	if (buf[0])
1410 	  strcat(buf, "/");
1411 	switch(i){
1412 	case NodeState::ST_INITIAL_START:
1413 	  strcat(buf, "inital start");
1414 	  break;
1415 	case NodeState::ST_SYSTEM_RESTART:
1416 	  strcat(buf, "system restart");
1417 	  break;
1418 	case NodeState::ST_NODE_RESTART:
1419 	  strcat(buf, "node restart");
1420 	  break;
1421 	case NodeState::ST_INITIAL_NODE_RESTART:
1422 	  strcat(buf, "initial node restart");
1423 	  break;
1424 	}
1425       }
1426     }
1427     return buf;
1428   }
1429 }
1430 
execCM_REGREF(Signal * signal)1431 void Qmgr::execCM_REGREF(Signal* signal)
1432 {
1433   jamEntry();
1434 
1435   CmRegRef* ref = (CmRegRef*)signal->getDataPtr();
1436   UintR TaddNodeno = ref->nodeId;
1437   UintR TrefuseReason = ref->errorCode;
1438   Uint32 candidate = ref->presidentCandidate;
1439   Uint32 node_gci = 1;
1440   Uint32 candidate_gci = 1;
1441   Uint32 start_type = ~0;
1442   NdbNodeBitmask skip_nodes;
1443   DEBUG_START3(signal, TrefuseReason);
1444 
1445   if (signal->getLength() == CmRegRef::SignalLength)
1446   {
1447     jam();
1448     node_gci = ref->latest_gci;
1449     candidate_gci = ref->candidate_latest_gci;
1450     start_type = ref->start_type;
1451     skip_nodes.assign(NdbNodeBitmask::Size, ref->skip_nodes);
1452   }
1453 
1454   c_start.m_regReqReqRecv++;
1455 
1456   // Ignore block reference in data[0]
1457 
1458   if(candidate != c_start.m_president_candidate)
1459   {
1460     jam();
1461     c_start.m_regReqReqRecv = ~0;
1462   }
1463 
1464   c_start.m_starting_nodes.set(TaddNodeno);
1465   if (node_gci)
1466   {
1467     jam();
1468     c_start.m_starting_nodes_w_log.set(TaddNodeno);
1469   }
1470   c_start.m_node_gci[TaddNodeno] = node_gci;
1471 
1472   skip_nodes.bitAND(c_definedNodes);
1473   c_start.m_skip_nodes.bitOR(skip_nodes);
1474 
1475   // set own MT config here or in CONF, and others in CM_NODEINFOREQ/CONF
1476   setNodeInfo(getOwnNodeId()).m_lqh_workers = globalData.ndbMtLqhWorkers;
1477 
1478   char buf[100];
1479   switch (TrefuseReason) {
1480   case CmRegRef::ZINCOMPATIBLE_VERSION:
1481     jam();
1482     progError(__LINE__, NDBD_EXIT_UNSUPPORTED_VERSION,
1483               "incompatible version, "
1484               "connection refused by running ndb node");
1485   case CmRegRef::ZINCOMPATIBLE_START_TYPE:
1486     jam();
1487     BaseString::snprintf(buf, sizeof(buf),
1488 			 "incompatible start type detected: node %d"
1489 			 " reports %s(%d) my start type: %s(%d)",
1490 			 TaddNodeno,
1491 			 get_start_type_string(start_type), start_type,
1492 			 get_start_type_string(c_start.m_start_type),
1493 			 c_start.m_start_type);
1494     progError(__LINE__, NDBD_EXIT_SR_RESTARTCONFLICT, buf);
1495     break;
1496   case CmRegRef::ZBUSY:
1497   case CmRegRef::ZBUSY_TO_PRES:
1498   case CmRegRef::ZBUSY_PRESIDENT:
1499     jam();
1500     cpresidentAlive = ZTRUE;
1501     signal->theData[3] = 0;
1502     break;
1503   case CmRegRef::ZNOT_IN_CFG:
1504     jam();
1505     progError(__LINE__, NDBD_EXIT_NODE_NOT_IN_CONFIG);
1506     break;
1507   case CmRegRef::ZNOT_DEAD:
1508     jam();
1509     progError(__LINE__, NDBD_EXIT_NODE_NOT_DEAD);
1510     break;
1511   case CmRegRef::ZSINGLE_USER_MODE:
1512     jam();
1513     progError(__LINE__, NDBD_EXIT_SINGLE_USER_MODE);
1514     break;
1515   /**
1516    * For generic refuse error.
1517    * e.g. in online upgrade, we can use this error code instead
1518    * of the incompatible error code.
1519    */
1520   case CmRegRef::ZGENERIC:
1521     jam();
1522     progError(__LINE__, NDBD_EXIT_GENERIC);
1523     break;
1524   case CmRegRef::ZELECTION:
1525     jam();
1526     if (candidate_gci > c_start.m_president_candidate_gci ||
1527 	(candidate_gci == c_start.m_president_candidate_gci &&
1528 	 candidate < c_start.m_president_candidate))
1529     {
1530       jam();
1531       //----------------------------------------
1532       /* We may already have a candidate      */
1533       /* choose the lowest nodeno             */
1534       //----------------------------------------
1535       signal->theData[3] = 2;
1536       c_start.m_president_candidate = candidate;
1537       c_start.m_president_candidate_gci = candidate_gci;
1538     } else {
1539       signal->theData[3] = 4;
1540     }//if
1541     break;
1542   case CmRegRef::ZNOT_PRESIDENT:
1543     jam();
1544     cpresidentAlive = ZTRUE;
1545     signal->theData[3] = 3;
1546     break;
1547   default:
1548     jam();
1549     signal->theData[3] = 5;
1550     /*empty*/;
1551     break;
1552   }//switch
1553 /*--------------------------------------------------------------*/
1554 // Send this as an EVENT REPORT to inform about hearing about
1555 // other NDB node proclaiming not to be president.
1556 /*--------------------------------------------------------------*/
1557   signal->theData[0] = NDB_LE_CM_REGREF;
1558   signal->theData[1] = getOwnNodeId();
1559   signal->theData[2] = TaddNodeno;
1560 //-----------------------------------------
1561 // signal->theData[3] filled in above
1562 //-----------------------------------------
1563   sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);
1564 
1565   if(cpresidentAlive == ZTRUE)
1566   {
1567     jam();
1568     DEBUG("cpresidentAlive");
1569     return;
1570   }
1571 
1572   if(c_start.m_regReqReqSent != c_start.m_regReqReqRecv)
1573   {
1574     jam();
1575     DEBUG(c_start.m_regReqReqSent << " != " << c_start.m_regReqReqRecv);
1576     return;
1577   }
1578 
1579   if(c_start.m_president_candidate != getOwnNodeId())
1580   {
1581     jam();
1582     DEBUG("i'm not the candidate");
1583     return;
1584   }
1585 
1586   /**
1587    * All connected nodes has agreed
1588    */
1589   if(check_startup(signal))
1590   {
1591     jam();
1592     electionWon(signal);
1593   }
1594 
1595   return;
1596 }//Qmgr::execCM_REGREF()
1597 
1598 Uint32
check_startup(Signal * signal)1599 Qmgr::check_startup(Signal* signal)
1600 {
1601   const NDB_TICKS now  = NdbTick_getCurrentTicks();
1602   const Uint64 elapsed = NdbTick_Elapsed(c_start_election_time,now).milliSec();
1603   const Uint64 partitionedTimeout = c_restartPartialTimeout
1604                                   + c_restartPartionedTimeout;
1605 
1606   const bool no_nodegroup_active =
1607     (c_restartNoNodegroupTimeout != ~Uint32(0)) &&
1608     (! c_start.m_no_nodegroup_nodes.isclear());
1609 
1610   /**
1611    * First see if we should wait more...
1612    */
1613   NdbNodeBitmask tmp;
1614   tmp.bitOR(c_start.m_skip_nodes);
1615   tmp.bitOR(c_start.m_starting_nodes);
1616 
1617   NdbNodeBitmask wait;
1618   wait.assign(c_definedNodes);
1619   wait.bitANDC(tmp);
1620 
1621   Uint32 retVal = 0;
1622   Uint32 incompleteng = MAX_NDB_NODES; // Illegal value
1623   NdbNodeBitmask report_mask;
1624 
1625   if ((c_start.m_latest_gci == 0) ||
1626       (c_start.m_start_type == (1 << NodeState::ST_INITIAL_START)))
1627   {
1628     if (tmp.equal(c_definedNodes))
1629     {
1630       jam();
1631       signal->theData[1] = 0x8000;
1632       report_mask.assign(c_definedNodes);
1633       report_mask.bitANDC(c_start.m_starting_nodes);
1634       retVal = 1;
1635       goto start_report;
1636     }
1637     else if (no_nodegroup_active)
1638     {
1639       if (elapsed < c_restartNoNodegroupTimeout)
1640       {
1641         signal->theData[1] = 6;
1642         signal->theData[2] = Uint32((c_restartNoNodegroupTimeout - elapsed + 500) / 1000);
1643         report_mask.assign(wait);
1644         retVal = 0;
1645         goto start_report;
1646       }
1647       tmp.bitOR(c_start.m_no_nodegroup_nodes);
1648       if (tmp.equal(c_definedNodes))
1649       {
1650         signal->theData[1] = 0x8000;
1651         report_mask.assign(c_definedNodes);
1652         report_mask.bitANDC(c_start.m_starting_nodes);
1653         retVal = 1;
1654         goto start_report;
1655       }
1656       else
1657       {
1658         jam();
1659         signal->theData[1] = 1;
1660         signal->theData[2] = ~0;
1661         report_mask.assign(wait);
1662         retVal = 0;
1663         goto start_report;
1664       }
1665     }
1666     else
1667     {
1668       jam();
1669       signal->theData[1] = 1;
1670       signal->theData[2] = ~0;
1671       report_mask.assign(wait);
1672       retVal = 0;
1673       goto start_report;
1674     }
1675   }
1676 
1677   if (elapsed >= c_restartNoNodegroupTimeout)
1678   {
1679     tmp.bitOR(c_start.m_no_nodegroup_nodes);
1680   }
1681 
1682   {
1683     const bool all = c_start.m_starting_nodes.equal(c_definedNodes);
1684     CheckNodeGroups* sd = (CheckNodeGroups*)&signal->theData[0];
1685 
1686     {
1687       /**
1688        * Check for missing node group directly
1689        */
1690       NdbNodeBitmask check;
1691       check.assign(c_definedNodes);
1692       check.bitANDC(c_start.m_starting_nodes);    // Not connected nodes
1693       check.bitOR(c_start.m_starting_nodes_w_log);
1694 
1695       sd->blockRef = reference();
1696       sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck;
1697       sd->mask = check;
1698       EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal,
1699                      CheckNodeGroups::SignalLength);
1700 
1701       if (sd->output == CheckNodeGroups::Lose)
1702       {
1703         jam();
1704         goto missing_nodegroup;
1705       }
1706     }
1707 
1708     sd->blockRef = reference();
1709     sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck;
1710     sd->mask = c_start.m_starting_nodes;
1711     EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal,
1712                    CheckNodeGroups::SignalLength);
1713 
1714     const Uint32 result = sd->output;
1715 
1716     sd->blockRef = reference();
1717     sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck;
1718     sd->mask = c_start.m_starting_nodes_w_log;
1719     EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal,
1720                    CheckNodeGroups::SignalLength);
1721 
1722     const Uint32 result_w_log = sd->output;
1723 
1724     if (tmp.equal(c_definedNodes))
1725     {
1726       /**
1727        * All nodes (wrt no-wait nodes) has connected...
1728        *   this means that we will now start or die
1729        */
1730       jam();
1731       switch(result_w_log){
1732       case CheckNodeGroups::Lose:
1733       {
1734         jam();
1735         goto missing_nodegroup;
1736       }
1737       case CheckNodeGroups::Win:
1738         signal->theData[1] = all ? 0x8001 : 0x8002;
1739         report_mask.assign(c_definedNodes);
1740         report_mask.bitANDC(c_start.m_starting_nodes);
1741         retVal = 1;
1742         goto check_log;
1743       case CheckNodeGroups::Partitioning:
1744         ndbrequire(result != CheckNodeGroups::Lose);
1745         signal->theData[1] =
1746           all ? 0x8001 : (result == CheckNodeGroups::Win ? 0x8002 : 0x8003);
1747         report_mask.assign(c_definedNodes);
1748         report_mask.bitANDC(c_start.m_starting_nodes);
1749         retVal = 1;
1750         goto check_log;
1751       }
1752     }
1753 
1754     if (elapsed < c_restartPartialTimeout)
1755     {
1756       jam();
1757 
1758       signal->theData[1] = c_restartPartialTimeout == (Uint32) ~0 ? 2 : 3;
1759       signal->theData[2] = Uint32((c_restartPartialTimeout - elapsed + 500) / 1000);
1760       report_mask.assign(wait);
1761       retVal = 0;
1762 
1763       if (no_nodegroup_active && elapsed < c_restartNoNodegroupTimeout)
1764       {
1765         signal->theData[1] = 7;
1766         signal->theData[2] = Uint32((c_restartNoNodegroupTimeout - elapsed + 500) / 1000);
1767       }
1768       else if (no_nodegroup_active && elapsed >= c_restartNoNodegroupTimeout)
1769       {
1770         report_mask.bitANDC(c_start.m_no_nodegroup_nodes);
1771       }
1772 
1773       goto start_report;
1774     }
1775 
1776     /**
1777      * Start partial has passed...check for partitioning...
1778      */
1779     switch(result_w_log){
1780     case CheckNodeGroups::Lose:
1781       jam();
1782       goto missing_nodegroup;
1783     case CheckNodeGroups::Partitioning:
1784       if (elapsed < partitionedTimeout && result != CheckNodeGroups::Win)
1785       {
1786         goto missinglog;
1787       }
1788       // Fall through...
1789     case CheckNodeGroups::Win:
1790       signal->theData[1] =
1791         all ? 0x8001 : (result == CheckNodeGroups::Win ? 0x8002 : 0x8003);
1792       report_mask.assign(c_definedNodes);
1793       report_mask.bitANDC(c_start.m_starting_nodes);
1794       retVal = 2;
1795       goto check_log;
1796     }
1797   }
1798   ndbrequire(false);
1799 
1800 check_log:
1801   jam();
1802   {
1803     Uint32 save[4+4*NdbNodeBitmask::Size];
1804     memcpy(save, signal->theData, sizeof(save));
1805 
1806     DihRestartReq * req = CAST_PTR(DihRestartReq, signal->getDataPtrSend());
1807     req->senderRef = 0;
1808     c_start.m_starting_nodes.copyto(NdbNodeBitmask::Size, req->nodemask);
1809     memcpy(req->node_gcis, c_start.m_node_gci, 4*MAX_NDB_NODES);
1810     EXECUTE_DIRECT(DBDIH, GSN_DIH_RESTARTREQ, signal,
1811 		   DihRestartReq::CheckLength);
1812 
1813     incompleteng = signal->theData[0];
1814     memcpy(signal->theData, save, sizeof(save));
1815 
1816     if (incompleteng != MAX_NDB_NODES)
1817     {
1818       jam();
1819       if (retVal == 1)
1820       {
1821 	jam();
1822 	goto incomplete_log;
1823       }
1824       else if (retVal == 2)
1825       {
1826 	if (elapsed <= partitionedTimeout)
1827 	{
1828 	  jam();
1829 	  goto missinglog;
1830 	}
1831 	else
1832 	{
1833 	  goto incomplete_log;
1834 	}
1835       }
1836       ndbrequire(false);
1837     }
1838   }
1839   goto start_report;
1840 
1841 missinglog:
1842   signal->theData[1] = c_restartPartionedTimeout == (Uint32) ~0 ? 4 : 5;
1843   signal->theData[2] = Uint32((partitionedTimeout - elapsed + 500) / 1000);
1844   report_mask.assign(c_definedNodes);
1845   report_mask.bitANDC(c_start.m_starting_nodes);
1846   retVal = 0;
1847   goto start_report;
1848 
1849 start_report:
1850   jam();
1851   {
1852     Uint32 sz = NdbNodeBitmask::Size;
1853     signal->theData[0] = NDB_LE_StartReport;
1854     signal->theData[3] = sz;
1855     Uint32* ptr = signal->theData+4;
1856     c_definedNodes.copyto(sz, ptr); ptr += sz;
1857     c_start.m_starting_nodes.copyto(sz, ptr); ptr += sz;
1858     c_start.m_skip_nodes.copyto(sz, ptr); ptr += sz;
1859     report_mask.copyto(sz, ptr); ptr+= sz;
1860     c_start.m_no_nodegroup_nodes.copyto(sz, ptr); ptr += sz;
1861     sendSignal(CMVMI_REF, GSN_EVENT_REP, signal,
1862 	       4+5*NdbNodeBitmask::Size, JBB);
1863   }
1864   return retVal;
1865 
1866 missing_nodegroup:
1867   jam();
1868   {
1869     char buf[100], mask1[100], mask2[100];
1870     c_start.m_starting_nodes.getText(mask1);
1871     tmp.assign(c_start.m_starting_nodes);
1872     tmp.bitANDC(c_start.m_starting_nodes_w_log);
1873     tmp.getText(mask2);
1874     BaseString::snprintf(buf, sizeof(buf),
1875 			 "Unable to start missing node group! "
1876 			 " starting: %s (missing fs for: %s)",
1877 			 mask1, mask2);
1878     progError(__LINE__, NDBD_EXIT_INSUFFICENT_NODES, buf);
1879     return 0;                                     // Deadcode
1880   }
1881 
1882 incomplete_log:
1883   jam();
1884   {
1885     char buf[100], mask1[100];
1886     c_start.m_starting_nodes.getText(mask1);
1887     BaseString::snprintf(buf, sizeof(buf),
1888 			 "Incomplete log for node group: %d! "
1889 			 " starting nodes: %s",
1890 			 incompleteng, mask1);
1891     progError(__LINE__, NDBD_EXIT_INSUFFICENT_NODES, buf);
1892     return 0;                                     // Deadcode
1893   }
1894 }
1895 
1896 void
electionWon(Signal * signal)1897 Qmgr::electionWon(Signal* signal)
1898 {
1899   NodeRecPtr myNodePtr;
1900   cpresident = getOwnNodeId(); /* This node becomes president. */
1901   myNodePtr.i = getOwnNodeId();
1902   ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);
1903 
1904   myNodePtr.p->phase = ZRUNNING;
1905 
1906   cpdistref = reference();
1907   cneighbourl = ZNIL;
1908   cneighbourh = ZNIL;
1909   myNodePtr.p->ndynamicId = 1 | (myNodePtr.p->hbOrder << 16);
1910   c_maxDynamicId = 1;
1911   c_clusterNodes.clear();
1912   c_clusterNodes.set(getOwnNodeId());
1913 
1914   cpresidentAlive = ZTRUE;
1915   NdbTick_Invalidate(&c_start_election_time);
1916   c_start.reset();
1917 
1918   signal->theData[0] = NDB_LE_CM_REGCONF;
1919   signal->theData[1] = getOwnNodeId();
1920   signal->theData[2] = cpresident;
1921   signal->theData[3] = myNodePtr.p->ndynamicId;
1922   sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);
1923 
1924   c_start.m_starting_nodes.clear(getOwnNodeId());
1925   if (c_start.m_starting_nodes.isclear())
1926   {
1927     jam();
1928     sendSttorryLab(signal, true);
1929   }
1930 }
1931 
1932 /*
1933 4.4.11 CONTINUEB */
1934 /*--------------------------------------------------------------------------*/
1935 /*                                                                          */
1936 /*--------------------------------------------------------------------------*/
1937 /****************************>---------------------------------------------*/
1938 /* CONTINUEB                 >        SENDER: Own block, Own node          */
1939 /****************************>-------+INPUT : TCONTINUEB_TYPE              */
1940 /*--------------------------------------------------------------*/
regreqTimeLimitLab(Signal * signal)1941 void Qmgr::regreqTimeLimitLab(Signal* signal)
1942 {
1943   if(cpresident == ZNIL)
1944   {
1945     if (c_start.m_president_candidate == ZNIL)
1946     {
1947       jam();
1948       c_start.m_president_candidate = getOwnNodeId();
1949     }
1950 
1951     cmInfoconf010Lab(signal);
1952   }
1953 }//Qmgr::regreqTimelimitLab()
1954 
1955 /**---------------------------------------------------------------------------
1956  * The new node will take care of giving information about own node and ask
1957  * all other nodes for nodeinfo. The new node will use CM_NODEINFOREQ for
 * that purpose. When connections to all running nodes have been set up, the
 * president will send a commit to all running nodes + the new node.
 * INPUT: NODE_PTR1 (must be set as ZNIL if we don't enter CONNECT_NODES)
 *                  from signal CM_NODEINFOCONF.
1962  *---------------------------------------------------------------------------*/
1963 /*******************************/
1964 /* CM_NODEINFOCONF            */
1965 /*******************************/
execCM_NODEINFOCONF(Signal * signal)1966 void Qmgr::execCM_NODEINFOCONF(Signal* signal)
1967 {
1968   DEBUG_START3(signal, "");
1969 
1970   jamEntry();
1971 
1972   CmNodeInfoConf * const conf = (CmNodeInfoConf*)signal->getDataPtr();
1973 
1974   const Uint32 nodeId = conf->nodeId;
1975   const Uint32 dynamicId = conf->dynamicId;
1976   const Uint32 version = conf->version;
1977   Uint32 mysql_version = conf->mysql_version;
1978   Uint32 lqh_workers = conf->lqh_workers;
1979   if (version < NDBD_SPLIT_VERSION)
1980   {
1981     jam();
1982     mysql_version = 0;
1983   }
1984   if (version < NDBD_MT_LQH_VERSION)
1985   {
1986     jam();
1987     lqh_workers = 0;
1988   }
1989 
1990   NodeRecPtr nodePtr;
1991   nodePtr.i = getOwnNodeId();
1992   ptrAss(nodePtr, nodeRec);
1993   ndbrequire(nodePtr.p->phase == ZSTARTING);
1994   ndbrequire(c_start.m_gsn == GSN_CM_NODEINFOREQ);
1995   c_start.m_nodes.clearWaitingFor(nodeId);
1996 
1997   /**
1998    * Update node info
1999    */
2000   NodeRecPtr replyNodePtr;
2001   replyNodePtr.i = nodeId;
2002   ptrCheckGuard(replyNodePtr, MAX_NDB_NODES, nodeRec);
2003   replyNodePtr.p->ndynamicId = dynamicId;
2004   replyNodePtr.p->blockRef = signal->getSendersBlockRef();
2005   setNodeInfo(replyNodePtr.i).m_version = version;
2006   setNodeInfo(replyNodePtr.i).m_mysql_version = mysql_version;
2007   setNodeInfo(replyNodePtr.i).m_lqh_workers = lqh_workers;
2008 
2009   recompute_version_info(NodeInfo::DB, version);
2010 
2011   if(!c_start.m_nodes.done()){
2012     jam();
2013     return;
2014   }
2015 
2016   /**********************************************<*/
2017   /* Send an ack. back to the president.          */
2018   /* CM_ACKADD                                    */
2019   /* The new node has been registered by all      */
2020   /* running nodes and has stored nodeinfo about  */
2021   /* all running nodes. The new node has to wait  */
2022   /* for CM_ADD (commit) from president to become */
2023   /* a running node in the cluster.               */
2024   /**********************************************<*/
2025   sendCmAckAdd(signal, getOwnNodeId(), CmAdd::Prepare);
2026   return;
2027 }//Qmgr::execCM_NODEINFOCONF()
2028 
2029 /**---------------------------------------------------------------------------
 * A new node sends nodeinfo about itself. The new node asks for the
 * corresponding nodeinfo back in CM_NODEINFOCONF.
2032  *---------------------------------------------------------------------------*/
2033 /*******************************/
2034 /* CM_NODEINFOREQ             */
2035 /*******************************/
execCM_NODEINFOREQ(Signal * signal)2036 void Qmgr::execCM_NODEINFOREQ(Signal* signal)
2037 {
2038   jamEntry();
2039 
2040   const Uint32 Tblockref = signal->getSendersBlockRef();
2041 
2042   NodeRecPtr nodePtr;
2043   nodePtr.i = getOwnNodeId();
2044   ptrAss(nodePtr, nodeRec);
2045   if(nodePtr.p->phase != ZRUNNING){
2046     jam();
2047     signal->theData[0] = reference();
2048     signal->theData[1] = getOwnNodeId();
2049     signal->theData[2] = ZNOT_RUNNING;
2050     sendSignal(Tblockref, GSN_CM_NODEINFOREF, signal, 3, JBB);
2051     return;
2052   }
2053 
2054   NodeRecPtr addNodePtr;
2055   CmNodeInfoReq * const req = (CmNodeInfoReq*)signal->getDataPtr();
2056   addNodePtr.i = req->nodeId;
2057   ptrCheckGuard(addNodePtr, MAX_NDB_NODES, nodeRec);
2058   addNodePtr.p->ndynamicId = req->dynamicId;
2059   addNodePtr.p->blockRef = signal->getSendersBlockRef();
2060   setNodeInfo(addNodePtr.i).m_version = req->version;
2061 
2062   Uint32 mysql_version = req->mysql_version;
2063   if (req->version < NDBD_SPLIT_VERSION)
2064     mysql_version = 0;
2065   setNodeInfo(addNodePtr.i).m_mysql_version = mysql_version;
2066 
2067   Uint32 lqh_workers = req->lqh_workers;
2068   if (req->version < NDBD_MT_LQH_VERSION)
2069     lqh_workers = 0;
2070   setNodeInfo(addNodePtr.i).m_lqh_workers = lqh_workers;
2071 
2072   c_maxDynamicId = req->dynamicId & 0xFFFF;
2073 
2074   cmAddPrepare(signal, addNodePtr, nodePtr.p);
2075 }//Qmgr::execCM_NODEINFOREQ()
2076 
2077 void
cmAddPrepare(Signal * signal,NodeRecPtr nodePtr,const NodeRec * self)2078 Qmgr::cmAddPrepare(Signal* signal, NodeRecPtr nodePtr, const NodeRec * self){
2079   jam();
2080 
2081   switch(nodePtr.p->phase){
2082   case ZINIT:
2083     jam();
2084     nodePtr.p->phase = ZSTARTING;
2085     return;
2086   case ZFAIL_CLOSING:
2087     jam();
2088 
2089 #if 1
2090     warningEvent("Received request to incorporate node %u, "
2091 		 "while error handling has not yet completed",
2092 		 nodePtr.i);
2093 
2094     ndbrequire(getOwnNodeId() != cpresident);
2095     ndbrequire(signal->header.theVerId_signalNumber == GSN_CM_ADD);
2096     c_start.m_nodes.clearWaitingFor();
2097     c_start.m_nodes.setWaitingFor(nodePtr.i);
2098     c_start.m_gsn = GSN_CM_NODEINFOCONF;
2099 #else
2100     warningEvent("Enabling communication to CM_ADD node %u state=%d",
2101 		 nodePtr.i,
2102 		 nodePtr.p->phase);
2103     nodePtr.p->phase = ZSTARTING;
2104     nodePtr.p->failState = NORMAL;
2105     signal->theData[0] = 0;
2106     signal->theData[1] = nodePtr.i;
2107     sendSignal(TRPMAN_REF, GSN_OPEN_COMORD, signal, 2, JBB);
2108 #endif
2109     return;
2110   case ZSTARTING:
2111     break;
2112   case ZRUNNING:
2113     jam();
2114   case ZPREPARE_FAIL:
2115     jam();
2116   case ZAPI_ACTIVATION_ONGOING:
2117     jam();
2118   case ZAPI_ACTIVE:
2119     jam();
2120   case ZAPI_INACTIVE:
2121     ndbrequire(false);
2122   }
2123 
2124   sendCmAckAdd(signal, nodePtr.i, CmAdd::Prepare);
2125   sendApiVersionRep(signal, nodePtr);
2126 
2127   /* President have prepared us */
2128   CmNodeInfoConf * conf = (CmNodeInfoConf*)signal->getDataPtrSend();
2129   conf->nodeId = getOwnNodeId();
2130   conf->dynamicId = self->ndynamicId;
2131   conf->version = getNodeInfo(getOwnNodeId()).m_version;
2132   conf->mysql_version = getNodeInfo(getOwnNodeId()).m_mysql_version;
2133   conf->lqh_workers = getNodeInfo(getOwnNodeId()).m_lqh_workers;
2134   sendSignal(nodePtr.p->blockRef, GSN_CM_NODEINFOCONF, signal,
2135 	     CmNodeInfoConf::SignalLength, JBB);
2136   DEBUG_START(GSN_CM_NODEINFOCONF, refToNode(nodePtr.p->blockRef), "");
2137 }
2138 
2139 void
sendApiVersionRep(Signal * signal,NodeRecPtr nodePtr)2140 Qmgr::sendApiVersionRep(Signal* signal, NodeRecPtr nodePtr)
2141 {
2142   if (getNodeInfo(nodePtr.i).m_version >= NDBD_NODE_VERSION_REP)
2143   {
2144     jam();
2145     Uint32 ref = calcQmgrBlockRef(nodePtr.i);
2146     for(Uint32 i = 1; i<MAX_NODES; i++)
2147     {
2148       jam();
2149       Uint32 version = getNodeInfo(i).m_version;
2150       Uint32 type = getNodeInfo(i).m_type;
2151       if (type != NodeInfo::DB && version)
2152       {
2153 	jam();
2154 	signal->theData[0] = i;
2155 	signal->theData[1] = version;
2156 	sendSignal(ref, GSN_NODE_VERSION_REP, signal, 2, JBB);
2157       }
2158     }
2159   }
2160 }
2161 
2162 void
sendCmAckAdd(Signal * signal,Uint32 nodeId,CmAdd::RequestType type)2163 Qmgr::sendCmAckAdd(Signal * signal, Uint32 nodeId, CmAdd::RequestType type){
2164 
2165   CmAckAdd * cmAckAdd = (CmAckAdd*)signal->getDataPtrSend();
2166   cmAckAdd->requestType = type;
2167   cmAckAdd->startingNodeId = nodeId;
2168   cmAckAdd->senderNodeId = getOwnNodeId();
2169   sendSignal(cpdistref, GSN_CM_ACKADD, signal, CmAckAdd::SignalLength, JBA);
2170   DEBUG_START(GSN_CM_ACKADD, cpresident, "");
2171 
2172   switch(type){
2173   case CmAdd::Prepare:
2174     return;
2175   case CmAdd::AddCommit:
2176   case CmAdd::CommitNew:
2177     break;
2178   }
2179 
2180   signal->theData[0] = nodeId;
2181   EXECUTE_DIRECT(NDBCNTR, GSN_CM_ADD_REP, signal, 1);
2182   jamEntry();
2183 }
2184 
2185 /*
2186 4.4.11 CM_ADD */
2187 /**--------------------------------------------------------------------------
 * Prepare a running node to add a new node to the cluster. The running node
 * will change phase of the new node from ZINIT to ZSTARTING. The running node
 * will also mark that we have received a prepare. When the new node has sent
 * us nodeinfo we can send an acknowledgement back to the president. When all
 * running nodes have acknowledged the new node, the president will send a
 * commit and we can change phase of the new node to ZRUNNING. The president
 * will also send CM_ADD to itself.
2195  *---------------------------------------------------------------------------*/
2196 /*******************************/
2197 /* CM_ADD                     */
2198 /*******************************/
execCM_ADD(Signal * signal)2199 void Qmgr::execCM_ADD(Signal* signal)
2200 {
2201   NodeRecPtr addNodePtr;
2202   jamEntry();
2203 
2204   NodeRecPtr nodePtr;
2205   nodePtr.i = getOwnNodeId();
2206   ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
2207 
2208   CRASH_INSERTION(940);
2209 
2210   CmAdd * const cmAdd = (CmAdd*)signal->getDataPtr();
2211   const CmAdd::RequestType type = (CmAdd::RequestType)cmAdd->requestType;
2212   addNodePtr.i = cmAdd->startingNodeId;
2213   //const Uint32 startingVersion = cmAdd->startingVersion;
2214   ptrCheckGuard(addNodePtr, MAX_NDB_NODES, nodeRec);
2215 
2216   DEBUG_START3(signal, type);
2217 
2218   if(nodePtr.p->phase == ZSTARTING){
2219     jam();
2220     /**
2221      * We are joining...
2222      */
2223     ndbrequire(addNodePtr.i == nodePtr.i);
2224     switch(type){
2225     case CmAdd::Prepare:
2226       ndbrequire(c_start.m_gsn == GSN_CM_NODEINFOREQ);
2227       /**
2228        * Wait for CM_NODEINFO_CONF
2229        */
2230       return;
2231     case CmAdd::CommitNew:
2232       /**
2233        * Tata. we're in the cluster
2234        */
2235       joinedCluster(signal, addNodePtr);
2236       return;
2237     case CmAdd::AddCommit:
2238       ndbrequire(false);
2239     }
2240   }
2241 
2242   switch (type) {
2243   case CmAdd::Prepare:
2244     cmAddPrepare(signal, addNodePtr, nodePtr.p);
2245     break;
2246   case CmAdd::AddCommit:{
2247     jam();
2248     ndbrequire(addNodePtr.p->phase == ZSTARTING);
2249     addNodePtr.p->phase = ZRUNNING;
2250     m_connectivity_check.reportNodeConnect(addNodePtr.i);
2251     set_hb_count(addNodePtr.i) = 0;
2252     c_clusterNodes.set(addNodePtr.i);
2253     findNeighbours(signal, __LINE__);
2254 
2255     /**
2256      * SEND A HEARTBEAT IMMEDIATELY TO DECREASE THE RISK THAT WE MISS EARLY
2257      * HEARTBEATS.
2258      */
2259     sendHeartbeat(signal);
2260     hb_send_timer.reset(NdbTick_getCurrentTicks());
2261 
2262     /**
2263      *  ENABLE COMMUNICATION WITH ALL BLOCKS WITH THE NEWLY ADDED NODE
2264      */
2265     EnableComReq *enableComReq = (EnableComReq *)signal->getDataPtrSend();
2266     enableComReq->m_senderRef = reference();
2267     enableComReq->m_senderData = ENABLE_COM_CM_ADD_COMMIT;
2268     NodeBitmask::clear(enableComReq->m_nodeIds);
2269     NodeBitmask::set(enableComReq->m_nodeIds, addNodePtr.i);
2270     sendSignal(TRPMAN_REF, GSN_ENABLE_COMREQ, signal,
2271                EnableComReq::SignalLength, JBB);
2272     break;
2273   }
2274   case CmAdd::CommitNew:
2275     jam();
2276     ndbrequire(false);
2277   }
2278 
2279 }//Qmgr::execCM_ADD()
2280 
2281 void
handleEnableComAddCommit(Signal * signal,Uint32 node)2282 Qmgr::handleEnableComAddCommit(Signal *signal, Uint32 node)
2283 {
2284   sendCmAckAdd(signal, node, CmAdd::AddCommit);
2285   if(getOwnNodeId() != cpresident){
2286     jam();
2287     c_start.reset();
2288   }
2289 }
2290 
2291 void
execENABLE_COMCONF(Signal * signal)2292 Qmgr::execENABLE_COMCONF(Signal *signal)
2293 {
2294   const EnableComConf *enableComConf =
2295     (const EnableComConf *)signal->getDataPtr();
2296   Uint32 state = enableComConf->m_senderData;
2297   Uint32 node = NodeBitmask::find(enableComConf->m_nodeIds, 0);
2298 
2299   jamEntry();
2300 
2301   switch (state)
2302   {
2303     case ENABLE_COM_CM_ADD_COMMIT:
2304       jam();
2305       /* Only exactly one node possible here. */
2306       ndbrequire(node != NodeBitmask::NotFound);
2307       ndbrequire(NodeBitmask::find(enableComConf->m_nodeIds, node + 1) ==
2308                  NodeBitmask::NotFound);
2309       handleEnableComAddCommit(signal, node);
2310       break;
2311 
2312     case ENABLE_COM_CM_COMMIT_NEW:
2313       jam();
2314       handleEnableComCommitNew(signal);
2315       break;
2316 
2317     case ENABLE_COM_API_REGREQ:
2318       jam();
2319       /* Only exactly one node possible here. */
2320       ndbrequire(node != NodeBitmask::NotFound);
2321       ndbrequire(NodeBitmask::find(enableComConf->m_nodeIds, node + 1) ==
2322                  NodeBitmask::NotFound);
2323       handleEnableComApiRegreq(signal, node);
2324       break;
2325 
2326     default:
2327       jam();
2328       ndbrequire(false);
2329   }
2330 }
2331 
2332 void
joinedCluster(Signal * signal,NodeRecPtr nodePtr)2333 Qmgr::joinedCluster(Signal* signal, NodeRecPtr nodePtr){
2334   /**
2335    * WE HAVE BEEN INCLUDED IN THE CLUSTER WE CAN START BEING PART OF THE
2336    * HEARTBEAT PROTOCOL AND WE WILL ALSO ENABLE COMMUNICATION WITH ALL
2337    * NODES IN THE CLUSTER.
2338    */
2339   nodePtr.p->phase = ZRUNNING;
2340   set_hb_count(nodePtr.i) = 0;
2341   findNeighbours(signal, __LINE__);
2342   c_clusterNodes.set(nodePtr.i);
2343   c_start.reset();
2344 
2345   /**
2346    * SEND A HEARTBEAT IMMEDIATELY TO DECREASE THE RISK
2347    * THAT WE MISS EARLY HEARTBEATS.
2348    */
2349   sendHeartbeat(signal);
2350   hb_send_timer.reset(NdbTick_getCurrentTicks());
2351 
2352   /**
2353    * ENABLE COMMUNICATION WITH ALL BLOCKS IN THE CURRENT CLUSTER AND SET
2354    * THE NODES IN THE CLUSTER TO BE RUNNING.
2355    */
2356   EnableComReq *enableComReq = (EnableComReq *)signal->getDataPtrSend();
2357   enableComReq->m_senderRef = reference();
2358   enableComReq->m_senderData = ENABLE_COM_CM_COMMIT_NEW;
2359   NodeBitmask::clear(enableComReq->m_nodeIds);
2360   jam();
2361   for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
2362     ptrAss(nodePtr, nodeRec);
2363     if ((nodePtr.p->phase == ZRUNNING) && (nodePtr.i != getOwnNodeId())) {
2364       /*-------------------------------------------------------------------*/
2365       // Enable full communication to all other nodes. Not really necessary
2366       // to open communication to ourself.
2367       /*-------------------------------------------------------------------*/
2368       jamLine(nodePtr.i);
2369       NodeBitmask::set(enableComReq->m_nodeIds, nodePtr.i);
2370     }//if
2371   }//for
2372 
2373   if (!NodeBitmask::isclear(enableComReq->m_nodeIds))
2374   {
2375     jam();
2376     sendSignal(TRPMAN_REF, GSN_ENABLE_COMREQ, signal,
2377                EnableComReq::SignalLength, JBB);
2378   }
2379   else
2380   {
2381     handleEnableComCommitNew(signal);
2382   }
2383 }
2384 
2385 void
handleEnableComCommitNew(Signal * signal)2386 Qmgr::handleEnableComCommitNew(Signal *signal)
2387 {
2388   sendSttorryLab(signal, true);
2389 
2390   sendCmAckAdd(signal, getOwnNodeId(), CmAdd::CommitNew);
2391 }
2392 
2393 /*  4.10.7 CM_ACKADD        - PRESIDENT IS RECEIVER -       */
2394 /*---------------------------------------------------------------------------*/
2395 /* Entry point for an ack add signal.
2396  * The TTYPE defines if it is a prepare or a commit.                         */
2397 /*---------------------------------------------------------------------------*/
execCM_ACKADD(Signal * signal)2398 void Qmgr::execCM_ACKADD(Signal* signal)
2399 {
2400   NodeRecPtr addNodePtr;
2401   NodeRecPtr senderNodePtr;
2402   jamEntry();
2403 
2404   CmAckAdd * const cmAckAdd = (CmAckAdd*)signal->getDataPtr();
2405   const CmAdd::RequestType type = (CmAdd::RequestType)cmAckAdd->requestType;
2406   addNodePtr.i = cmAckAdd->startingNodeId;
2407   senderNodePtr.i = cmAckAdd->senderNodeId;
2408 
2409   DEBUG_START3(signal, type);
2410 
2411   if (cpresident != getOwnNodeId()) {
2412     jam();
2413     /*-----------------------------------------------------------------------*/
2414     /* IF WE ARE NOT PRESIDENT THEN WE SHOULD NOT RECEIVE THIS MESSAGE.      */
2415     /*------------------------------------------------------------_----------*/
2416     warningEvent("Received CM_ACKADD from %d president=%d",
2417 		 senderNodePtr.i, cpresident);
2418     return;
2419   }//if
2420 
2421   if (addNodePtr.i != c_start.m_startNode) {
2422     jam();
2423     /*----------------------------------------------------------------------*/
2424     /* THIS IS NOT THE STARTING NODE. WE ARE ACTIVE NOW WITH ANOTHER START. */
2425     /*----------------------------------------------------------------------*/
2426     warningEvent("Received CM_ACKADD from %d with startNode=%d != own %d",
2427 		 senderNodePtr.i, addNodePtr.i, c_start.m_startNode);
2428     return;
2429   }//if
2430 
2431   ndbrequire(c_start.m_gsn == GSN_CM_ADD);
2432   c_start.m_nodes.clearWaitingFor(senderNodePtr.i);
2433   if(!c_start.m_nodes.done()){
2434     jam();
2435     return;
2436   }
2437 
2438   switch (type) {
2439   case CmAdd::Prepare:{
2440     jam();
2441 
2442     /*----------------------------------------------------------------------*/
2443     /* ALL RUNNING NODES HAVE PREPARED THE INCLUSION OF THIS NEW NODE.      */
2444     /*----------------------------------------------------------------------*/
2445     c_start.m_gsn = GSN_CM_ADD;
2446     c_start.m_nodes = c_clusterNodes;
2447 
2448     CmAdd * const cmAdd = (CmAdd*)signal->getDataPtrSend();
2449     cmAdd->requestType = CmAdd::AddCommit;
2450     cmAdd->startingNodeId = addNodePtr.i;
2451     cmAdd->startingVersion = getNodeInfo(addNodePtr.i).m_version;
2452     cmAdd->startingMysqlVersion = getNodeInfo(addNodePtr.i).m_mysql_version;
2453     NodeReceiverGroup rg(QMGR, c_clusterNodes);
2454     sendSignal(rg, GSN_CM_ADD, signal, CmAdd::SignalLength, JBA);
2455     DEBUG_START2(GSN_CM_ADD, rg, "AddCommit");
2456     return;
2457   }
2458   case CmAdd::AddCommit:{
2459     jam();
2460 
2461     /****************************************/
2462     /* Send commit to the new node so he    */
2463     /* will change PHASE into ZRUNNING      */
2464     /****************************************/
2465     c_start.m_gsn = GSN_CM_ADD;
2466     c_start.m_nodes.clearWaitingFor();
2467     c_start.m_nodes.setWaitingFor(addNodePtr.i);
2468 
2469     CmAdd * const cmAdd = (CmAdd*)signal->getDataPtrSend();
2470     cmAdd->requestType = CmAdd::CommitNew;
2471     cmAdd->startingNodeId = addNodePtr.i;
2472     cmAdd->startingVersion = getNodeInfo(addNodePtr.i).m_version;
2473     cmAdd->startingMysqlVersion = getNodeInfo(addNodePtr.i).m_mysql_version;
2474     sendSignal(calcQmgrBlockRef(addNodePtr.i), GSN_CM_ADD, signal,
2475 	       CmAdd::SignalLength, JBA);
2476     DEBUG_START(GSN_CM_ADD, addNodePtr.i, "CommitNew");
2477     /**
2478      * Report to DBDIH that a node have been added to the nodes included
2479      * in the heartbeat protocol.
2480      */
2481     InclNodeHBProtocolRep *rep = (InclNodeHBProtocolRep*)signal->getDataPtrSend();
2482     rep->nodeId = addNodePtr.i;
2483     EXECUTE_DIRECT(DBDIH, GSN_INCL_NODE_HB_PROTOCOL_REP, signal,
2484                    InclNodeHBProtocolRep::SignalLength);
2485     return;
2486   }
2487   case CmAdd::CommitNew:
2488     jam();
2489     /**
2490      * Tell arbitration about new node.
2491      */
2492     handleArbitNdbAdd(signal, addNodePtr.i);
2493     c_start.reset();
2494 
2495     if (c_start.m_starting_nodes.get(addNodePtr.i))
2496     {
2497       jam();
2498       c_start.m_starting_nodes.clear(addNodePtr.i);
2499       if (c_start.m_starting_nodes.isclear())
2500       {
2501 	jam();
2502 	sendSttorryLab(signal, true);
2503       }
2504     }
2505     return;
2506   }//switch
2507   ndbrequire(false);
2508 }//Qmgr::execCM_ACKADD()
2509 
2510 /**-------------------------------------------------------------------------
2511  * WE HAVE BEEN INCLUDED INTO THE CLUSTER. IT IS NOW TIME TO CALCULATE WHICH
2512  * ARE OUR LEFT AND RIGHT NEIGHBOURS FOR THE HEARTBEAT PROTOCOL.
2513  *--------------------------------------------------------------------------*/
findNeighbours(Signal * signal,Uint32 from)2514 void Qmgr::findNeighbours(Signal* signal, Uint32 from)
2515 {
2516   UintR toldLeftNeighbour;
2517   UintR tfnLeftFound;
2518   UintR tfnMaxFound;
2519   UintR tfnMinFound;
2520   UintR tfnRightFound;
2521   NodeRecPtr fnNodePtr;
2522   NodeRecPtr fnOwnNodePtr;
2523 
2524   Uint32 toldRightNeighbour = cneighbourh;
2525   toldLeftNeighbour = cneighbourl;
2526   tfnLeftFound = 0;
2527   tfnMaxFound = 0;
2528   tfnMinFound = (UintR)-1;
2529   tfnRightFound = (UintR)-1;
2530   fnOwnNodePtr.i = getOwnNodeId();
2531   ptrCheckGuard(fnOwnNodePtr, MAX_NDB_NODES, nodeRec);
2532   for (fnNodePtr.i = 1; fnNodePtr.i < MAX_NDB_NODES; fnNodePtr.i++) {
2533     ptrAss(fnNodePtr, nodeRec);
2534     if (fnNodePtr.i != fnOwnNodePtr.i) {
2535       jamLine(fnNodePtr.i);
2536       if (fnNodePtr.p->phase == ZRUNNING) {
2537         if (tfnMinFound > fnNodePtr.p->ndynamicId) {
2538           jam();
2539           tfnMinFound = fnNodePtr.p->ndynamicId;
2540         }//if
2541         if (tfnMaxFound < fnNodePtr.p->ndynamicId) {
2542           jam();
2543           tfnMaxFound = fnNodePtr.p->ndynamicId;
2544         }//if
2545         if (fnOwnNodePtr.p->ndynamicId > fnNodePtr.p->ndynamicId) {
2546           jam();
2547           if (fnNodePtr.p->ndynamicId > tfnLeftFound) {
2548             jam();
2549             tfnLeftFound = fnNodePtr.p->ndynamicId;
2550           }//if
2551         } else {
2552           jam();
2553           if (fnNodePtr.p->ndynamicId < tfnRightFound) {
2554             jam();
2555             tfnRightFound = fnNodePtr.p->ndynamicId;
2556           }//if
2557         }//if
2558       }//if
2559     }//if
2560   }//for
2561   if (tfnLeftFound == 0) {
2562     if (tfnMinFound == (UintR)-1) {
2563       jam();
2564       cneighbourl = ZNIL;
2565     } else {
2566       jam();
2567       cneighbourl = translateDynamicIdToNodeId(signal, tfnMaxFound);
2568     }//if
2569   } else {
2570     jam();
2571     cneighbourl = translateDynamicIdToNodeId(signal, tfnLeftFound);
2572   }//if
2573   if (tfnRightFound == (UintR)-1) {
2574     if (tfnMaxFound == 0) {
2575       jam();
2576       cneighbourh = ZNIL;
2577     } else {
2578       jam();
2579       cneighbourh = translateDynamicIdToNodeId(signal, tfnMinFound);
2580     }//if
2581   } else {
2582     jam();
2583     cneighbourh = translateDynamicIdToNodeId(signal, tfnRightFound);
2584   }//if
2585   if (toldLeftNeighbour != cneighbourl) {
2586     jam();
2587     if (cneighbourl != ZNIL) {
2588       jam();
2589       /**-------------------------------------------------------------------*/
2590       /* WE ARE SUPERVISING A NEW LEFT NEIGHBOUR. WE START WITH ALARM COUNT
2591        * EQUAL TO ZERO.
2592        *---------------------------------------------------------------------*/
2593       fnNodePtr.i = cneighbourl;
2594       ptrCheckGuard(fnNodePtr, MAX_NDB_NODES, nodeRec);
2595       set_hb_count(fnNodePtr.i) = 0;
2596     }//if
2597   }//if
2598 
2599   signal->theData[0] = NDB_LE_FIND_NEIGHBOURS;
2600   signal->theData[1] = getOwnNodeId();
2601   signal->theData[2] = cneighbourl;
2602   signal->theData[3] = cneighbourh;
2603   signal->theData[4] = fnOwnNodePtr.p->ndynamicId;
2604   UintR Tlen = 5;
2605   sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, Tlen, JBB);
2606   g_eventLogger->info("findNeighbours from: %u old (left: %u right: %u) new (%u %u)",
2607                       from,
2608                       toldLeftNeighbour,
2609                       toldRightNeighbour,
2610                       cneighbourl,
2611                       cneighbourh);
2612 }//Qmgr::findNeighbours()
2613 
2614 /*
2615 4.10.7 INIT_DATA        */
2616 /*---------------------------------------------------------------------------*/
2617 /*---------------------------------------------------------------------------*/
initData(Signal * signal)2618 void Qmgr::initData(Signal* signal)
2619 {
2620   // catch-all for missing initializations
2621   memset(&arbitRec, 0, sizeof(arbitRec));
2622 
2623   /**
2624    * Timeouts
2625    */
2626   const ndb_mgm_configuration_iterator * p =
2627     m_ctx.m_config.getOwnConfigIterator();
2628   ndbrequire(p != 0);
2629 
2630   Uint32 hbDBDB = 1500;
2631   Uint32 arbitTimeout = 1000;
2632   Uint32 arbitMethod = ARBIT_METHOD_DEFAULT;
2633   Uint32 ccInterval = 0;
2634   c_restartPartialTimeout = 30000;
2635   c_restartPartionedTimeout = 60000;
2636   c_restartFailureTimeout = ~0;
2637   c_restartNoNodegroupTimeout = 15000;
2638   ndb_mgm_get_int_parameter(p, CFG_DB_HEARTBEAT_INTERVAL, &hbDBDB);
2639   ndb_mgm_get_int_parameter(p, CFG_DB_ARBIT_TIMEOUT, &arbitTimeout);
2640   ndb_mgm_get_int_parameter(p, CFG_DB_ARBIT_METHOD, &arbitMethod);
2641   ndb_mgm_get_int_parameter(p, CFG_DB_START_PARTIAL_TIMEOUT,
2642 			    &c_restartPartialTimeout);
2643   ndb_mgm_get_int_parameter(p, CFG_DB_START_PARTITION_TIMEOUT,
2644 			    &c_restartPartionedTimeout);
2645   ndb_mgm_get_int_parameter(p, CFG_DB_START_NO_NODEGROUP_TIMEOUT,
2646 			    &c_restartNoNodegroupTimeout);
2647   ndb_mgm_get_int_parameter(p, CFG_DB_START_FAILURE_TIMEOUT,
2648 			    &c_restartFailureTimeout);
2649   ndb_mgm_get_int_parameter(p, CFG_DB_CONNECT_CHECK_DELAY,
2650                             &ccInterval);
2651 
2652   if(c_restartPartialTimeout == 0)
2653   {
2654     c_restartPartialTimeout = ~0;
2655   }
2656 
2657   if (c_restartPartionedTimeout ==0)
2658   {
2659     c_restartPartionedTimeout = ~0;
2660   }
2661 
2662   if (c_restartFailureTimeout == 0)
2663   {
2664     c_restartFailureTimeout = ~0;
2665   }
2666 
2667   if (c_restartNoNodegroupTimeout == 0)
2668   {
2669     c_restartNoNodegroupTimeout = ~0;
2670   }
2671 
2672   setHbDelay(hbDBDB);
2673   setCCDelay(ccInterval);
2674   setArbitTimeout(arbitTimeout);
2675 
2676   arbitRec.method = (ArbitRec::Method)arbitMethod;
2677   arbitRec.state = ARBIT_NULL;          // start state for all nodes
2678   arbitRec.apiMask[0].clear();          // prepare for ARBIT_CFG
2679 
2680   Uint32 sum = 0;
2681   ArbitSignalData* const sd = (ArbitSignalData*)&signal->theData[0];
2682   for (unsigned rank = 1; rank <= 2; rank++) {
2683     sd->sender = getOwnNodeId();
2684     sd->code = rank;
2685     sd->node = 0;
2686     sd->ticket.clear();
2687     sd->mask.clear();
2688     ndb_mgm_configuration_iterator * iter =
2689       m_ctx.m_config.getClusterConfigIterator();
2690     for (ndb_mgm_first(iter); ndb_mgm_valid(iter); ndb_mgm_next(iter)) {
2691       Uint32 tmp = 0;
2692       if (ndb_mgm_get_int_parameter(iter, CFG_NODE_ARBIT_RANK, &tmp) == 0 &&
2693 	  tmp == rank){
2694 	Uint32 nodeId = 0;
2695 	ndbrequire(!ndb_mgm_get_int_parameter(iter, CFG_NODE_ID, &nodeId));
2696 	sd->mask.set(nodeId);
2697       }
2698     }
2699     sum += sd->mask.count();
2700     execARBIT_CFG(signal);
2701   }
2702 
2703   if (arbitRec.method == ArbitRec::METHOD_DEFAULT &&
2704       sum == 0)
2705   {
2706     jam();
2707     infoEvent("Arbitration disabled, all API nodes have rank 0");
2708     arbitRec.method = ArbitRec::DISABLED;
2709   }
2710 
2711   setNodeInfo(getOwnNodeId()).m_mysql_version = NDB_MYSQL_VERSION_D;
2712 
2713   ndb_mgm_configuration_iterator * iter =
2714     m_ctx.m_config.getClusterConfigIterator();
2715   for (ndb_mgm_first(iter); ndb_mgm_valid(iter); ndb_mgm_next(iter))
2716   {
2717     jam();
2718     Uint32 nodeId = 0;
2719     if (ndb_mgm_get_int_parameter(iter, CFG_NODE_ID, &nodeId) == 0)
2720     {
2721       jam();
2722       if (nodeId < MAX_NDB_NODES && getNodeInfo(nodeId).m_type == NodeInfo::DB)
2723       {
2724         Uint32 hbOrder = 0;
2725         ndb_mgm_get_int_parameter(iter, CFG_DB_HB_ORDER, &hbOrder);
2726 
2727         NodeRecPtr nodePtr;
2728         nodePtr.i = nodeId;
2729         ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
2730         nodePtr.p->hbOrder = hbOrder;
2731       }
2732     }
2733   }
2734   int hb_order_error = check_hb_order_config();
2735   if (hb_order_error == -1)
2736   {
2737     char msg[] = "Illegal HeartbeatOrder config, "
2738                  "all nodes must have non-zero config value";
2739     progError(__LINE__, NDBD_EXIT_INVALID_CONFIG, msg);
2740     return;
2741   }
2742   if (hb_order_error == -2)
2743   {
2744     char msg[] = "Illegal HeartbeatOrder config, "
2745                  "the nodes must have distinct config values";
2746     progError(__LINE__, NDBD_EXIT_INVALID_CONFIG, msg);
2747     return;
2748   }
2749   ndbrequire(hb_order_error == 0);
2750 }//Qmgr::initData()
2751 
2752 
2753 /**---------------------------------------------------------------------------
2754  * HERE WE RECEIVE THE JOB TABLE SIGNAL EVERY 10 MILLISECONDS.
2755  * WE WILL USE THIS TO CHECK IF IT IS TIME TO CHECK THE NEIGHBOUR NODE.
2756  * WE WILL ALSO SEND A SIGNAL TO BLOCKS THAT NEED A TIME SIGNAL AND
2757  * DO NOT WANT TO USE JOB TABLE SIGNALS.
2758  *---------------------------------------------------------------------------*/
timerHandlingLab(Signal * signal)2759 void Qmgr::timerHandlingLab(Signal* signal)
2760 {
2761   const NDB_TICKS TcurrentTime = NdbTick_getCurrentTicks();
2762   NodeRecPtr myNodePtr;
2763   myNodePtr.i = getOwnNodeId();
2764   ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);
2765 
2766   const Uint32 sentHi = signal->theData[1];
2767   const Uint32 sentLo = signal->theData[2];
2768   const NDB_TICKS sent((Uint64(sentHi) << 32) | sentLo);
2769 
2770   if (NdbTick_Compare(sent,TcurrentTime) > 0)
2771   {
2772     jam();
2773     const Uint64 backwards = NdbTick_Elapsed(TcurrentTime,sent).milliSec();
2774     if (backwards > 0) //Ignore sub millisecond backticks
2775     {
2776       g_eventLogger->warning("timerHandlingLab, clock ticked backwards: %llu (ms)",
2777                               backwards);
2778     }
2779   }
2780   else
2781   {
2782     const Uint64 elapsed = NdbTick_Elapsed(sent,TcurrentTime).milliSec();
2783     if (elapsed >= 1000)
2784     {
2785       jam();
2786       g_eventLogger->warning("timerHandlingLab, expected 10ms sleep"
2787                              ", not scheduled for: %d (ms)", int(elapsed));
2788     }
2789     else if (elapsed >= 150)
2790     {
2791       g_eventLogger->info("timerHandlingLab, expected 10ms sleep"
2792                           ", not scheduled for: %d (ms)", int(elapsed));
2793     }
2794   }
2795 
2796   if (myNodePtr.p->phase == ZRUNNING) {
2797     jam();
2798     /**---------------------------------------------------------------------
2799      * WE ARE ONLY PART OF HEARTBEAT CLUSTER IF WE ARE UP AND RUNNING.
2800      *---------------------------------------------------------------------*/
2801     if (hb_send_timer.check(TcurrentTime)) {
2802       jam();
2803       sendHeartbeat(signal);
2804       hb_send_timer.reset(TcurrentTime);
2805     }
2806     if (likely(! m_connectivity_check.m_active))
2807     {
2808       if (hb_check_timer.check(TcurrentTime)) {
2809         jam();
2810         checkHeartbeat(signal);
2811         hb_check_timer.reset(TcurrentTime);
2812       }
2813     }
2814     else
2815     {
2816       /* Connectivity check */
2817       if (m_connectivity_check.m_timer.check(TcurrentTime)) {
2818         jam();
2819         checkConnectivityTimeSignal(signal);
2820         m_connectivity_check.m_timer.reset(TcurrentTime);
2821       }
2822     }
2823   }
2824 
2825   if (interface_check_timer.check(TcurrentTime)) {
2826     jam();
2827     interface_check_timer.reset(TcurrentTime);
2828     checkStartInterface(signal, TcurrentTime);
2829   }
2830 
2831   if (hb_api_timer.check(TcurrentTime))
2832   {
2833     jam();
2834     hb_api_timer.reset(TcurrentTime);
2835     apiHbHandlingLab(signal, TcurrentTime);
2836   }
2837 
2838   //--------------------------------------------------
2839   // Resend this signal with 10 milliseconds delay.
2840   //--------------------------------------------------
2841   signal->theData[0] = ZTIMER_HANDLING;
2842   signal->theData[1] = Uint32(TcurrentTime.getUint64() >> 32);
2843   signal->theData[2] = Uint32(TcurrentTime.getUint64());
2844   sendSignalWithDelay(QMGR_REF, GSN_CONTINUEB, signal, 10, 3);
2845   return;
2846 }//Qmgr::timerHandlingLab()
2847 
2848 /*---------------------------------------------------------------------------*/
2849 /*       THIS MODULE HANDLES THE SENDING AND RECEIVING OF HEARTBEATS.        */
2850 /*---------------------------------------------------------------------------*/
sendHeartbeat(Signal * signal)2851 void Qmgr::sendHeartbeat(Signal* signal)
2852 {
2853   NodeRecPtr localNodePtr;
2854   localNodePtr.i = cneighbourh;
2855   if (localNodePtr.i == ZNIL) {
2856     jam();
2857     /**---------------------------------------------------------------------
2858      * THERE ARE NO NEIGHBOURS. THIS IS POSSIBLE IF WE ARE THE ONLY NODE IN
2859      * THE CLUSTER.IN THIS CASE WE DO NOT NEED TO SEND ANY HEARTBEAT SIGNALS.
2860      *-----------------------------------------------------------------------*/
2861     return;
2862   }//if
2863   ptrCheckGuard(localNodePtr, MAX_NDB_NODES, nodeRec);
2864   signal->theData[0] = getOwnNodeId();
2865 
2866   sendSignal(localNodePtr.p->blockRef, GSN_CM_HEARTBEAT, signal, 1, JBA);
2867 #ifdef VM_TRACE
2868   signal->theData[0] = NDB_LE_SentHeartbeat;
2869   signal->theData[1] = localNodePtr.i;
2870   sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
2871 #endif
2872 }//Qmgr::sendHeartbeat()
2873 
checkHeartbeat(Signal * signal)2874 void Qmgr::checkHeartbeat(Signal* signal)
2875 {
2876   NodeRecPtr nodePtr;
2877 
2878   nodePtr.i = cneighbourl;
2879   if (nodePtr.i == ZNIL) {
2880     jam();
2881     /**---------------------------------------------------------------------
2882      * THERE ARE NO NEIGHBOURS. THIS IS POSSIBLE IF WE ARE THE ONLY NODE IN
2883      * THE CLUSTER. IN THIS CASE WE DO NOT NEED TO CHECK ANY HEARTBEATS.
2884      *-----------------------------------------------------------------------*/
2885     return;
2886   }//if
2887   ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
2888 
2889   set_hb_count(nodePtr.i)++;
2890   ndbrequire(nodePtr.p->phase == ZRUNNING);
2891   ndbrequire(getNodeInfo(nodePtr.i).m_type == NodeInfo::DB);
2892 
2893   if (get_hb_count(nodePtr.i) > 2)
2894   {
2895     signal->theData[0] = NDB_LE_MissedHeartbeat;
2896     signal->theData[1] = nodePtr.i;
2897     signal->theData[2] = get_hb_count(nodePtr.i) - 1;
2898     sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3, JBB);
2899   }
2900 
2901   if (get_hb_count(nodePtr.i) > 4)
2902   {
2903     jam();
2904     if (m_connectivity_check.getEnabled())
2905     {
2906       jam();
2907       /* Start connectivity check, indicating the cause */
2908       startConnectivityCheck(signal, FailRep::ZHEARTBEAT_FAILURE, nodePtr.i);
2909       return;
2910     }
2911     else
2912     {
2913       /**----------------------------------------------------------------------
2914        * OUR LEFT NEIGHBOUR HAVE KEPT QUIET FOR THREE CONSECUTIVE HEARTBEAT
2915        * PERIODS. THUS WE DECLARE HIM DOWN.
2916        *----------------------------------------------------------------------*/
2917       signal->theData[0] = NDB_LE_DeadDueToHeartbeat;
2918       signal->theData[1] = nodePtr.i;
2919       sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
2920 
2921       failReportLab(signal, nodePtr.i, FailRep::ZHEARTBEAT_FAILURE, getOwnNodeId());
2922       return;
2923     }
2924   }//if
2925 }//Qmgr::checkHeartbeat()
2926 
apiHbHandlingLab(Signal * signal,NDB_TICKS now)2927 void Qmgr::apiHbHandlingLab(Signal* signal, NDB_TICKS now)
2928 {
2929   NodeRecPtr TnodePtr;
2930 
2931   jam();
2932   for (TnodePtr.i = 1; TnodePtr.i < MAX_NODES; TnodePtr.i++) {
2933     const Uint32 nodeId = TnodePtr.i;
2934     ptrAss(TnodePtr, nodeRec);
2935 
2936     const NodeInfo::NodeType type = getNodeInfo(nodeId).getType();
2937     if(type == NodeInfo::DB)
2938       continue;
2939 
2940     if(type == NodeInfo::INVALID)
2941       continue;
2942 
2943     if (c_connectedNodes.get(nodeId))
2944     {
2945       jamLine(nodeId);
2946       set_hb_count(TnodePtr.i)++;
2947 
2948       if (get_hb_count(TnodePtr.i) > 2)
2949       {
2950 	signal->theData[0] = NDB_LE_MissedHeartbeat;
2951 	signal->theData[1] = nodeId;
2952 	signal->theData[2] = get_hb_count(TnodePtr.i) - 1;
2953 	sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3, JBB);
2954       }
2955 
2956       if (get_hb_count(TnodePtr.i) > 4)
2957       {
2958         jam();
2959 	/*------------------------------------------------------------------*/
2960 	/* THE API NODE HAS NOT SENT ANY HEARTBEAT FOR THREE SECONDS.
2961 	 * WE WILL DISCONNECT FROM IT NOW.
2962 	 *------------------------------------------------------------------*/
2963 	/*------------------------------------------------------------------*/
2964 	/* We call node_failed to release all connections for this api node */
2965 	/*------------------------------------------------------------------*/
2966 	signal->theData[0] = NDB_LE_DeadDueToHeartbeat;
2967 	signal->theData[1] = nodeId;
2968 	sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
2969 
2970         api_failed(signal, nodeId);
2971       }//if
2972     }//if
2973     else if (TnodePtr.p->phase == ZAPI_INACTIVE &&
2974              TnodePtr.p->m_secret != 0 &&
2975              NdbTick_Compare(now,TnodePtr.p->m_alloc_timeout) > 0)
2976     {
2977       jam();
2978       TnodePtr.p->m_secret = 0;
2979       warningEvent("Releasing node id allocation for node %u",
2980                    TnodePtr.i);
2981     }
2982   }//for
2983   return;
2984 }//Qmgr::apiHbHandlingLab()
2985 
checkStartInterface(Signal * signal,NDB_TICKS now)2986 void Qmgr::checkStartInterface(Signal* signal, NDB_TICKS now)
2987 {
2988   NodeRecPtr nodePtr;
2989   /*------------------------------------------------------------------------*/
2990   // This method is called once per second. After a disconnect we wait at
2991   // least three seconds before allowing new connects. We will also ensure
2992   // that handling of the failure is completed before we allow new connections.
2993   /*------------------------------------------------------------------------*/
2994   jam();
2995   for (nodePtr.i = 1; nodePtr.i < MAX_NODES; nodePtr.i++) {
2996     ptrAss(nodePtr, nodeRec);
2997     Uint32 type = getNodeInfo(nodePtr.i).m_type;
2998     if (nodePtr.p->phase == ZFAIL_CLOSING) {
2999       jamLine(nodePtr.i);
3000       set_hb_count(nodePtr.i)++;
3001       if (c_connectedNodes.get(nodePtr.i)){
3002         jam();
3003 	/*-------------------------------------------------------------------*/
3004 	// We need to ensure that the connection is not restored until it has
3005 	// been disconnected for at least three seconds.
3006 	/*-------------------------------------------------------------------*/
3007         set_hb_count(nodePtr.i) = 0;
3008       }//if
3009       if ((get_hb_count(nodePtr.i) > 3)
3010 	  && (nodePtr.p->failState == NORMAL)) {
3011 	/**------------------------------------------------------------------
3012 	 * WE HAVE DISCONNECTED THREE SECONDS AGO. WE ARE NOW READY TO
3013 	 * CONNECT AGAIN AND ACCEPT NEW REGISTRATIONS FROM THIS NODE.
3014 	 * WE WILL NOT ALLOW CONNECTIONS OF API NODES UNTIL API FAIL HANDLING
3015 	 * IS COMPLETE.
3016 	 *-------------------------------------------------------------------*/
3017         nodePtr.p->failState = NORMAL;
3018         nodePtr.p->m_secret = 0;
3019         switch(type){
3020         case NodeInfo::DB:
3021           jam();
3022           nodePtr.p->phase = ZINIT;
3023           break;
3024         case NodeInfo::MGM:
3025           jam();
3026           nodePtr.p->phase = ZAPI_INACTIVE;
3027           break;
3028         case NodeInfo::API:
3029           jam();
3030           if (c_allow_api_connect)
3031           {
3032             jam();
3033             nodePtr.p->phase = ZAPI_INACTIVE;
3034             break;
3035           }
3036           else
3037           {
3038             /**
3039              * Dont allow API node to connect before c_allow_api_connect
3040              */
3041             jam();
3042             set_hb_count(nodePtr.i) = 3;
3043             continue;
3044           }
3045         }
3046 
3047         set_hb_count(nodePtr.i) = 0;
3048         signal->theData[0] = 0;
3049         signal->theData[1] = nodePtr.i;
3050         sendSignal(TRPMAN_REF, GSN_OPEN_COMORD, signal, 2, JBB);
3051       }
3052       else
3053       {
3054         jam();
3055         if(((get_hb_count(nodePtr.i) + 1) % 60) == 0)
3056         {
3057           jam();
3058 	  char buf[256];
3059           if (getNodeInfo(nodePtr.i).m_type == NodeInfo::DB)
3060           {
3061             jam();
3062             BaseString::snprintf(buf, sizeof(buf),
3063                                  "Failure handling of node %d has not completed"
3064                                  " in %d min - state = %d",
3065                                  nodePtr.i,
3066                                  (get_hb_count(nodePtr.i)+1)/60,
3067                                  nodePtr.p->failState);
3068             warningEvent("%s", buf);
3069             if (((get_hb_count(nodePtr.i) + 1) % 300) == 0)
3070             {
3071               jam();
3072               /**
3073                * Also dump DIH nf-state
3074                */
3075               signal->theData[0] = DumpStateOrd::DihTcSumaNodeFailCompleted;
3076               signal->theData[1] = nodePtr.i;
3077               sendSignal(DBDIH_REF, GSN_DUMP_STATE_ORD, signal, 2, JBB);
3078             }
3079           }
3080           else
3081           {
3082             jam();
3083             BaseString::snprintf(buf, sizeof(buf),
3084                                  "Failure handling of api %u has not completed"
3085                                  " in %d min - state = %d",
3086                                  nodePtr.i,
3087                                  (get_hb_count(nodePtr.i)+1)/60,
3088                                  nodePtr.p->failState);
3089             warningEvent("%s", buf);
3090             if (nodePtr.p->failState == WAITING_FOR_API_FAILCONF)
3091             {
3092               jam();
3093               compile_time_assert(NDB_ARRAY_SIZE(nodePtr.p->m_failconf_blocks) == 5);
3094               BaseString::snprintf(buf, sizeof(buf),
3095                                    "  Waiting for blocks: %u %u %u %u %u",
3096                                    nodePtr.p->m_failconf_blocks[0],
3097                                    nodePtr.p->m_failconf_blocks[1],
3098                                    nodePtr.p->m_failconf_blocks[2],
3099                                    nodePtr.p->m_failconf_blocks[3],
3100                                    nodePtr.p->m_failconf_blocks[4]);
3101               warningEvent("%s", buf);
3102             }
3103           }
3104 	}
3105       }
3106     }
3107     else if (type == NodeInfo::DB && nodePtr.p->phase == ZINIT &&
3108              nodePtr.p->m_secret != 0 &&
3109              NdbTick_Compare(now,nodePtr.p->m_alloc_timeout) > 0)
3110     {
3111       jam();
3112       nodePtr.p->m_secret = 0;
3113       warningEvent("Releasing node id allocation for node %u",
3114                    nodePtr.i);
3115     }
3116   }//for
3117   return;
3118 }//Qmgr::checkStartInterface()
3119 
3120 /**-------------------------------------------------------------------------
3121  * This method is called when a DISCONNECT_REP signal arrived which means that
3122  * the API node is gone and we want to release resources in TC/DICT blocks.
3123  *---------------------------------------------------------------------------*/
sendApiFailReq(Signal * signal,Uint16 failedNodeNo,bool sumaOnly)3124 void Qmgr::sendApiFailReq(Signal* signal, Uint16 failedNodeNo, bool sumaOnly)
3125 {
3126   jamEntry();
3127   signal->theData[0] = failedNodeNo;
3128   signal->theData[1] = QMGR_REF;
3129 
3130   /* We route the ApiFailReq signals via CMVMI
3131    * This is done to ensure that they are received after
3132    * any pending signals from the failed Api node when
3133    * running ndbmtd, as these signals would be enqueued from
3134    * the thread running CMVMI
3135    */
3136   Uint32 routedSignalSectionI = RNIL;
3137   ndbrequire(appendToSection(routedSignalSectionI,
3138                              &signal->theData[0],
3139                              2));
3140   SectionHandle handle(this, routedSignalSectionI);
3141 
3142   /* RouteOrd data */
3143   RouteOrd* routeOrd = (RouteOrd*) &signal->theData[0];
3144   routeOrd->srcRef = reference();
3145   routeOrd->gsn = GSN_API_FAILREQ;
3146   routeOrd->from = failedNodeNo;
3147 
3148   NodeRecPtr failedNodePtr;
3149   failedNodePtr.i = failedNodeNo;
3150   ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec);
3151   failedNodePtr.p->failState = WAITING_FOR_API_FAILCONF;
3152 
3153 
3154   /* Send ROUTE_ORD signals to CMVMI via JBA
3155    * CMVMI will then immediately send the API_FAILREQ
3156    * signals to the destination block(s) using JBB
3157    * These API_FAILREQ signals will be sent *after*
3158    * any JBB signals enqueued from the failed API
3159    * by the CMVMI thread.
3160    */
3161   if (!sumaOnly)
3162   {
3163     jam();
3164     add_failconf_block(failedNodePtr, DBTC);
3165     routeOrd->dstRef = DBTC_REF;
3166     sendSignalNoRelease(TRPMAN_REF, GSN_ROUTE_ORD, signal,
3167                         RouteOrd::SignalLength,
3168                         JBA, &handle);
3169 
3170     add_failconf_block(failedNodePtr, DBDICT);
3171     routeOrd->dstRef = DBDICT_REF;
3172     sendSignalNoRelease(TRPMAN_REF, GSN_ROUTE_ORD, signal,
3173                         RouteOrd::SignalLength,
3174                         JBA, &handle);
3175 
3176     add_failconf_block(failedNodePtr, DBSPJ);
3177     routeOrd->dstRef = DBSPJ_REF;
3178     sendSignalNoRelease(TRPMAN_REF, GSN_ROUTE_ORD, signal,
3179                         RouteOrd::SignalLength,
3180                         JBA, &handle);
3181   }
3182 
3183   /* Suma always notified */
3184   add_failconf_block(failedNodePtr, SUMA);
3185   routeOrd->dstRef = SUMA_REF;
3186   sendSignal(TRPMAN_REF, GSN_ROUTE_ORD, signal,
3187              RouteOrd::SignalLength,
3188              JBA, &handle);
3189 }//Qmgr::sendApiFailReq()
3190 
execAPI_FAILREQ(Signal * signal)3191 void Qmgr::execAPI_FAILREQ(Signal* signal)
3192 {
3193   jamEntry();
3194   NodeRecPtr failedNodePtr;
3195   failedNodePtr.i = signal->theData[0];
3196   // signal->theData[1] == QMGR_REF
3197   ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec);
3198 
3199   ndbrequire(getNodeInfo(failedNodePtr.i).getType() != NodeInfo::DB);
3200 
3201   api_failed(signal, signal->theData[0]);
3202 }
3203 
execAPI_FAILCONF(Signal * signal)3204 void Qmgr::execAPI_FAILCONF(Signal* signal)
3205 {
3206   NodeRecPtr failedNodePtr;
3207 
3208   jamEntry();
3209   failedNodePtr.i = signal->theData[0];
3210   ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec);
3211 
3212   Uint32 block = refToMain(signal->theData[1]);
3213   if (failedNodePtr.p->failState != WAITING_FOR_API_FAILCONF ||
3214       !remove_failconf_block(failedNodePtr, block))
3215   {
3216     jam();
3217     ndbout << "execAPI_FAILCONF from " << block
3218            << " failedNodePtr.p->failState = "
3219 	   << (Uint32)(failedNodePtr.p->failState)
3220            << " blocks: ";
3221     for (Uint32 i = 0;i<NDB_ARRAY_SIZE(failedNodePtr.p->m_failconf_blocks);i++)
3222     {
3223       printf("%u ", failedNodePtr.p->m_failconf_blocks[i]);
3224     }
3225     ndbout << endl;
3226     systemErrorLab(signal, __LINE__);
3227   }//if
3228 
3229   if (is_empty_failconf_block(failedNodePtr))
3230   {
3231     jam();
3232     /**
3233      * When we set this state, connection will later be opened
3234      *   in checkStartInterface
3235      */
3236     failedNodePtr.p->failState = NORMAL;
3237 
3238     /**
3239      * Reset m_version only after all blocks has responded with API_FAILCONF
3240      *   so that no block risks reading 0 as node-version
3241      */
3242     setNodeInfo(failedNodePtr.i).m_version = 0;
3243     recompute_version_info(getNodeInfo(failedNodePtr.i).m_type);
3244   }
3245   return;
3246 }//Qmgr::execAPI_FAILCONF()
3247 
3248 void
add_failconf_block(NodeRecPtr nodePtr,Uint32 block)3249 Qmgr::add_failconf_block(NodeRecPtr nodePtr, Uint32 block)
3250 {
3251   // Check that it does not already exists!!
3252   Uint32 pos = 0;
3253   for (; pos < NDB_ARRAY_SIZE(nodePtr.p->m_failconf_blocks); pos++)
3254   {
3255     jam();
3256     if (nodePtr.p->m_failconf_blocks[pos] == 0)
3257     {
3258       jam();
3259       break;
3260     }
3261     else if (nodePtr.p->m_failconf_blocks[pos] == block)
3262     {
3263       jam();
3264       break;
3265     }
3266   }
3267 
3268   ndbrequire(pos != NDB_ARRAY_SIZE(nodePtr.p->m_failconf_blocks));
3269   ndbassert(nodePtr.p->m_failconf_blocks[pos] != block);
3270   if (nodePtr.p->m_failconf_blocks[pos] == block)
3271   {
3272     jam();
3273     /**
3274      * Already in list!!
3275      */
3276 #ifdef ERROR_INSERT
3277     ndbrequire(false);
3278 #endif
3279     return;
3280   }
3281   ndbrequire(nodePtr.p->m_failconf_blocks[pos] == 0);
3282   nodePtr.p->m_failconf_blocks[pos] = block;
3283 }
3284 
3285 bool
remove_failconf_block(NodeRecPtr nodePtr,Uint32 block)3286 Qmgr::remove_failconf_block(NodeRecPtr nodePtr, Uint32 block)
3287 {
3288   // Check that it does exists!!
3289   Uint32 pos = 0;
3290   for (; pos < NDB_ARRAY_SIZE(nodePtr.p->m_failconf_blocks); pos++)
3291   {
3292     jam();
3293     if (nodePtr.p->m_failconf_blocks[pos] == 0)
3294     {
3295       jam();
3296       break;
3297     }
3298     else if (nodePtr.p->m_failconf_blocks[pos] == block)
3299     {
3300       jam();
3301       break;
3302     }
3303   }
3304 
3305   if (pos == NDB_ARRAY_SIZE(nodePtr.p->m_failconf_blocks) ||
3306       nodePtr.p->m_failconf_blocks[pos] != block)
3307   {
3308     jam();
3309     /**
3310      * Not found!!
3311      */
3312     return false;
3313   }
3314 
3315   nodePtr.p->m_failconf_blocks[pos] = 0;
3316   for (pos++; pos < NDB_ARRAY_SIZE(nodePtr.p->m_failconf_blocks); pos++)
3317   {
3318     jam();
3319     nodePtr.p->m_failconf_blocks[pos - 1] = nodePtr.p->m_failconf_blocks[pos];
3320   }
3321 
3322   return true;
3323 }
3324 
3325 bool
is_empty_failconf_block(NodeRecPtr nodePtr) const3326 Qmgr::is_empty_failconf_block(NodeRecPtr nodePtr) const
3327 {
3328   return nodePtr.p->m_failconf_blocks[0] == 0;
3329 }
3330 
execNDB_FAILCONF(Signal * signal)3331 void Qmgr::execNDB_FAILCONF(Signal* signal)
3332 {
3333   NodeRecPtr failedNodePtr;
3334   NodeRecPtr nodePtr;
3335 
3336   jamEntry();
3337   failedNodePtr.i = signal->theData[0];
3338 
3339   if (ERROR_INSERTED(930))
3340   {
3341     CLEAR_ERROR_INSERT_VALUE;
3342     infoEvent("Discarding NDB_FAILCONF for %u", failedNodePtr.i);
3343     return;
3344   }
3345 
3346   ptrCheckGuard(failedNodePtr, MAX_NDB_NODES, nodeRec);
3347   if (failedNodePtr.p->failState == WAITING_FOR_NDB_FAILCONF)
3348   {
3349     g_eventLogger->info("Node %u has completed node fail handling",
3350                         failedNodePtr.i);
3351     failedNodePtr.p->failState = NORMAL;
3352   }
3353   else
3354   {
3355     jam();
3356 
3357     char buf[100];
3358     BaseString::snprintf(buf, 100,
3359 			 "Received NDB_FAILCONF for node %u with state: %d %d",
3360 			 failedNodePtr.i,
3361 			 failedNodePtr.p->phase,
3362 			 failedNodePtr.p->failState);
3363     progError(__LINE__, 0, buf);
3364     systemErrorLab(signal, __LINE__);
3365   }//if
3366 
3367   if (cpresident == getOwnNodeId())
3368   {
3369     jam();
3370 
3371     CRASH_INSERTION(936);
3372   }
3373 
3374   /**
3375    * Reset node version only after all blocks has handled the failure
3376    *   so that no block risks reading 0 as node version
3377    */
3378   setNodeInfo(failedNodePtr.i).m_version = 0;
3379   recompute_version_info(NodeInfo::DB);
3380 
3381   /**
3382    * Prepare a NFCompleteRep and send to all connected API's
3383    * They can then abort all transaction waiting for response from
3384    * the failed node
3385    *
3386    * NOTE: This is sent from all nodes, as otherwise we would need
3387    *       take-over if cpresident dies befor sending this
3388    */
3389   NFCompleteRep * const nfComp = (NFCompleteRep *)&signal->theData[0];
3390   nfComp->blockNo = QMGR_REF;
3391   nfComp->nodeId = getOwnNodeId();
3392   nfComp->failedNodeId = failedNodePtr.i;
3393 
3394   jam();
3395   for (nodePtr.i = 1; nodePtr.i < MAX_NODES; nodePtr.i++)
3396   {
3397     ptrAss(nodePtr, nodeRec);
3398     if (nodePtr.p->phase == ZAPI_ACTIVE){
3399       jamLine(nodePtr.i);
3400       sendSignal(nodePtr.p->blockRef, GSN_NF_COMPLETEREP, signal,
3401                  NFCompleteRep::SignalLength, JBB);
3402     }//if
3403   }//for
3404   return;
3405 }//Qmgr::execNDB_FAILCONF()
3406 
3407 void
execNF_COMPLETEREP(Signal * signal)3408 Qmgr::execNF_COMPLETEREP(Signal* signal)
3409 {
3410   jamEntry();
3411   NFCompleteRep rep = *(NFCompleteRep*)signal->getDataPtr();
3412   if (rep.blockNo != DBTC)
3413   {
3414     jam();
3415     ndbassert(false);
3416     return;
3417   }
3418 
3419   /**
3420    * This is a simple way of having ndbapi to get
3421    * earlier information that transactions can be aborted
3422    */
3423   signal->theData[0] = rep.failedNodeId;
3424   NodeRecPtr nodePtr;
3425   for (nodePtr.i = 1; nodePtr.i < MAX_NODES; nodePtr.i++)
3426   {
3427     ptrAss(nodePtr, nodeRec);
3428     if (nodePtr.p->phase == ZAPI_ACTIVE &&
3429         ndb_takeovertc(getNodeInfo(nodePtr.i).m_version))
3430     {
3431       jamLine(nodePtr.i);
3432       sendSignal(nodePtr.p->blockRef, GSN_TAKE_OVERTCCONF, signal,
3433                  NFCompleteRep::SignalLength, JBB);
3434     }//if
3435   }//for
3436   return;
3437 }
3438 
3439 /*******************************/
3440 /* DISCONNECT_REP             */
3441 /*******************************/
3442 const char *lookupConnectionError(Uint32 err);
3443 
execDISCONNECT_REP(Signal * signal)3444 void Qmgr::execDISCONNECT_REP(Signal* signal)
3445 {
3446   jamEntry();
3447   const DisconnectRep * const rep = (DisconnectRep *)&signal->theData[0];
3448   const Uint32 nodeId = rep->nodeId;
3449   const Uint32 err = rep->err;
3450   const NodeInfo nodeInfo = getNodeInfo(nodeId);
3451   c_connectedNodes.clear(nodeId);
3452 
3453   if (nodeInfo.getType() == NodeInfo::DB)
3454   {
3455     c_readnodes_nodes.clear(nodeId);
3456 
3457     if (ERROR_INSERTED(942))
3458     {
3459       g_eventLogger->info("DISCONNECT_REP received from data node %u - crash insertion",
3460                           nodeId);
3461       CRASH_INSERTION(942);
3462     }
3463   }
3464 
3465   NodeRecPtr nodePtr;
3466   nodePtr.i = getOwnNodeId();
3467   ptrCheckGuard(nodePtr, MAX_NODES, nodeRec);
3468 
3469   char buf[100];
3470   if (nodeInfo.getType() == NodeInfo::DB &&
3471       getNodeState().startLevel < NodeState::SL_STARTED)
3472   {
3473     jam();
3474     CRASH_INSERTION(932);
3475     CRASH_INSERTION(938);
3476     BaseString::snprintf(buf, 100, "Node %u disconnected", nodeId);
3477     progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf);
3478     ndbrequire(false);
3479   }
3480 
3481   if (getNodeInfo(nodeId).getType() != NodeInfo::DB)
3482   {
3483     jam();
3484     api_failed(signal, nodeId);
3485     return;
3486   }
3487 
3488   switch(nodePtr.p->phase){
3489   case ZRUNNING:
3490     jam();
3491     break;
3492   case ZINIT:
3493     ndbrequire(false);
3494   case ZSTARTING:
3495     progError(__LINE__, NDBD_EXIT_CONNECTION_SETUP_FAILED,
3496 	      lookupConnectionError(err));
3497     ndbrequire(false);
3498   case ZPREPARE_FAIL:
3499     ndbrequire(false);
3500   case ZFAIL_CLOSING:
3501     ndbrequire(false);
3502   case ZAPI_ACTIVATION_ONGOING:
3503     ndbrequire(false);
3504   case ZAPI_ACTIVE:
3505     ndbrequire(false);
3506   case ZAPI_INACTIVE:
3507   {
3508     BaseString::snprintf(buf, 100, "Node %u disconnected", nodeId);
3509     progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf);
3510     ndbrequire(false);
3511   }
3512   }
3513 
3514   if (ERROR_INSERTED(939) && ERROR_INSERT_EXTRA == nodeId)
3515   {
3516     ndbout_c("Ignoring DISCONNECT_REP for node %u that was force disconnected",
3517              nodeId);
3518     CLEAR_ERROR_INSERT_VALUE;
3519     return;
3520   }
3521 
3522   node_failed(signal, nodeId);
3523 }//DISCONNECT_REP
3524 
node_failed(Signal * signal,Uint16 aFailedNode)3525 void Qmgr::node_failed(Signal* signal, Uint16 aFailedNode)
3526 {
3527   NodeRecPtr failedNodePtr;
3528   /**------------------------------------------------------------------------
3529    *   A COMMUNICATION LINK HAS BEEN DISCONNECTED. WE MUST TAKE SOME ACTION
3530    *   DUE TO THIS.
3531    *-----------------------------------------------------------------------*/
3532   failedNodePtr.i = aFailedNode;
3533   ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec);
3534   failedNodePtr.p->m_secret = 0; // Not yet Uint64(rand()) << 32 + rand();
3535 
3536   ndbrequire(getNodeInfo(failedNodePtr.i).getType() == NodeInfo::DB);
3537 
3538   /**---------------------------------------------------------------------
3539    *   THE OTHER NODE IS AN NDB NODE, WE HANDLE IT AS IF A HEARTBEAT
3540    *   FAILURE WAS DISCOVERED.
3541    *---------------------------------------------------------------------*/
3542   switch(failedNodePtr.p->phase){
3543   case ZRUNNING:
3544     jam();
3545     failReportLab(signal, aFailedNode, FailRep::ZLINK_FAILURE, getOwnNodeId());
3546     return;
3547   case ZSTARTING:
3548     /**
3549      * bug#42422
3550      *   Force "real" failure handling
3551      */
3552     jam();
3553     failedNodePtr.p->phase = ZRUNNING;
3554     failReportLab(signal, aFailedNode, FailRep::ZLINK_FAILURE, getOwnNodeId());
3555     return;
3556   case ZFAIL_CLOSING:  // Close already in progress
3557     jam();
3558     return;
3559   case ZPREPARE_FAIL:  // PREP_FAIL already sent CLOSE_COMREQ
3560     jam();
3561     return;
3562   case ZINIT:
3563   {
3564     jam();
3565     /*---------------------------------------------------------------------*/
3566     // The other node is still not in the cluster but disconnected.
3567     // We must restart communication in three seconds.
3568     /*---------------------------------------------------------------------*/
3569     failedNodePtr.p->failState = NORMAL;
3570     failedNodePtr.p->phase = ZFAIL_CLOSING;
3571     set_hb_count(failedNodePtr.i) = 0;
3572 
3573     CloseComReqConf * const closeCom =
3574       (CloseComReqConf *)&signal->theData[0];
3575 
3576     closeCom->xxxBlockRef = reference();
3577     closeCom->requestType = CloseComReqConf::RT_NO_REPLY;
3578     closeCom->failNo      = 0;
3579     closeCom->noOfNodes   = 1;
3580     NodeBitmask::clear(closeCom->theNodes);
3581     NodeBitmask::set(closeCom->theNodes, failedNodePtr.i);
3582     sendSignal(TRPMAN_REF, GSN_CLOSE_COMREQ, signal,
3583                CloseComReqConf::SignalLength, JBB);
3584     return;
3585   }
3586   case ZAPI_ACTIVE:     // Unexpected states handled in ::api_failed()
3587     ndbrequire(false);
3588   case ZAPI_INACTIVE:
3589     ndbrequire(false);
3590   case ZAPI_ACTIVATION_ONGOING:
3591     ndbrequire(false);
3592   default:
3593     ndbrequire(false);  // Unhandled state
3594   }//switch
3595 
3596   return;
3597 }
3598 
3599 void
execUPGRADE_PROTOCOL_ORD(Signal * signal)3600 Qmgr::execUPGRADE_PROTOCOL_ORD(Signal* signal)
3601 {
3602   const UpgradeProtocolOrd* ord = (UpgradeProtocolOrd*)signal->getDataPtr();
3603   switch(ord->type){
3604   case UpgradeProtocolOrd::UPO_ENABLE_MICRO_GCP:
3605     jam();
3606     m_micro_gcp_enabled = true;
3607     return;
3608   }
3609 }
3610 
3611 void
api_failed(Signal * signal,Uint32 nodeId)3612 Qmgr::api_failed(Signal* signal, Uint32 nodeId)
3613 {
3614   NodeRecPtr failedNodePtr;
3615   /**------------------------------------------------------------------------
3616    *   A COMMUNICATION LINK HAS BEEN DISCONNECTED. WE MUST TAKE SOME ACTION
3617    *   DUE TO THIS.
3618    *-----------------------------------------------------------------------*/
3619   failedNodePtr.i = nodeId;
3620   ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec);
3621   failedNodePtr.p->m_secret = 0; // Not yet Uint64(rand()) << 32 + rand();
3622 
3623   if (failedNodePtr.p->phase == ZFAIL_CLOSING)
3624   {
3625     /**
3626      * Failure handling already in progress
3627      */
3628     jam();
3629     return;
3630   }
3631 
3632   ndbrequire(failedNodePtr.p->failState == NORMAL);
3633 
3634   /* Send API_FAILREQ to peer QMGR blocks to allow them to disconnect
3635    * quickly
3636    * Local application blocks get API_FAILREQ once all pending signals
3637    * from the failed API have been processed.
3638    */
3639   signal->theData[0] = failedNodePtr.i;
3640   signal->theData[1] = QMGR_REF;
3641   NodeReceiverGroup rg(QMGR, c_clusterNodes);
3642   sendSignal(rg, GSN_API_FAILREQ, signal, 2, JBA);
3643 
3644   /* Now ask CMVMI to disconnect the node */
3645   FailState initialState = (failedNodePtr.p->phase == ZAPI_ACTIVE) ?
3646     WAITING_FOR_CLOSECOMCONF_ACTIVE :
3647     WAITING_FOR_CLOSECOMCONF_NOTACTIVE;
3648 
3649   failedNodePtr.p->failState = initialState;
3650   failedNodePtr.p->phase = ZFAIL_CLOSING;
3651   set_hb_count(failedNodePtr.i) = 0;
3652 
3653   CloseComReqConf * const closeCom = (CloseComReqConf *)&signal->theData[0];
3654   closeCom->xxxBlockRef = reference();
3655   closeCom->requestType = CloseComReqConf::RT_API_FAILURE;
3656   closeCom->failNo      = 0;
3657   closeCom->noOfNodes   = 1;
3658   NodeBitmask::clear(closeCom->theNodes);
3659   NodeBitmask::set(closeCom->theNodes, failedNodePtr.i);
3660   sendSignal(TRPMAN_REF, GSN_CLOSE_COMREQ, signal,
3661              CloseComReqConf::SignalLength, JBB);
3662 } // api_failed
3663 
3664 /**--------------------------------------------------------------------------
3665  * AN API NODE IS REGISTERING. IF FOR THE FIRST TIME WE WILL ENABLE
3666  * COMMUNICATION WITH ALL NDB BLOCKS.
3667  *---------------------------------------------------------------------------*/
3668 /*******************************/
3669 /* API_REGREQ                 */
3670 /*******************************/
execAPI_REGREQ(Signal * signal)3671 void Qmgr::execAPI_REGREQ(Signal* signal)
3672 {
3673   jamEntry();
3674 
3675   ApiRegReq* req = (ApiRegReq*)signal->getDataPtr();
3676   const Uint32 version = req->version;
3677   const BlockReference ref = req->ref;
3678 
3679   Uint32 mysql_version = req->mysql_version;
3680   if (version < NDBD_SPLIT_VERSION)
3681     mysql_version = 0;
3682 
3683   NodeRecPtr apiNodePtr;
3684   apiNodePtr.i = refToNode(ref);
3685   ptrCheckGuard(apiNodePtr, MAX_NODES, nodeRec);
3686 
3687   if (apiNodePtr.p->phase == ZFAIL_CLOSING)
3688   {
3689     jam();
3690     /**
3691      * This node is pending CLOSE_COM_CONF
3692      *   ignore API_REGREQ
3693      */
3694     return;
3695   }
3696 
3697   if (!c_connectedNodes.get(apiNodePtr.i))
3698   {
3699     jam();
3700     /**
3701      * We have not yet heard execCONNECT_REP
3702      *   so ignore this until we do...
3703      */
3704     return;
3705   }
3706 
3707 #if 0
3708   ndbout_c("Qmgr::execAPI_REGREQ: Recd API_REGREQ (NodeId=%d)", apiNodePtr.i);
3709 #endif
3710 
3711   bool compatability_check;
3712   const char * extra = 0;
3713   NodeInfo::NodeType type= getNodeInfo(apiNodePtr.i).getType();
3714   switch(type){
3715   case NodeInfo::API:
3716     if (m_micro_gcp_enabled && !ndb_check_micro_gcp(version))
3717     {
3718       jam();
3719       compatability_check = false;
3720       extra = ": micro gcp enabled";
3721     }
3722     else
3723     {
3724       jam();
3725       compatability_check = ndbCompatible_ndb_api(NDB_VERSION, version);
3726     }
3727     break;
3728   case NodeInfo::MGM:
3729     compatability_check = ndbCompatible_ndb_mgmt(NDB_VERSION, version);
3730     break;
3731   case NodeInfo::DB:
3732   case NodeInfo::INVALID:
3733   default:
3734     sendApiRegRef(signal, ref, ApiRegRef::WrongType);
3735     infoEvent("Invalid connection attempt with type %d", type);
3736     return;
3737   }
3738 
3739   if (!compatability_check) {
3740     jam();
3741     char buf[NDB_VERSION_STRING_BUF_SZ];
3742     infoEvent("Connection attempt from %s id=%d with %s "
3743 	      "incompatible with %s%s",
3744 	      type == NodeInfo::API ? "api or mysqld" : "management server",
3745 	      apiNodePtr.i,
3746 	      ndbGetVersionString(version, mysql_version, 0,
3747                                   buf,
3748                                   sizeof(buf)),
3749 	      NDB_VERSION_STRING,
3750               extra ? extra : "");
3751     apiNodePtr.p->phase = ZAPI_INACTIVE;
3752     sendApiRegRef(signal, ref, ApiRegRef::UnsupportedVersion);
3753     return;
3754   }
3755 
3756   setNodeInfo(apiNodePtr.i).m_version = version;
3757   setNodeInfo(apiNodePtr.i).m_mysql_version = mysql_version;
3758   set_hb_count(apiNodePtr.i) = 0;
3759 
3760   NodeState state = getNodeState();
3761   if (apiNodePtr.p->phase == ZAPI_INACTIVE)
3762   {
3763     apiNodePtr.p->blockRef = ref;
3764     if ((state.startLevel == NodeState::SL_STARTED ||
3765          state.getSingleUserMode() ||
3766          (state.startLevel == NodeState::SL_STARTING &&
3767           state.starting.startPhase >= 8)))
3768     {
3769       jam();
3770       /**----------------------------------------------------------------------
3771        * THE API NODE IS REGISTERING. WE WILL ACCEPT IT BY CHANGING STATE AND
3772        * SENDING A CONFIRM. We set state to ZAPI_ACTIVATION_ONGOING to ensure
3773        * that we don't send unsolicited API_REGCONF or other things before we
3774        * actually fully enabled the node for communicating with the new API
3775        * node. It also avoids sending NODE_FAILREP, NF_COMPLETEREP and
3776        * TAKE_OVERTCCONF even before the API_REGCONF is sent. We will get a
3777        * fresh state of the nodes in API_REGCONF which is sufficient, no need
3778        * to update the API before the API got the initial state.
3779        *----------------------------------------------------------------------*/
3780       apiNodePtr.p->phase = ZAPI_ACTIVATION_ONGOING;
3781       EnableComReq *enableComReq = (EnableComReq *)signal->getDataPtrSend();
3782       enableComReq->m_senderRef = reference();
3783       enableComReq->m_senderData = ENABLE_COM_API_REGREQ;
3784       NodeBitmask::clear(enableComReq->m_nodeIds);
3785       NodeBitmask::set(enableComReq->m_nodeIds, apiNodePtr.i);
3786       sendSignal(TRPMAN_REF, GSN_ENABLE_COMREQ, signal,
3787                  EnableComReq::SignalLength, JBB);
3788       return;
3789     }
3790     /**
3791      * The node is in some kind of STOPPING state, so we send API_REGCONF even
3792      * though we've not enabled communication, if the API tries to send
3793      * anything to us anyways it will simply be ignored since only QMGR will
3794      * receive signals in this state. The API receives the node states, so it
3795      * should be able to discover what nodes that it is able to actually use.
3796      */
3797   }
3798 
3799   sendApiRegConf(signal, apiNodePtr.i);
3800 }//Qmgr::execAPI_REGREQ()
3801 
3802 void
handleEnableComApiRegreq(Signal * signal,Uint32 node)3803 Qmgr::handleEnableComApiRegreq(Signal *signal, Uint32 node)
3804 {
3805   NodeRecPtr apiNodePtr;
3806   NodeInfo::NodeType type = getNodeInfo(node).getType();
3807   Uint32 version = getNodeInfo(node).m_version;
3808   recompute_version_info(type, version);
3809 
3810   signal->theData[0] = node;
3811   signal->theData[1] = version;
3812   NodeReceiverGroup rg(QMGR, c_clusterNodes);
3813   rg.m_nodes.clear(getOwnNodeId());
3814   sendVersionedDb(rg, GSN_NODE_VERSION_REP, signal, 2, JBB,
3815                   NDBD_NODE_VERSION_REP);
3816 
3817   signal->theData[0] = node;
3818   EXECUTE_DIRECT(NDBCNTR, GSN_API_START_REP, signal, 1);
3819 
3820   apiNodePtr.i = node;
3821   ptrCheckGuard(apiNodePtr, MAX_NODES, nodeRec);
3822   if (apiNodePtr.p->phase == ZAPI_ACTIVATION_ONGOING)
3823   {
3824     /**
3825      * Now we're about to send API_REGCONF to an API node, this means
3826      * that this node can immediately start communicating to TC, SUMA
3827      * and so forth. The state also indicates that the API is ready
3828      * to receive an unsolicited API_REGCONF when the node goes to
3829      * state SL_STARTED.
3830      */
3831     jam();
3832     apiNodePtr.p->phase = ZAPI_ACTIVE;
3833     sendApiRegConf(signal, node);
3834   }
3835   jam();
3836   /**
3837    * Node is no longer in state ZAPI_ACTIVATION_ONGOING, the node must
3838    * have failed, we can ignore sending API_REGCONF to a failed node.
3839    */
3840 }
3841 
3842 void
execNODE_STARTED_REP(Signal * signal)3843 Qmgr::execNODE_STARTED_REP(Signal *signal)
3844 {
3845   NodeRecPtr apiNodePtr;
3846   for (apiNodePtr.i = 1;
3847        apiNodePtr.i < MAX_NODES;
3848        apiNodePtr.i++)
3849   {
3850     ptrCheckGuard(apiNodePtr, MAX_NODES, nodeRec);
3851     NodeInfo::NodeType type = getNodeInfo(apiNodePtr.i).getType();
3852     if (type != NodeInfo::API)
3853     {
3854       /* Not an API node */
3855       continue;
3856     }
3857     if (!c_connectedNodes.get(apiNodePtr.i))
3858     {
3859       /* API not connected */
3860       continue;
3861     }
3862     if (apiNodePtr.p->phase != ZAPI_ACTIVE)
3863     {
3864       /**
3865        * The phase variable can be in three states for the API nodes, it can
3866        * be ZAPI_INACTIVE for an API node that hasn't connected, it can be
3867        * ZFAIL_CLOSING for an API node that recently failed and is performing
3868        * failure handling. It can be in the state ZAPI_ACTIVE which it enters
3869        * upon us receiving an API_REGREQ from the API. So at this point the
3870        * API is also able to receive an unsolicited API_REGCONF message.
3871        */
3872       continue;
3873     }
3874     /**
3875      * We will send an unsolicited API_REGCONF to the API node, this makes the
3876      * API node aware of our existence much faster (without it can wait up to
3877      * the lenght of a heartbeat DB-API period. For rolling restarts and other
3878      * similar actions this can easily cause the API to not have any usable
3879      * DB connections at all. This unsolicited response minimises this window
3880      * of unavailability to zero for all practical purposes.
3881      */
3882     sendApiRegConf(signal, apiNodePtr.i);
3883   }
3884 }
3885 
3886 void
sendApiRegConf(Signal * signal,Uint32 node)3887 Qmgr::sendApiRegConf(Signal *signal, Uint32 node)
3888 {
3889   NodeRecPtr apiNodePtr;
3890   apiNodePtr.i = node;
3891   ptrCheckGuard(apiNodePtr, MAX_NODES, nodeRec);
3892   const BlockReference ref = apiNodePtr.p->blockRef;
3893   ndbassert(ref != 0);
3894 
3895   ApiRegConf * const apiRegConf = (ApiRegConf *)&signal->theData[0];
3896   apiRegConf->qmgrRef = reference();
3897   apiRegConf->apiHeartbeatFrequency = (chbApiDelay / 10);
3898   apiRegConf->version = NDB_VERSION;
3899   apiRegConf->mysql_version = NDB_MYSQL_VERSION_D;
3900   apiRegConf->nodeState = getNodeState();
3901   {
3902     NodeRecPtr nodePtr;
3903     nodePtr.i = getOwnNodeId();
3904     ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
3905     Uint32 dynamicId = nodePtr.p->ndynamicId;
3906 
3907     if(apiRegConf->nodeState.masterNodeId != getOwnNodeId()){
3908       jam();
3909       apiRegConf->nodeState.dynamicId = dynamicId;
3910     } else {
3911       apiRegConf->nodeState.dynamicId = (Uint32)(-(Int32)dynamicId);
3912     }
3913   }
3914   NodeVersionInfo info = getNodeVersionInfo();
3915   apiRegConf->minDbVersion = info.m_type[NodeInfo::DB].m_min_version;
3916   apiRegConf->nodeState.m_connected_nodes.assign(c_connectedNodes);
3917   sendSignal(ref, GSN_API_REGCONF, signal, ApiRegConf::SignalLength, JBB);
3918 }
3919 
3920 void
sendVersionedDb(NodeReceiverGroup rg,GlobalSignalNumber gsn,Signal * signal,Uint32 length,JobBufferLevel jbuf,Uint32 minversion)3921 Qmgr::sendVersionedDb(NodeReceiverGroup rg,
3922 		      GlobalSignalNumber gsn,
3923 		      Signal* signal,
3924 		      Uint32 length,
3925 		      JobBufferLevel jbuf,
3926 		      Uint32 minversion)
3927 {
3928   jam();
3929   NodeVersionInfo info = getNodeVersionInfo();
3930   if (info.m_type[NodeInfo::DB].m_min_version >= minversion)
3931   {
3932     jam();
3933     sendSignal(rg, gsn, signal, length, jbuf);
3934   }
3935   else
3936   {
3937     jam();
3938     Uint32 i = 0, cnt = 0;
3939     while((i = rg.m_nodes.find(i + 1)) != NodeBitmask::NotFound)
3940     {
3941       jam();
3942       if (getNodeInfo(i).m_version >= minversion)
3943       {
3944 	jam();
3945 	cnt++;
3946 	sendSignal(numberToRef(rg.m_block, i), gsn, signal, length, jbuf);
3947       }
3948     }
3949     ndbassert((cnt == 0 && rg.m_nodes.count() == 0) ||
3950 	      (cnt < rg.m_nodes.count()));
3951   }
3952 }
3953 
3954 void
execAPI_VERSION_REQ(Signal * signal)3955 Qmgr::execAPI_VERSION_REQ(Signal * signal) {
3956   jamEntry();
3957   ApiVersionReq * const req = (ApiVersionReq *)signal->getDataPtr();
3958 
3959   Uint32 senderRef = req->senderRef;
3960   Uint32 nodeId = req->nodeId;
3961 
3962   ApiVersionConf * conf = (ApiVersionConf *)req;
3963   if(getNodeInfo(nodeId).m_connected)
3964   {
3965     conf->version = getNodeInfo(nodeId).m_version;
3966     conf->mysql_version = getNodeInfo(nodeId).m_mysql_version;
3967     struct in_addr in= globalTransporterRegistry.get_connect_address(nodeId);
3968     conf->m_inet_addr= in.s_addr;
3969   }
3970   else
3971   {
3972     conf->version =  0;
3973     conf->mysql_version =  0;
3974     conf->m_inet_addr= 0;
3975   }
3976   conf->nodeId = nodeId;
3977 
3978   sendSignal(senderRef,
3979 	     GSN_API_VERSION_CONF,
3980 	     signal,
3981 	     ApiVersionConf::SignalLength, JBB);
3982 }
3983 
3984 void
execNODE_VERSION_REP(Signal * signal)3985 Qmgr::execNODE_VERSION_REP(Signal* signal)
3986 {
3987   jamEntry();
3988   Uint32 nodeId = signal->theData[0];
3989   Uint32 version = signal->theData[1];
3990 
3991   if (nodeId < MAX_NODES)
3992   {
3993     jam();
3994     Uint32 type = getNodeInfo(nodeId).m_type;
3995     setNodeInfo(nodeId).m_version = version;
3996     recompute_version_info(type, version);
3997   }
3998 }
3999 
4000 void
recompute_version_info(Uint32 type,Uint32 version)4001 Qmgr::recompute_version_info(Uint32 type, Uint32 version)
4002 {
4003   NodeVersionInfo& info = setNodeVersionInfo();
4004   switch(type){
4005   case NodeInfo::DB:
4006   case NodeInfo::API:
4007   case NodeInfo::MGM:
4008     break;
4009   default:
4010     return;
4011   }
4012 
4013   if (info.m_type[type].m_min_version == 0 ||
4014       version < info.m_type[type].m_min_version)
4015     info.m_type[type].m_min_version = version;
4016   if (version > info.m_type[type].m_max_version)
4017     info.m_type[type].m_max_version = version;
4018 }
4019 
4020 void
recompute_version_info(Uint32 type)4021 Qmgr::recompute_version_info(Uint32 type)
4022 {
4023   switch(type){
4024   case NodeInfo::DB:
4025   case NodeInfo::API:
4026   case NodeInfo::MGM:
4027     break;
4028   default:
4029     return;
4030   }
4031 
4032   Uint32 min = ~0, max = 0;
4033   Uint32 cnt = type == NodeInfo::DB ? MAX_NDB_NODES : MAX_NODES;
4034   for (Uint32 i = 1; i<cnt; i++)
4035   {
4036     if (getNodeInfo(i).m_type == type)
4037     {
4038       Uint32 version = getNodeInfo(i).m_version;
4039 
4040       if (version)
4041       {
4042 	if (version < min)
4043 	  min = version;
4044 	if (version > max)
4045 	  max = version;
4046       }
4047     }
4048   }
4049 
4050   NodeVersionInfo& info = setNodeVersionInfo();
4051   info.m_type[type].m_min_version = min == ~(Uint32)0 ? 0 : min;
4052   info.m_type[type].m_max_version = max;
4053 }
4054 
/**
 * NOTE: everything between '#if 0' and '#endif' is excluded from the build.
 * It holds an early version check for connecting nodes that is kept for
 * reference only.
 */
#if 0
bool
Qmgr::checkAPIVersion(NodeId nodeId,
		      Uint32 apiVersion, Uint32 ownVersion) const {
  bool ret=true;
  /**
   * First implementation...
   *
   * A node with a lower major or minor version than ours (and a version of
   * at least API_UPGRADE_VERSION) is refused unless it is a management
   * server.
   */
  if ((getMajor(apiVersion) < getMajor(ownVersion) ||
       getMinor(apiVersion) < getMinor(ownVersion)) &&
      apiVersion >= API_UPGRADE_VERSION) {
    jam();
    if ( getNodeInfo(nodeId).getType() !=  NodeInfo::MGM ) {
      jam();
      ret = false;
    } else {
      jam();
      /* we have a software upgrade situation, mgmtsrvr should be
       * the highest, let him decide what to do
       */
      ;
    }
  }
  return ret;
}
#endif
4081 
4082 void
sendApiRegRef(Signal * signal,Uint32 Tref,ApiRegRef::ErrorCode err)4083 Qmgr::sendApiRegRef(Signal* signal, Uint32 Tref, ApiRegRef::ErrorCode err){
4084   ApiRegRef* ref = (ApiRegRef*)signal->getDataPtrSend();
4085   ref->ref = reference();
4086   ref->version = NDB_VERSION;
4087   ref->mysql_version = NDB_MYSQL_VERSION_D;
4088   ref->errorCode = err;
4089   sendSignal(Tref, GSN_API_REGREF, signal, ApiRegRef::SignalLength, JBB);
4090 }
4091 
/**--------------------------------------------------------------------------
 * A NODE HAS BEEN DECLARED AS DOWN. WE WILL CLOSE THE COMMUNICATION TO THIS
 * NODE IF NOT ALREADY DONE. IF WE ARE PRESIDENT OR BECOME PRESIDENT BECAUSE
 * OF A FAILED PRESIDENT THEN WE WILL TAKE FURTHER ACTION.
 *
 * @param signal      Signal being processed. Its data is interpreted as a
 *                    FailRep only in the ZPARTITIONED_CLUSTER case, and the
 *                    signal is also used for sending.
 * @param aFailedNode Node that is reported as failed.
 * @param aFailCause  Reason for the failure report.
 * @param sourceNode  Node that reported the failure.
 *
 * If the failed node is this node itself, the process is stopped through
 * progError() with a message describing the cause.
 *---------------------------------------------------------------------------*/
void Qmgr::failReportLab(Signal* signal, Uint16 aFailedNode,
			 FailRep::FailCause aFailCause,
                         Uint16 sourceNode)
{
  NodeRecPtr nodePtr;
  NodeRecPtr failedNodePtr;
  NodeRecPtr myNodePtr;

  failedNodePtr.i = aFailedNode;
  ptrCheckGuard(failedNodePtr, MAX_NDB_NODES, nodeRec);
  // View of the signal data; only dereferenced for ZPARTITIONED_CLUSTER below
  FailRep* rep = (FailRep*)signal->getDataPtr();

  // Nothing more is done here when check_multi_node_shutdown() returns true
  if (check_multi_node_shutdown(signal))
  {
    jam();
    return;
  }

  if (isNodeConnectivitySuspect(sourceNode) &&
      // (! isNodeConnectivitySuspect(aFailedNode)) &&  // TODO : Required?
      ((aFailCause == FailRep::ZCONNECT_CHECK_FAILURE) ||
       (aFailCause == FailRep::ZLINK_FAILURE)))
  {
    jam();
    /* Connectivity related failure report from a node with suspect
     * connectivity, handle differently
     */
    ndbrequire(sourceNode != getOwnNodeId());

    handleFailFromSuspect(signal,
                          aFailCause,
                          aFailedNode,
                          sourceNode);
    return;
  }

  // We are the node that has been declared failed: build a message that
  // describes the cause and stop through progError()
  if (failedNodePtr.i == getOwnNodeId()) {
    jam();

    Uint32 code = NDBD_EXIT_NODE_DECLARED_DEAD;
    const char * msg = 0;
    char extra[100];
    switch(aFailCause){
    case FailRep::ZOWN_FAILURE:
      msg = "Own failure";
      break;
    case FailRep::ZOTHER_NODE_WHEN_WE_START:
    case FailRep::ZOTHERNODE_FAILED_DURING_START:
      msg = "Other node died during start";
      break;
    case FailRep::ZIN_PREP_FAIL_REQ:
      msg = "Prep fail";
      break;
    case FailRep::ZSTART_IN_REGREQ:
      msg = "Start timeout";
      break;
    case FailRep::ZHEARTBEAT_FAILURE:
      msg = "Heartbeat failure";
      break;
    case FailRep::ZLINK_FAILURE:
      msg = "Connection failure";
      break;
    case FailRep::ZPARTITIONED_CLUSTER:
    {
      // A different exit code is used, and the message lists our view of
      // the cluster nodes
      code = NDBD_EXIT_PARTITIONED_SHUTDOWN;
      char buf1[100], buf2[100];
      c_clusterNodes.getText(buf1);
      // The other partition is only read from the signal when this really
      // is a FAIL_REP carrying the partitioned extra words
      if (((signal->getLength()== FailRep::OrigSignalLength + FailRep::PartitionedExtraLength) ||
           (signal->getLength()== FailRep::SignalLength + FailRep::PartitionedExtraLength)) &&
          signal->header.theVerId_signalNumber == GSN_FAIL_REP)
      {
	jam();
	NdbNodeBitmask part;
	part.assign(NdbNodeBitmask::Size, rep->partitioned.partition);
	part.getText(buf2);
	BaseString::snprintf(extra, sizeof(extra),
			     "Our cluster: %s other cluster: %s",
			     buf1, buf2);
      }
      else
      {
	jam();
	BaseString::snprintf(extra, sizeof(extra),
			     "Our cluster: %s", buf1);
      }
      msg = extra;
      break;
    }
    case FailRep::ZMULTI_NODE_SHUTDOWN:
      msg = "Multi node shutdown";
      break;
    case FailRep::ZCONNECT_CHECK_FAILURE:
      msg = "Connectivity check failure";
      break;
    case FailRep::ZFORCED_ISOLATION:
      msg = "Forced isolation";
      // With error insert 942 the report is logged and otherwise ignored
      if (ERROR_INSERTED(942))
      {
        g_eventLogger->info("FAIL_REP FORCED_ISOLATION received from data node %u - ignoring.",
                            sourceNode);
        /* Let's wait for remote disconnection */
        return;
      }
      break;
    default:
      msg = "<UNKNOWN>";
    }

    CRASH_INSERTION(932);
    CRASH_INSERTION(938);

    char buf[255];
    BaseString::snprintf(buf, sizeof(buf),
			 "We(%u) have been declared dead by %u (via %u) reason: %s(%u)",
			 getOwnNodeId(),
                         sourceNode,
			 refToNode(signal->getSendersBlockRef()),
			 msg ? msg : "<Unknown>",
			 aFailCause);

    progError(__LINE__, code, buf);
    return;
  }//if

  // Another node failed: this node itself must be in phase ZRUNNING
  myNodePtr.i = getOwnNodeId();
  ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);
  if (myNodePtr.p->phase != ZRUNNING) {
    jam();
    systemErrorLab(signal, __LINE__);
    return;
  }//if

  // A node failure before this node has reached SL_STARTED stops this node
  if (getNodeState().startLevel < NodeState::SL_STARTED)
  {
    jam();
    CRASH_INSERTION(932);
    CRASH_INSERTION(938);
    char buf[100];
    BaseString::snprintf(buf, 100, "Node failure during restart");
    progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf);
    ndbrequire(false);
  }

  // Copy of cfailedNodes taken before failReport() is called; compared with
  // cfailedNodes further down to see whether the set changed
  const NdbNodeBitmask TfailedNodes(cfailedNodes);
  failReport(signal, failedNodePtr.i, (UintR)ZTRUE, aFailCause, sourceNode);

  /**
   * If any node is starting now (c_start.startNode != 0)
   *   include it in nodes handled by sendPrepFailReq
   */
  if (c_start.m_startNode != 0)
  {
    jam();
    cfailedNodes.set(c_start.m_startNode);
  }

  if (cpresident == getOwnNodeId()) {
    jam();
    if (ctoStatus == Q_NOT_ACTIVE) {
      jam();
      /**--------------------------------------------------------------------
       * AS PRESIDENT WE ARE REQUIRED TO START THE EXCLUSION PROCESS SUCH THAT
       * THE APPLICATION SEE NODE FAILURES IN A CONSISTENT ORDER.
       * IF WE HAVE BECOME PRESIDENT NOW (CTO_STATUS = ACTIVE) THEN WE HAVE
       * TO COMPLETE THE PREVIOUS COMMIT FAILED NODE PROCESS BEFORE STARTING
       * A NEW.
       * CTO_STATUS = ACTIVE CAN ALSO MEAN THAT WE ARE PRESIDENT AND ARE
       * CURRENTLY COMMITTING A SET OF NODE CRASHES. IN THIS CASE IT IS NOT
       * ALLOWED TO START PREPARING NEW NODE CRASHES.
       *---------------------------------------------------------------------*/
      // A new failure number is taken and PREP_FAILREQ is sent to every
      // running node only when the set of failed nodes changed
      if (!cfailedNodes.equal(TfailedNodes)) {
        jam();
        cfailureNr = cfailureNr + 1;
        for (nodePtr.i = 1;
             nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
          ptrAss(nodePtr, nodeRec);
          if (nodePtr.p->phase == ZRUNNING) {
            jamLine(nodePtr.i);
            sendPrepFailReq(signal, nodePtr.i);
          }//if
        }//for
      }//if
    }//if
  }
  return;
}//Qmgr::failReportLab()
4283 
/**-------------------------------------------------------------------------
 * WE HAVE RECEIVED A PREPARE TO EXCLUDE A NUMBER OF NODES FROM THE CLUSTER.
 * THE NODES LISTED IN THE SIGNAL ARE RECORDED AS FAILED, COMMIT IS BLOCKED
 * IN DBDIH, AND COMMUNICATION WITH THE FAILED NODES IS CLOSED THROUGH
 * sendCloseComReq().
 *--------------------------------------------------------------------------*/
/*******************************/
/* PREP_FAILREQ               */
/*******************************/
void Qmgr::execPREP_FAILREQ(Signal* signal)
{
  NodeRecPtr myNodePtr;
  PrepFailReqRef * const prepFail = (PrepFailReqRef *)&signal->theData[0];
  // Sender and failure number are saved first: the signal buffer is
  // overwritten further down when BLOCK_COMMIT_ORD is built
  BlockReference Tblockref  = prepFail->xxxBlockRef;
  Uint16 TfailureNr = prepFail->failNo;

  jamEntry();

  // Clear 'c_start.m_startNode' if it failed.
  if (NdbNodeBitmask::get(prepFail->theNodes, c_start.m_startNode))
  {
    jam();
    c_start.reset();
  }
  if (c_start.m_gsn == GSN_CM_NODEINFOCONF)
  {
    Uint32 nodeId;
    jam();
    /**
     * This is a very unusual event we are looking for, but still required
     * to be handled. The starting node has connected to the president and
     * managed to start the node inclusion protocol. We received an indication
     * of this from the president. The starting node now however fails before
     * it connected to us, so we need to clear the indication of that we
     * received CM_ADD(Prepare) from president since this belonged to an
     * already cancelled node restart.
     */
    for (nodeId = 1; nodeId < MAX_NDB_NODES; nodeId++)
    {
      if (c_start.m_nodes.isWaitingFor(nodeId) &&
          NdbNodeBitmask::get(prepFail->theNodes, nodeId))
      {
        jamLine(nodeId);
        /* Found such a condition as described above, clear state */
        c_start.m_gsn = RNIL;
        c_start.m_nodes.clearWaitingFor();
        break;
      }
    }
  }


  // Nothing more is done here when check_multi_node_shutdown() returns true
  if (check_multi_node_shutdown(signal))
  {
    jam();
    return;
  }

  // Test hook: with error insert 941, node 4 crashes when node 2 is among
  // the failed nodes
  if (ERROR_INSERTED(941) &&
      getOwnNodeId() == 4 &&
      NdbNodeBitmask::get(prepFail->theNodes, 2))
  {
    /* Insert ERROR_INSERT crash */
    CRASH_INSERTION(941);
  }

  // Take over the set of nodes to exclude from the signal
  cprepFailedNodes.assign(NdbNodeBitmask::Size, prepFail->theNodes);
  ndbassert(prepFail->noOfNodes == cprepFailedNodes.count());

  /**
   * Block commit until node failures has stabilized
   *
   * @See RT352
   */
  // From here on the signal buffer holds BLOCK_COMMIT_ORD, not PREP_FAILREQ
  BlockCommitOrd* const block = (BlockCommitOrd *)&signal->theData[0];
  block->failNo = TfailureNr;
  EXECUTE_DIRECT(DBDIH, GSN_BLOCK_COMMIT_ORD, signal,
		 BlockCommitOrd::SignalLength);

  // This node itself must be in phase ZRUNNING
  myNodePtr.i = getOwnNodeId();
  ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);
  if (myNodePtr.p->phase != ZRUNNING) {
    jam();
    systemErrorLab(signal, __LINE__);
    return;
  }//if

  // A node failure before this node has reached SL_STARTED stops this node
  if (getNodeState().startLevel < NodeState::SL_STARTED)
  {
    jam();
    CRASH_INSERTION(932);
    CRASH_INSERTION(938);
    char buf[100];
    BaseString::snprintf(buf, 100, "Node failure during restart");
    progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf);
    ndbrequire(false);
  }

  // Report every node of the prepare set as failed
  for (unsigned nodeId = 1; nodeId < MAX_NDB_NODES; nodeId++)
  {
    if (cprepFailedNodes.get(nodeId))
    {
      jam();
      failReport(signal,
                 nodeId,
                 (UintR)ZFALSE,
                 FailRep::ZIN_PREP_FAIL_REQ,
                 0); /* Source node not required (or known) here */
    }//if
  }//for
  // Close communication, passing on the saved sender and failure number,
  // and remember the failure number of this prepare round
  sendCloseComReq(signal, Tblockref, TfailureNr);
  ccommitFailedNodes.clear();
  cprepareFailureNr = TfailureNr;
  return;
}//Qmgr::execPREP_FAILREQ()
4398 
4399 
handleApiCloseComConf(Signal * signal)4400 void Qmgr::handleApiCloseComConf(Signal* signal)
4401 {
4402   jam();
4403   CloseComReqConf * const closeCom = (CloseComReqConf *)&signal->theData[0];
4404 
4405   /* Api failure special case */
4406   for(Uint32 nodeId = 0; nodeId < MAX_NODES; nodeId ++)
4407   {
4408     if (NodeBitmask::get(closeCom->theNodes, nodeId))
4409     {
4410       jam();
4411       /* Check that *only* 1 *API* node is included in
4412        * this CLOSE_COM_CONF
4413        */
4414       ndbrequire(getNodeInfo(nodeId).getType() != NodeInfo::DB);
4415       ndbrequire(closeCom->noOfNodes == 1);
4416       NodeBitmask::clear(closeCom->theNodes, nodeId);
4417       ndbrequire(NodeBitmask::isclear(closeCom->theNodes));
4418 
4419       /* Now that we know communication from the failed Api has
4420        * ceased, we can send the required API_FAILREQ signals
4421        * and continue API failure handling
4422        */
4423       NodeRecPtr failedNodePtr;
4424       failedNodePtr.i = nodeId;
4425       ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec);
4426 
4427       ndbrequire((failedNodePtr.p->failState ==
4428                   WAITING_FOR_CLOSECOMCONF_ACTIVE) ||
4429                  (failedNodePtr.p->failState ==
4430                   WAITING_FOR_CLOSECOMCONF_NOTACTIVE));
4431 
4432       if (failedNodePtr.p->failState == WAITING_FOR_CLOSECOMCONF_ACTIVE)
4433       {
4434         /**
4435          * Inform application blocks TC, DICT, SUMA etc.
4436          */
4437         jam();
4438         sendApiFailReq(signal, nodeId, false); // !sumaOnly
4439         arbitRec.code = ArbitCode::ApiFail;
4440         handleArbitApiFail(signal, nodeId);
4441       }
4442       else
4443       {
4444         /**
4445          * Always inform SUMA
4446          */
4447         jam();
4448         sendApiFailReq(signal, nodeId, true); // sumaOnly
4449       }
4450 
4451       if (getNodeInfo(failedNodePtr.i).getType() == NodeInfo::MGM)
4452       {
4453         /**
4454          * Allow MGM do reconnect "directly"
4455          */
4456         jam();
4457         set_hb_count(failedNodePtr.i) = 3;
4458       }
4459 
4460       /* Handled the single API node failure */
4461       return;
4462     }
4463   }
4464   /* Never get here */
4465   ndbrequire(false);
4466 }
4467 
4468 /**---------------------------------------------------------------------------
 * THE CRASHED NODES HAVE BEEN EXCLUDED FROM COMMUNICATION.
4470  * WE WILL CHECK WHETHER ANY MORE NODES HAVE FAILED DURING THE PREPARE PROCESS.
4471  * IF SO WE WILL REFUSE THE PREPARE PHASE AND EXPECT A NEW PREPARE MESSAGE
4472  * WITH ALL FAILED NODES INCLUDED.
4473  *---------------------------------------------------------------------------*/
4474 /*******************************/
4475 /* CLOSE_COMCONF              */
4476 /*******************************/
execCLOSE_COMCONF(Signal * signal)4477 void Qmgr::execCLOSE_COMCONF(Signal* signal)
4478 {
4479   jamEntry();
4480 
4481   CloseComReqConf * const closeCom = (CloseComReqConf *)&signal->theData[0];
4482 
4483   Uint32 requestType = closeCom->requestType;
4484 
4485   if (requestType == CloseComReqConf::RT_API_FAILURE)
4486   {
4487     jam();
4488     handleApiCloseComConf(signal);
4489     return;
4490   }
4491 
4492   /* Normal node failure preparation path */
4493   ndbassert(requestType == CloseComReqConf::RT_NODE_FAILURE);
4494   BlockReference Tblockref  = closeCom->xxxBlockRef;
4495   Uint16 TfailureNr = closeCom->failNo;
4496 
4497   cprepFailedNodes.assign(NdbNodeBitmask::Size, closeCom->theNodes);
4498   ndbassert(closeCom->noOfNodes == cprepFailedNodes.count());
4499 
4500   UintR tprepFailConf = ZTRUE;
4501 
4502   /* Check whether the set of nodes which have had communications
4503    * closed is the same as the set of failed nodes.
4504    * If it is, we can confirm the PREP_FAIL phase for this set
4505    * of nodes to the President.
4506    * If it is not, we Refuse the PREP_FAIL phase for this set
4507    * of nodes, the President will start a new PREP_FAIL phase
4508    * for the new set.
4509    */
4510   if (!cprepFailedNodes.contains(cfailedNodes)) {
4511     /* Failed node(s) is missing from the set, we will not
4512      * confirm this Prepare_Fail phase.
4513      * Store the node id in the array for later.
4514      */
4515     jam();
4516     tprepFailConf = ZFALSE;
4517     cprepFailedNodes.bitOR(cfailedNodes);
4518   }//if
4519   if (tprepFailConf == ZFALSE) {
4520     jam();
4521     /* Inform President that we cannot confirm the PREP_FAIL
4522      * phase as we are aware of at least one other node
4523      * failure
4524      */
4525     cfailedNodes = cprepFailedNodes;
4526 
4527     sendPrepFailReqRef(signal,
4528 		       Tblockref,
4529 		       GSN_PREP_FAILREF,
4530 		       reference(),
4531 		       cfailureNr,
4532 		       cprepFailedNodes);
4533   } else {
4534     /* We have prepared the failure of the requested nodes
4535      * send confirmation to the president
4536      */
4537     jam();
4538     ccommitFailedNodes = cprepFailedNodes;
4539 
4540     signal->theData[0] = getOwnNodeId();
4541     signal->theData[1] = TfailureNr;
4542     sendSignal(Tblockref, GSN_PREP_FAILCONF, signal, 2, JBA);
4543   }//if
4544   return;
4545 }//Qmgr::execCLOSE_COMCONF()
4546 
4547 /*---------------------------------------------------------------------------*/
/* WE HAVE RECEIVED CONFIRMATION THAT THIS NODE HAS PREPARED THE FAILURE.    */
4549 /*---------------------------------------------------------------------------*/
4550 /*******************************/
4551 /* PREP_FAILCONF              */
4552 /*******************************/
execPREP_FAILCONF(Signal * signal)4553 void Qmgr::execPREP_FAILCONF(Signal* signal)
4554 {
4555   NodeRecPtr nodePtr;
4556   NodeRecPtr replyNodePtr;
4557   jamEntry();
4558   replyNodePtr.i = signal->theData[0];
4559   Uint16 TfailureNr = signal->theData[1];
4560   if (TfailureNr != cfailureNr) {
4561     jam();
4562     /**----------------------------------------------------------------------
4563      * WE HAVE ALREADY STARTING A NEW ATTEMPT TO EXCLUDE A NUMBER OF NODES.
4564      *  IGNORE
4565      *----------------------------------------------------------------------*/
4566     return;
4567   }//if
4568   ptrCheckGuard(replyNodePtr, MAX_NDB_NODES, nodeRec);
4569   replyNodePtr.p->sendPrepFailReqStatus = Q_NOT_ACTIVE;
4570   for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
4571     ptrAss(nodePtr, nodeRec);
4572     if (nodePtr.p->phase == ZRUNNING) {
4573       if (nodePtr.p->sendPrepFailReqStatus == Q_ACTIVE) {
4574         jamLine(nodePtr.i);
4575         return;
4576       }//if
4577     }//if
4578   }//for
4579   /**
4580    * Check node count and groups and invoke arbitrator if necessary.
4581    * Continues via sendCommitFailReq() if successful.
4582    */
4583   arbitRec.failureNr = cfailureNr;
4584   const NodeState & s = getNodeState();
4585   if(s.startLevel == NodeState::SL_STOPPING_3 && s.stopping.systemShutdown){
4586     jam();
4587     /**
4588      * We're performing a system shutdown,
4589      * don't let artibtrator shut us down
4590      */
4591     return;
4592   }
4593 
4594   switch(arbitRec.method){
4595   case ArbitRec::DISABLED:
4596     jam();
4597     // No arbitration -> immediately commit the failed nodes
4598     sendCommitFailReq(signal);
4599     break;
4600 
4601   case ArbitRec::METHOD_EXTERNAL:
4602   case ArbitRec::METHOD_DEFAULT:
4603     jam();
4604     handleArbitCheck(signal);
4605     break;
4606 
4607   }
4608   return;
4609 }//Qmgr::execPREP_FAILCONF()
4610 
4611 void
sendCommitFailReq(Signal * signal)4612 Qmgr::sendCommitFailReq(Signal* signal)
4613 {
4614   NodeRecPtr nodePtr;
4615   jam();
4616   if (arbitRec.failureNr != cfailureNr) {
4617     jam();
4618     /**----------------------------------------------------------------------
4619      * WE HAVE ALREADY STARTING A NEW ATTEMPT TO EXCLUDE A NUMBER OF NODES.
4620      *  IGNORE
4621      *----------------------------------------------------------------------*/
4622     return;
4623   }//if
4624   /**-----------------------------------------------------------------------
4625    * WE HAVE SUCCESSFULLY PREPARED A SET OF NODE FAILURES. WE WILL NOW COMMIT
4626    * THESE NODE FAILURES.
4627    *-------------------------------------------------------------------------*/
4628   for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
4629     ptrAss(nodePtr, nodeRec);
4630 
4631 #ifdef ERROR_INSERT
4632     if (false && ERROR_INSERTED(935) && nodePtr.i == c_error_insert_extra)
4633     {
4634       ndbout_c("skipping node %d", c_error_insert_extra);
4635       CLEAR_ERROR_INSERT_VALUE;
4636       signal->theData[0] = 9999;
4637       sendSignalWithDelay(CMVMI_REF, GSN_NDB_TAMPER, signal, 1000, 1);
4638       continue;
4639     }
4640 #endif
4641 
4642     if (nodePtr.p->phase == ZRUNNING) {
4643       jamLine(nodePtr.i);
4644       nodePtr.p->sendCommitFailReqStatus = Q_ACTIVE;
4645       signal->theData[0] = cpdistref;
4646       signal->theData[1] = cfailureNr;
4647       sendSignal(nodePtr.p->blockRef, GSN_COMMIT_FAILREQ, signal, 2, JBA);
4648     }//if
4649   }//for
4650   ctoStatus = Q_ACTIVE;
4651   cfailedNodes.clear();
4652   return;
4653 }//sendCommitFailReq()
4654 
4655 /*---------------------------------------------------------------------------*/
/* SOME NODE HAS DISCOVERED A NODE FAILURE THAT WE HAVE NOT YET DISCOVERED.  */
4657 /* WE WILL START ANOTHER ROUND OF PREPARING A SET OF NODE FAILURES.          */
4658 /*---------------------------------------------------------------------------*/
4659 /*******************************/
4660 /* PREP_FAILREF               */
4661 /*******************************/
execPREP_FAILREF(Signal * signal)4662 void Qmgr::execPREP_FAILREF(Signal* signal)
4663 {
4664   NodeRecPtr nodePtr;
4665   jamEntry();
4666 
4667   PrepFailReqRef * const prepFail = (PrepFailReqRef *)&signal->theData[0];
4668 
4669   Uint16 TfailureNr = prepFail->failNo;
4670 
4671   cprepFailedNodes.assign(NdbNodeBitmask::Size, prepFail->theNodes);
4672   ndbassert(prepFail->noOfNodes == cprepFailedNodes.count());
4673 
4674   if (TfailureNr != cfailureNr) {
4675     jam();
4676     /**---------------------------------------------------------------------
4677      * WE HAVE ALREADY STARTING A NEW ATTEMPT TO EXCLUDE A NUMBER OF NODES.
4678      *  IGNORE
4679      *----------------------------------------------------------------------*/
4680     return;
4681   }//if
4682 
4683   cfailedNodes = cprepFailedNodes;
4684 
4685   cfailureNr = cfailureNr + 1;
4686   for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
4687     ptrAss(nodePtr, nodeRec);
4688     if (nodePtr.p->phase == ZRUNNING) {
4689       jamLine(nodePtr.i);
4690       sendPrepFailReq(signal, nodePtr.i);
4691     }//if
4692   }//for
4693   return;
4694 }//Qmgr::execPREP_FAILREF()
4695 
4696 /*---------------------------------------------------------------------------*/
4697 /*    THE PRESIDENT IS NOW COMMITTING THE PREVIOUSLY PREPARED NODE FAILURE.  */
4698 /*---------------------------------------------------------------------------*/
4699 /***********************/
4700 /* COMMIT_FAILREQ     */
4701 /***********************/
/**
 * COMMIT_FAILREQ handler.
 *
 * Requests whose sender reference differs from cpdistref are dropped.
 * Otherwise UNBLOCK_COMMIT_ORD is executed directly in DBDIH and, the first
 * time a given failure number is seen while ccommitFailedNodes is non-empty,
 * the failed nodes are committed: NODE_FAILREP is sent to NDBCNTR, the node
 * records are moved to ZFAIL_CLOSING, every node in phase ZAPI_ACTIVE is
 * sent NODE_FAILREP, and the committed nodes are removed from the
 * failed/prepared/commit sets. COMMIT_FAILCONF is always sent back.
 *
 * @param signal  theData[0] = sender block reference,
 *                theData[1] = failure number. The buffer is reused for all
 *                outgoing signals.
 */
void Qmgr::execCOMMIT_FAILREQ(Signal* signal)
{
  NodeRecPtr nodePtr;
  jamEntry();

  CRASH_INSERTION(935);

  // Both inputs are copied out first; the signal buffer is overwritten below.
  BlockReference Tblockref = signal->theData[0];
  UintR TfailureNr = signal->theData[1];
  if (Tblockref != cpdistref) {
    jam();
    // Sender reference is not the one stored in cpdistref : ignore.
    return;
  }//if

  /**
   * Block commit until node failures has stabilized
   *
   * @See RT352
   *
   * NOTE(review): the signal actually executed here is
   * GSN_UNBLOCK_COMMIT_ORD - confirm the wording above.
   */
  // 'unblock' aliases signal->theData[0], so this overwrites the request.
  UnblockCommitOrd* const unblock = (UnblockCommitOrd *)&signal->theData[0];
  unblock->failNo = TfailureNr;
  EXECUTE_DIRECT(DBDIH, GSN_UNBLOCK_COMMIT_ORD, signal,
		 UnblockCommitOrd::SignalLength);

  if ((ccommitFailureNr != TfailureNr) &&
      (!ccommitFailedNodes.isclear()))
  {
    jam();
    /**-----------------------------------------------------------------------
     * WE ONLY DO THIS PART OF THE COMMIT HANDLING THE FIRST TIME WE HEAR THIS
     * SIGNAL. WE CAN HEAR IT SEVERAL TIMES IF THE PRESIDENTS KEEP FAILING.
     *-----------------------------------------------------------------------*/
    ccommitFailureNr = TfailureNr;
    // Build NODE_FAILREP in place in the signal buffer.
    NodeFailRep * const nodeFail = (NodeFailRep *)&signal->theData[0];

    nodeFail->failNo    = ccommitFailureNr;
    nodeFail->masterNodeId = cpresident;
    nodeFail->noOfNodes = ccommitFailedNodes.count();
    ccommitFailedNodes.copyto(NdbNodeBitmask::Size, nodeFail->theNodes);

    if (ERROR_INSERTED(936))
    {
      // Error insert 936 : deliver NODE_FAILREP to NDBCNTR with a delay.
      sendSignalWithDelay(NDBCNTR_REF, GSN_NODE_FAILREP, signal,
                          200, NodeFailRep::SignalLength);
    }
    else
    {
      sendSignal(NDBCNTR_REF, GSN_NODE_FAILREP, signal,
                 NodeFailRep::SignalLength, JBB);
    }

    /**--------------------------------------------------------------------
     * WE MUST PREPARE TO ACCEPT THE CRASHED NODE INTO THE CLUSTER AGAIN BY
     * SETTING UP CONNECTIONS AGAIN AFTER THREE SECONDS OF DELAY.
     *--------------------------------------------------------------------*/
    for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
      if (ccommitFailedNodes.get(nodePtr.i)) {
        jamLine(nodePtr.i);
        ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
        nodePtr.p->phase = ZFAIL_CLOSING;
        nodePtr.p->failState = WAITING_FOR_NDB_FAILCONF;
        set_hb_count(nodePtr.i) = 0;
        c_clusterNodes.clear(nodePtr.i);
      }//if
    }//for

    /*----------------------------------------------------------------------*/
    /*       WE INFORM THE API'S WE HAVE CONNECTED ABOUT THE FAILED NODES.  */
    /*----------------------------------------------------------------------*/
    // This scan covers all node ids up to MAX_NODES, not only MAX_NDB_NODES.
    for (nodePtr.i = 1; nodePtr.i < MAX_NODES; nodePtr.i++) {
      ptrAss(nodePtr, nodeRec);
      if (nodePtr.p->phase == ZAPI_ACTIVE) {
        jamLine(nodePtr.i);

	// NOTE(review): masterNodeId is not re-assigned here; presumably it
	// still holds the value written before the NDBCNTR send - confirm
	// that sendSignal leaves signal->theData untouched.
	NodeFailRep * const nodeFail = (NodeFailRep *)&signal->theData[0];

	nodeFail->failNo    = ccommitFailureNr;
	nodeFail->noOfNodes = ccommitFailedNodes.count();
	ccommitFailedNodes.copyto(NdbNodeBitmask::Size, nodeFail->theNodes);

        sendSignal(nodePtr.p->blockRef, GSN_NODE_FAILREP, signal,
		   NodeFailRep::SignalLength, JBB);
      }//if
    }//for

    /**
     * Remove committed nodes from failed/prepared
     */
    cfailedNodes.bitANDC(ccommitFailedNodes);
    cprepFailedNodes.bitANDC(ccommitFailedNodes);
    ccommitFailedNodes.clear();
  }//if
  /**-----------------------------------------------------------------------
   * WE WILL ALWAYS ACKNOWLEDGE THE COMMIT EVEN WHEN RECEIVING IT MULTIPLE
   * TIMES SINCE IT WILL ALWAYS COME FROM A NEW PRESIDENT.
   *------------------------------------------------------------------------*/
  signal->theData[0] = getOwnNodeId();
  sendSignal(Tblockref, GSN_COMMIT_FAILCONF, signal, 1, JBA);
  return;
}//Qmgr::execCOMMIT_FAILREQ()
4802 
4803 /*--------------------------------------------------------------------------*/
/* WE HAVE RECEIVED CONFIRMATION THAT THIS NODE HAS COMMITTED THE FAILURES. */
4805 /*--------------------------------------------------------------------------*/
4806 /*******************************/
4807 /* COMMIT_FAILCONF            */
4808 /*******************************/
execCOMMIT_FAILCONF(Signal * signal)4809 void Qmgr::execCOMMIT_FAILCONF(Signal* signal)
4810 {
4811   NodeRecPtr nodePtr;
4812   NodeRecPtr replyNodePtr;
4813   jamEntry();
4814   replyNodePtr.i = signal->theData[0];
4815 
4816   ptrCheckGuard(replyNodePtr, MAX_NDB_NODES, nodeRec);
4817   replyNodePtr.p->sendCommitFailReqStatus = Q_NOT_ACTIVE;
4818   for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
4819     ptrAss(nodePtr, nodeRec);
4820     if (nodePtr.p->phase == ZRUNNING) {
4821       if (nodePtr.p->sendCommitFailReqStatus == Q_ACTIVE) {
4822         jamLine(nodePtr.i);
4823         return;
4824       }//if
4825     }//if
4826   }//for
4827   /*-----------------------------------------------------------------------*/
4828   /*   WE HAVE SUCCESSFULLY COMMITTED A SET OF NODE FAILURES.              */
4829   /*-----------------------------------------------------------------------*/
4830   ctoStatus = Q_NOT_ACTIVE;
4831   if (!cfailedNodes.isclear()) {
4832     jam();
4833     /**----------------------------------------------------------------------
4834      *	A FAILURE OCCURRED IN THE MIDDLE OF THE COMMIT PROCESS. WE ARE NOW
4835      *  READY TO START THE FAILED NODE PROCESS FOR THIS NODE.
4836      *----------------------------------------------------------------------*/
4837     cfailureNr = cfailureNr + 1;
4838     for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
4839       ptrAss(nodePtr, nodeRec);
4840       if (nodePtr.p->phase == ZRUNNING) {
4841         jamLine(nodePtr.i);
4842         sendPrepFailReq(signal, nodePtr.i);
4843       }//if
4844     }//for
4845   }//if
4846   return;
4847 }//Qmgr::execCOMMIT_FAILCONF()
4848 
4849 /**--------------------------------------------------------------------------
4850  * IF THE PRESIDENT FAILS IN THE MIDDLE OF THE COMMIT OF A FAILED NODE THEN
4851  * THE NEW PRESIDENT NEEDS TO QUERY THE COMMIT STATUS IN THE RUNNING NODES.
4852  *---------------------------------------------------------------------------*/
4853 /*******************************/
4854 /* PRES_TOCONF                */
4855 /*******************************/
execPRES_TOCONF(Signal * signal)4856 void Qmgr::execPRES_TOCONF(Signal* signal)
4857 {
4858   NodeRecPtr nodePtr;
4859   NodeRecPtr replyNodePtr;
4860   jamEntry();
4861   replyNodePtr.i = signal->theData[0];
4862   UintR TfailureNr = signal->theData[1];
4863   if (ctoFailureNr < TfailureNr) {
4864     jam();
4865     ctoFailureNr = TfailureNr;
4866   }//if
4867   ptrCheckGuard(replyNodePtr, MAX_NDB_NODES, nodeRec);
4868   replyNodePtr.p->sendPresToStatus = Q_NOT_ACTIVE;
4869   for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
4870     ptrAss(nodePtr, nodeRec);
4871     if (nodePtr.p->sendPresToStatus == Q_ACTIVE) {
4872       jamLine(nodePtr.i);
4873       return;
4874     }//if
4875   }//for
4876   /*-------------------------------------------------------------------------*/
4877   /* WE ARE NOW READY TO DISCOVER WHETHER THE FAILURE WAS COMMITTED OR NOT.  */
4878   /*-------------------------------------------------------------------------*/
4879   if (ctoFailureNr > ccommitFailureNr) {
4880     jam();
4881     for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
4882       ptrAss(nodePtr, nodeRec);
4883       if (nodePtr.p->phase == ZRUNNING) {
4884         jamLine(nodePtr.i);
4885         nodePtr.p->sendCommitFailReqStatus = Q_ACTIVE;
4886         signal->theData[0] = cpdistref;
4887         signal->theData[1] = ctoFailureNr;
4888         sendSignal(nodePtr.p->blockRef, GSN_COMMIT_FAILREQ, signal, 2, JBA);
4889       }//if
4890     }//for
4891     return;
4892   }//if
4893   /*-------------------------------------------------------------------------*/
4894   /*       WE ARE NOW READY TO START THE NEW NODE FAILURE PROCESS.           */
4895   /*-------------------------------------------------------------------------*/
4896   ctoStatus = Q_NOT_ACTIVE;
4897   cfailureNr = cfailureNr + 1;
4898   for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
4899     ptrAss(nodePtr, nodeRec);
4900     if (nodePtr.p->phase == ZRUNNING) {
4901       jamLine(nodePtr.i);
4902       sendPrepFailReq(signal, nodePtr.i);
4903     }//if
4904   }//for
4905   return;
4906 }//Qmgr::execPRES_TOCONF()
4907 
4908 /*--------------------------------------------------------------------------*/
4909 // Provide information about the configured NDB nodes in the system.
4910 /*--------------------------------------------------------------------------*/
execREAD_NODESREQ(Signal * signal)4911 void Qmgr::execREAD_NODESREQ(Signal* signal)
4912 {
4913   jamEntry();
4914 
4915   BlockReference TBref = signal->theData[0];
4916 
4917   ReadNodesConf * const readNodes = (ReadNodesConf *)&signal->theData[0];
4918 
4919   NodeRecPtr nodePtr;
4920   nodePtr.i = getOwnNodeId();
4921   ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
4922 
4923   NdbNodeBitmask tmp = c_definedNodes;
4924   tmp.bitANDC(c_clusterNodes);
4925 
4926   readNodes->noOfNodes = c_definedNodes.count();
4927   readNodes->masterNodeId = cpresident;
4928   readNodes->ndynamicId = nodePtr.p->ndynamicId;
4929   c_definedNodes.copyto(NdbNodeBitmask::Size, readNodes->definedNodes);
4930   c_clusterNodes.copyto(NdbNodeBitmask::Size, readNodes->clusterNodes);
4931   tmp.copyto(NdbNodeBitmask::Size, readNodes->inactiveNodes);
4932   NdbNodeBitmask::clear(readNodes->startingNodes);
4933   NdbNodeBitmask::clear(readNodes->startedNodes);
4934 
4935   sendSignal(TBref, GSN_READ_NODESCONF, signal,
4936 	     ReadNodesConf::SignalLength, JBB);
4937 }//Qmgr::execREAD_NODESREQ()
4938 
systemErrorBecauseOtherNodeFailed(Signal * signal,Uint32 line,NodeId failedNodeId)4939 void Qmgr::systemErrorBecauseOtherNodeFailed(Signal* signal, Uint32 line,
4940 					     NodeId failedNodeId) {
4941   jam();
4942 
4943   // Broadcast that this node is failing to other nodes
4944   failReport(signal, getOwnNodeId(), (UintR)ZTRUE, FailRep::ZOWN_FAILURE, getOwnNodeId());
4945 
4946   char buf[100];
4947   BaseString::snprintf(buf, 100,
4948 	   "Node was shutdown during startup because node %d failed",
4949 	   failedNodeId);
4950 
4951   progError(line, NDBD_EXIT_SR_OTHERNODEFAILED, buf);
4952 }
4953 
4954 
systemErrorLab(Signal * signal,Uint32 line,const char * message)4955 void Qmgr::systemErrorLab(Signal* signal, Uint32 line, const char * message)
4956 {
4957   jam();
4958   // Broadcast that this node is failing to other nodes
4959   failReport(signal, getOwnNodeId(), (UintR)ZTRUE, FailRep::ZOWN_FAILURE, getOwnNodeId());
4960 
4961   // If it's known why shutdown occured
4962   // an error message has been passed to this function
4963   progError(line, NDBD_EXIT_NDBREQUIRE, message);
4964 
4965   return;
4966 }//Qmgr::systemErrorLab()
4967 
4968 
4969 /**---------------------------------------------------------------------------
 * A FAILURE HAS BEEN DISCOVERED ON A NODE. WE NEED TO CLEAR A
4971  * NUMBER OF VARIABLES.
4972  *---------------------------------------------------------------------------*/
/**
 * Record the failure of a data node and start the local failure handling.
 *
 * Nothing is done unless the node is currently in phase ZRUNNING. For such
 * a node this routine:
 *  - if we are president, sends ourselves the COMMIT_FAILCONF / PRES_TOCONF
 *    that the failed node still owed us,
 *  - moves the node to ZPREPARE_FAIL and resets its request statuses,
 *  - optionally sends FAIL_REP to the failed node and to all running nodes,
 *  - stops here if the failed node is ourselves,
 *  - otherwise updates the connectivity check, recalculates neighbours and,
 *    if the president failed, elects the running node with the lowest
 *    (ndynamicId & 0xFFFF) as new president; when that is us, the other
 *    running nodes are sent PRES_TOREQ or COMMIT_FAILREQ,
 *  - finally adds the node to cfailedNodes.
 *
 * @param signal        Signal buffer, reused for all outgoing signals.
 * @param aFailedNode   Node id of the failed node.
 * @param aSendFailRep  ZTRUE if FAIL_REP shall be sent from here.
 * @param aFailCause    Cause put into the FAIL_REP signals.
 * @param sourceNode    Node id put into FAIL_REP as failure source; asserted
 *                      to be non-zero whenever aSendFailRep is set.
 */
void Qmgr::failReport(Signal* signal,
                      Uint16 aFailedNode,
                      UintR aSendFailRep,
                      FailRep::FailCause aFailCause,
                      Uint16 sourceNode)
{
  UintR tfrMinDynamicId;
  NodeRecPtr failedNodePtr;
  NodeRecPtr nodePtr;
  NodeRecPtr presidentNodePtr;


  ndbassert((! aSendFailRep) || (sourceNode != 0));

  failedNodePtr.i = aFailedNode;
  ptrCheckGuard(failedNodePtr, MAX_NDB_NODES, nodeRec);
  if (failedNodePtr.p->phase == ZRUNNING) {
    jam();

#ifdef ERROR_INSERT
    if (ERROR_INSERTED(938))
    {
      nodeFailCount++;
      ndbout_c("QMGR : execFAIL_REP(Failed : %u Source : %u  Cause : %u) : "
               "%u nodes have failed",
               aFailedNode, sourceNode, aFailCause, nodeFailCount);
      /* Count DB nodes */
      Uint32 nodeCount = 0;
      for (Uint32 i = 1; i < MAX_NDB_NODES; i++)
      {
        if (getNodeInfo(i).getType() == NODE_TYPE_DB)
          nodeCount++;
      }

      /* When > 25% of cluster has failed, resume communications */
      if (nodeFailCount > (nodeCount / 4))
      {
        ndbout_c("QMGR : execFAIL_REP > 25%% nodes failed, resuming comms");
        // The signal is saved and restored around the DUMP_STATE_ORD send
        // because the code below keeps using it.
        Signal save = *signal;
        signal->theData[0] = 9991;
        sendSignal(CMVMI_REF, GSN_DUMP_STATE_ORD, signal, 1, JBB);
        *signal = save;
        nodeFailCount = 0;
        SET_ERROR_INSERT_VALUE(932);
      }
    }
#endif

/* WE ALSO NEED TO ADD HERE SOME CODE THAT GETS OUR NEW NEIGHBOURS. */
    if (cpresident == getOwnNodeId()) {
      jam();
      // As president, answer on behalf of the failed node for any request
      // it still had outstanding, by sending the CONF to our own QMGR.
      if (failedNodePtr.p->sendCommitFailReqStatus == Q_ACTIVE) {
        jam();
        signal->theData[0] = failedNodePtr.i;
        sendSignal(QMGR_REF, GSN_COMMIT_FAILCONF, signal, 1, JBA);
      }//if
      if (failedNodePtr.p->sendPresToStatus == Q_ACTIVE) {
        jam();
        signal->theData[0] = failedNodePtr.i;
        signal->theData[1] = ccommitFailureNr;
        sendSignal(QMGR_REF, GSN_PRES_TOCONF, signal, 2, JBA);
      }//if
    }//if
    // Take the node out of ZRUNNING and reset its per-node protocol state.
    failedNodePtr.p->phase = ZPREPARE_FAIL;
    failedNodePtr.p->sendPrepFailReqStatus = Q_NOT_ACTIVE;
    failedNodePtr.p->sendCommitFailReqStatus = Q_NOT_ACTIVE;
    failedNodePtr.p->sendPresToStatus = Q_NOT_ACTIVE;
    set_hb_count(failedNodePtr.i) = 0;
    if (aSendFailRep == ZTRUE) {
      jam();
      // FAIL_REP goes to the failed node itself (unless that is us) ...
      if (failedNodePtr.i != getOwnNodeId()) {
        jam();
	FailRep * const failRep = (FailRep *)&signal->theData[0];
        failRep->failNodeId = failedNodePtr.i;
        failRep->failCause = aFailCause;
        failRep->failSourceNodeId = sourceNode;
        sendSignal(failedNodePtr.p->blockRef, GSN_FAIL_REP, signal,
		   FailRep::SignalLength, JBA);
      }//if
      // ... and to every node still in phase ZRUNNING.
      for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
        ptrAss(nodePtr, nodeRec);
        if (nodePtr.p->phase == ZRUNNING) {
          jamLine(nodePtr.i);
	  FailRep * const failRep = (FailRep *)&signal->theData[0];
	  failRep->failNodeId = failedNodePtr.i;
	  failRep->failCause = aFailCause;
          failRep->failSourceNodeId = sourceNode;
          sendSignal(nodePtr.p->blockRef, GSN_FAIL_REP, signal,
		     FailRep::SignalLength, JBA);
        }//if
      }//for
    }//if
    if (failedNodePtr.i == getOwnNodeId()) {
      jam();
      // Our own failure : none of the remaining bookkeeping is done.
      return;
    }//if

    if (unlikely(m_connectivity_check.reportNodeFailure(failedNodePtr.i)))
    {
      jam();
      connectivityCheckCompleted(signal);
    }

    failedNodePtr.p->ndynamicId = 0;
    findNeighbours(signal, __LINE__);
    if (failedNodePtr.i == cpresident) {
      jam();
      /**--------------------------------------------------------------------
       * IF PRESIDENT HAVE FAILED WE MUST CALCULATE THE NEW PRESIDENT BY
       * FINDING THE NODE WITH THE MINIMUM DYNAMIC IDENTITY.
       *---------------------------------------------------------------------*/
      // Only the low 16 bits of ndynamicId take part in the comparison.
      tfrMinDynamicId = (UintR)-1;
      for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
        ptrAss(nodePtr, nodeRec);
        if (nodePtr.p->phase == ZRUNNING) {
          jamLine(nodePtr.i);
          if ((nodePtr.p->ndynamicId & 0xFFFF) < tfrMinDynamicId) {
            jam();
            tfrMinDynamicId = (nodePtr.p->ndynamicId & 0xFFFF);
            cpresident = nodePtr.i;
          }//if
        }//if
      }//for
      presidentNodePtr.i = cpresident;
      ptrCheckGuard(presidentNodePtr, MAX_NDB_NODES, nodeRec);
      cpdistref = presidentNodePtr.p->blockRef;
      if (cpresident == getOwnNodeId()) {
	CRASH_INSERTION(920);
        // We are the new president : take over the failure handling state.
        cfailureNr = cprepareFailureNr;
        ctoFailureNr = 0;
        ctoStatus = Q_ACTIVE;
	c_start.reset(); // Don't take over nodes being started
        if (!ccommitFailedNodes.isclear()) {
          jam();
	  /**-----------------------------------------------------------------
	   * IN THIS SITUATION WE ARE UNCERTAIN OF WHETHER THE NODE FAILURE
	   * PROCESS WAS COMMITTED. WE NEED TO QUERY THE OTHER NODES ABOUT
	   * THEIR STATUS.
	   *-----------------------------------------------------------------*/
          for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES;
	       nodePtr.i++) {
            jam();
            ptrAss(nodePtr, nodeRec);
            if (nodePtr.p->phase == ZRUNNING) {
              jam();
              nodePtr.p->sendPresToStatus = Q_ACTIVE;
              signal->theData[0] = cpdistref;
              signal->theData[1] = cprepareFailureNr;
              // NOTE(review): theData[1] is filled in but the signal is
              // sent with length 1 - confirm the receiver only needs
              // theData[0].
              sendSignal(nodePtr.p->blockRef, GSN_PRES_TOREQ,
			 signal, 1, JBA);
            }//if
          }//for
        } else {
          jam();
	  /*-----------------------------------------------------------------*/
	  // In this case it could be that a commit process is still ongoing.
	  // If so we must conclude it as the new master.
	  /*-----------------------------------------------------------------*/
          for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES;
	       nodePtr.i++) {
            ptrAss(nodePtr, nodeRec);
            if (nodePtr.p->phase == ZRUNNING) {
              jamLine(nodePtr.i);
              nodePtr.p->sendCommitFailReqStatus = Q_ACTIVE;
              signal->theData[0] = cpdistref;
              signal->theData[1] = ccommitFailureNr;
              sendSignal(nodePtr.p->blockRef, GSN_COMMIT_FAILREQ, signal,
			 2, JBA);
            }//if
          }//for
        }//if
      }//if
    }//if
    // Remember the node as failed; this set is what PREP_FAILREQ carries.
    cfailedNodes.set(failedNodePtr.i);
  }//if
}//Qmgr::failReport()
5149 
5150 /*---------------------------------------------------------------------------*/
5151 /*       INPUT:  TTDI_DYN_ID                                                 */
5152 /*       OUTPUT: TTDI_NODE_ID                                                */
5153 /*---------------------------------------------------------------------------*/
translateDynamicIdToNodeId(Signal * signal,UintR TdynamicId)5154 Uint16 Qmgr::translateDynamicIdToNodeId(Signal* signal, UintR TdynamicId)
5155 {
5156   NodeRecPtr tdiNodePtr;
5157   Uint16 TtdiNodeId = ZNIL;
5158 
5159   for (tdiNodePtr.i = 1; tdiNodePtr.i < MAX_NDB_NODES; tdiNodePtr.i++) {
5160     jam();
5161     ptrAss(tdiNodePtr, nodeRec);
5162     if (tdiNodePtr.p->ndynamicId == TdynamicId) {
5163       jam();
5164       TtdiNodeId = tdiNodePtr.i;
5165       break;
5166     }//if
5167   }//for
5168   if (TtdiNodeId == ZNIL) {
5169     jam();
5170     systemErrorLab(signal, __LINE__);
5171   }//if
5172   return TtdiNodeId;
5173 }//Qmgr::translateDynamicIdToNodeId()
5174 
5175 /**--------------------------------------------------------------------------
5176  *       WHEN RECEIVING PREPARE FAILURE REQUEST WE WILL IMMEDIATELY CLOSE
5177  *       COMMUNICATION WITH ALL THOSE NODES.
5178  *--------------------------------------------------------------------------*/
sendCloseComReq(Signal * signal,BlockReference TBRef,Uint16 aFailNo)5179 void Qmgr::sendCloseComReq(Signal* signal, BlockReference TBRef, Uint16 aFailNo)
5180 {
5181   CloseComReqConf * const closeCom = (CloseComReqConf *)&signal->theData[0];
5182 
5183   closeCom->xxxBlockRef = TBRef;
5184   closeCom->requestType = CloseComReqConf::RT_NODE_FAILURE;
5185   closeCom->failNo      = aFailNo;
5186   closeCom->noOfNodes   = cprepFailedNodes.count();
5187   /**
5188    * We are sending a signal where bitmap is of size NodeBitmask::size and we only
5189    * have a bitmask of NdbNodeBitmask::size, we clear all bits using NodeBitmask
5190    * before assigning the smaller bitmask to ensure we don't send any garbage.
5191    */
5192   NodeBitmask::clear(closeCom->theNodes);
5193   cprepFailedNodes.copyto(NdbNodeBitmask::Size, closeCom->theNodes);
5194 
5195   sendSignal(TRPMAN_REF, GSN_CLOSE_COMREQ, signal,
5196 	     CloseComReqConf::SignalLength, JBB);
5197 
5198 }//Qmgr::sendCloseComReq()
5199 
5200 void
sendPrepFailReqRef(Signal * signal,Uint32 dstBlockRef,GlobalSignalNumber gsn,Uint32 blockRef,Uint32 failNo,const NdbNodeBitmask & nodes)5201 Qmgr::sendPrepFailReqRef(Signal* signal,
5202 			 Uint32 dstBlockRef,
5203 			 GlobalSignalNumber gsn,
5204 			 Uint32 blockRef,
5205 			 Uint32 failNo,
5206 			 const NdbNodeBitmask& nodes)
5207 {
5208   PrepFailReqRef * const prepFail = (PrepFailReqRef *)&signal->theData[0];
5209   prepFail->xxxBlockRef = blockRef;
5210   prepFail->failNo = failNo;
5211   prepFail->noOfNodes = nodes.count();
5212   nodes.copyto(NdbNodeBitmask::Size, prepFail->theNodes);
5213 
5214   sendSignal(dstBlockRef, gsn, signal, PrepFailReqRef::SignalLength, JBA);
5215 }
5216 
5217 
5218 /**--------------------------------------------------------------------------
5219  *       SEND PREPARE FAIL REQUEST FROM PRESIDENT.
5220  *---------------------------------------------------------------------------*/
sendPrepFailReq(Signal * signal,Uint16 aNode)5221 void Qmgr::sendPrepFailReq(Signal* signal, Uint16 aNode)
5222 {
5223   NodeRecPtr sendNodePtr;
5224   sendNodePtr.i = aNode;
5225   ptrCheckGuard(sendNodePtr, MAX_NDB_NODES, nodeRec);
5226   sendNodePtr.p->sendPrepFailReqStatus = Q_ACTIVE;
5227 
5228   sendPrepFailReqRef(signal,
5229 		     sendNodePtr.p->blockRef,
5230 		     GSN_PREP_FAILREQ,
5231 		     reference(),
5232 		     cfailureNr,
5233 		     cfailedNodes);
5234 }//Qmgr::sendPrepFailReq()
5235 
5236 /**
5237  * Arbitration module.  Rest of QMGR calls us only via
5238  * the "handle" routines.
5239  */
5240 
/**
 * Should < 1/2 nodes die unconditionally.  Affects only >= 3-way
 * replication.
 *
 * Compile-time switch read by handleArbitCheck().  When true, a node set
 * with 2 * count < cnoOfNodes loses without asking DBDIH (unless error
 * insert 943 is active), and a set with 2 * count > cnoOfNodes turns a
 * "Partitioning" verdict into a win.  It is false here, so neither
 * shortcut is taken.
 */
static const bool g_ndb_arbit_one_half_rule = false;
5246 
5247 /**
5248  * Config signals are logically part of CM_INIT.
5249  */
5250 void
execARBIT_CFG(Signal * signal)5251 Qmgr::execARBIT_CFG(Signal* signal)
5252 {
5253   jamEntry();
5254   ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
5255   unsigned rank = sd->code;
5256   ndbrequire(1 <= rank && rank <= 2);
5257   arbitRec.apiMask[0].bitOR(sd->mask);
5258   arbitRec.apiMask[rank].assign(sd->mask);
5259 }
5260 
/**
 * ContinueB delay (0=JBA 1=JBB)
 *
 * Returns the delay runArbitThread() uses before its next CONTINUEB for
 * the current arbitration state.  runArbitThread() sends immediately on
 * JBA for 0 and on JBB for 1, and hands any other value to
 * sendSignalWithDelay().  None of the states below returns 0 or 1.
 *
 * ARBIT_NULL has no delay: it leaves the switch and hits the
 * ndbrequire(false) below.  The same applies to any value not listed.
 */
Uint32 Qmgr::getArbitDelay()
{
  switch (arbitRec.state) {
  case ARBIT_NULL:
    jam();
    break;
  // INIT, FIND, PREP1, PREP2 and START deliberately fall through to one
  // shared delay; each case still records its own jam().
  case ARBIT_INIT:
    jam();
  case ARBIT_FIND:
    jam();
  case ARBIT_PREP1:
    jam();
  case ARBIT_PREP2:
    jam();
  case ARBIT_START:
    jam();
    return 100;
  case ARBIT_RUN:
    jam();
    return 1000;
  case ARBIT_CHOOSE:
    jam();
    return 10;
  case ARBIT_CRASH:             // if we could wait
    jam();
    return 100;
  }
  ndbrequire(false);
  return (Uint32)-1;
}
5294 
/**
 * Time to wait for reply.  There is only 1 config parameter
 * (timeout for CHOOSE).  XXX The rest are guesses.
 *
 * The value is compared against arbitRec.getTimediff() by the state
 * handlers, so it is in the same unit as that function
 * (presumably milliseconds -- TODO confirm).
 *
 * ARBIT_NULL has no timeout: it leaves the switch and hits the
 * ndbrequire(false) below.
 */
Uint32 Qmgr::getArbitTimeout()
{
  switch (arbitRec.state) {
  case ARBIT_NULL:
    jam();
    break;
  case ARBIT_INIT:              // not used
    jam();
    // falls through to FIND
  case ARBIT_FIND:
    jam();
    /* This timeout will be used only to print out a warning
     * when a suitable arbitrator is not found.
     */
    return 60000;
  case ARBIT_PREP1:
    jam();
    // falls through: both PREP rounds use the same timeout
  case ARBIT_PREP2:
    jam();
    // Fixed part plus one heartbeat send interval per node.
    return 1000 + cnoOfNodes * Uint32(hb_send_timer.getDelay());
  case ARBIT_START:
    jam();
    return 1000 + arbitRec.timeout;
  case ARBIT_RUN:               // not used (yet)
    jam();
    return 1000;
  case ARBIT_CHOOSE:
    jam();
    return arbitRec.timeout;
  case ARBIT_CRASH:             // if we could wait
    jam();
    return 100;
  }
  ndbrequire(false);
  return (Uint32)-1;
}
5334 
5335 /**
5336  * Start arbitration thread when we are president and database
5337  * is opened for the first time.
5338  *
5339  * XXX  Do arbitration check just like on node failure.  Since
5340  * there is no arbitrator yet, must win on counts alone.
5341  */
5342 void
handleArbitStart(Signal * signal)5343 Qmgr::handleArbitStart(Signal* signal)
5344 {
5345   jam();
5346   ndbrequire(cpresident == getOwnNodeId());
5347   ndbrequire(arbitRec.state == ARBIT_NULL);
5348   arbitRec.state = ARBIT_INIT;
5349   arbitRec.newstate = true;
5350   startArbitThread(signal);
5351 }
5352 
/**
 * Handle API node failure.  Called also by non-president nodes.
 * If we are president go back to INIT state, otherwise to NULL.
 * Start new thread to save time.
 *
 * Does nothing unless nodeId is the current arbitrator (arbitRec.node).
 * Otherwise an ArbitState event is reported with the current
 * arbitRec.code, the arbitrator is forgotten, and the state is adjusted
 * as described per case below.
 */
void
Qmgr::handleArbitApiFail(Signal* signal, Uint16 nodeId)
{
  if (arbitRec.node != nodeId) {
    jam();
    return;
  }
  // Report before clearing so the event still names the lost arbitrator.
  reportArbitEvent(signal, NDB_LE_ArbitState);
  arbitRec.node = 0;
  switch (arbitRec.state) {
  // NULL, INIT, FIND: no state change; fall through to the shared break.
  case ARBIT_NULL:              // should not happen
    jam();
  case ARBIT_INIT:
    jam();
  case ARBIT_FIND:
    jam();
    break;
  // PREP1, PREP2, START, RUN: fall through to the shared restart logic.
  case ARBIT_PREP1:		// start from beginning
    jam();
  case ARBIT_PREP2:
    jam();
  case ARBIT_START:
    jam();
  case ARBIT_RUN:
    if (cpresident == getOwnNodeId()) {
      jam();
      // President restarts the search for an arbitrator in a new thread.
      arbitRec.state = ARBIT_INIT;
      arbitRec.newstate = true;
      startArbitThread(signal);
    } else {
      jam();
      // Non-president has no thread to restart; just reset the state.
      arbitRec.state = ARBIT_NULL;
    }
    break;
  // CHOOSE, CRASH: state is left unchanged.
  case ARBIT_CHOOSE:		// XXX too late
    jam();
  case ARBIT_CRASH:
    jam();
    break;
  default:
    ndbrequire(false);
    break;
  }
}
5402 
/**
 * Handle NDB node add.  Ignore if arbitration thread not yet
 * started.  If PREP is not ready, go back to INIT.  Otherwise
 * the new node gets arbitrator and ticket once we reach RUN state.
 * Start new thread to save time.
 *
 * Must only be called on the president (enforced by ndbrequire).
 */
void
Qmgr::handleArbitNdbAdd(Signal* signal, Uint16 nodeId)
{
  jam();
  ndbrequire(cpresident == getOwnNodeId());
  switch (arbitRec.state) {
  case ARBIT_NULL:              // before db opened
    jam();
    break;
  // INIT, FIND, PREP1, PREP2: fall through to a restart from INIT in a
  // new thread.
  case ARBIT_INIT:		// start from beginning
    jam();
  case ARBIT_FIND:
    jam();
  case ARBIT_PREP1:
    jam();
  case ARBIT_PREP2:
    jam();
    arbitRec.state = ARBIT_INIT;
    arbitRec.newstate = true;
    startArbitThread(signal);
    break;
  // START, RUN: remember the node in newMask; stateArbitRun() sends the
  // arbitrator and ticket to every node in that mask.
  case ARBIT_START:		// process in RUN state
    jam();
  case ARBIT_RUN:
    jam();
    arbitRec.newMask.set(nodeId);
    break;
  // CHOOSE, CRASH: nothing is done for the new node.
  case ARBIT_CHOOSE:            // XXX too late
    jam();
  case ARBIT_CRASH:
    jam();
    break;
  default:
    ndbrequire(false);
    break;
  }
}
5446 
/**
 * Check if current nodeset can survive.  The decision is
 * based on node count, node groups, and on external arbitrator
 * (if we have one).  Always starts a new thread because
 * 1) CHOOSE cannot wait 2) if we are new president we need
 * a thread 3) if we are old president it does no harm.
 *
 * Must only be called on the president (enforced by ndbrequire).
 *
 * Steps:
 *  1. Compute the set of running data nodes and derive a verdict in
 *     arbitRec.code, either from the one-half rule (compiled out while
 *     g_ndb_arbit_one_half_rule is false) or from DBDIH's node group check.
 *  2. Map the verdict to the next arbitration state.
 *  3. Report the result, and unless the next state is CHOOSE or CRASH,
 *     drop failed nodes from the pending masks and start committing
 *     the node failure.
 *  4. Start a new arbitration thread.
 */
void
Qmgr::handleArbitCheck(Signal* signal)
{
  jam();
  ndbrequire(cpresident == getOwnNodeId());
  NdbNodeBitmask ndbMask;
  computeArbitNdbMask(ndbMask);
  if (g_ndb_arbit_one_half_rule && !ERROR_INSERTED(943) &&
      2 * ndbMask.count() < cnoOfNodes) {
    jam();
    arbitRec.code = ArbitCode::LoseNodes;
  } else {
    jam();
    // Ask DBDIH synchronously; the answer comes back in sd->output.
    CheckNodeGroups* sd = (CheckNodeGroups*)&signal->theData[0];
    sd->blockRef = reference();
    sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck;
    sd->mask = ndbMask;
    EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal,
		   CheckNodeGroups::SignalLength);
    jamEntry();
    if (ERROR_INSERTED(943))
    {
      // Test hook: override DBDIH's answer and pretend we are in RUN so
      // that the CHOOSE path below is taken.
      ndbout << "Requiring arbitration, even if there is no"
             << " possible split."<< endl;
      sd->output = CheckNodeGroups::Partitioning;
      arbitRec.state = ARBIT_RUN;
    }
    switch (sd->output) {
    case CheckNodeGroups::Win:
      jam();
      arbitRec.code = ArbitCode::WinGroups;
      break;
    case CheckNodeGroups::Lose:
      jam();
      arbitRec.code = ArbitCode::LoseGroups;
      break;
    case CheckNodeGroups::Partitioning:
      jam();
      arbitRec.code = ArbitCode::Partitioning;
      if (g_ndb_arbit_one_half_rule &&
          2 * ndbMask.count() > cnoOfNodes) {
        jam();
        arbitRec.code = ArbitCode::WinNodes;
      }
      break;
    default:
      ndbrequire(false);
      break;
    }
  }
  // Map the verdict to the next state.  "crashme" is a label inside the
  // default case, so the losing branches share its body via goto.
  switch (arbitRec.code) {
  case ArbitCode::LoseNodes:
    jam();
    // falls through
  case ArbitCode::LoseGroups:
    jam();
    goto crashme;
  case ArbitCode::WinNodes:
    jam();
    // falls through
  case ArbitCode::WinGroups:
    jam();
    if (arbitRec.state == ARBIT_RUN) {
      jam();
      break;                            // keep running with the arbitrator
    }
    arbitRec.state = ARBIT_INIT;
    arbitRec.newstate = true;
    break;
  case ArbitCode::Partitioning:
    if (arbitRec.state == ARBIT_RUN) {
      jam();
      // Only in RUN state can the arbitrator be asked to choose.
      arbitRec.state = ARBIT_CHOOSE;
      arbitRec.newstate = true;
      break;
    }
    // Not in RUN state: we lose.  The code records whether any arbitrator
    // candidate had been configured at all (apiMask[0] non-empty).
    if (arbitRec.apiMask[0].count() != 0) {
      jam();
      arbitRec.code = ArbitCode::LoseNorun;
    } else {
      jam();
      arbitRec.code = ArbitCode::LoseNocfg;
    }
    goto crashme;
  default:
  crashme:
    jam();
    arbitRec.state = ARBIT_CRASH;
    arbitRec.newstate = true;
    break;
  }
  reportArbitEvent(signal, NDB_LE_ArbitResult);
  // Note: "default" is listed first here and covers every state other
  // than CHOOSE and CRASH.
  switch (arbitRec.state) {
  default:
    jam();
    arbitRec.newMask.bitAND(ndbMask);   // delete failed nodes
    arbitRec.recvMask.bitAND(ndbMask);
    sendCommitFailReq(signal);          // start commit of failed nodes
    break;
  case ARBIT_CHOOSE:
    jam();
  case ARBIT_CRASH:
    jam();
    break;
  }
  startArbitThread(signal);
}
5559 
5560 /**
5561  * Start a new continueB thread.  The thread id is incremented
5562  * so that any old thread will exit.
5563  */
5564 void
startArbitThread(Signal * signal)5565 Qmgr::startArbitThread(Signal* signal)
5566 {
5567   jam();
5568   ndbrequire(cpresident == getOwnNodeId());
5569   arbitRec.code = ArbitCode::ThreadStart;
5570   reportArbitEvent(signal, NDB_LE_ArbitState);
5571   signal->theData[1] = ++arbitRec.thread;
5572   runArbitThread(signal);
5573 }
5574 
5575 /**
5576  * Handle arbitration thread.  The initial thread normally ends
5577  * up in RUN state.  New thread can be started to save time.
5578  */
5579 void
runArbitThread(Signal * signal)5580 Qmgr::runArbitThread(Signal* signal)
5581 {
5582 #ifdef DEBUG_ARBIT
5583   char buf[256];
5584   NdbNodeBitmask ndbMask;
5585   computeArbitNdbMask(ndbMask);
5586   ndbout << "arbit thread:";
5587   ndbout << " state=" << arbitRec.state;
5588   ndbout << " newstate=" << arbitRec.newstate;
5589   ndbout << " thread=" << arbitRec.thread;
5590   ndbout << " node=" << arbitRec.node;
5591   arbitRec.ticket.getText(buf, sizeof(buf));
5592   ndbout << " ticket=" << buf;
5593   ndbMask.getText(buf);
5594   ndbout << " ndbmask=" << buf;
5595   ndbout << " sendcount=" << arbitRec.sendCount;
5596   ndbout << " recvcount=" << arbitRec.recvCount;
5597   arbitRec.recvMask.getText(buf);
5598   ndbout << " recvmask=" << buf;
5599   ndbout << " code=" << arbitRec.code;
5600   ndbout << endl;
5601 #endif
5602   if (signal->theData[1] != arbitRec.thread) {
5603     jam();
5604     return;	        	// old thread dies
5605   }
5606   switch (arbitRec.state) {
5607   case ARBIT_INIT:		// main thread
5608     jam();
5609     stateArbitInit(signal);
5610     break;
5611   case ARBIT_FIND:
5612     jam();
5613     stateArbitFind(signal);
5614     break;
5615   case ARBIT_PREP1:
5616     jam();
5617   case ARBIT_PREP2:
5618     jam();
5619     stateArbitPrep(signal);
5620     break;
5621   case ARBIT_START:
5622     jam();
5623     stateArbitStart(signal);
5624     break;
5625   case ARBIT_RUN:
5626     jam();
5627     stateArbitRun(signal);
5628     break;
5629   case ARBIT_CHOOSE:		// partitition thread
5630     jam();
5631     stateArbitChoose(signal);
5632     break;
5633   case ARBIT_CRASH:
5634     jam();
5635     stateArbitCrash(signal);
5636     break;
5637   default:
5638     ndbrequire(false);
5639     break;
5640   }
5641   signal->theData[0] = ZARBIT_HANDLING;
5642   signal->theData[1] = arbitRec.thread;
5643   signal->theData[2] = arbitRec.state;		// just for signal log
5644   Uint32 delay = getArbitDelay();
5645   if (delay == 0) {
5646     jam();
5647     sendSignal(QMGR_REF, GSN_CONTINUEB, signal, 3, JBA);
5648   } else if (delay == 1) {
5649     jam();
5650     sendSignal(QMGR_REF, GSN_CONTINUEB, signal, 3, JBB);
5651   } else {
5652     jam();
5653     sendSignalWithDelay(QMGR_REF, GSN_CONTINUEB, signal, delay, 3);
5654   }//if
5655 }
5656 
5657 /**
5658  * Handle INIT state.  Generate next ticket.  Switch to FIND
5659  * state without delay.
5660  */
5661 void
stateArbitInit(Signal * signal)5662 Qmgr::stateArbitInit(Signal* signal)
5663 {
5664   if (arbitRec.newstate) {
5665     jam();
5666     CRASH_INSERTION((Uint32)910 + arbitRec.state);
5667 
5668     arbitRec.node = 0;
5669     arbitRec.ticket.update();
5670     arbitRec.newMask.clear();
5671     arbitRec.code = 0;
5672     arbitRec.newstate = false;
5673   }
5674   arbitRec.setTimestamp();  // Init arbitration timer
5675   arbitRec.state = ARBIT_FIND;
5676   arbitRec.newstate = true;
5677   stateArbitFind(signal);
5678 }
5679 
5680 /**
5681  * Handle FIND state.  Find first arbitrator which is alive
5682  * and invoke PREP state without delay.  If none are found,
5683  * loop in FIND state.  This is forever if no arbitrators
5684  * are configured (not the normal case).
5685  *
5686  * XXX  Add adaptive behaviour to avoid getting stuck on API
5687  * nodes which are alive but do not respond or die too soon.
5688  */
5689 void
stateArbitFind(Signal * signal)5690 Qmgr::stateArbitFind(Signal* signal)
5691 {
5692   if (arbitRec.newstate) {
5693     jam();
5694     CRASH_INSERTION((Uint32)910 + arbitRec.state);
5695 
5696     arbitRec.code = 0;
5697     arbitRec.newstate = false;
5698   }
5699 
5700   switch (arbitRec.method){
5701   case ArbitRec::METHOD_EXTERNAL:
5702   {
5703     // Don't select any API node as arbitrator
5704     arbitRec.node = 0;
5705     arbitRec.state = ARBIT_PREP1;
5706     arbitRec.newstate = true;
5707     stateArbitPrep(signal);
5708     return;
5709     break;
5710   }
5711 
5712   case ArbitRec::METHOD_DEFAULT:
5713   {
5714     NodeRecPtr aPtr;
5715     // Select the best available API node as arbitrator
5716     for (unsigned rank = 1; rank <= 2; rank++) {
5717       jam();
5718       aPtr.i = 0;
5719       const unsigned stop = NodeBitmask::NotFound;
5720       while ((aPtr.i = arbitRec.apiMask[rank].find(aPtr.i + 1)) != stop) {
5721         jam();
5722         ptrAss(aPtr, nodeRec);
5723         if (aPtr.p->phase != ZAPI_ACTIVE)
5724           continue;
5725         ndbrequire(c_connectedNodes.get(aPtr.i));
5726         arbitRec.node = aPtr.i;
5727         arbitRec.state = ARBIT_PREP1;
5728         arbitRec.newstate = true;
5729         stateArbitPrep(signal);
5730         return;
5731       }
5732     }
5733 
5734     /* If the president cannot find a suitable arbitrator then
5735      * it will report this once a minute. Success in finding
5736      * an arbitrator will be notified when the arbitrator
5737      * accepts and acks the offer.
5738     */
5739 
5740     if (arbitRec.getTimediff() > getArbitTimeout()) {
5741       jam();
5742       g_eventLogger->warning("Could not find an arbitrator, cluster is not partition-safe");
5743       warningEvent("Could not find an arbitrator, cluster is not partition-safe");
5744       arbitRec.setTimestamp();
5745     }
5746     return;
5747     break;
5748   }
5749 
5750   default:
5751     ndbrequire(false);
5752   }
5753 }
5754 
5755 /**
5756  * Handle PREP states.  First round nulls any existing tickets.
5757  * Second round sends new ticket.  When all confirms have been
5758  * received invoke START state immediately.
5759  */
5760 void
stateArbitPrep(Signal * signal)5761 Qmgr::stateArbitPrep(Signal* signal)
5762 {
5763   if (arbitRec.newstate) {
5764     jam();
5765     CRASH_INSERTION((Uint32)910 + arbitRec.state);
5766 
5767     arbitRec.sendCount = 0;                     // send all at once
5768     computeArbitNdbMask(arbitRec.recvMask);     // to send and recv
5769     arbitRec.recvMask.clear(getOwnNodeId());
5770     arbitRec.code = 0;
5771     arbitRec.newstate = false;
5772   }
5773   if (! arbitRec.sendCount) {
5774     jam();
5775     NodeRecPtr aPtr;
5776     aPtr.i = 0;
5777     const unsigned stop = NodeBitmask::NotFound;
5778     while ((aPtr.i = arbitRec.recvMask.find(aPtr.i + 1)) != stop) {
5779       jam();
5780       ptrAss(aPtr, nodeRec);
5781       ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
5782       sd->sender = getOwnNodeId();
5783       if (arbitRec.state == ARBIT_PREP1) {
5784         jam();
5785         sd->code = ArbitCode::PrepPart1;
5786       } else {
5787         jam();
5788         sd->code = ArbitCode::PrepPart2;
5789       }
5790       sd->node = arbitRec.node;
5791       sd->ticket = arbitRec.ticket;
5792       sd->mask.clear();
5793       sendSignal(aPtr.p->blockRef, GSN_ARBIT_PREPREQ, signal,
5794         ArbitSignalData::SignalLength, JBB);
5795     }
5796     arbitRec.setTimestamp();			// send time
5797     arbitRec.sendCount = 1;
5798     return;
5799   }
5800   if (arbitRec.code != 0) {			// error
5801     jam();
5802     arbitRec.state = ARBIT_INIT;
5803     arbitRec.newstate = true;
5804     return;
5805   }
5806   if (arbitRec.recvMask.count() == 0) {		// recv all
5807     if (arbitRec.state == ARBIT_PREP1) {
5808       jam();
5809       arbitRec.state = ARBIT_PREP2;
5810       arbitRec.newstate = true;
5811     } else {
5812       jam();
5813       arbitRec.state = ARBIT_START;
5814       arbitRec.newstate = true;
5815       stateArbitStart(signal);
5816     }
5817     return;
5818   }
5819   if (arbitRec.getTimediff() > getArbitTimeout()) {
5820     jam();
5821     arbitRec.state = ARBIT_INIT;
5822     arbitRec.newstate = true;
5823     return;
5824   }
5825 }
5826 
5827 void
execARBIT_PREPREQ(Signal * signal)5828 Qmgr::execARBIT_PREPREQ(Signal* signal)
5829 {
5830   jamEntry();
5831   ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
5832   if (getOwnNodeId() == cpresident) {
5833     jam();
5834     return;		// wrong state
5835   }
5836   if (sd->sender != cpresident) {
5837     jam();
5838     return;		// wrong state
5839   }
5840   NodeRecPtr aPtr;
5841   aPtr.i = sd->sender;
5842   ptrAss(aPtr, nodeRec);
5843   switch (sd->code) {
5844   case ArbitCode::PrepPart1:    // zero them just to be sure
5845     jam();
5846     arbitRec.node = 0;
5847     arbitRec.ticket.clear();
5848     break;
5849   case ArbitCode::PrepPart2:    // non-president enters RUN state
5850     jam();
5851   case ArbitCode::PrepAtrun:
5852     jam();
5853     arbitRec.node = sd->node;
5854     arbitRec.ticket = sd->ticket;
5855     arbitRec.code = sd->code;
5856     reportArbitEvent(signal, NDB_LE_ArbitState);
5857     arbitRec.state = ARBIT_RUN;
5858     arbitRec.newstate = true;
5859 
5860     // Non-president node logs.
5861     if (!c_connectedNodes.get(arbitRec.node))
5862     {
5863       char buf[20]; // needs 16 + 1 for '\0'
5864       arbitRec.ticket.getText(buf, sizeof(buf));
5865       g_eventLogger->warning("President %u proposed disconnected "
5866                              "node %u as arbitrator [ticket=%s]. "
5867                              "Cluster may be partially connected. "
5868                              "Connected nodes: %s",
5869                              cpresident, arbitRec.node, buf,
5870                              BaseString::getPrettyTextShort(c_connectedNodes).c_str());
5871 
5872       warningEvent("President %u proposed disconnected node %u "
5873                    "as arbitrator [ticket %s]",
5874                    cpresident, arbitRec.node, buf);
5875       warningEvent("Cluster may be partially connected. Connected nodes: ");
5876 
5877       // Split the connected-node list, since warningEvents are
5878       // limited to ~24 words / 96 chars
5879       BaseString tmp(BaseString::getPrettyTextShort(c_connectedNodes).c_str());
5880       Vector<BaseString> split;
5881       tmp.split(split, "", 92);
5882       for(unsigned i = 0; i < split.size(); ++i)
5883       {
5884         warningEvent("%s", split[i].c_str());
5885       }
5886     }
5887 
5888     if (sd->code == ArbitCode::PrepAtrun) {
5889       jam();
5890       return;
5891     }
5892     break;
5893   default:
5894     jam();
5895     ndbrequire(false);
5896   }
5897   sd->sender = getOwnNodeId();
5898   sd->code = 0;
5899   sendSignal(aPtr.p->blockRef, GSN_ARBIT_PREPCONF, signal,
5900     ArbitSignalData::SignalLength, JBB);
5901 }
5902 
5903 void
execARBIT_PREPCONF(Signal * signal)5904 Qmgr::execARBIT_PREPCONF(Signal* signal)
5905 {
5906   jamEntry();
5907   ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
5908   if (! arbitRec.match(sd)) {
5909     jam();
5910     return;		// stray signal
5911   }
5912   if (arbitRec.state != ARBIT_PREP1 && arbitRec.state != ARBIT_PREP2) {
5913     jam();
5914     return;		// wrong state
5915   }
5916   if (! arbitRec.recvMask.get(sd->sender)) {
5917     jam();
5918     return;		// wrong state
5919   }
5920   arbitRec.recvMask.clear(sd->sender);
5921   if (arbitRec.code == 0 && sd->code != 0) {
5922     jam();
5923     arbitRec.code = sd->code;
5924   }//if
5925 }
5926 
5927 void
execARBIT_PREPREF(Signal * signal)5928 Qmgr::execARBIT_PREPREF(Signal* signal)
5929 {
5930   jamEntry();
5931   ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
5932   if (sd->code == 0) {
5933     jam();
5934     sd->code = ArbitCode::ErrUnknown;
5935   }
5936   execARBIT_PREPCONF(signal);
5937 }
5938 
5939 /**
5940  * Handle START state.  On first call send start request to
5941  * the chosen arbitrator.  Then wait for a CONF.
5942  */
5943 void
stateArbitStart(Signal * signal)5944 Qmgr::stateArbitStart(Signal* signal)
5945 {
5946   if (arbitRec.newstate) {
5947     jam();
5948     CRASH_INSERTION((Uint32)910 + arbitRec.state);
5949 
5950     arbitRec.sendCount = 0;
5951     arbitRec.recvCount = 0;
5952     arbitRec.code = 0;
5953     arbitRec.newstate = false;
5954   }
5955 
5956   switch (arbitRec.method){
5957   case ArbitRec::METHOD_EXTERNAL:
5958     jam();
5959     ndbrequire(arbitRec.node == 0); // No arbitrator selected
5960 
5961     // Don't start arbitrator in API node => ARBIT_RUN
5962     arbitRec.state = ARBIT_RUN;
5963     arbitRec.newstate = true;
5964     return;
5965     break;
5966 
5967   case ArbitRec::METHOD_DEFAULT:
5968     if (! arbitRec.sendCount) {
5969       jam();
5970       BlockReference blockRef = calcApiClusterMgrBlockRef(arbitRec.node);
5971       ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
5972       sd->sender = getOwnNodeId();
5973       sd->code = 0;
5974       sd->node = arbitRec.node;
5975       sd->ticket = arbitRec.ticket;
5976       sd->mask.clear();
5977       sendSignal(blockRef, GSN_ARBIT_STARTREQ, signal,
5978                  ArbitSignalData::SignalLength, JBB);
5979       arbitRec.sendCount = 1;
5980       arbitRec.setTimestamp();		// send time
5981       return;
5982     }
5983     if (arbitRec.recvCount) {
5984       jam();
5985       reportArbitEvent(signal, NDB_LE_ArbitState);
5986       if (arbitRec.code == ArbitCode::ApiStart) {
5987         jam();
5988         arbitRec.state = ARBIT_RUN;
5989         arbitRec.newstate = true;
5990         return;
5991       }
5992       arbitRec.state = ARBIT_INIT;
5993       arbitRec.newstate = true;
5994       return;
5995     }
5996     if (arbitRec.getTimediff() > getArbitTimeout()) {
5997       jam();
5998       arbitRec.code = ArbitCode::ErrTimeout;
5999       reportArbitEvent(signal, NDB_LE_ArbitState);
6000       arbitRec.state = ARBIT_INIT;
6001       arbitRec.newstate = true;
6002       return;
6003     }
6004     break;
6005 
6006   default:
6007     ndbrequire(false);
6008     break;
6009   }
6010 }
6011 
6012 void
execARBIT_STARTCONF(Signal * signal)6013 Qmgr::execARBIT_STARTCONF(Signal* signal)
6014 {
6015   jamEntry();
6016   ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
6017   if (! arbitRec.match(sd)) {
6018     jam();
6019     return;		// stray signal
6020   }
6021   if (arbitRec.state != ARBIT_START) {
6022     jam();
6023     return;		// wrong state
6024   }
6025   if (arbitRec.recvCount) {
6026     jam();
6027     return;		// wrong state
6028   }
6029   arbitRec.code = sd->code;
6030   arbitRec.recvCount = 1;
6031 }
6032 
6033 void
execARBIT_STARTREF(Signal * signal)6034 Qmgr::execARBIT_STARTREF(Signal* signal)
6035 {
6036   jamEntry();
6037   ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
6038   if (sd->code == 0) {
6039     jam();
6040     sd->code = ArbitCode::ErrUnknown;
6041   }
6042   execARBIT_STARTCONF(signal);
6043 }
6044 
6045 /**
6046  * Handle RUN state.  Send ticket to any new nodes which have
6047  * appeared after PREP state.  We don't care about a CONF.
6048  */
6049 void
stateArbitRun(Signal * signal)6050 Qmgr::stateArbitRun(Signal* signal)
6051 {
6052   if (arbitRec.newstate) {
6053     jam();
6054     CRASH_INSERTION((Uint32)910 + arbitRec.state);
6055 
6056     arbitRec.code = 0;
6057     arbitRec.newstate = false;
6058   }
6059   NodeRecPtr aPtr;
6060   aPtr.i = 0;
6061   const unsigned stop = NodeBitmask::NotFound;
6062   while ((aPtr.i = arbitRec.newMask.find(aPtr.i + 1)) != stop) {
6063     jam();
6064     arbitRec.newMask.clear(aPtr.i);
6065     ptrAss(aPtr, nodeRec);
6066     ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
6067     sd->sender = getOwnNodeId();
6068     sd->code = ArbitCode::PrepAtrun;
6069     sd->node = arbitRec.node;
6070     sd->ticket = arbitRec.ticket;
6071     sd->mask.clear();
6072     sendSignal(aPtr.p->blockRef, GSN_ARBIT_PREPREQ, signal,
6073       ArbitSignalData::SignalLength, JBB);
6074   }
6075 }
6076 
6077 /**
6078  * Handle CHOOSE state.  Entered only from RUN state when
6079  * there is a possible network partitioning.  Send CHOOSE to
6080  * the arbitrator.  On win switch to INIT state because a new
6081  * ticket must be created.
6082  */
6083 void
stateArbitChoose(Signal * signal)6084 Qmgr::stateArbitChoose(Signal* signal)
6085 {
6086   if (arbitRec.newstate) {
6087     jam();
6088     CRASH_INSERTION((Uint32)910 + arbitRec.state);
6089 
6090     arbitRec.sendCount = 0;
6091     arbitRec.recvCount = 0;
6092     arbitRec.code = 0;
6093     arbitRec.newstate = false;
6094   }
6095 
6096   switch(arbitRec.method){
6097   case ArbitRec::METHOD_EXTERNAL:
6098   {
6099     if (! arbitRec.sendCount) {
6100       jam();
6101       ndbrequire(arbitRec.node == 0); // No arbitrator selected
6102       // Don't send CHOOSE to anyone, just wait for timeout to expire
6103       arbitRec.sendCount = 1;
6104       arbitRec.setTimestamp();
6105       return;
6106     }
6107 
6108     if (arbitRec.getTimediff() > getArbitTimeout()) {
6109       jam();
6110       // Arbitration timeout has expired
6111       ndbrequire(arbitRec.node == 0); // No arbitrator selected
6112 
6113       NodeBitmask nodes;
6114       computeArbitNdbMask(nodes);
6115       arbitRec.code = ArbitCode::WinWaitExternal;
6116       reportArbitEvent(signal, NDB_LE_ArbitResult, nodes);
6117 
6118       sendCommitFailReq(signal);        // start commit of failed nodes
6119       arbitRec.state = ARBIT_INIT;
6120       arbitRec.newstate = true;
6121       return;
6122     }
6123     break;
6124   }
6125 
6126   case ArbitRec::METHOD_DEFAULT:
6127   {
6128     if (! arbitRec.sendCount) {
6129       jam();
6130       const BlockReference blockRef = calcApiClusterMgrBlockRef(arbitRec.node);
6131       ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
6132       sd->sender = getOwnNodeId();
6133       sd->code = 0;
6134       sd->node = arbitRec.node;
6135       sd->ticket = arbitRec.ticket;
6136       computeArbitNdbMask(sd->mask);
6137       if (ERROR_INSERTED(943))
6138       {
6139         ndbout << "Not sending GSN_ARBIT_CHOOSEREQ, thereby causing"
6140                << " arbitration to time out."<< endl;
6141       }
6142       else
6143       {
6144         sendSignal(blockRef, GSN_ARBIT_CHOOSEREQ, signal,
6145                    ArbitSignalData::SignalLength, JBA);
6146       }
6147       arbitRec.sendCount = 1;
6148       arbitRec.setTimestamp();		// send time
6149       return;
6150     }
6151 
6152     if (arbitRec.recvCount) {
6153       jam();
6154       reportArbitEvent(signal, NDB_LE_ArbitResult);
6155       if (arbitRec.code == ArbitCode::WinChoose) {
6156         jam();
6157         sendCommitFailReq(signal);        // start commit of failed nodes
6158         arbitRec.state = ARBIT_INIT;
6159         arbitRec.newstate = true;
6160         return;
6161       }
6162       arbitRec.state = ARBIT_CRASH;
6163       arbitRec.newstate = true;
6164       stateArbitCrash(signal);		// do it at once
6165       return;
6166     }
6167 
6168     if (arbitRec.getTimediff() > getArbitTimeout()) {
6169       jam();
6170       // Arbitration timeout has expired
6171       arbitRec.code = ArbitCode::ErrTimeout;
6172       reportArbitEvent(signal, NDB_LE_ArbitState);
6173       arbitRec.state = ARBIT_CRASH;
6174       arbitRec.newstate = true;
6175       stateArbitCrash(signal);		// do it at once
6176       return;
6177     }
6178     break;
6179   }
6180 
6181   default:
6182     ndbrequire(false);
6183     break;
6184   }
6185 }
6186 
6187 void
execARBIT_CHOOSECONF(Signal * signal)6188 Qmgr::execARBIT_CHOOSECONF(Signal* signal)
6189 {
6190   jamEntry();
6191   ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
6192   if (!arbitRec.match(sd)) {
6193     jam();
6194     return;		// stray signal
6195   }
6196   if (arbitRec.state != ARBIT_CHOOSE) {
6197     jam();
6198     return;		// wrong state
6199   }
6200   if (arbitRec.recvCount) {
6201     jam();
6202     return;		// wrong state
6203   }
6204   arbitRec.recvCount = 1;
6205   arbitRec.code = sd->code;
6206 }
6207 
6208 void
execARBIT_CHOOSEREF(Signal * signal)6209 Qmgr::execARBIT_CHOOSEREF(Signal* signal)
6210 {
6211   jamEntry();
6212   ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
6213   if (sd->code == 0) {
6214     jam();
6215     sd->code = ArbitCode::ErrUnknown;
6216   }
6217   execARBIT_CHOOSECONF(signal);
6218 }
6219 
6220 /**
6221  * Handle CRASH state.  We must crash immediately.
6222  * XXX tell other nodes in our party to crash too.
6223  */
6224 void
stateArbitCrash(Signal * signal)6225 Qmgr::stateArbitCrash(Signal* signal)
6226 {
6227   jam();
6228   if (arbitRec.newstate) {
6229     jam();
6230     CRASH_INSERTION((Uint32)910 + arbitRec.state);
6231     arbitRec.setTimestamp();
6232     arbitRec.code = 0;
6233     arbitRec.newstate = false;
6234   }
6235 #ifdef ndb_arbit_crash_wait_for_event_report_to_get_out
6236   if (! (arbitRec.getTimediff() > getArbitTimeout()))
6237     return;
6238 #endif
6239   CRASH_INSERTION(932);
6240   CRASH_INSERTION(938);
6241   CRASH_INSERTION(943);
6242   progError(__LINE__, NDBD_EXIT_ARBIT_SHUTDOWN,
6243             "Arbitrator decided to shutdown this node");
6244 }
6245 
6246 /**
6247  * Arbitrator may inform us that it will exit.  This lets us
6248  * start looking sooner for a new one.  Handle it like API node
6249  * failure.
6250  */
6251 void
execARBIT_STOPREP(Signal * signal)6252 Qmgr::execARBIT_STOPREP(Signal* signal)
6253 {
6254   jamEntry();
6255   ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
6256   if (! arbitRec.match(sd)) {
6257     jam();
6258     return;		// stray signal
6259   }
6260   arbitRec.code = ArbitCode::ApiExit;
6261   handleArbitApiFail(signal, arbitRec.node);
6262 }
6263 
6264 void
computeArbitNdbMask(NodeBitmaskPOD & aMask)6265 Qmgr::computeArbitNdbMask(NodeBitmaskPOD& aMask)
6266 {
6267   NodeRecPtr aPtr;
6268   aMask.clear();
6269   for (aPtr.i = 1; aPtr.i < MAX_NDB_NODES; aPtr.i++) {
6270     jam();
6271     ptrAss(aPtr, nodeRec);
6272     if (getNodeInfo(aPtr.i).getType() == NodeInfo::DB && aPtr.p->phase == ZRUNNING){
6273       jam();
6274       aMask.set(aPtr.i);
6275     }
6276   }
6277 }
6278 
6279 void
computeArbitNdbMask(NdbNodeBitmaskPOD & aMask)6280 Qmgr::computeArbitNdbMask(NdbNodeBitmaskPOD& aMask)
6281 {
6282   NodeRecPtr aPtr;
6283   aMask.clear();
6284   for (aPtr.i = 1; aPtr.i < MAX_NDB_NODES; aPtr.i++) {
6285     jam();
6286     ptrAss(aPtr, nodeRec);
6287     if (getNodeInfo(aPtr.i).getType() == NodeInfo::DB && aPtr.p->phase == ZRUNNING){
6288       jam();
6289       aMask.set(aPtr.i);
6290     }
6291   }
6292 }
6293 
6294 /**
6295  * Report arbitration event.  We use arbitration signal format
6296  * where sender (word 0) is event type.
6297  */
6298 void
reportArbitEvent(Signal * signal,Ndb_logevent_type type,const NodeBitmask mask)6299 Qmgr::reportArbitEvent(Signal* signal, Ndb_logevent_type type,
6300                        const NodeBitmask mask)
6301 {
6302   ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
6303   sd->sender = type;
6304   sd->code = arbitRec.code | (arbitRec.state << 16);
6305   sd->node = arbitRec.node;
6306   sd->ticket = arbitRec.ticket;
6307   sd->mask = mask;
6308 
6309   // Log to console/stdout
6310   LogLevel ll;
6311   ll.setLogLevel(LogLevel::llNodeRestart, 15);
6312   g_eventLogger->log(type, &signal->theData[0],
6313                      ArbitSignalData::SignalLength, 0, &ll);
6314 
6315   sendSignal(CMVMI_REF, GSN_EVENT_REP, signal,
6316     ArbitSignalData::SignalLength, JBB);
6317 }
6318 
6319 // end of arbitration module
6320 
6321 void
execDUMP_STATE_ORD(Signal * signal)6322 Qmgr::execDUMP_STATE_ORD(Signal* signal)
6323 {
6324   if (signal->theData[0] == 1)
6325   {
6326     unsigned max_nodes = MAX_NDB_NODES;
6327     if (signal->getLength() == 2)
6328     {
6329       max_nodes = signal->theData[1];
6330       if (max_nodes == 0 || max_nodes >= MAX_NODES)
6331       {
6332         max_nodes = MAX_NODES;
6333       }
6334       else
6335       {
6336         max_nodes++; // Include node id argument in loop
6337       }
6338     }
6339     infoEvent("creadyDistCom = %d, cpresident = %d\n",
6340 	      creadyDistCom, cpresident);
6341     infoEvent("cpresidentAlive = %d, cpresidentCand = %d (gci: %d)\n",
6342               cpresidentAlive,
6343 	      c_start.m_president_candidate,
6344 	      c_start.m_president_candidate_gci);
6345     infoEvent("ctoStatus = %d\n", ctoStatus);
6346     for(Uint32 i = 1; i < max_nodes; i++){
6347       NodeRecPtr nodePtr;
6348       nodePtr.i = i;
6349       ptrCheckGuard(nodePtr, MAX_NODES, nodeRec);
6350       char buf[100];
6351       switch(nodePtr.p->phase){
6352       case ZINIT:
6353         sprintf(buf, "Node %d: ZINIT(%d)", i, nodePtr.p->phase);
6354         break;
6355       case ZSTARTING:
6356         sprintf(buf, "Node %d: ZSTARTING(%d)", i, nodePtr.p->phase);
6357         break;
6358       case ZRUNNING:
6359         sprintf(buf, "Node %d: ZRUNNING(%d)", i, nodePtr.p->phase);
6360         break;
6361       case ZPREPARE_FAIL:
6362         sprintf(buf, "Node %d: ZPREPARE_FAIL(%d)", i, nodePtr.p->phase);
6363         break;
6364       case ZFAIL_CLOSING:
6365         sprintf(buf, "Node %d: ZFAIL_CLOSING(%d)", i, nodePtr.p->phase);
6366         break;
6367       case ZAPI_INACTIVE:
6368         sprintf(buf, "Node %d: ZAPI_INACTIVE(%d)", i, nodePtr.p->phase);
6369         break;
6370       case ZAPI_ACTIVE:
6371         sprintf(buf, "Node %d: ZAPI_ACTIVE(%d)", i, nodePtr.p->phase);
6372         break;
6373       case ZAPI_ACTIVATION_ONGOING:
6374         sprintf(buf, "Node %d: ZAPI_ACTIVATION_ONGOING(%d)",
6375                 i,
6376                 nodePtr.p->phase);
6377         break;
6378       default:
6379         sprintf(buf, "Node %d: <UNKNOWN>(%d)", i, nodePtr.p->phase);
6380         break;
6381       }
6382       infoEvent("%s", buf);
6383     }
6384   }
6385 
6386 #ifdef ERROR_INSERT
6387   if (signal->theData[0] == 935 && signal->getLength() == 2)
6388   {
6389     SET_ERROR_INSERT_VALUE(935);
6390     c_error_insert_extra = signal->theData[1];
6391   }
6392 #endif
6393 
6394   if (signal->theData[0] == 900 && signal->getLength() == 2)
6395   {
6396     ndbout_c("disconnecting %u", signal->theData[1]);
6397     api_failed(signal, signal->theData[1]);
6398   }
6399 
6400   if (signal->theData[0] == 908)
6401   {
6402     int tag = signal->getLength() < 2 ? -1 : signal->theData[1];
6403     char buf[8192];
6404     // for easy grepping in *out.log ...
6405     strcpy(buf, "HB:");
6406     if (tag >= 0)
6407       sprintf(buf+strlen(buf), "%d:", tag);
6408     sprintf(buf+strlen(buf), " pres:%u", cpresident);
6409     sprintf(buf+strlen(buf), " own:%u", getOwnNodeId());
6410     NodeRecPtr myNodePtr;
6411     myNodePtr.i = getOwnNodeId();
6412     ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);
6413     sprintf(buf+strlen(buf), " dyn:%u-%u", myNodePtr.p->ndynamicId & 0xFFFF, myNodePtr.p->ndynamicId >> 16);
6414     sprintf(buf+strlen(buf), " mxdyn:%u", c_maxDynamicId);
6415     sprintf(buf+strlen(buf), " hb:%u->%u->%u", cneighbourl, getOwnNodeId(), cneighbourh);
6416     sprintf(buf+strlen(buf), " node:dyn-hi,cfg:");
6417     NodeRecPtr nodePtr;
6418     for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++)
6419     {
6420       ptrAss(nodePtr, nodeRec);
6421       Uint32 type = getNodeInfo(nodePtr.i).m_type;
6422       if (type == NodeInfo::DB)
6423       {
6424         sprintf(buf+strlen(buf), " %u:%u-%u,%u", nodePtr.i, nodePtr.p->ndynamicId & 0xFFFF, nodePtr.p->ndynamicId >> 16, nodePtr.p->hbOrder);
6425       }
6426     }
6427     ndbout << buf << endl;
6428   }
6429 
6430 #ifdef ERROR_INSERT
6431   Uint32 dumpCode = signal->theData[0];
6432   if ((dumpCode == 9992) ||
6433       (dumpCode == 9993))
6434   {
6435     if (signal->getLength() == 2)
6436     {
6437       Uint32 nodeId = signal->theData[1];
6438       Uint32& newNodeId = signal->theData[1];
6439       Uint32 length = 2;
6440       assert(257 > MAX_NODES);
6441       if (nodeId > MAX_NODES)
6442       {
6443         const char* type = "None";
6444         switch (nodeId)
6445         {
6446         case 257:
6447         {
6448           /* Left (lower) neighbour */
6449           newNodeId = cneighbourl;
6450           type = "Left neighbour";
6451           break;
6452         }
6453         case 258:
6454         {
6455           /* Right (higher) neighbour */
6456           newNodeId = cneighbourh;
6457           type = "Right neighbour";
6458           break;
6459         }
6460         case 259:
6461         {
6462           /* President */
6463           newNodeId = cpresident;
6464           type = "President";
6465           break;
6466         }
6467         }
6468         ndbout_c("QMGR : Mapping request on node id %u to node id %u (%s)",
6469                  nodeId, newNodeId, type);
6470         if (newNodeId != nodeId)
6471         {
6472           sendSignal(CMVMI_REF, GSN_DUMP_STATE_ORD, signal, length, JBB);
6473         }
6474       }
6475     }
6476   }
6477 
6478   if (dumpCode == 9994)
6479   {
6480     ndbout_c("setCCDelay(%u)", signal->theData[1]);
6481     setCCDelay(signal->theData[1]);
6482     m_connectivity_check.m_enabled = true;
6483   }
6484 #endif
6485 
6486   if (signal->theData[0] == 939 && signal->getLength() == 2)
6487   {
6488     jam();
6489     Uint32 nodeId = signal->theData[1];
6490     ndbout_c("Force close communication to %u", nodeId);
6491     SET_ERROR_INSERT_VALUE2(939, nodeId);
6492     CloseComReqConf * closeCom = CAST_PTR(CloseComReqConf,
6493                                           signal->getDataPtrSend());
6494 
6495     closeCom->xxxBlockRef = reference();
6496     closeCom->requestType = CloseComReqConf::RT_NO_REPLY;
6497     closeCom->failNo      = 0;
6498     closeCom->noOfNodes   = 1;
6499     NodeBitmask::clear(closeCom->theNodes);
6500     NodeBitmask::set(closeCom->theNodes, nodeId);
6501     sendSignal(TRPMAN_REF, GSN_CLOSE_COMREQ, signal,
6502                CloseComReqConf::SignalLength, JBB);
6503   }
6504 }//Qmgr::execDUMP_STATE_ORD()
6505 
6506 void
execAPI_BROADCAST_REP(Signal * signal)6507 Qmgr::execAPI_BROADCAST_REP(Signal* signal)
6508 {
6509   jamEntry();
6510   ApiBroadcastRep api= *(const ApiBroadcastRep*)signal->getDataPtr();
6511 
6512   SectionHandle handle(this, signal);
6513   Uint32 len = signal->getLength() - ApiBroadcastRep::SignalLength;
6514   memmove(signal->theData, signal->theData+ApiBroadcastRep::SignalLength,
6515 	  4*len);
6516 
6517   NodeBitmask mask;
6518   NodeRecPtr nodePtr;
6519   for (nodePtr.i = 1; nodePtr.i < MAX_NODES; nodePtr.i++)
6520   {
6521     jam();
6522     ptrAss(nodePtr, nodeRec);
6523     if (nodePtr.p->phase == ZAPI_ACTIVE &&
6524 	getNodeInfo(nodePtr.i).m_version >= api.minVersion)
6525     {
6526       jam();
6527       mask.set(nodePtr.i);
6528     }
6529   }
6530 
6531   if (mask.isclear())
6532   {
6533     jam();
6534     releaseSections(handle);
6535     return;
6536   }
6537 
6538   NodeReceiverGroup rg(API_CLUSTERMGR, mask);
6539   sendSignal(rg, api.gsn, signal, len, JBB,
6540 	     &handle);
6541 }
6542 
/**
 * Handle NODE_FAILREP by passing it on to the signal counter manager.
 *
 * @param signal  The NODE_FAILREP signal; per the note below its
 *                contents must be considered consumed afterwards.
 */
void
Qmgr::execNODE_FAILREP(Signal * signal)
{
  jamEntry();
  // make sure any distributed signals get acknowledged
  // destructive of the signal
  c_counterMgr.execNODE_FAILREP(signal);
}
6551 
6552 void
execALLOC_NODEID_REQ(Signal * signal)6553 Qmgr::execALLOC_NODEID_REQ(Signal * signal)
6554 {
6555   jamEntry();
6556   AllocNodeIdReq req = *(AllocNodeIdReq*)signal->getDataPtr();
6557   Uint32 error = 0;
6558 
6559   NodeRecPtr nodePtr;
6560   nodePtr.i = req.nodeId;
6561   if ((nodePtr.i >= MAX_NODES) ||
6562       ((req.nodeType == NodeInfo::DB) &&
6563        (nodePtr.i >= MAX_NDB_NODES)))
6564   {
6565     /* Ignore messages about nodes not even within range */
6566     jam();
6567     return;
6568   }
6569   ptrAss(nodePtr, nodeRec);
6570 
6571   if (refToBlock(req.senderRef) != QMGR) // request from management server
6572   {
6573     /* master */
6574 
6575     if (getOwnNodeId() != cpresident)
6576     {
6577       jam();
6578       error = AllocNodeIdRef::NotMaster;
6579     }
6580     else if (!opAllocNodeIdReq.m_tracker.done())
6581     {
6582       jam();
6583       error = AllocNodeIdRef::Busy;
6584     }
6585     else if (c_connectedNodes.get(req.nodeId))
6586     {
6587       jam();
6588       error = AllocNodeIdRef::NodeConnected;
6589     }
6590     else if (nodePtr.p->m_secret != 0)
6591     {
6592       jam();
6593       error = AllocNodeIdRef::NodeReserved;
6594     }
6595     else if (req.nodeType != getNodeInfo(req.nodeId).m_type)
6596     {
6597       jam();
6598       error = AllocNodeIdRef::NodeTypeMismatch;
6599     }
6600     else if (req.nodeType == NodeInfo::API && c_allow_api_connect == 0)
6601     {
6602       jam();
6603       error = AllocNodeIdRef::NodeReserved;
6604     }
6605 
6606     if (error)
6607     {
6608       jam();
6609       AllocNodeIdRef * ref = (AllocNodeIdRef*)signal->getDataPtrSend();
6610       ref->senderRef = reference();
6611       ref->errorCode = error;
6612       ref->masterRef = numberToRef(QMGR, cpresident);
6613       ref->senderData = req.senderData;
6614       ref->nodeId = req.nodeId;
6615       sendSignal(req.senderRef, GSN_ALLOC_NODEID_REF, signal,
6616                  AllocNodeIdRef::SignalLength, JBB);
6617       return;
6618     }
6619 
6620     if (ERROR_INSERTED(934) && req.nodeId != getOwnNodeId())
6621     {
6622       CRASH_INSERTION(934);
6623     }
6624 
6625     /**
6626      * generate secret
6627      */
6628     const NDB_TICKS now = NdbTick_getCurrentTicks();
6629     const Uint32 secret_hi = Uint32(now.getUint64() >> 24);
6630     const Uint32 secret_lo = Uint32(now.getUint64() << 8) + getOwnNodeId();
6631     req.secret_hi = secret_hi;
6632     req.secret_lo = secret_lo;
6633 
6634     if (req.timeout > 60000)
6635       req.timeout = 60000;
6636 
6637     nodePtr.p->m_secret = (Uint64(secret_hi) << 32) + secret_lo;
6638     nodePtr.p->m_alloc_timeout = NdbTick_AddMilliseconds(now,req.timeout);
6639 
6640     opAllocNodeIdReq.m_req = req;
6641     opAllocNodeIdReq.m_error = 0;
6642     opAllocNodeIdReq.m_connectCount =
6643       getNodeInfo(refToNode(req.senderRef)).m_connectCount;
6644 
6645     jam();
6646     AllocNodeIdReq * req2 = (AllocNodeIdReq*)signal->getDataPtrSend();
6647     * req2 = req;
6648     req2->senderRef = reference();
6649     NodeReceiverGroup rg(QMGR, c_clusterNodes);
6650     RequestTracker & p = opAllocNodeIdReq.m_tracker;
6651     p.init<AllocNodeIdRef>(c_counterMgr, rg, GSN_ALLOC_NODEID_REF, 0);
6652 
6653     sendSignal(rg, GSN_ALLOC_NODEID_REQ, signal,
6654                AllocNodeIdReq::SignalLengthQMGR, JBB);
6655     return;
6656   }
6657 
6658   /* participant */
6659   if (c_connectedNodes.get(req.nodeId))
6660   {
6661     jam();
6662     error = AllocNodeIdRef::NodeConnected;
6663   }
6664   else if (req.nodeType != getNodeInfo(req.nodeId).m_type)
6665   {
6666     jam();
6667     error = AllocNodeIdRef::NodeTypeMismatch;
6668   }
6669   else if ((nodePtr.p->failState != NORMAL) ||
6670            ((req.nodeType == NodeInfo::DB) &&
6671             (cfailedNodes.get(nodePtr.i))))
6672   {
6673     /**
6674      * Either the node has committed its node failure in QMGR but not yet
6675      * completed the node internal node failure handling. Or the node
6676      * failure commit process is still ongoing in QMGR. We should not
6677      * allocate a node id in either case.
6678      */
6679     jam();
6680     error = AllocNodeIdRef::NodeFailureHandlingNotCompleted;
6681   }
6682   else if (req.nodeType == NodeInfo::API && nodePtr.p->phase != ZAPI_INACTIVE)
6683   {
6684     jam();
6685     if (cpresident != getOwnNodeId() && c_allow_api_connect == 0)
6686     {
6687       /**
6688        * Don't block during NR
6689        */
6690       jam();
6691     }
6692     else
6693     {
6694       jam();
6695       error = AllocNodeIdRef::NodeReserved;
6696     }
6697   }
6698 #if 0
6699   /**
6700    * For now only make "time/secret" based reservation on master
6701    *   as we otherwise also need to clear it on failure + handle
6702    *   master failure
6703    */
6704   else if (nodePtr.p->m_secret != 0)
6705   {
6706     jam();
6707     error = AllocNodeIdRef::NodeReserved;
6708   }
6709 #endif
6710 
6711   if (error)
6712   {
6713     jam();
6714     AllocNodeIdRef * ref = (AllocNodeIdRef*)signal->getDataPtrSend();
6715     ref->senderRef = reference();
6716     ref->errorCode = error;
6717     ref->senderData = req.senderData;
6718     ref->nodeId = req.nodeId;
6719     ref->masterRef = numberToRef(QMGR, cpresident);
6720     sendSignal(req.senderRef, GSN_ALLOC_NODEID_REF, signal,
6721                AllocNodeIdRef::SignalLength, JBB);
6722     return;
6723   }
6724 
6725   AllocNodeIdConf * conf = (AllocNodeIdConf*)signal->getDataPtrSend();
6726   conf->senderRef = reference();
6727   conf->secret_hi = req.secret_hi;
6728   conf->secret_lo = req.secret_lo;
6729   sendSignal(req.senderRef, GSN_ALLOC_NODEID_CONF, signal,
6730              AllocNodeIdConf::SignalLength, JBB);
6731 }
6732 
6733 void
execALLOC_NODEID_CONF(Signal * signal)6734 Qmgr::execALLOC_NODEID_CONF(Signal * signal)
6735 {
6736   /* master */
6737 
6738   jamEntry();
6739   const AllocNodeIdConf * conf = (AllocNodeIdConf*)signal->getDataPtr();
6740   opAllocNodeIdReq.m_tracker.reportConf(c_counterMgr,
6741                                         refToNode(conf->senderRef));
6742 
6743   if (signal->getLength() >= AllocNodeIdConf::SignalLength)
6744   {
6745     jam();
6746     if (opAllocNodeIdReq.m_req.secret_hi != conf->secret_hi ||
6747         opAllocNodeIdReq.m_req.secret_lo != conf->secret_lo)
6748     {
6749       jam();
6750       if (opAllocNodeIdReq.m_error == 0)
6751       {
6752         jam();
6753         opAllocNodeIdReq.m_error = AllocNodeIdRef::Undefined;
6754       }
6755     }
6756   }
6757 
6758   completeAllocNodeIdReq(signal);
6759 }
6760 
6761 
6762 void
execALLOC_NODEID_REF(Signal * signal)6763 Qmgr::execALLOC_NODEID_REF(Signal * signal)
6764 {
6765   /* master */
6766 
6767   jamEntry();
6768   const AllocNodeIdRef * ref = (AllocNodeIdRef*)signal->getDataPtr();
6769 
6770   if (ref->errorCode == AllocNodeIdRef::NF_FakeErrorREF)
6771   {
6772     jam();
6773     if (ref->nodeId == refToNode(ref->senderRef))
6774     {
6775       /**
6776        * The node id we are trying to allocate has responded with a REF,
6777        * this was sent in response to a node failure, so we are most
6778        * likely not ready to allocate this node id yet. Report node
6779        * failure handling not ready yet.
6780        */
6781       jam();
6782       opAllocNodeIdReq.m_tracker.reportRef(c_counterMgr,
6783                                            refToNode(ref->senderRef));
6784       if (opAllocNodeIdReq.m_error == 0)
6785       {
6786         jam();
6787         opAllocNodeIdReq.m_error =
6788           AllocNodeIdRef::NodeFailureHandlingNotCompleted;
6789       }
6790     }
6791     else
6792     {
6793       jam();
6794       opAllocNodeIdReq.m_tracker.ignoreRef(c_counterMgr,
6795                                            refToNode(ref->senderRef));
6796     }
6797   }
6798   else
6799   {
6800     jam();
6801     opAllocNodeIdReq.m_tracker.reportRef(c_counterMgr,
6802                                          refToNode(ref->senderRef));
6803     if (opAllocNodeIdReq.m_error == 0)
6804     {
6805       jam();
6806       opAllocNodeIdReq.m_error = ref->errorCode;
6807     }
6808   }
6809   completeAllocNodeIdReq(signal);
6810 }
6811 
6812 void
completeAllocNodeIdReq(Signal * signal)6813 Qmgr::completeAllocNodeIdReq(Signal *signal)
6814 {
6815   /* master */
6816 
6817   if (!opAllocNodeIdReq.m_tracker.done())
6818   {
6819     jam();
6820     return;
6821   }
6822 
6823   if (opAllocNodeIdReq.m_connectCount !=
6824       getNodeInfo(refToNode(opAllocNodeIdReq.m_req.senderRef)).m_connectCount)
6825   {
6826     // management server not same version as the original requester
6827     jam();
6828     return;
6829   }
6830 
6831   if (opAllocNodeIdReq.m_tracker.hasRef())
6832   {
6833     jam();
6834 
6835     {
6836       /**
6837        * Clear reservation
6838        */
6839       NodeRecPtr nodePtr;
6840       nodePtr.i = opAllocNodeIdReq.m_req.nodeId;
6841       ptrAss(nodePtr, nodeRec);
6842       nodePtr.p->m_secret = 0;
6843     }
6844 
6845     AllocNodeIdRef * ref = (AllocNodeIdRef*)signal->getDataPtrSend();
6846     ref->senderRef = reference();
6847     ref->senderData = opAllocNodeIdReq.m_req.senderData;
6848     ref->nodeId = opAllocNodeIdReq.m_req.nodeId;
6849     ref->errorCode = opAllocNodeIdReq.m_error;
6850     ref->masterRef = numberToRef(QMGR, cpresident);
6851     ndbassert(AllocNodeIdRef::SignalLength == 5);
6852     sendSignal(opAllocNodeIdReq.m_req.senderRef, GSN_ALLOC_NODEID_REF, signal,
6853                AllocNodeIdRef::SignalLength, JBB);
6854     return;
6855   }
6856 
6857   jam();
6858 
6859   AllocNodeIdConf * conf = (AllocNodeIdConf*)signal->getDataPtrSend();
6860   conf->senderRef = reference();
6861   conf->senderData = opAllocNodeIdReq.m_req.senderData;
6862   conf->nodeId = opAllocNodeIdReq.m_req.nodeId;
6863   conf->secret_lo = opAllocNodeIdReq.m_req.secret_lo;
6864   conf->secret_hi = opAllocNodeIdReq.m_req.secret_hi;
6865   sendSignal(opAllocNodeIdReq.m_req.senderRef, GSN_ALLOC_NODEID_CONF, signal,
6866              AllocNodeIdConf::SignalLength, JBB);
6867 
6868   /**
6869    * We are the master and master DIH wants to keep track of node restart
6870    * state to be able to control LCP start and stop and also to be able
6871    * to easily report this state to the user when he asks for it.
6872    */
6873   AllocNodeIdRep *rep = (AllocNodeIdRep*)signal->getDataPtrSend();
6874   rep->nodeId = opAllocNodeIdReq.m_req.nodeId;
6875   EXECUTE_DIRECT(DBDIH, GSN_ALLOC_NODEID_REP, signal,
6876 		 AllocNodeIdRep::SignalLength);
6877 }
6878 
6879 void
execSTOP_REQ(Signal * signal)6880 Qmgr::execSTOP_REQ(Signal* signal)
6881 {
6882   jamEntry();
6883   c_stopReq = * (StopReq*)signal->getDataPtr();
6884 
6885   if (c_stopReq.senderRef)
6886   {
6887     jam();
6888     ndbrequire(NdbNodeBitmask::get(c_stopReq.nodes, getOwnNodeId()));
6889 
6890     StopConf *conf = (StopConf*)signal->getDataPtrSend();
6891     conf->senderData = c_stopReq.senderData;
6892     conf->nodeState = getOwnNodeId();
6893     sendSignal(c_stopReq.senderRef,
6894 	       GSN_STOP_CONF, signal, StopConf::SignalLength, JBA);
6895   }
6896 }
6897 
6898 bool
check_multi_node_shutdown(Signal * signal)6899 Qmgr::check_multi_node_shutdown(Signal* signal)
6900 {
6901   if (c_stopReq.senderRef &&
6902       NdbNodeBitmask::get(c_stopReq.nodes, getOwnNodeId()))
6903   {
6904     jam();
6905     if(StopReq::getPerformRestart(c_stopReq.requestInfo))
6906     {
6907       jam();
6908       StartOrd * startOrd = (StartOrd *)&signal->theData[0];
6909       startOrd->restartInfo = c_stopReq.requestInfo;
6910       sendSignal(CMVMI_REF, GSN_START_ORD, signal, 2, JBA);
6911     } else {
6912       sendSignal(CMVMI_REF, GSN_STOP_ORD, signal, 1, JBA);
6913     }
6914     return true;
6915   }
6916   return false;
6917 }
6918 
6919 int
check_hb_order_config()6920 Qmgr::check_hb_order_config()
6921 {
6922   m_hb_order_config_used = false;
6923   Uint32 count = 0;
6924   Uint32 count_zero = 0;
6925   NodeRecPtr nodePtr;
6926   for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++)
6927   {
6928     ptrAss(nodePtr, nodeRec);
6929     const NodeInfo& nodeInfo = getNodeInfo(nodePtr.i);
6930     if (nodeInfo.m_type == NodeInfo::DB)
6931     {
6932       count++;
6933       if (nodePtr.p->hbOrder == 0)
6934         count_zero++;
6935     }
6936   }
6937   ndbrequire(count != 0); // must have node info
6938   if (count_zero == count)
6939   {
6940     jam();
6941     return 0; // no hbOrder defined
6942   }
6943   if (count_zero != 0)
6944   {
6945     jam();
6946     return -1; // error: not all zero or all nonzero
6947   }
6948   for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++)
6949   {
6950     ptrAss(nodePtr, nodeRec);
6951     const NodeInfo& nodeInfo = getNodeInfo(nodePtr.i);
6952     if (nodeInfo.m_type == NodeInfo::DB)
6953     {
6954       NodeRecPtr nodePtr2;
6955       for (nodePtr2.i = nodePtr.i + 1; nodePtr2.i < MAX_NDB_NODES; nodePtr2.i++)
6956       {
6957         ptrAss(nodePtr2, nodeRec);
6958         const NodeInfo& nodeInfo2 = getNodeInfo(nodePtr2.i);
6959         if (nodeInfo2.m_type == NodeInfo::DB)
6960         {
6961           if (nodePtr.i != nodePtr2.i &&
6962               nodePtr.p->hbOrder == nodePtr2.p->hbOrder)
6963           {
6964             jam();
6965             return -2; // error: duplicate nonzero value
6966           }
6967         }
6968       }
6969     }
6970   }
6971   m_hb_order_config_used = true;
6972   return 0;
6973 }
6974 
// Connectivity check tick thresholds, compared against
// m_connectivity_check.m_tick.
// A NODE_PING_CONF that arrives while m_tick < CC_SuspectTicks counts as
// "on time" and clears any suspicion about the responding node.
static const Uint32 CC_SuspectTicks = 1;
// NOTE(review): presumably the tick at which a still-silent node is
// treated as failed; its use is outside this part of the file - confirm.
static const Uint32 CC_FailedTicks = 2;
6977 
6978 void
startConnectivityCheck(Signal * signal,Uint32 reason,Uint32 causingNode)6979 Qmgr::startConnectivityCheck(Signal* signal, Uint32 reason, Uint32 causingNode)
6980 {
6981   jam();
6982   ndbrequire(m_connectivity_check.getEnabled());
6983 
6984   if (m_connectivity_check.m_active)
6985   {
6986     jam();
6987     /* Connectivity check underway already
6988      * do nothing
6989      */
6990     return;
6991   }
6992 
6993 
6994   m_connectivity_check.m_nodesPinged.clear();
6995 
6996   /* Send NODE_PINGREQ signal to all other running nodes, and
6997    * initialise connectivity check bitmasks.
6998    * Note that nodes may already be considered suspect due to
6999    * a previous connectivity check round.
7000    */
7001   Uint32 ownId = getOwnNodeId();
7002   NodePingReq* pingReq = CAST_PTR(NodePingReq, &signal->theData[0]);
7003   pingReq->senderData = ++m_connectivity_check.m_currentRound;
7004   pingReq->senderRef = reference();
7005 
7006   for (Uint32 i=1; i < MAX_NDB_NODES; i++)
7007   {
7008     if (i != ownId)
7009     {
7010       NodeRec& node = nodeRec[i];
7011       if (node.phase == ZRUNNING)
7012       {
7013         /* If connection was considered ok, treat as unknown,
7014          * If it was considered slow, continue to treat
7015          *   as slow
7016          */
7017         sendSignal(node.blockRef,
7018                    GSN_NODE_PING_REQ,
7019                    signal,
7020                    NodePingReq::SignalLength,
7021                    JBA);
7022 
7023         m_connectivity_check.m_nodesPinged.set(i);
7024       }
7025     }
7026   }
7027 
7028   /* Initialise result bitmasks */
7029   m_connectivity_check.m_nodesWaiting.assign(m_connectivity_check.m_nodesPinged);
7030   m_connectivity_check.m_nodesFailedDuring.clear();
7031 
7032   /* Ensure only live nodes are considered suspect */
7033   m_connectivity_check.m_nodesSuspect.bitAND(m_connectivity_check.m_nodesPinged);
7034 
7035   const char* reasonText = "Unknown";
7036   bool firstTime = true;
7037 
7038   switch(reason)
7039   {
7040   case FailRep::ZHEARTBEAT_FAILURE:
7041     reasonText = "Heartbeat failure";
7042     break;
7043   case FailRep::ZCONNECT_CHECK_FAILURE:
7044     reasonText = "Connectivity check request";
7045     break;
7046   default:
7047     firstTime = false;
7048     ndbrequire(m_connectivity_check.m_nodesSuspect.count() > 0);
7049     break;
7050   }
7051 
7052   if (!m_connectivity_check.m_nodesPinged.isclear())
7053   {
7054     jam();
7055     {
7056       char buff[100];
7057       m_connectivity_check.m_nodesPinged.getText(buff);
7058       if (firstTime)
7059       {
7060         g_eventLogger->info("QMGR : Starting connectivity check of %u other nodes (%s) due to %s from node %u.",
7061                             m_connectivity_check.m_nodesPinged.count(),
7062                             buff,
7063                             reasonText,
7064                             causingNode);
7065       }
7066       else
7067       {
7068         char buff2[100];
7069         m_connectivity_check.m_nodesSuspect.getText(buff2);
7070         g_eventLogger->info("QMGR : Restarting connectivity check of %u other nodes (%s) due to %u syspect nodes (%s)",
7071                             m_connectivity_check.m_nodesPinged.count(),
7072                             buff,
7073                             m_connectivity_check.m_nodesSuspect.count(),
7074                             buff2);
7075       }
7076     }
7077 
7078     /* Generate cluster log event */
7079     Uint32 bitmaskSz = NdbNodeBitmask::Size;
7080     signal->theData[0] = NDB_LE_ConnectCheckStarted;
7081     signal->theData[1] = m_connectivity_check.m_nodesPinged.count();
7082     signal->theData[2] = reason;
7083     signal->theData[3] = causingNode;
7084     signal->theData[4] = bitmaskSz;
7085     Uint32* sigPtr = &signal->theData[5];
7086     m_connectivity_check.m_nodesPinged.copyto(bitmaskSz, sigPtr); sigPtr+= bitmaskSz;
7087     m_connectivity_check.m_nodesSuspect.copyto(bitmaskSz, sigPtr);
7088     sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 5 + (2 * bitmaskSz), JBB);
7089 
7090     m_connectivity_check.m_active = true;
7091     m_connectivity_check.m_tick = 0;
7092     const NDB_TICKS now = NdbTick_getCurrentTicks();
7093     m_connectivity_check.m_timer.reset(now);
7094   }
7095   else
7096   {
7097     g_eventLogger->info("QMGR : Connectivity check requested due to %s (from %u) not started as no other running nodes.",
7098                         reasonText,
7099                         causingNode);
7100   }
7101 }
7102 
7103 void
execNODE_PINGREQ(Signal * signal)7104 Qmgr::execNODE_PINGREQ(Signal* signal)
7105 {
7106   jamEntry();
7107   Uint32 ownId = getOwnNodeId();
7108   const NodePingReq* pingReq = CAST_CONSTPTR(NodePingReq, &signal->theData[0]);
7109   Uint32 sendersRef = signal->getSendersBlockRef();
7110   Uint32 sendersNodeId = refToNode(sendersRef);
7111   Uint32 senderData = pingReq->senderData;
7112 
7113   ndbrequire(sendersNodeId != ownId);
7114 
7115   /* We will start our own connectivity check if necessary
7116    * before responding with PING_CONF to the requestor.
7117    * This means that the sending node will receive our PING_REQ
7118    * before our PING_CONF, which should avoid them starting an
7119    * unnecessary extra connectivity check round in some cases.
7120    */
7121   if (likely(m_connectivity_check.getEnabled()))
7122   {
7123     jam();
7124     /* We have connectivity checking configured */
7125     if (! m_connectivity_check.m_active)
7126     {
7127       jam();
7128 
7129       {
7130         /* Don't start a new connectivity check if the requesting
7131          * node has failed from our point of view
7132          */
7133         NodeRecPtr nodePtr;
7134         nodePtr.i = sendersNodeId;
7135         ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
7136         if (unlikely(nodePtr.p->phase != ZRUNNING))
7137         {
7138           jam();
7139 
7140           g_eventLogger->warning("QMGR : Discarding NODE_PINGREQ from non-running node %u (%u)",
7141                                  sendersNodeId, nodePtr.p->phase);
7142           return;
7143         }
7144       }
7145 
7146       /* Start our own Connectivity Check now indicating reason and causing node */
7147       startConnectivityCheck(signal, FailRep::ZCONNECT_CHECK_FAILURE, sendersNodeId);
7148     }
7149   }
7150   else
7151   {
7152     jam();
7153     g_eventLogger->warning("QMGR : NODE_PINGREQ received from node %u, but connectivity "
7154                            "checking not configured on this node.  Ensure all "
7155                            "nodes have the same configuration for parameter "
7156                            "ConnectCheckIntervalMillis.",
7157                            sendersNodeId);
7158   }
7159 
7160   /* Now respond with NODE_PINGCONF */
7161   NodePingConf* pingConf = CAST_PTR(NodePingConf, &signal->theData[0]);
7162 
7163   pingConf->senderData = senderData;
7164   pingConf->senderRef = reference();
7165 
7166   sendSignal(sendersRef,
7167              GSN_NODE_PING_CONF,
7168              signal,
7169              NodePingConf::SignalLength,
7170              JBA);
7171 }
7172 
/**
 * Record that a node has connected: the node is removed from the set
 * of suspect nodes.
 *
 * @param nodeId  Id of the node that connected.
 */
void
Qmgr::ConnectCheckRec::reportNodeConnect(Uint32 nodeId)
{
  /* Clear any suspicion */
  m_nodesSuspect.clear(nodeId);
}
7179 
7180 bool
reportNodeFailure(Uint32 nodeId)7181 Qmgr::ConnectCheckRec::reportNodeFailure(Uint32 nodeId)
7182 {
7183   if (unlikely(m_active))
7184   {
7185     m_nodesFailedDuring.set(nodeId);
7186 
7187     if (m_nodesWaiting.get(nodeId))
7188     {
7189       /* We were waiting for a NODE_PING_CONF from this node,
7190        * remove it from the set
7191        */
7192       m_nodesWaiting.clear(nodeId);
7193 
7194       return m_nodesWaiting.isclear();
7195     }
7196   }
7197   return false;
7198 }
7199 
7200 void
execNODE_PINGCONF(Signal * signal)7201 Qmgr::execNODE_PINGCONF(Signal* signal)
7202 {
7203   jamEntry();
7204 
7205   ndbrequire(m_connectivity_check.getEnabled());
7206 
7207   const NodePingConf* pingConf = CAST_CONSTPTR(NodePingConf, &signal->theData[0]);
7208   Uint32 sendersBlockRef = signal->getSendersBlockRef();
7209   Uint32 sendersNodeId = refToNode(sendersBlockRef);
7210   Uint32 roundNumber = pingConf->senderData;
7211 
7212   ndbrequire(sendersNodeId != getOwnNodeId());
7213   ndbrequire((m_connectivity_check.m_active)                                || /* Normal */
7214              (m_connectivity_check.m_nodesWaiting.get(sendersNodeId)          || /* We killed last round */
7215               m_connectivity_check.m_nodesFailedDuring.get(sendersNodeId)));     /* Someone killed */
7216 
7217   if (unlikely((! m_connectivity_check.m_active) ||
7218                (roundNumber != m_connectivity_check.m_currentRound)))
7219   {
7220     g_eventLogger->warning("QMGR : Received NODEPING_CONF from node %u for round %u, "
7221                            "but we are %sactive on round %u.  Discarding.",
7222                            sendersNodeId,
7223                            roundNumber,
7224                            ((m_connectivity_check.m_active)?"":"in"),
7225                            m_connectivity_check.m_currentRound);
7226     return;
7227   }
7228 
7229   if (ERROR_INSERTED(938))
7230   {
7231     ndbout_c("QMGR : execNODE_PING_CONF() from %u in tick %u",
7232              sendersNodeId, m_connectivity_check.m_tick);
7233   }
7234 
7235   /* Node must have been pinged, we must be waiting for the response,
7236    * or the node must have already failed
7237    */
7238   ndbrequire(m_connectivity_check.m_nodesPinged.get(sendersNodeId));
7239   ndbrequire(m_connectivity_check.m_nodesWaiting.get(sendersNodeId) ||
7240              m_connectivity_check.m_nodesFailedDuring.get(sendersNodeId));
7241 
7242   m_connectivity_check.m_nodesWaiting.clear(sendersNodeId);
7243 
7244   if (likely(m_connectivity_check.m_tick < CC_SuspectTicks))
7245   {
7246     jam();
7247     /* Node responded on time, clear any suspicion about it */
7248     m_connectivity_check.m_nodesSuspect.clear(sendersNodeId);
7249   }
7250 
7251   if (m_connectivity_check.m_nodesWaiting.isclear())
7252   {
7253     jam();
7254     /* Connectivity check round is now finished */
7255     connectivityCheckCompleted(signal);
7256   }
7257 }
7258 
7259 void
connectivityCheckCompleted(Signal * signal)7260 Qmgr::connectivityCheckCompleted(Signal* signal)
7261 {
7262   jam();
7263 
7264   m_connectivity_check.m_active = false;
7265 
7266   /* Log the following :
7267    * Nodes checked
7268    * Nodes responded ok
7269    * Nodes responded late (now suspect)
7270    * Nodes failed to respond.
7271    * Nodes failed during
7272    */
7273   char pinged[100];
7274   char late[100];
7275   char silent[100];
7276   char failed[100];
7277 
7278   /* Any 'waiting' nodes have been killed
7279    * Surviving suspects do not include them.
7280    */
7281   NdbNodeBitmask survivingSuspects(m_connectivity_check.m_nodesSuspect);
7282   survivingSuspects.bitANDC(m_connectivity_check.m_nodesWaiting);
7283 
7284   /* Nodes that failed during the check are also excluded */
7285   survivingSuspects.bitANDC(m_connectivity_check.m_nodesFailedDuring);
7286 
7287   m_connectivity_check.m_nodesPinged.getText(pinged);
7288   survivingSuspects.getText(late);
7289   m_connectivity_check.m_nodesWaiting.getText(silent);
7290   m_connectivity_check.m_nodesFailedDuring.getText(failed);
7291 
7292   g_eventLogger->info("QMGR : Connectivity check completed, "
7293                       "%u other nodes checked (%s), "
7294                       "%u responded on time, "
7295                       "%u responded late (%s), "
7296                       "%u no response will be failed (%s), "
7297                       "%u failed during check (%s)\n",
7298                       m_connectivity_check.m_nodesPinged.count(),
7299                       pinged,
7300                       m_connectivity_check.m_nodesPinged.count() -
7301                       m_connectivity_check.m_nodesSuspect.count(),
7302                       survivingSuspects.count(),
7303                       late,
7304                       m_connectivity_check.m_nodesWaiting.count(),
7305                       silent,
7306                       m_connectivity_check.m_nodesFailedDuring.count(),
7307                       failed);
7308 
7309   /* Log in Cluster log */
7310   signal->theData[0] = NDB_LE_ConnectCheckCompleted;
7311   signal->theData[1] = m_connectivity_check.m_nodesPinged.count();
7312   signal->theData[2] = survivingSuspects.count();
7313   signal->theData[3] = m_connectivity_check.m_nodesWaiting.count() +
7314     m_connectivity_check.m_nodesFailedDuring.count();
7315 
7316   sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);
7317 
7318   if (survivingSuspects.count() > 0)
7319   {
7320     jam();
7321     /* Still suspect nodes, start another round */
7322     g_eventLogger->info("QMGR : Starting new connectivity check due to suspect nodes.");
7323     /* Restart connectivity check, no external reason or cause */
7324     startConnectivityCheck(signal, 0, 0);
7325   }
7326   else
7327   {
7328     jam();
7329     /* No suspect nodes, stop the protocol now */
7330 
7331     g_eventLogger->info("QMGR : All other nodes (%u) connectivity ok.",
7332                         m_connectivity_check.m_nodesPinged.count() -
7333                         (m_connectivity_check.m_nodesWaiting.count() +
7334                          m_connectivity_check.m_nodesFailedDuring.count()));
7335 
7336     /* Send a heartbeat to our right neighbour at this point as a gesture
7337      * of goodwill
7338      */
7339     sendHeartbeat(signal);
7340     hb_send_timer.reset(NdbTick_getCurrentTicks());
7341   };
7342 }
7343 
7344 void
checkConnectivityTimeSignal(Signal * signal)7345 Qmgr::checkConnectivityTimeSignal(Signal* signal)
7346 {
7347   /* Executed periodically when a connectivity check is
7348    * underway.
7349    * After CC_SuspectTicks have elapsed, any nodes
7350    * which have not responded are considered
7351    * 'Suspect'.
7352    * After CC_FailedTicks have elapsed, any nodes
7353    * which have not responded are considered
7354    * to have failed, and failure handling
7355    * begins.
7356    */
7357   jam();
7358 
7359   /* Preconditions, otherwise we shouldn't have been called */
7360   ndbrequire(m_connectivity_check.getEnabled());
7361   ndbrequire(m_connectivity_check.m_active);
7362   ndbrequire(!m_connectivity_check.m_nodesWaiting.isclear());
7363 
7364   m_connectivity_check.m_tick++;
7365 
7366   switch (m_connectivity_check.m_tick)
7367   {
7368   case CC_SuspectTicks:
7369   {
7370     jam();
7371     /* Still waiting to hear from some nodes, they are now
7372      * suspect
7373      */
7374     m_connectivity_check.m_nodesSuspect.bitOR(m_connectivity_check.m_nodesWaiting);
7375     return;
7376   }
7377   case CC_FailedTicks:
7378   {
7379     jam();
7380     /* Still waiting to hear from some nodes, they will now
7381      * be failed
7382      */
7383     m_connectivity_check.m_active = false;
7384     Uint32 nodeId = 0;
7385 
7386     while ((nodeId = m_connectivity_check.m_nodesWaiting.find(nodeId))
7387            != BitmaskImpl::NotFound)
7388     {
7389       jam();
7390       /* Log failure reason */
7391       /* Todo : Connectivity Check specific failure log? */
7392       signal->theData[0] = NDB_LE_DeadDueToHeartbeat;
7393       signal->theData[1] = nodeId;
7394 
7395       sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
7396 
7397       /* Fail the node */
7398       /* TODO : Consider real time break here */
7399       failReportLab(signal, nodeId, FailRep::ZCONNECT_CHECK_FAILURE, getOwnNodeId());
7400       nodeId++;
7401     }
7402 
7403     /* Now handle the end of the Connectivity Check */
7404     connectivityCheckCompleted(signal);
7405   }
7406   }
7407 }
7408 
7409 bool
isNodeConnectivitySuspect(Uint32 nodeId) const7410 Qmgr::isNodeConnectivitySuspect(Uint32 nodeId) const
7411 {
7412   return m_connectivity_check.m_nodesSuspect.get(nodeId);
7413 }
7414 
7415 void
handleFailFromSuspect(Signal * signal,Uint32 reason,Uint16 aFailedNode,Uint16 sourceNode)7416 Qmgr::handleFailFromSuspect(Signal* signal,
7417                             Uint32 reason,
7418                             Uint16 aFailedNode,
7419                             Uint16 sourceNode)
7420 {
7421   jam();
7422 
7423   const char* reasonText = "Unknown";
7424 
7425   /* We have received a failure report about some node X from
7426    * some other node that we consider to have suspect connectivity
7427    * which may have caused the report.
7428    *
7429    * We will 'invert' the sense of this, and handle it as
7430    * a failure report of the sender, with the same cause.
7431    */
7432   switch(reason)
7433   {
7434   case FailRep::ZCONNECT_CHECK_FAILURE:
7435     jam();
7436     /* Suspect says that connectivity check failed for another node.
7437      * As suspect has bad connectivity from our point of view, we
7438      * blame him.
7439      */
7440     reasonText = "ZCONNECT_CHECK_FAILURE";
7441     break;
7442   case FailRep::ZLINK_FAILURE:
7443     jam();
7444     /* Suspect says that link failed for another node.
7445      * As suspect has bad connectivity from our point of view, we
7446      * blame her.
7447      */
7448     reasonText = "ZLINK_FAILURE";
7449     break;
7450   default:
7451     ndbrequire(false);
7452   }
7453 
7454   g_eventLogger->warning("QMGR : Received Connectivity failure notification about "
7455                          "%u from suspect node %u with reason %s.  "
7456                          "Mapping to failure of %u sourced by me.",
7457                          aFailedNode, sourceNode, reasonText, sourceNode);
7458 
7459   signal->theData[0] = NDB_LE_NodeFailRejected;
7460   signal->theData[1] = reason;
7461   signal->theData[2] = aFailedNode;
7462   signal->theData[3] = sourceNode;
7463 
7464   sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);
7465 
7466   failReportLab(signal, sourceNode, (FailRep::FailCause) reason, getOwnNodeId());
7467 }
7468 
7469 void
execDBINFO_SCANREQ(Signal * signal)7470 Qmgr::execDBINFO_SCANREQ(Signal *signal)
7471 {
7472   DbinfoScanReq req= *(DbinfoScanReq*)signal->theData;
7473   Ndbinfo::Ratelimit rl;
7474 
7475   jamEntry();
7476   switch(req.tableId) {
7477   case Ndbinfo::MEMBERSHIP_TABLEID:
7478   {
7479     jam();
7480     Ndbinfo::Row row(signal, req);
7481     row.write_uint32(getOwnNodeId());
7482     row.write_uint32(getNodeState().nodeGroup);
7483     row.write_uint32(cneighbourl);
7484     row.write_uint32(cneighbourh);
7485     row.write_uint32(cpresident);
7486 
7487     // President successor
7488     Uint32 successor = 0;
7489     {
7490       NodeRecPtr nodePtr;
7491       UintR minDynamicId = (UintR)-1;
7492       for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++)
7493       {
7494         jam();
7495         ptrAss(nodePtr, nodeRec);
7496         if (nodePtr.p->phase == ZRUNNING)
7497         {
7498           if ((nodePtr.p->ndynamicId & 0xFFFF) < minDynamicId)
7499           {
7500             jam();
7501             if (cpresident !=  nodePtr.i)
7502             {
7503               minDynamicId = (nodePtr.p->ndynamicId & 0xFFFF);
7504               successor = nodePtr.i;
7505             }
7506           }
7507         }
7508       }
7509     }
7510     row.write_uint32(successor);
7511 
7512     NodeRecPtr myNodePtr;
7513     myNodePtr.i = getOwnNodeId();
7514     ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);
7515     row.write_uint32(myNodePtr.p->ndynamicId);
7516 
7517     row.write_uint32(arbitRec.node); // arbitrator
7518 
7519     char ticket[20]; // Need 16 characters + 1 for trailing '\0'
7520     arbitRec.ticket.getText(ticket, sizeof(ticket));
7521     row.write_string(ticket);
7522 
7523     row.write_uint32(arbitRec.state);
7524 
7525     // arbitrator connected
7526     row.write_uint32(c_connectedNodes.get(arbitRec.node));
7527 
7528     // Find potential (rank1 and rank2) arbitrators that are connected.
7529     NodeRecPtr aPtr;
7530     // buf_size: Node nr (max 3 chars) and ', '  + trailing '\0'
7531     const int buf_size = 5 * MAX_NODES + 1;
7532     char buf[buf_size];
7533 
7534     for (unsigned rank = 1; rank <= 2; rank++)
7535     {
7536       jam();
7537       aPtr.i = 0;
7538       const unsigned stop = NodeBitmask::NotFound;
7539       int buf_offset = 0;
7540       const char* delimiter = "";
7541 
7542       while ((aPtr.i = arbitRec.apiMask[rank].find(aPtr.i + 1)) != stop)
7543       {
7544         jam();
7545         ptrAss(aPtr, nodeRec);
7546         if (c_connectedNodes.get(aPtr.i))
7547         {
7548           buf_offset += BaseString::snprintf(buf + buf_offset,
7549                                              buf_size - buf_offset,
7550                                              "%s%u", delimiter, aPtr.i);
7551           delimiter = ", ";
7552         }
7553       }
7554 
7555       if (buf_offset == 0)
7556         row.write_string("-");
7557       else
7558         row.write_string(buf);
7559     }
7560 
7561     ndbinfo_send_row(signal, req, row, rl);
7562     break;
7563   }
7564   default:
7565     break;
7566   }
7567   ndbinfo_send_scan_conf(signal, req, rl);
7568 }
7569 
7570 
7571 void
execISOLATE_ORD(Signal * signal)7572 Qmgr::execISOLATE_ORD(Signal* signal)
7573 {
7574   jamEntry();
7575 
7576   IsolateOrd* sig = (IsolateOrd*) signal->theData;
7577 
7578   ndbrequire(sig->senderRef != 0);
7579   NdbNodeBitmask victims;
7580   victims.assign(NdbNodeBitmask::Size, sig->nodesToIsolate);
7581   ndbrequire(!victims.isclear());
7582 
7583   switch (sig->isolateStep)
7584   {
7585   case IsolateOrd::IS_REQ:
7586   {
7587     jam();
7588     /* Initial request, broadcast immediately */
7589 
7590     /* Need to get the set of live nodes to broadcast to */
7591     NdbNodeBitmask hitmen(c_clusterNodes);
7592 
7593     unsigned nodeId = hitmen.find_first();
7594     do
7595     {
7596       jam();
7597       if (!ndbd_isolate_ord(getNodeInfo(nodeId).m_version))
7598       {
7599         jam();
7600         /* Node not able to handle ISOLATE_ORD, skip */
7601         hitmen.clear(nodeId);
7602       }
7603 
7604       nodeId = hitmen.find_next(nodeId + 1);
7605     } while (nodeId != BitmaskImpl::NotFound);
7606 
7607     ndbrequire(!hitmen.isclear()); /* At least me */
7608 
7609     NodeReceiverGroup rg(QMGR, hitmen);
7610 
7611     sig->isolateStep = IsolateOrd::IS_BROADCAST;
7612     sendSignal(rg, GSN_ISOLATE_ORD, signal, IsolateOrd::SignalLength, JBA);
7613     return;
7614   }
7615   case IsolateOrd::IS_BROADCAST:
7616   {
7617     jam();
7618     /* Received reqest, delay */
7619     sig->isolateStep = IsolateOrd::IS_DELAY;
7620 
7621     if (sig->delayMillis > 0)
7622     {
7623       /* Delay processing until delayMillis passes */
7624       jam();
7625       sendSignalWithDelay(reference(),
7626                           GSN_ISOLATE_ORD,
7627                           signal,
7628                           sig->delayMillis,
7629                           IsolateOrd::SignalLength);
7630       return;
7631     }
7632     /* Fall through... */
7633   }
7634   case IsolateOrd::IS_DELAY:
7635   {
7636     jam();
7637 
7638     if (ERROR_INSERTED(942))
7639     {
7640       jam();
7641       g_eventLogger->info("QMGR discarding IsolateRequest");
7642       return;
7643     }
7644 
7645     /* Map to FAIL_REP signal(s) */
7646     Uint32 failSource = refToNode(sig->senderRef);
7647 
7648     unsigned nodeId = victims.find_first();
7649     do
7650     {
7651       jam();
7652 
7653       /* TODO : Consider checking node state and skipping if
7654        * failing already
7655        * Consider logging that action is being taken here
7656        */
7657 
7658       FailRep* failRep = (FailRep*)&signal->theData[0];
7659       failRep->failNodeId = nodeId;
7660       failRep->failCause = FailRep::ZFORCED_ISOLATION;
7661       failRep->failSourceNodeId = failSource;
7662 
7663       sendSignal(reference(), GSN_FAIL_REP, signal, 3, JBA);
7664 
7665       nodeId = victims.find_next(nodeId + 1);
7666     } while (nodeId != BitmaskImpl::NotFound);
7667 
7668     /* Fail rep signals are en-route... */
7669 
7670     return;
7671   }
7672   }
7673 
7674   ndbrequire(false);
7675 }
7676