1 /*
2    Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License, version 2.0,
6    as published by the Free Software Foundation.
7 
8    This program is also distributed with certain software (including
9    but not limited to OpenSSL) that is licensed under separate terms,
10    as designated in a particular file or component or in included license
11    documentation.  The authors of MySQL hereby grant you an additional
12    permission to link the program and your derivative works with the
13    separately licensed software that they have included with MySQL.
14 
15    This program is distributed in the hope that it will be useful,
16    but WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18    GNU General Public License, version 2.0, for more details.
19 
20    You should have received a copy of the GNU General Public License
21    along with this program; if not, write to the Free Software
22    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
23 */
24 
25 
26 #define QMGR_C
27 #include "Qmgr.hpp"
28 #include <pc.hpp>
29 #include <NdbTick.h>
30 #include <signaldata/EventReport.hpp>
31 #include <signaldata/StartOrd.hpp>
32 #include <signaldata/CloseComReqConf.hpp>
33 #include <signaldata/PrepFailReqRef.hpp>
34 #include <signaldata/NodeFailRep.hpp>
35 #include <signaldata/ReadNodesConf.hpp>
36 #include <signaldata/NFCompleteRep.hpp>
37 #include <signaldata/CheckNodeGroups.hpp>
38 #include <signaldata/ArbitSignalData.hpp>
39 #include <signaldata/ApiRegSignalData.hpp>
40 #include <signaldata/ApiVersion.hpp>
41 #include <signaldata/BlockCommitOrd.hpp>
42 #include <signaldata/FailRep.hpp>
43 #include <signaldata/DisconnectRep.hpp>
44 #include <signaldata/ApiBroadcast.hpp>
45 #include <signaldata/Upgrade.hpp>
46 #include <signaldata/EnableCom.hpp>
47 #include <signaldata/RouteOrd.hpp>
48 #include <signaldata/NodePing.hpp>
49 #include <signaldata/DihRestart.hpp>
50 #include <ndb_version.h>
51 
52 #include <EventLogger.hpp>
53 extern EventLogger * g_eventLogger;
54 
/*
 * Start-up protocol tracing for QMGR.
 *
 * Uncomment the define below to make the DEBUG* macros print to ndbout,
 * each message prefixed with "QMGR <line>: ". While DEBUG_QMGR_START is
 * undefined every one of them expands to nothing.
 *
 *   DEBUG(x)                   - print the stream expression x
 *   DEBUG_START(gsn,node,msg)  - "<signal name> to: <node> - <msg>"
 *   DEBUG_START2(gsn,rg,msg)   - same, but lists the nodes of receiver group rg
 *   DEBUG_START3(signal,msg)   - "<signal name> from <sender node> - <msg>"
 *
 * NOTE(review): DEBUG_START3 ends in a semicolon and DEBUG_START2 is a bare
 * brace block, so neither is safe as the sole statement of an if/else.
 */
//#define DEBUG_QMGR_START
#ifdef DEBUG_QMGR_START
#include <DebuggerNames.hpp>
#define DEBUG(x) \
  ndbout << "QMGR " << __LINE__ << ": " << x << endl
#define DEBUG_START(gsn, node, msg) \
  DEBUG(getSignalName(gsn) << " to: " << node << " - " << msg)
#define DEBUG_START2(gsn, rg, msg) \
  { char nodes[255]; DEBUG(getSignalName(gsn) << " to: " << rg.m_nodes.getText(nodes) << " - " << msg); }
#define DEBUG_START3(signal, msg) \
  DEBUG(getSignalName(signal->header.theVerId_signalNumber) << " from " << refToNode(signal->getSendersBlockRef()) << " - " << msg);
#else
#define DEBUG(x)
#define DEBUG_START(gsn, node, msg)
#define DEBUG_START2(gsn, rg, msg)
#define DEBUG_START3(signal, msg)
#endif
68 
/**
 * c_start.m_gsn = GSN_CM_REGREQ
 *   Possible for all nodes
 *   c_start.m_nodes contains all nodes in config
 *
 * c_start.m_gsn = GSN_CM_NODEINFOREQ;
 *   Set when receiving CM_REGCONF
 *   State possible for starting node only (not in cluster)
 *
 *   c_start.m_nodes contains all nodes in the alive cluster
 *                   that have not replied to GSN_CM_NODEINFOREQ
 *                   passed by president in GSN_CM_REGCONF
 *
 * c_start.m_gsn = GSN_CM_ADD
 *   Possible for president only
 *   Set when receiving and accepting CM_REGREQ (to include node)
 *
 *   c_start.m_nodes contains all nodes in the alive cluster + starting node
 *                   that have not replied to GSN_CM_ADD
 *                   by sending GSN_CM_ACKADD
 *
 * c_start.m_gsn = GSN_CM_NODEINFOCONF
 *   Possible for non presidents only
 *     c_start.m_nodes contains a node that has been accepted by president
 *     but has not connected to us yet
 */
95 
96 // Signal entries and statement blocks
97 /* 4  P R O G R A M        */
98 /*******************************/
99 /* CMHEART_BEAT               */
100 /*******************************/
execCM_HEARTBEAT(Signal * signal)101 void Qmgr::execCM_HEARTBEAT(Signal* signal)
102 {
103   NodeRecPtr hbNodePtr;
104   jamEntry();
105   hbNodePtr.i = signal->theData[0];
106   ptrCheckGuard(hbNodePtr, MAX_NDB_NODES, nodeRec);
107   setNodeInfo(hbNodePtr.i).m_heartbeat_cnt= 0;
108   return;
109 }//Qmgr::execCM_HEARTBEAT()
110 
111 /*******************************/
112 /* CM_NODEINFOREF             */
113 /*******************************/
execCM_NODEINFOREF(Signal * signal)114 void Qmgr::execCM_NODEINFOREF(Signal* signal)
115 {
116   jamEntry();
117   systemErrorLab(signal, __LINE__);
118   return;
119 }//Qmgr::execCM_NODEINFOREF()
120 
121 /*******************************/
122 /* CONTINUEB                  */
123 /*******************************/
execCONTINUEB(Signal * signal)124 void Qmgr::execCONTINUEB(Signal* signal)
125 {
126   jamEntry();
127   const Uint32 tcontinuebType = signal->theData[0];
128   const Uint32 tdata0 = signal->theData[1];
129   const Uint32 tdata1 = signal->theData[2];
130   switch (tcontinuebType) {
131   case ZREGREQ_TIMELIMIT:
132     jam();
133     if (c_start.m_startKey != tdata0 || c_start.m_startNode != tdata1) {
134       jam();
135       return;
136     }//if
137     regreqTimeLimitLab(signal);
138     break;
139   case ZREGREQ_MASTER_TIMELIMIT:
140     jam();
141     if (c_start.m_startKey != tdata0 || c_start.m_startNode != tdata1) {
142       jam();
143       return;
144     }//if
145     //regreqMasterTimeLimitLab(signal);
146     failReportLab(signal, c_start.m_startNode, FailRep::ZSTART_IN_REGREQ, getOwnNodeId());
147     return;
148     break;
149   case ZTIMER_HANDLING:
150     jam();
151     timerHandlingLab(signal);
152     return;
153     break;
154   case ZARBIT_HANDLING:
155     jam();
156     runArbitThread(signal);
157     return;
158     break;
159   case ZSTART_FAILURE_LIMIT:{
160     if (cpresident != ZNIL)
161     {
162       jam();
163       return;
164     }
165     Uint64 now = NdbTick_CurrentMillisecond();
166 
167     if (now > (c_start_election_time + c_restartFailureTimeout))
168     {
169       jam();
170       BaseString tmp;
171       tmp.append("Shutting down node as total restart time exceeds "
172 		 " StartFailureTimeout as set in config file ");
173       if(c_restartFailureTimeout == (Uint32) ~0)
174 	tmp.append(" 0 (inifinite)");
175       else
176 	tmp.appfmt(" %d", c_restartFailureTimeout);
177 
178       progError(__LINE__, NDBD_EXIT_SYSTEM_ERROR, tmp.c_str());
179     }
180     signal->theData[0] = ZSTART_FAILURE_LIMIT;
181     sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 3000, 1);
182     return;
183   }
184   default:
185     jam();
186     // ZCOULD_NOT_OCCUR_ERROR;
187     systemErrorLab(signal, __LINE__);
188     return;
189     break;
190   }//switch
191   return;
192 }//Qmgr::execCONTINUEB()
193 
194 
execDEBUG_SIG(Signal * signal)195 void Qmgr::execDEBUG_SIG(Signal* signal)
196 {
197   NodeRecPtr debugNodePtr;
198   jamEntry();
199   debugNodePtr.i = signal->theData[0];
200   ptrCheckGuard(debugNodePtr, MAX_NODES, nodeRec);
201   return;
202 }//Qmgr::execDEBUG_SIG()
203 
204 /*******************************/
205 /* FAIL_REP                   */
206 /*******************************/
execFAIL_REP(Signal * signal)207 void Qmgr::execFAIL_REP(Signal* signal)
208 {
209   const FailRep * const failRep = (FailRep *)&signal->theData[0];
210   const NodeId failNodeId = failRep->failNodeId;
211   const FailRep::FailCause failCause = (FailRep::FailCause)failRep->failCause;
212   Uint32 failSource = failRep->getFailSourceNodeId(signal->length());
213   if (!failSource)
214   {
215     /* Failure source not included, use sender of signal as 'source' */
216     failSource = refToNode(signal->getSendersBlockRef());
217   }
218 
219   jamEntry();
220   failReportLab(signal, failNodeId, failCause, failSource);
221   return;
222 }//Qmgr::execFAIL_REP()
223 
224 /*******************************/
225 /* PRES_TOREQ                 */
226 /*******************************/
execPRES_TOREQ(Signal * signal)227 void Qmgr::execPRES_TOREQ(Signal* signal)
228 {
229   jamEntry();
230   BlockReference Tblockref = signal->theData[0];
231   signal->theData[0] = getOwnNodeId();
232   signal->theData[1] = ccommitFailureNr;
233   sendSignal(Tblockref, GSN_PRES_TOCONF, signal, 2, JBA);
234   return;
235 }//Qmgr::execPRES_TOREQ()
236 
237 void
execREAD_CONFIG_REQ(Signal * signal)238 Qmgr::execREAD_CONFIG_REQ(Signal* signal)
239 {
240   jamEntry();
241 
242   const ReadConfigReq * req = (ReadConfigReq*)signal->getDataPtr();
243 
244   Uint32 ref = req->senderRef;
245   Uint32 senderData = req->senderData;
246 
247   const ndb_mgm_configuration_iterator * p =
248     m_ctx.m_config.getOwnConfigIterator();
249   ndbrequire(p != 0);
250 
251   ReadConfigConf * conf = (ReadConfigConf*)signal->getDataPtrSend();
252   conf->senderRef = reference();
253   conf->senderData = senderData;
254   sendSignal(ref, GSN_READ_CONFIG_CONF, signal,
255 	     ReadConfigConf::SignalLength, JBB);
256 }
257 
258 void
execSTART_ORD(Signal * signal)259 Qmgr::execSTART_ORD(Signal* signal)
260 {
261   /**
262    * Start timer handling
263    */
264   Uint64 now = NdbTick_CurrentMillisecond();
265   signal->theData[0] = ZTIMER_HANDLING;
266   signal->theData[1] = Uint32(now >> 32);
267   signal->theData[2] = Uint32(now);
268   sendSignal(QMGR_REF, GSN_CONTINUEB, signal, 3, JBB);
269 
270   NodeRecPtr nodePtr;
271   for (nodePtr.i = 1; nodePtr.i < MAX_NODES; nodePtr.i++)
272   {
273     ptrAss(nodePtr, nodeRec);
274     nodePtr.p->ndynamicId = 0;
275     nodePtr.p->hbOrder = 0;
276     Uint32 cnt = 0;
277     Uint32 type = getNodeInfo(nodePtr.i).m_type;
278     switch(type){
279     case NodeInfo::DB:
280       jam();
281       nodePtr.p->phase = ZINIT;
282       c_definedNodes.set(nodePtr.i);
283       break;
284     case NodeInfo::API:
285       jam();
286       nodePtr.p->phase = ZAPI_INACTIVE;
287       break;
288     case NodeInfo::MGM:
289       jam();
290       /**
291        * cmvmi allows ndb_mgmd to connect directly
292        */
293       nodePtr.p->phase = ZAPI_INACTIVE;
294       break;
295     default:
296       jam();
297       nodePtr.p->phase = ZAPI_INACTIVE;
298     }
299 
300     setNodeInfo(nodePtr.i).m_heartbeat_cnt = cnt;
301     nodePtr.p->sendPrepFailReqStatus = Q_NOT_ACTIVE;
302     nodePtr.p->sendCommitFailReqStatus = Q_NOT_ACTIVE;
303     nodePtr.p->sendPresToStatus = Q_NOT_ACTIVE;
304     nodePtr.p->failState = NORMAL;
305   }//for
306 }
307 
308 /*
309 4.2  ADD NODE MODULE*/
310 /*##########################################################################*/
311 /*
312 4.2.1 STTOR     */
313 /**--------------------------------------------------------------------------
314  * Start phase signal, must be handled by all blocks.
315  * QMGR is only interested in the first phase.
316  * During phase one we clear all registered applications.
317  *---------------------------------------------------------------------------*/
318 /*******************************/
319 /* STTOR                      */
320 /*******************************/
execSTTOR(Signal * signal)321 void Qmgr::execSTTOR(Signal* signal)
322 {
323   jamEntry();
324 
325   switch(signal->theData[1]){
326   case 1:
327     initData(signal);
328     startphase1(signal);
329     recompute_version_info(NodeInfo::DB);
330     recompute_version_info(NodeInfo::API);
331     recompute_version_info(NodeInfo::MGM);
332     return;
333   case 7:
334     cactivateApiCheck = 1;
335     if (cpresident == getOwnNodeId())
336     {
337       switch(arbitRec.method){
338       case ArbitRec::DISABLED:
339         break;
340 
341       case ArbitRec::METHOD_EXTERNAL:
342       case ArbitRec::METHOD_DEFAULT:
343         /**
344          * Start arbitration thread.  This could be done as soon as
345          * we have all nodes (or a winning majority).
346          */
347         jam();
348         handleArbitStart(signal);
349         break;
350       }
351     }
352     break;
353   case 8:{
354     /**
355      * Enable communication to all API nodes by setting state
356      *   to ZFAIL_CLOSING (which will make it auto-open in checkStartInterface)
357      */
358     c_allow_api_connect = 1;
359     NodeRecPtr nodePtr;
360     for (nodePtr.i = 1; nodePtr.i < MAX_NODES; nodePtr.i++)
361     {
362       jam();
363       Uint32 type = getNodeInfo(nodePtr.i).m_type;
364       if (type != NodeInfo::API)
365         continue;
366 
367       ptrAss(nodePtr, nodeRec);
368       if (nodePtr.p->phase == ZAPI_INACTIVE)
369       {
370         jam();
371         setNodeInfo(nodePtr.i).m_heartbeat_cnt = 3;
372         nodePtr.p->phase = ZFAIL_CLOSING;
373         nodePtr.p->failState = NORMAL;
374       }
375     }
376   }
377   }
378 
379   sendSttorryLab(signal);
380   return;
381 }//Qmgr::execSTTOR()
382 
sendSttorryLab(Signal * signal)383 void Qmgr::sendSttorryLab(Signal* signal)
384 {
385 /****************************<*/
386 /*< STTORRY                  <*/
387 /****************************<*/
388   signal->theData[3] = 7;
389   signal->theData[4] = 8;
390   signal->theData[5] = 255;
391   sendSignal(NDBCNTR_REF, GSN_STTORRY, signal, 6, JBB);
392   return;
393 }//Qmgr::sendSttorryLab()
394 
startphase1(Signal * signal)395 void Qmgr::startphase1(Signal* signal)
396 {
397   jamEntry();
398 
399   NodeRecPtr nodePtr;
400   nodePtr.i = getOwnNodeId();
401   ptrAss(nodePtr, nodeRec);
402   nodePtr.p->phase = ZSTARTING;
403 
404   DihRestartReq * req = CAST_PTR(DihRestartReq, signal->getDataPtrSend());
405   req->senderRef = reference();
406   sendSignal(DBDIH_REF, GSN_DIH_RESTARTREQ, signal,
407              DihRestartReq::SignalLength, JBB);
408   return;
409 }
410 
411 void
execDIH_RESTARTREF(Signal * signal)412 Qmgr::execDIH_RESTARTREF(Signal*signal)
413 {
414   jamEntry();
415 
416   const DihRestartRef * ref = CAST_CONSTPTR(DihRestartRef,
417                                             signal->getDataPtr());
418   c_start.m_latest_gci = 0;
419   c_start.m_no_nodegroup_nodes.assign(NdbNodeBitmask::Size,
420                                       ref->no_nodegroup_mask);
421   execCM_INFOCONF(signal);
422 }
423 
424 void
execDIH_RESTARTCONF(Signal * signal)425 Qmgr::execDIH_RESTARTCONF(Signal*signal)
426 {
427   jamEntry();
428 
429   const DihRestartConf * conf = CAST_CONSTPTR(DihRestartConf,
430                                               signal->getDataPtr());
431 
432   c_start.m_latest_gci = conf->latest_gci;
433   c_start.m_no_nodegroup_nodes.assign(NdbNodeBitmask::Size,
434                                       conf->no_nodegroup_mask);
435   execCM_INFOCONF(signal);
436 }
437 
setHbDelay(UintR aHbDelay)438 void Qmgr::setHbDelay(UintR aHbDelay)
439 {
440   NDB_TICKS now = NdbTick_CurrentMillisecond();
441   hb_send_timer.setDelay(aHbDelay < 10 ? 10 : aHbDelay);
442   hb_send_timer.reset(now);
443   hb_check_timer.setDelay(aHbDelay < 10 ? 10 : aHbDelay);
444   hb_check_timer.reset(now);
445 }
446 
setHbApiDelay(UintR aHbApiDelay)447 void Qmgr::setHbApiDelay(UintR aHbApiDelay)
448 {
449   NDB_TICKS now = NdbTick_CurrentMillisecond();
450   chbApiDelay = (aHbApiDelay < 100 ? 100 : aHbApiDelay);
451   hb_api_timer.setDelay(chbApiDelay);
452   hb_api_timer.reset(now);
453 }
454 
setArbitTimeout(UintR aArbitTimeout)455 void Qmgr::setArbitTimeout(UintR aArbitTimeout)
456 {
457   arbitRec.timeout = (aArbitTimeout < 10 ? 10 : aArbitTimeout);
458 }
459 
setCCDelay(UintR aCCDelay)460 void Qmgr::setCCDelay(UintR aCCDelay)
461 {
462   NDB_TICKS now = NdbTick_CurrentMillisecond();
463   if (aCCDelay == 0)
464   {
465     /* Connectivity check disabled */
466     m_connectivity_check.m_enabled = false;
467     m_connectivity_check.m_timer.setDelay(0);
468   }
469   else
470   {
471     m_connectivity_check.m_enabled = true;
472     m_connectivity_check.m_timer.setDelay(aCCDelay < 10 ? 10 : aCCDelay);
473     m_connectivity_check.m_timer.reset(now);
474   }
475 }
476 
execCONNECT_REP(Signal * signal)477 void Qmgr::execCONNECT_REP(Signal* signal)
478 {
479   jamEntry();
480   const Uint32 nodeId = signal->theData[0];
481 
482   if (ERROR_INSERTED(931))
483   {
484     jam();
485     ndbout_c("Discarding CONNECT_REP(%d)", nodeId);
486     infoEvent("Discarding CONNECT_REP(%d)", nodeId);
487     return;
488   }
489 
490   c_connectedNodes.set(nodeId);
491 
492   NodeRecPtr nodePtr;
493   nodePtr.i = nodeId;
494   ptrCheckGuard(nodePtr, MAX_NODES, nodeRec);
495   nodePtr.p->m_secret = 0;
496 
497   nodePtr.i = getOwnNodeId();
498   ptrCheckGuard(nodePtr, MAX_NODES, nodeRec);
499   NodeInfo nodeInfo = getNodeInfo(nodeId);
500   switch(nodePtr.p->phase){
501   case ZRUNNING:
502     if (nodeInfo.getType() == NodeInfo::DB)
503     {
504       ndbrequire(!c_clusterNodes.get(nodeId));
505     }
506   case ZSTARTING:
507     jam();
508     break;
509   case ZPREPARE_FAIL:
510   case ZFAIL_CLOSING:
511     jam();
512     return;
513   case ZAPI_ACTIVE:
514   case ZAPI_INACTIVE:
515     return;
516   case ZINIT:
517     ndbrequire(getNodeInfo(nodeId).m_type == NodeInfo::MGM);
518     break;
519   default:
520     ndbrequire(false);
521   }
522 
523   if (nodeInfo.getType() != NodeInfo::DB)
524   {
525     jam();
526     return;
527   }
528 
529   switch(c_start.m_gsn){
530   case GSN_CM_REGREQ:
531     jam();
532     sendCmRegReq(signal, nodeId);
533 
534     /**
535      * We're waiting for CM_REGCONF c_start.m_nodes contains all configured
536      *   nodes
537      */
538     ndbrequire(nodePtr.p->phase == ZSTARTING);
539     ndbrequire(c_start.m_nodes.isWaitingFor(nodeId));
540     return;
541   case GSN_CM_NODEINFOREQ:
542     jam();
543 
544     if (c_start.m_nodes.isWaitingFor(nodeId))
545     {
546       jam();
547       ndbrequire(getOwnNodeId() != cpresident);
548       ndbrequire(nodePtr.p->phase == ZSTARTING);
549       sendCmNodeInfoReq(signal, nodeId, nodePtr.p);
550       return;
551     }
552     return;
553   case GSN_CM_NODEINFOCONF:{
554     jam();
555 
556     ndbrequire(getOwnNodeId() != cpresident);
557     ndbrequire(nodePtr.p->phase == ZRUNNING);
558     if (c_start.m_nodes.isWaitingFor(nodeId))
559     {
560       jam();
561       c_start.m_nodes.clearWaitingFor(nodeId);
562       c_start.m_gsn = RNIL;
563 
564       NodeRecPtr addNodePtr;
565       addNodePtr.i = nodeId;
566       ptrCheckGuard(addNodePtr, MAX_NDB_NODES, nodeRec);
567       cmAddPrepare(signal, addNodePtr, nodePtr.p);
568       return;
569     }
570   }
571   default:
572     (void)1;
573   }
574 
575   ndbrequire(!c_start.m_nodes.isWaitingFor(nodeId));
576   ndbrequire(!c_readnodes_nodes.get(nodeId));
577   c_readnodes_nodes.set(nodeId);
578   signal->theData[0] = reference();
579   sendSignal(calcQmgrBlockRef(nodeId), GSN_READ_NODESREQ, signal, 1, JBA);
580   return;
581 }//Qmgr::execCONNECT_REP()
582 
583 void
execREAD_NODESCONF(Signal * signal)584 Qmgr::execREAD_NODESCONF(Signal* signal)
585 {
586   jamEntry();
587   check_readnodes_reply(signal,
588 			refToNode(signal->getSendersBlockRef()),
589 			GSN_READ_NODESCONF);
590 }
591 
592 void
execREAD_NODESREF(Signal * signal)593 Qmgr::execREAD_NODESREF(Signal* signal)
594 {
595   jamEntry();
596   check_readnodes_reply(signal,
597 			refToNode(signal->getSendersBlockRef()),
598 			GSN_READ_NODESREF);
599 }
600 
601 /*******************************/
602 /* CM_INFOCONF                */
603 /*******************************/
execCM_INFOCONF(Signal * signal)604 void Qmgr::execCM_INFOCONF(Signal* signal)
605 {
606   /**
607    * Open communcation to all DB nodes
608    */
609   signal->theData[0] = 0; // no answer
610   signal->theData[1] = 0; // no id
611   signal->theData[2] = NodeInfo::DB;
612   sendSignal(CMVMI_REF, GSN_OPEN_COMREQ, signal, 3, JBB);
613 
614   cpresident = ZNIL;
615   cpresidentAlive = ZFALSE;
616   c_start_election_time = NdbTick_CurrentMillisecond();
617 
618   signal->theData[0] = ZSTART_FAILURE_LIMIT;
619   sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 3000, 1);
620 
621   cmInfoconf010Lab(signal);
622 
623   return;
624 }//Qmgr::execCM_INFOCONF()
625 
626 Uint32 g_start_type = 0;
627 NdbNodeBitmask g_nowait_nodes; // Set by clo
628 
cmInfoconf010Lab(Signal * signal)629 void Qmgr::cmInfoconf010Lab(Signal* signal)
630 {
631   c_start.m_startKey = 0;
632   c_start.m_startNode = getOwnNodeId();
633   c_start.m_nodes.clearWaitingFor();
634   c_start.m_gsn = GSN_CM_REGREQ;
635   c_start.m_starting_nodes.clear();
636   c_start.m_starting_nodes_w_log.clear();
637   c_start.m_regReqReqSent = 0;
638   c_start.m_regReqReqRecv = 0;
639   c_start.m_skip_nodes = g_nowait_nodes;
640   c_start.m_skip_nodes.bitAND(c_definedNodes);
641   c_start.m_start_type = g_start_type;
642 
643   NodeRecPtr nodePtr;
644   cnoOfNodes = 0;
645   for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
646     jam();
647     ptrAss(nodePtr, nodeRec);
648 
649     if(getNodeInfo(nodePtr.i).getType() != NodeInfo::DB)
650       continue;
651 
652     c_start.m_nodes.setWaitingFor(nodePtr.i);
653     cnoOfNodes++;
654 
655     if(!c_connectedNodes.get(nodePtr.i))
656       continue;
657 
658     sendCmRegReq(signal, nodePtr.i);
659   }
660 
661   //----------------------------------------
662   /* Wait for a while. When it returns    */
663   /* we will check if we got any CM_REGREF*/
664   /* or CM_REGREQ (lower nodeid than our  */
665   /* own).                                */
666   //----------------------------------------
667   signal->theData[0] = ZREGREQ_TIMELIMIT;
668   signal->theData[1] = c_start.m_startKey;
669   signal->theData[2] = c_start.m_startNode;
670   sendSignalWithDelay(QMGR_REF, GSN_CONTINUEB, signal, 3000, 3);
671 
672   creadyDistCom = ZTRUE;
673   return;
674 }//Qmgr::cmInfoconf010Lab()
675 
676 void
sendCmRegReq(Signal * signal,Uint32 nodeId)677 Qmgr::sendCmRegReq(Signal * signal, Uint32 nodeId){
678   CmRegReq * req = (CmRegReq *)&signal->theData[0];
679   req->blockRef = reference();
680   req->nodeId = getOwnNodeId();
681   req->version = NDB_VERSION;
682   req->mysql_version = NDB_MYSQL_VERSION_D;
683   req->latest_gci = c_start.m_latest_gci;
684   req->start_type = c_start.m_start_type;
685   c_start.m_skip_nodes.copyto(NdbNodeBitmask::Size, req->skip_nodes);
686   const Uint32 ref = calcQmgrBlockRef(nodeId);
687   sendSignal(ref, GSN_CM_REGREQ, signal, CmRegReq::SignalLength, JBB);
688   DEBUG_START(GSN_CM_REGREQ, nodeId, "");
689 
690   c_start.m_regReqReqSent++;
691 }
692 
/*
4.4.11 CM_REGREQ */
/**--------------------------------------------------------------------------
 * If this signal is received someone is trying to get registered.
 * Only the president has the authority to make decisions about new nodes,
 * so only a president or a node that claims to be the president may send a
 * reply to this signal.
 * This signal can occur at any time after STTOR was received.
 * CPRESIDENT:             Timelimit has expired and someone has
 *                         decided to enter the president role
 * CPRESIDENT_CANDIDATE:
 *     Assigned when we receive a CM_REGREF, if we got more than one REF
 *     then we always keep the lowest nodenumber.
 *     We accept this nodeno as president when our timelimit expires
 * We should consider the following cases:
 * 1- We are the president. If we are busy adding new nodes to the cluster,
 *    then we have to refuse this node to be added.
 *    The refused node will try again in ZREFUSE_ADD_TIME seconds.
 *    If we are not busy then we confirm
 *
 * 2- We know the president, so we don't bother about this REQ.
 *    The president has also got this REQ and will take care of it.
 *
 * 3- The president is not known. We have received CM_INIT, so we compare the
 *    sender's node number to GETOWNNODEID().
 *    If we have a lower number than the sender then we will claim
 *    that we are the president so we send him a refuse signal back.
 *    We have to wait for the CONTINUEB signal before we can enter the
 *    president role. If our GETOWNNODEID() is larger than the sender's node
 *    number, we are not the president and just have to wait for the
 *    reply signal (REF) to our CM_REGREQ_2.
 * 4- We haven't received the CM_INIT signal so we don't know who we are.
 *    Ignore the request.
 *--------------------------------------------------------------------------*/
727 /*******************************/
728 /* CM_REGREQ                  */
729 /*******************************/
730 static
731 int
check_start_type(Uint32 starting,Uint32 own)732 check_start_type(Uint32 starting, Uint32 own)
733 {
734   if (starting == (1 << NodeState::ST_INITIAL_START) &&
735       ((own & (1 << NodeState::ST_INITIAL_START)) == 0))
736   {
737     return 1;
738   }
739   return 0;
740 }
741 
execCM_REGREQ(Signal * signal)742 void Qmgr::execCM_REGREQ(Signal* signal)
743 {
744   DEBUG_START3(signal, "");
745 
746   NodeRecPtr addNodePtr;
747   jamEntry();
748 
749   CmRegReq * const cmRegReq = (CmRegReq *)&signal->theData[0];
750   const BlockReference Tblockref = cmRegReq->blockRef;
751   const Uint32 startingVersion = cmRegReq->version;
752   Uint32 startingMysqlVersion = cmRegReq->mysql_version;
753   addNodePtr.i = cmRegReq->nodeId;
754   Uint32 gci = 1;
755   Uint32 start_type = ~0;
756   NdbNodeBitmask skip_nodes;
757 
758   if (!c_connectedNodes.get(cmRegReq->nodeId))
759   {
760     jam();
761 
762     /**
763      * With ndbmtd, there is a race condition such that
764      *   CM_REGREQ can arrive prior to CONNECT_REP
765      *   since CONNECT_REP is sent from CMVMI
766      *
767      * In such cases, ignore the CM_REGREQ which is safe
768      *   as it will anyway be resent by starting node
769      */
770     g_eventLogger->info("discarding CM_REGREQ from %u "
771                         "as we're not yet connected (isNdbMt: %u)",
772                         cmRegReq->nodeId,
773                         (unsigned)isNdbMt());
774 
775     ndbrequire(isNdbMt());
776     return;
777   }
778 
779   if (signal->getLength() == CmRegReq::SignalLength)
780   {
781     jam();
782     gci = cmRegReq->latest_gci;
783     start_type = cmRegReq->start_type;
784     skip_nodes.assign(NdbNodeBitmask::Size, cmRegReq->skip_nodes);
785   }
786 
787   if (startingVersion < NDBD_SPLIT_VERSION)
788   {
789     startingMysqlVersion = 0;
790   }
791 
792   if (creadyDistCom == ZFALSE) {
793     jam();
794     /* NOT READY FOR DISTRIBUTED COMMUNICATION.*/
795     return;
796   }//if
797 
798   if (!ndbCompatible_ndb_ndb(NDB_VERSION, startingVersion)) {
799     jam();
800     sendCmRegrefLab(signal, Tblockref, CmRegRef::ZINCOMPATIBLE_VERSION);
801     return;
802   }
803 
804   if (!ndb_check_micro_gcp(startingVersion))
805   {
806     jam();
807     infoEvent("Connection from node %u refused as it's not micro GCP enabled",
808               addNodePtr.i);
809     sendCmRegrefLab(signal, Tblockref, CmRegRef::ZINCOMPATIBLE_VERSION);
810     return;
811   }
812 
813   if (!ndb_pnr(startingVersion))
814   {
815     jam();
816     infoEvent("Connection from node %u refused as it's not does not support "
817               "parallel node recovery",
818               addNodePtr.i);
819     sendCmRegrefLab(signal, Tblockref, CmRegRef::ZINCOMPATIBLE_VERSION);
820     return;
821   }
822 
823   if (!ndb_check_hb_order_version(startingVersion) &&
824       m_hb_order_config_used)
825   {
826     jam();
827     infoEvent("Connection from node %u refused as it does not support "
828               "user-defined HeartbeatOrder",
829               addNodePtr.i);
830     sendCmRegrefLab(signal, Tblockref, CmRegRef::ZINCOMPATIBLE_VERSION);
831     return;
832   }
833 
834   if (m_connectivity_check.m_enabled &&
835       !ndbd_connectivity_check(startingVersion))
836   {
837     jam();
838     infoEvent("Connection from node %u refused as it does not support "
839               "ConnectCheckIntervalDelay",
840               addNodePtr.i);
841     sendCmRegrefLab(signal, Tblockref, CmRegRef::ZINCOMPATIBLE_VERSION);
842     return;
843   }
844 
845   if (check_start_type(start_type, c_start.m_start_type))
846   {
847     jam();
848     sendCmRegrefLab(signal, Tblockref, CmRegRef::ZINCOMPATIBLE_START_TYPE);
849     return;
850   }
851 
852   if (cpresident != getOwnNodeId())
853   {
854     jam();
855 
856     if (cpresident == ZNIL)
857     {
858       /***
859        * We don't know the president.
860        * If the node to be added has lower node id
861        * than our president cancidate. Set it as
862        * candidate
863        */
864       jam();
865       if (gci > c_start.m_president_candidate_gci ||
866 	  (gci == c_start.m_president_candidate_gci &&
867 	   addNodePtr.i < c_start.m_president_candidate))
868       {
869 	jam();
870 	c_start.m_president_candidate = addNodePtr.i;
871 	c_start.m_president_candidate_gci = gci;
872       }
873       sendCmRegrefLab(signal, Tblockref, CmRegRef::ZELECTION);
874       return;
875     }
876 
877     /**
878      * We are not the president.
879      * We know the president.
880      * President will answer.
881      */
882     sendCmRegrefLab(signal, Tblockref, CmRegRef::ZNOT_PRESIDENT);
883     return;
884   }//if
885 
886   if (c_start.m_startNode != 0)
887   {
888     jam();
889     /**
890      * President busy by adding another node
891     */
892     sendCmRegrefLab(signal, Tblockref, CmRegRef::ZBUSY_PRESIDENT);
893     return;
894   }//if
895 
896   if (ctoStatus == Q_ACTIVE)
897   {
898     jam();
899     /**
900      * Active taking over as president
901      */
902     sendCmRegrefLab(signal, Tblockref, CmRegRef::ZBUSY_TO_PRES);
903     return;
904   }//if
905 
906   if (getNodeInfo(addNodePtr.i).m_type != NodeInfo::DB)
907   {
908     jam();
909     /**
910      * The new node is not in config file
911      */
912     sendCmRegrefLab(signal, Tblockref, CmRegRef::ZNOT_IN_CFG);
913     return;
914   }
915 
916   if (getNodeState().getSingleUserMode())
917   {
918     /**
919      * The cluster is in single user mode.
920      * Data node is not allowed to get added in the cluster
921      * while in single user mode.
922      */
923     // handle rolling upgrade
924     {
925       unsigned int get_major = getMajor(startingVersion);
926       unsigned int get_minor = getMinor(startingVersion);
927       unsigned int get_build = getBuild(startingVersion);
928 
929       if (startingVersion < NDBD_QMGR_SINGLEUSER_VERSION_5) {
930         jam();
931 
932         infoEvent("QMGR: detect upgrade: new node %u old version %u.%u.%u",
933           (unsigned int)addNodePtr.i, get_major, get_minor, get_build);
934         /**
935          * The new node is old version, send ZINCOMPATIBLE_VERSION instead
936          * of ZSINGLE_USER_MODE.
937          */
938         sendCmRegrefLab(signal, Tblockref, CmRegRef::ZINCOMPATIBLE_VERSION);
939       } else {
940         jam();
941 
942         sendCmRegrefLab(signal, Tblockref, CmRegRef::ZSINGLE_USER_MODE);
943       }//if
944     }
945 
946     return;
947   }//if
948 
949   ptrCheckGuard(addNodePtr, MAX_NDB_NODES, nodeRec);
950   Phase phase = addNodePtr.p->phase;
951   if (phase != ZINIT)
952   {
953     jam();
954     DEBUG("phase = " << phase);
955     sendCmRegrefLab(signal, Tblockref, CmRegRef::ZNOT_DEAD);
956     return;
957   }
958 
959   jam();
960   /**
961    * WE ARE PRESIDENT AND WE ARE NOT BUSY ADDING ANOTHER NODE.
962    * WE WILL TAKE CARE OF THE INCLUSION OF THIS NODE INTO THE CLUSTER.
963    * WE NEED TO START TIME SUPERVISION OF THIS. SINCE WE CANNOT STOP
964    * TIMED SIGNAL IF THE INCLUSION IS INTERRUPTED WE IDENTIFY
965    * EACH INCLUSION WITH A UNIQUE IDENTITY. THIS IS CHECKED WHEN
966    * THE SIGNAL ARRIVES. IF IT HAS CHANGED THEN WE SIMPLY IGNORE
967    * THE TIMED SIGNAL.
968    */
969 
970   /**
971    * Update start record
972    */
973   c_start.m_startKey++;
974   c_start.m_startNode = addNodePtr.i;
975 
976   /**
977    * Assign dynamic id
978    */
979   UintR TdynId = (++c_maxDynamicId) & 0xFFFF;
980   TdynId |= (addNodePtr.p->hbOrder << 16);
981   setNodeInfo(addNodePtr.i).m_version = startingVersion;
982   setNodeInfo(addNodePtr.i).m_mysql_version = startingMysqlVersion;
983   recompute_version_info(NodeInfo::DB, startingVersion);
984   addNodePtr.p->ndynamicId = TdynId;
985 
986   /**
987    * Reply with CM_REGCONF
988    */
989   CmRegConf * const cmRegConf = (CmRegConf *)&signal->theData[0];
990   cmRegConf->presidentBlockRef = reference();
991   cmRegConf->presidentNodeId   = getOwnNodeId();
992   cmRegConf->presidentVersion  = getNodeInfo(getOwnNodeId()).m_version;
993   cmRegConf->presidentMysqlVersion = getNodeInfo(getOwnNodeId()).m_mysql_version;
994   cmRegConf->dynamicId         = TdynId;
995   c_clusterNodes.copyto(NdbNodeBitmask::Size, cmRegConf->allNdbNodes);
996   sendSignal(Tblockref, GSN_CM_REGCONF, signal,
997 	     CmRegConf::SignalLength, JBA);
998   DEBUG_START(GSN_CM_REGCONF, refToNode(Tblockref), "");
999 
1000   /**
1001    * Send CmAdd to all nodes (including starting)
1002    */
1003   c_start.m_nodes = c_clusterNodes;
1004   c_start.m_nodes.setWaitingFor(addNodePtr.i);
1005   c_start.m_gsn = GSN_CM_ADD;
1006 
1007   NodeReceiverGroup rg(QMGR, c_start.m_nodes);
1008   CmAdd * const cmAdd = (CmAdd*)signal->getDataPtrSend();
1009   cmAdd->requestType = CmAdd::Prepare;
1010   cmAdd->startingNodeId = addNodePtr.i;
1011   cmAdd->startingVersion = startingVersion;
1012   cmAdd->startingMysqlVersion = startingMysqlVersion;
1013   sendSignal(rg, GSN_CM_ADD, signal, CmAdd::SignalLength, JBA);
1014   DEBUG_START2(GSN_CM_ADD, rg, "Prepare");
1015 
1016   /**
1017    * Set timer
1018    */
1019   return;
1020   signal->theData[0] = ZREGREQ_MASTER_TIMELIMIT;
1021   signal->theData[1] = c_start.m_startKey;
1022   sendSignalWithDelay(QMGR_REF, GSN_CONTINUEB, signal, 30000, 2);
1023 
1024   return;
1025 }//Qmgr::execCM_REGREQ()
1026 
sendCmRegrefLab(Signal * signal,BlockReference TBRef,CmRegRef::ErrorCode Terror)1027 void Qmgr::sendCmRegrefLab(Signal* signal, BlockReference TBRef,
1028 			   CmRegRef::ErrorCode Terror)
1029 {
1030   CmRegRef* ref = (CmRegRef*)signal->getDataPtrSend();
1031   ref->blockRef = reference();
1032   ref->nodeId = getOwnNodeId();
1033   ref->errorCode = Terror;
1034   ref->presidentCandidate =
1035     (cpresident == ZNIL ? c_start.m_president_candidate : cpresident);
1036   ref->candidate_latest_gci = c_start.m_president_candidate_gci;
1037   ref->latest_gci = c_start.m_latest_gci;
1038   ref->start_type = c_start.m_start_type;
1039   c_start.m_skip_nodes.copyto(NdbNodeBitmask::Size, ref->skip_nodes);
1040   sendSignal(TBRef, GSN_CM_REGREF, signal,
1041 	     CmRegRef::SignalLength, JBB);
1042   DEBUG_START(GSN_CM_REGREF, refToNode(TBRef), "");
1043   return;
1044 }//Qmgr::sendCmRegrefLab()
1045 
1046 /*
1047 4.4.11 CM_REGCONF */
1048 /**--------------------------------------------------------------------------
1049  * President gives permission to a node which wants to join the cluster.
1050  * The president will prepare the cluster that a new node will be added to
1051  * cluster. When the new node has set up all connections to the cluster,
1052  * the president will send commit to all clusternodes so the phase of the
1053  * new node can be changed to ZRUNNING.
1054  *--------------------------------------------------------------------------*/
1055 /*******************************/
1056 /* CM_REGCONF                 */
1057 /*******************************/
execCM_REGCONF(Signal * signal)1058 void Qmgr::execCM_REGCONF(Signal* signal)
1059 {
1060   DEBUG_START3(signal, "");
1061 
1062   NodeRecPtr myNodePtr;
1063   NodeRecPtr nodePtr;
1064   jamEntry();
1065 
1066   const CmRegConf * const cmRegConf = (CmRegConf *)&signal->theData[0];
1067 
1068   if (!ndbCompatible_ndb_ndb(NDB_VERSION, cmRegConf->presidentVersion)) {
1069     jam();
1070     char buf[128];
1071     BaseString::snprintf(buf,sizeof(buf),
1072 			 "incompatible version own=0x%x other=0x%x, "
1073 			 " shutting down",
1074 			 NDB_VERSION, cmRegConf->presidentVersion);
1075     progError(__LINE__, NDBD_EXIT_UNSUPPORTED_VERSION, buf);
1076     return;
1077   }
1078 
1079   if (!ndb_check_hb_order_version(cmRegConf->presidentVersion) &&
1080       m_hb_order_config_used) {
1081     jam();
1082     char buf[128];
1083     BaseString::snprintf(buf,sizeof(buf),
1084 			 "incompatible version own=0x%x other=0x%x, "
1085 			 "due to user-defined HeartbeatOrder, shutting down",
1086 			 NDB_VERSION, cmRegConf->presidentVersion);
1087     progError(__LINE__, NDBD_EXIT_UNSUPPORTED_VERSION, buf);
1088     return;
1089   }
1090 
1091   if (m_connectivity_check.m_enabled &&
1092       !ndbd_connectivity_check(cmRegConf->presidentVersion))
1093   {
1094     jam();
1095     m_connectivity_check.m_enabled = false;
1096     ndbout_c("Disabling ConnectCheckIntervalDelay as president "
1097              " does not support it");
1098     infoEvent("Disabling ConnectCheckIntervalDelay as president "
1099               " does not support it");
1100   }
1101 
1102   myNodePtr.i = getOwnNodeId();
1103   ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);
1104 
1105   ndbrequire(c_start.m_gsn == GSN_CM_REGREQ);
1106   ndbrequire(myNodePtr.p->phase == ZSTARTING);
1107 
1108   cpdistref    = cmRegConf->presidentBlockRef;
1109   cpresident   = cmRegConf->presidentNodeId;
1110   UintR TdynamicId   = cmRegConf->dynamicId;
1111   c_maxDynamicId = TdynamicId & 0xFFFF;
1112   c_clusterNodes.assign(NdbNodeBitmask::Size, cmRegConf->allNdbNodes);
1113 
1114   myNodePtr.p->ndynamicId = TdynamicId;
1115 
1116   // set own MT config here or in REF, and others in CM_NODEINFOREQ/CONF
1117   setNodeInfo(getOwnNodeId()).m_lqh_workers = globalData.ndbMtLqhWorkers;
1118 
1119 /*--------------------------------------------------------------*/
1120 // Send this as an EVENT REPORT to inform about hearing about
1121 // other NDB node proclaiming to be president.
1122 /*--------------------------------------------------------------*/
1123   signal->theData[0] = NDB_LE_CM_REGCONF;
1124   signal->theData[1] = getOwnNodeId();
1125   signal->theData[2] = cpresident;
1126   signal->theData[3] = TdynamicId;
1127   sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);
1128 
1129   for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
1130     jam();
1131     if (c_clusterNodes.get(nodePtr.i)){
1132       jam();
1133       ptrAss(nodePtr, nodeRec);
1134 
1135       ndbrequire(nodePtr.p->phase == ZINIT);
1136       nodePtr.p->phase = ZRUNNING;
1137 
1138       if(c_connectedNodes.get(nodePtr.i)){
1139 	jam();
1140 	sendCmNodeInfoReq(signal, nodePtr.i, myNodePtr.p);
1141       }
1142     }
1143   }
1144 
1145   c_start.m_gsn = GSN_CM_NODEINFOREQ;
1146   c_start.m_nodes = c_clusterNodes;
1147 
1148   if (ERROR_INSERTED(937))
1149   {
1150     CLEAR_ERROR_INSERT_VALUE;
1151     signal->theData[0] = 9999;
1152     sendSignalWithDelay(CMVMI_REF, GSN_NDB_TAMPER, signal, 500, 1);
1153   }
1154 
1155   return;
1156 }//Qmgr::execCM_REGCONF()
1157 
1158 void
check_readnodes_reply(Signal * signal,Uint32 nodeId,Uint32 gsn)1159 Qmgr::check_readnodes_reply(Signal* signal, Uint32 nodeId, Uint32 gsn)
1160 {
1161   NodeRecPtr myNodePtr;
1162   myNodePtr.i = getOwnNodeId();
1163   ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);
1164 
1165   NodeRecPtr nodePtr;
1166   nodePtr.i = nodeId;
1167   ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
1168 
1169   ndbrequire(c_readnodes_nodes.get(nodeId));
1170   ReadNodesConf* conf = (ReadNodesConf*)signal->getDataPtr();
1171   if (gsn == GSN_READ_NODESREF)
1172   {
1173     jam();
1174 retry:
1175     signal->theData[0] = reference();
1176     sendSignal(calcQmgrBlockRef(nodeId), GSN_READ_NODESREQ, signal, 1, JBA);
1177     return;
1178   }
1179 
1180   if (conf->masterNodeId == ZNIL)
1181   {
1182     jam();
1183     goto retry;
1184   }
1185 
1186   Uint32 president = conf->masterNodeId;
1187   if (president == cpresident)
1188   {
1189     jam();
1190     c_readnodes_nodes.clear(nodeId);
1191     return;
1192   }
1193 
1194   char buf[255];
1195   BaseString::snprintf(buf, sizeof(buf),
1196 		       "check StartPartialTimeout, "
1197 		       "node %d thinks %d is president, "
1198 		       "I think president is: %d",
1199 		       nodeId, president, cpresident);
1200 
1201   ndbout_c("%s", buf);
1202   CRASH_INSERTION(933);
1203 
1204   if (getNodeState().startLevel == NodeState::SL_STARTED)
1205   {
1206     jam();
1207     NdbNodeBitmask part;
1208     part.assign(NdbNodeBitmask::Size, conf->clusterNodes);
1209     FailRep* rep = (FailRep*)signal->getDataPtrSend();
1210     rep->failCause = FailRep::ZPARTITIONED_CLUSTER;
1211     rep->partitioned.president = cpresident;
1212     c_clusterNodes.copyto(NdbNodeBitmask::Size, rep->partitioned.partition);
1213     rep->partitioned.partitionFailSourceNodeId = getOwnNodeId();
1214     Uint32 ref = calcQmgrBlockRef(nodeId);
1215     Uint32 i = 0;
1216     /* Send source of event info if a node supports it */
1217     Uint32 length = FailRep::OrigSignalLength + FailRep::PartitionedExtraLength;
1218     while((i = part.find(i + 1)) != NdbNodeBitmask::NotFound)
1219     {
1220       if (i == nodeId)
1221 	continue;
1222       rep->failNodeId = i;
1223       bool sendSourceId = ndbd_fail_rep_source_node((getNodeInfo(i)).m_version);
1224       sendSignal(ref, GSN_FAIL_REP, signal,
1225                  length + (sendSourceId ? FailRep::SourceExtraLength : 0),
1226                  JBA);
1227     }
1228     rep->failNodeId = nodeId;
1229     bool sendSourceId = ndbd_fail_rep_source_node((getNodeInfo(nodeId)).m_version);
1230 
1231     sendSignal(ref, GSN_FAIL_REP, signal,
1232                length + (sendSourceId ? FailRep::SourceExtraLength : 0),
1233                JBB);
1234     return;
1235   }
1236 
1237   CRASH_INSERTION(932);
1238   CRASH_INSERTION(938);
1239 
1240   progError(__LINE__,
1241 	    NDBD_EXIT_PARTITIONED_SHUTDOWN,
1242 	    buf);
1243 
1244   ndbrequire(false);
1245 }
1246 
1247 void
sendCmNodeInfoReq(Signal * signal,Uint32 nodeId,const NodeRec * self)1248 Qmgr::sendCmNodeInfoReq(Signal* signal, Uint32 nodeId, const NodeRec * self){
1249   CmNodeInfoReq * const req = (CmNodeInfoReq*)signal->getDataPtrSend();
1250   req->nodeId = getOwnNodeId();
1251   req->dynamicId = self->ndynamicId;
1252   req->version = getNodeInfo(getOwnNodeId()).m_version;
1253   req->mysql_version = getNodeInfo(getOwnNodeId()).m_mysql_version;
1254   req->lqh_workers = getNodeInfo(getOwnNodeId()).m_lqh_workers;
1255   const Uint32 ref = calcQmgrBlockRef(nodeId);
1256   sendSignal(ref,GSN_CM_NODEINFOREQ, signal, CmNodeInfoReq::SignalLength, JBB);
1257   DEBUG_START(GSN_CM_NODEINFOREQ, nodeId, "");
1258 }
1259 
1260 /*
1261 4.4.11 CM_REGREF */
1262 /**--------------------------------------------------------------------------
1263  * Only a president or a president candidate can refuse a node to get added to
1264  * the cluster.
1265  * Refuse reasons:
1266  * ZBUSY         We know that the sender is the president and we have to
1267  *               make a new CM_REGREQ.
1268  * ZNOT_IN_CFG   This node number is not specified in the configfile,
1269  *               SYSTEM ERROR
1270  * ZELECTION     Sender is a president candidate, his timelimit
1271  *               hasn't expired so maybe someone else will show up.
1272  *               Update the CPRESIDENT_CANDIDATE, then wait for our
1273  *               timelimit to expire.
1274  *---------------------------------------------------------------------------*/
1275 /*******************************/
1276 /* CM_REGREF                  */
1277 /*******************************/
1278 static
1279 const char *
get_start_type_string(Uint32 st)1280 get_start_type_string(Uint32 st)
1281 {
1282   static char buf[256];
1283 
1284   if (st == 0)
1285   {
1286     return "<ANY>";
1287   }
1288   else
1289   {
1290     buf[0] = 0;
1291     for(Uint32 i = 0; i<NodeState::ST_ILLEGAL_TYPE; i++)
1292     {
1293       if (st & (1 << i))
1294       {
1295 	if (buf[0])
1296 	  strcat(buf, "/");
1297 	switch(i){
1298 	case NodeState::ST_INITIAL_START:
1299 	  strcat(buf, "inital start");
1300 	  break;
1301 	case NodeState::ST_SYSTEM_RESTART:
1302 	  strcat(buf, "system restart");
1303 	  break;
1304 	case NodeState::ST_NODE_RESTART:
1305 	  strcat(buf, "node restart");
1306 	  break;
1307 	case NodeState::ST_INITIAL_NODE_RESTART:
1308 	  strcat(buf, "initial node restart");
1309 	  break;
1310 	}
1311       }
1312     }
1313     return buf;
1314   }
1315 }
1316 
execCM_REGREF(Signal * signal)1317 void Qmgr::execCM_REGREF(Signal* signal)
1318 {
1319   jamEntry();
1320 
1321   CmRegRef* ref = (CmRegRef*)signal->getDataPtr();
1322   UintR TaddNodeno = ref->nodeId;
1323   UintR TrefuseReason = ref->errorCode;
1324   Uint32 candidate = ref->presidentCandidate;
1325   Uint32 node_gci = 1;
1326   Uint32 candidate_gci = 1;
1327   Uint32 start_type = ~0;
1328   NdbNodeBitmask skip_nodes;
1329   DEBUG_START3(signal, TrefuseReason);
1330 
1331   if (signal->getLength() == CmRegRef::SignalLength)
1332   {
1333     jam();
1334     node_gci = ref->latest_gci;
1335     candidate_gci = ref->candidate_latest_gci;
1336     start_type = ref->start_type;
1337     skip_nodes.assign(NdbNodeBitmask::Size, ref->skip_nodes);
1338   }
1339 
1340   c_start.m_regReqReqRecv++;
1341 
1342   // Ignore block reference in data[0]
1343 
1344   if(candidate != c_start.m_president_candidate)
1345   {
1346     jam();
1347     c_start.m_regReqReqRecv = ~0;
1348   }
1349 
1350   c_start.m_starting_nodes.set(TaddNodeno);
1351   if (node_gci)
1352   {
1353     jam();
1354     c_start.m_starting_nodes_w_log.set(TaddNodeno);
1355   }
1356   c_start.m_node_gci[TaddNodeno] = node_gci;
1357 
1358   skip_nodes.bitAND(c_definedNodes);
1359   c_start.m_skip_nodes.bitOR(skip_nodes);
1360 
1361   // set own MT config here or in CONF, and others in CM_NODEINFOREQ/CONF
1362   setNodeInfo(getOwnNodeId()).m_lqh_workers = globalData.ndbMtLqhWorkers;
1363 
1364   char buf[100];
1365   switch (TrefuseReason) {
1366   case CmRegRef::ZINCOMPATIBLE_VERSION:
1367     jam();
1368     progError(__LINE__, NDBD_EXIT_UNSUPPORTED_VERSION,
1369               "incompatible version, "
1370               "connection refused by running ndb node");
1371   case CmRegRef::ZINCOMPATIBLE_START_TYPE:
1372     jam();
1373     BaseString::snprintf(buf, sizeof(buf),
1374 			 "incompatible start type detected: node %d"
1375 			 " reports %s(%d) my start type: %s(%d)",
1376 			 TaddNodeno,
1377 			 get_start_type_string(start_type), start_type,
1378 			 get_start_type_string(c_start.m_start_type),
1379 			 c_start.m_start_type);
1380     progError(__LINE__, NDBD_EXIT_SR_RESTARTCONFLICT, buf);
1381     break;
1382   case CmRegRef::ZBUSY:
1383   case CmRegRef::ZBUSY_TO_PRES:
1384   case CmRegRef::ZBUSY_PRESIDENT:
1385     jam();
1386     cpresidentAlive = ZTRUE;
1387     signal->theData[3] = 0;
1388     break;
1389   case CmRegRef::ZNOT_IN_CFG:
1390     jam();
1391     progError(__LINE__, NDBD_EXIT_NODE_NOT_IN_CONFIG);
1392     break;
1393   case CmRegRef::ZNOT_DEAD:
1394     jam();
1395     progError(__LINE__, NDBD_EXIT_NODE_NOT_DEAD);
1396     break;
1397   case CmRegRef::ZSINGLE_USER_MODE:
1398     jam();
1399     progError(__LINE__, NDBD_EXIT_SINGLE_USER_MODE);
1400     break;
1401   /**
1402    * For generic refuse error.
1403    * e.g. in online upgrade, we can use this error code instead
1404    * of the incompatible error code.
1405    */
1406   case CmRegRef::ZGENERIC:
1407     jam();
1408     progError(__LINE__, NDBD_EXIT_GENERIC);
1409     break;
1410   case CmRegRef::ZELECTION:
1411     jam();
1412     if (candidate_gci > c_start.m_president_candidate_gci ||
1413 	(candidate_gci == c_start.m_president_candidate_gci &&
1414 	 candidate < c_start.m_president_candidate))
1415     {
1416       jam();
1417       //----------------------------------------
1418       /* We may already have a candidate      */
1419       /* choose the lowest nodeno             */
1420       //----------------------------------------
1421       signal->theData[3] = 2;
1422       c_start.m_president_candidate = candidate;
1423       c_start.m_president_candidate_gci = candidate_gci;
1424     } else {
1425       signal->theData[3] = 4;
1426     }//if
1427     break;
1428   case CmRegRef::ZNOT_PRESIDENT:
1429     jam();
1430     cpresidentAlive = ZTRUE;
1431     signal->theData[3] = 3;
1432     break;
1433   default:
1434     jam();
1435     signal->theData[3] = 5;
1436     /*empty*/;
1437     break;
1438   }//switch
1439 /*--------------------------------------------------------------*/
1440 // Send this as an EVENT REPORT to inform about hearing about
1441 // other NDB node proclaiming not to be president.
1442 /*--------------------------------------------------------------*/
1443   signal->theData[0] = NDB_LE_CM_REGREF;
1444   signal->theData[1] = getOwnNodeId();
1445   signal->theData[2] = TaddNodeno;
1446 //-----------------------------------------
1447 // signal->theData[3] filled in above
1448 //-----------------------------------------
1449   sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);
1450 
1451   if(cpresidentAlive == ZTRUE)
1452   {
1453     jam();
1454     DEBUG("cpresidentAlive");
1455     return;
1456   }
1457 
1458   if(c_start.m_regReqReqSent != c_start.m_regReqReqRecv)
1459   {
1460     jam();
1461     DEBUG(c_start.m_regReqReqSent << " != " << c_start.m_regReqReqRecv);
1462     return;
1463   }
1464 
1465   if(c_start.m_president_candidate != getOwnNodeId())
1466   {
1467     jam();
1468     DEBUG("i'm not the candidate");
1469     return;
1470   }
1471 
1472   /**
1473    * All connected nodes has agreed
1474    */
1475   if(check_startup(signal))
1476   {
1477     jam();
1478     electionWon(signal);
1479   }
1480 
1481   return;
1482 }//Qmgr::execCM_REGREF()
1483 
/**
 * Decide whether the cluster start can go ahead now.
 *
 * The set of defined nodes is compared with the nodes that have taken part
 * in the election (c_start.m_starting_nodes) and the nodes marked to be
 * skipped (c_start.m_skip_nodes). DBDIH is consulted through directly
 * executed CHECKNODEGROUPSREQ and DIH_RESTARTREQ signals, and the partial,
 * partitioned and no-nodegroup timeouts are applied relative to
 * c_start_election_time.
 *
 * On every path that returns normally an NDB_LE_StartReport event is sent
 * to CMVMI. theData[1] carries a status code (0x8000-0x8003 on the paths
 * that return non-zero, 1-7 on the waiting paths) and theData[2], where it
 * is set, the remaining wait in seconds or ~0.
 *
 * Returns 0 when the caller has to keep waiting and non-zero (1, or 2 when
 * the decision was taken after the partial timeout had passed) when the
 * start may proceed. A missing node group or an incomplete log ends in
 * progError(NDBD_EXIT_INSUFFICENT_NODES).
 */
1484 Uint32
check_startup(Signal * signal)1485 Qmgr::check_startup(Signal* signal)
1486 {
1487   Uint64 now = NdbTick_CurrentMillisecond();
1488   Uint64 partial_timeout = c_start_election_time + c_restartPartialTimeout;
1489   Uint64 partitioned_timeout = partial_timeout + c_restartPartionedTimeout;
1490   Uint64 no_nodegroup_timeout = c_start_election_time +
1491     c_restartNoNodegroupTimeout;
1492 
  // The no-nodegroup handling only applies when its timeout is not ~0 and
  // at least one node is recorded in c_start.m_no_nodegroup_nodes.
1493   const bool no_nodegroup_active =
1494     (c_restartNoNodegroupTimeout != ~Uint32(0)) &&
1495     (! c_start.m_no_nodegroup_nodes.isclear());
1496 
1497   /**
1498    * First see if we should wait more...
1499    */
  // tmp  = nodes accounted for (skipped or taking part in the start)
1500   NdbNodeBitmask tmp;
1501   tmp.bitOR(c_start.m_skip_nodes);
1502   tmp.bitOR(c_start.m_starting_nodes);
1503 
  // wait = defined nodes that are not yet accounted for
1504   NdbNodeBitmask wait;
1505   wait.assign(c_definedNodes);
1506   wait.bitANDC(tmp);
1507 
1508   Uint32 retVal = 0;
1509   Uint32 incompleteng = MAX_NDB_NODES; // Illegal value
1510   NdbNodeBitmask report_mask;
1511 
  /**
   * Case 1: no latest GCI is known, or the start type is exactly
   * "initial start". The start only proceeds once every defined node is
   * accounted for (after the no-nodegroup timeout, nodes in
   * m_no_nodegroup_nodes count as accounted for as well). Every branch
   * ends in start_report.
   */
1512   if ((c_start.m_latest_gci == 0) ||
1513       (c_start.m_start_type == (1 << NodeState::ST_INITIAL_START)))
1514   {
1515     if (tmp.equal(c_definedNodes))
1516     {
1517       jam();
1518       signal->theData[1] = 0x8000;
1519       report_mask.assign(c_definedNodes);
1520       report_mask.bitANDC(c_start.m_starting_nodes);
1521       retVal = 1;
1522       goto start_report;
1523     }
1524     else if (no_nodegroup_active)
1525     {
1526       if (now < no_nodegroup_timeout)
1527       {
          // theData[2]: remaining time rounded to the nearest second
1528         signal->theData[1] = 6;
1529         signal->theData[2] = Uint32((no_nodegroup_timeout - now + 500) / 1000);
1530         report_mask.assign(wait);
1531         retVal = 0;
1532         goto start_report;
1533       }
1534       tmp.bitOR(c_start.m_no_nodegroup_nodes);
1535       if (tmp.equal(c_definedNodes))
1536       {
1537         signal->theData[1] = 0x8000;
1538         report_mask.assign(c_definedNodes);
1539         report_mask.bitANDC(c_start.m_starting_nodes);
1540         retVal = 1;
1541         goto start_report;
1542       }
1543       else
1544       {
1545         jam();
1546         signal->theData[1] = 1;
1547         signal->theData[2] = ~0;
1548         report_mask.assign(wait);
1549         retVal = 0;
1550         goto start_report;
1551       }
1552     }
1553     else
1554     {
1555       jam();
1556       signal->theData[1] = 1;
1557       signal->theData[2] = ~0;
1558       report_mask.assign(wait);
1559       retVal = 0;
1560       goto start_report;
1561     }
1562   }
1563 
  /**
   * Case 2: all other starts. Once the no-nodegroup timeout has passed,
   * the nodes in m_no_nodegroup_nodes count as accounted for.
   */
1564   if (now >= no_nodegroup_timeout)
1565   {
1566     tmp.bitOR(c_start.m_no_nodegroup_nodes);
1567   }
1568 
1569   {
1570     const bool all = c_start.m_starting_nodes.equal(c_definedNodes);
1571     CheckNodeGroups* sd = (CheckNodeGroups*)&signal->theData[0];
1572 
1573     {
1574       /**
1575        * Check for missing node group directly
1576        */
      // check = nodes that have not registered, plus registered nodes
      //         that reported a non-zero GCI
1577       NdbNodeBitmask check;
1578       check.assign(c_definedNodes);
1579       check.bitANDC(c_start.m_starting_nodes);    // Not connected nodes
1580       check.bitOR(c_start.m_starting_nodes_w_log);
1581 
1582       sd->blockRef = reference();
1583       sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck;
1584       sd->mask = check;
1585       EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal,
1586                      CheckNodeGroups::SignalLength);
1587 
1588       if (sd->output == CheckNodeGroups::Lose)
1589       {
1590         jam();
1591         goto missing_nodegroup;
1592       }
1593     }
1594 
    // result: DIH's verdict for all nodes taking part in the start
1595     sd->blockRef = reference();
1596     sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck;
1597     sd->mask = c_start.m_starting_nodes;
1598     EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal,
1599                    CheckNodeGroups::SignalLength);
1600 
1601     const Uint32 result = sd->output;
1602 
    // result_w_log: DIH's verdict for only those of them that reported a
    // non-zero GCI
1603     sd->blockRef = reference();
1604     sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck;
1605     sd->mask = c_start.m_starting_nodes_w_log;
1606     EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal,
1607                    CheckNodeGroups::SignalLength);
1608 
1609     const Uint32 result_w_log = sd->output;
1610 
1611     if (tmp.equal(c_definedNodes))
1612     {
1613       /**
1614        * All nodes (wrt no-wait nodes) has connected...
1615        *   this means that we will now start or die
1616        */
1617       jam();
1618       switch(result_w_log){
1619       case CheckNodeGroups::Lose:
1620       {
1621         jam();
1622         goto missing_nodegroup;
1623       }
1624       case CheckNodeGroups::Win:
1625         signal->theData[1] = all ? 0x8001 : 0x8002;
1626         report_mask.assign(c_definedNodes);
1627         report_mask.bitANDC(c_start.m_starting_nodes);
1628         retVal = 1;
1629         goto check_log;
1630       case CheckNodeGroups::Partitioning:
1631         ndbrequire(result != CheckNodeGroups::Lose);
1632         signal->theData[1] =
1633           all ? 0x8001 : (result == CheckNodeGroups::Win ? 0x8002 : 0x8003);
1634         report_mask.assign(c_definedNodes);
1635         report_mask.bitANDC(c_start.m_starting_nodes);
1636         retVal = 1;
1637         goto check_log;
1638       }
1639     }
1640 
    // Not every node is accounted for: keep waiting until the partial
    // timeout has passed.
1641     if (now < partial_timeout)
1642     {
1643       jam();
1644 
1645       signal->theData[1] = c_restartPartialTimeout == (Uint32) ~0 ? 2 : 3;
1646       signal->theData[2] = Uint32((partial_timeout - now + 500) / 1000);
1647       report_mask.assign(wait);
1648       retVal = 0;
1649 
1650       if (no_nodegroup_active && now < no_nodegroup_timeout)
1651       {
1652         signal->theData[1] = 7;
1653         signal->theData[2] = Uint32((no_nodegroup_timeout - now + 500) / 1000);
1654       }
1655       else if (no_nodegroup_active && now >= no_nodegroup_timeout)
1656       {
1657         report_mask.bitANDC(c_start.m_no_nodegroup_nodes);
1658       }
1659 
1660       goto start_report;
1661     }
1662 
1663     /**
1664      * Start partial has passed...check for partitioning...
1665      */
1666     switch(result_w_log){
1667     case CheckNodeGroups::Lose:
1668       jam();
1669       goto missing_nodegroup;
1670     case CheckNodeGroups::Partitioning:
1671       if (now < partitioned_timeout && result != CheckNodeGroups::Win)
1672       {
1673         goto missinglog;
1674       }
1675       // Fall through...
1676     case CheckNodeGroups::Win:
1677       signal->theData[1] =
1678         all ? 0x8001 : (result == CheckNodeGroups::Win ? 0x8002 : 0x8003);
1679       report_mask.assign(c_definedNodes);
1680       report_mask.bitANDC(c_start.m_starting_nodes);
1681       retVal = 2;
1682       goto check_log;
1683     }
1684   }
  // Only reached when result_w_log is none of Lose / Win / Partitioning.
1685   ndbrequire(false);
1686 
/**
 * check_log: a start has been decided (retVal is 1 or 2). Ask DIH, via
 * DIH_RESTARTREQ, to check the GCIs reported by the starting nodes.
 */
1687 check_log:
1688   jam();
1689   {
    // The request is built in signal->theData, so the report words filled
    // in so far are saved here and restored after the direct call.
1690     Uint32 save[4+4*NdbNodeBitmask::Size];
1691     memcpy(save, signal->theData, sizeof(save));
1692 
1693     DihRestartReq * req = CAST_PTR(DihRestartReq, signal->getDataPtrSend());
1694     req->senderRef = 0;
1695     c_start.m_starting_nodes.copyto(NdbNodeBitmask::Size, req->nodemask);
1696     memcpy(req->node_gcis, c_start.m_node_gci, 4*MAX_NDB_NODES);
1697     EXECUTE_DIRECT(DBDIH, GSN_DIH_RESTARTREQ, signal,
1698 		   DihRestartReq::CheckLength);
1699 
    // DIH's answer is read from theData[0]; any value other than
    // MAX_NDB_NODES is treated as the node group with an incomplete log.
1700     incompleteng = signal->theData[0];
1701     memcpy(signal->theData, save, sizeof(save));
1702 
1703     if (incompleteng != MAX_NDB_NODES)
1704     {
1705       jam();
1706       if (retVal == 1)
1707       {
1708 	jam();
1709 	goto incomplete_log;
1710       }
1711       else if (retVal == 2)
1712       {
        // Decided after the partial timeout: keep waiting until the
        // partitioned timeout has passed before giving up.
1713 	if (now <= partitioned_timeout)
1714 	{
1715 	  jam();
1716 	  goto missinglog;
1717 	}
1718 	else
1719 	{
1720 	  goto incomplete_log;
1721 	}
1722       }
1723       ndbrequire(false);
1724     }
1725   }
1726   goto start_report;
1727 
/**
 * missinglog: not able to start yet; report the remaining time until the
 * partitioned timeout and keep waiting.
 */
1728 missinglog:
1729   signal->theData[1] = c_restartPartionedTimeout == (Uint32) ~0 ? 4 : 5;
1730   signal->theData[2] = Uint32((partitioned_timeout - now + 500) / 1000);
1731   report_mask.assign(c_definedNodes);
1732   report_mask.bitANDC(c_start.m_starting_nodes);
1733   retVal = 0;
1734   goto start_report;
1735 
/**
 * start_report: send the NDB_LE_StartReport event and return retVal.
 * theData[1] and theData[2] were filled in by the path leading here;
 * theData[3] is the bitmask size in words, followed by five bitmasks:
 * defined nodes, starting nodes, skip nodes, report_mask and
 * no-nodegroup nodes.
 */
1736 start_report:
1737   jam();
1738   {
1739     Uint32 sz = NdbNodeBitmask::Size;
1740     signal->theData[0] = NDB_LE_StartReport;
1741     signal->theData[3] = sz;
1742     Uint32* ptr = signal->theData+4;
1743     c_definedNodes.copyto(sz, ptr); ptr += sz;
1744     c_start.m_starting_nodes.copyto(sz, ptr); ptr += sz;
1745     c_start.m_skip_nodes.copyto(sz, ptr); ptr += sz;
1746     report_mask.copyto(sz, ptr); ptr+= sz;
1747     c_start.m_no_nodegroup_nodes.copyto(sz, ptr); ptr += sz;
1748     sendSignal(CMVMI_REF, GSN_EVENT_REP, signal,
1749 	       4+5*NdbNodeBitmask::Size, JBB);
1750   }
1751   return retVal;
1752 
/**
 * missing_nodegroup: fatal. The message lists the starting nodes and those
 * of them that are not in m_starting_nodes_w_log.
 */
1753 missing_nodegroup:
1754   jam();
1755   {
1756     char buf[100], mask1[100], mask2[100];
1757     c_start.m_starting_nodes.getText(mask1);
1758     tmp.assign(c_start.m_starting_nodes);
1759     tmp.bitANDC(c_start.m_starting_nodes_w_log);
1760     tmp.getText(mask2);
1761     BaseString::snprintf(buf, sizeof(buf),
1762 			 "Unable to start missing node group! "
1763 			 " starting: %s (missing fs for: %s)",
1764 			 mask1, mask2);
1765     progError(__LINE__, NDBD_EXIT_INSUFFICENT_NODES, buf);
1766     return 0;                                     // Deadcode
1767   }
1768 
/**
 * incomplete_log: fatal. DIH reported a node group with an incomplete log.
 */
1769 incomplete_log:
1770   jam();
1771   {
1772     char buf[100], mask1[100];
1773     c_start.m_starting_nodes.getText(mask1);
1774     BaseString::snprintf(buf, sizeof(buf),
1775 			 "Incomplete log for node group: %d! "
1776 			 " starting nodes: %s",
1777 			 incompleteng, mask1);
1778     progError(__LINE__, NDBD_EXIT_INSUFFICENT_NODES, buf);
1779     return 0;                                     // Deadcode
1780   }
1781 }
1782 
1783 void
electionWon(Signal * signal)1784 Qmgr::electionWon(Signal* signal){
1785   NodeRecPtr myNodePtr;
1786   cpresident = getOwnNodeId(); /* This node becomes president. */
1787   myNodePtr.i = getOwnNodeId();
1788   ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);
1789 
1790   myNodePtr.p->phase = ZRUNNING;
1791 
1792   cpdistref = reference();
1793   cneighbourl = ZNIL;
1794   cneighbourh = ZNIL;
1795   myNodePtr.p->ndynamicId = 1 | (myNodePtr.p->hbOrder << 16);
1796   c_maxDynamicId = 1;
1797   c_clusterNodes.clear();
1798   c_clusterNodes.set(getOwnNodeId());
1799 
1800   cpresidentAlive = ZTRUE;
1801   c_start_election_time = ~0;
1802   c_start.reset();
1803 
1804   signal->theData[0] = NDB_LE_CM_REGCONF;
1805   signal->theData[1] = getOwnNodeId();
1806   signal->theData[2] = cpresident;
1807   signal->theData[3] = myNodePtr.p->ndynamicId;
1808   sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);
1809 
1810   c_start.m_starting_nodes.clear(getOwnNodeId());
1811   if (c_start.m_starting_nodes.isclear())
1812   {
1813     jam();
1814     sendSttorryLab(signal);
1815   }
1816 }
1817 
1818 /*
1819 4.4.11 CONTINUEB */
1820 /*--------------------------------------------------------------------------*/
1821 /*                                                                          */
1822 /*--------------------------------------------------------------------------*/
1823 /****************************>---------------------------------------------*/
1824 /* CONTINUEB                 >        SENDER: Own block, Own node          */
1825 /****************************>-------+INPUT : TCONTINUEB_TYPE              */
1826 /*--------------------------------------------------------------*/
regreqTimeLimitLab(Signal * signal)1827 void Qmgr::regreqTimeLimitLab(Signal* signal)
1828 {
1829   if(cpresident == ZNIL)
1830   {
1831     if (c_start.m_president_candidate == ZNIL)
1832     {
1833       jam();
1834       c_start.m_president_candidate = getOwnNodeId();
1835     }
1836 
1837     cmInfoconf010Lab(signal);
1838   }
1839 }//Qmgr::regreqTimelimitLab()
1840 
1841 /**---------------------------------------------------------------------------
 * The new node is responsible for announcing its own node information and
 * for asking all other nodes for theirs; it uses CM_NODEINFOREQ for that
 * purpose. Once connections to all running nodes have been set up, the
 * president will send a commit to all running nodes and to the new node.
1846  * INPUT: NODE_PTR1, must be set as ZNIL if we don't enter CONNECT_NODES)
1847  *                   from signal CM_NODEINFOCONF.
1848  *---------------------------------------------------------------------------*/
1849 /*******************************/
1850 /* CM_NODEINFOCONF            */
1851 /*******************************/
execCM_NODEINFOCONF(Signal * signal)1852 void Qmgr::execCM_NODEINFOCONF(Signal* signal)
1853 {
1854   DEBUG_START3(signal, "");
1855 
1856   jamEntry();
1857 
1858   CmNodeInfoConf * const conf = (CmNodeInfoConf*)signal->getDataPtr();
1859 
1860   const Uint32 nodeId = conf->nodeId;
1861   const Uint32 dynamicId = conf->dynamicId;
1862   const Uint32 version = conf->version;
1863   Uint32 mysql_version = conf->mysql_version;
1864   Uint32 lqh_workers = conf->lqh_workers;
1865   if (version < NDBD_SPLIT_VERSION)
1866   {
1867     jam();
1868     mysql_version = 0;
1869   }
1870   if (version < NDBD_MT_LQH_VERSION)
1871   {
1872     jam();
1873     lqh_workers = 0;
1874   }
1875 
1876   NodeRecPtr nodePtr;
1877   nodePtr.i = getOwnNodeId();
1878   ptrAss(nodePtr, nodeRec);
1879   ndbrequire(nodePtr.p->phase == ZSTARTING);
1880   ndbrequire(c_start.m_gsn == GSN_CM_NODEINFOREQ);
1881   c_start.m_nodes.clearWaitingFor(nodeId);
1882 
1883   /**
1884    * Update node info
1885    */
1886   NodeRecPtr replyNodePtr;
1887   replyNodePtr.i = nodeId;
1888   ptrCheckGuard(replyNodePtr, MAX_NDB_NODES, nodeRec);
1889   replyNodePtr.p->ndynamicId = dynamicId;
1890   replyNodePtr.p->blockRef = signal->getSendersBlockRef();
1891   setNodeInfo(replyNodePtr.i).m_version = version;
1892   setNodeInfo(replyNodePtr.i).m_mysql_version = mysql_version;
1893   setNodeInfo(replyNodePtr.i).m_lqh_workers = lqh_workers;
1894 
1895   recompute_version_info(NodeInfo::DB, version);
1896 
1897   if(!c_start.m_nodes.done()){
1898     jam();
1899     return;
1900   }
1901 
1902   /**********************************************<*/
1903   /* Send an ack. back to the president.          */
1904   /* CM_ACKADD                                    */
1905   /* The new node has been registered by all      */
1906   /* running nodes and has stored nodeinfo about  */
1907   /* all running nodes. The new node has to wait  */
1908   /* for CM_ADD (commit) from president to become */
1909   /* a running node in the cluster.               */
1910   /**********************************************<*/
1911   sendCmAckAdd(signal, getOwnNodeId(), CmAdd::Prepare);
1912   return;
1913 }//Qmgr::execCM_NODEINFOCONF()
1914 
1915 /**---------------------------------------------------------------------------
1916  * A new node sends nodeinfo about himself. The new node asks for
1917  * corresponding nodeinfo back in the  CM_NODEINFOCONF.
1918  *---------------------------------------------------------------------------*/
1919 /*******************************/
1920 /* CM_NODEINFOREQ             */
1921 /*******************************/
execCM_NODEINFOREQ(Signal * signal)1922 void Qmgr::execCM_NODEINFOREQ(Signal* signal)
1923 {
1924   jamEntry();
1925 
1926   const Uint32 Tblockref = signal->getSendersBlockRef();
1927 
1928   NodeRecPtr nodePtr;
1929   nodePtr.i = getOwnNodeId();
1930   ptrAss(nodePtr, nodeRec);
1931   if(nodePtr.p->phase != ZRUNNING){
1932     jam();
1933     signal->theData[0] = reference();
1934     signal->theData[1] = getOwnNodeId();
1935     signal->theData[2] = ZNOT_RUNNING;
1936     sendSignal(Tblockref, GSN_CM_NODEINFOREF, signal, 3, JBB);
1937     return;
1938   }
1939 
1940   NodeRecPtr addNodePtr;
1941   CmNodeInfoReq * const req = (CmNodeInfoReq*)signal->getDataPtr();
1942   addNodePtr.i = req->nodeId;
1943   ptrCheckGuard(addNodePtr, MAX_NDB_NODES, nodeRec);
1944   addNodePtr.p->ndynamicId = req->dynamicId;
1945   addNodePtr.p->blockRef = signal->getSendersBlockRef();
1946   setNodeInfo(addNodePtr.i).m_version = req->version;
1947 
1948   Uint32 mysql_version = req->mysql_version;
1949   if (req->version < NDBD_SPLIT_VERSION)
1950     mysql_version = 0;
1951   setNodeInfo(addNodePtr.i).m_mysql_version = mysql_version;
1952 
1953   Uint32 lqh_workers = req->lqh_workers;
1954   if (req->version < NDBD_MT_LQH_VERSION)
1955     lqh_workers = 0;
1956   setNodeInfo(addNodePtr.i).m_lqh_workers = lqh_workers;
1957 
1958   c_maxDynamicId = req->dynamicId & 0xFFFF;
1959 
1960   cmAddPrepare(signal, addNodePtr, nodePtr.p);
1961 }//Qmgr::execCM_NODEINFOREQ()
1962 
1963 void
cmAddPrepare(Signal * signal,NodeRecPtr nodePtr,const NodeRec * self)1964 Qmgr::cmAddPrepare(Signal* signal, NodeRecPtr nodePtr, const NodeRec * self){
1965   jam();
1966 
1967   switch(nodePtr.p->phase){
1968   case ZINIT:
1969     jam();
1970     nodePtr.p->phase = ZSTARTING;
1971     return;
1972   case ZFAIL_CLOSING:
1973     jam();
1974 
1975 #if 1
1976     warningEvent("Recieved request to incorperate node %u, "
1977 		 "while error handling has not yet completed",
1978 		 nodePtr.i);
1979 
1980     ndbrequire(getOwnNodeId() != cpresident);
1981     ndbrequire(signal->header.theVerId_signalNumber == GSN_CM_ADD);
1982     c_start.m_nodes.clearWaitingFor();
1983     c_start.m_nodes.setWaitingFor(nodePtr.i);
1984     c_start.m_gsn = GSN_CM_NODEINFOCONF;
1985 #else
1986     warningEvent("Enabling communication to CM_ADD node %u state=%d",
1987 		 nodePtr.i,
1988 		 nodePtr.p->phase);
1989     nodePtr.p->phase = ZSTARTING;
1990     nodePtr.p->failState = NORMAL;
1991     signal->theData[0] = 0;
1992     signal->theData[1] = nodePtr.i;
1993     sendSignal(CMVMI_REF, GSN_OPEN_COMREQ, signal, 2, JBA);
1994 #endif
1995     return;
1996   case ZSTARTING:
1997     break;
1998   case ZRUNNING:
1999   case ZPREPARE_FAIL:
2000   case ZAPI_ACTIVE:
2001   case ZAPI_INACTIVE:
2002     ndbrequire(false);
2003   }
2004 
2005   sendCmAckAdd(signal, nodePtr.i, CmAdd::Prepare);
2006   sendApiVersionRep(signal, nodePtr);
2007 
2008   /* President have prepared us */
2009   CmNodeInfoConf * conf = (CmNodeInfoConf*)signal->getDataPtrSend();
2010   conf->nodeId = getOwnNodeId();
2011   conf->dynamicId = self->ndynamicId;
2012   conf->version = getNodeInfo(getOwnNodeId()).m_version;
2013   conf->mysql_version = getNodeInfo(getOwnNodeId()).m_mysql_version;
2014   conf->lqh_workers = getNodeInfo(getOwnNodeId()).m_lqh_workers;
2015   sendSignal(nodePtr.p->blockRef, GSN_CM_NODEINFOCONF, signal,
2016 	     CmNodeInfoConf::SignalLength, JBB);
2017   DEBUG_START(GSN_CM_NODEINFOCONF, refToNode(nodePtr.p->blockRef), "");
2018 }
2019 
2020 void
sendApiVersionRep(Signal * signal,NodeRecPtr nodePtr)2021 Qmgr::sendApiVersionRep(Signal* signal, NodeRecPtr nodePtr)
2022 {
2023   if (getNodeInfo(nodePtr.i).m_version >= NDBD_NODE_VERSION_REP)
2024   {
2025     jam();
2026     Uint32 ref = calcQmgrBlockRef(nodePtr.i);
2027     for(Uint32 i = 1; i<MAX_NODES; i++)
2028     {
2029       jam();
2030       Uint32 version = getNodeInfo(i).m_version;
2031       Uint32 type = getNodeInfo(i).m_type;
2032       if (type != NodeInfo::DB && version)
2033       {
2034 	jam();
2035 	signal->theData[0] = i;
2036 	signal->theData[1] = version;
2037 	sendSignal(ref, GSN_NODE_VERSION_REP, signal, 2, JBB);
2038       }
2039     }
2040   }
2041 }
2042 
2043 void
sendCmAckAdd(Signal * signal,Uint32 nodeId,CmAdd::RequestType type)2044 Qmgr::sendCmAckAdd(Signal * signal, Uint32 nodeId, CmAdd::RequestType type){
2045 
2046   CmAckAdd * cmAckAdd = (CmAckAdd*)signal->getDataPtrSend();
2047   cmAckAdd->requestType = type;
2048   cmAckAdd->startingNodeId = nodeId;
2049   cmAckAdd->senderNodeId = getOwnNodeId();
2050   sendSignal(cpdistref, GSN_CM_ACKADD, signal, CmAckAdd::SignalLength, JBA);
2051   DEBUG_START(GSN_CM_ACKADD, cpresident, "");
2052 
2053   switch(type){
2054   case CmAdd::Prepare:
2055     return;
2056   case CmAdd::AddCommit:
2057   case CmAdd::CommitNew:
2058     break;
2059   }
2060 
2061   signal->theData[0] = nodeId;
2062   EXECUTE_DIRECT(NDBCNTR, GSN_CM_ADD_REP, signal, 1);
2063   jamEntry();
2064 }
2065 
2066 /*
2067 4.4.11 CM_ADD */
/**--------------------------------------------------------------------------
 * Prepare a running node to add a new node to the cluster. The running node
 * will change phase of the new node from ZINIT to ZSTARTING. The running node
 * will also mark that we have received a prepare. When the new node has sent
 * us nodeinfo we can send an acknowledgement back to the president. When all
 * running nodes have acknowledged the new node, the president will send a
 * commit and we can change phase of the new node to ZRUNNING. The president
 * will also send CM_ADD to himself.
 *---------------------------------------------------------------------------*/
2077 /*******************************/
2078 /* CM_ADD                     */
2079 /*******************************/
execCM_ADD(Signal * signal)2080 void Qmgr::execCM_ADD(Signal* signal)
2081 {
2082   NodeRecPtr addNodePtr;
2083   jamEntry();
2084 
2085   NodeRecPtr nodePtr;
2086   nodePtr.i = getOwnNodeId();
2087   ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
2088 
2089   CmAdd * const cmAdd = (CmAdd*)signal->getDataPtr();
2090   const CmAdd::RequestType type = (CmAdd::RequestType)cmAdd->requestType;
2091   addNodePtr.i = cmAdd->startingNodeId;
2092   //const Uint32 startingVersion = cmAdd->startingVersion;
2093   ptrCheckGuard(addNodePtr, MAX_NDB_NODES, nodeRec);
2094 
2095   DEBUG_START3(signal, type);
2096 
2097   if(nodePtr.p->phase == ZSTARTING){
2098     jam();
2099     /**
2100      * We are joining...
2101      */
2102     ndbrequire(addNodePtr.i == nodePtr.i);
2103     switch(type){
2104     case CmAdd::Prepare:
2105       ndbrequire(c_start.m_gsn == GSN_CM_NODEINFOREQ);
2106       /**
2107        * Wait for CM_NODEINFO_CONF
2108        */
2109       return;
2110     case CmAdd::CommitNew:
2111       /**
2112        * Tata. we're in the cluster
2113        */
2114       joinedCluster(signal, addNodePtr);
2115       return;
2116     case CmAdd::AddCommit:
2117       ndbrequire(false);
2118     }
2119   }
2120 
2121   switch (type) {
2122   case CmAdd::Prepare:
2123     cmAddPrepare(signal, addNodePtr, nodePtr.p);
2124     break;
2125   case CmAdd::AddCommit:{
2126     jam();
2127     ndbrequire(addNodePtr.p->phase == ZSTARTING);
2128     addNodePtr.p->phase = ZRUNNING;
2129     m_connectivity_check.reportNodeConnect(addNodePtr.i);
2130     setNodeInfo(addNodePtr.i).m_heartbeat_cnt= 0;
2131     c_clusterNodes.set(addNodePtr.i);
2132     findNeighbours(signal, __LINE__);
2133 
2134     /**
2135      * SEND A HEARTBEAT IMMEDIATELY TO DECREASE THE RISK THAT WE MISS EARLY
2136      * HEARTBEATS.
2137      */
2138     sendHeartbeat(signal);
2139     hb_send_timer.reset(0);
2140 
2141     /**
2142      *  ENABLE COMMUNICATION WITH ALL BLOCKS WITH THE NEWLY ADDED NODE
2143      */
2144     EnableComReq *enableComReq = (EnableComReq *)signal->getDataPtrSend();
2145     enableComReq->m_senderRef = reference();
2146     enableComReq->m_senderData = ENABLE_COM_CM_ADD_COMMIT;
2147     NodeBitmask::clear(enableComReq->m_nodeIds);
2148     NodeBitmask::set(enableComReq->m_nodeIds, addNodePtr.i);
2149     sendSignal(CMVMI_REF, GSN_ENABLE_COMREQ, signal,
2150                EnableComReq::SignalLength, JBA);
2151     break;
2152   }
2153   case CmAdd::CommitNew:
2154     jam();
2155     ndbrequire(false);
2156   }
2157 
2158 }//Qmgr::execCM_ADD()
2159 
2160 void
handleEnableComAddCommit(Signal * signal,Uint32 node)2161 Qmgr::handleEnableComAddCommit(Signal *signal, Uint32 node)
2162 {
2163   sendCmAckAdd(signal, node, CmAdd::AddCommit);
2164   if(getOwnNodeId() != cpresident){
2165     jam();
2166     c_start.reset();
2167   }
2168 }
2169 
2170 void
execENABLE_COMCONF(Signal * signal)2171 Qmgr::execENABLE_COMCONF(Signal *signal)
2172 {
2173   const EnableComConf *enableComConf =
2174     (const EnableComConf *)signal->getDataPtr();
2175   Uint32 state = enableComConf->m_senderData;
2176   Uint32 node = NodeBitmask::find(enableComConf->m_nodeIds, 0);
2177 
2178   jamEntry();
2179 
2180   switch (state)
2181   {
2182     case ENABLE_COM_CM_ADD_COMMIT:
2183       jam();
2184       /* Only exactly one node possible here. */
2185       ndbrequire(node != NodeBitmask::NotFound);
2186       ndbrequire(NodeBitmask::find(enableComConf->m_nodeIds, node + 1) ==
2187                  NodeBitmask::NotFound);
2188       handleEnableComAddCommit(signal, node);
2189       break;
2190 
2191     case ENABLE_COM_CM_COMMIT_NEW:
2192       jam();
2193       handleEnableComCommitNew(signal);
2194       break;
2195 
2196     case ENABLE_COM_API_REGREQ:
2197       jam();
2198       /* Only exactly one node possible here. */
2199       ndbrequire(node != NodeBitmask::NotFound);
2200       ndbrequire(NodeBitmask::find(enableComConf->m_nodeIds, node + 1) ==
2201                  NodeBitmask::NotFound);
2202       handleEnableComApiRegreq(signal, node);
2203       break;
2204 
2205     default:
2206       jam();
2207       ndbrequire(false);
2208   }
2209 }
2210 
2211 void
joinedCluster(Signal * signal,NodeRecPtr nodePtr)2212 Qmgr::joinedCluster(Signal* signal, NodeRecPtr nodePtr){
2213   /**
2214    * WE HAVE BEEN INCLUDED IN THE CLUSTER WE CAN START BEING PART OF THE
2215    * HEARTBEAT PROTOCOL AND WE WILL ALSO ENABLE COMMUNICATION WITH ALL
2216    * NODES IN THE CLUSTER.
2217    */
2218   nodePtr.p->phase = ZRUNNING;
2219   setNodeInfo(nodePtr.i).m_heartbeat_cnt= 0;
2220   findNeighbours(signal, __LINE__);
2221   c_clusterNodes.set(nodePtr.i);
2222   c_start.reset();
2223 
2224   /**
2225    * SEND A HEARTBEAT IMMEDIATELY TO DECREASE THE RISK
2226    * THAT WE MISS EARLY HEARTBEATS.
2227    */
2228   sendHeartbeat(signal);
2229   hb_send_timer.reset(0);
2230 
2231   /**
2232    * ENABLE COMMUNICATION WITH ALL BLOCKS IN THE CURRENT CLUSTER AND SET
2233    * THE NODES IN THE CLUSTER TO BE RUNNING.
2234    */
2235   EnableComReq *enableComReq = (EnableComReq *)signal->getDataPtrSend();
2236   enableComReq->m_senderRef = reference();
2237   enableComReq->m_senderData = ENABLE_COM_CM_COMMIT_NEW;
2238   NodeBitmask::clear(enableComReq->m_nodeIds);
2239   for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
2240     jam();
2241     ptrAss(nodePtr, nodeRec);
2242     if ((nodePtr.p->phase == ZRUNNING) && (nodePtr.i != getOwnNodeId())) {
2243       /*-------------------------------------------------------------------*/
2244       // Enable full communication to all other nodes. Not really necessary
2245       // to open communication to ourself.
2246       /*-------------------------------------------------------------------*/
2247       jam();
2248       NodeBitmask::set(enableComReq->m_nodeIds, nodePtr.i);
2249     }//if
2250   }//for
2251 
2252   if (!NodeBitmask::isclear(enableComReq->m_nodeIds))
2253   {
2254     jam();
2255     sendSignal(CMVMI_REF, GSN_ENABLE_COMREQ, signal,
2256                EnableComReq::SignalLength, JBA);
2257   }
2258   else
2259   {
2260     handleEnableComCommitNew(signal);
2261   }
2262 }
2263 
2264 void
handleEnableComCommitNew(Signal * signal)2265 Qmgr::handleEnableComCommitNew(Signal *signal)
2266 {
2267   sendSttorryLab(signal);
2268 
2269   sendCmAckAdd(signal, getOwnNodeId(), CmAdd::CommitNew);
2270 }
2271 
2272 /*  4.10.7 CM_ACKADD        - PRESIDENT IS RECEIVER -       */
2273 /*---------------------------------------------------------------------------*/
2274 /* Entry point for an ack add signal.
2275  * The TTYPE defines if it is a prepare or a commit.                         */
2276 /*---------------------------------------------------------------------------*/
execCM_ACKADD(Signal * signal)2277 void Qmgr::execCM_ACKADD(Signal* signal)
2278 {
2279   NodeRecPtr addNodePtr;
2280   NodeRecPtr senderNodePtr;
2281   jamEntry();
2282 
2283   CmAckAdd * const cmAckAdd = (CmAckAdd*)signal->getDataPtr();
2284   const CmAdd::RequestType type = (CmAdd::RequestType)cmAckAdd->requestType;
2285   addNodePtr.i = cmAckAdd->startingNodeId;
2286   senderNodePtr.i = cmAckAdd->senderNodeId;
2287 
2288   DEBUG_START3(signal, type);
2289 
2290   if (cpresident != getOwnNodeId()) {
2291     jam();
2292     /*-----------------------------------------------------------------------*/
2293     /* IF WE ARE NOT PRESIDENT THEN WE SHOULD NOT RECEIVE THIS MESSAGE.      */
2294     /*------------------------------------------------------------_----------*/
2295     warningEvent("Received CM_ACKADD from %d president=%d",
2296 		 senderNodePtr.i, cpresident);
2297     return;
2298   }//if
2299 
2300   if (addNodePtr.i != c_start.m_startNode) {
2301     jam();
2302     /*----------------------------------------------------------------------*/
2303     /* THIS IS NOT THE STARTING NODE. WE ARE ACTIVE NOW WITH ANOTHER START. */
2304     /*----------------------------------------------------------------------*/
2305     warningEvent("Received CM_ACKADD from %d with startNode=%d != own %d",
2306 		 senderNodePtr.i, addNodePtr.i, c_start.m_startNode);
2307     return;
2308   }//if
2309 
2310   ndbrequire(c_start.m_gsn == GSN_CM_ADD);
2311   c_start.m_nodes.clearWaitingFor(senderNodePtr.i);
2312   if(!c_start.m_nodes.done()){
2313     jam();
2314     return;
2315   }
2316 
2317   switch (type) {
2318   case CmAdd::Prepare:{
2319     jam();
2320 
2321     /*----------------------------------------------------------------------*/
2322     /* ALL RUNNING NODES HAVE PREPARED THE INCLUSION OF THIS NEW NODE.      */
2323     /*----------------------------------------------------------------------*/
2324     c_start.m_gsn = GSN_CM_ADD;
2325     c_start.m_nodes = c_clusterNodes;
2326 
2327     CmAdd * const cmAdd = (CmAdd*)signal->getDataPtrSend();
2328     cmAdd->requestType = CmAdd::AddCommit;
2329     cmAdd->startingNodeId = addNodePtr.i;
2330     cmAdd->startingVersion = getNodeInfo(addNodePtr.i).m_version;
2331     cmAdd->startingMysqlVersion = getNodeInfo(addNodePtr.i).m_mysql_version;
2332     NodeReceiverGroup rg(QMGR, c_clusterNodes);
2333     sendSignal(rg, GSN_CM_ADD, signal, CmAdd::SignalLength, JBA);
2334     DEBUG_START2(GSN_CM_ADD, rg, "AddCommit");
2335     return;
2336   }
2337   case CmAdd::AddCommit:{
2338     jam();
2339 
2340     /****************************************/
2341     /* Send commit to the new node so he    */
2342     /* will change PHASE into ZRUNNING      */
2343     /****************************************/
2344     c_start.m_gsn = GSN_CM_ADD;
2345     c_start.m_nodes.clearWaitingFor();
2346     c_start.m_nodes.setWaitingFor(addNodePtr.i);
2347 
2348     CmAdd * const cmAdd = (CmAdd*)signal->getDataPtrSend();
2349     cmAdd->requestType = CmAdd::CommitNew;
2350     cmAdd->startingNodeId = addNodePtr.i;
2351     cmAdd->startingVersion = getNodeInfo(addNodePtr.i).m_version;
2352     cmAdd->startingMysqlVersion = getNodeInfo(addNodePtr.i).m_mysql_version;
2353     sendSignal(calcQmgrBlockRef(addNodePtr.i), GSN_CM_ADD, signal,
2354 	       CmAdd::SignalLength, JBA);
2355     DEBUG_START(GSN_CM_ADD, addNodePtr.i, "CommitNew");
2356     return;
2357   }
2358   case CmAdd::CommitNew:
2359     jam();
2360     /**
2361      * Tell arbitration about new node.
2362      */
2363     handleArbitNdbAdd(signal, addNodePtr.i);
2364     c_start.reset();
2365 
2366     if (c_start.m_starting_nodes.get(addNodePtr.i))
2367     {
2368       jam();
2369       c_start.m_starting_nodes.clear(addNodePtr.i);
2370       if (c_start.m_starting_nodes.isclear())
2371       {
2372 	jam();
2373 	sendSttorryLab(signal);
2374       }
2375     }
2376     return;
2377   }//switch
2378   ndbrequire(false);
2379 }//Qmgr::execCM_ACKADD()
2380 
2381 /**-------------------------------------------------------------------------
2382  * WE HAVE BEEN INCLUDED INTO THE CLUSTER. IT IS NOW TIME TO CALCULATE WHICH
2383  * ARE OUR LEFT AND RIGHT NEIGHBOURS FOR THE HEARTBEAT PROTOCOL.
2384  *--------------------------------------------------------------------------*/
findNeighbours(Signal * signal,Uint32 from)2385 void Qmgr::findNeighbours(Signal* signal, Uint32 from)
2386 {
2387   UintR toldLeftNeighbour;
2388   UintR tfnLeftFound;
2389   UintR tfnMaxFound;
2390   UintR tfnMinFound;
2391   UintR tfnRightFound;
2392   NodeRecPtr fnNodePtr;
2393   NodeRecPtr fnOwnNodePtr;
2394 
2395   Uint32 toldRightNeighbour = cneighbourh;
2396   toldLeftNeighbour = cneighbourl;
2397   tfnLeftFound = 0;
2398   tfnMaxFound = 0;
2399   tfnMinFound = (UintR)-1;
2400   tfnRightFound = (UintR)-1;
2401   fnOwnNodePtr.i = getOwnNodeId();
2402   ptrCheckGuard(fnOwnNodePtr, MAX_NDB_NODES, nodeRec);
2403   for (fnNodePtr.i = 1; fnNodePtr.i < MAX_NDB_NODES; fnNodePtr.i++) {
2404     jam();
2405     ptrAss(fnNodePtr, nodeRec);
2406     if (fnNodePtr.i != fnOwnNodePtr.i) {
2407       if (fnNodePtr.p->phase == ZRUNNING) {
2408         if (tfnMinFound > fnNodePtr.p->ndynamicId) {
2409           jam();
2410           tfnMinFound = fnNodePtr.p->ndynamicId;
2411         }//if
2412         if (tfnMaxFound < fnNodePtr.p->ndynamicId) {
2413           jam();
2414           tfnMaxFound = fnNodePtr.p->ndynamicId;
2415         }//if
2416         if (fnOwnNodePtr.p->ndynamicId > fnNodePtr.p->ndynamicId) {
2417           jam();
2418           if (fnNodePtr.p->ndynamicId > tfnLeftFound) {
2419             jam();
2420             tfnLeftFound = fnNodePtr.p->ndynamicId;
2421           }//if
2422         } else {
2423           jam();
2424           if (fnNodePtr.p->ndynamicId < tfnRightFound) {
2425             jam();
2426             tfnRightFound = fnNodePtr.p->ndynamicId;
2427           }//if
2428         }//if
2429       }//if
2430     }//if
2431   }//for
2432   if (tfnLeftFound == 0) {
2433     if (tfnMinFound == (UintR)-1) {
2434       jam();
2435       cneighbourl = ZNIL;
2436     } else {
2437       jam();
2438       cneighbourl = translateDynamicIdToNodeId(signal, tfnMaxFound);
2439     }//if
2440   } else {
2441     jam();
2442     cneighbourl = translateDynamicIdToNodeId(signal, tfnLeftFound);
2443   }//if
2444   if (tfnRightFound == (UintR)-1) {
2445     if (tfnMaxFound == 0) {
2446       jam();
2447       cneighbourh = ZNIL;
2448     } else {
2449       jam();
2450       cneighbourh = translateDynamicIdToNodeId(signal, tfnMinFound);
2451     }//if
2452   } else {
2453     jam();
2454     cneighbourh = translateDynamicIdToNodeId(signal, tfnRightFound);
2455   }//if
2456   if (toldLeftNeighbour != cneighbourl) {
2457     jam();
2458     if (cneighbourl != ZNIL) {
2459       jam();
2460       /**-------------------------------------------------------------------*/
2461       /* WE ARE SUPERVISING A NEW LEFT NEIGHBOUR. WE START WITH ALARM COUNT
2462        * EQUAL TO ZERO.
2463        *---------------------------------------------------------------------*/
2464       fnNodePtr.i = cneighbourl;
2465       ptrCheckGuard(fnNodePtr, MAX_NDB_NODES, nodeRec);
2466       setNodeInfo(fnNodePtr.i).m_heartbeat_cnt= 0;
2467     }//if
2468   }//if
2469 
2470   signal->theData[0] = NDB_LE_FIND_NEIGHBOURS;
2471   signal->theData[1] = getOwnNodeId();
2472   signal->theData[2] = cneighbourl;
2473   signal->theData[3] = cneighbourh;
2474   signal->theData[4] = fnOwnNodePtr.p->ndynamicId;
2475   UintR Tlen = 5;
2476   sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, Tlen, JBB);
2477   g_eventLogger->info("findNeighbours from: %u old (left: %u right: %u) new (%u %u)",
2478                       from,
2479                       toldLeftNeighbour,
2480                       toldRightNeighbour,
2481                       cneighbourl,
2482                       cneighbourh);
2483 }//Qmgr::findNeighbours()
2484 
2485 /*
2486 4.10.7 INIT_DATA        */
2487 /*---------------------------------------------------------------------------*/
2488 /*---------------------------------------------------------------------------*/
initData(Signal * signal)2489 void Qmgr::initData(Signal* signal)
2490 {
2491   NDB_TICKS now = NdbTick_CurrentMillisecond();
2492   interface_check_timer.setDelay(1000);
2493   interface_check_timer.reset(now);
2494 
2495   // catch-all for missing initializations
2496   memset(&arbitRec, 0, sizeof(arbitRec));
2497 
2498   /**
2499    * Timeouts
2500    */
2501   const ndb_mgm_configuration_iterator * p =
2502     m_ctx.m_config.getOwnConfigIterator();
2503   ndbrequire(p != 0);
2504 
2505   Uint32 hbDBDB = 1500;
2506   Uint32 arbitTimeout = 1000;
2507   Uint32 arbitMethod = ARBIT_METHOD_DEFAULT;
2508   Uint32 ccInterval = 0;
2509   c_restartPartialTimeout = 30000;
2510   c_restartPartionedTimeout = 60000;
2511   c_restartFailureTimeout = ~0;
2512   c_restartNoNodegroupTimeout = 15000;
2513   ndb_mgm_get_int_parameter(p, CFG_DB_HEARTBEAT_INTERVAL, &hbDBDB);
2514   ndb_mgm_get_int_parameter(p, CFG_DB_ARBIT_TIMEOUT, &arbitTimeout);
2515   ndb_mgm_get_int_parameter(p, CFG_DB_ARBIT_METHOD, &arbitMethod);
2516   ndb_mgm_get_int_parameter(p, CFG_DB_START_PARTIAL_TIMEOUT,
2517 			    &c_restartPartialTimeout);
2518   ndb_mgm_get_int_parameter(p, CFG_DB_START_PARTITION_TIMEOUT,
2519 			    &c_restartPartionedTimeout);
2520   ndb_mgm_get_int_parameter(p, CFG_DB_START_NO_NODEGROUP_TIMEOUT,
2521 			    &c_restartNoNodegroupTimeout);
2522   ndb_mgm_get_int_parameter(p, CFG_DB_START_FAILURE_TIMEOUT,
2523 			    &c_restartFailureTimeout);
2524   ndb_mgm_get_int_parameter(p, CFG_DB_CONNECT_CHECK_DELAY,
2525                             &ccInterval);
2526 
2527   if(c_restartPartialTimeout == 0)
2528   {
2529     c_restartPartialTimeout = ~0;
2530   }
2531 
2532   if (c_restartPartionedTimeout ==0)
2533   {
2534     c_restartPartionedTimeout = ~0;
2535   }
2536 
2537   if (c_restartFailureTimeout == 0)
2538   {
2539     c_restartFailureTimeout = ~0;
2540   }
2541 
2542   if (c_restartNoNodegroupTimeout == 0)
2543   {
2544     c_restartNoNodegroupTimeout = ~0;
2545   }
2546 
2547   setHbDelay(hbDBDB);
2548   setCCDelay(ccInterval);
2549   setArbitTimeout(arbitTimeout);
2550 
2551   arbitRec.method = (ArbitRec::Method)arbitMethod;
2552   arbitRec.state = ARBIT_NULL;          // start state for all nodes
2553   arbitRec.apiMask[0].clear();          // prepare for ARBIT_CFG
2554 
2555   Uint32 sum = 0;
2556   ArbitSignalData* const sd = (ArbitSignalData*)&signal->theData[0];
2557   for (unsigned rank = 1; rank <= 2; rank++) {
2558     sd->sender = getOwnNodeId();
2559     sd->code = rank;
2560     sd->node = 0;
2561     sd->ticket.clear();
2562     sd->mask.clear();
2563     ndb_mgm_configuration_iterator * iter =
2564       m_ctx.m_config.getClusterConfigIterator();
2565     for (ndb_mgm_first(iter); ndb_mgm_valid(iter); ndb_mgm_next(iter)) {
2566       Uint32 tmp = 0;
2567       if (ndb_mgm_get_int_parameter(iter, CFG_NODE_ARBIT_RANK, &tmp) == 0 &&
2568 	  tmp == rank){
2569 	Uint32 nodeId = 0;
2570 	ndbrequire(!ndb_mgm_get_int_parameter(iter, CFG_NODE_ID, &nodeId));
2571 	sd->mask.set(nodeId);
2572       }
2573     }
2574     sum += sd->mask.count();
2575     execARBIT_CFG(signal);
2576   }
2577 
2578   if (arbitRec.method == ArbitRec::METHOD_DEFAULT &&
2579       sum == 0)
2580   {
2581     jam();
2582     infoEvent("Arbitration disabled, all API nodes have rank 0");
2583     arbitRec.method = ArbitRec::DISABLED;
2584   }
2585 
2586   setNodeInfo(getOwnNodeId()).m_mysql_version = NDB_MYSQL_VERSION_D;
2587 
2588   ndb_mgm_configuration_iterator * iter =
2589     m_ctx.m_config.getClusterConfigIterator();
2590   for (ndb_mgm_first(iter); ndb_mgm_valid(iter); ndb_mgm_next(iter))
2591   {
2592     jam();
2593     Uint32 nodeId = 0;
2594     if (ndb_mgm_get_int_parameter(iter, CFG_NODE_ID, &nodeId) == 0)
2595     {
2596       jam();
2597       if (nodeId < MAX_NDB_NODES && getNodeInfo(nodeId).m_type == NodeInfo::DB)
2598       {
2599         Uint32 hbOrder = 0;
2600         ndb_mgm_get_int_parameter(iter, CFG_DB_HB_ORDER, &hbOrder);
2601 
2602         NodeRecPtr nodePtr;
2603         nodePtr.i = nodeId;
2604         ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
2605         nodePtr.p->hbOrder = hbOrder;
2606       }
2607     }
2608   }
2609   int hb_order_error = check_hb_order_config();
2610   if (hb_order_error == -1)
2611   {
2612     char msg[] = "Illegal HeartbeatOrder config, "
2613                  "all nodes must have non-zero config value";
2614     progError(__LINE__, NDBD_EXIT_INVALID_CONFIG, msg);
2615     return;
2616   }
2617   if (hb_order_error == -2)
2618   {
2619     char msg[] = "Illegal HeartbeatOrder config, "
2620                  "the nodes must have distinct config values";
2621     progError(__LINE__, NDBD_EXIT_INVALID_CONFIG, msg);
2622     return;
2623   }
2624   ndbrequire(hb_order_error == 0);
2625 }//Qmgr::initData()
2626 
2627 
2628 /**---------------------------------------------------------------------------
2629  * HERE WE RECEIVE THE JOB TABLE SIGNAL EVERY 10 MILLISECONDS.
2630  * WE WILL USE THIS TO CHECK IF IT IS TIME TO CHECK THE NEIGHBOUR NODE.
2631  * WE WILL ALSO SEND A SIGNAL TO BLOCKS THAT NEED A TIME SIGNAL AND
2632  * DO NOT WANT TO USE JOB TABLE SIGNALS.
2633  *---------------------------------------------------------------------------*/
timerHandlingLab(Signal * signal)2634 void Qmgr::timerHandlingLab(Signal* signal)
2635 {
2636   NDB_TICKS TcurrentTime = NdbTick_CurrentMillisecond();
2637   NodeRecPtr myNodePtr;
2638   myNodePtr.i = getOwnNodeId();
2639   ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);
2640 
2641   Uint32 sentHi = signal->theData[1];
2642   Uint32 sentLo = signal->theData[2];
2643   Uint64 sent = (Uint64(sentHi) << 32) + sentLo;
2644 
2645   if (TcurrentTime >= sent + 1000 || (TcurrentTime < sent))
2646   {
2647     jam();
2648     g_eventLogger->warning("timerHandlingLab now: %llu sent: %llu diff: %d",
2649                            TcurrentTime, sent, int(TcurrentTime - sent));
2650   }
2651   else if (TcurrentTime >= sent + 150)
2652   {
2653     g_eventLogger->info("timerHandlingLab now: %llu sent: %llu diff: %d",
2654                         TcurrentTime, sent, int(TcurrentTime - sent));
2655   }
2656 
2657   if (myNodePtr.p->phase == ZRUNNING) {
2658     jam();
2659     /**---------------------------------------------------------------------
2660      * WE ARE ONLY PART OF HEARTBEAT CLUSTER IF WE ARE UP AND RUNNING.
2661      *---------------------------------------------------------------------*/
2662     if (hb_send_timer.check(TcurrentTime)) {
2663       jam();
2664       sendHeartbeat(signal);
2665       hb_send_timer.reset(TcurrentTime);
2666     }
2667     if (likely(! m_connectivity_check.m_active))
2668     {
2669       if (hb_check_timer.check(TcurrentTime)) {
2670         jam();
2671         checkHeartbeat(signal);
2672         hb_check_timer.reset(TcurrentTime);
2673       }
2674     }
2675     else
2676     {
2677       /* Connectivity check */
2678       if (m_connectivity_check.m_timer.check(TcurrentTime)) {
2679         jam();
2680         checkConnectivityTimeSignal(signal);
2681         m_connectivity_check.m_timer.reset(TcurrentTime);
2682       }
2683     }
2684   }
2685 
2686   if (interface_check_timer.check(TcurrentTime)) {
2687     jam();
2688     interface_check_timer.reset(TcurrentTime);
2689     checkStartInterface(signal, TcurrentTime);
2690   }
2691 
2692   if (hb_api_timer.check(TcurrentTime))
2693   {
2694     jam();
2695     hb_api_timer.reset(TcurrentTime);
2696     apiHbHandlingLab(signal, TcurrentTime);
2697   }
2698 
2699   if (cactivateApiCheck != 0) {
2700     jam();
2701     if (clatestTransactionCheck == 0) {
2702       //-------------------------------------------------------------
2703       // Initialise the Transaction check timer.
2704       //-------------------------------------------------------------
2705       clatestTransactionCheck = TcurrentTime;
2706     }//if
2707     int counter = 0;
2708     while (TcurrentTime > ((NDB_TICKS)10 + clatestTransactionCheck)) {
2709       jam();
2710       clatestTransactionCheck += (NDB_TICKS)10;
2711       sendSignal(DBTC_REF, GSN_TIME_SIGNAL, signal, 1, JBB);
2712       sendSignal(DBLQH_REF, GSN_TIME_SIGNAL, signal, 1, JBB);
2713       counter++;
2714       if (counter > 1) {
2715 	jam();
2716 	break;
2717       } else {
2718 	;
2719       }//if
2720     }//while
2721   }//if
2722 
2723   //--------------------------------------------------
2724   // Resend this signal with 10 milliseconds delay.
2725   //--------------------------------------------------
2726   signal->theData[0] = ZTIMER_HANDLING;
2727   signal->theData[1] = Uint32(TcurrentTime >> 32);
2728   signal->theData[2] = Uint32(TcurrentTime);
2729   sendSignalWithDelay(QMGR_REF, GSN_CONTINUEB, signal, 10, 3);
2730   return;
2731 }//Qmgr::timerHandlingLab()
2732 
2733 /*---------------------------------------------------------------------------*/
2734 /*       THIS MODULE HANDLES THE SENDING AND RECEIVING OF HEARTBEATS.        */
2735 /*---------------------------------------------------------------------------*/
sendHeartbeat(Signal * signal)2736 void Qmgr::sendHeartbeat(Signal* signal)
2737 {
2738   NodeRecPtr localNodePtr;
2739   localNodePtr.i = cneighbourh;
2740   if (localNodePtr.i == ZNIL) {
2741     jam();
2742     /**---------------------------------------------------------------------
2743      * THERE ARE NO NEIGHBOURS. THIS IS POSSIBLE IF WE ARE THE ONLY NODE IN
2744      * THE CLUSTER.IN THIS CASE WE DO NOT NEED TO SEND ANY HEARTBEAT SIGNALS.
2745      *-----------------------------------------------------------------------*/
2746     return;
2747   }//if
2748   ptrCheckGuard(localNodePtr, MAX_NDB_NODES, nodeRec);
2749   signal->theData[0] = getOwnNodeId();
2750 
2751   sendSignal(localNodePtr.p->blockRef, GSN_CM_HEARTBEAT, signal, 1, JBA);
2752 #ifdef VM_TRACE
2753   signal->theData[0] = NDB_LE_SentHeartbeat;
2754   signal->theData[1] = localNodePtr.i;
2755   sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
2756 #endif
2757 }//Qmgr::sendHeartbeat()
2758 
checkHeartbeat(Signal * signal)2759 void Qmgr::checkHeartbeat(Signal* signal)
2760 {
2761   NodeRecPtr nodePtr;
2762 
2763   nodePtr.i = cneighbourl;
2764   if (nodePtr.i == ZNIL) {
2765     jam();
2766     /**---------------------------------------------------------------------
2767      * THERE ARE NO NEIGHBOURS. THIS IS POSSIBLE IF WE ARE THE ONLY NODE IN
2768      * THE CLUSTER. IN THIS CASE WE DO NOT NEED TO CHECK ANY HEARTBEATS.
2769      *-----------------------------------------------------------------------*/
2770     return;
2771   }//if
2772   ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
2773 
2774   setNodeInfo(nodePtr.i).m_heartbeat_cnt++;
2775   ndbrequire(nodePtr.p->phase == ZRUNNING);
2776   ndbrequire(getNodeInfo(nodePtr.i).m_type == NodeInfo::DB);
2777 
2778   if(getNodeInfo(nodePtr.i).m_heartbeat_cnt > 2){
2779     signal->theData[0] = NDB_LE_MissedHeartbeat;
2780     signal->theData[1] = nodePtr.i;
2781     signal->theData[2] = getNodeInfo(nodePtr.i).m_heartbeat_cnt - 1;
2782     sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3, JBB);
2783   }
2784 
2785   if (getNodeInfo(nodePtr.i).m_heartbeat_cnt > 4) {
2786     jam();
2787     if (m_connectivity_check.getEnabled())
2788     {
2789       jam();
2790       /* Start connectivity check, indicating the cause */
2791       startConnectivityCheck(signal, FailRep::ZHEARTBEAT_FAILURE, nodePtr.i);
2792       return;
2793     }
2794     else
2795     {
2796       /**----------------------------------------------------------------------
2797        * OUR LEFT NEIGHBOUR HAVE KEPT QUIET FOR THREE CONSECUTIVE HEARTBEAT
2798        * PERIODS. THUS WE DECLARE HIM DOWN.
2799        *----------------------------------------------------------------------*/
2800       signal->theData[0] = NDB_LE_DeadDueToHeartbeat;
2801       signal->theData[1] = nodePtr.i;
2802       sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
2803 
2804       failReportLab(signal, nodePtr.i, FailRep::ZHEARTBEAT_FAILURE, getOwnNodeId());
2805       return;
2806     }
2807   }//if
2808 }//Qmgr::checkHeartbeat()
2809 
apiHbHandlingLab(Signal * signal,Uint64 now)2810 void Qmgr::apiHbHandlingLab(Signal* signal, Uint64 now)
2811 {
2812   NodeRecPtr TnodePtr;
2813 
2814   for (TnodePtr.i = 1; TnodePtr.i < MAX_NODES; TnodePtr.i++) {
2815     const Uint32 nodeId = TnodePtr.i;
2816     ptrAss(TnodePtr, nodeRec);
2817 
2818     const NodeInfo::NodeType type = getNodeInfo(nodeId).getType();
2819     if(type == NodeInfo::DB)
2820       continue;
2821 
2822     if(type == NodeInfo::INVALID)
2823       continue;
2824 
2825     if (c_connectedNodes.get(nodeId))
2826     {
2827       jam();
2828       setNodeInfo(TnodePtr.i).m_heartbeat_cnt++;
2829 
2830       if(getNodeInfo(TnodePtr.i).m_heartbeat_cnt > 2)
2831       {
2832 	signal->theData[0] = NDB_LE_MissedHeartbeat;
2833 	signal->theData[1] = nodeId;
2834 	signal->theData[2] = getNodeInfo(TnodePtr.i).m_heartbeat_cnt - 1;
2835 	sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3, JBB);
2836       }
2837 
2838       if (getNodeInfo(TnodePtr.i).m_heartbeat_cnt > 4)
2839       {
2840         jam();
2841 	/*------------------------------------------------------------------*/
2842 	/* THE API NODE HAS NOT SENT ANY HEARTBEAT FOR THREE SECONDS.
2843 	 * WE WILL DISCONNECT FROM IT NOW.
2844 	 *------------------------------------------------------------------*/
2845 	/*------------------------------------------------------------------*/
2846 	/* We call node_failed to release all connections for this api node */
2847 	/*------------------------------------------------------------------*/
2848 	signal->theData[0] = NDB_LE_DeadDueToHeartbeat;
2849 	signal->theData[1] = nodeId;
2850 	sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
2851 
2852         api_failed(signal, nodeId);
2853       }//if
2854     }//if
2855     else if (TnodePtr.p->phase == ZAPI_INACTIVE &&
2856              TnodePtr.p->m_secret != 0 && now > TnodePtr.p->m_alloc_timeout)
2857     {
2858       jam();
2859       TnodePtr.p->m_secret = 0;
2860       warningEvent("Releasing node id allocation for node %u",
2861                    TnodePtr.i);
2862     }
2863   }//for
2864   return;
2865 }//Qmgr::apiHbHandlingLab()
2866 
checkStartInterface(Signal * signal,Uint64 now)2867 void Qmgr::checkStartInterface(Signal* signal, Uint64 now)
2868 {
2869   NodeRecPtr nodePtr;
2870   /*------------------------------------------------------------------------*/
2871   // This method is called once per second. After a disconnect we wait at
2872   // least three seconds before allowing new connects. We will also ensure
2873   // that handling of the failure is completed before we allow new connections.
2874   /*------------------------------------------------------------------------*/
2875   for (nodePtr.i = 1; nodePtr.i < MAX_NODES; nodePtr.i++) {
2876     ptrAss(nodePtr, nodeRec);
2877     Uint32 type = getNodeInfo(nodePtr.i).m_type;
2878     if (nodePtr.p->phase == ZFAIL_CLOSING) {
2879       jam();
2880       setNodeInfo(nodePtr.i).m_heartbeat_cnt++;
2881       if (c_connectedNodes.get(nodePtr.i)){
2882         jam();
2883 	/*-------------------------------------------------------------------*/
2884 	// We need to ensure that the connection is not restored until it has
2885 	// been disconnected for at least three seconds.
2886 	/*-------------------------------------------------------------------*/
2887         setNodeInfo(nodePtr.i).m_heartbeat_cnt= 0;
2888       }//if
2889       if ((getNodeInfo(nodePtr.i).m_heartbeat_cnt > 3)
2890 	  && (nodePtr.p->failState == NORMAL)) {
2891 	/**------------------------------------------------------------------
2892 	 * WE HAVE DISCONNECTED THREE SECONDS AGO. WE ARE NOW READY TO
2893 	 * CONNECT AGAIN AND ACCEPT NEW REGISTRATIONS FROM THIS NODE.
2894 	 * WE WILL NOT ALLOW CONNECTIONS OF API NODES UNTIL API FAIL HANDLING
2895 	 * IS COMPLETE.
2896 	 *-------------------------------------------------------------------*/
2897         nodePtr.p->failState = NORMAL;
2898         nodePtr.p->m_secret = 0;
2899         switch(type){
2900         case NodeInfo::DB:
2901           jam();
2902           nodePtr.p->phase = ZINIT;
2903           break;
2904         case NodeInfo::MGM:
2905           jam();
2906           nodePtr.p->phase = ZAPI_INACTIVE;
2907           break;
2908         case NodeInfo::API:
2909           jam();
2910           if (c_allow_api_connect)
2911           {
2912             jam();
2913             nodePtr.p->phase = ZAPI_INACTIVE;
2914             break;
2915           }
2916           else
2917           {
2918             /**
2919              * Dont allow API node to connect before c_allow_api_connect
2920              */
2921             jam();
2922             setNodeInfo(nodePtr.i).m_heartbeat_cnt = 3;
2923             continue;
2924           }
2925         }
2926 
2927         setNodeInfo(nodePtr.i).m_heartbeat_cnt= 0;
2928         signal->theData[0] = 0;
2929         signal->theData[1] = nodePtr.i;
2930         sendSignal(CMVMI_REF, GSN_OPEN_COMREQ, signal, 2, JBA);
2931       }
2932       else
2933       {
2934         jam();
2935         if(((getNodeInfo(nodePtr.i).m_heartbeat_cnt + 1) % 60) == 0)
2936         {
2937           jam();
2938 	  char buf[256];
2939           if (getNodeInfo(nodePtr.i).m_type == NodeInfo::DB)
2940           {
2941             jam();
2942             BaseString::snprintf(buf, sizeof(buf),
2943                                  "Failure handling of node %d has not completed"
2944                                  " in %d min - state = %d",
2945                                  nodePtr.i,
2946                                  (getNodeInfo(nodePtr.i).m_heartbeat_cnt+1)/60,
2947                                  nodePtr.p->failState);
2948             warningEvent("%s", buf);
2949             if (((getNodeInfo(nodePtr.i).m_heartbeat_cnt + 1) % 300) == 0)
2950             {
2951               jam();
2952               /**
2953                * Also dump DIH nf-state
2954                */
2955               signal->theData[0] = 7019;
2956               signal->theData[1] = nodePtr.i;
2957               sendSignal(DBDIH_REF, GSN_DUMP_STATE_ORD, signal, 2, JBB);
2958             }
2959           }
2960           else
2961           {
2962             jam();
2963             BaseString::snprintf(buf, sizeof(buf),
2964                                  "Failure handling of api %u has not completed"
2965                                  " in %d min - state = %d",
2966                                  nodePtr.i,
2967                                  (getNodeInfo(nodePtr.i).m_heartbeat_cnt+1)/60,
2968                                  nodePtr.p->failState);
2969             warningEvent("%s", buf);
2970             if (nodePtr.p->failState == WAITING_FOR_API_FAILCONF)
2971             {
2972               jam();
2973               compile_time_assert(NDB_ARRAY_SIZE(nodePtr.p->m_failconf_blocks) == 5);
2974               BaseString::snprintf(buf, sizeof(buf),
2975                                    "  Waiting for blocks: %u %u %u %u %u",
2976                                    nodePtr.p->m_failconf_blocks[0],
2977                                    nodePtr.p->m_failconf_blocks[1],
2978                                    nodePtr.p->m_failconf_blocks[2],
2979                                    nodePtr.p->m_failconf_blocks[3],
2980                                    nodePtr.p->m_failconf_blocks[4]);
2981               warningEvent("%s", buf);
2982             }
2983           }
2984 	}
2985       }
2986     }
2987     else if (type == NodeInfo::DB && nodePtr.p->phase == ZINIT &&
2988              nodePtr.p->m_secret != 0 && now > nodePtr.p->m_alloc_timeout)
2989     {
2990       jam();
2991       nodePtr.p->m_secret = 0;
2992       warningEvent("Releasing node id allocation for node %u",
2993                    nodePtr.i);
2994     }
2995   }//for
2996   return;
2997 }//Qmgr::checkStartInterface()
2998 
2999 /**-------------------------------------------------------------------------
3000  * This method is called when a DISCONNECT_REP signal arrived which means that
3001  * the API node is gone and we want to release resources in TC/DICT blocks.
3002  *---------------------------------------------------------------------------*/
sendApiFailReq(Signal * signal,Uint16 failedNodeNo,bool sumaOnly)3003 void Qmgr::sendApiFailReq(Signal* signal, Uint16 failedNodeNo, bool sumaOnly)
3004 {
3005   jamEntry();
3006   signal->theData[0] = failedNodeNo;
3007   signal->theData[1] = QMGR_REF;
3008 
3009   /* We route the ApiFailReq signals via CMVMI
3010    * This is done to ensure that they are received after
3011    * any pending signals from the failed Api node when
3012    * running ndbmtd, as these signals would be enqueued from
3013    * the thread running CMVMI
3014    */
3015   Uint32 routedSignalSectionI = RNIL;
3016   ndbrequire(appendToSection(routedSignalSectionI,
3017                              &signal->theData[0],
3018                              2));
3019   SectionHandle handle(this, routedSignalSectionI);
3020 
3021   /* RouteOrd data */
3022   RouteOrd* routeOrd = (RouteOrd*) &signal->theData[0];
3023   routeOrd->srcRef = reference();
3024   routeOrd->gsn = GSN_API_FAILREQ;
3025 
3026   NodeRecPtr failedNodePtr;
3027   failedNodePtr.i = failedNodeNo;
3028   ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec);
3029   failedNodePtr.p->failState = WAITING_FOR_API_FAILCONF;
3030 
3031 
3032   /* Send ROUTE_ORD signals to CMVMI via JBA
3033    * CMVMI will then immediately send the API_FAILREQ
3034    * signals to the destination block(s) using JBB
3035    * These API_FAILREQ signals will be sent *after*
3036    * any JBB signals enqueued from the failed API
3037    * by the CMVMI thread.
3038    */
3039   if (!sumaOnly)
3040   {
3041     jam();
3042     add_failconf_block(failedNodePtr, DBTC);
3043     routeOrd->dstRef = DBTC_REF;
3044     sendSignalNoRelease(CMVMI_REF, GSN_ROUTE_ORD, signal,
3045                         RouteOrd::SignalLength,
3046                         JBA, &handle);
3047 
3048     add_failconf_block(failedNodePtr, DBDICT);
3049     routeOrd->dstRef = DBDICT_REF;
3050     sendSignalNoRelease(CMVMI_REF, GSN_ROUTE_ORD, signal,
3051                         RouteOrd::SignalLength,
3052                         JBA, &handle);
3053 
3054     add_failconf_block(failedNodePtr, DBSPJ);
3055     routeOrd->dstRef = DBSPJ_REF;
3056     sendSignalNoRelease(CMVMI_REF, GSN_ROUTE_ORD, signal,
3057                         RouteOrd::SignalLength,
3058                         JBA, &handle);
3059   }
3060 
3061   /* Suma always notified */
3062   add_failconf_block(failedNodePtr, SUMA);
3063   routeOrd->dstRef = SUMA_REF;
3064   sendSignal(CMVMI_REF, GSN_ROUTE_ORD, signal,
3065              RouteOrd::SignalLength,
3066              JBA, &handle);
3067 }//Qmgr::sendApiFailReq()
3068 
execAPI_FAILREQ(Signal * signal)3069 void Qmgr::execAPI_FAILREQ(Signal* signal)
3070 {
3071   jamEntry();
3072   NodeRecPtr failedNodePtr;
3073   failedNodePtr.i = signal->theData[0];
3074   // signal->theData[1] == QMGR_REF
3075   ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec);
3076 
3077   ndbrequire(getNodeInfo(failedNodePtr.i).getType() != NodeInfo::DB);
3078 
3079   api_failed(signal, signal->theData[0]);
3080 }
3081 
execAPI_FAILCONF(Signal * signal)3082 void Qmgr::execAPI_FAILCONF(Signal* signal)
3083 {
3084   NodeRecPtr failedNodePtr;
3085 
3086   jamEntry();
3087   failedNodePtr.i = signal->theData[0];
3088   ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec);
3089 
3090   Uint32 block = refToMain(signal->theData[1]);
3091   if (failedNodePtr.p->failState != WAITING_FOR_API_FAILCONF ||
3092       !remove_failconf_block(failedNodePtr, block))
3093   {
3094     jam();
3095     ndbout << "execAPI_FAILCONF from " << block
3096            << " failedNodePtr.p->failState = "
3097 	   << (Uint32)(failedNodePtr.p->failState)
3098            << " blocks: ";
3099     for (Uint32 i = 0;i<NDB_ARRAY_SIZE(failedNodePtr.p->m_failconf_blocks);i++)
3100     {
3101       printf("%u ", failedNodePtr.p->m_failconf_blocks[i]);
3102     }
3103     ndbout << endl;
3104     systemErrorLab(signal, __LINE__);
3105   }//if
3106 
3107   if (is_empty_failconf_block(failedNodePtr))
3108   {
3109     jam();
3110     failedNodePtr.p->failState = NORMAL;
3111 
3112     /**
3113      * When we set this state, connection will later be opened
3114      *   in checkStartInterface
3115      */
3116   }
3117   return;
3118 }//Qmgr::execAPI_FAILCONF()
3119 
3120 void
add_failconf_block(NodeRecPtr nodePtr,Uint32 block)3121 Qmgr::add_failconf_block(NodeRecPtr nodePtr, Uint32 block)
3122 {
3123   // Check that it does not already exists!!
3124   Uint32 pos = 0;
3125   for (; pos < NDB_ARRAY_SIZE(nodePtr.p->m_failconf_blocks); pos++)
3126   {
3127     jam();
3128     if (nodePtr.p->m_failconf_blocks[pos] == 0)
3129     {
3130       jam();
3131       break;
3132     }
3133     else if (nodePtr.p->m_failconf_blocks[pos] == block)
3134     {
3135       jam();
3136       break;
3137     }
3138   }
3139 
3140   ndbrequire(pos != NDB_ARRAY_SIZE(nodePtr.p->m_failconf_blocks));
3141   ndbassert(nodePtr.p->m_failconf_blocks[pos] != block);
3142   if (nodePtr.p->m_failconf_blocks[pos] == block)
3143   {
3144     jam();
3145     /**
3146      * Already in list!!
3147      */
3148 #ifdef ERROR_INSERT
3149     ndbrequire(false);
3150 #endif
3151     return;
3152   }
3153   ndbrequire(nodePtr.p->m_failconf_blocks[pos] == 0);
3154   nodePtr.p->m_failconf_blocks[pos] = block;
3155 }
3156 
3157 bool
remove_failconf_block(NodeRecPtr nodePtr,Uint32 block)3158 Qmgr::remove_failconf_block(NodeRecPtr nodePtr, Uint32 block)
3159 {
3160   // Check that it does exists!!
3161   Uint32 pos = 0;
3162   for (; pos < NDB_ARRAY_SIZE(nodePtr.p->m_failconf_blocks); pos++)
3163   {
3164     jam();
3165     if (nodePtr.p->m_failconf_blocks[pos] == 0)
3166     {
3167       jam();
3168       break;
3169     }
3170     else if (nodePtr.p->m_failconf_blocks[pos] == block)
3171     {
3172       jam();
3173       break;
3174     }
3175   }
3176 
3177   if (pos == NDB_ARRAY_SIZE(nodePtr.p->m_failconf_blocks) ||
3178       nodePtr.p->m_failconf_blocks[pos] != block)
3179   {
3180     jam();
3181     /**
3182      * Not found!!
3183      */
3184     return false;
3185   }
3186 
3187   nodePtr.p->m_failconf_blocks[pos] = 0;
3188   for (pos++; pos < NDB_ARRAY_SIZE(nodePtr.p->m_failconf_blocks); pos++)
3189   {
3190     jam();
3191     nodePtr.p->m_failconf_blocks[pos - 1] = nodePtr.p->m_failconf_blocks[pos];
3192   }
3193 
3194   return true;
3195 }
3196 
3197 bool
is_empty_failconf_block(NodeRecPtr nodePtr) const3198 Qmgr::is_empty_failconf_block(NodeRecPtr nodePtr) const
3199 {
3200   return nodePtr.p->m_failconf_blocks[0] == 0;
3201 }
3202 
execNDB_FAILCONF(Signal * signal)3203 void Qmgr::execNDB_FAILCONF(Signal* signal)
3204 {
3205   NodeRecPtr failedNodePtr;
3206   NodeRecPtr nodePtr;
3207 
3208   jamEntry();
3209   failedNodePtr.i = signal->theData[0];
3210 
3211   if (ERROR_INSERTED(930))
3212   {
3213     CLEAR_ERROR_INSERT_VALUE;
3214     infoEvent("Discarding NDB_FAILCONF for %u", failedNodePtr.i);
3215     return;
3216   }
3217 
3218   ptrCheckGuard(failedNodePtr, MAX_NDB_NODES, nodeRec);
3219   if (failedNodePtr.p->failState == WAITING_FOR_NDB_FAILCONF){
3220     failedNodePtr.p->failState = NORMAL;
3221   } else {
3222     jam();
3223 
3224     char buf[100];
3225     BaseString::snprintf(buf, 100,
3226 			 "Received NDB_FAILCONF for node %u with state: %d %d",
3227 			 failedNodePtr.i,
3228 			 failedNodePtr.p->phase,
3229 			 failedNodePtr.p->failState);
3230     progError(__LINE__, 0, buf);
3231     systemErrorLab(signal, __LINE__);
3232   }//if
3233 
3234   if (cpresident == getOwnNodeId())
3235   {
3236     jam();
3237 
3238     CRASH_INSERTION(936);
3239   }
3240 
3241   /**
3242    * Prepare a NFCompleteRep and send to all connected API's
3243    * They can then abort all transaction waiting for response from
3244    * the failed node
3245    *
3246    * NOTE: This is sent from all nodes, as otherwise we would need
3247    *       take-over if cpresident dies befor sending this
3248    */
3249   NFCompleteRep * const nfComp = (NFCompleteRep *)&signal->theData[0];
3250   nfComp->blockNo = QMGR_REF;
3251   nfComp->nodeId = getOwnNodeId();
3252   nfComp->failedNodeId = failedNodePtr.i;
3253 
3254   for (nodePtr.i = 1; nodePtr.i < MAX_NODES; nodePtr.i++)
3255   {
3256     jam();
3257     ptrAss(nodePtr, nodeRec);
3258     if (nodePtr.p->phase == ZAPI_ACTIVE){
3259       jam();
3260       sendSignal(nodePtr.p->blockRef, GSN_NF_COMPLETEREP, signal,
3261                  NFCompleteRep::SignalLength, JBB);
3262     }//if
3263   }//for
3264   return;
3265 }//Qmgr::execNDB_FAILCONF()
3266 
3267 void
execNF_COMPLETEREP(Signal * signal)3268 Qmgr::execNF_COMPLETEREP(Signal* signal)
3269 {
3270   jamEntry();
3271   NFCompleteRep rep = *(NFCompleteRep*)signal->getDataPtr();
3272   if (rep.blockNo != DBTC)
3273   {
3274     jam();
3275     ndbassert(false);
3276     return;
3277   }
3278 
3279   /**
3280    * This is a disgrace...but execNF_COMPLETEREP in ndbapi is a mess
3281    *   actually equally messy as it is in ndbd...
3282    *   this is therefore a simple way of having ndbapi to get
3283    *   earlier information that transactions can be aborted
3284    */
3285   signal->theData[0] = rep.failedNodeId;
3286   NodeRecPtr nodePtr;
3287   for (nodePtr.i = 1; nodePtr.i < MAX_NODES; nodePtr.i++)
3288   {
3289     jam();
3290     ptrAss(nodePtr, nodeRec);
3291     if (nodePtr.p->phase == ZAPI_ACTIVE &&
3292         ndb_takeovertc(getNodeInfo(nodePtr.i).m_version))
3293     {
3294       jam();
3295       sendSignal(nodePtr.p->blockRef, GSN_TAKE_OVERTCCONF, signal,
3296                  NFCompleteRep::SignalLength, JBB);
3297     }//if
3298   }//for
3299   return;
3300 }
3301 
3302 /*******************************/
3303 /* DISCONNECT_REP             */
3304 /*******************************/
3305 const char *lookupConnectionError(Uint32 err);
3306 
execDISCONNECT_REP(Signal * signal)3307 void Qmgr::execDISCONNECT_REP(Signal* signal)
3308 {
3309   jamEntry();
3310   const DisconnectRep * const rep = (DisconnectRep *)&signal->theData[0];
3311   const Uint32 nodeId = rep->nodeId;
3312   const Uint32 err = rep->err;
3313   const NodeInfo nodeInfo = getNodeInfo(nodeId);
3314   c_connectedNodes.clear(nodeId);
3315 
3316   if (nodeInfo.getType() == NodeInfo::DB)
3317   {
3318     c_readnodes_nodes.clear(nodeId);
3319   }
3320 
3321   NodeRecPtr nodePtr;
3322   nodePtr.i = getOwnNodeId();
3323   ptrCheckGuard(nodePtr, MAX_NODES, nodeRec);
3324 
3325   char buf[100];
3326   if (nodeInfo.getType() == NodeInfo::DB &&
3327       getNodeState().startLevel < NodeState::SL_STARTED)
3328   {
3329     jam();
3330     CRASH_INSERTION(932);
3331     CRASH_INSERTION(938);
3332     BaseString::snprintf(buf, 100, "Node %u disconnected", nodeId);
3333     progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf);
3334     ndbrequire(false);
3335   }
3336 
3337   if (getNodeInfo(nodeId).getType() != NodeInfo::DB)
3338   {
3339     jam();
3340     api_failed(signal, nodeId);
3341     return;
3342   }
3343 
3344   switch(nodePtr.p->phase){
3345   case ZRUNNING:
3346     jam();
3347     break;
3348   case ZINIT:
3349     ndbrequire(false);
3350   case ZSTARTING:
3351     progError(__LINE__, NDBD_EXIT_CONNECTION_SETUP_FAILED,
3352 	      lookupConnectionError(err));
3353     ndbrequire(false);
3354   case ZPREPARE_FAIL:
3355     ndbrequire(false);
3356   case ZFAIL_CLOSING:
3357     ndbrequire(false);
3358   case ZAPI_ACTIVE:
3359     ndbrequire(false);
3360   case ZAPI_INACTIVE:
3361   {
3362     BaseString::snprintf(buf, 100, "Node %u disconnected", nodeId);
3363     progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf);
3364     ndbrequire(false);
3365   }
3366   }
3367   node_failed(signal, nodeId);
3368 }//DISCONNECT_REP
3369 
node_failed(Signal * signal,Uint16 aFailedNode)3370 void Qmgr::node_failed(Signal* signal, Uint16 aFailedNode)
3371 {
3372   NodeRecPtr failedNodePtr;
3373   /**------------------------------------------------------------------------
3374    *   A COMMUNICATION LINK HAS BEEN DISCONNECTED. WE MUST TAKE SOME ACTION
3375    *   DUE TO THIS.
3376    *-----------------------------------------------------------------------*/
3377   failedNodePtr.i = aFailedNode;
3378   ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec);
3379   failedNodePtr.p->m_secret = 0; // Not yet Uint64(rand()) << 32 + rand();
3380 
3381   ndbrequire(getNodeInfo(failedNodePtr.i).getType() == NodeInfo::DB);
3382 
3383   /**---------------------------------------------------------------------
3384    *   THE OTHER NODE IS AN NDB NODE, WE HANDLE IT AS IF A HEARTBEAT
3385    *   FAILURE WAS DISCOVERED.
3386    *---------------------------------------------------------------------*/
3387   switch(failedNodePtr.p->phase){
3388   case ZRUNNING:
3389     jam();
3390     failReportLab(signal, aFailedNode, FailRep::ZLINK_FAILURE, getOwnNodeId());
3391     return;
3392   case ZFAIL_CLOSING:
3393     jam();
3394     return;
3395   case ZSTARTING:
3396     /**
3397      * bug#42422
3398      *   Force "real" failure handling
3399      */
3400     failedNodePtr.p->phase = ZRUNNING;
3401     failReportLab(signal, aFailedNode, FailRep::ZLINK_FAILURE, getOwnNodeId());
3402     return;
3403     // Fall-through
3404   default:
3405     jam();
3406     /*---------------------------------------------------------------------*/
3407     // The other node is still not in the cluster but disconnected.
3408     // We must restart communication in three seconds.
3409     /*---------------------------------------------------------------------*/
3410     failedNodePtr.p->failState = NORMAL;
3411     failedNodePtr.p->phase = ZFAIL_CLOSING;
3412     setNodeInfo(failedNodePtr.i).m_heartbeat_cnt= 0;
3413 
3414     CloseComReqConf * const closeCom =
3415       (CloseComReqConf *)&signal->theData[0];
3416 
3417     closeCom->xxxBlockRef = reference();
3418     closeCom->requestType = CloseComReqConf::RT_NO_REPLY;
3419     closeCom->failNo      = 0;
3420     closeCom->noOfNodes   = 1;
3421     NodeBitmask::clear(closeCom->theNodes);
3422     NodeBitmask::set(closeCom->theNodes, failedNodePtr.i);
3423     sendSignal(CMVMI_REF, GSN_CLOSE_COMREQ, signal,
3424                CloseComReqConf::SignalLength, JBA);
3425   }//if
3426   return;
3427 }
3428 
3429 void
execUPGRADE_PROTOCOL_ORD(Signal * signal)3430 Qmgr::execUPGRADE_PROTOCOL_ORD(Signal* signal)
3431 {
3432   const UpgradeProtocolOrd* ord = (UpgradeProtocolOrd*)signal->getDataPtr();
3433   switch(ord->type){
3434   case UpgradeProtocolOrd::UPO_ENABLE_MICRO_GCP:
3435     jam();
3436     m_micro_gcp_enabled = true;
3437     return;
3438   }
3439 }
3440 
3441 void
api_failed(Signal * signal,Uint32 nodeId)3442 Qmgr::api_failed(Signal* signal, Uint32 nodeId)
3443 {
3444   NodeRecPtr failedNodePtr;
3445   /**------------------------------------------------------------------------
3446    *   A COMMUNICATION LINK HAS BEEN DISCONNECTED. WE MUST TAKE SOME ACTION
3447    *   DUE TO THIS.
3448    *-----------------------------------------------------------------------*/
3449   failedNodePtr.i = nodeId;
3450   ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec);
3451   failedNodePtr.p->m_secret = 0; // Not yet Uint64(rand()) << 32 + rand();
3452 
3453   if (failedNodePtr.p->phase == ZFAIL_CLOSING)
3454   {
3455     /**
3456      * Failure handling already in progress
3457      */
3458     jam();
3459     return;
3460   }
3461 
3462   ndbrequire(failedNodePtr.p->failState == NORMAL);
3463 
3464   /* Send API_FAILREQ to peer QMGR blocks to allow them to disconnect
3465    * quickly
3466    * Local application blocks get API_FAILREQ once all pending signals
3467    * from the failed API have been processed.
3468    */
3469   signal->theData[0] = failedNodePtr.i;
3470   signal->theData[1] = QMGR_REF;
3471   NodeReceiverGroup rg(QMGR, c_clusterNodes);
3472   sendSignal(rg, GSN_API_FAILREQ, signal, 2, JBA);
3473 
3474   /* Now ask CMVMI to disconnect the node */
3475   FailState initialState = (failedNodePtr.p->phase == ZAPI_ACTIVE) ?
3476     WAITING_FOR_CLOSECOMCONF_ACTIVE :
3477     WAITING_FOR_CLOSECOMCONF_NOTACTIVE;
3478 
3479   failedNodePtr.p->failState = initialState;
3480   failedNodePtr.p->phase = ZFAIL_CLOSING;
3481   setNodeInfo(failedNodePtr.i).m_heartbeat_cnt= 0;
3482   setNodeInfo(failedNodePtr.i).m_version = 0;
3483   recompute_version_info(getNodeInfo(failedNodePtr.i).m_type);
3484 
3485   CloseComReqConf * const closeCom = (CloseComReqConf *)&signal->theData[0];
3486   closeCom->xxxBlockRef = reference();
3487   closeCom->requestType = CloseComReqConf::RT_API_FAILURE;
3488   closeCom->failNo      = 0;
3489   closeCom->noOfNodes   = 1;
3490   NodeBitmask::clear(closeCom->theNodes);
3491   NodeBitmask::set(closeCom->theNodes, failedNodePtr.i);
3492   sendSignal(CMVMI_REF, GSN_CLOSE_COMREQ, signal,
3493              CloseComReqConf::SignalLength, JBA);
3494 } // api_failed
3495 
3496 /**--------------------------------------------------------------------------
3497  * AN API NODE IS REGISTERING. IF FOR THE FIRST TIME WE WILL ENABLE
3498  * COMMUNICATION WITH ALL NDB BLOCKS.
3499  *---------------------------------------------------------------------------*/
3500 /*******************************/
3501 /* API_REGREQ                 */
3502 /*******************************/
execAPI_REGREQ(Signal * signal)3503 void Qmgr::execAPI_REGREQ(Signal* signal)
3504 {
3505   jamEntry();
3506 
3507   ApiRegReq* req = (ApiRegReq*)signal->getDataPtr();
3508   const Uint32 version = req->version;
3509   const BlockReference ref = req->ref;
3510 
3511   Uint32 mysql_version = req->mysql_version;
3512   if (version < NDBD_SPLIT_VERSION)
3513     mysql_version = 0;
3514 
3515   NodeRecPtr apiNodePtr;
3516   apiNodePtr.i = refToNode(ref);
3517   ptrCheckGuard(apiNodePtr, MAX_NODES, nodeRec);
3518 
3519   if (apiNodePtr.p->phase == ZFAIL_CLOSING)
3520   {
3521     jam();
3522     /**
3523      * This node is pending CLOSE_COM_CONF
3524      *   ignore API_REGREQ
3525      */
3526     return;
3527   }
3528 
3529 #if 0
3530   ndbout_c("Qmgr::execAPI_REGREQ: Recd API_REGREQ (NodeId=%d)", apiNodePtr.i);
3531 #endif
3532 
3533   bool compatability_check;
3534   const char * extra = 0;
3535   NodeInfo::NodeType type= getNodeInfo(apiNodePtr.i).getType();
3536   switch(type){
3537   case NodeInfo::API:
3538     if (m_micro_gcp_enabled && !ndb_check_micro_gcp(version))
3539     {
3540       jam();
3541       compatability_check = false;
3542       extra = ": micro gcp enabled";
3543     }
3544     else
3545     {
3546       jam();
3547       compatability_check = ndbCompatible_ndb_api(NDB_VERSION, version);
3548     }
3549     break;
3550   case NodeInfo::MGM:
3551     compatability_check = ndbCompatible_ndb_mgmt(NDB_VERSION, version);
3552     break;
3553   case NodeInfo::DB:
3554   case NodeInfo::INVALID:
3555   default:
3556     sendApiRegRef(signal, ref, ApiRegRef::WrongType);
3557     infoEvent("Invalid connection attempt with type %d", type);
3558     return;
3559   }
3560 
3561   if (!compatability_check) {
3562     jam();
3563     char buf[NDB_VERSION_STRING_BUF_SZ];
3564     infoEvent("Connection attempt from %s id=%d with %s "
3565 	      "incompatible with %s%s",
3566 	      type == NodeInfo::API ? "api or mysqld" : "management server",
3567 	      apiNodePtr.i,
3568 	      ndbGetVersionString(version, mysql_version, 0,
3569                                   buf,
3570                                   sizeof(buf)),
3571 	      NDB_VERSION_STRING,
3572               extra ? extra : "");
3573     apiNodePtr.p->phase = ZAPI_INACTIVE;
3574     sendApiRegRef(signal, ref, ApiRegRef::UnsupportedVersion);
3575     return;
3576   }
3577 
3578   setNodeInfo(apiNodePtr.i).m_version = version;
3579   setNodeInfo(apiNodePtr.i).m_mysql_version = mysql_version;
3580   setNodeInfo(apiNodePtr.i).m_heartbeat_cnt= 0;
3581 
3582   NodeState state = getNodeState();
3583   if (apiNodePtr.p->phase == ZAPI_INACTIVE)
3584   {
3585     apiNodePtr.p->blockRef = ref;
3586     if ((state.startLevel == NodeState::SL_STARTED ||
3587          state.getSingleUserMode() ||
3588          (state.startLevel == NodeState::SL_STARTING &&
3589           state.starting.startPhase >= 100)))
3590     {
3591       jam();
3592       /**----------------------------------------------------------------------
3593        * THE API NODE IS REGISTERING. WE WILL ACCEPT IT BY CHANGING STATE AND
3594        * SENDING A CONFIRM.
3595        *----------------------------------------------------------------------*/
3596       apiNodePtr.p->phase = ZAPI_ACTIVE;
3597       EnableComReq *enableComReq = (EnableComReq *)signal->getDataPtrSend();
3598       enableComReq->m_senderRef = reference();
3599       enableComReq->m_senderData = ENABLE_COM_API_REGREQ;
3600       NodeBitmask::clear(enableComReq->m_nodeIds);
3601       NodeBitmask::set(enableComReq->m_nodeIds, apiNodePtr.i);
3602       sendSignal(CMVMI_REF, GSN_ENABLE_COMREQ, signal,
3603                  EnableComReq::SignalLength, JBA);
3604       return;
3605     }
3606   }
3607 
3608   sendApiRegConf(signal, apiNodePtr.i);
3609 }//Qmgr::execAPI_REGREQ()
3610 
3611 void
handleEnableComApiRegreq(Signal * signal,Uint32 node)3612 Qmgr::handleEnableComApiRegreq(Signal *signal, Uint32 node)
3613 {
3614   NodeInfo::NodeType type = getNodeInfo(node).getType();
3615   Uint32 version = getNodeInfo(node).m_version;
3616   recompute_version_info(type, version);
3617 
3618   signal->theData[0] = node;
3619   signal->theData[1] = version;
3620   NodeReceiverGroup rg(QMGR, c_clusterNodes);
3621   rg.m_nodes.clear(getOwnNodeId());
3622   sendVersionedDb(rg, GSN_NODE_VERSION_REP, signal, 2, JBB,
3623                   NDBD_NODE_VERSION_REP);
3624 
3625   signal->theData[0] = node;
3626   EXECUTE_DIRECT(NDBCNTR, GSN_API_START_REP, signal, 1);
3627 
3628   sendApiRegConf(signal, node);
3629 }
3630 
3631 void
sendApiRegConf(Signal * signal,Uint32 node)3632 Qmgr::sendApiRegConf(Signal *signal, Uint32 node)
3633 {
3634   NodeRecPtr apiNodePtr;
3635   apiNodePtr.i = node;
3636   ptrCheckGuard(apiNodePtr, MAX_NODES, nodeRec);
3637   const BlockReference ref = apiNodePtr.p->blockRef;
3638   ndbassert(ref != 0);
3639 
3640   ApiRegConf * const apiRegConf = (ApiRegConf *)&signal->theData[0];
3641   apiRegConf->qmgrRef = reference();
3642   apiRegConf->apiHeartbeatFrequency = (chbApiDelay / 10);
3643   apiRegConf->version = NDB_VERSION;
3644   apiRegConf->mysql_version = NDB_MYSQL_VERSION_D;
3645   apiRegConf->nodeState = getNodeState();
3646   {
3647     NodeRecPtr nodePtr;
3648     nodePtr.i = getOwnNodeId();
3649     ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
3650     Uint32 dynamicId = nodePtr.p->ndynamicId;
3651 
3652     if(apiRegConf->nodeState.masterNodeId != getOwnNodeId()){
3653       jam();
3654       apiRegConf->nodeState.dynamicId = dynamicId;
3655     } else {
3656       apiRegConf->nodeState.dynamicId = (Uint32)(-(Int32)dynamicId);
3657     }
3658   }
3659   NodeVersionInfo info = getNodeVersionInfo();
3660   apiRegConf->minDbVersion = info.m_type[NodeInfo::DB].m_min_version;
3661   apiRegConf->nodeState.m_connected_nodes.assign(c_connectedNodes);
3662   sendSignal(ref, GSN_API_REGCONF, signal, ApiRegConf::SignalLength, JBB);
3663 }
3664 
3665 void
sendVersionedDb(NodeReceiverGroup rg,GlobalSignalNumber gsn,Signal * signal,Uint32 length,JobBufferLevel jbuf,Uint32 minversion)3666 Qmgr::sendVersionedDb(NodeReceiverGroup rg,
3667 		      GlobalSignalNumber gsn,
3668 		      Signal* signal,
3669 		      Uint32 length,
3670 		      JobBufferLevel jbuf,
3671 		      Uint32 minversion)
3672 {
3673   jam();
3674   NodeVersionInfo info = getNodeVersionInfo();
3675   if (info.m_type[NodeInfo::DB].m_min_version >= minversion)
3676   {
3677     jam();
3678     sendSignal(rg, gsn, signal, length, jbuf);
3679   }
3680   else
3681   {
3682     jam();
3683     Uint32 i = 0, cnt = 0;
3684     while((i = rg.m_nodes.find(i + 1)) != NodeBitmask::NotFound)
3685     {
3686       jam();
3687       if (getNodeInfo(i).m_version >= minversion)
3688       {
3689 	jam();
3690 	cnt++;
3691 	sendSignal(numberToRef(rg.m_block, i), gsn, signal, length, jbuf);
3692       }
3693     }
3694     ndbassert((cnt == 0 && rg.m_nodes.count() == 0) ||
3695 	      (cnt < rg.m_nodes.count()));
3696   }
3697 }
3698 
3699 void
execAPI_VERSION_REQ(Signal * signal)3700 Qmgr::execAPI_VERSION_REQ(Signal * signal) {
3701   jamEntry();
3702   ApiVersionReq * const req = (ApiVersionReq *)signal->getDataPtr();
3703 
3704   Uint32 senderRef = req->senderRef;
3705   Uint32 nodeId = req->nodeId;
3706 
3707   ApiVersionConf * conf = (ApiVersionConf *)req;
3708   if(getNodeInfo(nodeId).m_connected)
3709   {
3710     conf->version = getNodeInfo(nodeId).m_version;
3711     conf->mysql_version = getNodeInfo(nodeId).m_mysql_version;
3712     struct in_addr in= globalTransporterRegistry.get_connect_address(nodeId);
3713     conf->inet_addr= in.s_addr;
3714   }
3715   else
3716   {
3717     conf->version =  0;
3718     conf->mysql_version =  0;
3719     conf->inet_addr= 0;
3720   }
3721   conf->nodeId = nodeId;
3722 
3723   sendSignal(senderRef,
3724 	     GSN_API_VERSION_CONF,
3725 	     signal,
3726 	     ApiVersionConf::SignalLength, JBB);
3727 }
3728 
3729 void
execNODE_VERSION_REP(Signal * signal)3730 Qmgr::execNODE_VERSION_REP(Signal* signal)
3731 {
3732   jamEntry();
3733   Uint32 nodeId = signal->theData[0];
3734   Uint32 version = signal->theData[1];
3735 
3736   if (nodeId < MAX_NODES)
3737   {
3738     jam();
3739     Uint32 type = getNodeInfo(nodeId).m_type;
3740     setNodeInfo(nodeId).m_version = version;
3741     recompute_version_info(type, version);
3742   }
3743 }
3744 
3745 void
recompute_version_info(Uint32 type,Uint32 version)3746 Qmgr::recompute_version_info(Uint32 type, Uint32 version)
3747 {
3748   NodeVersionInfo& info = setNodeVersionInfo();
3749   switch(type){
3750   case NodeInfo::DB:
3751   case NodeInfo::API:
3752   case NodeInfo::MGM:
3753     break;
3754   default:
3755     return;
3756   }
3757 
3758   if (info.m_type[type].m_min_version == 0 ||
3759       version < info.m_type[type].m_min_version)
3760     info.m_type[type].m_min_version = version;
3761   if (version > info.m_type[type].m_max_version)
3762     info.m_type[type].m_max_version = version;
3763 }
3764 
3765 void
recompute_version_info(Uint32 type)3766 Qmgr::recompute_version_info(Uint32 type)
3767 {
3768   switch(type){
3769   case NodeInfo::DB:
3770   case NodeInfo::API:
3771   case NodeInfo::MGM:
3772     break;
3773   default:
3774     return;
3775   }
3776 
3777   Uint32 min = ~0, max = 0;
3778   Uint32 cnt = type == NodeInfo::DB ? MAX_NDB_NODES : MAX_NODES;
3779   for (Uint32 i = 1; i<cnt; i++)
3780   {
3781     if (getNodeInfo(i).m_type == type)
3782     {
3783       Uint32 version = getNodeInfo(i).m_version;
3784 
3785       if (version)
3786       {
3787 	if (version < min)
3788 	  min = version;
3789 	if (version > max)
3790 	  max = version;
3791       }
3792     }
3793   }
3794 
3795   NodeVersionInfo& info = setNodeVersionInfo();
3796   info.m_type[type].m_min_version = min == ~(Uint32)0 ? 0 : min;
3797   info.m_type[type].m_max_version = max;
3798 }
3799 
/*
 * NOTE: the function below is compiled out (#if 0) and kept for reference
 * only. As written it rejects (returns false for) a non-MGM node whose
 * major or minor version is lower than our own while its version is
 * >= API_UPGRADE_VERSION; MGM nodes are always accepted.
 */
#if 0
bool
Qmgr::checkAPIVersion(NodeId nodeId,
		      Uint32 apiVersion, Uint32 ownVersion) const {
  bool ret=true;
  /**
   * First implementation...
   */
  if ((getMajor(apiVersion) < getMajor(ownVersion) ||
       getMinor(apiVersion) < getMinor(ownVersion)) &&
      apiVersion >= API_UPGRADE_VERSION) {
    jam();
    if ( getNodeInfo(nodeId).getType() !=  NodeInfo::MGM ) {
      jam();
      ret = false;
    } else {
      jam();
      /* we have a software upgrade situation, mgmtsrvr should be
       * the highest, let him decide what to do
       */
      ;
    }
  }
  return ret;
}
#endif
3826 
3827 void
sendApiRegRef(Signal * signal,Uint32 Tref,ApiRegRef::ErrorCode err)3828 Qmgr::sendApiRegRef(Signal* signal, Uint32 Tref, ApiRegRef::ErrorCode err){
3829   ApiRegRef* ref = (ApiRegRef*)signal->getDataPtrSend();
3830   ref->ref = reference();
3831   ref->version = NDB_VERSION;
3832   ref->mysql_version = NDB_MYSQL_VERSION_D;
3833   ref->errorCode = err;
3834   sendSignal(Tref, GSN_API_REGREF, signal, ApiRegRef::SignalLength, JBB);
3835 }
3836 
3837 /**--------------------------------------------------------------------------
3838  * A NODE HAS BEEN DECLARED AS DOWN. WE WILL CLOSE THE COMMUNICATION TO THIS
 * NODE IF NOT ALREADY DONE. IF WE ARE PRESIDENT OR BECOME PRESIDENT BECAUSE
3840  * OF A FAILED PRESIDENT THEN WE WILL TAKE FURTHER ACTION.
3841  *---------------------------------------------------------------------------*/
failReportLab(Signal * signal,Uint16 aFailedNode,FailRep::FailCause aFailCause,Uint16 sourceNode)3842 void Qmgr::failReportLab(Signal* signal, Uint16 aFailedNode,
3843 			 FailRep::FailCause aFailCause,
3844                          Uint16 sourceNode)
3845 {
3846   NodeRecPtr nodePtr;
3847   NodeRecPtr failedNodePtr;
3848   NodeRecPtr myNodePtr;
3849   UintR TnoFailedNodes;
3850 
3851   failedNodePtr.i = aFailedNode;
3852   ptrCheckGuard(failedNodePtr, MAX_NDB_NODES, nodeRec);
3853   FailRep* rep = (FailRep*)signal->getDataPtr();
3854 
3855   if (check_multi_node_shutdown(signal))
3856   {
3857     jam();
3858     return;
3859   }
3860 
3861   if (isNodeConnectivitySuspect(sourceNode) &&
3862       // (! isNodeConnectivitySuspect(aFailedNode)) &&  // TODO : Required?
3863       ((aFailCause == FailRep::ZCONNECT_CHECK_FAILURE) ||
3864        (aFailCause == FailRep::ZLINK_FAILURE)))
3865   {
3866     jam();
3867     /* Connectivity related failure report from a node with suspect
3868      * connectivity, handle differently
3869      */
3870     ndbrequire(sourceNode != getOwnNodeId());
3871 
3872     handleFailFromSuspect(signal,
3873                           aFailCause,
3874                           aFailedNode,
3875                           sourceNode);
3876     return;
3877   }
3878 
3879   if (failedNodePtr.i == getOwnNodeId()) {
3880     jam();
3881 
3882     Uint32 code = NDBD_EXIT_NODE_DECLARED_DEAD;
3883     const char * msg = 0;
3884     char extra[100];
3885     switch(aFailCause){
3886     case FailRep::ZOWN_FAILURE:
3887       msg = "Own failure";
3888       break;
3889     case FailRep::ZOTHER_NODE_WHEN_WE_START:
3890     case FailRep::ZOTHERNODE_FAILED_DURING_START:
3891       msg = "Other node died during start";
3892       break;
3893     case FailRep::ZIN_PREP_FAIL_REQ:
3894       msg = "Prep fail";
3895       break;
3896     case FailRep::ZSTART_IN_REGREQ:
3897       msg = "Start timeout";
3898       break;
3899     case FailRep::ZHEARTBEAT_FAILURE:
3900       msg = "Heartbeat failure";
3901       break;
3902     case FailRep::ZLINK_FAILURE:
3903       msg = "Connection failure";
3904       break;
3905     case FailRep::ZPARTITIONED_CLUSTER:
3906     {
3907       code = NDBD_EXIT_PARTITIONED_SHUTDOWN;
3908       char buf1[100], buf2[100];
3909       c_clusterNodes.getText(buf1);
3910       if (((signal->getLength()== FailRep::OrigSignalLength + FailRep::PartitionedExtraLength) ||
3911            (signal->getLength()== FailRep::SignalLength + FailRep::PartitionedExtraLength)) &&
3912           signal->header.theVerId_signalNumber == GSN_FAIL_REP)
3913       {
3914 	jam();
3915 	NdbNodeBitmask part;
3916 	part.assign(NdbNodeBitmask::Size, rep->partitioned.partition);
3917 	part.getText(buf2);
3918 	BaseString::snprintf(extra, sizeof(extra),
3919 			     "Our cluster: %s other cluster: %s",
3920 			     buf1, buf2);
3921       }
3922       else
3923       {
3924 	jam();
3925 	BaseString::snprintf(extra, sizeof(extra),
3926 			     "Our cluster: %s", buf1);
3927       }
3928       msg = extra;
3929       break;
3930     }
3931     case FailRep::ZMULTI_NODE_SHUTDOWN:
3932       msg = "Multi node shutdown";
3933       break;
3934     case FailRep::ZCONNECT_CHECK_FAILURE:
3935       msg = "Connectivity check failure";
3936       break;
3937     default:
3938       msg = "<UNKNOWN>";
3939     }
3940 
3941     CRASH_INSERTION(932);
3942     CRASH_INSERTION(938);
3943 
3944     char buf[255];
3945     BaseString::snprintf(buf, sizeof(buf),
3946 			 "We(%u) have been declared dead by %u (via %u) reason: %s(%u)",
3947 			 getOwnNodeId(),
3948                          sourceNode,
3949 			 refToNode(signal->getSendersBlockRef()),
3950 			 msg ? msg : "<Unknown>",
3951 			 aFailCause);
3952 
3953     progError(__LINE__, code, buf);
3954     return;
3955   }//if
3956 
3957   myNodePtr.i = getOwnNodeId();
3958   ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);
3959   if (myNodePtr.p->phase != ZRUNNING) {
3960     jam();
3961     systemErrorLab(signal, __LINE__);
3962     return;
3963   }//if
3964 
3965   if (getNodeState().startLevel < NodeState::SL_STARTED)
3966   {
3967     jam();
3968     CRASH_INSERTION(932);
3969     CRASH_INSERTION(938);
3970     char buf[100];
3971     BaseString::snprintf(buf, 100, "Node failure during restart");
3972     progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf);
3973     ndbrequire(false);
3974   }
3975 
3976   TnoFailedNodes = cnoFailedNodes;
3977   failReport(signal, failedNodePtr.i, (UintR)ZTRUE, aFailCause, sourceNode);
3978   if (cpresident == getOwnNodeId()) {
3979     jam();
3980     if (ctoStatus == Q_NOT_ACTIVE) {
3981       jam();
3982       /**--------------------------------------------------------------------
3983        * AS PRESIDENT WE ARE REQUIRED TO START THE EXCLUSION PROCESS SUCH THAT
3984        * THE APPLICATION SEE NODE FAILURES IN A CONSISTENT ORDER.
3985        * IF WE HAVE BECOME PRESIDENT NOW (CTO_STATUS = ACTIVE) THEN WE HAVE
3986        * TO COMPLETE THE PREVIOUS COMMIT FAILED NODE PROCESS BEFORE STARTING
3987        * A NEW.
3988        * CTO_STATUS = ACTIVE CAN ALSO MEAN THAT WE ARE PRESIDENT AND ARE
3989        * CURRENTLY COMMITTING A SET OF NODE CRASHES. IN THIS CASE IT IS NOT
3990        * ALLOWED TO START PREPARING NEW NODE CRASHES.
3991        *---------------------------------------------------------------------*/
3992       if (TnoFailedNodes != cnoFailedNodes) {
3993         jam();
3994         cfailureNr = cfailureNr + 1;
3995         for (nodePtr.i = 1;
3996              nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
3997           jam();
3998           ptrAss(nodePtr, nodeRec);
3999           if (nodePtr.p->phase == ZRUNNING) {
4000             jam();
4001             sendPrepFailReq(signal, nodePtr.i);
4002           }//if
4003         }//for
4004       }//if
4005     }//if
4006   }//if
4007   return;
4008 }//Qmgr::failReportLab()
4009 
4010 /**-------------------------------------------------------------------------
4011  * WE HAVE RECEIVED A PREPARE TO EXCLUDE A NUMBER OF NODES FROM THE CLUSTER.
4012  * WE WILL FIRST CHECK THAT WE HAVE NOT ANY MORE NODES THAT
4013  * WE ALSO HAVE EXCLUDED
4014  *--------------------------------------------------------------------------*/
4015 /*******************************/
4016 /* PREP_FAILREQ               */
4017 /*******************************/
execPREP_FAILREQ(Signal * signal)4018 void Qmgr::execPREP_FAILREQ(Signal* signal)
4019 {
4020   NodeRecPtr myNodePtr;
4021   jamEntry();
4022 
4023   c_start.reset();
4024 
4025   if (check_multi_node_shutdown(signal))
4026   {
4027     jam();
4028     return;
4029   }
4030 
4031   PrepFailReqRef * const prepFail = (PrepFailReqRef *)&signal->theData[0];
4032 
4033   BlockReference Tblockref  = prepFail->xxxBlockRef;
4034   Uint16 TfailureNr = prepFail->failNo;
4035   cnoPrepFailedNodes = prepFail->noOfNodes;
4036   UintR arrayIndex = 0;
4037   Uint32 Tindex;
4038   for (Tindex = 0; Tindex < MAX_NDB_NODES; Tindex++) {
4039     if (NdbNodeBitmask::get(prepFail->theNodes, Tindex)){
4040       cprepFailedNodes[arrayIndex] = Tindex;
4041       arrayIndex++;
4042     }//if
4043   }//for
4044   UintR guard0;
4045 
4046   /**
4047    * Block commit until node failures has stabilized
4048    *
4049    * @See RT352
4050    */
4051   BlockCommitOrd* const block = (BlockCommitOrd *)&signal->theData[0];
4052   block->failNo = TfailureNr;
4053   EXECUTE_DIRECT(DBDIH, GSN_BLOCK_COMMIT_ORD, signal,
4054 		 BlockCommitOrd::SignalLength);
4055 
4056   myNodePtr.i = getOwnNodeId();
4057   ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);
4058   if (myNodePtr.p->phase != ZRUNNING) {
4059     jam();
4060     systemErrorLab(signal, __LINE__);
4061     return;
4062   }//if
4063 
4064   if (getNodeState().startLevel < NodeState::SL_STARTED)
4065   {
4066     jam();
4067     CRASH_INSERTION(932);
4068     CRASH_INSERTION(938);
4069     char buf[100];
4070     BaseString::snprintf(buf, 100, "Node failure during restart");
4071     progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf);
4072     ndbrequire(false);
4073   }
4074 
4075   guard0 = cnoPrepFailedNodes - 1;
4076   arrGuard(guard0, MAX_NDB_NODES);
4077   for (Tindex = 0; Tindex <= guard0; Tindex++) {
4078     jam();
4079     failReport(signal,
4080                cprepFailedNodes[Tindex],
4081                (UintR)ZFALSE,
4082                FailRep::ZIN_PREP_FAIL_REQ,
4083                0); /* Source node not required (or known) here */
4084   }//for
4085   sendCloseComReq(signal, Tblockref, TfailureNr);
4086   cnoCommitFailedNodes = 0;
4087   cprepareFailureNr = TfailureNr;
4088   return;
4089 }//Qmgr::execPREP_FAILREQ()
4090 
4091 
handleApiCloseComConf(Signal * signal)4092 void Qmgr::handleApiCloseComConf(Signal* signal)
4093 {
4094   jam();
4095   CloseComReqConf * const closeCom = (CloseComReqConf *)&signal->theData[0];
4096 
4097   /* Api failure special case */
4098   for(Uint32 nodeId = 0; nodeId < MAX_NODES; nodeId ++)
4099   {
4100     if (NodeBitmask::get(closeCom->theNodes, nodeId))
4101     {
4102       jam();
4103       /* Check that *only* 1 *API* node is included in
4104        * this CLOSE_COM_CONF
4105        */
4106       ndbrequire(getNodeInfo(nodeId).getType() != NodeInfo::DB);
4107       ndbrequire(closeCom->noOfNodes == 1);
4108       NodeBitmask::clear(closeCom->theNodes, nodeId);
4109       ndbrequire(NodeBitmask::isclear(closeCom->theNodes));
4110 
4111       /* Now that we know communication from the failed Api has
4112        * ceased, we can send the required API_FAILREQ signals
4113        * and continue API failure handling
4114        */
4115       NodeRecPtr failedNodePtr;
4116       failedNodePtr.i = nodeId;
4117       ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec);
4118 
4119       ndbrequire((failedNodePtr.p->failState ==
4120                   WAITING_FOR_CLOSECOMCONF_ACTIVE) ||
4121                  (failedNodePtr.p->failState ==
4122                   WAITING_FOR_CLOSECOMCONF_NOTACTIVE));
4123 
4124       if (failedNodePtr.p->failState == WAITING_FOR_CLOSECOMCONF_ACTIVE)
4125       {
4126         /**
4127          * Inform application blocks TC, DICT, SUMA etc.
4128          */
4129         jam();
4130         sendApiFailReq(signal, nodeId, false); // !sumaOnly
4131         arbitRec.code = ArbitCode::ApiFail;
4132         handleArbitApiFail(signal, nodeId);
4133       }
4134       else
4135       {
4136         /**
4137          * Always inform SUMA
4138          */
4139         jam();
4140         sendApiFailReq(signal, nodeId, true); // sumaOnly
4141       }
4142 
4143       if (getNodeInfo(failedNodePtr.i).getType() == NodeInfo::MGM)
4144       {
4145         /**
4146          * Allow MGM do reconnect "directly"
4147          */
4148         jam();
4149         setNodeInfo(failedNodePtr.i).m_heartbeat_cnt = 3;
4150       }
4151 
4152       /* Handled the single API node failure */
4153       return;
4154     }
4155   }
4156   /* Never get here */
4157   ndbrequire(false);
4158 }
4159 
4160 /**---------------------------------------------------------------------------
 * THE CRASHED NODES HAVE BEEN EXCLUDED FROM COMMUNICATION.
4162  * WE WILL CHECK WHETHER ANY MORE NODES HAVE FAILED DURING THE PREPARE PROCESS.
4163  * IF SO WE WILL REFUSE THE PREPARE PHASE AND EXPECT A NEW PREPARE MESSAGE
4164  * WITH ALL FAILED NODES INCLUDED.
4165  *---------------------------------------------------------------------------*/
4166 /*******************************/
4167 /* CLOSE_COMCONF              */
4168 /*******************************/
execCLOSE_COMCONF(Signal * signal)4169 void Qmgr::execCLOSE_COMCONF(Signal* signal)
4170 {
4171   jamEntry();
4172 
4173   CloseComReqConf * const closeCom = (CloseComReqConf *)&signal->theData[0];
4174 
4175   Uint32 requestType = closeCom->requestType;
4176 
4177   if (requestType == CloseComReqConf::RT_API_FAILURE)
4178   {
4179     jam();
4180     handleApiCloseComConf(signal);
4181     return;
4182   }
4183 
4184   /* Normal node failure preparation path */
4185   ndbassert(requestType == CloseComReqConf::RT_NODE_FAILURE);
4186   BlockReference Tblockref  = closeCom->xxxBlockRef;
4187   Uint16 TfailureNr = closeCom->failNo;
4188 
4189   cnoPrepFailedNodes = closeCom->noOfNodes;
4190   UintR arrayIndex = 0;
4191   UintR Tindex = 0;
4192   for(Tindex = 0; Tindex < MAX_NDB_NODES; Tindex++){
4193     if(NdbNodeBitmask::get(closeCom->theNodes, Tindex)){
4194       cprepFailedNodes[arrayIndex] = Tindex;
4195       arrayIndex++;
4196     }
4197   }
4198   ndbassert(arrayIndex == cnoPrepFailedNodes);
4199   UintR tprepFailConf;
4200   UintR Tindex2;
4201   UintR guard0;
4202   UintR guard1;
4203   UintR Tfound;
4204   Uint16 TfailedNodeNo;
4205 
4206   tprepFailConf = ZTRUE;
4207   if (cnoFailedNodes > 0) {
4208     jam();
4209     /* Check whether the set of nodes which have had communications
4210      * closed is the same as the set of failed nodes.
4211      * If it is, we can confirm the PREP_FAIL phase for this set
4212      * of nodes to the President.
4213      * If it is not, we Refuse the PREP_FAIL phase for this set
4214      * of nodes, the President will start a new PREP_FAIL phase
4215      * for the new set.
4216      */
4217     guard0 = cnoFailedNodes - 1;
4218     arrGuard(guard0, MAX_NDB_NODES);
4219     for (Tindex = 0; Tindex <= guard0; Tindex++) {
4220       jam();
4221       TfailedNodeNo = cfailedNodes[Tindex];
4222       Tfound = ZFALSE;
4223       guard1 = cnoPrepFailedNodes - 1;
4224       arrGuard(guard1, MAX_NDB_NODES);
4225       for (Tindex2 = 0; Tindex2 <= guard1; Tindex2++) {
4226         jam();
4227         if (TfailedNodeNo == cprepFailedNodes[Tindex2]) {
4228           jam();
4229           Tfound = ZTRUE;
4230         }//if
4231       }//for
4232       if (Tfound == ZFALSE) {
4233         jam();
4234         /* A failed node is missing from the set, we will not
4235          * confirm this Prepare_Fail phase.
4236          * Store the node id in the array for later.
4237          */
4238         tprepFailConf = ZFALSE;
4239         arrGuard(cnoPrepFailedNodes, MAX_NDB_NODES);
4240         cprepFailedNodes[cnoPrepFailedNodes] = TfailedNodeNo;
4241         cnoPrepFailedNodes = cnoPrepFailedNodes + 1;
4242       }//if
4243     }//for
4244   }//if
4245   if (tprepFailConf == ZFALSE) {
4246     jam();
4247     /* Inform President that we cannot confirm the PREP_FAIL
4248      * phase as we are aware of at least one other node
4249      * failure
4250      */
4251     for (Tindex = 0; Tindex < MAX_NDB_NODES; Tindex++) {
4252       cfailedNodes[Tindex] = cprepFailedNodes[Tindex];
4253     }//for
4254     cnoFailedNodes = cnoPrepFailedNodes;
4255     sendPrepFailReqRef(signal,
4256 		       Tblockref,
4257 		       GSN_PREP_FAILREF,
4258 		       reference(),
4259 		       cfailureNr,
4260 		       cnoPrepFailedNodes,
4261 		       cprepFailedNodes);
4262   } else {
4263     /* We have prepared the failure of the requested nodes
4264      * send confirmation to the president
4265      */
4266     jam();
4267     cnoCommitFailedNodes = cnoPrepFailedNodes;
4268     guard0 = cnoPrepFailedNodes - 1;
4269     arrGuard(guard0, MAX_NDB_NODES);
4270     for (Tindex = 0; Tindex <= guard0; Tindex++) {
4271       jam();
4272       arrGuard(Tindex, MAX_NDB_NODES);
4273       ccommitFailedNodes[Tindex] = cprepFailedNodes[Tindex];
4274     }//for
4275     signal->theData[0] = getOwnNodeId();
4276     signal->theData[1] = TfailureNr;
4277     sendSignal(Tblockref, GSN_PREP_FAILCONF, signal, 2, JBA);
4278   }//if
4279   return;
4280 }//Qmgr::execCLOSE_COMCONF()
4281 
4282 /*---------------------------------------------------------------------------*/
/* WE HAVE RECEIVED CONFIRMATION THAT A NODE HAS PREPARED THE FAILURE.       */
4284 /*---------------------------------------------------------------------------*/
4285 /*******************************/
4286 /* PREP_FAILCONF              */
4287 /*******************************/
execPREP_FAILCONF(Signal * signal)4288 void Qmgr::execPREP_FAILCONF(Signal* signal)
4289 {
4290   NodeRecPtr nodePtr;
4291   NodeRecPtr replyNodePtr;
4292   jamEntry();
4293   replyNodePtr.i = signal->theData[0];
4294   Uint16 TfailureNr = signal->theData[1];
4295   if (TfailureNr != cfailureNr) {
4296     jam();
4297     /**----------------------------------------------------------------------
4298      * WE HAVE ALREADY STARTING A NEW ATTEMPT TO EXCLUDE A NUMBER OF NODES.
4299      *  IGNORE
4300      *----------------------------------------------------------------------*/
4301     return;
4302   }//if
4303   ptrCheckGuard(replyNodePtr, MAX_NDB_NODES, nodeRec);
4304   replyNodePtr.p->sendPrepFailReqStatus = Q_NOT_ACTIVE;
4305   for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
4306     jam();
4307     ptrAss(nodePtr, nodeRec);
4308     if (nodePtr.p->phase == ZRUNNING) {
4309       if (nodePtr.p->sendPrepFailReqStatus == Q_ACTIVE) {
4310         jam();
4311         return;
4312       }//if
4313     }//if
4314   }//for
4315   /**
4316    * Check node count and groups and invoke arbitrator if necessary.
4317    * Continues via sendCommitFailReq() if successful.
4318    */
4319   arbitRec.failureNr = cfailureNr;
4320   const NodeState & s = getNodeState();
4321   if(s.startLevel == NodeState::SL_STOPPING_3 && s.stopping.systemShutdown){
4322     jam();
4323     /**
4324      * We're performing a system shutdown,
4325      * don't let artibtrator shut us down
4326      */
4327     return;
4328   }
4329 
4330   switch(arbitRec.method){
4331   case ArbitRec::DISABLED:
4332     jam();
4333     // No arbitration -> immediately commit the failed nodes
4334     sendCommitFailReq(signal);
4335     break;
4336 
4337   case ArbitRec::METHOD_EXTERNAL:
4338   case ArbitRec::METHOD_DEFAULT:
4339     jam();
4340     handleArbitCheck(signal);
4341     break;
4342 
4343   }
4344   return;
4345 }//Qmgr::execPREP_FAILCONF()
4346 
4347 void
sendCommitFailReq(Signal * signal)4348 Qmgr::sendCommitFailReq(Signal* signal)
4349 {
4350   NodeRecPtr nodePtr;
4351   jam();
4352   if (arbitRec.failureNr != cfailureNr) {
4353     jam();
4354     /**----------------------------------------------------------------------
4355      * WE HAVE ALREADY STARTING A NEW ATTEMPT TO EXCLUDE A NUMBER OF NODES.
4356      *  IGNORE
4357      *----------------------------------------------------------------------*/
4358     return;
4359   }//if
4360   /**-----------------------------------------------------------------------
4361    * WE HAVE SUCCESSFULLY PREPARED A SET OF NODE FAILURES. WE WILL NOW COMMIT
4362    * THESE NODE FAILURES.
4363    *-------------------------------------------------------------------------*/
4364   for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
4365     jam();
4366     ptrAss(nodePtr, nodeRec);
4367 
4368 #ifdef ERROR_INSERT
4369     if (false && ERROR_INSERTED(935) && nodePtr.i == c_error_insert_extra)
4370     {
4371       ndbout_c("skipping node %d", c_error_insert_extra);
4372       CLEAR_ERROR_INSERT_VALUE;
4373       signal->theData[0] = 9999;
4374       sendSignalWithDelay(CMVMI_REF, GSN_NDB_TAMPER, signal, 1000, 1);
4375       continue;
4376     }
4377 #endif
4378 
4379     if (nodePtr.p->phase == ZRUNNING) {
4380       jam();
4381       nodePtr.p->sendCommitFailReqStatus = Q_ACTIVE;
4382       signal->theData[0] = cpdistref;
4383       signal->theData[1] = cfailureNr;
4384       sendSignal(nodePtr.p->blockRef, GSN_COMMIT_FAILREQ, signal, 2, JBA);
4385     }//if
4386   }//for
4387   ctoStatus = Q_ACTIVE;
4388   cnoFailedNodes = 0;
4389   return;
4390 }//sendCommitFailReq()
4391 
4392 /*---------------------------------------------------------------------------*/
/* SOME NODE HAS DISCOVERED A NODE FAILURE THAT WE HAVE NOT YET DISCOVERED.  */
4394 /* WE WILL START ANOTHER ROUND OF PREPARING A SET OF NODE FAILURES.          */
4395 /*---------------------------------------------------------------------------*/
4396 /*******************************/
4397 /* PREP_FAILREF               */
4398 /*******************************/
execPREP_FAILREF(Signal * signal)4399 void Qmgr::execPREP_FAILREF(Signal* signal)
4400 {
4401   NodeRecPtr nodePtr;
4402   jamEntry();
4403 
4404   PrepFailReqRef * const prepFail = (PrepFailReqRef *)&signal->theData[0];
4405 
4406   Uint16 TfailureNr = prepFail->failNo;
4407   cnoPrepFailedNodes = prepFail->noOfNodes;
4408 
4409   UintR arrayIndex = 0;
4410   UintR Tindex = 0;
4411   for(Tindex = 0; Tindex < MAX_NDB_NODES; Tindex++) {
4412     jam();
4413     if(NdbNodeBitmask::get(prepFail->theNodes, Tindex)){
4414       jam();
4415       cprepFailedNodes[arrayIndex] = Tindex;
4416       arrayIndex++;
4417     }//if
4418   }//for
4419   if (TfailureNr != cfailureNr) {
4420     jam();
4421     /**---------------------------------------------------------------------
4422      * WE HAVE ALREADY STARTING A NEW ATTEMPT TO EXCLUDE A NUMBER OF NODES.
4423      *  IGNORE
4424      *----------------------------------------------------------------------*/
4425     return;
4426   }//if
4427   UintR guard0;
4428   UintR Ti;
4429 
4430   cnoFailedNodes = cnoPrepFailedNodes;
4431   guard0 = cnoPrepFailedNodes - 1;
4432   arrGuard(guard0, MAX_NDB_NODES);
4433   for (Ti = 0; Ti <= guard0; Ti++) {
4434     jam();
4435     cfailedNodes[Ti] = cprepFailedNodes[Ti];
4436   }//for
4437   cfailureNr = cfailureNr + 1;
4438   for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
4439     jam();
4440     ptrAss(nodePtr, nodeRec);
4441     if (nodePtr.p->phase == ZRUNNING) {
4442       jam();
4443       sendPrepFailReq(signal, nodePtr.i);
4444     }//if
4445   }//for
4446   return;
4447 }//Qmgr::execPREP_FAILREF()
4448 
4449 static
4450 Uint32
clear_nodes(Uint32 dstcnt,Uint16 dst[],Uint32 srccnt,const Uint16 src[])4451 clear_nodes(Uint32 dstcnt, Uint16 dst[], Uint32 srccnt, const Uint16 src[])
4452 {
4453   if (srccnt == 0)
4454     return dstcnt;
4455 
4456   Uint32 pos = 0;
4457   for (Uint32 i = 0; i<dstcnt; i++)
4458   {
4459     Uint32 node = dst[i];
4460     for (Uint32 j = 0; j<srccnt; j++)
4461     {
4462       if (node == dst[j])
4463       {
4464 	node = RNIL;
4465 	break;
4466       }
4467     }
4468     if (node != RNIL)
4469     {
4470       dst[pos++] = node;
4471     }
4472   }
4473   return pos;
4474 }
4475 
4476 /*---------------------------------------------------------------------------*/
4477 /*    THE PRESIDENT IS NOW COMMITTING THE PREVIOUSLY PREPARED NODE FAILURE.  */
4478 /*---------------------------------------------------------------------------*/
4479 /***********************/
4480 /* COMMIT_FAILREQ     */
4481 /***********************/
execCOMMIT_FAILREQ(Signal * signal)4482 void Qmgr::execCOMMIT_FAILREQ(Signal* signal)
4483 {
4484   NodeRecPtr nodePtr;
4485   jamEntry();
4486 
4487   CRASH_INSERTION(935);
4488 
4489   BlockReference Tblockref = signal->theData[0];
4490   UintR TfailureNr = signal->theData[1];
4491   if (Tblockref != cpdistref) {
4492     jam();
4493     return;
4494   }//if
4495   UintR guard0;
4496   UintR Tj;
4497 
4498   /**
4499    * Block commit until node failures has stabilized
4500    *
4501    * @See RT352
4502    */
4503   UnblockCommitOrd* const unblock = (UnblockCommitOrd *)&signal->theData[0];
4504   unblock->failNo = TfailureNr;
4505   EXECUTE_DIRECT(DBDIH, GSN_UNBLOCK_COMMIT_ORD, signal,
4506 		 UnblockCommitOrd::SignalLength);
4507 
4508   if ((ccommitFailureNr != TfailureNr) &&
4509       (cnoCommitFailedNodes > 0)) {
4510     jam();
4511     /**-----------------------------------------------------------------------
4512      * WE ONLY DO THIS PART OF THE COMMIT HANDLING THE FIRST TIME WE HEAR THIS
4513      * SIGNAL. WE CAN HEAR IT SEVERAL TIMES IF THE PRESIDENTS KEEP FAILING.
4514      *-----------------------------------------------------------------------*/
4515     ccommitFailureNr = TfailureNr;
4516     NodeFailRep * const nodeFail = (NodeFailRep *)&signal->theData[0];
4517 
4518     nodeFail->failNo    = ccommitFailureNr;
4519     nodeFail->noOfNodes = cnoCommitFailedNodes;
4520     nodeFail->masterNodeId = cpresident;
4521     NdbNodeBitmask::clear(nodeFail->theNodes);
4522     for(unsigned i = 0; i < cnoCommitFailedNodes; i++) {
4523       jam();
4524       NdbNodeBitmask::set(nodeFail->theNodes, ccommitFailedNodes[i]);
4525     }//if
4526 
4527     if (ERROR_INSERTED(936))
4528     {
4529       sendSignalWithDelay(NDBCNTR_REF, GSN_NODE_FAILREP, signal,
4530                           200, NodeFailRep::SignalLength);
4531     }
4532     else
4533     {
4534       sendSignal(NDBCNTR_REF, GSN_NODE_FAILREP, signal,
4535                  NodeFailRep::SignalLength, JBB);
4536     }
4537 
4538     guard0 = cnoCommitFailedNodes - 1;
4539     arrGuard(guard0, MAX_NDB_NODES);
4540     /**--------------------------------------------------------------------
4541      * WE MUST PREPARE TO ACCEPT THE CRASHED NODE INTO THE CLUSTER AGAIN BY
4542      * SETTING UP CONNECTIONS AGAIN AFTER THREE SECONDS OF DELAY.
4543      *--------------------------------------------------------------------*/
4544     for (Tj = 0; Tj <= guard0; Tj++) {
4545       jam();
4546       nodePtr.i = ccommitFailedNodes[Tj];
4547       ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
4548       nodePtr.p->phase = ZFAIL_CLOSING;
4549       nodePtr.p->failState = WAITING_FOR_NDB_FAILCONF;
4550       setNodeInfo(nodePtr.i).m_heartbeat_cnt= 0;
4551       setNodeInfo(nodePtr.i).m_version = 0;
4552       c_clusterNodes.clear(nodePtr.i);
4553     }//for
4554     recompute_version_info(NodeInfo::DB);
4555     /*----------------------------------------------------------------------*/
4556     /*       WE INFORM THE API'S WE HAVE CONNECTED ABOUT THE FAILED NODES.  */
4557     /*----------------------------------------------------------------------*/
4558     for (nodePtr.i = 1; nodePtr.i < MAX_NODES; nodePtr.i++) {
4559       jam();
4560       ptrAss(nodePtr, nodeRec);
4561       if (nodePtr.p->phase == ZAPI_ACTIVE) {
4562         jam();
4563 
4564 	NodeFailRep * const nodeFail = (NodeFailRep *)&signal->theData[0];
4565 
4566 	nodeFail->failNo    = ccommitFailureNr;
4567 	nodeFail->noOfNodes = cnoCommitFailedNodes;
4568 	NdbNodeBitmask::clear(nodeFail->theNodes);
4569 	for(unsigned i = 0; i < cnoCommitFailedNodes; i++) {
4570           jam();
4571 	  NdbNodeBitmask::set(nodeFail->theNodes, ccommitFailedNodes[i]);
4572         }//for
4573         sendSignal(nodePtr.p->blockRef, GSN_NODE_FAILREP, signal,
4574 		   NodeFailRep::SignalLength, JBB);
4575       }//if
4576     }//for
4577 
4578     /**
4579      * Remove committed nodes from failed/prepared
4580      */
4581     cnoFailedNodes = clear_nodes(cnoFailedNodes,
4582 				 cfailedNodes,
4583 				 cnoCommitFailedNodes,
4584 				 ccommitFailedNodes);
4585     cnoPrepFailedNodes = clear_nodes(cnoPrepFailedNodes,
4586 				     cprepFailedNodes,
4587 				     cnoCommitFailedNodes,
4588 				     ccommitFailedNodes);
4589     cnoCommitFailedNodes = 0;
4590   }//if
4591   /**-----------------------------------------------------------------------
4592    * WE WILL ALWAYS ACKNOWLEDGE THE COMMIT EVEN WHEN RECEIVING IT MULTIPLE
4593    * TIMES SINCE IT WILL ALWAYS COME FROM A NEW PRESIDENT.
4594    *------------------------------------------------------------------------*/
4595   signal->theData[0] = getOwnNodeId();
4596   sendSignal(Tblockref, GSN_COMMIT_FAILCONF, signal, 1, JBA);
4597   return;
4598 }//Qmgr::execCOMMIT_FAILREQ()
4599 
4600 /*--------------------------------------------------------------------------*/
4601 /* WE HAVE RECEIVED A CONFIRM OF THAT THIS NODE HAVE COMMITTED THE FAILURES.*/
4602 /*--------------------------------------------------------------------------*/
4603 /*******************************/
4604 /* COMMIT_FAILCONF            */
4605 /*******************************/
execCOMMIT_FAILCONF(Signal * signal)4606 void Qmgr::execCOMMIT_FAILCONF(Signal* signal)
4607 {
4608   NodeRecPtr nodePtr;
4609   NodeRecPtr replyNodePtr;
4610   jamEntry();
4611   replyNodePtr.i = signal->theData[0];
4612 
4613   ptrCheckGuard(replyNodePtr, MAX_NDB_NODES, nodeRec);
4614   replyNodePtr.p->sendCommitFailReqStatus = Q_NOT_ACTIVE;
4615   for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
4616     jam();
4617     ptrAss(nodePtr, nodeRec);
4618     if (nodePtr.p->phase == ZRUNNING) {
4619       if (nodePtr.p->sendCommitFailReqStatus == Q_ACTIVE) {
4620         jam();
4621         return;
4622       }//if
4623     }//if
4624   }//for
4625   /*-----------------------------------------------------------------------*/
4626   /*   WE HAVE SUCCESSFULLY COMMITTED A SET OF NODE FAILURES.              */
4627   /*-----------------------------------------------------------------------*/
4628   ctoStatus = Q_NOT_ACTIVE;
4629   if (cnoFailedNodes != 0) {
4630     jam();
4631     /**----------------------------------------------------------------------
4632      *	A FAILURE OCCURRED IN THE MIDDLE OF THE COMMIT PROCESS. WE ARE NOW
4633      *  READY TO START THE FAILED NODE PROCESS FOR THIS NODE.
4634      *----------------------------------------------------------------------*/
4635     cfailureNr = cfailureNr + 1;
4636     for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
4637       jam();
4638       ptrAss(nodePtr, nodeRec);
4639       if (nodePtr.p->phase == ZRUNNING) {
4640         jam();
4641         sendPrepFailReq(signal, nodePtr.i);
4642       }//if
4643     }//for
4644   }//if
4645   return;
4646 }//Qmgr::execCOMMIT_FAILCONF()
4647 
4648 /**--------------------------------------------------------------------------
4649  * IF THE PRESIDENT FAILS IN THE MIDDLE OF THE COMMIT OF A FAILED NODE THEN
4650  * THE NEW PRESIDENT NEEDS TO QUERY THE COMMIT STATUS IN THE RUNNING NODES.
4651  *---------------------------------------------------------------------------*/
4652 /*******************************/
4653 /* PRES_TOCONF                */
4654 /*******************************/
execPRES_TOCONF(Signal * signal)4655 void Qmgr::execPRES_TOCONF(Signal* signal)
4656 {
4657   NodeRecPtr nodePtr;
4658   NodeRecPtr replyNodePtr;
4659   jamEntry();
4660   replyNodePtr.i = signal->theData[0];
4661   UintR TfailureNr = signal->theData[1];
4662   if (ctoFailureNr < TfailureNr) {
4663     jam();
4664     ctoFailureNr = TfailureNr;
4665   }//if
4666   ptrCheckGuard(replyNodePtr, MAX_NDB_NODES, nodeRec);
4667   replyNodePtr.p->sendPresToStatus = Q_NOT_ACTIVE;
4668   for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
4669     jam();
4670     ptrAss(nodePtr, nodeRec);
4671     if (nodePtr.p->sendPresToStatus == Q_ACTIVE) {
4672       jam();
4673       return;
4674     }//if
4675   }//for
4676   /*-------------------------------------------------------------------------*/
4677   /* WE ARE NOW READY TO DISCOVER WHETHER THE FAILURE WAS COMMITTED OR NOT.  */
4678   /*-------------------------------------------------------------------------*/
4679   if (ctoFailureNr > ccommitFailureNr) {
4680     jam();
4681     for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
4682       jam();
4683       ptrAss(nodePtr, nodeRec);
4684       if (nodePtr.p->phase == ZRUNNING) {
4685         jam();
4686         nodePtr.p->sendCommitFailReqStatus = Q_ACTIVE;
4687         signal->theData[0] = cpdistref;
4688         signal->theData[1] = ctoFailureNr;
4689         sendSignal(nodePtr.p->blockRef, GSN_COMMIT_FAILREQ, signal, 2, JBA);
4690       }//if
4691     }//for
4692     return;
4693   }//if
4694   /*-------------------------------------------------------------------------*/
4695   /*       WE ARE NOW READY TO START THE NEW NODE FAILURE PROCESS.           */
4696   /*-------------------------------------------------------------------------*/
4697   ctoStatus = Q_NOT_ACTIVE;
4698   cfailureNr = cfailureNr + 1;
4699   for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
4700     jam();
4701     ptrAss(nodePtr, nodeRec);
4702     if (nodePtr.p->phase == ZRUNNING) {
4703       jam();
4704       sendPrepFailReq(signal, nodePtr.i);
4705     }//if
4706   }//for
4707   return;
4708 }//Qmgr::execPRES_TOCONF()
4709 
4710 /*--------------------------------------------------------------------------*/
4711 // Provide information about the configured NDB nodes in the system.
4712 /*--------------------------------------------------------------------------*/
execREAD_NODESREQ(Signal * signal)4713 void Qmgr::execREAD_NODESREQ(Signal* signal)
4714 {
4715   jamEntry();
4716 
4717   BlockReference TBref = signal->theData[0];
4718 
4719   ReadNodesConf * const readNodes = (ReadNodesConf *)&signal->theData[0];
4720 
4721   NodeRecPtr nodePtr;
4722   nodePtr.i = getOwnNodeId();
4723   ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
4724 
4725   NdbNodeBitmask tmp = c_definedNodes;
4726   tmp.bitANDC(c_clusterNodes);
4727 
4728   readNodes->noOfNodes = c_definedNodes.count();
4729   readNodes->masterNodeId = cpresident;
4730   readNodes->ndynamicId = nodePtr.p->ndynamicId;
4731   c_definedNodes.copyto(NdbNodeBitmask::Size, readNodes->definedNodes);
4732   c_clusterNodes.copyto(NdbNodeBitmask::Size, readNodes->clusterNodes);
4733   tmp.copyto(NdbNodeBitmask::Size, readNodes->inactiveNodes);
4734   NdbNodeBitmask::clear(readNodes->startingNodes);
4735   NdbNodeBitmask::clear(readNodes->startedNodes);
4736 
4737   sendSignal(TBref, GSN_READ_NODESCONF, signal,
4738 	     ReadNodesConf::SignalLength, JBB);
4739 }//Qmgr::execREAD_NODESREQ()
4740 
systemErrorBecauseOtherNodeFailed(Signal * signal,Uint32 line,NodeId failedNodeId)4741 void Qmgr::systemErrorBecauseOtherNodeFailed(Signal* signal, Uint32 line,
4742 					     NodeId failedNodeId) {
4743   jam();
4744 
4745   // Broadcast that this node is failing to other nodes
4746   failReport(signal, getOwnNodeId(), (UintR)ZTRUE, FailRep::ZOWN_FAILURE, getOwnNodeId());
4747 
4748   char buf[100];
4749   BaseString::snprintf(buf, 100,
4750 	   "Node was shutdown during startup because node %d failed",
4751 	   failedNodeId);
4752 
4753   progError(line, NDBD_EXIT_SR_OTHERNODEFAILED, buf);
4754 }
4755 
4756 
systemErrorLab(Signal * signal,Uint32 line,const char * message)4757 void Qmgr::systemErrorLab(Signal* signal, Uint32 line, const char * message)
4758 {
4759   jam();
4760   // Broadcast that this node is failing to other nodes
4761   failReport(signal, getOwnNodeId(), (UintR)ZTRUE, FailRep::ZOWN_FAILURE, getOwnNodeId());
4762 
4763   // If it's known why shutdown occured
4764   // an error message has been passed to this function
4765   progError(line, NDBD_EXIT_NDBREQUIRE, message);
4766 
4767   return;
4768 }//Qmgr::systemErrorLab()
4769 
4770 
4771 /**---------------------------------------------------------------------------
4772  * A FAILURE HAVE BEEN DISCOVERED ON A NODE. WE NEED TO CLEAR A
4773  * NUMBER OF VARIABLES.
4774  *---------------------------------------------------------------------------*/
failReport(Signal * signal,Uint16 aFailedNode,UintR aSendFailRep,FailRep::FailCause aFailCause,Uint16 sourceNode)4775 void Qmgr::failReport(Signal* signal,
4776                       Uint16 aFailedNode,
4777                       UintR aSendFailRep,
4778                       FailRep::FailCause aFailCause,
4779                       Uint16 sourceNode)
4780 {
4781   UintR tfrMinDynamicId;
4782   NodeRecPtr failedNodePtr;
4783   NodeRecPtr nodePtr;
4784   NodeRecPtr presidentNodePtr;
4785 
4786 
4787   ndbassert((! aSendFailRep) || (sourceNode != 0));
4788 
4789   failedNodePtr.i = aFailedNode;
4790   ptrCheckGuard(failedNodePtr, MAX_NDB_NODES, nodeRec);
4791   if (failedNodePtr.p->phase == ZRUNNING) {
4792     jam();
4793 
4794 #ifdef ERROR_INSERT
4795     if (ERROR_INSERTED(938))
4796     {
4797       nodeFailCount++;
4798       ndbout_c("QMGR : execFAIL_REP : %u nodes have failed", nodeFailCount);
4799       /* Count DB nodes */
4800       Uint32 nodeCount = 0;
4801       for (Uint32 i = 1; i < MAX_NDB_NODES; i++)
4802       {
4803         if (getNodeInfo(i).getType() == NODE_TYPE_DB)
4804           nodeCount++;
4805       }
4806 
4807       /* When > 25% of cluster has failed, resume communications */
4808       if (nodeFailCount > (nodeCount / 4))
4809       {
4810         ndbout_c("QMGR : execFAIL_REP > 25%% nodes failed, resuming comms");
4811         Signal save = *signal;
4812         signal->theData[0] = 9991;
4813         sendSignal(CMVMI_REF, GSN_DUMP_STATE_ORD, signal, 1, JBB);
4814         *signal = save;
4815         nodeFailCount = 0;
4816         SET_ERROR_INSERT_VALUE(932);
4817       }
4818     }
4819 #endif
4820 
4821 /* WE ALSO NEED TO ADD HERE SOME CODE THAT GETS OUR NEW NEIGHBOURS. */
4822     if (cpresident == getOwnNodeId()) {
4823       jam();
4824       if (failedNodePtr.p->sendCommitFailReqStatus == Q_ACTIVE) {
4825         jam();
4826         signal->theData[0] = failedNodePtr.i;
4827         sendSignal(QMGR_REF, GSN_COMMIT_FAILCONF, signal, 1, JBA);
4828       }//if
4829       if (failedNodePtr.p->sendPresToStatus == Q_ACTIVE) {
4830         jam();
4831         signal->theData[0] = failedNodePtr.i;
4832         signal->theData[1] = ccommitFailureNr;
4833         sendSignal(QMGR_REF, GSN_PRES_TOCONF, signal, 2, JBA);
4834       }//if
4835     }//if
4836     failedNodePtr.p->phase = ZPREPARE_FAIL;
4837     failedNodePtr.p->sendPrepFailReqStatus = Q_NOT_ACTIVE;
4838     failedNodePtr.p->sendCommitFailReqStatus = Q_NOT_ACTIVE;
4839     failedNodePtr.p->sendPresToStatus = Q_NOT_ACTIVE;
4840     setNodeInfo(failedNodePtr.i).m_heartbeat_cnt= 0;
4841     if (aSendFailRep == ZTRUE) {
4842       jam();
4843       if (failedNodePtr.i != getOwnNodeId()) {
4844         jam();
4845 	FailRep * const failRep = (FailRep *)&signal->theData[0];
4846         failRep->failNodeId = failedNodePtr.i;
4847         failRep->failCause = aFailCause;
4848         failRep->failSourceNodeId = sourceNode;
4849         sendSignal(failedNodePtr.p->blockRef, GSN_FAIL_REP, signal,
4850 		   FailRep::SignalLength, JBA);
4851       }//if
4852       for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
4853         jam();
4854         ptrAss(nodePtr, nodeRec);
4855         if (nodePtr.p->phase == ZRUNNING) {
4856           jam();
4857 	  FailRep * const failRep = (FailRep *)&signal->theData[0];
4858 	  failRep->failNodeId = failedNodePtr.i;
4859 	  failRep->failCause = aFailCause;
4860           failRep->failSourceNodeId = sourceNode;
4861           sendSignal(nodePtr.p->blockRef, GSN_FAIL_REP, signal,
4862 		     FailRep::SignalLength, JBA);
4863         }//if
4864       }//for
4865     }//if
4866     if (failedNodePtr.i == getOwnNodeId()) {
4867       jam();
4868       return;
4869     }//if
4870 
4871     if (unlikely(m_connectivity_check.reportNodeFailure(failedNodePtr.i)))
4872     {
4873       jam();
4874       connectivityCheckCompleted(signal);
4875     }
4876 
4877     failedNodePtr.p->ndynamicId = 0;
4878     findNeighbours(signal, __LINE__);
4879     if (failedNodePtr.i == cpresident) {
4880       jam();
4881       /**--------------------------------------------------------------------
4882        * IF PRESIDENT HAVE FAILED WE MUST CALCULATE THE NEW PRESIDENT BY
4883        * FINDING THE NODE WITH THE MINIMUM DYNAMIC IDENTITY.
4884        *---------------------------------------------------------------------*/
4885       tfrMinDynamicId = (UintR)-1;
4886       for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
4887         jam();
4888         ptrAss(nodePtr, nodeRec);
4889         if (nodePtr.p->phase == ZRUNNING) {
4890           if ((nodePtr.p->ndynamicId & 0xFFFF) < tfrMinDynamicId) {
4891             jam();
4892             tfrMinDynamicId = (nodePtr.p->ndynamicId & 0xFFFF);
4893             cpresident = nodePtr.i;
4894           }//if
4895         }//if
4896       }//for
4897       presidentNodePtr.i = cpresident;
4898       ptrCheckGuard(presidentNodePtr, MAX_NDB_NODES, nodeRec);
4899       cpdistref = presidentNodePtr.p->blockRef;
4900       if (cpresident == getOwnNodeId()) {
4901 	CRASH_INSERTION(920);
4902         cfailureNr = cprepareFailureNr;
4903         ctoFailureNr = 0;
4904         ctoStatus = Q_ACTIVE;
4905 	c_start.reset(); // Don't take over nodes being started
4906         if (cnoCommitFailedNodes > 0) {
4907           jam();
4908 	  /**-----------------------------------------------------------------
4909 	   * IN THIS SITUATION WE ARE UNCERTAIN OF WHETHER THE NODE FAILURE
4910 	   * PROCESS WAS COMMITTED. WE NEED TO QUERY THE OTHER NODES ABOUT
4911 	   * THEIR STATUS.
4912 	   *-----------------------------------------------------------------*/
4913           for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES;
4914 	       nodePtr.i++) {
4915             jam();
4916             ptrAss(nodePtr, nodeRec);
4917             if (nodePtr.p->phase == ZRUNNING) {
4918               jam();
4919               nodePtr.p->sendPresToStatus = Q_ACTIVE;
4920               signal->theData[0] = cpdistref;
4921               signal->theData[1] = cprepareFailureNr;
4922               sendSignal(nodePtr.p->blockRef, GSN_PRES_TOREQ,
4923 			 signal, 1, JBA);
4924             }//if
4925           }//for
4926         } else {
4927           jam();
4928 	  /*-----------------------------------------------------------------*/
4929 	  // In this case it could be that a commit process is still ongoing.
4930 	  // If so we must conclude it as the new master.
4931 	  /*-----------------------------------------------------------------*/
4932           for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES;
4933 	       nodePtr.i++) {
4934             jam();
4935             ptrAss(nodePtr, nodeRec);
4936             if (nodePtr.p->phase == ZRUNNING) {
4937               jam();
4938               nodePtr.p->sendCommitFailReqStatus = Q_ACTIVE;
4939               signal->theData[0] = cpdistref;
4940               signal->theData[1] = ccommitFailureNr;
4941               sendSignal(nodePtr.p->blockRef, GSN_COMMIT_FAILREQ, signal,
4942 			 2, JBA);
4943             }//if
4944           }//for
4945         }//if
4946       }//if
4947     }//if
4948     arrGuard(cnoFailedNodes, MAX_NDB_NODES);
4949     cfailedNodes[cnoFailedNodes] = failedNodePtr.i;
4950     cnoFailedNodes = cnoFailedNodes + 1;
4951   }//if
4952 }//Qmgr::failReport()
4953 
4954 /*---------------------------------------------------------------------------*/
4955 /*       INPUT:  TTDI_DYN_ID                                                 */
4956 /*       OUTPUT: TTDI_NODE_ID                                                */
4957 /*---------------------------------------------------------------------------*/
translateDynamicIdToNodeId(Signal * signal,UintR TdynamicId)4958 Uint16 Qmgr::translateDynamicIdToNodeId(Signal* signal, UintR TdynamicId)
4959 {
4960   NodeRecPtr tdiNodePtr;
4961   Uint16 TtdiNodeId = ZNIL;
4962 
4963   for (tdiNodePtr.i = 1; tdiNodePtr.i < MAX_NDB_NODES; tdiNodePtr.i++) {
4964     jam();
4965     ptrAss(tdiNodePtr, nodeRec);
4966     if (tdiNodePtr.p->ndynamicId == TdynamicId) {
4967       jam();
4968       TtdiNodeId = tdiNodePtr.i;
4969       break;
4970     }//if
4971   }//for
4972   if (TtdiNodeId == ZNIL) {
4973     jam();
4974     systemErrorLab(signal, __LINE__);
4975   }//if
4976   return TtdiNodeId;
4977 }//Qmgr::translateDynamicIdToNodeId()
4978 
4979 /**--------------------------------------------------------------------------
4980  *       WHEN RECEIVING PREPARE FAILURE REQUEST WE WILL IMMEDIATELY CLOSE
4981  *       COMMUNICATION WITH ALL THOSE NODES.
4982  *--------------------------------------------------------------------------*/
sendCloseComReq(Signal * signal,BlockReference TBRef,Uint16 aFailNo)4983 void Qmgr::sendCloseComReq(Signal* signal, BlockReference TBRef, Uint16 aFailNo)
4984 {
4985   CloseComReqConf * const closeCom = (CloseComReqConf *)&signal->theData[0];
4986 
4987   closeCom->xxxBlockRef = TBRef;
4988   closeCom->requestType = CloseComReqConf::RT_NODE_FAILURE;
4989   closeCom->failNo      = aFailNo;
4990   closeCom->noOfNodes   = cnoPrepFailedNodes;
4991 
4992   NodeBitmask::clear(closeCom->theNodes);
4993 
4994   for(int i = 0; i < cnoPrepFailedNodes; i++) {
4995     const NodeId nodeId = cprepFailedNodes[i];
4996     jam();
4997     NodeBitmask::set(closeCom->theNodes, nodeId);
4998   }
4999 
5000   sendSignal(CMVMI_REF, GSN_CLOSE_COMREQ, signal,
5001 	     CloseComReqConf::SignalLength, JBA);
5002 
5003 }//Qmgr::sendCloseComReq()
5004 
5005 void
sendPrepFailReqRef(Signal * signal,Uint32 dstBlockRef,GlobalSignalNumber gsn,Uint32 blockRef,Uint32 failNo,Uint32 noOfNodes,const NodeId theNodes[])5006 Qmgr::sendPrepFailReqRef(Signal* signal,
5007 			 Uint32 dstBlockRef,
5008 			 GlobalSignalNumber gsn,
5009 			 Uint32 blockRef,
5010 			 Uint32 failNo,
5011 			 Uint32 noOfNodes,
5012 			 const NodeId theNodes[]){
5013 
5014   PrepFailReqRef * const prepFail = (PrepFailReqRef *)&signal->theData[0];
5015   prepFail->xxxBlockRef = blockRef;
5016   prepFail->failNo = failNo;
5017   prepFail->noOfNodes = noOfNodes;
5018 
5019   NdbNodeBitmask::clear(prepFail->theNodes);
5020 
5021   for(Uint32 i = 0; i<noOfNodes; i++){
5022     const NodeId nodeId = theNodes[i];
5023     NdbNodeBitmask::set(prepFail->theNodes, nodeId);
5024   }
5025 
5026   sendSignal(dstBlockRef, gsn, signal, PrepFailReqRef::SignalLength, JBA);
5027 }
5028 
5029 
5030 /**--------------------------------------------------------------------------
5031  *       SEND PREPARE FAIL REQUEST FROM PRESIDENT.
5032  *---------------------------------------------------------------------------*/
sendPrepFailReq(Signal * signal,Uint16 aNode)5033 void Qmgr::sendPrepFailReq(Signal* signal, Uint16 aNode)
5034 {
5035   NodeRecPtr sendNodePtr;
5036   sendNodePtr.i = aNode;
5037   ptrCheckGuard(sendNodePtr, MAX_NDB_NODES, nodeRec);
5038   sendNodePtr.p->sendPrepFailReqStatus = Q_ACTIVE;
5039 
5040   sendPrepFailReqRef(signal,
5041 		     sendNodePtr.p->blockRef,
5042 		     GSN_PREP_FAILREQ,
5043 		     reference(),
5044 		     cfailureNr,
5045 		     cnoFailedNodes,
5046 		     cfailedNodes);
5047 }//Qmgr::sendPrepFailReq()
5048 
5049 /**
5050  * Arbitration module.  Rest of QMGR calls us only via
5051  * the "handle" routines.
5052  */
5053 
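/**
 * If true, handleArbitCheck() applies a "one half" rule: a node set with
 * fewer than half of cnoOfNodes loses unconditionally (LoseNodes), and a
 * node set with more than half is treated as a win (WinNodes) when the
 * node group check reports partitioning.  Affects only >= 3-way
 * replication.
 */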
5058 static const bool g_ndb_arbit_one_half_rule = false;
5059 
5060 /**
5061  * Config signals are logically part of CM_INIT.
5062  */
5063 void
execARBIT_CFG(Signal * signal)5064 Qmgr::execARBIT_CFG(Signal* signal)
5065 {
5066   jamEntry();
5067   ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
5068   unsigned rank = sd->code;
5069   ndbrequire(1 <= rank && rank <= 2);
5070   arbitRec.apiMask[0].bitOR(sd->mask);
5071   arbitRec.apiMask[rank].assign(sd->mask);
5072 }
5073 
5074 /**
5075  * ContinueB delay (0=JBA 1=JBB)
5076  */
getArbitDelay()5077 Uint32 Qmgr::getArbitDelay()
5078 {
5079   switch (arbitRec.state) {
5080   case ARBIT_NULL:
5081     jam();
5082     break;
5083   case ARBIT_INIT:
5084     jam();
5085   case ARBIT_FIND:
5086     jam();
5087   case ARBIT_PREP1:
5088     jam();
5089   case ARBIT_PREP2:
5090     jam();
5091   case ARBIT_START:
5092     jam();
5093     return 100;
5094   case ARBIT_RUN:
5095     jam();
5096     return 1000;
5097   case ARBIT_CHOOSE:
5098     jam();
5099     return 10;
5100   case ARBIT_CRASH:             // if we could wait
5101     jam();
5102     return 100;
5103   }
5104   ndbrequire(false);
5105   return (Uint32)-1;
5106 }
5107 
5108 /**
5109  * Time to wait for reply.  There is only 1 config parameter
5110  * (timeout for CHOOSE).  XXX The rest are guesses.
5111  */
getArbitTimeout()5112 Uint32 Qmgr::getArbitTimeout()
5113 {
5114   switch (arbitRec.state) {
5115   case ARBIT_NULL:
5116     jam();
5117     break;
5118   case ARBIT_INIT:              // not used
5119     jam();
5120   case ARBIT_FIND:              // not used
5121     jam();
5122     return 1000;
5123   case ARBIT_PREP1:
5124     jam();
5125   case ARBIT_PREP2:
5126     jam();
5127     return 1000 + cnoOfNodes * Uint32(hb_send_timer.getDelay());
5128   case ARBIT_START:
5129     jam();
5130     return 1000 + arbitRec.timeout;
5131   case ARBIT_RUN:               // not used (yet)
5132     jam();
5133     return 1000;
5134   case ARBIT_CHOOSE:
5135     jam();
5136     return arbitRec.timeout;
5137   case ARBIT_CRASH:             // if we could wait
5138     jam();
5139     return 100;
5140   }
5141   ndbrequire(false);
5142   return (Uint32)-1;
5143 }
5144 
5145 /**
5146  * Start arbitration thread when we are president and database
5147  * is opened for the first time.
5148  *
5149  * XXX  Do arbitration check just like on node failure.  Since
5150  * there is no arbitrator yet, must win on counts alone.
5151  */
5152 void
handleArbitStart(Signal * signal)5153 Qmgr::handleArbitStart(Signal* signal)
5154 {
5155   jam();
5156   ndbrequire(cpresident == getOwnNodeId());
5157   ndbrequire(arbitRec.state == ARBIT_NULL);
5158   arbitRec.state = ARBIT_INIT;
5159   arbitRec.newstate = true;
5160   startArbitThread(signal);
5161 }
5162 
5163 /**
5164  * Handle API node failure.  Called also by non-president nodes.
5165  * If we are president go back to INIT state, otherwise to NULL.
5166  * Start new thread to save time.
5167  */
5168 void
handleArbitApiFail(Signal * signal,Uint16 nodeId)5169 Qmgr::handleArbitApiFail(Signal* signal, Uint16 nodeId)
5170 {
5171   if (arbitRec.node != nodeId) {
5172     jam();
5173     return;
5174   }
5175   reportArbitEvent(signal, NDB_LE_ArbitState);
5176   arbitRec.node = 0;
5177   switch (arbitRec.state) {
5178   case ARBIT_NULL:              // should not happen
5179     jam();
5180   case ARBIT_INIT:
5181     jam();
5182   case ARBIT_FIND:
5183     jam();
5184     break;
5185   case ARBIT_PREP1:		// start from beginning
5186     jam();
5187   case ARBIT_PREP2:
5188     jam();
5189   case ARBIT_START:
5190     jam();
5191   case ARBIT_RUN:
5192     if (cpresident == getOwnNodeId()) {
5193       jam();
5194       arbitRec.state = ARBIT_INIT;
5195       arbitRec.newstate = true;
5196       startArbitThread(signal);
5197     } else {
5198       jam();
5199       arbitRec.state = ARBIT_NULL;
5200     }
5201     break;
5202   case ARBIT_CHOOSE:		// XXX too late
5203     jam();
5204   case ARBIT_CRASH:
5205     jam();
5206     break;
5207   default:
5208     ndbrequire(false);
5209     break;
5210   }
5211 }
5212 
5213 /**
5214  * Handle NDB node add.  Ignore if arbitration thread not yet
5215  * started.  If PREP is not ready, go back to INIT.  Otherwise
5216  * the new node gets arbitrator and ticket once we reach RUN state.
5217  * Start new thread to save time.
5218  */
5219 void
handleArbitNdbAdd(Signal * signal,Uint16 nodeId)5220 Qmgr::handleArbitNdbAdd(Signal* signal, Uint16 nodeId)
5221 {
5222   jam();
5223   ndbrequire(cpresident == getOwnNodeId());
5224   switch (arbitRec.state) {
5225   case ARBIT_NULL:              // before db opened
5226     jam();
5227     break;
5228   case ARBIT_INIT:		// start from beginning
5229     jam();
5230   case ARBIT_FIND:
5231     jam();
5232   case ARBIT_PREP1:
5233     jam();
5234   case ARBIT_PREP2:
5235     jam();
5236     arbitRec.state = ARBIT_INIT;
5237     arbitRec.newstate = true;
5238     startArbitThread(signal);
5239     break;
5240   case ARBIT_START:		// process in RUN state
5241     jam();
5242   case ARBIT_RUN:
5243     jam();
5244     arbitRec.newMask.set(nodeId);
5245     break;
5246   case ARBIT_CHOOSE:            // XXX too late
5247     jam();
5248   case ARBIT_CRASH:
5249     jam();
5250     break;
5251   default:
5252     ndbrequire(false);
5253     break;
5254   }
5255 }
5256 
5257 /**
5258  * Check if current nodeset can survive.  The decision is
5259  * based on node count, node groups, and on external arbitrator
5260  * (if we have one).  Always starts a new thread because
5261  * 1) CHOOSE cannot wait 2) if we are new president we need
5262  * a thread 3) if we are old president it does no harm.
5263  */
5264 void
handleArbitCheck(Signal * signal)5265 Qmgr::handleArbitCheck(Signal* signal)
5266 {
5267   jam();
5268   ndbrequire(cpresident == getOwnNodeId());
5269   NdbNodeBitmask ndbMask;
5270   computeArbitNdbMask(ndbMask);
5271   if (g_ndb_arbit_one_half_rule &&
5272       2 * ndbMask.count() < cnoOfNodes) {
5273     jam();
5274     arbitRec.code = ArbitCode::LoseNodes;
5275   } else {
5276     jam();
5277     CheckNodeGroups* sd = (CheckNodeGroups*)&signal->theData[0];
5278     sd->blockRef = reference();
5279     sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck;
5280     sd->mask = ndbMask;
5281     EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal,
5282 		   CheckNodeGroups::SignalLength);
5283     jamEntry();
5284     switch (sd->output) {
5285     case CheckNodeGroups::Win:
5286       jam();
5287       arbitRec.code = ArbitCode::WinGroups;
5288       break;
5289     case CheckNodeGroups::Lose:
5290       jam();
5291       arbitRec.code = ArbitCode::LoseGroups;
5292       break;
5293     case CheckNodeGroups::Partitioning:
5294       jam();
5295       arbitRec.code = ArbitCode::Partitioning;
5296       if (g_ndb_arbit_one_half_rule &&
5297           2 * ndbMask.count() > cnoOfNodes) {
5298         jam();
5299         arbitRec.code = ArbitCode::WinNodes;
5300       }
5301       break;
5302     default:
5303       ndbrequire(false);
5304       break;
5305     }
5306   }
5307   switch (arbitRec.code) {
5308   case ArbitCode::LoseNodes:
5309     jam();
5310   case ArbitCode::LoseGroups:
5311     jam();
5312     goto crashme;
5313   case ArbitCode::WinNodes:
5314     jam();
5315   case ArbitCode::WinGroups:
5316     jam();
5317     if (arbitRec.state == ARBIT_RUN) {
5318       jam();
5319       break;
5320     }
5321     arbitRec.state = ARBIT_INIT;
5322     arbitRec.newstate = true;
5323     break;
5324   case ArbitCode::Partitioning:
5325     if (arbitRec.state == ARBIT_RUN) {
5326       jam();
5327       arbitRec.state = ARBIT_CHOOSE;
5328       arbitRec.newstate = true;
5329       break;
5330     }
5331     if (arbitRec.apiMask[0].count() != 0) {
5332       jam();
5333       arbitRec.code = ArbitCode::LoseNorun;
5334     } else {
5335       jam();
5336       arbitRec.code = ArbitCode::LoseNocfg;
5337     }
5338     goto crashme;
5339   default:
5340   crashme:
5341     jam();
5342     arbitRec.state = ARBIT_CRASH;
5343     arbitRec.newstate = true;
5344     break;
5345   }
5346   reportArbitEvent(signal, NDB_LE_ArbitResult);
5347   switch (arbitRec.state) {
5348   default:
5349     jam();
5350     arbitRec.newMask.bitAND(ndbMask);   // delete failed nodes
5351     arbitRec.recvMask.bitAND(ndbMask);
5352     sendCommitFailReq(signal);          // start commit of failed nodes
5353     break;
5354   case ARBIT_CHOOSE:
5355     jam();
5356   case ARBIT_CRASH:
5357     jam();
5358     break;
5359   }
5360   startArbitThread(signal);
5361 }
5362 
5363 /**
5364  * Start a new continueB thread.  The thread id is incremented
5365  * so that any old thread will exit.
5366  */
5367 void
startArbitThread(Signal * signal)5368 Qmgr::startArbitThread(Signal* signal)
5369 {
5370   jam();
5371   ndbrequire(cpresident == getOwnNodeId());
5372   arbitRec.code = ArbitCode::ThreadStart;
5373   reportArbitEvent(signal, NDB_LE_ArbitState);
5374   signal->theData[1] = ++arbitRec.thread;
5375   runArbitThread(signal);
5376 }
5377 
5378 /**
5379  * Handle arbitration thread.  The initial thread normally ends
5380  * up in RUN state.  New thread can be started to save time.
5381  */
5382 void
runArbitThread(Signal * signal)5383 Qmgr::runArbitThread(Signal* signal)
5384 {
5385 #ifdef DEBUG_ARBIT
5386   char buf[256];
5387   NdbNodeBitmask ndbMask;
5388   computeArbitNdbMask(ndbMask);
5389   ndbout << "arbit thread:";
5390   ndbout << " state=" << arbitRec.state;
5391   ndbout << " newstate=" << arbitRec.newstate;
5392   ndbout << " thread=" << arbitRec.thread;
5393   ndbout << " node=" << arbitRec.node;
5394   arbitRec.ticket.getText(buf, sizeof(buf));
5395   ndbout << " ticket=" << buf;
5396   ndbMask.getText(buf);
5397   ndbout << " ndbmask=" << buf;
5398   ndbout << " sendcount=" << arbitRec.sendCount;
5399   ndbout << " recvcount=" << arbitRec.recvCount;
5400   arbitRec.recvMask.getText(buf);
5401   ndbout << " recvmask=" << buf;
5402   ndbout << " code=" << arbitRec.code;
5403   ndbout << endl;
5404 #endif
5405   if (signal->theData[1] != arbitRec.thread) {
5406     jam();
5407     return;	        	// old thread dies
5408   }
5409   switch (arbitRec.state) {
5410   case ARBIT_INIT:		// main thread
5411     jam();
5412     stateArbitInit(signal);
5413     break;
5414   case ARBIT_FIND:
5415     jam();
5416     stateArbitFind(signal);
5417     break;
5418   case ARBIT_PREP1:
5419     jam();
5420   case ARBIT_PREP2:
5421     jam();
5422     stateArbitPrep(signal);
5423     break;
5424   case ARBIT_START:
5425     jam();
5426     stateArbitStart(signal);
5427     break;
5428   case ARBIT_RUN:
5429     jam();
5430     stateArbitRun(signal);
5431     break;
5432   case ARBIT_CHOOSE:		// partitition thread
5433     jam();
5434     stateArbitChoose(signal);
5435     break;
5436   case ARBIT_CRASH:
5437     jam();
5438     stateArbitCrash(signal);
5439     break;
5440   default:
5441     ndbrequire(false);
5442     break;
5443   }
5444   signal->theData[0] = ZARBIT_HANDLING;
5445   signal->theData[1] = arbitRec.thread;
5446   signal->theData[2] = arbitRec.state;		// just for signal log
5447   Uint32 delay = getArbitDelay();
5448   if (delay == 0) {
5449     jam();
5450     sendSignal(QMGR_REF, GSN_CONTINUEB, signal, 3, JBA);
5451   } else if (delay == 1) {
5452     jam();
5453     sendSignal(QMGR_REF, GSN_CONTINUEB, signal, 3, JBB);
5454   } else {
5455     jam();
5456     sendSignalWithDelay(QMGR_REF, GSN_CONTINUEB, signal, delay, 3);
5457   }//if
5458 }
5459 
5460 /**
5461  * Handle INIT state.  Generate next ticket.  Switch to FIND
5462  * state without delay.
5463  */
5464 void
stateArbitInit(Signal * signal)5465 Qmgr::stateArbitInit(Signal* signal)
5466 {
5467   if (arbitRec.newstate) {
5468     jam();
5469     CRASH_INSERTION((Uint32)910 + arbitRec.state);
5470 
5471     arbitRec.node = 0;
5472     arbitRec.ticket.update();
5473     arbitRec.newMask.clear();
5474     arbitRec.code = 0;
5475     arbitRec.newstate = false;
5476   }
5477   arbitRec.state = ARBIT_FIND;
5478   arbitRec.newstate = true;
5479   stateArbitFind(signal);
5480 }
5481 
5482 /**
5483  * Handle FIND state.  Find first arbitrator which is alive
5484  * and invoke PREP state without delay.  If none are found,
5485  * loop in FIND state.  This is forever if no arbitrators
5486  * are configured (not the normal case).
5487  *
5488  * XXX  Add adaptive behaviour to avoid getting stuck on API
5489  * nodes which are alive but do not respond or die too soon.
5490  */
5491 void
stateArbitFind(Signal * signal)5492 Qmgr::stateArbitFind(Signal* signal)
5493 {
5494   if (arbitRec.newstate) {
5495     jam();
5496     CRASH_INSERTION((Uint32)910 + arbitRec.state);
5497 
5498     arbitRec.code = 0;
5499     arbitRec.newstate = false;
5500   }
5501 
5502   switch (arbitRec.method){
5503   case ArbitRec::METHOD_EXTERNAL:
5504   {
5505     // Don't select any API node as arbitrator
5506     arbitRec.node = 0;
5507     arbitRec.state = ARBIT_PREP1;
5508     arbitRec.newstate = true;
5509     stateArbitPrep(signal);
5510     return;
5511     break;
5512   }
5513 
5514   case ArbitRec::METHOD_DEFAULT:
5515   {
5516     NodeRecPtr aPtr;
5517     // Select the best available API node as arbitrator
5518     for (unsigned rank = 1; rank <= 2; rank++) {
5519       jam();
5520       aPtr.i = 0;
5521       const unsigned stop = NodeBitmask::NotFound;
5522       while ((aPtr.i = arbitRec.apiMask[rank].find(aPtr.i + 1)) != stop) {
5523         jam();
5524         ptrAss(aPtr, nodeRec);
5525         if (aPtr.p->phase != ZAPI_ACTIVE)
5526           continue;
5527         arbitRec.node = aPtr.i;
5528         arbitRec.state = ARBIT_PREP1;
5529         arbitRec.newstate = true;
5530         stateArbitPrep(signal);
5531         return;
5532       }
5533     }
5534     return;
5535     break;
5536   }
5537 
5538   default:
5539     ndbrequire(false);
5540   }
5541 }
5542 
5543 /**
5544  * Handle PREP states.  First round nulls any existing tickets.
5545  * Second round sends new ticket.  When all confirms have been
5546  * received invoke START state immediately.
5547  */
5548 void
stateArbitPrep(Signal * signal)5549 Qmgr::stateArbitPrep(Signal* signal)
5550 {
5551   if (arbitRec.newstate) {
5552     jam();
5553     CRASH_INSERTION((Uint32)910 + arbitRec.state);
5554 
5555     arbitRec.sendCount = 0;                     // send all at once
5556     computeArbitNdbMask(arbitRec.recvMask);     // to send and recv
5557     arbitRec.recvMask.clear(getOwnNodeId());
5558     arbitRec.code = 0;
5559     arbitRec.newstate = false;
5560   }
5561   if (! arbitRec.sendCount) {
5562     jam();
5563     NodeRecPtr aPtr;
5564     aPtr.i = 0;
5565     const unsigned stop = NodeBitmask::NotFound;
5566     while ((aPtr.i = arbitRec.recvMask.find(aPtr.i + 1)) != stop) {
5567       jam();
5568       ptrAss(aPtr, nodeRec);
5569       ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
5570       sd->sender = getOwnNodeId();
5571       if (arbitRec.state == ARBIT_PREP1) {
5572         jam();
5573         sd->code = ArbitCode::PrepPart1;
5574       } else {
5575         jam();
5576         sd->code = ArbitCode::PrepPart2;
5577       }
5578       sd->node = arbitRec.node;
5579       sd->ticket = arbitRec.ticket;
5580       sd->mask.clear();
5581       sendSignal(aPtr.p->blockRef, GSN_ARBIT_PREPREQ, signal,
5582         ArbitSignalData::SignalLength, JBB);
5583     }
5584     arbitRec.setTimestamp();			// send time
5585     arbitRec.sendCount = 1;
5586     return;
5587   }
5588   if (arbitRec.code != 0) {			// error
5589     jam();
5590     arbitRec.state = ARBIT_INIT;
5591     arbitRec.newstate = true;
5592     return;
5593   }
5594   if (arbitRec.recvMask.count() == 0) {		// recv all
5595     if (arbitRec.state == ARBIT_PREP1) {
5596       jam();
5597       arbitRec.state = ARBIT_PREP2;
5598       arbitRec.newstate = true;
5599     } else {
5600       jam();
5601       arbitRec.state = ARBIT_START;
5602       arbitRec.newstate = true;
5603       stateArbitStart(signal);
5604     }
5605     return;
5606   }
5607   if (arbitRec.getTimediff() > getArbitTimeout()) {
5608     jam();
5609     arbitRec.state = ARBIT_INIT;
5610     arbitRec.newstate = true;
5611     return;
5612   }
5613 }
5614 
5615 void
execARBIT_PREPREQ(Signal * signal)5616 Qmgr::execARBIT_PREPREQ(Signal* signal)
5617 {
5618   jamEntry();
5619   ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
5620   if (getOwnNodeId() == cpresident) {
5621     jam();
5622     return;		// wrong state
5623   }
5624   if (sd->sender != cpresident) {
5625     jam();
5626     return;		// wrong state
5627   }
5628   NodeRecPtr aPtr;
5629   aPtr.i = sd->sender;
5630   ptrAss(aPtr, nodeRec);
5631   switch (sd->code) {
5632   case ArbitCode::PrepPart1:    // zero them just to be sure
5633     jam();
5634     arbitRec.node = 0;
5635     arbitRec.ticket.clear();
5636     break;
5637   case ArbitCode::PrepPart2:    // non-president enters RUN state
5638     jam();
5639   case ArbitCode::PrepAtrun:
5640     jam();
5641     arbitRec.node = sd->node;
5642     arbitRec.ticket = sd->ticket;
5643     arbitRec.code = sd->code;
5644     reportArbitEvent(signal, NDB_LE_ArbitState);
5645     arbitRec.state = ARBIT_RUN;
5646     arbitRec.newstate = true;
5647     if (sd->code == ArbitCode::PrepAtrun) {
5648       jam();
5649       return;
5650     }
5651     break;
5652   default:
5653     jam();
5654     ndbrequire(false);
5655   }
5656   sd->sender = getOwnNodeId();
5657   sd->code = 0;
5658   sendSignal(aPtr.p->blockRef, GSN_ARBIT_PREPCONF, signal,
5659     ArbitSignalData::SignalLength, JBB);
5660 }
5661 
5662 void
execARBIT_PREPCONF(Signal * signal)5663 Qmgr::execARBIT_PREPCONF(Signal* signal)
5664 {
5665   jamEntry();
5666   ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
5667   if (! arbitRec.match(sd)) {
5668     jam();
5669     return;		// stray signal
5670   }
5671   if (arbitRec.state != ARBIT_PREP1 && arbitRec.state != ARBIT_PREP2) {
5672     jam();
5673     return;		// wrong state
5674   }
5675   if (! arbitRec.recvMask.get(sd->sender)) {
5676     jam();
5677     return;		// wrong state
5678   }
5679   arbitRec.recvMask.clear(sd->sender);
5680   if (arbitRec.code == 0 && sd->code != 0) {
5681     jam();
5682     arbitRec.code = sd->code;
5683   }//if
5684 }
5685 
5686 void
execARBIT_PREPREF(Signal * signal)5687 Qmgr::execARBIT_PREPREF(Signal* signal)
5688 {
5689   jamEntry();
5690   ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
5691   if (sd->code == 0) {
5692     jam();
5693     sd->code = ArbitCode::ErrUnknown;
5694   }
5695   execARBIT_PREPCONF(signal);
5696 }
5697 
5698 /**
5699  * Handle START state.  On first call send start request to
5700  * the chosen arbitrator.  Then wait for a CONF.
5701  */
5702 void
stateArbitStart(Signal * signal)5703 Qmgr::stateArbitStart(Signal* signal)
5704 {
5705   if (arbitRec.newstate) {
5706     jam();
5707     CRASH_INSERTION((Uint32)910 + arbitRec.state);
5708 
5709     arbitRec.sendCount = 0;
5710     arbitRec.recvCount = 0;
5711     arbitRec.code = 0;
5712     arbitRec.newstate = false;
5713   }
5714 
5715   switch (arbitRec.method){
5716   case ArbitRec::METHOD_EXTERNAL:
5717     jam();
5718     ndbrequire(arbitRec.node == 0); // No arbitrator selected
5719 
5720     // Don't start arbitrator in API node => ARBIT_RUN
5721     arbitRec.state = ARBIT_RUN;
5722     arbitRec.newstate = true;
5723     return;
5724     break;
5725 
5726   case ArbitRec::METHOD_DEFAULT:
5727     if (! arbitRec.sendCount) {
5728       jam();
5729       BlockReference blockRef = calcApiClusterMgrBlockRef(arbitRec.node);
5730       ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
5731       sd->sender = getOwnNodeId();
5732       sd->code = 0;
5733       sd->node = arbitRec.node;
5734       sd->ticket = arbitRec.ticket;
5735       sd->mask.clear();
5736       sendSignal(blockRef, GSN_ARBIT_STARTREQ, signal,
5737                  ArbitSignalData::SignalLength, JBB);
5738       arbitRec.sendCount = 1;
5739       arbitRec.setTimestamp();		// send time
5740       return;
5741     }
5742     if (arbitRec.recvCount) {
5743       jam();
5744       reportArbitEvent(signal, NDB_LE_ArbitState);
5745       if (arbitRec.code == ArbitCode::ApiStart) {
5746         jam();
5747         arbitRec.state = ARBIT_RUN;
5748         arbitRec.newstate = true;
5749         return;
5750       }
5751       arbitRec.state = ARBIT_INIT;
5752       arbitRec.newstate = true;
5753       return;
5754     }
5755     if (arbitRec.getTimediff() > getArbitTimeout()) {
5756       jam();
5757       arbitRec.code = ArbitCode::ErrTimeout;
5758       reportArbitEvent(signal, NDB_LE_ArbitState);
5759       arbitRec.state = ARBIT_INIT;
5760       arbitRec.newstate = true;
5761       return;
5762     }
5763     break;
5764 
5765   default:
5766     ndbrequire(false);
5767     break;
5768   }
5769 }
5770 
5771 void
execARBIT_STARTCONF(Signal * signal)5772 Qmgr::execARBIT_STARTCONF(Signal* signal)
5773 {
5774   jamEntry();
5775   ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
5776   if (! arbitRec.match(sd)) {
5777     jam();
5778     return;		// stray signal
5779   }
5780   if (arbitRec.state != ARBIT_START) {
5781     jam();
5782     return;		// wrong state
5783   }
5784   if (arbitRec.recvCount) {
5785     jam();
5786     return;		// wrong state
5787   }
5788   arbitRec.code = sd->code;
5789   arbitRec.recvCount = 1;
5790 }
5791 
5792 void
execARBIT_STARTREF(Signal * signal)5793 Qmgr::execARBIT_STARTREF(Signal* signal)
5794 {
5795   jamEntry();
5796   ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
5797   if (sd->code == 0) {
5798     jam();
5799     sd->code = ArbitCode::ErrUnknown;
5800   }
5801   execARBIT_STARTCONF(signal);
5802 }
5803 
5804 /**
5805  * Handle RUN state.  Send ticket to any new nodes which have
5806  * appeared after PREP state.  We don't care about a CONF.
5807  */
5808 void
stateArbitRun(Signal * signal)5809 Qmgr::stateArbitRun(Signal* signal)
5810 {
5811   if (arbitRec.newstate) {
5812     jam();
5813     CRASH_INSERTION((Uint32)910 + arbitRec.state);
5814 
5815     arbitRec.code = 0;
5816     arbitRec.newstate = false;
5817   }
5818   NodeRecPtr aPtr;
5819   aPtr.i = 0;
5820   const unsigned stop = NodeBitmask::NotFound;
5821   while ((aPtr.i = arbitRec.newMask.find(aPtr.i + 1)) != stop) {
5822     jam();
5823     arbitRec.newMask.clear(aPtr.i);
5824     ptrAss(aPtr, nodeRec);
5825     ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
5826     sd->sender = getOwnNodeId();
5827     sd->code = ArbitCode::PrepAtrun;
5828     sd->node = arbitRec.node;
5829     sd->ticket = arbitRec.ticket;
5830     sd->mask.clear();
5831     sendSignal(aPtr.p->blockRef, GSN_ARBIT_PREPREQ, signal,
5832       ArbitSignalData::SignalLength, JBB);
5833   }
5834 }
5835 
5836 /**
5837  * Handle CHOOSE state.  Entered only from RUN state when
5838  * there is a possible network partitioning.  Send CHOOSE to
5839  * the arbitrator.  On win switch to INIT state because a new
5840  * ticket must be created.
5841  */
5842 void
stateArbitChoose(Signal * signal)5843 Qmgr::stateArbitChoose(Signal* signal)
5844 {
5845   if (arbitRec.newstate) {
5846     jam();
5847     CRASH_INSERTION((Uint32)910 + arbitRec.state);
5848 
5849     arbitRec.sendCount = 0;
5850     arbitRec.recvCount = 0;
5851     arbitRec.code = 0;
5852     arbitRec.newstate = false;
5853   }
5854 
5855   switch(arbitRec.method){
5856   case ArbitRec::METHOD_EXTERNAL:
5857   {
5858     if (! arbitRec.sendCount) {
5859       jam();
5860       ndbrequire(arbitRec.node == 0); // No arbitrator selected
5861       // Don't send CHOOSE to anyone, just wait for timeout to expire
5862       arbitRec.sendCount = 1;
5863       arbitRec.setTimestamp();
5864       return;
5865     }
5866 
5867     if (arbitRec.getTimediff() > getArbitTimeout()) {
5868       jam();
5869       // Arbitration timeout has expired
5870       ndbrequire(arbitRec.node == 0); // No arbitrator selected
5871 
5872       NodeBitmask nodes;
5873       computeArbitNdbMask(nodes);
5874       arbitRec.code = ArbitCode::WinWaitExternal;
5875       reportArbitEvent(signal, NDB_LE_ArbitResult, nodes);
5876 
5877       sendCommitFailReq(signal);        // start commit of failed nodes
5878       arbitRec.state = ARBIT_INIT;
5879       arbitRec.newstate = true;
5880       return;
5881     }
5882     break;
5883   }
5884 
5885   case ArbitRec::METHOD_DEFAULT:
5886   {
5887     if (! arbitRec.sendCount) {
5888       jam();
5889       const BlockReference blockRef = calcApiClusterMgrBlockRef(arbitRec.node);
5890       ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
5891       sd->sender = getOwnNodeId();
5892       sd->code = 0;
5893       sd->node = arbitRec.node;
5894       sd->ticket = arbitRec.ticket;
5895       computeArbitNdbMask(sd->mask);
5896       sendSignal(blockRef, GSN_ARBIT_CHOOSEREQ, signal,
5897                  ArbitSignalData::SignalLength, JBA);
5898       arbitRec.sendCount = 1;
5899       arbitRec.setTimestamp();		// send time
5900       return;
5901     }
5902 
5903     if (arbitRec.recvCount) {
5904       jam();
5905       reportArbitEvent(signal, NDB_LE_ArbitResult);
5906       if (arbitRec.code == ArbitCode::WinChoose) {
5907         jam();
5908         sendCommitFailReq(signal);        // start commit of failed nodes
5909         arbitRec.state = ARBIT_INIT;
5910         arbitRec.newstate = true;
5911         return;
5912       }
5913       arbitRec.state = ARBIT_CRASH;
5914       arbitRec.newstate = true;
5915       stateArbitCrash(signal);		// do it at once
5916       return;
5917     }
5918 
5919     if (arbitRec.getTimediff() > getArbitTimeout()) {
5920       jam();
5921       // Arbitration timeout has expired
5922       arbitRec.code = ArbitCode::ErrTimeout;
5923       reportArbitEvent(signal, NDB_LE_ArbitState);
5924       arbitRec.state = ARBIT_CRASH;
5925       arbitRec.newstate = true;
5926       stateArbitCrash(signal);		// do it at once
5927       return;
5928     }
5929     break;
5930   }
5931 
5932   default:
5933     ndbrequire(false);
5934     break;
5935   }
5936 }
5937 
5938 void
execARBIT_CHOOSECONF(Signal * signal)5939 Qmgr::execARBIT_CHOOSECONF(Signal* signal)
5940 {
5941   jamEntry();
5942   ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
5943   if (!arbitRec.match(sd)) {
5944     jam();
5945     return;		// stray signal
5946   }
5947   if (arbitRec.state != ARBIT_CHOOSE) {
5948     jam();
5949     return;		// wrong state
5950   }
5951   if (arbitRec.recvCount) {
5952     jam();
5953     return;		// wrong state
5954   }
5955   arbitRec.recvCount = 1;
5956   arbitRec.code = sd->code;
5957 }
5958 
5959 void
execARBIT_CHOOSEREF(Signal * signal)5960 Qmgr::execARBIT_CHOOSEREF(Signal* signal)
5961 {
5962   jamEntry();
5963   ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
5964   if (sd->code == 0) {
5965     jam();
5966     sd->code = ArbitCode::ErrUnknown;
5967   }
5968   execARBIT_CHOOSECONF(signal);
5969 }
5970 
5971 /**
5972  * Handle CRASH state.  We must crash immediately.
5973  * XXX tell other nodes in our party to crash too.
5974  */
5975 void
stateArbitCrash(Signal * signal)5976 Qmgr::stateArbitCrash(Signal* signal)
5977 {
5978   jam();
5979   if (arbitRec.newstate) {
5980     jam();
5981     CRASH_INSERTION((Uint32)910 + arbitRec.state);
5982     arbitRec.setTimestamp();
5983     arbitRec.code = 0;
5984     arbitRec.newstate = false;
5985   }
5986 #ifdef ndb_arbit_crash_wait_for_event_report_to_get_out
5987   if (! (arbitRec.getTimediff() > getArbitTimeout()))
5988     return;
5989 #endif
5990   CRASH_INSERTION(932);
5991   CRASH_INSERTION(938);
5992   progError(__LINE__, NDBD_EXIT_ARBIT_SHUTDOWN,
5993             "Arbitrator decided to shutdown this node");
5994 }
5995 
5996 /**
5997  * Arbitrator may inform us that it will exit.  This lets us
5998  * start looking sooner for a new one.  Handle it like API node
5999  * failure.
6000  */
6001 void
execARBIT_STOPREP(Signal * signal)6002 Qmgr::execARBIT_STOPREP(Signal* signal)
6003 {
6004   jamEntry();
6005   ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
6006   if (! arbitRec.match(sd)) {
6007     jam();
6008     return;		// stray signal
6009   }
6010   arbitRec.code = ArbitCode::ApiExit;
6011   handleArbitApiFail(signal, arbitRec.node);
6012 }
6013 
6014 void
computeArbitNdbMask(NodeBitmaskPOD & aMask)6015 Qmgr::computeArbitNdbMask(NodeBitmaskPOD& aMask)
6016 {
6017   NodeRecPtr aPtr;
6018   aMask.clear();
6019   for (aPtr.i = 1; aPtr.i < MAX_NDB_NODES; aPtr.i++) {
6020     jam();
6021     ptrAss(aPtr, nodeRec);
6022     if (getNodeInfo(aPtr.i).getType() == NodeInfo::DB && aPtr.p->phase == ZRUNNING){
6023       jam();
6024       aMask.set(aPtr.i);
6025     }
6026   }
6027 }
6028 
6029 void
computeArbitNdbMask(NdbNodeBitmaskPOD & aMask)6030 Qmgr::computeArbitNdbMask(NdbNodeBitmaskPOD& aMask)
6031 {
6032   NodeRecPtr aPtr;
6033   aMask.clear();
6034   for (aPtr.i = 1; aPtr.i < MAX_NDB_NODES; aPtr.i++) {
6035     jam();
6036     ptrAss(aPtr, nodeRec);
6037     if (getNodeInfo(aPtr.i).getType() == NodeInfo::DB && aPtr.p->phase == ZRUNNING){
6038       jam();
6039       aMask.set(aPtr.i);
6040     }
6041   }
6042 }
6043 
6044 /**
6045  * Report arbitration event.  We use arbitration signal format
6046  * where sender (word 0) is event type.
6047  */
6048 void
reportArbitEvent(Signal * signal,Ndb_logevent_type type,const NodeBitmask mask)6049 Qmgr::reportArbitEvent(Signal* signal, Ndb_logevent_type type,
6050                        const NodeBitmask mask)
6051 {
6052   ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
6053   sd->sender = type;
6054   sd->code = arbitRec.code | (arbitRec.state << 16);
6055   sd->node = arbitRec.node;
6056   sd->ticket = arbitRec.ticket;
6057   sd->mask = mask;
6058   sendSignal(CMVMI_REF, GSN_EVENT_REP, signal,
6059     ArbitSignalData::SignalLength, JBB);
6060 }
6061 
6062 // end of arbitration module
6063 
6064 void
execDUMP_STATE_ORD(Signal * signal)6065 Qmgr::execDUMP_STATE_ORD(Signal* signal)
6066 {
6067   switch (signal->theData[0]) {
6068   case 1:
6069     infoEvent("creadyDistCom = %d, cpresident = %d\n",
6070 	      creadyDistCom, cpresident);
6071     infoEvent("cpresidentAlive = %d, cpresidentCand = %d (gci: %d)\n",
6072               cpresidentAlive,
6073 	      c_start.m_president_candidate,
6074 	      c_start.m_president_candidate_gci);
6075     infoEvent("ctoStatus = %d\n", ctoStatus);
6076     for(Uint32 i = 1; i<MAX_NDB_NODES; i++){
6077       NodeRecPtr nodePtr;
6078       nodePtr.i = i;
6079       ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
6080       char buf[100];
6081       switch(nodePtr.p->phase){
6082       case ZINIT:
6083         sprintf(buf, "Node %d: ZINIT(%d)", i, nodePtr.p->phase);
6084         break;
6085       case ZSTARTING:
6086         sprintf(buf, "Node %d: ZSTARTING(%d)", i, nodePtr.p->phase);
6087         break;
6088       case ZRUNNING:
6089         sprintf(buf, "Node %d: ZRUNNING(%d)", i, nodePtr.p->phase);
6090         break;
6091       case ZPREPARE_FAIL:
6092         sprintf(buf, "Node %d: ZPREPARE_FAIL(%d)", i, nodePtr.p->phase);
6093         break;
6094       case ZFAIL_CLOSING:
6095         sprintf(buf, "Node %d: ZFAIL_CLOSING(%d)", i, nodePtr.p->phase);
6096         break;
6097       case ZAPI_INACTIVE:
6098         sprintf(buf, "Node %d: ZAPI_INACTIVE(%d)", i, nodePtr.p->phase);
6099         break;
6100       case ZAPI_ACTIVE:
6101         sprintf(buf, "Node %d: ZAPI_ACTIVE(%d)", i, nodePtr.p->phase);
6102         break;
6103       default:
6104         sprintf(buf, "Node %d: <UNKNOWN>(%d)", i, nodePtr.p->phase);
6105         break;
6106       }
6107       infoEvent("%s", buf);
6108     }
6109   }
6110 
6111 #ifdef ERROR_INSERT
6112   if (signal->theData[0] == 935 && signal->getLength() == 2)
6113   {
6114     SET_ERROR_INSERT_VALUE(935);
6115     c_error_insert_extra = signal->theData[1];
6116   }
6117 #endif
6118 
6119   if (signal->theData[0] == 900 && signal->getLength() == 2)
6120   {
6121     ndbout_c("disconnecting %u", signal->theData[1]);
6122     api_failed(signal, signal->theData[1]);
6123   }
6124 
6125   if (signal->theData[0] == 908)
6126   {
6127     int tag = signal->getLength() < 2 ? -1 : signal->theData[1];
6128     char buf[8192];
6129     // for easy grepping in *out.log ...
6130     strcpy(buf, "HB:");
6131     if (tag >= 0)
6132       sprintf(buf+strlen(buf), "%d:", tag);
6133     sprintf(buf+strlen(buf), " pres:%u", cpresident);
6134     sprintf(buf+strlen(buf), " own:%u", getOwnNodeId());
6135     NodeRecPtr myNodePtr;
6136     myNodePtr.i = getOwnNodeId();
6137     ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);
6138     sprintf(buf+strlen(buf), " dyn:%u-%u", myNodePtr.p->ndynamicId & 0xFFFF, myNodePtr.p->ndynamicId >> 16);
6139     sprintf(buf+strlen(buf), " mxdyn:%u", c_maxDynamicId);
6140     sprintf(buf+strlen(buf), " hb:%u->%u->%u", cneighbourl, getOwnNodeId(), cneighbourh);
6141     sprintf(buf+strlen(buf), " node:dyn-hi,cfg:");
6142     NodeRecPtr nodePtr;
6143     for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++)
6144     {
6145       ptrAss(nodePtr, nodeRec);
6146       Uint32 type = getNodeInfo(nodePtr.i).m_type;
6147       if (type == NodeInfo::DB)
6148       {
6149         sprintf(buf+strlen(buf), " %u:%u-%u,%u", nodePtr.i, nodePtr.p->ndynamicId & 0xFFFF, nodePtr.p->ndynamicId >> 16, nodePtr.p->hbOrder);
6150       }
6151     }
6152     ndbout << buf << endl;
6153   }
6154 
6155 #ifdef ERROR_INSERT
6156   Uint32 dumpCode = signal->theData[0];
6157   if ((dumpCode == 9992) ||
6158       (dumpCode == 9993))
6159   {
6160     if (signal->getLength() == 2)
6161     {
6162       Uint32 nodeId = signal->theData[1];
6163       Uint32& newNodeId = signal->theData[1];
6164       Uint32 length = 2;
6165       assert(257 > MAX_NODES);
6166       if (nodeId > MAX_NODES)
6167       {
6168         const char* type = "None";
6169         switch (nodeId)
6170         {
6171         case 257:
6172         {
6173           /* Left (lower) neighbour */
6174           newNodeId = cneighbourl;
6175           type = "Left neighbour";
6176           break;
6177         }
6178         case 258:
6179         {
6180           /* Right (higher) neighbour */
6181           newNodeId = cneighbourh;
6182           type = "Right neighbour";
6183           break;
6184         }
6185         case 259:
6186         {
6187           /* President */
6188           newNodeId = cpresident;
6189           type = "President";
6190           break;
6191         }
6192         }
6193         ndbout_c("QMGR : Mapping request on node id %u to node id %u (%s)",
6194                  nodeId, newNodeId, type);
6195         if (newNodeId != nodeId)
6196         {
6197           sendSignal(CMVMI_REF, GSN_DUMP_STATE_ORD, signal, length, JBB);
6198         }
6199       }
6200     }
6201   }
6202 
6203   if (dumpCode == 9994)
6204   {
6205     ndbout_c("setCCDelay(%u)", signal->theData[1]);
6206     setCCDelay(signal->theData[1]);
6207     m_connectivity_check.m_enabled = true;
6208   }
6209 #endif
6210 }//Qmgr::execDUMP_STATE_ORD()
6211 
6212 
6213 void
execAPI_BROADCAST_REP(Signal * signal)6214 Qmgr::execAPI_BROADCAST_REP(Signal* signal)
6215 {
6216   jamEntry();
6217   ApiBroadcastRep api= *(const ApiBroadcastRep*)signal->getDataPtr();
6218 
6219   SectionHandle handle(this, signal);
6220   Uint32 len = signal->getLength() - ApiBroadcastRep::SignalLength;
6221   memmove(signal->theData, signal->theData+ApiBroadcastRep::SignalLength,
6222 	  4*len);
6223 
6224   NodeBitmask mask;
6225   NodeRecPtr nodePtr;
6226   for (nodePtr.i = 1; nodePtr.i < MAX_NODES; nodePtr.i++)
6227   {
6228     jam();
6229     ptrAss(nodePtr, nodeRec);
6230     if (nodePtr.p->phase == ZAPI_ACTIVE &&
6231 	getNodeInfo(nodePtr.i).m_version >= api.minVersion)
6232     {
6233       jam();
6234       mask.set(nodePtr.i);
6235     }
6236   }
6237 
6238   if (mask.isclear())
6239   {
6240     jam();
6241     releaseSections(handle);
6242     return;
6243   }
6244 
6245   NodeReceiverGroup rg(API_CLUSTERMGR, mask);
6246   sendSignal(rg, api.gsn, signal, len, JBB,
6247 	     &handle);
6248 }
6249 
6250 void
execNODE_FAILREP(Signal * signal)6251 Qmgr::execNODE_FAILREP(Signal * signal)
6252 {
6253   jamEntry();
6254   // make sure any distributed signals get acknowledged
6255   // destructive of the signal
6256   c_counterMgr.execNODE_FAILREP(signal);
6257 }
6258 
6259 void
execALLOC_NODEID_REQ(Signal * signal)6260 Qmgr::execALLOC_NODEID_REQ(Signal * signal)
6261 {
6262   jamEntry();
6263   AllocNodeIdReq req = *(AllocNodeIdReq*)signal->getDataPtr();
6264   Uint32 error = 0;
6265 
6266   NodeRecPtr nodePtr;
6267   nodePtr.i = req.nodeId;
6268   ptrAss(nodePtr, nodeRec);
6269 
6270   if (refToBlock(req.senderRef) != QMGR) // request from management server
6271   {
6272     /* master */
6273 
6274     if (getOwnNodeId() != cpresident)
6275     {
6276       jam();
6277       error = AllocNodeIdRef::NotMaster;
6278     }
6279     else if (!opAllocNodeIdReq.m_tracker.done())
6280     {
6281       jam();
6282       error = AllocNodeIdRef::Busy;
6283     }
6284     else if (c_connectedNodes.get(req.nodeId))
6285     {
6286       jam();
6287       error = AllocNodeIdRef::NodeConnected;
6288     }
6289     else if (nodePtr.p->m_secret != 0)
6290     {
6291       jam();
6292       error = AllocNodeIdRef::NodeReserved;
6293     }
6294 
6295     if (error)
6296     {
6297       jam();
6298       AllocNodeIdRef * ref = (AllocNodeIdRef*)signal->getDataPtrSend();
6299       ref->senderRef = reference();
6300       ref->errorCode = error;
6301       ref->masterRef = numberToRef(QMGR, cpresident);
6302       ref->senderData = req.senderData;
6303       ref->nodeId = req.nodeId;
6304       sendSignal(req.senderRef, GSN_ALLOC_NODEID_REF, signal,
6305                  AllocNodeIdRef::SignalLength, JBB);
6306       return;
6307     }
6308 
6309     if (ERROR_INSERTED(934) && req.nodeId != getOwnNodeId())
6310     {
6311       CRASH_INSERTION(934);
6312     }
6313 
6314     /**
6315      * generate secret
6316      */
6317     Uint64 now = NdbTick_CurrentMillisecond();
6318     Uint32 secret_hi = Uint32(now >> 24);
6319     Uint32 secret_lo = Uint32(now << 8) + getOwnNodeId();
6320     req.secret_hi = secret_hi;
6321     req.secret_lo = secret_lo;
6322 
6323     if (req.timeout > 60000)
6324       req.timeout = 60000;
6325 
6326     nodePtr.p->m_secret = (Uint64(secret_hi) << 32) + secret_lo;
6327     nodePtr.p->m_alloc_timeout = now + req.timeout;
6328 
6329     opAllocNodeIdReq.m_req = req;
6330     opAllocNodeIdReq.m_error = 0;
6331     opAllocNodeIdReq.m_connectCount =
6332       getNodeInfo(refToNode(req.senderRef)).m_connectCount;
6333 
6334     jam();
6335     AllocNodeIdReq * req2 = (AllocNodeIdReq*)signal->getDataPtrSend();
6336     * req2 = req;
6337     req2->senderRef = reference();
6338     NodeReceiverGroup rg(QMGR, c_clusterNodes);
6339     RequestTracker & p = opAllocNodeIdReq.m_tracker;
6340     p.init<AllocNodeIdRef>(c_counterMgr, rg, GSN_ALLOC_NODEID_REF, 0);
6341 
6342     sendSignal(rg, GSN_ALLOC_NODEID_REQ, signal,
6343                AllocNodeIdReq::SignalLengthQMGR, JBB);
6344     return;
6345   }
6346 
6347   /* participant */
6348   if (c_connectedNodes.get(req.nodeId))
6349   {
6350     jam();
6351     error = AllocNodeIdRef::NodeConnected;
6352   }
6353   else if (req.nodeType != getNodeInfo(req.nodeId).m_type)
6354   {
6355     jam();
6356     error = AllocNodeIdRef::NodeTypeMismatch;
6357   }
6358   else if (nodePtr.p->failState != NORMAL)
6359   {
6360     jam();
6361     error = AllocNodeIdRef::NodeFailureHandlingNotCompleted;
6362   }
6363 #if 0
6364   /**
6365    * For now only make "time/secret" based reservation on master
6366    *   as we otherwise also need to clear it on failure + handle
6367    *   master failure
6368    */
6369   else if (nodePtr.p->m_secret != 0)
6370   {
6371     jam();
6372     error = AllocNodeIdRef::NodeReserved;
6373   }
6374 #endif
6375 
6376   if (error)
6377   {
6378     jam();
6379     AllocNodeIdRef * ref = (AllocNodeIdRef*)signal->getDataPtrSend();
6380     ref->senderRef = reference();
6381     ref->errorCode = error;
6382     ref->senderData = req.senderData;
6383     ref->nodeId = req.nodeId;
6384     ref->masterRef = numberToRef(QMGR, cpresident);
6385     sendSignal(req.senderRef, GSN_ALLOC_NODEID_REF, signal,
6386                AllocNodeIdRef::SignalLength, JBB);
6387     return;
6388   }
6389 
6390   AllocNodeIdConf * conf = (AllocNodeIdConf*)signal->getDataPtrSend();
6391   conf->senderRef = reference();
6392   conf->secret_hi = req.secret_hi;
6393   conf->secret_lo = req.secret_lo;
6394   sendSignal(req.senderRef, GSN_ALLOC_NODEID_CONF, signal,
6395              AllocNodeIdConf::SignalLength, JBB);
6396 }
6397 
6398 void
execALLOC_NODEID_CONF(Signal * signal)6399 Qmgr::execALLOC_NODEID_CONF(Signal * signal)
6400 {
6401   /* master */
6402 
6403   jamEntry();
6404   const AllocNodeIdConf * conf = (AllocNodeIdConf*)signal->getDataPtr();
6405   opAllocNodeIdReq.m_tracker.reportConf(c_counterMgr,
6406                                         refToNode(conf->senderRef));
6407 
6408   if (signal->getLength() >= AllocNodeIdConf::SignalLength)
6409   {
6410     jam();
6411     if (opAllocNodeIdReq.m_req.secret_hi != conf->secret_hi ||
6412         opAllocNodeIdReq.m_req.secret_lo != conf->secret_lo)
6413     {
6414       jam();
6415       if (opAllocNodeIdReq.m_error == 0)
6416       {
6417         jam();
6418         opAllocNodeIdReq.m_error = AllocNodeIdRef::Undefined;
6419       }
6420     }
6421   }
6422 
6423   completeAllocNodeIdReq(signal);
6424 }
6425 
6426 
6427 void
execALLOC_NODEID_REF(Signal * signal)6428 Qmgr::execALLOC_NODEID_REF(Signal * signal)
6429 {
6430   /* master */
6431 
6432   jamEntry();
6433   const AllocNodeIdRef * ref = (AllocNodeIdRef*)signal->getDataPtr();
6434   if (ref->errorCode == AllocNodeIdRef::NF_FakeErrorREF)
6435   {
6436     jam();
6437     opAllocNodeIdReq.m_tracker.ignoreRef(c_counterMgr,
6438                                          refToNode(ref->senderRef));
6439   }
6440   else
6441   {
6442     jam();
6443     opAllocNodeIdReq.m_tracker.reportRef(c_counterMgr,
6444                                          refToNode(ref->senderRef));
6445     if (opAllocNodeIdReq.m_error == 0)
6446     {
6447       jam();
6448       opAllocNodeIdReq.m_error = ref->errorCode;
6449     }
6450   }
6451   completeAllocNodeIdReq(signal);
6452 }
6453 
6454 void
completeAllocNodeIdReq(Signal * signal)6455 Qmgr::completeAllocNodeIdReq(Signal *signal)
6456 {
6457   /* master */
6458 
6459   if (!opAllocNodeIdReq.m_tracker.done())
6460   {
6461     jam();
6462     return;
6463   }
6464 
6465   if (opAllocNodeIdReq.m_connectCount !=
6466       getNodeInfo(refToNode(opAllocNodeIdReq.m_req.senderRef)).m_connectCount)
6467   {
6468     // management server not same version as the original requester
6469     jam();
6470     return;
6471   }
6472 
6473   if (opAllocNodeIdReq.m_tracker.hasRef())
6474   {
6475     jam();
6476 
6477     {
6478       /**
6479        * Clear reservation
6480        */
6481       NodeRecPtr nodePtr;
6482       nodePtr.i = opAllocNodeIdReq.m_req.nodeId;
6483       ptrAss(nodePtr, nodeRec);
6484       nodePtr.p->m_secret = 0;
6485     }
6486 
6487     AllocNodeIdRef * ref = (AllocNodeIdRef*)signal->getDataPtrSend();
6488     ref->senderRef = reference();
6489     ref->senderData = opAllocNodeIdReq.m_req.senderData;
6490     ref->nodeId = opAllocNodeIdReq.m_req.nodeId;
6491     ref->errorCode = opAllocNodeIdReq.m_error;
6492     ref->masterRef = numberToRef(QMGR, cpresident);
6493     ndbassert(AllocNodeIdRef::SignalLength == 5);
6494     sendSignal(opAllocNodeIdReq.m_req.senderRef, GSN_ALLOC_NODEID_REF, signal,
6495                AllocNodeIdRef::SignalLength, JBB);
6496     return;
6497   }
6498 
6499   jam();
6500 
6501   AllocNodeIdConf * conf = (AllocNodeIdConf*)signal->getDataPtrSend();
6502   conf->senderRef = reference();
6503   conf->senderData = opAllocNodeIdReq.m_req.senderData;
6504   conf->nodeId = opAllocNodeIdReq.m_req.nodeId;
6505   conf->secret_lo = opAllocNodeIdReq.m_req.secret_lo;
6506   conf->secret_hi = opAllocNodeIdReq.m_req.secret_hi;
6507   sendSignal(opAllocNodeIdReq.m_req.senderRef, GSN_ALLOC_NODEID_CONF, signal,
6508              AllocNodeIdConf::SignalLength, JBB);
6509 }
6510 
6511 void
execSTOP_REQ(Signal * signal)6512 Qmgr::execSTOP_REQ(Signal* signal)
6513 {
6514   jamEntry();
6515   c_stopReq = * (StopReq*)signal->getDataPtr();
6516 
6517   if (c_stopReq.senderRef)
6518   {
6519     jam();
6520     ndbrequire(NdbNodeBitmask::get(c_stopReq.nodes, getOwnNodeId()));
6521 
6522     StopConf *conf = (StopConf*)signal->getDataPtrSend();
6523     conf->senderData = c_stopReq.senderData;
6524     conf->nodeState = getOwnNodeId();
6525     sendSignal(c_stopReq.senderRef,
6526 	       GSN_STOP_CONF, signal, StopConf::SignalLength, JBA);
6527   }
6528 }
6529 
6530 bool
check_multi_node_shutdown(Signal * signal)6531 Qmgr::check_multi_node_shutdown(Signal* signal)
6532 {
6533   if (c_stopReq.senderRef &&
6534       NdbNodeBitmask::get(c_stopReq.nodes, getOwnNodeId()))
6535   {
6536     jam();
6537     if(StopReq::getPerformRestart(c_stopReq.requestInfo))
6538     {
6539       jam();
6540       StartOrd * startOrd = (StartOrd *)&signal->theData[0];
6541       startOrd->restartInfo = c_stopReq.requestInfo;
6542       sendSignal(CMVMI_REF, GSN_START_ORD, signal, 2, JBA);
6543     } else {
6544       sendSignal(CMVMI_REF, GSN_STOP_ORD, signal, 1, JBA);
6545     }
6546     return true;
6547   }
6548   return false;
6549 }
6550 
6551 int
check_hb_order_config()6552 Qmgr::check_hb_order_config()
6553 {
6554   m_hb_order_config_used = false;
6555   Uint32 count = 0;
6556   Uint32 count_zero = 0;
6557   NodeRecPtr nodePtr;
6558   for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++)
6559   {
6560     ptrAss(nodePtr, nodeRec);
6561     const NodeInfo& nodeInfo = getNodeInfo(nodePtr.i);
6562     if (nodeInfo.m_type == NodeInfo::DB)
6563     {
6564       count++;
6565       if (nodePtr.p->hbOrder == 0)
6566         count_zero++;
6567     }
6568   }
6569   ndbrequire(count != 0); // must have node info
6570   if (count_zero == count)
6571   {
6572     jam();
6573     return 0; // no hbOrder defined
6574   }
6575   if (count_zero != 0)
6576   {
6577     jam();
6578     return -1; // error: not all zero or all nonzero
6579   }
6580   for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++)
6581   {
6582     ptrAss(nodePtr, nodeRec);
6583     const NodeInfo& nodeInfo = getNodeInfo(nodePtr.i);
6584     if (nodeInfo.m_type == NodeInfo::DB)
6585     {
6586       NodeRecPtr nodePtr2;
6587       for (nodePtr2.i = nodePtr.i + 1; nodePtr2.i < MAX_NDB_NODES; nodePtr2.i++)
6588       {
6589         ptrAss(nodePtr2, nodeRec);
6590         const NodeInfo& nodeInfo2 = getNodeInfo(nodePtr2.i);
6591         if (nodeInfo2.m_type == NodeInfo::DB)
6592         {
6593           if (nodePtr.i != nodePtr2.i &&
6594               nodePtr.p->hbOrder == nodePtr2.p->hbOrder)
6595           {
6596             jam();
6597             return -2; // error: duplicate nonzero value
6598           }
6599         }
6600       }
6601     }
6602   }
6603   m_hb_order_config_used = true;
6604   return 0;
6605 }
6606 
6607 static const Uint32 CC_SuspectTicks = 1;
6608 static const Uint32 CC_FailedTicks = 2;
6609 
6610 void
startConnectivityCheck(Signal * signal,Uint32 reason,Uint32 causingNode)6611 Qmgr::startConnectivityCheck(Signal* signal, Uint32 reason, Uint32 causingNode)
6612 {
6613   jam();
6614   ndbrequire(m_connectivity_check.getEnabled());
6615 
6616   if (m_connectivity_check.m_active)
6617   {
6618     jam();
6619     /* Connectivity check underway already
6620      * do nothing
6621      */
6622     return;
6623   }
6624 
6625 
6626   m_connectivity_check.m_nodesPinged.clear();
6627 
6628   /* Send NODE_PINGREQ signal to all other running nodes, and
6629    * initialise connectivity check bitmasks.
6630    * Note that nodes may already be considered suspect due to
6631    * a previous connectivity check round.
6632    */
6633   Uint32 ownId = getOwnNodeId();
6634   NodePingReq* pingReq = CAST_PTR(NodePingReq, &signal->theData[0]);
6635   pingReq->senderData = ++m_connectivity_check.m_currentRound;
6636   pingReq->senderRef = reference();
6637 
6638   for (Uint32 i=1; i < MAX_NDB_NODES; i++)
6639   {
6640     if (i != ownId)
6641     {
6642       NodeRec& node = nodeRec[i];
6643       if (node.phase == ZRUNNING)
6644       {
6645         /* If connection was considered ok, treat as unknown,
6646          * If it was considered slow, continue to treat
6647          *   as slow
6648          */
6649         sendSignal(node.blockRef,
6650                    GSN_NODE_PING_REQ,
6651                    signal,
6652                    NodePingReq::SignalLength,
6653                    JBA);
6654 
6655         m_connectivity_check.m_nodesPinged.set(i);
6656       }
6657     }
6658   }
6659 
6660   /* Initialise result bitmasks */
6661   m_connectivity_check.m_nodesWaiting.assign(m_connectivity_check.m_nodesPinged);
6662   m_connectivity_check.m_nodesFailedDuring.clear();
6663 
6664   /* Ensure only live nodes are considered suspect */
6665   m_connectivity_check.m_nodesSuspect.bitAND(m_connectivity_check.m_nodesPinged);
6666 
6667   const char* reasonText = "Unknown";
6668   bool firstTime = true;
6669 
6670   switch(reason)
6671   {
6672   case FailRep::ZHEARTBEAT_FAILURE:
6673     reasonText = "Heartbeat failure";
6674     break;
6675   case FailRep::ZCONNECT_CHECK_FAILURE:
6676     reasonText = "Connectivity check request";
6677     break;
6678   default:
6679     firstTime = false;
6680     ndbrequire(m_connectivity_check.m_nodesSuspect.count() > 0);
6681     break;
6682   }
6683 
6684   if (!m_connectivity_check.m_nodesPinged.isclear())
6685   {
6686     jam();
6687     {
6688       char buff[100];
6689       m_connectivity_check.m_nodesPinged.getText(buff);
6690       if (firstTime)
6691       {
6692         g_eventLogger->info("QMGR : Starting connectivity check of %u other nodes (%s) due to %s from node %u.",
6693                             m_connectivity_check.m_nodesPinged.count(),
6694                             buff,
6695                             reasonText,
6696                             causingNode);
6697       }
6698       else
6699       {
6700         char buff2[100];
6701         m_connectivity_check.m_nodesSuspect.getText(buff2);
6702         g_eventLogger->info("QMGR : Restarting connectivity check of %u other nodes (%s) due to %u syspect nodes (%s)",
6703                             m_connectivity_check.m_nodesPinged.count(),
6704                             buff,
6705                             m_connectivity_check.m_nodesSuspect.count(),
6706                             buff2);
6707       }
6708     }
6709 
6710     /* Generate cluster log event */
6711     Uint32 bitmaskSz = NdbNodeBitmask::Size;
6712     signal->theData[0] = NDB_LE_ConnectCheckStarted;
6713     signal->theData[1] = m_connectivity_check.m_nodesPinged.count();
6714     signal->theData[2] = reason;
6715     signal->theData[3] = causingNode;
6716     signal->theData[4] = bitmaskSz;
6717     Uint32* sigPtr = &signal->theData[5];
6718     m_connectivity_check.m_nodesPinged.copyto(bitmaskSz, sigPtr); sigPtr+= bitmaskSz;
6719     m_connectivity_check.m_nodesSuspect.copyto(bitmaskSz, sigPtr);
6720     sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 5 + (2 * bitmaskSz), JBB);
6721 
6722     m_connectivity_check.m_active = true;
6723     m_connectivity_check.m_tick = 0;
6724     NDB_TICKS now = NdbTick_CurrentMillisecond();
6725     m_connectivity_check.m_timer.reset(now);
6726   }
6727   else
6728   {
6729     g_eventLogger->info("QMGR : Connectivity check requested due to %s (from %u) not started as no other running nodes.",
6730                         reasonText,
6731                         causingNode);
6732   }
6733 }
6734 
6735 void
execNODE_PINGREQ(Signal * signal)6736 Qmgr::execNODE_PINGREQ(Signal* signal)
6737 {
6738   jamEntry();
6739   Uint32 ownId = getOwnNodeId();
6740   const NodePingReq* pingReq = CAST_CONSTPTR(NodePingReq, &signal->theData[0]);
6741   Uint32 sendersRef = signal->getSendersBlockRef();
6742   Uint32 sendersNodeId = refToNode(sendersRef);
6743   Uint32 senderData = pingReq->senderData;
6744 
6745   ndbrequire(sendersNodeId != ownId);
6746 
6747   /* We will start our own connectivity check if necessary
6748    * before responding with PING_CONF to the requestor.
6749    * This means that the sending node will receive our PING_REQ
6750    * before our PING_CONF, which should avoid them starting an
6751    * unnecessary extra connectivity check round in some cases.
6752    */
6753   if (likely(m_connectivity_check.getEnabled()))
6754   {
6755     jam();
6756     /* We have connectivity checking configured */
6757     if (! m_connectivity_check.m_active)
6758     {
6759       jam();
6760 
6761       {
6762         /* Don't start a new connectivity check if the requesting
6763          * node has failed from our point of view
6764          */
6765         NodeRecPtr nodePtr;
6766         nodePtr.i = sendersNodeId;
6767         ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
6768         if (unlikely(nodePtr.p->phase != ZRUNNING))
6769         {
6770           jam();
6771 
6772           g_eventLogger->warning("QMGR : Discarding NODE_PINGREQ from non-running node %u (%u)",
6773                                  sendersNodeId, nodePtr.p->phase);
6774           return;
6775         }
6776       }
6777 
6778       /* Start our own Connectivity Check now indicating reason and causing node */
6779       startConnectivityCheck(signal, FailRep::ZCONNECT_CHECK_FAILURE, sendersNodeId);
6780     }
6781   }
6782   else
6783   {
6784     jam();
6785     g_eventLogger->warning("QMGR : NODE_PINGREQ received from node %u, but connectivity "
6786                            "checking not configured on this node.  Ensure all "
6787                            "nodes have the same configuration for parameter "
6788                            "ConnectCheckIntervalMillis.",
6789                            sendersNodeId);
6790   }
6791 
6792   /* Now respond with NODE_PINGCONF */
6793   NodePingConf* pingConf = CAST_PTR(NodePingConf, &signal->theData[0]);
6794 
6795   pingConf->senderData = senderData;
6796   pingConf->senderRef = reference();
6797 
6798   sendSignal(sendersRef,
6799              GSN_NODE_PING_CONF,
6800              signal,
6801              NodePingConf::SignalLength,
6802              JBA);
6803 }
6804 
6805 void
reportNodeConnect(Uint32 nodeId)6806 Qmgr::ConnectCheckRec::reportNodeConnect(Uint32 nodeId)
6807 {
6808   /* Clear any suspicion */
6809   m_nodesSuspect.clear(nodeId);
6810 }
6811 
6812 bool
reportNodeFailure(Uint32 nodeId)6813 Qmgr::ConnectCheckRec::reportNodeFailure(Uint32 nodeId)
6814 {
6815   if (unlikely(m_active))
6816   {
6817     m_nodesFailedDuring.set(nodeId);
6818 
6819     if (m_nodesWaiting.get(nodeId))
6820     {
6821       /* We were waiting for a NODE_PING_CONF from this node,
6822        * remove it from the set
6823        */
6824       m_nodesWaiting.clear(nodeId);
6825 
6826       return m_nodesWaiting.isclear();
6827     }
6828   }
6829   return false;
6830 }
6831 
6832 void
execNODE_PINGCONF(Signal * signal)6833 Qmgr::execNODE_PINGCONF(Signal* signal)
6834 {
6835   jamEntry();
6836 
6837   ndbrequire(m_connectivity_check.getEnabled());
6838 
6839   const NodePingConf* pingConf = CAST_CONSTPTR(NodePingConf, &signal->theData[0]);
6840   Uint32 sendersBlockRef = signal->getSendersBlockRef();
6841   Uint32 sendersNodeId = refToNode(sendersBlockRef);
6842   Uint32 roundNumber = pingConf->senderData;
6843 
6844   ndbrequire(sendersNodeId != getOwnNodeId());
6845   ndbrequire((m_connectivity_check.m_active)                                || /* Normal */
6846              (m_connectivity_check.m_nodesWaiting.get(sendersNodeId)          || /* We killed last round */
6847               m_connectivity_check.m_nodesFailedDuring.get(sendersNodeId)));     /* Someone killed */
6848 
6849   if (unlikely((! m_connectivity_check.m_active) ||
6850                (roundNumber != m_connectivity_check.m_currentRound)))
6851   {
6852     g_eventLogger->warning("QMGR : Received NODEPING_CONF from node %u for round %u, "
6853                            "but we are %sactive on round %u.  Discarding.",
6854                            sendersNodeId,
6855                            roundNumber,
6856                            ((m_connectivity_check.m_active)?"":"in"),
6857                            m_connectivity_check.m_currentRound);
6858     return;
6859   }
6860 
6861   /* Node must have been pinged, we must be waiting for the response,
6862    * or the node must have already failed
6863    */
6864   ndbrequire(m_connectivity_check.m_nodesPinged.get(sendersNodeId));
6865   ndbrequire(m_connectivity_check.m_nodesWaiting.get(sendersNodeId) ||
6866              m_connectivity_check.m_nodesFailedDuring.get(sendersNodeId));
6867 
6868   m_connectivity_check.m_nodesWaiting.clear(sendersNodeId);
6869 
6870   if (likely(m_connectivity_check.m_tick < CC_SuspectTicks))
6871   {
6872     jam();
6873     /* Node responded on time, clear any suspicion about it */
6874     m_connectivity_check.m_nodesSuspect.clear(sendersNodeId);
6875   }
6876 
6877   if (m_connectivity_check.m_nodesWaiting.isclear())
6878   {
6879     jam();
6880     /* Connectivity check round is now finished */
6881     connectivityCheckCompleted(signal);
6882   }
6883 }
6884 
6885 void
connectivityCheckCompleted(Signal * signal)6886 Qmgr::connectivityCheckCompleted(Signal* signal)
6887 {
6888   jam();
6889 
6890   m_connectivity_check.m_active = false;
6891 
6892   /* Log the following :
6893    * Nodes checked
6894    * Nodes responded ok
6895    * Nodes responded late (now suspect)
6896    * Nodes failed to respond.
6897    * Nodes failed during
6898    */
6899   char pinged[100];
6900   char late[100];
6901   char silent[100];
6902   char failed[100];
6903 
6904   /* Any 'waiting' nodes have been killed
6905    * Surviving suspects do not include them.
6906    */
6907   NdbNodeBitmask survivingSuspects(m_connectivity_check.m_nodesSuspect);
6908   survivingSuspects.bitANDC(m_connectivity_check.m_nodesWaiting);
6909 
6910   /* Nodes that failed during the check are also excluded */
6911   survivingSuspects.bitANDC(m_connectivity_check.m_nodesFailedDuring);
6912 
6913   m_connectivity_check.m_nodesPinged.getText(pinged);
6914   survivingSuspects.getText(late);
6915   m_connectivity_check.m_nodesWaiting.getText(silent);
6916   m_connectivity_check.m_nodesFailedDuring.getText(failed);
6917 
6918   g_eventLogger->info("QMGR : Connectivity check completed, "
6919                       "%u other nodes checked (%s), "
6920                       "%u responded on time, "
6921                       "%u responded late (%s), "
6922                       "%u no response will be failed (%s), "
6923                       "%u failed during check (%s)\n",
6924                       m_connectivity_check.m_nodesPinged.count(),
6925                       pinged,
6926                       m_connectivity_check.m_nodesPinged.count() -
6927                       m_connectivity_check.m_nodesSuspect.count(),
6928                       survivingSuspects.count(),
6929                       late,
6930                       m_connectivity_check.m_nodesWaiting.count(),
6931                       silent,
6932                       m_connectivity_check.m_nodesFailedDuring.count(),
6933                       failed);
6934 
6935   /* Log in Cluster log */
6936   signal->theData[0] = NDB_LE_ConnectCheckCompleted;
6937   signal->theData[1] = m_connectivity_check.m_nodesPinged.count();
6938   signal->theData[2] = survivingSuspects.count();
6939   signal->theData[3] = m_connectivity_check.m_nodesWaiting.count() +
6940     m_connectivity_check.m_nodesFailedDuring.count();
6941 
6942   sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);
6943 
6944   if (survivingSuspects.count() > 0)
6945   {
6946     jam();
6947     /* Still suspect nodes, start another round */
6948     g_eventLogger->info("QMGR : Starting new connectivity check due to suspect nodes.");
6949     /* Restart connectivity check, no external reason or cause */
6950     startConnectivityCheck(signal, 0, 0);
6951   }
6952   else
6953   {
6954     jam();
6955     /* No suspect nodes, stop the protocol now */
6956 
6957     g_eventLogger->info("QMGR : All other nodes (%u) connectivity ok.",
6958                         m_connectivity_check.m_nodesPinged.count() -
6959                         (m_connectivity_check.m_nodesWaiting.count() +
6960                          m_connectivity_check.m_nodesFailedDuring.count()));
6961 
6962     /* Send a heartbeat to our right neighbour at this point as a gesture
6963      * of goodwill
6964      */
6965     sendHeartbeat(signal);
6966     hb_send_timer.reset(NdbTick_CurrentMillisecond());
6967   };
6968 }
6969 
6970 void
checkConnectivityTimeSignal(Signal * signal)6971 Qmgr::checkConnectivityTimeSignal(Signal* signal)
6972 {
6973   /* Executed periodically when a connectivity check is
6974    * underway.
6975    * After CC_SuspectTicks have elapsed, any nodes
6976    * which have not responded are considered
6977    * 'Suspect'.
6978    * After CC_FailedTicks have elapsed, any nodes
6979    * which have not responded are considered
6980    * to have failed, and failure handling
6981    * begins.
6982    */
6983   jam();
6984 
6985   /* Preconditions, otherwise we shouldn't have been called */
6986   ndbrequire(m_connectivity_check.getEnabled());
6987   ndbrequire(m_connectivity_check.m_active);
6988   ndbrequire(!m_connectivity_check.m_nodesWaiting.isclear());
6989 
6990   m_connectivity_check.m_tick++;
6991 
6992   switch (m_connectivity_check.m_tick)
6993   {
6994   case CC_SuspectTicks:
6995   {
6996     jam();
6997     /* Still waiting to hear from some nodes, they are now
6998      * suspect
6999      */
7000     m_connectivity_check.m_nodesSuspect.bitOR(m_connectivity_check.m_nodesWaiting);
7001     return;
7002   }
7003   case CC_FailedTicks:
7004   {
7005     jam();
7006     /* Still waiting to hear from some nodes, they will now
7007      * be failed
7008      */
7009     m_connectivity_check.m_active = false;
7010     Uint32 nodeId = 0;
7011 
7012     while ((nodeId = m_connectivity_check.m_nodesWaiting.find(nodeId))
7013            != BitmaskImpl::NotFound)
7014     {
7015       jam();
7016       /* Log failure reason */
7017       /* Todo : Connectivity Check specific failure log? */
7018       signal->theData[0] = NDB_LE_DeadDueToHeartbeat;
7019       signal->theData[1] = nodeId;
7020 
7021       sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
7022 
7023       /* Fail the node */
7024       /* TODO : Consider real time break here */
7025       failReportLab(signal, nodeId, FailRep::ZCONNECT_CHECK_FAILURE, getOwnNodeId());
7026       nodeId++;
7027     }
7028 
7029     /* Now handle the end of the Connectivity Check */
7030     connectivityCheckCompleted(signal);
7031   }
7032   }
7033 }
7034 
7035 bool
isNodeConnectivitySuspect(Uint32 nodeId) const7036 Qmgr::isNodeConnectivitySuspect(Uint32 nodeId) const
7037 {
7038   return m_connectivity_check.m_nodesSuspect.get(nodeId);
7039 }
7040 
7041 void
handleFailFromSuspect(Signal * signal,Uint32 reason,Uint16 aFailedNode,Uint16 sourceNode)7042 Qmgr::handleFailFromSuspect(Signal* signal,
7043                             Uint32 reason,
7044                             Uint16 aFailedNode,
7045                             Uint16 sourceNode)
7046 {
7047   jam();
7048 
7049   const char* reasonText = "Unknown";
7050 
7051   /* We have received a failure report about some node X from
7052    * some other node that we consider to have suspect connectivity
7053    * which may have caused the report.
7054    *
7055    * We will 'invert' the sense of this, and handle it as
7056    * a failure report of the sender, with the same cause.
7057    */
7058   switch(reason)
7059   {
7060   case FailRep::ZCONNECT_CHECK_FAILURE:
7061     jam();
7062     /* Suspect says that connectivity check failed for another node.
7063      * As suspect has bad connectivity from our point of view, we
7064      * blame him.
7065      */
7066     reasonText = "ZCONNECT_CHECK_FAILURE";
7067     break;
7068   case FailRep::ZLINK_FAILURE:
7069     jam();
7070     /* Suspect says that link failed for another node.
7071      * As suspect has bad connectivity from our point of view, we
7072      * blame her.
7073      */
7074     reasonText = "ZLINK_FAILURE";
7075     break;
7076   default:
7077     ndbrequire(false);
7078   }
7079 
7080   g_eventLogger->warning("QMGR : Received Connectivity failure notification about "
7081                          "%u from suspect node %u with reason %s.  "
7082                          "Mapping to failure of %u sourced by me.",
7083                          aFailedNode, sourceNode, reasonText, sourceNode);
7084 
7085   signal->theData[0] = NDB_LE_NodeFailRejected;
7086   signal->theData[1] = reason;
7087   signal->theData[2] = aFailedNode;
7088   signal->theData[3] = sourceNode;
7089 
7090   sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);
7091 
7092   failReportLab(signal, sourceNode, (FailRep::FailCause) reason, getOwnNodeId());
7093 }
7094