1 /*
2    Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License, version 2.0,
6    as published by the Free Software Foundation.
7 
8    This program is also distributed with certain software (including
9    but not limited to OpenSSL) that is licensed under separate terms,
10    as designated in a particular file or component or in included license
11    documentation.  The authors of MySQL hereby grant you an additional
12    permission to link the program and your derivative works with the
13    separately licensed software that they have included with MySQL.
14 
15    This program is distributed in the hope that it will be useful,
16    but WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18    GNU General Public License, version 2.0, for more details.
19 
20    You should have received a copy of the GNU General Public License
21    along with this program; if not, write to the Free Software
22    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
23 */
24 
25 #define QMGR_C
26 #include "Qmgr.hpp"
27 #include <pc.hpp>
28 #include <NdbTick.h>
29 #include <signaldata/NodeRecoveryStatusRep.hpp>
30 #include <signaldata/EventReport.hpp>
31 #include <signaldata/StartOrd.hpp>
32 #include <signaldata/CloseComReqConf.hpp>
33 #include <signaldata/PrepFailReqRef.hpp>
34 #include <signaldata/NodeFailRep.hpp>
35 #include <signaldata/ReadNodesConf.hpp>
36 #include <signaldata/NFCompleteRep.hpp>
37 #include <signaldata/CheckNodeGroups.hpp>
38 #include <signaldata/ArbitSignalData.hpp>
39 #include <signaldata/ApiRegSignalData.hpp>
40 #include <signaldata/ApiVersion.hpp>
41 #include <signaldata/BlockCommitOrd.hpp>
42 #include <signaldata/FailRep.hpp>
43 #include <signaldata/DisconnectRep.hpp>
44 #include <signaldata/ApiBroadcast.hpp>
45 #include <signaldata/Upgrade.hpp>
46 #include <signaldata/EnableCom.hpp>
47 #include <signaldata/RouteOrd.hpp>
48 #include <signaldata/NodePing.hpp>
49 #include <signaldata/DihRestart.hpp>
50 #include <signaldata/DumpStateOrd.hpp>
51 #include <signaldata/IsolateOrd.hpp>
52 #include <signaldata/ProcessInfoRep.hpp>
53 #include <signaldata/LocalSysfile.hpp>
54 #include <signaldata/SyncThreadViaReqConf.hpp>
55 #include <signaldata/TakeOverTcConf.hpp>
56 #include <signaldata/GetNumMultiTrp.hpp>
57 #include <signaldata/Sync.hpp>
58 #include <ndb_version.h>
59 #include <OwnProcessInfo.hpp>
60 #include <NodeInfo.hpp>
61 #include <NdbSleep.h>
62 
63 #include <TransporterRegistry.hpp> // Get connect address
64 
65 #include "../dbdih/Dbdih.hpp"
66 #include <EventLogger.hpp>
67 extern EventLogger * g_eventLogger;
68 
69 #if (defined(VM_TRACE) || defined(ERROR_INSERT))
70 //#define DEBUG_MULTI_TRP 1
71 //#define DEBUG_STARTUP 1
72 //#define DEBUG_ARBIT 1
73 #endif
74 
75 #ifdef DEBUG_ARBIT
76 #define DEB_ARBIT(arglist) do { g_eventLogger->info arglist ; } while (0)
77 #else
78 #define DEB_ARBIT(arglist) do { } while (0)
79 #endif
80 
81 #ifdef DEBUG_MULTI_TRP
82 #define DEB_MULTI_TRP(arglist) do { g_eventLogger->info arglist ; } while (0)
83 #else
84 #define DEB_MULTI_TRP(arglist) do { } while (0)
85 #endif
86 
87 #ifdef DEBUG_STARTUP
88 #define DEB_STARTUP(arglist) do { g_eventLogger->info arglist ; } while (0)
89 #else
90 #define DEB_STARTUP(arglist) do { } while (0)
91 #endif
92 
93 //#define DEBUG_QMGR_START
94 #ifdef DEBUG_QMGR_START
95 #include <DebuggerNames.hpp>
96 #define QMGR_DEBUG(x) ndbout << "QMGR " << __LINE__ << ": " << x << endl
97 #define DEBUG_START(gsn, node, msg) QMGR_DEBUG(getSignalName(gsn) << " to: " << node << " - " << msg)
98 #define DEBUG_START2(gsn, rg, msg) { \
99   char nodes[NdbNodeBitmask::TextLength + 1]; \
100   QMGR_DEBUG(getSignalName(gsn) << " to: " << rg.m_nodes.getText(nodes) << " - " << msg); \
101 }
102 #define DEBUG_START3(signal, msg) QMGR_DEBUG(getSignalName(signal->header.theVerId_signalNumber) << " from " << refToNode(signal->getSendersBlockRef()) << " - " << msg);
103 #else
104 #define QMGR_DEBUG(x)
105 #define DEBUG_START(gsn, node, msg)
106 #define DEBUG_START2(gsn, rg, msg)
107 #define DEBUG_START3(signal, msg)
108 #endif
109 
110 #define JAM_FILE_ID 360
111 
112 
113 /**
114  * QMGR provides the following services:
115  *
116  * 1) Node id allocation
117  * ---------------------
118  *    This is a service provided to the Management server when a node is
119  *    requesting a config.
120  *
121  * 2) Heartbeat service for data nodes
122  * -----------------------------------
123  *    This is a service provided to the upper levels in NDB. When the
124  *    heartbeat discovers a failure it will send a FAIL_REP signal to
125  *    NDBCNTR.
126  *
127  * 3) Master assignment
128  * --------------------
129  *    NDB relies on that a new master can be allocated at each failure
130  *    through the usage of an algorithm to calculate the next master.
131  *    To handle this nodes are entering the cluster one node at a time.
132  *    This gives each node a dynamic node id, the new master is simply
133  *    selected as the node with the lowest dynamic id.
134  *
135  *    When the cluster is started from scratch it is important to select
136  *    a master that is actually part of the cluster startup and not
137  *    started later through a node restart handling. To handle this
138  *    QMGR makes use of the DIH_RESTART service provided by DIH.
139  *    This service will provide the GCI that the node can be started
140  *    from. This GCI is sent in each CM_REGREQ signal to ensure that
141  *    each node can decide whether they should be assigned as master
142  *    of the cluster.
143  *
144  *    In QMGR the master is called President and in DIH, NDBCNTR and DICT
145  *    the node is called master node. All these roles are always given
146  *    to the same node. Most protocols have a master role and thus most
147  *    protocols need to handle master take over.
148  *
149  * 4) Transactional node failure service
150  * -------------------------------------
151  *    Whenever a node fails, we need to ensure that all nodes agree on the
152  *    failed nodes. To handle this QMGR uses a prepare phase where the
153  *    president sends a list of failed nodes, other nodes can add to this
154  *    list in which case a new prepare phase is started. After all nodes
155  *    have agreed on the list of failed nodes the QMGR president sends a
156  *    list of nodes in the COMMIT_FAILREQ signal that specifies which nodes
157  *    have failed. This list is then sent up to NDBCNTR that handles the
158  *    spreading of this information to all other blocks in the NDB data
159  *    node.
160  *
161  *    The information is also sent to the connected API nodes.
162  *
163  * 5) Arbitration service
164  * ----------------------
165  *    In the case where we are not sure if the cluster has been partitioned,
166  *    we need to query an arbitrator to decide whether our node should survive
167  *    the crash. If no arbitrator is assigned, the node will fail. The
168  *    arbitrator must be prepared before the crash happens, the arbitrator
169  *    can only be used for one response. After this response a new arbitrator
170  *    must be selected.
171  *
172  *    It is also possible to not use any arbitrator service provided by NDB.
173  *    In this case QMGR will write a message to the Cluster log and the
174  *    external arbitrator needs to take action and shut down the node that
175  *    it wants to not survive.
176  *
177  * 6) Skip node service
178  * --------------------
179  *    When starting a data node it is possible to select a set of nodes to not
180  *    wait for in cluster restart. These nodes are provided as startup
181  *    parameter in ndbmtd/ndbd, --nowait-nodes.
182  *
183  * 7) Heartbeat service for API nodes
184  * ----------------------------------
185  *    QMGR sends heartbeat signals to all API nodes connected with some delay.
186  *    If API doesn't send any response, it will shut down the API connection.
187  *
188  * 8) Read nodes service
189  * ---------------------
190  *    This is used to check nodes in certain situations.
191  *
192  * 9) Connectivity check service
193  * -----------------------------
194  *    In the case of node failures we can configure NDB to make a full
195  *    connectivity check before deciding which nodes to assign as failed
196  *    nodes.
197  *
198  * 10) Ndbinfo membership table
199  * ----------------------------
200  *    Reports the current setup of nodes, their dynamic ids and neighbours.
201  *
202  * 11) Ndbinfo process table
203  * -------------------------
204  *    Reports various information required to manage NDB Cluster.
205  *
206  * 12) Isolate node service
207  * ------------------------
208  *    Connected to the connectivity check service.
209  *
210  * 13) Global node state service
211  * -----------------------------
212  *    Service used by many other blocks to inform them of node status.
213  *
214  * QMGR uses the following services:
215  *
216  * 1) Connect service
217  * ------------------
218  *    The transporter will inform QMGR about nodes connected through the
219  *    CONNECT_REP signal.
220  *
221  * 2) Check node group service in DIH
222  * ----------------------------------
223  *    Used by master assignment service and node failure services.
224  *
225  * 3) DIH_RESTART service in DIH
226  * -----------------------------
227  *    See above in master assignment service.
228  *
229  * 4) Block commit service
230  * -----------------------
231  *    Block commits when we form a new cluster after node failures.
232  *    This service is provided by DIH.
233  *
234  * 5) Close communication service
235  * ------------------------------
236  *    We need to inform transporter when a node has failed to ensure
237  *    the transporter will close the communication to this node.
238  *
239  * 6) Enable communication service
240  * -------------------------------
241  *    We need to enable communication to a node after we finished node
242  *    failure handling for a node.
243  */
244 
245 /**
246  * c_start.m_gsn = GSN_CM_REGREQ
247  *   Possible for all nodes
248  *   c_start.m_nodes contains all nodes in config
249  *
250  * c_start.m_gsn = GSN_CM_NODEINFOREQ;
251  *   Set when receiving CM_REGCONF
252  *   State possible for starting node only (not in cluster)
253  *
 *   c_start.m_nodes contains all nodes in the alive cluster
 *                   that have not replied to GSN_CM_NODEINFOREQ
256  *                   passed by president in GSN_CM_REGCONF
257  *
258  * c_start.m_gsn = GSN_CM_ADD
259  *   Possible for president only
260  *   Set when receiving and accepting CM_REGREQ (to include node)
261  *
262  *   c_start.m_nodes contains all nodes in alive cluster + starting node
263  *                   that has not replied to GSN_CM_ADD
264  *                   by sending GSN_CM_ACKADD
265  *
266  * c_start.m_gsn = GSN_CM_NODEINFOCONF
267  *   Possible for non presidents only
268  *     c_start.m_nodes contains a node that has been accepted by president
269  *     but has not connected to us yet
270  */
271 
272 // Signal entries and statement blocks
273 /* 4  P R O G R A M        */
274 /*******************************/
275 /* CMHEART_BEAT               */
276 /*******************************/
execCM_HEARTBEAT(Signal * signal)277 void Qmgr::execCM_HEARTBEAT(Signal* signal)
278 {
279   NodeRecPtr hbNodePtr;
280   jamEntry();
281   hbNodePtr.i = signal->theData[0];
282   ptrCheckGuard(hbNodePtr, MAX_NDB_NODES, nodeRec);
283   set_hb_count(hbNodePtr.i) = 0;
284   return;
285 }//Qmgr::execCM_HEARTBEAT()
286 
287 /*******************************/
288 /* CM_NODEINFOREF             */
289 /*******************************/
execCM_NODEINFOREF(Signal * signal)290 void Qmgr::execCM_NODEINFOREF(Signal* signal)
291 {
292   jamEntry();
293   systemErrorLab(signal, __LINE__);
294   return;
295 }//Qmgr::execCM_NODEINFOREF()
296 
297 /*******************************/
298 /* CONTINUEB                  */
299 /*******************************/
execCONTINUEB(Signal * signal)300 void Qmgr::execCONTINUEB(Signal* signal)
301 {
302   jamEntry();
303   const Uint32 tcontinuebType = signal->theData[0];
304   const Uint32 tdata0 = signal->theData[1];
305   const Uint32 tdata1 = signal->theData[2];
306   switch (tcontinuebType) {
307   case ZREGREQ_TIMELIMIT:
308     jam();
309     if (c_start.m_startKey != tdata0 || c_start.m_startNode != tdata1) {
310       jam();
311       return;
312     }//if
313     regreqTimeLimitLab(signal);
314     break;
315   case ZREGREQ_MASTER_TIMELIMIT:
316     jam();
317     if (c_start.m_startKey != tdata0 || c_start.m_startNode != tdata1) {
318       jam();
319       return;
320     }//if
321     //regreqMasterTimeLimitLab(signal);
322     failReportLab(signal,
323                   c_start.m_startNode,
324                   FailRep::ZSTART_IN_REGREQ,
325                   getOwnNodeId());
326     return;
327   case ZTIMER_HANDLING:
328     jam();
329     timerHandlingLab(signal);
330     return;
331   case ZARBIT_HANDLING:
332     jam();
333     runArbitThread(signal);
334     return;
335   case ZSTART_FAILURE_LIMIT:{
336     if (cpresident != ZNIL)
337     {
338       jam();
339       return;
340     }
341     const NDB_TICKS now = NdbTick_getCurrentTicks();
342     const Uint64 elapsed = NdbTick_Elapsed(c_start_election_time,now).milliSec();
343     if (c_restartFailureTimeout != Uint32(~0) &&
344         elapsed > c_restartFailureTimeout)
345     {
346       jam();
347       BaseString tmp;
348       tmp.append("Shutting down node as total restart time exceeds "
349 		 " StartFailureTimeout as set in config file ");
350       if(c_restartFailureTimeout == (Uint32) ~0)
351 	tmp.append(" 0 (inifinite)");
352       else
353 	tmp.appfmt(" %d", c_restartFailureTimeout);
354 
355       progError(__LINE__, NDBD_EXIT_SYSTEM_ERROR, tmp.c_str());
356     }
357     signal->theData[0] = ZSTART_FAILURE_LIMIT;
358     sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 3000, 1);
359     return;
360   }
361   case ZNOTIFY_STATE_CHANGE:
362   {
363     jam();
364     handleStateChange(signal, tdata0);
365     return;
366   }
367   case ZCHECK_MULTI_TRP_CONNECT:
368   {
369     jam();
370     check_connect_multi_transporter(signal, tdata0);
371     return;
372   }
373   case ZRESEND_GET_NUM_MULTI_TRP_REQ:
374   {
375     jam();
376     send_get_num_multi_trp_req(signal, signal->theData[1]);
377     return;
378   }
379   case ZSWITCH_MULTI_TRP:
380   {
381     jam();
382     send_switch_multi_transporter(signal, signal->theData[1], true);
383     return;
384   }
385   default:
386     jam();
387     // ZCOULD_NOT_OCCUR_ERROR;
388     systemErrorLab(signal, __LINE__);
389     return;
390   }//switch
391   return;
392 }//Qmgr::execCONTINUEB()
393 
394 
execDEBUG_SIG(Signal * signal)395 void Qmgr::execDEBUG_SIG(Signal* signal)
396 {
397   NodeRecPtr debugNodePtr;
398   jamEntry();
399   debugNodePtr.i = signal->theData[0];
400   ptrCheckGuard(debugNodePtr, MAX_NODES, nodeRec);
401   return;
402 }//Qmgr::execDEBUG_SIG()
403 
404 /*******************************/
405 /* FAIL_REP                   */
406 /*******************************/
execFAIL_REP(Signal * signal)407 void Qmgr::execFAIL_REP(Signal* signal)
408 {
409   const FailRep * const failRep = (FailRep *)&signal->theData[0];
410   const NodeId failNodeId = failRep->failNodeId;
411   const FailRep::FailCause failCause = (FailRep::FailCause)failRep->failCause;
412   Uint32 failSource = failRep->getFailSourceNodeId(signal->length());
413   if (ERROR_INSERT_VALUE >= 951 && ERROR_INSERT_VALUE <= 960)
414   {
415     CRASH_INSERTION3();
416   }
417   if (!failSource)
418   {
419     /* Failure source not included, use sender of signal as 'source' */
420     failSource = refToNode(signal->getSendersBlockRef());
421   }
422 
423   CRASH_INSERTION(948);
424 
425   jamEntry();
426   failReportLab(signal, failNodeId, failCause, failSource);
427   return;
428 }//Qmgr::execFAIL_REP()
429 
430 /*******************************/
431 /* PRES_TOREQ                 */
432 /*******************************/
execPRES_TOREQ(Signal * signal)433 void Qmgr::execPRES_TOREQ(Signal* signal)
434 {
435   jamEntry();
436   BlockReference Tblockref = signal->theData[0];
437   signal->theData[0] = getOwnNodeId();
438   signal->theData[1] = ccommitFailureNr;
439   sendSignal(Tblockref, GSN_PRES_TOCONF, signal, 2, JBA);
440   return;
441 }//Qmgr::execPRES_TOREQ()
442 
443 void
execREAD_CONFIG_REQ(Signal * signal)444 Qmgr::execREAD_CONFIG_REQ(Signal* signal)
445 {
446   jamEntry();
447 
448   const ReadConfigReq * req = (ReadConfigReq*)signal->getDataPtr();
449 
450   Uint32 ref = req->senderRef;
451   Uint32 senderData = req->senderData;
452 
453   const ndb_mgm_configuration_iterator * p =
454     m_ctx.m_config.getOwnConfigIterator();
455   ndbrequire(p != 0);
456 
457   m_num_multi_trps = 0;
458   if (isNdbMt() && globalData.ndbMtSendThreads)
459   {
460     ndb_mgm_get_int_parameter(p,
461                               CFG_DB_NODE_GROUP_TRANSPORTERS,
462                               &m_num_multi_trps);
463     if (m_num_multi_trps == 0)
464     {
465       jam();
466       /**
467        * The default assignment is to use the same number of multi
468        * transporters as there are LDM instances in this node.
469        * So essentially each LDM thread will have its own transporter
470        * to the corresponding LDM thread in the other nodes in the
471        * same node group. This will ensure that I can assign the
472        * transporter to the send thread the LDM thread assists as
473        * well.
474        */
475       m_num_multi_trps = globalData.ndbMtLqhThreads;
476     }
477     else
478     {
479       jam();
480       /**
481        * No reason to use more sockets than the maximum threads in one
482        * thread group. We select the socket to use based on the
483        * instance id of the receiving thread. So if we use more sockets
484        * than threads in the largest thread group, there will be unused
485        * sockets.
486        *
487        * So we select the configured number unless the maximum number of
488        * LDM and/or TC threads is smaller than this number.
489        */
490       m_num_multi_trps = MIN(m_num_multi_trps,
491                          MAX(globalData.ndbMtLqhThreads,
492                              globalData.ndbMtTcThreads));
493     }
494     /**
495      * Whatever value this node has choosen, we will never be able to use
496      * more transporters than the other node permits as well. This will be
497      * established in the setup phase of multi transporters.
498      */
499   }
500   if (m_num_multi_trps == 0)
501   {
502     jam();
503     m_num_multi_trps = 1;
504   }
505   ReadConfigConf * conf = (ReadConfigConf*)signal->getDataPtrSend();
506   conf->senderRef = reference();
507   conf->senderData = senderData;
508   sendSignal(ref, GSN_READ_CONFIG_CONF, signal,
509 	     ReadConfigConf::SignalLength, JBB);
510 }
511 
512 void
execSTART_ORD(Signal * signal)513 Qmgr::execSTART_ORD(Signal* signal)
514 {
515   /**
516    * Start timer handling
517    */
518   const NDB_TICKS now = NdbTick_getCurrentTicks();
519   signal->theData[0] = ZTIMER_HANDLING;
520   signal->theData[1] = Uint32(now.getUint64() >> 32);
521   signal->theData[2] = Uint32(now.getUint64());
522   sendSignal(QMGR_REF, GSN_CONTINUEB, signal, 3, JBB);
523 }
524 
525 /*
526 4.2  ADD NODE MODULE*/
527 /*##########################################################################*/
528 /*
529 4.2.1 STTOR     */
530 /**--------------------------------------------------------------------------
531  * Start phase signal, must be handled by all blocks.
532  * QMGR is only interested in the first phase.
533  * During phase one we clear all registered applications.
534  *---------------------------------------------------------------------------*/
535 /*******************************/
536 /* STTOR                      */
537 /*******************************/
/**
 * STTOR: start phase signal from NDBCNTR.
 * QMGR acts in phase 1 (init data, start node inclusion protocol),
 * phase 7 (start arbitration thread if we are president) and phase 9
 * (allow API nodes to connect).  Phases 3 and 9 fall through to the
 * common STTORRY reply below the switch; phase 1 replies later, once
 * node inclusion has completed.
 */
void Qmgr::execSTTOR(Signal* signal)
{
  jamEntry();

  switch(signal->theData[1]){
  case 1:
    jam();
    initData(signal);
    g_eventLogger->info("Starting QMGR phase 1");
    c_ndbcntr = (Ndbcntr*)globalData.getBlock(NDBCNTR);
    startphase1(signal);
    recompute_version_info(NodeInfo::DB);
    recompute_version_info(NodeInfo::API);
    recompute_version_info(NodeInfo::MGM);
    // No STTORRY here; it is sent when the inclusion protocol finishes.
    return;
  case 3:
    jam();
    break;
  case 7:
    jam();
    // Only the president runs the arbitration thread.
    if (cpresident == getOwnNodeId())
    {
      jam();
      switch(arbitRec.method){
      case ArbitRec::DISABLED:
        jam();
        break;

      case ArbitRec::METHOD_EXTERNAL:
      case ArbitRec::METHOD_DEFAULT:
        /**
         * Start arbitration thread.  This could be done as soon as
         * we have all nodes (or a winning majority).
         */
        jam();
        handleArbitStart(signal);
        break;
      }
    }
    break;
  case 9:{
    jam();
    /**
     * Enable communication to all API nodes by setting state
     *   to ZFAIL_CLOSING (which will make it auto-open in checkStartInterface)
     */
    if (ERROR_INSERTED(949))
    {
      jam();
      g_eventLogger->info("QMGR : Delaying allow-api-connect processing");
      // Re-deliver this STTOR to ourselves after 1s to delay phase 9.
      sendSignalWithDelay(reference(), GSN_STTOR, signal, 1000, 2);
      return;
    }
    c_allow_api_connect = 1;
    NodeRecPtr nodePtr;
    for (nodePtr.i = 1; nodePtr.i < MAX_NODES; nodePtr.i++)
    {
      Uint32 type = getNodeInfo(nodePtr.i).m_type;
      if (type != NodeInfo::API)
        continue;

      ptrAss(nodePtr, nodeRec);
      jam();
      jamLine(Uint16(nodePtr.i));
      if (nodePtr.p->phase == ZAPI_INACTIVE)
      {
        jam();
        set_hb_count(nodePtr.i) = 3;
        nodePtr.p->phase = ZFAIL_CLOSING;
        nodePtr.p->failState = NORMAL;
      }
    }
  }
  }

  sendSttorryLab(signal, false);
  return;
}//Qmgr::execSTTOR()
616 
sendSttorryLab(Signal * signal,bool first_phase)617 void Qmgr::sendSttorryLab(Signal* signal, bool first_phase)
618 {
619   if (first_phase)
620   {
621     g_eventLogger->info("Include node protocol completed, phase 1 in QMGR"
622                         " completed");
623   }
624 /*****************************/
625 /*  STTORRY                  */
626 /*****************************/
627   signal->theData[3] = 3;
628   signal->theData[4] = 7;
629   signal->theData[5] = 9;
630   signal->theData[6] = 255;
631   sendSignal(NDBCNTR_REF, GSN_STTORRY, signal, 7, JBB);
632   return;
633 }//Qmgr::sendSttorryLab()
634 
startphase1(Signal * signal)635 void Qmgr::startphase1(Signal* signal)
636 {
637   jamEntry();
638 
639   NodeRecPtr nodePtr;
640   nodePtr.i = getOwnNodeId();
641   ptrAss(nodePtr, nodeRec);
642   nodePtr.p->phase = ZSTARTING;
643   DEB_STARTUP(("phase(%u) = ZSTARTING", nodePtr.i));
644 
645   DihRestartReq * req = CAST_PTR(DihRestartReq, signal->getDataPtrSend());
646   req->senderRef = reference();
647   sendSignal(DBDIH_REF, GSN_DIH_RESTARTREQ, signal,
648              DihRestartReq::SignalLength, JBB);
649   return;
650 }
651 
652 void
execDIH_RESTARTREF(Signal * signal)653 Qmgr::execDIH_RESTARTREF(Signal*signal)
654 {
655   jamEntry();
656 
657   ndbrequire(signal->getNoOfSections() == 1);
658   SectionHandle handle(this, signal);
659   SegmentedSectionPtr ptr;
660   handle.getSection(ptr, 0);
661   ndbrequire(ptr.sz <= NdbNodeBitmask::Size);
662   c_start.m_no_nodegroup_nodes.clear();
663   copy(c_start.m_no_nodegroup_nodes.rep.data, ptr);
664   releaseSections(handle);
665 
666   g_eventLogger->info("DIH reported initial start, now starting the"
667                       " Node Inclusion Protocol");
668   c_start.m_latest_gci = 0;
669   execCM_INFOCONF(signal);
670 }
671 
672 void
execDIH_RESTARTCONF(Signal * signal)673 Qmgr::execDIH_RESTARTCONF(Signal*signal)
674 {
675   jamEntry();
676 
677   ndbrequire(signal->getNoOfSections() == 1);
678   SectionHandle handle(this, signal);
679   SegmentedSectionPtr ptr;
680   handle.getSection(ptr, 0);
681   ndbrequire(ptr.sz <= NdbNodeBitmask::Size);
682   c_start.m_no_nodegroup_nodes.clear();
683   copy(c_start.m_no_nodegroup_nodes.rep.data, ptr);
684   releaseSections(handle);
685 
686   const DihRestartConf * conf = CAST_CONSTPTR(DihRestartConf,
687                                               signal->getDataPtr());
688   c_start.m_latest_gci = conf->latest_gci;
689   sendReadLocalSysfile(signal);
690 }
691 
692 void
sendReadLocalSysfile(Signal * signal)693 Qmgr::sendReadLocalSysfile(Signal *signal)
694 {
695   ReadLocalSysfileReq *req = (ReadLocalSysfileReq*)signal->getDataPtrSend();
696   req->userPointer = 0;
697   req->userReference = reference();
698   sendSignal(NDBCNTR_REF,
699              GSN_READ_LOCAL_SYSFILE_REQ,
700              signal,
701              ReadLocalSysfileReq::SignalLength,
702              JBB);
703 }
704 
705 void
execREAD_LOCAL_SYSFILE_CONF(Signal * signal)706 Qmgr::execREAD_LOCAL_SYSFILE_CONF(Signal *signal)
707 {
708   ReadLocalSysfileConf *conf = (ReadLocalSysfileConf*)signal->getDataPtr();
709   if (conf->nodeRestorableOnItsOwn ==
710       ReadLocalSysfileReq::NODE_RESTORABLE_ON_ITS_OWN)
711   {
712     g_eventLogger->info("DIH reported normal start, now starting the"
713                         " Node Inclusion Protocol");
714   }
715   else if (conf->nodeRestorableOnItsOwn ==
716            ReadLocalSysfileReq::NODE_NOT_RESTORABLE_ON_ITS_OWN)
717   {
718     /**
719      * We set gci = 1 and rely here on that gci here is simply used
720      * as a tool to decide which nodes can be started up on their
721      * own and which node to choose as master node. Only nodes
722      * where m_latest_gci is set to a real GCI can be choosen as
723      * master nodes.
724      */
725     g_eventLogger->info("Node not restorable on its own, now starting the"
726                         " Node Inclusion Protocol");
727     c_start.m_latest_gci = ZUNDEFINED_GCI_LIMIT;
728   }
729   else
730   {
731     g_eventLogger->info("Node requires initial start, now starting the"
732                         " Node Inclusion Protocol");
733     c_start.m_latest_gci = 0;
734   }
735   execCM_INFOCONF(signal);
736 }
737 
setHbDelay(UintR aHbDelay)738 void Qmgr::setHbDelay(UintR aHbDelay)
739 {
740   const NDB_TICKS now = NdbTick_getCurrentTicks();
741   hb_send_timer.setDelay(aHbDelay < 10 ? 10 : aHbDelay);
742   hb_send_timer.reset(now);
743   hb_check_timer.setDelay(aHbDelay < 10 ? 10 : aHbDelay);
744   hb_check_timer.reset(now);
745 }
746 
setHbApiDelay(UintR aHbApiDelay)747 void Qmgr::setHbApiDelay(UintR aHbApiDelay)
748 {
749   const NDB_TICKS now = NdbTick_getCurrentTicks();
750   chbApiDelay = (aHbApiDelay < 100 ? 100 : aHbApiDelay);
751   hb_api_timer.setDelay(chbApiDelay);
752   hb_api_timer.reset(now);
753 }
754 
setArbitTimeout(UintR aArbitTimeout)755 void Qmgr::setArbitTimeout(UintR aArbitTimeout)
756 {
757   arbitRec.timeout = (aArbitTimeout < 10 ? 10 : aArbitTimeout);
758 }
759 
setCCDelay(UintR aCCDelay)760 void Qmgr::setCCDelay(UintR aCCDelay)
761 {
762   const NDB_TICKS now = NdbTick_getCurrentTicks();
763   if (aCCDelay == 0)
764   {
765     /* Connectivity check disabled */
766     m_connectivity_check.m_enabled = false;
767     m_connectivity_check.m_timer.setDelay(0);
768   }
769   else
770   {
771     m_connectivity_check.m_enabled = true;
772     m_connectivity_check.m_timer.setDelay(aCCDelay < 10 ? 10 : aCCDelay);
773     m_connectivity_check.m_timer.reset(now);
774   }
775 }
776 
/**
 * CONNECT_REP: the transporter layer reports that a node connected.
 * Records the connection, validates it against our own phase, and for
 * DB nodes continues the inclusion protocol depending on which
 * request (c_start.m_gsn) we are currently waiting on.  If no
 * inclusion action applies, a READ_NODESREQ is sent to the newly
 * connected node (note: the GSN_CM_NODEINFOCONF case deliberately
 * falls through to default and on to that send when the node is not
 * one we are waiting for).
 */
void Qmgr::execCONNECT_REP(Signal* signal)
{
  jamEntry();
  const Uint32 connectedNodeId = signal->theData[0];

  if (ERROR_INSERTED(931))
  {
    jam();
    ndbout_c("Discarding CONNECT_REP(%d)", connectedNodeId);
    infoEvent("Discarding CONNECT_REP(%d)", connectedNodeId);
    return;
  }

  if (ERROR_INSERTED(941) &&
      getNodeInfo(connectedNodeId).getType() == NodeInfo::API)
  {
    jam();
    CLEAR_ERROR_INSERT_VALUE;
    ndbout_c("Discarding one API CONNECT_REP(%d)", connectedNodeId);
    infoEvent("Discarding one API CONNECT_REP(%d)", connectedNodeId);
    return;
  }

  if (c_connectedNodes.get(connectedNodeId) == false)
  {
    jam();
    // First contact since last disconnect: version is unknown again.
    setNodeInfo(connectedNodeId).m_version = 0;
    setNodeInfo(connectedNodeId).m_mysql_version = 0;
  }

  c_connectedNodes.set(connectedNodeId);
  DEB_STARTUP(("c_connectedNodes(%u) set", connectedNodeId));

  {
    NodeRecPtr connectedNodePtr;
    connectedNodePtr.i = connectedNodeId;
    ptrCheckGuard(connectedNodePtr, MAX_NODES, nodeRec);
    // A fresh connection invalidates any previous allocation secret.
    connectedNodePtr.p->m_secret = 0;
  }

  NodeRecPtr myNodePtr;
  myNodePtr.i = getOwnNodeId();
  ptrCheckGuard(myNodePtr, MAX_NODES, nodeRec);
  NodeInfo connectedNodeInfo = getNodeInfo(connectedNodeId);
  // Sanity-check the connect against our own node's phase.
  switch(myNodePtr.p->phase){
  case ZRUNNING:
    jam();
    if (connectedNodeInfo.getType() == NodeInfo::DB)
    {
      // A running cluster member must not already be in c_clusterNodes.
      ndbrequire(!c_clusterNodes.get(connectedNodeId));
    }
    break;
  case ZSTARTING:
    jam();
    break;
  case ZPREPARE_FAIL:
  case ZFAIL_CLOSING:
    jam();
    // We are failing; ignore the connect.
    return;
  case ZAPI_ACTIVATION_ONGOING:
    ndbabort();
  case ZAPI_ACTIVE:
    ndbabort();
  case ZAPI_INACTIVE:
    ndbabort();
  case ZINIT:
    // Before init has run, only MGM nodes are expected to connect.
    ndbrequire(getNodeInfo(connectedNodeId).m_type == NodeInfo::MGM);
    break;
  default:
    ndbabort();
  }

  if (connectedNodeInfo.getType() != NodeInfo::DB)
  {
    jam();
    // Only DB node connects drive the inclusion protocol below.
    return;
  }

  // Act according to which inclusion-protocol request we are in.
  switch(c_start.m_gsn){
  case GSN_CM_REGREQ:
    jam();
    sendCmRegReq(signal, connectedNodeId);

    /**
     * We're waiting for CM_REGCONF c_start.m_nodes contains all configured
     *   nodes
     */
    ndbrequire(myNodePtr.p->phase == ZSTARTING);
    ndbrequire(c_start.m_nodes.isWaitingFor(connectedNodeId));
    return;
  case GSN_CM_NODEINFOREQ:
    jam();

    if (c_start.m_nodes.isWaitingFor(connectedNodeId))
    {
      jam();
      ndbrequire(getOwnNodeId() != cpresident);
      ndbrequire(myNodePtr.p->phase == ZSTARTING);
      sendCmNodeInfoReq(signal, connectedNodeId, myNodePtr.p);
      return;
    }
    return;
  case GSN_CM_NODEINFOCONF:{
    jam();

    // Non-president waiting for an accepted node to connect to us.
    ndbrequire(getOwnNodeId() != cpresident);
    ndbrequire(myNodePtr.p->phase == ZRUNNING);
    if (c_start.m_nodes.isWaitingFor(connectedNodeId))
    {
      jam();
      c_start.m_nodes.clearWaitingFor(connectedNodeId);
      c_start.m_gsn = RNIL;

      NodeRecPtr addNodePtr;
      addNodePtr.i = connectedNodeId;
      ptrCheckGuard(addNodePtr, MAX_NDB_NODES, nodeRec);
      cmAddPrepare(signal, addNodePtr, myNodePtr.p);
      return;
    }
    // Intentional fall-through when not waiting for this node.
  }
  default:
    (void)1;
  }

  // No inclusion action applied: query the peer with READ_NODESREQ.
  ReadNodesReq *req = (ReadNodesReq *)&signal->theData[0];
  ndbrequire(!c_start.m_nodes.isWaitingFor(connectedNodeId));
  ndbrequire(!c_readnodes_nodes.get(connectedNodeId));
  c_readnodes_nodes.set(connectedNodeId);
  req->myRef = reference();
  req->myVersion = NDB_VERSION_D;
  sendSignal(calcQmgrBlockRef(connectedNodeId),
             GSN_READ_NODESREQ,
             signal,
             ReadNodesReq::SignalLength,
             JBA);
  return;
}//Qmgr::execCONNECT_REP()
914 
void
Qmgr::execREAD_NODESCONF(Signal* signal)
{
  jamEntry();
  if (signal->getNoOfSections() > 0)
  {
    jam();
    /**
     * New signal format: the node bitmasks arrive in a single segmented
     * section and are copied directly into the bitmask fields of
     * ReadNodesConf, which start at definedNodes in theData.
     */
    const ReadNodesConf * readNodes = (ReadNodesConf *)&signal->theData[0];
    ndbrequire(signal->getNoOfSections() == 1);
    SegmentedSectionPtr ptr;
    SectionHandle handle(this, signal);
    handle.getSection(ptr, 0);
    // Exactly 5 full-size bitmasks: defined/inactive/cluster/starting/started.
    ndbrequire(ptr.sz == 5 * NdbNodeBitmask::Size);
    copy((Uint32*)&readNodes->definedNodes.rep.data, ptr);
    releaseSections(handle);
  }
  else
  {
    jam();

    /**
     * Handle transformation from old signal format with 5 bitmask with
     * 2 words in each bitmask to 5 bitmasks with 5 words in each bitmask.
     */
    const ReadNodesConf_v1 * readNodes_v1 =
      (ReadNodesConf_v1 *)&signal->theData[0];
    // NOTE: readNodes_v1 and readNodes alias the same signal buffer.
    // The old-format (48-node) bitmasks must therefore be copied out to
    // the local NdbNodeBitmask48 variables below BEFORE the buffer is
    // rewritten in the new, wider layout.
    ReadNodesConf * readNodes = (ReadNodesConf *)&signal->theData[0];

    NdbNodeBitmask48 defined48Nodes;
    NdbNodeBitmask48 inactive48Nodes;
    NdbNodeBitmask48 cluster48Nodes;
    NdbNodeBitmask48 starting48Nodes;
    NdbNodeBitmask48 started48Nodes;

    // Copy the old-format masks out of the aliased buffer.
    defined48Nodes.assign(NdbNodeBitmask48::Size,
                          readNodes_v1->definedNodes);
    inactive48Nodes.assign(NdbNodeBitmask48::Size,
                           readNodes_v1->inactiveNodes);
    cluster48Nodes.assign(NdbNodeBitmask48::Size,
                          readNodes_v1->clusterNodes);
    starting48Nodes.assign(NdbNodeBitmask48::Size,
                           readNodes_v1->startingNodes);
    started48Nodes.assign(NdbNodeBitmask48::Size,
                          readNodes_v1->startedNodes);

    // Zero all words of the wider bitmasks first, then assign the
    // 48-node masks into them (the assignment only covers the low words).
    NdbNodeBitmask clear_bitmask;
    readNodes->definedNodes = clear_bitmask;
    readNodes->inactiveNodes = clear_bitmask;
    readNodes->clusterNodes = clear_bitmask;
    readNodes->startingNodes = clear_bitmask;
    readNodes->startedNodes = clear_bitmask;

    readNodes->definedNodes = defined48Nodes;
    readNodes->inactiveNodes = inactive48Nodes;
    readNodes->clusterNodes = cluster48Nodes;
    readNodes->startingNodes = starting48Nodes;
    readNodes->startedNodes = started48Nodes;
  }

  // Continue the per-node READ_NODESREQ bookkeeping for the sender.
  check_readnodes_reply(signal,
			refToNode(signal->getSendersBlockRef()),
			GSN_READ_NODESCONF);
}
978 
979 void
execREAD_NODESREF(Signal * signal)980 Qmgr::execREAD_NODESREF(Signal* signal)
981 {
982   jamEntry();
983   check_readnodes_reply(signal,
984 			refToNode(signal->getSendersBlockRef()),
985 			GSN_READ_NODESREF);
986 }
987 
988 /**
989  * Heartbeat Inclusion Protocol Handling
990  * -------------------------------------
991  * The protocol to include our node in the heartbeat protocol starts when
992  * we call execCM_INFOCONF. We start by opening communication to all nodes
993  * in the cluster. When we start this protocol we don't know anything about
 * which nodes are up and running and we don't know which node is currently the
995  * president of the heartbeat protocol.
996  *
997  * For us to be successful with being included in the heartbeat protocol we
998  * need to be connected to all nodes currently in the heartbeat protocol. It
999  * is important to remember that QMGR sees a node as alive if it is included
1000  * in the heartbeat protocol. Higher level notions of aliveness is handled
1001  * primarily by the DBDIH block, but also to some extent by NDBCNTR.
1002  *
1003  * The protocol starts by the new node sending CM_REGREQ to all nodes it is
1004  * connected to. Only the president will respond to this message. We could
1005  * have a situation where there currently isn't a president choosen. In this
1006  * case an election is held whereby a new president is assigned. In the rest
1007  * of this comment we assume that a president already exists.
1008  *
1009  * So if we were connected to the president we will get a response to the
1010  * CM_REGREQ from the president with CM_REGCONF. The CM_REGCONF contains
1011  * the set of nodes currently included in the heartbeat protocol.
1012  *
1013  * The president will send in parallel to sending CM_REGCONF a CM_ADD(prepare)
1014  * message to all nodes included in the protocol.
1015  *
1016  * When receiving CM_REGCONF the new node will send CM_NODEINFOREQ with
1017  * information about version of the binary, number of LDM workers and
1018  * MySQL version of binary.
1019  *
1020  * The nodes already included in the heartbeat protocol will wait until it
1021  * receives both the CM_ADD(prepare) from the president and the
1022  * CM_NODEINFOREQ from the starting node. When it receives those two
1023  * messages it will send CM_ACKADD(prepare) to the president and
1024  * CM_NODEINFOCONF to the starting node with its own node information.
1025  *
1026  * When the president received CM_ACKADD(prepare) from all nodes included
1027  * in the heartbeat protocol then it sends CM_ADD(AddCommit) to all nodes
1028  * included in the heartbeat protocol.
1029  *
1030  * When the nodes receives CM_ADD(AddCommit) from the president then
1031  * they will enable communication to the new node and immediately start
1032  * sending heartbeats to the new node. They will also include the new
1033  * node in their view of the nodes included in the heartbeat protocol.
1034  * Next they will send CM_ACKADD(AddCommit) back to the president.
1035  *
1036  * When the president has received CM_ACKADD(AddCommit) from all nodes
1037  * included in the heartbeat protocol then it sends CM_ADD(CommitNew)
1038  * to the starting node.
1039  *
1040  * This is also the point where we report the node as included in the
1041  * heartbeat protocol to DBDIH as from here the rest of the protocol is
1042  * only about informing the new node about the outcome of inclusion
1043  * protocol. When we receive the response to this message the new node
1044  * can already have proceeded a bit into its restart.
1045  *
1046  * The starting node after receiving CM_REGCONF waits for all nodes
1047  * included in the heartbeat protocol to send CM_NODEINFOCONF and
1048  * also for receiving the CM_ADD(CommitNew) from the president. When
 * all of this has been received, the new node adds itself and all nodes
 * it has been informed about into its view of the nodes included in
1051  * the heartbeat protocol and enables communication to all other
1052  * nodes included therein. Finally it sends CM_ACKADD(CommitNew) to
1053  * the president.
1054  *
1055  * When the president has received CM_ACKADD(CommitNew) from the starting
1056  * node the inclusion protocol is completed and the president is ready
1057  * to receive a new node into the cluster.
1058  *
1059  * It is the responsibility of the starting nodes to retry after a failed
1060  * node inclusion, they will do so with 3 seconds delay. This means that
1061  * at most one node per 3 seconds will normally be added to the cluster.
1062  * So this phase of adding nodes to the cluster can add up to a little bit
1063  * more than a minute of delay in a large cluster starting up.
1064  *
1065  * We try to depict the above in a graph here as well:
1066  *
1067  * New node           Nodes included in the heartbeat protocol        President
1068  * ----------------------------------------------------------------------------
1069  * ----CM_REGREQ--------------------->>
1070  * ----CM_REGREQ---------------------------------------------------------->
1071  *
1072  * <----------------CM_REGCONF---------------------------------------------
1073  *                                   <<------CM_ADD(Prepare)---------------
1074  *
1075  * -----CM_NODEINFOREQ--------------->>
1076  *
1077  * Nodes included in heartbeat protocol can receive CM_ADD(Prepare) and
1078  * CM_NODEINFOREQ in any order.
1079  *
1080  * <<---CM_NODEINFOCONF-------------- --------CM_ACKADD(Prepare)--------->>
1081  *
1082  *                                   <<-------CM_ADD(AddCommit)------------
1083  *
1084  * Here nodes enables communication to new node and starts sending heartbeats
1085  *
1086  *                                   ---------CM_ACKADD(AddCommit)------->>
1087  *
1088  * Here we report to DBDIH about new node included in heartbeat protocol
1089  * in master node.
1090  *
1091  * <----CM_ADD(CommitNew)--------------------------------------------------
1092  *
1093  * Here new node enables communication to new nodes and starts sending
1094  * heartbeat messages.
1095  *
1096  * -----CM_ACKADD(CommitNew)---------------------------------------------->
1097  *
1098  * Here the president can complete the inclusion protocol and is ready to
1099  * receive new nodes into the heartbeat protocol.
1100  */
/*******************************/
/* CM_INFOCONF                */
/*******************************/
void Qmgr::execCM_INFOCONF(Signal* signal)
{
  /**
   * Open communication to all DB nodes
   */
  signal->theData[0] = 0; // no answer
  signal->theData[1] = 0; // no id
  signal->theData[2] = NodeInfo::DB;
  sendSignal(TRPMAN_REF, GSN_OPEN_COMORD, signal, 3, JBB);

  // At this point we know nothing about the cluster: no president is
  // known and none is known to be alive. Record when the election started.
  cpresident = ZNIL;
  cpresidentAlive = ZFALSE;
  c_start_election_time = NdbTick_getCurrentTicks();

  // Delayed CONTINUEB (3s) tagged ZSTART_FAILURE_LIMIT; presumably a
  // periodic supervision of startup progress — confirm in CONTINUEB handler.
  signal->theData[0] = ZSTART_FAILURE_LIMIT;
  sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 3000, 1);

  // Start the CM_REGREQ round towards all configured DB nodes.
  cmInfoconf010Lab(signal);

  return;
}//Qmgr::execCM_INFOCONF()
1125 
// Requested start type for this node; copied into c_start.m_start_type
// at the beginning of each CM_REGREQ round.
Uint32 g_start_type = 0;
// Nodes not to wait for during the president election; set from the
// --nowait-nodes command line option and AND-ed with the configured nodes.
NdbNodeBitmask g_nowait_nodes;
1128 
cmInfoconf010Lab(Signal * signal)1129 void Qmgr::cmInfoconf010Lab(Signal* signal)
1130 {
1131   c_start.m_startKey = 0;
1132   c_start.m_startNode = getOwnNodeId();
1133   c_start.m_nodes.clearWaitingFor();
1134   c_start.m_gsn = GSN_CM_REGREQ;
1135   c_start.m_starting_nodes.clear();
1136   c_start.m_starting_nodes_w_log.clear();
1137   c_start.m_regReqReqSent = 0;
1138   c_start.m_regReqReqRecv = 0;
1139   c_start.m_skip_nodes = g_nowait_nodes;
1140   c_start.m_skip_nodes.bitAND(c_definedNodes);
1141   c_start.m_start_type = g_start_type;
1142 
1143   NodeRecPtr nodePtr;
1144   cnoOfNodes = 0;
1145   for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
1146     jam();
1147     ptrAss(nodePtr, nodeRec);
1148 
1149     if(getNodeInfo(nodePtr.i).getType() != NodeInfo::DB)
1150       continue;
1151 
1152     c_start.m_nodes.setWaitingFor(nodePtr.i);
1153     cnoOfNodes++;
1154 
1155     if(!c_connectedNodes.get(nodePtr.i))
1156       continue;
1157 
1158     sendCmRegReq(signal, nodePtr.i);
1159   }
1160 
1161   //----------------------------------------
1162   /* Wait for a while. When it returns    */
1163   /* we will check if we got any CM_REGREF*/
1164   /* or CM_REGREQ (lower nodeid than our  */
1165   /* own).                                */
1166   //----------------------------------------
1167   signal->theData[0] = ZREGREQ_TIMELIMIT;
1168   signal->theData[1] = c_start.m_startKey;
1169   signal->theData[2] = c_start.m_startNode;
1170   sendSignalWithDelay(QMGR_REF, GSN_CONTINUEB, signal, 3000, 3);
1171 
1172   creadyDistCom = ZTRUE;
1173   return;
1174 }//Qmgr::cmInfoconf010Lab()
1175 
1176 void
sendCmRegReq(Signal * signal,Uint32 nodeId)1177 Qmgr::sendCmRegReq(Signal * signal, Uint32 nodeId)
1178 {
1179   CmRegReq * req = (CmRegReq *)&signal->theData[0];
1180   req->blockRef = reference();
1181   req->nodeId = getOwnNodeId();
1182   req->version = NDB_VERSION;
1183   req->mysql_version = NDB_MYSQL_VERSION_D;
1184   req->latest_gci = c_start.m_latest_gci;
1185   req->start_type = c_start.m_start_type;
1186   const Uint32 ref = calcQmgrBlockRef(nodeId);
1187   /**
1188    *  Clear the additional bits, see comment above CmRegReq::SignalLength
1189    *  in CmRegSignalData for details.
1190    */
1191   memset(req->unused_words, 0, sizeof(req->unused_words));
1192   sendSignal(ref, GSN_CM_REGREQ, signal, CmRegReq::SignalLength, JBB);
1193   DEB_STARTUP(("CM_REGREQ sent to node %u", nodeId));
1194   DEBUG_START(GSN_CM_REGREQ, nodeId, "");
1195 
1196   c_start.m_regReqReqSent++;
1197 }
1198 
1199 /*
1200 4.4.11 CM_REGREQ */
1201 /**--------------------------------------------------------------------------
 * If this signal is received someone tries to get registered.
 * Only the president has the authority to make decisions about new nodes,
1204  * so only a president or a node that claims to be the president may send a
1205  * reply to this signal.
1206  * This signal can occur any time after that STTOR was received.
1207  * CPRESIDENT:             Timelimit has expired and someone has
1208  *                         decided to enter the president role
1209  * CPRESIDENT_CANDIDATE:
1210  *     Assigned when we receive a CM_REGREF, if we got more than one REF
1211  *     then we always keep the lowest nodenumber.
1212  *     We accept this nodeno as president when our timelimit expires
1213  * We should consider the following cases:
1214  * 1- We are the president. If we are busy by adding new nodes to cluster,
1215  *    then we have to refuse this node to be added.
1216  *    The refused node will try in ZREFUSE_ADD_TIME seconds again.
1217  *    If we are not busy then we confirm
1218  *
 * 2- We know the president, we don't bother about this REQ.
1220  *    The president has also got this REQ and will take care of it.
1221  *
1222  * 3- The president isn't known. An election is currently ongoing.
1223  *    This election will not be decided until all nodes in the cluster
1224  *    except those specifically in skip list has been started.
1225  *    The skip list comes from the startup parameter --nowait-nodes.
1226  *    So if no one knows the President it means that we are performing
1227  *    a cluster startup, either initial or a normal System restart of
1228  *    the cluster.
1229  *
1230  *    In this case we wait until all nodes except those in the skip list
1231  *    have sent CM_REGREQ to us. If this is the case the node with the
1232  *    lowest node id AND that can start from the highest GCI promotes itself
1233  *    to President. Since all nodes follow the same algorithm we are certain
1234  *    that this will bring us to a point where all nodes has the same node
1235  *    as President.
1236  *    In addition this election ensures that the President in QMGR is also
1237  *    selected as Master in NDBCNTR. It should not be possible that
1238  *    CNTR_START_REQ gets a response where the Master says that it isn't
1239  *    the master.
1240  *
1241  *     To ensure that the President is equal to the Master we send the
1242  *     start GCI a node can handle in CM_REGREQ. This enables us to elect
1243  *     a President that can also act as Master for NDBCNTR.
1244  *--------------------------------------------------------------------------*/
1245 /*******************************/
1246 /* CM_REGREQ                  */
1247 /*******************************/
1248 static
1249 int
check_start_type(Uint32 starting,Uint32 own)1250 check_start_type(Uint32 starting, Uint32 own)
1251 {
1252   if (starting == (1 << NodeState::ST_INITIAL_START) &&
1253       ((own & (1 << NodeState::ST_INITIAL_START)) == 0))
1254   {
1255     return 1;
1256   }
1257   return 0;
1258 }
1259 
/**
 * Handle CM_REGREQ from a node that wants to join the heartbeat protocol.
 * Only the president answers positively (CM_REGCONF); every other outcome
 * is a CM_REGREF with an error code telling the starter what to do next.
 * See the protocol description above execCM_INFOCONF for the full picture.
 */
void Qmgr::execCM_REGREQ(Signal* signal)
{
  DEBUG_START3(signal, "");

  NodeRecPtr addNodePtr;
  jamEntry();

  CmRegReq * const cmRegReq = (CmRegReq *)&signal->theData[0];
  const BlockReference Tblockref = cmRegReq->blockRef;
  const Uint32 startingVersion = cmRegReq->version;
  Uint32 startingMysqlVersion = cmRegReq->mysql_version;
  addNodePtr.i = cmRegReq->nodeId;
  // Defaults used when the sender sent the short (old) signal format.
  Uint32 gci = 1;
  Uint32 start_type = ~0;

  if (!c_connectedNodes.get(cmRegReq->nodeId))
  {
    jam();

    /**
     * With ndbmtd, there is a race condition such that
     *   CM_REGREQ can arrive prior to CONNECT_REP
     *   since CONNECT_REP is sent from CMVMI
     *
     * In such cases, ignore the CM_REGREQ which is safe
     *   as it will anyway be resent by starting node
     */
    g_eventLogger->info("discarding CM_REGREQ from %u "
                        "as we're not yet connected (isNdbMt: %u)",
                        cmRegReq->nodeId,
                        (unsigned)isNdbMt());

    return;
  }

  // Full-length signal carries the starter's latest GCI and start type.
  if (signal->getLength() == CmRegReq::SignalLength)
  {
    jam();
    gci = cmRegReq->latest_gci;
    start_type = cmRegReq->start_type;
  }

  if (creadyDistCom == ZFALSE) {
    jam();
    DEB_STARTUP(("Not ready for distributed communication yet"));
    /* NOT READY FOR DISTRIBUTED COMMUNICATION.*/
    return;
  }//if

  // Refuse nodes running an NDB version we cannot interoperate with.
  if (!ndbCompatible_ndb_ndb(NDB_VERSION, startingVersion)) {
    jam();
    DEB_STARTUP(("Incompatible versions"));
    sendCmRegrefLab(signal, Tblockref, CmRegRef::ZINCOMPATIBLE_VERSION,
                    startingVersion);
    return;
  }

  if (!ndbd_upgrade_ok(startingVersion))
  {
    jam();
    infoEvent("Connection from node %u refused as it's not ok to upgrade from",
              addNodePtr.i);
    sendCmRegrefLab(signal, Tblockref, CmRegRef::ZINCOMPATIBLE_VERSION,
                    startingVersion);
    return;
  }

  // Refuse an initial-start node when our own start type does not allow it.
  if (check_start_type(start_type, c_start.m_start_type))
  {
    jam();
    DEB_STARTUP(("Incompatible start types"));
    sendCmRegrefLab(signal, Tblockref, CmRegRef::ZINCOMPATIBLE_START_TYPE,
                    startingVersion);
    return;
  }

  if (cpresident != getOwnNodeId())
  {
    jam();

    if (cpresident == ZNIL)
    {
      /***
       * We don't know the president.
       * If the node to be added has lower node id
       * than it will be our president candidate. Set it as
       * candidate.
       */
      jam();
      // Candidate selection: highest GCI wins, lowest node id breaks ties.
      if (gci != ZUNDEFINED_GCI_LIMIT &&
          (gci > c_start.m_president_candidate_gci ||
	  (gci == c_start.m_president_candidate_gci &&
	   addNodePtr.i < c_start.m_president_candidate)))
      {
	jam();
	c_start.m_president_candidate = addNodePtr.i;
	c_start.m_president_candidate_gci = gci;
        DEB_STARTUP(("President candidate: %u, gci: %u",
                     addNodePtr.i, gci));
      }
      DEB_STARTUP(("Election error to %x", Tblockref));
      sendCmRegrefLab(signal, Tblockref, CmRegRef::ZELECTION,
                      startingVersion);
      return;
    }

    /**
     * We are not the president.
     * We know the president.
     * President will answer.
     */
    DEB_STARTUP(("Not president error"));
    sendCmRegrefLab(signal, Tblockref, CmRegRef::ZNOT_PRESIDENT,
                    startingVersion);
    return;
  }//if

  if (c_start.m_startNode != 0)
  {
    jam();
    /**
     * President busy by adding another node
     */
    DEB_STARTUP(("Busy president error"));
    sendCmRegrefLab(signal, Tblockref, CmRegRef::ZBUSY_PRESIDENT,
                    startingVersion);
    return;
  }

  if (ctoStatus == Q_ACTIVE)
  {
    jam();
    /**
     * Active taking over as president
     */
    DEB_STARTUP(("President take over error"));
    sendCmRegrefLab(signal, Tblockref, CmRegRef::ZBUSY_TO_PRES,
                    startingVersion);
    return;
  }//if

  if (getNodeInfo(addNodePtr.i).m_type != NodeInfo::DB)
  {
    jam();
    /**
     * The new node is not in config file
     */
    DEB_STARTUP(("Not in cfg error"));
    sendCmRegrefLab(signal, Tblockref, CmRegRef::ZNOT_IN_CFG,
                    startingVersion);
    return;
  }

  if (getNodeState().getSingleUserMode())
  {
    /**
     * The cluster is in single user mode.
     * Data node is not allowed to get added in the cluster
     * while in single user mode.
     */
    // handle rolling upgrade
    jam();
    DEB_STARTUP(("Single user mode error"));
    sendCmRegrefLab(signal, Tblockref, CmRegRef::ZSINGLE_USER_MODE,
                    startingVersion);
    return;
  }//if

  // The node must currently be seen as dead (phase ZINIT) to be added.
  ptrCheckGuard(addNodePtr, MAX_NDB_NODES, nodeRec);
  Phase phase = addNodePtr.p->phase;
  if (phase != ZINIT)
  {
    jam();
    QMGR_DEBUG("phase = " << phase);
    DEB_STARTUP(("Not dead error"));
    sendCmRegrefLab(signal, Tblockref, CmRegRef::ZNOT_DEAD,
                    startingVersion);
    return;
  }

  jam();
  /**
   * WE ARE PRESIDENT AND WE ARE NOT BUSY ADDING ANOTHER NODE.
   * WE WILL TAKE CARE OF THE INCLUSION OF THIS NODE INTO THE CLUSTER.
   * WE NEED TO START TIME SUPERVISION OF THIS. SINCE WE CANNOT STOP
   * TIMED SIGNAL IF THE INCLUSION IS INTERRUPTED WE IDENTIFY
   * EACH INCLUSION WITH A UNIQUE IDENTITY. THIS IS CHECKED WHEN
   * THE SIGNAL ARRIVES. IF IT HAS CHANGED THEN WE SIMPLY IGNORE
   * THE TIMED SIGNAL.
   */

  /**
   * Update start record
   */
  c_start.m_startKey++;
  c_start.m_startNode = addNodePtr.i;
  DEB_STARTUP(("Node %u is starting node", addNodePtr.i));

  /**
   * Assign dynamic id
   */
  // Low 16 bits: monotonically increasing counter; high 16 bits: hbOrder.
  UintR TdynId = (++c_maxDynamicId) & 0xFFFF;
  TdynId |= (addNodePtr.p->hbOrder << 16);
  setNodeInfo(addNodePtr.i).m_version = startingVersion;
  setNodeInfo(addNodePtr.i).m_mysql_version = startingMysqlVersion;
  recompute_version_info(NodeInfo::DB, startingVersion);
  addNodePtr.p->ndynamicId = TdynId;

  /**
   * Reply with CM_REGCONF
   */
  CmRegConf * const cmRegConf = (CmRegConf *)&signal->theData[0];
  cmRegConf->presidentBlockRef = reference();
  cmRegConf->presidentNodeId   = getOwnNodeId();
  cmRegConf->presidentVersion  = getNodeInfo(getOwnNodeId()).m_version;
  cmRegConf->presidentMysqlVersion = getNodeInfo(getOwnNodeId()).m_mysql_version;
  cmRegConf->dynamicId         = TdynId;
  const Uint32 packed_nodebitmask_length = c_clusterNodes.getPackedLengthInWords();
#ifdef DEBUG_STARTUP
  {
    char node_mask[NdbNodeBitmask::TextLength + 1];
    c_clusterNodes.getText(node_mask);
    DEB_STARTUP(("Sending CM_REGCONF from president, c_clusterNodes: %s",
                 node_mask));
  }
#endif
  // Three ways to deliver the cluster-node bitmask, depending on what the
  // starting node's version supports.
  if (ndbd_send_node_bitmask_in_section(startingVersion))
  {
    jam();
    // Send node bitmask in linear section.
    LinearSectionPtr lsptr[3];

    // 8192 is the size of signal->theData array.
    STATIC_ASSERT(CmRegConf::SignalLength_v1 + NdbNodeBitmask::Size <=
                  NDB_ARRAY_SIZE(signal->theData));
    c_clusterNodes.copyto(packed_nodebitmask_length,
                          &signal->theData[CmRegConf::SignalLength_v1]);
    lsptr[0].p = &signal->theData[CmRegConf::SignalLength_v1];
    lsptr[0].sz = packed_nodebitmask_length;

    DEB_STARTUP(("Sending CM_REGCONF to %x", Tblockref));
    sendSignal(Tblockref,
               GSN_CM_REGCONF,
               signal,
               CmRegConf::SignalLength,
               JBA,
               lsptr,
               1);
  }
  else if (packed_nodebitmask_length <= NdbNodeBitmask48::Size)
  {
    jam();
    // Old receiver, but the bitmask still fits in the 48-node v1 field.
    c_clusterNodes.copyto(NdbNodeBitmask48::Size, cmRegConf->allNdbNodes_v1);
    DEB_STARTUP(("2:Sending CM_REGCONF to %x", Tblockref));
    sendSignal(Tblockref, GSN_CM_REGCONF, signal,
               CmRegConf::SignalLength_v1, JBA);
  }
  else
  {
    // Old receiver and the bitmask does not fit: must refuse the node.
    infoEvent("Connection from node %u refused as it does not support node "
              "bitmask in signal section.",
              addNodePtr.i);
    DEB_STARTUP(("Incompatible start types"));
    sendCmRegrefLab(signal, Tblockref, CmRegRef::ZINCOMPATIBLE_START_TYPE,
                    startingVersion);
  }
  DEBUG_START(GSN_CM_REGCONF, refToNode(Tblockref), "");

  /**
   * Send CmAdd to all nodes (including starting)
   */
  c_start.m_nodes = c_clusterNodes;
  c_start.m_nodes.setWaitingFor(addNodePtr.i);
  c_start.m_gsn = GSN_CM_ADD;

  NodeReceiverGroup rg(QMGR, c_start.m_nodes);
  CmAdd * const cmAdd = (CmAdd*)signal->getDataPtrSend();
  cmAdd->requestType = CmAdd::Prepare;
  cmAdd->startingNodeId = addNodePtr.i;
  cmAdd->startingVersion = startingVersion;
  cmAdd->startingMysqlVersion = startingMysqlVersion;
  sendSignal(rg, GSN_CM_ADD, signal, CmAdd::SignalLength, JBA);
  DEBUG_START2(GSN_CM_ADD, rg, "Prepare");

  /**
   * Set timer
   */
  // NOTE(review): the unconditional return below makes the timer code
  // that follows unreachable, so no ZREGREQ_MASTER_TIMELIMIT supervision
  // is actually started here. This looks like deliberately disabled code,
  // but confirm against upstream before removing or re-enabling it.
  return;
  signal->theData[0] = ZREGREQ_MASTER_TIMELIMIT;
  signal->theData[1] = c_start.m_startKey;
  sendSignalWithDelay(QMGR_REF, GSN_CONTINUEB, signal, 30000, 2);

  return;
}//Qmgr::execCM_REGREQ()
1554 
/**
 * Send CM_REGREF with error code Terror back to TBRef, including our view
 * of the president candidate, GCIs, start type and the skip-nodes bitmask.
 * The bitmask is delivered in a format the receiver's version can handle.
 */
void Qmgr::sendCmRegrefLab(Signal* signal, BlockReference TBRef,
			   CmRegRef::ErrorCode Terror, Uint32 remote_node_version)
{
  const Uint32 remoteNodeVersion = remote_node_version;

  CmRegRef* ref = (CmRegRef*)signal->getDataPtrSend();
  ref->blockRef = reference();
  ref->nodeId = getOwnNodeId();
  ref->errorCode = Terror;
  // Report the elected president if known, otherwise our current candidate.
  ref->presidentCandidate =
    (cpresident == ZNIL ? c_start.m_president_candidate : cpresident);
  ref->candidate_latest_gci = c_start.m_president_candidate_gci;
  ref->latest_gci = c_start.m_latest_gci;
  ref->start_type = c_start.m_start_type;
  Uint32 packed_nodebitmask_length =
      c_start.m_skip_nodes.getPackedLengthInWords();

  if (ndbd_send_node_bitmask_in_section(remoteNodeVersion))
  {
    jam();
    // Send node bitmask in linear section.
    // The packed mask is staged in theData just past the fixed v1 part
    // of the signal and referenced from there by the linear section.
    LinearSectionPtr lsptr[3];
    c_start.m_skip_nodes.copyto(packed_nodebitmask_length,
                                &signal->theData[CmRegRef::SignalLength_v1]);
    lsptr[0].p = &signal->theData[CmRegRef::SignalLength_v1];
    lsptr[0].sz = packed_nodebitmask_length;

    sendSignal(TBRef,
               GSN_CM_REGREF,
               signal,
               CmRegRef::SignalLength,
               JBB,
               lsptr,
               1);
  }
  else if (packed_nodebitmask_length <= NdbNodeBitmask48::Size)
  {
    jam();
    // Old receiver; the skip-nodes mask fits in the 48-node v1 field.
    c_start.m_skip_nodes.copyto(NdbNodeBitmask48::Size, ref->skip_nodes_v1);
    sendSignal(TBRef, GSN_CM_REGREF, signal,
               CmRegRef::SignalLength_v1, JBB);
  }
  else
  {
    /**
     * Node bitmask cannot be sent to other node since it is longer
     * than two words. We crash if the error is not ZINCOMPATIBLE_VERSION
     * or ZINCOMPATIBLE_START_TYPE since other errors may change the state
     * of qmgr. Also, other errors require us to have the correct bitmask
     * for proper functioning.
     */
    ndbrequire((Terror == CmRegRef::ZINCOMPATIBLE_VERSION) ||
               (Terror == CmRegRef::ZINCOMPATIBLE_START_TYPE));
    memset(ref->skip_nodes_v1, 0, sizeof(ref->skip_nodes_v1));
    sendSignal(TBRef, GSN_CM_REGREF, signal,
               CmRegRef::SignalLength_v1, JBB);
  }
  DEBUG_START(GSN_CM_REGREF, refToNode(TBRef), "");
  return;
}//Qmgr::sendCmRegrefLab()
1615 
1616 /*
1617 4.4.11 CM_REGCONF */
1618 /**--------------------------------------------------------------------------
1619  * President gives permission to a node which wants to join the cluster.
1620  * The president will prepare the cluster that a new node will be added to
1621  * cluster. When the new node has set up all connections to the cluster,
 * the president will send commit to all cluster nodes so the phase of the
1623  * new node can be changed to ZRUNNING.
1624  *--------------------------------------------------------------------------*/
1625 /*******************************/
1626 /* CM_REGCONF                 */
1627 /*******************************/
/**
 * Handle CM_REGCONF from the president: adopt its identity and dynamic id,
 * take over its view of the cluster nodes, and start the CM_NODEINFOREQ
 * round towards every node already in the heartbeat protocol.
 */
void Qmgr::execCM_REGCONF(Signal* signal)
{
  DEBUG_START3(signal, "");

  NodeRecPtr myNodePtr;
  NodeRecPtr nodePtr;
  jamEntry();

  CmRegConf * const cmRegConf = (CmRegConf *)&signal->theData[0];

  DEB_STARTUP(("Received CM_REGCONF"));
  // The cluster-node bitmask either arrives in a signal section (new
  // format) or in the fixed 48-node v1 field of the signal itself.
  NdbNodeBitmask allNdbNodes;
  if (signal->getNoOfSections() >= 1)
  {
    // copy node bitmask to cmRegConf->allNdbNodes from the signal section
    jam();
    ndbrequire(ndbd_send_node_bitmask_in_section(cmRegConf->presidentVersion));
    SectionHandle handle(this, signal);
    SegmentedSectionPtr ptr;
    handle.getSection(ptr, 0);
    ndbrequire(ptr.sz <= NdbNodeBitmask::Size);
    copy(allNdbNodes.rep.data, ptr);
    releaseSections(handle);
  }
  else
  {
    allNdbNodes.assign(NdbNodeBitmask48::Size, cmRegConf->allNdbNodes_v1);
  }

  // A president on an incompatible version is fatal: shut down.
  if (!ndbCompatible_ndb_ndb(NDB_VERSION, cmRegConf->presidentVersion)) {
    jam();
    char buf[128];
    BaseString::snprintf(buf,sizeof(buf),
			 "incompatible version own=0x%x other=0x%x, "
			 " shutting down",
			 NDB_VERSION, cmRegConf->presidentVersion);
    progError(__LINE__, NDBD_EXIT_UNSUPPORTED_VERSION, buf);
    return;
  }

  if (!ndbd_upgrade_ok(cmRegConf->presidentVersion)) {
    jam();
    char buf[128];
    BaseString::snprintf(buf,sizeof(buf),
      "Not okay to upgrade from 0x%x, "
      "shutting down",
      cmRegConf->presidentVersion);
    progError(__LINE__, NDBD_EXIT_UNSUPPORTED_VERSION, buf);
    return;
  }

  myNodePtr.i = getOwnNodeId();
  ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);

  // We must be in the CM_REGREQ phase of our own start to accept this.
  ndbrequire(c_start.m_gsn == GSN_CM_REGREQ);
  ndbrequire(myNodePtr.p->phase == ZSTARTING);

  // Adopt the president's identity and the dynamic id it assigned to us.
  cpdistref    = cmRegConf->presidentBlockRef;
  cpresident   = cmRegConf->presidentNodeId;
  UintR TdynamicId   = cmRegConf->dynamicId;
  c_maxDynamicId = TdynamicId & 0xFFFF;
  c_clusterNodes.assign(allNdbNodes);

  myNodePtr.p->ndynamicId = TdynamicId;

  // set own MT config here or in REF, and others in CM_NODEINFOREQ/CONF
  setNodeInfo(getOwnNodeId()).m_lqh_workers = globalData.ndbMtLqhWorkers;

#ifdef DEBUG_STARTUP
  {
    char node_mask[NdbNodeBitmask::TextLength + 1];
    c_clusterNodes.getText(node_mask);
    DEB_STARTUP(("CM_REGCONF from president: %u, c_clusterNodes: %s",
                cpresident, node_mask));
  }
#endif
/*--------------------------------------------------------------*/
// Send this as an EVENT REPORT to inform about hearing about
// other NDB node proclaiming to be president.
/*--------------------------------------------------------------*/
  signal->theData[0] = NDB_LE_CM_REGCONF;
  signal->theData[1] = getOwnNodeId();
  signal->theData[2] = cpresident;
  signal->theData[3] = TdynamicId;
  sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);

  // Mark every node the president reported as running, and send
  // CM_NODEINFOREQ immediately to those we are already connected to.
  for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
    if (c_clusterNodes.get(nodePtr.i)){
      jamLine(nodePtr.i);
      ptrAss(nodePtr, nodeRec);

      DEB_MULTI_TRP(("Node %u in ZRUNNING", nodePtr.i));
      ndbrequire(nodePtr.p->phase == ZINIT);
      nodePtr.p->phase = ZRUNNING;
      DEB_STARTUP(("phase(%u) = ZRUNNING", nodePtr.i));

      if(c_connectedNodes.get(nodePtr.i)){
	jam();
	sendCmNodeInfoReq(signal, nodePtr.i, myNodePtr.p);
      }
    }
  }

  // From here on we are waiting for CM_NODEINFOCONF from all cluster nodes.
  c_start.m_gsn = GSN_CM_NODEINFOREQ;
  c_start.m_nodes = c_clusterNodes;

  if (ERROR_INSERTED(937))
  {
    CLEAR_ERROR_INSERT_VALUE;
    signal->theData[0] = 9999;
    sendSignalWithDelay(CMVMI_REF, GSN_NDB_TAMPER, signal, 500, 1);
  }

  return;
}//Qmgr::execCM_REGCONF()
1743 
/**
 * Compare the READ_NODESCONF/READ_NODESREF reply received from node
 * nodeId with our own view of who is president, in order to detect a
 * partitioned ("split brain") cluster.
 *
 * @param signal  carries ReadNodesConf data when gsn == GSN_READ_NODESCONF
 * @param nodeId  node whose reply is being checked; must be present in
 *                c_readnodes_nodes (asserted below)
 * @param gsn     GSN_READ_NODESCONF or GSN_READ_NODESREF
 */
void
Qmgr::check_readnodes_reply(Signal* signal, Uint32 nodeId, Uint32 gsn)
{
  NodeRecPtr myNodePtr;
  myNodePtr.i = getOwnNodeId();
  ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);

  NodeRecPtr nodePtr;
  nodePtr.i = nodeId;
  ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);

  ndbrequire(c_readnodes_nodes.get(nodeId));
  ReadNodesConf* conf = (ReadNodesConf*)signal->getDataPtr();
  ReadNodesReq* req = (ReadNodesReq*)signal->getDataPtrSend();
  if (gsn == GSN_READ_NODESREF)
  {
    jam();
    /**
     * The node could not answer yet: resend READ_NODESREQ and wait for
     * the next reply.  The label is also reached (via goto) when a CONF
     * arrived but carried no master node id yet.
     */
retry:
    req->myRef = reference();
    req->myVersion = NDB_VERSION_D;
    sendSignal(calcQmgrBlockRef(nodeId),
               GSN_READ_NODESREQ,
               signal,
               ReadNodesReq::SignalLength,
               JBA);
    return;
  }

  if (conf->masterNodeId == ZNIL)
  {
    jam();
    // Replier does not know a master yet: ask again.
    goto retry;
  }

  Uint32 president = conf->masterNodeId;
  if (president == cpresident)
  {
    jam();
    // Views agree: nothing more to check for this node.
    c_readnodes_nodes.clear(nodeId);
    return;
  }

  /**
   * The other node believes in a different president than we do:
   * the cluster is partitioned.
   */
  char buf[255];
  BaseString::snprintf(buf, sizeof(buf),
		       "check StartPartialTimeout, "
		       "node %d thinks %d is president, "
		       "I think president is: %d",
		       nodeId, president, cpresident);

  ndbout_c("%s", buf);
  CRASH_INSERTION(933);

  if (getNodeState().startLevel == NodeState::SL_STARTED)
  {
    jam();
    /**
     * We are fully started: tell the diverging node that every member of
     * its partition has failed with cause ZPARTITIONED_CLUSTER, sending
     * one FAIL_REP per node in its reported cluster view.
     */
    NdbNodeBitmask part = conf->clusterNodes;
    FailRep* rep = (FailRep*)signal->getDataPtrSend();
    rep->failCause = FailRep::ZPARTITIONED_CLUSTER;
    rep->partitioned.president = cpresident;
    memset(rep->partitioned.partition_v1, 0,
           sizeof(rep->partitioned.partition_v1));
    rep->partitioned.partitionFailSourceNodeId = getOwnNodeId();
    Uint32 ref = calcQmgrBlockRef(nodeId);
    Uint32 i = 0;
    /* Send source of event info if a node supports it */
    Uint32 length = FailRep::OrigSignalLength +
        FailRep::PartitionedExtraLength_v1;
    Uint32 packed_bitmask_length = c_clusterNodes.getPackedLengthInWords();

    // One FAIL_REP per node in the other partition (excluding nodeId,
    // which gets a final FAIL_REP after the loop).
    while((i = part.find(i + 1)) != NdbNodeBitmask::NotFound)
    {
      if (i == nodeId)
	continue;
      rep->failNodeId = i;
      if (ndbd_send_node_bitmask_in_section(getNodeInfo(refToNode(ref)).m_version))
      {
        jam();
        // Send node bitmask in signal section.
        LinearSectionPtr lsptr[3];
        Uint32* temp_buffer = &signal->
            theData[FailRep::SignalLength +
                    FailRep::PartitionedExtraLength_v1];
        c_clusterNodes.copyto(packed_bitmask_length, temp_buffer);
        lsptr[0].p = temp_buffer;
        lsptr[0].sz = c_clusterNodes.getPackedLengthInWords();
        sendSignal(ref,
                   GSN_FAIL_REP,
                   signal,
                   length + FailRep::SourceExtraLength,
                   JBA,
                   lsptr,
                   1);
      }
      else if (packed_bitmask_length <= 2)
      {
        jam();
        // Old-version receiver: bitmask must fit in the fixed v1 field.
        c_clusterNodes.copyto(NdbNodeBitmask48::Size, rep->partitioned.partition_v1);
        sendSignal(ref, GSN_FAIL_REP, signal,
                   length + FailRep::SourceExtraLength,
                   JBA);
      }
      else
      {
        // Receiver cannot take a section and the bitmask does not fit v1.
        ndbabort();
      }
    }
    // Finally report the contacted node itself as failed.
    rep->failNodeId = nodeId;

    if (ndbd_send_node_bitmask_in_section(
        getNodeInfo(refToNode(ref)).m_version))
    {
      jam();
      // Send node bitmask in signal section.
      LinearSectionPtr lsptr[3];
      Uint32* temp_buffer = &signal->
          theData[FailRep::SignalLength +
                  FailRep::PartitionedExtraLength_v1];
      c_clusterNodes.copyto(packed_bitmask_length, temp_buffer);
      lsptr[0].p = temp_buffer;
      lsptr[0].sz = c_clusterNodes.getPackedLengthInWords();
      // clear the unused bits
      memset(rep->partitioned.partition_v1, 0,
             sizeof(rep->partitioned.partition_v1));
      sendSignal(ref,
                 GSN_FAIL_REP,
                 signal,
                 length + FailRep::SourceExtraLength,
                 JBA,
                 lsptr,
                 1);
    }
    else if (packed_bitmask_length <= 2)
    {
      jam();
      // NOTE(review): this send uses priority JBB while the equivalent
      // sends above use JBA -- confirm whether that is intended.
      sendSignal(ref, GSN_FAIL_REP, signal,
                 length + FailRep::SourceExtraLength,
                 JBB);
    }
    else
    {
      ndbabort();
    }
    return;
  }

  // Not yet fully started: shut this node down instead of fighting.
  CRASH_INSERTION(932);
  CRASH_INSERTION(938);

  progError(__LINE__,
	    NDBD_EXIT_PARTITIONED_SHUTDOWN,
	    buf);

  ndbabort();
}
1898 
1899 void
sendCmNodeInfoReq(Signal * signal,Uint32 nodeId,const NodeRec * self)1900 Qmgr::sendCmNodeInfoReq(Signal* signal, Uint32 nodeId, const NodeRec * self){
1901   CmNodeInfoReq * const req = (CmNodeInfoReq*)signal->getDataPtrSend();
1902   req->nodeId = getOwnNodeId();
1903   req->dynamicId = self->ndynamicId;
1904   req->version = getNodeInfo(getOwnNodeId()).m_version;
1905   req->mysql_version = getNodeInfo(getOwnNodeId()).m_mysql_version;
1906   req->lqh_workers = getNodeInfo(getOwnNodeId()).m_lqh_workers;
1907   const Uint32 ref = calcQmgrBlockRef(nodeId);
1908   sendSignal(ref,GSN_CM_NODEINFOREQ, signal, CmNodeInfoReq::SignalLength, JBB);
1909   DEBUG_START(GSN_CM_NODEINFOREQ, nodeId, "");
1910 }
1911 
1912 /*
1913 4.4.11 CM_REGREF */
1914 /**--------------------------------------------------------------------------
1915  * Only a president or a president candidate can refuse a node to get added to
1916  * the cluster.
1917  * Refuse reasons:
1918  * ZBUSY         We know that the sender is the president and we have to
1919  *               make a new CM_REGREQ.
1920  * ZNOT_IN_CFG   This node number is not specified in the configfile,
1921  *               SYSTEM ERROR
1922  * ZELECTION     Sender is a president candidate, his timelimit
1923  *               hasn't expired so maybe someone else will show up.
1924  *               Update the CPRESIDENT_CANDIDATE, then wait for our
1925  *               timelimit to expire.
1926  *---------------------------------------------------------------------------*/
1927 /*******************************/
1928 /* CM_REGREF                  */
1929 /*******************************/
1930 static
1931 const char *
get_start_type_string(Uint32 st)1932 get_start_type_string(Uint32 st)
1933 {
1934   static char buf[256];
1935 
1936   if (st == 0)
1937   {
1938     return "<ANY>";
1939   }
1940   else
1941   {
1942     buf[0] = 0;
1943     for(Uint32 i = 0; i<NodeState::ST_ILLEGAL_TYPE; i++)
1944     {
1945       if (st & (1 << i))
1946       {
1947 	if (buf[0])
1948 	  strcat(buf, "/");
1949 	switch(i){
1950 	case NodeState::ST_INITIAL_START:
1951 	  strcat(buf, "inital start");
1952 	  break;
1953 	case NodeState::ST_SYSTEM_RESTART:
1954 	  strcat(buf, "system restart");
1955 	  break;
1956 	case NodeState::ST_NODE_RESTART:
1957 	  strcat(buf, "node restart");
1958 	  break;
1959 	case NodeState::ST_INITIAL_NODE_RESTART:
1960 	  strcat(buf, "initial node restart");
1961 	  break;
1962 	}
1963       }
1964     }
1965     return buf;
1966   }
1967 }
1968 
/**
 * Handle CM_REGREF: a president or president candidate refused our
 * CM_REGREQ.  Records the refusing node's start state, updates the
 * president-candidate election state, emits an event report, and if all
 * sent CM_REGREQs have been answered and we are the candidate, checks
 * whether we may declare ourselves president (electionWon).
 */
void Qmgr::execCM_REGREF(Signal* signal)
{
  jamEntry();

  CmRegRef* ref = (CmRegRef*)signal->getDataPtr();
  UintR TaddNodeno = ref->nodeId;
  UintR TrefuseReason = ref->errorCode;
  Uint32 candidate = ref->presidentCandidate;
  Uint32 node_gci = 1;
  Uint32 candidate_gci = 1;
  Uint32 start_type = ~0;
  NdbNodeBitmask skip_nodes;
  DEBUG_START3(signal, TrefuseReason);

  ndbrequire(signal->getLength() >= CmRegRef::SignalLength);
  node_gci = ref->latest_gci;
  candidate_gci = ref->candidate_latest_gci;
  start_type = ref->start_type;

  // check if node bitmask is in signal section
  if (signal->getNoOfSections() >= 1)
  {
    jam();
    ndbrequire(signal->getLength() >= CmRegRef::SignalLength);
    SectionHandle handle(this, signal);
    SegmentedSectionPtr ptr;
    handle.getSection(ptr, 0);

    ndbrequire(ptr.sz <= NdbNodeBitmask::Size);
    copy(skip_nodes.rep.data, ptr);
    releaseSections(handle);
  }
  else
  {
    // Old-format sender: bitmask is in the fixed 48-node field.
    skip_nodes.assign(NdbNodeBitmask48::Size, ref->skip_nodes_v1);
  }

  c_start.m_regReqReqRecv++;

  // Ignore block reference in data[0]

  // A different candidate than ours invalidates our received-count;
  // ~0 can never equal m_regReqReqSent, so we keep waiting.
  if(candidate != c_start.m_president_candidate)
  {
    jam();
    c_start.m_regReqReqRecv = ~0;
  }

  // Track the refusing node as a known starting node, and remember
  // whether it has a usable redo log (GCI above the undefined limit).
  c_start.m_starting_nodes.set(TaddNodeno);
  if (node_gci > ZUNDEFINED_GCI_LIMIT)
  {
    jam();
    c_start.m_starting_nodes_w_log.set(TaddNodeno);
  }
  c_start.m_node_gci[TaddNodeno] = node_gci;

  skip_nodes.bitAND(c_definedNodes);
  c_start.m_skip_nodes.bitOR(skip_nodes);

  // set own MT config here or in CONF, and others in CM_NODEINFOREQ/CONF
  setNodeInfo(getOwnNodeId()).m_lqh_workers = globalData.ndbMtLqhWorkers;

  // signal->theData[3] below encodes the refuse handling taken, for the
  // NDB_LE_CM_REGREF event report sent after the switch.
  char buf[100];
  switch (TrefuseReason) {
  case CmRegRef::ZINCOMPATIBLE_VERSION:
    jam();
    // progError() shuts the node down; control does not reach the next
    // case at runtime.
    progError(__LINE__, NDBD_EXIT_UNSUPPORTED_VERSION,
              "incompatible version, "
              "connection refused by running ndb node");
  case CmRegRef::ZINCOMPATIBLE_START_TYPE:
    jam();
    BaseString::snprintf(buf, sizeof(buf),
			 "incompatible start type detected: node %d"
			 " reports %s(%d) my start type: %s(%d)",
			 TaddNodeno,
			 get_start_type_string(start_type), start_type,
			 get_start_type_string(c_start.m_start_type),
			 c_start.m_start_type);
    progError(__LINE__, NDBD_EXIT_SR_RESTARTCONFLICT, buf);
    break;
  case CmRegRef::ZBUSY:
  case CmRegRef::ZBUSY_TO_PRES:
  case CmRegRef::ZBUSY_PRESIDENT:
    jam();
    // A president exists but is busy: retry later.
    cpresidentAlive = ZTRUE;
    signal->theData[3] = 0;
    break;
  case CmRegRef::ZNOT_IN_CFG:
    jam();
    progError(__LINE__, NDBD_EXIT_NODE_NOT_IN_CONFIG);
    break;
  case CmRegRef::ZNOT_DEAD:
    jam();
    progError(__LINE__, NDBD_EXIT_NODE_NOT_DEAD);
    break;
  case CmRegRef::ZSINGLE_USER_MODE:
    jam();
    progError(__LINE__, NDBD_EXIT_SINGLE_USER_MODE);
    break;
  /**
   * For generic refuse error.
   * e.g. in online upgrade, we can use this error code instead
   * of the incompatible error code.
   */
  case CmRegRef::ZGENERIC:
    jam();
    progError(__LINE__, NDBD_EXIT_GENERIC);
    break;
  case CmRegRef::ZELECTION:
    jam();
    // Election in progress: adopt the proposed candidate if it has a
    // higher GCI, or the same GCI and a lower node id.
    if (candidate_gci != ZUNDEFINED_GCI_LIMIT &&
        (candidate_gci > c_start.m_president_candidate_gci ||
	 (candidate_gci == c_start.m_president_candidate_gci &&
	 candidate < c_start.m_president_candidate)))
    {
      jam();
      //----------------------------------------
      /* We may already have a candidate      */
      /* choose the lowest nodeno             */
      //----------------------------------------
      signal->theData[3] = 2;
      c_start.m_president_candidate = candidate;
      c_start.m_president_candidate_gci = candidate_gci;
      DEB_STARTUP(("2:President candidate: %u, gci: %u",
                   candidate, candidate_gci));
    } else {
      signal->theData[3] = 4;
    }//if
    break;
  case CmRegRef::ZNOT_PRESIDENT:
    jam();
    cpresidentAlive = ZTRUE;
    signal->theData[3] = 3;
    break;
  default:
    jam();
    signal->theData[3] = 5;
    /*empty*/;
    break;
  }//switch
/*--------------------------------------------------------------*/
// Send this as an EVENT REPORT to inform about hearing about
// other NDB node proclaiming not to be president.
/*--------------------------------------------------------------*/
  signal->theData[0] = NDB_LE_CM_REGREF;
  signal->theData[1] = getOwnNodeId();
  signal->theData[2] = TaddNodeno;
//-----------------------------------------
// signal->theData[3] filled in above
//-----------------------------------------
  sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);

  // A live president means we simply wait for it.
  if(cpresidentAlive == ZTRUE)
  {
    jam();
    QMGR_DEBUG("cpresidentAlive");
    return;
  }

  // Still waiting for more CM_REGREQ replies.
  if(c_start.m_regReqReqSent != c_start.m_regReqReqRecv)
  {
    jam();
    QMGR_DEBUG(c_start.m_regReqReqSent << " != " << c_start.m_regReqReqRecv);
    return;
  }

  if(c_start.m_president_candidate != getOwnNodeId())
  {
    jam();
    QMGR_DEBUG("i'm not the candidate");
    return;
  }

  /**
   * All connected nodes has agreed
   */
  if(check_startup(signal))
  {
    jam();
    electionWon(signal);
  }

  return;
}//Qmgr::execCM_REGREF()
2152 
2153 /**
2154  * This function contains the logic to decide if we won the election.
2155  * A prerequisite to win an election is that no one is president and
2156  * that all nodes in the cluster have tried to register (except those
2157  * nodes in the skip list). We will wait for a time even for the skip
2158  * nodes. Each node has sent its starting GCI, so we can also ensure
2159  * that any node elected as President can also act as Master in NDBCNTR.
2160  */
/**
 * Decide whether the election/startup may proceed.
 *
 * @return 0 = keep waiting, non-zero = proceed with startup (the caller
 *         then runs electionWon()); 1 indicates a complete start, 2 a
 *         partial/partitioned start forced by expired timeouts.
 *
 * Side effect: always emits an NDB_LE_StartReport event (start_report
 * label) describing defined/starting/skipped/waited-for nodes.
 */
Uint32
Qmgr::check_startup(Signal* signal)
{
  const NDB_TICKS now  = NdbTick_getCurrentTicks();
  const Uint64 elapsed = NdbTick_Elapsed(c_start_election_time,now).milliSec();
  // NOTE(review): ~0 is used as a "no timeout" sentinel; the sentinel is
  // computed as 32-bit Uint32(~0) but stored/compared as Uint64 -- the
  // comparisons with `elapsed` below rely on this; confirm intended.
  const Uint64 partitionedTimeout =
    c_restartPartitionedTimeout == Uint32(~0) ? Uint32(~0) :
     (c_restartPartialTimeout + c_restartPartitionedTimeout);

  const bool no_nodegroup_active =
    (c_restartNoNodegroupTimeout != ~Uint32(0)) &&
    (! c_start.m_no_nodegroup_nodes.isclear());

  /**
   * First see if we should wait more...
   */
  // tmp = nodes accounted for (starting or explicitly skipped);
  // wait = defined nodes we are still waiting to hear from.
  NdbNodeBitmask tmp;
  tmp.bitOR(c_start.m_skip_nodes);
  tmp.bitOR(c_start.m_starting_nodes);

  NdbNodeBitmask wait;
  wait.assign(c_definedNodes);
  wait.bitANDC(tmp);

  Uint32 retVal = 0;
  Uint32 incompleteng = MAX_NDB_NODES; // Illegal value
  NdbNodeBitmask report_mask;

  // No restorable GCI, or explicit initial start: every defined node
  // (minus skip/no-nodegroup nodes) must join before we may start.
  if ((c_start.m_latest_gci == 0) ||
      (c_start.m_start_type == (1 << NodeState::ST_INITIAL_START)))
  {
    if (tmp.equal(c_definedNodes))
    {
      jam();
      signal->theData[1] = 0x8000;
      report_mask.assign(c_definedNodes);
      report_mask.bitANDC(c_start.m_starting_nodes);
      retVal = 1;
      goto start_report;
    }
    else if (no_nodegroup_active)
    {
      if (elapsed < c_restartNoNodegroupTimeout)
      {
        // Still within the grace period for nodes without a nodegroup.
        signal->theData[1] = 6;
        signal->theData[2] = Uint32((c_restartNoNodegroupTimeout - elapsed + 500) / 1000);
        report_mask.assign(wait);
        retVal = 0;
        goto start_report;
      }
      // Grace period over: treat no-nodegroup nodes as accounted for.
      tmp.bitOR(c_start.m_no_nodegroup_nodes);
      if (tmp.equal(c_definedNodes))
      {
        signal->theData[1] = 0x8000;
        report_mask.assign(c_definedNodes);
        report_mask.bitANDC(c_start.m_starting_nodes);
        retVal = 1;
        goto start_report;
      }
      else
      {
        jam();
        signal->theData[1] = 1;
        signal->theData[2] = ~0;
        report_mask.assign(wait);
        retVal = 0;
        goto start_report;
      }
    }
    else
    {
      jam();
      // Initial start must wait for all nodes indefinitely.
      signal->theData[1] = 1;
      signal->theData[2] = ~0;
      report_mask.assign(wait);
      retVal = 0;
      goto start_report;
    }
  }

  if (c_restartNoNodegroupTimeout != Uint32(~0) &&
      elapsed >= c_restartNoNodegroupTimeout)
  {
    tmp.bitOR(c_start.m_no_nodegroup_nodes);
  }

  {
    const bool all = c_start.m_starting_nodes.equal(c_definedNodes);
    // CHECKNODEGROUPSREQ is executed directly (synchronously) into DBDIH;
    // sd->output carries the verdict back.
    CheckNodeGroups* sd = (CheckNodeGroups*)&signal->theData[0];

    {
      /**
       * Check for missing node group directly
       */
      NdbNodeBitmask check;
      check.assign(c_definedNodes);
      check.bitANDC(c_start.m_starting_nodes);     // Keep not connected nodes
      check.bitOR(c_start.m_starting_nodes_w_log); //Add nodes with log

      sd->blockRef = reference();
      sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck;
      sd->mask = check;
      EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal,
                     CheckNodeGroups::SignalLength);

      if (sd->output == CheckNodeGroups::Lose)
      {
        jam();
        goto missing_nodegroup;
      }
    }

    // Verdict for all currently starting nodes...
    sd->blockRef = reference();
    sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck;
    sd->mask = c_start.m_starting_nodes;
    EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal,
                   CheckNodeGroups::SignalLength);

    const Uint32 result = sd->output;

    // ...and for the subset of starting nodes that have a redo log.
    sd->blockRef = reference();
    sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck;
    sd->mask = c_start.m_starting_nodes_w_log;
    EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal,
                   CheckNodeGroups::SignalLength);

    const Uint32 result_w_log = sd->output;

    if (tmp.equal(c_definedNodes))
    {
      /**
       * All nodes (wrt no-wait nodes) has connected...
       *   this means that we will now start or die
       */
      jam();
      switch(result_w_log){
      case CheckNodeGroups::Lose:
      {
        jam();
        goto missing_nodegroup;
      }
      case CheckNodeGroups::Win:
        signal->theData[1] = all ? 0x8001 : 0x8002;
        report_mask.assign(c_definedNodes);
        report_mask.bitANDC(c_start.m_starting_nodes);
        retVal = 1;
        goto check_log;
      case CheckNodeGroups::Partitioning:
        ndbrequire(result != CheckNodeGroups::Lose);
        signal->theData[1] =
          all ? 0x8001 : (result == CheckNodeGroups::Win ? 0x8002 : 0x8003);
        report_mask.assign(c_definedNodes);
        report_mask.bitANDC(c_start.m_starting_nodes);
        retVal = 1;
        goto check_log;
      }
    }

    // Not all nodes present yet: keep waiting while the partial-start
    // timeout has not expired.
    if (c_restartPartialTimeout == Uint32(~0) ||
        elapsed < c_restartPartialTimeout)
    {
      jam();

      signal->theData[1] = c_restartPartialTimeout == (Uint32) ~0 ? 2 : 3;
      signal->theData[2] =
        c_restartPartialTimeout == Uint32(~0) ?
          Uint32(~0) :
          Uint32((c_restartPartialTimeout - elapsed + 500) / 1000);
      report_mask.assign(wait);
      retVal = 0;

      if (no_nodegroup_active && elapsed < c_restartNoNodegroupTimeout)
      {
        signal->theData[1] = 7;
        signal->theData[2] = Uint32((c_restartNoNodegroupTimeout - elapsed + 500) / 1000);
      }
      else if (no_nodegroup_active && elapsed >= c_restartNoNodegroupTimeout)
      {
        report_mask.bitANDC(c_start.m_no_nodegroup_nodes);
      }

      goto start_report;
    }

    /**
     * Start partial has passed...check for partitioning...
     */
    switch(result_w_log){
    case CheckNodeGroups::Lose:
      jam();
      goto missing_nodegroup;
    case CheckNodeGroups::Partitioning:
      if (elapsed != Uint32(~0) &&
          elapsed < partitionedTimeout &&
          result != CheckNodeGroups::Win)
      {
        goto missinglog;
      }
      // Fall through...
    case CheckNodeGroups::Win:
      signal->theData[1] =
        all ? 0x8001 : (result == CheckNodeGroups::Win ? 0x8002 : 0x8003);
      report_mask.assign(c_definedNodes);
      report_mask.bitANDC(c_start.m_starting_nodes);
      retVal = 2;
      goto check_log;
    }
  }
  ndbabort();

check_log:
  // Ask DBDIH whether the redo logs of the starting nodes cover every
  // node group; theData is saved/restored around the direct execute.
  jam();
  {
    Uint32 save[4+4*NdbNodeBitmask::Size];
    memcpy(save, signal->theData, sizeof(save));

    DihRestartReq * req = CAST_PTR(DihRestartReq, signal->getDataPtrSend());
    req->senderRef = 0;
    c_start.m_starting_nodes.copyto(NdbNodeBitmask::Size, req->nodemask);
    memcpy(req->node_gcis, c_start.m_node_gci, 4*MAX_NDB_NODES);
    EXECUTE_DIRECT(DBDIH, GSN_DIH_RESTARTREQ, signal,
		   DihRestartReq::CheckLength);

    // theData[0] now holds the id of a node group with incomplete log,
    // or MAX_NDB_NODES if all groups are covered.
    incompleteng = signal->theData[0];
    memcpy(signal->theData, save, sizeof(save));

    if (incompleteng != MAX_NDB_NODES)
    {
      jam();
      if (retVal == 1)
      {
	jam();
	goto incomplete_log;
      }
      else if (retVal == 2)
      {
	// NOTE(review): `<=` here vs `<` in the Partitioning check above
	// -- confirm the boundary is intentional.
	if (elapsed != Uint32(~0) && elapsed <= partitionedTimeout)
	{
	  jam();
	  goto missinglog;
	}
	else
	{
	  goto incomplete_log;
	}
      }
      ndbabort();
    }
  }
  goto start_report;

missinglog:
  // Keep waiting for the partitioned timeout before forcing a start.
  signal->theData[1] = c_restartPartitionedTimeout == Uint32(~0) ? 4 : 5;
  signal->theData[2] =
    partitionedTimeout == Uint32(~0) ?
      Uint32(~0) : Uint32((partitionedTimeout - elapsed + 500) / 1000);
  infoEvent("partitionedTimeout = %llu, elapsed = %llu", partitionedTimeout, elapsed);
  report_mask.assign(c_definedNodes);
  report_mask.bitANDC(c_start.m_starting_nodes);
  retVal = 0;
  goto start_report;

start_report:
  // Emit NDB_LE_StartReport with five bitmasks: defined, starting,
  // skipped, report_mask, no-nodegroup nodes.
  jam();
  {
    Uint32 sz = NdbNodeBitmask::Size;
    signal->theData[0] = NDB_LE_StartReport;
    signal->theData[3] = sz;
    Uint32* ptr = signal->theData+4;
    c_definedNodes.copyto(sz, ptr); ptr += sz;
    c_start.m_starting_nodes.copyto(sz, ptr); ptr += sz;
    c_start.m_skip_nodes.copyto(sz, ptr); ptr += sz;
    report_mask.copyto(sz, ptr); ptr+= sz;
    c_start.m_no_nodegroup_nodes.copyto(sz, ptr); ptr += sz;
    LinearSectionPtr lsptr[3];
    lsptr[0].p = signal->theData;
    lsptr[0].sz = 4 + 5 * NdbNodeBitmask::Size;
    sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 1, JBB, lsptr, 1);
  }
  return retVal;

missing_nodegroup:
  // Fatal: some node group has no live or restorable member.
  jam();
  {
    const Uint32 extra = 100;
    char buf[2 * (NdbNodeBitmask::TextLength + 1) + extra];
    char mask1[NdbNodeBitmask::TextLength + 1];
    char mask2[NdbNodeBitmask::TextLength + 1];
    c_start.m_starting_nodes.getText(mask1);
    tmp.assign(c_start.m_starting_nodes);
    tmp.bitANDC(c_start.m_starting_nodes_w_log);
    tmp.getText(mask2);
    BaseString::snprintf(buf, sizeof(buf),
			 "Unable to start missing node group! "
			 " starting: %s (missing working fs for: %s)",
			 mask1, mask2);
    CRASH_INSERTION(944);
    progError(__LINE__, NDBD_EXIT_INSUFFICENT_NODES, buf);
    return 0;                                     // Deadcode
  }

incomplete_log:
  // Fatal: a node group's redo log cannot be completed.
  jam();
  {
    const Uint32 extra = 100;
    char buf[NdbNodeBitmask::TextLength + 1 + extra];
    char mask1[NdbNodeBitmask::TextLength + 1];
    c_start.m_starting_nodes.getText(mask1);
    BaseString::snprintf(buf, sizeof(buf),
			 "Incomplete log for node group: %d! "
			 " starting nodes: %s",
			 incompleteng, mask1);
    CRASH_INSERTION(944);
    progError(__LINE__, NDBD_EXIT_INSUFFICENT_NODES, buf);
    return 0;                                     // Deadcode
  }
}
2478 
2479 void
electionWon(Signal * signal)2480 Qmgr::electionWon(Signal* signal)
2481 {
2482   NodeRecPtr myNodePtr;
2483   cpresident = getOwnNodeId(); /* This node becomes president. */
2484   myNodePtr.i = getOwnNodeId();
2485   ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);
2486 
2487   myNodePtr.p->phase = ZRUNNING;
2488   DEB_STARTUP(("phase(%u) = ZRUNNING", myNodePtr.i));
2489   DEB_MULTI_TRP(("Node %u in ZRUNNING, electionWon", myNodePtr.i));
2490 
2491   cpdistref = reference();
2492   cneighbourl = ZNIL;
2493   cneighbourh = ZNIL;
2494   myNodePtr.p->ndynamicId = 1 | (myNodePtr.p->hbOrder << 16);
2495   c_maxDynamicId = 1;
2496   c_clusterNodes.clear();
2497   c_clusterNodes.set(getOwnNodeId());
2498 
2499   cpresidentAlive = ZTRUE;
2500   NdbTick_Invalidate(&c_start_election_time);
2501   c_start.reset();
2502 
2503   signal->theData[0] = NDB_LE_CM_REGCONF;
2504   signal->theData[1] = getOwnNodeId();
2505   signal->theData[2] = cpresident;
2506   signal->theData[3] = myNodePtr.p->ndynamicId;
2507   sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);
2508 
2509   c_start.m_starting_nodes.clear(getOwnNodeId());
2510   if (c_start.m_starting_nodes.isclear())
2511   {
2512     jam();
2513     sendSttorryLab(signal, true);
2514   }
2515 }
2516 
2517 /*
2518 4.4.11 CONTINUEB */
2519 /*--------------------------------------------------------------------------*/
2520 /*                                                                          */
2521 /*--------------------------------------------------------------------------*/
2522 /****************************>---------------------------------------------*/
2523 /* CONTINUEB                 >        SENDER: Own block, Own node          */
2524 /****************************>-------+INPUT : TCONTINUEB_TYPE              */
2525 /*--------------------------------------------------------------*/
regreqTimeLimitLab(Signal * signal)2526 void Qmgr::regreqTimeLimitLab(Signal* signal)
2527 {
2528   if(cpresident == ZNIL)
2529   {
2530     if (c_start.m_president_candidate == ZNIL)
2531     {
2532       jam();
2533       c_start.m_president_candidate = getOwnNodeId();
2534     }
2535 
2536     cmInfoconf010Lab(signal);
2537   }
2538 }//Qmgr::regreqTimelimitLab()
2539 
2540 /**---------------------------------------------------------------------------
2541  * The new node will take care of giving information about own node and ask
2542  * all other nodes for nodeinfo. The new node will use CM_NODEINFOREQ for
 * that purpose. When the setup of connections to all running nodes is
 * complete, the president will send a commit to all running nodes + the
 * new node.
2545  * INPUT: NODE_PTR1, must be set as ZNIL if we don't enter CONNECT_NODES)
2546  *                   from signal CM_NODEINFOCONF.
2547  *---------------------------------------------------------------------------*/
2548 /*******************************/
2549 /* CM_NODEINFOCONF            */
2550 /*******************************/
execCM_NODEINFOCONF(Signal * signal)2551 void Qmgr::execCM_NODEINFOCONF(Signal* signal)
2552 {
2553   DEBUG_START3(signal, "");
2554 
2555   jamEntry();
2556 
2557   CmNodeInfoConf * const conf = (CmNodeInfoConf*)signal->getDataPtr();
2558 
2559   const Uint32 nodeId = conf->nodeId;
2560   const Uint32 dynamicId = conf->dynamicId;
2561   const Uint32 version = conf->version;
2562   Uint32 mysql_version = conf->mysql_version;
2563   Uint32 lqh_workers = conf->lqh_workers;
2564 
2565   NodeRecPtr nodePtr;
2566   nodePtr.i = getOwnNodeId();
2567   ptrAss(nodePtr, nodeRec);
2568   ndbrequire(nodePtr.p->phase == ZSTARTING);
2569   ndbrequire(c_start.m_gsn == GSN_CM_NODEINFOREQ);
2570   c_start.m_nodes.clearWaitingFor(nodeId);
2571 
2572   /**
2573    * Update node info
2574    */
2575   NodeRecPtr replyNodePtr;
2576   replyNodePtr.i = nodeId;
2577   ptrCheckGuard(replyNodePtr, MAX_NDB_NODES, nodeRec);
2578   replyNodePtr.p->ndynamicId = dynamicId;
2579   replyNodePtr.p->blockRef = signal->getSendersBlockRef();
2580   setNodeInfo(replyNodePtr.i).m_version = version;
2581   setNodeInfo(replyNodePtr.i).m_mysql_version = mysql_version;
2582   setNodeInfo(replyNodePtr.i).m_lqh_workers = lqh_workers;
2583 
2584   recompute_version_info(NodeInfo::DB, version);
2585 
2586   if(!c_start.m_nodes.done()){
2587     jam();
2588     return;
2589   }
2590 
2591   /**********************************************<*/
2592   /* Send an ack. back to the president.          */
2593   /* CM_ACKADD                                    */
2594   /* The new node has been registered by all      */
2595   /* running nodes and has stored nodeinfo about  */
2596   /* all running nodes. The new node has to wait  */
2597   /* for CM_ADD (commit) from president to become */
2598   /* a running node in the cluster.               */
2599   /**********************************************<*/
2600   sendCmAckAdd(signal, getOwnNodeId(), CmAdd::Prepare);
2601   return;
2602 }//Qmgr::execCM_NODEINFOCONF()
2603 
2604 /**---------------------------------------------------------------------------
2605  * A new node sends nodeinfo about himself. The new node asks for
2606  * corresponding nodeinfo back in the  CM_NODEINFOCONF.
2607  *---------------------------------------------------------------------------*/
2608 /*******************************/
2609 /* CM_NODEINFOREQ             */
2610 /*******************************/
execCM_NODEINFOREQ(Signal * signal)2611 void Qmgr::execCM_NODEINFOREQ(Signal* signal)
2612 {
2613   jamEntry();
2614 
2615   const Uint32 Tblockref = signal->getSendersBlockRef();
2616 
2617   NodeRecPtr nodePtr;
2618   nodePtr.i = getOwnNodeId();
2619   ptrAss(nodePtr, nodeRec);
2620   if(nodePtr.p->phase != ZRUNNING){
2621     jam();
2622     signal->theData[0] = reference();
2623     signal->theData[1] = getOwnNodeId();
2624     signal->theData[2] = ZNOT_RUNNING;
2625     sendSignal(Tblockref, GSN_CM_NODEINFOREF, signal, 3, JBB);
2626     return;
2627   }
2628 
2629   NodeRecPtr addNodePtr;
2630   CmNodeInfoReq * const req = (CmNodeInfoReq*)signal->getDataPtr();
2631   addNodePtr.i = req->nodeId;
2632   ptrCheckGuard(addNodePtr, MAX_NDB_NODES, nodeRec);
2633   addNodePtr.p->ndynamicId = req->dynamicId;
2634   addNodePtr.p->blockRef = signal->getSendersBlockRef();
2635   setNodeInfo(addNodePtr.i).m_version = req->version;
2636 
2637   Uint32 mysql_version = req->mysql_version;
2638   setNodeInfo(addNodePtr.i).m_mysql_version = mysql_version;
2639 
2640   Uint32 lqh_workers = req->lqh_workers;
2641   setNodeInfo(addNodePtr.i).m_lqh_workers = lqh_workers;
2642 
2643   c_maxDynamicId = req->dynamicId & 0xFFFF;
2644 
2645   cmAddPrepare(signal, addNodePtr, nodePtr.p);
2646 }//Qmgr::execCM_NODEINFOREQ()
2647 
/**
 * Handle the prepare step of including a starting node.
 *
 * Called both from execCM_ADD (president's Prepare) and from
 * execCM_NODEINFOREQ (starting node's nodeinfo). The prepare is acked
 * only once both have arrived, i.e. when the node is already in
 * ZSTARTING when this is called a second time.
 *
 * @param signal   incoming signal (reused for outgoing signals)
 * @param nodePtr  record of the starting node
 * @param self     our own node record (source of our dynamic id)
 */
void
Qmgr::cmAddPrepare(Signal* signal, NodeRecPtr nodePtr, const NodeRec * self){
  jam();

  switch(nodePtr.p->phase){
  case ZINIT:
    jam();
    /* First of the two events has arrived: mark the node as starting
     * and wait for the other before acking. */
    nodePtr.p->phase = ZSTARTING;
    DEB_STARTUP(("2:phase(%u) = ZSTARTING", nodePtr.i));
    return;
  case ZFAIL_CLOSING:
    jam();

#if 1
    /* Failure handling for this node has not completed yet. Defer:
     * wait for its CM_NODEINFOCONF instead of acking now. The
     * president never takes this path (it would have finished failure
     * handling before starting the add). */
    warningEvent("Received request to incorporate node %u, "
		 "while error handling has not yet completed",
		 nodePtr.i);

    ndbrequire(getOwnNodeId() != cpresident);
    ndbrequire(signal->header.theVerId_signalNumber == GSN_CM_ADD);
    c_start.m_nodes.clearWaitingFor();
    c_start.m_nodes.setWaitingFor(nodePtr.i);
    c_start.m_gsn = GSN_CM_NODEINFOCONF;
#else
    /* Alternative (disabled) approach: force the node into ZSTARTING
     * and reopen communication immediately. */
    warningEvent("Enabling communication to CM_ADD node %u state=%d",
		 nodePtr.i,
		 nodePtr.p->phase);
    nodePtr.p->phase = ZSTARTING;
    nodePtr.p->failState = NORMAL;
    signal->theData[0] = 0;
    signal->theData[1] = nodePtr.i;
    sendSignal(TRPMAN_REF, GSN_OPEN_COMORD, signal, 2, JBB);
#endif
    return;
  case ZSTARTING:
    /* Second event has arrived; fall through to ack the prepare. */
    break;
  case ZRUNNING:
    ndbabort();
  case ZPREPARE_FAIL:
    ndbabort();
  case ZAPI_ACTIVATION_ONGOING:
    ndbabort();
  case ZAPI_ACTIVE:
    ndbabort();
  case ZAPI_INACTIVE:
    ndbabort();
  }

  /* Ack the prepare to the president and send the API/MGM node
   * versions to the starting node. */
  sendCmAckAdd(signal, nodePtr.i, CmAdd::Prepare);
  sendApiVersionRep(signal, nodePtr);

  /* President have prepared us: return our own nodeinfo to the
   * starting node. */
  CmNodeInfoConf * conf = (CmNodeInfoConf*)signal->getDataPtrSend();
  conf->nodeId = getOwnNodeId();
  conf->dynamicId = self->ndynamicId;
  conf->version = getNodeInfo(getOwnNodeId()).m_version;
  conf->mysql_version = getNodeInfo(getOwnNodeId()).m_mysql_version;
  conf->lqh_workers = getNodeInfo(getOwnNodeId()).m_lqh_workers;
  sendSignal(nodePtr.p->blockRef, GSN_CM_NODEINFOCONF, signal,
	     CmNodeInfoConf::SignalLength, JBB);
  DEBUG_START(GSN_CM_NODEINFOCONF, refToNode(nodePtr.p->blockRef), "");
}
2710 
2711 void
sendApiVersionRep(Signal * signal,NodeRecPtr nodePtr)2712 Qmgr::sendApiVersionRep(Signal* signal, NodeRecPtr nodePtr)
2713 {
2714   {
2715     jam();
2716     Uint32 ref = calcQmgrBlockRef(nodePtr.i);
2717     for(Uint32 i = 1; i<MAX_NODES; i++)
2718     {
2719       jam();
2720       Uint32 version = getNodeInfo(i).m_version;
2721       Uint32 type = getNodeInfo(i).m_type;
2722       if (type != NodeInfo::DB && version)
2723       {
2724 	jam();
2725 	signal->theData[0] = i;
2726 	signal->theData[1] = version;
2727 	sendSignal(ref, GSN_NODE_VERSION_REP, signal, 2, JBB);
2728       }
2729     }
2730   }
2731 }
2732 
2733 void
sendCmAckAdd(Signal * signal,Uint32 nodeId,CmAdd::RequestType type)2734 Qmgr::sendCmAckAdd(Signal * signal, Uint32 nodeId, CmAdd::RequestType type){
2735 
2736   CmAckAdd * cmAckAdd = (CmAckAdd*)signal->getDataPtrSend();
2737   cmAckAdd->requestType = type;
2738   cmAckAdd->startingNodeId = nodeId;
2739   cmAckAdd->senderNodeId = getOwnNodeId();
2740   sendSignal(cpdistref, GSN_CM_ACKADD, signal, CmAckAdd::SignalLength, JBA);
2741   DEBUG_START(GSN_CM_ACKADD, cpresident, "");
2742 
2743   switch(type){
2744   case CmAdd::Prepare:
2745     return;
2746   case CmAdd::AddCommit:
2747   case CmAdd::CommitNew:
2748     break;
2749   }
2750 
2751   signal->theData[0] = nodeId;
2752   EXECUTE_DIRECT(NDBCNTR, GSN_CM_ADD_REP, signal, 1);
2753   jamEntry();
2754 }
2755 
/*
4.4.11 CM_ADD */
/**--------------------------------------------------------------------------
 * Prepare a running node to add a new node to the cluster. The running node
 * will change the phase of the new node from ZINIT to ZSTARTING. The running
 * node will also mark that we have received a prepare. When the new node has
 * sent us nodeinfo we can send an acknowledgement back to the president.
 * When all running nodes have acknowledged the new node, the president will
 * send a commit and we can change the phase of the new node to ZRUNNING. The
 * president will also send CM_ADD to himself.
 *---------------------------------------------------------------------------*/
2767 /*******************************/
2768 /* CM_ADD                     */
2769 /*******************************/
/**
 * Handle CM_ADD from the president.
 *
 * Two roles are distinguished:
 *  - If we ourselves are in ZSTARTING we are the node being added:
 *    Prepare is just awaited-past, CommitNew completes our join.
 *  - Otherwise we are a running node: Prepare runs cmAddPrepare and
 *    AddCommit promotes the starting node to ZRUNNING and enables
 *    communication to it.
 */
void Qmgr::execCM_ADD(Signal* signal)
{
  NodeRecPtr addNodePtr;
  jamEntry();

  NodeRecPtr nodePtr;
  nodePtr.i = getOwnNodeId();
  ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);

  CRASH_INSERTION(940);

  CmAdd * const cmAdd = (CmAdd*)signal->getDataPtr();
  const CmAdd::RequestType type = (CmAdd::RequestType)cmAdd->requestType;
  addNodePtr.i = cmAdd->startingNodeId;
  //const Uint32 startingVersion = cmAdd->startingVersion;
  ptrCheckGuard(addNodePtr, MAX_NDB_NODES, nodeRec);

  DEBUG_START3(signal, type);

  if(nodePtr.p->phase == ZSTARTING){
    jam();
    /**
     * We are joining...
     */
    ndbrequire(addNodePtr.i == nodePtr.i);
    switch(type){
    case CmAdd::Prepare:
      ndbrequire(c_start.m_gsn == GSN_CM_NODEINFOREQ);
      /**
       * Wait for CM_NODEINFO_CONF
       */
      return;
    case CmAdd::CommitNew:
      /**
       * Tata. we're in the cluster
       */
      joinedCluster(signal, addNodePtr);
      return;
    case CmAdd::AddCommit:
      /* A starting node must never receive AddCommit for itself. */
      ndbabort();
    }
  }

  /* We are a running node handling the inclusion of someone else. */
  switch (type) {
  case CmAdd::Prepare:
    cmAddPrepare(signal, addNodePtr, nodePtr.p);
    break;
  case CmAdd::AddCommit:{
    jam();
    ndbrequire(addNodePtr.p->phase == ZSTARTING);
    /* Promote the starting node to a full cluster member and include
     * it in the heartbeat ring. */
    addNodePtr.p->phase = ZRUNNING;
    DEB_STARTUP(("2:phase(%u) = ZRUNNING", addNodePtr.i));
    DEB_MULTI_TRP(("Node %u in ZRUNNING, AddCommit", addNodePtr.i));
    m_connectivity_check.reportNodeConnect(addNodePtr.i);
    set_hb_count(addNodePtr.i) = 0;
    c_clusterNodes.set(addNodePtr.i);
    findNeighbours(signal, __LINE__);

    /**
     * SEND A HEARTBEAT IMMEDIATELY TO DECREASE THE RISK THAT WE MISS EARLY
     * HEARTBEATS.
     */
    sendHeartbeat(signal);
    hb_send_timer.reset(NdbTick_getCurrentTicks());

    /**
     *  ENABLE COMMUNICATION WITH ALL BLOCKS WITH THE NEWLY ADDED NODE
     */
    EnableComReq *enableComReq = (EnableComReq *)signal->getDataPtrSend();
    enableComReq->m_senderRef = reference();
    enableComReq->m_senderData = ENABLE_COM_CM_ADD_COMMIT;
    enableComReq->m_enableNodeId = addNodePtr.i;
    sendSignal(TRPMAN_REF, GSN_ENABLE_COMREQ, signal,
               EnableComReq::SignalLength, JBB);
    /* The ack to the president is sent from handleEnableComAddCommit
     * once ENABLE_COMCONF arrives. */
    break;
  }
  case CmAdd::CommitNew:
    jam();
    /* CommitNew is sent only to the starting node itself, handled in
     * the ZSTARTING branch above. */
    ndbabort();
  }

}//Qmgr::execCM_ADD()
2852 
2853 void
handleEnableComAddCommit(Signal * signal,Uint32 node)2854 Qmgr::handleEnableComAddCommit(Signal *signal, Uint32 node)
2855 {
2856   sendCmAckAdd(signal, node, CmAdd::AddCommit);
2857   if(getOwnNodeId() != cpresident){
2858     jam();
2859     c_start.reset();
2860   }
2861 }
2862 
2863 void
execENABLE_COMCONF(Signal * signal)2864 Qmgr::execENABLE_COMCONF(Signal *signal)
2865 {
2866   const EnableComConf *enableComConf =
2867     (const EnableComConf *)signal->getDataPtr();
2868   Uint32 state = enableComConf->m_senderData;
2869   Uint32 node = enableComConf->m_enableNodeId;
2870 
2871   jamEntry();
2872 
2873   switch (state)
2874   {
2875     case ENABLE_COM_CM_ADD_COMMIT:
2876       jam();
2877       /* Only exactly one node possible here. */
2878       handleEnableComAddCommit(signal, node);
2879       break;
2880 
2881     case ENABLE_COM_CM_COMMIT_NEW:
2882       jam();
2883       handleEnableComCommitNew(signal);
2884       break;
2885 
2886     case ENABLE_COM_API_REGREQ:
2887       jam();
2888       /* Only exactly one node possible here. */
2889       handleEnableComApiRegreq(signal, node);
2890       break;
2891 
2892     default:
2893       jam();
2894       ndbabort();
2895   }
2896 }
2897 
/**
 * We (the starting node) received CM_ADD(CommitNew): we are now a full
 * cluster member. nodePtr refers to our own node record here (checked
 * by the caller); it is reused below as a loop cursor.
 */
void
Qmgr::joinedCluster(Signal* signal, NodeRecPtr nodePtr){
  /**
   * WE HAVE BEEN INCLUDED IN THE CLUSTER WE CAN START BEING PART OF THE
   * HEARTBEAT PROTOCOL AND WE WILL ALSO ENABLE COMMUNICATION WITH ALL
   * NODES IN THE CLUSTER.
   */
  DEB_MULTI_TRP(("Node %u in ZRUNNING, AddCommit", nodePtr.i));
  nodePtr.p->phase = ZRUNNING;
  DEB_STARTUP(("3:phase(%u) = ZRUNNING", nodePtr.i));
  set_hb_count(nodePtr.i) = 0;
  findNeighbours(signal, __LINE__);
  c_clusterNodes.set(nodePtr.i);
  c_start.reset();

  /**
   * SEND A HEARTBEAT IMMEDIATELY TO DECREASE THE RISK
   * THAT WE MISS EARLY HEARTBEATS.
   */
  sendHeartbeat(signal);
  hb_send_timer.reset(NdbTick_getCurrentTicks());

  /**
   * ENABLE COMMUNICATION WITH ALL BLOCKS IN THE CURRENT CLUSTER AND SET
   * THE NODES IN THE CLUSTER TO BE RUNNING.
   */
  EnableComReq *enableComReq = (EnableComReq *)signal->getDataPtrSend();
  enableComReq->m_senderRef = reference();
  enableComReq->m_senderData = ENABLE_COM_CM_COMMIT_NEW;
  enableComReq->m_enableNodeId = 0;
  enableComReq->m_nodeIds.clear();
  jam();
  /* Collect every other running node into the enable bitmask. */
  for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
    ptrAss(nodePtr, nodeRec);
    if ((nodePtr.p->phase == ZRUNNING) && (nodePtr.i != getOwnNodeId())) {
      /*-------------------------------------------------------------------*/
      // Enable full communication to all other nodes. Not really necessary
      // to open communication to ourself.
      /*-------------------------------------------------------------------*/
      jamLine(nodePtr.i);
      enableComReq->m_nodeIds.set(nodePtr.i);
    }//if
  }//for

  if (!enableComReq->m_nodeIds.isclear())
  {
    jam();
    /* Bitmask is passed as a linear section alongside the request. */
    LinearSectionPtr lsptr[3];
    lsptr[0].p = enableComReq->m_nodeIds.rep.data;
    lsptr[0].sz = enableComReq->m_nodeIds.getPackedLengthInWords();
    sendSignal(TRPMAN_REF,
               GSN_ENABLE_COMREQ,
               signal,
               EnableComReq::SignalLength,
               JBB,
               lsptr,
               1);
  }
  else
  {
    /* No other nodes to enable: complete the CommitNew immediately. */
    handleEnableComCommitNew(signal);
  }
}
2961 
void
Qmgr::handleEnableComCommitNew(Signal *signal)
{
  /* Communication with the cluster is enabled: complete our own start
   * phase... */
  sendSttorryLab(signal, true);

  /* ...and ack CommitNew to the president (this also reports
   * CM_ADD_REP to the local NDBCNTR, see sendCmAckAdd). */
  sendCmAckAdd(signal, getOwnNodeId(), CmAdd::CommitNew);
}
2969 
2970 /*  4.10.7 CM_ACKADD        - PRESIDENT IS RECEIVER -       */
2971 /*---------------------------------------------------------------------------*/
2972 /* Entry point for an ack add signal.
2973  * The TTYPE defines if it is a prepare or a commit.                         */
2974 /*---------------------------------------------------------------------------*/
/**
 * President-only handler. Collects CM_ACKADD from all expected nodes
 * for the current phase of the node-add protocol and, once all have
 * acked, drives the protocol forward:
 *   Prepare   -> broadcast CM_ADD(AddCommit) to all cluster nodes
 *   AddCommit -> send CM_ADD(CommitNew) to the starting node
 *   CommitNew -> include node in arbitration; start is complete
 */
void Qmgr::execCM_ACKADD(Signal* signal)
{
  NodeRecPtr addNodePtr;
  NodeRecPtr senderNodePtr;
  jamEntry();

  CmAckAdd * const cmAckAdd = (CmAckAdd*)signal->getDataPtr();
  const CmAdd::RequestType type = (CmAdd::RequestType)cmAckAdd->requestType;
  addNodePtr.i = cmAckAdd->startingNodeId;
  senderNodePtr.i = cmAckAdd->senderNodeId;

  DEBUG_START3(signal, type);

  if (cpresident != getOwnNodeId()) {
    jam();
    /*-----------------------------------------------------------------------*/
    /* IF WE ARE NOT PRESIDENT THEN WE SHOULD NOT RECEIVE THIS MESSAGE.      */
    /*-----------------------------------------------------------------------*/
    warningEvent("Received CM_ACKADD from %d president=%d",
		 senderNodePtr.i, cpresident);
    return;
  }//if

  if (addNodePtr.i != c_start.m_startNode) {
    jam();
    /*----------------------------------------------------------------------*/
    /* THIS IS NOT THE STARTING NODE. WE ARE ACTIVE NOW WITH ANOTHER START. */
    /*----------------------------------------------------------------------*/
    warningEvent("Received CM_ACKADD from %d with startNode=%d != own %d",
		 senderNodePtr.i, addNodePtr.i, c_start.m_startNode);
    return;
  }//if

  ndbrequire(c_start.m_gsn == GSN_CM_ADD);
  /* Tick off the sender; proceed only when every expected node acked. */
  c_start.m_nodes.clearWaitingFor(senderNodePtr.i);
  if(!c_start.m_nodes.done()){
    jam();
    return;
  }

  switch (type) {
  case CmAdd::Prepare:{
    jam();

    /*----------------------------------------------------------------------*/
    /* ALL RUNNING NODES HAVE PREPARED THE INCLUSION OF THIS NEW NODE.      */
    /*----------------------------------------------------------------------*/
    /* Enter the commit phase: now expect AddCommit acks from every
     * node in the cluster. */
    c_start.m_gsn = GSN_CM_ADD;
    c_start.m_nodes = c_clusterNodes;

    CmAdd * const cmAdd = (CmAdd*)signal->getDataPtrSend();
    cmAdd->requestType = CmAdd::AddCommit;
    cmAdd->startingNodeId = addNodePtr.i;
    cmAdd->startingVersion = getNodeInfo(addNodePtr.i).m_version;
    cmAdd->startingMysqlVersion = getNodeInfo(addNodePtr.i).m_mysql_version;
    NodeReceiverGroup rg(QMGR, c_clusterNodes);
    sendSignal(rg, GSN_CM_ADD, signal, CmAdd::SignalLength, JBA);
    DEBUG_START2(GSN_CM_ADD, rg, "AddCommit");
    return;
  }
  case CmAdd::AddCommit:{
    jam();

    /****************************************/
    /* Send commit to the new node so he    */
    /* will change PHASE into ZRUNNING      */
    /****************************************/
    /* Only the starting node itself is expected to ack CommitNew. */
    c_start.m_gsn = GSN_CM_ADD;
    c_start.m_nodes.clearWaitingFor();
    c_start.m_nodes.setWaitingFor(addNodePtr.i);

    CmAdd * const cmAdd = (CmAdd*)signal->getDataPtrSend();
    cmAdd->requestType = CmAdd::CommitNew;
    cmAdd->startingNodeId = addNodePtr.i;
    cmAdd->startingVersion = getNodeInfo(addNodePtr.i).m_version;
    cmAdd->startingMysqlVersion = getNodeInfo(addNodePtr.i).m_mysql_version;
    sendSignal(calcQmgrBlockRef(addNodePtr.i), GSN_CM_ADD, signal,
	       CmAdd::SignalLength, JBA);
    DEBUG_START(GSN_CM_ADD, addNodePtr.i, "CommitNew");
    /**
     * Report to DBDIH that a node have been added to the nodes included
     * in the heartbeat protocol.
     */
    InclNodeHBProtocolRep *rep = (InclNodeHBProtocolRep*)signal->getDataPtrSend();
    rep->nodeId = addNodePtr.i;
    EXECUTE_DIRECT(DBDIH, GSN_INCL_NODE_HB_PROTOCOL_REP, signal,
                   InclNodeHBProtocolRep::SignalLength);
    return;
  }
  case CmAdd::CommitNew:
    jam();
    /**
     * Tell arbitration about new node.
     */
    handleArbitNdbAdd(signal, addNodePtr.i);
    c_start.reset();

    if (c_start.m_starting_nodes.get(addNodePtr.i))
    {
      jam();
      c_start.m_starting_nodes.clear(addNodePtr.i);
      /* When the last starting node has been committed we can finish
       * our own start phase. */
      if (c_start.m_starting_nodes.isclear())
      {
	jam();
	sendSttorryLab(signal, true);
      }
    }
    return;
  }//switch
  ndbabort();
}//Qmgr::execCM_ACKADD()
3086 
3087 /**-------------------------------------------------------------------------
3088  * WE HAVE BEEN INCLUDED INTO THE CLUSTER. IT IS NOW TIME TO CALCULATE WHICH
3089  * ARE OUR LEFT AND RIGHT NEIGHBOURS FOR THE HEARTBEAT PROTOCOL.
3090  *--------------------------------------------------------------------------*/
/**
 * Recompute our left and right neighbours in the heartbeat ring.
 *
 * All ZRUNNING nodes are ordered by dynamic id. The left neighbour
 * (cneighbourl) is the running node with the largest dynamic id below
 * ours (we supervise its heartbeats); the right neighbour (cneighbourh)
 * has the smallest dynamic id above ours (we send heartbeats to it).
 * When no node exists on one side the ring wraps around to the
 * max/min dynamic id; with no other running node both are set to ZNIL.
 *
 * @param from  __LINE__ of the caller, logged for diagnostics
 */
void Qmgr::findNeighbours(Signal* signal, Uint32 from)
{
  UintR toldLeftNeighbour;
  UintR tfnLeftFound;
  UintR tfnMaxFound;
  UintR tfnMinFound;
  UintR tfnRightFound;
  NodeRecPtr fnNodePtr;
  NodeRecPtr fnOwnNodePtr;

  Uint32 toldRightNeighbour = cneighbourh;
  toldLeftNeighbour = cneighbourl;
  tfnLeftFound = 0;
  tfnMaxFound = 0;
  tfnMinFound = (UintR)-1;
  tfnRightFound = (UintR)-1;
  fnOwnNodePtr.i = getOwnNodeId();
  ptrCheckGuard(fnOwnNodePtr, MAX_NDB_NODES, nodeRec);
  /* One pass over all node records: track min/max dynamic id overall,
   * the closest id below ours (left) and the closest above (right). */
  for (fnNodePtr.i = 1; fnNodePtr.i < MAX_NDB_NODES; fnNodePtr.i++) {
    ptrAss(fnNodePtr, nodeRec);
    if (fnNodePtr.i != fnOwnNodePtr.i) {
      jamLine(fnNodePtr.i);
      if (fnNodePtr.p->phase == ZRUNNING) {
        if (tfnMinFound > fnNodePtr.p->ndynamicId) {
          jam();
          tfnMinFound = fnNodePtr.p->ndynamicId;
        }//if
        if (tfnMaxFound < fnNodePtr.p->ndynamicId) {
          jam();
          tfnMaxFound = fnNodePtr.p->ndynamicId;
        }//if
        if (fnOwnNodePtr.p->ndynamicId > fnNodePtr.p->ndynamicId) {
          jam();
          if (fnNodePtr.p->ndynamicId > tfnLeftFound) {
            jam();
            tfnLeftFound = fnNodePtr.p->ndynamicId;
          }//if
        } else {
          jam();
          if (fnNodePtr.p->ndynamicId < tfnRightFound) {
            jam();
            tfnRightFound = fnNodePtr.p->ndynamicId;
          }//if
        }//if
      }//if
    }//if
  }//for
  /* No node below us: wrap to the node with the largest dynamic id,
   * or ZNIL if we are alone. */
  if (tfnLeftFound == 0) {
    if (tfnMinFound == (UintR)-1) {
      jam();
      cneighbourl = ZNIL;
    } else {
      jam();
      cneighbourl = translateDynamicIdToNodeId(signal, tfnMaxFound);
    }//if
  } else {
    jam();
    cneighbourl = translateDynamicIdToNodeId(signal, tfnLeftFound);
  }//if
  /* No node above us: wrap to the node with the smallest dynamic id,
   * or ZNIL if we are alone. */
  if (tfnRightFound == (UintR)-1) {
    if (tfnMaxFound == 0) {
      jam();
      cneighbourh = ZNIL;
    } else {
      jam();
      cneighbourh = translateDynamicIdToNodeId(signal, tfnMinFound);
    }//if
  } else {
    jam();
    cneighbourh = translateDynamicIdToNodeId(signal, tfnRightFound);
  }//if
  if (toldLeftNeighbour != cneighbourl) {
    jam();
    if (cneighbourl != ZNIL) {
      jam();
      /**-------------------------------------------------------------------*/
      /* WE ARE SUPERVISING A NEW LEFT NEIGHBOUR. WE START WITH ALARM COUNT
       * EQUAL TO ZERO.
       *---------------------------------------------------------------------*/
      fnNodePtr.i = cneighbourl;
      ptrCheckGuard(fnNodePtr, MAX_NDB_NODES, nodeRec);
      set_hb_count(fnNodePtr.i) = 0;
    }//if
  }//if

  /* Log the new neighbour configuration as a cluster event. */
  signal->theData[0] = NDB_LE_FIND_NEIGHBOURS;
  signal->theData[1] = getOwnNodeId();
  signal->theData[2] = cneighbourl;
  signal->theData[3] = cneighbourh;
  signal->theData[4] = fnOwnNodePtr.p->ndynamicId;
  UintR Tlen = 5;
  sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, Tlen, JBB);
  g_eventLogger->info("findNeighbours from: %u old (left: %u right: %u) new (%u %u)",
                      from,
                      toldLeftNeighbour,
                      toldRightNeighbour,
                      cneighbourl,
                      cneighbourh);
}//Qmgr::findNeighbours()
3190 
3191 /*
3192 4.10.7 INIT_DATA        */
3193 /*---------------------------------------------------------------------------*/
3194 /*---------------------------------------------------------------------------*/
/**
 * Initialise QMGR data from the configuration: restart timeouts,
 * heartbeat/connectivity-check intervals, arbitration setup (ranks 1
 * and 2), and the per-node HeartbeatOrder values.
 */
void Qmgr::initData(Signal* signal)
{
  // catch-all for missing initializations
  memset(&arbitRec, 0, sizeof(arbitRec));

  /**
   * Timeouts
   */
  const ndb_mgm_configuration_iterator * p =
    m_ctx.m_config.getOwnConfigIterator();
  ndbrequire(p != 0);

  /* Defaults, overridden by configuration parameters below. */
  Uint32 hbDBDB = 1500;
  Uint32 arbitTimeout = 1000;
  Uint32 arbitMethod = ARBIT_METHOD_DEFAULT;
  Uint32 ccInterval = 0;
  c_restartPartialTimeout = 30000;
  c_restartPartitionedTimeout = Uint32(~0);
  c_restartFailureTimeout = Uint32(~0);
  c_restartNoNodegroupTimeout = 15000;
  ndb_mgm_get_int_parameter(p, CFG_DB_HEARTBEAT_INTERVAL, &hbDBDB);
  ndb_mgm_get_int_parameter(p, CFG_DB_ARBIT_TIMEOUT, &arbitTimeout);
  ndb_mgm_get_int_parameter(p, CFG_DB_ARBIT_METHOD, &arbitMethod);
  ndb_mgm_get_int_parameter(p, CFG_DB_START_PARTIAL_TIMEOUT,
			    &c_restartPartialTimeout);
  ndb_mgm_get_int_parameter(p, CFG_DB_START_PARTITION_TIMEOUT,
			    &c_restartPartitionedTimeout);
  ndb_mgm_get_int_parameter(p, CFG_DB_START_NO_NODEGROUP_TIMEOUT,
			    &c_restartNoNodegroupTimeout);
  ndb_mgm_get_int_parameter(p, CFG_DB_START_FAILURE_TIMEOUT,
			    &c_restartFailureTimeout);
  ndb_mgm_get_int_parameter(p, CFG_DB_CONNECT_CHECK_DELAY,
                            &ccInterval);

  /* A configured value of 0 means "wait forever" for these timeouts. */
  if(c_restartPartialTimeout == 0)
  {
    c_restartPartialTimeout = Uint32(~0);
  }

  if (c_restartPartitionedTimeout == 0)
  {
    c_restartPartitionedTimeout = Uint32(~0);
  }

  if (c_restartFailureTimeout == 0)
  {
    c_restartFailureTimeout = Uint32(~0);
  }

  if (c_restartNoNodegroupTimeout == 0)
  {
    c_restartNoNodegroupTimeout = Uint32(~0);
  }

  setHbDelay(hbDBDB);
  setCCDelay(ccInterval);
  setArbitTimeout(arbitTimeout);

  arbitRec.method = (ArbitRec::Method)arbitMethod;
  arbitRec.state = ARBIT_NULL;          // start state for all nodes
  // NOTE(review): debug text says ARBIT_INIT but the state assigned
  // above is ARBIT_NULL -- confirm which is intended.
  DEB_ARBIT(("Arbit state = ARBIT_INIT init"));
  arbitRec.apiMask[0].clear();          // prepare for ARBIT_CFG

  /* Build the arbitrator candidate masks for rank 1 and rank 2 from
   * the cluster configuration and feed each to execARBIT_CFG. */
  Uint32 sum = 0;
  ArbitSignalData* const sd = (ArbitSignalData*)&signal->theData[0];
  for (unsigned rank = 1; rank <= 2; rank++) {
    sd->sender = getOwnNodeId();
    sd->code = rank;
    sd->node = 0;
    sd->ticket.clear();
    sd->mask.clear();
    ndb_mgm_configuration_iterator * iter =
      m_ctx.m_config.getClusterConfigIterator();
    for (ndb_mgm_first(iter); ndb_mgm_valid(iter); ndb_mgm_next(iter)) {
      Uint32 tmp = 0;
      if (ndb_mgm_get_int_parameter(iter, CFG_NODE_ARBIT_RANK, &tmp) == 0 &&
	  tmp == rank){
	Uint32 nodeId = 0;
	ndbrequire(!ndb_mgm_get_int_parameter(iter, CFG_NODE_ID, &nodeId));
	sd->mask.set(nodeId);
      }
    }
    sum += sd->mask.count();
    execARBIT_CFG(signal);
  }

  /* No node has an arbitration rank: arbitration cannot be used. */
  if (arbitRec.method == ArbitRec::METHOD_DEFAULT &&
      sum == 0)
  {
    jam();
    infoEvent("Arbitration disabled, all API nodes have rank 0");
    arbitRec.method = ArbitRec::DISABLED;
  }

  setNodeInfo(getOwnNodeId()).m_mysql_version = NDB_MYSQL_VERSION_D;

  /* Read the optional HeartbeatOrder for each DB node. */
  ndb_mgm_configuration_iterator * iter =
    m_ctx.m_config.getClusterConfigIterator();
  for (ndb_mgm_first(iter); ndb_mgm_valid(iter); ndb_mgm_next(iter))
  {
    jam();
    Uint32 nodeId = 0;
    if (ndb_mgm_get_int_parameter(iter, CFG_NODE_ID, &nodeId) == 0)
    {
      jam();
      if (nodeId < MAX_NDB_NODES && getNodeInfo(nodeId).m_type == NodeInfo::DB)
      {
        Uint32 hbOrder = 0;
        ndb_mgm_get_int_parameter(iter, CFG_DB_HB_ORDER, &hbOrder);

        NodeRecPtr nodePtr;
        nodePtr.i = nodeId;
        ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
        nodePtr.p->hbOrder = hbOrder;
      }
    }
  }
  /* HeartbeatOrder must be either unset everywhere or set to distinct
   * non-zero values everywhere; anything else is a config error. */
  int hb_order_error = check_hb_order_config();
  if (hb_order_error == -1)
  {
    char msg[] = "Illegal HeartbeatOrder config, "
                 "all nodes must have non-zero config value";
    progError(__LINE__, NDBD_EXIT_INVALID_CONFIG, msg);
    return;
  }
  if (hb_order_error == -2)
  {
    char msg[] = "Illegal HeartbeatOrder config, "
                 "the nodes must have distinct config values";
    progError(__LINE__, NDBD_EXIT_INVALID_CONFIG, msg);
    return;
  }
  ndbrequire(hb_order_error == 0);
}//Qmgr::initData()
3329 
3330 
3331 /**---------------------------------------------------------------------------
3332  * HERE WE RECEIVE THE JOB TABLE SIGNAL EVERY 10 MILLISECONDS.
3333  * WE WILL USE THIS TO CHECK IF IT IS TIME TO CHECK THE NEIGHBOUR NODE.
3334  * WE WILL ALSO SEND A SIGNAL TO BLOCKS THAT NEED A TIME SIGNAL AND
3335  * DO NOT WANT TO USE JOB TABLE SIGNALS.
3336  *---------------------------------------------------------------------------*/
/**
 * 10ms CONTINUEB tick. Detects scheduling delays and clock changes,
 * drives heartbeat send/check, the connectivity check, the start
 * interface check and the API heartbeat, then re-arms itself with the
 * current timestamp carried in theData[1..2].
 */
void Qmgr::timerHandlingLab(Signal* signal)
{
  const NDB_TICKS TcurrentTime = NdbTick_getCurrentTicks();
  NodeRecPtr myNodePtr;
  myNodePtr.i = getOwnNodeId();
  ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);

  /* Timestamp stored when this CONTINUEB was sent (hi/lo words). */
  const Uint32 sentHi = signal->theData[1];
  const Uint32 sentLo = signal->theData[2];
  const NDB_TICKS sent((Uint64(sentHi) << 32) | sentLo);
  bool send_hb_always = false;

  if (NdbTick_Compare(sent,TcurrentTime) > 0)
  {
    jam();
    /* Clock moved backwards since the signal was sent. */
    const Uint64 backwards = NdbTick_Elapsed(TcurrentTime,sent).milliSec();
    if (backwards > 0) //Ignore sub millisecond backticks
    {
      g_eventLogger->warning("timerHandlingLab, clock ticked backwards: %llu (ms)",
                              backwards);
      send_hb_always = true;
    }
  }
  else
  {
    /* Expected ~10ms between ticks; much longer means we were starved
     * of CPU. Log it, with rusage detail when available. */
    const Uint64 elapsed = NdbTick_Elapsed(sent,TcurrentTime).milliSec();
    if (elapsed >= 150)
    {
      struct ndb_rusage curr_rusage;
      jam();
      send_hb_always = true;
      bool rusage_worked = true;
      Uint64 exec_time = 0;
      Uint64 sys_time = 0;
      Ndb_GetRUsage(&curr_rusage, false);
      /* Zeroed rusage (current or previous) means the platform call
       * did not deliver usable numbers. */
      if ((curr_rusage.ru_utime == 0 &&
           curr_rusage.ru_stime == 0) ||
          (m_timer_handling_rusage.ru_utime == 0 &&
           m_timer_handling_rusage.ru_stime == 0))
      {
        jam();
        rusage_worked = false;
      }
      if (rusage_worked)
      {
        exec_time = curr_rusage.ru_utime -
                    m_timer_handling_rusage.ru_utime;
        sys_time = curr_rusage.ru_stime -
                    m_timer_handling_rusage.ru_stime;
      }

      /* >= 1s delay is a warning, shorter delays only info. */
      if (elapsed >= 1000)
      {
        if (rusage_worked)
        {
          g_eventLogger->warning("timerHandlingLab, expected 10ms sleep"
                                 ", not scheduled for: %d (ms), "
                                 "exec_time %llu us, sys_time %llu us",
                                 int(elapsed),
                                 exec_time,
                                 sys_time);
        }
        else
        {
          g_eventLogger->warning("timerHandlingLab, expected 10ms sleep"
                              ", not scheduled for: %d (ms)", int(elapsed));
        }
      }
      else
      {
        if (rusage_worked)
        {
          g_eventLogger->info("timerHandlingLab, expected 10ms sleep"
                              ", not scheduled for: %d (ms), "
                              "exec_time %llu us, sys_time %llu us",
                              int(elapsed),
                              exec_time,
                              sys_time);
        }
        else
        {
          g_eventLogger->info("timerHandlingLab, expected 10ms sleep"
                              ", not scheduled for: %d (ms)", int(elapsed));
        }
      }
    }
  }

  if (myNodePtr.p->phase == ZRUNNING) {
    jam();
    /**---------------------------------------------------------------------
     * WE ARE ONLY PART OF HEARTBEAT CLUSTER IF WE ARE UP AND RUNNING.
     *---------------------------------------------------------------------*/
    if (hb_send_timer.check(TcurrentTime) || send_hb_always)
    {
      /**
       * We send heartbeats once per heartbeat interval and 4 missed heartbeat
       * intervals will cause a failure. If QMGR is not so responsive we're
       * having some sort of overload issue. In this case we will always take
       * the chance to send heartbeats immediately to avoid risking heartbeat
       * failures (send_hb_always == true).
       *
       * Delaying checks of heartbeat timers is much less of a problem.
       */
      jam();
      sendHeartbeat(signal);
      hb_send_timer.reset(TcurrentTime);
    }
    /* Heartbeat checking is suspended while a connectivity check is
     * in progress; the two are mutually exclusive. */
    if (likely(! m_connectivity_check.m_active))
    {
      if (hb_check_timer.check(TcurrentTime)) {
        jam();
        checkHeartbeat(signal);
        hb_check_timer.reset(TcurrentTime);
      }
    }
    else
    {
      /* Connectivity check */
      if (m_connectivity_check.m_timer.check(TcurrentTime)) {
        jam();
        checkConnectivityTimeSignal(signal);
        m_connectivity_check.m_timer.reset(TcurrentTime);
      }
    }
  }

  if (interface_check_timer.check(TcurrentTime)) {
    jam();
    interface_check_timer.reset(TcurrentTime);
    checkStartInterface(signal, TcurrentTime);
  }

  if (hb_api_timer.check(TcurrentTime))
  {
    jam();
    hb_api_timer.reset(TcurrentTime);
    apiHbHandlingLab(signal, TcurrentTime);
  }

  /* Snapshot rusage for the delay diagnostics of the next tick. */
  Ndb_GetRUsage(&m_timer_handling_rusage, false);

  //--------------------------------------------------
  // Resend this signal with 10 milliseconds delay.
  //--------------------------------------------------
  signal->theData[0] = ZTIMER_HANDLING;
  signal->theData[1] = Uint32(TcurrentTime.getUint64() >> 32);
  signal->theData[2] = Uint32(TcurrentTime.getUint64());
  sendSignalWithDelay(QMGR_REF, GSN_CONTINUEB, signal, 10, 3);
  return;
}//Qmgr::timerHandlingLab()
3488 
3489 /*---------------------------------------------------------------------------*/
3490 /*       THIS MODULE HANDLES THE SENDING AND RECEIVING OF HEARTBEATS.        */
3491 /*---------------------------------------------------------------------------*/
sendHeartbeat(Signal * signal)3492 void Qmgr::sendHeartbeat(Signal* signal)
3493 {
3494   NodeRecPtr localNodePtr;
3495   localNodePtr.i = cneighbourh;
3496   if (localNodePtr.i == ZNIL) {
3497     jam();
3498     /**---------------------------------------------------------------------
3499      * THERE ARE NO NEIGHBOURS. THIS IS POSSIBLE IF WE ARE THE ONLY NODE IN
3500      * THE CLUSTER.IN THIS CASE WE DO NOT NEED TO SEND ANY HEARTBEAT SIGNALS.
3501      *-----------------------------------------------------------------------*/
3502     return;
3503   }//if
3504 
3505   if(ERROR_INSERTED(946))
3506   {
3507     sleep(180);
3508     return;
3509   }
3510 
3511   ptrCheckGuard(localNodePtr, MAX_NDB_NODES, nodeRec);
3512   signal->theData[0] = getOwnNodeId();
3513 
3514   sendSignal(localNodePtr.p->blockRef, GSN_CM_HEARTBEAT, signal, 1, JBA);
3515 #ifdef VM_TRACE
3516   signal->theData[0] = NDB_LE_SentHeartbeat;
3517   signal->theData[1] = localNodePtr.i;
3518   sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
3519 #endif
3520 }//Qmgr::sendHeartbeat()
3521 
checkHeartbeat(Signal * signal)3522 void Qmgr::checkHeartbeat(Signal* signal)
3523 {
3524   NodeRecPtr nodePtr;
3525 
3526   nodePtr.i = cneighbourl;
3527   if (nodePtr.i == ZNIL) {
3528     jam();
3529     /**---------------------------------------------------------------------
3530      * THERE ARE NO NEIGHBOURS. THIS IS POSSIBLE IF WE ARE THE ONLY NODE IN
3531      * THE CLUSTER. IN THIS CASE WE DO NOT NEED TO CHECK ANY HEARTBEATS.
3532      *-----------------------------------------------------------------------*/
3533     return;
3534   }//if
3535   ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
3536 
3537   set_hb_count(nodePtr.i)++;
3538   ndbrequire(nodePtr.p->phase == ZRUNNING);
3539   ndbrequire(getNodeInfo(nodePtr.i).m_type == NodeInfo::DB);
3540 
3541   if (get_hb_count(nodePtr.i) > 2)
3542   {
3543     signal->theData[0] = NDB_LE_MissedHeartbeat;
3544     signal->theData[1] = nodePtr.i;
3545     signal->theData[2] = get_hb_count(nodePtr.i) - 1;
3546     sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3, JBB);
3547   }
3548 
3549   if (get_hb_count(nodePtr.i) > 4)
3550   {
3551     jam();
3552     if (m_connectivity_check.getEnabled())
3553     {
3554       jam();
3555       /* Start connectivity check, indicating the cause */
3556       startConnectivityCheck(signal, FailRep::ZHEARTBEAT_FAILURE, nodePtr.i);
3557       return;
3558     }
3559     else
3560     {
3561       /**----------------------------------------------------------------------
3562        * OUR LEFT NEIGHBOUR HAVE KEPT QUIET FOR THREE CONSECUTIVE HEARTBEAT
3563        * PERIODS. THUS WE DECLARE HIM DOWN.
3564        *----------------------------------------------------------------------*/
3565       signal->theData[0] = NDB_LE_DeadDueToHeartbeat;
3566       signal->theData[1] = nodePtr.i;
3567       sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
3568 
3569       failReportLab(signal, nodePtr.i, FailRep::ZHEARTBEAT_FAILURE, getOwnNodeId());
3570       return;
3571     }
3572   }//if
3573 }//Qmgr::checkHeartbeat()
3574 
apiHbHandlingLab(Signal * signal,NDB_TICKS now)3575 void Qmgr::apiHbHandlingLab(Signal* signal, NDB_TICKS now)
3576 {
3577   NodeRecPtr TnodePtr;
3578 
3579   jam();
3580   for (TnodePtr.i = 1; TnodePtr.i < MAX_NODES; TnodePtr.i++) {
3581     const Uint32 nodeId = TnodePtr.i;
3582     ptrAss(TnodePtr, nodeRec);
3583 
3584     const NodeInfo::NodeType type = getNodeInfo(nodeId).getType();
3585     if(type == NodeInfo::DB)
3586       continue;
3587 
3588     if(type == NodeInfo::INVALID)
3589       continue;
3590 
3591     if (c_connectedNodes.get(nodeId))
3592     {
3593       jamLine(nodeId);
3594       set_hb_count(TnodePtr.i)++;
3595 
3596       if (get_hb_count(TnodePtr.i) > 2)
3597       {
3598 	signal->theData[0] = NDB_LE_MissedHeartbeat;
3599 	signal->theData[1] = nodeId;
3600 	signal->theData[2] = get_hb_count(TnodePtr.i) - 1;
3601 	sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3, JBB);
3602       }
3603 
3604       if (get_hb_count(TnodePtr.i) > 4)
3605       {
3606         jam();
3607 	/*------------------------------------------------------------------*/
3608 	/* THE API NODE HAS NOT SENT ANY HEARTBEAT FOR THREE SECONDS.
3609 	 * WE WILL DISCONNECT FROM IT NOW.
3610 	 *------------------------------------------------------------------*/
3611 	/*------------------------------------------------------------------*/
3612 	/* We call node_failed to release all connections for this api node */
3613 	/*------------------------------------------------------------------*/
3614 	signal->theData[0] = NDB_LE_DeadDueToHeartbeat;
3615 	signal->theData[1] = nodeId;
3616 	sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
3617 
3618         api_failed(signal, nodeId);
3619       }//if
3620     }//if
3621     else if (TnodePtr.p->phase == ZAPI_INACTIVE &&
3622              TnodePtr.p->m_secret != 0 &&
3623              NdbTick_Compare(now,TnodePtr.p->m_alloc_timeout) > 0)
3624     {
3625       jam();
3626       TnodePtr.p->m_secret = 0;
3627       warningEvent("Releasing node id allocation for node %u",
3628                    TnodePtr.i);
3629     }
3630   }//for
3631   return;
3632 }//Qmgr::apiHbHandlingLab()
3633 
/**
 * Periodic (once per second) supervision of nodes in ZFAIL_CLOSING.
 *
 * Reopens communication (OPEN_COMORD) to a failed node once it has
 * been disconnected for at least three seconds AND local failure
 * handling has completed (failState == NORMAL). Warns every ~30
 * seconds while failure handling lingers. Also releases stale node id
 * allocations for data nodes in ZINIT whose allocation timed out.
 *
 * @param signal  signal object, reused for outgoing signals
 * @param now     current tick, compared against m_alloc_timeout
 */
void Qmgr::checkStartInterface(Signal* signal, NDB_TICKS now)
{
  NodeRecPtr nodePtr;
  /*------------------------------------------------------------------------*/
  // This method is called once per second. After a disconnect we wait at
  // least three seconds before allowing new connects. We will also ensure
  // that handling of the failure is completed before we allow new connections.
  /*------------------------------------------------------------------------*/
  jam();
  for (nodePtr.i = 1; nodePtr.i < MAX_NODES; nodePtr.i++) {
    ptrAss(nodePtr, nodeRec);
    Uint32 type = getNodeInfo(nodePtr.i).m_type;
    if (nodePtr.p->phase == ZFAIL_CLOSING) {
      jamLine(nodePtr.i);
      // While in ZFAIL_CLOSING, hb_count counts seconds since disconnect.
      set_hb_count(nodePtr.i)++;
      if (c_connectedNodes.get(nodePtr.i)){
        jam();
	/*-------------------------------------------------------------------*/
	// We need to ensure that the connection is not restored until it has
	// been disconnected for at least three seconds.
	/*-------------------------------------------------------------------*/
        set_hb_count(nodePtr.i) = 0;
      }//if
      if ((get_hb_count(nodePtr.i) > 3)
	  && (nodePtr.p->failState == NORMAL)) {
	/**------------------------------------------------------------------
	 * WE HAVE DISCONNECTED THREE SECONDS AGO. WE ARE NOW READY TO
	 * CONNECT AGAIN AND ACCEPT NEW REGISTRATIONS FROM THIS NODE.
	 * WE WILL NOT ALLOW CONNECTIONS OF API NODES UNTIL API FAIL HANDLING
	 * IS COMPLETE.
	 *-------------------------------------------------------------------*/
        nodePtr.p->failState = NORMAL;
        nodePtr.p->m_secret = 0;
        switch(type){
        case NodeInfo::DB:
          jam();
          nodePtr.p->phase = ZINIT;
          DEB_STARTUP(("2:phase(%u) = ZINIT", nodePtr.i));
          break;
        case NodeInfo::MGM:
          jam();
          nodePtr.p->phase = ZAPI_INACTIVE;
          break;
        case NodeInfo::API:
          jam();
          if (c_allow_api_connect)
          {
            jam();
            nodePtr.p->phase = ZAPI_INACTIVE;
            break;
          }
          else
          {
            /**
             * Dont allow API node to connect before c_allow_api_connect
             */
            jam();
            // Park the counter just below the threshold so this branch is
            // re-evaluated on a later tick, once API connects are allowed.
            set_hb_count(nodePtr.i) = 3;
            continue;
          }
        }

        set_hb_count(nodePtr.i) = 0;
        signal->theData[0] = 0;
        signal->theData[1] = nodePtr.i;
        sendSignal(TRPMAN_REF, GSN_OPEN_COMORD, signal, 2, JBB);
      }
      else
      {
        jam();
        // Failure handling still in progress: warn roughly every 30 seconds.
        if(((get_hb_count(nodePtr.i) + 1) % 30) == 0)
        {
          jam();
	  char buf[256];
          if (getNodeInfo(nodePtr.i).m_type == NodeInfo::DB)
          {
            jam();
            BaseString::snprintf(buf, sizeof(buf),
                                 "Failure handling of node %d has not completed"
                                 " in %d seconds - state = %d",
                                 nodePtr.i,
                                 get_hb_count(nodePtr.i),
                                 nodePtr.p->failState);
            warningEvent("%s", buf);

            /**
             * Also dump DIH nf-state
             */
            signal->theData[0] = DumpStateOrd::DihTcSumaNodeFailCompleted;
            signal->theData[1] = nodePtr.i;
            sendSignal(DBDIH_REF, GSN_DUMP_STATE_ORD, signal, 2, JBB);
          }
          else
          {
            jam();
            BaseString::snprintf(buf, sizeof(buf),
                                 "Failure handling of api %u has not completed"
                                 " in %d seconds - state = %d",
                                 nodePtr.i,
                                 get_hb_count(nodePtr.i),
                                 nodePtr.p->failState);
            warningEvent("%s", buf);
            if (nodePtr.p->failState == WAITING_FOR_API_FAILCONF)
            {
              jam();
              // List the blocks that have not yet sent API_FAILCONF.
              static_assert(NDB_ARRAY_SIZE(nodePtr.p->m_failconf_blocks) == 5, "");
              BaseString::snprintf(buf, sizeof(buf),
                                   "  Waiting for blocks: %u %u %u %u %u",
                                   nodePtr.p->m_failconf_blocks[0],
                                   nodePtr.p->m_failconf_blocks[1],
                                   nodePtr.p->m_failconf_blocks[2],
                                   nodePtr.p->m_failconf_blocks[3],
                                   nodePtr.p->m_failconf_blocks[4]);
              warningEvent("%s", buf);
            }
          }
	}
      }
    }
    else if (type == NodeInfo::DB && nodePtr.p->phase == ZINIT &&
             nodePtr.p->m_secret != 0 &&
             NdbTick_Compare(now,nodePtr.p->m_alloc_timeout) > 0)
    {
      jam();
      // Node id allocation expired before the data node (re)connected.
      nodePtr.p->m_secret = 0;
      warningEvent("Releasing node id allocation for node %u",
                   nodePtr.i);
    }
  }//for
  return;
}//Qmgr::checkStartInterface()
3765 
3766 /**-------------------------------------------------------------------------
3767  * This method is called when a DISCONNECT_REP signal arrived which means that
3768  * the API node is gone and we want to release resources in TC/DICT blocks.
3769  *---------------------------------------------------------------------------*/
/**
 * Initiate API failure handling towards the local blocks.
 *
 * Sets the node's failState to WAITING_FOR_API_FAILCONF and sends
 * API_FAILREQ, wrapped in ROUTE_ORD via TRPMAN, to DBTC, DBDICT,
 * DBSPJ and SUMA (only SUMA when sumaOnly is true). Each notified
 * block is registered via add_failconf_block() and must answer with
 * API_FAILCONF before the node record returns to NORMAL.
 *
 * @param failedNodeNo  node id of the failed API node
 * @param sumaOnly      notify only SUMA (node was not fully active)
 */
void Qmgr::sendApiFailReq(Signal* signal, Uint16 failedNodeNo, bool sumaOnly)
{
  jamEntry();
  signal->theData[0] = failedNodeNo;
  signal->theData[1] = QMGR_REF;

  /* We route the ApiFailReq signals via CMVMI
   * This is done to ensure that they are received after
   * any pending signals from the failed Api node when
   * running ndbmtd, as these signals would be enqueued from
   * the thread running CMVMI
   */
  // The 2-word payload (node id, QMGR_REF) travels as a long-signal section.
  Uint32 routedSignalSectionI = RNIL;
  ndbrequire(appendToSection(routedSignalSectionI,
                             &signal->theData[0],
                             2));
  SectionHandle handle(this, routedSignalSectionI);

  /* RouteOrd data */
  RouteOrd* routeOrd = (RouteOrd*) &signal->theData[0];
  routeOrd->srcRef = reference();
  routeOrd->gsn = GSN_API_FAILREQ;
  routeOrd->from = failedNodeNo;

  NodeRecPtr failedNodePtr;
  failedNodePtr.i = failedNodeNo;
  ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec);
  failedNodePtr.p->failState = WAITING_FOR_API_FAILCONF;


  /* Send ROUTE_ORD signals to CMVMI via JBA
   * CMVMI will then immediately send the API_FAILREQ
   * signals to the destination block(s) using JBB
   * These API_FAILREQ signals will be sent *after*
   * any JBB signals enqueued from the failed API
   * by the CMVMI thread.
   */
  if (!sumaOnly)
  {
    jam();
    add_failconf_block(failedNodePtr, DBTC);
    routeOrd->dstRef = DBTC_REF;
    sendSignalNoRelease(TRPMAN_REF, GSN_ROUTE_ORD, signal,
                        RouteOrd::SignalLength,
                        JBA, &handle);

    add_failconf_block(failedNodePtr, DBDICT);
    routeOrd->dstRef = DBDICT_REF;
    sendSignalNoRelease(TRPMAN_REF, GSN_ROUTE_ORD, signal,
                        RouteOrd::SignalLength,
                        JBA, &handle);

    add_failconf_block(failedNodePtr, DBSPJ);
    routeOrd->dstRef = DBSPJ_REF;
    sendSignalNoRelease(TRPMAN_REF, GSN_ROUTE_ORD, signal,
                        RouteOrd::SignalLength,
                        JBA, &handle);
  }

  /* Suma always notified */
  add_failconf_block(failedNodePtr, SUMA);
  routeOrd->dstRef = SUMA_REF;
  // Last send uses sendSignal (not ...NoRelease) so the section is freed.
  sendSignal(TRPMAN_REF, GSN_ROUTE_ORD, signal,
             RouteOrd::SignalLength,
             JBA, &handle);
}//Qmgr::sendApiFailReq()
3836 
execAPI_FAILREQ(Signal * signal)3837 void Qmgr::execAPI_FAILREQ(Signal* signal)
3838 {
3839   jamEntry();
3840   NodeRecPtr failedNodePtr;
3841   failedNodePtr.i = signal->theData[0];
3842   // signal->theData[1] == QMGR_REF
3843   ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec);
3844 
3845   ndbrequire(getNodeInfo(failedNodePtr.i).getType() != NodeInfo::DB);
3846 
3847   api_failed(signal, signal->theData[0]);
3848 }
3849 
execAPI_FAILCONF(Signal * signal)3850 void Qmgr::execAPI_FAILCONF(Signal* signal)
3851 {
3852   NodeRecPtr failedNodePtr;
3853 
3854   jamEntry();
3855   failedNodePtr.i = signal->theData[0];
3856   ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec);
3857 
3858   Uint32 block = refToMain(signal->theData[1]);
3859   if (failedNodePtr.p->failState != WAITING_FOR_API_FAILCONF ||
3860       !remove_failconf_block(failedNodePtr, block))
3861   {
3862     jam();
3863     ndbout << "execAPI_FAILCONF from " << block
3864            << " failedNodePtr.p->failState = "
3865 	   << (Uint32)(failedNodePtr.p->failState)
3866            << " blocks: ";
3867     for (Uint32 i = 0;i<NDB_ARRAY_SIZE(failedNodePtr.p->m_failconf_blocks);i++)
3868     {
3869       printf("%u ", failedNodePtr.p->m_failconf_blocks[i]);
3870     }
3871     ndbout << endl;
3872     systemErrorLab(signal, __LINE__);
3873   }//if
3874 
3875   if (is_empty_failconf_block(failedNodePtr))
3876   {
3877     jam();
3878     /**
3879      * When we set this state, connection will later be opened
3880      *   in checkStartInterface
3881      */
3882     failedNodePtr.p->failState = NORMAL;
3883 
3884     /**
3885      * Reset m_version only after all blocks has responded with API_FAILCONF
3886      *   so that no block risks reading 0 as node-version
3887      */
3888     setNodeInfo(failedNodePtr.i).m_version = 0;
3889     recompute_version_info(getNodeInfo(failedNodePtr.i).m_type);
3890   }
3891   return;
3892 }//Qmgr::execAPI_FAILCONF()
3893 
3894 void
add_failconf_block(NodeRecPtr nodePtr,Uint32 block)3895 Qmgr::add_failconf_block(NodeRecPtr nodePtr, Uint32 block)
3896 {
3897   // Check that it does not already exists!!
3898   Uint32 pos = 0;
3899   for (; pos < NDB_ARRAY_SIZE(nodePtr.p->m_failconf_blocks); pos++)
3900   {
3901     jam();
3902     if (nodePtr.p->m_failconf_blocks[pos] == 0)
3903     {
3904       jam();
3905       break;
3906     }
3907     else if (nodePtr.p->m_failconf_blocks[pos] == block)
3908     {
3909       jam();
3910       break;
3911     }
3912   }
3913 
3914   ndbrequire(pos != NDB_ARRAY_SIZE(nodePtr.p->m_failconf_blocks));
3915   ndbassert(nodePtr.p->m_failconf_blocks[pos] != block);
3916   if (nodePtr.p->m_failconf_blocks[pos] == block)
3917   {
3918     jam();
3919     /**
3920      * Already in list!!
3921      */
3922 #ifdef ERROR_INSERT
3923     ndbabort();
3924 #endif
3925     return;
3926   }
3927   ndbrequire(nodePtr.p->m_failconf_blocks[pos] == 0);
3928   nodePtr.p->m_failconf_blocks[pos] = block;
3929 }
3930 
3931 bool
remove_failconf_block(NodeRecPtr nodePtr,Uint32 block)3932 Qmgr::remove_failconf_block(NodeRecPtr nodePtr, Uint32 block)
3933 {
3934   // Check that it does exists!!
3935   Uint32 pos = 0;
3936   for (; pos < NDB_ARRAY_SIZE(nodePtr.p->m_failconf_blocks); pos++)
3937   {
3938     jam();
3939     if (nodePtr.p->m_failconf_blocks[pos] == 0)
3940     {
3941       jam();
3942       break;
3943     }
3944     else if (nodePtr.p->m_failconf_blocks[pos] == block)
3945     {
3946       jam();
3947       break;
3948     }
3949   }
3950 
3951   if (pos == NDB_ARRAY_SIZE(nodePtr.p->m_failconf_blocks) ||
3952       nodePtr.p->m_failconf_blocks[pos] != block)
3953   {
3954     jam();
3955     /**
3956      * Not found!!
3957      */
3958     return false;
3959   }
3960 
3961   nodePtr.p->m_failconf_blocks[pos] = 0;
3962   for (pos++; pos < NDB_ARRAY_SIZE(nodePtr.p->m_failconf_blocks); pos++)
3963   {
3964     jam();
3965     nodePtr.p->m_failconf_blocks[pos - 1] = nodePtr.p->m_failconf_blocks[pos];
3966   }
3967 
3968   return true;
3969 }
3970 
3971 bool
is_empty_failconf_block(NodeRecPtr nodePtr) const3972 Qmgr::is_empty_failconf_block(NodeRecPtr nodePtr) const
3973 {
3974   return nodePtr.p->m_failconf_blocks[0] == 0;
3975 }
3976 
/**
 * NDB_FAILCONF: node failure handling for a data node has completed.
 *
 * Resets the failed node's failState to NORMAL (crashes on an
 * unexpected confirmation), clears its version info and broadcasts
 * NF_COMPLETEREP to all connected API nodes so they can abort
 * transactions that waited for the failed node.
 */
void Qmgr::execNDB_FAILCONF(Signal* signal)
{
  NodeRecPtr failedNodePtr;
  NodeRecPtr nodePtr;

  jamEntry();
  failedNodePtr.i = signal->theData[0];

  if (ERROR_INSERTED(930))
  {
    // Error insert: drop the confirmation to simulate a lost signal.
    CLEAR_ERROR_INSERT_VALUE;
    infoEvent("Discarding NDB_FAILCONF for %u", failedNodePtr.i);
    return;
  }

  ptrCheckGuard(failedNodePtr, MAX_NDB_NODES, nodeRec);
  if (failedNodePtr.p->failState == WAITING_FOR_NDB_FAILCONF)
  {
    g_eventLogger->info("Node %u has completed node fail handling",
                        failedNodePtr.i);
    failedNodePtr.p->failState = NORMAL;
  }
  else
  {
    jam();

    // Unexpected NDB_FAILCONF for this state: fatal protocol violation.
    char buf[100];
    BaseString::snprintf(buf, 100,
			 "Received NDB_FAILCONF for node %u with state: %d %d",
			 failedNodePtr.i,
			 failedNodePtr.p->phase,
			 failedNodePtr.p->failState);
    progError(__LINE__, 0, buf);
    systemErrorLab(signal, __LINE__);
  }//if

  if (cpresident == getOwnNodeId())
  {
    jam();

    CRASH_INSERTION(936);
  }

  /**
   * Reset node version only after all blocks has handled the failure
   *   so that no block risks reading 0 as node version
   */
  setNodeInfo(failedNodePtr.i).m_version = 0;
  recompute_version_info(NodeInfo::DB);

  /**
   * Prepare a NFCompleteRep and send to all connected API's
   * They can then abort all transaction waiting for response from
   * the failed node
   *
   * NOTE: This is sent from all nodes, as otherwise we would need
   *       take-over if cpresident dies befor sending this
   */
  NFCompleteRep * const nfComp = (NFCompleteRep *)&signal->theData[0];
  nfComp->blockNo = QMGR_REF;
  nfComp->nodeId = getOwnNodeId();
  nfComp->failedNodeId = failedNodePtr.i;

  jam();
  for (nodePtr.i = 1; nodePtr.i < MAX_NODES; nodePtr.i++)
  {
    ptrAss(nodePtr, nodeRec);
    if (nodePtr.p->phase == ZAPI_ACTIVE){
      jamLine(nodePtr.i);
      sendSignal(nodePtr.p->blockRef, GSN_NF_COMPLETEREP, signal,
                 NFCompleteRep::SignalLength, JBB);
    }//if
  }//for
  return;
}//Qmgr::execNDB_FAILCONF()
4052 
4053 void
execNF_COMPLETEREP(Signal * signal)4054 Qmgr::execNF_COMPLETEREP(Signal* signal)
4055 {
4056   jamEntry();
4057   NFCompleteRep rep = *(NFCompleteRep*)signal->getDataPtr();
4058   if (rep.blockNo != DBTC)
4059   {
4060     jam();
4061     ndbassert(false);
4062     return;
4063   }
4064 
4065   /**
4066    * This is a simple way of having ndbapi to get
4067    * earlier information that transactions can be aborted
4068    */
4069   signal->theData[0] = rep.failedNodeId;
4070   // The below entries are not used by NdbAPI.
4071   signal->theData[1] = reference();
4072   signal->theData[2] = 0; // Unknown failure number
4073   NodeRecPtr nodePtr;
4074   for (nodePtr.i = 1; nodePtr.i < MAX_NODES; nodePtr.i++)
4075   {
4076     ptrAss(nodePtr, nodeRec);
4077     if (nodePtr.p->phase == ZAPI_ACTIVE)
4078     {
4079       jamLine(nodePtr.i);
4080       sendSignal(nodePtr.p->blockRef, GSN_TAKE_OVERTCCONF, signal,
4081                  TakeOverTcConf::SignalLength, JBB);
4082     }//if
4083   }//for
4084   return;
4085 }
4086 
4087 /*******************************/
4088 /* DISCONNECT_REP             */
4089 /*******************************/
4090 const char *lookupConnectionError(Uint32 err);
4091 
/**
 * DISCONNECT_REP: the transporter layer reports that the link to a
 * node went down.
 *
 * Clears connection and multi-transporter state for the node, crashes
 * this node if a data node disconnects while we are still starting
 * (startLevel < SL_STARTED), and otherwise dispatches to api_failed()
 * for API/MGM peers or node_failed() for data nodes. Note that the
 * phase switch below inspects OUR OWN node record's phase, not the
 * disconnected node's.
 */
void Qmgr::execDISCONNECT_REP(Signal* signal)
{
  jamEntry();
  const DisconnectRep * const rep = (DisconnectRep *)&signal->theData[0];
  if (ERROR_INSERT_VALUE >= 951 && ERROR_INSERT_VALUE <= 960)
  {
    CRASH_INSERTION3();
  }
  const Uint32 nodeId = rep->nodeId;
  const Uint32 err = rep->err;
  const NodeInfo nodeInfo = getNodeInfo(nodeId);
  c_connectedNodes.clear(nodeId);
  DEB_STARTUP(("connectedNodes(%u) cleared", nodeId));

  if (nodeInfo.getType() == NodeInfo::DB)
  {
    c_readnodes_nodes.clear(nodeId);

    if (ERROR_INSERTED(942))
    {
      g_eventLogger->info("DISCONNECT_REP received from data node %u - crash insertion",
                          nodeId);
      CRASH_INSERTION(942);
    }
  }

  {
    // Reset all multi-transporter negotiation state for the node.
    NodeRecPtr disc_nodePtr;
    disc_nodePtr.i = nodeId;
    ptrCheckGuard(disc_nodePtr, MAX_NODES, nodeRec);

    disc_nodePtr.p->m_is_activate_trp_ready_for_me = false;
    disc_nodePtr.p->m_is_activate_trp_ready_for_other = false;
    disc_nodePtr.p->m_is_multi_trp_setup = false;
    disc_nodePtr.p->m_is_freeze_thread_completed = false;
    disc_nodePtr.p->m_is_ready_to_switch_trp = false;
    disc_nodePtr.p->m_is_preparing_switch_trp = false;
    disc_nodePtr.p->m_is_using_multi_trp = false;
    disc_nodePtr.p->m_set_up_multi_trp_started = false;
    disc_nodePtr.p->m_used_num_multi_trps = 0;
    disc_nodePtr.p->m_multi_trp_blockref = 0;
    disc_nodePtr.p->m_check_multi_trp_connect_loop_count = 0;
    disc_nodePtr.p->m_num_activated_trps = 0;
    if (disc_nodePtr.p->m_is_in_same_nodegroup)
    {
      jam();
      // Nodegroup neighbour lost: rebuild the neighbour node setup.
      DEB_MULTI_TRP(("Change neighbour node setup for node %u",
                     disc_nodePtr.i));
      check_no_multi_trp(signal, disc_nodePtr.i);
      startChangeNeighbourNode();
      setNeighbourNode(disc_nodePtr.i);
      endChangeNeighbourNode();
    }
  }

  // From here on, nodePtr refers to OUR OWN node record.
  NodeRecPtr nodePtr;
  nodePtr.i = getOwnNodeId();
  ptrCheckGuard(nodePtr, MAX_NODES, nodeRec);

  char buf[100];
  if (nodeInfo.getType() == NodeInfo::DB &&
      getNodeState().startLevel < NodeState::SL_STARTED)
  {
    jam();
    // A data node disconnect during our own restart is fatal for us.
    CRASH_INSERTION(932);
    CRASH_INSERTION(938);
    CRASH_INSERTION(944);
    CRASH_INSERTION(946);
    BaseString::snprintf(buf, 100, "Node %u disconnected in phase: %u",
                         nodeId,
                         nodePtr.p->phase);
    progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf);
    ndbabort();
  }

  if (getNodeInfo(nodeId).getType() != NodeInfo::DB)
  {
    jam();
    api_failed(signal, nodeId);
    return;
  }

  // Our own phase decides whether this data node disconnect is survivable.
  switch(nodePtr.p->phase){
  case ZRUNNING:
    jam();
    break;
  case ZINIT:
    ndbabort();
  case ZSTARTING:
    progError(__LINE__, NDBD_EXIT_CONNECTION_SETUP_FAILED,
	      lookupConnectionError(err));
  case ZPREPARE_FAIL:
    ndbabort();
  case ZFAIL_CLOSING:
    ndbabort();
  case ZAPI_ACTIVATION_ONGOING:
    ndbabort();
  case ZAPI_ACTIVE:
    ndbabort();
  case ZAPI_INACTIVE:
  {
    BaseString::snprintf(buf, 100, "Node %u disconnected", nodeId);
    progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf);
  }
  }

  if (ERROR_INSERTED(939) && ERROR_INSERT_EXTRA == nodeId)
  {
    ndbout_c("Ignoring DISCONNECT_REP for node %u that was force disconnected",
             nodeId);
    CLEAR_ERROR_INSERT_VALUE;
    return;
  }

  node_failed(signal, nodeId);
}//DISCONNECT_REP
4208 
/**
 * Handle loss of the communication link to a data node.
 *
 * Action depends on the failed node's phase: running (or starting,
 * which is forced to running) nodes go through full failure handling
 * via failReportLab; nodes already closing are ignored; nodes still in
 * ZINIT merely get their communication closed, to be reopened later by
 * checkStartInterface. API-node phases are unexpected here and abort
 * (those are handled by api_failed()).
 */
void Qmgr::node_failed(Signal* signal, Uint16 aFailedNode)
{
  NodeRecPtr failedNodePtr;
  /**------------------------------------------------------------------------
   *   A COMMUNICATION LINK HAS BEEN DISCONNECTED. WE MUST TAKE SOME ACTION
   *   DUE TO THIS.
   *-----------------------------------------------------------------------*/
  failedNodePtr.i = aFailedNode;
  ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec);
  failedNodePtr.p->m_secret = 0; // Not yet Uint64(rand()) << 32 + rand();

  // Only data nodes may reach this function.
  ndbrequire(getNodeInfo(failedNodePtr.i).getType() == NodeInfo::DB);

  /**---------------------------------------------------------------------
   *   THE OTHER NODE IS AN NDB NODE, WE HANDLE IT AS IF A HEARTBEAT
   *   FAILURE WAS DISCOVERED.
   *---------------------------------------------------------------------*/
  switch(failedNodePtr.p->phase){
  case ZRUNNING:
    jam();
    failReportLab(signal, aFailedNode, FailRep::ZLINK_FAILURE, getOwnNodeId());
    return;
  case ZSTARTING:
    /**
     * bug#42422
     *   Force "real" failure handling
     */
    // The node is promoted to ZRUNNING so failReportLab treats the
    // disconnect as a regular node failure.
    jam();
    DEB_MULTI_TRP(("Node %u in ZRUNNING, failedNode", failedNodePtr.i));
    failedNodePtr.p->phase = ZRUNNING;
    DEB_STARTUP(("4:phase(%u) = ZRUNNING", failedNodePtr.i));
    failReportLab(signal, aFailedNode, FailRep::ZLINK_FAILURE, getOwnNodeId());
    return;
  case ZFAIL_CLOSING:  // Close already in progress
    jam();
    return;
  case ZPREPARE_FAIL:  // PREP_FAIL already sent CLOSE_COMREQ
    jam();
    return;
  case ZINIT:
  {
    jam();
    /*---------------------------------------------------------------------*/
    // The other node is still not in the cluster but disconnected.
    // We must restart communication in three seconds.
    /*---------------------------------------------------------------------*/
    failedNodePtr.p->failState = NORMAL;
    failedNodePtr.p->phase = ZFAIL_CLOSING;
    DEB_STARTUP(("phase(%u) = ZFAIL_CLOSING", failedNodePtr.i));
    set_hb_count(failedNodePtr.i) = 0;

    // RT_NO_REPLY: no CLOSE_COMCONF expected; checkStartInterface will
    // reopen after the three second wait.
    CloseComReqConf * const closeCom =
      (CloseComReqConf *)&signal->theData[0];

    closeCom->xxxBlockRef = reference();
    closeCom->requestType = CloseComReqConf::RT_NO_REPLY;
    closeCom->failNo      = 0;
    closeCom->noOfNodes   = 1;
    closeCom->failedNodeId = failedNodePtr.i;
    sendSignal(TRPMAN_REF, GSN_CLOSE_COMREQ, signal,
               CloseComReqConf::SignalLength, JBB);
    return;
  }
  case ZAPI_ACTIVE:     // Unexpected states handled in ::api_failed()
    ndbabort();
  case ZAPI_INACTIVE:
    ndbabort();
  case ZAPI_ACTIVATION_ONGOING:
    ndbabort();
  default:
    ndbabort();  // Unhandled state
  }//switch

  return;
}
4284 
4285 void
execUPGRADE_PROTOCOL_ORD(Signal * signal)4286 Qmgr::execUPGRADE_PROTOCOL_ORD(Signal* signal)
4287 {
4288   const UpgradeProtocolOrd* ord = (UpgradeProtocolOrd*)signal->getDataPtr();
4289   switch(ord->type){
4290   case UpgradeProtocolOrd::UPO_ENABLE_MICRO_GCP:
4291     jam();
4292     m_micro_gcp_enabled = true;
4293     return;
4294   }
4295 }
4296 
/**
 * Start failure handling for an API/MGM node (triggered by disconnect,
 * missed heartbeats or an API_FAILREQ from a peer QMGR).
 *
 * Idempotent: returns immediately if the node is already in
 * ZFAIL_CLOSING. Broadcasts API_FAILREQ to all QMGRs so peers
 * disconnect quickly, then asks TRPMAN to close the communication;
 * local blocks are notified only after CLOSE_COMCONF, once all pending
 * signals from the failed API have been processed.
 */
void
Qmgr::api_failed(Signal* signal, Uint32 nodeId)
{
  NodeRecPtr failedNodePtr;
  /**------------------------------------------------------------------------
   *   A COMMUNICATION LINK HAS BEEN DISCONNECTED. WE MUST TAKE SOME ACTION
   *   DUE TO THIS.
   *-----------------------------------------------------------------------*/
  failedNodePtr.i = nodeId;
  ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec);
  failedNodePtr.p->m_secret = 0; // Not yet Uint64(rand()) << 32 + rand();

  if (failedNodePtr.p->phase == ZFAIL_CLOSING)
  {
    /**
     * Failure handling already in progress
     */
    jam();
    return;
  }

  ndbrequire(failedNodePtr.p->failState == NORMAL);

  /* Send API_FAILREQ to peer QMGR blocks to allow them to disconnect
   * quickly
   * Local application blocks get API_FAILREQ once all pending signals
   * from the failed API have been processed.
   */
  signal->theData[0] = failedNodePtr.i;
  signal->theData[1] = QMGR_REF;
  NodeReceiverGroup rg(QMGR, c_clusterNodes);
  sendSignal(rg, GSN_API_FAILREQ, signal, 2, JBA);

  /* Now ask CMVMI to disconnect the node */
  // The failState variant records whether the API was active, which
  // decides the handling when CLOSE_COMCONF arrives.
  FailState initialState = (failedNodePtr.p->phase == ZAPI_ACTIVE) ?
    WAITING_FOR_CLOSECOMCONF_ACTIVE :
    WAITING_FOR_CLOSECOMCONF_NOTACTIVE;

  failedNodePtr.p->failState = initialState;
  failedNodePtr.p->phase = ZFAIL_CLOSING;
  set_hb_count(failedNodePtr.i) = 0;

  CloseComReqConf * const closeCom = (CloseComReqConf *)&signal->theData[0];
  closeCom->xxxBlockRef = reference();
  closeCom->requestType = CloseComReqConf::RT_API_FAILURE;
  closeCom->failNo      = 0;
  closeCom->noOfNodes   = 1;
  closeCom->failedNodeId = nodeId;
  // Drop any ndbinfo process information published by the failed node.
  ProcessInfo * processInfo = getProcessInfo(nodeId);
  if (processInfo)
  {
    processInfo->invalidate();
  }
  sendSignal(TRPMAN_REF, GSN_CLOSE_COMREQ, signal,
             CloseComReqConf::SignalLength, JBB);
} // api_failed
4353 
4354 /**--------------------------------------------------------------------------
4355  * AN API NODE IS REGISTERING. IF FOR THE FIRST TIME WE WILL ENABLE
4356  * COMMUNICATION WITH ALL NDB BLOCKS.
4357  *---------------------------------------------------------------------------*/
4358 /*******************************/
4359 /* API_REGREQ                 */
4360 /*******************************/
execAPI_REGREQ(Signal * signal)4361 void Qmgr::execAPI_REGREQ(Signal* signal)
4362 {
4363   jamEntry();
4364 
4365   ApiRegReq* req = (ApiRegReq*)signal->getDataPtr();
4366   const Uint32 version = req->version;
4367   const BlockReference ref = req->ref;
4368 
4369   Uint32 mysql_version = req->mysql_version;
4370 
4371   NodeRecPtr apiNodePtr;
4372   apiNodePtr.i = refToNode(ref);
4373   ptrCheckGuard(apiNodePtr, MAX_NODES, nodeRec);
4374 
4375   if (apiNodePtr.p->phase == ZFAIL_CLOSING)
4376   {
4377     jam();
4378     /**
4379      * This node is pending CLOSE_COM_CONF
4380      *   ignore API_REGREQ
4381      */
4382     return;
4383   }
4384 
4385   if (!c_connectedNodes.get(apiNodePtr.i))
4386   {
4387     jam();
4388     /**
4389      * We have not yet heard execCONNECT_REP
4390      *   so ignore this until we do...
4391      */
4392     return;
4393   }
4394 
4395 #if 0
4396   ndbout_c("Qmgr::execAPI_REGREQ: Recd API_REGREQ (NodeId=%d)", apiNodePtr.i);
4397 #endif
4398 
4399   bool compatability_check;
4400   const char * extra = 0;
4401   NodeInfo::NodeType type= getNodeInfo(apiNodePtr.i).getType();
4402   switch(type){
4403   case NodeInfo::API:
4404     jam();
4405     compatability_check = ndbCompatible_ndb_api(NDB_VERSION, version);
4406     break;
4407   case NodeInfo::MGM:
4408     compatability_check = ndbCompatible_ndb_mgmt(NDB_VERSION, version);
4409     break;
4410   case NodeInfo::DB:
4411   case NodeInfo::INVALID:
4412   default:
4413     sendApiRegRef(signal, ref, ApiRegRef::WrongType);
4414     infoEvent("Invalid connection attempt with type %d", type);
4415     return;
4416   }
4417 
4418   if (!ndbd_upgrade_ok(version))
4419   {
4420     compatability_check = false;
4421   }
4422 
4423   if (!compatability_check) {
4424     jam();
4425     char buf[NDB_VERSION_STRING_BUF_SZ];
4426     infoEvent("Connection attempt from %s id=%d with %s "
4427 	      "incompatible with %s%s",
4428 	      type == NodeInfo::API ? "api or mysqld" : "management server",
4429 	      apiNodePtr.i,
4430 	      ndbGetVersionString(version, mysql_version, 0,
4431                                   buf,
4432                                   sizeof(buf)),
4433 	      NDB_VERSION_STRING,
4434               extra ? extra : "");
4435     apiNodePtr.p->phase = ZAPI_INACTIVE;
4436     sendApiRegRef(signal, ref, ApiRegRef::UnsupportedVersion);
4437     return;
4438   }
4439 
4440   setNodeInfo(apiNodePtr.i).m_version = version;
4441   setNodeInfo(apiNodePtr.i).m_mysql_version = mysql_version;
4442   set_hb_count(apiNodePtr.i) = 0;
4443 
4444   NodeState state = getNodeState();
4445   if (apiNodePtr.p->phase == ZAPI_INACTIVE)
4446   {
4447     apiNodePtr.p->blockRef = ref;
4448     if ((state.startLevel == NodeState::SL_STARTED ||
4449          state.getSingleUserMode() ||
4450          (state.startLevel == NodeState::SL_STARTING &&
4451           state.starting.startPhase >= 8)))
4452     {
4453       jam();
4454       /**----------------------------------------------------------------------
4455        * THE API NODE IS REGISTERING. WE WILL ACCEPT IT BY CHANGING STATE AND
4456        * SENDING A CONFIRM. We set state to ZAPI_ACTIVATION_ONGOING to ensure
4457        * that we don't send unsolicited API_REGCONF or other things before we
4458        * actually fully enabled the node for communicating with the new API
4459        * node. It also avoids sending NODE_FAILREP, NF_COMPLETEREP and
4460        * TAKE_OVERTCCONF even before the API_REGCONF is sent. We will get a
4461        * fresh state of the nodes in API_REGCONF which is sufficient, no need
4462        * to update the API before the API got the initial state.
4463        *----------------------------------------------------------------------*/
4464       apiNodePtr.p->phase = ZAPI_ACTIVATION_ONGOING;
4465       EnableComReq *enableComReq = (EnableComReq *)signal->getDataPtrSend();
4466       enableComReq->m_senderRef = reference();
4467       enableComReq->m_senderData = ENABLE_COM_API_REGREQ;
4468       enableComReq->m_enableNodeId = apiNodePtr.i;
4469       sendSignal(TRPMAN_REF, GSN_ENABLE_COMREQ, signal,
4470                  EnableComReq::SignalLength, JBB);
4471       return;
4472     }
4473     /**
4474      * The node is in some kind of STOPPING state, so we send API_REGCONF even
4475      * though we've not enabled communication, if the API tries to send
4476      * anything to us anyways it will simply be ignored since only QMGR will
4477      * receive signals in this state. The API receives the node states, so it
4478      * should be able to discover what nodes that it is able to actually use.
4479      */
4480   }
4481 
4482   sendApiRegConf(signal, apiNodePtr.i);
4483 }//Qmgr::execAPI_REGREQ()
4484 
void
Qmgr::handleEnableComApiRegreq(Signal *signal, Uint32 node)
{
  /**
   * Completion of API registration: called when TRPMAN has confirmed
   * ENABLE_COMREQ for an API node that sent API_REGREQ (see
   * execAPI_REGREQ). Publishes the node's version to the other QMGRs,
   * notifies NDBCNTR, and — if the node is still activating — finishes
   * the handshake by sending API_REGCONF.
   */
  NodeRecPtr apiNodePtr;
  NodeInfo::NodeType type = getNodeInfo(node).getType();
  Uint32 version = getNodeInfo(node).m_version;
  recompute_version_info(type, version);

  /* Tell the QMGRs on all other cluster nodes about this node's version. */
  signal->theData[0] = node;
  signal->theData[1] = version;
  NodeReceiverGroup rg(QMGR, c_clusterNodes);
  rg.m_nodes.clear(getOwnNodeId());
  sendSignal(rg, GSN_NODE_VERSION_REP, signal, 2, JBB);

  /* Inform local NDBCNTR that the API node has started. */
  signal->theData[0] = node;
  EXECUTE_DIRECT(NDBCNTR, GSN_API_START_REP, signal, 1);

  apiNodePtr.i = node;
  ptrCheckGuard(apiNodePtr, MAX_NODES, nodeRec);
  if (apiNodePtr.p->phase == ZAPI_ACTIVATION_ONGOING)
  {
    /**
     * Now we're about to send API_REGCONF to an API node, this means
     * that this node can immediately start communicating to TC, SUMA
     * and so forth. The state also indicates that the API is ready
     * to receive an unsolicited API_REGCONF when the node goes to
     * state SL_STARTED.
     */
    jam();
    apiNodePtr.p->phase = ZAPI_ACTIVE;
    sendApiRegConf(signal, node);
  }
  jam();
  /**
   * If the node was no longer in state ZAPI_ACTIVATION_ONGOING it must
   * have failed in the meantime; in that case we skip sending
   * API_REGCONF to the failed node and there is nothing more to do.
   */
}
4523 
4524 void
execNODE_STARTED_REP(Signal * signal)4525 Qmgr::execNODE_STARTED_REP(Signal *signal)
4526 {
4527   NodeRecPtr apiNodePtr;
4528   for (apiNodePtr.i = 1;
4529        apiNodePtr.i < MAX_NODES;
4530        apiNodePtr.i++)
4531   {
4532     ptrCheckGuard(apiNodePtr, MAX_NODES, nodeRec);
4533     NodeInfo::NodeType type = getNodeInfo(apiNodePtr.i).getType();
4534     if (type != NodeInfo::API)
4535     {
4536       /* Not an API node */
4537       continue;
4538     }
4539     if (!c_connectedNodes.get(apiNodePtr.i))
4540     {
4541       /* API not connected */
4542       continue;
4543     }
4544     if (apiNodePtr.p->phase != ZAPI_ACTIVE)
4545     {
4546       /**
4547        * The phase variable can be in three states for the API nodes, it can
4548        * be ZAPI_INACTIVE for an API node that hasn't connected, it can be
4549        * ZFAIL_CLOSING for an API node that recently failed and is performing
4550        * failure handling. It can be in the state ZAPI_ACTIVE which it enters
4551        * upon us receiving an API_REGREQ from the API. So at this point the
4552        * API is also able to receive an unsolicited API_REGCONF message.
4553        */
4554       continue;
4555     }
4556     /**
4557      * We will send an unsolicited API_REGCONF to the API node, this makes the
4558      * API node aware of our existence much faster (without it can wait up to
4559      * the lenght of a heartbeat DB-API period. For rolling restarts and other
4560      * similar actions this can easily cause the API to not have any usable
4561      * DB connections at all. This unsolicited response minimises this window
4562      * of unavailability to zero for all practical purposes.
4563      */
4564     sendApiRegConf(signal, apiNodePtr.i);
4565   }
4566 }
4567 
4568 void
sendApiRegConf(Signal * signal,Uint32 node)4569 Qmgr::sendApiRegConf(Signal *signal, Uint32 node)
4570 {
4571   NodeRecPtr apiNodePtr;
4572   apiNodePtr.i = node;
4573   ptrCheckGuard(apiNodePtr, MAX_NODES, nodeRec);
4574   const BlockReference ref = apiNodePtr.p->blockRef;
4575   ndbassert(ref != 0);
4576 
4577   ApiRegConf * const apiRegConf = (ApiRegConf *)&signal->theData[0];
4578   apiRegConf->qmgrRef = reference();
4579   apiRegConf->apiHeartbeatFrequency = (chbApiDelay / 10);
4580   apiRegConf->version = NDB_VERSION;
4581   apiRegConf->mysql_version = NDB_MYSQL_VERSION_D;
4582   apiRegConf->nodeState = getNodeState();
4583   {
4584     NodeRecPtr nodePtr;
4585     nodePtr.i = getOwnNodeId();
4586     ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
4587     Uint32 dynamicId = nodePtr.p->ndynamicId;
4588 
4589     if(apiRegConf->nodeState.masterNodeId != getOwnNodeId()){
4590       jam();
4591       apiRegConf->nodeState.dynamicId = dynamicId;
4592     } else {
4593       apiRegConf->nodeState.dynamicId = (Uint32)(-(Int32)dynamicId);
4594     }
4595   }
4596   NodeVersionInfo info = getNodeVersionInfo();
4597   apiRegConf->minDbVersion = info.m_type[NodeInfo::DB].m_min_version;
4598   apiRegConf->minApiVersion = info.m_type[NodeInfo::API].m_min_version;
4599   apiRegConf->nodeState.m_connected_nodes.assign(c_connectedNodes);
4600   sendSignal(ref, GSN_API_REGCONF, signal, ApiRegConf::SignalLength, JBB);
4601 }
4602 
4603 void
sendVersionedDb(NodeReceiverGroup rg,GlobalSignalNumber gsn,Signal * signal,Uint32 length,JobBufferLevel jbuf,Uint32 minversion)4604 Qmgr::sendVersionedDb(NodeReceiverGroup rg,
4605 		      GlobalSignalNumber gsn,
4606 		      Signal* signal,
4607 		      Uint32 length,
4608 		      JobBufferLevel jbuf,
4609 		      Uint32 minversion)
4610 {
4611   jam();
4612   NodeVersionInfo info = getNodeVersionInfo();
4613   if (info.m_type[NodeInfo::DB].m_min_version >= minversion)
4614   {
4615     jam();
4616     sendSignal(rg, gsn, signal, length, jbuf);
4617   }
4618   else
4619   {
4620     jam();
4621     Uint32 i = 0, cnt = 0;
4622     while((i = rg.m_nodes.find(i + 1)) != NodeBitmask::NotFound)
4623     {
4624       jam();
4625       if (getNodeInfo(i).m_version >= minversion)
4626       {
4627 	jam();
4628 	cnt++;
4629 	sendSignal(numberToRef(rg.m_block, i), gsn, signal, length, jbuf);
4630       }
4631     }
4632     ndbassert((cnt == 0 && rg.m_nodes.count() == 0) ||
4633 	      (cnt < rg.m_nodes.count()));
4634   }
4635 }
4636 
4637 void
execAPI_VERSION_REQ(Signal * signal)4638 Qmgr::execAPI_VERSION_REQ(Signal * signal) {
4639   jamEntry();
4640   ApiVersionReq * const req = (ApiVersionReq *)signal->getDataPtr();
4641 
4642   Uint32 senderRef = req->senderRef;
4643   Uint32 nodeId = req->nodeId;
4644 
4645   ApiVersionConf * conf = (ApiVersionConf *)req;
4646   if(getNodeInfo(nodeId).m_connected)
4647   {
4648     conf->version = getNodeInfo(nodeId).m_version;
4649     conf->mysql_version = getNodeInfo(nodeId).m_mysql_version;
4650     struct in_addr in= globalTransporterRegistry.get_connect_address(nodeId);
4651     conf->m_inet_addr= in.s_addr;
4652   }
4653   else
4654   {
4655     conf->version =  0;
4656     conf->mysql_version =  0;
4657     conf->m_inet_addr= 0;
4658   }
4659   conf->nodeId = nodeId;
4660   conf->isSingleUser = (nodeId == getNodeState().getSingleUserApi());
4661   sendSignal(senderRef,
4662 	     GSN_API_VERSION_CONF,
4663 	     signal,
4664 	     ApiVersionConf::SignalLength, JBB);
4665 }
4666 
4667 void
execNODE_VERSION_REP(Signal * signal)4668 Qmgr::execNODE_VERSION_REP(Signal* signal)
4669 {
4670   jamEntry();
4671   Uint32 nodeId = signal->theData[0];
4672   Uint32 version = signal->theData[1];
4673 
4674   if (nodeId < MAX_NODES)
4675   {
4676     jam();
4677     Uint32 type = getNodeInfo(nodeId).m_type;
4678     setNodeInfo(nodeId).m_version = version;
4679     recompute_version_info(type, version);
4680   }
4681 }
4682 
4683 void
recompute_version_info(Uint32 type,Uint32 version)4684 Qmgr::recompute_version_info(Uint32 type, Uint32 version)
4685 {
4686   NodeVersionInfo& info = setNodeVersionInfo();
4687   switch(type){
4688   case NodeInfo::DB:
4689   case NodeInfo::API:
4690   case NodeInfo::MGM:
4691     break;
4692   default:
4693     return;
4694   }
4695 
4696   if (info.m_type[type].m_min_version == 0 ||
4697       version < info.m_type[type].m_min_version)
4698     info.m_type[type].m_min_version = version;
4699   if (version > info.m_type[type].m_max_version)
4700     info.m_type[type].m_max_version = version;
4701 }
4702 
4703 void
recompute_version_info(Uint32 type)4704 Qmgr::recompute_version_info(Uint32 type)
4705 {
4706   switch(type){
4707   case NodeInfo::DB:
4708   case NodeInfo::API:
4709   case NodeInfo::MGM:
4710     break;
4711   default:
4712     return;
4713   }
4714 
4715   Uint32 min = ~0, max = 0;
4716   Uint32 cnt = type == NodeInfo::DB ? MAX_NDB_NODES : MAX_NODES;
4717   for (Uint32 i = 1; i<cnt; i++)
4718   {
4719     if (getNodeInfo(i).m_type == type)
4720     {
4721       Uint32 version = getNodeInfo(i).m_version;
4722 
4723       if (version)
4724       {
4725 	if (version < min)
4726 	  min = version;
4727 	if (version > max)
4728 	  max = version;
4729       }
4730     }
4731   }
4732 
4733   NodeVersionInfo& info = setNodeVersionInfo();
4734   info.m_type[type].m_min_version = min == ~(Uint32)0 ? 0 : min;
4735   info.m_type[type].m_max_version = max;
4736 }
4737 
#if 0
/**
 * NOTE: Dead code — compiled out via #if 0. Kept for reference only;
 * version compatibility is checked in execAPI_REGREQ via
 * ndbCompatible_ndb_api()/ndbCompatible_ndb_mgmt() instead.
 */
bool
Qmgr::checkAPIVersion(NodeId nodeId,
		      Uint32 apiVersion, Uint32 ownVersion) const {
  bool ret=true;
  /**
   * First implementation...
   */
  if ((getMajor(apiVersion) < getMajor(ownVersion) ||
       getMinor(apiVersion) < getMinor(ownVersion)) &&
      apiVersion >= API_UPGRADE_VERSION) {
    jam();
    if ( getNodeInfo(nodeId).getType() !=  NodeInfo::MGM ) {
      jam();
      ret = false;
    } else {
      jam();
      /* we have a software upgrade situation, mgmtsrvr should be
       * the highest, let him decide what to do
       */
      ;
    }
  }
  return ret;
}
#endif
4764 
4765 void
sendApiRegRef(Signal * signal,Uint32 Tref,ApiRegRef::ErrorCode err)4766 Qmgr::sendApiRegRef(Signal* signal, Uint32 Tref, ApiRegRef::ErrorCode err){
4767   ApiRegRef* ref = (ApiRegRef*)signal->getDataPtrSend();
4768   ref->ref = reference();
4769   ref->version = NDB_VERSION;
4770   ref->mysql_version = NDB_MYSQL_VERSION_D;
4771   ref->errorCode = err;
4772   sendSignal(Tref, GSN_API_REGREF, signal, ApiRegRef::SignalLength, JBB);
4773 }
4774 
4775 /**--------------------------------------------------------------------------
4776  * A NODE HAS BEEN DECLARED AS DOWN. WE WILL CLOSE THE COMMUNICATION TO THIS
4777  * NODE IF NOT ALREADY DONE. IF WE ARE PRESIDENT OR BECOMES PRESIDENT BECAUSE
4778  * OF A FAILED PRESIDENT THEN WE WILL TAKE FURTHER ACTION.
4779  *---------------------------------------------------------------------------*/
void Qmgr::failReportLab(Signal* signal, Uint16 aFailedNode,
			 FailRep::FailCause aFailCause,
                         Uint16 sourceNode)
{
  /**
   * Handle a FAIL_REP: node 'aFailedNode' has been declared failed by
   * 'sourceNode' for reason 'aFailCause'.
   *
   * Outcomes:
   *  - report concerns ourselves       -> crash with a descriptive message
   *  - report from a suspect node      -> special handling
   *  - otherwise                       -> record the failure; if we are
   *    president and no exclusion round is active, start a PREP_FAILREQ
   *    round towards all running nodes.
   */
  NodeRecPtr nodePtr;
  NodeRecPtr failedNodePtr;
  NodeRecPtr myNodePtr;

  failedNodePtr.i = aFailedNode;
  ptrCheckGuard(failedNodePtr, MAX_NDB_NODES, nodeRec);
  FailRep* rep = (FailRep*)signal->getDataPtr();

  if (check_multi_node_shutdown(signal))
  {
    jam();
    return;
  }

  if (isNodeConnectivitySuspect(sourceNode) &&
      // (! isNodeConnectivitySuspect(aFailedNode)) &&  // TODO : Required?
      ((aFailCause == FailRep::ZCONNECT_CHECK_FAILURE) ||
       (aFailCause == FailRep::ZLINK_FAILURE)))
  {
    jam();
    /* Connectivity related failure report from a node with suspect
     * connectivity, handle differently
     */
    ndbrequire(sourceNode != getOwnNodeId());

    handleFailFromSuspect(signal,
                          aFailCause,
                          aFailedNode,
                          sourceNode);
    return;
  }

  if (failedNodePtr.i == getOwnNodeId()) {
    jam();

    /* We ourselves have been declared dead: build a descriptive
     * message from the failure cause and shut down via progError(). */
    Uint32 code = NDBD_EXIT_NODE_DECLARED_DEAD;
    const char * msg = 0;
    // Message buffer for FailRep::ZPARTITIONED_CLUSTER
    static const Uint32 bitmaskTextLen = NdbNodeBitmask::TextLength + 1;
    char extra[2 * bitmaskTextLen + 30];

    switch(aFailCause){
    case FailRep::ZOWN_FAILURE:
      msg = "Own failure";
      break;
    case FailRep::ZOTHER_NODE_WHEN_WE_START:
    case FailRep::ZOTHERNODE_FAILED_DURING_START:
      msg = "Other node died during start";
      break;
    case FailRep::ZIN_PREP_FAIL_REQ:
      msg = "Prep fail";
      break;
    case FailRep::ZSTART_IN_REGREQ:
      msg = "Start timeout";
      break;
    case FailRep::ZHEARTBEAT_FAILURE:
      msg = "Heartbeat failure";
      break;
    case FailRep::ZLINK_FAILURE:
      msg = "Connection failure";
      break;
    case FailRep::ZPARTITIONED_CLUSTER:
    {
      /* Include both sides of the partition in the message. The other
       * partition's bitmask arrives either in a signal section (newer
       * senders) or inline in partition_v1 (older senders). */
      code = NDBD_EXIT_PARTITIONED_SHUTDOWN;
      char buf1[bitmaskTextLen], buf2[bitmaskTextLen];
      c_clusterNodes.getText(buf1);
      if (((signal->getLength()== FailRep::OrigSignalLength + FailRep::PartitionedExtraLength_v1) ||
           (signal->getLength()== FailRep::SignalLength + FailRep::PartitionedExtraLength_v1)) &&
          signal->header.theVerId_signalNumber == GSN_FAIL_REP)
      {
	jam();
	NdbNodeBitmask part;
	Uint32 senderRef = signal->getSendersBlockRef();
	Uint32 senderVersion = getNodeInfo(refToNode(senderRef)).m_version;
	if (signal->getNoOfSections() >= 1)
	{
	  ndbrequire(ndbd_send_node_bitmask_in_section(senderVersion));
	  SectionHandle handle(this, signal);
	  SegmentedSectionPtr ptr;
	  handle.getSection(ptr, 0);

	  ndbrequire(ptr.sz <= NdbNodeBitmask::Size);
          copy(part.rep.data, ptr);

	  releaseSections(handle);
	}
	else
	{
	  part.assign(NdbNodeBitmask48::Size, rep->partitioned.partition_v1);
	}
	part.getText(buf2);
	BaseString::snprintf(extra, sizeof(extra),
			     "Our cluster: %s other cluster: %s",
			     buf1, buf2);
      }
      else
      {
	jam();
	BaseString::snprintf(extra, sizeof(extra),
			     "Our cluster: %s", buf1);
      }
      msg = extra;
      break;
    }
    case FailRep::ZMULTI_NODE_SHUTDOWN:
      msg = "Multi node shutdown";
      break;
    case FailRep::ZCONNECT_CHECK_FAILURE:
      msg = "Connectivity check failure";
      break;
    case FailRep::ZFORCED_ISOLATION:
      msg = "Forced isolation";
      if (ERROR_INSERTED(942))
      {
        g_eventLogger->info("FAIL_REP FORCED_ISOLATION received from data node %u - ignoring.",
                            sourceNode);
        /* Let's wait for remote disconnection */
        return;
      }
      break;
    default:
      msg = "<UNKNOWN>";
    }

    CRASH_INSERTION(932);
    CRASH_INSERTION(938);

    char buf[sizeof(extra) + 100];
    BaseString::snprintf(buf, sizeof(buf),
			 "We(%u) have been declared dead by %u (via %u) reason: %s(%u)",
			 getOwnNodeId(),
                         sourceNode,
			 refToNode(signal->getSendersBlockRef()),
			 msg ? msg : "<Unknown>",
			 aFailCause);

    progError(__LINE__, code, buf);
    return;
  }//if

  /* Receiving a FAIL_REP while we are not a running member is an
   * internal error. */
  myNodePtr.i = getOwnNodeId();
  ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);
  if (myNodePtr.p->phase != ZRUNNING) {
    jam();
    systemErrorLab(signal, __LINE__);
    return;
  }//if

  /* A node failure while we ourselves are still restarting is fatal
   * for us: shut down with an explanatory message. */
  if (getNodeState().startLevel < NodeState::SL_STARTED)
  {
    jam();
    CRASH_INSERTION(932);
    CRASH_INSERTION(938);
    char buf[100];
    switch(aFailCause)
    {
      case FailRep::ZHEARTBEAT_FAILURE:
        BaseString::snprintf(buf, 100 ,"Node %d heartbeat failure",
                             failedNodePtr.i);
        CRASH_INSERTION(947);
        break;
      default:
        BaseString::snprintf(buf, 100 , "Node %d failed",
                             failedNodePtr.i);
    }
    progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf);
  }

  /* Snapshot cfailedNodes before failReport() so we can detect below
   * whether this report added any new failed node. */
  const NdbNodeBitmask TfailedNodes(cfailedNodes);
  failReport(signal, failedNodePtr.i, (UintR)ZTRUE, aFailCause, sourceNode);

  /**
   * If any node is starting now (c_start.startNode != 0)
   *   include it in nodes handled by sendPrepFailReq
   */
  if (c_start.m_startNode != 0)
  {
    jam();
    cfailedNodes.set(c_start.m_startNode);
  }

  if (cpresident == getOwnNodeId()) {
    jam();
    if (ctoStatus == Q_NOT_ACTIVE) {
      jam();
      /**--------------------------------------------------------------------
       * AS PRESIDENT WE ARE REQUIRED TO START THE EXCLUSION PROCESS SUCH THAT
       * THE APPLICATION SEE NODE FAILURES IN A CONSISTENT ORDER.
       * IF WE HAVE BECOME PRESIDENT NOW (CTO_STATUS = ACTIVE) THEN WE HAVE
       * TO COMPLETE THE PREVIOUS COMMIT FAILED NODE PROCESS BEFORE STARTING
       * A NEW.
       * CTO_STATUS = ACTIVE CAN ALSO MEAN THAT WE ARE PRESIDENT AND ARE
       * CURRENTLY COMMITTING A SET OF NODE CRASHES. IN THIS CASE IT IS NOT
       * ALLOWED TO START PREPARING NEW NODE CRASHES.
       *---------------------------------------------------------------------*/
      if (!cfailedNodes.equal(TfailedNodes)) {
        jam();
        /* New failed node(s): bump the failure number and send
         * PREP_FAILREQ to every running node. */
        cfailureNr = cfailureNr + 1;
        for (nodePtr.i = 1;
             nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
          ptrAss(nodePtr, nodeRec);
          if (nodePtr.p->phase == ZRUNNING) {
            jamLine(nodePtr.i);
            sendPrepFailReq(signal, nodePtr.i);
          }//if
        }//for
      }//if
    }//if
  }
  return;
}//Qmgr::failReportLab()
4995 
4996 /**-------------------------------------------------------------------------
4997  * WE HAVE RECEIVED A PREPARE TO EXCLUDE A NUMBER OF NODES FROM THE CLUSTER.
4998  * WE WILL FIRST CHECK THAT WE HAVE NOT ANY MORE NODES THAT
4999  * WE ALSO HAVE EXCLUDED
5000  *--------------------------------------------------------------------------*/
5001 /*******************************/
5002 /* PREP_FAILREQ               */
5003 /*******************************/
void Qmgr::execPREP_FAILREQ(Signal* signal)
{
  /**
   * Prepare phase of the node-failure exclusion protocol, sent by the
   * president. We extract the failed-node bitmask (from a signal
   * section for newer senders, inline otherwise), cancel any ongoing
   * node-start affected by it, block commits in DIH, record the failed
   * nodes locally, and finally ask TRPMAN to close communication to
   * them (the reply is handled in execCLOSE_COMCONF).
   */
  NodeRecPtr myNodePtr;
  PrepFailReqRef * const prepFail = (PrepFailReqRef *)&signal->theData[0];
  BlockReference Tblockref  = prepFail->xxxBlockRef;
  Uint16 TfailureNr = prepFail->failNo;
  Uint32 senderRef = signal->getSendersBlockRef();
  Uint32 senderVersion = getNodeInfo(refToNode(senderRef)).m_version;

  jamEntry();

  /* Failed-node bitmask: section-based for newer versions, inline
   * 48-bit mask for older ones. */
  NdbNodeBitmask nodes;
  if (signal->getNoOfSections() >= 1)
  {
    jam();
    ndbrequire(ndbd_send_node_bitmask_in_section(senderVersion));
    SectionHandle handle(this, signal);
    SegmentedSectionPtr ptr;
    handle.getSection(ptr, 0);
    ndbrequire(ptr.sz <= NdbNodeBitmask::Size);
    copy(nodes.rep.data, ptr);
    releaseSections(handle);
  }
  else
  {
    jam();
    nodes.assign(NdbNodeBitmask48::Size, prepFail->theNodes);
  }

  // Clear 'c_start.m_startNode' if it failed.
  if (nodes.get(c_start.m_startNode))
  {
    jam();
    DEB_STARTUP(("Clear c_start.m_startNode"));
    c_start.reset();
  }
  if (c_start.m_gsn == GSN_CM_NODEINFOCONF)
  {
    Uint32 nodeId;
    jam();
    /**
     * This is a very unusual event we are looking for, but still required
     * to be handled. The starting node has connected to the president and
     * managed to start the node inclusion protocol. We received an indication
     * of this from the president. The starting node now however fails before
     * it connected to us, so we need to clear the indication of that we
     * received CM_ADD(Prepare) from president since this belonged to an
     * already cancelled node restart.
     */
    for (nodeId = 1; nodeId < MAX_NDB_NODES; nodeId++)
    {
      if (c_start.m_nodes.isWaitingFor(nodeId) &&
          nodes.get(nodeId))
      {
        jamLine(nodeId);
        /* Found such a condition as described above, clear state */
        c_start.m_gsn = RNIL;
        c_start.m_nodes.clearWaitingFor();
        break;
      }
    }
  }


  if (check_multi_node_shutdown(signal))
  {
    jam();
    return;
  }

  if (ERROR_INSERTED(941) &&
      getOwnNodeId() == 4 &&
      nodes.get(2))
  {
    /* Insert ERROR_INSERT crash */
    CRASH_INSERTION(941);
  }

  cprepFailedNodes.assign(nodes);
  ndbassert(prepFail->noOfNodes == cprepFailedNodes.count());

  /**
   * Block commit until node failures has stabilized
   *
   * @See RT352
   */
  BlockCommitOrd* const block = (BlockCommitOrd *)&signal->theData[0];
  block->failNo = TfailureNr;
  EXECUTE_DIRECT(DBDIH, GSN_BLOCK_COMMIT_ORD, signal,
		 BlockCommitOrd::SignalLength);

  /* Receiving PREP_FAILREQ while not a running member is an internal
   * error. */
  myNodePtr.i = getOwnNodeId();
  ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);
  if (myNodePtr.p->phase != ZRUNNING) {
    jam();
    systemErrorLab(signal, __LINE__);
    return;
  }//if

  /* A node failure while we ourselves are still restarting is fatal. */
  if (getNodeState().startLevel < NodeState::SL_STARTED)
  {
    jam();
    CRASH_INSERTION(932);
    CRASH_INSERTION(938);
    char buf[100];
    BaseString::snprintf(buf, 100, "Node failure during restart");
    progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf);
  }

  /* Register every node in the prepare set as failed locally. */
  for (unsigned nodeId = 1; nodeId < MAX_NDB_NODES; nodeId++)
  {
    if (cprepFailedNodes.get(nodeId))
    {
      jam();
      failReport(signal,
                 nodeId,
                 (UintR)ZFALSE,
                 FailRep::ZIN_PREP_FAIL_REQ,
                 0); /* Source node not required (or known) here */
    }//if
  }//for
  sendCloseComReq(signal, Tblockref, TfailureNr);
  ccommitFailedNodes.clear();
  cprepareFailureNr = TfailureNr;
  return;
}//Qmgr::execPREP_FAILREQ()
5130 
5131 
void Qmgr::handleApiCloseComConf(Signal* signal)
{
  /**
   * CLOSE_COMCONF for an API/MGM node failure (requestType
   * RT_API_FAILURE, dispatched from execCLOSE_COMCONF). Communication
   * to the failed node is now closed, so it is safe to propagate
   * API_FAILREQ to the interested blocks.
   */
  jam();
  CloseComReqConf * const closeCom = (CloseComReqConf *)&signal->theData[0];

  Uint32 nodeId = closeCom->failedNodeId;
  /* Api failure special case */
  /* Check that *only* 1 *API* node is included in
   * this CLOSE_COM_CONF
   */
  ndbrequire(getNodeInfo(nodeId).getType() != NodeInfo::DB);
  ndbrequire(closeCom->noOfNodes == 1);

  /* Now that we know communication from the failed Api has
   * ceased, we can send the required API_FAILREQ signals
   * and continue API failure handling
   */
  NodeRecPtr failedNodePtr;
  failedNodePtr.i = nodeId;
  ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec);

  /* failState was set by api_failed() before requesting the close:
   * ACTIVE means the node had completed registration (ZAPI_ACTIVE). */
  ndbrequire((failedNodePtr.p->failState ==
              WAITING_FOR_CLOSECOMCONF_ACTIVE) ||
              (failedNodePtr.p->failState ==
               WAITING_FOR_CLOSECOMCONF_NOTACTIVE));

  if (failedNodePtr.p->failState == WAITING_FOR_CLOSECOMCONF_ACTIVE)
  {
    /**
     * Inform application blocks TC, DICT, SUMA etc.
     */
    jam();
    sendApiFailReq(signal, nodeId, false); // !sumaOnly
    /* If the failed node was our arbitrator, start arbitration
     * failure handling as well. */
    if(arbitRec.node == nodeId)
    {
      arbitRec.code = ArbitCode::ApiFail;
      handleArbitApiFail(signal, nodeId);
    }
  }
  else
  {
    /**
     * Always inform SUMA
     */
    jam();
    sendApiFailReq(signal, nodeId, true); // sumaOnly
  }

  if (getNodeInfo(failedNodePtr.i).getType() == NodeInfo::MGM)
  {
    /**
     * Allow MGM do reconnect "directly"
     */
    jam();
    set_hb_count(failedNodePtr.i) = 3;
  }

  /* Handled the single API node failure */
  return;
}
5192 
5193 /**---------------------------------------------------------------------------
5194  * THE CRASHED NODES HAS BEEN EXCLUDED FROM COMMUNICATION.
5195  * WE WILL CHECK WHETHER ANY MORE NODES HAVE FAILED DURING THE PREPARE PROCESS.
5196  * IF SO WE WILL REFUSE THE PREPARE PHASE AND EXPECT A NEW PREPARE MESSAGE
5197  * WITH ALL FAILED NODES INCLUDED.
5198  *---------------------------------------------------------------------------*/
5199 /*******************************/
5200 /* CLOSE_COMCONF              */
5201 /*******************************/
void Qmgr::execCLOSE_COMCONF(Signal* signal)
{
  /**
   * TRPMAN has closed communication to the failed node(s). Two cases:
   *  - RT_API_FAILURE: single API/MGM node, delegated to
   *    handleApiCloseComConf().
   *  - RT_NODE_FAILURE: data-node failure prepare phase; reply
   *    PREP_FAILCONF to the president if no further nodes failed in
   *    the meantime, otherwise PREP_FAILREF with the extended set.
   */
  jamEntry();

  CloseComReqConf * const closeCom = (CloseComReqConf *)&signal->theData[0];

  Uint32 requestType = closeCom->requestType;

  if (requestType == CloseComReqConf::RT_API_FAILURE)
  {
    jam();
    if (ERROR_INSERTED(945))
    {
      if (arbitRec.code != ArbitCode::WinChoose)
      {
        // Delay API failure handling until arbitration in WinChoose
        sendSignalWithDelay(reference(),
                            GSN_CLOSE_COMCONF,
                            signal,
                            10,
                            signal->getLength());
        return;
      }
      CLEAR_ERROR_INSERT_VALUE;
    }
    handleApiCloseComConf(signal);
    return;
  }

  /* Normal node failure preparation path */
  ndbassert(requestType == CloseComReqConf::RT_NODE_FAILURE);
  BlockReference Tblockref  = closeCom->xxxBlockRef;
  Uint16 TfailureNr = closeCom->failNo;

  if (TfailureNr != cprepareFailureNr)
  {
    /**
     * A new PREP_FAILREQ was already started, so ignore this
     * one, we will soon enough be here again for the new
     * failure and respond to this one instead. If we were to
     * send something, it would be ignored by President anyways.
     */
    jam();
    return;
  }

  UintR tprepFailConf = ZTRUE;

  /* Check whether the set of nodes which have had communications
   * closed is the same as the set of failed nodes.
   * If it is, we can confirm the PREP_FAIL phase for this set
   * of nodes to the President.
   * If it is not, we Refuse the PREP_FAIL phase for this set
   * of nodes, the President will start a new PREP_FAIL phase
   * for the new set.
   */
  if (!cprepFailedNodes.contains(cfailedNodes)) {
    /* Failed node(s) is missing from the set, we will not
     * confirm this Prepare_Fail phase.
     * Store the node id in the array for later.
     */
    jam();
    tprepFailConf = ZFALSE;
    cprepFailedNodes.bitOR(cfailedNodes);
  }//if
  if (tprepFailConf == ZFALSE) {
    jam();
    /* Inform President that we cannot confirm the PREP_FAIL
     * phase as we are aware of at least one other node
     * failure
     */
    cfailedNodes = cprepFailedNodes;

    sendPrepFailReqRef(signal,
		       Tblockref,
		       GSN_PREP_FAILREF,
		       reference(),
		       TfailureNr,
		       cprepFailedNodes);
  } else {
    /* We have prepared the failure of the requested nodes
     * send confirmation to the president
     */
    jam();
    ccommitFailedNodes = cprepFailedNodes;

    signal->theData[0] = getOwnNodeId();
    signal->theData[1] = TfailureNr;
    sendSignal(Tblockref, GSN_PREP_FAILCONF, signal, 2, JBA);
  }//if
  return;
}//Qmgr::execCLOSE_COMCONF()
5294 
5295 /*---------------------------------------------------------------------------*/
5296 /* WE HAVE RECEIVED A CONFIRM OF THAT THIS NODE HAVE PREPARED THE FAILURE.   */
5297 /*---------------------------------------------------------------------------*/
5298 /*******************************/
5299 /* PREP_FAILCONF              */
5300 /*******************************/
/**
 * PREP_FAILCONF
 * A running node confirms that it has prepared the ongoing node
 * failure(s).  Confirms for an outdated failure number are ignored.
 * Once every running node has confirmed, continue: commit immediately
 * when arbitration is disabled, otherwise run the arbitration check
 * (which continues via sendCommitFailReq() on success).
 */
void Qmgr::execPREP_FAILCONF(Signal* signal)
{
  NodeRecPtr nodePtr;
  NodeRecPtr replyNodePtr;
  jamEntry();
  /* theData[0] = confirming node id, theData[1] = failure number. */
  replyNodePtr.i = signal->theData[0];
  Uint16 TfailureNr = signal->theData[1];
  if (TfailureNr != cfailureNr) {
    jam();
    /**----------------------------------------------------------------------
     * WE HAVE ALREADY STARTED A NEW ATTEMPT TO EXCLUDE A NUMBER OF NODES.
     *  IGNORE
     *----------------------------------------------------------------------*/
    return;
  }//if
  ptrCheckGuard(replyNodePtr, MAX_NDB_NODES, nodeRec);
  replyNodePtr.p->sendPrepFailReqStatus = Q_NOT_ACTIVE;
  /* Still waiting for some running node to confirm the prepare phase? */
  for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
    ptrAss(nodePtr, nodeRec);
    if (nodePtr.p->phase == ZRUNNING) {
      if (nodePtr.p->sendPrepFailReqStatus == Q_ACTIVE) {
        jamLine(nodePtr.i);
        return;
      }//if
    }//if
  }//for
  /**
   * Check node count and groups and invoke arbitrator if necessary.
   * Continues via sendCommitFailReq() if successful.
   */
  arbitRec.failureNr = cfailureNr;
  const NodeState & s = getNodeState();
  if(s.startLevel == NodeState::SL_STOPPING_3 &&
     s.stopping.systemShutdown)
  {
    jam();
    /**
     * We're performing a system shutdown,
     * don't let arbitrator shut us down
     */
    return;
  }

  switch(arbitRec.method){
  case ArbitRec::DISABLED:
    jam();
    // No arbitration -> immediately commit the failed nodes
    sendCommitFailReq(signal);
    break;

  case ArbitRec::METHOD_EXTERNAL:
  case ArbitRec::METHOD_DEFAULT:
    jam();
    handleArbitCheck(signal);
    break;

  }
  return;
}//Qmgr::execPREP_FAILCONF()
5360 
/**
 * Broadcast COMMIT_FAILREQ for the prepared set of node failures to all
 * running nodes.  Reached once the prepare phase (and arbitration, when
 * used) has completed.  Ignored if a newer failure round has started
 * since arbitration was entered.  Replies are collected in
 * execCOMMIT_FAILCONF().
 */
void
Qmgr::sendCommitFailReq(Signal* signal)
{
  NodeRecPtr nodePtr;
  jam();
  if (arbitRec.failureNr != cfailureNr) {
    jam();
    /**----------------------------------------------------------------------
     * WE HAVE ALREADY STARTED A NEW ATTEMPT TO EXCLUDE A NUMBER OF NODES.
     *  IGNORE
     *----------------------------------------------------------------------*/
    return;
  }//if
  /**-----------------------------------------------------------------------
   * WE HAVE SUCCESSFULLY PREPARED A SET OF NODE FAILURES. WE WILL NOW COMMIT
   * THESE NODE FAILURES.
   *-------------------------------------------------------------------------*/
  for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
    ptrAss(nodePtr, nodeRec);

#ifdef ERROR_INSERT
    /* Test hook for error insert 935; deliberately disabled (if (false)). */
    if (false && ERROR_INSERTED(935) && nodePtr.i == c_error_insert_extra)
    {
      ndbout_c("skipping node %d", c_error_insert_extra);
      CLEAR_ERROR_INSERT_VALUE;
      signal->theData[0] = 9999;
      sendSignalWithDelay(CMVMI_REF, GSN_NDB_TAMPER, signal, 1000, 1);
      continue;
    }
#endif

    if (nodePtr.p->phase == ZRUNNING) {
      jamLine(nodePtr.i);
      nodePtr.p->sendCommitFailReqStatus = Q_ACTIVE;
      /* theData[0] = president's block ref, theData[1] = failure number. */
      signal->theData[0] = cpdistref;
      signal->theData[1] = cfailureNr;
      sendSignal(nodePtr.p->blockRef, GSN_COMMIT_FAILREQ, signal, 2, JBA);
    }//if
  }//for
  ctoStatus = Q_ACTIVE;
  cfailedNodes.clear();
  return;
}//sendCommitFailReq()
5404 
5405 /*---------------------------------------------------------------------------*/
5406 /* SOME NODE HAVE DISCOVERED A NODE FAILURE THAT WE HAVE NOT YET DISCOVERED. */
5407 /* WE WILL START ANOTHER ROUND OF PREPARING A SET OF NODE FAILURES.          */
5408 /*---------------------------------------------------------------------------*/
5409 /*******************************/
5410 /* PREP_FAILREF               */
5411 /*******************************/
/**
 * PREP_FAILREF
 * A node refused our PREP_FAILREQ because it knows of additional failed
 * nodes.  Adopt its (larger) failed-node set, bump the failure number
 * and restart the prepare phase towards all running nodes.  The node
 * set arrives either in signal section 0 (newer senders) or inline in
 * theNodes (48-bit bitmask, older senders).
 */
void Qmgr::execPREP_FAILREF(Signal* signal)
{
  NodeRecPtr nodePtr;
  jamEntry();

  PrepFailReqRef * const prepFail = (PrepFailReqRef *)&signal->theData[0];

  Uint16 TfailureNr = prepFail->failNo;
  cprepFailedNodes.clear();

  if(signal->getNoOfSections() >= 1)
  {
    jam();
    /* Node bitmask carried in a section; only allowed for versions that
     * support it.
     */
    Uint32 senderRef = signal->getSendersBlockRef();
    Uint32 senderVersion = getNodeInfo(refToNode(senderRef)).m_version;
    ndbrequire(ndbd_send_node_bitmask_in_section(senderVersion));
    SegmentedSectionPtr ptr;
    SectionHandle handle(this, signal);
    handle.getSection(ptr, 0);
    ndbrequire(ptr.sz <= NdbNodeBitmask::Size);
    copy(cprepFailedNodes.rep.data, ptr);
    releaseSections(handle);
  }
  else
  {
    jam();
    /* Legacy format: 48-bit bitmask inline in the signal. */
    cprepFailedNodes.assign(NdbNodeBitmask48::Size, prepFail->theNodes);
  }
  ndbassert(prepFail->noOfNodes == cprepFailedNodes.count());

  if (TfailureNr != cfailureNr) {
    jam();
    /**---------------------------------------------------------------------
     * WE HAVE ALREADY STARTED A NEW ATTEMPT TO EXCLUDE A NUMBER OF NODES.
     *  IGNORE
     *----------------------------------------------------------------------*/
    return;
  }//if

  cfailedNodes = cprepFailedNodes;

  cfailureNr = cfailureNr + 1;
  // Failure number may not wrap
  ndbrequire(cfailureNr != 0);
  /* Restart the prepare phase with the extended node set. */
  for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
    ptrAss(nodePtr, nodeRec);
    if (nodePtr.p->phase == ZRUNNING) {
      jamLine(nodePtr.i);
      sendPrepFailReq(signal, nodePtr.i);
    }//if
  }//for
  return;
}//Qmgr::execPREP_FAILREF()
5465 
5466 /*---------------------------------------------------------------------------*/
5467 /*    THE PRESIDENT IS NOW COMMITTING THE PREVIOUSLY PREPARED NODE FAILURE.  */
5468 /*---------------------------------------------------------------------------*/
5469 /***********************/
5470 /* COMMIT_FAILREQ     */
5471 /***********************/
/**
 * COMMIT_FAILREQ
 * The president orders commit of the previously prepared node
 * failure(s).  Requests not coming from the current president are
 * ignored.  The first time a given failure number is seen we:
 *  - record the failure in the nodeFailRec ring buffer (consumed later
 *    by execSYNC_THREAD_VIA_CONF()),
 *  - send SYNC_THREAD_VIA_REQ to TRPMAN before NODE_FAILREP is
 *    delivered locally,
 *  - move the failed nodes to ZFAIL_CLOSING so they can reconnect,
 *  - inform all connected API nodes with NODE_FAILREP.
 * A COMMIT_FAILCONF is always sent back, even for repeated requests
 * (which can arrive when presidents keep failing).
 */
void Qmgr::execCOMMIT_FAILREQ(Signal* signal)
{
  NodeRecPtr nodePtr;
  jamEntry();

  CRASH_INSERTION(935);

  BlockReference Tblockref = signal->theData[0];
  UintR TfailureNr = signal->theData[1];
  /* Only the current president may order a commit. */
  if (Tblockref != cpdistref) {
    jam();
    return;
  }//if

  /**
   * Block commit until node failures has stabilized
   *
   * @See RT352
   */
  UnblockCommitOrd* const unblock = (UnblockCommitOrd *)&signal->theData[0];
  unblock->failNo = TfailureNr;
  EXECUTE_DIRECT(DBDIH, GSN_UNBLOCK_COMMIT_ORD, signal,
		 UnblockCommitOrd::SignalLength);

  if ((ccommitFailureNr != TfailureNr) &&
      (!ccommitFailedNodes.isclear()))
  {
    jam();
    /**-----------------------------------------------------------------------
     * WE ONLY DO THIS PART OF THE COMMIT HANDLING THE FIRST TIME WE HEAR THIS
     * SIGNAL. WE CAN HEAR IT SEVERAL TIMES IF THE PRESIDENTS KEEP FAILING.
     *-----------------------------------------------------------------------*/
    ccommitFailureNr = TfailureNr;

    /* Record the failure in the ring buffer slot for this failure number;
     * execSYNC_THREAD_VIA_CONF() reads it and marks the slot free again.
     */
    Uint32 nodeFailIndex = TfailureNr % MAX_DATA_NODE_FAILURES;
    NodeFailRec* TnodeFailRec = &nodeFailRec[nodeFailIndex];
    ndbrequire(TnodeFailRec->president == 0);
    TnodeFailRec->failureNr = TfailureNr;
    TnodeFailRec->president = cpresident;
    TnodeFailRec->nodes = ccommitFailedNodes;

    SyncThreadViaReqConf* syncReq =(SyncThreadViaReqConf*)&signal->theData[0];
    syncReq->senderRef = reference();
    syncReq->senderData = TfailureNr;
    syncReq->actionType = SyncThreadViaReqConf::FOR_NODE_FAILREP;
    sendSignal(TRPMAN_REF, GSN_SYNC_THREAD_VIA_REQ, signal,
               SyncThreadViaReqConf::SignalLength, JBA);

    /**--------------------------------------------------------------------
     * WE MUST PREPARE TO ACCEPT THE CRASHED NODE INTO THE CLUSTER AGAIN BY
     * SETTING UP CONNECTIONS AGAIN AFTER THREE SECONDS OF DELAY.
     *--------------------------------------------------------------------*/
    for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
      if (ccommitFailedNodes.get(nodePtr.i)) {
        jamLine(nodePtr.i);
        ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
        nodePtr.p->phase = ZFAIL_CLOSING;
        DEB_STARTUP(("2: phase(%u) = ZFAIL_CLOSING", nodePtr.i));
        nodePtr.p->failState = WAITING_FOR_NDB_FAILCONF;
        set_hb_count(nodePtr.i) = 0;
        c_clusterNodes.clear(nodePtr.i);
      }//if
    }//for

    /*----------------------------------------------------------------------*/
    /*       WE INFORM THE API'S WE HAVE CONNECTED ABOUT THE FAILED NODES.  */
    /*----------------------------------------------------------------------*/
    LinearSectionPtr lsptr[3];
    lsptr->p = TnodeFailRec->nodes.rep.data;
    lsptr->sz = TnodeFailRec->nodes.getPackedLengthInWords();

    for (nodePtr.i = 1; nodePtr.i < MAX_NODES; nodePtr.i++) {
      ptrAss(nodePtr, nodeRec);
      if (nodePtr.p->phase == ZAPI_ACTIVE) {
        jamLine(nodePtr.i);

	NodeFailRep * const nodeFail = (NodeFailRep *)&signal->theData[0];

	nodeFail->failNo    = ccommitFailureNr;
	nodeFail->noOfNodes = ccommitFailedNodes.count();

        /* Newer API versions receive the node bitmask in a signal section;
         * older ones only support the 48-bit inline format.
         */
        if (ndbd_send_node_bitmask_in_section(
            getNodeInfo(refToNode(nodePtr.p->blockRef)).m_version))
        {
          sendSignal(nodePtr.p->blockRef, GSN_NODE_FAILREP, signal,
             NodeFailRep::SignalLength, JBB, lsptr, 1);
        }
        else if (lsptr->sz <= NdbNodeBitmask48::Size)
        {
	  TnodeFailRec->nodes.copyto(NdbNodeBitmask48::Size,
	                             nodeFail->theNodes);
          sendSignal(nodePtr.p->blockRef, GSN_NODE_FAILREP, signal,
      		   NodeFailRep::SignalLength_v1, JBB);
        }
        else
        {
          /* Old API but failed-node set does not fit in 48 bits. */
          ndbabort();
        }
      }//if
    }//for

    /**
     * Remove committed nodes from failed/prepared
     */
    cfailedNodes.bitANDC(ccommitFailedNodes);
    cprepFailedNodes.bitANDC(ccommitFailedNodes);
    ccommitFailedNodes.clear();
  }//if
  /**-----------------------------------------------------------------------
   * WE WILL ALWAYS ACKNOWLEDGE THE COMMIT EVEN WHEN RECEIVING IT MULTIPLE
   * TIMES SINCE IT WILL ALWAYS COME FROM A NEW PRESIDENT.
   *------------------------------------------------------------------------*/
  signal->theData[0] = getOwnNodeId();
  sendSignal(Tblockref, GSN_COMMIT_FAILCONF, signal, 1, JBA);
  return;
}//Qmgr::execCOMMIT_FAILREQ()
5588 
/**
 * SYNC_THREAD_VIA_CONF
 * Completion of a GSN_SYNC_THREAD_VIA_REQ round-trip through TRPMAN.
 * For FOR_NODE_FAILREP: look up the failure recorded by
 * execCOMMIT_FAILREQ(), build a NODE_FAILREP (node bitmask in a linear
 * section) and deliver it to NDBCNTR; the nodeFailRec slot is released
 * here.  For FOR_ACTIVATE_TRP_REQ: continue transporter activation.
 */
void Qmgr::execSYNC_THREAD_VIA_CONF(Signal* signal)
{
  const SyncThreadViaReqConf* syncConf =
    (const SyncThreadViaReqConf*)&signal->theData[0];
  if (syncConf->actionType == SyncThreadViaReqConf::FOR_NODE_FAILREP)
  {
    jam();
    /* senderData carried the failure number; map it to its ring slot. */
    const Uint32 index = syncConf->senderData % MAX_DATA_NODE_FAILURES;
    NodeFailRec* TnodeFailRec = &nodeFailRec[index];
    ndbrequire(TnodeFailRec->president != 0);
    ndbrequire(TnodeFailRec->nodes.count() != 0);
    NodeFailRep* nodeFail = (NodeFailRep*)&signal->theData[0];
    nodeFail->failNo = TnodeFailRec->failureNr;
    nodeFail->masterNodeId = TnodeFailRec->president;
    nodeFail->noOfNodes = TnodeFailRec->nodes.count();

    LinearSectionPtr lsptr[3];
    lsptr->p = TnodeFailRec->nodes.rep.data;
    lsptr->sz = TnodeFailRec->nodes.getPackedLengthInWords();

    TnodeFailRec->president = 0; // Mark entry as unused.

    if (ERROR_INSERTED(936))
    {
      /* Error insert 936: deliver the NODE_FAILREP after a 200 ms delay;
       * delayed sends need the bitmask imported into a real section.
       */
      SectionHandle handle(this);
      ndbrequire(import(handle.m_ptr[0], lsptr[0].p, lsptr[0].sz));
      handle.m_cnt = 1;
      sendSignalWithDelay(NDBCNTR_REF, GSN_NODE_FAILREP, signal,
                          200, NodeFailRep::SignalLength, &handle);
      releaseSections(handle);
    }
    else
    {
      sendSignal(NDBCNTR_REF, GSN_NODE_FAILREP, signal,
                 NodeFailRep::SignalLength, JBA, lsptr, 1);
    }
  }
  else if (syncConf->actionType == SyncThreadViaReqConf::FOR_ACTIVATE_TRP_REQ)
  {
    jam();
    handle_activate_trp_req(signal, syncConf->senderData);
  }
  else
  {
    /* Unknown action type: programming error. */
    ndbabort();
  }
}
5636 
5637 /*--------------------------------------------------------------------------*/
5638 /* WE HAVE RECEIVED A CONFIRM OF THAT THIS NODE HAVE COMMITTED THE FAILURES.*/
5639 /*--------------------------------------------------------------------------*/
5640 /*******************************/
5641 /* COMMIT_FAILCONF            */
5642 /*******************************/
/**
 * COMMIT_FAILCONF
 * A running node acknowledges our COMMIT_FAILREQ.  When no running node
 * still has an outstanding request, the commit is complete.  If more
 * failures were discovered during the commit (cfailedNodes non-empty),
 * immediately start the next prepare round with a new failure number.
 */
void Qmgr::execCOMMIT_FAILCONF(Signal* signal)
{
  NodeRecPtr nodePtr;
  NodeRecPtr replyNodePtr;
  jamEntry();
  /* theData[0] = acknowledging node id. */
  replyNodePtr.i = signal->theData[0];

  ptrCheckGuard(replyNodePtr, MAX_NDB_NODES, nodeRec);
  replyNodePtr.p->sendCommitFailReqStatus = Q_NOT_ACTIVE;
  /* Still waiting for other running nodes? */
  for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
    ptrAss(nodePtr, nodeRec);
    if (nodePtr.p->phase == ZRUNNING) {
      if (nodePtr.p->sendCommitFailReqStatus == Q_ACTIVE) {
        jamLine(nodePtr.i);
        return;
      }//if
    }//if
  }//for
  /*-----------------------------------------------------------------------*/
  /*   WE HAVE SUCCESSFULLY COMMITTED A SET OF NODE FAILURES.              */
  /*-----------------------------------------------------------------------*/
  ctoStatus = Q_NOT_ACTIVE;
  if (!cfailedNodes.isclear()) {
    jam();
    /**----------------------------------------------------------------------
     *	A FAILURE OCCURRED IN THE MIDDLE OF THE COMMIT PROCESS. WE ARE NOW
     *  READY TO START THE FAILED NODE PROCESS FOR THIS NODE.
     *----------------------------------------------------------------------*/
    cfailureNr = cfailureNr + 1;
    for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
      ptrAss(nodePtr, nodeRec);
      if (nodePtr.p->phase == ZRUNNING) {
        jamLine(nodePtr.i);
        sendPrepFailReq(signal, nodePtr.i);
      }//if
    }//for
  }//if
  return;
}//Qmgr::execCOMMIT_FAILCONF()
5682 
5683 /**--------------------------------------------------------------------------
5684  * IF THE PRESIDENT FAILS IN THE MIDDLE OF THE COMMIT OF A FAILED NODE THEN
5685  * THE NEW PRESIDENT NEEDS TO QUERY THE COMMIT STATUS IN THE RUNNING NODES.
5686  *---------------------------------------------------------------------------*/
5687 /*******************************/
5688 /* PRES_TOCONF                */
5689 /*******************************/
/**
 * PRES_TOCONF
 * Reply to PRES_TOREQ during president takeover.  Each running node
 * reports a failure number; we keep the maximum in ctoFailureNr.  Once
 * all queried nodes have replied: if some node saw a failure number
 * beyond what we have committed, the interrupted failure round must be
 * (re-)committed with COMMIT_FAILREQ; otherwise a fresh prepare round
 * is started.
 */
void Qmgr::execPRES_TOCONF(Signal* signal)
{
  NodeRecPtr nodePtr;
  NodeRecPtr replyNodePtr;
  jamEntry();
  /* theData[0] = replying node id, theData[1] = its failure number. */
  replyNodePtr.i = signal->theData[0];
  UintR TfailureNr = signal->theData[1];
  if (ctoFailureNr < TfailureNr) {
    jam();
    ctoFailureNr = TfailureNr;
  }//if
  ptrCheckGuard(replyNodePtr, MAX_NDB_NODES, nodeRec);
  replyNodePtr.p->sendPresToStatus = Q_NOT_ACTIVE;
  /* Still waiting for replies from some node? */
  for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
    ptrAss(nodePtr, nodeRec);
    if (nodePtr.p->sendPresToStatus == Q_ACTIVE) {
      jamLine(nodePtr.i);
      return;
    }//if
  }//for
  /*-------------------------------------------------------------------------*/
  /* WE ARE NOW READY TO DISCOVER WHETHER THE FAILURE WAS COMMITTED OR NOT.  */
  /*-------------------------------------------------------------------------*/
  if (ctoFailureNr > ccommitFailureNr) {
    jam();
    /* Some node is ahead of us: conclude the commit as new president. */
    for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
      ptrAss(nodePtr, nodeRec);
      if (nodePtr.p->phase == ZRUNNING) {
        jamLine(nodePtr.i);
        nodePtr.p->sendCommitFailReqStatus = Q_ACTIVE;
        signal->theData[0] = cpdistref;
        signal->theData[1] = ctoFailureNr;
        sendSignal(nodePtr.p->blockRef, GSN_COMMIT_FAILREQ, signal, 2, JBA);
      }//if
    }//for
    return;
  }//if
  /*-------------------------------------------------------------------------*/
  /*       WE ARE NOW READY TO START THE NEW NODE FAILURE PROCESS.           */
  /*-------------------------------------------------------------------------*/
  ctoStatus = Q_NOT_ACTIVE;
  cfailureNr = cfailureNr + 1;
  for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
    ptrAss(nodePtr, nodeRec);
    if (nodePtr.p->phase == ZRUNNING) {
      jamLine(nodePtr.i);
      sendPrepFailReq(signal, nodePtr.i);
    }//if
  }//for
  return;
}//Qmgr::execPRES_TOCONF()
5741 
5742 /*--------------------------------------------------------------------------*/
5743 // Provide information about the configured NDB nodes in the system.
5744 /*--------------------------------------------------------------------------*/
/**
 * READ_NODESREQ
 * Report the configured/connected NDB data nodes to the requester.
 * Full-length requests get a ReadNodesConf with all five node bitmasks
 * (defined, cluster, inactive, starting, started) passed in one linear
 * section.  Short (legacy) requests get the inline ReadNodesConf_v1,
 * which is only possible while all bitmasks fit in 48 bits; otherwise
 * we abort.
 */
void Qmgr::execREAD_NODESREQ(Signal* signal)
{
  jamEntry();

  ReadNodesReq *req = (ReadNodesReq *)&signal->theData[0];
  BlockReference TBref = req->myRef;
  NodeRecPtr nodePtr;
  nodePtr.i = getOwnNodeId();
  ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);

  /* Inactive = defined but not currently part of the cluster. */
  NdbNodeBitmask tmp = c_definedNodes;
  tmp.bitANDC(c_clusterNodes);

  Uint32 packed_length1 = c_definedNodes.getPackedLengthInWords();
  Uint32 packed_length2 = c_clusterNodes.getPackedLengthInWords();
  Uint32 packed_length3 = tmp.getPackedLengthInWords();

  if (signal->length() >= ReadNodesReq::SignalLength)
  {
    jam();
    ReadNodesConf * const readNodes = (ReadNodesConf *)&signal->theData[0];

    readNodes->noOfNodes = c_definedNodes.count();
    readNodes->masterNodeId = cpresident;
    readNodes->ndynamicId = nodePtr.p->ndynamicId;

    readNodes->definedNodes = c_definedNodes;
    readNodes->clusterNodes = c_clusterNodes;
    readNodes->inactiveNodes = tmp;
    readNodes->startingNodes.clear();
    readNodes->startedNodes.clear();

    /* The section covers the 5 bitmask members starting at definedNodes.
     * NOTE(review): relies on the five bitmasks being laid out
     * consecutively in ReadNodesConf — confirm against the struct.
     */
    LinearSectionPtr lsptr[3];
    lsptr[0].p = readNodes->definedNodes.rep.data;
    lsptr[0].sz = 5 * NdbNodeBitmask::Size;
    sendSignal(TBref,
               GSN_READ_NODESCONF,
               signal,
               ReadNodesConf::SignalLength,
               JBB,
               lsptr,
               1);
  }
  else if (packed_length1 <= NdbNodeBitmask48::Size &&
           packed_length2 <= NdbNodeBitmask48::Size &&
           packed_length3 <= NdbNodeBitmask48::Size)
  {
    jam();
    /* Legacy requester: inline 48-bit bitmasks. */
    ReadNodesConf_v1 * const readNodes = (ReadNodesConf_v1 *)&signal->theData[0];
    readNodes->noOfNodes = c_definedNodes.count();
    readNodes->masterNodeId = cpresident;
    readNodes->ndynamicId = nodePtr.p->ndynamicId;

    c_definedNodes.copyto(NdbNodeBitmask::Size, readNodes->definedNodes);
    c_clusterNodes.copyto(NdbNodeBitmask::Size, readNodes->clusterNodes);
    tmp.copyto(NdbNodeBitmask::Size, readNodes->inactiveNodes);
    NdbNodeBitmask::clear(readNodes->startingNodes);
    NdbNodeBitmask::clear(readNodes->startedNodes);

    sendSignal(TBref, GSN_READ_NODESCONF, signal,
              ReadNodesConf_v1::SignalLength, JBB);
  }
  else
  {
    /* Legacy requester but node set does not fit in 48 bits. */
    ndbabort();
  }
}//Qmgr::execREAD_NODESREQ()
5812 
systemErrorBecauseOtherNodeFailed(Signal * signal,Uint32 line,NodeId failedNodeId)5813 void Qmgr::systemErrorBecauseOtherNodeFailed(Signal* signal, Uint32 line,
5814 					     NodeId failedNodeId) {
5815   jam();
5816 
5817   // Broadcast that this node is failing to other nodes
5818   failReport(signal, getOwnNodeId(), (UintR)ZTRUE, FailRep::ZOWN_FAILURE, getOwnNodeId());
5819 
5820   char buf[100];
5821   BaseString::snprintf(buf, 100,
5822 	   "Node was shutdown during startup because node %d failed",
5823 	   failedNodeId);
5824 
5825   progError(line, NDBD_EXIT_SR_OTHERNODEFAILED, buf);
5826 }
5827 
5828 
/**
 * Shut this node down with NDBD_EXIT_NDBREQUIRE after broadcasting our
 * own failure to the other nodes.
 *
 * @param line     Source line to report in the error log.
 * @param message  Explanation of why shutdown occurred (if known).
 */
void Qmgr::systemErrorLab(Signal* signal, Uint32 line, const char * message)
{
  jam();
  // Broadcast that this node is failing to other nodes
  failReport(signal, getOwnNodeId(), (UintR)ZTRUE, FailRep::ZOWN_FAILURE, getOwnNodeId());

  // If it's known why shutdown occurred
  // an error message has been passed to this function
  progError(line, NDBD_EXIT_NDBREQUIRE, message);
}//Qmgr::systemErrorLab()
5839 
5840 
5841 /**---------------------------------------------------------------------------
5842  * A FAILURE HAVE BEEN DISCOVERED ON A NODE. WE NEED TO CLEAR A
5843  * NUMBER OF VARIABLES.
5844  *---------------------------------------------------------------------------*/
/**
 * Register that node aFailedNode has failed.  Only acted upon while the
 * node is still ZRUNNING:
 *  - if we are president and are waiting for a reply from the failed
 *    node, inject the COMMIT_FAILCONF / PRES_TOCONF it can no longer
 *    send;
 *  - move the node to ZPREPARE_FAIL and reset its protocol state;
 *  - when aSendFailRep is ZTRUE, broadcast FAIL_REP (with aFailCause
 *    and sourceNode) to the failed node and to all running nodes;
 *  - recompute neighbours, and if the president itself failed, elect
 *    the running node with the lowest dynamic id as new president; a
 *    newly elected local president takes over the unfinished failure
 *    protocol (PRES_TOREQ query or COMMIT_FAILREQ broadcast).
 */
void Qmgr::failReport(Signal* signal,
                      Uint16 aFailedNode,
                      UintR aSendFailRep,
                      FailRep::FailCause aFailCause,
                      Uint16 sourceNode)
{
  UintR tfrMinDynamicId;
  NodeRecPtr failedNodePtr;
  NodeRecPtr nodePtr;
  NodeRecPtr presidentNodePtr;


  /* A broadcast FAIL_REP must always name its source node. */
  ndbassert((! aSendFailRep) || (sourceNode != 0));

  failedNodePtr.i = aFailedNode;
  ptrCheckGuard(failedNodePtr, MAX_NDB_NODES, nodeRec);
  if (failedNodePtr.p->phase == ZRUNNING) {
    jam();

#ifdef ERROR_INSERT
    /* Error insert 938: once more than 25% of the DB nodes have been
     * reported failed, resume communications (dump 9991) and switch to
     * error insert 932.
     */
    if (ERROR_INSERTED(938))
    {
      nodeFailCount++;
      ndbout_c("QMGR : execFAIL_REP(Failed : %u Source : %u  Cause : %u) : "
               "%u nodes have failed",
               aFailedNode, sourceNode, aFailCause, nodeFailCount);
      /* Count DB nodes */
      Uint32 nodeCount = 0;
      for (Uint32 i = 1; i < MAX_NDB_NODES; i++)
      {
        if (getNodeInfo(i).getType() == NODE_TYPE_DB)
          nodeCount++;
      }

      /* When > 25% of cluster has failed, resume communications */
      if (nodeFailCount > (nodeCount / 4))
      {
        ndbout_c("QMGR : execFAIL_REP > 25%% nodes failed, resuming comms");
        Signal save = *signal;
        signal->theData[0] = 9991;
        sendSignal(CMVMI_REF, GSN_DUMP_STATE_ORD, signal, 1, JBB);
        *signal = save;
        nodeFailCount = 0;
        SET_ERROR_INSERT_VALUE(932);
      }
    }
#endif

/* WE ALSO NEED TO ADD HERE SOME CODE THAT GETS OUR NEW NEIGHBOURS. */
    if (cpresident == getOwnNodeId()) {
      jam();
      /* As president, fake the outstanding replies the failed node can
       * no longer send, so the ongoing protocol round can complete.
       */
      if (failedNodePtr.p->sendCommitFailReqStatus == Q_ACTIVE) {
        jam();
        signal->theData[0] = failedNodePtr.i;
        sendSignal(QMGR_REF, GSN_COMMIT_FAILCONF, signal, 1, JBA);
      }//if
      if (failedNodePtr.p->sendPresToStatus == Q_ACTIVE) {
        jam();
        signal->theData[0] = failedNodePtr.i;
        signal->theData[1] = ccommitFailureNr;
        sendSignal(QMGR_REF, GSN_PRES_TOCONF, signal, 2, JBA);
      }//if
    }//if
    DEB_STARTUP(("phase(%u) = ZPREPARE_FAIL", failedNodePtr.i));
    failedNodePtr.p->phase = ZPREPARE_FAIL;
    failedNodePtr.p->sendPrepFailReqStatus = Q_NOT_ACTIVE;
    failedNodePtr.p->sendCommitFailReqStatus = Q_NOT_ACTIVE;
    failedNodePtr.p->sendPresToStatus = Q_NOT_ACTIVE;
    set_hb_count(failedNodePtr.i) = 0;
    if (aSendFailRep == ZTRUE) {
      jam();
      /* Tell the failed node itself (unless it is us)... */
      if (failedNodePtr.i != getOwnNodeId()) {
        jam();
	FailRep * const failRep = (FailRep *)&signal->theData[0];
        failRep->failNodeId = failedNodePtr.i;
        failRep->failCause = aFailCause;
        failRep->failSourceNodeId = sourceNode;
        sendSignal(failedNodePtr.p->blockRef, GSN_FAIL_REP, signal,
		   FailRep::SignalLength, JBA);
      }//if
      /* ... and broadcast to all still-running nodes. */
      for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
        ptrAss(nodePtr, nodeRec);
        if (nodePtr.p->phase == ZRUNNING) {
          jamLine(nodePtr.i);
	  FailRep * const failRep = (FailRep *)&signal->theData[0];
	  failRep->failNodeId = failedNodePtr.i;
	  failRep->failCause = aFailCause;
          failRep->failSourceNodeId = sourceNode;
          sendSignal(nodePtr.p->blockRef, GSN_FAIL_REP, signal,
		     FailRep::SignalLength, JBA);
        }//if
      }//for
    }//if
    /* If we are the failed node there is nothing more to clean up here. */
    if (failedNodePtr.i == getOwnNodeId()) {
      jam();
      return;
    }//if

    if (unlikely(m_connectivity_check.reportNodeFailure(failedNodePtr.i)))
    {
      jam();
      connectivityCheckCompleted(signal);
    }

    failedNodePtr.p->ndynamicId = 0;
    findNeighbours(signal, __LINE__);
    if (failedNodePtr.i == cpresident) {
      jam();
      /**--------------------------------------------------------------------
       * IF PRESIDENT HAVE FAILED WE MUST CALCULATE THE NEW PRESIDENT BY
       * FINDING THE NODE WITH THE MINIMUM DYNAMIC IDENTITY.
       *---------------------------------------------------------------------*/
      tfrMinDynamicId = (UintR)-1;
      for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
        ptrAss(nodePtr, nodeRec);
        if (nodePtr.p->phase == ZRUNNING) {
          jamLine(nodePtr.i);
          if ((nodePtr.p->ndynamicId & 0xFFFF) < tfrMinDynamicId) {
            jam();
            tfrMinDynamicId = (nodePtr.p->ndynamicId & 0xFFFF);
            cpresident = nodePtr.i;
          }//if
        }//if
      }//for
      presidentNodePtr.i = cpresident;
      ptrCheckGuard(presidentNodePtr, MAX_NDB_NODES, nodeRec);
      cpdistref = presidentNodePtr.p->blockRef;
      /* If the election made US president, take over the protocol. */
      if (cpresident == getOwnNodeId()) {
	CRASH_INSERTION(920);
        cfailureNr = cprepareFailureNr;
        ctoFailureNr = 0;
        ctoStatus = Q_ACTIVE;
        DEB_STARTUP(("2:Clear c_start.m_startNode"));
	c_start.reset(); // Don't take over nodes being started
        if (!ccommitFailedNodes.isclear()) {
          jam();
	  /**-----------------------------------------------------------------
	   * IN THIS SITUATION WE ARE UNCERTAIN OF WHETHER THE NODE FAILURE
	   * PROCESS WAS COMMITTED. WE NEED TO QUERY THE OTHER NODES ABOUT
	   * THEIR STATUS.
	   *-----------------------------------------------------------------*/
          for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES;
	       nodePtr.i++) {
            jam();
            ptrAss(nodePtr, nodeRec);
            if (nodePtr.p->phase == ZRUNNING) {
              jam();
              nodePtr.p->sendPresToStatus = Q_ACTIVE;
              signal->theData[0] = cpdistref;
              signal->theData[1] = cprepareFailureNr;
              /* NOTE(review): theData[1] is set but the signal is sent
               * with length 1 — the receiver appears to use only
               * theData[0]; confirm against execPRES_TOREQ.
               */
              sendSignal(nodePtr.p->blockRef, GSN_PRES_TOREQ,
			 signal, 1, JBA);
            }//if
          }//for
        } else {
          jam();
	  /*-----------------------------------------------------------------*/
	  // In this case it could be that a commit process is still ongoing.
	  // If so we must conclude it as the new master.
	  /*-----------------------------------------------------------------*/
          for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES;
	       nodePtr.i++) {
            ptrAss(nodePtr, nodeRec);
            if (nodePtr.p->phase == ZRUNNING) {
              jamLine(nodePtr.i);
              nodePtr.p->sendCommitFailReqStatus = Q_ACTIVE;
              signal->theData[0] = cpdistref;
              signal->theData[1] = ccommitFailureNr;
              sendSignal(nodePtr.p->blockRef, GSN_COMMIT_FAILREQ, signal,
			 2, JBA);
            }//if
          }//for
        }//if
      }//if
    }//if
    cfailedNodes.set(failedNodePtr.i);
  }//if
}//Qmgr::failReport()
6023 
6024 /*---------------------------------------------------------------------------*/
6025 /*       INPUT:  TTDI_DYN_ID                                                 */
6026 /*       OUTPUT: TTDI_NODE_ID                                                */
6027 /*---------------------------------------------------------------------------*/
translateDynamicIdToNodeId(Signal * signal,UintR TdynamicId)6028 Uint16 Qmgr::translateDynamicIdToNodeId(Signal* signal, UintR TdynamicId)
6029 {
6030   NodeRecPtr tdiNodePtr;
6031   Uint16 TtdiNodeId = ZNIL;
6032 
6033   for (tdiNodePtr.i = 1; tdiNodePtr.i < MAX_NDB_NODES; tdiNodePtr.i++) {
6034     jam();
6035     ptrAss(tdiNodePtr, nodeRec);
6036     if (tdiNodePtr.p->ndynamicId == TdynamicId) {
6037       jam();
6038       TtdiNodeId = tdiNodePtr.i;
6039       break;
6040     }//if
6041   }//for
6042   if (TtdiNodeId == ZNIL) {
6043     jam();
6044     systemErrorLab(signal, __LINE__);
6045   }//if
6046   return TtdiNodeId;
6047 }//Qmgr::translateDynamicIdToNodeId()
6048 
6049 /**--------------------------------------------------------------------------
6050  *       WHEN RECEIVING PREPARE FAILURE REQUEST WE WILL IMMEDIATELY CLOSE
6051  *       COMMUNICATION WITH ALL THOSE NODES.
6052  *--------------------------------------------------------------------------*/
sendCloseComReq(Signal * signal,BlockReference TBRef,Uint16 aFailNo)6053 void Qmgr::sendCloseComReq(Signal* signal, BlockReference TBRef, Uint16 aFailNo)
6054 {
6055   jam();
6056   CloseComReqConf * const closeCom = (CloseComReqConf *)&signal->theData[0];
6057 
6058   closeCom->xxxBlockRef = TBRef;
6059   closeCom->requestType = CloseComReqConf::RT_NODE_FAILURE;
6060   closeCom->failNo      = aFailNo;
6061   closeCom->noOfNodes   = cprepFailedNodes.count();
6062   {
6063     closeCom->failedNodeId = 0; /* Indicates we're sending bitmask */
6064     LinearSectionPtr lsptr[3];
6065     lsptr[0].p = cprepFailedNodes.rep.data;
6066     lsptr[0].sz = cprepFailedNodes.getPackedLengthInWords();
6067     sendSignal(TRPMAN_REF,
6068                GSN_CLOSE_COMREQ,
6069                signal,
6070 	       CloseComReqConf::SignalLength,
6071                JBB,
6072                lsptr,
6073                1);
6074   }
6075 
6076 }//Qmgr::sendCloseComReq()
6077 
/**
 * Send PREP_FAILREQ/REF/CONF style signal carrying a failed-node set.
 *
 * If the destination understands node bitmasks in signal sections
 * (version check), the full bitmask is sent as a linear section.
 * Otherwise fall back to the legacy v1 layout with the bitmask inline
 * in the fixed signal part (limited to the 48-node bitmask size).
 *
 * @param signal      signal object (theData is overwritten here)
 * @param dstBlockRef destination block reference
 * @param gsn         signal number to send (e.g. GSN_PREP_FAILREQ)
 * @param blockRef    sender reference placed in the signal
 * @param failNo      failure sequence number
 * @param nodes       set of failed nodes to transfer
 */
void
Qmgr::sendPrepFailReqRef(Signal* signal,
			 Uint32 dstBlockRef,
			 GlobalSignalNumber gsn,
			 Uint32 blockRef,
			 Uint32 failNo,
			 const NdbNodeBitmask& nodes)
{
  PrepFailReqRef * const prepFail = (PrepFailReqRef *)&signal->theData[0];
  prepFail->xxxBlockRef = blockRef;
  prepFail->failNo = failNo;
  prepFail->noOfNodes = nodes.count();
  Uint32 packed_length = nodes.getPackedLengthInWords();

  if (ndbd_send_node_bitmask_in_section(
      getNodeInfo(refToNode(dstBlockRef)).m_version))
  {
    // Stage the bitmask in theData just past the fixed part; sendSignal
    // copies the section before the buffer can be reused.
    Uint32* temp_failed_nodes = &signal->theData[PrepFailReqRef::SignalLength];
    nodes.copyto(NdbNodeBitmask::Size, temp_failed_nodes);
    LinearSectionPtr lsptr[3];
    lsptr[0].p = temp_failed_nodes;
    lsptr[0].sz = packed_length;
    sendSignal(dstBlockRef, gsn, signal, PrepFailReqRef::SignalLength, JBA,
               lsptr, 1);
  }
  else if (packed_length <= NdbNodeBitmask48::Size)
  {
    // Legacy receiver: inline bitmask, v1 signal length.
    nodes.copyto(NdbNodeBitmask48::Size, prepFail->theNodes);
    sendSignal(dstBlockRef, gsn, signal, PrepFailReqRef::SignalLength_v1, JBA);
  }
  else
  {
    // Old-version receiver cannot represent nodes beyond the 48-bit mask.
    ndbabort();
  }
}
6113 
6114 
6115 /**--------------------------------------------------------------------------
6116  *       SEND PREPARE FAIL REQUEST FROM PRESIDENT.
6117  *---------------------------------------------------------------------------*/
sendPrepFailReq(Signal * signal,Uint16 aNode)6118 void Qmgr::sendPrepFailReq(Signal* signal, Uint16 aNode)
6119 {
6120   NodeRecPtr sendNodePtr;
6121   sendNodePtr.i = aNode;
6122   ptrCheckGuard(sendNodePtr, MAX_NDB_NODES, nodeRec);
6123   sendNodePtr.p->sendPrepFailReqStatus = Q_ACTIVE;
6124 
6125   sendPrepFailReqRef(signal,
6126 		     sendNodePtr.p->blockRef,
6127 		     GSN_PREP_FAILREQ,
6128 		     reference(),
6129 		     cfailureNr,
6130 		     cfailedNodes);
6131 }//Qmgr::sendPrepFailReq()
6132 
6133 /**
6134  * Arbitration module.  Rest of QMGR calls us only via
6135  * the "handle" routines.
6136  */
6137 
6138 /**
6139  * Config signals are logically part of CM_REG.
6140  */
6141 void
execARBIT_CFG(Signal * signal)6142 Qmgr::execARBIT_CFG(Signal* signal)
6143 {
6144   jamEntry();
6145   ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
6146   unsigned rank = sd->code;
6147   ndbrequire(1 <= rank && rank <= 2);
6148   arbitRec.apiMask[0].bitOR(sd->mask);
6149   arbitRec.apiMask[rank].assign(sd->mask);
6150 }
6151 
6152 /**
6153  * ContinueB delay (0=JBA 1=JBB)
6154  */
getArbitDelay()6155 Uint32 Qmgr::getArbitDelay()
6156 {
6157   switch (arbitRec.state) {
6158   case ARBIT_NULL:
6159     jam();
6160     break;
6161   case ARBIT_INIT:
6162     jam();
6163     return 100;
6164   case ARBIT_FIND:
6165     jam();
6166     return 100;
6167   case ARBIT_PREP1:
6168     jam();
6169     return 100;
6170   case ARBIT_PREP2:
6171     jam();
6172     return 100;
6173   case ARBIT_START:
6174     jam();
6175     return 100;
6176   case ARBIT_RUN:
6177     jam();
6178     return 1000;
6179   case ARBIT_CHOOSE:
6180     jam();
6181     return 10;
6182   case ARBIT_CRASH:             // if we could wait
6183     jam();
6184     return 100;
6185   }
6186   ndbabort();
6187   return (Uint32)-1;
6188 }
6189 
6190 /**
6191  * Time to wait for reply.  There is only 1 config parameter
6192  * (timeout for CHOOSE).  XXX The rest are guesses.
6193  */
getArbitTimeout()6194 Uint32 Qmgr::getArbitTimeout()
6195 {
6196   switch (arbitRec.state) {
6197   case ARBIT_NULL:
6198     jam();
6199     break;
6200   case ARBIT_INIT:              // not used
6201     jam();
6202     // Fall through
6203   case ARBIT_FIND:
6204     jam();
6205     /* This timeout will be used only to print out a warning
6206      * when a suitable arbitrator is not found.
6207      */
6208     return 60000;
6209   case ARBIT_PREP1:
6210     jam();
6211     // Fall through
6212   case ARBIT_PREP2:
6213     jam();
6214     return 1000 + cnoOfNodes * Uint32(hb_send_timer.getDelay());
6215   case ARBIT_START:
6216     jam();
6217     return 1000 + arbitRec.timeout;
6218   case ARBIT_RUN:               // not used (yet)
6219     jam();
6220     return 1000;
6221   case ARBIT_CHOOSE:
6222     jam();
6223     return arbitRec.timeout;
6224   case ARBIT_CRASH:             // if we could wait
6225     jam();
6226     return 100;
6227   }
6228   ndbabort();
6229   return (Uint32)-1;
6230 }
6231 
6232 /**
6233  * Start arbitration thread when we are president and database
6234  * is opened for the first time.
6235  *
6236  * XXX  Do arbitration check just like on node failure.  Since
6237  * there is no arbitrator yet, must win on counts alone.
6238  */
void
Qmgr::handleArbitStart(Signal* signal)
{
  jam();
  // Only the president owns the arbitration thread, and this must be
  // the very first start (state still NULL).
  ndbrequire(cpresident == getOwnNodeId());
  ndbrequire(arbitRec.state == ARBIT_NULL);
  arbitRec.state = ARBIT_INIT;
  DEB_ARBIT(("Arbit state = ARBIT_INIT from NULL"));
  arbitRec.newstate = true;
  startArbitThread(signal);
}
6250 
6251 /**
6252  * Handle API node failure.  Called also by non-president nodes.
6253  * If we are president go back to INIT state, otherwise to NULL.
6254  * Start new thread to save time.
6255  */
void
Qmgr::handleArbitApiFail(Signal* signal, Uint16 nodeId)
{
  // Ignore failures of API nodes that are not our current arbitrator.
  if (arbitRec.node != nodeId) {
    jam();
    return;
  }
  reportArbitEvent(signal, NDB_LE_ArbitState);
  arbitRec.node = 0;  // no arbitrator selected any more
  switch (arbitRec.state) {
  case ARBIT_NULL:              // should not happen
    jam();
    break;
  case ARBIT_INIT:
    jam();
    break;
  case ARBIT_FIND:
    jam();
    break;
  case ARBIT_PREP1:		// start from beginning
    jam();
    // Fall through
  case ARBIT_PREP2:
    jam();
    // Fall through
  case ARBIT_START:
    jam();
    // Fall through
  case ARBIT_RUN:
    // President restarts arbitration from INIT; a non-president goes
    // back to NULL and waits for the president's next proposal.
    if (cpresident == getOwnNodeId()) {
      jam();
      arbitRec.state = ARBIT_INIT;
      DEB_ARBIT(("Arbit state = ARBIT_INIT from RUN"));
      arbitRec.newstate = true;
      startArbitThread(signal);
    } else {
      jam();
      arbitRec.state = ARBIT_NULL;
      DEB_ARBIT(("Arbit state = ARBIT_NULL from RUN"));
    }
    break;
  case ARBIT_CHOOSE:		// XXX too late
    jam();
    break;
  case ARBIT_CRASH:
    jam();
    break;
  default:
    ndbabort();
  }
}
6307 
6308 /**
6309  * Handle NDB node add.  Ignore if arbitration thread not yet
6310  * started.  If PREP is not ready, go back to INIT.  Otherwise
6311  * the new node gets arbitrator and ticket once we reach RUN state.
6312  * Start new thread to save time.
6313  */
void
Qmgr::handleArbitNdbAdd(Signal* signal, Uint16 nodeId)
{
  jam();
  // Only the president maintains the arbitration thread.
  ndbrequire(cpresident == getOwnNodeId());
  switch (arbitRec.state) {
  case ARBIT_NULL:              // before db opened
    jam();
    break;
  case ARBIT_INIT:		// start from beginning
    jam();
    // Fall through
  case ARBIT_FIND:
    jam();
    // Fall through
  case ARBIT_PREP1:
    jam();
    // Fall through
  case ARBIT_PREP2:
    jam();
    // PREP not finished: restart the whole sequence so the new node
    // takes part in the ticket handshake from the beginning.
    arbitRec.state = ARBIT_INIT;
    DEB_ARBIT(("Arbit state = ARBIT_INIT from PREP2"));
    arbitRec.newstate = true;
    startArbitThread(signal);
    break;
  case ARBIT_START:		// process in RUN state
    jam();
    // Fall through
  case ARBIT_RUN:
    jam();
    // Remember the node; stateArbitRun sends it the current
    // arbitrator and ticket (PrepAtrun).
    arbitRec.newMask.set(nodeId);
    break;
  case ARBIT_CHOOSE:            // XXX too late
    jam();
    break;
  case ARBIT_CRASH:
    jam();
    break;
  default:
    ndbabort();
  }
}
6356 
6357 /**
6358  * Check if current nodeset can survive.  The decision is
6359  * based on node count, node groups, and on external arbitrator
6360  * (if we have one).  Always starts a new thread because
6361  * 1) CHOOSE cannot wait 2) if we are new president we need
6362  * a thread 3) if we are old president it does no harm.
6363  *
6364  * The following logic governs if we will survive or not.
6365  * 1) If at least one node group is fully dead then we will not survive.
6366  * 2) If 1) is false AND at least one group is fully alive then we will
6367  *    survive.
6368  * 3) If 1) AND 2) is false AND a majority of the previously alive nodes are
6369  *    dead then we will not survive.
6370  * 4) If 1) AND 2) AND 3) is false AND a majority of the previously alive
6371  *    nodes are still alive, then we will survive.
6372  * 5) If 1) AND 2) AND 3) AND 4) is false then exactly half of the previously
6373  *    alive nodes are dead and the other half is alive. In this case we will
6374  *    ask the arbitrator whether we can continue or not. If no arbitrator is
6375  *    currently selected then we will fail. If an arbitrator exists then it
6376  *    will respond with either WIN in which case our part of the cluster will
6377  *    remain alive and LOSE in which case our part of the cluster will not
6378  *    survive.
6379  *
6380  * The number of previously alive nodes are the sum of the currently alive
6381  * nodes plus the number of nodes currently forming a node set that will
6382  * die. All other nodes was dead in a previous node fail transaction and are
6383  * not counted in the number of previously alive nodes.
6384  */
void
Qmgr::handleArbitCheck(Signal* signal)
{
  jam();
  Uint32 prev_alive_nodes = count_previously_alive_nodes();
  ndbrequire(cpresident == getOwnNodeId());
  NdbNodeBitmask survivorNodes;
  /**
   * computeArbitNdbMask will only count nodes in the state ZRUNNING, crashed
   * nodes are thus not part of this set of nodes. The method
   * count_previously_alive_nodes counts both nodes in ZRUNNING and in
   * ZPREPARE_FAIL but deducts those that was previously not started to ensure
   * that we don't rely on non-started nodes in our check for whether
   * arbitration is required.
   */
  computeArbitNdbMask(survivorNodes);
  {
    jam();
    // Ask DIH (synchronously) how the survivor set relates to the
    // node groups: clear win, clear loss, or potential partitioning.
    CheckNodeGroups* sd = (CheckNodeGroups*)&signal->theData[0];
    sd->blockRef = reference();
    sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck;
    sd->mask = survivorNodes;
    EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal,
		   CheckNodeGroups::SignalLength);
    jamEntry();
    if (ERROR_INSERTED(943))
    {
      // Test hook: force the Partitioning path even without a real split.
      ndbout << "Requiring arbitration, even if there is no"
             << " possible split."<< endl;
      sd->output = CheckNodeGroups::Partitioning;
      DEB_ARBIT(("Arbit state = ARBIT_RUN in 943"));
      arbitRec.state = ARBIT_RUN;
    }
    switch (sd->output) {
    case CheckNodeGroups::Win:
      jam();
      arbitRec.code = ArbitCode::WinGroups;
      break;
    case CheckNodeGroups::Lose:
      jam();
      arbitRec.code = ArbitCode::LoseGroups;
      break;
    case CheckNodeGroups::Partitioning:
      jam();
      arbitRec.code = ArbitCode::Partitioning;
      if (2 * survivorNodes.count() > prev_alive_nodes)
      {
        /**
         * We have lost nodes in all node groups so we are in a
         * potentially partitioned state. If we have the majority
         * of the nodes in this partition we will definitely
         * survive.
         */
        jam();
        arbitRec.code = ArbitCode::WinNodes;
      }
      else if (2 * survivorNodes.count() < prev_alive_nodes)
      {
        jam();
        /**
         * More than half of the live nodes failed and nodes from
         * all node groups failed, we are definitely in a losing
         * streak and we will be part of the failing side. Time
         * to crash.
         */
        arbitRec.code = ArbitCode::LoseNodes;
      }
      else
      {
        jam();
        /**
         * Half of the live nodes failed, we can be in a partitioned
         * state, use the arbitrator to decide what to do next.
         * (arbitRec.code stays Partitioning.)
         */
      }
      break;
    default:
      ndbabort();
    }
  }
  // Act on the verdict.  Lose* codes jump to crashme below.
  switch (arbitRec.code) {
  case ArbitCode::LoseNodes:
    jam();
    goto crashme;
  case ArbitCode::LoseGroups:
    jam();
    goto crashme;
  case ArbitCode::WinNodes:
    jam();
    // Fall through
  case ArbitCode::WinGroups:
    jam();
    if (arbitRec.state == ARBIT_RUN)
    {
      jam();
      break;
    }
    // Won but no running arbitration thread: restart from INIT to
    // select a fresh arbitrator/ticket.
    arbitRec.state = ARBIT_INIT;
    DEB_ARBIT(("Arbit state = ARBIT_INIT from non-RUN WinGroups"));
    arbitRec.newstate = true;
    break;
  case ArbitCode::Partitioning:
    if (arbitRec.state == ARBIT_RUN)
    {
      jam();
      // Ask the external arbitrator to break the tie.
      arbitRec.state = ARBIT_CHOOSE;
      DEB_ARBIT(("Arbit state = ARBIT_CHOOSE from RUN"));
      arbitRec.newstate = true;
      break;
    }
    // Potential split but no arbitrator in place: we must lose.
    // Distinguish "configured but not running" from "not configured".
    if (arbitRec.apiMask[0].count() != 0)
    {
      jam();
      arbitRec.code = ArbitCode::LoseNorun;
    }
    else
    {
      jam();
      arbitRec.code = ArbitCode::LoseNocfg;
    }
    goto crashme;
  default:
  crashme:
    jam();
    arbitRec.state = ARBIT_CRASH;
    DEB_ARBIT(("Arbit state = ARBIT_CRASH"));
    arbitRec.newstate = true;
    break;
  }
  reportArbitEvent(signal, NDB_LE_ArbitResult);
  switch (arbitRec.state) {
  default:
    jam();
    // Survived: prune failed nodes from pending masks and commit
    // the node failures.
    arbitRec.newMask.bitAND(survivorNodes);   // delete failed nodes
    arbitRec.recvMask.bitAND(survivorNodes);
    sendCommitFailReq(signal);          // start commit of failed nodes
    break;
  case ARBIT_CHOOSE:
    jam();
    break;
  case ARBIT_CRASH:
    jam();
    break;
  }
  startArbitThread(signal);
}
6531 
6532 /**
6533  * Start a new continueB thread.  The thread id is incremented
6534  * so that any old thread will exit.
6535  */
void
Qmgr::startArbitThread(Signal* signal)
{
  jam();
  ndbrequire(cpresident == getOwnNodeId());  // thread runs on president only
  arbitRec.code = ArbitCode::ThreadStart;
  reportArbitEvent(signal, NDB_LE_ArbitState);
  // Bumping the thread id makes any older thread exit on its next
  // CONTINUEB round (runArbitThread compares theData[1] to it).
  signal->theData[1] = ++arbitRec.thread;
  runArbitThread(signal);
}
6546 
6547 /**
6548  * Handle arbitration thread.  The initial thread normally ends
6549  * up in RUN state.  New thread can be started to save time.
6550  */
void
Qmgr::runArbitThread(Signal* signal)
{
#ifdef DEBUG_ARBIT
  // Dump the full arbitration record once per thread round.
  char buf[256];
  NdbNodeBitmask ndbMask;
  char maskbuf[NdbNodeBitmask::TextLength + 1];
  computeArbitNdbMask(ndbMask);
  ndbout << "arbit thread:";
  ndbout << " state=" << arbitRec.state;
  ndbout << " newstate=" << arbitRec.newstate;
  ndbout << " thread=" << arbitRec.thread;
  ndbout << " node=" << arbitRec.node;
  arbitRec.ticket.getText(buf, sizeof(buf));
  ndbout << " ticket=" << buf;
  ndbMask.getText(maskbuf);
  ndbout << " ndbmask=" << maskbuf;
  ndbout << " sendcount=" << arbitRec.sendCount;
  ndbout << " recvcount=" << arbitRec.recvCount;
  arbitRec.recvMask.getText(maskbuf);
  ndbout << " recvmask=" << maskbuf;
  ndbout << " code=" << arbitRec.code;
  ndbout << endl;
#endif
  // Stale thread (superseded by startArbitThread) terminates here.
  if (signal->theData[1] != arbitRec.thread) {
    jam();
    return;	        	// old thread dies
  }
  // Dispatch one round of the per-state handler.
  switch (arbitRec.state) {
  case ARBIT_INIT:		// main thread
    jam();
    stateArbitInit(signal);
    break;
  case ARBIT_FIND:
    jam();
    stateArbitFind(signal);
    break;
  case ARBIT_PREP1:
    jam();
    // Fall through
  case ARBIT_PREP2:
    jam();
    stateArbitPrep(signal);
    break;
  case ARBIT_START:
    jam();
    stateArbitStart(signal);
    break;
  case ARBIT_RUN:
    jam();
    stateArbitRun(signal);
    break;
  case ARBIT_CHOOSE:		// partition thread
    jam();
    if (ERROR_INSERTED(945) && arbitRec.code == ArbitCode::WinChoose)
    {
      // Delay ARBIT_CHOOSE until NdbAPI node is disconnected
      break;
    }
    stateArbitChoose(signal);
    break;
  case ARBIT_CRASH:
    jam();
    stateArbitCrash(signal);
    break;
  default:
    ndbabort();
  }
  // Re-schedule ourselves: delay 0 => JBA, 1 => JBB, else delayed send.
  signal->theData[0] = ZARBIT_HANDLING;
  signal->theData[1] = arbitRec.thread;
  signal->theData[2] = arbitRec.state;		// just for signal log
  Uint32 delay = getArbitDelay();
  if (delay == 0) {
    jam();
    sendSignal(QMGR_REF, GSN_CONTINUEB, signal, 3, JBA);
  } else if (delay == 1) {
    jam();
    sendSignal(QMGR_REF, GSN_CONTINUEB, signal, 3, JBB);
  } else {
    jam();
    sendSignalWithDelay(QMGR_REF, GSN_CONTINUEB, signal, delay, 3);
  }//if
}
6634 
6635 /**
6636  * Handle INIT state.  Generate next ticket.  Switch to FIND
6637  * state without delay.
6638  */
void
Qmgr::stateArbitInit(Signal* signal)
{
  // First round in this state: reset the arbitration record and
  // generate a fresh ticket.
  if (arbitRec.newstate) {
    jam();
    CRASH_INSERTION((Uint32)910 + arbitRec.state);

    arbitRec.node = 0;
    arbitRec.ticket.update();
    arbitRec.newMask.clear();
    arbitRec.code = 0;
    arbitRec.newstate = false;
  }
  arbitRec.setTimestamp();  // Init arbitration timer
  // Move straight on to FIND without waiting for the next round.
  arbitRec.state = ARBIT_FIND;
  DEB_ARBIT(("Arbit state = ARBIT_FIND"));
  arbitRec.newstate = true;
  stateArbitFind(signal);
}
6658 
6659 /**
6660  * Handle FIND state.  Find first arbitrator which is alive
6661  * and invoke PREP state without delay.  If none are found,
6662  * loop in FIND state.  This is forever if no arbitrators
6663  * are configured (not the normal case).
6664  *
6665  * XXX  Add adaptive behaviour to avoid getting stuck on API
6666  * nodes which are alive but do not respond or die too soon.
6667  */
6668 void
stateArbitFind(Signal * signal)6669 Qmgr::stateArbitFind(Signal* signal)
6670 {
6671   if (arbitRec.newstate) {
6672     jam();
6673     CRASH_INSERTION((Uint32)910 + arbitRec.state);
6674 
6675     arbitRec.code = 0;
6676     arbitRec.newstate = false;
6677   }
6678 
6679   switch (arbitRec.method){
6680   case ArbitRec::METHOD_EXTERNAL:
6681   {
6682     // Don't select any API node as arbitrator
6683     arbitRec.node = 0;
6684     arbitRec.state = ARBIT_PREP1;
6685     DEB_ARBIT(("Arbit state = ARBIT_PREP1"));
6686     arbitRec.newstate = true;
6687     stateArbitPrep(signal);
6688     return;
6689     break;
6690   }
6691 
6692   case ArbitRec::METHOD_DEFAULT:
6693   {
6694     NodeRecPtr aPtr;
6695     // Select the best available API node as arbitrator
6696     for (unsigned rank = 1; rank <= 2; rank++) {
6697       jam();
6698       aPtr.i = 0;
6699       const unsigned stop = NodeBitmask::NotFound;
6700       while ((aPtr.i = arbitRec.apiMask[rank].find(aPtr.i + 1)) != stop) {
6701         jam();
6702         ptrAss(aPtr, nodeRec);
6703         if (aPtr.p->phase != ZAPI_ACTIVE)
6704           continue;
6705         ndbrequire(c_connectedNodes.get(aPtr.i));
6706         arbitRec.node = aPtr.i;
6707         arbitRec.state = ARBIT_PREP1;
6708         DEB_ARBIT(("2:Arbit state = ARBIT_PREP1"));
6709         arbitRec.newstate = true;
6710         stateArbitPrep(signal);
6711         return;
6712       }
6713     }
6714 
6715     /* If the president cannot find a suitable arbitrator then
6716      * it will report this once a minute. Success in finding
6717      * an arbitrator will be notified when the arbitrator
6718      * accepts and acks the offer.
6719     */
6720 
6721     if (arbitRec.getTimediff() > getArbitTimeout()) {
6722       jam();
6723       g_eventLogger->warning("Could not find an arbitrator, cluster is not partition-safe");
6724       warningEvent("Could not find an arbitrator, cluster is not partition-safe");
6725       arbitRec.setTimestamp();
6726     }
6727     return;
6728     break;
6729   }
6730 
6731   default:
6732     ndbabort();
6733   }
6734 }
6735 
6736 /**
6737  * Handle PREP states.  First round nulls any existing tickets.
6738  * Second round sends new ticket.  When all confirms have been
6739  * received invoke START state immediately.
6740  */
void
Qmgr::stateArbitPrep(Signal* signal)
{
  if (arbitRec.newstate) {
    jam();
    CRASH_INSERTION((Uint32)910 + arbitRec.state);

    arbitRec.sendCount = 0;                     // send all at once
    computeArbitNdbMask(arbitRec.recvMask);     // to send and recv
    arbitRec.recvMask.clear(getOwnNodeId());
    arbitRec.code = 0;
    arbitRec.newstate = false;
  }
  // Phase 1 of this round: broadcast PREPREQ to every node we expect
  // a reply from.  PREP1 clears old tickets, PREP2 installs the new one.
  if (! arbitRec.sendCount) {
    jam();
    NodeRecPtr aPtr;
    aPtr.i = 0;
    const unsigned stop = NodeBitmask::NotFound;
    while ((aPtr.i = arbitRec.recvMask.find(aPtr.i + 1)) != stop) {
      jam();
      ptrAss(aPtr, nodeRec);
      ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
      sd->sender = getOwnNodeId();
      if (arbitRec.state == ARBIT_PREP1) {
        jam();
        sd->code = ArbitCode::PrepPart1;
      } else {
        jam();
        sd->code = ArbitCode::PrepPart2;
      }
      sd->node = arbitRec.node;
      sd->ticket = arbitRec.ticket;
      sd->mask.clear();
      sendSignal(aPtr.p->blockRef, GSN_ARBIT_PREPREQ, signal,
        ArbitSignalData::SignalLength, JBB);
    }
    arbitRec.setTimestamp();			// send time
    arbitRec.sendCount = 1;
    return;
  }
  // A CONF/REF carried an error: restart from INIT.
  if (arbitRec.code != 0) {			// error
    jam();
    arbitRec.state = ARBIT_INIT;
    DEB_ARBIT(("Arbit state = ARBIT_INIT stateArbitPrep"));
    arbitRec.newstate = true;
    return;
  }
  // All replies received: PREP1 -> PREP2, PREP2 -> START (immediately).
  if (arbitRec.recvMask.count() == 0) {		// recv all
    if (arbitRec.state == ARBIT_PREP1) {
      jam();
      DEB_ARBIT(("Arbit state = ARBIT_PREP2 stateArbitPrep"));
      arbitRec.state = ARBIT_PREP2;
      arbitRec.newstate = true;
    } else {
      jam();
      DEB_ARBIT(("Arbit state = ARBIT_START stateArbitPrep"));
      arbitRec.state = ARBIT_START;
      arbitRec.newstate = true;
      stateArbitStart(signal);
    }
    return;
  }
  // Replies still missing after the timeout: restart from INIT.
  if (arbitRec.getTimediff() > getArbitTimeout()) {
    jam();
    arbitRec.state = ARBIT_INIT;
    DEB_ARBIT(("Arbit state = ARBIT_INIT stateArbitPrep"));
    arbitRec.newstate = true;
    return;
  }
}
6811 
void
Qmgr::execARBIT_PREPREQ(Signal* signal)
{
  jamEntry();
  ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
  // Only non-president nodes accept PREPREQ, and only from the president.
  if (getOwnNodeId() == cpresident) {
    jam();
    return;		// wrong state
  }
  if (sd->sender != cpresident) {
    jam();
    return;		// wrong state
  }
  NodeRecPtr aPtr;
  aPtr.i = sd->sender;
  ptrAss(aPtr, nodeRec);
  switch (sd->code) {
  case ArbitCode::PrepPart1:    // zero them just to be sure
    jam();
    arbitRec.node = 0;
    arbitRec.ticket.clear();
    break;
  case ArbitCode::PrepPart2:    // non-president enters RUN state
    jam();
    // Fall through
  case ArbitCode::PrepAtrun:
    jam();
    // Adopt the president's arbitrator and ticket and enter RUN.
    arbitRec.node = sd->node;
    arbitRec.ticket = sd->ticket;
    arbitRec.code = sd->code;
    reportArbitEvent(signal, NDB_LE_ArbitState);
    arbitRec.state = ARBIT_RUN;
    arbitRec.newstate = true;
    DEB_ARBIT(("Arbit state = ARBIT_RUN PrepAtRun"));

    // Non-president node logs.
    // Warn if the proposed arbitrator is not connected to this node —
    // a sign of a partially connected cluster.
    if (!c_connectedNodes.get(arbitRec.node))
    {
      char buf[20]; // needs 16 + 1 for '\0'
      arbitRec.ticket.getText(buf, sizeof(buf));
      g_eventLogger->warning("President %u proposed disconnected "
                             "node %u as arbitrator [ticket=%s]. "
                             "Cluster may be partially connected. "
                             "Connected nodes: %s",
                             cpresident, arbitRec.node, buf,
                             BaseString::getPrettyTextShort(c_connectedNodes).c_str());

      warningEvent("President %u proposed disconnected node %u "
                   "as arbitrator [ticket %s]",
                   cpresident, arbitRec.node, buf);
      warningEvent("Cluster may be partially connected. Connected nodes: ");

      // Split the connected-node list, since warningEvents are
      // limited to ~24 words / 96 chars
      BaseString tmp(BaseString::getPrettyTextShort(c_connectedNodes).c_str());
      Vector<BaseString> split;
      tmp.split(split, "", 92);
      for(unsigned i = 0; i < split.size(); ++i)
      {
        warningEvent("%s", split[i].c_str());
      }
    }

    // PrepAtrun is informational only; no CONF is expected.
    if (sd->code == ArbitCode::PrepAtrun) {
      jam();
      return;
    }
    break;
  default:
    jam();
    ndbabort();
  }
  // Acknowledge PrepPart1/PrepPart2 back to the president.
  sd->sender = getOwnNodeId();
  sd->code = 0;
  sendSignal(aPtr.p->blockRef, GSN_ARBIT_PREPCONF, signal,
    ArbitSignalData::SignalLength, JBB);
}
6889 
6890 void
execARBIT_PREPCONF(Signal * signal)6891 Qmgr::execARBIT_PREPCONF(Signal* signal)
6892 {
6893   jamEntry();
6894   ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
6895   if (! arbitRec.match(sd)) {
6896     jam();
6897     return;		// stray signal
6898   }
6899   if (arbitRec.state != ARBIT_PREP1 && arbitRec.state != ARBIT_PREP2) {
6900     jam();
6901     return;		// wrong state
6902   }
6903   if (! arbitRec.recvMask.get(sd->sender)) {
6904     jam();
6905     return;		// wrong state
6906   }
6907   arbitRec.recvMask.clear(sd->sender);
6908   if (arbitRec.code == 0 && sd->code != 0) {
6909     jam();
6910     arbitRec.code = sd->code;
6911   }//if
6912 }
6913 
void
Qmgr::execARBIT_PREPREF(Signal* signal)
{
  jamEntry();
  ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
  // Make sure the error is visible; code 0 would look like success.
  if (sd->code == 0) {
    jam();
    sd->code = ArbitCode::ErrUnknown;
  }
  // A REF is processed as a CONF carrying an error code.
  execARBIT_PREPCONF(signal);
}
6925 
6926 /**
6927  * Handle START state.  On first call send start request to
6928  * the chosen arbitrator.  Then wait for a CONF.
6929  */
void
Qmgr::stateArbitStart(Signal* signal)
{
  if (arbitRec.newstate) {
    jam();
    CRASH_INSERTION((Uint32)910 + arbitRec.state);

    arbitRec.sendCount = 0;
    arbitRec.recvCount = 0;
    arbitRec.code = 0;
    arbitRec.newstate = false;
  }

  switch (arbitRec.method){
  case ArbitRec::METHOD_EXTERNAL:
    jam();
    ndbrequire(arbitRec.node == 0); // No arbitrator selected

    // Don't start arbitrator in API node => ARBIT_RUN
    arbitRec.state = ARBIT_RUN;
    DEB_ARBIT(("Arbit state = ARBIT_RUN stateArbitStart"));
    arbitRec.newstate = true;
    return;
    break;

  case ArbitRec::METHOD_DEFAULT:
    // Round 1: send STARTREQ to the chosen API node's cluster manager.
    if (! arbitRec.sendCount) {
      jam();
      BlockReference blockRef = calcApiClusterMgrBlockRef(arbitRec.node);
      ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
      sd->sender = getOwnNodeId();
      sd->code = 0;
      sd->node = arbitRec.node;
      sd->ticket = arbitRec.ticket;
      sd->mask.clear();
      sendSignal(blockRef, GSN_ARBIT_STARTREQ, signal,
                 ArbitSignalData::SignalLength, JBB);
      arbitRec.sendCount = 1;
      arbitRec.setTimestamp();		// send time
      return;
    }
    // Reply received: ApiStart => RUN, anything else => back to INIT.
    if (arbitRec.recvCount) {
      jam();
      reportArbitEvent(signal, NDB_LE_ArbitState);
      if (arbitRec.code == ArbitCode::ApiStart) {
        jam();
        arbitRec.state = ARBIT_RUN;
        DEB_ARBIT(("Arbit state = ARBIT_RUN stateArbitStart:Default"));
        arbitRec.newstate = true;
        return;
      }
      arbitRec.state = ARBIT_INIT;
      DEB_ARBIT(("Arbit state = ARBIT_INIT stateArbitStart:Default"));
      arbitRec.newstate = true;
      return;
    }
    // No reply within the timeout: report and restart from INIT.
    if (arbitRec.getTimediff() > getArbitTimeout()) {
      jam();
      arbitRec.code = ArbitCode::ErrTimeout;
      reportArbitEvent(signal, NDB_LE_ArbitState);
      arbitRec.state = ARBIT_INIT;
      DEB_ARBIT(("Arbit state = ARBIT_INIT stateArbitStart:Default timeout"));
      arbitRec.newstate = true;
      return;
    }
    break;

  default:
    ndbabort();
  }
}
7001 
7002 void
execARBIT_STARTCONF(Signal * signal)7003 Qmgr::execARBIT_STARTCONF(Signal* signal)
7004 {
7005   jamEntry();
7006   ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
7007   if (! arbitRec.match(sd)) {
7008     jam();
7009     return;		// stray signal
7010   }
7011   if (arbitRec.state != ARBIT_START) {
7012     jam();
7013     return;		// wrong state
7014   }
7015   if (arbitRec.recvCount) {
7016     jam();
7017     return;		// wrong state
7018   }
7019   arbitRec.code = sd->code;
7020   arbitRec.recvCount = 1;
7021 }
7022 
void
Qmgr::execARBIT_STARTREF(Signal* signal)
{
  jamEntry();
  ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
  // Make sure the error is visible; code 0 would look like success.
  if (sd->code == 0) {
    jam();
    sd->code = ArbitCode::ErrUnknown;
  }
  // A REF is processed as a CONF carrying an error code.
  execARBIT_STARTCONF(signal);
}
7034 
7035 /**
7036  * Handle RUN state.  Send ticket to any new nodes which have
7037  * appeared after PREP state.  We don't care about a CONF.
7038  */
void
Qmgr::stateArbitRun(Signal* signal)
{
  // First invocation after entering RUN state: reset per-state data.
  if (arbitRec.newstate) {
    jam();
    CRASH_INSERTION((Uint32)910 + arbitRec.state);

    arbitRec.code = 0;
    arbitRec.newstate = false;
  }
  // Nodes that appeared after the PREP round are collected in newMask.
  // Send each of them the current ticket (ARBIT_PREPREQ with code
  // PrepAtrun) so they share the arbitration setup; no CONF expected.
  NodeRecPtr aPtr;
  aPtr.i = 0;
  const unsigned stop = NodeBitmask::NotFound;
  while ((aPtr.i = arbitRec.newMask.find(aPtr.i + 1)) != stop) {
    jam();
    // Clear the bit first so the node is only handled once.
    arbitRec.newMask.clear(aPtr.i);
    ptrAss(aPtr, nodeRec);
    ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
    sd->sender = getOwnNodeId();
    sd->code = ArbitCode::PrepAtrun;
    sd->node = arbitRec.node;
    sd->ticket = arbitRec.ticket;
    sd->mask.clear();
    sendSignal(aPtr.p->blockRef, GSN_ARBIT_PREPREQ, signal,
      ArbitSignalData::SignalLength, JBB);
  }
}
7066 
7067 /**
7068  * Handle CHOOSE state.  Entered only from RUN state when
7069  * there is a possible network partitioning.  Send CHOOSE to
7070  * the arbitrator.  On win switch to INIT state because a new
7071  * ticket must be created.
7072  */
void
Qmgr::stateArbitChoose(Signal* signal)
{
  // First invocation after entering CHOOSE state: reset counters.
  if (arbitRec.newstate) {
    jam();
    CRASH_INSERTION((Uint32)910 + arbitRec.state);

    arbitRec.sendCount = 0;
    arbitRec.recvCount = 0;
    arbitRec.code = 0;
    arbitRec.newstate = false;
  }

  switch(arbitRec.method){
  case ArbitRec::METHOD_EXTERNAL:
  {
    // External arbitration: no CHOOSE signal is sent at all.  We simply
    // wait out the arbitration timeout and then assume we won, leaving
    // the real decision to the external arbitrator (which must ensure
    // at most one partition survives).
    if (! arbitRec.sendCount) {
      jam();
      ndbrequire(arbitRec.node == 0); // No arbitrator selected
      // Don't send CHOOSE to anyone, just wait for timeout to expire
      arbitRec.sendCount = 1;
      arbitRec.setTimestamp();
      return;
    }

    if (arbitRec.getTimediff() > getArbitTimeout()) {
      jam();
      // Arbitration timeout has expired
      ndbrequire(arbitRec.node == 0); // No arbitrator selected

      NodeBitmask nodes;
      computeArbitNdbMask(nodes);
      arbitRec.code = ArbitCode::WinWaitExternal;
      reportArbitEvent(signal, NDB_LE_ArbitResult, nodes);

      sendCommitFailReq(signal);        // start commit of failed nodes
      arbitRec.state = ARBIT_INIT;
      DEB_ARBIT(("Arbit state = ARBIT_INIT stateArbitChoose"));
      arbitRec.newstate = true;
      return;
    }
    break;
  }

  case ArbitRec::METHOD_DEFAULT:
  {
    // Default arbitration: ask the selected arbitrator (API node) to
    // choose a winner.  Sent on JBA (high priority) since the cluster
    // is blocked until the verdict arrives.
    if (! arbitRec.sendCount) {
      jam();
      const BlockReference blockRef = calcApiClusterMgrBlockRef(arbitRec.node);
      ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
      sd->sender = getOwnNodeId();
      sd->code = 0;
      sd->node = arbitRec.node;
      sd->ticket = arbitRec.ticket;
      computeArbitNdbMask(sd->mask);
      if (ERROR_INSERTED(943))
      {
        // Test hook: suppress the CHOOSEREQ to force the timeout path.
        ndbout << "Not sending GSN_ARBIT_CHOOSEREQ, thereby causing"
               << " arbitration to time out."<< endl;
      }
      else
      {
        sendSignal(blockRef, GSN_ARBIT_CHOOSEREQ, signal,
                   ArbitSignalData::SignalLength, JBA);
      }
      arbitRec.sendCount = 1;
      arbitRec.setTimestamp();		// send time
      return;
    }

    // A reply has arrived (recorded by execARBIT_CHOOSECONF/REF).
    if (arbitRec.recvCount) {
      jam();
      reportArbitEvent(signal, NDB_LE_ArbitResult);
      if (arbitRec.code == ArbitCode::WinChoose) {
        jam();
        // We won: commit the node failures and restart arbitration
        // from INIT since a new ticket must be created.
        sendCommitFailReq(signal);        // start commit of failed nodes
        arbitRec.state = ARBIT_INIT;
        DEB_ARBIT(("Arbit state = ARBIT_INIT stateArbitChoose:Default"));
        arbitRec.newstate = true;
        return;
      }
      // We lost: this partition must die.
      arbitRec.state = ARBIT_CRASH;
      DEB_ARBIT(("Arbit state = ARBIT_CRASH stateArbitChoose:Default"));
      arbitRec.newstate = true;
      stateArbitCrash(signal);		// do it at once
      return;
    }

    // No reply within the timeout: treat as lost arbitration.
    if (arbitRec.getTimediff() > getArbitTimeout()) {
      jam();
      // Arbitration timeout has expired
      arbitRec.code = ArbitCode::ErrTimeout;
      reportArbitEvent(signal, NDB_LE_ArbitState);
      arbitRec.state = ARBIT_CRASH;
      DEB_ARBIT(("Arbit state = ARBIT_CRASH stateArbitChoose:Def timeout"));
      arbitRec.newstate = true;
      stateArbitCrash(signal);		// do it at once
      return;
    }
    break;
  }

  default:
    ndbabort();
  }
}
7179 
7180 void
execARBIT_CHOOSECONF(Signal * signal)7181 Qmgr::execARBIT_CHOOSECONF(Signal* signal)
7182 {
7183   jamEntry();
7184   ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
7185   if (!arbitRec.match(sd)) {
7186     jam();
7187     return;		// stray signal
7188   }
7189   if (arbitRec.state != ARBIT_CHOOSE) {
7190     jam();
7191     return;		// wrong state
7192   }
7193   if (arbitRec.recvCount) {
7194     jam();
7195     return;		// wrong state
7196   }
7197   arbitRec.recvCount = 1;
7198   arbitRec.code = sd->code;
7199 }
7200 
7201 void
execARBIT_CHOOSEREF(Signal * signal)7202 Qmgr::execARBIT_CHOOSEREF(Signal* signal)
7203 {
7204   jamEntry();
7205   ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
7206   if (sd->code == 0) {
7207     jam();
7208     sd->code = ArbitCode::ErrUnknown;
7209   }
7210   execARBIT_CHOOSECONF(signal);
7211 }
7212 
7213 /**
7214  * Handle CRASH state.  We must crash immediately.
7215  * XXX tell other nodes in our party to crash too.
7216  */
void
Qmgr::stateArbitCrash(Signal* signal)
{
  jam();
  // First invocation after entering CRASH state.
  if (arbitRec.newstate) {
    jam();
    CRASH_INSERTION((Uint32)910 + arbitRec.state);
    arbitRec.setTimestamp();
    arbitRec.code = 0;
    arbitRec.newstate = false;
  }
  // Optionally (disabled by default) linger for one arbitration
  // timeout so the preceding event report has a chance to be sent
  // before the node goes down.
#ifdef ndb_arbit_crash_wait_for_event_report_to_get_out
  if (! (arbitRec.getTimediff() > getArbitTimeout()))
    return;
#endif
  // Test hooks fire before the real shutdown.
  CRASH_INSERTION(932);
  CRASH_INSERTION(938);
  CRASH_INSERTION(943);
  CRASH_INSERTION(944);
  // Deliberate node shutdown: this partition lost arbitration.
  progError(__LINE__, NDBD_EXIT_ARBIT_SHUTDOWN,
            "Arbitrator decided to shutdown this node");
}
7239 
7240 /**
7241  * Arbitrator may inform us that it will exit.  This lets us
7242  * start looking sooner for a new one.  Handle it like API node
7243  * failure.
7244  */
7245 void
execARBIT_STOPREP(Signal * signal)7246 Qmgr::execARBIT_STOPREP(Signal* signal)
7247 {
7248   jamEntry();
7249   ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
7250   if (! arbitRec.match(sd)) {
7251     jam();
7252     return;		// stray signal
7253   }
7254   arbitRec.code = ArbitCode::ApiExit;
7255   handleArbitApiFail(signal, arbitRec.node);
7256 }
7257 
7258 Uint32
count_previously_alive_nodes()7259 Qmgr::count_previously_alive_nodes()
7260 {
7261   /**
7262    * This function is called as part of PREP_FAILCONF handling. This
7263    * means that we are preparing a node failure. This means that
7264    * NDBCNTR have not yet heard about the node failure and thus we
7265    * can still use the method is_node_started to see whether the
7266    * node was fully started before this failure.
7267    *
7268    * This method is called as part of arbitration check. A node is
7269    * only counted as previously alive if the node was fully started.
7270    *
7271    * In addition we check that the node is a data node and that the
7272    * QMGR node state is what we expect it to be if it was previously
7273    * alive.
7274    */
7275   Uint32 count = 0;
7276   NodeRecPtr aPtr;
7277   for (aPtr.i = 1; aPtr.i < MAX_NDB_NODES; aPtr.i++)
7278   {
7279     ptrAss(aPtr, nodeRec);
7280     if (getNodeInfo(aPtr.i).getType() == NodeInfo::DB &&
7281         c_ndbcntr->is_node_started(aPtr.i) &&
7282         (aPtr.p->phase == ZRUNNING || aPtr.p->phase == ZPREPARE_FAIL))
7283     {
7284       jam();
7285       jamLine(Uint16(aPtr.i));
7286       count++;
7287     }
7288   }
7289   return count;
7290 }
7291 
7292 void
computeArbitNdbMask(NodeBitmaskPOD & aMask)7293 Qmgr::computeArbitNdbMask(NodeBitmaskPOD& aMask)
7294 {
7295   NodeRecPtr aPtr;
7296   aMask.clear();
7297   for (aPtr.i = 1; aPtr.i < MAX_NDB_NODES; aPtr.i++) {
7298     jam();
7299     ptrAss(aPtr, nodeRec);
7300     if (getNodeInfo(aPtr.i).getType() == NodeInfo::DB &&
7301         aPtr.p->phase == ZRUNNING)
7302     {
7303       jam();
7304       aMask.set(aPtr.i);
7305     }
7306   }
7307 }
7308 
7309 void
computeArbitNdbMask(NdbNodeBitmaskPOD & aMask)7310 Qmgr::computeArbitNdbMask(NdbNodeBitmaskPOD& aMask)
7311 {
7312   NodeRecPtr aPtr;
7313   aMask.clear();
7314   for (aPtr.i = 1; aPtr.i < MAX_NDB_NODES; aPtr.i++) {
7315     jam();
7316     ptrAss(aPtr, nodeRec);
7317     if (getNodeInfo(aPtr.i).getType() == NodeInfo::DB &&
7318         aPtr.p->phase == ZRUNNING)
7319     {
7320       jam();
7321       aMask.set(aPtr.i);
7322     }
7323   }
7324 }
7325 
7326 /**
7327  * Report arbitration event.  We use arbitration signal format
7328  * where sender (word 0) is event type.
7329  */
void
Qmgr::reportArbitEvent(Signal* signal, Ndb_logevent_type type,
                       const NodeBitmask mask)
{
  // Reuse the arbitration signal layout for the event: word 0 carries
  // the event type instead of the sender, and the code word packs the
  // arbitration state into its upper 16 bits.
  ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
  sd->sender = type;
  sd->code = arbitRec.code | (arbitRec.state << 16);
  sd->node = arbitRec.node;
  sd->ticket = arbitRec.ticket;
  sd->mask = mask;

  // Log to console/stdout
  LogLevel ll;
  ll.setLogLevel(LogLevel::llNodeRestart, 15);
  g_eventLogger->log(type, &signal->theData[0],
                     ArbitSignalData::SignalLength, 0, &ll);

  // Forward the same payload to CMVMI for distribution to subscribers.
  sendSignal(CMVMI_REF, GSN_EVENT_REP, signal,
    ArbitSignalData::SignalLength, JBB);
}
7350 
7351 // end of arbitration module
7352 
void
Qmgr::execDUMP_STATE_ORD(Signal* signal)
{
  // DUMP 1 [maxNodeId]: print QMGR's cluster view (president, start
  // status) and the phase of each node record up to the given node id.
  if (signal->theData[0] == 1)
  {
    unsigned max_nodes = MAX_NDB_NODES;
    if (signal->getLength() == 2)
    {
      max_nodes = signal->theData[1];
      if (max_nodes == 0 || max_nodes >= MAX_NODES)
      {
        max_nodes = MAX_NODES;
      }
      else
      {
        max_nodes++; // Include node id argument in loop
      }
    }
    infoEvent("creadyDistCom = %d, cpresident = %d\n",
	      creadyDistCom, cpresident);
    infoEvent("cpresidentAlive = %d, cpresidentCand = %d (gci: %d)\n",
              cpresidentAlive,
	      c_start.m_president_candidate,
	      c_start.m_president_candidate_gci);
    infoEvent("ctoStatus = %d\n", ctoStatus);
    for(Uint32 i = 1; i < max_nodes; i++){
      NodeRecPtr nodePtr;
      nodePtr.i = i;
      ptrCheckGuard(nodePtr, MAX_NODES, nodeRec);
      char buf[100];
      // Render the phase both symbolically and numerically.
      switch(nodePtr.p->phase){
      case ZINIT:
        sprintf(buf, "Node %d: ZINIT(%d)", i, nodePtr.p->phase);
        break;
      case ZSTARTING:
        sprintf(buf, "Node %d: ZSTARTING(%d)", i, nodePtr.p->phase);
        break;
      case ZRUNNING:
        sprintf(buf, "Node %d: ZRUNNING(%d)", i, nodePtr.p->phase);
        break;
      case ZPREPARE_FAIL:
        sprintf(buf, "Node %d: ZPREPARE_FAIL(%d)", i, nodePtr.p->phase);
        break;
      case ZFAIL_CLOSING:
        sprintf(buf, "Node %d: ZFAIL_CLOSING(%d)", i, nodePtr.p->phase);
        break;
      case ZAPI_INACTIVE:
        sprintf(buf, "Node %d: ZAPI_INACTIVE(%d)", i, nodePtr.p->phase);
        break;
      case ZAPI_ACTIVE:
        sprintf(buf, "Node %d: ZAPI_ACTIVE(%d)", i, nodePtr.p->phase);
        break;
      case ZAPI_ACTIVATION_ONGOING:
        sprintf(buf, "Node %d: ZAPI_ACTIVATION_ONGOING(%d)",
                i,
                nodePtr.p->phase);
        break;
      default:
        sprintf(buf, "Node %d: <UNKNOWN>(%d)", i, nodePtr.p->phase);
        break;
      }
      infoEvent("%s", buf);
    }
  }

#ifdef ERROR_INSERT
  // DUMP 935 <extra>: arm error insert 935 with an extra argument.
  if (signal->theData[0] == 935 && signal->getLength() == 2)
  {
    SET_ERROR_INSERT_VALUE(935);
    c_error_insert_extra = signal->theData[1];
  }
#endif

  // DUMP 900 <nodeId>: force an API node disconnect (test aid).
  if (signal->theData[0] == 900 && signal->getLength() == 2)
  {
    ndbout_c("disconnecting %u", signal->theData[1]);
    api_failed(signal, signal->theData[1]);
  }

  // DUMP 908 [tag]: dump heartbeat topology (dynamic ids, neighbours)
  // to the node out-log on a single greppable "HB:" line.
  if (signal->theData[0] == 908)
  {
    int tag = signal->getLength() < 2 ? -1 : signal->theData[1];
    char buf[8192];
    // for easy grepping in *out.log ...
    strcpy(buf, "HB:");
    if (tag >= 0)
      sprintf(buf+strlen(buf), "%d:", tag);
    sprintf(buf+strlen(buf), " pres:%u", cpresident);
    sprintf(buf+strlen(buf), " own:%u", getOwnNodeId());
    NodeRecPtr myNodePtr;
    myNodePtr.i = getOwnNodeId();
    ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);
    // ndynamicId packs two 16-bit halves: low = dynamic id, high = hi part.
    sprintf(buf+strlen(buf), " dyn:%u-%u", myNodePtr.p->ndynamicId & 0xFFFF, myNodePtr.p->ndynamicId >> 16);
    sprintf(buf+strlen(buf), " mxdyn:%u", c_maxDynamicId);
    // Heartbeat ring: left neighbour -> own node -> right neighbour.
    sprintf(buf+strlen(buf), " hb:%u->%u->%u", cneighbourl, getOwnNodeId(), cneighbourh);
    sprintf(buf+strlen(buf), " node:dyn-hi,cfg:");
    NodeRecPtr nodePtr;
    for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++)
    {
      ptrAss(nodePtr, nodeRec);
      Uint32 type = getNodeInfo(nodePtr.i).m_type;
      if (type == NodeInfo::DB)
      {
        sprintf(buf+strlen(buf), " %u:%u-%u,%u", nodePtr.i, nodePtr.p->ndynamicId & 0xFFFF, nodePtr.p->ndynamicId >> 16, nodePtr.p->hbOrder);
      }
    }
    ndbout << buf << endl;
  }

#ifdef ERROR_INSERT
  // DUMP 9992/9993 <id>: ids above MAX_NODES are symbolic —
  // 257 = left neighbour, 258 = right neighbour, 259 = president.
  // Map to a concrete node id and resend the dump to CMVMI.
  Uint32 dumpCode = signal->theData[0];
  if ((dumpCode == 9992) ||
      (dumpCode == 9993))
  {
    if (signal->getLength() == 2)
    {
      Uint32 nodeId = signal->theData[1];
      Uint32& newNodeId = signal->theData[1];
      Uint32 length = 2;
      assert(257 > MAX_NODES);
      if (nodeId > MAX_NODES)
      {
        const char* type = "None";
        switch (nodeId)
        {
        case 257:
        {
          /* Left (lower) neighbour */
          newNodeId = cneighbourl;
          type = "Left neighbour";
          break;
        }
        case 258:
        {
          /* Right (higher) neighbour */
          newNodeId = cneighbourh;
          type = "Right neighbour";
          break;
        }
        case 259:
        {
          /* President */
          newNodeId = cpresident;
          type = "President";
          break;
        }
        }
        ndbout_c("QMGR : Mapping request on node id %u to node id %u (%s)",
                 nodeId, newNodeId, type);
        // Unmapped ids (default case) leave newNodeId == nodeId: no resend.
        if (newNodeId != nodeId)
        {
          sendSignal(CMVMI_REF, GSN_DUMP_STATE_ORD, signal, length, JBB);
        }
      }
    }
  }

  // DUMP 9994 <delay>: set connectivity-check delay and enable the check.
  if (dumpCode == 9994)
  {
    ndbout_c("setCCDelay(%u)", signal->theData[1]);
    setCCDelay(signal->theData[1]);
    m_connectivity_check.m_enabled = true;
  }
#endif

  // DUMP 939 <nodeId>: force-close communication to a node without
  // any reply handling (RT_NO_REPLY), via TRPMAN.
  if (signal->theData[0] == 939 && signal->getLength() == 2)
  {
    jam();
    Uint32 nodeId = signal->theData[1];
    ndbout_c("Force close communication to %u", nodeId);
    SET_ERROR_INSERT_VALUE2(939, nodeId);
    CloseComReqConf * closeCom = CAST_PTR(CloseComReqConf,
                                          signal->getDataPtrSend());

    closeCom->xxxBlockRef = reference();
    closeCom->requestType = CloseComReqConf::RT_NO_REPLY;
    closeCom->failNo      = 0;
    closeCom->noOfNodes   = 1;
    closeCom->failedNodeId = nodeId;
    sendSignal(TRPMAN_REF, GSN_CLOSE_COMREQ, signal,
               CloseComReqConf::SignalLength, JBB);
  }
}//Qmgr::execDUMP_STATE_ORD()
7536 
void
Qmgr::execAPI_BROADCAST_REP(Signal* signal)
{
  jamEntry();
  // Copy the header before shifting the payload over it below.
  ApiBroadcastRep api= *(const ApiBroadcastRep*)signal->getDataPtr();

  SectionHandle handle(this, signal);
  // Strip the ApiBroadcastRep header: move the embedded payload to the
  // start of theData so it can be forwarded as the target signal.
  Uint32 len = signal->getLength() - ApiBroadcastRep::SignalLength;
  memmove(signal->theData, signal->theData+ApiBroadcastRep::SignalLength,
	  4*len);

  // Select all active API nodes running at least the requested version.
  NodeBitmask mask;
  NodeRecPtr nodePtr;
  for (nodePtr.i = 1; nodePtr.i < MAX_NODES; nodePtr.i++)
  {
    jam();
    ptrAss(nodePtr, nodeRec);
    if (nodePtr.p->phase == ZAPI_ACTIVE &&
	getNodeInfo(nodePtr.i).m_version >= api.minVersion)
    {
      jam();
      mask.set(nodePtr.i);
    }
  }

  if (mask.isclear())
  {
    jam();
    // No recipients: sections must still be released to avoid a leak.
    releaseSections(handle);
    return;
  }

  // Broadcast the embedded signal (api.gsn) to the selected API nodes;
  // the section handle ownership passes to sendSignal.
  NodeReceiverGroup rg(API_CLUSTERMGR, mask);
  sendSignal(rg, api.gsn, signal, len, JBB,
	     &handle);
}
7573 
void
Qmgr::execNODE_FAILREP(Signal * signal)
{
  jamEntry();
  NodeFailRep* nodeFail = (NodeFailRep*)signal->getDataPtr();
  // Newer senders ship the failed-node bitmask in a section; older
  // ones use the fixed-size inline bitmask in the signal body.
  if(signal->getNoOfSections() >= 1)
  {
    ndbrequire(ndbd_send_node_bitmask_in_section(
        getNodeInfo(refToNode(signal->getSendersBlockRef())).m_version));
    SegmentedSectionPtr ptr;
    SectionHandle handle(this, signal);
    handle.getSection(ptr, 0);
    memset(nodeFail->theNodes, 0, sizeof(nodeFail->theNodes));
    copy(nodeFail->theNodes, ptr);
    releaseSections(handle);
  }
  else
  {
    // Inline (48-bit-era) bitmask: zero the extension words.
    memset(nodeFail->theNodes + NdbNodeBitmask48::Size, 0,
           _NDB_NBM_DIFF_BYTES);
  }

  NdbNodeBitmask allFailed;
  allFailed.assign(NdbNodeBitmask::Size, nodeFail->theNodes);

  // make sure any distributed signals get acknowledged
  // destructive of the signal
  NdbNodeBitmask failedNodes;
  failedNodes.assign(NdbNodeBitmask::Size, nodeFail->theNodes);
  c_counterMgr.execNODE_FAILREP(signal, failedNodes);
  // Process each failed node: reset its multi-transporter bookkeeping
  // and, for nodegroup neighbours, restore single-transporter setup.
  Uint32 nodeId = 0;
  while (!allFailed.isclear())
  {
    nodeId = allFailed.find(nodeId + 1);
    //ndbrequire(nodeId != Bitmask::NotFound);
    allFailed.clear(nodeId);
    NodeRecPtr nodePtr;
    nodePtr.i = nodeId;
    ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
    // Clear all multi-transporter state; the node must redo the full
    // setup after it restarts.
    nodePtr.p->m_is_multi_trp_setup = false;
    nodePtr.p->m_is_ready_to_switch_trp = false;
    nodePtr.p->m_is_freeze_thread_completed = false;
    nodePtr.p->m_is_activate_trp_ready_for_me = false;
    nodePtr.p->m_is_activate_trp_ready_for_other = false;
    nodePtr.p->m_is_preparing_switch_trp = false;
    nodePtr.p->m_is_using_multi_trp = false;
    nodePtr.p->m_set_up_multi_trp_started = false;
    nodePtr.p->m_multi_trp_blockref = 0;
    nodePtr.p->m_used_num_multi_trps = 0;
    nodePtr.p->m_check_multi_trp_connect_loop_count = 0;
    nodePtr.p->m_num_activated_trps = 0;
    if (nodePtr.p->m_is_in_same_nodegroup)
    {
      jam();
      check_no_multi_trp(signal, nodePtr.i);
      globalTransporterRegistry.lockMultiTransporters();
      bool switch_required = false;
      Multi_Transporter *multi_trp =
        globalTransporterRegistry.get_node_multi_transporter(nodePtr.i);
      if (multi_trp &&
          globalTransporterRegistry.get_num_active_transporters(multi_trp) > 1)
      {
        /**
         * The timing of the NODE_FAILREP signal is such that the transporter
         * haven't had time to switch the active transporters yet, we know
         * this will happen, so we switch now to use the old transporter for
         * the neighbour node. The node is currently down, so will have to
         * be setup before it can be used again.
         *
         * We will restore the active transporters to be the multi
         * transporters to enable the transporters to be handled by the
         * disconnect code. This is why it is required to lock the
         * multi transporter mutex while performing this action.
         */
        switch_required = true;
        DEB_MULTI_TRP(("switch_active_trp for node %u's transporter",
                       nodePtr.i));
        globalTransporterRegistry.switch_active_trp(multi_trp);
      }

      DEB_MULTI_TRP(("Change neighbour node setup for node %u",
                     nodePtr.i));
      startChangeNeighbourNode();
      setNeighbourNode(nodePtr.i);
      endChangeNeighbourNode();
      if (switch_required)
      {
        // Switch back so the disconnect code sees the multi transporters.
        globalTransporterRegistry.switch_active_trp(multi_trp);
        DEB_MULTI_TRP(("switch_active_trp for node %u's transporter",
                       nodePtr.i));
      }
      globalTransporterRegistry.unlockMultiTransporters();
    }
  }
}
7669 
void
Qmgr::execALLOC_NODEID_REQ(Signal * signal)
{
  jamEntry();
  AllocNodeIdReq req = *(AllocNodeIdReq*)signal->getDataPtr();
  Uint32 error = 0;

  NodeRecPtr nodePtr;
  nodePtr.i = req.nodeId;
  if ((nodePtr.i >= MAX_NODES) ||
      ((req.nodeType == NodeInfo::DB) &&
       (nodePtr.i >= MAX_NDB_NODES)))
  {
    /* Ignore messages about nodes not even within range */
    jam();
    return;
  }
  ptrAss(nodePtr, nodeRec);

  // Requests from the management server (not from another QMGR) are
  // handled on the master path: validate, reserve the id with a
  // secret, and poll all QMGR instances for agreement.
  if (refToBlock(req.senderRef) != QMGR) // request from management server
  {
    /* master */
    Dbdih *dih = (Dbdih*)globalData.getBlock(DBDIH, instance());
    bool is_dih_master = dih->is_master();
    if (getOwnNodeId() != cpresident || !is_dih_master)
    {
      jam();
      /**
       * Either we are not president which leads to that we are not master
       * in DIH, or we are president but hasn't yet seen our election to
       * master in DIH. Either way we respond with NotMaster, if we are
       * president and not master the response will lead to a retry which
       * is likely to be successful.
       */
      if (getOwnNodeId() == cpresident)
      {
        jam();
        g_eventLogger->debug("President, but not master at ALLOC_NODEID_REQ");
      }
      error = AllocNodeIdRef::NotMaster;
    }
    else if (!opAllocNodeIdReq.m_tracker.done())
    {
      // A previous allocation round is still in flight.
      jam();
      error = AllocNodeIdRef::Busy;
    }
    else if (c_connectedNodes.get(req.nodeId))
    {
      jam();
      error = AllocNodeIdRef::NodeConnected;
    }
    else if (nodePtr.p->m_secret != 0)
    {
      // The id is already reserved by an earlier, unexpired allocation.
      jam();
      error = AllocNodeIdRef::NodeReserved;
    }
    else if (req.nodeType != getNodeInfo(req.nodeId).m_type)
    {
      jam();
      error = AllocNodeIdRef::NodeTypeMismatch;
    }
    else if (req.nodeType == NodeInfo::API && c_allow_api_connect == 0)
    {
      jam();
      error = AllocNodeIdRef::NotReady;
    }

    if (error)
    {
      jam();
      g_eventLogger->debug("Alloc node id for node %u failed, err: %u",
                           nodePtr.i,
                           error);
      AllocNodeIdRef * ref = (AllocNodeIdRef*)signal->getDataPtrSend();
      ref->senderRef = reference();
      ref->errorCode = error;
      ref->masterRef = numberToRef(QMGR, cpresident);
      ref->senderData = req.senderData;
      ref->nodeId = req.nodeId;
      sendSignal(req.senderRef, GSN_ALLOC_NODEID_REF, signal,
                 AllocNodeIdRef::SignalLength, JBB);
      return;
    }

    if (ERROR_INSERTED(934) && req.nodeId != getOwnNodeId())
    {
      CRASH_INSERTION(934);
    }

    /**
     * generate secret
     */
    // Secret is built from the current tick plus our node id so the
    // subsequent CONF can be matched against this exact reservation.
    const NDB_TICKS now = NdbTick_getCurrentTicks();
    const Uint32 secret_hi = Uint32(now.getUint64() >> 24);
    const Uint32 secret_lo = Uint32(now.getUint64() << 8) + getOwnNodeId();
    req.secret_hi = secret_hi;
    req.secret_lo = secret_lo;

    // Cap the reservation timeout at one minute.
    if (req.timeout > 60000)
      req.timeout = 60000;

    nodePtr.p->m_secret = (Uint64(secret_hi) << 32) + secret_lo;
    nodePtr.p->m_alloc_timeout = NdbTick_AddMilliseconds(now,req.timeout);

    opAllocNodeIdReq.m_req = req;
    opAllocNodeIdReq.m_error = 0;
    // Remember the requester's connect count so a reconnected
    // management server does not receive a stale answer.
    opAllocNodeIdReq.m_connectCount =
      getNodeInfo(refToNode(req.senderRef)).m_connectCount;

    jam();
    // Fan the request out to all QMGRs in the cluster; replies are
    // collected by execALLOC_NODEID_CONF/REF via the tracker.
    AllocNodeIdReq * req2 = (AllocNodeIdReq*)signal->getDataPtrSend();
    * req2 = req;
    req2->senderRef = reference();
    NodeReceiverGroup rg(QMGR, c_clusterNodes);
    RequestTracker & p = opAllocNodeIdReq.m_tracker;
    p.init<AllocNodeIdRef>(c_counterMgr, rg, GSN_ALLOC_NODEID_REF, 0);

    sendSignal(rg, GSN_ALLOC_NODEID_REQ, signal,
               AllocNodeIdReq::SignalLengthQMGR, JBB);
    return;
  }

  /* participant */
  // Request came from the master QMGR: perform the local checks only.
  if (c_connectedNodes.get(req.nodeId))
  {
    jam();
    error = AllocNodeIdRef::NodeConnected;
  }
  else if (req.nodeType != getNodeInfo(req.nodeId).m_type)
  {
    jam();
    error = AllocNodeIdRef::NodeTypeMismatch;
  }
  else if ((nodePtr.p->failState != NORMAL) ||
           ((req.nodeType == NodeInfo::DB) &&
            (cfailedNodes.get(nodePtr.i))))
  {
    /**
     * Either the node has committed its node failure in QMGR but not yet
     * completed the node internal node failure handling. Or the node
     * failure commit process is still ongoing in QMGR. We should not
     * allocate a node id in either case.
     */
    jam();
    error = AllocNodeIdRef::NodeFailureHandlingNotCompleted;
  }
  else if (req.nodeType == NodeInfo::API && nodePtr.p->phase != ZAPI_INACTIVE)
  {
    jam();
    if (cpresident != getOwnNodeId() && c_allow_api_connect == 0)
    {
      /**
       * Don't block during NR
       */
      jam();
    }
    else
    {
      jam();
      if (nodePtr.p->phase == ZFAIL_CLOSING)
      {
        /* Occurs during node startup */
        error = AllocNodeIdRef::NodeFailureHandlingNotCompleted;
      }
      else
      {
        error = AllocNodeIdRef::NodeReserved;
      }
    }
  }
#if 0
  /**
   * For now only make "time/secret" based reservation on master
   *   as we otherwise also need to clear it on failure + handle
   *   master failure
   */
  else if (nodePtr.p->m_secret != 0)
  {
    jam();
    error = AllocNodeIdRef::NodeReserved;
  }
#endif

  if (error)
  {
    jam();
    g_eventLogger->info("Alloc nodeid for node %u failed,err: %u",
                        req.nodeId,
                        error);
    AllocNodeIdRef * ref = (AllocNodeIdRef*)signal->getDataPtrSend();
    ref->senderRef = reference();
    ref->errorCode = error;
    ref->senderData = req.senderData;
    ref->nodeId = req.nodeId;
    ref->masterRef = numberToRef(QMGR, cpresident);
    sendSignal(req.senderRef, GSN_ALLOC_NODEID_REF, signal,
               AllocNodeIdRef::SignalLength, JBB);
    return;
  }

  // Local checks passed: echo the secret back to the master.
  AllocNodeIdConf * conf = (AllocNodeIdConf*)signal->getDataPtrSend();
  conf->senderRef = reference();
  conf->secret_hi = req.secret_hi;
  conf->secret_lo = req.secret_lo;
  sendSignal(req.senderRef, GSN_ALLOC_NODEID_CONF, signal,
             AllocNodeIdConf::SignalLength, JBB);
}
7877 
void
Qmgr::execALLOC_NODEID_CONF(Signal * signal)
{
  /* master */

  jamEntry();
  const AllocNodeIdConf * conf = (AllocNodeIdConf*)signal->getDataPtr();
  opAllocNodeIdReq.m_tracker.reportConf(c_counterMgr,
                                        refToNode(conf->senderRef));

  // A full-length CONF carries the secret back; it must match the one
  // we generated for this allocation round, otherwise the reply
  // belongs to a stale round and the request is failed.
  if (signal->getLength() >= AllocNodeIdConf::SignalLength)
  {
    jam();
    if (opAllocNodeIdReq.m_req.secret_hi != conf->secret_hi ||
        opAllocNodeIdReq.m_req.secret_lo != conf->secret_lo)
    {
      jam();
      // Record the first error only.
      if (opAllocNodeIdReq.m_error == 0)
      {
        jam();
        opAllocNodeIdReq.m_error = AllocNodeIdRef::Undefined;
      }
    }
  }

  // Completes the round once all tracked replies have arrived.
  completeAllocNodeIdReq(signal);
}
7905 
7906 
void
Qmgr::execALLOC_NODEID_REF(Signal * signal)
{
  /* master */

  jamEntry();
  const AllocNodeIdRef * ref = (AllocNodeIdRef*)signal->getDataPtr();

  // NF_FakeErrorREF is generated locally when a polled node fails
  // during the round rather than being a real answer.
  if (ref->errorCode == AllocNodeIdRef::NF_FakeErrorREF)
  {
    jam();
    if (ref->nodeId == refToNode(ref->senderRef))
    {
      /**
       * The node id we are trying to allocate has responded with a REF,
       * this was sent in response to a node failure, so we are most
       * likely not ready to allocate this node id yet. Report node
       * failure handling not ready yet.
       */
      jam();
      opAllocNodeIdReq.m_tracker.reportRef(c_counterMgr,
                                           refToNode(ref->senderRef));
      if (opAllocNodeIdReq.m_error == 0)
      {
        jam();
        opAllocNodeIdReq.m_error =
          AllocNodeIdRef::NodeFailureHandlingNotCompleted;
      }
    }
    else
    {
      // Some other polled node died: its vote is simply not needed.
      jam();
      opAllocNodeIdReq.m_tracker.ignoreRef(c_counterMgr,
                                           refToNode(ref->senderRef));
    }
  }
  else
  {
    // A real REF from a participant; keep the first error reported.
    jam();
    opAllocNodeIdReq.m_tracker.reportRef(c_counterMgr,
                                         refToNode(ref->senderRef));
    if (opAllocNodeIdReq.m_error == 0)
    {
      jam();
      opAllocNodeIdReq.m_error = ref->errorCode;
    }
  }
  // Completes the round once all tracked replies have arrived.
  completeAllocNodeIdReq(signal);
}
7956 
void
Qmgr::completeAllocNodeIdReq(Signal *signal)
{
  /* master */

  /**
   * Runs on the president after each ALLOC_NODEID_CONF/REF from a
   * participant.  When all participants have answered, reply to the
   * original requester with ALLOC_NODEID_CONF (allocation granted)
   * or ALLOC_NODEID_REF (allocation denied).
   */
  if (!opAllocNodeIdReq.m_tracker.done())
  {
    jam();
    /* Still waiting for answers from one or more participants */
    return;
  }

  if (opAllocNodeIdReq.m_connectCount !=
      getNodeInfo(refToNode(opAllocNodeIdReq.m_req.senderRef)).m_connectCount)
  {
    // management server not same version as the original requester
    /* The requester's connect count changed, i.e. it disconnected and
     * reconnected since sending the request; drop the stale reply. */
    jam();
    return;
  }

  if (opAllocNodeIdReq.m_tracker.hasRef())
  {
    jam();

    {
      /**
       * Clear reservation
       */
      NodeRecPtr nodePtr;
      nodePtr.i = opAllocNodeIdReq.m_req.nodeId;
      ptrAss(nodePtr, nodeRec);
      nodePtr.p->m_secret = 0;
    }
    g_eventLogger->info("Alloc node id for node %u failed, err: %u",
                        opAllocNodeIdReq.m_req.nodeId,
                        opAllocNodeIdReq.m_error);

    /* At least one participant refused: report the first recorded
     * error back to the requester */
    AllocNodeIdRef * ref = (AllocNodeIdRef*)signal->getDataPtrSend();
    ref->senderRef = reference();
    ref->senderData = opAllocNodeIdReq.m_req.senderData;
    ref->nodeId = opAllocNodeIdReq.m_req.nodeId;
    ref->errorCode = opAllocNodeIdReq.m_error;
    ref->masterRef = numberToRef(QMGR, cpresident);
    ndbassert(AllocNodeIdRef::SignalLength == 5);
    sendSignal(opAllocNodeIdReq.m_req.senderRef, GSN_ALLOC_NODEID_REF, signal,
               AllocNodeIdRef::SignalLength, JBB);
    return;
  }

  jam();

  g_eventLogger->info("Alloc node id for node %u succeeded",
                      opAllocNodeIdReq.m_req.nodeId);
  /* All participants confirmed: echo the secret back to the requester */
  AllocNodeIdConf * conf = (AllocNodeIdConf*)signal->getDataPtrSend();
  conf->senderRef = reference();
  conf->senderData = opAllocNodeIdReq.m_req.senderData;
  conf->nodeId = opAllocNodeIdReq.m_req.nodeId;
  conf->secret_lo = opAllocNodeIdReq.m_req.secret_lo;
  conf->secret_hi = opAllocNodeIdReq.m_req.secret_hi;
  sendSignal(opAllocNodeIdReq.m_req.senderRef, GSN_ALLOC_NODEID_CONF, signal,
             AllocNodeIdConf::SignalLength, JBB);

  /**
   * We are the master and master DIH wants to keep track of node restart
   * state to be able to control LCP start and stop and also to be able
   * to easily report this state to the user when he asks for it.
   */
  AllocNodeIdRep *rep = (AllocNodeIdRep*)signal->getDataPtrSend();
  rep->nodeId = opAllocNodeIdReq.m_req.nodeId;
  EXECUTE_DIRECT(DBDIH, GSN_ALLOC_NODEID_REP, signal,
		 AllocNodeIdRep::SignalLength);
}
8028 
void
Qmgr::execSTOP_REQ(Signal* signal)
{
  jamEntry();

  /**
   * Store a (multi-node) stop request.  The set of nodes to stop
   * arrives either as a long-signal section (newer senders) or inline
   * in the fixed signal part (legacy 48-bit bitmask).
   */
  const StopReq* req = (const StopReq*)signal->getDataPtr();
  c_stopReq.senderRef = req->senderRef;
  c_stopReq.senderData = req->senderData;
  c_stopReq.requestInfo = req->requestInfo;
  c_stopReq.nodes.clear();
  if (signal->getNoOfSections() >= 1)
  {
    jam();
    /* Node bitmask carried in section 0 */
    SectionHandle handle(this, signal);
    SegmentedSectionPtr ptr;
    handle.getSection(ptr, 0);
    ndbrequire(ptr.sz <= NdbNodeBitmask::Size);
    copy(c_stopReq.nodes.rep.data, ptr);
    releaseSections(handle);
  }
  else
  {
    jam();
    /* Legacy format: bitmask inline in the fixed signal part */
    c_stopReq.nodes.assign(NdbNodeBitmask48::Size, req->nodes);
  }

  if (c_stopReq.senderRef)
  {
    jam();
    /* A stop request routed through us must include ourselves */
    ndbrequire(c_stopReq.nodes.get(getOwnNodeId()));

    /* Acknowledge; the actual stop is performed later via
     * check_multi_node_shutdown() */
    StopConf *conf = (StopConf*)signal->getDataPtrSend();
    conf->senderData = c_stopReq.senderData;
    conf->nodeState = getOwnNodeId();
    sendSignal(c_stopReq.senderRef,
	       GSN_STOP_CONF, signal, StopConf::SignalLength, JBA);
  }
}
8067 
bool
Qmgr::check_multi_node_shutdown(Signal* signal)
{
  /**
   * Check whether this node is part of a pending multi-node shutdown
   * (a stored STOP_REQ that includes our node id).  If so, order CMVMI
   * to either restart or stop the node and return true; otherwise
   * return false and let normal failure handling proceed.
   */
  if (c_stopReq.senderRef &&
      c_stopReq.nodes.get(getOwnNodeId()))
  {
    jam();
    if(StopReq::getPerformRestart(c_stopReq.requestInfo))
    {
      jam();
      /* Restart was requested: pass the restart info on to CMVMI */
      StartOrd * startOrd = (StartOrd *)&signal->theData[0];
      startOrd->restartInfo = c_stopReq.requestInfo;
      sendSignal(CMVMI_REF, GSN_START_ORD, signal, 2, JBA);
    } else {
      /* Plain stop, no restart */
      sendSignal(CMVMI_REF, GSN_STOP_ORD, signal, 1, JBA);
    }
    return true;
  }
  return false;
}
8088 
8089 int
check_hb_order_config()8090 Qmgr::check_hb_order_config()
8091 {
8092   m_hb_order_config_used = false;
8093   Uint32 count = 0;
8094   Uint32 count_zero = 0;
8095   NodeRecPtr nodePtr;
8096   for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++)
8097   {
8098     ptrAss(nodePtr, nodeRec);
8099     const NodeInfo& nodeInfo = getNodeInfo(nodePtr.i);
8100     if (nodeInfo.m_type == NodeInfo::DB)
8101     {
8102       count++;
8103       if (nodePtr.p->hbOrder == 0)
8104         count_zero++;
8105     }
8106   }
8107   ndbrequire(count != 0); // must have node info
8108   if (count_zero == count)
8109   {
8110     jam();
8111     return 0; // no hbOrder defined
8112   }
8113   if (count_zero != 0)
8114   {
8115     jam();
8116     return -1; // error: not all zero or all nonzero
8117   }
8118   for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++)
8119   {
8120     ptrAss(nodePtr, nodeRec);
8121     const NodeInfo& nodeInfo = getNodeInfo(nodePtr.i);
8122     if (nodeInfo.m_type == NodeInfo::DB)
8123     {
8124       NodeRecPtr nodePtr2;
8125       for (nodePtr2.i = nodePtr.i + 1; nodePtr2.i < MAX_NDB_NODES; nodePtr2.i++)
8126       {
8127         ptrAss(nodePtr2, nodeRec);
8128         const NodeInfo& nodeInfo2 = getNodeInfo(nodePtr2.i);
8129         if (nodeInfo2.m_type == NodeInfo::DB)
8130         {
8131           if (nodePtr.i != nodePtr2.i &&
8132               nodePtr.p->hbOrder == nodePtr2.p->hbOrder)
8133           {
8134             jam();
8135             return -2; // error: duplicate nonzero value
8136           }
8137         }
8138       }
8139     }
8140   }
8141   m_hb_order_config_used = true;
8142   return 0;
8143 }
8144 
/* Connectivity-check timing, in units of checkConnectivityTimeSignal()
 * ticks: after CC_SuspectTicks without a NODE_PING_CONF a node is marked
 * suspect; after CC_FailedTicks it is declared failed. */
static const Uint32 CC_SuspectTicks = 1;
static const Uint32 CC_FailedTicks = 2;
8147 
8148 void
startConnectivityCheck(Signal * signal,Uint32 reason,Uint32 causingNode)8149 Qmgr::startConnectivityCheck(Signal* signal, Uint32 reason, Uint32 causingNode)
8150 {
8151   jam();
8152   ndbrequire(m_connectivity_check.getEnabled());
8153 
8154   if (m_connectivity_check.m_active)
8155   {
8156     jam();
8157     /* Connectivity check underway already
8158      * do nothing
8159      */
8160     return;
8161   }
8162 
8163 
8164   m_connectivity_check.m_nodesPinged.clear();
8165 
8166   /* Send NODE_PINGREQ signal to all other running nodes, and
8167    * initialise connectivity check bitmasks.
8168    * Note that nodes may already be considered suspect due to
8169    * a previous connectivity check round.
8170    */
8171   Uint32 ownId = getOwnNodeId();
8172   NodePingReq* pingReq = CAST_PTR(NodePingReq, &signal->theData[0]);
8173   pingReq->senderData = ++m_connectivity_check.m_currentRound;
8174   pingReq->senderRef = reference();
8175 
8176   for (Uint32 i=1; i < MAX_NDB_NODES; i++)
8177   {
8178     if (i != ownId)
8179     {
8180       NodeRec& node = nodeRec[i];
8181       if (node.phase == ZRUNNING)
8182       {
8183         /* If connection was considered ok, treat as unknown,
8184          * If it was considered slow, continue to treat
8185          *   as slow
8186          */
8187         sendSignal(node.blockRef,
8188                    GSN_NODE_PING_REQ,
8189                    signal,
8190                    NodePingReq::SignalLength,
8191                    JBA);
8192 
8193         m_connectivity_check.m_nodesPinged.set(i);
8194       }
8195     }
8196   }
8197 
8198   /* Initialise result bitmasks */
8199   m_connectivity_check.m_nodesWaiting.assign(m_connectivity_check.m_nodesPinged);
8200   m_connectivity_check.m_nodesFailedDuring.clear();
8201 
8202   /* Ensure only live nodes are considered suspect */
8203   m_connectivity_check.m_nodesSuspect.bitAND(m_connectivity_check.m_nodesPinged);
8204 
8205   const char* reasonText = "Unknown";
8206   bool firstTime = true;
8207 
8208   switch(reason)
8209   {
8210   case FailRep::ZHEARTBEAT_FAILURE:
8211     reasonText = "Heartbeat failure";
8212     break;
8213   case FailRep::ZCONNECT_CHECK_FAILURE:
8214     reasonText = "Connectivity check request";
8215     break;
8216   default:
8217     firstTime = false;
8218     ndbrequire(m_connectivity_check.m_nodesSuspect.count() > 0);
8219     break;
8220   }
8221 
8222   if (!m_connectivity_check.m_nodesPinged.isclear())
8223   {
8224     jam();
8225     {
8226       char buff[NdbNodeBitmask::TextLength + 1];
8227       m_connectivity_check.m_nodesPinged.getText(buff);
8228       if (firstTime)
8229       {
8230         g_eventLogger->info("QMGR : Starting connectivity check of %u other nodes (%s) due to %s from node %u.",
8231                             m_connectivity_check.m_nodesPinged.count(),
8232                             buff,
8233                             reasonText,
8234                             causingNode);
8235       }
8236       else
8237       {
8238         char buff2[NdbNodeBitmask::TextLength + 1];
8239         m_connectivity_check.m_nodesSuspect.getText(buff2);
8240         g_eventLogger->info("QMGR : Restarting connectivity check of %u other nodes (%s) due to %u syspect nodes (%s)",
8241                             m_connectivity_check.m_nodesPinged.count(),
8242                             buff,
8243                             m_connectivity_check.m_nodesSuspect.count(),
8244                             buff2);
8245       }
8246     }
8247 
8248     /* Generate cluster log event */
8249     Uint32 bitmaskSz = NdbNodeBitmask::Size;
8250     signal->theData[0] = NDB_LE_ConnectCheckStarted;
8251     signal->theData[1] = m_connectivity_check.m_nodesPinged.count();
8252     signal->theData[2] = reason;
8253     signal->theData[3] = causingNode;
8254     signal->theData[4] = bitmaskSz;
8255     Uint32* sigPtr = &signal->theData[5];
8256     m_connectivity_check.m_nodesPinged.copyto(bitmaskSz, sigPtr); sigPtr+= bitmaskSz;
8257     m_connectivity_check.m_nodesSuspect.copyto(bitmaskSz, sigPtr);
8258 
8259     LinearSectionPtr lsptr[3];
8260     lsptr[0].p = signal->theData;
8261     lsptr[0].sz = 5 + 2 * NdbNodeBitmask::Size;
8262     sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 1, JBB, lsptr, 1);
8263 
8264     m_connectivity_check.m_active = true;
8265     m_connectivity_check.m_tick = 0;
8266     const NDB_TICKS now = NdbTick_getCurrentTicks();
8267     m_connectivity_check.m_timer.reset(now);
8268   }
8269   else
8270   {
8271     g_eventLogger->info("QMGR : Connectivity check requested due to %s (from %u) not started as no other running nodes.",
8272                         reasonText,
8273                         causingNode);
8274   }
8275 }
8276 
void
Qmgr::execNODE_PINGREQ(Signal* signal)
{
  /**
   * Another node asks us to prove our connectivity.  We may start our
   * own connectivity check round first, then answer with
   * NODE_PING_CONF echoing the sender's round number (senderData).
   */
  jamEntry();
  Uint32 ownId = getOwnNodeId();
  const NodePingReq* pingReq = CAST_CONSTPTR(NodePingReq, &signal->theData[0]);
  Uint32 sendersRef = signal->getSendersBlockRef();
  Uint32 sendersNodeId = refToNode(sendersRef);
  Uint32 senderData = pingReq->senderData;

  ndbrequire(sendersNodeId != ownId);

  /* We will start our own connectivity check if necessary
   * before responding with PING_CONF to the requestor.
   * This means that the sending node will receive our PING_REQ
   * before our PING_CONF, which should avoid them starting an
   * unnecessary extra connectivity check round in some cases.
   */
  if (likely(m_connectivity_check.getEnabled()))
  {
    jam();
    /* We have connectivity checking configured */
    if (! m_connectivity_check.m_active)
    {
      jam();

      {
        /* Don't start a new connectivity check if the requesting
         * node has failed from our point of view
         */
        NodeRecPtr nodePtr;
        nodePtr.i = sendersNodeId;
        ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
        if (unlikely(nodePtr.p->phase != ZRUNNING))
        {
          jam();

          /* Note: no PING_CONF is sent in this case */
          g_eventLogger->warning("QMGR : Discarding NODE_PINGREQ from non-running node %u (%u)",
                                 sendersNodeId, nodePtr.p->phase);
          return;
        }
      }

      /* Start our own Connectivity Check now indicating reason and causing node */
      startConnectivityCheck(signal, FailRep::ZCONNECT_CHECK_FAILURE, sendersNodeId);
    }
  }
  else
  {
    jam();
    /* Configuration mismatch between nodes: still answer the ping below */
    g_eventLogger->warning("QMGR : NODE_PINGREQ received from node %u, but connectivity "
                           "checking not configured on this node.  Ensure all "
                           "nodes have the same configuration for parameter "
                           "ConnectCheckIntervalMillis.",
                           sendersNodeId);
  }

  /* Now respond with NODE_PINGCONF */
  NodePingConf* pingConf = CAST_PTR(NodePingConf, &signal->theData[0]);

  pingConf->senderData = senderData;
  pingConf->senderRef = reference();

  sendSignal(sendersRef,
             GSN_NODE_PING_CONF,
             signal,
             NodePingConf::SignalLength,
             JBA);
}
8346 
void
Qmgr::ConnectCheckRec::reportNodeConnect(Uint32 nodeId)
{
  /* A (re)connected node is clearly reachable again. */
  /* Clear any suspicion */
  m_nodesSuspect.clear(nodeId);
}
8353 
8354 bool
reportNodeFailure(Uint32 nodeId)8355 Qmgr::ConnectCheckRec::reportNodeFailure(Uint32 nodeId)
8356 {
8357   if (unlikely(m_active))
8358   {
8359     m_nodesFailedDuring.set(nodeId);
8360 
8361     if (m_nodesWaiting.get(nodeId))
8362     {
8363       /* We were waiting for a NODE_PING_CONF from this node,
8364        * remove it from the set
8365        */
8366       m_nodesWaiting.clear(nodeId);
8367 
8368       return m_nodesWaiting.isclear();
8369     }
8370   }
8371   return false;
8372 }
8373 
void
Qmgr::execNODE_PINGCONF(Signal* signal)
{
  /**
   * A node answered our NODE_PING_REQ.  Remove it from the waiting
   * set; if it answered before the suspect deadline, clear any
   * suspicion about it.  When no node remains waiting the round is
   * complete.
   */
  jamEntry();

  ndbrequire(m_connectivity_check.getEnabled());

  const NodePingConf* pingConf = CAST_CONSTPTR(NodePingConf, &signal->theData[0]);
  Uint32 sendersBlockRef = signal->getSendersBlockRef();
  Uint32 sendersNodeId = refToNode(sendersBlockRef);
  Uint32 roundNumber = pingConf->senderData;

  ndbrequire(sendersNodeId != getOwnNodeId());
  ndbrequire((m_connectivity_check.m_active)                                || /* Normal */
             (m_connectivity_check.m_nodesWaiting.get(sendersNodeId)          || /* We killed last round */
              m_connectivity_check.m_nodesFailedDuring.get(sendersNodeId)));     /* Someone killed */

  /* Late or stale replies (old round, or round already ended) are
   * logged and dropped. */
  if (unlikely((! m_connectivity_check.m_active) ||
               (roundNumber != m_connectivity_check.m_currentRound)))
  {
    g_eventLogger->warning("QMGR : Received NODEPING_CONF from node %u for round %u, "
                           "but we are %sactive on round %u.  Discarding.",
                           sendersNodeId,
                           roundNumber,
                           ((m_connectivity_check.m_active)?"":"in"),
                           m_connectivity_check.m_currentRound);
    return;
  }

  if (ERROR_INSERTED(938))
  {
    ndbout_c("QMGR : execNODE_PING_CONF() from %u in tick %u",
             sendersNodeId, m_connectivity_check.m_tick);
  }

  /* Node must have been pinged, we must be waiting for the response,
   * or the node must have already failed
   */
  ndbrequire(m_connectivity_check.m_nodesPinged.get(sendersNodeId));
  ndbrequire(m_connectivity_check.m_nodesWaiting.get(sendersNodeId) ||
             m_connectivity_check.m_nodesFailedDuring.get(sendersNodeId));

  m_connectivity_check.m_nodesWaiting.clear(sendersNodeId);

  if (likely(m_connectivity_check.m_tick < CC_SuspectTicks))
  {
    jam();
    /* Node responded on time, clear any suspicion about it */
    m_connectivity_check.m_nodesSuspect.clear(sendersNodeId);
  }

  if (m_connectivity_check.m_nodesWaiting.isclear())
  {
    jam();
    /* Connectivity check round is now finished */
    connectivityCheckCompleted(signal);
  }
}
8432 
void
Qmgr::connectivityCheckCompleted(Signal* signal)
{
  /**
   * End of a connectivity check round: summarise the outcome in the
   * node log and cluster log, then either start a new round (if
   * suspect nodes remain) or stop the protocol.
   */
  jam();

  m_connectivity_check.m_active = false;

  /* Log the following :
   * Nodes checked
   * Nodes responded ok
   * Nodes responded late (now suspect)
   * Nodes failed to respond.
   * Nodes failed during
   */
  char pinged[NdbNodeBitmask::TextLength + 1];
  char late[NdbNodeBitmask::TextLength + 1];
  char silent[NdbNodeBitmask::TextLength + 1];
  char failed[NdbNodeBitmask::TextLength + 1];

  /* Any 'waiting' nodes have been killed
   * Surviving suspects do not include them.
   */
  NdbNodeBitmask survivingSuspects(m_connectivity_check.m_nodesSuspect);
  survivingSuspects.bitANDC(m_connectivity_check.m_nodesWaiting);

  /* Nodes that failed during the check are also excluded */
  survivingSuspects.bitANDC(m_connectivity_check.m_nodesFailedDuring);

  m_connectivity_check.m_nodesPinged.getText(pinged);
  survivingSuspects.getText(late);
  m_connectivity_check.m_nodesWaiting.getText(silent);
  m_connectivity_check.m_nodesFailedDuring.getText(failed);

  g_eventLogger->info("QMGR : Connectivity check completed, "
                      "%u other nodes checked (%s), "
                      "%u responded on time, "
                      "%u responded late (%s), "
                      "%u no response will be failed (%s), "
                      "%u failed during check (%s)\n",
                      m_connectivity_check.m_nodesPinged.count(),
                      pinged,
                      m_connectivity_check.m_nodesPinged.count() -
                      m_connectivity_check.m_nodesSuspect.count(),
                      survivingSuspects.count(),
                      late,
                      m_connectivity_check.m_nodesWaiting.count(),
                      silent,
                      m_connectivity_check.m_nodesFailedDuring.count(),
                      failed);

  /* Log in Cluster log */
  signal->theData[0] = NDB_LE_ConnectCheckCompleted;
  signal->theData[1] = m_connectivity_check.m_nodesPinged.count();
  signal->theData[2] = survivingSuspects.count();
  signal->theData[3] = m_connectivity_check.m_nodesWaiting.count() +
    m_connectivity_check.m_nodesFailedDuring.count();

  sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);

  if (survivingSuspects.count() > 0)
  {
    jam();
    /* Still suspect nodes, start another round */
    g_eventLogger->info("QMGR : Starting new connectivity check due to suspect nodes.");
    /* Restart connectivity check, no external reason or cause */
    startConnectivityCheck(signal, 0, 0);
  }
  else
  {
    jam();
    /* No suspect nodes, stop the protocol now */

    g_eventLogger->info("QMGR : All other nodes (%u) connectivity ok.",
                        m_connectivity_check.m_nodesPinged.count() -
                        (m_connectivity_check.m_nodesWaiting.count() +
                         m_connectivity_check.m_nodesFailedDuring.count()));

    /* Send a heartbeat to our right neighbour at this point as a gesture
     * of goodwill
     */
    sendHeartbeat(signal);
    hb_send_timer.reset(NdbTick_getCurrentTicks());
  };
}
8517 
void
Qmgr::checkConnectivityTimeSignal(Signal* signal)
{
  /* Executed periodically when a connectivity check is
   * underway.
   * After CC_SuspectTicks have elapsed, any nodes
   * which have not responded are considered
   * 'Suspect'.
   * After CC_FailedTicks have elapsed, any nodes
   * which have not responded are considered
   * to have failed, and failure handling
   * begins.
   */
  jam();

  /* Preconditions, otherwise we shouldn't have been called */
  ndbrequire(m_connectivity_check.getEnabled());
  ndbrequire(m_connectivity_check.m_active);
  ndbrequire(!m_connectivity_check.m_nodesWaiting.isclear());

  m_connectivity_check.m_tick++;

  switch (m_connectivity_check.m_tick)
  {
  case CC_SuspectTicks:
  {
    jam();
    /* Still waiting to hear from some nodes, they are now
     * suspect
     */
    m_connectivity_check.m_nodesSuspect.bitOR(m_connectivity_check.m_nodesWaiting);
    return;
  }
  case CC_FailedTicks:
  {
    jam();
    /* Still waiting to hear from some nodes, they will now
     * be failed
     */
    m_connectivity_check.m_active = false;
    Uint32 nodeId = 0;

    /* Fail every node still in the waiting set */
    while ((nodeId = m_connectivity_check.m_nodesWaiting.find(nodeId))
           != BitmaskImpl::NotFound)
    {
      jam();
      /* Log failure reason */
      /* Todo : Connectivity Check specific failure log? */
      signal->theData[0] = NDB_LE_DeadDueToHeartbeat;
      signal->theData[1] = nodeId;

      sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);

      /* Fail the node */
      /* TODO : Consider real time break here */
      failReportLab(signal, nodeId, FailRep::ZCONNECT_CHECK_FAILURE, getOwnNodeId());
      nodeId++;
    }

    /* Now handle the end of the Connectivity Check */
    connectivityCheckCompleted(signal);
  }
  }
}
8582 
bool
Qmgr::isNodeConnectivitySuspect(Uint32 nodeId) const
{
  /* True when the node responded late in a connectivity check round
   * and has not since been cleared of suspicion. */
  return m_connectivity_check.m_nodesSuspect.get(nodeId);
}
8588 
void
Qmgr::handleFailFromSuspect(Signal* signal,
                            Uint32 reason,
                            Uint16 aFailedNode,
                            Uint16 sourceNode)
{
  jam();

  const char* reasonText = "Unknown";

  /* We have received a failure report about some node X from
   * some other node that we consider to have suspect connectivity
   * which may have caused the report.
   *
   * We will 'invert' the sense of this, and handle it as
   * a failure report of the sender, with the same cause.
   */
  switch(reason)
  {
  case FailRep::ZCONNECT_CHECK_FAILURE:
    jam();
    /* Suspect says that connectivity check failed for another node.
     * As suspect has bad connectivity from our point of view, we
     * blame him.
     */
    reasonText = "ZCONNECT_CHECK_FAILURE";
    break;
  case FailRep::ZLINK_FAILURE:
    jam();
    /* Suspect says that link failed for another node.
     * As suspect has bad connectivity from our point of view, we
     * blame her.
     */
    reasonText = "ZLINK_FAILURE";
    break;
  default:
    /* Only the two causes above may be inverted */
    ndbabort();
  }

  g_eventLogger->warning("QMGR : Received Connectivity failure notification about "
                         "%u from suspect node %u with reason %s.  "
                         "Mapping to failure of %u sourced by me.",
                         aFailedNode, sourceNode, reasonText, sourceNode);

  /* Cluster log: record the rejected report before failing the sender */
  signal->theData[0] = NDB_LE_NodeFailRejected;
  signal->theData[1] = reason;
  signal->theData[2] = aFailedNode;
  signal->theData[3] = sourceNode;

  sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);

  /* Fail the suspect sender instead of the reported node */
  failReportLab(signal, sourceNode, (FailRep::FailCause) reason, getOwnNodeId());
}
8642 
8643 ProcessInfo *
getProcessInfo(Uint32 nodeId)8644 Qmgr::getProcessInfo(Uint32 nodeId)
8645 {
8646   ProcessInfo * storedProcessInfo = 0;
8647   Int16 index = processInfoNodeIndex[nodeId];
8648   if(index >= 0)
8649     storedProcessInfo = & receivedProcessInfo[index];
8650   else if(nodeId == getOwnNodeId())
8651     storedProcessInfo = getOwnProcessInfo(getOwnNodeId());
8652   return storedProcessInfo;
8653 }
8654 
void
Qmgr::execDBINFO_SCANREQ(Signal *signal)
{
  /**
   * ndbinfo scan handler for the tables QMGR serves:
   *  - MEMBERSHIP: this node's view of cluster membership, president,
   *    president successor and arbitration state.
   *  - PROCESSES: process information for connected API/MGM nodes.
   */
  DbinfoScanReq req= *(DbinfoScanReq*)signal->theData;
  Ndbinfo::Ratelimit rl;

  jamEntry();
  switch(req.tableId) {
  case Ndbinfo::MEMBERSHIP_TABLEID:
  {
    jam();
    Ndbinfo::Row row(signal, req);
    row.write_uint32(getOwnNodeId());
    row.write_uint32(getNodeState().nodeGroup);
    row.write_uint32(cneighbourl);
    row.write_uint32(cneighbourh);
    row.write_uint32(cpresident);

    // President successor
    /* Successor = running non-president node with the smallest dynamic
     * id (lower 16 bits); 0 if none found. */
    Uint32 successor = 0;
    {
      NodeRecPtr nodePtr;
      UintR minDynamicId = (UintR)-1;
      for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++)
      {
        jam();
        ptrAss(nodePtr, nodeRec);
        if (nodePtr.p->phase == ZRUNNING)
        {
          if ((nodePtr.p->ndynamicId & 0xFFFF) < minDynamicId)
          {
            jam();
            if (cpresident !=  nodePtr.i)
            {
              minDynamicId = (nodePtr.p->ndynamicId & 0xFFFF);
              successor = nodePtr.i;
            }
          }
        }
      }
    }
    row.write_uint32(successor);

    NodeRecPtr myNodePtr;
    myNodePtr.i = getOwnNodeId();
    ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);
    row.write_uint32(myNodePtr.p->ndynamicId);

    row.write_uint32(arbitRec.node); // arbitrator

    char ticket[20]; // Need 16 characters + 1 for trailing '\0'
    arbitRec.ticket.getText(ticket, sizeof(ticket));
    row.write_string(ticket);

    row.write_uint32(arbitRec.state);

    // arbitrator connected
    row.write_uint32(c_connectedNodes.get(arbitRec.node));

    // Find potential (rank1 and rank2) arbitrators that are connected.
    NodeRecPtr aPtr;
    // buf_size: Node nr (max 3 chars) and ', '  + trailing '\0'
    const int buf_size = 5 * MAX_NODES + 1;
    char buf[buf_size];

    /* One comma-separated list column per arbitrator rank */
    for (unsigned rank = 1; rank <= 2; rank++)
    {
      jam();
      aPtr.i = 0;
      const unsigned stop = NodeBitmask::NotFound;
      int buf_offset = 0;
      const char* delimiter = "";

      while ((aPtr.i = arbitRec.apiMask[rank].find(aPtr.i + 1)) != stop)
      {
        jam();
        ptrAss(aPtr, nodeRec);
        if (c_connectedNodes.get(aPtr.i))
        {
          buf_offset += BaseString::snprintf(buf + buf_offset,
                                             buf_size - buf_offset,
                                             "%s%u", delimiter, aPtr.i);
          delimiter = ", ";
        }
      }

      /* "-" marks an empty candidate list */
      if (buf_offset == 0)
        row.write_string("-");
      else
        row.write_string(buf);
    }

    ndbinfo_send_row(signal, req, row, rl);
    break;
  }
  case Ndbinfo::PROCESSES_TABLEID:
  {
    jam();
    for(int i = 1 ; i <= max_api_node_id ; i++)
    {
      NodeInfo nodeInfo = getNodeInfo(i);
      if(nodeInfo.m_connected)
      {
        char version_buffer[NDB_VERSION_STRING_BUF_SZ];
        ndbGetVersionString(nodeInfo.m_version, nodeInfo.m_mysql_version,
                            0, version_buffer, NDB_VERSION_STRING_BUF_SZ);

        ProcessInfo *processInfo = getProcessInfo(i);
        if(processInfo && processInfo->isValid())
        {
          /* Full row built from a received ProcessInfoRep */
          char uri_buffer[512];
          processInfo->getServiceUri(uri_buffer, sizeof(uri_buffer));
          Ndbinfo::Row row(signal, req);
          row.write_uint32(getOwnNodeId());                 // reporting_node_id
          row.write_uint32(i);                              // node_id
          row.write_uint32(nodeInfo.getType());             // node_type
          row.write_string(version_buffer);                 // node_version
          row.write_uint32(processInfo->getPid());          // process_id
          row.write_uint32(processInfo->getAngelPid());     // angel_process_id
          row.write_string(processInfo->getProcessName());  // process_name
          row.write_string(uri_buffer);                     // service_URI
          ndbinfo_send_row(signal, req, row, rl);
        }
        else if(nodeInfo.m_type != NodeInfo::DB &&
                nodeInfo.m_version > 0 &&
                ! ndbd_supports_processinfo(nodeInfo.m_version))
        {
          /* MGM/API node is too old to send ProcessInfoRep, so create a
             fallback-style report */

          struct in_addr addr= globalTransporterRegistry.get_connect_address(i);
          char service_uri[32];
          strcpy(service_uri, "ndb://");
          Ndb_inet_ntop(AF_INET, & addr, service_uri + 6, 24);

          Ndbinfo::Row row(signal, req);
          row.write_uint32(getOwnNodeId());                 // reporting_node_id
          row.write_uint32(i);                              // node_id
          row.write_uint32(nodeInfo.getType());             // node_type
          row.write_string(version_buffer);                 // node_version
          row.write_uint32(0);                              // process_id
          row.write_uint32(0);                              // angel_process_id
          row.write_string("");                             // process_name
          row.write_string(service_uri);                    // service_URI
          ndbinfo_send_row(signal, req, row, rl);
        }
      }
    }
    break;
  }
  default:
    break;
  }
  ndbinfo_send_scan_conf(signal, req, rl);
}
8810 
8811 
void
Qmgr::execPROCESSINFO_REP(Signal *signal)
{
  /**
   * Store process information reported by an API/MGM node.  The fixed
   * part carries pids and process name; optional sections carry the
   * service URI path and the host address.
   */
  jamEntry();
  ProcessInfoRep * report = (ProcessInfoRep *) signal->theData;
  SectionHandle handle(this, signal);
  SegmentedSectionPtr pathSectionPtr, hostSectionPtr;

  ProcessInfo * processInfo = getProcessInfo(report->node_id);
  if(processInfo)
  {
    /* Set everything except the connection name and host address */
    processInfo->initializeFromProcessInfoRep(report);

    /* Set the URI path */
    if(handle.getSection(pathSectionPtr, ProcessInfoRep::PathSectionNum))
    {
      processInfo->setUriPath(pathSectionPtr.p->theData);
    }

    /* Set the host address */
    if(handle.getSection(hostSectionPtr, ProcessInfoRep::HostSectionNum))
    {
      processInfo->setHostAddress(hostSectionPtr.p->theData);
    }
    else
    {
      /* Use the address from the transporter registry.
         As implemented below we use setHostAddress() with struct in_addr
         to set an IPv4 address.  An alternate more abstract version
         of ProcessInfo::setHostAddress() is also available, which
         takes a struct sockaddr * and length.
      */
      struct in_addr addr=
        globalTransporterRegistry.get_connect_address(report->node_id);
      processInfo->setHostAddress(& addr);
    }
  }
  releaseSections(handle);
}
8852 
/**
 * ISOLATE_ORD isolates a set of victim nodes from the cluster by
 * forcing node failure handling for them.  The order passes through
 * three steps (sig->isolateStep):
 *   IS_REQ       : initial request; re-broadcast to all live nodes.
 *   IS_BROADCAST : broadcast received; optionally delayed locally.
 *   IS_DELAY     : delay expired; map each victim to a FAIL_REP.
 * The victim bitmask arrives either in a segmented section (senders
 * whose version supports node bitmask sections) or inline in the
 * fixed signal part (older senders, 48-node bitmask).
 */
void
Qmgr::execISOLATE_ORD(Signal* signal)
{
  jamEntry();

  IsolateOrd* sig = (IsolateOrd*) signal->theData;

  ndbrequire(sig->senderRef != 0);
  Uint32 senderNode = refToNode(sig->senderRef);
  Uint32 sz;
  Uint32 num_sections = signal->getNoOfSections();
  SectionHandle handle(this, signal);
  if (ndbd_send_node_bitmask_in_section(getNodeInfo(senderNode).m_version))
  {
    jam();
    /* Victim bitmask arrives in section 0; copy it into the inline area. */
    ndbrequire(num_sections == 1);
    SegmentedSectionPtr ptr;
    handle.getSection(ptr, 0);
    copy(sig->nodesToIsolate, ptr);
    ndbrequire(ptr.sz <= NdbNodeBitmask::Size);
    sz = ptr.sz;
  }
  else
  {
    jam();
    /* Old-style sender: only the 48-node part is valid, zero the rest. */
    memset(sig->nodesToIsolate + NdbNodeBitmask48::Size,
           0,
           _NDB_NBM_DIFF_BYTES);
    sz = NdbNodeBitmask::Size;
  }
  NdbNodeBitmask victims;
  memset(&victims, 0, sizeof(victims));
  victims.assign(sz, sig->nodesToIsolate);
  ndbrequire(!victims.isclear());

  switch (sig->isolateStep)
  {
  case IsolateOrd::IS_REQ:
  {
    jam();
    releaseSections(handle);
    /* Initial request, broadcast immediately */

    /* Need to get the set of live nodes to broadcast to */
    NdbNodeBitmask hitmen(c_clusterNodes);

    sig->isolateStep = IsolateOrd::IS_BROADCAST;
    unsigned nodeId = hitmen.find_first();
    do
    {
      jam();
      BlockReference ref = calcQmgrBlockRef(nodeId);
      if (ndbd_send_node_bitmask_in_section(getNodeInfo(nodeId).m_version))
      {
        jam();
        /* Receiver understands sections: ship the bitmask as section 0. */
        LinearSectionPtr lsptr[3];
        lsptr[0].p = (Uint32*)&victims;
        lsptr[0].sz = victims.getPackedLengthInWords();
        sendSignal(ref,
                   GSN_ISOLATE_ORD,
                   signal,
                   IsolateOrd::SignalLength,
                   JBA,
                   lsptr,
                   1);
      }
      else
      {
        jam();
        /* Old receiver: bitmask must fit inline (2 words = 48 nodes). */
        ndbrequire(victims.getPackedLengthInWords() <= 2);
        memset(&sig->nodesToIsolate, 0, 8);
        memcpy(&sig->nodesToIsolate,
               &victims,
               4 * victims.getPackedLengthInWords());
        sendSignal(ref,
                   GSN_ISOLATE_ORD,
                   signal,
                   IsolateOrd::SignalLengthWithBitmask48,
                   JBA);
      }
      nodeId = hitmen.find_next(nodeId + 1);
    } while (nodeId != BitmaskImpl::NotFound);

    ndbrequire(!hitmen.isclear()); /* At least me */
    return;
  }
  case IsolateOrd::IS_BROADCAST:
  {
    jam();
    /* Received request, delay */
    sig->isolateStep = IsolateOrd::IS_DELAY;

    if (sig->delayMillis > 0)
    {
      /* Delay processing until delayMillis passes */
      jam();
      sendSignalWithDelay(reference(),
                          GSN_ISOLATE_ORD,
                          signal,
                          sig->delayMillis,
                          IsolateOrd::SignalLength,
                          &handle);
      return;
    }
  }
  // Fall through
  case IsolateOrd::IS_DELAY:
  {
    jam();

    releaseSections(handle);
    if (ERROR_INSERTED(942))
    {
      jam();
      g_eventLogger->info("QMGR discarding IsolateRequest");
      return;
    }

    /* Map to FAIL_REP signal(s) */
    Uint32 failSource = refToNode(sig->senderRef);

    unsigned nodeId = victims.find_first();
    do
    {
      jam();

      /* TODO : Consider checking node state and skipping if
       * failing already
       * Consider logging that action is being taken here
       */

      FailRep* failRep = (FailRep*)&signal->theData[0];
      failRep->failNodeId = nodeId;
      failRep->failCause = FailRep::ZFORCED_ISOLATION;
      failRep->failSourceNodeId = failSource;

      sendSignal(reference(), GSN_FAIL_REP, signal, 3, JBA);

      nodeId = victims.find_next(nodeId + 1);
    } while (nodeId != BitmaskImpl::NotFound);

    /* Fail rep signals are en-route... */

    return;
  }
  }

  /* Unknown isolateStep value. */
  ndbabort();
}
9002 
9003 
9004 void
execNODE_STATE_REP(Signal * signal)9005 Qmgr::execNODE_STATE_REP(Signal* signal)
9006 {
9007   jam();
9008   const NodeState prevState = getNodeState();
9009   SimulatedBlock::execNODE_STATE_REP(signal);
9010 
9011   /* Check whether we are changing state */
9012   const Uint32 prevStartLevel = prevState.startLevel;
9013   const Uint32 newStartLevel = getNodeState().startLevel;
9014 
9015   if (newStartLevel != prevStartLevel)
9016   {
9017     jam();
9018     /* Inform APIs */
9019     signal->theData[0] = ZNOTIFY_STATE_CHANGE;
9020     signal->theData[1] = 1;
9021     sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
9022   }
9023 
9024   return;
9025 }
9026 
void
Qmgr::handleStateChange(Signal* signal, Uint32 nodeToNotify)
{
  jam();
  /**
   * Walk the node records from nodeToNotify upwards and send an
   * unsolicited API_REGCONF to each active API/MGM node so that it
   * learns of the node state change.  After each signal sent we take
   * a break and reschedule ourselves via CONTINUEB, limiting the
   * work done in a single signal execution.
   */
  bool take_a_break = false;

  do
  {
    const NodeInfo::NodeType nt = getNodeInfo(nodeToNotify).getType();

    if (nt == NodeInfo::API ||
        nt == NodeInfo::MGM)
    {
      jam();

      NodeRecPtr notifyNode;
      notifyNode.i = nodeToNotify;
      ptrCheckGuard(notifyNode, MAX_NODES, nodeRec);

      if (notifyNode.p->phase == ZAPI_ACTIVE)
      {
        jam();
        ndbassert(c_connectedNodes.get(nodeToNotify));

        /**
         * Ok, send an unsolicited API_REGCONF to inform
         * the API of the state change
         */
        set_hb_count(nodeToNotify) = 0;
        sendApiRegConf(signal, nodeToNotify);

        take_a_break = true;
      }
    }

    nodeToNotify++;
  } while (nodeToNotify < MAX_NODES &&
           !take_a_break);

  /* Continue with the next node in a later CONTINUEB execution. */
  if (nodeToNotify < MAX_NODES)
  {
    jam();
    signal->theData[0] = ZNOTIFY_STATE_CHANGE;
    signal->theData[1] = nodeToNotify;
    sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
  }

  return;
}
9076 
9077 /**
9078  * SET_UP_MULTI_TRP_REQ starts the setup of multi socket transporters
9079  * that currently is setup between two data nodes in the same node group.
9080  * This signal is sent in start phase 3 from NDBCNTR  when we are performing
9081  * an initial start or a cluster restart at a time when we know the version
9082  * info about other data nodes. For node restarts it is sent later in phase
9083  * 4 when the master has informed us of the current sysfile. We need to wait
9084  * for this to ensure that we know the node group information for all nodes.
9085  * We will only allow one use of SET_UP_MULTI_TRP_REQ per start of a data
9086  * node. We can still participate in setting up multi sockets after that,
9087  * but only when another node is starting and requesting us to assist in
9088  * setting up a multi socket setup.
9089  *
9090  * We cannot use multi sockets towards versions before MySQL Cluster
9091  * 8.0.20.
9092  *
9093  * The signal flow to accomplish this setup of multi sockets is the
9094  * following. It is currently only possible to setup when a node is
9095  * starting up, but some parts of the code is prepared to also handle
9096  * this change while the cluster is operational.
9097  *
 * The protocol below assumes that both nodes support multi sockets.
9099  *
9100  * NDBCNTR/DBDIH          QMGR                              QMGR neighbour
9101  *    SET_UP_MULTI_TRP_REQ
9102  *    ------------------->
9103  *
9104  * Scenario 1: QMGR Neighbour starts after first QMGR
9105  *                        GET_NUM_MULTI_TRP_REQ
9106  *                        ------------------------------------->
9107  *                        GET_NUM_MULTI_TRP_CONF
9108  *                        <------------------------------------
9109  *                     Create multi transporters
9110  *                     Connect multi transporters
9111  *
9112  *                        GET_NUM_MULTI_TRP_REQ
9113  *                        <------------------------------------
9114  *                        GET_NUM_MULTI_TRP_CONF
9115  *                        ------------------------------------>
9116  *                                                  Create multi transporter
9117  *                                                  Connect multi transporter
9118  *                       Multi transporters connect to each other
9119  *
9120  * QMGR                                                  QMGR Neighbour
9121  *     SWITCH_MULTI_TRP_REQ
9122  *   ---------------------------------------------------------->
9123  *                                                       When QMGR neighbour
9124  *                                                       has added to epoll
9125  *                                                       set.
9126  *     SWITCH_MULTI_TRP_REQ
9127  *   <---------------------------------------------------------
9128  *     SWITCH_MULTI_TRP_CONF
9129  *   <-------------------------------------------------------->
9130  *     Now both nodes are ready to perform the actual switch over
9131  *
9132  *  QMGR               THRMAN Proxy                 THRMAN
9133  *    FREEZE_THREAD_REQ
9134  *    ---------------------->
9135  *                           FREEZE_THREAD_REQ
9136  *                           -------------------------->>
9137  *                                                   Freeze all threads
9138  *                                                   except main thread
9139  *              FREEZE_ACTION_REQ
9140  *    <--------------------------------------------------
9141  *    Switch to using multi transporter sockets
9142  *
9143  * At this point the only thread that is active is the main thread.
9144  * Every other thread is frozen waiting to be woken up when the
9145  * new multi socket setup is set up. We will send the last signal
9146  * ACTIVATE_TRP_REQ on the old transporter, before we send that we
9147  * ensure that we have locked all send transporters and after that
9148  * we enable the send buffer and after that all signals will be
9149  * sent on the new multi sockets.
9150  *
9151  * QMGR                  THRMAN (main thread)            QMGR Neighbour
9152  *       ACTIVATE_TRP_REQ
9153  *   -------------------------------------------------------->
9154  *       FREEZE_ACTION_CONF
9155  *   -------------------------->
 *                           unlock all threads
9157  *                           wait until all threads woken up again
9158  *       FREEZE_THREAD_CONF
9159  *   <--------------------------
9160  *
9161  * In parallel with the above we will also do the same thing in the
9162  * neighbour node and this node will initiate the second round of
9163  * events when we receive the signal ACTIVATE_TRP_REQ.
9164  *
9165  * QMGR         TRPMAN Proxy     TRPMAN                  QMGR Neighbour
9166  *       ACTIVATE_TRP_REQ
9167  *   <--------------------------------------------------------
9168  *   SYNC_THREAD_VIA_REQ
9169  *   --------------->
9170  *                   SYNC_THREAD_VIA_REQ
9171  *                   --------------->>                  THRMANs
9172  *                                    SYNC_THREAD_REQ
9173  *                                    -------------------->>
9174  *                                    SYNC_THREAD_CONF
9175  *                                    <<--------------------
9176  *                   SYNC_THREAD_VIA_CONF
9177  *                   <<---------------
9178  *   SYNC_THREAD_VIA_CONF
9179  *   <---------------
9180  *
9181  * SYNC_THREAD_VIA_REQ/CONF is used to ensure that all receive threads
9182  * have delivered any signals it has received. Since at this point we
9183  * haven't activated the new multi sockets, and we have deactivated
9184  * the old socket, this means that we have a clear signal order in that
9185  * signal sent on old socket is always delivered to all other threads
9186  * before any new signal on the new multi socket transporters are
9187  * delivered.
9188  *
9189  *   <---------------
9190  *     ACTIVATE_TRP_REQ
9191  *   --------------->-------------->>
9192  *                                Activate the receive on the
9193  *                                new transporters
9194  *     ACTIVATE_TRP_CONF
9195  *   <<------------------------------
9196  *     ACTIVATE_TRP_CONF
9197  *   --------------------------------------------------------->
9198  *                                                           Here the
9199  *                                                        switch is completed
 *  After receiving ACTIVATE_TRP_CONF we have no use of the old socket
 *  anymore, and the sender obviously has no use of it anymore either.
9202  *
9203  * If more nodes are in node group to also set up we do it after this.
9204  * Otherwise we are ready.
9205  *
9206  *  QMGR                           NDBCNTR/DBDIH
9207  *      SET_UP_MULTI_TRP_CONF
9208  *    ------------------------------->
9209  */
void
Qmgr::execSET_UP_MULTI_TRP_REQ(Signal *signal)
{
  jamEntry();
  /**
   * Request from NDBCNTR/DBDIH (sender block reference in theData[0])
   * to set up multi socket transporters towards the nodes in our node
   * group.  Only one such request is handled per node start; a repeat
   * request is confirmed immediately without doing any work.
   */
  if (m_ref_set_up_multi_trp_req != 0)
  {
    jam();
    DEB_MULTI_TRP(("Already handled SET_UP_MULTI_TRP_REQ"));
    sendSignal(signal->theData[0],
               GSN_SET_UP_MULTI_TRP_CONF,
               signal,
               1,
               JBB);
    return;
  }
  m_ref_set_up_multi_trp_req = signal->theData[0];
  m_get_num_multi_trps_sent = 0;
  /* Start every node record from the configured number of sockets;
     this is negotiated down per node in GET_NUM_MULTI_TRP_REQ/CONF. */
  for (Uint32 node_id = 1; node_id < MAX_NDB_NODES; node_id++)
  {
    NodeRecPtr nodePtr;
    nodePtr.i = node_id;
    ptrAss(nodePtr, nodeRec);
    nodePtr.p->m_used_num_multi_trps = m_num_multi_trps;
    nodePtr.p->m_initial_set_up_multi_trp_done = false;
  }
  DEB_MULTI_TRP(("m_num_multi_trps = %u", m_num_multi_trps));
  bool done = false;
  bool completed = get_num_multi_trps(signal, done);
  if (!completed)
  {
    jam();
    /* GET_NUM_MULTI_TRP_REQs are outstanding; their CONF/REF handlers
       drive the setup to completion. */
    return;
  }
  else
  {
    jam();
    DEB_MULTI_TRP(("m_num_multi_trps == 1, no need to setup multi sockets"));
  }
  complete_multi_trp_setup(signal, done);
}
9250 
9251 void
get_node_group_mask(Signal * signal,NdbNodeBitmask & mask)9252 Qmgr::get_node_group_mask(Signal *signal, NdbNodeBitmask& mask)
9253 {
9254   CheckNodeGroups * sd = (CheckNodeGroups*)signal->getDataPtrSend();
9255   sd->blockRef = reference();
9256   sd->requestType =
9257     CheckNodeGroups::Direct |
9258     CheckNodeGroups::GetNodeGroupMembers;
9259   sd->nodeId = getOwnNodeId();
9260   EXECUTE_DIRECT_MT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal,
9261 		    CheckNodeGroups::SignalLength, 0);
9262   jamEntry();
9263   mask.assign(sd->mask);
9264   mask.clear(getOwnNodeId());
9265 }
9266 
bool
Qmgr::get_num_multi_trps(Signal *signal, bool &done)
{
  jamEntry();
  /**
   * For every eligible node in our node group, create the multi
   * transporter and start negotiating the number of sockets by
   * sending GET_NUM_MULTI_TRP_REQ.
   *
   * Returns true when no GET_NUM_MULTI_TRP_REQ remains outstanding,
   * i.e. the caller can complete the setup immediately.  done is set
   * when the initial setup can be regarded as decided: either only
   * one socket is configured, or at least one node shares our node
   * group.
   */
  NdbNodeBitmask mask;
  get_node_group_mask(signal, mask);
  /* Guard the outstanding-request counter so it cannot reach zero
     while we are still inside the send loop below. */
  m_get_num_multi_trps_sent++;
  if (m_num_multi_trps == 1)
  {
    jam();
    done = true;
  }
  for (Uint32 node_id = 1; node_id < MAX_NDB_NODES; node_id++)
  {
    if (mask.get(node_id))
    {
      jam();
      jamLine(node_id);
      DEB_MULTI_TRP(("Node %u is in the same node group", node_id));
      NodeRecPtr nodePtr;
      nodePtr.i = node_id;
      ptrAss(nodePtr, nodeRec);
      nodePtr.p->m_is_in_same_nodegroup = true;
      done = true;
      Uint32 version = getNodeInfo(nodePtr.i).m_version;
      if (m_num_multi_trps > 1)
      {
        create_multi_transporter(nodePtr.i);
        /* Only negotiate with nodes that are running, support multi
           transporters in their version, and are started/starting. */
        if (nodePtr.p->phase == ZRUNNING &&
            ndbd_use_multi_ng_trps(version) &&
            (c_ndbcntr->is_node_started(nodePtr.i) ||
             c_ndbcntr->is_node_starting(nodePtr.i)))
        {
          jam();
          if (ERROR_INSERTED(970))
          {
            NdbSleep_MilliSleep(500);
          }
          nodePtr.p->m_set_up_multi_trp_started = true;
          inc_get_num_multi_trps_sent(nodePtr.i);
          send_get_num_multi_trp_req(signal, node_id);
        }
      }
    }
  }
  /* Drop the guard taken above; completion when counter reaches 0. */
  m_get_num_multi_trps_sent--;
  return (m_get_num_multi_trps_sent == 0);
}
9315 
void
Qmgr::execGET_NUM_MULTI_TRP_REQ(Signal* signal)
{
  jamEntry();
  /**
   * A node in our node group asks how many multi sockets to use
   * towards us.  We answer with CONF (carrying the negotiated count)
   * if we have started our own multi transporter setup, otherwise
   * with REF(NotReadyYet) so the sender retries later.
   */
  GetNumMultiTrpReq* req = (GetNumMultiTrpReq*)&signal->theData[0];
  Uint32 sender_node_id = req->nodeId;

  NodeRecPtr nodePtr;
  nodePtr.i = sender_node_id;
  ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
  nodePtr.p->m_initial_set_up_multi_trp_done =
    req->initial_set_up_multi_trp_done;
  /*
   * Set used number of multi sockets to be minimum of our own config
   * and the node config of the node contacting us.
   */
  nodePtr.p->m_used_num_multi_trps =
    MIN(req->numMultiTrps, m_num_multi_trps);

  if (m_initial_set_up_multi_trp_done && nodePtr.p->m_used_num_multi_trps > 1)
  {
    /**
     * We passed the startup phase 2 where the connection setup
     * of multi transporters happens normally. So the node sending
     * this message is a new node starting and we're either already
     * started or have passed phase 2 of the startup. We will start
     * enabling communication to this new node.
     *
     * This is only required if we want to use more than one socket.
     */
    jam();
    DEB_MULTI_TRP(("Node %u starting, prepare switch trp using %u trps",
                   sender_node_id,
                   nodePtr.p->m_used_num_multi_trps));
    connect_multi_transporter(signal, sender_node_id);
    if (ERROR_INSERTED(972))
    {
      NdbSleep_MilliSleep(500);
    }
  }
  else
  {
    jam();
    if (ERROR_INSERTED(971))
    {
      NdbSleep_MilliSleep(500);
    }
  }
  if (m_ref_set_up_multi_trp_req != 0)
  {
    jam();
    /* Our own setup has started: answer with the negotiated count. */
    DEB_MULTI_TRP(("Node %u starting, sent GET_NUM_MULTI_TRP_REQ, get"
                   " num multi %u",
                   sender_node_id,
                   nodePtr.p->m_used_num_multi_trps));
    GetNumMultiTrpConf* conf = (GetNumMultiTrpConf*)signal->getDataPtrSend();
    conf->numMultiTrps = nodePtr.p->m_used_num_multi_trps;
    conf->nodeId = getOwnNodeId();
    conf->initial_set_up_multi_trp_done = m_initial_set_up_multi_trp_done;

    BlockReference ref = calcQmgrBlockRef(sender_node_id);
    sendSignal(ref, GSN_GET_NUM_MULTI_TRP_CONF, signal,
               GetNumMultiTrpConf::SignalLength, JBB);
  }
  else
  {
    jam();
    /* We have not received SET_UP_MULTI_TRP_REQ yet: tell the sender
       to retry. */
    DEB_MULTI_TRP(("Node %u starting, GET_NUM_MULTI_TRP_REQ sent,"
                   " we're not ready",
                   sender_node_id));
    GetNumMultiTrpRef* ref = (GetNumMultiTrpRef*)signal->getDataPtrSend();
    ref->nodeId = getOwnNodeId();
    ref->errorCode = GetNumMultiTrpRef::NotReadyYet;
    BlockReference block_ref = calcQmgrBlockRef(sender_node_id);
    sendSignal(block_ref, GSN_GET_NUM_MULTI_TRP_REF, signal,
               GetNumMultiTrpRef::SignalLength, JBB);
  }
}
9394 
void
Qmgr::execGET_NUM_MULTI_TRP_REF(Signal *signal)
{
  GetNumMultiTrpRef ref = *(GetNumMultiTrpRef*)&signal->theData[0];
  /**
   * The other node is not ready yet, we'll wait for it to become ready before
   * progressing.
   */
  NodeRecPtr nodePtr;
  nodePtr.i = ref.nodeId;
  ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
  nodePtr.p->m_count_multi_trp_ref++;
  if (nodePtr.p->m_count_multi_trp_ref > 60)
  {
    jam();
    /* Give up after 60 REFs (about 30 seconds at one retry per 500 ms)
       and complete the setup without this node. */
    nodePtr.p->m_count_multi_trp_ref = 0;
    DEB_MULTI_TRP(("GET_NUM_MULTI_TRP_REF 60 times from %u", ref.nodeId));
    ndbassert(false);
    dec_get_num_multi_trps_sent(ref.nodeId);
    complete_multi_trp_setup(signal, false);
    return;
  }
  DEB_MULTI_TRP(("GET_NUM_MULTI_TRP_REF received from %u", ref.nodeId));
  /* Retry the request after 500 ms via a delayed CONTINUEB. */
  signal->theData[0] = ZRESEND_GET_NUM_MULTI_TRP_REQ;
  signal->theData[1] = ref.nodeId;
  sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 500, 2);
}
9422 
9423 void
complete_multi_trp_setup(Signal * signal,bool set_done)9424 Qmgr::complete_multi_trp_setup(Signal *signal, bool set_done)
9425 {
9426   if (m_get_num_multi_trps_sent == 0)
9427   {
9428     jam();
9429     if (set_done)
9430     {
9431       jam();
9432       m_initial_set_up_multi_trp_done = true;
9433     }
9434     sendSignal(m_ref_set_up_multi_trp_req,
9435                GSN_SET_UP_MULTI_TRP_CONF,
9436                signal,
9437                1,
9438                JBB);
9439     if (!set_done)
9440     {
9441       jam();
9442       m_ref_set_up_multi_trp_req = 0;
9443     }
9444   }
9445   else
9446   {
9447     jam();
9448   }
9449 }
9450 
9451 void
send_get_num_multi_trp_req(Signal * signal,NodeId node_id)9452 Qmgr::send_get_num_multi_trp_req(Signal *signal, NodeId node_id)
9453 {
9454   if (m_get_num_multi_trps_sent == 0)
9455   {
9456     jam();
9457     DEB_MULTI_TRP(("We have already completed the SET_UP_MULTI_TRP_REQ"
9458                    ", no need to continue retrying"));
9459     complete_multi_trp_setup(signal, false);
9460     return;
9461   }
9462   jam();
9463   DEB_MULTI_TRP(("Get num multi trp for node %u", node_id));
9464   GetNumMultiTrpReq* req = (GetNumMultiTrpReq*)signal->getDataPtrSend();
9465   req->nodeId = getOwnNodeId();
9466   req->numMultiTrps = m_num_multi_trps;
9467   req->initial_set_up_multi_trp_done = false;
9468   BlockReference ref = calcQmgrBlockRef(node_id);
9469   sendSignal(ref, GSN_GET_NUM_MULTI_TRP_REQ, signal,
9470              GetNumMultiTrpReq::SignalLength, JBB);
9471 }
9472 
9473 void
inc_get_num_multi_trps_sent(NodeId node_id)9474 Qmgr::inc_get_num_multi_trps_sent(NodeId node_id)
9475 {
9476   NodeRecPtr nodePtr;
9477   nodePtr.i = node_id;
9478   ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
9479   ndbrequire(!nodePtr.p->m_is_get_num_multi_trp_active);
9480   m_get_num_multi_trps_sent++;
9481   nodePtr.p->m_is_get_num_multi_trp_active = true;
9482 }
9483 
9484 void
dec_get_num_multi_trps_sent(NodeId node_id)9485 Qmgr::dec_get_num_multi_trps_sent(NodeId node_id)
9486 {
9487   NodeRecPtr nodePtr;
9488   nodePtr.i = node_id;
9489   ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
9490   ndbrequire(m_get_num_multi_trps_sent > 0);
9491   ndbrequire(nodePtr.p->m_is_get_num_multi_trp_active);
9492   m_get_num_multi_trps_sent--;
9493   nodePtr.p->m_is_get_num_multi_trp_active = false;
9494 }
9495 
void
Qmgr::execGET_NUM_MULTI_TRP_CONF(Signal* signal)
{
  /**
   * We receive the number of sockets to use from the other node. Could
   * also be a signal we sent to ourselves if the other node isn't
   * started yet or is running a version not supporting multi sockets.
   * In these cases the number of sockets will always be 1.
   */
  jamEntry();
  CRASH_INSERTION(951);
  GetNumMultiTrpConf* conf = (GetNumMultiTrpConf*)&signal->theData[0];
  Uint32 sender_node_id = conf->nodeId;
  NodeRecPtr nodePtr;
  nodePtr.i = sender_node_id;
  ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);

  /* A CONF resets the REF retry counter for this node. */
  nodePtr.p->m_count_multi_trp_ref = 0;
  Uint32 rec_num_multi_trps = conf->numMultiTrps;
  Uint32 initial_set_up_multi_trp_done = conf->initial_set_up_multi_trp_done;
  ndbrequire(nodePtr.p->m_used_num_multi_trps > 0);
  ndbrequire(rec_num_multi_trps <= m_num_multi_trps);
  /**
   * If the other side cannot handle the number of multi sockets we wanted,
   * we set it to the other sides number instead.
   */
  nodePtr.p->m_used_num_multi_trps =
    MIN(conf->numMultiTrps, nodePtr.p->m_used_num_multi_trps);
  nodePtr.p->m_initial_set_up_multi_trp_done =
    initial_set_up_multi_trp_done;
  dec_get_num_multi_trps_sent(nodePtr.i);
  if (rec_num_multi_trps == 1)
  {
    jam();
    /* Single socket agreed: no multi socket setup for this node. */
    DEB_MULTI_TRP(("No need to setup multi sockets to node %u",
                   nodePtr.i));
    complete_multi_trp_setup(signal, true);
    return;
  }
  DEB_MULTI_TRP(("GET_NUM_MULTI_TRP_CONF received from %u using %u trps",
                 sender_node_id,
                 nodePtr.p->m_used_num_multi_trps));
  jam();
  connect_multi_transporter(signal, nodePtr.i);
  if (ERROR_INSERTED(973))
  {
    NdbSleep_MilliSleep(1500);
  }
}
9545 
9546 void
create_multi_transporter(NodeId node_id)9547 Qmgr::create_multi_transporter(NodeId node_id)
9548 {
9549   jamEntry();
9550   DEB_MULTI_TRP(("Create multi trp for node %u", node_id));
9551   globalTransporterRegistry.createMultiTransporter(node_id,
9552                                                    m_num_multi_trps);
9553 }
9554 
9555 #include "../../../common/transporter/Transporter.hpp"
9556 #include "../../../common/transporter/Multi_Transporter.hpp"
9557 
void
Qmgr::connect_multi_transporter(Signal *signal, NodeId node_id)
{
  /**
   * We have created the Multi transporters, now it is time to setup
   * connections to those that are running and also to switch over to
   * using the multi transporter. We currently only perform this as
   * part of startup. This means that if a node is already started
   * it is the responsibility of the starting node always to perform
   * the setup. If both nodes are starting the node with lowest node
   * id is responsible for the setup.
   */
  NodeRecPtr nodePtr;
  nodePtr.i = node_id;
  ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
  nodePtr.p->m_check_multi_trp_connect_loop_count = 0;
  nodePtr.p->m_is_preparing_switch_trp = true;
  /**
   * Connect a multi-transporter.
   * For clients this happens by moving the transporters inside the
   * multi-transporter into the allTransporters array. This leads to
   * that they are checked in start_clients_thread. These transporters
   * are special in that they only connect in the CONNECTED state.
   *
   * To differentiate between normal transporters and these transporters
   * that are part of a multi-transporter we have a method called
   * isPartOfMultiTransporter. The method set_part_of_multi_transporter
   * toggles this state, by default it is false.
   *
   * By replacing the position in theNodeIdTransporters with a
   * multi transporter we ensure that connect_server will handle the
   * connection properly.
   *
   * By placing the transporters in the allTransporters array ensures
   * that we connect as clients in start_clients_thread.
   */
  Multi_Transporter *multi_trp =
    globalTransporterRegistry.get_node_multi_transporter(node_id);
  ndbrequire(multi_trp != 0);

  globalTransporterRegistry.lockMultiTransporters();
  multi_trp->set_num_inactive_transporters(
    nodePtr.p->m_used_num_multi_trps);
  Uint32 num_inactive_transporters =
    multi_trp->get_num_inactive_transporters();
  Transporter *current_trp =
    globalTransporterRegistry.get_node_transporter(node_id);
  if (current_trp->isMultiTransporter())
  {
    jam();
    DEB_MULTI_TRP(("Get current trp from multi transporter"));
    ndbrequire(current_trp == multi_trp);
    current_trp = multi_trp->get_active_transporter(0);
    ndbrequire(multi_trp->get_num_active_transporters() == 1);
  }
  DEB_MULTI_TRP(("Base transporter has trp_id: %u",
                 current_trp->getTransporterIndex()));
  int trp_port = current_trp->get_s_port();

  for (Uint32 i = 0; i < num_inactive_transporters; i++)
  {
    /**
     * It is vital that we set the port number in the transporters used
     * by the multi transporter. It is possible that the node comes up
     * with a different port number after a restart. For the first
     * transporter this port number is set in start_clients_thread.
     * Thus before we connect using these transporters we update the
     * port number of those transporters to be the same port number as
     * used by the first transporter.
     */
    jam();
    Transporter *t = multi_trp->get_inactive_transporter(i);
    t->set_s_port(trp_port);
    globalTransporterRegistry.insert_allTransporters(t);
    assign_recv_thread_new_trp(t->getTransporterIndex());
    DEB_MULTI_TRP(("Insert trp id %u for node %u, mti = %u, server: %u"
                   ", port: %d",
                   t->getTransporterIndex(),
                   node_id,
                   t->get_multi_transporter_instance(),
                   t->isServer,
                   trp_port));
  }
  globalTransporterRegistry.unlockMultiTransporters();
  /* Poll for connection completion every 10 ms via CONTINUEB. */
  signal->theData[0] = ZCHECK_MULTI_TRP_CONNECT;
  signal->theData[1] = node_id;
  sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 10, 2);
}
9646 
void
Qmgr::check_connect_multi_transporter(Signal *signal, NodeId node_id)
{
  /**
   * Periodic (CONTINUEB-driven) check of whether all inactive multi
   * transporters towards node_id have completed their connection setup.
   * Once every neighbour node is connected we pick a node and initiate
   * the switch-over to the multi socket setup.
   */
  NodeRecPtr nodePtr;
  nodePtr.i = node_id;
  ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
  globalTransporterRegistry.lockMultiTransporters();
  Multi_Transporter *multi_trp =
    globalTransporterRegistry.get_node_multi_transporter(node_id);
  if (nodePtr.p->phase == ZRUNNING)
  {
    jam();
    /* Verify that every not-yet-active transporter has connected. */
    bool connected = true;
    Uint32 num_inactive_transporters =
      multi_trp->get_num_inactive_transporters();
    for (Uint32 i = 0; i < num_inactive_transporters; i++)
    {
      jam();
      Transporter *tmp_trp = multi_trp->get_inactive_transporter(i);
      bool is_connected = tmp_trp->isConnected();
      if (!is_connected)
      {
        jam();
        connected = false;
        break;
      }
    }
    if (!connected)
    {
      jam();
      globalTransporterRegistry.unlockMultiTransporters();
      nodePtr.p->m_check_multi_trp_connect_loop_count++;
      /**
       * We are only connecting to nodes already connected, thus we
       * should not fail to connect here, just in case something
       * weird happens we will still fail after waiting for
       * 30 minutes (100 * 30 * 60 times sending 10ms delayed signal).
       */
      ndbrequire(nodePtr.p->m_check_multi_trp_connect_loop_count <
                 (100 * 60 * 30));
      /* Not all connected yet: re-check again after a 10ms delay. */
      signal->theData[0] = ZCHECK_MULTI_TRP_CONNECT;
      signal->theData[1] = node_id;
      sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 10, 2);
      return;
    }
    DEB_MULTI_TRP(("Multi trp connected for node %u", node_id));
    globalTransporterRegistry.unlockMultiTransporters();
    ndbrequire(nodePtr.p->m_is_multi_trp_setup == false);
    nodePtr.p->m_is_multi_trp_setup = true;
    if (!check_all_multi_trp_nodes_connected())
    {
      jam();
      /* We are not ready to start switch process yet. */
      return;
    }
    /* NOTE: select_node_id_for_switch takes node_id by reference and
     * may replace it with the selected candidate node. */
    if (!select_node_id_for_switch(node_id, true))
    {
      /**
       * We were already busy with a switch, could also be
       * that we didn't find any lower node id to switch to.
       * We will only initiate switch from nodes with lower
       * node ids than our node id.
       *
       * By always selecting the highest node id to start with,
       * we ensure that we select a node that hasn't initiated
       * any switch on their own. Thus we are certain that this
       * node will eventually accept our switch request even if
       * it has to process all the other neighbour nodes before
       * us. This is definitely not an optimal algorithm, but it
       * is safe in that it avoids deadlock that could lead to
       * eternal wait states.
       */
      jam();
      return;
    }
    assign_multi_trps_to_send_threads();
    send_switch_multi_transporter(signal, node_id, false);
    return;
  }
  else
  {
    /**
     * The connection is no longer using the Multi_Transporter object.
     * Can only happen when the connection is broken before we completed
     * the connection setup of all connections. No need to do anything
     * more in this case other than release mutex.
     */
    jam();
    if (ERROR_INSERTED(974))
    {
      NdbSleep_MilliSleep(1500);
    }
    nodePtr.p->m_is_preparing_switch_trp = false;
    globalTransporterRegistry.unlockMultiTransporters();
    check_more_trp_switch_nodes(signal);
  }
  return;
}
9745 
9746 void
send_switch_multi_transporter(Signal * signal,NodeId node_id,bool retry)9747 Qmgr::send_switch_multi_transporter(Signal *signal,
9748                                     NodeId node_id,
9749                                     bool retry)
9750 {
9751   NodeRecPtr nodePtr;
9752   nodePtr.i = node_id;
9753   ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
9754   jam();
9755   if (!retry)
9756   {
9757     jam();
9758     ndbrequire(m_current_switch_multi_trp_node == 0);
9759   }
9760   else if (m_current_switch_multi_trp_node == node_id)
9761   {
9762     jam();
9763     DEB_MULTI_TRP(("Retry of send SWITCH_MULTI_TRP_REQ to node %u"
9764                    " not needed since already ongoing",
9765                    node_id));
9766     return;
9767   }
9768   else if (m_current_switch_multi_trp_node != 0)
9769   {
9770     jam();
9771     DEB_MULTI_TRP(("Retry of send SWITCH_MULTI_TRP_REQ to node %u"
9772                    " failed since other node already started",
9773                    node_id));
9774     return;
9775   }
9776   else if (nodePtr.p->m_is_using_multi_trp)
9777   {
9778     jam();
9779     DEB_MULTI_TRP(("Retry of send SWITCH_MULTI_TRP_REQ to node %u"
9780                    " not needed since already setup",
9781                    node_id));
9782     return;
9783   }
9784   else
9785   {
9786     jam();
9787     DEB_MULTI_TRP(("Retry of SWITCH_MULTI_TRP_REQ to node %u",
9788                    node_id));
9789   }
9790   m_current_switch_multi_trp_node = node_id;
9791   nodePtr.p->m_is_ready_to_switch_trp = true;
9792   DEB_MULTI_TRP(("Send SWITCH_MULTI_TRP_REQ to node %u", node_id));
9793   SwitchMultiTrpReq* req = (SwitchMultiTrpReq*)signal->getDataPtrSend();
9794   req->nodeId = getOwnNodeId();
9795   req->senderRef = reference();
9796   BlockReference ref = calcQmgrBlockRef(node_id);
9797   sendSignal(ref, GSN_SWITCH_MULTI_TRP_REQ, signal,
9798              SwitchMultiTrpReq::SignalLength, JBB);
9799   if (ERROR_INSERTED(978))
9800   {
9801     NdbSleep_MilliSleep(1500);
9802   }
9803 }
9804 
9805 void
execSWITCH_MULTI_TRP_REQ(Signal * signal)9806 Qmgr::execSWITCH_MULTI_TRP_REQ(Signal *signal)
9807 {
9808   SwitchMultiTrpReq* req = (SwitchMultiTrpReq*)&signal->theData[0];
9809   NodeId node_id = req->nodeId;
9810   BlockReference block_ref = req->senderRef;
9811   DEB_MULTI_TRP(("SWITCH_MULTI_TRP_REQ node %u", node_id));
9812   NodeRecPtr nodePtr;
9813   nodePtr.i = node_id;
9814   ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
9815   assign_multi_trps_to_send_threads();
9816 
9817   CRASH_INSERTION(954);
9818   if (!check_all_multi_trp_nodes_connected())
9819   {
9820     if (nodePtr.p->m_is_multi_trp_setup &&
9821         m_current_switch_multi_trp_node == 0)
9822     {
9823       ndbrequire(nodePtr.p->phase == ZRUNNING);
9824       ndbrequire(nodePtr.p->m_is_in_same_nodegroup);
9825       ndbrequire(nodePtr.p->m_is_preparing_switch_trp);
9826       /* Fall through to send SWITCH_MULTI_TRP_CONF */
9827     }
9828     else
9829     {
9830       jam();
9831       ndbrequire(m_current_switch_multi_trp_node != node_id);
9832       DEB_MULTI_TRP(("Send SWITCH_MULTI_TRP_REF node %u", node_id));
9833       SwitchMultiTrpRef *ref = (SwitchMultiTrpRef*)signal->getDataPtrSend();
9834       ref->nodeId = getOwnNodeId();
9835       ref->errorCode = SwitchMultiTrpRef::SMTR_NOT_READY_FOR_SWITCH;
9836       sendSignal(block_ref, GSN_SWITCH_MULTI_TRP_REF, signal,
9837                  SwitchMultiTrpRef::SignalLength, JBB);
9838       return;
9839     }
9840   }
9841   else if (m_current_switch_multi_trp_node != 0 &&
9842            m_current_switch_multi_trp_node != node_id)
9843   {
9844     /**
9845      * We are already trying to connect multi sockets to another
9846      * node. We will wait for this to complete before moving
9847      * on to the next node.
9848      */
9849     jam();
9850     DEB_MULTI_TRP(("2:Send SWITCH_MULTI_TRP_REF node %u", node_id));
9851     SwitchMultiTrpRef *ref = (SwitchMultiTrpRef*)signal->getDataPtrSend();
9852     ref->nodeId = getOwnNodeId();
9853     ref->errorCode = SwitchMultiTrpRef::SMTR_NOT_READY_FOR_SWITCH;
9854     sendSignal(block_ref, GSN_SWITCH_MULTI_TRP_REF, signal,
9855                SwitchMultiTrpRef::SignalLength, JBB);
9856     return;
9857   }
9858   /**
9859    * We haven't selected any node to connect multi sockets to yet.
9860    * In that case it is safe to answer positively since we know
9861    * that this cannot cause any deadlock.
9862    */
9863   if (m_current_switch_multi_trp_node == 0)
9864   {
9865     jam();
9866     ndbrequire(!nodePtr.p->m_is_ready_to_switch_trp);
9867     SwitchMultiTrpReq* req = (SwitchMultiTrpReq*)signal->getDataPtrSend();
9868     req->nodeId = getOwnNodeId();
9869     req->senderRef = reference();
9870     BlockReference ref = calcQmgrBlockRef(node_id);
9871     sendSignal(ref, GSN_SWITCH_MULTI_TRP_REQ, signal,
9872                SwitchMultiTrpReq::SignalLength, JBB);
9873   }
9874   else
9875   {
9876     ndbrequire(m_current_switch_multi_trp_node == node_id);
9877   }
9878   ndbrequire(nodePtr.p->m_is_multi_trp_setup)
9879   nodePtr.p->m_is_ready_to_switch_trp = true;
9880   m_current_switch_multi_trp_node = node_id;
9881   jam();
9882   DEB_MULTI_TRP(("Send SWITCH_MULTI_TRP_CONF node %u", node_id));
9883   if (ERROR_INSERTED(979))
9884   {
9885     NdbSleep_MilliSleep(1500);
9886   }
9887   SwitchMultiTrpConf *conf = (SwitchMultiTrpConf*)signal->getDataPtrSend();
9888   conf->nodeId = getOwnNodeId();
9889   sendSignal(block_ref, GSN_SWITCH_MULTI_TRP_CONF, signal,
9890              SwitchMultiTrpConf::SignalLength, JBB);
9891 }
9892 
9893 void
execSWITCH_MULTI_TRP_CONF(Signal * signal)9894 Qmgr::execSWITCH_MULTI_TRP_CONF(Signal *signal)
9895 {
9896   /**
9897    * This signal can get lost if the other node fails and we have
9898    * already started.
9899    *
9900    * The TransporterRegistry will ensure that we switch back to using a
9901    * single transporter in this case, the DISCONNECT_REP code and the
9902    * NODE_FAILREP code will ensure that we reset the variables used
9903    * to setup the multi sockets next time the node starts up.
9904    */
9905   jamEntry();
9906   CRASH_INSERTION(955);
9907   SwitchMultiTrpConf *conf = (SwitchMultiTrpConf*)&signal->theData[0];
9908   Uint32 node_id = conf->nodeId;
9909   NodeRecPtr nodePtr;
9910   nodePtr.i = node_id;
9911   ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
9912   ndbrequire(nodePtr.p->m_is_ready_to_switch_trp == true);
9913   ndbrequire(nodePtr.p->m_is_multi_trp_setup == true);
9914   DEB_MULTI_TRP(("Recvd SWITCH_MULTI_TRP_CONF node %u", node_id));
9915   if (ERROR_INSERTED(980))
9916   {
9917     NdbSleep_MilliSleep(1500);
9918   }
9919   switch_multi_transporter(signal, node_id);
9920 }
9921 
9922 void
execSWITCH_MULTI_TRP_REF(Signal * signal)9923 Qmgr::execSWITCH_MULTI_TRP_REF(Signal *signal)
9924 {
9925   /**
9926    * The other node wasn't ready to connect multi sockets to us yet.
9927    * We will wait for a short time and try again.
9928    */
9929   SwitchMultiTrpRef *ref = (SwitchMultiTrpRef*)&signal->theData[0];
9930   Uint32 node_id = ref->nodeId;
9931   NodeRecPtr nodePtr;
9932   nodePtr.i = node_id;
9933   ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
9934   ndbrequire(m_current_switch_multi_trp_node == node_id);
9935   ndbrequire(nodePtr.p->m_is_ready_to_switch_trp);
9936   m_current_switch_multi_trp_node = 0;
9937   nodePtr.p->m_is_ready_to_switch_trp = false;
9938   DEB_MULTI_TRP(("Recvd SWITCH_MULTI_TRP_REF from node %u", node_id));
9939   signal->theData[0] = ZSWITCH_MULTI_TRP;
9940   signal->theData[1] = node_id;
9941   sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 100, 2);
9942 }
9943 
9944 void
switch_multi_transporter(Signal * signal,NodeId node_id)9945 Qmgr::switch_multi_transporter(Signal *signal, NodeId node_id)
9946 {
9947   ndbrequire(m_current_switch_multi_trp_node == node_id);
9948   NodeRecPtr nodePtr;
9949   nodePtr.i = node_id;
9950   ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
9951   g_eventLogger->info("Switch to %u multi trp for node %u",
9952                       nodePtr.p->m_used_num_multi_trps,
9953                       node_id);
9954   nodePtr.p->m_is_preparing_switch_trp = false;
9955   nodePtr.p->m_is_ready_to_switch_trp = false;
9956   nodePtr.p->m_is_multi_trp_setup = false;
9957   /**
9958    * We have now reached the point where it is time to switch the transporter
9959    * from using the old transporters, currently in the active transporter set.
9960    *
9961    * The switch must be made such that we don't risk changing signal order
9962    * for signals sent from one thread to another thread in another node.
9963    *
9964    * To accomplish this we will ensure that all block threads are blocked
9965    * in THRMAN. THRMAN exists in each block thread. So a signal to THRMAN
9966    * in each THRMAN can be used to quickly synchronize all threads in the
9967    * node and keep them waiting in THRMAN. When all threads have stopped we
9968    * will also call lockMultiTransporters to avoid the connect threads from
9969    * interfering in the middle of this change and finally we will lock
9970    * the send mutex on the node we are changing to ensure that also the
9971    * send threads avoid interference with this process.
9972    *
9973    * At this point also each thread will have flushed the send buffers to
9974    * ensure that we can ensure that the last signal sent in the node
9975    * connection is a ACTIVATE_TRP_REQ signal. When the receiver gets this
9976    * signal he can activate the receiving from the new transporters since
9977    * we have ensured that no more signals will be received on the old
9978    * transporters.
9979    *
9980    * When all this things have been prepared and the ACTIVATE_TRP_REQ signal
9981    * is sent, now is the time to switch the active transporters and also
9982    * to change the MultiTransporter to use the new hash algorithm, this
9983    * is automatic by changing the number of transporters.
9984    *
9985    * We close the original socket when ACTIVATE_TRP_CONF is received from
9986    * the other side indicating that we are now in communication with the
9987    * other side over the new transporters.
9988    */
9989   FreezeThreadReq* req = CAST_PTR(FreezeThreadReq, signal->getDataPtrSend());
9990   req->nodeId = node_id;
9991   req->senderRef = reference();
9992   sendSignal(THRMAN_REF, GSN_FREEZE_THREAD_REQ, signal,
9993              FreezeThreadReq::SignalLength, JBA);
9994   return;
9995 }
9996 
void
Qmgr::execFREEZE_ACTION_REQ(Signal *signal)
{
  /**
   * THRMAN reports that all other block threads are now frozen. This
   * thread performs the actual switch from the single socket to the
   * multi socket setup towards node_id, then replies with
   * FREEZE_ACTION_CONF so the threads can be released again.
   */
  jamEntry();
  FreezeActionReq *req = (FreezeActionReq*)&signal->theData[0];
  Uint32 node_id = req->nodeId;
  BlockReference ret_ref = req->senderRef;
  CRASH_INSERTION(956);
  if (ERROR_INSERTED(981))
  {
    NdbSleep_MilliSleep(1500);
  }
  /**
   * All threads except our thread is now frozen.
   *
   * Before we send the final signal on the current transporter we switch to
   * having the multi socket transporters as neighbours. By so doing we ensure
   * that the current transporter is inserted into the non-neighbour list when
   * sending the signal. If we would change after the sending we would miss
   * sending this signal since we change to the new neighbour setup after
   * sending, but before we perform the actual send.
   *
   * It is a bit tricky to change the neighbour transporters. We check the
   * neighbour in sendSignal and expect that in do_send that the same
   * neighbour handling is performed. We handle this here by first changing
   * the neighbour setting and next sending the signal. This ensures that
   * the transporter will be handled by non-neighbour handling.
   *
   * We will lock the send to
   * the current transporter to ensure that the transporter will notice when
   * the last signal have been sent. Next we will send the last signal
   * on the currently active socket. When this signal is sent we will flush
   * the send buffers to ensure that the transporter knows when the last data
   * have been sent. We will then flag to the transporter that it should
   * shutdown the socket for writes. When both sides have performed this
   * action the socket will be closed.
   *
   * These actions will ensure that ACTIVATE_TRP_REQ is the last data
   * received on the current transporter and ensure that from now on
   * all sends are directed to the new set of transporters.
   * To ensure that no other thread is changing the multi transporter
   * setup we will lock the multi transporter mutex while performing
   * these actions. The only other thread that can be active here is
   * the send threads since we blocked all other threads at this point.
   *
   * Next we will release all mutexes and send FREEZE_ACTION_CONF to
   * THRMAN to ensure that things get started again. We will receive
   * FREEZE_THREAD_CONF back from THRMAN when all threads are in action
   * again.
   */
  DEB_MULTI_TRP(("Block threads frozen for node %u", node_id));

  globalTransporterRegistry.lockMultiTransporters();
  Multi_Transporter *multi_trp =
    globalTransporterRegistry.get_node_multi_transporter(node_id);
  if (is_multi_socket_setup_active(node_id, true))
  {
    jam();

    /* Lock sends on the currently active (old) transporter. */
    Transporter *current_trp = multi_trp->get_active_transporter(0);
    TrpId current_trp_id = current_trp->getTransporterIndex();
    multi_trp->get_callback_obj()->lock_send_transporter(node_id,
                                                         current_trp_id);

    /* Also lock sends on every new (still inactive) transporter. */
    Uint32 num_inactive_transporters =
      multi_trp->get_num_inactive_transporters();
    for (Uint32 i = 0; i < num_inactive_transporters; i++)
    {
      jam();
      Transporter *tmp_trp = multi_trp->get_inactive_transporter(i);
      TrpId trp_id = tmp_trp->getTransporterIndex();
      multi_trp->get_callback_obj()->lock_send_transporter(node_id, trp_id);
    }

    /* ACTIVATE_TRP_REQ is the last signal on the old transporter. */
    ActivateTrpReq* act_trp_req = CAST_PTR(ActivateTrpReq,
                                           signal->getDataPtrSend());
    act_trp_req->nodeId = getOwnNodeId();
    act_trp_req->numTrps = num_inactive_transporters;
    act_trp_req->senderRef = reference();
    sendSignal(calcQmgrBlockRef(node_id), GSN_ACTIVATE_TRP_REQ, signal,
               ActivateTrpReq::SignalLength, JBB);

    flush_send_buffers();
    /* Either perform send or insert_trp below TODO */
    multi_trp->get_callback_obj()->unlock_send_transporter(node_id,
                                                           current_trp_id);

    if (ERROR_INSERTED(982))
    {
      NdbSleep_MilliSleep(2500);
    }
    /* Swap active/inactive transporter sets (also changes send hash). */
    multi_trp->switch_active_trp();

    /* Unlock and enable sending on the now-active new transporters. */
    Uint32 num_active_transporters =
      multi_trp->get_num_active_transporters();
    for (Uint32 i = 0; i < num_active_transporters; i++)
    {
      jam();
      Transporter *tmp_trp = multi_trp->get_active_transporter(i);
      TrpId id = tmp_trp->getTransporterIndex();
      multi_trp->get_callback_obj()->unlock_send_transporter(node_id, id);
      multi_trp->get_callback_obj()->enable_send_buffer(node_id, id);
    }
    globalTransporterRegistry.insert_node_transporter(node_id, multi_trp);
    globalTransporterRegistry.unlockMultiTransporters();

    if (ERROR_INSERTED(983))
    {
      NdbSleep_MilliSleep(2500);
    }
    DEB_MULTI_TRP(("Change neighbour node setup for node %u",
                   node_id));
    startChangeNeighbourNode();
    setNeighbourNode(node_id);
    endChangeNeighbourNode();

    if (ERROR_INSERTED(984))
    {
      NdbSleep_MilliSleep(2500);
    }
    DEB_MULTI_TRP(("Now communication is active with node %u using multi trp"
                   ", using %u transporters",
                   node_id,
                   num_active_transporters));
  }
  else
  {
    /* Node failed in the middle of the switch: nothing to do here. */
    jam();
    DEB_MULTI_TRP(("Node %u failed when freezing threads", node_id));
    globalTransporterRegistry.unlockMultiTransporters();
  }
  FreezeActionConf *conf =
    CAST_PTR(FreezeActionConf, signal->getDataPtrSend());
  conf->nodeId = node_id;
  sendSignal(ret_ref, GSN_FREEZE_ACTION_CONF, signal,
             FreezeActionConf::SignalLength, JBA);
}
10134 
10135 bool
is_multi_socket_setup_active(Uint32 node_id,bool locked)10136 Qmgr::is_multi_socket_setup_active(Uint32 node_id, bool locked)
10137 {
10138   bool ret_val = false;
10139   NodeRecPtr nodePtr;
10140   nodePtr.i = node_id;
10141   ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
10142   if (!locked)
10143   {
10144     globalTransporterRegistry.lockMultiTransporters();
10145   }
10146   if (c_connectedNodes.get(node_id) &&
10147       nodePtr.p->phase == ZRUNNING)
10148   {
10149     jam();
10150     DEB_MULTI_TRP(("Multi socket setup for node %u is active",
10151                    node_id));
10152     ret_val = true;
10153   }
10154   if (!locked)
10155   {
10156     globalTransporterRegistry.unlockMultiTransporters();
10157   }
10158   return ret_val;
10159 }
10160 
10161 void
execFREEZE_THREAD_CONF(Signal * signal)10162 Qmgr::execFREEZE_THREAD_CONF(Signal *signal)
10163 {
10164   FreezeThreadConf *conf = (FreezeThreadConf*)&signal->theData[0];
10165   Uint32 node_id = conf->nodeId;
10166   NodeRecPtr nodePtr;
10167   nodePtr.i = node_id;
10168   ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
10169   CRASH_INSERTION(957);
10170   if (is_multi_socket_setup_active(node_id, false))
10171   {
10172     jam();
10173     nodePtr.p->m_is_freeze_thread_completed = true;
10174     DEB_MULTI_TRP(("Freeze block threads for node %u completed", node_id));
10175     if (ERROR_INSERTED(985))
10176     {
10177       NdbSleep_MilliSleep(1500);
10178     }
10179     check_switch_completed(signal, node_id);
10180   }
10181   else
10182   {
10183     jam();
10184     DEB_MULTI_TRP(("2:Node %u failed when freezing threads", node_id));
10185   }
10186 }
10187 
10188 void
execACTIVATE_TRP_REQ(Signal * signal)10189 Qmgr::execACTIVATE_TRP_REQ(Signal *signal)
10190 {
10191   /**
10192    * Receiving this signal implies that node sending it is still
10193    * seen as being up and running.
10194    */
10195   jamEntry();
10196   CRASH_INSERTION(958);
10197   ActivateTrpReq* req = (ActivateTrpReq*)&signal->theData[0];
10198   Uint32 node_id = req->nodeId;
10199   Uint32 num_trps = req->numTrps;
10200   NodeRecPtr nodePtr;
10201   nodePtr.i = node_id;
10202   ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
10203   nodePtr.p->m_multi_trp_blockref = req->senderRef;
10204   nodePtr.p->m_num_activated_trps = num_trps;
10205   ndbrequire(num_trps == nodePtr.p->m_used_num_multi_trps);
10206 
10207   if (ERROR_INSERTED(977))
10208   {
10209     NdbSleep_MilliSleep(1500);
10210   }
10211   SyncThreadViaReqConf *syncReq =
10212     (SyncThreadViaReqConf*)signal->getDataPtrSend();
10213   syncReq->senderRef = reference();
10214   syncReq->senderData = node_id;
10215   syncReq->actionType = SyncThreadViaReqConf::FOR_ACTIVATE_TRP_REQ;
10216   sendSignal(TRPMAN_REF, GSN_SYNC_THREAD_VIA_REQ, signal,
10217              SyncThreadViaReqConf::SignalLength, JBA);
10218 }
10219 
void
Qmgr::handle_activate_trp_req(Signal *signal, Uint32 node_id)
{
  /**
   * Called when the TRPMAN receive-thread sync (initiated from
   * execACTIVATE_TRP_REQ) has completed. Sends one ACTIVATE_TRP_REQ
   * per transporter to TRPMAN so that each receive instance activates
   * receiving on the new transporters towards node_id.
   */
  jam();
  NodeRecPtr nodePtr;
  nodePtr.i = node_id;
  ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
  Uint32 num_trps = nodePtr.p->m_num_activated_trps;
  CRASH_INSERTION(959);
  nodePtr.p->m_num_activated_trps = 0;
  DEB_MULTI_TRP(("Activate receive in multi trp for node %u, from ref: %x",
                 node_id,
                 nodePtr.p->m_multi_trp_blockref));
  globalTransporterRegistry.lockMultiTransporters();
  Multi_Transporter *multi_trp =
    globalTransporterRegistry.get_node_multi_transporter(node_id);
  if (is_multi_socket_setup_active(node_id, true))
  {
    jam();
    Transporter *t;
    for (Uint32 i = 0; i < num_trps; i++)
    {
      /**
       * The multi transporters are normally still in the inactive set
       * at this point; if this side has already swapped the active set,
       * they are found in the active set instead. The set membership is
       * re-checked each iteration while holding the mutex.
       */
      if (multi_trp->get_num_inactive_transporters() == num_trps)
      {
        jam();
        t = multi_trp->get_inactive_transporter(i);
      }
      else
      {
        jam();
        t = multi_trp->get_active_transporter(i);
        ndbrequire(multi_trp->get_num_active_transporters());
      }
      Uint32 trp_id = t->getTransporterIndex();
      ActivateTrpReq *act_trp_req =
        CAST_PTR(ActivateTrpReq, signal->getDataPtrSend());
      act_trp_req->nodeId = node_id;
      act_trp_req->trpId = trp_id;
      act_trp_req->numTrps = num_trps;
      act_trp_req->senderRef = reference();
      sendSignal(TRPMAN_REF, GSN_ACTIVATE_TRP_REQ, signal,
                 ActivateTrpReq::SignalLength, JBB);
      if (ERROR_INSERTED(986))
      {
        NdbSleep_MilliSleep(500);
      }
    }
  }
  globalTransporterRegistry.unlockMultiTransporters();
}
10270 
10271 void
execACTIVATE_TRP_CONF(Signal * signal)10272 Qmgr::execACTIVATE_TRP_CONF(Signal *signal)
10273 {
10274   jamEntry();
10275   ActivateTrpConf *conf = (ActivateTrpConf*)&signal->theData[0];
10276   Uint32 node_id = conf->nodeId;
10277   BlockReference sender_ref = conf->senderRef;
10278   NodeRecPtr nodePtr;
10279   nodePtr.i = node_id;
10280   ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
10281 
10282   DEB_MULTI_TRP(("ACTIVATE_TRP_CONF(QMGR) own node %u about node %u"
10283                  ", ref: %x",
10284                  getOwnNodeId(),
10285                  node_id,
10286                  sender_ref));
10287   if (refToNode(sender_ref) == getOwnNodeId())
10288   {
10289     if (is_multi_socket_setup_active(node_id, false))
10290     {
10291       jam();
10292       CRASH_INSERTION(960);
10293       nodePtr.p->m_num_activated_trps++;
10294       if (nodePtr.p->m_num_activated_trps < nodePtr.p->m_used_num_multi_trps)
10295       {
10296         jam();
10297         return;
10298       }
10299       DEB_MULTI_TRP(("Complete activation recv for multi trp node %u,"
10300                      " own node: %u",
10301                      node_id,
10302                      getOwnNodeId()));
10303       ndbrequire(nodePtr.p->m_num_activated_trps ==
10304                  nodePtr.p->m_used_num_multi_trps);
10305       ActivateTrpConf *conf =
10306         CAST_PTR(ActivateTrpConf, signal->getDataPtrSend());
10307       conf->nodeId = getOwnNodeId();
10308       conf->senderRef = reference();
10309       BlockReference ref = nodePtr.p->m_multi_trp_blockref;
10310       nodePtr.p->m_multi_trp_blockref = 0;
10311       ndbrequire(refToNode(ref) == node_id);
10312       ndbrequire(refToMain(ref) == QMGR);
10313       sendSignal(ref, GSN_ACTIVATE_TRP_CONF, signal,
10314                  ActivateTrpConf::SignalLength, JBB);
10315       nodePtr.p->m_is_activate_trp_ready_for_me = true;
10316       if (ERROR_INSERTED(975))
10317       {
10318         NdbSleep_MilliSleep(1500);
10319       }
10320       check_switch_completed(signal, node_id);
10321     }
10322     else
10323     {
10324       jam();
10325       DEB_MULTI_TRP(("Node %u failed in multi trp activation", node_id));
10326     }
10327   }
10328   else
10329   {
10330     jam();
10331     CRASH_INSERTION(952);
10332     DEB_MULTI_TRP(("Completed activation recv for multi trp node %u",
10333                    node_id));
10334     ndbrequire(is_multi_socket_setup_active(node_id, false));
10335     nodePtr.p->m_is_activate_trp_ready_for_other = true;
10336     check_switch_completed(signal, node_id);
10337   }
10338 }
10339 
void
Qmgr::check_switch_completed(Signal *signal, NodeId node_id)
{
  /**
   * Complete the multi transporter switch for node_id once all three
   * milestones have been reached: the other side finished activation,
   * our side finished activation, and the thread freeze round-trip is
   * done. Shuts down the old (now inactive) transporters and resets
   * the switch state.
   */
  NodeRecPtr nodePtr;
  nodePtr.i = node_id;
  ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
  if (!(nodePtr.p->m_is_activate_trp_ready_for_other &&
        nodePtr.p->m_is_activate_trp_ready_for_me &&
        nodePtr.p->m_is_freeze_thread_completed))
  {
    jam();
    DEB_MULTI_TRP(("Still waiting for node %u switch to complete", node_id));
    return;
  }

  /* Copy the inactive transporter pointers while holding the mutex,
   * then perform the (potentially slow) shutdowns after releasing it. */
  globalTransporterRegistry.lockMultiTransporters();
  Multi_Transporter *multi_trp =
    globalTransporterRegistry.get_node_multi_transporter(node_id);
  ndbrequire(multi_trp && multi_trp->isMultiTransporter());
  Uint32 num_inactive_transporters =
    multi_trp->get_num_inactive_transporters();
  Transporter *array_trp[MAX_NODE_GROUP_TRANSPORTERS];
  for (Uint32 i = 0; i < num_inactive_transporters; i++)
  {
    jam();
    Transporter *tmp_trp = multi_trp->get_inactive_transporter(i);
    array_trp[i] = tmp_trp;
  }
  globalTransporterRegistry.unlockMultiTransporters();
  for (Uint32 i = 0; i < num_inactive_transporters; i++)
  {
    jam();
    Transporter *tmp_trp = array_trp[i];
    TrpId trp_id = tmp_trp->getTransporterIndex();
    tmp_trp->get_callback_obj()->lock_transporter(node_id, trp_id);
    tmp_trp->shutdown();
    tmp_trp->get_callback_obj()->unlock_transporter(node_id, trp_id);
    multi_trp->get_callback_obj()->disable_send_buffer(node_id, trp_id);
  }
  /**
   * We have now completed the switch to new set of transporters, the
   * old set is inactive and will be put back if the node fails. We
   * are now ready to see if any more nodes require attention.
   */
  if (ERROR_INSERTED(976))
  {
    NdbSleep_MilliSleep(1500);
  }
  m_current_switch_multi_trp_node = 0;
  nodePtr.p->m_is_using_multi_trp = true;
  nodePtr.p->m_is_ready_to_switch_trp = false;
  nodePtr.p->m_is_activate_trp_ready_for_me = false;
  nodePtr.p->m_is_activate_trp_ready_for_other = false;
  nodePtr.p->m_is_freeze_thread_completed = false;
  nodePtr.p->m_set_up_multi_trp_started = false;
  DEB_MULTI_TRP(("Completed switch to multi trp for node %u", node_id));
  CRASH_INSERTION(953);
  check_more_trp_switch_nodes(signal);
}
10399 
10400 void
check_more_trp_switch_nodes(Signal * signal)10401 Qmgr::check_more_trp_switch_nodes(Signal* signal)
10402 {
10403   if (!check_all_multi_trp_nodes_connected())
10404   {
10405     jam();
10406     /* Still waiting for nodes to complete connect */
10407     DEB_MULTI_TRP(("Still waiting for nodes to complete connect"));
10408     return;
10409   }
10410   NodeId node_id = 0;
10411   if (select_node_id_for_switch(node_id, false))
10412   {
10413     jam();
10414     send_switch_multi_transporter(signal, node_id, false);
10415     return;
10416   }
10417   if (m_initial_set_up_multi_trp_done)
10418   {
10419     jam();
10420     DEB_MULTI_TRP(("Initial setup already done"));
10421     return;
10422   }
10423   if (m_get_num_multi_trps_sent != 0)
10424   {
10425     jam();
10426     DEB_MULTI_TRP(("Still waiting for GET_NUM_MULTI_TRP_REQ"));
10427     return;
10428   }
10429   bool done = true;
10430   for (Uint32 node_id = 1; node_id < MAX_NDB_NODES; node_id++)
10431   {
10432     NodeRecPtr nodePtr;
10433     nodePtr.i = node_id;
10434     ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
10435     if (nodePtr.p->m_is_in_same_nodegroup &&
10436         nodePtr.p->phase == ZRUNNING &&
10437         nodePtr.p->m_set_up_multi_trp_started)
10438     {
10439       if (!nodePtr.p->m_is_using_multi_trp)
10440       {
10441         jam();
10442         done = false;
10443       }
10444     }
10445   }
10446   if (done)
10447   {
10448     jam();
10449     DEB_MULTI_TRP(("Initial setup of multi trp now done"));
10450     m_initial_set_up_multi_trp_done = true;
10451     sendSignal(m_ref_set_up_multi_trp_req,
10452                GSN_SET_UP_MULTI_TRP_CONF,
10453                signal,
10454                1,
10455                JBB);
10456   }
10457   else
10458   {
10459     DEB_MULTI_TRP(("Not done with setup of multi trp yet"));
10460     jam();
10461   }
10462 }
10463 
10464 void
check_no_multi_trp(Signal * signal,NodeId node_id)10465 Qmgr::check_no_multi_trp(Signal *signal, NodeId node_id)
10466 {
10467   NodeRecPtr nodePtr;
10468   nodePtr.i = node_id;
10469   ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
10470   if (nodePtr.p->m_is_get_num_multi_trp_active)
10471   {
10472     jam();
10473     dec_get_num_multi_trps_sent(nodePtr.i);
10474   }
10475   DEB_MULTI_TRP(("check_no_multi_trp for node %u", node_id));
10476   if (node_id == m_current_switch_multi_trp_node)
10477   {
10478     jam();
10479     m_current_switch_multi_trp_node = 0;
10480     check_more_trp_switch_nodes(signal);
10481   }
10482 }
10483 
10484 bool
check_all_multi_trp_nodes_connected()10485 Qmgr::check_all_multi_trp_nodes_connected()
10486 {
10487   /**
10488    * Wait for all neighbour nodes to connect all multi transporters
10489    * before proceeding with the next phase where we start switching
10490    * to multi transporter setup.
10491    */
10492   NodeRecPtr nodePtr;
10493   for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++)
10494   {
10495     ptrAss(nodePtr, nodeRec);
10496     if (nodePtr.p->phase == ZRUNNING &&
10497         nodePtr.p->m_is_in_same_nodegroup &&
10498         (nodePtr.p->m_is_preparing_switch_trp ||
10499          nodePtr.p->m_is_get_num_multi_trp_active))
10500     {
10501       /* Neighbour node preparing switch */
10502       jam();
10503       jamLine(Uint16(nodePtr.i));
10504       if (!nodePtr.p->m_is_multi_trp_setup)
10505       {
10506         jam();
10507         /* Still waiting for connections of this node to complete */
10508         return false;
10509       }
10510     }
10511   }
10512   jam();
10513   /* All nodes to connect are done */
10514   return true;
10515 }
10516 
10517 bool
select_node_id_for_switch(NodeId & node_id,bool check_found)10518 Qmgr::select_node_id_for_switch(NodeId &node_id, bool check_found)
10519 {
10520   NodeId max_node_id = 0;
10521   NodeRecPtr nodePtr;
10522   for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++)
10523   {
10524     ptrAss(nodePtr, nodeRec);
10525     if (nodePtr.p->phase == ZRUNNING &&
10526         nodePtr.p->m_is_in_same_nodegroup &&
10527         nodePtr.p->m_is_preparing_switch_trp &&
10528         nodePtr.p->m_is_multi_trp_setup)
10529     {
10530       if (nodePtr.i > max_node_id)
10531       {
10532         jam();
10533         jamLine(Uint16(nodePtr.i));
10534         max_node_id = nodePtr.i;
10535       }
10536     }
10537   }
10538   ndbrequire((!check_found) || (max_node_id != 0));
10539   if (m_current_switch_multi_trp_node != 0)
10540   {
10541     jam();
10542     return false;
10543   }
10544   if (max_node_id < getOwnNodeId())
10545   {
10546     jam();
10547     return false;
10548   }
10549   node_id = max_node_id;
10550   nodePtr.i = max_node_id;
10551   ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
10552   ndbrequire(!nodePtr.p->m_is_ready_to_switch_trp);
10553   jam();
10554   return true;
10555 }
10556