1 /*
2 Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License, version 2.0,
6 as published by the Free Software Foundation.
7
8 This program is also distributed with certain software (including
9 but not limited to OpenSSL) that is licensed under separate terms,
10 as designated in a particular file or component or in included license
11 documentation. The authors of MySQL hereby grant you an additional
12 permission to link the program and your derivative works with the
13 separately licensed software that they have included with MySQL.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License, version 2.0, for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
23 */
24
25 #define QMGR_C
26 #include "Qmgr.hpp"
27 #include <pc.hpp>
28 #include <NdbTick.h>
29 #include <signaldata/NodeRecoveryStatusRep.hpp>
30 #include <signaldata/EventReport.hpp>
31 #include <signaldata/StartOrd.hpp>
32 #include <signaldata/CloseComReqConf.hpp>
33 #include <signaldata/PrepFailReqRef.hpp>
34 #include <signaldata/NodeFailRep.hpp>
35 #include <signaldata/ReadNodesConf.hpp>
36 #include <signaldata/NFCompleteRep.hpp>
37 #include <signaldata/CheckNodeGroups.hpp>
38 #include <signaldata/ArbitSignalData.hpp>
39 #include <signaldata/ApiRegSignalData.hpp>
40 #include <signaldata/ApiVersion.hpp>
41 #include <signaldata/BlockCommitOrd.hpp>
42 #include <signaldata/FailRep.hpp>
43 #include <signaldata/DisconnectRep.hpp>
44 #include <signaldata/ApiBroadcast.hpp>
45 #include <signaldata/Upgrade.hpp>
46 #include <signaldata/EnableCom.hpp>
47 #include <signaldata/RouteOrd.hpp>
48 #include <signaldata/NodePing.hpp>
49 #include <signaldata/DihRestart.hpp>
50 #include <signaldata/DumpStateOrd.hpp>
51 #include <signaldata/IsolateOrd.hpp>
52 #include <signaldata/ProcessInfoRep.hpp>
53 #include <signaldata/LocalSysfile.hpp>
54 #include <signaldata/SyncThreadViaReqConf.hpp>
55 #include <signaldata/TakeOverTcConf.hpp>
56 #include <signaldata/GetNumMultiTrp.hpp>
57 #include <signaldata/Sync.hpp>
58 #include <ndb_version.h>
59 #include <OwnProcessInfo.hpp>
60 #include <NodeInfo.hpp>
61 #include <NdbSleep.h>
62
63 #include <TransporterRegistry.hpp> // Get connect address
64
65 #include "../dbdih/Dbdih.hpp"
66 #include <EventLogger.hpp>
67 extern EventLogger * g_eventLogger;
68
69 #if (defined(VM_TRACE) || defined(ERROR_INSERT))
70 //#define DEBUG_MULTI_TRP 1
71 //#define DEBUG_STARTUP 1
72 //#define DEBUG_ARBIT 1
73 #endif
74
75 #ifdef DEBUG_ARBIT
76 #define DEB_ARBIT(arglist) do { g_eventLogger->info arglist ; } while (0)
77 #else
78 #define DEB_ARBIT(arglist) do { } while (0)
79 #endif
80
81 #ifdef DEBUG_MULTI_TRP
82 #define DEB_MULTI_TRP(arglist) do { g_eventLogger->info arglist ; } while (0)
83 #else
84 #define DEB_MULTI_TRP(arglist) do { } while (0)
85 #endif
86
87 #ifdef DEBUG_STARTUP
88 #define DEB_STARTUP(arglist) do { g_eventLogger->info arglist ; } while (0)
89 #else
90 #define DEB_STARTUP(arglist) do { } while (0)
91 #endif
92
93 //#define DEBUG_QMGR_START
94 #ifdef DEBUG_QMGR_START
95 #include <DebuggerNames.hpp>
96 #define QMGR_DEBUG(x) ndbout << "QMGR " << __LINE__ << ": " << x << endl
97 #define DEBUG_START(gsn, node, msg) QMGR_DEBUG(getSignalName(gsn) << " to: " << node << " - " << msg)
98 #define DEBUG_START2(gsn, rg, msg) { \
99 char nodes[NdbNodeBitmask::TextLength + 1]; \
100 QMGR_DEBUG(getSignalName(gsn) << " to: " << rg.m_nodes.getText(nodes) << " - " << msg); \
101 }
102 #define DEBUG_START3(signal, msg) QMGR_DEBUG(getSignalName(signal->header.theVerId_signalNumber) << " from " << refToNode(signal->getSendersBlockRef()) << " - " << msg);
103 #else
104 #define QMGR_DEBUG(x)
105 #define DEBUG_START(gsn, node, msg)
106 #define DEBUG_START2(gsn, rg, msg)
107 #define DEBUG_START3(signal, msg)
108 #endif
109
110 #define JAM_FILE_ID 360
111
112
113 /**
114 * QMGR provides the following services:
115 *
116 * 1) Node id allocation
117 * ---------------------
118 * This is a service provided to the Management server when a node is
119 * requesting a config.
120 *
121 * 2) Heartbeat service for data nodes
122 * -----------------------------------
123 * This is a service provided to the upper levels in NDB. When the
124 * heartbeat discovers a failure it will send a FAIL_REP signal to
125 * NDBCNTR.
126 *
127 * 3) Master assignment
128 * --------------------
129 * NDB relies on that a new master can be allocated at each failure
130 * through the usage of an algorithm to calculate the next master.
131 * To handle this nodes are entering the cluster one node at a time.
132 * This gives each node a dynamic node id, the new master is simply
133 * selected as the node with the lowest dynamic id.
134 *
135 * When the cluster is started from scratch it is important to select
136 * a master that is actually part of the cluster startup and not
137 * started later through a node restart handling. To handle this
138 * QMGR makes use of the DIH_RESTART service provided by DIH.
139 * This service will provide the GCI that the node can be started
140 * from. This GCI is sent in each CM_REGREQ signal to ensure that
141 * each node can decide whether they should be assigned as master
142 * of the cluster.
143 *
144 * In QMGR the master is called President and in DIH, NDBCNTR and DICT
145 * the node is called master node. All these roles are always given
146 * to the same node. Most protocols have a master role and thus most
147 * protocols need to handle master take over.
148 *
149 * 4) Transactional node failure service
150 * -------------------------------------
151 * Whenever a node fails, we need to ensure that all nodes agree on the
152 * failed nodes. To handle this QMGR uses a prepare phase where the
153 * president sends a list of failed nodes, other nodes can add to this
154 * list in which case a new prepare phase is started. After all nodes
155 * have agreed on the list of failed nodes the QMGR president sends a
156 * list of nodes in the COMMIT_FAILREQ signal that specifies which nodes
157 * have failed. This list is then sent up to NDBCNTR that handles the
158 * spreading of this information to all other blocks in the NDB data
159 * node.
160 *
161 * The information is also sent to the connected API nodes.
162 *
163 * 5) Arbitration service
164 * ----------------------
165 * In the case where we are not sure if the cluster has been partitioned,
166 * we need to query an arbitrator to decide whether our node should survive
167 * the crash. If no arbitrator is assigned, the node will fail. The
168 * arbitrator must be prepared before the crash happens, the arbitrator
169 * can only be used for one response. After this response a new arbitrator
170 * must be selected.
171 *
172 * It is also possible to not use any arbitrator service provided by NDB.
173 * In this case QMGR will write a message to the Cluster log and the
174 * external arbitrator needs to take action and shut down the node that
175 * it wants to not survive.
176 *
177 * 6) Skip node service
178 * --------------------
179 * When starting a data node it is possible to select a set of nodes to not
180 * wait for in cluster restart. These nodes are provided as startup
181 * parameter in ndbmtd/ndbd, --nowait-nodes.
182 *
183 * 7) Heartbeat service for API nodes
184 * ----------------------------------
185 * QMGR sends heartbeat signals to all API nodes connected with some delay.
186 * If API doesn't send any response, it will shut down the API connection.
187 *
188 * 8) Read nodes service
189 * ---------------------
190 * This is used to check nodes in certain situations.
191 *
192 * 9) Connectivity check service
193 * -----------------------------
194 * In the case of node failures we can configure NDB to make a full
195 * connectivity check before deciding which nodes to assign as failed
196 * nodes.
197 *
198 * 10) Ndbinfo membership table
199 * ----------------------------
200 * Reports the current setup of nodes, their dynamic ids and neighbours.
201 *
202 * 11) Ndbinfo process table
203 * -------------------------
204 * Reports various information required to manage NDB Cluster.
205 *
206 * 12) Isolate node service
207 * ------------------------
208 * Connected to the connectivity check service.
209 *
210 * 13) Global node state service
211 * -----------------------------
212 * Service used by many other blocks to inform them of node status.
213 *
214 * QMGR uses the following services:
215 *
216 * 1) Connect service
217 * ------------------
218 * The transporter will inform QMGR about nodes connected through the
219 * CONNECT_REP signal.
220 *
221 * 2) Check node group service in DIH
222 * ----------------------------------
223 * Used by master assignment service and node failure services.
224 *
225 * 3) DIH_RESTART service in DIH
226 * -----------------------------
227 * See above in master assignment service.
228 *
229 * 4) Block commit service
230 * -----------------------
231 * Block commits when we form a new cluster after node failures.
232 * This service is provided by DIH.
233 *
234 * 5) Close communication service
235 * ------------------------------
236 * We need to inform transporter when a node has failed to ensure
237 * the transporter will close the communication to this node.
238 *
239 * 6) Enable communication service
240 * -------------------------------
241 * We need to enable communication to a node after we finished node
242 * failure handling for a node.
243 */
244
245 /**
246 * c_start.m_gsn = GSN_CM_REGREQ
247 * Possible for all nodes
248 * c_start.m_nodes contains all nodes in config
249 *
250 * c_start.m_gsn = GSN_CM_NODEINFOREQ;
251 * Set when receiving CM_REGCONF
252 * State possible for starting node only (not in cluster)
253 *
 * c_start.m_nodes contains all nodes in the alive cluster
 *                 that have not replied to GSN_CM_NODEINFOREQ
256 * passed by president in GSN_CM_REGCONF
257 *
258 * c_start.m_gsn = GSN_CM_ADD
259 * Possible for president only
260 * Set when receiving and accepting CM_REGREQ (to include node)
261 *
262 * c_start.m_nodes contains all nodes in alive cluster + starting node
263 * that has not replied to GSN_CM_ADD
264 * by sending GSN_CM_ACKADD
265 *
266 * c_start.m_gsn = GSN_CM_NODEINFOCONF
267 * Possible for non presidents only
268 * c_start.m_nodes contains a node that has been accepted by president
269 * but has not connected to us yet
270 */
271
272 // Signal entries and statement blocks
273 /* 4 P R O G R A M */
274 /*******************************/
275 /* CMHEART_BEAT */
276 /*******************************/
/**
 * CM_HEARTBEAT received from another data node: reset that node's
 * missed-heartbeat counter so the periodic heartbeat check keeps
 * treating the sender as alive.
 */
void Qmgr::execCM_HEARTBEAT(Signal* signal)
{
  NodeRecPtr hbNodePtr;
  jamEntry();
  hbNodePtr.i = signal->theData[0];  // node id of the heartbeat sender
  ptrCheckGuard(hbNodePtr, MAX_NDB_NODES, nodeRec);
  set_hb_count(hbNodePtr.i) = 0;     // clear the missed-heartbeat count
  return;
}//Qmgr::execCM_HEARTBEAT()
286
287 /*******************************/
288 /* CM_NODEINFOREF */
289 /*******************************/
/**
 * CM_NODEINFOREF: a refusal of our CM_NODEINFOREQ during node
 * inclusion. The code treats this unconditionally as a fatal error
 * and shuts the node down.
 */
void Qmgr::execCM_NODEINFOREF(Signal* signal)
{
  jamEntry();
  systemErrorLab(signal, __LINE__);
  return;
}//Qmgr::execCM_NODEINFOREF()
296
297 /*******************************/
298 /* CONTINUEB */
299 /*******************************/
execCONTINUEB(Signal * signal)300 void Qmgr::execCONTINUEB(Signal* signal)
301 {
302 jamEntry();
303 const Uint32 tcontinuebType = signal->theData[0];
304 const Uint32 tdata0 = signal->theData[1];
305 const Uint32 tdata1 = signal->theData[2];
306 switch (tcontinuebType) {
307 case ZREGREQ_TIMELIMIT:
308 jam();
309 if (c_start.m_startKey != tdata0 || c_start.m_startNode != tdata1) {
310 jam();
311 return;
312 }//if
313 regreqTimeLimitLab(signal);
314 break;
315 case ZREGREQ_MASTER_TIMELIMIT:
316 jam();
317 if (c_start.m_startKey != tdata0 || c_start.m_startNode != tdata1) {
318 jam();
319 return;
320 }//if
321 //regreqMasterTimeLimitLab(signal);
322 failReportLab(signal,
323 c_start.m_startNode,
324 FailRep::ZSTART_IN_REGREQ,
325 getOwnNodeId());
326 return;
327 case ZTIMER_HANDLING:
328 jam();
329 timerHandlingLab(signal);
330 return;
331 case ZARBIT_HANDLING:
332 jam();
333 runArbitThread(signal);
334 return;
335 case ZSTART_FAILURE_LIMIT:{
336 if (cpresident != ZNIL)
337 {
338 jam();
339 return;
340 }
341 const NDB_TICKS now = NdbTick_getCurrentTicks();
342 const Uint64 elapsed = NdbTick_Elapsed(c_start_election_time,now).milliSec();
343 if (c_restartFailureTimeout != Uint32(~0) &&
344 elapsed > c_restartFailureTimeout)
345 {
346 jam();
347 BaseString tmp;
348 tmp.append("Shutting down node as total restart time exceeds "
349 " StartFailureTimeout as set in config file ");
350 if(c_restartFailureTimeout == (Uint32) ~0)
351 tmp.append(" 0 (inifinite)");
352 else
353 tmp.appfmt(" %d", c_restartFailureTimeout);
354
355 progError(__LINE__, NDBD_EXIT_SYSTEM_ERROR, tmp.c_str());
356 }
357 signal->theData[0] = ZSTART_FAILURE_LIMIT;
358 sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 3000, 1);
359 return;
360 }
361 case ZNOTIFY_STATE_CHANGE:
362 {
363 jam();
364 handleStateChange(signal, tdata0);
365 return;
366 }
367 case ZCHECK_MULTI_TRP_CONNECT:
368 {
369 jam();
370 check_connect_multi_transporter(signal, tdata0);
371 return;
372 }
373 case ZRESEND_GET_NUM_MULTI_TRP_REQ:
374 {
375 jam();
376 send_get_num_multi_trp_req(signal, signal->theData[1]);
377 return;
378 }
379 case ZSWITCH_MULTI_TRP:
380 {
381 jam();
382 send_switch_multi_transporter(signal, signal->theData[1], true);
383 return;
384 }
385 default:
386 jam();
387 // ZCOULD_NOT_OCCUR_ERROR;
388 systemErrorLab(signal, __LINE__);
389 return;
390 }//switch
391 return;
392 }//Qmgr::execCONTINUEB()
393
394
/**
 * DEBUG_SIG: debug signal handler. Validates the node id carried in
 * theData[0] against the node record array and otherwise does nothing.
 */
void Qmgr::execDEBUG_SIG(Signal* signal)
{
  NodeRecPtr debugNodePtr;
  jamEntry();
  debugNodePtr.i = signal->theData[0];
  ptrCheckGuard(debugNodePtr, MAX_NODES, nodeRec);
  return;
}//Qmgr::execDEBUG_SIG()
403
404 /*******************************/
405 /* FAIL_REP */
406 /*******************************/
/**
 * FAIL_REP: some node reports that failNodeId has failed, together
 * with a failure cause. The reporting ("source") node id is taken
 * from the signal when the signal is long enough to carry it,
 * otherwise derived from the sender's block reference. Actual failure
 * handling continues in failReportLab().
 */
void Qmgr::execFAIL_REP(Signal* signal)
{
  const FailRep * const failRep = (FailRep *)&signal->theData[0];
  const NodeId failNodeId = failRep->failNodeId;
  const FailRep::FailCause failCause = (FailRep::FailCause)failRep->failCause;
  // Returns 0 when the signal is too short to carry the source node.
  Uint32 failSource = failRep->getFailSourceNodeId(signal->length());
  if (ERROR_INSERT_VALUE >= 951 && ERROR_INSERT_VALUE <= 960)
  {
    CRASH_INSERTION3();
  }
  if (!failSource)
  {
    /* Failure source not included, use sender of signal as 'source' */
    failSource = refToNode(signal->getSendersBlockRef());
  }

  CRASH_INSERTION(948);

  jamEntry();
  failReportLab(signal, failNodeId, failCause, failSource);
  return;
}//Qmgr::execFAIL_REP()
429
430 /*******************************/
431 /* PRES_TOREQ */
432 /*******************************/
execPRES_TOREQ(Signal * signal)433 void Qmgr::execPRES_TOREQ(Signal* signal)
434 {
435 jamEntry();
436 BlockReference Tblockref = signal->theData[0];
437 signal->theData[0] = getOwnNodeId();
438 signal->theData[1] = ccommitFailureNr;
439 sendSignal(Tblockref, GSN_PRES_TOCONF, signal, 2, JBA);
440 return;
441 }//Qmgr::execPRES_TOREQ()
442
/**
 * READ_CONFIG_REQ: read QMGR's configuration. The only parameter
 * handled here is the number of multi-socket transporters towards
 * other data nodes (m_num_multi_trps); afterwards READ_CONFIG_CONF
 * is sent back to the requester.
 */
void
Qmgr::execREAD_CONFIG_REQ(Signal* signal)
{
  jamEntry();

  const ReadConfigReq * req = (ReadConfigReq*)signal->getDataPtr();

  Uint32 ref = req->senderRef;
  Uint32 senderData = req->senderData;

  const ndb_mgm_configuration_iterator * p = 
    m_ctx.m_config.getOwnConfigIterator();
  ndbrequire(p != 0);

  // Multi transporters are only used by ndbmtd with send threads.
  m_num_multi_trps = 0;
  if (isNdbMt() && globalData.ndbMtSendThreads)
  {
    ndb_mgm_get_int_parameter(p,
                              CFG_DB_NODE_GROUP_TRANSPORTERS,
                              &m_num_multi_trps);
    if (m_num_multi_trps == 0)
    {
      jam();
      /**
       * The default assignment is to use the same number of multi
       * transporters as there are LDM instances in this node.
       * So essentially each LDM thread will have its own transporter
       * to the corresponding LDM thread in the other nodes in the
       * same node group. This will ensure that I can assign the
       * transporter to the send thread the LDM thread assists as
       * well.
       */
      m_num_multi_trps = globalData.ndbMtLqhThreads;
    }
    else
    {
      jam();
      /**
       * No reason to use more sockets than the maximum threads in one
       * thread group. We select the socket to use based on the
       * instance id of the receiving thread. So if we use more sockets
       * than threads in the largest thread group, there will be unused
       * sockets.
       *
       * So we select the configured number unless the maximum number of
       * LDM and/or TC threads is smaller than this number.
       */
      m_num_multi_trps = MIN(m_num_multi_trps,
                             MAX(globalData.ndbMtLqhThreads,
                                 globalData.ndbMtTcThreads));
    }
    /**
     * Whatever value this node has chosen, we will never be able to use
     * more transporters than the other node permits as well. This will be
     * established in the setup phase of multi transporters.
     */
  }
  if (m_num_multi_trps == 0)
  {
    jam();
    m_num_multi_trps = 1;  // always at least one transporter per node
  }
  ReadConfigConf * conf = (ReadConfigConf*)signal->getDataPtrSend();
  conf->senderRef = reference();
  conf->senderData = senderData;
  sendSignal(ref, GSN_READ_CONFIG_CONF, signal, 
             ReadConfigConf::SignalLength, JBB);
}
511
512 void
execSTART_ORD(Signal * signal)513 Qmgr::execSTART_ORD(Signal* signal)
514 {
515 /**
516 * Start timer handling
517 */
518 const NDB_TICKS now = NdbTick_getCurrentTicks();
519 signal->theData[0] = ZTIMER_HANDLING;
520 signal->theData[1] = Uint32(now.getUint64() >> 32);
521 signal->theData[2] = Uint32(now.getUint64());
522 sendSignal(QMGR_REF, GSN_CONTINUEB, signal, 3, JBB);
523 }
524
525 /*
526 4.2 ADD NODE MODULE*/
527 /*##########################################################################*/
528 /*
529 4.2.1 STTOR */
530 /**--------------------------------------------------------------------------
531 * Start phase signal, must be handled by all blocks.
532 * QMGR is only interested in the first phase.
533 * During phase one we clear all registered applications.
534 *---------------------------------------------------------------------------*/
535 /*******************************/
536 /* STTOR */
537 /*******************************/
/**
 * STTOR: start-phase signal. QMGR requests phases 3, 7, 9 and 255 in
 * sendSttorryLab() and acts as follows:
 *  - phase 1: initialize data and start the node inclusion protocol
 *  - phase 3: nothing, just acknowledge
 *  - phase 7: the president starts the arbitration thread (unless
 *             arbitration is disabled)
 *  - phase 9: allow API nodes to connect
 */
void Qmgr::execSTTOR(Signal* signal)
{
  jamEntry();

  switch(signal->theData[1]){
  case 1:
    jam();
    initData(signal);
    g_eventLogger->info("Starting QMGR phase 1");
    c_ndbcntr = (Ndbcntr*)globalData.getBlock(NDBCNTR);
    startphase1(signal);
    recompute_version_info(NodeInfo::DB);
    recompute_version_info(NodeInfo::API);
    recompute_version_info(NodeInfo::MGM);
    return;   // sendSttorryLab is reached later, from the inclusion protocol
  case 3:
    jam();
    break;
  case 7:
    jam();
    if (cpresident == getOwnNodeId())
    {
      jam();
      switch(arbitRec.method){
      case ArbitRec::DISABLED:
        jam();
        break;

      case ArbitRec::METHOD_EXTERNAL:
      case ArbitRec::METHOD_DEFAULT:
        /**
         * Start arbitration thread. This could be done as soon as
         * we have all nodes (or a winning majority).
         */
        jam();
        handleArbitStart(signal);
        break;
      }
    }
    break;
  case 9:{
    jam();
    /**
     * Enable communication to all API nodes by setting state
     * to ZFAIL_CLOSING (which will make it auto-open in checkStartInterface)
     */
    if (ERROR_INSERTED(949))
    {
      jam();
      g_eventLogger->info("QMGR : Delaying allow-api-connect processing");
      sendSignalWithDelay(reference(), GSN_STTOR, signal, 1000, 2);
      return;
    }
    c_allow_api_connect = 1;
    NodeRecPtr nodePtr;
    for (nodePtr.i = 1; nodePtr.i < MAX_NODES; nodePtr.i++)
    {
      Uint32 type = getNodeInfo(nodePtr.i).m_type;
      if (type != NodeInfo::API)
        continue;   // only API node records are touched here

      ptrAss(nodePtr, nodeRec);
      jam();
      jamLine(Uint16(nodePtr.i));
      if (nodePtr.p->phase == ZAPI_INACTIVE)
      {
        jam();
        set_hb_count(nodePtr.i) = 3;
        nodePtr.p->phase = ZFAIL_CLOSING;
        nodePtr.p->failState = NORMAL;
      }
    }
  }
  }

  sendSttorryLab(signal, false);
  return;
}//Qmgr::execSTTOR()
616
/**
 * Acknowledge the current start phase with STTORRY and list the
 * phases QMGR wants STTOR for next: 3, 7, 9, then 255 (= done).
 * first_phase is true only when called at the end of the node
 * inclusion protocol, which completes QMGR's phase 1.
 */
void Qmgr::sendSttorryLab(Signal* signal, bool first_phase)
{
  if (first_phase)
  {
    g_eventLogger->info("Include node protocol completed, phase 1 in QMGR"
                        " completed");
  }
  /*****************************/
  /* STTORRY */
  /*****************************/
  signal->theData[3] = 3;
  signal->theData[4] = 7;
  signal->theData[5] = 9;
  signal->theData[6] = 255;  // no more phases after 9
  sendSignal(NDBCNTR_REF, GSN_STTORRY, signal, 7, JBB);
  return;
}//Qmgr::sendSttorryLab()
634
/**
 * QMGR phase 1 start: mark our own node record as ZSTARTING and ask
 * DIH (DIH_RESTARTREQ) whether a system restart is possible. The
 * answer (DIH_RESTARTCONF/REF) decides the GCI used when entering
 * the node inclusion protocol.
 */
void Qmgr::startphase1(Signal* signal)
{
  jamEntry();

  NodeRecPtr nodePtr;
  nodePtr.i = getOwnNodeId();
  ptrAss(nodePtr, nodeRec);
  nodePtr.p->phase = ZSTARTING;
  DEB_STARTUP(("phase(%u) = ZSTARTING", nodePtr.i));

  DihRestartReq * req = CAST_PTR(DihRestartReq, signal->getDataPtrSend());
  req->senderRef = reference();
  sendSignal(DBDIH_REF, GSN_DIH_RESTARTREQ, signal,
             DihRestartReq::SignalLength, JBB);
  return;
}
651
/**
 * DIH_RESTARTREF: DIH found no restorable GCI, i.e. this is an
 * initial start. The attached section carries the bitmask of nodes
 * belonging to no node group. m_latest_gci is set to 0 (cannot become
 * master from restored data) and the inclusion protocol is started.
 */
void
Qmgr::execDIH_RESTARTREF(Signal*signal)
{
  jamEntry();

  ndbrequire(signal->getNoOfSections() == 1);
  SectionHandle handle(this, signal);
  SegmentedSectionPtr ptr;
  handle.getSection(ptr, 0);
  ndbrequire(ptr.sz <= NdbNodeBitmask::Size);
  c_start.m_no_nodegroup_nodes.clear();
  copy(c_start.m_no_nodegroup_nodes.rep.data, ptr);
  releaseSections(handle);

  g_eventLogger->info("DIH reported initial start, now starting the"
                      " Node Inclusion Protocol");
  c_start.m_latest_gci = 0;
  execCM_INFOCONF(signal);
}
671
/**
 * DIH_RESTARTCONF: DIH reports that a system restart is possible.
 * The section carries the no-nodegroup-nodes bitmask and the signal
 * body the latest restorable GCI; the latter is saved before reading
 * the local sysfile to decide how to enter the inclusion protocol.
 */
void
Qmgr::execDIH_RESTARTCONF(Signal*signal)
{
  jamEntry();

  ndbrequire(signal->getNoOfSections() == 1);
  SectionHandle handle(this, signal);
  SegmentedSectionPtr ptr;
  handle.getSection(ptr, 0);
  ndbrequire(ptr.sz <= NdbNodeBitmask::Size);
  c_start.m_no_nodegroup_nodes.clear();
  copy(c_start.m_no_nodegroup_nodes.rep.data, ptr);
  releaseSections(handle);

  const DihRestartConf * conf = CAST_CONSTPTR(DihRestartConf,
                                              signal->getDataPtr());
  c_start.m_latest_gci = conf->latest_gci;
  sendReadLocalSysfile(signal);
}
691
692 void
sendReadLocalSysfile(Signal * signal)693 Qmgr::sendReadLocalSysfile(Signal *signal)
694 {
695 ReadLocalSysfileReq *req = (ReadLocalSysfileReq*)signal->getDataPtrSend();
696 req->userPointer = 0;
697 req->userReference = reference();
698 sendSignal(NDBCNTR_REF,
699 GSN_READ_LOCAL_SYSFILE_REQ,
700 signal,
701 ReadLocalSysfileReq::SignalLength,
702 JBB);
703 }
704
/**
 * READ_LOCAL_SYSFILE_CONF: result of reading the local sysfile.
 * Adjusts c_start.m_latest_gci according to restorability and then
 * starts the node inclusion protocol via execCM_INFOCONF():
 *  - restorable on its own: keep the GCI received from DIH
 *  - not restorable on its own: set GCI to ZUNDEFINED_GCI_LIMIT
 *  - otherwise (initial start required): set GCI to 0
 */
void
Qmgr::execREAD_LOCAL_SYSFILE_CONF(Signal *signal)
{
  ReadLocalSysfileConf *conf = (ReadLocalSysfileConf*)signal->getDataPtr();
  if (conf->nodeRestorableOnItsOwn ==
      ReadLocalSysfileReq::NODE_RESTORABLE_ON_ITS_OWN)
  {
    g_eventLogger->info("DIH reported normal start, now starting the"
                        " Node Inclusion Protocol");
  }
  else if (conf->nodeRestorableOnItsOwn ==
           ReadLocalSysfileReq::NODE_NOT_RESTORABLE_ON_ITS_OWN)
  {
    /**
     * We set gci = 1 and rely here on that gci here is simply used
     * as a tool to decide which nodes can be started up on their
     * own and which node to choose as master node. Only nodes
     * where m_latest_gci is set to a real GCI can be chosen as
     * master nodes.
     */
    g_eventLogger->info("Node not restorable on its own, now starting the"
                        " Node Inclusion Protocol");
    c_start.m_latest_gci = ZUNDEFINED_GCI_LIMIT;
  }
  else
  {
    g_eventLogger->info("Node requires initial start, now starting the"
                        " Node Inclusion Protocol");
    c_start.m_latest_gci = 0;
  }
  execCM_INFOCONF(signal);
}
737
setHbDelay(UintR aHbDelay)738 void Qmgr::setHbDelay(UintR aHbDelay)
739 {
740 const NDB_TICKS now = NdbTick_getCurrentTicks();
741 hb_send_timer.setDelay(aHbDelay < 10 ? 10 : aHbDelay);
742 hb_send_timer.reset(now);
743 hb_check_timer.setDelay(aHbDelay < 10 ? 10 : aHbDelay);
744 hb_check_timer.reset(now);
745 }
746
/**
 * Set the API heartbeat period (ms), clamped to a minimum of 100 ms,
 * and restart the API heartbeat timer from the current time.
 */
void Qmgr::setHbApiDelay(UintR aHbApiDelay)
{
  const NDB_TICKS now = NdbTick_getCurrentTicks();
  chbApiDelay = (aHbApiDelay < 100 ? 100 : aHbApiDelay);
  hb_api_timer.setDelay(chbApiDelay);
  hb_api_timer.reset(now);
}
754
/**
 * Set the arbitration timeout (ms), clamped to a minimum of 10 ms.
 */
void Qmgr::setArbitTimeout(UintR aArbitTimeout)
{
  arbitRec.timeout = (aArbitTimeout < 10 ? 10 : aArbitTimeout);
}
759
setCCDelay(UintR aCCDelay)760 void Qmgr::setCCDelay(UintR aCCDelay)
761 {
762 const NDB_TICKS now = NdbTick_getCurrentTicks();
763 if (aCCDelay == 0)
764 {
765 /* Connectivity check disabled */
766 m_connectivity_check.m_enabled = false;
767 m_connectivity_check.m_timer.setDelay(0);
768 }
769 else
770 {
771 m_connectivity_check.m_enabled = true;
772 m_connectivity_check.m_timer.setDelay(aCCDelay < 10 ? 10 : aCCDelay);
773 m_connectivity_check.m_timer.reset(now);
774 }
775 }
776
/**
 * CONNECT_REP: the transporter layer reports that a node has
 * connected. Handling depends on our own phase and on where we are in
 * the node inclusion protocol (c_start.m_gsn):
 *  - GSN_CM_REGREQ:       we are starting; send CM_REGREQ to the node
 *  - GSN_CM_NODEINFOREQ:  we are starting and waiting for this node's
 *                         node info; send CM_NODEINFOREQ
 *  - GSN_CM_NODEINFOCONF: we are running and a starting node that the
 *                         president accepted has now connected to us
 *  - otherwise: cross-check views by sending READ_NODESREQ to the node
 */
void Qmgr::execCONNECT_REP(Signal* signal)
{
  jamEntry();
  const Uint32 connectedNodeId = signal->theData[0];

  if (ERROR_INSERTED(931))
  {
    jam();
    ndbout_c("Discarding CONNECT_REP(%d)", connectedNodeId);
    infoEvent("Discarding CONNECT_REP(%d)", connectedNodeId);
    return;
  }

  if (ERROR_INSERTED(941) &&
      getNodeInfo(connectedNodeId).getType() == NodeInfo::API)
  {
    jam();
    CLEAR_ERROR_INSERT_VALUE;
    ndbout_c("Discarding one API CONNECT_REP(%d)", connectedNodeId);
    infoEvent("Discarding one API CONNECT_REP(%d)", connectedNodeId);
    return;
  }

  if (c_connectedNodes.get(connectedNodeId) == false)
  {
    jam();
    // First connect since last disconnect: forget stale version info.
    setNodeInfo(connectedNodeId).m_version = 0;
    setNodeInfo(connectedNodeId).m_mysql_version = 0;
  }

  c_connectedNodes.set(connectedNodeId);
  DEB_STARTUP(("c_connectedNodes(%u) set", connectedNodeId));

  {
    NodeRecPtr connectedNodePtr;
    connectedNodePtr.i = connectedNodeId;
    ptrCheckGuard(connectedNodePtr, MAX_NODES, nodeRec);
    connectedNodePtr.p->m_secret = 0;
  }

  // What to do next depends on OUR node's phase.
  NodeRecPtr myNodePtr;
  myNodePtr.i = getOwnNodeId();
  ptrCheckGuard(myNodePtr, MAX_NODES, nodeRec);
  NodeInfo connectedNodeInfo = getNodeInfo(connectedNodeId);
  switch(myNodePtr.p->phase){
  case ZRUNNING:
    jam();
    if (connectedNodeInfo.getType() == NodeInfo::DB)
    {
      // A DB node in the running cluster cannot "connect" again.
      ndbrequire(!c_clusterNodes.get(connectedNodeId));
    }
    break;
  case ZSTARTING:
    jam();
    break;
  case ZPREPARE_FAIL:
  case ZFAIL_CLOSING:
    jam();
    // We are being excluded/are closing down: ignore the connect.
    return;
  case ZAPI_ACTIVATION_ONGOING:
    ndbabort();
  case ZAPI_ACTIVE:
    ndbabort();
  case ZAPI_INACTIVE:
    ndbabort();
  case ZINIT:
    // Before STTOR phase 1 only the management server may connect.
    ndbrequire(getNodeInfo(connectedNodeId).m_type == NodeInfo::MGM);
    break;
  default:
    ndbabort();
  }

  if (connectedNodeInfo.getType() != NodeInfo::DB)
  {
    jam();
    // API/MGM connects need no inclusion-protocol action here.
    return;
  }

  switch(c_start.m_gsn){
  case GSN_CM_REGREQ:
    jam();
    sendCmRegReq(signal, connectedNodeId);

    /**
     * We're waiting for CM_REGCONF c_start.m_nodes contains all configured
     * nodes
     */
    ndbrequire(myNodePtr.p->phase == ZSTARTING);
    ndbrequire(c_start.m_nodes.isWaitingFor(connectedNodeId));
    return;
  case GSN_CM_NODEINFOREQ:
    jam();

    if (c_start.m_nodes.isWaitingFor(connectedNodeId))
    {
      jam();
      ndbrequire(getOwnNodeId() != cpresident);
      ndbrequire(myNodePtr.p->phase == ZSTARTING);
      sendCmNodeInfoReq(signal, connectedNodeId, myNodePtr.p);
      return;
    }
    return;
  case GSN_CM_NODEINFOCONF:{
    jam();

    // A running non-president waiting for the accepted starting node
    // to connect; now include it via the CM_ADD prepare path.
    ndbrequire(getOwnNodeId() != cpresident);
    ndbrequire(myNodePtr.p->phase == ZRUNNING);
    if (c_start.m_nodes.isWaitingFor(connectedNodeId))
    {
      jam();
      c_start.m_nodes.clearWaitingFor(connectedNodeId);
      c_start.m_gsn = RNIL;

      NodeRecPtr addNodePtr;
      addNodePtr.i = connectedNodeId;
      ptrCheckGuard(addNodePtr, MAX_NDB_NODES, nodeRec);
      cmAddPrepare(signal, addNodePtr, myNodePtr.p);
      return;
    }
    // Not waiting for this node: fall through to the cross-check below.
  }
  default:
    (void)1;  // no inclusion-protocol state: fall through to READ_NODESREQ
  }

  // Cross-check our view of the cluster with the newly connected node.
  ReadNodesReq *req = (ReadNodesReq *)&signal->theData[0];
  ndbrequire(!c_start.m_nodes.isWaitingFor(connectedNodeId));
  ndbrequire(!c_readnodes_nodes.get(connectedNodeId));
  c_readnodes_nodes.set(connectedNodeId);
  req->myRef = reference();
  req->myVersion = NDB_VERSION_D;
  sendSignal(calcQmgrBlockRef(connectedNodeId),
             GSN_READ_NODESREQ,
             signal,
             ReadNodesReq::SignalLength,
             JBA);
  return;
}//Qmgr::execCONNECT_REP()
914
/**
 * READ_NODESCONF: reply to the READ_NODESREQ cross-check sent from
 * execCONNECT_REP. New-format senders attach one section with all
 * five node bitmasks; old-format senders put five 2-word (48-node)
 * bitmasks in the signal body, which are expanded in place to the
 * current full-size bitmask layout before processing continues in
 * check_readnodes_reply().
 */
void
Qmgr::execREAD_NODESCONF(Signal* signal)
{
  jamEntry();
  if (signal->getNoOfSections() > 0)
  {
    jam();
    const ReadNodesConf * readNodes = (ReadNodesConf *)&signal->theData[0];
    ndbrequire(signal->getNoOfSections() == 1);
    SegmentedSectionPtr ptr;
    SectionHandle handle(this, signal);
    handle.getSection(ptr, 0);
    ndbrequire(ptr.sz == 5 * NdbNodeBitmask::Size);
    copy((Uint32*)&readNodes->definedNodes.rep.data, ptr);
    releaseSections(handle);
  }
  else
  {
    jam();

    /**
     * Handle transformation from old signal format with 5 bitmask with
     * 2 words in each bitmask to 5 bitmasks with 5 words in each bitmask.
     */
    const ReadNodesConf_v1 * readNodes_v1 =
      (ReadNodesConf_v1 *)&signal->theData[0];
    ReadNodesConf * readNodes = (ReadNodesConf *)&signal->theData[0];

    // Save the old-format bitmasks before overwriting the signal area.
    NdbNodeBitmask48 defined48Nodes;
    NdbNodeBitmask48 inactive48Nodes;
    NdbNodeBitmask48 cluster48Nodes;
    NdbNodeBitmask48 starting48Nodes;
    NdbNodeBitmask48 started48Nodes;

    defined48Nodes.assign(NdbNodeBitmask48::Size,
                          readNodes_v1->definedNodes);
    inactive48Nodes.assign(NdbNodeBitmask48::Size,
                           readNodes_v1->inactiveNodes);
    cluster48Nodes.assign(NdbNodeBitmask48::Size,
                          readNodes_v1->clusterNodes);
    starting48Nodes.assign(NdbNodeBitmask48::Size,
                           readNodes_v1->startingNodes);
    started48Nodes.assign(NdbNodeBitmask48::Size,
                          readNodes_v1->startedNodes);

    // Zero the full-size bitmasks, then copy the 48-node data in.
    NdbNodeBitmask clear_bitmask;
    readNodes->definedNodes = clear_bitmask;
    readNodes->inactiveNodes = clear_bitmask;
    readNodes->clusterNodes = clear_bitmask;
    readNodes->startingNodes = clear_bitmask;
    readNodes->startedNodes = clear_bitmask;

    readNodes->definedNodes = defined48Nodes;
    readNodes->inactiveNodes = inactive48Nodes;
    readNodes->clusterNodes = cluster48Nodes;
    readNodes->startingNodes = starting48Nodes;
    readNodes->startedNodes = started48Nodes;
  }

  check_readnodes_reply(signal,
                        refToNode(signal->getSendersBlockRef()),
                        GSN_READ_NODESCONF);
}
978
/**
 * READ_NODESREF: negative reply to the READ_NODESREQ cross-check;
 * handled by the same reply checker as READ_NODESCONF.
 */
void
Qmgr::execREAD_NODESREF(Signal* signal)
{
  jamEntry();
  check_readnodes_reply(signal,
                        refToNode(signal->getSendersBlockRef()),
                        GSN_READ_NODESREF);
}
987
988 /**
989 * Heartbeat Inclusion Protocol Handling
990 * -------------------------------------
991 * The protocol to include our node in the heartbeat protocol starts when
992 * we call execCM_INFOCONF. We start by opening communication to all nodes
993 * in the cluster. When we start this protocol we don't know anything about
 * which nodes are up and running and we don't know which node is currently the
995 * president of the heartbeat protocol.
996 *
997 * For us to be successful with being included in the heartbeat protocol we
998 * need to be connected to all nodes currently in the heartbeat protocol. It
999 * is important to remember that QMGR sees a node as alive if it is included
1000 * in the heartbeat protocol. Higher level notions of aliveness is handled
1001 * primarily by the DBDIH block, but also to some extent by NDBCNTR.
1002 *
1003 * The protocol starts by the new node sending CM_REGREQ to all nodes it is
1004 * connected to. Only the president will respond to this message. We could
 * have a situation where there currently isn't a president chosen. In this
1006 * case an election is held whereby a new president is assigned. In the rest
1007 * of this comment we assume that a president already exists.
1008 *
1009 * So if we were connected to the president we will get a response to the
1010 * CM_REGREQ from the president with CM_REGCONF. The CM_REGCONF contains
1011 * the set of nodes currently included in the heartbeat protocol.
1012 *
1013 * The president will send in parallel to sending CM_REGCONF a CM_ADD(prepare)
1014 * message to all nodes included in the protocol.
1015 *
1016 * When receiving CM_REGCONF the new node will send CM_NODEINFOREQ with
1017 * information about version of the binary, number of LDM workers and
1018 * MySQL version of binary.
1019 *
1020 * The nodes already included in the heartbeat protocol will wait until it
1021 * receives both the CM_ADD(prepare) from the president and the
1022 * CM_NODEINFOREQ from the starting node. When it receives those two
1023 * messages it will send CM_ACKADD(prepare) to the president and
1024 * CM_NODEINFOCONF to the starting node with its own node information.
1025 *
1026 * When the president received CM_ACKADD(prepare) from all nodes included
1027 * in the heartbeat protocol then it sends CM_ADD(AddCommit) to all nodes
1028 * included in the heartbeat protocol.
1029 *
1030 * When the nodes receives CM_ADD(AddCommit) from the president then
1031 * they will enable communication to the new node and immediately start
1032 * sending heartbeats to the new node. They will also include the new
1033 * node in their view of the nodes included in the heartbeat protocol.
1034 * Next they will send CM_ACKADD(AddCommit) back to the president.
1035 *
1036 * When the president has received CM_ACKADD(AddCommit) from all nodes
1037 * included in the heartbeat protocol then it sends CM_ADD(CommitNew)
1038 * to the starting node.
1039 *
1040 * This is also the point where we report the node as included in the
1041 * heartbeat protocol to DBDIH as from here the rest of the protocol is
1042 * only about informing the new node about the outcome of inclusion
1043 * protocol. When we receive the response to this message the new node
1044 * can already have proceeded a bit into its restart.
1045 *
1046 * The starting node after receiving CM_REGCONF waits for all nodes
1047 * included in the heartbeat protocol to send CM_NODEINFOCONF and
1048 * also for receiving the CM_ADD(CommitNew) from the president. When
 * all this has been received, the new node adds itself and all nodes
 * it has been informed about into its view of the nodes included in
1051 * the heartbeat protocol and enables communication to all other
1052 * nodes included therein. Finally it sends CM_ACKADD(CommitNew) to
1053 * the president.
1054 *
1055 * When the president has received CM_ACKADD(CommitNew) from the starting
1056 * node the inclusion protocol is completed and the president is ready
1057 * to receive a new node into the cluster.
1058 *
1059 * It is the responsibility of the starting nodes to retry after a failed
1060 * node inclusion, they will do so with 3 seconds delay. This means that
1061 * at most one node per 3 seconds will normally be added to the cluster.
1062 * So this phase of adding nodes to the cluster can add up to a little bit
1063 * more than a minute of delay in a large cluster starting up.
1064 *
1065 * We try to depict the above in a graph here as well:
1066 *
1067 * New node Nodes included in the heartbeat protocol President
1068 * ----------------------------------------------------------------------------
1069 * ----CM_REGREQ--------------------->>
1070 * ----CM_REGREQ---------------------------------------------------------->
1071 *
1072 * <----------------CM_REGCONF---------------------------------------------
1073 * <<------CM_ADD(Prepare)---------------
1074 *
1075 * -----CM_NODEINFOREQ--------------->>
1076 *
1077 * Nodes included in heartbeat protocol can receive CM_ADD(Prepare) and
1078 * CM_NODEINFOREQ in any order.
1079 *
1080 * <<---CM_NODEINFOCONF-------------- --------CM_ACKADD(Prepare)--------->>
1081 *
1082 * <<-------CM_ADD(AddCommit)------------
1083 *
1084 * Here nodes enables communication to new node and starts sending heartbeats
1085 *
1086 * ---------CM_ACKADD(AddCommit)------->>
1087 *
1088 * Here we report to DBDIH about new node included in heartbeat protocol
1089 * in master node.
1090 *
1091 * <----CM_ADD(CommitNew)--------------------------------------------------
1092 *
 * Here the new node enables communication to the other nodes and starts sending
1094 * heartbeat messages.
1095 *
1096 * -----CM_ACKADD(CommitNew)---------------------------------------------->
1097 *
1098 * Here the president can complete the inclusion protocol and is ready to
1099 * receive new nodes into the heartbeat protocol.
1100 */
1101 /*******************************/
1102 /* CM_INFOCONF */
1103 /*******************************/
void Qmgr::execCM_INFOCONF(Signal* signal)
{
  /**
   * Entry point of the heartbeat inclusion protocol (see the large comment
   * above): open communication to all DB nodes.
   */
  signal->theData[0] = 0; // no answer
  signal->theData[1] = 0; // no id
  signal->theData[2] = NodeInfo::DB;
  sendSignal(TRPMAN_REF, GSN_OPEN_COMORD, signal, 3, JBB);

  /* At this point we know nothing about who (if anyone) is president. */
  cpresident = ZNIL;
  cpresidentAlive = ZFALSE;
  c_start_election_time = NdbTick_getCurrentTicks();

  /* Arm a delayed CONTINUEB to later check the start failure limit. */
  signal->theData[0] = ZSTART_FAILURE_LIMIT;
  sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 3000, 1);

  /* Kick off the CM_REGREQ phase towards the defined DB nodes. */
  cmInfoconf010Lab(signal);

  return;
}//Qmgr::execCM_INFOCONF()
1125
/* Requested start type for this node, a bitmask over NodeState start types;
 * 0 means any start type (see get_start_type_string). Copied into
 * c_start.m_start_type in cmInfoconf010Lab(). */
Uint32 g_start_type = 0;
/* Nodes we do not wait for during the president election; set by the
 * --nowait-nodes startup parameter ("clo" = command line option,
 * presumably — the original comment is truncated). Intersected with
 * c_definedNodes before use. */
NdbNodeBitmask g_nowait_nodes; // Set by clo
1128
void Qmgr::cmInfoconf010Lab(Signal* signal)
{
  /**
   * Initialise the start record and send CM_REGREQ to every connected DB
   * node. Afterwards we wait (via the delayed CONTINUEB below) and then
   * check whether we received any CM_REGREF, or CM_REGREQ from a node
   * with a lower node id than our own.
   */
  c_start.m_startKey = 0;
  c_start.m_startNode = getOwnNodeId();
  c_start.m_nodes.clearWaitingFor();
  c_start.m_gsn = GSN_CM_REGREQ;
  c_start.m_starting_nodes.clear();
  c_start.m_starting_nodes_w_log.clear();
  c_start.m_regReqReqSent = 0;
  c_start.m_regReqReqRecv = 0;
  /* Nodes listed in --nowait-nodes, restricted to the defined nodes, are
   * skipped when waiting for election participants. */
  c_start.m_skip_nodes = g_nowait_nodes;
  c_start.m_skip_nodes.bitAND(c_definedNodes);
  c_start.m_start_type = g_start_type;

  NodeRecPtr nodePtr;
  cnoOfNodes = 0;
  for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
    jam();
    ptrAss(nodePtr, nodeRec);

    /* Only DB nodes take part in the heartbeat protocol. */
    if(getNodeInfo(nodePtr.i).getType() != NodeInfo::DB)
      continue;

    c_start.m_nodes.setWaitingFor(nodePtr.i);
    cnoOfNodes++;

    /* CM_REGREQ can only be sent to nodes we are already connected to. */
    if(!c_connectedNodes.get(nodePtr.i))
      continue;

    sendCmRegReq(signal, nodePtr.i);
  }

  //----------------------------------------
  /* Wait for a while. When it returns    */
  /* we will check if we got any CM_REGREF*/
  /* or CM_REGREQ (lower nodeid than our  */
  /* own).                                */
  //----------------------------------------
  signal->theData[0] = ZREGREQ_TIMELIMIT;
  signal->theData[1] = c_start.m_startKey;
  signal->theData[2] = c_start.m_startNode;
  sendSignalWithDelay(QMGR_REF, GSN_CONTINUEB, signal, 3000, 3);

  /* From here on we are ready to answer CM_REGREQ from other nodes. */
  creadyDistCom = ZTRUE;
  return;
}//Qmgr::cmInfoconf010Lab()
1175
/**
 * Send CM_REGREQ (registration request for the heartbeat protocol) to QMGR
 * on the given node, carrying our block reference, version info, latest GCI
 * and requested start type. Only the president (or a president candidate)
 * will answer — see the comment above execCM_REGREQ.
 */
void
Qmgr::sendCmRegReq(Signal * signal, Uint32 nodeId)
{
  CmRegReq * req = (CmRegReq *)&signal->theData[0];
  req->blockRef = reference();
  req->nodeId = getOwnNodeId();
  req->version = NDB_VERSION;
  req->mysql_version = NDB_MYSQL_VERSION_D;
  req->latest_gci = c_start.m_latest_gci;
  req->start_type = c_start.m_start_type;
  const Uint32 ref = calcQmgrBlockRef(nodeId);
  /**
   * Clear the additional bits, see comment above CmRegReq::SignalLength
   * in CmRegSignalData for details.
   */
  memset(req->unused_words, 0, sizeof(req->unused_words));
  sendSignal(ref, GSN_CM_REGREQ, signal, CmRegReq::SignalLength, JBB);
  DEB_STARTUP(("CM_REGREQ sent to node %u", nodeId));
  DEBUG_START(GSN_CM_REGREQ, nodeId, "");

  /* Book-keeping: number of CM_REGREQ sent, matched against the replies. */
  c_start.m_regReqReqSent++;
}
1198
1199 /*
1200 4.4.11 CM_REGREQ */
1201 /**--------------------------------------------------------------------------
 * If this signal is received, someone is trying to get registered.
 * Only the president has the authority to make decisions about new nodes,
1204 * so only a president or a node that claims to be the president may send a
1205 * reply to this signal.
1206 * This signal can occur any time after that STTOR was received.
1207 * CPRESIDENT: Timelimit has expired and someone has
1208 * decided to enter the president role
1209 * CPRESIDENT_CANDIDATE:
1210 * Assigned when we receive a CM_REGREF, if we got more than one REF
1211 * then we always keep the lowest nodenumber.
1212 * We accept this nodeno as president when our timelimit expires
1213 * We should consider the following cases:
1214 * 1- We are the president. If we are busy by adding new nodes to cluster,
1215 * then we have to refuse this node to be added.
1216 * The refused node will try in ZREFUSE_ADD_TIME seconds again.
1217 * If we are not busy then we confirm
1218 *
1219 * 2- We know the president, we dont bother us about this REQ.
1220 * The president has also got this REQ and will take care of it.
1221 *
1222 * 3- The president isn't known. An election is currently ongoing.
1223 * This election will not be decided until all nodes in the cluster
1224 * except those specifically in skip list has been started.
1225 * The skip list comes from the startup parameter --nowait-nodes.
1226 * So if no one knows the President it means that we are performing
1227 * a cluster startup, either initial or a normal System restart of
1228 * the cluster.
1229 *
1230 * In this case we wait until all nodes except those in the skip list
1231 * have sent CM_REGREQ to us. If this is the case the node with the
1232 * lowest node id AND that can start from the highest GCI promotes itself
1233 * to President. Since all nodes follow the same algorithm we are certain
1234 * that this will bring us to a point where all nodes has the same node
1235 * as President.
1236 * In addition this election ensures that the President in QMGR is also
1237 * selected as Master in NDBCNTR. It should not be possible that
1238 * CNTR_START_REQ gets a response where the Master says that it isn't
1239 * the master.
1240 *
1241 * To ensure that the President is equal to the Master we send the
1242 * start GCI a node can handle in CM_REGREQ. This enables us to elect
1243 * a President that can also act as Master for NDBCNTR.
1244 *--------------------------------------------------------------------------*/
1245 /*******************************/
1246 /* CM_REGREQ */
1247 /*******************************/
1248 static
1249 int
check_start_type(Uint32 starting,Uint32 own)1250 check_start_type(Uint32 starting, Uint32 own)
1251 {
1252 if (starting == (1 << NodeState::ST_INITIAL_START) &&
1253 ((own & (1 << NodeState::ST_INITIAL_START)) == 0))
1254 {
1255 return 1;
1256 }
1257 return 0;
1258 }
1259
execCM_REGREQ(Signal * signal)1260 void Qmgr::execCM_REGREQ(Signal* signal)
1261 {
1262 DEBUG_START3(signal, "");
1263
1264 NodeRecPtr addNodePtr;
1265 jamEntry();
1266
1267 CmRegReq * const cmRegReq = (CmRegReq *)&signal->theData[0];
1268 const BlockReference Tblockref = cmRegReq->blockRef;
1269 const Uint32 startingVersion = cmRegReq->version;
1270 Uint32 startingMysqlVersion = cmRegReq->mysql_version;
1271 addNodePtr.i = cmRegReq->nodeId;
1272 Uint32 gci = 1;
1273 Uint32 start_type = ~0;
1274
1275 if (!c_connectedNodes.get(cmRegReq->nodeId))
1276 {
1277 jam();
1278
1279 /**
1280 * With ndbmtd, there is a race condition such that
1281 * CM_REGREQ can arrive prior to CONNECT_REP
1282 * since CONNECT_REP is sent from CMVMI
1283 *
1284 * In such cases, ignore the CM_REGREQ which is safe
1285 * as it will anyway be resent by starting node
1286 */
1287 g_eventLogger->info("discarding CM_REGREQ from %u "
1288 "as we're not yet connected (isNdbMt: %u)",
1289 cmRegReq->nodeId,
1290 (unsigned)isNdbMt());
1291
1292 return;
1293 }
1294
1295 if (signal->getLength() == CmRegReq::SignalLength)
1296 {
1297 jam();
1298 gci = cmRegReq->latest_gci;
1299 start_type = cmRegReq->start_type;
1300 }
1301
1302 if (creadyDistCom == ZFALSE) {
1303 jam();
1304 DEB_STARTUP(("Not ready for distributed communication yet"));
1305 /* NOT READY FOR DISTRIBUTED COMMUNICATION.*/
1306 return;
1307 }//if
1308
1309 if (!ndbCompatible_ndb_ndb(NDB_VERSION, startingVersion)) {
1310 jam();
1311 DEB_STARTUP(("Incompatible versions"));
1312 sendCmRegrefLab(signal, Tblockref, CmRegRef::ZINCOMPATIBLE_VERSION,
1313 startingVersion);
1314 return;
1315 }
1316
1317 if (!ndbd_upgrade_ok(startingVersion))
1318 {
1319 jam();
1320 infoEvent("Connection from node %u refused as it's not ok to upgrade from",
1321 addNodePtr.i);
1322 sendCmRegrefLab(signal, Tblockref, CmRegRef::ZINCOMPATIBLE_VERSION,
1323 startingVersion);
1324 return;
1325 }
1326
1327 if (check_start_type(start_type, c_start.m_start_type))
1328 {
1329 jam();
1330 DEB_STARTUP(("Incompatible start types"));
1331 sendCmRegrefLab(signal, Tblockref, CmRegRef::ZINCOMPATIBLE_START_TYPE,
1332 startingVersion);
1333 return;
1334 }
1335
1336 if (cpresident != getOwnNodeId())
1337 {
1338 jam();
1339
1340 if (cpresident == ZNIL)
1341 {
1342 /***
1343 * We don't know the president.
1344 * If the node to be added has lower node id
1345 * than it will be our president candidate. Set it as
1346 * candidate.
1347 */
1348 jam();
1349 if (gci != ZUNDEFINED_GCI_LIMIT &&
1350 (gci > c_start.m_president_candidate_gci ||
1351 (gci == c_start.m_president_candidate_gci &&
1352 addNodePtr.i < c_start.m_president_candidate)))
1353 {
1354 jam();
1355 c_start.m_president_candidate = addNodePtr.i;
1356 c_start.m_president_candidate_gci = gci;
1357 DEB_STARTUP(("President candidate: %u, gci: %u",
1358 addNodePtr.i, gci));
1359 }
1360 DEB_STARTUP(("Election error to %x", Tblockref));
1361 sendCmRegrefLab(signal, Tblockref, CmRegRef::ZELECTION,
1362 startingVersion);
1363 return;
1364 }
1365
1366 /**
1367 * We are not the president.
1368 * We know the president.
1369 * President will answer.
1370 */
1371 DEB_STARTUP(("Not president error"));
1372 sendCmRegrefLab(signal, Tblockref, CmRegRef::ZNOT_PRESIDENT,
1373 startingVersion);
1374 return;
1375 }//if
1376
1377 if (c_start.m_startNode != 0)
1378 {
1379 jam();
1380 /**
1381 * President busy by adding another node
1382 */
1383 DEB_STARTUP(("Busy president error"));
1384 sendCmRegrefLab(signal, Tblockref, CmRegRef::ZBUSY_PRESIDENT,
1385 startingVersion);
1386 return;
1387 }
1388
1389 if (ctoStatus == Q_ACTIVE)
1390 {
1391 jam();
1392 /**
1393 * Active taking over as president
1394 */
1395 DEB_STARTUP(("President take over error"));
1396 sendCmRegrefLab(signal, Tblockref, CmRegRef::ZBUSY_TO_PRES,
1397 startingVersion);
1398 return;
1399 }//if
1400
1401 if (getNodeInfo(addNodePtr.i).m_type != NodeInfo::DB)
1402 {
1403 jam();
1404 /**
1405 * The new node is not in config file
1406 */
1407 DEB_STARTUP(("Not in cfg error"));
1408 sendCmRegrefLab(signal, Tblockref, CmRegRef::ZNOT_IN_CFG,
1409 startingVersion);
1410 return;
1411 }
1412
1413 if (getNodeState().getSingleUserMode())
1414 {
1415 /**
1416 * The cluster is in single user mode.
1417 * Data node is not allowed to get added in the cluster
1418 * while in single user mode.
1419 */
1420 // handle rolling upgrade
1421 jam();
1422 DEB_STARTUP(("Single user mode error"));
1423 sendCmRegrefLab(signal, Tblockref, CmRegRef::ZSINGLE_USER_MODE,
1424 startingVersion);
1425 return;
1426 }//if
1427
1428 ptrCheckGuard(addNodePtr, MAX_NDB_NODES, nodeRec);
1429 Phase phase = addNodePtr.p->phase;
1430 if (phase != ZINIT)
1431 {
1432 jam();
1433 QMGR_DEBUG("phase = " << phase);
1434 DEB_STARTUP(("Not dead error"));
1435 sendCmRegrefLab(signal, Tblockref, CmRegRef::ZNOT_DEAD,
1436 startingVersion);
1437 return;
1438 }
1439
1440 jam();
1441 /**
1442 * WE ARE PRESIDENT AND WE ARE NOT BUSY ADDING ANOTHER NODE.
1443 * WE WILL TAKE CARE OF THE INCLUSION OF THIS NODE INTO THE CLUSTER.
1444 * WE NEED TO START TIME SUPERVISION OF THIS. SINCE WE CANNOT STOP
1445 * TIMED SIGNAL IF THE INCLUSION IS INTERRUPTED WE IDENTIFY
1446 * EACH INCLUSION WITH A UNIQUE IDENTITY. THIS IS CHECKED WHEN
1447 * THE SIGNAL ARRIVES. IF IT HAS CHANGED THEN WE SIMPLY IGNORE
1448 * THE TIMED SIGNAL.
1449 */
1450
1451 /**
1452 * Update start record
1453 */
1454 c_start.m_startKey++;
1455 c_start.m_startNode = addNodePtr.i;
1456 DEB_STARTUP(("Node %u is starting node", addNodePtr.i));
1457
1458 /**
1459 * Assign dynamic id
1460 */
1461 UintR TdynId = (++c_maxDynamicId) & 0xFFFF;
1462 TdynId |= (addNodePtr.p->hbOrder << 16);
1463 setNodeInfo(addNodePtr.i).m_version = startingVersion;
1464 setNodeInfo(addNodePtr.i).m_mysql_version = startingMysqlVersion;
1465 recompute_version_info(NodeInfo::DB, startingVersion);
1466 addNodePtr.p->ndynamicId = TdynId;
1467
1468 /**
1469 * Reply with CM_REGCONF
1470 */
1471 CmRegConf * const cmRegConf = (CmRegConf *)&signal->theData[0];
1472 cmRegConf->presidentBlockRef = reference();
1473 cmRegConf->presidentNodeId = getOwnNodeId();
1474 cmRegConf->presidentVersion = getNodeInfo(getOwnNodeId()).m_version;
1475 cmRegConf->presidentMysqlVersion = getNodeInfo(getOwnNodeId()).m_mysql_version;
1476 cmRegConf->dynamicId = TdynId;
1477 const Uint32 packed_nodebitmask_length = c_clusterNodes.getPackedLengthInWords();
1478 #ifdef DEBUG_STARTUP
1479 {
1480 char node_mask[NdbNodeBitmask::TextLength + 1];
1481 c_clusterNodes.getText(node_mask);
1482 DEB_STARTUP(("Sending CM_REGCONF from president, c_clusterNodes: %s",
1483 node_mask));
1484 }
1485 #endif
1486 if (ndbd_send_node_bitmask_in_section(startingVersion))
1487 {
1488 jam();
1489 // Send node bitmask in linear section.
1490 LinearSectionPtr lsptr[3];
1491
1492 // 8192 is the size of signal->theData array.
1493 STATIC_ASSERT(CmRegConf::SignalLength_v1 + NdbNodeBitmask::Size <=
1494 NDB_ARRAY_SIZE(signal->theData));
1495 c_clusterNodes.copyto(packed_nodebitmask_length,
1496 &signal->theData[CmRegConf::SignalLength_v1]);
1497 lsptr[0].p = &signal->theData[CmRegConf::SignalLength_v1];
1498 lsptr[0].sz = packed_nodebitmask_length;
1499
1500 DEB_STARTUP(("Sending CM_REGCONF to %x", Tblockref));
1501 sendSignal(Tblockref,
1502 GSN_CM_REGCONF,
1503 signal,
1504 CmRegConf::SignalLength,
1505 JBA,
1506 lsptr,
1507 1);
1508 }
1509 else if (packed_nodebitmask_length <= NdbNodeBitmask48::Size)
1510 {
1511 jam();
1512 c_clusterNodes.copyto(NdbNodeBitmask48::Size, cmRegConf->allNdbNodes_v1);
1513 DEB_STARTUP(("2:Sending CM_REGCONF to %x", Tblockref));
1514 sendSignal(Tblockref, GSN_CM_REGCONF, signal,
1515 CmRegConf::SignalLength_v1, JBA);
1516 }
1517 else
1518 {
1519 infoEvent("Connection from node %u refused as it does not support node "
1520 "bitmask in signal section.",
1521 addNodePtr.i);
1522 DEB_STARTUP(("Incompatible start types"));
1523 sendCmRegrefLab(signal, Tblockref, CmRegRef::ZINCOMPATIBLE_START_TYPE,
1524 startingVersion);
1525 }
1526 DEBUG_START(GSN_CM_REGCONF, refToNode(Tblockref), "");
1527
1528 /**
1529 * Send CmAdd to all nodes (including starting)
1530 */
1531 c_start.m_nodes = c_clusterNodes;
1532 c_start.m_nodes.setWaitingFor(addNodePtr.i);
1533 c_start.m_gsn = GSN_CM_ADD;
1534
1535 NodeReceiverGroup rg(QMGR, c_start.m_nodes);
1536 CmAdd * const cmAdd = (CmAdd*)signal->getDataPtrSend();
1537 cmAdd->requestType = CmAdd::Prepare;
1538 cmAdd->startingNodeId = addNodePtr.i;
1539 cmAdd->startingVersion = startingVersion;
1540 cmAdd->startingMysqlVersion = startingMysqlVersion;
1541 sendSignal(rg, GSN_CM_ADD, signal, CmAdd::SignalLength, JBA);
1542 DEBUG_START2(GSN_CM_ADD, rg, "Prepare");
1543
1544 /**
1545 * Set timer
1546 */
1547 return;
1548 signal->theData[0] = ZREGREQ_MASTER_TIMELIMIT;
1549 signal->theData[1] = c_start.m_startKey;
1550 sendSignalWithDelay(QMGR_REF, GSN_CONTINUEB, signal, 30000, 2);
1551
1552 return;
1553 }//Qmgr::execCM_REGREQ()
1554
sendCmRegrefLab(Signal * signal,BlockReference TBRef,CmRegRef::ErrorCode Terror,Uint32 remote_node_version)1555 void Qmgr::sendCmRegrefLab(Signal* signal, BlockReference TBRef,
1556 CmRegRef::ErrorCode Terror, Uint32 remote_node_version)
1557 {
1558 const Uint32 remoteNodeVersion = remote_node_version;
1559
1560 CmRegRef* ref = (CmRegRef*)signal->getDataPtrSend();
1561 ref->blockRef = reference();
1562 ref->nodeId = getOwnNodeId();
1563 ref->errorCode = Terror;
1564 ref->presidentCandidate =
1565 (cpresident == ZNIL ? c_start.m_president_candidate : cpresident);
1566 ref->candidate_latest_gci = c_start.m_president_candidate_gci;
1567 ref->latest_gci = c_start.m_latest_gci;
1568 ref->start_type = c_start.m_start_type;
1569 Uint32 packed_nodebitmask_length =
1570 c_start.m_skip_nodes.getPackedLengthInWords();
1571
1572 if (ndbd_send_node_bitmask_in_section(remoteNodeVersion))
1573 {
1574 jam();
1575 // Send node bitmask in linear section.
1576 LinearSectionPtr lsptr[3];
1577 c_start.m_skip_nodes.copyto(packed_nodebitmask_length,
1578 &signal->theData[CmRegRef::SignalLength_v1]);
1579 lsptr[0].p = &signal->theData[CmRegRef::SignalLength_v1];
1580 lsptr[0].sz = packed_nodebitmask_length;
1581
1582 sendSignal(TBRef,
1583 GSN_CM_REGREF,
1584 signal,
1585 CmRegRef::SignalLength,
1586 JBB,
1587 lsptr,
1588 1);
1589 }
1590 else if (packed_nodebitmask_length <= NdbNodeBitmask48::Size)
1591 {
1592 jam();
1593 c_start.m_skip_nodes.copyto(NdbNodeBitmask48::Size, ref->skip_nodes_v1);
1594 sendSignal(TBRef, GSN_CM_REGREF, signal,
1595 CmRegRef::SignalLength_v1, JBB);
1596 }
1597 else
1598 {
1599 /**
1600 * Node bitmask cannot be sent to other node since it is longer
1601 * than two words. We crash if the error is not ZINCOMPATIBLE_VERSION
1602 * or ZINCOMPATIBLE_START_TYPE since other errors may change the state
1603 * of qmgr. Also, other errors require us to have the correct bitmask
1604 * for proper functioning.
1605 */
1606 ndbrequire((Terror == CmRegRef::ZINCOMPATIBLE_VERSION) ||
1607 (Terror == CmRegRef::ZINCOMPATIBLE_START_TYPE));
1608 memset(ref->skip_nodes_v1, 0, sizeof(ref->skip_nodes_v1));
1609 sendSignal(TBRef, GSN_CM_REGREF, signal,
1610 CmRegRef::SignalLength_v1, JBB);
1611 }
1612 DEBUG_START(GSN_CM_REGREF, refToNode(TBRef), "");
1613 return;
1614 }//Qmgr::sendCmRegrefLab()
1615
1616 /*
1617 4.4.11 CM_REGCONF */
1618 /**--------------------------------------------------------------------------
1619 * President gives permission to a node which wants to join the cluster.
1620 * The president will prepare the cluster that a new node will be added to
1621 * cluster. When the new node has set up all connections to the cluster,
1622 * the president will send commit to all clusternodes so the phase of the
1623 * new node can be changed to ZRUNNING.
1624 *--------------------------------------------------------------------------*/
1625 /*******************************/
1626 /* CM_REGCONF */
1627 /*******************************/
void Qmgr::execCM_REGCONF(Signal* signal)
{
  DEBUG_START3(signal, "");

  NodeRecPtr myNodePtr;
  NodeRecPtr nodePtr;
  jamEntry();

  CmRegConf * const cmRegConf = (CmRegConf *)&signal->theData[0];

  DEB_STARTUP(("Received CM_REGCONF"));
  /* The set of nodes currently in the heartbeat protocol, as told by the
   * president: either in signal section (newer versions) or inline (v1). */
  NdbNodeBitmask allNdbNodes;
  if (signal->getNoOfSections() >= 1)
  {
    // copy node bitmask to cmRegConf->allNdbNodes from the signal section
    jam();
    ndbrequire(ndbd_send_node_bitmask_in_section(cmRegConf->presidentVersion));
    SectionHandle handle(this, signal);
    SegmentedSectionPtr ptr;
    handle.getSection(ptr, 0);
    ndbrequire(ptr.sz <= NdbNodeBitmask::Size);
    copy(allNdbNodes.rep.data, ptr);
    releaseSections(handle);
  }
  else
  {
    /* Older president: 48-node bitmask sent inline in the signal. */
    allNdbNodes.assign(NdbNodeBitmask48::Size, cmRegConf->allNdbNodes_v1);
  }

  /* Incompatible president version is fatal for this node. */
  if (!ndbCompatible_ndb_ndb(NDB_VERSION, cmRegConf->presidentVersion)) {
    jam();
    char buf[128];
    BaseString::snprintf(buf,sizeof(buf),
                         "incompatible version own=0x%x other=0x%x, "
                         " shutting down",
                         NDB_VERSION, cmRegConf->presidentVersion);
    progError(__LINE__, NDBD_EXIT_UNSUPPORTED_VERSION, buf);
    return;
  }

  if (!ndbd_upgrade_ok(cmRegConf->presidentVersion)) {
    jam();
    char buf[128];
    BaseString::snprintf(buf,sizeof(buf),
                         "Not okay to upgrade from 0x%x, "
                         "shutting down",
                         cmRegConf->presidentVersion);
    progError(__LINE__, NDBD_EXIT_UNSUPPORTED_VERSION, buf);
    return;
  }

  myNodePtr.i = getOwnNodeId();
  ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);

  /* We must be in the CM_REGREQ phase, still starting. */
  ndbrequire(c_start.m_gsn == GSN_CM_REGREQ);
  ndbrequire(myNodePtr.p->phase == ZSTARTING);

  /* Record who the president is and adopt its view of the cluster. */
  cpdistref    = cmRegConf->presidentBlockRef;
  cpresident   = cmRegConf->presidentNodeId;
  UintR TdynamicId   = cmRegConf->dynamicId;
  c_maxDynamicId = TdynamicId & 0xFFFF;
  c_clusterNodes.assign(allNdbNodes);

  myNodePtr.p->ndynamicId = TdynamicId;

  // set own MT config here or in REF, and others in CM_NODEINFOREQ/CONF
  setNodeInfo(getOwnNodeId()).m_lqh_workers = globalData.ndbMtLqhWorkers;

#ifdef DEBUG_STARTUP
  {
    char node_mask[NdbNodeBitmask::TextLength + 1];
    c_clusterNodes.getText(node_mask);
    DEB_STARTUP(("CM_REGCONF from president: %u, c_clusterNodes: %s",
                 cpresident, node_mask));
  }
#endif
  /*--------------------------------------------------------------*/
  // Send this as an EVENT REPORT to inform about hearing about
  // other NDB node proclaiming to be president.
  /*--------------------------------------------------------------*/
  signal->theData[0] = NDB_LE_CM_REGCONF;
  signal->theData[1] = getOwnNodeId();
  signal->theData[2] = cpresident;
  signal->theData[3] = TdynamicId;
  sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);

  /* Mark every node the president reported as running, and send our node
   * info (CM_NODEINFOREQ) to those we are already connected to. */
  for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
    if (c_clusterNodes.get(nodePtr.i)){
      jamLine(nodePtr.i);
      ptrAss(nodePtr, nodeRec);

      DEB_MULTI_TRP(("Node %u in ZRUNNING", nodePtr.i));
      ndbrequire(nodePtr.p->phase == ZINIT);
      nodePtr.p->phase = ZRUNNING;
      DEB_STARTUP(("phase(%u) = ZRUNNING", nodePtr.i));

      if(c_connectedNodes.get(nodePtr.i)){
        jam();
        sendCmNodeInfoReq(signal, nodePtr.i, myNodePtr.p);
      }
    }
  }

  c_start.m_gsn = GSN_CM_NODEINFOREQ;
  c_start.m_nodes = c_clusterNodes;

  /* Error injection 937: delayed NDB_TAMPER for test purposes. */
  if (ERROR_INSERTED(937))
  {
    CLEAR_ERROR_INSERT_VALUE;
    signal->theData[0] = 9999;
    sendSignalWithDelay(CMVMI_REF, GSN_NDB_TAMPER, signal, 500, 1);
  }

  return;
}//Qmgr::execCM_REGCONF()
1743
/**
 * Handle a READ_NODES reply (CONF or REF) from QMGR on another node while
 * checking for a partitioned cluster:
 *  - on REF, or CONF without a known master, re-send READ_NODESREQ;
 *  - if the other node agrees on who the president is, we are done;
 *  - otherwise the cluster is partitioned: if we are fully started we send
 *    FAIL_REP (ZPARTITIONED_CLUSTER) for the other partition's nodes,
 *    else we shut ourselves down with NDBD_EXIT_PARTITIONED_SHUTDOWN.
 */
void
Qmgr::check_readnodes_reply(Signal* signal, Uint32 nodeId, Uint32 gsn)
{
  NodeRecPtr myNodePtr;
  myNodePtr.i = getOwnNodeId();
  ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);

  NodeRecPtr nodePtr;
  nodePtr.i = nodeId;
  ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);

  ndbrequire(c_readnodes_nodes.get(nodeId));
  ReadNodesConf* conf = (ReadNodesConf*)signal->getDataPtr();
  ReadNodesReq* req = (ReadNodesReq*)signal->getDataPtrSend();
  if (gsn == GSN_READ_NODESREF)
  {
    jam();
    /* NOTE: the label below is also the target of a goto from outside this
     * if-block (legal here since no initialization is jumped over). */
retry:
    req->myRef = reference();
    req->myVersion = NDB_VERSION_D;
    sendSignal(calcQmgrBlockRef(nodeId),
               GSN_READ_NODESREQ,
               signal,
               ReadNodesReq::SignalLength,
               JBA);
    return;
  }

  /* CONF but the other node has no master yet: ask again. */
  if (conf->masterNodeId == ZNIL)
  {
    jam();
    goto retry;
  }

  /* Agreement on the president: nothing more to check for this node. */
  Uint32 president = conf->masterNodeId;
  if (president == cpresident)
  {
    jam();
    c_readnodes_nodes.clear(nodeId);
    return;
  }

  /* Disagreement: the cluster is partitioned. */
  char buf[255];
  BaseString::snprintf(buf, sizeof(buf),
                       "check StartPartialTimeout, "
                       "node %d thinks %d is president, "
                       "I think president is: %d",
                       nodeId, president, cpresident);

  ndbout_c("%s", buf);
  CRASH_INSERTION(933);

  if (getNodeState().startLevel == NodeState::SL_STARTED)
  {
    jam();
    /* We are started: declare the nodes of the other partition failed by
     * sending FAIL_REP for each of them to the disagreeing node. */
    NdbNodeBitmask part = conf->clusterNodes;
    FailRep* rep = (FailRep*)signal->getDataPtrSend();
    rep->failCause = FailRep::ZPARTITIONED_CLUSTER;
    rep->partitioned.president = cpresident;
    memset(rep->partitioned.partition_v1, 0,
           sizeof(rep->partitioned.partition_v1));
    rep->partitioned.partitionFailSourceNodeId = getOwnNodeId();
    Uint32 ref = calcQmgrBlockRef(nodeId);
    Uint32 i = 0;
    /* Send source of event info if a node supports it */
    Uint32 length = FailRep::OrigSignalLength +
      FailRep::PartitionedExtraLength_v1;
    Uint32 packed_bitmask_length = c_clusterNodes.getPackedLengthInWords();

    while((i = part.find(i + 1)) != NdbNodeBitmask::NotFound)
    {
      if (i == nodeId)
        continue;
      rep->failNodeId = i;
      if (ndbd_send_node_bitmask_in_section(getNodeInfo(refToNode(ref)).m_version))
      {
        jam();
        // Send node bitmask in signal section.
        LinearSectionPtr lsptr[3];
        Uint32* temp_buffer = &signal->
            theData[FailRep::SignalLength +
            FailRep::PartitionedExtraLength_v1];
        c_clusterNodes.copyto(packed_bitmask_length, temp_buffer);
        lsptr[0].p = temp_buffer;
        lsptr[0].sz = c_clusterNodes.getPackedLengthInWords();
        sendSignal(ref,
                   GSN_FAIL_REP,
                   signal,
                   length + FailRep::SourceExtraLength,
                   JBA,
                   lsptr,
                   1);
      }
      else if (packed_bitmask_length <= 2)
      {
        /* NOTE(review): magic 2 — presumably NdbNodeBitmask48::Size, as
         * used in the equivalent branches elsewhere; confirm. */
        jam();
        c_clusterNodes.copyto(NdbNodeBitmask48::Size, rep->partitioned.partition_v1);
        sendSignal(ref, GSN_FAIL_REP, signal,
                   length + FailRep::SourceExtraLength,
                   JBA);
      }
      else
      {
        ndbabort();
      }
    }
    /* Finally report the disagreeing node itself as failed. */
    rep->failNodeId = nodeId;

    if (ndbd_send_node_bitmask_in_section(
        getNodeInfo(refToNode(ref)).m_version))
    {
      jam();
      // Send node bitmask in signal section.
      LinearSectionPtr lsptr[3];
      Uint32* temp_buffer = &signal->
          theData[FailRep::SignalLength +
          FailRep::PartitionedExtraLength_v1];
      c_clusterNodes.copyto(packed_bitmask_length, temp_buffer);
      lsptr[0].p = temp_buffer;
      lsptr[0].sz = c_clusterNodes.getPackedLengthInWords();
      // clear the unused bits
      memset(rep->partitioned.partition_v1, 0,
             sizeof(rep->partitioned.partition_v1));
      sendSignal(ref,
                 GSN_FAIL_REP,
                 signal,
                 length + FailRep::SourceExtraLength,
                 JBA,
                 lsptr,
                 1);
    }
    else if (packed_bitmask_length <= 2)
    {
      jam();
      /* NOTE(review): this send uses JBB while the matching branch in the
       * loop above uses JBA — looks inconsistent; confirm intent. */
      sendSignal(ref, GSN_FAIL_REP, signal,
                 length + FailRep::SourceExtraLength,
                 JBB);
    }
    else
    {
      ndbabort();
    }
    return;
  }

  /* Not fully started: shut down this node instead of fighting. */
  CRASH_INSERTION(932);
  CRASH_INSERTION(938);

  progError(__LINE__,
            NDBD_EXIT_PARTITIONED_SHUTDOWN,
            buf);

  ndbabort();
}
1898
/**
 * Send CM_NODEINFOREQ to QMGR on the given node, carrying this node's
 * dynamic id, version, MySQL version and number of LQH workers. The
 * receiver answers with CM_NODEINFOCONF carrying its own information
 * (see the inclusion-protocol comment above).
 */
void
Qmgr::sendCmNodeInfoReq(Signal* signal, Uint32 nodeId, const NodeRec * self){
  CmNodeInfoReq * const req = (CmNodeInfoReq*)signal->getDataPtrSend();
  req->nodeId = getOwnNodeId();
  req->dynamicId = self->ndynamicId;
  req->version = getNodeInfo(getOwnNodeId()).m_version;
  req->mysql_version = getNodeInfo(getOwnNodeId()).m_mysql_version;
  req->lqh_workers = getNodeInfo(getOwnNodeId()).m_lqh_workers;
  const Uint32 ref = calcQmgrBlockRef(nodeId);
  sendSignal(ref,GSN_CM_NODEINFOREQ, signal, CmNodeInfoReq::SignalLength, JBB);
  DEBUG_START(GSN_CM_NODEINFOREQ, nodeId, "");
}
1911
1912 /*
1913 4.4.11 CM_REGREF */
1914 /**--------------------------------------------------------------------------
1915 * Only a president or a president candidate can refuse a node to get added to
1916 * the cluster.
1917 * Refuse reasons:
1918 * ZBUSY We know that the sender is the president and we have to
1919 * make a new CM_REGREQ.
1920 * ZNOT_IN_CFG This node number is not specified in the configfile,
1921 * SYSTEM ERROR
1922 * ZELECTION Sender is a president candidate, his timelimit
1923 * hasn't expired so maybe someone else will show up.
1924 * Update the CPRESIDENT_CANDIDATE, then wait for our
1925 * timelimit to expire.
1926 *---------------------------------------------------------------------------*/
1927 /*******************************/
1928 /* CM_REGREF */
1929 /*******************************/
1930 static
1931 const char *
get_start_type_string(Uint32 st)1932 get_start_type_string(Uint32 st)
1933 {
1934 static char buf[256];
1935
1936 if (st == 0)
1937 {
1938 return "<ANY>";
1939 }
1940 else
1941 {
1942 buf[0] = 0;
1943 for(Uint32 i = 0; i<NodeState::ST_ILLEGAL_TYPE; i++)
1944 {
1945 if (st & (1 << i))
1946 {
1947 if (buf[0])
1948 strcat(buf, "/");
1949 switch(i){
1950 case NodeState::ST_INITIAL_START:
1951 strcat(buf, "inital start");
1952 break;
1953 case NodeState::ST_SYSTEM_RESTART:
1954 strcat(buf, "system restart");
1955 break;
1956 case NodeState::ST_NODE_RESTART:
1957 strcat(buf, "node restart");
1958 break;
1959 case NodeState::ST_INITIAL_NODE_RESTART:
1960 strcat(buf, "initial node restart");
1961 break;
1962 }
1963 }
1964 }
1965 return buf;
1966 }
1967 }
1968
void Qmgr::execCM_REGREF(Signal* signal)
{
  /**
   * CM_REGREF: another node refused our CM_REGREQ registration attempt.
   *
   * Depending on the refuse reason we either:
   *  - terminate with progError (config/version/start-type conflicts),
   *  - note that a president already exists (ZBUSY*/ZNOT_PRESIDENT), or
   *  - (ZELECTION) record the sender's preferred president candidate.
   * If no president is alive, all expected refusals have arrived, and we
   * are the agreed candidate, run check_startup() and possibly win the
   * election ourselves.
   */
  jamEntry();

  CmRegRef* ref = (CmRegRef*)signal->getDataPtr();
  UintR TaddNodeno = ref->nodeId;          // node that sent the refusal
  UintR TrefuseReason = ref->errorCode;
  Uint32 candidate = ref->presidentCandidate;
  Uint32 node_gci = 1;
  Uint32 candidate_gci = 1;
  Uint32 start_type = ~0;
  NdbNodeBitmask skip_nodes;
  DEBUG_START3(signal, TrefuseReason);

  ndbrequire(signal->getLength() >= CmRegRef::SignalLength);
  node_gci = ref->latest_gci;
  candidate_gci = ref->candidate_latest_gci;
  start_type = ref->start_type;

  // check if node bitmask is in signal section
  if (signal->getNoOfSections() >= 1)
  {
    jam();
    ndbrequire(signal->getLength() >= CmRegRef::SignalLength);
    SectionHandle handle(this, signal);
    SegmentedSectionPtr ptr;
    handle.getSection(ptr, 0);

    ndbrequire(ptr.sz <= NdbNodeBitmask::Size);
    copy(skip_nodes.rep.data, ptr);
    releaseSections(handle);
  }
  else
  {
    // Older senders pass the (48-node) bitmask inline in the signal.
    skip_nodes.assign(NdbNodeBitmask48::Size, ref->skip_nodes_v1);
  }

  c_start.m_regReqReqRecv++;

  // Ignore block reference in data[0]

  if(candidate != c_start.m_president_candidate)
  {
    jam();
    // Sender disagrees about the candidate: poison the received counter
    // so the sent==recv check below fails and we keep waiting.
    c_start.m_regReqReqRecv = ~0;
  }

  c_start.m_starting_nodes.set(TaddNodeno);
  if (node_gci > ZUNDEFINED_GCI_LIMIT)
  {
    jam();
    // Sender has a usable redo log (non-trivial GCI).
    c_start.m_starting_nodes_w_log.set(TaddNodeno);
  }
  c_start.m_node_gci[TaddNodeno] = node_gci;

  // Only honour skip-nodes that are actually part of the configuration.
  skip_nodes.bitAND(c_definedNodes);
  c_start.m_skip_nodes.bitOR(skip_nodes);

  // set own MT config here or in CONF, and others in CM_NODEINFOREQ/CONF
  setNodeInfo(getOwnNodeId()).m_lqh_workers = globalData.ndbMtLqhWorkers;

  char buf[100];
  switch (TrefuseReason) {
  case CmRegRef::ZINCOMPATIBLE_VERSION:
    jam();
    // No break: progError() is expected not to return.
    progError(__LINE__, NDBD_EXIT_UNSUPPORTED_VERSION,
              "incompatible version, "
              "connection refused by running ndb node");
  case CmRegRef::ZINCOMPATIBLE_START_TYPE:
    jam();
    BaseString::snprintf(buf, sizeof(buf),
                         "incompatible start type detected: node %d"
                         " reports %s(%d) my start type: %s(%d)",
                         TaddNodeno,
                         get_start_type_string(start_type), start_type,
                         get_start_type_string(c_start.m_start_type),
                         c_start.m_start_type);
    progError(__LINE__, NDBD_EXIT_SR_RESTARTCONFLICT, buf);
    break;
  case CmRegRef::ZBUSY:
  case CmRegRef::ZBUSY_TO_PRES:
  case CmRegRef::ZBUSY_PRESIDENT:
    jam();
    // A president exists but is busy; we must retry CM_REGREQ later.
    cpresidentAlive = ZTRUE;
    signal->theData[3] = 0;
    break;
  case CmRegRef::ZNOT_IN_CFG:
    jam();
    progError(__LINE__, NDBD_EXIT_NODE_NOT_IN_CONFIG);
    break;
  case CmRegRef::ZNOT_DEAD:
    jam();
    progError(__LINE__, NDBD_EXIT_NODE_NOT_DEAD);
    break;
  case CmRegRef::ZSINGLE_USER_MODE:
    jam();
    progError(__LINE__, NDBD_EXIT_SINGLE_USER_MODE);
    break;
  /**
   * For generic refuse error.
   * e.g. in online upgrade, we can use this error code instead
   * of the incompatible error code.
   */
  case CmRegRef::ZGENERIC:
    jam();
    progError(__LINE__, NDBD_EXIT_GENERIC);
    break;
  case CmRegRef::ZELECTION:
    jam();
    // Prefer the candidate with the highest GCI; break GCI ties by
    // choosing the lowest node id.
    if (candidate_gci != ZUNDEFINED_GCI_LIMIT &&
        (candidate_gci > c_start.m_president_candidate_gci ||
         (candidate_gci == c_start.m_president_candidate_gci &&
          candidate < c_start.m_president_candidate)))
    {
      jam();
      //----------------------------------------
      /* We may already have a candidate       */
      /* choose the lowest nodeno              */
      //----------------------------------------
      signal->theData[3] = 2;
      c_start.m_president_candidate = candidate;
      c_start.m_president_candidate_gci = candidate_gci;
      DEB_STARTUP(("2:President candidate: %u, gci: %u",
                   candidate, candidate_gci));
    } else {
      signal->theData[3] = 4;
    }//if
    break;
  case CmRegRef::ZNOT_PRESIDENT:
    jam();
    cpresidentAlive = ZTRUE;
    signal->theData[3] = 3;
    break;
  default:
    jam();
    signal->theData[3] = 5;
    /*empty*/;
    break;
  }//switch
  /*--------------------------------------------------------------*/
  // Send this as an EVENT REPORT to inform about hearing about
  // other NDB node proclaiming not to be president.
  /*--------------------------------------------------------------*/
  signal->theData[0] = NDB_LE_CM_REGREF;
  signal->theData[1] = getOwnNodeId();
  signal->theData[2] = TaddNodeno;
  //-----------------------------------------
  // signal->theData[3] filled in above
  //-----------------------------------------
  sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);

  if(cpresidentAlive == ZTRUE)
  {
    jam();
    // A president is alive: no election; we will retry registration.
    QMGR_DEBUG("cpresidentAlive");
    return;
  }

  if(c_start.m_regReqReqSent != c_start.m_regReqReqRecv)
  {
    jam();
    // Not all CM_REGREQ answers are in yet (or counters were poisoned
    // above on candidate disagreement).
    QMGR_DEBUG(c_start.m_regReqReqSent << " != " << c_start.m_regReqReqRecv);
    return;
  }

  if(c_start.m_president_candidate != getOwnNodeId())
  {
    jam();
    QMGR_DEBUG("i'm not the candidate");
    return;
  }

  /**
   * All connected nodes has agreed
   */
  if(check_startup(signal))
  {
    jam();
    electionWon(signal);
  }

  return;
}//Qmgr::execCM_REGREF()
2152
2153 /**
2154 * This function contains the logic to decide if we won the election.
2155 * A prerequisite to win an election is that no one is president and
2156 * that all nodes in the cluster have tried to register (except those
2157 * nodes in the skip list). We will wait for a time even for the skip
2158 * nodes. Each node has sent its starting GCI, so we can also ensure
2159 * that any node elected as President can also act as Master in NDBCNTR.
2160 */
Uint32
Qmgr::check_startup(Signal* signal)
{
  /**
   * Decide whether the set of nodes seen so far is sufficient to start
   * the cluster, or whether we must keep waiting (possibly bounded by
   * the partial / partitioned / no-nodegroup timeouts).
   *
   * Return value: 0 = keep waiting; non-zero = election may proceed
   * (caller execCM_REGREF treats non-zero as "election won" — 1 means
   * all expected nodes are present, 2 means a timeout-forced partial
   * start).  Every exit path also emits an NDB_LE_StartReport event
   * (see start_report label) describing the current node sets.
   */
  const NDB_TICKS now = NdbTick_getCurrentTicks();
  const Uint64 elapsed = NdbTick_Elapsed(c_start_election_time,now).milliSec();
  // Partitioned timeout is stacked on top of the partial timeout,
  // unless it is disabled (~0).
  const Uint64 partitionedTimeout =
    c_restartPartitionedTimeout == Uint32(~0) ? Uint32(~0) :
    (c_restartPartialTimeout + c_restartPartitionedTimeout);

  const bool no_nodegroup_active =
    (c_restartNoNodegroupTimeout != ~Uint32(0)) &&
    (! c_start.m_no_nodegroup_nodes.isclear());

  /**
   * First see if we should wait more...
   */
  // tmp = nodes accounted for (either starting or explicitly skipped);
  // wait = configured nodes we are still waiting to hear from.
  NdbNodeBitmask tmp;
  tmp.bitOR(c_start.m_skip_nodes);
  tmp.bitOR(c_start.m_starting_nodes);

  NdbNodeBitmask wait;
  wait.assign(c_definedNodes);
  wait.bitANDC(tmp);

  Uint32 retVal = 0;
  Uint32 incompleteng = MAX_NDB_NODES; // Illegal value
  NdbNodeBitmask report_mask;

  // Initial start (no GCI yet, or explicit initial-start type): all
  // configured nodes must be accounted for before we may proceed.
  if ((c_start.m_latest_gci == 0) ||
      (c_start.m_start_type == (1 << NodeState::ST_INITIAL_START)))
  {
    if (tmp.equal(c_definedNodes))
    {
      jam();
      signal->theData[1] = 0x8000;
      report_mask.assign(c_definedNodes);
      report_mask.bitANDC(c_start.m_starting_nodes);
      retVal = 1;
      goto start_report;
    }
    else if (no_nodegroup_active)
    {
      if (elapsed < c_restartNoNodegroupTimeout)
      {
        signal->theData[1] = 6;
        signal->theData[2] = Uint32((c_restartNoNodegroupTimeout - elapsed + 500) / 1000);
        report_mask.assign(wait);
        retVal = 0;
        goto start_report;
      }
      // Timeout passed: treat nodes without a nodegroup as accounted for.
      tmp.bitOR(c_start.m_no_nodegroup_nodes);
      if (tmp.equal(c_definedNodes))
      {
        signal->theData[1] = 0x8000;
        report_mask.assign(c_definedNodes);
        report_mask.bitANDC(c_start.m_starting_nodes);
        retVal = 1;
        goto start_report;
      }
      else
      {
        jam();
        signal->theData[1] = 1;
        signal->theData[2] = ~0;
        report_mask.assign(wait);
        retVal = 0;
        goto start_report;
      }
    }
    else
    {
      jam();
      signal->theData[1] = 1;
      signal->theData[2] = ~0;
      report_mask.assign(wait);
      retVal = 0;
      goto start_report;
    }
  }

  if (c_restartNoNodegroupTimeout != Uint32(~0) &&
      elapsed >= c_restartNoNodegroupTimeout)
  {
    tmp.bitOR(c_start.m_no_nodegroup_nodes);
  }

  {
    const bool all = c_start.m_starting_nodes.equal(c_definedNodes);
    // CheckNodeGroups is executed directly into DBDIH three times below,
    // each time reusing signal->theData as the request/response buffer.
    CheckNodeGroups* sd = (CheckNodeGroups*)&signal->theData[0];

    {
      /**
       * Check for missing node group directly
       */
      NdbNodeBitmask check;
      check.assign(c_definedNodes);
      check.bitANDC(c_start.m_starting_nodes); // Keep not connected nodes
      check.bitOR(c_start.m_starting_nodes_w_log); //Add nodes with log

      sd->blockRef = reference();
      sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck;
      sd->mask = check;
      EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal,
                     CheckNodeGroups::SignalLength);

      if (sd->output == CheckNodeGroups::Lose)
      {
        jam();
        goto missing_nodegroup;
      }
    }

    // result: verdict for all starting nodes (with or without log).
    sd->blockRef = reference();
    sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck;
    sd->mask = c_start.m_starting_nodes;
    EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal,
                   CheckNodeGroups::SignalLength);

    const Uint32 result = sd->output;

    // result_w_log: verdict restricted to starting nodes with a redo log.
    sd->blockRef = reference();
    sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck;
    sd->mask = c_start.m_starting_nodes_w_log;
    EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal,
                   CheckNodeGroups::SignalLength);

    const Uint32 result_w_log = sd->output;

    if (tmp.equal(c_definedNodes))
    {
      /**
       * All nodes (wrt no-wait nodes) has connected...
       *   this means that we will now start or die
       */
      jam();
      switch(result_w_log){
      case CheckNodeGroups::Lose:
      {
        jam();
        goto missing_nodegroup;
      }
      case CheckNodeGroups::Win:
        signal->theData[1] = all ? 0x8001 : 0x8002;
        report_mask.assign(c_definedNodes);
        report_mask.bitANDC(c_start.m_starting_nodes);
        retVal = 1;
        goto check_log;
      case CheckNodeGroups::Partitioning:
        ndbrequire(result != CheckNodeGroups::Lose);
        signal->theData[1] =
          all ? 0x8001 : (result == CheckNodeGroups::Win ? 0x8002 : 0x8003);
        report_mask.assign(c_definedNodes);
        report_mask.bitANDC(c_start.m_starting_nodes);
        retVal = 1;
        goto check_log;
      }
    }

    // Not all nodes present: keep waiting while the partial timeout
    // (or the no-nodegroup timeout) is still running.
    if (c_restartPartialTimeout == Uint32(~0) ||
        elapsed < c_restartPartialTimeout)
    {
      jam();

      signal->theData[1] = c_restartPartialTimeout == (Uint32) ~0 ? 2 : 3;
      signal->theData[2] =
        c_restartPartialTimeout == Uint32(~0) ?
          Uint32(~0) :
          Uint32((c_restartPartialTimeout - elapsed + 500) / 1000);
      report_mask.assign(wait);
      retVal = 0;

      if (no_nodegroup_active && elapsed < c_restartNoNodegroupTimeout)
      {
        signal->theData[1] = 7;
        signal->theData[2] = Uint32((c_restartNoNodegroupTimeout - elapsed + 500) / 1000);
      }
      else if (no_nodegroup_active && elapsed >= c_restartNoNodegroupTimeout)
      {
        report_mask.bitANDC(c_start.m_no_nodegroup_nodes);
      }

      goto start_report;
    }

    /**
     * Start partial has passed...check for partitioning...
     */
    switch(result_w_log){
    case CheckNodeGroups::Lose:
      jam();
      goto missing_nodegroup;
    case CheckNodeGroups::Partitioning:
      if (elapsed != Uint32(~0) &&
          elapsed < partitionedTimeout &&
          result != CheckNodeGroups::Win)
      {
        goto missinglog;
      }
      // Fall through...
    case CheckNodeGroups::Win:
      signal->theData[1] =
        all ? 0x8001 : (result == CheckNodeGroups::Win ? 0x8002 : 0x8003);
      report_mask.assign(c_definedNodes);
      report_mask.bitANDC(c_start.m_starting_nodes);
      retVal = 2;
      goto check_log;
    }
  }
  ndbabort();

check_log:
  // Ask DBDIH (DIH_RESTARTREQ in check mode) whether the proposed node
  // set has a complete enough log; theData must be saved/restored
  // around the direct execution since it is also our report buffer.
  jam();
  {
    Uint32 save[4+4*NdbNodeBitmask::Size];
    memcpy(save, signal->theData, sizeof(save));

    DihRestartReq * req = CAST_PTR(DihRestartReq, signal->getDataPtrSend());
    req->senderRef = 0;
    c_start.m_starting_nodes.copyto(NdbNodeBitmask::Size, req->nodemask);
    memcpy(req->node_gcis, c_start.m_node_gci, 4*MAX_NDB_NODES);
    EXECUTE_DIRECT(DBDIH, GSN_DIH_RESTARTREQ, signal,
                   DihRestartReq::CheckLength);

    // theData[0] returns the first node group with an incomplete log,
    // or MAX_NDB_NODES if the log is complete.
    incompleteng = signal->theData[0];
    memcpy(signal->theData, save, sizeof(save));

    if (incompleteng != MAX_NDB_NODES)
    {
      jam();
      if (retVal == 1)
      {
        jam();
        goto incomplete_log;
      }
      else if (retVal == 2)
      {
        // Forced start: give the missing nodes until the partitioned
        // timeout before giving up on the log.
        if (elapsed != Uint32(~0) && elapsed <= partitionedTimeout)
        {
          jam();
          goto missinglog;
        }
        else
        {
          goto incomplete_log;
        }
      }
      ndbabort();
    }
  }
  goto start_report;

missinglog:
  // Still waiting for nodes that hold the missing part of the log.
  signal->theData[1] = c_restartPartitionedTimeout == Uint32(~0) ? 4 : 5;
  signal->theData[2] =
    partitionedTimeout == Uint32(~0) ?
      Uint32(~0) : Uint32((partitionedTimeout - elapsed + 500) / 1000);
  infoEvent("partitionedTimeout = %llu, elapsed = %llu", partitionedTimeout, elapsed);
  report_mask.assign(c_definedNodes);
  report_mask.bitANDC(c_start.m_starting_nodes);
  retVal = 0;
  goto start_report;

start_report:
  // Emit NDB_LE_StartReport carrying five node bitmasks (defined,
  // starting, skipped, report_mask, no-nodegroup) in a linear section.
  jam();
  {
    Uint32 sz = NdbNodeBitmask::Size;
    signal->theData[0] = NDB_LE_StartReport;
    signal->theData[3] = sz;
    Uint32* ptr = signal->theData+4;
    c_definedNodes.copyto(sz, ptr); ptr += sz;
    c_start.m_starting_nodes.copyto(sz, ptr); ptr += sz;
    c_start.m_skip_nodes.copyto(sz, ptr); ptr += sz;
    report_mask.copyto(sz, ptr); ptr+= sz;
    c_start.m_no_nodegroup_nodes.copyto(sz, ptr); ptr += sz;
    LinearSectionPtr lsptr[3];
    lsptr[0].p = signal->theData;
    lsptr[0].sz = 4 + 5 * NdbNodeBitmask::Size;
    sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 1, JBB, lsptr, 1);
  }
  return retVal;

missing_nodegroup:
  // Fatal: at least one node group has no startable node.
  jam();
  {
    const Uint32 extra = 100;
    char buf[2 * (NdbNodeBitmask::TextLength + 1) + extra];
    char mask1[NdbNodeBitmask::TextLength + 1];
    char mask2[NdbNodeBitmask::TextLength + 1];
    c_start.m_starting_nodes.getText(mask1);
    tmp.assign(c_start.m_starting_nodes);
    tmp.bitANDC(c_start.m_starting_nodes_w_log);
    tmp.getText(mask2);
    BaseString::snprintf(buf, sizeof(buf),
                         "Unable to start missing node group! "
                         " starting: %s (missing working fs for: %s)",
                         mask1, mask2);
    CRASH_INSERTION(944);
    progError(__LINE__, NDBD_EXIT_INSUFFICENT_NODES, buf);
    return 0; // Deadcode
  }

incomplete_log:
  // Fatal: the connected nodes cannot produce a complete redo log for
  // node group 'incompleteng'.
  jam();
  {
    const Uint32 extra = 100;
    char buf[NdbNodeBitmask::TextLength + 1 + extra];
    char mask1[NdbNodeBitmask::TextLength + 1];
    c_start.m_starting_nodes.getText(mask1);
    BaseString::snprintf(buf, sizeof(buf),
                         "Incomplete log for node group: %d! "
                         " starting nodes: %s",
                         incompleteng, mask1);
    CRASH_INSERTION(944);
    progError(__LINE__, NDBD_EXIT_INSUFFICENT_NODES, buf);
    return 0; // Deadcode
  }
}
2478
void
Qmgr::electionWon(Signal* signal)
{
  /**
   * We won the president election: install ourselves as president,
   * become the first ZRUNNING node of the (so far single-node) cluster,
   * reset the election bookkeeping, and report NDB_LE_CM_REGCONF.
   */
  NodeRecPtr myNodePtr;
  cpresident = getOwnNodeId(); /* This node becomes president. */
  myNodePtr.i = getOwnNodeId();
  ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);

  myNodePtr.p->phase = ZRUNNING;
  DEB_STARTUP(("phase(%u) = ZRUNNING", myNodePtr.i));
  DEB_MULTI_TRP(("Node %u in ZRUNNING, electionWon", myNodePtr.i));

  cpdistref = reference();
  // No neighbours yet: we are the only node in the cluster so far.
  cneighbourl = ZNIL;
  cneighbourh = ZNIL;
  // Dynamic id: low 16 bits are the id (1 = first/president), high 16
  // bits carry the heartbeat order (callers mask with 0xFFFF).
  myNodePtr.p->ndynamicId = 1 | (myNodePtr.p->hbOrder << 16);
  c_maxDynamicId = 1;
  c_clusterNodes.clear();
  c_clusterNodes.set(getOwnNodeId());

  cpresidentAlive = ZTRUE;
  // Election is over: the election timer must not fire again.
  NdbTick_Invalidate(&c_start_election_time);
  c_start.reset();

  signal->theData[0] = NDB_LE_CM_REGCONF;
  signal->theData[1] = getOwnNodeId();
  signal->theData[2] = cpresident;
  signal->theData[3] = myNodePtr.p->ndynamicId;
  sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);

  c_start.m_starting_nodes.clear(getOwnNodeId());
  if (c_start.m_starting_nodes.isclear())
  {
    jam();
    // No other nodes are starting together with us: finish our own
    // start phase immediately.
    sendSttorryLab(signal, true);
  }
}
2516
2517 /*
2518 4.4.11 CONTINUEB */
2519 /*--------------------------------------------------------------------------*/
2520 /* */
2521 /*--------------------------------------------------------------------------*/
2522 /****************************>---------------------------------------------*/
2523 /* CONTINUEB > SENDER: Own block, Own node */
2524 /****************************>-------+INPUT : TCONTINUEB_TYPE */
2525 /*--------------------------------------------------------------*/
regreqTimeLimitLab(Signal * signal)2526 void Qmgr::regreqTimeLimitLab(Signal* signal)
2527 {
2528 if(cpresident == ZNIL)
2529 {
2530 if (c_start.m_president_candidate == ZNIL)
2531 {
2532 jam();
2533 c_start.m_president_candidate = getOwnNodeId();
2534 }
2535
2536 cmInfoconf010Lab(signal);
2537 }
2538 }//Qmgr::regreqTimelimitLab()
2539
2540 /**---------------------------------------------------------------------------
2541 * The new node will take care of giving information about own node and ask
2542 * all other nodes for nodeinfo. The new node will use CM_NODEINFOREQ for
2543 * that purpose. When the setup of connections to all running, the president
2544 * will send a commit to all running nodes + the new node
2545 * INPUT: NODE_PTR1, must be set as ZNIL if we don't enter CONNECT_NODES)
2546 * from signal CM_NODEINFOCONF.
2547 *---------------------------------------------------------------------------*/
2548 /*******************************/
2549 /* CM_NODEINFOCONF */
2550 /*******************************/
void Qmgr::execCM_NODEINFOCONF(Signal* signal)
{
  /**
   * CM_NODEINFOCONF: a running node replies with its node info while we
   * (the starting node) are being added to the cluster.  Record the
   * reply; once every awaited node has answered, ack the Prepare phase
   * of CM_ADD to the president via CM_ACKADD.
   */
  DEBUG_START3(signal, "");

  jamEntry();

  CmNodeInfoConf * const conf = (CmNodeInfoConf*)signal->getDataPtr();

  const Uint32 nodeId = conf->nodeId;
  const Uint32 dynamicId = conf->dynamicId;
  const Uint32 version = conf->version;
  Uint32 mysql_version = conf->mysql_version;
  Uint32 lqh_workers = conf->lqh_workers;

  NodeRecPtr nodePtr;
  nodePtr.i = getOwnNodeId();
  ptrAss(nodePtr, nodeRec);
  // This signal is only legal while we are joining and actively
  // waiting for CM_NODEINFO replies.
  ndbrequire(nodePtr.p->phase == ZSTARTING);
  ndbrequire(c_start.m_gsn == GSN_CM_NODEINFOREQ);
  c_start.m_nodes.clearWaitingFor(nodeId);

  /**
   * Update node info
   */
  NodeRecPtr replyNodePtr;
  replyNodePtr.i = nodeId;
  ptrCheckGuard(replyNodePtr, MAX_NDB_NODES, nodeRec);
  replyNodePtr.p->ndynamicId = dynamicId;
  replyNodePtr.p->blockRef = signal->getSendersBlockRef();
  setNodeInfo(replyNodePtr.i).m_version = version;
  setNodeInfo(replyNodePtr.i).m_mysql_version = mysql_version;
  setNodeInfo(replyNodePtr.i).m_lqh_workers = lqh_workers;

  recompute_version_info(NodeInfo::DB, version);

  if(!c_start.m_nodes.done()){
    jam();
    // Still waiting for replies from more nodes.
    return;
  }

  /**********************************************<*/
  /* Send an ack. back to the president.          */
  /* CM_ACKADD                                    */
  /* The new node has been registered by all      */
  /* running nodes and has stored nodeinfo about  */
  /* all running nodes. The new node has to wait  */
  /* for CM_ADD (commit) from president to become */
  /* a running node in the cluster.               */
  /**********************************************<*/
  sendCmAckAdd(signal, getOwnNodeId(), CmAdd::Prepare);
  return;
}//Qmgr::execCM_NODEINFOCONF()
2603
2604 /**---------------------------------------------------------------------------
2605 * A new node sends nodeinfo about himself. The new node asks for
2606 * corresponding nodeinfo back in the CM_NODEINFOCONF.
2607 *---------------------------------------------------------------------------*/
2608 /*******************************/
2609 /* CM_NODEINFOREQ */
2610 /*******************************/
void Qmgr::execCM_NODEINFOREQ(Signal* signal)
{
  /**
   * CM_NODEINFOREQ: a starting node sends us its node info and asks for
   * ours in return.  If we are not yet a running cluster member we must
   * refuse (CM_NODEINFOREF/ZNOT_RUNNING); otherwise store the sender's
   * info and continue the add protocol via cmAddPrepare (which replies
   * with CM_NODEINFOCONF).
   */
  jamEntry();

  const Uint32 Tblockref = signal->getSendersBlockRef();

  NodeRecPtr nodePtr;
  nodePtr.i = getOwnNodeId();
  ptrAss(nodePtr, nodeRec);
  if(nodePtr.p->phase != ZRUNNING){
    jam();
    // We cannot serve node info unless we are a running member.
    signal->theData[0] = reference();
    signal->theData[1] = getOwnNodeId();
    signal->theData[2] = ZNOT_RUNNING;
    sendSignal(Tblockref, GSN_CM_NODEINFOREF, signal, 3, JBB);
    return;
  }

  NodeRecPtr addNodePtr;
  CmNodeInfoReq * const req = (CmNodeInfoReq*)signal->getDataPtr();
  addNodePtr.i = req->nodeId;
  ptrCheckGuard(addNodePtr, MAX_NDB_NODES, nodeRec);
  addNodePtr.p->ndynamicId = req->dynamicId;
  addNodePtr.p->blockRef = signal->getSendersBlockRef();
  setNodeInfo(addNodePtr.i).m_version = req->version;

  Uint32 mysql_version = req->mysql_version;
  setNodeInfo(addNodePtr.i).m_mysql_version = mysql_version;

  Uint32 lqh_workers = req->lqh_workers;
  setNodeInfo(addNodePtr.i).m_lqh_workers = lqh_workers;

  // The low 16 bits of dynamicId carry the plain dynamic id (the high
  // bits hold the heartbeat order, cf. electionWon).
  c_maxDynamicId = req->dynamicId & 0xFFFF;

  cmAddPrepare(signal, addNodePtr, nodePtr.p);
}//Qmgr::execCM_NODEINFOREQ()
2647
void
Qmgr::cmAddPrepare(Signal* signal, NodeRecPtr nodePtr, const NodeRec * self){
  /**
   * Prepare phase of adding node 'nodePtr' to the cluster.  Behaviour
   * depends on our current view of that node's phase:
   *  - ZINIT:         first contact — mark it ZSTARTING and wait.
   *  - ZFAIL_CLOSING: failure handling for it is still in progress;
   *                   arrange to redo the prepare once it completes.
   *  - ZSTARTING:     normal path — ack Prepare to the president and
   *                   send our own node info back to the starting node.
   *  - anything else is a protocol violation (ndbabort).
   *
   * @param signal  current signal (CM_ADD or CM_NODEINFOREQ)
   * @param nodePtr node being added
   * @param self    our own node record (source of our dynamic id)
   */
  jam();

  switch(nodePtr.p->phase){
  case ZINIT:
    jam();
    nodePtr.p->phase = ZSTARTING;
    DEB_STARTUP(("2:phase(%u) = ZSTARTING", nodePtr.i));
    return;
  case ZFAIL_CLOSING:
    jam();

#if 1
    warningEvent("Received request to incorporate node %u, "
                 "while error handling has not yet completed",
                 nodePtr.i);

    // Only possible on a non-president via CM_ADD; set up so the
    // prepare is redone when node failure handling finishes.
    ndbrequire(getOwnNodeId() != cpresident);
    ndbrequire(signal->header.theVerId_signalNumber == GSN_CM_ADD);
    c_start.m_nodes.clearWaitingFor();
    c_start.m_nodes.setWaitingFor(nodePtr.i);
    c_start.m_gsn = GSN_CM_NODEINFOCONF;
#else
    // Disabled alternative: force the node straight to ZSTARTING and
    // reopen communication immediately.
    warningEvent("Enabling communication to CM_ADD node %u state=%d",
                 nodePtr.i,
                 nodePtr.p->phase);
    nodePtr.p->phase = ZSTARTING;
    nodePtr.p->failState = NORMAL;
    signal->theData[0] = 0;
    signal->theData[1] = nodePtr.i;
    sendSignal(TRPMAN_REF, GSN_OPEN_COMORD, signal, 2, JBB);
#endif
    return;
  case ZSTARTING:
    break;
  case ZRUNNING:
    ndbabort();
  case ZPREPARE_FAIL:
    ndbabort();
  case ZAPI_ACTIVATION_ONGOING:
    ndbabort();
  case ZAPI_ACTIVE:
    ndbabort();
  case ZAPI_INACTIVE:
    ndbabort();
  }

  sendCmAckAdd(signal, nodePtr.i, CmAdd::Prepare);
  // Let the starting node know about all non-DB node versions.
  sendApiVersionRep(signal, nodePtr);

  /* President have prepared us */
  CmNodeInfoConf * conf = (CmNodeInfoConf*)signal->getDataPtrSend();
  conf->nodeId = getOwnNodeId();
  conf->dynamicId = self->ndynamicId;
  conf->version = getNodeInfo(getOwnNodeId()).m_version;
  conf->mysql_version = getNodeInfo(getOwnNodeId()).m_mysql_version;
  conf->lqh_workers = getNodeInfo(getOwnNodeId()).m_lqh_workers;
  sendSignal(nodePtr.p->blockRef, GSN_CM_NODEINFOCONF, signal,
             CmNodeInfoConf::SignalLength, JBB);
  DEBUG_START(GSN_CM_NODEINFOCONF, refToNode(nodePtr.p->blockRef), "");
}
2710
2711 void
sendApiVersionRep(Signal * signal,NodeRecPtr nodePtr)2712 Qmgr::sendApiVersionRep(Signal* signal, NodeRecPtr nodePtr)
2713 {
2714 {
2715 jam();
2716 Uint32 ref = calcQmgrBlockRef(nodePtr.i);
2717 for(Uint32 i = 1; i<MAX_NODES; i++)
2718 {
2719 jam();
2720 Uint32 version = getNodeInfo(i).m_version;
2721 Uint32 type = getNodeInfo(i).m_type;
2722 if (type != NodeInfo::DB && version)
2723 {
2724 jam();
2725 signal->theData[0] = i;
2726 signal->theData[1] = version;
2727 sendSignal(ref, GSN_NODE_VERSION_REP, signal, 2, JBB);
2728 }
2729 }
2730 }
2731 }
2732
2733 void
sendCmAckAdd(Signal * signal,Uint32 nodeId,CmAdd::RequestType type)2734 Qmgr::sendCmAckAdd(Signal * signal, Uint32 nodeId, CmAdd::RequestType type){
2735
2736 CmAckAdd * cmAckAdd = (CmAckAdd*)signal->getDataPtrSend();
2737 cmAckAdd->requestType = type;
2738 cmAckAdd->startingNodeId = nodeId;
2739 cmAckAdd->senderNodeId = getOwnNodeId();
2740 sendSignal(cpdistref, GSN_CM_ACKADD, signal, CmAckAdd::SignalLength, JBA);
2741 DEBUG_START(GSN_CM_ACKADD, cpresident, "");
2742
2743 switch(type){
2744 case CmAdd::Prepare:
2745 return;
2746 case CmAdd::AddCommit:
2747 case CmAdd::CommitNew:
2748 break;
2749 }
2750
2751 signal->theData[0] = nodeId;
2752 EXECUTE_DIRECT(NDBCNTR, GSN_CM_ADD_REP, signal, 1);
2753 jamEntry();
2754 }
2755
2756 /*
2757 4.4.11 CM_ADD */
2758 /**--------------------------------------------------------------------------
2759 * Prepare a running node to add a new node to the cluster. The running node
2760 * will change phase of the new node fron ZINIT to ZWAITING. The running node
2761 * will also mark that we have received a prepare. When the new node has sent
2762 * us nodeinfo we can send an acknowledgement back to the president. When all
2763 * running nodes has acknowledged the new node, the president will send a
2764 * commit and we can change phase of the new node to ZRUNNING. The president
2765 * will also send CM_ADD to himself.
2766 *---------------------------------------------------------------------------*/
2767 /*******************************/
2768 /* CM_ADD */
2769 /*******************************/
void Qmgr::execCM_ADD(Signal* signal)
{
  /**
   * CM_ADD from the president, driving the node-add protocol.
   *
   * If we are ourselves the node being started (phase ZSTARTING):
   *  - Prepare:   nothing to do yet; we wait for CM_NODEINFOCONF.
   *  - CommitNew: we are now a full member — joinedCluster().
   *  - AddCommit: illegal for the starting node itself.
   *
   * If we are a running node:
   *  - Prepare:   run cmAddPrepare for the starting node.
   *  - AddCommit: mark the starting node ZRUNNING, recompute
   *               neighbours, send an immediate heartbeat, and enable
   *               communication to it (ENABLE_COMREQ; the protocol
   *               continues in execENABLE_COMCONF).
   *  - CommitNew: illegal here (only sent to the starting node).
   */
  NodeRecPtr addNodePtr;
  jamEntry();

  NodeRecPtr nodePtr;
  nodePtr.i = getOwnNodeId();
  ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);

  CRASH_INSERTION(940);

  CmAdd * const cmAdd = (CmAdd*)signal->getDataPtr();
  const CmAdd::RequestType type = (CmAdd::RequestType)cmAdd->requestType;
  addNodePtr.i = cmAdd->startingNodeId;
  //const Uint32 startingVersion = cmAdd->startingVersion;
  ptrCheckGuard(addNodePtr, MAX_NDB_NODES, nodeRec);

  DEBUG_START3(signal, type);

  if(nodePtr.p->phase == ZSTARTING){
    jam();
    /**
     * We are joining...
     */
    ndbrequire(addNodePtr.i == nodePtr.i);
    switch(type){
    case CmAdd::Prepare:
      ndbrequire(c_start.m_gsn == GSN_CM_NODEINFOREQ);
      /**
       * Wait for CM_NODEINFO_CONF
       */
      return;
    case CmAdd::CommitNew:
      /**
       * Tata. we're in the cluster
       */
      joinedCluster(signal, addNodePtr);
      return;
    case CmAdd::AddCommit:
      ndbabort();
    }
  }

  switch (type) {
  case CmAdd::Prepare:
    cmAddPrepare(signal, addNodePtr, nodePtr.p);
    break;
  case CmAdd::AddCommit:{
    jam();
    ndbrequire(addNodePtr.p->phase == ZSTARTING);
    addNodePtr.p->phase = ZRUNNING;
    DEB_STARTUP(("2:phase(%u) = ZRUNNING", addNodePtr.i));
    DEB_MULTI_TRP(("Node %u in ZRUNNING, AddCommit", addNodePtr.i));
    m_connectivity_check.reportNodeConnect(addNodePtr.i);
    // Fresh member: reset its missed-heartbeat counter.
    set_hb_count(addNodePtr.i) = 0;
    c_clusterNodes.set(addNodePtr.i);
    findNeighbours(signal, __LINE__);

    /**
     * SEND A HEARTBEAT IMMEDIATELY TO DECREASE THE RISK THAT WE MISS EARLY
     * HEARTBEATS.
     */
    sendHeartbeat(signal);
    hb_send_timer.reset(NdbTick_getCurrentTicks());

    /**
     * ENABLE COMMUNICATION WITH ALL BLOCKS WITH THE NEWLY ADDED NODE
     */
    EnableComReq *enableComReq = (EnableComReq *)signal->getDataPtrSend();
    enableComReq->m_senderRef = reference();
    // Tag so execENABLE_COMCONF knows to continue with AddCommit ack.
    enableComReq->m_senderData = ENABLE_COM_CM_ADD_COMMIT;
    enableComReq->m_enableNodeId = addNodePtr.i;
    sendSignal(TRPMAN_REF, GSN_ENABLE_COMREQ, signal,
               EnableComReq::SignalLength, JBB);
    break;
  }
  case CmAdd::CommitNew:
    jam();
    // CommitNew is only handled in the ZSTARTING branch above.
    ndbabort();
  }

}//Qmgr::execCM_ADD()
2852
2853 void
handleEnableComAddCommit(Signal * signal,Uint32 node)2854 Qmgr::handleEnableComAddCommit(Signal *signal, Uint32 node)
2855 {
2856 sendCmAckAdd(signal, node, CmAdd::AddCommit);
2857 if(getOwnNodeId() != cpresident){
2858 jam();
2859 c_start.reset();
2860 }
2861 }
2862
2863 void
execENABLE_COMCONF(Signal * signal)2864 Qmgr::execENABLE_COMCONF(Signal *signal)
2865 {
2866 const EnableComConf *enableComConf =
2867 (const EnableComConf *)signal->getDataPtr();
2868 Uint32 state = enableComConf->m_senderData;
2869 Uint32 node = enableComConf->m_enableNodeId;
2870
2871 jamEntry();
2872
2873 switch (state)
2874 {
2875 case ENABLE_COM_CM_ADD_COMMIT:
2876 jam();
2877 /* Only exactly one node possible here. */
2878 handleEnableComAddCommit(signal, node);
2879 break;
2880
2881 case ENABLE_COM_CM_COMMIT_NEW:
2882 jam();
2883 handleEnableComCommitNew(signal);
2884 break;
2885
2886 case ENABLE_COM_API_REGREQ:
2887 jam();
2888 /* Only exactly one node possible here. */
2889 handleEnableComApiRegreq(signal, node);
2890 break;
2891
2892 default:
2893 jam();
2894 ndbabort();
2895 }
2896 }
2897
void
Qmgr::joinedCluster(Signal* signal, NodeRecPtr nodePtr){
  /**
   * WE HAVE BEEN INCLUDED IN THE CLUSTER WE CAN START BEING PART OF THE
   * HEARTBEAT PROTOCOL AND WE WILL ALSO ENABLE COMMUNICATION WITH ALL
   * NODES IN THE CLUSTER.
   *
   * Called on the starting node itself when the president sends
   * CM_ADD(CommitNew).  'nodePtr' is our own node record on entry; it
   * is reused below as a loop cursor over all node records.
   */
  DEB_MULTI_TRP(("Node %u in ZRUNNING, AddCommit", nodePtr.i));
  nodePtr.p->phase = ZRUNNING;
  DEB_STARTUP(("3:phase(%u) = ZRUNNING", nodePtr.i));
  set_hb_count(nodePtr.i) = 0;
  findNeighbours(signal, __LINE__);
  c_clusterNodes.set(nodePtr.i);
  c_start.reset();

  /**
   * SEND A HEARTBEAT IMMEDIATELY TO DECREASE THE RISK
   * THAT WE MISS EARLY HEARTBEATS.
   */
  sendHeartbeat(signal);
  hb_send_timer.reset(NdbTick_getCurrentTicks());

  /**
   * ENABLE COMMUNICATION WITH ALL BLOCKS IN THE CURRENT CLUSTER AND SET
   * THE NODES IN THE CLUSTER TO BE RUNNING.
   */
  EnableComReq *enableComReq = (EnableComReq *)signal->getDataPtrSend();
  enableComReq->m_senderRef = reference();
  // Tag so execENABLE_COMCONF continues with handleEnableComCommitNew.
  enableComReq->m_senderData = ENABLE_COM_CM_COMMIT_NEW;
  // Node set is passed as a bitmask (m_nodeIds / linear section below)
  // rather than as a single node id.
  enableComReq->m_enableNodeId = 0;
  enableComReq->m_nodeIds.clear();
  jam();
  for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
    ptrAss(nodePtr, nodeRec);
    if ((nodePtr.p->phase == ZRUNNING) && (nodePtr.i != getOwnNodeId())) {
      /*-------------------------------------------------------------------*/
      // Enable full communication to all other nodes. Not really necessary
      // to open communication to ourself.
      /*-------------------------------------------------------------------*/
      jamLine(nodePtr.i);
      enableComReq->m_nodeIds.set(nodePtr.i);
    }//if
  }//for

  if (!enableComReq->m_nodeIds.isclear())
  {
    jam();
    // Ship the node bitmask as a linear section.
    LinearSectionPtr lsptr[3];
    lsptr[0].p = enableComReq->m_nodeIds.rep.data;
    lsptr[0].sz = enableComReq->m_nodeIds.getPackedLengthInWords();
    sendSignal(TRPMAN_REF,
               GSN_ENABLE_COMREQ,
               signal,
               EnableComReq::SignalLength,
               JBB,
               lsptr,
               1);
  }
  else
  {
    // No other running nodes: complete the commit immediately.
    handleEnableComCommitNew(signal);
  }
}
2961
/**
 * Communication towards all running nodes is now enabled (or there were
 * none to enable).  Conclude our part of the add protocol: report our
 * start phase as complete, then ack the president's CM_ADD(CommitNew).
 */
void
Qmgr::handleEnableComCommitNew(Signal *signal)
{
  // Report the start phase as done before acking the president.
  sendSttorryLab(signal, true);

  sendCmAckAdd(signal, getOwnNodeId(), CmAdd::CommitNew);
}
2969
2970 /* 4.10.7 CM_ACKADD - PRESIDENT IS RECEIVER - */
2971 /*---------------------------------------------------------------------------*/
2972 /* Entry point for an ack add signal.
2973 * The TTYPE defines if it is a prepare or a commit. */
2974 /*---------------------------------------------------------------------------*/
/**
 * CM_ACKADD: acknowledgement in the node-add protocol; only the
 * president should process it.  Running nodes ack the Prepare and
 * AddCommit rounds, and finally the starting node acks CommitNew.
 * When all awaited acks for the current round have arrived, the
 * protocol advances to its next round.
 */
void Qmgr::execCM_ACKADD(Signal* signal)
{
  NodeRecPtr addNodePtr;
  NodeRecPtr senderNodePtr;
  jamEntry();

  CmAckAdd * const cmAckAdd = (CmAckAdd*)signal->getDataPtr();
  const CmAdd::RequestType type = (CmAdd::RequestType)cmAckAdd->requestType;
  addNodePtr.i = cmAckAdd->startingNodeId;
  senderNodePtr.i = cmAckAdd->senderNodeId;

  DEBUG_START3(signal, type);

  if (cpresident != getOwnNodeId()) {
    jam();
    /*-----------------------------------------------------------------------*/
    /* IF WE ARE NOT PRESIDENT THEN WE SHOULD NOT RECEIVE THIS MESSAGE.      */
    /*-----------------------------------------------------------------------*/
    warningEvent("Received CM_ACKADD from %d president=%d",
                 senderNodePtr.i, cpresident);
    return;
  }//if

  if (addNodePtr.i != c_start.m_startNode) {
    jam();
    /*----------------------------------------------------------------------*/
    /* THIS IS NOT THE STARTING NODE. WE ARE ACTIVE NOW WITH ANOTHER START. */
    /*----------------------------------------------------------------------*/
    warningEvent("Received CM_ACKADD from %d with startNode=%d != own %d",
                 senderNodePtr.i, addNodePtr.i, c_start.m_startNode);
    return;
  }//if

  ndbrequire(c_start.m_gsn == GSN_CM_ADD);
  // Tick off this sender; only proceed when every awaited node has acked.
  c_start.m_nodes.clearWaitingFor(senderNodePtr.i);
  if(!c_start.m_nodes.done()){
    jam();
    return;
  }

  switch (type) {
  case CmAdd::Prepare:{
    jam();

    /*----------------------------------------------------------------------*/
    /* ALL RUNNING NODES HAVE PREPARED THE INCLUSION OF THIS NEW NODE.      */
    /*----------------------------------------------------------------------*/
    // Next round: broadcast AddCommit to all cluster nodes and await
    // their acks again.
    c_start.m_gsn = GSN_CM_ADD;
    c_start.m_nodes = c_clusterNodes;

    CmAdd * const cmAdd = (CmAdd*)signal->getDataPtrSend();
    cmAdd->requestType = CmAdd::AddCommit;
    cmAdd->startingNodeId = addNodePtr.i;
    cmAdd->startingVersion = getNodeInfo(addNodePtr.i).m_version;
    cmAdd->startingMysqlVersion = getNodeInfo(addNodePtr.i).m_mysql_version;
    NodeReceiverGroup rg(QMGR, c_clusterNodes);
    sendSignal(rg, GSN_CM_ADD, signal, CmAdd::SignalLength, JBA);
    DEBUG_START2(GSN_CM_ADD, rg, "AddCommit");
    return;
  }
  case CmAdd::AddCommit:{
    jam();

    /****************************************/
    /* Send commit to the new node so he    */
    /* will change PHASE into ZRUNNING      */
    /****************************************/
    // Final round: only the starting node itself is awaited now.
    c_start.m_gsn = GSN_CM_ADD;
    c_start.m_nodes.clearWaitingFor();
    c_start.m_nodes.setWaitingFor(addNodePtr.i);

    CmAdd * const cmAdd = (CmAdd*)signal->getDataPtrSend();
    cmAdd->requestType = CmAdd::CommitNew;
    cmAdd->startingNodeId = addNodePtr.i;
    cmAdd->startingVersion = getNodeInfo(addNodePtr.i).m_version;
    cmAdd->startingMysqlVersion = getNodeInfo(addNodePtr.i).m_mysql_version;
    sendSignal(calcQmgrBlockRef(addNodePtr.i), GSN_CM_ADD, signal,
               CmAdd::SignalLength, JBA);
    DEBUG_START(GSN_CM_ADD, addNodePtr.i, "CommitNew");
    /**
     * Report to DBDIH that a node have been added to the nodes included
     * in the heartbeat protocol.
     */
    InclNodeHBProtocolRep *rep = (InclNodeHBProtocolRep*)signal->getDataPtrSend();
    rep->nodeId = addNodePtr.i;
    EXECUTE_DIRECT(DBDIH, GSN_INCL_NODE_HB_PROTOCOL_REP, signal,
                   InclNodeHBProtocolRep::SignalLength);
    return;
  }
  case CmAdd::CommitNew:
    jam();
    /**
     * Tell arbitration about new node.
     */
    handleArbitNdbAdd(signal, addNodePtr.i);
    c_start.reset();

    // NOTE(review): m_starting_nodes appears to survive c_start.reset();
    // if this node was one we ourselves were waiting for during a
    // multi-node start, clear it and finish our own start phase once no
    // starting node remains - confirm against StartRecord::reset().
    if (c_start.m_starting_nodes.get(addNodePtr.i))
    {
      jam();
      c_start.m_starting_nodes.clear(addNodePtr.i);
      if (c_start.m_starting_nodes.isclear())
      {
        jam();
        sendSttorryLab(signal, true);
      }
    }
    return;
  }//switch
  // Unknown request type: programming error.
  ndbabort();
}//Qmgr::execCM_ACKADD()
3086
3087 /**-------------------------------------------------------------------------
3088 * WE HAVE BEEN INCLUDED INTO THE CLUSTER. IT IS NOW TIME TO CALCULATE WHICH
3089 * ARE OUR LEFT AND RIGHT NEIGHBOURS FOR THE HEARTBEAT PROTOCOL.
3090 *--------------------------------------------------------------------------*/
/**
 * Recompute our heartbeat neighbours.  Running nodes form a logical
 * ring ordered by dynamic id: the left neighbour (cneighbourl, the one
 * we supervise) is the running node with the largest dynamic id below
 * ours, wrapping to the overall maximum; the right neighbour
 * (cneighbourh, the one we send heartbeats to) is the node with the
 * smallest dynamic id above ours, wrapping to the overall minimum.
 * ZNIL means no neighbour (we are alone in the cluster).
 *
 * @param signal  used to emit the NDB_LE_FIND_NEIGHBOURS event report
 * @param from    caller's __LINE__, logged for diagnostics only
 */
void Qmgr::findNeighbours(Signal* signal, Uint32 from)
{
  UintR toldLeftNeighbour;
  UintR tfnLeftFound;
  UintR tfnMaxFound;
  UintR tfnMinFound;
  UintR tfnRightFound;
  NodeRecPtr fnNodePtr;
  NodeRecPtr fnOwnNodePtr;

  Uint32 toldRightNeighbour = cneighbourh;
  toldLeftNeighbour = cneighbourl;
  tfnLeftFound = 0;            // max dynamic id below our own (0 = none)
  tfnMaxFound = 0;             // overall max dynamic id among other nodes
  tfnMinFound = (UintR)-1;     // overall min dynamic id among other nodes
  tfnRightFound = (UintR)-1;   // min dynamic id above our own (-1 = none)
  fnOwnNodePtr.i = getOwnNodeId();
  ptrCheckGuard(fnOwnNodePtr, MAX_NDB_NODES, nodeRec);
  // Single pass over all running nodes gathering the four extremes.
  for (fnNodePtr.i = 1; fnNodePtr.i < MAX_NDB_NODES; fnNodePtr.i++) {
    ptrAss(fnNodePtr, nodeRec);
    if (fnNodePtr.i != fnOwnNodePtr.i) {
      jamLine(fnNodePtr.i);
      if (fnNodePtr.p->phase == ZRUNNING) {
        if (tfnMinFound > fnNodePtr.p->ndynamicId) {
          jam();
          tfnMinFound = fnNodePtr.p->ndynamicId;
        }//if
        if (tfnMaxFound < fnNodePtr.p->ndynamicId) {
          jam();
          tfnMaxFound = fnNodePtr.p->ndynamicId;
        }//if
        if (fnOwnNodePtr.p->ndynamicId > fnNodePtr.p->ndynamicId) {
          jam();
          if (fnNodePtr.p->ndynamicId > tfnLeftFound) {
            jam();
            tfnLeftFound = fnNodePtr.p->ndynamicId;
          }//if
        } else {
          jam();
          if (fnNodePtr.p->ndynamicId < tfnRightFound) {
            jam();
            tfnRightFound = fnNodePtr.p->ndynamicId;
          }//if
        }//if
      }//if
    }//if
  }//for
  // Left neighbour: nothing below us -> wrap to the maximum, or ZNIL
  // if there are no other running nodes at all.
  if (tfnLeftFound == 0) {
    if (tfnMinFound == (UintR)-1) {
      jam();
      cneighbourl = ZNIL;
    } else {
      jam();
      cneighbourl = translateDynamicIdToNodeId(signal, tfnMaxFound);
    }//if
  } else {
    jam();
    cneighbourl = translateDynamicIdToNodeId(signal, tfnLeftFound);
  }//if
  // Right neighbour: nothing above us -> wrap to the minimum, or ZNIL
  // if there are no other running nodes at all.
  if (tfnRightFound == (UintR)-1) {
    if (tfnMaxFound == 0) {
      jam();
      cneighbourh = ZNIL;
    } else {
      jam();
      cneighbourh = translateDynamicIdToNodeId(signal, tfnMinFound);
    }//if
  } else {
    jam();
    cneighbourh = translateDynamicIdToNodeId(signal, tfnRightFound);
  }//if
  if (toldLeftNeighbour != cneighbourl) {
    jam();
    if (cneighbourl != ZNIL) {
      jam();
      /**-------------------------------------------------------------------*/
      /* WE ARE SUPERVISING A NEW LEFT NEIGHBOUR. WE START WITH ALARM COUNT
       * EQUAL TO ZERO.
       *---------------------------------------------------------------------*/
      fnNodePtr.i = cneighbourl;
      ptrCheckGuard(fnNodePtr, MAX_NDB_NODES, nodeRec);
      set_hb_count(fnNodePtr.i) = 0;
    }//if
  }//if

  // Publish the new neighbour configuration as a cluster event.
  signal->theData[0] = NDB_LE_FIND_NEIGHBOURS;
  signal->theData[1] = getOwnNodeId();
  signal->theData[2] = cneighbourl;
  signal->theData[3] = cneighbourh;
  signal->theData[4] = fnOwnNodePtr.p->ndynamicId;
  UintR Tlen = 5;
  sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, Tlen, JBB);
  g_eventLogger->info("findNeighbours from: %u old (left: %u right: %u) new (%u %u)",
                      from,
                      toldLeftNeighbour,
                      toldRightNeighbour,
                      cneighbourl,
                      cneighbourh);
}//Qmgr::findNeighbours()
3190
3191 /*
3192 4.10.7 INIT_DATA */
3193 /*---------------------------------------------------------------------------*/
3194 /*---------------------------------------------------------------------------*/
/**
 * One-time initialization of QMGR data from the node configuration:
 * timers (heartbeat, arbitration, connectivity-check), restart
 * timeouts, arbitration ranks, and heartbeat order.  A configured
 * timeout value of 0 means "no timeout" and is mapped to ~0.
 * Aborts the node (progError) on invalid HeartbeatOrder config.
 */
void Qmgr::initData(Signal* signal)
{
  // catch-all for missing initializations
  memset(&arbitRec, 0, sizeof(arbitRec));

  /**
   * Timeouts
   */
  const ndb_mgm_configuration_iterator * p =
    m_ctx.m_config.getOwnConfigIterator();
  ndbrequire(p != 0);

  // Defaults used when the parameter is absent from the configuration.
  Uint32 hbDBDB = 1500;
  Uint32 arbitTimeout = 1000;
  Uint32 arbitMethod = ARBIT_METHOD_DEFAULT;
  Uint32 ccInterval = 0;
  c_restartPartialTimeout = 30000;
  c_restartPartitionedTimeout = Uint32(~0);
  c_restartFailureTimeout = Uint32(~0);
  c_restartNoNodegroupTimeout = 15000;
  ndb_mgm_get_int_parameter(p, CFG_DB_HEARTBEAT_INTERVAL, &hbDBDB);
  ndb_mgm_get_int_parameter(p, CFG_DB_ARBIT_TIMEOUT, &arbitTimeout);
  ndb_mgm_get_int_parameter(p, CFG_DB_ARBIT_METHOD, &arbitMethod);
  ndb_mgm_get_int_parameter(p, CFG_DB_START_PARTIAL_TIMEOUT,
                            &c_restartPartialTimeout);
  ndb_mgm_get_int_parameter(p, CFG_DB_START_PARTITION_TIMEOUT,
                            &c_restartPartitionedTimeout);
  ndb_mgm_get_int_parameter(p, CFG_DB_START_NO_NODEGROUP_TIMEOUT,
                            &c_restartNoNodegroupTimeout);
  ndb_mgm_get_int_parameter(p, CFG_DB_START_FAILURE_TIMEOUT,
                            &c_restartFailureTimeout);
  ndb_mgm_get_int_parameter(p, CFG_DB_CONNECT_CHECK_DELAY,
                            &ccInterval);

  // Configured value 0 means "wait forever".
  if(c_restartPartialTimeout == 0)
  {
    c_restartPartialTimeout = Uint32(~0);
  }

  if (c_restartPartitionedTimeout == 0)
  {
    c_restartPartitionedTimeout = Uint32(~0);
  }

  if (c_restartFailureTimeout == 0)
  {
    c_restartFailureTimeout = Uint32(~0);
  }

  if (c_restartNoNodegroupTimeout == 0)
  {
    c_restartNoNodegroupTimeout = Uint32(~0);
  }

  setHbDelay(hbDBDB);
  setCCDelay(ccInterval);
  setArbitTimeout(arbitTimeout);

  arbitRec.method = (ArbitRec::Method)arbitMethod;
  arbitRec.state = ARBIT_NULL;  // start state for all nodes
  // NOTE(review): debug text says ARBIT_INIT while the state set above
  // is ARBIT_NULL - message kept as-is since it is trace output only.
  DEB_ARBIT(("Arbit state = ARBIT_INIT init"));
  arbitRec.apiMask[0].clear(); // prepare for ARBIT_CFG

  // Build the arbitrator candidate masks: one ARBIT_CFG round per rank
  // (1 and 2), each collecting the nodes configured with that rank.
  Uint32 sum = 0;
  ArbitSignalData* const sd = (ArbitSignalData*)&signal->theData[0];
  for (unsigned rank = 1; rank <= 2; rank++) {
    sd->sender = getOwnNodeId();
    sd->code = rank;
    sd->node = 0;
    sd->ticket.clear();
    sd->mask.clear();
    ndb_mgm_configuration_iterator * iter =
      m_ctx.m_config.getClusterConfigIterator();
    for (ndb_mgm_first(iter); ndb_mgm_valid(iter); ndb_mgm_next(iter)) {
      Uint32 tmp = 0;
      if (ndb_mgm_get_int_parameter(iter, CFG_NODE_ARBIT_RANK, &tmp) == 0 &&
          tmp == rank){
        Uint32 nodeId = 0;
        ndbrequire(!ndb_mgm_get_int_parameter(iter, CFG_NODE_ID, &nodeId));
        sd->mask.set(nodeId);
      }
    }
    sum += sd->mask.count();
    execARBIT_CFG(signal);
  }

  // No node has a non-zero arbitration rank: arbitration cannot work,
  // so disable it (only when the method was left at its default).
  if (arbitRec.method == ArbitRec::METHOD_DEFAULT &&
      sum == 0)
  {
    jam();
    infoEvent("Arbitration disabled, all API nodes have rank 0");
    arbitRec.method = ArbitRec::DISABLED;
  }

  setNodeInfo(getOwnNodeId()).m_mysql_version = NDB_MYSQL_VERSION_D;

  // Record the configured HeartbeatOrder for every data node.
  ndb_mgm_configuration_iterator * iter =
    m_ctx.m_config.getClusterConfigIterator();
  for (ndb_mgm_first(iter); ndb_mgm_valid(iter); ndb_mgm_next(iter))
  {
    jam();
    Uint32 nodeId = 0;
    if (ndb_mgm_get_int_parameter(iter, CFG_NODE_ID, &nodeId) == 0)
    {
      jam();
      if (nodeId < MAX_NDB_NODES && getNodeInfo(nodeId).m_type == NodeInfo::DB)
      {
        Uint32 hbOrder = 0;
        ndb_mgm_get_int_parameter(iter, CFG_DB_HB_ORDER, &hbOrder);

        NodeRecPtr nodePtr;
        nodePtr.i = nodeId;
        ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
        nodePtr.p->hbOrder = hbOrder;
      }
    }
  }
  // Validate HeartbeatOrder: either all zero or all distinct non-zero.
  int hb_order_error = check_hb_order_config();
  if (hb_order_error == -1)
  {
    char msg[] = "Illegal HeartbeatOrder config, "
                 "all nodes must have non-zero config value";
    progError(__LINE__, NDBD_EXIT_INVALID_CONFIG, msg);
    return;
  }
  if (hb_order_error == -2)
  {
    char msg[] = "Illegal HeartbeatOrder config, "
                 "the nodes must have distinct config values";
    progError(__LINE__, NDBD_EXIT_INVALID_CONFIG, msg);
    return;
  }
  ndbrequire(hb_order_error == 0);
}//Qmgr::initData()
3329
3330
3331 /**---------------------------------------------------------------------------
3332 * HERE WE RECEIVE THE JOB TABLE SIGNAL EVERY 10 MILLISECONDS.
3333 * WE WILL USE THIS TO CHECK IF IT IS TIME TO CHECK THE NEIGHBOUR NODE.
3334 * WE WILL ALSO SEND A SIGNAL TO BLOCKS THAT NEED A TIME SIGNAL AND
3335 * DO NOT WANT TO USE JOB TABLE SIGNALS.
3336 *---------------------------------------------------------------------------*/
/**
 * Periodic (10 ms) CONTINUEB handler driving all QMGR timers:
 * heartbeat send/check, connectivity check, start-interface check and
 * API heartbeat handling.  theData[1..2] carry the tick (hi/lo words)
 * at which the previous round was sent, letting us detect both a clock
 * that ticked backwards and excessive scheduling delay; in either case
 * heartbeats are sent unconditionally this round to reduce the risk of
 * false heartbeat failures.
 */
void Qmgr::timerHandlingLab(Signal* signal)
{
  const NDB_TICKS TcurrentTime = NdbTick_getCurrentTicks();
  NodeRecPtr myNodePtr;
  myNodePtr.i = getOwnNodeId();
  ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);

  // Reconstruct the timestamp stored when this signal was scheduled.
  const Uint32 sentHi = signal->theData[1];
  const Uint32 sentLo = signal->theData[2];
  const NDB_TICKS sent((Uint64(sentHi) << 32) | sentLo);
  bool send_hb_always = false;

  if (NdbTick_Compare(sent,TcurrentTime) > 0)
  {
    jam();
    // Clock went backwards since we scheduled ourselves.
    const Uint64 backwards = NdbTick_Elapsed(TcurrentTime,sent).milliSec();
    if (backwards > 0) //Ignore sub millisecond backticks
    {
      g_eventLogger->warning("timerHandlingLab, clock ticked backwards: %llu (ms)",
                             backwards);
      send_hb_always = true;
    }
  }
  else
  {
    const Uint64 elapsed = NdbTick_Elapsed(sent,TcurrentTime).milliSec();
    if (elapsed >= 150)
    {
      // We were scheduled much later than the expected 10 ms; log it,
      // including CPU usage since the last round when available.
      struct ndb_rusage curr_rusage;
      jam();
      send_hb_always = true;
      bool rusage_worked = true;
      Uint64 exec_time = 0;
      Uint64 sys_time = 0;
      Ndb_GetRUsage(&curr_rusage, false);
      // All-zero rusage (current or previous) means the platform call
      // gave no usable data.
      if ((curr_rusage.ru_utime == 0 &&
           curr_rusage.ru_stime == 0) ||
          (m_timer_handling_rusage.ru_utime == 0 &&
           m_timer_handling_rusage.ru_stime == 0))
      {
        jam();
        rusage_worked = false;
      }
      if (rusage_worked)
      {
        exec_time = curr_rusage.ru_utime -
                    m_timer_handling_rusage.ru_utime;
        sys_time = curr_rusage.ru_stime -
                   m_timer_handling_rusage.ru_stime;
      }

      // >= 1s delay is a warning, otherwise an info message.
      if (elapsed >= 1000)
      {
        if (rusage_worked)
        {
          g_eventLogger->warning("timerHandlingLab, expected 10ms sleep"
                                 ", not scheduled for: %d (ms), "
                                 "exec_time %llu us, sys_time %llu us",
                                 int(elapsed),
                                 exec_time,
                                 sys_time);
        }
        else
        {
          g_eventLogger->warning("timerHandlingLab, expected 10ms sleep"
                                 ", not scheduled for: %d (ms)", int(elapsed));
        }
      }
      else
      {
        if (rusage_worked)
        {
          g_eventLogger->info("timerHandlingLab, expected 10ms sleep"
                              ", not scheduled for: %d (ms), "
                              "exec_time %llu us, sys_time %llu us",
                              int(elapsed),
                              exec_time,
                              sys_time);
        }
        else
        {
          g_eventLogger->info("timerHandlingLab, expected 10ms sleep"
                              ", not scheduled for: %d (ms)", int(elapsed));
        }
      }
    }
  }

  if (myNodePtr.p->phase == ZRUNNING) {
    jam();
    /**---------------------------------------------------------------------
     * WE ARE ONLY PART OF HEARTBEAT CLUSTER IF WE ARE UP AND RUNNING.
     *---------------------------------------------------------------------*/
    if (hb_send_timer.check(TcurrentTime) || send_hb_always)
    {
      /**
       * We send heartbeats once per heartbeat interval and 4 missed heartbeat
       * intervals will cause a failure. If QMGR is not so responsive we're
       * having some sort of overload issue. In this case we will always take
       * the chance to send heartbeats immediately to avoid risking heartbeat
       * failures (send_hb_always == true).
       *
       * Delaying checks of heartbeat timers is much less of a problem.
       */
      jam();
      sendHeartbeat(signal);
      hb_send_timer.reset(TcurrentTime);
    }
    // Heartbeat checking is suspended while a connectivity check runs.
    if (likely(! m_connectivity_check.m_active))
    {
      if (hb_check_timer.check(TcurrentTime)) {
        jam();
        checkHeartbeat(signal);
        hb_check_timer.reset(TcurrentTime);
      }
    }
    else
    {
      /* Connectivity check */
      if (m_connectivity_check.m_timer.check(TcurrentTime)) {
        jam();
        checkConnectivityTimeSignal(signal);
        m_connectivity_check.m_timer.reset(TcurrentTime);
      }
    }
  }

  if (interface_check_timer.check(TcurrentTime)) {
    jam();
    interface_check_timer.reset(TcurrentTime);
    checkStartInterface(signal, TcurrentTime);
  }

  if (hb_api_timer.check(TcurrentTime))
  {
    jam();
    hb_api_timer.reset(TcurrentTime);
    apiHbHandlingLab(signal, TcurrentTime);
  }

  // Remember CPU usage for the delay diagnostics of the next round.
  Ndb_GetRUsage(&m_timer_handling_rusage, false);

  //--------------------------------------------------
  // Resend this signal with 10 milliseconds delay.
  //--------------------------------------------------
  signal->theData[0] = ZTIMER_HANDLING;
  signal->theData[1] = Uint32(TcurrentTime.getUint64() >> 32);
  signal->theData[2] = Uint32(TcurrentTime.getUint64());
  sendSignalWithDelay(QMGR_REF, GSN_CONTINUEB, signal, 10, 3);
  return;
}//Qmgr::timerHandlingLab()
3488
3489 /*---------------------------------------------------------------------------*/
3490 /* THIS MODULE HANDLES THE SENDING AND RECEIVING OF HEARTBEATS. */
3491 /*---------------------------------------------------------------------------*/
/**
 * Send CM_HEARTBEAT to our right neighbour (cneighbourh).  A node with
 * no neighbour (alone in the cluster) sends nothing.  Error insert 946
 * simulates a stalled node by sleeping instead of sending.
 */
void Qmgr::sendHeartbeat(Signal* signal)
{
  NodeRecPtr localNodePtr;
  localNodePtr.i = cneighbourh;
  if (localNodePtr.i == ZNIL) {
    jam();
    /**---------------------------------------------------------------------
     * THERE ARE NO NEIGHBOURS. THIS IS POSSIBLE IF WE ARE THE ONLY NODE IN
     * THE CLUSTER. IN THIS CASE WE DO NOT NEED TO SEND ANY HEARTBEAT SIGNALS.
     *-----------------------------------------------------------------------*/
    return;
  }//if

  if(ERROR_INSERTED(946))
  {
    // Test hook: block for 3 minutes so the neighbour declares us dead.
    sleep(180);
    return;
  }

  ptrCheckGuard(localNodePtr, MAX_NDB_NODES, nodeRec);
  signal->theData[0] = getOwnNodeId();

  sendSignal(localNodePtr.p->blockRef, GSN_CM_HEARTBEAT, signal, 1, JBA);
#ifdef VM_TRACE
  // Debug builds also log each sent heartbeat as a cluster event.
  signal->theData[0] = NDB_LE_SentHeartbeat;
  signal->theData[1] = localNodePtr.i;
  sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
#endif
}//Qmgr::sendHeartbeat()
3521
/**
 * Supervise our left neighbour (cneighbourl).  Called once per
 * heartbeat-check interval: bumps the neighbour's missed-heartbeat
 * counter (reset elsewhere when a CM_HEARTBEAT arrives).  After 2
 * missed intervals an event is reported; after 4 the neighbour is
 * either subjected to a connectivity check (if enabled) or declared
 * failed.
 */
void Qmgr::checkHeartbeat(Signal* signal)
{
  NodeRecPtr nodePtr;

  nodePtr.i = cneighbourl;
  if (nodePtr.i == ZNIL) {
    jam();
    /**---------------------------------------------------------------------
     * THERE ARE NO NEIGHBOURS. THIS IS POSSIBLE IF WE ARE THE ONLY NODE IN
     * THE CLUSTER. IN THIS CASE WE DO NOT NEED TO CHECK ANY HEARTBEATS.
     *-----------------------------------------------------------------------*/
    return;
  }//if
  ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);

  set_hb_count(nodePtr.i)++;
  // The left neighbour must be a running data node by construction.
  ndbrequire(nodePtr.p->phase == ZRUNNING);
  ndbrequire(getNodeInfo(nodePtr.i).m_type == NodeInfo::DB);

  if (get_hb_count(nodePtr.i) > 2)
  {
    // Warn about each missed heartbeat beyond the second.
    signal->theData[0] = NDB_LE_MissedHeartbeat;
    signal->theData[1] = nodePtr.i;
    signal->theData[2] = get_hb_count(nodePtr.i) - 1;
    sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3, JBB);
  }

  if (get_hb_count(nodePtr.i) > 4)
  {
    jam();
    if (m_connectivity_check.getEnabled())
    {
      jam();
      /* Start connectivity check, indicating the cause */
      startConnectivityCheck(signal, FailRep::ZHEARTBEAT_FAILURE, nodePtr.i);
      return;
    }
    else
    {
      /**----------------------------------------------------------------------
       * OUR LEFT NEIGHBOUR HAVE KEPT QUIET FOR THREE CONSECUTIVE HEARTBEAT
       * PERIODS. THUS WE DECLARE HIM DOWN.
       *----------------------------------------------------------------------*/
      signal->theData[0] = NDB_LE_DeadDueToHeartbeat;
      signal->theData[1] = nodePtr.i;
      sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);

      failReportLab(signal, nodePtr.i, FailRep::ZHEARTBEAT_FAILURE, getOwnNodeId());
      return;
    }
  }//if
}//Qmgr::checkHeartbeat()
3574
/**
 * Heartbeat supervision for API/MGM nodes (data nodes are handled by
 * checkHeartbeat).  For each connected non-DB node: bump its
 * missed-heartbeat counter, report after 2 misses and disconnect the
 * node (api_failed) after 4.  For disconnected API nodes holding an
 * unused node-id allocation, release the allocation once its timeout
 * (m_alloc_timeout) has passed.
 *
 * @param signal  signal object for event reports
 * @param now     current tick, used for allocation-timeout checks
 */
void Qmgr::apiHbHandlingLab(Signal* signal, NDB_TICKS now)
{
  NodeRecPtr TnodePtr;

  jam();
  for (TnodePtr.i = 1; TnodePtr.i < MAX_NODES; TnodePtr.i++) {
    const Uint32 nodeId = TnodePtr.i;
    ptrAss(TnodePtr, nodeRec);

    const NodeInfo::NodeType type = getNodeInfo(nodeId).getType();
    // Data nodes and unconfigured slots are not handled here.
    if(type == NodeInfo::DB)
      continue;

    if(type == NodeInfo::INVALID)
      continue;

    if (c_connectedNodes.get(nodeId))
    {
      jamLine(nodeId);
      set_hb_count(TnodePtr.i)++;

      if (get_hb_count(TnodePtr.i) > 2)
      {
        signal->theData[0] = NDB_LE_MissedHeartbeat;
        signal->theData[1] = nodeId;
        signal->theData[2] = get_hb_count(TnodePtr.i) - 1;
        sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3, JBB);
      }

      if (get_hb_count(TnodePtr.i) > 4)
      {
        jam();
        /*------------------------------------------------------------------*/
        /* THE API NODE HAS NOT SENT ANY HEARTBEAT FOR THREE SECONDS.
         * WE WILL DISCONNECT FROM IT NOW.
         *------------------------------------------------------------------*/
        /*------------------------------------------------------------------*/
        /* We call node_failed to release all connections for this api node */
        /*------------------------------------------------------------------*/
        signal->theData[0] = NDB_LE_DeadDueToHeartbeat;
        signal->theData[1] = nodeId;
        sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);

        api_failed(signal, nodeId);
      }//if
    }//if
    else if (TnodePtr.p->phase == ZAPI_INACTIVE &&
             TnodePtr.p->m_secret != 0 &&
             NdbTick_Compare(now,TnodePtr.p->m_alloc_timeout) > 0)
    {
      jam();
      // Node-id was allocated but never used within the timeout window.
      TnodePtr.p->m_secret = 0;
      warningEvent("Releasing node id allocation for node %u",
                   TnodePtr.i);
    }
  }//for
  return;
}//Qmgr::apiHbHandlingLab()
3633
/**
 * Per-second scan of all nodes in ZFAIL_CLOSING: once a failed node has
 * been disconnected for at least 3 seconds AND its failure handling is
 * complete (failState == NORMAL), re-open communication so it may
 * register again.  While failure handling is still pending, warn every
 * 30 seconds and (for data nodes) dump DIH/TC/SUMA node-fail state.
 * Also releases stale node-id allocations for data nodes in ZINIT.
 *
 * @param signal  signal object for outgoing OPEN_COMORD etc.
 * @param now     current tick, used for allocation-timeout checks
 */
void Qmgr::checkStartInterface(Signal* signal, NDB_TICKS now)
{
  NodeRecPtr nodePtr;
  /*------------------------------------------------------------------------*/
  // This method is called once per second. After a disconnect we wait at
  // least three seconds before allowing new connects. We will also ensure
  // that handling of the failure is completed before we allow new connections.
  /*------------------------------------------------------------------------*/
  jam();
  for (nodePtr.i = 1; nodePtr.i < MAX_NODES; nodePtr.i++) {
    ptrAss(nodePtr, nodeRec);
    Uint32 type = getNodeInfo(nodePtr.i).m_type;
    if (nodePtr.p->phase == ZFAIL_CLOSING) {
      jamLine(nodePtr.i);
      // hb_count here counts seconds since the transporter disconnected.
      set_hb_count(nodePtr.i)++;
      if (c_connectedNodes.get(nodePtr.i)){
        jam();
        /*-------------------------------------------------------------------*/
        // We need to ensure that the connection is not restored until it has
        // been disconnected for at least three seconds.
        /*-------------------------------------------------------------------*/
        set_hb_count(nodePtr.i) = 0;
      }//if
      if ((get_hb_count(nodePtr.i) > 3)
          && (nodePtr.p->failState == NORMAL)) {
        /**------------------------------------------------------------------
         * WE HAVE DISCONNECTED THREE SECONDS AGO. WE ARE NOW READY TO
         * CONNECT AGAIN AND ACCEPT NEW REGISTRATIONS FROM THIS NODE.
         * WE WILL NOT ALLOW CONNECTIONS OF API NODES UNTIL API FAIL HANDLING
         * IS COMPLETE.
         *-------------------------------------------------------------------*/
        nodePtr.p->failState = NORMAL;  // already NORMAL per the guard above
        nodePtr.p->m_secret = 0;
        // New phase depends on the node type.
        switch(type){
        case NodeInfo::DB:
          jam();
          nodePtr.p->phase = ZINIT;
          DEB_STARTUP(("2:phase(%u) = ZINIT", nodePtr.i));
          break;
        case NodeInfo::MGM:
          jam();
          nodePtr.p->phase = ZAPI_INACTIVE;
          break;
        case NodeInfo::API:
          jam();
          if (c_allow_api_connect)
          {
            jam();
            nodePtr.p->phase = ZAPI_INACTIVE;
            break;
          }
          else
          {
            /**
             * Dont allow API node to connect before c_allow_api_connect
             */
            jam();
            // Keep the counter poised so we re-evaluate next second.
            set_hb_count(nodePtr.i) = 3;
            continue;
          }
        }

        set_hb_count(nodePtr.i) = 0;
        signal->theData[0] = 0;
        signal->theData[1] = nodePtr.i;
        sendSignal(TRPMAN_REF, GSN_OPEN_COMORD, signal, 2, JBB);
      }
      else
      {
        jam();
        // Failure handling still pending: complain every 30 seconds.
        if(((get_hb_count(nodePtr.i) + 1) % 30) == 0)
        {
          jam();
          char buf[256];
          if (getNodeInfo(nodePtr.i).m_type == NodeInfo::DB)
          {
            jam();
            BaseString::snprintf(buf, sizeof(buf),
                                 "Failure handling of node %d has not completed"
                                 " in %d seconds - state = %d",
                                 nodePtr.i,
                                 get_hb_count(nodePtr.i),
                                 nodePtr.p->failState);
            warningEvent("%s", buf);

            /**
             * Also dump DIH nf-state
             */
            signal->theData[0] = DumpStateOrd::DihTcSumaNodeFailCompleted;
            signal->theData[1] = nodePtr.i;
            sendSignal(DBDIH_REF, GSN_DUMP_STATE_ORD, signal, 2, JBB);
          }
          else
          {
            jam();
            BaseString::snprintf(buf, sizeof(buf),
                                 "Failure handling of api %u has not completed"
                                 " in %d seconds - state = %d",
                                 nodePtr.i,
                                 get_hb_count(nodePtr.i),
                                 nodePtr.p->failState);
            warningEvent("%s", buf);
            if (nodePtr.p->failState == WAITING_FOR_API_FAILCONF)
            {
              jam();
              // List the blocks whose API_FAILCONF is still outstanding.
              static_assert(NDB_ARRAY_SIZE(nodePtr.p->m_failconf_blocks) == 5, "");
              BaseString::snprintf(buf, sizeof(buf),
                                   " Waiting for blocks: %u %u %u %u %u",
                                   nodePtr.p->m_failconf_blocks[0],
                                   nodePtr.p->m_failconf_blocks[1],
                                   nodePtr.p->m_failconf_blocks[2],
                                   nodePtr.p->m_failconf_blocks[3],
                                   nodePtr.p->m_failconf_blocks[4]);
              warningEvent("%s", buf);
            }
          }
        }
      }
    }
    else if (type == NodeInfo::DB && nodePtr.p->phase == ZINIT &&
             nodePtr.p->m_secret != 0 &&
             NdbTick_Compare(now,nodePtr.p->m_alloc_timeout) > 0)
    {
      jam();
      // Data node allocated a node-id but never completed registration.
      nodePtr.p->m_secret = 0;
      warningEvent("Releasing node id allocation for node %u",
                   nodePtr.i);
    }
  }//for
  return;
}//Qmgr::checkStartInterface()
3765
3766 /**-------------------------------------------------------------------------
3767 * This method is called when a DISCONNECT_REP signal arrived which means that
3768 * the API node is gone and we want to release resources in TC/DICT blocks.
3769 *---------------------------------------------------------------------------*/
/**
 * Distribute API_FAILREQ for a failed API node to the interested
 * blocks, routed via TRPMAN/CMVMI so the request is ordered after any
 * signals still in flight from the failed node.  Registers each target
 * block in the node's m_failconf_blocks list; execAPI_FAILCONF ticks
 * them off as confirmations arrive.
 *
 * @param failedNodeNo  node id of the failed API/MGM node
 * @param sumaOnly      when true, notify only SUMA (skip TC/DICT/SPJ)
 */
void Qmgr::sendApiFailReq(Signal* signal, Uint16 failedNodeNo, bool sumaOnly)
{
  jamEntry();
  // API_FAILREQ payload: failed node id + our reference; carried as a
  // section of the ROUTE_ORD signals below.
  signal->theData[0] = failedNodeNo;
  signal->theData[1] = QMGR_REF;

  /* We route the ApiFailReq signals via CMVMI
   * This is done to ensure that they are received after
   * any pending signals from the failed Api node when
   * running ndbmtd, as these signals would be enqueued from
   * the thread running CMVMI
   */
  Uint32 routedSignalSectionI = RNIL;
  ndbrequire(appendToSection(routedSignalSectionI,
                             &signal->theData[0],
                             2));
  SectionHandle handle(this, routedSignalSectionI);

  /* RouteOrd data */
  RouteOrd* routeOrd = (RouteOrd*) &signal->theData[0];
  routeOrd->srcRef = reference();
  routeOrd->gsn = GSN_API_FAILREQ;
  routeOrd->from = failedNodeNo;

  NodeRecPtr failedNodePtr;
  failedNodePtr.i = failedNodeNo;
  ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec);
  failedNodePtr.p->failState = WAITING_FOR_API_FAILCONF;


  /* Send ROUTE_ORD signals to CMVMI via JBA
   * CMVMI will then immediately send the API_FAILREQ
   * signals to the destination block(s) using JBB
   * These API_FAILREQ signals will be sent *after*
   * any JBB signals enqueued from the failed API
   * by the CMVMI thread.
   */
  if (!sumaOnly)
  {
    jam();
    // sendSignalNoRelease keeps the section alive for the next send.
    add_failconf_block(failedNodePtr, DBTC);
    routeOrd->dstRef = DBTC_REF;
    sendSignalNoRelease(TRPMAN_REF, GSN_ROUTE_ORD, signal,
                        RouteOrd::SignalLength,
                        JBA, &handle);

    add_failconf_block(failedNodePtr, DBDICT);
    routeOrd->dstRef = DBDICT_REF;
    sendSignalNoRelease(TRPMAN_REF, GSN_ROUTE_ORD, signal,
                        RouteOrd::SignalLength,
                        JBA, &handle);

    add_failconf_block(failedNodePtr, DBSPJ);
    routeOrd->dstRef = DBSPJ_REF;
    sendSignalNoRelease(TRPMAN_REF, GSN_ROUTE_ORD, signal,
                        RouteOrd::SignalLength,
                        JBA, &handle);
  }

  /* Suma always notified */
  // Final send uses sendSignal, which releases the section with it.
  add_failconf_block(failedNodePtr, SUMA);
  routeOrd->dstRef = SUMA_REF;
  sendSignal(TRPMAN_REF, GSN_ROUTE_ORD, signal,
             RouteOrd::SignalLength,
             JBA, &handle);
}//Qmgr::sendApiFailReq()
3836
execAPI_FAILREQ(Signal * signal)3837 void Qmgr::execAPI_FAILREQ(Signal* signal)
3838 {
3839 jamEntry();
3840 NodeRecPtr failedNodePtr;
3841 failedNodePtr.i = signal->theData[0];
3842 // signal->theData[1] == QMGR_REF
3843 ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec);
3844
3845 ndbrequire(getNodeInfo(failedNodePtr.i).getType() != NodeInfo::DB);
3846
3847 api_failed(signal, signal->theData[0]);
3848 }
3849
/**
 * API_FAILCONF: a block confirms that it completed failure handling
 * for the given API node.  Remove the block from the node's pending
 * list (m_failconf_blocks); an unexpected confirmation is a fatal
 * error.  When the list becomes empty the node's failure handling is
 * done: it may reconnect later (see checkStartInterface) and its
 * cached version info is cleared.
 */
void Qmgr::execAPI_FAILCONF(Signal* signal)
{
  NodeRecPtr failedNodePtr;

  jamEntry();
  failedNodePtr.i = signal->theData[0];
  ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec);

  Uint32 block = refToMain(signal->theData[1]);
  if (failedNodePtr.p->failState != WAITING_FOR_API_FAILCONF ||
      !remove_failconf_block(failedNodePtr, block))
  {
    jam();
    // Confirmation from a block we were not waiting for: dump state
    // and crash, as the failure-handling bookkeeping is corrupt.
    ndbout << "execAPI_FAILCONF from " << block
           << " failedNodePtr.p->failState = "
           << (Uint32)(failedNodePtr.p->failState)
           << " blocks: ";
    for (Uint32 i = 0;i<NDB_ARRAY_SIZE(failedNodePtr.p->m_failconf_blocks);i++)
    {
      printf("%u ", failedNodePtr.p->m_failconf_blocks[i]);
    }
    ndbout << endl;
    systemErrorLab(signal, __LINE__);
  }//if

  if (is_empty_failconf_block(failedNodePtr))
  {
    jam();
    /**
     * When we set this state, connection will later be opened
     * in checkStartInterface
     */
    failedNodePtr.p->failState = NORMAL;

    /**
     * Reset m_version only after all blocks has responded with API_FAILCONF
     * so that no block risks reading 0 as node-version
     */
    setNodeInfo(failedNodePtr.i).m_version = 0;
    recompute_version_info(getNodeInfo(failedNodePtr.i).m_type);
  }
  return;
}//Qmgr::execAPI_FAILCONF()
3893
3894 void
add_failconf_block(NodeRecPtr nodePtr,Uint32 block)3895 Qmgr::add_failconf_block(NodeRecPtr nodePtr, Uint32 block)
3896 {
3897 // Check that it does not already exists!!
3898 Uint32 pos = 0;
3899 for (; pos < NDB_ARRAY_SIZE(nodePtr.p->m_failconf_blocks); pos++)
3900 {
3901 jam();
3902 if (nodePtr.p->m_failconf_blocks[pos] == 0)
3903 {
3904 jam();
3905 break;
3906 }
3907 else if (nodePtr.p->m_failconf_blocks[pos] == block)
3908 {
3909 jam();
3910 break;
3911 }
3912 }
3913
3914 ndbrequire(pos != NDB_ARRAY_SIZE(nodePtr.p->m_failconf_blocks));
3915 ndbassert(nodePtr.p->m_failconf_blocks[pos] != block);
3916 if (nodePtr.p->m_failconf_blocks[pos] == block)
3917 {
3918 jam();
3919 /**
3920 * Already in list!!
3921 */
3922 #ifdef ERROR_INSERT
3923 ndbabort();
3924 #endif
3925 return;
3926 }
3927 ndbrequire(nodePtr.p->m_failconf_blocks[pos] == 0);
3928 nodePtr.p->m_failconf_blocks[pos] = block;
3929 }
3930
3931 bool
remove_failconf_block(NodeRecPtr nodePtr,Uint32 block)3932 Qmgr::remove_failconf_block(NodeRecPtr nodePtr, Uint32 block)
3933 {
3934 // Check that it does exists!!
3935 Uint32 pos = 0;
3936 for (; pos < NDB_ARRAY_SIZE(nodePtr.p->m_failconf_blocks); pos++)
3937 {
3938 jam();
3939 if (nodePtr.p->m_failconf_blocks[pos] == 0)
3940 {
3941 jam();
3942 break;
3943 }
3944 else if (nodePtr.p->m_failconf_blocks[pos] == block)
3945 {
3946 jam();
3947 break;
3948 }
3949 }
3950
3951 if (pos == NDB_ARRAY_SIZE(nodePtr.p->m_failconf_blocks) ||
3952 nodePtr.p->m_failconf_blocks[pos] != block)
3953 {
3954 jam();
3955 /**
3956 * Not found!!
3957 */
3958 return false;
3959 }
3960
3961 nodePtr.p->m_failconf_blocks[pos] = 0;
3962 for (pos++; pos < NDB_ARRAY_SIZE(nodePtr.p->m_failconf_blocks); pos++)
3963 {
3964 jam();
3965 nodePtr.p->m_failconf_blocks[pos - 1] = nodePtr.p->m_failconf_blocks[pos];
3966 }
3967
3968 return true;
3969 }
3970
/**
 * True when no API_FAILCONF is outstanding for this node.  Since
 * remove_failconf_block() compacts the list towards position 0, an
 * empty first slot implies the whole list is empty.
 */
bool
Qmgr::is_empty_failconf_block(NodeRecPtr nodePtr) const
{
  return nodePtr.p->m_failconf_blocks[0] == 0;
}
3976
/**
 * NDB_FAILCONF
 *
 * Reports that local handling of a data node failure has completed
 * (sender block not visible in this file chunk — presumably NDBCNTR/DIH;
 * confirm against the sender).  The node record goes back to NORMAL, the
 * DB version info is recomputed, and NF_COMPLETEREP is broadcast to all
 * active API nodes so they can abort transactions waiting on the failed
 * node.
 */
void Qmgr::execNDB_FAILCONF(Signal* signal)
{
  NodeRecPtr failedNodePtr;
  NodeRecPtr nodePtr;

  jamEntry();
  failedNodePtr.i = signal->theData[0];

  /* Error insert 930: drop the confirmation to test stuck fail handling. */
  if (ERROR_INSERTED(930))
  {
    CLEAR_ERROR_INSERT_VALUE;
    infoEvent("Discarding NDB_FAILCONF for %u", failedNodePtr.i);
    return;
  }

  ptrCheckGuard(failedNodePtr, MAX_NDB_NODES, nodeRec);
  if (failedNodePtr.p->failState == WAITING_FOR_NDB_FAILCONF)
  {
    g_eventLogger->info("Node %u has completed node fail handling",
                        failedNodePtr.i);
    failedNodePtr.p->failState = NORMAL;
  }
  else
  {
    jam();

    /* NDB_FAILCONF for a node we were not waiting on: fatal. */
    char buf[100];
    BaseString::snprintf(buf, 100,
                         "Received NDB_FAILCONF for node %u with state: %d %d",
                         failedNodePtr.i,
                         failedNodePtr.p->phase,
                         failedNodePtr.p->failState);
    progError(__LINE__, 0, buf);
    systemErrorLab(signal, __LINE__);
  }//if

  if (cpresident == getOwnNodeId())
  {
    jam();

    CRASH_INSERTION(936);
  }

  /**
   * Reset node version only after all blocks have handled the failure
   * so that no block risks reading 0 as node version.
   */
  setNodeInfo(failedNodePtr.i).m_version = 0;
  recompute_version_info(NodeInfo::DB);

  /**
   * Prepare a NFCompleteRep and send to all connected API's
   * They can then abort all transaction waiting for response from
   * the failed node
   *
   * NOTE: This is sent from all nodes, as otherwise we would need
   *       take-over if cpresident dies before sending this
   */
  NFCompleteRep * const nfComp = (NFCompleteRep *)&signal->theData[0];
  nfComp->blockNo = QMGR_REF;
  nfComp->nodeId = getOwnNodeId();
  nfComp->failedNodeId = failedNodePtr.i;

  jam();
  for (nodePtr.i = 1; nodePtr.i < MAX_NODES; nodePtr.i++)
  {
    ptrAss(nodePtr, nodeRec);
    if (nodePtr.p->phase == ZAPI_ACTIVE){
      jamLine(nodePtr.i);
      sendSignal(nodePtr.p->blockRef, GSN_NF_COMPLETEREP, signal,
                 NFCompleteRep::SignalLength, JBB);
    }//if
  }//for
  return;
}//Qmgr::execNDB_FAILCONF()
4052
/**
 * NF_COMPLETEREP
 *
 * Only the DBTC report is acted on here; it is relayed to all active
 * API nodes as TAKE_OVERTCCONF so that NdbAPI learns as early as
 * possible that transactions towards the failed node can be aborted.
 */
void
Qmgr::execNF_COMPLETEREP(Signal* signal)
{
  jamEntry();
  /* Copy the report by value: theData is overwritten below before sending. */
  NFCompleteRep rep = *(NFCompleteRep*)signal->getDataPtr();
  if (rep.blockNo != DBTC)
  {
    jam();
    /* Unexpected sender block; ignore (assert in debug builds). */
    ndbassert(false);
    return;
  }

  /**
   * This is a simple way of having ndbapi to get
   * earlier information that transactions can be aborted
   */
  signal->theData[0] = rep.failedNodeId;
  // The below entries are not used by NdbAPI.
  signal->theData[1] = reference();
  signal->theData[2] = 0; // Unknown failure number
  NodeRecPtr nodePtr;
  for (nodePtr.i = 1; nodePtr.i < MAX_NODES; nodePtr.i++)
  {
    ptrAss(nodePtr, nodeRec);
    if (nodePtr.p->phase == ZAPI_ACTIVE)
    {
      jamLine(nodePtr.i);
      sendSignal(nodePtr.p->blockRef, GSN_TAKE_OVERTCCONF, signal,
                 TakeOverTcConf::SignalLength, JBB);
    }//if
  }//for
  return;
}
4086
4087 /*******************************/
4088 /* DISCONNECT_REP */
4089 /*******************************/
4090 const char *lookupConnectionError(Uint32 err);
4091
/**
 * DISCONNECT_REP
 *
 * The transporter layer reports that the link to a node went down.
 * Clears connection state, resets all multi-transporter bookkeeping for
 * the node, and dispatches to api_failed() for API/MGM nodes or
 * node_failed() for data nodes.  A data node disconnect while we are
 * still starting is fatal for ourselves.
 */
void Qmgr::execDISCONNECT_REP(Signal* signal)
{
  jamEntry();
  const DisconnectRep * const rep = (DisconnectRep *)&signal->theData[0];
  if (ERROR_INSERT_VALUE >= 951 && ERROR_INSERT_VALUE <= 960)
  {
    CRASH_INSERTION3();
  }
  const Uint32 nodeId = rep->nodeId;
  const Uint32 err = rep->err;
  const NodeInfo nodeInfo = getNodeInfo(nodeId);
  c_connectedNodes.clear(nodeId);
  DEB_STARTUP(("connectedNodes(%u) cleared", nodeId));

  if (nodeInfo.getType() == NodeInfo::DB)
  {
    c_readnodes_nodes.clear(nodeId);

    if (ERROR_INSERTED(942))
    {
      g_eventLogger->info("DISCONNECT_REP received from data node %u - crash insertion",
                          nodeId);
      CRASH_INSERTION(942);
    }
  }

  {
    /* Reset all multi-transporter state for the disconnected node. */
    NodeRecPtr disc_nodePtr;
    disc_nodePtr.i = nodeId;
    ptrCheckGuard(disc_nodePtr, MAX_NODES, nodeRec);

    disc_nodePtr.p->m_is_activate_trp_ready_for_me = false;
    disc_nodePtr.p->m_is_activate_trp_ready_for_other = false;
    disc_nodePtr.p->m_is_multi_trp_setup = false;
    disc_nodePtr.p->m_is_freeze_thread_completed = false;
    disc_nodePtr.p->m_is_ready_to_switch_trp = false;
    disc_nodePtr.p->m_is_preparing_switch_trp = false;
    disc_nodePtr.p->m_is_using_multi_trp = false;
    disc_nodePtr.p->m_set_up_multi_trp_started = false;
    disc_nodePtr.p->m_used_num_multi_trps = 0;
    disc_nodePtr.p->m_multi_trp_blockref = 0;
    disc_nodePtr.p->m_check_multi_trp_connect_loop_count = 0;
    disc_nodePtr.p->m_num_activated_trps = 0;
    if (disc_nodePtr.p->m_is_in_same_nodegroup)
    {
      jam();
      /* A nodegroup neighbour disconnected: rebuild neighbour setup. */
      DEB_MULTI_TRP(("Change neighbour node setup for node %u",
                     disc_nodePtr.i));
      check_no_multi_trp(signal, disc_nodePtr.i);
      startChangeNeighbourNode();
      setNeighbourNode(disc_nodePtr.i);
      endChangeNeighbourNode();
    }
  }

  NodeRecPtr nodePtr;
  nodePtr.i = getOwnNodeId();
  ptrCheckGuard(nodePtr, MAX_NODES, nodeRec);

  char buf[100];
  /* Losing a data node while we are not yet SL_STARTED aborts our own
   * start; we cannot recover a partial start after the peer is gone. */
  if (nodeInfo.getType() == NodeInfo::DB &&
      getNodeState().startLevel < NodeState::SL_STARTED)
  {
    jam();
    CRASH_INSERTION(932);
    CRASH_INSERTION(938);
    CRASH_INSERTION(944);
    CRASH_INSERTION(946);
    BaseString::snprintf(buf, 100, "Node %u disconnected in phase: %u",
                         nodeId,
                         nodePtr.p->phase);
    progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf);
    ndbabort();
  }

  if (getNodeInfo(nodeId).getType() != NodeInfo::DB)
  {
    jam();
    api_failed(signal, nodeId);
    return;
  }

  /* NOTE: the switch is over OUR OWN node's phase, not the failed one's. */
  switch(nodePtr.p->phase){
  case ZRUNNING:
    jam();
    break;
  case ZINIT:
    ndbabort();
  case ZSTARTING:
    progError(__LINE__, NDBD_EXIT_CONNECTION_SETUP_FAILED,
              lookupConnectionError(err));
  case ZPREPARE_FAIL:
    ndbabort();
  case ZFAIL_CLOSING:
    ndbabort();
  case ZAPI_ACTIVATION_ONGOING:
    ndbabort();
  case ZAPI_ACTIVE:
    ndbabort();
  case ZAPI_INACTIVE:
  {
    BaseString::snprintf(buf, 100, "Node %u disconnected", nodeId);
    progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf);
  }
  }

  if (ERROR_INSERTED(939) && ERROR_INSERT_EXTRA == nodeId)
  {
    ndbout_c("Ignoring DISCONNECT_REP for node %u that was force disconnected",
             nodeId);
    CLEAR_ERROR_INSERT_VALUE;
    return;
  }

  node_failed(signal, nodeId);
}//DISCONNECT_REP
4208
/**
 * Handle disconnect of a data node, dispatching on the failed node's
 * phase: running/starting nodes enter the full failure protocol
 * (failReportLab), nodes already in failure handling are ignored, and
 * nodes not yet in the cluster just get their communication closed.
 */
void Qmgr::node_failed(Signal* signal, Uint16 aFailedNode)
{
  NodeRecPtr failedNodePtr;
  /**------------------------------------------------------------------------
   *   A COMMUNICATION LINK HAS BEEN DISCONNECTED. WE MUST TAKE SOME ACTION
   *   DUE TO THIS.
   *-----------------------------------------------------------------------*/
  failedNodePtr.i = aFailedNode;
  ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec);
  failedNodePtr.p->m_secret = 0; // Not yet Uint64(rand()) << 32 + rand();

  /* API/MGM failures must go through api_failed(), never here. */
  ndbrequire(getNodeInfo(failedNodePtr.i).getType() == NodeInfo::DB);

  /**---------------------------------------------------------------------
   *   THE OTHER NODE IS AN NDB NODE, WE HANDLE IT AS IF A HEARTBEAT
   *   FAILURE WAS DISCOVERED.
   *---------------------------------------------------------------------*/
  switch(failedNodePtr.p->phase){
  case ZRUNNING:
    jam();
    failReportLab(signal, aFailedNode, FailRep::ZLINK_FAILURE, getOwnNodeId());
    return;
  case ZSTARTING:
    /**
     * bug#42422
     * Force "real" failure handling by promoting the starting node to
     * ZRUNNING before reporting the link failure.
     */
    jam();
    DEB_MULTI_TRP(("Node %u in ZRUNNING, failedNode", failedNodePtr.i));
    failedNodePtr.p->phase = ZRUNNING;
    DEB_STARTUP(("4:phase(%u) = ZRUNNING", failedNodePtr.i));
    failReportLab(signal, aFailedNode, FailRep::ZLINK_FAILURE, getOwnNodeId());
    return;
  case ZFAIL_CLOSING:  // Close already in progress
    jam();
    return;
  case ZPREPARE_FAIL:  // PREP_FAIL already sent CLOSE_COMREQ
    jam();
    return;
  case ZINIT:
  {
    jam();
    /*---------------------------------------------------------------------*/
    // The other node is still not in the cluster but disconnected.
    // We must restart communication in three seconds.
    /*---------------------------------------------------------------------*/
    failedNodePtr.p->failState = NORMAL;
    failedNodePtr.p->phase = ZFAIL_CLOSING;
    DEB_STARTUP(("phase(%u) = ZFAIL_CLOSING", failedNodePtr.i));
    set_hb_count(failedNodePtr.i) = 0;

    CloseComReqConf * const closeCom =
      (CloseComReqConf *)&signal->theData[0];

    closeCom->xxxBlockRef = reference();
    closeCom->requestType = CloseComReqConf::RT_NO_REPLY;
    closeCom->failNo      = 0;
    closeCom->noOfNodes   = 1;
    closeCom->failedNodeId = failedNodePtr.i;
    sendSignal(TRPMAN_REF, GSN_CLOSE_COMREQ, signal,
               CloseComReqConf::SignalLength, JBB);
    return;
  }
  case ZAPI_ACTIVE:     // Unexpected states handled in ::api_failed()
    ndbabort();
  case ZAPI_INACTIVE:
    ndbabort();
  case ZAPI_ACTIVATION_ONGOING:
    ndbabort();
  default:
    ndbabort(); // Unhandled state
  }//switch

  return;
}
4284
4285 void
execUPGRADE_PROTOCOL_ORD(Signal * signal)4286 Qmgr::execUPGRADE_PROTOCOL_ORD(Signal* signal)
4287 {
4288 const UpgradeProtocolOrd* ord = (UpgradeProtocolOrd*)signal->getDataPtr();
4289 switch(ord->type){
4290 case UpgradeProtocolOrd::UPO_ENABLE_MICRO_GCP:
4291 jam();
4292 m_micro_gcp_enabled = true;
4293 return;
4294 }
4295 }
4296
/**
 * Start failure handling for an API/MGM node: broadcast API_FAILREQ to
 * all QMGRs, then ask TRPMAN to close communication to the node.  The
 * node record moves to ZFAIL_CLOSING; further handling continues when
 * CLOSE_COMCONF arrives.  Re-entrant safe: a node already in
 * ZFAIL_CLOSING is ignored.
 */
void
Qmgr::api_failed(Signal* signal, Uint32 nodeId)
{
  NodeRecPtr failedNodePtr;
  /**------------------------------------------------------------------------
   *   A COMMUNICATION LINK HAS BEEN DISCONNECTED. WE MUST TAKE SOME ACTION
   *   DUE TO THIS.
   *-----------------------------------------------------------------------*/
  failedNodePtr.i = nodeId;
  ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec);
  failedNodePtr.p->m_secret = 0; // Not yet Uint64(rand()) << 32 + rand();

  if (failedNodePtr.p->phase == ZFAIL_CLOSING)
  {
    /**
     * Failure handling already in progress
     */
    jam();
    return;
  }

  ndbrequire(failedNodePtr.p->failState == NORMAL);

  /* Send API_FAILREQ to peer QMGR blocks to allow them to disconnect
   * quickly
   * Local application blocks get API_FAILREQ once all pending signals
   * from the failed API have been processed.
   */
  signal->theData[0] = failedNodePtr.i;
  signal->theData[1] = QMGR_REF;
  NodeReceiverGroup rg(QMGR, c_clusterNodes);
  sendSignal(rg, GSN_API_FAILREQ, signal, 2, JBA);

  /* Now ask CMVMI to disconnect the node */
  /* The follow-up state after CLOSE_COMCONF depends on whether the API
   * was actively registered (ZAPI_ACTIVE) or not. */
  FailState initialState = (failedNodePtr.p->phase == ZAPI_ACTIVE) ?
    WAITING_FOR_CLOSECOMCONF_ACTIVE :
    WAITING_FOR_CLOSECOMCONF_NOTACTIVE;

  failedNodePtr.p->failState = initialState;
  failedNodePtr.p->phase = ZFAIL_CLOSING;
  set_hb_count(failedNodePtr.i) = 0;

  CloseComReqConf * const closeCom = (CloseComReqConf *)&signal->theData[0];
  closeCom->xxxBlockRef = reference();
  closeCom->requestType = CloseComReqConf::RT_API_FAILURE;
  closeCom->failNo      = 0;
  closeCom->noOfNodes   = 1;
  closeCom->failedNodeId = nodeId;
  /* Drop any ndbinfo process info registered by the failed node. */
  ProcessInfo * processInfo = getProcessInfo(nodeId);
  if (processInfo)
  {
    processInfo->invalidate();
  }
  sendSignal(TRPMAN_REF, GSN_CLOSE_COMREQ, signal,
             CloseComReqConf::SignalLength, JBB);
} // api_failed
4353
4354 /**--------------------------------------------------------------------------
4355 * AN API NODE IS REGISTERING. IF FOR THE FIRST TIME WE WILL ENABLE
4356 * COMMUNICATION WITH ALL NDB BLOCKS.
4357 *---------------------------------------------------------------------------*/
4358 /*******************************/
4359 /* API_REGREQ */
4360 /*******************************/
/**
 * API_REGREQ
 *
 * Registration request from an API/MGM node.  Version compatibility is
 * checked; on first registration of an accepted API while we are
 * started (or nearly started), communication is enabled via TRPMAN
 * before API_REGCONF is sent from handleEnableComApiRegreq().  In all
 * other accept paths API_REGCONF is sent directly.
 */
void Qmgr::execAPI_REGREQ(Signal* signal)
{
  jamEntry();

  ApiRegReq* req = (ApiRegReq*)signal->getDataPtr();
  const Uint32 version = req->version;
  const BlockReference ref = req->ref;

  Uint32 mysql_version = req->mysql_version;

  NodeRecPtr apiNodePtr;
  apiNodePtr.i = refToNode(ref);
  ptrCheckGuard(apiNodePtr, MAX_NODES, nodeRec);

  if (apiNodePtr.p->phase == ZFAIL_CLOSING)
  {
    jam();
    /**
     * This node is pending CLOSE_COM_CONF
     *   ignore API_REGREQ
     */
    return;
  }

  if (!c_connectedNodes.get(apiNodePtr.i))
  {
    jam();
    /**
     * We have not yet heard execCONNECT_REP
     *   so ignore this until we do...
     */
    return;
  }

#if 0
  ndbout_c("Qmgr::execAPI_REGREQ: Recd API_REGREQ (NodeId=%d)", apiNodePtr.i);
#endif

  bool compatability_check;
  const char * extra = 0;
  NodeInfo::NodeType type= getNodeInfo(apiNodePtr.i).getType();
  switch(type){
  case NodeInfo::API:
    jam();
    compatability_check = ndbCompatible_ndb_api(NDB_VERSION, version);
    break;
  case NodeInfo::MGM:
    compatability_check = ndbCompatible_ndb_mgmt(NDB_VERSION, version);
    break;
  case NodeInfo::DB:
  case NodeInfo::INVALID:
  default:
    /* Data nodes never register through API_REGREQ. */
    sendApiRegRef(signal, ref, ApiRegRef::WrongType);
    infoEvent("Invalid connection attempt with type %d", type);
    return;
  }

  /* Reject versions that are not allowed to join during upgrade. */
  if (!ndbd_upgrade_ok(version))
  {
    compatability_check = false;
  }

  if (!compatability_check) {
    jam();
    char buf[NDB_VERSION_STRING_BUF_SZ];
    infoEvent("Connection attempt from %s id=%d with %s "
              "incompatible with %s%s",
              type == NodeInfo::API ? "api or mysqld" : "management server",
              apiNodePtr.i,
              ndbGetVersionString(version, mysql_version, 0,
                                  buf,
                                  sizeof(buf)),
              NDB_VERSION_STRING,
              extra ? extra : "");
    apiNodePtr.p->phase = ZAPI_INACTIVE;
    sendApiRegRef(signal, ref, ApiRegRef::UnsupportedVersion);
    return;
  }

  /* Accepted: record the node's versions and reset its heartbeat count. */
  setNodeInfo(apiNodePtr.i).m_version = version;
  setNodeInfo(apiNodePtr.i).m_mysql_version = mysql_version;
  set_hb_count(apiNodePtr.i) = 0;

  NodeState state = getNodeState();
  if (apiNodePtr.p->phase == ZAPI_INACTIVE)
  {
    apiNodePtr.p->blockRef = ref;
    if ((state.startLevel == NodeState::SL_STARTED ||
         state.getSingleUserMode() ||
         (state.startLevel == NodeState::SL_STARTING &&
          state.starting.startPhase >= 8)))
    {
      jam();
      /**----------------------------------------------------------------------
       * THE API NODE IS REGISTERING. WE WILL ACCEPT IT BY CHANGING STATE AND
       * SENDING A CONFIRM. We set state to ZAPI_ACTIVATION_ONGOING to ensure
       * that we don't send unsolicited API_REGCONF or other things before we
       * actually fully enabled the node for communicating with the new API
       * node. It also avoids sending NODE_FAILREP, NF_COMPLETEREP and
       * TAKE_OVERTCCONF even before the API_REGCONF is sent. We will get a
       * fresh state of the nodes in API_REGCONF which is sufficient, no need
       * to update the API before the API got the initial state.
       *----------------------------------------------------------------------*/
      apiNodePtr.p->phase = ZAPI_ACTIVATION_ONGOING;
      EnableComReq *enableComReq = (EnableComReq *)signal->getDataPtrSend();
      enableComReq->m_senderRef = reference();
      enableComReq->m_senderData = ENABLE_COM_API_REGREQ;
      enableComReq->m_enableNodeId = apiNodePtr.i;
      sendSignal(TRPMAN_REF, GSN_ENABLE_COMREQ, signal,
                 EnableComReq::SignalLength, JBB);
      return;
    }
    /**
     * The node is in some kind of STOPPING state, so we send API_REGCONF even
     * though we've not enabled communication, if the API tries to send
     * anything to us anyways it will simply be ignored since only QMGR will
     * receive signals in this state. The API receives the node states, so it
     * should be able to discover what nodes that it is able to actually use.
     */
  }

  sendApiRegConf(signal, apiNodePtr.i);
}//Qmgr::execAPI_REGREQ()
4484
/**
 * Continuation of API_REGREQ after TRPMAN has enabled communication to
 * the registering API node: publish the node's version to the other
 * QMGRs and to NDBCNTR, then activate the node and send API_REGCONF.
 */
void
Qmgr::handleEnableComApiRegreq(Signal *signal, Uint32 node)
{
  NodeRecPtr apiNodePtr;
  NodeInfo::NodeType type = getNodeInfo(node).getType();
  Uint32 version = getNodeInfo(node).m_version;
  recompute_version_info(type, version);

  /* Broadcast the new node's version to all other QMGRs. */
  signal->theData[0] = node;
  signal->theData[1] = version;
  NodeReceiverGroup rg(QMGR, c_clusterNodes);
  rg.m_nodes.clear(getOwnNodeId());
  sendSignal(rg, GSN_NODE_VERSION_REP, signal, 2, JBB);

  /* Inform NDBCNTR that the API node has started. */
  signal->theData[0] = node;
  EXECUTE_DIRECT(NDBCNTR, GSN_API_START_REP, signal, 1);

  apiNodePtr.i = node;
  ptrCheckGuard(apiNodePtr, MAX_NODES, nodeRec);
  if (apiNodePtr.p->phase == ZAPI_ACTIVATION_ONGOING)
  {
    /**
     * Now we're about to send API_REGCONF to an API node, this means
     * that this node can immediately start communicating to TC, SUMA
     * and so forth. The state also indicates that the API is ready
     * to receive an unsolicited API_REGCONF when the node goes to
     * state SL_STARTED.
     */
    jam();
    apiNodePtr.p->phase = ZAPI_ACTIVE;
    sendApiRegConf(signal, node);
  }
  jam();
  /**
   * If the node was no longer in state ZAPI_ACTIVATION_ONGOING above, it
   * must have failed in the meantime, and we can skip sending API_REGCONF
   * to a failed node.
   */
}
4523
4524 void
execNODE_STARTED_REP(Signal * signal)4525 Qmgr::execNODE_STARTED_REP(Signal *signal)
4526 {
4527 NodeRecPtr apiNodePtr;
4528 for (apiNodePtr.i = 1;
4529 apiNodePtr.i < MAX_NODES;
4530 apiNodePtr.i++)
4531 {
4532 ptrCheckGuard(apiNodePtr, MAX_NODES, nodeRec);
4533 NodeInfo::NodeType type = getNodeInfo(apiNodePtr.i).getType();
4534 if (type != NodeInfo::API)
4535 {
4536 /* Not an API node */
4537 continue;
4538 }
4539 if (!c_connectedNodes.get(apiNodePtr.i))
4540 {
4541 /* API not connected */
4542 continue;
4543 }
4544 if (apiNodePtr.p->phase != ZAPI_ACTIVE)
4545 {
4546 /**
4547 * The phase variable can be in three states for the API nodes, it can
4548 * be ZAPI_INACTIVE for an API node that hasn't connected, it can be
4549 * ZFAIL_CLOSING for an API node that recently failed and is performing
4550 * failure handling. It can be in the state ZAPI_ACTIVE which it enters
4551 * upon us receiving an API_REGREQ from the API. So at this point the
4552 * API is also able to receive an unsolicited API_REGCONF message.
4553 */
4554 continue;
4555 }
4556 /**
4557 * We will send an unsolicited API_REGCONF to the API node, this makes the
4558 * API node aware of our existence much faster (without it can wait up to
4559 * the lenght of a heartbeat DB-API period. For rolling restarts and other
4560 * similar actions this can easily cause the API to not have any usable
4561 * DB connections at all. This unsolicited response minimises this window
4562 * of unavailability to zero for all practical purposes.
4563 */
4564 sendApiRegConf(signal, apiNodePtr.i);
4565 }
4566 }
4567
/**
 * Build and send API_REGCONF to the given API/MGM node: our versions,
 * the API heartbeat frequency, the current node state (including the
 * connected-nodes bitmask) and the cluster-wide minimum DB/API
 * versions.
 */
void
Qmgr::sendApiRegConf(Signal *signal, Uint32 node)
{
  NodeRecPtr apiNodePtr;
  apiNodePtr.i = node;
  ptrCheckGuard(apiNodePtr, MAX_NODES, nodeRec);
  const BlockReference ref = apiNodePtr.p->blockRef;
  ndbassert(ref != 0);

  ApiRegConf * const apiRegConf = (ApiRegConf *)&signal->theData[0];
  apiRegConf->qmgrRef = reference();
  apiRegConf->apiHeartbeatFrequency = (chbApiDelay / 10);
  apiRegConf->version = NDB_VERSION;
  apiRegConf->mysql_version = NDB_MYSQL_VERSION_D;
  apiRegConf->nodeState = getNodeState();
  {
    NodeRecPtr nodePtr;
    nodePtr.i = getOwnNodeId();
    ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
    Uint32 dynamicId = nodePtr.p->ndynamicId;

    /* dynamicId is sent negated when we ourselves are the master node;
     * the sign presumably lets the API distinguish the master — confirm
     * against the NdbAPI consumer. */
    if(apiRegConf->nodeState.masterNodeId != getOwnNodeId()){
      jam();
      apiRegConf->nodeState.dynamicId = dynamicId;
    } else {
      apiRegConf->nodeState.dynamicId = (Uint32)(-(Int32)dynamicId);
    }
  }
  NodeVersionInfo info = getNodeVersionInfo();
  apiRegConf->minDbVersion = info.m_type[NodeInfo::DB].m_min_version;
  apiRegConf->minApiVersion = info.m_type[NodeInfo::API].m_min_version;
  apiRegConf->nodeState.m_connected_nodes.assign(c_connectedNodes);
  sendSignal(ref, GSN_API_REGCONF, signal, ApiRegConf::SignalLength, JBB);
}
4602
4603 void
sendVersionedDb(NodeReceiverGroup rg,GlobalSignalNumber gsn,Signal * signal,Uint32 length,JobBufferLevel jbuf,Uint32 minversion)4604 Qmgr::sendVersionedDb(NodeReceiverGroup rg,
4605 GlobalSignalNumber gsn,
4606 Signal* signal,
4607 Uint32 length,
4608 JobBufferLevel jbuf,
4609 Uint32 minversion)
4610 {
4611 jam();
4612 NodeVersionInfo info = getNodeVersionInfo();
4613 if (info.m_type[NodeInfo::DB].m_min_version >= minversion)
4614 {
4615 jam();
4616 sendSignal(rg, gsn, signal, length, jbuf);
4617 }
4618 else
4619 {
4620 jam();
4621 Uint32 i = 0, cnt = 0;
4622 while((i = rg.m_nodes.find(i + 1)) != NodeBitmask::NotFound)
4623 {
4624 jam();
4625 if (getNodeInfo(i).m_version >= minversion)
4626 {
4627 jam();
4628 cnt++;
4629 sendSignal(numberToRef(rg.m_block, i), gsn, signal, length, jbuf);
4630 }
4631 }
4632 ndbassert((cnt == 0 && rg.m_nodes.count() == 0) ||
4633 (cnt < rg.m_nodes.count()));
4634 }
4635 }
4636
/**
 * API_VERSION_REQ
 *
 * Return version and connect-address information for a given node.
 *
 * NOTE: the confirmation is built in place on top of the request
 * (conf aliases req), so the request fields that are needed
 * (senderRef, nodeId) are read out before any conf field is written.
 */
void
Qmgr::execAPI_VERSION_REQ(Signal * signal) {
  jamEntry();
  ApiVersionReq * const req = (ApiVersionReq *)signal->getDataPtr();

  Uint32 senderRef = req->senderRef;
  Uint32 nodeId = req->nodeId;

  ApiVersionConf * conf = (ApiVersionConf *)req;
  if(getNodeInfo(nodeId).m_connected)
  {
    conf->version = getNodeInfo(nodeId).m_version;
    conf->mysql_version = getNodeInfo(nodeId).m_mysql_version;
    struct in_addr in= globalTransporterRegistry.get_connect_address(nodeId);
    conf->m_inet_addr= in.s_addr;
  }
  else
  {
    /* Node not connected: report zeroed version/address. */
    conf->version =  0;
    conf->mysql_version =  0;
    conf->m_inet_addr= 0;
  }
  conf->nodeId = nodeId;
  conf->isSingleUser = (nodeId == getNodeState().getSingleUserApi());
  sendSignal(senderRef,
	     GSN_API_VERSION_CONF,
	     signal,
	     ApiVersionConf::SignalLength, JBB);
}
4666
4667 void
execNODE_VERSION_REP(Signal * signal)4668 Qmgr::execNODE_VERSION_REP(Signal* signal)
4669 {
4670 jamEntry();
4671 Uint32 nodeId = signal->theData[0];
4672 Uint32 version = signal->theData[1];
4673
4674 if (nodeId < MAX_NODES)
4675 {
4676 jam();
4677 Uint32 type = getNodeInfo(nodeId).m_type;
4678 setNodeInfo(nodeId).m_version = version;
4679 recompute_version_info(type, version);
4680 }
4681 }
4682
4683 void
recompute_version_info(Uint32 type,Uint32 version)4684 Qmgr::recompute_version_info(Uint32 type, Uint32 version)
4685 {
4686 NodeVersionInfo& info = setNodeVersionInfo();
4687 switch(type){
4688 case NodeInfo::DB:
4689 case NodeInfo::API:
4690 case NodeInfo::MGM:
4691 break;
4692 default:
4693 return;
4694 }
4695
4696 if (info.m_type[type].m_min_version == 0 ||
4697 version < info.m_type[type].m_min_version)
4698 info.m_type[type].m_min_version = version;
4699 if (version > info.m_type[type].m_max_version)
4700 info.m_type[type].m_max_version = version;
4701 }
4702
4703 void
recompute_version_info(Uint32 type)4704 Qmgr::recompute_version_info(Uint32 type)
4705 {
4706 switch(type){
4707 case NodeInfo::DB:
4708 case NodeInfo::API:
4709 case NodeInfo::MGM:
4710 break;
4711 default:
4712 return;
4713 }
4714
4715 Uint32 min = ~0, max = 0;
4716 Uint32 cnt = type == NodeInfo::DB ? MAX_NDB_NODES : MAX_NODES;
4717 for (Uint32 i = 1; i<cnt; i++)
4718 {
4719 if (getNodeInfo(i).m_type == type)
4720 {
4721 Uint32 version = getNodeInfo(i).m_version;
4722
4723 if (version)
4724 {
4725 if (version < min)
4726 min = version;
4727 if (version > max)
4728 max = version;
4729 }
4730 }
4731 }
4732
4733 NodeVersionInfo& info = setNodeVersionInfo();
4734 info.m_type[type].m_min_version = min == ~(Uint32)0 ? 0 : min;
4735 info.m_type[type].m_max_version = max;
4736 }
4737
#if 0
/**
 * NOTE: disabled (never compiled) legacy API-version compatibility
 * check, kept for reference only.
 */
bool
Qmgr::checkAPIVersion(NodeId nodeId,
		      Uint32 apiVersion, Uint32 ownVersion) const {
  bool ret=true;
  /**
   * First implementation...
   */
  if ((getMajor(apiVersion) < getMajor(ownVersion) ||
       getMinor(apiVersion) < getMinor(ownVersion)) &&
      apiVersion >= API_UPGRADE_VERSION) {
    jam();
    if ( getNodeInfo(nodeId).getType() !=  NodeInfo::MGM ) {
      jam();
      ret = false;
    } else {
      jam();
      /* we have a software upgrade situation, mgmtsrvr should be
       * the highest, let him decide what to do
       */
      ;
    }
  }
  return ret;
}
#endif
4764
4765 void
sendApiRegRef(Signal * signal,Uint32 Tref,ApiRegRef::ErrorCode err)4766 Qmgr::sendApiRegRef(Signal* signal, Uint32 Tref, ApiRegRef::ErrorCode err){
4767 ApiRegRef* ref = (ApiRegRef*)signal->getDataPtrSend();
4768 ref->ref = reference();
4769 ref->version = NDB_VERSION;
4770 ref->mysql_version = NDB_MYSQL_VERSION_D;
4771 ref->errorCode = err;
4772 sendSignal(Tref, GSN_API_REGREF, signal, ApiRegRef::SignalLength, JBB);
4773 }
4774
4775 /**--------------------------------------------------------------------------
4776 * A NODE HAS BEEN DECLARED AS DOWN. WE WILL CLOSE THE COMMUNICATION TO THIS
4777 * NODE IF NOT ALREADY DONE. IF WE ARE PRESIDENT OR BECOMES PRESIDENT BECAUSE
4778 * OF A FAILED PRESIDENT THEN WE WILL TAKE FURTHER ACTION.
4779 *---------------------------------------------------------------------------*/
failReportLab(Signal * signal,Uint16 aFailedNode,FailRep::FailCause aFailCause,Uint16 sourceNode)4780 void Qmgr::failReportLab(Signal* signal, Uint16 aFailedNode,
4781 FailRep::FailCause aFailCause,
4782 Uint16 sourceNode)
4783 {
4784 NodeRecPtr nodePtr;
4785 NodeRecPtr failedNodePtr;
4786 NodeRecPtr myNodePtr;
4787
4788 failedNodePtr.i = aFailedNode;
4789 ptrCheckGuard(failedNodePtr, MAX_NDB_NODES, nodeRec);
4790 FailRep* rep = (FailRep*)signal->getDataPtr();
4791
4792 if (check_multi_node_shutdown(signal))
4793 {
4794 jam();
4795 return;
4796 }
4797
4798 if (isNodeConnectivitySuspect(sourceNode) &&
4799 // (! isNodeConnectivitySuspect(aFailedNode)) && // TODO : Required?
4800 ((aFailCause == FailRep::ZCONNECT_CHECK_FAILURE) ||
4801 (aFailCause == FailRep::ZLINK_FAILURE)))
4802 {
4803 jam();
4804 /* Connectivity related failure report from a node with suspect
4805 * connectivity, handle differently
4806 */
4807 ndbrequire(sourceNode != getOwnNodeId());
4808
4809 handleFailFromSuspect(signal,
4810 aFailCause,
4811 aFailedNode,
4812 sourceNode);
4813 return;
4814 }
4815
4816 if (failedNodePtr.i == getOwnNodeId()) {
4817 jam();
4818
4819 Uint32 code = NDBD_EXIT_NODE_DECLARED_DEAD;
4820 const char * msg = 0;
4821 // Message buffer for FailRep::ZPARTITIONED_CLUSTER
4822 static const Uint32 bitmaskTextLen = NdbNodeBitmask::TextLength + 1;
4823 char extra[2 * bitmaskTextLen + 30];
4824
4825 switch(aFailCause){
4826 case FailRep::ZOWN_FAILURE:
4827 msg = "Own failure";
4828 break;
4829 case FailRep::ZOTHER_NODE_WHEN_WE_START:
4830 case FailRep::ZOTHERNODE_FAILED_DURING_START:
4831 msg = "Other node died during start";
4832 break;
4833 case FailRep::ZIN_PREP_FAIL_REQ:
4834 msg = "Prep fail";
4835 break;
4836 case FailRep::ZSTART_IN_REGREQ:
4837 msg = "Start timeout";
4838 break;
4839 case FailRep::ZHEARTBEAT_FAILURE:
4840 msg = "Heartbeat failure";
4841 break;
4842 case FailRep::ZLINK_FAILURE:
4843 msg = "Connection failure";
4844 break;
4845 case FailRep::ZPARTITIONED_CLUSTER:
4846 {
4847 code = NDBD_EXIT_PARTITIONED_SHUTDOWN;
4848 char buf1[bitmaskTextLen], buf2[bitmaskTextLen];
4849 c_clusterNodes.getText(buf1);
4850 if (((signal->getLength()== FailRep::OrigSignalLength + FailRep::PartitionedExtraLength_v1) ||
4851 (signal->getLength()== FailRep::SignalLength + FailRep::PartitionedExtraLength_v1)) &&
4852 signal->header.theVerId_signalNumber == GSN_FAIL_REP)
4853 {
4854 jam();
4855 NdbNodeBitmask part;
4856 Uint32 senderRef = signal->getSendersBlockRef();
4857 Uint32 senderVersion = getNodeInfo(refToNode(senderRef)).m_version;
4858 if (signal->getNoOfSections() >= 1)
4859 {
4860 ndbrequire(ndbd_send_node_bitmask_in_section(senderVersion));
4861 SectionHandle handle(this, signal);
4862 SegmentedSectionPtr ptr;
4863 handle.getSection(ptr, 0);
4864
4865 ndbrequire(ptr.sz <= NdbNodeBitmask::Size);
4866 copy(part.rep.data, ptr);
4867
4868 releaseSections(handle);
4869 }
4870 else
4871 {
4872 part.assign(NdbNodeBitmask48::Size, rep->partitioned.partition_v1);
4873 }
4874 part.getText(buf2);
4875 BaseString::snprintf(extra, sizeof(extra),
4876 "Our cluster: %s other cluster: %s",
4877 buf1, buf2);
4878 }
4879 else
4880 {
4881 jam();
4882 BaseString::snprintf(extra, sizeof(extra),
4883 "Our cluster: %s", buf1);
4884 }
4885 msg = extra;
4886 break;
4887 }
4888 case FailRep::ZMULTI_NODE_SHUTDOWN:
4889 msg = "Multi node shutdown";
4890 break;
4891 case FailRep::ZCONNECT_CHECK_FAILURE:
4892 msg = "Connectivity check failure";
4893 break;
4894 case FailRep::ZFORCED_ISOLATION:
4895 msg = "Forced isolation";
4896 if (ERROR_INSERTED(942))
4897 {
4898 g_eventLogger->info("FAIL_REP FORCED_ISOLATION received from data node %u - ignoring.",
4899 sourceNode);
4900 /* Let's wait for remote disconnection */
4901 return;
4902 }
4903 break;
4904 default:
4905 msg = "<UNKNOWN>";
4906 }
4907
4908 CRASH_INSERTION(932);
4909 CRASH_INSERTION(938);
4910
4911 char buf[sizeof(extra) + 100];
4912 BaseString::snprintf(buf, sizeof(buf),
4913 "We(%u) have been declared dead by %u (via %u) reason: %s(%u)",
4914 getOwnNodeId(),
4915 sourceNode,
4916 refToNode(signal->getSendersBlockRef()),
4917 msg ? msg : "<Unknown>",
4918 aFailCause);
4919
4920 progError(__LINE__, code, buf);
4921 return;
4922 }//if
4923
4924 myNodePtr.i = getOwnNodeId();
4925 ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);
4926 if (myNodePtr.p->phase != ZRUNNING) {
4927 jam();
4928 systemErrorLab(signal, __LINE__);
4929 return;
4930 }//if
4931
4932 if (getNodeState().startLevel < NodeState::SL_STARTED)
4933 {
4934 jam();
4935 CRASH_INSERTION(932);
4936 CRASH_INSERTION(938);
4937 char buf[100];
4938 switch(aFailCause)
4939 {
4940 case FailRep::ZHEARTBEAT_FAILURE:
4941 BaseString::snprintf(buf, 100 ,"Node %d heartbeat failure",
4942 failedNodePtr.i);
4943 CRASH_INSERTION(947);
4944 break;
4945 default:
4946 BaseString::snprintf(buf, 100 , "Node %d failed",
4947 failedNodePtr.i);
4948 }
4949 progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf);
4950 }
4951
4952 const NdbNodeBitmask TfailedNodes(cfailedNodes);
4953 failReport(signal, failedNodePtr.i, (UintR)ZTRUE, aFailCause, sourceNode);
4954
4955 /**
4956 * If any node is starting now (c_start.startNode != 0)
4957 * include it in nodes handled by sendPrepFailReq
4958 */
4959 if (c_start.m_startNode != 0)
4960 {
4961 jam();
4962 cfailedNodes.set(c_start.m_startNode);
4963 }
4964
4965 if (cpresident == getOwnNodeId()) {
4966 jam();
4967 if (ctoStatus == Q_NOT_ACTIVE) {
4968 jam();
4969 /**--------------------------------------------------------------------
4970 * AS PRESIDENT WE ARE REQUIRED TO START THE EXCLUSION PROCESS SUCH THAT
4971 * THE APPLICATION SEE NODE FAILURES IN A CONSISTENT ORDER.
4972 * IF WE HAVE BECOME PRESIDENT NOW (CTO_STATUS = ACTIVE) THEN WE HAVE
4973 * TO COMPLETE THE PREVIOUS COMMIT FAILED NODE PROCESS BEFORE STARTING
4974 * A NEW.
4975 * CTO_STATUS = ACTIVE CAN ALSO MEAN THAT WE ARE PRESIDENT AND ARE
4976 * CURRENTLY COMMITTING A SET OF NODE CRASHES. IN THIS CASE IT IS NOT
4977 * ALLOWED TO START PREPARING NEW NODE CRASHES.
4978 *---------------------------------------------------------------------*/
4979 if (!cfailedNodes.equal(TfailedNodes)) {
4980 jam();
4981 cfailureNr = cfailureNr + 1;
4982 for (nodePtr.i = 1;
4983 nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
4984 ptrAss(nodePtr, nodeRec);
4985 if (nodePtr.p->phase == ZRUNNING) {
4986 jamLine(nodePtr.i);
4987 sendPrepFailReq(signal, nodePtr.i);
4988 }//if
4989 }//for
4990 }//if
4991 }//if
4992 }
4993 return;
4994 }//Qmgr::failReportLab()
4995
4996 /**-------------------------------------------------------------------------
4997 * WE HAVE RECEIVED A PREPARE TO EXCLUDE A NUMBER OF NODES FROM THE CLUSTER.
 * WE WILL FIRST CHECK WHETHER WE KNOW OF ANY ADDITIONAL FAILED NODES
 * THAT ARE NOT INCLUDED IN THE REQUEST
5000 *--------------------------------------------------------------------------*/
5001 /*******************************/
5002 /* PREP_FAILREQ */
5003 /*******************************/
/**
 * PREP_FAILREQ: the president asks this node to prepare the exclusion of a
 * set of failed nodes.  We decode the node set, cancel any node-start that
 * involves a failed node, block commits in DIH, mark the nodes as failed
 * locally and then request closing of communication to them.
 */
void Qmgr::execPREP_FAILREQ(Signal* signal)
{
  NodeRecPtr myNodePtr;
  PrepFailReqRef * const prepFail = (PrepFailReqRef *)&signal->theData[0];
  BlockReference Tblockref = prepFail->xxxBlockRef;
  Uint16 TfailureNr = prepFail->failNo;
  Uint32 senderRef = signal->getSendersBlockRef();
  Uint32 senderVersion = getNodeInfo(refToNode(senderRef)).m_version;

  jamEntry();

  /* The failed-node set arrives either in a long signal section (newer
   * senders) or inline in the fixed signal area (older senders, limited
   * to a 48-node bitmask). */
  NdbNodeBitmask nodes;
  if (signal->getNoOfSections() >= 1)
  {
    jam();
    ndbrequire(ndbd_send_node_bitmask_in_section(senderVersion));
    SectionHandle handle(this, signal);
    SegmentedSectionPtr ptr;
    handle.getSection(ptr, 0);
    ndbrequire(ptr.sz <= NdbNodeBitmask::Size);
    copy(nodes.rep.data, ptr);
    releaseSections(handle);
  }
  else
  {
    jam();
    nodes.assign(NdbNodeBitmask48::Size, prepFail->theNodes);
  }

  // Clear 'c_start.m_startNode' if it failed.
  if (nodes.get(c_start.m_startNode))
  {
    jam();
    DEB_STARTUP(("Clear c_start.m_startNode"));
    c_start.reset();
  }
  if (c_start.m_gsn == GSN_CM_NODEINFOCONF)
  {
    Uint32 nodeId;
    jam();
    /**
     * This is a very unusual event we are looking for, but still required
     * to be handled. The starting node has connected to the president and
     * managed to start the node inclusion protocol. We received an indication
     * of this from the president. The starting node now however fails before
     * it connected to us, so we need to clear the indication of that we
     * received CM_ADD(Prepare) from president since this belonged to an
     * already cancelled node restart.
     */
    for (nodeId = 1; nodeId < MAX_NDB_NODES; nodeId++)
    {
      if (c_start.m_nodes.isWaitingFor(nodeId) &&
          nodes.get(nodeId))
      {
        jamLine(nodeId);
        /* Found such a condition as described above, clear state */
        c_start.m_gsn = RNIL;
        c_start.m_nodes.clearWaitingFor();
        break;
      }
    }
  }


  if (check_multi_node_shutdown(signal))
  {
    jam();
    return;
  }

  if (ERROR_INSERTED(941) &&
      getOwnNodeId() == 4 &&
      nodes.get(2))
  {
    /* Insert ERROR_INSERT crash */
    CRASH_INSERTION(941);
  }

  cprepFailedNodes.assign(nodes);
  ndbassert(prepFail->noOfNodes == cprepFailedNodes.count());

  /**
   * Block commit until node failures has stabilized
   *
   * @See RT352
   */
  BlockCommitOrd* const block = (BlockCommitOrd *)&signal->theData[0];
  block->failNo = TfailureNr;
  EXECUTE_DIRECT(DBDIH, GSN_BLOCK_COMMIT_ORD, signal,
                 BlockCommitOrd::SignalLength);

  /* We must ourselves still be a running cluster member to take part in
   * the failure protocol. */
  myNodePtr.i = getOwnNodeId();
  ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);
  if (myNodePtr.p->phase != ZRUNNING) {
    jam();
    systemErrorLab(signal, __LINE__);
    return;
  }//if

  /* A node failure while we are still restarting is fatal for us. */
  if (getNodeState().startLevel < NodeState::SL_STARTED)
  {
    jam();
    CRASH_INSERTION(932);
    CRASH_INSERTION(938);
    char buf[100];
    BaseString::snprintf(buf, 100, "Node failure during restart");
    progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf);
  }

  /* Record each requested node as failed locally (no FAIL_REP broadcast,
   * hence ZFALSE). */
  for (unsigned nodeId = 1; nodeId < MAX_NDB_NODES; nodeId++)
  {
    if (cprepFailedNodes.get(nodeId))
    {
      jam();
      failReport(signal,
                 nodeId,
                 (UintR)ZFALSE,
                 FailRep::ZIN_PREP_FAIL_REQ,
                 0); /* Source node not required (or known) here */
    }//if
  }//for
  /* Close communication to the failed nodes; the CLOSE_COMCONF reply
   * continues this failure number's prepare phase. */
  sendCloseComReq(signal, Tblockref, TfailureNr);
  ccommitFailedNodes.clear();
  cprepareFailureNr = TfailureNr;
  return;
}//Qmgr::execPREP_FAILREQ()
5130
5131
handleApiCloseComConf(Signal * signal)5132 void Qmgr::handleApiCloseComConf(Signal* signal)
5133 {
5134 jam();
5135 CloseComReqConf * const closeCom = (CloseComReqConf *)&signal->theData[0];
5136
5137 Uint32 nodeId = closeCom->failedNodeId;
5138 /* Api failure special case */
5139 /* Check that *only* 1 *API* node is included in
5140 * this CLOSE_COM_CONF
5141 */
5142 ndbrequire(getNodeInfo(nodeId).getType() != NodeInfo::DB);
5143 ndbrequire(closeCom->noOfNodes == 1);
5144
5145 /* Now that we know communication from the failed Api has
5146 * ceased, we can send the required API_FAILREQ signals
5147 * and continue API failure handling
5148 */
5149 NodeRecPtr failedNodePtr;
5150 failedNodePtr.i = nodeId;
5151 ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec);
5152
5153 ndbrequire((failedNodePtr.p->failState ==
5154 WAITING_FOR_CLOSECOMCONF_ACTIVE) ||
5155 (failedNodePtr.p->failState ==
5156 WAITING_FOR_CLOSECOMCONF_NOTACTIVE));
5157
5158 if (failedNodePtr.p->failState == WAITING_FOR_CLOSECOMCONF_ACTIVE)
5159 {
5160 /**
5161 * Inform application blocks TC, DICT, SUMA etc.
5162 */
5163 jam();
5164 sendApiFailReq(signal, nodeId, false); // !sumaOnly
5165 if(arbitRec.node == nodeId)
5166 {
5167 arbitRec.code = ArbitCode::ApiFail;
5168 handleArbitApiFail(signal, nodeId);
5169 }
5170 }
5171 else
5172 {
5173 /**
5174 * Always inform SUMA
5175 */
5176 jam();
5177 sendApiFailReq(signal, nodeId, true); // sumaOnly
5178 }
5179
5180 if (getNodeInfo(failedNodePtr.i).getType() == NodeInfo::MGM)
5181 {
5182 /**
5183 * Allow MGM do reconnect "directly"
5184 */
5185 jam();
5186 set_hb_count(failedNodePtr.i) = 3;
5187 }
5188
5189 /* Handled the single API node failure */
5190 return;
5191 }
5192
5193 /**---------------------------------------------------------------------------
 * THE CRASHED NODES HAVE BEEN EXCLUDED FROM COMMUNICATION.
5195 * WE WILL CHECK WHETHER ANY MORE NODES HAVE FAILED DURING THE PREPARE PROCESS.
5196 * IF SO WE WILL REFUSE THE PREPARE PHASE AND EXPECT A NEW PREPARE MESSAGE
5197 * WITH ALL FAILED NODES INCLUDED.
5198 *---------------------------------------------------------------------------*/
5199 /*******************************/
5200 /* CLOSE_COMCONF */
5201 /*******************************/
/**
 * CLOSE_COMCONF: communication to a set of nodes has been closed.
 * Either continues API-failure handling, or answers the president's
 * PREP_FAILREQ with PREP_FAILCONF/PREP_FAILREF depending on whether we
 * know of more failed nodes than the prepared set.
 */
void Qmgr::execCLOSE_COMCONF(Signal* signal)
{
  jamEntry();

  CloseComReqConf * const closeCom = (CloseComReqConf *)&signal->theData[0];

  Uint32 requestType = closeCom->requestType;

  if (requestType == CloseComReqConf::RT_API_FAILURE)
  {
    jam();
    if (ERROR_INSERTED(945))
    {
      if (arbitRec.code != ArbitCode::WinChoose)
      {
        // Delay API failure handling until arbitration in WinChoose
        sendSignalWithDelay(reference(),
                            GSN_CLOSE_COMCONF,
                            signal,
                            10,
                            signal->getLength());
        return;
      }
      CLEAR_ERROR_INSERT_VALUE;
    }
    handleApiCloseComConf(signal);
    return;
  }

  /* Normal node failure preparation path */
  ndbassert(requestType == CloseComReqConf::RT_NODE_FAILURE);
  BlockReference Tblockref = closeCom->xxxBlockRef;
  Uint16 TfailureNr = closeCom->failNo;

  if (TfailureNr != cprepareFailureNr)
  {
    /**
     * A new PREP_FAILREQ was already started, so ignore this
     * one, we will soon enough be here again for the new
     * failure and respond to this one instead. If we were to
     * send something, it would be ignored by President anyways.
     */
    jam();
    return;
  }

  UintR tprepFailConf = ZTRUE;

  /* Check whether the set of nodes which have had communications
   * closed is the same as the set of failed nodes.
   * If it is, we can confirm the PREP_FAIL phase for this set
   * of nodes to the President.
   * If it is not, we Refuse the PREP_FAIL phase for this set
   * of nodes, the President will start a new PREP_FAIL phase
   * for the new set.
   */
  if (!cprepFailedNodes.contains(cfailedNodes)) {
    /* Failed node(s) is missing from the set, we will not
     * confirm this Prepare_Fail phase.
     * Store the node id in the array for later.
     */
    jam();
    tprepFailConf = ZFALSE;
    cprepFailedNodes.bitOR(cfailedNodes);
  }//if
  if (tprepFailConf == ZFALSE) {
    jam();
    /* Inform President that we cannot confirm the PREP_FAIL
     * phase as we are aware of at least one other node
     * failure
     */
    cfailedNodes = cprepFailedNodes;

    sendPrepFailReqRef(signal,
                       Tblockref,
                       GSN_PREP_FAILREF,
                       reference(),
                       TfailureNr,
                       cprepFailedNodes);
  } else {
    /* We have prepared the failure of the requested nodes
     * send confirmation to the president
     */
    jam();
    ccommitFailedNodes = cprepFailedNodes;

    signal->theData[0] = getOwnNodeId();
    signal->theData[1] = TfailureNr;
    sendSignal(Tblockref, GSN_PREP_FAILCONF, signal, 2, JBA);
  }//if
  return;
}//Qmgr::execCLOSE_COMCONF()
5294
5295 /*---------------------------------------------------------------------------*/
/* WE HAVE RECEIVED CONFIRMATION THAT THIS NODE HAS PREPARED THE FAILURE.    */
5297 /*---------------------------------------------------------------------------*/
5298 /*******************************/
5299 /* PREP_FAILCONF */
5300 /*******************************/
/**
 * PREP_FAILCONF (president only): one node has confirmed the prepare
 * phase.  When the last running node has confirmed, run the arbitration
 * check, which continues via sendCommitFailReq() on success.
 */
void Qmgr::execPREP_FAILCONF(Signal* signal)
{
  NodeRecPtr nodePtr;
  NodeRecPtr replyNodePtr;
  jamEntry();
  replyNodePtr.i = signal->theData[0];
  Uint16 TfailureNr = signal->theData[1];
  if (TfailureNr != cfailureNr) {
    jam();
    /**----------------------------------------------------------------------
     * WE HAVE ALREADY STARTED A NEW ATTEMPT TO EXCLUDE A NUMBER OF NODES.
     * IGNORE
     *----------------------------------------------------------------------*/
    return;
  }//if
  ptrCheckGuard(replyNodePtr, MAX_NDB_NODES, nodeRec);
  replyNodePtr.p->sendPrepFailReqStatus = Q_NOT_ACTIVE;
  /* Still waiting for any running node to confirm? Then return and wait. */
  for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
    ptrAss(nodePtr, nodeRec);
    if (nodePtr.p->phase == ZRUNNING) {
      if (nodePtr.p->sendPrepFailReqStatus == Q_ACTIVE) {
        jamLine(nodePtr.i);
        return;
      }//if
    }//if
  }//for
  /**
   * Check node count and groups and invoke arbitrator if necessary.
   * Continues via sendCommitFailReq() if successful.
   */
  arbitRec.failureNr = cfailureNr;
  const NodeState & s = getNodeState();
  if(s.startLevel == NodeState::SL_STOPPING_3 &&
     s.stopping.systemShutdown)
  {
    jam();
    /**
     * We're performing a system shutdown,
     * don't let arbitrator shut us down
     */
    return;
  }

  switch(arbitRec.method){
  case ArbitRec::DISABLED:
    jam();
    // No arbitration -> immediately commit the failed nodes
    sendCommitFailReq(signal);
    break;

  case ArbitRec::METHOD_EXTERNAL:
  case ArbitRec::METHOD_DEFAULT:
    jam();
    handleArbitCheck(signal);
    break;

  }
  return;
}//Qmgr::execPREP_FAILCONF()
5360
/**
 * Send COMMIT_FAILREQ to every running node for the prepared set of node
 * failures (president only).  Called after the prepare phase completed
 * and arbitration (if any) allowed us to continue.
 */
void
Qmgr::sendCommitFailReq(Signal* signal)
{
  NodeRecPtr nodePtr;
  jam();
  if (arbitRec.failureNr != cfailureNr) {
    jam();
    /**----------------------------------------------------------------------
     * WE HAVE ALREADY STARTED A NEW ATTEMPT TO EXCLUDE A NUMBER OF NODES.
     * IGNORE
     *----------------------------------------------------------------------*/
    return;
  }//if
  /**-----------------------------------------------------------------------
   * WE HAVE SUCCESSFULLY PREPARED A SET OF NODE FAILURES. WE WILL NOW COMMIT
   * THESE NODE FAILURES.
   *-------------------------------------------------------------------------*/
  for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
    ptrAss(nodePtr, nodeRec);

#ifdef ERROR_INSERT
    /* Test hook: skip one node and fire NDB_TAMPER instead (currently
     * disabled via the leading 'false'). */
    if (false && ERROR_INSERTED(935) && nodePtr.i == c_error_insert_extra)
    {
      ndbout_c("skipping node %d", c_error_insert_extra);
      CLEAR_ERROR_INSERT_VALUE;
      signal->theData[0] = 9999;
      sendSignalWithDelay(CMVMI_REF, GSN_NDB_TAMPER, signal, 1000, 1);
      continue;
    }
#endif

    if (nodePtr.p->phase == ZRUNNING) {
      jamLine(nodePtr.i);
      nodePtr.p->sendCommitFailReqStatus = Q_ACTIVE;
      signal->theData[0] = cpdistref;
      signal->theData[1] = cfailureNr;
      sendSignal(nodePtr.p->blockRef, GSN_COMMIT_FAILREQ, signal, 2, JBA);
    }//if
  }//for
  ctoStatus = Q_ACTIVE;
  cfailedNodes.clear();
  return;
}//sendCommitFailReq()
5404
5405 /*---------------------------------------------------------------------------*/
5406 /* SOME NODE HAVE DISCOVERED A NODE FAILURE THAT WE HAVE NOT YET DISCOVERED. */
5407 /* WE WILL START ANOTHER ROUND OF PREPARING A SET OF NODE FAILURES. */
5408 /*---------------------------------------------------------------------------*/
5409 /*******************************/
5410 /* PREP_FAILREF */
5411 /*******************************/
/**
 * PREP_FAILREF (president only): a node refused the prepare phase because
 * it knows of additional failed nodes.  Adopt the refusing node's larger
 * failed-node set and start a new prepare round with a new failure number.
 */
void Qmgr::execPREP_FAILREF(Signal* signal)
{
  NodeRecPtr nodePtr;
  jamEntry();

  PrepFailReqRef * const prepFail = (PrepFailReqRef *)&signal->theData[0];

  Uint16 TfailureNr = prepFail->failNo;
  cprepFailedNodes.clear();

  /* The refused node set arrives either in a long signal section (newer
   * senders) or inline in the fixed signal area (older, 48-node senders). */
  if(signal->getNoOfSections() >= 1)
  {
    jam();
    Uint32 senderRef = signal->getSendersBlockRef();
    Uint32 senderVersion = getNodeInfo(refToNode(senderRef)).m_version;
    ndbrequire(ndbd_send_node_bitmask_in_section(senderVersion));
    SegmentedSectionPtr ptr;
    SectionHandle handle(this, signal);
    handle.getSection(ptr, 0);
    ndbrequire(ptr.sz <= NdbNodeBitmask::Size);
    copy(cprepFailedNodes.rep.data, ptr);
    releaseSections(handle);
  }
  else
  {
    jam();
    cprepFailedNodes.assign(NdbNodeBitmask48::Size, prepFail->theNodes);
  }
  ndbassert(prepFail->noOfNodes == cprepFailedNodes.count());

  if (TfailureNr != cfailureNr) {
    jam();
    /**---------------------------------------------------------------------
     * WE HAVE ALREADY STARTED A NEW ATTEMPT TO EXCLUDE A NUMBER OF NODES.
     * IGNORE
     *----------------------------------------------------------------------*/
    return;
  }//if

  cfailedNodes = cprepFailedNodes;

  cfailureNr = cfailureNr + 1;
  // Failure number may not wrap
  ndbrequire(cfailureNr != 0);
  /* Restart the prepare phase towards all running nodes. */
  for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
    ptrAss(nodePtr, nodeRec);
    if (nodePtr.p->phase == ZRUNNING) {
      jamLine(nodePtr.i);
      sendPrepFailReq(signal, nodePtr.i);
    }//if
  }//for
  return;
}//Qmgr::execPREP_FAILREF()
5465
5466 /*---------------------------------------------------------------------------*/
5467 /* THE PRESIDENT IS NOW COMMITTING THE PREVIOUSLY PREPARED NODE FAILURE. */
5468 /*---------------------------------------------------------------------------*/
5469 /***********************/
5470 /* COMMIT_FAILREQ */
5471 /***********************/
/**
 * COMMIT_FAILREQ: the president commits the previously prepared set of
 * node failures.  The first time a given failure number is seen we record
 * it, mark the nodes ZFAIL_CLOSING, inform connected API nodes, and sync
 * the receive threads before NODE_FAILREP is delivered locally.  The
 * commit is always acknowledged, even when received multiple times (it
 * may be resent by a new president).
 */
void Qmgr::execCOMMIT_FAILREQ(Signal* signal)
{
  NodeRecPtr nodePtr;
  jamEntry();

  CRASH_INSERTION(935);

  BlockReference Tblockref = signal->theData[0];
  UintR TfailureNr = signal->theData[1];
  if (Tblockref != cpdistref) {
    jam();
    /* Not from the current president - ignore. */
    return;
  }//if

  /**
   * Block commit until node failures has stabilized
   *
   * @See RT352
   */
  UnblockCommitOrd* const unblock = (UnblockCommitOrd *)&signal->theData[0];
  unblock->failNo = TfailureNr;
  EXECUTE_DIRECT(DBDIH, GSN_UNBLOCK_COMMIT_ORD, signal,
                 UnblockCommitOrd::SignalLength);

  if ((ccommitFailureNr != TfailureNr) &&
      (!ccommitFailedNodes.isclear()))
  {
    jam();
    /**-----------------------------------------------------------------------
     * WE ONLY DO THIS PART OF THE COMMIT HANDLING THE FIRST TIME WE HEAR THIS
     * SIGNAL. WE CAN HEAR IT SEVERAL TIMES IF THE PRESIDENTS KEEP FAILING.
     *-----------------------------------------------------------------------*/
    ccommitFailureNr = TfailureNr;

    /* Stash the committed node set; execSYNC_THREAD_VIA_CONF picks it up
     * by failure number to deliver NODE_FAILREP to NDBCNTR. */
    Uint32 nodeFailIndex = TfailureNr % MAX_DATA_NODE_FAILURES;
    NodeFailRec* TnodeFailRec = &nodeFailRec[nodeFailIndex];
    ndbrequire(TnodeFailRec->president == 0);
    TnodeFailRec->failureNr = TfailureNr;
    TnodeFailRec->president = cpresident;
    TnodeFailRec->nodes = ccommitFailedNodes;

    SyncThreadViaReqConf* syncReq =(SyncThreadViaReqConf*)&signal->theData[0];
    syncReq->senderRef = reference();
    syncReq->senderData = TfailureNr;
    syncReq->actionType = SyncThreadViaReqConf::FOR_NODE_FAILREP;
    sendSignal(TRPMAN_REF, GSN_SYNC_THREAD_VIA_REQ, signal,
               SyncThreadViaReqConf::SignalLength, JBA);

    /**--------------------------------------------------------------------
     * WE MUST PREPARE TO ACCEPT THE CRASHED NODE INTO THE CLUSTER AGAIN BY
     * SETTING UP CONNECTIONS AGAIN AFTER THREE SECONDS OF DELAY.
     *--------------------------------------------------------------------*/
    for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
      if (ccommitFailedNodes.get(nodePtr.i)) {
        jamLine(nodePtr.i);
        ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
        nodePtr.p->phase = ZFAIL_CLOSING;
        DEB_STARTUP(("2: phase(%u) = ZFAIL_CLOSING", nodePtr.i));
        nodePtr.p->failState = WAITING_FOR_NDB_FAILCONF;
        set_hb_count(nodePtr.i) = 0;
        c_clusterNodes.clear(nodePtr.i);
      }//if
    }//for

    /*----------------------------------------------------------------------*/
    /* WE INFORM THE API'S WE HAVE CONNECTED ABOUT THE FAILED NODES.        */
    /*----------------------------------------------------------------------*/
    LinearSectionPtr lsptr[3];
    lsptr->p = TnodeFailRec->nodes.rep.data;
    lsptr->sz = TnodeFailRec->nodes.getPackedLengthInWords();

    for (nodePtr.i = 1; nodePtr.i < MAX_NODES; nodePtr.i++) {
      ptrAss(nodePtr, nodeRec);
      if (nodePtr.p->phase == ZAPI_ACTIVE) {
        jamLine(nodePtr.i);

        NodeFailRep * const nodeFail = (NodeFailRep *)&signal->theData[0];

        nodeFail->failNo = ccommitFailureNr;
        nodeFail->noOfNodes = ccommitFailedNodes.count();

        /* Long-signal-capable receivers get the bitmask in a section;
         * older ones get the inline 48-node format when it fits. */
        if (ndbd_send_node_bitmask_in_section(
            getNodeInfo(refToNode(nodePtr.p->blockRef)).m_version))
        {
          sendSignal(nodePtr.p->blockRef, GSN_NODE_FAILREP, signal,
                     NodeFailRep::SignalLength, JBB, lsptr, 1);
        }
        else if (lsptr->sz <= NdbNodeBitmask48::Size)
        {
          TnodeFailRec->nodes.copyto(NdbNodeBitmask48::Size,
                                     nodeFail->theNodes);
          sendSignal(nodePtr.p->blockRef, GSN_NODE_FAILREP, signal,
                     NodeFailRep::SignalLength_v1, JBB);
        }
        else
        {
          ndbabort();
        }
      }//if
    }//for

    /**
     * Remove committed nodes from failed/prepared
     */
    cfailedNodes.bitANDC(ccommitFailedNodes);
    cprepFailedNodes.bitANDC(ccommitFailedNodes);
    ccommitFailedNodes.clear();
  }//if
  /**-----------------------------------------------------------------------
   * WE WILL ALWAYS ACKNOWLEDGE THE COMMIT EVEN WHEN RECEIVING IT MULTIPLE
   * TIMES SINCE IT WILL ALWAYS COME FROM A NEW PRESIDENT.
   *------------------------------------------------------------------------*/
  signal->theData[0] = getOwnNodeId();
  sendSignal(Tblockref, GSN_COMMIT_FAILCONF, signal, 1, JBA);
  return;
}//Qmgr::execCOMMIT_FAILREQ()
5588
/**
 * SYNC_THREAD_VIA_CONF: the receive threads are synced.  For the
 * FOR_NODE_FAILREP action this delivers the NODE_FAILREP (recorded in
 * nodeFailRec by execCOMMIT_FAILREQ) to NDBCNTR; for
 * FOR_ACTIVATE_TRP_REQ it resumes transporter activation.
 */
void Qmgr::execSYNC_THREAD_VIA_CONF(Signal* signal)
{
  const SyncThreadViaReqConf* syncConf =
    (const SyncThreadViaReqConf*)&signal->theData[0];
  if (syncConf->actionType == SyncThreadViaReqConf::FOR_NODE_FAILREP)
  {
    jam();
    /* senderData carries the failure number used to index nodeFailRec. */
    const Uint32 index = syncConf->senderData % MAX_DATA_NODE_FAILURES;
    NodeFailRec* TnodeFailRec = &nodeFailRec[index];
    ndbrequire(TnodeFailRec->president != 0);
    ndbrequire(TnodeFailRec->nodes.count() != 0);
    NodeFailRep* nodeFail = (NodeFailRep*)&signal->theData[0];
    nodeFail->failNo = TnodeFailRec->failureNr;
    nodeFail->masterNodeId = TnodeFailRec->president;
    nodeFail->noOfNodes = TnodeFailRec->nodes.count();

    LinearSectionPtr lsptr[3];
    lsptr->p = TnodeFailRec->nodes.rep.data;
    lsptr->sz = TnodeFailRec->nodes.getPackedLengthInWords();

    TnodeFailRec->president = 0; // Mark entry as unused.

    if (ERROR_INSERTED(936))
    {
      /* Test hook: deliver NODE_FAILREP delayed, with the bitmask
       * imported into an owned section. */
      SectionHandle handle(this);
      ndbrequire(import(handle.m_ptr[0], lsptr[0].p, lsptr[0].sz));
      handle.m_cnt = 1;
      sendSignalWithDelay(NDBCNTR_REF, GSN_NODE_FAILREP, signal,
                          200, NodeFailRep::SignalLength, &handle);
      releaseSections(handle);
    }
    else
    {
      sendSignal(NDBCNTR_REF, GSN_NODE_FAILREP, signal,
                 NodeFailRep::SignalLength, JBA, lsptr, 1);
    }
  }
  else if (syncConf->actionType == SyncThreadViaReqConf::FOR_ACTIVATE_TRP_REQ)
  {
    jam();
    handle_activate_trp_req(signal, syncConf->senderData);
  }
  else
  {
    ndbabort();
  }
}
5636
5637 /*--------------------------------------------------------------------------*/
/* WE HAVE RECEIVED CONFIRMATION THAT THIS NODE HAS COMMITTED THE FAILURES. */
5639 /*--------------------------------------------------------------------------*/
5640 /*******************************/
5641 /* COMMIT_FAILCONF */
5642 /*******************************/
execCOMMIT_FAILCONF(Signal * signal)5643 void Qmgr::execCOMMIT_FAILCONF(Signal* signal)
5644 {
5645 NodeRecPtr nodePtr;
5646 NodeRecPtr replyNodePtr;
5647 jamEntry();
5648 replyNodePtr.i = signal->theData[0];
5649
5650 ptrCheckGuard(replyNodePtr, MAX_NDB_NODES, nodeRec);
5651 replyNodePtr.p->sendCommitFailReqStatus = Q_NOT_ACTIVE;
5652 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
5653 ptrAss(nodePtr, nodeRec);
5654 if (nodePtr.p->phase == ZRUNNING) {
5655 if (nodePtr.p->sendCommitFailReqStatus == Q_ACTIVE) {
5656 jamLine(nodePtr.i);
5657 return;
5658 }//if
5659 }//if
5660 }//for
5661 /*-----------------------------------------------------------------------*/
5662 /* WE HAVE SUCCESSFULLY COMMITTED A SET OF NODE FAILURES. */
5663 /*-----------------------------------------------------------------------*/
5664 ctoStatus = Q_NOT_ACTIVE;
5665 if (!cfailedNodes.isclear()) {
5666 jam();
5667 /**----------------------------------------------------------------------
5668 * A FAILURE OCCURRED IN THE MIDDLE OF THE COMMIT PROCESS. WE ARE NOW
5669 * READY TO START THE FAILED NODE PROCESS FOR THIS NODE.
5670 *----------------------------------------------------------------------*/
5671 cfailureNr = cfailureNr + 1;
5672 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
5673 ptrAss(nodePtr, nodeRec);
5674 if (nodePtr.p->phase == ZRUNNING) {
5675 jamLine(nodePtr.i);
5676 sendPrepFailReq(signal, nodePtr.i);
5677 }//if
5678 }//for
5679 }//if
5680 return;
5681 }//Qmgr::execCOMMIT_FAILCONF()
5682
5683 /**--------------------------------------------------------------------------
5684 * IF THE PRESIDENT FAILS IN THE MIDDLE OF THE COMMIT OF A FAILED NODE THEN
5685 * THE NEW PRESIDENT NEEDS TO QUERY THE COMMIT STATUS IN THE RUNNING NODES.
5686 *---------------------------------------------------------------------------*/
5687 /*******************************/
5688 /* PRES_TOCONF */
5689 /*******************************/
/**
 * PRES_TOCONF: reply to the new president's takeover query about the
 * commit status of an interrupted node-failure round.  When all replies
 * are in, either finish committing the interrupted failure or start a
 * fresh prepare round.
 */
void Qmgr::execPRES_TOCONF(Signal* signal)
{
  NodeRecPtr nodePtr;
  NodeRecPtr replyNodePtr;
  jamEntry();
  replyNodePtr.i = signal->theData[0];
  UintR TfailureNr = signal->theData[1];
  /* Track the highest failure number reported by any node. */
  if (ctoFailureNr < TfailureNr) {
    jam();
    ctoFailureNr = TfailureNr;
  }//if
  ptrCheckGuard(replyNodePtr, MAX_NDB_NODES, nodeRec);
  replyNodePtr.p->sendPresToStatus = Q_NOT_ACTIVE;
  /* Still waiting for replies from other nodes? Then return and wait. */
  for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
    ptrAss(nodePtr, nodeRec);
    if (nodePtr.p->sendPresToStatus == Q_ACTIVE) {
      jamLine(nodePtr.i);
      return;
    }//if
  }//for
  /*-------------------------------------------------------------------------*/
  /* WE ARE NOW READY TO DISCOVER WHETHER THE FAILURE WAS COMMITTED OR NOT.  */
  /*-------------------------------------------------------------------------*/
  if (ctoFailureNr > ccommitFailureNr) {
    jam();
    /* Some node saw a later failure number: re-drive the commit phase. */
    for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
      ptrAss(nodePtr, nodeRec);
      if (nodePtr.p->phase == ZRUNNING) {
        jamLine(nodePtr.i);
        nodePtr.p->sendCommitFailReqStatus = Q_ACTIVE;
        signal->theData[0] = cpdistref;
        signal->theData[1] = ctoFailureNr;
        sendSignal(nodePtr.p->blockRef, GSN_COMMIT_FAILREQ, signal, 2, JBA);
      }//if
    }//for
    return;
  }//if
  /*-------------------------------------------------------------------------*/
  /* WE ARE NOW READY TO START THE NEW NODE FAILURE PROCESS.                 */
  /*-------------------------------------------------------------------------*/
  ctoStatus = Q_NOT_ACTIVE;
  cfailureNr = cfailureNr + 1;
  for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
    ptrAss(nodePtr, nodeRec);
    if (nodePtr.p->phase == ZRUNNING) {
      jamLine(nodePtr.i);
      sendPrepFailReq(signal, nodePtr.i);
    }//if
  }//for
  return;
}//Qmgr::execPRES_TOCONF()
5741
5742 /*--------------------------------------------------------------------------*/
5743 // Provide information about the configured NDB nodes in the system.
5744 /*--------------------------------------------------------------------------*/
/**
 * READ_NODESREQ: report the configured/defined NDB nodes, the currently
 * connected cluster nodes and the inactive nodes to the requester.
 * Newer requesters (full-length request) get the bitmasks in a signal
 * section; older ones get the inline v1 format, which only fits when
 * each bitmask packs into 48-node size.
 */
void Qmgr::execREAD_NODESREQ(Signal* signal)
{
  jamEntry();

  ReadNodesReq *req = (ReadNodesReq *)&signal->theData[0];
  BlockReference TBref = req->myRef;
  NodeRecPtr nodePtr;
  nodePtr.i = getOwnNodeId();
  ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);

  /* inactive = defined but not currently in the cluster */
  NdbNodeBitmask tmp = c_definedNodes;
  tmp.bitANDC(c_clusterNodes);

  Uint32 packed_length1 = c_definedNodes.getPackedLengthInWords();
  Uint32 packed_length2 = c_clusterNodes.getPackedLengthInWords();
  Uint32 packed_length3 = tmp.getPackedLengthInWords();

  if (signal->length() >= ReadNodesReq::SignalLength)
  {
    jam();
    ReadNodesConf * const readNodes = (ReadNodesConf *)&signal->theData[0];

    readNodes->noOfNodes = c_definedNodes.count();
    readNodes->masterNodeId = cpresident;
    readNodes->ndynamicId = nodePtr.p->ndynamicId;

    readNodes->definedNodes = c_definedNodes;
    readNodes->clusterNodes = c_clusterNodes;
    readNodes->inactiveNodes = tmp;
    readNodes->startingNodes.clear();
    readNodes->startedNodes.clear();

    /* Ship all five consecutive bitmasks as one linear section. */
    LinearSectionPtr lsptr[3];
    lsptr[0].p = readNodes->definedNodes.rep.data;
    lsptr[0].sz = 5 * NdbNodeBitmask::Size;
    sendSignal(TBref,
               GSN_READ_NODESCONF,
               signal,
               ReadNodesConf::SignalLength,
               JBB,
               lsptr,
               1);
  }
  else if (packed_length1 <= NdbNodeBitmask48::Size &&
           packed_length2 <= NdbNodeBitmask48::Size &&
           packed_length3 <= NdbNodeBitmask48::Size)
  {
    jam();
    ReadNodesConf_v1 * const readNodes = (ReadNodesConf_v1 *)&signal->theData[0];
    readNodes->noOfNodes = c_definedNodes.count();
    readNodes->masterNodeId = cpresident;
    readNodes->ndynamicId = nodePtr.p->ndynamicId;

    c_definedNodes.copyto(NdbNodeBitmask::Size, readNodes->definedNodes);
    c_clusterNodes.copyto(NdbNodeBitmask::Size, readNodes->clusterNodes);
    tmp.copyto(NdbNodeBitmask::Size, readNodes->inactiveNodes);
    NdbNodeBitmask::clear(readNodes->startingNodes);
    NdbNodeBitmask::clear(readNodes->startedNodes);

    sendSignal(TBref, GSN_READ_NODESCONF, signal,
               ReadNodesConf_v1::SignalLength, JBB);
  }
  else
  {
    /* Old-style requester but node ids beyond the 48-node format. */
    ndbabort();
  }
}//Qmgr::execREAD_NODESREQ()
5812
systemErrorBecauseOtherNodeFailed(Signal * signal,Uint32 line,NodeId failedNodeId)5813 void Qmgr::systemErrorBecauseOtherNodeFailed(Signal* signal, Uint32 line,
5814 NodeId failedNodeId) {
5815 jam();
5816
5817 // Broadcast that this node is failing to other nodes
5818 failReport(signal, getOwnNodeId(), (UintR)ZTRUE, FailRep::ZOWN_FAILURE, getOwnNodeId());
5819
5820 char buf[100];
5821 BaseString::snprintf(buf, 100,
5822 "Node was shutdown during startup because node %d failed",
5823 failedNodeId);
5824
5825 progError(line, NDBD_EXIT_SR_OTHERNODEFAILED, buf);
5826 }
5827
5828
systemErrorLab(Signal * signal,Uint32 line,const char * message)5829 void Qmgr::systemErrorLab(Signal* signal, Uint32 line, const char * message)
5830 {
5831 jam();
5832 // Broadcast that this node is failing to other nodes
5833 failReport(signal, getOwnNodeId(), (UintR)ZTRUE, FailRep::ZOWN_FAILURE, getOwnNodeId());
5834
5835 // If it's known why shutdown occurred
5836 // an error message has been passed to this function
5837 progError(line, NDBD_EXIT_NDBREQUIRE, message);
5838 }//Qmgr::systemErrorLab()
5839
5840
5841 /**---------------------------------------------------------------------------
5842 * A FAILURE HAVE BEEN DISCOVERED ON A NODE. WE NEED TO CLEAR A
5843 * NUMBER OF VARIABLES.
5844 *---------------------------------------------------------------------------*/
void Qmgr::failReport(Signal* signal,
                      Uint16 aFailedNode,
                      UintR aSendFailRep,
                      FailRep::FailCause aFailCause,
                      Uint16 sourceNode)
{
  UintR tfrMinDynamicId;
  NodeRecPtr failedNodePtr;
  NodeRecPtr nodePtr;
  NodeRecPtr presidentNodePtr;

  /* When broadcasting FAIL_REP we must also name the detecting node. */
  ndbassert((! aSendFailRep) || (sourceNode != 0));

  failedNodePtr.i = aFailedNode;
  ptrCheckGuard(failedNodePtr, MAX_NDB_NODES, nodeRec);
  /* Only act on the first report; a node no longer in ZRUNNING is
   * already somewhere in the failure-handling protocol. */
  if (failedNodePtr.p->phase == ZRUNNING) {
    jam();

#ifdef ERROR_INSERT
    /* Error insert 938: count node failures and, once more than 25% of
     * the DB nodes have failed, resume communications via dump 9991. */
    if (ERROR_INSERTED(938))
    {
      nodeFailCount++;
      ndbout_c("QMGR : execFAIL_REP(Failed : %u Source : %u Cause : %u) : "
               "%u nodes have failed",
               aFailedNode, sourceNode, aFailCause, nodeFailCount);
      /* Count DB nodes */
      Uint32 nodeCount = 0;
      for (Uint32 i = 1; i < MAX_NDB_NODES; i++)
      {
        if (getNodeInfo(i).getType() == NODE_TYPE_DB)
          nodeCount++;
      }

      /* When > 25% of cluster has failed, resume communications */
      if (nodeFailCount > (nodeCount / 4))
      {
        ndbout_c("QMGR : execFAIL_REP > 25%% nodes failed, resuming comms");
        /* Preserve the in-flight signal around the DUMP_STATE_ORD send. */
        Signal save = *signal;
        signal->theData[0] = 9991;
        sendSignal(CMVMI_REF, GSN_DUMP_STATE_ORD, signal, 1, JBB);
        *signal = save;
        nodeFailCount = 0;
        SET_ERROR_INSERT_VALUE(932);
      }
    }
#endif

    /* WE ALSO NEED TO ADD HERE SOME CODE THAT GETS OUR NEW NEIGHBOURS. */
    if (cpresident == getOwnNodeId()) {
      jam();
      /* As president: conclude protocol rounds that were waiting on the
       * failed node by answering ourselves on its behalf. */
      if (failedNodePtr.p->sendCommitFailReqStatus == Q_ACTIVE) {
        jam();
        signal->theData[0] = failedNodePtr.i;
        sendSignal(QMGR_REF, GSN_COMMIT_FAILCONF, signal, 1, JBA);
      }//if
      if (failedNodePtr.p->sendPresToStatus == Q_ACTIVE) {
        jam();
        signal->theData[0] = failedNodePtr.i;
        signal->theData[1] = ccommitFailureNr;
        sendSignal(QMGR_REF, GSN_PRES_TOCONF, signal, 2, JBA);
      }//if
    }//if
    /* Move the node into the prepare-fail phase and reset its
     * per-node protocol send states and heartbeat counter. */
    DEB_STARTUP(("phase(%u) = ZPREPARE_FAIL", failedNodePtr.i));
    failedNodePtr.p->phase = ZPREPARE_FAIL;
    failedNodePtr.p->sendPrepFailReqStatus = Q_NOT_ACTIVE;
    failedNodePtr.p->sendCommitFailReqStatus = Q_NOT_ACTIVE;
    failedNodePtr.p->sendPresToStatus = Q_NOT_ACTIVE;
    set_hb_count(failedNodePtr.i) = 0;
    if (aSendFailRep == ZTRUE) {
      jam();
      /* Broadcast FAIL_REP: first to the failed node itself (unless it
       * is us), then to every node still seen as running. */
      if (failedNodePtr.i != getOwnNodeId()) {
        jam();
        FailRep * const failRep = (FailRep *)&signal->theData[0];
        failRep->failNodeId = failedNodePtr.i;
        failRep->failCause = aFailCause;
        failRep->failSourceNodeId = sourceNode;
        sendSignal(failedNodePtr.p->blockRef, GSN_FAIL_REP, signal,
                   FailRep::SignalLength, JBA);
      }//if
      for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
        ptrAss(nodePtr, nodeRec);
        if (nodePtr.p->phase == ZRUNNING) {
          jamLine(nodePtr.i);
          FailRep * const failRep = (FailRep *)&signal->theData[0];
          failRep->failNodeId = failedNodePtr.i;
          failRep->failCause = aFailCause;
          failRep->failSourceNodeId = sourceNode;
          sendSignal(nodePtr.p->blockRef, GSN_FAIL_REP, signal,
                     FailRep::SignalLength, JBA);
        }//if
      }//for
    }//if
    if (failedNodePtr.i == getOwnNodeId()) {
      jam();
      /* Our own failure: no further local bookkeeping needed. */
      return;
    }//if

    /* Inform the connectivity checker; if this failure completes an
     * ongoing check, finish the check now. */
    if (unlikely(m_connectivity_check.reportNodeFailure(failedNodePtr.i)))
    {
      jam();
      connectivityCheckCompleted(signal);
    }

    failedNodePtr.p->ndynamicId = 0;
    findNeighbours(signal, __LINE__);
    if (failedNodePtr.i == cpresident) {
      jam();
      /**--------------------------------------------------------------------
       * IF PRESIDENT HAVE FAILED WE MUST CALCULATE THE NEW PRESIDENT BY
       * FINDING THE NODE WITH THE MINIMUM DYNAMIC IDENTITY.
       *---------------------------------------------------------------------*/
      tfrMinDynamicId = (UintR)-1;
      for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
        ptrAss(nodePtr, nodeRec);
        if (nodePtr.p->phase == ZRUNNING) {
          jamLine(nodePtr.i);
          if ((nodePtr.p->ndynamicId & 0xFFFF) < tfrMinDynamicId) {
            jam();
            tfrMinDynamicId = (nodePtr.p->ndynamicId & 0xFFFF);
            cpresident = nodePtr.i;
          }//if
        }//if
      }//for
      presidentNodePtr.i = cpresident;
      ptrCheckGuard(presidentNodePtr, MAX_NDB_NODES, nodeRec);
      cpdistref = presidentNodePtr.p->blockRef;
      if (cpresident == getOwnNodeId()) {
        /* We just became president: take over the failure protocol. */
        CRASH_INSERTION(920);
        cfailureNr = cprepareFailureNr;
        ctoFailureNr = 0;
        ctoStatus = Q_ACTIVE;
        DEB_STARTUP(("2:Clear c_start.m_startNode"));
        c_start.reset(); // Don't take over nodes being started
        if (!ccommitFailedNodes.isclear()) {
          jam();
          /**-----------------------------------------------------------------
           * IN THIS SITUATION WE ARE UNCERTAIN OF WHETHER THE NODE FAILURE
           * PROCESS WAS COMMITTED. WE NEED TO QUERY THE OTHER NODES ABOUT
           * THEIR STATUS.
           *-----------------------------------------------------------------*/
          for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES;
               nodePtr.i++) {
            jam();
            ptrAss(nodePtr, nodeRec);
            if (nodePtr.p->phase == ZRUNNING) {
              jam();
              nodePtr.p->sendPresToStatus = Q_ACTIVE;
              signal->theData[0] = cpdistref;
              signal->theData[1] = cprepareFailureNr;
              /* NOTE(review): theData[1] is filled in but the signal is
               * sent with length 1 — confirm whether PRES_TOREQ is meant
               * to carry cprepareFailureNr as a second word. */
              sendSignal(nodePtr.p->blockRef, GSN_PRES_TOREQ,
                         signal, 1, JBA);
            }//if
          }//for
        } else {
          jam();
          /*-----------------------------------------------------------------*/
          // In this case it could be that a commit process is still ongoing.
          // If so we must conclude it as the new master.
          /*-----------------------------------------------------------------*/
          for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES;
               nodePtr.i++) {
            ptrAss(nodePtr, nodeRec);
            if (nodePtr.p->phase == ZRUNNING) {
              jamLine(nodePtr.i);
              nodePtr.p->sendCommitFailReqStatus = Q_ACTIVE;
              signal->theData[0] = cpdistref;
              signal->theData[1] = ccommitFailureNr;
              sendSignal(nodePtr.p->blockRef, GSN_COMMIT_FAILREQ, signal,
                         2, JBA);
            }//if
          }//for
        }//if
      }//if
    }//if
    cfailedNodes.set(failedNodePtr.i);
  }//if
}//Qmgr::failReport()
6023
6024 /*---------------------------------------------------------------------------*/
6025 /* INPUT: TTDI_DYN_ID */
6026 /* OUTPUT: TTDI_NODE_ID */
6027 /*---------------------------------------------------------------------------*/
translateDynamicIdToNodeId(Signal * signal,UintR TdynamicId)6028 Uint16 Qmgr::translateDynamicIdToNodeId(Signal* signal, UintR TdynamicId)
6029 {
6030 NodeRecPtr tdiNodePtr;
6031 Uint16 TtdiNodeId = ZNIL;
6032
6033 for (tdiNodePtr.i = 1; tdiNodePtr.i < MAX_NDB_NODES; tdiNodePtr.i++) {
6034 jam();
6035 ptrAss(tdiNodePtr, nodeRec);
6036 if (tdiNodePtr.p->ndynamicId == TdynamicId) {
6037 jam();
6038 TtdiNodeId = tdiNodePtr.i;
6039 break;
6040 }//if
6041 }//for
6042 if (TtdiNodeId == ZNIL) {
6043 jam();
6044 systemErrorLab(signal, __LINE__);
6045 }//if
6046 return TtdiNodeId;
6047 }//Qmgr::translateDynamicIdToNodeId()
6048
6049 /**--------------------------------------------------------------------------
6050 * WHEN RECEIVING PREPARE FAILURE REQUEST WE WILL IMMEDIATELY CLOSE
6051 * COMMUNICATION WITH ALL THOSE NODES.
6052 *--------------------------------------------------------------------------*/
sendCloseComReq(Signal * signal,BlockReference TBRef,Uint16 aFailNo)6053 void Qmgr::sendCloseComReq(Signal* signal, BlockReference TBRef, Uint16 aFailNo)
6054 {
6055 jam();
6056 CloseComReqConf * const closeCom = (CloseComReqConf *)&signal->theData[0];
6057
6058 closeCom->xxxBlockRef = TBRef;
6059 closeCom->requestType = CloseComReqConf::RT_NODE_FAILURE;
6060 closeCom->failNo = aFailNo;
6061 closeCom->noOfNodes = cprepFailedNodes.count();
6062 {
6063 closeCom->failedNodeId = 0; /* Indicates we're sending bitmask */
6064 LinearSectionPtr lsptr[3];
6065 lsptr[0].p = cprepFailedNodes.rep.data;
6066 lsptr[0].sz = cprepFailedNodes.getPackedLengthInWords();
6067 sendSignal(TRPMAN_REF,
6068 GSN_CLOSE_COMREQ,
6069 signal,
6070 CloseComReqConf::SignalLength,
6071 JBB,
6072 lsptr,
6073 1);
6074 }
6075
6076 }//Qmgr::sendCloseComReq()
6077
6078 void
sendPrepFailReqRef(Signal * signal,Uint32 dstBlockRef,GlobalSignalNumber gsn,Uint32 blockRef,Uint32 failNo,const NdbNodeBitmask & nodes)6079 Qmgr::sendPrepFailReqRef(Signal* signal,
6080 Uint32 dstBlockRef,
6081 GlobalSignalNumber gsn,
6082 Uint32 blockRef,
6083 Uint32 failNo,
6084 const NdbNodeBitmask& nodes)
6085 {
6086 PrepFailReqRef * const prepFail = (PrepFailReqRef *)&signal->theData[0];
6087 prepFail->xxxBlockRef = blockRef;
6088 prepFail->failNo = failNo;
6089 prepFail->noOfNodes = nodes.count();
6090 Uint32 packed_length = nodes.getPackedLengthInWords();
6091
6092 if (ndbd_send_node_bitmask_in_section(
6093 getNodeInfo(refToNode(dstBlockRef)).m_version))
6094 {
6095 Uint32* temp_failed_nodes = &signal->theData[PrepFailReqRef::SignalLength];
6096 nodes.copyto(NdbNodeBitmask::Size, temp_failed_nodes);
6097 LinearSectionPtr lsptr[3];
6098 lsptr[0].p = temp_failed_nodes;
6099 lsptr[0].sz = packed_length;
6100 sendSignal(dstBlockRef, gsn, signal, PrepFailReqRef::SignalLength, JBA,
6101 lsptr, 1);
6102 }
6103 else if (packed_length <= NdbNodeBitmask48::Size)
6104 {
6105 nodes.copyto(NdbNodeBitmask48::Size, prepFail->theNodes);
6106 sendSignal(dstBlockRef, gsn, signal, PrepFailReqRef::SignalLength_v1, JBA);
6107 }
6108 else
6109 {
6110 ndbabort();
6111 }
6112 }
6113
6114
6115 /**--------------------------------------------------------------------------
6116 * SEND PREPARE FAIL REQUEST FROM PRESIDENT.
6117 *---------------------------------------------------------------------------*/
sendPrepFailReq(Signal * signal,Uint16 aNode)6118 void Qmgr::sendPrepFailReq(Signal* signal, Uint16 aNode)
6119 {
6120 NodeRecPtr sendNodePtr;
6121 sendNodePtr.i = aNode;
6122 ptrCheckGuard(sendNodePtr, MAX_NDB_NODES, nodeRec);
6123 sendNodePtr.p->sendPrepFailReqStatus = Q_ACTIVE;
6124
6125 sendPrepFailReqRef(signal,
6126 sendNodePtr.p->blockRef,
6127 GSN_PREP_FAILREQ,
6128 reference(),
6129 cfailureNr,
6130 cfailedNodes);
6131 }//Qmgr::sendPrepFailReq()
6132
6133 /**
6134 * Arbitration module. Rest of QMGR calls us only via
6135 * the "handle" routines.
6136 */
6137
6138 /**
6139 * Config signals are logically part of CM_REG.
6140 */
6141 void
execARBIT_CFG(Signal * signal)6142 Qmgr::execARBIT_CFG(Signal* signal)
6143 {
6144 jamEntry();
6145 ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
6146 unsigned rank = sd->code;
6147 ndbrequire(1 <= rank && rank <= 2);
6148 arbitRec.apiMask[0].bitOR(sd->mask);
6149 arbitRec.apiMask[rank].assign(sd->mask);
6150 }
6151
6152 /**
6153 * ContinueB delay (0=JBA 1=JBB)
6154 */
getArbitDelay()6155 Uint32 Qmgr::getArbitDelay()
6156 {
6157 switch (arbitRec.state) {
6158 case ARBIT_NULL:
6159 jam();
6160 break;
6161 case ARBIT_INIT:
6162 jam();
6163 return 100;
6164 case ARBIT_FIND:
6165 jam();
6166 return 100;
6167 case ARBIT_PREP1:
6168 jam();
6169 return 100;
6170 case ARBIT_PREP2:
6171 jam();
6172 return 100;
6173 case ARBIT_START:
6174 jam();
6175 return 100;
6176 case ARBIT_RUN:
6177 jam();
6178 return 1000;
6179 case ARBIT_CHOOSE:
6180 jam();
6181 return 10;
6182 case ARBIT_CRASH: // if we could wait
6183 jam();
6184 return 100;
6185 }
6186 ndbabort();
6187 return (Uint32)-1;
6188 }
6189
6190 /**
6191 * Time to wait for reply. There is only 1 config parameter
6192 * (timeout for CHOOSE). XXX The rest are guesses.
6193 */
getArbitTimeout()6194 Uint32 Qmgr::getArbitTimeout()
6195 {
6196 switch (arbitRec.state) {
6197 case ARBIT_NULL:
6198 jam();
6199 break;
6200 case ARBIT_INIT: // not used
6201 jam();
6202 // Fall through
6203 case ARBIT_FIND:
6204 jam();
6205 /* This timeout will be used only to print out a warning
6206 * when a suitable arbitrator is not found.
6207 */
6208 return 60000;
6209 case ARBIT_PREP1:
6210 jam();
6211 // Fall through
6212 case ARBIT_PREP2:
6213 jam();
6214 return 1000 + cnoOfNodes * Uint32(hb_send_timer.getDelay());
6215 case ARBIT_START:
6216 jam();
6217 return 1000 + arbitRec.timeout;
6218 case ARBIT_RUN: // not used (yet)
6219 jam();
6220 return 1000;
6221 case ARBIT_CHOOSE:
6222 jam();
6223 return arbitRec.timeout;
6224 case ARBIT_CRASH: // if we could wait
6225 jam();
6226 return 100;
6227 }
6228 ndbabort();
6229 return (Uint32)-1;
6230 }
6231
6232 /**
6233 * Start arbitration thread when we are president and database
6234 * is opened for the first time.
6235 *
6236 * XXX Do arbitration check just like on node failure. Since
6237 * there is no arbitrator yet, must win on counts alone.
6238 */
6239 void
handleArbitStart(Signal * signal)6240 Qmgr::handleArbitStart(Signal* signal)
6241 {
6242 jam();
6243 ndbrequire(cpresident == getOwnNodeId());
6244 ndbrequire(arbitRec.state == ARBIT_NULL);
6245 arbitRec.state = ARBIT_INIT;
6246 DEB_ARBIT(("Arbit state = ARBIT_INIT from NULL"));
6247 arbitRec.newstate = true;
6248 startArbitThread(signal);
6249 }
6250
6251 /**
6252 * Handle API node failure. Called also by non-president nodes.
6253 * If we are president go back to INIT state, otherwise to NULL.
6254 * Start new thread to save time.
6255 */
void
Qmgr::handleArbitApiFail(Signal* signal, Uint16 nodeId)
{
  /* React only if the failed API node is our current arbitrator. */
  if (arbitRec.node != nodeId) {
    jam();
    return;
  }
  reportArbitEvent(signal, NDB_LE_ArbitState);
  arbitRec.node = 0;  // forget the lost arbitrator
  switch (arbitRec.state) {
  case ARBIT_NULL: // should not happen
    jam();
    break;
  case ARBIT_INIT:
    jam();
    break;
  case ARBIT_FIND:
    jam();
    break;
  case ARBIT_PREP1: // start from beginning
    jam();
    // Fall through
  case ARBIT_PREP2:
    jam();
    // Fall through
  case ARBIT_START:
    jam();
    // Fall through
  case ARBIT_RUN:
    /* President restarts the thread from INIT to pick a new
     * arbitrator; a non-president just drops back to NULL. */
    if (cpresident == getOwnNodeId()) {
      jam();
      arbitRec.state = ARBIT_INIT;
      DEB_ARBIT(("Arbit state = ARBIT_INIT from RUN"));
      arbitRec.newstate = true;
      startArbitThread(signal);
    } else {
      jam();
      arbitRec.state = ARBIT_NULL;
      DEB_ARBIT(("Arbit state = ARBIT_NULL from RUN"));
    }
    break;
  case ARBIT_CHOOSE: // XXX too late
    jam();
    break;
  case ARBIT_CRASH:
    jam();
    break;
  default:
    ndbabort();
  }
}
6307
6308 /**
6309 * Handle NDB node add. Ignore if arbitration thread not yet
6310 * started. If PREP is not ready, go back to INIT. Otherwise
6311 * the new node gets arbitrator and ticket once we reach RUN state.
6312 * Start new thread to save time.
6313 */
void
Qmgr::handleArbitNdbAdd(Signal* signal, Uint16 nodeId)
{
  jam();
  /* Only the president tracks node additions for arbitration. */
  ndbrequire(cpresident == getOwnNodeId());
  switch (arbitRec.state) {
  case ARBIT_NULL: // before db opened
    jam();
    break;
  case ARBIT_INIT: // start from beginning
    jam();
    // Fall through
  case ARBIT_FIND:
    jam();
    // Fall through
  case ARBIT_PREP1:
    jam();
    // Fall through
  case ARBIT_PREP2:
    jam();
    /* PREP not finished: restart the handshake so the new node is
     * included from the beginning. */
    arbitRec.state = ARBIT_INIT;
    DEB_ARBIT(("Arbit state = ARBIT_INIT from PREP2"));
    arbitRec.newstate = true;
    startArbitThread(signal);
    break;
  case ARBIT_START: // process in RUN state
    jam();
    // Fall through
  case ARBIT_RUN:
    jam();
    /* Defer: the new node gets arbitrator and ticket when the thread
     * is in RUN state (via newMask). */
    arbitRec.newMask.set(nodeId);
    break;
  case ARBIT_CHOOSE: // XXX too late
    jam();
    break;
  case ARBIT_CRASH:
    jam();
    break;
  default:
    ndbabort();
  }
}
6356
6357 /**
6358 * Check if current nodeset can survive. The decision is
6359 * based on node count, node groups, and on external arbitrator
6360 * (if we have one). Always starts a new thread because
6361 * 1) CHOOSE cannot wait 2) if we are new president we need
6362 * a thread 3) if we are old president it does no harm.
6363 *
6364 * The following logic governs if we will survive or not.
6365 * 1) If at least one node group is fully dead then we will not survive.
6366 * 2) If 1) is false AND at least one group is fully alive then we will
6367 * survive.
6368 * 3) If 1) AND 2) is false AND a majority of the previously alive nodes are
6369 * dead then we will not survive.
6370 * 4) If 1) AND 2) AND 3) is false AND a majority of the previously alive
6371 * nodes are still alive, then we will survive.
6372 * 5) If 1) AND 2) AND 3) AND 4) is false then exactly half of the previously
6373 * alive nodes are dead and the other half is alive. In this case we will
6374 * ask the arbitrator whether we can continue or not. If no arbitrator is
6375 * currently selected then we will fail. If an arbitrator exists then it
6376 * will respond with either WIN in which case our part of the cluster will
6377 * remain alive and LOSE in which case our part of the cluster will not
6378 * survive.
6379 *
6380 * The number of previously alive nodes are the sum of the currently alive
6381 * nodes plus the number of nodes currently forming a node set that will
6382 * die. All other nodes was dead in a previous node fail transaction and are
6383 * not counted in the number of previously alive nodes.
6384 */
void
Qmgr::handleArbitCheck(Signal* signal)
{
  jam();
  Uint32 prev_alive_nodes = count_previously_alive_nodes();
  ndbrequire(cpresident == getOwnNodeId());
  NdbNodeBitmask survivorNodes;
  /**
   * computeArbitNdbMask will only count nodes in the state ZRUNNING, crashed
   * nodes are thus not part of this set of nodes. The method
   * count_previously_alive_nodes counts both nodes in ZRUNNING and in
   * ZPREPARE_FAIL but deducts those that was previously not started to ensure
   * that we don't rely on non-started nodes in our check for whether
   * arbitration is required.
   */
  computeArbitNdbMask(survivorNodes);
  {
    jam();
    /* Ask DIH (directly) how the survivor set relates to the node
     * groups: clear win, clear loss, or potential partitioning. */
    CheckNodeGroups* sd = (CheckNodeGroups*)&signal->theData[0];
    sd->blockRef = reference();
    sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck;
    sd->mask = survivorNodes;
    EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal,
                   CheckNodeGroups::SignalLength);
    jamEntry();
    if (ERROR_INSERTED(943))
    {
      /* Test hook: force the partitioning path even without a split. */
      ndbout << "Requiring arbitration, even if there is no"
             << " possible split."<< endl;
      sd->output = CheckNodeGroups::Partitioning;
      DEB_ARBIT(("Arbit state = ARBIT_RUN in 943"));
      arbitRec.state = ARBIT_RUN;
    }
    switch (sd->output) {
    case CheckNodeGroups::Win:
      jam();
      arbitRec.code = ArbitCode::WinGroups;
      break;
    case CheckNodeGroups::Lose:
      jam();
      arbitRec.code = ArbitCode::LoseGroups;
      break;
    case CheckNodeGroups::Partitioning:
      jam();
      arbitRec.code = ArbitCode::Partitioning;
      if (2 * survivorNodes.count() > prev_alive_nodes)
      {
        /**
         * We have lost nodes in all node groups so we are in a
         * potentially partitioned state. If we have the majority
         * of the nodes in this partition we will definitely
         * survive.
         */
        jam();
        arbitRec.code = ArbitCode::WinNodes;
      }
      else if (2 * survivorNodes.count() < prev_alive_nodes)
      {
        jam();
        /**
         * More than half of the live nodes failed and nodes from
         * all node groups failed, we are definitely in a losing
         * streak and we will be part of the failing side. Time
         * to crash.
         */
        arbitRec.code = ArbitCode::LoseNodes;
      }
      else
      {
        jam();
        /**
         * Half of the live nodes failed, we can be in a partitioned
         * state, use the arbitrator to decide what to do next.
         */
      }
      break;
    default:
      ndbabort();
    }
  }
  /* Act on the verdict: win => continue (possibly restarting the
   * arbitration thread), lose => crash, tie => ask the arbitrator. */
  switch (arbitRec.code) {
  case ArbitCode::LoseNodes:
    jam();
    goto crashme;
  case ArbitCode::LoseGroups:
    jam();
    goto crashme;
  case ArbitCode::WinNodes:
    jam();
    // Fall through
  case ArbitCode::WinGroups:
    jam();
    if (arbitRec.state == ARBIT_RUN)
    {
      jam();
      break;
    }
    arbitRec.state = ARBIT_INIT;
    DEB_ARBIT(("Arbit state = ARBIT_INIT from non-RUN WinGroups"));
    arbitRec.newstate = true;
    break;
  case ArbitCode::Partitioning:
    if (arbitRec.state == ARBIT_RUN)
    {
      jam();
      arbitRec.state = ARBIT_CHOOSE;
      DEB_ARBIT(("Arbit state = ARBIT_CHOOSE from RUN"));
      arbitRec.newstate = true;
      break;
    }
    /* Tie but no arbitrator is set up: we must lose. Distinguish
     * "configured but not running" from "not configured". */
    if (arbitRec.apiMask[0].count() != 0)
    {
      jam();
      arbitRec.code = ArbitCode::LoseNorun;
    }
    else
    {
      jam();
      arbitRec.code = ArbitCode::LoseNocfg;
    }
    goto crashme;
  default:
  crashme:
    jam();
    arbitRec.state = ARBIT_CRASH;
    DEB_ARBIT(("Arbit state = ARBIT_CRASH"));
    arbitRec.newstate = true;
    break;
  }
  reportArbitEvent(signal, NDB_LE_ArbitResult);
  switch (arbitRec.state) {
  default:
    jam();
    /* Surviving: prune failed nodes from the masks and start the
     * commit phase of the node-failure protocol. */
    arbitRec.newMask.bitAND(survivorNodes); // delete failed nodes
    arbitRec.recvMask.bitAND(survivorNodes);
    sendCommitFailReq(signal); // start commit of failed nodes
    break;
  case ARBIT_CHOOSE:
    jam();
    break;
  case ARBIT_CRASH:
    jam();
    break;
  }
  startArbitThread(signal);
}
6531
6532 /**
6533 * Start a new continueB thread. The thread id is incremented
6534 * so that any old thread will exit.
6535 */
6536 void
startArbitThread(Signal * signal)6537 Qmgr::startArbitThread(Signal* signal)
6538 {
6539 jam();
6540 ndbrequire(cpresident == getOwnNodeId());
6541 arbitRec.code = ArbitCode::ThreadStart;
6542 reportArbitEvent(signal, NDB_LE_ArbitState);
6543 signal->theData[1] = ++arbitRec.thread;
6544 runArbitThread(signal);
6545 }
6546
6547 /**
6548 * Handle arbitration thread. The initial thread normally ends
6549 * up in RUN state. New thread can be started to save time.
6550 */
void
Qmgr::runArbitThread(Signal* signal)
{
#ifdef DEBUG_ARBIT
  /* Dump the full arbitration record before each step. */
  char buf[256];
  NdbNodeBitmask ndbMask;
  char maskbuf[NdbNodeBitmask::TextLength + 1];
  computeArbitNdbMask(ndbMask);
  ndbout << "arbit thread:";
  ndbout << " state=" << arbitRec.state;
  ndbout << " newstate=" << arbitRec.newstate;
  ndbout << " thread=" << arbitRec.thread;
  ndbout << " node=" << arbitRec.node;
  arbitRec.ticket.getText(buf, sizeof(buf));
  ndbout << " ticket=" << buf;
  ndbMask.getText(maskbuf);
  ndbout << " ndbmask=" << maskbuf;
  ndbout << " sendcount=" << arbitRec.sendCount;
  ndbout << " recvcount=" << arbitRec.recvCount;
  arbitRec.recvMask.getText(maskbuf);
  ndbout << " recvmask=" << maskbuf;
  ndbout << " code=" << arbitRec.code;
  ndbout << endl;
#endif
  /* A stale thread id means a newer thread superseded this one. */
  if (signal->theData[1] != arbitRec.thread) {
    jam();
    return; // old thread dies
  }
  /* Dispatch one step of the state machine. */
  switch (arbitRec.state) {
  case ARBIT_INIT: // main thread
    jam();
    stateArbitInit(signal);
    break;
  case ARBIT_FIND:
    jam();
    stateArbitFind(signal);
    break;
  case ARBIT_PREP1:
    jam();
    // Fall through
  case ARBIT_PREP2:
    jam();
    stateArbitPrep(signal);
    break;
  case ARBIT_START:
    jam();
    stateArbitStart(signal);
    break;
  case ARBIT_RUN:
    jam();
    stateArbitRun(signal);
    break;
  case ARBIT_CHOOSE: // partitition thread
    jam();
    if (ERROR_INSERTED(945) && arbitRec.code == ArbitCode::WinChoose)
    {
      // Delay ARBIT_CHOOSE until NdbAPI node is disconnected
      break;
    }
    stateArbitChoose(signal);
    break;
  case ARBIT_CRASH:
    jam();
    stateArbitCrash(signal);
    break;
  default:
    ndbabort();
  }
  /* Reschedule ourselves via CONTINUEB with a state-dependent delay
   * (0 => immediate on JBA, 1 => immediate on JBB, else delayed). */
  signal->theData[0] = ZARBIT_HANDLING;
  signal->theData[1] = arbitRec.thread;
  signal->theData[2] = arbitRec.state; // just for signal log
  Uint32 delay = getArbitDelay();
  if (delay == 0) {
    jam();
    sendSignal(QMGR_REF, GSN_CONTINUEB, signal, 3, JBA);
  } else if (delay == 1) {
    jam();
    sendSignal(QMGR_REF, GSN_CONTINUEB, signal, 3, JBB);
  } else {
    jam();
    sendSignalWithDelay(QMGR_REF, GSN_CONTINUEB, signal, delay, 3);
  }//if
}
6634
6635 /**
6636 * Handle INIT state. Generate next ticket. Switch to FIND
6637 * state without delay.
6638 */
void
Qmgr::stateArbitInit(Signal* signal)
{
  if (arbitRec.newstate) {
    jam();
    CRASH_INSERTION((Uint32)910 + arbitRec.state);

    /* First entry into INIT: drop the old arbitrator and generate a
     * fresh ticket, invalidating any earlier arbitration round. */
    arbitRec.node = 0;
    arbitRec.ticket.update();
    arbitRec.newMask.clear();
    arbitRec.code = 0;
    arbitRec.newstate = false;
  }
  arbitRec.setTimestamp();  // Init arbitration timer
  arbitRec.state = ARBIT_FIND;
  DEB_ARBIT(("Arbit state = ARBIT_FIND"));
  arbitRec.newstate = true;
  /* Move to FIND without waiting for the next ContinueB tick. */
  stateArbitFind(signal);
}
6658
6659 /**
6660 * Handle FIND state. Find first arbitrator which is alive
6661 * and invoke PREP state without delay. If none are found,
6662 * loop in FIND state. This is forever if no arbitrators
6663 * are configured (not the normal case).
6664 *
6665 * XXX Add adaptive behaviour to avoid getting stuck on API
6666 * nodes which are alive but do not respond or die too soon.
6667 */
void
Qmgr::stateArbitFind(Signal* signal)
{
  if (arbitRec.newstate) {
    jam();
    CRASH_INSERTION((Uint32)910 + arbitRec.state);

    arbitRec.code = 0;
    arbitRec.newstate = false;
  }

  switch (arbitRec.method){
  case ArbitRec::METHOD_EXTERNAL:
  {
    // Don't select any API node as arbitrator
    arbitRec.node = 0;
    arbitRec.state = ARBIT_PREP1;
    DEB_ARBIT(("Arbit state = ARBIT_PREP1"));
    arbitRec.newstate = true;
    stateArbitPrep(signal);
    return;
    break;
  }

  case ArbitRec::METHOD_DEFAULT:
  {
    NodeRecPtr aPtr;
    /* Select the best available API node as arbitrator: scan rank 1
     * candidates first, then rank 2, taking the first active one. */
    for (unsigned rank = 1; rank <= 2; rank++) {
      jam();
      aPtr.i = 0;
      const unsigned stop = NodeBitmask::NotFound;
      while ((aPtr.i = arbitRec.apiMask[rank].find(aPtr.i + 1)) != stop) {
        jam();
        ptrAss(aPtr, nodeRec);
        /* Candidate must have an active API connection. */
        if (aPtr.p->phase != ZAPI_ACTIVE)
          continue;
        ndbrequire(c_connectedNodes.get(aPtr.i));
        arbitRec.node = aPtr.i;
        arbitRec.state = ARBIT_PREP1;
        DEB_ARBIT(("2:Arbit state = ARBIT_PREP1"));
        arbitRec.newstate = true;
        stateArbitPrep(signal);
        return;
      }
    }

    /* If the president cannot find a suitable arbitrator then
     * it will report this once a minute. Success in finding
     * an arbitrator will be notified when the arbitrator
     * accepts and acks the offer.
     */

    if (arbitRec.getTimediff() > getArbitTimeout()) {
      jam();
      g_eventLogger->warning("Could not find an arbitrator, cluster is not partition-safe");
      warningEvent("Could not find an arbitrator, cluster is not partition-safe");
      arbitRec.setTimestamp();
    }
    return;
    break;
  }

  default:
    ndbabort();
  }
}
6735
6736 /**
6737 * Handle PREP states. First round nulls any existing tickets.
6738 * Second round sends new ticket. When all confirms have been
6739 * received invoke START state immediately.
6740 */
void
Qmgr::stateArbitPrep(Signal* signal)
{
  if (arbitRec.newstate) {
    jam();
    CRASH_INSERTION((Uint32)910 + arbitRec.state);

    arbitRec.sendCount = 0;                 // send all at once
    computeArbitNdbMask(arbitRec.recvMask); // to send and recv
    arbitRec.recvMask.clear(getOwnNodeId());
    arbitRec.code = 0;
    arbitRec.newstate = false;
  }
  /* Round not yet sent: broadcast PREPREQ to every running data node.
   * PREP1 nulls old tickets, PREP2 distributes the new one. */
  if (! arbitRec.sendCount) {
    jam();
    NodeRecPtr aPtr;
    aPtr.i = 0;
    const unsigned stop = NodeBitmask::NotFound;
    while ((aPtr.i = arbitRec.recvMask.find(aPtr.i + 1)) != stop) {
      jam();
      ptrAss(aPtr, nodeRec);
      ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
      sd->sender = getOwnNodeId();
      if (arbitRec.state == ARBIT_PREP1) {
        jam();
        sd->code = ArbitCode::PrepPart1;
      } else {
        jam();
        sd->code = ArbitCode::PrepPart2;
      }
      sd->node = arbitRec.node;
      sd->ticket = arbitRec.ticket;
      sd->mask.clear();
      sendSignal(aPtr.p->blockRef, GSN_ARBIT_PREPREQ, signal,
                 ArbitSignalData::SignalLength, JBB);
    }
    arbitRec.setTimestamp();  // send time
    arbitRec.sendCount = 1;
    return;
  }
  /* A node reported an error: restart from INIT. */
  if (arbitRec.code != 0) { // error
    jam();
    arbitRec.state = ARBIT_INIT;
    DEB_ARBIT(("Arbit state = ARBIT_INIT stateArbitPrep"));
    arbitRec.newstate = true;
    return;
  }
  /* All confirms received: advance PREP1 -> PREP2 -> START. */
  if (arbitRec.recvMask.count() == 0) { // recv all
    if (arbitRec.state == ARBIT_PREP1) {
      jam();
      DEB_ARBIT(("Arbit state = ARBIT_PREP2 stateArbitPrep"));
      arbitRec.state = ARBIT_PREP2;
      arbitRec.newstate = true;
    } else {
      jam();
      DEB_ARBIT(("Arbit state = ARBIT_START stateArbitPrep"));
      arbitRec.state = ARBIT_START;
      arbitRec.newstate = true;
      stateArbitStart(signal);
    }
    return;
  }
  /* Timed out waiting for confirms: restart from INIT. */
  if (arbitRec.getTimediff() > getArbitTimeout()) {
    jam();
    arbitRec.state = ARBIT_INIT;
    DEB_ARBIT(("Arbit state = ARBIT_INIT stateArbitPrep"));
    arbitRec.newstate = true;
    return;
  }
}
6811
void
Qmgr::execARBIT_PREPREQ(Signal* signal)
{
  jamEntry();
  ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
  /* Only non-president nodes accept PREPREQ, and only from the
   * current president. */
  if (getOwnNodeId() == cpresident) {
    jam();
    return;  // wrong state
  }
  if (sd->sender != cpresident) {
    jam();
    return;  // wrong state
  }
  NodeRecPtr aPtr;
  aPtr.i = sd->sender;
  ptrAss(aPtr, nodeRec);
  switch (sd->code) {
  case ArbitCode::PrepPart1: // zero them just to be sure
    jam();
    arbitRec.node = 0;
    arbitRec.ticket.clear();
    break;
  case ArbitCode::PrepPart2: // non-president enters RUN state
    jam();
    // Fall through
  case ArbitCode::PrepAtrun:
    jam();
    /* Adopt the arbitrator and ticket proposed by the president
     * and enter RUN state. */
    arbitRec.node = sd->node;
    arbitRec.ticket = sd->ticket;
    arbitRec.code = sd->code;
    reportArbitEvent(signal, NDB_LE_ArbitState);
    arbitRec.state = ARBIT_RUN;
    arbitRec.newstate = true;
    DEB_ARBIT(("Arbit state = ARBIT_RUN PrepAtRun"));

    // Non-president node logs.
    /* Warn if the proposed arbitrator is not connected to us:
     * this suggests a partially connected cluster. */
    if (!c_connectedNodes.get(arbitRec.node))
    {
      char buf[20]; // needs 16 + 1 for '\0'
      arbitRec.ticket.getText(buf, sizeof(buf));
      g_eventLogger->warning("President %u proposed disconnected "
                             "node %u as arbitrator [ticket=%s]. "
                             "Cluster may be partially connected. "
                             "Connected nodes: %s",
                             cpresident, arbitRec.node, buf,
                             BaseString::getPrettyTextShort(c_connectedNodes).c_str());

      warningEvent("President %u proposed disconnected node %u "
                   "as arbitrator [ticket %s]",
                   cpresident, arbitRec.node, buf);
      warningEvent("Cluster may be partially connected. Connected nodes: ");

      // Split the connected-node list, since warningEvents are
      // limited to ~24 words / 96 chars
      BaseString tmp(BaseString::getPrettyTextShort(c_connectedNodes).c_str());
      Vector<BaseString> split;
      tmp.split(split, "", 92);
      for(unsigned i = 0; i < split.size(); ++i)
      {
        warningEvent("%s", split[i].c_str());
      }
    }

    /* PrepAtrun is informational only — no confirm is sent. */
    if (sd->code == ArbitCode::PrepAtrun) {
      jam();
      return;
    }
    break;
  default:
    jam();
    ndbabort();
  }
  /* Confirm PREP1/PREP2 back to the president. */
  sd->sender = getOwnNodeId();
  sd->code = 0;
  sendSignal(aPtr.p->blockRef, GSN_ARBIT_PREPCONF, signal,
             ArbitSignalData::SignalLength, JBB);
}
6889
6890 void
execARBIT_PREPCONF(Signal * signal)6891 Qmgr::execARBIT_PREPCONF(Signal* signal)
6892 {
6893 jamEntry();
6894 ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
6895 if (! arbitRec.match(sd)) {
6896 jam();
6897 return; // stray signal
6898 }
6899 if (arbitRec.state != ARBIT_PREP1 && arbitRec.state != ARBIT_PREP2) {
6900 jam();
6901 return; // wrong state
6902 }
6903 if (! arbitRec.recvMask.get(sd->sender)) {
6904 jam();
6905 return; // wrong state
6906 }
6907 arbitRec.recvMask.clear(sd->sender);
6908 if (arbitRec.code == 0 && sd->code != 0) {
6909 jam();
6910 arbitRec.code = sd->code;
6911 }//if
6912 }
6913
6914 void
execARBIT_PREPREF(Signal * signal)6915 Qmgr::execARBIT_PREPREF(Signal* signal)
6916 {
6917 jamEntry();
6918 ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
6919 if (sd->code == 0) {
6920 jam();
6921 sd->code = ArbitCode::ErrUnknown;
6922 }
6923 execARBIT_PREPCONF(signal);
6924 }
6925
6926 /**
6927 * Handle START state. On first call send start request to
6928 * the chosen arbitrator. Then wait for a CONF.
6929 */
6930 void
stateArbitStart(Signal * signal)6931 Qmgr::stateArbitStart(Signal* signal)
6932 {
6933 if (arbitRec.newstate) {
6934 jam();
6935 CRASH_INSERTION((Uint32)910 + arbitRec.state);
6936
6937 arbitRec.sendCount = 0;
6938 arbitRec.recvCount = 0;
6939 arbitRec.code = 0;
6940 arbitRec.newstate = false;
6941 }
6942
6943 switch (arbitRec.method){
6944 case ArbitRec::METHOD_EXTERNAL:
6945 jam();
6946 ndbrequire(arbitRec.node == 0); // No arbitrator selected
6947
6948 // Don't start arbitrator in API node => ARBIT_RUN
6949 arbitRec.state = ARBIT_RUN;
6950 DEB_ARBIT(("Arbit state = ARBIT_RUN stateArbitStart"));
6951 arbitRec.newstate = true;
6952 return;
6953 break;
6954
6955 case ArbitRec::METHOD_DEFAULT:
6956 if (! arbitRec.sendCount) {
6957 jam();
6958 BlockReference blockRef = calcApiClusterMgrBlockRef(arbitRec.node);
6959 ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
6960 sd->sender = getOwnNodeId();
6961 sd->code = 0;
6962 sd->node = arbitRec.node;
6963 sd->ticket = arbitRec.ticket;
6964 sd->mask.clear();
6965 sendSignal(blockRef, GSN_ARBIT_STARTREQ, signal,
6966 ArbitSignalData::SignalLength, JBB);
6967 arbitRec.sendCount = 1;
6968 arbitRec.setTimestamp(); // send time
6969 return;
6970 }
6971 if (arbitRec.recvCount) {
6972 jam();
6973 reportArbitEvent(signal, NDB_LE_ArbitState);
6974 if (arbitRec.code == ArbitCode::ApiStart) {
6975 jam();
6976 arbitRec.state = ARBIT_RUN;
6977 DEB_ARBIT(("Arbit state = ARBIT_RUN stateArbitStart:Default"));
6978 arbitRec.newstate = true;
6979 return;
6980 }
6981 arbitRec.state = ARBIT_INIT;
6982 DEB_ARBIT(("Arbit state = ARBIT_INIT stateArbitStart:Default"));
6983 arbitRec.newstate = true;
6984 return;
6985 }
6986 if (arbitRec.getTimediff() > getArbitTimeout()) {
6987 jam();
6988 arbitRec.code = ArbitCode::ErrTimeout;
6989 reportArbitEvent(signal, NDB_LE_ArbitState);
6990 arbitRec.state = ARBIT_INIT;
6991 DEB_ARBIT(("Arbit state = ARBIT_INIT stateArbitStart:Default timeout"));
6992 arbitRec.newstate = true;
6993 return;
6994 }
6995 break;
6996
6997 default:
6998 ndbabort();
6999 }
7000 }
7001
7002 void
execARBIT_STARTCONF(Signal * signal)7003 Qmgr::execARBIT_STARTCONF(Signal* signal)
7004 {
7005 jamEntry();
7006 ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
7007 if (! arbitRec.match(sd)) {
7008 jam();
7009 return; // stray signal
7010 }
7011 if (arbitRec.state != ARBIT_START) {
7012 jam();
7013 return; // wrong state
7014 }
7015 if (arbitRec.recvCount) {
7016 jam();
7017 return; // wrong state
7018 }
7019 arbitRec.code = sd->code;
7020 arbitRec.recvCount = 1;
7021 }
7022
7023 void
execARBIT_STARTREF(Signal * signal)7024 Qmgr::execARBIT_STARTREF(Signal* signal)
7025 {
7026 jamEntry();
7027 ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
7028 if (sd->code == 0) {
7029 jam();
7030 sd->code = ArbitCode::ErrUnknown;
7031 }
7032 execARBIT_STARTCONF(signal);
7033 }
7034
/**
 * Handle RUN state. Send ticket to any new nodes which have
 * appeared after PREP state. We don't care about a CONF.
 */
void
Qmgr::stateArbitRun(Signal* signal)
{
  if (arbitRec.newstate) {
    jam();
    CRASH_INSERTION((Uint32)910 + arbitRec.state);

    arbitRec.code = 0;
    arbitRec.newstate = false;
  }
  // Walk the set of nodes added since PREP (newMask) and hand each one
  // the current arbitrator identity and ticket via PrepAtrun.
  NodeRecPtr aPtr;
  aPtr.i = 0;
  const unsigned stop = NodeBitmask::NotFound;
  while ((aPtr.i = arbitRec.newMask.find(aPtr.i + 1)) != stop) {
    jam();
    arbitRec.newMask.clear(aPtr.i);
    ptrAss(aPtr, nodeRec);
    ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
    sd->sender = getOwnNodeId();
    sd->code = ArbitCode::PrepAtrun;
    sd->node = arbitRec.node;
    sd->ticket = arbitRec.ticket;
    sd->mask.clear();
    sendSignal(aPtr.p->blockRef, GSN_ARBIT_PREPREQ, signal,
               ArbitSignalData::SignalLength, JBB);
  }
}
7066
/**
 * Handle CHOOSE state. Entered only from RUN state when
 * there is a possible network partitioning. Send CHOOSE to
 * the arbitrator. On win switch to INIT state because a new
 * ticket must be created.
 */
void
Qmgr::stateArbitChoose(Signal* signal)
{
  if (arbitRec.newstate) {
    jam();
    CRASH_INSERTION((Uint32)910 + arbitRec.state);

    // Reset per-state bookkeeping on entry to CHOOSE.
    arbitRec.sendCount = 0;
    arbitRec.recvCount = 0;
    arbitRec.code = 0;
    arbitRec.newstate = false;
  }

  switch(arbitRec.method){
  case ArbitRec::METHOD_EXTERNAL:
  {
    if (! arbitRec.sendCount) {
      jam();
      ndbrequire(arbitRec.node == 0); // No arbitrator selected
      // Don't send CHOOSE to anyone, just wait for timeout to expire
      arbitRec.sendCount = 1;
      arbitRec.setTimestamp();
      return;
    }

    if (arbitRec.getTimediff() > getArbitTimeout()) {
      jam();
      // Arbitration timeout has expired
      ndbrequire(arbitRec.node == 0); // No arbitrator selected

      // With external arbitration, surviving the wait counts as a win.
      NodeBitmask nodes;
      computeArbitNdbMask(nodes);
      arbitRec.code = ArbitCode::WinWaitExternal;
      reportArbitEvent(signal, NDB_LE_ArbitResult, nodes);

      sendCommitFailReq(signal);        // start commit of failed nodes
      arbitRec.state = ARBIT_INIT;
      DEB_ARBIT(("Arbit state = ARBIT_INIT stateArbitChoose"));
      arbitRec.newstate = true;
      return;
    }
    break;
  }

  case ArbitRec::METHOD_DEFAULT:
  {
    if (! arbitRec.sendCount) {
      jam();
      // First call: ask the arbitrator to choose between partitions.
      const BlockReference blockRef = calcApiClusterMgrBlockRef(arbitRec.node);
      ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
      sd->sender = getOwnNodeId();
      sd->code = 0;
      sd->node = arbitRec.node;
      sd->ticket = arbitRec.ticket;
      computeArbitNdbMask(sd->mask);
      if (ERROR_INSERTED(943))
      {
        // Test hook: suppress the request so arbitration times out.
        ndbout << "Not sending GSN_ARBIT_CHOOSEREQ, thereby causing"
               << " arbitration to time out."<< endl;
      }
      else
      {
        // Sent on JBA (high priority) since the outcome decides survival.
        sendSignal(blockRef, GSN_ARBIT_CHOOSEREQ, signal,
                   ArbitSignalData::SignalLength, JBA);
      }
      arbitRec.sendCount = 1;
      arbitRec.setTimestamp();          // send time
      return;
    }

    if (arbitRec.recvCount) {
      jam();
      reportArbitEvent(signal, NDB_LE_ArbitResult);
      if (arbitRec.code == ArbitCode::WinChoose) {
        jam();
        // We won: commit the failed nodes and restart with a new ticket.
        sendCommitFailReq(signal);      // start commit of failed nodes
        arbitRec.state = ARBIT_INIT;
        DEB_ARBIT(("Arbit state = ARBIT_INIT stateArbitChoose:Default"));
        arbitRec.newstate = true;
        return;
      }
      // We lost arbitration - this node must shut down.
      arbitRec.state = ARBIT_CRASH;
      DEB_ARBIT(("Arbit state = ARBIT_CRASH stateArbitChoose:Default"));
      arbitRec.newstate = true;
      stateArbitCrash(signal);          // do it at once
      return;
    }

    if (arbitRec.getTimediff() > getArbitTimeout()) {
      jam();
      // Arbitration timeout has expired
      arbitRec.code = ArbitCode::ErrTimeout;
      reportArbitEvent(signal, NDB_LE_ArbitState);
      arbitRec.state = ARBIT_CRASH;
      DEB_ARBIT(("Arbit state = ARBIT_CRASH stateArbitChoose:Def timeout"));
      arbitRec.newstate = true;
      stateArbitCrash(signal);          // do it at once
      return;
    }
    break;
  }

  default:
    ndbabort();
  }
}
7179
7180 void
execARBIT_CHOOSECONF(Signal * signal)7181 Qmgr::execARBIT_CHOOSECONF(Signal* signal)
7182 {
7183 jamEntry();
7184 ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
7185 if (!arbitRec.match(sd)) {
7186 jam();
7187 return; // stray signal
7188 }
7189 if (arbitRec.state != ARBIT_CHOOSE) {
7190 jam();
7191 return; // wrong state
7192 }
7193 if (arbitRec.recvCount) {
7194 jam();
7195 return; // wrong state
7196 }
7197 arbitRec.recvCount = 1;
7198 arbitRec.code = sd->code;
7199 }
7200
7201 void
execARBIT_CHOOSEREF(Signal * signal)7202 Qmgr::execARBIT_CHOOSEREF(Signal* signal)
7203 {
7204 jamEntry();
7205 ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
7206 if (sd->code == 0) {
7207 jam();
7208 sd->code = ArbitCode::ErrUnknown;
7209 }
7210 execARBIT_CHOOSECONF(signal);
7211 }
7212
/**
 * Handle CRASH state. We must crash immediately.
 * XXX tell other nodes in our party to crash too.
 */
void
Qmgr::stateArbitCrash(Signal* signal)
{
  jam();
  if (arbitRec.newstate) {
    jam();
    CRASH_INSERTION((Uint32)910 + arbitRec.state);
    arbitRec.setTimestamp();
    arbitRec.code = 0;
    arbitRec.newstate = false;
  }
#ifdef ndb_arbit_crash_wait_for_event_report_to_get_out
  // Optional (compile-time) grace period to let event reports drain
  // before the node shuts down.
  if (! (arbitRec.getTimediff() > getArbitTimeout()))
    return;
#endif
  CRASH_INSERTION(932);
  CRASH_INSERTION(938);
  CRASH_INSERTION(943);
  CRASH_INSERTION(944);
  // Terminate this node with the arbitration-shutdown exit code.
  progError(__LINE__, NDBD_EXIT_ARBIT_SHUTDOWN,
            "Arbitrator decided to shutdown this node");
}
7239
7240 /**
7241 * Arbitrator may inform us that it will exit. This lets us
7242 * start looking sooner for a new one. Handle it like API node
7243 * failure.
7244 */
7245 void
execARBIT_STOPREP(Signal * signal)7246 Qmgr::execARBIT_STOPREP(Signal* signal)
7247 {
7248 jamEntry();
7249 ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
7250 if (! arbitRec.match(sd)) {
7251 jam();
7252 return; // stray signal
7253 }
7254 arbitRec.code = ArbitCode::ApiExit;
7255 handleArbitApiFail(signal, arbitRec.node);
7256 }
7257
7258 Uint32
count_previously_alive_nodes()7259 Qmgr::count_previously_alive_nodes()
7260 {
7261 /**
7262 * This function is called as part of PREP_FAILCONF handling. This
7263 * means that we are preparing a node failure. This means that
7264 * NDBCNTR have not yet heard about the node failure and thus we
7265 * can still use the method is_node_started to see whether the
7266 * node was fully started before this failure.
7267 *
7268 * This method is called as part of arbitration check. A node is
7269 * only counted as previously alive if the node was fully started.
7270 *
7271 * In addition we check that the node is a data node and that the
7272 * QMGR node state is what we expect it to be if it was previously
7273 * alive.
7274 */
7275 Uint32 count = 0;
7276 NodeRecPtr aPtr;
7277 for (aPtr.i = 1; aPtr.i < MAX_NDB_NODES; aPtr.i++)
7278 {
7279 ptrAss(aPtr, nodeRec);
7280 if (getNodeInfo(aPtr.i).getType() == NodeInfo::DB &&
7281 c_ndbcntr->is_node_started(aPtr.i) &&
7282 (aPtr.p->phase == ZRUNNING || aPtr.p->phase == ZPREPARE_FAIL))
7283 {
7284 jam();
7285 jamLine(Uint16(aPtr.i));
7286 count++;
7287 }
7288 }
7289 return count;
7290 }
7291
7292 void
computeArbitNdbMask(NodeBitmaskPOD & aMask)7293 Qmgr::computeArbitNdbMask(NodeBitmaskPOD& aMask)
7294 {
7295 NodeRecPtr aPtr;
7296 aMask.clear();
7297 for (aPtr.i = 1; aPtr.i < MAX_NDB_NODES; aPtr.i++) {
7298 jam();
7299 ptrAss(aPtr, nodeRec);
7300 if (getNodeInfo(aPtr.i).getType() == NodeInfo::DB &&
7301 aPtr.p->phase == ZRUNNING)
7302 {
7303 jam();
7304 aMask.set(aPtr.i);
7305 }
7306 }
7307 }
7308
7309 void
computeArbitNdbMask(NdbNodeBitmaskPOD & aMask)7310 Qmgr::computeArbitNdbMask(NdbNodeBitmaskPOD& aMask)
7311 {
7312 NodeRecPtr aPtr;
7313 aMask.clear();
7314 for (aPtr.i = 1; aPtr.i < MAX_NDB_NODES; aPtr.i++) {
7315 jam();
7316 ptrAss(aPtr, nodeRec);
7317 if (getNodeInfo(aPtr.i).getType() == NodeInfo::DB &&
7318 aPtr.p->phase == ZRUNNING)
7319 {
7320 jam();
7321 aMask.set(aPtr.i);
7322 }
7323 }
7324 }
7325
7326 /**
7327 * Report arbitration event. We use arbitration signal format
7328 * where sender (word 0) is event type.
7329 */
7330 void
reportArbitEvent(Signal * signal,Ndb_logevent_type type,const NodeBitmask mask)7331 Qmgr::reportArbitEvent(Signal* signal, Ndb_logevent_type type,
7332 const NodeBitmask mask)
7333 {
7334 ArbitSignalData* sd = (ArbitSignalData*)&signal->theData[0];
7335 sd->sender = type;
7336 sd->code = arbitRec.code | (arbitRec.state << 16);
7337 sd->node = arbitRec.node;
7338 sd->ticket = arbitRec.ticket;
7339 sd->mask = mask;
7340
7341 // Log to console/stdout
7342 LogLevel ll;
7343 ll.setLogLevel(LogLevel::llNodeRestart, 15);
7344 g_eventLogger->log(type, &signal->theData[0],
7345 ArbitSignalData::SignalLength, 0, &ll);
7346
7347 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal,
7348 ArbitSignalData::SignalLength, JBB);
7349 }
7350
7351 // end of arbitration module
7352
void
Qmgr::execDUMP_STATE_ORD(Signal* signal)
{
  // DUMP 1 [max_nodes]: print QMGR's view of president, candidates
  // and the phase of each node record.
  if (signal->theData[0] == 1)
  {
    unsigned max_nodes = MAX_NDB_NODES;
    if (signal->getLength() == 2)
    {
      max_nodes = signal->theData[1];
      if (max_nodes == 0 || max_nodes >= MAX_NODES)
      {
        max_nodes = MAX_NODES;
      }
      else
      {
        max_nodes++; // Include node id argument in loop
      }
    }
    infoEvent("creadyDistCom = %d, cpresident = %d\n",
              creadyDistCom, cpresident);
    infoEvent("cpresidentAlive = %d, cpresidentCand = %d (gci: %d)\n",
              cpresidentAlive,
              c_start.m_president_candidate,
              c_start.m_president_candidate_gci);
    infoEvent("ctoStatus = %d\n", ctoStatus);
    for(Uint32 i = 1; i < max_nodes; i++){
      NodeRecPtr nodePtr;
      nodePtr.i = i;
      ptrCheckGuard(nodePtr, MAX_NODES, nodeRec);
      char buf[100];
      switch(nodePtr.p->phase){
      case ZINIT:
        sprintf(buf, "Node %d: ZINIT(%d)", i, nodePtr.p->phase);
        break;
      case ZSTARTING:
        sprintf(buf, "Node %d: ZSTARTING(%d)", i, nodePtr.p->phase);
        break;
      case ZRUNNING:
        sprintf(buf, "Node %d: ZRUNNING(%d)", i, nodePtr.p->phase);
        break;
      case ZPREPARE_FAIL:
        sprintf(buf, "Node %d: ZPREPARE_FAIL(%d)", i, nodePtr.p->phase);
        break;
      case ZFAIL_CLOSING:
        sprintf(buf, "Node %d: ZFAIL_CLOSING(%d)", i, nodePtr.p->phase);
        break;
      case ZAPI_INACTIVE:
        sprintf(buf, "Node %d: ZAPI_INACTIVE(%d)", i, nodePtr.p->phase);
        break;
      case ZAPI_ACTIVE:
        sprintf(buf, "Node %d: ZAPI_ACTIVE(%d)", i, nodePtr.p->phase);
        break;
      case ZAPI_ACTIVATION_ONGOING:
        sprintf(buf, "Node %d: ZAPI_ACTIVATION_ONGOING(%d)",
                i,
                nodePtr.p->phase);
        break;
      default:
        sprintf(buf, "Node %d: <UNKNOWN>(%d)", i, nodePtr.p->phase);
        break;
      }
      infoEvent("%s", buf);
    }
  }

#ifdef ERROR_INSERT
  // DUMP 935 <extra>: arm error insert 935 with an extra argument.
  if (signal->theData[0] == 935 && signal->getLength() == 2)
  {
    SET_ERROR_INSERT_VALUE(935);
    c_error_insert_extra = signal->theData[1];
  }
#endif

  // DUMP 900 <nodeId>: simulate an API node failure (disconnect).
  if (signal->theData[0] == 900 && signal->getLength() == 2)
  {
    ndbout_c("disconnecting %u", signal->theData[1]);
    api_failed(signal, signal->theData[1]);
  }

  // DUMP 908 [tag]: print heartbeat topology (dynamic ids, neighbours)
  // on one line, prefixed "HB:" for easy grepping in the out-log.
  if (signal->theData[0] == 908)
  {
    int tag = signal->getLength() < 2 ? -1 : signal->theData[1];
    char buf[8192];
    // for easy grepping in *out.log ...
    strcpy(buf, "HB:");
    if (tag >= 0)
      sprintf(buf+strlen(buf), "%d:", tag);
    sprintf(buf+strlen(buf), " pres:%u", cpresident);
    sprintf(buf+strlen(buf), " own:%u", getOwnNodeId());
    NodeRecPtr myNodePtr;
    myNodePtr.i = getOwnNodeId();
    ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);
    sprintf(buf+strlen(buf), " dyn:%u-%u", myNodePtr.p->ndynamicId & 0xFFFF, myNodePtr.p->ndynamicId >> 16);
    sprintf(buf+strlen(buf), " mxdyn:%u", c_maxDynamicId);
    sprintf(buf+strlen(buf), " hb:%u->%u->%u", cneighbourl, getOwnNodeId(), cneighbourh);
    sprintf(buf+strlen(buf), " node:dyn-hi,cfg:");
    NodeRecPtr nodePtr;
    for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++)
    {
      ptrAss(nodePtr, nodeRec);
      Uint32 type = getNodeInfo(nodePtr.i).m_type;
      if (type == NodeInfo::DB)
      {
        sprintf(buf+strlen(buf), " %u:%u-%u,%u", nodePtr.i, nodePtr.p->ndynamicId & 0xFFFF, nodePtr.p->ndynamicId >> 16, nodePtr.p->hbOrder);
      }
    }
    ndbout << buf << endl;
  }

#ifdef ERROR_INSERT
  // DUMP 9992/9993 <nodeId>: node ids 257-259 are symbolic (left
  // neighbour, right neighbour, president); map them to a concrete
  // node id and re-send the dump to CMVMI.
  Uint32 dumpCode = signal->theData[0];
  if ((dumpCode == 9992) ||
      (dumpCode == 9993))
  {
    if (signal->getLength() == 2)
    {
      Uint32 nodeId = signal->theData[1];
      Uint32& newNodeId = signal->theData[1];
      Uint32 length = 2;
      assert(257 > MAX_NODES);
      if (nodeId > MAX_NODES)
      {
        const char* type = "None";
        switch (nodeId)
        {
        case 257:
        {
          /* Left (lower) neighbour */
          newNodeId = cneighbourl;
          type = "Left neighbour";
          break;
        }
        case 258:
        {
          /* Right (higher) neighbour */
          newNodeId = cneighbourh;
          type = "Right neighbour";
          break;
        }
        case 259:
        {
          /* President */
          newNodeId = cpresident;
          type = "President";
          break;
        }
        }
        ndbout_c("QMGR : Mapping request on node id %u to node id %u (%s)",
                 nodeId, newNodeId, type);
        if (newNodeId != nodeId)
        {
          sendSignal(CMVMI_REF, GSN_DUMP_STATE_ORD, signal, length, JBB);
        }
      }
    }
  }

  // DUMP 9994 <delay>: set the connectivity-check delay and enable
  // the connectivity check.
  if (dumpCode == 9994)
  {
    ndbout_c("setCCDelay(%u)", signal->theData[1]);
    setCCDelay(signal->theData[1]);
    m_connectivity_check.m_enabled = true;
  }
#endif

  // DUMP 939 <nodeId>: force-close communication to a node (no reply
  // expected); also arms error insert 939 for that node.
  if (signal->theData[0] == 939 && signal->getLength() == 2)
  {
    jam();
    Uint32 nodeId = signal->theData[1];
    ndbout_c("Force close communication to %u", nodeId);
    SET_ERROR_INSERT_VALUE2(939, nodeId);
    CloseComReqConf * closeCom = CAST_PTR(CloseComReqConf,
                                          signal->getDataPtrSend());

    closeCom->xxxBlockRef = reference();
    closeCom->requestType = CloseComReqConf::RT_NO_REPLY;
    closeCom->failNo      = 0;
    closeCom->noOfNodes   = 1;
    closeCom->failedNodeId = nodeId;
    sendSignal(TRPMAN_REF, GSN_CLOSE_COMREQ, signal,
               CloseComReqConf::SignalLength, JBB);
  }
}//Qmgr::execDUMP_STATE_ORD()
7536
void
Qmgr::execAPI_BROADCAST_REP(Signal* signal)
{
  jamEntry();
  ApiBroadcastRep api= *(const ApiBroadcastRep*)signal->getDataPtr();

  SectionHandle handle(this, signal);
  // Strip the ApiBroadcastRep header words; what remains in theData
  // is the payload to broadcast.
  Uint32 len = signal->getLength() - ApiBroadcastRep::SignalLength;
  memmove(signal->theData, signal->theData+ApiBroadcastRep::SignalLength,
          4*len);

  // Collect all active API nodes whose version is at least minVersion.
  NodeBitmask mask;
  NodeRecPtr nodePtr;
  for (nodePtr.i = 1; nodePtr.i < MAX_NODES; nodePtr.i++)
  {
    jam();
    ptrAss(nodePtr, nodeRec);
    if (nodePtr.p->phase == ZAPI_ACTIVE &&
        getNodeInfo(nodePtr.i).m_version >= api.minVersion)
    {
      jam();
      mask.set(nodePtr.i);
    }
  }

  if (mask.isclear())
  {
    jam();
    // No recipients: still release any attached sections to avoid a leak.
    releaseSections(handle);
    return;
  }

  // Broadcast the payload (with its sections) to all selected API nodes.
  NodeReceiverGroup rg(API_CLUSTERMGR, mask);
  sendSignal(rg, api.gsn, signal, len, JBB,
             &handle);
}
7573
void
Qmgr::execNODE_FAILREP(Signal * signal)
{
  jamEntry();
  NodeFailRep* nodeFail = (NodeFailRep*)signal->getDataPtr();
  if(signal->getNoOfSections() >= 1)
  {
    // Long signal: the failed-node bitmask arrives in section 0.
    // Only senders of a version that supports bitmask-in-section may
    // use this form.
    ndbrequire(ndbd_send_node_bitmask_in_section(
        getNodeInfo(refToNode(signal->getSendersBlockRef())).m_version));
    SegmentedSectionPtr ptr;
    SectionHandle handle(this, signal);
    handle.getSection(ptr, 0);
    memset(nodeFail->theNodes, 0, sizeof(nodeFail->theNodes));
    copy(nodeFail->theNodes, ptr);
    releaseSections(handle);
  }
  else
  {
    // Short signal (older sender): zero the bitmask words beyond the
    // legacy 48-bit node mask so the full-size mask is well defined.
    memset(nodeFail->theNodes + NdbNodeBitmask48::Size, 0,
           _NDB_NBM_DIFF_BYTES);
  }

  NdbNodeBitmask allFailed;
  allFailed.assign(NdbNodeBitmask::Size, nodeFail->theNodes);

  // make sure any distributed signals get acknowledged
  // destructive of the signal
  NdbNodeBitmask failedNodes;
  failedNodes.assign(NdbNodeBitmask::Size, nodeFail->theNodes);
  c_counterMgr.execNODE_FAILREP(signal, failedNodes);

  // For every failed node: reset all multi-transporter bookkeeping,
  // and for nodes in our own node group also restore the neighbour
  // transporter setup.
  Uint32 nodeId = 0;
  while (!allFailed.isclear())
  {
    nodeId = allFailed.find(nodeId + 1);
    //ndbrequire(nodeId != Bitmask::NotFound);
    allFailed.clear(nodeId);
    NodeRecPtr nodePtr;
    nodePtr.i = nodeId;
    ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
    nodePtr.p->m_is_multi_trp_setup = false;
    nodePtr.p->m_is_ready_to_switch_trp = false;
    nodePtr.p->m_is_freeze_thread_completed = false;
    nodePtr.p->m_is_activate_trp_ready_for_me = false;
    nodePtr.p->m_is_activate_trp_ready_for_other = false;
    nodePtr.p->m_is_preparing_switch_trp = false;
    nodePtr.p->m_is_using_multi_trp = false;
    nodePtr.p->m_set_up_multi_trp_started = false;
    nodePtr.p->m_multi_trp_blockref = 0;
    nodePtr.p->m_used_num_multi_trps = 0;
    nodePtr.p->m_check_multi_trp_connect_loop_count = 0;
    nodePtr.p->m_num_activated_trps = 0;
    if (nodePtr.p->m_is_in_same_nodegroup)
    {
      jam();
      check_no_multi_trp(signal, nodePtr.i);
      // The multi-transporter mutex must be held while inspecting and
      // switching active transporters.
      globalTransporterRegistry.lockMultiTransporters();
      bool switch_required = false;
      Multi_Transporter *multi_trp =
        globalTransporterRegistry.get_node_multi_transporter(nodePtr.i);
      if (multi_trp &&
          globalTransporterRegistry.get_num_active_transporters(multi_trp) > 1)
      {
        /**
         * The timing of the NODE_FAILREP signal is such that the transporter
         * haven't had time to switch the active transporters yet, we know
         * this will happen, so we switch now to use the old transporter for
         * the neighbour node. The node is currently down, so will have to
         * be setup before it can be used again.
         *
         * We will restore the active transporters to be the multi
         * transporters to enable the transporters to be handled by the
         * disconnect code. This is why it is required to lock the
         * multi transporter mutex while performing this action.
         */
        switch_required = true;
        DEB_MULTI_TRP(("switch_active_trp for node %u's transporter",
                       nodePtr.i));
        globalTransporterRegistry.switch_active_trp(multi_trp);
      }

      DEB_MULTI_TRP(("Change neighbour node setup for node %u",
                     nodePtr.i));
      startChangeNeighbourNode();
      setNeighbourNode(nodePtr.i);
      endChangeNeighbourNode();
      if (switch_required)
      {
        // Switch back so the disconnect code sees the multi transporters
        // as the active set (see comment above).
        globalTransporterRegistry.switch_active_trp(multi_trp);
        DEB_MULTI_TRP(("switch_active_trp for node %u's transporter",
                       nodePtr.i));
      }
      globalTransporterRegistry.unlockMultiTransporters();
    }
  }
}
7669
void
Qmgr::execALLOC_NODEID_REQ(Signal * signal)
{
  jamEntry();
  AllocNodeIdReq req = *(AllocNodeIdReq*)signal->getDataPtr();
  Uint32 error = 0;

  NodeRecPtr nodePtr;
  nodePtr.i = req.nodeId;
  if ((nodePtr.i >= MAX_NODES) ||
      ((req.nodeType == NodeInfo::DB) &&
       (nodePtr.i >= MAX_NDB_NODES)))
  {
    /* Ignore messages about nodes not even within range */
    jam();
    return;
  }
  ptrAss(nodePtr, nodeRec);

  if (refToBlock(req.senderRef) != QMGR) // request from management server
  {
    /* master */
    // Master path: validate the request, reserve the node id with a
    // secret, then fan the request out to all QMGRs in the cluster.
    Dbdih *dih = (Dbdih*)globalData.getBlock(DBDIH, instance());
    bool is_dih_master = dih->is_master();
    if (getOwnNodeId() != cpresident || !is_dih_master)
    {
      jam();
      /**
       * Either we are not president which leads to that we are not master
       * in DIH, or we are president but hasn't yet seen our election to
       * master in DIH. Either way we respond with NotMaster, if we are
       * president and not master the response will lead to a retry which
       * is likely to be successful.
       */
      if (getOwnNodeId() == cpresident)
      {
        jam();
        g_eventLogger->debug("President, but not master at ALLOC_NODEID_REQ");
      }
      error = AllocNodeIdRef::NotMaster;
    }
    else if (!opAllocNodeIdReq.m_tracker.done())
    {
      jam();
      // Only one allocation round at a time.
      error = AllocNodeIdRef::Busy;
    }
    else if (c_connectedNodes.get(req.nodeId))
    {
      jam();
      error = AllocNodeIdRef::NodeConnected;
    }
    else if (nodePtr.p->m_secret != 0)
    {
      jam();
      // A non-zero secret means the node id is already reserved.
      error = AllocNodeIdRef::NodeReserved;
    }
    else if (req.nodeType != getNodeInfo(req.nodeId).m_type)
    {
      jam();
      error = AllocNodeIdRef::NodeTypeMismatch;
    }
    else if (req.nodeType == NodeInfo::API && c_allow_api_connect == 0)
    {
      jam();
      error = AllocNodeIdRef::NotReady;
    }

    if (error)
    {
      jam();
      g_eventLogger->debug("Alloc node id for node %u failed, err: %u",
                           nodePtr.i,
                           error);
      AllocNodeIdRef * ref = (AllocNodeIdRef*)signal->getDataPtrSend();
      ref->senderRef = reference();
      ref->errorCode = error;
      ref->masterRef = numberToRef(QMGR, cpresident);
      ref->senderData = req.senderData;
      ref->nodeId = req.nodeId;
      sendSignal(req.senderRef, GSN_ALLOC_NODEID_REF, signal,
                 AllocNodeIdRef::SignalLength, JBB);
      return;
    }

    if (ERROR_INSERTED(934) && req.nodeId != getOwnNodeId())
    {
      CRASH_INSERTION(934);
    }

    /**
     * generate secret
     */
    // The secret is built from the current time and our node id; it
    // ties later CONF/REF replies to this particular reservation.
    const NDB_TICKS now = NdbTick_getCurrentTicks();
    const Uint32 secret_hi = Uint32(now.getUint64() >> 24);
    const Uint32 secret_lo = Uint32(now.getUint64() << 8) + getOwnNodeId();
    req.secret_hi = secret_hi;
    req.secret_lo = secret_lo;

    // Cap the reservation timeout at 60 seconds.
    if (req.timeout > 60000)
      req.timeout = 60000;

    nodePtr.p->m_secret = (Uint64(secret_hi) << 32) + secret_lo;
    nodePtr.p->m_alloc_timeout = NdbTick_AddMilliseconds(now,req.timeout);

    opAllocNodeIdReq.m_req = req;
    opAllocNodeIdReq.m_error = 0;
    opAllocNodeIdReq.m_connectCount =
      getNodeInfo(refToNode(req.senderRef)).m_connectCount;

    jam();
    // Broadcast the request to all connected QMGRs and track replies.
    AllocNodeIdReq * req2 = (AllocNodeIdReq*)signal->getDataPtrSend();
    * req2 = req;
    req2->senderRef = reference();
    NodeReceiverGroup rg(QMGR, c_clusterNodes);
    RequestTracker & p = opAllocNodeIdReq.m_tracker;
    p.init<AllocNodeIdRef>(c_counterMgr, rg, GSN_ALLOC_NODEID_REF, 0);

    sendSignal(rg, GSN_ALLOC_NODEID_REQ, signal,
               AllocNodeIdReq::SignalLengthQMGR, JBB);
    return;
  }

  /* participant */
  // Participant path: answer the master's broadcast with our local view.
  if (c_connectedNodes.get(req.nodeId))
  {
    jam();
    error = AllocNodeIdRef::NodeConnected;
  }
  else if (req.nodeType != getNodeInfo(req.nodeId).m_type)
  {
    jam();
    error = AllocNodeIdRef::NodeTypeMismatch;
  }
  else if ((nodePtr.p->failState != NORMAL) ||
           ((req.nodeType == NodeInfo::DB) &&
            (cfailedNodes.get(nodePtr.i))))
  {
    /**
     * Either the node has committed its node failure in QMGR but not yet
     * completed the node internal node failure handling. Or the node
     * failure commit process is still ongoing in QMGR. We should not
     * allocate a node id in either case.
     */
    jam();
    error = AllocNodeIdRef::NodeFailureHandlingNotCompleted;
  }
  else if (req.nodeType == NodeInfo::API && nodePtr.p->phase != ZAPI_INACTIVE)
  {
    jam();
    if (cpresident != getOwnNodeId() && c_allow_api_connect == 0)
    {
      /**
       * Don't block during NR
       */
      jam();
    }
    else
    {
      jam();
      if (nodePtr.p->phase == ZFAIL_CLOSING)
      {
        /* Occurs during node startup */
        error = AllocNodeIdRef::NodeFailureHandlingNotCompleted;
      }
      else
      {
        error = AllocNodeIdRef::NodeReserved;
      }
    }
  }
#if 0
  /**
   * For now only make "time/secret" based reservation on master
   * as we otherwise also need to clear it on failure + handle
   * master failure
   */
  else if (nodePtr.p->m_secret != 0)
  {
    jam();
    error = AllocNodeIdRef::NodeReserved;
  }
#endif

  if (error)
  {
    jam();
    g_eventLogger->info("Alloc nodeid for node %u failed,err: %u",
                        req.nodeId,
                        error);
    AllocNodeIdRef * ref = (AllocNodeIdRef*)signal->getDataPtrSend();
    ref->senderRef = reference();
    ref->errorCode = error;
    ref->senderData = req.senderData;
    ref->nodeId = req.nodeId;
    ref->masterRef = numberToRef(QMGR, cpresident);
    sendSignal(req.senderRef, GSN_ALLOC_NODEID_REF, signal,
               AllocNodeIdRef::SignalLength, JBB);
    return;
  }

  // All checks passed: confirm, echoing the master's secret.
  AllocNodeIdConf * conf = (AllocNodeIdConf*)signal->getDataPtrSend();
  conf->senderRef = reference();
  conf->secret_hi = req.secret_hi;
  conf->secret_lo = req.secret_lo;
  sendSignal(req.senderRef, GSN_ALLOC_NODEID_CONF, signal,
             AllocNodeIdConf::SignalLength, JBB);
}
7877
7878 void
execALLOC_NODEID_CONF(Signal * signal)7879 Qmgr::execALLOC_NODEID_CONF(Signal * signal)
7880 {
7881 /* master */
7882
7883 jamEntry();
7884 const AllocNodeIdConf * conf = (AllocNodeIdConf*)signal->getDataPtr();
7885 opAllocNodeIdReq.m_tracker.reportConf(c_counterMgr,
7886 refToNode(conf->senderRef));
7887
7888 if (signal->getLength() >= AllocNodeIdConf::SignalLength)
7889 {
7890 jam();
7891 if (opAllocNodeIdReq.m_req.secret_hi != conf->secret_hi ||
7892 opAllocNodeIdReq.m_req.secret_lo != conf->secret_lo)
7893 {
7894 jam();
7895 if (opAllocNodeIdReq.m_error == 0)
7896 {
7897 jam();
7898 opAllocNodeIdReq.m_error = AllocNodeIdRef::Undefined;
7899 }
7900 }
7901 }
7902
7903 completeAllocNodeIdReq(signal);
7904 }
7905
7906
7907 void
execALLOC_NODEID_REF(Signal * signal)7908 Qmgr::execALLOC_NODEID_REF(Signal * signal)
7909 {
7910 /* master */
7911
7912 jamEntry();
7913 const AllocNodeIdRef * ref = (AllocNodeIdRef*)signal->getDataPtr();
7914
7915 if (ref->errorCode == AllocNodeIdRef::NF_FakeErrorREF)
7916 {
7917 jam();
7918 if (ref->nodeId == refToNode(ref->senderRef))
7919 {
7920 /**
7921 * The node id we are trying to allocate has responded with a REF,
7922 * this was sent in response to a node failure, so we are most
7923 * likely not ready to allocate this node id yet. Report node
7924 * failure handling not ready yet.
7925 */
7926 jam();
7927 opAllocNodeIdReq.m_tracker.reportRef(c_counterMgr,
7928 refToNode(ref->senderRef));
7929 if (opAllocNodeIdReq.m_error == 0)
7930 {
7931 jam();
7932 opAllocNodeIdReq.m_error =
7933 AllocNodeIdRef::NodeFailureHandlingNotCompleted;
7934 }
7935 }
7936 else
7937 {
7938 jam();
7939 opAllocNodeIdReq.m_tracker.ignoreRef(c_counterMgr,
7940 refToNode(ref->senderRef));
7941 }
7942 }
7943 else
7944 {
7945 jam();
7946 opAllocNodeIdReq.m_tracker.reportRef(c_counterMgr,
7947 refToNode(ref->senderRef));
7948 if (opAllocNodeIdReq.m_error == 0)
7949 {
7950 jam();
7951 opAllocNodeIdReq.m_error = ref->errorCode;
7952 }
7953 }
7954 completeAllocNodeIdReq(signal);
7955 }
7956
/**
 * Executed on the president (master) node after each ALLOC_NODEID_CONF /
 * ALLOC_NODEID_REF.  Once replies from all polled nodes are in :
 *  - on any REF : clear the node id reservation (secret) and send
 *    ALLOC_NODEID_REF back to the requester,
 *  - otherwise : send ALLOC_NODEID_CONF and inform master DBDIH so it
 *    can track node restart state.
 */
void
Qmgr::completeAllocNodeIdReq(Signal *signal)
{
  /* master */

  if (!opAllocNodeIdReq.m_tracker.done())
  {
    jam();
    /* Replies still outstanding from some polled nodes. */
    return;
  }

  if (opAllocNodeIdReq.m_connectCount !=
      getNodeInfo(refToNode(opAllocNodeIdReq.m_req.senderRef)).m_connectCount)
  {
    // The requester's connection has been re-established since the
    // request was received (connect count mismatch), so any reply would
    // go to a stale incarnation of the requester - drop it silently.
    jam();
    return;
  }

  if (opAllocNodeIdReq.m_tracker.hasRef())
  {
    jam();

    {
      /**
       * Clear reservation
       */
      NodeRecPtr nodePtr;
      nodePtr.i = opAllocNodeIdReq.m_req.nodeId;
      ptrAss(nodePtr, nodeRec);
      nodePtr.p->m_secret = 0;
    }
    g_eventLogger->info("Alloc node id for node %u failed, err: %u",
                        opAllocNodeIdReq.m_req.nodeId,
                        opAllocNodeIdReq.m_error);

    /* Return the first error recorded during the poll round. */
    AllocNodeIdRef * ref = (AllocNodeIdRef*)signal->getDataPtrSend();
    ref->senderRef = reference();
    ref->senderData = opAllocNodeIdReq.m_req.senderData;
    ref->nodeId = opAllocNodeIdReq.m_req.nodeId;
    ref->errorCode = opAllocNodeIdReq.m_error;
    ref->masterRef = numberToRef(QMGR, cpresident);
    ndbassert(AllocNodeIdRef::SignalLength == 5);
    sendSignal(opAllocNodeIdReq.m_req.senderRef, GSN_ALLOC_NODEID_REF, signal,
               AllocNodeIdRef::SignalLength, JBB);
    return;
  }

  jam();

  g_eventLogger->info("Alloc node id for node %u succeeded",
                      opAllocNodeIdReq.m_req.nodeId);
  /* Echo the secret back so the requester can prove its identity. */
  AllocNodeIdConf * conf = (AllocNodeIdConf*)signal->getDataPtrSend();
  conf->senderRef = reference();
  conf->senderData = opAllocNodeIdReq.m_req.senderData;
  conf->nodeId = opAllocNodeIdReq.m_req.nodeId;
  conf->secret_lo = opAllocNodeIdReq.m_req.secret_lo;
  conf->secret_hi = opAllocNodeIdReq.m_req.secret_hi;
  sendSignal(opAllocNodeIdReq.m_req.senderRef, GSN_ALLOC_NODEID_CONF, signal,
             AllocNodeIdConf::SignalLength, JBB);

  /**
   * We are the master and master DIH wants to keep track of node restart
   * state to be able to control LCP start and stop and also to be able
   * to easily report this state to the user when he asks for it.
   */
  AllocNodeIdRep *rep = (AllocNodeIdRep*)signal->getDataPtrSend();
  rep->nodeId = opAllocNodeIdReq.m_req.nodeId;
  EXECUTE_DIRECT(DBDIH, GSN_ALLOC_NODEID_REP, signal,
                 AllocNodeIdRep::SignalLength);
}
8028
/**
 * Receive a shutdown request.  The set of nodes to stop arrives either
 * as a long-signal section (when the sender attaches one) or inline in
 * the fixed signal part as a 48-bit bitmask.  The request is stored in
 * c_stopReq and, when this node is included, acknowledged immediately
 * with STOP_CONF; the actual stop action is taken later via
 * check_multi_node_shutdown().
 */
void
Qmgr::execSTOP_REQ(Signal* signal)
{
  jamEntry();

  const StopReq* req = (const StopReq*)signal->getDataPtr();
  c_stopReq.senderRef = req->senderRef;
  c_stopReq.senderData = req->senderData;
  c_stopReq.requestInfo = req->requestInfo;
  c_stopReq.nodes.clear();
  if (signal->getNoOfSections() >= 1)
  {
    jam();
    /* Node bitmask shipped in section 0. */
    SectionHandle handle(this, signal);
    SegmentedSectionPtr ptr;
    handle.getSection(ptr, 0);
    ndbrequire(ptr.sz <= NdbNodeBitmask::Size);
    copy(c_stopReq.nodes.rep.data, ptr);
    releaseSections(handle);
  }
  else
  {
    jam();
    /* Legacy inline 48-bit bitmask in the fixed part. */
    c_stopReq.nodes.assign(NdbNodeBitmask48::Size, req->nodes);
  }

  if (c_stopReq.senderRef)
  {
    jam();
    /* We only accept STOP_REQs that include ourselves. */
    ndbrequire(c_stopReq.nodes.get(getOwnNodeId()));

    StopConf *conf = (StopConf*)signal->getDataPtrSend();
    conf->senderData = c_stopReq.senderData;
    conf->nodeState = getOwnNodeId();
    sendSignal(c_stopReq.senderRef,
               GSN_STOP_CONF, signal, StopConf::SignalLength, JBA);
  }
}
8067
8068 bool
check_multi_node_shutdown(Signal * signal)8069 Qmgr::check_multi_node_shutdown(Signal* signal)
8070 {
8071 if (c_stopReq.senderRef &&
8072 c_stopReq.nodes.get(getOwnNodeId()))
8073 {
8074 jam();
8075 if(StopReq::getPerformRestart(c_stopReq.requestInfo))
8076 {
8077 jam();
8078 StartOrd * startOrd = (StartOrd *)&signal->theData[0];
8079 startOrd->restartInfo = c_stopReq.requestInfo;
8080 sendSignal(CMVMI_REF, GSN_START_ORD, signal, 2, JBA);
8081 } else {
8082 sendSignal(CMVMI_REF, GSN_STOP_ORD, signal, 1, JBA);
8083 }
8084 return true;
8085 }
8086 return false;
8087 }
8088
/**
 * Validate the configured heartbeat ordering (hbOrder) of all DB nodes.
 *
 * @return  0 : valid - either no DB node defines an order (feature
 *              unused) or every DB node has a distinct non-zero value
 *              (m_hb_order_config_used is then set to true),
 *         -1 : invalid - only some DB nodes define a value,
 *         -2 : invalid - two DB nodes share the same non-zero value.
 */
int
Qmgr::check_hb_order_config()
{
  m_hb_order_config_used = false;
  Uint32 count = 0;       // number of DB nodes seen
  Uint32 count_zero = 0;  // of those, how many have no hbOrder set
  NodeRecPtr nodePtr;
  for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++)
  {
    ptrAss(nodePtr, nodeRec);
    const NodeInfo& nodeInfo = getNodeInfo(nodePtr.i);
    if (nodeInfo.m_type == NodeInfo::DB)
    {
      count++;
      if (nodePtr.p->hbOrder == 0)
        count_zero++;
    }
  }
  ndbrequire(count != 0); // must have node info
  if (count_zero == count)
  {
    jam();
    return 0; // no hbOrder defined
  }
  if (count_zero != 0)
  {
    jam();
    return -1; // error: not all zero or all nonzero
  }
  /* All values are non-zero : check pairwise uniqueness (O(n^2) over
   * at most MAX_NDB_NODES entries, only run at configuration time). */
  for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++)
  {
    ptrAss(nodePtr, nodeRec);
    const NodeInfo& nodeInfo = getNodeInfo(nodePtr.i);
    if (nodeInfo.m_type == NodeInfo::DB)
    {
      NodeRecPtr nodePtr2;
      for (nodePtr2.i = nodePtr.i + 1; nodePtr2.i < MAX_NDB_NODES; nodePtr2.i++)
      {
        ptrAss(nodePtr2, nodeRec);
        const NodeInfo& nodeInfo2 = getNodeInfo(nodePtr2.i);
        if (nodeInfo2.m_type == NodeInfo::DB)
        {
          if (nodePtr.i != nodePtr2.i &&
              nodePtr.p->hbOrder == nodePtr2.p->hbOrder)
          {
            jam();
            return -2; // error: duplicate nonzero value
          }
        }
      }
    }
  }
  m_hb_order_config_used = true;
  return 0;
}
8144
/* Connectivity-check timing, counted in check-interval 'ticks' (see
 * checkConnectivityTimeSignal) : a node that has not replied after
 * CC_SuspectTicks becomes suspect; after CC_FailedTicks it is failed. */
static const Uint32 CC_SuspectTicks = 1;
static const Uint32 CC_FailedTicks = 2;
8147
8148 void
startConnectivityCheck(Signal * signal,Uint32 reason,Uint32 causingNode)8149 Qmgr::startConnectivityCheck(Signal* signal, Uint32 reason, Uint32 causingNode)
8150 {
8151 jam();
8152 ndbrequire(m_connectivity_check.getEnabled());
8153
8154 if (m_connectivity_check.m_active)
8155 {
8156 jam();
8157 /* Connectivity check underway already
8158 * do nothing
8159 */
8160 return;
8161 }
8162
8163
8164 m_connectivity_check.m_nodesPinged.clear();
8165
8166 /* Send NODE_PINGREQ signal to all other running nodes, and
8167 * initialise connectivity check bitmasks.
8168 * Note that nodes may already be considered suspect due to
8169 * a previous connectivity check round.
8170 */
8171 Uint32 ownId = getOwnNodeId();
8172 NodePingReq* pingReq = CAST_PTR(NodePingReq, &signal->theData[0]);
8173 pingReq->senderData = ++m_connectivity_check.m_currentRound;
8174 pingReq->senderRef = reference();
8175
8176 for (Uint32 i=1; i < MAX_NDB_NODES; i++)
8177 {
8178 if (i != ownId)
8179 {
8180 NodeRec& node = nodeRec[i];
8181 if (node.phase == ZRUNNING)
8182 {
8183 /* If connection was considered ok, treat as unknown,
8184 * If it was considered slow, continue to treat
8185 * as slow
8186 */
8187 sendSignal(node.blockRef,
8188 GSN_NODE_PING_REQ,
8189 signal,
8190 NodePingReq::SignalLength,
8191 JBA);
8192
8193 m_connectivity_check.m_nodesPinged.set(i);
8194 }
8195 }
8196 }
8197
8198 /* Initialise result bitmasks */
8199 m_connectivity_check.m_nodesWaiting.assign(m_connectivity_check.m_nodesPinged);
8200 m_connectivity_check.m_nodesFailedDuring.clear();
8201
8202 /* Ensure only live nodes are considered suspect */
8203 m_connectivity_check.m_nodesSuspect.bitAND(m_connectivity_check.m_nodesPinged);
8204
8205 const char* reasonText = "Unknown";
8206 bool firstTime = true;
8207
8208 switch(reason)
8209 {
8210 case FailRep::ZHEARTBEAT_FAILURE:
8211 reasonText = "Heartbeat failure";
8212 break;
8213 case FailRep::ZCONNECT_CHECK_FAILURE:
8214 reasonText = "Connectivity check request";
8215 break;
8216 default:
8217 firstTime = false;
8218 ndbrequire(m_connectivity_check.m_nodesSuspect.count() > 0);
8219 break;
8220 }
8221
8222 if (!m_connectivity_check.m_nodesPinged.isclear())
8223 {
8224 jam();
8225 {
8226 char buff[NdbNodeBitmask::TextLength + 1];
8227 m_connectivity_check.m_nodesPinged.getText(buff);
8228 if (firstTime)
8229 {
8230 g_eventLogger->info("QMGR : Starting connectivity check of %u other nodes (%s) due to %s from node %u.",
8231 m_connectivity_check.m_nodesPinged.count(),
8232 buff,
8233 reasonText,
8234 causingNode);
8235 }
8236 else
8237 {
8238 char buff2[NdbNodeBitmask::TextLength + 1];
8239 m_connectivity_check.m_nodesSuspect.getText(buff2);
8240 g_eventLogger->info("QMGR : Restarting connectivity check of %u other nodes (%s) due to %u syspect nodes (%s)",
8241 m_connectivity_check.m_nodesPinged.count(),
8242 buff,
8243 m_connectivity_check.m_nodesSuspect.count(),
8244 buff2);
8245 }
8246 }
8247
8248 /* Generate cluster log event */
8249 Uint32 bitmaskSz = NdbNodeBitmask::Size;
8250 signal->theData[0] = NDB_LE_ConnectCheckStarted;
8251 signal->theData[1] = m_connectivity_check.m_nodesPinged.count();
8252 signal->theData[2] = reason;
8253 signal->theData[3] = causingNode;
8254 signal->theData[4] = bitmaskSz;
8255 Uint32* sigPtr = &signal->theData[5];
8256 m_connectivity_check.m_nodesPinged.copyto(bitmaskSz, sigPtr); sigPtr+= bitmaskSz;
8257 m_connectivity_check.m_nodesSuspect.copyto(bitmaskSz, sigPtr);
8258
8259 LinearSectionPtr lsptr[3];
8260 lsptr[0].p = signal->theData;
8261 lsptr[0].sz = 5 + 2 * NdbNodeBitmask::Size;
8262 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 1, JBB, lsptr, 1);
8263
8264 m_connectivity_check.m_active = true;
8265 m_connectivity_check.m_tick = 0;
8266 const NDB_TICKS now = NdbTick_getCurrentTicks();
8267 m_connectivity_check.m_timer.reset(now);
8268 }
8269 else
8270 {
8271 g_eventLogger->info("QMGR : Connectivity check requested due to %s (from %u) not started as no other running nodes.",
8272 reasonText,
8273 causingNode);
8274 }
8275 }
8276
/**
 * Another node is running a connectivity check and has pinged us.
 * If connectivity checking is configured here and no round is active,
 * start our own round first, then reply with NODE_PING_CONF.  Sending
 * our PING_REQs before the CONF means the requester sees our round
 * starting before it sees our answer, avoiding a redundant extra round
 * on its side.
 */
void
Qmgr::execNODE_PINGREQ(Signal* signal)
{
  jamEntry();
  Uint32 ownId = getOwnNodeId();
  const NodePingReq* pingReq = CAST_CONSTPTR(NodePingReq, &signal->theData[0]);
  Uint32 sendersRef = signal->getSendersBlockRef();
  Uint32 sendersNodeId = refToNode(sendersRef);
  Uint32 senderData = pingReq->senderData;

  ndbrequire(sendersNodeId != ownId);

  /* We will start our own connectivity check if necessary
   * before responding with PING_CONF to the requestor.
   * This means that the sending node will receive our PING_REQ
   * before our PING_CONF, which should avoid them starting an
   * unnecessary extra connectivity check round in some cases.
   */
  if (likely(m_connectivity_check.getEnabled()))
  {
    jam();
    /* We have connectivity checking configured */
    if (! m_connectivity_check.m_active)
    {
      jam();

      {
        /* Don't start a new connectivity check if the requesting
         * node has failed from our point of view
         */
        NodeRecPtr nodePtr;
        nodePtr.i = sendersNodeId;
        ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
        if (unlikely(nodePtr.p->phase != ZRUNNING))
        {
          jam();

          /* Note : no PING_CONF is sent in this case. */
          g_eventLogger->warning("QMGR : Discarding NODE_PINGREQ from non-running node %u (%u)",
                                 sendersNodeId, nodePtr.p->phase);
          return;
        }
      }

      /* Start our own Connectivity Check now indicating reason and causing node */
      startConnectivityCheck(signal, FailRep::ZCONNECT_CHECK_FAILURE, sendersNodeId);
    }
  }
  else
  {
    jam();
    /* Configuration mismatch between nodes : warn, but still reply. */
    g_eventLogger->warning("QMGR : NODE_PINGREQ received from node %u, but connectivity "
                           "checking not configured on this node. Ensure all "
                           "nodes have the same configuration for parameter "
                           "ConnectCheckIntervalMillis.",
                           sendersNodeId);
  }

  /* Now respond with NODE_PINGCONF */
  NodePingConf* pingConf = CAST_PTR(NodePingConf, &signal->theData[0]);

  /* Echo the round number so the requester can match the reply. */
  pingConf->senderData = senderData;
  pingConf->senderRef = reference();

  sendSignal(sendersRef,
             GSN_NODE_PING_CONF,
             signal,
             NodePingConf::SignalLength,
             JBA);
}
8346
/**
 * A node has (re)connected : connectivity to it has just been
 * demonstrated, so remove it from the suspect set.
 */
void
Qmgr::ConnectCheckRec::reportNodeConnect(Uint32 nodeId)
{
  /* Clear any suspicion */
  m_nodesSuspect.clear(nodeId);
}
8353
8354 bool
reportNodeFailure(Uint32 nodeId)8355 Qmgr::ConnectCheckRec::reportNodeFailure(Uint32 nodeId)
8356 {
8357 if (unlikely(m_active))
8358 {
8359 m_nodesFailedDuring.set(nodeId);
8360
8361 if (m_nodesWaiting.get(nodeId))
8362 {
8363 /* We were waiting for a NODE_PING_CONF from this node,
8364 * remove it from the set
8365 */
8366 m_nodesWaiting.clear(nodeId);
8367
8368 return m_nodesWaiting.isclear();
8369 }
8370 }
8371 return false;
8372 }
8373
/**
 * Reply to one of our NODE_PING_REQs.  Remove the sender from the
 * waiting set, clear any suspicion about it if it replied within
 * CC_SuspectTicks, and complete the round when no replies remain
 * outstanding.  Replies for stale rounds are discarded.
 */
void
Qmgr::execNODE_PINGCONF(Signal* signal)
{
  jamEntry();

  ndbrequire(m_connectivity_check.getEnabled());

  const NodePingConf* pingConf = CAST_CONSTPTR(NodePingConf, &signal->theData[0]);
  Uint32 sendersBlockRef = signal->getSendersBlockRef();
  Uint32 sendersNodeId = refToNode(sendersBlockRef);
  Uint32 roundNumber = pingConf->senderData;

  ndbrequire(sendersNodeId != getOwnNodeId());
  ndbrequire((m_connectivity_check.m_active) || /* Normal */
             (m_connectivity_check.m_nodesWaiting.get(sendersNodeId) || /* We killed last round */
              m_connectivity_check.m_nodesFailedDuring.get(sendersNodeId))); /* Someone killed */

  /* Late reply : the round it belongs to has finished or been
   * superseded - discard it. */
  if (unlikely((! m_connectivity_check.m_active) ||
               (roundNumber != m_connectivity_check.m_currentRound)))
  {
    g_eventLogger->warning("QMGR : Received NODEPING_CONF from node %u for round %u, "
                           "but we are %sactive on round %u. Discarding.",
                           sendersNodeId,
                           roundNumber,
                           ((m_connectivity_check.m_active)?"":"in"),
                           m_connectivity_check.m_currentRound);
    return;
  }

  if (ERROR_INSERTED(938))
  {
    /* Test instrumentation : trace reply arrival times. */
    ndbout_c("QMGR : execNODE_PING_CONF() from %u in tick %u",
             sendersNodeId, m_connectivity_check.m_tick);
  }

  /* Node must have been pinged, we must be waiting for the response,
   * or the node must have already failed
   */
  ndbrequire(m_connectivity_check.m_nodesPinged.get(sendersNodeId));
  ndbrequire(m_connectivity_check.m_nodesWaiting.get(sendersNodeId) ||
             m_connectivity_check.m_nodesFailedDuring.get(sendersNodeId));

  m_connectivity_check.m_nodesWaiting.clear(sendersNodeId);

  if (likely(m_connectivity_check.m_tick < CC_SuspectTicks))
  {
    jam();
    /* Node responded on time, clear any suspicion about it */
    m_connectivity_check.m_nodesSuspect.clear(sendersNodeId);
  }

  if (m_connectivity_check.m_nodesWaiting.isclear())
  {
    jam();
    /* Connectivity check round is now finished */
    connectivityCheckCompleted(signal);
  }
}
8432
/**
 * Finish a connectivity check round : log the outcome (locally and to
 * the cluster log), and either start a new round if suspect nodes
 * survived this one, or stop the protocol and send a goodwill
 * heartbeat to our right neighbour.
 */
void
Qmgr::connectivityCheckCompleted(Signal* signal)
{
  jam();

  m_connectivity_check.m_active = false;

  /* Log the following :
   * Nodes checked
   * Nodes responded ok
   * Nodes responded late (now suspect)
   * Nodes failed to respond.
   * Nodes failed during
   */
  char pinged[NdbNodeBitmask::TextLength + 1];
  char late[NdbNodeBitmask::TextLength + 1];
  char silent[NdbNodeBitmask::TextLength + 1];
  char failed[NdbNodeBitmask::TextLength + 1];

  /* Any 'waiting' nodes have been killed
   * Surviving suspects do not include them.
   */
  NdbNodeBitmask survivingSuspects(m_connectivity_check.m_nodesSuspect);
  survivingSuspects.bitANDC(m_connectivity_check.m_nodesWaiting);

  /* Nodes that failed during the check are also excluded */
  survivingSuspects.bitANDC(m_connectivity_check.m_nodesFailedDuring);

  m_connectivity_check.m_nodesPinged.getText(pinged);
  survivingSuspects.getText(late);
  m_connectivity_check.m_nodesWaiting.getText(silent);
  m_connectivity_check.m_nodesFailedDuring.getText(failed);

  /* 'responded on time' below is computed as pinged minus the full
   * suspect set (which still includes killed/failed suspects). */
  g_eventLogger->info("QMGR : Connectivity check completed, "
                      "%u other nodes checked (%s), "
                      "%u responded on time, "
                      "%u responded late (%s), "
                      "%u no response will be failed (%s), "
                      "%u failed during check (%s)\n",
                      m_connectivity_check.m_nodesPinged.count(),
                      pinged,
                      m_connectivity_check.m_nodesPinged.count() -
                      m_connectivity_check.m_nodesSuspect.count(),
                      survivingSuspects.count(),
                      late,
                      m_connectivity_check.m_nodesWaiting.count(),
                      silent,
                      m_connectivity_check.m_nodesFailedDuring.count(),
                      failed);

  /* Log in Cluster log */
  signal->theData[0] = NDB_LE_ConnectCheckCompleted;
  signal->theData[1] = m_connectivity_check.m_nodesPinged.count();
  signal->theData[2] = survivingSuspects.count();
  signal->theData[3] = m_connectivity_check.m_nodesWaiting.count() +
    m_connectivity_check.m_nodesFailedDuring.count();

  sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);

  if (survivingSuspects.count() > 0)
  {
    jam();
    /* Still suspect nodes, start another round */
    g_eventLogger->info("QMGR : Starting new connectivity check due to suspect nodes.");
    /* Restart connectivity check, no external reason or cause */
    startConnectivityCheck(signal, 0, 0);
  }
  else
  {
    jam();
    /* No suspect nodes, stop the protocol now */

    g_eventLogger->info("QMGR : All other nodes (%u) connectivity ok.",
                        m_connectivity_check.m_nodesPinged.count() -
                        (m_connectivity_check.m_nodesWaiting.count() +
                         m_connectivity_check.m_nodesFailedDuring.count()));

    /* Send a heartbeat to our right neighbour at this point as a gesture
     * of goodwill
     */
    sendHeartbeat(signal);
    hb_send_timer.reset(NdbTick_getCurrentTicks());
  };
}
8517
void
Qmgr::checkConnectivityTimeSignal(Signal* signal)
{
  /* Executed periodically when a connectivity check is
   * underway.
   * After CC_SuspectTicks have elapsed, any nodes
   * which have not responded are considered
   * 'Suspect'.
   * After CC_FailedTicks have elapsed, any nodes
   * which have not responded are considered
   * to have failed, and failure handling
   * begins.
   */
  jam();

  /* Preconditions, otherwise we shouldn't have been called */
  ndbrequire(m_connectivity_check.getEnabled());
  ndbrequire(m_connectivity_check.m_active);
  ndbrequire(!m_connectivity_check.m_nodesWaiting.isclear());

  m_connectivity_check.m_tick++;

  switch (m_connectivity_check.m_tick)
  {
  case CC_SuspectTicks:
  {
    jam();
    /* Still waiting to hear from some nodes, they are now
     * suspect
     */
    m_connectivity_check.m_nodesSuspect.bitOR(m_connectivity_check.m_nodesWaiting);
    return;
  }
  case CC_FailedTicks:
  {
    jam();
    /* Still waiting to hear from some nodes, they will now
     * be failed
     */
    m_connectivity_check.m_active = false;
    Uint32 nodeId = 0;

    /* Iterate over every node still in the waiting set. */
    while ((nodeId = m_connectivity_check.m_nodesWaiting.find(nodeId))
           != BitmaskImpl::NotFound)
    {
      jam();
      /* Log failure reason */
      /* Todo : Connectivity Check specific failure log? */
      signal->theData[0] = NDB_LE_DeadDueToHeartbeat;
      signal->theData[1] = nodeId;

      sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);

      /* Fail the node */
      /* TODO : Consider real time break here */
      failReportLab(signal, nodeId, FailRep::ZCONNECT_CHECK_FAILURE, getOwnNodeId());
      nodeId++;
    }

    /* Now handle the end of the Connectivity Check */
    connectivityCheckCompleted(signal);
  }
  /* Ticks other than the two thresholds need no action. */
  }
}
8582
/**
 * True when the given node is currently in the connectivity-check
 * suspect set (replied late, or has not yet replied on time, to a
 * connectivity-check ping).
 */
bool
Qmgr::isNodeConnectivitySuspect(Uint32 nodeId) const
{
  return m_connectivity_check.m_nodesSuspect.get(nodeId);
}
8588
/**
 * Handle a FAIL_REP about node 'aFailedNode' that originates from
 * 'sourceNode', a node whose own connectivity we consider suspect.
 * Instead of failing the accused node, the report is inverted : the
 * suspect source is failed with the same cause, and the rejection is
 * logged to the cluster log.
 */
void
Qmgr::handleFailFromSuspect(Signal* signal,
                            Uint32 reason,
                            Uint16 aFailedNode,
                            Uint16 sourceNode)
{
  jam();

  const char* reasonText = "Unknown";

  /* We have received a failure report about some node X from
   * some other node that we consider to have suspect connectivity
   * which may have caused the report.
   *
   * We will 'invert' the sense of this, and handle it as
   * a failure report of the sender, with the same cause.
   */
  switch(reason)
  {
  case FailRep::ZCONNECT_CHECK_FAILURE:
    jam();
    /* Suspect says that connectivity check failed for another node.
     * As suspect has bad connectivity from our point of view, we
     * blame him.
     */
    reasonText = "ZCONNECT_CHECK_FAILURE";
    break;
  case FailRep::ZLINK_FAILURE:
    jam();
    /* Suspect says that link failed for another node.
     * As suspect has bad connectivity from our point of view, we
     * blame her.
     */
    reasonText = "ZLINK_FAILURE";
    break;
  default:
    /* Only the two causes above may be routed here. */
    ndbabort();
  }

  g_eventLogger->warning("QMGR : Received Connectivity failure notification about "
                         "%u from suspect node %u with reason %s. "
                         "Mapping to failure of %u sourced by me.",
                         aFailedNode, sourceNode, reasonText, sourceNode);

  /* Cluster-log the rejected report. */
  signal->theData[0] = NDB_LE_NodeFailRejected;
  signal->theData[1] = reason;
  signal->theData[2] = aFailedNode;
  signal->theData[3] = sourceNode;

  sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);

  /* Fail the suspect source instead of the accused node. */
  failReportLab(signal, sourceNode, (FailRep::FailCause) reason, getOwnNodeId());
}
8642
8643 ProcessInfo *
getProcessInfo(Uint32 nodeId)8644 Qmgr::getProcessInfo(Uint32 nodeId)
8645 {
8646 ProcessInfo * storedProcessInfo = 0;
8647 Int16 index = processInfoNodeIndex[nodeId];
8648 if(index >= 0)
8649 storedProcessInfo = & receivedProcessInfo[index];
8650 else if(nodeId == getOwnNodeId())
8651 storedProcessInfo = getOwnProcessInfo(getOwnNodeId());
8652 return storedProcessInfo;
8653 }
8654
/**
 * DBINFO_SCANREQ : serve rows for the ndbinfo tables owned by QMGR.
 *  - MEMBERSHIP : one row with this node's view of cluster membership
 *    (neighbours, president, president successor, arbitration state).
 *  - PROCESSES  : one row per connected node, built from a received
 *    ProcessInfoRep or synthesized for nodes too old to send one.
 * Other table ids are answered with an empty scan confirmation.
 */
void
Qmgr::execDBINFO_SCANREQ(Signal *signal)
{
  DbinfoScanReq req= *(DbinfoScanReq*)signal->theData;
  Ndbinfo::Ratelimit rl;

  jamEntry();
  switch(req.tableId) {
  case Ndbinfo::MEMBERSHIP_TABLEID:
  {
    jam();
    Ndbinfo::Row row(signal, req);
    row.write_uint32(getOwnNodeId());
    row.write_uint32(getNodeState().nodeGroup);
    row.write_uint32(cneighbourl);
    row.write_uint32(cneighbourh);
    row.write_uint32(cpresident);

    // President successor : running non-president node with the lowest
    // dynamic id (low 16 bits of ndynamicId).
    Uint32 successor = 0;
    {
      NodeRecPtr nodePtr;
      UintR minDynamicId = (UintR)-1;
      for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++)
      {
        jam();
        ptrAss(nodePtr, nodeRec);
        if (nodePtr.p->phase == ZRUNNING)
        {
          if ((nodePtr.p->ndynamicId & 0xFFFF) < minDynamicId)
          {
            jam();
            if (cpresident != nodePtr.i)
            {
              minDynamicId = (nodePtr.p->ndynamicId & 0xFFFF);
              successor = nodePtr.i;
            }
          }
        }
      }
    }
    row.write_uint32(successor);

    NodeRecPtr myNodePtr;
    myNodePtr.i = getOwnNodeId();
    ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);
    row.write_uint32(myNodePtr.p->ndynamicId);

    row.write_uint32(arbitRec.node); // arbitrator

    char ticket[20]; // Need 16 characters + 1 for trailing '\0'
    arbitRec.ticket.getText(ticket, sizeof(ticket));
    row.write_string(ticket);

    row.write_uint32(arbitRec.state);

    // arbitrator connected
    row.write_uint32(c_connectedNodes.get(arbitRec.node));

    // Find potential (rank1 and rank2) arbitrators that are connected.
    NodeRecPtr aPtr;
    // buf_size: Node nr (max 3 chars) and ', ' + trailing '\0'
    const int buf_size = 5 * MAX_NODES + 1;
    char buf[buf_size];

    for (unsigned rank = 1; rank <= 2; rank++)
    {
      jam();
      aPtr.i = 0;
      const unsigned stop = NodeBitmask::NotFound;
      int buf_offset = 0;
      const char* delimiter = "";

      // Build a comma-separated list of the connected candidates of
      // this rank; "-" is written when there are none.
      while ((aPtr.i = arbitRec.apiMask[rank].find(aPtr.i + 1)) != stop)
      {
        jam();
        ptrAss(aPtr, nodeRec);
        if (c_connectedNodes.get(aPtr.i))
        {
          buf_offset += BaseString::snprintf(buf + buf_offset,
                                             buf_size - buf_offset,
                                             "%s%u", delimiter, aPtr.i);
          delimiter = ", ";
        }
      }

      if (buf_offset == 0)
        row.write_string("-");
      else
        row.write_string(buf);
    }

    ndbinfo_send_row(signal, req, row, rl);
    break;
  }
  case Ndbinfo::PROCESSES_TABLEID:
  {
    jam();
    for(int i = 1 ; i <= max_api_node_id ; i++)
    {
      NodeInfo nodeInfo = getNodeInfo(i);
      if(nodeInfo.m_connected)
      {
        char version_buffer[NDB_VERSION_STRING_BUF_SZ];
        ndbGetVersionString(nodeInfo.m_version, nodeInfo.m_mysql_version,
                            0, version_buffer, NDB_VERSION_STRING_BUF_SZ);

        ProcessInfo *processInfo = getProcessInfo(i);
        if(processInfo && processInfo->isValid())
        {
          // Node has sent a valid ProcessInfoRep : report it.
          char uri_buffer[512];
          processInfo->getServiceUri(uri_buffer, sizeof(uri_buffer));
          Ndbinfo::Row row(signal, req);
          row.write_uint32(getOwnNodeId());                 // reporting_node_id
          row.write_uint32(i);                              // node_id
          row.write_uint32(nodeInfo.getType());             // node_type
          row.write_string(version_buffer);                 // node_version
          row.write_uint32(processInfo->getPid());          // process_id
          row.write_uint32(processInfo->getAngelPid());     // angel_process_id
          row.write_string(processInfo->getProcessName());  // process_name
          row.write_string(uri_buffer);                     // service_URI
          ndbinfo_send_row(signal, req, row, rl);
        }
        else if(nodeInfo.m_type != NodeInfo::DB &&
                nodeInfo.m_version > 0 &&
                ! ndbd_supports_processinfo(nodeInfo.m_version))
        {
          /* MGM/API node is too old to send ProcessInfoRep, so create a
             fallback-style report */

          // service URI becomes "ndb://<connect address>"
          struct in_addr addr= globalTransporterRegistry.get_connect_address(i);
          char service_uri[32];
          strcpy(service_uri, "ndb://");
          Ndb_inet_ntop(AF_INET, & addr, service_uri + 6, 24);

          Ndbinfo::Row row(signal, req);
          row.write_uint32(getOwnNodeId());                 // reporting_node_id
          row.write_uint32(i);                              // node_id
          row.write_uint32(nodeInfo.getType());             // node_type
          row.write_string(version_buffer);                 // node_version
          row.write_uint32(0);                              // process_id
          row.write_uint32(0);                              // angel_process_id
          row.write_string("");                             // process_name
          row.write_string(service_uri);                    // service_URI
          ndbinfo_send_row(signal, req, row, rl);
        }
      }
    }
    break;
  }
  default:
    break;
  }
  ndbinfo_send_scan_conf(signal, req, rl);
}
8810
8811
/**
 * PROCESSINFO_REP : a node reports its process information (pid,
 * process name, service URI, ...).  Store it in this node's
 * ProcessInfo table for later retrieval via the ndbinfo 'processes'
 * table.  Optional signal sections carry the URI path and the host
 * address; when no host section is present the transporter registry's
 * connect address is used instead.
 */
void
Qmgr::execPROCESSINFO_REP(Signal *signal)
{
  jamEntry();
  ProcessInfoRep * report = (ProcessInfoRep *) signal->theData;
  SectionHandle handle(this, signal);
  SegmentedSectionPtr pathSectionPtr, hostSectionPtr;

  ProcessInfo * processInfo = getProcessInfo(report->node_id);
  if(processInfo)
  {
    /* Set everything except the connection name and host address */
    processInfo->initializeFromProcessInfoRep(report);

    /* Set the URI path */
    if(handle.getSection(pathSectionPtr, ProcessInfoRep::PathSectionNum))
    {
      processInfo->setUriPath(pathSectionPtr.p->theData);
    }

    /* Set the host address */
    if(handle.getSection(hostSectionPtr, ProcessInfoRep::HostSectionNum))
    {
      processInfo->setHostAddress(hostSectionPtr.p->theData);
    }
    else
    {
      /* Use the address from the transporter registry.
         As implemented below we use setHostAddress() with struct in_addr
         to set an IPv4 address. An alternate more abstract version
         of ProcessInfo::setHostAddress() is also available, which
         takes a struct sockaddr * and length.
      */
      struct in_addr addr=
        globalTransporterRegistry.get_connect_address(report->node_id);
      processInfo->setHostAddress(& addr);
    }
  }
  /* Sections must always be released, even for unknown node ids. */
  releaseSections(handle);
}
8852
8853 void
execISOLATE_ORD(Signal * signal)8854 Qmgr::execISOLATE_ORD(Signal* signal)
8855 {
8856 jamEntry();
8857
8858 IsolateOrd* sig = (IsolateOrd*) signal->theData;
8859
8860 ndbrequire(sig->senderRef != 0);
8861 Uint32 senderNode = refToNode(sig->senderRef);
8862 Uint32 sz;
8863 Uint32 num_sections = signal->getNoOfSections();
8864 SectionHandle handle(this, signal);
8865 if (ndbd_send_node_bitmask_in_section(getNodeInfo(senderNode).m_version))
8866 {
8867 jam();
8868 ndbrequire(num_sections == 1);
8869 SegmentedSectionPtr ptr;
8870 handle.getSection(ptr, 0);
8871 copy(sig->nodesToIsolate, ptr);
8872 ndbrequire(ptr.sz <= NdbNodeBitmask::Size);
8873 sz = ptr.sz;
8874 }
8875 else
8876 {
8877 jam();
8878 memset(sig->nodesToIsolate + NdbNodeBitmask48::Size,
8879 0,
8880 _NDB_NBM_DIFF_BYTES);
8881 sz = NdbNodeBitmask::Size;
8882 }
8883 NdbNodeBitmask victims;
8884 memset(&victims, 0, sizeof(victims));
8885 victims.assign(sz, sig->nodesToIsolate);
8886 ndbrequire(!victims.isclear());
8887
8888 switch (sig->isolateStep)
8889 {
8890 case IsolateOrd::IS_REQ:
8891 {
8892 jam();
8893 releaseSections(handle);
8894 /* Initial request, broadcast immediately */
8895
8896 /* Need to get the set of live nodes to broadcast to */
8897 NdbNodeBitmask hitmen(c_clusterNodes);
8898
8899 sig->isolateStep = IsolateOrd::IS_BROADCAST;
8900 unsigned nodeId = hitmen.find_first();
8901 do
8902 {
8903 jam();
8904 BlockReference ref = calcQmgrBlockRef(nodeId);
8905 if (ndbd_send_node_bitmask_in_section(getNodeInfo(nodeId).m_version))
8906 {
8907 jam();
8908 LinearSectionPtr lsptr[3];
8909 lsptr[0].p = (Uint32*)&victims;
8910 lsptr[0].sz = victims.getPackedLengthInWords();
8911 sendSignal(ref,
8912 GSN_ISOLATE_ORD,
8913 signal,
8914 IsolateOrd::SignalLength,
8915 JBA,
8916 lsptr,
8917 1);
8918 }
8919 else
8920 {
8921 jam();
8922 ndbrequire(victims.getPackedLengthInWords() <= 2);
8923 memset(&sig->nodesToIsolate, 0, 8);
8924 memcpy(&sig->nodesToIsolate,
8925 &victims,
8926 4 * victims.getPackedLengthInWords());
8927 sendSignal(ref,
8928 GSN_ISOLATE_ORD,
8929 signal,
8930 IsolateOrd::SignalLengthWithBitmask48,
8931 JBA);
8932 }
8933 nodeId = hitmen.find_next(nodeId + 1);
8934 } while (nodeId != BitmaskImpl::NotFound);
8935
8936 ndbrequire(!hitmen.isclear()); /* At least me */
8937 return;
8938 }
8939 case IsolateOrd::IS_BROADCAST:
8940 {
8941 jam();
8942 /* Received reqest, delay */
8943 sig->isolateStep = IsolateOrd::IS_DELAY;
8944
8945 if (sig->delayMillis > 0)
8946 {
8947 /* Delay processing until delayMillis passes */
8948 jam();
8949 sendSignalWithDelay(reference(),
8950 GSN_ISOLATE_ORD,
8951 signal,
8952 sig->delayMillis,
8953 IsolateOrd::SignalLength,
8954 &handle);
8955 return;
8956 }
8957 }
8958 // Fall through
8959 case IsolateOrd::IS_DELAY:
8960 {
8961 jam();
8962
8963 releaseSections(handle);
8964 if (ERROR_INSERTED(942))
8965 {
8966 jam();
8967 g_eventLogger->info("QMGR discarding IsolateRequest");
8968 return;
8969 }
8970
8971 /* Map to FAIL_REP signal(s) */
8972 Uint32 failSource = refToNode(sig->senderRef);
8973
8974 unsigned nodeId = victims.find_first();
8975 do
8976 {
8977 jam();
8978
8979 /* TODO : Consider checking node state and skipping if
8980 * failing already
8981 * Consider logging that action is being taken here
8982 */
8983
8984 FailRep* failRep = (FailRep*)&signal->theData[0];
8985 failRep->failNodeId = nodeId;
8986 failRep->failCause = FailRep::ZFORCED_ISOLATION;
8987 failRep->failSourceNodeId = failSource;
8988
8989 sendSignal(reference(), GSN_FAIL_REP, signal, 3, JBA);
8990
8991 nodeId = victims.find_next(nodeId + 1);
8992 } while (nodeId != BitmaskImpl::NotFound);
8993
8994 /* Fail rep signals are en-route... */
8995
8996 return;
8997 }
8998 }
8999
9000 ndbabort();
9001 }
9002
9003
9004 void
execNODE_STATE_REP(Signal * signal)9005 Qmgr::execNODE_STATE_REP(Signal* signal)
9006 {
9007 jam();
9008 const NodeState prevState = getNodeState();
9009 SimulatedBlock::execNODE_STATE_REP(signal);
9010
9011 /* Check whether we are changing state */
9012 const Uint32 prevStartLevel = prevState.startLevel;
9013 const Uint32 newStartLevel = getNodeState().startLevel;
9014
9015 if (newStartLevel != prevStartLevel)
9016 {
9017 jam();
9018 /* Inform APIs */
9019 signal->theData[0] = ZNOTIFY_STATE_CHANGE;
9020 signal->theData[1] = 1;
9021 sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
9022 }
9023
9024 return;
9025 }
9026
9027 void
handleStateChange(Signal * signal,Uint32 nodeToNotify)9028 Qmgr::handleStateChange(Signal* signal, Uint32 nodeToNotify)
9029 {
9030 jam();
9031 bool take_a_break = false;
9032
9033 do
9034 {
9035 const NodeInfo::NodeType nt = getNodeInfo(nodeToNotify).getType();
9036
9037 if (nt == NodeInfo::API ||
9038 nt == NodeInfo::MGM)
9039 {
9040 jam();
9041
9042 NodeRecPtr notifyNode;
9043 notifyNode.i = nodeToNotify;
9044 ptrCheckGuard(notifyNode, MAX_NODES, nodeRec);
9045
9046 if (notifyNode.p->phase == ZAPI_ACTIVE)
9047 {
9048 jam();
9049 ndbassert(c_connectedNodes.get(nodeToNotify));
9050
9051 /**
9052 * Ok, send an unsolicited API_REGCONF to inform
9053 * the API of the state change
9054 */
9055 set_hb_count(nodeToNotify) = 0;
9056 sendApiRegConf(signal, nodeToNotify);
9057
9058 take_a_break = true;
9059 }
9060 }
9061
9062 nodeToNotify++;
9063 } while (nodeToNotify < MAX_NODES &&
9064 !take_a_break);
9065
9066 if (nodeToNotify < MAX_NODES)
9067 {
9068 jam();
9069 signal->theData[0] = ZNOTIFY_STATE_CHANGE;
9070 signal->theData[1] = nodeToNotify;
9071 sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
9072 }
9073
9074 return;
9075 }
9076
9077 /**
9078 * SET_UP_MULTI_TRP_REQ starts the setup of multi socket transporters
9079 * that currently is setup between two data nodes in the same node group.
9080 * This signal is sent in start phase 3 from NDBCNTR when we are performing
9081 * an initial start or a cluster restart at a time when we know the version
9082 * info about other data nodes. For node restarts it is sent later in phase
9083 * 4 when the master has informed us of the current sysfile. We need to wait
9084 * for this to ensure that we know the node group information for all nodes.
9085 * We will only allow one use of SET_UP_MULTI_TRP_REQ per start of a data
9086 * node. We can still participate in setting up multi sockets after that,
9087 * but only when another node is starting and requesting us to assist in
9088 * setting up a multi socket setup.
9089 *
9090 * We cannot use multi sockets towards versions before MySQL Cluster
9091 * 8.0.20.
9092 *
9093 * The signal flow to accomplish this setup of multi sockets is the
9094 * following. It is currently only possible to setup when a node is
 * starting up, but some parts of the code are prepared to also handle
9096 * this change while the cluster is operational.
9097 *
 * The protocol below assumes that both nodes support multi sockets.
9099 *
9100 * NDBCNTR/DBDIH QMGR QMGR neighbour
9101 * SET_UP_MULTI_TRP_REQ
9102 * ------------------->
9103 *
9104 * Scenario 1: QMGR Neighbour starts after first QMGR
9105 * GET_NUM_MULTI_TRP_REQ
9106 * ------------------------------------->
9107 * GET_NUM_MULTI_TRP_CONF
9108 * <------------------------------------
9109 * Create multi transporters
9110 * Connect multi transporters
9111 *
9112 * GET_NUM_MULTI_TRP_REQ
9113 * <------------------------------------
9114 * GET_NUM_MULTI_TRP_CONF
9115 * ------------------------------------>
9116 * Create multi transporter
9117 * Connect multi transporter
9118 * Multi transporters connect to each other
9119 *
9120 * QMGR QMGR Neighbour
9121 * SWITCH_MULTI_TRP_REQ
9122 * ---------------------------------------------------------->
9123 * When QMGR neighbour
9124 * has added to epoll
9125 * set.
9126 * SWITCH_MULTI_TRP_REQ
9127 * <---------------------------------------------------------
9128 * SWITCH_MULTI_TRP_CONF
9129 * <-------------------------------------------------------->
9130 * Now both nodes are ready to perform the actual switch over
9131 *
9132 * QMGR THRMAN Proxy THRMAN
9133 * FREEZE_THREAD_REQ
9134 * ---------------------->
9135 * FREEZE_THREAD_REQ
9136 * -------------------------->>
9137 * Freeze all threads
9138 * except main thread
9139 * FREEZE_ACTION_REQ
9140 * <--------------------------------------------------
9141 * Switch to using multi transporter sockets
9142 *
9143 * At this point the only thread that is active is the main thread.
9144 * Every other thread is frozen waiting to be woken up when the
9145 * new multi socket setup is set up. We will send the last signal
9146 * ACTIVATE_TRP_REQ on the old transporter, before we send that we
9147 * ensure that we have locked all send transporters and after that
9148 * we enable the send buffer and after that all signals will be
9149 * sent on the new multi sockets.
9150 *
9151 * QMGR THRMAN (main thread) QMGR Neighbour
9152 * ACTIVATE_TRP_REQ
9153 * -------------------------------------------------------->
9154 * FREEZE_ACTION_CONF
9155 * -------------------------->
 * unlock all threads
9157 * wait until all threads woken up again
9158 * FREEZE_THREAD_CONF
9159 * <--------------------------
9160 *
9161 * In parallel with the above we will also do the same thing in the
9162 * neighbour node and this node will initiate the second round of
9163 * events when we receive the signal ACTIVATE_TRP_REQ.
9164 *
9165 * QMGR TRPMAN Proxy TRPMAN QMGR Neighbour
9166 * ACTIVATE_TRP_REQ
9167 * <--------------------------------------------------------
9168 * SYNC_THREAD_VIA_REQ
9169 * --------------->
9170 * SYNC_THREAD_VIA_REQ
9171 * --------------->> THRMANs
9172 * SYNC_THREAD_REQ
9173 * -------------------->>
9174 * SYNC_THREAD_CONF
9175 * <<--------------------
9176 * SYNC_THREAD_VIA_CONF
9177 * <<---------------
9178 * SYNC_THREAD_VIA_CONF
9179 * <---------------
9180 *
9181 * SYNC_THREAD_VIA_REQ/CONF is used to ensure that all receive threads
9182 * have delivered any signals it has received. Since at this point we
9183 * haven't activated the new multi sockets, and we have deactivated
9184 * the old socket, this means that we have a clear signal order in that
9185 * signal sent on old socket is always delivered to all other threads
9186 * before any new signal on the new multi socket transporters are
9187 * delivered.
9188 *
9189 * <---------------
9190 * ACTIVATE_TRP_REQ
9191 * --------------->-------------->>
9192 * Activate the receive on the
9193 * new transporters
9194 * ACTIVATE_TRP_CONF
9195 * <<------------------------------
9196 * ACTIVATE_TRP_CONF
9197 * --------------------------------------------------------->
9198 * Here the
9199 * switch is completed
 * After receiving ACTIVATE_TRP_CONF we have no more use of the old socket,
 * since the sender has likewise stopped using it.
9202 *
9203 * If more nodes are in node group to also set up we do it after this.
9204 * Otherwise we are ready.
9205 *
9206 * QMGR NDBCNTR/DBDIH
9207 * SET_UP_MULTI_TRP_CONF
9208 * ------------------------------->
9209 */
9210 void
execSET_UP_MULTI_TRP_REQ(Signal * signal)9211 Qmgr::execSET_UP_MULTI_TRP_REQ(Signal *signal)
9212 {
9213 jamEntry();
9214 if (m_ref_set_up_multi_trp_req != 0)
9215 {
9216 jam();
9217 DEB_MULTI_TRP(("Already handled SET_UP_MULTI_TRP_REQ"));
9218 sendSignal(signal->theData[0],
9219 GSN_SET_UP_MULTI_TRP_CONF,
9220 signal,
9221 1,
9222 JBB);
9223 return;
9224 }
9225 m_ref_set_up_multi_trp_req = signal->theData[0];
9226 m_get_num_multi_trps_sent = 0;
9227 for (Uint32 node_id = 1; node_id < MAX_NDB_NODES; node_id++)
9228 {
9229 NodeRecPtr nodePtr;
9230 nodePtr.i = node_id;
9231 ptrAss(nodePtr, nodeRec);
9232 nodePtr.p->m_used_num_multi_trps = m_num_multi_trps;
9233 nodePtr.p->m_initial_set_up_multi_trp_done = false;
9234 }
9235 DEB_MULTI_TRP(("m_num_multi_trps = %u", m_num_multi_trps));
9236 bool done = false;
9237 bool completed = get_num_multi_trps(signal, done);
9238 if (!completed)
9239 {
9240 jam();
9241 return;
9242 }
9243 else
9244 {
9245 jam();
9246 DEB_MULTI_TRP(("m_num_multi_trps == 1, no need to setup multi sockets"));
9247 }
9248 complete_multi_trp_setup(signal, done);
9249 }
9250
9251 void
get_node_group_mask(Signal * signal,NdbNodeBitmask & mask)9252 Qmgr::get_node_group_mask(Signal *signal, NdbNodeBitmask& mask)
9253 {
9254 CheckNodeGroups * sd = (CheckNodeGroups*)signal->getDataPtrSend();
9255 sd->blockRef = reference();
9256 sd->requestType =
9257 CheckNodeGroups::Direct |
9258 CheckNodeGroups::GetNodeGroupMembers;
9259 sd->nodeId = getOwnNodeId();
9260 EXECUTE_DIRECT_MT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal,
9261 CheckNodeGroups::SignalLength, 0);
9262 jamEntry();
9263 mask.assign(sd->mask);
9264 mask.clear(getOwnNodeId());
9265 }
9266
bool
Qmgr::get_num_multi_trps(Signal *signal, bool &done)
{
  jamEntry();
  /**
   * Negotiate the number of multi sockets with the running nodes in our
   * node group by sending GET_NUM_MULTI_TRP_REQ to each eligible one.
   *
   * done is set to true when there is at least one node group neighbour,
   * or when m_num_multi_trps == 1 (nothing to negotiate at all).
   * Returns true when no request is outstanding after the loop, i.e. the
   * caller can complete the setup immediately.
   */
  NdbNodeBitmask mask;
  get_node_group_mask(signal, mask);
  /* Hold the outstanding-counter above zero while looping so that replies
   * arriving mid-loop cannot trigger completion prematurely. */
  m_get_num_multi_trps_sent++;
  if (m_num_multi_trps == 1)
  {
    jam();
    /* Configured for a single socket: no negotiation required. */
    done = true;
  }
  for (Uint32 node_id = 1; node_id < MAX_NDB_NODES; node_id++)
  {
    if (mask.get(node_id))
    {
      jam();
      jamLine(node_id);
      DEB_MULTI_TRP(("Node %u is in the same node group", node_id));
      NodeRecPtr nodePtr;
      nodePtr.i = node_id;
      ptrAss(nodePtr, nodeRec);
      nodePtr.p->m_is_in_same_nodegroup = true;
      done = true;
      Uint32 version = getNodeInfo(nodePtr.i).m_version;
      if (m_num_multi_trps > 1)
      {
        create_multi_transporter(nodePtr.i);
        /* Only negotiate with nodes that are running, support multi
         * transporters (>= 8.0.20), and are started or starting. */
        if (nodePtr.p->phase == ZRUNNING &&
            ndbd_use_multi_ng_trps(version) &&
            (c_ndbcntr->is_node_started(nodePtr.i) ||
             c_ndbcntr->is_node_starting(nodePtr.i)))
        {
          jam();
          if (ERROR_INSERTED(970))
          {
            NdbSleep_MilliSleep(500);
          }
          nodePtr.p->m_set_up_multi_trp_started = true;
          inc_get_num_multi_trps_sent(nodePtr.i);
          send_get_num_multi_trp_req(signal, node_id);
        }
      }
    }
  }
  /* Drop the guard taken above; zero means nothing is outstanding. */
  m_get_num_multi_trps_sent--;
  return (m_get_num_multi_trps_sent == 0);
}
9315
void
Qmgr::execGET_NUM_MULTI_TRP_REQ(Signal* signal)
{
  jamEntry();
  /**
   * A node group neighbour asks how many multi sockets to use towards us.
   * We answer with GET_NUM_MULTI_TRP_CONF (carrying the negotiated count)
   * once we have received SET_UP_MULTI_TRP_REQ ourselves, otherwise with
   * GET_NUM_MULTI_TRP_REF so that the sender retries later.
   */
  GetNumMultiTrpReq* req = (GetNumMultiTrpReq*)&signal->theData[0];
  Uint32 sender_node_id = req->nodeId;

  NodeRecPtr nodePtr;
  nodePtr.i = sender_node_id;
  ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
  nodePtr.p->m_initial_set_up_multi_trp_done =
    req->initial_set_up_multi_trp_done;
  /*
   * Set used number of multi sockets to be minimum of our own config
   * and the node config of the node contacting us.
   */
  nodePtr.p->m_used_num_multi_trps =
    MIN(req->numMultiTrps, m_num_multi_trps);

  if (m_initial_set_up_multi_trp_done && nodePtr.p->m_used_num_multi_trps > 1)
  {
    /**
     * We passed the startup phase 2 where the connection setup
     * of multi transporters happens normally. So the node sending
     * this message is a new node starting and we're either already
     * started or have passed phase 2 of the startup. We will start
     * enabling communication to this new node.
     *
     * This is only required if we want to use more than one socket.
     */
    jam();
    DEB_MULTI_TRP(("Node %u starting, prepare switch trp using %u trps",
                   sender_node_id,
                   nodePtr.p->m_used_num_multi_trps));
    connect_multi_transporter(signal, sender_node_id);
    if (ERROR_INSERTED(972))
    {
      NdbSleep_MilliSleep(500);
    }
  }
  else
  {
    jam();
    if (ERROR_INSERTED(971))
    {
      NdbSleep_MilliSleep(500);
    }
  }
  /* A non-zero ref means we have received SET_UP_MULTI_TRP_REQ and can
   * answer positively; otherwise ask the sender to retry later. */
  if (m_ref_set_up_multi_trp_req != 0)
  {
    jam();
    DEB_MULTI_TRP(("Node %u starting, sent GET_NUM_MULTI_TRP_REQ, get"
                   " num multi %u",
                   sender_node_id,
                   nodePtr.p->m_used_num_multi_trps));
    GetNumMultiTrpConf* conf = (GetNumMultiTrpConf*)signal->getDataPtrSend();
    conf->numMultiTrps = nodePtr.p->m_used_num_multi_trps;
    conf->nodeId = getOwnNodeId();
    conf->initial_set_up_multi_trp_done = m_initial_set_up_multi_trp_done;

    BlockReference ref = calcQmgrBlockRef(sender_node_id);
    sendSignal(ref, GSN_GET_NUM_MULTI_TRP_CONF, signal,
               GetNumMultiTrpConf::SignalLength, JBB);
  }
  else
  {
    jam();
    DEB_MULTI_TRP(("Node %u starting, GET_NUM_MULTI_TRP_REQ sent,"
                   " we're not ready",
                   sender_node_id));
    GetNumMultiTrpRef* ref = (GetNumMultiTrpRef*)signal->getDataPtrSend();
    ref->nodeId = getOwnNodeId();
    ref->errorCode = GetNumMultiTrpRef::NotReadyYet;
    BlockReference block_ref = calcQmgrBlockRef(sender_node_id);
    sendSignal(block_ref, GSN_GET_NUM_MULTI_TRP_REF, signal,
               GetNumMultiTrpRef::SignalLength, JBB);
  }
}
9394
9395 void
execGET_NUM_MULTI_TRP_REF(Signal * signal)9396 Qmgr::execGET_NUM_MULTI_TRP_REF(Signal *signal)
9397 {
9398 GetNumMultiTrpRef ref = *(GetNumMultiTrpRef*)&signal->theData[0];
9399 /**
9400 * The other node is not ready yet, we'll wait for it to become ready before
9401 * progressing.
9402 */
9403 NodeRecPtr nodePtr;
9404 nodePtr.i = ref.nodeId;
9405 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
9406 nodePtr.p->m_count_multi_trp_ref++;
9407 if (nodePtr.p->m_count_multi_trp_ref > 60)
9408 {
9409 jam();
9410 nodePtr.p->m_count_multi_trp_ref = 0;
9411 DEB_MULTI_TRP(("GET_NUM_MULTI_TRP_REF 60 times from %u", ref.nodeId));
9412 ndbassert(false);
9413 dec_get_num_multi_trps_sent(ref.nodeId);
9414 complete_multi_trp_setup(signal, false);
9415 return;
9416 }
9417 DEB_MULTI_TRP(("GET_NUM_MULTI_TRP_REF received from %u", ref.nodeId));
9418 signal->theData[0] = ZRESEND_GET_NUM_MULTI_TRP_REQ;
9419 signal->theData[1] = ref.nodeId;
9420 sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 500, 2);
9421 }
9422
9423 void
complete_multi_trp_setup(Signal * signal,bool set_done)9424 Qmgr::complete_multi_trp_setup(Signal *signal, bool set_done)
9425 {
9426 if (m_get_num_multi_trps_sent == 0)
9427 {
9428 jam();
9429 if (set_done)
9430 {
9431 jam();
9432 m_initial_set_up_multi_trp_done = true;
9433 }
9434 sendSignal(m_ref_set_up_multi_trp_req,
9435 GSN_SET_UP_MULTI_TRP_CONF,
9436 signal,
9437 1,
9438 JBB);
9439 if (!set_done)
9440 {
9441 jam();
9442 m_ref_set_up_multi_trp_req = 0;
9443 }
9444 }
9445 else
9446 {
9447 jam();
9448 }
9449 }
9450
9451 void
send_get_num_multi_trp_req(Signal * signal,NodeId node_id)9452 Qmgr::send_get_num_multi_trp_req(Signal *signal, NodeId node_id)
9453 {
9454 if (m_get_num_multi_trps_sent == 0)
9455 {
9456 jam();
9457 DEB_MULTI_TRP(("We have already completed the SET_UP_MULTI_TRP_REQ"
9458 ", no need to continue retrying"));
9459 complete_multi_trp_setup(signal, false);
9460 return;
9461 }
9462 jam();
9463 DEB_MULTI_TRP(("Get num multi trp for node %u", node_id));
9464 GetNumMultiTrpReq* req = (GetNumMultiTrpReq*)signal->getDataPtrSend();
9465 req->nodeId = getOwnNodeId();
9466 req->numMultiTrps = m_num_multi_trps;
9467 req->initial_set_up_multi_trp_done = false;
9468 BlockReference ref = calcQmgrBlockRef(node_id);
9469 sendSignal(ref, GSN_GET_NUM_MULTI_TRP_REQ, signal,
9470 GetNumMultiTrpReq::SignalLength, JBB);
9471 }
9472
9473 void
inc_get_num_multi_trps_sent(NodeId node_id)9474 Qmgr::inc_get_num_multi_trps_sent(NodeId node_id)
9475 {
9476 NodeRecPtr nodePtr;
9477 nodePtr.i = node_id;
9478 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
9479 ndbrequire(!nodePtr.p->m_is_get_num_multi_trp_active);
9480 m_get_num_multi_trps_sent++;
9481 nodePtr.p->m_is_get_num_multi_trp_active = true;
9482 }
9483
9484 void
dec_get_num_multi_trps_sent(NodeId node_id)9485 Qmgr::dec_get_num_multi_trps_sent(NodeId node_id)
9486 {
9487 NodeRecPtr nodePtr;
9488 nodePtr.i = node_id;
9489 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
9490 ndbrequire(m_get_num_multi_trps_sent > 0);
9491 ndbrequire(nodePtr.p->m_is_get_num_multi_trp_active);
9492 m_get_num_multi_trps_sent--;
9493 nodePtr.p->m_is_get_num_multi_trp_active = false;
9494 }
9495
void
Qmgr::execGET_NUM_MULTI_TRP_CONF(Signal* signal)
{
  /**
   * We receive the number of sockets to use from the other node. Could
   * also be a signal we sent to ourselves if the other node isn't
   * started yet or is running a version not supporting multi sockets.
   * In these cases the number of sockets will always be 1.
   */
  jamEntry();
  CRASH_INSERTION(951);
  GetNumMultiTrpConf* conf = (GetNumMultiTrpConf*)&signal->theData[0];
  Uint32 sender_node_id = conf->nodeId;
  NodeRecPtr nodePtr;
  nodePtr.i = sender_node_id;
  ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);

  /* Reply received: reset the REF retry counter for this node. */
  nodePtr.p->m_count_multi_trp_ref = 0;
  Uint32 rec_num_multi_trps = conf->numMultiTrps;
  Uint32 initial_set_up_multi_trp_done = conf->initial_set_up_multi_trp_done;
  ndbrequire(nodePtr.p->m_used_num_multi_trps > 0);
  ndbrequire(rec_num_multi_trps <= m_num_multi_trps);
  /**
   * If the other side cannot handle the number of multi sockets we wanted,
   * we set it to the other sides number instead.
   */
  nodePtr.p->m_used_num_multi_trps =
    MIN(conf->numMultiTrps, nodePtr.p->m_used_num_multi_trps);
  nodePtr.p->m_initial_set_up_multi_trp_done =
    initial_set_up_multi_trp_done;
  dec_get_num_multi_trps_sent(nodePtr.i);
  if (rec_num_multi_trps == 1)
  {
    jam();
    /* The peer only supports one socket; nothing to switch over to. */
    DEB_MULTI_TRP(("No need to setup multi sockets to node %u",
                   nodePtr.i));
    complete_multi_trp_setup(signal, true);
    return;
  }
  DEB_MULTI_TRP(("GET_NUM_MULTI_TRP_CONF received from %u using %u trps",
                 sender_node_id,
                 nodePtr.p->m_used_num_multi_trps));
  jam();
  connect_multi_transporter(signal, nodePtr.i);
  if (ERROR_INSERTED(973))
  {
    NdbSleep_MilliSleep(1500);
  }
}
9545
9546 void
create_multi_transporter(NodeId node_id)9547 Qmgr::create_multi_transporter(NodeId node_id)
9548 {
9549 jamEntry();
9550 DEB_MULTI_TRP(("Create multi trp for node %u", node_id));
9551 globalTransporterRegistry.createMultiTransporter(node_id,
9552 m_num_multi_trps);
9553 }
9554
9555 #include "../../../common/transporter/Transporter.hpp"
9556 #include "../../../common/transporter/Multi_Transporter.hpp"
9557
void
Qmgr::connect_multi_transporter(Signal *signal, NodeId node_id)
{
  /**
   * We have created the Multi transporters, now it is time to setup
   * connections to those that are running and also to switch over to
   * using the multi transporter. We currently only perform this as
   * part of startup. This means that if a node is already started
   * it is the responsibility of the starting node always to perform
   * the setup. If both nodes are starting the node with lowest node
   * id is responsible for the setup.
   */
  NodeRecPtr nodePtr;
  nodePtr.i = node_id;
  ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
  nodePtr.p->m_check_multi_trp_connect_loop_count = 0;
  nodePtr.p->m_is_preparing_switch_trp = true;
  /**
   * Connect a multi-transporter.
   * For clients this happens by moving the transporters inside the
   * multi-transporter into the allTransporters array. This leads to
   * that they are checked in start_clients_thread. These transporters
   * are special in that they only connect in the CONNECTED state.
   *
   * To differentiate between normal transporters and these transporters
   * that are part of a multi-transporter we have a method called
   * isPartOfMultiTransporter. The method set_part_of_multi_transporter
   * toggles this state, by default it is false.
   *
   * By replacing the position in theNodeIdTransporters with a
   * multi transporter we ensure that connect_server will handle the
   * connection properly.
   *
   * By placing the transporters in the allTransporters array ensures
   * that we connect as clients in start_clients_thread.
   */
  Multi_Transporter *multi_trp =
    globalTransporterRegistry.get_node_multi_transporter(node_id);
  ndbrequire(multi_trp != 0);

  /* Hold the multi-transporter lock while mutating registry structures. */
  globalTransporterRegistry.lockMultiTransporters();
  multi_trp->set_num_inactive_transporters(
    nodePtr.p->m_used_num_multi_trps);
  Uint32 num_inactive_transporters =
    multi_trp->get_num_inactive_transporters();
  Transporter *current_trp =
    globalTransporterRegistry.get_node_transporter(node_id);
  if (current_trp->isMultiTransporter())
  {
    jam();
    DEB_MULTI_TRP(("Get current trp from multi transporter"));
    ndbrequire(current_trp == multi_trp);
    current_trp = multi_trp->get_active_transporter(0);
    ndbrequire(multi_trp->get_num_active_transporters() == 1);
  }
  DEB_MULTI_TRP(("Base transporter has trp_id: %u",
                 current_trp->getTransporterIndex()));
  /* The new sockets reuse the port of the currently active transporter. */
  int trp_port = current_trp->get_s_port();

  for (Uint32 i = 0; i < num_inactive_transporters; i++)
  {
    /**
     * It is vital that we set the port number in the transporters used
     * by the multi transporter. It is possible that the node comes up
     * with a different port number after a restart. For the first
     * transporter this port number is set in start_clients_thread.
     * Thus before we connect using these transporters we update the
     * port number of those transporters to be the same port number as
     * used by the first transporter.
     */
    jam();
    Transporter *t = multi_trp->get_inactive_transporter(i);
    t->set_s_port(trp_port);
    globalTransporterRegistry.insert_allTransporters(t);
    assign_recv_thread_new_trp(t->getTransporterIndex());
    DEB_MULTI_TRP(("Insert trp id %u for node %u, mti = %u, server: %u"
                   ", port: %d",
                   t->getTransporterIndex(),
                   node_id,
                   t->get_multi_transporter_instance(),
                   t->isServer,
                   trp_port));
  }
  globalTransporterRegistry.unlockMultiTransporters();
  /* Poll for connection completion via CONTINUEB every 10 ms. */
  signal->theData[0] = ZCHECK_MULTI_TRP_CONNECT;
  signal->theData[1] = node_id;
  sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 10, 2);
}
9646
void
Qmgr::check_connect_multi_transporter(Signal *signal, NodeId node_id)
{
  /**
   * CONTINUEB-driven poll: wait until all inactive transporters of the
   * node's multi transporter are connected. Once they are, and all
   * neighbour nodes are ready, possibly initiate the switch protocol
   * towards the selected node.
   */
  NodeRecPtr nodePtr;
  nodePtr.i = node_id;
  ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
  globalTransporterRegistry.lockMultiTransporters();
  Multi_Transporter *multi_trp =
    globalTransporterRegistry.get_node_multi_transporter(node_id);
  if (nodePtr.p->phase == ZRUNNING)
  {
    jam();
    /* Check whether every new socket has finished connecting. */
    bool connected = true;
    Uint32 num_inactive_transporters =
      multi_trp->get_num_inactive_transporters();
    for (Uint32 i = 0; i < num_inactive_transporters; i++)
    {
      jam();
      Transporter *tmp_trp = multi_trp->get_inactive_transporter(i);
      bool is_connected = tmp_trp->isConnected();
      if (!is_connected)
      {
        jam();
        connected = false;
        break;
      }
    }
    if (!connected)
    {
      jam();
      globalTransporterRegistry.unlockMultiTransporters();
      nodePtr.p->m_check_multi_trp_connect_loop_count++;
      /**
       * We are only connecting to nodes already connected, thus we
       * should not fail to connect here, just in case something
       * weird happens we will still fail after waiting for
       * 30 minutes (100 * 30 * 60 times sending 10ms delayed signal).
       */
      ndbrequire(nodePtr.p->m_check_multi_trp_connect_loop_count <
                 (100 * 60 * 30));
      signal->theData[0] = ZCHECK_MULTI_TRP_CONNECT;
      signal->theData[1] = node_id;
      sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 10, 2);
      return;
    }
    DEB_MULTI_TRP(("Multi trp connected for node %u", node_id));
    globalTransporterRegistry.unlockMultiTransporters();
    ndbrequire(nodePtr.p->m_is_multi_trp_setup == false);
    nodePtr.p->m_is_multi_trp_setup = true;
    if (!check_all_multi_trp_nodes_connected())
    {
      jam();
      /* We are not ready to start switch process yet. */
      return;
    }
    if (!select_node_id_for_switch(node_id, true))
    {
      /**
       * We were already busy with a switch, could also be
       * that we didn't find any lower node id to switch to.
       * We will only initiate switch from nodes with lower
       * node ids than our node id.
       *
       * By always selecting the highest node id to start with,
       * we ensure that we select a node that hasn't initiated
       * any switch on their own. Thus we are certain that this
       * node will eventually accept our switch request even if
       * it has to process all the other neighbour nodes before
       * us. This is definitely not an optimal algorithm, but it
       * is safe in that it avoids deadlock that could lead to
       * eternal wait states.
       */
      jam();
      return;
    }
    assign_multi_trps_to_send_threads();
    send_switch_multi_transporter(signal, node_id, false);
    return;
  }
  else
  {
    /**
     * The connection is no longer using the Multi_Transporter object.
     * Can only happen when the connection is broken before we completed
     * the connection setup of all connections. No need to do anything
     * more in this case other than release mutex.
     */
    jam();
    if (ERROR_INSERTED(974))
    {
      NdbSleep_MilliSleep(1500);
    }
    nodePtr.p->m_is_preparing_switch_trp = false;
    globalTransporterRegistry.unlockMultiTransporters();
    check_more_trp_switch_nodes(signal);
  }
  return;
}
9745
void
Qmgr::send_switch_multi_transporter(Signal *signal,
                                    NodeId node_id,
                                    bool retry)
{
  /**
   * Initiate the switch to multi sockets towards node_id by sending
   * SWITCH_MULTI_TRP_REQ. With retry == false we require that no switch
   * is currently ongoing; on retry the request is silently dropped when
   * a switch towards this node is already ongoing, another node's switch
   * is in progress, or the switch has already completed.
   */
  NodeRecPtr nodePtr;
  nodePtr.i = node_id;
  ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
  jam();
  if (!retry)
  {
    jam();
    /* Fresh attempt: we must be the only ongoing switch. */
    ndbrequire(m_current_switch_multi_trp_node == 0);
  }
  else if (m_current_switch_multi_trp_node == node_id)
  {
    jam();
    DEB_MULTI_TRP(("Retry of send SWITCH_MULTI_TRP_REQ to node %u"
                   " not needed since already ongoing",
                   node_id));
    return;
  }
  else if (m_current_switch_multi_trp_node != 0)
  {
    jam();
    DEB_MULTI_TRP(("Retry of send SWITCH_MULTI_TRP_REQ to node %u"
                   " failed since other node already started",
                   node_id));
    return;
  }
  else if (nodePtr.p->m_is_using_multi_trp)
  {
    jam();
    DEB_MULTI_TRP(("Retry of send SWITCH_MULTI_TRP_REQ to node %u"
                   " not needed since already setup",
                   node_id));
    return;
  }
  else
  {
    jam();
    DEB_MULTI_TRP(("Retry of SWITCH_MULTI_TRP_REQ to node %u",
                   node_id));
  }
  /* Record the switch partner and announce ourselves as ready. */
  m_current_switch_multi_trp_node = node_id;
  nodePtr.p->m_is_ready_to_switch_trp = true;
  DEB_MULTI_TRP(("Send SWITCH_MULTI_TRP_REQ to node %u", node_id));
  SwitchMultiTrpReq* req = (SwitchMultiTrpReq*)signal->getDataPtrSend();
  req->nodeId = getOwnNodeId();
  req->senderRef = reference();
  BlockReference ref = calcQmgrBlockRef(node_id);
  sendSignal(ref, GSN_SWITCH_MULTI_TRP_REQ, signal,
             SwitchMultiTrpReq::SignalLength, JBB);
  if (ERROR_INSERTED(978))
  {
    NdbSleep_MilliSleep(1500);
  }
}
9804
9805 void
execSWITCH_MULTI_TRP_REQ(Signal * signal)9806 Qmgr::execSWITCH_MULTI_TRP_REQ(Signal *signal)
9807 {
9808 SwitchMultiTrpReq* req = (SwitchMultiTrpReq*)&signal->theData[0];
9809 NodeId node_id = req->nodeId;
9810 BlockReference block_ref = req->senderRef;
9811 DEB_MULTI_TRP(("SWITCH_MULTI_TRP_REQ node %u", node_id));
9812 NodeRecPtr nodePtr;
9813 nodePtr.i = node_id;
9814 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
9815 assign_multi_trps_to_send_threads();
9816
9817 CRASH_INSERTION(954);
9818 if (!check_all_multi_trp_nodes_connected())
9819 {
9820 if (nodePtr.p->m_is_multi_trp_setup &&
9821 m_current_switch_multi_trp_node == 0)
9822 {
9823 ndbrequire(nodePtr.p->phase == ZRUNNING);
9824 ndbrequire(nodePtr.p->m_is_in_same_nodegroup);
9825 ndbrequire(nodePtr.p->m_is_preparing_switch_trp);
9826 /* Fall through to send SWITCH_MULTI_TRP_CONF */
9827 }
9828 else
9829 {
9830 jam();
9831 ndbrequire(m_current_switch_multi_trp_node != node_id);
9832 DEB_MULTI_TRP(("Send SWITCH_MULTI_TRP_REF node %u", node_id));
9833 SwitchMultiTrpRef *ref = (SwitchMultiTrpRef*)signal->getDataPtrSend();
9834 ref->nodeId = getOwnNodeId();
9835 ref->errorCode = SwitchMultiTrpRef::SMTR_NOT_READY_FOR_SWITCH;
9836 sendSignal(block_ref, GSN_SWITCH_MULTI_TRP_REF, signal,
9837 SwitchMultiTrpRef::SignalLength, JBB);
9838 return;
9839 }
9840 }
9841 else if (m_current_switch_multi_trp_node != 0 &&
9842 m_current_switch_multi_trp_node != node_id)
9843 {
9844 /**
9845 * We are already trying to connect multi sockets to another
9846 * node. We will wait for this to complete before moving
9847 * on to the next node.
9848 */
9849 jam();
9850 DEB_MULTI_TRP(("2:Send SWITCH_MULTI_TRP_REF node %u", node_id));
9851 SwitchMultiTrpRef *ref = (SwitchMultiTrpRef*)signal->getDataPtrSend();
9852 ref->nodeId = getOwnNodeId();
9853 ref->errorCode = SwitchMultiTrpRef::SMTR_NOT_READY_FOR_SWITCH;
9854 sendSignal(block_ref, GSN_SWITCH_MULTI_TRP_REF, signal,
9855 SwitchMultiTrpRef::SignalLength, JBB);
9856 return;
9857 }
9858 /**
9859 * We haven't selected any node to connect multi sockets to yet.
9860 * In that case it is safe to answer positively since we know
9861 * that this cannot cause any deadlock.
9862 */
9863 if (m_current_switch_multi_trp_node == 0)
9864 {
9865 jam();
9866 ndbrequire(!nodePtr.p->m_is_ready_to_switch_trp);
9867 SwitchMultiTrpReq* req = (SwitchMultiTrpReq*)signal->getDataPtrSend();
9868 req->nodeId = getOwnNodeId();
9869 req->senderRef = reference();
9870 BlockReference ref = calcQmgrBlockRef(node_id);
9871 sendSignal(ref, GSN_SWITCH_MULTI_TRP_REQ, signal,
9872 SwitchMultiTrpReq::SignalLength, JBB);
9873 }
9874 else
9875 {
9876 ndbrequire(m_current_switch_multi_trp_node == node_id);
9877 }
9878 ndbrequire(nodePtr.p->m_is_multi_trp_setup)
9879 nodePtr.p->m_is_ready_to_switch_trp = true;
9880 m_current_switch_multi_trp_node = node_id;
9881 jam();
9882 DEB_MULTI_TRP(("Send SWITCH_MULTI_TRP_CONF node %u", node_id));
9883 if (ERROR_INSERTED(979))
9884 {
9885 NdbSleep_MilliSleep(1500);
9886 }
9887 SwitchMultiTrpConf *conf = (SwitchMultiTrpConf*)signal->getDataPtrSend();
9888 conf->nodeId = getOwnNodeId();
9889 sendSignal(block_ref, GSN_SWITCH_MULTI_TRP_CONF, signal,
9890 SwitchMultiTrpConf::SignalLength, JBB);
9891 }
9892
9893 void
execSWITCH_MULTI_TRP_CONF(Signal * signal)9894 Qmgr::execSWITCH_MULTI_TRP_CONF(Signal *signal)
9895 {
9896 /**
9897 * This signal can get lost if the other node fails and we have
9898 * already started.
9899 *
9900 * The TransporterRegistry will ensure that we switch back to using a
9901 * single transporter in this case, the DISCONNECT_REP code and the
9902 * NODE_FAILREP code will ensure that we reset the variables used
9903 * to setup the multi sockets next time the node starts up.
9904 */
9905 jamEntry();
9906 CRASH_INSERTION(955);
9907 SwitchMultiTrpConf *conf = (SwitchMultiTrpConf*)&signal->theData[0];
9908 Uint32 node_id = conf->nodeId;
9909 NodeRecPtr nodePtr;
9910 nodePtr.i = node_id;
9911 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
9912 ndbrequire(nodePtr.p->m_is_ready_to_switch_trp == true);
9913 ndbrequire(nodePtr.p->m_is_multi_trp_setup == true);
9914 DEB_MULTI_TRP(("Recvd SWITCH_MULTI_TRP_CONF node %u", node_id));
9915 if (ERROR_INSERTED(980))
9916 {
9917 NdbSleep_MilliSleep(1500);
9918 }
9919 switch_multi_transporter(signal, node_id);
9920 }
9921
9922 void
execSWITCH_MULTI_TRP_REF(Signal * signal)9923 Qmgr::execSWITCH_MULTI_TRP_REF(Signal *signal)
9924 {
9925 /**
9926 * The other node wasn't ready to connect multi sockets to us yet.
9927 * We will wait for a short time and try again.
9928 */
9929 SwitchMultiTrpRef *ref = (SwitchMultiTrpRef*)&signal->theData[0];
9930 Uint32 node_id = ref->nodeId;
9931 NodeRecPtr nodePtr;
9932 nodePtr.i = node_id;
9933 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
9934 ndbrequire(m_current_switch_multi_trp_node == node_id);
9935 ndbrequire(nodePtr.p->m_is_ready_to_switch_trp);
9936 m_current_switch_multi_trp_node = 0;
9937 nodePtr.p->m_is_ready_to_switch_trp = false;
9938 DEB_MULTI_TRP(("Recvd SWITCH_MULTI_TRP_REF from node %u", node_id));
9939 signal->theData[0] = ZSWITCH_MULTI_TRP;
9940 signal->theData[1] = node_id;
9941 sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 100, 2);
9942 }
9943
9944 void
switch_multi_transporter(Signal * signal,NodeId node_id)9945 Qmgr::switch_multi_transporter(Signal *signal, NodeId node_id)
9946 {
9947 ndbrequire(m_current_switch_multi_trp_node == node_id);
9948 NodeRecPtr nodePtr;
9949 nodePtr.i = node_id;
9950 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
9951 g_eventLogger->info("Switch to %u multi trp for node %u",
9952 nodePtr.p->m_used_num_multi_trps,
9953 node_id);
9954 nodePtr.p->m_is_preparing_switch_trp = false;
9955 nodePtr.p->m_is_ready_to_switch_trp = false;
9956 nodePtr.p->m_is_multi_trp_setup = false;
9957 /**
9958 * We have now reached the point where it is time to switch the transporter
9959 * from using the old transporters, currently in the active transporter set.
9960 *
9961 * The switch must be made such that we don't risk changing signal order
9962 * for signals sent from one thread to another thread in another node.
9963 *
9964 * To accomplish this we will ensure that all block threads are blocked
9965 * in THRMAN. THRMAN exists in each block thread. So a signal to THRMAN
9966 * in each THRMAN can be used to quickly synchronize all threads in the
9967 * node and keep them waiting in THRMAN. When all threads have stopped we
9968 * will also call lockMultiTransporters to avoid the connect threads from
9969 * interfering in the middle of this change and finally we will lock
9970 * the send mutex on the node we are changing to ensure that also the
9971 * send threads avoid interference with this process.
9972 *
9973 * At this point also each thread will have flushed the send buffers to
9974 * ensure that we can ensure that the last signal sent in the node
9975 * connection is a ACTIVATE_TRP_REQ signal. When the receiver gets this
9976 * signal he can activate the receiving from the new transporters since
9977 * we have ensured that no more signals will be received on the old
9978 * transporters.
9979 *
9980 * When all this things have been prepared and the ACTIVATE_TRP_REQ signal
9981 * is sent, now is the time to switch the active transporters and also
9982 * to change the MultiTransporter to use the new hash algorithm, this
9983 * is automatic by changing the number of transporters.
9984 *
9985 * We close the original socket when ACTIVATE_TRP_CONF is received from
9986 * the other side indicating that we are now in communication with the
9987 * other side over the new transporters.
9988 */
9989 FreezeThreadReq* req = CAST_PTR(FreezeThreadReq, signal->getDataPtrSend());
9990 req->nodeId = node_id;
9991 req->senderRef = reference();
9992 sendSignal(THRMAN_REF, GSN_FREEZE_THREAD_REQ, signal,
9993 FreezeThreadReq::SignalLength, JBA);
9994 return;
9995 }
9996
void
Qmgr::execFREEZE_ACTION_REQ(Signal *signal)
{
  // Sent by THRMAN once all other block threads are parked; performs the
  // actual transporter switch and replies with FREEZE_ACTION_CONF.
  jamEntry();
  FreezeActionReq *req = (FreezeActionReq*)&signal->theData[0];
  Uint32 node_id = req->nodeId;
  BlockReference ret_ref = req->senderRef;
  CRASH_INSERTION(956);
  if (ERROR_INSERTED(981))
  {
    NdbSleep_MilliSleep(1500);
  }
  /**
   * All threads except our thread is now frozen.
   *
   * Before we send the final signal on the current transporter we switch to
   * having the multi socket transporters as neighbours. By so doing we ensure
   * that the current transporter is inserted into the non-neighbour list when
   * sending the signal. If we would change after the sending we would miss
   * sending this signal since we change to the new neighbour setup after
   * sending, but before we perform the actual send.
   *
   * It is a bit tricky to change the neighbour transporters. We check the
   * neighbour in sendSignal and expect that in do_send that the same
   * neighbour handling is performed. We handle this here by first changing
   * the neighbour setting and next sending the signal. This ensures that
   * the transporter will be handled by non-neighbour handling.
   *
   * We will lock the send to
   * the current transporter to ensure that the transporter will notice when
   * the last signal have been sent. Next we will send the last signal
   * on the the currently active socket. When this signal is sent we will flush
   * the send buffers to ensure that the transporter knows when the last data
   * have been sent. We will then flag to the transporter that it should
   * shutdown the socket for writes. When both sides have performed this
   * action the socket will be closed.
   *
   * These actions will ensure that ACTIVATE_TRP_REQ is the last data
   * received on the current transporter and ensure that from now on
   * all sends are directed to the new set of transporters.
   * To ensure that no other thread is changing the multi transporter
   * setup we will lock the multi transporter mutex while performing
   * these actions. The only other thread that can be active here is
   * the send threads since we blocked all other threads at this point.
   *
   * Next we will release all mutexes and send FREEZE_ACTION_CONF to
   * THRMAN to ensure that things get started again. We will receive
   * FREEZE_THREAD_CONF back from THRMAN when all threads are in action
   * again.
   */
  DEB_MULTI_TRP(("Block threads frozen for node %u", node_id));

  globalTransporterRegistry.lockMultiTransporters();
  Multi_Transporter *multi_trp =
    globalTransporterRegistry.get_node_multi_transporter(node_id);
  if (is_multi_socket_setup_active(node_id, true))
  {
    jam();

    // Lock sends on the currently active (single) transporter first; it
    // must carry exactly one more signal (ACTIVATE_TRP_REQ below).
    Transporter *current_trp = multi_trp->get_active_transporter(0);
    TrpId current_trp_id = current_trp->getTransporterIndex();
    multi_trp->get_callback_obj()->lock_send_transporter(node_id,
                                                         current_trp_id);

    // Also lock sends on every inactive (new multi socket) transporter so
    // nothing is sent on them before they are switched in below.
    Uint32 num_inactive_transporters =
      multi_trp->get_num_inactive_transporters();
    for (Uint32 i = 0; i < num_inactive_transporters; i++)
    {
      jam();
      Transporter *tmp_trp = multi_trp->get_inactive_transporter(i);
      TrpId trp_id = tmp_trp->getTransporterIndex();
      multi_trp->get_callback_obj()->lock_send_transporter(node_id, trp_id);
    }

    // The last signal on the old transporter: tells the peer QMGR to
    // activate receiving on the new transporters.
    ActivateTrpReq* act_trp_req = CAST_PTR(ActivateTrpReq,
                                           signal->getDataPtrSend());
    act_trp_req->nodeId = getOwnNodeId();
    act_trp_req->numTrps = num_inactive_transporters;
    act_trp_req->senderRef = reference();
    sendSignal(calcQmgrBlockRef(node_id), GSN_ACTIVATE_TRP_REQ, signal,
               ActivateTrpReq::SignalLength, JBB);

    flush_send_buffers();
    /* Either perform send or insert_trp below TODO */
    multi_trp->get_callback_obj()->unlock_send_transporter(node_id,
                                                           current_trp_id);

    if (ERROR_INSERTED(982))
    {
      NdbSleep_MilliSleep(2500);
    }
    // Swap active and inactive transporter sets: the multi sockets become
    // the active set from this point.
    multi_trp->switch_active_trp();

    // Unlock and enable send buffers on the newly active transporters.
    Uint32 num_active_transporters =
      multi_trp->get_num_active_transporters();
    for (Uint32 i = 0; i < num_active_transporters; i++)
    {
      jam();
      Transporter *tmp_trp = multi_trp->get_active_transporter(i);
      TrpId id = tmp_trp->getTransporterIndex();
      multi_trp->get_callback_obj()->unlock_send_transporter(node_id, id);
      multi_trp->get_callback_obj()->enable_send_buffer(node_id, id);
    }
    globalTransporterRegistry.insert_node_transporter(node_id, multi_trp);
    globalTransporterRegistry.unlockMultiTransporters();

    if (ERROR_INSERTED(983))
    {
      NdbSleep_MilliSleep(2500);
    }
    DEB_MULTI_TRP(("Change neighbour node setup for node %u",
                   node_id));
    startChangeNeighbourNode();
    setNeighbourNode(node_id);
    endChangeNeighbourNode();

    if (ERROR_INSERTED(984))
    {
      NdbSleep_MilliSleep(2500);
    }
    DEB_MULTI_TRP(("Now communication is active with node %u using multi trp"
                   ", using %u transporters",
                   node_id,
                   num_active_transporters));
  }
  else
  {
    // The node failed while we were freezing threads; nothing to switch.
    jam();
    DEB_MULTI_TRP(("Node %u failed when freezing threads", node_id));
    globalTransporterRegistry.unlockMultiTransporters();
  }
  // Always confirm to THRMAN so the frozen threads are released again.
  FreezeActionConf *conf =
    CAST_PTR(FreezeActionConf, signal->getDataPtrSend());
  conf->nodeId = node_id;
  sendSignal(ret_ref, GSN_FREEZE_ACTION_CONF, signal,
             FreezeActionConf::SignalLength, JBA);
}
10134
10135 bool
is_multi_socket_setup_active(Uint32 node_id,bool locked)10136 Qmgr::is_multi_socket_setup_active(Uint32 node_id, bool locked)
10137 {
10138 bool ret_val = false;
10139 NodeRecPtr nodePtr;
10140 nodePtr.i = node_id;
10141 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
10142 if (!locked)
10143 {
10144 globalTransporterRegistry.lockMultiTransporters();
10145 }
10146 if (c_connectedNodes.get(node_id) &&
10147 nodePtr.p->phase == ZRUNNING)
10148 {
10149 jam();
10150 DEB_MULTI_TRP(("Multi socket setup for node %u is active",
10151 node_id));
10152 ret_val = true;
10153 }
10154 if (!locked)
10155 {
10156 globalTransporterRegistry.unlockMultiTransporters();
10157 }
10158 return ret_val;
10159 }
10160
10161 void
execFREEZE_THREAD_CONF(Signal * signal)10162 Qmgr::execFREEZE_THREAD_CONF(Signal *signal)
10163 {
10164 FreezeThreadConf *conf = (FreezeThreadConf*)&signal->theData[0];
10165 Uint32 node_id = conf->nodeId;
10166 NodeRecPtr nodePtr;
10167 nodePtr.i = node_id;
10168 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
10169 CRASH_INSERTION(957);
10170 if (is_multi_socket_setup_active(node_id, false))
10171 {
10172 jam();
10173 nodePtr.p->m_is_freeze_thread_completed = true;
10174 DEB_MULTI_TRP(("Freeze block threads for node %u completed", node_id));
10175 if (ERROR_INSERTED(985))
10176 {
10177 NdbSleep_MilliSleep(1500);
10178 }
10179 check_switch_completed(signal, node_id);
10180 }
10181 else
10182 {
10183 jam();
10184 DEB_MULTI_TRP(("2:Node %u failed when freezing threads", node_id));
10185 }
10186 }
10187
10188 void
execACTIVATE_TRP_REQ(Signal * signal)10189 Qmgr::execACTIVATE_TRP_REQ(Signal *signal)
10190 {
10191 /**
10192 * Receiving this signal implies that node sending it is still
10193 * seen as being up and running.
10194 */
10195 jamEntry();
10196 CRASH_INSERTION(958);
10197 ActivateTrpReq* req = (ActivateTrpReq*)&signal->theData[0];
10198 Uint32 node_id = req->nodeId;
10199 Uint32 num_trps = req->numTrps;
10200 NodeRecPtr nodePtr;
10201 nodePtr.i = node_id;
10202 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
10203 nodePtr.p->m_multi_trp_blockref = req->senderRef;
10204 nodePtr.p->m_num_activated_trps = num_trps;
10205 ndbrequire(num_trps == nodePtr.p->m_used_num_multi_trps);
10206
10207 if (ERROR_INSERTED(977))
10208 {
10209 NdbSleep_MilliSleep(1500);
10210 }
10211 SyncThreadViaReqConf *syncReq =
10212 (SyncThreadViaReqConf*)signal->getDataPtrSend();
10213 syncReq->senderRef = reference();
10214 syncReq->senderData = node_id;
10215 syncReq->actionType = SyncThreadViaReqConf::FOR_ACTIVATE_TRP_REQ;
10216 sendSignal(TRPMAN_REF, GSN_SYNC_THREAD_VIA_REQ, signal,
10217 SyncThreadViaReqConf::SignalLength, JBA);
10218 }
10219
10220 void
handle_activate_trp_req(Signal * signal,Uint32 node_id)10221 Qmgr::handle_activate_trp_req(Signal *signal, Uint32 node_id)
10222 {
10223 jam();
10224 NodeRecPtr nodePtr;
10225 nodePtr.i = node_id;
10226 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
10227 Uint32 num_trps = nodePtr.p->m_num_activated_trps;
10228 CRASH_INSERTION(959);
10229 nodePtr.p->m_num_activated_trps = 0;
10230 DEB_MULTI_TRP(("Activate receive in multi trp for node %u, from ref: %x",
10231 node_id,
10232 nodePtr.p->m_multi_trp_blockref));
10233 globalTransporterRegistry.lockMultiTransporters();
10234 Multi_Transporter *multi_trp =
10235 globalTransporterRegistry.get_node_multi_transporter(node_id);
10236 if (is_multi_socket_setup_active(node_id, true))
10237 {
10238 jam();
10239 Transporter *t;
10240 for (Uint32 i = 0; i < num_trps; i++)
10241 {
10242 if (multi_trp->get_num_inactive_transporters() == num_trps)
10243 {
10244 jam();
10245 t = multi_trp->get_inactive_transporter(i);
10246 }
10247 else
10248 {
10249 jam();
10250 t = multi_trp->get_active_transporter(i);
10251 ndbrequire(multi_trp->get_num_active_transporters());
10252 }
10253 Uint32 trp_id = t->getTransporterIndex();
10254 ActivateTrpReq *act_trp_req =
10255 CAST_PTR(ActivateTrpReq, signal->getDataPtrSend());
10256 act_trp_req->nodeId = node_id;
10257 act_trp_req->trpId = trp_id;
10258 act_trp_req->numTrps = num_trps;
10259 act_trp_req->senderRef = reference();
10260 sendSignal(TRPMAN_REF, GSN_ACTIVATE_TRP_REQ, signal,
10261 ActivateTrpReq::SignalLength, JBB);
10262 if (ERROR_INSERTED(986))
10263 {
10264 NdbSleep_MilliSleep(500);
10265 }
10266 }
10267 }
10268 globalTransporterRegistry.unlockMultiTransporters();
10269 }
10270
10271 void
execACTIVATE_TRP_CONF(Signal * signal)10272 Qmgr::execACTIVATE_TRP_CONF(Signal *signal)
10273 {
10274 jamEntry();
10275 ActivateTrpConf *conf = (ActivateTrpConf*)&signal->theData[0];
10276 Uint32 node_id = conf->nodeId;
10277 BlockReference sender_ref = conf->senderRef;
10278 NodeRecPtr nodePtr;
10279 nodePtr.i = node_id;
10280 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
10281
10282 DEB_MULTI_TRP(("ACTIVATE_TRP_CONF(QMGR) own node %u about node %u"
10283 ", ref: %x",
10284 getOwnNodeId(),
10285 node_id,
10286 sender_ref));
10287 if (refToNode(sender_ref) == getOwnNodeId())
10288 {
10289 if (is_multi_socket_setup_active(node_id, false))
10290 {
10291 jam();
10292 CRASH_INSERTION(960);
10293 nodePtr.p->m_num_activated_trps++;
10294 if (nodePtr.p->m_num_activated_trps < nodePtr.p->m_used_num_multi_trps)
10295 {
10296 jam();
10297 return;
10298 }
10299 DEB_MULTI_TRP(("Complete activation recv for multi trp node %u,"
10300 " own node: %u",
10301 node_id,
10302 getOwnNodeId()));
10303 ndbrequire(nodePtr.p->m_num_activated_trps ==
10304 nodePtr.p->m_used_num_multi_trps);
10305 ActivateTrpConf *conf =
10306 CAST_PTR(ActivateTrpConf, signal->getDataPtrSend());
10307 conf->nodeId = getOwnNodeId();
10308 conf->senderRef = reference();
10309 BlockReference ref = nodePtr.p->m_multi_trp_blockref;
10310 nodePtr.p->m_multi_trp_blockref = 0;
10311 ndbrequire(refToNode(ref) == node_id);
10312 ndbrequire(refToMain(ref) == QMGR);
10313 sendSignal(ref, GSN_ACTIVATE_TRP_CONF, signal,
10314 ActivateTrpConf::SignalLength, JBB);
10315 nodePtr.p->m_is_activate_trp_ready_for_me = true;
10316 if (ERROR_INSERTED(975))
10317 {
10318 NdbSleep_MilliSleep(1500);
10319 }
10320 check_switch_completed(signal, node_id);
10321 }
10322 else
10323 {
10324 jam();
10325 DEB_MULTI_TRP(("Node %u failed in multi trp activation", node_id));
10326 }
10327 }
10328 else
10329 {
10330 jam();
10331 CRASH_INSERTION(952);
10332 DEB_MULTI_TRP(("Completed activation recv for multi trp node %u",
10333 node_id));
10334 ndbrequire(is_multi_socket_setup_active(node_id, false));
10335 nodePtr.p->m_is_activate_trp_ready_for_other = true;
10336 check_switch_completed(signal, node_id);
10337 }
10338 }
10339
void
Qmgr::check_switch_completed(Signal *signal, NodeId node_id)
{
  // Completes the transporter switch once all three events have occurred:
  // our own activation, the peer's activation, and the thread un-freeze.
  // Until then this is a no-op.
  NodeRecPtr nodePtr;
  nodePtr.i = node_id;
  ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
  if (!(nodePtr.p->m_is_activate_trp_ready_for_other &&
        nodePtr.p->m_is_activate_trp_ready_for_me &&
        nodePtr.p->m_is_freeze_thread_completed))
  {
    jam();
    DEB_MULTI_TRP(("Still waiting for node %u switch to complete", node_id));
    return;
  }

  // Collect the now-inactive (old) transporters under the multi
  // transporter mutex, then shut them down after releasing it.
  globalTransporterRegistry.lockMultiTransporters();
  Multi_Transporter *multi_trp =
    globalTransporterRegistry.get_node_multi_transporter(node_id);
  ndbrequire(multi_trp && multi_trp->isMultiTransporter());
  Uint32 num_inactive_transporters =
    multi_trp->get_num_inactive_transporters();
  Transporter *array_trp[MAX_NODE_GROUP_TRANSPORTERS];
  for (Uint32 i = 0; i < num_inactive_transporters; i++)
  {
    jam();
    Transporter *tmp_trp = multi_trp->get_inactive_transporter(i);
    array_trp[i] = tmp_trp;
  }
  globalTransporterRegistry.unlockMultiTransporters();
  // NOTE(review): multi_trp is dereferenced below after the mutex was
  // released — presumably the Multi_Transporter object itself stays valid
  // outside the lock; confirm against TransporterRegistry ownership rules.
  for (Uint32 i = 0; i < num_inactive_transporters; i++)
  {
    jam();
    Transporter *tmp_trp = array_trp[i];
    TrpId trp_id = tmp_trp->getTransporterIndex();
    tmp_trp->get_callback_obj()->lock_transporter(node_id, trp_id);
    tmp_trp->shutdown();
    tmp_trp->get_callback_obj()->unlock_transporter(node_id, trp_id);
    multi_trp->get_callback_obj()->disable_send_buffer(node_id, trp_id);
  }
  /**
   * We have now completed the switch to new set of transporters, the
   * old set is inactive and will be put back if the node fails. We
   * are now ready to see if any more nodes require attention.
   */
  if (ERROR_INSERTED(976))
  {
    NdbSleep_MilliSleep(1500);
  }
  // Reset all per-node switch state; the node is now on multi transporters.
  m_current_switch_multi_trp_node = 0;
  nodePtr.p->m_is_using_multi_trp = true;
  nodePtr.p->m_is_ready_to_switch_trp = false;
  nodePtr.p->m_is_activate_trp_ready_for_me = false;
  nodePtr.p->m_is_activate_trp_ready_for_other = false;
  nodePtr.p->m_is_freeze_thread_completed = false;
  nodePtr.p->m_set_up_multi_trp_started = false;
  DEB_MULTI_TRP(("Completed switch to multi trp for node %u", node_id));
  CRASH_INSERTION(953);
  check_more_trp_switch_nodes(signal);
}
10399
10400 void
check_more_trp_switch_nodes(Signal * signal)10401 Qmgr::check_more_trp_switch_nodes(Signal* signal)
10402 {
10403 if (!check_all_multi_trp_nodes_connected())
10404 {
10405 jam();
10406 /* Still waiting for nodes to complete connect */
10407 DEB_MULTI_TRP(("Still waiting for nodes to complete connect"));
10408 return;
10409 }
10410 NodeId node_id = 0;
10411 if (select_node_id_for_switch(node_id, false))
10412 {
10413 jam();
10414 send_switch_multi_transporter(signal, node_id, false);
10415 return;
10416 }
10417 if (m_initial_set_up_multi_trp_done)
10418 {
10419 jam();
10420 DEB_MULTI_TRP(("Initial setup already done"));
10421 return;
10422 }
10423 if (m_get_num_multi_trps_sent != 0)
10424 {
10425 jam();
10426 DEB_MULTI_TRP(("Still waiting for GET_NUM_MULTI_TRP_REQ"));
10427 return;
10428 }
10429 bool done = true;
10430 for (Uint32 node_id = 1; node_id < MAX_NDB_NODES; node_id++)
10431 {
10432 NodeRecPtr nodePtr;
10433 nodePtr.i = node_id;
10434 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
10435 if (nodePtr.p->m_is_in_same_nodegroup &&
10436 nodePtr.p->phase == ZRUNNING &&
10437 nodePtr.p->m_set_up_multi_trp_started)
10438 {
10439 if (!nodePtr.p->m_is_using_multi_trp)
10440 {
10441 jam();
10442 done = false;
10443 }
10444 }
10445 }
10446 if (done)
10447 {
10448 jam();
10449 DEB_MULTI_TRP(("Initial setup of multi trp now done"));
10450 m_initial_set_up_multi_trp_done = true;
10451 sendSignal(m_ref_set_up_multi_trp_req,
10452 GSN_SET_UP_MULTI_TRP_CONF,
10453 signal,
10454 1,
10455 JBB);
10456 }
10457 else
10458 {
10459 DEB_MULTI_TRP(("Not done with setup of multi trp yet"));
10460 jam();
10461 }
10462 }
10463
10464 void
check_no_multi_trp(Signal * signal,NodeId node_id)10465 Qmgr::check_no_multi_trp(Signal *signal, NodeId node_id)
10466 {
10467 NodeRecPtr nodePtr;
10468 nodePtr.i = node_id;
10469 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
10470 if (nodePtr.p->m_is_get_num_multi_trp_active)
10471 {
10472 jam();
10473 dec_get_num_multi_trps_sent(nodePtr.i);
10474 }
10475 DEB_MULTI_TRP(("check_no_multi_trp for node %u", node_id));
10476 if (node_id == m_current_switch_multi_trp_node)
10477 {
10478 jam();
10479 m_current_switch_multi_trp_node = 0;
10480 check_more_trp_switch_nodes(signal);
10481 }
10482 }
10483
10484 bool
check_all_multi_trp_nodes_connected()10485 Qmgr::check_all_multi_trp_nodes_connected()
10486 {
10487 /**
10488 * Wait for all neighbour nodes to connect all multi transporters
10489 * before proceeding with the next phase where we start switching
10490 * to multi transporter setup.
10491 */
10492 NodeRecPtr nodePtr;
10493 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++)
10494 {
10495 ptrAss(nodePtr, nodeRec);
10496 if (nodePtr.p->phase == ZRUNNING &&
10497 nodePtr.p->m_is_in_same_nodegroup &&
10498 (nodePtr.p->m_is_preparing_switch_trp ||
10499 nodePtr.p->m_is_get_num_multi_trp_active))
10500 {
10501 /* Neighbour node preparing switch */
10502 jam();
10503 jamLine(Uint16(nodePtr.i));
10504 if (!nodePtr.p->m_is_multi_trp_setup)
10505 {
10506 jam();
10507 /* Still waiting for connections of this node to complete */
10508 return false;
10509 }
10510 }
10511 }
10512 jam();
10513 /* All nodes to connect are done */
10514 return true;
10515 }
10516
10517 bool
select_node_id_for_switch(NodeId & node_id,bool check_found)10518 Qmgr::select_node_id_for_switch(NodeId &node_id, bool check_found)
10519 {
10520 NodeId max_node_id = 0;
10521 NodeRecPtr nodePtr;
10522 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++)
10523 {
10524 ptrAss(nodePtr, nodeRec);
10525 if (nodePtr.p->phase == ZRUNNING &&
10526 nodePtr.p->m_is_in_same_nodegroup &&
10527 nodePtr.p->m_is_preparing_switch_trp &&
10528 nodePtr.p->m_is_multi_trp_setup)
10529 {
10530 if (nodePtr.i > max_node_id)
10531 {
10532 jam();
10533 jamLine(Uint16(nodePtr.i));
10534 max_node_id = nodePtr.i;
10535 }
10536 }
10537 }
10538 ndbrequire((!check_found) || (max_node_id != 0));
10539 if (m_current_switch_multi_trp_node != 0)
10540 {
10541 jam();
10542 return false;
10543 }
10544 if (max_node_id < getOwnNodeId())
10545 {
10546 jam();
10547 return false;
10548 }
10549 node_id = max_node_id;
10550 nodePtr.i = max_node_id;
10551 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);
10552 ndbrequire(!nodePtr.p->m_is_ready_to_switch_trp);
10553 jam();
10554 return true;
10555 }
10556