1 /*
2    Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License, version 2.0,
6    as published by the Free Software Foundation.
7 
8    This program is also distributed with certain software (including
9    but not limited to OpenSSL) that is licensed under separate terms,
10    as designated in a particular file or component or in included license
11    documentation.  The authors of MySQL hereby grant you an additional
12    permission to link the program and your derivative works with the
13    separately licensed software that they have included with MySQL.
14 
15    This program is distributed in the hope that it will be useful,
16    but WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18    GNU General Public License, version 2.0, for more details.
19 
20    You should have received a copy of the GNU General Public License
21    along with this program; if not, write to the Free Software
22    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
23 */
24 
25 #define DBDIH_C
26 #include <ndb_global.h>
27 #include <ndb_limits.h>
28 #include <ndb_version.h>
29 #include <NdbOut.hpp>
30 
31 #include "Dbdih.hpp"
32 #include "Configuration.hpp"
33 
34 #include <signaldata/BlockCommitOrd.hpp>
35 #include <signaldata/CheckNodeGroups.hpp>
36 #include <signaldata/CopyActive.hpp>
37 #include <signaldata/CopyFrag.hpp>
38 #include <signaldata/CopyGCIReq.hpp>
39 #include <signaldata/DiAddTab.hpp>
40 #include <signaldata/DictStart.hpp>
41 #include <signaldata/DiGetNodes.hpp>
42 #include <signaldata/DihContinueB.hpp>
43 #include <signaldata/DihSwitchReplica.hpp>
44 #include <signaldata/DumpStateOrd.hpp>
45 #include <signaldata/EmptyLcp.hpp>
46 #include <signaldata/EventReport.hpp>
47 #include <signaldata/GCP.hpp>
48 #include <signaldata/HotSpareRep.hpp>
49 #include <signaldata/MasterGCP.hpp>
50 #include <signaldata/MasterLCP.hpp>
51 #include <signaldata/NFCompleteRep.hpp>
52 #include <signaldata/NodeFailRep.hpp>
53 #include <signaldata/ReadNodesConf.hpp>
54 #include <signaldata/StartFragReq.hpp>
55 #include <signaldata/StartInfo.hpp>
56 #include <signaldata/StartMe.hpp>
57 #include <signaldata/StartPerm.hpp>
58 #include <signaldata/StartRec.hpp>
59 #include <signaldata/StopPerm.hpp>
60 #include <signaldata/StopMe.hpp>
61 #include <signaldata/TestOrd.hpp>
62 #include <signaldata/WaitGCP.hpp>
63 #include <signaldata/DihStartTab.hpp>
64 #include <signaldata/LCP.hpp>
65 #include <signaldata/SystemError.hpp>
66 
67 #include <signaldata/TakeOver.hpp>
68 
69 #include <signaldata/DropTab.hpp>
70 #include <signaldata/AlterTab.hpp>
71 #include <signaldata/AlterTable.hpp>
72 #include <signaldata/PrepDropTab.hpp>
73 #include <signaldata/SumaImpl.hpp>
74 #include <signaldata/DictTabInfo.hpp>
75 #include <signaldata/CreateFragmentation.hpp>
76 #include <signaldata/LqhFrag.hpp>
77 #include <signaldata/FsOpenReq.hpp>
78 #include <signaldata/DihScanTab.hpp>
79 #include <signaldata/DictLock.hpp>
80 #include <DebuggerNames.hpp>
81 #include <signaldata/Upgrade.hpp>
82 #include <NdbEnv.h>
83 #include <signaldata/CreateNodegroup.hpp>
84 #include <signaldata/CreateNodegroupImpl.hpp>
85 #include <signaldata/DropNodegroup.hpp>
86 #include <signaldata/DropNodegroupImpl.hpp>
87 #include <signaldata/DihGetTabInfo.hpp>
88 #include <SectionReader.hpp>
89 #include <signaldata/DihRestart.hpp>
90 
91 #include <EventLogger.hpp>
92 extern EventLogger * g_eventLogger;
93 
// View the start of the sysfileData word buffer as a Sysfile structure.
#define SYSFILE ((Sysfile *)&sysfileData[0])
// NOTE(review): presumably the maximum number of crashed replicas kept per
// replica record -- not used in this part of the file, confirm at use sites.
#define MAX_CRASHED_REPLICAS 8
// Initial values: 0 for a create GCI, the all-ones Uint32 for a last GCI.
#define ZINIT_CREATE_GCI Uint32(0)
#define ZINIT_REPLICA_LAST_GCI Uint32(-1)
98 
/**
 * Return from the enclosing (void) function when checkNodeAlive(node) is
 * false, leaving a jam() trace entry first.
 *
 * The macro expands to a bare if-statement, so use it as a complete statement
 * and never directly in front of an `else`.  Its last line ends with a line
 * continuation, so the line following the definition has to stay blank.
 */
#define RETURN_IF_NODE_NOT_ALIVE(node) \
  if (!checkNodeAlive((node))) { \
    jam(); \
    return; \
  } \
104 
/**
 * Clear receiveNodeId from c_<sigName>_Counter and return from the enclosing
 * (void) function unless the counter then reports done().
 *
 * Code placed after the macro therefore only runs once done() is true.
 */
#define receiveLoopMacro(sigName, receiveNodeId)\
{                                                \
  c_##sigName##_Counter.clearWaitingFor(receiveNodeId); \
  if(c_##sigName##_Counter.done() == false){     \
     jam();                                      \
     return;                                     \
  }                                              \
}
113 
/**
 * Call clearWaitingFor() on c_<sigName>_Counter (no argument - presumably
 * clears every node), then walk the node list that starts at cfirstAliveNode
 * and is linked through nextNode until RNIL.  Each visited node is first
 * registered in the counter and then handed to
 * signalRoutine(signal, nodeId, extra).
 *
 * The walk is a do/while loop: cfirstAliveNode is used without an RNIL test,
 * so the list is expected to hold at least one node (ptrCheckGuard validates
 * the index on every step).
 */
#define sendLoopMacro(sigName, signalRoutine, extra)                    \
{                                                                       \
  c_##sigName##_Counter.clearWaitingFor();                              \
  NodeRecordPtr specNodePtr;                                            \
  specNodePtr.i = cfirstAliveNode;                                      \
  do {                                                                  \
    jam();                                                              \
    ptrCheckGuard(specNodePtr, MAX_NDB_NODES, nodeRecord);              \
    c_##sigName##_Counter.setWaitingFor(specNodePtr.i);                 \
    signalRoutine(signal, specNodePtr.i, extra);                        \
    specNodePtr.i = specNodePtr.p->nextNode;                            \
  } while (specNodePtr.i != RNIL);                                      \
}
127 
128 static
129 Uint32
prevLcpNo(Uint32 lcpNo)130 prevLcpNo(Uint32 lcpNo){
131   if(lcpNo == 0)
132     return MAX_LCP_USED - 1;
133   return lcpNo - 1;
134 }
135 
136 static
137 Uint32
nextLcpNo(Uint32 lcpNo)138 nextLcpNo(Uint32 lcpNo){
139   lcpNo++;
140   if(lcpNo >= MAX_LCP_USED)
141     return 0;
142   return lcpNo;
143 }
144 
nullRoutine(Signal * signal,Uint32 nodeId,Uint32 extra)145 void Dbdih::nullRoutine(Signal* signal, Uint32 nodeId, Uint32 extra)
146 {
147 }//Dbdih::nullRoutine()
148 
sendCOPY_GCIREQ(Signal * signal,Uint32 nodeId,Uint32 extra)149 void Dbdih::sendCOPY_GCIREQ(Signal* signal, Uint32 nodeId, Uint32 extra)
150 {
151   ndbrequire(c_copyGCIMaster.m_copyReason != CopyGCIReq::IDLE);
152 
153   const BlockReference ref = calcDihBlockRef(nodeId);
154   const Uint32 wordPerSignal = CopyGCIReq::DATA_SIZE;
155   const Uint32 noOfSignals = ((Sysfile::SYSFILE_SIZE32 + (wordPerSignal - 1)) /
156 			      wordPerSignal);
157 
158   CopyGCIReq * const copyGCI = (CopyGCIReq *)&signal->theData[0];
159   copyGCI->anyData = nodeId;
160   copyGCI->copyReason = c_copyGCIMaster.m_copyReason;
161   copyGCI->startWord = 0;
162 
163   for(Uint32 i = 0; i < noOfSignals; i++) {
164     jam();
165     { // Do copy
166       const int startWord = copyGCI->startWord;
167       for(Uint32 j = 0; j < wordPerSignal; j++) {
168         copyGCI->data[j] = sysfileData[j+startWord];
169       }//for
170     }
171     sendSignal(ref, GSN_COPY_GCIREQ, signal, 25, JBB);
172     copyGCI->startWord += wordPerSignal;
173   }//for
174 }//Dbdih::sendCOPY_GCIREQ()
175 
176 
sendDIH_SWITCH_REPLICA_REQ(Signal * signal,Uint32 nodeId,Uint32 extra)177 void Dbdih::sendDIH_SWITCH_REPLICA_REQ(Signal* signal, Uint32 nodeId,
178                                        Uint32 extra)
179 {
180   const BlockReference ref    = calcDihBlockRef(nodeId);
181   sendSignal(ref, GSN_DIH_SWITCH_REPLICA_REQ, signal,
182              DihSwitchReplicaReq::SignalLength, JBB);
183 }//Dbdih::sendDIH_SWITCH_REPLICA_REQ()
184 
sendEMPTY_LCP_REQ(Signal * signal,Uint32 nodeId,Uint32 extra)185 void Dbdih::sendEMPTY_LCP_REQ(Signal* signal, Uint32 nodeId, Uint32 extra)
186 {
187   BlockReference ref = calcLqhBlockRef(nodeId);
188   sendSignal(ref, GSN_EMPTY_LCP_REQ, signal, EmptyLcpReq::SignalLength, JBB);
189 }//Dbdih::sendEMPTY_LCPREQ()
190 
sendGCP_COMMIT(Signal * signal,Uint32 nodeId,Uint32 extra)191 void Dbdih::sendGCP_COMMIT(Signal* signal, Uint32 nodeId, Uint32 extra)
192 {
193   BlockReference ref = calcDihBlockRef(nodeId);
194   GCPCommit *req = (GCPCommit*)signal->getDataPtrSend();
195   req->nodeId = cownNodeId;
196   req->gci_hi = Uint32(m_micro_gcp.m_master.m_new_gci >> 32);
197   req->gci_lo = Uint32(m_micro_gcp.m_master.m_new_gci);
198   sendSignal(ref, GSN_GCP_COMMIT, signal, GCPCommit::SignalLength, JBA);
199 
200   ndbassert(m_micro_gcp.m_enabled || Uint32(m_micro_gcp.m_new_gci) == 0);
201 }//Dbdih::sendGCP_COMMIT()
202 
sendGCP_PREPARE(Signal * signal,Uint32 nodeId,Uint32 extra)203 void Dbdih::sendGCP_PREPARE(Signal* signal, Uint32 nodeId, Uint32 extra)
204 {
205   BlockReference ref = calcDihBlockRef(nodeId);
206   GCPPrepare *req = (GCPPrepare*)signal->getDataPtrSend();
207   req->nodeId = cownNodeId;
208   req->gci_hi = Uint32(m_micro_gcp.m_master.m_new_gci >> 32);
209   req->gci_lo = Uint32(m_micro_gcp.m_master.m_new_gci);
210 
211   if (! (ERROR_INSERTED(7201) || ERROR_INSERTED(7202)))
212   {
213     sendSignal(ref, GSN_GCP_PREPARE, signal, GCPPrepare::SignalLength, JBA);
214   }
215   else if (ERROR_INSERTED(7201))
216   {
217     sendSignal(ref, GSN_GCP_PREPARE, signal, GCPPrepare::SignalLength, JBB);
218   }
219   else if (ERROR_INSERTED(7202))
220   {
221     ndbrequire(nodeId == getOwnNodeId());
222     sendSignalWithDelay(ref, GSN_GCP_PREPARE, signal, 2000,
223                         GCPPrepare::SignalLength);
224   }
225   else
226   {
227     ndbrequire(false); // should be dead code #ifndef ERROR_INSERT
228   }
229 
230   ndbassert(m_micro_gcp.m_enabled || Uint32(m_micro_gcp.m_new_gci) == 0);
231 }//Dbdih::sendGCP_PREPARE()
232 
233 void
sendSUB_GCP_COMPLETE_REP(Signal * signal,Uint32 nodeId,Uint32 extra)234 Dbdih::sendSUB_GCP_COMPLETE_REP(Signal* signal, Uint32 nodeId, Uint32 extra)
235 {
236   ndbassert(m_micro_gcp.m_enabled || Uint32(m_micro_gcp.m_new_gci) == 0);
237   if (!ndbd_dih_sub_gcp_complete_ack(getNodeInfo(nodeId).m_version))
238   {
239     jam();
240     c_SUB_GCP_COMPLETE_REP_Counter.clearWaitingFor(nodeId);
241   }
242   BlockReference ref = calcDihBlockRef(nodeId);
243   sendSignal(ref, GSN_SUB_GCP_COMPLETE_REP, signal,
244              SubGcpCompleteRep::SignalLength, JBA);
245 }
246 
sendGCP_SAVEREQ(Signal * signal,Uint32 nodeId,Uint32 extra)247 void Dbdih::sendGCP_SAVEREQ(Signal* signal, Uint32 nodeId, Uint32 extra)
248 {
249   GCPSaveReq * const saveReq = (GCPSaveReq*)&signal->theData[0];
250   BlockReference ref = calcDihBlockRef(nodeId);
251   saveReq->dihBlockRef = reference();
252   saveReq->dihPtr = nodeId;
253   saveReq->gci = m_gcp_save.m_master.m_new_gci;
254   sendSignal(ref, GSN_GCP_SAVEREQ, signal, GCPSaveReq::SignalLength, JBB);
255 }//Dbdih::sendGCP_SAVEREQ()
256 
sendINCL_NODEREQ(Signal * signal,Uint32 nodeId,Uint32 extra)257 void Dbdih::sendINCL_NODEREQ(Signal* signal, Uint32 nodeId, Uint32 extra)
258 {
259   BlockReference nodeDihRef = calcDihBlockRef(nodeId);
260   signal->theData[0] = reference();
261   signal->theData[1] = c_nodeStartMaster.startNode;
262   signal->theData[2] = c_nodeStartMaster.failNr;
263   signal->theData[3] = 0;
264   signal->theData[4] = (Uint32)(m_micro_gcp.m_current_gci >> 32);
265   signal->theData[5] = (Uint32)(m_micro_gcp.m_current_gci & 0xFFFFFFFF);
266   sendSignal(nodeDihRef, GSN_INCL_NODEREQ, signal, 6, JBA);
267 }//Dbdih::sendINCL_NODEREQ()
268 
sendMASTER_GCPREQ(Signal * signal,Uint32 nodeId,Uint32 extra)269 void Dbdih::sendMASTER_GCPREQ(Signal* signal, Uint32 nodeId, Uint32 extra)
270 {
271   BlockReference ref = calcDihBlockRef(nodeId);
272   sendSignal(ref, GSN_MASTER_GCPREQ, signal, MasterGCPReq::SignalLength, JBB);
273 }//Dbdih::sendMASTER_GCPREQ()
274 
sendMASTER_LCPREQ(Signal * signal,Uint32 nodeId,Uint32 extra)275 void Dbdih::sendMASTER_LCPREQ(Signal* signal, Uint32 nodeId, Uint32 extra)
276 {
277   BlockReference ref = calcDihBlockRef(nodeId);
278   sendSignal(ref, GSN_MASTER_LCPREQ, signal, MasterLCPReq::SignalLength, JBB);
279 }//Dbdih::sendMASTER_LCPREQ()
280 
sendSTART_INFOREQ(Signal * signal,Uint32 nodeId,Uint32 extra)281 void Dbdih::sendSTART_INFOREQ(Signal* signal, Uint32 nodeId, Uint32 extra)
282 {
283   const BlockReference ref = calcDihBlockRef(nodeId);
284   sendSignal(ref, GSN_START_INFOREQ, signal, StartInfoReq::SignalLength, JBB);
285 }//sendSTART_INFOREQ()
286 
sendSTART_RECREQ(Signal * signal,Uint32 nodeId,Uint32 extra)287 void Dbdih::sendSTART_RECREQ(Signal* signal, Uint32 nodeId, Uint32 extra)
288 {
289   if (!m_sr_nodes.get(nodeId))
290   {
291     jam();
292     c_START_RECREQ_Counter.clearWaitingFor(nodeId);
293     return;
294   }
295 
296   Uint32 keepGCI = SYSFILE->keepGCI;
297   Uint32 lastCompletedGCI = SYSFILE->lastCompletedGCI[nodeId];
298   if (keepGCI > lastCompletedGCI)
299   {
300     jam();
301     keepGCI = lastCompletedGCI;
302   }
303 
304   StartRecReq * const req = (StartRecReq*)&signal->theData[0];
305   BlockReference ref = calcLqhBlockRef(nodeId);
306   req->receivingNodeId = nodeId;
307   req->senderRef = reference();
308   req->keepGci = keepGCI;
309   req->lastCompletedGci = lastCompletedGCI;
310   req->newestGci = SYSFILE->newestRestorableGCI;
311   req->senderData = extra;
312   m_sr_nodes.copyto(NdbNodeBitmask::Size, req->sr_nodes);
313   sendSignal(ref, GSN_START_RECREQ, signal, StartRecReq::SignalLength, JBB);
314 
315   signal->theData[0] = NDB_LE_StartREDOLog;
316   signal->theData[1] = nodeId;
317   signal->theData[2] = keepGCI;
318   signal->theData[3] = lastCompletedGCI;
319   signal->theData[4] = SYSFILE->newestRestorableGCI;
320   sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 5, JBB);
321 }//Dbdih::sendSTART_RECREQ()
322 
sendSTART_TOREQ(Signal * signal,Uint32 nodeId,Uint32 extra)323 void Dbdih::sendSTART_TOREQ(Signal* signal, Uint32 nodeId, Uint32 extra)
324 {
325   BlockReference ref = calcDihBlockRef(nodeId);
326   sendSignal(ref, GSN_START_TOREQ, signal, StartToReq::SignalLength, JBB);
327 }//Dbdih::sendSTART_TOREQ()
328 
sendSTOP_ME_REQ(Signal * signal,Uint32 nodeId,Uint32 extra)329 void Dbdih::sendSTOP_ME_REQ(Signal* signal, Uint32 nodeId, Uint32 extra)
330 {
331   if (nodeId != getOwnNodeId()) {
332     jam();
333     const BlockReference ref = calcDihBlockRef(nodeId);
334     sendSignal(ref, GSN_STOP_ME_REQ, signal, StopMeReq::SignalLength, JBB);
335   }//if
336 }//Dbdih::sendSTOP_ME_REQ()
337 
sendTC_CLOPSIZEREQ(Signal * signal,Uint32 nodeId,Uint32 extra)338 void Dbdih::sendTC_CLOPSIZEREQ(Signal* signal, Uint32 nodeId, Uint32 extra)
339 {
340   BlockReference ref = calcTcBlockRef(nodeId);
341   signal->theData[0] = nodeId;
342   signal->theData[1] = reference();
343   sendSignal(ref, GSN_TC_CLOPSIZEREQ, signal, 2, JBB);
344 }//Dbdih::sendTC_CLOPSIZEREQ()
345 
sendTCGETOPSIZEREQ(Signal * signal,Uint32 nodeId,Uint32 extra)346 void Dbdih::sendTCGETOPSIZEREQ(Signal* signal, Uint32 nodeId, Uint32 extra)
347 {
348   BlockReference ref = calcTcBlockRef(nodeId);
349   signal->theData[0] = nodeId;
350   signal->theData[1] = reference();
351   sendSignal(ref, GSN_TCGETOPSIZEREQ, signal, 2, JBB);
352 }//Dbdih::sendTCGETOPSIZEREQ()
353 
sendUPDATE_TOREQ(Signal * signal,Uint32 nodeId,Uint32 extra)354 void Dbdih::sendUPDATE_TOREQ(Signal* signal, Uint32 nodeId, Uint32 extra)
355 {
356   const BlockReference ref = calcDihBlockRef(nodeId);
357   sendSignal(ref, GSN_UPDATE_TOREQ, signal, UpdateToReq::SignalLength, JBB);
358 }//sendUPDATE_TOREQ()
359 
execCONTINUEB(Signal * signal)360 void Dbdih::execCONTINUEB(Signal* signal)
361 {
362   jamEntry();
363   switch ((DihContinueB::Type)signal->theData[0]) {
364   case DihContinueB::ZPACK_TABLE_INTO_PAGES:
365     {
366       jam();
367       Uint32 tableId = signal->theData[1];
368       packTableIntoPagesLab(signal, tableId);
369       return;
370       break;
371     }
372   case DihContinueB::ZPACK_FRAG_INTO_PAGES:
373     {
374       RWFragment wf;
375       jam();
376       wf.rwfTabPtr.i = signal->theData[1];
377       ptrCheckGuard(wf.rwfTabPtr, ctabFileSize, tabRecord);
378       wf.fragId = signal->theData[2];
379       wf.pageIndex = signal->theData[3];
380       wf.wordIndex = signal->theData[4];
381       wf.totalfragments = signal->theData[5];
382       packFragIntoPagesLab(signal, &wf);
383       return;
384       break;
385     }
386   case DihContinueB::ZREAD_PAGES_INTO_TABLE:
387     {
388       jam();
389       Uint32 tableId = signal->theData[1];
390       readPagesIntoTableLab(signal, tableId);
391       return;
392       break;
393     }
394   case DihContinueB::ZREAD_PAGES_INTO_FRAG:
395     {
396       RWFragment rf;
397       jam();
398       rf.rwfTabPtr.i = signal->theData[1];
399       ptrCheckGuard(rf.rwfTabPtr, ctabFileSize, tabRecord);
400       rf.fragId = signal->theData[2];
401       rf.pageIndex = signal->theData[3];
402       rf.wordIndex = signal->theData[4];
403       readPagesIntoFragLab(signal, &rf);
404       return;
405       break;
406     }
407   case DihContinueB::ZCOPY_TABLE:
408     {
409       jam();
410       Uint32 tableId = signal->theData[1];
411       copyTableLab(signal, tableId);
412       return;
413     }
414   case DihContinueB::ZCOPY_TABLE_NODE:
415     {
416       NodeRecordPtr nodePtr;
417       CopyTableNode ctn;
418       jam();
419       ctn.ctnTabPtr.i = signal->theData[1];
420       ptrCheckGuard(ctn.ctnTabPtr, ctabFileSize, tabRecord);
421       nodePtr.i = signal->theData[2];
422       ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
423       ctn.pageIndex = signal->theData[3];
424       ctn.wordIndex = signal->theData[4];
425       ctn.noOfWords = signal->theData[5];
426       copyTableNode(signal, &ctn, nodePtr);
427       return;
428     }
429   case DihContinueB::ZSTART_FRAGMENT:
430     {
431       jam();
432       Uint32 tableId = signal->theData[1];
433       Uint32 fragId = signal->theData[2];
434       startFragment(signal, tableId, fragId);
435       return;
436     }
437   case DihContinueB::ZCOMPLETE_RESTART:
438     jam();
439     completeRestartLab(signal);
440     return;
441   case DihContinueB::ZREAD_TABLE_FROM_PAGES:
442     {
443       TabRecordPtr tabPtr;
444       jam();
445       tabPtr.i = signal->theData[1];
446       ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
447       readTableFromPagesLab(signal, tabPtr);
448       return;
449     }
450   case DihContinueB::ZSR_PHASE2_READ_TABLE:
451     {
452       TabRecordPtr tabPtr;
453       jam();
454       tabPtr.i = signal->theData[1];
455       ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
456       srPhase2ReadTableLab(signal, tabPtr);
457       return;
458     }
459   case DihContinueB::ZCHECK_TC_COUNTER:
460     jam();
461 #ifndef NO_LCP
462     checkTcCounterLab(signal);
463 #endif
464     return;
465   case DihContinueB::ZCALCULATE_KEEP_GCI:
466     {
467       jam();
468       Uint32 tableId = signal->theData[1];
469       Uint32 fragId = signal->theData[2];
470       calculateKeepGciLab(signal, tableId, fragId);
471       return;
472     }
473   case DihContinueB::ZSTORE_NEW_LCP_ID:
474     jam();
475     storeNewLcpIdLab(signal);
476     return;
477   case DihContinueB::ZTABLE_UPDATE:
478     {
479       TabRecordPtr tabPtr;
480       jam();
481       tabPtr.i = signal->theData[1];
482       ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
483       tableUpdateLab(signal, tabPtr);
484       return;
485     }
486   case DihContinueB::ZCHECK_LCP_COMPLETED:
487     {
488       jam();
489       checkLcpCompletedLab(signal);
490       return;
491     }
492   case DihContinueB::ZINIT_LCP:
493     {
494       jam();
495       Uint32 senderRef = signal->theData[1];
496       Uint32 tableId = signal->theData[2];
497       initLcpLab(signal, senderRef, tableId);
498       return;
499     }
500   case DihContinueB::ZADD_TABLE_MASTER_PAGES:
501     {
502       TabRecordPtr tabPtr;
503       jam();
504       tabPtr.i = signal->theData[1];
505       ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
506       tabPtr.p->tabUpdateState = TabRecord::US_ADD_TABLE_MASTER;
507       tableUpdateLab(signal, tabPtr);
508       return;
509       break;
510     }
511   case DihContinueB::ZDIH_ADD_TABLE_MASTER:
512     {
513       jam();
514       addTable_closeConf(signal, signal->theData[1]);
515       return;
516     }
517   case DihContinueB::ZADD_TABLE_SLAVE_PAGES:
518     {
519       TabRecordPtr tabPtr;
520       jam();
521       tabPtr.i = signal->theData[1];
522       ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
523       tabPtr.p->tabUpdateState = TabRecord::US_ADD_TABLE_SLAVE;
524       tableUpdateLab(signal, tabPtr);
525       return;
526     }
527   case DihContinueB::ZDIH_ADD_TABLE_SLAVE:
528     {
529       ndbrequire(false);
530       return;
531     }
532   case DihContinueB::ZSTART_GCP:
533     jam();
534 #ifndef NO_GCP
535     startGcpLab(signal, signal->theData[1]);
536 #endif
537     return;
538     break;
539   case DihContinueB::ZCOPY_GCI:{
540     jam();
541     CopyGCIReq::CopyReason reason = (CopyGCIReq::CopyReason)signal->theData[1];
542     ndbrequire(c_copyGCIMaster.m_copyReason == reason);
543 
544     // set to idle, to be able to reuse method
545     c_copyGCIMaster.m_copyReason = CopyGCIReq::IDLE;
546     copyGciLab(signal, reason);
547     return;
548   }
549     break;
550   case DihContinueB::ZEMPTY_VERIFY_QUEUE:
551     jam();
552     emptyverificbuffer(signal, signal->theData[1], true);
553     return;
554     break;
555   case DihContinueB::ZCHECK_GCP_STOP:
556     jam();
557 #ifndef NO_GCP
558     checkGcpStopLab(signal);
559 #endif
560     return;
561     break;
562   case DihContinueB::ZREMOVE_NODE_FROM_TABLE:
563     {
564       jam();
565       Uint32 nodeId = signal->theData[1];
566       Uint32 tableId = signal->theData[2];
567       removeNodeFromTables(signal, nodeId, tableId);
568       return;
569     }
570   case DihContinueB::ZCOPY_NODE:
571     {
572       jam();
573       Uint32 tableId = signal->theData[1];
574       copyNodeLab(signal, tableId);
575       return;
576     }
577   case DihContinueB::ZTO_START_COPY_FRAG:
578     {
579       jam();
580       Uint32 takeOverPtrI = signal->theData[1];
581       startNextCopyFragment(signal, takeOverPtrI);
582       return;
583     }
584   case DihContinueB::ZINVALIDATE_NODE_LCP:
585     {
586       jam();
587       const Uint32 nodeId = signal->theData[1];
588       const Uint32 tableId = signal->theData[2];
589       invalidateNodeLCP(signal, nodeId, tableId);
590       return;
591     }
592   case DihContinueB::ZINITIALISE_RECORDS:
593     jam();
594     initialiseRecordsLab(signal,
595 			 signal->theData[1],
596 			 signal->theData[2],
597 			 signal->theData[3]);
598     return;
599     break;
600   case DihContinueB::ZSTART_PERMREQ_AGAIN:
601     jam();
602     nodeRestartPh2Lab2(signal);
603     return;
604     break;
605   case DihContinueB::SwitchReplica:
606     {
607       jam();
608       const Uint32 nodeId = signal->theData[1];
609       const Uint32 tableId = signal->theData[2];
610       const Uint32 fragNo = signal->theData[3];
611       switchReplica(signal, nodeId, tableId, fragNo);
612       return;
613     }
614   case DihContinueB::ZSEND_ADD_FRAG:
615     {
616       jam();
617       Uint32 takeOverPtrI = signal->theData[1];
618       toCopyFragLab(signal, takeOverPtrI);
619       return;
620     }
621   case DihContinueB::ZSEND_START_TO:
622     {
623       jam();
624       Ptr<TakeOverRecord> takeOverPtr;
625       c_takeOverPool.getPtr(takeOverPtr, signal->theData[1]);
626       sendStartTo(signal, takeOverPtr);
627       return;
628     }
629   case DihContinueB::ZSEND_UPDATE_TO:
630     {
631       jam();
632       Ptr<TakeOverRecord> takeOverPtr;
633       c_takeOverPool.getPtr(takeOverPtr, signal->theData[1]);
634       sendUpdateTo(signal, takeOverPtr);
635       return;
636     }
637   case DihContinueB::WAIT_DROP_TAB_WRITING_TO_FILE:{
638     jam();
639     TabRecordPtr tabPtr;
640     tabPtr.i = signal->theData[1];
641     ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
642     waitDropTabWritingToFile(signal, tabPtr);
643     return;
644   }
645   case DihContinueB::ZTO_START_FRAGMENTS:
646   {
647     TakeOverRecordPtr takeOverPtr;
648     c_takeOverPool.getPtr(takeOverPtr, signal->theData[1]);
649     nr_start_fragments(signal, takeOverPtr);
650     return;
651   }
652   case DihContinueB::ZCOPY_NODE_WAIT_CREATE_FRAG:
653   {
654     jam();
655     lcpBlockedLab(signal, true, signal->theData[1]);
656     return;
657   }
658   case DihContinueB::ZWAIT_OLD_SCAN:
659   {
660     jam();
661     wait_old_scan(signal);
662     return;
663   }
664   case DihContinueB::ZLCP_TRY_LOCK:
665   {
666     jam();
667     Mutex mutex(signal, c_mutexMgr, c_fragmentInfoMutex_lcp);
668     Callback c = { safe_cast(&Dbdih::lcpFragmentMutex_locked),
669                    signal->theData[1] };
670     ndbrequire(mutex.trylock(c, false));
671     return;
672   }
673   case DihContinueB::ZDELAY_RELEASE_FRAGMENT_INFO_MUTEX:
674   {
675     jam();
676     MutexHandle2<DIH_FRAGMENT_INFO> mh;
677     mh.setHandle(signal->theData[1]);
678     Mutex mutex(signal, c_mutexMgr, mh);
679     mutex.unlock();
680     return;
681   }
682   case DihContinueB::ZTO_START_LOGGING:
683   {
684     jam();
685     TakeOverRecordPtr takeOverPtr;
686     c_takeOverPool.getPtr(takeOverPtr, signal->theData[1]);
687     nr_start_logging(signal, takeOverPtr);
688     return;
689   }
690   case DihContinueB::ZGET_TABINFO:
691   {
692     jam();
693     getTabInfo(signal);
694     return;
695   }
696   case DihContinueB::ZGET_TABINFO_SEND:
697   {
698     jam();
699     TabRecordPtr tabPtr;
700     jam();
701     tabPtr.i = signal->theData[1];
702     ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
703     getTabInfo_send(signal, tabPtr);
704     return;
705   }
706   }
707 
708   ndbrequire(false);
709   return;
710 }//Dbdih::execCONTINUEB()
711 
/**
 * Receive one COPY_GCIREQ chunk of the sysfile from the master DIH.
 *
 * Chunks must come from cmasterdihref and arrive in order
 * (c_copyGCISlave.m_expectedNextWord tracks the next start word).  Each chunk
 * is stored in cdata; the function returns after every chunk but the last.
 * On the last chunk:
 *  - a non-master node replaces sysfileData with cdata, keeping its own
 *    m_restart_seq,
 *  - the request (reason, sender, anyData) is recorded in c_copyGCISlave,
 *  - reason-specific state is updated (see the switch below),
 *  - restart info file 0 is opened, or the open step is skipped when the
 *    file is already open.
 *
 * Some paths finish early: RESTART_NR and an already completed
 * GLOBAL_CHECKPOINT reply with COPY_GCICONF directly, and a
 * GLOBAL_CHECKPOINT that is already being saved just records the new master.
 */
void Dbdih::execCOPY_GCIREQ(Signal* signal)
{
  CopyGCIReq * const copyGCI = (CopyGCIReq *)&signal->theData[0];
  jamEntry();
  CopyGCIReq::CopyReason reason = (CopyGCIReq::CopyReason)copyGCI->copyReason;
  const Uint32 tstart = copyGCI->startWord;

  // Only the master may send this, only one copy may be active at a time
  // (a GLOBAL_CHECKPOINT copy may arrive while one is already recorded), and
  // chunks must arrive in sequence.
  ndbrequire(cmasterdihref == signal->senderBlockRef()) ;
  ndbrequire((reason == CopyGCIReq::GLOBAL_CHECKPOINT &&
              c_copyGCISlave.m_copyReason == CopyGCIReq::GLOBAL_CHECKPOINT) ||
             c_copyGCISlave.m_copyReason == CopyGCIReq::IDLE);
  ndbrequire(c_copyGCISlave.m_expectedNextWord == tstart);
  ndbrequire(reason != CopyGCIReq::IDLE);
  // True when this chunk reaches the end of the sysfile.
  bool isdone = (tstart + CopyGCIReq::DATA_SIZE) >= Sysfile::SYSFILE_SIZE32;

  if (ERROR_INSERTED(7177))
  {
    jam();

    // A 3-word signal is the delayed copy this function sent to itself
    // further down; its data was already stored, so skip the copy step.
    if (signal->getLength() == 3)
    {
      jam();
      goto done;
    }
  }

  // NOTE(review): the guard uses the size of sysfileData while the loop
  // writes cdata - presumably both buffers have the same size, confirm.
  arrGuard(tstart + CopyGCIReq::DATA_SIZE, sizeof(sysfileData)/4);
  for(Uint32 i = 0; i<CopyGCIReq::DATA_SIZE; i++)
    cdata[tstart+i] = copyGCI->data[i];

  // Error insert 7177: on the master, delay handling of the last chunk by
  // re-sending the 3 header words to this block.
  if (ERROR_INSERTED(7177) && isMaster() && isdone)
  {
    sendSignalWithDelay(reference(), GSN_COPY_GCIREQ, signal, 1000, 3);
    return;
  }

done:
  if (isdone)
  {
    jam();
    c_copyGCISlave.m_expectedNextWord = 0;
  }
  else
  {
    // More chunks to come: advance the expected offset and wait.
    jam();
    c_copyGCISlave.m_expectedNextWord += CopyGCIReq::DATA_SIZE;
    return;
  }

  if (cmasterdihref != reference())
  {
    jam();
    // Take over the master's sysfile but keep the local restart sequence.
    Uint32 tmp= SYSFILE->m_restart_seq;
    memcpy(sysfileData, cdata, sizeof(sysfileData));
    SYSFILE->m_restart_seq = tmp;

    if (c_set_initial_start_flag)
    {
      jam();
      Sysfile::setInitialStartOngoing(SYSFILE->systemRestartBits);
    }
  }

  c_copyGCISlave.m_copyReason = reason;
  c_copyGCISlave.m_senderRef  = signal->senderBlockRef();
  c_copyGCISlave.m_senderData = copyGCI->anyData;

  CRASH_INSERTION2(7020, reason==CopyGCIReq::LOCAL_CHECKPOINT);
  CRASH_INSERTION2(7008, reason==CopyGCIReq::GLOBAL_CHECKPOINT);

  if (m_local_lcp_state.check_cut_log_tail(c_newest_restorable_gci))
  {
    jam();

    // The report below is compiled out; only the state reset is active.
#if NOT_YET
    LcpCompleteRep* rep = (LcpCompleteRep*)signal->getDataPtrSend();
    rep->nodeId = getOwnNodeId();
    rep->blockNo = 0;
    rep->lcpId = m_local_lcp_state.m_start_lcp_req.lcpId;
    rep->keepGci = m_local_lcp_state.m_keep_gci;
    sendSignal(DBLQH_REF, GSN_LCP_COMPLETE_REP, signal,
               LcpCompleteRep::SignalLength, JBB);

    warningEvent("CUT LOG TAIL: reason: %u lcp: %u m_keep_gci: %u stop: %u",
                 reason,
                 m_local_lcp_state.m_start_lcp_req.lcpId,
                 m_local_lcp_state.m_keep_gci,
                 m_local_lcp_state.m_stop_gci);
#endif
    m_local_lcp_state.reset();
  }

  /* -------------------------------------------------------------------------*/
  /*     WE SET THE REQUESTER OF THE COPY GCI TO THE CURRENT MASTER. IF THE   */
  /*     CURRENT MASTER FAILS WE DO NOT WANT THE NEW MASTER TO RECEIVE        */
  /*     CONFIRM OF SOMETHING HE HAS NOT SENT. THE TAKE OVER MUST BE CAREFUL. */
  /*     (NOTE(review): "FAILS" was missing in the original text - confirm.)  */
  /* -------------------------------------------------------------------------*/
  // `ok` is set by every case that continues to the file handling below;
  // RESTART_NR does not set it because it always returns from inside the
  // switch.
  bool ok = false;
  switch(reason){
  case CopyGCIReq::IDLE:
    // Already excluded by the ndbrequire at the top of the function.
    ok = true;
    jam();
    ndbrequire(false);
    break;
  case CopyGCIReq::LOCAL_CHECKPOINT: {
    ok = true;
    jam();
    c_lcpState.setLcpStatus(LCP_COPY_GCI, __LINE__);
    c_lcpState.m_masterLcpDihRef = cmasterdihref;
    setNodeActiveStatus();
    break;
  }
  case CopyGCIReq::RESTART: {
    ok = true;
    jam();
    // Seed the GCI state from the sysfile: the old GCI is the newest
    // restorable one and the current GCI is the one after it (both stored
    // in the high 32 bits).
    Uint32 newest = SYSFILE->newestRestorableGCI;
    m_micro_gcp.m_old_gci = Uint64(newest) << 32;
    crestartGci = newest;
    c_newest_restorable_gci = newest;
    Sysfile::setRestartOngoing(SYSFILE->systemRestartBits);
    m_micro_gcp.m_current_gci = Uint64(newest + 1) << 32;
    setNodeActiveStatus();
    setNodeGroups();
    if ((Sysfile::getLCPOngoing(SYSFILE->systemRestartBits))) {
      jam();
      /* -------------------------------------------------------------------- */
      //  IF THERE WAS A LOCAL CHECKPOINT ONGOING AT THE CRASH MOMENT WE WILL
      //    INVALIDATE THAT LOCAL CHECKPOINT.
      /* -------------------------------------------------------------------- */
      invalidateLcpInfoAfterSr(signal);
    }//if

    if (m_micro_gcp.m_enabled == false &&
        m_micro_gcp.m_master.m_time_between_gcp)
    {
      /**
       * Micro GCP is disabled...but configured...
       */
      jam();
      m_micro_gcp.m_enabled = true;
      UpgradeProtocolOrd * ord = (UpgradeProtocolOrd*)signal->getDataPtrSend();
      ord->type = UpgradeProtocolOrd::UPO_ENABLE_MICRO_GCP;
      // NOTE(review): the length passed is that of the incoming signal, not
      // a length taken from UpgradeProtocolOrd - confirm this is intended.
      EXECUTE_DIRECT(QMGR,GSN_UPGRADE_PROTOCOL_ORD,signal,signal->getLength());
    }
    break;
  }
  case CopyGCIReq::GLOBAL_CHECKPOINT: {
    ok = true;
    jam();

    if (m_gcp_save.m_state == GcpSave::GCP_SAVE_COPY_GCI)
    {
      jam();
      /**
       * This must be master take over...and it already running...
       */
      ndbrequire(c_newest_restorable_gci == SYSFILE->newestRestorableGCI);
      m_gcp_save.m_master_ref = c_copyGCISlave.m_senderRef;
      return;
    }

    if (c_newest_restorable_gci == SYSFILE->newestRestorableGCI)
    {
      jam();

      /**
       * This must be master take over...and it already complete...
       */
      m_gcp_save.m_master_ref = c_copyGCISlave.m_senderRef;
      c_copyGCISlave.m_copyReason = CopyGCIReq::IDLE;
      signal->theData[0] = c_copyGCISlave.m_senderData;
      sendSignal(m_gcp_save.m_master_ref, GSN_COPY_GCICONF, signal, 1, JBB);
      return;
    }

    // Normal case: move the GCP save state machine from CONF to COPY_GCI.
    ndbrequire(m_gcp_save.m_state == GcpSave::GCP_SAVE_CONF);
    m_gcp_save.m_state = GcpSave::GCP_SAVE_COPY_GCI;
    m_gcp_save.m_master_ref = c_copyGCISlave.m_senderRef;
    c_newest_restorable_gci = SYSFILE->newestRestorableGCI;
    setNodeActiveStatus();
    break;
  }// case GLOBAL_CHECKPOINT
  case CopyGCIReq::INITIAL_START_COMPLETED:
    ok = true;
    jam();
    break;
  case CopyGCIReq::RESTART_NR:
    jam();
    setNodeGroups();
    /**
     * We dont really need to make anything durable here...skip it
     */
    c_copyGCISlave.m_copyReason = CopyGCIReq::IDLE;
    signal->theData[0] = c_copyGCISlave.m_senderData;
    sendSignal(c_copyGCISlave.m_senderRef, GSN_COPY_GCICONF, signal, 1, JBB);
    return;
  }
  ndbrequire(ok);

  CRASH_INSERTION(7183);

  // Error insert 7185: stop here for a GLOBAL_CHECKPOINT copy, so the file
  // handling below is not started.
  if (ERROR_INSERTED(7185) && reason==CopyGCIReq::GLOBAL_CHECKPOINT)
  {
    jam();
    return;
  }
#ifdef GCP_TIMER_HACK
  if (reason == CopyGCIReq::GLOBAL_CHECKPOINT) {
    jam();
    NdbTick_getMicroTimer(&globalData.gcp_timer_copygci[0]);
  }
#endif

  /* ----------------------------------------------------------------------- */
  /*     WE START BY TRYING TO OPEN THE FIRST RESTORABLE GCI FILE.           */
  /* ----------------------------------------------------------------------- */
  FileRecordPtr filePtr;
  filePtr.i = crestartInfoFile[0];
  ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
  if (filePtr.p->fileStatus == FileRecord::OPEN) {
    jam();
    openingCopyGciSkipInitLab(signal, filePtr);
    return;
  }//if
  openFileRw(signal, filePtr);
  filePtr.p->reqStatus = FileRecord::OPENING_COPY_GCI;
  return;
}//Dbdih::execCOPY_GCIREQ()
940 
execDICTSTARTCONF(Signal * signal)941 void Dbdih::execDICTSTARTCONF(Signal* signal)
942 {
943   jamEntry();
944   Uint32 nodeId = refToNode(signal->getSendersBlockRef());
945   if (nodeId != getOwnNodeId()) {
946     jam();
947     nodeDictStartConfLab(signal);
948   } else {
949     jam();
950     dictStartConfLab(signal);
951   }//if
952 }//Dbdih::execDICTSTARTCONF()
953 
execFSCLOSECONF(Signal * signal)954 void Dbdih::execFSCLOSECONF(Signal* signal)
955 {
956   FileRecordPtr filePtr;
957   jamEntry();
958   filePtr.i = signal->theData[0];
959   ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
960   filePtr.p->fileStatus = FileRecord::CLOSED;
961   FileRecord::ReqStatus status = filePtr.p->reqStatus;
962   filePtr.p->reqStatus = FileRecord::IDLE;
963   switch (status) {
964   case FileRecord::CLOSING_GCP:
965     jam();
966     closingGcpLab(signal, filePtr);
967     break;
968   case FileRecord::CLOSING_GCP_CRASH:
969     jam();
970     closingGcpCrashLab(signal, filePtr);
971     break;
972   case FileRecord::CLOSING_TABLE_CRASH:
973     jam();
974     closingTableCrashLab(signal, filePtr);
975     break;
976   case FileRecord::CLOSING_TABLE_SR:
977     jam();
978     closingTableSrLab(signal, filePtr);
979     break;
980   case FileRecord::TABLE_CLOSE:
981     jam();
982     tableCloseLab(signal, filePtr);
983     break;
984   case FileRecord::TABLE_CLOSE_DELETE:
985     jam();
986     tableDeleteLab(signal, filePtr);
987     break;
988   default:
989     ndbrequire(false);
990     break;
991   }//switch
992   return;
993 }//Dbdih::execFSCLOSECONF()
994 
execFSCLOSEREF(Signal * signal)995 void Dbdih::execFSCLOSEREF(Signal* signal)
996 {
997   FileRecordPtr filePtr;
998   jamEntry();
999   filePtr.i = signal->theData[0];
1000   ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
1001   FileRecord::ReqStatus status = filePtr.p->reqStatus;
1002   filePtr.p->reqStatus = FileRecord::IDLE;
1003   switch (status) {
1004   case FileRecord::CLOSING_GCP:
1005     jam();
1006     break;
1007   case FileRecord::CLOSING_GCP_CRASH:
1008     jam();
1009     closingGcpCrashLab(signal, filePtr);
1010     return;
1011   case FileRecord::CLOSING_TABLE_CRASH:
1012     jam();
1013     closingTableCrashLab(signal, filePtr);
1014     return;
1015   case FileRecord::CLOSING_TABLE_SR:
1016     jam();
1017     break;
1018   case FileRecord::TABLE_CLOSE:
1019     jam();
1020     break;
1021   case FileRecord::TABLE_CLOSE_DELETE:
1022     jam();
1023     break;
1024   default:
1025     jam();
1026     break;
1027 
1028   }//switch
1029   {
1030     char msg[100];
1031     sprintf(msg, "File system close failed during FileRecord status %d", (Uint32)status);
1032     fsRefError(signal,__LINE__,msg);
1033   }
1034   return;
1035 }//Dbdih::execFSCLOSEREF()
1036 
execFSOPENCONF(Signal * signal)1037 void Dbdih::execFSOPENCONF(Signal* signal)
1038 {
1039   FileRecordPtr filePtr;
1040   jamEntry();
1041   filePtr.i = signal->theData[0];
1042   ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
1043   filePtr.p->fileRef = signal->theData[1];
1044   filePtr.p->fileStatus = FileRecord::OPEN;
1045   FileRecord::ReqStatus status = filePtr.p->reqStatus;
1046   filePtr.p->reqStatus = FileRecord::IDLE;
1047   switch (status) {
1048   case FileRecord::CREATING_GCP:
1049     jam();
1050     creatingGcpLab(signal, filePtr);
1051     break;
1052   case FileRecord::OPENING_COPY_GCI:
1053     jam();
1054     openingCopyGciSkipInitLab(signal, filePtr);
1055     break;
1056   case FileRecord::CREATING_COPY_GCI:
1057     jam();
1058     openingCopyGciSkipInitLab(signal, filePtr);
1059     break;
1060   case FileRecord::OPENING_GCP:
1061     jam();
1062     openingGcpLab(signal, filePtr);
1063     break;
1064   case FileRecord::OPENING_TABLE:
1065     jam();
1066     openingTableLab(signal, filePtr);
1067     break;
1068   case FileRecord::TABLE_CREATE:
1069     jam();
1070     tableCreateLab(signal, filePtr);
1071     break;
1072   case FileRecord::TABLE_OPEN_FOR_DELETE:
1073     jam();
1074     tableOpenLab(signal, filePtr);
1075     break;
1076   default:
1077     ndbrequire(false);
1078     break;
1079   }//switch
1080   return;
1081 }//Dbdih::execFSOPENCONF()
1082 
execFSOPENREF(Signal * signal)1083 void Dbdih::execFSOPENREF(Signal* signal)
1084 {
1085   FileRecordPtr filePtr;
1086   jamEntry();
1087   filePtr.i = signal->theData[0];
1088   ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
1089   FileRecord::ReqStatus status = filePtr.p->reqStatus;
1090   filePtr.p->reqStatus = FileRecord::IDLE;
1091   switch (status) {
1092   case FileRecord::CREATING_GCP:
1093     /* --------------------------------------------------------------------- */
1094     /*   WE DID NOT MANAGE TO CREATE A GLOBAL CHECKPOINT FILE. SERIOUS ERROR */
1095     /*   WHICH CAUSES A SYSTEM RESTART.                                      */
1096     /* --------------------------------------------------------------------- */
1097     jam();
1098     break;
1099   case FileRecord::OPENING_COPY_GCI:
1100     jam();
1101     openingCopyGciErrorLab(signal, filePtr);
1102     return;
1103   case FileRecord::CREATING_COPY_GCI:
1104     jam();
1105     break;
1106   case FileRecord::OPENING_GCP:
1107     jam();
1108     openingGcpErrorLab(signal, filePtr);
1109     return;
1110   case FileRecord::OPENING_TABLE:
1111     jam();
1112     openingTableErrorLab(signal, filePtr);
1113     return;
1114   case FileRecord::TABLE_CREATE:
1115     jam();
1116     break;
1117   case FileRecord::TABLE_OPEN_FOR_DELETE:
1118     jam();
1119     tableDeleteLab(signal, filePtr);
1120     return;
1121   default:
1122     jam();
1123     break;
1124   }//switch
1125   {
1126     char msg[100];
1127     sprintf(msg, "File system open failed during FileRecord status %d", (Uint32)status);
1128     fsRefError(signal,__LINE__,msg);
1129   }
1130   return;
1131 }//Dbdih::execFSOPENREF()
1132 
execFSREADCONF(Signal * signal)1133 void Dbdih::execFSREADCONF(Signal* signal)
1134 {
1135   FileRecordPtr filePtr;
1136   jamEntry();
1137   filePtr.i = signal->theData[0];
1138   ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
1139   FileRecord::ReqStatus status = filePtr.p->reqStatus;
1140   filePtr.p->reqStatus = FileRecord::IDLE;
1141   switch (status) {
1142   case FileRecord::READING_GCP:
1143     jam();
1144     readingGcpLab(signal, filePtr);
1145     break;
1146   case FileRecord::READING_TABLE:
1147     jam();
1148     readingTableLab(signal, filePtr);
1149     break;
1150   default:
1151     ndbrequire(false);
1152     break;
1153   }//switch
1154   return;
1155 }//Dbdih::execFSREADCONF()
1156 
execFSREADREF(Signal * signal)1157 void Dbdih::execFSREADREF(Signal* signal)
1158 {
1159   FileRecordPtr filePtr;
1160   jamEntry();
1161   filePtr.i = signal->theData[0];
1162   ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
1163   FileRecord::ReqStatus status = filePtr.p->reqStatus;
1164   filePtr.p->reqStatus = FileRecord::IDLE;
1165   switch (status) {
1166   case FileRecord::READING_GCP:
1167     jam();
1168     readingGcpErrorLab(signal, filePtr);
1169     return;
1170   case FileRecord::READING_TABLE:
1171     jam();
1172     readingTableErrorLab(signal, filePtr);
1173     return;
1174   default:
1175     break;
1176   }//switch
1177   {
1178     char msg[100];
1179     sprintf(msg, "File system read failed during FileRecord status %d", (Uint32)status);
1180     fsRefError(signal,__LINE__,msg);
1181   }
1182 }//Dbdih::execFSREADREF()
1183 
execFSWRITECONF(Signal * signal)1184 void Dbdih::execFSWRITECONF(Signal* signal)
1185 {
1186   FileRecordPtr filePtr;
1187   jamEntry();
1188   filePtr.i = signal->theData[0];
1189   ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
1190   FileRecord::ReqStatus status = filePtr.p->reqStatus;
1191   filePtr.p->reqStatus = FileRecord::IDLE;
1192   switch (status) {
1193   case FileRecord::WRITING_COPY_GCI:
1194     jam();
1195     writingCopyGciLab(signal, filePtr);
1196     break;
1197   case FileRecord::WRITE_INIT_GCP:
1198     jam();
1199     writeInitGcpLab(signal, filePtr);
1200     break;
1201   case FileRecord::TABLE_WRITE:
1202     jam();
1203     tableWriteLab(signal, filePtr);
1204     break;
1205   default:
1206     ndbrequire(false);
1207     break;
1208   }//switch
1209   return;
1210 }//Dbdih::execFSWRITECONF()
1211 
execFSWRITEREF(Signal * signal)1212 void Dbdih::execFSWRITEREF(Signal* signal)
1213 {
1214   FileRecordPtr filePtr;
1215   jamEntry();
1216   filePtr.i = signal->theData[0];
1217   ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
1218   FileRecord::ReqStatus status = filePtr.p->reqStatus;
1219   filePtr.p->reqStatus = FileRecord::IDLE;
1220   switch (status) {
1221   case FileRecord::WRITING_COPY_GCI:
1222     /* --------------------------------------------------------------------- */
1223     /*  EVEN CREATING THE FILE DID NOT WORK. WE WILL THEN CRASH.             */
1224     /*  ERROR IN WRITING FILE. WE WILL NOT CONTINUE FROM HERE.               */
1225     /* --------------------------------------------------------------------- */
1226     jam();
1227     break;
1228   case FileRecord::WRITE_INIT_GCP:
1229     /* --------------------------------------------------------------------- */
1230     /*   AN ERROR OCCURRED IN WRITING A GCI FILE WHICH IS A SERIOUS ERROR    */
1231     /*   THAT CAUSE A SYSTEM RESTART.                                        */
1232     /* --------------------------------------------------------------------- */
1233     jam();
1234     break;
1235   case FileRecord::TABLE_WRITE:
1236     jam();
1237     break;
1238   default:
1239     jam();
1240     break;
1241   }//switch
1242   {
1243     char msg[100];
1244     sprintf(msg, "File system write failed during FileRecord status %d", (Uint32)status);
1245     fsRefError(signal,__LINE__,msg);
1246   }
1247   return;
1248 }//Dbdih::execFSWRITEREF()
1249 
execGETGCIREQ(Signal * signal)1250 void Dbdih::execGETGCIREQ(Signal* signal)
1251 {
1252 
1253   jamEntry();
1254   Uint32 userPtr = signal->theData[0];
1255   BlockReference userRef = signal->theData[1];
1256   Uint32 type = signal->theData[2];
1257 
1258   Uint32 gci_hi = 0;
1259   Uint32 gci_lo = 0;
1260   switch(type){
1261   case 0:
1262     jam();
1263     gci_hi = SYSFILE->newestRestorableGCI;
1264     break;
1265   case 1:
1266     jam();
1267     gci_hi = Uint32(m_micro_gcp.m_current_gci >> 32);
1268     gci_lo = Uint32(m_micro_gcp.m_current_gci);
1269     break;
1270   }
1271 
1272   signal->theData[0] = userPtr;
1273   signal->theData[1] = gci_hi;
1274   signal->theData[2] = gci_lo;
1275 
1276   if (userRef)
1277   {
1278     jam();
1279     sendSignal(userRef, GSN_GETGCICONF, signal, 3, JBB);
1280   }
1281   else
1282   {
1283     jam();
1284     // Execute direct
1285   }
1286 }//Dbdih::execGETGCIREQ()
1287 
execREAD_CONFIG_REQ(Signal * signal)1288 void Dbdih::execREAD_CONFIG_REQ(Signal* signal)
1289 {
1290   const ReadConfigReq * req = (ReadConfigReq*)signal->getDataPtr();
1291   Uint32 ref = req->senderRef;
1292   Uint32 senderData = req->senderData;
1293   ndbrequire(req->noOfParameters == 0);
1294 
1295   jamEntry();
1296 
1297   const ndb_mgm_configuration_iterator * p =
1298     m_ctx.m_config.getOwnConfigIterator();
1299   ndbrequireErr(p != 0, NDBD_EXIT_INVALID_CONFIG);
1300 
1301   initData();
1302 
1303   cconnectFileSize = 256; // Only used for DDL
1304 
1305   ndbrequireErr(!ndb_mgm_get_int_parameter(p, CFG_DIH_API_CONNECT,
1306 					   &capiConnectFileSize),
1307 		NDBD_EXIT_INVALID_CONFIG);
1308   capiConnectFileSize++; // Increase by 1...so that srsw queue never gets full
1309 
1310   ndbrequireErr(!ndb_mgm_get_int_parameter(p, CFG_DIH_FRAG_CONNECT,
1311 					   &cfragstoreFileSize),
1312 		NDBD_EXIT_INVALID_CONFIG);
1313   ndbrequireErr(!ndb_mgm_get_int_parameter(p, CFG_DIH_REPLICAS,
1314 					   &creplicaFileSize),
1315 		NDBD_EXIT_INVALID_CONFIG);
1316   ndbrequireErr(!ndb_mgm_get_int_parameter(p, CFG_DIH_TABLE, &ctabFileSize),
1317 		NDBD_EXIT_INVALID_CONFIG);
1318 
1319   if (isNdbMtLqh())
1320   {
1321     jam();
1322     c_fragments_per_node = getLqhWorkers();
1323     // try to get some LQH workers which initially handle no fragments
1324     if (ERROR_INSERTED(7215)) {
1325       c_fragments_per_node = 1;
1326     }
1327   }
1328   ndbout_c("Using %u fragments per node", c_fragments_per_node);
1329 
1330   ndb_mgm_get_int_parameter(p, CFG_DB_LCP_TRY_LOCK_TIMEOUT,
1331                             &c_lcpState.m_lcp_trylock_timeout);
1332 
1333   cfileFileSize = (2 * ctabFileSize) + 2;
1334   initRecords();
1335   initialiseRecordsLab(signal, 0, ref, senderData);
1336 
1337   {
1338     Uint32 val = 0;
1339     ndb_mgm_get_int_parameter(p, CFG_DB_2PASS_INR,
1340                               &val);
1341     c_2pass_inr = val ? true : false;
1342   }
1343 
1344   /**
1345    * Set API assigned nodegroup(s)
1346    */
1347   {
1348     NodeRecordPtr nodePtr;
1349     for (nodePtr.i = 0; nodePtr.i < MAX_NDB_NODES; nodePtr.i++)
1350     {
1351       ptrAss(nodePtr, nodeRecord);
1352       new (nodePtr.p) NodeRecord();
1353       nodePtr.p->nodeGroup = RNIL;
1354     }
1355 
1356     ndb_mgm_configuration_iterator * iter =
1357       m_ctx.m_config.getClusterConfigIterator();
1358     for(ndb_mgm_first(iter); ndb_mgm_valid(iter); ndb_mgm_next(iter))
1359     {
1360       jam();
1361       Uint32 nodeId;
1362       Uint32 nodeType;
1363 
1364       ndbrequire(!ndb_mgm_get_int_parameter(iter,CFG_NODE_ID, &nodeId));
1365       ndbrequire(!ndb_mgm_get_int_parameter(iter,CFG_TYPE_OF_SECTION,
1366                                             &nodeType));
1367 
1368       if (nodeType == NodeInfo::DB)
1369       {
1370         jam();
1371         Uint32 ng;
1372         nodePtr.i = nodeId;
1373         ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
1374         if (ndb_mgm_get_int_parameter(iter, CFG_DB_NODEGROUP, &ng) == 0)
1375         {
1376           jam();
1377           nodePtr.p->nodeGroup = ng;
1378         }
1379         else
1380         {
1381           jam();
1382           nodePtr.p->nodeGroup = RNIL;
1383         }
1384       }
1385     }
1386   }
1387   return;
1388 }//Dbdih::execSIZEALT_REP()
1389 
execSTART_COPYREF(Signal * signal)1390 void Dbdih::execSTART_COPYREF(Signal* signal)
1391 {
1392   jamEntry();
1393   ndbrequire(false);
1394 }//Dbdih::execSTART_COPYREF()
1395 
execSTART_FRAGCONF(Signal * signal)1396 void Dbdih::execSTART_FRAGCONF(Signal* signal)
1397 {
1398   (void)signal;  // Don't want compiler warning
1399   /* ********************************************************************* */
1400   /*  If anyone wants to add functionality in this method, be aware that   */
1401   /*  for temporary tables no START_FRAGREQ is sent and therefore no       */
1402   /*  START_FRAGCONF signal will be received for those tables!!            */
1403   /* ********************************************************************* */
1404   jamEntry();
1405   return;
1406 }//Dbdih::execSTART_FRAGCONF()
1407 
execSTART_FRAGREF(Signal * signal)1408 void Dbdih::execSTART_FRAGREF(Signal* signal)
1409 {
1410   jamEntry();
1411 
1412   /**
1413    * Kill starting node
1414    */
1415   Uint32 errCode = signal->theData[1];
1416   Uint32 nodeId = signal->theData[2];
1417 
1418   SystemError * const sysErr = (SystemError*)&signal->theData[0];
1419   sysErr->errorCode = SystemError::StartFragRefError;
1420   sysErr->errorRef = reference();
1421   sysErr->data[0] = errCode;
1422   sysErr->data[1] = 0;
1423   sendSignal(calcNdbCntrBlockRef(nodeId), GSN_SYSTEM_ERROR, signal,
1424 	     SystemError::SignalLength, JBB);
1425   return;
1426 }//Dbdih::execSTART_FRAGCONF()
1427 
execSTART_MEREF(Signal * signal)1428 void Dbdih::execSTART_MEREF(Signal* signal)
1429 {
1430   jamEntry();
1431   ndbrequire(false);
1432 }//Dbdih::execSTART_MEREF()
1433 
execTAB_COMMITREQ(Signal * signal)1434 void Dbdih::execTAB_COMMITREQ(Signal* signal)
1435 {
1436   TabRecordPtr tabPtr;
1437   jamEntry();
1438   Uint32 tdictPtr = signal->theData[0];
1439   BlockReference tdictBlockref = signal->theData[1];
1440   tabPtr.i = signal->theData[2];
1441   ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
1442 
1443   ndbrequire(tabPtr.p->tabStatus == TabRecord::TS_CREATING);
1444   tabPtr.p->tabStatus = TabRecord::TS_ACTIVE;
1445   tabPtr.p->schemaTransId = 0;
1446   signal->theData[0] = tdictPtr;
1447   signal->theData[1] = cownNodeId;
1448   signal->theData[2] = tabPtr.i;
1449   sendSignal(tdictBlockref, GSN_TAB_COMMITCONF, signal, 3, JBB);
1450   return;
1451 }//Dbdih::execTAB_COMMITREQ()
1452 
1453 /*
1454   3.2   S T A N D A R D   S U B P R O G R A M S   I N   P L E X
1455   *************************************************************
1456   */
1457 /*
1458   3.2.1   S T A R T /  R E S T A R T
1459   **********************************
1460   */
1461 /*****************************************************************************/
1462 /* **********     START / RESTART MODULE                         *************/
1463 /*****************************************************************************/
1464 /*
1465   3.2.1.1    LOADING   O W N   B L O C K  R E F E R E N C E (ABSOLUTE PHASE 1)
1466   *****************************************************************************
1467   */
execDIH_RESTARTREQ(Signal * signal)1468 void Dbdih::execDIH_RESTARTREQ(Signal* signal)
1469 {
1470   jamEntry();
1471   const DihRestartReq* req = CAST_CONSTPTR(DihRestartReq,
1472                                            signal->getDataPtr());
1473   if (req->senderRef != 0)
1474   {
1475     jam();
1476     cntrlblockref = req->senderRef;
1477     if(m_ctx.m_config.getInitialStart())
1478     {
1479       sendDihRestartRef(signal);
1480     } else {
1481       readGciFileLab(signal);
1482     }
1483   }
1484   else
1485   {
1486     /**
1487      * Precondition, (not checked)
1488      *   atleast 1 node in each node group
1489      */
1490     Uint32 i;
1491     NdbNodeBitmask mask;
1492     mask.assign(NdbNodeBitmask::Size, req->nodemask);
1493     const Uint32 *node_gcis = req->node_gcis;
1494     Uint32 node_group_gcis[MAX_NDB_NODES+1];
1495     memset(node_group_gcis, 0, sizeof(node_group_gcis));
1496     for (i = 0; i<MAX_NDB_NODES; i++)
1497     {
1498       if (mask.get(i))
1499       {
1500 	jam();
1501 	Uint32 ng = Sysfile::getNodeGroup(i, SYSFILE->nodeGroups);
1502         if (ng != NO_NODE_GROUP_ID)
1503         {
1504           ndbrequire(ng < MAX_NDB_NODES);
1505           Uint32 gci = node_gcis[i];
1506           if (gci < SYSFILE->lastCompletedGCI[i])
1507           {
1508             jam();
1509             /**
1510              * Handle case, where *I* know that node complete GCI
1511              *   but node does not...bug#29167
1512              *   i.e node died before it wrote own sysfile
1513              */
1514             gci = SYSFILE->lastCompletedGCI[i];
1515           }
1516 
1517           if (gci > node_group_gcis[ng])
1518           {
1519             jam();
1520             node_group_gcis[ng] = gci;
1521           }
1522         }
1523       }
1524     }
1525     for (i = 0; i<MAX_NDB_NODES && node_group_gcis[i] == 0; i++);
1526 
1527     Uint32 gci = node_group_gcis[i];
1528     for (i++ ; i<MAX_NDB_NODES; i++)
1529     {
1530       jam();
1531       if (node_group_gcis[i] && node_group_gcis[i] != gci)
1532       {
1533 	jam();
1534 	signal->theData[0] = i;
1535 	return;
1536       }
1537     }
1538     signal->theData[0] = MAX_NDB_NODES;
1539     return;
1540   }
1541   return;
1542 }//Dbdih::execDIH_RESTARTREQ()
1543 
execSTTOR(Signal * signal)1544 void Dbdih::execSTTOR(Signal* signal)
1545 {
1546   jamEntry();
1547 
1548   Callback c = { safe_cast(&Dbdih::sendSTTORRY), 0 };
1549   m_sendSTTORRY = c;
1550 
1551   switch(signal->theData[1]){
1552   case 1:
1553     createMutexes(signal, 0);
1554     return;
1555   case 2:
1556     break;
1557   case 3:
1558     signal->theData[0] = reference();
1559     sendSignal(NDBCNTR_REF, GSN_READ_NODESREQ, signal, 1, JBB);
1560     return;
1561   }
1562 
1563   sendSTTORRY(signal);
1564 }//Dbdih::execSTTOR()
1565 
1566 void
sendSTTORRY(Signal * signal,Uint32 senderData,Uint32 retVal)1567 Dbdih::sendSTTORRY(Signal* signal, Uint32 senderData, Uint32 retVal)
1568 {
1569   signal->theData[0] = 0;
1570   signal->theData[1] = 0;
1571   signal->theData[2] = 0;
1572   signal->theData[3] = 1;   // Next start phase
1573   signal->theData[4] = 2;   // Next start phase
1574   signal->theData[5] = 3;
1575   signal->theData[6] = 255; // Next start phase
1576   sendSignal(NDBCNTR_REF, GSN_STTORRY, signal, 7, JBB);
1577   return;
1578 }
1579 
initialStartCompletedLab(Signal * signal)1580 void Dbdih::initialStartCompletedLab(Signal* signal)
1581 {
1582   /*-------------------------------------------------------------------------*/
1583   /* NOW THAT (RE)START IS COMPLETED WE CAN START THE LCP.*/
1584   /*-------------------------------------------------------------------------*/
1585   return;
1586 }//Dbdih::initialStartCompletedLab()
1587 
1588 /*
1589  * ***************************************************************************
1590  * S E N D I N G   R E P L Y  T O  S T A R T /  R E S T A R T   R E Q U E S T S
1591  * ****************************************************************************
1592  */
ndbsttorry10Lab(Signal * signal,Uint32 _line)1593 void Dbdih::ndbsttorry10Lab(Signal* signal, Uint32 _line)
1594 {
1595   /*-------------------------------------------------------------------------*/
1596   // AN NDB START PHASE HAS BEEN COMPLETED. WHEN START PHASE 6 IS COMPLETED WE
1597   // RECORD THAT THE SYSTEM IS RUNNING.
1598   /*-------------------------------------------------------------------------*/
1599   signal->theData[0] = reference();
1600   sendSignal(cntrlblockref, GSN_NDB_STTORRY, signal, 1, JBB);
1601   return;
1602 }//Dbdih::ndbsttorry10Lab()
1603 
1604 /*
1605 ****************************************
1606 I N T E R N A L  P H A S E S
1607 ****************************************
1608 */
1609 /*---------------------------------------------------------------------------*/
1610 /*NDB_STTOR                              START SIGNAL AT START/RESTART       */
1611 /*---------------------------------------------------------------------------*/
execNDB_STTOR(Signal * signal)1612 void Dbdih::execNDB_STTOR(Signal* signal)
1613 {
1614   jamEntry();
1615   BlockReference cntrRef = signal->theData[0];    /* SENDERS BLOCK REFERENCE */
1616   Uint32 ownNodeId = signal->theData[1];          /* OWN PROCESSOR ID*/
1617   Uint32 phase = signal->theData[2];              /* INTERNAL START PHASE*/
1618   Uint32 typestart = signal->theData[3];
1619 
1620   cstarttype = typestart;
1621   cstartPhase = phase;
1622 
1623   switch (phase){
1624   case ZNDB_SPH1:
1625     jam();
1626     /*----------------------------------------------------------------------*/
1627     /* Set the delay between local checkpoints in ndb startphase 1.         */
1628     /*----------------------------------------------------------------------*/
1629     cownNodeId = ownNodeId;
1630     /*-----------------------------------------------------------------------*/
1631     // Compute all static block references in this node as part of
1632     // ndb start phase 1.
1633     /*-----------------------------------------------------------------------*/
1634     cntrlblockref = cntrRef;
1635     clocaltcblockref = calcTcBlockRef(ownNodeId);
1636     clocallqhblockref = calcLqhBlockRef(ownNodeId);
1637     cdictblockref = calcDictBlockRef(ownNodeId);
1638     ndbsttorry10Lab(signal, __LINE__);
1639     break;
1640 
1641   case ZNDB_SPH2:
1642     jam();
1643     /*-----------------------------------------------------------------------*/
1644     // Set the number of replicas,  maximum is 4 replicas.
1645     // Read the ndb nodes from the configuration.
1646     /*-----------------------------------------------------------------------*/
1647 
1648     /*-----------------------------------------------------------------------*/
1649     // For node restarts we will also add a request for permission
1650     // to continue the system restart.
1651     // The permission is given by the master node in the alive set.
1652     /*-----------------------------------------------------------------------*/
1653     if (cstarttype == NodeState::ST_INITIAL_NODE_RESTART)
1654     {
1655       jam();
1656       c_set_initial_start_flag = TRUE; // In sysfile...
1657     }
1658 
1659     if (cstarttype == NodeState::ST_INITIAL_START) {
1660       jam();
1661       // setInitialActiveStatus is moved into makeNodeGroups
1662     } else if (cstarttype == NodeState::ST_SYSTEM_RESTART) {
1663       jam();
1664       /*empty*/;
1665     } else if ((cstarttype == NodeState::ST_NODE_RESTART) ||
1666                (cstarttype == NodeState::ST_INITIAL_NODE_RESTART)) {
1667       jam();
1668       nodeRestartPh2Lab(signal);
1669       return;
1670     } else {
1671       ndbrequire(false);
1672     }//if
1673     ndbsttorry10Lab(signal, __LINE__);
1674     return;
1675 
1676   case ZNDB_SPH3:
1677     jam();
1678     /*-----------------------------------------------------------------------*/
1679     // Non-master nodes performing an initial start will execute
1680     // the start request here since the
1681     // initial start do not synchronise so much from the master.
1682     // In the master nodes the start
1683     // request will be sent directly to dih (in ndb_startreq) when all
1684     // nodes have completed phase 3 of the start.
1685     /*-----------------------------------------------------------------------*/
1686     cmasterState = MASTER_IDLE;
1687     if(cstarttype == NodeState::ST_INITIAL_START ||
1688        cstarttype == NodeState::ST_SYSTEM_RESTART){
1689       jam();
1690       cmasterState = isMaster() ? MASTER_ACTIVE : MASTER_IDLE;
1691     }
1692     if (!isMaster() && cstarttype == NodeState::ST_INITIAL_START) {
1693       jam();
1694       ndbStartReqLab(signal, cntrRef);
1695       return;
1696     }//if
1697     ndbsttorry10Lab(signal, __LINE__);
1698     break;
1699 
1700   case ZNDB_SPH4:
1701     jam();
1702     c_lcpState.setLcpStatus(LCP_STATUS_IDLE, __LINE__);
1703     cmasterTakeOverNode = ZNIL;
1704     switch(typestart){
1705     case NodeState::ST_INITIAL_START:
1706       jam();
1707       ndbsttorry10Lab(signal, __LINE__);
1708       return;
1709     case NodeState::ST_SYSTEM_RESTART:
1710       jam();
1711       ndbsttorry10Lab(signal, __LINE__);
1712       return;
1713     case NodeState::ST_INITIAL_NODE_RESTART:
1714     case NodeState::ST_NODE_RESTART:
1715       jam();
1716 
1717       /***********************************************************************
1718        * When starting nodes while system is operational we must be controlled
1719        * by the master since only one node restart is allowed at a time.
1720        * When this signal is confirmed the master has also copied the
1721        * dictionary and the distribution information.
1722        */
1723       StartMeReq * req = (StartMeReq*)&signal->theData[0];
1724       req->startingRef = reference();
1725       req->startingVersion = 0; // Obsolete
1726       sendSignal(cmasterdihref, GSN_START_MEREQ, signal,
1727                  StartMeReq::SignalLength, JBB);
1728       return;
1729     }
1730     ndbrequire(false);
1731     break;
1732   case ZNDB_SPH5:
1733     jam();
1734     if (m_gcp_monitor.m_micro_gcp.m_max_lag > 0)
1735     {
1736       infoEvent("GCP Monitor: Computed max GCP_SAVE lag to %u seconds",
1737                 m_gcp_monitor.m_gcp_save.m_max_lag / 10);
1738       infoEvent("GCP Monitor: Computed max GCP_COMMIT lag to %u seconds",
1739                 m_gcp_monitor.m_micro_gcp.m_max_lag / 10);
1740     }
1741     else
1742     {
1743       infoEvent("GCP Monitor: unlimited lags allowed");
1744     }
1745     switch(typestart){
1746     case NodeState::ST_INITIAL_START:
1747     case NodeState::ST_SYSTEM_RESTART:
1748       jam();
1749       jam();
1750       /*---------------------------------------------------------------------*/
1751       // WE EXECUTE A LOCAL CHECKPOINT AS A PART OF A SYSTEM RESTART.
1752       // THE IDEA IS THAT WE NEED TO
1753       // ENSURE THAT WE CAN RECOVER FROM PROBLEMS CAUSED BY MANY NODE
1754       // CRASHES THAT CAUSES THE LOG
1755       // TO GROW AND THE NUMBER OF LOG ROUNDS TO EXECUTE TO GROW.
1756       // THIS CAN OTHERWISE GET US INTO
1757       // A SITUATION WHICH IS UNREPAIRABLE. THUS WE EXECUTE A CHECKPOINT
1758       // BEFORE ALLOWING ANY TRANSACTIONS TO START.
1759       /*---------------------------------------------------------------------*/
1760       if (!isMaster()) {
1761 	jam();
1762 	ndbsttorry10Lab(signal, __LINE__);
1763 	return;
1764       }//if
1765 
1766       c_lcpState.immediateLcpStart = true;
1767       cwaitLcpSr = true;
1768       checkLcpStart(signal, __LINE__);
1769       return;
1770     case NodeState::ST_NODE_RESTART:
1771     case NodeState::ST_INITIAL_NODE_RESTART:
1772       jam();
1773       {
1774         StartCopyReq* req = (StartCopyReq*)signal->getDataPtrSend();
1775         req->senderRef = reference();
1776         req->senderData = RNIL;
1777         req->flags = StartCopyReq::WAIT_LCP;
1778         req->startingNodeId = getOwnNodeId();
1779         if (!ndb_pnr(getNodeInfo(refToNode(cmasterdihref)).m_version))
1780         {
1781           jam();
1782           infoEvent("Detecting upgrade: Master(%u) does not support parallel node recovery",
1783                     refToNode(cmasterdihref));
1784           sendSignal(cmasterdihref, GSN_START_COPYREQ, signal,
1785                      StartCopyReq::SignalLength, JBB);
1786         }
1787         else
1788         {
1789           sendSignal(reference(), GSN_START_COPYREQ, signal,
1790                      StartCopyReq::SignalLength, JBB);
1791         }
1792       }
1793       return;
1794     }
1795     ndbrequire(false);
1796   case ZNDB_SPH6:
1797     jam();
1798     switch(typestart){
1799     case NodeState::ST_INITIAL_START:
1800     case NodeState::ST_SYSTEM_RESTART:
1801       jam();
1802       if(isMaster()){
1803 	jam();
1804 	startGcp(signal);
1805       }
1806       ndbsttorry10Lab(signal, __LINE__);
1807       return;
1808     case NodeState::ST_NODE_RESTART:
1809     case NodeState::ST_INITIAL_NODE_RESTART:
1810       ndbsttorry10Lab(signal, __LINE__);
1811       return;
1812     }
1813     ndbrequire(false);
1814     break;
1815   default:
1816     jam();
1817     ndbsttorry10Lab(signal, __LINE__);
1818     break;
1819   }//switch
1820 }//Dbdih::execNDB_STTOR()
1821 
1822 void
execNODE_START_REP(Signal * signal)1823 Dbdih::execNODE_START_REP(Signal* signal)
1824 {
1825   /*
1826    * Send DICT_UNLOCK_ORD when this node is SL_STARTED.
1827    *
1828    * Sending it before (sp 7) conflicts with code which assumes
1829    * SL_STARTING means we are in copy phase of NR.
1830    *
1831    * NodeState::starting.restartType is not supposed to be used
1832    * when SL_STARTED.  Also it seems NODE_START_REP can arrive twice.
1833    *
1834    * For these reasons there are no consistency checks and
1835    * we rely on c_dictLockSlavePtrI_nodeRestart alone.
1836    */
1837   if (signal->theData[0] == getOwnNodeId())
1838   {
1839     /**
1840      * With parallel node restart, only unlock self, if it's self that has
1841      *   started
1842      */
1843     jam();
1844     if (c_dictLockSlavePtrI_nodeRestart != RNIL) {
1845       sendDictUnlockOrd(signal, c_dictLockSlavePtrI_nodeRestart);
1846       c_dictLockSlavePtrI_nodeRestart = RNIL;
1847     }
1848   }
1849 }
1850 
1851 void
createMutexes(Signal * signal,Uint32 count)1852 Dbdih::createMutexes(Signal * signal, Uint32 count){
1853   Callback c = { safe_cast(&Dbdih::createMutex_done), count };
1854 
1855   switch(count){
1856   case 0:{
1857     Mutex mutex(signal, c_mutexMgr, c_startLcpMutexHandle);
1858     mutex.create(c);
1859     return;
1860   }
1861   case 1:{
1862     Mutex mutex(signal, c_mutexMgr, c_switchPrimaryMutexHandle);
1863     mutex.create(c);
1864     return;
1865   }
1866   case 2:{
1867     Mutex mutex(signal, c_mutexMgr, c_fragmentInfoMutex_lcp);
1868     mutex.create(c);
1869     return;
1870   }
1871   }
1872 
1873   execute(signal, m_sendSTTORRY, 0);
1874 }
1875 
1876 void
createMutex_done(Signal * signal,Uint32 senderData,Uint32 retVal)1877 Dbdih::createMutex_done(Signal* signal, Uint32 senderData, Uint32 retVal){
1878   jamEntry();
1879   ndbrequire(retVal == 0);
1880 
1881   switch(senderData){
1882   case 0:{
1883     Mutex mutex(signal, c_mutexMgr, c_startLcpMutexHandle);
1884     mutex.release();
1885     break;
1886   }
1887   case 1:{
1888     Mutex mutex(signal, c_mutexMgr, c_switchPrimaryMutexHandle);
1889     mutex.release();
1890     break;
1891   }
1892   case 2:{
1893     Mutex mutex(signal, c_mutexMgr, c_fragmentInfoMutex_lcp);
1894     mutex.release();
1895     break;
1896   }
1897   }
1898 
1899   createMutexes(signal, senderData + 1);
1900 }
1901 
1902 /*****************************************************************************/
1903 /* ------------------------------------------------------------------------- */
1904 /*       WE HAVE BEEN REQUESTED BY NDBCNTR TO PERFORM A RESTART OF THE       */
1905 /*       DATABASE TABLES.                                                    */
1906 /*       THIS SIGNAL IS SENT AFTER COMPLETING PHASE 3 IN ALL BLOCKS IN A     */
1907 /*       SYSTEM RESTART. WE WILL ALSO JUMP TO THIS LABEL FROM PHASE 3 IN AN  */
1908 /*       INITIAL START.                                                      */
1909 /* ------------------------------------------------------------------------- */
1910 /*****************************************************************************/
execNDB_STARTREQ(Signal * signal)1911 void Dbdih::execNDB_STARTREQ(Signal* signal)
1912 {
1913   jamEntry();
1914   BlockReference ref = signal->theData[0];
1915   cstarttype = signal->theData[1];
1916   ndbStartReqLab(signal, ref);
1917 }//Dbdih::execNDB_STARTREQ()
1918 
ndbStartReqLab(Signal * signal,BlockReference ref)1919 void Dbdih::ndbStartReqLab(Signal* signal, BlockReference ref)
1920 {
1921   cndbStartReqBlockref = ref;
1922   if (cstarttype == NodeState::ST_INITIAL_START) {
1923     jam();
1924     initRestartInfo(signal);
1925     initGciFilesLab(signal);
1926     return;
1927   }
1928 
1929   NodeRecordPtr nodePtr;
1930   Uint32 gci = SYSFILE->lastCompletedGCI[getOwnNodeId()];
1931   for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++)
1932   {
1933     jam();
1934     ptrAss(nodePtr, nodeRecord);
1935     if (SYSFILE->lastCompletedGCI[nodePtr.i] > gci)
1936     {
1937       jam();
1938       /**
1939        * Since we're starting(is master) and there
1940        *   there are other nodes with higher GCI...
1941        *   there gci's must be invalidated...
1942        *   and they _must_ do an initial start
1943        *   indicate this by setting lastCompletedGCI = 0
1944        */
1945       SYSFILE->lastCompletedGCI[nodePtr.i] = 0;
1946       ndbrequire(nodePtr.p->nodeStatus != NodeRecord::ALIVE);
1947       warningEvent("Making filesystem for node %d unusable (need --initial)",
1948 		   nodePtr.i);
1949     }
1950     else if (nodePtr.p->nodeStatus == NodeRecord::ALIVE &&
1951 	     SYSFILE->lastCompletedGCI[nodePtr.i] == 0)
1952     {
1953       jam();
1954       CRASH_INSERTION(7170);
1955       char buf[255];
1956       BaseString::snprintf(buf, sizeof(buf),
1957 			   "Cluster requires this node to be started "
1958 			   " with --initial as partial start has been performed"
1959 			   " and this filesystem is unusable");
1960       progError(__LINE__,
1961 		NDBD_EXIT_SR_RESTARTCONFLICT,
1962 		buf);
1963       ndbrequire(false);
1964     }
1965   }
1966 
1967   /**
1968    * This set which GCI we will try to restart to
1969    */
1970   SYSFILE->newestRestorableGCI = gci;
1971   infoEvent("Restarting cluster to GCI: %u", gci);
1972 
1973   ndbrequire(isMaster());
1974   copyGciLab(signal, CopyGCIReq::RESTART); // We have already read the file!
1975 }//Dbdih::ndbStartReqLab()
1976 
execREAD_NODESCONF(Signal * signal)1977 void Dbdih::execREAD_NODESCONF(Signal* signal)
1978 {
1979   unsigned i;
1980   ReadNodesConf * const readNodes = (ReadNodesConf *)&signal->theData[0];
1981   jamEntry();
1982   Uint32 nodeArray[MAX_NDB_NODES+1];
1983 
1984   csystemnodes  = readNodes->noOfNodes;
1985   cmasterNodeId = readNodes->masterNodeId;
1986   unsigned index = 0;
1987   NdbNodeBitmask tmp; tmp.assign(2, readNodes->allNodes);
1988   for (i = 1; i < MAX_NDB_NODES; i++){
1989     jam();
1990     if(tmp.get(i)){
1991       jam();
1992       nodeArray[index] = i;
1993       if(NdbNodeBitmask::get(readNodes->inactiveNodes, i) == false){
1994         jam();
1995         con_lineNodes++;
1996       }//if
1997       index++;
1998     }//if
1999   }//for
2000   nodeArray[index] = RNIL; // terminate
2001 
2002   if (c_2pass_inr)
2003   {
2004     jam();
2005     Uint32 workers = getNodeInfo(getOwnNodeId()).m_lqh_workers;
2006     printf("Checking 2-pass initial node restart: ");
2007     for (i = 0; i<index; i++)
2008     {
2009       if (NdbNodeBitmask::get(readNodes->inactiveNodes, nodeArray[i]))
2010         continue;
2011 
2012       if (!ndbd_non_trans_copy_frag_req(getNodeInfo(nodeArray[i]).m_version))
2013       {
2014         jam();
2015         c_2pass_inr = false;
2016         printf("not ok (version node %u) => disabled\n", nodeArray[i]);
2017         break;
2018       }
2019 
2020       if (workers > 1 &&
2021           workers != getNodeInfo(nodeArray[i]).m_lqh_workers)
2022       {
2023         c_2pass_inr = false;
2024         printf("not ok (different worker cnt node %u) => disabled\n",
2025                nodeArray[i]);
2026         break;
2027       }
2028     }
2029     if (c_2pass_inr)
2030       printf("ok\n");
2031 
2032     /**
2033      * Note: In theory it would be ok for just nodes that we plan to copy from
2034      *   supported this...but in e.g a 3/4-replica scenario,
2035      *      if one of the nodes does, and the other doesnt, we don't
2036      *      have enought infrastructure to easily check this...
2037      *      therefor we require all nodes to support it.
2038      */
2039   }
2040 
2041   if(cstarttype == NodeState::ST_SYSTEM_RESTART ||
2042      cstarttype == NodeState::ST_NODE_RESTART)
2043   {
2044 
2045     for(i = 1; i<MAX_NDB_NODES; i++){
2046       const Uint32 stat = Sysfile::getNodeStatus(i, SYSFILE->nodeStatus);
2047       if(stat == Sysfile::NS_NotDefined && !tmp.get(i))
2048       {
2049 	jam();
2050 	continue;
2051       }
2052 
2053       if(tmp.get(i) && stat != Sysfile::NS_NotDefined)
2054       {
2055 	jam();
2056 	continue;
2057       }
2058 
2059       if (stat == Sysfile::NS_NotDefined && tmp.get(i))
2060       {
2061         jam();
2062         infoEvent("Discovered new node %u", i);
2063         continue;
2064       }
2065 
2066       if (stat == Sysfile::NS_Configured && !tmp.get(i))
2067       {
2068         jam();
2069         infoEvent("Configured node %u not present, ignoring",
2070                   i);
2071         continue;
2072       }
2073 
2074       char buf[255];
2075       BaseString::snprintf(buf, sizeof(buf),
2076                            "Illegal configuration change."
2077                            " Initial start needs to be performed "
2078                            " when removing nodes with nodegroup (node %d)", i);
2079       progError(__LINE__, NDBD_EXIT_INVALID_CONFIG, buf);
2080     }
2081   }
2082 
2083   ndbrequire(csystemnodes >= 1 && csystemnodes < MAX_NDB_NODES);
2084 
2085   cmasterdihref = calcDihBlockRef(cmasterNodeId);
2086   /*-------------------------------------------------------------------------*/
2087   /* MAKE THE LIST OF PRN-RECORD WHICH IS ONE OF THE NODES-LIST IN THIS BLOCK*/
2088   /*-------------------------------------------------------------------------*/
2089   makePrnList(readNodes, nodeArray);
2090   if (cstarttype == NodeState::ST_INITIAL_START) {
2091     jam();
2092     /**----------------------------------------------------------------------
2093      * WHEN WE INITIALLY START A DATABASE WE WILL CREATE NODE GROUPS.
2094      * ALL NODES ARE PUT INTO NODE GROUPS ALTHOUGH HOT SPARE NODES ARE PUT
2095      * INTO A SPECIAL NODE GROUP. IN EACH NODE GROUP WE HAVE THE SAME AMOUNT
2096      * OF NODES AS THERE ARE NUMBER OF REPLICAS.
2097      * ONE POSSIBLE USAGE OF NODE GROUPS ARE TO MAKE A NODE GROUP A COMPLETE
2098      * FRAGMENT OF THE DATABASE. THIS MEANS THAT ALL REPLICAS WILL BE STORED
2099      * IN THE NODE GROUP.
2100      *-----------------------------------------------------------------------*/
2101     makeNodeGroups(nodeArray);
2102   }//if
2103   ndbrequire(checkNodeAlive(cmasterNodeId));
2104 
2105   /**
2106    * Keep bitmap of nodes that can be restored...
2107    *   and nodes that need take-over
2108    *
2109    */
2110   m_sr_nodes.clear();
2111   m_to_nodes.clear();
2112 
2113   // Start with assumption that all can restore
2114   {
2115     NodeRecordPtr specNodePtr;
2116     specNodePtr.i = cfirstAliveNode;
2117     do {
2118       jam();
2119       m_sr_nodes.set(specNodePtr.i);
2120       ptrCheckGuard(specNodePtr, MAX_NDB_NODES, nodeRecord);
2121       specNodePtr.i = specNodePtr.p->nextNode;
2122     } while (specNodePtr.i != RNIL);
2123   }
2124 
2125   execute(signal, m_sendSTTORRY, 0);
2126 }//Dbdih::execREAD_NODESCONF()
2127 
2128 /*---------------------------------------------------------------------------*/
2129 /*                    START NODE LOGIC FOR NODE RESTART                      */
2130 /*---------------------------------------------------------------------------*/
nodeRestartPh2Lab(Signal * signal)2131 void Dbdih::nodeRestartPh2Lab(Signal* signal)
2132 {
2133   /*
2134    * Lock master DICT to avoid metadata operations during INR/NR.
2135    * Done just before START_PERMREQ.
2136    *
2137    * It would be more elegant to do this just before START_MEREQ.
2138    * The problem is, on INR we end up in massive invalidateNodeLCP
2139    * which is not fully protected against metadata ops.
2140    */
2141   ndbrequire(c_dictLockSlavePtrI_nodeRestart == RNIL);
2142 
2143   // check that we are not yet taking part in schema ops
2144   CRASH_INSERTION(7174);
2145 
2146   Uint32 lockType = DictLockReq::NodeRestartLock;
2147   Callback c = { safe_cast(&Dbdih::recvDictLockConf_nodeRestart), 0 };
2148   sendDictLockReq(signal, lockType, c);
2149 }
2150 
recvDictLockConf_nodeRestart(Signal * signal,Uint32 data,Uint32 ret)2151 void Dbdih::recvDictLockConf_nodeRestart(Signal* signal, Uint32 data, Uint32 ret)
2152 {
2153   ndbrequire(c_dictLockSlavePtrI_nodeRestart == RNIL);
2154   ndbrequire(data != RNIL);
2155   c_dictLockSlavePtrI_nodeRestart = data;
2156 
2157   nodeRestartPh2Lab2(signal);
2158 }
2159 
nodeRestartPh2Lab2(Signal * signal)2160 void Dbdih::nodeRestartPh2Lab2(Signal* signal)
2161 {
2162   /*------------------------------------------------------------------------*/
2163   // REQUEST FOR PERMISSION FROM MASTER TO START A NODE IN AN ALREADY
2164   // RUNNING SYSTEM.
2165   /*------------------------------------------------------------------------*/
2166   StartPermReq * const req = (StartPermReq *)&signal->theData[0];
2167 
2168   req->blockRef  = reference();
2169   req->nodeId    = cownNodeId;
2170   req->startType = cstarttype;
2171   sendSignal(cmasterdihref, GSN_START_PERMREQ, signal, 3, JBB);
2172 
2173   if (ERROR_INSERTED(7203))
2174   {
2175     signal->theData[0] = 9999;
2176     sendSignalWithDelay(CMVMI_REF, GSN_NDB_TAMPER, signal, 200, 1);
2177   }
2178 }
2179 
execSTART_PERMCONF(Signal * signal)2180 void Dbdih::execSTART_PERMCONF(Signal* signal)
2181 {
2182   jamEntry();
2183   CRASH_INSERTION(7121);
2184   Uint32 nodeId = signal->theData[0];
2185   cfailurenr = signal->theData[1];
2186 
2187   bool microGCP = signal->theData[2];
2188   if (signal->getLength() < StartPermConf::SignalLength)
2189   {
2190     microGCP = false;
2191   }
2192   m_micro_gcp.m_enabled = microGCP;
2193   ndbrequire(nodeId == cownNodeId);
2194   ndbsttorry10Lab(signal, __LINE__);
2195 
2196   if (m_micro_gcp.m_enabled)
2197   {
2198     jam();
2199     UpgradeProtocolOrd * ord = (UpgradeProtocolOrd*)signal->getDataPtrSend();
2200     ord->type = UpgradeProtocolOrd::UPO_ENABLE_MICRO_GCP;
2201     EXECUTE_DIRECT(QMGR,GSN_UPGRADE_PROTOCOL_ORD,signal,signal->getLength());
2202   }
2203   else if(isMultiThreaded())
2204   {
2205     /**
2206      * Prevent this start, as there is some non-thread-safe upgrade code for
2207      * this case in LQH.
2208      */
2209     progError(__LINE__, NDBD_EXIT_SR_RESTARTCONFLICT,
2210               "Cluster requires that all old data nodes are upgraded "
2211               "while running single-threaded ndbd before starting "
2212               "multi-threaded ndbmtd data nodes.");
2213   }
2214 }//Dbdih::execSTART_PERMCONF()
2215 
execSTART_PERMREF(Signal * signal)2216 void Dbdih::execSTART_PERMREF(Signal* signal)
2217 {
2218   jamEntry();
2219   Uint32 errorCode = signal->theData[1];
2220   if (errorCode == StartPermRef::ZNODE_ALREADY_STARTING_ERROR ||
2221       errorCode == StartPermRef::ZNODE_START_DISALLOWED_ERROR) {
2222     jam();
2223     /*-----------------------------------------------------------------------*/
2224     // The master was busy adding another node. We will wait for a second and
2225     // try again.
2226     /*-----------------------------------------------------------------------*/
2227     infoEvent("Did not get permission to start (%u) retry in 3s",
2228               errorCode);
2229     signal->theData[0] = DihContinueB::ZSTART_PERMREQ_AGAIN;
2230     sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 3000, 1);
2231     return;
2232   }//if
2233 
2234   if (errorCode == StartPermRef::InitialStartRequired)
2235   {
2236     CRASH_INSERTION(7170);
2237     char buf[255];
2238     BaseString::snprintf(buf, sizeof(buf),
2239 			 "Cluster requires this node to be started "
2240 			 " with --initial as partial start has been performed"
2241 			 " and this filesystem is unusable");
2242     progError(__LINE__,
2243 	      NDBD_EXIT_SR_RESTARTCONFLICT,
2244 	      buf);
2245     ndbrequire(false);
2246   }
2247 
2248   /*------------------------------------------------------------------------*/
2249   // Some node process in another node involving our node was still active. We
2250   // will recover from this by crashing here.
2251   // This is controlled restart using the
2252   // already existing features of node crashes. It is not a bug getting here.
2253   /*-------------------------------------------------------------------------*/
2254   ndbrequire(false);
2255   return;
2256 }//Dbdih::execSTART_PERMREF()
2257 
2258 /*---------------------------------------------------------------------------*/
2259 /*       THIS SIGNAL IS RECEIVED IN THE STARTING NODE WHEN THE START_MEREQ   */
2260 /*       HAS BEEN EXECUTED IN THE MASTER NODE.                               */
2261 /*---------------------------------------------------------------------------*/
execSTART_MECONF(Signal * signal)2262 void Dbdih::execSTART_MECONF(Signal* signal)
2263 {
2264   jamEntry();
2265   StartMeConf * const startMe = (StartMeConf *)&signal->theData[0];
2266   Uint32 nodeId = startMe->startingNodeId;
2267   const Uint32 startWord = startMe->startWord;
2268   Uint32 i;
2269 
2270   CRASH_INSERTION(7130);
2271   ndbrequire(nodeId == cownNodeId);
2272   arrGuard(startWord + StartMeConf::DATA_SIZE, sizeof(cdata)/4);
2273   for(i = 0; i < StartMeConf::DATA_SIZE; i++)
2274     cdata[startWord+i] = startMe->data[i];
2275 
2276   if(startWord + StartMeConf::DATA_SIZE < Sysfile::SYSFILE_SIZE32){
2277     jam();
2278     /**
2279      * We are still waiting for data
2280      */
2281     return;
2282   }
2283   jam();
2284 
2285   /**
2286    * Copy into sysfile
2287    *
2288    * But dont copy lastCompletedGCI:s
2289    */
2290   Uint32 key = SYSFILE->m_restart_seq;
2291   Uint32 tempGCP[MAX_NDB_NODES];
2292   for(i = 0; i < MAX_NDB_NODES; i++)
2293     tempGCP[i] = SYSFILE->lastCompletedGCI[i];
2294 
2295   for(i = 0; i < Sysfile::SYSFILE_SIZE32; i++)
2296     sysfileData[i] = cdata[i];
2297 
2298   SYSFILE->m_restart_seq = key;
2299   for(i = 0; i < MAX_NDB_NODES; i++)
2300     SYSFILE->lastCompletedGCI[i] = tempGCP[i];
2301 
2302   setNodeActiveStatus();
2303   setNodeGroups();
2304   ndbsttorry10Lab(signal, __LINE__);
2305 
2306   if (getNodeActiveStatus(getOwnNodeId()) == Sysfile::NS_Configured)
2307   {
2308     jam();
2309     c_set_initial_start_flag = FALSE;
2310   }
2311 }//Dbdih::execSTART_MECONF()
2312 
execSTART_COPYCONF(Signal * signal)2313 void Dbdih::execSTART_COPYCONF(Signal* signal)
2314 {
2315   jamEntry();
2316 
2317   StartCopyConf* conf = (StartCopyConf*)signal->getDataPtr();
2318   Uint32 nodeId = conf->startingNodeId;
2319   Uint32 senderData = conf->senderData;
2320 
2321   if (!ndb_pnr(getNodeInfo(refToNode(signal->getSendersBlockRef())).m_version))
2322   {
2323     jam();
2324     senderData = RNIL;
2325   }
2326 
2327   if (senderData == RNIL)
2328   {
2329     /**
2330      * This is NR
2331      */
2332     jam();
2333     ndbrequire(nodeId == cownNodeId);
2334     CRASH_INSERTION(7132);
2335     ndbsttorry10Lab(signal, __LINE__);
2336   }
2337   else
2338   {
2339     /**
2340      * This is TO during SR...waiting for all nodes
2341      */
2342     infoEvent("Take-over of %u complete", nodeId);
2343 
2344     ndbrequire(senderData == getOwnNodeId());
2345     ndbrequire(m_to_nodes.get(nodeId));
2346     m_to_nodes.clear(nodeId);
2347     m_sr_nodes.set(nodeId);
2348     if (!m_to_nodes.isclear())
2349     {
2350       jam();
2351       return;
2352     }
2353 
2354     signal->theData[0] = reference();
2355     m_sr_nodes.copyto(NdbNodeBitmask::Size, signal->theData+1);
2356     sendSignal(cntrlblockref, GSN_NDB_STARTCONF, signal,
2357                1 + NdbNodeBitmask::Size, JBB);
2358     return;
2359   }
2360   return;
2361 }//Dbdih::execSTART_COPYCONF()
2362 
2363 /*---------------------------------------------------------------------------*/
2364 /*                    MASTER LOGIC FOR NODE RESTART                          */
2365 /*---------------------------------------------------------------------------*/
2366 /*                    NODE RESTART PERMISSION REQUEST                        */
2367 /*---------------------------------------------------------------------------*/
2368 // A REQUEST FROM A STARTING NODE TO PERFORM A NODE RESTART. IF NO OTHER NODE
2369 // IS ACTIVE IN PERFORMING A NODE RESTART AND THERE ARE NO ACTIVE PROCESSES IN
2370 // THIS NODE INVOLVING THE STARTING NODE  THIS REQUEST WILL BE GRANTED.
2371 /*---------------------------------------------------------------------------*/
execSTART_PERMREQ(Signal * signal)2372 void Dbdih::execSTART_PERMREQ(Signal* signal)
2373 {
2374   StartPermReq * const req = (StartPermReq*)&signal->theData[0];
2375   jamEntry();
2376   const BlockReference retRef = req->blockRef;
2377   const Uint32 nodeId   = req->nodeId;
2378   const Uint32 typeStart = req->startType;
2379   CRASH_INSERTION(7122);
2380   ndbrequire(isMaster());
2381   ndbrequire(refToNode(retRef) == nodeId);
2382   if ((c_nodeStartMaster.activeState) ||
2383       (c_nodeStartMaster.wait != ZFALSE) ||
2384       ERROR_INSERTED_CLEAR(7175)) {
2385     jam();
2386     signal->theData[0] = nodeId;
2387     signal->theData[1] = StartPermRef::ZNODE_ALREADY_STARTING_ERROR;
2388     sendSignal(retRef, GSN_START_PERMREF, signal, 2, JBB);
2389     return;
2390   }//if
2391 
2392   if (!getAllowNodeStart(nodeId))
2393   {
2394     jam();
2395 ref:
2396     signal->theData[0] = nodeId;
2397     signal->theData[1] = StartPermRef::ZNODE_START_DISALLOWED_ERROR;
2398     sendSignal(retRef, GSN_START_PERMREF, signal, 2, JBB);
2399     return;
2400   }
2401   if (getNodeStatus(nodeId) != NodeRecord::DEAD)
2402   {
2403     jam();
2404     g_eventLogger->error("nodeStatus in START_PERMREQ = %u",
2405                          (Uint32) getNodeStatus(nodeId));
2406     goto ref;
2407   }//if
2408 
2409   if (SYSFILE->lastCompletedGCI[nodeId] == 0 &&
2410       typeStart != NodeState::ST_INITIAL_NODE_RESTART)
2411   {
2412     jam();
2413     signal->theData[0] = nodeId;
2414     signal->theData[1] = StartPermRef::InitialStartRequired;
2415     sendSignal(retRef, GSN_START_PERMREF, signal, 2, JBB);
2416     return;
2417   }
2418 
2419   /*----------------------------------------------------------------------
2420    * WE START THE INCLUSION PROCEDURE
2421    * ---------------------------------------------------------------------*/
2422   c_nodeStartMaster.failNr   = cfailurenr;
2423   c_nodeStartMaster.wait     = ZFALSE;
2424   c_nodeStartMaster.startInfoErrorCode = 0;
2425   c_nodeStartMaster.startNode = nodeId;
2426   c_nodeStartMaster.activeState = true;
2427   c_nodeStartMaster.m_outstandingGsn =  GSN_START_INFOREQ;
2428 
2429   setNodeStatus(nodeId, NodeRecord::STARTING);
2430   /**
2431    * But if it's a NodeState::ST_INITIAL_NODE_RESTART
2432    *
2433    * We first have to clear LCP's
2434    * For normal node restart we simply ensure that all nodes
2435    * are informed of the node restart
2436    */
2437   StartInfoReq *const r =(StartInfoReq*)&signal->theData[0];
2438   r->startingNodeId = nodeId;
2439   r->typeStart = typeStart;
2440   r->systemFailureNo = cfailurenr;
2441   sendLoopMacro(START_INFOREQ, sendSTART_INFOREQ, RNIL);
2442 }//Dbdih::execSTART_PERMREQ()
2443 
execSTART_INFOREF(Signal * signal)2444 void Dbdih::execSTART_INFOREF(Signal* signal)
2445 {
2446   StartInfoRef * ref = (StartInfoRef*)&signal->theData[0];
2447   if (getNodeStatus(ref->startingNodeId) != NodeRecord::STARTING) {
2448     jam();
2449     return;
2450   }//if
2451   ndbrequire(c_nodeStartMaster.startNode == ref->startingNodeId);
2452   c_nodeStartMaster.startInfoErrorCode = ref->errorCode;
2453   startInfoReply(signal, ref->sendingNodeId);
2454 }//Dbdih::execSTART_INFOREF()
2455 
execSTART_INFOCONF(Signal * signal)2456 void Dbdih::execSTART_INFOCONF(Signal* signal)
2457 {
2458   jamEntry();
2459   StartInfoConf * conf = (StartInfoConf*)&signal->theData[0];
2460   if (getNodeStatus(conf->startingNodeId) != NodeRecord::STARTING) {
2461     jam();
2462     return;
2463   }//if
2464   ndbrequire(c_nodeStartMaster.startNode == conf->startingNodeId);
2465   startInfoReply(signal, conf->sendingNodeId);
2466 }//Dbdih::execSTART_INFOCONF()
2467 
startInfoReply(Signal * signal,Uint32 nodeId)2468 void Dbdih::startInfoReply(Signal* signal, Uint32 nodeId)
2469 {
2470   receiveLoopMacro(START_INFOREQ, nodeId);
2471   /**
2472    * We're finished with the START_INFOREQ's
2473    */
2474   if (c_nodeStartMaster.startInfoErrorCode == 0) {
2475     jam();
2476     /**
2477      * Everything has been a success so far
2478      */
2479     StartPermConf * conf = (StartPermConf*)&signal->theData[0];
2480     conf->startingNodeId = c_nodeStartMaster.startNode;
2481     conf->systemFailureNo = cfailurenr;
2482     conf->microGCP = m_micro_gcp.m_enabled;
2483     sendSignal(calcDihBlockRef(c_nodeStartMaster.startNode),
2484                GSN_START_PERMCONF, signal, StartPermConf::SignalLength, JBB);
2485     c_nodeStartMaster.m_outstandingGsn = GSN_START_PERMCONF;
2486   } else {
2487     jam();
2488     StartPermRef * ref = (StartPermRef*)&signal->theData[0];
2489     ref->startingNodeId = c_nodeStartMaster.startNode;
2490     ref->errorCode = c_nodeStartMaster.startInfoErrorCode;
2491     sendSignal(calcDihBlockRef(c_nodeStartMaster.startNode),
2492 	       GSN_START_PERMREF, signal, StartPermRef::SignalLength, JBB);
2493     nodeResetStart(signal);
2494   }//if
2495 }//Dbdih::startInfoReply()
2496 
2497 /*---------------------------------------------------------------------------*/
2498 /*                    NODE RESTART CONTINUE REQUEST                          */
2499 /*---------------------------------------------------------------------------*/
2500 // THIS SIGNAL AND THE CODE BELOW IS EXECUTED BY THE MASTER WHEN IT HAS BEEN
2501 // REQUESTED TO START UP A NEW NODE. The master instructs the starting node
2502 // how to set up its log for continued execution.
2503 /*---------------------------------------------------------------------------*/
execSTART_MEREQ(Signal * signal)2504 void Dbdih::execSTART_MEREQ(Signal* signal)
2505 {
2506   StartMeReq * req = (StartMeReq*)&signal->theData[0];
2507   jamEntry();
2508   const BlockReference Tblockref = req->startingRef;
2509   const Uint32 Tnodeid = refToNode(Tblockref);
2510 
2511   ndbrequire(isMaster());
2512   ndbrequire(c_nodeStartMaster.startNode == Tnodeid);
2513   ndbrequire(getNodeStatus(Tnodeid) == NodeRecord::STARTING);
2514 
2515   if (getNodeInfo(Tnodeid).m_version >= NDBD_COPY_GCI_RESTART_NR)
2516   {
2517     jam();
2518     /**
2519      * COPY sysfile to starting node here directly
2520      *   to that it gets nodegroups early on
2521      */
2522 
2523     /**
2524      * Note: only one node can be starting now, so we can use
2525      *       c_nodeStartMaster.startNode for determening where to send
2526      */
2527     c_nodeStartMaster.m_outstandingGsn = GSN_COPY_GCIREQ;
2528     copyGciLab(signal, CopyGCIReq::RESTART_NR);
2529   }
2530   else
2531   {
2532     jam();
2533     startme_copygci_conf(signal);
2534   }
2535 }//Dbdih::nodeRestartStartRecConfLab()
2536 
2537 void
startme_copygci_conf(Signal * signal)2538 Dbdih::startme_copygci_conf(Signal* signal)
2539 {
2540   jam();
2541   Callback c = { safe_cast(&Dbdih::lcpBlockedLab),
2542                  c_nodeStartMaster.startNode };
2543   Mutex mutex(signal, c_mutexMgr, c_nodeStartMaster.m_fragmentInfoMutex);
2544   mutex.lock(c, true, true);
2545 }
2546 
lcpBlockedLab(Signal * signal,Uint32 nodeId,Uint32 retVal)2547 void Dbdih::lcpBlockedLab(Signal* signal, Uint32 nodeId, Uint32 retVal)
2548 {
2549   jamEntry();
2550   if (c_nodeStartMaster.startNode != nodeId)
2551   {
2552     jam();
2553     if (retVal == 0 || retVal == UtilLockRef::InLockQueue)
2554     {
2555       infoEvent("Releasing table/fragment info lock for node %u", nodeId);
2556 
2557       Mutex mutex(signal, c_mutexMgr, c_nodeStartMaster.m_fragmentInfoMutex);
2558       mutex.unlock();
2559       return;
2560     }
2561     return;
2562   }
2563 
2564   if (retVal == UtilLockRef::InLockQueue)
2565   {
2566     jam();
2567     infoEvent("Node %u enqueued is waiting to copy table/fragment info",
2568               c_nodeStartMaster.startNode);
2569     return;
2570   }
2571 
2572   ndbrequire(retVal == 0); // Mutex error
2573   ndbrequire(getNodeStatus(c_nodeStartMaster.startNode)==NodeRecord::STARTING);
2574   /*------------------------------------------------------------------------*/
2575   // NOW WE HAVE COPIED ALL INFORMATION IN DICT WE ARE NOW READY TO COPY ALL
2576   // INFORMATION IN DIH TO THE NEW NODE.
2577   /*------------------------------------------------------------------------*/
2578 
2579   c_nodeStartMaster.wait = 10;
2580   signal->theData[0] = DihContinueB::ZCOPY_NODE;
2581   signal->theData[1] = 0;
2582   sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
2583   c_nodeStartMaster.m_outstandingGsn = GSN_COPY_TABREQ;
2584 }//Dbdih::lcpBlockedLab()
2585 
nodeDictStartConfLab(Signal * signal)2586 void Dbdih::nodeDictStartConfLab(Signal* signal)
2587 {
2588   /*-----------------------------------------------------------------*/
2589   // Report that node restart has completed copy of dictionary.
2590   /*-----------------------------------------------------------------*/
2591   signal->theData[0] = NDB_LE_NR_CopyDict;
2592   sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 1, JBB);
2593 
2594   /*-------------------------------------------------------------------------*/
2595   // NOW WE HAVE COPIED BOTH DIH AND DICT INFORMATION. WE ARE NOW READY TO
2596   // INTEGRATE THE NODE INTO THE LCP AND GCP PROTOCOLS AND TO ALLOW UPDATES OF
2597   // THE DICTIONARY AGAIN.
2598   /*-------------------------------------------------------------------------*/
2599   c_nodeStartMaster.wait = ZFALSE;
2600   c_nodeStartMaster.blockGcp = 1;
2601 
2602   return;
2603 }//Dbdih::nodeDictStartConfLab()
2604 
dihCopyCompletedLab(Signal * signal)2605 void Dbdih::dihCopyCompletedLab(Signal* signal)
2606 {
2607   BlockReference ref = calcDictBlockRef(c_nodeStartMaster.startNode);
2608   DictStartReq * req = (DictStartReq*)&signal->theData[0];
2609   req->restartGci = (Uint32)(m_micro_gcp.m_new_gci >> 32);
2610   req->senderRef = reference();
2611   sendSignal(ref, GSN_DICTSTARTREQ,
2612              signal, DictStartReq::SignalLength, JBB);
2613   c_nodeStartMaster.m_outstandingGsn = GSN_DICTSTARTREQ;
2614   c_nodeStartMaster.wait = 0;
2615 }//Dbdih::dihCopyCompletedLab()
2616 
gcpBlockedLab(Signal * signal)2617 void Dbdih::gcpBlockedLab(Signal* signal)
2618 {
2619   /**
2620    * The node DIH will be part of LCP
2621    */
2622   NodeRecordPtr nodePtr;
2623   nodePtr.i = c_nodeStartMaster.startNode;
2624   ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
2625   nodePtr.p->m_inclDihLcp = true;
2626 
2627   /**
2628    * If node is new...this is the place to do things,
2629    *   gcp+lcp is blocked
2630    */
2631   if (getNodeActiveStatus(nodePtr.i) == Sysfile::NS_NotDefined)
2632   {
2633     jam();
2634     infoEvent("Adding node %d to sysfile, NS_Configured",
2635               nodePtr.i);
2636     setNodeActiveStatus(nodePtr.i, Sysfile::NS_Configured);
2637     Sysfile::setNodeGroup(nodePtr.i, SYSFILE->nodeGroups,
2638                           NO_NODE_GROUP_ID);
2639     Sysfile::setNodeStatus(nodePtr.i,
2640                            SYSFILE->nodeStatus, Sysfile::NS_Configured);
2641   }
2642 
2643   /*-------------------------------------------------------------------------*/
2644   // NOW IT IS TIME TO INFORM ALL OTHER NODES IN THE CLUSTER OF THE STARTED
2645   // NODE SUCH THAT THEY ALSO INCLUDE THE NODE IN THE NODE LISTS AND SO FORTH.
2646   /*------------------------------------------------------------------------*/
2647   sendLoopMacro(INCL_NODEREQ, sendINCL_NODEREQ, RNIL);
2648   /*-------------------------------------------------------------------------*/
2649   // We also need to send to the starting node to ensure he is aware of the
2650   // global checkpoint id and the correct state. We do not wait for any reply
2651   // since the starting node will not send any.
2652   /*-------------------------------------------------------------------------*/
2653   Uint32 startVersion = getNodeInfo(c_nodeStartMaster.startNode).m_version;
2654 
2655   if ((getMajor(startVersion) == 4 &&
2656        startVersion >= NDBD_INCL_NODECONF_VERSION_4) ||
2657       (getMajor(startVersion) == 5 &&
2658        startVersion >= NDBD_INCL_NODECONF_VERSION_5) ||
2659       (getMajor(startVersion) > 5))
2660   {
2661     c_INCL_NODEREQ_Counter.setWaitingFor(c_nodeStartMaster.startNode);
2662   }
2663 
2664   sendINCL_NODEREQ(signal, c_nodeStartMaster.startNode, RNIL);
2665 }//Dbdih::gcpBlockedLab()
2666 
2667 /*---------------------------------------------------------------------------*/
2668 // THIS SIGNAL IS EXECUTED IN BOTH SLAVES AND IN THE MASTER
2669 /*---------------------------------------------------------------------------*/
execINCL_NODECONF(Signal * signal)2670 void Dbdih::execINCL_NODECONF(Signal* signal)
2671 {
2672   jamEntry();
2673   Uint32 TstartNode = signal->theData[0];
2674   Uint32 TsendNodeId_or_blockref = signal->theData[1];
2675 
2676   Uint32 blocklist[7];
2677   blocklist[0] = clocallqhblockref;
2678   blocklist[1] = clocaltcblockref;
2679   blocklist[2] = cdictblockref;
2680   blocklist[3] = numberToRef(BACKUP, getOwnNodeId());
2681   blocklist[4] = numberToRef(SUMA, getOwnNodeId());
2682   blocklist[5] = numberToRef(DBSPJ, getOwnNodeId());
2683   blocklist[6] = 0;
2684 
2685   for (Uint32 i = 0; blocklist[i] != 0; i++)
2686   {
2687     if (TsendNodeId_or_blockref == blocklist[i])
2688     {
2689       jam();
2690 
2691       if (TstartNode != c_nodeStartSlave.nodeId)
2692       {
2693         jam();
2694         warningEvent("Recevied INCL_NODECONF for %u from %s"
2695                      " while %u is starting",
2696                      TstartNode,
2697                      getBlockName(refToBlock(TsendNodeId_or_blockref)),
2698                      c_nodeStartSlave.nodeId);
2699         return;
2700       }
2701 
2702       if (getNodeStatus(c_nodeStartSlave.nodeId) == NodeRecord::ALIVE &&
2703 	  blocklist[i+1] != 0)
2704       {
2705 	/**
2706 	 * Send to next in block list
2707 	 */
2708 	jam();
2709 	signal->theData[0] = reference();
2710 	signal->theData[1] = c_nodeStartSlave.nodeId;
2711 	sendSignal(blocklist[i+1], GSN_INCL_NODEREQ, signal, 2, JBB);
2712 	return;
2713       }
2714       else
2715       {
2716 	/**
2717 	 * All done, reply to master
2718 	 */
2719 	jam();
2720 	signal->theData[0] = c_nodeStartSlave.nodeId;
2721 	signal->theData[1] = cownNodeId;
2722 	sendSignal(cmasterdihref, GSN_INCL_NODECONF, signal, 2, JBB);
2723 
2724 	c_nodeStartSlave.nodeId = 0;
2725 	return;
2726       }
2727     }
2728   }
2729 
2730   if (c_nodeStartMaster.startNode != TstartNode)
2731   {
2732     jam();
2733     warningEvent("Recevied INCL_NODECONF for %u from %u"
2734                  " while %u is starting",
2735                  TstartNode,
2736                  TsendNodeId_or_blockref,
2737                  c_nodeStartMaster.startNode);
2738     return;
2739   }
2740 
2741   ndbrequire(reference() == cmasterdihref);
2742   receiveLoopMacro(INCL_NODEREQ, TsendNodeId_or_blockref);
2743 
2744   CRASH_INSERTION(7128);
2745   /*-------------------------------------------------------------------------*/
2746   // Now that we have included the starting node in the node lists in the
2747   // various blocks we are ready to start the global checkpoint protocol
2748   /*------------------------------------------------------------------------*/
2749   c_nodeStartMaster.wait = 11;
2750   c_nodeStartMaster.blockGcp = 0;
2751 
2752   /**
2753    * Restart GCP
2754    */
2755   signal->theData[0] = reference();
2756   sendSignal(reference(), GSN_UNBLO_DICTCONF, signal, 1, JBB);
2757 
2758   signal->theData[0] = DihContinueB::ZSTART_GCP;
2759   sendSignal(reference(), GSN_CONTINUEB, signal, 1, JBB);
2760   /**
2761    * To increase likelyhood that multiple nodes starting simulatanious
2762    *   gets to copy fragment-info before a new LCP is started
2763    *   we delay the releasing of this mutex. So that node that (might)
2764    *   be started when GSN_START_PERMREP arrives will get mutex
2765    *   before LCP (which does trylock for 60s)
2766    */
2767   signal->theData[0] = DihContinueB::ZDELAY_RELEASE_FRAGMENT_INFO_MUTEX;
2768   signal->theData[1] = c_nodeStartMaster.m_fragmentInfoMutex.getHandle();
2769   c_nodeStartMaster.m_fragmentInfoMutex.clear();
2770   sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 500, 2);
2771 }//Dbdih::execINCL_NODECONF()
2772 
execUNBLO_DICTCONF(Signal * signal)2773 void Dbdih::execUNBLO_DICTCONF(Signal* signal)
2774 {
2775   jamEntry();
2776   c_nodeStartMaster.wait = ZFALSE;
2777   if (!c_nodeStartMaster.activeState) {
2778     jam();
2779     return;
2780   }//if
2781 
2782   CRASH_INSERTION(7129);
2783   /**-----------------------------------------------------------------------
2784    * WE HAVE NOW PREPARED IT FOR INCLUSION IN THE LCP PROTOCOL.
2785    * WE CAN NOW START THE LCP PROTOCOL AGAIN.
2786    * WE HAVE ALSO MADE THIS FOR THE GCP PROTOCOL.
2787    * WE ARE READY TO START THE PROTOCOLS AND RESPOND TO THE START REQUEST
2788    * FROM THE STARTING NODE.
2789    *------------------------------------------------------------------------*/
2790 
2791   StartMeConf * const startMe = (StartMeConf *)&signal->theData[0];
2792 
2793   const Uint32 wordPerSignal = StartMeConf::DATA_SIZE;
2794   const int noOfSignals = ((Sysfile::SYSFILE_SIZE32 + (wordPerSignal - 1)) /
2795                            wordPerSignal);
2796 
2797   Uint32 nodeId = startMe->startingNodeId = c_nodeStartMaster.startNode;
2798   startMe->startWord = 0;
2799 
2800   const Uint32 ref = calcDihBlockRef(c_nodeStartMaster.startNode);
2801   for(int i = 0; i < noOfSignals; i++){
2802     jam();
2803     { // Do copy
2804       const int startWord = startMe->startWord;
2805       for(Uint32 j = 0; j < wordPerSignal; j++){
2806         startMe->data[j] = sysfileData[j+startWord];
2807       }
2808     }
2809     sendSignal(ref, GSN_START_MECONF, signal, StartMeConf::SignalLength, JBB);
2810     startMe->startWord += wordPerSignal;
2811   }//for
2812   c_nodeStartMaster.m_outstandingGsn = GSN_START_MECONF;
2813   nodeResetStart(signal);
2814 
2815   /**
2816    * Allow next node to start...
2817    */
2818   signal->theData[0] = nodeId;
2819   sendSignal(NDBCNTR_REF, GSN_START_PERMREP, signal, 1, JBB);
2820 }//Dbdih::execUNBLO_DICTCONF()
2821 
2822 /*---------------------------------------------------------------------------*/
2823 /*                    NODE RESTART COPY REQUEST                              */
2824 /*---------------------------------------------------------------------------*/
2825 // A NODE RESTART HAS REACHED ITS FINAL PHASE WHEN THE DATA IS TO BE COPIED
2826 // TO THE NODE. START_COPYREQ IS EXECUTED BY THE STARTING NODE.
2827 /*---------------------------------------------------------------------------*/
execSTART_COPYREQ(Signal * signal)2828 void Dbdih::execSTART_COPYREQ(Signal* signal)
2829 {
2830   jamEntry();
2831   StartCopyReq req = *(StartCopyReq*)signal->getDataPtr();
2832 
2833   Uint32 startNodeId = req.startingNodeId;
2834 
2835   /*-------------------------------------------------------------------------*/
2836   // REPORT Copy process of node restart is now about to start up.
2837   /*-------------------------------------------------------------------------*/
2838   signal->theData[0] = NDB_LE_NR_CopyFragsStarted;
2839   signal->theData[1] = req.startingNodeId;
2840   sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
2841 
2842   CRASH_INSERTION(7131);
2843 
2844   switch (getNodeActiveStatus(startNodeId)) {
2845   case Sysfile::NS_Active:
2846   case Sysfile::NS_ActiveMissed_1:
2847   case Sysfile::NS_ActiveMissed_2:
2848   case Sysfile::NS_NotActive_NotTakenOver:
2849   case Sysfile::NS_Configured:
2850     jam();
2851     /*-----------------------------------------------------------------------*/
2852     // AN ACTIVE NODE HAS BEEN STARTED. THE ACTIVE NODE MUST THEN GET ALL DATA
2853     // IT HAD BEFORE ITS CRASH. WE START THE TAKE OVER IMMEDIATELY.
2854     // SINCE WE ARE AN ACTIVE NODE WE WILL TAKE OVER OUR OWN NODE THAT
2855     // PREVIOUSLY CRASHED.
2856     /*-----------------------------------------------------------------------*/
2857     startTakeOver(signal, startNodeId, startNodeId, &req);
2858     break;
2859   case Sysfile::NS_TakeOver:{
2860     jam();
2861     /*--------------------------------------------------------------------
2862      * We were in the process of taking over but it was not completed.
2863      * We will complete it now instead.
2864      *--------------------------------------------------------------------*/
2865     Uint32 takeOverNode = Sysfile::getTakeOverNode(startNodeId,
2866 						   SYSFILE->takeOver);
2867     if(takeOverNode == 0){
2868       jam();
2869       warningEvent("Bug in take-over code restarting");
2870       takeOverNode = startNodeId;
2871     }
2872 
2873     startTakeOver(signal, startNodeId, takeOverNode, &req);
2874     break;
2875   }
2876   default:
2877     ndbrequire(false);
2878     break;
2879   }//switch
2880 }//Dbdih::execSTART_COPYREQ()
2881 
2882 /*---------------------------------------------------------------------------*/
2883 /*                    SLAVE LOGIC FOR NODE RESTART                           */
2884 /*---------------------------------------------------------------------------*/
execSTART_INFOREQ(Signal * signal)2885 void Dbdih::execSTART_INFOREQ(Signal* signal)
2886 {
2887   jamEntry();
2888   StartInfoReq *const req =(StartInfoReq*)&signal->theData[0];
2889   Uint32 startNode = req->startingNodeId;
2890   if (cfailurenr != req->systemFailureNo) {
2891     jam();
2892     //---------------------------------------------------------------
2893     // A failure occurred since master sent this request. We will ignore
2894     // this request since the node is already dead that is starting.
2895     //---------------------------------------------------------------
2896     return;
2897   }//if
2898   CRASH_INSERTION(7123);
2899   if (isMaster()) {
2900     jam();
2901     ndbrequire(getNodeStatus(startNode) == NodeRecord::STARTING);
2902   } else {
2903     jam();
2904     ndbrequire(getNodeStatus(startNode) == NodeRecord::DEAD);
2905   }//if
2906   if ((!getAllowNodeStart(startNode)) ||
2907       (c_nodeStartSlave.nodeId != 0) ||
2908       (ERROR_INSERTED(7124))) {
2909     jam();
2910     StartInfoRef *const ref =(StartInfoRef*)&signal->theData[0];
2911     ref->startingNodeId = startNode;
2912     ref->sendingNodeId = cownNodeId;
2913     ref->errorCode = StartPermRef::ZNODE_START_DISALLOWED_ERROR;
2914     sendSignal(cmasterdihref, GSN_START_INFOREF, signal,
2915 	       StartInfoRef::SignalLength, JBB);
2916     return;
2917   }//if
2918   setNodeStatus(startNode, NodeRecord::STARTING);
2919   if (req->typeStart == NodeState::ST_INITIAL_NODE_RESTART) {
2920     jam();
2921     setAllowNodeStart(startNode, false);
2922     invalidateNodeLCP(signal, startNode, 0);
2923   } else {
2924     jam();
2925     StartInfoConf * c = (StartInfoConf*)&signal->theData[0];
2926     c->sendingNodeId = cownNodeId;
2927     c->startingNodeId = startNode;
2928     sendSignal(cmasterdihref, GSN_START_INFOCONF, signal,
2929 	       StartInfoConf::SignalLength, JBB);
2930     return;
2931   }//if
2932 }//Dbdih::execSTART_INFOREQ()
2933 
execINCL_NODEREQ(Signal * signal)2934 void Dbdih::execINCL_NODEREQ(Signal* signal)
2935 {
2936   jamEntry();
2937   Uint32 retRef = signal->theData[0];
2938   Uint32 nodeId = signal->theData[1];
2939   if (nodeId == getOwnNodeId() && ERROR_INSERTED(7165))
2940   {
2941     CLEAR_ERROR_INSERT_VALUE;
2942     sendSignalWithDelay(reference(), GSN_INCL_NODEREQ, signal, 5000,
2943                         signal->getLength());
2944     return;
2945   }
2946 
2947   Uint32 tnodeStartFailNr = signal->theData[2];
2948   Uint32 gci_hi = signal->theData[4];
2949   Uint32 gci_lo = signal->theData[5];
2950   if (unlikely(signal->getLength() < 6))
2951   {
2952     jam();
2953     gci_lo = 0;
2954   }
2955 
2956   Uint64 gci = gci_lo | (Uint64(gci_hi) << 32);
2957   CRASH_INSERTION(7127);
2958   m_micro_gcp.m_current_gci = gci;
2959   m_micro_gcp.m_old_gci = gci - 1;
2960   if (!isMaster()) {
2961     jam();
2962     /*-----------------------------------------------------------------------*/
2963     // We don't want to change the state of the master since he can be in the
2964     // state LCP_TCGET at this time.
2965     /*-----------------------------------------------------------------------*/
2966     c_lcpState.setLcpStatus(LCP_STATUS_IDLE, __LINE__);
2967   }//if
2968 
2969   /*-------------------------------------------------------------------------*/
2970   // When a node is restarted we must ensure that a lcp will be run
2971   // as soon as possible and the reset the delay according to the original
2972   // configuration.
2973   // Without an initial local checkpoint the new node will not be available.
2974   /*-------------------------------------------------------------------------*/
2975   if (getOwnNodeId() == nodeId) {
2976     jam();
2977     /*-----------------------------------------------------------------------*/
2978     // We are the starting node. We came here only to set the global checkpoint
2979     // id's and the lcp status.
2980     /*-----------------------------------------------------------------------*/
2981     CRASH_INSERTION(7171);
2982     Uint32 masterVersion = getNodeInfo(refToNode(cmasterdihref)).m_version;
2983 
2984     if ((NDB_VERSION_MAJOR == 4 &&
2985 	 masterVersion >= NDBD_INCL_NODECONF_VERSION_4) ||
2986 	(NDB_VERSION_MAJOR == 5 &&
2987 	 masterVersion >= NDBD_INCL_NODECONF_VERSION_5) ||
2988 	(NDB_VERSION_MAJOR > 5))
2989     {
2990       signal->theData[0] = getOwnNodeId();
2991       signal->theData[1] = getOwnNodeId();
2992       sendSignal(cmasterdihref, GSN_INCL_NODECONF, signal, 2, JBB);
2993     }
2994     return;
2995   }//if
2996   if (getNodeStatus(nodeId) != NodeRecord::STARTING) {
2997     jam();
2998     return;
2999   }//if
3000   ndbrequire(cfailurenr == tnodeStartFailNr);
3001   ndbrequire (c_nodeStartSlave.nodeId == 0);
3002   c_nodeStartSlave.nodeId = nodeId;
3003 
3004   ndbrequire (retRef == cmasterdihref);
3005 
3006   NodeRecordPtr nodePtr;
3007   nodePtr.i = nodeId;
3008   ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
3009 
3010   Sysfile::ActiveStatus TsaveState = nodePtr.p->activeStatus;
3011   Uint32 TnodeGroup = nodePtr.p->nodeGroup;
3012 
3013   new (nodePtr.p) NodeRecord();
3014   nodePtr.p->nodeGroup = TnodeGroup;
3015   nodePtr.p->activeStatus = TsaveState;
3016   nodePtr.p->nodeStatus = NodeRecord::ALIVE;
3017   nodePtr.p->useInTransactions = true;
3018   nodePtr.p->m_inclDihLcp = true;
3019 
3020   removeDeadNode(nodePtr);
3021   insertAlive(nodePtr);
3022   con_lineNodes++;
3023 
3024   /*-------------------------------------------------------------------------*/
3025   //      WE WILL ALSO SEND THE INCLUDE NODE REQUEST TO THE LOCAL LQH BLOCK.
3026   /*-------------------------------------------------------------------------*/
3027   signal->theData[0] = reference();
3028   signal->theData[1] = nodeId;
3029   signal->theData[2] = Uint32(m_micro_gcp.m_current_gci >> 32);
3030   sendSignal(clocallqhblockref, GSN_INCL_NODEREQ, signal, 3, JBB);
3031 }//Dbdih::execINCL_NODEREQ()
3032 
3033 /* ------------------------------------------------------------------------- */
3034 // execINCL_NODECONF() is found in the master logic part since it is used by
3035 // both the master and the slaves.
3036 /* ------------------------------------------------------------------------- */
3037 
execSTART_TOREQ(Signal * signal)3038 void Dbdih::execSTART_TOREQ(Signal* signal)
3039 {
3040   jamEntry();
3041   StartToReq req = *(StartToReq *)&signal->theData[0];
3042 
3043 
3044   if (ndb_pnr(getNodeInfo(refToNode(req.senderRef)).m_version))
3045   {
3046     jam();
3047     TakeOverRecordPtr takeOverPtr;
3048 
3049     c_activeTakeOverList.seize(takeOverPtr);
3050     takeOverPtr.p->toStartingNode = req.startingNodeId;
3051     takeOverPtr.p->m_senderRef = req.senderRef;
3052     takeOverPtr.p->m_senderData = req.senderData;
3053     takeOverPtr.p->toMasterStatus = TakeOverRecord::TO_MASTER_IDLE;
3054     takeOverPtr.p->toStartTime = c_current_time;
3055   }
3056 
3057   StartToConf * conf = (StartToConf *)&signal->theData[0];
3058   conf->senderData = req.senderData;
3059   conf->sendingNodeId = cownNodeId;
3060   conf->startingNodeId = req.startingNodeId;
3061   sendSignal(req.senderRef, GSN_START_TOCONF,
3062              signal, StartToConf::SignalLength, JBB);
3063 }//Dbdih::execSTART_TOREQ()
3064 
execUPDATE_TOREQ(Signal * signal)3065 void Dbdih::execUPDATE_TOREQ(Signal* signal)
3066 {
3067   jamEntry();
3068   UpdateToReq req = *(UpdateToReq *)&signal->theData[0];
3069 
3070   Uint32 errCode;
3071   Uint32 extra;
3072   if (ndb_pnr(getNodeInfo(refToNode(req.senderRef)).m_version))
3073   {
3074     jam();
3075     /**
3076      *
3077      */
3078     TakeOverRecordPtr takeOverPtr;
3079     if (findTakeOver(takeOverPtr, req.startingNodeId) == false)
3080     {
3081       errCode = UpdateToRef::UnknownTakeOver;
3082       extra = RNIL;
3083       goto ref;
3084     }
3085 
3086     CRASH_INSERTION(7141);
3087 
3088     takeOverPtr.p->toCopyNode = req.copyNodeId;
3089     takeOverPtr.p->toCurrentTabref = req.tableId;
3090     takeOverPtr.p->toCurrentFragid = req.fragmentNo;
3091 
3092     NodeRecordPtr nodePtr;
3093     NodeGroupRecordPtr NGPtr;
3094     nodePtr.i = req.copyNodeId;
3095     ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
3096     NGPtr.i = nodePtr.p->nodeGroup;
3097     ptrCheckGuard(NGPtr, MAX_NDB_NODES, nodeGroupRecord);
3098 
3099     Mutex mutex(signal, c_mutexMgr, takeOverPtr.p->m_fragmentInfoMutex);
3100     Callback c = { safe_cast(&Dbdih::updateToReq_fragmentMutex_locked),
3101                    takeOverPtr.i };
3102 
3103     switch(req.requestType){
3104     case UpdateToReq::BEFORE_STORED:
3105       jam();
3106 
3107       if (NGPtr.p->activeTakeOver == 0)
3108       {
3109         jam();
3110         NGPtr.p->activeTakeOver = req.startingNodeId;
3111       }
3112       else
3113       {
3114         jam();
3115         errCode = UpdateToRef::CopyFragInProgress;
3116         extra = NGPtr.p->activeTakeOver;
3117         goto ref;
3118       }
3119 
3120       takeOverPtr.p->toMasterStatus = TakeOverRecord::TO_MUTEX_BEFORE_STORED;
3121       mutex.lock(c, false, true);
3122       return;
3123     case UpdateToReq::AFTER_STORED:
3124     {
3125       jam();
3126       mutex.unlock();
3127       takeOverPtr.p->toMasterStatus = TakeOverRecord::TO_AFTER_STORED;
3128       // Send conf
3129       break;
3130     }
3131     case UpdateToReq::BEFORE_COMMIT_STORED:
3132       jam();
3133       takeOverPtr.p->toMasterStatus = TakeOverRecord::TO_MUTEX_BEFORE_COMMIT;
3134       mutex.lock(c, false, true);
3135       return;
3136     case UpdateToReq::AFTER_COMMIT_STORED:
3137     {
3138       jam();
3139       mutex.unlock();
3140 
3141       Mutex mutex2(signal, c_mutexMgr,
3142                    takeOverPtr.p->m_switchPrimaryMutexHandle);
3143       mutex2.unlock();
3144       takeOverPtr.p->toMasterStatus = TakeOverRecord::TO_MASTER_IDLE;
3145       break; // send conf
3146     }
3147     }
3148   }
3149   else
3150   {
3151     CRASH_INSERTION(7154);
3152     RETURN_IF_NODE_NOT_ALIVE(req.startingNodeId);
3153   }
3154 
3155   {
3156     UpdateToConf * conf = (UpdateToConf *)&signal->theData[0];
3157     conf->senderData = req.senderData;
3158     conf->sendingNodeId = cownNodeId;
3159     conf->startingNodeId = req.startingNodeId;
3160     sendSignal(req.senderRef, GSN_UPDATE_TOCONF, signal,
3161                UpdateToConf::SignalLength, JBB);
3162   }
3163   return;
3164 
3165 ref:
3166   UpdateToRef* ref = (UpdateToRef*)signal->getDataPtrSend();
3167   ref->senderData = req.senderData;
3168   ref->senderRef = reference();
3169   ref->errorCode = errCode;
3170   ref->extra = extra;
3171   sendSignal(req.senderRef, GSN_UPDATE_TOREF, signal,
3172              UpdateToRef::SignalLength, JBB);
3173 }
3174 
3175 void
updateToReq_fragmentMutex_locked(Signal * signal,Uint32 toPtrI,Uint32 retVal)3176 Dbdih::updateToReq_fragmentMutex_locked(Signal * signal,
3177                                         Uint32 toPtrI, Uint32 retVal)
3178 {
3179   jamEntry();
3180   TakeOverRecordPtr takeOverPtr;
3181   c_takeOverPool.getPtr(takeOverPtr, toPtrI);
3182 
3183   Uint32 nodeId = takeOverPtr.p->toStartingNode;
3184 
3185   if (retVal == UtilLockRef::InLockQueue)
3186   {
3187     jam();
3188     infoEvent("Node %u waiting to continue copying table %u fragment: %u (%s)",
3189               nodeId,
3190               takeOverPtr.p->toCurrentTabref,
3191               takeOverPtr.p->toCurrentFragid,
3192               takeOverPtr.p->toMasterStatus == TakeOverRecord::TO_MUTEX_BEFORE_STORED ? "STORED" : "COMMIT");
3193     return;
3194   }
3195 
3196   Uint32 errCode;
3197   Uint32 extra;
3198 
3199   NodeRecordPtr nodePtr;
3200   nodePtr.i = nodeId;
3201   ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
3202   if (unlikely(nodePtr.p->nodeStatus != NodeRecord::ALIVE))
3203   {
3204     jam();
3205     /**
3206      * Node died while we waited for lock...
3207      */
3208     abortTakeOver(signal, takeOverPtr);
3209     return;
3210   }
3211 
3212   switch(takeOverPtr.p->toMasterStatus){
3213   case TakeOverRecord::TO_MUTEX_BEFORE_STORED:
3214   {
3215     jam();
3216     // send conf
3217     takeOverPtr.p->toMasterStatus = TakeOverRecord::TO_MUTEX_BEFORE_LOCKED;
3218     break;
3219   }
3220   case TakeOverRecord::TO_MUTEX_BEFORE_COMMIT:
3221   {
3222     jam();
3223 
3224     NodeRecordPtr nodePtr;
3225     NodeGroupRecordPtr NGPtr;
3226     nodePtr.i = takeOverPtr.p->toCopyNode;
3227     ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
3228     NGPtr.i = nodePtr.p->nodeGroup;
3229     ptrCheckGuard(NGPtr, MAX_NDB_NODES, nodeGroupRecord);
3230 
3231     if (NGPtr.p->activeTakeOver != nodeId)
3232     {
3233       ndbassert(false);
3234       errCode = UpdateToRef::InvalidRequest;
3235       extra = NGPtr.p->activeTakeOver;
3236       goto ref;
3237     }
3238     NGPtr.p->activeTakeOver = 0;
3239     takeOverPtr.p->toCopyNode = RNIL;
3240     Mutex mutex(signal, c_mutexMgr,
3241                 takeOverPtr.p->m_switchPrimaryMutexHandle);
3242     Callback c = { safe_cast(&Dbdih::switchPrimaryMutex_locked),
3243                    takeOverPtr.i };
3244     ndbrequire(mutex.lock(c));
3245     takeOverPtr.p->toMasterStatus = TakeOverRecord::TO_MUTEX_BEFORE_SWITCH_REPLICA;
3246     return;
3247     break;
3248   }
3249   default:
3250     jamLine(takeOverPtr.p->toMasterStatus);
3251     ndbrequire(false);
3252   }
3253 
3254   {
3255     UpdateToConf * conf = (UpdateToConf *)&signal->theData[0];
3256     conf->senderData = takeOverPtr.p->m_senderData;
3257     conf->sendingNodeId = cownNodeId;
3258     conf->startingNodeId = takeOverPtr.p->toStartingNode;
3259     sendSignal(takeOverPtr.p->m_senderRef, GSN_UPDATE_TOCONF, signal,
3260                UpdateToConf::SignalLength, JBB);
3261   }
3262   return;
3263 
3264 ref:
3265   {
3266     Mutex mutex(signal, c_mutexMgr, takeOverPtr.p->m_fragmentInfoMutex);
3267     mutex.unlock();
3268 
3269     UpdateToRef* ref = (UpdateToRef*)signal->getDataPtrSend();
3270     ref->senderData = takeOverPtr.p->m_senderData;
3271     ref->senderRef = reference();
3272     ref->errorCode = errCode;
3273     ref->extra = extra;
3274     sendSignal(takeOverPtr.p->m_senderRef, GSN_UPDATE_TOREF, signal,
3275                UpdateToRef::SignalLength, JBB);
3276     return;
3277   }
3278 }
3279 
3280 void
switchPrimaryMutex_locked(Signal * signal,Uint32 toPtrI,Uint32 retVal)3281 Dbdih::switchPrimaryMutex_locked(Signal* signal, Uint32 toPtrI, Uint32 retVal)
3282 {
3283   jamEntry();
3284   ndbrequire(retVal == 0);
3285 
3286   TakeOverRecordPtr takeOverPtr;
3287   c_takeOverPool.getPtr(takeOverPtr, toPtrI);
3288 
3289   Uint32 nodeId = takeOverPtr.p->toStartingNode;
3290   NodeRecordPtr nodePtr;
3291   nodePtr.i = nodeId;
3292   ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
3293 
3294   if (unlikely(nodePtr.p->nodeStatus != NodeRecord::ALIVE))
3295   {
3296     jam();
3297     /**
3298      * Node died while we waited for lock...
3299      */
3300     abortTakeOver(signal, takeOverPtr);
3301     return;
3302   }
3303 
3304   takeOverPtr.p->toMasterStatus = TakeOverRecord::TO_MUTEX_AFTER_SWITCH_REPLICA;
3305 
3306   UpdateToConf * conf = (UpdateToConf *)&signal->theData[0];
3307   conf->senderData = takeOverPtr.p->m_senderData;
3308   conf->sendingNodeId = cownNodeId;
3309   conf->startingNodeId = takeOverPtr.p->toStartingNode;
3310   sendSignal(takeOverPtr.p->m_senderRef, GSN_UPDATE_TOCONF, signal,
3311              UpdateToConf::SignalLength, JBB);
3312 }
3313 
3314 void
switchPrimaryMutex_unlocked(Signal * signal,Uint32 toPtrI,Uint32 retVal)3315 Dbdih::switchPrimaryMutex_unlocked(Signal* signal, Uint32 toPtrI, Uint32 retVal)
3316 {
3317   jamEntry();
3318   ndbrequire(retVal == 0);
3319 
3320   TakeOverRecordPtr takeOverPtr;
3321   c_takeOverPool.getPtr(takeOverPtr, toPtrI);
3322 
3323   UpdateToConf * conf = (UpdateToConf *)&signal->theData[0];
3324   conf->senderData = takeOverPtr.p->m_senderData;
3325   conf->sendingNodeId = cownNodeId;
3326   conf->startingNodeId = takeOverPtr.p->toStartingNode;
3327   sendSignal(takeOverPtr.p->m_senderRef, GSN_UPDATE_TOCONF, signal,
3328              UpdateToConf::SignalLength, JBB);
3329 }
3330 
3331 void
abortTakeOver(Signal * signal,TakeOverRecordPtr takeOverPtr)3332 Dbdih::abortTakeOver(Signal* signal, TakeOverRecordPtr takeOverPtr)
3333 {
3334   if (!takeOverPtr.p->m_switchPrimaryMutexHandle.isNull())
3335   {
3336     jam();
3337     Mutex mutex(signal, c_mutexMgr,
3338                 takeOverPtr.p->m_switchPrimaryMutexHandle);
3339     mutex.unlock();
3340 
3341   }
3342 
3343   if (!takeOverPtr.p->m_fragmentInfoMutex.isNull())
3344   {
3345     jam();
3346     Mutex mutex(signal, c_mutexMgr,
3347                 takeOverPtr.p->m_fragmentInfoMutex);
3348     mutex.unlock();
3349   }
3350 
3351   NodeRecordPtr nodePtr;
3352   nodePtr.i = takeOverPtr.p->toCopyNode;
3353   if (nodePtr.i != RNIL)
3354   {
3355     ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
3356     NodeGroupRecordPtr NGPtr;
3357     NGPtr.i = nodePtr.p->nodeGroup;
3358     ptrCheckGuard(NGPtr, MAX_NDB_NODES, nodeGroupRecord);
3359     if (NGPtr.p->activeTakeOver == takeOverPtr.p->toStartingNode)
3360     {
3361       jam();
3362       NGPtr.p->activeTakeOver = 0;
3363     }
3364   }
3365 
3366   releaseTakeOver(takeOverPtr);
3367 }
3368 
/**
 * Saturating add on a 32-bit counter.
 *
 * Adds `add` to `*counter`; if the sum does not fit in 32 bits the counter
 * is set to 0xFFFFFFFF instead of wrapping around.
 *
 * @param counter  Counter to update in place.
 * @param add      Amount to add.
 */
static
void
add_lcp_counter(Uint32 * counter, Uint32 add)
{
  const Uint64 ceiling = 0xFFFFFFFF;
  const Uint64 sum = Uint64(*counter) + Uint64(add);
  *counter = Uint32(sum > ceiling ? ceiling : sum);
}
3379 
3380 void
check_force_lcp(Ptr<TakeOverRecord> takeOverPtr)3381 Dbdih::check_force_lcp(Ptr<TakeOverRecord> takeOverPtr)
3382 {
3383   Uint64 duration = c_current_time - takeOverPtr.p->toStartTime;
3384   Uint64 lcp_time = c_lcpState.m_lcp_time;
3385 
3386   Ptr<TakeOverRecord> tmp;
3387   for (c_activeTakeOverList.first(tmp); !tmp.isNull();
3388        c_activeTakeOverList.next(tmp))
3389   {
3390     jam();
3391     if (tmp.p->toMasterStatus != TakeOverRecord::TO_WAIT_LCP)
3392     {
3393       jam();
3394 
3395       Uint64 elapsed = c_current_time - tmp.p->toStartTime;
3396       if (elapsed >= duration)
3397       {
3398         jam();
3399         /**
3400          * This has spent more...than our took...
3401          *   expect it to finish soon...
3402          *   i.e dont force LCP
3403          */
3404         infoEvent("Node %u not forcing LCP start(1 %llu >= %llu), wait on %u",
3405                   takeOverPtr.p->toStartingNode,
3406                   elapsed, duration,
3407                   tmp.p->toStartingNode);
3408         return;
3409       }
3410 
3411       Uint64 left = duration - elapsed;
3412       if (left < lcp_time)
3413       {
3414         jam();
3415         /**
3416          * This has less than one lcp left...
3417          *   dont force LCP
3418          */
3419         infoEvent("Node %u not forcing LCP start(2 %llu < %llu), wait on %u",
3420                   takeOverPtr.p->toStartingNode,
3421                   left, lcp_time,
3422                   tmp.p->toStartingNode);
3423         return;
3424       }
3425     }
3426   }
3427   add_lcp_counter(&c_lcpState.ctimer, (1 << 31));
3428 }
3429 
execEND_TOREQ(Signal * signal)3430 void Dbdih::execEND_TOREQ(Signal* signal)
3431 {
3432   jamEntry();
3433   EndToReq req = *(EndToReq *)&signal->theData[0];
3434 
3435   Uint32 nodeId = refToNode(req.senderRef);
3436   TakeOverRecordPtr takeOverPtr;
3437 
3438   if (ndb_pnr(getNodeInfo(nodeId).m_version))
3439   {
3440     jam();
3441     /**
3442      *
3443      */
3444     ndbrequire(findTakeOver(takeOverPtr, nodeId));
3445     NodeRecordPtr nodePtr;
3446     nodePtr.i = nodeId;
3447     ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
3448 
3449     if (req.flags & StartCopyReq::WAIT_LCP)
3450     {
3451       jam();
3452 
3453       /**
3454        * Wait for LCP
3455        */
3456       nodePtr.p->copyCompleted = 2;
3457       takeOverPtr.p->toMasterStatus = TakeOverRecord::TO_WAIT_LCP;
3458 
3459       /**
3460        * Make sure that node also participatened in 1 GCP
3461        *   before running it's first LCP, so that GCI variables
3462        *   in LQH are set properly
3463        */
3464       c_lcpState.lcpStopGcp = c_newest_restorable_gci;
3465 
3466       check_force_lcp(takeOverPtr);
3467       return;
3468     }
3469     nodePtr.p->copyCompleted = 1;
3470     releaseTakeOver(takeOverPtr);
3471   }
3472 
3473   EndToConf * conf = (EndToConf *)&signal->theData[0];
3474   conf->senderData = req.senderData;
3475   conf->sendingNodeId = cownNodeId;
3476   conf->startingNodeId = req.startingNodeId;
3477   sendSignal(req.senderRef, GSN_END_TOCONF, signal,
3478              EndToConf::SignalLength, JBB);
3479 }//Dbdih::execEND_TOREQ()
3480 
/*
 * Take / release the write side of a table record's m_lock.
 * Both macros first call assertOwnThread(). The argument is parenthesized
 * so that any pointer-valued expression (not just a plain identifier) binds
 * correctly to the member access.
 */
#define DIH_TAB_WRITE_LOCK(tabPtrP) \
  do { assertOwnThread(); (tabPtrP)->m_lock.write_lock(); } while (0)

#define DIH_TAB_WRITE_UNLOCK(tabPtrP) \
  do { assertOwnThread(); (tabPtrP)->m_lock.write_unlock(); } while (0)
3486 
3487 /* --------------------------------------------------------------------------*/
3488 /*       AN ORDER TO START OR COMMIT THE REPLICA CREATION ARRIVED FROM THE   */
3489 /*       MASTER.                                                             */
3490 /* --------------------------------------------------------------------------*/
execCREATE_FRAGREQ(Signal * signal)3491 void Dbdih::execCREATE_FRAGREQ(Signal* signal)
3492 {
3493   jamEntry();
3494   CreateFragReq * const req = (CreateFragReq *)&signal->theData[0];
3495 
3496   Uint32 senderData = req->senderData;
3497   Uint32 senderRef = req->senderRef;
3498 
3499   TabRecordPtr tabPtr;
3500   tabPtr.i = req->tableId;
3501   ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
3502 
3503   Uint32 fragId = req->fragId;
3504   Uint32 tdestNodeid = req->startingNodeId;
3505   //Uint32 tsourceNodeid = req->copyNodeId;
3506   Uint32 startGci = req->startGci;
3507   Uint32 replicaType = req->replicaType;
3508   Uint32 tFailedNodeId = req->failedNodeId;
3509 
3510   if (!ndb_pnr(getNodeInfo(refToNode(senderRef)).m_version))
3511   {
3512     jam();
3513     tFailedNodeId = tdestNodeid;
3514   }
3515 
3516   FragmentstorePtr fragPtr;
3517   getFragstore(tabPtr.p, fragId, fragPtr);
3518   RETURN_IF_NODE_NOT_ALIVE(tdestNodeid);
3519   ReplicaRecordPtr frReplicaPtr;
3520   findReplica(frReplicaPtr, fragPtr.p, tFailedNodeId,
3521               replicaType == CreateFragReq::START_LOGGING ? false : true);
3522   if (frReplicaPtr.i == RNIL)
3523   {
3524     dump_replica_info(fragPtr.p);
3525   }
3526   ndbrequire(frReplicaPtr.i != RNIL);
3527 
3528   DIH_TAB_WRITE_LOCK(tabPtr.p);
3529   switch (replicaType) {
3530   case CreateFragReq::STORED:
3531     jam();
3532     CRASH_INSERTION(7138);
3533     /* ----------------------------------------------------------------------*/
3534     /*  HERE WE ARE INSERTING THE NEW BACKUP NODE IN THE EXECUTION OF ALL    */
3535     /*  OPERATIONS. FROM HERE ON ALL OPERATIONS ON THIS FRAGMENT WILL INCLUDE*/
3536     /*  USE OF THE NEW REPLICA.                                              */
3537     /* --------------------------------------------------------------------- */
3538     insertBackup(fragPtr, tdestNodeid);
3539 
3540     fragPtr.p->distributionKey++;
3541     fragPtr.p->distributionKey &= 255;
3542     break;
3543   case CreateFragReq::COMMIT_STORED:
3544     jam();
3545     CRASH_INSERTION(7139);
3546     /* ----------------------------------------------------------------------*/
3547     /*  HERE WE ARE MOVING THE REPLICA TO THE STORED SECTION SINCE IT IS NOW */
3548     /*  FULLY LOADED WITH ALL DATA NEEDED.                                   */
3549     // We also update the order of the replicas here so that if the new
3550     // replica is the desired primary we insert it as primary.
3551     /* ----------------------------------------------------------------------*/
3552     removeOldStoredReplica(fragPtr, frReplicaPtr);
3553     linkStoredReplica(fragPtr, frReplicaPtr);
3554     updateNodeInfo(fragPtr);
3555     break;
3556   case CreateFragReq::START_LOGGING:
3557     jam();
3558     break;
3559   default:
3560     ndbrequire(false);
3561     break;
3562   }//switch
3563   DIH_TAB_WRITE_UNLOCK(tabPtr.p);
3564 
3565   /* ------------------------------------------------------------------------*/
3566   /*       THE NEW NODE OF THIS REPLICA IS THE STARTING NODE.                */
3567   /* ------------------------------------------------------------------------*/
3568   if (tFailedNodeId != tdestNodeid)
3569   {
3570     jam();
3571     /**
3572      * This is a Hot-spare or move partition
3573      */
3574 
3575     /*  IF WE ARE STARTING A TAKE OVER NODE WE MUST INVALIDATE ALL LCP'S.   */
3576     /*  OTHERWISE WE WILL TRY TO START LCP'S THAT DO NOT EXIST.             */
3577     /* ---------------------------------------------------------------------*/
3578     frReplicaPtr.p->procNode = tdestNodeid;
3579     frReplicaPtr.p->noCrashedReplicas = 0;
3580     frReplicaPtr.p->createGci[0] = startGci;
3581     frReplicaPtr.p->replicaLastGci[0] = (Uint32)-1;
3582     for (Uint32 i = 0; i < MAX_LCP_STORED; i++)
3583     {
3584       frReplicaPtr.p->lcpStatus[i] = ZINVALID;
3585     }
3586   }
3587   else
3588   {
3589     jam();
3590     const Uint32 noCrashed = frReplicaPtr.p->noCrashedReplicas;
3591     arrGuard(noCrashed, 8);
3592     frReplicaPtr.p->createGci[noCrashed] = startGci;
3593     frReplicaPtr.p->replicaLastGci[noCrashed] = (Uint32)-1;
3594   }
3595 
3596   CreateFragConf * const conf = (CreateFragConf *)&signal->theData[0];
3597   conf->senderData = senderData;
3598   conf->tableId = tabPtr.i;
3599   conf->fragId = fragId;
3600   conf->sendingNodeId = cownNodeId;
3601   conf->startingNodeId = tdestNodeid;
3602   conf->failedNodeId = tFailedNodeId;
3603   sendSignal(senderRef, GSN_CREATE_FRAGCONF, signal,
3604              CreateFragConf::SignalLength, JBB);
3605 }//Dbdih::execCREATE_FRAGREQ()
3606 
3607 /*****************************************************************************/
3608 /***********     NODE ADDING  MODULE                             *************/
3609 /***********     CODE TO HANDLE TAKE OVER                        *************/
3610 /*****************************************************************************/
3611 // A take over can be initiated by a number of things:
3612 // 1) A node restart, usually the node takes over itself but can also take
3613 //    over somebody else if its own data was already taken over
3614 // 2) At system restart it is necessary to use the take over code to recover
3615 //    nodes which had too old checkpoints to be restorable by the usual
3616 //    restoration from disk.
3617 // 3) When a node has missed too many local checkpoints and is decided by the
3618 //    master to be taken over by a hot spare node that sits around waiting
3619 //    for this to happen.
3620 //
// To support multiple node failures efficiently the code is written such that
// only one take-over at a time can handle transitions in state, but while a
// fragment is being copied other take-overs can perform state transitions.
3624 /*****************************************************************************/
startTakeOver(Signal * signal,Uint32 startNode,Uint32 nodeTakenOver,const StartCopyReq * req)3625 void Dbdih::startTakeOver(Signal* signal,
3626                           Uint32 startNode,
3627                           Uint32 nodeTakenOver,
3628                           const StartCopyReq* req)
3629 {
3630   jam();
3631 
3632   TakeOverRecordPtr takeOverPtr;
3633   ndbrequire(c_activeTakeOverList.seize(takeOverPtr));
3634   takeOverPtr.p->startGci = SYSFILE->lastCompletedGCI[startNode];
3635   takeOverPtr.p->restorableGci = SYSFILE->lastCompletedGCI[startNode];
3636   takeOverPtr.p->toStartingNode = startNode;
3637   takeOverPtr.p->toFailedNode = nodeTakenOver;
3638   takeOverPtr.p->toCurrentTabref = 0;
3639   takeOverPtr.p->toCurrentFragid = 0;
3640 
3641   if (req)
3642   {
3643     jam();
3644     takeOverPtr.p->m_flags = req->flags;
3645     takeOverPtr.p->m_senderData = req->senderData;
3646     takeOverPtr.p->m_senderRef = req->senderRef;
3647   }
3648 
3649   takeOverPtr.p->toSlaveStatus = TakeOverRecord::TO_START_FRAGMENTS;
3650   nr_start_fragments(signal, takeOverPtr);
3651 }//Dbdih::startTakeOver()
3652 
3653 void
nr_start_fragments(Signal * signal,TakeOverRecordPtr takeOverPtr)3654 Dbdih::nr_start_fragments(Signal* signal,
3655 			  TakeOverRecordPtr takeOverPtr)
3656 {
3657   Uint32 loopCount = 0 ;
3658   TabRecordPtr tabPtr;
3659   while (loopCount++ < 100) {
3660     tabPtr.i = takeOverPtr.p->toCurrentTabref;
3661     if (tabPtr.i >= ctabFileSize) {
3662       jam();
3663       nr_run_redo(signal, takeOverPtr);
3664       return;
3665     }//if
3666     ptrAss(tabPtr, tabRecord);
3667     if (tabPtr.p->tabStatus != TabRecord::TS_ACTIVE ||
3668 	tabPtr.p->tabStorage != TabRecord::ST_NORMAL)
3669     {
3670       jam();
3671       takeOverPtr.p->toCurrentFragid = 0;
3672       takeOverPtr.p->toCurrentTabref++;
3673       continue;
3674     }//if
3675     Uint32 fragId = takeOverPtr.p->toCurrentFragid;
3676     if (fragId >= tabPtr.p->totalfragments) {
3677       jam();
3678       takeOverPtr.p->toCurrentFragid = 0;
3679       takeOverPtr.p->toCurrentTabref++;
3680       continue;
3681     }//if
3682     FragmentstorePtr fragPtr;
3683     getFragstore(tabPtr.p, fragId, fragPtr);
3684     ReplicaRecordPtr loopReplicaPtr;
3685     loopReplicaPtr.i = fragPtr.p->oldStoredReplicas;
3686     while (loopReplicaPtr.i != RNIL) {
3687       ptrCheckGuard(loopReplicaPtr, creplicaFileSize, replicaRecord);
3688       if (loopReplicaPtr.p->procNode == takeOverPtr.p->toStartingNode) {
3689         jam();
3690 	nr_start_fragment(signal, takeOverPtr, loopReplicaPtr);
3691 	break;
3692       } else {
3693         jam();
3694         loopReplicaPtr.i = loopReplicaPtr.p->nextReplica;
3695       }//if
3696     }//while
3697     takeOverPtr.p->toCurrentFragid++;
3698   }//while
3699   signal->theData[0] = DihContinueB::ZTO_START_FRAGMENTS;
3700   signal->theData[1] = takeOverPtr.i;
3701   sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
3702 }
3703 
3704 void
nr_start_fragment(Signal * signal,TakeOverRecordPtr takeOverPtr,ReplicaRecordPtr replicaPtr)3705 Dbdih::nr_start_fragment(Signal* signal,
3706 			 TakeOverRecordPtr takeOverPtr,
3707 			 ReplicaRecordPtr replicaPtr)
3708 {
3709   Uint32 i;
3710   Uint32 maxLcpId = 0;
3711   Uint32 maxLcpIndex = ~0;
3712 
3713   Uint32 gci = 0;
3714   Uint32 restorableGCI = takeOverPtr.p->restorableGci;
3715 
3716 #if defined VM_TRACE || defined ERROR_INSERT
3717   ndbout_c("tab: %d frag: %d replicaP->nextLcp: %d",
3718 	   takeOverPtr.p->toCurrentTabref,
3719 	   takeOverPtr.p->toCurrentFragid,
3720 	   replicaPtr.p->nextLcp);
3721 #endif
3722 
3723   Int32 j = replicaPtr.p->noCrashedReplicas - 1;
3724   Uint32 idx = prevLcpNo(replicaPtr.p->nextLcp);
3725   for(i = 0; i<MAX_LCP_USED; i++, idx = prevLcpNo(idx))
3726   {
3727 #if defined VM_TRACE || defined ERROR_INSERT
3728     printf("scanning idx: %d lcpId: %d crashed replicas: %u %s",
3729            idx, replicaPtr.p->lcpId[idx],
3730            replicaPtr.p->noCrashedReplicas,
3731            replicaPtr.p->lcpStatus[idx] == ZVALID ? "VALID" : "NOT VALID");
3732 #endif
3733     if (replicaPtr.p->lcpStatus[idx] == ZVALID)
3734     {
3735       Uint32 startGci = replicaPtr.p->maxGciCompleted[idx] + 1;
3736       Uint32 stopGci = replicaPtr.p->maxGciStarted[idx];
3737 #if defined VM_TRACE || defined ERROR_INSERT
3738       ndbout_c(" maxGciCompleted: %u maxGciStarted: %u", startGci - 1, stopGci);
3739 #endif
3740       for (; j>= 0; j--)
3741       {
3742 #if defined VM_TRACE || defined ERROR_INSERT
3743 	ndbout_c("crashed replica: %d(%d) replica(createGci: %u lastGci: %d )",
3744 		 j,
3745 		 replicaPtr.p->noCrashedReplicas,
3746                  replicaPtr.p->createGci[j],
3747 		 replicaPtr.p->replicaLastGci[j]);
3748 #endif
3749 	if (replicaPtr.p->createGci[j] <= startGci &&
3750             replicaPtr.p->replicaLastGci[j] >= stopGci)
3751 	{
3752 	  maxLcpId = replicaPtr.p->lcpId[idx];
3753 	  maxLcpIndex = idx;
3754           gci = replicaPtr.p->replicaLastGci[j];
3755 	  goto done;
3756 	}
3757       }
3758     }
3759     else
3760     {
3761 #if defined VM_TRACE || defined ERROR_INSERT
3762       printf("\n");
3763 #endif
3764     }
3765   }
3766 
3767   idx = 2; // backward compat code
3768 #if defined VM_TRACE || defined ERROR_INSERT
3769   ndbout_c("- scanning idx: %d lcpId: %d", idx, replicaPtr.p->lcpId[idx]);
3770 #endif
3771   if (replicaPtr.p->lcpStatus[idx] == ZVALID)
3772   {
3773     Uint32 startGci = replicaPtr.p->maxGciCompleted[idx] + 1;
3774     Uint32 stopGci = replicaPtr.p->maxGciStarted[idx];
3775     for (;j >= 0; j--)
3776     {
3777 #if defined VM_TRACE || defined ERROR_INSERT
3778       ndbout_c("crashed replica: %d(%d) replica(createGci: %u lastGci: %d )",
3779                j,
3780                replicaPtr.p->noCrashedReplicas,
3781                replicaPtr.p->createGci[j],
3782                replicaPtr.p->replicaLastGci[j]);
3783 #endif
3784       if (replicaPtr.p->createGci[j] <= startGci &&
3785           replicaPtr.p->replicaLastGci[j] >= stopGci)
3786       {
3787         maxLcpId = replicaPtr.p->lcpId[idx];
3788         maxLcpIndex = idx;
3789         gci = replicaPtr.p->replicaLastGci[j];
3790         goto done;
3791       }
3792     }
3793   }
3794 
3795 done:
3796 
3797   StartFragReq *req = (StartFragReq *)signal->getDataPtrSend();
3798   req->requestInfo = StartFragReq::SFR_RESTORE_LCP;
3799   if (maxLcpIndex == ~ (Uint32) 0)
3800   {
3801     /**
3802      * we didn't find a local LCP that we can restore
3803      */
3804     jam();
3805     ndbassert(gci == 0);
3806     replicaPtr.p->m_restorable_gci = gci;
3807 
3808     req->userPtr = 0;
3809     req->userRef = reference();
3810     req->lcpNo = ZNIL;
3811     req->lcpId = 0;
3812     req->tableId = takeOverPtr.p->toCurrentTabref;
3813     req->fragId = takeOverPtr.p->toCurrentFragid;
3814     req->noOfLogNodes = 0;
3815 
3816     if (c_2pass_inr && cstarttype == NodeState::ST_INITIAL_NODE_RESTART)
3817     {
3818       /**
3819        * Check if we can make 2-phase copy
3820        *   1) non-transaction, (after we rebuild indexes)
3821        *   2) transaction (maintaining indexes during rebuild)
3822        *      where the transactional copies efterything >= startGci
3823        *
3824        * NOTE: c_2pass_inr is only set if all nodes in cluster currently
3825        *       supports this
3826        */
3827 
3828       if (takeOverPtr.p->startGci == 0)
3829       {
3830         jam();
3831         /**
3832          * Set a startGci to currently lastCompletedGCI of master
3833          *   any value will do...as long as subsequent transactinal copy
3834          *   will be using it (scanning >= this value)
3835          */
3836         takeOverPtr.p->startGci = SYSFILE->lastCompletedGCI[cmasterNodeId];
3837       }
3838 
3839       TabRecordPtr tabPtr;
3840       tabPtr.i = takeOverPtr.p->toCurrentTabref;
3841       ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
3842 
3843       FragmentstorePtr fragPtr;
3844       getFragstore(tabPtr.p, takeOverPtr.p->toCurrentFragid, fragPtr);
3845       Uint32 nodes[MAX_REPLICAS];
3846       extractNodeInfo(fragPtr.p, nodes);
3847 
3848       req->lqhLogNode[0] = nodes[0]; // Source
3849       req->requestInfo = StartFragReq::SFR_COPY_FRAG;
3850       replicaPtr.p->m_restorable_gci = takeOverPtr.p->startGci;
3851     }
3852 
3853     if (req->requestInfo == StartFragReq::SFR_RESTORE_LCP)
3854     {
3855       ndbout_c("node: %d tab: %d frag: %d no lcp to restore",
3856                takeOverPtr.p->toStartingNode,
3857                takeOverPtr.p->toCurrentTabref,
3858                takeOverPtr.p->toCurrentFragid);
3859     }
3860     else
3861     {
3862       ndbout_c("node: %d tab: %d frag: %d copying data from %u (gci: %u)",
3863                takeOverPtr.p->toStartingNode,
3864                takeOverPtr.p->toCurrentTabref,
3865                takeOverPtr.p->toCurrentFragid,
3866                req->lqhLogNode[0],
3867                takeOverPtr.p->startGci);
3868     }
3869 
3870     BlockReference ref = numberToRef(DBLQH, takeOverPtr.p->toStartingNode);
3871     sendSignal(ref, GSN_START_FRAGREQ, signal,
3872 	       StartFragReq::SignalLength, JBB);
3873   }
3874   else
3875   {
3876     jam();
3877     if (gci != restorableGCI)
3878     {
3879       Ptr<TabRecord> tabPtr;
3880       tabPtr.i = takeOverPtr.p->toCurrentTabref;
3881       ptrAss(tabPtr, tabRecord);
3882 
3883       FragmentstorePtr fragPtr;
3884       getFragstore(tabPtr.p, takeOverPtr.p->toCurrentFragid, fragPtr);
3885       dump_replica_info(fragPtr.p);
3886     }
3887     ndbassert(gci == restorableGCI);
3888     replicaPtr.p->m_restorable_gci = gci;
3889     Uint32 startGci = replicaPtr.p->maxGciCompleted[maxLcpIndex] + 1;
3890     if (startGci > gci)
3891       startGci = gci;
3892     ndbout_c("node: %d tab: %d frag: %d restore lcp: %u(idx: %u) maxGciStarted: %u maxGciCompleted: %u (restorable: %u(%u) newestRestorableGCI: %u)",
3893              takeOverPtr.p->toStartingNode,
3894              takeOverPtr.p->toCurrentTabref,
3895              takeOverPtr.p->toCurrentFragid,
3896 	     maxLcpId,
3897              maxLcpIndex,
3898 	     replicaPtr.p->maxGciStarted[maxLcpIndex],
3899 	     replicaPtr.p->maxGciCompleted[maxLcpIndex],
3900 	     restorableGCI,
3901 	     SYSFILE->lastCompletedGCI[takeOverPtr.p->toStartingNode],
3902 	     SYSFILE->newestRestorableGCI);
3903 
3904     StartFragReq *req = (StartFragReq *)signal->getDataPtrSend();
3905     req->userPtr = 0;
3906     req->userRef = reference();
3907     req->lcpNo = maxLcpIndex;
3908     req->lcpId = maxLcpId;
3909     req->tableId = takeOverPtr.p->toCurrentTabref;
3910     req->fragId = takeOverPtr.p->toCurrentFragid;
3911     req->noOfLogNodes = 1;
3912     req->lqhLogNode[0] = takeOverPtr.p->toStartingNode;
3913     req->startGci[0] = startGci;
3914     req->lastGci[0] = gci;
3915 
3916     BlockReference ref = numberToRef(DBLQH, takeOverPtr.p->toStartingNode);
3917     sendSignal(ref, GSN_START_FRAGREQ, signal,
3918 	       StartFragReq::SignalLength, JBB);
3919 
3920     if (startGci < takeOverPtr.p->startGci)
3921     {
3922       jam();
3923       takeOverPtr.p->startGci = startGci;
3924     }
3925   }
3926 }
3927 
3928 void
nr_run_redo(Signal * signal,TakeOverRecordPtr takeOverPtr)3929 Dbdih::nr_run_redo(Signal* signal, TakeOverRecordPtr takeOverPtr)
3930 {
3931   /**
3932    * sendSTART_RECREQ uses m_sr_nodes
3933    *   and for TO during SR, we don't want to modify it
3934    *   so save/restore it
3935    */
3936   NdbNodeBitmask save = m_sr_nodes;
3937   m_sr_nodes.clear();
3938   m_sr_nodes.set(takeOverPtr.p->toStartingNode);
3939 
3940   Uint32 save_keepGCI = SYSFILE->keepGCI;
3941   if (takeOverPtr.p->startGci < SYSFILE->keepGCI)
3942   {
3943     jam();
3944     SYSFILE->keepGCI = takeOverPtr.p->startGci;
3945     ndbout_c("GSN_START_RECREQ keepGci: %u (%u)",
3946              takeOverPtr.p->startGci, save_keepGCI);
3947   }
3948 
3949   takeOverPtr.p->toCurrentTabref = 0;
3950   takeOverPtr.p->toCurrentFragid = 0;
3951   takeOverPtr.p->toSlaveStatus = TakeOverRecord::TO_RUN_REDO;
3952   sendSTART_RECREQ(signal, takeOverPtr.p->toStartingNode, takeOverPtr.i);
3953 
3954   m_sr_nodes = save; // restore
3955   SYSFILE->keepGCI = save_keepGCI;
3956 }
3957 
3958 void
nr_start_logging(Signal * signal,TakeOverRecordPtr takeOverPtr)3959 Dbdih::nr_start_logging(Signal* signal, TakeOverRecordPtr takeOverPtr)
3960 {
3961   Uint32 loopCount = 0 ;
3962   TabRecordPtr tabPtr;
3963   while (loopCount++ < 100)
3964   {
3965     tabPtr.i = takeOverPtr.p->toCurrentTabref;
3966     if (tabPtr.i >= ctabFileSize)
3967     {
3968       jam();
3969       takeOverPtr.p->toSlaveStatus = TakeOverRecord::TO_END_TO;
3970       EndToReq* req = (EndToReq*)signal->getDataPtrSend();
3971       req->senderData = takeOverPtr.i;
3972       req->senderRef = reference();
3973       req->flags = takeOverPtr.p->m_flags;
3974       sendSignal(cmasterdihref, GSN_END_TOREQ,
3975                  signal, EndToReq::SignalLength, JBB);
3976 
3977       return;
3978     }
3979     ptrAss(tabPtr, tabRecord);
3980     if (tabPtr.p->tabStatus != TabRecord::TS_ACTIVE ||
3981 	tabPtr.p->tabStorage != TabRecord::ST_NORMAL)
3982     {
3983       jam();
3984       takeOverPtr.p->toCurrentFragid = 0;
3985       takeOverPtr.p->toCurrentTabref++;
3986       continue;
3987     }
3988 
3989     Uint32 fragId = takeOverPtr.p->toCurrentFragid;
3990     if (fragId >= tabPtr.p->totalfragments)
3991     {
3992       jam();
3993       takeOverPtr.p->toCurrentFragid = 0;
3994       takeOverPtr.p->toCurrentTabref++;
3995       continue;
3996     }
3997 
3998     FragmentstorePtr fragPtr;
3999     getFragstore(tabPtr.p, fragId, fragPtr);
4000     ReplicaRecordPtr loopReplicaPtr;
4001     loopReplicaPtr.i = fragPtr.p->storedReplicas;
4002     while (loopReplicaPtr.i != RNIL)
4003     {
4004       ptrCheckGuard(loopReplicaPtr, creplicaFileSize, replicaRecord);
4005       if (loopReplicaPtr.p->procNode == takeOverPtr.p->toStartingNode)
4006       {
4007         jam();
4008         ndbrequire(loopReplicaPtr.p->procNode == getOwnNodeId());
4009         takeOverPtr.p->toSlaveStatus = TakeOverRecord::TO_SL_COPY_ACTIVE;
4010 
4011         Uint32 instanceKey = dihGetInstanceKey(fragPtr);
4012         BlockReference lqhRef = numberToRef(DBLQH, instanceKey,
4013                                             takeOverPtr.p->toStartingNode);
4014 
4015         CopyActiveReq * const req = (CopyActiveReq *)&signal->theData[0];
4016         req->userPtr = takeOverPtr.i;
4017         req->userRef = reference();
4018         req->tableId = takeOverPtr.p->toCurrentTabref;
4019         req->fragId = takeOverPtr.p->toCurrentFragid;
4020         req->distributionKey = fragPtr.p->distributionKey;
4021         req->flags = 0;
4022         sendSignal(lqhRef,GSN_COPY_ACTIVEREQ, signal,
4023                    CopyActiveReq::SignalLength, JBB);
4024         return;
4025       }
4026       else
4027       {
4028         jam();
4029         loopReplicaPtr.i = loopReplicaPtr.p->nextReplica;
4030       }
4031     }
4032     takeOverPtr.p->toCurrentFragid++;
4033   }
4034   signal->theData[0] = DihContinueB::ZTO_START_LOGGING;
4035   signal->theData[1] = takeOverPtr.i;
4036   sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
4037 }
4038 
4039 void
sendStartTo(Signal * signal,TakeOverRecordPtr takeOverPtr)4040 Dbdih::sendStartTo(Signal* signal, TakeOverRecordPtr takeOverPtr)
4041 {
4042   takeOverPtr.p->toSlaveStatus = TakeOverRecord::TO_START_TO;
4043 
4044   StartToReq* req = (StartToReq*)signal->getDataPtrSend();
4045   req->senderData = takeOverPtr.i;
4046   req->senderRef = reference();
4047   req->startingNodeId = takeOverPtr.p->toStartingNode;
4048   sendSignal(cmasterdihref, GSN_START_TOREQ,
4049              signal, StartToReq::SignalLength, JBB);
4050 }
4051 
4052 void
execSTART_TOREF(Signal * signal)4053 Dbdih::execSTART_TOREF(Signal* signal)
4054 {
4055   jamEntry();
4056 
4057   StartToRef* ref = (StartToRef*)signal->getDataPtr();
4058   Uint32 errCode = ref->errorCode;
4059   (void)errCode; // TODO check for "valid" error
4060 
4061   TakeOverRecordPtr takeOverPtr;
4062   c_takeOverPool.getPtr(takeOverPtr, ref->senderData);
4063 
4064   signal->theData[0] = DihContinueB::ZSEND_START_TO;
4065   signal->theData[1] = takeOverPtr.i;
4066 
4067   sendSignalWithDelay(reference(), GSN_CONTINUEB,
4068                       signal, 5000, 2);
4069 }
4070 
4071 void
execSTART_TOCONF(Signal * signal)4072 Dbdih::execSTART_TOCONF(Signal* signal)
4073 {
4074   jamEntry();
4075   StartToConf * conf = (StartToConf*)signal->getDataPtr();
4076 
4077   TakeOverRecordPtr takeOverPtr;
4078   c_takeOverPool.getPtr(takeOverPtr, conf->senderData);
4079 
4080   CRASH_INSERTION(7133);
4081 
4082   /**
4083    * We are now allowed to start copying
4084    */
4085   startNextCopyFragment(signal, takeOverPtr.i);
4086 }
4087 
startNextCopyFragment(Signal * signal,Uint32 takeOverPtrI)4088 void Dbdih::startNextCopyFragment(Signal* signal, Uint32 takeOverPtrI)
4089 {
4090   TabRecordPtr tabPtr;
4091   TakeOverRecordPtr takeOverPtr;
4092   c_takeOverPool.getPtr(takeOverPtr, takeOverPtrI);
4093 
4094   Uint32 loopCount;
4095   loopCount = 0;
4096   if (ERROR_INSERTED(7159)) {
4097     loopCount = 100;
4098   }//if
4099   while (loopCount++ < 100) {
4100     tabPtr.i = takeOverPtr.p->toCurrentTabref;
4101     if (tabPtr.i >= ctabFileSize) {
4102       jam();
4103       CRASH_INSERTION(7136);
4104       toCopyCompletedLab(signal, takeOverPtr);
4105       return;
4106     }//if
4107     ptrAss(tabPtr, tabRecord);
4108     if (tabPtr.p->tabStatus != TabRecord::TS_ACTIVE){
4109       jam();
4110       takeOverPtr.p->toCurrentFragid = 0;
4111       takeOverPtr.p->toCurrentTabref++;
4112       continue;
4113     }//if
4114     Uint32 fragId = takeOverPtr.p->toCurrentFragid;
4115     if (fragId >= tabPtr.p->totalfragments) {
4116       jam();
4117       takeOverPtr.p->toCurrentFragid = 0;
4118       takeOverPtr.p->toCurrentTabref++;
4119       if (ERROR_INSERTED(7135)) {
4120         if (takeOverPtr.p->toCurrentTabref == 1) {
4121           ndbrequire(false);
4122         }//if
4123       }//if
4124       continue;
4125     }//if
4126     FragmentstorePtr fragPtr;
4127     getFragstore(tabPtr.p, fragId, fragPtr);
4128     ReplicaRecordPtr loopReplicaPtr;
4129     loopReplicaPtr.i = fragPtr.p->oldStoredReplicas;
4130     while (loopReplicaPtr.i != RNIL) {
4131       ptrCheckGuard(loopReplicaPtr, creplicaFileSize, replicaRecord);
4132       if (loopReplicaPtr.p->procNode == takeOverPtr.p->toFailedNode) {
4133         jam();
4134 	/* ----------------------------------------------------------------- */
4135 	/* WE HAVE FOUND A REPLICA THAT BELONGED THE FAILED NODE THAT NEEDS  */
4136 	/* TAKE OVER. WE TAKE OVER THIS REPLICA TO THE NEW NODE.             */
4137 	/* ----------------------------------------------------------------- */
4138         takeOverPtr.p->toCurrentReplica = loopReplicaPtr.i;
4139         toCopyFragLab(signal, takeOverPtr.i);
4140         return;
4141       } else if (loopReplicaPtr.p->procNode == takeOverPtr.p->toStartingNode) {
4142         jam();
4143 	/* ----------------------------------------------------------------- */
4144 	/* WE HAVE OBVIOUSLY STARTED TAKING OVER THIS WITHOUT COMPLETING IT. */
4145 	/* WE     */
4146 	/* NEED TO COMPLETE THE TAKE OVER OF THIS REPLICA.                   */
4147 	/* ----------------------------------------------------------------- */
4148         takeOverPtr.p->toCurrentReplica = loopReplicaPtr.i;
4149         toCopyFragLab(signal, takeOverPtr.i);
4150         return;
4151       } else {
4152         jam();
4153         loopReplicaPtr.i = loopReplicaPtr.p->nextReplica;
4154       }//if
4155     }//while
4156     takeOverPtr.p->toCurrentFragid++;
4157   }//while
4158   signal->theData[0] = DihContinueB::ZTO_START_COPY_FRAG;
4159   signal->theData[1] = takeOverPtr.i;
4160   sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
4161 }//Dbdih::startNextCopyFragment()
4162 
toCopyFragLab(Signal * signal,Uint32 takeOverPtrI)4163 void Dbdih::toCopyFragLab(Signal* signal,
4164                           Uint32 takeOverPtrI)
4165 {
4166   TakeOverRecordPtr takeOverPtr;
4167   c_takeOverPool.getPtr(takeOverPtr, takeOverPtrI);
4168 
4169   /**
4170    * Inform starting node that TakeOver is about to start
4171    */
4172   TabRecordPtr tabPtr;
4173   tabPtr.i = takeOverPtr.p->toCurrentTabref;
4174   ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
4175 
4176   FragmentstorePtr fragPtr;
4177   getFragstore(tabPtr.p, takeOverPtr.p->toCurrentFragid, fragPtr);
4178   Uint32 nodes[MAX_REPLICAS];
4179   extractNodeInfo(fragPtr.p, nodes);
4180   takeOverPtr.p->toCopyNode = nodes[0];
4181 
4182   PrepareCopyFragReq* req= (PrepareCopyFragReq*)signal->getDataPtrSend();
4183   req->senderRef = reference();
4184   req->senderData = takeOverPtrI;
4185   req->tableId = takeOverPtr.p->toCurrentTabref;
4186   req->fragId = takeOverPtr.p->toCurrentFragid;
4187   req->copyNodeId = takeOverPtr.p->toCopyNode;
4188   req->startingNodeId = takeOverPtr.p->toStartingNode; // Dst
4189 
4190   Uint32 instanceKey = dihGetInstanceKey(req->tableId, req->fragId);
4191   Uint32 ref = numberToRef(DBLQH, instanceKey, takeOverPtr.p->toStartingNode);
4192 
4193   sendSignal(ref, GSN_PREPARE_COPY_FRAG_REQ, signal,
4194              PrepareCopyFragReq::SignalLength, JBB);
4195 
4196   takeOverPtr.p->toSlaveStatus = TakeOverRecord::TO_PREPARE_COPY;
4197 }
4198 
4199 void
execPREPARE_COPY_FRAG_REF(Signal * signal)4200 Dbdih::execPREPARE_COPY_FRAG_REF(Signal* signal)
4201 {
4202   jamEntry();
4203   PrepareCopyFragRef ref = *(PrepareCopyFragRef*)signal->getDataPtr();
4204 
4205   TakeOverRecordPtr takeOverPtr;
4206   c_takeOverPool.getPtr(takeOverPtr, ref.senderData);
4207 
4208   ndbrequire(takeOverPtr.p->toSlaveStatus == TakeOverRecord::TO_PREPARE_COPY);
4209 
4210   /**
4211    * Treat this as copy frag ref
4212    */
4213   CopyFragRef * cfref = (CopyFragRef*)signal->getDataPtrSend();
4214   cfref->userPtr = ref.senderData;
4215   cfref->startingNodeId = ref.startingNodeId;
4216   cfref->errorCode = ref.errorCode;
4217   cfref->tableId = ref.tableId;
4218   cfref->fragId = ref.fragId;
4219   cfref->sendingNodeId = ref.copyNodeId;
4220   takeOverPtr.p->toSlaveStatus = TakeOverRecord::TO_COPY_FRAG;
4221   execCOPY_FRAGREF(signal);
4222 }
4223 
4224 void
execPREPARE_COPY_FRAG_CONF(Signal * signal)4225 Dbdih::execPREPARE_COPY_FRAG_CONF(Signal* signal)
4226 {
4227   jamEntry();
4228   PrepareCopyFragConf conf = *(PrepareCopyFragConf*)signal->getDataPtr();
4229 
4230   TakeOverRecordPtr takeOverPtr;
4231   c_takeOverPool.getPtr(takeOverPtr, conf.senderData);
4232 
4233   Uint32 version = getNodeInfo(refToNode(conf.senderRef)).m_version;
4234   ndbrequire(ndb_check_prep_copy_frag_version(version) >= 2);
4235   takeOverPtr.p->maxPage = conf.maxPageNo;
4236 
4237   /**
4238    * We need to lock fragment info...in order to later run CREATE_FRAG_REQ
4239    */
4240   takeOverPtr.p->toSlaveStatus = TakeOverRecord::TO_UPDATE_BEFORE_STORED;
4241   sendUpdateTo(signal, takeOverPtr);
4242 }
4243 
4244 void
sendUpdateTo(Signal * signal,TakeOverRecordPtr takeOverPtr)4245 Dbdih::sendUpdateTo(Signal* signal, TakeOverRecordPtr takeOverPtr)
4246 {
4247   UpdateToReq* req = (UpdateToReq*)signal->getDataPtrSend();
4248   req->senderData = takeOverPtr.i;
4249   req->senderRef = reference();
4250   req->startingNodeId = takeOverPtr.p->toStartingNode;
4251   req->copyNodeId = takeOverPtr.p->toCopyNode;
4252   req->tableId = takeOverPtr.p->toCurrentTabref;
4253   req->fragmentNo = takeOverPtr.p->toCurrentFragid;
4254   switch(takeOverPtr.p->toSlaveStatus){
4255   case TakeOverRecord::TO_UPDATE_BEFORE_STORED:
4256     jam();
4257     req->requestType = UpdateToReq::BEFORE_STORED;
4258     break;
4259   case TakeOverRecord::TO_UPDATE_AFTER_STORED:
4260     req->requestType = UpdateToReq::AFTER_STORED;
4261     break;
4262   case TakeOverRecord::TO_UPDATE_BEFORE_COMMIT:
4263     jam();
4264     req->requestType = UpdateToReq::BEFORE_COMMIT_STORED;
4265     break;
4266   case TakeOverRecord::TO_UPDATE_AFTER_COMMIT:
4267     jam();
4268     req->requestType = UpdateToReq::AFTER_COMMIT_STORED;
4269     break;
4270   default:
4271     jamLine(takeOverPtr.p->toSlaveStatus);
4272     ndbrequire(false);
4273   }
4274   sendSignal(cmasterdihref, GSN_UPDATE_TOREQ,
4275              signal, UpdateToReq::SignalLength, JBB);
4276 }
4277 
4278 void
execUPDATE_TOREF(Signal * signal)4279 Dbdih::execUPDATE_TOREF(Signal* signal)
4280 {
4281   jamEntry();
4282   UpdateToRef* ref = (UpdateToRef*)signal->getDataPtr();
4283   Uint32 errCode = ref->errorCode;
4284   (void)errCode; // TODO check for "valid" error
4285 
4286   TakeOverRecordPtr takeOverPtr;
4287   c_takeOverPool.getPtr(takeOverPtr, ref->senderData);
4288 
4289   signal->theData[0] = DihContinueB::ZSEND_UPDATE_TO;
4290   signal->theData[1] = takeOverPtr.i;
4291 
4292   sendSignalWithDelay(reference(), GSN_CONTINUEB,
4293                       signal, 5000, 2);
4294 }
4295 
4296 void
execUPDATE_TOCONF(Signal * signal)4297 Dbdih::execUPDATE_TOCONF(Signal* signal)
4298 {
4299   jamEntry();
4300 
4301   UpdateToConf* conf = (UpdateToConf*)signal->getDataPtr();
4302 
4303   TakeOverRecordPtr takeOverPtr;
4304   c_takeOverPool.getPtr(takeOverPtr, conf->senderData);
4305 
4306   switch(takeOverPtr.p->toSlaveStatus){
4307   case TakeOverRecord::TO_UPDATE_BEFORE_STORED:
4308     jam();
4309 
4310     CRASH_INSERTION(7154);
4311 
4312     takeOverPtr.p->toSlaveStatus = TakeOverRecord::TO_CREATE_FRAG_STORED;
4313     sendCreateFragReq(signal, ZINIT_CREATE_GCI, CreateFragReq::STORED, takeOverPtr.i);
4314     return;
4315   case TakeOverRecord::TO_UPDATE_AFTER_STORED:
4316     jam();
4317 
4318     CRASH_INSERTION(7195);
4319 
4320     takeOverPtr.p->toSlaveStatus = TakeOverRecord::TO_COPY_FRAG;
4321     toStartCopyFrag(signal, takeOverPtr);
4322     return;
4323   case TakeOverRecord::TO_UPDATE_BEFORE_COMMIT:
4324     jam();
4325 
4326     CRASH_INSERTION(7196);
4327 
4328     takeOverPtr.p->toSlaveStatus = TakeOverRecord::TO_CREATE_FRAG_COMMIT;
4329     sendCreateFragReq(signal, takeOverPtr.p->startGci,
4330                       CreateFragReq::COMMIT_STORED, takeOverPtr.i);
4331     return;
4332   case TakeOverRecord::TO_UPDATE_AFTER_COMMIT:
4333     jam();
4334 
4335     CRASH_INSERTION(7197);
4336 
4337     takeOverPtr.p->toSlaveStatus = TakeOverRecord::TO_SELECTING_NEXT;
4338     startNextCopyFragment(signal, takeOverPtr.i);
4339     return;
4340   default:
4341     ndbrequire(false);
4342   }
4343 }
4344 
4345 void
toStartCopyFrag(Signal * signal,TakeOverRecordPtr takeOverPtr)4346 Dbdih::toStartCopyFrag(Signal* signal, TakeOverRecordPtr takeOverPtr)
4347 {
4348   TabRecordPtr tabPtr;
4349   tabPtr.i = takeOverPtr.p->toCurrentTabref;
4350   ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
4351 
4352   Uint32 fragId = takeOverPtr.p->toCurrentFragid;
4353 
4354   FragmentstorePtr fragPtr;
4355   getFragstore(tabPtr.p, fragId, fragPtr);
4356 
4357   ReplicaRecordPtr replicaPtr;
4358   findReplica(replicaPtr, fragPtr.p, getOwnNodeId(), true);
4359 
4360   Uint32 gci = replicaPtr.p->m_restorable_gci;
4361   replicaPtr.p->m_restorable_gci = 0; // used in union...
4362 
4363   Uint32 instanceKey = dihGetInstanceKey(tabPtr.i, fragId);
4364   BlockReference ref = numberToRef(DBLQH, instanceKey,
4365                                    takeOverPtr.p->toCopyNode);
4366   CopyFragReq * const copyFragReq = (CopyFragReq *)&signal->theData[0];
4367   copyFragReq->userPtr = takeOverPtr.i;
4368   copyFragReq->userRef = reference();
4369   copyFragReq->tableId = tabPtr.i;
4370   copyFragReq->fragId = fragId;
4371   copyFragReq->nodeId = takeOverPtr.p->toStartingNode;
4372   copyFragReq->schemaVersion = tabPtr.p->schemaVersion;
4373   copyFragReq->distributionKey = fragPtr.p->distributionKey;
4374   copyFragReq->gci = gci;
4375   Uint32 len = copyFragReq->nodeCount =
4376     extractNodeInfo(fragPtr.p,
4377                     copyFragReq->nodeList);
4378   copyFragReq->nodeList[len] = takeOverPtr.p->maxPage;
4379   copyFragReq->nodeList[len+1] = CopyFragReq::CFR_TRANSACTIONAL;
4380   sendSignal(ref, GSN_COPY_FRAGREQ, signal,
4381              CopyFragReq::SignalLength + len, JBB);
4382 }//Dbdih::toStartCopy()
4383 
sendCreateFragReq(Signal * signal,Uint32 startGci,Uint32 replicaType,Uint32 takeOverPtrI)4384 void Dbdih::sendCreateFragReq(Signal* signal,
4385                               Uint32 startGci,
4386                               Uint32 replicaType,
4387                               Uint32 takeOverPtrI)
4388 {
4389   Ptr<TakeOverRecord> takeOverPtr;
4390   c_takeOverPool.getPtr(takeOverPtr, takeOverPtrI);
4391 
4392   sendLoopMacro(CREATE_FRAGREQ, nullRoutine, RNIL);
4393 
4394   CreateFragReq * const req = (CreateFragReq *)&signal->theData[0];
4395   req->senderData = takeOverPtr.i;
4396   req->senderRef = reference();
4397   req->tableId = takeOverPtr.p->toCurrentTabref;
4398   req->fragId = takeOverPtr.p->toCurrentFragid;
4399   req->startingNodeId = takeOverPtr.p->toStartingNode;
4400   req->copyNodeId = takeOverPtr.p->toCopyNode;
4401   req->failedNodeId = takeOverPtr.p->toFailedNode;
4402   req->startGci = startGci;
4403   req->replicaType = replicaType;
4404 
4405   NodeRecordPtr nodePtr;
4406   nodePtr.i = cfirstAliveNode;
4407   do {
4408     ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
4409     BlockReference ref = calcDihBlockRef(nodePtr.i);
4410     sendSignal(ref, GSN_CREATE_FRAGREQ, signal,
4411 	       CreateFragReq::SignalLength, JBB);
4412     nodePtr.i = nodePtr.p->nextNode;
4413   } while (nodePtr.i != RNIL);
4414 }//Dbdih::sendCreateFragReq()
4415 
execCREATE_FRAGCONF(Signal * signal)4416 void Dbdih::execCREATE_FRAGCONF(Signal* signal)
4417 {
4418   jamEntry();
4419   CRASH_INSERTION(7148);
4420   CreateFragConf * conf = (CreateFragConf *)&signal->theData[0];
4421 
4422   TakeOverRecordPtr takeOverPtr;
4423   c_takeOverPool.getPtr(takeOverPtr, conf->senderData);
4424 
4425   receiveLoopMacro(CREATE_FRAGREQ, conf->sendingNodeId);
4426 
4427   switch(takeOverPtr.p->toSlaveStatus){
4428   case TakeOverRecord::TO_CREATE_FRAG_STORED:
4429     jam();
4430     CRASH_INSERTION(7198);
4431     takeOverPtr.p->toSlaveStatus = TakeOverRecord::TO_UPDATE_AFTER_STORED;
4432     break;
4433   case TakeOverRecord::TO_CREATE_FRAG_COMMIT:
4434     jam();
4435     CRASH_INSERTION(7199);
4436     takeOverPtr.p->toSlaveStatus = TakeOverRecord::TO_UPDATE_AFTER_COMMIT;
4437     break;
4438   case TakeOverRecord::TO_SL_CREATE_FRAG:
4439     jam();
4440     //CRASH_INSERTION(
4441     takeOverPtr.p->toSlaveStatus = TakeOverRecord::TO_START_LOGGING;
4442     takeOverPtr.p->toCurrentFragid++;
4443     signal->theData[0] = DihContinueB::ZTO_START_LOGGING;
4444     signal->theData[1] = takeOverPtr.i;
4445     sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
4446     return;
4447   default:
4448     jamLine(takeOverPtr.p->toSlaveStatus);
4449     ndbrequire(false);
4450   }
4451 
4452   sendUpdateTo(signal, takeOverPtr);
4453 }//Dbdih::execCREATE_FRAGCONF()
4454 
execCOPY_FRAGREF(Signal * signal)4455 void Dbdih::execCOPY_FRAGREF(Signal* signal)
4456 {
4457   const CopyFragRef * const ref = (CopyFragRef *)&signal->theData[0];
4458   jamEntry();
4459   Uint32 takeOverPtrI = ref->userPtr;
4460   Uint32 startingNodeId = ref->startingNodeId;
4461   Uint32 errorCode = ref->errorCode;
4462 
4463   TakeOverRecordPtr takeOverPtr;
4464   c_takeOverPool.getPtr(takeOverPtr, takeOverPtrI);
4465   ndbrequire(ref->tableId == takeOverPtr.p->toCurrentTabref);
4466   ndbrequire(ref->fragId == takeOverPtr.p->toCurrentFragid);
4467   ndbrequire(ref->startingNodeId == takeOverPtr.p->toStartingNode);
4468   ndbrequire(ref->sendingNodeId == takeOverPtr.p->toCopyNode);
4469   ndbrequire(takeOverPtr.p->toSlaveStatus == TakeOverRecord::TO_COPY_FRAG);
4470 
4471   //--------------------------------------------------------------------------
4472   // For some reason we did not succeed in copying a fragment. We treat this
4473   // as a serious failure and crash the starting node.
4474   //--------------------------------------------------------------------------
4475   BlockReference cntrRef = calcNdbCntrBlockRef(startingNodeId);
4476   SystemError * const sysErr = (SystemError*)&signal->theData[0];
4477   sysErr->errorCode = SystemError::CopyFragRefError;
4478   sysErr->errorRef = reference();
4479   sysErr->data[0] = errorCode;
4480   sysErr->data[1] = 0;
4481   sendSignal(cntrRef, GSN_SYSTEM_ERROR, signal,
4482 	     SystemError::SignalLength, JBB);
4483   return;
4484 }//Dbdih::execCOPY_FRAGREF()
4485 
execCOPY_FRAGCONF(Signal * signal)4486 void Dbdih::execCOPY_FRAGCONF(Signal* signal)
4487 {
4488   const CopyFragConf * const conf = (CopyFragConf *)&signal->theData[0];
4489   jamEntry();
4490   CRASH_INSERTION(7142);
4491 
4492   TakeOverRecordPtr takeOverPtr;
4493   c_takeOverPool.getPtr(takeOverPtr, conf->userPtr);
4494 
4495   Uint32 rows_lo = conf->rows_lo;
4496   Uint32 bytes_lo = conf->bytes_lo;
4497 
4498   ndbrequire(conf->tableId == takeOverPtr.p->toCurrentTabref);
4499   ndbrequire(conf->fragId == takeOverPtr.p->toCurrentFragid);
4500   ndbrequire(conf->startingNodeId == takeOverPtr.p->toStartingNode);
4501   ndbrequire(conf->sendingNodeId == takeOverPtr.p->toCopyNode);
4502   ndbrequire(takeOverPtr.p->toSlaveStatus == TakeOverRecord::TO_COPY_FRAG);
4503 
4504   TabRecordPtr tabPtr;
4505   tabPtr.i = takeOverPtr.p->toCurrentTabref;
4506   ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
4507 
4508   FragmentstorePtr fragPtr;
4509   getFragstore(tabPtr.p, takeOverPtr.p->toCurrentFragid, fragPtr);
4510   Uint32 instanceKey = dihGetInstanceKey(fragPtr);
4511   BlockReference lqhRef = numberToRef(DBLQH, instanceKey,
4512                                       takeOverPtr.p->toStartingNode);
4513   CopyActiveReq * const req = (CopyActiveReq *)&signal->theData[0];
4514   req->userPtr = takeOverPtr.i;
4515   req->userRef = reference();
4516   req->tableId = takeOverPtr.p->toCurrentTabref;
4517   req->fragId = takeOverPtr.p->toCurrentFragid;
4518   req->distributionKey = fragPtr.p->distributionKey;
4519   req->flags = 0;
4520 
4521   Uint32 min_version = getNodeVersionInfo().m_type[NodeInfo::DB].m_min_version;
4522   if (ndb_delayed_copy_active_req(min_version))
4523   {
4524     jam();
4525     /**
4526      * Bug48474 - Don't start logging an fragment
4527      *            until all fragments has been copied
4528      *            Else it's easy to run out of REDO
4529      */
4530     req->flags |= CopyActiveReq::CAR_NO_WAIT | CopyActiveReq::CAR_NO_LOGGING;
4531   }
4532 
4533   sendSignal(lqhRef, GSN_COPY_ACTIVEREQ, signal,
4534              CopyActiveReq::SignalLength, JBB);
4535 
4536   takeOverPtr.p->toSlaveStatus = TakeOverRecord::TO_COPY_ACTIVE;
4537 
4538   signal->theData[0] = NDB_LE_NR_CopyFragDone;
4539   signal->theData[1] = getOwnNodeId();
4540   signal->theData[2] = takeOverPtr.p->toCurrentTabref;
4541   signal->theData[3] = takeOverPtr.p->toCurrentFragid;
4542   signal->theData[4] = rows_lo;
4543   signal->theData[5] = 0;
4544   signal->theData[6] = bytes_lo;
4545   signal->theData[7] = 0;
4546   sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 8, JBB);
4547 }//Dbdih::execCOPY_FRAGCONF()
4548 
execCOPY_ACTIVECONF(Signal * signal)4549 void Dbdih::execCOPY_ACTIVECONF(Signal* signal)
4550 {
4551   const CopyActiveConf * const conf = (CopyActiveConf *)&signal->theData[0];
4552   jamEntry();
4553   CRASH_INSERTION(7143);
4554 
4555   TakeOverRecordPtr takeOverPtr;
4556   c_takeOverPool.getPtr(takeOverPtr, conf->userPtr);
4557 
4558   ndbrequire(conf->tableId == takeOverPtr.p->toCurrentTabref);
4559   ndbrequire(conf->fragId == takeOverPtr.p->toCurrentFragid);
4560   ndbrequire(checkNodeAlive(conf->startingNodeId));
4561 
4562   takeOverPtr.p->startGci = conf->startGci;
4563 
4564   if (takeOverPtr.p->toSlaveStatus == TakeOverRecord::TO_COPY_ACTIVE)
4565   {
4566     jam();
4567     ndbrequire(takeOverPtr.p->toSlaveStatus == TakeOverRecord::TO_COPY_ACTIVE);
4568     takeOverPtr.p->toSlaveStatus = TakeOverRecord::TO_UPDATE_BEFORE_COMMIT;
4569     sendUpdateTo(signal, takeOverPtr);
4570   }
4571   else
4572   {
4573     jam();
4574     ndbrequire(takeOverPtr.p->toSlaveStatus==TakeOverRecord::TO_SL_COPY_ACTIVE);
4575     takeOverPtr.p->toSlaveStatus = TakeOverRecord::TO_SL_CREATE_FRAG;
4576     sendCreateFragReq(signal, takeOverPtr.p->startGci,
4577                       CreateFragReq::START_LOGGING, takeOverPtr.i);
4578   }
4579 }//Dbdih::execCOPY_ACTIVECONF()
4580 
toCopyCompletedLab(Signal * signal,TakeOverRecordPtr takeOverPtr)4581 void Dbdih::toCopyCompletedLab(Signal * signal, TakeOverRecordPtr takeOverPtr)
4582 {
4583   signal->theData[0] = NDB_LE_NR_CopyFragsCompleted;
4584   signal->theData[1] = takeOverPtr.p->toStartingNode;
4585   sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
4586 
4587   Uint32 min_version = getNodeVersionInfo().m_type[NodeInfo::DB].m_min_version;
4588   if (ndb_delayed_copy_active_req(min_version))
4589   {
4590     jam();
4591     takeOverPtr.p->toSlaveStatus = TakeOverRecord::TO_START_LOGGING;
4592     takeOverPtr.p->toCurrentTabref = 0;
4593     takeOverPtr.p->toCurrentFragid = 0;
4594     takeOverPtr.p->toCurrentReplica = RNIL;
4595     nr_start_logging(signal, takeOverPtr);
4596     return;
4597   }
4598   else
4599   {
4600     jam();
4601 
4602     takeOverPtr.p->toSlaveStatus = TakeOverRecord::TO_END_TO;
4603 
4604     EndToReq* req = (EndToReq*)signal->getDataPtrSend();
4605     req->senderData = takeOverPtr.i;
4606     req->senderRef = reference();
4607     req->flags = takeOverPtr.p->m_flags;
4608     sendSignal(cmasterdihref, GSN_END_TOREQ,
4609                signal, EndToReq::SignalLength, JBB);
4610     return;
4611   }
4612 }//Dbdih::toCopyCompletedLab()
4613 
4614 void
execEND_TOREF(Signal * signal)4615 Dbdih::execEND_TOREF(Signal* signal)
4616 {
4617   jamEntry();
4618   EndToRef* ref = (EndToRef*)signal->getDataPtr();
4619 
4620   TakeOverRecordPtr takeOverPtr;
4621   c_takeOverPool.getPtr(takeOverPtr, ref->senderData);
4622 
4623   ndbrequire(false);
4624 }
4625 
4626 void
execEND_TOCONF(Signal * signal)4627 Dbdih::execEND_TOCONF(Signal* signal)
4628 {
4629   jamEntry();
4630   EndToConf* conf = (EndToConf*)signal->getDataPtr();
4631 
4632   CRASH_INSERTION(7144);
4633 
4634   TakeOverRecordPtr takeOverPtr;
4635   c_takeOverPool.getPtr(takeOverPtr, conf->senderData);
4636 
4637   Uint32 senderData = takeOverPtr.p->m_senderData;
4638   Uint32 senderRef = takeOverPtr.p->m_senderRef;
4639   Uint32 nodeId = takeOverPtr.p->toStartingNode;
4640 
4641   releaseTakeOver(takeOverPtr);
4642 
4643   StartCopyConf* ret = (StartCopyConf*)signal->getDataPtrSend();
4644   ret->startingNodeId = nodeId;
4645   ret->senderData = senderData;
4646   ret->senderRef = reference();
4647   sendSignal(senderRef, GSN_START_COPYCONF, signal,
4648              StartCopyConf::SignalLength, JBB);
4649 }
4650 
releaseTakeOver(TakeOverRecordPtr takeOverPtr)4651 void Dbdih::releaseTakeOver(TakeOverRecordPtr takeOverPtr)
4652 {
4653   takeOverPtr.p->toCopyNode = RNIL;
4654   takeOverPtr.p->toCurrentFragid = RNIL;
4655   takeOverPtr.p->toCurrentReplica = RNIL;
4656   takeOverPtr.p->toCurrentTabref = RNIL;
4657   takeOverPtr.p->toFailedNode = RNIL;
4658   takeOverPtr.p->toStartingNode = RNIL;
4659   takeOverPtr.p->toStartTime = 0;
4660   takeOverPtr.p->toSlaveStatus = TakeOverRecord::TO_SLAVE_IDLE;
4661   takeOverPtr.p->toMasterStatus = TakeOverRecord::TO_MASTER_IDLE;
4662 
4663   c_activeTakeOverList.release(takeOverPtr);
4664 }//Dbdih::releaseTakeOver()
4665 
4666 
4667 /*****************************************************************************/
4668 /* ------------------------------------------------------------------------- */
4669 /*       WE HAVE BEEN REQUESTED TO PERFORM A SYSTEM RESTART. WE START BY     */
4670 /*       READING THE GCI FILES. THIS REQUEST WILL ONLY BE SENT TO THE MASTER */
4671 /*       DIH. THAT MEANS WE HAVE TO REPLICATE THE INFORMATION WE READ FROM   */
4672 /*       OUR FILES TO ENSURE THAT ALL NODES HAVE THE SAME DISTRIBUTION       */
4673 /*       INFORMATION.                                                        */
4674 /* ------------------------------------------------------------------------- */
4675 /*****************************************************************************/
readGciFileLab(Signal * signal)4676 void Dbdih::readGciFileLab(Signal* signal)
4677 {
4678   FileRecordPtr filePtr;
4679   filePtr.i = crestartInfoFile[0];
4680   ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
4681   filePtr.p->reqStatus = FileRecord::OPENING_GCP;
4682 
4683   openFileRo(signal, filePtr);
4684 }//Dbdih::readGciFileLab()
4685 
openingGcpLab(Signal * signal,FileRecordPtr filePtr)4686 void Dbdih::openingGcpLab(Signal* signal, FileRecordPtr filePtr)
4687 {
4688   /* ----------------------------------------------------------------------- */
4689   /*     WE HAVE SUCCESSFULLY OPENED A FILE CONTAINING INFORMATION ABOUT     */
4690   /*     THE GLOBAL CHECKPOINTS THAT ARE POSSIBLE TO RESTART.                */
4691   /* ----------------------------------------------------------------------- */
4692   readRestorableGci(signal, filePtr);
4693   filePtr.p->reqStatus = FileRecord::READING_GCP;
4694 }//Dbdih::openingGcpLab()
4695 
readingGcpLab(Signal * signal,FileRecordPtr filePtr)4696 void Dbdih::readingGcpLab(Signal* signal, FileRecordPtr filePtr)
4697 {
4698   /* ----------------------------------------------------------------------- */
4699   /*     WE HAVE NOW SUCCESSFULLY MANAGED TO READ IN THE GLOBAL CHECKPOINT   */
4700   /*     INFORMATION FROM FILE. LATER WE WILL ADD SOME FUNCTIONALITY THAT    */
4701   /*     CHECKS THE RESTART TIMERS TO DEDUCE FROM WHERE TO RESTART.          */
4702   /*     NOW WE WILL SIMPLY RESTART FROM THE NEWEST GLOBAL CHECKPOINT        */
4703   /*     POSSIBLE TO RESTORE.                                                */
4704   /*                                                                         */
4705   /*     BEFORE WE INVOKE DICT WE NEED TO COPY CRESTART_INFO TO ALL NODES.   */
4706   /*     WE ALSO COPY TO OUR OWN NODE. TO ENABLE US TO DO THIS PROPERLY WE   */
4707   /*     START BY CLOSING THIS FILE.                                         */
4708   /* ----------------------------------------------------------------------- */
4709   globalData.m_restart_seq = ++SYSFILE->m_restart_seq;
4710   closeFile(signal, filePtr);
4711   filePtr.p->reqStatus = FileRecord::CLOSING_GCP;
4712 }//Dbdih::readingGcpLab()
4713 
closingGcpLab(Signal * signal,FileRecordPtr filePtr)4714 void Dbdih::closingGcpLab(Signal* signal, FileRecordPtr filePtr)
4715 {
4716   if (Sysfile::getInitialStartOngoing(SYSFILE->systemRestartBits) == false){
4717     jam();
4718     selectMasterCandidateAndSend(signal);
4719     return;
4720   } else {
4721     jam();
4722     sendDihRestartRef(signal);
4723     return;
4724   }//if
4725 }//Dbdih::closingGcpLab()
4726 
4727 void
sendDihRestartRef(Signal * signal)4728 Dbdih::sendDihRestartRef(Signal* signal)
4729 {
4730   jam();
4731 
4732   /**
4733    * We couldn't read P0.Sysfile...
4734    *   so compute no_nodegroup_mask from configuration
4735    */
4736   NdbNodeBitmask no_nodegroup_mask;
4737 
4738   ndb_mgm_configuration_iterator * iter =
4739     m_ctx.m_config.getClusterConfigIterator();
4740   for(ndb_mgm_first(iter); ndb_mgm_valid(iter); ndb_mgm_next(iter))
4741   {
4742     jam();
4743     Uint32 nodeId;
4744     Uint32 nodeType;
4745 
4746     ndbrequire(!ndb_mgm_get_int_parameter(iter,CFG_NODE_ID, &nodeId));
4747     ndbrequire(!ndb_mgm_get_int_parameter(iter,CFG_TYPE_OF_SECTION,
4748                                           &nodeType));
4749 
4750     if (nodeType == NodeInfo::DB)
4751     {
4752       jam();
4753       Uint32 ng;
4754       if (ndb_mgm_get_int_parameter(iter, CFG_DB_NODEGROUP, &ng) == 0)
4755       {
4756         jam();
4757         if (ng == NDB_NO_NODEGROUP)
4758         {
4759           no_nodegroup_mask.set(nodeId);
4760         }
4761       }
4762     }
4763   }
4764   DihRestartRef * ref = CAST_PTR(DihRestartRef, signal->getDataPtrSend());
4765   no_nodegroup_mask.copyto(NdbNodeBitmask::Size, ref->no_nodegroup_mask);
4766   sendSignal(cntrlblockref, GSN_DIH_RESTARTREF, signal,
4767              DihRestartRef::SignalLength, JBB);
4768 }
4769 
4770 /* ------------------------------------------------------------------------- */
4771 /*       SELECT THE MASTER CANDIDATE TO BE USED IN SYSTEM RESTARTS.          */
4772 /* ------------------------------------------------------------------------- */
selectMasterCandidateAndSend(Signal * signal)4773 void Dbdih::selectMasterCandidateAndSend(Signal* signal)
4774 {
4775   setNodeGroups();
4776 
4777   NodeRecordPtr nodePtr;
4778   Uint32 node_groups[MAX_NDB_NODES];
4779   memset(node_groups, 0, sizeof(node_groups));
4780   NdbNodeBitmask no_nodegroup_mask;
4781   for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
4782     jam();
4783     if (Sysfile::getNodeStatus(nodePtr.i, SYSFILE->nodeStatus) == Sysfile::NS_NotDefined)
4784     {
4785       jam();
4786       continue;
4787     }
4788     const Uint32 ng = Sysfile::getNodeGroup(nodePtr.i, SYSFILE->nodeGroups);
4789     if(ng != NO_NODE_GROUP_ID)
4790     {
4791       ndbrequire(ng < MAX_NDB_NODES);
4792       node_groups[ng]++;
4793     }
4794     else
4795     {
4796       no_nodegroup_mask.set(nodePtr.i);
4797     }
4798   }
4799 
4800   DihRestartConf * conf = CAST_PTR(DihRestartConf, signal->getDataPtrSend());
4801   conf->unused = getOwnNodeId();
4802   conf->latest_gci = SYSFILE->lastCompletedGCI[getOwnNodeId()];
4803   no_nodegroup_mask.copyto(NdbNodeBitmask::Size, conf->no_nodegroup_mask);
4804   sendSignal(cntrlblockref, GSN_DIH_RESTARTCONF, signal,
4805              DihRestartConf::SignalLength, JBB);
4806 
4807   for (nodePtr.i = 0; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
4808     jam();
4809     Uint32 count = node_groups[nodePtr.i];
4810     if(count != 0 && count != cnoReplicas){
4811       char buf[255];
4812       BaseString::snprintf(buf, sizeof(buf),
4813 			   "Illegal configuration change."
4814 			   " Initial start needs to be performed "
4815 			   " when changing no of replicas (%d != %d)",
4816 			   node_groups[nodePtr.i], cnoReplicas);
4817       progError(__LINE__, NDBD_EXIT_INVALID_CONFIG, buf);
4818     }
4819   }
4820 }//Dbdih::selectMasterCandidate()
4821 
4822 /* ------------------------------------------------------------------------- */
4823 /*       ERROR HANDLING DURING READING RESTORABLE GCI FROM FILE.             */
4824 /* ------------------------------------------------------------------------- */
openingGcpErrorLab(Signal * signal,FileRecordPtr filePtr)4825 void Dbdih::openingGcpErrorLab(Signal* signal, FileRecordPtr filePtr)
4826 {
4827   filePtr.p->fileStatus = FileRecord::CRASHED;
4828   filePtr.p->reqStatus = FileRecord::IDLE;
4829   if (crestartInfoFile[0] == filePtr.i) {
4830     jam();
4831     /* --------------------------------------------------------------------- */
4832     /*   THE FIRST FILE WAS NOT ABLE TO BE OPENED. SET STATUS TO CRASHED AND */
4833     /*   TRY OPEN THE NEXT FILE.                                             */
4834     /* --------------------------------------------------------------------- */
4835     filePtr.i = crestartInfoFile[1];
4836     ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
4837     openFileRo(signal, filePtr);
4838     filePtr.p->reqStatus = FileRecord::OPENING_GCP;
4839   } else {
4840     jam();
4841     /* --------------------------------------------------------------------- */
4842     /*   WE FAILED IN OPENING THE SECOND FILE. BOTH FILES WERE CORRUPTED. WE */
4843     /*   CANNOT CONTINUE THE RESTART IN THIS CASE. TELL NDBCNTR OF OUR       */
4844     /*   FAILURE.                                                            */
4845     /*---------------------------------------------------------------------- */
4846     sendDihRestartRef(signal);
4847     return;
4848   }//if
4849 }//Dbdih::openingGcpErrorLab()
4850 
readingGcpErrorLab(Signal * signal,FileRecordPtr filePtr)4851 void Dbdih::readingGcpErrorLab(Signal* signal, FileRecordPtr filePtr)
4852 {
4853   filePtr.p->fileStatus = FileRecord::CRASHED;
4854   /* ----------------------------------------------------------------------- */
4855   /*     WE FAILED IN READING THE FILE AS WELL. WE WILL CLOSE THIS FILE.     */
4856   /* ----------------------------------------------------------------------- */
4857   closeFile(signal, filePtr);
4858   filePtr.p->reqStatus = FileRecord::CLOSING_GCP_CRASH;
4859 }//Dbdih::readingGcpErrorLab()
4860 
closingGcpCrashLab(Signal * signal,FileRecordPtr filePtr)4861 void Dbdih::closingGcpCrashLab(Signal* signal, FileRecordPtr filePtr)
4862 {
4863   if (crestartInfoFile[0] == filePtr.i) {
4864     jam();
4865     /* --------------------------------------------------------------------- */
4866     /*   ERROR IN FIRST FILE, TRY THE SECOND FILE.                           */
4867     /* --------------------------------------------------------------------- */
4868     filePtr.i = crestartInfoFile[1];
4869     ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
4870     openFileRw(signal, filePtr);
4871     filePtr.p->reqStatus = FileRecord::OPENING_GCP;
4872     return;
4873   }//if
4874   /* ----------------------------------------------------------------------- */
4875   /*     WE DISCOVERED A FAILURE WITH THE SECOND FILE AS WELL. THIS IS A     */
4876   /*     SERIOUS PROBLEM. REPORT FAILURE TO NDBCNTR.                         */
4877   /* ----------------------------------------------------------------------- */
4878   sendDihRestartRef(signal);
4879 }//Dbdih::closingGcpCrashLab()
4880 
4881 /*****************************************************************************/
4882 /* ------------------------------------------------------------------------- */
4883 /*       THIS IS AN INITIAL RESTART. WE WILL CREATE THE TWO FILES DESCRIBING */
4884 /*       THE GLOBAL CHECKPOINTS THAT ARE RESTORABLE.                         */
4885 /* ------------------------------------------------------------------------- */
4886 /*****************************************************************************/
initGciFilesLab(Signal * signal)4887 void Dbdih::initGciFilesLab(Signal* signal)
4888 {
4889   FileRecordPtr filePtr;
4890   filePtr.i = crestartInfoFile[0];
4891   ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
4892   createFileRw(signal, filePtr);
4893   filePtr.p->reqStatus = FileRecord::CREATING_GCP;
4894 }//Dbdih::initGciFilesLab()
4895 
4896 /* ------------------------------------------------------------------------- */
/*       GLOBAL CHECKPOINT FILE HAS BEEN SUCCESSFULLY CREATED.               */
4898 /* ------------------------------------------------------------------------- */
creatingGcpLab(Signal * signal,FileRecordPtr filePtr)4899 void Dbdih::creatingGcpLab(Signal* signal, FileRecordPtr filePtr)
4900 {
4901   if (filePtr.i == crestartInfoFile[0]) {
4902     jam();
4903     /* --------------------------------------------------------------------- */
4904     /*   IF CREATED FIRST THEN ALSO CREATE THE SECOND FILE.                  */
4905     /* --------------------------------------------------------------------- */
4906     filePtr.i = crestartInfoFile[1];
4907     ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
4908     createFileRw(signal, filePtr);
4909     filePtr.p->reqStatus = FileRecord::CREATING_GCP;
4910   } else {
4911     jam();
4912     /* --------------------------------------------------------------------- */
4913     /*   BOTH FILES HAVE BEEN CREATED. NOW WRITE THE INITIAL DATA TO BOTH    */
4914     /*   OF THE FILES.                                                       */
4915     /* --------------------------------------------------------------------- */
4916     filePtr.i = crestartInfoFile[0];
4917     ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
4918     writeRestorableGci(signal, filePtr);
4919     filePtr.p->reqStatus = FileRecord::WRITE_INIT_GCP;
4920   }//if
4921 }//Dbdih::creatingGcpLab()
4922 
4923 /* ------------------------------------------------------------------------- */
4924 /*       WE HAVE SUCCESSFULLY WRITTEN A GCI FILE.                            */
4925 /* ------------------------------------------------------------------------- */
writeInitGcpLab(Signal * signal,FileRecordPtr filePtr)4926 void Dbdih::writeInitGcpLab(Signal* signal, FileRecordPtr filePtr)
4927 {
4928   filePtr.p->reqStatus = FileRecord::IDLE;
4929   if (filePtr.i == crestartInfoFile[0]) {
4930     jam();
4931     /* --------------------------------------------------------------------- */
4932     /*   WE HAVE WRITTEN THE FIRST FILE NOW ALSO WRITE THE SECOND FILE.      */
4933     /* --------------------------------------------------------------------- */
4934     filePtr.i = crestartInfoFile[1];
4935     ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
4936     writeRestorableGci(signal, filePtr);
4937     filePtr.p->reqStatus = FileRecord::WRITE_INIT_GCP;
4938   } else {
4939     /* --------------------------------------------------------------------- */
4940     /*   WE HAVE WRITTEN BOTH FILES. LEAVE BOTH FILES OPEN AND CONFIRM OUR   */
4941     /*   PART OF THE INITIAL START.                                          */
4942     /* --------------------------------------------------------------------- */
4943     if (isMaster()) {
4944       jam();
4945       /*---------------------------------------------------------------------*/
4946       // IN MASTER NODES THE START REQUEST IS RECEIVED FROM NDBCNTR AND WE MUST
4947       // RESPOND WHEN COMPLETED.
4948       /*---------------------------------------------------------------------*/
4949       signal->theData[0] = reference();
4950       sendSignal(cndbStartReqBlockref, GSN_NDB_STARTCONF, signal, 1, JBB);
4951     } else {
4952       jam();
4953       ndbsttorry10Lab(signal, __LINE__);
4954       return;
4955     }//if
4956   }//if
4957 }//Dbdih::writeInitGcpLab()
4958 
4959 /*****************************************************************************/
4960 /* **********     NODES DELETION MODULE                          *************/
4961 /*****************************************************************************/
4962 /*---------------------------------------------------------------------------*/
4963 /*                    LOGIC FOR NODE FAILURE                                 */
4964 /*---------------------------------------------------------------------------*/
execNODE_FAILREP(Signal * signal)4965 void Dbdih::execNODE_FAILREP(Signal* signal)
4966 {
4967   Uint32 i;
4968   Uint32 failedNodes[MAX_NDB_NODES];
4969   jamEntry();
4970   NodeFailRep * const nodeFail = (NodeFailRep *)&signal->theData[0];
4971 
4972   cfailurenr = nodeFail->failNo;
4973   Uint32 newMasterId = nodeFail->masterNodeId;
4974   const Uint32 noOfFailedNodes = nodeFail->noOfNodes;
4975 
4976   if (ERROR_INSERTED(7179) || ERROR_INSERTED(7217))
4977   {
4978     CLEAR_ERROR_INSERT_VALUE;
4979   }
4980 
4981   if (ERROR_INSERTED(7184))
4982   {
4983     SET_ERROR_INSERT_VALUE(7000);
4984   }
4985 
4986 
4987 
4988   /*-------------------------------------------------------------------------*/
4989   // The first step is to convert from a bit mask to an array of failed nodes.
4990   /*-------------------------------------------------------------------------*/
4991   Uint32 index = 0;
4992   for (i = 1; i < MAX_NDB_NODES; i++) {
4993     jam();
4994     if(NdbNodeBitmask::get(nodeFail->theNodes, i)){
4995       jam();
4996       failedNodes[index] = i;
4997       index++;
4998     }//if
4999   }//for
5000   ndbrequire(noOfFailedNodes == index);
5001   ndbrequire(noOfFailedNodes - 1 < MAX_NDB_NODES);
5002 
5003   /*-------------------------------------------------------------------------*/
5004   // The second step is to update the node status of the failed nodes, remove
5005   // them from the alive node list and put them into the dead node list. Also
5006   // update the number of nodes on-line.
5007   // We also set certain state variables ensuring that the node no longer is
5008   // used in transactions and also mark that we received this signal.
5009   /*-------------------------------------------------------------------------*/
5010   for (i = 0; i < noOfFailedNodes; i++) {
5011     jam();
5012     NodeRecordPtr TNodePtr;
5013     TNodePtr.i = failedNodes[i];
5014     ptrCheckGuard(TNodePtr, MAX_NDB_NODES, nodeRecord);
5015     TNodePtr.p->useInTransactions = false;
5016     TNodePtr.p->m_inclDihLcp = false;
5017     TNodePtr.p->recNODE_FAILREP = ZTRUE;
5018     if (TNodePtr.p->nodeStatus == NodeRecord::ALIVE) {
5019       jam();
5020       con_lineNodes--;
5021       TNodePtr.p->nodeStatus = NodeRecord::DIED_NOW;
5022       removeAlive(TNodePtr);
5023       insertDeadNode(TNodePtr);
5024     }//if
5025   }//for
5026 
5027   /*-------------------------------------------------------------------------*/
5028   // Verify that we can continue to operate the cluster. If we cannot we will
5029   // not return from checkEscalation.
5030   /*-------------------------------------------------------------------------*/
5031   checkEscalation();
5032 
5033   /*------------------------------------------------------------------------*/
5034   // Verify that a starting node has also crashed. Reset the node start record.
5035   /*-------------------------------------------------------------------------*/
5036 #if 0
5037   /**
5038    * Node will crash by itself...
5039    *   nodeRestart is run then...
5040    */
5041   if (false && c_nodeStartMaster.startNode != RNIL && getNodeStatus(c_nodeStartMaster.startNode) == NodeRecord::ALIVE)
5042   {
5043     BlockReference cntrRef = calcNdbCntrBlockRef(c_nodeStartMaster.startNode);
5044     SystemError * const sysErr = (SystemError*)&signal->theData[0];
5045     sysErr->errorCode = SystemError::StartInProgressError;
5046     sysErr->errorRef = reference();
5047     sysErr->data[0]= 0;
5048     sysErr->data[1]= __LINE__;
5049     sendSignal(cntrRef, GSN_SYSTEM_ERROR, signal,  SystemError::SignalLength, JBA);
5050     nodeResetStart(signal);
5051   }//if
5052 #endif
5053 
5054   /*--------------------------------------------------*/
5055   /*                                                  */
5056   /*       WE CHANGE THE REFERENCE TO MASTER DIH      */
5057   /*       BLOCK AND POINTER AT THIS PLACE IN THE CODE*/
5058   /*--------------------------------------------------*/
5059   Uint32 oldMasterId = cmasterNodeId;
5060   BlockReference oldMasterRef = cmasterdihref;
5061   cmasterdihref = calcDihBlockRef(newMasterId);
5062   cmasterNodeId = newMasterId;
5063 
5064   const bool masterTakeOver = (oldMasterId != newMasterId);
5065 
5066   for(i = 0; i < noOfFailedNodes; i++) {
5067     NodeRecordPtr failedNodePtr;
5068     failedNodePtr.i = failedNodes[i];
5069     ptrCheckGuard(failedNodePtr, MAX_NDB_NODES, nodeRecord);
5070     if (oldMasterRef == reference()) {
5071       /*-------------------------------------------------------*/
5072       // Functions that need to be called only for master nodes.
5073       /*-------------------------------------------------------*/
5074       checkCopyTab(signal, failedNodePtr);
5075       checkStopPermMaster(signal, failedNodePtr);
5076       checkWaitGCPMaster(signal, failedNodes[i]);
5077 
5078       {
5079         Ptr<TakeOverRecord> takeOverPtr;
5080         if (findTakeOver(takeOverPtr, failedNodePtr.i))
5081         {
5082           handleTakeOver(signal, takeOverPtr);
5083         }
5084       }
5085       checkGcpOutstanding(signal, failedNodePtr.i);
5086     } else {
5087       jam();
5088       /*-----------------------------------------------------------*/
5089       // Functions that need to be called only for nodes that were
5090       // not master before these failures.
5091       /*-----------------------------------------------------------*/
5092       checkStopPermProxy(signal, failedNodes[i]);
5093       checkWaitGCPProxy(signal, failedNodes[i]);
5094     }//if
5095     /*--------------------------------------------------*/
5096     // Functions that need to be called for all nodes.
5097     /*--------------------------------------------------*/
5098     checkStopMe(signal, failedNodePtr);
5099     failedNodeLcpHandling(signal, failedNodePtr);
5100     startRemoveFailedNode(signal, failedNodePtr);
5101 
5102     /**
5103      * This is the last function called
5104      *   It modifies failedNodePtr.p->nodeStatus
5105      */
5106     failedNodeSynchHandling(signal, failedNodePtr);
5107   }//for
5108 
5109   if(masterTakeOver){
5110     jam();
5111     startLcpMasterTakeOver(signal, oldMasterId);
5112     startGcpMasterTakeOver(signal, oldMasterId);
5113 
5114     if(getNodeState().getNodeRestartInProgress()){
5115       jam();
5116       progError(__LINE__, NDBD_EXIT_MASTER_FAILURE_DURING_NR);
5117     }
5118   }
5119 
5120 
5121   if (isMaster()) {
5122     jam();
5123     setNodeRestartInfoBits(signal);
5124   }//if
5125 }//Dbdih::execNODE_FAILREP()
5126 
checkCopyTab(Signal * signal,NodeRecordPtr failedNodePtr)5127 void Dbdih::checkCopyTab(Signal* signal, NodeRecordPtr failedNodePtr)
5128 {
5129   jam();
5130 
5131   if(c_nodeStartMaster.startNode != failedNodePtr.i){
5132     jam();
5133     return;
5134   }
5135 
5136   switch(c_nodeStartMaster.m_outstandingGsn){
5137   case GSN_COPY_TABREQ:
5138     jam();
5139     ndbrequire(c_COPY_TABREQ_Counter.isWaitingFor(failedNodePtr.i));
5140     releaseTabPages(failedNodePtr.p->activeTabptr);
5141     c_COPY_TABREQ_Counter.clearWaitingFor(failedNodePtr.i);
5142     c_nodeStartMaster.wait = ZFALSE;
5143     break;
5144   case GSN_START_INFOREQ:
5145   case GSN_START_PERMCONF:
5146   case GSN_DICTSTARTREQ:
5147   case GSN_START_MECONF:
5148   case GSN_COPY_GCIREQ:
5149     jam();
5150     break;
5151   default:
5152     g_eventLogger->error("outstanding gsn: %s(%d)",
5153                          getSignalName(c_nodeStartMaster.m_outstandingGsn),
5154                          c_nodeStartMaster.m_outstandingGsn);
5155     ndbrequire(false);
5156   }
5157 
5158   if (!c_nodeStartMaster.m_fragmentInfoMutex.isNull())
5159   {
5160     jam();
5161     Mutex mutex(signal, c_mutexMgr, c_nodeStartMaster.m_fragmentInfoMutex);
5162     mutex.unlock();
5163   }
5164 
5165   nodeResetStart(signal);
5166 }//Dbdih::checkCopyTab()
5167 
checkStopMe(Signal * signal,NodeRecordPtr failedNodePtr)5168 void Dbdih::checkStopMe(Signal* signal, NodeRecordPtr failedNodePtr)
5169 {
5170   jam();
5171   if (c_STOP_ME_REQ_Counter.isWaitingFor(failedNodePtr.i)){
5172     jam();
5173     ndbrequire(c_stopMe.clientRef != 0);
5174     StopMeConf * const stopMeConf = (StopMeConf *)&signal->theData[0];
5175     stopMeConf->senderRef = calcDihBlockRef(failedNodePtr.i);
5176     stopMeConf->senderData = c_stopMe.clientData;
5177     sendSignal(reference(), GSN_STOP_ME_CONF, signal,
5178 	       StopMeConf::SignalLength, JBB);
5179   }//if
5180 }//Dbdih::checkStopMe()
5181 
checkStopPermMaster(Signal * signal,NodeRecordPtr failedNodePtr)5182 void Dbdih::checkStopPermMaster(Signal* signal, NodeRecordPtr failedNodePtr)
5183 {
5184   DihSwitchReplicaRef* const ref = (DihSwitchReplicaRef*)&signal->theData[0];
5185   jam();
5186   if (c_DIH_SWITCH_REPLICA_REQ_Counter.isWaitingFor(failedNodePtr.i)){
5187     jam();
5188     ndbrequire(c_stopPermMaster.clientRef != 0);
5189     ref->senderNode = failedNodePtr.i;
5190     ref->errorCode = StopPermRef::NF_CausedAbortOfStopProcedure;
5191     sendSignal(reference(), GSN_DIH_SWITCH_REPLICA_REF, signal,
5192                DihSwitchReplicaRef::SignalLength, JBB);
5193     return;
5194   }//if
5195 }//Dbdih::checkStopPermMaster()
5196 
checkStopPermProxy(Signal * signal,NodeId failedNodeId)5197 void Dbdih::checkStopPermProxy(Signal* signal, NodeId failedNodeId)
5198 {
5199   jam();
5200   if(c_stopPermProxy.clientRef != 0 &&
5201      refToNode(c_stopPermProxy.masterRef) == failedNodeId){
5202 
5203     /**
5204      * The master has failed report to proxy-client
5205      */
5206     jam();
5207     StopPermRef* const ref = (StopPermRef*)&signal->theData[0];
5208 
5209     ref->senderData = c_stopPermProxy.clientData;
5210     ref->errorCode  = StopPermRef::NF_CausedAbortOfStopProcedure;
5211     sendSignal(c_stopPermProxy.clientRef, GSN_STOP_PERM_REF, signal, 2, JBB);
5212     c_stopPermProxy.clientRef = 0;
5213   }//if
5214 }//Dbdih::checkStopPermProxy()
5215 
5216 void
handleTakeOver(Signal * signal,TakeOverRecordPtr takeOverPtr)5217 Dbdih::handleTakeOver(Signal* signal, TakeOverRecordPtr takeOverPtr)
5218 {
5219   jam();
5220   switch(takeOverPtr.p->toMasterStatus){
5221   case TakeOverRecord::TO_MASTER_IDLE:
5222     jam();
5223     releaseTakeOver(takeOverPtr);
5224     return;
5225   case TakeOverRecord::TO_MUTEX_BEFORE_STORED:
5226     jam();
5227     /**
5228      * Waiting for lock...
5229      *   do nothing...will be detected when lock is acquired
5230      */
5231     return;
5232   case TakeOverRecord::TO_MUTEX_BEFORE_LOCKED:
5233     jam();
5234     /**
5235      * Has lock...and NGPtr reservation...
5236      */
5237     abortTakeOver(signal, takeOverPtr);
5238     return;
5239   case TakeOverRecord::TO_AFTER_STORED:{
5240     jam();
5241     /**
5242      * No lock...but NGPtr reservation...remove NGPtr reservation
5243      */
5244     NodeRecordPtr nodePtr;
5245     NodeGroupRecordPtr NGPtr;
5246     nodePtr.i = takeOverPtr.p->toCopyNode;
5247     ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
5248     NGPtr.i = nodePtr.p->nodeGroup;
5249     ptrCheckGuard(NGPtr, MAX_NDB_NODES, nodeGroupRecord);
5250 
5251     ndbassert(NGPtr.p->activeTakeOver == takeOverPtr.p->toStartingNode);
5252     if (NGPtr.p->activeTakeOver == takeOverPtr.p->toStartingNode)
5253     {
5254       jam();
5255       NGPtr.p->activeTakeOver = 0;
5256     }
5257     releaseTakeOver(takeOverPtr);
5258     return;
5259   }
5260   case TakeOverRecord::TO_MUTEX_BEFORE_COMMIT:
5261     jam();
5262     /**
5263      * Waiting for lock...
5264      *   do nothing...will be detected when lock is acquired
5265      */
5266     return;
5267   case TakeOverRecord::TO_MUTEX_BEFORE_SWITCH_REPLICA:
5268     jam();
5269     /**
5270      * Waiting for lock...
5271      *   do nothing...will be detected when lock is acquired
5272      */
5273     return;
5274   case TakeOverRecord::TO_MUTEX_AFTER_SWITCH_REPLICA:
5275     jam();
5276     abortTakeOver(signal, takeOverPtr);
5277     return;
5278   case TakeOverRecord::TO_WAIT_LCP:{
5279     jam();
5280     /**
5281      * Waiting for LCP
5282      */
5283     NodeRecordPtr nodePtr;
5284     nodePtr.i = takeOverPtr.p->toStartingNode;
5285     ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
5286     nodePtr.p->copyCompleted = 0;
5287     releaseTakeOver(takeOverPtr);
5288     return;
5289   }
5290   default:
5291     jamLine(takeOverPtr.p->toMasterStatus);
5292     ndbrequire(false);
5293   }
5294 }
5295 
failedNodeSynchHandling(Signal * signal,NodeRecordPtr failedNodePtr)5296 void Dbdih::failedNodeSynchHandling(Signal* signal,
5297 				    NodeRecordPtr failedNodePtr)
5298 {
5299   jam();
5300   /*----------------------------------------------------*/
5301   /*       INITIALISE THE VARIABLES THAT KEEP TRACK OF  */
5302   /*       WHEN A NODE FAILURE IS COMPLETED.            */
5303   /*----------------------------------------------------*/
5304   failedNodePtr.p->dbdictFailCompleted = ZFALSE;
5305   failedNodePtr.p->dbtcFailCompleted = ZFALSE;
5306   failedNodePtr.p->dbdihFailCompleted = ZFALSE;
5307   failedNodePtr.p->dblqhFailCompleted = ZFALSE;
5308 
5309   failedNodePtr.p->m_NF_COMPLETE_REP.clearWaitingFor();
5310 
5311   NodeRecordPtr nodePtr;
5312   for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
5313     ptrAss(nodePtr, nodeRecord);
5314     if (nodePtr.p->nodeStatus == NodeRecord::ALIVE) {
5315       jam();
5316       /**
5317        * We'r waiting for nodePtr.i to complete
5318        * handling of failedNodePtr.i's death
5319        */
5320 
5321       failedNodePtr.p->m_NF_COMPLETE_REP.setWaitingFor(nodePtr.i);
5322     } else {
5323       jam();
5324       if ((nodePtr.p->nodeStatus == NodeRecord::DYING) &&
5325           (nodePtr.p->m_NF_COMPLETE_REP.isWaitingFor(failedNodePtr.i))){
5326         jam();
5327 	/*----------------------------------------------------*/
5328 	/*       THE NODE FAILED BEFORE REPORTING THE FAILURE */
5329 	/*       HANDLING COMPLETED ON THIS FAILED NODE.      */
5330 	/*       REPORT THAT NODE FAILURE HANDLING WAS        */
5331 	/*       COMPLETED ON THE NEW FAILED NODE FOR THIS    */
5332 	/*       PARTICULAR OLD FAILED NODE.                  */
5333 	/*----------------------------------------------------*/
5334         NFCompleteRep * const nf = (NFCompleteRep *)&signal->theData[0];
5335         nf->blockNo = 0;
5336         nf->nodeId  = failedNodePtr.i;
5337         nf->failedNodeId = nodePtr.i;
5338 	nf->from    = __LINE__;
5339         sendSignal(reference(), GSN_NF_COMPLETEREP, signal,
5340                    NFCompleteRep::SignalLength, JBB);
5341       }//if
5342     }//if
5343   }//for
5344   if (failedNodePtr.p->nodeStatus == NodeRecord::DIED_NOW) {
5345     jam();
5346     failedNodePtr.p->nodeStatus = NodeRecord::DYING;
5347   } else {
5348     jam();
5349     /*----------------------------------------------------*/
5350     // No more processing needed when node not even started
5351     // yet. We give the node status to DEAD since we do not
5352     // care whether all nodes complete the node failure
5353     // handling. The node have not been included in the
5354     // node failure protocols.
5355     /*----------------------------------------------------*/
5356     failedNodePtr.p->nodeStatus = NodeRecord::DEAD;
5357     /**-----------------------------------------------------------------------
5358      * WE HAVE COMPLETED HANDLING THE NODE FAILURE IN DIH. WE CAN REPORT THIS
5359      * TO DIH THAT WAIT FOR THE OTHER BLOCKS TO BE CONCLUDED AS WELL.
5360      *-----------------------------------------------------------------------*/
5361     NFCompleteRep * const nf = (NFCompleteRep *)&signal->theData[0];
5362     nf->blockNo      = DBDIH;
5363     nf->nodeId       = cownNodeId;
5364     nf->failedNodeId = failedNodePtr.i;
5365     nf->from         = __LINE__;
5366     sendSignal(reference(), GSN_NF_COMPLETEREP, signal,
5367                NFCompleteRep::SignalLength, JBB);
5368   }//if
5369 }//Dbdih::failedNodeSynchHandling()
5370 
5371 bool
findTakeOver(Ptr<TakeOverRecord> & ptr,Uint32 failedNodeId)5372 Dbdih::findTakeOver(Ptr<TakeOverRecord> & ptr, Uint32 failedNodeId)
5373 {
5374   for (c_activeTakeOverList.first(ptr); !ptr.isNull();
5375        c_activeTakeOverList.next(ptr))
5376   {
5377     jam();
5378     if (ptr.p->toStartingNode == failedNodeId)
5379     {
5380       jam();
5381       return true;
5382     }
5383   }
5384   ptr.setNull();
5385   return false;
5386 }//Dbdih::findTakeOver()
5387 
/**
 * Clean up local checkpoint (LCP) bookkeeping after a node failure.
 *
 * On the master, if the failed node's LQH was participating in the LCP, the
 * node's activeStatus is updated to reflect the missed checkpoint. The node
 * is then removed from both LCP participant sets, and for every counter
 * that is still waiting for the failed node a reply is faked by sending the
 * corresponding signal to this block itself, filled in on the failed node's
 * behalf.
 *
 * @param signal         Signal object used to build and send faked replies.
 * @param failedNodePtr  Node record of the failed node.
 */
void Dbdih::failedNodeLcpHandling(Signal* signal, NodeRecordPtr failedNodePtr)
{
  jam();
  const Uint32 nodeId = failedNodePtr.i;

  if (isMaster() && c_lcpState.m_participatingLQH.get(failedNodePtr.i))
  {
    /*----------------------------------------------------*/
    /*  THE NODE WAS INVOLVED IN A LOCAL CHECKPOINT. WE   */
    /* MUST UPDATE THE ACTIVE STATUS TO INDICATE THAT     */
    /* THE NODE HAVE MISSED A LOCAL CHECKPOINT.           */
    /*----------------------------------------------------*/

    /**
     * Bug#28717, Only master should do this, as this status is copied
     *   to other nodes
     */
    switch (failedNodePtr.p->activeStatus) {
    case Sysfile::NS_Active:
      jam();
      failedNodePtr.p->activeStatus = Sysfile::NS_ActiveMissed_1;
      break;
    case Sysfile::NS_ActiveMissed_1:
      jam();
      // NOTE(review): this assigns the value the status already has, so the
      // status is not advanced in this case - confirm this is intended.
      failedNodePtr.p->activeStatus = Sysfile::NS_ActiveMissed_1;
      break;
    case Sysfile::NS_ActiveMissed_2:
      jam();
      failedNodePtr.p->activeStatus = Sysfile::NS_NotActive_NotTakenOver;
      break;
    case Sysfile::NS_TakeOver:
      jam();
      failedNodePtr.p->activeStatus = Sysfile::NS_NotActive_NotTakenOver;
      break;
    case Sysfile::NS_Configured:
      jam();
      break;
    default:
      // Any other status is unexpected here: log it and fail.
      g_eventLogger->error("activeStatus = %u "
                           "at failure after NODE_FAILREP of node = %u",
                           (Uint32) failedNodePtr.p->activeStatus,
                           failedNodePtr.i);
      ndbrequire(false);
      break;
    }//switch
  }//if

  // The failed node no longer participates in the LCP.
  c_lcpState.m_participatingDIH.clear(failedNodePtr.i);
  c_lcpState.m_participatingLQH.clear(failedNodePtr.i);

  // Sampled here, before any faked signal is sent; it gates the faked LQH
  // completion report further down.
  bool wf = c_MASTER_LCPREQ_Counter.isWaitingFor(failedNodePtr.i);

  // Fake the failed node's DIH-level LCP_COMPLETE_REP if it is outstanding.
  if(c_lcpState.m_LCP_COMPLETE_REP_Counter_DIH.isWaitingFor(failedNodePtr.i))
  {
    jam();
    LcpCompleteRep * rep = (LcpCompleteRep*)signal->getDataPtrSend();
    rep->nodeId = failedNodePtr.i;
    rep->lcpId = SYSFILE->latestLCP_ID;
    rep->blockNo = DBDIH;
    sendSignal(reference(), GSN_LCP_COMPLETE_REP, signal,
               LcpCompleteRep::SignalLength, JBB);
  }

  // Set when an LQH-level LCP_COMPLETE_REP is faked below; used by the
  // EMPTY_LCP_REQ handling at the dosend label.
  bool lcp_complete_rep = false;
  if (!wf)
  {
    jam();

    /**
     * Check if we'r waiting for the failed node's LQH to complete
     *
     * Note that this is ran "before" LCP master take over
     */
    if(c_lcpState.m_LCP_COMPLETE_REP_Counter_LQH.isWaitingFor(nodeId)){
      jam();

      lcp_complete_rep = true;
      LcpCompleteRep * rep = (LcpCompleteRep*)signal->getDataPtrSend();
      rep->nodeId  = nodeId;
      rep->lcpId   = SYSFILE->latestLCP_ID;
      rep->blockNo = DBLQH;
      sendSignal(reference(), GSN_LCP_COMPLETE_REP, signal,
                 LcpCompleteRep::SignalLength, JBB);

      if(c_lcpState.m_LAST_LCP_FRAG_ORD.isWaitingFor(nodeId)){
        jam();
        /**
         * Make sure we're ready to accept it
         */
        c_lcpState.m_LAST_LCP_FRAG_ORD.clearWaitingFor(nodeId);
      }
    }
  }

  // Fake TCGETOPSIZECONF from the failed node; the second word is sent as 0.
  if (c_TCGETOPSIZEREQ_Counter.isWaitingFor(failedNodePtr.i)) {
    jam();
    signal->theData[0] = failedNodePtr.i;
    signal->theData[1] = 0;
    sendSignal(reference(), GSN_TCGETOPSIZECONF, signal, 2, JBB);
  }//if

  // Fake TC_CLOPSIZECONF from the failed node.
  if (c_TC_CLOPSIZEREQ_Counter.isWaitingFor(failedNodePtr.i)) {
    jam();
    signal->theData[0] = failedNodePtr.i;
    sendSignal(reference(), GSN_TC_CLOPSIZECONF, signal, 1, JBB);
  }//if

  // Fake START_LCP_CONF, with the failed node's LQH as sender.
  if (c_START_LCP_REQ_Counter.isWaitingFor(failedNodePtr.i)) {
    jam();
    StartLcpConf * conf = (StartLcpConf*)signal->getDataPtrSend();
    conf->senderRef = numberToRef(DBLQH, failedNodePtr.i);
    conf->lcpId = SYSFILE->latestLCP_ID;
    sendSignal(reference(), GSN_START_LCP_CONF, signal,
	       StartLcpConf::SignalLength, JBB);
  }//if

  // Fake an "idle" EMPTY_LCP_CONF from the failed node. If the counter is
  // not waiting for the node, but an EMPTY_LCP_REQ round is still in
  // progress (!done()) and an LQH completion was faked above, the node is
  // first registered as outstanding and control jumps back to dosend so the
  // faked CONF is sent for it as well.
dosend:
  if (c_EMPTY_LCP_REQ_Counter.isWaitingFor(failedNodePtr.i))
  {
    jam();
    EmptyLcpConf * const rep = (EmptyLcpConf *)&signal->theData[0];
    rep->senderNodeId = failedNodePtr.i;
    rep->tableId = ~0;
    rep->fragmentId = ~0;
    rep->lcpNo = 0;
    rep->lcpId = SYSFILE->latestLCP_ID;
    rep->idle = true;
    sendSignal(reference(), GSN_EMPTY_LCP_CONF, signal,
	       EmptyLcpConf::SignalLength, JBB);
  }
  else if (!c_EMPTY_LCP_REQ_Counter.done() && lcp_complete_rep)
  {
    jam();
    c_EMPTY_LCP_REQ_Counter.setWaitingFor(failedNodePtr.i);
    goto dosend;
  }

  // Fake MASTER_LCPREF from the failed node so a pending MASTER_LCPREQ
  // round does not wait for it.
  if (c_MASTER_LCPREQ_Counter.isWaitingFor(failedNodePtr.i)) {
    jam();
    MasterLCPRef * const ref = (MasterLCPRef *)&signal->theData[0];
    ref->senderNodeId = failedNodePtr.i;
    ref->failedNodeId = cmasterTakeOverNode;
    sendSignal(reference(), GSN_MASTER_LCPREF, signal,
	       MasterLCPRef::SignalLength, JBB);
  }//if

}//Dbdih::failedNodeLcpHandling()
5535 
/**
 * Resolve global checkpoint (GCP) requests that are still outstanding
 * towards a failed node.
 *
 * Called from execNODE_FAILREP for each failed node on the node that was
 * master before the failure. For every GCP-related counter that is waiting
 * for the failed node, a reply is faked by sending the corresponding signal
 * to this block itself; the GCP_COMMIT case instead re-runs
 * GCP_NOMORETRANS against the local TC.
 *
 * @param signal        Signal object used to build and send the signals.
 * @param failedNodeId  Id of the failed node.
 */
void Dbdih::checkGcpOutstanding(Signal* signal, Uint32 failedNodeId){
  // Fake GCP_PREPARECONF for the GCI currently being prepared by the master.
  if (c_GCP_PREPARE_Counter.isWaitingFor(failedNodeId)){
    jam();
    GCPPrepareConf* conf = (GCPPrepareConf*)signal->getDataPtrSend();
    conf->nodeId = failedNodeId;
    conf->gci_hi = Uint32(m_micro_gcp.m_master.m_new_gci >> 32);
    conf->gci_lo = Uint32(m_micro_gcp.m_master.m_new_gci);
    sendSignal(reference(), GSN_GCP_PREPARECONF, signal,
               GCPPrepareConf::SignalLength, JBB);
  }//if

  if (c_GCP_COMMIT_Counter.isWaitingFor(failedNodeId))
  {
    jam();

    /**
     * Waiting for GSN_GCP_NODEFINISH
     *   TC-take-over can generate new transactions
     *   that will be in this epoch
     *   re-run GCP_NOMORETRANS to master-TC (self) that will run
     *   take-over
     */
    c_GCP_COMMIT_Counter.clearWaitingFor(failedNodeId);
    // Make sure the counter waits for the own node, since GCP_NOMORETRANS
    // is re-sent to the local TC below.
    if (!c_GCP_COMMIT_Counter.isWaitingFor(getOwnNodeId()))
    {
      jam();
      c_GCP_COMMIT_Counter.setWaitingFor(getOwnNodeId());
      m_micro_gcp.m_state = MicroGcp::M_GCP_COMMIT;
    }

    GCPNoMoreTrans* req = (GCPNoMoreTrans*)signal->getDataPtrSend();
    req->senderRef = reference();
    req->senderData = m_micro_gcp.m_master_ref;
    req->gci_hi = Uint32(m_micro_gcp.m_old_gci >> 32);
    req->gci_lo = Uint32(m_micro_gcp.m_old_gci);
    sendSignal(clocaltcblockref, GSN_GCP_NOMORETRANS, signal,
               GCPNoMoreTrans::SignalLength, JBB);
  }

  // Fake GCP_SAVEREF, marked as a faked signal caused by the node failure.
  if (c_GCP_SAVEREQ_Counter.isWaitingFor(failedNodeId)) {
    jam();
    GCPSaveRef * const saveRef = (GCPSaveRef*)&signal->theData[0];
    saveRef->dihPtr = failedNodeId;
    saveRef->nodeId = failedNodeId;
    saveRef->gci    = m_gcp_save.m_master.m_new_gci;
    saveRef->errorCode = GCPSaveRef::FakedSignalDueToNodeFailure;
    sendSignal(reference(), GSN_GCP_SAVEREF, signal,
	       GCPSaveRef::SignalLength, JBB);
  }//if

  // Fake COPY_GCICONF from the failed node.
  if (c_COPY_GCIREQ_Counter.isWaitingFor(failedNodeId)) {
    jam();
    signal->theData[0] = failedNodeId;
    sendSignal(reference(), GSN_COPY_GCICONF, signal, 1, JBB);
  }//if

  // Fake MASTER_GCPREF from the failed node so a pending MASTER_GCPREQ
  // round does not wait for it.
  if (c_MASTER_GCPREQ_Counter.isWaitingFor(failedNodeId)){
    jam();
    MasterGCPRef * const ref = (MasterGCPRef *)&signal->theData[0];
    ref->senderNodeId = failedNodeId;
    ref->failedNodeId = cmasterTakeOverNode;
    sendSignal(reference(), GSN_MASTER_GCPREF, signal,
	       MasterGCPRef::SignalLength, JBB);
  }//if

  // Fake SUB_GCP_COMPLETE_ACK, with the failed node's DIH as sender.
  if (c_SUB_GCP_COMPLETE_REP_Counter.isWaitingFor(failedNodeId))
  {
    jam();
    SubGcpCompleteAck* ack = CAST_PTR(SubGcpCompleteAck,
                                      signal->getDataPtrSend());
    ack->rep.senderRef = numberToRef(DBDIH, failedNodeId);
    sendSignal(reference(), GSN_SUB_GCP_COMPLETE_ACK, signal,
	       SubGcpCompleteAck::SignalLength, JBB);
  }
}//Dbdih::checkGcpOutstanding()
5611 
5612 
5613 void
startLcpMasterTakeOver(Signal * signal,Uint32 nodeId)5614 Dbdih::startLcpMasterTakeOver(Signal* signal, Uint32 nodeId){
5615   jam();
5616 
5617   Uint32 oldNode = c_lcpMasterTakeOverState.failedNodeId;
5618 
5619   c_lcpMasterTakeOverState.minTableId = ~0;
5620   c_lcpMasterTakeOverState.minFragId = ~0;
5621   c_lcpMasterTakeOverState.failedNodeId = nodeId;
5622 
5623   c_lcpMasterTakeOverState.set(LMTOS_WAIT_EMPTY_LCP, __LINE__);
5624 
5625   EmptyLcpReq* req = (EmptyLcpReq*)signal->getDataPtrSend();
5626   req->senderRef = reference();
5627   {
5628     NodeRecordPtr specNodePtr;
5629     specNodePtr.i = cfirstAliveNode;
5630     do {
5631       jam();
5632       ptrCheckGuard(specNodePtr, MAX_NDB_NODES, nodeRecord);
5633       if (!c_EMPTY_LCP_REQ_Counter.isWaitingFor(specNodePtr.i))
5634       {
5635         jam();
5636         c_EMPTY_LCP_REQ_Counter.setWaitingFor(specNodePtr.i);
5637         if (!(ERROR_INSERTED(7209) && specNodePtr.i == getOwnNodeId()))
5638         {
5639           sendEMPTY_LCP_REQ(signal, specNodePtr.i, 0);
5640         }
5641         else
5642         {
5643           ndbout_c("NOT sending EMPTY_LCP_REQ to %u", specNodePtr.i);
5644         }
5645 
5646         if (c_lcpState.m_LAST_LCP_FRAG_ORD.isWaitingFor(specNodePtr.i))
5647         {
5648           jam();
5649           c_lcpState.m_LAST_LCP_FRAG_ORD.clearWaitingFor();
5650         }
5651       }
5652       specNodePtr.i = specNodePtr.p->nextNode;
5653     } while (specNodePtr.i != RNIL);
5654   }
5655 
5656   NodeRecordPtr nodePtr;
5657   nodePtr.i = oldNode;
5658   if (oldNode > 0 && oldNode < MAX_NDB_NODES)
5659   {
5660     jam();
5661     ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
5662     if (nodePtr.p->m_nodefailSteps.get(NF_LCP_TAKE_OVER))
5663     {
5664       jam();
5665       checkLocalNodefailComplete(signal, oldNode, NF_LCP_TAKE_OVER);
5666     }
5667   }
5668 
5669   setLocalNodefailHandling(signal, nodeId, NF_LCP_TAKE_OVER);
5670 }
5671 
startGcpMasterTakeOver(Signal * signal,Uint32 oldMasterId)5672 void Dbdih::startGcpMasterTakeOver(Signal* signal, Uint32 oldMasterId){
5673   jam();
5674   /*--------------------------------------------------*/
5675   /*                                                  */
5676   /*       THE MASTER HAVE FAILED AND WE WERE ELECTED */
5677   /*       TO BE THE NEW MASTER NODE. WE NEED TO QUERY*/
5678   /*       ALL THE OTHER NODES ABOUT THEIR STATUS IN  */
5679   /*       ORDER TO BE ABLE TO TAKE OVER CONTROL OF   */
5680   /*       THE GLOBAL CHECKPOINT PROTOCOL AND THE     */
5681   /*       LOCAL CHECKPOINT PROTOCOL.                 */
5682   /*--------------------------------------------------*/
5683   if(!isMaster()){
5684     jam();
5685     return;
5686   }
5687   cmasterState = MASTER_TAKE_OVER_GCP;
5688   cmasterTakeOverNode = oldMasterId;
5689   MasterGCPReq * const req = (MasterGCPReq *)&signal->theData[0];
5690   req->masterRef = reference();
5691   req->failedNodeId = oldMasterId;
5692   sendLoopMacro(MASTER_GCPREQ, sendMASTER_GCPREQ, RNIL);
5693 
5694   signal->theData[0] = NDB_LE_GCP_TakeoverStarted;
5695   sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 1, JBB);
5696 
5697   /**
5698    * save own value...
5699    *   to be able to check values returned in MASTER_GCPCONF
5700    */
5701   m_gcp_save.m_master.m_new_gci = m_gcp_save.m_gci;
5702 
5703   setLocalNodefailHandling(signal, oldMasterId, NF_GCP_TAKE_OVER);
5704 }//Dbdih::handleNewMaster()
5705 
startRemoveFailedNode(Signal * signal,NodeRecordPtr failedNodePtr)5706 void Dbdih::startRemoveFailedNode(Signal* signal, NodeRecordPtr failedNodePtr)
5707 {
5708   Uint32 nodeId = failedNodePtr.i;
5709   if(failedNodePtr.p->nodeStatus != NodeRecord::DIED_NOW){
5710     jam();
5711     /**
5712      * Is node isn't alive. It can't be part of LCP
5713      */
5714     ndbrequire(!c_lcpState.m_LCP_COMPLETE_REP_Counter_LQH.isWaitingFor(nodeId));
5715 
5716     /**
5717      * And there is no point in removing any replicas
5718      *   It's dead...
5719      */
5720     return;
5721   }
5722 
5723   /**
5724    * If node has node complete LCP
5725    *   we need to remove it as undo might not be complete
5726    *   bug#31257
5727    */
5728   failedNodePtr.p->m_remove_node_from_table_lcp_id = RNIL;
5729   if (c_lcpState.m_LCP_COMPLETE_REP_Counter_LQH.isWaitingFor(failedNodePtr.i))
5730   {
5731     jam();
5732     failedNodePtr.p->m_remove_node_from_table_lcp_id = SYSFILE->latestLCP_ID;
5733   }
5734 
5735   jam();
5736 
5737   if (!ERROR_INSERTED(7194) && !ERROR_INSERTED(7221))
5738   {
5739     signal->theData[0] = DihContinueB::ZREMOVE_NODE_FROM_TABLE;
5740     signal->theData[1] = failedNodePtr.i;
5741     signal->theData[2] = 0; // Tab id
5742     sendSignal(reference(), GSN_CONTINUEB, signal, 3, JBB);
5743   }
5744   else
5745   {
5746     if (ERROR_INSERTED(7194))
5747     {
5748       ndbout_c("7194 Not starting ZREMOVE_NODE_FROM_TABLE");
5749     }
5750     else if (ERROR_INSERTED(7221))
5751     {
5752       ndbout_c("7221 Not starting ZREMOVE_NODE_FROM_TABLE");
5753     }
5754   }
5755 
5756   setLocalNodefailHandling(signal, failedNodePtr.i, NF_REMOVE_NODE_FROM_TABLE);
5757 }//Dbdih::startRemoveFailedNode()
5758 
5759 /*--------------------------------------------------*/
5760 /*       THE MASTER HAS FAILED AND THE NEW MASTER IS*/
5761 /*       QUERYING THIS NODE ABOUT THE STATE OF THE  */
5762 /*       GLOBAL CHECKPOINT PROTOCOL                 */
5763 /*--------------------------------------------------*/
execMASTER_GCPREQ(Signal * signal)5764 void Dbdih::execMASTER_GCPREQ(Signal* signal)
5765 {
5766   NodeRecordPtr failedNodePtr;
5767   MasterGCPReq * const masterGCPReq = (MasterGCPReq *)&signal->theData[0];
5768   jamEntry();
5769   const BlockReference newMasterBlockref = masterGCPReq->masterRef;
5770   const Uint32 failedNodeId = masterGCPReq->failedNodeId;
5771 
5772   failedNodePtr.i = failedNodeId;
5773   ptrCheckGuard(failedNodePtr, MAX_NDB_NODES, nodeRecord);
5774   if (failedNodePtr.p->nodeStatus == NodeRecord::ALIVE) {
5775     jam();
5776     /*--------------------------------------------------*/
5777     /*       ENSURE THAT WE HAVE PROCESSED THE SIGNAL   */
5778     /*       NODE_FAILURE BEFORE WE PROCESS THIS REQUEST*/
5779     /*       FROM THE NEW MASTER. THIS ENSURES THAT WE  */
5780     /*       HAVE REMOVED THE FAILED NODE FROM THE LIST */
5781     /*       OF ACTIVE NODES AND SO FORTH.              */
5782     /*--------------------------------------------------*/
5783     sendSignalWithDelay(reference(), GSN_MASTER_GCPREQ,
5784                         signal, 10, MasterGCPReq::SignalLength);
5785     return;
5786   } else {
5787     ndbrequire(failedNodePtr.p->nodeStatus == NodeRecord::DYING);
5788   }//if
5789 
5790   if (ERROR_INSERTED(7181))
5791   {
5792     ndbout_c("execGCP_TCFINISHED in MASTER_GCPREQ");
5793     CLEAR_ERROR_INSERT_VALUE;
5794     signal->theData[0] = c_error_7181_ref;
5795     signal->theData[1] = (Uint32)(m_micro_gcp.m_old_gci >> 32);
5796     signal->theData[2] = (Uint32)(m_micro_gcp.m_old_gci & 0xFFFFFFFF);
5797     execGCP_TCFINISHED(signal);
5798   }
5799 
5800   MasterGCPConf::State gcpState;
5801   switch(m_micro_gcp.m_state){
5802   case MicroGcp::M_GCP_IDLE:
5803     jam();
5804     gcpState = MasterGCPConf::GCP_READY;
5805     break;
5806   case MicroGcp::M_GCP_PREPARE:
5807     jam();
5808     gcpState = MasterGCPConf::GCP_PREPARE_RECEIVED;
5809     break;
5810   case MicroGcp::M_GCP_COMMIT:
5811     jam();
5812     gcpState = MasterGCPConf::GCP_COMMIT_RECEIVED;
5813     break;
5814   case MicroGcp::M_GCP_COMMITTED:
5815     jam();
5816     gcpState = MasterGCPConf::GCP_COMMITTED;
5817 
5818     /**
5819      * Change state to GCP_COMMIT_RECEIVEDn and rerun GSN_GCP_NOMORETRANS
5820      */
5821     gcpState = MasterGCPConf::GCP_COMMIT_RECEIVED;
5822     m_micro_gcp.m_state = MicroGcp::M_GCP_COMMIT;
5823 
5824     {
5825       GCPNoMoreTrans* req2 = (GCPNoMoreTrans*)signal->getDataPtrSend();
5826       req2->senderRef = reference();
5827       req2->senderData = m_micro_gcp.m_master_ref;
5828       req2->gci_hi = (Uint32)(m_micro_gcp.m_old_gci >> 32);
5829       req2->gci_lo = (Uint32)(m_micro_gcp.m_old_gci & 0xFFFFFFFF);
5830       sendSignal(clocaltcblockref, GSN_GCP_NOMORETRANS, signal,
5831                  GCPNoMoreTrans::SignalLength, JBB);
5832     }
5833     break;
5834   case MicroGcp::M_GCP_COMPLETE:
5835     /**
5836      * This is a master only state...
5837      */
5838     ndbrequire(false);
5839   }
5840 
5841   MasterGCPConf::SaveState saveState;
5842   switch(m_gcp_save.m_state){
5843   case GcpSave::GCP_SAVE_IDLE:
5844     jam();
5845     saveState = MasterGCPConf::GCP_SAVE_IDLE;
5846     break;
5847   case GcpSave::GCP_SAVE_REQ:
5848     jam();
5849     saveState = MasterGCPConf::GCP_SAVE_REQ;
5850     break;
5851   case GcpSave::GCP_SAVE_CONF:
5852     jam();
5853     saveState = MasterGCPConf::GCP_SAVE_CONF;
5854     break;
5855   case GcpSave::GCP_SAVE_COPY_GCI:
5856     jam();
5857     saveState = MasterGCPConf::GCP_SAVE_COPY_GCI;
5858     break;
5859   }
5860 
5861   MasterGCPConf * const masterGCPConf = (MasterGCPConf *)&signal->theData[0];
5862   masterGCPConf->gcpState  = gcpState;
5863   masterGCPConf->senderNodeId = cownNodeId;
5864   masterGCPConf->failedNodeId = failedNodeId;
5865   masterGCPConf->newGCP_hi = (Uint32)(m_micro_gcp.m_new_gci >> 32);
5866   masterGCPConf->latestLCP = SYSFILE->latestLCP_ID;
5867   masterGCPConf->oldestRestorableGCI = SYSFILE->oldestRestorableGCI;
5868   masterGCPConf->keepGCI = SYSFILE->keepGCI;
5869   masterGCPConf->newGCP_lo = Uint32(m_micro_gcp.m_new_gci);
5870   masterGCPConf->saveState = saveState;
5871   masterGCPConf->saveGCI = m_gcp_save.m_gci;
5872   for(Uint32 i = 0; i < NdbNodeBitmask::Size; i++)
5873     masterGCPConf->lcpActive[i] = SYSFILE->lcpActive[i];
5874 
5875   if (ERROR_INSERTED(7225))
5876   {
5877     CLEAR_ERROR_INSERT_VALUE;
5878     ndbrequire(refToNode(newMasterBlockref) == getOwnNodeId());
5879     sendSignalWithDelay(newMasterBlockref, GSN_MASTER_GCPCONF, signal,
5880                         500, MasterGCPConf::SignalLength);
5881   }
5882   else
5883   {
5884     sendSignal(newMasterBlockref, GSN_MASTER_GCPCONF, signal,
5885                MasterGCPConf::SignalLength, JBB);
5886   }
5887 
5888   if (ERROR_INSERTED(7182))
5889   {
5890     ndbout_c("execGCP_TCFINISHED in MASTER_GCPREQ");
5891     CLEAR_ERROR_INSERT_VALUE;
5892     signal->theData[0] = c_error_7181_ref;
5893     signal->theData[1] = (Uint32)(m_micro_gcp.m_old_gci >> 32);
5894     signal->theData[2] = (Uint32)(m_micro_gcp.m_old_gci & 0xFFFFFFFF);
5895     execGCP_TCFINISHED(signal);
5896   }
5897 
5898   if (c_copyGCISlave.m_expectedNextWord != 0)
5899   {
5900     jam();
5901     c_copyGCISlave.m_expectedNextWord = 0;
5902     c_copyGCISlave.m_copyReason = CopyGCIReq::IDLE;
5903   }
5904 }//Dbdih::execMASTER_GCPREQ()
5905 
execMASTER_GCPCONF(Signal * signal)5906 void Dbdih::execMASTER_GCPCONF(Signal* signal)
5907 {
5908   NodeRecordPtr senderNodePtr;
5909   MasterGCPConf * const masterGCPConf = (MasterGCPConf *)&signal->theData[0];
5910   jamEntry();
5911   senderNodePtr.i = masterGCPConf->senderNodeId;
5912   ptrCheckGuard(senderNodePtr, MAX_NDB_NODES, nodeRecord);
5913 
5914   MasterGCPConf::State gcpState = (MasterGCPConf::State)masterGCPConf->gcpState;
5915   MasterGCPConf::SaveState saveState =
5916     (MasterGCPConf::SaveState)masterGCPConf->saveState;
5917   const Uint32 failedNodeId = masterGCPConf->failedNodeId;
5918   const Uint32 newGcp_hi = masterGCPConf->newGCP_hi;
5919   const Uint32 newGcp_lo = masterGCPConf->newGCP_lo;
5920   Uint64 newGCI = newGcp_lo | (Uint64(newGcp_hi) << 32);
5921   const Uint32 latestLcpId = masterGCPConf->latestLCP;
5922   const Uint32 oldestRestorableGci = masterGCPConf->oldestRestorableGCI;
5923   const Uint32 oldestKeepGci = masterGCPConf->keepGCI;
5924   const Uint32 saveGCI = masterGCPConf->saveGCI;
5925 
5926   if (latestLcpId > SYSFILE->latestLCP_ID) {
5927     jam();
5928 #if 0
5929     g_eventLogger->info("Dbdih: Setting SYSFILE->latestLCP_ID to %d",
5930                         latestLcpId);
5931     SYSFILE->latestLCP_ID = latestLcpId;
5932 #endif
5933     SYSFILE->keepGCI = oldestKeepGci;
5934     SYSFILE->oldestRestorableGCI = oldestRestorableGci;
5935     for(Uint32 i = 0; i < NdbNodeBitmask::Size; i++)
5936       SYSFILE->lcpActive[i] = masterGCPConf->lcpActive[i];
5937   }//if
5938 
5939   bool ok = false;
5940   switch (gcpState) {
5941   case MasterGCPConf::GCP_READY:
5942     jam();
5943     ok = true;
5944     // Either not started or complete...
5945     break;
5946   case MasterGCPConf::GCP_PREPARE_RECEIVED:
5947     jam();
5948     ok = true;
5949     if (m_micro_gcp.m_master.m_state == MicroGcp::M_GCP_IDLE)
5950     {
5951       jam();
5952       m_micro_gcp.m_master.m_state = MicroGcp::M_GCP_PREPARE;
5953       m_micro_gcp.m_master.m_new_gci = newGCI;
5954     }
5955     else
5956     {
5957       jam();
5958       ndbrequire(m_micro_gcp.m_master.m_new_gci == newGCI);
5959     }
5960     break;
5961   case MasterGCPConf::GCP_COMMIT_RECEIVED:
5962     jam();
5963   case MasterGCPConf::GCP_COMMITTED:
5964     jam();
5965     ok = true;
5966     if (m_micro_gcp.m_master.m_state != MicroGcp::M_GCP_IDLE)
5967     {
5968       ndbrequire(m_micro_gcp.m_master.m_new_gci == newGCI);
5969     }
5970     m_micro_gcp.m_master.m_new_gci = newGCI;
5971     m_micro_gcp.m_master.m_state = MicroGcp::M_GCP_COMMIT;
5972     break;
5973 #ifndef VM_TRACE
5974   default:
5975     jamLine(gcpState);
5976     ndbrequire(false);
5977 #endif
5978   }
5979   ndbassert(ok); // Unhandled case...
5980 
5981   ok = false;
5982   /**
5983    * GCI should differ with atmost one
5984    */
5985   ndbrequire(saveGCI == m_gcp_save.m_gci ||
5986              saveGCI == m_gcp_save.m_gci + 1 ||
5987              saveGCI + 1 == m_gcp_save.m_gci);
5988   if (saveGCI > m_gcp_save.m_master.m_new_gci)
5989   {
5990     jam();
5991     m_gcp_save.m_master.m_new_gci = saveGCI;
5992   }
5993   switch(saveState){
5994   case MasterGCPConf::GCP_SAVE_IDLE:
5995     jam();
5996     break;
5997   case MasterGCPConf::GCP_SAVE_REQ:
5998     jam();
5999     if (m_gcp_save.m_master.m_state == GcpSave::GCP_SAVE_IDLE)
6000     {
6001       jam();
6002       m_gcp_save.m_master.m_state = GcpSave::GCP_SAVE_REQ;
6003     }
6004     break;
6005   case MasterGCPConf::GCP_SAVE_CONF:
6006     jam();
6007     if (m_gcp_save.m_master.m_state == GcpSave::GCP_SAVE_IDLE)
6008     {
6009       jam();
6010       m_gcp_save.m_master.m_state = GcpSave::GCP_SAVE_REQ;
6011     }
6012     break;
6013   case MasterGCPConf::GCP_SAVE_COPY_GCI:
6014     jam();
6015     if (m_gcp_save.m_master.m_state == GcpSave::GCP_SAVE_IDLE)
6016     {
6017       jam();
6018       m_gcp_save.m_master.m_state = GcpSave::GCP_SAVE_COPY_GCI;
6019     }
6020     break;
6021 #ifndef VM_TRACE
6022   default:
6023     jamLine(saveState);
6024     ndbrequire(false);
6025 #endif
6026   }
6027   //ndbassert(ok); // Unhandled case
6028 
6029   receiveLoopMacro(MASTER_GCPREQ, senderNodePtr.i);
6030   /*-------------------------------------------------------------------------*/
6031   // We have now received all responses and are ready to take over the GCP
6032   // protocol as master.
6033   /*-------------------------------------------------------------------------*/
6034   MASTER_GCPhandling(signal, failedNodeId);
6035 
6036   return;
6037 }//Dbdih::execMASTER_GCPCONF()
6038 
execMASTER_GCPREF(Signal * signal)6039 void Dbdih::execMASTER_GCPREF(Signal* signal)
6040 {
6041   const MasterGCPRef * const ref = (MasterGCPRef *)&signal->theData[0];
6042   jamEntry();
6043   receiveLoopMacro(MASTER_GCPREQ, ref->senderNodeId);
6044   /*-------------------------------------------------------------------------*/
6045   // We have now received all responses and are ready to take over the GCP
6046   // protocol as master.
6047   /*-------------------------------------------------------------------------*/
6048   MASTER_GCPhandling(signal, ref->failedNodeId);
6049 }//Dbdih::execMASTER_GCPREF()
6050 
MASTER_GCPhandling(Signal * signal,Uint32 failedNodeId)6051 void Dbdih::MASTER_GCPhandling(Signal* signal, Uint32 failedNodeId)
6052 {
6053   cmasterState = MASTER_ACTIVE;
6054 
6055   m_micro_gcp.m_master.m_start_time = 0;
6056   m_gcp_save.m_master.m_start_time = 0;
6057   if (m_gcp_monitor.m_micro_gcp.m_max_lag > 0)
6058   {
6059     infoEvent("GCP Monitor: Computed max GCP_SAVE lag to %u seconds",
6060               m_gcp_monitor.m_gcp_save.m_max_lag / 10);
6061     infoEvent("GCP Monitor: Computed max GCP_COMMIT lag to %u seconds",
6062               m_gcp_monitor.m_micro_gcp.m_max_lag / 10);
6063   }
6064   else
6065   {
6066     infoEvent("GCP Monitor: unlimited lags allowed");
6067   }
6068 
6069   bool ok = false;
6070   switch(m_micro_gcp.m_master.m_state){
6071   case MicroGcp::M_GCP_IDLE:
6072     jam();
6073     ok = true;
6074     signal->theData[0] = DihContinueB::ZSTART_GCP;
6075     sendSignal(reference(), GSN_CONTINUEB, signal, 1, JBB);
6076     break;
6077   case MicroGcp::M_GCP_PREPARE:
6078   {
6079     jam();
6080     ok = true;
6081 
6082     /**
6083      * Restart GCP_PREPARE
6084      */
6085     sendLoopMacro(GCP_PREPARE, sendGCP_PREPARE, RNIL);
6086     break;
6087   }
6088   case MicroGcp::M_GCP_COMMIT:
6089   {
6090     jam();
6091     ok = true;
6092 
6093     /**
6094      * Restart GCP_COMMIT
6095      */
6096     sendLoopMacro(GCP_COMMIT, sendGCP_COMMIT, RNIL);
6097     break;
6098   }
6099   case MicroGcp::M_GCP_COMMITTED:
6100     jam();
6101     ndbrequire(false);
6102   case MicroGcp::M_GCP_COMPLETE:
6103     jam();
6104     ndbrequire(false);
6105 #ifndef VM_TRACE
6106   default:
6107     jamLine(m_micro_gcp.m_master.m_state);
6108     ndbrequire(false);
6109 #endif
6110   }
6111   ndbassert(ok);
6112 
6113   if (m_micro_gcp.m_enabled == false)
6114   {
6115     jam();
6116     m_gcp_save.m_master.m_state = GcpSave::GCP_SAVE_IDLE;
6117   }
6118   else
6119   {
6120     ok = false;
6121     switch(m_gcp_save.m_master.m_state){
6122     case GcpSave::GCP_SAVE_IDLE:
6123       jam();
6124       ok = true;
6125       break;
6126     case GcpSave::GCP_SAVE_REQ:
6127     {
6128       jam();
6129       ok = true;
6130 
6131       /**
6132        * Restart GCP_SAVE_REQ
6133        */
6134       sendLoopMacro(GCP_SAVEREQ, sendGCP_SAVEREQ, RNIL);
6135       break;
6136     }
6137     case GcpSave::GCP_SAVE_CONF:
6138       jam();
6139     case GcpSave::GCP_SAVE_COPY_GCI:
6140       jam();
6141       ok = true;
6142       copyGciLab(signal, CopyGCIReq::GLOBAL_CHECKPOINT);
6143       m_gcp_save.m_master.m_state = GcpSave::GCP_SAVE_COPY_GCI;
6144       break;
6145 #ifndef VM_TRACE
6146     default:
6147       jamLine(m_gcp_save.m_master.m_state);
6148       ndbrequire(false);
6149 #endif
6150     }
6151     ndbrequire(ok);
6152   }
6153 
6154   signal->theData[0] = NDB_LE_GCP_TakeoverCompleted;
6155   signal->theData[1] = m_micro_gcp.m_master.m_state;
6156   signal->theData[2] = m_gcp_save.m_master.m_state;
6157   sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 1, JBB);
6158 
6159   infoEvent("kk: %u/%u %u %u",
6160             Uint32(m_micro_gcp.m_current_gci >> 32),
6161             Uint32(m_micro_gcp.m_current_gci),
6162             m_micro_gcp.m_master.m_state,
6163             m_gcp_save.m_master.m_state);
6164 
6165   /*--------------------------------------------------*/
6166   /*       WE SEPARATE HANDLING OF GLOBAL CHECKPOINTS */
6167   /*       AND LOCAL CHECKPOINTS HERE. LCP'S HAVE TO  */
6168   /*       REMOVE ALL FAILED FRAGMENTS BEFORE WE CAN  */
6169   /*       HANDLE THE LCP PROTOCOL.                   */
6170   /*--------------------------------------------------*/
6171   checkLocalNodefailComplete(signal, failedNodeId, NF_GCP_TAKE_OVER);
6172 
6173   startGcpMonitor(signal);
6174 
6175   return;
6176 }//Dbdih::masterGcpConfFromFailedLab()
6177 
6178 void
invalidateNodeLCP(Signal * signal,Uint32 nodeId,Uint32 tableId)6179 Dbdih::invalidateNodeLCP(Signal* signal, Uint32 nodeId, Uint32 tableId)
6180 {
6181   jamEntry();
6182   TabRecordPtr tabPtr;
6183   tabPtr.i = tableId;
6184   const Uint32 RT_BREAK = 64;
6185   if (ERROR_INSERTED(7125)) {
6186     return;
6187   }//if
6188   for (Uint32 i = 0; i<RT_BREAK; i++) {
6189     jam();
6190     if (tabPtr.i >= ctabFileSize){
6191       jam();
6192       /**
6193        * Ready with entire loop
6194        * Return to master
6195        */
6196       if (ERROR_INSERTED(7204))
6197       {
6198         CLEAR_ERROR_INSERT_VALUE;
6199       }
6200       setAllowNodeStart(nodeId, true);
6201       if (getNodeStatus(nodeId) == NodeRecord::STARTING) {
6202         jam();
6203         StartInfoConf * conf = (StartInfoConf*)&signal->theData[0];
6204         conf->sendingNodeId = cownNodeId;
6205         conf->startingNodeId = nodeId;
6206         sendSignal(cmasterdihref, GSN_START_INFOCONF, signal,
6207                    StartInfoConf::SignalLength, JBB);
6208       }//if
6209       return;
6210     }//if
6211     ptrAss(tabPtr, tabRecord);
6212     if (tabPtr.p->tabStatus == TabRecord::TS_ACTIVE) {
6213       jam();
6214       invalidateNodeLCP(signal, nodeId, tabPtr);
6215       return;
6216     }//if
6217     tabPtr.i++;
6218   }//for
6219   signal->theData[0] = DihContinueB::ZINVALIDATE_NODE_LCP;
6220   signal->theData[1] = nodeId;
6221   signal->theData[2] = tabPtr.i;
6222   sendSignal(reference(), GSN_CONTINUEB, signal, 3, JBB);
6223 }//Dbdih::invalidateNodeLCP()
6224 
6225 void
invalidateNodeLCP(Signal * signal,Uint32 nodeId,TabRecordPtr tabPtr)6226 Dbdih::invalidateNodeLCP(Signal* signal, Uint32 nodeId, TabRecordPtr tabPtr)
6227 {
6228   /**
6229    * Check so that no one else is using the tab descriptior
6230    */
6231   if (tabPtr.p->tabCopyStatus != TabRecord::CS_IDLE) {
6232     jam();
6233     signal->theData[0] = DihContinueB::ZINVALIDATE_NODE_LCP;
6234     signal->theData[1] = nodeId;
6235     signal->theData[2] = tabPtr.i;
6236     sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 20, 3);
6237     return;
6238   }//if
6239 
6240   /**
6241    * For each fragment
6242    */
6243   bool modified = false;
6244   FragmentstorePtr fragPtr;
6245   for(Uint32 fragNo = 0; fragNo < tabPtr.p->totalfragments; fragNo++){
6246     jam();
6247     getFragstore(tabPtr.p, fragNo, fragPtr);
6248     /**
6249      * For each of replica record
6250      */
6251     ReplicaRecordPtr replicaPtr;
6252     for(replicaPtr.i = fragPtr.p->oldStoredReplicas; replicaPtr.i != RNIL;
6253         replicaPtr.i = replicaPtr.p->nextReplica) {
6254       jam();
6255       ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
6256       if(replicaPtr.p->procNode == nodeId){
6257         jam();
6258         /**
6259          * Found one with correct node id
6260          */
6261         /**
6262          * Invalidate all LCP's
6263          */
6264         modified = true;
6265         for(int i = 0; i < MAX_LCP_STORED; i++) {
6266           replicaPtr.p->lcpStatus[i] = ZINVALID;
6267         }//if
6268         /**
6269          * And reset nextLcp
6270          */
6271         replicaPtr.p->nextLcp = 0;
6272         replicaPtr.p->noCrashedReplicas = 0;
6273       }//if
6274     }//for
6275   }//for
6276 
6277   if (modified) {
6278     jam();
6279     /**
6280      * Save table description to disk
6281      */
6282     tabPtr.p->tabCopyStatus  = TabRecord::CS_INVALIDATE_NODE_LCP;
6283     tabPtr.p->tabUpdateState = TabRecord::US_INVALIDATE_NODE_LCP;
6284     tabPtr.p->tabRemoveNode  = nodeId;
6285     signal->theData[0] = DihContinueB::ZPACK_TABLE_INTO_PAGES;
6286     signal->theData[1] = tabPtr.i;
6287     sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
6288     return;
6289   }
6290 
6291   jam();
6292   /**
6293    * Move to next table
6294    */
6295   tabPtr.i++;
6296   signal->theData[0] = DihContinueB::ZINVALIDATE_NODE_LCP;
6297   signal->theData[1] = nodeId;
6298   signal->theData[2] = tabPtr.i;
6299 
6300   if (ERROR_INSERTED(7204))
6301   {
6302     sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 2000, 3);
6303   }
6304   else
6305   {
6306     sendSignal(reference(), GSN_CONTINUEB, signal, 3, JBB);
6307   }
6308   return;
6309 }//Dbdih::invalidateNodeLCP()
6310 
6311 /*------------------------------------------------*/
6312 /*       INPUT:  TABPTR                           */
6313 /*               TNODEID                          */
6314 /*------------------------------------------------*/
removeNodeFromTables(Signal * signal,Uint32 nodeId,Uint32 tableId)6315 void Dbdih::removeNodeFromTables(Signal* signal,
6316 				 Uint32 nodeId, Uint32 tableId)
6317 {
6318   jamEntry();
6319   TabRecordPtr tabPtr;
6320   tabPtr.i = tableId;
6321   const Uint32 RT_BREAK = 64;
6322   for (Uint32 i = 0; i<RT_BREAK; i++) {
6323     jam();
6324     if (tabPtr.i >= ctabFileSize){
6325       jam();
6326       removeNodeFromTablesComplete(signal, nodeId);
6327       return;
6328     }//if
6329 
6330     ptrAss(tabPtr, tabRecord);
6331     if (tabPtr.p->tabStatus == TabRecord::TS_ACTIVE) {
6332       jam();
6333       removeNodeFromTable(signal, nodeId, tabPtr);
6334       return;
6335     }//if
6336     tabPtr.i++;
6337   }//for
6338   signal->theData[0] = DihContinueB::ZREMOVE_NODE_FROM_TABLE;
6339   signal->theData[1] = nodeId;
6340   signal->theData[2] = tabPtr.i;
6341   sendSignal(reference(), GSN_CONTINUEB, signal, 3, JBB);
6342 }
6343 
removeNodeFromTable(Signal * signal,Uint32 nodeId,TabRecordPtr tabPtr)6344 void Dbdih::removeNodeFromTable(Signal* signal,
6345 				Uint32 nodeId, TabRecordPtr tabPtr){
6346 
6347   /**
6348    * Check so that no one else is using the tab descriptior
6349    */
6350   if (tabPtr.p->tabCopyStatus != TabRecord::CS_IDLE) {
6351     jam();
6352     signal->theData[0] = DihContinueB::ZREMOVE_NODE_FROM_TABLE;
6353     signal->theData[1] = nodeId;
6354     signal->theData[2] = tabPtr.i;
6355     sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 20, 3);
6356     return;
6357   }//if
6358 
6359   NodeRecordPtr nodePtr;
6360   nodePtr.i = nodeId;
6361   ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
6362   const Uint32 lcpId = nodePtr.p->m_remove_node_from_table_lcp_id;
6363 
6364   /**
6365    * For each fragment
6366    */
6367   Uint32 noOfRemovedReplicas = 0;     // No of replicas removed
6368   Uint32 noOfRemovedLcpReplicas = 0;  // No of replicas in LCP removed
6369   Uint32 noOfRemainingLcpReplicas = 0;// No of replicas in LCP remaining
6370 
6371   const bool lcpOngoingFlag = (tabPtr.p->tabLcpStatus== TabRecord::TLS_ACTIVE);
6372   const bool unlogged = (tabPtr.p->tabStorage != TabRecord::ST_NORMAL);
6373 
6374   FragmentstorePtr fragPtr;
6375   for(Uint32 fragNo = 0; fragNo < tabPtr.p->totalfragments; fragNo++){
6376     jam();
6377     getFragstore(tabPtr.p, fragNo, fragPtr);
6378 
6379     /**
6380      * For each of replica record
6381      */
6382     bool found = false;
6383     ReplicaRecordPtr replicaPtr;
6384     for(replicaPtr.i = fragPtr.p->storedReplicas; replicaPtr.i != RNIL;
6385         replicaPtr.i = replicaPtr.p->nextReplica) {
6386       jam();
6387 
6388       ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
6389       if(replicaPtr.p->procNode == nodeId){
6390         jam();
6391 	found = true;
6392 	noOfRemovedReplicas++;
6393 	removeNodeFromStored(nodeId, fragPtr, replicaPtr, unlogged);
6394 	if(replicaPtr.p->lcpOngoingFlag){
6395 	  jam();
6396 	  /**
6397 	   * This replica is currently LCP:ed
6398 	   */
6399 	  ndbrequire(fragPtr.p->noLcpReplicas > 0);
6400 	  fragPtr.p->noLcpReplicas --;
6401 
6402 	  noOfRemovedLcpReplicas ++;
6403 	  replicaPtr.p->lcpOngoingFlag = false;
6404 	}
6405 
6406         if (lcpId != RNIL)
6407         {
6408           jam();
6409           Uint32 lcpNo = prevLcpNo(replicaPtr.p->nextLcp);
6410           if (replicaPtr.p->lcpStatus[lcpNo] == ZVALID &&
6411               replicaPtr.p->lcpId[lcpNo] == lcpId)
6412           {
6413             jam();
6414             replicaPtr.p->lcpStatus[lcpNo] = ZINVALID;
6415             replicaPtr.p->lcpId[lcpNo] = 0;
6416             replicaPtr.p->nextLcp = lcpNo;
6417             ndbout_c("REMOVING lcp: %u from table: %u frag: %u node: %u",
6418                      SYSFILE->latestLCP_ID,
6419                      tabPtr.i, fragNo, nodeId);
6420           }
6421         }
6422       }
6423     }
6424 
6425     /**
6426      * Run updateNodeInfo to remove any dead nodes from list of activeNodes
6427      *  see bug#15587
6428      */
6429     updateNodeInfo(fragPtr);
6430     noOfRemainingLcpReplicas += fragPtr.p->noLcpReplicas;
6431   }
6432 
6433   if (noOfRemovedReplicas == 0)
6434   {
6435     jam();
6436     /**
6437      * The table had no replica on the failed node
6438      *   continue with next table
6439      */
6440     tabPtr.i++;
6441     signal->theData[0] = DihContinueB::ZREMOVE_NODE_FROM_TABLE;
6442     signal->theData[1] = nodeId;
6443     signal->theData[2] = tabPtr.i;
6444     sendSignal(reference(), GSN_CONTINUEB, signal, 3, JBB);
6445     return;
6446   }
6447 
6448   /**
6449    * We did remove at least one replica
6450    */
6451   bool ok = false;
6452   switch(tabPtr.p->tabLcpStatus){
6453   case TabRecord::TLS_COMPLETED:
6454     ok = true;
6455     jam();
6456     /**
6457      * WE WILL WRITE THE TABLE DESCRIPTION TO DISK AT THIS TIME
6458      * INDEPENDENT OF WHAT THE LOCAL CHECKPOINT NEEDED.
6459      * THIS IS TO ENSURE THAT THE FAILED NODES ARE ALSO UPDATED ON DISK
6460      * IN THE DIH DATA STRUCTURES BEFORE WE COMPLETE HANDLING OF THE
6461      * NODE FAILURE.
6462      */
6463     ndbrequire(noOfRemovedLcpReplicas == 0);
6464 
6465     tabPtr.p->tabCopyStatus = TabRecord::CS_REMOVE_NODE;
6466     tabPtr.p->tabUpdateState = TabRecord::US_REMOVE_NODE;
6467     tabPtr.p->tabRemoveNode = nodeId;
6468     signal->theData[0] = DihContinueB::ZPACK_TABLE_INTO_PAGES;
6469     signal->theData[1] = tabPtr.i;
6470     sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
6471     return;
6472     break;
6473   case TabRecord::TLS_ACTIVE:
6474     ok = true;
6475     jam();
6476     /**
6477      * The table is participating in an LCP currently
6478      */
6479     // Fall through
6480     break;
6481   case TabRecord::TLS_WRITING_TO_FILE:
6482     ok = true;
6483     jam();
6484     /**
6485      * This should never happen since we in the beginning of this function
6486      * checks the tabCopyStatus
6487      */
6488     ndbrequire(lcpOngoingFlag);
6489     ndbrequire(false);
6490     break;
6491   }
6492   ndbrequire(ok);
6493 
6494   /**
6495    * The table is participating in an LCP currently
6496    *   and we removed some replicas that should have been checkpointed
6497    */
6498   ndbrequire(c_lcpState.lcpStatus != LCP_STATUS_IDLE);
6499   ndbrequire(tabPtr.p->tabLcpStatus == TabRecord::TLS_ACTIVE);
6500 
6501   /**
6502    * Save the table
6503    */
6504   tabPtr.p->tabCopyStatus = TabRecord::CS_REMOVE_NODE;
6505   tabPtr.p->tabUpdateState = TabRecord::US_REMOVE_NODE;
6506   tabPtr.p->tabRemoveNode = nodeId;
6507   signal->theData[0] = DihContinueB::ZPACK_TABLE_INTO_PAGES;
6508   signal->theData[1] = tabPtr.i;
6509   sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
6510 
6511   if(noOfRemainingLcpReplicas == 0){
6512     jam();
6513     /**
6514      * The removal on the failed node made the LCP complete
6515      */
6516     tabPtr.p->tabLcpStatus = TabRecord::TLS_WRITING_TO_FILE;
6517     checkLcpAllTablesDoneInLqh(__LINE__);
6518   }
6519 }
6520 
6521 void
removeNodeFromTablesComplete(Signal * signal,Uint32 nodeId)6522 Dbdih::removeNodeFromTablesComplete(Signal* signal, Uint32 nodeId){
6523   jam();
6524 
6525   /**
6526    * Check if we "accidently" completed a LCP
6527    */
6528   checkLcpCompletedLab(signal);
6529 
6530   /**
6531    * Check if we (DIH) are finished with node fail handling
6532    */
6533   checkLocalNodefailComplete(signal, nodeId, NF_REMOVE_NODE_FROM_TABLE);
6534 }
6535 
6536 void
checkLocalNodefailComplete(Signal * signal,Uint32 failedNodeId,NodefailHandlingStep step)6537 Dbdih::checkLocalNodefailComplete(Signal* signal, Uint32 failedNodeId,
6538 				  NodefailHandlingStep step){
6539   jam();
6540 
6541   NodeRecordPtr nodePtr;
6542   nodePtr.i = failedNodeId;
6543   ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
6544 
6545   ndbrequire(nodePtr.p->m_nodefailSteps.get(step));
6546   nodePtr.p->m_nodefailSteps.clear(step);
6547 
6548   if(nodePtr.p->m_nodefailSteps.count() > 0){
6549     jam();
6550     return;
6551   }
6552 
6553   if (ERROR_INSERTED(7030))
6554   {
6555     g_eventLogger->info("Reenable GCP_PREPARE");
6556     CLEAR_ERROR_INSERT_VALUE;
6557   }
6558 
6559   NFCompleteRep * const nf = (NFCompleteRep *)&signal->theData[0];
6560   nf->blockNo = DBDIH;
6561   nf->nodeId = cownNodeId;
6562   nf->failedNodeId = failedNodeId;
6563   nf->from = __LINE__;
6564   sendSignal(reference(), GSN_NF_COMPLETEREP, signal,
6565              NFCompleteRep::SignalLength, JBB);
6566 }
6567 
6568 
6569 void
setLocalNodefailHandling(Signal * signal,Uint32 failedNodeId,NodefailHandlingStep step)6570 Dbdih::setLocalNodefailHandling(Signal* signal, Uint32 failedNodeId,
6571 				NodefailHandlingStep step){
6572   jam();
6573 
6574   NodeRecordPtr nodePtr;
6575   nodePtr.i = failedNodeId;
6576   ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
6577 
6578   ndbrequire(!nodePtr.p->m_nodefailSteps.get(step));
6579   nodePtr.p->m_nodefailSteps.set(step);
6580 }
6581 
startLcpTakeOverLab(Signal * signal,Uint32 failedNodeId)6582 void Dbdih::startLcpTakeOverLab(Signal* signal, Uint32 failedNodeId)
6583 {
6584   /*--------------------------------------------------------------------*/
6585   // Start LCP master take over process. Consists of the following steps.
6586   // 1) Ensure that all LQH's have reported all fragments they have been
6587   // told to checkpoint. Can be a fairly long step time-wise.
6588   // 2) Query all nodes about their LCP status.
6589   // During the query process we do not want our own state to change.
6590   // This can change due to delayed reception of LCP_REPORT, completed
6591   // save of table on disk or reception of DIH_LCPCOMPLETE from other
6592   // node.
6593   /*--------------------------------------------------------------------*/
6594 }//Dbdih::startLcpTakeOver()
6595 
6596 void
execEMPTY_LCP_REP(Signal * signal)6597 Dbdih::execEMPTY_LCP_REP(Signal* signal)
6598 {
6599   jamEntry();
6600   EmptyLcpRep* rep = (EmptyLcpRep*)signal->getDataPtr();
6601 
6602   Uint32 len = signal->getLength();
6603   ndbrequire(len > EmptyLcpRep::SignalLength);
6604   len -= EmptyLcpRep::SignalLength;
6605 
6606   NdbNodeBitmask nodes;
6607   nodes.assign(NdbNodeBitmask::Size, rep->receiverGroup);
6608   NodeReceiverGroup rg (DBDIH, nodes);
6609   memmove(signal->getDataPtrSend(),
6610           signal->getDataPtr()+EmptyLcpRep::SignalLength, 4*len);
6611 
6612   sendSignal(rg, GSN_EMPTY_LCP_CONF, signal, len, JBB);
6613 }
6614 
execEMPTY_LCP_CONF(Signal * signal)6615 void Dbdih::execEMPTY_LCP_CONF(Signal* signal)
6616 {
6617   jamEntry();
6618 
6619   ndbrequire(c_lcpMasterTakeOverState.state == LMTOS_WAIT_EMPTY_LCP);
6620 
6621   const EmptyLcpConf * const conf = (EmptyLcpConf *)&signal->theData[0];
6622   Uint32 nodeId = conf->senderNodeId;
6623 
6624   CRASH_INSERTION(7206);
6625 
6626 
6627   if(!conf->idle){
6628     jam();
6629     if (conf->tableId < c_lcpMasterTakeOverState.minTableId) {
6630       jam();
6631       c_lcpMasterTakeOverState.minTableId = conf->tableId;
6632       c_lcpMasterTakeOverState.minFragId = conf->fragmentId;
6633     } else if (conf->tableId == c_lcpMasterTakeOverState.minTableId &&
6634 	       conf->fragmentId < c_lcpMasterTakeOverState.minFragId) {
6635       jam();
6636       c_lcpMasterTakeOverState.minFragId = conf->fragmentId;
6637     }//if
6638     if(isMaster()){
6639       jam();
6640       c_lcpState.m_LAST_LCP_FRAG_ORD.setWaitingFor(nodeId);
6641     }
6642   }
6643 
6644   receiveLoopMacro(EMPTY_LCP_REQ, nodeId);
6645   /*--------------------------------------------------------------------*/
6646   // Received all EMPTY_LCPCONF. We can continue with next phase of the
6647   // take over LCP master process.
6648   /*--------------------------------------------------------------------*/
6649   c_lcpMasterTakeOverState.set(LMTOS_WAIT_LCP_FRAG_REP, __LINE__);
6650   checkEmptyLcpComplete(signal);
6651   return;
6652 }//Dbdih::execEMPTY_LCPCONF()
6653 
6654 void
checkEmptyLcpComplete(Signal * signal)6655 Dbdih::checkEmptyLcpComplete(Signal *signal){
6656 
6657   ndbrequire(c_lcpMasterTakeOverState.state == LMTOS_WAIT_LCP_FRAG_REP);
6658 
6659   if(c_lcpState.noOfLcpFragRepOutstanding > 0){
6660     jam();
6661     return;
6662   }
6663 
6664   if(isMaster()){
6665     jam();
6666 
6667     signal->theData[0] = NDB_LE_LCP_TakeoverStarted;
6668     sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 1, JBB);
6669 
6670     signal->theData[0] = 7012;
6671     execDUMP_STATE_ORD(signal);
6672 
6673     if (ERROR_INSERTED(7194))
6674     {
6675       ndbout_c("7194 starting ZREMOVE_NODE_FROM_TABLE");
6676       signal->theData[0] = DihContinueB::ZREMOVE_NODE_FROM_TABLE;
6677       signal->theData[1] = c_lcpMasterTakeOverState.failedNodeId;
6678       signal->theData[2] = 0; // Tab id
6679       sendSignal(reference(), GSN_CONTINUEB, signal, 3, JBB);
6680     }
6681 
6682     c_current_time = NdbTick_CurrentMillisecond();
6683     c_lcpState.m_start_time = c_current_time;
6684     c_lcpMasterTakeOverState.set(LMTOS_INITIAL, __LINE__);
6685     MasterLCPReq * const req = (MasterLCPReq *)&signal->theData[0];
6686     req->masterRef = reference();
6687     req->failedNodeId = c_lcpMasterTakeOverState.failedNodeId;
6688     sendLoopMacro(MASTER_LCPREQ, sendMASTER_LCPREQ, RNIL);
6689 
6690   } else {
6691     sendMASTER_LCPCONF(signal);
6692   }
6693 }
6694 
6695 /*--------------------------------------------------*/
6696 /*       THE MASTER HAS FAILED AND THE NEW MASTER IS*/
6697 /*       QUERYING THIS NODE ABOUT THE STATE OF THE  */
6698 /*       LOCAL CHECKPOINT PROTOCOL.                 */
6699 /*--------------------------------------------------*/
execMASTER_LCPREQ(Signal * signal)6700 void Dbdih::execMASTER_LCPREQ(Signal* signal)
6701 {
6702   const MasterLCPReq * const req = (MasterLCPReq *)&signal->theData[0];
6703   jamEntry();
6704   const BlockReference newMasterBlockref = req->masterRef;
6705 
6706   CRASH_INSERTION(7205);
6707 
6708   if (ERROR_INSERTED(7207))
6709   {
6710     jam();
6711     SET_ERROR_INSERT_VALUE(7208);
6712     sendSignalWithDelay(reference(), GSN_MASTER_LCPREQ, signal,
6713 			500, signal->getLength());
6714     return;
6715   }
6716 
6717   if (ERROR_INSERTED(7208))
6718   {
6719     jam();
6720     signal->theData[0] = 9999;
6721     sendSignal(numberToRef(CMVMI, refToNode(newMasterBlockref)),
6722                GSN_NDB_TAMPER, signal, 1, JBB);
6723   }
6724 
6725   if (newMasterBlockref != cmasterdihref)
6726   {
6727     jam();
6728     ndbout_c("resending GSN_MASTER_LCPREQ");
6729     sendSignalWithDelay(reference(), GSN_MASTER_LCPREQ, signal,
6730 			50, signal->getLength());
6731     return;
6732   }
6733   Uint32 failedNodeId = req->failedNodeId;
6734 
6735   /**
6736    * There can be no take over with the same master
6737    */
6738   ndbrequire(c_lcpState.m_masterLcpDihRef != newMasterBlockref);
6739   c_lcpState.m_masterLcpDihRef = newMasterBlockref;
6740   c_lcpState.m_MASTER_LCPREQ_Received = true;
6741   c_lcpState.m_MASTER_LCPREQ_FailedNodeId = failedNodeId;
6742 
6743   if(newMasterBlockref != cmasterdihref){
6744     jam();
6745     ndbrequire(0);
6746   }
6747 
6748   if (ERROR_INSERTED(7209))
6749   {
6750     SET_ERROR_INSERT_VALUE(7210);
6751   }
6752 
6753   sendMASTER_LCPCONF(signal);
6754 }//Dbdih::execMASTER_LCPREQ()
6755 
6756 void
sendMASTER_LCPCONF(Signal * signal)6757 Dbdih::sendMASTER_LCPCONF(Signal * signal){
6758 
6759   if(!c_EMPTY_LCP_REQ_Counter.done()){
6760     /**
6761      * Have not received all EMPTY_LCP_REP
6762      * dare not answer MASTER_LCP_CONF yet
6763      */
6764     jam();
6765     return;
6766   }
6767 
6768   if(!c_lcpState.m_MASTER_LCPREQ_Received){
6769     jam();
6770     /**
6771      * Has not received MASTER_LCPREQ yet
6772      */
6773     return;
6774   }
6775 
6776   if(c_lcpState.lcpStatus == LCP_INIT_TABLES){
6777     jam();
6778     /**
6779      * Still aborting old initLcpLab
6780      */
6781     return;
6782   }
6783 
6784   if(c_lcpState.lcpStatus == LCP_COPY_GCI)
6785   {
6786     jam();
6787     /**
6788      * Restart it
6789      */
6790     //Uint32 lcpId = SYSFILE->latestLCP_ID;
6791     SYSFILE->latestLCP_ID--;
6792     Sysfile::clearLCPOngoing(SYSFILE->systemRestartBits);
6793     c_lcpState.setLcpStatus(LCP_STATUS_IDLE, __LINE__);
6794 #if 0
6795     if(c_copyGCISlave.m_copyReason == CopyGCIReq::LOCAL_CHECKPOINT){
6796       g_eventLogger->info("Dbdih: Also resetting c_copyGCISlave");
6797       c_copyGCISlave.m_copyReason = CopyGCIReq::IDLE;
6798       c_copyGCISlave.m_expectedNextWord = 0;
6799     }
6800 #endif
6801   }
6802 
6803   MasterLCPConf::State lcpState;
6804   switch (c_lcpState.lcpStatus) {
6805   case LCP_STATUS_IDLE:
6806     jam();
6807     /*------------------------------------------------*/
6808     /*       LOCAL CHECKPOINT IS CURRENTLY NOT ACTIVE */
6809     /*       SINCE NO COPY OF RESTART INFORMATION HAVE*/
6810     /*       BEEN RECEIVED YET. ALSO THE PREVIOUS     */
6811     /*       CHECKPOINT HAVE BEEN FULLY COMPLETED.    */
6812     /*------------------------------------------------*/
6813     lcpState = MasterLCPConf::LCP_STATUS_IDLE;
6814     break;
6815   case LCP_STATUS_ACTIVE:
6816     jam();
6817     /*--------------------------------------------------*/
6818     /*       COPY OF RESTART INFORMATION HAS BEEN       */
6819     /*       PERFORMED AND ALSO RESPONSE HAVE BEEN SENT.*/
6820     /*--------------------------------------------------*/
6821     lcpState = MasterLCPConf::LCP_STATUS_ACTIVE;
6822     break;
6823   case LCP_TAB_COMPLETED:
6824     jam();
6825     /*--------------------------------------------------------*/
6826     /*       ALL LCP_REPORT'S HAVE BEEN COMPLETED FOR         */
6827     /*       ALL TABLES.     SAVE OF AT LEAST ONE TABLE IS    */
6828     /*       ONGOING YET.                                     */
6829     /*--------------------------------------------------------*/
6830     lcpState = MasterLCPConf::LCP_TAB_COMPLETED;
6831     break;
6832   case LCP_TAB_SAVED:
6833     jam();
6834     /*--------------------------------------------------------*/
6835     /*       ALL LCP_REPORT'S HAVE BEEN COMPLETED FOR         */
6836     /*       ALL TABLES.     ALL TABLES HAVE ALSO BEEN SAVED  */
6837     /*       ALL OTHER NODES ARE NOT YET FINISHED WITH        */
6838     /*       THE LOCAL CHECKPOINT.                            */
6839     /*--------------------------------------------------------*/
6840     lcpState = MasterLCPConf::LCP_TAB_SAVED;
6841     break;
6842   case LCP_TCGET:
6843   case LCP_CALCULATE_KEEP_GCI:
6844   case LCP_TC_CLOPSIZE:
6845   case LCP_START_LCP_ROUND:
6846     /**
6847      * These should only exists on the master
6848      *   but since this is master take over
6849      *   it not allowed
6850      */
6851     ndbrequire(false);
6852     lcpState= MasterLCPConf::LCP_STATUS_IDLE; // remove warning
6853     break;
6854   case LCP_COPY_GCI:
6855   case LCP_INIT_TABLES:
6856     /**
6857      * These two states are handled by if statements above
6858      */
6859     ndbrequire(false);
6860     lcpState= MasterLCPConf::LCP_STATUS_IDLE; // remove warning
6861     break;
6862   default:
6863     ndbrequire(false);
6864     lcpState= MasterLCPConf::LCP_STATUS_IDLE; // remove warning
6865   }//switch
6866 
6867   Uint32 failedNodeId = c_lcpState.m_MASTER_LCPREQ_FailedNodeId;
6868   MasterLCPConf * const conf = (MasterLCPConf *)&signal->theData[0];
6869   conf->senderNodeId = cownNodeId;
6870   conf->lcpState = lcpState;
6871   conf->failedNodeId = failedNodeId;
6872   sendSignal(c_lcpState.m_masterLcpDihRef, GSN_MASTER_LCPCONF,
6873              signal, MasterLCPConf::SignalLength, JBB);
6874 
6875   // Answer to MASTER_LCPREQ sent, reset flag so
6876   // that it's not sent again before another request comes in
6877   c_lcpState.m_MASTER_LCPREQ_Received = false;
6878 
6879   if(c_lcpState.lcpStatus == LCP_TAB_SAVED){
6880 #ifdef VM_TRACE
6881     g_eventLogger->info("Sending extra GSN_LCP_COMPLETE_REP to new master");
6882 #endif
6883     sendLCP_COMPLETE_REP(signal);
6884   }
6885 
6886   if(!isMaster()){
6887     c_lcpMasterTakeOverState.set(LMTOS_IDLE, __LINE__);
6888     checkLocalNodefailComplete(signal, failedNodeId, NF_LCP_TAKE_OVER);
6889   }
6890 
6891   return;
6892 }
6893 
6894 NdbOut&
operator <<(NdbOut & out,const Dbdih::LcpMasterTakeOverState state)6895 operator<<(NdbOut& out, const Dbdih::LcpMasterTakeOverState state){
6896   switch(state){
6897   case Dbdih::LMTOS_IDLE:
6898     out << "LMTOS_IDLE";
6899     break;
6900   case Dbdih::LMTOS_WAIT_EMPTY_LCP:
6901     out << "LMTOS_WAIT_EMPTY_LCP";
6902     break;
6903   case Dbdih::LMTOS_WAIT_LCP_FRAG_REP:
6904     out << "LMTOS_WAIT_EMPTY_LCP";
6905     break;
6906   case Dbdih::LMTOS_INITIAL:
6907     out << "LMTOS_INITIAL";
6908     break;
6909   case Dbdih::LMTOS_ALL_IDLE:
6910     out << "LMTOS_ALL_IDLE";
6911     break;
6912   case Dbdih::LMTOS_ALL_ACTIVE:
6913     out << "LMTOS_ALL_ACTIVE";
6914     break;
6915   case Dbdih::LMTOS_LCP_CONCLUDING:
6916     out << "LMTOS_LCP_CONCLUDING";
6917     break;
6918   case Dbdih::LMTOS_COPY_ONGOING:
6919     out << "LMTOS_COPY_ONGOING";
6920     break;
6921   }
6922   return out;
6923 }
6924 
/**
 * One row of the LCP master take-over transition table
 * (g_masterLCPTakeoverStateTransitions).
 *
 * A row matches when the take-over state equals CurrentState and a
 * participant reports ParticipantState in MASTER_LCPCONF; the take-over
 * state then becomes NewState.  The member order is relied upon by the
 * aggregate initialisers of the table.
 */
struct MASTERLCP_StateTransitions {
  Dbdih::LcpMasterTakeOverState CurrentState;   // take-over state before the reply
  MasterLCPConf::State ParticipantState;        // LCP state reported by the participant
  Dbdih::LcpMasterTakeOverState NewState;       // take-over state after the reply
};
6930 
6931 static const
6932 MASTERLCP_StateTransitions g_masterLCPTakeoverStateTransitions[] = {
6933   /**
6934    * Current = LMTOS_INITIAL
6935    */
6936   { Dbdih::LMTOS_INITIAL,
6937     MasterLCPConf::LCP_STATUS_IDLE,
6938     Dbdih::LMTOS_ALL_IDLE },
6939 
6940   { Dbdih::LMTOS_INITIAL,
6941     MasterLCPConf::LCP_STATUS_ACTIVE,
6942     Dbdih::LMTOS_ALL_ACTIVE },
6943 
6944   { Dbdih::LMTOS_INITIAL,
6945     MasterLCPConf::LCP_TAB_COMPLETED,
6946     Dbdih::LMTOS_LCP_CONCLUDING },
6947 
6948   { Dbdih::LMTOS_INITIAL,
6949     MasterLCPConf::LCP_TAB_SAVED,
6950     Dbdih::LMTOS_LCP_CONCLUDING },
6951 
6952   /**
6953    * Current = LMTOS_ALL_IDLE
6954    */
6955   { Dbdih::LMTOS_ALL_IDLE,
6956     MasterLCPConf::LCP_STATUS_IDLE,
6957     Dbdih::LMTOS_ALL_IDLE },
6958 
6959   { Dbdih::LMTOS_ALL_IDLE,
6960     MasterLCPConf::LCP_STATUS_ACTIVE,
6961     Dbdih::LMTOS_COPY_ONGOING },
6962 
6963   { Dbdih::LMTOS_ALL_IDLE,
6964     MasterLCPConf::LCP_TAB_COMPLETED,
6965     Dbdih::LMTOS_LCP_CONCLUDING },
6966 
6967   { Dbdih::LMTOS_ALL_IDLE,
6968     MasterLCPConf::LCP_TAB_SAVED,
6969     Dbdih::LMTOS_LCP_CONCLUDING },
6970 
6971   /**
6972    * Current = LMTOS_COPY_ONGOING
6973    */
6974   { Dbdih::LMTOS_COPY_ONGOING,
6975     MasterLCPConf::LCP_STATUS_IDLE,
6976     Dbdih::LMTOS_COPY_ONGOING },
6977 
6978   { Dbdih::LMTOS_COPY_ONGOING,
6979     MasterLCPConf::LCP_STATUS_ACTIVE,
6980     Dbdih::LMTOS_COPY_ONGOING },
6981 
6982   /**
6983    * Current = LMTOS_ALL_ACTIVE
6984    */
6985   { Dbdih::LMTOS_ALL_ACTIVE,
6986     MasterLCPConf::LCP_STATUS_IDLE,
6987     Dbdih::LMTOS_COPY_ONGOING },
6988 
6989   { Dbdih::LMTOS_ALL_ACTIVE,
6990     MasterLCPConf::LCP_STATUS_ACTIVE,
6991     Dbdih::LMTOS_ALL_ACTIVE },
6992 
6993   { Dbdih::LMTOS_ALL_ACTIVE,
6994     MasterLCPConf::LCP_TAB_COMPLETED,
6995     Dbdih::LMTOS_LCP_CONCLUDING },
6996 
6997   { Dbdih::LMTOS_ALL_ACTIVE,
6998     MasterLCPConf::LCP_TAB_SAVED,
6999     Dbdih::LMTOS_LCP_CONCLUDING },
7000 
7001   /**
7002    * Current = LMTOS_LCP_CONCLUDING
7003    */
7004   { Dbdih::LMTOS_LCP_CONCLUDING,
7005     MasterLCPConf::LCP_STATUS_IDLE,
7006     Dbdih::LMTOS_LCP_CONCLUDING },
7007 
7008   { Dbdih::LMTOS_LCP_CONCLUDING,
7009     MasterLCPConf::LCP_STATUS_ACTIVE,
7010     Dbdih::LMTOS_LCP_CONCLUDING },
7011 
7012   { Dbdih::LMTOS_LCP_CONCLUDING,
7013     MasterLCPConf::LCP_TAB_COMPLETED,
7014     Dbdih::LMTOS_LCP_CONCLUDING },
7015 
7016   { Dbdih::LMTOS_LCP_CONCLUDING,
7017     MasterLCPConf::LCP_TAB_SAVED,
7018     Dbdih::LMTOS_LCP_CONCLUDING }
7019 };
7020 
7021 const Uint32 g_masterLCPTakeoverStateTransitionsRows =
7022 sizeof(g_masterLCPTakeoverStateTransitions) / sizeof(struct MASTERLCP_StateTransitions);
7023 
execMASTER_LCPCONF(Signal * signal)7024 void Dbdih::execMASTER_LCPCONF(Signal* signal)
7025 {
7026   const MasterLCPConf * const conf = (MasterLCPConf *)&signal->theData[0];
7027   jamEntry();
7028 
7029   if (ERROR_INSERTED(7194))
7030   {
7031     ndbout_c("delaying MASTER_LCPCONF due to error 7194");
7032     sendSignalWithDelay(reference(), GSN_MASTER_LCPCONF, signal,
7033                         300, signal->getLength());
7034     return;
7035   }
7036 
7037   Uint32 senderNodeId = conf->senderNodeId;
7038   MasterLCPConf::State lcpState = (MasterLCPConf::State)conf->lcpState;
7039   const Uint32 failedNodeId = conf->failedNodeId;
7040   NodeRecordPtr nodePtr;
7041   nodePtr.i = senderNodeId;
7042   ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
7043   nodePtr.p->lcpStateAtTakeOver = lcpState;
7044 
7045   CRASH_INSERTION(7180);
7046 
7047 #ifdef VM_TRACE
7048   g_eventLogger->info("MASTER_LCPCONF");
7049   printMASTER_LCP_CONF(stdout, &signal->theData[0], 0, 0);
7050 #endif
7051 
7052   bool found = false;
7053   for(Uint32 i = 0; i<g_masterLCPTakeoverStateTransitionsRows; i++){
7054     const struct MASTERLCP_StateTransitions * valid =
7055       &g_masterLCPTakeoverStateTransitions[i];
7056 
7057     if(valid->CurrentState == c_lcpMasterTakeOverState.state &&
7058        valid->ParticipantState == lcpState){
7059       jam();
7060       found = true;
7061       c_lcpMasterTakeOverState.set(valid->NewState, __LINE__);
7062       break;
7063     }
7064   }
7065   ndbrequire(found);
7066 
7067   bool ok = false;
7068   switch(lcpState){
7069   case MasterLCPConf::LCP_STATUS_IDLE:
7070     ok = true;
7071     break;
7072   case MasterLCPConf::LCP_STATUS_ACTIVE:
7073   case MasterLCPConf::LCP_TAB_COMPLETED:
7074   case MasterLCPConf::LCP_TAB_SAVED:
7075     ok = true;
7076     c_lcpState.m_LCP_COMPLETE_REP_Counter_DIH.setWaitingFor(nodePtr.i);
7077     break;
7078   }
7079   ndbrequire(ok);
7080 
7081   receiveLoopMacro(MASTER_LCPREQ, senderNodeId);
7082   /*-------------------------------------------------------------------------*/
7083   // We have now received all responses and are ready to take over the LCP
7084   // protocol as master.
7085   /*-------------------------------------------------------------------------*/
7086   MASTER_LCPhandling(signal, failedNodeId);
7087 }//Dbdih::execMASTER_LCPCONF()
7088 
execMASTER_LCPREF(Signal * signal)7089 void Dbdih::execMASTER_LCPREF(Signal* signal)
7090 {
7091   const MasterLCPRef * const ref = (MasterLCPRef *)&signal->theData[0];
7092   jamEntry();
7093 
7094   Uint32 senderNodeId = ref->senderNodeId;
7095   Uint32 failedNodeId = ref->failedNodeId;
7096 
7097   if (c_lcpState.m_LCP_COMPLETE_REP_Counter_LQH.isWaitingFor(senderNodeId))
7098   {
7099     jam();
7100     c_lcpState.m_LCP_COMPLETE_REP_Counter_LQH.clearWaitingFor(senderNodeId);
7101   }
7102 
7103   receiveLoopMacro(MASTER_LCPREQ, senderNodeId);
7104   /*-------------------------------------------------------------------------*/
7105   // We have now received all responses and are ready to take over the LCP
7106   // protocol as master.
7107   /*-------------------------------------------------------------------------*/
7108   MASTER_LCPhandling(signal, failedNodeId);
7109 }//Dbdih::execMASTER_LCPREF()
7110 
MASTER_LCPhandling(Signal * signal,Uint32 failedNodeId)7111 void Dbdih::MASTER_LCPhandling(Signal* signal, Uint32 failedNodeId)
7112 {
7113   /*-------------------------------------------------------------------------
7114    *
7115    * WE ARE NOW READY TO CONCLUDE THE TAKE OVER AS MASTER.
7116    * WE HAVE ENOUGH INFO TO START UP ACTIVITIES IN THE PROPER PLACE.
7117    * ALSO SET THE PROPER STATE VARIABLES.
7118    *------------------------------------------------------------------------*/
7119   c_lcpState.currentFragment.tableId = c_lcpMasterTakeOverState.minTableId;
7120   c_lcpState.currentFragment.fragmentId = c_lcpMasterTakeOverState.minFragId;
7121   c_lcpState.m_LAST_LCP_FRAG_ORD = c_lcpState.m_LCP_COMPLETE_REP_Counter_LQH;
7122 
7123   NodeRecordPtr failedNodePtr;
7124   failedNodePtr.i = failedNodeId;
7125   ptrCheckGuard(failedNodePtr, MAX_NDB_NODES, nodeRecord);
7126 
7127   switch (c_lcpMasterTakeOverState.state) {
7128   case LMTOS_ALL_IDLE:
7129     jam();
7130     /* --------------------------------------------------------------------- */
7131     // All nodes were idle in the LCP protocol. Start checking for start of LCP
7132     // protocol.
7133     /* --------------------------------------------------------------------- */
7134 #ifdef VM_TRACE
7135     g_eventLogger->info("MASTER_LCPhandling:: LMTOS_ALL_IDLE -> checkLcpStart");
7136 #endif
7137     checkLcpStart(signal, __LINE__);
7138     break;
7139   case LMTOS_COPY_ONGOING:
7140     jam();
7141     /* --------------------------------------------------------------------- */
7142     // We were in the starting process of the LCP protocol. We will restart the
7143     // protocol by calculating the keep gci and storing the new lcp id.
7144     /* --------------------------------------------------------------------- */
7145 #ifdef VM_TRACE
7146     g_eventLogger->info("MASTER_LCPhandling:: LMTOS_COPY_ONGOING -> storeNewLcpId");
7147 #endif
7148     if (c_lcpState.lcpStatus == LCP_STATUS_ACTIVE) {
7149       jam();
7150       /*---------------------------------------------------------------------*/
7151       /*  WE NEED TO DECREASE THE LATEST LCP ID SINCE WE HAVE ALREADY        */
7152       /*  STARTED THIS */
7153       /*  LOCAL CHECKPOINT.                                                  */
7154       /*---------------------------------------------------------------------*/
7155       Uint32 lcpId = SYSFILE->latestLCP_ID;
7156 #ifdef VM_TRACE
7157       g_eventLogger->info("Decreasing latestLCP_ID from %d to %d", lcpId, lcpId - 1);
7158 #endif
7159       SYSFILE->latestLCP_ID--;
7160     }//if
7161 
7162     {
7163       Mutex mutex(signal, c_mutexMgr, c_fragmentInfoMutex_lcp);
7164       Callback c = { safe_cast(&Dbdih::lcpFragmentMutex_locked), 0 };
7165       ndbrequire(mutex.lock(c, false));
7166     }
7167     break;
7168   case LMTOS_ALL_ACTIVE:
7169     {
7170       jam();
7171       /* -------------------------------------------------------------------
7172        * Everybody was in the active phase. We will restart sending
7173        * LCP_FRAGORD to the nodes from the new master.
7174        * We also need to set dihLcpStatus to ZACTIVE
7175        * in the master node since the master will wait for all nodes to
7176        * complete before finalising the LCP process.
7177        * ------------------------------------------------------------------ */
7178 #ifdef VM_TRACE
7179       g_eventLogger->info("MASTER_LCPhandling:: LMTOS_ALL_ACTIVE -> "
7180                           "startLcpRoundLoopLab(table=%u, fragment=%u)",
7181                           c_lcpMasterTakeOverState.minTableId,
7182                           c_lcpMasterTakeOverState.minFragId);
7183 #endif
7184 
7185       c_lcpState.keepGci = SYSFILE->keepGCI;
7186 
7187       /**
7188        * We need to reaquire the mutex...
7189        */
7190       Mutex mutex(signal, c_mutexMgr, c_fragmentInfoMutex_lcp);
7191       Callback c =
7192         { safe_cast(&Dbdih::master_lcp_fragmentMutex_locked), failedNodePtr.i };
7193       ndbrequire(mutex.lock(c, false));
7194       return;
7195     }
7196   case LMTOS_LCP_CONCLUDING:
7197     {
7198       jam();
7199       /* ------------------------------------------------------------------- */
7200       // The LCP process is in the finalisation phase. We simply wait for it to
7201       // complete with signals arriving in. We need to check also if we should
7202       // change state due to table write completion during state
7203       // collection phase.
7204       /* ------------------------------------------------------------------- */
7205       ndbrequire(c_lcpState.lcpStatus != LCP_STATUS_IDLE);
7206 
7207       /**
7208        * We need to reaquire the mutex...
7209        */
7210       Mutex mutex(signal, c_mutexMgr, c_fragmentInfoMutex_lcp);
7211       Callback c =
7212         { safe_cast(&Dbdih::master_lcp_fragmentMutex_locked), failedNodePtr.i };
7213       ndbrequire(mutex.lock(c, false));
7214       return;
7215     }
7216   default:
7217     ndbrequire(false);
7218     break;
7219   }//switch
7220   signal->theData[0] = NDB_LE_LCP_TakeoverCompleted;
7221   signal->theData[1] = c_lcpMasterTakeOverState.state;
7222   sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
7223 
7224   signal->theData[0] = 7012;
7225   execDUMP_STATE_ORD(signal);
7226 
7227   c_lcpMasterTakeOverState.set(LMTOS_IDLE, __LINE__);
7228 
7229   checkLocalNodefailComplete(signal, failedNodePtr.i, NF_LCP_TAKE_OVER);
7230 }
7231 
7232 /* ------------------------------------------------------------------------- */
7233 /*       A BLOCK OR A NODE HAS COMPLETED THE HANDLING OF THE NODE FAILURE.   */
7234 /* ------------------------------------------------------------------------- */
execNF_COMPLETEREP(Signal * signal)7235 void Dbdih::execNF_COMPLETEREP(Signal* signal)
7236 {
7237   NodeRecordPtr failedNodePtr;
7238   NFCompleteRep * const nfCompleteRep = (NFCompleteRep *)&signal->theData[0];
7239   jamEntry();
7240   const Uint32 blockNo = nfCompleteRep->blockNo;
7241   Uint32 nodeId       = nfCompleteRep->nodeId;
7242   failedNodePtr.i = nfCompleteRep->failedNodeId;
7243 
7244   ptrCheckGuard(failedNodePtr, MAX_NDB_NODES, nodeRecord);
7245   switch (blockNo) {
7246   case DBTC:
7247     jam();
7248     ndbrequire(failedNodePtr.p->dbtcFailCompleted == ZFALSE);
7249     /* -------------------------------------------------------------------- */
7250     // Report the event that DBTC completed node failure handling.
7251     /* -------------------------------------------------------------------- */
7252     signal->theData[0] = NDB_LE_NodeFailCompleted;
7253     signal->theData[1] = DBTC;
7254     signal->theData[2] = failedNodePtr.i;
7255     sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3, JBB);
7256 
7257     failedNodePtr.p->dbtcFailCompleted = ZTRUE;
7258     break;
7259   case DBDICT:
7260     jam();
7261     ndbrequire(failedNodePtr.p->dbdictFailCompleted == ZFALSE);
7262     /* --------------------------------------------------------------------- */
7263     // Report the event that DBDICT completed node failure handling.
7264     /* --------------------------------------------------------------------- */
7265     signal->theData[0] = NDB_LE_NodeFailCompleted;
7266     signal->theData[1] = DBDICT;
7267     signal->theData[2] = failedNodePtr.i;
7268     sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3, JBB);
7269 
7270     failedNodePtr.p->dbdictFailCompleted = ZTRUE;
7271     break;
7272   case DBDIH:
7273     jam();
7274     ndbrequire(failedNodePtr.p->dbdihFailCompleted == ZFALSE);
7275     /* --------------------------------------------------------------------- */
7276     // Report the event that DBDIH completed node failure handling.
7277     /* --------------------------------------------------------------------- */
7278     signal->theData[0] = NDB_LE_NodeFailCompleted;
7279     signal->theData[1] = DBDIH;
7280     signal->theData[2] = failedNodePtr.i;
7281     sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3, JBB);
7282 
7283     failedNodePtr.p->dbdihFailCompleted = ZTRUE;
7284     break;
7285   case DBLQH:
7286     jam();
7287     ndbrequire(failedNodePtr.p->dblqhFailCompleted == ZFALSE);
7288     /* --------------------------------------------------------------------- */
7289     // Report the event that DBDIH completed node failure handling.
7290     /* --------------------------------------------------------------------- */
7291     signal->theData[0] = NDB_LE_NodeFailCompleted;
7292     signal->theData[1] = DBLQH;
7293     signal->theData[2] = failedNodePtr.i;
7294     sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3, JBB);
7295 
7296     failedNodePtr.p->dblqhFailCompleted = ZTRUE;
7297     break;
7298   case 0: /* Node has finished */
7299     jam();
7300     ndbrequire(nodeId < MAX_NDB_NODES);
7301 
7302     if (failedNodePtr.p->recNODE_FAILREP == ZFALSE) {
7303       jam();
7304       /* ------------------------------------------------------------------- */
7305       // We received a report about completion of node failure before we
7306       // received the message about the NODE failure ourselves.
7307       // We will send the signal to ourselves with a small delay
7308       // (10 milliseconds).
7309       /* ------------------------------------------------------------------- */
7310       //nf->from = __LINE__;
7311       sendSignalWithDelay(reference(), GSN_NF_COMPLETEREP, signal, 10,
7312 			  signal->length());
7313       return;
7314     }//if
7315 
7316     if (!failedNodePtr.p->m_NF_COMPLETE_REP.isWaitingFor(nodeId)){
7317       jam();
7318       return;
7319     }
7320 
7321     failedNodePtr.p->m_NF_COMPLETE_REP.clearWaitingFor(nodeId);;
7322 
7323     /* -------------------------------------------------------------------- */
7324     // Report the event that nodeId has completed node failure handling.
7325     /* -------------------------------------------------------------------- */
7326     signal->theData[0] = NDB_LE_NodeFailCompleted;
7327     signal->theData[1] = 0;
7328     signal->theData[2] = failedNodePtr.i;
7329     signal->theData[3] = nodeId;
7330     sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);
7331 
7332     nodeFailCompletedCheckLab(signal, failedNodePtr);
7333     return;
7334     break;
7335   default:
7336     ndbrequire(false);
7337     return;
7338     break;
7339   }//switch
7340   if (failedNodePtr.p->dbtcFailCompleted == ZFALSE) {
7341     jam();
7342     return;
7343   }//if
7344   if (failedNodePtr.p->dbdictFailCompleted == ZFALSE) {
7345     jam();
7346     return;
7347   }//if
7348   if (failedNodePtr.p->dbdihFailCompleted == ZFALSE) {
7349     jam();
7350     return;
7351   }//if
7352   if (failedNodePtr.p->dblqhFailCompleted == ZFALSE) {
7353     jam();
7354     return;
7355   }//if
7356   /* ----------------------------------------------------------------------- */
7357   /*     ALL BLOCKS IN THIS NODE HAVE COMPLETED THEIR PART OF HANDLING THE   */
7358   /*     NODE FAILURE. WE CAN NOW REPORT THIS COMPLETION TO ALL OTHER NODES. */
7359   /* ----------------------------------------------------------------------- */
7360   NodeRecordPtr nodePtr;
7361   for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
7362     jam();
7363     ptrAss(nodePtr, nodeRecord);
7364     if (nodePtr.p->nodeStatus == NodeRecord::ALIVE) {
7365       jam();
7366       BlockReference ref = calcDihBlockRef(nodePtr.i);
7367       NFCompleteRep * const nf = (NFCompleteRep *)&signal->theData[0];
7368       nf->blockNo      = 0;
7369       nf->nodeId       = cownNodeId;
7370       nf->failedNodeId = failedNodePtr.i;
7371       nf->from = __LINE__;
7372       sendSignal(ref, GSN_NF_COMPLETEREP, signal,
7373                  NFCompleteRep::SignalLength, JBB);
7374     }//if
7375   }//for
7376   return;
7377 }//Dbdih::execNF_COMPLETEREP()
7378 
nodeFailCompletedCheckLab(Signal * signal,NodeRecordPtr failedNodePtr)7379 void Dbdih::nodeFailCompletedCheckLab(Signal* signal,
7380 				      NodeRecordPtr failedNodePtr)
7381 {
7382   jam();
7383   if (!failedNodePtr.p->m_NF_COMPLETE_REP.done()){
7384     jam();
7385     return;
7386   }//if
7387   /* ---------------------------------------------------------------------- */
7388   /*    ALL BLOCKS IN ALL NODES HAVE NOW REPORTED COMPLETION OF THE NODE    */
7389   /*    FAILURE HANDLING. WE ARE NOW READY TO ACCEPT THAT THIS NODE STARTS  */
7390   /*    AGAIN.                                                              */
7391   /* ---------------------------------------------------------------------- */
7392   jam();
7393   failedNodePtr.p->nodeStatus = NodeRecord::DEAD;
7394   failedNodePtr.p->recNODE_FAILREP = ZFALSE;
7395 
7396   /* ---------------------------------------------------------------------- */
7397   // Report the event that all nodes completed node failure handling.
7398   /* ---------------------------------------------------------------------- */
7399   signal->theData[0] = NDB_LE_NodeFailCompleted;
7400   signal->theData[1] = 0;
7401   signal->theData[2] = failedNodePtr.i;
7402   signal->theData[3] = 0;
7403   sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);
7404 
7405   /* ---------------------------------------------------------------------- */
7406   // Report to QMGR that we have concluded recovery handling of this node.
7407   /* ---------------------------------------------------------------------- */
7408   signal->theData[0] = failedNodePtr.i;
7409   sendSignal(QMGR_REF, GSN_NDB_FAILCONF, signal, 1, JBB);
7410 
7411   return;
7412 }//Dbdih::nodeFailCompletedCheckLab()
7413 
7414 /*****************************************************************************/
7415 /* **********     SEIZING / RELEASING MODULE                     *************/
7416 /*****************************************************************************/
7417 /*
7418   3.4   L O C A L  N O D E   S E I Z E
7419   ************************************
7420   */
7421 /*
7422   3.7   A D D   T A B L E
  ***********************
7424   */
7425 /*****************************************************************************/
7426 /* **********     TABLE ADDING MODULE                            *************/
7427 /*****************************************************************************/
7428 /*
7429   3.7.1   A D D   T A B L E   M A I N L Y
7430   ***************************************
7431   */
7432 
/**
 * Advance node by one, wrapping to 0 when the incremented value equals
 * max_node.  A value that is already at or beyond max_node is not wrapped;
 * it simply keeps counting up.
 */
static inline void inc_node_or_group(Uint32 &node, Uint32 max_node)
{
  ++node;
  if (node == max_node)
  {
    node = 0;
  }
}
7438 
7439 /*
7440   Spread fragments in backwards compatible mode
7441 */
set_default_node_groups(Signal * signal,Uint32 noFrags)7442 static void set_default_node_groups(Signal *signal, Uint32 noFrags)
7443 {
7444   Uint16 *node_group_array = (Uint16*)&signal->theData[25];
7445   Uint32 i;
7446   node_group_array[0] = 0;
7447   for (i = 1; i < noFrags; i++)
7448     node_group_array[i] = NDB_UNDEF_NODEGROUP;
7449 }
7450 
/**
 * Return the index of the smallest value among the first cnt elements of
 * array.  On ties the lowest index wins, since only a strictly smaller
 * value replaces the current minimum.
 *
 * An empty range (cnt == 0) yields 0 without reading array; previously
 * array[0] was read unconditionally.
 *
 * @param array  Values to search; must hold at least cnt elements.
 * @param cnt    Number of elements to consider.
 * @return       Index of the first minimum, or 0 when cnt is 0.
 */
static Uint32 find_min_index(const Uint32* array, Uint32 cnt)
{
  if (cnt == 0)
  {
    return 0;
  }

  Uint32 m = 0;
  Uint32 mv = array[0];
  for (Uint32 i = 1; i<cnt; i++)
  {
    if (array[i] < mv)
    {
      m = i;
      mv = array[i];
    }
  }
  return m;
}
7465 
execCREATE_FRAGMENTATION_REQ(Signal * signal)7466 void Dbdih::execCREATE_FRAGMENTATION_REQ(Signal * signal)
7467 {
7468   Uint16 node_group_id[MAX_NDB_PARTITIONS];
7469   jamEntry();
7470   CreateFragmentationReq * const req =
7471     (CreateFragmentationReq*)signal->getDataPtr();
7472 
7473   const Uint32 senderRef = req->senderRef;
7474   const Uint32 senderData = req->senderData;
7475   Uint32 noOfFragments = req->noOfFragments;
7476   const Uint32 fragType = req->fragmentationType;
7477   const Uint32 primaryTableId = req->primaryTableId;
7478   const Uint32 map_ptr_i = req->map_ptr_i;
7479   const Uint32 flags = req->requestInfo;
7480 
7481   Uint32 err = 0;
7482   const Uint32 defaultFragments =
7483     c_fragments_per_node * cnoOfNodeGroups * cnoReplicas;
7484 
7485   do {
7486     NodeGroupRecordPtr NGPtr;
7487     TabRecordPtr primTabPtr;
7488     Uint32 count = 2;
7489     Uint16 noOfReplicas = cnoReplicas;
7490     Uint16 *fragments = (Uint16*)(signal->theData+25);
7491     if (primaryTableId == RNIL) {
7492       jam();
7493       switch ((DictTabInfo::FragmentType)fragType){
7494         /*
7495           Backward compatability and for all places in code not changed.
7496         */
7497       case DictTabInfo::AllNodesSmallTable:
7498         jam();
7499         noOfFragments = defaultFragments;
7500         set_default_node_groups(signal, noOfFragments);
7501         break;
7502       case DictTabInfo::AllNodesMediumTable:
7503         jam();
7504         noOfFragments = 2 * defaultFragments;
7505         set_default_node_groups(signal, noOfFragments);
7506         break;
7507       case DictTabInfo::AllNodesLargeTable:
7508         jam();
7509         noOfFragments = 4 * defaultFragments;
7510         set_default_node_groups(signal, noOfFragments);
7511         break;
7512       case DictTabInfo::SingleFragment:
7513         jam();
7514         noOfFragments = 1;
7515         set_default_node_groups(signal, noOfFragments);
7516         break;
7517       case DictTabInfo::DistrKeyHash:
7518         jam();
7519       case DictTabInfo::DistrKeyLin:
7520         jam();
7521         if (noOfFragments == 0)
7522         {
7523           jam();
7524           noOfFragments = defaultFragments;
7525           set_default_node_groups(signal, noOfFragments);
7526         }
7527         break;
7528       case DictTabInfo::HashMapPartition:
7529       {
7530         jam();
7531         ndbrequire(map_ptr_i != RNIL);
7532         Ptr<Hash2FragmentMap> ptr;
7533         g_hash_map.getPtr(ptr, map_ptr_i);
7534         if (noOfFragments == 0)
7535         {
7536           jam();
7537           noOfFragments = ptr.p->m_fragments;
7538         }
7539         else if (noOfFragments != ptr.p->m_fragments)
7540         {
7541           jam();
7542           err = CreateFragmentationRef::InvalidFragmentationType;
7543           break;
7544         }
7545         set_default_node_groups(signal, noOfFragments);
7546         break;
7547       }
7548       default:
7549         jam();
7550         if (noOfFragments == 0)
7551         {
7552           jam();
7553           err = CreateFragmentationRef::InvalidFragmentationType;
7554         }
7555         break;
7556       }
7557       if (err)
7558         break;
7559       /*
7560         When we come here the the exact partition is specified
7561         and there is an array of node groups sent along as well.
7562       */
7563       memcpy(&node_group_id[0], &signal->theData[25], 2 * noOfFragments);
7564       Uint16 next_replica_node[MAX_NDB_NODES];
7565       memset(next_replica_node,0,sizeof(next_replica_node));
7566       Uint32 default_node_group= c_nextNodeGroup;
7567       for(Uint32 fragNo = 0; fragNo < noOfFragments; fragNo++)
7568       {
7569         jam();
7570         NGPtr.i = node_group_id[fragNo];
7571         if (NGPtr.i == NDB_UNDEF_NODEGROUP)
7572         {
7573           jam();
7574 	  NGPtr.i = c_node_groups[default_node_group];
7575         }
7576         if (NGPtr.i >= MAX_NDB_NODES)
7577         {
7578           jam();
7579           err = CreateFragmentationRef::InvalidNodeGroup;
7580           break;
7581         }
7582         ptrCheckGuard(NGPtr, MAX_NDB_NODES, nodeGroupRecord);
7583         if (NGPtr.p->nodegroupIndex == RNIL)
7584         {
7585           jam();
7586           err = CreateFragmentationRef::InvalidNodeGroup;
7587           break;
7588         }
7589         const Uint32 max = NGPtr.p->nodeCount;
7590 
7591 	fragments[count++] = (NGPtr.p->m_next_log_part++ / cnoReplicas); // Store logpart first
7592 	Uint32 tmp= next_replica_node[NGPtr.i];
7593         for(Uint32 replicaNo = 0; replicaNo < noOfReplicas; replicaNo++)
7594         {
7595           jam();
7596           const Uint16 nodeId = NGPtr.p->nodesInGroup[tmp];
7597           fragments[count++]= nodeId;
7598           inc_node_or_group(tmp, max);
7599         }
7600         inc_node_or_group(tmp, max);
7601 	next_replica_node[NGPtr.i]= tmp;
7602 
7603         /**
7604          * Next node group for next fragment
7605          */
7606         inc_node_or_group(default_node_group, cnoOfNodeGroups);
7607       }
7608       if (err)
7609       {
7610         jam();
7611         break;
7612       }
7613       else
7614       {
7615         jam();
7616         c_nextNodeGroup = default_node_group;
7617       }
7618     } else {
7619       if (primaryTableId >= ctabFileSize) {
7620         jam();
7621         err = CreateFragmentationRef::InvalidPrimaryTable;
7622         break;
7623       }
7624       primTabPtr.i = primaryTableId;
7625       ptrAss(primTabPtr, tabRecord);
7626       if (primTabPtr.p->tabStatus != TabRecord::TS_ACTIVE) {
7627         jam();
7628         err = CreateFragmentationRef::InvalidPrimaryTable;
7629         break;
7630       }
7631       Uint32 fragments_per_node[MAX_NDB_NODES]; // Keep track of no of (primary) fragments per node
7632       bzero(fragments_per_node, sizeof(fragments_per_node));
7633       for (Uint32 fragNo = 0; fragNo < primTabPtr.p->totalfragments; fragNo++) {
7634         jam();
7635         FragmentstorePtr fragPtr;
7636         ReplicaRecordPtr replicaPtr;
7637         getFragstore(primTabPtr.p, fragNo, fragPtr);
7638 	fragments[count++] = fragPtr.p->m_log_part_id;
7639         fragments[count++] = fragPtr.p->preferredPrimary;
7640         fragments_per_node[fragPtr.p->preferredPrimary]++;
7641         for (replicaPtr.i = fragPtr.p->storedReplicas;
7642              replicaPtr.i != RNIL;
7643              replicaPtr.i = replicaPtr.p->nextReplica) {
7644           jam();
7645           ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
7646           if (replicaPtr.p->procNode != fragPtr.p->preferredPrimary) {
7647             jam();
7648             fragments[count++]= replicaPtr.p->procNode;
7649           }
7650         }
7651         for (replicaPtr.i = fragPtr.p->oldStoredReplicas;
7652              replicaPtr.i != RNIL;
7653              replicaPtr.i = replicaPtr.p->nextReplica) {
7654           jam();
7655           ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
7656           if (replicaPtr.p->procNode != fragPtr.p->preferredPrimary) {
7657             jam();
7658             fragments[count++]= replicaPtr.p->procNode;
7659           }
7660         }
7661       }
7662 
7663       if (flags & CreateFragmentationReq::RI_GET_FRAGMENTATION)
7664       {
7665         jam();
7666         noOfFragments = primTabPtr.p->totalfragments;
7667       }
7668       else if (flags & CreateFragmentationReq::RI_ADD_PARTITION)
7669       {
7670         jam();
7671         /**
7672          * All nodes that dont belong to a nodegroup to ~0 fragments_per_node
7673          *   so that they dont get any more...
7674          */
7675         for (Uint32 i = 0; i<MAX_NDB_NODES; i++)
7676         {
7677           if (getNodeStatus(i) == NodeRecord::NOT_IN_CLUSTER ||
7678               getNodeGroup(i) >= cnoOfNodeGroups) // XXX todo
7679           {
7680             jam();
7681             ndbassert(fragments_per_node[i] == 0);
7682             fragments_per_node[i] = ~(Uint32)0;
7683           }
7684         }
7685         for (Uint32 i = primTabPtr.p->totalfragments; i<noOfFragments; i++)
7686         {
7687           jam();
7688           Uint32 node = find_min_index(fragments_per_node,
7689                                        NDB_ARRAY_SIZE(fragments_per_node));
7690           NGPtr.i = getNodeGroup(node);
7691           ptrCheckGuard(NGPtr, MAX_NDB_NODES, nodeGroupRecord);
7692           fragments[count++] = NGPtr.p->m_next_log_part++;
7693           fragments[count++] = node;
7694           fragments_per_node[node]++;
7695           for (Uint32 r = 0; r<noOfReplicas; r++)
7696           {
7697             jam();
7698             if (NGPtr.p->nodesInGroup[r] != node)
7699             {
7700               jam();
7701               fragments[count++] = NGPtr.p->nodesInGroup[r];
7702             }
7703           }
7704         }
7705       }
7706     }
7707     if(count != (2U + (1 + noOfReplicas) * noOfFragments)){
7708         char buf[255];
7709         BaseString::snprintf(buf, sizeof(buf),
7710                            "Illegal configuration change: NoOfReplicas."
7711                            " Can't be applied online ");
7712         progError(__LINE__, NDBD_EXIT_INVALID_CONFIG, buf);
7713     }
7714 
7715     CreateFragmentationConf * const conf =
7716       (CreateFragmentationConf*)signal->getDataPtrSend();
7717     conf->senderRef = reference();
7718     conf->senderData = senderData;
7719     conf->noOfReplicas = (Uint32)noOfReplicas;
7720     conf->noOfFragments = (Uint32)noOfFragments;
7721 
7722     fragments[0]= noOfReplicas;
7723     fragments[1]= noOfFragments;
7724 
7725     if(senderRef != 0)
7726     {
7727       jam();
7728       LinearSectionPtr ptr[3];
7729       ptr[0].p = (Uint32*)&fragments[0];
7730       ptr[0].sz = (count + 1) / 2;
7731       sendSignal(senderRef,
7732 		 GSN_CREATE_FRAGMENTATION_CONF,
7733 		 signal,
7734 		 CreateFragmentationConf::SignalLength,
7735 		 JBB,
7736 		 ptr,
7737 		 1);
7738     }
7739     // Always ACK/NACK (here ACK)
7740     signal->theData[0] = 0;
7741     return;
7742   } while(false);
7743   // Always ACK/NACK (here NACK)
7744   signal->theData[0] = err;
7745 }
7746 
execDIADDTABREQ(Signal * signal)7747 void Dbdih::execDIADDTABREQ(Signal* signal)
7748 {
7749   Uint32 fragType;
7750   jamEntry();
7751 
7752   DiAddTabReq * const req = (DiAddTabReq*)signal->getDataPtr();
7753 
7754   // Seize connect record
7755   ndbrequire(cfirstconnect != RNIL);
7756   ConnectRecordPtr connectPtr;
7757   connectPtr.i = cfirstconnect;
7758   ptrCheckGuard(connectPtr, cconnectFileSize, connectRecord);
7759   cfirstconnect = connectPtr.p->nextPool;
7760 
7761   const Uint32 userPtr = req->connectPtr;
7762   const BlockReference userRef = signal->getSendersBlockRef();
7763   connectPtr.p->nextPool = RNIL;
7764   connectPtr.p->userpointer = userPtr;
7765   connectPtr.p->userblockref = userRef;
7766   connectPtr.p->connectState = ConnectRecord::INUSE;
7767   connectPtr.p->table = req->tableId;
7768   connectPtr.p->m_alter.m_changeMask = 0;
7769   connectPtr.p->m_create.m_map_ptr_i = req->hashMapPtrI;
7770 
7771   TabRecordPtr tabPtr;
7772   tabPtr.i = req->tableId;
7773   ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
7774   tabPtr.p->connectrec = connectPtr.i;
7775   tabPtr.p->tableType = req->tableType;
7776   fragType= req->fragType;
7777   tabPtr.p->schemaVersion = req->schemaVersion;
7778   tabPtr.p->primaryTableId = req->primaryTableId;
7779   tabPtr.p->schemaTransId = req->schemaTransId;
7780   tabPtr.p->m_scan_count[0] = 0;
7781   tabPtr.p->m_scan_count[1] = 0;
7782   tabPtr.p->m_scan_reorg_flag = 0;
7783 
7784   if (tabPtr.p->tabStatus == TabRecord::TS_ACTIVE)
7785   {
7786     jam();
7787     tabPtr.p->tabStatus = TabRecord::TS_CREATING;
7788     connectPtr.p->m_alter.m_totalfragments = tabPtr.p->totalfragments;
7789     sendAddFragreq(signal, connectPtr, tabPtr, 0);
7790     return;
7791   }
7792 
7793   if (getNodeState().getSystemRestartInProgress() &&
7794      tabPtr.p->tabStatus == TabRecord::TS_IDLE)
7795   {
7796     jam();
7797 
7798     ndbrequire(cmasterNodeId == getOwnNodeId());
7799     tabPtr.p->tabStatus = TabRecord::TS_CREATING;
7800 
7801     initTableFile(tabPtr);
7802     FileRecordPtr filePtr;
7803     filePtr.i = tabPtr.p->tabFile[0];
7804     ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
7805     openFileRw(signal, filePtr);
7806     filePtr.p->reqStatus = FileRecord::OPENING_TABLE;
7807     return;
7808   }
7809 
7810   /*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
7811   /* AT THE TIME OF INITIATING THE FILE OF TABLE         */
7812   /* DESCRIPTION IS CREATED FOR APPROPRIATE SIZE. EACH   */
7813   /* EACH RECORD IN THIS FILE HAS THE INFORMATION ABOUT  */
7814   /* ONE TABLE. THE POINTER TO THIS RECORD IS THE TABLE  */
7815   /* REFERENCE. IN THE BEGINNING ALL RECORDS ARE CREATED */
7816   /* BUT THEY DO NOT HAVE ANY INFORMATION ABOUT ANY TABLE*/
7817   /*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
7818   tabPtr.p->tabStatus = TabRecord::TS_CREATING;
7819   if(req->loggedTable)
7820     tabPtr.p->tabStorage= TabRecord::ST_NORMAL;
7821   else if(req->temporaryTable)
7822     tabPtr.p->tabStorage= TabRecord::ST_TEMPORARY;
7823   else
7824     tabPtr.p->tabStorage= TabRecord::ST_NOLOGGING;
7825   tabPtr.p->kvalue = req->kValue;
7826 
7827   switch ((DictTabInfo::FragmentType)fragType){
7828   case DictTabInfo::HashMapPartition:
7829     tabPtr.p->method = TabRecord::HASH_MAP;
7830     break;
7831   case DictTabInfo::AllNodesSmallTable:
7832   case DictTabInfo::AllNodesMediumTable:
7833   case DictTabInfo::AllNodesLargeTable:
7834   case DictTabInfo::SingleFragment:
7835     jam();
7836   case DictTabInfo::DistrKeyLin:
7837     jam();
7838     tabPtr.p->method = TabRecord::LINEAR_HASH;
7839     break;
7840   case DictTabInfo::DistrKeyHash:
7841     jam();
7842     tabPtr.p->method = TabRecord::NORMAL_HASH;
7843     break;
7844   case DictTabInfo::DistrKeyOrderedIndex:
7845   {
7846     TabRecordPtr primTabPtr;
7847     primTabPtr.i = req->primaryTableId;
7848     ptrCheckGuard(primTabPtr, ctabFileSize, tabRecord);
7849     tabPtr.p->method = primTabPtr.p->method;
7850     req->hashMapPtrI = primTabPtr.p->m_map_ptr_i;
7851     break;
7852   }
7853   case DictTabInfo::UserDefined:
7854     jam();
7855     tabPtr.p->method = TabRecord::USER_DEFINED;
7856     break;
7857   default:
7858     ndbrequire(false);
7859   }
7860 
7861   union {
7862     Uint16 fragments[2 + MAX_FRAG_PER_NODE*MAX_REPLICAS*MAX_NDB_NODES];
7863     Uint32 align;
7864   };
7865   (void)align; // kill warning
7866   SectionHandle handle(this, signal);
7867   SegmentedSectionPtr fragDataPtr;
7868   ndbrequire(handle.getSection(fragDataPtr, DiAddTabReq::FRAGMENTATION));
7869   copy((Uint32*)fragments, fragDataPtr);
7870   releaseSections(handle);
7871 
7872   const Uint32 noReplicas = fragments[0];
7873   const Uint32 noFragments = fragments[1];
7874 
7875   tabPtr.p->noOfBackups = noReplicas - 1;
7876   tabPtr.p->totalfragments = noFragments;
7877   ndbrequire(noReplicas == cnoReplicas); // Only allowed
7878 
7879   if (ERROR_INSERTED(7173)) {
7880     CLEAR_ERROR_INSERT_VALUE;
7881     addtabrefuseLab(signal, connectPtr, ZREPLERROR1);
7882     return;
7883   }
7884   if ((noReplicas * noFragments) > cnoFreeReplicaRec) {
7885     jam();
7886     addtabrefuseLab(signal, connectPtr, ZREPLERROR1);
7887     return;
7888   }//if
7889   if (noFragments > cremainingfrags) {
7890     jam();
7891     addtabrefuseLab(signal, connectPtr, ZREPLERROR2);
7892     return;
7893   }//if
7894 
7895   Uint32 logTotalFragments = 1;
7896   while (logTotalFragments <= tabPtr.p->totalfragments) {
7897     jam();
7898     logTotalFragments <<= 1;
7899   }
7900   logTotalFragments >>= 1;
7901   tabPtr.p->mask = logTotalFragments - 1;
7902   tabPtr.p->hashpointer = tabPtr.p->totalfragments - logTotalFragments;
7903   allocFragments(tabPtr.p->totalfragments, tabPtr);
7904 
7905   if (tabPtr.p->method == TabRecord::HASH_MAP)
7906   {
7907     jam();
7908     tabPtr.p->m_map_ptr_i = req->hashMapPtrI;
7909     tabPtr.p->m_new_map_ptr_i = RNIL;
7910     Ptr<Hash2FragmentMap> mapPtr;
7911     g_hash_map.getPtr(mapPtr, tabPtr.p->m_map_ptr_i);
7912     ndbrequire(tabPtr.p->totalfragments >= mapPtr.p->m_fragments);
7913   }
7914 
7915   Uint32 index = 2;
7916   for (Uint32 fragId = 0; fragId < noFragments; fragId++) {
7917     jam();
7918     FragmentstorePtr fragPtr;
7919     Uint32 activeIndex = 0;
7920     getFragstore(tabPtr.p, fragId, fragPtr);
7921     fragPtr.p->m_log_part_id = fragments[index++];
7922     fragPtr.p->preferredPrimary = fragments[index];
7923 
7924     inc_ng_refcount(getNodeGroup(fragPtr.p->preferredPrimary));
7925 
7926     for (Uint32 i = 0; i<noReplicas; i++) {
7927       const Uint32 nodeId = fragments[index++];
7928       ReplicaRecordPtr replicaPtr;
7929       allocStoredReplica(fragPtr, replicaPtr, nodeId);
7930       if (getNodeStatus(nodeId) == NodeRecord::ALIVE) {
7931         jam();
7932         ndbrequire(activeIndex < MAX_REPLICAS);
7933         fragPtr.p->activeNodes[activeIndex] = nodeId;
7934         activeIndex++;
7935       } else {
7936         jam();
7937         removeStoredReplica(fragPtr, replicaPtr);
7938         linkOldStoredReplica(fragPtr, replicaPtr);
7939       }//if
7940     }//for
7941     fragPtr.p->fragReplicas = activeIndex;
7942     ndbrequire(activeIndex > 0 && fragPtr.p->storedReplicas != RNIL);
7943   }
7944   initTableFile(tabPtr);
7945   tabPtr.p->tabCopyStatus = TabRecord::CS_ADD_TABLE_MASTER;
7946   signal->theData[0] = DihContinueB::ZPACK_TABLE_INTO_PAGES;
7947   signal->theData[1] = tabPtr.i;
7948   sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
7949 }
7950 
7951 void
addTable_closeConf(Signal * signal,Uint32 tabPtrI)7952 Dbdih::addTable_closeConf(Signal * signal, Uint32 tabPtrI){
7953   TabRecordPtr tabPtr;
7954   tabPtr.i = tabPtrI;
7955   ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
7956 
7957   ConnectRecordPtr connectPtr;
7958   connectPtr.i = tabPtr.p->connectrec;
7959   ptrCheckGuard(connectPtr, cconnectFileSize, connectRecord);
7960   connectPtr.p->m_alter.m_totalfragments = tabPtr.p->totalfragments;
7961 
7962   sendAddFragreq(signal, connectPtr, tabPtr, 0);
7963 }
7964 
7965 void
sendAddFragreq(Signal * signal,ConnectRecordPtr connectPtr,TabRecordPtr tabPtr,Uint32 fragId)7966 Dbdih::sendAddFragreq(Signal* signal, ConnectRecordPtr connectPtr,
7967 		      TabRecordPtr tabPtr, Uint32 fragId){
7968   jam();
7969   const Uint32 fragCount = connectPtr.p->m_alter.m_totalfragments;
7970   ReplicaRecordPtr replicaPtr;
7971   LINT_INIT(replicaPtr.p);
7972   replicaPtr.i = RNIL;
7973   FragmentstorePtr fragPtr;
7974   for(; fragId<fragCount; fragId++){
7975     jam();
7976     getFragstore(tabPtr.p, fragId, fragPtr);
7977 
7978     replicaPtr.i = fragPtr.p->storedReplicas;
7979     while(replicaPtr.i != RNIL){
7980       jam();
7981       ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
7982       if(replicaPtr.p->procNode == getOwnNodeId()){
7983 	break;
7984       }
7985       replicaPtr.i = replicaPtr.p->nextReplica;
7986     }
7987 
7988     if(replicaPtr.i != RNIL){
7989       jam();
7990       break;
7991     }
7992 
7993     replicaPtr.i = fragPtr.p->oldStoredReplicas;
7994     while(replicaPtr.i != RNIL){
7995       jam();
7996       ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
7997       if(replicaPtr.p->procNode == getOwnNodeId()){
7998 	break;
7999       }
8000       replicaPtr.i = replicaPtr.p->nextReplica;
8001     }
8002 
8003     if(replicaPtr.i != RNIL){
8004       jam();
8005       break;
8006     }
8007   }
8008 
8009   if(replicaPtr.i != RNIL){
8010     jam();
8011     ndbrequire(fragId < fragCount);
8012     ndbrequire(replicaPtr.p->procNode == getOwnNodeId());
8013 
8014     Uint32 requestInfo = 0;
8015     if(tabPtr.p->tabStorage != TabRecord::ST_NORMAL){
8016       requestInfo |= LqhFragReq::TemporaryTable;
8017     }
8018 
8019     if(getNodeState().getNodeRestartInProgress()){
8020       requestInfo |= LqhFragReq::CreateInRunning;
8021     }
8022 
8023     AddFragReq* const req = (AddFragReq*)signal->getDataPtr();
8024     req->dihPtr = connectPtr.i;
8025     req->senderData = connectPtr.p->userpointer;
8026     req->fragmentId = fragId;
8027     req->requestInfo = requestInfo;
8028     req->tableId = tabPtr.i;
8029     req->nextLCP = 0;
8030     req->nodeId = getOwnNodeId();
8031     req->totalFragments = fragCount;
8032     req->startGci = SYSFILE->newestRestorableGCI;
8033     req->logPartId = fragPtr.p->m_log_part_id;
8034     req->changeMask = 0;
8035 
8036     if (connectPtr.p->connectState == ConnectRecord::ALTER_TABLE)
8037     {
8038       jam();
8039       req->changeMask = connectPtr.p->m_alter.m_changeMask;
8040     }
8041 
8042     sendSignal(DBDICT_REF, GSN_ADD_FRAGREQ, signal,
8043 	       AddFragReq::SignalLength, JBB);
8044     return;
8045   }
8046 
8047   if (connectPtr.p->connectState == ConnectRecord::ALTER_TABLE)
8048   {
8049     jam();
8050     // Request handled successfully
8051 
8052     if (AlterTableReq::getReorgFragFlag(connectPtr.p->m_alter.m_changeMask))
8053     {
8054       jam();
8055       DIH_TAB_WRITE_LOCK(tabPtr.p);
8056       tabPtr.p->m_new_map_ptr_i = connectPtr.p->m_alter.m_new_map_ptr_i;
8057       DIH_TAB_WRITE_UNLOCK(tabPtr.p);
8058     }
8059 
8060     if (AlterTableReq::getAddFragFlag(connectPtr.p->m_alter.m_changeMask))
8061     {
8062       jam();
8063       Callback cb;
8064       cb.m_callbackData = connectPtr.i;
8065       cb.m_callbackFunction = safe_cast(&Dbdih::alter_table_writeTable_conf);
8066       saveTableFile(signal, connectPtr, tabPtr, TabRecord::CS_ALTER_TABLE, cb);
8067       return;
8068     }
8069 
8070     send_alter_tab_conf(signal, connectPtr);
8071   }
8072   else
8073   {
8074     // Done
8075     DiAddTabConf * const conf = (DiAddTabConf*)signal->getDataPtr();
8076     conf->senderData = connectPtr.p->userpointer;
8077     sendSignal(connectPtr.p->userblockref, GSN_DIADDTABCONF, signal,
8078                DiAddTabConf::SignalLength, JBB);
8079 
8080 
8081     if (tabPtr.p->method == TabRecord::HASH_MAP)
8082     {
8083       Uint32 newValue = RNIL;
8084       if (DictTabInfo::isOrderedIndex(tabPtr.p->tableType))
8085       {
8086         jam();
8087         TabRecordPtr primTabPtr;
8088         primTabPtr.i = tabPtr.p->primaryTableId;
8089         ptrCheckGuard(primTabPtr, ctabFileSize, tabRecord);
8090         newValue = primTabPtr.p->m_map_ptr_i;
8091       }
8092       else
8093       {
8094         jam();
8095         newValue = connectPtr.p->m_create.m_map_ptr_i;
8096       }
8097 
8098       tabPtr.p->m_map_ptr_i = newValue;
8099     }
8100     // Release
8101     ndbrequire(tabPtr.p->connectrec == connectPtr.i);
8102     tabPtr.p->connectrec = RNIL;
8103     release_connect(connectPtr);
8104   }
8105 
8106 }
8107 void
release_connect(ConnectRecordPtr ptr)8108 Dbdih::release_connect(ConnectRecordPtr ptr)
8109 {
8110   TabRecordPtr tabPtr;
8111   tabPtr.i = ptr.p->table;
8112   if (tabPtr.i != RNIL)
8113   {
8114     jam();
8115     ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
8116     if (tabPtr.p->connectrec == ptr.i)
8117     {
8118       ndbassert(false); // should be fixed elsewhere
8119       tabPtr.p->connectrec = RNIL;
8120     }
8121   }
8122 
8123   ptr.p->table = RNIL;
8124   ptr.p->userblockref = ZNIL;
8125   ptr.p->userpointer = RNIL;
8126   ptr.p->connectState = ConnectRecord::FREE;
8127   ptr.p->nextPool = cfirstconnect;
8128   cfirstconnect = ptr.i;
8129 }
8130 
8131 void
execADD_FRAGCONF(Signal * signal)8132 Dbdih::execADD_FRAGCONF(Signal* signal){
8133   jamEntry();
8134   AddFragConf * const conf = (AddFragConf*)signal->getDataPtr();
8135 
8136   ConnectRecordPtr connectPtr;
8137   connectPtr.i = conf->dihPtr;
8138   ptrCheckGuard(connectPtr, cconnectFileSize, connectRecord);
8139 
8140   TabRecordPtr tabPtr;
8141   tabPtr.i = connectPtr.p->table;
8142   ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
8143 
8144   sendAddFragreq(signal, connectPtr, tabPtr, conf->fragId + 1);
8145 }
8146 
8147 void
execADD_FRAGREF(Signal * signal)8148 Dbdih::execADD_FRAGREF(Signal* signal){
8149   jamEntry();
8150   AddFragRef * const ref = (AddFragRef*)signal->getDataPtr();
8151 
8152   ConnectRecordPtr connectPtr;
8153   connectPtr.i = ref->dihPtr;
8154   ptrCheckGuard(connectPtr, cconnectFileSize, connectRecord);
8155 
8156   if (connectPtr.p->connectState == ConnectRecord::ALTER_TABLE)
8157   {
8158     jam();
8159 
8160     connectPtr.p->connectState = ConnectRecord::ALTER_TABLE_ABORT;
8161     drop_fragments(signal, connectPtr, connectPtr.p->m_alter.m_totalfragments);
8162     return;
8163   }
8164   else
8165   {
8166     DiAddTabRef * const ref = (DiAddTabRef*)signal->getDataPtr();
8167     ref->senderData = connectPtr.p->userpointer;
8168     ref->errorCode = ~0;
8169     sendSignal(connectPtr.p->userblockref, GSN_DIADDTABREF, signal,
8170 	       DiAddTabRef::SignalLength, JBB);
8171 
8172     // Release
8173     Ptr<TabRecord> tabPtr;
8174     tabPtr.i = connectPtr.p->table;
8175     ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
8176     ndbrequire(tabPtr.p->connectrec == connectPtr.i);
8177     tabPtr.p->connectrec = RNIL;
8178     release_connect(connectPtr);
8179   }
8180 }
8181 
8182 /*
8183   3.7.1.3   R E F U S E
8184   *********************
8185   */
8186 void
addtabrefuseLab(Signal * signal,ConnectRecordPtr connectPtr,Uint32 errorCode)8187 Dbdih::addtabrefuseLab(Signal* signal,
8188                        ConnectRecordPtr connectPtr, Uint32 errorCode)
8189 {
8190   signal->theData[0] = connectPtr.p->userpointer;
8191   signal->theData[1] = errorCode;
8192   sendSignal(connectPtr.p->userblockref, GSN_DIADDTABREF, signal, 2, JBB);
8193 
8194   Ptr<TabRecord> tabPtr;
8195   tabPtr.i = connectPtr.p->table;
8196   ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
8197   ndbrequire(tabPtr.p->connectrec == connectPtr.i);
8198   tabPtr.p->connectrec = RNIL;
8199 
8200   release_connect(connectPtr);
8201   return;
8202 }//Dbdih::addtabrefuseLab()
8203 
8204 /*
8205   3.7.2   A D D   T A B L E   D U P L I C A T I O N
8206   *************************************************
8207   */
8208 /*
8209   3.7.2.1    A D D   T A B L E   D U P L I C A T I O N   R E Q U E S T
8210   *******************************************************************=
8211   */
8212 
8213 /*
8214   D E L E T E   T A B L E
8215   **********************=
8216   */
8217 /*****************************************************************************/
8218 /***********              DELETE TABLE  MODULE                   *************/
8219 /*****************************************************************************/
8220 void
execDROP_TAB_REQ(Signal * signal)8221 Dbdih::execDROP_TAB_REQ(Signal* signal)
8222 {
8223   jamEntry();
8224   DropTabReq* req = (DropTabReq*)signal->getDataPtr();
8225 
8226   TabRecordPtr tabPtr;
8227   tabPtr.i = req->tableId;
8228   ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
8229 
8230   tabPtr.p->m_dropTab.tabUserRef = req->senderRef;
8231   tabPtr.p->m_dropTab.tabUserPtr = req->senderData;
8232 
8233   DropTabReq::RequestType rt = (DropTabReq::RequestType)req->requestType;
8234 
8235   switch(rt){
8236   case DropTabReq::OnlineDropTab:
8237     jam();
8238     ndbrequire(tabPtr.p->tabStatus == TabRecord::TS_DROPPING);
8239     break;
8240   case DropTabReq::CreateTabDrop:
8241     jam();
8242     break;
8243   case DropTabReq::RestartDropTab:
8244     break;
8245   }
8246 
8247   if(isMaster())
8248   {
8249     /**
8250      * Remove from queue
8251      */
8252     NodeRecordPtr nodePtr;
8253     for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
8254       jam();
8255       ptrAss(nodePtr, nodeRecord);
8256       if (c_lcpState.m_participatingLQH.get(nodePtr.i))
8257       {
8258 
8259 	Uint32 index = 0;
8260 	Uint32 count = nodePtr.p->noOfQueuedChkpt;
8261 	while(index < count){
8262 	  if(nodePtr.p->queuedChkpt[index].tableId == tabPtr.i){
8263 	    jam();
8264 	    //	    g_eventLogger->info("Unqueuing %d", index);
8265 
8266 	    count--;
8267 	    for(Uint32 i = index; i<count; i++){
8268 	      jam();
8269 	      nodePtr.p->queuedChkpt[i] = nodePtr.p->queuedChkpt[i + 1];
8270 	    }
8271 	  } else {
8272 	    index++;
8273 	  }
8274 	}
8275 	nodePtr.p->noOfQueuedChkpt = count;
8276       }
8277     }
8278   }
8279 
8280   {
8281     /**
8282      * Check table lcp state
8283      */
8284     bool ok = false;
8285     switch(tabPtr.p->tabLcpStatus){
8286     case TabRecord::TLS_COMPLETED:
8287     case TabRecord::TLS_WRITING_TO_FILE:
8288       ok = true;
8289       jam();
8290       break;
8291       return;
8292     case TabRecord::TLS_ACTIVE:
8293       ok = true;
8294       jam();
8295 
8296       tabPtr.p->tabLcpStatus = TabRecord::TLS_COMPLETED;
8297 
8298       /**
8299        * First check if all fragments are done
8300        */
8301       if (checkLcpAllTablesDoneInLqh(__LINE__))
8302       {
8303 	jam();
8304 
8305         g_eventLogger->info("This is the last table");
8306 
8307 	/**
8308 	 * Then check if saving of tab info is done for all tables
8309 	 */
8310 	LcpStatus a = c_lcpState.lcpStatus;
8311 	checkLcpCompletedLab(signal);
8312 
8313         if(a != c_lcpState.lcpStatus)
8314         {
8315           g_eventLogger->info("And all tables are written to already written disk");
8316         }
8317       }
8318       break;
8319     }
8320     ndbrequire(ok);
8321   }
8322 
8323   waitDropTabWritingToFile(signal, tabPtr);
8324 }
8325 
startDeleteFile(Signal * signal,TabRecordPtr tabPtr)8326 void Dbdih::startDeleteFile(Signal* signal, TabRecordPtr tabPtr)
8327 {
8328   if (tabPtr.p->tabFile[0] == RNIL) {
8329     jam();
8330     initTableFile(tabPtr);
8331   }//if
8332   openTableFileForDelete(signal, tabPtr.p->tabFile[0]);
8333 }//Dbdih::startDeleteFile()
8334 
openTableFileForDelete(Signal * signal,Uint32 fileIndex)8335 void Dbdih::openTableFileForDelete(Signal* signal, Uint32 fileIndex)
8336 {
8337   FileRecordPtr filePtr;
8338   filePtr.i = fileIndex;
8339   ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
8340   openFileRw(signal, filePtr);
8341   filePtr.p->reqStatus = FileRecord::TABLE_OPEN_FOR_DELETE;
8342 }//Dbdih::openTableFileForDelete()
8343 
tableOpenLab(Signal * signal,FileRecordPtr filePtr)8344 void Dbdih::tableOpenLab(Signal* signal, FileRecordPtr filePtr)
8345 {
8346   closeFileDelete(signal, filePtr);
8347   filePtr.p->reqStatus = FileRecord::TABLE_CLOSE_DELETE;
8348   return;
8349 }//Dbdih::tableOpenLab()
8350 
tableDeleteLab(Signal * signal,FileRecordPtr filePtr)8351 void Dbdih::tableDeleteLab(Signal* signal, FileRecordPtr filePtr)
8352 {
8353   TabRecordPtr tabPtr;
8354   tabPtr.i = filePtr.p->tabRef;
8355   ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
8356   if (filePtr.i == tabPtr.p->tabFile[0]) {
8357     jam();
8358     openTableFileForDelete(signal, tabPtr.p->tabFile[1]);
8359     return;
8360   }//if
8361   ndbrequire(filePtr.i == tabPtr.p->tabFile[1]);
8362 
8363   releaseFile(tabPtr.p->tabFile[0]);
8364   releaseFile(tabPtr.p->tabFile[1]);
8365   tabPtr.p->tabFile[0] = tabPtr.p->tabFile[1] = RNIL;
8366 
8367   tabPtr.p->tabStatus = TabRecord::TS_IDLE;
8368 
8369   DropTabConf * const dropConf = (DropTabConf *)signal->getDataPtrSend();
8370   dropConf->senderRef = reference();
8371   dropConf->senderData = tabPtr.p->m_dropTab.tabUserPtr;
8372   dropConf->tableId = tabPtr.i;
8373   sendSignal(tabPtr.p->m_dropTab.tabUserRef, GSN_DROP_TAB_CONF,
8374 	     signal, DropTabConf::SignalLength, JBB);
8375 
8376   tabPtr.p->m_dropTab.tabUserPtr = RNIL;
8377   tabPtr.p->m_dropTab.tabUserRef = 0;
8378   releaseTable(tabPtr);
8379 }//Dbdih::tableDeleteLab()
8380 
8381 
releaseTable(TabRecordPtr tabPtr)8382 void Dbdih::releaseTable(TabRecordPtr tabPtr)
8383 {
8384   FragmentstorePtr fragPtr;
8385   if (tabPtr.p->noOfFragChunks > 0) {
8386     for (Uint32 fragId = 0; fragId < tabPtr.p->totalfragments; fragId++) {
8387       jam();
8388       getFragstore(tabPtr.p, fragId, fragPtr);
8389       dec_ng_refcount(getNodeGroup(fragPtr.p->preferredPrimary));
8390       releaseReplicas(& fragPtr.p->storedReplicas);
8391       releaseReplicas(& fragPtr.p->oldStoredReplicas);
8392     }//for
8393     releaseFragments(tabPtr);
8394   }
8395   if (tabPtr.p->tabFile[0] != RNIL) {
8396     jam();
8397     releaseFile(tabPtr.p->tabFile[0]);
8398     releaseFile(tabPtr.p->tabFile[1]);
8399     tabPtr.p->tabFile[0] = tabPtr.p->tabFile[1] = RNIL;
8400   }//if
8401 }//Dbdih::releaseTable()
8402 
releaseReplicas(Uint32 * replicaPtrI)8403 void Dbdih::releaseReplicas(Uint32 * replicaPtrI)
8404 {
8405   ReplicaRecordPtr replicaPtr;
8406   replicaPtr.i = * replicaPtrI;
8407   jam();
8408   while (replicaPtr.i != RNIL) {
8409     jam();
8410     ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
8411     Uint32 tmp = replicaPtr.p->nextReplica;
8412     replicaPtr.p->nextReplica = cfirstfreeReplica;
8413     cfirstfreeReplica = replicaPtr.i;
8414     replicaPtr.i = tmp;
8415     cnoFreeReplicaRec++;
8416   }//while
8417 
8418   * replicaPtrI = RNIL;
8419 }//Dbdih::releaseReplicas()
8420 
seizeReplicaRec(ReplicaRecordPtr & replicaPtr)8421 void Dbdih::seizeReplicaRec(ReplicaRecordPtr& replicaPtr)
8422 {
8423   replicaPtr.i = cfirstfreeReplica;
8424   ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
8425   cfirstfreeReplica = replicaPtr.p->nextReplica;
8426   cnoFreeReplicaRec--;
8427   replicaPtr.p->nextReplica = RNIL;
8428 }//Dbdih::seizeReplicaRec()
8429 
releaseFile(Uint32 fileIndex)8430 void Dbdih::releaseFile(Uint32 fileIndex)
8431 {
8432   FileRecordPtr filePtr;
8433   filePtr.i = fileIndex;
8434   ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
8435   filePtr.p->nextFile = cfirstfreeFile;
8436   cfirstfreeFile = filePtr.i;
8437 }//Dbdih::releaseFile()
8438 
8439 
execALTER_TAB_REQ(Signal * signal)8440 void Dbdih::execALTER_TAB_REQ(Signal * signal)
8441 {
8442   const AlterTabReq* req = (const AlterTabReq*)signal->getDataPtr();
8443   const Uint32 senderRef = req->senderRef;
8444   const Uint32 senderData = req->senderData;
8445   const Uint32 tableId = req->tableId;
8446   const Uint32 tableVersion = req->tableVersion;
8447   const Uint32 newTableVersion = req->newTableVersion;
8448   AlterTabReq::RequestType requestType =
8449     (AlterTabReq::RequestType) req->requestType;
8450 
8451   TabRecordPtr tabPtr;
8452   tabPtr.i = tableId;
8453   ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
8454 
8455   switch(requestType){
8456   case AlterTabReq::AlterTablePrepare:
8457     jam();
8458     // fall through
8459   case AlterTabReq::AlterTableRevert:
8460     jam();
8461     if (AlterTableReq::getAddFragFlag(req->changeMask) &&
8462         tabPtr.p->tabCopyStatus != TabRecord::CS_IDLE)
8463     {
8464       jam();
8465       SectionHandle handle(this, signal);
8466       sendSignalWithDelay(reference(), GSN_ALTER_TAB_REQ, signal, 100,
8467                           signal->getLength(), &handle);
8468       return;
8469     }
8470   case AlterTabReq::AlterTableCommit:
8471     jam();
8472   case AlterTabReq::AlterTableComplete:
8473     jam();
8474   case AlterTabReq::AlterTableWaitScan:
8475     jam();
8476     break;
8477   default:
8478     jamLine(requestType);
8479   }
8480 
8481   ConnectRecordPtr connectPtr;
8482   connectPtr.i = RNIL;
8483   switch (requestType) {
8484   case AlterTabReq::AlterTablePrepare:
8485     jam();
8486 
8487     ndbrequire(cfirstconnect != RNIL);
8488     connectPtr.i = cfirstconnect;
8489     ptrCheckGuard(connectPtr, cconnectFileSize, connectRecord);
8490     cfirstconnect = connectPtr.p->nextPool;
8491 
8492     connectPtr.p->m_alter.m_totalfragments = tabPtr.p->totalfragments;
8493     connectPtr.p->m_alter.m_org_totalfragments = tabPtr.p->totalfragments;
8494     connectPtr.p->m_alter.m_changeMask = req->changeMask;
8495     connectPtr.p->m_alter.m_new_map_ptr_i = req->new_map_ptr_i;
8496     connectPtr.p->userpointer = senderData;
8497     connectPtr.p->userblockref = senderRef;
8498     connectPtr.p->connectState = ConnectRecord::ALTER_TABLE;
8499     connectPtr.p->table = tabPtr.i;
8500     tabPtr.p->connectrec = connectPtr.i;
8501     break;
8502   case AlterTabReq::AlterTableRevert:
8503     jam();
8504     tabPtr.p->schemaVersion = tableVersion;
8505 
8506     connectPtr.i = req->connectPtr;
8507     ptrCheckGuard(connectPtr, cconnectFileSize, connectRecord);
8508 
8509     ndbrequire(connectPtr.p->connectState == ConnectRecord::ALTER_TABLE);
8510 
8511     connectPtr.p->userpointer = senderData;
8512     connectPtr.p->userblockref = senderRef;
8513 
8514     if (AlterTableReq::getAddFragFlag(req->changeMask))
8515     {
8516       jam();
8517       tabPtr.p->tabCopyStatus = TabRecord::CS_ALTER_TABLE;
8518       connectPtr.p->connectState = ConnectRecord::ALTER_TABLE_REVERT;
8519       drop_fragments(signal, connectPtr,
8520                      connectPtr.p->m_alter.m_totalfragments);
8521       return;
8522     }
8523 
8524     send_alter_tab_conf(signal, connectPtr);
8525 
8526     ndbrequire(tabPtr.p->connectrec == connectPtr.i);
8527     tabPtr.p->connectrec = RNIL;
8528     release_connect(connectPtr);
8529     return;
8530     break;
8531   case AlterTabReq::AlterTableCommit:
8532     jam();
8533     tabPtr.p->schemaVersion = newTableVersion;
8534 
8535     connectPtr.i = req->connectPtr;
8536     ptrCheckGuard(connectPtr, cconnectFileSize, connectRecord);
8537     connectPtr.p->userpointer = senderData;
8538     connectPtr.p->userblockref = senderRef;
8539     ndbrequire(connectPtr.p->connectState == ConnectRecord::ALTER_TABLE);
8540 
8541     tabPtr.p->totalfragments = connectPtr.p->m_alter.m_totalfragments;
8542     if (AlterTableReq::getReorgFragFlag(connectPtr.p->m_alter.m_changeMask))
8543     {
8544       jam();
8545       DIH_TAB_WRITE_LOCK(tabPtr.p);
8546       Uint32 save = tabPtr.p->m_map_ptr_i;
8547       tabPtr.p->m_map_ptr_i = tabPtr.p->m_new_map_ptr_i;
8548       tabPtr.p->m_new_map_ptr_i = save;
8549 
8550       for (Uint32 i = 0; i<tabPtr.p->totalfragments; i++)
8551       {
8552         jam();
8553         FragmentstorePtr fragPtr;
8554         getFragstore(tabPtr.p, i, fragPtr);
8555         fragPtr.p->distributionKey = (fragPtr.p->distributionKey + 1) & 0xFF;
8556       }
8557       DIH_TAB_WRITE_UNLOCK(tabPtr.p);
8558 
8559       ndbassert(tabPtr.p->m_scan_count[1] == 0);
8560       tabPtr.p->m_scan_count[1] = tabPtr.p->m_scan_count[0];
8561       tabPtr.p->m_scan_count[0] = 0;
8562       tabPtr.p->m_scan_reorg_flag = 1;
8563 
8564       send_alter_tab_conf(signal, connectPtr);
8565       return;
8566     }
8567 
8568     send_alter_tab_conf(signal, connectPtr);
8569     ndbrequire(tabPtr.p->connectrec == connectPtr.i);
8570     tabPtr.p->connectrec = RNIL;
8571     release_connect(connectPtr);
8572     return;
8573   case AlterTabReq::AlterTableComplete:
8574     jam();
8575     connectPtr.i = req->connectPtr;
8576     ptrCheckGuard(connectPtr, cconnectFileSize, connectRecord);
8577     connectPtr.p->userpointer = senderData;
8578     connectPtr.p->userblockref = senderRef;
8579 
8580     send_alter_tab_conf(signal, connectPtr);
8581 
8582     DIH_TAB_WRITE_LOCK(tabPtr.p);
8583     tabPtr.p->m_new_map_ptr_i = RNIL;
8584     tabPtr.p->m_scan_reorg_flag = 0;
8585     DIH_TAB_WRITE_UNLOCK(tabPtr.p);
8586 
8587     ndbrequire(tabPtr.p->connectrec == connectPtr.i);
8588     tabPtr.p->connectrec = RNIL;
8589     release_connect(connectPtr);
8590     return;
8591   case AlterTabReq::AlterTableWaitScan:{
8592     jam();
8593     Uint64 now = NdbTick_CurrentMillisecond();
8594     now /= 1000;
8595     signal->theData[0] = DihContinueB::ZWAIT_OLD_SCAN;
8596     signal->theData[1] = tabPtr.i;
8597     signal->theData[2] = senderRef;
8598     signal->theData[3] = senderData;
8599     signal->theData[4] = connectPtr.i;
8600     signal->theData[5] = Uint32(now >> 32);
8601     signal->theData[6] = Uint32(now);
8602     signal->theData[7] = 3;
8603     sendSignal(reference(), GSN_CONTINUEB, signal, 8, JBB);
8604     return;
8605   }
8606   default:
8607     ndbrequire(false);
8608     break;
8609   }
8610 
8611   if (AlterTableReq::getAddFragFlag(req->changeMask))
8612   {
8613     jam();
8614     SegmentedSectionPtr ptr;
8615     SectionHandle handle(this, signal);
8616     handle.getSection(ptr, 0);
8617     union {
8618       Uint16 buf[2+2*MAX_NDB_PARTITIONS];
8619       Uint32 _align[1];
8620     };
8621     copy(_align, ptr);
8622     releaseSections(handle);
8623     Uint32 err;
8624     Uint32 save = tabPtr.p->totalfragments;
8625     if ((err = add_fragments_to_table(tabPtr, buf)))
8626     {
8627       jam();
8628       ndbrequire(tabPtr.p->totalfragments == save);
8629       ndbrequire(connectPtr.p->m_alter.m_org_totalfragments == save);
8630       send_alter_tab_ref(signal, tabPtr, connectPtr, err);
8631 
8632       ndbrequire(tabPtr.p->connectrec == connectPtr.i);
8633       tabPtr.p->connectrec = RNIL;
8634       release_connect(connectPtr);
8635       return;
8636     }
8637 
8638     tabPtr.p->tabCopyStatus = TabRecord::CS_ALTER_TABLE;
8639     connectPtr.p->m_alter.m_totalfragments = tabPtr.p->totalfragments;
8640     tabPtr.p->totalfragments = save; // Dont make the available yet...
8641     sendAddFragreq(signal, connectPtr, tabPtr,
8642                    connectPtr.p->m_alter.m_org_totalfragments);
8643     return;
8644   }
8645 
8646   send_alter_tab_conf(signal, connectPtr);
8647 }
8648 
8649 Uint32
add_fragments_to_table(Ptr<TabRecord> tabPtr,const Uint16 buf[])8650 Dbdih::add_fragments_to_table(Ptr<TabRecord> tabPtr, const Uint16 buf[])
8651 {
8652   Uint32 replicas = buf[0];
8653   Uint32 cnt = buf[1];
8654 
8655   Uint32 i = 0;
8656   Uint32 err = 0;
8657   Uint32 current = tabPtr.p->totalfragments;
8658   for (i = 0; i<cnt; i++)
8659   {
8660     FragmentstorePtr fragPtr;
8661     if (ERROR_INSERTED(7212) && cnt)
8662     {
8663       err = 1;
8664       CLEAR_ERROR_INSERT_VALUE;
8665       goto error;
8666     }
8667 
8668     if ((err = add_fragment_to_table(tabPtr, current + i, fragPtr)))
8669       goto error;
8670 
8671     fragPtr.p->m_log_part_id = buf[2+(1 + replicas)*i];
8672     fragPtr.p->preferredPrimary = buf[2+(1 + replicas)*i + 1];
8673 
8674     inc_ng_refcount(getNodeGroup(fragPtr.p->preferredPrimary));
8675 
8676     Uint32 activeIndex = 0;
8677     for (Uint32 j = 0; j<replicas; j++)
8678     {
8679       const Uint32 nodeId = buf[2+(1 + replicas)*i + 1 + j];
8680       ReplicaRecordPtr replicaPtr;
8681       allocStoredReplica(fragPtr, replicaPtr, nodeId);
8682       if (getNodeStatus(nodeId) == NodeRecord::ALIVE) {
8683         jam();
8684         ndbrequire(activeIndex < MAX_REPLICAS);
8685         fragPtr.p->activeNodes[activeIndex] = nodeId;
8686         activeIndex++;
8687       } else {
8688         jam();
8689         removeStoredReplica(fragPtr, replicaPtr);
8690         linkOldStoredReplica(fragPtr, replicaPtr);
8691       }
8692     }
8693     fragPtr.p->fragReplicas = activeIndex;
8694   }
8695 
8696   return 0;
8697 error:
8698   for(i = i + current; i != current; i--)
8699   {
8700     release_fragment_from_table(tabPtr, i);
8701   }
8702 
8703   return err;
8704 }
8705 
8706 void
wait_old_scan(Signal * signal)8707 Dbdih::wait_old_scan(Signal* signal)
8708 {
8709   jam();
8710 
8711   TabRecordPtr tabPtr;
8712   tabPtr.i = signal->theData[1];
8713   ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
8714 
8715   if (tabPtr.p->m_scan_count[1] == 0)
8716   {
8717     jam();
8718     Uint32 senderRef = signal->theData[2];
8719     Uint32 senderData = signal->theData[3];
8720     Uint32 connectPtrI = signal->theData[4];
8721 
8722     AlterTabConf* conf = (AlterTabConf*)signal->getDataPtrSend();
8723     conf->senderRef = reference();
8724     conf->senderData = senderData;
8725     conf->connectPtr = connectPtrI;
8726     sendSignal(senderRef, GSN_ALTER_TAB_CONF, signal,
8727                AlterTabConf::SignalLength, JBB);
8728     return;
8729   }
8730 
8731   Uint32 start_hi = signal->theData[5];
8732   Uint32 start_lo = signal->theData[6];
8733   Uint64 start = (Uint64(start_hi) << 32) + start_lo;
8734   Uint32 wait = signal->theData[7];
8735   Uint64 now = NdbTick_CurrentMillisecond() / 1000;
8736   if (now > start + wait)
8737   {
8738     infoEvent("Waiting(%u) for scans(%u) to complete on table %u",
8739               Uint32(now - start),
8740               tabPtr.p->m_scan_count[1],
8741               tabPtr.i);
8742 
8743     if (wait == 3)
8744     {
8745       signal->theData[7] = 3 + 7;
8746     }
8747     else
8748     {
8749       signal->theData[7] = 2 * wait;
8750     }
8751   }
8752 
8753   sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 1000, 7);
8754 }
8755 
8756 Uint32
add_fragment_to_table(Ptr<TabRecord> tabPtr,Uint32 fragId,Ptr<Fragmentstore> & fragPtr)8757 Dbdih::add_fragment_to_table(Ptr<TabRecord> tabPtr,
8758                              Uint32 fragId,
8759                              Ptr<Fragmentstore>& fragPtr)
8760 {
8761   Uint32 fragments = tabPtr.p->totalfragments;
8762   Uint32 chunks = tabPtr.p->noOfFragChunks;
8763 
8764   ndbrequire(fragId == fragments); // Only add at the end
8765 
8766   if (ERROR_INSERTED(7211))
8767   {
8768     CLEAR_ERROR_INSERT_VALUE;
8769     return 1;
8770   }
8771 
8772   Uint32 allocated = chunks << LOG_NO_OF_FRAGS_PER_CHUNK;
8773   if (fragId < allocated)
8774   {
8775     jam();
8776     tabPtr.p->totalfragments++;
8777     getFragstore(tabPtr.p, fragId, fragPtr);
8778     return 0;
8779   }
8780 
8781   /**
8782    * Allocate a new chunk
8783    */
8784   fragPtr.i = cfirstfragstore;
8785   if (fragPtr.i == RNIL)
8786   {
8787     jam();
8788     return -1;
8789   }
8790 
8791   ptrCheckGuard(fragPtr, cfragstoreFileSize, fragmentstore);
8792   cfirstfragstore = fragPtr.p->nextFragmentChunk;
8793   ndbrequire(cremainingfrags >= NO_OF_FRAGS_PER_CHUNK);
8794   cremainingfrags -= NO_OF_FRAGS_PER_CHUNK;
8795 
8796   ndbrequire(chunks < NDB_ARRAY_SIZE(tabPtr.p->startFid));
8797   tabPtr.p->startFid[chunks] = fragPtr.i;
8798   for (Uint32 i = 0; i<NO_OF_FRAGS_PER_CHUNK; i++)
8799   {
8800     jam();
8801     Ptr<Fragmentstore> tmp;
8802     tmp.i = fragPtr.i + i;
8803     ptrCheckGuard(tmp, cfragstoreFileSize, fragmentstore);
8804     initFragstore(tmp);
8805   }
8806 
8807   tabPtr.p->totalfragments++;
8808   tabPtr.p->noOfFragChunks++;
8809 
8810   return 0;
8811 }
8812 
8813 void
release_fragment_from_table(Ptr<TabRecord> tabPtr,Uint32 fragId)8814 Dbdih::release_fragment_from_table(Ptr<TabRecord> tabPtr, Uint32 fragId)
8815 {
8816   FragmentstorePtr fragPtr;
8817   Uint32 fragments = tabPtr.p->totalfragments;
8818   Uint32 chunks = tabPtr.p->noOfFragChunks;
8819 
8820   if (fragId >= fragments)
8821   {
8822     jam();
8823     return;
8824   }
8825   ndbrequire(fragId == fragments - 1); // only remove at end
8826   ndbrequire(fragments != 0);
8827 
8828   getFragstore(tabPtr.p, fragId, fragPtr);
8829   dec_ng_refcount(getNodeGroup(fragPtr.p->preferredPrimary));
8830 
8831   releaseReplicas(& fragPtr.p->storedReplicas);
8832   releaseReplicas(& fragPtr.p->oldStoredReplicas);
8833 
8834   if (fragId == ((chunks - 1) << LOG_NO_OF_FRAGS_PER_CHUNK))
8835   {
8836     jam();
8837 
8838     getFragstore(tabPtr.p, fragId, fragPtr);
8839 
8840     fragPtr.p->nextFragmentChunk = cfirstfragstore;
8841     cfirstfragstore = fragPtr.i;
8842     cremainingfrags += NO_OF_FRAGS_PER_CHUNK;
8843     tabPtr.p->noOfFragChunks = chunks - 1;
8844   }
8845 
8846   tabPtr.p->totalfragments--;
8847 }
8848 
8849 void
send_alter_tab_ref(Signal * signal,Ptr<TabRecord> tabPtr,Ptr<ConnectRecord> connectPtr,Uint32 errCode)8850 Dbdih::send_alter_tab_ref(Signal* signal,
8851                           Ptr<TabRecord> tabPtr,
8852                           Ptr<ConnectRecord> connectPtr,
8853                           Uint32 errCode)
8854 {
8855   AlterTabRef* ref = (AlterTabRef*)signal->getDataPtrSend();
8856   ref->senderRef = reference();
8857   ref->senderData = connectPtr.p->userpointer;
8858   ref->errorCode = errCode;
8859   sendSignal(connectPtr.p->userblockref, GSN_ALTER_TAB_REF, signal,
8860              AlterTabRef::SignalLength, JBB);
8861 }
8862 
8863 void
send_alter_tab_conf(Signal * signal,Ptr<ConnectRecord> connectPtr)8864 Dbdih::send_alter_tab_conf(Signal* signal, Ptr<ConnectRecord> connectPtr)
8865 {
8866   AlterTabConf* conf = (AlterTabConf*)signal->getDataPtrSend();
8867   conf->senderRef = reference();
8868   conf->senderData = connectPtr.p->userpointer;
8869   conf->connectPtr = connectPtr.i;
8870   sendSignal(connectPtr.p->userblockref, GSN_ALTER_TAB_CONF, signal,
8871              AlterTabConf::SignalLength, JBB);
8872 }
8873 
8874 void
saveTableFile(Signal * signal,Ptr<ConnectRecord> connectPtr,Ptr<TabRecord> tabPtr,TabRecord::CopyStatus expectedStatus,Callback & cb)8875 Dbdih::saveTableFile(Signal* signal,
8876                      Ptr<ConnectRecord> connectPtr,
8877                      Ptr<TabRecord> tabPtr,
8878                      TabRecord::CopyStatus expectedStatus,
8879                      Callback& cb)
8880 {
8881   ndbrequire(connectPtr.i == cb.m_callbackData);         // required
8882   ndbrequire(tabPtr.p->tabCopyStatus == expectedStatus); // locking
8883   memcpy(&connectPtr.p->m_callback, &cb, sizeof(Callback));
8884 
8885   tabPtr.p->tabCopyStatus = TabRecord::CS_COPY_TO_SAVE;
8886   tabPtr.p->tabUpdateState = TabRecord::US_CALLBACK;
8887   signal->theData[0] = DihContinueB::ZPACK_TABLE_INTO_PAGES;
8888   signal->theData[1] = tabPtr.i;
8889   sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
8890 }
8891 
8892 void
alter_table_writeTable_conf(Signal * signal,Uint32 ptrI,Uint32 err)8893 Dbdih::alter_table_writeTable_conf(Signal* signal, Uint32 ptrI, Uint32 err)
8894 {
8895   jamEntry();
8896   ndbrequire(err == 0);
8897 
8898   ConnectRecordPtr connectPtr;
8899   connectPtr.i = ptrI;
8900   ptrCheckGuard(connectPtr, cconnectFileSize, connectRecord);
8901 
8902   switch(connectPtr.p->connectState){
8903   case ConnectRecord::ALTER_TABLE_REVERT:
8904   {
8905     jam();
8906     send_alter_tab_conf(signal, connectPtr);
8907 
8908     Ptr<TabRecord> tabPtr;
8909     tabPtr.i = connectPtr.p->table;
8910     ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
8911     ndbrequire(tabPtr.p->connectrec == connectPtr.i);
8912     tabPtr.p->connectrec = RNIL;
8913     release_connect(connectPtr);
8914     return;
8915   }
8916   case ConnectRecord::ALTER_TABLE:
8917   {
8918     jam();
8919     send_alter_tab_conf(signal, connectPtr);
8920     return;
8921   }
8922   default:
8923     jamLine(connectPtr.p->connectState);
8924     ndbrequire(false);
8925   }
8926 }
8927 
8928 void
drop_fragments(Signal * signal,Ptr<ConnectRecord> connectPtr,Uint32 curr)8929 Dbdih::drop_fragments(Signal* signal, Ptr<ConnectRecord> connectPtr,
8930                       Uint32 curr)
8931 {
8932   ndbrequire(curr >= connectPtr.p->m_alter.m_org_totalfragments);
8933   if (curr == connectPtr.p->m_alter.m_org_totalfragments)
8934   {
8935     /**
8936      * done...
8937      */
8938     jam();
8939     Ptr<TabRecord> tabPtr;
8940     tabPtr.i = connectPtr.p->table;
8941     ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
8942 
8943     Uint32 new_frags = connectPtr.p->m_alter.m_totalfragments;
8944     Uint32 org_frags = connectPtr.p->m_alter.m_org_totalfragments;
8945     tabPtr.p->totalfragments = new_frags;
8946     for (Uint32 i = new_frags - 1; i >= org_frags; i--)
8947     {
8948       jam();
8949       release_fragment_from_table(tabPtr, i);
8950     }
8951     connectPtr.p->m_alter.m_totalfragments = org_frags;
8952 
8953     switch(connectPtr.p->connectState){
8954     case ConnectRecord::ALTER_TABLE_ABORT:
8955     {
8956       jam();
8957       ndbrequire(tabPtr.p->tabCopyStatus == TabRecord::CS_ALTER_TABLE);
8958       tabPtr.p->tabCopyStatus = TabRecord::CS_IDLE;
8959       send_alter_tab_ref(signal, tabPtr, connectPtr, ~0);
8960 
8961       connectPtr.p->connectState = ConnectRecord::ALTER_TABLE;
8962       return;
8963     }
8964     case ConnectRecord::ALTER_TABLE_REVERT:
8965     {
8966       jam();
8967       Callback cb;
8968       cb.m_callbackData = connectPtr.i;
8969       cb.m_callbackFunction = safe_cast(&Dbdih::alter_table_writeTable_conf);
8970       saveTableFile(signal, connectPtr, tabPtr, TabRecord::CS_ALTER_TABLE, cb);
8971       return;
8972     }
8973     default:
8974       jamLine(connectPtr.p->connectState);
8975       ndbrequire(false);
8976     }
8977     return;
8978   }
8979 
8980   ndbrequire(curr > 0);
8981   DropFragReq* req = (DropFragReq*)signal->getDataPtrSend();
8982   req->senderRef = reference();
8983   req->senderData = connectPtr.i;
8984   req->tableId = connectPtr.p->table;
8985   req->fragId = curr - 1;
8986   req->requestInfo = DropFragReq::AlterTableAbort;
8987   sendSignal(DBLQH_REF, GSN_DROP_FRAG_REQ, signal,
8988              DropFragReq::SignalLength, JBB);
8989 }
8990 
8991 void
execDROP_FRAG_REF(Signal * signal)8992 Dbdih::execDROP_FRAG_REF(Signal* signal)
8993 {
8994   ndbrequire(false);
8995 }
8996 
8997 void
execDROP_FRAG_CONF(Signal * signal)8998 Dbdih::execDROP_FRAG_CONF(Signal* signal)
8999 {
9000   DropFragConf* conf = (DropFragConf*)signal->getDataPtr();
9001 
9002   ConnectRecordPtr connectPtr;
9003   connectPtr.i = conf->senderData;
9004   ptrCheckGuard(connectPtr, cconnectFileSize, connectRecord);
9005 
9006   drop_fragments(signal, connectPtr, conf->fragId);
9007 }
9008 
9009 /*
9010   G E T   N O D E S
9011   **********************=
9012   */
9013 /*****************************************************************************/
9014 /* **********     TRANSACTION  HANDLING  MODULE                  *************/
9015 /*****************************************************************************/
9016 /*
9017   3.8.1    G E T   N O D E S   R E Q U E S T
9018   ******************************************
9019   Asks what nodes should be part of a transaction.
9020 */
execDIGETNODESREQ(Signal * signal)9021 void Dbdih::execDIGETNODESREQ(Signal* signal)
9022 {
9023   const DiGetNodesReq * const req = (DiGetNodesReq *)&signal->theData[0];
9024   FragmentstorePtr fragPtr;
9025   TabRecordPtr tabPtr;
9026   tabPtr.i = req->tableId;
9027   Uint32 hashValue = req->hashValue;
9028   Uint32 ttabFileSize = ctabFileSize;
9029   Uint32 fragId, newFragId = RNIL;
9030   DiGetNodesConf * const conf = (DiGetNodesConf *)&signal->theData[0];
9031   TabRecord* regTabDesc = tabRecord;
9032   EmulatedJamBuffer * jambuf = * (EmulatedJamBuffer**)(req->jamBuffer);
9033   thrjamEntry(jambuf);
9034   ptrCheckGuard(tabPtr, ttabFileSize, regTabDesc);
9035 
9036   if (DictTabInfo::isOrderedIndex(tabPtr.p->tableType))
9037   {
9038     thrjam(jambuf);
9039     tabPtr.i = tabPtr.p->primaryTableId;
9040     ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
9041   }
9042 
9043 loop:
9044   Uint32 val = tabPtr.p->m_lock.read_lock();
9045   Uint32 map_ptr_i = tabPtr.p->m_map_ptr_i;
9046   Uint32 new_map_ptr_i = tabPtr.p->m_new_map_ptr_i;
9047 
9048   /* When distr key indicator is set, regardless
9049    * of distribution algorithm in use, hashValue
9050    * IS fragment id.
9051    */
9052   if (req->distr_key_indicator)
9053   {
9054     fragId = hashValue;
9055     if (unlikely(fragId >= tabPtr.p->totalfragments))
9056     {
9057       thrjam(jambuf);
9058       conf->zero= 1; //Indicate error;
9059       signal->theData[1]= ZUNDEFINED_FRAGMENT_ERROR;
9060       return;
9061     }
9062   }
9063   else if (tabPtr.p->method == TabRecord::HASH_MAP)
9064   {
9065     thrjam(jambuf);
9066     Ptr<Hash2FragmentMap> ptr;
9067     g_hash_map.getPtr(ptr, map_ptr_i);
9068     fragId = ptr.p->m_map[hashValue % ptr.p->m_cnt];
9069 
9070     if (unlikely(new_map_ptr_i != RNIL))
9071     {
9072       thrjam(jambuf);
9073       g_hash_map.getPtr(ptr, new_map_ptr_i);
9074       newFragId = ptr.p->m_map[hashValue % ptr.p->m_cnt];
9075       if (newFragId == fragId)
9076       {
9077         thrjam(jambuf);
9078         newFragId = RNIL;
9079       }
9080     }
9081   }
9082   else if (tabPtr.p->method == TabRecord::LINEAR_HASH)
9083   {
9084     thrjam(jambuf);
9085     fragId = hashValue & tabPtr.p->mask;
9086     if (fragId < tabPtr.p->hashpointer) {
9087       thrjam(jambuf);
9088       fragId = hashValue & ((tabPtr.p->mask << 1) + 1);
9089     }//if
9090   }
9091   else if (tabPtr.p->method == TabRecord::NORMAL_HASH)
9092   {
9093     thrjam(jambuf);
9094     fragId= hashValue % tabPtr.p->totalfragments;
9095   }
9096   else
9097   {
9098     thrjam(jambuf);
9099     ndbassert(tabPtr.p->method == TabRecord::USER_DEFINED);
9100 
9101     /* User defined partitioning, but no distribution key passed */
9102     conf->zero= 1; //Indicate error;
9103     signal->theData[1]= ZUNDEFINED_FRAGMENT_ERROR;
9104     return;
9105   }
9106   getFragstore(tabPtr.p, fragId, fragPtr);
9107   Uint32 nodeCount = extractNodeInfo(fragPtr.p, conf->nodes);
9108   Uint32 sig2 = (nodeCount - 1) +
9109     (fragPtr.p->distributionKey << 16) +
9110     (dihGetInstanceKey(fragPtr) << 24);
9111   conf->zero = 0;
9112   conf->reqinfo = sig2;
9113   conf->fragId = fragId;
9114 
9115   if (unlikely(newFragId != RNIL))
9116   {
9117     thrjam(jambuf);
9118     conf->reqinfo |= DiGetNodesConf::REORG_MOVING;
9119     getFragstore(tabPtr.p, newFragId, fragPtr);
9120     nodeCount = extractNodeInfo(fragPtr.p, conf->nodes + 2 + MAX_REPLICAS);
9121     conf->nodes[MAX_REPLICAS] = newFragId;
9122     conf->nodes[MAX_REPLICAS + 1] = (nodeCount - 1) +
9123       (fragPtr.p->distributionKey << 16) +
9124       (dihGetInstanceKey(fragPtr) << 24);
9125   }
9126 
9127   if (unlikely(!tabPtr.p->m_lock.read_unlock(val)))
9128     goto loop;
9129 }//Dbdih::execDIGETNODESREQ()
9130 
extractNodeInfo(const Fragmentstore * fragPtr,Uint32 nodes[])9131 Uint32 Dbdih::extractNodeInfo(const Fragmentstore * fragPtr, Uint32 nodes[])
9132 {
9133   Uint32 nodeCount = 0;
9134   nodes[0] = nodes[1] = nodes[2] = nodes[3] = 0;
9135   for (Uint32 i = 0; i < fragPtr->fragReplicas; i++) {
9136     jam();
9137     NodeRecordPtr nodePtr;
9138     ndbrequire(i < MAX_REPLICAS);
9139     nodePtr.i = fragPtr->activeNodes[i];
9140     ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
9141     if (nodePtr.p->useInTransactions) {
9142       jam();
9143       nodes[nodeCount] = nodePtr.i;
9144       nodeCount++;
9145     }//if
9146   }//for
9147   ndbrequire(nodeCount > 0);
9148   return nodeCount;
9149 }//Dbdih::extractNodeInfo()
9150 
9151 void
getFragstore(TabRecord * tab,Uint32 fragNo,FragmentstorePtr & fragptr)9152 Dbdih::getFragstore(TabRecord * tab,        //In parameter
9153                     Uint32 fragNo,              //In parameter
9154                     FragmentstorePtr & fragptr) //Out parameter
9155 {
9156   FragmentstorePtr fragPtr;
9157   Uint32 TfragstoreFileSize = cfragstoreFileSize;
9158   Fragmentstore* TfragStore = fragmentstore;
9159   Uint32 chunkNo = fragNo >> LOG_NO_OF_FRAGS_PER_CHUNK;
9160   Uint32 chunkIndex = fragNo & (NO_OF_FRAGS_PER_CHUNK - 1);
9161   fragPtr.i = tab->startFid[chunkNo] + chunkIndex;
9162   if (likely(chunkNo < NDB_ARRAY_SIZE(tab->startFid))) {
9163     ptrCheckGuard(fragPtr, TfragstoreFileSize, TfragStore);
9164     fragptr = fragPtr;
9165     return;
9166   }//if
9167   ndbrequire(false);
9168 }//Dbdih::getFragstore()
9169 
allocFragments(Uint32 noOfFragments,TabRecordPtr tabPtr)9170 void Dbdih::allocFragments(Uint32 noOfFragments, TabRecordPtr tabPtr)
9171 {
9172   FragmentstorePtr fragPtr;
9173   Uint32 noOfChunks = (noOfFragments + (NO_OF_FRAGS_PER_CHUNK - 1)) >> LOG_NO_OF_FRAGS_PER_CHUNK;
9174   ndbrequire(cremainingfrags >= noOfFragments);
9175   for (Uint32 i = 0; i < noOfChunks; i++) {
9176     jam();
9177     Uint32 baseFrag = cfirstfragstore;
9178     ndbrequire(i < NDB_ARRAY_SIZE(tabPtr.p->startFid));
9179     tabPtr.p->startFid[i] = baseFrag;
9180     fragPtr.i = baseFrag;
9181     ptrCheckGuard(fragPtr, cfragstoreFileSize, fragmentstore);
9182     cfirstfragstore = fragPtr.p->nextFragmentChunk;
9183     cremainingfrags -= NO_OF_FRAGS_PER_CHUNK;
9184     for (Uint32 j = 0; j < NO_OF_FRAGS_PER_CHUNK; j++) {
9185       jam();
9186       fragPtr.i = baseFrag + j;
9187       ptrCheckGuard(fragPtr, cfragstoreFileSize, fragmentstore);
9188       initFragstore(fragPtr);
9189     }//if
9190   }//for
9191   tabPtr.p->noOfFragChunks = noOfChunks;
9192 }//Dbdih::allocFragments()
9193 
releaseFragments(TabRecordPtr tabPtr)9194 void Dbdih::releaseFragments(TabRecordPtr tabPtr)
9195 {
9196   FragmentstorePtr fragPtr;
9197   for (Uint32 i = 0; i < tabPtr.p->noOfFragChunks; i++) {
9198     jam();
9199     ndbrequire(i < NDB_ARRAY_SIZE(tabPtr.p->startFid));
9200     Uint32 baseFrag = tabPtr.p->startFid[i];
9201     fragPtr.i = baseFrag;
9202     ptrCheckGuard(fragPtr, cfragstoreFileSize, fragmentstore);
9203     fragPtr.p->nextFragmentChunk = cfirstfragstore;
9204     cfirstfragstore = baseFrag;
9205     tabPtr.p->startFid[i] = RNIL;
9206     cremainingfrags += NO_OF_FRAGS_PER_CHUNK;
9207   }//for
9208   tabPtr.p->noOfFragChunks = 0;
9209 }//Dbdih::releaseFragments()
9210 
initialiseFragstore()9211 void Dbdih::initialiseFragstore()
9212 {
9213   Uint32 i;
9214   FragmentstorePtr fragPtr;
9215   for (i = 0; i < cfragstoreFileSize; i++) {
9216     fragPtr.i = i;
9217     ptrCheckGuard(fragPtr, cfragstoreFileSize, fragmentstore);
9218     initFragstore(fragPtr);
9219   }//for
9220   Uint32 noOfChunks = cfragstoreFileSize >> LOG_NO_OF_FRAGS_PER_CHUNK;
9221   fragPtr.i = 0;
9222   cfirstfragstore = RNIL;
9223   cremainingfrags = 0;
9224   for (i = 0; i < noOfChunks; i++) {
9225     refresh_watch_dog();
9226     ptrCheckGuard(fragPtr, cfragstoreFileSize, fragmentstore);
9227     fragPtr.p->nextFragmentChunk = cfirstfragstore;
9228     cfirstfragstore = fragPtr.i;
9229     fragPtr.i += NO_OF_FRAGS_PER_CHUNK;
9230     cremainingfrags += NO_OF_FRAGS_PER_CHUNK;
9231   }//for
9232 }//Dbdih::initialiseFragstore()
9233 
/**
 * Fallbacks for the barrier calls used by the verify queue below:
 * rmb() and wmb() expand to an empty statement unless NDB_HAVE_RMB
 * respectively NDB_HAVE_WMB is defined.
 * NOTE(review): presumably those symbols are defined together with a
 * real rmb()/wmb() elsewhere - confirm.
 */
#if !defined(NDB_HAVE_RMB)
#define rmb() do { } while (0)
#endif

#if !defined(NDB_HAVE_WMB)
#define wmb() do { } while (0)
#endif
9241 
9242 inline
9243 bool
isEmpty(const DIVERIFY_queue & q)9244 Dbdih::isEmpty(const DIVERIFY_queue & q)
9245 {
9246   return q.cfirstVerifyQueue == q.clastVerifyQueue;
9247 }
9248 
9249 inline
9250 void
enqueue(DIVERIFY_queue & q,Uint32 senderData,Uint64 gci)9251 Dbdih::enqueue(DIVERIFY_queue & q, Uint32 senderData, Uint64 gci)
9252 {
9253 #ifndef NDEBUG
9254   /**
9255    * - assert only
9256    * - we must read first *before* "publishing last
9257    *   or else DIH-thread could already have consumed entry
9258    *   when we call assert
9259    */
9260   Uint32 first = q.cfirstVerifyQueue;
9261 #endif
9262 
9263   Uint32 last = q.clastVerifyQueue;
9264   ApiConnectRecord * apiConnectRecord = q.apiConnectRecord;
9265 
9266   apiConnectRecord[last].senderData = senderData;
9267   apiConnectRecord[last].apiGci = gci;
9268   wmb();
9269   if (last + 1 == capiConnectFileSize)
9270   {
9271     q.clastVerifyQueue = 0;
9272   }
9273   else
9274   {
9275     q.clastVerifyQueue = last + 1;
9276   }
9277   assert(q.clastVerifyQueue != first);
9278 }
9279 
9280 inline
9281 void
dequeue(DIVERIFY_queue & q,ApiConnectRecord & conRecord)9282 Dbdih::dequeue(DIVERIFY_queue & q, ApiConnectRecord & conRecord)
9283 {
9284   Uint32 first = q.cfirstVerifyQueue;
9285   ApiConnectRecord * apiConnectRecord = q.apiConnectRecord;
9286 
9287   rmb();
9288   conRecord.senderData = apiConnectRecord[first].senderData;
9289   conRecord.apiGci = apiConnectRecord[first].apiGci;
9290 
9291   if (first + 1 == capiConnectFileSize)
9292   {
9293     q.cfirstVerifyQueue = 0;
9294   }
9295   else
9296   {
9297     q.cfirstVerifyQueue = first + 1;
9298   }
9299 }
9300 
9301 /*
9302   3.9   V E R I F I C A T I O N
9303   ****************************=
9304   */
9305 /****************************************************************************/
9306 /* **********     VERIFICATION SUB-MODULE                       *************/
9307 /****************************************************************************/
9308 /*
9309   3.9.1     R E C E I V I N G  O F  V E R I F I C A T I O N   R E Q U E S T
9310   *************************************************************************
9311   */
execDIVERIFYREQ(Signal * signal)9312 void Dbdih::execDIVERIFYREQ(Signal* signal)
9313 {
9314   EmulatedJamBuffer * jambuf = * (EmulatedJamBuffer**)(signal->theData+2);
9315   thrjamEntry(jambuf);
9316   Uint32 qno = signal->theData[1];
9317   ndbassert(qno < NDB_ARRAY_SIZE(c_diverify_queue));
9318   DIVERIFY_queue & q = c_diverify_queue[qno];
9319 loop:
9320   Uint32 val = m_micro_gcp.m_lock.read_lock();
9321   Uint32 blocked = getBlockCommit() == true ? 1 : 0;
9322   if (blocked == 0 && isEmpty(q))
9323   {
9324     thrjam(jambuf);
9325     /*-----------------------------------------------------------------------*/
9326     // We are not blocked and the verify queue was empty currently so we can
9327     // simply reply back to TC immediately. The method was called with
9328     // EXECUTE_DIRECT so we reply back by setting signal data and returning.
9329     // theData[0] already contains the correct information so
9330     // we need not touch it.
9331     /*-----------------------------------------------------------------------*/
9332     signal->theData[1] = (Uint32)(m_micro_gcp.m_current_gci >> 32);
9333     signal->theData[2] = (Uint32)(m_micro_gcp.m_current_gci & 0xFFFFFFFF);
9334     signal->theData[3] = 0;
9335     if (unlikely(! m_micro_gcp.m_lock.read_unlock(val)))
9336       goto loop;
9337     return;
9338   }//if
9339   /*-------------------------------------------------------------------------*/
9340   // Since we are blocked we need to put this operation last in the verify
9341   // queue to ensure that operation starts up in the correct order.
9342   /*-------------------------------------------------------------------------*/
9343   enqueue(q, signal->theData[0], m_micro_gcp.m_new_gci);
9344   if (blocked == 0 && jambuf == jamBuffer())
9345   {
9346     emptyverificbuffer(signal, 0, false);
9347   }
9348   signal->theData[3] = blocked + 1; // Indicate no immediate return
9349   return;
9350 }//Dbdih::execDIVERIFYREQ()
9351 
execDIH_SCAN_TAB_REQ(Signal * signal)9352 void Dbdih::execDIH_SCAN_TAB_REQ(Signal* signal)
9353 {
9354   DihScanTabReq * req = (DihScanTabReq*)signal->getDataPtr();
9355   TabRecordPtr tabPtr;
9356   const Uint32 senderData = req->senderData;
9357   const Uint32 senderRef = req->senderRef;
9358   const Uint32 schemaTransId = req->schemaTransId;
9359 
9360   jamEntry();
9361 
9362   tabPtr.i = req->tableId;
9363   ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
9364 
9365   if (tabPtr.p->tabStatus != TabRecord::TS_ACTIVE)
9366   {
9367     if (! (tabPtr.p->tabStatus == TabRecord::TS_CREATING &&
9368            tabPtr.p->schemaTransId == schemaTransId))
9369     {
9370       jam();
9371       goto error;
9372     }
9373   }
9374 
9375   tabPtr.p->m_scan_count[0]++;
9376 
9377   {
9378     DihScanTabConf* conf = (DihScanTabConf*)signal->getDataPtrSend();
9379     conf->tableId = tabPtr.i;
9380     conf->senderData = senderData;
9381     conf->fragmentCount = tabPtr.p->totalfragments;
9382     conf->noOfBackups = tabPtr.p->noOfBackups;
9383     conf->scanCookie = tabPtr.p->m_map_ptr_i;
9384     conf->reorgFlag = tabPtr.p->m_scan_reorg_flag;
9385     sendSignal(senderRef, GSN_DIH_SCAN_TAB_CONF, signal,
9386                DihScanTabConf::SignalLength, JBB);
9387   }
9388   return;
9389 
9390 error:
9391   DihScanTabRef* ref = (DihScanTabRef*)signal->getDataPtrSend();
9392   ref->tableId = tabPtr.i;
9393   ref->senderData = senderData;
9394   ref->error = DihScanTabRef::ErroneousTableState;
9395   ref->tableStatus = tabPtr.p->tabStatus;
9396   ref->schemaTransId = schemaTransId;
9397   sendSignal(senderRef, GSN_DIH_SCAN_TAB_REF, signal,
9398              DihScanTabRef::SignalLength, JBB);
9399   return;
9400 
9401 }//Dbdih::execDI_FCOUNTREQ()
9402 
execDIH_SCAN_GET_NODES_REQ(Signal * signal)9403 void Dbdih::execDIH_SCAN_GET_NODES_REQ(Signal* signal)
9404 {
9405   FragmentstorePtr fragPtr;
9406   TabRecordPtr tabPtr;
9407   jamEntry();
9408   DihScanGetNodesReq* req = (DihScanGetNodesReq*)signal->getDataPtrSend();
9409   Uint32 senderRef = req->senderRef;
9410   Uint32 senderData = req->senderData;
9411   Uint32 fragId = req->fragId;
9412 
9413   tabPtr.i = req->tableId;
9414   ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
9415   if (DictTabInfo::isOrderedIndex(tabPtr.p->tableType)) {
9416     jam();
9417     tabPtr.i = tabPtr.p->primaryTableId;
9418     ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
9419   }
9420 
9421   Uint32 nodes[MAX_REPLICAS];
9422   getFragstore(tabPtr.p, fragId, fragPtr);
9423   Uint32 count = extractNodeInfo(fragPtr.p, nodes);
9424 
9425   DihScanGetNodesConf* conf = (DihScanGetNodesConf*)signal->getDataPtrSend();
9426   conf->senderData = senderData;
9427   conf->nodes[0] = nodes[0];
9428   conf->nodes[1] = nodes[1];
9429   conf->nodes[2] = nodes[2];
9430   conf->nodes[3] = nodes[3];
9431   conf->count = count;
9432   conf->tableId = tabPtr.i;
9433   conf->fragId = fragId;
9434   conf->instanceKey = dihGetInstanceKey(fragPtr);
9435   sendSignal(senderRef, GSN_DIH_SCAN_GET_NODES_CONF, signal,
9436              DihScanGetNodesConf::SignalLength, JBB);
9437 }//Dbdih::execDIGETPRIMREQ()
9438 
9439 void
execDIH_SCAN_TAB_COMPLETE_REP(Signal * signal)9440 Dbdih::execDIH_SCAN_TAB_COMPLETE_REP(Signal* signal)
9441 {
9442   jamEntry();
9443   DihScanTabCompleteRep* rep = (DihScanTabCompleteRep*)signal->getDataPtr();
9444   TabRecordPtr tabPtr;
9445   tabPtr.i = rep->tableId;
9446   Uint32 map_ptr_i = rep->scanCookie;
9447   ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
9448 
9449   if (map_ptr_i == tabPtr.p->m_map_ptr_i)
9450   {
9451     jam();
9452     ndbassert(tabPtr.p->m_scan_count[0]);
9453     tabPtr.p->m_scan_count[0]--;
9454   }
9455   else
9456   {
9457     jam();
9458     ndbassert(tabPtr.p->m_scan_count[1]);
9459     tabPtr.p->m_scan_count[1]--;
9460   }
9461 }
9462 
9463 
9464 /****************************************************************************/
9465 /* **********     GLOBAL-CHECK-POINT HANDLING  MODULE           *************/
9466 /****************************************************************************/
9467 /*
9468   3.10   G L O B A L  C H E C K P O I N T ( IN  M A S T E R  R O L E)
9469   *******************************************************************
9470   */
9471 
9472 bool
check_enable_micro_gcp(Signal * signal,bool broadcast)9473 Dbdih::check_enable_micro_gcp(Signal* signal, bool broadcast)
9474 {
9475   ndbassert(m_micro_gcp.m_enabled == false);
9476   ndbassert(NodeVersionInfo::DataLength == 6);
9477   Uint32 min = ~(Uint32)0;
9478   const NodeVersionInfo& info = getNodeVersionInfo();
9479   for (Uint32 i = 0; i<3; i++)
9480   {
9481     Uint32 tmp = info.m_type[i].m_min_version;
9482     if (tmp)
9483     {
9484       min = (min < tmp) ? min : tmp;
9485     }
9486   }
9487 
9488   if (ndb_check_micro_gcp(min))
9489   {
9490     jam();
9491     m_micro_gcp.m_enabled = true;
9492 
9493     infoEvent("Enabling micro GCP");
9494     if (broadcast)
9495     {
9496       jam();
9497       UpgradeProtocolOrd * ord = (UpgradeProtocolOrd*)signal->getDataPtrSend();
9498       ord->type = UpgradeProtocolOrd::UPO_ENABLE_MICRO_GCP;
9499 
9500       /**
9501        * We need to notify all ndbd's or they'll get confused!
9502        */
9503       NodeRecordPtr specNodePtr;
9504       specNodePtr.i = cfirstAliveNode;
9505       do {
9506         jam();
9507         ptrCheckGuard(specNodePtr, MAX_NDB_NODES, nodeRecord);
9508         sendSignal(calcDihBlockRef(specNodePtr.i), GSN_UPGRADE_PROTOCOL_ORD,
9509                    signal, UpgradeProtocolOrd::SignalLength, JBA);
9510         specNodePtr.i = specNodePtr.p->nextNode;
9511       } while (specNodePtr.i != RNIL);
9512       EXECUTE_DIRECT(QMGR,GSN_UPGRADE_PROTOCOL_ORD,signal,signal->getLength());
9513     }
9514   }
9515   return m_micro_gcp.m_enabled;
9516 }
9517 
9518 void
execUPGRADE_PROTOCOL_ORD(Signal * signal)9519 Dbdih::execUPGRADE_PROTOCOL_ORD(Signal* signal)
9520 {
9521   const UpgradeProtocolOrd* ord = (UpgradeProtocolOrd*)signal->getDataPtr();
9522   switch(ord->type){
9523   case UpgradeProtocolOrd::UPO_ENABLE_MICRO_GCP:
9524     jam();
9525     m_micro_gcp.m_enabled = true;
9526     EXECUTE_DIRECT(QMGR, GSN_UPGRADE_PROTOCOL_ORD,signal, signal->getLength());
9527     return;
9528   }
9529 }
9530 
/**
 * Master-role attempt to start the next global checkpoint round.
 *
 * The function re-arms itself with a delayed CONTINUEB(ZSTART_GCP) and
 * returns when any c_diverify_queue entry still has m_empty_done == 0,
 * when cgcpOrderBlocked is set, or when the configured interval since
 * m_micro_gcp.m_master.m_start_time has not elapsed yet.  When a node
 * start has ordered GCP blocking (c_nodeStartMaster.blockGcp != 0) and the
 * master save state is idle, control goes to gcpBlockedLab() instead.
 *
 * Otherwise the new GCI is chosen (current + 1, or the next gci_hi with
 * gci_lo == 0 when micro GCP is disabled or a save is due), the master
 * state becomes M_GCP_PREPARE and GCP_PREPARE is sent through
 * sendLoopMacro (or through one of the ERROR_INSERT variants).
 *
 * @param signal     Signal object reused for everything sent from here.
 * @param aWaitTime  Not referenced anywhere in this function body.
 */
9531 void
startGcpLab(Signal * signal,Uint32 aWaitTime)9532 Dbdih::startGcpLab(Signal* signal, Uint32 aWaitTime)
9533 {
9534   for (Uint32 i = 0; i < c_diverify_queue_cnt; i++)
9535   {
9536     if (c_diverify_queue[i].m_empty_done == 0)
9537     {
9538       // Previous global checkpoint is not yet completed.
9539       jam();
            // Retry after 10 ms (4th argument is the delay, 5th the length).
9540       signal->theData[0] = DihContinueB::ZSTART_GCP;
9541       sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 10, 1);
9542       return;
9543     }
9544   }
9545 
9546   emptyWaitGCPMasterQueue(signal,
9547                           m_micro_gcp.m_current_gci,
9548                           c_waitEpochMasterList);
9549 
9550   if (c_nodeStartMaster.blockGcp != 0 &&
9551       m_gcp_save.m_master.m_state == GcpSave::GCP_SAVE_IDLE)
9552   {
9553     jam();
9554 
9555     /* ------------------------------------------------------------------ */
9556     /*  A NEW NODE WANTS IN AND WE MUST ALLOW IT TO COME IN NOW SINCE THE */
9557     /*       GCP IS COMPLETED.                                            */
9558     /* ------------------------------------------------------------------ */
9559 
9560     if (ERROR_INSERTED(7217))
9561     {
9562       jam();
9563 
            // Test hook: send NDB_TAMPER 9999 to CMVMI on the starting node
            // and carry on with the GCP instead of blocking it.
9564       signal->theData[0] = 9999;
9565       sendSignal(numberToRef(CMVMI, refToNode(c_nodeStartMaster.startNode)),
9566                  GSN_NDB_TAMPER, signal, 1, JBB);
9567 
9568       m_micro_gcp.m_master.m_start_time = 0; // Force start
9569       // fall through
9570     }
9571     else
9572     {
9573       jam();
9574       ndbrequire(c_nodeStartMaster.blockGcp == 1); // Ordered...
9575       c_nodeStartMaster.blockGcp = 2; // effective
9576       gcpBlockedLab(signal);
9577       return;
9578     }
9579   }
9580 
9581   if (cgcpOrderBlocked)
9582   {
9583     jam();
9584     signal->theData[0] = DihContinueB::ZSTART_GCP;
9585     sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 10, 1);
9586     return;
9587   }
9588 
        // Interval to respect: the micro GCP interval when micro GCP is
        // enabled, otherwise the GCP save interval.
9589   Uint32 delayMicro = m_micro_gcp.m_enabled ?
9590     m_micro_gcp.m_master.m_time_between_gcp :
9591     m_gcp_save.m_master.m_time_between_gcp;
9592 
9593   Uint64 now = c_current_time = NdbTick_CurrentMillisecond();
9594   if (! (now >= m_micro_gcp.m_master.m_start_time + delayMicro))
9595   {
          // Too early for the next round; poll again later.
9596     jam();
9597     signal->theData[0] = DihContinueB::ZSTART_GCP;
9598     sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 10, 1);
9599     return;
9600   }
9601 
9602   m_micro_gcp.m_master.m_start_time = now;
9603 
9604   if (m_micro_gcp.m_enabled == false &&
9605       m_micro_gcp.m_master.m_time_between_gcp)
9606   {
9607     /**
9608      * Micro GCP is disabled...but configured...
9609      */
9610     jam();
9611     check_enable_micro_gcp(signal, true);
9612   }
9613 
9614   /**
9615    * Check that there has not been more than 2^32 micro GCP wo/ any save
9616    */
9617   Uint64 currGCI = m_micro_gcp.m_current_gci;
9618   ndbrequire(Uint32(currGCI) != ~(Uint32)0);
        // Default: advance only the low 32 bits of the GCI.
9619   m_micro_gcp.m_master.m_new_gci = currGCI + 1;
9620 
9621   Uint32 delaySave = m_gcp_save.m_master.m_time_between_gcp;
9622   if ((m_micro_gcp.m_enabled == false) ||
9623       (now >= m_gcp_save.m_master.m_start_time + delaySave &&
9624        m_gcp_save.m_master.m_state == GcpSave::GCP_SAVE_IDLE))
9625   {
9626     jam();
9627     /**
9628      * Time for save...switch gci_hi
9629      */
9630     m_gcp_save.m_master.m_start_time = now;
          // New GCI = (gci_hi + 1) in the high word, zero in the low word.
9631     m_micro_gcp.m_master.m_new_gci = Uint64((currGCI >> 32) + 1) << 32;
9632 
          // Event report to CMVMI carrying the current gci_hi / gci_lo.
9633     signal->theData[0] = NDB_LE_GlobalCheckpointStarted; //Event type
9634     signal->theData[1] = Uint32(currGCI >> 32);
9635     signal->theData[2] = Uint32(currGCI);
9636     sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3, JBB);
9637   }
9638 
9639   ndbassert(m_micro_gcp.m_enabled || Uint32(m_micro_gcp.m_new_gci) == 0);
9640 
9641 
9642   /***************************************************************************/
9643   // Report the event that a global checkpoint has started.
9644   /***************************************************************************/
9645 
9646   CRASH_INSERTION(7000);
9647   m_micro_gcp.m_master.m_state = MicroGcp::M_GCP_PREPARE;
9648   signal->setTrace(TestOrd::TraceGlobalCheckpoint);
9649 
9650 #ifdef ERROR_INSERT
9651   if (ERROR_INSERTED(7186))
9652   {
          // Test hook: GCP_PREPARE goes out via sendToRandomNodes(), then a
          // delayed (1000 ms) NDB_TAMPER 9999 is sent to the local CMVMI.
9653     sendToRandomNodes("GCP_PREPARE",
9654                       signal, &c_GCP_PREPARE_Counter, &Dbdih::sendGCP_PREPARE);
9655     signal->theData[0] = 9999;
9656     sendSignalWithDelay(CMVMI_REF, GSN_NDB_TAMPER, signal, 1000, 1);
9657     return;
9658   }
9659   else if (ERROR_INSERTED(7200))
9660   {
          // Test hook: send GCP_PREPARE to every alive node, switching the
          // error-insert value to 7201 before remote sends and 7202 before
          // the send to the own node; then tamper with one other node.
9661     c_GCP_PREPARE_Counter.clearWaitingFor();
9662     NodeRecordPtr nodePtr;
9663     nodePtr.i = cfirstAliveNode;
9664     do {
9665       jam();
9666       ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
9667       c_GCP_PREPARE_Counter.setWaitingFor(nodePtr.i);
9668       if (nodePtr.i != getOwnNodeId())
9669       {
9670         SET_ERROR_INSERT_VALUE(7201);
9671         sendGCP_PREPARE(signal, nodePtr.i, RNIL);
9672       }
9673       else
9674       {
9675         SET_ERROR_INSERT_VALUE(7202);
9676         sendGCP_PREPARE(signal, nodePtr.i, RNIL);
9677       }
9678       nodePtr.i = nodePtr.p->nextNode;
9679     } while (nodePtr.i != RNIL);
9680 
          // Victim: first node found in the waiting set once the own node
          // has been cleared from it.
9681     NodeReceiverGroup rg(CMVMI, c_GCP_PREPARE_Counter);
9682     rg.m_nodes.clear(getOwnNodeId());
9683     Uint32 victim = rg.m_nodes.find(0);
9684 
9685     signal->theData[0] = 9999;
9686     sendSignal(numberToRef(CMVMI, victim),
9687 	       GSN_NDB_TAMPER, signal, 1, JBA);
9688 
9689     CLEAR_ERROR_INSERT_VALUE;
9690     return;
9691   }
9692   else if (ERROR_INSERTED(7227))
9693   {
          // Test hook: wait for every alive node but skip sending
          // GCP_PREPARE to node c_error_insert_extra.
9694     ndbout_c("Not sending GCP_PREPARE to %u", c_error_insert_extra);
9695     c_GCP_PREPARE_Counter.clearWaitingFor();
9696     NodeRecordPtr nodePtr;
9697     nodePtr.i = cfirstAliveNode;
9698     do {
9699       jam();
9700       ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
9701       c_GCP_PREPARE_Counter.setWaitingFor(nodePtr.i);
9702       if (nodePtr.i != c_error_insert_extra)
9703       {
9704         sendGCP_PREPARE(signal, nodePtr.i, RNIL);
9705       }
9706       nodePtr.i = nodePtr.p->nextNode;
9707     } while (nodePtr.i != RNIL);
9708 
9709     signal->theData[0] = 9999;
9710     sendSignalWithDelay(CMVMI_REF, GSN_NDB_TAMPER, signal, 200, 1);
9711     return;
9712   }
9713 #endif
9714 
        // Normal path: GCP_PREPARE via sendLoopMacro; replies are handled in
        // execGCP_PREPARECONF().
9715   sendLoopMacro(GCP_PREPARE, sendGCP_PREPARE, RNIL);
9716 }//Dbdih::startGcpLab()
9717 
execGCP_PREPARECONF(Signal * signal)9718 void Dbdih::execGCP_PREPARECONF(Signal* signal)
9719 {
9720   jamEntry();
9721   Uint32 senderNodeId = signal->theData[0];
9722   Uint32 gci_hi = signal->theData[1];
9723   Uint32 gci_lo = signal->theData[2];
9724 
9725   if (unlikely(signal->getLength() < GCPPrepareConf::SignalLength))
9726   {
9727     gci_lo = 0;
9728     ndbassert(!ndb_check_micro_gcp(getNodeInfo(senderNodeId).m_version));
9729   }
9730 
9731   Uint64 gci = gci_lo | (Uint64(gci_hi) << 32);
9732   ndbrequire(gci == m_micro_gcp.m_master.m_new_gci);
9733   receiveLoopMacro(GCP_PREPARE, senderNodeId);
9734   //-------------------------------------------------------------
9735   // We have now received all replies. We are ready to continue
9736   // with committing the global checkpoint.
9737   //-------------------------------------------------------------
9738   gcpcommitreqLab(signal);
9739 }//Dbdih::execGCP_PREPARECONF()
9740 
gcpcommitreqLab(Signal * signal)9741 void Dbdih::gcpcommitreqLab(Signal* signal)
9742 {
9743   CRASH_INSERTION(7001);
9744 
9745   m_micro_gcp.m_master.m_state = MicroGcp::M_GCP_COMMIT;
9746 
9747 #ifdef ERROR_INSERT
9748   if (ERROR_INSERTED(7187))
9749   {
9750     sendToRandomNodes("GCP_COMMIT",
9751                       signal, &c_GCP_COMMIT_Counter, &Dbdih::sendGCP_COMMIT);
9752     signal->theData[0] = 9999;
9753     sendSignalWithDelay(CMVMI_REF, GSN_NDB_TAMPER, signal, 1000, 1);
9754     return;
9755   }
9756 #endif
9757 
9758   sendLoopMacro(GCP_COMMIT, sendGCP_COMMIT, RNIL);
9759   return;
9760 }//Dbdih::gcpcommitreqLab()
9761 
/**
 * Master-role handler for GCP_NODEFINISH, the reply counted against the
 * GCP_COMMIT counter.
 *
 * After the reply has been registered (receiveLoopMacro presumably returns
 * early while replies are still outstanding - macro body not visible here)
 * the function:
 *  - when micro GCP is enabled, moves the master state to M_GCP_COMPLETE
 *    and sends SUB_GCP_COMPLETE_REP for m_old_gci with the IN_MEMORY flag;
 *  - returns if gci_hi of the current and the old GCI are equal;
 *  - otherwise starts a save round: master save state GCP_SAVE_REQ,
 *    m_new_gci = old gci_hi, and GCP_SAVEREQ sent via sendLoopMacro.
 *
 * @param signal  Incoming signal: theData[0] = sender node id,
 *                theData[1] = gci_hi, theData[2] = failure number,
 *                theData[3] = gci_lo.
 */
execGCP_NODEFINISH(Signal * signal)9762 void Dbdih::execGCP_NODEFINISH(Signal* signal)
9763 {
9764   jamEntry();
9765   const Uint32 senderNodeId = signal->theData[0];
9766   const Uint32 gci_hi = signal->theData[1];
9767   const Uint32 failureNr = signal->theData[2];
9768   const Uint32 gci_lo = signal->theData[3];
9769   const Uint64 gci = gci_lo | (Uint64(gci_hi) << 32);
9770 
9771   (void)gci; // TODO validate
9772   (void)failureNr; // kill warning
9773 
9774   ndbrequire(m_micro_gcp.m_master.m_state == MicroGcp::M_GCP_COMMIT);
9775   receiveLoopMacro(GCP_COMMIT, senderNodeId);
9776 
9777   jam();
9778 
9779   if (m_micro_gcp.m_enabled)
9780   {
9781     jam();
9782 
9783     m_micro_gcp.m_master.m_state = MicroGcp::M_GCP_COMPLETE;
9784 
          // Build the report in the signal buffer; it describes the GCI
          // that was current before this commit (m_old_gci).
9785     SubGcpCompleteRep * rep = (SubGcpCompleteRep*)signal->getDataPtr();
9786     rep->senderRef = reference();
9787     rep->gci_hi = (Uint32)(m_micro_gcp.m_old_gci >> 32);
9788     rep->gci_lo = (Uint32)(m_micro_gcp.m_old_gci & 0xFFFFFFFF);
9789     rep->flags = SubGcpCompleteRep::IN_MEMORY;
9790 
9791 #ifdef ERROR_INSERT
9792     if (ERROR_INSERTED(7190))
9793     {
            // Test hook: send via sendToRandomNodes(), then a delayed
            // (1000 ms) NDB_TAMPER 9999 to the local CMVMI.  No return here.
9794       sendToRandomNodes("GCP_COMPLETE_REP", signal,
9795                         &c_SUB_GCP_COMPLETE_REP_Counter,
9796                         &Dbdih::sendSUB_GCP_COMPLETE_REP);
9797       signal->theData[0] = 9999;
9798       sendSignalWithDelay(CMVMI_REF, GSN_NDB_TAMPER, signal, 1000, 1);
9799     }
9800     else if (ERROR_INSERTED(7226))
9801     {
            // Test hook: wait for every alive node but skip the send to
            // node c_error_insert_extra, then arm error insert 7227.
9802       ndbout_c("Not sending SUB_GCP_COMPLETE_REP to %u", c_error_insert_extra);
9803       c_SUB_GCP_COMPLETE_REP_Counter.clearWaitingFor();
9804       NodeRecordPtr nodePtr;
9805       nodePtr.i = cfirstAliveNode;
9806       do {
9807         jam();
9808         ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
9809         c_SUB_GCP_COMPLETE_REP_Counter.setWaitingFor(nodePtr.i);
9810         if (nodePtr.i != c_error_insert_extra)
9811         {
9812           sendSignal(calcDihBlockRef(nodePtr.i), GSN_SUB_GCP_COMPLETE_REP,
9813                      signal, SubGcpCompleteRep::SignalLength, JBA);
9814         }
9815         nodePtr.i = nodePtr.p->nextNode;
9816       } while (nodePtr.i != RNIL);
9817       SET_ERROR_INSERT_VALUE(7227);
9818 
9819       signal->theData[0] = 9999;
9820       sendSignalWithDelay(CMVMI_REF, GSN_NDB_TAMPER, signal, 200, 1);
9821     }
9822     else
9823 #endif
9824     {
9825       jam();
9826       // Normal path...
9827       sendLoopMacro(SUB_GCP_COMPLETE_REP, sendSUB_GCP_COMPLETE_REP, RNIL);
9828     }
9829   }
9830 
9831   //-------------------------------------------------------------
9832   // We have now received all replies. We are ready to continue
9833   // with saving the global checkpoint to disk.
9834   //-------------------------------------------------------------
9835   CRASH_INSERTION(7002);
9836 
9837   Uint32 curr_hi = (Uint32)(m_micro_gcp.m_current_gci >> 32);
9838   Uint32 old_hi = (Uint32)(m_micro_gcp.m_old_gci >> 32);
9839 
9840   if (m_micro_gcp.m_enabled)
9841   {
9842     jam();
9843   }
9844   else
9845   {
          // Without micro GCP every round is required to change gci_hi.
9846     ndbrequire(curr_hi != old_hi);
9847   }
9848 
9849   if (curr_hi == old_hi)
9850   {
          // gci_hi unchanged: no save round is started for this GCP.
9851     jam();
9852     return;
9853   }
9854 
9855   /**
9856    * Start a save
9857    */
9858   Uint32 saveGCI = old_hi;
9859   m_gcp_save.m_master.m_state = GcpSave::GCP_SAVE_REQ;
9860   m_gcp_save.m_master.m_new_gci = saveGCI;
9861 
9862 #ifdef ERROR_INSERT
9863   if (ERROR_INSERTED(7188))
9864   {
9865     sendToRandomNodes("GCP_SAVE",
9866                       signal, &c_GCP_SAVEREQ_Counter, &Dbdih::sendGCP_SAVEREQ);
9867     signal->theData[0] = 9999;
9868     sendSignalWithDelay(CMVMI_REF, GSN_NDB_TAMPER, signal, 1000, 1);
9869     return;
9870   }
9871   else if (ERROR_INSERTED(7216))
9872   {
          // Test hook: take node c_error_insert_extra off the alive list
          // while GCP_SAVEREQ is sent, put it back, and still mark it as
          // being waited for.
9873     infoEvent("GCP_SAVE all/%u", c_error_insert_extra);
9874     NodeRecordPtr nodePtr;
9875     nodePtr.i = c_error_insert_extra;
9876     ptrAss(nodePtr, nodeRecord);
9877 
9878     removeAlive(nodePtr);
9879     sendLoopMacro(GCP_SAVEREQ, sendGCP_SAVEREQ, RNIL);
9880     insertAlive(nodePtr);
9881     signal->theData[0] = 9999;
9882     sendSignalWithDelay(CMVMI_REF, GSN_NDB_TAMPER, signal, 1000, 1);
9883     c_GCP_SAVEREQ_Counter.setWaitingFor(c_error_insert_extra);
9884     return;
9885   }
9886 #endif
9887 
9888   sendLoopMacro(GCP_SAVEREQ, sendGCP_SAVEREQ, RNIL);
9889 }
9890 
9891 void
execSUB_GCP_COMPLETE_ACK(Signal * signal)9892 Dbdih::execSUB_GCP_COMPLETE_ACK(Signal* signal)
9893 {
9894   jamEntry();
9895   SubGcpCompleteAck ack = * CAST_CONSTPTR(SubGcpCompleteAck,
9896                                           signal->getDataPtr());
9897   Uint32 senderNodeId = refToNode(ack.rep.senderRef);
9898 
9899   ndbrequire(m_micro_gcp.m_master.m_state == MicroGcp::M_GCP_COMPLETE);
9900   receiveLoopMacro(SUB_GCP_COMPLETE_REP, senderNodeId);
9901 
9902   m_micro_gcp.m_master.m_state = MicroGcp::M_GCP_IDLE;
9903 
9904   if (!ERROR_INSERTED(7190))
9905   {
9906     signal->theData[0] = DihContinueB::ZSTART_GCP;
9907     sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 10, 1);
9908   }
9909 }
9910 
/**
 * Handler for GCP_SAVEREQ, working on this node's own m_gcp_save state
 * (not the m_master sub-state).
 *
 * Three cases, checked in order:
 *  1. A save is already in state GCP_SAVE_REQ: only the master reference
 *     is updated (the requested GCI must be the one being saved).
 *  2. The requested GCI equals the recorded m_gci: GCP_SAVECONF is sent
 *     straight back to the requester.
 *  3. Otherwise the state must be idle; the request is recorded and the
 *     signal is passed on to DBLQH with dihBlockRef replaced by this
 *     block's own reference.
 *
 * @param signal  Incoming signal carrying a GCPSaveReq.
 */
9911 void
execGCP_SAVEREQ(Signal * signal)9912 Dbdih::execGCP_SAVEREQ(Signal* signal)
9913 {
9914   jamEntry();
9915   GCPSaveReq * req = (GCPSaveReq*)&signal->theData[0];
9916 
9917   if (m_gcp_save.m_state == GcpSave::GCP_SAVE_REQ)
9918   {
9919     jam();
9920     /**
9921      * This is master take over...
9922      * and SAVE_REQ is already running
9923      */
9924     ndbrequire(m_gcp_save.m_gci == req->gci);
9925     m_gcp_save.m_master_ref = req->dihBlockRef;
9926     return;
9927   }
9928 
9929   if (m_gcp_save.m_gci == req->gci)
9930   {
9931     jam();
9932     /**
9933      * This is master take over...
9934      * and SAVE_REQ is complete...
9935      */
9936     m_gcp_save.m_master_ref = req->dihBlockRef;
9937 
          // Copy the request first: conf is written into the send buffer,
          // which presumably overlays the request data - TODO confirm.
9938     GCPSaveReq save = (* req);
9939     GCPSaveConf * conf = (GCPSaveConf*)signal->getDataPtrSend();
9940     conf->dihPtr = save.dihPtr;
9941     conf->nodeId = getOwnNodeId();
9942     conf->gci    = save.gci;
9943     sendSignal(m_gcp_save.m_master_ref, GSN_GCP_SAVECONF, signal,
9944                GCPSaveConf::SignalLength, JBA);
9945     return;
9946   }
9947 
9948   ndbrequire(m_gcp_save.m_state == GcpSave::GCP_SAVE_IDLE);
9949   m_gcp_save.m_state = GcpSave::GCP_SAVE_REQ;
9950   m_gcp_save.m_master_ref = req->dihBlockRef;
9951   m_gcp_save.m_gci = req->gci;
9952 
        // Pass the request on to DBLQH with this block as the reply address;
        // the answer comes back through execGCP_SAVECONF / execGCP_SAVEREF.
9953   req->dihBlockRef = reference();
9954   sendSignal(DBLQH_REF, GSN_GCP_SAVEREQ, signal, signal->getLength(), JBA);
9955 }
9956 
execGCP_SAVECONF(Signal * signal)9957 void Dbdih::execGCP_SAVECONF(Signal* signal)
9958 {
9959   jamEntry();
9960   GCPSaveConf * saveConf = (GCPSaveConf*)&signal->theData[0];
9961 
9962   if (refToBlock(signal->getSendersBlockRef()) == DBLQH)
9963   {
9964     jam();
9965 
9966     ndbrequire(m_gcp_save.m_state == GcpSave::GCP_SAVE_REQ);
9967     m_gcp_save.m_state = GcpSave::GCP_SAVE_CONF;
9968 
9969     sendSignal(m_gcp_save.m_master_ref,
9970                GSN_GCP_SAVECONF, signal, signal->getLength(), JBA);
9971     return;
9972   }
9973 
9974   ndbrequire(saveConf->gci == m_gcp_save.m_master.m_new_gci);
9975   ndbrequire(saveConf->nodeId == saveConf->dihPtr);
9976   SYSFILE->lastCompletedGCI[saveConf->nodeId] = saveConf->gci;
9977   GCP_SAVEhandling(signal, saveConf->nodeId);
9978 }//Dbdih::execGCP_SAVECONF()
9979 
execGCP_SAVEREF(Signal * signal)9980 void Dbdih::execGCP_SAVEREF(Signal* signal)
9981 {
9982   jamEntry();
9983   GCPSaveRef * const saveRef = (GCPSaveRef*)&signal->theData[0];
9984 
9985   if (refToBlock(signal->getSendersBlockRef()) == DBLQH)
9986   {
9987     jam();
9988 
9989     ndbrequire(m_gcp_save.m_state == GcpSave::GCP_SAVE_REQ);
9990     m_gcp_save.m_state = GcpSave::GCP_SAVE_CONF;
9991 
9992     sendSignal(m_gcp_save.m_master_ref,
9993                GSN_GCP_SAVEREF, signal, signal->getLength(), JBA);
9994     return;
9995   }
9996 
9997   ndbrequire(saveRef->gci == m_gcp_save.m_master.m_new_gci);
9998   ndbrequire(saveRef->nodeId == saveRef->dihPtr);
9999 
10000   /**
10001    * Only allow reason not to save
10002    */
10003   ndbrequire(saveRef->errorCode == GCPSaveRef::NodeShutdownInProgress ||
10004 	     saveRef->errorCode == GCPSaveRef::FakedSignalDueToNodeFailure ||
10005 	     saveRef->errorCode == GCPSaveRef::NodeRestartInProgress);
10006   GCP_SAVEhandling(signal, saveRef->nodeId);
10007 }//Dbdih::execGCP_SAVEREF()
10008 
GCP_SAVEhandling(Signal * signal,Uint32 nodeId)10009 void Dbdih::GCP_SAVEhandling(Signal* signal, Uint32 nodeId)
10010 {
10011   ndbrequire(m_gcp_save.m_master.m_state == GcpSave::GCP_SAVE_REQ);
10012   receiveLoopMacro(GCP_SAVEREQ, nodeId);
10013   /*-------------------------------------------------------------------------*/
10014   // All nodes have replied. We are ready to update the system file.
10015   /*-------------------------------------------------------------------------*/
10016 
10017   CRASH_INSERTION(7003);
10018   /**------------------------------------------------------------------------
10019    * SET NEW RECOVERABLE GCI. ALSO RESET RESTART COUNTER TO ZERO.
10020    * THIS INDICATES THAT THE SYSTEM HAS BEEN RECOVERED AND SURVIVED AT
10021    * LEAST ONE GLOBAL CHECKPOINT PERIOD. WE WILL USE THIS PARAMETER TO
10022    * SET BACK THE RESTART GCI IF WE ENCOUNTER MORE THAN ONE UNSUCCESSFUL
10023    * RESTART.
10024    *------------------------------------------------------------------------*/
10025   SYSFILE->newestRestorableGCI = m_gcp_save.m_gci;
10026   if(Sysfile::getInitialStartOngoing(SYSFILE->systemRestartBits) &&
10027      getNodeState().startLevel == NodeState::SL_STARTED){
10028     jam();
10029 #if 0
10030     g_eventLogger->info("Dbdih: Clearing initial start ongoing");
10031 #endif
10032     Sysfile::clearInitialStartOngoing(SYSFILE->systemRestartBits);
10033   }
10034   copyGciLab(signal, CopyGCIReq::GLOBAL_CHECKPOINT);
10035 
10036   m_gcp_save.m_master.m_state = GcpSave::GCP_SAVE_COPY_GCI;
10037 
10038 }//Dbdih::GCP_SAVEhandling()
10039 
10040 /*
10041   3.11   G L O B A L  C H E C K P O I N T (N O T - M A S T E R)
10042   *************************************************************
10043   */
/**
 * Handler for GCP_PREPARE, working on this node's own m_micro_gcp state.
 *
 * Three cases, checked in order:
 *  1. State is already M_GCP_PREPARE: only the master reference is
 *     updated (the GCI must match the one already prepared).
 *  2. The GCI equals the recorded m_new_gci: only the master reference is
 *     updated.
 *  3. Otherwise the state must be idle; under the write lock cgckptflag is
 *     set, the state becomes M_GCP_PREPARE and the new GCI and master
 *     reference are recorded.
 * In every case that reaches the reply label the signal is passed on to
 * SUMA and GCP_PREPARECONF is sent back to the master's DIH.
 *
 * @param signal  Incoming signal carrying a GCPPrepare.
 */
execGCP_PREPARE(Signal * signal)10044 void Dbdih::execGCP_PREPARE(Signal* signal)
10045 {
10046   jamEntry();
10047   CRASH_INSERTION(7005);
10048 
10049   if (ERROR_INSERTED(7030))
10050   {
          // Test hook: set cgckptflag and re-deliver this signal to
          // ourselves after 5000 ms.
10051     cgckptflag = true;
10052     g_eventLogger->info("Delayed GCP_PREPARE 5s");
10053     sendSignalWithDelay(reference(), GSN_GCP_PREPARE, signal, 5000,
10054 			signal->getLength());
10055     return;
10056   }
10057 
          // req and conf are two views of the signal data; the request
          // fields are copied into locals before conf is written at the end.
10058   GCPPrepare* req = (GCPPrepare*)signal->getDataPtr();
10059   GCPPrepareConf * conf = (GCPPrepareConf*)signal->getDataPtrSend();
10060   Uint32 masterNodeId = req->nodeId;
10061   Uint32 gci_hi = req->gci_hi;
10062   Uint32 gci_lo = req->gci_lo;
10063   if (unlikely(signal->getLength() < GCPPrepare::SignalLength))
10064   {
            // Short signal: treat gci_lo as 0.
10065     jam();
10066     gci_lo = 0;
10067     ndbassert(!ndb_check_micro_gcp(getNodeInfo(masterNodeId).m_version));
10068   }
10069   Uint64 gci = gci_lo | (Uint64(gci_hi) << 32);
10070 
10071   BlockReference retRef = calcDihBlockRef(masterNodeId);
10072 
10073   if (isMaster())
10074   {
10075     ndbrequire(m_micro_gcp.m_master.m_state == MicroGcp::M_GCP_PREPARE);
10076   }
10077 
10078   if (m_micro_gcp.m_state == MicroGcp::M_GCP_PREPARE)
10079   {
10080     jam();
10081     /**
10082      * This must be master take over
10083      *   Prepare is already complete
10084      */
10085     ndbrequire(m_micro_gcp.m_new_gci == gci);
10086     m_micro_gcp.m_master_ref = retRef;
10087     goto reply;
10088   }
10089 
10090   if (m_micro_gcp.m_new_gci == gci)
10091   {
10092     jam();
10093     /**
10094      * This GCP has already been prepared...
10095      *   Must be master takeover
10096      */
10097     m_micro_gcp.m_master_ref = retRef;
10098     goto reply;
10099   }
10100 
10101   ndbrequire(m_micro_gcp.m_state == MicroGcp::M_GCP_IDLE);
10102 
          // Record the prepared GCP under the write lock.
10103   m_micro_gcp.m_lock.write_lock();
10104   cgckptflag = true;
10105   m_micro_gcp.m_state = MicroGcp::M_GCP_PREPARE;
10106   m_micro_gcp.m_new_gci = gci;
10107   m_micro_gcp.m_master_ref = retRef;
10108   m_micro_gcp.m_lock.write_unlock();
10109 
10110   if (ERROR_INSERTED(7031))
10111   {
            // Test hook: no reply is sent; a delayed (3000 ms) NDB_TAMPER
            // 9999 is sent to the local CMVMI instead.
10112     g_eventLogger->info("Crashing delayed in GCP_PREPARE 3s");
10113     signal->theData[0] = 9999;
10114     sendSignalWithDelay(CMVMI_REF, GSN_NDB_TAMPER, signal, 3000, 1);
10115     return;
10116   }
10117 #ifdef GCP_TIMER_HACK
10118   NdbTick_getMicroTimer(&globalData.gcp_timer_commit[0]);
10119 #endif
10120 
10121 reply:
10122   /**
10123    * Send the new gci to Suma.
10124    *
10125    * To get correct signal order and avoid races, this signal is sent on the
10126    * same prio as the SUB_GCP_COMPLETE_REP signal sent to SUMA in
10127    * execSUB_GCP_COMPLETE_REP().
10128    */
10129   sendSignal(SUMA_REF, GSN_GCP_PREPARE, signal, signal->length(), JBB);
10130 
10131   /* Send reply. */
10132   conf->nodeId = cownNodeId;
10133   conf->gci_hi = gci_hi;
10134   conf->gci_lo = gci_lo;
10135   sendSignal(retRef, GSN_GCP_PREPARECONF, signal,
10136              GCPPrepareConf::SignalLength, JBA);
10137   return;
10138 }
10139 
/**
 * Handler for GCP_COMMIT, working on this node's own m_micro_gcp state.
 *
 * Three cases, checked in order:
 *  1. State is already M_GCP_COMMIT: only the master reference is updated
 *     (the GCI must equal m_current_gci).
 *  2. The GCI equals m_current_gci: the master reference is updated and
 *     GCP_NODEFINISH for m_old_gci is sent straight back.
 *  3. Otherwise the GCI must be the prepared one and the state must be
 *     M_GCP_PREPARE.  The state becomes M_GCP_COMMIT; under the write lock
 *     m_current_gci moves to m_old_gci, the new GCI becomes current and
 *     cgckptflag is cleared.  Each DIVERIFY queue is then processed with
 *     emptyverificbuffer() and GCP_NOMORETRANS is sent to
 *     clocaltcblockref.
 *
 * @param signal  Incoming signal carrying a GCPCommit.
 */
execGCP_COMMIT(Signal * signal)10140 void Dbdih::execGCP_COMMIT(Signal* signal)
10141 {
10142   jamEntry();
10143   CRASH_INSERTION(7006);
10144 
10145   GCPCommit * req = (GCPCommit*)signal->getDataPtr();
10146   Uint32 masterNodeId = req->nodeId;
10147   Uint32 gci_hi = req->gci_hi;
10148   Uint32 gci_lo = req->gci_lo;
10149 
10150   if (unlikely(signal->getLength() < GCPCommit::SignalLength))
10151   {
            // Short signal: treat gci_lo as 0.
10152     gci_lo = 0;
10153     ndbassert(!ndb_check_micro_gcp(getNodeInfo(masterNodeId).m_version));
10154   }
10155   Uint64 gci = gci_lo | (Uint64(gci_hi) << 32);
10156 
10157 #ifdef ERROR_INSERT
10158   if (ERROR_INSERTED(7213))
10159   {
            // Test hook: NDB_TAMPER 5048 to DBLQH and DUMP_STATE_ORD 9999 to
            // CMVMI on node c_error_insert_extra; the commit itself is not
            // processed.
10160     ndbout_c("err 7213 killing %d", c_error_insert_extra);
10161     Uint32 save = signal->theData[0];
10162     signal->theData[0] = 5048;
10163     sendSignal(numberToRef(DBLQH, c_error_insert_extra),
10164                GSN_NDB_TAMPER, signal, 1, JBB);
10165     signal->theData[0] = save;
10166     CLEAR_ERROR_INSERT_VALUE;
10167 
10168     signal->theData[0] = 9999;
10169     sendSignal(numberToRef(CMVMI, c_error_insert_extra),
10170                GSN_DUMP_STATE_ORD, signal, 1, JBB);
10171 
10172     signal->theData[0] = save;
10173     CLEAR_ERROR_INSERT_VALUE;
10174 
10175     return;
10176   }
10177 #endif
10178 
10179   Uint32 masterRef = calcDihBlockRef(masterNodeId);
10180   ndbrequire(masterNodeId == cmasterNodeId);
10181   if (isMaster())
10182   {
10183     ndbrequire(m_micro_gcp.m_master.m_state == MicroGcp::M_GCP_COMMIT);
10184   }
10185 
10186   if (m_micro_gcp.m_state == MicroGcp::M_GCP_COMMIT)
10187   {
10188     jam();
10189     /**
10190      * This must be master take over
10191      *   Commit is already ongoing...
10192      */
10193     ndbrequire(m_micro_gcp.m_current_gci == gci);
10194     m_micro_gcp.m_master_ref = masterRef;
10195     return;
10196   }
10197 
10198   if (m_micro_gcp.m_current_gci == gci)
10199   {
10200     jam();
10201     /**
10202      * This must be master take over
10203      *   Commit has already completed
10204      */
10205     m_micro_gcp.m_master_ref = masterRef;
10206 
            // Reply directly; the reported GCI is m_old_gci.
10207     GCPNodeFinished* conf = (GCPNodeFinished*)signal->getDataPtrSend();
10208     conf->nodeId = cownNodeId;
10209     conf->gci_hi = (Uint32)(m_micro_gcp.m_old_gci >> 32);
10210     conf->failno = cfailurenr;
10211     conf->gci_lo = (Uint32)(m_micro_gcp.m_old_gci & 0xFFFFFFFF);
10212     sendSignal(masterRef, GSN_GCP_NODEFINISH, signal,
10213                GCPNodeFinished::SignalLength, JBB);
10214     return;
10215   }
10216 
10217   ndbrequire(m_micro_gcp.m_new_gci == gci);
10218   ndbrequire(m_micro_gcp.m_state == MicroGcp::M_GCP_PREPARE);
10219   m_micro_gcp.m_state = MicroGcp::M_GCP_COMMIT;
10220   m_micro_gcp.m_master_ref = calcDihBlockRef(masterNodeId);
10221 
          // Switch GCI under the write lock: current becomes old, the
          // committed GCI becomes current.
10222   m_micro_gcp.m_lock.write_lock();
10223   m_micro_gcp.m_old_gci = m_micro_gcp.m_current_gci;
10224   m_micro_gcp.m_current_gci = gci;
10225   cgckptflag = false;
10226   m_micro_gcp.m_lock.write_unlock();
10227 
          // m_empty_done is reset here; startGcpLab() polls it before it
          // starts the next round.
10228   for (Uint32 i = 0; i < c_diverify_queue_cnt; i++)
10229   {
10230     jam();
10231     c_diverify_queue[i].m_empty_done = 0;
10232     emptyverificbuffer(signal, i, true);
10233   }
10234 
          // senderData carries the master's DIH reference; the answer is
          // handled in execGCP_TCFINISHED().
10235   GCPNoMoreTrans* req2 = (GCPNoMoreTrans*)signal->getDataPtrSend();
10236   req2->senderRef = reference();
10237   req2->senderData = calcDihBlockRef(masterNodeId);
10238   req2->gci_hi = (Uint32)(m_micro_gcp.m_old_gci >> 32);
10239   req2->gci_lo = (Uint32)(m_micro_gcp.m_old_gci & 0xFFFFFFFF);
10240   sendSignal(clocaltcblockref, GSN_GCP_NOMORETRANS, signal,
10241              GCPNoMoreTrans::SignalLength, JBB);
10242   return;
10243 }//Dbdih::execGCP_COMMIT()
10244 
/**
 * Handler for GCP_TCFINISHED.
 *
 * Requires the GCI in the signal to equal m_old_gci and the local state to
 * be M_GCP_COMMIT, then calls synchronize_path() over { DBLQH, SUMA } with
 * execGCP_TCFINISHED_sync_conf() as the callback.
 *
 * @param signal  Incoming signal carrying a GCPTCFinished.
 */
execGCP_TCFINISHED(Signal * signal)10245 void Dbdih::execGCP_TCFINISHED(Signal* signal)
10246 {
10247   jamEntry();
10248   CRASH_INSERTION(7007);
10249   GCPTCFinished* conf = (GCPTCFinished*)signal->getDataPtr();
          // retRef is only used by the 7181/7182 error-insert branch below.
10250   Uint32 retRef = conf->senderData;
10251   Uint32 gci_hi = conf->gci_hi;
10252   Uint32 gci_lo = conf->gci_lo;
10253   Uint64 gci = gci_lo | (Uint64(gci_hi) << 32);
10254   ndbrequire(gci == m_micro_gcp.m_old_gci);
10255 
10256   if (ERROR_INSERTED(7181) || ERROR_INSERTED(7182))
10257   {
            // Test hook: save the reply reference, send NDB_TAMPER 9999 to
            // CMVMI on the master's node and stop here.
10258     c_error_7181_ref = retRef; // Save ref
10259     ndbout_c("killing %d", refToNode(cmasterdihref));
10260     signal->theData[0] = 9999;
10261     sendSignal(numberToRef(CMVMI, refToNode(cmasterdihref)),
10262 	       GSN_NDB_TAMPER, signal, 1, JBB);
10263     return;
10264   }
10265 
10266 #ifdef ERROR_INSERT
10267   if (ERROR_INSERTED(7214))
10268   {
            // Test hook: NDB_TAMPER 9999 to CMVMI on node
            // c_error_insert_extra; processing continues afterwards.
10269     ndbout_c("err 7214 killing %d", c_error_insert_extra);
10270     Uint32 save = signal->theData[0];
10271     signal->theData[0] = 9999;
10272     sendSignal(numberToRef(CMVMI, c_error_insert_extra),
10273                GSN_NDB_TAMPER, signal, 1, JBB);
10274     signal->theData[0] = save;
10275     CLEAR_ERROR_INSERT_VALUE;
10276   }
10277 #endif
10278 
10279 #ifdef GCP_TIMER_HACK
10280   NdbTick_getMicroTimer(&globalData.gcp_timer_commit[1]);
10281 #endif
10282 
10283   ndbrequire(m_micro_gcp.m_state == MicroGcp::M_GCP_COMMIT);
10284 
10285   /**
10286    * Make sure that each LQH gets scheduled, so that they don't get out of sync
10287    * wrt to SUB_GCP_COMPLETE_REP
10288    */
10289   Callback cb;
10290   cb.m_callbackData = 10;
10291   cb.m_callbackFunction = safe_cast(&Dbdih::execGCP_TCFINISHED_sync_conf);
          // Block list ends with 0.
10292   Uint32 path[] = { DBLQH, SUMA, 0 };
10293   synchronize_path(signal, path, cb);
10294 }//Dbdih::execGCP_TCFINISHED()
10295 
10296 void
execGCP_TCFINISHED_sync_conf(Signal * signal,Uint32 cb,Uint32 err)10297 Dbdih::execGCP_TCFINISHED_sync_conf(Signal* signal, Uint32 cb, Uint32 err)
10298 {
10299   ndbrequire(m_micro_gcp.m_state == MicroGcp::M_GCP_COMMIT);
10300 
10301   m_micro_gcp.m_state = MicroGcp::M_GCP_COMMITTED;
10302   Uint32 retRef = m_micro_gcp.m_master_ref;
10303 
10304   GCPNodeFinished* conf2 = (GCPNodeFinished*)signal->getDataPtrSend();
10305   conf2->nodeId = cownNodeId;
10306   conf2->gci_hi = (Uint32)(m_micro_gcp.m_old_gci >> 32);
10307   conf2->failno = cfailurenr;
10308   conf2->gci_lo = (Uint32)(m_micro_gcp.m_old_gci & 0xFFFFFFFF);
10309   sendSignal(retRef, GSN_GCP_NODEFINISH, signal,
10310              GCPNodeFinished::SignalLength, JBB);
10311 }
10312 
/**
 * Handler for SUB_GCP_COMPLETE_REP, working on this node's own
 * m_micro_gcp state.
 *
 * If the state is already M_GCP_IDLE only the master reference is updated.
 * Otherwise the state must be M_GCP_COMMITTED; it becomes M_GCP_IDLE and
 * the signal is passed on to DBLQH.  In both cases SUB_GCP_COMPLETE_ACK is
 * then sent back to the sender, unless
 * ndbd_dih_sub_gcp_complete_ack() rejects the sender node's version.
 *
 * @param signal  Incoming signal carrying a SubGcpCompleteRep.
 */
10313 void
execSUB_GCP_COMPLETE_REP(Signal * signal)10314 Dbdih::execSUB_GCP_COMPLETE_REP(Signal* signal)
10315 {
10316   jamEntry();
10317 
10318   CRASH_INSERTION(7228);
          // Copy by value: the copy is used to build the ACK after the
          // signal has been passed on.
10319   SubGcpCompleteRep rep = * (SubGcpCompleteRep*)signal->getDataPtr();
10320   if (isMaster())
10321   {
10322     ndbrequire(m_micro_gcp.m_master.m_state == MicroGcp::M_GCP_COMPLETE);
10323   }
10324 
10325   Uint32 masterRef = rep.senderRef;
10326   if (m_micro_gcp.m_state == MicroGcp::M_GCP_IDLE)
10327   {
10328     jam();
10329     /**
10330      * This must be master take over
10331      *   signal has already arrived
10332      */
10333     m_micro_gcp.m_master_ref = masterRef;
10334     goto reply;
10335   }
10336 
10337   ndbrequire(m_micro_gcp.m_state == MicroGcp::M_GCP_COMMITTED);
10338   m_micro_gcp.m_state = MicroGcp::M_GCP_IDLE;
10339 
10340   /**
10341    * To handle multiple LQH instances, this need to be passed though
10342    * each LQH...(so that no fire-trig-ord can arrive "too" late)
10343    */
10344   sendSignal(DBLQH_REF, GSN_SUB_GCP_COMPLETE_REP, signal,
10345              signal->length(), JBB);
10346 reply:
10347   Uint32 nodeId = refToNode(masterRef);
10348   if (!ndbd_dih_sub_gcp_complete_ack(getNodeInfo(nodeId).m_version))
10349   {
            // No ACK is sent when the version check on the sender fails.
10350     jam();
10351     return;
10352   }
10353 
          // The ACK echoes the report with senderRef replaced by our own
          // reference.
10354   SubGcpCompleteAck* ack = CAST_PTR(SubGcpCompleteAck,
10355                                     signal->getDataPtrSend());
10356   ack->rep = rep;
10357   ack->rep.senderRef = reference();
10358   sendSignal(masterRef, GSN_SUB_GCP_COMPLETE_ACK,
10359              signal, SubGcpCompleteAck::SignalLength, JBA);
10360 }
10361 
10362 /*****************************************************************************/
10363 //******     RECEIVING   TAMPER   REQUEST   FROM    NDBAPI             ******
10364 /*****************************************************************************/
execDIHNDBTAMPER(Signal * signal)10365 void Dbdih::execDIHNDBTAMPER(Signal* signal)
10366 {
10367   jamEntry();
10368   Uint32 tcgcpblocked = signal->theData[0];
10369   /* ACTION TO BE TAKEN BY DIH */
10370   Uint32 tuserpointer = signal->theData[1];
10371   BlockReference tuserblockref = signal->theData[2];
10372   switch (tcgcpblocked) {
10373   case 1:
10374     jam();
10375     if (isMaster()) {
10376       jam();
10377       cgcpOrderBlocked = 1;
10378     } else {
10379       jam();
10380       /* TRANSFER THE REQUEST */
10381       /* TO MASTER*/
10382       signal->theData[0] = tcgcpblocked;
10383       signal->theData[1] = tuserpointer;
10384       signal->theData[2] = tuserblockref;
10385       sendSignal(cmasterdihref, GSN_DIHNDBTAMPER, signal, 3, JBB);
10386     }//if
10387     break;
10388   case 2:
10389     jam();
10390     if (isMaster()) {
10391       jam();
10392       cgcpOrderBlocked = 0;
10393     } else {
10394       jam();
10395       /* TRANSFER THE REQUEST */
10396       /* TO MASTER*/
10397       signal->theData[0] = tcgcpblocked;
10398       signal->theData[1] = tuserpointer;
10399       signal->theData[2] = tuserblockref;
10400       sendSignal(cmasterdihref, GSN_DIHNDBTAMPER, signal, 3, JBB);
10401     }//if
10402     break;
10403   case 3:
10404     ndbrequire(false);
10405     return;
10406     break;
10407   case 4:
10408     jam();
10409     signal->theData[0] = tuserpointer;
10410     signal->theData[1] = crestartGci;
10411     sendSignal(tuserblockref, GSN_DIHNDBTAMPER, signal, 2, JBB);
10412     break;
10413 #ifdef ERROR_INSERT
10414   case 5:
10415     jam();
10416     if (tuserpointer >= 30000 && tuserpointer < 40000) {
10417       jam();
10418       /*--------------------------------------------------------------------*/
10419       // Redirect errors to master DIH in the 30000-range.
10420       /*--------------------------------------------------------------------*/
10421       tuserblockref = cmasterdihref;
10422       tuserpointer -= 30000;
10423       signal->theData[0] = 5;
10424       signal->theData[1] = tuserpointer;
10425       signal->theData[2] = tuserblockref;
10426       sendSignal(tuserblockref, GSN_DIHNDBTAMPER, signal, 3, JBB);
10427       return;
10428     } else if (tuserpointer >= 40000 && tuserpointer < 50000) {
10429       NodeRecordPtr localNodeptr;
10430       Uint32 Tfound = 0;
10431       jam();
10432       /*--------------------------------------------------------------------*/
10433       // Redirect errors to non-master DIH in the 40000-range.
10434       /*--------------------------------------------------------------------*/
10435       tuserpointer -= 40000;
10436       for (localNodeptr.i = 1;
10437            localNodeptr.i < MAX_NDB_NODES;
10438            localNodeptr.i++) {
10439         jam();
10440         ptrAss(localNodeptr, nodeRecord);
10441         if ((localNodeptr.p->nodeStatus == NodeRecord::ALIVE) &&
10442             (localNodeptr.i != cmasterNodeId)) {
10443           jam();
10444           tuserblockref = calcDihBlockRef(localNodeptr.i);
10445           Tfound = 1;
10446           break;
10447         }//if
10448       }//for
10449       if (Tfound == 0) {
10450         jam();
10451 	/*-------------------------------------------------------------------*/
10452 	// Ignore since no non-master node existed.
10453 	/*-------------------------------------------------------------------*/
10454         return;
10455       }//if
10456       signal->theData[0] = 5;
10457       signal->theData[1] = tuserpointer;
10458       signal->theData[2] = tuserblockref;
10459       sendSignal(tuserblockref, GSN_DIHNDBTAMPER, signal, 3, JBB);
10460       return;
10461     } else {
10462       jam();
10463       return;
10464     }//if
10465     break;
10466 #endif
10467   default:
10468     ndbrequire(false);
10469     break;
10470   }//switch
10471   return;
10472 }//Dbdih::execDIHNDBTAMPER()
10473 
10474 /*****************************************************************************/
10475 /* **********     FILE HANDLING MODULE                           *************/
10476 /*****************************************************************************/
copyGciLab(Signal * signal,CopyGCIReq::CopyReason reason)10477 void Dbdih::copyGciLab(Signal* signal, CopyGCIReq::CopyReason reason)
10478 {
10479   if(c_copyGCIMaster.m_copyReason != CopyGCIReq::IDLE)
10480   {
10481     jam();
10482     /**
10483      * There can currently only be two waiting
10484      */
10485     for (Uint32 i = 0; i<CopyGCIMaster::WAIT_CNT; i++)
10486     {
10487       jam();
10488       if (c_copyGCIMaster.m_waiting[i] == CopyGCIReq::IDLE)
10489       {
10490         jam();
10491         c_copyGCIMaster.m_waiting[i] = reason;
10492         return;
10493       }
10494     }
10495 
10496     /**
10497      * Code should *not* request more than WAIT_CNT copy-gci's
10498      *   so this is an internal error
10499      */
10500     ndbrequire(false);
10501     return;
10502   }
10503   c_copyGCIMaster.m_copyReason = reason;
10504 
10505 #ifdef ERROR_INSERT
10506   if (reason == CopyGCIReq::GLOBAL_CHECKPOINT && ERROR_INSERTED(7189))
10507   {
10508     sendToRandomNodes("COPY_GCI",
10509                       signal, &c_COPY_GCIREQ_Counter, &Dbdih::sendCOPY_GCIREQ);
10510     signal->theData[0] = 9999;
10511     sendSignalWithDelay(CMVMI_REF, GSN_NDB_TAMPER, signal, 1000, 1);
10512     return;
10513   }
10514 #endif
10515 
10516   if (reason == CopyGCIReq::RESTART_NR)
10517   {
10518     jam();
10519     if (c_nodeStartMaster.startNode != RNIL)
10520     {
10521       jam();
10522       c_COPY_GCIREQ_Counter.clearWaitingFor();
10523       c_COPY_GCIREQ_Counter.setWaitingFor(c_nodeStartMaster.startNode);
10524       sendCOPY_GCIREQ(signal, c_nodeStartMaster.startNode, RNIL);
10525       return;
10526     }
10527     else
10528     {
10529       jam();
10530       reason = c_copyGCIMaster.m_copyReason = c_copyGCIMaster.m_waiting[0];
10531       for (Uint32 i = 1; i<CopyGCIMaster::WAIT_CNT; i++)
10532       {
10533         jam();
10534         c_copyGCIMaster.m_waiting[i-1] = c_copyGCIMaster.m_waiting[i];
10535       }
10536       c_copyGCIMaster.m_waiting[CopyGCIMaster::WAIT_CNT-1] =
10537         CopyGCIReq::IDLE;
10538 
10539       if (reason == CopyGCIReq::IDLE)
10540       {
10541         jam();
10542         return;
10543       }
10544       // fall-through
10545     }
10546   }
10547 
10548   sendLoopMacro(COPY_GCIREQ, sendCOPY_GCIREQ, RNIL);
10549 
10550 }//Dbdih::copyGciLab()
10551 
10552 /* ------------------------------------------------------------------------- */
10553 /* COPY_GCICONF                           RESPONSE TO COPY_GCIREQ            */
10554 /* ------------------------------------------------------------------------- */
/**
 * COPY_GCICONF: reply to a COPY_GCIREQ sent by this node.
 *
 * theData[0] is taken as the id of the replying node and handed to
 * receiveLoopMacro together with the COPY_GCIREQ counter name.
 * NOTE(review): receiveLoopMacro is defined outside this section; it
 * presumably returns from this function until every awaited node has
 * replied - confirm against the macro definition.
 *
 * The follow-up action of the copy reason that was in progress is then run,
 * and finally the next queued reason (if any) is started by sending
 * CONTINUEB(ZCOPY_GCI) to this block.
 */
void Dbdih::execCOPY_GCICONF(Signal* signal)
{
  jamEntry();
  NodeRecordPtr senderNodePtr;
  senderNodePtr.i = signal->theData[0];
  receiveLoopMacro(COPY_GCIREQ, senderNodePtr.i);

  // Remember the reason that was in progress and mark the master side idle
  // before acting on it.
  CopyGCIReq::CopyReason current = c_copyGCIMaster.m_copyReason;
  c_copyGCIMaster.m_copyReason = CopyGCIReq::IDLE;

  // 'ok' stays false for IDLE (and for any value without a case below),
  // which makes the ndbrequire(ok) after the switch fail.
  bool ok = false;
  switch(current){
  case CopyGCIReq::RESTART:{
    ok = true;
    jam();
    // Send DICTSTARTREQ to DICT with the newest restorable GCI taken from
    // the sysfile.
    DictStartReq * req = (DictStartReq*)&signal->theData[0];
    req->restartGci = SYSFILE->newestRestorableGCI;
    req->senderRef = reference();
    sendSignal(cdictblockref, GSN_DICTSTARTREQ,
               signal, DictStartReq::SignalLength, JBB);
    break;
  }
  case CopyGCIReq::LOCAL_CHECKPOINT:{
    ok = true;
    jam();
    startLcpRoundLab(signal);
    break;
  }
  case CopyGCIReq::GLOBAL_CHECKPOINT:
  {
    ok = true;
    jam();

    /************************************************************************/
    // Report the event that a global checkpoint has completed.
    /************************************************************************/
    signal->setTrace(0);
    signal->theData[0] = NDB_LE_GlobalCheckpointCompleted; //Event type
    signal->theData[1] = m_gcp_save.m_gci;
    sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);

    // The GCI just saved is now recorded as the newest restorable one.
    c_newest_restorable_gci = m_gcp_save.m_gci;
#ifdef ERROR_INSERT
    // Test-only paths: they trigger only when error 7222 or 7223 is
    // inserted, the sysfile does not flag an ongoing LCP, and the newest
    // restorable GCI has reached c_lcpState.lcpStopGcp.
    if ((ERROR_INSERTED(7222) || ERROR_INSERTED(7223)) &&
        !Sysfile::getLCPOngoing(SYSFILE->systemRestartBits) &&
        c_newest_restorable_gci >= c_lcpState.lcpStopGcp)
    {
      if (ERROR_INSERTED(7222))
      {
        // Sends NDB_TAMPER 9999 to CMVMI on the other nodes of the receiver
        // group (immediately) and to the local CMVMI (delayed), then
        // executes NDB_TAMPER 932 directly in QMGR. Note that this path
        // returns without reaching the queue handling at the end of the
        // function.
        sendLoopMacro(COPY_TABREQ, nullRoutine, 0);
        NodeReceiverGroup rg(CMVMI, c_COPY_TABREQ_Counter);

        rg.m_nodes.clear(getOwnNodeId());
        if (!rg.m_nodes.isclear())
        {
          signal->theData[0] = 9999;
          sendSignal(rg, GSN_NDB_TAMPER, signal, 1, JBA);
        }
        signal->theData[0] = 9999;
        sendSignalWithDelay(CMVMI_REF, GSN_NDB_TAMPER, signal, 1000, 1);

        signal->theData[0] = 932;
        EXECUTE_DIRECT(QMGR, GSN_NDB_TAMPER, signal, 1);

        return;
      }
      if (ERROR_INSERTED(7223))
      {
        // Clears the inserted error and sends NDB_TAMPER 9999 to CMVMI on
        // the node given by c_error_insert_extra; normal processing then
        // continues below.
        CLEAR_ERROR_INSERT_VALUE;
        signal->theData[0] = 9999;
        sendSignal(numberToRef(CMVMI, c_error_insert_extra)
                   , GSN_NDB_TAMPER, signal, 1, JBA);
      }
    }
#endif

    if (m_micro_gcp.m_enabled == false)
    {
      jam();
      /**
       * Running old protocol
       */
      signal->theData[0] = DihContinueB::ZSTART_GCP;
      sendSignal(reference(), GSN_CONTINUEB, signal, 1, JBB);
    }
    m_gcp_save.m_master.m_state = GcpSave::GCP_SAVE_IDLE;

    CRASH_INSERTION(7004);
    // The saved 32-bit GCI is placed in the upper half of the 64-bit value
    // passed to emptyWaitGCPMasterQueue (lower half zero).
    emptyWaitGCPMasterQueue(signal,
                            Uint64(m_gcp_save.m_gci) << 32,
                            c_waitGCPMasterList);
    break;
  }
  case CopyGCIReq::INITIAL_START_COMPLETED:
    ok = true;
    jam();
    initialStartCompletedLab(signal);
    break;
  case CopyGCIReq::IDLE:
    // A confirmation while no copy is in progress is not expected.
    ok = false;
    jam();
    break;
  case CopyGCIReq::RESTART_NR:
    ok = true;
    jam();
    startme_copygci_conf(signal);
    break;
  }
  ndbrequire(ok);


  // Advance the wait queue: slot 0 becomes the current reason, the other
  // entries move one slot towards the front and the last slot becomes IDLE.
  c_copyGCIMaster.m_copyReason = c_copyGCIMaster.m_waiting[0];
  for (Uint32 i = 1; i<CopyGCIMaster::WAIT_CNT; i++)
  {
    jam();
    c_copyGCIMaster.m_waiting[i-1] = c_copyGCIMaster.m_waiting[i];
  }
  c_copyGCIMaster.m_waiting[CopyGCIMaster::WAIT_CNT-1] = CopyGCIReq::IDLE;

  /**
   * Pop queue
   */
  if(c_copyGCIMaster.m_copyReason != CopyGCIReq::IDLE)
  {
    jam();

    // Start the dequeued reason through a CONTINUEB signal to this block
    // instead of calling into it directly from here.
    signal->theData[0] = DihContinueB::ZCOPY_GCI;
    signal->theData[1] = c_copyGCIMaster.m_copyReason;
    sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
  }
}//Dbdih::execCOPY_GCICONF()
10686 
invalidateLcpInfoAfterSr(Signal * signal)10687 void Dbdih::invalidateLcpInfoAfterSr(Signal* signal)
10688 {
10689   NodeRecordPtr nodePtr;
10690   SYSFILE->latestLCP_ID--;
10691   Sysfile::clearLCPOngoing(SYSFILE->systemRestartBits);
10692   for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
10693     jam();
10694     ptrAss(nodePtr, nodeRecord);
10695     if (!NdbNodeBitmask::get(SYSFILE->lcpActive, nodePtr.i)){
10696       jam();
10697       /* ------------------------------------------------------------------- */
10698       // The node was not active in the local checkpoint.
10699       // To avoid that we step the active status too fast to not
10700       // active we step back one step from Sysfile::NS_ActiveMissed_x.
10701       /* ------------------------------------------------------------------- */
10702       switch (nodePtr.p->activeStatus) {
10703       case Sysfile::NS_Active:
10704         nodePtr.p->activeStatus = Sysfile::NS_Active;
10705         break;
10706       case Sysfile::NS_ActiveMissed_1:
10707         jam();
10708         nodePtr.p->activeStatus = Sysfile::NS_Active;
10709         break;
10710       case Sysfile::NS_ActiveMissed_2:
10711         jam();
10712         nodePtr.p->activeStatus = Sysfile::NS_ActiveMissed_1;
10713         break;
10714       default:
10715         jam();
10716         break;
10717       }//switch
10718     }
10719     else
10720     {
10721       jam();
10722       ndbassert(nodePtr.p->activeStatus == Sysfile::NS_Active);
10723     }
10724   }//for
10725   setNodeRestartInfoBits(signal);
10726 }//Dbdih::invalidateLcpInfoAfterSr()
10727 
10728 /* ------------------------------------------------------------------------- */
10729 /*       THE NEXT STEP IS TO WRITE THE FILE.                                 */
10730 /* ------------------------------------------------------------------------- */
openingCopyGciSkipInitLab(Signal * signal,FileRecordPtr filePtr)10731 void Dbdih::openingCopyGciSkipInitLab(Signal* signal, FileRecordPtr filePtr)
10732 {
10733   writeRestorableGci(signal, filePtr);
10734   filePtr.p->reqStatus = FileRecord::WRITING_COPY_GCI;
10735   return;
10736 }//Dbdih::openingCopyGciSkipInitLab()
10737 
writingCopyGciLab(Signal * signal,FileRecordPtr filePtr)10738 void Dbdih::writingCopyGciLab(Signal* signal, FileRecordPtr filePtr)
10739 {
10740   /* ----------------------------------------------------------------------- */
10741   /*     WE HAVE NOW WRITTEN THIS FILE. WRITE ALSO NEXT FILE IF THIS IS NOT  */
10742   /*     ALREADY THE LAST.                                                   */
10743   /* ----------------------------------------------------------------------- */
10744   CRASH_INSERTION(7219);
10745 
10746   filePtr.p->reqStatus = FileRecord::IDLE;
10747   if (filePtr.i == crestartInfoFile[0]) {
10748     jam();
10749     filePtr.i = crestartInfoFile[1];
10750     ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
10751     if (filePtr.p->fileStatus == FileRecord::OPEN) {
10752       jam();
10753       openingCopyGciSkipInitLab(signal, filePtr);
10754       return;
10755     }//if
10756     openFileRw(signal, filePtr);
10757     filePtr.p->reqStatus = FileRecord::OPENING_COPY_GCI;
10758     return;
10759   }//if
10760   /* ----------------------------------------------------------------------- */
10761   /*     WE HAVE COMPLETED WRITING BOTH FILES SUCCESSFULLY. NOW REPORT OUR   */
10762   /*     SUCCESS TO THE MASTER DIH. BUT FIRST WE NEED TO RESET A NUMBER OF   */
10763   /*     VARIABLES USED BY THE LOCAL CHECKPOINT PROCESS (ONLY IF TRIGGERED   */
10764   /*     BY LOCAL CHECKPOINT PROCESS.                                        */
10765   /* ----------------------------------------------------------------------- */
10766   CopyGCIReq::CopyReason reason = c_copyGCISlave.m_copyReason;
10767 
10768   if (reason == CopyGCIReq::GLOBAL_CHECKPOINT) {
10769     jam();
10770     m_gcp_save.m_state = GcpSave::GCP_SAVE_IDLE;
10771 
10772     SubGcpCompleteRep * const rep = (SubGcpCompleteRep*)signal->getDataPtr();
10773     rep->gci_hi = SYSFILE->newestRestorableGCI;
10774     rep->gci_lo = 0;
10775     rep->flags = SubGcpCompleteRep::ON_DISK;
10776 
10777     sendSignal(LGMAN_REF, GSN_SUB_GCP_COMPLETE_REP, signal,
10778                SubGcpCompleteRep::SignalLength, JBB);
10779 
10780     jamEntry();
10781 
10782     if (m_micro_gcp.m_enabled == false)
10783     {
10784       jam();
10785       sendSignal(DBLQH_REF, GSN_SUB_GCP_COMPLETE_REP, signal,
10786                  SubGcpCompleteRep::SignalLength, JBB);
10787       jamEntry();
10788       ndbrequire(m_micro_gcp.m_state == MicroGcp::M_GCP_COMMITTED);
10789       m_micro_gcp.m_state = MicroGcp::M_GCP_IDLE;
10790 
10791       CRASH_INSERTION(7190);
10792     }
10793 
10794 #ifdef GCP_TIMER_HACK
10795     NdbTick_getMicroTimer(&globalData.gcp_timer_copygci[1]);
10796 
10797     // this is last timer point so we send local report here
10798     {
10799       const GlobalData& g = globalData;
10800       Uint32 ms_commit = NdbTick_getMicrosPassed(
10801           g.gcp_timer_commit[0], g.gcp_timer_commit[1]) / 1000;
10802       Uint32 ms_save = NdbTick_getMicrosPassed(
10803           g.gcp_timer_save[0], g.gcp_timer_save[1]) / 1000;
10804       Uint32 ms_copygci = NdbTick_getMicrosPassed(
10805           g.gcp_timer_copygci[0], g.gcp_timer_copygci[1]) / 1000;
10806 
10807       Uint32 ms_total = ms_commit + ms_save + ms_copygci;
10808 
10809       // random formula to report excessive duration
10810       bool report =
10811         g.gcp_timer_limit != 0 ?
10812           (ms_total > g.gcp_timer_limit) :
10813           (ms_total > 3000 * (1 + cgcpDelay / 1000));
10814       if (report)
10815         infoEvent("GCP %u ms: total:%u commit:%u save:%u copygci:%u",
10816             coldgcp, ms_total, ms_commit, ms_save, ms_copygci);
10817     }
10818 #endif
10819   }
10820 
10821   jam();
10822   c_copyGCISlave.m_copyReason = CopyGCIReq::IDLE;
10823 
10824   if (reason == CopyGCIReq::GLOBAL_CHECKPOINT)
10825   {
10826     jam();
10827     signal->theData[0] = c_copyGCISlave.m_senderData;
10828     sendSignal(m_gcp_save.m_master_ref, GSN_COPY_GCICONF, signal, 1, JBB);
10829   }
10830   else if (c_copyGCISlave.m_senderRef == cmasterdihref)
10831   {
10832     jam();
10833     /**
10834      * Only if same master
10835      */
10836     signal->theData[0] = c_copyGCISlave.m_senderData;
10837     sendSignal(c_copyGCISlave.m_senderRef, GSN_COPY_GCICONF, signal, 1, JBB);
10838   }
10839   return;
10840 }//Dbdih::writingCopyGciLab()
10841 
execSTART_LCP_REQ(Signal * signal)10842 void Dbdih::execSTART_LCP_REQ(Signal* signal)
10843 {
10844   jamEntry();
10845   StartLcpReq * req = (StartLcpReq*)signal->getDataPtr();
10846 
10847   /**
10848    * Init m_local_lcp_state
10849    */
10850   m_local_lcp_state.init(req);
10851 
10852   CRASH_INSERTION2(7021, isMaster());
10853   CRASH_INSERTION2(7022, !isMaster());
10854 
10855   ndbrequire(c_lcpState.m_masterLcpDihRef == req->senderRef);
10856   c_lcpState.m_participatingDIH = req->participatingDIH;
10857   c_lcpState.m_participatingLQH = req->participatingLQH;
10858 
10859   c_lcpState.m_LCP_COMPLETE_REP_Counter_LQH = req->participatingLQH;
10860   if(isMaster())
10861   {
10862     jam();
10863     c_lcpState.m_LCP_COMPLETE_REP_Counter_DIH = req->participatingDIH;
10864   }
10865   else
10866   {
10867     jam();
10868     c_lcpState.m_LCP_COMPLETE_REP_Counter_DIH.clearWaitingFor();
10869   }
10870 
10871   c_lcpState.m_LCP_COMPLETE_REP_From_Master_Received = false;
10872 
10873   c_lcpState.setLcpStatus(LCP_INIT_TABLES, __LINE__);
10874 
10875   signal->theData[0] = DihContinueB::ZINIT_LCP;
10876   signal->theData[1] = c_lcpState.m_masterLcpDihRef;
10877   signal->theData[2] = 0;
10878   sendSignal(reference(), GSN_CONTINUEB, signal, 3, JBB);
10879 }
10880 
10881 void
reset()10882 Dbdih::LocalLCPState::reset()
10883 {
10884   m_state = LS_INITIAL;
10885   m_keep_gci = RNIL;
10886   m_stop_gci = RNIL;
10887 }
10888 
10889 void
init(const StartLcpReq * req)10890 Dbdih::LocalLCPState::init(const StartLcpReq * req)
10891 {
10892   m_state = LS_RUNNING;
10893   m_start_lcp_req = *req;
10894   m_keep_gci = ~(Uint32)0;
10895   m_stop_gci = 0;
10896 }
10897 
10898 void
lcp_frag_rep(const LcpFragRep * rep)10899 Dbdih::LocalLCPState::lcp_frag_rep(const LcpFragRep * rep)
10900 {
10901   assert(m_state == LS_RUNNING);
10902   if (rep->maxGciCompleted < m_keep_gci)
10903   {
10904     m_keep_gci = rep->maxGciCompleted;
10905   }
10906 
10907   if (rep->maxGciStarted > m_stop_gci)
10908   {
10909     m_stop_gci = rep->maxGciStarted;
10910   }
10911 }
10912 
10913 void
lcp_complete_rep(Uint32 gci)10914 Dbdih::LocalLCPState::lcp_complete_rep(Uint32 gci)
10915 {
10916   assert(m_state == LS_RUNNING);
10917   m_state = LS_COMPLETE;
10918   if (gci > m_stop_gci)
10919     m_stop_gci = gci;
10920 }
10921 
10922 bool
check_cut_log_tail(Uint32 gci) const10923 Dbdih::LocalLCPState::check_cut_log_tail(Uint32 gci) const
10924 {
10925   if (m_state == LS_COMPLETE)
10926   {
10927     if (gci >= m_stop_gci)
10928       return true;
10929   }
10930   return false;
10931 }
10932 
initLcpLab(Signal * signal,Uint32 senderRef,Uint32 tableId)10933 void Dbdih::initLcpLab(Signal* signal, Uint32 senderRef, Uint32 tableId)
10934 {
10935   TabRecordPtr tabPtr;
10936   tabPtr.i = tableId;
10937 
10938   if(c_lcpState.m_masterLcpDihRef != senderRef){
10939     jam();
10940     /**
10941      * This is LCP master takeover
10942      */
10943 #ifdef VM_TRACE
10944     g_eventLogger->info("initLcpLab aborted due to LCP master takeover - 1");
10945 #endif
10946     c_lcpState.setLcpStatus(LCP_STATUS_IDLE, __LINE__);
10947     sendMASTER_LCPCONF(signal);
10948     return;
10949   }
10950 
10951   if(c_lcpState.m_masterLcpDihRef != cmasterdihref){
10952     jam();
10953     /**
10954      * Master take over but has not yet received MASTER_LCPREQ
10955      */
10956 #ifdef VM_TRACE
10957     g_eventLogger->info("initLcpLab aborted due to LCP master takeover - 2");
10958 #endif
10959     return;
10960   }
10961 
10962   //const Uint32 lcpId = SYSFILE->latestLCP_ID;
10963 
10964   for(; tabPtr.i < ctabFileSize; tabPtr.i++){
10965 
10966     ptrAss(tabPtr, tabRecord);
10967 
10968     if (tabPtr.p->tabStatus != TabRecord::TS_ACTIVE)
10969     {
10970       jam();
10971       tabPtr.p->tabLcpStatus = TabRecord::TLS_COMPLETED;
10972       continue;
10973     }
10974 
10975     if (tabPtr.p->tabStorage != TabRecord::ST_NORMAL) {
10976       /**
10977        * Table is not logged
10978        */
10979       jam();
10980       tabPtr.p->tabLcpStatus = TabRecord::TLS_COMPLETED;
10981       continue;
10982     }
10983 
10984     if (tabPtr.p->tabCopyStatus != TabRecord::CS_IDLE) {
10985       /* ----------------------------------------------------------------- */
10986       // We protect the updates of table data structures by this variable.
10987       /* ----------------------------------------------------------------- */
10988       jam();
10989       signal->theData[0] = DihContinueB::ZINIT_LCP;
10990       signal->theData[1] = senderRef;
10991       signal->theData[2] = tabPtr.i;
10992       sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 20, 3);
10993       return;
10994     }//if
10995 
10996     /**
10997      * Found a table
10998      */
10999     tabPtr.p->tabLcpStatus = TabRecord::TLS_ACTIVE;
11000 
11001     /**
11002      * For each fragment
11003      */
11004     for (Uint32 fragId = 0; fragId < tabPtr.p->totalfragments; fragId++) {
11005       jam();
11006       FragmentstorePtr fragPtr;
11007       getFragstore(tabPtr.p, fragId, fragPtr);
11008 
11009       /**
11010        * For each of replica record
11011        */
11012       Uint32 replicaCount = 0;
11013       ReplicaRecordPtr replicaPtr;
11014       for(replicaPtr.i = fragPtr.p->storedReplicas; replicaPtr.i != RNIL;
11015 	  replicaPtr.i = replicaPtr.p->nextReplica) {
11016 	jam();
11017 
11018 	ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
11019 	Uint32 nodeId = replicaPtr.p->procNode;
11020 	if(c_lcpState.m_participatingLQH.get(nodeId)){
11021 	  jam();
11022 	  replicaCount++;
11023 	  replicaPtr.p->lcpOngoingFlag = true;
11024 	}
11025       }
11026 
11027       fragPtr.p->noLcpReplicas = replicaCount;
11028     }//for
11029 
11030     signal->theData[0] = DihContinueB::ZINIT_LCP;
11031     signal->theData[1] = senderRef;
11032     signal->theData[2] = tabPtr.i + 1;
11033     sendSignal(reference(), GSN_CONTINUEB, signal, 3, JBB);
11034     return;
11035   }
11036 
11037   /**
11038    * No more tables
11039    */
11040   jam();
11041 
11042   if (c_lcpState.m_masterLcpDihRef != reference()){
11043     jam();
11044     ndbrequire(!isMaster());
11045     c_lcpState.setLcpStatus(LCP_STATUS_ACTIVE, __LINE__);
11046   } else {
11047     jam();
11048     ndbrequire(isMaster());
11049   }
11050 
11051   CRASH_INSERTION2(7023, isMaster());
11052   CRASH_INSERTION2(7024, !isMaster());
11053 
11054   jam();
11055   StartLcpConf * conf = (StartLcpConf*)signal->getDataPtrSend();
11056   conf->senderRef = reference();
11057   sendSignal(c_lcpState.m_masterLcpDihRef, GSN_START_LCP_CONF, signal,
11058 	     StartLcpConf::SignalLength, JBB);
11059   return;
11060 }//Dbdih::initLcpLab()
11061 
11062 /* ------------------------------------------------------------------------- */
11063 /*       ERROR HANDLING FOR COPY RESTORABLE GCI FILE.                        */
11064 /* ------------------------------------------------------------------------- */
openingCopyGciErrorLab(Signal * signal,FileRecordPtr filePtr)11065 void Dbdih::openingCopyGciErrorLab(Signal* signal, FileRecordPtr filePtr)
11066 {
11067   createFileRw(signal, filePtr);
11068   /* ------------------------------------------------------------------------- */
11069   /*       ERROR IN OPENING FILE. WE WILL TRY BY CREATING FILE INSTEAD.        */
11070   /* ------------------------------------------------------------------------- */
11071   filePtr.p->reqStatus = FileRecord::CREATING_COPY_GCI;
11072   return;
11073 }//Dbdih::openingCopyGciErrorLab()
11074 
11075 /* ------------------------------------------------------------------------- */
11076 /*       ENTER DICTSTARTCONF WITH                                            */
11077 /*         TBLOCKREF                                                         */
11078 /* ------------------------------------------------------------------------- */
dictStartConfLab(Signal * signal)11079 void Dbdih::dictStartConfLab(Signal* signal)
11080 {
11081   /* ----------------------------------------------------------------------- */
11082   /*     WE HAVE NOW RECEIVED ALL THE TABLES TO RESTART.                     */
11083   /* ----------------------------------------------------------------------- */
11084   signal->theData[0] = DihContinueB::ZSTART_FRAGMENT;
11085   signal->theData[1] = 0;  /* START WITH TABLE 0    */
11086   signal->theData[2] = 0;  /* AND FRAGMENT 0        */
11087   sendSignal(reference(), GSN_CONTINUEB, signal, 3, JBB);
11088   return;
11089 }//Dbdih::dictStartConfLab()
11090 
11091 
openingTableLab(Signal * signal,FileRecordPtr filePtr)11092 void Dbdih::openingTableLab(Signal* signal, FileRecordPtr filePtr)
11093 {
11094   /* ---------------------------------------------------------------------- */
11095   /*    SUCCESSFULLY OPENED A FILE. READ THE FIRST PAGE OF THIS FILE.       */
11096   /* ---------------------------------------------------------------------- */
11097   TabRecordPtr tabPtr;
11098   PageRecordPtr pagePtr;
11099 
11100   tabPtr.i = filePtr.p->tabRef;
11101   ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
11102   tabPtr.p->noPages = 1;
11103   allocpage(pagePtr);
11104   tabPtr.p->pageRef[0] = pagePtr.i;
11105   readTabfile(signal, tabPtr.p, filePtr);
11106   filePtr.p->reqStatus = FileRecord::READING_TABLE;
11107   return;
11108 }//Dbdih::openingTableLab()
11109 
openingTableErrorLab(Signal * signal,FileRecordPtr filePtr)11110 void Dbdih::openingTableErrorLab(Signal* signal, FileRecordPtr filePtr)
11111 {
11112   TabRecordPtr tabPtr;
11113   tabPtr.i = filePtr.p->tabRef;
11114   ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
11115   /* ---------------------------------------------------------------------- */
11116   /*    WE FAILED IN OPENING A FILE. IF THE FIRST FILE THEN TRY WITH THE    */
11117   /*    DUPLICATE FILE, OTHERWISE WE REPORT AN ERROR IN THE SYSTEM RESTART. */
11118   /* ---------------------------------------------------------------------- */
11119   if (filePtr.i == tabPtr.p->tabFile[0])
11120   {
11121     filePtr.i = tabPtr.p->tabFile[1];
11122     ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
11123     openFileRw(signal, filePtr);
11124     filePtr.p->reqStatus = FileRecord::OPENING_TABLE;
11125   }
11126   else
11127   {
11128     char buf[256];
11129     BaseString::snprintf(buf, sizeof(buf),
11130 			 "Error opening DIH schema files for table: %d",
11131 			 tabPtr.i);
11132     progError(__LINE__, NDBD_EXIT_AFS_NO_SUCH_FILE, buf);
11133   }
11134 }//Dbdih::openingTableErrorLab()
11135 
readingTableLab(Signal * signal,FileRecordPtr filePtr)11136 void Dbdih::readingTableLab(Signal* signal, FileRecordPtr filePtr)
11137 {
11138   TabRecordPtr tabPtr;
11139   PageRecordPtr pagePtr;
11140   /* ---------------------------------------------------------------------- */
11141   /*    WE HAVE SUCCESSFULLY READ A NUMBER OF PAGES IN THE TABLE FILE. IF   */
11142   /*    MORE PAGES EXIST IN THE FILE THEN READ ALL PAGES IN THE FILE.       */
11143   /* ---------------------------------------------------------------------- */
11144   filePtr.p->reqStatus = FileRecord::IDLE;
11145   tabPtr.i = filePtr.p->tabRef;
11146   ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
11147   pagePtr.i = tabPtr.p->pageRef[0];
11148   ptrCheckGuard(pagePtr, cpageFileSize, pageRecord);
11149   Uint32 noOfStoredPages = pagePtr.p->word[33];
11150   if (tabPtr.p->noPages < noOfStoredPages) {
11151     jam();
11152     ndbrequire(noOfStoredPages <= NDB_ARRAY_SIZE(tabPtr.p->pageRef));
11153     for (Uint32 i = tabPtr.p->noPages; i < noOfStoredPages; i++) {
11154       jam();
11155       allocpage(pagePtr);
11156       tabPtr.p->pageRef[i] = pagePtr.i;
11157     }//for
11158     tabPtr.p->noPages = noOfStoredPages;
11159     readTabfile(signal, tabPtr.p, filePtr);
11160     filePtr.p->reqStatus = FileRecord::READING_TABLE;
11161   } else {
11162     ndbrequire(tabPtr.p->noPages == pagePtr.p->word[33]);
11163     ndbrequire(tabPtr.p->tabCopyStatus == TabRecord::CS_IDLE);
11164     jam();
11165     /* --------------------------------------------------------------------- */
11166     /*   WE HAVE READ ALL PAGES. NOW READ FROM PAGES INTO TABLE AND FRAGMENT */
11167     /*   DATA STRUCTURES.                                                    */
11168     /* --------------------------------------------------------------------- */
11169     tabPtr.p->tabCopyStatus = TabRecord::CS_SR_PHASE1_READ_PAGES;
11170     signal->theData[0] = DihContinueB::ZREAD_PAGES_INTO_TABLE;
11171     signal->theData[1] = tabPtr.i;
11172     sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
11173     return;
11174   }//if
11175   return;
11176 }//Dbdih::readingTableLab()
11177 
readTableFromPagesLab(Signal * signal,TabRecordPtr tabPtr)11178 void Dbdih::readTableFromPagesLab(Signal* signal, TabRecordPtr tabPtr)
11179 {
11180   FileRecordPtr filePtr;
11181   filePtr.i = tabPtr.p->tabFile[0];
11182   ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
11183   /* ---------------------------------------------------------------------- */
11184   /*    WE HAVE NOW COPIED TO OUR NODE. WE HAVE NOW COMPLETED RESTORING     */
11185   /*    THIS TABLE. CONTINUE WITH THE NEXT TABLE.                           */
11186   /*    WE ALSO NEED TO CLOSE THE TABLE FILE.                               */
11187   /* ---------------------------------------------------------------------- */
11188   if (filePtr.p->fileStatus != FileRecord::OPEN) {
11189     jam();
11190     filePtr.i = tabPtr.p->tabFile[1];
11191     ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
11192   }//if
11193   closeFile(signal, filePtr);
11194   filePtr.p->reqStatus = FileRecord::CLOSING_TABLE_SR;
11195   return;
11196 }//Dbdih::readTableFromPagesLab()
11197 
closingTableSrLab(Signal * signal,FileRecordPtr filePtr)11198 void Dbdih::closingTableSrLab(Signal* signal, FileRecordPtr filePtr)
11199 {
11200   /**
11201    * Update table/fragment info
11202    */
11203   TabRecordPtr tabPtr;
11204   tabPtr.i = filePtr.p->tabRef;
11205   ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
11206   resetReplicaSr(tabPtr);
11207 
11208   signal->theData[0] = DihContinueB::ZCOPY_TABLE;
11209   signal->theData[1] = filePtr.p->tabRef;
11210   sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
11211 
11212   return;
11213 }//Dbdih::closingTableSrLab()
11214 
11215 void
execDIH_GET_TABINFO_REQ(Signal * signal)11216 Dbdih::execDIH_GET_TABINFO_REQ(Signal* signal)
11217 {
11218   jamEntry();
11219 
11220   DihGetTabInfoReq req = * (DihGetTabInfoReq*)signal->getDataPtr();
11221 
11222   Uint32 err = 0;
11223   do
11224   {
11225     TabRecordPtr tabPtr;
11226     tabPtr.i = req.tableId;
11227     ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
11228 
11229     if (tabPtr.p->tabStatus != TabRecord::TS_ACTIVE)
11230     {
11231       jam();
11232       err = DihGetTabInfoRef::TableNotDefined;
11233       break;
11234     }
11235 
11236     if (cfirstconnect == RNIL)
11237     {
11238       jam();
11239       err = DihGetTabInfoRef::OutOfConnectionRecords;
11240       break;
11241     }
11242 
11243     if (tabPtr.p->connectrec != RNIL)
11244     {
11245       jam();
11246 
11247       ConnectRecordPtr connectPtr;
11248       connectPtr.i = tabPtr.p->connectrec;
11249       ptrCheckGuard(connectPtr, cconnectFileSize, connectRecord);
11250 
11251       if (connectPtr.p->connectState != ConnectRecord::GET_TABINFO)
11252       {
11253         jam();
11254         err = DihGetTabInfoRef::TableBusy;
11255         break;
11256       }
11257     }
11258 
11259     ConnectRecordPtr connectPtr;
11260     connectPtr.i = cfirstconnect;
11261     ptrCheckGuard(connectPtr, cconnectFileSize, connectRecord);
11262     cfirstconnect = connectPtr.p->nextPool;
11263 
11264     connectPtr.p->nextPool = tabPtr.p->connectrec;
11265     tabPtr.p->connectrec = connectPtr.i;
11266 
11267     connectPtr.p->m_get_tabinfo.m_requestInfo = req.requestInfo;
11268     connectPtr.p->userpointer = req.senderData;
11269     connectPtr.p->userblockref = req.senderRef;
11270     connectPtr.p->connectState = ConnectRecord::GET_TABINFO;
11271     connectPtr.p->table = tabPtr.i;
11272 
11273     if (connectPtr.p->nextPool == RNIL)
11274     {
11275       jam();
11276 
11277       /**
11278        * we're the first...start packing...
11279        */
11280       signal->theData[0] = DihContinueB::ZGET_TABINFO;
11281       signal->theData[1] = tabPtr.i;
11282       sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
11283     }
11284 
11285     return;
11286   } while (0);
11287 
11288   DihGetTabInfoRef * ref = (DihGetTabInfoRef*)signal->getDataPtrSend();
11289   ref->senderData = req.senderData;
11290   ref->senderRef = reference();
11291   ref->errorCode = err;
11292   sendSignal(req.senderRef, GSN_DIH_GET_TABINFO_REF, signal,
11293              DihGetTabInfoRef::SignalLength, JBB);
11294 }
11295 
11296 void
getTabInfo(Signal * signal)11297 Dbdih::getTabInfo(Signal* signal)
11298 {
11299   TabRecordPtr tabPtr;
11300   tabPtr.i = signal->theData[1];
11301   ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
11302 
11303   if (tabPtr.p->tabCopyStatus != TabRecord::CS_IDLE)
11304   {
11305     jam();
11306     signal->theData[0] = DihContinueB::ZGET_TABINFO;
11307     signal->theData[1] = tabPtr.i;
11308     sendSignalWithDelay(reference(), GSN_CONTINUEB,
11309                         signal, 100, signal->length());
11310     return;
11311   }
11312 
11313   tabPtr.p->tabCopyStatus  = TabRecord::CS_GET_TABINFO;
11314 
11315   signal->theData[0] = DihContinueB::ZPACK_TABLE_INTO_PAGES;
11316   signal->theData[1] = tabPtr.i;
11317   sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
11318 }
11319 
11320 int
getTabInfo_copyTableToSection(SegmentedSectionPtr & ptr,CopyTableNode ctn)11321 Dbdih::getTabInfo_copyTableToSection(SegmentedSectionPtr & ptr,
11322                                      CopyTableNode ctn)
11323 {
11324   PageRecordPtr pagePtr;
11325   pagePtr.i = ctn.ctnTabPtr.p->pageRef[0];
11326   ptrCheckGuard(pagePtr, cpageFileSize, pageRecord);
11327 
11328   while (ctn.noOfWords > 2048)
11329   {
11330     jam();
11331     ndbrequire(import(ptr, pagePtr.p->word, 2048));
11332     ctn.noOfWords -= 2048;
11333 
11334     ctn.pageIndex++;
11335     pagePtr.i = ctn.ctnTabPtr.p->pageRef[ctn.pageIndex];
11336     ptrCheckGuard(pagePtr, cpageFileSize, pageRecord);
11337   }
11338 
11339   ndbrequire(import(ptr, pagePtr.p->word, ctn.noOfWords));
11340   return 0;
11341 }
11342 
11343 int
getTabInfo_copySectionToPages(TabRecordPtr tabPtr,SegmentedSectionPtr ptr)11344 Dbdih::getTabInfo_copySectionToPages(TabRecordPtr tabPtr,
11345                                      SegmentedSectionPtr ptr)
11346 {
11347   jam();
11348   Uint32 sz = ptr.sz;
11349   SectionReader reader(ptr, getSectionSegmentPool());
11350 
11351   while (sz)
11352   {
11353     jam();
11354     PageRecordPtr pagePtr;
11355     allocpage(pagePtr);
11356     tabPtr.p->pageRef[tabPtr.p->noPages] = pagePtr.i;
11357     tabPtr.p->noPages++;
11358 
11359     Uint32 len = sz > 2048 ? 2048 : sz;
11360     ndbrequire(reader.getWords(pagePtr.p->word, len));
11361     sz -= len;
11362   }
11363   return 0;
11364 }
11365 
11366 void
getTabInfo_send(Signal * signal,TabRecordPtr tabPtr)11367 Dbdih::getTabInfo_send(Signal* signal,
11368                        TabRecordPtr tabPtr)
11369 {
11370   ndbrequire(tabPtr.p->tabCopyStatus == TabRecord::CS_GET_TABINFO);
11371 
11372   ConnectRecordPtr connectPtr;
11373   connectPtr.i = tabPtr.p->connectrec;
11374 
11375   /**
11376    * Done
11377    */
11378   if (connectPtr.i == RNIL)
11379   {
11380     jam();
11381     tabPtr.p->tabCopyStatus = TabRecord::CS_IDLE;
11382     return;
11383   }
11384 
11385   ptrCheckGuard(connectPtr, cconnectFileSize, connectRecord);
11386 
11387   ndbrequire(connectPtr.p->connectState == ConnectRecord::GET_TABINFO);
11388   ndbrequire(connectPtr.p->table == tabPtr.i);
11389 
11390   /**
11391    * Copy into segmented sections here...
11392    * NOTE: A GenericSectionIterator would be nice inside kernel too
11393    *  or having a pack-method that writes directly into SegmentedSection
11394    */
11395   PageRecordPtr pagePtr;
11396   pagePtr.i = tabPtr.p->pageRef[0];
11397   ptrCheckGuard(pagePtr, cpageFileSize, pageRecord);
11398   Uint32 words = pagePtr.p->word[34];
11399 
11400   CopyTableNode ctn;
11401   ctn.ctnTabPtr = tabPtr;
11402   ctn.pageIndex = 0;
11403   ctn.wordIndex = 0;
11404   ctn.noOfWords = words;
11405 
11406   SegmentedSectionPtr ptr;
11407   ndbrequire(getTabInfo_copyTableToSection(ptr, ctn) == 0);
11408 
11409   Callback cb = { safe_cast(&Dbdih::getTabInfo_sendComplete), connectPtr.i };
11410 
11411   SectionHandle handle(this, signal);
11412   handle.m_ptr[0] = ptr;
11413   handle.m_cnt = 1;
11414 
11415   DihGetTabInfoConf* conf = (DihGetTabInfoConf*)signal->getDataPtrSend();
11416   conf->senderData = connectPtr.p->userpointer;
11417   conf->senderRef = reference();
11418   sendFragmentedSignal(connectPtr.p->userblockref, GSN_DIH_GET_TABINFO_CONF, signal,
11419                        DihGetTabInfoConf::SignalLength, JBB, &handle, cb);
11420 }
11421 
11422 void
getTabInfo_sendComplete(Signal * signal,Uint32 senderData,Uint32 retVal)11423 Dbdih::getTabInfo_sendComplete(Signal * signal,
11424                                Uint32 senderData,
11425                                Uint32 retVal)
11426 {
11427   ndbrequire(retVal == 0);
11428 
11429   ConnectRecordPtr connectPtr;
11430   connectPtr.i = senderData;
11431   ptrCheckGuard(connectPtr, cconnectFileSize, connectRecord);
11432 
11433   ndbrequire(connectPtr.p->connectState == ConnectRecord::GET_TABINFO);
11434 
11435   TabRecordPtr tabPtr;
11436   tabPtr.i = connectPtr.p->table;
11437   ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
11438   tabPtr.p->connectrec = connectPtr.p->nextPool;
11439 
11440   signal->theData[0] = DihContinueB::ZGET_TABINFO_SEND;
11441   signal->theData[1] = tabPtr.i;
11442   sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
11443 
11444   release_connect(connectPtr);
11445 }
11446 
11447 void
resetReplicaSr(TabRecordPtr tabPtr)11448 Dbdih::resetReplicaSr(TabRecordPtr tabPtr){
11449 
11450   const Uint32 newestRestorableGCI = SYSFILE->newestRestorableGCI;
11451 
11452   for(Uint32 i = 0; i<tabPtr.p->totalfragments; i++)
11453   {
11454     FragmentstorePtr fragPtr;
11455     getFragstore(tabPtr.p, i, fragPtr);
11456 
11457     /**
11458      * During SR restart distributionKey from 0
11459      */
11460     fragPtr.p->distributionKey = 0;
11461 
11462     /**
11463      * 1) Start by moving all replicas into oldStoredReplicas
11464      */
11465     prepareReplicas(fragPtr);
11466 
11467     /**
11468      * 2) Move all "alive" replicas into storedReplicas
11469      *    + update noCrashedReplicas...
11470      */
11471     ReplicaRecordPtr replicaPtr;
11472     replicaPtr.i = fragPtr.p->oldStoredReplicas;
11473     while (replicaPtr.i != RNIL)
11474     {
11475       jam();
11476       ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
11477 
11478       /**
11479        * invalidate LCP's not usable
11480        */
11481       resetReplica(replicaPtr);
11482 
11483       const Uint32 nextReplicaPtrI = replicaPtr.p->nextReplica;
11484 
11485       NodeRecordPtr nodePtr;
11486       nodePtr.i = replicaPtr.p->procNode;
11487       ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
11488 
11489       const Uint32 noCrashedReplicas = replicaPtr.p->noCrashedReplicas;
11490 
11491       if (nodePtr.p->nodeStatus == NodeRecord::ALIVE)
11492       {
11493 	jam();
11494 	switch (nodePtr.p->activeStatus) {
11495 	case Sysfile::NS_Active:
11496 	case Sysfile::NS_ActiveMissed_1:
11497 	case Sysfile::NS_ActiveMissed_2:{
11498 	  jam();
11499 	  /* --------------------------------------------------------------- */
11500 	  /* THE NODE IS ALIVE AND KICKING AND ACTIVE, LET'S USE IT.         */
11501 	  /* --------------------------------------------------------------- */
11502 	  arrGuardErr(noCrashedReplicas, MAX_CRASHED_REPLICAS, NDBD_EXIT_MAX_CRASHED_REPLICAS);
11503 
11504           // Create new crashed replica
11505           newCrashedReplica(replicaPtr);
11506 
11507           // Create a new redo-interval
11508           Uint32 nextCrashed = replicaPtr.p->noCrashedReplicas;
11509           replicaPtr.p->createGci[nextCrashed] = newestRestorableGCI + 1;
11510           replicaPtr.p->replicaLastGci[nextCrashed] = ZINIT_REPLICA_LAST_GCI;
11511 
11512           // merge
11513           mergeCrashedReplicas(replicaPtr);
11514 
11515 	  resetReplicaLcp(replicaPtr.p, newestRestorableGCI);
11516 
11517 	  /**
11518 	   * Make sure we can also find REDO for restoring replica...
11519 	   */
11520 	  {
11521 	    CreateReplicaRecord createReplica;
11522 	    ConstPtr<ReplicaRecord> constReplicaPtr;
11523 	    constReplicaPtr.i = replicaPtr.i;
11524 	    constReplicaPtr.p = replicaPtr.p;
11525 	    if (tabPtr.p->tabStorage != TabRecord::ST_NORMAL ||
11526 		setup_create_replica(fragPtr,
11527 				     &createReplica, constReplicaPtr))
11528 	    {
11529 	      jam();
11530 	      removeOldStoredReplica(fragPtr, replicaPtr);
11531 	      linkStoredReplica(fragPtr, replicaPtr);
11532 	    }
11533 	    else
11534 	    {
11535 	      jam();
11536 	      infoEvent("Forcing take-over of node %d due to unsufficient REDO"
11537 			" for table %d fragment: %d",
11538 			nodePtr.i, tabPtr.i, i);
11539 
11540               m_sr_nodes.clear(nodePtr.i);
11541               m_to_nodes.set(nodePtr.i);
11542 	      setNodeActiveStatus(nodePtr.i,
11543 				  Sysfile::NS_NotActive_NotTakenOver);
11544 	    }
11545 	  }
11546 	}
11547         default:
11548 	  jam();
11549 	  /*empty*/;
11550 	  break;
11551 	}
11552       }
11553       replicaPtr.i = nextReplicaPtrI;
11554     }//while
11555     updateNodeInfo(fragPtr);
11556   }
11557 }
11558 
11559 void
resetReplica(ReplicaRecordPtr readReplicaPtr)11560 Dbdih::resetReplica(ReplicaRecordPtr readReplicaPtr)
11561 {
11562   Uint32 i;
11563   /* ---------------------------------------------------------------------- */
11564   /*       IF THE LAST COMPLETED LOCAL CHECKPOINT IS VALID AND LARGER THAN  */
11565   /*       THE LAST COMPLETED CHECKPOINT THEN WE WILL INVALIDATE THIS LOCAL */
11566   /*       CHECKPOINT FOR THIS REPLICA.                                     */
11567   /* ---------------------------------------------------------------------- */
11568   for (i = 0; i < MAX_LCP_STORED; i++)
11569   {
11570     jam();
11571     if (readReplicaPtr.p->lcpStatus[i] == ZVALID &&
11572         readReplicaPtr.p->lcpId[i] > SYSFILE->latestLCP_ID)
11573     {
11574       jam();
11575       readReplicaPtr.p->lcpStatus[i] = ZINVALID;
11576     }
11577   }
11578 
11579   /* ---------------------------------------------------------------------- */
11580   /*       WE ALSO HAVE TO INVALIDATE ANY LOCAL CHECKPOINTS THAT HAVE BEEN  */
11581   /*       INVALIDATED BY MOVING BACK THE RESTART GCI.                      */
11582   /* ---------------------------------------------------------------------- */
11583   Uint32 lastCompletedGCI = SYSFILE->newestRestorableGCI;
11584   for (i = 0; i < MAX_LCP_STORED; i++)
11585   {
11586     jam();
11587     if (readReplicaPtr.p->lcpStatus[i] == ZVALID &&
11588         readReplicaPtr.p->maxGciStarted[i] > lastCompletedGCI)
11589     {
11590       jam();
11591       readReplicaPtr.p->lcpStatus[i] = ZINVALID;
11592     }
11593   }
11594 
11595   /* ---------------------------------------------------------------------- */
11596   /*       WE WILL REMOVE ANY OCCURRENCES OF REPLICAS THAT HAVE CRASHED     */
11597   /*       THAT ARE NO LONGER VALID DUE TO MOVING RESTART GCI BACKWARDS.    */
11598   /* ---------------------------------------------------------------------- */
11599   removeTooNewCrashedReplicas(readReplicaPtr, lastCompletedGCI);
11600 
11601   /**
11602    * Don't remove crashed replicas here,
11603    *   as 1) this will disable optimized NR
11604    *         if oldestRestorableGCI > GCI needed for local LCP's
11605    *      2) This is anyway done during LCP, which will be run during SR
11606    */
11607   //removeOldCrashedReplicas(readReplicaPtr);
11608 
11609   /* ---------------------------------------------------------------------- */
11610   /*       FIND PROCESSOR RECORD                                            */
11611   /* ---------------------------------------------------------------------- */
11612 }
11613 
11614 void
resetReplicaLcp(ReplicaRecord * replicaP,Uint32 stopGci)11615 Dbdih::resetReplicaLcp(ReplicaRecord * replicaP, Uint32 stopGci){
11616 
11617   Uint32 lcpNo = replicaP->nextLcp;
11618   const Uint32 startLcpNo = lcpNo;
11619   do {
11620     lcpNo = prevLcpNo(lcpNo);
11621     ndbrequire(lcpNo < MAX_LCP_STORED);
11622     if (replicaP->lcpStatus[lcpNo] == ZVALID)
11623     {
11624       if (replicaP->maxGciStarted[lcpNo] <= stopGci)
11625       {
11626         jam();
11627 	/* ----------------------------------------------------------------- */
11628 	/*   WE HAVE FOUND A USEFUL LOCAL CHECKPOINT THAT CAN BE USED FOR    */
11629 	/*   RESTARTING THIS FRAGMENT REPLICA.                               */
11630 	/* ----------------------------------------------------------------- */
11631         return ;
11632       }//if
11633     }//if
11634 
11635     /**
11636      * WE COULD  NOT USE THIS LOCAL CHECKPOINT. IT WAS TOO
11637      * RECENT OR SIMPLY NOT A VALID CHECKPOINT.
11638      * WE SHOULD THUS REMOVE THIS LOCAL CHECKPOINT SINCE IT WILL NEVER
11639      * AGAIN BE USED. SET LCP_STATUS TO INVALID.
11640      */
11641     replicaP->nextLcp = lcpNo;
11642     replicaP->lcpId[lcpNo] = 0;
11643     replicaP->lcpStatus[lcpNo] = ZINVALID;
11644   } while (lcpNo != startLcpNo);
11645 
11646   replicaP->nextLcp = 0;
11647 }
11648 
readingTableErrorLab(Signal * signal,FileRecordPtr filePtr)11649 void Dbdih::readingTableErrorLab(Signal* signal, FileRecordPtr filePtr)
11650 {
11651   TabRecordPtr tabPtr;
11652   tabPtr.i = filePtr.p->tabRef;
11653   ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
11654   /* ---------------------------------------------------------------------- */
11655   /*    READING THIS FILE FAILED. CLOSE IT AFTER RELEASING ALL PAGES.       */
11656   /* ---------------------------------------------------------------------- */
11657   ndbrequire(tabPtr.p->noPages <= NDB_ARRAY_SIZE(tabPtr.p->pageRef));
11658   for (Uint32 i = 0; i < tabPtr.p->noPages; i++) {
11659     jam();
11660     releasePage(tabPtr.p->pageRef[i]);
11661   }//for
11662   closeFile(signal, filePtr);
11663   filePtr.p->reqStatus = FileRecord::CLOSING_TABLE_CRASH;
11664   return;
11665 }//Dbdih::readingTableErrorLab()
11666 
closingTableCrashLab(Signal * signal,FileRecordPtr filePtr)11667 void Dbdih::closingTableCrashLab(Signal* signal, FileRecordPtr filePtr)
11668 {
11669   TabRecordPtr tabPtr;
11670   /* ---------------------------------------------------------------------- */
11671   /*    WE HAVE NOW CLOSED A FILE WHICH WE HAD A READ ERROR WITH. PROCEED   */
11672   /*    WITH NEXT FILE IF NOT THE LAST OTHERWISE REPORT ERROR.              */
11673   /* ---------------------------------------------------------------------- */
11674   tabPtr.i = filePtr.p->tabRef;
11675   ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
11676   ndbrequire(filePtr.i == tabPtr.p->tabFile[0]);
11677   filePtr.i = tabPtr.p->tabFile[1];
11678   ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
11679   openFileRw(signal, filePtr);
11680   filePtr.p->reqStatus = FileRecord::OPENING_TABLE;
11681 }//Dbdih::closingTableCrashLab()
11682 
11683 /*****************************************************************************/
11684 /* **********     COPY TABLE MODULE                              *************/
11685 /*****************************************************************************/
execCOPY_TABREQ(Signal * signal)11686 void Dbdih::execCOPY_TABREQ(Signal* signal)
11687 {
11688   CRASH_INSERTION(7172);
11689 
11690   TabRecordPtr tabPtr;
11691   PageRecordPtr pagePtr;
11692   jamEntry();
11693   BlockReference ref = signal->theData[0];
11694   Uint32 reqinfo = signal->theData[1];
11695   tabPtr.i = signal->theData[2];
11696   Uint32 schemaVersion = signal->theData[3];
11697   Uint32 noOfWords = signal->theData[4];
11698   ndbrequire(ref == cmasterdihref);
11699   ndbrequire(!isMaster());
11700   ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
11701   if (reqinfo == 1) {
11702     jam();
11703     tabPtr.p->schemaVersion = schemaVersion;
11704     initTableFile(tabPtr);
11705   }//if
11706   ndbrequire(tabPtr.p->noPages < NDB_ARRAY_SIZE(tabPtr.p->pageRef));
11707   if (tabPtr.p->noOfWords == 0) {
11708     jam();
11709     allocpage(pagePtr);
11710     tabPtr.p->pageRef[tabPtr.p->noPages] = pagePtr.i;
11711     tabPtr.p->noPages++;
11712   } else {
11713     jam();
11714     pagePtr.i = tabPtr.p->pageRef[tabPtr.p->noPages - 1];
11715     ptrCheckGuard(pagePtr, cpageFileSize, pageRecord);
11716   }//if
11717   ndbrequire(tabPtr.p->noOfWords + 15 < 2048);
11718   ndbrequire(tabPtr.p->noOfWords < 2048);
11719   MEMCOPY_NO_WORDS(&pagePtr.p->word[tabPtr.p->noOfWords], &signal->theData[5], 16);
11720   tabPtr.p->noOfWords += 16;
11721   if (tabPtr.p->noOfWords == 2048) {
11722     jam();
11723     tabPtr.p->noOfWords = 0;
11724   }//if
11725   if (noOfWords > 16) {
11726     jam();
11727     return;
11728   }//if
11729   tabPtr.p->noOfWords = 0;
11730   ndbrequire(tabPtr.p->tabCopyStatus == TabRecord::CS_IDLE);
11731   tabPtr.p->tabCopyStatus = TabRecord::CS_COPY_TAB_REQ;
11732   signal->theData[0] = DihContinueB::ZREAD_PAGES_INTO_TABLE;
11733   signal->theData[1] = tabPtr.i;
11734   sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
11735 }//Dbdih::execCOPY_TABREQ()
11736 
11737 void
copyTabReq_complete(Signal * signal,TabRecordPtr tabPtr)11738 Dbdih::copyTabReq_complete(Signal* signal, TabRecordPtr tabPtr){
11739   if (!isMaster()) {
11740     jam();
11741     //----------------------------------------------------------------------------
11742     // In this particular case we do not release table pages if we are master. The
11743     // reason is that the master could still be sending the table info to another
11744     // node.
11745     //----------------------------------------------------------------------------
11746     releaseTabPages(tabPtr.i);
11747     tabPtr.p->tabStatus = TabRecord::TS_ACTIVE;
11748     for (Uint32 fragId = 0; fragId < tabPtr.p->totalfragments; fragId++) {
11749       jam();
11750       FragmentstorePtr fragPtr;
11751       getFragstore(tabPtr.p, fragId, fragPtr);
11752       updateNodeInfo(fragPtr);
11753     }//for
11754   }//if
11755   signal->theData[0] = cownNodeId;
11756   signal->theData[1] = tabPtr.i;
11757   sendSignal(cmasterdihref, GSN_COPY_TABCONF, signal, 2, JBB);
11758 }
11759 
11760 /*****************************************************************************/
11761 /* ******  READ FROM A NUMBER OF PAGES INTO THE TABLE DATA STRUCTURES ********/
11762 /*****************************************************************************/
readPagesIntoTableLab(Signal * signal,Uint32 tableId)11763 void Dbdih::readPagesIntoTableLab(Signal* signal, Uint32 tableId)
11764 {
11765   RWFragment rf;
11766   rf.wordIndex = 35;
11767   rf.pageIndex = 0;
11768   rf.rwfTabPtr.i = tableId;
11769   ptrCheckGuard(rf.rwfTabPtr, ctabFileSize, tabRecord);
11770   rf.rwfPageptr.i = rf.rwfTabPtr.p->pageRef[0];
11771   ptrCheckGuard(rf.rwfPageptr, cpageFileSize, pageRecord);
11772   rf.rwfTabPtr.p->totalfragments = readPageWord(&rf);
11773   rf.rwfTabPtr.p->noOfBackups = readPageWord(&rf);
11774   rf.rwfTabPtr.p->hashpointer = readPageWord(&rf);
11775   rf.rwfTabPtr.p->kvalue = readPageWord(&rf);
11776   rf.rwfTabPtr.p->mask = readPageWord(&rf);
11777   rf.rwfTabPtr.p->method = (TabRecord::Method)readPageWord(&rf);
11778   /* ------------- */
11779   /* Type of table */
11780   /* ------------- */
11781   rf.rwfTabPtr.p->tabStorage = (TabRecord::Storage)(readPageWord(&rf));
11782 
11783   Uint32 noOfFrags = rf.rwfTabPtr.p->totalfragments;
11784   ndbrequire(noOfFrags > 0);
11785   ndbrequire((noOfFrags * (rf.rwfTabPtr.p->noOfBackups + 1)) <= cnoFreeReplicaRec);
11786   allocFragments(noOfFrags, rf.rwfTabPtr);
11787 
11788   signal->theData[0] = DihContinueB::ZREAD_PAGES_INTO_FRAG;
11789   signal->theData[1] = rf.rwfTabPtr.i;
11790   signal->theData[2] = 0;
11791   signal->theData[3] = rf.pageIndex;
11792   signal->theData[4] = rf.wordIndex;
11793   sendSignal(reference(), GSN_CONTINUEB, signal, 5, JBB);
11794   return;
11795 }//Dbdih::readPagesIntoTableLab()
11796 
readPagesIntoFragLab(Signal * signal,RWFragment * rf)11797 void Dbdih::readPagesIntoFragLab(Signal* signal, RWFragment* rf)
11798 {
11799   ndbrequire(rf->pageIndex < NDB_ARRAY_SIZE(rf->rwfTabPtr.p->pageRef));
11800   rf->rwfPageptr.i = rf->rwfTabPtr.p->pageRef[rf->pageIndex];
11801   ptrCheckGuard(rf->rwfPageptr, cpageFileSize, pageRecord);
11802   FragmentstorePtr fragPtr;
11803   getFragstore(rf->rwfTabPtr.p, rf->fragId, fragPtr);
11804   readFragment(rf, fragPtr);
11805   readReplicas(rf, fragPtr);
11806   rf->fragId++;
11807   if (rf->fragId == rf->rwfTabPtr.p->totalfragments) {
11808     jam();
11809     switch (rf->rwfTabPtr.p->tabCopyStatus) {
11810     case TabRecord::CS_SR_PHASE1_READ_PAGES:
11811       jam();
11812       releaseTabPages(rf->rwfTabPtr.i);
11813       rf->rwfTabPtr.p->tabCopyStatus = TabRecord::CS_IDLE;
11814       signal->theData[0] = DihContinueB::ZREAD_TABLE_FROM_PAGES;
11815       signal->theData[1] = rf->rwfTabPtr.i;
11816       sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
11817       return;
11818       break;
11819     case TabRecord::CS_COPY_TAB_REQ:
11820       jam();
11821       rf->rwfTabPtr.p->tabCopyStatus = TabRecord::CS_IDLE;
11822       if (getNodeState().getSystemRestartInProgress() &&
11823           rf->rwfTabPtr.p->tabStorage == TabRecord::ST_NORMAL)
11824       {
11825         /**
11826          * avoid overwriting own table-definition...
11827          *   but this is not possible for no-logging tables
11828          */
11829 	jam();
11830 	copyTabReq_complete(signal, rf->rwfTabPtr);
11831 	return;
11832       }
11833       rf->rwfTabPtr.p->tabCopyStatus = TabRecord::CS_IDLE;
11834       rf->rwfTabPtr.p->tabUpdateState = TabRecord::US_COPY_TAB_REQ;
11835       signal->theData[0] = DihContinueB::ZTABLE_UPDATE;
11836       signal->theData[1] = rf->rwfTabPtr.i;
11837       sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
11838       return;
11839       break;
11840     default:
11841       ndbrequire(false);
11842       return;
11843       break;
11844     }//switch
11845   } else {
11846     jam();
11847     signal->theData[0] = DihContinueB::ZREAD_PAGES_INTO_FRAG;
11848     signal->theData[1] = rf->rwfTabPtr.i;
11849     signal->theData[2] = rf->fragId;
11850     signal->theData[3] = rf->pageIndex;
11851     signal->theData[4] = rf->wordIndex;
11852     sendSignal(reference(), GSN_CONTINUEB, signal, 5, JBB);
11853   }//if
11854   return;
11855 }//Dbdih::readPagesIntoFragLab()
11856 
11857 /*****************************************************************************/
11858 /*****   WRITING FROM TABLE DATA STRUCTURES INTO A SET OF PAGES         ******/
11859 // execCONTINUEB(ZPACK_TABLE_INTO_PAGES)
11860 /*****************************************************************************/
packTableIntoPagesLab(Signal * signal,Uint32 tableId)11861 void Dbdih::packTableIntoPagesLab(Signal* signal, Uint32 tableId)
11862 {
11863   RWFragment wf;
11864   TabRecordPtr tabPtr;
11865   allocpage(wf.rwfPageptr);
11866   tabPtr.i = tableId;
11867   ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
11868   tabPtr.p->pageRef[0] = wf.rwfPageptr.i;
11869   tabPtr.p->noPages = 1;
11870   wf.wordIndex = 35;
11871   wf.pageIndex = 0;
11872   Uint32 totalfragments = tabPtr.p->totalfragments;
11873   if (tabPtr.p->connectrec != RNIL)
11874   {
11875     jam();
11876     Ptr<ConnectRecord> connectPtr;
11877     connectPtr.i = tabPtr.p->connectrec;
11878     ptrCheckGuard(connectPtr, cconnectFileSize, connectRecord);
11879     ndbrequire(connectPtr.p->table == tabPtr.i);
11880     if (connectPtr.p->connectState == ConnectRecord::ALTER_TABLE)
11881     {
11882       jam();
11883       totalfragments = connectPtr.p->m_alter.m_totalfragments;
11884     }
11885   }
11886 
11887   writePageWord(&wf, totalfragments);
11888   writePageWord(&wf, tabPtr.p->noOfBackups);
11889   writePageWord(&wf, tabPtr.p->hashpointer);
11890   writePageWord(&wf, tabPtr.p->kvalue);
11891   writePageWord(&wf, tabPtr.p->mask);
11892   writePageWord(&wf, tabPtr.p->method);
11893   writePageWord(&wf, tabPtr.p->tabStorage);
11894 
11895   signal->theData[0] = DihContinueB::ZPACK_FRAG_INTO_PAGES;
11896   signal->theData[1] = tabPtr.i;
11897   signal->theData[2] = 0;
11898   signal->theData[3] = wf.pageIndex;
11899   signal->theData[4] = wf.wordIndex;
11900   signal->theData[5] = totalfragments;
11901   sendSignal(reference(), GSN_CONTINUEB, signal, 6, JBB);
11902 }//Dbdih::packTableIntoPagesLab()
11903 
11904 /*****************************************************************************/
11905 // execCONTINUEB(ZPACK_FRAG_INTO_PAGES)
11906 /*****************************************************************************/
packFragIntoPagesLab(Signal * signal,RWFragment * wf)11907 void Dbdih::packFragIntoPagesLab(Signal* signal, RWFragment* wf)
11908 {
11909   ndbrequire(wf->pageIndex < NDB_ARRAY_SIZE(wf->rwfTabPtr.p->pageRef));
11910   wf->rwfPageptr.i = wf->rwfTabPtr.p->pageRef[wf->pageIndex];
11911   ptrCheckGuard(wf->rwfPageptr, cpageFileSize, pageRecord);
11912   FragmentstorePtr fragPtr;
11913   getFragstore(wf->rwfTabPtr.p, wf->fragId, fragPtr);
11914   writeFragment(wf, fragPtr);
11915   writeReplicas(wf, fragPtr.p->storedReplicas);
11916   writeReplicas(wf, fragPtr.p->oldStoredReplicas);
11917   wf->fragId++;
11918   if (wf->fragId == wf->totalfragments) {
11919     jam();
11920     PageRecordPtr pagePtr;
11921     pagePtr.i = wf->rwfTabPtr.p->pageRef[0];
11922     ptrCheckGuard(pagePtr, cpageFileSize, pageRecord);
11923     pagePtr.p->word[33] = wf->rwfTabPtr.p->noPages;
11924     pagePtr.p->word[34] = ((wf->rwfTabPtr.p->noPages - 1) * 2048) + wf->wordIndex;
11925     switch (wf->rwfTabPtr.p->tabCopyStatus) {
11926     case TabRecord::CS_SR_PHASE2_READ_TABLE:
11927       /* -------------------------------------------------------------------*/
11928       // We are performing a system restart and we are now ready to copy the
11929       // table from this node (the master) to all other nodes.
11930       /* -------------------------------------------------------------------*/
11931       jam();
11932       wf->rwfTabPtr.p->tabCopyStatus = TabRecord::CS_IDLE;
11933       signal->theData[0] = DihContinueB::ZSR_PHASE2_READ_TABLE;
11934       signal->theData[1] = wf->rwfTabPtr.i;
11935       sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
11936       return;
11937       break;
11938     case TabRecord::CS_COPY_NODE_STATE:
11939       jam();
11940       tableCopyNodeLab(signal, wf->rwfTabPtr);
11941       return;
11942       break;
11943     case TabRecord::CS_LCP_READ_TABLE:
11944       jam();
11945       signal->theData[0] = DihContinueB::ZTABLE_UPDATE;
11946       signal->theData[1] = wf->rwfTabPtr.i;
11947       sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
11948       return;
11949       break;
11950     case TabRecord::CS_REMOVE_NODE:
11951     case TabRecord::CS_INVALIDATE_NODE_LCP:
11952       jam();
11953       signal->theData[0] = DihContinueB::ZTABLE_UPDATE;
11954       signal->theData[1] = wf->rwfTabPtr.i;
11955       sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
11956       return;
11957       break;
11958     case TabRecord::CS_ADD_TABLE_MASTER:
11959       jam();
11960       wf->rwfTabPtr.p->tabCopyStatus = TabRecord::CS_IDLE;
11961       signal->theData[0] = DihContinueB::ZADD_TABLE_MASTER_PAGES;
11962       signal->theData[1] = wf->rwfTabPtr.i;
11963       sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
11964       return;
11965       break;
11966     case TabRecord::CS_ADD_TABLE_SLAVE:
11967       jam();
11968       wf->rwfTabPtr.p->tabCopyStatus = TabRecord::CS_IDLE;
11969       signal->theData[0] = DihContinueB::ZADD_TABLE_SLAVE_PAGES;
11970       signal->theData[1] = wf->rwfTabPtr.i;
11971       sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
11972       return;
11973     case TabRecord::CS_COPY_TO_SAVE:
11974       signal->theData[0] = DihContinueB::ZTABLE_UPDATE;
11975       signal->theData[1] = wf->rwfTabPtr.i;
11976       sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
11977       return;
11978     case TabRecord::CS_GET_TABINFO:
11979       jam();
11980       signal->theData[0] = DihContinueB::ZGET_TABINFO_SEND;
11981       signal->theData[1] = wf->rwfTabPtr.i;
11982       sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
11983       return;
11984     default:
11985       ndbrequire(false);
11986       return;
11987       break;
11988     }//switch
11989   } else {
11990     jam();
11991     signal->theData[0] = DihContinueB::ZPACK_FRAG_INTO_PAGES;
11992     signal->theData[1] = wf->rwfTabPtr.i;
11993     signal->theData[2] = wf->fragId;
11994     signal->theData[3] = wf->pageIndex;
11995     signal->theData[4] = wf->wordIndex;
11996     signal->theData[5] = wf->totalfragments;
11997     sendSignal(reference(), GSN_CONTINUEB, signal, 6, JBB);
11998   }//if
11999   return;
12000 }//Dbdih::packFragIntoPagesLab()
12001 
12002 /*****************************************************************************/
12003 /* **********     START FRAGMENT MODULE                          *************/
12004 /*****************************************************************************/
12005 void
dump_replica_info()12006 Dbdih::dump_replica_info()
12007 {
12008   TabRecordPtr tabPtr;
12009   FragmentstorePtr fragPtr;
12010 
12011   for(tabPtr.i = 0; tabPtr.i < ctabFileSize; tabPtr.i++)
12012   {
12013     ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
12014     if (tabPtr.p->tabStatus != TabRecord::TS_ACTIVE)
12015       continue;
12016 
12017     for(Uint32 fid = 0; fid<tabPtr.p->totalfragments; fid++)
12018     {
12019       getFragstore(tabPtr.p, fid, fragPtr);
12020       ndbout_c("tab: %d frag: %d gci: %d\n",
12021 	       tabPtr.i, fid, SYSFILE->newestRestorableGCI);
12022 
12023       dump_replica_info(fragPtr.p);
12024     }
12025   }
12026 }
12027 
12028 void
dump_replica_info(const Fragmentstore * fragPtrP)12029 Dbdih::dump_replica_info(const Fragmentstore* fragPtrP)
12030 {
12031   ndbout_c("  -- storedReplicas: ");
12032   Uint32 i;
12033   ReplicaRecordPtr replicaPtr;
12034   replicaPtr.i = fragPtrP->storedReplicas;
12035   for(; replicaPtr.i != RNIL; replicaPtr.i = replicaPtr.p->nextReplica)
12036   {
12037     ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
12038     ndbout_c("  node: %d initialGci: %d nextLcp: %d noCrashedReplicas: %d",
12039              replicaPtr.p->procNode,
12040              replicaPtr.p->initialGci,
12041              replicaPtr.p->nextLcp,
12042              replicaPtr.p->noCrashedReplicas);
12043     for(i = 0; i<MAX_LCP_STORED; i++)
12044     {
12045       ndbout_c("    i: %d %s : lcpId: %d maxGci Completed: %d Started: %d",
12046                i,
12047                (replicaPtr.p->lcpStatus[i] == ZVALID ?"VALID":"INVALID"),
12048                replicaPtr.p->lcpId[i],
12049                replicaPtr.p->maxGciCompleted[i],
12050                replicaPtr.p->maxGciStarted[i]);
12051     }
12052 
12053     for (i = 0; i < 8; i++)
12054     {
12055       ndbout_c("    crashed replica: %d replicaLastGci: %d createGci: %d",
12056                i,
12057                replicaPtr.p->replicaLastGci[i],
12058                replicaPtr.p->createGci[i]);
12059     }
12060   }
12061   ndbout_c("  -- oldStoredReplicas");
12062   replicaPtr.i = fragPtrP->oldStoredReplicas;
12063   for(; replicaPtr.i != RNIL; replicaPtr.i = replicaPtr.p->nextReplica)
12064   {
12065     ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
12066     ndbout_c("  node: %d initialGci: %d nextLcp: %d noCrashedReplicas: %d",
12067              replicaPtr.p->procNode,
12068              replicaPtr.p->initialGci,
12069              replicaPtr.p->nextLcp,
12070              replicaPtr.p->noCrashedReplicas);
12071     for(i = 0; i<MAX_LCP_STORED; i++)
12072     {
12073       ndbout_c("    i: %d %s : lcpId: %d maxGci Completed: %d Started: %d",
12074                i,
12075                (replicaPtr.p->lcpStatus[i] == ZVALID ?"VALID":"INVALID"),
12076                replicaPtr.p->lcpId[i],
12077                replicaPtr.p->maxGciCompleted[i],
12078                replicaPtr.p->maxGciStarted[i]);
12079     }
12080 
12081     for (i = 0; i < 8; i++)
12082     {
12083       ndbout_c("    crashed replica: %d replicaLastGci: %d createGci: %d",
12084                i,
12085                replicaPtr.p->replicaLastGci[i],
12086                replicaPtr.p->createGci[i]);
12087     }
12088   }
12089 }
12090 
startFragment(Signal * signal,Uint32 tableId,Uint32 fragId)12091 void Dbdih::startFragment(Signal* signal, Uint32 tableId, Uint32 fragId)
12092 {
12093   Uint32 TloopCount = 0;
12094   TabRecordPtr tabPtr;
12095   while (true) {
12096     if (TloopCount > 100) {
12097       jam();
12098       signal->theData[0] = DihContinueB::ZSTART_FRAGMENT;
12099       signal->theData[1] = tableId;
12100       signal->theData[2] = 0;
12101       sendSignal(reference(), GSN_CONTINUEB, signal, 3, JBB);
12102       return;
12103     }
12104 
12105     if (tableId >= ctabFileSize) {
12106       jam();
12107       signal->theData[0] = DihContinueB::ZCOMPLETE_RESTART;
12108       sendSignal(reference(), GSN_CONTINUEB, signal, 1, JBB);
12109       return;
12110     }//if
12111 
12112     tabPtr.i = tableId;
12113     ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
12114     if (tabPtr.p->tabStatus != TabRecord::TS_ACTIVE){
12115       jam();
12116       TloopCount++;
12117       tableId++;
12118       fragId = 0;
12119       continue;
12120     }
12121 
12122     if(tabPtr.p->tabStorage != TabRecord::ST_NORMAL){
12123       jam();
12124       TloopCount++;
12125       tableId++;
12126       fragId = 0;
12127       continue;
12128     }
12129 
12130     jam();
12131     break;
12132   }//while
12133 
12134   FragmentstorePtr fragPtr;
12135   getFragstore(tabPtr.p, fragId, fragPtr);
12136   /* ----------------------------------------------------------------------- */
12137   /*     WE NEED TO RESET THE REPLICA DATA STRUCTURES. THIS MEANS THAT WE    */
12138   /*     MUST REMOVE REPLICAS THAT WAS NOT STARTED AT THE GCI TO RESTORE. WE */
12139   /*     NEED TO PUT ALL STORED REPLICAS ON THE LIST OF OLD STORED REPLICAS  */
12140   /*     RESET THE NUMBER OF REPLICAS TO CREATE.                             */
12141   /* ----------------------------------------------------------------------- */
12142   cnoOfCreateReplicas = 0;
12143   /* ----------------------------------------------------------------------- */
12144   /*     WE WILL NEVER START MORE THAN FOUR FRAGMENT REPLICAS WHATEVER THE   */
12145   /*     DESIRED REPLICATION IS.                                             */
12146   /* ----------------------------------------------------------------------- */
12147   ndbrequire(tabPtr.p->noOfBackups < MAX_REPLICAS);
12148   /* ----------------------------------------------------------------------- */
12149   /*     SEARCH FOR STORED REPLICAS THAT CAN BE USED TO RESTART THE SYSTEM.  */
12150   /* ----------------------------------------------------------------------- */
12151   searchStoredReplicas(fragPtr);
12152 
12153   if (cnoOfCreateReplicas == 0) {
12154     /* --------------------------------------------------------------------- */
12155     /*   THERE WERE NO STORED REPLICAS AVAILABLE THAT CAN SERVE AS REPLICA TO*/
12156     /*   RESTART THE SYSTEM FROM. IN A LATER RELEASE WE WILL ADD             */
12157     /*   FUNCTIONALITY TO CHECK IF THERE ARE ANY STANDBY NODES THAT COULD DO */
12158     /*   THIS TASK INSTEAD IN THIS IMPLEMENTATION WE SIMPLY CRASH THE SYSTEM.*/
12159     /*   THIS WILL DECREASE THE GCI TO RESTORE WHICH HOPEFULLY WILL MAKE IT  */
12160     /*   POSSIBLE TO RESTORE THE SYSTEM.                                     */
12161     /* --------------------------------------------------------------------- */
12162     char buf[64];
12163     BaseString::snprintf(buf, sizeof(buf), "table: %d fragment: %d gci: %d",
12164 			 tableId, fragId, SYSFILE->newestRestorableGCI);
12165 
12166     ndbout_c("%s", buf);
12167     dump_replica_info();
12168 
12169     progError(__LINE__, NDBD_EXIT_NO_RESTORABLE_REPLICA, buf);
12170     ndbrequire(false);
12171     return;
12172   }//if
12173 
12174   /* ----------------------------------------------------------------------- */
12175   /*     WE HAVE CHANGED THE NODE TO BE PRIMARY REPLICA AND THE NODES TO BE  */
12176   /*     BACKUP NODES. WE MUST UPDATE THIS NODES DATA STRUCTURE SINCE WE     */
12177   /*     WILL NOT COPY THE TABLE DATA TO OURSELF.                            */
12178   /* ----------------------------------------------------------------------- */
12179   updateNodeInfo(fragPtr);
12180   /* ----------------------------------------------------------------------- */
12181   /*     NOW WE HAVE COLLECTED ALL THE REPLICAS WE COULD GET. WE WILL NOW    */
12182   /*     RESTART THE FRAGMENT REPLICAS WE HAVE FOUND IRRESPECTIVE OF IF THERE*/
12183   /*     ARE ENOUGH ACCORDING TO THE DESIRED REPLICATION.                    */
12184   /* ----------------------------------------------------------------------- */
12185   /*     WE START BY SENDING ADD_FRAGREQ FOR THOSE REPLICAS THAT NEED IT.    */
12186   /* ----------------------------------------------------------------------- */
12187   CreateReplicaRecordPtr createReplicaPtr;
12188   for (createReplicaPtr.i = 0;
12189        createReplicaPtr.i < cnoOfCreateReplicas;
12190        createReplicaPtr.i++) {
12191     jam();
12192     ptrCheckGuard(createReplicaPtr, 4, createReplicaRecord);
12193   }//for
12194 
12195   sendStartFragreq(signal, tabPtr, fragId);
12196 
12197   /**
12198    * Don't wait for START_FRAGCONF
12199    */
12200   fragId++;
12201   if (fragId >= tabPtr.p->totalfragments) {
12202     jam();
12203     tabPtr.i++;
12204     fragId = 0;
12205   }//if
12206   signal->theData[0] = DihContinueB::ZSTART_FRAGMENT;
12207   signal->theData[1] = tabPtr.i;
12208   signal->theData[2] = fragId;
12209   sendSignal(reference(), GSN_CONTINUEB, signal, 3, JBB);
12210 
12211   return;
12212 }//Dbdih::startFragmentLab()
12213 
12214 
12215 /*****************************************************************************/
12216 /* **********     COMPLETE RESTART MODULE                        *************/
12217 /*****************************************************************************/
completeRestartLab(Signal * signal)12218 void Dbdih::completeRestartLab(Signal* signal)
12219 {
12220   sendLoopMacro(START_RECREQ, sendSTART_RECREQ, RNIL);
12221 }//completeRestartLab()
12222 
12223 /* ------------------------------------------------------------------------- */
12224 //       SYSTEM RESTART:
12225 /*         A NODE HAS COMPLETED RESTORING ALL DATABASE FRAGMENTS.            */
12226 //       NODE RESTART:
12227 //         THE STARTING NODE HAS PREPARED ITS LOG FILES TO ENABLE EXECUTION
12228 //         OF TRANSACTIONS.
12229 // Precondition:
12230 //   This signal must be received by the master node.
12231 /* ------------------------------------------------------------------------- */
execSTART_RECCONF(Signal * signal)12232 void Dbdih::execSTART_RECCONF(Signal* signal)
12233 {
12234   jamEntry();
12235   Uint32 senderNodeId = signal->theData[0];
12236   Uint32 senderData = signal->theData[1];
12237 
12238   if (senderData != RNIL)
12239   {
12240     /**
12241      * This is node restart
12242      */
12243     Ptr<TakeOverRecord> takeOverPtr;
12244     c_takeOverPool.getPtr(takeOverPtr, senderData);
12245     sendStartTo(signal, takeOverPtr);
12246     return;
12247   }
12248 
12249   /* --------------------------------------------------------------------- */
12250   // This was the system restart case. We set the state indicating that the
12251   // node has completed restoration of all fragments.
12252   /* --------------------------------------------------------------------- */
12253   receiveLoopMacro(START_RECREQ, senderNodeId);
12254 
12255   /**
12256    * Remove each node that has to TO from LCP/LQH
12257    */
12258   Uint32 i = 0;
12259   while ((i = m_to_nodes.find(i + 1)) != NdbNodeBitmask::NotFound)
12260   {
12261     jam();
12262     NodeRecordPtr nodePtr;
12263     nodePtr.i = i;
12264     ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
12265     nodePtr.p->copyCompleted = 0;
12266   }
12267 
12268   if (m_to_nodes.get(getOwnNodeId()))
12269   {
12270     /**
12271      * We (master) needs take-over
12272      *   run this directly to avoid strange confusion
12273      */
12274     jam();
12275     c_sr_wait_to = true;
12276   }
12277 
12278   if (!m_to_nodes.isclear() && c_sr_wait_to)
12279   {
12280     jam();
12281 
12282     StartCopyReq* req = (StartCopyReq*)signal->getDataPtrSend();
12283     req->senderRef = reference();
12284     req->senderData = getOwnNodeId();
12285     req->flags = 0; // Note dont wait for LCP
12286 
12287     i = 0;
12288     while ((i = m_to_nodes.find(i + 1)) != NdbNodeBitmask::NotFound)
12289     {
12290       jam();
12291       req->startingNodeId = i;
12292       sendSignal(calcDihBlockRef(i), GSN_START_COPYREQ, signal,
12293                  StartCopyReq::SignalLength, JBB);
12294     }
12295 
12296     char buf[100];
12297     infoEvent("Starting take-over of %s", m_to_nodes.getText(buf));
12298     return;
12299   }
12300 
12301   signal->theData[0] = reference();
12302   m_sr_nodes.copyto(NdbNodeBitmask::Size, signal->theData+1);
12303   sendSignal(cntrlblockref, GSN_NDB_STARTCONF, signal,
12304              1 + NdbNodeBitmask::Size, JBB);
12305 }//Dbdih::execSTART_RECCONF()
12306 
copyNodeLab(Signal * signal,Uint32 tableId)12307 void Dbdih::copyNodeLab(Signal* signal, Uint32 tableId)
12308 {
12309   /* ----------------------------------------------------------------------- */
12310   // This code is executed by the master to assist a node restart in receiving
12311   // the data in the master.
12312   /* ----------------------------------------------------------------------- */
12313   Uint32 TloopCount = 0;
12314 
12315   if (!c_nodeStartMaster.activeState) {
12316     jam();
12317     /* --------------------------------------------------------------------- */
12318     // Obviously the node crashed in the middle of its node restart. We will
12319     // stop this process simply by returning after resetting the wait indicator.
12320     /* ---------------------------------------------------------------------- */
12321     c_nodeStartMaster.wait = ZFALSE;
12322     return;
12323   }//if
12324   TabRecordPtr tabPtr;
12325   tabPtr.i = tableId;
12326   while (tabPtr.i < ctabFileSize) {
12327     ptrAss(tabPtr, tabRecord);
12328     if (tabPtr.p->tabStatus == TabRecord::TS_ACTIVE)
12329     {
12330       /* -------------------------------------------------------------------- */
12331       // The table is defined. We will start by packing the table into pages.
12332       // The tabCopyStatus indicates to the CONTINUEB(ZPACK_TABLE_INTO_PAGES)
12333       // who called it. After packing the table into page(s) it will be sent to
12334       // the starting node by COPY_TABREQ signals. After returning from the
12335       // starting node we will return to this subroutine and continue
12336       // with the next table.
12337       /* -------------------------------------------------------------------- */
12338       if (! (tabPtr.p->tabCopyStatus == TabRecord::CS_IDLE))
12339       {
12340         jam();
12341         signal->theData[0] = DihContinueB::ZCOPY_NODE;
12342         signal->theData[1] = tabPtr.i;
12343         sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 100, 2);
12344         return;
12345       }
12346       ndbrequire(tabPtr.p->tabCopyStatus == TabRecord::CS_IDLE);
12347       tabPtr.p->tabCopyStatus = TabRecord::CS_COPY_NODE_STATE;
12348       signal->theData[0] = DihContinueB::ZPACK_TABLE_INTO_PAGES;
12349       signal->theData[1] = tabPtr.i;
12350       sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
12351       return;
12352     } else {
12353       jam();
12354       if (TloopCount > 100) {
12355 	/* ------------------------------------------------------------------ */
12356 	// Introduce real-time break after looping through 100 not copied tables
12357 	/* ----------------------------------------------------------------- */
12358         jam();
12359         signal->theData[0] = DihContinueB::ZCOPY_NODE;
12360         signal->theData[1] = tabPtr.i + 1;
12361         sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
12362         return;
12363       } else {
12364         jam();
12365         TloopCount++;
12366         tabPtr.i++;
12367       }//if
12368     }//if
12369   }//while
12370   dihCopyCompletedLab(signal);
12371   return;
12372 }//Dbdih::copyNodeLab()
12373 
tableCopyNodeLab(Signal * signal,TabRecordPtr tabPtr)12374 void Dbdih::tableCopyNodeLab(Signal* signal, TabRecordPtr tabPtr)
12375 {
12376   /* ----------------------------------------------------------------------- */
12377   /*       COPY PAGES READ TO STARTING NODE.                                 */
12378   /* ----------------------------------------------------------------------- */
12379   if (!c_nodeStartMaster.activeState) {
12380     jam();
12381     releaseTabPages(tabPtr.i);
12382     c_nodeStartMaster.wait = ZFALSE;
12383     return;
12384   }//if
12385   NodeRecordPtr copyNodePtr;
12386   PageRecordPtr pagePtr;
12387   copyNodePtr.i = c_nodeStartMaster.startNode;
12388   ptrCheckGuard(copyNodePtr, MAX_NDB_NODES, nodeRecord);
12389 
12390   copyNodePtr.p->activeTabptr = tabPtr.i;
12391   pagePtr.i = tabPtr.p->pageRef[0];
12392   ptrCheckGuard(pagePtr, cpageFileSize, pageRecord);
12393 
12394   signal->theData[0] = DihContinueB::ZCOPY_TABLE_NODE;
12395   signal->theData[1] = tabPtr.i;
12396   signal->theData[2] = copyNodePtr.i;
12397   signal->theData[3] = 0;
12398   signal->theData[4] = 0;
12399   signal->theData[5] = pagePtr.p->word[34];
12400   sendSignal(reference(), GSN_CONTINUEB, signal, 6, JBB);
12401 }//Dbdih::tableCopyNodeLab()
12402 
12403 /* ------------------------------------------------------------------------- */
12404 // execCONTINUEB(ZCOPY_TABLE)
12405 // This routine is used to copy the table descriptions from the master to
12406 // other nodes. It is used in the system restart to copy from master to all
12407 // starting nodes.
12408 /* ------------------------------------------------------------------------- */
copyTableLab(Signal * signal,Uint32 tableId)12409 void Dbdih::copyTableLab(Signal* signal, Uint32 tableId)
12410 {
12411   TabRecordPtr tabPtr;
12412   tabPtr.i = tableId;
12413   ptrAss(tabPtr, tabRecord);
12414 
12415   ndbrequire(tabPtr.p->tabCopyStatus == TabRecord::CS_IDLE);
12416   tabPtr.p->tabCopyStatus = TabRecord::CS_SR_PHASE2_READ_TABLE;
12417   signal->theData[0] = DihContinueB::ZPACK_TABLE_INTO_PAGES;
12418   signal->theData[1] = tabPtr.i;
12419   sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
12420   return;
12421 }//Dbdih::copyTableLab()
12422 
12423 /* ------------------------------------------------------------------------- */
12424 // execCONTINUEB(ZSR_PHASE2_READ_TABLE)
12425 /* ------------------------------------------------------------------------- */
srPhase2ReadTableLab(Signal * signal,TabRecordPtr tabPtr)12426 void Dbdih::srPhase2ReadTableLab(Signal* signal, TabRecordPtr tabPtr)
12427 {
12428   /* ----------------------------------------------------------------------- */
12429   // We set the sendCOPY_TABREQState to ZACTIVE for all nodes since it is a long
12430   // process to send off all table descriptions. Thus we ensure that we do
12431   // not encounter race conditions where one node is completed before the
12432   // sending process is completed. This could lead to that we start off the
12433   // system before we actually finished all copying of table descriptions
12434   // and could lead to strange errors.
12435   /* ----------------------------------------------------------------------- */
12436 
12437   //sendLoopMacro(COPY_TABREQ, nullRoutine);
12438 
12439   breakCopyTableLab(signal, tabPtr, cfirstAliveNode);
12440   return;
12441 }//Dbdih::srPhase2ReadTableLab()
12442 
12443 /* ------------------------------------------------------------------------- */
12444 /*       COPY PAGES READ TO ALL NODES.                                       */
12445 /* ------------------------------------------------------------------------- */
breakCopyTableLab(Signal * signal,TabRecordPtr tabPtr,Uint32 nodeId)12446 void Dbdih::breakCopyTableLab(Signal* signal, TabRecordPtr tabPtr, Uint32 nodeId)
12447 {
12448   NodeRecordPtr nodePtr;
12449   nodePtr.i = nodeId;
12450   while (nodePtr.i != RNIL) {
12451     jam();
12452     ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
12453     if (nodePtr.i == getOwnNodeId()){
12454       jam();
12455       /* ------------------------------------------------------------------- */
12456       /* NOT NECESSARY TO COPY TO MY OWN NODE. I ALREADY HAVE THE PAGES.     */
12457       /* I DO HOWEVER NEED TO STORE THE TABLE DESCRIPTION ONTO DISK.         */
12458       /* ------------------------------------------------------------------- */
12459       /* IF WE ARE MASTER WE ONLY NEED TO SAVE THE TABLE ON DISK. WE ALREADY */
12460       /* HAVE THE TABLE DESCRIPTION IN THE DATA STRUCTURES.                  */
12461       // AFTER COMPLETING THE WRITE TO DISK THE MASTER WILL ALSO SEND
12462       // COPY_TABCONF AS ALL THE OTHER NODES.
12463       /* ------------------------------------------------------------------- */
12464       c_COPY_TABREQ_Counter.setWaitingFor(nodePtr.i);
12465       tabPtr.p->tabUpdateState = TabRecord::US_COPY_TAB_REQ;
12466       signal->theData[0] = DihContinueB::ZTABLE_UPDATE;
12467       signal->theData[1] = tabPtr.i;
12468       sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
12469       nodePtr.i = nodePtr.p->nextNode;
12470     } else {
12471       PageRecordPtr pagePtr;
12472       /* -------------------------------------------------------------------- */
12473       // RATHER THAN SENDING ALL COPY_TABREQ IN PARALLEL WE WILL SERIALISE THIS
12474       // ACTIVITY AND WILL THUS CALL breakCopyTableLab AGAIN WHEN COMPLETED THE
12475       // SENDING OF COPY_TABREQ'S.
12476       /* -------------------------------------------------------------------- */
12477       jam();
12478       tabPtr.p->tabCopyStatus = TabRecord::CS_SR_PHASE3_COPY_TABLE;
12479       pagePtr.i = tabPtr.p->pageRef[0];
12480       ptrCheckGuard(pagePtr, cpageFileSize, pageRecord);
12481       signal->theData[0] = DihContinueB::ZCOPY_TABLE_NODE;
12482       signal->theData[1] = tabPtr.i;
12483       signal->theData[2] = nodePtr.i;
12484       signal->theData[3] = 0;
12485       signal->theData[4] = 0;
12486       signal->theData[5] = pagePtr.p->word[34];
12487       sendSignal(reference(), GSN_CONTINUEB, signal, 6, JBB);
12488       return;
12489     }//if
12490   }//while
12491   /* ----------------------------------------------------------------------- */
12492   /*    WE HAVE NOW SENT THE TABLE PAGES TO ALL NODES. EXIT AND WAIT FOR ALL */
12493   /*    REPLIES.                                                             */
12494   /* ----------------------------------------------------------------------- */
12495   return;
12496 }//Dbdih::breakCopyTableLab()
12497 
12498 /* ------------------------------------------------------------------------- */
12499 // execCONTINUEB(ZCOPY_TABLE_NODE)
12500 /* ------------------------------------------------------------------------- */
copyTableNode(Signal * signal,CopyTableNode * ctn,NodeRecordPtr nodePtr)12501 void Dbdih::copyTableNode(Signal* signal,
12502 			  CopyTableNode* ctn, NodeRecordPtr nodePtr)
12503 {
12504   if (getNodeState().startLevel >= NodeState::SL_STARTED){
12505     /* --------------------------------------------------------------------- */
12506     // We are in the process of performing a node restart and are copying a
12507     // table description to a starting node. We will check that no nodes have
12508     // crashed in this process.
12509     /* --------------------------------------------------------------------- */
12510     if (!c_nodeStartMaster.activeState) {
12511       jam();
12512       /** ------------------------------------------------------------------
12513        * The starting node crashed. We will release table pages and stop this
12514        * copy process and allow new node restarts to start.
12515        * ------------------------------------------------------------------ */
12516       releaseTabPages(ctn->ctnTabPtr.i);
12517       c_nodeStartMaster.wait = ZFALSE;
12518       return;
12519     }//if
12520   }//if
12521   ndbrequire(ctn->pageIndex < NDB_ARRAY_SIZE(ctn->ctnTabPtr.p->pageRef));
12522   ctn->ctnPageptr.i = ctn->ctnTabPtr.p->pageRef[ctn->pageIndex];
12523   ptrCheckGuard(ctn->ctnPageptr, cpageFileSize, pageRecord);
12524   /**
12525    * If first page & firstWord reqinfo = 1 (first signal)
12526    */
12527   Uint32 reqinfo = (ctn->pageIndex == 0) && (ctn->wordIndex == 0);
12528   if(reqinfo == 1){
12529     c_COPY_TABREQ_Counter.setWaitingFor(nodePtr.i);
12530   }
12531 
12532   for (Uint32 i = 0; i < 16; i++) {
12533     jam();
12534     sendCopyTable(signal, ctn, calcDihBlockRef(nodePtr.i), reqinfo);
12535     reqinfo = 0;
12536     if (ctn->noOfWords <= 16) {
12537       jam();
12538       switch (ctn->ctnTabPtr.p->tabCopyStatus) {
12539       case TabRecord::CS_SR_PHASE3_COPY_TABLE:
12540 	/* ------------------------------------------------------------------ */
12541 	// We have copied the table description to this node.
12542 	// We will now proceed
12543 	// with sending the table description to the next node in the node list.
12544 	/* ------------------------------------------------------------------ */
12545         jam();
12546         ctn->ctnTabPtr.p->tabCopyStatus = TabRecord::CS_IDLE;
12547         breakCopyTableLab(signal, ctn->ctnTabPtr, nodePtr.p->nextNode);
12548         return;
12549         break;
12550       case TabRecord::CS_COPY_NODE_STATE:
12551         jam();
12552         ctn->ctnTabPtr.p->tabCopyStatus = TabRecord::CS_IDLE;
12553         return;
12554         break;
12555       default:
12556         ndbrequire(false);
12557         break;
12558       }//switch
12559     } else {
12560       jam();
12561       ctn->wordIndex += 16;
12562       if (ctn->wordIndex == 2048) {
12563         jam();
12564         ctn->wordIndex = 0;
12565         ctn->pageIndex++;
12566         ndbrequire(ctn->pageIndex < NDB_ARRAY_SIZE(ctn->ctnTabPtr.p->pageRef));
12567         ctn->ctnPageptr.i = ctn->ctnTabPtr.p->pageRef[ctn->pageIndex];
12568         ptrCheckGuard(ctn->ctnPageptr, cpageFileSize, pageRecord);
12569       }//if
12570       ctn->noOfWords -= 16;
12571     }//if
12572   }//for
12573   signal->theData[0] = DihContinueB::ZCOPY_TABLE_NODE;
12574   signal->theData[1] = ctn->ctnTabPtr.i;
12575   signal->theData[2] = nodePtr.i;
12576   signal->theData[3] = ctn->pageIndex;
12577   signal->theData[4] = ctn->wordIndex;
12578   signal->theData[5] = ctn->noOfWords;
12579   sendSignal(reference(), GSN_CONTINUEB, signal, 6, JBB);
12580 }//Dbdih::copyTableNodeLab()
12581 
sendCopyTable(Signal * signal,CopyTableNode * ctn,BlockReference ref,Uint32 reqinfo)12582 void Dbdih::sendCopyTable(Signal* signal, CopyTableNode* ctn,
12583                           BlockReference ref, Uint32 reqinfo)
12584 {
12585   signal->theData[0] = reference();
12586   signal->theData[1] = reqinfo;
12587   signal->theData[2] = ctn->ctnTabPtr.i;
12588   signal->theData[3] = ctn->ctnTabPtr.p->schemaVersion;
12589   signal->theData[4] = ctn->noOfWords;
12590   ndbrequire(ctn->wordIndex + 15 < 2048);
12591   MEMCOPY_NO_WORDS(&signal->theData[5], &ctn->ctnPageptr.p->word[ctn->wordIndex], 16);
12592   sendSignal(ref, GSN_COPY_TABREQ, signal, 21, JBB);
12593 }//Dbdih::sendCopyTable()
12594 
execCOPY_TABCONF(Signal * signal)12595 void Dbdih::execCOPY_TABCONF(Signal* signal)
12596 {
12597   NodeRecordPtr nodePtr;
12598   jamEntry();
12599   nodePtr.i = signal->theData[0];
12600   Uint32 tableId = signal->theData[1];
12601   if (getNodeState().startLevel >= NodeState::SL_STARTED){
12602     /* --------------------------------------------------------------------- */
12603     // We are in the process of performing a node restart. Continue by copying
12604     // the next table to the starting node.
12605     /* --------------------------------------------------------------------- */
12606     jam();
12607     NodeRecordPtr nodePtr;
12608     nodePtr.i = signal->theData[0];
12609     ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
12610     c_COPY_TABREQ_Counter.clearWaitingFor(nodePtr.i);
12611 
12612     releaseTabPages(tableId);
12613     signal->theData[0] = DihContinueB::ZCOPY_NODE;
12614     signal->theData[1] = tableId + 1;
12615     sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
12616     return;
12617   } else {
12618     /* --------------------------------------------------------------------- */
12619     // We are in the process of performing a system restart. Check if all nodes
12620     // have saved the new table description to file and then continue with the
12621     // next table.
12622     /* --------------------------------------------------------------------- */
12623     receiveLoopMacro(COPY_TABREQ, nodePtr.i);
12624     /* --------------------------------------------------------------------- */
12625     /*   WE HAVE NOW COPIED TO ALL NODES. WE HAVE NOW COMPLETED RESTORING    */
12626     /*   THIS TABLE. CONTINUE WITH THE NEXT TABLE.                           */
12627     /*   WE NEED TO RELEASE THE PAGES IN THE TABLE IN THIS NODE HERE.        */
12628     /*   WE ALSO NEED TO CLOSE THE TABLE FILE.                               */
12629     /* --------------------------------------------------------------------- */
12630     releaseTabPages(tableId);
12631 
12632     TabRecordPtr tabPtr;
12633     tabPtr.i = tableId;
12634     ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
12635 
12636     ConnectRecordPtr connectPtr;
12637     connectPtr.i = tabPtr.p->connectrec;
12638     ptrCheckGuard(connectPtr, cconnectFileSize, connectRecord);
12639 
12640     connectPtr.p->m_alter.m_totalfragments = tabPtr.p->totalfragments;
12641     sendAddFragreq(signal, connectPtr, tabPtr, 0);
12642     return;
12643   }//if
12644 }//Dbdih::execCOPY_TABCONF()
12645 
12646 /*
12647   3.13   L O C A L   C H E C K P O I N T  (M A S T E R)
12648   ****************************************************
12649   */
12650 /*****************************************************************************/
12651 /* **********     LOCAL-CHECK-POINT-HANDLING MODULE              *************/
12652 /*****************************************************************************/
12653 /* ------------------------------------------------------------------------- */
12654 /*       IT IS TIME TO CHECK IF IT IS TIME TO START A LOCAL CHECKPOINT.      */
12655 /*       WE WILL EITHER START AFTER 1 MILLION WORDS HAVE ARRIVED OR WE WILL  */
12656 /*       EXECUTE AFTER ABOUT 16 MINUTES HAVE PASSED BY.                      */
12657 /* ------------------------------------------------------------------------- */
checkTcCounterLab(Signal * signal)12658 void Dbdih::checkTcCounterLab(Signal* signal)
12659 {
12660   CRASH_INSERTION(7009);
12661   if (c_lcpState.lcpStatus != LCP_STATUS_IDLE) {
12662     g_eventLogger->error("lcpStatus = %u"
12663                          "lcpStatusUpdatedPlace = %d",
12664                          (Uint32) c_lcpState.lcpStatus,
12665                          c_lcpState.lcpStatusUpdatedPlace);
12666     ndbrequire(false);
12667     return;
12668   }//if
12669   add_lcp_counter(&c_lcpState.ctimer, 32);
12670   if ((c_nodeStartMaster.blockLcp == true) ||
12671       (c_lcpState.lcpStopGcp >= c_newest_restorable_gci)) {
12672     jam();
12673     /* --------------------------------------------------------------------- */
12674     // No reason to start juggling the states and checking for start of LCP if
12675     // we are blocked to start an LCP anyway.
12676     // We also block LCP start if we have not completed one global checkpoints
12677     // before starting another local checkpoint.
12678     /* --------------------------------------------------------------------- */
12679     signal->theData[0] = DihContinueB::ZCHECK_TC_COUNTER;
12680     signal->theData[1] = __LINE__;
12681     sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 1 * 100, 2);
12682     return;
12683   }//if
12684   c_lcpState.setLcpStatus(LCP_TCGET, __LINE__);
12685 
12686   c_lcpState.ctcCounter = c_lcpState.ctimer;
12687   sendLoopMacro(TCGETOPSIZEREQ, sendTCGETOPSIZEREQ, RNIL);
12688 }//Dbdih::checkTcCounterLab()
12689 
checkLcpStart(Signal * signal,Uint32 lineNo)12690 void Dbdih::checkLcpStart(Signal* signal, Uint32 lineNo)
12691 {
12692   /* ----------------------------------------------------------------------- */
12693   // Verify that we are not attempting to start another instance of the LCP
12694   // when it is not alright to do so.
12695   /* ----------------------------------------------------------------------- */
12696   ndbrequire(c_lcpState.lcpStart == ZIDLE);
12697   c_lcpState.lcpStart = ZACTIVE;
12698   signal->theData[0] = DihContinueB::ZCHECK_TC_COUNTER;
12699   signal->theData[1] = lineNo;
12700   sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 1000, 2);
12701 }//Dbdih::checkLcpStart()
12702 
12703 /* ------------------------------------------------------------------------- */
12704 /*TCGETOPSIZECONF          HOW MUCH OPERATION SIZE HAVE BEEN EXECUTED BY TC  */
12705 /* ------------------------------------------------------------------------- */
execTCGETOPSIZECONF(Signal * signal)12706 void Dbdih::execTCGETOPSIZECONF(Signal* signal)
12707 {
12708   jamEntry();
12709   Uint32 senderNodeId = signal->theData[0];
12710   add_lcp_counter(&c_lcpState.ctcCounter, signal->theData[1]);
12711 
12712   receiveLoopMacro(TCGETOPSIZEREQ, senderNodeId);
12713 
12714   ndbrequire(c_lcpState.lcpStatus == LCP_TCGET);
12715   ndbrequire(c_lcpState.lcpStart == ZACTIVE);
12716   /* ----------------------------------------------------------------------- */
12717   // We are not actively starting another LCP, still we receive this signal.
12718   // This is not ok.
12719   /* ---------------------------------------------------------------------- */
12720   /*    ALL TC'S HAVE RESPONDED NOW. NOW WE WILL CHECK IF ENOUGH OPERATIONS */
12721   /*    HAVE EXECUTED TO ENABLE US TO START A NEW LOCAL CHECKPOINT.         */
12722   /*    WHILE COPYING DICTIONARY AND DISTRIBUTION INFO TO A STARTING NODE   */
12723   /*    WE WILL ALSO NOT ALLOW THE LOCAL CHECKPOINT TO PROCEED.             */
12724   /*----------------------------------------------------------------------- */
12725   if (c_lcpState.immediateLcpStart == false)
12726   {
12727     Uint64 cnt = Uint64(c_lcpState.ctcCounter);
12728     Uint64 limit = Uint64(1) << c_lcpState.clcpDelay;
12729     bool dostart = cnt >= limit;
12730     if (dostart == false || c_nodeStartMaster.blockLcp == true)
12731     {
12732       jam();
12733       c_lcpState.setLcpStatus(LCP_STATUS_IDLE, __LINE__);
12734 
12735       signal->theData[0] = DihContinueB::ZCHECK_TC_COUNTER;
12736       signal->theData[1] = __LINE__;
12737       sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 1 * 100, 2);
12738       return;
12739     }//if
12740   }//if
12741   c_lcpState.lcpStart = ZIDLE;
12742   c_lcpState.immediateLcpStart = false;
12743   /* -----------------------------------------------------------------------
12744    * Now the initial lcp is started,
12745    * we can reset the delay to its orginal value
12746    * --------------------------------------------------------------------- */
12747   CRASH_INSERTION(7010);
12748   /* ----------------------------------------------------------------------- */
12749   /*     IF MORE THAN 1 MILLION WORDS PASSED THROUGH THE TC'S THEN WE WILL   */
12750   /*     START A NEW LOCAL CHECKPOINT. CLEAR CTIMER. START CHECKPOINT        */
12751   /*     ACTIVITY BY CALCULATING THE KEEP GLOBAL CHECKPOINT.                 */
12752   // Also remember the current global checkpoint to ensure that we run at least
12753   // one global checkpoints between each local checkpoint that we start up.
12754   /* ----------------------------------------------------------------------- */
12755   c_lcpState.ctimer = 0;
12756   c_lcpState.keepGci = (Uint32)(m_micro_gcp.m_old_gci >> 32);
12757   c_lcpState.oldestRestorableGci = SYSFILE->oldestRestorableGCI;
12758 
12759   CRASH_INSERTION(7014);
12760   c_lcpState.setLcpStatus(LCP_TC_CLOPSIZE, __LINE__);
12761   sendLoopMacro(TC_CLOPSIZEREQ, sendTC_CLOPSIZEREQ, RNIL);
12762 }
12763 
execTC_CLOPSIZECONF(Signal * signal)12764 void Dbdih::execTC_CLOPSIZECONF(Signal* signal)
12765 {
12766   jamEntry();
12767   Uint32 senderNodeId = signal->theData[0];
12768   receiveLoopMacro(TC_CLOPSIZEREQ, senderNodeId);
12769 
12770   ndbrequire(c_lcpState.lcpStatus == LCP_TC_CLOPSIZE);
12771 
12772   /* ----------------------------------------------------------------------- */
12773   /*       UPDATE THE NEW LATEST LOCAL CHECKPOINT ID.                        */
12774   /* ----------------------------------------------------------------------- */
12775   cnoOfActiveTables = 0;
12776   c_lcpState.setLcpStatus(LCP_CALCULATE_KEEP_GCI, __LINE__);
12777   ndbrequire(((int)c_lcpState.oldestRestorableGci) > 0);
12778 
12779   if (ERROR_INSERTED(7011)) {
12780     signal->theData[0] = NDB_LE_LCPStoppedInCalcKeepGci;
12781     signal->theData[1] = 0;
12782     sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
12783     return;
12784   }//if
12785 
12786   Mutex mutex(signal, c_mutexMgr, c_fragmentInfoMutex_lcp);
12787   Callback c = { safe_cast(&Dbdih::lcpFragmentMutex_locked), 0 };
12788   ndbrequire(mutex.trylock(c, false));
12789 }
12790 
12791 void
lcpFragmentMutex_locked(Signal * signal,Uint32 senderData,Uint32 retVal)12792 Dbdih::lcpFragmentMutex_locked(Signal* signal,
12793                                Uint32 senderData,
12794                                Uint32 retVal)
12795 {
12796   jamEntry();
12797 
12798   if (retVal == UtilLockRef::LockAlreadyHeld)
12799   {
12800     jam();
12801     Mutex mutex(signal, c_mutexMgr, c_fragmentInfoMutex_lcp);
12802     mutex.release();
12803 
12804     if (senderData == 0)
12805     {
12806       jam();
12807       infoEvent("Local checkpoint blocked waiting for node-restart");
12808     }
12809 
12810     // 2* is as parameter is in seconds, and we sendSignalWithDelay 500ms
12811     if (senderData >= 2*c_lcpState.m_lcp_trylock_timeout)
12812     {
12813       jam();
12814       Callback c = { safe_cast(&Dbdih::lcpFragmentMutex_locked), 0 };
12815       ndbrequire(mutex.lock(c, false));
12816       return;
12817     }
12818     signal->theData[0] = DihContinueB::ZLCP_TRY_LOCK;
12819     signal->theData[1] = senderData + 1;
12820     sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 500, 2);
12821     return;
12822   }
12823 
12824   ndbrequire(retVal == 0);
12825 
12826   c_lcpState.m_start_time = c_current_time;
12827 
12828   setLcpActiveStatusStart(signal);
12829 
12830   c_lcpState.keepGci = m_micro_gcp.m_old_gci >> 32;
12831   c_lcpState.oldestRestorableGci = SYSFILE->oldestRestorableGCI;
12832 
12833   signal->theData[0] = DihContinueB::ZCALCULATE_KEEP_GCI;
12834   signal->theData[1] = 0;  /* TABLE ID = 0          */
12835   signal->theData[2] = 0;  /* FRAGMENT ID = 0       */
12836   sendSignal(reference(), GSN_CONTINUEB, signal, 3, JBB);
12837   return;
12838 }//Dbdih::execTCGETOPSIZECONF()
12839 
/* ------------------------------------------------------------------------- */
/*       WE NEED TO CALCULATE THE OLDEST GLOBAL CHECKPOINT THAT WILL BE      */
/*       COMPLETELY RESTORABLE AFTER EXECUTING THIS LOCAL CHECKPOINT.        */
/*                                                                           */
/*       One fragment is handled per invocation; the function then sends     */
/*       CONTINUEB(ZCALCULATE_KEEP_GCI) to itself with the next position.    */
/*                                                                           */
/*       signal  - signal object, reused for the outgoing CONTINUEB          */
/*       tableId - table at which to resume the scan                         */
/*       fragId  - fragment within that table to process                     */
/* ------------------------------------------------------------------------- */
void Dbdih::calculateKeepGciLab(Signal* signal, Uint32 tableId, Uint32 fragId)
{
  TabRecordPtr tabPtr;
  // TloopCount counts skipped tables; it is set to 0 to leave the loop
  // once a table that takes part in the calculation has been found.
  Uint32 TloopCount = 1;
  tabPtr.i = tableId;
  do {
    if (tabPtr.i >= ctabFileSize) {
      // All table records have been scanned.
      if (cnoOfActiveTables > 0) {
        jam();
        // At least one fragment was processed: go on and store the new LCP id.
        signal->theData[0] = DihContinueB::ZSTORE_NEW_LCP_ID;
        sendSignal(reference(), GSN_CONTINUEB, signal, 1, JBB);
        return;
      } else {
        jam();
	/* ------------------------------------------------------------------ */
	/* THERE ARE NO TABLES TO CHECKPOINT. WE STOP THE CHECKPOINT ALREADY  */
	/* HERE TO AVOID STRANGE PROBLEMS LATER.                              */
	/* NOTE(review): nothing visible on this path releases the            */
	/* fragment-info mutex obtained before the scan started - confirm     */
	/* that it is released elsewhere.                                     */
	/* ------------------------------------------------------------------ */
        c_lcpState.setLcpStatus(LCP_STATUS_IDLE, __LINE__);
        checkLcpStart(signal, __LINE__);
        return;
      }//if
    }//if
    ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
    // Only active tables with normal storage take part.
    if (tabPtr.p->tabStatus != TabRecord::TS_ACTIVE ||
	tabPtr.p->tabStorage != TabRecord::ST_NORMAL) {
      if (TloopCount > 100) {
        jam();
        // Real-time break: after skipping more than 100 tables in one go,
        // continue the scan from the next table in a new signal.
        signal->theData[0] = DihContinueB::ZCALCULATE_KEEP_GCI;
        signal->theData[1] = tabPtr.i + 1;
        signal->theData[2] = 0;
        sendSignal(reference(), GSN_CONTINUEB, signal, 3, JBB);
        return;
      } else {
        jam();
        TloopCount++;
        tabPtr.i++;
      }//if
    } else {
      jam();
      TloopCount = 0;
    }//if
  } while (TloopCount != 0);
  // Incremented once per processed fragment (not once per table); the
  // counter is only tested for "> 0" above.
  cnoOfActiveTables++;
  FragmentstorePtr fragPtr;
  getFragstore(tabPtr.p, fragId, fragPtr);
  // checkKeepGci() is defined elsewhere; it is applied to both the stored
  // and the old stored replica lists of the fragment.
  checkKeepGci(tabPtr, fragId, fragPtr.p, fragPtr.p->storedReplicas);
  checkKeepGci(tabPtr, fragId, fragPtr.p, fragPtr.p->oldStoredReplicas);
  // Advance to the next fragment, wrapping to the next table when done.
  fragId++;
  if (fragId >= tabPtr.p->totalfragments) {
    jam();
    tabPtr.i++;
    fragId = 0;
  }//if
  signal->theData[0] = DihContinueB::ZCALCULATE_KEEP_GCI;
  signal->theData[1] = tabPtr.i;
  signal->theData[2] = fragId;
  sendSignal(reference(), GSN_CONTINUEB, signal, 3, JBB);
  return;
}//Dbdih::calculateKeepGciLab()
12904 
12905 /* ------------------------------------------------------------------------- */
12906 /*       WE NEED TO STORE ON DISK THE FACT THAT WE ARE STARTING THIS LOCAL   */
12907 /*       CHECKPOINT ROUND. THIS WILL INVALIDATE ALL THE LOCAL CHECKPOINTS    */
12908 /*       THAT WILL EVENTUALLY BE OVERWRITTEN AS PART OF THIS LOCAL CHECKPOINT*/
12909 /* ------------------------------------------------------------------------- */
storeNewLcpIdLab(Signal * signal)12910 void Dbdih::storeNewLcpIdLab(Signal* signal)
12911 {
12912   signal->theData[0] = NDB_LE_LocalCheckpointStarted; //Event type
12913   signal->theData[1] = SYSFILE->latestLCP_ID + 1;
12914   signal->theData[2] = c_lcpState.keepGci;
12915   signal->theData[3] = c_lcpState.oldestRestorableGci;
12916   sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);
12917 
12918   /***************************************************************************/
12919   // Report the event that a local checkpoint has started.
12920   /***************************************************************************/
12921 
12922   signal->setTrace(TestOrd::TraceLocalCheckpoint);
12923 
12924   CRASH_INSERTION(7013);
12925   SYSFILE->keepGCI = c_lcpState.keepGci;
12926   //Uint32 lcpId = SYSFILE->latestLCP_ID;
12927   SYSFILE->latestLCP_ID++;
12928   SYSFILE->oldestRestorableGCI = c_lcpState.oldestRestorableGci;
12929 
12930   const Uint32 oldestRestorableGCI = SYSFILE->oldestRestorableGCI;
12931   //const Uint32 newestRestorableGCI = SYSFILE->newestRestorableGCI;
12932   //ndbrequire(newestRestorableGCI >= oldestRestorableGCI);
12933 
12934   Int32 val = oldestRestorableGCI;
12935   ndbrequire(val > 0);
12936 
12937   /* ----------------------------------------------------------------------- */
12938   /* SET BIT INDICATING THAT LOCAL CHECKPOINT IS ONGOING. THIS IS CLEARED    */
12939   /* AT THE END OF A LOCAL CHECKPOINT.                                       */
12940   /* ----------------------------------------------------------------------- */
12941   SYSFILE->setLCPOngoing(SYSFILE->systemRestartBits);
12942   /* ---------------------------------------------------------------------- */
12943   /*    CHECK IF ANY NODE MUST BE TAKEN OUT OF SERVICE AND REFILLED WITH    */
12944   /*    NEW FRESH DATA FROM AN ACTIVE NODE.                                 */
12945   /* ---------------------------------------------------------------------- */
12946 
12947   /**
12948    * This used be done in setLcpActiveStatusStart
12949    *   but this function has been move "up" in the flow
12950    *   to just before calcKeepGci
12951    */
12952   setNodeRestartInfoBits(signal);
12953 
12954   c_lcpState.setLcpStatus(LCP_COPY_GCI, __LINE__);
12955   //#ifdef VM_TRACE
12956   //  infoEvent("LocalCheckpoint %d started", SYSFILE->latestLCP_ID);
12957   //  signal->theData[0] = 7012;
12958   //  execDUMP_STATE_ORD(signal);
12959   //#endif
12960 
12961   copyGciLab(signal, CopyGCIReq::LOCAL_CHECKPOINT);
12962 }//Dbdih::storeNewLcpIdLab()
12963 
startLcpRoundLab(Signal * signal)12964 void Dbdih::startLcpRoundLab(Signal* signal) {
12965   jam();
12966 
12967   CRASH_INSERTION(7218);
12968 
12969   Mutex mutex(signal, c_mutexMgr, c_startLcpMutexHandle);
12970   Callback c = { safe_cast(&Dbdih::startLcpMutex_locked), 0 };
12971   ndbrequire(mutex.lock(c));
12972 }
12973 
12974 void
startLcpMutex_locked(Signal * signal,Uint32 senderData,Uint32 retVal)12975 Dbdih::startLcpMutex_locked(Signal* signal, Uint32 senderData, Uint32 retVal){
12976   jamEntry();
12977   ndbrequire(retVal == 0);
12978 
12979   StartLcpReq* req = (StartLcpReq*)signal->getDataPtrSend();
12980   req->senderRef = reference();
12981   req->lcpId = SYSFILE->latestLCP_ID;
12982   req->participatingLQH = c_lcpState.m_participatingLQH;
12983   req->participatingDIH = c_lcpState.m_participatingDIH;
12984   sendLoopMacro(START_LCP_REQ, sendSTART_LCP_REQ, RNIL);
12985 }
12986 
12987 void
sendSTART_LCP_REQ(Signal * signal,Uint32 nodeId,Uint32 extra)12988 Dbdih::sendSTART_LCP_REQ(Signal* signal, Uint32 nodeId, Uint32 extra){
12989   BlockReference ref = calcDihBlockRef(nodeId);
12990   if (ERROR_INSERTED(7021) && nodeId == getOwnNodeId())
12991   {
12992     sendSignalWithDelay(ref, GSN_START_LCP_REQ, signal, 500,
12993                         StartLcpReq::SignalLength);
12994     return;
12995   }
12996   else if (ERROR_INSERTED(7021) && ((rand() % 10) > 4))
12997   {
12998     infoEvent("Dont sent STARTLCPREQ to %u", nodeId);
12999     return;
13000   }
13001   sendSignal(ref, GSN_START_LCP_REQ, signal, StartLcpReq::SignalLength, JBB);
13002 }
13003 
13004 void
execSTART_LCP_CONF(Signal * signal)13005 Dbdih::execSTART_LCP_CONF(Signal* signal){
13006   StartLcpConf * conf = (StartLcpConf*)signal->getDataPtr();
13007 
13008   Uint32 nodeId = refToNode(conf->senderRef);
13009   receiveLoopMacro(START_LCP_REQ, nodeId);
13010 
13011   Mutex mutex(signal, c_mutexMgr, c_startLcpMutexHandle);
13012   Callback c = { safe_cast(&Dbdih::startLcpMutex_unlocked), 0 };
13013   mutex.unlock(c);
13014 }
13015 
13016 void
startLcpMutex_unlocked(Signal * signal,Uint32 data,Uint32 retVal)13017 Dbdih::startLcpMutex_unlocked(Signal* signal, Uint32 data, Uint32 retVal){
13018   jamEntry();
13019   ndbrequire(retVal == 0);
13020 
13021   Mutex mutex(signal, c_mutexMgr, c_startLcpMutexHandle);
13022   mutex.release();
13023 
13024   /* ----------------------------------------------------------------------- */
13025   /*     NOW PROCEED BY STARTING THE LOCAL CHECKPOINT IN EACH LQH.           */
13026   /* ----------------------------------------------------------------------- */
13027   c_lcpState.m_LAST_LCP_FRAG_ORD = c_lcpState.m_participatingLQH;
13028 
13029   CRASH_INSERTION(7015);
13030   c_lcpState.setLcpStatus(LCP_START_LCP_ROUND, __LINE__);
13031   startLcpRoundLoopLab(signal, 0, 0);
13032 }
13033 
13034 void
master_lcp_fragmentMutex_locked(Signal * signal,Uint32 failedNodePtrI,Uint32 retVal)13035 Dbdih::master_lcp_fragmentMutex_locked(Signal* signal,
13036                                        Uint32 failedNodePtrI, Uint32 retVal)
13037 {
13038   jamEntry();
13039   ndbrequire(retVal == 0);
13040 
13041   signal->theData[0] = NDB_LE_LCP_TakeoverCompleted;
13042   signal->theData[1] = c_lcpMasterTakeOverState.state;
13043   sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
13044 
13045   signal->theData[0] = 7012;
13046   execDUMP_STATE_ORD(signal);
13047 
13048   c_lcpMasterTakeOverState.set(LMTOS_IDLE, __LINE__);
13049 
13050   checkLocalNodefailComplete(signal, failedNodePtrI, NF_LCP_TAKE_OVER);
13051 
13052   startLcpRoundLoopLab(signal, 0, 0);
13053 }
13054 
startLcpRoundLoopLab(Signal * signal,Uint32 startTableId,Uint32 startFragId)13055 void Dbdih::startLcpRoundLoopLab(Signal* signal,
13056 				 Uint32 startTableId, Uint32 startFragId)
13057 {
13058   NodeRecordPtr nodePtr;
13059   for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
13060     ptrAss(nodePtr, nodeRecord);
13061     if (nodePtr.p->nodeStatus == NodeRecord::ALIVE) {
13062       ndbrequire(nodePtr.p->noOfStartedChkpt == 0);
13063       ndbrequire(nodePtr.p->noOfQueuedChkpt == 0);
13064     }//if
13065   }//if
13066   c_lcpState.currentFragment.tableId = startTableId;
13067   c_lcpState.currentFragment.fragmentId = startFragId;
13068   startNextChkpt(signal);
13069 }//Dbdih::startLcpRoundLoopLab()
13070 
/**
 * Hand out as many fragment checkpoints as the participating nodes can
 * currently accept.
 *
 * Scans tables/fragments starting at c_lcpState.currentFragment.  For each
 * stored replica on a participating LQH node that has not yet been started
 * in the current LCP, the checkpoint is either started at once (at most 2
 * started per node) or queued (at most 2 queued per node).
 *
 * If the scan reaches the end of the table records, sendLastLCP_FRAG_ORD()
 * is called.  If every participating node turns out to be full, the
 * function returns early without doing so.
 */
void Dbdih::startNextChkpt(Signal* signal)
{
  Uint32 lcpId = SYSFILE->latestLCP_ID;

  // Nodes found with both the started and the queued slots full.
  NdbNodeBitmask busyNodes;
  busyNodes.clear();
  const Uint32 lcpNodes = c_lcpState.m_participatingLQH.count();

  // 'save' is true until the persistent cursor has been stored once.
  bool save = true;
  // Work on a local copy of the cursor; c_lcpState.currentFragment is only
  // updated at the first replica that could be neither started nor queued.
  LcpState::CurrentFragment curr = c_lcpState.currentFragment;

  while (curr.tableId < ctabFileSize) {
    TabRecordPtr tabPtr;
    tabPtr.i = curr.tableId;
    ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
    // Skip tables that are not active or not active in this LCP.
    if ((tabPtr.p->tabStatus != TabRecord::TS_ACTIVE) ||
        (tabPtr.p->tabLcpStatus != TabRecord::TLS_ACTIVE)) {
      curr.tableId++;
      curr.fragmentId = 0;
      continue;
    }//if

    FragmentstorePtr fragPtr;
    getFragstore(tabPtr.p, curr.fragmentId, fragPtr);

    // Walk the stored replicas of this fragment.
    ReplicaRecordPtr replicaPtr;
    for(replicaPtr.i = fragPtr.p->storedReplicas;
	replicaPtr.i != RNIL ;
	replicaPtr.i = replicaPtr.p->nextReplica){

      jam();
      ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);

      NodeRecordPtr nodePtr;
      nodePtr.i = replicaPtr.p->procNode;
      ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);

      if (c_lcpState.m_participatingLQH.get(nodePtr.i))
      {
	if (replicaPtr.p->lcpOngoingFlag &&
	    replicaPtr.p->lcpIdStarted < lcpId)
	{
	  jam();
	  //-------------------------------------------------------------------
	  // We have found a replica on a node that performs local checkpoint
	  // that is alive and that has not yet been started.
	  //-------------------------------------------------------------------

	  if (nodePtr.p->noOfStartedChkpt < 2)
	  {
	    jam();
	    /**
	     * Send LCP_FRAG_ORD to LQH
	     */

	    /**
	     * Record the current LCP id in lcpIdStarted so that the replica
	     * is not picked again during this LCP
	     */
	    replicaPtr.p->lcpIdStarted = lcpId;

	    Uint32 i = nodePtr.p->noOfStartedChkpt;
	    nodePtr.p->startedChkpt[i].tableId = tabPtr.i;
	    nodePtr.p->startedChkpt[i].fragId = curr.fragmentId;
	    nodePtr.p->startedChkpt[i].replicaPtr = replicaPtr.i;
	    nodePtr.p->noOfStartedChkpt = i + 1;

	    sendLCP_FRAG_ORD(signal, nodePtr.p->startedChkpt[i]);
	  }
	  else if (nodePtr.p->noOfQueuedChkpt < 2)
	  {
	    jam();
	    /**
	     * Put LCP_FRAG_ORD "in queue"
	     */

	    /**
	     * Record the current LCP id in lcpIdStarted so that the replica
	     * is not picked again during this LCP
	     */
	    replicaPtr.p->lcpIdStarted = lcpId;

	    Uint32 i = nodePtr.p->noOfQueuedChkpt;
	    nodePtr.p->queuedChkpt[i].tableId = tabPtr.i;
	    nodePtr.p->queuedChkpt[i].fragId = curr.fragmentId;
	    nodePtr.p->queuedChkpt[i].replicaPtr = replicaPtr.i;
	    nodePtr.p->noOfQueuedChkpt = i + 1;
	  }
	  else
	  {
	    jam();

	    if(save)
	    {
	      /**
	       * Stop increasing value on first that was "full"
	       */
	      c_lcpState.currentFragment = curr;
	      save = false;
	    }

	    busyNodes.set(nodePtr.i);
	    if(busyNodes.count() == lcpNodes)
	    {
	      /**
	       * There was no possibility to start the local checkpoint
	       * and it was not possible to queue it up. In this case we
	       * stop the start of local checkpoints until the nodes with a
	       * backlog have performed more checkpoints. We will return and
	       * will not continue the process of starting any more checkpoints.
	       */
	      return;
	    }//if
	  }//if
	}
      }//if
    }//for
    // Advance the local cursor to the next fragment / table.
    curr.fragmentId++;
    if (curr.fragmentId >= tabPtr.p->totalfragments) {
      jam();
      curr.fragmentId = 0;
      curr.tableId++;
    }//if
  }//while

  // The whole table range has been scanned.
  sendLastLCP_FRAG_ORD(signal);
}//Dbdih::startNextChkpt()
13196 
sendLastLCP_FRAG_ORD(Signal * signal)13197 void Dbdih::sendLastLCP_FRAG_ORD(Signal* signal)
13198 {
13199   LcpFragOrd * const lcpFragOrd = (LcpFragOrd *)&signal->theData[0];
13200   lcpFragOrd->tableId = RNIL;
13201   lcpFragOrd->fragmentId = 0;
13202   lcpFragOrd->lcpId = SYSFILE->latestLCP_ID;
13203   lcpFragOrd->lcpNo = 0;
13204   lcpFragOrd->keepGci = c_lcpState.keepGci;
13205   lcpFragOrd->lastFragmentFlag = true;
13206 
13207   NodeRecordPtr nodePtr;
13208   for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
13209     jam();
13210     ptrAss(nodePtr, nodeRecord);
13211 
13212     if(nodePtr.p->noOfQueuedChkpt == 0 &&
13213        nodePtr.p->noOfStartedChkpt == 0 &&
13214        c_lcpState.m_LAST_LCP_FRAG_ORD.isWaitingFor(nodePtr.i)){
13215       jam();
13216 
13217       CRASH_INSERTION(7028);
13218 
13219       /**
13220        * Nothing queued or started <=> Complete on that node
13221        *
13222        */
13223       c_lcpState.m_LAST_LCP_FRAG_ORD.clearWaitingFor(nodePtr.i);
13224       if(ERROR_INSERTED(7075)){
13225 	continue;
13226       }
13227 
13228       CRASH_INSERTION(7193);
13229       BlockReference ref = calcLqhBlockRef(nodePtr.i);
13230       sendSignal(ref, GSN_LCP_FRAG_ORD, signal,LcpFragOrd::SignalLength, JBB);
13231     }
13232   }
13233   if(ERROR_INSERTED(7075))
13234   {
13235     if(c_lcpState.m_LAST_LCP_FRAG_ORD.done())
13236     {
13237       CRASH_INSERTION(7075);
13238     }
13239   }
13240 }//Dbdih::sendLastLCP_FRAGORD()
13241 
/* ------------------------------------------------------------------------- */
/*       A FRAGMENT REPLICA HAS COMPLETED EXECUTING ITS LOCAL CHECKPOINT.    */
/*       CHECK IF ALL REPLICAS IN THE TABLE HAVE COMPLETED. IF SO STORE      */
/*       THE TABLE DISTRIBUTION ON DISK. ALSO SEND LCP_REPORT TO ALL OTHER   */
/*       NODES SO THAT THEY CAN STORE THE TABLE ONTO DISK AS WELL.           */
/*                                                                           */
/*       Outline: optionally forward a broadcast request to the other        */
/*       participating DIHs, re-queue the report while the table is being    */
/*       copied, register the completion, start writing the table            */
/*       description when the table is done, and - on the master - free      */
/*       the node's "started" slot and try to start more checkpoints.        */
/* ------------------------------------------------------------------------- */
void Dbdih::execLCP_FRAG_REP(Signal* signal)
{
  jamEntry();

  LcpFragRep * const lcpReport = (LcpFragRep *)&signal->theData[0];

  /**
   * Proxying LCP_FRAG_REP: a report whose nodeId is BROADCAST_REQ must
   * come from the own node and is forwarded to all other participating DIHs
   */
  const bool broadcast_req = lcpReport->nodeId == LcpFragRep::BROADCAST_REQ;
  if (broadcast_req)
  {
    jam();
    ndbrequire(refToNode(signal->getSendersBlockRef()) == getOwnNodeId());

    /**
     * Set correct nodeId
     */
    lcpReport->nodeId = getOwnNodeId();

    NodeReceiverGroup rg(DBDIH, c_lcpState.m_participatingDIH);
    rg.m_nodes.clear(getOwnNodeId());
    sendSignal(rg, GSN_LCP_FRAG_REP, signal, signal->getLength(), JBB);

    /**
     * and continue processing
     */
  }

  ndbrequire(c_lcpState.lcpStatus != LCP_STATUS_IDLE);

#if 0
  printLCP_FRAG_REP(stdout,
		    signal->getDataPtr(),
		    signal->length(), number());
#endif

  // Copy the fields needed later; the signal buffer is reused for
  // outgoing signals further down.
  Uint32 nodeId = lcpReport->nodeId;
  Uint32 tableId = lcpReport->tableId;
  Uint32 fragId = lcpReport->fragId;

  // NOTE(review): second jamEntry() in the same handler; looks redundant.
  jamEntry();

  // Error insert 7178: drop the first report from another node in the own
  // node group, order that node to crash (NDB_TAMPER 9999) and switch to
  // error insert 7179.
  if (ERROR_INSERTED(7178) && nodeId != getOwnNodeId())
  {
    jam();
    Uint32 owng =Sysfile::getNodeGroup(getOwnNodeId(), SYSFILE->nodeGroups);
    Uint32 nodeg = Sysfile::getNodeGroup(nodeId, SYSFILE->nodeGroups);
    if (owng == nodeg)
    {
      jam();
      ndbout_c("throwing away LCP_FRAG_REP from  (and killing) %d", nodeId);
      SET_ERROR_INSERT_VALUE(7179);
      signal->theData[0] = 9999;
      sendSignal(numberToRef(CMVMI, nodeId),
		 GSN_NDB_TAMPER, signal, 1, JBA);
      return;
    }
  }

  // Error insert 7179: keep dropping reports from other nodes in the own
  // node group.
  if (ERROR_INSERTED(7179) && nodeId != getOwnNodeId())
  {
    jam();
    Uint32 owng =Sysfile::getNodeGroup(getOwnNodeId(), SYSFILE->nodeGroups);
    Uint32 nodeg = Sysfile::getNodeGroup(nodeId, SYSFILE->nodeGroups);
    if (owng == nodeg)
    {
      jam();
      ndbout_c("throwing away LCP_FRAG_REP from %d", nodeId);
      return;
    }
  }

  CRASH_INSERTION2(7025, isMaster());
  CRASH_INSERTION2(7016, !isMaster());
  CRASH_INSERTION2(7191, (!isMaster() && tableId));

  // True when this block sent the signal to itself and it is not a
  // broadcast request, i.e. the report was re-queued by the code below.
  bool fromTimeQueue = (signal->senderBlockRef()==reference()&&!broadcast_req);

  TabRecordPtr tabPtr;
  tabPtr.i = tableId;
  ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
  if(tabPtr.p->tabCopyStatus != TabRecord::CS_IDLE) {
    jam();
    /*-----------------------------------------------------------------------*/
    // If the table is currently copied to disk we also
    // stop already here to avoid strange half-way updates
    // of the table data structures.
    /*-----------------------------------------------------------------------*/
    /*
      We need to send this signal without a delay since we have discovered
      that we have run out of space in the short time queue. This problem
      is very unlikely to happen but it has and it results in a node crash.
      This should be considered a "quick fix" and not a permanent solution.
      A cleaner/better way would be to check the time queue if it is full or
      not before sending this signal.
    */
    sendSignal(reference(), GSN_LCP_FRAG_REP, signal, signal->length(), JBB);
    /* Kept here for reference
       sendSignalWithDelay(reference(), GSN_LCP_FRAG_REP,
       signal, 20, signal->length());
    */

    // Count each report once, the first time it is re-queued.
    if(!fromTimeQueue){
      c_lcpState.noOfLcpFragRepOutstanding++;
    }

    return;
  }//if

  if(fromTimeQueue)
  {
    jam();
    // A re-queued report is finally being processed.
    ndbrequire(c_lcpState.noOfLcpFragRepOutstanding > 0);
    c_lcpState.noOfLcpFragRepOutstanding--;
  }

  // reportLcpCompletion() is defined elsewhere; its result is used below
  // as "the whole table is done".
  bool tableDone = reportLcpCompletion(lcpReport);

  Uint32 started = lcpReport->maxGciStarted;
  // 'completed' is only read in the VM_TRACE event report below.
  Uint32 completed = lcpReport->maxGciCompleted;

  // Track the highest maxGciStarted reported so far.
  if (started > c_lcpState.lcpStopGcp)
  {
    jam();
    c_lcpState.lcpStopGcp = started;
  }

  /**
   * Update m_local_lcp_state
   *
   * we could look only at fragments that we have locally...
   *   but for now we look at all fragments
   */
  m_local_lcp_state.lcp_frag_rep(lcpReport);

  if (tableDone)
  {
    jam();

    if (tabPtr.p->tabStatus == TabRecord::TS_IDLE ||
        tabPtr.p->tabStatus == TabRecord::TS_DROPPING)
    {
      jam();
      // Table is idle or being dropped: do not write its description.
      g_eventLogger->info("TS_DROPPING - Neglecting to save Table: %d Frag: %d - ",
                          tableId, fragId);
    }
    else
    {
      jam();
      /**
       * Write table description to file
       */
      tabPtr.p->tabLcpStatus = TabRecord::TLS_WRITING_TO_FILE;
      tabPtr.p->tabCopyStatus = TabRecord::CS_LCP_READ_TABLE;
      tabPtr.p->tabUpdateState = TabRecord::US_LOCAL_CHECKPOINT;
      signal->theData[0] = DihContinueB::ZPACK_TABLE_INTO_PAGES;
      signal->theData[1] = tabPtr.i;
      sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);

      bool ret = checkLcpAllTablesDoneInLqh(__LINE__);
      // Error insert 7209: order the master node to crash once all tables
      // are done.
      if (ret && ERROR_INSERTED(7209))
      {
        jam();

        signal->theData[0] = 9999;
        sendSignal(numberToRef(CMVMI, cmasterNodeId),
                   GSN_NDB_TAMPER, signal, 1, JBB);
      }
    }
  }

#ifdef VM_TRACE
  /* --------------------------------------------------------------------- */
  // REPORT that local checkpoint has completed this fragment.
  /* --------------------------------------------------------------------- */
  signal->theData[0] = NDB_LE_LCPFragmentCompleted;
  signal->theData[1] = nodeId;
  signal->theData[2] = tableId;
  signal->theData[3] = fragId;
  signal->theData[4] = started;
  signal->theData[5] = completed;
  sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 6, JBB);
#endif

  // Only continue to the master bookkeeping when no LCP master take-over
  // is in progress.
  bool ok = false;
  switch(c_lcpMasterTakeOverState.state){
  case LMTOS_IDLE:
    ok = true;
    jam();
    /**
     * Normal case: continue after the switch
     */
    break;
  case LMTOS_WAIT_EMPTY_LCP: // LCP Take over waiting for EMPTY_LCPCONF
    jam();
    return;
  case LMTOS_WAIT_LCP_FRAG_REP:
    jam();
    checkEmptyLcpComplete(signal);
    return;
  case LMTOS_INITIAL:
  case LMTOS_ALL_IDLE:
  case LMTOS_ALL_ACTIVE:
  case LMTOS_LCP_CONCLUDING:
  case LMTOS_COPY_ONGOING:
    // A fragment report is not expected in any of these states.
    ndbrequire(false);
  }
  ndbrequire(ok);

  /* ----------------------------------------------------------------------- */
  // Check if there are more LCP's to start up.
  /* ----------------------------------------------------------------------- */
  if(isMaster())
  {
    jam();

    /**
     * Remove from "running" array
     */
    NodeRecordPtr nodePtr;
    nodePtr.i = nodeId;
    ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);

    const Uint32 outstanding = nodePtr.p->noOfStartedChkpt;
    ndbrequire(outstanding > 0);
    // The completed fragment must be in slot 0 or slot 1 of startedChkpt.
    if(nodePtr.p->startedChkpt[0].tableId != tableId ||
       nodePtr.p->startedChkpt[0].fragId != fragId){
      jam();
      // Not slot 0, so it has to be slot 1; dropping it needs no move.
      ndbrequire(outstanding > 1);
      ndbrequire(nodePtr.p->startedChkpt[1].tableId == tableId);
      ndbrequire(nodePtr.p->startedChkpt[1].fragId == fragId);
    } else {
      jam();
      // Slot 0 completed: move slot 1 down.
      nodePtr.p->startedChkpt[0] = nodePtr.p->startedChkpt[1];
    }
    nodePtr.p->noOfStartedChkpt--;
    checkStartMoreLcp(signal, nodeId);
  }
}
13488 
13489 bool
checkLcpAllTablesDoneInLqh(Uint32 line)13490 Dbdih::checkLcpAllTablesDoneInLqh(Uint32 line){
13491   TabRecordPtr tabPtr;
13492 
13493   /**
13494    * Check if finished with all tables
13495    */
13496   for (tabPtr.i = 0; tabPtr.i < ctabFileSize; tabPtr.i++) {
13497     jam();
13498     ptrAss(tabPtr, tabRecord);
13499     if ((tabPtr.p->tabStatus == TabRecord::TS_ACTIVE) &&
13500         (tabPtr.p->tabLcpStatus == TabRecord::TLS_ACTIVE))
13501     {
13502       jam();
13503       /**
13504        * Nope, not finished with all tables
13505        */
13506       return false;
13507     }//if
13508   }//for
13509 
13510   CRASH_INSERTION2(7026, isMaster());
13511   CRASH_INSERTION2(7017, !isMaster());
13512 
13513   c_lcpState.setLcpStatus(LCP_TAB_COMPLETED, line);
13514 
13515   if (ERROR_INSERTED(7194))
13516   {
13517     ndbout_c("CLEARING 7194");
13518     CLEAR_ERROR_INSERT_VALUE;
13519   }
13520 
13521   return true;
13522 }
13523 
findReplica(ReplicaRecordPtr & replicaPtr,Fragmentstore * fragPtrP,Uint32 nodeId,bool old)13524 void Dbdih::findReplica(ReplicaRecordPtr& replicaPtr,
13525 			Fragmentstore* fragPtrP,
13526 			Uint32 nodeId,
13527 			bool old)
13528 {
13529   replicaPtr.i = old ? fragPtrP->oldStoredReplicas : fragPtrP->storedReplicas;
13530   while(replicaPtr.i != RNIL){
13531     ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
13532     if (replicaPtr.p->procNode == nodeId) {
13533       jam();
13534       return;
13535     } else {
13536       jam();
13537       replicaPtr.i = replicaPtr.p->nextReplica;
13538     }//if
13539   };
13540 
13541 #ifdef VM_TRACE
13542   g_eventLogger->info("Fragment Replica(node=%d) not found", nodeId);
13543   replicaPtr.i = fragPtrP->oldStoredReplicas;
13544   while(replicaPtr.i != RNIL){
13545     ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
13546     if (replicaPtr.p->procNode == nodeId) {
13547       jam();
13548       break;
13549     } else {
13550       jam();
13551       replicaPtr.i = replicaPtr.p->nextReplica;
13552     }//if
13553   };
13554   if(replicaPtr.i != RNIL){
13555     g_eventLogger->info("...But was found in oldStoredReplicas");
13556   } else {
13557     g_eventLogger->info("...And wasn't found in oldStoredReplicas");
13558   }
13559 #endif
13560   ndbrequire(false);
13561 }//Dbdih::findReplica()
13562 
13563 
13564 int
handle_invalid_lcp_no(const LcpFragRep * rep,ReplicaRecordPtr replicaPtr)13565 Dbdih::handle_invalid_lcp_no(const LcpFragRep* rep,
13566 			     ReplicaRecordPtr replicaPtr)
13567 {
13568   ndbrequire(!isMaster());
13569   Uint32 lcpNo = rep->lcpNo;
13570   Uint32 lcpId = rep->lcpId;
13571 
13572   if (!ndb_pnr(getNodeInfo(refToNode(cmasterdihref)).m_version))
13573   {
13574   }
13575   else
13576   {
13577     warningEvent("Detected previous node failure of %d during lcp",
13578                  rep->nodeId);
13579   }
13580 
13581   replicaPtr.p->nextLcp = lcpNo;
13582   replicaPtr.p->lcpId[lcpNo] = 0;
13583   replicaPtr.p->lcpStatus[lcpNo] = ZINVALID;
13584 
13585   for (Uint32 i = lcpNo; i != lcpNo; i = nextLcpNo(i))
13586   {
13587     jam();
13588     if (replicaPtr.p->lcpStatus[i] == ZVALID &&
13589 	replicaPtr.p->lcpId[i] >= lcpId)
13590     {
13591       ndbout_c("i: %d lcpId: %d", i, replicaPtr.p->lcpId[i]);
13592       ndbrequire(false);
13593     }
13594   }
13595 
13596   return 0;
13597 }
13598 
13599 /**
13600  * Return true  if table is all fragment replicas have been checkpointed
13601  *                 to disk (in all LQHs)
13602  *        false otherwise
13603  */
13604 bool
reportLcpCompletion(const LcpFragRep * lcpReport)13605 Dbdih::reportLcpCompletion(const LcpFragRep* lcpReport)
13606 {
13607   Uint32 lcpNo = lcpReport->lcpNo;
13608   Uint32 lcpId = lcpReport->lcpId;
13609   Uint32 maxGciStarted = lcpReport->maxGciStarted;
13610   Uint32 maxGciCompleted = lcpReport->maxGciCompleted;
13611   Uint32 tableId = lcpReport->tableId;
13612   Uint32 fragId = lcpReport->fragId;
13613   Uint32 nodeId = lcpReport->nodeId;
13614 
13615   TabRecordPtr tabPtr;
13616   tabPtr.i = tableId;
13617   ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
13618 
13619   if (tabPtr.p->tabStatus == TabRecord::TS_DROPPING ||
13620       tabPtr.p->tabStatus == TabRecord::TS_IDLE)
13621   {
13622     jam();
13623     return true;
13624   }
13625 
13626   FragmentstorePtr fragPtr;
13627   getFragstore(tabPtr.p, fragId, fragPtr);
13628 
13629   ReplicaRecordPtr replicaPtr;
13630   findReplica(replicaPtr, fragPtr.p, nodeId);
13631 
13632   ndbrequire(replicaPtr.p->lcpOngoingFlag == true);
13633   if(lcpNo != replicaPtr.p->nextLcp){
13634     if (handle_invalid_lcp_no(lcpReport, replicaPtr))
13635     {
13636       g_eventLogger->error("lcpNo = %d replicaPtr.p->nextLcp = %d",
13637                            lcpNo, replicaPtr.p->nextLcp);
13638       ndbrequire(false);
13639     }
13640   }
13641   ndbrequire(lcpNo == replicaPtr.p->nextLcp);
13642   ndbrequire(lcpNo < MAX_LCP_STORED);
13643   ndbrequire(replicaPtr.p->lcpId[lcpNo] != lcpId);
13644 
13645   replicaPtr.p->lcpIdStarted = lcpId;
13646   replicaPtr.p->lcpOngoingFlag = false;
13647 
13648   removeOldCrashedReplicas(tableId, fragId, replicaPtr);
13649   replicaPtr.p->lcpId[lcpNo] = lcpId;
13650   replicaPtr.p->lcpStatus[lcpNo] = ZVALID;
13651   replicaPtr.p->maxGciStarted[lcpNo] = maxGciStarted;
13652   replicaPtr.p->maxGciCompleted[lcpNo] = maxGciCompleted;
13653   replicaPtr.p->nextLcp = nextLcpNo(replicaPtr.p->nextLcp);
13654   ndbrequire(fragPtr.p->noLcpReplicas > 0);
13655   fragPtr.p->noLcpReplicas --;
13656 
13657   if(fragPtr.p->noLcpReplicas > 0){
13658     jam();
13659     return false;
13660   }
13661 
13662   for (Uint32 fid = 0; fid < tabPtr.p->totalfragments; fid++) {
13663     jam();
13664     getFragstore(tabPtr.p, fid, fragPtr);
13665     if (fragPtr.p->noLcpReplicas > 0){
13666       jam();
13667       /* ----------------------------------------------------------------- */
13668       // Not all fragments in table have been checkpointed.
13669       /* ----------------------------------------------------------------- */
13670       if(0)
13671         g_eventLogger->info("reportLcpCompletion: fragment %d not ready", fid);
13672       return false;
13673     }//if
13674   }//for
13675   return true;
13676 }//Dbdih::reportLcpCompletion()
13677 
checkStartMoreLcp(Signal * signal,Uint32 nodeId)13678 void Dbdih::checkStartMoreLcp(Signal* signal, Uint32 nodeId)
13679 {
13680   ndbrequire(isMaster());
13681 
13682   NodeRecordPtr nodePtr;
13683   nodePtr.i = nodeId;
13684   ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
13685 
13686   ndbrequire(nodePtr.p->noOfStartedChkpt < 2);
13687 
13688   if (nodePtr.p->noOfQueuedChkpt > 0) {
13689     jam();
13690     nodePtr.p->noOfQueuedChkpt--;
13691     Uint32 i = nodePtr.p->noOfStartedChkpt;
13692     nodePtr.p->startedChkpt[i] = nodePtr.p->queuedChkpt[0];
13693     nodePtr.p->queuedChkpt[0] = nodePtr.p->queuedChkpt[1];
13694     //-------------------------------------------------------------------
13695     // We can send a LCP_FRAGORD to the node ordering it to perform a
13696     // local checkpoint on this fragment replica.
13697     //-------------------------------------------------------------------
13698     nodePtr.p->noOfStartedChkpt = i + 1;
13699 
13700     sendLCP_FRAG_ORD(signal, nodePtr.p->startedChkpt[i]);
13701   }
13702 
13703   /* ----------------------------------------------------------------------- */
13704   // When there are no more outstanding LCP reports and there are no one queued
13705   // in at least one node, then we are ready to make sure all nodes have at
13706   // least two outstanding LCP requests per node and at least two queued for
13707   // sending.
13708   /* ----------------------------------------------------------------------- */
13709   startNextChkpt(signal);
13710 }//Dbdih::checkStartMoreLcp()
13711 
13712 void
sendLCP_FRAG_ORD(Signal * signal,NodeRecord::FragmentCheckpointInfo info)13713 Dbdih::sendLCP_FRAG_ORD(Signal* signal,
13714 			NodeRecord::FragmentCheckpointInfo info){
13715 
13716   ReplicaRecordPtr replicaPtr;
13717   replicaPtr.i = info.replicaPtr;
13718   ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
13719 
13720   // MT LQH goes via proxy for DD reasons
13721   BlockReference ref = calcLqhBlockRef(replicaPtr.p->procNode);
13722 
13723   if (ERROR_INSERTED(7193) && replicaPtr.p->procNode == getOwnNodeId())
13724   {
13725     return;
13726   }
13727 
13728   if (replicaPtr.p->nextLcp >= MAX_LCP_USED)
13729   {
13730     jam();
13731     infoEvent("Updating nextLcp from %u to %u tab: %u",
13732               replicaPtr.p->nextLcp, 0,
13733               info.tableId);
13734     replicaPtr.p->nextLcp = 0;
13735   }
13736 
13737   Uint32 keepGci = c_lcpState.keepGci;
13738   if (keepGci > SYSFILE->lastCompletedGCI[replicaPtr.p->procNode])
13739   {
13740     jam();
13741     keepGci = SYSFILE->lastCompletedGCI[replicaPtr.p->procNode];
13742   }
13743 
13744   LcpFragOrd * const lcpFragOrd = (LcpFragOrd *)&signal->theData[0];
13745   lcpFragOrd->tableId    = info.tableId;
13746   lcpFragOrd->fragmentId = info.fragId;
13747   lcpFragOrd->lcpId      = SYSFILE->latestLCP_ID;
13748   lcpFragOrd->lcpNo      = replicaPtr.p->nextLcp;
13749   lcpFragOrd->keepGci    = keepGci;
13750   lcpFragOrd->lastFragmentFlag = false;
13751   sendSignal(ref, GSN_LCP_FRAG_ORD, signal, LcpFragOrd::SignalLength, JBB);
13752 }
13753 
checkLcpCompletedLab(Signal * signal)13754 void Dbdih::checkLcpCompletedLab(Signal* signal)
13755 {
13756   if(c_lcpState.lcpStatus < LCP_TAB_COMPLETED)
13757   {
13758     jam();
13759     return;
13760   }
13761 
13762   TabRecordPtr tabPtr;
13763   for (tabPtr.i = 0; tabPtr.i < ctabFileSize; tabPtr.i++) {
13764     jam();
13765     ptrAss(tabPtr, tabRecord);
13766     if (tabPtr.p->tabLcpStatus != TabRecord::TLS_COMPLETED)
13767     {
13768       jam();
13769       return;
13770     }
13771   }
13772 
13773   CRASH_INSERTION2(7027, isMaster());
13774   CRASH_INSERTION2(7018, !isMaster());
13775 
13776   if(c_lcpState.lcpStatus == LCP_TAB_COMPLETED)
13777   {
13778     /**
13779      * We'r done
13780      */
13781 
13782     if (ERROR_INSERTED(7209))
13783     {
13784       signal->theData[0] = DihContinueB::ZCHECK_LCP_COMPLETED;
13785       sendSignal(reference(), GSN_CONTINUEB, signal, 1, JBB);
13786       return;
13787     }
13788 
13789     c_lcpState.setLcpStatus(LCP_TAB_SAVED, __LINE__);
13790     sendLCP_COMPLETE_REP(signal);
13791 
13792     if (ERROR_INSERTED(7210))
13793     {
13794       CLEAR_ERROR_INSERT_VALUE;
13795       EmptyLcpReq* req = (EmptyLcpReq*)signal->getDataPtr();
13796       req->senderRef = reference();
13797       sendEMPTY_LCP_REQ(signal, getOwnNodeId(), 0);
13798     }
13799 
13800     return;
13801   }
13802 
13803   ndbrequire(c_lcpState.lcpStatus == LCP_TAB_SAVED);
13804   allNodesLcpCompletedLab(signal);
13805   return;
13806 }//Dbdih::checkLcpCompletedLab()
13807 
13808 void
sendLCP_COMPLETE_REP(Signal * signal)13809 Dbdih::sendLCP_COMPLETE_REP(Signal* signal){
13810   jam();
13811 
13812   /**
13813    * Quick and dirty fix for bug#36276 dont save
13814    * LCP_COMPLETE_REP to same node same LCP twice
13815    */
13816   bool alreadysent =
13817     c_lcpState.m_lastLCP_COMPLETE_REP_id == SYSFILE->latestLCP_ID &&
13818     c_lcpState.m_lastLCP_COMPLETE_REP_ref == c_lcpState.m_masterLcpDihRef;
13819 
13820   if (!alreadysent)
13821   {
13822     LcpCompleteRep * rep = (LcpCompleteRep*)signal->getDataPtrSend();
13823     rep->nodeId = getOwnNodeId();
13824     rep->lcpId = SYSFILE->latestLCP_ID;
13825     rep->blockNo = DBDIH;
13826 
13827     sendSignal(c_lcpState.m_masterLcpDihRef, GSN_LCP_COMPLETE_REP, signal,
13828                LcpCompleteRep::SignalLength, JBB);
13829 
13830     c_lcpState.m_lastLCP_COMPLETE_REP_id = SYSFILE->latestLCP_ID;
13831     c_lcpState.m_lastLCP_COMPLETE_REP_ref = c_lcpState.m_masterLcpDihRef;
13832   }
13833 
13834   /**
13835    * Say that an initial node restart does not need to be redone
13836    *   once node has been part of first LCP
13837    */
13838   if (c_set_initial_start_flag &&
13839       c_lcpState.m_participatingLQH.get(getOwnNodeId()))
13840   {
13841     jam();
13842     c_set_initial_start_flag = FALSE;
13843   }
13844 }
13845 
13846 /*-------------------------------------------------------------------------- */
13847 /* COMP_LCP_ROUND                   A LQH HAS COMPLETED A LOCAL CHECKPOINT  */
13848 /*------------------------------------------------------------------------- */
execLCP_COMPLETE_REP(Signal * signal)13849 void Dbdih::execLCP_COMPLETE_REP(Signal* signal)
13850 {
13851   jamEntry();
13852 
13853   CRASH_INSERTION(7191);
13854 
13855 #if 0
13856   g_eventLogger->info("LCP_COMPLETE_REP");
13857   printLCP_COMPLETE_REP(stdout,
13858 			signal->getDataPtr(),
13859 			signal->length(), number());
13860 #endif
13861 
13862   LcpCompleteRep * rep = (LcpCompleteRep*)signal->getDataPtr();
13863 
13864   if (rep->nodeId == LcpFragRep::BROADCAST_REQ)
13865   {
13866     jam();
13867     ndbrequire(refToNode(signal->getSendersBlockRef()) == getOwnNodeId());
13868 
13869     /**
13870      * Set correct nodeId
13871      */
13872     rep->nodeId = getOwnNodeId();
13873 
13874     NodeReceiverGroup rg(DBDIH, c_lcpState.m_participatingDIH);
13875     rg.m_nodes.clear(getOwnNodeId());
13876     sendSignal(rg, GSN_LCP_COMPLETE_REP, signal, signal->getLength(), JBB);
13877 
13878     /**
13879      * and continue processing
13880      */
13881   }
13882 
13883   Uint32 lcpId = rep->lcpId;
13884   Uint32 nodeId = rep->nodeId;
13885   Uint32 blockNo = rep->blockNo;
13886 
13887   if(c_lcpMasterTakeOverState.state > LMTOS_WAIT_LCP_FRAG_REP){
13888     jam();
13889     /**
13890      * Don't allow LCP_COMPLETE_REP to arrive during
13891      * LCP master take over
13892      */
13893     ndbrequire(isMaster());
13894     ndbrequire(blockNo == DBDIH);
13895     sendSignalWithDelay(reference(), GSN_LCP_COMPLETE_REP, signal, 100,
13896 			signal->length());
13897     return;
13898   }
13899 
13900   ndbrequire(c_lcpState.lcpStatus != LCP_STATUS_IDLE);
13901 
13902   switch(blockNo){
13903   case DBLQH:
13904     jam();
13905     c_lcpState.m_LCP_COMPLETE_REP_Counter_LQH.clearWaitingFor(nodeId);
13906     ndbrequire(!c_lcpState.m_LAST_LCP_FRAG_ORD.isWaitingFor(nodeId));
13907     break;
13908   case DBDIH:
13909     jam();
13910     ndbrequire(isMaster());
13911     c_lcpState.m_LCP_COMPLETE_REP_Counter_DIH.clearWaitingFor(nodeId);
13912     break;
13913   case 0:
13914     jam();
13915     ndbrequire(!isMaster());
13916     ndbrequire(c_lcpState.m_LCP_COMPLETE_REP_From_Master_Received == false);
13917     c_lcpState.m_LCP_COMPLETE_REP_From_Master_Received = true;
13918     break;
13919   default:
13920     ndbrequire(false);
13921   }
13922   ndbrequire(lcpId == SYSFILE->latestLCP_ID);
13923 
13924   allNodesLcpCompletedLab(signal);
13925   return;
13926 }
13927 
allNodesLcpCompletedLab(Signal * signal)13928 void Dbdih::allNodesLcpCompletedLab(Signal* signal)
13929 {
13930   jam();
13931 
13932   if (c_lcpState.lcpStatus != LCP_TAB_SAVED) {
13933     jam();
13934     /**
13935      * We have not sent LCP_COMPLETE_REP to master DIH yet
13936      */
13937     return;
13938   }//if
13939 
13940   if (!c_lcpState.m_LCP_COMPLETE_REP_Counter_LQH.done()){
13941     jam();
13942     return;
13943   }
13944 
13945   if (!c_lcpState.m_LCP_COMPLETE_REP_Counter_DIH.done()){
13946     jam();
13947     return;
13948   }
13949 
13950   if (!isMaster() &&
13951       c_lcpState.m_LCP_COMPLETE_REP_From_Master_Received == false){
13952     jam();
13953     /**
13954      * Wait until master DIH has signaled lcp is complete
13955      */
13956     return;
13957   }
13958 
13959   if(c_lcpMasterTakeOverState.state != LMTOS_IDLE){
13960     jam();
13961 #ifdef VM_TRACE
13962     g_eventLogger->info("Exiting from allNodesLcpCompletedLab");
13963 #endif
13964     return;
13965   }
13966 
13967 
13968   /*------------------------------------------------------------------------ */
13969   /*     WE HAVE NOW COMPLETED A LOCAL CHECKPOINT. WE ARE NOW READY TO WAIT  */
13970   /*     FOR THE NEXT LOCAL CHECKPOINT. SEND WITHOUT TIME-OUT SINCE IT MIGHT */
13971   /*     BE TIME TO START THE NEXT LOCAL CHECKPOINT IMMEDIATELY.             */
13972   /*     CLEAR BIT 3 OF SYSTEM RESTART BITS TO INDICATE THAT THERE IS NO     */
13973   /*     LOCAL CHECKPOINT ONGOING. THIS WILL BE WRITTEN AT SOME LATER TIME   */
13974   /*     DURING A GLOBAL CHECKPOINT. IT IS NOT NECESSARY TO WRITE IT         */
13975   /*     IMMEDIATELY. WE WILL ALSO CLEAR BIT 2 OF SYSTEM RESTART BITS IF ALL */
13976   /*     CURRENTLY ACTIVE NODES COMPLETED THE LOCAL CHECKPOINT.              */
13977   /*------------------------------------------------------------------------ */
13978   CRASH_INSERTION(7019);
13979   signal->setTrace(0);
13980 
13981   c_lcpState.setLcpStatus(LCP_STATUS_IDLE, __LINE__);
13982 
13983   /**
13984    * Update m_local_lcp_state
13985    */
13986   m_local_lcp_state.lcp_complete_rep(c_newest_restorable_gci);
13987 
13988   if (isMaster())
13989   {
13990     /**
13991      * Check for any "completed" TO
13992      */
13993     TakeOverRecordPtr takeOverPtr;
13994     for (c_activeTakeOverList.first(takeOverPtr); !takeOverPtr.isNull();
13995          c_activeTakeOverList.next(takeOverPtr))
13996     {
13997       jam();
13998       Ptr<NodeRecord> nodePtr;
13999       nodePtr.i = takeOverPtr.p->toStartingNode;
14000       if (takeOverPtr.p->toMasterStatus == TakeOverRecord::TO_WAIT_LCP)
14001       {
14002         jam();
14003         if (c_lcpState.m_participatingLQH.get(nodePtr.i))
14004         {
14005           jam();
14006           ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
14007           ndbrequire(nodePtr.p->copyCompleted == 2);
14008 
14009           EndToConf * conf = (EndToConf *)signal->getDataPtrSend();
14010           conf->senderData = takeOverPtr.p->m_senderData;
14011           conf->sendingNodeId = cownNodeId;
14012           conf->startingNodeId = nodePtr.i;
14013           sendSignal(takeOverPtr.p->m_senderRef, GSN_END_TOCONF, signal,
14014                      EndToConf::SignalLength, JBB);
14015 
14016           releaseTakeOver(takeOverPtr);
14017         }
14018       }
14019     }
14020   }
14021 
14022   Sysfile::clearLCPOngoing(SYSFILE->systemRestartBits);
14023   setLcpActiveStatusEnd(signal);
14024 
14025   if(!isMaster()){
14026     jam();
14027     /**
14028      * We're not master, be content
14029      */
14030     return;
14031   }
14032 
14033   // Send LCP_COMPLETE_REP to all other nodes
14034   // allowing them to set their lcpStatus to LCP_STATUS_IDLE
14035   LcpCompleteRep * rep = (LcpCompleteRep*)signal->getDataPtrSend();
14036   rep->nodeId = getOwnNodeId();
14037   rep->lcpId = SYSFILE->latestLCP_ID;
14038   rep->blockNo = 0; // 0 = Sent from master
14039 
14040   NodeRecordPtr nodePtr;
14041   nodePtr.i = cfirstAliveNode;
14042   do {
14043     jam();
14044     ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
14045     if (nodePtr.i != cownNodeId){
14046       BlockReference ref = calcDihBlockRef(nodePtr.i);
14047       sendSignal(ref, GSN_LCP_COMPLETE_REP, signal,
14048 		 LcpCompleteRep::SignalLength, JBB);
14049     }
14050     nodePtr.i = nodePtr.p->nextNode;
14051   } while (nodePtr.i != RNIL);
14052 
14053 
14054   jam();
14055   /***************************************************************************/
14056   // Report the event that a local checkpoint has completed.
14057   /***************************************************************************/
14058   signal->theData[0] = NDB_LE_LocalCheckpointCompleted; //Event type
14059   signal->theData[1] = SYSFILE->latestLCP_ID;
14060   sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
14061 
14062   if (c_newest_restorable_gci > c_lcpState.lcpStopGcp &&
14063       !(ERROR_INSERTED(7222) || ERROR_INSERTED(7223)))
14064   {
14065     jam();
14066     c_lcpState.lcpStopGcp = c_newest_restorable_gci;
14067   }
14068 
14069   /**
14070    * Start checking for next LCP
14071    */
14072   checkLcpStart(signal, __LINE__);
14073 
14074   Mutex mutex(signal, c_mutexMgr, c_fragmentInfoMutex_lcp);
14075   mutex.unlock();
14076 
14077   c_lcpState.m_lcp_time = c_current_time - c_lcpState.m_start_time;
14078 
14079   if (cwaitLcpSr == true) {
14080     jam();
14081     cwaitLcpSr = false;
14082     ndbsttorry10Lab(signal, __LINE__);
14083     return;
14084   }//if
14085 
14086 
14087   if (c_nodeStartMaster.blockLcp == true) {
14088     jam();
14089     lcpBlockedLab(signal, false, c_nodeStartMaster.startNode);
14090     return;
14091   }//if
14092   return;
14093 }//Dbdih::allNodesLcpCompletedLab()
14094 
14095 /******************************************************************************/
14096 /* **********     TABLE UPDATE MODULE                             *************/
14097 /* ****************************************************************************/
14098 /* ------------------------------------------------------------------------- */
14099 /*       THIS MODULE IS USED TO UPDATE THE TABLE DESCRIPTION. IT STARTS BY   */
14100 /*       CREATING THE FIRST TABLE FILE, THEN UPDATES THIS FILE AND CLOSES IT.*/
14101 /*       AFTER THAT THE SAME HAPPENS WITH THE SECOND FILE. AFTER THAT THE    */
14102 /*       TABLE DISTRIBUTION HAS BEEN UPDATED.                                */
14103 /*                                                                           */
14104 /*       THE REASON FOR CREATING THE FILE AND NOT OPENING IT IS TO ENSURE    */
14105 /*       THAT WE DO NOT GET A MIX OF OLD AND NEW INFORMATION IN THE FILE IN  */
14106 /*       ERROR SITUATIONS.                                                   */
14107 /* ------------------------------------------------------------------------- */
tableUpdateLab(Signal * signal,TabRecordPtr tabPtr)14108 void Dbdih::tableUpdateLab(Signal* signal, TabRecordPtr tabPtr) {
14109   FileRecordPtr filePtr;
14110   if(tabPtr.p->tabStorage == TabRecord::ST_TEMPORARY) {
14111     // For temporary tables we do not write to disk. Mark both copies 0 and 1
14112     // as done, and go straight to the after-close code.
14113     filePtr.i = tabPtr.p->tabFile[1];
14114     ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
14115     tableCloseLab(signal, filePtr);
14116     return;
14117   }
14118   filePtr.i = tabPtr.p->tabFile[0];
14119   ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
14120   createFileRw(signal, filePtr);
14121   filePtr.p->reqStatus = FileRecord::TABLE_CREATE;
14122   return;
14123 }//Dbdih::tableUpdateLab()
14124 
tableCreateLab(Signal * signal,FileRecordPtr filePtr)14125 void Dbdih::tableCreateLab(Signal* signal, FileRecordPtr filePtr)
14126 {
14127   TabRecordPtr tabPtr;
14128   tabPtr.i = filePtr.p->tabRef;
14129   ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
14130   writeTabfile(signal, tabPtr.p, filePtr);
14131   filePtr.p->reqStatus = FileRecord::TABLE_WRITE;
14132   return;
14133 }//Dbdih::tableCreateLab()
14134 
tableWriteLab(Signal * signal,FileRecordPtr filePtr)14135 void Dbdih::tableWriteLab(Signal* signal, FileRecordPtr filePtr)
14136 {
14137   closeFile(signal, filePtr);
14138   filePtr.p->reqStatus = FileRecord::TABLE_CLOSE;
14139   return;
14140 }//Dbdih::tableWriteLab()
14141 
tableCloseLab(Signal * signal,FileRecordPtr filePtr)14142 void Dbdih::tableCloseLab(Signal* signal, FileRecordPtr filePtr)
14143 {
14144   TabRecordPtr tabPtr;
14145   tabPtr.i = filePtr.p->tabRef;
14146   ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
14147   if (filePtr.i == tabPtr.p->tabFile[0]) {
14148     jam();
14149     filePtr.i = tabPtr.p->tabFile[1];
14150     ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
14151     createFileRw(signal, filePtr);
14152     filePtr.p->reqStatus = FileRecord::TABLE_CREATE;
14153     return;
14154   }//if
14155   switch (tabPtr.p->tabUpdateState) {
14156   case TabRecord::US_LOCAL_CHECKPOINT:
14157     jam();
14158     releaseTabPages(tabPtr.i);
14159     signal->theData[0] = DihContinueB::ZCHECK_LCP_COMPLETED;
14160     sendSignal(reference(), GSN_CONTINUEB, signal, 1, JBB);
14161 
14162     tabPtr.p->tabCopyStatus = TabRecord::CS_IDLE;
14163     tabPtr.p->tabUpdateState = TabRecord::US_IDLE;
14164     tabPtr.p->tabLcpStatus = TabRecord::TLS_COMPLETED;
14165     return;
14166     break;
14167   case TabRecord::US_REMOVE_NODE:
14168     jam();
14169     releaseTabPages(tabPtr.i);
14170     tabPtr.p->tabCopyStatus = TabRecord::CS_IDLE;
14171     tabPtr.p->tabUpdateState = TabRecord::US_IDLE;
14172     if (tabPtr.p->tabLcpStatus == TabRecord::TLS_WRITING_TO_FILE) {
14173       jam();
14174       tabPtr.p->tabLcpStatus = TabRecord::TLS_COMPLETED;
14175       signal->theData[0] = DihContinueB::ZCHECK_LCP_COMPLETED;
14176       sendSignal(reference(), GSN_CONTINUEB, signal, 1, JBB);
14177     }//if
14178     signal->theData[0] = DihContinueB::ZREMOVE_NODE_FROM_TABLE;
14179     signal->theData[1] = tabPtr.p->tabRemoveNode;
14180     signal->theData[2] = tabPtr.i + 1;
14181     sendSignal(reference(), GSN_CONTINUEB, signal, 3, JBB);
14182     return;
14183     break;
14184   case TabRecord::US_INVALIDATE_NODE_LCP:
14185     jam();
14186     releaseTabPages(tabPtr.i);
14187     tabPtr.p->tabCopyStatus = TabRecord::CS_IDLE;
14188     tabPtr.p->tabUpdateState = TabRecord::US_IDLE;
14189 
14190     signal->theData[0] = DihContinueB::ZINVALIDATE_NODE_LCP;
14191     signal->theData[1] = tabPtr.p->tabRemoveNode;
14192     signal->theData[2] = tabPtr.i + 1;
14193     if (ERROR_INSERTED(7204))
14194     {
14195       sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 2000, 3);
14196     }
14197     else
14198     {
14199       sendSignal(reference(), GSN_CONTINUEB, signal, 3, JBB);
14200     }
14201     return;
14202   case TabRecord::US_COPY_TAB_REQ:
14203     jam();
14204     tabPtr.p->tabUpdateState = TabRecord::US_IDLE;
14205     copyTabReq_complete(signal, tabPtr);
14206     return;
14207     break;
14208   case TabRecord::US_ADD_TABLE_MASTER:
14209     jam();
14210     releaseTabPages(tabPtr.i);
14211     tabPtr.p->tabUpdateState = TabRecord::US_IDLE;
14212     signal->theData[0] = DihContinueB::ZDIH_ADD_TABLE_MASTER;
14213     signal->theData[1] = tabPtr.i;
14214     sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
14215     return;
14216     break;
14217   case TabRecord::US_ADD_TABLE_SLAVE:
14218     jam();
14219     releaseTabPages(tabPtr.i);
14220     tabPtr.p->tabUpdateState = TabRecord::US_IDLE;
14221     signal->theData[0] = DihContinueB::ZDIH_ADD_TABLE_SLAVE;
14222     signal->theData[1] = tabPtr.i;
14223     sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
14224     return;
14225     break;
14226   case TabRecord::US_CALLBACK:
14227   {
14228     jam();
14229     releaseTabPages(tabPtr.i);
14230     tabPtr.p->tabCopyStatus = TabRecord::CS_IDLE;
14231     tabPtr.p->tabUpdateState = TabRecord::US_IDLE;
14232 
14233     Ptr<ConnectRecord> connectPtr;
14234     connectPtr.i = tabPtr.p->connectrec;
14235     ptrCheckGuard(connectPtr, cconnectFileSize, connectRecord);
14236     execute(signal, connectPtr.p->m_callback, 0);
14237     return;
14238   }
14239   default:
14240     ndbrequire(false);
14241     return;
14242     break;
14243   }//switch
14244 }//Dbdih::tableCloseLab()
14245 
checkGcpStopLab(Signal * signal)14246 void Dbdih::checkGcpStopLab(Signal* signal)
14247 {
14248   Uint32 cnt0 = ++m_gcp_monitor.m_gcp_save.m_counter;
14249   Uint32 cnt1 = ++m_gcp_monitor.m_micro_gcp.m_counter;
14250 
14251   if (m_gcp_monitor.m_gcp_save.m_gci == m_gcp_save.m_gci)
14252   {
14253     jam();
14254     if (m_gcp_monitor.m_gcp_save.m_max_lag &&
14255         cnt0 == m_gcp_monitor.m_gcp_save.m_max_lag)
14256     {
14257       crashSystemAtGcpStop(signal, false);
14258       return;
14259     }
14260 
14261     Uint32 threshold = 60; // seconds
14262     if (cnt0 && ((cnt0 % (threshold * 10)) == 0))
14263     {
14264       if (m_gcp_monitor.m_gcp_save.m_max_lag)
14265       {
14266         warningEvent("GCP Monitor: GCP_SAVE lag %u seconds"
14267                      " (max lag: %us)",
14268                      cnt0/10, m_gcp_monitor.m_gcp_save.m_max_lag/10);
14269       }
14270       else
14271       {
14272         warningEvent("GCP Monitor: GCP_SAVE lag %u seconds"
14273                      " (no max lag)",
14274                      cnt0/10);
14275       }
14276     }
14277   }
14278   else
14279   {
14280     jam();
14281     m_gcp_monitor.m_gcp_save.m_gci = m_gcp_save.m_gci;
14282     m_gcp_monitor.m_gcp_save.m_counter = 0;
14283   }
14284 
14285   if (m_gcp_monitor.m_micro_gcp.m_gci == m_micro_gcp.m_current_gci)
14286   {
14287     jam();
14288     Uint32 cmp = m_micro_gcp.m_enabled ?
14289       m_gcp_monitor.m_micro_gcp.m_max_lag :
14290       m_gcp_monitor.m_gcp_save.m_max_lag;
14291 
14292     if (cmp && cnt1 == cmp)
14293     {
14294       crashSystemAtGcpStop(signal, false);
14295       return;
14296     }
14297 
14298     Uint32 threshold = 10; // seconds
14299     if (cnt1 && ((cnt0 % (threshold * 10)) == 0))
14300     {
14301       if (m_gcp_monitor.m_micro_gcp.m_max_lag)
14302       {
14303         warningEvent("GCP Monitor: GCP_COMMIT lag %u seconds"
14304                      " (max lag: %u)",
14305                      cnt1/10, m_gcp_monitor.m_micro_gcp.m_max_lag/10);
14306       }
14307       else
14308       {
14309         warningEvent("GCP Monitor: GCP_COMMIT lag %u seconds"
14310                      " (no max lag)",
14311                      cnt1/10);
14312       }
14313     }
14314   }
14315   else
14316   {
14317     jam();
14318     m_gcp_monitor.m_micro_gcp.m_counter = 0;
14319     m_gcp_monitor.m_micro_gcp.m_gci = m_micro_gcp.m_current_gci;
14320   }
14321 
14322   signal->theData[0] = DihContinueB::ZCHECK_GCP_STOP;
14323   sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 100, 1);
14324   return;
14325 }//Dbdih::checkGcpStopLab()
14326 
14327 void
dumpGcpStop()14328 Dbdih::dumpGcpStop()
14329 {
14330   ndbout_c("c_nodeStartMaster.blockGcp: %u %u",
14331            c_nodeStartMaster.blockGcp,
14332            c_nodeStartMaster.startNode);
14333   ndbout_c("m_gcp_save.m_counter: %u m_gcp_save.m_max_lag: %u",
14334            m_gcp_monitor.m_gcp_save.m_counter,
14335            m_gcp_monitor.m_gcp_save.m_max_lag);
14336   ndbout_c("m_micro_gcp.m_counter: %u m_micro_gcp.m_max_lag: %u",
14337            m_gcp_monitor.m_micro_gcp.m_counter,
14338            m_gcp_monitor.m_micro_gcp.m_max_lag);
14339 
14340 
14341   ndbout_c("m_gcp_save.m_state: %u", m_gcp_save.m_state);
14342   ndbout_c("m_gcp_save.m_master.m_state: %u", m_gcp_save.m_master.m_state);
14343   ndbout_c("m_micro_gcp.m_state: %u", m_micro_gcp.m_state);
14344   ndbout_c("m_micro_gcp.m_master.m_state: %u", m_micro_gcp.m_master.m_state);
14345 
14346   ndbout_c("c_COPY_GCIREQ_Counter = %s", c_COPY_GCIREQ_Counter.getText());
14347   ndbout_c("c_COPY_TABREQ_Counter = %s", c_COPY_TABREQ_Counter.getText());
14348   ndbout_c("c_CREATE_FRAGREQ_Counter = %s", c_CREATE_FRAGREQ_Counter.getText());
14349   ndbout_c("c_DIH_SWITCH_REPLICA_REQ_Counter = %s",
14350 	   c_DIH_SWITCH_REPLICA_REQ_Counter.getText());
14351   ndbout_c("c_EMPTY_LCP_REQ_Counter = %s",c_EMPTY_LCP_REQ_Counter.getText());
14352   ndbout_c("c_GCP_COMMIT_Counter = %s", c_GCP_COMMIT_Counter.getText());
14353   ndbout_c("c_GCP_PREPARE_Counter = %s", c_GCP_PREPARE_Counter.getText());
14354   ndbout_c("c_GCP_SAVEREQ_Counter = %s", c_GCP_SAVEREQ_Counter.getText());
14355   ndbout_c("c_SUB_GCP_COMPLETE_REP_Counter = %s",
14356            c_SUB_GCP_COMPLETE_REP_Counter.getText());
14357   ndbout_c("c_INCL_NODEREQ_Counter = %s", c_INCL_NODEREQ_Counter.getText());
14358   ndbout_c("c_MASTER_GCPREQ_Counter = %s", c_MASTER_GCPREQ_Counter.getText());
14359   ndbout_c("c_MASTER_LCPREQ_Counter = %s", c_MASTER_LCPREQ_Counter.getText());
14360   ndbout_c("c_START_INFOREQ_Counter = %s", c_START_INFOREQ_Counter.getText());
14361   ndbout_c("c_START_RECREQ_Counter = %s", c_START_RECREQ_Counter.getText());
14362   ndbout_c("c_STOP_ME_REQ_Counter = %s", c_STOP_ME_REQ_Counter.getText());
14363   ndbout_c("c_TC_CLOPSIZEREQ_Counter = %s", c_TC_CLOPSIZEREQ_Counter.getText());
14364   ndbout_c("c_TCGETOPSIZEREQ_Counter = %s", c_TCGETOPSIZEREQ_Counter.getText());
14365 
14366   ndbout_c("m_copyReason: %d m_waiting: %u %u",
14367            c_copyGCIMaster.m_copyReason,
14368            c_copyGCIMaster.m_waiting[0],
14369            c_copyGCIMaster.m_waiting[1]);
14370 
14371   ndbout_c("c_copyGCISlave: sender{Data, Ref} %d %x reason: %d nextWord: %d",
14372 	   c_copyGCISlave.m_senderData,
14373 	   c_copyGCISlave.m_senderRef,
14374 	   c_copyGCISlave.m_copyReason,
14375 	   c_copyGCISlave.m_expectedNextWord);
14376 }
14377 
14378 /**
14379  * GCP stop detected,
14380  * send SYSTEM_ERROR to all other alive nodes
14381  */
crashSystemAtGcpStop(Signal * signal,bool local)14382 void Dbdih::crashSystemAtGcpStop(Signal* signal, bool local)
14383 {
14384   dumpGcpStop();
14385   Uint32 save_counter = m_gcp_monitor.m_gcp_save.m_counter;
14386   Uint32 micro_counter = m_gcp_monitor.m_micro_gcp.m_counter;
14387   m_gcp_monitor.m_gcp_save.m_counter = 0;
14388   m_gcp_monitor.m_micro_gcp.m_counter = 0;
14389 
14390   if (local)
14391     goto dolocal;
14392 
14393   if (c_nodeStartMaster.blockGcp == 2)
14394   {
14395     jam();
14396     /**
14397      * Starting node...is delaying GCP to long...
14398      *   kill it
14399      */
14400     SystemError * const sysErr = (SystemError*)&signal->theData[0];
14401     sysErr->errorCode = SystemError::GCPStopDetected;
14402     sysErr->errorRef = reference();
14403     sysErr->data[0] = m_gcp_save.m_master.m_state;
14404     sysErr->data[1] = cgcpOrderBlocked;
14405     sysErr->data[2] = m_micro_gcp.m_master.m_state;
14406     sendSignal(calcNdbCntrBlockRef(c_nodeStartMaster.startNode),
14407                GSN_SYSTEM_ERROR, signal, SystemError::SignalLength, JBA);
14408     return;
14409   }
14410 
14411   if (save_counter == m_gcp_monitor.m_gcp_save.m_max_lag)
14412   {
14413     switch(m_gcp_save.m_master.m_state){
14414     case GcpSave::GCP_SAVE_IDLE:
14415     {
14416       /**
14417        * No switch for looong time...and we're idle...it *our* fault
14418        */
14419       local = true;
14420       break;
14421     }
14422     case GcpSave::GCP_SAVE_REQ:
14423     {
14424       jam();
14425       NodeReceiverGroup rg(DBLQH, c_GCP_SAVEREQ_Counter);
14426       signal->theData[0] = 2305;
14427       sendSignal(rg, GSN_DUMP_STATE_ORD, signal, 1, JBB);
14428 
14429       warningEvent("Detected GCP stop(%d)...sending kill to %s",
14430                 m_gcp_save.m_master.m_state, c_GCP_SAVEREQ_Counter.getText());
14431       ndbout_c("Detected GCP stop(%d)...sending kill to %s",
14432                m_gcp_save.m_master.m_state, c_GCP_SAVEREQ_Counter.getText());
14433       ndbrequire(!c_GCP_SAVEREQ_Counter.done());
14434       return;
14435     }
14436     case GcpSave::GCP_SAVE_COPY_GCI:
14437     {
14438       /**
14439        * We're waiting for a COPY_GCICONF
14440        */
14441       warningEvent("Detected GCP stop(%d)...sending kill to %s",
14442                 m_gcp_save.m_master.m_state, c_COPY_GCIREQ_Counter.getText());
14443       ndbout_c("Detected GCP stop(%d)...sending kill to %s",
14444                m_gcp_save.m_master.m_state, c_COPY_GCIREQ_Counter.getText());
14445 
14446       {
14447         NodeReceiverGroup rg(DBDIH, c_COPY_GCIREQ_Counter);
14448         signal->theData[0] = 7022;
14449         sendSignal(rg, GSN_DUMP_STATE_ORD, signal, 1, JBA);
14450       }
14451 
14452       {
14453         NodeReceiverGroup rg(NDBCNTR, c_COPY_GCIREQ_Counter);
14454         SystemError * const sysErr = (SystemError*)&signal->theData[0];
14455         sysErr->errorCode = SystemError::GCPStopDetected;
14456         sysErr->errorRef = reference();
14457         sysErr->data[0] = m_gcp_save.m_master.m_state;
14458         sysErr->data[1] = cgcpOrderBlocked;
14459         sysErr->data[2] = m_micro_gcp.m_master.m_state;
14460         sendSignal(rg, GSN_SYSTEM_ERROR, signal,
14461                    SystemError::SignalLength, JBA);
14462       }
14463       ndbrequire(!c_COPY_GCIREQ_Counter.done());
14464       return;
14465     }
14466     case GcpSave::GCP_SAVE_CONF:
14467       /**
14468        * This *should* not happen (not a master state)
14469        */
14470       local = true;
14471       break;
14472     }
14473   }
14474 
14475   if (micro_counter == m_gcp_monitor.m_micro_gcp.m_max_lag)
14476   {
14477     switch(m_micro_gcp.m_master.m_state){
14478     case MicroGcp::M_GCP_IDLE:
14479     {
14480       /**
14481        * No switch for looong time...and we're idle...it *our* fault
14482        */
14483       local = true;
14484       break;
14485     }
14486     case MicroGcp::M_GCP_PREPARE:
14487     {
14488     /**
14489      * We're waiting for a GCP PREPARE CONF
14490      */
14491       warningEvent("Detected GCP stop(%d)...sending kill to %s",
14492                 m_micro_gcp.m_state, c_GCP_PREPARE_Counter.getText());
14493       ndbout_c("Detected GCP stop(%d)...sending kill to %s",
14494                m_micro_gcp.m_state, c_GCP_PREPARE_Counter.getText());
14495 
14496       {
14497         NodeReceiverGroup rg(DBDIH, c_GCP_PREPARE_Counter);
14498         signal->theData[0] = 7022;
14499         sendSignal(rg, GSN_DUMP_STATE_ORD, signal, 1, JBA);
14500       }
14501 
14502       {
14503         NodeReceiverGroup rg(NDBCNTR, c_GCP_PREPARE_Counter);
14504         SystemError * const sysErr = (SystemError*)&signal->theData[0];
14505         sysErr->errorCode = SystemError::GCPStopDetected;
14506         sysErr->errorRef = reference();
14507         sysErr->data[0] = m_gcp_save.m_master.m_state;
14508         sysErr->data[1] = cgcpOrderBlocked;
14509         sysErr->data[2] = m_micro_gcp.m_master.m_state;
14510         sendSignal(rg, GSN_SYSTEM_ERROR, signal,
14511                    SystemError::SignalLength, JBA);
14512       }
14513       ndbrequire(!c_GCP_PREPARE_Counter.done());
14514       return;
14515     }
14516     case MicroGcp::M_GCP_COMMIT:
14517     {
14518       warningEvent("Detected GCP stop(%d)...sending kill to %s",
14519                 m_micro_gcp.m_state, c_GCP_COMMIT_Counter.getText());
14520       ndbout_c("Detected GCP stop(%d)...sending kill to %s",
14521                m_micro_gcp.m_state, c_GCP_COMMIT_Counter.getText());
14522 
14523       {
14524         NodeReceiverGroup rg(DBDIH, c_GCP_COMMIT_Counter);
14525         signal->theData[0] = 7022;
14526         sendSignal(rg, GSN_DUMP_STATE_ORD, signal, 1, JBA);
14527       }
14528 
14529       {
14530         NodeReceiverGroup rg(NDBCNTR, c_GCP_COMMIT_Counter);
14531         SystemError * const sysErr = (SystemError*)&signal->theData[0];
14532         sysErr->errorCode = SystemError::GCPStopDetected;
14533         sysErr->errorRef = reference();
14534         sysErr->data[0] = m_gcp_save.m_master.m_state;
14535         sysErr->data[1] = cgcpOrderBlocked;
14536         sysErr->data[2] = m_micro_gcp.m_master.m_state;
14537         sendSignal(rg, GSN_SYSTEM_ERROR, signal,
14538                    SystemError::SignalLength, JBA);
14539       }
14540       ndbrequire(!c_GCP_COMMIT_Counter.done());
14541       return;
14542     }
14543     case MicroGcp::M_GCP_COMMITTED:
14544       /**
14545        * This *should* not happen (not a master state)
14546        */
14547       local = true;
14548       break;
14549     case MicroGcp::M_GCP_COMPLETE:
14550       infoEvent("Detected GCP stop(%d)...sending kill to %s",
14551                 m_micro_gcp.m_state, c_SUB_GCP_COMPLETE_REP_Counter.getText());
14552       ndbout_c("Detected GCP stop(%d)...sending kill to %s",
14553                m_micro_gcp.m_state, c_SUB_GCP_COMPLETE_REP_Counter.getText());
14554 
14555       {
14556         NodeReceiverGroup rg(DBDIH, c_SUB_GCP_COMPLETE_REP_Counter);
14557         signal->theData[0] = 7022;
14558         sendSignal(rg, GSN_DUMP_STATE_ORD, signal, 1, JBA);
14559       }
14560 
14561       {
14562         NodeReceiverGroup rg(NDBCNTR, c_SUB_GCP_COMPLETE_REP_Counter);
14563         SystemError * const sysErr = (SystemError*)&signal->theData[0];
14564         sysErr->errorCode = SystemError::GCPStopDetected;
14565         sysErr->errorRef = reference();
14566         sysErr->data[0] = m_gcp_save.m_master.m_state;
14567         sysErr->data[1] = cgcpOrderBlocked;
14568         sysErr->data[2] = m_micro_gcp.m_master.m_state;
14569         sendSignal(rg, GSN_SYSTEM_ERROR, signal,
14570                    SystemError::SignalLength, JBA);
14571       }
14572       ndbrequire(!c_SUB_GCP_COMPLETE_REP_Counter.done());
14573       return;
14574     }
14575   }
14576 
14577 dolocal:
14578   FileRecordPtr file0Ptr;
14579   file0Ptr.i = crestartInfoFile[0];
14580   ptrCheckGuard(file0Ptr, cfileFileSize, fileRecord);
14581   FileRecordPtr file1Ptr;
14582   file1Ptr.i = crestartInfoFile[1];
14583   ptrCheckGuard(file1Ptr, cfileFileSize, fileRecord);
14584 
14585   ndbout_c("file[0] status: %d type: %d reqStatus: %d file1: %d %d %d",
14586 	   file0Ptr.p->fileStatus, file0Ptr.p->fileType, file0Ptr.p->reqStatus,
14587 	   file1Ptr.p->fileStatus, file1Ptr.p->fileType, file1Ptr.p->reqStatus
14588 	   );
14589 
14590   signal->theData[0] = 404;
14591   signal->theData[1] = file0Ptr.p->fileRef;
14592   EXECUTE_DIRECT(NDBFS, GSN_DUMP_STATE_ORD, signal, 2);
14593 
14594   signal->theData[0] = 404;
14595   signal->theData[1] = file1Ptr.p->fileRef;
14596   EXECUTE_DIRECT(NDBFS, GSN_DUMP_STATE_ORD, signal, 2);
14597 
14598   jam();
14599   SystemError * const sysErr = (SystemError*)&signal->theData[0];
14600   sysErr->errorCode = SystemError::GCPStopDetected;
14601   sysErr->errorRef = reference();
14602   sysErr->data[0] = m_gcp_save.m_master.m_state;
14603   sysErr->data[1] = cgcpOrderBlocked;
14604   sysErr->data[2] = m_micro_gcp.m_master.m_state;
14605   EXECUTE_DIRECT(NDBCNTR, GSN_SYSTEM_ERROR,
14606                  signal, SystemError::SignalLength);
14607   ndbrequire(false);
14608   return;
14609 }//Dbdih::crashSystemAtGcpStop()
14610 
14611 /*************************************************************************/
14612 /*                                                                       */
14613 /*       MODULE: ALLOCPAGE                                               */
14614 /*       DESCRIPTION: THE SUBROUTINE IS CALLED WITH POINTER TO PAGE      */
14615 /*                    RECORD. A PAGE  RECORD IS TAKEN FROM               */
14616 /*                    THE FREE PAGE  LIST                                */
14617 /*************************************************************************/
allocpage(PageRecordPtr & pagePtr)14618 void Dbdih::allocpage(PageRecordPtr& pagePtr)
14619 {
14620   ndbrequire(cfirstfreepage != RNIL);
14621   pagePtr.i = cfirstfreepage;
14622   ptrCheckGuard(pagePtr, cpageFileSize, pageRecord);
14623   cfirstfreepage = pagePtr.p->nextfreepage;
14624   pagePtr.p->nextfreepage = RNIL;
14625 }//Dbdih::allocpage()
14626 
14627 /*************************************************************************/
14628 /*                                                                       */
14629 /*       MODULE: ALLOC_STORED_REPLICA                                    */
14630 /*       DESCRIPTION: THE SUBROUTINE IS CALLED TO GET A REPLICA RECORD,  */
14631 /*                    TO INITIALISE IT AND TO LINK IT INTO THE FRAGMENT  */
14632 /*                    STORE RECORD. USED FOR STORED REPLICAS.            */
14633 /*************************************************************************/
allocStoredReplica(FragmentstorePtr fragPtr,ReplicaRecordPtr & newReplicaPtr,Uint32 nodeId)14634 void Dbdih::allocStoredReplica(FragmentstorePtr fragPtr,
14635                                ReplicaRecordPtr& newReplicaPtr,
14636                                Uint32 nodeId)
14637 {
14638   Uint32 i;
14639   ReplicaRecordPtr arrReplicaPtr;
14640   ReplicaRecordPtr arrPrevReplicaPtr;
14641 
14642   seizeReplicaRec(newReplicaPtr);
14643   for (i = 0; i < MAX_LCP_STORED; i++) {
14644     newReplicaPtr.p->maxGciCompleted[i] = 0;
14645     newReplicaPtr.p->maxGciStarted[i] = 0;
14646     newReplicaPtr.p->lcpId[i] = 0;
14647     newReplicaPtr.p->lcpStatus[i] = ZINVALID;
14648   }//for
14649   newReplicaPtr.p->noCrashedReplicas = 0;
14650   newReplicaPtr.p->initialGci = (Uint32)(m_micro_gcp.m_current_gci >> 32);
14651   for (i = 0; i < MAX_CRASHED_REPLICAS; i++) {
14652     newReplicaPtr.p->replicaLastGci[i] = ZINIT_REPLICA_LAST_GCI;
14653     newReplicaPtr.p->createGci[i] = ZINIT_CREATE_GCI;
14654   }//for
14655   newReplicaPtr.p->createGci[0] = (Uint32)(m_micro_gcp.m_current_gci >> 32);
14656   newReplicaPtr.p->nextLcp = 0;
14657   newReplicaPtr.p->procNode = nodeId;
14658   newReplicaPtr.p->lcpOngoingFlag = false;
14659   newReplicaPtr.p->lcpIdStarted = 0;
14660 
14661   arrPrevReplicaPtr.i = RNIL;
14662   arrReplicaPtr.i = fragPtr.p->storedReplicas;
14663   while (arrReplicaPtr.i != RNIL) {
14664     jam();
14665     ptrCheckGuard(arrReplicaPtr, creplicaFileSize, replicaRecord);
14666     arrPrevReplicaPtr = arrReplicaPtr;
14667     arrReplicaPtr.i = arrReplicaPtr.p->nextReplica;
14668   }//while
14669   if (arrPrevReplicaPtr.i == RNIL) {
14670     jam();
14671     fragPtr.p->storedReplicas = newReplicaPtr.i;
14672   } else {
14673     jam();
14674     arrPrevReplicaPtr.p->nextReplica = newReplicaPtr.i;
14675   }//if
14676   fragPtr.p->noStoredReplicas++;
14677 }//Dbdih::allocStoredReplica()
14678 
14679 /*************************************************************************/
14680 /* CHECK IF THE NODE CRASH IS TO ESCALATE INTO A SYSTEM CRASH. WE COULD  */
14681 /* DO THIS BECAUSE ALL REPLICAS OF SOME FRAGMENT ARE LOST. WE COULD ALSO */
14682 /* DO IT AFTER MANY NODE FAILURES THAT MAKE IT VERY DIFFICULT TO RESTORE */
14683 /* DATABASE AFTER A SYSTEM CRASH. IT MIGHT EVEN BE IMPOSSIBLE AND THIS   */
14684 /* MUST BE AVOIDED EVEN MORE THAN AVOIDING SYSTEM CRASHES.               */
14685 /*************************************************************************/
checkEscalation()14686 void Dbdih::checkEscalation()
14687 {
14688   Uint32 TnodeGroup[MAX_NDB_NODES];
14689   NodeRecordPtr nodePtr;
14690   Uint32 i;
14691   for (i = 0; i < cnoOfNodeGroups; i++) {
14692     TnodeGroup[i] = ZFALSE;
14693   }//for
14694   for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
14695     jam();
14696     ptrAss(nodePtr, nodeRecord);
14697     if (nodePtr.p->nodeStatus == NodeRecord::ALIVE &&
14698 	nodePtr.p->activeStatus == Sysfile::NS_Active){
14699       ndbrequire(nodePtr.p->nodeGroup < MAX_NDB_NODES);
14700       TnodeGroup[nodePtr.p->nodeGroup] = ZTRUE;
14701     }
14702   }
14703   for (i = 0; i < cnoOfNodeGroups; i++) {
14704     jam();
14705     if (TnodeGroup[c_node_groups[i]] == ZFALSE) {
14706       jam();
14707       progError(__LINE__, NDBD_EXIT_LOST_NODE_GROUP, "Lost node group");
14708     }//if
14709   }//for
14710 }//Dbdih::checkEscalation()
14711 
14712 /*************************************************************************/
14713 /*                                                                       */
14714 /*       MODULE: CHECK_KEEP_GCI                                          */
14715 /*       DESCRIPTION: CHECK FOR MINIMUM GCI RESTORABLE WITH NEW LOCAL    */
14716 /*                    CHECKPOINT.                                        */
14717 /*************************************************************************/
checkKeepGci(TabRecordPtr tabPtr,Uint32 fragId,Fragmentstore *,Uint32 replicaStartIndex)14718 void Dbdih::checkKeepGci(TabRecordPtr tabPtr, Uint32 fragId, Fragmentstore*,
14719 			 Uint32 replicaStartIndex)
14720 {
14721   ReplicaRecordPtr ckgReplicaPtr;
14722   ckgReplicaPtr.i = replicaStartIndex;
14723   while (ckgReplicaPtr.i != RNIL) {
14724     jam();
14725     ptrCheckGuard(ckgReplicaPtr, creplicaFileSize, replicaRecord);
14726     if (c_lcpState.m_participatingLQH.get(ckgReplicaPtr.p->procNode))
14727     {
14728       Uint32 keepGci;
14729       Uint32 oldestRestorableGci;
14730       findMinGci(ckgReplicaPtr, keepGci, oldestRestorableGci);
14731       if (keepGci < c_lcpState.keepGci) {
14732         jam();
14733         /* ----------------------------------------------------------------- */
14734         /* WE MUST KEEP LOG RECORDS SO THAT WE CAN USE ALL LOCAL CHECKPOINTS */
14735         /* THAT ARE AVAILABLE. THUS WE NEED TO CALCULATE THE MINIMUM OVER ALL*/
14736         /* FRAGMENTS.                                                        */
14737         /* ----------------------------------------------------------------- */
14738         c_lcpState.keepGci = keepGci;
14739       }//if
14740       if (oldestRestorableGci > c_lcpState.oldestRestorableGci) {
14741         jam();
14742         c_lcpState.oldestRestorableGci = oldestRestorableGci;
14743       }//if
14744     }
14745     ckgReplicaPtr.i = ckgReplicaPtr.p->nextReplica;
14746   }//while
14747 }//Dbdih::checkKeepGci()
14748 
closeFile(Signal * signal,FileRecordPtr filePtr)14749 void Dbdih::closeFile(Signal* signal, FileRecordPtr filePtr)
14750 {
14751   signal->theData[0] = filePtr.p->fileRef;
14752   signal->theData[1] = reference();
14753   signal->theData[2] = filePtr.i;
14754   signal->theData[3] = ZCLOSE_NO_DELETE;
14755   sendSignal(NDBFS_REF, GSN_FSCLOSEREQ, signal, 4, JBA);
14756 }//Dbdih::closeFile()
14757 
closeFileDelete(Signal * signal,FileRecordPtr filePtr)14758 void Dbdih::closeFileDelete(Signal* signal, FileRecordPtr filePtr)
14759 {
14760   signal->theData[0] = filePtr.p->fileRef;
14761   signal->theData[1] = reference();
14762   signal->theData[2] = filePtr.i;
14763   signal->theData[3] = ZCLOSE_DELETE;
14764   sendSignal(NDBFS_REF, GSN_FSCLOSEREQ, signal, 4, JBA);
14765 }//Dbdih::closeFileDelete()
14766 
createFileRw(Signal * signal,FileRecordPtr filePtr)14767 void Dbdih::createFileRw(Signal* signal, FileRecordPtr filePtr)
14768 {
14769   signal->theData[0] = reference();
14770   signal->theData[1] = filePtr.i;
14771   signal->theData[2] = filePtr.p->fileName[0];
14772   signal->theData[3] = filePtr.p->fileName[1];
14773   signal->theData[4] = filePtr.p->fileName[2];
14774   signal->theData[5] = filePtr.p->fileName[3];
14775   signal->theData[6] = ZCREATE_READ_WRITE;
14776   sendSignal(NDBFS_REF, GSN_FSOPENREQ, signal, 7, JBA);
14777 }//Dbdih::createFileRw()
14778 
14779 void
emptyverificbuffer(Signal * signal,Uint32 q,bool aContinueB)14780 Dbdih::emptyverificbuffer(Signal* signal, Uint32 q, bool aContinueB)
14781 {
14782   if(unlikely(getBlockCommit() == true))
14783   {
14784     jam();
14785     return;
14786   }
14787 
14788   if (!isEmpty(c_diverify_queue[q]))
14789   {
14790     jam();
14791 
14792     ApiConnectRecord localApiConnect;
14793     dequeue(c_diverify_queue[q], localApiConnect);
14794     ndbrequire(localApiConnect.apiGci <= m_micro_gcp.m_current_gci);
14795     signal->theData[0] = localApiConnect.senderData;
14796     signal->theData[1] = (Uint32)(m_micro_gcp.m_current_gci >> 32);
14797     signal->theData[2] = (Uint32)(m_micro_gcp.m_current_gci & 0xFFFFFFFF);
14798     signal->theData[3] = 0;
14799     sendSignal(c_diverify_queue[q].m_ref, GSN_DIVERIFYCONF, signal, 4, JBB);
14800   }
14801   else if (aContinueB == true)
14802   {
14803     jam();
14804     /**
14805      * Make sure that we don't miss any pending transactions
14806      *   (transactions that are added to list by other thread
14807      *    while we execute this code)
14808      */
14809     Uint32 blocks[] = { DBTC, 0 };
14810     Callback c = { safe_cast(&Dbdih::emptyverificbuffer_check), q };
14811     synchronize_threads_for_blocks(signal, blocks, c);
14812     return;
14813   }
14814 
14815   if (aContinueB == true)
14816   {
14817     jam();
14818     //-----------------------------------------------------------------------
14819     // This emptying happened as part of a take-out process by continueb signals
14820     // This ensures that we will empty the queue eventually. We will also empty
14821     // one item every time we insert one item to ensure that the list doesn't
14822     // grow when it is not blocked.
14823     //-----------------------------------------------------------------------
14824     signal->theData[0] = DihContinueB::ZEMPTY_VERIFY_QUEUE;
14825     signal->theData[1] = q;
14826     sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
14827   }//if
14828 
14829   return;
14830 }//Dbdih::emptyverificbuffer()
14831 
14832 void
emptyverificbuffer_check(Signal * signal,Uint32 q,Uint32 retVal)14833 Dbdih::emptyverificbuffer_check(Signal* signal, Uint32 q, Uint32 retVal)
14834 {
14835   ndbrequire(retVal == 0);
14836   if (!isEmpty(c_diverify_queue[q]))
14837   {
14838     jam();
14839     signal->theData[0] = DihContinueB::ZEMPTY_VERIFY_QUEUE;
14840     signal->theData[1] = q;
14841     sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
14842   }
14843   else
14844   {
14845     /**
14846      * Done with emptyverificbuffer
14847      */
14848     c_diverify_queue[q].m_empty_done = 1;
14849   }
14850 }
14851 
14852 /*************************************************************************/
14853 /*       FIND THE NODES FROM WHICH WE CAN EXECUTE THE LOG TO RESTORE THE */
14854 /*       DATA NODE IN A SYSTEM RESTART.                                  */
14855 /*************************************************************************/
findLogNodes(CreateReplicaRecord * createReplica,FragmentstorePtr fragPtr,Uint32 startGci,Uint32 stopGci)14856 bool Dbdih::findLogNodes(CreateReplicaRecord* createReplica,
14857                          FragmentstorePtr fragPtr,
14858                          Uint32 startGci,
14859                          Uint32 stopGci)
14860 {
14861   ConstPtr<ReplicaRecord> flnReplicaPtr;
14862   flnReplicaPtr.i = createReplica->replicaRec;
14863   ptrCheckGuard(flnReplicaPtr, creplicaFileSize, replicaRecord);
14864   /* --------------------------------------------------------------------- */
14865   /*       WE START BY CHECKING IF THE DATA NODE CAN HANDLE THE LOG ALL BY */
14866   /*       ITSELF. THIS IS THE DESIRED BEHAVIOUR. IF THIS IS NOT POSSIBLE  */
14867   /*       THEN WE SEARCH FOR THE BEST POSSIBLE NODES AMONG THE NODES THAT */
14868   /*       ARE PART OF THIS SYSTEM RESTART.                                */
14869   /*       THIS CAN ONLY BE HANDLED BY THE LAST CRASHED REPLICA.           */
14870   /*       The condition is that the replica was created before or at the  */
14871   /*       time of the starting gci, in addition it must have been alive   */
14872   /*       at the time of the stopping gci. This is checked by two         */
14873   /*       conditions, the first checks replicaLastGci and the second      */
14874   /*       checks that it is also smaller than the last gci the node was   */
14875   /*       involved in. This is necessary to check since createGci is set  */
14876   /*       Last + 1 and sometimes startGci = stopGci + 1 and in that case  */
14877   /*       it could happen that replicaLastGci is set to -1 with CreateGci */
14878   /*       set to LastGci + 1.                                             */
14879   /* --------------------------------------------------------------------- */
14880   arrGuard(flnReplicaPtr.p->noCrashedReplicas, MAX_CRASHED_REPLICAS);
14881   const Uint32 noCrashed = flnReplicaPtr.p->noCrashedReplicas;
14882 
14883   if (!(ERROR_INSERTED(7073) || ERROR_INSERTED(7074))&&
14884       (startGci >= flnReplicaPtr.p->createGci[noCrashed]) &&
14885       (stopGci <= flnReplicaPtr.p->replicaLastGci[noCrashed]) &&
14886       (stopGci <= SYSFILE->lastCompletedGCI[flnReplicaPtr.p->procNode])) {
14887     jam();
14888     /* --------------------------------------------------------------------- */
14889     /*       WE FOUND ALL THE LOG RECORDS NEEDED IN THE DATA NODE. WE WILL   */
14890     /*       USE THOSE.                                                      */
14891     /* --------------------------------------------------------------------- */
14892     createReplica->noLogNodes = 1;
14893     createReplica->logStartGci[0] = startGci;
14894     createReplica->logStopGci[0] = stopGci;
14895     createReplica->logNodeId[0] = flnReplicaPtr.p->procNode;
14896     return true;
14897   }//if
14898   Uint32 logNode = 0;
14899   do {
14900     Uint32 fblStopGci;
14901     jam();
14902     if(!findBestLogNode(createReplica,
14903 			fragPtr,
14904 			startGci,
14905 			stopGci,
14906 			logNode,
14907 			fblStopGci)){
14908       jam();
14909       return false;
14910     }
14911 
14912     logNode++;
14913     if (fblStopGci >= stopGci) {
14914       jam();
14915       createReplica->noLogNodes = logNode;
14916       return true;
14917     }//if
14918     startGci = fblStopGci + 1;
14919     if (logNode >= MAX_LOG_EXEC)
14920     {
14921       jam();
14922       break;
14923     }//if
14924   } while (1);
14925   /* --------------------------------------------------------------------- */
14926   /*       IT WAS NOT POSSIBLE TO RESTORE THE REPLICA. THIS CAN EITHER BE  */
14927   /*       BECAUSE OF LACKING NODES OR BECAUSE OF A REALLY SERIOUS PROBLEM.*/
14928   /* --------------------------------------------------------------------- */
14929   return false;
14930 }//Dbdih::findLogNodes()
14931 
14932 /*************************************************************************/
14933 /*       FIND THE BEST POSSIBLE LOG NODE TO EXECUTE THE LOG AS SPECIFIED */
14934 /*       BY THE INPUT PARAMETERS. WE SCAN THROUGH ALL ALIVE REPLICAS.    */
14935 /*       THIS MEANS STORED, OLD_STORED                                   */
14936 /*************************************************************************/
14937 bool
findBestLogNode(CreateReplicaRecord * createReplica,FragmentstorePtr fragPtr,Uint32 startGci,Uint32 stopGci,Uint32 logNode,Uint32 & fblStopGci)14938 Dbdih::findBestLogNode(CreateReplicaRecord* createReplica,
14939 		       FragmentstorePtr fragPtr,
14940 		       Uint32 startGci,
14941 		       Uint32 stopGci,
14942 		       Uint32 logNode,
14943 		       Uint32& fblStopGci)
14944 {
14945   ConstPtr<ReplicaRecord> fblFoundReplicaPtr;
14946   ConstPtr<ReplicaRecord> fblReplicaPtr;
14947   LINT_INIT(fblFoundReplicaPtr.p);
14948 
14949   /* --------------------------------------------------------------------- */
14950   /*       WE START WITH ZERO AS FOUND TO ENSURE THAT FIRST HIT WILL BE    */
14951   /*       BETTER.                                                         */
14952   /* --------------------------------------------------------------------- */
14953   fblStopGci = 0;
14954   fblReplicaPtr.i = fragPtr.p->storedReplicas;
14955   while (fblReplicaPtr.i != RNIL) {
14956     jam();
14957     ptrCheckGuard(fblReplicaPtr, creplicaFileSize, replicaRecord);
14958     if (m_sr_nodes.get(fblReplicaPtr.p->procNode))
14959     {
14960       jam();
14961       Uint32 fliStopGci = findLogInterval(fblReplicaPtr, startGci);
14962       if (fliStopGci > fblStopGci)
14963       {
14964         jam();
14965         fblStopGci = fliStopGci;
14966         fblFoundReplicaPtr = fblReplicaPtr;
14967       }//if
14968     }//if
14969     fblReplicaPtr.i = fblReplicaPtr.p->nextReplica;
14970   }//while
14971   fblReplicaPtr.i = fragPtr.p->oldStoredReplicas;
14972   while (fblReplicaPtr.i != RNIL) {
14973     jam();
14974     ptrCheckGuard(fblReplicaPtr, creplicaFileSize, replicaRecord);
14975     if (m_sr_nodes.get(fblReplicaPtr.p->procNode))
14976     {
14977       jam();
14978       Uint32 fliStopGci = findLogInterval(fblReplicaPtr, startGci);
14979       if (fliStopGci > fblStopGci)
14980       {
14981         jam();
14982         fblStopGci = fliStopGci;
14983         fblFoundReplicaPtr = fblReplicaPtr;
14984       }//if
14985     }//if
14986     fblReplicaPtr.i = fblReplicaPtr.p->nextReplica;
14987   }//while
14988   if (fblStopGci != 0) {
14989     jam();
14990     ndbrequire(logNode < MAX_LOG_EXEC);
14991     createReplica->logNodeId[logNode] = fblFoundReplicaPtr.p->procNode;
14992     createReplica->logStartGci[logNode] = startGci;
14993     if (fblStopGci >= stopGci) {
14994       jam();
14995       createReplica->logStopGci[logNode] = stopGci;
14996     } else {
14997       jam();
14998       createReplica->logStopGci[logNode] = fblStopGci;
14999     }//if
15000   }//if
15001 
15002   return fblStopGci != 0;
15003 }//Dbdih::findBestLogNode()
15004 
findLogInterval(ConstPtr<ReplicaRecord> replicaPtr,Uint32 startGci)15005 Uint32 Dbdih::findLogInterval(ConstPtr<ReplicaRecord> replicaPtr,
15006 			      Uint32 startGci)
15007 {
15008   ndbrequire(replicaPtr.p->noCrashedReplicas <= MAX_CRASHED_REPLICAS);
15009   Uint32 loopLimit = replicaPtr.p->noCrashedReplicas + 1;
15010   for (Uint32 i = 0; i < loopLimit; i++) {
15011     jam();
15012     if (replicaPtr.p->createGci[i] <= startGci) {
15013       if (replicaPtr.p->replicaLastGci[i] >= startGci) {
15014         jam();
15015         return replicaPtr.p->replicaLastGci[i];
15016       }//if
15017     }//if
15018   }//for
15019   return 0;
15020 }//Dbdih::findLogInterval()
15021 
15022 /*************************************************************************/
15023 /*                                                                       */
15024 /*       MODULE: FIND THE MINIMUM GCI THAT THIS NODE HAS LOG RECORDS FOR.*/
15025 /*************************************************************************/
findMinGci(ReplicaRecordPtr fmgReplicaPtr,Uint32 & keepGci,Uint32 & oldestRestorableGci)15026 void Dbdih::findMinGci(ReplicaRecordPtr fmgReplicaPtr,
15027                        Uint32& keepGci,
15028                        Uint32& oldestRestorableGci)
15029 {
15030   keepGci = (Uint32)-1;
15031   oldestRestorableGci = 0;
15032 
15033   Uint32 maxLcpId = 0;              // LcpId of latest valid LCP
15034   Uint32 maxLcpNo = MAX_LCP_STORED; // Index of latest valid LCP
15035   for (Uint32 i = 0; i < MAX_LCP_STORED; i++)
15036   {
15037     jam();
15038     if (fmgReplicaPtr.p->lcpStatus[i] == ZVALID)
15039     {
15040       if ((fmgReplicaPtr.p->lcpId[i] + MAX_LCP_STORED) <= (SYSFILE->latestLCP_ID + 1))
15041       {
15042         jam();
15043         /*-----------------------------------------------------------------*/
15044         // We invalidate the checkpoint we are preparing to overwrite.
15045         // The LCP id is still the old lcp id,
15046         // this is the reason of comparing with lcpId + 1.
15047         /*-----------------------------------------------------------------*/
15048         fmgReplicaPtr.p->lcpStatus[i] = ZINVALID;
15049       }
15050       else if (fmgReplicaPtr.p->lcpId[i] > maxLcpId)
15051       {
15052         jam();
15053         maxLcpId = fmgReplicaPtr.p->lcpId[i];
15054         maxLcpNo = i;
15055       }
15056     }
15057   }
15058 
15059   if (maxLcpNo < MAX_LCP_STORED)
15060   {
15061     /**
15062      * Only consider latest LCP (wrt to how to cut REDO)
15063      */
15064     jam();
15065     keepGci = fmgReplicaPtr.p->maxGciCompleted[maxLcpNo];
15066     oldestRestorableGci = fmgReplicaPtr.p->maxGciStarted[maxLcpNo];
15067   }
15068 
15069   if (oldestRestorableGci == 0 && keepGci == Uint32(-1))
15070   {
15071     jam();
15072     if (fmgReplicaPtr.p->createGci[0] == fmgReplicaPtr.p->initialGci)
15073     {
15074       keepGci = fmgReplicaPtr.p->createGci[0];
15075       // XXX Jonas
15076       //oldestRestorableGci = fmgReplicaPtr.p->createGci[0];
15077     }
15078   }
15079   else
15080   {
15081     ndbassert(oldestRestorableGci <= c_newest_restorable_gci);
15082   }
15083   return;
15084 }//Dbdih::findMinGci()
15085 
findStartGci(ConstPtr<ReplicaRecord> replicaPtr,Uint32 stopGci,Uint32 & startGci,Uint32 & lcpNo)15086 bool Dbdih::findStartGci(ConstPtr<ReplicaRecord> replicaPtr,
15087                          Uint32 stopGci,
15088                          Uint32& startGci,
15089                          Uint32& lcpNo)
15090 {
15091   Uint32 cnt = 0;
15092   Uint32 tmp[MAX_LCP_STORED];
15093   for (Uint32 i = 0; i<MAX_LCP_STORED; i++)
15094   {
15095     jam();
15096     if (replicaPtr.p->lcpStatus[i] == ZVALID &&
15097         replicaPtr.p->maxGciStarted[i] <= stopGci)
15098     {
15099       /**
15100        * In order to use LCP
15101        *   we must be able to run REDO atleast up until maxGciStarted
15102        *   which is that highest GCI that
15103        */
15104       jam();
15105       tmp[cnt] = i;
15106       cnt++;
15107     }
15108   }
15109 
15110   if (cnt)
15111   {
15112     jam();
15113     /**
15114      * We found atleast one...get the highest
15115      */
15116     lcpNo = tmp[0];
15117     Uint32 lcpId = replicaPtr.p->lcpId[lcpNo];
15118     for (Uint32 i = 1; i<cnt; i++)
15119     {
15120       jam();
15121       if (replicaPtr.p->lcpId[tmp[i]] > lcpId)
15122       {
15123         jam();
15124         lcpNo = tmp[i];
15125         lcpId = replicaPtr.p->lcpId[lcpNo];
15126       }
15127     }
15128     startGci = replicaPtr.p->maxGciCompleted[lcpNo] + 1;
15129     return true;
15130   }
15131 
15132   /* --------------------------------------------------------------------- */
15133   /*       NO VALID LOCAL CHECKPOINT WAS AVAILABLE. WE WILL ADD THE        */
15134   /*       FRAGMENT. THUS THE NEXT LCP MUST BE SET TO ZERO.                */
15135   /*       WE MUST EXECUTE THE LOG FROM THE INITIAL GLOBAL CHECKPOINT WHEN */
15136   /*       THE TABLE WAS CREATED.                                          */
15137   /* --------------------------------------------------------------------- */
15138   startGci = replicaPtr.p->initialGci;
15139   ndbrequire(replicaPtr.p->nextLcp == 0);
15140   return false;
15141 }//Dbdih::findStartGci()
15142 
15143 static
15144 Uint32
count_db_nodes(ndb_mgm_configuration_iterator * iter)15145 count_db_nodes(ndb_mgm_configuration_iterator * iter)
15146 {
15147   Uint32 cnt = 0;
15148   for (ndb_mgm_first(iter); ndb_mgm_valid(iter); ndb_mgm_next(iter))
15149   {
15150     Uint32 nodeId = 0;
15151     Uint32 type = ~Uint32(0);
15152     if (ndb_mgm_get_int_parameter(iter, CFG_NODE_ID, &nodeId) == 0 &&
15153         ndb_mgm_get_int_parameter(iter,CFG_TYPE_OF_SECTION, &type) == 0 &&
15154         type == NodeInfo::DB)
15155     {
15156       cnt++;
15157     }
15158   }
15159   return cnt;
15160 }
15161 
15162 /**
15163  * Compute max time it can take to "resolve" cascading node-failures
15164  *   given hb-interval, arbit timeout and #db-nodes
15165  */
15166 static
15167 Uint32
compute_max_failure_time(const ndb_mgm_configuration_iterator * p,ndb_mgm_configuration_iterator * cluster)15168 compute_max_failure_time(const ndb_mgm_configuration_iterator * p,
15169                          ndb_mgm_configuration_iterator * cluster)
15170 {
15171   Uint32 dbnodes = count_db_nodes(cluster);
15172 
15173   Uint32 hbDBDB = 1500;
15174   Uint32 arbitTimeout = 1000;
15175   ndb_mgm_get_int_parameter(p, CFG_DB_HEARTBEAT_INTERVAL, &hbDBDB);
15176   ndb_mgm_get_int_parameter(p, CFG_DB_ARBIT_TIMEOUT, &arbitTimeout);
15177 
15178   /*
15179    * Max time for 1 node failure is
15180    */
15181   Uint32 max_time_one_failure = arbitTimeout + 4 * hbDBDB;
15182 
15183   /**
15184    * And worst case...this can be cascading failure with all but self
15185    */
15186   Uint32 max_time_total_failure = (dbnodes - 1) * max_time_one_failure;
15187 
15188   return max_time_total_failure;
15189 }
15190 
initCommonData()15191 void Dbdih::initCommonData()
15192 {
15193   c_blockCommit = false;
15194   c_blockCommitNo = 0;
15195   cfailurenr = 1;
15196   cfirstAliveNode = RNIL;
15197   cfirstDeadNode = RNIL;
15198   cgckptflag = false;
15199   cgcpOrderBlocked = 0;
15200 
15201   c_lcpMasterTakeOverState.set(LMTOS_IDLE, __LINE__);
15202 
15203   c_lcpState.clcpDelay = 0;
15204   c_lcpState.lcpStart = ZIDLE;
15205   c_lcpState.lcpStopGcp = 0;
15206   c_lcpState.setLcpStatus(LCP_STATUS_IDLE, __LINE__);
15207   c_lcpState.currentFragment.tableId = 0;
15208   c_lcpState.currentFragment.fragmentId = 0;
15209   c_lcpState.noOfLcpFragRepOutstanding = 0;
15210   c_lcpState.keepGci = 0;
15211   c_lcpState.oldestRestorableGci = 0;
15212   c_lcpState.ctcCounter = 0;
15213   c_lcpState.ctimer = 0;
15214   c_lcpState.immediateLcpStart = false;
15215   c_lcpState.m_MASTER_LCPREQ_Received = false;
15216   c_lcpState.m_lastLCP_COMPLETE_REP_ref = 0;
15217   cmasterdihref = 0;
15218   cmasterNodeId = 0;
15219   cmasterState = MASTER_IDLE;
15220   cmasterTakeOverNode = 0;
15221   cnoOfActiveTables = 0;
15222   cnoOfNodeGroups = 0;
15223   c_nextNodeGroup = 0;
15224   cnoReplicas = 0;
15225   con_lineNodes = 0;
15226   creceivedfrag = 0;
15227   crestartGci = 0;
15228   crestartInfoFile[0] = RNIL;
15229   crestartInfoFile[1] = RNIL;
15230   cstartPhase = 0;
15231   cstarttype = (Uint32)-1;
15232   csystemnodes = 0;
15233   c_newest_restorable_gci = 0;
15234   cwaitLcpSr = false;
15235   c_nodeStartMaster.blockGcp = 0;
15236 
15237   nodeResetStart(0);
15238   c_nodeStartMaster.wait = ZFALSE;
15239 
15240   memset(&sysfileData[0], 0, sizeof(sysfileData));
15241 
15242   const ndb_mgm_configuration_iterator * p =
15243     m_ctx.m_config.getOwnConfigIterator();
15244   ndbrequire(p != 0);
15245 
15246   c_lcpState.clcpDelay = 20;
15247   ndb_mgm_get_int_parameter(p, CFG_DB_LCP_INTERVAL, &c_lcpState.clcpDelay);
15248   c_lcpState.clcpDelay = c_lcpState.clcpDelay > 31 ? 31 : c_lcpState.clcpDelay;
15249 
15250   //ndb_mgm_get_int_parameter(p, CFG_DB_MIN_HOT_SPARES, &cminHotSpareNodes);
15251 
15252   cnoReplicas = 1;
15253   ndb_mgm_get_int_parameter(p, CFG_DB_NO_REPLICAS, &cnoReplicas);
15254   if (cnoReplicas > MAX_REPLICAS)
15255   {
15256     progError(__LINE__, NDBD_EXIT_INVALID_CONFIG,
15257 	      "Only up to four replicas are supported. Check NoOfReplicas.");
15258   }
15259 
15260   Uint32 max_failure_time = compute_max_failure_time
15261     (p, m_ctx.m_config.getClusterConfigIterator());
15262 
15263   bzero(&m_gcp_save, sizeof(m_gcp_save));
15264   bzero(&m_micro_gcp, sizeof(m_micro_gcp));
15265   {
15266     { // Set time-between global checkpoint
15267       Uint32 tmp = 2000;
15268       ndb_mgm_get_int_parameter(p, CFG_DB_GCP_INTERVAL, &tmp);
15269       tmp = tmp > 60000 ? 60000 : (tmp < 10 ? 10 : tmp);
15270       m_gcp_save.m_master.m_time_between_gcp = tmp;
15271     }
15272 
15273     Uint32 tmp = 0;
15274     if (ndb_mgm_get_int_parameter(p, CFG_DB_MICRO_GCP_INTERVAL, &tmp) == 0 &&
15275         tmp)
15276     {
15277       /**
15278        * Set time-between epochs
15279        */
15280       if (tmp > m_gcp_save.m_master.m_time_between_gcp)
15281         tmp = m_gcp_save.m_master.m_time_between_gcp;
15282       if (tmp < 10)
15283         tmp = 10;
15284       m_micro_gcp.m_master.m_time_between_gcp = tmp;
15285     }
15286 
15287     { // Set time-between global checkpoint timeout
15288       Uint32 tmp = 120000;     // No config, hard code 2 minutes
15289       tmp += max_failure_time; //
15290       m_gcp_monitor.m_gcp_save.m_max_lag =
15291         (m_gcp_save.m_master.m_time_between_gcp + tmp) / 100;
15292     }
15293 
15294     { // Set time-between epochs timeout
15295       Uint32 tmp = 4000;
15296       ndb_mgm_get_int_parameter(p, CFG_DB_MICRO_GCP_TIMEOUT, &tmp);
15297       if (tmp != 0)
15298       {
15299         jam();
15300         tmp += max_failure_time;
15301         m_gcp_monitor.m_micro_gcp.m_max_lag =
15302           (m_micro_gcp.m_master.m_time_between_gcp + tmp) / 100;
15303       }
15304       else
15305       {
15306         jam();
15307         m_gcp_monitor.m_gcp_save.m_max_lag = 0;
15308         m_gcp_monitor.m_micro_gcp.m_max_lag = 0;
15309       }
15310     }
15311   }
15312 }//Dbdih::initCommonData()
15313 
initFragstore(FragmentstorePtr fragPtr)15314 void Dbdih::initFragstore(FragmentstorePtr fragPtr)
15315 {
15316   fragPtr.p->storedReplicas = RNIL;
15317   fragPtr.p->oldStoredReplicas = RNIL;
15318 
15319   fragPtr.p->noStoredReplicas = 0;
15320   fragPtr.p->noOldStoredReplicas = 0;
15321   fragPtr.p->fragReplicas = 0;
15322   fragPtr.p->preferredPrimary = 0;
15323 
15324   for (Uint32 i = 0; i < MAX_REPLICAS; i++)
15325     fragPtr.p->activeNodes[i] = 0;
15326 
15327   fragPtr.p->noLcpReplicas = 0;
15328   fragPtr.p->distributionKey = 0;
15329 }//Dbdih::initFragstore()
15330 
15331 /*************************************************************************/
15332 /*                                                                       */
15333 /*       MODULE: INIT_RESTART_INFO                                       */
15334 /*       DESCRIPTION: INITIATE RESTART INFO VARIABLE AND VARIABLES FOR   */
15335 /*                    GLOBAL CHECKPOINTS.                                */
15336 /*************************************************************************/
initRestartInfo(Signal * signal)15337 void Dbdih::initRestartInfo(Signal* signal)
15338 {
15339   Uint32 i;
15340   for (i = 0; i < MAX_NDB_NODES; i++) {
15341     SYSFILE->lastCompletedGCI[i] = 0;
15342   }//for
15343   NodeRecordPtr nodePtr;
15344   nodePtr.i = cfirstAliveNode;
15345   do {
15346     jam();
15347     ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
15348     SYSFILE->lastCompletedGCI[nodePtr.i] = 1;
15349     /* FIRST GCP = 1 ALREADY SET BY LQH */
15350     nodePtr.i = nodePtr.p->nextNode;
15351   } while (nodePtr.i != RNIL);
15352 
15353   Uint32 startGci = 1;
15354 #ifndef DBUG_OFF
15355   {
15356     char envBuf[256];
15357     const char* v = NdbEnv_GetEnv("NDB_START_GCI",
15358                                   envBuf,
15359                                   256);
15360     if (v && *v != 0)
15361     {
15362       startGci = strtoull(v, NULL, 0);
15363 
15364       ndbout_c("DbDih : Using value of %u from NDB_START_GCI",
15365                startGci);
15366     }
15367   }
15368 #endif
15369 
15370   m_micro_gcp.m_old_gci = Uint64(startGci) << 32;
15371   m_micro_gcp.m_current_gci = Uint64(startGci + 1) << 32;
15372   crestartGci = startGci;
15373   c_newest_restorable_gci = startGci;
15374 
15375   SYSFILE->keepGCI             = startGci;
15376   SYSFILE->oldestRestorableGCI = startGci;
15377   SYSFILE->newestRestorableGCI = startGci;
15378   SYSFILE->systemRestartBits   = 0;
15379   for (i = 0; i < NdbNodeBitmask::Size; i++) {
15380     SYSFILE->lcpActive[0]        = 0;
15381   }//for
15382   for (i = 0; i < Sysfile::TAKE_OVER_SIZE; i++) {
15383     SYSFILE->takeOver[i] = 0;
15384   }//for
15385   Sysfile::setInitialStartOngoing(SYSFILE->systemRestartBits);
15386   srand((unsigned int)time(0));
15387   globalData.m_restart_seq = SYSFILE->m_restart_seq = 0;
15388 
15389   if (m_micro_gcp.m_enabled == false &&
15390       m_micro_gcp.m_master.m_time_between_gcp)
15391   {
15392     /**
15393      * Micro GCP is disabled...but configured...
15394      */
15395     jam();
15396     m_micro_gcp.m_enabled = true;
15397     UpgradeProtocolOrd * ord = (UpgradeProtocolOrd*)signal->getDataPtrSend();
15398     ord->type = UpgradeProtocolOrd::UPO_ENABLE_MICRO_GCP;
15399     EXECUTE_DIRECT(QMGR,GSN_UPGRADE_PROTOCOL_ORD,signal,signal->getLength());
15400   }
15401 }//Dbdih::initRestartInfo()
15402 
15403 /*--------------------------------------------------------------------*/
15404 /*       NODE GROUP BITS ARE INITIALISED BEFORE THIS.                 */
15405 /*       NODE ACTIVE BITS ARE INITIALISED BEFORE THIS.                */
15406 /*--------------------------------------------------------------------*/
15407 /*************************************************************************/
15408 /*                                                                       */
15409 /*       MODULE: INIT_RESTORABLE_GCI_FILES                               */
/*       DESCRIPTION: THE SUBROUTINE SETS UP THE FILE RECORDS THAT REFER */
/*       TO THE FILES THAT KEEP THE VARIABLE CRESTART_INFO               */
15412 /*************************************************************************/
initRestorableGciFiles()15413 void Dbdih::initRestorableGciFiles()
15414 {
15415   Uint32 tirgTmp;
15416   FileRecordPtr filePtr;
15417   seizeFile(filePtr);
15418   filePtr.p->tabRef = RNIL;
15419   filePtr.p->fileType = FileRecord::GCP_FILE;
15420   filePtr.p->reqStatus = FileRecord::IDLE;
15421   filePtr.p->fileStatus = FileRecord::CLOSED;
15422   crestartInfoFile[0] = filePtr.i;
15423   filePtr.p->fileName[0] = (Uint32)-1;  /* T DIRECTORY NOT USED  */
15424   filePtr.p->fileName[1] = (Uint32)-1;  /* F DIRECTORY NOT USED  */
15425   filePtr.p->fileName[2] = (Uint32)-1;  /* S PART IGNORED        */
15426   tirgTmp = 1;  /* FILE NAME VERSION 1   */
15427   tirgTmp = (tirgTmp << 8) + 6; /* .SYSFILE              */
15428   tirgTmp = (tirgTmp << 8) + 1; /* D1 DIRECTORY          */
15429   tirgTmp = (tirgTmp << 8) + 0; /* P0 FILE NAME          */
15430   filePtr.p->fileName[3] = tirgTmp;
15431   /* --------------------------------------------------------------------- */
15432   /*       THE NAME BECOMES /D1/DBDICT/S0.SYSFILE                          */
15433   /* --------------------------------------------------------------------- */
15434   seizeFile(filePtr);
15435   filePtr.p->tabRef = RNIL;
15436   filePtr.p->fileType = FileRecord::GCP_FILE;
15437   filePtr.p->reqStatus = FileRecord::IDLE;
15438   filePtr.p->fileStatus = FileRecord::CLOSED;
15439   crestartInfoFile[1] = filePtr.i;
15440   filePtr.p->fileName[0] = (Uint32)-1;  /* T DIRECTORY NOT USED  */
15441   filePtr.p->fileName[1] = (Uint32)-1;  /* F DIRECTORY NOT USED  */
15442   filePtr.p->fileName[2] = (Uint32)-1;  /* S PART IGNORED        */
15443   tirgTmp = 1;  /* FILE NAME VERSION 1   */
15444   tirgTmp = (tirgTmp << 8) + 6; /* .SYSFILE              */
15445   tirgTmp = (tirgTmp << 8) + 2; /* D1 DIRECTORY          */
15446   tirgTmp = (tirgTmp << 8) + 0; /* P0 FILE NAME          */
15447   filePtr.p->fileName[3] = tirgTmp;
15448   /* --------------------------------------------------------------------- */
15449   /*       THE NAME BECOMES /D2/DBDICT/P0.SYSFILE                          */
15450   /* --------------------------------------------------------------------- */
15451 }//Dbdih::initRestorableGciFiles()
15452 
initTable(TabRecordPtr tabPtr)15453 void Dbdih::initTable(TabRecordPtr tabPtr)
15454 {
15455   new (tabPtr.p) TabRecord();
15456   tabPtr.p->noOfFragChunks = 0;
15457   tabPtr.p->method = TabRecord::NOTDEFINED;
15458   tabPtr.p->tabStatus = TabRecord::TS_IDLE;
15459   tabPtr.p->noOfWords = 0;
15460   tabPtr.p->noPages = 0;
15461   tabPtr.p->tabLcpStatus = TabRecord::TLS_COMPLETED;
15462   tabPtr.p->tabCopyStatus = TabRecord::CS_IDLE;
15463   tabPtr.p->tabUpdateState = TabRecord::US_IDLE;
15464   tabPtr.p->noOfBackups = 0;
15465   tabPtr.p->kvalue = 0;
15466   tabPtr.p->hashpointer = (Uint32)-1;
15467   tabPtr.p->mask = 0;
15468   tabPtr.p->tabStorage = TabRecord::ST_NORMAL;
15469   tabPtr.p->tabErrorCode = 0;
15470   tabPtr.p->schemaVersion = (Uint32)-1;
15471   tabPtr.p->tabRemoveNode = RNIL;
15472   tabPtr.p->totalfragments = (Uint32)-1;
15473   tabPtr.p->connectrec = RNIL;
15474   tabPtr.p->tabFile[0] = RNIL;
15475   tabPtr.p->tabFile[1] = RNIL;
15476   tabPtr.p->m_dropTab.tabUserRef = 0;
15477   tabPtr.p->m_dropTab.tabUserPtr = RNIL;
15478   Uint32 i;
15479   for (i = 0; i < NDB_ARRAY_SIZE(tabPtr.p->startFid); i++) {
15480     tabPtr.p->startFid[i] = RNIL;
15481   }//for
15482   for (i = 0; i < NDB_ARRAY_SIZE(tabPtr.p->pageRef); i++) {
15483     tabPtr.p->pageRef[i] = RNIL;
15484   }//for
15485   tabPtr.p->tableType = DictTabInfo::UndefTableType;
15486   tabPtr.p->schemaTransId = 0;
15487 }//Dbdih::initTable()
15488 
15489 /*************************************************************************/
15490 /*                                                                       */
15491 /*       MODULE: INIT_TABLE_FILES                                        */
15492 /*       DESCRIPTION: THE SUBROUTINE SETS UP THE FILES THAT REFERS TO THE*/
15493 /*       FILES THAT KEEP THE TABLE FRAGMENTATION DESCRIPTION.            */
15494 /*************************************************************************/
initTableFile(TabRecordPtr tabPtr)15495 void Dbdih::initTableFile(TabRecordPtr tabPtr)
15496 {
15497   Uint32 titfTmp;
15498   FileRecordPtr filePtr;
15499   seizeFile(filePtr);
15500   filePtr.p->tabRef = tabPtr.i;
15501   filePtr.p->fileType = FileRecord::TABLE_FILE;
15502   filePtr.p->reqStatus = FileRecord::IDLE;
15503   filePtr.p->fileStatus = FileRecord::CLOSED;
15504   tabPtr.p->tabFile[0] = filePtr.i;
15505   filePtr.p->fileName[0] = (Uint32)-1;  /* T DIRECTORY NOT USED  */
15506   filePtr.p->fileName[1] = (Uint32)-1;  /* F DIRECTORY NOT USED  */
15507   filePtr.p->fileName[2] = tabPtr.i;    /* Stid FILE NAME        */
15508   titfTmp = 1;  /* FILE NAME VERSION 1   */
15509   titfTmp = (titfTmp << 8) + 3; /* .FRAGLIST             */
15510   titfTmp = (titfTmp << 8) + 1; /* D1 DIRECTORY          */
15511   titfTmp = (titfTmp << 8) + 255;       /* P PART IGNORED        */
15512   filePtr.p->fileName[3] = titfTmp;
15513   /* --------------------------------------------------------------------- */
15514   /*       THE NAME BECOMES /D1/DBDICT/Stid.FRAGLIST                       */
15515   /* --------------------------------------------------------------------- */
15516   seizeFile(filePtr);
15517   filePtr.p->tabRef = tabPtr.i;
15518   filePtr.p->fileType = FileRecord::TABLE_FILE;
15519   filePtr.p->reqStatus = FileRecord::IDLE;
15520   filePtr.p->fileStatus = FileRecord::CLOSED;
15521   tabPtr.p->tabFile[1] = filePtr.i;
15522   filePtr.p->fileName[0] = (Uint32)-1;  /* T DIRECTORY NOT USED  */
15523   filePtr.p->fileName[1] = (Uint32)-1;  /* F DIRECTORY NOT USED  */
15524   filePtr.p->fileName[2] = tabPtr.i;    /* Stid FILE NAME        */
15525   titfTmp = 1;  /* FILE NAME VERSION 1   */
15526   titfTmp = (titfTmp << 8) + 3; /* .FRAGLIST             */
15527   titfTmp = (titfTmp << 8) + 2; /* D2 DIRECTORY          */
15528   titfTmp = (titfTmp << 8) + 255;       /* P PART IGNORED        */
15529   filePtr.p->fileName[3] = titfTmp;
15530   /* --------------------------------------------------------------------- */
15531   /*       THE NAME BECOMES /D2/DBDICT/Stid.FRAGLIST                       */
15532   /* --------------------------------------------------------------------- */
15533 }//Dbdih::initTableFile()
15534 
initialiseRecordsLab(Signal * signal,Uint32 stepNo,Uint32 retRef,Uint32 retData)15535 void Dbdih::initialiseRecordsLab(Signal* signal,
15536 				 Uint32 stepNo, Uint32 retRef, Uint32 retData)
15537 {
15538   switch (stepNo) {
15539   case 0:
15540     jam();
15541     initCommonData();
15542     break;
15543   case 1:{
15544     ApiConnectRecordPtr apiConnectptr;
15545     jam();
15546     c_diverify_queue[0].m_ref = calcTcBlockRef(getOwnNodeId());
15547     for (Uint32 i = 0; i < c_diverify_queue_cnt; i++)
15548     {
15549       if (c_diverify_queue_cnt > 1)
15550       {
15551         c_diverify_queue[i].m_ref = numberToRef(DBTC, i + 1, 0);
15552       }
15553       /******** INTIALIZING API CONNECT RECORDS ********/
15554       for (apiConnectptr.i = 0;
15555            apiConnectptr.i < capiConnectFileSize; apiConnectptr.i++)
15556       {
15557         refresh_watch_dog();
15558         ptrAss(apiConnectptr, c_diverify_queue[i].apiConnectRecord);
15559         apiConnectptr.p->senderData = RNIL;
15560         apiConnectptr.p->apiGci = ~(Uint64)0;
15561       }//for
15562     }
15563     jam();
15564     break;
15565   }
15566   case 2:{
15567     ConnectRecordPtr connectPtr;
15568     jam();
15569     /****** CONNECT ******/
15570     for (connectPtr.i = 0; connectPtr.i < cconnectFileSize; connectPtr.i++) {
15571       refresh_watch_dog();
15572       ptrAss(connectPtr, connectRecord);
15573       connectPtr.p->userpointer = RNIL;
15574       connectPtr.p->userblockref = ZNIL;
15575       connectPtr.p->connectState = ConnectRecord::FREE;
15576       connectPtr.p->table = RNIL;
15577       connectPtr.p->nextPool = connectPtr.i + 1;
15578       bzero(connectPtr.p->nodes, sizeof(connectPtr.p->nodes));
15579     }//for
15580     connectPtr.i = cconnectFileSize - 1;
15581     ptrAss(connectPtr, connectRecord);
15582     connectPtr.p->nextPool = RNIL;
15583     cfirstconnect = 0;
15584     break;
15585   }
15586   case 3:
15587     {
15588       FileRecordPtr filePtr;
15589       jam();
15590       /******** INTIALIZING FILE RECORDS ********/
15591       for (filePtr.i = 0; filePtr.i < cfileFileSize; filePtr.i++) {
15592 	ptrAss(filePtr, fileRecord);
15593 	filePtr.p->nextFile = filePtr.i + 1;
15594 	filePtr.p->fileStatus = FileRecord::CLOSED;
15595 	filePtr.p->reqStatus = FileRecord::IDLE;
15596       }//for
15597       filePtr.i = cfileFileSize - 1;
15598       ptrAss(filePtr, fileRecord);
15599       filePtr.p->nextFile = RNIL;
15600       cfirstfreeFile = 0;
15601       initRestorableGciFiles();
15602       break;
15603     }
15604   case 4:
15605     jam();
15606     initialiseFragstore();
15607     break;
15608   case 5:
15609     {
15610       jam();
15611       /******* NODE GROUP RECORD ******/
15612       /******* NODE RECORD       ******/
15613       NodeGroupRecordPtr loopNGPtr;
15614       for (loopNGPtr.i = 0; loopNGPtr.i < MAX_NDB_NODES; loopNGPtr.i++) {
15615 	ptrAss(loopNGPtr, nodeGroupRecord);
15616         loopNGPtr.p->nodesInGroup[0] = RNIL;
15617         loopNGPtr.p->nodesInGroup[1] = RNIL;
15618         loopNGPtr.p->nodesInGroup[2] = RNIL;
15619         loopNGPtr.p->nodesInGroup[3] = RNIL;
15620         loopNGPtr.p->nextReplicaNode = 0;
15621         loopNGPtr.p->nodeCount = 0;
15622         loopNGPtr.p->activeTakeOver = false;
15623         loopNGPtr.p->nodegroupIndex = RNIL;
15624         loopNGPtr.p->m_ref_count = 0;
15625         loopNGPtr.p->m_next_log_part = 0;
15626       }//for
15627       break;
15628     }
15629   case 6:
15630     {
15631       PageRecordPtr pagePtr;
15632       jam();
15633       /******* PAGE RECORD ******/
15634       for (pagePtr.i = 0; pagePtr.i < cpageFileSize; pagePtr.i++) {
15635         refresh_watch_dog();
15636 	ptrAss(pagePtr, pageRecord);
15637 	pagePtr.p->nextfreepage = pagePtr.i + 1;
15638       }//for
15639       pagePtr.i = cpageFileSize - 1;
15640       ptrAss(pagePtr, pageRecord);
15641       pagePtr.p->nextfreepage = RNIL;
15642       cfirstfreepage = 0;
15643       break;
15644     }
15645   case 7:
15646     {
15647       ReplicaRecordPtr initReplicaPtr;
15648       jam();
15649       /******* REPLICA RECORD ******/
15650       for (initReplicaPtr.i = 0; initReplicaPtr.i < creplicaFileSize;
15651 	   initReplicaPtr.i++) {
15652         refresh_watch_dog();
15653 	ptrAss(initReplicaPtr, replicaRecord);
15654 	initReplicaPtr.p->lcpIdStarted = 0;
15655 	initReplicaPtr.p->lcpOngoingFlag = false;
15656 	initReplicaPtr.p->nextReplica = initReplicaPtr.i + 1;
15657       }//for
15658       initReplicaPtr.i = creplicaFileSize - 1;
15659       ptrAss(initReplicaPtr, replicaRecord);
15660       initReplicaPtr.p->nextReplica = RNIL;
15661       cnoFreeReplicaRec = creplicaFileSize;
15662       cfirstfreeReplica = 0;
15663       break;
15664     }
15665   case 8:
15666     {
15667       TabRecordPtr loopTabptr;
15668       jam();
15669       /********* TAB-DESCRIPTOR ********/
15670       for (loopTabptr.i = 0; loopTabptr.i < ctabFileSize; loopTabptr.i++) {
15671 	ptrAss(loopTabptr, tabRecord);
15672         refresh_watch_dog();
15673 	initTable(loopTabptr);
15674       }//for
15675       break;
15676     }
15677   case 9:
15678     {
15679       jam();
15680       ReadConfigConf * conf = (ReadConfigConf*)signal->getDataPtrSend();
15681       conf->senderRef = reference();
15682       conf->senderData = retData;
15683       sendSignal(retRef, GSN_READ_CONFIG_CONF, signal,
15684 		 ReadConfigConf::SignalLength, JBB);
15685       return;
15686       break;
15687     }
15688   default:
15689     ndbrequire(false);
15690     break;
15691   }//switch
15692   jam();
15693   /* ---------------------------------------------------------------------- */
15694   /* SEND REAL-TIME BREAK DURING INIT OF VARIABLES DURING SYSTEM RESTART.   */
15695   /* ---------------------------------------------------------------------- */
15696   signal->theData[0] = DihContinueB::ZINITIALISE_RECORDS;
15697   signal->theData[1] = stepNo + 1;
15698   signal->theData[2] = retRef;
15699   signal->theData[3] = retData;
15700   sendSignal(reference(), GSN_CONTINUEB, signal, 4, JBB);
15701 }//Dbdih::initialiseRecordsLab()
15702 
15703 /*************************************************************************/
/*       INSERT THE NODE INTO THE LINKED LIST OF NODES INVOLVED IN ALL   */
15705 /*       DISTRIBUTED PROTOCOLS (EXCEPT GCP PROTOCOL THAT USES THE DIH    */
15706 /*       LINKED LIST INSTEAD).                                           */
15707 /*************************************************************************/
insertAlive(NodeRecordPtr newNodePtr)15708 void Dbdih::insertAlive(NodeRecordPtr newNodePtr)
15709 {
15710   NodeRecordPtr nodePtr;
15711 
15712   nodePtr.i = cfirstAliveNode;
15713   if (nodePtr.i == RNIL) {
15714     jam();
15715     cfirstAliveNode = newNodePtr.i;
15716   } else {
15717     do {
15718       ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
15719       if (nodePtr.p->nextNode == RNIL) {
15720         jam();
15721         nodePtr.p->nextNode = newNodePtr.i;
15722         break;
15723       } else {
15724         jam();
15725         nodePtr.i = nodePtr.p->nextNode;
15726       }//if
15727     } while (1);
15728   }//if
15729   newNodePtr.p->nextNode = RNIL;
15730 }//Dbdih::insertAlive()
15731 
insertBackup(FragmentstorePtr fragPtr,Uint32 nodeId)15732 void Dbdih::insertBackup(FragmentstorePtr fragPtr, Uint32 nodeId)
15733 {
15734   for (Uint32 i = fragPtr.p->fragReplicas; i > 1; i--) {
15735     jam();
15736     ndbrequire(i < MAX_REPLICAS && i > 0);
15737     fragPtr.p->activeNodes[i] = fragPtr.p->activeNodes[i - 1];
15738   }//for
15739   fragPtr.p->activeNodes[1] = nodeId;
15740   fragPtr.p->fragReplicas++;
15741 }//Dbdih::insertBackup()
15742 
insertDeadNode(NodeRecordPtr newNodePtr)15743 void Dbdih::insertDeadNode(NodeRecordPtr newNodePtr)
15744 {
15745   NodeRecordPtr nodePtr;
15746 
15747   nodePtr.i = cfirstDeadNode;
15748   if (nodePtr.i == RNIL) {
15749     jam();
15750     cfirstDeadNode = newNodePtr.i;
15751   } else {
15752     do {
15753       jam();
15754       ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
15755       if (nodePtr.p->nextNode == RNIL) {
15756         jam();
15757         nodePtr.p->nextNode = newNodePtr.i;
15758         break;
15759       } else {
15760         jam();
15761         nodePtr.i = nodePtr.p->nextNode;
15762       }//if
15763     } while (1);
15764   }//if
15765   newNodePtr.p->nextNode = RNIL;
15766 }//Dbdih::insertDeadNode()
15767 
linkOldStoredReplica(FragmentstorePtr fragPtr,ReplicaRecordPtr replicatePtr)15768 void Dbdih::linkOldStoredReplica(FragmentstorePtr fragPtr,
15769                                  ReplicaRecordPtr replicatePtr)
15770 {
15771   ReplicaRecordPtr losReplicaPtr;
15772 
15773   replicatePtr.p->nextReplica = RNIL;
15774   fragPtr.p->noOldStoredReplicas++;
15775   losReplicaPtr.i = fragPtr.p->oldStoredReplicas;
15776   if (losReplicaPtr.i == RNIL) {
15777     jam();
15778     fragPtr.p->oldStoredReplicas = replicatePtr.i;
15779     return;
15780   }//if
15781   ptrCheckGuard(losReplicaPtr, creplicaFileSize, replicaRecord);
15782   while (losReplicaPtr.p->nextReplica != RNIL) {
15783     jam();
15784     losReplicaPtr.i = losReplicaPtr.p->nextReplica;
15785     ptrCheckGuard(losReplicaPtr, creplicaFileSize, replicaRecord);
15786   }//if
15787   losReplicaPtr.p->nextReplica = replicatePtr.i;
15788 }//Dbdih::linkOldStoredReplica()
15789 
linkStoredReplica(FragmentstorePtr fragPtr,ReplicaRecordPtr replicatePtr)15790 void Dbdih::linkStoredReplica(FragmentstorePtr fragPtr,
15791                               ReplicaRecordPtr replicatePtr)
15792 {
15793   ReplicaRecordPtr lsrReplicaPtr;
15794 
15795   fragPtr.p->noStoredReplicas++;
15796   replicatePtr.p->nextReplica = RNIL;
15797   lsrReplicaPtr.i = fragPtr.p->storedReplicas;
15798   if (fragPtr.p->storedReplicas == RNIL) {
15799     jam();
15800     fragPtr.p->storedReplicas = replicatePtr.i;
15801     return;
15802   }//if
15803   ptrCheckGuard(lsrReplicaPtr, creplicaFileSize, replicaRecord);
15804   while (lsrReplicaPtr.p->nextReplica != RNIL) {
15805     jam();
15806     lsrReplicaPtr.i = lsrReplicaPtr.p->nextReplica;
15807     ptrCheckGuard(lsrReplicaPtr, creplicaFileSize, replicaRecord);
15808   }//if
15809   lsrReplicaPtr.p->nextReplica = replicatePtr.i;
15810 }//Dbdih::linkStoredReplica()
15811 
15812 /*************************************************************************/
15813 /*        MAKE NODE GROUPS BASED ON THE LIST OF NODES RECEIVED FROM CNTR */
15814 /*************************************************************************/
15815 void
add_nodegroup(NodeGroupRecordPtr NGPtr)15816 Dbdih::add_nodegroup(NodeGroupRecordPtr NGPtr)
15817 {
15818   if (NGPtr.p->nodegroupIndex == RNIL)
15819   {
15820     jam();
15821     NGPtr.p->nodegroupIndex = cnoOfNodeGroups;
15822     c_node_groups[cnoOfNodeGroups++] = NGPtr.i;
15823   }
15824 }
15825 
15826 void
inc_ng_refcount(Uint32 i)15827 Dbdih::inc_ng_refcount(Uint32 i)
15828 {
15829   NodeGroupRecordPtr NGPtr;
15830   NGPtr.i = i;
15831   ptrCheckGuard(NGPtr, MAX_NDB_NODES, nodeGroupRecord);
15832   NGPtr.p->m_ref_count++;
15833 }
15834 
15835 void
dec_ng_refcount(Uint32 i)15836 Dbdih::dec_ng_refcount(Uint32 i)
15837 {
15838   NodeGroupRecordPtr NGPtr;
15839   NGPtr.i = i;
15840   ptrCheckGuard(NGPtr, MAX_NDB_NODES, nodeGroupRecord);
15841   ndbrequire(NGPtr.p->m_ref_count);
15842   NGPtr.p->m_ref_count--;
15843 }
15844 
makeNodeGroups(Uint32 nodeArray[])15845 void Dbdih::makeNodeGroups(Uint32 nodeArray[])
15846 {
15847   NodeGroupRecordPtr NGPtr;
15848   NodeRecordPtr mngNodeptr;
15849   Uint32 j;
15850 
15851   /**-----------------------------------------------------------------------
15852    * ASSIGN ALL ACTIVE NODES INTO NODE GROUPS. HOT SPARE NODES ARE ASSIGNED
15853    * TO NODE GROUP ZNIL
15854    *-----------------------------------------------------------------------*/
15855   cnoOfNodeGroups = 0;
15856   for (Uint32 i = 0; nodeArray[i] != RNIL; i++)
15857   {
15858     jam();
15859     mngNodeptr.i = nodeArray[i];
15860     ptrCheckGuard(mngNodeptr, MAX_NDB_NODES, nodeRecord);
15861     if (mngNodeptr.p->nodeGroup == NDB_NO_NODEGROUP)
15862     {
15863       jam();
15864       mngNodeptr.p->nodeGroup = ZNIL;
15865       ndbout_c("setting nodeGroup = ZNIL for node %u",
15866                mngNodeptr.i);
15867     }
15868     else if (mngNodeptr.p->nodeGroup != RNIL)
15869     {
15870       jam();
15871       NGPtr.i = mngNodeptr.p->nodeGroup;
15872       ptrCheckGuard(NGPtr, MAX_NDB_NODES, nodeGroupRecord);
15873       arrGuard(NGPtr.p->nodeCount, MAX_REPLICAS);
15874       NGPtr.p->nodesInGroup[NGPtr.p->nodeCount++] = mngNodeptr.i;
15875 
15876       add_nodegroup(NGPtr);
15877     }
15878   }
15879   NGPtr.i = 0;
15880   for (; NGPtr.i < MAX_NDB_NODES; NGPtr.i++)
15881   {
15882     jam();
15883     ptrCheckGuard(NGPtr, MAX_NDB_NODES, nodeGroupRecord);
15884     if (NGPtr.p->nodeCount < cnoReplicas)
15885       break;
15886   }
15887 
15888   for (Uint32 i = 0; nodeArray[i] != RNIL; i++)
15889   {
15890     jam();
15891     mngNodeptr.i = nodeArray[i];
15892     ptrCheckGuard(mngNodeptr, MAX_NDB_NODES, nodeRecord);
15893     if (mngNodeptr.p->nodeGroup == RNIL)
15894     {
15895       mngNodeptr.p->nodeGroup = NGPtr.i;
15896       NGPtr.p->nodesInGroup[NGPtr.p->nodeCount++] = mngNodeptr.i;
15897 
15898       add_nodegroup(NGPtr);
15899 
15900       if (NGPtr.p->nodeCount == cnoReplicas)
15901       {
15902         jam();
15903         for (; NGPtr.i < MAX_NDB_NODES; NGPtr.i++)
15904         {
15905           jam();
15906           ptrCheckGuard(NGPtr, MAX_NDB_NODES, nodeGroupRecord);
15907           if (NGPtr.p->nodeCount < cnoReplicas)
15908             break;
15909         }
15910       }
15911     }
15912   }
15913 
15914   Uint32 maxNG = 0;
15915   for (Uint32 i = 0; i<cnoOfNodeGroups; i++)
15916   {
15917     jam();
15918     NGPtr.i = c_node_groups[i];
15919     ptrCheckGuard(NGPtr, MAX_NDB_NODES, nodeGroupRecord);
15920     if (NGPtr.p->nodeCount == 0)
15921     {
15922       jam();
15923     }
15924     else if (NGPtr.p->nodeCount != cnoReplicas)
15925     {
15926       ndbrequire(false);
15927     }
15928     else
15929     {
15930       if (NGPtr.i > maxNG)
15931       {
15932         maxNG = NGPtr.i;
15933       }
15934     }
15935   }
15936 
15937   ndbrequire(csystemnodes < MAX_NDB_NODES);
15938 
15939   /**
15940    * Init sysfile
15941    */
15942   for(Uint32 i = 0; i < MAX_NDB_NODES; i++)
15943   {
15944     jam();
15945     Sysfile::setNodeGroup(i, SYSFILE->nodeGroups, NO_NODE_GROUP_ID);
15946     Sysfile::setNodeStatus(i, SYSFILE->nodeStatus,Sysfile::NS_NotDefined);
15947   }
15948 
15949   for (Uint32 i = 0; nodeArray[i] != RNIL; i++)
15950   {
15951     jam();
15952     Uint32 nodeId = mngNodeptr.i = nodeArray[i];
15953     ptrCheckGuard(mngNodeptr, MAX_NDB_NODES, nodeRecord);
15954 
15955     if (mngNodeptr.p->nodeGroup != ZNIL)
15956     {
15957       jam();
15958       Sysfile::setNodeGroup(nodeId, SYSFILE->nodeGroups,
15959                             mngNodeptr.p->nodeGroup);
15960 
15961       if (mngNodeptr.p->nodeStatus == NodeRecord::ALIVE)
15962       {
15963         jam();
15964         mngNodeptr.p->activeStatus = Sysfile::NS_Active;
15965       }
15966       else
15967       {
15968         jam();
15969         mngNodeptr.p->activeStatus = Sysfile::NS_NotActive_NotTakenOver;
15970       }
15971     }
15972     else
15973     {
15974       jam();
15975       Sysfile::setNodeGroup(mngNodeptr.i, SYSFILE->nodeGroups,
15976                             NO_NODE_GROUP_ID);
15977       mngNodeptr.p->activeStatus = Sysfile::NS_Configured;
15978     }
15979     Sysfile::setNodeStatus(nodeId, SYSFILE->nodeStatus,
15980                            mngNodeptr.p->activeStatus);
15981   }
15982 
15983   for (Uint32 i = 0; i<cnoOfNodeGroups; i++)
15984   {
15985     jam();
15986     bool alive = false;
15987     NodeGroupRecordPtr NGPtr;
15988     NGPtr.i = c_node_groups[i];
15989     ptrCheckGuard(NGPtr, MAX_NDB_NODES, nodeGroupRecord);
15990     for (j = 0; j<NGPtr.p->nodeCount; j++)
15991     {
15992       jam();
15993       mngNodeptr.i = NGPtr.p->nodesInGroup[j];
15994       ptrCheckGuard(mngNodeptr, MAX_NDB_NODES, nodeRecord);
15995       if (checkNodeAlive(NGPtr.p->nodesInGroup[j]))
15996       {
15997 	alive = true;
15998 	break;
15999       }
16000     }
16001 
16002     if (!alive)
16003     {
16004       char buf[255];
16005       BaseString::snprintf
16006         (buf, sizeof(buf),
16007          "Illegal initial start, no alive node in nodegroup %u", i);
16008       progError(__LINE__,
16009                 NDBD_EXIT_INSUFFICENT_NODES,
16010                 buf);
16011     }
16012   }
16013 }//Dbdih::makeNodeGroups()
16014 
16015 /**
16016  * On node failure QMGR asks DIH about node groups.  This is
16017  * a direct signal (function call in same process).  Input is
16018  * bitmask of surviving nodes.  The routine is not concerned
16019  * about node count.  Reply is one of:
16020  * 1) win - we can survive, and nobody else can
16021  * 2) lose - we cannot survive
16022  * 3) partition - we can survive but there could be others
16023  */
execCHECKNODEGROUPSREQ(Signal * signal)16024 void Dbdih::execCHECKNODEGROUPSREQ(Signal* signal)
16025 {
16026   jamEntry();
16027   CheckNodeGroups* sd = (CheckNodeGroups*)&signal->theData[0];
16028 
16029   bool direct = (sd->requestType & CheckNodeGroups::Direct);
16030   bool ok = false;
16031   switch(sd->requestType & ~CheckNodeGroups::Direct){
16032   case CheckNodeGroups::ArbitCheck:{
16033     ok = true;
16034     jam();
16035     unsigned missall = 0;
16036     unsigned haveall = 0;
16037     for (Uint32 i = 0; i < cnoOfNodeGroups; i++) {
16038       jam();
16039       NodeGroupRecordPtr ngPtr;
16040       ngPtr.i = c_node_groups[i];
16041       ptrAss(ngPtr, nodeGroupRecord);
16042       Uint32 count = 0;
16043       for (Uint32 j = 0; j < ngPtr.p->nodeCount; j++) {
16044 	jam();
16045 	Uint32 nodeId = ngPtr.p->nodesInGroup[j];
16046 	if (sd->mask.get(nodeId)) {
16047 	  jam();
16048 	  count++;
16049 	}//if
16050       }//for
16051       if (count == 0) {
16052 	jam();
16053 	missall++;
16054       }//if
16055       if (count == ngPtr.p->nodeCount) {
16056 	haveall++;
16057       }//if
16058     }//for
16059 
16060     if (missall) {
16061       jam();
16062       sd->output = CheckNodeGroups::Lose;
16063     } else if (haveall) {
16064       jam();
16065       sd->output = CheckNodeGroups::Win;
16066     } else {
16067       jam();
16068       sd->output = CheckNodeGroups::Partitioning;
16069     }//if
16070   }
16071     break;
16072   case CheckNodeGroups::GetNodeGroup:{
16073     ok = true;
16074     Uint32 ng = Sysfile::getNodeGroup(getOwnNodeId(), SYSFILE->nodeGroups);
16075     if (ng == NO_NODE_GROUP_ID)
16076       ng = RNIL;
16077     sd->output = ng;
16078     break;
16079   }
16080   case CheckNodeGroups::GetNodeGroupMembers: {
16081     ok = true;
16082     Uint32 ng = Sysfile::getNodeGroup(sd->nodeId, SYSFILE->nodeGroups);
16083     if (ng == NO_NODE_GROUP_ID)
16084       ng = RNIL;
16085 
16086     sd->output = ng;
16087     sd->mask.clear();
16088 
16089     NodeGroupRecordPtr ngPtr;
16090     ngPtr.i = ng;
16091     if (ngPtr.i != RNIL)
16092     {
16093       jam();
16094       ptrAss(ngPtr, nodeGroupRecord);
16095       for (Uint32 j = 0; j < ngPtr.p->nodeCount; j++) {
16096         jam();
16097         sd->mask.set(ngPtr.p->nodesInGroup[j]);
16098       }
16099     }
16100     break;
16101   }
16102   case CheckNodeGroups::GetDefaultFragments:
16103     jam();
16104     ok = true;
16105     sd->output = (cnoOfNodeGroups + sd->extraNodeGroups)
16106       * c_fragments_per_node * cnoReplicas;
16107     break;
16108   }
16109   ndbrequire(ok);
16110 
16111   if (!direct)
16112     sendSignal(sd->blockRef, GSN_CHECKNODEGROUPSCONF, signal,
16113 	       CheckNodeGroups::SignalLength, JBB);
16114 }//Dbdih::execCHECKNODEGROUPSREQ()
16115 
16116 void
makePrnList(ReadNodesConf * readNodes,Uint32 nodeArray[])16117   Dbdih::makePrnList(ReadNodesConf * readNodes, Uint32 nodeArray[])
16118 {
16119   cfirstAliveNode = RNIL;
16120   ndbrequire(con_lineNodes > 0);
16121   ndbrequire(csystemnodes < MAX_NDB_NODES);
16122   for (Uint32 i = 0; i < csystemnodes; i++) {
16123     NodeRecordPtr nodePtr;
16124     jam();
16125     nodePtr.i = nodeArray[i];
16126     ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
16127     new (nodePtr.p) NodeRecord();
16128     if (NdbNodeBitmask::get(readNodes->inactiveNodes, nodePtr.i) == false){
16129       jam();
16130       nodePtr.p->nodeStatus = NodeRecord::ALIVE;
16131       nodePtr.p->useInTransactions = true;
16132       nodePtr.p->copyCompleted = true;
16133       nodePtr.p->m_inclDihLcp = true;
16134       insertAlive(nodePtr);
16135     } else {
16136       jam();
16137       nodePtr.p->nodeStatus = NodeRecord::DEAD;
16138       insertDeadNode(nodePtr);
16139     }//if
16140   }//for
16141 }//Dbdih::makePrnList()
16142 
16143 /*************************************************************************/
16144 /*       A NEW CRASHED REPLICA IS ADDED BY A NODE FAILURE.               */
16145 /*************************************************************************/
newCrashedReplica(ReplicaRecordPtr ncrReplicaPtr)16146 void Dbdih::newCrashedReplica(ReplicaRecordPtr ncrReplicaPtr)
16147 {
16148   /*----------------------------------------------------------------------*/
16149   /*       SET THE REPLICA_LAST_GCI OF THE CRASHED REPLICA TO LAST GCI    */
16150   /*       EXECUTED BY THE FAILED NODE.                                   */
16151   /*----------------------------------------------------------------------*/
16152   /*       WE HAVE A NEW CRASHED REPLICA. INITIATE CREATE GCI TO INDICATE */
16153   /*       THAT THE NEW REPLICA IS NOT STARTED YET AND REPLICA_LAST_GCI IS*/
16154   /*       SET TO -1 TO INDICATE THAT IT IS NOT DEAD YET.                 */
16155   /*----------------------------------------------------------------------*/
16156   Uint32 nodeId = ncrReplicaPtr.p->procNode;
16157   Uint32 lastGCI = SYSFILE->lastCompletedGCI[nodeId];
16158   if (ncrReplicaPtr.p->noCrashedReplicas + 1 == MAX_CRASHED_REPLICAS)
16159   {
16160     jam();
16161     packCrashedReplicas(ncrReplicaPtr);
16162   }
16163 
16164   Uint32 noCrashedReplicas = ncrReplicaPtr.p->noCrashedReplicas;
16165   arrGuardErr(ncrReplicaPtr.p->noCrashedReplicas + 1, MAX_CRASHED_REPLICAS,
16166               NDBD_EXIT_MAX_CRASHED_REPLICAS);
16167 
16168   if (noCrashedReplicas > 0 &&
16169       ncrReplicaPtr.p->replicaLastGci[noCrashedReplicas - 1] == lastGCI)
16170   {
16171     jam();
16172     /**
16173      * Don't add another redo-interval, that already exist
16174      *  instead initalize new
16175      */
16176     ncrReplicaPtr.p->createGci[ncrReplicaPtr.p->noCrashedReplicas] =
16177       ZINIT_CREATE_GCI;
16178     ncrReplicaPtr.p->replicaLastGci[ncrReplicaPtr.p->noCrashedReplicas] =
16179       ZINIT_REPLICA_LAST_GCI;
16180   }
16181   else if (ncrReplicaPtr.p->createGci[noCrashedReplicas] <= lastGCI)
16182   {
16183     jam();
16184     ncrReplicaPtr.p->replicaLastGci[ncrReplicaPtr.p->noCrashedReplicas] =
16185       lastGCI;
16186     ncrReplicaPtr.p->noCrashedReplicas = ncrReplicaPtr.p->noCrashedReplicas + 1;
16187     ncrReplicaPtr.p->createGci[ncrReplicaPtr.p->noCrashedReplicas] =
16188       ZINIT_CREATE_GCI;
16189     ncrReplicaPtr.p->replicaLastGci[ncrReplicaPtr.p->noCrashedReplicas] =
16190       ZINIT_REPLICA_LAST_GCI;
16191   }
16192   else
16193   {
16194     /**
16195      * This can happen if createGci is set
16196      *   (during sendCreateFragReq(COMMIT_STORED))
16197      *   but SYSFILE->lastCompletedGCI[nodeId] has not been updated
16198      *   as node has not yet completed it's first LCP, causing it to return
16199      *   GCP_SAVEREF (which makes SYSFILE->lastCompletedGCI[nodeId] be left
16200      *   untouched)
16201      *
16202      * I.e crash during node-restart
16203      */
16204     ncrReplicaPtr.p->createGci[noCrashedReplicas] = ZINIT_CREATE_GCI;
16205   }
16206 
16207 }//Dbdih::newCrashedReplica()
16208 
16209 /*************************************************************************/
16210 /*       AT NODE FAILURE DURING START OF A NEW NODE WE NEED TO RESET A   */
16211 /*       SET OF VARIABLES CONTROLLING THE START AND INDICATING ONGOING   */
16212 /*       START OF A NEW NODE.                                            */
16213 /*************************************************************************/
nodeResetStart(Signal * signal)16214 void Dbdih::nodeResetStart(Signal *signal)
16215 {
16216   jam();
16217   Uint32 startGCP = c_nodeStartMaster.blockGcp;
16218 
16219   c_nodeStartSlave.nodeId = 0;
16220   c_nodeStartMaster.startNode = RNIL;
16221   c_nodeStartMaster.failNr = cfailurenr;
16222   c_nodeStartMaster.activeState = false;
16223   c_nodeStartMaster.blockGcp = 0;
16224   c_nodeStartMaster.blockLcp = false;
16225   c_nodeStartMaster.m_outstandingGsn = 0;
16226 
16227   if (startGCP == 2) // effective
16228   {
16229     jam();
16230     ndbrequire(isMaster());
16231     ndbrequire(m_micro_gcp.m_master.m_state == MicroGcp::M_GCP_IDLE);
16232     signal->theData[0] = DihContinueB::ZSTART_GCP;
16233     sendSignal(reference(), GSN_CONTINUEB, signal, 1, JBB);
16234   }
16235 }//Dbdih::nodeResetStart()
16236 
openFileRw(Signal * signal,FileRecordPtr filePtr)16237 void Dbdih::openFileRw(Signal* signal, FileRecordPtr filePtr)
16238 {
16239   signal->theData[0] = reference();
16240   signal->theData[1] = filePtr.i;
16241   signal->theData[2] = filePtr.p->fileName[0];
16242   signal->theData[3] = filePtr.p->fileName[1];
16243   signal->theData[4] = filePtr.p->fileName[2];
16244   signal->theData[5] = filePtr.p->fileName[3];
16245   signal->theData[6] = FsOpenReq::OM_READWRITE;
16246   sendSignal(NDBFS_REF, GSN_FSOPENREQ, signal, 7, JBA);
16247 }//Dbdih::openFileRw()
16248 
openFileRo(Signal * signal,FileRecordPtr filePtr)16249 void Dbdih::openFileRo(Signal* signal, FileRecordPtr filePtr)
16250 {
16251   signal->theData[0] = reference();
16252   signal->theData[1] = filePtr.i;
16253   signal->theData[2] = filePtr.p->fileName[0];
16254   signal->theData[3] = filePtr.p->fileName[1];
16255   signal->theData[4] = filePtr.p->fileName[2];
16256   signal->theData[5] = filePtr.p->fileName[3];
16257   signal->theData[6] = FsOpenReq::OM_READONLY;
16258   sendSignal(NDBFS_REF, GSN_FSOPENREQ, signal, 7, JBA);
16259 }//Dbdih::openFileRw()
16260 
16261 /*************************************************************************/
16262 /*       REMOVE A CRASHED REPLICA BY PACKING THE ARRAY OF CREATED GCI AND*/
16263 /*       THE LAST GCI OF THE CRASHED REPLICA.                            */
16264 /*************************************************************************/
packCrashedReplicas(ReplicaRecordPtr replicaPtr)16265 void Dbdih::packCrashedReplicas(ReplicaRecordPtr replicaPtr)
16266 {
16267   ndbrequire(replicaPtr.p->noCrashedReplicas > 0);
16268   ndbrequire(replicaPtr.p->noCrashedReplicas <= MAX_CRASHED_REPLICAS);
16269   for (Uint32 i = 0; i < replicaPtr.p->noCrashedReplicas; i++) {
16270     jam();
16271     replicaPtr.p->createGci[i] = replicaPtr.p->createGci[i + 1];
16272     replicaPtr.p->replicaLastGci[i] = replicaPtr.p->replicaLastGci[i + 1];
16273   }//for
16274   replicaPtr.p->noCrashedReplicas--;
16275   replicaPtr.p->createGci[replicaPtr.p->noCrashedReplicas + 1] =
16276     ZINIT_CREATE_GCI;
16277   replicaPtr.p->replicaLastGci[replicaPtr.p->noCrashedReplicas + 1] =
16278     ZINIT_REPLICA_LAST_GCI;
16279 }//Dbdih::packCrashedReplicas()
16280 
16281 void
mergeCrashedReplicas(ReplicaRecordPtr replicaPtr)16282 Dbdih::mergeCrashedReplicas(ReplicaRecordPtr replicaPtr)
16283 {
16284   /**
16285    * merge adjacent redo-intervals
16286    */
16287   for (Uint32 i = replicaPtr.p->noCrashedReplicas; i > 0; i--)
16288   {
16289     jam();
16290     if (replicaPtr.p->createGci[i] == 1 + replicaPtr.p->replicaLastGci[i-1])
16291     {
16292       jam();
16293       replicaPtr.p->replicaLastGci[i-1] = replicaPtr.p->replicaLastGci[i];
16294       replicaPtr.p->createGci[i] = ZINIT_CREATE_GCI;
16295       replicaPtr.p->replicaLastGci[i] = ZINIT_REPLICA_LAST_GCI;
16296       replicaPtr.p->noCrashedReplicas--;
16297     }
16298     else
16299     {
16300       jam();
16301       break;
16302     }
16303   }
16304 }
16305 
prepareReplicas(FragmentstorePtr fragPtr)16306 void Dbdih::prepareReplicas(FragmentstorePtr fragPtr)
16307 {
16308   ReplicaRecordPtr prReplicaPtr;
16309   Uint32 prevReplica = RNIL;
16310 
16311   /* --------------------------------------------------------------------- */
16312   /*       BEGIN BY LINKING ALL REPLICA RECORDS ONTO THE OLD STORED REPLICA*/
16313   /*       LIST.                                                           */
16314   /*       AT A SYSTEM RESTART OBVIOUSLY ALL NODES ARE OLD.                */
16315   /* --------------------------------------------------------------------- */
16316   prReplicaPtr.i = fragPtr.p->storedReplicas;
16317   while (prReplicaPtr.i != RNIL) {
16318     jam();
16319     prevReplica = prReplicaPtr.i;
16320     ptrCheckGuard(prReplicaPtr, creplicaFileSize, replicaRecord);
16321     prReplicaPtr.i = prReplicaPtr.p->nextReplica;
16322   }//while
16323   /* --------------------------------------------------------------------- */
16324   /*       LIST OF STORED REPLICAS WILL BE EMPTY NOW.                      */
16325   /* --------------------------------------------------------------------- */
16326   if (prevReplica != RNIL) {
16327     prReplicaPtr.i = prevReplica;
16328     ptrCheckGuard(prReplicaPtr, creplicaFileSize, replicaRecord);
16329     prReplicaPtr.p->nextReplica = fragPtr.p->oldStoredReplicas;
16330     fragPtr.p->oldStoredReplicas = fragPtr.p->storedReplicas;
16331     fragPtr.p->storedReplicas = RNIL;
16332     fragPtr.p->noOldStoredReplicas += fragPtr.p->noStoredReplicas;
16333     fragPtr.p->noStoredReplicas = 0;
16334   }//if
16335 }//Dbdih::prepareReplicas()
16336 
readFragment(RWFragment * rf,FragmentstorePtr fragPtr)16337 void Dbdih::readFragment(RWFragment* rf, FragmentstorePtr fragPtr)
16338 {
16339   Uint32 TreadFid = readPageWord(rf);
16340   fragPtr.p->preferredPrimary = readPageWord(rf);
16341   fragPtr.p->noStoredReplicas = readPageWord(rf);
16342   fragPtr.p->noOldStoredReplicas = readPageWord(rf);
16343   Uint32 TdistKey = readPageWord(rf);
16344 
16345   ndbrequire(fragPtr.p->noStoredReplicas > 0);
16346   ndbrequire(TreadFid == rf->fragId);
16347   ndbrequire(TdistKey < 256);
16348   fragPtr.p->distributionKey = TdistKey;
16349 
16350   fragPtr.p->m_log_part_id = readPageWord(rf);
16351   inc_ng_refcount(getNodeGroup(fragPtr.p->preferredPrimary));
16352 }//Dbdih::readFragment()
16353 
readPageWord(RWFragment * rf)16354 Uint32 Dbdih::readPageWord(RWFragment* rf)
16355 {
16356   if (rf->wordIndex >= 2048) {
16357     jam();
16358     ndbrequire(rf->wordIndex == 2048);
16359     rf->pageIndex++;
16360     ndbrequire(rf->pageIndex < NDB_ARRAY_SIZE(rf->rwfTabPtr.p->pageRef));
16361     rf->rwfPageptr.i = rf->rwfTabPtr.p->pageRef[rf->pageIndex];
16362     ptrCheckGuard(rf->rwfPageptr, cpageFileSize, pageRecord);
16363     rf->wordIndex = 32;
16364   }//if
16365   Uint32 dataWord = rf->rwfPageptr.p->word[rf->wordIndex];
16366   rf->wordIndex++;
16367   return dataWord;
16368 }//Dbdih::readPageWord()
16369 
readReplica(RWFragment * rf,ReplicaRecordPtr readReplicaPtr)16370 void Dbdih::readReplica(RWFragment* rf, ReplicaRecordPtr readReplicaPtr)
16371 {
16372   Uint32 i;
16373   readReplicaPtr.p->procNode = readPageWord(rf);
16374   readReplicaPtr.p->initialGci = readPageWord(rf);
16375   readReplicaPtr.p->noCrashedReplicas = readPageWord(rf);
16376   readReplicaPtr.p->nextLcp = readPageWord(rf);
16377 
16378   for (i = 0; i < MAX_LCP_STORED; i++) {
16379     readReplicaPtr.p->maxGciCompleted[i] = readPageWord(rf);
16380     readReplicaPtr.p->maxGciStarted[i] = readPageWord(rf);
16381     readReplicaPtr.p->lcpId[i] = readPageWord(rf);
16382     readReplicaPtr.p->lcpStatus[i] = readPageWord(rf);
16383   }//for
16384   const Uint32 noCrashedReplicas = readReplicaPtr.p->noCrashedReplicas;
16385   ndbrequire(noCrashedReplicas < MAX_CRASHED_REPLICAS);
16386   for (i = 0; i < noCrashedReplicas; i++) {
16387     readReplicaPtr.p->createGci[i] = readPageWord(rf);
16388     readReplicaPtr.p->replicaLastGci[i] = readPageWord(rf);
16389   }//for
16390   for(i = noCrashedReplicas; i<MAX_CRASHED_REPLICAS; i++){
16391     readReplicaPtr.p->createGci[i] = readPageWord(rf);
16392     readReplicaPtr.p->replicaLastGci[i] = readPageWord(rf);
16393   }
16394 }//Dbdih::readReplica()
16395 
readReplicas(RWFragment * rf,FragmentstorePtr fragPtr)16396 void Dbdih::readReplicas(RWFragment* rf, FragmentstorePtr fragPtr)
16397 {
16398   Uint32 i;
16399   ReplicaRecordPtr newReplicaPtr;
16400   Uint32 noStoredReplicas = fragPtr.p->noStoredReplicas;
16401   Uint32 noOldStoredReplicas = fragPtr.p->noOldStoredReplicas;
16402   /* ----------------------------------------------------------------------- */
16403   /*      WE CLEAR THE NUMBER OF STORED REPLICAS SINCE IT WILL BE CALCULATED */
16404   /*      BY THE LINKING SUBROUTINES.                                        */
16405   /* ----------------------------------------------------------------------- */
16406   fragPtr.p->noStoredReplicas = 0;
16407   fragPtr.p->noOldStoredReplicas = 0;
16408   Uint32 replicaIndex = 0;
16409   ndbrequire(noStoredReplicas + noOldStoredReplicas <= MAX_REPLICAS);
16410   for (i = 0; i < noStoredReplicas; i++)
16411   {
16412     seizeReplicaRec(newReplicaPtr);
16413     readReplica(rf, newReplicaPtr);
16414     ndbrequire(replicaIndex < MAX_REPLICAS);
16415     fragPtr.p->activeNodes[replicaIndex] = newReplicaPtr.p->procNode;
16416     replicaIndex++;
16417     linkStoredReplica(fragPtr, newReplicaPtr);
16418   }//for
16419   fragPtr.p->fragReplicas = noStoredReplicas;
16420   for (i = 0; i < noOldStoredReplicas; i++) {
16421     jam();
16422     seizeReplicaRec(newReplicaPtr);
16423     readReplica(rf, newReplicaPtr);
16424     linkOldStoredReplica(fragPtr, newReplicaPtr);
16425   }//for
16426 }//Dbdih::readReplicas()
16427 
readRestorableGci(Signal * signal,FileRecordPtr filePtr)16428 void Dbdih::readRestorableGci(Signal* signal, FileRecordPtr filePtr)
16429 {
16430   signal->theData[0] = filePtr.p->fileRef;
16431   signal->theData[1] = reference();
16432   signal->theData[2] = filePtr.i;
16433   signal->theData[3] = ZLIST_OF_PAIRS;
16434   signal->theData[4] = ZVAR_NO_CRESTART_INFO;
16435   signal->theData[5] = 1;
16436   signal->theData[6] = 0;
16437   signal->theData[7] = 0;
16438   sendSignal(NDBFS_REF, GSN_FSREADREQ, signal, 8, JBA);
16439 }//Dbdih::readRestorableGci()
16440 
readTabfile(Signal * signal,TabRecord * tab,FileRecordPtr filePtr)16441 void Dbdih::readTabfile(Signal* signal, TabRecord* tab, FileRecordPtr filePtr)
16442 {
16443   signal->theData[0] = filePtr.p->fileRef;
16444   signal->theData[1] = reference();
16445   signal->theData[2] = filePtr.i;
16446   signal->theData[3] = ZLIST_OF_PAIRS;
16447   signal->theData[4] = ZVAR_NO_WORD;
16448   signal->theData[5] = tab->noPages;
16449   Uint32 section[2 * NDB_ARRAY_SIZE(tab->pageRef)];
16450   for (Uint32 i = 0; i < tab->noPages; i++)
16451   {
16452     section[(2 * i) + 0] = tab->pageRef[i];
16453     section[(2 * i) + 1] = i;
16454   }
16455   LinearSectionPtr ptr[3];
16456   ptr[0].p = section;
16457   ptr[0].sz = 2 * tab->noPages;
16458   sendSignal(NDBFS_REF, GSN_FSREADREQ, signal, 6, JBA, ptr, 1);
16459 }//Dbdih::readTabfile()
16460 
releasePage(Uint32 pageIndex)16461 void Dbdih::releasePage(Uint32 pageIndex)
16462 {
16463   PageRecordPtr pagePtr;
16464   pagePtr.i = pageIndex;
16465   ptrCheckGuard(pagePtr, cpageFileSize, pageRecord);
16466   pagePtr.p->nextfreepage = cfirstfreepage;
16467   cfirstfreepage = pagePtr.i;
16468 }//Dbdih::releasePage()
16469 
releaseTabPages(Uint32 tableId)16470 void Dbdih::releaseTabPages(Uint32 tableId)
16471 {
16472   TabRecordPtr tabPtr;
16473   tabPtr.i = tableId;
16474   ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
16475   ndbrequire(tabPtr.p->noPages <= NDB_ARRAY_SIZE(tabPtr.p->pageRef));
16476   for (Uint32 i = 0; i < tabPtr.p->noPages; i++) {
16477     jam();
16478     releasePage(tabPtr.p->pageRef[i]);
16479   }//for
16480   tabPtr.p->noPages = 0;
16481 }//Dbdih::releaseTabPages()
16482 
16483 /*************************************************************************/
16484 /*       REMOVE NODE FROM SET OF ALIVE NODES.                            */
16485 /*************************************************************************/
removeAlive(NodeRecordPtr removeNodePtr)16486 void Dbdih::removeAlive(NodeRecordPtr removeNodePtr)
16487 {
16488   NodeRecordPtr nodePtr;
16489 
16490   nodePtr.i = cfirstAliveNode;
16491   if (nodePtr.i == removeNodePtr.i) {
16492     jam();
16493     cfirstAliveNode = removeNodePtr.p->nextNode;
16494     return;
16495   }//if
16496   do {
16497     jam();
16498     ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
16499     if (nodePtr.p->nextNode == removeNodePtr.i) {
16500       jam();
16501       nodePtr.p->nextNode = removeNodePtr.p->nextNode;
16502       break;
16503     } else {
16504       jam();
16505       nodePtr.i = nodePtr.p->nextNode;
16506     }//if
16507   } while (1);
16508 }//Dbdih::removeAlive()
16509 
16510 /*************************************************************************/
16511 /*       REMOVE NODE FROM SET OF DEAD NODES.                             */
16512 /*************************************************************************/
removeDeadNode(NodeRecordPtr removeNodePtr)16513 void Dbdih::removeDeadNode(NodeRecordPtr removeNodePtr)
16514 {
16515   NodeRecordPtr nodePtr;
16516 
16517   nodePtr.i = cfirstDeadNode;
16518   if (nodePtr.i == removeNodePtr.i) {
16519     jam();
16520     cfirstDeadNode = removeNodePtr.p->nextNode;
16521     return;
16522   }//if
16523   do {
16524     jam();
16525     ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
16526     if (nodePtr.p->nextNode == removeNodePtr.i) {
16527       jam();
16528       nodePtr.p->nextNode = removeNodePtr.p->nextNode;
16529       break;
16530     } else {
16531       jam();
16532       nodePtr.i = nodePtr.p->nextNode;
16533     }//if
16534   } while (1);
16535 }//Dbdih::removeDeadNode()
16536 
16537 /*---------------------------------------------------------------*/
16538 /*       REMOVE REPLICAS OF A FAILED NODE FROM LIST OF STORED    */
16539 /*       REPLICAS AND MOVE IT TO THE LIST OF OLD STORED REPLICAS.*/
16540 /*       ALSO UPDATE THE CRASHED REPLICA INFORMATION.            */
16541 /*---------------------------------------------------------------*/
removeNodeFromStored(Uint32 nodeId,FragmentstorePtr fragPtr,ReplicaRecordPtr replicatePtr,bool temporary)16542 void Dbdih::removeNodeFromStored(Uint32 nodeId,
16543                                  FragmentstorePtr fragPtr,
16544                                  ReplicaRecordPtr replicatePtr,
16545 				 bool temporary)
16546 {
16547   if (!temporary)
16548   {
16549     jam();
16550     newCrashedReplica(replicatePtr);
16551   }
16552   else
16553   {
16554     jam();
16555   }
16556   removeStoredReplica(fragPtr, replicatePtr);
16557   linkOldStoredReplica(fragPtr, replicatePtr);
16558   ndbrequire(fragPtr.p->storedReplicas != RNIL);
16559 }//Dbdih::removeNodeFromStored()
16560 
16561 /*************************************************************************/
16562 /*       REMOVE ANY OLD CRASHED REPLICAS THAT ARE NOT RESTORABLE ANY MORE*/
16563 /*************************************************************************/
removeOldCrashedReplicas(Uint32 tab,Uint32 frag,ReplicaRecordPtr rocReplicaPtr)16564 void Dbdih::removeOldCrashedReplicas(Uint32 tab, Uint32 frag,
16565                                      ReplicaRecordPtr rocReplicaPtr)
16566 {
16567   mergeCrashedReplicas(rocReplicaPtr);
16568   while (rocReplicaPtr.p->noCrashedReplicas > 0) {
16569     jam();
16570     /* --------------------------------------------------------------------- */
16571     /*       ONLY IF THERE IS AT LEAST ONE REPLICA THEN CAN WE REMOVE ANY.   */
16572     /* --------------------------------------------------------------------- */
16573     if (rocReplicaPtr.p->replicaLastGci[0] < SYSFILE->oldestRestorableGCI){
16574       jam();
16575       /* ------------------------------------------------------------------- */
16576       /*     THIS CRASHED REPLICA HAS BECOME EXTINCT AND MUST BE REMOVED TO  */
16577       /*     GIVE SPACE FOR NEW CRASHED REPLICAS.                            */
16578       /* ------------------------------------------------------------------- */
16579       packCrashedReplicas(rocReplicaPtr);
16580     } else {
16581       break;
16582     }//if
16583   }//while
16584 
16585   while (rocReplicaPtr.p->createGci[0] < SYSFILE->keepGCI)
16586   {
16587     jam();
16588     /* --------------------------------------------------------------------- */
16589     /*       MOVE FORWARD THE CREATE GCI TO A GCI THAT CAN BE USED. WE HAVE  */
16590     /*       NO CERTAINTY IN FINDING ANY LOG RECORDS FROM OLDER GCI'S.       */
16591     /* --------------------------------------------------------------------- */
16592     rocReplicaPtr.p->createGci[0] = SYSFILE->keepGCI;
16593 
16594     if (rocReplicaPtr.p->noCrashedReplicas)
16595     {
16596       /**
16597        * a REDO interval while is from 78 to 14 is not usefull
16598        *   but rather harmful, remove it...
16599        */
16600       if (rocReplicaPtr.p->createGci[0] > rocReplicaPtr.p->replicaLastGci[0])
16601       {
16602         jam();
16603         packCrashedReplicas(rocReplicaPtr);
16604       }
16605     }
16606   }
16607 }
16608 
removeOldStoredReplica(FragmentstorePtr fragPtr,ReplicaRecordPtr replicatePtr)16609 void Dbdih::removeOldStoredReplica(FragmentstorePtr fragPtr,
16610                                    ReplicaRecordPtr replicatePtr)
16611 {
16612   ReplicaRecordPtr rosTmpReplicaPtr;
16613   ReplicaRecordPtr rosPrevReplicaPtr;
16614 
16615   fragPtr.p->noOldStoredReplicas--;
16616   if (fragPtr.p->oldStoredReplicas == replicatePtr.i) {
16617     jam();
16618     fragPtr.p->oldStoredReplicas = replicatePtr.p->nextReplica;
16619   } else {
16620     rosPrevReplicaPtr.i = fragPtr.p->oldStoredReplicas;
16621     ptrCheckGuard(rosPrevReplicaPtr, creplicaFileSize, replicaRecord);
16622     rosTmpReplicaPtr.i = rosPrevReplicaPtr.p->nextReplica;
16623     while (rosTmpReplicaPtr.i != replicatePtr.i) {
16624       jam();
16625       rosPrevReplicaPtr.i = rosTmpReplicaPtr.i;
16626       ptrCheckGuard(rosPrevReplicaPtr, creplicaFileSize, replicaRecord);
16627       ptrCheckGuard(rosTmpReplicaPtr, creplicaFileSize, replicaRecord);
16628       rosTmpReplicaPtr.i = rosTmpReplicaPtr.p->nextReplica;
16629     }//if
16630     rosPrevReplicaPtr.p->nextReplica = replicatePtr.p->nextReplica;
16631   }//if
16632 }//Dbdih::removeOldStoredReplica()
16633 
removeStoredReplica(FragmentstorePtr fragPtr,ReplicaRecordPtr replicatePtr)16634 void Dbdih::removeStoredReplica(FragmentstorePtr fragPtr,
16635                                 ReplicaRecordPtr replicatePtr)
16636 {
16637   ReplicaRecordPtr rsrTmpReplicaPtr;
16638   ReplicaRecordPtr rsrPrevReplicaPtr;
16639 
16640   fragPtr.p->noStoredReplicas--;
16641   if (fragPtr.p->storedReplicas == replicatePtr.i) {
16642     jam();
16643     fragPtr.p->storedReplicas = replicatePtr.p->nextReplica;
16644   } else {
16645     jam();
16646     rsrPrevReplicaPtr.i = fragPtr.p->storedReplicas;
16647     rsrTmpReplicaPtr.i = fragPtr.p->storedReplicas;
16648     ptrCheckGuard(rsrTmpReplicaPtr, creplicaFileSize, replicaRecord);
16649     rsrTmpReplicaPtr.i = rsrTmpReplicaPtr.p->nextReplica;
16650     while (rsrTmpReplicaPtr.i != replicatePtr.i) {
16651       jam();
16652       rsrPrevReplicaPtr.i = rsrTmpReplicaPtr.i;
16653       ptrCheckGuard(rsrTmpReplicaPtr, creplicaFileSize, replicaRecord);
16654       rsrTmpReplicaPtr.i = rsrTmpReplicaPtr.p->nextReplica;
16655     }//while
16656     ptrCheckGuard(rsrPrevReplicaPtr, creplicaFileSize, replicaRecord);
16657     rsrPrevReplicaPtr.p->nextReplica = replicatePtr.p->nextReplica;
16658   }//if
16659 }//Dbdih::removeStoredReplica()
16660 
16661 /*************************************************************************/
16662 /*       REMOVE ALL TOO NEW CRASHED REPLICAS THAT IS IN THIS REPLICA.    */
16663 /*************************************************************************/
removeTooNewCrashedReplicas(ReplicaRecordPtr rtnReplicaPtr,Uint32 lastCompletedGCI)16664 void Dbdih::removeTooNewCrashedReplicas(ReplicaRecordPtr rtnReplicaPtr, Uint32 lastCompletedGCI)
16665 {
16666   while (rtnReplicaPtr.p->noCrashedReplicas > 0) {
16667     jam();
16668     /* --------------------------------------------------------------------- */
16669     /*       REMOVE ALL REPLICAS THAT ONLY LIVED IN A PERIOD THAT HAVE BEEN  */
16670     /*       REMOVED FROM THE RESTART INFORMATION SINCE THE RESTART FAILED   */
16671     /*       TOO MANY TIMES.                                                 */
16672     /* --------------------------------------------------------------------- */
16673     arrGuard(rtnReplicaPtr.p->noCrashedReplicas - 1, MAX_CRASHED_REPLICAS);
16674     if (rtnReplicaPtr.p->createGci[rtnReplicaPtr.p->noCrashedReplicas - 1] > lastCompletedGCI)
16675     {
16676       jam();
16677       rtnReplicaPtr.p->createGci[rtnReplicaPtr.p->noCrashedReplicas - 1] =
16678 	ZINIT_CREATE_GCI;
16679       rtnReplicaPtr.p->replicaLastGci[rtnReplicaPtr.p->noCrashedReplicas - 1] =
16680 	ZINIT_REPLICA_LAST_GCI;
16681       rtnReplicaPtr.p->noCrashedReplicas--;
16682     } else {
16683       break;
16684     }//if
16685   }//while
16686 }//Dbdih::removeTooNewCrashedReplicas()
16687 
16688 /*************************************************************************/
16689 /*                                                                       */
16690 /*       MODULE: SEARCH FOR POSSIBLE REPLICAS THAT CAN HANDLE THE GLOBAL */
16691 /*               CHECKPOINT WITHOUT NEEDING ANY EXTRA LOGGING FACILITIES.*/
16692 /*               A MAXIMUM OF FOUR NODES IS RETRIEVED.                   */
16693 /*************************************************************************/
16694 bool
setup_create_replica(FragmentstorePtr fragPtr,CreateReplicaRecord * createReplicaPtrP,ConstPtr<ReplicaRecord> replicaPtr)16695 Dbdih::setup_create_replica(FragmentstorePtr fragPtr,
16696 			    CreateReplicaRecord* createReplicaPtrP,
16697 			    ConstPtr<ReplicaRecord> replicaPtr)
16698 {
16699   createReplicaPtrP->dataNodeId = replicaPtr.p->procNode;
16700   createReplicaPtrP->replicaRec = replicaPtr.i;
16701 
16702   /* ----------------------------------------------------------------- */
16703   /*   WE NEED TO SEARCH FOR A PROPER LOCAL CHECKPOINT TO USE FOR THE  */
16704   /*   SYSTEM RESTART.                                                 */
16705   /* ----------------------------------------------------------------- */
16706   Uint32 startGci;
16707   Uint32 startLcpNo;
16708   Uint32 stopGci = SYSFILE->newestRestorableGCI;
16709   bool result = findStartGci(replicaPtr,
16710 			     stopGci,
16711 			     startGci,
16712 			     startLcpNo);
16713   if (!result)
16714   {
16715     jam();
16716     /* --------------------------------------------------------------- */
16717     /* WE COULD NOT FIND ANY LOCAL CHECKPOINT. THE FRAGMENT THUS DO NOT*/
16718     /* CONTAIN ANY VALID LOCAL CHECKPOINT. IT DOES HOWEVER CONTAIN A   */
16719     /* VALID FRAGMENT LOG. THUS BY FIRST CREATING THE FRAGMENT AND THEN*/
16720     /* EXECUTING THE FRAGMENT LOG WE CAN CREATE THE FRAGMENT AS        */
16721     /* DESIRED. THIS SHOULD ONLY OCCUR AFTER CREATING A FRAGMENT.      */
16722     /*                                                                 */
16723     /* TO INDICATE THAT NO LOCAL CHECKPOINT IS TO BE USED WE SET THE   */
16724     /* LOCAL CHECKPOINT TO ZNIL.                                       */
16725     /* --------------------------------------------------------------- */
16726     createReplicaPtrP->lcpNo = ZNIL;
16727   }
16728   else
16729   {
16730     jam();
16731     /* --------------------------------------------------------------- */
16732     /* WE FOUND A PROPER LOCAL CHECKPOINT TO RESTART FROM.             */
16733     /* SET LOCAL CHECKPOINT ID AND LOCAL CHECKPOINT NUMBER.            */
16734     /* --------------------------------------------------------------- */
16735     createReplicaPtrP->lcpNo = startLcpNo;
16736     arrGuard(startLcpNo, MAX_LCP_STORED);
16737     createReplicaPtrP->createLcpId = replicaPtr.p->lcpId[startLcpNo];
16738   }//if
16739 
16740 
16741   /* ----------------------------------------------------------------- */
16742   /*   WE HAVE EITHER FOUND A LOCAL CHECKPOINT OR WE ARE PLANNING TO   */
16743   /*   EXECUTE THE LOG FROM THE INITIAL CREATION OF THE TABLE. IN BOTH */
16744   /*   CASES WE NEED TO FIND A SET OF LOGS THAT CAN EXECUTE SUCH THAT  */
16745   /*   WE RECOVER TO THE SYSTEM RESTART GLOBAL CHECKPOINT.             */
16746   /* -_--------------------------------------------------------------- */
16747   return findLogNodes(createReplicaPtrP, fragPtr, startGci, stopGci);
16748 }
16749 
searchStoredReplicas(FragmentstorePtr fragPtr)16750 void Dbdih::searchStoredReplicas(FragmentstorePtr fragPtr)
16751 {
16752   Uint32 nextReplicaPtrI;
16753   Ptr<ReplicaRecord> replicaPtr;
16754 
16755   replicaPtr.i = fragPtr.p->storedReplicas;
16756   while (replicaPtr.i != RNIL) {
16757     jam();
16758     ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
16759     nextReplicaPtrI = replicaPtr.p->nextReplica;
16760     ConstPtr<ReplicaRecord> constReplicaPtr;
16761     constReplicaPtr.i = replicaPtr.i;
16762     constReplicaPtr.p = replicaPtr.p;
16763     NodeRecordPtr nodePtr;
16764     nodePtr.i = replicaPtr.p->procNode;
16765     ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
16766     if (nodePtr.p->nodeStatus == NodeRecord::ALIVE) {
16767       jam();
16768       switch (nodePtr.p->activeStatus) {
16769       case Sysfile::NS_Active:
16770       case Sysfile::NS_ActiveMissed_1:
16771       case Sysfile::NS_ActiveMissed_2:{
16772 	/* ----------------------------------------------------------------- */
16773 	/*   INITIALISE THE CREATE REPLICA STRUCTURE THAT IS USED FOR SENDING*/
16774 	/*   TO LQH START_FRAGREQ.                                           */
16775 	/*   SET THE DATA NODE WHERE THE LOCAL CHECKPOINT IS FOUND. ALSO     */
16776 	/*   SET A REFERENCE TO THE REPLICA POINTER OF THAT.                 */
16777 	/* ----------------------------------------------------------------- */
16778 	CreateReplicaRecordPtr createReplicaPtr;
16779 	createReplicaPtr.i = cnoOfCreateReplicas;
16780 	ptrCheckGuard(createReplicaPtr, 4, createReplicaRecord);
16781 	cnoOfCreateReplicas++;
16782 
16783 	/**
16784 	 * Should have been checked in resetReplicaSr
16785 	 */
16786 	ndbrequire(setup_create_replica(fragPtr,
16787 					createReplicaPtr.p,
16788 					constReplicaPtr));
16789 	break;
16790       }
16791       default:
16792         jam();
16793         /*empty*/;
16794         break;
16795       }//switch
16796     }
16797     replicaPtr.i = nextReplicaPtrI;
16798   }//while
16799 }//Dbdih::searchStoredReplicas()
16800 
16801 /*************************************************************************/
16802 /*                                                                       */
16803 /*       MODULE: SEIZE_FILE                                              */
16804 /*       DESCRIPTION: THE SUBROUTINE SEIZES A FILE RECORD FROM THE       */
16805 /*                    FREE LIST.                                         */
16806 /*************************************************************************/
seizeFile(FileRecordPtr & filePtr)16807 void Dbdih::seizeFile(FileRecordPtr& filePtr)
16808 {
16809   filePtr.i = cfirstfreeFile;
16810   ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
16811   cfirstfreeFile = filePtr.p->nextFile;
16812   filePtr.p->nextFile = RNIL;
16813 }//Dbdih::seizeFile()
16814 
16815 /*************************************************************************/
/*       SEND START_FRAGREQ TO LQH ON EACH CREATE REPLICA'S DATA NODE.   */
16817 /*************************************************************************/
16818 /*************************************************************************/
16819 /*                                                                       */
16820 /*       MODULE: FIND THE START GCI AND LOCAL CHECKPOINT TO USE.         */
16821 /*************************************************************************/
sendStartFragreq(Signal * signal,TabRecordPtr tabPtr,Uint32 fragId)16822 void Dbdih::sendStartFragreq(Signal* signal,
16823 			     TabRecordPtr tabPtr, Uint32 fragId)
16824 {
16825   CreateReplicaRecordPtr replicaPtr;
16826   for (replicaPtr.i = 0; replicaPtr.i < cnoOfCreateReplicas; replicaPtr.i++) {
16827     jam();
16828     ptrAss(replicaPtr, createReplicaRecord);
16829 
16830     BlockReference ref = numberToRef(DBLQH, replicaPtr.p->dataNodeId);
16831 
16832     StartFragReq * const startFragReq = (StartFragReq *)&signal->theData[0];
16833     startFragReq->userPtr = replicaPtr.p->replicaRec;
16834     startFragReq->userRef = reference();
16835     startFragReq->lcpNo = replicaPtr.p->lcpNo;
16836     startFragReq->lcpId = replicaPtr.p->createLcpId;
16837     startFragReq->tableId = tabPtr.i;
16838     startFragReq->fragId = fragId;
16839     startFragReq->requestInfo = StartFragReq::SFR_RESTORE_LCP;
16840 
16841     if(ERROR_INSERTED(7072) || ERROR_INSERTED(7074)){
16842       jam();
16843       const Uint32 noNodes = replicaPtr.p->noLogNodes;
16844       Uint32 start = replicaPtr.p->logStartGci[noNodes - 1];
16845       const Uint32 stop  = replicaPtr.p->logStopGci[noNodes - 1];
16846 
16847       for(Uint32 i = noNodes; i < MAX_LOG_EXEC && (stop - start) > 0; i++){
16848 	replicaPtr.p->noLogNodes++;
16849 	replicaPtr.p->logStopGci[i - 1] = start;
16850 
16851 	replicaPtr.p->logNodeId[i] = replicaPtr.p->logNodeId[i-1];
16852 	replicaPtr.p->logStartGci[i] = start + 1;
16853 	replicaPtr.p->logStopGci[i] = stop;
16854 	start += 1;
16855       }
16856     }
16857 
16858     startFragReq->noOfLogNodes = replicaPtr.p->noLogNodes;
16859 
16860     for (Uint32 i = 0; i < MAX_LOG_EXEC ; i++) {
16861       startFragReq->lqhLogNode[i] = replicaPtr.p->logNodeId[i];
16862       startFragReq->startGci[i] = replicaPtr.p->logStartGci[i];
16863       startFragReq->lastGci[i] = replicaPtr.p->logStopGci[i];
16864     }//for
16865 
16866     sendSignal(ref, GSN_START_FRAGREQ, signal,
16867 	       StartFragReq::SignalLength, JBB);
16868   }//for
16869 }//Dbdih::sendStartFragreq()
16870 
16871 /*************************************************************************/
16872 /*       SET LCP ACTIVE STATUS BEFORE STARTING A LOCAL CHECKPOINT.       */
16873 /*************************************************************************/
setLcpActiveStatusStart(Signal * signal)16874 void Dbdih::setLcpActiveStatusStart(Signal* signal)
16875 {
16876   NodeRecordPtr nodePtr;
16877 
16878   c_lcpState.m_participatingLQH.clear();
16879   c_lcpState.m_participatingDIH.clear();
16880 
16881   for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
16882     ptrAss(nodePtr, nodeRecord);
16883 #if 0
16884     if(nodePtr.p->nodeStatus != NodeRecord::NOT_IN_CLUSTER){
16885       infoEvent("Node %d nodeStatus=%d activeStatus=%d copyCompleted=%d lcp=%d",
16886 		nodePtr.i,
16887 		nodePtr.p->nodeStatus,
16888 		nodePtr.p->activeStatus,
16889 		nodePtr.p->copyCompleted,
16890 		nodePtr.p->m_inclDihLcp);
16891     }
16892 #endif
16893     if(nodePtr.p->nodeStatus == NodeRecord::ALIVE)
16894     {
16895       jam();
16896       if (nodePtr.p->m_inclDihLcp)
16897       {
16898         jam();
16899         c_lcpState.m_participatingDIH.set(nodePtr.i);
16900       }
16901 
16902       if (nodePtr.p->copyCompleted)
16903       {
16904         jam();
16905 	c_lcpState.m_participatingLQH.set(nodePtr.i);
16906       }
16907       else if (nodePtr.p->activeStatus == Sysfile::NS_Configured)
16908       {
16909         jam();
16910         continue;
16911       }
16912       else
16913       {
16914         jam();
16915         nodePtr.p->activeStatus = Sysfile::NS_ActiveMissed_1;
16916       }
16917     }
16918     else if (nodePtr.p->activeStatus == Sysfile::NS_Configured)
16919     {
16920       jam();
16921       continue;
16922     }
16923     else if (nodePtr.p->activeStatus != Sysfile::NS_NotDefined)
16924     {
16925       jam();
16926       nodePtr.p->activeStatus = Sysfile::NS_ActiveMissed_1;
16927     }
16928   }
16929 }//Dbdih::setLcpActiveStatusStart()
16930 
16931 /*************************************************************************/
16932 /*       SET LCP ACTIVE STATUS AT THE END OF A LOCAL CHECKPOINT.        */
16933 /*************************************************************************/
setLcpActiveStatusEnd(Signal * signal)16934 void Dbdih::setLcpActiveStatusEnd(Signal* signal)
16935 {
16936   NodeRecordPtr nodePtr;
16937 
16938   for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
16939     jam();
16940     ptrAss(nodePtr, nodeRecord);
16941     if (c_lcpState.m_participatingLQH.get(nodePtr.i))
16942     {
16943       jam();
16944       nodePtr.p->copyCompleted = 1;
16945       if (! (nodePtr.p->activeStatus == Sysfile::NS_Configured))
16946       {
16947         jam();
16948         nodePtr.p->activeStatus = Sysfile::NS_Active;
16949       }
16950       else
16951       {
16952         jam();
16953         // Do nothing
16954       }
16955     }
16956     else if (nodePtr.p->activeStatus == Sysfile::NS_Configured)
16957     {
16958       jam();
16959       continue;
16960     }
16961     else if (nodePtr.p->activeStatus != Sysfile::NS_NotDefined)
16962     {
16963       jam();
16964       nodePtr.p->activeStatus = Sysfile::NS_ActiveMissed_1;
16965     }
16966   }
16967 
16968   c_lcpState.m_participatingDIH.clear();
16969   c_lcpState.m_participatingLQH.clear();
16970   if (isMaster()) {
16971     jam();
16972     setNodeRestartInfoBits(signal);
16973   }//if
16974 }//Dbdih::setLcpActiveStatusEnd()
16975 
16976 /*************************************************************************/
16977 /* SET NODE ACTIVE STATUS AT SYSTEM RESTART AND WHEN UPDATED BY MASTER   */
16978 /*************************************************************************/
setNodeActiveStatus()16979 void Dbdih::setNodeActiveStatus()
16980 {
16981   NodeRecordPtr snaNodeptr;
16982 
16983   for (snaNodeptr.i = 1; snaNodeptr.i < MAX_NDB_NODES; snaNodeptr.i++)
16984   {
16985     ptrAss(snaNodeptr, nodeRecord);
16986     const Uint32 tsnaNodeBits = Sysfile::getNodeStatus(snaNodeptr.i,
16987                                                        SYSFILE->nodeStatus);
16988     switch (tsnaNodeBits) {
16989     case Sysfile::NS_Active:
16990       jam();
16991       snaNodeptr.p->activeStatus = Sysfile::NS_Active;
16992       break;
16993     case Sysfile::NS_ActiveMissed_1:
16994       jam();
16995       snaNodeptr.p->activeStatus = Sysfile::NS_ActiveMissed_1;
16996       break;
16997     case Sysfile::NS_ActiveMissed_2:
16998       jam();
16999       snaNodeptr.p->activeStatus = Sysfile::NS_ActiveMissed_2;
17000       break;
17001     case Sysfile::NS_TakeOver:
17002       jam();
17003       snaNodeptr.p->activeStatus = Sysfile::NS_TakeOver;
17004       break;
17005     case Sysfile::NS_NotActive_NotTakenOver:
17006       jam();
17007       snaNodeptr.p->activeStatus = Sysfile::NS_NotActive_NotTakenOver;
17008       break;
17009     case Sysfile::NS_NotDefined:
17010       jam();
17011       snaNodeptr.p->activeStatus = Sysfile::NS_NotDefined;
17012       break;
17013     case Sysfile::NS_Configured:
17014       jam();
17015       snaNodeptr.p->activeStatus = Sysfile::NS_Configured;
17016       break;
17017     default:
17018       ndbrequire(false);
17019       break;
17020     }//switch
17021   }//for
17022 }//Dbdih::setNodeActiveStatus()
17023 
17024 /***************************************************************************/
17025 /* SET THE NODE GROUP BASED ON THE RESTART INFORMATION OR AS SET BY MASTER */
17026 /***************************************************************************/
setNodeGroups()17027 void Dbdih::setNodeGroups()
17028 {
17029   NodeGroupRecordPtr NGPtr;
17030   NodeRecordPtr sngNodeptr;
17031   Uint32 Ti;
17032 
17033   for (Ti = 0; Ti < cnoOfNodeGroups; Ti++) {
17034     NGPtr.i = c_node_groups[Ti];
17035     ptrAss(NGPtr, nodeGroupRecord);
17036     NGPtr.p->nodeCount = 0;
17037     NGPtr.p->nodegroupIndex = RNIL;
17038   }//for
17039   cnoOfNodeGroups = 0;
17040   for (sngNodeptr.i = 1; sngNodeptr.i < MAX_NDB_NODES; sngNodeptr.i++) {
17041     ptrAss(sngNodeptr, nodeRecord);
17042     Sysfile::ActiveStatus s =
17043       (Sysfile::ActiveStatus)Sysfile::getNodeStatus(sngNodeptr.i,
17044 						    SYSFILE->nodeStatus);
17045     switch (s){
17046     case Sysfile::NS_Active:
17047     case Sysfile::NS_ActiveMissed_1:
17048     case Sysfile::NS_ActiveMissed_2:
17049     case Sysfile::NS_NotActive_NotTakenOver:
17050     case Sysfile::NS_TakeOver:
17051       jam();
17052       sngNodeptr.p->nodeGroup = Sysfile::getNodeGroup(sngNodeptr.i,
17053                                                       SYSFILE->nodeGroups);
17054       NGPtr.i = sngNodeptr.p->nodeGroup;
17055       ptrCheckGuard(NGPtr, MAX_NDB_NODES, nodeGroupRecord);
17056       NGPtr.p->nodesInGroup[NGPtr.p->nodeCount] = sngNodeptr.i;
17057       NGPtr.p->nodeCount++;
17058       add_nodegroup(NGPtr);
17059       break;
17060     case Sysfile::NS_NotDefined:
17061     case Sysfile::NS_Configured:
17062       jam();
17063       sngNodeptr.p->nodeGroup = ZNIL;
17064       break;
17065     default:
17066       ndbrequire(false);
17067       return;
17068       break;
17069     }//switch
17070   }//for
17071 }//Dbdih::setNodeGroups()
17072 
17073 /*************************************************************************/
17074 /* SET THE RESTART INFO BITS BASED ON THE NODES ACTIVE STATUS.           */
17075 /*************************************************************************/
setNodeRestartInfoBits(Signal * signal)17076 void Dbdih::setNodeRestartInfoBits(Signal * signal)
17077 {
17078   NodeRecordPtr nodePtr;
17079   Uint32 tsnrNodeGroup;
17080   Uint32 tsnrNodeActiveStatus;
17081   Uint32 i;
17082   for(i = 1; i < MAX_NDB_NODES; i++){
17083     Sysfile::setNodeStatus(i, SYSFILE->nodeStatus, Sysfile::NS_Active);
17084   }//for
17085   for(i = 1; i < Sysfile::NODE_GROUPS_SIZE; i++){
17086     SYSFILE->nodeGroups[i] = 0;
17087   }//for
17088   NdbNodeBitmask::clear(SYSFILE->lcpActive);
17089 
17090 #ifdef ERROR_INSERT
17091   NdbNodeBitmask tmp;
17092 #endif
17093 
17094   for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
17095     ptrAss(nodePtr, nodeRecord);
17096     switch (nodePtr.p->activeStatus) {
17097     case Sysfile::NS_Active:
17098       jam();
17099       tsnrNodeActiveStatus = Sysfile::NS_Active;
17100       break;
17101     case Sysfile::NS_ActiveMissed_1:
17102       jam();
17103       tsnrNodeActiveStatus = Sysfile::NS_ActiveMissed_1;
17104       break;
17105     case Sysfile::NS_ActiveMissed_2:
17106       jam();
17107       tsnrNodeActiveStatus = Sysfile::NS_ActiveMissed_2;
17108       break;
17109     case Sysfile::NS_TakeOver:
17110       jam();
17111       tsnrNodeActiveStatus = Sysfile::NS_TakeOver;
17112       break;
17113     case Sysfile::NS_NotActive_NotTakenOver:
17114       jam();
17115       tsnrNodeActiveStatus = Sysfile::NS_NotActive_NotTakenOver;
17116       break;
17117     case Sysfile::NS_NotDefined:
17118       jam();
17119       tsnrNodeActiveStatus = Sysfile::NS_NotDefined;
17120       break;
17121     case Sysfile::NS_Configured:
17122       jam();
17123       tsnrNodeActiveStatus = Sysfile::NS_Configured;
17124       break;
17125     default:
17126       ndbrequire(false);
17127       tsnrNodeActiveStatus = Sysfile::NS_NotDefined; // remove warning
17128       break;
17129     }//switch
17130     Sysfile::setNodeStatus(nodePtr.i, SYSFILE->nodeStatus,
17131                            tsnrNodeActiveStatus);
17132     if (nodePtr.p->nodeGroup == ZNIL) {
17133       jam();
17134       tsnrNodeGroup = NO_NODE_GROUP_ID;
17135     } else {
17136       jam();
17137       tsnrNodeGroup = nodePtr.p->nodeGroup;
17138     }//if
17139     Sysfile::setNodeGroup(nodePtr.i, SYSFILE->nodeGroups, tsnrNodeGroup);
17140     if (c_lcpState.m_participatingLQH.get(nodePtr.i))
17141     {
17142       jam();
17143       NdbNodeBitmask::set(SYSFILE->lcpActive, nodePtr.i);
17144     }//if
17145 #ifdef ERROR_INSERT
17146     else if (Sysfile::getLCPOngoing(SYSFILE->systemRestartBits))
17147     {
17148       jam();
17149       if (nodePtr.p->activeStatus == Sysfile::NS_Active)
17150         tmp.set(nodePtr.i);
17151     }
17152 #endif
17153   }//for
17154 
17155 #ifdef ERROR_INSERT
17156   if (ERROR_INSERTED(7220) && !tmp.isclear())
17157   {
17158     jam();
17159 
17160     NdbNodeBitmask all;
17161     nodePtr.i = cfirstAliveNode;
17162     do {
17163       jam();
17164       ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
17165       all.set(nodePtr.i);
17166       nodePtr.i = nodePtr.p->nextNode;
17167     } while (nodePtr.i != RNIL);
17168 
17169 
17170     NodeReceiverGroup rg(DBDIH, all);
17171     signal->theData[0] = 7219;
17172     sendSignal(rg, GSN_NDB_TAMPER, signal,  1, JBA);
17173   }
17174 #endif
17175 }//Dbdih::setNodeRestartInfoBits()
17176 
17177 /*************************************************************************/
17178 /*       START THE GLOBAL CHECKPOINT PROTOCOL IN MASTER AT START-UP      */
17179 /*************************************************************************/
startGcp(Signal * signal)17180 void Dbdih::startGcp(Signal* signal)
17181 {
17182   signal->theData[0] = DihContinueB::ZSTART_GCP;
17183   sendSignal(reference(), GSN_CONTINUEB, signal, 1, JBB);
17184 
17185   startGcpMonitor(signal);
17186 }//Dbdih::startGcp()
17187 
17188 void
startGcpMonitor(Signal * signal)17189 Dbdih::startGcpMonitor(Signal* signal)
17190 {
17191   jam();
17192   m_gcp_monitor.m_gcp_save.m_gci = m_gcp_save.m_gci;
17193   m_gcp_monitor.m_gcp_save.m_counter = 0;
17194   m_gcp_monitor.m_micro_gcp.m_gci = m_micro_gcp.m_current_gci;
17195   m_gcp_monitor.m_micro_gcp.m_counter = 0;
17196 
17197   signal->theData[0] = DihContinueB::ZCHECK_GCP_STOP;
17198   sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 100, 1);
17199 }
17200 
updateNodeInfo(FragmentstorePtr fragPtr)17201 void Dbdih::updateNodeInfo(FragmentstorePtr fragPtr)
17202 {
17203   ReplicaRecordPtr replicatePtr;
17204   Uint32 index = 0;
17205   replicatePtr.i = fragPtr.p->storedReplicas;
17206   do {
17207     jam();
17208     ptrCheckGuard(replicatePtr, creplicaFileSize, replicaRecord);
17209     ndbrequire(index < MAX_REPLICAS);
17210     fragPtr.p->activeNodes[index] = replicatePtr.p->procNode;
17211     index++;
17212     replicatePtr.i = replicatePtr.p->nextReplica;
17213   } while (replicatePtr.i != RNIL);
17214   fragPtr.p->fragReplicas = index;
17215 
17216   /* ----------------------------------------------------------------------- */
17217   // We switch primary to the preferred primary if the preferred primary is
17218   // in the list.
17219   /* ----------------------------------------------------------------------- */
17220   const Uint32 prefPrim = fragPtr.p->preferredPrimary;
17221   for (Uint32 i = 1; i < index; i++) {
17222     jam();
17223     ndbrequire(i < MAX_REPLICAS);
17224     if (fragPtr.p->activeNodes[i] == prefPrim){
17225       jam();
17226       Uint32 switchNode = fragPtr.p->activeNodes[0];
17227       fragPtr.p->activeNodes[0] = prefPrim;
17228       fragPtr.p->activeNodes[i] = switchNode;
17229       break;
17230     }//if
17231   }//for
17232 }//Dbdih::updateNodeInfo()
17233 
writeFragment(RWFragment * wf,FragmentstorePtr fragPtr)17234 void Dbdih::writeFragment(RWFragment* wf, FragmentstorePtr fragPtr)
17235 {
17236   writePageWord(wf, wf->fragId);
17237   writePageWord(wf, fragPtr.p->preferredPrimary);
17238   writePageWord(wf, fragPtr.p->noStoredReplicas);
17239   writePageWord(wf, fragPtr.p->noOldStoredReplicas);
17240   writePageWord(wf, fragPtr.p->distributionKey);
17241   writePageWord(wf, fragPtr.p->m_log_part_id);
17242 }//Dbdih::writeFragment()
17243 
writePageWord(RWFragment * wf,Uint32 dataWord)17244 void Dbdih::writePageWord(RWFragment* wf, Uint32 dataWord)
17245 {
17246   if (wf->wordIndex >= 2048) {
17247     jam();
17248     ndbrequire(wf->wordIndex == 2048);
17249     allocpage(wf->rwfPageptr);
17250     wf->wordIndex = 32;
17251     wf->pageIndex++;
17252     ndbrequire(wf->pageIndex < NDB_ARRAY_SIZE(wf->rwfTabPtr.p->pageRef));
17253     wf->rwfTabPtr.p->pageRef[wf->pageIndex] = wf->rwfPageptr.i;
17254     wf->rwfTabPtr.p->noPages++;
17255   }//if
17256   wf->rwfPageptr.p->word[wf->wordIndex] = dataWord;
17257   wf->wordIndex++;
17258 }//Dbdih::writePageWord()
17259 
writeReplicas(RWFragment * wf,Uint32 replicaStartIndex)17260 void Dbdih::writeReplicas(RWFragment* wf, Uint32 replicaStartIndex)
17261 {
17262   ReplicaRecordPtr wfReplicaPtr;
17263   wfReplicaPtr.i = replicaStartIndex;
17264   while (wfReplicaPtr.i != RNIL) {
17265     jam();
17266     ptrCheckGuard(wfReplicaPtr, creplicaFileSize, replicaRecord);
17267     writePageWord(wf, wfReplicaPtr.p->procNode);
17268     writePageWord(wf, wfReplicaPtr.p->initialGci);
17269     writePageWord(wf, wfReplicaPtr.p->noCrashedReplicas);
17270     writePageWord(wf, wfReplicaPtr.p->nextLcp);
17271     Uint32 i;
17272     for (i = 0; i < MAX_LCP_STORED; i++) {
17273       writePageWord(wf, wfReplicaPtr.p->maxGciCompleted[i]);
17274       writePageWord(wf, wfReplicaPtr.p->maxGciStarted[i]);
17275       writePageWord(wf, wfReplicaPtr.p->lcpId[i]);
17276       writePageWord(wf, wfReplicaPtr.p->lcpStatus[i]);
17277     }//if
17278     for (i = 0; i < MAX_CRASHED_REPLICAS; i++) {
17279       writePageWord(wf, wfReplicaPtr.p->createGci[i]);
17280       writePageWord(wf, wfReplicaPtr.p->replicaLastGci[i]);
17281     }//if
17282 
17283     wfReplicaPtr.i = wfReplicaPtr.p->nextReplica;
17284   }//while
17285 }//Dbdih::writeReplicas()
17286 
writeRestorableGci(Signal * signal,FileRecordPtr filePtr)17287 void Dbdih::writeRestorableGci(Signal* signal, FileRecordPtr filePtr)
17288 {
17289   for (Uint32 i = 0; i < Sysfile::SYSFILE_SIZE32; i++) {
17290     sysfileDataToFile[i] = sysfileData[i];
17291   }//for
17292   signal->theData[0] = filePtr.p->fileRef;
17293   signal->theData[1] = reference();
17294   signal->theData[2] = filePtr.i;
17295   signal->theData[3] = ZLIST_OF_PAIRS_SYNCH;
17296   signal->theData[4] = ZVAR_NO_CRESTART_INFO_TO_FILE;
17297   signal->theData[5] = 1; /* AMOUNT OF PAGES */
17298   signal->theData[6] = 0; /* MEMORY PAGE = 0 SINCE COMMON STORED VARIABLE  */
17299   signal->theData[7] = 0;
17300 
17301   if (ERROR_INSERTED(7224) && filePtr.i == crestartInfoFile[1])
17302   {
17303     jam();
17304     SET_ERROR_INSERT_VALUE(7225);
17305     sendSignalWithDelay(NDBFS_REF, GSN_FSWRITEREQ, signal, 500, 8);
17306 
17307     signal->theData[0] = 9999;
17308     sendSignal(numberToRef(CMVMI, refToNode(cmasterdihref)),
17309 	       GSN_NDB_TAMPER, signal, 1, JBB);
17310     return;
17311   }
17312   sendSignal(NDBFS_REF, GSN_FSWRITEREQ, signal, 8, JBA);
17313 }//Dbdih::writeRestorableGci()
17314 
writeTabfile(Signal * signal,TabRecord * tab,FileRecordPtr filePtr)17315 void Dbdih::writeTabfile(Signal* signal, TabRecord* tab, FileRecordPtr filePtr)
17316 {
17317   signal->theData[0] = filePtr.p->fileRef;
17318   signal->theData[1] = reference();
17319   signal->theData[2] = filePtr.i;
17320   signal->theData[3] = ZLIST_OF_PAIRS_SYNCH;
17321   signal->theData[4] = ZVAR_NO_WORD;
17322   signal->theData[5] = tab->noPages;
17323 
17324   Uint32 section[2 * NDB_ARRAY_SIZE(tab->pageRef)];
17325   for (Uint32 i = 0; i < tab->noPages; i++)
17326   {
17327     section[(2 * i) + 0] = tab->pageRef[i];
17328     section[(2 * i) + 1] = i;
17329   }
17330   LinearSectionPtr ptr[3];
17331   ptr[0].p = section;
17332   ptr[0].sz = 2 * tab->noPages;
17333   sendSignal(NDBFS_REF, GSN_FSWRITEREQ, signal, 6, JBA, ptr, 1);
17334 }//Dbdih::writeTabfile()
17335 
execDEBUG_SIG(Signal * signal)17336 void Dbdih::execDEBUG_SIG(Signal* signal)
17337 {
17338   signal = signal; //Avoid compiler warnings
17339 }//Dbdih::execDEBUG_SIG()
17340 
17341 void
execDUMP_STATE_ORD(Signal * signal)17342 Dbdih::execDUMP_STATE_ORD(Signal* signal)
17343 {
17344   DumpStateOrd * const & dumpState = (DumpStateOrd *)&signal->theData[0];
17345   Uint32 arg = dumpState->args[0];
17346   if (arg == DumpStateOrd::DihDumpNodeRestartInfo) {
17347     infoEvent("c_nodeStartMaster.blockLcp = %d, c_nodeStartMaster.blockGcp = %d, c_nodeStartMaster.wait = %d",
17348 	      c_nodeStartMaster.blockLcp, c_nodeStartMaster.blockGcp, c_nodeStartMaster.wait);
17349     for (Uint32 i = 0; i < c_diverify_queue_cnt; i++)
17350     {
17351       infoEvent("[ %u : cfirstVerifyQueue = %u clastVerifyQueue = %u sz: %u]",
17352                 i,
17353                 c_diverify_queue[i].cfirstVerifyQueue,
17354                 c_diverify_queue[i].clastVerifyQueue,
17355                 capiConnectFileSize);
17356     }
17357     infoEvent("cgcpOrderBlocked = %d",
17358               cgcpOrderBlocked);
17359   }//if
17360   if (arg == DumpStateOrd::DihDumpNodeStatusInfo) {
17361     NodeRecordPtr localNodePtr;
17362     infoEvent("Printing nodeStatus of all nodes");
17363     for (localNodePtr.i = 1; localNodePtr.i < MAX_NDB_NODES; localNodePtr.i++) {
17364       ptrAss(localNodePtr, nodeRecord);
17365       if (localNodePtr.p->nodeStatus != NodeRecord::NOT_IN_CLUSTER) {
17366         infoEvent("Node = %d has status = %d",
17367 		  localNodePtr.i, localNodePtr.p->nodeStatus);
17368       }//if
17369     }//for
17370   }//if
17371 
17372   if (arg == DumpStateOrd::DihPrintFragmentation)
17373   {
17374     infoEvent("Printing nodegroups --");
17375     for (Uint32 i = 0; i<cnoOfNodeGroups; i++)
17376     {
17377       NodeGroupRecordPtr NGPtr;
17378       NGPtr.i = c_node_groups[i];
17379       ptrCheckGuard(NGPtr, MAX_NDB_NODES, nodeGroupRecord);
17380 
17381       infoEvent("NG %u(%u) ref: %u [ cnt: %u : %u %u %u %u ]",
17382                 NGPtr.i, NGPtr.p->nodegroupIndex, NGPtr.p->m_ref_count,
17383                 NGPtr.p->nodeCount,
17384                 NGPtr.p->nodesInGroup[0], NGPtr.p->nodesInGroup[1], NGPtr.p->nodesInGroup[2], NGPtr.p->nodesInGroup[3]);
17385     }
17386 
17387     infoEvent("Printing fragmentation of all tables --");
17388     for(Uint32 i = 0; i<ctabFileSize; i++){
17389       TabRecordPtr tabPtr;
17390       tabPtr.i = i;
17391       ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
17392 
17393       if(tabPtr.p->tabStatus != TabRecord::TS_ACTIVE)
17394 	continue;
17395 
17396       for(Uint32 j = 0; j < tabPtr.p->totalfragments; j++){
17397 	FragmentstorePtr fragPtr;
17398 	getFragstore(tabPtr.p, j, fragPtr);
17399 
17400 	Uint32 nodeOrder[MAX_REPLICAS];
17401 	const Uint32 noOfReplicas = extractNodeInfo(fragPtr.p, nodeOrder);
17402 	char buf[100];
17403 	BaseString::snprintf(buf, sizeof(buf), " Table %d Fragment %d(%u) LP: %u - ", tabPtr.i, j, dihGetInstanceKey(fragPtr), fragPtr.p->m_log_part_id);
17404 	for(Uint32 k = 0; k < noOfReplicas; k++){
17405 	  char tmp[100];
17406 	  BaseString::snprintf(tmp, sizeof(tmp), "%d ", nodeOrder[k]);
17407 	  strcat(buf, tmp);
17408 	}
17409 	infoEvent("%s", buf);
17410       }
17411     }
17412   }
17413 
17414   if (signal->theData[0] == 7000) {
17415     infoEvent("ctimer = %d",
17416               c_lcpState.ctimer);
17417     infoEvent("cmasterState = %d", cmasterState);
17418     infoEvent("cmasterTakeOverNode = %d, ctcCounter = %d",
17419               cmasterTakeOverNode, c_lcpState.ctcCounter);
17420   }//if
17421   if (signal->theData[0] == 7001) {
17422     infoEvent("c_lcpState.keepGci = %d",
17423               c_lcpState.keepGci);
17424     infoEvent("c_lcpState.lcpStatus = %d, clcpStopGcp = %d",
17425               c_lcpState.lcpStatus,
17426 	      c_lcpState.lcpStopGcp);
17427     infoEvent("cimmediateLcpStart = %d",
17428               c_lcpState.immediateLcpStart);
17429   }//if
17430   if (signal->theData[0] == 7002) {
17431     infoEvent("cnoOfActiveTables = %d",
17432               cnoOfActiveTables);
17433     infoEvent("cdictblockref = %d, cfailurenr = %d",
17434               cdictblockref, cfailurenr);
17435     infoEvent("con_lineNodes = %d, reference() = %d, creceivedfrag = %d",
17436               con_lineNodes, reference(), creceivedfrag);
17437   }//if
17438   if (signal->theData[0] == 7003) {
17439     infoEvent("cfirstAliveNode = %d, cgckptflag = %d",
17440               cfirstAliveNode, cgckptflag);
17441     infoEvent("clocallqhblockref = %d, clocaltcblockref = %d, cgcpOrderBlocked = %d",
17442               clocallqhblockref, clocaltcblockref, cgcpOrderBlocked);
17443     infoEvent("cstarttype = %d, csystemnodes = %d",
17444               cstarttype, csystemnodes);
17445   }//if
17446   if (signal->theData[0] == 7004) {
17447     infoEvent("cmasterdihref = %d, cownNodeId = %d",
17448               cmasterdihref, cownNodeId);
17449     infoEvent("cndbStartReqBlockref = %d, cremainingfrags = %d",
17450               cndbStartReqBlockref, cremainingfrags);
17451   }//if
17452   if (signal->theData[0] == 7005) {
17453     infoEvent("crestartGci = %d",
17454               crestartGci);
17455   }//if
17456   if (signal->theData[0] == 7006) {
17457     infoEvent("clcpDelay = %d",
17458               c_lcpState.clcpDelay);
17459     infoEvent("cmasterNodeId = %d", cmasterNodeId);
17460     infoEvent("c_nodeStartMaster.startNode = %d, c_nodeStartMaster.wait = %d",
17461               c_nodeStartMaster.startNode, c_nodeStartMaster.wait);
17462   }//if
17463   if (signal->theData[0] == 7007) {
17464     infoEvent("c_nodeStartMaster.failNr = %d", c_nodeStartMaster.failNr);
17465     infoEvent("c_nodeStartMaster.startInfoErrorCode = %d",
17466               c_nodeStartMaster.startInfoErrorCode);
17467     infoEvent("c_nodeStartMaster.blockLcp = %d, c_nodeStartMaster.blockGcp = %d",
17468               c_nodeStartMaster.blockLcp, c_nodeStartMaster.blockGcp);
17469   }//if
17470   if (signal->theData[0] == 7008) {
17471     infoEvent("cfirstDeadNode = %d, cstartPhase = %d, cnoReplicas = %d",
17472               cfirstDeadNode, cstartPhase, cnoReplicas);
17473     infoEvent("cwaitLcpSr = %d",cwaitLcpSr);
17474   }//if
17475   if (signal->theData[0] == 7009) {
17476     infoEvent("ccalcOldestRestorableGci = %d, cnoOfNodeGroups = %d",
17477               c_lcpState.oldestRestorableGci, cnoOfNodeGroups);
17478     infoEvent("crestartGci = %d",
17479               crestartGci);
17480   }//if
17481   if (signal->theData[0] == 7010) {
17482     infoEvent("c_lcpState.lcpStatusUpdatedPlace = %d, cLcpStart = %d",
17483               c_lcpState.lcpStatusUpdatedPlace, c_lcpState.lcpStart);
17484     infoEvent("c_blockCommit = %d, c_blockCommitNo = %d",
17485               c_blockCommit, c_blockCommitNo);
17486   }//if
17487   if (signal->theData[0] == 7011){
17488     infoEvent("c_COPY_GCIREQ_Counter = %s",
17489 	      c_COPY_GCIREQ_Counter.getText());
17490     infoEvent("c_COPY_TABREQ_Counter = %s",
17491 	      c_COPY_TABREQ_Counter.getText());
17492     infoEvent("c_CREATE_FRAGREQ_Counter = %s",
17493 	      c_CREATE_FRAGREQ_Counter.getText());
17494     infoEvent("c_DIH_SWITCH_REPLICA_REQ_Counter = %s",
17495 	      c_DIH_SWITCH_REPLICA_REQ_Counter.getText());
17496     infoEvent("c_EMPTY_LCP_REQ_Counter = %s",c_EMPTY_LCP_REQ_Counter.getText());
17497     infoEvent("c_GCP_COMMIT_Counter = %s", c_GCP_COMMIT_Counter.getText());
17498     infoEvent("c_GCP_PREPARE_Counter = %s", c_GCP_PREPARE_Counter.getText());
17499     infoEvent("c_GCP_SAVEREQ_Counter = %s", c_GCP_SAVEREQ_Counter.getText());
17500     infoEvent("c_SUB_GCP_COMPLETE_REP_Counter = %s",
17501               c_SUB_GCP_COMPLETE_REP_Counter.getText());
17502     infoEvent("c_INCL_NODEREQ_Counter = %s", c_INCL_NODEREQ_Counter.getText());
17503     infoEvent("c_MASTER_GCPREQ_Counter = %s",
17504 	      c_MASTER_GCPREQ_Counter.getText());
17505     infoEvent("c_MASTER_LCPREQ_Counter = %s",
17506 	      c_MASTER_LCPREQ_Counter.getText());
17507     infoEvent("c_START_INFOREQ_Counter = %s",
17508 	      c_START_INFOREQ_Counter.getText());
17509     infoEvent("c_START_RECREQ_Counter = %s", c_START_RECREQ_Counter.getText());
17510     infoEvent("c_STOP_ME_REQ_Counter = %s", c_STOP_ME_REQ_Counter.getText());
17511     infoEvent("c_TC_CLOPSIZEREQ_Counter = %s",
17512 	      c_TC_CLOPSIZEREQ_Counter.getText());
17513     infoEvent("c_TCGETOPSIZEREQ_Counter = %s",
17514 	      c_TCGETOPSIZEREQ_Counter.getText());
17515   }
17516 
17517   if(signal->theData[0] == 7012){
17518     char buf[8*_NDB_NODE_BITMASK_SIZE+1];
17519     infoEvent("ParticipatingDIH = %s", c_lcpState.m_participatingDIH.getText(buf));
17520     infoEvent("ParticipatingLQH = %s", c_lcpState.m_participatingLQH.getText(buf));
17521     infoEvent("m_LCP_COMPLETE_REP_Counter_DIH = %s",
17522 	      c_lcpState.m_LCP_COMPLETE_REP_Counter_DIH.getText());
17523     infoEvent("m_LCP_COMPLETE_REP_Counter_LQH = %s",
17524 	      c_lcpState.m_LCP_COMPLETE_REP_Counter_LQH.getText());
17525     infoEvent("m_LAST_LCP_FRAG_ORD = %s",
17526 	      c_lcpState.m_LAST_LCP_FRAG_ORD.getText());
17527     infoEvent("m_LCP_COMPLETE_REP_From_Master_Received = %d",
17528 	      c_lcpState.m_LCP_COMPLETE_REP_From_Master_Received);
17529 
17530     NodeRecordPtr nodePtr;
17531     for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
17532       jam();
17533       ptrAss(nodePtr, nodeRecord);
17534       if(nodePtr.p->nodeStatus == NodeRecord::ALIVE){
17535         Uint32 i;
17536 	for(i = 0; i<nodePtr.p->noOfStartedChkpt; i++){
17537 	  infoEvent("Node %d: started: table=%d fragment=%d replica=%d",
17538 		    nodePtr.i,
17539 		    nodePtr.p->startedChkpt[i].tableId,
17540 		    nodePtr.p->startedChkpt[i].fragId,
17541 		    nodePtr.p->startedChkpt[i].replicaPtr);
17542 	}
17543 
17544 	for(i = 0; i<nodePtr.p->noOfQueuedChkpt; i++){
17545 	  infoEvent("Node %d: queued: table=%d fragment=%d replica=%d",
17546 		    nodePtr.i,
17547 		    nodePtr.p->queuedChkpt[i].tableId,
17548 		    nodePtr.p->queuedChkpt[i].fragId,
17549 		    nodePtr.p->queuedChkpt[i].replicaPtr);
17550 	}
17551       }
17552     }
17553   }
17554 
17555   if(arg == 7019 && signal->getLength() == 2 &&
17556      signal->theData[1] < MAX_NDB_NODES)
17557   {
17558     char buf2[8+1];
17559     NodeRecordPtr nodePtr;
17560     nodePtr.i = signal->theData[1];
17561     ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
17562     infoEvent("NF Node %d tc: %d lqh: %d dih: %d dict: %d recNODE_FAILREP: %d",
17563 	      nodePtr.i,
17564 	      nodePtr.p->dbtcFailCompleted,
17565 	      nodePtr.p->dblqhFailCompleted,
17566 	      nodePtr.p->dbdihFailCompleted,
17567 	      nodePtr.p->dbdictFailCompleted,
17568 	      nodePtr.p->recNODE_FAILREP);
17569     infoEvent(" m_NF_COMPLETE_REP: %s m_nodefailSteps: %s",
17570 	      nodePtr.p->m_NF_COMPLETE_REP.getText(),
17571 	      nodePtr.p->m_nodefailSteps.getText(buf2));
17572   }
17573 
17574   if(arg == 7020 && signal->getLength() > 3)
17575   {
17576     Uint32 gsn= signal->theData[1];
17577     Uint32 block= signal->theData[2];
17578     Uint32 length= signal->length() - 3;
17579     memmove(signal->theData, signal->theData+3, 4*length);
17580     sendSignal(numberToRef(block, getOwnNodeId()), gsn, signal, length, JBB);
17581 
17582     warningEvent("-- SENDING CUSTOM SIGNAL --");
17583     char buf[100], buf2[100];
17584     buf2[0]= 0;
17585     for(Uint32 i = 0; i<length; i++)
17586     {
17587       BaseString::snprintf(buf, 100, "%s %.8x", buf2, signal->theData[i]);
17588       BaseString::snprintf(buf2, 100, "%s", buf);
17589     }
17590     warningEvent("gsn: %d block: %s, length: %d theData: %s",
17591 		 gsn, getBlockName(block, "UNKNOWN"), length, buf);
17592 
17593     g_eventLogger->warning("-- SENDING CUSTOM SIGNAL --");
17594     g_eventLogger->warning("gsn: %d block: %s, length: %d theData: %s",
17595                            gsn, getBlockName(block, "UNKNOWN"), length, buf);
17596   }
17597 
17598   if(arg == DumpStateOrd::DihDumpLCPState){
17599     infoEvent("-- Node %d LCP STATE --", getOwnNodeId());
17600     infoEvent("lcpStatus = %d (update place = %d) ",
17601 	      c_lcpState.lcpStatus, c_lcpState.lcpStatusUpdatedPlace);
17602     infoEvent
17603       ("lcpStart = %d lcpStopGcp = %d keepGci = %d oldestRestorable = %d",
17604        c_lcpState.lcpStart, c_lcpState.lcpStopGcp,
17605        c_lcpState.keepGci, c_lcpState.oldestRestorableGci);
17606 
17607     infoEvent
17608       ("immediateLcpStart = %d masterLcpNodeId = %d",
17609        c_lcpState.immediateLcpStart,
17610        refToNode(c_lcpState.m_masterLcpDihRef));
17611 
17612     for (Uint32 i = 0; i<10; i++)
17613     {
17614       infoEvent("%u : status: %u place: %u", i,
17615                 c_lcpState.m_saveState[i].m_status,
17616                 c_lcpState.m_saveState[i].m_place);
17617     }
17618 
17619     infoEvent("-- Node %d LCP STATE --", getOwnNodeId());
17620   }
17621 
17622   if(arg == DumpStateOrd::DihDumpLCPMasterTakeOver){
17623     infoEvent("-- Node %d LCP MASTER TAKE OVER STATE --", getOwnNodeId());
17624     infoEvent
17625       ("c_lcpMasterTakeOverState.state = %d updatePlace = %d failedNodeId = %d",
17626        c_lcpMasterTakeOverState.state,
17627        c_lcpMasterTakeOverState.updatePlace,
17628        c_lcpMasterTakeOverState.failedNodeId);
17629 
17630     infoEvent("c_lcpMasterTakeOverState.minTableId = %u minFragId = %u",
17631 	      c_lcpMasterTakeOverState.minTableId,
17632 	      c_lcpMasterTakeOverState.minFragId);
17633 
17634     infoEvent("-- Node %d LCP MASTER TAKE OVER STATE --", getOwnNodeId());
17635   }
17636 
17637   if (signal->theData[0] == 7015)
17638   {
17639     if (signal->getLength() == 1)
17640     {
17641       signal->theData[1] = 0;
17642     }
17643 
17644     Uint32 tableId = signal->theData[1];
17645     if (tableId < ctabFileSize)
17646     {
17647       signal->theData[0] = 7021;
17648       execDUMP_STATE_ORD(signal);
17649       signal->theData[0] = 7015;
17650       signal->theData[1] = tableId + 1;
17651       sendSignal(reference(), GSN_DUMP_STATE_ORD, signal, 2, JBB);
17652     }
17653   }
17654 
17655   if(arg == DumpStateOrd::EnableUndoDelayDataWrite){
17656     g_eventLogger->info("Dbdih:: delay write of datapages for table = %d",
17657                         dumpState->args[1]);
17658     // Send this dump to ACC and TUP
17659     sendSignal(DBACC_REF, GSN_DUMP_STATE_ORD, signal, 2, JBB);
17660     sendSignal(DBTUP_REF, GSN_DUMP_STATE_ORD, signal, 2, JBB);
17661 
17662     // Start immediate LCP
17663     add_lcp_counter(&c_lcpState.ctimer, (1 << 31));
17664     return;
17665   }
17666 
17667   if (signal->theData[0] == DumpStateOrd::DihAllAllowNodeStart) {
17668     for (Uint32 i = 1; i < MAX_NDB_NODES; i++)
17669       setAllowNodeStart(i, true);
17670     return;
17671   }//if
17672   if (signal->theData[0] == DumpStateOrd::DihMinTimeBetweenLCP) {
17673     // Set time between LCP to min value
17674     if (signal->getLength() == 2)
17675     {
17676       Uint32 tmp;
17677       const ndb_mgm_configuration_iterator * p =
17678 	m_ctx.m_config.getOwnConfigIterator();
17679       ndbrequire(p != 0);
17680       ndb_mgm_get_int_parameter(p, CFG_DB_LCP_INTERVAL, &tmp);
17681       g_eventLogger->info("Reset time between LCP to %u", tmp);
17682       c_lcpState.clcpDelay = tmp;
17683     }
17684     else
17685     {
17686       g_eventLogger->info("Set time between LCP to min value");
17687       c_lcpState.clcpDelay = 0; // TimeBetweenLocalCheckpoints.min
17688     }
17689     return;
17690   }
17691   if (signal->theData[0] == DumpStateOrd::DihMaxTimeBetweenLCP) {
17692     // Set time between LCP to max value
17693     g_eventLogger->info("Set time between LCP to max value");
17694     c_lcpState.clcpDelay = 31; // TimeBetweenLocalCheckpoints.max
17695     return;
17696   }
17697 
17698   if(arg == 7098){
17699     if(signal->length() == 3){
17700       jam();
17701       infoEvent("startLcpRoundLoopLab(tabel=%d, fragment=%d)",
17702 		signal->theData[1], signal->theData[2]);
17703       startLcpRoundLoopLab(signal, signal->theData[1], signal->theData[2]);
17704       return;
17705     } else {
17706       infoEvent("Invalid no of arguments to 7098 - startLcpRoundLoopLab -"
17707 		" expected 2 (tableId, fragmentId)");
17708     }
17709   }
17710 
17711   if (arg == DumpStateOrd::DihStartLcpImmediately)
17712   {
17713     jam();
17714     add_lcp_counter(&c_lcpState.ctimer, (1 << 31));
17715 
17716     /**
17717      * If sent from local LQH, forward to master
17718      */
17719     if (cmasterNodeId != getOwnNodeId() &&
17720         refToMain(signal->getSendersBlockRef()) == DBLQH)
17721     {
17722       jam();
17723       sendSignal(cmasterdihref, GSN_DUMP_STATE_ORD, signal, 1, JBB);
17724     }
17725     return;
17726   }
17727 
17728   if (arg == DumpStateOrd::DihSetTimeBetweenGcp)
17729   {
17730     Uint32 tmp = 0;
17731     if (signal->getLength() == 1)
17732     {
17733       const ndb_mgm_configuration_iterator * p =
17734 	m_ctx.m_config.getOwnConfigIterator();
17735       ndbrequire(p != 0);
17736       ndb_mgm_get_int_parameter(p, CFG_DB_GCP_INTERVAL, &tmp);
17737     }
17738     else
17739     {
17740       tmp = signal->theData[1];
17741     }
17742     m_gcp_save.m_master.m_time_between_gcp = tmp;
17743     g_eventLogger->info("Setting time between gcp : %d", tmp);
17744   }
17745 
17746   if (arg == 7021 && signal->getLength() == 2)
17747   {
17748     TabRecordPtr tabPtr;
17749     tabPtr.i = signal->theData[1];
17750     if (tabPtr.i >= ctabFileSize)
17751       return;
17752 
17753     ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
17754 
17755     if(tabPtr.p->tabStatus != TabRecord::TS_ACTIVE)
17756       return;
17757 
17758     infoEvent
17759       ("Table %d: TabCopyStatus: %d TabUpdateStatus: %d TabLcpStatus: %d",
17760        tabPtr.i,
17761        tabPtr.p->tabCopyStatus,
17762        tabPtr.p->tabUpdateState,
17763        tabPtr.p->tabLcpStatus);
17764 
17765     FragmentstorePtr fragPtr;
17766     for (Uint32 fid = 0; fid < tabPtr.p->totalfragments; fid++) {
17767       jam();
17768       getFragstore(tabPtr.p, fid, fragPtr);
17769 
17770       char buf[100], buf2[100];
17771       BaseString::snprintf(buf, sizeof(buf), " Fragment %d: noLcpReplicas==%d ",
17772 			   fid, fragPtr.p->noLcpReplicas);
17773 
17774       Uint32 num=0;
17775       ReplicaRecordPtr replicaPtr;
17776       replicaPtr.i = fragPtr.p->storedReplicas;
17777       do {
17778 	ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
17779 	BaseString::snprintf(buf2, sizeof(buf2), "%s %d(on %d)=%d(%s)",
17780 			     buf, num,
17781 			     replicaPtr.p->procNode,
17782 			     replicaPtr.p->lcpIdStarted,
17783 			     replicaPtr.p->lcpOngoingFlag ? "Ongoing" : "Idle");
17784 	BaseString::snprintf(buf, sizeof(buf), "%s", buf2);
17785 
17786 	num++;
17787 	replicaPtr.i = replicaPtr.p->nextReplica;
17788       } while (replicaPtr.i != RNIL);
17789       infoEvent("%s", buf);
17790     }
17791   }
17792 
17793   if (arg == 7022)
17794   {
17795     jam();
17796     crashSystemAtGcpStop(signal, true);
17797   }
17798 
17799   if (arg == 7025)
17800   {
17801     jam();
17802     dumpGcpStop();
17803     return;
17804   }
17805 
17806 #ifdef GCP_TIMER_HACK
17807   if (signal->theData[0] == 7901)
17808     globalData.gcp_timer_limit = signal->theData[1];
17809 #endif
17810   if (arg == 7023)
17811   {
17812     /**
17813      * Dump all active TakeOver
17814      */
17815     Ptr<TakeOverRecord> ptr;
17816     ptr.i = signal->theData[1];
17817     if (signal->getLength() == 1)
17818     {
17819       infoEvent("Starting dump all active take-over");
17820       c_activeTakeOverList.first(ptr);
17821     }
17822 
17823     if (ptr.i == RNIL)
17824     {
17825       infoEvent("Dump all active take-over done");
17826       return;
17827     }
17828 
17829     c_activeTakeOverList.getPtr(ptr);
17830     infoEvent("TakeOverPtr(%u) starting: %u flags: 0x%x ref: 0x%x, data: %u",
17831               ptr.i,
17832               ptr.p->toStartingNode,
17833               ptr.p->m_flags,
17834               ptr.p->m_senderRef,
17835               ptr.p->m_senderData);
17836     infoEvent("slaveState: %u masterState: %u",
17837               ptr.p->toSlaveStatus, ptr.p->toMasterStatus);
17838     infoEvent("restorableGci: %u startGci: %u tab: %u frag: %u src: %u max: %u",
17839               ptr.p->restorableGci, ptr.p->startGci,
17840               ptr.p->toCurrentTabref, ptr.p->toCurrentFragid,
17841               ptr.p->toCopyNode, ptr.p->maxPage);
17842 
17843     c_activeTakeOverList.next(ptr);
17844     signal->theData[0] = arg;
17845     signal->theData[1] = ptr.i;
17846   }
17847 
17848   if (arg == DumpStateOrd::SchemaResourceSnapshot)
17849   {
17850     RSS_OP_SNAPSHOT_SAVE(cremainingfrags);
17851     RSS_OP_SNAPSHOT_SAVE(cnoFreeReplicaRec);
17852 
17853     {
17854       Uint32 cnghash = 0;
17855       NodeGroupRecordPtr NGPtr;
17856       for (Uint32 i = 0; i<cnoOfNodeGroups; i++)
17857       {
17858         NGPtr.i = c_node_groups[i];
17859         ptrCheckGuard(NGPtr, MAX_NDB_NODES, nodeGroupRecord);
17860         cnghash = (cnghash * 33) + NGPtr.p->m_ref_count;
17861       }
17862       RSS_OP_SNAPSHOT_SAVE(cnghash);
17863     }
17864     return;
17865   }
17866 
17867   if (arg == DumpStateOrd::SchemaResourceCheckLeak)
17868   {
17869     RSS_OP_SNAPSHOT_CHECK(cremainingfrags);
17870     RSS_OP_SNAPSHOT_SAVE(cnoFreeReplicaRec);
17871 
17872     {
17873       Uint32 cnghash = 0;
17874       NodeGroupRecordPtr NGPtr;
17875       for (Uint32 i = 0; i<cnoOfNodeGroups; i++)
17876       {
17877         NGPtr.i = c_node_groups[i];
17878         ptrCheckGuard(NGPtr, MAX_NDB_NODES, nodeGroupRecord);
17879         cnghash = (cnghash * 33) + NGPtr.p->m_ref_count;
17880       }
17881       RSS_OP_SNAPSHOT_CHECK(cnghash);
17882     }
17883   }
17884 
17885   DECLARE_DUMP0(DBDIH, 7213, "Set error 7213 with extra arg")
17886   {
17887     SET_ERROR_INSERT_VALUE2(7213, signal->theData[1]);
17888     return;
17889   }
17890   DECLARE_DUMP0(DBDIH, 7214, "Set error 7214 with extra arg")
17891   {
17892     SET_ERROR_INSERT_VALUE2(7214, signal->theData[1]);
17893     return;
17894   }
17895 
17896   DECLARE_DUMP0(DBDIH, 7216, "Set error 7216 with extra arg")
17897   {
17898     SET_ERROR_INSERT_VALUE2(7216, signal->theData[1]);
17899     return;
17900   }
17901   DECLARE_DUMP0(DBDIH, 6099, "Start microgcp")
17902   {
17903     if (isMaster())
17904     {
17905       jam();
17906       m_micro_gcp.m_master.m_start_time = 0;
17907     }
17908     else
17909     {
17910       jam();
17911       sendSignal(cmasterdihref, GSN_DUMP_STATE_ORD, signal, 1, JBB);
17912     }
17913     return;
17914   }
17915   DECLARE_DUMP0(DBDIH, 7999, "Set error code with extra arg")
17916   {
17917     SET_ERROR_INSERT_VALUE2(signal->theData[1],
17918                             signal->theData[2]);
17919   }
17920 }//Dbdih::execDUMP_STATE_ORD()
17921 
17922 void
execPREP_DROP_TAB_REQ(Signal * signal)17923 Dbdih::execPREP_DROP_TAB_REQ(Signal* signal){
17924   jamEntry();
17925 
17926   PrepDropTabReq* req = (PrepDropTabReq*)signal->getDataPtr();
17927 
17928   TabRecordPtr tabPtr;
17929   tabPtr.i = req->tableId;
17930   ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
17931 
17932   Uint32 senderRef = req->senderRef;
17933   Uint32 senderData = req->senderData;
17934 
17935   PrepDropTabRef::ErrorCode err = PrepDropTabRef::OK;
17936   { /**
17937      * Check table state
17938      */
17939     bool ok = false;
17940     switch(tabPtr.p->tabStatus){
17941     case TabRecord::TS_IDLE:
17942       ok = true;
17943       jam();
17944       err = PrepDropTabRef::NoSuchTable;
17945       break;
17946     case TabRecord::TS_DROPPING:
17947       ok = true;
17948       jam();
17949       err = PrepDropTabRef::PrepDropInProgress;
17950       break;
17951     case TabRecord::TS_CREATING:
17952       jam();
17953       ok = true;
17954       break;
17955     case TabRecord::TS_ACTIVE:
17956       ok = true;
17957       jam();
17958       break;
17959     }
17960     ndbrequire(ok);
17961   }
17962 
17963   if(err != PrepDropTabRef::OK)
17964   {
17965     jam();
17966     PrepDropTabRef* ref = (PrepDropTabRef*)signal->getDataPtrSend();
17967     ref->senderRef = reference();
17968     ref->senderData = senderData;
17969     ref->tableId = tabPtr.i;
17970     ref->errorCode = err;
17971     sendSignal(senderRef, GSN_PREP_DROP_TAB_REF, signal,
17972 	       PrepDropTabRef::SignalLength, JBB);
17973     return;
17974   }
17975 
17976   tabPtr.p->tabStatus = TabRecord::TS_DROPPING;
17977   PrepDropTabConf* conf = (PrepDropTabConf*)signal->getDataPtrSend();
17978   conf->tableId = tabPtr.i;
17979   conf->senderRef = reference();
17980   conf->senderData = senderData;
17981   sendSignal(senderRef, GSN_PREP_DROP_TAB_CONF,
17982              signal, PrepDropTabConf::SignalLength, JBB);
17983 }
17984 
17985 void
waitDropTabWritingToFile(Signal * signal,TabRecordPtr tabPtr)17986 Dbdih::waitDropTabWritingToFile(Signal* signal, TabRecordPtr tabPtr){
17987 
17988   if (tabPtr.p->tabLcpStatus == TabRecord::TLS_WRITING_TO_FILE)
17989   {
17990     jam();
17991     signal->theData[0] = DihContinueB::WAIT_DROP_TAB_WRITING_TO_FILE;
17992     signal->theData[1] = tabPtr.i;
17993     sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 100, 2);
17994     return;
17995   }
17996 
17997   ndbrequire(tabPtr.p->tabLcpStatus ==  TabRecord::TLS_COMPLETED);
17998   checkDropTabComplete(signal, tabPtr);
17999 }
18000 
18001 void
checkDropTabComplete(Signal * signal,TabRecordPtr tabPtr)18002 Dbdih::checkDropTabComplete(Signal* signal, TabRecordPtr tabPtr)
18003 {
18004   startDeleteFile(signal, tabPtr);
18005 }
18006 
18007 void
execNDB_TAMPER(Signal * signal)18008 Dbdih::execNDB_TAMPER(Signal* signal)
18009 {
18010   if ((ERROR_INSERTED(7011)) &&
18011       (signal->theData[0] == 7012)) {
18012     CLEAR_ERROR_INSERT_VALUE;
18013     calculateKeepGciLab(signal, 0, 0);
18014     return;
18015   }//if
18016   SET_ERROR_INSERT_VALUE(signal->theData[0]);
18017   return;
18018 }//Dbdih::execNDB_TAMPER()
18019 
execBLOCK_COMMIT_ORD(Signal * signal)18020 void Dbdih::execBLOCK_COMMIT_ORD(Signal* signal){
18021   BlockCommitOrd* const block = (BlockCommitOrd *)&signal->theData[0];
18022 
18023   jamEntry();
18024 
18025   c_blockCommit = true;
18026   c_blockCommitNo = block->failNo;
18027 }
18028 
execUNBLOCK_COMMIT_ORD(Signal * signal)18029 void Dbdih::execUNBLOCK_COMMIT_ORD(Signal* signal){
18030   UnblockCommitOrd* const unblock = (UnblockCommitOrd *)&signal->theData[0];
18031   (void)unblock;
18032 
18033   jamEntry();
18034 
18035   if(c_blockCommit == true)
18036   {
18037     jam();
18038 
18039     c_blockCommit = false;
18040     for (Uint32 i = 0; i<c_diverify_queue_cnt; i++)
18041     {
18042       c_diverify_queue[i].m_empty_done = 0;
18043       emptyverificbuffer(signal, i, true);
18044     }
18045   }
18046 }
18047 
execSTOP_PERM_REQ(Signal * signal)18048 void Dbdih::execSTOP_PERM_REQ(Signal* signal){
18049 
18050   jamEntry();
18051 
18052   StopPermReq* const req = (StopPermReq*)&signal->theData[0];
18053   StopPermRef* const ref = (StopPermRef*)&signal->theData[0];
18054 
18055   const Uint32 senderData = req->senderData;
18056   const BlockReference senderRef = req->senderRef;
18057   const NodeId nodeId = refToNode(senderRef);
18058 
18059   if (isMaster()) {
18060     /**
18061      * Master
18062      */
18063     jam();
18064     CRASH_INSERTION(7065);
18065     if (c_stopPermMaster.clientRef != 0) {
18066       jam();
18067 
18068       ref->senderData = senderData;
18069       ref->errorCode  = StopPermRef::NodeShutdownInProgress;
18070       sendSignal(senderRef, GSN_STOP_PERM_REF, signal,
18071                  StopPermRef::SignalLength, JBB);
18072       return;
18073     }//if
18074 
18075     if (c_nodeStartMaster.activeState) {
18076       jam();
18077       ref->senderData = senderData;
18078       ref->errorCode  = StopPermRef::NodeStartInProgress;
18079       sendSignal(senderRef, GSN_STOP_PERM_REF, signal,
18080                  StopPermRef::SignalLength, JBB);
18081       return;
18082     }//if
18083 
18084     /**
18085      * Lock
18086      */
18087     c_nodeStartMaster.activeState = true;
18088     c_stopPermMaster.clientRef = senderRef;
18089 
18090     c_stopPermMaster.clientData = senderData;
18091     c_stopPermMaster.returnValue = 0;
18092     c_switchReplicas.clear();
18093 
18094     Mutex mutex(signal, c_mutexMgr, c_switchPrimaryMutexHandle);
18095     Callback c = { safe_cast(&Dbdih::switch_primary_stop_node), nodeId };
18096     ndbrequire(mutex.lock(c));
18097   } else {
18098     /**
18099      * Proxy part
18100      */
18101     jam();
18102     CRASH_INSERTION(7066);
18103     if(c_stopPermProxy.clientRef != 0){
18104       jam();
18105       ref->senderData = senderData;
18106       ref->errorCode = StopPermRef::NodeShutdownInProgress;
18107       sendSignal(senderRef, GSN_STOP_PERM_REF, signal, 2, JBB);
18108       return;
18109     }//if
18110 
18111     c_stopPermProxy.clientRef = senderRef;
18112     c_stopPermProxy.masterRef = cmasterdihref;
18113     c_stopPermProxy.clientData = senderData;
18114 
18115     req->senderRef = reference();
18116     req->senderData = senderData;
18117     sendSignal(cmasterdihref, GSN_STOP_PERM_REQ, signal,
18118 	       StopPermReq::SignalLength, JBB);
18119   }//if
18120 }//Dbdih::execSTOP_PERM_REQ()
18121 
18122 void
switch_primary_stop_node(Signal * signal,Uint32 node_id,Uint32 ret_val)18123 Dbdih::switch_primary_stop_node(Signal* signal, Uint32 node_id, Uint32 ret_val)
18124 {
18125   ndbrequire(ret_val == 0);
18126   signal->theData[0] = DihContinueB::SwitchReplica;
18127   signal->theData[1] = node_id;
18128   signal->theData[2] = 0; // table id
18129   signal->theData[3] = 0; // fragment id
18130   sendSignal(reference(), GSN_CONTINUEB, signal, 4, JBB);
18131 }
18132 
execSTOP_PERM_REF(Signal * signal)18133 void Dbdih::execSTOP_PERM_REF(Signal* signal)
18134 {
18135   jamEntry();
18136   ndbrequire(c_stopPermProxy.clientRef != 0);
18137   ndbrequire(c_stopPermProxy.masterRef == signal->senderBlockRef());
18138   sendSignal(c_stopPermProxy.clientRef, GSN_STOP_PERM_REF, signal, 2, JBB);
18139   c_stopPermProxy.clientRef = 0;
18140 }//Dbdih::execSTOP_PERM_REF()
18141 
execSTOP_PERM_CONF(Signal * signal)18142 void Dbdih::execSTOP_PERM_CONF(Signal* signal)
18143 {
18144   jamEntry();
18145   ndbrequire(c_stopPermProxy.clientRef != 0);
18146   ndbrequire(c_stopPermProxy.masterRef == signal->senderBlockRef());
18147   sendSignal(c_stopPermProxy.clientRef, GSN_STOP_PERM_CONF, signal, 1, JBB);
18148   c_stopPermProxy.clientRef = 0;
18149 }//Dbdih::execSTOP_PERM_CONF()
18150 
execDIH_SWITCH_REPLICA_REQ(Signal * signal)18151 void Dbdih::execDIH_SWITCH_REPLICA_REQ(Signal* signal)
18152 {
18153   jamEntry();
18154   DihSwitchReplicaReq* const req = (DihSwitchReplicaReq*)&signal->theData[0];
18155   const Uint32 tableId = req->tableId;
18156   const Uint32 fragNo = req->fragNo;
18157   const BlockReference senderRef = req->senderRef;
18158 
18159   CRASH_INSERTION(7067);
18160   TabRecordPtr tabPtr;
18161   tabPtr.i = tableId;
18162   ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
18163 
18164   ndbrequire(tabPtr.p->tabStatus == TabRecord::TS_ACTIVE);
18165   if (tabPtr.p->tabCopyStatus != TabRecord::CS_IDLE) {
18166     jam();
18167     sendSignal(reference(), GSN_DIH_SWITCH_REPLICA_REQ, signal,
18168 	       DihSwitchReplicaReq::SignalLength, JBB);
18169     return;
18170   }//if
18171   FragmentstorePtr fragPtr;
18172   getFragstore(tabPtr.p, fragNo, fragPtr);
18173 
18174   /**
18175    * Do funky stuff
18176    */
18177   Uint32 oldOrder[MAX_REPLICAS];
18178   const Uint32 noOfReplicas = extractNodeInfo(fragPtr.p, oldOrder);
18179 
18180   if (noOfReplicas < req->noOfReplicas) {
18181     jam();
18182     //---------------------------------------------------------------------
18183     // A crash occurred in the middle of our switch handling.
18184     //---------------------------------------------------------------------
18185     DihSwitchReplicaRef* const ref = (DihSwitchReplicaRef*)&signal->theData[0];
18186     ref->senderNode = cownNodeId;
18187     ref->errorCode = StopPermRef::NF_CausedAbortOfStopProcedure;
18188     sendSignal(senderRef, GSN_DIH_SWITCH_REPLICA_REF, signal,
18189                DihSwitchReplicaRef::SignalLength, JBB);
18190   }//if
18191 
18192   DIH_TAB_WRITE_LOCK(tabPtr.p);
18193   for (Uint32 i = 0; i < noOfReplicas; i++) {
18194     jam();
18195     ndbrequire(i < MAX_REPLICAS);
18196     fragPtr.p->activeNodes[i] = req->newNodeOrder[i];
18197   }//for
18198   DIH_TAB_WRITE_UNLOCK(tabPtr.p);
18199 
18200   /**
18201    * Reply
18202    */
18203   DihSwitchReplicaConf* const conf = (DihSwitchReplicaConf*)&signal->theData[0];
18204   conf->senderNode = cownNodeId;
18205   sendSignal(senderRef, GSN_DIH_SWITCH_REPLICA_CONF, signal,
18206              DihSwitchReplicaConf::SignalLength, JBB);
18207 }//Dbdih::execDIH_SWITCH_REPLICA_REQ()
18208 
execDIH_SWITCH_REPLICA_CONF(Signal * signal)18209 void Dbdih::execDIH_SWITCH_REPLICA_CONF(Signal* signal)
18210 {
18211   jamEntry();
18212   /**
18213    * Response to master
18214    */
18215   CRASH_INSERTION(7068);
18216   DihSwitchReplicaConf* const conf = (DihSwitchReplicaConf*)&signal->theData[0];
18217   switchReplicaReply(signal, conf->senderNode);
18218 }//Dbdih::execDIH_SWITCH_REPLICA_CONF()
18219 
execDIH_SWITCH_REPLICA_REF(Signal * signal)18220 void Dbdih::execDIH_SWITCH_REPLICA_REF(Signal* signal)
18221 {
18222   jamEntry();
18223   DihSwitchReplicaRef* const ref = (DihSwitchReplicaRef*)&signal->theData[0];
18224   if(c_stopPermMaster.returnValue == 0){
18225     jam();
18226     c_stopPermMaster.returnValue = ref->errorCode;
18227   }//if
18228   switchReplicaReply(signal, ref->senderNode);
18229 }//Dbdih::execDIH_SWITCH_REPLICA_REF()
18230 
switchReplicaReply(Signal * signal,NodeId nodeId)18231 void Dbdih::switchReplicaReply(Signal* signal,
18232 			       NodeId nodeId){
18233   jam();
18234   receiveLoopMacro(DIH_SWITCH_REPLICA_REQ, nodeId);
18235   //------------------------------------------------------
18236   // We have received all responses from the nodes. Thus
18237   // we have completed switching replica roles. Continue
18238   // with the next fragment.
18239   //------------------------------------------------------
18240   if(c_stopPermMaster.returnValue != 0){
18241     jam();
18242     c_switchReplicas.tableId = ctabFileSize + 1;
18243   }//if
18244   c_switchReplicas.fragNo++;
18245 
18246   signal->theData[0] = DihContinueB::SwitchReplica;
18247   signal->theData[1] = c_switchReplicas.nodeId;
18248   signal->theData[2] = c_switchReplicas.tableId;
18249   signal->theData[3] = c_switchReplicas.fragNo;
18250   sendSignal(reference(), GSN_CONTINUEB, signal, 4, JBB);
18251 }//Dbdih::switchReplicaReply()
18252 
18253 void
switchReplica(Signal * signal,Uint32 nodeId,Uint32 tableId,Uint32 fragNo)18254 Dbdih::switchReplica(Signal* signal,
18255 		     Uint32 nodeId,
18256 		     Uint32 tableId,
18257 		     Uint32 fragNo){
18258   jam();
18259   DihSwitchReplicaReq* const req = (DihSwitchReplicaReq*)&signal->theData[0];
18260 
18261   const Uint32 RT_BREAK = 64;
18262 
18263   for (Uint32 i = 0; i < RT_BREAK; i++) {
18264     jam();
18265     if (tableId >= ctabFileSize) {
18266       jam();
18267       StopPermConf* const conf = (StopPermConf*)&signal->theData[0];
18268       StopPermRef*  const ref  = (StopPermRef*)&signal->theData[0];
18269       /**
18270        * Finished with all tables
18271        */
18272       if(c_stopPermMaster.returnValue == 0) {
18273 	jam();
18274 	conf->senderData = c_stopPermMaster.clientData;
18275 	sendSignal(c_stopPermMaster.clientRef, GSN_STOP_PERM_CONF,
18276 		   signal, 1, JBB);
18277       } else {
18278         jam();
18279         ref->senderData = c_stopPermMaster.clientData;
18280         ref->errorCode  = c_stopPermMaster.returnValue;
18281         sendSignal(c_stopPermMaster.clientRef, GSN_STOP_PERM_REF, signal, 2,JBB);
18282       }//if
18283 
18284       /**
18285        * UnLock
18286        */
18287       c_nodeStartMaster.activeState = false;
18288       c_stopPermMaster.clientRef = 0;
18289       c_stopPermMaster.clientData = 0;
18290       c_stopPermMaster.returnValue = 0;
18291       Mutex mutex(signal, c_mutexMgr, c_switchPrimaryMutexHandle);
18292       mutex.unlock(); // ignore result
18293       return;
18294     }//if
18295 
18296     TabRecordPtr tabPtr;
18297     tabPtr.i = tableId;
18298     ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
18299 
18300     if (tabPtr.p->tabStatus != TabRecord::TS_ACTIVE) {
18301       jam();
18302       tableId++;
18303       fragNo = 0;
18304       continue;
18305     }//if
18306     if (fragNo >= tabPtr.p->totalfragments) {
18307       jam();
18308       tableId++;
18309       fragNo = 0;
18310       continue;
18311     }//if
18312     FragmentstorePtr fragPtr;
18313     getFragstore(tabPtr.p, fragNo, fragPtr);
18314 
18315     Uint32 oldOrder[MAX_REPLICAS];
18316     const Uint32 noOfReplicas = extractNodeInfo(fragPtr.p, oldOrder);
18317 
18318     if(oldOrder[0] != nodeId) {
18319       jam();
18320       fragNo++;
18321       continue;
18322     }//if
18323     req->tableId = tableId;
18324     req->fragNo = fragNo;
18325     req->noOfReplicas = noOfReplicas;
18326     for (Uint32 i = 0; i < (noOfReplicas - 1); i++) {
18327       req->newNodeOrder[i] = oldOrder[i+1];
18328     }//for
18329     req->newNodeOrder[noOfReplicas-1] = nodeId;
18330     req->senderRef = reference();
18331 
18332     /**
18333      * Initialize struct
18334      */
18335     c_switchReplicas.tableId = tableId;
18336     c_switchReplicas.fragNo = fragNo;
18337     c_switchReplicas.nodeId = nodeId;
18338 
18339     sendLoopMacro(DIH_SWITCH_REPLICA_REQ, sendDIH_SWITCH_REPLICA_REQ, RNIL);
18340     return;
18341   }//for
18342 
18343   signal->theData[0] = DihContinueB::SwitchReplica;
18344   signal->theData[1] = nodeId;
18345   signal->theData[2] = tableId;
18346   signal->theData[3] = fragNo;
18347   sendSignal(reference(), GSN_CONTINUEB, signal, 4, JBB);
18348 }//Dbdih::switchReplica()
18349 
execSTOP_ME_REQ(Signal * signal)18350 void Dbdih::execSTOP_ME_REQ(Signal* signal)
18351 {
18352   jamEntry();
18353   StopMeReq* const req = (StopMeReq*)&signal->theData[0];
18354   const BlockReference senderRef = req->senderRef;
18355   const Uint32 senderData = req->senderData;
18356   const Uint32 nodeId = refToNode(senderRef);
18357   {
18358     /**
18359      * Set node dead (remove from operations)
18360      */
18361     NodeRecordPtr nodePtr;
18362     nodePtr.i = nodeId;
18363     ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
18364     nodePtr.p->useInTransactions = false;
18365   }
18366   if (nodeId != getOwnNodeId()) {
18367     jam();
18368     StopMeConf * const stopMeConf = (StopMeConf *)&signal->theData[0];
18369     stopMeConf->senderData = senderData;
18370     stopMeConf->senderRef  = reference();
18371     sendSignal(senderRef, GSN_STOP_ME_CONF, signal,
18372 	       StopMeConf::SignalLength, JBB);
18373     return;
18374   }//if
18375 
18376   /**
18377    * Local signal
18378    */
18379   jam();
18380   ndbrequire(c_stopMe.clientRef == 0);
18381 
18382   c_stopMe.clientData  = senderData;
18383   c_stopMe.clientRef   = senderRef;
18384 
18385   req->senderData = senderData;
18386   req->senderRef  = reference();
18387 
18388   sendLoopMacro(STOP_ME_REQ, sendSTOP_ME_REQ, RNIL);
18389 
18390   /**
18391    * Send conf to self
18392    */
18393   StopMeConf * const stopMeConf = (StopMeConf *)&signal->theData[0];
18394   stopMeConf->senderData = senderData;
18395   stopMeConf->senderRef  = reference();
18396   sendSignal(reference(), GSN_STOP_ME_CONF, signal,
18397 	     StopMeConf::SignalLength, JBB);
18398 }//Dbdih::execSTOP_ME_REQ()
18399 
execSTOP_ME_REF(Signal * signal)18400 void Dbdih::execSTOP_ME_REF(Signal* signal)
18401 {
18402   ndbrequire(false);
18403 }
18404 
execSTOP_ME_CONF(Signal * signal)18405 void Dbdih::execSTOP_ME_CONF(Signal* signal)
18406 {
18407   jamEntry();
18408   StopMeConf * const stopMeConf = (StopMeConf *)&signal->theData[0];
18409 
18410   const Uint32 senderRef  = stopMeConf->senderRef;
18411   const Uint32 senderData = stopMeConf->senderData;
18412   const Uint32 nodeId     = refToNode(senderRef);
18413 
18414   ndbrequire(c_stopMe.clientRef != 0);
18415   ndbrequire(c_stopMe.clientData == senderData);
18416 
18417   receiveLoopMacro(STOP_ME_REQ, nodeId);
18418   //---------------------------------------------------------
18419   // All STOP_ME_REQ have been received. We will send the
18420   // confirmation back to the requesting block.
18421   //---------------------------------------------------------
18422 
18423   stopMeConf->senderRef = reference();
18424   stopMeConf->senderData = c_stopMe.clientData;
18425   sendSignal(c_stopMe.clientRef, GSN_STOP_ME_CONF, signal,
18426 	     StopMeConf::SignalLength, JBB);
18427   c_stopMe.clientRef = 0;
18428 }//Dbdih::execSTOP_ME_CONF()
18429 
execWAIT_GCP_REQ(Signal * signal)18430 void Dbdih::execWAIT_GCP_REQ(Signal* signal)
18431 {
18432   jamEntry();
18433   WaitGCPReq* const req = (WaitGCPReq*)&signal->theData[0];
18434   WaitGCPRef* const ref = (WaitGCPRef*)&signal->theData[0];
18435   WaitGCPConf* const conf = (WaitGCPConf*)&signal->theData[0];
18436   const Uint32 senderData = req->senderData;
18437   const BlockReference senderRef = req->senderRef;
18438   const Uint32 requestType = req->requestType;
18439   Uint32 errorCode = 0;
18440 
18441   if(requestType == WaitGCPReq::CurrentGCI)
18442   {
18443     jam();
18444     conf->senderData = senderData;
18445     conf->gci_hi = Uint32(m_micro_gcp.m_current_gci >> 32);
18446     conf->gci_lo = Uint32(m_micro_gcp.m_current_gci);
18447     conf->blockStatus = cgcpOrderBlocked;
18448     sendSignal(senderRef, GSN_WAIT_GCP_CONF, signal,
18449 	       WaitGCPConf::SignalLength, JBB);
18450     return;
18451   }//if
18452 
18453   if(requestType == WaitGCPReq::RestartGCI)
18454   {
18455     jam();
18456     conf->senderData = senderData;
18457     conf->gci_hi = Uint32(crestartGci);
18458     conf->gci_lo = 0;
18459     conf->blockStatus = cgcpOrderBlocked;
18460     sendSignal(senderRef, GSN_WAIT_GCP_CONF, signal,
18461 	       WaitGCPConf::SignalLength, JBB);
18462     return;
18463   }//if
18464 
18465   if (requestType == WaitGCPReq::BlockStartGcp)
18466   {
18467     jam();
18468     conf->senderData = senderData;
18469     conf->gci_hi = Uint32(m_micro_gcp.m_current_gci >> 32);
18470     conf->gci_lo = Uint32(m_micro_gcp.m_current_gci);
18471     conf->blockStatus = cgcpOrderBlocked;
18472     sendSignal(senderRef, GSN_WAIT_GCP_CONF, signal,
18473 	       WaitGCPConf::SignalLength, JBB);
18474     cgcpOrderBlocked = 1;
18475     return;
18476   }
18477 
18478   if (requestType == WaitGCPReq::UnblockStartGcp)
18479   {
18480     jam();
18481     conf->senderData = senderData;
18482     conf->gci_hi = Uint32(m_micro_gcp.m_current_gci >> 32);
18483     conf->gci_lo = Uint32(m_micro_gcp.m_current_gci);
18484     conf->blockStatus = cgcpOrderBlocked;
18485     sendSignal(senderRef, GSN_WAIT_GCP_CONF, signal,
18486 	       WaitGCPConf::SignalLength, JBB);
18487     cgcpOrderBlocked = 0;
18488     return;
18489   }
18490 
18491   if(isMaster())
18492   {
18493     /**
18494      * Master
18495      */
18496 
18497     if (!isActiveMaster())
18498     {
18499       ndbassert(cmasterState == MASTER_TAKE_OVER_GCP);
18500       errorCode = WaitGCPRef::NF_MasterTakeOverInProgress;
18501       goto error;
18502     }
18503 
18504     if((requestType == WaitGCPReq::CompleteIfRunning) &&
18505        (m_gcp_save.m_master.m_state == GcpSave::GCP_SAVE_IDLE))
18506     {
18507       jam();
18508       conf->senderData = senderData;
18509       conf->gci_hi = Uint32(m_micro_gcp.m_old_gci >> 32);
18510       conf->gci_lo = Uint32(m_micro_gcp.m_old_gci);
18511       conf->blockStatus = cgcpOrderBlocked;
18512       sendSignal(senderRef, GSN_WAIT_GCP_CONF, signal,
18513 		 WaitGCPConf::SignalLength, JBB);
18514       return;
18515     }//if
18516 
18517     WaitGCPMasterPtr ptr;
18518     WaitGCPList * list = &c_waitGCPMasterList;
18519     if (requestType == WaitGCPReq::WaitEpoch)
18520     {
18521       jam();
18522       list = &c_waitEpochMasterList;
18523     }
18524 
18525     if(list->seize(ptr) == false)
18526     {
18527       jam();
18528       errorCode = WaitGCPRef::NoWaitGCPRecords;
18529       goto error;
18530       return;
18531     }
18532 
18533     ptr.p->clientRef = senderRef;
18534     ptr.p->clientData = senderData;
18535 
18536     if((requestType == WaitGCPReq::CompleteForceStart) &&
18537        (m_gcp_save.m_master.m_state == GcpSave::GCP_SAVE_IDLE))
18538     {
18539       jam();
18540       m_micro_gcp.m_master.m_start_time = m_gcp_save.m_master.m_start_time = 0;
18541     }//if
18542     return;
18543   }
18544   else
18545   {
18546     /**
18547      * Proxy part
18548      */
18549     jam();
18550     WaitGCPProxyPtr ptr;
18551     if (c_waitGCPProxyList.seize(ptr) == false)
18552     {
18553       jam();
18554       errorCode = WaitGCPRef::NoWaitGCPRecords;
18555       goto error;
18556     }//if
18557     ptr.p->clientRef = senderRef;
18558     ptr.p->clientData = senderData;
18559     ptr.p->masterRef = cmasterdihref;
18560 
18561     req->senderData = ptr.i;
18562     req->senderRef = reference();
18563     req->requestType = requestType;
18564 
18565     sendSignal(cmasterdihref, GSN_WAIT_GCP_REQ, signal,
18566 	       WaitGCPReq::SignalLength, JBB);
18567     return;
18568   }//if
18569 
18570 error:
18571   ref->senderData = senderData;
18572   ref->errorCode = errorCode;
18573   sendSignal(senderRef, GSN_WAIT_GCP_REF, signal,
18574              WaitGCPRef::SignalLength, JBB);
18575 }//Dbdih::execWAIT_GCP_REQ()
18576 
execWAIT_GCP_REF(Signal * signal)18577 void Dbdih::execWAIT_GCP_REF(Signal* signal)
18578 {
18579   jamEntry();
18580   ndbrequire(!isMaster());
18581   WaitGCPRef* const ref = (WaitGCPRef*)&signal->theData[0];
18582 
18583   const Uint32 proxyPtr = ref->senderData;
18584   const Uint32 errorCode = ref->errorCode;
18585 
18586   WaitGCPProxyPtr ptr;
18587   ptr.i = proxyPtr;
18588   c_waitGCPProxyList.getPtr(ptr);
18589 
18590   ref->senderData = ptr.p->clientData;
18591   ref->errorCode = errorCode;
18592   sendSignal(ptr.p->clientRef, GSN_WAIT_GCP_REF, signal,
18593 	     WaitGCPRef::SignalLength, JBB);
18594 
18595   c_waitGCPProxyList.release(ptr);
18596 }//Dbdih::execWAIT_GCP_REF()
18597 
execWAIT_GCP_CONF(Signal * signal)18598 void Dbdih::execWAIT_GCP_CONF(Signal* signal)
18599 {
18600   jamEntry();
18601   ndbrequire(!isMaster());
18602   WaitGCPConf* const conf = (WaitGCPConf*)&signal->theData[0];
18603   const Uint32 proxyPtr = conf->senderData;
18604   const Uint32 gci_hi = conf->gci_hi;
18605   const Uint32 gci_lo = conf->gci_lo;
18606   WaitGCPProxyPtr ptr;
18607 
18608   ptr.i = proxyPtr;
18609   c_waitGCPProxyList.getPtr(ptr);
18610 
18611   conf->senderData = ptr.p->clientData;
18612   conf->gci_hi = gci_hi;
18613   conf->gci_lo = gci_lo;
18614   conf->blockStatus = cgcpOrderBlocked;
18615   sendSignal(ptr.p->clientRef, GSN_WAIT_GCP_CONF, signal,
18616 	     WaitGCPConf::SignalLength, JBB);
18617 
18618   c_waitGCPProxyList.release(ptr);
18619 }//Dbdih::execWAIT_GCP_CONF()
18620 
checkWaitGCPProxy(Signal * signal,NodeId failedNodeId)18621 void Dbdih::checkWaitGCPProxy(Signal* signal, NodeId failedNodeId)
18622 {
18623   jam();
18624   WaitGCPRef* const ref = (WaitGCPRef*)&signal->theData[0];
18625   ref->errorCode = WaitGCPRef::NF_CausedAbortOfProcedure;
18626 
18627   WaitGCPProxyPtr ptr;
18628   c_waitGCPProxyList.first(ptr);
18629   while(ptr.i != RNIL) {
18630     jam();
18631     const Uint32 i = ptr.i;
18632     const Uint32 clientData = ptr.p->clientData;
18633     const BlockReference clientRef = ptr.p->clientRef;
18634     const BlockReference masterRef = ptr.p->masterRef;
18635 
18636     c_waitGCPProxyList.next(ptr);
18637     if(refToNode(masterRef) == failedNodeId) {
18638       jam();
18639       c_waitGCPProxyList.release(i);
18640       ref->senderData = clientData;
18641       sendSignal(clientRef, GSN_WAIT_GCP_REF, signal,
18642 		 WaitGCPRef::SignalLength, JBB);
18643     }//if
18644   }//while
18645 }//Dbdih::checkWaitGCPProxy()
18646 
checkWaitGCPMaster(Signal * signal,NodeId failedNodeId)18647 void Dbdih::checkWaitGCPMaster(Signal* signal, NodeId failedNodeId)
18648 {
18649   jam();
18650   WaitGCPMasterPtr ptr;
18651   c_waitGCPMasterList.first(ptr);
18652 
18653   while (ptr.i != RNIL) {
18654     jam();
18655     const Uint32 i = ptr.i;
18656     const NodeId nodeId = refToNode(ptr.p->clientRef);
18657 
18658     c_waitGCPMasterList.next(ptr);
18659     if (nodeId == failedNodeId) {
18660       jam();
18661       c_waitGCPMasterList.release(i);
18662     }//if
18663   }//while
18664 }//Dbdih::checkWaitGCPMaster()
18665 
emptyWaitGCPMasterQueue(Signal * signal,Uint64 gci,WaitGCPList & list)18666 void Dbdih::emptyWaitGCPMasterQueue(Signal* signal,
18667                                     Uint64 gci,
18668                                     WaitGCPList & list)
18669 {
18670   jam();
18671   WaitGCPConf* const conf = (WaitGCPConf*)&signal->theData[0];
18672   conf->gci_hi = Uint32(gci >> 32);
18673   conf->gci_lo = Uint32(gci);
18674 
18675   WaitGCPMasterPtr ptr;
18676   list.first(ptr);
18677   while(ptr.i != RNIL) {
18678     jam();
18679     const Uint32 i = ptr.i;
18680     const Uint32 clientData = ptr.p->clientData;
18681     const BlockReference clientRef = ptr.p->clientRef;
18682 
18683     c_waitGCPMasterList.next(ptr);
18684     conf->senderData = clientData;
18685     conf->blockStatus = cgcpOrderBlocked;
18686     sendSignal(clientRef, GSN_WAIT_GCP_CONF, signal,
18687 	       WaitGCPConf::SignalLength, JBB);
18688 
18689     list.release(i);
18690   }//while
18691 }//Dbdih::emptyWaitGCPMasterQueue()
18692 
setNodeStatus(Uint32 nodeId,NodeRecord::NodeStatus newStatus)18693 void Dbdih::setNodeStatus(Uint32 nodeId, NodeRecord::NodeStatus newStatus)
18694 {
18695   NodeRecordPtr nodePtr;
18696   nodePtr.i = nodeId;
18697   ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
18698   nodePtr.p->nodeStatus = newStatus;
18699 }//Dbdih::setNodeStatus()
18700 
getNodeStatus(Uint32 nodeId)18701 Dbdih::NodeRecord::NodeStatus Dbdih::getNodeStatus(Uint32 nodeId)
18702 {
18703   NodeRecordPtr nodePtr;
18704   nodePtr.i = nodeId;
18705   ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
18706   return nodePtr.p->nodeStatus;
18707 }//Dbdih::getNodeStatus()
18708 
18709 Sysfile::ActiveStatus
getNodeActiveStatus(Uint32 nodeId)18710 Dbdih::getNodeActiveStatus(Uint32 nodeId)
18711 {
18712   NodeRecordPtr nodePtr;
18713   nodePtr.i = nodeId;
18714   ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
18715   return nodePtr.p->activeStatus;
18716 }//Dbdih::getNodeActiveStatus()
18717 
18718 
18719 void
setNodeActiveStatus(Uint32 nodeId,Sysfile::ActiveStatus newStatus)18720 Dbdih::setNodeActiveStatus(Uint32 nodeId, Sysfile::ActiveStatus newStatus)
18721 {
18722   NodeRecordPtr nodePtr;
18723   nodePtr.i = nodeId;
18724   ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
18725   nodePtr.p->activeStatus = newStatus;
18726 }//Dbdih::setNodeActiveStatus()
18727 
setAllowNodeStart(Uint32 nodeId,bool newState)18728 void Dbdih::setAllowNodeStart(Uint32 nodeId, bool newState)
18729 {
18730   NodeRecordPtr nodePtr;
18731   nodePtr.i = nodeId;
18732   ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
18733   nodePtr.p->allowNodeStart = newState;
18734 }//Dbdih::setAllowNodeStart()
18735 
getAllowNodeStart(Uint32 nodeId)18736 bool Dbdih::getAllowNodeStart(Uint32 nodeId)
18737 {
18738   NodeRecordPtr nodePtr;
18739   nodePtr.i = nodeId;
18740   ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
18741   return nodePtr.p->allowNodeStart;
18742 }//Dbdih::getAllowNodeStart()
18743 
18744 Uint32
getNodeGroup(Uint32 nodeId) const18745 Dbdih::getNodeGroup(Uint32 nodeId) const
18746 {
18747   NodeRecordPtr nodePtr;
18748   nodePtr.i = nodeId;
18749   ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
18750   return nodePtr.p->nodeGroup;
18751 }
18752 
checkNodeAlive(Uint32 nodeId)18753 bool Dbdih::checkNodeAlive(Uint32 nodeId)
18754 {
18755   NodeRecordPtr nodePtr;
18756   nodePtr.i = nodeId;
18757   ndbrequire(nodeId > 0);
18758   ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
18759   if (nodePtr.p->nodeStatus != NodeRecord::ALIVE) {
18760     return false;
18761   } else {
18762     return true;
18763   }//if
18764 }//Dbdih::checkNodeAlive()
18765 
isMaster()18766 bool Dbdih::isMaster()
18767 {
18768   return (reference() == cmasterdihref);
18769 }//Dbdih::isMaster()
18770 
isActiveMaster()18771 bool Dbdih::isActiveMaster()
18772 {
18773   return ((reference() == cmasterdihref) && (cmasterState == MASTER_ACTIVE));
18774 }//Dbdih::isActiveMaster()
18775 
NodeRecord()18776 Dbdih::NodeRecord::NodeRecord(){
18777   m_nodefailSteps.clear();
18778 
18779   activeStatus = Sysfile::NS_NotDefined;
18780   recNODE_FAILREP = ZFALSE;
18781   dbtcFailCompleted = ZTRUE;
18782   dbdictFailCompleted = ZTRUE;
18783   dbdihFailCompleted = ZTRUE;
18784   dblqhFailCompleted = ZTRUE;
18785   noOfStartedChkpt = 0;
18786   noOfQueuedChkpt = 0;
18787   lcpStateAtTakeOver = (MasterLCPConf::State)255;
18788 
18789   activeTabptr = RNIL;
18790   nodeStatus = NodeRecord::NOT_IN_CLUSTER;
18791   useInTransactions = false;
18792   copyCompleted = false;
18793   allowNodeStart = true;
18794 }
18795 
18796 // DICT lock slave
18797 
18798 void
sendDictLockReq(Signal * signal,Uint32 lockType,Callback c)18799 Dbdih::sendDictLockReq(Signal* signal, Uint32 lockType, Callback c)
18800 {
18801   DictLockReq* req = (DictLockReq*)&signal->theData[0];
18802   DictLockSlavePtr lockPtr;
18803 
18804   c_dictLockSlavePool.seize(lockPtr);
18805   ndbrequire(lockPtr.i != RNIL);
18806 
18807   req->userPtr = lockPtr.i;
18808   req->lockType = lockType;
18809   req->userRef = reference();
18810 
18811   lockPtr.p->lockPtr = RNIL;
18812   lockPtr.p->lockType = lockType;
18813   lockPtr.p->locked = false;
18814   lockPtr.p->callback = c;
18815 
18816   // handle rolling upgrade
18817   {
18818     Uint32 masterVersion = getNodeInfo(cmasterNodeId).m_version;
18819 
18820     const unsigned int get_major = getMajor(masterVersion);
18821     const unsigned int get_minor = getMinor(masterVersion);
18822     const unsigned int get_build = getBuild(masterVersion);
18823     ndbrequire(get_major >= 4);
18824 
18825     if (masterVersion < NDBD_DICT_LOCK_VERSION_5 ||
18826         (masterVersion < NDBD_DICT_LOCK_VERSION_5_1 &&
18827          get_major == 5 && get_minor == 1) ||
18828         ERROR_INSERTED(7176)) {
18829       jam();
18830 
18831       infoEvent("DIH: detect upgrade: master node %u old version %u.%u.%u",
18832                 (unsigned int)cmasterNodeId, get_major, get_minor, get_build);
18833 
18834       DictLockConf* conf = (DictLockConf*)&signal->theData[0];
18835       conf->userPtr = lockPtr.i;
18836       conf->lockType = lockType;
18837       conf->lockPtr = ZNIL;
18838 
18839       sendSignal(reference(), GSN_DICT_LOCK_CONF, signal,
18840                  DictLockConf::SignalLength, JBB);
18841       return;
18842     }
18843   }
18844 
18845   BlockReference dictMasterRef = calcDictBlockRef(cmasterNodeId);
18846   sendSignal(dictMasterRef, GSN_DICT_LOCK_REQ, signal,
18847       DictLockReq::SignalLength, JBB);
18848 }
18849 
18850 void
execDICT_LOCK_CONF(Signal * signal)18851 Dbdih::execDICT_LOCK_CONF(Signal* signal)
18852 {
18853   jamEntry();
18854   recvDictLockConf(signal);
18855 }
18856 
18857 void
execDICT_LOCK_REF(Signal * signal)18858 Dbdih::execDICT_LOCK_REF(Signal* signal)
18859 {
18860   jamEntry();
18861   ndbrequire(false);
18862 }
18863 
18864 void
recvDictLockConf(Signal * signal)18865 Dbdih::recvDictLockConf(Signal* signal)
18866 {
18867   const DictLockConf* conf = (const DictLockConf*)&signal->theData[0];
18868 
18869   DictLockSlavePtr lockPtr;
18870   c_dictLockSlavePool.getPtr(lockPtr, conf->userPtr);
18871 
18872   lockPtr.p->lockPtr = conf->lockPtr;
18873   ndbrequire(lockPtr.p->lockType == conf->lockType);
18874   ndbrequire(lockPtr.p->locked == false);
18875   lockPtr.p->locked = true;
18876 
18877   lockPtr.p->callback.m_callbackData = lockPtr.i;
18878   execute(signal, lockPtr.p->callback, 0);
18879 }
18880 
18881 void
sendDictUnlockOrd(Signal * signal,Uint32 lockSlavePtrI)18882 Dbdih::sendDictUnlockOrd(Signal* signal, Uint32 lockSlavePtrI)
18883 {
18884   DictUnlockOrd* ord = (DictUnlockOrd*)&signal->theData[0];
18885 
18886   DictLockSlavePtr lockPtr;
18887   c_dictLockSlavePool.getPtr(lockPtr, lockSlavePtrI);
18888 
18889   ord->lockPtr = lockPtr.p->lockPtr;
18890   ord->lockType = lockPtr.p->lockType;
18891   ord->senderData = lockPtr.i;
18892   ord->senderRef = reference();
18893 
18894   c_dictLockSlavePool.release(lockPtr);
18895 
18896   // handle rolling upgrade
18897   {
18898     Uint32 masterVersion = getNodeInfo(cmasterNodeId).m_version;
18899 
18900     const unsigned int get_major = getMajor(masterVersion);
18901     const unsigned int get_minor = getMinor(masterVersion);
18902     ndbrequire(get_major >= 4);
18903 
18904     if (masterVersion < NDBD_DICT_LOCK_VERSION_5 ||
18905         (masterVersion < NDBD_DICT_LOCK_VERSION_5_1 &&
18906          get_major == 5 && get_minor == 1) ||
18907         ERROR_INSERTED(7176)) {
18908       return;
18909     }
18910   }
18911 
18912   Uint32 len = DictUnlockOrd::SignalLength;
18913   if (unlikely(getNodeInfo(cmasterNodeId).m_version < NDB_MAKE_VERSION(6,3,0)))
18914   {
18915     jam();
18916     len = 2;
18917   }
18918 
18919   BlockReference dictMasterRef = calcDictBlockRef(cmasterNodeId);
18920   sendSignal(dictMasterRef, GSN_DICT_UNLOCK_ORD, signal, len, JBB);
18921 }
18922 
18923 #ifdef ERROR_INSERT
18924 void
sendToRandomNodes(const char * msg,Signal * signal,SignalCounter * counter,SendFunction fun,Uint32 extra,Uint32 block,Uint32 gsn,Uint32 len,JobBufferLevel level)18925 Dbdih::sendToRandomNodes(const char * msg,
18926                          Signal* signal,
18927                          SignalCounter* counter,
18928                          SendFunction fun,
18929                          Uint32 extra,
18930                          Uint32 block,
18931                          Uint32 gsn,
18932                          Uint32 len,
18933                          JobBufferLevel level)
18934 {
18935 
18936   if (counter)
18937     counter->clearWaitingFor();
18938 
18939   Vector<Uint32> nodes;
18940   NodeRecordPtr nodePtr;
18941   nodePtr.i = cfirstAliveNode;
18942   do {
18943     jam();
18944     ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
18945     if (nodePtr.i != getOwnNodeId())
18946     {
18947       nodes.push_back(nodePtr.i);
18948     }
18949     nodePtr.i = nodePtr.p->nextNode;
18950   } while (nodePtr.i != RNIL);
18951 
18952 
18953   NdbNodeBitmask masked;
18954   Uint32 cnt = nodes.size();
18955   if (cnt <= 1)
18956   {
18957     goto do_send;
18958   }
18959 
18960   {
18961     Uint32 remove = (rand() % cnt);
18962     if (remove == 0)
18963       remove = 1;
18964 
18965     for (Uint32 i = 0; i<remove; i++)
18966     {
18967       Uint32 rand_node = rand() % nodes.size();
18968       masked.set(nodes[rand_node]);
18969       nodes.erase(rand_node);
18970     }
18971   }
18972 
18973 do_send:
18974   char bufpos = 0;
18975   char buf[256];
18976 
18977   nodePtr.i = cfirstAliveNode;
18978   do {
18979     jam();
18980     ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
18981     if (counter)
18982       counter->setWaitingFor(nodePtr.i);
18983     if (!masked.get(nodePtr.i))
18984     {
18985       if (fun)
18986       {
18987         (this->*fun)(signal, nodePtr.i, extra);
18988       }
18989       else
18990       {
18991         Uint32 ref = numberToRef(block, nodePtr.i);
18992         sendSignal(ref, gsn, signal, len, level);
18993       }
18994       BaseString::snprintf(buf+bufpos, sizeof(buf)-bufpos, "%u ", nodePtr.i);
18995     }
18996     else
18997     {
18998       BaseString::snprintf(buf+bufpos, sizeof(buf)-bufpos, "[%u] ", nodePtr.i);
18999     }
19000     bufpos = strlen(buf);
19001     nodePtr.i = nodePtr.p->nextNode;
19002   } while (nodePtr.i != RNIL);
19003   infoEvent("%s %s", msg, buf);
19004 }
19005 
19006 #endif
19007 
19008 // MT LQH
19009 
19010 Uint32
dihGetInstanceKey(Uint32 tabId,Uint32 fragId)19011 Dbdih::dihGetInstanceKey(Uint32 tabId, Uint32 fragId)
19012 {
19013   TabRecordPtr tTabPtr;
19014   tTabPtr.i = tabId;
19015   ptrCheckGuard(tTabPtr, ctabFileSize, tabRecord);
19016   FragmentstorePtr tFragPtr;
19017   getFragstore(tTabPtr.p, fragId, tFragPtr);
19018   Uint32 instanceKey = dihGetInstanceKey(tFragPtr);
19019   return instanceKey;
19020 }
19021 
19022 /**
19023  *
19024  */
19025 void
execCREATE_NODEGROUP_IMPL_REQ(Signal * signal)19026 Dbdih::execCREATE_NODEGROUP_IMPL_REQ(Signal* signal)
19027 {
19028   jamEntry();
19029   CreateNodegroupImplReq reqCopy = *(CreateNodegroupImplReq*)signal->getDataPtr();
19030   CreateNodegroupImplReq *req = &reqCopy;
19031 
19032   Uint32 err = 0;
19033   Uint32 rt = req->requestType;
19034   Uint64 gci = 0;
19035   switch(rt){
19036   case CreateNodegroupImplReq::RT_ABORT:
19037     jam(); // do nothing
19038     break;
19039   case CreateNodegroupImplReq::RT_PARSE:
19040   case CreateNodegroupImplReq::RT_PREPARE:
19041   case CreateNodegroupImplReq::RT_COMMIT:
19042   {
19043     Uint32 cnt = 0;
19044     for (Uint32 i = 0; i<NDB_ARRAY_SIZE(req->nodes) && req->nodes[i] ; i++)
19045     {
19046       cnt++;
19047       if (getNodeActiveStatus(req->nodes[i]) != Sysfile::NS_Configured)
19048       {
19049         jam();
19050         err = CreateNodegroupRef::NodeAlreadyInNodegroup;
19051         goto error;
19052       }
19053     }
19054 
19055     if (cnt != cnoReplicas)
19056     {
19057       jam();
19058       err = CreateNodegroupRef::InvalidNoOfNodesInNodegroup;
19059       goto error;
19060     }
19061 
19062     Uint32 ng = req->nodegroupId;
19063     NdbNodeBitmask tmp;
19064     tmp.set();
19065     for (Uint32 i = 0; i<cnoOfNodeGroups; i++)
19066     {
19067       tmp.clear(c_node_groups[i]);
19068     }
19069 
19070     if (ng == RNIL && rt == CreateNodegroupImplReq::RT_PARSE)
19071     {
19072       jam();
19073       ng = tmp.find(0);
19074     }
19075 
19076     if (ng > MAX_NDB_NODES)
19077     {
19078       jam();
19079       err = CreateNodegroupRef::InvalidNodegroupId;
19080       goto error;
19081     }
19082 
19083     if (tmp.get(ng) == false)
19084     {
19085       jam();
19086       err = CreateNodegroupRef::NodegroupInUse;
19087       goto error;
19088     }
19089 
19090     if (rt == CreateNodegroupImplReq::RT_PARSE || rt == CreateNodegroupImplReq::RT_PREPARE)
19091     {
19092       /**
19093        * Check that atleast one of the nodes are alive
19094        */
19095       bool alive = false;
19096       for (Uint32 i = 0; i<cnoReplicas; i++)
19097       {
19098         jam();
19099         Uint32 nodeId = req->nodes[i];
19100         if (getNodeStatus(nodeId) == NodeRecord::ALIVE)
19101         {
19102           jam();
19103           alive = true;
19104           break;
19105         }
19106       }
19107 
19108       jam();
19109       if (alive == false)
19110       {
19111         jam();
19112         err = CreateNodegroupRef::NoNodeAlive;
19113         goto error;
19114       }
19115     }
19116 
19117     if (rt == CreateNodegroupImplReq::RT_PARSE)
19118     {
19119       jam();
19120       signal->theData[0] = 0;
19121       signal->theData[1] = ng;
19122       return;
19123     }
19124 
19125     if (rt == CreateNodegroupImplReq::RT_PREPARE)
19126     {
19127       jam(); // do nothing
19128       break;
19129     }
19130 
19131     ndbrequire(rt == CreateNodegroupImplReq::RT_COMMIT);
19132     for (Uint32 i = 0; i<cnoReplicas; i++)
19133     {
19134       Uint32 nodeId = req->nodes[i];
19135       Sysfile::setNodeGroup(nodeId, SYSFILE->nodeGroups, req->nodegroupId);
19136       if (getNodeStatus(nodeId) == NodeRecord::ALIVE)
19137       {
19138         jam();
19139         Sysfile::setNodeStatus(nodeId, SYSFILE->nodeStatus, Sysfile::NS_Active);
19140       }
19141       else
19142       {
19143         jam();
19144         Sysfile::setNodeStatus(nodeId, SYSFILE->nodeStatus, Sysfile::NS_ActiveMissed_1);
19145       }
19146       setNodeActiveStatus();
19147       setNodeGroups();
19148     }
19149     break;
19150   }
19151   case CreateNodegroupImplReq::RT_COMPLETE:
19152     jam();
19153     gci = m_micro_gcp.m_current_gci;
19154     break;
19155   }
19156 
19157   {
19158     CreateNodegroupImplConf* conf = (CreateNodegroupImplConf*)signal->getDataPtrSend();
19159     conf->senderRef = reference();
19160     conf->senderData = req->senderData;
19161     conf->gci_hi = Uint32(gci >> 32);
19162     conf->gci_lo = Uint32(gci);
19163     sendSignal(req->senderRef, GSN_CREATE_NODEGROUP_IMPL_CONF, signal,
19164                CreateNodegroupImplConf::SignalLength, JBB);
19165   }
19166   return;
19167 
19168 error:
19169   if (rt == CreateNodegroupImplReq::RT_PARSE)
19170   {
19171     jam();
19172     signal->theData[0] = err;
19173     return;
19174   }
19175 
19176   if (rt == CreateNodegroupImplReq::RT_PREPARE)
19177   {
19178     jam();
19179     CreateNodegroupImplRef * ref = (CreateNodegroupImplRef*)signal->getDataPtrSend();
19180     ref->senderRef = reference();
19181     ref->senderData = req->senderData;
19182     ref->errorCode = err;
19183     sendSignal(req->senderRef, GSN_CREATE_NODEGROUP_IMPL_REF, signal,
19184                CreateNodegroupImplRef::SignalLength, JBB);
19185     return;
19186   }
19187 
19188   jamLine(err);
19189   ndbrequire(false);
19190 }
19191 
19192 /**
19193  *
19194  */
19195 void
execDROP_NODEGROUP_IMPL_REQ(Signal * signal)19196 Dbdih::execDROP_NODEGROUP_IMPL_REQ(Signal* signal)
19197 {
19198   jamEntry();
19199   DropNodegroupImplReq reqCopy = *(DropNodegroupImplReq*)signal->getDataPtr();
19200   DropNodegroupImplReq *req = &reqCopy;
19201 
19202   NodeGroupRecordPtr NGPtr;
19203 
19204   Uint32 err = 0;
19205   Uint32 rt = req->requestType;
19206   Uint64 gci = 0;
19207   switch(rt){
19208   case DropNodegroupImplReq::RT_ABORT:
19209     jam(); // do nothing
19210     break;
19211   case DropNodegroupImplReq::RT_PARSE:
19212   case DropNodegroupImplReq::RT_PREPARE:
19213     jam();
19214     NGPtr.i = req->nodegroupId;
19215     if (NGPtr.i >= MAX_NDB_NODES)
19216     {
19217       jam();
19218       err = DropNodegroupRef::NoSuchNodegroup;
19219       goto error;
19220     }
19221     ptrCheckGuard(NGPtr, MAX_NDB_NODES, nodeGroupRecord);
19222 
19223     if (NGPtr.p->nodegroupIndex == RNIL)
19224     {
19225       jam();
19226       err = DropNodegroupRef::NoSuchNodegroup;
19227       goto error;
19228     }
19229 
19230     if (NGPtr.p->m_ref_count)
19231     {
19232       jam();
19233       err = DropNodegroupRef::NodegroupInUse;
19234       goto error;
19235     }
19236     break;
19237   case DropNodegroupImplReq::RT_COMMIT:
19238   {
19239     jam();
19240     gci = m_micro_gcp.m_current_gci;
19241     break;
19242   }
19243   case DropNodegroupImplReq::RT_COMPLETE:
19244   {
19245     NGPtr.i = req->nodegroupId;
19246     ptrCheckGuard(NGPtr, MAX_NDB_NODES, nodeGroupRecord);
19247     for (Uint32 i = 0; i<NGPtr.p->nodeCount; i++)
19248     {
19249       jam();
19250       Uint32 nodeId = NGPtr.p->nodesInGroup[i];
19251       Sysfile::setNodeGroup(nodeId, SYSFILE->nodeGroups, NO_NODE_GROUP_ID);
19252       Sysfile::setNodeStatus(nodeId, SYSFILE->nodeStatus, Sysfile::NS_Configured);
19253     }
19254     setNodeActiveStatus();
19255     setNodeGroups();
19256     break;
19257   }
19258   }
19259 
19260   {
19261     DropNodegroupImplConf* conf = (DropNodegroupImplConf*)signal->getDataPtrSend();
19262     conf->senderRef = reference();
19263     conf->senderData = req->senderData;
19264     conf->gci_hi = Uint32(gci >> 32);
19265     conf->gci_lo = Uint32(gci);
19266     sendSignal(req->senderRef, GSN_DROP_NODEGROUP_IMPL_CONF, signal,
19267                DropNodegroupImplConf::SignalLength, JBB);
19268   }
19269   return;
19270 
19271 error:
19272   DropNodegroupImplRef * ref = (DropNodegroupImplRef*)signal->getDataPtrSend();
19273   ref->senderRef = reference();
19274   ref->senderData = req->senderData;
19275   ref->errorCode = err;
19276   sendSignal(req->senderRef, GSN_DROP_NODEGROUP_IMPL_REF, signal,
19277              DropNodegroupImplRef::SignalLength, JBB);
19278 }
19279