1 /*
2 Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License, version 2.0,
6 as published by the Free Software Foundation.
7
8 This program is also distributed with certain software (including
9 but not limited to OpenSSL) that is licensed under separate terms,
10 as designated in a particular file or component or in included license
11 documentation. The authors of MySQL hereby grant you an additional
12 permission to link the program and your derivative works with the
13 separately licensed software that they have included with MySQL.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License, version 2.0, for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
23 */
24
25 #define DBDIH_C
26 #include <ndb_global.h>
27 #include <ndb_limits.h>
28 #include <ndb_version.h>
29 #include <NdbOut.hpp>
30
31 #include "Dbdih.hpp"
32 #include "Configuration.hpp"
33
34 #include <signaldata/BlockCommitOrd.hpp>
35 #include <signaldata/CheckNodeGroups.hpp>
36 #include <signaldata/CopyActive.hpp>
37 #include <signaldata/CopyFrag.hpp>
38 #include <signaldata/CopyGCIReq.hpp>
39 #include <signaldata/DiAddTab.hpp>
40 #include <signaldata/DictStart.hpp>
41 #include <signaldata/DiGetNodes.hpp>
42 #include <signaldata/DihContinueB.hpp>
43 #include <signaldata/DihSwitchReplica.hpp>
44 #include <signaldata/DumpStateOrd.hpp>
45 #include <signaldata/EmptyLcp.hpp>
46 #include <signaldata/EventReport.hpp>
47 #include <signaldata/GCP.hpp>
48 #include <signaldata/HotSpareRep.hpp>
49 #include <signaldata/MasterGCP.hpp>
50 #include <signaldata/MasterLCP.hpp>
51 #include <signaldata/NFCompleteRep.hpp>
52 #include <signaldata/NodeFailRep.hpp>
53 #include <signaldata/ReadNodesConf.hpp>
54 #include <signaldata/StartFragReq.hpp>
55 #include <signaldata/StartInfo.hpp>
56 #include <signaldata/StartMe.hpp>
57 #include <signaldata/StartPerm.hpp>
58 #include <signaldata/StartRec.hpp>
59 #include <signaldata/StopPerm.hpp>
60 #include <signaldata/StopMe.hpp>
61 #include <signaldata/TestOrd.hpp>
62 #include <signaldata/WaitGCP.hpp>
63 #include <signaldata/DihStartTab.hpp>
64 #include <signaldata/LCP.hpp>
65 #include <signaldata/SystemError.hpp>
66
67 #include <signaldata/TakeOver.hpp>
68
69 #include <signaldata/DropTab.hpp>
70 #include <signaldata/AlterTab.hpp>
71 #include <signaldata/AlterTable.hpp>
72 #include <signaldata/PrepDropTab.hpp>
73 #include <signaldata/SumaImpl.hpp>
74 #include <signaldata/DictTabInfo.hpp>
75 #include <signaldata/CreateFragmentation.hpp>
76 #include <signaldata/LqhFrag.hpp>
77 #include <signaldata/FsOpenReq.hpp>
78 #include <signaldata/DihScanTab.hpp>
79 #include <signaldata/DictLock.hpp>
80 #include <DebuggerNames.hpp>
81 #include <signaldata/Upgrade.hpp>
82 #include <NdbEnv.h>
83 #include <signaldata/CreateNodegroup.hpp>
84 #include <signaldata/CreateNodegroupImpl.hpp>
85 #include <signaldata/DropNodegroup.hpp>
86 #include <signaldata/DropNodegroupImpl.hpp>
87 #include <signaldata/DihGetTabInfo.hpp>
88 #include <SectionReader.hpp>
89 #include <signaldata/DihRestart.hpp>
90
91 #include <EventLogger.hpp>
92 extern EventLogger * g_eventLogger;
93
/** Reinterprets the start of the sysfileData word array as a Sysfile. */
#define SYSFILE ((Sysfile *)&sysfileData[0])
/** NOTE(review): presumably the per-replica limit on crashed-replica entries - confirm at the use sites. */
#define MAX_CRASHED_REPLICAS 8
/** Initial values for a replica's create-GCI / last-GCI (use sites are outside this excerpt). */
#define ZINIT_CREATE_GCI Uint32(0)
#define ZINIT_REPLICA_LAST_GCI Uint32(-1)
98
/**
 * Leaves the calling (void) function immediately when checkNodeAlive(node)
 * evaluates to false; a jam() trace point is recorded before returning.
 *
 * The last line deliberately carries no line continuation, so the macro
 * ends here and cannot absorb whatever line follows it.
 */
#define RETURN_IF_NODE_NOT_ALIVE(node) \
  if (!checkNodeAlive((node))) { \
    jam(); \
    return; \
  }
104
/**
 * Marks receiveNodeId as no longer awaited in c_<sigName>_Counter and
 * returns from the calling (void) function while the counter is not yet
 * done.  Execution continues past the macro only once done() is true.
 */
#define receiveLoopMacro(sigName, receiveNodeId) \
{ \
  c_##sigName##_Counter.clearWaitingFor(receiveNodeId); \
  if (!c_##sigName##_Counter.done()) { \
    jam(); \
    return; \
  } \
}
113
/**
 * Resets c_<sigName>_Counter, then walks the node list that starts at
 * cfirstAliveNode and is linked through nextNode.  For every node it
 * registers the node as awaited in the counter and calls
 * signalRoutine(signal, nodeId, extra).
 *
 * The walk is a do/while: the first node is processed before any RNIL
 * check, so cfirstAliveNode must refer to a valid node record.
 * Requires a variable named `signal` in the expanding scope.
 */
#define sendLoopMacro(sigName, signalRoutine, extra) \
{ \
  c_##sigName##_Counter.clearWaitingFor(); \
  NodeRecordPtr specNodePtr; \
  specNodePtr.i = cfirstAliveNode; \
  do { \
    jam(); \
    ptrCheckGuard(specNodePtr, MAX_NDB_NODES, nodeRecord); \
    c_##sigName##_Counter.setWaitingFor(specNodePtr.i); \
    signalRoutine(signal, specNodePtr.i, extra); \
    specNodePtr.i = specNodePtr.p->nextNode; \
  } while (specNodePtr.i != RNIL); \
}
127
128 static
129 Uint32
prevLcpNo(Uint32 lcpNo)130 prevLcpNo(Uint32 lcpNo){
131 if(lcpNo == 0)
132 return MAX_LCP_USED - 1;
133 return lcpNo - 1;
134 }
135
136 static
137 Uint32
nextLcpNo(Uint32 lcpNo)138 nextLcpNo(Uint32 lcpNo){
139 lcpNo++;
140 if(lcpNo >= MAX_LCP_USED)
141 return 0;
142 return lcpNo;
143 }
144
nullRoutine(Signal * signal,Uint32 nodeId,Uint32 extra)145 void Dbdih::nullRoutine(Signal* signal, Uint32 nodeId, Uint32 extra)
146 {
147 }//Dbdih::nullRoutine()
148
sendCOPY_GCIREQ(Signal * signal,Uint32 nodeId,Uint32 extra)149 void Dbdih::sendCOPY_GCIREQ(Signal* signal, Uint32 nodeId, Uint32 extra)
150 {
151 ndbrequire(c_copyGCIMaster.m_copyReason != CopyGCIReq::IDLE);
152
153 const BlockReference ref = calcDihBlockRef(nodeId);
154 const Uint32 wordPerSignal = CopyGCIReq::DATA_SIZE;
155 const Uint32 noOfSignals = ((Sysfile::SYSFILE_SIZE32 + (wordPerSignal - 1)) /
156 wordPerSignal);
157
158 CopyGCIReq * const copyGCI = (CopyGCIReq *)&signal->theData[0];
159 copyGCI->anyData = nodeId;
160 copyGCI->copyReason = c_copyGCIMaster.m_copyReason;
161 copyGCI->startWord = 0;
162
163 for(Uint32 i = 0; i < noOfSignals; i++) {
164 jam();
165 { // Do copy
166 const int startWord = copyGCI->startWord;
167 for(Uint32 j = 0; j < wordPerSignal; j++) {
168 copyGCI->data[j] = sysfileData[j+startWord];
169 }//for
170 }
171 sendSignal(ref, GSN_COPY_GCIREQ, signal, 25, JBB);
172 copyGCI->startWord += wordPerSignal;
173 }//for
174 }//Dbdih::sendCOPY_GCIREQ()
175
176
sendDIH_SWITCH_REPLICA_REQ(Signal * signal,Uint32 nodeId,Uint32 extra)177 void Dbdih::sendDIH_SWITCH_REPLICA_REQ(Signal* signal, Uint32 nodeId,
178 Uint32 extra)
179 {
180 const BlockReference ref = calcDihBlockRef(nodeId);
181 sendSignal(ref, GSN_DIH_SWITCH_REPLICA_REQ, signal,
182 DihSwitchReplicaReq::SignalLength, JBB);
183 }//Dbdih::sendDIH_SWITCH_REPLICA_REQ()
184
sendEMPTY_LCP_REQ(Signal * signal,Uint32 nodeId,Uint32 extra)185 void Dbdih::sendEMPTY_LCP_REQ(Signal* signal, Uint32 nodeId, Uint32 extra)
186 {
187 BlockReference ref = calcLqhBlockRef(nodeId);
188 sendSignal(ref, GSN_EMPTY_LCP_REQ, signal, EmptyLcpReq::SignalLength, JBB);
189 }//Dbdih::sendEMPTY_LCPREQ()
190
sendGCP_COMMIT(Signal * signal,Uint32 nodeId,Uint32 extra)191 void Dbdih::sendGCP_COMMIT(Signal* signal, Uint32 nodeId, Uint32 extra)
192 {
193 BlockReference ref = calcDihBlockRef(nodeId);
194 GCPCommit *req = (GCPCommit*)signal->getDataPtrSend();
195 req->nodeId = cownNodeId;
196 req->gci_hi = Uint32(m_micro_gcp.m_master.m_new_gci >> 32);
197 req->gci_lo = Uint32(m_micro_gcp.m_master.m_new_gci);
198 sendSignal(ref, GSN_GCP_COMMIT, signal, GCPCommit::SignalLength, JBA);
199
200 ndbassert(m_micro_gcp.m_enabled || Uint32(m_micro_gcp.m_new_gci) == 0);
201 }//Dbdih::sendGCP_COMMIT()
202
sendGCP_PREPARE(Signal * signal,Uint32 nodeId,Uint32 extra)203 void Dbdih::sendGCP_PREPARE(Signal* signal, Uint32 nodeId, Uint32 extra)
204 {
205 BlockReference ref = calcDihBlockRef(nodeId);
206 GCPPrepare *req = (GCPPrepare*)signal->getDataPtrSend();
207 req->nodeId = cownNodeId;
208 req->gci_hi = Uint32(m_micro_gcp.m_master.m_new_gci >> 32);
209 req->gci_lo = Uint32(m_micro_gcp.m_master.m_new_gci);
210
211 if (! (ERROR_INSERTED(7201) || ERROR_INSERTED(7202)))
212 {
213 sendSignal(ref, GSN_GCP_PREPARE, signal, GCPPrepare::SignalLength, JBA);
214 }
215 else if (ERROR_INSERTED(7201))
216 {
217 sendSignal(ref, GSN_GCP_PREPARE, signal, GCPPrepare::SignalLength, JBB);
218 }
219 else if (ERROR_INSERTED(7202))
220 {
221 ndbrequire(nodeId == getOwnNodeId());
222 sendSignalWithDelay(ref, GSN_GCP_PREPARE, signal, 2000,
223 GCPPrepare::SignalLength);
224 }
225 else
226 {
227 ndbrequire(false); // should be dead code #ifndef ERROR_INSERT
228 }
229
230 ndbassert(m_micro_gcp.m_enabled || Uint32(m_micro_gcp.m_new_gci) == 0);
231 }//Dbdih::sendGCP_PREPARE()
232
233 void
sendSUB_GCP_COMPLETE_REP(Signal * signal,Uint32 nodeId,Uint32 extra)234 Dbdih::sendSUB_GCP_COMPLETE_REP(Signal* signal, Uint32 nodeId, Uint32 extra)
235 {
236 ndbassert(m_micro_gcp.m_enabled || Uint32(m_micro_gcp.m_new_gci) == 0);
237 if (!ndbd_dih_sub_gcp_complete_ack(getNodeInfo(nodeId).m_version))
238 {
239 jam();
240 c_SUB_GCP_COMPLETE_REP_Counter.clearWaitingFor(nodeId);
241 }
242 BlockReference ref = calcDihBlockRef(nodeId);
243 sendSignal(ref, GSN_SUB_GCP_COMPLETE_REP, signal,
244 SubGcpCompleteRep::SignalLength, JBA);
245 }
246
sendGCP_SAVEREQ(Signal * signal,Uint32 nodeId,Uint32 extra)247 void Dbdih::sendGCP_SAVEREQ(Signal* signal, Uint32 nodeId, Uint32 extra)
248 {
249 GCPSaveReq * const saveReq = (GCPSaveReq*)&signal->theData[0];
250 BlockReference ref = calcDihBlockRef(nodeId);
251 saveReq->dihBlockRef = reference();
252 saveReq->dihPtr = nodeId;
253 saveReq->gci = m_gcp_save.m_master.m_new_gci;
254 sendSignal(ref, GSN_GCP_SAVEREQ, signal, GCPSaveReq::SignalLength, JBB);
255 }//Dbdih::sendGCP_SAVEREQ()
256
sendINCL_NODEREQ(Signal * signal,Uint32 nodeId,Uint32 extra)257 void Dbdih::sendINCL_NODEREQ(Signal* signal, Uint32 nodeId, Uint32 extra)
258 {
259 BlockReference nodeDihRef = calcDihBlockRef(nodeId);
260 signal->theData[0] = reference();
261 signal->theData[1] = c_nodeStartMaster.startNode;
262 signal->theData[2] = c_nodeStartMaster.failNr;
263 signal->theData[3] = 0;
264 signal->theData[4] = (Uint32)(m_micro_gcp.m_current_gci >> 32);
265 signal->theData[5] = (Uint32)(m_micro_gcp.m_current_gci & 0xFFFFFFFF);
266 sendSignal(nodeDihRef, GSN_INCL_NODEREQ, signal, 6, JBA);
267 }//Dbdih::sendINCL_NODEREQ()
268
sendMASTER_GCPREQ(Signal * signal,Uint32 nodeId,Uint32 extra)269 void Dbdih::sendMASTER_GCPREQ(Signal* signal, Uint32 nodeId, Uint32 extra)
270 {
271 BlockReference ref = calcDihBlockRef(nodeId);
272 sendSignal(ref, GSN_MASTER_GCPREQ, signal, MasterGCPReq::SignalLength, JBB);
273 }//Dbdih::sendMASTER_GCPREQ()
274
sendMASTER_LCPREQ(Signal * signal,Uint32 nodeId,Uint32 extra)275 void Dbdih::sendMASTER_LCPREQ(Signal* signal, Uint32 nodeId, Uint32 extra)
276 {
277 BlockReference ref = calcDihBlockRef(nodeId);
278 sendSignal(ref, GSN_MASTER_LCPREQ, signal, MasterLCPReq::SignalLength, JBB);
279 }//Dbdih::sendMASTER_LCPREQ()
280
sendSTART_INFOREQ(Signal * signal,Uint32 nodeId,Uint32 extra)281 void Dbdih::sendSTART_INFOREQ(Signal* signal, Uint32 nodeId, Uint32 extra)
282 {
283 const BlockReference ref = calcDihBlockRef(nodeId);
284 sendSignal(ref, GSN_START_INFOREQ, signal, StartInfoReq::SignalLength, JBB);
285 }//sendSTART_INFOREQ()
286
sendSTART_RECREQ(Signal * signal,Uint32 nodeId,Uint32 extra)287 void Dbdih::sendSTART_RECREQ(Signal* signal, Uint32 nodeId, Uint32 extra)
288 {
289 if (!m_sr_nodes.get(nodeId))
290 {
291 jam();
292 c_START_RECREQ_Counter.clearWaitingFor(nodeId);
293 return;
294 }
295
296 Uint32 keepGCI = SYSFILE->keepGCI;
297 Uint32 lastCompletedGCI = SYSFILE->lastCompletedGCI[nodeId];
298 if (keepGCI > lastCompletedGCI)
299 {
300 jam();
301 keepGCI = lastCompletedGCI;
302 }
303
304 StartRecReq * const req = (StartRecReq*)&signal->theData[0];
305 BlockReference ref = calcLqhBlockRef(nodeId);
306 req->receivingNodeId = nodeId;
307 req->senderRef = reference();
308 req->keepGci = keepGCI;
309 req->lastCompletedGci = lastCompletedGCI;
310 req->newestGci = SYSFILE->newestRestorableGCI;
311 req->senderData = extra;
312 m_sr_nodes.copyto(NdbNodeBitmask::Size, req->sr_nodes);
313 sendSignal(ref, GSN_START_RECREQ, signal, StartRecReq::SignalLength, JBB);
314
315 signal->theData[0] = NDB_LE_StartREDOLog;
316 signal->theData[1] = nodeId;
317 signal->theData[2] = keepGCI;
318 signal->theData[3] = lastCompletedGCI;
319 signal->theData[4] = SYSFILE->newestRestorableGCI;
320 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 5, JBB);
321 }//Dbdih::sendSTART_RECREQ()
322
sendSTART_TOREQ(Signal * signal,Uint32 nodeId,Uint32 extra)323 void Dbdih::sendSTART_TOREQ(Signal* signal, Uint32 nodeId, Uint32 extra)
324 {
325 BlockReference ref = calcDihBlockRef(nodeId);
326 sendSignal(ref, GSN_START_TOREQ, signal, StartToReq::SignalLength, JBB);
327 }//Dbdih::sendSTART_TOREQ()
328
sendSTOP_ME_REQ(Signal * signal,Uint32 nodeId,Uint32 extra)329 void Dbdih::sendSTOP_ME_REQ(Signal* signal, Uint32 nodeId, Uint32 extra)
330 {
331 if (nodeId != getOwnNodeId()) {
332 jam();
333 const BlockReference ref = calcDihBlockRef(nodeId);
334 sendSignal(ref, GSN_STOP_ME_REQ, signal, StopMeReq::SignalLength, JBB);
335 }//if
336 }//Dbdih::sendSTOP_ME_REQ()
337
sendTC_CLOPSIZEREQ(Signal * signal,Uint32 nodeId,Uint32 extra)338 void Dbdih::sendTC_CLOPSIZEREQ(Signal* signal, Uint32 nodeId, Uint32 extra)
339 {
340 BlockReference ref = calcTcBlockRef(nodeId);
341 signal->theData[0] = nodeId;
342 signal->theData[1] = reference();
343 sendSignal(ref, GSN_TC_CLOPSIZEREQ, signal, 2, JBB);
344 }//Dbdih::sendTC_CLOPSIZEREQ()
345
sendTCGETOPSIZEREQ(Signal * signal,Uint32 nodeId,Uint32 extra)346 void Dbdih::sendTCGETOPSIZEREQ(Signal* signal, Uint32 nodeId, Uint32 extra)
347 {
348 BlockReference ref = calcTcBlockRef(nodeId);
349 signal->theData[0] = nodeId;
350 signal->theData[1] = reference();
351 sendSignal(ref, GSN_TCGETOPSIZEREQ, signal, 2, JBB);
352 }//Dbdih::sendTCGETOPSIZEREQ()
353
sendUPDATE_TOREQ(Signal * signal,Uint32 nodeId,Uint32 extra)354 void Dbdih::sendUPDATE_TOREQ(Signal* signal, Uint32 nodeId, Uint32 extra)
355 {
356 const BlockReference ref = calcDihBlockRef(nodeId);
357 sendSignal(ref, GSN_UPDATE_TOREQ, signal, UpdateToReq::SignalLength, JBB);
358 }//sendUPDATE_TOREQ()
359
execCONTINUEB(Signal * signal)360 void Dbdih::execCONTINUEB(Signal* signal)
361 {
362 jamEntry();
363 switch ((DihContinueB::Type)signal->theData[0]) {
364 case DihContinueB::ZPACK_TABLE_INTO_PAGES:
365 {
366 jam();
367 Uint32 tableId = signal->theData[1];
368 packTableIntoPagesLab(signal, tableId);
369 return;
370 break;
371 }
372 case DihContinueB::ZPACK_FRAG_INTO_PAGES:
373 {
374 RWFragment wf;
375 jam();
376 wf.rwfTabPtr.i = signal->theData[1];
377 ptrCheckGuard(wf.rwfTabPtr, ctabFileSize, tabRecord);
378 wf.fragId = signal->theData[2];
379 wf.pageIndex = signal->theData[3];
380 wf.wordIndex = signal->theData[4];
381 wf.totalfragments = signal->theData[5];
382 packFragIntoPagesLab(signal, &wf);
383 return;
384 break;
385 }
386 case DihContinueB::ZREAD_PAGES_INTO_TABLE:
387 {
388 jam();
389 Uint32 tableId = signal->theData[1];
390 readPagesIntoTableLab(signal, tableId);
391 return;
392 break;
393 }
394 case DihContinueB::ZREAD_PAGES_INTO_FRAG:
395 {
396 RWFragment rf;
397 jam();
398 rf.rwfTabPtr.i = signal->theData[1];
399 ptrCheckGuard(rf.rwfTabPtr, ctabFileSize, tabRecord);
400 rf.fragId = signal->theData[2];
401 rf.pageIndex = signal->theData[3];
402 rf.wordIndex = signal->theData[4];
403 readPagesIntoFragLab(signal, &rf);
404 return;
405 break;
406 }
407 case DihContinueB::ZCOPY_TABLE:
408 {
409 jam();
410 Uint32 tableId = signal->theData[1];
411 copyTableLab(signal, tableId);
412 return;
413 }
414 case DihContinueB::ZCOPY_TABLE_NODE:
415 {
416 NodeRecordPtr nodePtr;
417 CopyTableNode ctn;
418 jam();
419 ctn.ctnTabPtr.i = signal->theData[1];
420 ptrCheckGuard(ctn.ctnTabPtr, ctabFileSize, tabRecord);
421 nodePtr.i = signal->theData[2];
422 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
423 ctn.pageIndex = signal->theData[3];
424 ctn.wordIndex = signal->theData[4];
425 ctn.noOfWords = signal->theData[5];
426 copyTableNode(signal, &ctn, nodePtr);
427 return;
428 }
429 case DihContinueB::ZSTART_FRAGMENT:
430 {
431 jam();
432 Uint32 tableId = signal->theData[1];
433 Uint32 fragId = signal->theData[2];
434 startFragment(signal, tableId, fragId);
435 return;
436 }
437 case DihContinueB::ZCOMPLETE_RESTART:
438 jam();
439 completeRestartLab(signal);
440 return;
441 case DihContinueB::ZREAD_TABLE_FROM_PAGES:
442 {
443 TabRecordPtr tabPtr;
444 jam();
445 tabPtr.i = signal->theData[1];
446 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
447 readTableFromPagesLab(signal, tabPtr);
448 return;
449 }
450 case DihContinueB::ZSR_PHASE2_READ_TABLE:
451 {
452 TabRecordPtr tabPtr;
453 jam();
454 tabPtr.i = signal->theData[1];
455 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
456 srPhase2ReadTableLab(signal, tabPtr);
457 return;
458 }
459 case DihContinueB::ZCHECK_TC_COUNTER:
460 jam();
461 #ifndef NO_LCP
462 checkTcCounterLab(signal);
463 #endif
464 return;
465 case DihContinueB::ZCALCULATE_KEEP_GCI:
466 {
467 jam();
468 Uint32 tableId = signal->theData[1];
469 Uint32 fragId = signal->theData[2];
470 calculateKeepGciLab(signal, tableId, fragId);
471 return;
472 }
473 case DihContinueB::ZSTORE_NEW_LCP_ID:
474 jam();
475 storeNewLcpIdLab(signal);
476 return;
477 case DihContinueB::ZTABLE_UPDATE:
478 {
479 TabRecordPtr tabPtr;
480 jam();
481 tabPtr.i = signal->theData[1];
482 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
483 tableUpdateLab(signal, tabPtr);
484 return;
485 }
486 case DihContinueB::ZCHECK_LCP_COMPLETED:
487 {
488 jam();
489 checkLcpCompletedLab(signal);
490 return;
491 }
492 case DihContinueB::ZINIT_LCP:
493 {
494 jam();
495 Uint32 senderRef = signal->theData[1];
496 Uint32 tableId = signal->theData[2];
497 initLcpLab(signal, senderRef, tableId);
498 return;
499 }
500 case DihContinueB::ZADD_TABLE_MASTER_PAGES:
501 {
502 TabRecordPtr tabPtr;
503 jam();
504 tabPtr.i = signal->theData[1];
505 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
506 tabPtr.p->tabUpdateState = TabRecord::US_ADD_TABLE_MASTER;
507 tableUpdateLab(signal, tabPtr);
508 return;
509 break;
510 }
511 case DihContinueB::ZDIH_ADD_TABLE_MASTER:
512 {
513 jam();
514 addTable_closeConf(signal, signal->theData[1]);
515 return;
516 }
517 case DihContinueB::ZADD_TABLE_SLAVE_PAGES:
518 {
519 TabRecordPtr tabPtr;
520 jam();
521 tabPtr.i = signal->theData[1];
522 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
523 tabPtr.p->tabUpdateState = TabRecord::US_ADD_TABLE_SLAVE;
524 tableUpdateLab(signal, tabPtr);
525 return;
526 }
527 case DihContinueB::ZDIH_ADD_TABLE_SLAVE:
528 {
529 ndbrequire(false);
530 return;
531 }
532 case DihContinueB::ZSTART_GCP:
533 jam();
534 #ifndef NO_GCP
535 startGcpLab(signal, signal->theData[1]);
536 #endif
537 return;
538 break;
539 case DihContinueB::ZCOPY_GCI:{
540 jam();
541 CopyGCIReq::CopyReason reason = (CopyGCIReq::CopyReason)signal->theData[1];
542 ndbrequire(c_copyGCIMaster.m_copyReason == reason);
543
544 // set to idle, to be able to reuse method
545 c_copyGCIMaster.m_copyReason = CopyGCIReq::IDLE;
546 copyGciLab(signal, reason);
547 return;
548 }
549 break;
550 case DihContinueB::ZEMPTY_VERIFY_QUEUE:
551 jam();
552 emptyverificbuffer(signal, signal->theData[1], true);
553 return;
554 break;
555 case DihContinueB::ZCHECK_GCP_STOP:
556 jam();
557 #ifndef NO_GCP
558 checkGcpStopLab(signal);
559 #endif
560 return;
561 break;
562 case DihContinueB::ZREMOVE_NODE_FROM_TABLE:
563 {
564 jam();
565 Uint32 nodeId = signal->theData[1];
566 Uint32 tableId = signal->theData[2];
567 removeNodeFromTables(signal, nodeId, tableId);
568 return;
569 }
570 case DihContinueB::ZCOPY_NODE:
571 {
572 jam();
573 Uint32 tableId = signal->theData[1];
574 copyNodeLab(signal, tableId);
575 return;
576 }
577 case DihContinueB::ZTO_START_COPY_FRAG:
578 {
579 jam();
580 Uint32 takeOverPtrI = signal->theData[1];
581 startNextCopyFragment(signal, takeOverPtrI);
582 return;
583 }
584 case DihContinueB::ZINVALIDATE_NODE_LCP:
585 {
586 jam();
587 const Uint32 nodeId = signal->theData[1];
588 const Uint32 tableId = signal->theData[2];
589 invalidateNodeLCP(signal, nodeId, tableId);
590 return;
591 }
592 case DihContinueB::ZINITIALISE_RECORDS:
593 jam();
594 initialiseRecordsLab(signal,
595 signal->theData[1],
596 signal->theData[2],
597 signal->theData[3]);
598 return;
599 break;
600 case DihContinueB::ZSTART_PERMREQ_AGAIN:
601 jam();
602 nodeRestartPh2Lab2(signal);
603 return;
604 break;
605 case DihContinueB::SwitchReplica:
606 {
607 jam();
608 const Uint32 nodeId = signal->theData[1];
609 const Uint32 tableId = signal->theData[2];
610 const Uint32 fragNo = signal->theData[3];
611 switchReplica(signal, nodeId, tableId, fragNo);
612 return;
613 }
614 case DihContinueB::ZSEND_ADD_FRAG:
615 {
616 jam();
617 Uint32 takeOverPtrI = signal->theData[1];
618 toCopyFragLab(signal, takeOverPtrI);
619 return;
620 }
621 case DihContinueB::ZSEND_START_TO:
622 {
623 jam();
624 Ptr<TakeOverRecord> takeOverPtr;
625 c_takeOverPool.getPtr(takeOverPtr, signal->theData[1]);
626 sendStartTo(signal, takeOverPtr);
627 return;
628 }
629 case DihContinueB::ZSEND_UPDATE_TO:
630 {
631 jam();
632 Ptr<TakeOverRecord> takeOverPtr;
633 c_takeOverPool.getPtr(takeOverPtr, signal->theData[1]);
634 sendUpdateTo(signal, takeOverPtr);
635 return;
636 }
637 case DihContinueB::WAIT_DROP_TAB_WRITING_TO_FILE:{
638 jam();
639 TabRecordPtr tabPtr;
640 tabPtr.i = signal->theData[1];
641 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
642 waitDropTabWritingToFile(signal, tabPtr);
643 return;
644 }
645 case DihContinueB::ZTO_START_FRAGMENTS:
646 {
647 TakeOverRecordPtr takeOverPtr;
648 c_takeOverPool.getPtr(takeOverPtr, signal->theData[1]);
649 nr_start_fragments(signal, takeOverPtr);
650 return;
651 }
652 case DihContinueB::ZCOPY_NODE_WAIT_CREATE_FRAG:
653 {
654 jam();
655 lcpBlockedLab(signal, true, signal->theData[1]);
656 return;
657 }
658 case DihContinueB::ZWAIT_OLD_SCAN:
659 {
660 jam();
661 wait_old_scan(signal);
662 return;
663 }
664 case DihContinueB::ZLCP_TRY_LOCK:
665 {
666 jam();
667 Mutex mutex(signal, c_mutexMgr, c_fragmentInfoMutex_lcp);
668 Callback c = { safe_cast(&Dbdih::lcpFragmentMutex_locked),
669 signal->theData[1] };
670 ndbrequire(mutex.trylock(c, false));
671 return;
672 }
673 case DihContinueB::ZDELAY_RELEASE_FRAGMENT_INFO_MUTEX:
674 {
675 jam();
676 MutexHandle2<DIH_FRAGMENT_INFO> mh;
677 mh.setHandle(signal->theData[1]);
678 Mutex mutex(signal, c_mutexMgr, mh);
679 mutex.unlock();
680 return;
681 }
682 case DihContinueB::ZTO_START_LOGGING:
683 {
684 jam();
685 TakeOverRecordPtr takeOverPtr;
686 c_takeOverPool.getPtr(takeOverPtr, signal->theData[1]);
687 nr_start_logging(signal, takeOverPtr);
688 return;
689 }
690 case DihContinueB::ZGET_TABINFO:
691 {
692 jam();
693 getTabInfo(signal);
694 return;
695 }
696 case DihContinueB::ZGET_TABINFO_SEND:
697 {
698 jam();
699 TabRecordPtr tabPtr;
700 jam();
701 tabPtr.i = signal->theData[1];
702 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
703 getTabInfo_send(signal, tabPtr);
704 return;
705 }
706 }
707
708 ndbrequire(false);
709 return;
710 }//Dbdih::execCONTINUEB()
711
execCOPY_GCIREQ(Signal * signal)712 void Dbdih::execCOPY_GCIREQ(Signal* signal)
713 {
714 CopyGCIReq * const copyGCI = (CopyGCIReq *)&signal->theData[0];
715 jamEntry();
716 CopyGCIReq::CopyReason reason = (CopyGCIReq::CopyReason)copyGCI->copyReason;
717 const Uint32 tstart = copyGCI->startWord;
718
719 ndbrequire(cmasterdihref == signal->senderBlockRef()) ;
720 ndbrequire((reason == CopyGCIReq::GLOBAL_CHECKPOINT &&
721 c_copyGCISlave.m_copyReason == CopyGCIReq::GLOBAL_CHECKPOINT) ||
722 c_copyGCISlave.m_copyReason == CopyGCIReq::IDLE);
723 ndbrequire(c_copyGCISlave.m_expectedNextWord == tstart);
724 ndbrequire(reason != CopyGCIReq::IDLE);
725 bool isdone = (tstart + CopyGCIReq::DATA_SIZE) >= Sysfile::SYSFILE_SIZE32;
726
727 if (ERROR_INSERTED(7177))
728 {
729 jam();
730
731 if (signal->getLength() == 3)
732 {
733 jam();
734 goto done;
735 }
736 }
737
738 arrGuard(tstart + CopyGCIReq::DATA_SIZE, sizeof(sysfileData)/4);
739 for(Uint32 i = 0; i<CopyGCIReq::DATA_SIZE; i++)
740 cdata[tstart+i] = copyGCI->data[i];
741
742 if (ERROR_INSERTED(7177) && isMaster() && isdone)
743 {
744 sendSignalWithDelay(reference(), GSN_COPY_GCIREQ, signal, 1000, 3);
745 return;
746 }
747
748 done:
749 if (isdone)
750 {
751 jam();
752 c_copyGCISlave.m_expectedNextWord = 0;
753 }
754 else
755 {
756 jam();
757 c_copyGCISlave.m_expectedNextWord += CopyGCIReq::DATA_SIZE;
758 return;
759 }
760
761 if (cmasterdihref != reference())
762 {
763 jam();
764 Uint32 tmp= SYSFILE->m_restart_seq;
765 memcpy(sysfileData, cdata, sizeof(sysfileData));
766 SYSFILE->m_restart_seq = tmp;
767
768 if (c_set_initial_start_flag)
769 {
770 jam();
771 Sysfile::setInitialStartOngoing(SYSFILE->systemRestartBits);
772 }
773 }
774
775 c_copyGCISlave.m_copyReason = reason;
776 c_copyGCISlave.m_senderRef = signal->senderBlockRef();
777 c_copyGCISlave.m_senderData = copyGCI->anyData;
778
779 CRASH_INSERTION2(7020, reason==CopyGCIReq::LOCAL_CHECKPOINT);
780 CRASH_INSERTION2(7008, reason==CopyGCIReq::GLOBAL_CHECKPOINT);
781
782 if (m_local_lcp_state.check_cut_log_tail(c_newest_restorable_gci))
783 {
784 jam();
785
786 #if NOT_YET
787 LcpCompleteRep* rep = (LcpCompleteRep*)signal->getDataPtrSend();
788 rep->nodeId = getOwnNodeId();
789 rep->blockNo = 0;
790 rep->lcpId = m_local_lcp_state.m_start_lcp_req.lcpId;
791 rep->keepGci = m_local_lcp_state.m_keep_gci;
792 sendSignal(DBLQH_REF, GSN_LCP_COMPLETE_REP, signal,
793 LcpCompleteRep::SignalLength, JBB);
794
795 warningEvent("CUT LOG TAIL: reason: %u lcp: %u m_keep_gci: %u stop: %u",
796 reason,
797 m_local_lcp_state.m_start_lcp_req.lcpId,
798 m_local_lcp_state.m_keep_gci,
799 m_local_lcp_state.m_stop_gci);
800 #endif
801 m_local_lcp_state.reset();
802 }
803
804 /* -------------------------------------------------------------------------*/
805 /* WE SET THE REQUESTER OF THE COPY GCI TO THE CURRENT MASTER. IF THE */
806 /* CURRENT MASTER WE DO NOT WANT THE NEW MASTER TO RECEIVE CONFIRM OF */
807 /* SOMETHING HE HAS NOT SENT. THE TAKE OVER MUST BE CAREFUL. */
808 /* -------------------------------------------------------------------------*/
809 bool ok = false;
810 switch(reason){
811 case CopyGCIReq::IDLE:
812 ok = true;
813 jam();
814 ndbrequire(false);
815 break;
816 case CopyGCIReq::LOCAL_CHECKPOINT: {
817 ok = true;
818 jam();
819 c_lcpState.setLcpStatus(LCP_COPY_GCI, __LINE__);
820 c_lcpState.m_masterLcpDihRef = cmasterdihref;
821 setNodeActiveStatus();
822 break;
823 }
824 case CopyGCIReq::RESTART: {
825 ok = true;
826 jam();
827 Uint32 newest = SYSFILE->newestRestorableGCI;
828 m_micro_gcp.m_old_gci = Uint64(newest) << 32;
829 crestartGci = newest;
830 c_newest_restorable_gci = newest;
831 Sysfile::setRestartOngoing(SYSFILE->systemRestartBits);
832 m_micro_gcp.m_current_gci = Uint64(newest + 1) << 32;
833 setNodeActiveStatus();
834 setNodeGroups();
835 if ((Sysfile::getLCPOngoing(SYSFILE->systemRestartBits))) {
836 jam();
837 /* -------------------------------------------------------------------- */
838 // IF THERE WAS A LOCAL CHECKPOINT ONGOING AT THE CRASH MOMENT WE WILL
839 // INVALIDATE THAT LOCAL CHECKPOINT.
840 /* -------------------------------------------------------------------- */
841 invalidateLcpInfoAfterSr(signal);
842 }//if
843
844 if (m_micro_gcp.m_enabled == false &&
845 m_micro_gcp.m_master.m_time_between_gcp)
846 {
847 /**
848 * Micro GCP is disabled...but configured...
849 */
850 jam();
851 m_micro_gcp.m_enabled = true;
852 UpgradeProtocolOrd * ord = (UpgradeProtocolOrd*)signal->getDataPtrSend();
853 ord->type = UpgradeProtocolOrd::UPO_ENABLE_MICRO_GCP;
854 EXECUTE_DIRECT(QMGR,GSN_UPGRADE_PROTOCOL_ORD,signal,signal->getLength());
855 }
856 break;
857 }
858 case CopyGCIReq::GLOBAL_CHECKPOINT: {
859 ok = true;
860 jam();
861
862 if (m_gcp_save.m_state == GcpSave::GCP_SAVE_COPY_GCI)
863 {
864 jam();
865 /**
866 * This must be master take over...and it already running...
867 */
868 ndbrequire(c_newest_restorable_gci == SYSFILE->newestRestorableGCI);
869 m_gcp_save.m_master_ref = c_copyGCISlave.m_senderRef;
870 return;
871 }
872
873 if (c_newest_restorable_gci == SYSFILE->newestRestorableGCI)
874 {
875 jam();
876
877 /**
878 * This must be master take over...and it already complete...
879 */
880 m_gcp_save.m_master_ref = c_copyGCISlave.m_senderRef;
881 c_copyGCISlave.m_copyReason = CopyGCIReq::IDLE;
882 signal->theData[0] = c_copyGCISlave.m_senderData;
883 sendSignal(m_gcp_save.m_master_ref, GSN_COPY_GCICONF, signal, 1, JBB);
884 return;
885 }
886
887 ndbrequire(m_gcp_save.m_state == GcpSave::GCP_SAVE_CONF);
888 m_gcp_save.m_state = GcpSave::GCP_SAVE_COPY_GCI;
889 m_gcp_save.m_master_ref = c_copyGCISlave.m_senderRef;
890 c_newest_restorable_gci = SYSFILE->newestRestorableGCI;
891 setNodeActiveStatus();
892 break;
893 }//if
894 case CopyGCIReq::INITIAL_START_COMPLETED:
895 ok = true;
896 jam();
897 break;
898 case CopyGCIReq::RESTART_NR:
899 jam();
900 setNodeGroups();
901 /**
902 * We dont really need to make anything durable here...skip it
903 */
904 c_copyGCISlave.m_copyReason = CopyGCIReq::IDLE;
905 signal->theData[0] = c_copyGCISlave.m_senderData;
906 sendSignal(c_copyGCISlave.m_senderRef, GSN_COPY_GCICONF, signal, 1, JBB);
907 return;
908 }
909 ndbrequire(ok);
910
911 CRASH_INSERTION(7183);
912
913 if (ERROR_INSERTED(7185) && reason==CopyGCIReq::GLOBAL_CHECKPOINT)
914 {
915 jam();
916 return;
917 }
918 #ifdef GCP_TIMER_HACK
919 if (reason == CopyGCIReq::GLOBAL_CHECKPOINT) {
920 jam();
921 NdbTick_getMicroTimer(&globalData.gcp_timer_copygci[0]);
922 }
923 #endif
924
925 /* ----------------------------------------------------------------------- */
926 /* WE START BY TRYING TO OPEN THE FIRST RESTORABLE GCI FILE. */
927 /* ----------------------------------------------------------------------- */
928 FileRecordPtr filePtr;
929 filePtr.i = crestartInfoFile[0];
930 ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
931 if (filePtr.p->fileStatus == FileRecord::OPEN) {
932 jam();
933 openingCopyGciSkipInitLab(signal, filePtr);
934 return;
935 }//if
936 openFileRw(signal, filePtr);
937 filePtr.p->reqStatus = FileRecord::OPENING_COPY_GCI;
938 return;
939 }//Dbdih::execCOPY_GCIREQ()
940
execDICTSTARTCONF(Signal * signal)941 void Dbdih::execDICTSTARTCONF(Signal* signal)
942 {
943 jamEntry();
944 Uint32 nodeId = refToNode(signal->getSendersBlockRef());
945 if (nodeId != getOwnNodeId()) {
946 jam();
947 nodeDictStartConfLab(signal);
948 } else {
949 jam();
950 dictStartConfLab(signal);
951 }//if
952 }//Dbdih::execDICTSTARTCONF()
953
execFSCLOSECONF(Signal * signal)954 void Dbdih::execFSCLOSECONF(Signal* signal)
955 {
956 FileRecordPtr filePtr;
957 jamEntry();
958 filePtr.i = signal->theData[0];
959 ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
960 filePtr.p->fileStatus = FileRecord::CLOSED;
961 FileRecord::ReqStatus status = filePtr.p->reqStatus;
962 filePtr.p->reqStatus = FileRecord::IDLE;
963 switch (status) {
964 case FileRecord::CLOSING_GCP:
965 jam();
966 closingGcpLab(signal, filePtr);
967 break;
968 case FileRecord::CLOSING_GCP_CRASH:
969 jam();
970 closingGcpCrashLab(signal, filePtr);
971 break;
972 case FileRecord::CLOSING_TABLE_CRASH:
973 jam();
974 closingTableCrashLab(signal, filePtr);
975 break;
976 case FileRecord::CLOSING_TABLE_SR:
977 jam();
978 closingTableSrLab(signal, filePtr);
979 break;
980 case FileRecord::TABLE_CLOSE:
981 jam();
982 tableCloseLab(signal, filePtr);
983 break;
984 case FileRecord::TABLE_CLOSE_DELETE:
985 jam();
986 tableDeleteLab(signal, filePtr);
987 break;
988 default:
989 ndbrequire(false);
990 break;
991 }//switch
992 return;
993 }//Dbdih::execFSCLOSECONF()
994
execFSCLOSEREF(Signal * signal)995 void Dbdih::execFSCLOSEREF(Signal* signal)
996 {
997 FileRecordPtr filePtr;
998 jamEntry();
999 filePtr.i = signal->theData[0];
1000 ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
1001 FileRecord::ReqStatus status = filePtr.p->reqStatus;
1002 filePtr.p->reqStatus = FileRecord::IDLE;
1003 switch (status) {
1004 case FileRecord::CLOSING_GCP:
1005 jam();
1006 break;
1007 case FileRecord::CLOSING_GCP_CRASH:
1008 jam();
1009 closingGcpCrashLab(signal, filePtr);
1010 return;
1011 case FileRecord::CLOSING_TABLE_CRASH:
1012 jam();
1013 closingTableCrashLab(signal, filePtr);
1014 return;
1015 case FileRecord::CLOSING_TABLE_SR:
1016 jam();
1017 break;
1018 case FileRecord::TABLE_CLOSE:
1019 jam();
1020 break;
1021 case FileRecord::TABLE_CLOSE_DELETE:
1022 jam();
1023 break;
1024 default:
1025 jam();
1026 break;
1027
1028 }//switch
1029 {
1030 char msg[100];
1031 sprintf(msg, "File system close failed during FileRecord status %d", (Uint32)status);
1032 fsRefError(signal,__LINE__,msg);
1033 }
1034 return;
1035 }//Dbdih::execFSCLOSEREF()
1036
execFSOPENCONF(Signal * signal)1037 void Dbdih::execFSOPENCONF(Signal* signal)
1038 {
1039 FileRecordPtr filePtr;
1040 jamEntry();
1041 filePtr.i = signal->theData[0];
1042 ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
1043 filePtr.p->fileRef = signal->theData[1];
1044 filePtr.p->fileStatus = FileRecord::OPEN;
1045 FileRecord::ReqStatus status = filePtr.p->reqStatus;
1046 filePtr.p->reqStatus = FileRecord::IDLE;
1047 switch (status) {
1048 case FileRecord::CREATING_GCP:
1049 jam();
1050 creatingGcpLab(signal, filePtr);
1051 break;
1052 case FileRecord::OPENING_COPY_GCI:
1053 jam();
1054 openingCopyGciSkipInitLab(signal, filePtr);
1055 break;
1056 case FileRecord::CREATING_COPY_GCI:
1057 jam();
1058 openingCopyGciSkipInitLab(signal, filePtr);
1059 break;
1060 case FileRecord::OPENING_GCP:
1061 jam();
1062 openingGcpLab(signal, filePtr);
1063 break;
1064 case FileRecord::OPENING_TABLE:
1065 jam();
1066 openingTableLab(signal, filePtr);
1067 break;
1068 case FileRecord::TABLE_CREATE:
1069 jam();
1070 tableCreateLab(signal, filePtr);
1071 break;
1072 case FileRecord::TABLE_OPEN_FOR_DELETE:
1073 jam();
1074 tableOpenLab(signal, filePtr);
1075 break;
1076 default:
1077 ndbrequire(false);
1078 break;
1079 }//switch
1080 return;
1081 }//Dbdih::execFSOPENCONF()
1082
execFSOPENREF(Signal * signal)1083 void Dbdih::execFSOPENREF(Signal* signal)
1084 {
1085 FileRecordPtr filePtr;
1086 jamEntry();
1087 filePtr.i = signal->theData[0];
1088 ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
1089 FileRecord::ReqStatus status = filePtr.p->reqStatus;
1090 filePtr.p->reqStatus = FileRecord::IDLE;
1091 switch (status) {
1092 case FileRecord::CREATING_GCP:
1093 /* --------------------------------------------------------------------- */
1094 /* WE DID NOT MANAGE TO CREATE A GLOBAL CHECKPOINT FILE. SERIOUS ERROR */
1095 /* WHICH CAUSES A SYSTEM RESTART. */
1096 /* --------------------------------------------------------------------- */
1097 jam();
1098 break;
1099 case FileRecord::OPENING_COPY_GCI:
1100 jam();
1101 openingCopyGciErrorLab(signal, filePtr);
1102 return;
1103 case FileRecord::CREATING_COPY_GCI:
1104 jam();
1105 break;
1106 case FileRecord::OPENING_GCP:
1107 jam();
1108 openingGcpErrorLab(signal, filePtr);
1109 return;
1110 case FileRecord::OPENING_TABLE:
1111 jam();
1112 openingTableErrorLab(signal, filePtr);
1113 return;
1114 case FileRecord::TABLE_CREATE:
1115 jam();
1116 break;
1117 case FileRecord::TABLE_OPEN_FOR_DELETE:
1118 jam();
1119 tableDeleteLab(signal, filePtr);
1120 return;
1121 default:
1122 jam();
1123 break;
1124 }//switch
1125 {
1126 char msg[100];
1127 sprintf(msg, "File system open failed during FileRecord status %d", (Uint32)status);
1128 fsRefError(signal,__LINE__,msg);
1129 }
1130 return;
1131 }//Dbdih::execFSOPENREF()
1132
execFSREADCONF(Signal * signal)1133 void Dbdih::execFSREADCONF(Signal* signal)
1134 {
1135 FileRecordPtr filePtr;
1136 jamEntry();
1137 filePtr.i = signal->theData[0];
1138 ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
1139 FileRecord::ReqStatus status = filePtr.p->reqStatus;
1140 filePtr.p->reqStatus = FileRecord::IDLE;
1141 switch (status) {
1142 case FileRecord::READING_GCP:
1143 jam();
1144 readingGcpLab(signal, filePtr);
1145 break;
1146 case FileRecord::READING_TABLE:
1147 jam();
1148 readingTableLab(signal, filePtr);
1149 break;
1150 default:
1151 ndbrequire(false);
1152 break;
1153 }//switch
1154 return;
1155 }//Dbdih::execFSREADCONF()
1156
execFSREADREF(Signal * signal)1157 void Dbdih::execFSREADREF(Signal* signal)
1158 {
1159 FileRecordPtr filePtr;
1160 jamEntry();
1161 filePtr.i = signal->theData[0];
1162 ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
1163 FileRecord::ReqStatus status = filePtr.p->reqStatus;
1164 filePtr.p->reqStatus = FileRecord::IDLE;
1165 switch (status) {
1166 case FileRecord::READING_GCP:
1167 jam();
1168 readingGcpErrorLab(signal, filePtr);
1169 return;
1170 case FileRecord::READING_TABLE:
1171 jam();
1172 readingTableErrorLab(signal, filePtr);
1173 return;
1174 default:
1175 break;
1176 }//switch
1177 {
1178 char msg[100];
1179 sprintf(msg, "File system read failed during FileRecord status %d", (Uint32)status);
1180 fsRefError(signal,__LINE__,msg);
1181 }
1182 }//Dbdih::execFSREADREF()
1183
execFSWRITECONF(Signal * signal)1184 void Dbdih::execFSWRITECONF(Signal* signal)
1185 {
1186 FileRecordPtr filePtr;
1187 jamEntry();
1188 filePtr.i = signal->theData[0];
1189 ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
1190 FileRecord::ReqStatus status = filePtr.p->reqStatus;
1191 filePtr.p->reqStatus = FileRecord::IDLE;
1192 switch (status) {
1193 case FileRecord::WRITING_COPY_GCI:
1194 jam();
1195 writingCopyGciLab(signal, filePtr);
1196 break;
1197 case FileRecord::WRITE_INIT_GCP:
1198 jam();
1199 writeInitGcpLab(signal, filePtr);
1200 break;
1201 case FileRecord::TABLE_WRITE:
1202 jam();
1203 tableWriteLab(signal, filePtr);
1204 break;
1205 default:
1206 ndbrequire(false);
1207 break;
1208 }//switch
1209 return;
1210 }//Dbdih::execFSWRITECONF()
1211
execFSWRITEREF(Signal * signal)1212 void Dbdih::execFSWRITEREF(Signal* signal)
1213 {
1214 FileRecordPtr filePtr;
1215 jamEntry();
1216 filePtr.i = signal->theData[0];
1217 ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
1218 FileRecord::ReqStatus status = filePtr.p->reqStatus;
1219 filePtr.p->reqStatus = FileRecord::IDLE;
1220 switch (status) {
1221 case FileRecord::WRITING_COPY_GCI:
1222 /* --------------------------------------------------------------------- */
1223 /* EVEN CREATING THE FILE DID NOT WORK. WE WILL THEN CRASH. */
1224 /* ERROR IN WRITING FILE. WE WILL NOT CONTINUE FROM HERE. */
1225 /* --------------------------------------------------------------------- */
1226 jam();
1227 break;
1228 case FileRecord::WRITE_INIT_GCP:
1229 /* --------------------------------------------------------------------- */
1230 /* AN ERROR OCCURRED IN WRITING A GCI FILE WHICH IS A SERIOUS ERROR */
1231 /* THAT CAUSE A SYSTEM RESTART. */
1232 /* --------------------------------------------------------------------- */
1233 jam();
1234 break;
1235 case FileRecord::TABLE_WRITE:
1236 jam();
1237 break;
1238 default:
1239 jam();
1240 break;
1241 }//switch
1242 {
1243 char msg[100];
1244 sprintf(msg, "File system write failed during FileRecord status %d", (Uint32)status);
1245 fsRefError(signal,__LINE__,msg);
1246 }
1247 return;
1248 }//Dbdih::execFSWRITEREF()
1249
execGETGCIREQ(Signal * signal)1250 void Dbdih::execGETGCIREQ(Signal* signal)
1251 {
1252
1253 jamEntry();
1254 Uint32 userPtr = signal->theData[0];
1255 BlockReference userRef = signal->theData[1];
1256 Uint32 type = signal->theData[2];
1257
1258 Uint32 gci_hi = 0;
1259 Uint32 gci_lo = 0;
1260 switch(type){
1261 case 0:
1262 jam();
1263 gci_hi = SYSFILE->newestRestorableGCI;
1264 break;
1265 case 1:
1266 jam();
1267 gci_hi = Uint32(m_micro_gcp.m_current_gci >> 32);
1268 gci_lo = Uint32(m_micro_gcp.m_current_gci);
1269 break;
1270 }
1271
1272 signal->theData[0] = userPtr;
1273 signal->theData[1] = gci_hi;
1274 signal->theData[2] = gci_lo;
1275
1276 if (userRef)
1277 {
1278 jam();
1279 sendSignal(userRef, GSN_GETGCICONF, signal, 3, JBB);
1280 }
1281 else
1282 {
1283 jam();
1284 // Execute direct
1285 }
1286 }//Dbdih::execGETGCIREQ()
1287
execREAD_CONFIG_REQ(Signal * signal)1288 void Dbdih::execREAD_CONFIG_REQ(Signal* signal)
1289 {
1290 const ReadConfigReq * req = (ReadConfigReq*)signal->getDataPtr();
1291 Uint32 ref = req->senderRef;
1292 Uint32 senderData = req->senderData;
1293 ndbrequire(req->noOfParameters == 0);
1294
1295 jamEntry();
1296
1297 const ndb_mgm_configuration_iterator * p =
1298 m_ctx.m_config.getOwnConfigIterator();
1299 ndbrequireErr(p != 0, NDBD_EXIT_INVALID_CONFIG);
1300
1301 initData();
1302
1303 cconnectFileSize = 256; // Only used for DDL
1304
1305 ndbrequireErr(!ndb_mgm_get_int_parameter(p, CFG_DIH_API_CONNECT,
1306 &capiConnectFileSize),
1307 NDBD_EXIT_INVALID_CONFIG);
1308 capiConnectFileSize++; // Increase by 1...so that srsw queue never gets full
1309
1310 ndbrequireErr(!ndb_mgm_get_int_parameter(p, CFG_DIH_FRAG_CONNECT,
1311 &cfragstoreFileSize),
1312 NDBD_EXIT_INVALID_CONFIG);
1313 ndbrequireErr(!ndb_mgm_get_int_parameter(p, CFG_DIH_REPLICAS,
1314 &creplicaFileSize),
1315 NDBD_EXIT_INVALID_CONFIG);
1316 ndbrequireErr(!ndb_mgm_get_int_parameter(p, CFG_DIH_TABLE, &ctabFileSize),
1317 NDBD_EXIT_INVALID_CONFIG);
1318
1319 if (isNdbMtLqh())
1320 {
1321 jam();
1322 c_fragments_per_node = getLqhWorkers();
1323 // try to get some LQH workers which initially handle no fragments
1324 if (ERROR_INSERTED(7215)) {
1325 c_fragments_per_node = 1;
1326 }
1327 }
1328 ndbout_c("Using %u fragments per node", c_fragments_per_node);
1329
1330 ndb_mgm_get_int_parameter(p, CFG_DB_LCP_TRY_LOCK_TIMEOUT,
1331 &c_lcpState.m_lcp_trylock_timeout);
1332
1333 cfileFileSize = (2 * ctabFileSize) + 2;
1334 initRecords();
1335 initialiseRecordsLab(signal, 0, ref, senderData);
1336
1337 {
1338 Uint32 val = 0;
1339 ndb_mgm_get_int_parameter(p, CFG_DB_2PASS_INR,
1340 &val);
1341 c_2pass_inr = val ? true : false;
1342 }
1343
1344 /**
1345 * Set API assigned nodegroup(s)
1346 */
1347 {
1348 NodeRecordPtr nodePtr;
1349 for (nodePtr.i = 0; nodePtr.i < MAX_NDB_NODES; nodePtr.i++)
1350 {
1351 ptrAss(nodePtr, nodeRecord);
1352 new (nodePtr.p) NodeRecord();
1353 nodePtr.p->nodeGroup = RNIL;
1354 }
1355
1356 ndb_mgm_configuration_iterator * iter =
1357 m_ctx.m_config.getClusterConfigIterator();
1358 for(ndb_mgm_first(iter); ndb_mgm_valid(iter); ndb_mgm_next(iter))
1359 {
1360 jam();
1361 Uint32 nodeId;
1362 Uint32 nodeType;
1363
1364 ndbrequire(!ndb_mgm_get_int_parameter(iter,CFG_NODE_ID, &nodeId));
1365 ndbrequire(!ndb_mgm_get_int_parameter(iter,CFG_TYPE_OF_SECTION,
1366 &nodeType));
1367
1368 if (nodeType == NodeInfo::DB)
1369 {
1370 jam();
1371 Uint32 ng;
1372 nodePtr.i = nodeId;
1373 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
1374 if (ndb_mgm_get_int_parameter(iter, CFG_DB_NODEGROUP, &ng) == 0)
1375 {
1376 jam();
1377 nodePtr.p->nodeGroup = ng;
1378 }
1379 else
1380 {
1381 jam();
1382 nodePtr.p->nodeGroup = RNIL;
1383 }
1384 }
1385 }
1386 }
1387 return;
1388 }//Dbdih::execSIZEALT_REP()
1389
execSTART_COPYREF(Signal * signal)1390 void Dbdih::execSTART_COPYREF(Signal* signal)
1391 {
1392 jamEntry();
1393 ndbrequire(false);
1394 }//Dbdih::execSTART_COPYREF()
1395
execSTART_FRAGCONF(Signal * signal)1396 void Dbdih::execSTART_FRAGCONF(Signal* signal)
1397 {
1398 (void)signal; // Don't want compiler warning
1399 /* ********************************************************************* */
1400 /* If anyone wants to add functionality in this method, be aware that */
1401 /* for temporary tables no START_FRAGREQ is sent and therefore no */
1402 /* START_FRAGCONF signal will be received for those tables!! */
1403 /* ********************************************************************* */
1404 jamEntry();
1405 return;
1406 }//Dbdih::execSTART_FRAGCONF()
1407
execSTART_FRAGREF(Signal * signal)1408 void Dbdih::execSTART_FRAGREF(Signal* signal)
1409 {
1410 jamEntry();
1411
1412 /**
1413 * Kill starting node
1414 */
1415 Uint32 errCode = signal->theData[1];
1416 Uint32 nodeId = signal->theData[2];
1417
1418 SystemError * const sysErr = (SystemError*)&signal->theData[0];
1419 sysErr->errorCode = SystemError::StartFragRefError;
1420 sysErr->errorRef = reference();
1421 sysErr->data[0] = errCode;
1422 sysErr->data[1] = 0;
1423 sendSignal(calcNdbCntrBlockRef(nodeId), GSN_SYSTEM_ERROR, signal,
1424 SystemError::SignalLength, JBB);
1425 return;
1426 }//Dbdih::execSTART_FRAGCONF()
1427
execSTART_MEREF(Signal * signal)1428 void Dbdih::execSTART_MEREF(Signal* signal)
1429 {
1430 jamEntry();
1431 ndbrequire(false);
1432 }//Dbdih::execSTART_MEREF()
1433
execTAB_COMMITREQ(Signal * signal)1434 void Dbdih::execTAB_COMMITREQ(Signal* signal)
1435 {
1436 TabRecordPtr tabPtr;
1437 jamEntry();
1438 Uint32 tdictPtr = signal->theData[0];
1439 BlockReference tdictBlockref = signal->theData[1];
1440 tabPtr.i = signal->theData[2];
1441 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
1442
1443 ndbrequire(tabPtr.p->tabStatus == TabRecord::TS_CREATING);
1444 tabPtr.p->tabStatus = TabRecord::TS_ACTIVE;
1445 tabPtr.p->schemaTransId = 0;
1446 signal->theData[0] = tdictPtr;
1447 signal->theData[1] = cownNodeId;
1448 signal->theData[2] = tabPtr.i;
1449 sendSignal(tdictBlockref, GSN_TAB_COMMITCONF, signal, 3, JBB);
1450 return;
1451 }//Dbdih::execTAB_COMMITREQ()
1452
1453 /*
1454 3.2 S T A N D A R D S U B P R O G R A M S I N P L E X
1455 *************************************************************
1456 */
1457 /*
1458 3.2.1 S T A R T / R E S T A R T
1459 **********************************
1460 */
1461 /*****************************************************************************/
1462 /* ********** START / RESTART MODULE *************/
1463 /*****************************************************************************/
1464 /*
1465 3.2.1.1 LOADING O W N B L O C K R E F E R E N C E (ABSOLUTE PHASE 1)
1466 *****************************************************************************
1467 */
execDIH_RESTARTREQ(Signal * signal)1468 void Dbdih::execDIH_RESTARTREQ(Signal* signal)
1469 {
1470 jamEntry();
1471 const DihRestartReq* req = CAST_CONSTPTR(DihRestartReq,
1472 signal->getDataPtr());
1473 if (req->senderRef != 0)
1474 {
1475 jam();
1476 cntrlblockref = req->senderRef;
1477 if(m_ctx.m_config.getInitialStart())
1478 {
1479 sendDihRestartRef(signal);
1480 } else {
1481 readGciFileLab(signal);
1482 }
1483 }
1484 else
1485 {
1486 /**
1487 * Precondition, (not checked)
1488 * atleast 1 node in each node group
1489 */
1490 Uint32 i;
1491 NdbNodeBitmask mask;
1492 mask.assign(NdbNodeBitmask::Size, req->nodemask);
1493 const Uint32 *node_gcis = req->node_gcis;
1494 Uint32 node_group_gcis[MAX_NDB_NODES+1];
1495 memset(node_group_gcis, 0, sizeof(node_group_gcis));
1496 for (i = 0; i<MAX_NDB_NODES; i++)
1497 {
1498 if (mask.get(i))
1499 {
1500 jam();
1501 Uint32 ng = Sysfile::getNodeGroup(i, SYSFILE->nodeGroups);
1502 if (ng != NO_NODE_GROUP_ID)
1503 {
1504 ndbrequire(ng < MAX_NDB_NODES);
1505 Uint32 gci = node_gcis[i];
1506 if (gci < SYSFILE->lastCompletedGCI[i])
1507 {
1508 jam();
1509 /**
1510 * Handle case, where *I* know that node complete GCI
1511 * but node does not...bug#29167
1512 * i.e node died before it wrote own sysfile
1513 */
1514 gci = SYSFILE->lastCompletedGCI[i];
1515 }
1516
1517 if (gci > node_group_gcis[ng])
1518 {
1519 jam();
1520 node_group_gcis[ng] = gci;
1521 }
1522 }
1523 }
1524 }
1525 for (i = 0; i<MAX_NDB_NODES && node_group_gcis[i] == 0; i++);
1526
1527 Uint32 gci = node_group_gcis[i];
1528 for (i++ ; i<MAX_NDB_NODES; i++)
1529 {
1530 jam();
1531 if (node_group_gcis[i] && node_group_gcis[i] != gci)
1532 {
1533 jam();
1534 signal->theData[0] = i;
1535 return;
1536 }
1537 }
1538 signal->theData[0] = MAX_NDB_NODES;
1539 return;
1540 }
1541 return;
1542 }//Dbdih::execDIH_RESTARTREQ()
1543
execSTTOR(Signal * signal)1544 void Dbdih::execSTTOR(Signal* signal)
1545 {
1546 jamEntry();
1547
1548 Callback c = { safe_cast(&Dbdih::sendSTTORRY), 0 };
1549 m_sendSTTORRY = c;
1550
1551 switch(signal->theData[1]){
1552 case 1:
1553 createMutexes(signal, 0);
1554 return;
1555 case 2:
1556 break;
1557 case 3:
1558 signal->theData[0] = reference();
1559 sendSignal(NDBCNTR_REF, GSN_READ_NODESREQ, signal, 1, JBB);
1560 return;
1561 }
1562
1563 sendSTTORRY(signal);
1564 }//Dbdih::execSTTOR()
1565
1566 void
sendSTTORRY(Signal * signal,Uint32 senderData,Uint32 retVal)1567 Dbdih::sendSTTORRY(Signal* signal, Uint32 senderData, Uint32 retVal)
1568 {
1569 signal->theData[0] = 0;
1570 signal->theData[1] = 0;
1571 signal->theData[2] = 0;
1572 signal->theData[3] = 1; // Next start phase
1573 signal->theData[4] = 2; // Next start phase
1574 signal->theData[5] = 3;
1575 signal->theData[6] = 255; // Next start phase
1576 sendSignal(NDBCNTR_REF, GSN_STTORRY, signal, 7, JBB);
1577 return;
1578 }
1579
initialStartCompletedLab(Signal * signal)1580 void Dbdih::initialStartCompletedLab(Signal* signal)
1581 {
1582 /*-------------------------------------------------------------------------*/
1583 /* NOW THAT (RE)START IS COMPLETED WE CAN START THE LCP.*/
1584 /*-------------------------------------------------------------------------*/
1585 return;
1586 }//Dbdih::initialStartCompletedLab()
1587
1588 /*
1589 * ***************************************************************************
1590 * S E N D I N G R E P L Y T O S T A R T / R E S T A R T R E Q U E S T S
1591 * ****************************************************************************
1592 */
ndbsttorry10Lab(Signal * signal,Uint32 _line)1593 void Dbdih::ndbsttorry10Lab(Signal* signal, Uint32 _line)
1594 {
1595 /*-------------------------------------------------------------------------*/
1596 // AN NDB START PHASE HAS BEEN COMPLETED. WHEN START PHASE 6 IS COMPLETED WE
1597 // RECORD THAT THE SYSTEM IS RUNNING.
1598 /*-------------------------------------------------------------------------*/
1599 signal->theData[0] = reference();
1600 sendSignal(cntrlblockref, GSN_NDB_STTORRY, signal, 1, JBB);
1601 return;
1602 }//Dbdih::ndbsttorry10Lab()
1603
1604 /*
1605 ****************************************
1606 I N T E R N A L P H A S E S
1607 ****************************************
1608 */
1609 /*---------------------------------------------------------------------------*/
1610 /*NDB_STTOR START SIGNAL AT START/RESTART */
1611 /*---------------------------------------------------------------------------*/
execNDB_STTOR(Signal * signal)1612 void Dbdih::execNDB_STTOR(Signal* signal)
1613 {
1614 jamEntry();
1615 BlockReference cntrRef = signal->theData[0]; /* SENDERS BLOCK REFERENCE */
1616 Uint32 ownNodeId = signal->theData[1]; /* OWN PROCESSOR ID*/
1617 Uint32 phase = signal->theData[2]; /* INTERNAL START PHASE*/
1618 Uint32 typestart = signal->theData[3];
1619
1620 cstarttype = typestart;
1621 cstartPhase = phase;
1622
1623 switch (phase){
1624 case ZNDB_SPH1:
1625 jam();
1626 /*----------------------------------------------------------------------*/
1627 /* Set the delay between local checkpoints in ndb startphase 1. */
1628 /*----------------------------------------------------------------------*/
1629 cownNodeId = ownNodeId;
1630 /*-----------------------------------------------------------------------*/
1631 // Compute all static block references in this node as part of
1632 // ndb start phase 1.
1633 /*-----------------------------------------------------------------------*/
1634 cntrlblockref = cntrRef;
1635 clocaltcblockref = calcTcBlockRef(ownNodeId);
1636 clocallqhblockref = calcLqhBlockRef(ownNodeId);
1637 cdictblockref = calcDictBlockRef(ownNodeId);
1638 ndbsttorry10Lab(signal, __LINE__);
1639 break;
1640
1641 case ZNDB_SPH2:
1642 jam();
1643 /*-----------------------------------------------------------------------*/
1644 // Set the number of replicas, maximum is 4 replicas.
1645 // Read the ndb nodes from the configuration.
1646 /*-----------------------------------------------------------------------*/
1647
1648 /*-----------------------------------------------------------------------*/
1649 // For node restarts we will also add a request for permission
1650 // to continue the system restart.
1651 // The permission is given by the master node in the alive set.
1652 /*-----------------------------------------------------------------------*/
1653 if (cstarttype == NodeState::ST_INITIAL_NODE_RESTART)
1654 {
1655 jam();
1656 c_set_initial_start_flag = TRUE; // In sysfile...
1657 }
1658
1659 if (cstarttype == NodeState::ST_INITIAL_START) {
1660 jam();
1661 // setInitialActiveStatus is moved into makeNodeGroups
1662 } else if (cstarttype == NodeState::ST_SYSTEM_RESTART) {
1663 jam();
1664 /*empty*/;
1665 } else if ((cstarttype == NodeState::ST_NODE_RESTART) ||
1666 (cstarttype == NodeState::ST_INITIAL_NODE_RESTART)) {
1667 jam();
1668 nodeRestartPh2Lab(signal);
1669 return;
1670 } else {
1671 ndbrequire(false);
1672 }//if
1673 ndbsttorry10Lab(signal, __LINE__);
1674 return;
1675
1676 case ZNDB_SPH3:
1677 jam();
1678 /*-----------------------------------------------------------------------*/
1679 // Non-master nodes performing an initial start will execute
1680 // the start request here since the
1681 // initial start do not synchronise so much from the master.
1682 // In the master nodes the start
1683 // request will be sent directly to dih (in ndb_startreq) when all
1684 // nodes have completed phase 3 of the start.
1685 /*-----------------------------------------------------------------------*/
1686 cmasterState = MASTER_IDLE;
1687 if(cstarttype == NodeState::ST_INITIAL_START ||
1688 cstarttype == NodeState::ST_SYSTEM_RESTART){
1689 jam();
1690 cmasterState = isMaster() ? MASTER_ACTIVE : MASTER_IDLE;
1691 }
1692 if (!isMaster() && cstarttype == NodeState::ST_INITIAL_START) {
1693 jam();
1694 ndbStartReqLab(signal, cntrRef);
1695 return;
1696 }//if
1697 ndbsttorry10Lab(signal, __LINE__);
1698 break;
1699
1700 case ZNDB_SPH4:
1701 jam();
1702 c_lcpState.setLcpStatus(LCP_STATUS_IDLE, __LINE__);
1703 cmasterTakeOverNode = ZNIL;
1704 switch(typestart){
1705 case NodeState::ST_INITIAL_START:
1706 jam();
1707 ndbsttorry10Lab(signal, __LINE__);
1708 return;
1709 case NodeState::ST_SYSTEM_RESTART:
1710 jam();
1711 ndbsttorry10Lab(signal, __LINE__);
1712 return;
1713 case NodeState::ST_INITIAL_NODE_RESTART:
1714 case NodeState::ST_NODE_RESTART:
1715 jam();
1716
1717 /***********************************************************************
1718 * When starting nodes while system is operational we must be controlled
1719 * by the master since only one node restart is allowed at a time.
1720 * When this signal is confirmed the master has also copied the
1721 * dictionary and the distribution information.
1722 */
1723 StartMeReq * req = (StartMeReq*)&signal->theData[0];
1724 req->startingRef = reference();
1725 req->startingVersion = 0; // Obsolete
1726 sendSignal(cmasterdihref, GSN_START_MEREQ, signal,
1727 StartMeReq::SignalLength, JBB);
1728 return;
1729 }
1730 ndbrequire(false);
1731 break;
1732 case ZNDB_SPH5:
1733 jam();
1734 if (m_gcp_monitor.m_micro_gcp.m_max_lag > 0)
1735 {
1736 infoEvent("GCP Monitor: Computed max GCP_SAVE lag to %u seconds",
1737 m_gcp_monitor.m_gcp_save.m_max_lag / 10);
1738 infoEvent("GCP Monitor: Computed max GCP_COMMIT lag to %u seconds",
1739 m_gcp_monitor.m_micro_gcp.m_max_lag / 10);
1740 }
1741 else
1742 {
1743 infoEvent("GCP Monitor: unlimited lags allowed");
1744 }
1745 switch(typestart){
1746 case NodeState::ST_INITIAL_START:
1747 case NodeState::ST_SYSTEM_RESTART:
1748 jam();
1749 jam();
1750 /*---------------------------------------------------------------------*/
1751 // WE EXECUTE A LOCAL CHECKPOINT AS A PART OF A SYSTEM RESTART.
1752 // THE IDEA IS THAT WE NEED TO
1753 // ENSURE THAT WE CAN RECOVER FROM PROBLEMS CAUSED BY MANY NODE
1754 // CRASHES THAT CAUSES THE LOG
1755 // TO GROW AND THE NUMBER OF LOG ROUNDS TO EXECUTE TO GROW.
1756 // THIS CAN OTHERWISE GET US INTO
1757 // A SITUATION WHICH IS UNREPAIRABLE. THUS WE EXECUTE A CHECKPOINT
1758 // BEFORE ALLOWING ANY TRANSACTIONS TO START.
1759 /*---------------------------------------------------------------------*/
1760 if (!isMaster()) {
1761 jam();
1762 ndbsttorry10Lab(signal, __LINE__);
1763 return;
1764 }//if
1765
1766 c_lcpState.immediateLcpStart = true;
1767 cwaitLcpSr = true;
1768 checkLcpStart(signal, __LINE__);
1769 return;
1770 case NodeState::ST_NODE_RESTART:
1771 case NodeState::ST_INITIAL_NODE_RESTART:
1772 jam();
1773 {
1774 StartCopyReq* req = (StartCopyReq*)signal->getDataPtrSend();
1775 req->senderRef = reference();
1776 req->senderData = RNIL;
1777 req->flags = StartCopyReq::WAIT_LCP;
1778 req->startingNodeId = getOwnNodeId();
1779 if (!ndb_pnr(getNodeInfo(refToNode(cmasterdihref)).m_version))
1780 {
1781 jam();
1782 infoEvent("Detecting upgrade: Master(%u) does not support parallel node recovery",
1783 refToNode(cmasterdihref));
1784 sendSignal(cmasterdihref, GSN_START_COPYREQ, signal,
1785 StartCopyReq::SignalLength, JBB);
1786 }
1787 else
1788 {
1789 sendSignal(reference(), GSN_START_COPYREQ, signal,
1790 StartCopyReq::SignalLength, JBB);
1791 }
1792 }
1793 return;
1794 }
1795 ndbrequire(false);
1796 case ZNDB_SPH6:
1797 jam();
1798 switch(typestart){
1799 case NodeState::ST_INITIAL_START:
1800 case NodeState::ST_SYSTEM_RESTART:
1801 jam();
1802 if(isMaster()){
1803 jam();
1804 startGcp(signal);
1805 }
1806 ndbsttorry10Lab(signal, __LINE__);
1807 return;
1808 case NodeState::ST_NODE_RESTART:
1809 case NodeState::ST_INITIAL_NODE_RESTART:
1810 ndbsttorry10Lab(signal, __LINE__);
1811 return;
1812 }
1813 ndbrequire(false);
1814 break;
1815 default:
1816 jam();
1817 ndbsttorry10Lab(signal, __LINE__);
1818 break;
1819 }//switch
1820 }//Dbdih::execNDB_STTOR()
1821
1822 void
execNODE_START_REP(Signal * signal)1823 Dbdih::execNODE_START_REP(Signal* signal)
1824 {
1825 /*
1826 * Send DICT_UNLOCK_ORD when this node is SL_STARTED.
1827 *
1828 * Sending it before (sp 7) conflicts with code which assumes
1829 * SL_STARTING means we are in copy phase of NR.
1830 *
1831 * NodeState::starting.restartType is not supposed to be used
1832 * when SL_STARTED. Also it seems NODE_START_REP can arrive twice.
1833 *
1834 * For these reasons there are no consistency checks and
1835 * we rely on c_dictLockSlavePtrI_nodeRestart alone.
1836 */
1837 if (signal->theData[0] == getOwnNodeId())
1838 {
1839 /**
1840 * With parallel node restart, only unlock self, if it's self that has
1841 * started
1842 */
1843 jam();
1844 if (c_dictLockSlavePtrI_nodeRestart != RNIL) {
1845 sendDictUnlockOrd(signal, c_dictLockSlavePtrI_nodeRestart);
1846 c_dictLockSlavePtrI_nodeRestart = RNIL;
1847 }
1848 }
1849 }
1850
1851 void
createMutexes(Signal * signal,Uint32 count)1852 Dbdih::createMutexes(Signal * signal, Uint32 count){
1853 Callback c = { safe_cast(&Dbdih::createMutex_done), count };
1854
1855 switch(count){
1856 case 0:{
1857 Mutex mutex(signal, c_mutexMgr, c_startLcpMutexHandle);
1858 mutex.create(c);
1859 return;
1860 }
1861 case 1:{
1862 Mutex mutex(signal, c_mutexMgr, c_switchPrimaryMutexHandle);
1863 mutex.create(c);
1864 return;
1865 }
1866 case 2:{
1867 Mutex mutex(signal, c_mutexMgr, c_fragmentInfoMutex_lcp);
1868 mutex.create(c);
1869 return;
1870 }
1871 }
1872
1873 execute(signal, m_sendSTTORRY, 0);
1874 }
1875
1876 void
createMutex_done(Signal * signal,Uint32 senderData,Uint32 retVal)1877 Dbdih::createMutex_done(Signal* signal, Uint32 senderData, Uint32 retVal){
1878 jamEntry();
1879 ndbrequire(retVal == 0);
1880
1881 switch(senderData){
1882 case 0:{
1883 Mutex mutex(signal, c_mutexMgr, c_startLcpMutexHandle);
1884 mutex.release();
1885 break;
1886 }
1887 case 1:{
1888 Mutex mutex(signal, c_mutexMgr, c_switchPrimaryMutexHandle);
1889 mutex.release();
1890 break;
1891 }
1892 case 2:{
1893 Mutex mutex(signal, c_mutexMgr, c_fragmentInfoMutex_lcp);
1894 mutex.release();
1895 break;
1896 }
1897 }
1898
1899 createMutexes(signal, senderData + 1);
1900 }
1901
1902 /*****************************************************************************/
1903 /* ------------------------------------------------------------------------- */
1904 /* WE HAVE BEEN REQUESTED BY NDBCNTR TO PERFORM A RESTART OF THE */
1905 /* DATABASE TABLES. */
1906 /* THIS SIGNAL IS SENT AFTER COMPLETING PHASE 3 IN ALL BLOCKS IN A */
1907 /* SYSTEM RESTART. WE WILL ALSO JUMP TO THIS LABEL FROM PHASE 3 IN AN */
1908 /* INITIAL START. */
1909 /* ------------------------------------------------------------------------- */
1910 /*****************************************************************************/
execNDB_STARTREQ(Signal * signal)1911 void Dbdih::execNDB_STARTREQ(Signal* signal)
1912 {
1913 jamEntry();
1914 BlockReference ref = signal->theData[0];
1915 cstarttype = signal->theData[1];
1916 ndbStartReqLab(signal, ref);
1917 }//Dbdih::execNDB_STARTREQ()
1918
ndbStartReqLab(Signal * signal,BlockReference ref)1919 void Dbdih::ndbStartReqLab(Signal* signal, BlockReference ref)
1920 {
1921 cndbStartReqBlockref = ref;
1922 if (cstarttype == NodeState::ST_INITIAL_START) {
1923 jam();
1924 initRestartInfo(signal);
1925 initGciFilesLab(signal);
1926 return;
1927 }
1928
1929 NodeRecordPtr nodePtr;
1930 Uint32 gci = SYSFILE->lastCompletedGCI[getOwnNodeId()];
1931 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++)
1932 {
1933 jam();
1934 ptrAss(nodePtr, nodeRecord);
1935 if (SYSFILE->lastCompletedGCI[nodePtr.i] > gci)
1936 {
1937 jam();
1938 /**
1939 * Since we're starting(is master) and there
1940 * there are other nodes with higher GCI...
1941 * there gci's must be invalidated...
1942 * and they _must_ do an initial start
1943 * indicate this by setting lastCompletedGCI = 0
1944 */
1945 SYSFILE->lastCompletedGCI[nodePtr.i] = 0;
1946 ndbrequire(nodePtr.p->nodeStatus != NodeRecord::ALIVE);
1947 warningEvent("Making filesystem for node %d unusable (need --initial)",
1948 nodePtr.i);
1949 }
1950 else if (nodePtr.p->nodeStatus == NodeRecord::ALIVE &&
1951 SYSFILE->lastCompletedGCI[nodePtr.i] == 0)
1952 {
1953 jam();
1954 CRASH_INSERTION(7170);
1955 char buf[255];
1956 BaseString::snprintf(buf, sizeof(buf),
1957 "Cluster requires this node to be started "
1958 " with --initial as partial start has been performed"
1959 " and this filesystem is unusable");
1960 progError(__LINE__,
1961 NDBD_EXIT_SR_RESTARTCONFLICT,
1962 buf);
1963 ndbrequire(false);
1964 }
1965 }
1966
1967 /**
1968 * This set which GCI we will try to restart to
1969 */
1970 SYSFILE->newestRestorableGCI = gci;
1971 infoEvent("Restarting cluster to GCI: %u", gci);
1972
1973 ndbrequire(isMaster());
1974 copyGciLab(signal, CopyGCIReq::RESTART); // We have already read the file!
1975 }//Dbdih::ndbStartReqLab()
1976
execREAD_NODESCONF(Signal * signal)1977 void Dbdih::execREAD_NODESCONF(Signal* signal)
1978 {
1979 unsigned i;
1980 ReadNodesConf * const readNodes = (ReadNodesConf *)&signal->theData[0];
1981 jamEntry();
1982 Uint32 nodeArray[MAX_NDB_NODES+1];
1983
1984 csystemnodes = readNodes->noOfNodes;
1985 cmasterNodeId = readNodes->masterNodeId;
1986 unsigned index = 0;
1987 NdbNodeBitmask tmp; tmp.assign(2, readNodes->allNodes);
1988 for (i = 1; i < MAX_NDB_NODES; i++){
1989 jam();
1990 if(tmp.get(i)){
1991 jam();
1992 nodeArray[index] = i;
1993 if(NdbNodeBitmask::get(readNodes->inactiveNodes, i) == false){
1994 jam();
1995 con_lineNodes++;
1996 }//if
1997 index++;
1998 }//if
1999 }//for
2000 nodeArray[index] = RNIL; // terminate
2001
2002 if (c_2pass_inr)
2003 {
2004 jam();
2005 Uint32 workers = getNodeInfo(getOwnNodeId()).m_lqh_workers;
2006 printf("Checking 2-pass initial node restart: ");
2007 for (i = 0; i<index; i++)
2008 {
2009 if (NdbNodeBitmask::get(readNodes->inactiveNodes, nodeArray[i]))
2010 continue;
2011
2012 if (!ndbd_non_trans_copy_frag_req(getNodeInfo(nodeArray[i]).m_version))
2013 {
2014 jam();
2015 c_2pass_inr = false;
2016 printf("not ok (version node %u) => disabled\n", nodeArray[i]);
2017 break;
2018 }
2019
2020 if (workers > 1 &&
2021 workers != getNodeInfo(nodeArray[i]).m_lqh_workers)
2022 {
2023 c_2pass_inr = false;
2024 printf("not ok (different worker cnt node %u) => disabled\n",
2025 nodeArray[i]);
2026 break;
2027 }
2028 }
2029 if (c_2pass_inr)
2030 printf("ok\n");
2031
2032 /**
2033 * Note: In theory it would be ok for just nodes that we plan to copy from
2034 * supported this...but in e.g a 3/4-replica scenario,
2035 * if one of the nodes does, and the other doesnt, we don't
2036 * have enought infrastructure to easily check this...
2037 * therefor we require all nodes to support it.
2038 */
2039 }
2040
2041 if(cstarttype == NodeState::ST_SYSTEM_RESTART ||
2042 cstarttype == NodeState::ST_NODE_RESTART)
2043 {
2044
2045 for(i = 1; i<MAX_NDB_NODES; i++){
2046 const Uint32 stat = Sysfile::getNodeStatus(i, SYSFILE->nodeStatus);
2047 if(stat == Sysfile::NS_NotDefined && !tmp.get(i))
2048 {
2049 jam();
2050 continue;
2051 }
2052
2053 if(tmp.get(i) && stat != Sysfile::NS_NotDefined)
2054 {
2055 jam();
2056 continue;
2057 }
2058
2059 if (stat == Sysfile::NS_NotDefined && tmp.get(i))
2060 {
2061 jam();
2062 infoEvent("Discovered new node %u", i);
2063 continue;
2064 }
2065
2066 if (stat == Sysfile::NS_Configured && !tmp.get(i))
2067 {
2068 jam();
2069 infoEvent("Configured node %u not present, ignoring",
2070 i);
2071 continue;
2072 }
2073
2074 char buf[255];
2075 BaseString::snprintf(buf, sizeof(buf),
2076 "Illegal configuration change."
2077 " Initial start needs to be performed "
2078 " when removing nodes with nodegroup (node %d)", i);
2079 progError(__LINE__, NDBD_EXIT_INVALID_CONFIG, buf);
2080 }
2081 }
2082
2083 ndbrequire(csystemnodes >= 1 && csystemnodes < MAX_NDB_NODES);
2084
2085 cmasterdihref = calcDihBlockRef(cmasterNodeId);
2086 /*-------------------------------------------------------------------------*/
2087 /* MAKE THE LIST OF PRN-RECORD WHICH IS ONE OF THE NODES-LIST IN THIS BLOCK*/
2088 /*-------------------------------------------------------------------------*/
2089 makePrnList(readNodes, nodeArray);
2090 if (cstarttype == NodeState::ST_INITIAL_START) {
2091 jam();
2092 /**----------------------------------------------------------------------
2093 * WHEN WE INITIALLY START A DATABASE WE WILL CREATE NODE GROUPS.
2094 * ALL NODES ARE PUT INTO NODE GROUPS ALTHOUGH HOT SPARE NODES ARE PUT
2095 * INTO A SPECIAL NODE GROUP. IN EACH NODE GROUP WE HAVE THE SAME AMOUNT
2096 * OF NODES AS THERE ARE NUMBER OF REPLICAS.
2097 * ONE POSSIBLE USAGE OF NODE GROUPS ARE TO MAKE A NODE GROUP A COMPLETE
2098 * FRAGMENT OF THE DATABASE. THIS MEANS THAT ALL REPLICAS WILL BE STORED
2099 * IN THE NODE GROUP.
2100 *-----------------------------------------------------------------------*/
2101 makeNodeGroups(nodeArray);
2102 }//if
2103 ndbrequire(checkNodeAlive(cmasterNodeId));
2104
2105 /**
2106 * Keep bitmap of nodes that can be restored...
2107 * and nodes that need take-over
2108 *
2109 */
2110 m_sr_nodes.clear();
2111 m_to_nodes.clear();
2112
2113 // Start with assumption that all can restore
2114 {
2115 NodeRecordPtr specNodePtr;
2116 specNodePtr.i = cfirstAliveNode;
2117 do {
2118 jam();
2119 m_sr_nodes.set(specNodePtr.i);
2120 ptrCheckGuard(specNodePtr, MAX_NDB_NODES, nodeRecord);
2121 specNodePtr.i = specNodePtr.p->nextNode;
2122 } while (specNodePtr.i != RNIL);
2123 }
2124
2125 execute(signal, m_sendSTTORRY, 0);
2126 }//Dbdih::execREAD_NODESCONF()
2127
2128 /*---------------------------------------------------------------------------*/
2129 /* START NODE LOGIC FOR NODE RESTART */
2130 /*---------------------------------------------------------------------------*/
nodeRestartPh2Lab(Signal * signal)2131 void Dbdih::nodeRestartPh2Lab(Signal* signal)
2132 {
2133 /*
2134 * Lock master DICT to avoid metadata operations during INR/NR.
2135 * Done just before START_PERMREQ.
2136 *
2137 * It would be more elegant to do this just before START_MEREQ.
2138 * The problem is, on INR we end up in massive invalidateNodeLCP
2139 * which is not fully protected against metadata ops.
2140 */
2141 ndbrequire(c_dictLockSlavePtrI_nodeRestart == RNIL);
2142
2143 // check that we are not yet taking part in schema ops
2144 CRASH_INSERTION(7174);
2145
2146 Uint32 lockType = DictLockReq::NodeRestartLock;
2147 Callback c = { safe_cast(&Dbdih::recvDictLockConf_nodeRestart), 0 };
2148 sendDictLockReq(signal, lockType, c);
2149 }
2150
recvDictLockConf_nodeRestart(Signal * signal,Uint32 data,Uint32 ret)2151 void Dbdih::recvDictLockConf_nodeRestart(Signal* signal, Uint32 data, Uint32 ret)
2152 {
2153 ndbrequire(c_dictLockSlavePtrI_nodeRestart == RNIL);
2154 ndbrequire(data != RNIL);
2155 c_dictLockSlavePtrI_nodeRestart = data;
2156
2157 nodeRestartPh2Lab2(signal);
2158 }
2159
nodeRestartPh2Lab2(Signal * signal)2160 void Dbdih::nodeRestartPh2Lab2(Signal* signal)
2161 {
2162 /*------------------------------------------------------------------------*/
2163 // REQUEST FOR PERMISSION FROM MASTER TO START A NODE IN AN ALREADY
2164 // RUNNING SYSTEM.
2165 /*------------------------------------------------------------------------*/
2166 StartPermReq * const req = (StartPermReq *)&signal->theData[0];
2167
2168 req->blockRef = reference();
2169 req->nodeId = cownNodeId;
2170 req->startType = cstarttype;
2171 sendSignal(cmasterdihref, GSN_START_PERMREQ, signal, 3, JBB);
2172
2173 if (ERROR_INSERTED(7203))
2174 {
2175 signal->theData[0] = 9999;
2176 sendSignalWithDelay(CMVMI_REF, GSN_NDB_TAMPER, signal, 200, 1);
2177 }
2178 }
2179
execSTART_PERMCONF(Signal * signal)2180 void Dbdih::execSTART_PERMCONF(Signal* signal)
2181 {
2182 jamEntry();
2183 CRASH_INSERTION(7121);
2184 Uint32 nodeId = signal->theData[0];
2185 cfailurenr = signal->theData[1];
2186
2187 bool microGCP = signal->theData[2];
2188 if (signal->getLength() < StartPermConf::SignalLength)
2189 {
2190 microGCP = false;
2191 }
2192 m_micro_gcp.m_enabled = microGCP;
2193 ndbrequire(nodeId == cownNodeId);
2194 ndbsttorry10Lab(signal, __LINE__);
2195
2196 if (m_micro_gcp.m_enabled)
2197 {
2198 jam();
2199 UpgradeProtocolOrd * ord = (UpgradeProtocolOrd*)signal->getDataPtrSend();
2200 ord->type = UpgradeProtocolOrd::UPO_ENABLE_MICRO_GCP;
2201 EXECUTE_DIRECT(QMGR,GSN_UPGRADE_PROTOCOL_ORD,signal,signal->getLength());
2202 }
2203 else if(isMultiThreaded())
2204 {
2205 /**
2206 * Prevent this start, as there is some non-thread-safe upgrade code for
2207 * this case in LQH.
2208 */
2209 progError(__LINE__, NDBD_EXIT_SR_RESTARTCONFLICT,
2210 "Cluster requires that all old data nodes are upgraded "
2211 "while running single-threaded ndbd before starting "
2212 "multi-threaded ndbmtd data nodes.");
2213 }
2214 }//Dbdih::execSTART_PERMCONF()
2215
execSTART_PERMREF(Signal * signal)2216 void Dbdih::execSTART_PERMREF(Signal* signal)
2217 {
2218 jamEntry();
2219 Uint32 errorCode = signal->theData[1];
2220 if (errorCode == StartPermRef::ZNODE_ALREADY_STARTING_ERROR ||
2221 errorCode == StartPermRef::ZNODE_START_DISALLOWED_ERROR) {
2222 jam();
2223 /*-----------------------------------------------------------------------*/
2224 // The master was busy adding another node. We will wait for a second and
2225 // try again.
2226 /*-----------------------------------------------------------------------*/
2227 infoEvent("Did not get permission to start (%u) retry in 3s",
2228 errorCode);
2229 signal->theData[0] = DihContinueB::ZSTART_PERMREQ_AGAIN;
2230 sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 3000, 1);
2231 return;
2232 }//if
2233
2234 if (errorCode == StartPermRef::InitialStartRequired)
2235 {
2236 CRASH_INSERTION(7170);
2237 char buf[255];
2238 BaseString::snprintf(buf, sizeof(buf),
2239 "Cluster requires this node to be started "
2240 " with --initial as partial start has been performed"
2241 " and this filesystem is unusable");
2242 progError(__LINE__,
2243 NDBD_EXIT_SR_RESTARTCONFLICT,
2244 buf);
2245 ndbrequire(false);
2246 }
2247
2248 /*------------------------------------------------------------------------*/
2249 // Some node process in another node involving our node was still active. We
2250 // will recover from this by crashing here.
2251 // This is controlled restart using the
2252 // already existing features of node crashes. It is not a bug getting here.
2253 /*-------------------------------------------------------------------------*/
2254 ndbrequire(false);
2255 return;
2256 }//Dbdih::execSTART_PERMREF()
2257
2258 /*---------------------------------------------------------------------------*/
2259 /* THIS SIGNAL IS RECEIVED IN THE STARTING NODE WHEN THE START_MEREQ */
2260 /* HAS BEEN EXECUTED IN THE MASTER NODE. */
2261 /*---------------------------------------------------------------------------*/
execSTART_MECONF(Signal * signal)2262 void Dbdih::execSTART_MECONF(Signal* signal)
2263 {
2264 jamEntry();
2265 StartMeConf * const startMe = (StartMeConf *)&signal->theData[0];
2266 Uint32 nodeId = startMe->startingNodeId;
2267 const Uint32 startWord = startMe->startWord;
2268 Uint32 i;
2269
2270 CRASH_INSERTION(7130);
2271 ndbrequire(nodeId == cownNodeId);
2272 arrGuard(startWord + StartMeConf::DATA_SIZE, sizeof(cdata)/4);
2273 for(i = 0; i < StartMeConf::DATA_SIZE; i++)
2274 cdata[startWord+i] = startMe->data[i];
2275
2276 if(startWord + StartMeConf::DATA_SIZE < Sysfile::SYSFILE_SIZE32){
2277 jam();
2278 /**
2279 * We are still waiting for data
2280 */
2281 return;
2282 }
2283 jam();
2284
2285 /**
2286 * Copy into sysfile
2287 *
2288 * But dont copy lastCompletedGCI:s
2289 */
2290 Uint32 key = SYSFILE->m_restart_seq;
2291 Uint32 tempGCP[MAX_NDB_NODES];
2292 for(i = 0; i < MAX_NDB_NODES; i++)
2293 tempGCP[i] = SYSFILE->lastCompletedGCI[i];
2294
2295 for(i = 0; i < Sysfile::SYSFILE_SIZE32; i++)
2296 sysfileData[i] = cdata[i];
2297
2298 SYSFILE->m_restart_seq = key;
2299 for(i = 0; i < MAX_NDB_NODES; i++)
2300 SYSFILE->lastCompletedGCI[i] = tempGCP[i];
2301
2302 setNodeActiveStatus();
2303 setNodeGroups();
2304 ndbsttorry10Lab(signal, __LINE__);
2305
2306 if (getNodeActiveStatus(getOwnNodeId()) == Sysfile::NS_Configured)
2307 {
2308 jam();
2309 c_set_initial_start_flag = FALSE;
2310 }
2311 }//Dbdih::execSTART_MECONF()
2312
execSTART_COPYCONF(Signal * signal)2313 void Dbdih::execSTART_COPYCONF(Signal* signal)
2314 {
2315 jamEntry();
2316
2317 StartCopyConf* conf = (StartCopyConf*)signal->getDataPtr();
2318 Uint32 nodeId = conf->startingNodeId;
2319 Uint32 senderData = conf->senderData;
2320
2321 if (!ndb_pnr(getNodeInfo(refToNode(signal->getSendersBlockRef())).m_version))
2322 {
2323 jam();
2324 senderData = RNIL;
2325 }
2326
2327 if (senderData == RNIL)
2328 {
2329 /**
2330 * This is NR
2331 */
2332 jam();
2333 ndbrequire(nodeId == cownNodeId);
2334 CRASH_INSERTION(7132);
2335 ndbsttorry10Lab(signal, __LINE__);
2336 }
2337 else
2338 {
2339 /**
2340 * This is TO during SR...waiting for all nodes
2341 */
2342 infoEvent("Take-over of %u complete", nodeId);
2343
2344 ndbrequire(senderData == getOwnNodeId());
2345 ndbrequire(m_to_nodes.get(nodeId));
2346 m_to_nodes.clear(nodeId);
2347 m_sr_nodes.set(nodeId);
2348 if (!m_to_nodes.isclear())
2349 {
2350 jam();
2351 return;
2352 }
2353
2354 signal->theData[0] = reference();
2355 m_sr_nodes.copyto(NdbNodeBitmask::Size, signal->theData+1);
2356 sendSignal(cntrlblockref, GSN_NDB_STARTCONF, signal,
2357 1 + NdbNodeBitmask::Size, JBB);
2358 return;
2359 }
2360 return;
2361 }//Dbdih::execSTART_COPYCONF()
2362
2363 /*---------------------------------------------------------------------------*/
2364 /* MASTER LOGIC FOR NODE RESTART */
2365 /*---------------------------------------------------------------------------*/
2366 /* NODE RESTART PERMISSION REQUEST */
2367 /*---------------------------------------------------------------------------*/
2368 // A REQUEST FROM A STARTING NODE TO PERFORM A NODE RESTART. IF NO OTHER NODE
2369 // IS ACTIVE IN PERFORMING A NODE RESTART AND THERE ARE NO ACTIVE PROCESSES IN
2370 // THIS NODE INVOLVING THE STARTING NODE THIS REQUEST WILL BE GRANTED.
2371 /*---------------------------------------------------------------------------*/
execSTART_PERMREQ(Signal * signal)2372 void Dbdih::execSTART_PERMREQ(Signal* signal)
2373 {
2374 StartPermReq * const req = (StartPermReq*)&signal->theData[0];
2375 jamEntry();
2376 const BlockReference retRef = req->blockRef;
2377 const Uint32 nodeId = req->nodeId;
2378 const Uint32 typeStart = req->startType;
2379 CRASH_INSERTION(7122);
2380 ndbrequire(isMaster());
2381 ndbrequire(refToNode(retRef) == nodeId);
2382 if ((c_nodeStartMaster.activeState) ||
2383 (c_nodeStartMaster.wait != ZFALSE) ||
2384 ERROR_INSERTED_CLEAR(7175)) {
2385 jam();
2386 signal->theData[0] = nodeId;
2387 signal->theData[1] = StartPermRef::ZNODE_ALREADY_STARTING_ERROR;
2388 sendSignal(retRef, GSN_START_PERMREF, signal, 2, JBB);
2389 return;
2390 }//if
2391
2392 if (!getAllowNodeStart(nodeId))
2393 {
2394 jam();
2395 ref:
2396 signal->theData[0] = nodeId;
2397 signal->theData[1] = StartPermRef::ZNODE_START_DISALLOWED_ERROR;
2398 sendSignal(retRef, GSN_START_PERMREF, signal, 2, JBB);
2399 return;
2400 }
2401 if (getNodeStatus(nodeId) != NodeRecord::DEAD)
2402 {
2403 jam();
2404 g_eventLogger->error("nodeStatus in START_PERMREQ = %u",
2405 (Uint32) getNodeStatus(nodeId));
2406 goto ref;
2407 }//if
2408
2409 if (SYSFILE->lastCompletedGCI[nodeId] == 0 &&
2410 typeStart != NodeState::ST_INITIAL_NODE_RESTART)
2411 {
2412 jam();
2413 signal->theData[0] = nodeId;
2414 signal->theData[1] = StartPermRef::InitialStartRequired;
2415 sendSignal(retRef, GSN_START_PERMREF, signal, 2, JBB);
2416 return;
2417 }
2418
2419 /*----------------------------------------------------------------------
2420 * WE START THE INCLUSION PROCEDURE
2421 * ---------------------------------------------------------------------*/
2422 c_nodeStartMaster.failNr = cfailurenr;
2423 c_nodeStartMaster.wait = ZFALSE;
2424 c_nodeStartMaster.startInfoErrorCode = 0;
2425 c_nodeStartMaster.startNode = nodeId;
2426 c_nodeStartMaster.activeState = true;
2427 c_nodeStartMaster.m_outstandingGsn = GSN_START_INFOREQ;
2428
2429 setNodeStatus(nodeId, NodeRecord::STARTING);
2430 /**
2431 * But if it's a NodeState::ST_INITIAL_NODE_RESTART
2432 *
2433 * We first have to clear LCP's
2434 * For normal node restart we simply ensure that all nodes
2435 * are informed of the node restart
2436 */
2437 StartInfoReq *const r =(StartInfoReq*)&signal->theData[0];
2438 r->startingNodeId = nodeId;
2439 r->typeStart = typeStart;
2440 r->systemFailureNo = cfailurenr;
2441 sendLoopMacro(START_INFOREQ, sendSTART_INFOREQ, RNIL);
2442 }//Dbdih::execSTART_PERMREQ()
2443
execSTART_INFOREF(Signal * signal)2444 void Dbdih::execSTART_INFOREF(Signal* signal)
2445 {
2446 StartInfoRef * ref = (StartInfoRef*)&signal->theData[0];
2447 if (getNodeStatus(ref->startingNodeId) != NodeRecord::STARTING) {
2448 jam();
2449 return;
2450 }//if
2451 ndbrequire(c_nodeStartMaster.startNode == ref->startingNodeId);
2452 c_nodeStartMaster.startInfoErrorCode = ref->errorCode;
2453 startInfoReply(signal, ref->sendingNodeId);
2454 }//Dbdih::execSTART_INFOREF()
2455
execSTART_INFOCONF(Signal * signal)2456 void Dbdih::execSTART_INFOCONF(Signal* signal)
2457 {
2458 jamEntry();
2459 StartInfoConf * conf = (StartInfoConf*)&signal->theData[0];
2460 if (getNodeStatus(conf->startingNodeId) != NodeRecord::STARTING) {
2461 jam();
2462 return;
2463 }//if
2464 ndbrequire(c_nodeStartMaster.startNode == conf->startingNodeId);
2465 startInfoReply(signal, conf->sendingNodeId);
2466 }//Dbdih::execSTART_INFOCONF()
2467
startInfoReply(Signal * signal,Uint32 nodeId)2468 void Dbdih::startInfoReply(Signal* signal, Uint32 nodeId)
2469 {
2470 receiveLoopMacro(START_INFOREQ, nodeId);
2471 /**
2472 * We're finished with the START_INFOREQ's
2473 */
2474 if (c_nodeStartMaster.startInfoErrorCode == 0) {
2475 jam();
2476 /**
2477 * Everything has been a success so far
2478 */
2479 StartPermConf * conf = (StartPermConf*)&signal->theData[0];
2480 conf->startingNodeId = c_nodeStartMaster.startNode;
2481 conf->systemFailureNo = cfailurenr;
2482 conf->microGCP = m_micro_gcp.m_enabled;
2483 sendSignal(calcDihBlockRef(c_nodeStartMaster.startNode),
2484 GSN_START_PERMCONF, signal, StartPermConf::SignalLength, JBB);
2485 c_nodeStartMaster.m_outstandingGsn = GSN_START_PERMCONF;
2486 } else {
2487 jam();
2488 StartPermRef * ref = (StartPermRef*)&signal->theData[0];
2489 ref->startingNodeId = c_nodeStartMaster.startNode;
2490 ref->errorCode = c_nodeStartMaster.startInfoErrorCode;
2491 sendSignal(calcDihBlockRef(c_nodeStartMaster.startNode),
2492 GSN_START_PERMREF, signal, StartPermRef::SignalLength, JBB);
2493 nodeResetStart(signal);
2494 }//if
2495 }//Dbdih::startInfoReply()
2496
2497 /*---------------------------------------------------------------------------*/
2498 /* NODE RESTART CONTINUE REQUEST */
2499 /*---------------------------------------------------------------------------*/
2500 // THIS SIGNAL AND THE CODE BELOW IS EXECUTED BY THE MASTER WHEN IT HAS BEEN
2501 // REQUESTED TO START UP A NEW NODE. The master instructs the starting node
2502 // how to set up its log for continued execution.
2503 /*---------------------------------------------------------------------------*/
execSTART_MEREQ(Signal * signal)2504 void Dbdih::execSTART_MEREQ(Signal* signal)
2505 {
2506 StartMeReq * req = (StartMeReq*)&signal->theData[0];
2507 jamEntry();
2508 const BlockReference Tblockref = req->startingRef;
2509 const Uint32 Tnodeid = refToNode(Tblockref);
2510
2511 ndbrequire(isMaster());
2512 ndbrequire(c_nodeStartMaster.startNode == Tnodeid);
2513 ndbrequire(getNodeStatus(Tnodeid) == NodeRecord::STARTING);
2514
2515 if (getNodeInfo(Tnodeid).m_version >= NDBD_COPY_GCI_RESTART_NR)
2516 {
2517 jam();
2518 /**
2519 * COPY sysfile to starting node here directly
2520 * to that it gets nodegroups early on
2521 */
2522
2523 /**
2524 * Note: only one node can be starting now, so we can use
2525 * c_nodeStartMaster.startNode for determening where to send
2526 */
2527 c_nodeStartMaster.m_outstandingGsn = GSN_COPY_GCIREQ;
2528 copyGciLab(signal, CopyGCIReq::RESTART_NR);
2529 }
2530 else
2531 {
2532 jam();
2533 startme_copygci_conf(signal);
2534 }
2535 }//Dbdih::nodeRestartStartRecConfLab()
2536
2537 void
startme_copygci_conf(Signal * signal)2538 Dbdih::startme_copygci_conf(Signal* signal)
2539 {
2540 jam();
2541 Callback c = { safe_cast(&Dbdih::lcpBlockedLab),
2542 c_nodeStartMaster.startNode };
2543 Mutex mutex(signal, c_mutexMgr, c_nodeStartMaster.m_fragmentInfoMutex);
2544 mutex.lock(c, true, true);
2545 }
2546
lcpBlockedLab(Signal * signal,Uint32 nodeId,Uint32 retVal)2547 void Dbdih::lcpBlockedLab(Signal* signal, Uint32 nodeId, Uint32 retVal)
2548 {
2549 jamEntry();
2550 if (c_nodeStartMaster.startNode != nodeId)
2551 {
2552 jam();
2553 if (retVal == 0 || retVal == UtilLockRef::InLockQueue)
2554 {
2555 infoEvent("Releasing table/fragment info lock for node %u", nodeId);
2556
2557 Mutex mutex(signal, c_mutexMgr, c_nodeStartMaster.m_fragmentInfoMutex);
2558 mutex.unlock();
2559 return;
2560 }
2561 return;
2562 }
2563
2564 if (retVal == UtilLockRef::InLockQueue)
2565 {
2566 jam();
2567 infoEvent("Node %u enqueued is waiting to copy table/fragment info",
2568 c_nodeStartMaster.startNode);
2569 return;
2570 }
2571
2572 ndbrequire(retVal == 0); // Mutex error
2573 ndbrequire(getNodeStatus(c_nodeStartMaster.startNode)==NodeRecord::STARTING);
2574 /*------------------------------------------------------------------------*/
2575 // NOW WE HAVE COPIED ALL INFORMATION IN DICT WE ARE NOW READY TO COPY ALL
2576 // INFORMATION IN DIH TO THE NEW NODE.
2577 /*------------------------------------------------------------------------*/
2578
2579 c_nodeStartMaster.wait = 10;
2580 signal->theData[0] = DihContinueB::ZCOPY_NODE;
2581 signal->theData[1] = 0;
2582 sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
2583 c_nodeStartMaster.m_outstandingGsn = GSN_COPY_TABREQ;
2584 }//Dbdih::lcpBlockedLab()
2585
nodeDictStartConfLab(Signal * signal)2586 void Dbdih::nodeDictStartConfLab(Signal* signal)
2587 {
2588 /*-----------------------------------------------------------------*/
2589 // Report that node restart has completed copy of dictionary.
2590 /*-----------------------------------------------------------------*/
2591 signal->theData[0] = NDB_LE_NR_CopyDict;
2592 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 1, JBB);
2593
2594 /*-------------------------------------------------------------------------*/
2595 // NOW WE HAVE COPIED BOTH DIH AND DICT INFORMATION. WE ARE NOW READY TO
2596 // INTEGRATE THE NODE INTO THE LCP AND GCP PROTOCOLS AND TO ALLOW UPDATES OF
2597 // THE DICTIONARY AGAIN.
2598 /*-------------------------------------------------------------------------*/
2599 c_nodeStartMaster.wait = ZFALSE;
2600 c_nodeStartMaster.blockGcp = 1;
2601
2602 return;
2603 }//Dbdih::nodeDictStartConfLab()
2604
dihCopyCompletedLab(Signal * signal)2605 void Dbdih::dihCopyCompletedLab(Signal* signal)
2606 {
2607 BlockReference ref = calcDictBlockRef(c_nodeStartMaster.startNode);
2608 DictStartReq * req = (DictStartReq*)&signal->theData[0];
2609 req->restartGci = (Uint32)(m_micro_gcp.m_new_gci >> 32);
2610 req->senderRef = reference();
2611 sendSignal(ref, GSN_DICTSTARTREQ,
2612 signal, DictStartReq::SignalLength, JBB);
2613 c_nodeStartMaster.m_outstandingGsn = GSN_DICTSTARTREQ;
2614 c_nodeStartMaster.wait = 0;
2615 }//Dbdih::dihCopyCompletedLab()
2616
gcpBlockedLab(Signal * signal)2617 void Dbdih::gcpBlockedLab(Signal* signal)
2618 {
2619 /**
2620 * The node DIH will be part of LCP
2621 */
2622 NodeRecordPtr nodePtr;
2623 nodePtr.i = c_nodeStartMaster.startNode;
2624 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
2625 nodePtr.p->m_inclDihLcp = true;
2626
2627 /**
2628 * If node is new...this is the place to do things,
2629 * gcp+lcp is blocked
2630 */
2631 if (getNodeActiveStatus(nodePtr.i) == Sysfile::NS_NotDefined)
2632 {
2633 jam();
2634 infoEvent("Adding node %d to sysfile, NS_Configured",
2635 nodePtr.i);
2636 setNodeActiveStatus(nodePtr.i, Sysfile::NS_Configured);
2637 Sysfile::setNodeGroup(nodePtr.i, SYSFILE->nodeGroups,
2638 NO_NODE_GROUP_ID);
2639 Sysfile::setNodeStatus(nodePtr.i,
2640 SYSFILE->nodeStatus, Sysfile::NS_Configured);
2641 }
2642
2643 /*-------------------------------------------------------------------------*/
2644 // NOW IT IS TIME TO INFORM ALL OTHER NODES IN THE CLUSTER OF THE STARTED
2645 // NODE SUCH THAT THEY ALSO INCLUDE THE NODE IN THE NODE LISTS AND SO FORTH.
2646 /*------------------------------------------------------------------------*/
2647 sendLoopMacro(INCL_NODEREQ, sendINCL_NODEREQ, RNIL);
2648 /*-------------------------------------------------------------------------*/
2649 // We also need to send to the starting node to ensure he is aware of the
2650 // global checkpoint id and the correct state. We do not wait for any reply
2651 // since the starting node will not send any.
2652 /*-------------------------------------------------------------------------*/
2653 Uint32 startVersion = getNodeInfo(c_nodeStartMaster.startNode).m_version;
2654
2655 if ((getMajor(startVersion) == 4 &&
2656 startVersion >= NDBD_INCL_NODECONF_VERSION_4) ||
2657 (getMajor(startVersion) == 5 &&
2658 startVersion >= NDBD_INCL_NODECONF_VERSION_5) ||
2659 (getMajor(startVersion) > 5))
2660 {
2661 c_INCL_NODEREQ_Counter.setWaitingFor(c_nodeStartMaster.startNode);
2662 }
2663
2664 sendINCL_NODEREQ(signal, c_nodeStartMaster.startNode, RNIL);
2665 }//Dbdih::gcpBlockedLab()
2666
2667 /*---------------------------------------------------------------------------*/
2668 // THIS SIGNAL IS EXECUTED IN BOTH SLAVES AND IN THE MASTER
2669 /*---------------------------------------------------------------------------*/
execINCL_NODECONF(Signal * signal)2670 void Dbdih::execINCL_NODECONF(Signal* signal)
2671 {
2672 jamEntry();
2673 Uint32 TstartNode = signal->theData[0];
2674 Uint32 TsendNodeId_or_blockref = signal->theData[1];
2675
2676 Uint32 blocklist[7];
2677 blocklist[0] = clocallqhblockref;
2678 blocklist[1] = clocaltcblockref;
2679 blocklist[2] = cdictblockref;
2680 blocklist[3] = numberToRef(BACKUP, getOwnNodeId());
2681 blocklist[4] = numberToRef(SUMA, getOwnNodeId());
2682 blocklist[5] = numberToRef(DBSPJ, getOwnNodeId());
2683 blocklist[6] = 0;
2684
2685 for (Uint32 i = 0; blocklist[i] != 0; i++)
2686 {
2687 if (TsendNodeId_or_blockref == blocklist[i])
2688 {
2689 jam();
2690
2691 if (TstartNode != c_nodeStartSlave.nodeId)
2692 {
2693 jam();
2694 warningEvent("Recevied INCL_NODECONF for %u from %s"
2695 " while %u is starting",
2696 TstartNode,
2697 getBlockName(refToBlock(TsendNodeId_or_blockref)),
2698 c_nodeStartSlave.nodeId);
2699 return;
2700 }
2701
2702 if (getNodeStatus(c_nodeStartSlave.nodeId) == NodeRecord::ALIVE &&
2703 blocklist[i+1] != 0)
2704 {
2705 /**
2706 * Send to next in block list
2707 */
2708 jam();
2709 signal->theData[0] = reference();
2710 signal->theData[1] = c_nodeStartSlave.nodeId;
2711 sendSignal(blocklist[i+1], GSN_INCL_NODEREQ, signal, 2, JBB);
2712 return;
2713 }
2714 else
2715 {
2716 /**
2717 * All done, reply to master
2718 */
2719 jam();
2720 signal->theData[0] = c_nodeStartSlave.nodeId;
2721 signal->theData[1] = cownNodeId;
2722 sendSignal(cmasterdihref, GSN_INCL_NODECONF, signal, 2, JBB);
2723
2724 c_nodeStartSlave.nodeId = 0;
2725 return;
2726 }
2727 }
2728 }
2729
2730 if (c_nodeStartMaster.startNode != TstartNode)
2731 {
2732 jam();
2733 warningEvent("Recevied INCL_NODECONF for %u from %u"
2734 " while %u is starting",
2735 TstartNode,
2736 TsendNodeId_or_blockref,
2737 c_nodeStartMaster.startNode);
2738 return;
2739 }
2740
2741 ndbrequire(reference() == cmasterdihref);
2742 receiveLoopMacro(INCL_NODEREQ, TsendNodeId_or_blockref);
2743
2744 CRASH_INSERTION(7128);
2745 /*-------------------------------------------------------------------------*/
2746 // Now that we have included the starting node in the node lists in the
2747 // various blocks we are ready to start the global checkpoint protocol
2748 /*------------------------------------------------------------------------*/
2749 c_nodeStartMaster.wait = 11;
2750 c_nodeStartMaster.blockGcp = 0;
2751
2752 /**
2753 * Restart GCP
2754 */
2755 signal->theData[0] = reference();
2756 sendSignal(reference(), GSN_UNBLO_DICTCONF, signal, 1, JBB);
2757
2758 signal->theData[0] = DihContinueB::ZSTART_GCP;
2759 sendSignal(reference(), GSN_CONTINUEB, signal, 1, JBB);
2760 /**
2761 * To increase likelyhood that multiple nodes starting simulatanious
2762 * gets to copy fragment-info before a new LCP is started
2763 * we delay the releasing of this mutex. So that node that (might)
2764 * be started when GSN_START_PERMREP arrives will get mutex
2765 * before LCP (which does trylock for 60s)
2766 */
2767 signal->theData[0] = DihContinueB::ZDELAY_RELEASE_FRAGMENT_INFO_MUTEX;
2768 signal->theData[1] = c_nodeStartMaster.m_fragmentInfoMutex.getHandle();
2769 c_nodeStartMaster.m_fragmentInfoMutex.clear();
2770 sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 500, 2);
2771 }//Dbdih::execINCL_NODECONF()
2772
execUNBLO_DICTCONF(Signal * signal)2773 void Dbdih::execUNBLO_DICTCONF(Signal* signal)
2774 {
2775 jamEntry();
2776 c_nodeStartMaster.wait = ZFALSE;
2777 if (!c_nodeStartMaster.activeState) {
2778 jam();
2779 return;
2780 }//if
2781
2782 CRASH_INSERTION(7129);
2783 /**-----------------------------------------------------------------------
2784 * WE HAVE NOW PREPARED IT FOR INCLUSION IN THE LCP PROTOCOL.
2785 * WE CAN NOW START THE LCP PROTOCOL AGAIN.
2786 * WE HAVE ALSO MADE THIS FOR THE GCP PROTOCOL.
2787 * WE ARE READY TO START THE PROTOCOLS AND RESPOND TO THE START REQUEST
2788 * FROM THE STARTING NODE.
2789 *------------------------------------------------------------------------*/
2790
2791 StartMeConf * const startMe = (StartMeConf *)&signal->theData[0];
2792
2793 const Uint32 wordPerSignal = StartMeConf::DATA_SIZE;
2794 const int noOfSignals = ((Sysfile::SYSFILE_SIZE32 + (wordPerSignal - 1)) /
2795 wordPerSignal);
2796
2797 Uint32 nodeId = startMe->startingNodeId = c_nodeStartMaster.startNode;
2798 startMe->startWord = 0;
2799
2800 const Uint32 ref = calcDihBlockRef(c_nodeStartMaster.startNode);
2801 for(int i = 0; i < noOfSignals; i++){
2802 jam();
2803 { // Do copy
2804 const int startWord = startMe->startWord;
2805 for(Uint32 j = 0; j < wordPerSignal; j++){
2806 startMe->data[j] = sysfileData[j+startWord];
2807 }
2808 }
2809 sendSignal(ref, GSN_START_MECONF, signal, StartMeConf::SignalLength, JBB);
2810 startMe->startWord += wordPerSignal;
2811 }//for
2812 c_nodeStartMaster.m_outstandingGsn = GSN_START_MECONF;
2813 nodeResetStart(signal);
2814
2815 /**
2816 * Allow next node to start...
2817 */
2818 signal->theData[0] = nodeId;
2819 sendSignal(NDBCNTR_REF, GSN_START_PERMREP, signal, 1, JBB);
2820 }//Dbdih::execUNBLO_DICTCONF()
2821
2822 /*---------------------------------------------------------------------------*/
2823 /* NODE RESTART COPY REQUEST */
2824 /*---------------------------------------------------------------------------*/
2825 // A NODE RESTART HAS REACHED ITS FINAL PHASE WHEN THE DATA IS TO BE COPIED
2826 // TO THE NODE. START_COPYREQ IS EXECUTED BY THE STARTING NODE.
2827 /*---------------------------------------------------------------------------*/
execSTART_COPYREQ(Signal * signal)2828 void Dbdih::execSTART_COPYREQ(Signal* signal)
2829 {
2830 jamEntry();
2831 StartCopyReq req = *(StartCopyReq*)signal->getDataPtr();
2832
2833 Uint32 startNodeId = req.startingNodeId;
2834
2835 /*-------------------------------------------------------------------------*/
2836 // REPORT Copy process of node restart is now about to start up.
2837 /*-------------------------------------------------------------------------*/
2838 signal->theData[0] = NDB_LE_NR_CopyFragsStarted;
2839 signal->theData[1] = req.startingNodeId;
2840 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
2841
2842 CRASH_INSERTION(7131);
2843
2844 switch (getNodeActiveStatus(startNodeId)) {
2845 case Sysfile::NS_Active:
2846 case Sysfile::NS_ActiveMissed_1:
2847 case Sysfile::NS_ActiveMissed_2:
2848 case Sysfile::NS_NotActive_NotTakenOver:
2849 case Sysfile::NS_Configured:
2850 jam();
2851 /*-----------------------------------------------------------------------*/
2852 // AN ACTIVE NODE HAS BEEN STARTED. THE ACTIVE NODE MUST THEN GET ALL DATA
2853 // IT HAD BEFORE ITS CRASH. WE START THE TAKE OVER IMMEDIATELY.
2854 // SINCE WE ARE AN ACTIVE NODE WE WILL TAKE OVER OUR OWN NODE THAT
2855 // PREVIOUSLY CRASHED.
2856 /*-----------------------------------------------------------------------*/
2857 startTakeOver(signal, startNodeId, startNodeId, &req);
2858 break;
2859 case Sysfile::NS_TakeOver:{
2860 jam();
2861 /*--------------------------------------------------------------------
2862 * We were in the process of taking over but it was not completed.
2863 * We will complete it now instead.
2864 *--------------------------------------------------------------------*/
2865 Uint32 takeOverNode = Sysfile::getTakeOverNode(startNodeId,
2866 SYSFILE->takeOver);
2867 if(takeOverNode == 0){
2868 jam();
2869 warningEvent("Bug in take-over code restarting");
2870 takeOverNode = startNodeId;
2871 }
2872
2873 startTakeOver(signal, startNodeId, takeOverNode, &req);
2874 break;
2875 }
2876 default:
2877 ndbrequire(false);
2878 break;
2879 }//switch
2880 }//Dbdih::execSTART_COPYREQ()
2881
2882 /*---------------------------------------------------------------------------*/
2883 /* SLAVE LOGIC FOR NODE RESTART */
2884 /*---------------------------------------------------------------------------*/
execSTART_INFOREQ(Signal * signal)2885 void Dbdih::execSTART_INFOREQ(Signal* signal)
2886 {
2887 jamEntry();
2888 StartInfoReq *const req =(StartInfoReq*)&signal->theData[0];
2889 Uint32 startNode = req->startingNodeId;
2890 if (cfailurenr != req->systemFailureNo) {
2891 jam();
2892 //---------------------------------------------------------------
2893 // A failure occurred since master sent this request. We will ignore
2894 // this request since the node is already dead that is starting.
2895 //---------------------------------------------------------------
2896 return;
2897 }//if
2898 CRASH_INSERTION(7123);
2899 if (isMaster()) {
2900 jam();
2901 ndbrequire(getNodeStatus(startNode) == NodeRecord::STARTING);
2902 } else {
2903 jam();
2904 ndbrequire(getNodeStatus(startNode) == NodeRecord::DEAD);
2905 }//if
2906 if ((!getAllowNodeStart(startNode)) ||
2907 (c_nodeStartSlave.nodeId != 0) ||
2908 (ERROR_INSERTED(7124))) {
2909 jam();
2910 StartInfoRef *const ref =(StartInfoRef*)&signal->theData[0];
2911 ref->startingNodeId = startNode;
2912 ref->sendingNodeId = cownNodeId;
2913 ref->errorCode = StartPermRef::ZNODE_START_DISALLOWED_ERROR;
2914 sendSignal(cmasterdihref, GSN_START_INFOREF, signal,
2915 StartInfoRef::SignalLength, JBB);
2916 return;
2917 }//if
2918 setNodeStatus(startNode, NodeRecord::STARTING);
2919 if (req->typeStart == NodeState::ST_INITIAL_NODE_RESTART) {
2920 jam();
2921 setAllowNodeStart(startNode, false);
2922 invalidateNodeLCP(signal, startNode, 0);
2923 } else {
2924 jam();
2925 StartInfoConf * c = (StartInfoConf*)&signal->theData[0];
2926 c->sendingNodeId = cownNodeId;
2927 c->startingNodeId = startNode;
2928 sendSignal(cmasterdihref, GSN_START_INFOCONF, signal,
2929 StartInfoConf::SignalLength, JBB);
2930 return;
2931 }//if
2932 }//Dbdih::execSTART_INFOREQ()
2933
execINCL_NODEREQ(Signal * signal)2934 void Dbdih::execINCL_NODEREQ(Signal* signal)
2935 {
2936 jamEntry();
2937 Uint32 retRef = signal->theData[0];
2938 Uint32 nodeId = signal->theData[1];
2939 if (nodeId == getOwnNodeId() && ERROR_INSERTED(7165))
2940 {
2941 CLEAR_ERROR_INSERT_VALUE;
2942 sendSignalWithDelay(reference(), GSN_INCL_NODEREQ, signal, 5000,
2943 signal->getLength());
2944 return;
2945 }
2946
2947 Uint32 tnodeStartFailNr = signal->theData[2];
2948 Uint32 gci_hi = signal->theData[4];
2949 Uint32 gci_lo = signal->theData[5];
2950 if (unlikely(signal->getLength() < 6))
2951 {
2952 jam();
2953 gci_lo = 0;
2954 }
2955
2956 Uint64 gci = gci_lo | (Uint64(gci_hi) << 32);
2957 CRASH_INSERTION(7127);
2958 m_micro_gcp.m_current_gci = gci;
2959 m_micro_gcp.m_old_gci = gci - 1;
2960 if (!isMaster()) {
2961 jam();
2962 /*-----------------------------------------------------------------------*/
2963 // We don't want to change the state of the master since he can be in the
2964 // state LCP_TCGET at this time.
2965 /*-----------------------------------------------------------------------*/
2966 c_lcpState.setLcpStatus(LCP_STATUS_IDLE, __LINE__);
2967 }//if
2968
2969 /*-------------------------------------------------------------------------*/
2970 // When a node is restarted we must ensure that a lcp will be run
2971 // as soon as possible and the reset the delay according to the original
2972 // configuration.
2973 // Without an initial local checkpoint the new node will not be available.
2974 /*-------------------------------------------------------------------------*/
2975 if (getOwnNodeId() == nodeId) {
2976 jam();
2977 /*-----------------------------------------------------------------------*/
2978 // We are the starting node. We came here only to set the global checkpoint
2979 // id's and the lcp status.
2980 /*-----------------------------------------------------------------------*/
2981 CRASH_INSERTION(7171);
2982 Uint32 masterVersion = getNodeInfo(refToNode(cmasterdihref)).m_version;
2983
2984 if ((NDB_VERSION_MAJOR == 4 &&
2985 masterVersion >= NDBD_INCL_NODECONF_VERSION_4) ||
2986 (NDB_VERSION_MAJOR == 5 &&
2987 masterVersion >= NDBD_INCL_NODECONF_VERSION_5) ||
2988 (NDB_VERSION_MAJOR > 5))
2989 {
2990 signal->theData[0] = getOwnNodeId();
2991 signal->theData[1] = getOwnNodeId();
2992 sendSignal(cmasterdihref, GSN_INCL_NODECONF, signal, 2, JBB);
2993 }
2994 return;
2995 }//if
2996 if (getNodeStatus(nodeId) != NodeRecord::STARTING) {
2997 jam();
2998 return;
2999 }//if
3000 ndbrequire(cfailurenr == tnodeStartFailNr);
3001 ndbrequire (c_nodeStartSlave.nodeId == 0);
3002 c_nodeStartSlave.nodeId = nodeId;
3003
3004 ndbrequire (retRef == cmasterdihref);
3005
3006 NodeRecordPtr nodePtr;
3007 nodePtr.i = nodeId;
3008 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
3009
3010 Sysfile::ActiveStatus TsaveState = nodePtr.p->activeStatus;
3011 Uint32 TnodeGroup = nodePtr.p->nodeGroup;
3012
3013 new (nodePtr.p) NodeRecord();
3014 nodePtr.p->nodeGroup = TnodeGroup;
3015 nodePtr.p->activeStatus = TsaveState;
3016 nodePtr.p->nodeStatus = NodeRecord::ALIVE;
3017 nodePtr.p->useInTransactions = true;
3018 nodePtr.p->m_inclDihLcp = true;
3019
3020 removeDeadNode(nodePtr);
3021 insertAlive(nodePtr);
3022 con_lineNodes++;
3023
3024 /*-------------------------------------------------------------------------*/
3025 // WE WILL ALSO SEND THE INCLUDE NODE REQUEST TO THE LOCAL LQH BLOCK.
3026 /*-------------------------------------------------------------------------*/
3027 signal->theData[0] = reference();
3028 signal->theData[1] = nodeId;
3029 signal->theData[2] = Uint32(m_micro_gcp.m_current_gci >> 32);
3030 sendSignal(clocallqhblockref, GSN_INCL_NODEREQ, signal, 3, JBB);
3031 }//Dbdih::execINCL_NODEREQ()
3032
3033 /* ------------------------------------------------------------------------- */
3034 // execINCL_NODECONF() is found in the master logic part since it is used by
3035 // both the master and the slaves.
3036 /* ------------------------------------------------------------------------- */
3037
execSTART_TOREQ(Signal * signal)3038 void Dbdih::execSTART_TOREQ(Signal* signal)
3039 {
3040 jamEntry();
3041 StartToReq req = *(StartToReq *)&signal->theData[0];
3042
3043
3044 if (ndb_pnr(getNodeInfo(refToNode(req.senderRef)).m_version))
3045 {
3046 jam();
3047 TakeOverRecordPtr takeOverPtr;
3048
3049 c_activeTakeOverList.seize(takeOverPtr);
3050 takeOverPtr.p->toStartingNode = req.startingNodeId;
3051 takeOverPtr.p->m_senderRef = req.senderRef;
3052 takeOverPtr.p->m_senderData = req.senderData;
3053 takeOverPtr.p->toMasterStatus = TakeOverRecord::TO_MASTER_IDLE;
3054 takeOverPtr.p->toStartTime = c_current_time;
3055 }
3056
3057 StartToConf * conf = (StartToConf *)&signal->theData[0];
3058 conf->senderData = req.senderData;
3059 conf->sendingNodeId = cownNodeId;
3060 conf->startingNodeId = req.startingNodeId;
3061 sendSignal(req.senderRef, GSN_START_TOCONF,
3062 signal, StartToConf::SignalLength, JBB);
3063 }//Dbdih::execSTART_TOREQ()
3064
/**
 * UPDATE_TOREQ: one step of the take-over protocol for the fragment that
 * is currently being copied to req.startingNodeId.
 *
 * For senders whose version passes ndb_pnr() the request type selects the
 * step:
 *  - BEFORE_STORED:        register this take-over as the active one in the
 *                          copy node's node group and request the
 *                          fragment-info mutex. The reply is sent from
 *                          updateToReq_fragmentMutex_locked().
 *  - AFTER_STORED:         unlock the fragment-info mutex, reply CONF.
 *  - BEFORE_COMMIT_STORED: request the fragment-info mutex; the reply is
 *                          sent from the lock callback.
 *  - AFTER_COMMIT_STORED:  unlock the fragment-info mutex and the
 *                          switch-primary mutex, reply CONF.
 * A request type not listed above falls out of the switch and is answered
 * with CONF.
 *
 * UPDATE_TOREF is returned when no take-over record exists for the
 * starting node (UnknownTakeOver) or when the node group already has an
 * active take-over (CopyFragInProgress, extra = that node).
 */
void Dbdih::execUPDATE_TOREQ(Signal* signal)
{
  jamEntry();
  // Work on a copy: replies are built in the same signal buffer.
  UpdateToReq req = *(UpdateToReq *)&signal->theData[0];

  // Only assigned on the paths that jump to the 'ref' label.
  Uint32 errCode;
  Uint32 extra;
  if (ndb_pnr(getNodeInfo(refToNode(req.senderRef)).m_version))
  {
    jam();
    /**
     * Look up the take-over record created by START_TOREQ.
     */
    TakeOverRecordPtr takeOverPtr;
    if (findTakeOver(takeOverPtr, req.startingNodeId) == false)
    {
      errCode = UpdateToRef::UnknownTakeOver;
      extra = RNIL;
      goto ref;
    }

    CRASH_INSERTION(7141);

    // Remember which fragment is being copied and from which node.
    takeOverPtr.p->toCopyNode = req.copyNodeId;
    takeOverPtr.p->toCurrentTabref = req.tableId;
    takeOverPtr.p->toCurrentFragid = req.fragmentNo;

    // Node group of the copy (source) node.
    NodeRecordPtr nodePtr;
    NodeGroupRecordPtr NGPtr;
    nodePtr.i = req.copyNodeId;
    ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
    NGPtr.i = nodePtr.p->nodeGroup;
    ptrCheckGuard(NGPtr, MAX_NDB_NODES, nodeGroupRecord);

    Mutex mutex(signal, c_mutexMgr, takeOverPtr.p->m_fragmentInfoMutex);
    // Callback run when a lock request below has been answered.
    Callback c = { safe_cast(&Dbdih::updateToReq_fragmentMutex_locked),
                   takeOverPtr.i };

    switch(req.requestType){
    case UpdateToReq::BEFORE_STORED:
      jam();

      // activeTakeOver == 0 means no take-over is registered in the group.
      if (NGPtr.p->activeTakeOver == 0)
      {
        jam();
        NGPtr.p->activeTakeOver = req.startingNodeId;
      }
      else
      {
        jam();
        errCode = UpdateToRef::CopyFragInProgress;
        extra = NGPtr.p->activeTakeOver;
        goto ref;
      }

      takeOverPtr.p->toMasterStatus = TakeOverRecord::TO_MUTEX_BEFORE_STORED;
      mutex.lock(c, false, true);
      // No reply here; it is sent from the lock callback.
      return;
    case UpdateToReq::AFTER_STORED:
    {
      jam();
      mutex.unlock();
      takeOverPtr.p->toMasterStatus = TakeOverRecord::TO_AFTER_STORED;
      // Send conf
      break;
    }
    case UpdateToReq::BEFORE_COMMIT_STORED:
      jam();
      takeOverPtr.p->toMasterStatus = TakeOverRecord::TO_MUTEX_BEFORE_COMMIT;
      mutex.lock(c, false, true);
      // No reply here; it is sent from the lock callback.
      return;
    case UpdateToReq::AFTER_COMMIT_STORED:
    {
      jam();
      mutex.unlock();

      Mutex mutex2(signal, c_mutexMgr,
                   takeOverPtr.p->m_switchPrimaryMutexHandle);
      mutex2.unlock();
      takeOverPtr.p->toMasterStatus = TakeOverRecord::TO_MASTER_IDLE;
      break; // send conf
    }
    }
  }
  else
  {
    CRASH_INSERTION(7154);
    // NOTE(review): macro defined elsewhere; by its name it returns from
    // this function when the node is not alive -- confirm.
    RETURN_IF_NODE_NOT_ALIVE(req.startingNodeId);
  }

  {
    UpdateToConf * conf = (UpdateToConf *)&signal->theData[0];
    conf->senderData = req.senderData;
    conf->sendingNodeId = cownNodeId;
    conf->startingNodeId = req.startingNodeId;
    sendSignal(req.senderRef, GSN_UPDATE_TOCONF, signal,
               UpdateToConf::SignalLength, JBB);
  }
  return;

ref:
  // Error exit: errCode and extra were set at the jump site.
  UpdateToRef* ref = (UpdateToRef*)signal->getDataPtrSend();
  ref->senderData = req.senderData;
  ref->senderRef = reference();
  ref->errorCode = errCode;
  ref->extra = extra;
  sendSignal(req.senderRef, GSN_UPDATE_TOREF, signal,
             UpdateToRef::SignalLength, JBB);
}
3174
/**
 * Callback for the fragment-info mutex lock requested in
 * execUPDATE_TOREQ().
 *
 * @param signal  signal buffer used for the reply
 * @param toPtrI  index of the take-over record in c_takeOverPool
 * @param retVal  result of the lock request; UtilLockRef::InLockQueue
 *                means the request is queued and only an info event is
 *                logged
 *
 * Once the lock is held:
 *  - TO_MUTEX_BEFORE_STORED: advance the state and send UPDATE_TOCONF.
 *  - TO_MUTEX_BEFORE_COMMIT: clear the node group's active take-over and
 *    request the switch-primary mutex; the reply is then sent from
 *    switchPrimaryMutex_locked().
 * If the starting node is no longer ALIVE the take-over is aborted.
 */
void
Dbdih::updateToReq_fragmentMutex_locked(Signal * signal,
                                        Uint32 toPtrI, Uint32 retVal)
{
  jamEntry();
  TakeOverRecordPtr takeOverPtr;
  c_takeOverPool.getPtr(takeOverPtr, toPtrI);

  Uint32 nodeId = takeOverPtr.p->toStartingNode;

  if (retVal == UtilLockRef::InLockQueue)
  {
    jam();
    // Not locked yet: report what we are waiting for and return.
    infoEvent("Node %u waiting to continue copying table %u fragment: %u (%s)",
              nodeId,
              takeOverPtr.p->toCurrentTabref,
              takeOverPtr.p->toCurrentFragid,
              takeOverPtr.p->toMasterStatus == TakeOverRecord::TO_MUTEX_BEFORE_STORED ? "STORED" : "COMMIT");
    return;
  }

  // Only assigned on the path that jumps to the 'ref' label.
  Uint32 errCode;
  Uint32 extra;

  NodeRecordPtr nodePtr;
  nodePtr.i = nodeId;
  ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
  if (unlikely(nodePtr.p->nodeStatus != NodeRecord::ALIVE))
  {
    jam();
    /**
     * Node died while we waited for lock...
     */
    abortTakeOver(signal, takeOverPtr);
    return;
  }

  switch(takeOverPtr.p->toMasterStatus){
  case TakeOverRecord::TO_MUTEX_BEFORE_STORED:
  {
    jam();
    // send conf
    takeOverPtr.p->toMasterStatus = TakeOverRecord::TO_MUTEX_BEFORE_LOCKED;
    break;
  }
  case TakeOverRecord::TO_MUTEX_BEFORE_COMMIT:
  {
    jam();

    // Node group of the copy node (these declarations shadow the outer
    // nodePtr, which refers to the starting node).
    NodeRecordPtr nodePtr;
    NodeGroupRecordPtr NGPtr;
    nodePtr.i = takeOverPtr.p->toCopyNode;
    ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
    NGPtr.i = nodePtr.p->nodeGroup;
    ptrCheckGuard(NGPtr, MAX_NDB_NODES, nodeGroupRecord);

    // The node group must still be registered to this take-over.
    if (NGPtr.p->activeTakeOver != nodeId)
    {
      ndbassert(false);
      errCode = UpdateToRef::InvalidRequest;
      extra = NGPtr.p->activeTakeOver;
      goto ref;
    }
    NGPtr.p->activeTakeOver = 0;
    takeOverPtr.p->toCopyNode = RNIL;
    Mutex mutex(signal, c_mutexMgr,
                takeOverPtr.p->m_switchPrimaryMutexHandle);
    Callback c = { safe_cast(&Dbdih::switchPrimaryMutex_locked),
                   takeOverPtr.i };
    ndbrequire(mutex.lock(c));
    takeOverPtr.p->toMasterStatus = TakeOverRecord::TO_MUTEX_BEFORE_SWITCH_REPLICA;
    // The reply is sent from switchPrimaryMutex_locked().
    return;
    break;
  }
  default:
    // Any other state is unexpected here; record it in the jam trace.
    jamLine(takeOverPtr.p->toMasterStatus);
    ndbrequire(false);
  }

  {
    UpdateToConf * conf = (UpdateToConf *)&signal->theData[0];
    conf->senderData = takeOverPtr.p->m_senderData;
    conf->sendingNodeId = cownNodeId;
    conf->startingNodeId = takeOverPtr.p->toStartingNode;
    sendSignal(takeOverPtr.p->m_senderRef, GSN_UPDATE_TOCONF, signal,
               UpdateToConf::SignalLength, JBB);
  }
  return;

ref:
  {
    // Error exit: give the fragment-info mutex back before replying.
    Mutex mutex(signal, c_mutexMgr, takeOverPtr.p->m_fragmentInfoMutex);
    mutex.unlock();

    UpdateToRef* ref = (UpdateToRef*)signal->getDataPtrSend();
    ref->senderData = takeOverPtr.p->m_senderData;
    ref->senderRef = reference();
    ref->errorCode = errCode;
    ref->extra = extra;
    sendSignal(takeOverPtr.p->m_senderRef, GSN_UPDATE_TOREF, signal,
               UpdateToRef::SignalLength, JBB);
    return;
  }
}
3279
3280 void
switchPrimaryMutex_locked(Signal * signal,Uint32 toPtrI,Uint32 retVal)3281 Dbdih::switchPrimaryMutex_locked(Signal* signal, Uint32 toPtrI, Uint32 retVal)
3282 {
3283 jamEntry();
3284 ndbrequire(retVal == 0);
3285
3286 TakeOverRecordPtr takeOverPtr;
3287 c_takeOverPool.getPtr(takeOverPtr, toPtrI);
3288
3289 Uint32 nodeId = takeOverPtr.p->toStartingNode;
3290 NodeRecordPtr nodePtr;
3291 nodePtr.i = nodeId;
3292 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
3293
3294 if (unlikely(nodePtr.p->nodeStatus != NodeRecord::ALIVE))
3295 {
3296 jam();
3297 /**
3298 * Node died while we waited for lock...
3299 */
3300 abortTakeOver(signal, takeOverPtr);
3301 return;
3302 }
3303
3304 takeOverPtr.p->toMasterStatus = TakeOverRecord::TO_MUTEX_AFTER_SWITCH_REPLICA;
3305
3306 UpdateToConf * conf = (UpdateToConf *)&signal->theData[0];
3307 conf->senderData = takeOverPtr.p->m_senderData;
3308 conf->sendingNodeId = cownNodeId;
3309 conf->startingNodeId = takeOverPtr.p->toStartingNode;
3310 sendSignal(takeOverPtr.p->m_senderRef, GSN_UPDATE_TOCONF, signal,
3311 UpdateToConf::SignalLength, JBB);
3312 }
3313
3314 void
switchPrimaryMutex_unlocked(Signal * signal,Uint32 toPtrI,Uint32 retVal)3315 Dbdih::switchPrimaryMutex_unlocked(Signal* signal, Uint32 toPtrI, Uint32 retVal)
3316 {
3317 jamEntry();
3318 ndbrequire(retVal == 0);
3319
3320 TakeOverRecordPtr takeOverPtr;
3321 c_takeOverPool.getPtr(takeOverPtr, toPtrI);
3322
3323 UpdateToConf * conf = (UpdateToConf *)&signal->theData[0];
3324 conf->senderData = takeOverPtr.p->m_senderData;
3325 conf->sendingNodeId = cownNodeId;
3326 conf->startingNodeId = takeOverPtr.p->toStartingNode;
3327 sendSignal(takeOverPtr.p->m_senderRef, GSN_UPDATE_TOCONF, signal,
3328 UpdateToConf::SignalLength, JBB);
3329 }
3330
3331 void
abortTakeOver(Signal * signal,TakeOverRecordPtr takeOverPtr)3332 Dbdih::abortTakeOver(Signal* signal, TakeOverRecordPtr takeOverPtr)
3333 {
3334 if (!takeOverPtr.p->m_switchPrimaryMutexHandle.isNull())
3335 {
3336 jam();
3337 Mutex mutex(signal, c_mutexMgr,
3338 takeOverPtr.p->m_switchPrimaryMutexHandle);
3339 mutex.unlock();
3340
3341 }
3342
3343 if (!takeOverPtr.p->m_fragmentInfoMutex.isNull())
3344 {
3345 jam();
3346 Mutex mutex(signal, c_mutexMgr,
3347 takeOverPtr.p->m_fragmentInfoMutex);
3348 mutex.unlock();
3349 }
3350
3351 NodeRecordPtr nodePtr;
3352 nodePtr.i = takeOverPtr.p->toCopyNode;
3353 if (nodePtr.i != RNIL)
3354 {
3355 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
3356 NodeGroupRecordPtr NGPtr;
3357 NGPtr.i = nodePtr.p->nodeGroup;
3358 ptrCheckGuard(NGPtr, MAX_NDB_NODES, nodeGroupRecord);
3359 if (NGPtr.p->activeTakeOver == takeOverPtr.p->toStartingNode)
3360 {
3361 jam();
3362 NGPtr.p->activeTakeOver = 0;
3363 }
3364 }
3365
3366 releaseTakeOver(takeOverPtr);
3367 }
3368
/**
 * Saturating add: *counter += add, clamped to 0xFFFFFFFF instead of
 * wrapping around.
 */
static
void
add_lcp_counter(Uint32 * counter, Uint32 add)
{
  const Uint64 limit = 0xFFFFFFFF;
  const Uint64 sum = Uint64(*counter) + Uint64(add);
  *counter = Uint32(sum > limit ? limit : sum);
}
3379
3380 void
check_force_lcp(Ptr<TakeOverRecord> takeOverPtr)3381 Dbdih::check_force_lcp(Ptr<TakeOverRecord> takeOverPtr)
3382 {
3383 Uint64 duration = c_current_time - takeOverPtr.p->toStartTime;
3384 Uint64 lcp_time = c_lcpState.m_lcp_time;
3385
3386 Ptr<TakeOverRecord> tmp;
3387 for (c_activeTakeOverList.first(tmp); !tmp.isNull();
3388 c_activeTakeOverList.next(tmp))
3389 {
3390 jam();
3391 if (tmp.p->toMasterStatus != TakeOverRecord::TO_WAIT_LCP)
3392 {
3393 jam();
3394
3395 Uint64 elapsed = c_current_time - tmp.p->toStartTime;
3396 if (elapsed >= duration)
3397 {
3398 jam();
3399 /**
3400 * This has spent more...than our took...
3401 * expect it to finish soon...
3402 * i.e dont force LCP
3403 */
3404 infoEvent("Node %u not forcing LCP start(1 %llu >= %llu), wait on %u",
3405 takeOverPtr.p->toStartingNode,
3406 elapsed, duration,
3407 tmp.p->toStartingNode);
3408 return;
3409 }
3410
3411 Uint64 left = duration - elapsed;
3412 if (left < lcp_time)
3413 {
3414 jam();
3415 /**
3416 * This has less than one lcp left...
3417 * dont force LCP
3418 */
3419 infoEvent("Node %u not forcing LCP start(2 %llu < %llu), wait on %u",
3420 takeOverPtr.p->toStartingNode,
3421 left, lcp_time,
3422 tmp.p->toStartingNode);
3423 return;
3424 }
3425 }
3426 }
3427 add_lcp_counter(&c_lcpState.ctimer, (1 << 31));
3428 }
3429
execEND_TOREQ(Signal * signal)3430 void Dbdih::execEND_TOREQ(Signal* signal)
3431 {
3432 jamEntry();
3433 EndToReq req = *(EndToReq *)&signal->theData[0];
3434
3435 Uint32 nodeId = refToNode(req.senderRef);
3436 TakeOverRecordPtr takeOverPtr;
3437
3438 if (ndb_pnr(getNodeInfo(nodeId).m_version))
3439 {
3440 jam();
3441 /**
3442 *
3443 */
3444 ndbrequire(findTakeOver(takeOverPtr, nodeId));
3445 NodeRecordPtr nodePtr;
3446 nodePtr.i = nodeId;
3447 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
3448
3449 if (req.flags & StartCopyReq::WAIT_LCP)
3450 {
3451 jam();
3452
3453 /**
3454 * Wait for LCP
3455 */
3456 nodePtr.p->copyCompleted = 2;
3457 takeOverPtr.p->toMasterStatus = TakeOverRecord::TO_WAIT_LCP;
3458
3459 /**
3460 * Make sure that node also participatened in 1 GCP
3461 * before running it's first LCP, so that GCI variables
3462 * in LQH are set properly
3463 */
3464 c_lcpState.lcpStopGcp = c_newest_restorable_gci;
3465
3466 check_force_lcp(takeOverPtr);
3467 return;
3468 }
3469 nodePtr.p->copyCompleted = 1;
3470 releaseTakeOver(takeOverPtr);
3471 }
3472
3473 EndToConf * conf = (EndToConf *)&signal->theData[0];
3474 conf->senderData = req.senderData;
3475 conf->sendingNodeId = cownNodeId;
3476 conf->startingNodeId = req.startingNodeId;
3477 sendSignal(req.senderRef, GSN_END_TOCONF, signal,
3478 EndToConf::SignalLength, JBB);
3479 }//Dbdih::execEND_TOREQ()
3480
/**
 * Take / release the write lock (m_lock) of a table record. Both macros
 * call assertOwnThread() before touching the lock. The argument is
 * parenthesized so that any pointer-valued expression can be passed.
 */
#define DIH_TAB_WRITE_LOCK(tabPtrP) \
  do { assertOwnThread(); (tabPtrP)->m_lock.write_lock(); } while (0)

#define DIH_TAB_WRITE_UNLOCK(tabPtrP) \
  do { assertOwnThread(); (tabPtrP)->m_lock.write_unlock(); } while (0)
3486
3487 /* --------------------------------------------------------------------------*/
3488 /* AN ORDER TO START OR COMMIT THE REPLICA CREATION ARRIVED FROM THE */
3489 /* MASTER. */
3490 /* --------------------------------------------------------------------------*/
execCREATE_FRAGREQ(Signal * signal)3491 void Dbdih::execCREATE_FRAGREQ(Signal* signal)
3492 {
3493 jamEntry();
3494 CreateFragReq * const req = (CreateFragReq *)&signal->theData[0];
3495
3496 Uint32 senderData = req->senderData;
3497 Uint32 senderRef = req->senderRef;
3498
3499 TabRecordPtr tabPtr;
3500 tabPtr.i = req->tableId;
3501 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
3502
3503 Uint32 fragId = req->fragId;
3504 Uint32 tdestNodeid = req->startingNodeId;
3505 //Uint32 tsourceNodeid = req->copyNodeId;
3506 Uint32 startGci = req->startGci;
3507 Uint32 replicaType = req->replicaType;
3508 Uint32 tFailedNodeId = req->failedNodeId;
3509
3510 if (!ndb_pnr(getNodeInfo(refToNode(senderRef)).m_version))
3511 {
3512 jam();
3513 tFailedNodeId = tdestNodeid;
3514 }
3515
3516 FragmentstorePtr fragPtr;
3517 getFragstore(tabPtr.p, fragId, fragPtr);
3518 RETURN_IF_NODE_NOT_ALIVE(tdestNodeid);
3519 ReplicaRecordPtr frReplicaPtr;
3520 findReplica(frReplicaPtr, fragPtr.p, tFailedNodeId,
3521 replicaType == CreateFragReq::START_LOGGING ? false : true);
3522 if (frReplicaPtr.i == RNIL)
3523 {
3524 dump_replica_info(fragPtr.p);
3525 }
3526 ndbrequire(frReplicaPtr.i != RNIL);
3527
3528 DIH_TAB_WRITE_LOCK(tabPtr.p);
3529 switch (replicaType) {
3530 case CreateFragReq::STORED:
3531 jam();
3532 CRASH_INSERTION(7138);
3533 /* ----------------------------------------------------------------------*/
3534 /* HERE WE ARE INSERTING THE NEW BACKUP NODE IN THE EXECUTION OF ALL */
3535 /* OPERATIONS. FROM HERE ON ALL OPERATIONS ON THIS FRAGMENT WILL INCLUDE*/
3536 /* USE OF THE NEW REPLICA. */
3537 /* --------------------------------------------------------------------- */
3538 insertBackup(fragPtr, tdestNodeid);
3539
3540 fragPtr.p->distributionKey++;
3541 fragPtr.p->distributionKey &= 255;
3542 break;
3543 case CreateFragReq::COMMIT_STORED:
3544 jam();
3545 CRASH_INSERTION(7139);
3546 /* ----------------------------------------------------------------------*/
3547 /* HERE WE ARE MOVING THE REPLICA TO THE STORED SECTION SINCE IT IS NOW */
3548 /* FULLY LOADED WITH ALL DATA NEEDED. */
3549 // We also update the order of the replicas here so that if the new
3550 // replica is the desired primary we insert it as primary.
3551 /* ----------------------------------------------------------------------*/
3552 removeOldStoredReplica(fragPtr, frReplicaPtr);
3553 linkStoredReplica(fragPtr, frReplicaPtr);
3554 updateNodeInfo(fragPtr);
3555 break;
3556 case CreateFragReq::START_LOGGING:
3557 jam();
3558 break;
3559 default:
3560 ndbrequire(false);
3561 break;
3562 }//switch
3563 DIH_TAB_WRITE_UNLOCK(tabPtr.p);
3564
3565 /* ------------------------------------------------------------------------*/
3566 /* THE NEW NODE OF THIS REPLICA IS THE STARTING NODE. */
3567 /* ------------------------------------------------------------------------*/
3568 if (tFailedNodeId != tdestNodeid)
3569 {
3570 jam();
3571 /**
3572 * This is a Hot-spare or move partition
3573 */
3574
3575 /* IF WE ARE STARTING A TAKE OVER NODE WE MUST INVALIDATE ALL LCP'S. */
3576 /* OTHERWISE WE WILL TRY TO START LCP'S THAT DO NOT EXIST. */
3577 /* ---------------------------------------------------------------------*/
3578 frReplicaPtr.p->procNode = tdestNodeid;
3579 frReplicaPtr.p->noCrashedReplicas = 0;
3580 frReplicaPtr.p->createGci[0] = startGci;
3581 frReplicaPtr.p->replicaLastGci[0] = (Uint32)-1;
3582 for (Uint32 i = 0; i < MAX_LCP_STORED; i++)
3583 {
3584 frReplicaPtr.p->lcpStatus[i] = ZINVALID;
3585 }
3586 }
3587 else
3588 {
3589 jam();
3590 const Uint32 noCrashed = frReplicaPtr.p->noCrashedReplicas;
3591 arrGuard(noCrashed, 8);
3592 frReplicaPtr.p->createGci[noCrashed] = startGci;
3593 frReplicaPtr.p->replicaLastGci[noCrashed] = (Uint32)-1;
3594 }
3595
3596 CreateFragConf * const conf = (CreateFragConf *)&signal->theData[0];
3597 conf->senderData = senderData;
3598 conf->tableId = tabPtr.i;
3599 conf->fragId = fragId;
3600 conf->sendingNodeId = cownNodeId;
3601 conf->startingNodeId = tdestNodeid;
3602 conf->failedNodeId = tFailedNodeId;
3603 sendSignal(senderRef, GSN_CREATE_FRAGCONF, signal,
3604 CreateFragConf::SignalLength, JBB);
3605 }//Dbdih::execCREATE_FRAGREQ()
3606
3607 /*****************************************************************************/
3608 /*********** NODE ADDING MODULE *************/
3609 /*********** CODE TO HANDLE TAKE OVER *************/
3610 /*****************************************************************************/
3611 // A take over can be initiated by a number of things:
3612 // 1) A node restart, usually the node takes over itself but can also take
3613 // over somebody else if its own data was already taken over
3614 // 2) At system restart it is necessary to use the take over code to recover
3615 // nodes which had too old checkpoints to be restorable by the usual
3616 // restoration from disk.
3617 // 3) When a node has missed too many local checkpoints and is decided by the
3618 // master to be taken over by a hot spare node that sits around waiting
3619 // for this to happen.
3620 //
// To support multiple node failures efficiently the code is written such that
// only one take over can handle transitions in state but during a copy
// fragment other take overs can perform state transitions.
3624 /*****************************************************************************/
startTakeOver(Signal * signal,Uint32 startNode,Uint32 nodeTakenOver,const StartCopyReq * req)3625 void Dbdih::startTakeOver(Signal* signal,
3626 Uint32 startNode,
3627 Uint32 nodeTakenOver,
3628 const StartCopyReq* req)
3629 {
3630 jam();
3631
3632 TakeOverRecordPtr takeOverPtr;
3633 ndbrequire(c_activeTakeOverList.seize(takeOverPtr));
3634 takeOverPtr.p->startGci = SYSFILE->lastCompletedGCI[startNode];
3635 takeOverPtr.p->restorableGci = SYSFILE->lastCompletedGCI[startNode];
3636 takeOverPtr.p->toStartingNode = startNode;
3637 takeOverPtr.p->toFailedNode = nodeTakenOver;
3638 takeOverPtr.p->toCurrentTabref = 0;
3639 takeOverPtr.p->toCurrentFragid = 0;
3640
3641 if (req)
3642 {
3643 jam();
3644 takeOverPtr.p->m_flags = req->flags;
3645 takeOverPtr.p->m_senderData = req->senderData;
3646 takeOverPtr.p->m_senderRef = req->senderRef;
3647 }
3648
3649 takeOverPtr.p->toSlaveStatus = TakeOverRecord::TO_START_FRAGMENTS;
3650 nr_start_fragments(signal, takeOverPtr);
3651 }//Dbdih::startTakeOver()
3652
3653 void
nr_start_fragments(Signal * signal,TakeOverRecordPtr takeOverPtr)3654 Dbdih::nr_start_fragments(Signal* signal,
3655 TakeOverRecordPtr takeOverPtr)
3656 {
3657 Uint32 loopCount = 0 ;
3658 TabRecordPtr tabPtr;
3659 while (loopCount++ < 100) {
3660 tabPtr.i = takeOverPtr.p->toCurrentTabref;
3661 if (tabPtr.i >= ctabFileSize) {
3662 jam();
3663 nr_run_redo(signal, takeOverPtr);
3664 return;
3665 }//if
3666 ptrAss(tabPtr, tabRecord);
3667 if (tabPtr.p->tabStatus != TabRecord::TS_ACTIVE ||
3668 tabPtr.p->tabStorage != TabRecord::ST_NORMAL)
3669 {
3670 jam();
3671 takeOverPtr.p->toCurrentFragid = 0;
3672 takeOverPtr.p->toCurrentTabref++;
3673 continue;
3674 }//if
3675 Uint32 fragId = takeOverPtr.p->toCurrentFragid;
3676 if (fragId >= tabPtr.p->totalfragments) {
3677 jam();
3678 takeOverPtr.p->toCurrentFragid = 0;
3679 takeOverPtr.p->toCurrentTabref++;
3680 continue;
3681 }//if
3682 FragmentstorePtr fragPtr;
3683 getFragstore(tabPtr.p, fragId, fragPtr);
3684 ReplicaRecordPtr loopReplicaPtr;
3685 loopReplicaPtr.i = fragPtr.p->oldStoredReplicas;
3686 while (loopReplicaPtr.i != RNIL) {
3687 ptrCheckGuard(loopReplicaPtr, creplicaFileSize, replicaRecord);
3688 if (loopReplicaPtr.p->procNode == takeOverPtr.p->toStartingNode) {
3689 jam();
3690 nr_start_fragment(signal, takeOverPtr, loopReplicaPtr);
3691 break;
3692 } else {
3693 jam();
3694 loopReplicaPtr.i = loopReplicaPtr.p->nextReplica;
3695 }//if
3696 }//while
3697 takeOverPtr.p->toCurrentFragid++;
3698 }//while
3699 signal->theData[0] = DihContinueB::ZTO_START_FRAGMENTS;
3700 signal->theData[1] = takeOverPtr.i;
3701 sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
3702 }
3703
/**
 * Send START_FRAGREQ to the LQH of the starting node for the fragment at
 * the take-over's current table/fragment position.
 *
 * The replica's local checkpoints are scanned from the one before nextLcp
 * backwards. An LCP is chosen when it is ZVALID and one of the replica's
 * GCI intervals [createGci[j], replicaLastGci[j]] covers
 * [maxGciCompleted + 1, maxGciStarted] of that LCP. The interval index j
 * is shared across the scan and only ever decreases.
 *
 *  - LCP found: the request asks to restore it (lcpNo/lcpId) with one log
 *    node, the starting node itself, for the GCI range [startGci, gci].
 *  - No LCP found: lcpNo is ZNIL and noOfLogNodes is 0. If c_2pass_inr is
 *    set and this is an initial node restart, the request is turned into
 *    an SFR_COPY_FRAG request with nodes[0] of the fragment as source.
 */
void
Dbdih::nr_start_fragment(Signal* signal,
                         TakeOverRecordPtr takeOverPtr,
                         ReplicaRecordPtr replicaPtr)
{
  Uint32 i;
  Uint32 maxLcpId = 0;
  Uint32 maxLcpIndex = ~0;  // ~0 == no restorable LCP found (yet)

  Uint32 gci = 0;
  Uint32 restorableGCI = takeOverPtr.p->restorableGci;

#if defined VM_TRACE || defined ERROR_INSERT
  ndbout_c("tab: %d frag: %d replicaP->nextLcp: %d",
           takeOverPtr.p->toCurrentTabref,
           takeOverPtr.p->toCurrentFragid,
           replicaPtr.p->nextLcp);
#endif

  // j: index of the newest GCI interval not yet ruled out.
  Int32 j = replicaPtr.p->noCrashedReplicas - 1;
  Uint32 idx = prevLcpNo(replicaPtr.p->nextLcp);
  for(i = 0; i<MAX_LCP_USED; i++, idx = prevLcpNo(idx))
  {
#if defined VM_TRACE || defined ERROR_INSERT
    printf("scanning idx: %d lcpId: %d crashed replicas: %u %s",
           idx, replicaPtr.p->lcpId[idx],
           replicaPtr.p->noCrashedReplicas,
           replicaPtr.p->lcpStatus[idx] == ZVALID ? "VALID" : "NOT VALID");
#endif
    if (replicaPtr.p->lcpStatus[idx] == ZVALID)
    {
      // GCI range the log must cover for this LCP to be usable.
      Uint32 startGci = replicaPtr.p->maxGciCompleted[idx] + 1;
      Uint32 stopGci = replicaPtr.p->maxGciStarted[idx];
#if defined VM_TRACE || defined ERROR_INSERT
      ndbout_c(" maxGciCompleted: %u maxGciStarted: %u", startGci - 1, stopGci);
#endif
      for (; j>= 0; j--)
      {
#if defined VM_TRACE || defined ERROR_INSERT
        ndbout_c("crashed replica: %d(%d) replica(createGci: %u lastGci: %d )",
                 j,
                 replicaPtr.p->noCrashedReplicas,
                 replicaPtr.p->createGci[j],
                 replicaPtr.p->replicaLastGci[j]);
#endif
        if (replicaPtr.p->createGci[j] <= startGci &&
            replicaPtr.p->replicaLastGci[j] >= stopGci)
        {
          maxLcpId = replicaPtr.p->lcpId[idx];
          maxLcpIndex = idx;
          gci = replicaPtr.p->replicaLastGci[j];
          goto done;
        }
      }
    }
    else
    {
#if defined VM_TRACE || defined ERROR_INSERT
      // Terminate the trace line started by the printf above.
      printf("\n");
#endif
    }
  }

  idx = 2; // backward compat code
#if defined VM_TRACE || defined ERROR_INSERT
  ndbout_c("- scanning idx: %d lcpId: %d", idx, replicaPtr.p->lcpId[idx]);
#endif
  if (replicaPtr.p->lcpStatus[idx] == ZVALID)
  {
    Uint32 startGci = replicaPtr.p->maxGciCompleted[idx] + 1;
    Uint32 stopGci = replicaPtr.p->maxGciStarted[idx];
    for (;j >= 0; j--)
    {
#if defined VM_TRACE || defined ERROR_INSERT
      ndbout_c("crashed replica: %d(%d) replica(createGci: %u lastGci: %d )",
               j,
               replicaPtr.p->noCrashedReplicas,
               replicaPtr.p->createGci[j],
               replicaPtr.p->replicaLastGci[j]);
#endif
      if (replicaPtr.p->createGci[j] <= startGci &&
          replicaPtr.p->replicaLastGci[j] >= stopGci)
      {
        maxLcpId = replicaPtr.p->lcpId[idx];
        maxLcpIndex = idx;
        gci = replicaPtr.p->replicaLastGci[j];
        goto done;
      }
    }
  }

done:

  StartFragReq *req = (StartFragReq *)signal->getDataPtrSend();
  req->requestInfo = StartFragReq::SFR_RESTORE_LCP;
  if (maxLcpIndex == ~ (Uint32) 0)
  {
    /**
     * we didn't find a local LCP that we can restore
     */
    jam();
    ndbassert(gci == 0);
    replicaPtr.p->m_restorable_gci = gci;

    req->userPtr = 0;
    req->userRef = reference();
    req->lcpNo = ZNIL;
    req->lcpId = 0;
    req->tableId = takeOverPtr.p->toCurrentTabref;
    req->fragId = takeOverPtr.p->toCurrentFragid;
    req->noOfLogNodes = 0;

    if (c_2pass_inr && cstarttype == NodeState::ST_INITIAL_NODE_RESTART)
    {
      /**
       * Check if we can make 2-phase copy
       *   1) non-transaction, (after we rebuild indexes)
       *   2) transaction (maintaining indexes during rebuild)
       *      where the transactional copy copies everything >= startGci
       *
       * NOTE: c_2pass_inr is only set if all nodes in cluster currently
       *       supports this
       */

      if (takeOverPtr.p->startGci == 0)
      {
        jam();
        /**
         * Set a startGci to currently lastCompletedGCI of master
         *   any value will do...as long as subsequent transactional copy
         *   will be using it (scanning >= this value)
         */
        takeOverPtr.p->startGci = SYSFILE->lastCompletedGCI[cmasterNodeId];
      }

      TabRecordPtr tabPtr;
      tabPtr.i = takeOverPtr.p->toCurrentTabref;
      ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);

      FragmentstorePtr fragPtr;
      getFragstore(tabPtr.p, takeOverPtr.p->toCurrentFragid, fragPtr);
      Uint32 nodes[MAX_REPLICAS];
      extractNodeInfo(fragPtr.p, nodes);

      req->lqhLogNode[0] = nodes[0]; // Source
      req->requestInfo = StartFragReq::SFR_COPY_FRAG;
      replicaPtr.p->m_restorable_gci = takeOverPtr.p->startGci;
    }

    // Log which of the two variants is being requested.
    if (req->requestInfo == StartFragReq::SFR_RESTORE_LCP)
    {
      ndbout_c("node: %d tab: %d frag: %d no lcp to restore",
               takeOverPtr.p->toStartingNode,
               takeOverPtr.p->toCurrentTabref,
               takeOverPtr.p->toCurrentFragid);
    }
    else
    {
      ndbout_c("node: %d tab: %d frag: %d copying data from %u (gci: %u)",
               takeOverPtr.p->toStartingNode,
               takeOverPtr.p->toCurrentTabref,
               takeOverPtr.p->toCurrentFragid,
               req->lqhLogNode[0],
               takeOverPtr.p->startGci);
    }

    BlockReference ref = numberToRef(DBLQH, takeOverPtr.p->toStartingNode);
    sendSignal(ref, GSN_START_FRAGREQ, signal,
               StartFragReq::SignalLength, JBB);
  }
  else
  {
    jam();
    if (gci != restorableGCI)
    {
      // Unexpected: dump the replica state before the ndbassert below.
      Ptr<TabRecord> tabPtr;
      tabPtr.i = takeOverPtr.p->toCurrentTabref;
      ptrAss(tabPtr, tabRecord);

      FragmentstorePtr fragPtr;
      getFragstore(tabPtr.p, takeOverPtr.p->toCurrentFragid, fragPtr);
      dump_replica_info(fragPtr.p);
    }
    ndbassert(gci == restorableGCI);
    replicaPtr.p->m_restorable_gci = gci;
    // Log execution starts after the LCP's completed GCI, but never
    // beyond the last GCI of the replica.
    Uint32 startGci = replicaPtr.p->maxGciCompleted[maxLcpIndex] + 1;
    if (startGci > gci)
      startGci = gci;
    ndbout_c("node: %d tab: %d frag: %d restore lcp: %u(idx: %u) maxGciStarted: %u maxGciCompleted: %u (restorable: %u(%u) newestRestorableGCI: %u)",
             takeOverPtr.p->toStartingNode,
             takeOverPtr.p->toCurrentTabref,
             takeOverPtr.p->toCurrentFragid,
             maxLcpId,
             maxLcpIndex,
             replicaPtr.p->maxGciStarted[maxLcpIndex],
             replicaPtr.p->maxGciCompleted[maxLcpIndex],
             restorableGCI,
             SYSFILE->lastCompletedGCI[takeOverPtr.p->toStartingNode],
             SYSFILE->newestRestorableGCI);

    // Same send buffer as the outer 'req'; requestInfo was set above.
    StartFragReq *req = (StartFragReq *)signal->getDataPtrSend();
    req->userPtr = 0;
    req->userRef = reference();
    req->lcpNo = maxLcpIndex;
    req->lcpId = maxLcpId;
    req->tableId = takeOverPtr.p->toCurrentTabref;
    req->fragId = takeOverPtr.p->toCurrentFragid;
    req->noOfLogNodes = 1;
    req->lqhLogNode[0] = takeOverPtr.p->toStartingNode;
    req->startGci[0] = startGci;
    req->lastGci[0] = gci;

    BlockReference ref = numberToRef(DBLQH, takeOverPtr.p->toStartingNode);
    sendSignal(ref, GSN_START_FRAGREQ, signal,
               StartFragReq::SignalLength, JBB);

    // Keep the take-over's startGci at the lowest start GCI seen.
    if (startGci < takeOverPtr.p->startGci)
    {
      jam();
      takeOverPtr.p->startGci = startGci;
    }
  }
}
3927
3928 void
nr_run_redo(Signal * signal,TakeOverRecordPtr takeOverPtr)3929 Dbdih::nr_run_redo(Signal* signal, TakeOverRecordPtr takeOverPtr)
3930 {
3931 /**
3932 * sendSTART_RECREQ uses m_sr_nodes
3933 * and for TO during SR, we don't want to modify it
3934 * so save/restore it
3935 */
3936 NdbNodeBitmask save = m_sr_nodes;
3937 m_sr_nodes.clear();
3938 m_sr_nodes.set(takeOverPtr.p->toStartingNode);
3939
3940 Uint32 save_keepGCI = SYSFILE->keepGCI;
3941 if (takeOverPtr.p->startGci < SYSFILE->keepGCI)
3942 {
3943 jam();
3944 SYSFILE->keepGCI = takeOverPtr.p->startGci;
3945 ndbout_c("GSN_START_RECREQ keepGci: %u (%u)",
3946 takeOverPtr.p->startGci, save_keepGCI);
3947 }
3948
3949 takeOverPtr.p->toCurrentTabref = 0;
3950 takeOverPtr.p->toCurrentFragid = 0;
3951 takeOverPtr.p->toSlaveStatus = TakeOverRecord::TO_RUN_REDO;
3952 sendSTART_RECREQ(signal, takeOverPtr.p->toStartingNode, takeOverPtr.i);
3953
3954 m_sr_nodes = save; // restore
3955 SYSFILE->keepGCI = save_keepGCI;
3956 }
3957
3958 void
nr_start_logging(Signal * signal,TakeOverRecordPtr takeOverPtr)3959 Dbdih::nr_start_logging(Signal* signal, TakeOverRecordPtr takeOverPtr)
3960 {
3961 Uint32 loopCount = 0 ;
3962 TabRecordPtr tabPtr;
3963 while (loopCount++ < 100)
3964 {
3965 tabPtr.i = takeOverPtr.p->toCurrentTabref;
3966 if (tabPtr.i >= ctabFileSize)
3967 {
3968 jam();
3969 takeOverPtr.p->toSlaveStatus = TakeOverRecord::TO_END_TO;
3970 EndToReq* req = (EndToReq*)signal->getDataPtrSend();
3971 req->senderData = takeOverPtr.i;
3972 req->senderRef = reference();
3973 req->flags = takeOverPtr.p->m_flags;
3974 sendSignal(cmasterdihref, GSN_END_TOREQ,
3975 signal, EndToReq::SignalLength, JBB);
3976
3977 return;
3978 }
3979 ptrAss(tabPtr, tabRecord);
3980 if (tabPtr.p->tabStatus != TabRecord::TS_ACTIVE ||
3981 tabPtr.p->tabStorage != TabRecord::ST_NORMAL)
3982 {
3983 jam();
3984 takeOverPtr.p->toCurrentFragid = 0;
3985 takeOverPtr.p->toCurrentTabref++;
3986 continue;
3987 }
3988
3989 Uint32 fragId = takeOverPtr.p->toCurrentFragid;
3990 if (fragId >= tabPtr.p->totalfragments)
3991 {
3992 jam();
3993 takeOverPtr.p->toCurrentFragid = 0;
3994 takeOverPtr.p->toCurrentTabref++;
3995 continue;
3996 }
3997
3998 FragmentstorePtr fragPtr;
3999 getFragstore(tabPtr.p, fragId, fragPtr);
4000 ReplicaRecordPtr loopReplicaPtr;
4001 loopReplicaPtr.i = fragPtr.p->storedReplicas;
4002 while (loopReplicaPtr.i != RNIL)
4003 {
4004 ptrCheckGuard(loopReplicaPtr, creplicaFileSize, replicaRecord);
4005 if (loopReplicaPtr.p->procNode == takeOverPtr.p->toStartingNode)
4006 {
4007 jam();
4008 ndbrequire(loopReplicaPtr.p->procNode == getOwnNodeId());
4009 takeOverPtr.p->toSlaveStatus = TakeOverRecord::TO_SL_COPY_ACTIVE;
4010
4011 Uint32 instanceKey = dihGetInstanceKey(fragPtr);
4012 BlockReference lqhRef = numberToRef(DBLQH, instanceKey,
4013 takeOverPtr.p->toStartingNode);
4014
4015 CopyActiveReq * const req = (CopyActiveReq *)&signal->theData[0];
4016 req->userPtr = takeOverPtr.i;
4017 req->userRef = reference();
4018 req->tableId = takeOverPtr.p->toCurrentTabref;
4019 req->fragId = takeOverPtr.p->toCurrentFragid;
4020 req->distributionKey = fragPtr.p->distributionKey;
4021 req->flags = 0;
4022 sendSignal(lqhRef,GSN_COPY_ACTIVEREQ, signal,
4023 CopyActiveReq::SignalLength, JBB);
4024 return;
4025 }
4026 else
4027 {
4028 jam();
4029 loopReplicaPtr.i = loopReplicaPtr.p->nextReplica;
4030 }
4031 }
4032 takeOverPtr.p->toCurrentFragid++;
4033 }
4034 signal->theData[0] = DihContinueB::ZTO_START_LOGGING;
4035 signal->theData[1] = takeOverPtr.i;
4036 sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
4037 }
4038
4039 void
sendStartTo(Signal * signal,TakeOverRecordPtr takeOverPtr)4040 Dbdih::sendStartTo(Signal* signal, TakeOverRecordPtr takeOverPtr)
4041 {
4042 takeOverPtr.p->toSlaveStatus = TakeOverRecord::TO_START_TO;
4043
4044 StartToReq* req = (StartToReq*)signal->getDataPtrSend();
4045 req->senderData = takeOverPtr.i;
4046 req->senderRef = reference();
4047 req->startingNodeId = takeOverPtr.p->toStartingNode;
4048 sendSignal(cmasterdihref, GSN_START_TOREQ,
4049 signal, StartToReq::SignalLength, JBB);
4050 }
4051
4052 void
execSTART_TOREF(Signal * signal)4053 Dbdih::execSTART_TOREF(Signal* signal)
4054 {
4055 jamEntry();
4056
4057 StartToRef* ref = (StartToRef*)signal->getDataPtr();
4058 Uint32 errCode = ref->errorCode;
4059 (void)errCode; // TODO check for "valid" error
4060
4061 TakeOverRecordPtr takeOverPtr;
4062 c_takeOverPool.getPtr(takeOverPtr, ref->senderData);
4063
4064 signal->theData[0] = DihContinueB::ZSEND_START_TO;
4065 signal->theData[1] = takeOverPtr.i;
4066
4067 sendSignalWithDelay(reference(), GSN_CONTINUEB,
4068 signal, 5000, 2);
4069 }
4070
4071 void
execSTART_TOCONF(Signal * signal)4072 Dbdih::execSTART_TOCONF(Signal* signal)
4073 {
4074 jamEntry();
4075 StartToConf * conf = (StartToConf*)signal->getDataPtr();
4076
4077 TakeOverRecordPtr takeOverPtr;
4078 c_takeOverPool.getPtr(takeOverPtr, conf->senderData);
4079
4080 CRASH_INSERTION(7133);
4081
4082 /**
4083 * We are now allowed to start copying
4084 */
4085 startNextCopyFragment(signal, takeOverPtr.i);
4086 }
4087
startNextCopyFragment(Signal * signal,Uint32 takeOverPtrI)4088 void Dbdih::startNextCopyFragment(Signal* signal, Uint32 takeOverPtrI)
4089 {
4090 TabRecordPtr tabPtr;
4091 TakeOverRecordPtr takeOverPtr;
4092 c_takeOverPool.getPtr(takeOverPtr, takeOverPtrI);
4093
4094 Uint32 loopCount;
4095 loopCount = 0;
4096 if (ERROR_INSERTED(7159)) {
4097 loopCount = 100;
4098 }//if
4099 while (loopCount++ < 100) {
4100 tabPtr.i = takeOverPtr.p->toCurrentTabref;
4101 if (tabPtr.i >= ctabFileSize) {
4102 jam();
4103 CRASH_INSERTION(7136);
4104 toCopyCompletedLab(signal, takeOverPtr);
4105 return;
4106 }//if
4107 ptrAss(tabPtr, tabRecord);
4108 if (tabPtr.p->tabStatus != TabRecord::TS_ACTIVE){
4109 jam();
4110 takeOverPtr.p->toCurrentFragid = 0;
4111 takeOverPtr.p->toCurrentTabref++;
4112 continue;
4113 }//if
4114 Uint32 fragId = takeOverPtr.p->toCurrentFragid;
4115 if (fragId >= tabPtr.p->totalfragments) {
4116 jam();
4117 takeOverPtr.p->toCurrentFragid = 0;
4118 takeOverPtr.p->toCurrentTabref++;
4119 if (ERROR_INSERTED(7135)) {
4120 if (takeOverPtr.p->toCurrentTabref == 1) {
4121 ndbrequire(false);
4122 }//if
4123 }//if
4124 continue;
4125 }//if
4126 FragmentstorePtr fragPtr;
4127 getFragstore(tabPtr.p, fragId, fragPtr);
4128 ReplicaRecordPtr loopReplicaPtr;
4129 loopReplicaPtr.i = fragPtr.p->oldStoredReplicas;
4130 while (loopReplicaPtr.i != RNIL) {
4131 ptrCheckGuard(loopReplicaPtr, creplicaFileSize, replicaRecord);
4132 if (loopReplicaPtr.p->procNode == takeOverPtr.p->toFailedNode) {
4133 jam();
4134 /* ----------------------------------------------------------------- */
4135 /* WE HAVE FOUND A REPLICA THAT BELONGED THE FAILED NODE THAT NEEDS */
4136 /* TAKE OVER. WE TAKE OVER THIS REPLICA TO THE NEW NODE. */
4137 /* ----------------------------------------------------------------- */
4138 takeOverPtr.p->toCurrentReplica = loopReplicaPtr.i;
4139 toCopyFragLab(signal, takeOverPtr.i);
4140 return;
4141 } else if (loopReplicaPtr.p->procNode == takeOverPtr.p->toStartingNode) {
4142 jam();
4143 /* ----------------------------------------------------------------- */
4144 /* WE HAVE OBVIOUSLY STARTED TAKING OVER THIS WITHOUT COMPLETING IT. */
4145 /* WE */
4146 /* NEED TO COMPLETE THE TAKE OVER OF THIS REPLICA. */
4147 /* ----------------------------------------------------------------- */
4148 takeOverPtr.p->toCurrentReplica = loopReplicaPtr.i;
4149 toCopyFragLab(signal, takeOverPtr.i);
4150 return;
4151 } else {
4152 jam();
4153 loopReplicaPtr.i = loopReplicaPtr.p->nextReplica;
4154 }//if
4155 }//while
4156 takeOverPtr.p->toCurrentFragid++;
4157 }//while
4158 signal->theData[0] = DihContinueB::ZTO_START_COPY_FRAG;
4159 signal->theData[1] = takeOverPtr.i;
4160 sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
4161 }//Dbdih::startNextCopyFragment()
4162
toCopyFragLab(Signal * signal,Uint32 takeOverPtrI)4163 void Dbdih::toCopyFragLab(Signal* signal,
4164 Uint32 takeOverPtrI)
4165 {
4166 TakeOverRecordPtr takeOverPtr;
4167 c_takeOverPool.getPtr(takeOverPtr, takeOverPtrI);
4168
4169 /**
4170 * Inform starting node that TakeOver is about to start
4171 */
4172 TabRecordPtr tabPtr;
4173 tabPtr.i = takeOverPtr.p->toCurrentTabref;
4174 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
4175
4176 FragmentstorePtr fragPtr;
4177 getFragstore(tabPtr.p, takeOverPtr.p->toCurrentFragid, fragPtr);
4178 Uint32 nodes[MAX_REPLICAS];
4179 extractNodeInfo(fragPtr.p, nodes);
4180 takeOverPtr.p->toCopyNode = nodes[0];
4181
4182 PrepareCopyFragReq* req= (PrepareCopyFragReq*)signal->getDataPtrSend();
4183 req->senderRef = reference();
4184 req->senderData = takeOverPtrI;
4185 req->tableId = takeOverPtr.p->toCurrentTabref;
4186 req->fragId = takeOverPtr.p->toCurrentFragid;
4187 req->copyNodeId = takeOverPtr.p->toCopyNode;
4188 req->startingNodeId = takeOverPtr.p->toStartingNode; // Dst
4189
4190 Uint32 instanceKey = dihGetInstanceKey(req->tableId, req->fragId);
4191 Uint32 ref = numberToRef(DBLQH, instanceKey, takeOverPtr.p->toStartingNode);
4192
4193 sendSignal(ref, GSN_PREPARE_COPY_FRAG_REQ, signal,
4194 PrepareCopyFragReq::SignalLength, JBB);
4195
4196 takeOverPtr.p->toSlaveStatus = TakeOverRecord::TO_PREPARE_COPY;
4197 }
4198
4199 void
execPREPARE_COPY_FRAG_REF(Signal * signal)4200 Dbdih::execPREPARE_COPY_FRAG_REF(Signal* signal)
4201 {
4202 jamEntry();
4203 PrepareCopyFragRef ref = *(PrepareCopyFragRef*)signal->getDataPtr();
4204
4205 TakeOverRecordPtr takeOverPtr;
4206 c_takeOverPool.getPtr(takeOverPtr, ref.senderData);
4207
4208 ndbrequire(takeOverPtr.p->toSlaveStatus == TakeOverRecord::TO_PREPARE_COPY);
4209
4210 /**
4211 * Treat this as copy frag ref
4212 */
4213 CopyFragRef * cfref = (CopyFragRef*)signal->getDataPtrSend();
4214 cfref->userPtr = ref.senderData;
4215 cfref->startingNodeId = ref.startingNodeId;
4216 cfref->errorCode = ref.errorCode;
4217 cfref->tableId = ref.tableId;
4218 cfref->fragId = ref.fragId;
4219 cfref->sendingNodeId = ref.copyNodeId;
4220 takeOverPtr.p->toSlaveStatus = TakeOverRecord::TO_COPY_FRAG;
4221 execCOPY_FRAGREF(signal);
4222 }
4223
4224 void
execPREPARE_COPY_FRAG_CONF(Signal * signal)4225 Dbdih::execPREPARE_COPY_FRAG_CONF(Signal* signal)
4226 {
4227 jamEntry();
4228 PrepareCopyFragConf conf = *(PrepareCopyFragConf*)signal->getDataPtr();
4229
4230 TakeOverRecordPtr takeOverPtr;
4231 c_takeOverPool.getPtr(takeOverPtr, conf.senderData);
4232
4233 Uint32 version = getNodeInfo(refToNode(conf.senderRef)).m_version;
4234 ndbrequire(ndb_check_prep_copy_frag_version(version) >= 2);
4235 takeOverPtr.p->maxPage = conf.maxPageNo;
4236
4237 /**
4238 * We need to lock fragment info...in order to later run CREATE_FRAG_REQ
4239 */
4240 takeOverPtr.p->toSlaveStatus = TakeOverRecord::TO_UPDATE_BEFORE_STORED;
4241 sendUpdateTo(signal, takeOverPtr);
4242 }
4243
4244 void
sendUpdateTo(Signal * signal,TakeOverRecordPtr takeOverPtr)4245 Dbdih::sendUpdateTo(Signal* signal, TakeOverRecordPtr takeOverPtr)
4246 {
4247 UpdateToReq* req = (UpdateToReq*)signal->getDataPtrSend();
4248 req->senderData = takeOverPtr.i;
4249 req->senderRef = reference();
4250 req->startingNodeId = takeOverPtr.p->toStartingNode;
4251 req->copyNodeId = takeOverPtr.p->toCopyNode;
4252 req->tableId = takeOverPtr.p->toCurrentTabref;
4253 req->fragmentNo = takeOverPtr.p->toCurrentFragid;
4254 switch(takeOverPtr.p->toSlaveStatus){
4255 case TakeOverRecord::TO_UPDATE_BEFORE_STORED:
4256 jam();
4257 req->requestType = UpdateToReq::BEFORE_STORED;
4258 break;
4259 case TakeOverRecord::TO_UPDATE_AFTER_STORED:
4260 req->requestType = UpdateToReq::AFTER_STORED;
4261 break;
4262 case TakeOverRecord::TO_UPDATE_BEFORE_COMMIT:
4263 jam();
4264 req->requestType = UpdateToReq::BEFORE_COMMIT_STORED;
4265 break;
4266 case TakeOverRecord::TO_UPDATE_AFTER_COMMIT:
4267 jam();
4268 req->requestType = UpdateToReq::AFTER_COMMIT_STORED;
4269 break;
4270 default:
4271 jamLine(takeOverPtr.p->toSlaveStatus);
4272 ndbrequire(false);
4273 }
4274 sendSignal(cmasterdihref, GSN_UPDATE_TOREQ,
4275 signal, UpdateToReq::SignalLength, JBB);
4276 }
4277
4278 void
execUPDATE_TOREF(Signal * signal)4279 Dbdih::execUPDATE_TOREF(Signal* signal)
4280 {
4281 jamEntry();
4282 UpdateToRef* ref = (UpdateToRef*)signal->getDataPtr();
4283 Uint32 errCode = ref->errorCode;
4284 (void)errCode; // TODO check for "valid" error
4285
4286 TakeOverRecordPtr takeOverPtr;
4287 c_takeOverPool.getPtr(takeOverPtr, ref->senderData);
4288
4289 signal->theData[0] = DihContinueB::ZSEND_UPDATE_TO;
4290 signal->theData[1] = takeOverPtr.i;
4291
4292 sendSignalWithDelay(reference(), GSN_CONTINUEB,
4293 signal, 5000, 2);
4294 }
4295
4296 void
execUPDATE_TOCONF(Signal * signal)4297 Dbdih::execUPDATE_TOCONF(Signal* signal)
4298 {
4299 jamEntry();
4300
4301 UpdateToConf* conf = (UpdateToConf*)signal->getDataPtr();
4302
4303 TakeOverRecordPtr takeOverPtr;
4304 c_takeOverPool.getPtr(takeOverPtr, conf->senderData);
4305
4306 switch(takeOverPtr.p->toSlaveStatus){
4307 case TakeOverRecord::TO_UPDATE_BEFORE_STORED:
4308 jam();
4309
4310 CRASH_INSERTION(7154);
4311
4312 takeOverPtr.p->toSlaveStatus = TakeOverRecord::TO_CREATE_FRAG_STORED;
4313 sendCreateFragReq(signal, ZINIT_CREATE_GCI, CreateFragReq::STORED, takeOverPtr.i);
4314 return;
4315 case TakeOverRecord::TO_UPDATE_AFTER_STORED:
4316 jam();
4317
4318 CRASH_INSERTION(7195);
4319
4320 takeOverPtr.p->toSlaveStatus = TakeOverRecord::TO_COPY_FRAG;
4321 toStartCopyFrag(signal, takeOverPtr);
4322 return;
4323 case TakeOverRecord::TO_UPDATE_BEFORE_COMMIT:
4324 jam();
4325
4326 CRASH_INSERTION(7196);
4327
4328 takeOverPtr.p->toSlaveStatus = TakeOverRecord::TO_CREATE_FRAG_COMMIT;
4329 sendCreateFragReq(signal, takeOverPtr.p->startGci,
4330 CreateFragReq::COMMIT_STORED, takeOverPtr.i);
4331 return;
4332 case TakeOverRecord::TO_UPDATE_AFTER_COMMIT:
4333 jam();
4334
4335 CRASH_INSERTION(7197);
4336
4337 takeOverPtr.p->toSlaveStatus = TakeOverRecord::TO_SELECTING_NEXT;
4338 startNextCopyFragment(signal, takeOverPtr.i);
4339 return;
4340 default:
4341 ndbrequire(false);
4342 }
4343 }
4344
4345 void
toStartCopyFrag(Signal * signal,TakeOverRecordPtr takeOverPtr)4346 Dbdih::toStartCopyFrag(Signal* signal, TakeOverRecordPtr takeOverPtr)
4347 {
4348 TabRecordPtr tabPtr;
4349 tabPtr.i = takeOverPtr.p->toCurrentTabref;
4350 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
4351
4352 Uint32 fragId = takeOverPtr.p->toCurrentFragid;
4353
4354 FragmentstorePtr fragPtr;
4355 getFragstore(tabPtr.p, fragId, fragPtr);
4356
4357 ReplicaRecordPtr replicaPtr;
4358 findReplica(replicaPtr, fragPtr.p, getOwnNodeId(), true);
4359
4360 Uint32 gci = replicaPtr.p->m_restorable_gci;
4361 replicaPtr.p->m_restorable_gci = 0; // used in union...
4362
4363 Uint32 instanceKey = dihGetInstanceKey(tabPtr.i, fragId);
4364 BlockReference ref = numberToRef(DBLQH, instanceKey,
4365 takeOverPtr.p->toCopyNode);
4366 CopyFragReq * const copyFragReq = (CopyFragReq *)&signal->theData[0];
4367 copyFragReq->userPtr = takeOverPtr.i;
4368 copyFragReq->userRef = reference();
4369 copyFragReq->tableId = tabPtr.i;
4370 copyFragReq->fragId = fragId;
4371 copyFragReq->nodeId = takeOverPtr.p->toStartingNode;
4372 copyFragReq->schemaVersion = tabPtr.p->schemaVersion;
4373 copyFragReq->distributionKey = fragPtr.p->distributionKey;
4374 copyFragReq->gci = gci;
4375 Uint32 len = copyFragReq->nodeCount =
4376 extractNodeInfo(fragPtr.p,
4377 copyFragReq->nodeList);
4378 copyFragReq->nodeList[len] = takeOverPtr.p->maxPage;
4379 copyFragReq->nodeList[len+1] = CopyFragReq::CFR_TRANSACTIONAL;
4380 sendSignal(ref, GSN_COPY_FRAGREQ, signal,
4381 CopyFragReq::SignalLength + len, JBB);
4382 }//Dbdih::toStartCopy()
4383
sendCreateFragReq(Signal * signal,Uint32 startGci,Uint32 replicaType,Uint32 takeOverPtrI)4384 void Dbdih::sendCreateFragReq(Signal* signal,
4385 Uint32 startGci,
4386 Uint32 replicaType,
4387 Uint32 takeOverPtrI)
4388 {
4389 Ptr<TakeOverRecord> takeOverPtr;
4390 c_takeOverPool.getPtr(takeOverPtr, takeOverPtrI);
4391
4392 sendLoopMacro(CREATE_FRAGREQ, nullRoutine, RNIL);
4393
4394 CreateFragReq * const req = (CreateFragReq *)&signal->theData[0];
4395 req->senderData = takeOverPtr.i;
4396 req->senderRef = reference();
4397 req->tableId = takeOverPtr.p->toCurrentTabref;
4398 req->fragId = takeOverPtr.p->toCurrentFragid;
4399 req->startingNodeId = takeOverPtr.p->toStartingNode;
4400 req->copyNodeId = takeOverPtr.p->toCopyNode;
4401 req->failedNodeId = takeOverPtr.p->toFailedNode;
4402 req->startGci = startGci;
4403 req->replicaType = replicaType;
4404
4405 NodeRecordPtr nodePtr;
4406 nodePtr.i = cfirstAliveNode;
4407 do {
4408 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
4409 BlockReference ref = calcDihBlockRef(nodePtr.i);
4410 sendSignal(ref, GSN_CREATE_FRAGREQ, signal,
4411 CreateFragReq::SignalLength, JBB);
4412 nodePtr.i = nodePtr.p->nextNode;
4413 } while (nodePtr.i != RNIL);
4414 }//Dbdih::sendCreateFragReq()
4415
execCREATE_FRAGCONF(Signal * signal)4416 void Dbdih::execCREATE_FRAGCONF(Signal* signal)
4417 {
4418 jamEntry();
4419 CRASH_INSERTION(7148);
4420 CreateFragConf * conf = (CreateFragConf *)&signal->theData[0];
4421
4422 TakeOverRecordPtr takeOverPtr;
4423 c_takeOverPool.getPtr(takeOverPtr, conf->senderData);
4424
4425 receiveLoopMacro(CREATE_FRAGREQ, conf->sendingNodeId);
4426
4427 switch(takeOverPtr.p->toSlaveStatus){
4428 case TakeOverRecord::TO_CREATE_FRAG_STORED:
4429 jam();
4430 CRASH_INSERTION(7198);
4431 takeOverPtr.p->toSlaveStatus = TakeOverRecord::TO_UPDATE_AFTER_STORED;
4432 break;
4433 case TakeOverRecord::TO_CREATE_FRAG_COMMIT:
4434 jam();
4435 CRASH_INSERTION(7199);
4436 takeOverPtr.p->toSlaveStatus = TakeOverRecord::TO_UPDATE_AFTER_COMMIT;
4437 break;
4438 case TakeOverRecord::TO_SL_CREATE_FRAG:
4439 jam();
4440 //CRASH_INSERTION(
4441 takeOverPtr.p->toSlaveStatus = TakeOverRecord::TO_START_LOGGING;
4442 takeOverPtr.p->toCurrentFragid++;
4443 signal->theData[0] = DihContinueB::ZTO_START_LOGGING;
4444 signal->theData[1] = takeOverPtr.i;
4445 sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
4446 return;
4447 default:
4448 jamLine(takeOverPtr.p->toSlaveStatus);
4449 ndbrequire(false);
4450 }
4451
4452 sendUpdateTo(signal, takeOverPtr);
4453 }//Dbdih::execCREATE_FRAGCONF()
4454
execCOPY_FRAGREF(Signal * signal)4455 void Dbdih::execCOPY_FRAGREF(Signal* signal)
4456 {
4457 const CopyFragRef * const ref = (CopyFragRef *)&signal->theData[0];
4458 jamEntry();
4459 Uint32 takeOverPtrI = ref->userPtr;
4460 Uint32 startingNodeId = ref->startingNodeId;
4461 Uint32 errorCode = ref->errorCode;
4462
4463 TakeOverRecordPtr takeOverPtr;
4464 c_takeOverPool.getPtr(takeOverPtr, takeOverPtrI);
4465 ndbrequire(ref->tableId == takeOverPtr.p->toCurrentTabref);
4466 ndbrequire(ref->fragId == takeOverPtr.p->toCurrentFragid);
4467 ndbrequire(ref->startingNodeId == takeOverPtr.p->toStartingNode);
4468 ndbrequire(ref->sendingNodeId == takeOverPtr.p->toCopyNode);
4469 ndbrequire(takeOverPtr.p->toSlaveStatus == TakeOverRecord::TO_COPY_FRAG);
4470
4471 //--------------------------------------------------------------------------
4472 // For some reason we did not succeed in copying a fragment. We treat this
4473 // as a serious failure and crash the starting node.
4474 //--------------------------------------------------------------------------
4475 BlockReference cntrRef = calcNdbCntrBlockRef(startingNodeId);
4476 SystemError * const sysErr = (SystemError*)&signal->theData[0];
4477 sysErr->errorCode = SystemError::CopyFragRefError;
4478 sysErr->errorRef = reference();
4479 sysErr->data[0] = errorCode;
4480 sysErr->data[1] = 0;
4481 sendSignal(cntrRef, GSN_SYSTEM_ERROR, signal,
4482 SystemError::SignalLength, JBB);
4483 return;
4484 }//Dbdih::execCOPY_FRAGREF()
4485
execCOPY_FRAGCONF(Signal * signal)4486 void Dbdih::execCOPY_FRAGCONF(Signal* signal)
4487 {
4488 const CopyFragConf * const conf = (CopyFragConf *)&signal->theData[0];
4489 jamEntry();
4490 CRASH_INSERTION(7142);
4491
4492 TakeOverRecordPtr takeOverPtr;
4493 c_takeOverPool.getPtr(takeOverPtr, conf->userPtr);
4494
4495 Uint32 rows_lo = conf->rows_lo;
4496 Uint32 bytes_lo = conf->bytes_lo;
4497
4498 ndbrequire(conf->tableId == takeOverPtr.p->toCurrentTabref);
4499 ndbrequire(conf->fragId == takeOverPtr.p->toCurrentFragid);
4500 ndbrequire(conf->startingNodeId == takeOverPtr.p->toStartingNode);
4501 ndbrequire(conf->sendingNodeId == takeOverPtr.p->toCopyNode);
4502 ndbrequire(takeOverPtr.p->toSlaveStatus == TakeOverRecord::TO_COPY_FRAG);
4503
4504 TabRecordPtr tabPtr;
4505 tabPtr.i = takeOverPtr.p->toCurrentTabref;
4506 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
4507
4508 FragmentstorePtr fragPtr;
4509 getFragstore(tabPtr.p, takeOverPtr.p->toCurrentFragid, fragPtr);
4510 Uint32 instanceKey = dihGetInstanceKey(fragPtr);
4511 BlockReference lqhRef = numberToRef(DBLQH, instanceKey,
4512 takeOverPtr.p->toStartingNode);
4513 CopyActiveReq * const req = (CopyActiveReq *)&signal->theData[0];
4514 req->userPtr = takeOverPtr.i;
4515 req->userRef = reference();
4516 req->tableId = takeOverPtr.p->toCurrentTabref;
4517 req->fragId = takeOverPtr.p->toCurrentFragid;
4518 req->distributionKey = fragPtr.p->distributionKey;
4519 req->flags = 0;
4520
4521 Uint32 min_version = getNodeVersionInfo().m_type[NodeInfo::DB].m_min_version;
4522 if (ndb_delayed_copy_active_req(min_version))
4523 {
4524 jam();
4525 /**
4526 * Bug48474 - Don't start logging an fragment
4527 * until all fragments has been copied
4528 * Else it's easy to run out of REDO
4529 */
4530 req->flags |= CopyActiveReq::CAR_NO_WAIT | CopyActiveReq::CAR_NO_LOGGING;
4531 }
4532
4533 sendSignal(lqhRef, GSN_COPY_ACTIVEREQ, signal,
4534 CopyActiveReq::SignalLength, JBB);
4535
4536 takeOverPtr.p->toSlaveStatus = TakeOverRecord::TO_COPY_ACTIVE;
4537
4538 signal->theData[0] = NDB_LE_NR_CopyFragDone;
4539 signal->theData[1] = getOwnNodeId();
4540 signal->theData[2] = takeOverPtr.p->toCurrentTabref;
4541 signal->theData[3] = takeOverPtr.p->toCurrentFragid;
4542 signal->theData[4] = rows_lo;
4543 signal->theData[5] = 0;
4544 signal->theData[6] = bytes_lo;
4545 signal->theData[7] = 0;
4546 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 8, JBB);
4547 }//Dbdih::execCOPY_FRAGCONF()
4548
execCOPY_ACTIVECONF(Signal * signal)4549 void Dbdih::execCOPY_ACTIVECONF(Signal* signal)
4550 {
4551 const CopyActiveConf * const conf = (CopyActiveConf *)&signal->theData[0];
4552 jamEntry();
4553 CRASH_INSERTION(7143);
4554
4555 TakeOverRecordPtr takeOverPtr;
4556 c_takeOverPool.getPtr(takeOverPtr, conf->userPtr);
4557
4558 ndbrequire(conf->tableId == takeOverPtr.p->toCurrentTabref);
4559 ndbrequire(conf->fragId == takeOverPtr.p->toCurrentFragid);
4560 ndbrequire(checkNodeAlive(conf->startingNodeId));
4561
4562 takeOverPtr.p->startGci = conf->startGci;
4563
4564 if (takeOverPtr.p->toSlaveStatus == TakeOverRecord::TO_COPY_ACTIVE)
4565 {
4566 jam();
4567 ndbrequire(takeOverPtr.p->toSlaveStatus == TakeOverRecord::TO_COPY_ACTIVE);
4568 takeOverPtr.p->toSlaveStatus = TakeOverRecord::TO_UPDATE_BEFORE_COMMIT;
4569 sendUpdateTo(signal, takeOverPtr);
4570 }
4571 else
4572 {
4573 jam();
4574 ndbrequire(takeOverPtr.p->toSlaveStatus==TakeOverRecord::TO_SL_COPY_ACTIVE);
4575 takeOverPtr.p->toSlaveStatus = TakeOverRecord::TO_SL_CREATE_FRAG;
4576 sendCreateFragReq(signal, takeOverPtr.p->startGci,
4577 CreateFragReq::START_LOGGING, takeOverPtr.i);
4578 }
4579 }//Dbdih::execCOPY_ACTIVECONF()
4580
toCopyCompletedLab(Signal * signal,TakeOverRecordPtr takeOverPtr)4581 void Dbdih::toCopyCompletedLab(Signal * signal, TakeOverRecordPtr takeOverPtr)
4582 {
4583 signal->theData[0] = NDB_LE_NR_CopyFragsCompleted;
4584 signal->theData[1] = takeOverPtr.p->toStartingNode;
4585 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
4586
4587 Uint32 min_version = getNodeVersionInfo().m_type[NodeInfo::DB].m_min_version;
4588 if (ndb_delayed_copy_active_req(min_version))
4589 {
4590 jam();
4591 takeOverPtr.p->toSlaveStatus = TakeOverRecord::TO_START_LOGGING;
4592 takeOverPtr.p->toCurrentTabref = 0;
4593 takeOverPtr.p->toCurrentFragid = 0;
4594 takeOverPtr.p->toCurrentReplica = RNIL;
4595 nr_start_logging(signal, takeOverPtr);
4596 return;
4597 }
4598 else
4599 {
4600 jam();
4601
4602 takeOverPtr.p->toSlaveStatus = TakeOverRecord::TO_END_TO;
4603
4604 EndToReq* req = (EndToReq*)signal->getDataPtrSend();
4605 req->senderData = takeOverPtr.i;
4606 req->senderRef = reference();
4607 req->flags = takeOverPtr.p->m_flags;
4608 sendSignal(cmasterdihref, GSN_END_TOREQ,
4609 signal, EndToReq::SignalLength, JBB);
4610 return;
4611 }
4612 }//Dbdih::toCopyCompletedLab()
4613
4614 void
execEND_TOREF(Signal * signal)4615 Dbdih::execEND_TOREF(Signal* signal)
4616 {
4617 jamEntry();
4618 EndToRef* ref = (EndToRef*)signal->getDataPtr();
4619
4620 TakeOverRecordPtr takeOverPtr;
4621 c_takeOverPool.getPtr(takeOverPtr, ref->senderData);
4622
4623 ndbrequire(false);
4624 }
4625
4626 void
execEND_TOCONF(Signal * signal)4627 Dbdih::execEND_TOCONF(Signal* signal)
4628 {
4629 jamEntry();
4630 EndToConf* conf = (EndToConf*)signal->getDataPtr();
4631
4632 CRASH_INSERTION(7144);
4633
4634 TakeOverRecordPtr takeOverPtr;
4635 c_takeOverPool.getPtr(takeOverPtr, conf->senderData);
4636
4637 Uint32 senderData = takeOverPtr.p->m_senderData;
4638 Uint32 senderRef = takeOverPtr.p->m_senderRef;
4639 Uint32 nodeId = takeOverPtr.p->toStartingNode;
4640
4641 releaseTakeOver(takeOverPtr);
4642
4643 StartCopyConf* ret = (StartCopyConf*)signal->getDataPtrSend();
4644 ret->startingNodeId = nodeId;
4645 ret->senderData = senderData;
4646 ret->senderRef = reference();
4647 sendSignal(senderRef, GSN_START_COPYCONF, signal,
4648 StartCopyConf::SignalLength, JBB);
4649 }
4650
releaseTakeOver(TakeOverRecordPtr takeOverPtr)4651 void Dbdih::releaseTakeOver(TakeOverRecordPtr takeOverPtr)
4652 {
4653 takeOverPtr.p->toCopyNode = RNIL;
4654 takeOverPtr.p->toCurrentFragid = RNIL;
4655 takeOverPtr.p->toCurrentReplica = RNIL;
4656 takeOverPtr.p->toCurrentTabref = RNIL;
4657 takeOverPtr.p->toFailedNode = RNIL;
4658 takeOverPtr.p->toStartingNode = RNIL;
4659 takeOverPtr.p->toStartTime = 0;
4660 takeOverPtr.p->toSlaveStatus = TakeOverRecord::TO_SLAVE_IDLE;
4661 takeOverPtr.p->toMasterStatus = TakeOverRecord::TO_MASTER_IDLE;
4662
4663 c_activeTakeOverList.release(takeOverPtr);
4664 }//Dbdih::releaseTakeOver()
4665
4666
4667 /*****************************************************************************/
4668 /* ------------------------------------------------------------------------- */
4669 /* WE HAVE BEEN REQUESTED TO PERFORM A SYSTEM RESTART. WE START BY */
4670 /* READING THE GCI FILES. THIS REQUEST WILL ONLY BE SENT TO THE MASTER */
4671 /* DIH. THAT MEANS WE HAVE TO REPLICATE THE INFORMATION WE READ FROM */
4672 /* OUR FILES TO ENSURE THAT ALL NODES HAVE THE SAME DISTRIBUTION */
4673 /* INFORMATION. */
4674 /* ------------------------------------------------------------------------- */
4675 /*****************************************************************************/
readGciFileLab(Signal * signal)4676 void Dbdih::readGciFileLab(Signal* signal)
4677 {
4678 FileRecordPtr filePtr;
4679 filePtr.i = crestartInfoFile[0];
4680 ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
4681 filePtr.p->reqStatus = FileRecord::OPENING_GCP;
4682
4683 openFileRo(signal, filePtr);
4684 }//Dbdih::readGciFileLab()
4685
openingGcpLab(Signal * signal,FileRecordPtr filePtr)4686 void Dbdih::openingGcpLab(Signal* signal, FileRecordPtr filePtr)
4687 {
4688 /* ----------------------------------------------------------------------- */
4689 /* WE HAVE SUCCESSFULLY OPENED A FILE CONTAINING INFORMATION ABOUT */
4690 /* THE GLOBAL CHECKPOINTS THAT ARE POSSIBLE TO RESTART. */
4691 /* ----------------------------------------------------------------------- */
4692 readRestorableGci(signal, filePtr);
4693 filePtr.p->reqStatus = FileRecord::READING_GCP;
4694 }//Dbdih::openingGcpLab()
4695
readingGcpLab(Signal * signal,FileRecordPtr filePtr)4696 void Dbdih::readingGcpLab(Signal* signal, FileRecordPtr filePtr)
4697 {
4698 /* ----------------------------------------------------------------------- */
4699 /* WE HAVE NOW SUCCESSFULLY MANAGED TO READ IN THE GLOBAL CHECKPOINT */
4700 /* INFORMATION FROM FILE. LATER WE WILL ADD SOME FUNCTIONALITY THAT */
4701 /* CHECKS THE RESTART TIMERS TO DEDUCE FROM WHERE TO RESTART. */
4702 /* NOW WE WILL SIMPLY RESTART FROM THE NEWEST GLOBAL CHECKPOINT */
4703 /* POSSIBLE TO RESTORE. */
4704 /* */
4705 /* BEFORE WE INVOKE DICT WE NEED TO COPY CRESTART_INFO TO ALL NODES. */
4706 /* WE ALSO COPY TO OUR OWN NODE. TO ENABLE US TO DO THIS PROPERLY WE */
4707 /* START BY CLOSING THIS FILE. */
4708 /* ----------------------------------------------------------------------- */
4709 globalData.m_restart_seq = ++SYSFILE->m_restart_seq;
4710 closeFile(signal, filePtr);
4711 filePtr.p->reqStatus = FileRecord::CLOSING_GCP;
4712 }//Dbdih::readingGcpLab()
4713
closingGcpLab(Signal * signal,FileRecordPtr filePtr)4714 void Dbdih::closingGcpLab(Signal* signal, FileRecordPtr filePtr)
4715 {
4716 if (Sysfile::getInitialStartOngoing(SYSFILE->systemRestartBits) == false){
4717 jam();
4718 selectMasterCandidateAndSend(signal);
4719 return;
4720 } else {
4721 jam();
4722 sendDihRestartRef(signal);
4723 return;
4724 }//if
4725 }//Dbdih::closingGcpLab()
4726
4727 void
sendDihRestartRef(Signal * signal)4728 Dbdih::sendDihRestartRef(Signal* signal)
4729 {
4730 jam();
4731
4732 /**
4733 * We couldn't read P0.Sysfile...
4734 * so compute no_nodegroup_mask from configuration
4735 */
4736 NdbNodeBitmask no_nodegroup_mask;
4737
4738 ndb_mgm_configuration_iterator * iter =
4739 m_ctx.m_config.getClusterConfigIterator();
4740 for(ndb_mgm_first(iter); ndb_mgm_valid(iter); ndb_mgm_next(iter))
4741 {
4742 jam();
4743 Uint32 nodeId;
4744 Uint32 nodeType;
4745
4746 ndbrequire(!ndb_mgm_get_int_parameter(iter,CFG_NODE_ID, &nodeId));
4747 ndbrequire(!ndb_mgm_get_int_parameter(iter,CFG_TYPE_OF_SECTION,
4748 &nodeType));
4749
4750 if (nodeType == NodeInfo::DB)
4751 {
4752 jam();
4753 Uint32 ng;
4754 if (ndb_mgm_get_int_parameter(iter, CFG_DB_NODEGROUP, &ng) == 0)
4755 {
4756 jam();
4757 if (ng == NDB_NO_NODEGROUP)
4758 {
4759 no_nodegroup_mask.set(nodeId);
4760 }
4761 }
4762 }
4763 }
4764 DihRestartRef * ref = CAST_PTR(DihRestartRef, signal->getDataPtrSend());
4765 no_nodegroup_mask.copyto(NdbNodeBitmask::Size, ref->no_nodegroup_mask);
4766 sendSignal(cntrlblockref, GSN_DIH_RESTARTREF, signal,
4767 DihRestartRef::SignalLength, JBB);
4768 }
4769
4770 /* ------------------------------------------------------------------------- */
4771 /* SELECT THE MASTER CANDIDATE TO BE USED IN SYSTEM RESTARTS. */
4772 /* ------------------------------------------------------------------------- */
selectMasterCandidateAndSend(Signal * signal)4773 void Dbdih::selectMasterCandidateAndSend(Signal* signal)
4774 {
4775 setNodeGroups();
4776
4777 NodeRecordPtr nodePtr;
4778 Uint32 node_groups[MAX_NDB_NODES];
4779 memset(node_groups, 0, sizeof(node_groups));
4780 NdbNodeBitmask no_nodegroup_mask;
4781 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
4782 jam();
4783 if (Sysfile::getNodeStatus(nodePtr.i, SYSFILE->nodeStatus) == Sysfile::NS_NotDefined)
4784 {
4785 jam();
4786 continue;
4787 }
4788 const Uint32 ng = Sysfile::getNodeGroup(nodePtr.i, SYSFILE->nodeGroups);
4789 if(ng != NO_NODE_GROUP_ID)
4790 {
4791 ndbrequire(ng < MAX_NDB_NODES);
4792 node_groups[ng]++;
4793 }
4794 else
4795 {
4796 no_nodegroup_mask.set(nodePtr.i);
4797 }
4798 }
4799
4800 DihRestartConf * conf = CAST_PTR(DihRestartConf, signal->getDataPtrSend());
4801 conf->unused = getOwnNodeId();
4802 conf->latest_gci = SYSFILE->lastCompletedGCI[getOwnNodeId()];
4803 no_nodegroup_mask.copyto(NdbNodeBitmask::Size, conf->no_nodegroup_mask);
4804 sendSignal(cntrlblockref, GSN_DIH_RESTARTCONF, signal,
4805 DihRestartConf::SignalLength, JBB);
4806
4807 for (nodePtr.i = 0; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
4808 jam();
4809 Uint32 count = node_groups[nodePtr.i];
4810 if(count != 0 && count != cnoReplicas){
4811 char buf[255];
4812 BaseString::snprintf(buf, sizeof(buf),
4813 "Illegal configuration change."
4814 " Initial start needs to be performed "
4815 " when changing no of replicas (%d != %d)",
4816 node_groups[nodePtr.i], cnoReplicas);
4817 progError(__LINE__, NDBD_EXIT_INVALID_CONFIG, buf);
4818 }
4819 }
4820 }//Dbdih::selectMasterCandidate()
4821
4822 /* ------------------------------------------------------------------------- */
4823 /* ERROR HANDLING DURING READING RESTORABLE GCI FROM FILE. */
4824 /* ------------------------------------------------------------------------- */
openingGcpErrorLab(Signal * signal,FileRecordPtr filePtr)4825 void Dbdih::openingGcpErrorLab(Signal* signal, FileRecordPtr filePtr)
4826 {
4827 filePtr.p->fileStatus = FileRecord::CRASHED;
4828 filePtr.p->reqStatus = FileRecord::IDLE;
4829 if (crestartInfoFile[0] == filePtr.i) {
4830 jam();
4831 /* --------------------------------------------------------------------- */
4832 /* THE FIRST FILE WAS NOT ABLE TO BE OPENED. SET STATUS TO CRASHED AND */
4833 /* TRY OPEN THE NEXT FILE. */
4834 /* --------------------------------------------------------------------- */
4835 filePtr.i = crestartInfoFile[1];
4836 ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
4837 openFileRo(signal, filePtr);
4838 filePtr.p->reqStatus = FileRecord::OPENING_GCP;
4839 } else {
4840 jam();
4841 /* --------------------------------------------------------------------- */
4842 /* WE FAILED IN OPENING THE SECOND FILE. BOTH FILES WERE CORRUPTED. WE */
4843 /* CANNOT CONTINUE THE RESTART IN THIS CASE. TELL NDBCNTR OF OUR */
4844 /* FAILURE. */
4845 /*---------------------------------------------------------------------- */
4846 sendDihRestartRef(signal);
4847 return;
4848 }//if
4849 }//Dbdih::openingGcpErrorLab()
4850
readingGcpErrorLab(Signal * signal,FileRecordPtr filePtr)4851 void Dbdih::readingGcpErrorLab(Signal* signal, FileRecordPtr filePtr)
4852 {
4853 filePtr.p->fileStatus = FileRecord::CRASHED;
4854 /* ----------------------------------------------------------------------- */
4855 /* WE FAILED IN READING THE FILE AS WELL. WE WILL CLOSE THIS FILE. */
4856 /* ----------------------------------------------------------------------- */
4857 closeFile(signal, filePtr);
4858 filePtr.p->reqStatus = FileRecord::CLOSING_GCP_CRASH;
4859 }//Dbdih::readingGcpErrorLab()
4860
closingGcpCrashLab(Signal * signal,FileRecordPtr filePtr)4861 void Dbdih::closingGcpCrashLab(Signal* signal, FileRecordPtr filePtr)
4862 {
4863 if (crestartInfoFile[0] == filePtr.i) {
4864 jam();
4865 /* --------------------------------------------------------------------- */
4866 /* ERROR IN FIRST FILE, TRY THE SECOND FILE. */
4867 /* --------------------------------------------------------------------- */
4868 filePtr.i = crestartInfoFile[1];
4869 ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
4870 openFileRw(signal, filePtr);
4871 filePtr.p->reqStatus = FileRecord::OPENING_GCP;
4872 return;
4873 }//if
4874 /* ----------------------------------------------------------------------- */
4875 /* WE DISCOVERED A FAILURE WITH THE SECOND FILE AS WELL. THIS IS A */
4876 /* SERIOUS PROBLEM. REPORT FAILURE TO NDBCNTR. */
4877 /* ----------------------------------------------------------------------- */
4878 sendDihRestartRef(signal);
4879 }//Dbdih::closingGcpCrashLab()
4880
4881 /*****************************************************************************/
4882 /* ------------------------------------------------------------------------- */
4883 /* THIS IS AN INITIAL RESTART. WE WILL CREATE THE TWO FILES DESCRIBING */
4884 /* THE GLOBAL CHECKPOINTS THAT ARE RESTORABLE. */
4885 /* ------------------------------------------------------------------------- */
4886 /*****************************************************************************/
initGciFilesLab(Signal * signal)4887 void Dbdih::initGciFilesLab(Signal* signal)
4888 {
4889 FileRecordPtr filePtr;
4890 filePtr.i = crestartInfoFile[0];
4891 ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
4892 createFileRw(signal, filePtr);
4893 filePtr.p->reqStatus = FileRecord::CREATING_GCP;
4894 }//Dbdih::initGciFilesLab()
4895
4896 /* ------------------------------------------------------------------------- */
/*       A GLOBAL CHECKPOINT FILE HAS BEEN SUCCESSFULLY CREATED.             */
4898 /* ------------------------------------------------------------------------- */
creatingGcpLab(Signal * signal,FileRecordPtr filePtr)4899 void Dbdih::creatingGcpLab(Signal* signal, FileRecordPtr filePtr)
4900 {
4901 if (filePtr.i == crestartInfoFile[0]) {
4902 jam();
4903 /* --------------------------------------------------------------------- */
4904 /* IF CREATED FIRST THEN ALSO CREATE THE SECOND FILE. */
4905 /* --------------------------------------------------------------------- */
4906 filePtr.i = crestartInfoFile[1];
4907 ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
4908 createFileRw(signal, filePtr);
4909 filePtr.p->reqStatus = FileRecord::CREATING_GCP;
4910 } else {
4911 jam();
4912 /* --------------------------------------------------------------------- */
4913 /* BOTH FILES HAVE BEEN CREATED. NOW WRITE THE INITIAL DATA TO BOTH */
4914 /* OF THE FILES. */
4915 /* --------------------------------------------------------------------- */
4916 filePtr.i = crestartInfoFile[0];
4917 ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
4918 writeRestorableGci(signal, filePtr);
4919 filePtr.p->reqStatus = FileRecord::WRITE_INIT_GCP;
4920 }//if
4921 }//Dbdih::creatingGcpLab()
4922
4923 /* ------------------------------------------------------------------------- */
4924 /* WE HAVE SUCCESSFULLY WRITTEN A GCI FILE. */
4925 /* ------------------------------------------------------------------------- */
writeInitGcpLab(Signal * signal,FileRecordPtr filePtr)4926 void Dbdih::writeInitGcpLab(Signal* signal, FileRecordPtr filePtr)
4927 {
4928 filePtr.p->reqStatus = FileRecord::IDLE;
4929 if (filePtr.i == crestartInfoFile[0]) {
4930 jam();
4931 /* --------------------------------------------------------------------- */
4932 /* WE HAVE WRITTEN THE FIRST FILE NOW ALSO WRITE THE SECOND FILE. */
4933 /* --------------------------------------------------------------------- */
4934 filePtr.i = crestartInfoFile[1];
4935 ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
4936 writeRestorableGci(signal, filePtr);
4937 filePtr.p->reqStatus = FileRecord::WRITE_INIT_GCP;
4938 } else {
4939 /* --------------------------------------------------------------------- */
4940 /* WE HAVE WRITTEN BOTH FILES. LEAVE BOTH FILES OPEN AND CONFIRM OUR */
4941 /* PART OF THE INITIAL START. */
4942 /* --------------------------------------------------------------------- */
4943 if (isMaster()) {
4944 jam();
4945 /*---------------------------------------------------------------------*/
4946 // IN MASTER NODES THE START REQUEST IS RECEIVED FROM NDBCNTR AND WE MUST
4947 // RESPOND WHEN COMPLETED.
4948 /*---------------------------------------------------------------------*/
4949 signal->theData[0] = reference();
4950 sendSignal(cndbStartReqBlockref, GSN_NDB_STARTCONF, signal, 1, JBB);
4951 } else {
4952 jam();
4953 ndbsttorry10Lab(signal, __LINE__);
4954 return;
4955 }//if
4956 }//if
4957 }//Dbdih::writeInitGcpLab()
4958
4959 /*****************************************************************************/
4960 /* ********** NODES DELETION MODULE *************/
4961 /*****************************************************************************/
4962 /*---------------------------------------------------------------------------*/
4963 /* LOGIC FOR NODE FAILURE */
4964 /*---------------------------------------------------------------------------*/
/**
 * Signal handler for NODE_FAILREP: one or more nodes have failed.
 *
 * Reads the failure number, the master node id and the set of failed nodes
 * from the signal and then:
 *  1. expands the failed-node bitmask into the local failedNodes[] array,
 *  2. flags every failed node as unusable and moves nodes that were ALIVE
 *     from the alive list to the dead list (status DIED_NOW),
 *  3. calls checkEscalation(),
 *  4. switches cmasterdihref/cmasterNodeId to the master named in the signal,
 *  5. runs the per-node failure handling (a part for the node that was
 *     master before the failure, a part for all other nodes, and a common
 *     part),
 *  6. starts LCP and GCP master take-over if the master node id changed, and
 *  7. calls setNodeRestartInfoBits() if this node is master.
 *
 * @param signal  The received NODE_FAILREP; its data area is also reused for
 *                the signals sent by the helpers called from here.
 */
void Dbdih::execNODE_FAILREP(Signal* signal)
{
  Uint32 i;
  Uint32 failedNodes[MAX_NDB_NODES];
  jamEntry();
  NodeFailRep * const nodeFail = (NodeFailRep *)&signal->theData[0];

  // nodeFail points into signal->theData, which helpers called further down
  // overwrite when they build outgoing signals. Copy the scalars out first.
  cfailurenr = nodeFail->failNo;
  Uint32 newMasterId = nodeFail->masterNodeId;
  const Uint32 noOfFailedNodes = nodeFail->noOfNodes;

  // Error inserts 7179 and 7217 are cleared once a node failure is reported.
  if (ERROR_INSERTED(7179) || ERROR_INSERTED(7217))
  {
    CLEAR_ERROR_INSERT_VALUE;
  }

  // Error insert 7184 is replaced by error insert 7000.
  if (ERROR_INSERTED(7184))
  {
    SET_ERROR_INSERT_VALUE(7000);
  }



  /*-------------------------------------------------------------------------*/
  // The first step is to convert from a bit mask to an array of failed nodes.
  /*-------------------------------------------------------------------------*/
  Uint32 index = 0;
  for (i = 1; i < MAX_NDB_NODES; i++) {
    jam();
    if(NdbNodeBitmask::get(nodeFail->theNodes, i)){
      jam();
      failedNodes[index] = i;
      index++;
    }//if
  }//for
  // The bitmask and the count in the signal must agree.
  ndbrequire(noOfFailedNodes == index);
  // Unsigned arithmetic: this also fails when noOfFailedNodes is 0.
  ndbrequire(noOfFailedNodes - 1 < MAX_NDB_NODES);

  /*-------------------------------------------------------------------------*/
  // The second step is to update the node status of the failed nodes, remove
  // them from the alive node list and put them into the dead node list. Also
  // update the number of nodes on-line.
  // We also set certain state variables ensuring that the node no longer is
  // used in transactions and also mark that we received this signal.
  /*-------------------------------------------------------------------------*/
  for (i = 0; i < noOfFailedNodes; i++) {
    jam();
    NodeRecordPtr TNodePtr;
    TNodePtr.i = failedNodes[i];
    ptrCheckGuard(TNodePtr, MAX_NDB_NODES, nodeRecord);
    TNodePtr.p->useInTransactions = false;
    TNodePtr.p->m_inclDihLcp = false;
    TNodePtr.p->recNODE_FAILREP = ZTRUE;
    // Only nodes that were ALIVE are counted as on-line and sit in the
    // alive list; other statuses are left untouched here.
    if (TNodePtr.p->nodeStatus == NodeRecord::ALIVE) {
      jam();
      con_lineNodes--;
      TNodePtr.p->nodeStatus = NodeRecord::DIED_NOW;
      removeAlive(TNodePtr);
      insertDeadNode(TNodePtr);
    }//if
  }//for

  /*-------------------------------------------------------------------------*/
  // Verify that we can continue to operate the cluster. If we cannot we will
  // not return from checkEscalation.
  /*-------------------------------------------------------------------------*/
  checkEscalation();

  /*------------------------------------------------------------------------*/
  // Verify that a starting node has also crashed. Reset the node start record.
  /*-------------------------------------------------------------------------*/
  // NOTE: the code below is compiled out (#if 0) and kept for reference only.
#if 0
  /**
   * Node will crash by itself...
   *   nodeRestart is run then...
   */
  if (false && c_nodeStartMaster.startNode != RNIL && getNodeStatus(c_nodeStartMaster.startNode) == NodeRecord::ALIVE)
  {
    BlockReference cntrRef = calcNdbCntrBlockRef(c_nodeStartMaster.startNode);
    SystemError * const sysErr = (SystemError*)&signal->theData[0];
    sysErr->errorCode = SystemError::StartInProgressError;
    sysErr->errorRef = reference();
    sysErr->data[0]= 0;
    sysErr->data[1]= __LINE__;
    sendSignal(cntrRef, GSN_SYSTEM_ERROR, signal, SystemError::SignalLength, JBA);
    nodeResetStart(signal);
  }//if
#endif

  /*--------------------------------------------------*/
  /*                                                  */
  /*       WE CHANGE THE REFERENCE TO MASTER DIH      */
  /*       BLOCK AND POINTER AT THIS PLACE IN THE CODE*/
  /*--------------------------------------------------*/
  // Remember the previous master so that the per-node handling below can
  // tell whether this node was master before the failure.
  Uint32 oldMasterId = cmasterNodeId;
  BlockReference oldMasterRef = cmasterdihref;
  cmasterdihref = calcDihBlockRef(newMasterId);
  cmasterNodeId = newMasterId;

  const bool masterTakeOver = (oldMasterId != newMasterId);

  for(i = 0; i < noOfFailedNodes; i++) {
    NodeRecordPtr failedNodePtr;
    failedNodePtr.i = failedNodes[i];
    ptrCheckGuard(failedNodePtr, MAX_NDB_NODES, nodeRecord);
    if (oldMasterRef == reference()) {
      /*-------------------------------------------------------*/
      // Functions that need to be called only for master nodes.
      /*-------------------------------------------------------*/
      checkCopyTab(signal, failedNodePtr);
      checkStopPermMaster(signal, failedNodePtr);
      checkWaitGCPMaster(signal, failedNodes[i]);

      {
        // Clean up a take-over in which the failed node is the starting node.
        Ptr<TakeOverRecord> takeOverPtr;
        if (findTakeOver(takeOverPtr, failedNodePtr.i))
        {
          handleTakeOver(signal, takeOverPtr);
        }
      }
      checkGcpOutstanding(signal, failedNodePtr.i);
    } else {
      jam();
      /*-----------------------------------------------------------*/
      // Functions that need to be called only for nodes that were
      // not master before these failures.
      /*-----------------------------------------------------------*/
      checkStopPermProxy(signal, failedNodes[i]);
      checkWaitGCPProxy(signal, failedNodes[i]);
    }//if
    /*--------------------------------------------------*/
    // Functions that need to be called for all nodes.
    /*--------------------------------------------------*/
    checkStopMe(signal, failedNodePtr);
    failedNodeLcpHandling(signal, failedNodePtr);
    startRemoveFailedNode(signal, failedNodePtr);

    /**
     * This is the last function called
     *   It modifies failedNodePtr.p->nodeStatus
     */
    failedNodeSynchHandling(signal, failedNodePtr);
  }//for

  if(masterTakeOver){
    jam();
    startLcpMasterTakeOver(signal, oldMasterId);
    startGcpMasterTakeOver(signal, oldMasterId);

    // A master failure while this node is in a node restart is fatal.
    if(getNodeState().getNodeRestartInProgress()){
      jam();
      progError(__LINE__, NDBD_EXIT_MASTER_FAILURE_DURING_NR);
    }
  }


  if (isMaster()) {
    jam();
    setNodeRestartInfoBits(signal);
  }//if
}//Dbdih::execNODE_FAILREP()
5126
checkCopyTab(Signal * signal,NodeRecordPtr failedNodePtr)5127 void Dbdih::checkCopyTab(Signal* signal, NodeRecordPtr failedNodePtr)
5128 {
5129 jam();
5130
5131 if(c_nodeStartMaster.startNode != failedNodePtr.i){
5132 jam();
5133 return;
5134 }
5135
5136 switch(c_nodeStartMaster.m_outstandingGsn){
5137 case GSN_COPY_TABREQ:
5138 jam();
5139 ndbrequire(c_COPY_TABREQ_Counter.isWaitingFor(failedNodePtr.i));
5140 releaseTabPages(failedNodePtr.p->activeTabptr);
5141 c_COPY_TABREQ_Counter.clearWaitingFor(failedNodePtr.i);
5142 c_nodeStartMaster.wait = ZFALSE;
5143 break;
5144 case GSN_START_INFOREQ:
5145 case GSN_START_PERMCONF:
5146 case GSN_DICTSTARTREQ:
5147 case GSN_START_MECONF:
5148 case GSN_COPY_GCIREQ:
5149 jam();
5150 break;
5151 default:
5152 g_eventLogger->error("outstanding gsn: %s(%d)",
5153 getSignalName(c_nodeStartMaster.m_outstandingGsn),
5154 c_nodeStartMaster.m_outstandingGsn);
5155 ndbrequire(false);
5156 }
5157
5158 if (!c_nodeStartMaster.m_fragmentInfoMutex.isNull())
5159 {
5160 jam();
5161 Mutex mutex(signal, c_mutexMgr, c_nodeStartMaster.m_fragmentInfoMutex);
5162 mutex.unlock();
5163 }
5164
5165 nodeResetStart(signal);
5166 }//Dbdih::checkCopyTab()
5167
checkStopMe(Signal * signal,NodeRecordPtr failedNodePtr)5168 void Dbdih::checkStopMe(Signal* signal, NodeRecordPtr failedNodePtr)
5169 {
5170 jam();
5171 if (c_STOP_ME_REQ_Counter.isWaitingFor(failedNodePtr.i)){
5172 jam();
5173 ndbrequire(c_stopMe.clientRef != 0);
5174 StopMeConf * const stopMeConf = (StopMeConf *)&signal->theData[0];
5175 stopMeConf->senderRef = calcDihBlockRef(failedNodePtr.i);
5176 stopMeConf->senderData = c_stopMe.clientData;
5177 sendSignal(reference(), GSN_STOP_ME_CONF, signal,
5178 StopMeConf::SignalLength, JBB);
5179 }//if
5180 }//Dbdih::checkStopMe()
5181
checkStopPermMaster(Signal * signal,NodeRecordPtr failedNodePtr)5182 void Dbdih::checkStopPermMaster(Signal* signal, NodeRecordPtr failedNodePtr)
5183 {
5184 DihSwitchReplicaRef* const ref = (DihSwitchReplicaRef*)&signal->theData[0];
5185 jam();
5186 if (c_DIH_SWITCH_REPLICA_REQ_Counter.isWaitingFor(failedNodePtr.i)){
5187 jam();
5188 ndbrequire(c_stopPermMaster.clientRef != 0);
5189 ref->senderNode = failedNodePtr.i;
5190 ref->errorCode = StopPermRef::NF_CausedAbortOfStopProcedure;
5191 sendSignal(reference(), GSN_DIH_SWITCH_REPLICA_REF, signal,
5192 DihSwitchReplicaRef::SignalLength, JBB);
5193 return;
5194 }//if
5195 }//Dbdih::checkStopPermMaster()
5196
checkStopPermProxy(Signal * signal,NodeId failedNodeId)5197 void Dbdih::checkStopPermProxy(Signal* signal, NodeId failedNodeId)
5198 {
5199 jam();
5200 if(c_stopPermProxy.clientRef != 0 &&
5201 refToNode(c_stopPermProxy.masterRef) == failedNodeId){
5202
5203 /**
5204 * The master has failed report to proxy-client
5205 */
5206 jam();
5207 StopPermRef* const ref = (StopPermRef*)&signal->theData[0];
5208
5209 ref->senderData = c_stopPermProxy.clientData;
5210 ref->errorCode = StopPermRef::NF_CausedAbortOfStopProcedure;
5211 sendSignal(c_stopPermProxy.clientRef, GSN_STOP_PERM_REF, signal, 2, JBB);
5212 c_stopPermProxy.clientRef = 0;
5213 }//if
5214 }//Dbdih::checkStopPermProxy()
5215
5216 void
handleTakeOver(Signal * signal,TakeOverRecordPtr takeOverPtr)5217 Dbdih::handleTakeOver(Signal* signal, TakeOverRecordPtr takeOverPtr)
5218 {
5219 jam();
5220 switch(takeOverPtr.p->toMasterStatus){
5221 case TakeOverRecord::TO_MASTER_IDLE:
5222 jam();
5223 releaseTakeOver(takeOverPtr);
5224 return;
5225 case TakeOverRecord::TO_MUTEX_BEFORE_STORED:
5226 jam();
5227 /**
5228 * Waiting for lock...
5229 * do nothing...will be detected when lock is acquired
5230 */
5231 return;
5232 case TakeOverRecord::TO_MUTEX_BEFORE_LOCKED:
5233 jam();
5234 /**
5235 * Has lock...and NGPtr reservation...
5236 */
5237 abortTakeOver(signal, takeOverPtr);
5238 return;
5239 case TakeOverRecord::TO_AFTER_STORED:{
5240 jam();
5241 /**
5242 * No lock...but NGPtr reservation...remove NGPtr reservation
5243 */
5244 NodeRecordPtr nodePtr;
5245 NodeGroupRecordPtr NGPtr;
5246 nodePtr.i = takeOverPtr.p->toCopyNode;
5247 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
5248 NGPtr.i = nodePtr.p->nodeGroup;
5249 ptrCheckGuard(NGPtr, MAX_NDB_NODES, nodeGroupRecord);
5250
5251 ndbassert(NGPtr.p->activeTakeOver == takeOverPtr.p->toStartingNode);
5252 if (NGPtr.p->activeTakeOver == takeOverPtr.p->toStartingNode)
5253 {
5254 jam();
5255 NGPtr.p->activeTakeOver = 0;
5256 }
5257 releaseTakeOver(takeOverPtr);
5258 return;
5259 }
5260 case TakeOverRecord::TO_MUTEX_BEFORE_COMMIT:
5261 jam();
5262 /**
5263 * Waiting for lock...
5264 * do nothing...will be detected when lock is acquired
5265 */
5266 return;
5267 case TakeOverRecord::TO_MUTEX_BEFORE_SWITCH_REPLICA:
5268 jam();
5269 /**
5270 * Waiting for lock...
5271 * do nothing...will be detected when lock is acquired
5272 */
5273 return;
5274 case TakeOverRecord::TO_MUTEX_AFTER_SWITCH_REPLICA:
5275 jam();
5276 abortTakeOver(signal, takeOverPtr);
5277 return;
5278 case TakeOverRecord::TO_WAIT_LCP:{
5279 jam();
5280 /**
5281 * Waiting for LCP
5282 */
5283 NodeRecordPtr nodePtr;
5284 nodePtr.i = takeOverPtr.p->toStartingNode;
5285 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
5286 nodePtr.p->copyCompleted = 0;
5287 releaseTakeOver(takeOverPtr);
5288 return;
5289 }
5290 default:
5291 jamLine(takeOverPtr.p->toMasterStatus);
5292 ndbrequire(false);
5293 }
5294 }
5295
failedNodeSynchHandling(Signal * signal,NodeRecordPtr failedNodePtr)5296 void Dbdih::failedNodeSynchHandling(Signal* signal,
5297 NodeRecordPtr failedNodePtr)
5298 {
5299 jam();
5300 /*----------------------------------------------------*/
5301 /* INITIALISE THE VARIABLES THAT KEEP TRACK OF */
5302 /* WHEN A NODE FAILURE IS COMPLETED. */
5303 /*----------------------------------------------------*/
5304 failedNodePtr.p->dbdictFailCompleted = ZFALSE;
5305 failedNodePtr.p->dbtcFailCompleted = ZFALSE;
5306 failedNodePtr.p->dbdihFailCompleted = ZFALSE;
5307 failedNodePtr.p->dblqhFailCompleted = ZFALSE;
5308
5309 failedNodePtr.p->m_NF_COMPLETE_REP.clearWaitingFor();
5310
5311 NodeRecordPtr nodePtr;
5312 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
5313 ptrAss(nodePtr, nodeRecord);
5314 if (nodePtr.p->nodeStatus == NodeRecord::ALIVE) {
5315 jam();
5316 /**
5317 * We'r waiting for nodePtr.i to complete
5318 * handling of failedNodePtr.i's death
5319 */
5320
5321 failedNodePtr.p->m_NF_COMPLETE_REP.setWaitingFor(nodePtr.i);
5322 } else {
5323 jam();
5324 if ((nodePtr.p->nodeStatus == NodeRecord::DYING) &&
5325 (nodePtr.p->m_NF_COMPLETE_REP.isWaitingFor(failedNodePtr.i))){
5326 jam();
5327 /*----------------------------------------------------*/
5328 /* THE NODE FAILED BEFORE REPORTING THE FAILURE */
5329 /* HANDLING COMPLETED ON THIS FAILED NODE. */
5330 /* REPORT THAT NODE FAILURE HANDLING WAS */
5331 /* COMPLETED ON THE NEW FAILED NODE FOR THIS */
5332 /* PARTICULAR OLD FAILED NODE. */
5333 /*----------------------------------------------------*/
5334 NFCompleteRep * const nf = (NFCompleteRep *)&signal->theData[0];
5335 nf->blockNo = 0;
5336 nf->nodeId = failedNodePtr.i;
5337 nf->failedNodeId = nodePtr.i;
5338 nf->from = __LINE__;
5339 sendSignal(reference(), GSN_NF_COMPLETEREP, signal,
5340 NFCompleteRep::SignalLength, JBB);
5341 }//if
5342 }//if
5343 }//for
5344 if (failedNodePtr.p->nodeStatus == NodeRecord::DIED_NOW) {
5345 jam();
5346 failedNodePtr.p->nodeStatus = NodeRecord::DYING;
5347 } else {
5348 jam();
5349 /*----------------------------------------------------*/
5350 // No more processing needed when node not even started
5351 // yet. We give the node status to DEAD since we do not
5352 // care whether all nodes complete the node failure
5353 // handling. The node have not been included in the
5354 // node failure protocols.
5355 /*----------------------------------------------------*/
5356 failedNodePtr.p->nodeStatus = NodeRecord::DEAD;
5357 /**-----------------------------------------------------------------------
5358 * WE HAVE COMPLETED HANDLING THE NODE FAILURE IN DIH. WE CAN REPORT THIS
5359 * TO DIH THAT WAIT FOR THE OTHER BLOCKS TO BE CONCLUDED AS WELL.
5360 *-----------------------------------------------------------------------*/
5361 NFCompleteRep * const nf = (NFCompleteRep *)&signal->theData[0];
5362 nf->blockNo = DBDIH;
5363 nf->nodeId = cownNodeId;
5364 nf->failedNodeId = failedNodePtr.i;
5365 nf->from = __LINE__;
5366 sendSignal(reference(), GSN_NF_COMPLETEREP, signal,
5367 NFCompleteRep::SignalLength, JBB);
5368 }//if
5369 }//Dbdih::failedNodeSynchHandling()
5370
5371 bool
findTakeOver(Ptr<TakeOverRecord> & ptr,Uint32 failedNodeId)5372 Dbdih::findTakeOver(Ptr<TakeOverRecord> & ptr, Uint32 failedNodeId)
5373 {
5374 for (c_activeTakeOverList.first(ptr); !ptr.isNull();
5375 c_activeTakeOverList.next(ptr))
5376 {
5377 jam();
5378 if (ptr.p->toStartingNode == failedNodeId)
5379 {
5380 jam();
5381 return true;
5382 }
5383 }
5384 ptr.setNull();
5385 return false;
5386 }//Dbdih::findTakeOver()
5387
/**
 * Local checkpoint (LCP) related handling of a failed node.
 *
 * - On the master, if the failed node's LQH participated in the LCP, the
 *   node's activeStatus is updated to reflect the missed checkpoint.
 * - The node is removed from the sets of LCP participants.
 * - For each signal counter that is still waiting for the failed node, the
 *   awaited reply is faked and sent to this block itself, so the protocols
 *   can proceed without the node.
 *
 * @param signal         Signal object used for the faked signals.
 * @param failedNodePtr  Record of the failed node.
 */
void Dbdih::failedNodeLcpHandling(Signal* signal, NodeRecordPtr failedNodePtr)
{
  jam();
  const Uint32 nodeId = failedNodePtr.i;

  if (isMaster() && c_lcpState.m_participatingLQH.get(failedNodePtr.i))
  {
    /*----------------------------------------------------*/
    /*  THE NODE WAS INVOLVED IN A LOCAL CHECKPOINT. WE   */
    /*  MUST UPDATE THE ACTIVE STATUS TO INDICATE THAT    */
    /*  THE NODE HAVE MISSED A LOCAL CHECKPOINT.          */
    /*----------------------------------------------------*/

    /**
     * Bug#28717, Only master should do this, as this status is copied
     *   to other nodes
     */
    switch (failedNodePtr.p->activeStatus) {
    case Sysfile::NS_Active:
      jam();
      failedNodePtr.p->activeStatus = Sysfile::NS_ActiveMissed_1;
      break;
    case Sysfile::NS_ActiveMissed_1:
      jam();
      // NOTE(review): the status is assigned its current value here, i.e. it
      // does not advance to NS_ActiveMissed_2 - confirm this is intended.
      failedNodePtr.p->activeStatus = Sysfile::NS_ActiveMissed_1;
      break;
    case Sysfile::NS_ActiveMissed_2:
      jam();
      failedNodePtr.p->activeStatus = Sysfile::NS_NotActive_NotTakenOver;
      break;
    case Sysfile::NS_TakeOver:
      jam();
      failedNodePtr.p->activeStatus = Sysfile::NS_NotActive_NotTakenOver;
      break;
    case Sysfile::NS_Configured:
      jam();
      break;
    default:
      // Any other status is unexpected for a participating node.
      g_eventLogger->error("activeStatus = %u "
                           "at failure after NODE_FAILREP of node = %u",
                           (Uint32) failedNodePtr.p->activeStatus,
                           failedNodePtr.i);
      ndbrequire(false);
      break;
    }//switch
  }//if

  // The failed node no longer participates in the LCP.
  c_lcpState.m_participatingDIH.clear(failedNodePtr.i);
  c_lcpState.m_participatingLQH.clear(failedNodePtr.i);

  // Sampled up front; decides below whether the LQH completion is faked.
  bool wf = c_MASTER_LCPREQ_Counter.isWaitingFor(failedNodePtr.i);

  // Fake LCP_COMPLETE_REP from the failed node's DIH if it is awaited.
  if(c_lcpState.m_LCP_COMPLETE_REP_Counter_DIH.isWaitingFor(failedNodePtr.i))
  {
    jam();
    LcpCompleteRep * rep = (LcpCompleteRep*)signal->getDataPtrSend();
    rep->nodeId = failedNodePtr.i;
    rep->lcpId = SYSFILE->latestLCP_ID;
    rep->blockNo = DBDIH;
    sendSignal(reference(), GSN_LCP_COMPLETE_REP, signal,
               LcpCompleteRep::SignalLength, JBB);
  }

  // Set when an LQH LCP_COMPLETE_REP has been faked for the failed node.
  bool lcp_complete_rep = false;
  if (!wf)
  {
    jam();

    /**
     * Check if we'r waiting for the failed node's LQH to complete
     *
     * Note that this is ran "before" LCP master take over
     */
    if(c_lcpState.m_LCP_COMPLETE_REP_Counter_LQH.isWaitingFor(nodeId)){
      jam();

      lcp_complete_rep = true;
      LcpCompleteRep * rep = (LcpCompleteRep*)signal->getDataPtrSend();
      rep->nodeId  = nodeId;
      rep->lcpId   = SYSFILE->latestLCP_ID;
      rep->blockNo = DBLQH;
      sendSignal(reference(), GSN_LCP_COMPLETE_REP, signal,
                 LcpCompleteRep::SignalLength, JBB);

      if(c_lcpState.m_LAST_LCP_FRAG_ORD.isWaitingFor(nodeId)){
        jam();
        /**
         * Make sure we're ready to accept it
         */
        c_lcpState.m_LAST_LCP_FRAG_ORD.clearWaitingFor(nodeId);
      }
    }
  }

  // Fake TCGETOPSIZECONF (with 0 as second word) if awaited.
  if (c_TCGETOPSIZEREQ_Counter.isWaitingFor(failedNodePtr.i)) {
    jam();
    signal->theData[0] = failedNodePtr.i;
    signal->theData[1] = 0;
    sendSignal(reference(), GSN_TCGETOPSIZECONF, signal, 2, JBB);
  }//if

  // Fake TC_CLOPSIZECONF if awaited.
  if (c_TC_CLOPSIZEREQ_Counter.isWaitingFor(failedNodePtr.i)) {
    jam();
    signal->theData[0] = failedNodePtr.i;
    sendSignal(reference(), GSN_TC_CLOPSIZECONF, signal, 1, JBB);
  }//if

  // Fake START_LCP_CONF from the failed node's LQH if awaited.
  if (c_START_LCP_REQ_Counter.isWaitingFor(failedNodePtr.i)) {
    jam();
    StartLcpConf * conf = (StartLcpConf*)signal->getDataPtrSend();
    conf->senderRef = numberToRef(DBLQH, failedNodePtr.i);
    conf->lcpId = SYSFILE->latestLCP_ID;
    sendSignal(reference(), GSN_START_LCP_CONF, signal,
               StartLcpConf::SignalLength, JBB);
  }//if

  // Fake EMPTY_LCP_CONF if awaited. The label is re-entered at most once
  // from the else-branch below, after the failed node has been added to the
  // counter.
dosend:
  if (c_EMPTY_LCP_REQ_Counter.isWaitingFor(failedNodePtr.i))
  {
    jam();
    EmptyLcpConf * const rep = (EmptyLcpConf *)&signal->theData[0];
    rep->senderNodeId = failedNodePtr.i;
    rep->tableId = ~0;
    rep->fragmentId = ~0;
    rep->lcpNo = 0;
    rep->lcpId = SYSFILE->latestLCP_ID;
    rep->idle = true;
    sendSignal(reference(), GSN_EMPTY_LCP_CONF, signal,
               EmptyLcpConf::SignalLength, JBB);
  }
  else if (!c_EMPTY_LCP_REQ_Counter.done() && lcp_complete_rep)
  {
    jam();
    // An EMPTY_LCP_REQ round is in progress that does not include the failed
    // node, and an LQH completion was faked above: add the node to the round
    // and fake its EMPTY_LCP_CONF as well.
    c_EMPTY_LCP_REQ_Counter.setWaitingFor(failedNodePtr.i);
    goto dosend;
  }

  // Fake MASTER_LCPREF if awaited.
  if (c_MASTER_LCPREQ_Counter.isWaitingFor(failedNodePtr.i)) {
    jam();
    MasterLCPRef * const ref = (MasterLCPRef *)&signal->theData[0];
    ref->senderNodeId = failedNodePtr.i;
    ref->failedNodeId = cmasterTakeOverNode;
    sendSignal(reference(), GSN_MASTER_LCPREF, signal,
               MasterLCPRef::SignalLength, JBB);
  }//if

}//Dbdih::failedNodeLcpHandling()
5535
/**
 * Global checkpoint (GCP) related handling of a failed node.
 *
 * For each GCP-related signal counter that is still waiting for the failed
 * node, the awaited reply is faked and sent to this block itself. The
 * GCP_COMMIT case is handled differently: instead of faking the reply, the
 * wait is moved to the own node and GCP_NOMORETRANS is sent to the local TC.
 *
 * @param signal        Signal object used for the outgoing signals.
 * @param failedNodeId  Id of the failed node.
 */
void Dbdih::checkGcpOutstanding(Signal* signal, Uint32 failedNodeId){
  // Fake GCP_PREPARECONF carrying the master's new GCI.
  if (c_GCP_PREPARE_Counter.isWaitingFor(failedNodeId)){
    jam();
    GCPPrepareConf* conf = (GCPPrepareConf*)signal->getDataPtrSend();
    conf->nodeId = failedNodeId;
    conf->gci_hi = Uint32(m_micro_gcp.m_master.m_new_gci >> 32);
    conf->gci_lo = Uint32(m_micro_gcp.m_master.m_new_gci);
    sendSignal(reference(), GSN_GCP_PREPARECONF, signal,
               GCPPrepareConf::SignalLength, JBB);
  }//if

  if (c_GCP_COMMIT_Counter.isWaitingFor(failedNodeId))
  {
    jam();

    /**
     * Waiting for GSN_GCP_NODEFINISH
     *   TC-take-over can generate new transactions
     *   that will be in this epoch
     *   re-run GCP_NOMORETRANS to master-TC (self) that will run
     *   take-over
     */
    c_GCP_COMMIT_Counter.clearWaitingFor(failedNodeId);
    // Make sure the counter waits for the own node, so the reply to the
    // GCP_NOMORETRANS sent below is awaited.
    if (!c_GCP_COMMIT_Counter.isWaitingFor(getOwnNodeId()))
    {
      jam();
      c_GCP_COMMIT_Counter.setWaitingFor(getOwnNodeId());
      m_micro_gcp.m_state = MicroGcp::M_GCP_COMMIT;
    }

    GCPNoMoreTrans* req = (GCPNoMoreTrans*)signal->getDataPtrSend();
    req->senderRef = reference();
    req->senderData = m_micro_gcp.m_master_ref;
    req->gci_hi = Uint32(m_micro_gcp.m_old_gci >> 32);
    req->gci_lo = Uint32(m_micro_gcp.m_old_gci);
    sendSignal(clocaltcblockref, GSN_GCP_NOMORETRANS, signal,
               GCPNoMoreTrans::SignalLength, JBB);
  }

  // Fake GCP_SAVEREF marked as caused by the node failure.
  if (c_GCP_SAVEREQ_Counter.isWaitingFor(failedNodeId)) {
    jam();
    GCPSaveRef * const saveRef = (GCPSaveRef*)&signal->theData[0];
    saveRef->dihPtr = failedNodeId;
    saveRef->nodeId = failedNodeId;
    saveRef->gci    = m_gcp_save.m_master.m_new_gci;
    saveRef->errorCode = GCPSaveRef::FakedSignalDueToNodeFailure;
    sendSignal(reference(), GSN_GCP_SAVEREF, signal,
               GCPSaveRef::SignalLength, JBB);
  }//if

  // Fake COPY_GCICONF.
  if (c_COPY_GCIREQ_Counter.isWaitingFor(failedNodeId)) {
    jam();
    signal->theData[0] = failedNodeId;
    sendSignal(reference(), GSN_COPY_GCICONF, signal, 1, JBB);
  }//if

  // Fake MASTER_GCPREF.
  if (c_MASTER_GCPREQ_Counter.isWaitingFor(failedNodeId)){
    jam();
    MasterGCPRef * const ref = (MasterGCPRef *)&signal->theData[0];
    ref->senderNodeId = failedNodeId;
    ref->failedNodeId = cmasterTakeOverNode;
    sendSignal(reference(), GSN_MASTER_GCPREF, signal,
               MasterGCPRef::SignalLength, JBB);
  }//if

  // Fake SUB_GCP_COMPLETE_ACK from the failed node's DIH.
  if (c_SUB_GCP_COMPLETE_REP_Counter.isWaitingFor(failedNodeId))
  {
    jam();
    SubGcpCompleteAck* ack = CAST_PTR(SubGcpCompleteAck,
                                      signal->getDataPtrSend());
    ack->rep.senderRef = numberToRef(DBDIH, failedNodeId);
    sendSignal(reference(), GSN_SUB_GCP_COMPLETE_ACK, signal,
               SubGcpCompleteAck::SignalLength, JBB);
  }
}//Dbdih::checkGcpOutstanding()
5611
5612
/**
 * Start the LCP master take-over protocol after the master node changed.
 *
 * Records the failed node in c_lcpMasterTakeOverState, enters state
 * LMTOS_WAIT_EMPTY_LCP and sends EMPTY_LCP_REQ to every alive node that is
 * not already awaited by the EMPTY_LCP_REQ counter. If a previous take-over
 * for another failed node was still registered, its NF_LCP_TAKE_OVER step is
 * completed before the step is registered for the new failed node.
 *
 * @param signal  Signal object used for the outgoing signals.
 * @param nodeId  Id of the failed node (the caller passes the old master).
 */
void
Dbdih::startLcpMasterTakeOver(Signal* signal, Uint32 nodeId){
  jam();

  // Failed node of a possibly still registered earlier take-over.
  Uint32 oldNode = c_lcpMasterTakeOverState.failedNodeId;

  c_lcpMasterTakeOverState.minTableId = ~0;
  c_lcpMasterTakeOverState.minFragId = ~0;
  c_lcpMasterTakeOverState.failedNodeId = nodeId;

  c_lcpMasterTakeOverState.set(LMTOS_WAIT_EMPTY_LCP, __LINE__);

  EmptyLcpReq* req = (EmptyLcpReq*)signal->getDataPtrSend();
  req->senderRef = reference();
  {
    // Walk the list of alive nodes, starting at cfirstAliveNode.
    NodeRecordPtr specNodePtr;
    specNodePtr.i = cfirstAliveNode;
    do {
      jam();
      ptrCheckGuard(specNodePtr, MAX_NDB_NODES, nodeRecord);
      if (!c_EMPTY_LCP_REQ_Counter.isWaitingFor(specNodePtr.i))
      {
        jam();
        c_EMPTY_LCP_REQ_Counter.setWaitingFor(specNodePtr.i);
        // Error insert 7209 suppresses the request to the own node.
        if (!(ERROR_INSERTED(7209) && specNodePtr.i == getOwnNodeId()))
        {
          sendEMPTY_LCP_REQ(signal, specNodePtr.i, 0);
        }
        else
        {
          ndbout_c("NOT sending EMPTY_LCP_REQ to %u", specNodePtr.i);
        }

        if (c_lcpState.m_LAST_LCP_FRAG_ORD.isWaitingFor(specNodePtr.i))
        {
          jam();
          // NOTE(review): called without a node id, unlike the per-node
          // clearWaitingFor(nodeId) used elsewhere in this file - confirm
          // that clearing the whole counter is intended.
          c_lcpState.m_LAST_LCP_FRAG_ORD.clearWaitingFor();
        }
      }
      specNodePtr.i = specNodePtr.p->nextNode;
    } while (specNodePtr.i != RNIL);
  }

  NodeRecordPtr nodePtr;
  nodePtr.i = oldNode;
  // Only act on the earlier take-over if it names a valid node id.
  if (oldNode > 0 && oldNode < MAX_NDB_NODES)
  {
    jam();
    ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
    if (nodePtr.p->m_nodefailSteps.get(NF_LCP_TAKE_OVER))
    {
      jam();
      checkLocalNodefailComplete(signal, oldNode, NF_LCP_TAKE_OVER);
    }
  }

  setLocalNodefailHandling(signal, nodeId, NF_LCP_TAKE_OVER);
}
5671
startGcpMasterTakeOver(Signal * signal,Uint32 oldMasterId)5672 void Dbdih::startGcpMasterTakeOver(Signal* signal, Uint32 oldMasterId){
5673 jam();
5674 /*--------------------------------------------------*/
5675 /* */
5676 /* THE MASTER HAVE FAILED AND WE WERE ELECTED */
5677 /* TO BE THE NEW MASTER NODE. WE NEED TO QUERY*/
5678 /* ALL THE OTHER NODES ABOUT THEIR STATUS IN */
5679 /* ORDER TO BE ABLE TO TAKE OVER CONTROL OF */
5680 /* THE GLOBAL CHECKPOINT PROTOCOL AND THE */
5681 /* LOCAL CHECKPOINT PROTOCOL. */
5682 /*--------------------------------------------------*/
5683 if(!isMaster()){
5684 jam();
5685 return;
5686 }
5687 cmasterState = MASTER_TAKE_OVER_GCP;
5688 cmasterTakeOverNode = oldMasterId;
5689 MasterGCPReq * const req = (MasterGCPReq *)&signal->theData[0];
5690 req->masterRef = reference();
5691 req->failedNodeId = oldMasterId;
5692 sendLoopMacro(MASTER_GCPREQ, sendMASTER_GCPREQ, RNIL);
5693
5694 signal->theData[0] = NDB_LE_GCP_TakeoverStarted;
5695 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 1, JBB);
5696
5697 /**
5698 * save own value...
5699 * to be able to check values returned in MASTER_GCPCONF
5700 */
5701 m_gcp_save.m_master.m_new_gci = m_gcp_save.m_gci;
5702
5703 setLocalNodefailHandling(signal, oldMasterId, NF_GCP_TAKE_OVER);
5704 }//Dbdih::handleNewMaster()
5705
startRemoveFailedNode(Signal * signal,NodeRecordPtr failedNodePtr)5706 void Dbdih::startRemoveFailedNode(Signal* signal, NodeRecordPtr failedNodePtr)
5707 {
5708 Uint32 nodeId = failedNodePtr.i;
5709 if(failedNodePtr.p->nodeStatus != NodeRecord::DIED_NOW){
5710 jam();
5711 /**
5712 * Is node isn't alive. It can't be part of LCP
5713 */
5714 ndbrequire(!c_lcpState.m_LCP_COMPLETE_REP_Counter_LQH.isWaitingFor(nodeId));
5715
5716 /**
5717 * And there is no point in removing any replicas
5718 * It's dead...
5719 */
5720 return;
5721 }
5722
5723 /**
5724 * If node has node complete LCP
5725 * we need to remove it as undo might not be complete
5726 * bug#31257
5727 */
5728 failedNodePtr.p->m_remove_node_from_table_lcp_id = RNIL;
5729 if (c_lcpState.m_LCP_COMPLETE_REP_Counter_LQH.isWaitingFor(failedNodePtr.i))
5730 {
5731 jam();
5732 failedNodePtr.p->m_remove_node_from_table_lcp_id = SYSFILE->latestLCP_ID;
5733 }
5734
5735 jam();
5736
5737 if (!ERROR_INSERTED(7194) && !ERROR_INSERTED(7221))
5738 {
5739 signal->theData[0] = DihContinueB::ZREMOVE_NODE_FROM_TABLE;
5740 signal->theData[1] = failedNodePtr.i;
5741 signal->theData[2] = 0; // Tab id
5742 sendSignal(reference(), GSN_CONTINUEB, signal, 3, JBB);
5743 }
5744 else
5745 {
5746 if (ERROR_INSERTED(7194))
5747 {
5748 ndbout_c("7194 Not starting ZREMOVE_NODE_FROM_TABLE");
5749 }
5750 else if (ERROR_INSERTED(7221))
5751 {
5752 ndbout_c("7221 Not starting ZREMOVE_NODE_FROM_TABLE");
5753 }
5754 }
5755
5756 setLocalNodefailHandling(signal, failedNodePtr.i, NF_REMOVE_NODE_FROM_TABLE);
5757 }//Dbdih::startRemoveFailedNode()
5758
5759 /*--------------------------------------------------*/
5760 /* THE MASTER HAS FAILED AND THE NEW MASTER IS*/
5761 /* QUERYING THIS NODE ABOUT THE STATE OF THE */
5762 /* GLOBAL CHECKPOINT PROTOCOL */
5763 /*--------------------------------------------------*/
execMASTER_GCPREQ(Signal * signal)5764 void Dbdih::execMASTER_GCPREQ(Signal* signal)
5765 {
5766 NodeRecordPtr failedNodePtr;
5767 MasterGCPReq * const masterGCPReq = (MasterGCPReq *)&signal->theData[0];
5768 jamEntry();
5769 const BlockReference newMasterBlockref = masterGCPReq->masterRef;
5770 const Uint32 failedNodeId = masterGCPReq->failedNodeId;
5771
5772 failedNodePtr.i = failedNodeId;
5773 ptrCheckGuard(failedNodePtr, MAX_NDB_NODES, nodeRecord);
5774 if (failedNodePtr.p->nodeStatus == NodeRecord::ALIVE) {
5775 jam();
5776 /*--------------------------------------------------*/
5777 /* ENSURE THAT WE HAVE PROCESSED THE SIGNAL */
5778 /* NODE_FAILURE BEFORE WE PROCESS THIS REQUEST*/
5779 /* FROM THE NEW MASTER. THIS ENSURES THAT WE */
5780 /* HAVE REMOVED THE FAILED NODE FROM THE LIST */
5781 /* OF ACTIVE NODES AND SO FORTH. */
5782 /*--------------------------------------------------*/
5783 sendSignalWithDelay(reference(), GSN_MASTER_GCPREQ,
5784 signal, 10, MasterGCPReq::SignalLength);
5785 return;
5786 } else {
5787 ndbrequire(failedNodePtr.p->nodeStatus == NodeRecord::DYING);
5788 }//if
5789
5790 if (ERROR_INSERTED(7181))
5791 {
5792 ndbout_c("execGCP_TCFINISHED in MASTER_GCPREQ");
5793 CLEAR_ERROR_INSERT_VALUE;
5794 signal->theData[0] = c_error_7181_ref;
5795 signal->theData[1] = (Uint32)(m_micro_gcp.m_old_gci >> 32);
5796 signal->theData[2] = (Uint32)(m_micro_gcp.m_old_gci & 0xFFFFFFFF);
5797 execGCP_TCFINISHED(signal);
5798 }
5799
5800 MasterGCPConf::State gcpState;
5801 switch(m_micro_gcp.m_state){
5802 case MicroGcp::M_GCP_IDLE:
5803 jam();
5804 gcpState = MasterGCPConf::GCP_READY;
5805 break;
5806 case MicroGcp::M_GCP_PREPARE:
5807 jam();
5808 gcpState = MasterGCPConf::GCP_PREPARE_RECEIVED;
5809 break;
5810 case MicroGcp::M_GCP_COMMIT:
5811 jam();
5812 gcpState = MasterGCPConf::GCP_COMMIT_RECEIVED;
5813 break;
5814 case MicroGcp::M_GCP_COMMITTED:
5815 jam();
5816 gcpState = MasterGCPConf::GCP_COMMITTED;
5817
5818 /**
5819 * Change state to GCP_COMMIT_RECEIVEDn and rerun GSN_GCP_NOMORETRANS
5820 */
5821 gcpState = MasterGCPConf::GCP_COMMIT_RECEIVED;
5822 m_micro_gcp.m_state = MicroGcp::M_GCP_COMMIT;
5823
5824 {
5825 GCPNoMoreTrans* req2 = (GCPNoMoreTrans*)signal->getDataPtrSend();
5826 req2->senderRef = reference();
5827 req2->senderData = m_micro_gcp.m_master_ref;
5828 req2->gci_hi = (Uint32)(m_micro_gcp.m_old_gci >> 32);
5829 req2->gci_lo = (Uint32)(m_micro_gcp.m_old_gci & 0xFFFFFFFF);
5830 sendSignal(clocaltcblockref, GSN_GCP_NOMORETRANS, signal,
5831 GCPNoMoreTrans::SignalLength, JBB);
5832 }
5833 break;
5834 case MicroGcp::M_GCP_COMPLETE:
5835 /**
5836 * This is a master only state...
5837 */
5838 ndbrequire(false);
5839 }
5840
5841 MasterGCPConf::SaveState saveState;
5842 switch(m_gcp_save.m_state){
5843 case GcpSave::GCP_SAVE_IDLE:
5844 jam();
5845 saveState = MasterGCPConf::GCP_SAVE_IDLE;
5846 break;
5847 case GcpSave::GCP_SAVE_REQ:
5848 jam();
5849 saveState = MasterGCPConf::GCP_SAVE_REQ;
5850 break;
5851 case GcpSave::GCP_SAVE_CONF:
5852 jam();
5853 saveState = MasterGCPConf::GCP_SAVE_CONF;
5854 break;
5855 case GcpSave::GCP_SAVE_COPY_GCI:
5856 jam();
5857 saveState = MasterGCPConf::GCP_SAVE_COPY_GCI;
5858 break;
5859 }
5860
5861 MasterGCPConf * const masterGCPConf = (MasterGCPConf *)&signal->theData[0];
5862 masterGCPConf->gcpState = gcpState;
5863 masterGCPConf->senderNodeId = cownNodeId;
5864 masterGCPConf->failedNodeId = failedNodeId;
5865 masterGCPConf->newGCP_hi = (Uint32)(m_micro_gcp.m_new_gci >> 32);
5866 masterGCPConf->latestLCP = SYSFILE->latestLCP_ID;
5867 masterGCPConf->oldestRestorableGCI = SYSFILE->oldestRestorableGCI;
5868 masterGCPConf->keepGCI = SYSFILE->keepGCI;
5869 masterGCPConf->newGCP_lo = Uint32(m_micro_gcp.m_new_gci);
5870 masterGCPConf->saveState = saveState;
5871 masterGCPConf->saveGCI = m_gcp_save.m_gci;
5872 for(Uint32 i = 0; i < NdbNodeBitmask::Size; i++)
5873 masterGCPConf->lcpActive[i] = SYSFILE->lcpActive[i];
5874
5875 if (ERROR_INSERTED(7225))
5876 {
5877 CLEAR_ERROR_INSERT_VALUE;
5878 ndbrequire(refToNode(newMasterBlockref) == getOwnNodeId());
5879 sendSignalWithDelay(newMasterBlockref, GSN_MASTER_GCPCONF, signal,
5880 500, MasterGCPConf::SignalLength);
5881 }
5882 else
5883 {
5884 sendSignal(newMasterBlockref, GSN_MASTER_GCPCONF, signal,
5885 MasterGCPConf::SignalLength, JBB);
5886 }
5887
5888 if (ERROR_INSERTED(7182))
5889 {
5890 ndbout_c("execGCP_TCFINISHED in MASTER_GCPREQ");
5891 CLEAR_ERROR_INSERT_VALUE;
5892 signal->theData[0] = c_error_7181_ref;
5893 signal->theData[1] = (Uint32)(m_micro_gcp.m_old_gci >> 32);
5894 signal->theData[2] = (Uint32)(m_micro_gcp.m_old_gci & 0xFFFFFFFF);
5895 execGCP_TCFINISHED(signal);
5896 }
5897
5898 if (c_copyGCISlave.m_expectedNextWord != 0)
5899 {
5900 jam();
5901 c_copyGCISlave.m_expectedNextWord = 0;
5902 c_copyGCISlave.m_copyReason = CopyGCIReq::IDLE;
5903 }
5904 }//Dbdih::execMASTER_GCPREQ()
5905
execMASTER_GCPCONF(Signal * signal)5906 void Dbdih::execMASTER_GCPCONF(Signal* signal)
5907 {
5908 NodeRecordPtr senderNodePtr;
5909 MasterGCPConf * const masterGCPConf = (MasterGCPConf *)&signal->theData[0];
5910 jamEntry();
5911 senderNodePtr.i = masterGCPConf->senderNodeId;
5912 ptrCheckGuard(senderNodePtr, MAX_NDB_NODES, nodeRecord);
5913
5914 MasterGCPConf::State gcpState = (MasterGCPConf::State)masterGCPConf->gcpState;
5915 MasterGCPConf::SaveState saveState =
5916 (MasterGCPConf::SaveState)masterGCPConf->saveState;
5917 const Uint32 failedNodeId = masterGCPConf->failedNodeId;
5918 const Uint32 newGcp_hi = masterGCPConf->newGCP_hi;
5919 const Uint32 newGcp_lo = masterGCPConf->newGCP_lo;
5920 Uint64 newGCI = newGcp_lo | (Uint64(newGcp_hi) << 32);
5921 const Uint32 latestLcpId = masterGCPConf->latestLCP;
5922 const Uint32 oldestRestorableGci = masterGCPConf->oldestRestorableGCI;
5923 const Uint32 oldestKeepGci = masterGCPConf->keepGCI;
5924 const Uint32 saveGCI = masterGCPConf->saveGCI;
5925
5926 if (latestLcpId > SYSFILE->latestLCP_ID) {
5927 jam();
5928 #if 0
5929 g_eventLogger->info("Dbdih: Setting SYSFILE->latestLCP_ID to %d",
5930 latestLcpId);
5931 SYSFILE->latestLCP_ID = latestLcpId;
5932 #endif
5933 SYSFILE->keepGCI = oldestKeepGci;
5934 SYSFILE->oldestRestorableGCI = oldestRestorableGci;
5935 for(Uint32 i = 0; i < NdbNodeBitmask::Size; i++)
5936 SYSFILE->lcpActive[i] = masterGCPConf->lcpActive[i];
5937 }//if
5938
5939 bool ok = false;
5940 switch (gcpState) {
5941 case MasterGCPConf::GCP_READY:
5942 jam();
5943 ok = true;
5944 // Either not started or complete...
5945 break;
5946 case MasterGCPConf::GCP_PREPARE_RECEIVED:
5947 jam();
5948 ok = true;
5949 if (m_micro_gcp.m_master.m_state == MicroGcp::M_GCP_IDLE)
5950 {
5951 jam();
5952 m_micro_gcp.m_master.m_state = MicroGcp::M_GCP_PREPARE;
5953 m_micro_gcp.m_master.m_new_gci = newGCI;
5954 }
5955 else
5956 {
5957 jam();
5958 ndbrequire(m_micro_gcp.m_master.m_new_gci == newGCI);
5959 }
5960 break;
5961 case MasterGCPConf::GCP_COMMIT_RECEIVED:
5962 jam();
5963 case MasterGCPConf::GCP_COMMITTED:
5964 jam();
5965 ok = true;
5966 if (m_micro_gcp.m_master.m_state != MicroGcp::M_GCP_IDLE)
5967 {
5968 ndbrequire(m_micro_gcp.m_master.m_new_gci == newGCI);
5969 }
5970 m_micro_gcp.m_master.m_new_gci = newGCI;
5971 m_micro_gcp.m_master.m_state = MicroGcp::M_GCP_COMMIT;
5972 break;
5973 #ifndef VM_TRACE
5974 default:
5975 jamLine(gcpState);
5976 ndbrequire(false);
5977 #endif
5978 }
5979 ndbassert(ok); // Unhandled case...
5980
5981 ok = false;
5982 /**
5983 * GCI should differ with atmost one
5984 */
5985 ndbrequire(saveGCI == m_gcp_save.m_gci ||
5986 saveGCI == m_gcp_save.m_gci + 1 ||
5987 saveGCI + 1 == m_gcp_save.m_gci);
5988 if (saveGCI > m_gcp_save.m_master.m_new_gci)
5989 {
5990 jam();
5991 m_gcp_save.m_master.m_new_gci = saveGCI;
5992 }
5993 switch(saveState){
5994 case MasterGCPConf::GCP_SAVE_IDLE:
5995 jam();
5996 break;
5997 case MasterGCPConf::GCP_SAVE_REQ:
5998 jam();
5999 if (m_gcp_save.m_master.m_state == GcpSave::GCP_SAVE_IDLE)
6000 {
6001 jam();
6002 m_gcp_save.m_master.m_state = GcpSave::GCP_SAVE_REQ;
6003 }
6004 break;
6005 case MasterGCPConf::GCP_SAVE_CONF:
6006 jam();
6007 if (m_gcp_save.m_master.m_state == GcpSave::GCP_SAVE_IDLE)
6008 {
6009 jam();
6010 m_gcp_save.m_master.m_state = GcpSave::GCP_SAVE_REQ;
6011 }
6012 break;
6013 case MasterGCPConf::GCP_SAVE_COPY_GCI:
6014 jam();
6015 if (m_gcp_save.m_master.m_state == GcpSave::GCP_SAVE_IDLE)
6016 {
6017 jam();
6018 m_gcp_save.m_master.m_state = GcpSave::GCP_SAVE_COPY_GCI;
6019 }
6020 break;
6021 #ifndef VM_TRACE
6022 default:
6023 jamLine(saveState);
6024 ndbrequire(false);
6025 #endif
6026 }
6027 //ndbassert(ok); // Unhandled case
6028
6029 receiveLoopMacro(MASTER_GCPREQ, senderNodePtr.i);
6030 /*-------------------------------------------------------------------------*/
6031 // We have now received all responses and are ready to take over the GCP
6032 // protocol as master.
6033 /*-------------------------------------------------------------------------*/
6034 MASTER_GCPhandling(signal, failedNodeId);
6035
6036 return;
6037 }//Dbdih::execMASTER_GCPCONF()
6038
execMASTER_GCPREF(Signal * signal)6039 void Dbdih::execMASTER_GCPREF(Signal* signal)
6040 {
6041 const MasterGCPRef * const ref = (MasterGCPRef *)&signal->theData[0];
6042 jamEntry();
6043 receiveLoopMacro(MASTER_GCPREQ, ref->senderNodeId);
6044 /*-------------------------------------------------------------------------*/
6045 // We have now received all responses and are ready to take over the GCP
6046 // protocol as master.
6047 /*-------------------------------------------------------------------------*/
6048 MASTER_GCPhandling(signal, ref->failedNodeId);
6049 }//Dbdih::execMASTER_GCPREF()
6050
MASTER_GCPhandling(Signal * signal,Uint32 failedNodeId)6051 void Dbdih::MASTER_GCPhandling(Signal* signal, Uint32 failedNodeId)
6052 {
6053 cmasterState = MASTER_ACTIVE;
6054
6055 m_micro_gcp.m_master.m_start_time = 0;
6056 m_gcp_save.m_master.m_start_time = 0;
6057 if (m_gcp_monitor.m_micro_gcp.m_max_lag > 0)
6058 {
6059 infoEvent("GCP Monitor: Computed max GCP_SAVE lag to %u seconds",
6060 m_gcp_monitor.m_gcp_save.m_max_lag / 10);
6061 infoEvent("GCP Monitor: Computed max GCP_COMMIT lag to %u seconds",
6062 m_gcp_monitor.m_micro_gcp.m_max_lag / 10);
6063 }
6064 else
6065 {
6066 infoEvent("GCP Monitor: unlimited lags allowed");
6067 }
6068
6069 bool ok = false;
6070 switch(m_micro_gcp.m_master.m_state){
6071 case MicroGcp::M_GCP_IDLE:
6072 jam();
6073 ok = true;
6074 signal->theData[0] = DihContinueB::ZSTART_GCP;
6075 sendSignal(reference(), GSN_CONTINUEB, signal, 1, JBB);
6076 break;
6077 case MicroGcp::M_GCP_PREPARE:
6078 {
6079 jam();
6080 ok = true;
6081
6082 /**
6083 * Restart GCP_PREPARE
6084 */
6085 sendLoopMacro(GCP_PREPARE, sendGCP_PREPARE, RNIL);
6086 break;
6087 }
6088 case MicroGcp::M_GCP_COMMIT:
6089 {
6090 jam();
6091 ok = true;
6092
6093 /**
6094 * Restart GCP_COMMIT
6095 */
6096 sendLoopMacro(GCP_COMMIT, sendGCP_COMMIT, RNIL);
6097 break;
6098 }
6099 case MicroGcp::M_GCP_COMMITTED:
6100 jam();
6101 ndbrequire(false);
6102 case MicroGcp::M_GCP_COMPLETE:
6103 jam();
6104 ndbrequire(false);
6105 #ifndef VM_TRACE
6106 default:
6107 jamLine(m_micro_gcp.m_master.m_state);
6108 ndbrequire(false);
6109 #endif
6110 }
6111 ndbassert(ok);
6112
6113 if (m_micro_gcp.m_enabled == false)
6114 {
6115 jam();
6116 m_gcp_save.m_master.m_state = GcpSave::GCP_SAVE_IDLE;
6117 }
6118 else
6119 {
6120 ok = false;
6121 switch(m_gcp_save.m_master.m_state){
6122 case GcpSave::GCP_SAVE_IDLE:
6123 jam();
6124 ok = true;
6125 break;
6126 case GcpSave::GCP_SAVE_REQ:
6127 {
6128 jam();
6129 ok = true;
6130
6131 /**
6132 * Restart GCP_SAVE_REQ
6133 */
6134 sendLoopMacro(GCP_SAVEREQ, sendGCP_SAVEREQ, RNIL);
6135 break;
6136 }
6137 case GcpSave::GCP_SAVE_CONF:
6138 jam();
6139 case GcpSave::GCP_SAVE_COPY_GCI:
6140 jam();
6141 ok = true;
6142 copyGciLab(signal, CopyGCIReq::GLOBAL_CHECKPOINT);
6143 m_gcp_save.m_master.m_state = GcpSave::GCP_SAVE_COPY_GCI;
6144 break;
6145 #ifndef VM_TRACE
6146 default:
6147 jamLine(m_gcp_save.m_master.m_state);
6148 ndbrequire(false);
6149 #endif
6150 }
6151 ndbrequire(ok);
6152 }
6153
6154 signal->theData[0] = NDB_LE_GCP_TakeoverCompleted;
6155 signal->theData[1] = m_micro_gcp.m_master.m_state;
6156 signal->theData[2] = m_gcp_save.m_master.m_state;
6157 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 1, JBB);
6158
6159 infoEvent("kk: %u/%u %u %u",
6160 Uint32(m_micro_gcp.m_current_gci >> 32),
6161 Uint32(m_micro_gcp.m_current_gci),
6162 m_micro_gcp.m_master.m_state,
6163 m_gcp_save.m_master.m_state);
6164
6165 /*--------------------------------------------------*/
6166 /* WE SEPARATE HANDLING OF GLOBAL CHECKPOINTS */
6167 /* AND LOCAL CHECKPOINTS HERE. LCP'S HAVE TO */
6168 /* REMOVE ALL FAILED FRAGMENTS BEFORE WE CAN */
6169 /* HANDLE THE LCP PROTOCOL. */
6170 /*--------------------------------------------------*/
6171 checkLocalNodefailComplete(signal, failedNodeId, NF_GCP_TAKE_OVER);
6172
6173 startGcpMonitor(signal);
6174
6175 return;
6176 }//Dbdih::masterGcpConfFromFailedLab()
6177
6178 void
invalidateNodeLCP(Signal * signal,Uint32 nodeId,Uint32 tableId)6179 Dbdih::invalidateNodeLCP(Signal* signal, Uint32 nodeId, Uint32 tableId)
6180 {
6181 jamEntry();
6182 TabRecordPtr tabPtr;
6183 tabPtr.i = tableId;
6184 const Uint32 RT_BREAK = 64;
6185 if (ERROR_INSERTED(7125)) {
6186 return;
6187 }//if
6188 for (Uint32 i = 0; i<RT_BREAK; i++) {
6189 jam();
6190 if (tabPtr.i >= ctabFileSize){
6191 jam();
6192 /**
6193 * Ready with entire loop
6194 * Return to master
6195 */
6196 if (ERROR_INSERTED(7204))
6197 {
6198 CLEAR_ERROR_INSERT_VALUE;
6199 }
6200 setAllowNodeStart(nodeId, true);
6201 if (getNodeStatus(nodeId) == NodeRecord::STARTING) {
6202 jam();
6203 StartInfoConf * conf = (StartInfoConf*)&signal->theData[0];
6204 conf->sendingNodeId = cownNodeId;
6205 conf->startingNodeId = nodeId;
6206 sendSignal(cmasterdihref, GSN_START_INFOCONF, signal,
6207 StartInfoConf::SignalLength, JBB);
6208 }//if
6209 return;
6210 }//if
6211 ptrAss(tabPtr, tabRecord);
6212 if (tabPtr.p->tabStatus == TabRecord::TS_ACTIVE) {
6213 jam();
6214 invalidateNodeLCP(signal, nodeId, tabPtr);
6215 return;
6216 }//if
6217 tabPtr.i++;
6218 }//for
6219 signal->theData[0] = DihContinueB::ZINVALIDATE_NODE_LCP;
6220 signal->theData[1] = nodeId;
6221 signal->theData[2] = tabPtr.i;
6222 sendSignal(reference(), GSN_CONTINUEB, signal, 3, JBB);
6223 }//Dbdih::invalidateNodeLCP()
6224
6225 void
invalidateNodeLCP(Signal * signal,Uint32 nodeId,TabRecordPtr tabPtr)6226 Dbdih::invalidateNodeLCP(Signal* signal, Uint32 nodeId, TabRecordPtr tabPtr)
6227 {
6228 /**
6229 * Check so that no one else is using the tab descriptior
6230 */
6231 if (tabPtr.p->tabCopyStatus != TabRecord::CS_IDLE) {
6232 jam();
6233 signal->theData[0] = DihContinueB::ZINVALIDATE_NODE_LCP;
6234 signal->theData[1] = nodeId;
6235 signal->theData[2] = tabPtr.i;
6236 sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 20, 3);
6237 return;
6238 }//if
6239
6240 /**
6241 * For each fragment
6242 */
6243 bool modified = false;
6244 FragmentstorePtr fragPtr;
6245 for(Uint32 fragNo = 0; fragNo < tabPtr.p->totalfragments; fragNo++){
6246 jam();
6247 getFragstore(tabPtr.p, fragNo, fragPtr);
6248 /**
6249 * For each of replica record
6250 */
6251 ReplicaRecordPtr replicaPtr;
6252 for(replicaPtr.i = fragPtr.p->oldStoredReplicas; replicaPtr.i != RNIL;
6253 replicaPtr.i = replicaPtr.p->nextReplica) {
6254 jam();
6255 ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
6256 if(replicaPtr.p->procNode == nodeId){
6257 jam();
6258 /**
6259 * Found one with correct node id
6260 */
6261 /**
6262 * Invalidate all LCP's
6263 */
6264 modified = true;
6265 for(int i = 0; i < MAX_LCP_STORED; i++) {
6266 replicaPtr.p->lcpStatus[i] = ZINVALID;
6267 }//if
6268 /**
6269 * And reset nextLcp
6270 */
6271 replicaPtr.p->nextLcp = 0;
6272 replicaPtr.p->noCrashedReplicas = 0;
6273 }//if
6274 }//for
6275 }//for
6276
6277 if (modified) {
6278 jam();
6279 /**
6280 * Save table description to disk
6281 */
6282 tabPtr.p->tabCopyStatus = TabRecord::CS_INVALIDATE_NODE_LCP;
6283 tabPtr.p->tabUpdateState = TabRecord::US_INVALIDATE_NODE_LCP;
6284 tabPtr.p->tabRemoveNode = nodeId;
6285 signal->theData[0] = DihContinueB::ZPACK_TABLE_INTO_PAGES;
6286 signal->theData[1] = tabPtr.i;
6287 sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
6288 return;
6289 }
6290
6291 jam();
6292 /**
6293 * Move to next table
6294 */
6295 tabPtr.i++;
6296 signal->theData[0] = DihContinueB::ZINVALIDATE_NODE_LCP;
6297 signal->theData[1] = nodeId;
6298 signal->theData[2] = tabPtr.i;
6299
6300 if (ERROR_INSERTED(7204))
6301 {
6302 sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 2000, 3);
6303 }
6304 else
6305 {
6306 sendSignal(reference(), GSN_CONTINUEB, signal, 3, JBB);
6307 }
6308 return;
6309 }//Dbdih::invalidateNodeLCP()
6310
6311 /*------------------------------------------------*/
6312 /* INPUT: TABPTR */
6313 /* TNODEID */
6314 /*------------------------------------------------*/
removeNodeFromTables(Signal * signal,Uint32 nodeId,Uint32 tableId)6315 void Dbdih::removeNodeFromTables(Signal* signal,
6316 Uint32 nodeId, Uint32 tableId)
6317 {
6318 jamEntry();
6319 TabRecordPtr tabPtr;
6320 tabPtr.i = tableId;
6321 const Uint32 RT_BREAK = 64;
6322 for (Uint32 i = 0; i<RT_BREAK; i++) {
6323 jam();
6324 if (tabPtr.i >= ctabFileSize){
6325 jam();
6326 removeNodeFromTablesComplete(signal, nodeId);
6327 return;
6328 }//if
6329
6330 ptrAss(tabPtr, tabRecord);
6331 if (tabPtr.p->tabStatus == TabRecord::TS_ACTIVE) {
6332 jam();
6333 removeNodeFromTable(signal, nodeId, tabPtr);
6334 return;
6335 }//if
6336 tabPtr.i++;
6337 }//for
6338 signal->theData[0] = DihContinueB::ZREMOVE_NODE_FROM_TABLE;
6339 signal->theData[1] = nodeId;
6340 signal->theData[2] = tabPtr.i;
6341 sendSignal(reference(), GSN_CONTINUEB, signal, 3, JBB);
6342 }
6343
removeNodeFromTable(Signal * signal,Uint32 nodeId,TabRecordPtr tabPtr)6344 void Dbdih::removeNodeFromTable(Signal* signal,
6345 Uint32 nodeId, TabRecordPtr tabPtr){
6346
6347 /**
6348 * Check so that no one else is using the tab descriptior
6349 */
6350 if (tabPtr.p->tabCopyStatus != TabRecord::CS_IDLE) {
6351 jam();
6352 signal->theData[0] = DihContinueB::ZREMOVE_NODE_FROM_TABLE;
6353 signal->theData[1] = nodeId;
6354 signal->theData[2] = tabPtr.i;
6355 sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 20, 3);
6356 return;
6357 }//if
6358
6359 NodeRecordPtr nodePtr;
6360 nodePtr.i = nodeId;
6361 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
6362 const Uint32 lcpId = nodePtr.p->m_remove_node_from_table_lcp_id;
6363
6364 /**
6365 * For each fragment
6366 */
6367 Uint32 noOfRemovedReplicas = 0; // No of replicas removed
6368 Uint32 noOfRemovedLcpReplicas = 0; // No of replicas in LCP removed
6369 Uint32 noOfRemainingLcpReplicas = 0;// No of replicas in LCP remaining
6370
6371 const bool lcpOngoingFlag = (tabPtr.p->tabLcpStatus== TabRecord::TLS_ACTIVE);
6372 const bool unlogged = (tabPtr.p->tabStorage != TabRecord::ST_NORMAL);
6373
6374 FragmentstorePtr fragPtr;
6375 for(Uint32 fragNo = 0; fragNo < tabPtr.p->totalfragments; fragNo++){
6376 jam();
6377 getFragstore(tabPtr.p, fragNo, fragPtr);
6378
6379 /**
6380 * For each of replica record
6381 */
6382 bool found = false;
6383 ReplicaRecordPtr replicaPtr;
6384 for(replicaPtr.i = fragPtr.p->storedReplicas; replicaPtr.i != RNIL;
6385 replicaPtr.i = replicaPtr.p->nextReplica) {
6386 jam();
6387
6388 ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
6389 if(replicaPtr.p->procNode == nodeId){
6390 jam();
6391 found = true;
6392 noOfRemovedReplicas++;
6393 removeNodeFromStored(nodeId, fragPtr, replicaPtr, unlogged);
6394 if(replicaPtr.p->lcpOngoingFlag){
6395 jam();
6396 /**
6397 * This replica is currently LCP:ed
6398 */
6399 ndbrequire(fragPtr.p->noLcpReplicas > 0);
6400 fragPtr.p->noLcpReplicas --;
6401
6402 noOfRemovedLcpReplicas ++;
6403 replicaPtr.p->lcpOngoingFlag = false;
6404 }
6405
6406 if (lcpId != RNIL)
6407 {
6408 jam();
6409 Uint32 lcpNo = prevLcpNo(replicaPtr.p->nextLcp);
6410 if (replicaPtr.p->lcpStatus[lcpNo] == ZVALID &&
6411 replicaPtr.p->lcpId[lcpNo] == lcpId)
6412 {
6413 jam();
6414 replicaPtr.p->lcpStatus[lcpNo] = ZINVALID;
6415 replicaPtr.p->lcpId[lcpNo] = 0;
6416 replicaPtr.p->nextLcp = lcpNo;
6417 ndbout_c("REMOVING lcp: %u from table: %u frag: %u node: %u",
6418 SYSFILE->latestLCP_ID,
6419 tabPtr.i, fragNo, nodeId);
6420 }
6421 }
6422 }
6423 }
6424
6425 /**
6426 * Run updateNodeInfo to remove any dead nodes from list of activeNodes
6427 * see bug#15587
6428 */
6429 updateNodeInfo(fragPtr);
6430 noOfRemainingLcpReplicas += fragPtr.p->noLcpReplicas;
6431 }
6432
6433 if (noOfRemovedReplicas == 0)
6434 {
6435 jam();
6436 /**
6437 * The table had no replica on the failed node
6438 * continue with next table
6439 */
6440 tabPtr.i++;
6441 signal->theData[0] = DihContinueB::ZREMOVE_NODE_FROM_TABLE;
6442 signal->theData[1] = nodeId;
6443 signal->theData[2] = tabPtr.i;
6444 sendSignal(reference(), GSN_CONTINUEB, signal, 3, JBB);
6445 return;
6446 }
6447
6448 /**
6449 * We did remove at least one replica
6450 */
6451 bool ok = false;
6452 switch(tabPtr.p->tabLcpStatus){
6453 case TabRecord::TLS_COMPLETED:
6454 ok = true;
6455 jam();
6456 /**
6457 * WE WILL WRITE THE TABLE DESCRIPTION TO DISK AT THIS TIME
6458 * INDEPENDENT OF WHAT THE LOCAL CHECKPOINT NEEDED.
6459 * THIS IS TO ENSURE THAT THE FAILED NODES ARE ALSO UPDATED ON DISK
6460 * IN THE DIH DATA STRUCTURES BEFORE WE COMPLETE HANDLING OF THE
6461 * NODE FAILURE.
6462 */
6463 ndbrequire(noOfRemovedLcpReplicas == 0);
6464
6465 tabPtr.p->tabCopyStatus = TabRecord::CS_REMOVE_NODE;
6466 tabPtr.p->tabUpdateState = TabRecord::US_REMOVE_NODE;
6467 tabPtr.p->tabRemoveNode = nodeId;
6468 signal->theData[0] = DihContinueB::ZPACK_TABLE_INTO_PAGES;
6469 signal->theData[1] = tabPtr.i;
6470 sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
6471 return;
6472 break;
6473 case TabRecord::TLS_ACTIVE:
6474 ok = true;
6475 jam();
6476 /**
6477 * The table is participating in an LCP currently
6478 */
6479 // Fall through
6480 break;
6481 case TabRecord::TLS_WRITING_TO_FILE:
6482 ok = true;
6483 jam();
6484 /**
6485 * This should never happen since we in the beginning of this function
6486 * checks the tabCopyStatus
6487 */
6488 ndbrequire(lcpOngoingFlag);
6489 ndbrequire(false);
6490 break;
6491 }
6492 ndbrequire(ok);
6493
6494 /**
6495 * The table is participating in an LCP currently
6496 * and we removed some replicas that should have been checkpointed
6497 */
6498 ndbrequire(c_lcpState.lcpStatus != LCP_STATUS_IDLE);
6499 ndbrequire(tabPtr.p->tabLcpStatus == TabRecord::TLS_ACTIVE);
6500
6501 /**
6502 * Save the table
6503 */
6504 tabPtr.p->tabCopyStatus = TabRecord::CS_REMOVE_NODE;
6505 tabPtr.p->tabUpdateState = TabRecord::US_REMOVE_NODE;
6506 tabPtr.p->tabRemoveNode = nodeId;
6507 signal->theData[0] = DihContinueB::ZPACK_TABLE_INTO_PAGES;
6508 signal->theData[1] = tabPtr.i;
6509 sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
6510
6511 if(noOfRemainingLcpReplicas == 0){
6512 jam();
6513 /**
6514 * The removal on the failed node made the LCP complete
6515 */
6516 tabPtr.p->tabLcpStatus = TabRecord::TLS_WRITING_TO_FILE;
6517 checkLcpAllTablesDoneInLqh(__LINE__);
6518 }
6519 }
6520
6521 void
removeNodeFromTablesComplete(Signal * signal,Uint32 nodeId)6522 Dbdih::removeNodeFromTablesComplete(Signal* signal, Uint32 nodeId){
6523 jam();
6524
6525 /**
6526 * Check if we "accidently" completed a LCP
6527 */
6528 checkLcpCompletedLab(signal);
6529
6530 /**
6531 * Check if we (DIH) are finished with node fail handling
6532 */
6533 checkLocalNodefailComplete(signal, nodeId, NF_REMOVE_NODE_FROM_TABLE);
6534 }
6535
6536 void
checkLocalNodefailComplete(Signal * signal,Uint32 failedNodeId,NodefailHandlingStep step)6537 Dbdih::checkLocalNodefailComplete(Signal* signal, Uint32 failedNodeId,
6538 NodefailHandlingStep step){
6539 jam();
6540
6541 NodeRecordPtr nodePtr;
6542 nodePtr.i = failedNodeId;
6543 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
6544
6545 ndbrequire(nodePtr.p->m_nodefailSteps.get(step));
6546 nodePtr.p->m_nodefailSteps.clear(step);
6547
6548 if(nodePtr.p->m_nodefailSteps.count() > 0){
6549 jam();
6550 return;
6551 }
6552
6553 if (ERROR_INSERTED(7030))
6554 {
6555 g_eventLogger->info("Reenable GCP_PREPARE");
6556 CLEAR_ERROR_INSERT_VALUE;
6557 }
6558
6559 NFCompleteRep * const nf = (NFCompleteRep *)&signal->theData[0];
6560 nf->blockNo = DBDIH;
6561 nf->nodeId = cownNodeId;
6562 nf->failedNodeId = failedNodeId;
6563 nf->from = __LINE__;
6564 sendSignal(reference(), GSN_NF_COMPLETEREP, signal,
6565 NFCompleteRep::SignalLength, JBB);
6566 }
6567
6568
6569 void
setLocalNodefailHandling(Signal * signal,Uint32 failedNodeId,NodefailHandlingStep step)6570 Dbdih::setLocalNodefailHandling(Signal* signal, Uint32 failedNodeId,
6571 NodefailHandlingStep step){
6572 jam();
6573
6574 NodeRecordPtr nodePtr;
6575 nodePtr.i = failedNodeId;
6576 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
6577
6578 ndbrequire(!nodePtr.p->m_nodefailSteps.get(step));
6579 nodePtr.p->m_nodefailSteps.set(step);
6580 }
6581
startLcpTakeOverLab(Signal * signal,Uint32 failedNodeId)6582 void Dbdih::startLcpTakeOverLab(Signal* signal, Uint32 failedNodeId)
6583 {
6584 /*--------------------------------------------------------------------*/
6585 // Start LCP master take over process. Consists of the following steps.
6586 // 1) Ensure that all LQH's have reported all fragments they have been
6587 // told to checkpoint. Can be a fairly long step time-wise.
6588 // 2) Query all nodes about their LCP status.
6589 // During the query process we do not want our own state to change.
6590 // This can change due to delayed reception of LCP_REPORT, completed
6591 // save of table on disk or reception of DIH_LCPCOMPLETE from other
6592 // node.
6593 /*--------------------------------------------------------------------*/
6594 }//Dbdih::startLcpTakeOver()
6595
6596 void
execEMPTY_LCP_REP(Signal * signal)6597 Dbdih::execEMPTY_LCP_REP(Signal* signal)
6598 {
6599 jamEntry();
6600 EmptyLcpRep* rep = (EmptyLcpRep*)signal->getDataPtr();
6601
6602 Uint32 len = signal->getLength();
6603 ndbrequire(len > EmptyLcpRep::SignalLength);
6604 len -= EmptyLcpRep::SignalLength;
6605
6606 NdbNodeBitmask nodes;
6607 nodes.assign(NdbNodeBitmask::Size, rep->receiverGroup);
6608 NodeReceiverGroup rg (DBDIH, nodes);
6609 memmove(signal->getDataPtrSend(),
6610 signal->getDataPtr()+EmptyLcpRep::SignalLength, 4*len);
6611
6612 sendSignal(rg, GSN_EMPTY_LCP_CONF, signal, len, JBB);
6613 }
6614
execEMPTY_LCP_CONF(Signal * signal)6615 void Dbdih::execEMPTY_LCP_CONF(Signal* signal)
6616 {
6617 jamEntry();
6618
6619 ndbrequire(c_lcpMasterTakeOverState.state == LMTOS_WAIT_EMPTY_LCP);
6620
6621 const EmptyLcpConf * const conf = (EmptyLcpConf *)&signal->theData[0];
6622 Uint32 nodeId = conf->senderNodeId;
6623
6624 CRASH_INSERTION(7206);
6625
6626
6627 if(!conf->idle){
6628 jam();
6629 if (conf->tableId < c_lcpMasterTakeOverState.minTableId) {
6630 jam();
6631 c_lcpMasterTakeOverState.minTableId = conf->tableId;
6632 c_lcpMasterTakeOverState.minFragId = conf->fragmentId;
6633 } else if (conf->tableId == c_lcpMasterTakeOverState.minTableId &&
6634 conf->fragmentId < c_lcpMasterTakeOverState.minFragId) {
6635 jam();
6636 c_lcpMasterTakeOverState.minFragId = conf->fragmentId;
6637 }//if
6638 if(isMaster()){
6639 jam();
6640 c_lcpState.m_LAST_LCP_FRAG_ORD.setWaitingFor(nodeId);
6641 }
6642 }
6643
6644 receiveLoopMacro(EMPTY_LCP_REQ, nodeId);
6645 /*--------------------------------------------------------------------*/
6646 // Received all EMPTY_LCPCONF. We can continue with next phase of the
6647 // take over LCP master process.
6648 /*--------------------------------------------------------------------*/
6649 c_lcpMasterTakeOverState.set(LMTOS_WAIT_LCP_FRAG_REP, __LINE__);
6650 checkEmptyLcpComplete(signal);
6651 return;
6652 }//Dbdih::execEMPTY_LCPCONF()
6653
6654 void
checkEmptyLcpComplete(Signal * signal)6655 Dbdih::checkEmptyLcpComplete(Signal *signal){
6656
6657 ndbrequire(c_lcpMasterTakeOverState.state == LMTOS_WAIT_LCP_FRAG_REP);
6658
6659 if(c_lcpState.noOfLcpFragRepOutstanding > 0){
6660 jam();
6661 return;
6662 }
6663
6664 if(isMaster()){
6665 jam();
6666
6667 signal->theData[0] = NDB_LE_LCP_TakeoverStarted;
6668 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 1, JBB);
6669
6670 signal->theData[0] = 7012;
6671 execDUMP_STATE_ORD(signal);
6672
6673 if (ERROR_INSERTED(7194))
6674 {
6675 ndbout_c("7194 starting ZREMOVE_NODE_FROM_TABLE");
6676 signal->theData[0] = DihContinueB::ZREMOVE_NODE_FROM_TABLE;
6677 signal->theData[1] = c_lcpMasterTakeOverState.failedNodeId;
6678 signal->theData[2] = 0; // Tab id
6679 sendSignal(reference(), GSN_CONTINUEB, signal, 3, JBB);
6680 }
6681
6682 c_current_time = NdbTick_CurrentMillisecond();
6683 c_lcpState.m_start_time = c_current_time;
6684 c_lcpMasterTakeOverState.set(LMTOS_INITIAL, __LINE__);
6685 MasterLCPReq * const req = (MasterLCPReq *)&signal->theData[0];
6686 req->masterRef = reference();
6687 req->failedNodeId = c_lcpMasterTakeOverState.failedNodeId;
6688 sendLoopMacro(MASTER_LCPREQ, sendMASTER_LCPREQ, RNIL);
6689
6690 } else {
6691 sendMASTER_LCPCONF(signal);
6692 }
6693 }
6694
6695 /*--------------------------------------------------*/
6696 /* THE MASTER HAS FAILED AND THE NEW MASTER IS*/
6697 /* QUERYING THIS NODE ABOUT THE STATE OF THE */
6698 /* LOCAL CHECKPOINT PROTOCOL. */
6699 /*--------------------------------------------------*/
execMASTER_LCPREQ(Signal * signal)6700 void Dbdih::execMASTER_LCPREQ(Signal* signal)
6701 {
6702 const MasterLCPReq * const req = (MasterLCPReq *)&signal->theData[0];
6703 jamEntry();
6704 const BlockReference newMasterBlockref = req->masterRef;
6705
6706 CRASH_INSERTION(7205);
6707
6708 if (ERROR_INSERTED(7207))
6709 {
6710 jam();
6711 SET_ERROR_INSERT_VALUE(7208);
6712 sendSignalWithDelay(reference(), GSN_MASTER_LCPREQ, signal,
6713 500, signal->getLength());
6714 return;
6715 }
6716
6717 if (ERROR_INSERTED(7208))
6718 {
6719 jam();
6720 signal->theData[0] = 9999;
6721 sendSignal(numberToRef(CMVMI, refToNode(newMasterBlockref)),
6722 GSN_NDB_TAMPER, signal, 1, JBB);
6723 }
6724
6725 if (newMasterBlockref != cmasterdihref)
6726 {
6727 jam();
6728 ndbout_c("resending GSN_MASTER_LCPREQ");
6729 sendSignalWithDelay(reference(), GSN_MASTER_LCPREQ, signal,
6730 50, signal->getLength());
6731 return;
6732 }
6733 Uint32 failedNodeId = req->failedNodeId;
6734
6735 /**
6736 * There can be no take over with the same master
6737 */
6738 ndbrequire(c_lcpState.m_masterLcpDihRef != newMasterBlockref);
6739 c_lcpState.m_masterLcpDihRef = newMasterBlockref;
6740 c_lcpState.m_MASTER_LCPREQ_Received = true;
6741 c_lcpState.m_MASTER_LCPREQ_FailedNodeId = failedNodeId;
6742
6743 if(newMasterBlockref != cmasterdihref){
6744 jam();
6745 ndbrequire(0);
6746 }
6747
6748 if (ERROR_INSERTED(7209))
6749 {
6750 SET_ERROR_INSERT_VALUE(7210);
6751 }
6752
6753 sendMASTER_LCPCONF(signal);
6754 }//Dbdih::execMASTER_LCPREQ()
6755
6756 void
sendMASTER_LCPCONF(Signal * signal)6757 Dbdih::sendMASTER_LCPCONF(Signal * signal){
6758
6759 if(!c_EMPTY_LCP_REQ_Counter.done()){
6760 /**
6761 * Have not received all EMPTY_LCP_REP
6762 * dare not answer MASTER_LCP_CONF yet
6763 */
6764 jam();
6765 return;
6766 }
6767
6768 if(!c_lcpState.m_MASTER_LCPREQ_Received){
6769 jam();
6770 /**
6771 * Has not received MASTER_LCPREQ yet
6772 */
6773 return;
6774 }
6775
6776 if(c_lcpState.lcpStatus == LCP_INIT_TABLES){
6777 jam();
6778 /**
6779 * Still aborting old initLcpLab
6780 */
6781 return;
6782 }
6783
6784 if(c_lcpState.lcpStatus == LCP_COPY_GCI)
6785 {
6786 jam();
6787 /**
6788 * Restart it
6789 */
6790 //Uint32 lcpId = SYSFILE->latestLCP_ID;
6791 SYSFILE->latestLCP_ID--;
6792 Sysfile::clearLCPOngoing(SYSFILE->systemRestartBits);
6793 c_lcpState.setLcpStatus(LCP_STATUS_IDLE, __LINE__);
6794 #if 0
6795 if(c_copyGCISlave.m_copyReason == CopyGCIReq::LOCAL_CHECKPOINT){
6796 g_eventLogger->info("Dbdih: Also resetting c_copyGCISlave");
6797 c_copyGCISlave.m_copyReason = CopyGCIReq::IDLE;
6798 c_copyGCISlave.m_expectedNextWord = 0;
6799 }
6800 #endif
6801 }
6802
6803 MasterLCPConf::State lcpState;
6804 switch (c_lcpState.lcpStatus) {
6805 case LCP_STATUS_IDLE:
6806 jam();
6807 /*------------------------------------------------*/
6808 /* LOCAL CHECKPOINT IS CURRENTLY NOT ACTIVE */
6809 /* SINCE NO COPY OF RESTART INFORMATION HAVE*/
6810 /* BEEN RECEIVED YET. ALSO THE PREVIOUS */
6811 /* CHECKPOINT HAVE BEEN FULLY COMPLETED. */
6812 /*------------------------------------------------*/
6813 lcpState = MasterLCPConf::LCP_STATUS_IDLE;
6814 break;
6815 case LCP_STATUS_ACTIVE:
6816 jam();
6817 /*--------------------------------------------------*/
6818 /* COPY OF RESTART INFORMATION HAS BEEN */
6819 /* PERFORMED AND ALSO RESPONSE HAVE BEEN SENT.*/
6820 /*--------------------------------------------------*/
6821 lcpState = MasterLCPConf::LCP_STATUS_ACTIVE;
6822 break;
6823 case LCP_TAB_COMPLETED:
6824 jam();
6825 /*--------------------------------------------------------*/
6826 /* ALL LCP_REPORT'S HAVE BEEN COMPLETED FOR */
6827 /* ALL TABLES. SAVE OF AT LEAST ONE TABLE IS */
6828 /* ONGOING YET. */
6829 /*--------------------------------------------------------*/
6830 lcpState = MasterLCPConf::LCP_TAB_COMPLETED;
6831 break;
6832 case LCP_TAB_SAVED:
6833 jam();
6834 /*--------------------------------------------------------*/
6835 /* ALL LCP_REPORT'S HAVE BEEN COMPLETED FOR */
6836 /* ALL TABLES. ALL TABLES HAVE ALSO BEEN SAVED */
6837 /* ALL OTHER NODES ARE NOT YET FINISHED WITH */
6838 /* THE LOCAL CHECKPOINT. */
6839 /*--------------------------------------------------------*/
6840 lcpState = MasterLCPConf::LCP_TAB_SAVED;
6841 break;
6842 case LCP_TCGET:
6843 case LCP_CALCULATE_KEEP_GCI:
6844 case LCP_TC_CLOPSIZE:
6845 case LCP_START_LCP_ROUND:
6846 /**
6847 * These should only exists on the master
6848 * but since this is master take over
6849 * it not allowed
6850 */
6851 ndbrequire(false);
6852 lcpState= MasterLCPConf::LCP_STATUS_IDLE; // remove warning
6853 break;
6854 case LCP_COPY_GCI:
6855 case LCP_INIT_TABLES:
6856 /**
6857 * These two states are handled by if statements above
6858 */
6859 ndbrequire(false);
6860 lcpState= MasterLCPConf::LCP_STATUS_IDLE; // remove warning
6861 break;
6862 default:
6863 ndbrequire(false);
6864 lcpState= MasterLCPConf::LCP_STATUS_IDLE; // remove warning
6865 }//switch
6866
6867 Uint32 failedNodeId = c_lcpState.m_MASTER_LCPREQ_FailedNodeId;
6868 MasterLCPConf * const conf = (MasterLCPConf *)&signal->theData[0];
6869 conf->senderNodeId = cownNodeId;
6870 conf->lcpState = lcpState;
6871 conf->failedNodeId = failedNodeId;
6872 sendSignal(c_lcpState.m_masterLcpDihRef, GSN_MASTER_LCPCONF,
6873 signal, MasterLCPConf::SignalLength, JBB);
6874
6875 // Answer to MASTER_LCPREQ sent, reset flag so
6876 // that it's not sent again before another request comes in
6877 c_lcpState.m_MASTER_LCPREQ_Received = false;
6878
6879 if(c_lcpState.lcpStatus == LCP_TAB_SAVED){
6880 #ifdef VM_TRACE
6881 g_eventLogger->info("Sending extra GSN_LCP_COMPLETE_REP to new master");
6882 #endif
6883 sendLCP_COMPLETE_REP(signal);
6884 }
6885
6886 if(!isMaster()){
6887 c_lcpMasterTakeOverState.set(LMTOS_IDLE, __LINE__);
6888 checkLocalNodefailComplete(signal, failedNodeId, NF_LCP_TAKE_OVER);
6889 }
6890
6891 return;
6892 }
6893
6894 NdbOut&
operator <<(NdbOut & out,const Dbdih::LcpMasterTakeOverState state)6895 operator<<(NdbOut& out, const Dbdih::LcpMasterTakeOverState state){
6896 switch(state){
6897 case Dbdih::LMTOS_IDLE:
6898 out << "LMTOS_IDLE";
6899 break;
6900 case Dbdih::LMTOS_WAIT_EMPTY_LCP:
6901 out << "LMTOS_WAIT_EMPTY_LCP";
6902 break;
6903 case Dbdih::LMTOS_WAIT_LCP_FRAG_REP:
6904 out << "LMTOS_WAIT_EMPTY_LCP";
6905 break;
6906 case Dbdih::LMTOS_INITIAL:
6907 out << "LMTOS_INITIAL";
6908 break;
6909 case Dbdih::LMTOS_ALL_IDLE:
6910 out << "LMTOS_ALL_IDLE";
6911 break;
6912 case Dbdih::LMTOS_ALL_ACTIVE:
6913 out << "LMTOS_ALL_ACTIVE";
6914 break;
6915 case Dbdih::LMTOS_LCP_CONCLUDING:
6916 out << "LMTOS_LCP_CONCLUDING";
6917 break;
6918 case Dbdih::LMTOS_COPY_ONGOING:
6919 out << "LMTOS_COPY_ONGOING";
6920 break;
6921 }
6922 return out;
6923 }
6924
6925 struct MASTERLCP_StateTransitions {
6926 Dbdih::LcpMasterTakeOverState CurrentState;
6927 MasterLCPConf::State ParticipantState;
6928 Dbdih::LcpMasterTakeOverState NewState;
6929 };
6930
6931 static const
6932 MASTERLCP_StateTransitions g_masterLCPTakeoverStateTransitions[] = {
6933 /**
6934 * Current = LMTOS_INITIAL
6935 */
6936 { Dbdih::LMTOS_INITIAL,
6937 MasterLCPConf::LCP_STATUS_IDLE,
6938 Dbdih::LMTOS_ALL_IDLE },
6939
6940 { Dbdih::LMTOS_INITIAL,
6941 MasterLCPConf::LCP_STATUS_ACTIVE,
6942 Dbdih::LMTOS_ALL_ACTIVE },
6943
6944 { Dbdih::LMTOS_INITIAL,
6945 MasterLCPConf::LCP_TAB_COMPLETED,
6946 Dbdih::LMTOS_LCP_CONCLUDING },
6947
6948 { Dbdih::LMTOS_INITIAL,
6949 MasterLCPConf::LCP_TAB_SAVED,
6950 Dbdih::LMTOS_LCP_CONCLUDING },
6951
6952 /**
6953 * Current = LMTOS_ALL_IDLE
6954 */
6955 { Dbdih::LMTOS_ALL_IDLE,
6956 MasterLCPConf::LCP_STATUS_IDLE,
6957 Dbdih::LMTOS_ALL_IDLE },
6958
6959 { Dbdih::LMTOS_ALL_IDLE,
6960 MasterLCPConf::LCP_STATUS_ACTIVE,
6961 Dbdih::LMTOS_COPY_ONGOING },
6962
6963 { Dbdih::LMTOS_ALL_IDLE,
6964 MasterLCPConf::LCP_TAB_COMPLETED,
6965 Dbdih::LMTOS_LCP_CONCLUDING },
6966
6967 { Dbdih::LMTOS_ALL_IDLE,
6968 MasterLCPConf::LCP_TAB_SAVED,
6969 Dbdih::LMTOS_LCP_CONCLUDING },
6970
6971 /**
6972 * Current = LMTOS_COPY_ONGOING
6973 */
6974 { Dbdih::LMTOS_COPY_ONGOING,
6975 MasterLCPConf::LCP_STATUS_IDLE,
6976 Dbdih::LMTOS_COPY_ONGOING },
6977
6978 { Dbdih::LMTOS_COPY_ONGOING,
6979 MasterLCPConf::LCP_STATUS_ACTIVE,
6980 Dbdih::LMTOS_COPY_ONGOING },
6981
6982 /**
6983 * Current = LMTOS_ALL_ACTIVE
6984 */
6985 { Dbdih::LMTOS_ALL_ACTIVE,
6986 MasterLCPConf::LCP_STATUS_IDLE,
6987 Dbdih::LMTOS_COPY_ONGOING },
6988
6989 { Dbdih::LMTOS_ALL_ACTIVE,
6990 MasterLCPConf::LCP_STATUS_ACTIVE,
6991 Dbdih::LMTOS_ALL_ACTIVE },
6992
6993 { Dbdih::LMTOS_ALL_ACTIVE,
6994 MasterLCPConf::LCP_TAB_COMPLETED,
6995 Dbdih::LMTOS_LCP_CONCLUDING },
6996
6997 { Dbdih::LMTOS_ALL_ACTIVE,
6998 MasterLCPConf::LCP_TAB_SAVED,
6999 Dbdih::LMTOS_LCP_CONCLUDING },
7000
7001 /**
7002 * Current = LMTOS_LCP_CONCLUDING
7003 */
7004 { Dbdih::LMTOS_LCP_CONCLUDING,
7005 MasterLCPConf::LCP_STATUS_IDLE,
7006 Dbdih::LMTOS_LCP_CONCLUDING },
7007
7008 { Dbdih::LMTOS_LCP_CONCLUDING,
7009 MasterLCPConf::LCP_STATUS_ACTIVE,
7010 Dbdih::LMTOS_LCP_CONCLUDING },
7011
7012 { Dbdih::LMTOS_LCP_CONCLUDING,
7013 MasterLCPConf::LCP_TAB_COMPLETED,
7014 Dbdih::LMTOS_LCP_CONCLUDING },
7015
7016 { Dbdih::LMTOS_LCP_CONCLUDING,
7017 MasterLCPConf::LCP_TAB_SAVED,
7018 Dbdih::LMTOS_LCP_CONCLUDING }
7019 };
7020
7021 const Uint32 g_masterLCPTakeoverStateTransitionsRows =
7022 sizeof(g_masterLCPTakeoverStateTransitions) / sizeof(struct MASTERLCP_StateTransitions);
7023
execMASTER_LCPCONF(Signal * signal)7024 void Dbdih::execMASTER_LCPCONF(Signal* signal)
7025 {
7026 const MasterLCPConf * const conf = (MasterLCPConf *)&signal->theData[0];
7027 jamEntry();
7028
7029 if (ERROR_INSERTED(7194))
7030 {
7031 ndbout_c("delaying MASTER_LCPCONF due to error 7194");
7032 sendSignalWithDelay(reference(), GSN_MASTER_LCPCONF, signal,
7033 300, signal->getLength());
7034 return;
7035 }
7036
7037 Uint32 senderNodeId = conf->senderNodeId;
7038 MasterLCPConf::State lcpState = (MasterLCPConf::State)conf->lcpState;
7039 const Uint32 failedNodeId = conf->failedNodeId;
7040 NodeRecordPtr nodePtr;
7041 nodePtr.i = senderNodeId;
7042 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
7043 nodePtr.p->lcpStateAtTakeOver = lcpState;
7044
7045 CRASH_INSERTION(7180);
7046
7047 #ifdef VM_TRACE
7048 g_eventLogger->info("MASTER_LCPCONF");
7049 printMASTER_LCP_CONF(stdout, &signal->theData[0], 0, 0);
7050 #endif
7051
7052 bool found = false;
7053 for(Uint32 i = 0; i<g_masterLCPTakeoverStateTransitionsRows; i++){
7054 const struct MASTERLCP_StateTransitions * valid =
7055 &g_masterLCPTakeoverStateTransitions[i];
7056
7057 if(valid->CurrentState == c_lcpMasterTakeOverState.state &&
7058 valid->ParticipantState == lcpState){
7059 jam();
7060 found = true;
7061 c_lcpMasterTakeOverState.set(valid->NewState, __LINE__);
7062 break;
7063 }
7064 }
7065 ndbrequire(found);
7066
7067 bool ok = false;
7068 switch(lcpState){
7069 case MasterLCPConf::LCP_STATUS_IDLE:
7070 ok = true;
7071 break;
7072 case MasterLCPConf::LCP_STATUS_ACTIVE:
7073 case MasterLCPConf::LCP_TAB_COMPLETED:
7074 case MasterLCPConf::LCP_TAB_SAVED:
7075 ok = true;
7076 c_lcpState.m_LCP_COMPLETE_REP_Counter_DIH.setWaitingFor(nodePtr.i);
7077 break;
7078 }
7079 ndbrequire(ok);
7080
7081 receiveLoopMacro(MASTER_LCPREQ, senderNodeId);
7082 /*-------------------------------------------------------------------------*/
7083 // We have now received all responses and are ready to take over the LCP
7084 // protocol as master.
7085 /*-------------------------------------------------------------------------*/
7086 MASTER_LCPhandling(signal, failedNodeId);
7087 }//Dbdih::execMASTER_LCPCONF()
7088
execMASTER_LCPREF(Signal * signal)7089 void Dbdih::execMASTER_LCPREF(Signal* signal)
7090 {
7091 const MasterLCPRef * const ref = (MasterLCPRef *)&signal->theData[0];
7092 jamEntry();
7093
7094 Uint32 senderNodeId = ref->senderNodeId;
7095 Uint32 failedNodeId = ref->failedNodeId;
7096
7097 if (c_lcpState.m_LCP_COMPLETE_REP_Counter_LQH.isWaitingFor(senderNodeId))
7098 {
7099 jam();
7100 c_lcpState.m_LCP_COMPLETE_REP_Counter_LQH.clearWaitingFor(senderNodeId);
7101 }
7102
7103 receiveLoopMacro(MASTER_LCPREQ, senderNodeId);
7104 /*-------------------------------------------------------------------------*/
7105 // We have now received all responses and are ready to take over the LCP
7106 // protocol as master.
7107 /*-------------------------------------------------------------------------*/
7108 MASTER_LCPhandling(signal, failedNodeId);
7109 }//Dbdih::execMASTER_LCPREF()
7110
MASTER_LCPhandling(Signal * signal,Uint32 failedNodeId)7111 void Dbdih::MASTER_LCPhandling(Signal* signal, Uint32 failedNodeId)
7112 {
7113 /*-------------------------------------------------------------------------
7114 *
7115 * WE ARE NOW READY TO CONCLUDE THE TAKE OVER AS MASTER.
7116 * WE HAVE ENOUGH INFO TO START UP ACTIVITIES IN THE PROPER PLACE.
7117 * ALSO SET THE PROPER STATE VARIABLES.
7118 *------------------------------------------------------------------------*/
7119 c_lcpState.currentFragment.tableId = c_lcpMasterTakeOverState.minTableId;
7120 c_lcpState.currentFragment.fragmentId = c_lcpMasterTakeOverState.minFragId;
7121 c_lcpState.m_LAST_LCP_FRAG_ORD = c_lcpState.m_LCP_COMPLETE_REP_Counter_LQH;
7122
7123 NodeRecordPtr failedNodePtr;
7124 failedNodePtr.i = failedNodeId;
7125 ptrCheckGuard(failedNodePtr, MAX_NDB_NODES, nodeRecord);
7126
7127 switch (c_lcpMasterTakeOverState.state) {
7128 case LMTOS_ALL_IDLE:
7129 jam();
7130 /* --------------------------------------------------------------------- */
7131 // All nodes were idle in the LCP protocol. Start checking for start of LCP
7132 // protocol.
7133 /* --------------------------------------------------------------------- */
7134 #ifdef VM_TRACE
7135 g_eventLogger->info("MASTER_LCPhandling:: LMTOS_ALL_IDLE -> checkLcpStart");
7136 #endif
7137 checkLcpStart(signal, __LINE__);
7138 break;
7139 case LMTOS_COPY_ONGOING:
7140 jam();
7141 /* --------------------------------------------------------------------- */
7142 // We were in the starting process of the LCP protocol. We will restart the
7143 // protocol by calculating the keep gci and storing the new lcp id.
7144 /* --------------------------------------------------------------------- */
7145 #ifdef VM_TRACE
7146 g_eventLogger->info("MASTER_LCPhandling:: LMTOS_COPY_ONGOING -> storeNewLcpId");
7147 #endif
7148 if (c_lcpState.lcpStatus == LCP_STATUS_ACTIVE) {
7149 jam();
7150 /*---------------------------------------------------------------------*/
7151 /* WE NEED TO DECREASE THE LATEST LCP ID SINCE WE HAVE ALREADY */
7152 /* STARTED THIS */
7153 /* LOCAL CHECKPOINT. */
7154 /*---------------------------------------------------------------------*/
7155 Uint32 lcpId = SYSFILE->latestLCP_ID;
7156 #ifdef VM_TRACE
7157 g_eventLogger->info("Decreasing latestLCP_ID from %d to %d", lcpId, lcpId - 1);
7158 #endif
7159 SYSFILE->latestLCP_ID--;
7160 }//if
7161
7162 {
7163 Mutex mutex(signal, c_mutexMgr, c_fragmentInfoMutex_lcp);
7164 Callback c = { safe_cast(&Dbdih::lcpFragmentMutex_locked), 0 };
7165 ndbrequire(mutex.lock(c, false));
7166 }
7167 break;
7168 case LMTOS_ALL_ACTIVE:
7169 {
7170 jam();
7171 /* -------------------------------------------------------------------
7172 * Everybody was in the active phase. We will restart sending
7173 * LCP_FRAGORD to the nodes from the new master.
7174 * We also need to set dihLcpStatus to ZACTIVE
7175 * in the master node since the master will wait for all nodes to
7176 * complete before finalising the LCP process.
7177 * ------------------------------------------------------------------ */
7178 #ifdef VM_TRACE
7179 g_eventLogger->info("MASTER_LCPhandling:: LMTOS_ALL_ACTIVE -> "
7180 "startLcpRoundLoopLab(table=%u, fragment=%u)",
7181 c_lcpMasterTakeOverState.minTableId,
7182 c_lcpMasterTakeOverState.minFragId);
7183 #endif
7184
7185 c_lcpState.keepGci = SYSFILE->keepGCI;
7186
7187 /**
7188 * We need to reaquire the mutex...
7189 */
7190 Mutex mutex(signal, c_mutexMgr, c_fragmentInfoMutex_lcp);
7191 Callback c =
7192 { safe_cast(&Dbdih::master_lcp_fragmentMutex_locked), failedNodePtr.i };
7193 ndbrequire(mutex.lock(c, false));
7194 return;
7195 }
7196 case LMTOS_LCP_CONCLUDING:
7197 {
7198 jam();
7199 /* ------------------------------------------------------------------- */
7200 // The LCP process is in the finalisation phase. We simply wait for it to
7201 // complete with signals arriving in. We need to check also if we should
7202 // change state due to table write completion during state
7203 // collection phase.
7204 /* ------------------------------------------------------------------- */
7205 ndbrequire(c_lcpState.lcpStatus != LCP_STATUS_IDLE);
7206
7207 /**
7208 * We need to reaquire the mutex...
7209 */
7210 Mutex mutex(signal, c_mutexMgr, c_fragmentInfoMutex_lcp);
7211 Callback c =
7212 { safe_cast(&Dbdih::master_lcp_fragmentMutex_locked), failedNodePtr.i };
7213 ndbrequire(mutex.lock(c, false));
7214 return;
7215 }
7216 default:
7217 ndbrequire(false);
7218 break;
7219 }//switch
7220 signal->theData[0] = NDB_LE_LCP_TakeoverCompleted;
7221 signal->theData[1] = c_lcpMasterTakeOverState.state;
7222 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
7223
7224 signal->theData[0] = 7012;
7225 execDUMP_STATE_ORD(signal);
7226
7227 c_lcpMasterTakeOverState.set(LMTOS_IDLE, __LINE__);
7228
7229 checkLocalNodefailComplete(signal, failedNodePtr.i, NF_LCP_TAKE_OVER);
7230 }
7231
7232 /* ------------------------------------------------------------------------- */
7233 /* A BLOCK OR A NODE HAS COMPLETED THE HANDLING OF THE NODE FAILURE. */
7234 /* ------------------------------------------------------------------------- */
execNF_COMPLETEREP(Signal * signal)7235 void Dbdih::execNF_COMPLETEREP(Signal* signal)
7236 {
7237 NodeRecordPtr failedNodePtr;
7238 NFCompleteRep * const nfCompleteRep = (NFCompleteRep *)&signal->theData[0];
7239 jamEntry();
7240 const Uint32 blockNo = nfCompleteRep->blockNo;
7241 Uint32 nodeId = nfCompleteRep->nodeId;
7242 failedNodePtr.i = nfCompleteRep->failedNodeId;
7243
7244 ptrCheckGuard(failedNodePtr, MAX_NDB_NODES, nodeRecord);
7245 switch (blockNo) {
7246 case DBTC:
7247 jam();
7248 ndbrequire(failedNodePtr.p->dbtcFailCompleted == ZFALSE);
7249 /* -------------------------------------------------------------------- */
7250 // Report the event that DBTC completed node failure handling.
7251 /* -------------------------------------------------------------------- */
7252 signal->theData[0] = NDB_LE_NodeFailCompleted;
7253 signal->theData[1] = DBTC;
7254 signal->theData[2] = failedNodePtr.i;
7255 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3, JBB);
7256
7257 failedNodePtr.p->dbtcFailCompleted = ZTRUE;
7258 break;
7259 case DBDICT:
7260 jam();
7261 ndbrequire(failedNodePtr.p->dbdictFailCompleted == ZFALSE);
7262 /* --------------------------------------------------------------------- */
7263 // Report the event that DBDICT completed node failure handling.
7264 /* --------------------------------------------------------------------- */
7265 signal->theData[0] = NDB_LE_NodeFailCompleted;
7266 signal->theData[1] = DBDICT;
7267 signal->theData[2] = failedNodePtr.i;
7268 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3, JBB);
7269
7270 failedNodePtr.p->dbdictFailCompleted = ZTRUE;
7271 break;
7272 case DBDIH:
7273 jam();
7274 ndbrequire(failedNodePtr.p->dbdihFailCompleted == ZFALSE);
7275 /* --------------------------------------------------------------------- */
7276 // Report the event that DBDIH completed node failure handling.
7277 /* --------------------------------------------------------------------- */
7278 signal->theData[0] = NDB_LE_NodeFailCompleted;
7279 signal->theData[1] = DBDIH;
7280 signal->theData[2] = failedNodePtr.i;
7281 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3, JBB);
7282
7283 failedNodePtr.p->dbdihFailCompleted = ZTRUE;
7284 break;
7285 case DBLQH:
7286 jam();
7287 ndbrequire(failedNodePtr.p->dblqhFailCompleted == ZFALSE);
7288 /* --------------------------------------------------------------------- */
7289 // Report the event that DBDIH completed node failure handling.
7290 /* --------------------------------------------------------------------- */
7291 signal->theData[0] = NDB_LE_NodeFailCompleted;
7292 signal->theData[1] = DBLQH;
7293 signal->theData[2] = failedNodePtr.i;
7294 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3, JBB);
7295
7296 failedNodePtr.p->dblqhFailCompleted = ZTRUE;
7297 break;
7298 case 0: /* Node has finished */
7299 jam();
7300 ndbrequire(nodeId < MAX_NDB_NODES);
7301
7302 if (failedNodePtr.p->recNODE_FAILREP == ZFALSE) {
7303 jam();
7304 /* ------------------------------------------------------------------- */
7305 // We received a report about completion of node failure before we
7306 // received the message about the NODE failure ourselves.
7307 // We will send the signal to ourselves with a small delay
7308 // (10 milliseconds).
7309 /* ------------------------------------------------------------------- */
7310 //nf->from = __LINE__;
7311 sendSignalWithDelay(reference(), GSN_NF_COMPLETEREP, signal, 10,
7312 signal->length());
7313 return;
7314 }//if
7315
7316 if (!failedNodePtr.p->m_NF_COMPLETE_REP.isWaitingFor(nodeId)){
7317 jam();
7318 return;
7319 }
7320
7321 failedNodePtr.p->m_NF_COMPLETE_REP.clearWaitingFor(nodeId);;
7322
7323 /* -------------------------------------------------------------------- */
7324 // Report the event that nodeId has completed node failure handling.
7325 /* -------------------------------------------------------------------- */
7326 signal->theData[0] = NDB_LE_NodeFailCompleted;
7327 signal->theData[1] = 0;
7328 signal->theData[2] = failedNodePtr.i;
7329 signal->theData[3] = nodeId;
7330 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);
7331
7332 nodeFailCompletedCheckLab(signal, failedNodePtr);
7333 return;
7334 break;
7335 default:
7336 ndbrequire(false);
7337 return;
7338 break;
7339 }//switch
7340 if (failedNodePtr.p->dbtcFailCompleted == ZFALSE) {
7341 jam();
7342 return;
7343 }//if
7344 if (failedNodePtr.p->dbdictFailCompleted == ZFALSE) {
7345 jam();
7346 return;
7347 }//if
7348 if (failedNodePtr.p->dbdihFailCompleted == ZFALSE) {
7349 jam();
7350 return;
7351 }//if
7352 if (failedNodePtr.p->dblqhFailCompleted == ZFALSE) {
7353 jam();
7354 return;
7355 }//if
7356 /* ----------------------------------------------------------------------- */
7357 /* ALL BLOCKS IN THIS NODE HAVE COMPLETED THEIR PART OF HANDLING THE */
7358 /* NODE FAILURE. WE CAN NOW REPORT THIS COMPLETION TO ALL OTHER NODES. */
7359 /* ----------------------------------------------------------------------- */
7360 NodeRecordPtr nodePtr;
7361 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
7362 jam();
7363 ptrAss(nodePtr, nodeRecord);
7364 if (nodePtr.p->nodeStatus == NodeRecord::ALIVE) {
7365 jam();
7366 BlockReference ref = calcDihBlockRef(nodePtr.i);
7367 NFCompleteRep * const nf = (NFCompleteRep *)&signal->theData[0];
7368 nf->blockNo = 0;
7369 nf->nodeId = cownNodeId;
7370 nf->failedNodeId = failedNodePtr.i;
7371 nf->from = __LINE__;
7372 sendSignal(ref, GSN_NF_COMPLETEREP, signal,
7373 NFCompleteRep::SignalLength, JBB);
7374 }//if
7375 }//for
7376 return;
7377 }//Dbdih::execNF_COMPLETEREP()
7378
nodeFailCompletedCheckLab(Signal * signal,NodeRecordPtr failedNodePtr)7379 void Dbdih::nodeFailCompletedCheckLab(Signal* signal,
7380 NodeRecordPtr failedNodePtr)
7381 {
7382 jam();
7383 if (!failedNodePtr.p->m_NF_COMPLETE_REP.done()){
7384 jam();
7385 return;
7386 }//if
7387 /* ---------------------------------------------------------------------- */
7388 /* ALL BLOCKS IN ALL NODES HAVE NOW REPORTED COMPLETION OF THE NODE */
7389 /* FAILURE HANDLING. WE ARE NOW READY TO ACCEPT THAT THIS NODE STARTS */
7390 /* AGAIN. */
7391 /* ---------------------------------------------------------------------- */
7392 jam();
7393 failedNodePtr.p->nodeStatus = NodeRecord::DEAD;
7394 failedNodePtr.p->recNODE_FAILREP = ZFALSE;
7395
7396 /* ---------------------------------------------------------------------- */
7397 // Report the event that all nodes completed node failure handling.
7398 /* ---------------------------------------------------------------------- */
7399 signal->theData[0] = NDB_LE_NodeFailCompleted;
7400 signal->theData[1] = 0;
7401 signal->theData[2] = failedNodePtr.i;
7402 signal->theData[3] = 0;
7403 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);
7404
7405 /* ---------------------------------------------------------------------- */
7406 // Report to QMGR that we have concluded recovery handling of this node.
7407 /* ---------------------------------------------------------------------- */
7408 signal->theData[0] = failedNodePtr.i;
7409 sendSignal(QMGR_REF, GSN_NDB_FAILCONF, signal, 1, JBB);
7410
7411 return;
7412 }//Dbdih::nodeFailCompletedCheckLab()
7413
7414 /*****************************************************************************/
7415 /* ********** SEIZING / RELEASING MODULE *************/
7416 /*****************************************************************************/
7417 /*
7418 3.4 L O C A L N O D E S E I Z E
7419 ************************************
7420 */
7421 /*
7422 3.7 A D D T A B L E
7423 **********************=
7424 */
7425 /*****************************************************************************/
7426 /* ********** TABLE ADDING MODULE *************/
7427 /*****************************************************************************/
7428 /*
7429 3.7.1 A D D T A B L E M A I N L Y
7430 ***************************************
7431 */
7432
/*
  Advance node by one, wrapping to 0 when the incremented value
  equals max_node.
*/
static inline void inc_node_or_group(Uint32 &node, Uint32 max_node)
{
  const Uint32 successor = node + 1;
  if (successor == max_node)
  {
    node = 0;
  }
  else
  {
    node = successor;
  }
}
7438
7439 /*
7440 Spread fragments in backwards compatible mode
7441 */
set_default_node_groups(Signal * signal,Uint32 noFrags)7442 static void set_default_node_groups(Signal *signal, Uint32 noFrags)
7443 {
7444 Uint16 *node_group_array = (Uint16*)&signal->theData[25];
7445 Uint32 i;
7446 node_group_array[0] = 0;
7447 for (i = 1; i < noFrags; i++)
7448 node_group_array[i] = NDB_UNDEF_NODEGROUP;
7449 }
7450
/*
  Return the index of the smallest value in array[0 .. cnt-1].
  If the minimum occurs more than once, the lowest such index is returned.
  An empty range (cnt == 0) yields 0 without reading from array.
*/
static Uint32 find_min_index(const Uint32* array, Uint32 cnt)
{
  if (cnt == 0)
  {
    // Nothing to inspect; do not touch array[0].
    return 0;
  }

  Uint32 minIdx = 0;
  Uint32 minVal = array[0];
  for (Uint32 i = 1; i < cnt; i++)
  {
    if (array[i] < minVal)
    {
      minIdx = i;
      minVal = array[i];
    }
  }
  return minIdx;
}
7465
execCREATE_FRAGMENTATION_REQ(Signal * signal)7466 void Dbdih::execCREATE_FRAGMENTATION_REQ(Signal * signal)
7467 {
7468 Uint16 node_group_id[MAX_NDB_PARTITIONS];
7469 jamEntry();
7470 CreateFragmentationReq * const req =
7471 (CreateFragmentationReq*)signal->getDataPtr();
7472
7473 const Uint32 senderRef = req->senderRef;
7474 const Uint32 senderData = req->senderData;
7475 Uint32 noOfFragments = req->noOfFragments;
7476 const Uint32 fragType = req->fragmentationType;
7477 const Uint32 primaryTableId = req->primaryTableId;
7478 const Uint32 map_ptr_i = req->map_ptr_i;
7479 const Uint32 flags = req->requestInfo;
7480
7481 Uint32 err = 0;
7482 const Uint32 defaultFragments =
7483 c_fragments_per_node * cnoOfNodeGroups * cnoReplicas;
7484
7485 do {
7486 NodeGroupRecordPtr NGPtr;
7487 TabRecordPtr primTabPtr;
7488 Uint32 count = 2;
7489 Uint16 noOfReplicas = cnoReplicas;
7490 Uint16 *fragments = (Uint16*)(signal->theData+25);
7491 if (primaryTableId == RNIL) {
7492 jam();
7493 switch ((DictTabInfo::FragmentType)fragType){
7494 /*
7495 Backward compatability and for all places in code not changed.
7496 */
7497 case DictTabInfo::AllNodesSmallTable:
7498 jam();
7499 noOfFragments = defaultFragments;
7500 set_default_node_groups(signal, noOfFragments);
7501 break;
7502 case DictTabInfo::AllNodesMediumTable:
7503 jam();
7504 noOfFragments = 2 * defaultFragments;
7505 set_default_node_groups(signal, noOfFragments);
7506 break;
7507 case DictTabInfo::AllNodesLargeTable:
7508 jam();
7509 noOfFragments = 4 * defaultFragments;
7510 set_default_node_groups(signal, noOfFragments);
7511 break;
7512 case DictTabInfo::SingleFragment:
7513 jam();
7514 noOfFragments = 1;
7515 set_default_node_groups(signal, noOfFragments);
7516 break;
7517 case DictTabInfo::DistrKeyHash:
7518 jam();
7519 case DictTabInfo::DistrKeyLin:
7520 jam();
7521 if (noOfFragments == 0)
7522 {
7523 jam();
7524 noOfFragments = defaultFragments;
7525 set_default_node_groups(signal, noOfFragments);
7526 }
7527 break;
7528 case DictTabInfo::HashMapPartition:
7529 {
7530 jam();
7531 ndbrequire(map_ptr_i != RNIL);
7532 Ptr<Hash2FragmentMap> ptr;
7533 g_hash_map.getPtr(ptr, map_ptr_i);
7534 if (noOfFragments == 0)
7535 {
7536 jam();
7537 noOfFragments = ptr.p->m_fragments;
7538 }
7539 else if (noOfFragments != ptr.p->m_fragments)
7540 {
7541 jam();
7542 err = CreateFragmentationRef::InvalidFragmentationType;
7543 break;
7544 }
7545 set_default_node_groups(signal, noOfFragments);
7546 break;
7547 }
7548 default:
7549 jam();
7550 if (noOfFragments == 0)
7551 {
7552 jam();
7553 err = CreateFragmentationRef::InvalidFragmentationType;
7554 }
7555 break;
7556 }
7557 if (err)
7558 break;
7559 /*
7560 When we come here the the exact partition is specified
7561 and there is an array of node groups sent along as well.
7562 */
7563 memcpy(&node_group_id[0], &signal->theData[25], 2 * noOfFragments);
7564 Uint16 next_replica_node[MAX_NDB_NODES];
7565 memset(next_replica_node,0,sizeof(next_replica_node));
7566 Uint32 default_node_group= c_nextNodeGroup;
7567 for(Uint32 fragNo = 0; fragNo < noOfFragments; fragNo++)
7568 {
7569 jam();
7570 NGPtr.i = node_group_id[fragNo];
7571 if (NGPtr.i == NDB_UNDEF_NODEGROUP)
7572 {
7573 jam();
7574 NGPtr.i = c_node_groups[default_node_group];
7575 }
7576 if (NGPtr.i >= MAX_NDB_NODES)
7577 {
7578 jam();
7579 err = CreateFragmentationRef::InvalidNodeGroup;
7580 break;
7581 }
7582 ptrCheckGuard(NGPtr, MAX_NDB_NODES, nodeGroupRecord);
7583 if (NGPtr.p->nodegroupIndex == RNIL)
7584 {
7585 jam();
7586 err = CreateFragmentationRef::InvalidNodeGroup;
7587 break;
7588 }
7589 const Uint32 max = NGPtr.p->nodeCount;
7590
7591 fragments[count++] = (NGPtr.p->m_next_log_part++ / cnoReplicas); // Store logpart first
7592 Uint32 tmp= next_replica_node[NGPtr.i];
7593 for(Uint32 replicaNo = 0; replicaNo < noOfReplicas; replicaNo++)
7594 {
7595 jam();
7596 const Uint16 nodeId = NGPtr.p->nodesInGroup[tmp];
7597 fragments[count++]= nodeId;
7598 inc_node_or_group(tmp, max);
7599 }
7600 inc_node_or_group(tmp, max);
7601 next_replica_node[NGPtr.i]= tmp;
7602
7603 /**
7604 * Next node group for next fragment
7605 */
7606 inc_node_or_group(default_node_group, cnoOfNodeGroups);
7607 }
7608 if (err)
7609 {
7610 jam();
7611 break;
7612 }
7613 else
7614 {
7615 jam();
7616 c_nextNodeGroup = default_node_group;
7617 }
7618 } else {
7619 if (primaryTableId >= ctabFileSize) {
7620 jam();
7621 err = CreateFragmentationRef::InvalidPrimaryTable;
7622 break;
7623 }
7624 primTabPtr.i = primaryTableId;
7625 ptrAss(primTabPtr, tabRecord);
7626 if (primTabPtr.p->tabStatus != TabRecord::TS_ACTIVE) {
7627 jam();
7628 err = CreateFragmentationRef::InvalidPrimaryTable;
7629 break;
7630 }
7631 Uint32 fragments_per_node[MAX_NDB_NODES]; // Keep track of no of (primary) fragments per node
7632 bzero(fragments_per_node, sizeof(fragments_per_node));
7633 for (Uint32 fragNo = 0; fragNo < primTabPtr.p->totalfragments; fragNo++) {
7634 jam();
7635 FragmentstorePtr fragPtr;
7636 ReplicaRecordPtr replicaPtr;
7637 getFragstore(primTabPtr.p, fragNo, fragPtr);
7638 fragments[count++] = fragPtr.p->m_log_part_id;
7639 fragments[count++] = fragPtr.p->preferredPrimary;
7640 fragments_per_node[fragPtr.p->preferredPrimary]++;
7641 for (replicaPtr.i = fragPtr.p->storedReplicas;
7642 replicaPtr.i != RNIL;
7643 replicaPtr.i = replicaPtr.p->nextReplica) {
7644 jam();
7645 ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
7646 if (replicaPtr.p->procNode != fragPtr.p->preferredPrimary) {
7647 jam();
7648 fragments[count++]= replicaPtr.p->procNode;
7649 }
7650 }
7651 for (replicaPtr.i = fragPtr.p->oldStoredReplicas;
7652 replicaPtr.i != RNIL;
7653 replicaPtr.i = replicaPtr.p->nextReplica) {
7654 jam();
7655 ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
7656 if (replicaPtr.p->procNode != fragPtr.p->preferredPrimary) {
7657 jam();
7658 fragments[count++]= replicaPtr.p->procNode;
7659 }
7660 }
7661 }
7662
7663 if (flags & CreateFragmentationReq::RI_GET_FRAGMENTATION)
7664 {
7665 jam();
7666 noOfFragments = primTabPtr.p->totalfragments;
7667 }
7668 else if (flags & CreateFragmentationReq::RI_ADD_PARTITION)
7669 {
7670 jam();
7671 /**
7672 * All nodes that dont belong to a nodegroup to ~0 fragments_per_node
7673 * so that they dont get any more...
7674 */
7675 for (Uint32 i = 0; i<MAX_NDB_NODES; i++)
7676 {
7677 if (getNodeStatus(i) == NodeRecord::NOT_IN_CLUSTER ||
7678 getNodeGroup(i) >= cnoOfNodeGroups) // XXX todo
7679 {
7680 jam();
7681 ndbassert(fragments_per_node[i] == 0);
7682 fragments_per_node[i] = ~(Uint32)0;
7683 }
7684 }
7685 for (Uint32 i = primTabPtr.p->totalfragments; i<noOfFragments; i++)
7686 {
7687 jam();
7688 Uint32 node = find_min_index(fragments_per_node,
7689 NDB_ARRAY_SIZE(fragments_per_node));
7690 NGPtr.i = getNodeGroup(node);
7691 ptrCheckGuard(NGPtr, MAX_NDB_NODES, nodeGroupRecord);
7692 fragments[count++] = NGPtr.p->m_next_log_part++;
7693 fragments[count++] = node;
7694 fragments_per_node[node]++;
7695 for (Uint32 r = 0; r<noOfReplicas; r++)
7696 {
7697 jam();
7698 if (NGPtr.p->nodesInGroup[r] != node)
7699 {
7700 jam();
7701 fragments[count++] = NGPtr.p->nodesInGroup[r];
7702 }
7703 }
7704 }
7705 }
7706 }
7707 if(count != (2U + (1 + noOfReplicas) * noOfFragments)){
7708 char buf[255];
7709 BaseString::snprintf(buf, sizeof(buf),
7710 "Illegal configuration change: NoOfReplicas."
7711 " Can't be applied online ");
7712 progError(__LINE__, NDBD_EXIT_INVALID_CONFIG, buf);
7713 }
7714
7715 CreateFragmentationConf * const conf =
7716 (CreateFragmentationConf*)signal->getDataPtrSend();
7717 conf->senderRef = reference();
7718 conf->senderData = senderData;
7719 conf->noOfReplicas = (Uint32)noOfReplicas;
7720 conf->noOfFragments = (Uint32)noOfFragments;
7721
7722 fragments[0]= noOfReplicas;
7723 fragments[1]= noOfFragments;
7724
7725 if(senderRef != 0)
7726 {
7727 jam();
7728 LinearSectionPtr ptr[3];
7729 ptr[0].p = (Uint32*)&fragments[0];
7730 ptr[0].sz = (count + 1) / 2;
7731 sendSignal(senderRef,
7732 GSN_CREATE_FRAGMENTATION_CONF,
7733 signal,
7734 CreateFragmentationConf::SignalLength,
7735 JBB,
7736 ptr,
7737 1);
7738 }
7739 // Always ACK/NACK (here ACK)
7740 signal->theData[0] = 0;
7741 return;
7742 } while(false);
7743 // Always ACK/NACK (here NACK)
7744 signal->theData[0] = err;
7745 }
7746
/**
 * Handle DIADDTABREQ: register a table (or index) in DIH.
 *
 * A connect record is seized for the request and the table record is
 * filled in from the request. After that one of three paths is taken:
 *  - the table is already TS_ACTIVE: continue directly with
 *    sendAddFragreq() from fragment 0;
 *  - a system restart is in progress and the table is TS_IDLE: open the
 *    first table file (reqStatus OPENING_TABLE) and return;
 *  - otherwise: read the FRAGMENTATION section of the signal, allocate
 *    fragment and replica records from it and send
 *    CONTINUEB(ZPACK_TABLE_INTO_PAGES) to this block.
 *
 * The request is refused through addtabrefuseLab() when there are too few
 * free replica records (ZREPLERROR1) or too few remaining fragment
 * records (ZREPLERROR2).
 *
 * Layout of the fragmentation data (array of Uint16), as consumed below:
 *   [0] number of replicas, [1] number of fragments, then for each
 *   fragment: log part id followed by one node id per replica, the first
 *   node id being the preferred primary.
 */
void Dbdih::execDIADDTABREQ(Signal* signal)
{
  Uint32 fragType;
  jamEntry();

  DiAddTabReq * const req = (DiAddTabReq*)signal->getDataPtr();

  // Seize connect record
  // (take the head of the free list; cfirstconnect must not be empty)
  ndbrequire(cfirstconnect != RNIL);
  ConnectRecordPtr connectPtr;
  connectPtr.i = cfirstconnect;
  ptrCheckGuard(connectPtr, cconnectFileSize, connectRecord);
  cfirstconnect = connectPtr.p->nextPool;

  // Remember who asked, so that the CONF/REF can be routed back later.
  const Uint32 userPtr = req->connectPtr;
  const BlockReference userRef = signal->getSendersBlockRef();
  connectPtr.p->nextPool = RNIL;
  connectPtr.p->userpointer = userPtr;
  connectPtr.p->userblockref = userRef;
  connectPtr.p->connectState = ConnectRecord::INUSE;
  connectPtr.p->table = req->tableId;
  connectPtr.p->m_alter.m_changeMask = 0;
  connectPtr.p->m_create.m_map_ptr_i = req->hashMapPtrI;

  // Link table record and connect record and copy the request attributes.
  TabRecordPtr tabPtr;
  tabPtr.i = req->tableId;
  ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
  tabPtr.p->connectrec = connectPtr.i;
  tabPtr.p->tableType = req->tableType;
  fragType= req->fragType;
  tabPtr.p->schemaVersion = req->schemaVersion;
  tabPtr.p->primaryTableId = req->primaryTableId;
  tabPtr.p->schemaTransId = req->schemaTransId;
  tabPtr.p->m_scan_count[0] = 0;
  tabPtr.p->m_scan_count[1] = 0;
  tabPtr.p->m_scan_reorg_flag = 0;

  // Path 1: the table is already active in DIH. No fragment data is
  // built here; go straight to ADD_FRAGREQ processing from fragment 0.
  if (tabPtr.p->tabStatus == TabRecord::TS_ACTIVE)
  {
    jam();
    tabPtr.p->tabStatus = TabRecord::TS_CREATING;
    connectPtr.p->m_alter.m_totalfragments = tabPtr.p->totalfragments;
    sendAddFragreq(signal, connectPtr, tabPtr, 0);
    return;
  }

  // Path 2: system restart and the table is idle. Only the master is
  // expected here; the table file is opened and processing continues
  // asynchronously.
  if (getNodeState().getSystemRestartInProgress() &&
      tabPtr.p->tabStatus == TabRecord::TS_IDLE)
  {
    jam();

    ndbrequire(cmasterNodeId == getOwnNodeId());
    tabPtr.p->tabStatus = TabRecord::TS_CREATING;

    initTableFile(tabPtr);
    FileRecordPtr filePtr;
    filePtr.i = tabPtr.p->tabFile[0];
    ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
    openFileRw(signal, filePtr);
    filePtr.p->reqStatus = FileRecord::OPENING_TABLE;
    return;
  }

  /*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
  /* AT THE TIME OF INITIATING THE FILE OF TABLE         */
  /* DESCRIPTION IS CREATED FOR APPROPRIATE SIZE. EACH   */
  /* RECORD IN THIS FILE HAS THE INFORMATION ABOUT       */
  /* ONE TABLE. THE POINTER TO THIS RECORD IS THE TABLE  */
  /* REFERENCE. IN THE BEGINNING ALL RECORDS ARE CREATED */
  /* BUT THEY DO NOT HAVE ANY INFORMATION ABOUT ANY TABLE*/
  /*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
  // Path 3: regular creation.
  tabPtr.p->tabStatus = TabRecord::TS_CREATING;
  // loggedTable takes precedence over temporaryTable; neither set means
  // a table without logging.
  if(req->loggedTable)
    tabPtr.p->tabStorage= TabRecord::ST_NORMAL;
  else if(req->temporaryTable)
    tabPtr.p->tabStorage= TabRecord::ST_TEMPORARY;
  else
    tabPtr.p->tabStorage= TabRecord::ST_NOLOGGING;
  tabPtr.p->kvalue = req->kValue;

  // Translate the dictionary fragment type into DIH's distribution method.
  switch ((DictTabInfo::FragmentType)fragType){
  case DictTabInfo::HashMapPartition:
    tabPtr.p->method = TabRecord::HASH_MAP;
    break;
  case DictTabInfo::AllNodesSmallTable:
  case DictTabInfo::AllNodesMediumTable:
  case DictTabInfo::AllNodesLargeTable:
  case DictTabInfo::SingleFragment:
    jam();
    // fall through: these types use linear hashing as well
  case DictTabInfo::DistrKeyLin:
    jam();
    tabPtr.p->method = TabRecord::LINEAR_HASH;
    break;
  case DictTabInfo::DistrKeyHash:
    jam();
    tabPtr.p->method = TabRecord::NORMAL_HASH;
    break;
  case DictTabInfo::DistrKeyOrderedIndex:
  {
    // An ordered index takes both the method and the hash map of its
    // primary table. The request is patched in place because
    // req->hashMapPtrI is read again further down for HASH_MAP tables.
    TabRecordPtr primTabPtr;
    primTabPtr.i = req->primaryTableId;
    ptrCheckGuard(primTabPtr, ctabFileSize, tabRecord);
    tabPtr.p->method = primTabPtr.p->method;
    req->hashMapPtrI = primTabPtr.p->m_map_ptr_i;
    break;
  }
  case DictTabInfo::UserDefined:
    jam();
    tabPtr.p->method = TabRecord::USER_DEFINED;
    break;
  default:
    ndbrequire(false);
  }

  // Local copy of the fragmentation section. The Uint32 member is
  // presumably there to word-align the buffer for the copy below.
  union {
    Uint16 fragments[2 + MAX_FRAG_PER_NODE*MAX_REPLICAS*MAX_NDB_NODES];
    Uint32 align;
  };
  (void)align; // kill warning
  SectionHandle handle(this, signal);
  SegmentedSectionPtr fragDataPtr;
  ndbrequire(handle.getSection(fragDataPtr, DiAddTabReq::FRAGMENTATION));
  copy((Uint32*)fragments, fragDataPtr);
  releaseSections(handle);

  const Uint32 noReplicas = fragments[0];
  const Uint32 noFragments = fragments[1];

  tabPtr.p->noOfBackups = noReplicas - 1;
  tabPtr.p->totalfragments = noFragments;
  ndbrequire(noReplicas == cnoReplicas); // Only allowed

  // Error insert 7173 simulates running out of replica records.
  if (ERROR_INSERTED(7173)) {
    CLEAR_ERROR_INSERT_VALUE;
    addtabrefuseLab(signal, connectPtr, ZREPLERROR1);
    return;
  }
  // Refuse when the replica or fragment record pools cannot hold the table.
  if ((noReplicas * noFragments) > cnoFreeReplicaRec) {
    jam();
    addtabrefuseLab(signal, connectPtr, ZREPLERROR1);
    return;
  }//if
  if (noFragments > cremainingfrags) {
    jam();
    addtabrefuseLab(signal, connectPtr, ZREPLERROR2);
    return;
  }//if

  // logTotalFragments becomes the largest power of two that does not
  // exceed totalfragments (for totalfragments >= 1); mask and hashpointer
  // are derived from it.
  Uint32 logTotalFragments = 1;
  while (logTotalFragments <= tabPtr.p->totalfragments) {
    jam();
    logTotalFragments <<= 1;
  }
  logTotalFragments >>= 1;
  tabPtr.p->mask = logTotalFragments - 1;
  tabPtr.p->hashpointer = tabPtr.p->totalfragments - logTotalFragments;
  allocFragments(tabPtr.p->totalfragments, tabPtr);

  if (tabPtr.p->method == TabRecord::HASH_MAP)
  {
    jam();
    // Attach the hash map; the table must have at least as many
    // fragments as the map refers to.
    tabPtr.p->m_map_ptr_i = req->hashMapPtrI;
    tabPtr.p->m_new_map_ptr_i = RNIL;
    Ptr<Hash2FragmentMap> mapPtr;
    g_hash_map.getPtr(mapPtr, tabPtr.p->m_map_ptr_i);
    ndbrequire(tabPtr.p->totalfragments >= mapPtr.p->m_fragments);
  }

  // Walk the per-fragment entries, which start after the two header words.
  Uint32 index = 2;
  for (Uint32 fragId = 0; fragId < noFragments; fragId++) {
    jam();
    FragmentstorePtr fragPtr;
    Uint32 activeIndex = 0;
    getFragstore(tabPtr.p, fragId, fragPtr);
    fragPtr.p->m_log_part_id = fragments[index++];
    // Peek only: the same entry is consumed again as replica 0 below.
    fragPtr.p->preferredPrimary = fragments[index];

    inc_ng_refcount(getNodeGroup(fragPtr.p->preferredPrimary));

    for (Uint32 i = 0; i<noReplicas; i++) {
      const Uint32 nodeId = fragments[index++];
      ReplicaRecordPtr replicaPtr;
      allocStoredReplica(fragPtr, replicaPtr, nodeId);
      if (getNodeStatus(nodeId) == NodeRecord::ALIVE) {
        jam();
        ndbrequire(activeIndex < MAX_REPLICAS);
        fragPtr.p->activeNodes[activeIndex] = nodeId;
        activeIndex++;
      } else {
        jam();
        // Node not alive: move the replica from the stored list to the
        // old stored list.
        removeStoredReplica(fragPtr, replicaPtr);
        linkOldStoredReplica(fragPtr, replicaPtr);
      }//if
    }//for
    fragPtr.p->fragReplicas = activeIndex;
    // Every fragment needs at least one replica on an alive node.
    ndbrequire(activeIndex > 0 && fragPtr.p->storedReplicas != RNIL);
  }
  initTableFile(tabPtr);
  tabPtr.p->tabCopyStatus = TabRecord::CS_ADD_TABLE_MASTER;
  // Continue asynchronously with packing the table into pages.
  signal->theData[0] = DihContinueB::ZPACK_TABLE_INTO_PAGES;
  signal->theData[1] = tabPtr.i;
  sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
}
7950
7951 void
addTable_closeConf(Signal * signal,Uint32 tabPtrI)7952 Dbdih::addTable_closeConf(Signal * signal, Uint32 tabPtrI){
7953 TabRecordPtr tabPtr;
7954 tabPtr.i = tabPtrI;
7955 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
7956
7957 ConnectRecordPtr connectPtr;
7958 connectPtr.i = tabPtr.p->connectrec;
7959 ptrCheckGuard(connectPtr, cconnectFileSize, connectRecord);
7960 connectPtr.p->m_alter.m_totalfragments = tabPtr.p->totalfragments;
7961
7962 sendAddFragreq(signal, connectPtr, tabPtr, 0);
7963 }
7964
7965 void
sendAddFragreq(Signal * signal,ConnectRecordPtr connectPtr,TabRecordPtr tabPtr,Uint32 fragId)7966 Dbdih::sendAddFragreq(Signal* signal, ConnectRecordPtr connectPtr,
7967 TabRecordPtr tabPtr, Uint32 fragId){
7968 jam();
7969 const Uint32 fragCount = connectPtr.p->m_alter.m_totalfragments;
7970 ReplicaRecordPtr replicaPtr;
7971 LINT_INIT(replicaPtr.p);
7972 replicaPtr.i = RNIL;
7973 FragmentstorePtr fragPtr;
7974 for(; fragId<fragCount; fragId++){
7975 jam();
7976 getFragstore(tabPtr.p, fragId, fragPtr);
7977
7978 replicaPtr.i = fragPtr.p->storedReplicas;
7979 while(replicaPtr.i != RNIL){
7980 jam();
7981 ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
7982 if(replicaPtr.p->procNode == getOwnNodeId()){
7983 break;
7984 }
7985 replicaPtr.i = replicaPtr.p->nextReplica;
7986 }
7987
7988 if(replicaPtr.i != RNIL){
7989 jam();
7990 break;
7991 }
7992
7993 replicaPtr.i = fragPtr.p->oldStoredReplicas;
7994 while(replicaPtr.i != RNIL){
7995 jam();
7996 ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
7997 if(replicaPtr.p->procNode == getOwnNodeId()){
7998 break;
7999 }
8000 replicaPtr.i = replicaPtr.p->nextReplica;
8001 }
8002
8003 if(replicaPtr.i != RNIL){
8004 jam();
8005 break;
8006 }
8007 }
8008
8009 if(replicaPtr.i != RNIL){
8010 jam();
8011 ndbrequire(fragId < fragCount);
8012 ndbrequire(replicaPtr.p->procNode == getOwnNodeId());
8013
8014 Uint32 requestInfo = 0;
8015 if(tabPtr.p->tabStorage != TabRecord::ST_NORMAL){
8016 requestInfo |= LqhFragReq::TemporaryTable;
8017 }
8018
8019 if(getNodeState().getNodeRestartInProgress()){
8020 requestInfo |= LqhFragReq::CreateInRunning;
8021 }
8022
8023 AddFragReq* const req = (AddFragReq*)signal->getDataPtr();
8024 req->dihPtr = connectPtr.i;
8025 req->senderData = connectPtr.p->userpointer;
8026 req->fragmentId = fragId;
8027 req->requestInfo = requestInfo;
8028 req->tableId = tabPtr.i;
8029 req->nextLCP = 0;
8030 req->nodeId = getOwnNodeId();
8031 req->totalFragments = fragCount;
8032 req->startGci = SYSFILE->newestRestorableGCI;
8033 req->logPartId = fragPtr.p->m_log_part_id;
8034 req->changeMask = 0;
8035
8036 if (connectPtr.p->connectState == ConnectRecord::ALTER_TABLE)
8037 {
8038 jam();
8039 req->changeMask = connectPtr.p->m_alter.m_changeMask;
8040 }
8041
8042 sendSignal(DBDICT_REF, GSN_ADD_FRAGREQ, signal,
8043 AddFragReq::SignalLength, JBB);
8044 return;
8045 }
8046
8047 if (connectPtr.p->connectState == ConnectRecord::ALTER_TABLE)
8048 {
8049 jam();
8050 // Request handled successfully
8051
8052 if (AlterTableReq::getReorgFragFlag(connectPtr.p->m_alter.m_changeMask))
8053 {
8054 jam();
8055 DIH_TAB_WRITE_LOCK(tabPtr.p);
8056 tabPtr.p->m_new_map_ptr_i = connectPtr.p->m_alter.m_new_map_ptr_i;
8057 DIH_TAB_WRITE_UNLOCK(tabPtr.p);
8058 }
8059
8060 if (AlterTableReq::getAddFragFlag(connectPtr.p->m_alter.m_changeMask))
8061 {
8062 jam();
8063 Callback cb;
8064 cb.m_callbackData = connectPtr.i;
8065 cb.m_callbackFunction = safe_cast(&Dbdih::alter_table_writeTable_conf);
8066 saveTableFile(signal, connectPtr, tabPtr, TabRecord::CS_ALTER_TABLE, cb);
8067 return;
8068 }
8069
8070 send_alter_tab_conf(signal, connectPtr);
8071 }
8072 else
8073 {
8074 // Done
8075 DiAddTabConf * const conf = (DiAddTabConf*)signal->getDataPtr();
8076 conf->senderData = connectPtr.p->userpointer;
8077 sendSignal(connectPtr.p->userblockref, GSN_DIADDTABCONF, signal,
8078 DiAddTabConf::SignalLength, JBB);
8079
8080
8081 if (tabPtr.p->method == TabRecord::HASH_MAP)
8082 {
8083 Uint32 newValue = RNIL;
8084 if (DictTabInfo::isOrderedIndex(tabPtr.p->tableType))
8085 {
8086 jam();
8087 TabRecordPtr primTabPtr;
8088 primTabPtr.i = tabPtr.p->primaryTableId;
8089 ptrCheckGuard(primTabPtr, ctabFileSize, tabRecord);
8090 newValue = primTabPtr.p->m_map_ptr_i;
8091 }
8092 else
8093 {
8094 jam();
8095 newValue = connectPtr.p->m_create.m_map_ptr_i;
8096 }
8097
8098 tabPtr.p->m_map_ptr_i = newValue;
8099 }
8100 // Release
8101 ndbrequire(tabPtr.p->connectrec == connectPtr.i);
8102 tabPtr.p->connectrec = RNIL;
8103 release_connect(connectPtr);
8104 }
8105
8106 }
8107 void
release_connect(ConnectRecordPtr ptr)8108 Dbdih::release_connect(ConnectRecordPtr ptr)
8109 {
8110 TabRecordPtr tabPtr;
8111 tabPtr.i = ptr.p->table;
8112 if (tabPtr.i != RNIL)
8113 {
8114 jam();
8115 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
8116 if (tabPtr.p->connectrec == ptr.i)
8117 {
8118 ndbassert(false); // should be fixed elsewhere
8119 tabPtr.p->connectrec = RNIL;
8120 }
8121 }
8122
8123 ptr.p->table = RNIL;
8124 ptr.p->userblockref = ZNIL;
8125 ptr.p->userpointer = RNIL;
8126 ptr.p->connectState = ConnectRecord::FREE;
8127 ptr.p->nextPool = cfirstconnect;
8128 cfirstconnect = ptr.i;
8129 }
8130
8131 void
execADD_FRAGCONF(Signal * signal)8132 Dbdih::execADD_FRAGCONF(Signal* signal){
8133 jamEntry();
8134 AddFragConf * const conf = (AddFragConf*)signal->getDataPtr();
8135
8136 ConnectRecordPtr connectPtr;
8137 connectPtr.i = conf->dihPtr;
8138 ptrCheckGuard(connectPtr, cconnectFileSize, connectRecord);
8139
8140 TabRecordPtr tabPtr;
8141 tabPtr.i = connectPtr.p->table;
8142 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
8143
8144 sendAddFragreq(signal, connectPtr, tabPtr, conf->fragId + 1);
8145 }
8146
8147 void
execADD_FRAGREF(Signal * signal)8148 Dbdih::execADD_FRAGREF(Signal* signal){
8149 jamEntry();
8150 AddFragRef * const ref = (AddFragRef*)signal->getDataPtr();
8151
8152 ConnectRecordPtr connectPtr;
8153 connectPtr.i = ref->dihPtr;
8154 ptrCheckGuard(connectPtr, cconnectFileSize, connectRecord);
8155
8156 if (connectPtr.p->connectState == ConnectRecord::ALTER_TABLE)
8157 {
8158 jam();
8159
8160 connectPtr.p->connectState = ConnectRecord::ALTER_TABLE_ABORT;
8161 drop_fragments(signal, connectPtr, connectPtr.p->m_alter.m_totalfragments);
8162 return;
8163 }
8164 else
8165 {
8166 DiAddTabRef * const ref = (DiAddTabRef*)signal->getDataPtr();
8167 ref->senderData = connectPtr.p->userpointer;
8168 ref->errorCode = ~0;
8169 sendSignal(connectPtr.p->userblockref, GSN_DIADDTABREF, signal,
8170 DiAddTabRef::SignalLength, JBB);
8171
8172 // Release
8173 Ptr<TabRecord> tabPtr;
8174 tabPtr.i = connectPtr.p->table;
8175 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
8176 ndbrequire(tabPtr.p->connectrec == connectPtr.i);
8177 tabPtr.p->connectrec = RNIL;
8178 release_connect(connectPtr);
8179 }
8180 }
8181
8182 /*
8183 3.7.1.3 R E F U S E
8184 *********************
8185 */
8186 void
addtabrefuseLab(Signal * signal,ConnectRecordPtr connectPtr,Uint32 errorCode)8187 Dbdih::addtabrefuseLab(Signal* signal,
8188 ConnectRecordPtr connectPtr, Uint32 errorCode)
8189 {
8190 signal->theData[0] = connectPtr.p->userpointer;
8191 signal->theData[1] = errorCode;
8192 sendSignal(connectPtr.p->userblockref, GSN_DIADDTABREF, signal, 2, JBB);
8193
8194 Ptr<TabRecord> tabPtr;
8195 tabPtr.i = connectPtr.p->table;
8196 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
8197 ndbrequire(tabPtr.p->connectrec == connectPtr.i);
8198 tabPtr.p->connectrec = RNIL;
8199
8200 release_connect(connectPtr);
8201 return;
8202 }//Dbdih::addtabrefuseLab()
8203
8204 /*
8205 3.7.2 A D D T A B L E D U P L I C A T I O N
8206 *************************************************
8207 */
8208 /*
8209 3.7.2.1 A D D T A B L E D U P L I C A T I O N R E Q U E S T
8210 *******************************************************************=
8211 */
8212
8213 /*
8214 D E L E T E T A B L E
8215 **********************=
8216 */
8217 /*****************************************************************************/
8218 /*********** DELETE TABLE MODULE *************/
8219 /*****************************************************************************/
8220 void
execDROP_TAB_REQ(Signal * signal)8221 Dbdih::execDROP_TAB_REQ(Signal* signal)
8222 {
8223 jamEntry();
8224 DropTabReq* req = (DropTabReq*)signal->getDataPtr();
8225
8226 TabRecordPtr tabPtr;
8227 tabPtr.i = req->tableId;
8228 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
8229
8230 tabPtr.p->m_dropTab.tabUserRef = req->senderRef;
8231 tabPtr.p->m_dropTab.tabUserPtr = req->senderData;
8232
8233 DropTabReq::RequestType rt = (DropTabReq::RequestType)req->requestType;
8234
8235 switch(rt){
8236 case DropTabReq::OnlineDropTab:
8237 jam();
8238 ndbrequire(tabPtr.p->tabStatus == TabRecord::TS_DROPPING);
8239 break;
8240 case DropTabReq::CreateTabDrop:
8241 jam();
8242 break;
8243 case DropTabReq::RestartDropTab:
8244 break;
8245 }
8246
8247 if(isMaster())
8248 {
8249 /**
8250 * Remove from queue
8251 */
8252 NodeRecordPtr nodePtr;
8253 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
8254 jam();
8255 ptrAss(nodePtr, nodeRecord);
8256 if (c_lcpState.m_participatingLQH.get(nodePtr.i))
8257 {
8258
8259 Uint32 index = 0;
8260 Uint32 count = nodePtr.p->noOfQueuedChkpt;
8261 while(index < count){
8262 if(nodePtr.p->queuedChkpt[index].tableId == tabPtr.i){
8263 jam();
8264 // g_eventLogger->info("Unqueuing %d", index);
8265
8266 count--;
8267 for(Uint32 i = index; i<count; i++){
8268 jam();
8269 nodePtr.p->queuedChkpt[i] = nodePtr.p->queuedChkpt[i + 1];
8270 }
8271 } else {
8272 index++;
8273 }
8274 }
8275 nodePtr.p->noOfQueuedChkpt = count;
8276 }
8277 }
8278 }
8279
8280 {
8281 /**
8282 * Check table lcp state
8283 */
8284 bool ok = false;
8285 switch(tabPtr.p->tabLcpStatus){
8286 case TabRecord::TLS_COMPLETED:
8287 case TabRecord::TLS_WRITING_TO_FILE:
8288 ok = true;
8289 jam();
8290 break;
8291 return;
8292 case TabRecord::TLS_ACTIVE:
8293 ok = true;
8294 jam();
8295
8296 tabPtr.p->tabLcpStatus = TabRecord::TLS_COMPLETED;
8297
8298 /**
8299 * First check if all fragments are done
8300 */
8301 if (checkLcpAllTablesDoneInLqh(__LINE__))
8302 {
8303 jam();
8304
8305 g_eventLogger->info("This is the last table");
8306
8307 /**
8308 * Then check if saving of tab info is done for all tables
8309 */
8310 LcpStatus a = c_lcpState.lcpStatus;
8311 checkLcpCompletedLab(signal);
8312
8313 if(a != c_lcpState.lcpStatus)
8314 {
8315 g_eventLogger->info("And all tables are written to already written disk");
8316 }
8317 }
8318 break;
8319 }
8320 ndbrequire(ok);
8321 }
8322
8323 waitDropTabWritingToFile(signal, tabPtr);
8324 }
8325
startDeleteFile(Signal * signal,TabRecordPtr tabPtr)8326 void Dbdih::startDeleteFile(Signal* signal, TabRecordPtr tabPtr)
8327 {
8328 if (tabPtr.p->tabFile[0] == RNIL) {
8329 jam();
8330 initTableFile(tabPtr);
8331 }//if
8332 openTableFileForDelete(signal, tabPtr.p->tabFile[0]);
8333 }//Dbdih::startDeleteFile()
8334
openTableFileForDelete(Signal * signal,Uint32 fileIndex)8335 void Dbdih::openTableFileForDelete(Signal* signal, Uint32 fileIndex)
8336 {
8337 FileRecordPtr filePtr;
8338 filePtr.i = fileIndex;
8339 ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
8340 openFileRw(signal, filePtr);
8341 filePtr.p->reqStatus = FileRecord::TABLE_OPEN_FOR_DELETE;
8342 }//Dbdih::openTableFileForDelete()
8343
tableOpenLab(Signal * signal,FileRecordPtr filePtr)8344 void Dbdih::tableOpenLab(Signal* signal, FileRecordPtr filePtr)
8345 {
8346 closeFileDelete(signal, filePtr);
8347 filePtr.p->reqStatus = FileRecord::TABLE_CLOSE_DELETE;
8348 return;
8349 }//Dbdih::tableOpenLab()
8350
tableDeleteLab(Signal * signal,FileRecordPtr filePtr)8351 void Dbdih::tableDeleteLab(Signal* signal, FileRecordPtr filePtr)
8352 {
8353 TabRecordPtr tabPtr;
8354 tabPtr.i = filePtr.p->tabRef;
8355 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
8356 if (filePtr.i == tabPtr.p->tabFile[0]) {
8357 jam();
8358 openTableFileForDelete(signal, tabPtr.p->tabFile[1]);
8359 return;
8360 }//if
8361 ndbrequire(filePtr.i == tabPtr.p->tabFile[1]);
8362
8363 releaseFile(tabPtr.p->tabFile[0]);
8364 releaseFile(tabPtr.p->tabFile[1]);
8365 tabPtr.p->tabFile[0] = tabPtr.p->tabFile[1] = RNIL;
8366
8367 tabPtr.p->tabStatus = TabRecord::TS_IDLE;
8368
8369 DropTabConf * const dropConf = (DropTabConf *)signal->getDataPtrSend();
8370 dropConf->senderRef = reference();
8371 dropConf->senderData = tabPtr.p->m_dropTab.tabUserPtr;
8372 dropConf->tableId = tabPtr.i;
8373 sendSignal(tabPtr.p->m_dropTab.tabUserRef, GSN_DROP_TAB_CONF,
8374 signal, DropTabConf::SignalLength, JBB);
8375
8376 tabPtr.p->m_dropTab.tabUserPtr = RNIL;
8377 tabPtr.p->m_dropTab.tabUserRef = 0;
8378 releaseTable(tabPtr);
8379 }//Dbdih::tableDeleteLab()
8380
8381
releaseTable(TabRecordPtr tabPtr)8382 void Dbdih::releaseTable(TabRecordPtr tabPtr)
8383 {
8384 FragmentstorePtr fragPtr;
8385 if (tabPtr.p->noOfFragChunks > 0) {
8386 for (Uint32 fragId = 0; fragId < tabPtr.p->totalfragments; fragId++) {
8387 jam();
8388 getFragstore(tabPtr.p, fragId, fragPtr);
8389 dec_ng_refcount(getNodeGroup(fragPtr.p->preferredPrimary));
8390 releaseReplicas(& fragPtr.p->storedReplicas);
8391 releaseReplicas(& fragPtr.p->oldStoredReplicas);
8392 }//for
8393 releaseFragments(tabPtr);
8394 }
8395 if (tabPtr.p->tabFile[0] != RNIL) {
8396 jam();
8397 releaseFile(tabPtr.p->tabFile[0]);
8398 releaseFile(tabPtr.p->tabFile[1]);
8399 tabPtr.p->tabFile[0] = tabPtr.p->tabFile[1] = RNIL;
8400 }//if
8401 }//Dbdih::releaseTable()
8402
releaseReplicas(Uint32 * replicaPtrI)8403 void Dbdih::releaseReplicas(Uint32 * replicaPtrI)
8404 {
8405 ReplicaRecordPtr replicaPtr;
8406 replicaPtr.i = * replicaPtrI;
8407 jam();
8408 while (replicaPtr.i != RNIL) {
8409 jam();
8410 ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
8411 Uint32 tmp = replicaPtr.p->nextReplica;
8412 replicaPtr.p->nextReplica = cfirstfreeReplica;
8413 cfirstfreeReplica = replicaPtr.i;
8414 replicaPtr.i = tmp;
8415 cnoFreeReplicaRec++;
8416 }//while
8417
8418 * replicaPtrI = RNIL;
8419 }//Dbdih::releaseReplicas()
8420
seizeReplicaRec(ReplicaRecordPtr & replicaPtr)8421 void Dbdih::seizeReplicaRec(ReplicaRecordPtr& replicaPtr)
8422 {
8423 replicaPtr.i = cfirstfreeReplica;
8424 ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
8425 cfirstfreeReplica = replicaPtr.p->nextReplica;
8426 cnoFreeReplicaRec--;
8427 replicaPtr.p->nextReplica = RNIL;
8428 }//Dbdih::seizeReplicaRec()
8429
releaseFile(Uint32 fileIndex)8430 void Dbdih::releaseFile(Uint32 fileIndex)
8431 {
8432 FileRecordPtr filePtr;
8433 filePtr.i = fileIndex;
8434 ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
8435 filePtr.p->nextFile = cfirstfreeFile;
8436 cfirstfreeFile = filePtr.i;
8437 }//Dbdih::releaseFile()
8438
8439
execALTER_TAB_REQ(Signal * signal)8440 void Dbdih::execALTER_TAB_REQ(Signal * signal)
8441 {
8442 const AlterTabReq* req = (const AlterTabReq*)signal->getDataPtr();
8443 const Uint32 senderRef = req->senderRef;
8444 const Uint32 senderData = req->senderData;
8445 const Uint32 tableId = req->tableId;
8446 const Uint32 tableVersion = req->tableVersion;
8447 const Uint32 newTableVersion = req->newTableVersion;
8448 AlterTabReq::RequestType requestType =
8449 (AlterTabReq::RequestType) req->requestType;
8450
8451 TabRecordPtr tabPtr;
8452 tabPtr.i = tableId;
8453 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
8454
8455 switch(requestType){
8456 case AlterTabReq::AlterTablePrepare:
8457 jam();
8458 // fall through
8459 case AlterTabReq::AlterTableRevert:
8460 jam();
8461 if (AlterTableReq::getAddFragFlag(req->changeMask) &&
8462 tabPtr.p->tabCopyStatus != TabRecord::CS_IDLE)
8463 {
8464 jam();
8465 SectionHandle handle(this, signal);
8466 sendSignalWithDelay(reference(), GSN_ALTER_TAB_REQ, signal, 100,
8467 signal->getLength(), &handle);
8468 return;
8469 }
8470 case AlterTabReq::AlterTableCommit:
8471 jam();
8472 case AlterTabReq::AlterTableComplete:
8473 jam();
8474 case AlterTabReq::AlterTableWaitScan:
8475 jam();
8476 break;
8477 default:
8478 jamLine(requestType);
8479 }
8480
8481 ConnectRecordPtr connectPtr;
8482 connectPtr.i = RNIL;
8483 switch (requestType) {
8484 case AlterTabReq::AlterTablePrepare:
8485 jam();
8486
8487 ndbrequire(cfirstconnect != RNIL);
8488 connectPtr.i = cfirstconnect;
8489 ptrCheckGuard(connectPtr, cconnectFileSize, connectRecord);
8490 cfirstconnect = connectPtr.p->nextPool;
8491
8492 connectPtr.p->m_alter.m_totalfragments = tabPtr.p->totalfragments;
8493 connectPtr.p->m_alter.m_org_totalfragments = tabPtr.p->totalfragments;
8494 connectPtr.p->m_alter.m_changeMask = req->changeMask;
8495 connectPtr.p->m_alter.m_new_map_ptr_i = req->new_map_ptr_i;
8496 connectPtr.p->userpointer = senderData;
8497 connectPtr.p->userblockref = senderRef;
8498 connectPtr.p->connectState = ConnectRecord::ALTER_TABLE;
8499 connectPtr.p->table = tabPtr.i;
8500 tabPtr.p->connectrec = connectPtr.i;
8501 break;
8502 case AlterTabReq::AlterTableRevert:
8503 jam();
8504 tabPtr.p->schemaVersion = tableVersion;
8505
8506 connectPtr.i = req->connectPtr;
8507 ptrCheckGuard(connectPtr, cconnectFileSize, connectRecord);
8508
8509 ndbrequire(connectPtr.p->connectState == ConnectRecord::ALTER_TABLE);
8510
8511 connectPtr.p->userpointer = senderData;
8512 connectPtr.p->userblockref = senderRef;
8513
8514 if (AlterTableReq::getAddFragFlag(req->changeMask))
8515 {
8516 jam();
8517 tabPtr.p->tabCopyStatus = TabRecord::CS_ALTER_TABLE;
8518 connectPtr.p->connectState = ConnectRecord::ALTER_TABLE_REVERT;
8519 drop_fragments(signal, connectPtr,
8520 connectPtr.p->m_alter.m_totalfragments);
8521 return;
8522 }
8523
8524 send_alter_tab_conf(signal, connectPtr);
8525
8526 ndbrequire(tabPtr.p->connectrec == connectPtr.i);
8527 tabPtr.p->connectrec = RNIL;
8528 release_connect(connectPtr);
8529 return;
8530 break;
8531 case AlterTabReq::AlterTableCommit:
8532 jam();
8533 tabPtr.p->schemaVersion = newTableVersion;
8534
8535 connectPtr.i = req->connectPtr;
8536 ptrCheckGuard(connectPtr, cconnectFileSize, connectRecord);
8537 connectPtr.p->userpointer = senderData;
8538 connectPtr.p->userblockref = senderRef;
8539 ndbrequire(connectPtr.p->connectState == ConnectRecord::ALTER_TABLE);
8540
8541 tabPtr.p->totalfragments = connectPtr.p->m_alter.m_totalfragments;
8542 if (AlterTableReq::getReorgFragFlag(connectPtr.p->m_alter.m_changeMask))
8543 {
8544 jam();
8545 DIH_TAB_WRITE_LOCK(tabPtr.p);
8546 Uint32 save = tabPtr.p->m_map_ptr_i;
8547 tabPtr.p->m_map_ptr_i = tabPtr.p->m_new_map_ptr_i;
8548 tabPtr.p->m_new_map_ptr_i = save;
8549
8550 for (Uint32 i = 0; i<tabPtr.p->totalfragments; i++)
8551 {
8552 jam();
8553 FragmentstorePtr fragPtr;
8554 getFragstore(tabPtr.p, i, fragPtr);
8555 fragPtr.p->distributionKey = (fragPtr.p->distributionKey + 1) & 0xFF;
8556 }
8557 DIH_TAB_WRITE_UNLOCK(tabPtr.p);
8558
8559 ndbassert(tabPtr.p->m_scan_count[1] == 0);
8560 tabPtr.p->m_scan_count[1] = tabPtr.p->m_scan_count[0];
8561 tabPtr.p->m_scan_count[0] = 0;
8562 tabPtr.p->m_scan_reorg_flag = 1;
8563
8564 send_alter_tab_conf(signal, connectPtr);
8565 return;
8566 }
8567
8568 send_alter_tab_conf(signal, connectPtr);
8569 ndbrequire(tabPtr.p->connectrec == connectPtr.i);
8570 tabPtr.p->connectrec = RNIL;
8571 release_connect(connectPtr);
8572 return;
8573 case AlterTabReq::AlterTableComplete:
8574 jam();
8575 connectPtr.i = req->connectPtr;
8576 ptrCheckGuard(connectPtr, cconnectFileSize, connectRecord);
8577 connectPtr.p->userpointer = senderData;
8578 connectPtr.p->userblockref = senderRef;
8579
8580 send_alter_tab_conf(signal, connectPtr);
8581
8582 DIH_TAB_WRITE_LOCK(tabPtr.p);
8583 tabPtr.p->m_new_map_ptr_i = RNIL;
8584 tabPtr.p->m_scan_reorg_flag = 0;
8585 DIH_TAB_WRITE_UNLOCK(tabPtr.p);
8586
8587 ndbrequire(tabPtr.p->connectrec == connectPtr.i);
8588 tabPtr.p->connectrec = RNIL;
8589 release_connect(connectPtr);
8590 return;
8591 case AlterTabReq::AlterTableWaitScan:{
8592 jam();
8593 Uint64 now = NdbTick_CurrentMillisecond();
8594 now /= 1000;
8595 signal->theData[0] = DihContinueB::ZWAIT_OLD_SCAN;
8596 signal->theData[1] = tabPtr.i;
8597 signal->theData[2] = senderRef;
8598 signal->theData[3] = senderData;
8599 signal->theData[4] = connectPtr.i;
8600 signal->theData[5] = Uint32(now >> 32);
8601 signal->theData[6] = Uint32(now);
8602 signal->theData[7] = 3;
8603 sendSignal(reference(), GSN_CONTINUEB, signal, 8, JBB);
8604 return;
8605 }
8606 default:
8607 ndbrequire(false);
8608 break;
8609 }
8610
8611 if (AlterTableReq::getAddFragFlag(req->changeMask))
8612 {
8613 jam();
8614 SegmentedSectionPtr ptr;
8615 SectionHandle handle(this, signal);
8616 handle.getSection(ptr, 0);
8617 union {
8618 Uint16 buf[2+2*MAX_NDB_PARTITIONS];
8619 Uint32 _align[1];
8620 };
8621 copy(_align, ptr);
8622 releaseSections(handle);
8623 Uint32 err;
8624 Uint32 save = tabPtr.p->totalfragments;
8625 if ((err = add_fragments_to_table(tabPtr, buf)))
8626 {
8627 jam();
8628 ndbrequire(tabPtr.p->totalfragments == save);
8629 ndbrequire(connectPtr.p->m_alter.m_org_totalfragments == save);
8630 send_alter_tab_ref(signal, tabPtr, connectPtr, err);
8631
8632 ndbrequire(tabPtr.p->connectrec == connectPtr.i);
8633 tabPtr.p->connectrec = RNIL;
8634 release_connect(connectPtr);
8635 return;
8636 }
8637
8638 tabPtr.p->tabCopyStatus = TabRecord::CS_ALTER_TABLE;
8639 connectPtr.p->m_alter.m_totalfragments = tabPtr.p->totalfragments;
8640 tabPtr.p->totalfragments = save; // Dont make the available yet...
8641 sendAddFragreq(signal, connectPtr, tabPtr,
8642 connectPtr.p->m_alter.m_org_totalfragments);
8643 return;
8644 }
8645
8646 send_alter_tab_conf(signal, connectPtr);
8647 }
8648
8649 Uint32
add_fragments_to_table(Ptr<TabRecord> tabPtr,const Uint16 buf[])8650 Dbdih::add_fragments_to_table(Ptr<TabRecord> tabPtr, const Uint16 buf[])
8651 {
8652 Uint32 replicas = buf[0];
8653 Uint32 cnt = buf[1];
8654
8655 Uint32 i = 0;
8656 Uint32 err = 0;
8657 Uint32 current = tabPtr.p->totalfragments;
8658 for (i = 0; i<cnt; i++)
8659 {
8660 FragmentstorePtr fragPtr;
8661 if (ERROR_INSERTED(7212) && cnt)
8662 {
8663 err = 1;
8664 CLEAR_ERROR_INSERT_VALUE;
8665 goto error;
8666 }
8667
8668 if ((err = add_fragment_to_table(tabPtr, current + i, fragPtr)))
8669 goto error;
8670
8671 fragPtr.p->m_log_part_id = buf[2+(1 + replicas)*i];
8672 fragPtr.p->preferredPrimary = buf[2+(1 + replicas)*i + 1];
8673
8674 inc_ng_refcount(getNodeGroup(fragPtr.p->preferredPrimary));
8675
8676 Uint32 activeIndex = 0;
8677 for (Uint32 j = 0; j<replicas; j++)
8678 {
8679 const Uint32 nodeId = buf[2+(1 + replicas)*i + 1 + j];
8680 ReplicaRecordPtr replicaPtr;
8681 allocStoredReplica(fragPtr, replicaPtr, nodeId);
8682 if (getNodeStatus(nodeId) == NodeRecord::ALIVE) {
8683 jam();
8684 ndbrequire(activeIndex < MAX_REPLICAS);
8685 fragPtr.p->activeNodes[activeIndex] = nodeId;
8686 activeIndex++;
8687 } else {
8688 jam();
8689 removeStoredReplica(fragPtr, replicaPtr);
8690 linkOldStoredReplica(fragPtr, replicaPtr);
8691 }
8692 }
8693 fragPtr.p->fragReplicas = activeIndex;
8694 }
8695
8696 return 0;
8697 error:
8698 for(i = i + current; i != current; i--)
8699 {
8700 release_fragment_from_table(tabPtr, i);
8701 }
8702
8703 return err;
8704 }
8705
8706 void
wait_old_scan(Signal * signal)8707 Dbdih::wait_old_scan(Signal* signal)
8708 {
8709 jam();
8710
8711 TabRecordPtr tabPtr;
8712 tabPtr.i = signal->theData[1];
8713 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
8714
8715 if (tabPtr.p->m_scan_count[1] == 0)
8716 {
8717 jam();
8718 Uint32 senderRef = signal->theData[2];
8719 Uint32 senderData = signal->theData[3];
8720 Uint32 connectPtrI = signal->theData[4];
8721
8722 AlterTabConf* conf = (AlterTabConf*)signal->getDataPtrSend();
8723 conf->senderRef = reference();
8724 conf->senderData = senderData;
8725 conf->connectPtr = connectPtrI;
8726 sendSignal(senderRef, GSN_ALTER_TAB_CONF, signal,
8727 AlterTabConf::SignalLength, JBB);
8728 return;
8729 }
8730
8731 Uint32 start_hi = signal->theData[5];
8732 Uint32 start_lo = signal->theData[6];
8733 Uint64 start = (Uint64(start_hi) << 32) + start_lo;
8734 Uint32 wait = signal->theData[7];
8735 Uint64 now = NdbTick_CurrentMillisecond() / 1000;
8736 if (now > start + wait)
8737 {
8738 infoEvent("Waiting(%u) for scans(%u) to complete on table %u",
8739 Uint32(now - start),
8740 tabPtr.p->m_scan_count[1],
8741 tabPtr.i);
8742
8743 if (wait == 3)
8744 {
8745 signal->theData[7] = 3 + 7;
8746 }
8747 else
8748 {
8749 signal->theData[7] = 2 * wait;
8750 }
8751 }
8752
8753 sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 1000, 7);
8754 }
8755
8756 Uint32
add_fragment_to_table(Ptr<TabRecord> tabPtr,Uint32 fragId,Ptr<Fragmentstore> & fragPtr)8757 Dbdih::add_fragment_to_table(Ptr<TabRecord> tabPtr,
8758 Uint32 fragId,
8759 Ptr<Fragmentstore>& fragPtr)
8760 {
8761 Uint32 fragments = tabPtr.p->totalfragments;
8762 Uint32 chunks = tabPtr.p->noOfFragChunks;
8763
8764 ndbrequire(fragId == fragments); // Only add at the end
8765
8766 if (ERROR_INSERTED(7211))
8767 {
8768 CLEAR_ERROR_INSERT_VALUE;
8769 return 1;
8770 }
8771
8772 Uint32 allocated = chunks << LOG_NO_OF_FRAGS_PER_CHUNK;
8773 if (fragId < allocated)
8774 {
8775 jam();
8776 tabPtr.p->totalfragments++;
8777 getFragstore(tabPtr.p, fragId, fragPtr);
8778 return 0;
8779 }
8780
8781 /**
8782 * Allocate a new chunk
8783 */
8784 fragPtr.i = cfirstfragstore;
8785 if (fragPtr.i == RNIL)
8786 {
8787 jam();
8788 return -1;
8789 }
8790
8791 ptrCheckGuard(fragPtr, cfragstoreFileSize, fragmentstore);
8792 cfirstfragstore = fragPtr.p->nextFragmentChunk;
8793 ndbrequire(cremainingfrags >= NO_OF_FRAGS_PER_CHUNK);
8794 cremainingfrags -= NO_OF_FRAGS_PER_CHUNK;
8795
8796 ndbrequire(chunks < NDB_ARRAY_SIZE(tabPtr.p->startFid));
8797 tabPtr.p->startFid[chunks] = fragPtr.i;
8798 for (Uint32 i = 0; i<NO_OF_FRAGS_PER_CHUNK; i++)
8799 {
8800 jam();
8801 Ptr<Fragmentstore> tmp;
8802 tmp.i = fragPtr.i + i;
8803 ptrCheckGuard(tmp, cfragstoreFileSize, fragmentstore);
8804 initFragstore(tmp);
8805 }
8806
8807 tabPtr.p->totalfragments++;
8808 tabPtr.p->noOfFragChunks++;
8809
8810 return 0;
8811 }
8812
8813 void
release_fragment_from_table(Ptr<TabRecord> tabPtr,Uint32 fragId)8814 Dbdih::release_fragment_from_table(Ptr<TabRecord> tabPtr, Uint32 fragId)
8815 {
8816 FragmentstorePtr fragPtr;
8817 Uint32 fragments = tabPtr.p->totalfragments;
8818 Uint32 chunks = tabPtr.p->noOfFragChunks;
8819
8820 if (fragId >= fragments)
8821 {
8822 jam();
8823 return;
8824 }
8825 ndbrequire(fragId == fragments - 1); // only remove at end
8826 ndbrequire(fragments != 0);
8827
8828 getFragstore(tabPtr.p, fragId, fragPtr);
8829 dec_ng_refcount(getNodeGroup(fragPtr.p->preferredPrimary));
8830
8831 releaseReplicas(& fragPtr.p->storedReplicas);
8832 releaseReplicas(& fragPtr.p->oldStoredReplicas);
8833
8834 if (fragId == ((chunks - 1) << LOG_NO_OF_FRAGS_PER_CHUNK))
8835 {
8836 jam();
8837
8838 getFragstore(tabPtr.p, fragId, fragPtr);
8839
8840 fragPtr.p->nextFragmentChunk = cfirstfragstore;
8841 cfirstfragstore = fragPtr.i;
8842 cremainingfrags += NO_OF_FRAGS_PER_CHUNK;
8843 tabPtr.p->noOfFragChunks = chunks - 1;
8844 }
8845
8846 tabPtr.p->totalfragments--;
8847 }
8848
8849 void
send_alter_tab_ref(Signal * signal,Ptr<TabRecord> tabPtr,Ptr<ConnectRecord> connectPtr,Uint32 errCode)8850 Dbdih::send_alter_tab_ref(Signal* signal,
8851 Ptr<TabRecord> tabPtr,
8852 Ptr<ConnectRecord> connectPtr,
8853 Uint32 errCode)
8854 {
8855 AlterTabRef* ref = (AlterTabRef*)signal->getDataPtrSend();
8856 ref->senderRef = reference();
8857 ref->senderData = connectPtr.p->userpointer;
8858 ref->errorCode = errCode;
8859 sendSignal(connectPtr.p->userblockref, GSN_ALTER_TAB_REF, signal,
8860 AlterTabRef::SignalLength, JBB);
8861 }
8862
8863 void
send_alter_tab_conf(Signal * signal,Ptr<ConnectRecord> connectPtr)8864 Dbdih::send_alter_tab_conf(Signal* signal, Ptr<ConnectRecord> connectPtr)
8865 {
8866 AlterTabConf* conf = (AlterTabConf*)signal->getDataPtrSend();
8867 conf->senderRef = reference();
8868 conf->senderData = connectPtr.p->userpointer;
8869 conf->connectPtr = connectPtr.i;
8870 sendSignal(connectPtr.p->userblockref, GSN_ALTER_TAB_CONF, signal,
8871 AlterTabConf::SignalLength, JBB);
8872 }
8873
8874 void
saveTableFile(Signal * signal,Ptr<ConnectRecord> connectPtr,Ptr<TabRecord> tabPtr,TabRecord::CopyStatus expectedStatus,Callback & cb)8875 Dbdih::saveTableFile(Signal* signal,
8876 Ptr<ConnectRecord> connectPtr,
8877 Ptr<TabRecord> tabPtr,
8878 TabRecord::CopyStatus expectedStatus,
8879 Callback& cb)
8880 {
8881 ndbrequire(connectPtr.i == cb.m_callbackData); // required
8882 ndbrequire(tabPtr.p->tabCopyStatus == expectedStatus); // locking
8883 memcpy(&connectPtr.p->m_callback, &cb, sizeof(Callback));
8884
8885 tabPtr.p->tabCopyStatus = TabRecord::CS_COPY_TO_SAVE;
8886 tabPtr.p->tabUpdateState = TabRecord::US_CALLBACK;
8887 signal->theData[0] = DihContinueB::ZPACK_TABLE_INTO_PAGES;
8888 signal->theData[1] = tabPtr.i;
8889 sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
8890 }
8891
8892 void
alter_table_writeTable_conf(Signal * signal,Uint32 ptrI,Uint32 err)8893 Dbdih::alter_table_writeTable_conf(Signal* signal, Uint32 ptrI, Uint32 err)
8894 {
8895 jamEntry();
8896 ndbrequire(err == 0);
8897
8898 ConnectRecordPtr connectPtr;
8899 connectPtr.i = ptrI;
8900 ptrCheckGuard(connectPtr, cconnectFileSize, connectRecord);
8901
8902 switch(connectPtr.p->connectState){
8903 case ConnectRecord::ALTER_TABLE_REVERT:
8904 {
8905 jam();
8906 send_alter_tab_conf(signal, connectPtr);
8907
8908 Ptr<TabRecord> tabPtr;
8909 tabPtr.i = connectPtr.p->table;
8910 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
8911 ndbrequire(tabPtr.p->connectrec == connectPtr.i);
8912 tabPtr.p->connectrec = RNIL;
8913 release_connect(connectPtr);
8914 return;
8915 }
8916 case ConnectRecord::ALTER_TABLE:
8917 {
8918 jam();
8919 send_alter_tab_conf(signal, connectPtr);
8920 return;
8921 }
8922 default:
8923 jamLine(connectPtr.p->connectState);
8924 ndbrequire(false);
8925 }
8926 }
8927
8928 void
drop_fragments(Signal * signal,Ptr<ConnectRecord> connectPtr,Uint32 curr)8929 Dbdih::drop_fragments(Signal* signal, Ptr<ConnectRecord> connectPtr,
8930 Uint32 curr)
8931 {
8932 ndbrequire(curr >= connectPtr.p->m_alter.m_org_totalfragments);
8933 if (curr == connectPtr.p->m_alter.m_org_totalfragments)
8934 {
8935 /**
8936 * done...
8937 */
8938 jam();
8939 Ptr<TabRecord> tabPtr;
8940 tabPtr.i = connectPtr.p->table;
8941 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
8942
8943 Uint32 new_frags = connectPtr.p->m_alter.m_totalfragments;
8944 Uint32 org_frags = connectPtr.p->m_alter.m_org_totalfragments;
8945 tabPtr.p->totalfragments = new_frags;
8946 for (Uint32 i = new_frags - 1; i >= org_frags; i--)
8947 {
8948 jam();
8949 release_fragment_from_table(tabPtr, i);
8950 }
8951 connectPtr.p->m_alter.m_totalfragments = org_frags;
8952
8953 switch(connectPtr.p->connectState){
8954 case ConnectRecord::ALTER_TABLE_ABORT:
8955 {
8956 jam();
8957 ndbrequire(tabPtr.p->tabCopyStatus == TabRecord::CS_ALTER_TABLE);
8958 tabPtr.p->tabCopyStatus = TabRecord::CS_IDLE;
8959 send_alter_tab_ref(signal, tabPtr, connectPtr, ~0);
8960
8961 connectPtr.p->connectState = ConnectRecord::ALTER_TABLE;
8962 return;
8963 }
8964 case ConnectRecord::ALTER_TABLE_REVERT:
8965 {
8966 jam();
8967 Callback cb;
8968 cb.m_callbackData = connectPtr.i;
8969 cb.m_callbackFunction = safe_cast(&Dbdih::alter_table_writeTable_conf);
8970 saveTableFile(signal, connectPtr, tabPtr, TabRecord::CS_ALTER_TABLE, cb);
8971 return;
8972 }
8973 default:
8974 jamLine(connectPtr.p->connectState);
8975 ndbrequire(false);
8976 }
8977 return;
8978 }
8979
8980 ndbrequire(curr > 0);
8981 DropFragReq* req = (DropFragReq*)signal->getDataPtrSend();
8982 req->senderRef = reference();
8983 req->senderData = connectPtr.i;
8984 req->tableId = connectPtr.p->table;
8985 req->fragId = curr - 1;
8986 req->requestInfo = DropFragReq::AlterTableAbort;
8987 sendSignal(DBLQH_REF, GSN_DROP_FRAG_REQ, signal,
8988 DropFragReq::SignalLength, JBB);
8989 }
8990
8991 void
execDROP_FRAG_REF(Signal * signal)8992 Dbdih::execDROP_FRAG_REF(Signal* signal)
8993 {
8994 ndbrequire(false);
8995 }
8996
8997 void
execDROP_FRAG_CONF(Signal * signal)8998 Dbdih::execDROP_FRAG_CONF(Signal* signal)
8999 {
9000 DropFragConf* conf = (DropFragConf*)signal->getDataPtr();
9001
9002 ConnectRecordPtr connectPtr;
9003 connectPtr.i = conf->senderData;
9004 ptrCheckGuard(connectPtr, cconnectFileSize, connectRecord);
9005
9006 drop_fragments(signal, connectPtr, conf->fragId);
9007 }
9008
9009 /*
9010 G E T N O D E S
9011 **********************=
9012 */
9013 /*****************************************************************************/
9014 /* ********** TRANSACTION HANDLING MODULE *************/
9015 /*****************************************************************************/
9016 /*
9017 3.8.1 G E T N O D E S R E Q U E S T
9018 ******************************************
9019 Asks what nodes should be part of a transaction.
9020 */
execDIGETNODESREQ(Signal * signal)9021 void Dbdih::execDIGETNODESREQ(Signal* signal)
9022 {
9023 const DiGetNodesReq * const req = (DiGetNodesReq *)&signal->theData[0];
9024 FragmentstorePtr fragPtr;
9025 TabRecordPtr tabPtr;
9026 tabPtr.i = req->tableId;
9027 Uint32 hashValue = req->hashValue;
9028 Uint32 ttabFileSize = ctabFileSize;
9029 Uint32 fragId, newFragId = RNIL;
9030 DiGetNodesConf * const conf = (DiGetNodesConf *)&signal->theData[0];
9031 TabRecord* regTabDesc = tabRecord;
9032 EmulatedJamBuffer * jambuf = * (EmulatedJamBuffer**)(req->jamBuffer);
9033 thrjamEntry(jambuf);
9034 ptrCheckGuard(tabPtr, ttabFileSize, regTabDesc);
9035
9036 if (DictTabInfo::isOrderedIndex(tabPtr.p->tableType))
9037 {
9038 thrjam(jambuf);
9039 tabPtr.i = tabPtr.p->primaryTableId;
9040 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
9041 }
9042
9043 loop:
9044 Uint32 val = tabPtr.p->m_lock.read_lock();
9045 Uint32 map_ptr_i = tabPtr.p->m_map_ptr_i;
9046 Uint32 new_map_ptr_i = tabPtr.p->m_new_map_ptr_i;
9047
9048 /* When distr key indicator is set, regardless
9049 * of distribution algorithm in use, hashValue
9050 * IS fragment id.
9051 */
9052 if (req->distr_key_indicator)
9053 {
9054 fragId = hashValue;
9055 if (unlikely(fragId >= tabPtr.p->totalfragments))
9056 {
9057 thrjam(jambuf);
9058 conf->zero= 1; //Indicate error;
9059 signal->theData[1]= ZUNDEFINED_FRAGMENT_ERROR;
9060 return;
9061 }
9062 }
9063 else if (tabPtr.p->method == TabRecord::HASH_MAP)
9064 {
9065 thrjam(jambuf);
9066 Ptr<Hash2FragmentMap> ptr;
9067 g_hash_map.getPtr(ptr, map_ptr_i);
9068 fragId = ptr.p->m_map[hashValue % ptr.p->m_cnt];
9069
9070 if (unlikely(new_map_ptr_i != RNIL))
9071 {
9072 thrjam(jambuf);
9073 g_hash_map.getPtr(ptr, new_map_ptr_i);
9074 newFragId = ptr.p->m_map[hashValue % ptr.p->m_cnt];
9075 if (newFragId == fragId)
9076 {
9077 thrjam(jambuf);
9078 newFragId = RNIL;
9079 }
9080 }
9081 }
9082 else if (tabPtr.p->method == TabRecord::LINEAR_HASH)
9083 {
9084 thrjam(jambuf);
9085 fragId = hashValue & tabPtr.p->mask;
9086 if (fragId < tabPtr.p->hashpointer) {
9087 thrjam(jambuf);
9088 fragId = hashValue & ((tabPtr.p->mask << 1) + 1);
9089 }//if
9090 }
9091 else if (tabPtr.p->method == TabRecord::NORMAL_HASH)
9092 {
9093 thrjam(jambuf);
9094 fragId= hashValue % tabPtr.p->totalfragments;
9095 }
9096 else
9097 {
9098 thrjam(jambuf);
9099 ndbassert(tabPtr.p->method == TabRecord::USER_DEFINED);
9100
9101 /* User defined partitioning, but no distribution key passed */
9102 conf->zero= 1; //Indicate error;
9103 signal->theData[1]= ZUNDEFINED_FRAGMENT_ERROR;
9104 return;
9105 }
9106 getFragstore(tabPtr.p, fragId, fragPtr);
9107 Uint32 nodeCount = extractNodeInfo(fragPtr.p, conf->nodes);
9108 Uint32 sig2 = (nodeCount - 1) +
9109 (fragPtr.p->distributionKey << 16) +
9110 (dihGetInstanceKey(fragPtr) << 24);
9111 conf->zero = 0;
9112 conf->reqinfo = sig2;
9113 conf->fragId = fragId;
9114
9115 if (unlikely(newFragId != RNIL))
9116 {
9117 thrjam(jambuf);
9118 conf->reqinfo |= DiGetNodesConf::REORG_MOVING;
9119 getFragstore(tabPtr.p, newFragId, fragPtr);
9120 nodeCount = extractNodeInfo(fragPtr.p, conf->nodes + 2 + MAX_REPLICAS);
9121 conf->nodes[MAX_REPLICAS] = newFragId;
9122 conf->nodes[MAX_REPLICAS + 1] = (nodeCount - 1) +
9123 (fragPtr.p->distributionKey << 16) +
9124 (dihGetInstanceKey(fragPtr) << 24);
9125 }
9126
9127 if (unlikely(!tabPtr.p->m_lock.read_unlock(val)))
9128 goto loop;
9129 }//Dbdih::execDIGETNODESREQ()
9130
extractNodeInfo(const Fragmentstore * fragPtr,Uint32 nodes[])9131 Uint32 Dbdih::extractNodeInfo(const Fragmentstore * fragPtr, Uint32 nodes[])
9132 {
9133 Uint32 nodeCount = 0;
9134 nodes[0] = nodes[1] = nodes[2] = nodes[3] = 0;
9135 for (Uint32 i = 0; i < fragPtr->fragReplicas; i++) {
9136 jam();
9137 NodeRecordPtr nodePtr;
9138 ndbrequire(i < MAX_REPLICAS);
9139 nodePtr.i = fragPtr->activeNodes[i];
9140 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
9141 if (nodePtr.p->useInTransactions) {
9142 jam();
9143 nodes[nodeCount] = nodePtr.i;
9144 nodeCount++;
9145 }//if
9146 }//for
9147 ndbrequire(nodeCount > 0);
9148 return nodeCount;
9149 }//Dbdih::extractNodeInfo()
9150
9151 void
getFragstore(TabRecord * tab,Uint32 fragNo,FragmentstorePtr & fragptr)9152 Dbdih::getFragstore(TabRecord * tab, //In parameter
9153 Uint32 fragNo, //In parameter
9154 FragmentstorePtr & fragptr) //Out parameter
9155 {
9156 FragmentstorePtr fragPtr;
9157 Uint32 TfragstoreFileSize = cfragstoreFileSize;
9158 Fragmentstore* TfragStore = fragmentstore;
9159 Uint32 chunkNo = fragNo >> LOG_NO_OF_FRAGS_PER_CHUNK;
9160 Uint32 chunkIndex = fragNo & (NO_OF_FRAGS_PER_CHUNK - 1);
9161 fragPtr.i = tab->startFid[chunkNo] + chunkIndex;
9162 if (likely(chunkNo < NDB_ARRAY_SIZE(tab->startFid))) {
9163 ptrCheckGuard(fragPtr, TfragstoreFileSize, TfragStore);
9164 fragptr = fragPtr;
9165 return;
9166 }//if
9167 ndbrequire(false);
9168 }//Dbdih::getFragstore()
9169
allocFragments(Uint32 noOfFragments,TabRecordPtr tabPtr)9170 void Dbdih::allocFragments(Uint32 noOfFragments, TabRecordPtr tabPtr)
9171 {
9172 FragmentstorePtr fragPtr;
9173 Uint32 noOfChunks = (noOfFragments + (NO_OF_FRAGS_PER_CHUNK - 1)) >> LOG_NO_OF_FRAGS_PER_CHUNK;
9174 ndbrequire(cremainingfrags >= noOfFragments);
9175 for (Uint32 i = 0; i < noOfChunks; i++) {
9176 jam();
9177 Uint32 baseFrag = cfirstfragstore;
9178 ndbrequire(i < NDB_ARRAY_SIZE(tabPtr.p->startFid));
9179 tabPtr.p->startFid[i] = baseFrag;
9180 fragPtr.i = baseFrag;
9181 ptrCheckGuard(fragPtr, cfragstoreFileSize, fragmentstore);
9182 cfirstfragstore = fragPtr.p->nextFragmentChunk;
9183 cremainingfrags -= NO_OF_FRAGS_PER_CHUNK;
9184 for (Uint32 j = 0; j < NO_OF_FRAGS_PER_CHUNK; j++) {
9185 jam();
9186 fragPtr.i = baseFrag + j;
9187 ptrCheckGuard(fragPtr, cfragstoreFileSize, fragmentstore);
9188 initFragstore(fragPtr);
9189 }//if
9190 }//for
9191 tabPtr.p->noOfFragChunks = noOfChunks;
9192 }//Dbdih::allocFragments()
9193
releaseFragments(TabRecordPtr tabPtr)9194 void Dbdih::releaseFragments(TabRecordPtr tabPtr)
9195 {
9196 FragmentstorePtr fragPtr;
9197 for (Uint32 i = 0; i < tabPtr.p->noOfFragChunks; i++) {
9198 jam();
9199 ndbrequire(i < NDB_ARRAY_SIZE(tabPtr.p->startFid));
9200 Uint32 baseFrag = tabPtr.p->startFid[i];
9201 fragPtr.i = baseFrag;
9202 ptrCheckGuard(fragPtr, cfragstoreFileSize, fragmentstore);
9203 fragPtr.p->nextFragmentChunk = cfirstfragstore;
9204 cfirstfragstore = baseFrag;
9205 tabPtr.p->startFid[i] = RNIL;
9206 cremainingfrags += NO_OF_FRAGS_PER_CHUNK;
9207 }//for
9208 tabPtr.p->noOfFragChunks = 0;
9209 }//Dbdih::releaseFragments()
9210
initialiseFragstore()9211 void Dbdih::initialiseFragstore()
9212 {
9213 Uint32 i;
9214 FragmentstorePtr fragPtr;
9215 for (i = 0; i < cfragstoreFileSize; i++) {
9216 fragPtr.i = i;
9217 ptrCheckGuard(fragPtr, cfragstoreFileSize, fragmentstore);
9218 initFragstore(fragPtr);
9219 }//for
9220 Uint32 noOfChunks = cfragstoreFileSize >> LOG_NO_OF_FRAGS_PER_CHUNK;
9221 fragPtr.i = 0;
9222 cfirstfragstore = RNIL;
9223 cremainingfrags = 0;
9224 for (i = 0; i < noOfChunks; i++) {
9225 refresh_watch_dog();
9226 ptrCheckGuard(fragPtr, cfragstoreFileSize, fragmentstore);
9227 fragPtr.p->nextFragmentChunk = cfirstfragstore;
9228 cfirstfragstore = fragPtr.i;
9229 fragPtr.i += NO_OF_FRAGS_PER_CHUNK;
9230 cremainingfrags += NO_OF_FRAGS_PER_CHUNK;
9231 }//for
9232 }//Dbdih::initialiseFragstore()
9233
/**
 * Fallbacks for the read/write barrier macros: when NDB_HAVE_RMB or
 * NDB_HAVE_WMB is not defined, the corresponding barrier expands to an
 * empty statement.
 */
#if !defined(NDB_HAVE_RMB)
#define rmb() do {} while (0)
#endif

#if !defined(NDB_HAVE_WMB)
#define wmb() do {} while (0)
#endif
9241
9242 inline
9243 bool
isEmpty(const DIVERIFY_queue & q)9244 Dbdih::isEmpty(const DIVERIFY_queue & q)
9245 {
9246 return q.cfirstVerifyQueue == q.clastVerifyQueue;
9247 }
9248
9249 inline
9250 void
enqueue(DIVERIFY_queue & q,Uint32 senderData,Uint64 gci)9251 Dbdih::enqueue(DIVERIFY_queue & q, Uint32 senderData, Uint64 gci)
9252 {
9253 #ifndef NDEBUG
9254 /**
9255 * - assert only
9256 * - we must read first *before* "publishing last
9257 * or else DIH-thread could already have consumed entry
9258 * when we call assert
9259 */
9260 Uint32 first = q.cfirstVerifyQueue;
9261 #endif
9262
9263 Uint32 last = q.clastVerifyQueue;
9264 ApiConnectRecord * apiConnectRecord = q.apiConnectRecord;
9265
9266 apiConnectRecord[last].senderData = senderData;
9267 apiConnectRecord[last].apiGci = gci;
9268 wmb();
9269 if (last + 1 == capiConnectFileSize)
9270 {
9271 q.clastVerifyQueue = 0;
9272 }
9273 else
9274 {
9275 q.clastVerifyQueue = last + 1;
9276 }
9277 assert(q.clastVerifyQueue != first);
9278 }
9279
9280 inline
9281 void
dequeue(DIVERIFY_queue & q,ApiConnectRecord & conRecord)9282 Dbdih::dequeue(DIVERIFY_queue & q, ApiConnectRecord & conRecord)
9283 {
9284 Uint32 first = q.cfirstVerifyQueue;
9285 ApiConnectRecord * apiConnectRecord = q.apiConnectRecord;
9286
9287 rmb();
9288 conRecord.senderData = apiConnectRecord[first].senderData;
9289 conRecord.apiGci = apiConnectRecord[first].apiGci;
9290
9291 if (first + 1 == capiConnectFileSize)
9292 {
9293 q.cfirstVerifyQueue = 0;
9294 }
9295 else
9296 {
9297 q.cfirstVerifyQueue = first + 1;
9298 }
9299 }
9300
9301 /*
9302 3.9 V E R I F I C A T I O N
9303 ****************************=
9304 */
9305 /****************************************************************************/
9306 /* ********** VERIFICATION SUB-MODULE *************/
9307 /****************************************************************************/
9308 /*
9309 3.9.1 R E C E I V I N G O F V E R I F I C A T I O N R E Q U E S T
9310 *************************************************************************
9311 */
execDIVERIFYREQ(Signal * signal)9312 void Dbdih::execDIVERIFYREQ(Signal* signal)
9313 {
9314 EmulatedJamBuffer * jambuf = * (EmulatedJamBuffer**)(signal->theData+2);
9315 thrjamEntry(jambuf);
9316 Uint32 qno = signal->theData[1];
9317 ndbassert(qno < NDB_ARRAY_SIZE(c_diverify_queue));
9318 DIVERIFY_queue & q = c_diverify_queue[qno];
9319 loop:
9320 Uint32 val = m_micro_gcp.m_lock.read_lock();
9321 Uint32 blocked = getBlockCommit() == true ? 1 : 0;
9322 if (blocked == 0 && isEmpty(q))
9323 {
9324 thrjam(jambuf);
9325 /*-----------------------------------------------------------------------*/
9326 // We are not blocked and the verify queue was empty currently so we can
9327 // simply reply back to TC immediately. The method was called with
9328 // EXECUTE_DIRECT so we reply back by setting signal data and returning.
9329 // theData[0] already contains the correct information so
9330 // we need not touch it.
9331 /*-----------------------------------------------------------------------*/
9332 signal->theData[1] = (Uint32)(m_micro_gcp.m_current_gci >> 32);
9333 signal->theData[2] = (Uint32)(m_micro_gcp.m_current_gci & 0xFFFFFFFF);
9334 signal->theData[3] = 0;
9335 if (unlikely(! m_micro_gcp.m_lock.read_unlock(val)))
9336 goto loop;
9337 return;
9338 }//if
9339 /*-------------------------------------------------------------------------*/
9340 // Since we are blocked we need to put this operation last in the verify
9341 // queue to ensure that operation starts up in the correct order.
9342 /*-------------------------------------------------------------------------*/
9343 enqueue(q, signal->theData[0], m_micro_gcp.m_new_gci);
9344 if (blocked == 0 && jambuf == jamBuffer())
9345 {
9346 emptyverificbuffer(signal, 0, false);
9347 }
9348 signal->theData[3] = blocked + 1; // Indicate no immediate return
9349 return;
9350 }//Dbdih::execDIVERIFYREQ()
9351
execDIH_SCAN_TAB_REQ(Signal * signal)9352 void Dbdih::execDIH_SCAN_TAB_REQ(Signal* signal)
9353 {
9354 DihScanTabReq * req = (DihScanTabReq*)signal->getDataPtr();
9355 TabRecordPtr tabPtr;
9356 const Uint32 senderData = req->senderData;
9357 const Uint32 senderRef = req->senderRef;
9358 const Uint32 schemaTransId = req->schemaTransId;
9359
9360 jamEntry();
9361
9362 tabPtr.i = req->tableId;
9363 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
9364
9365 if (tabPtr.p->tabStatus != TabRecord::TS_ACTIVE)
9366 {
9367 if (! (tabPtr.p->tabStatus == TabRecord::TS_CREATING &&
9368 tabPtr.p->schemaTransId == schemaTransId))
9369 {
9370 jam();
9371 goto error;
9372 }
9373 }
9374
9375 tabPtr.p->m_scan_count[0]++;
9376
9377 {
9378 DihScanTabConf* conf = (DihScanTabConf*)signal->getDataPtrSend();
9379 conf->tableId = tabPtr.i;
9380 conf->senderData = senderData;
9381 conf->fragmentCount = tabPtr.p->totalfragments;
9382 conf->noOfBackups = tabPtr.p->noOfBackups;
9383 conf->scanCookie = tabPtr.p->m_map_ptr_i;
9384 conf->reorgFlag = tabPtr.p->m_scan_reorg_flag;
9385 sendSignal(senderRef, GSN_DIH_SCAN_TAB_CONF, signal,
9386 DihScanTabConf::SignalLength, JBB);
9387 }
9388 return;
9389
9390 error:
9391 DihScanTabRef* ref = (DihScanTabRef*)signal->getDataPtrSend();
9392 ref->tableId = tabPtr.i;
9393 ref->senderData = senderData;
9394 ref->error = DihScanTabRef::ErroneousTableState;
9395 ref->tableStatus = tabPtr.p->tabStatus;
9396 ref->schemaTransId = schemaTransId;
9397 sendSignal(senderRef, GSN_DIH_SCAN_TAB_REF, signal,
9398 DihScanTabRef::SignalLength, JBB);
9399 return;
9400
9401 }//Dbdih::execDI_FCOUNTREQ()
9402
execDIH_SCAN_GET_NODES_REQ(Signal * signal)9403 void Dbdih::execDIH_SCAN_GET_NODES_REQ(Signal* signal)
9404 {
9405 FragmentstorePtr fragPtr;
9406 TabRecordPtr tabPtr;
9407 jamEntry();
9408 DihScanGetNodesReq* req = (DihScanGetNodesReq*)signal->getDataPtrSend();
9409 Uint32 senderRef = req->senderRef;
9410 Uint32 senderData = req->senderData;
9411 Uint32 fragId = req->fragId;
9412
9413 tabPtr.i = req->tableId;
9414 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
9415 if (DictTabInfo::isOrderedIndex(tabPtr.p->tableType)) {
9416 jam();
9417 tabPtr.i = tabPtr.p->primaryTableId;
9418 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
9419 }
9420
9421 Uint32 nodes[MAX_REPLICAS];
9422 getFragstore(tabPtr.p, fragId, fragPtr);
9423 Uint32 count = extractNodeInfo(fragPtr.p, nodes);
9424
9425 DihScanGetNodesConf* conf = (DihScanGetNodesConf*)signal->getDataPtrSend();
9426 conf->senderData = senderData;
9427 conf->nodes[0] = nodes[0];
9428 conf->nodes[1] = nodes[1];
9429 conf->nodes[2] = nodes[2];
9430 conf->nodes[3] = nodes[3];
9431 conf->count = count;
9432 conf->tableId = tabPtr.i;
9433 conf->fragId = fragId;
9434 conf->instanceKey = dihGetInstanceKey(fragPtr);
9435 sendSignal(senderRef, GSN_DIH_SCAN_GET_NODES_CONF, signal,
9436 DihScanGetNodesConf::SignalLength, JBB);
9437 }//Dbdih::execDIGETPRIMREQ()
9438
9439 void
execDIH_SCAN_TAB_COMPLETE_REP(Signal * signal)9440 Dbdih::execDIH_SCAN_TAB_COMPLETE_REP(Signal* signal)
9441 {
9442 jamEntry();
9443 DihScanTabCompleteRep* rep = (DihScanTabCompleteRep*)signal->getDataPtr();
9444 TabRecordPtr tabPtr;
9445 tabPtr.i = rep->tableId;
9446 Uint32 map_ptr_i = rep->scanCookie;
9447 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
9448
9449 if (map_ptr_i == tabPtr.p->m_map_ptr_i)
9450 {
9451 jam();
9452 ndbassert(tabPtr.p->m_scan_count[0]);
9453 tabPtr.p->m_scan_count[0]--;
9454 }
9455 else
9456 {
9457 jam();
9458 ndbassert(tabPtr.p->m_scan_count[1]);
9459 tabPtr.p->m_scan_count[1]--;
9460 }
9461 }
9462
9463
9464 /****************************************************************************/
9465 /* ********** GLOBAL-CHECK-POINT HANDLING MODULE *************/
9466 /****************************************************************************/
9467 /*
9468 3.10 G L O B A L C H E C K P O I N T ( IN M A S T E R R O L E)
9469 *******************************************************************
9470 */
9471
9472 bool
check_enable_micro_gcp(Signal * signal,bool broadcast)9473 Dbdih::check_enable_micro_gcp(Signal* signal, bool broadcast)
9474 {
9475 ndbassert(m_micro_gcp.m_enabled == false);
9476 ndbassert(NodeVersionInfo::DataLength == 6);
9477 Uint32 min = ~(Uint32)0;
9478 const NodeVersionInfo& info = getNodeVersionInfo();
9479 for (Uint32 i = 0; i<3; i++)
9480 {
9481 Uint32 tmp = info.m_type[i].m_min_version;
9482 if (tmp)
9483 {
9484 min = (min < tmp) ? min : tmp;
9485 }
9486 }
9487
9488 if (ndb_check_micro_gcp(min))
9489 {
9490 jam();
9491 m_micro_gcp.m_enabled = true;
9492
9493 infoEvent("Enabling micro GCP");
9494 if (broadcast)
9495 {
9496 jam();
9497 UpgradeProtocolOrd * ord = (UpgradeProtocolOrd*)signal->getDataPtrSend();
9498 ord->type = UpgradeProtocolOrd::UPO_ENABLE_MICRO_GCP;
9499
9500 /**
9501 * We need to notify all ndbd's or they'll get confused!
9502 */
9503 NodeRecordPtr specNodePtr;
9504 specNodePtr.i = cfirstAliveNode;
9505 do {
9506 jam();
9507 ptrCheckGuard(specNodePtr, MAX_NDB_NODES, nodeRecord);
9508 sendSignal(calcDihBlockRef(specNodePtr.i), GSN_UPGRADE_PROTOCOL_ORD,
9509 signal, UpgradeProtocolOrd::SignalLength, JBA);
9510 specNodePtr.i = specNodePtr.p->nextNode;
9511 } while (specNodePtr.i != RNIL);
9512 EXECUTE_DIRECT(QMGR,GSN_UPGRADE_PROTOCOL_ORD,signal,signal->getLength());
9513 }
9514 }
9515 return m_micro_gcp.m_enabled;
9516 }
9517
9518 void
execUPGRADE_PROTOCOL_ORD(Signal * signal)9519 Dbdih::execUPGRADE_PROTOCOL_ORD(Signal* signal)
9520 {
9521 const UpgradeProtocolOrd* ord = (UpgradeProtocolOrd*)signal->getDataPtr();
9522 switch(ord->type){
9523 case UpgradeProtocolOrd::UPO_ENABLE_MICRO_GCP:
9524 jam();
9525 m_micro_gcp.m_enabled = true;
9526 EXECUTE_DIRECT(QMGR, GSN_UPGRADE_PROTOCOL_ORD,signal, signal->getLength());
9527 return;
9528 }
9529 }
9530
9531 void
startGcpLab(Signal * signal,Uint32 aWaitTime)9532 Dbdih::startGcpLab(Signal* signal, Uint32 aWaitTime)
9533 {
9534 for (Uint32 i = 0; i < c_diverify_queue_cnt; i++)
9535 {
9536 if (c_diverify_queue[i].m_empty_done == 0)
9537 {
9538 // Previous global checkpoint is not yet completed.
9539 jam();
9540 signal->theData[0] = DihContinueB::ZSTART_GCP;
9541 sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 10, 1);
9542 return;
9543 }
9544 }
9545
9546 emptyWaitGCPMasterQueue(signal,
9547 m_micro_gcp.m_current_gci,
9548 c_waitEpochMasterList);
9549
9550 if (c_nodeStartMaster.blockGcp != 0 &&
9551 m_gcp_save.m_master.m_state == GcpSave::GCP_SAVE_IDLE)
9552 {
9553 jam();
9554
9555 /* ------------------------------------------------------------------ */
9556 /* A NEW NODE WANTS IN AND WE MUST ALLOW IT TO COME IN NOW SINCE THE */
9557 /* GCP IS COMPLETED. */
9558 /* ------------------------------------------------------------------ */
9559
9560 if (ERROR_INSERTED(7217))
9561 {
9562 jam();
9563
9564 signal->theData[0] = 9999;
9565 sendSignal(numberToRef(CMVMI, refToNode(c_nodeStartMaster.startNode)),
9566 GSN_NDB_TAMPER, signal, 1, JBB);
9567
9568 m_micro_gcp.m_master.m_start_time = 0; // Force start
9569 // fall through
9570 }
9571 else
9572 {
9573 jam();
9574 ndbrequire(c_nodeStartMaster.blockGcp == 1); // Ordered...
9575 c_nodeStartMaster.blockGcp = 2; // effective
9576 gcpBlockedLab(signal);
9577 return;
9578 }
9579 }
9580
9581 if (cgcpOrderBlocked)
9582 {
9583 jam();
9584 signal->theData[0] = DihContinueB::ZSTART_GCP;
9585 sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 10, 1);
9586 return;
9587 }
9588
9589 Uint32 delayMicro = m_micro_gcp.m_enabled ?
9590 m_micro_gcp.m_master.m_time_between_gcp :
9591 m_gcp_save.m_master.m_time_between_gcp;
9592
9593 Uint64 now = c_current_time = NdbTick_CurrentMillisecond();
9594 if (! (now >= m_micro_gcp.m_master.m_start_time + delayMicro))
9595 {
9596 jam();
9597 signal->theData[0] = DihContinueB::ZSTART_GCP;
9598 sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 10, 1);
9599 return;
9600 }
9601
9602 m_micro_gcp.m_master.m_start_time = now;
9603
9604 if (m_micro_gcp.m_enabled == false &&
9605 m_micro_gcp.m_master.m_time_between_gcp)
9606 {
9607 /**
9608 * Micro GCP is disabled...but configured...
9609 */
9610 jam();
9611 check_enable_micro_gcp(signal, true);
9612 }
9613
9614 /**
9615 * Check that there has not been more than 2^32 micro GCP wo/ any save
9616 */
9617 Uint64 currGCI = m_micro_gcp.m_current_gci;
9618 ndbrequire(Uint32(currGCI) != ~(Uint32)0);
9619 m_micro_gcp.m_master.m_new_gci = currGCI + 1;
9620
9621 Uint32 delaySave = m_gcp_save.m_master.m_time_between_gcp;
9622 if ((m_micro_gcp.m_enabled == false) ||
9623 (now >= m_gcp_save.m_master.m_start_time + delaySave &&
9624 m_gcp_save.m_master.m_state == GcpSave::GCP_SAVE_IDLE))
9625 {
9626 jam();
9627 /**
9628 * Time for save...switch gci_hi
9629 */
9630 m_gcp_save.m_master.m_start_time = now;
9631 m_micro_gcp.m_master.m_new_gci = Uint64((currGCI >> 32) + 1) << 32;
9632
9633 signal->theData[0] = NDB_LE_GlobalCheckpointStarted; //Event type
9634 signal->theData[1] = Uint32(currGCI >> 32);
9635 signal->theData[2] = Uint32(currGCI);
9636 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3, JBB);
9637 }
9638
9639 ndbassert(m_micro_gcp.m_enabled || Uint32(m_micro_gcp.m_new_gci) == 0);
9640
9641
9642 /***************************************************************************/
9643 // Report the event that a global checkpoint has started.
9644 /***************************************************************************/
9645
9646 CRASH_INSERTION(7000);
9647 m_micro_gcp.m_master.m_state = MicroGcp::M_GCP_PREPARE;
9648 signal->setTrace(TestOrd::TraceGlobalCheckpoint);
9649
9650 #ifdef ERROR_INSERT
9651 if (ERROR_INSERTED(7186))
9652 {
9653 sendToRandomNodes("GCP_PREPARE",
9654 signal, &c_GCP_PREPARE_Counter, &Dbdih::sendGCP_PREPARE);
9655 signal->theData[0] = 9999;
9656 sendSignalWithDelay(CMVMI_REF, GSN_NDB_TAMPER, signal, 1000, 1);
9657 return;
9658 }
9659 else if (ERROR_INSERTED(7200))
9660 {
9661 c_GCP_PREPARE_Counter.clearWaitingFor();
9662 NodeRecordPtr nodePtr;
9663 nodePtr.i = cfirstAliveNode;
9664 do {
9665 jam();
9666 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
9667 c_GCP_PREPARE_Counter.setWaitingFor(nodePtr.i);
9668 if (nodePtr.i != getOwnNodeId())
9669 {
9670 SET_ERROR_INSERT_VALUE(7201);
9671 sendGCP_PREPARE(signal, nodePtr.i, RNIL);
9672 }
9673 else
9674 {
9675 SET_ERROR_INSERT_VALUE(7202);
9676 sendGCP_PREPARE(signal, nodePtr.i, RNIL);
9677 }
9678 nodePtr.i = nodePtr.p->nextNode;
9679 } while (nodePtr.i != RNIL);
9680
9681 NodeReceiverGroup rg(CMVMI, c_GCP_PREPARE_Counter);
9682 rg.m_nodes.clear(getOwnNodeId());
9683 Uint32 victim = rg.m_nodes.find(0);
9684
9685 signal->theData[0] = 9999;
9686 sendSignal(numberToRef(CMVMI, victim),
9687 GSN_NDB_TAMPER, signal, 1, JBA);
9688
9689 CLEAR_ERROR_INSERT_VALUE;
9690 return;
9691 }
9692 else if (ERROR_INSERTED(7227))
9693 {
9694 ndbout_c("Not sending GCP_PREPARE to %u", c_error_insert_extra);
9695 c_GCP_PREPARE_Counter.clearWaitingFor();
9696 NodeRecordPtr nodePtr;
9697 nodePtr.i = cfirstAliveNode;
9698 do {
9699 jam();
9700 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
9701 c_GCP_PREPARE_Counter.setWaitingFor(nodePtr.i);
9702 if (nodePtr.i != c_error_insert_extra)
9703 {
9704 sendGCP_PREPARE(signal, nodePtr.i, RNIL);
9705 }
9706 nodePtr.i = nodePtr.p->nextNode;
9707 } while (nodePtr.i != RNIL);
9708
9709 signal->theData[0] = 9999;
9710 sendSignalWithDelay(CMVMI_REF, GSN_NDB_TAMPER, signal, 200, 1);
9711 return;
9712 }
9713 #endif
9714
9715 sendLoopMacro(GCP_PREPARE, sendGCP_PREPARE, RNIL);
9716 }//Dbdih::startGcpLab()
9717
execGCP_PREPARECONF(Signal * signal)9718 void Dbdih::execGCP_PREPARECONF(Signal* signal)
9719 {
9720 jamEntry();
9721 Uint32 senderNodeId = signal->theData[0];
9722 Uint32 gci_hi = signal->theData[1];
9723 Uint32 gci_lo = signal->theData[2];
9724
9725 if (unlikely(signal->getLength() < GCPPrepareConf::SignalLength))
9726 {
9727 gci_lo = 0;
9728 ndbassert(!ndb_check_micro_gcp(getNodeInfo(senderNodeId).m_version));
9729 }
9730
9731 Uint64 gci = gci_lo | (Uint64(gci_hi) << 32);
9732 ndbrequire(gci == m_micro_gcp.m_master.m_new_gci);
9733 receiveLoopMacro(GCP_PREPARE, senderNodeId);
9734 //-------------------------------------------------------------
9735 // We have now received all replies. We are ready to continue
9736 // with committing the global checkpoint.
9737 //-------------------------------------------------------------
9738 gcpcommitreqLab(signal);
9739 }//Dbdih::execGCP_PREPARECONF()
9740
gcpcommitreqLab(Signal * signal)9741 void Dbdih::gcpcommitreqLab(Signal* signal)
9742 {
9743 CRASH_INSERTION(7001);
9744
9745 m_micro_gcp.m_master.m_state = MicroGcp::M_GCP_COMMIT;
9746
9747 #ifdef ERROR_INSERT
9748 if (ERROR_INSERTED(7187))
9749 {
9750 sendToRandomNodes("GCP_COMMIT",
9751 signal, &c_GCP_COMMIT_Counter, &Dbdih::sendGCP_COMMIT);
9752 signal->theData[0] = 9999;
9753 sendSignalWithDelay(CMVMI_REF, GSN_NDB_TAMPER, signal, 1000, 1);
9754 return;
9755 }
9756 #endif
9757
9758 sendLoopMacro(GCP_COMMIT, sendGCP_COMMIT, RNIL);
9759 return;
9760 }//Dbdih::gcpcommitreqLab()
9761
execGCP_NODEFINISH(Signal * signal)9762 void Dbdih::execGCP_NODEFINISH(Signal* signal)
9763 {
9764 jamEntry();
9765 const Uint32 senderNodeId = signal->theData[0];
9766 const Uint32 gci_hi = signal->theData[1];
9767 const Uint32 failureNr = signal->theData[2];
9768 const Uint32 gci_lo = signal->theData[3];
9769 const Uint64 gci = gci_lo | (Uint64(gci_hi) << 32);
9770
9771 (void)gci; // TODO validate
9772 (void)failureNr; // kill warning
9773
9774 ndbrequire(m_micro_gcp.m_master.m_state == MicroGcp::M_GCP_COMMIT);
9775 receiveLoopMacro(GCP_COMMIT, senderNodeId);
9776
9777 jam();
9778
9779 if (m_micro_gcp.m_enabled)
9780 {
9781 jam();
9782
9783 m_micro_gcp.m_master.m_state = MicroGcp::M_GCP_COMPLETE;
9784
9785 SubGcpCompleteRep * rep = (SubGcpCompleteRep*)signal->getDataPtr();
9786 rep->senderRef = reference();
9787 rep->gci_hi = (Uint32)(m_micro_gcp.m_old_gci >> 32);
9788 rep->gci_lo = (Uint32)(m_micro_gcp.m_old_gci & 0xFFFFFFFF);
9789 rep->flags = SubGcpCompleteRep::IN_MEMORY;
9790
9791 #ifdef ERROR_INSERT
9792 if (ERROR_INSERTED(7190))
9793 {
9794 sendToRandomNodes("GCP_COMPLETE_REP", signal,
9795 &c_SUB_GCP_COMPLETE_REP_Counter,
9796 &Dbdih::sendSUB_GCP_COMPLETE_REP);
9797 signal->theData[0] = 9999;
9798 sendSignalWithDelay(CMVMI_REF, GSN_NDB_TAMPER, signal, 1000, 1);
9799 }
9800 else if (ERROR_INSERTED(7226))
9801 {
9802 ndbout_c("Not sending SUB_GCP_COMPLETE_REP to %u", c_error_insert_extra);
9803 c_SUB_GCP_COMPLETE_REP_Counter.clearWaitingFor();
9804 NodeRecordPtr nodePtr;
9805 nodePtr.i = cfirstAliveNode;
9806 do {
9807 jam();
9808 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
9809 c_SUB_GCP_COMPLETE_REP_Counter.setWaitingFor(nodePtr.i);
9810 if (nodePtr.i != c_error_insert_extra)
9811 {
9812 sendSignal(calcDihBlockRef(nodePtr.i), GSN_SUB_GCP_COMPLETE_REP,
9813 signal, SubGcpCompleteRep::SignalLength, JBA);
9814 }
9815 nodePtr.i = nodePtr.p->nextNode;
9816 } while (nodePtr.i != RNIL);
9817 SET_ERROR_INSERT_VALUE(7227);
9818
9819 signal->theData[0] = 9999;
9820 sendSignalWithDelay(CMVMI_REF, GSN_NDB_TAMPER, signal, 200, 1);
9821 }
9822 else
9823 #endif
9824 {
9825 jam();
9826 // Normal path...
9827 sendLoopMacro(SUB_GCP_COMPLETE_REP, sendSUB_GCP_COMPLETE_REP, RNIL);
9828 }
9829 }
9830
9831 //-------------------------------------------------------------
9832 // We have now received all replies. We are ready to continue
9833 // with saving the global checkpoint to disk.
9834 //-------------------------------------------------------------
9835 CRASH_INSERTION(7002);
9836
9837 Uint32 curr_hi = (Uint32)(m_micro_gcp.m_current_gci >> 32);
9838 Uint32 old_hi = (Uint32)(m_micro_gcp.m_old_gci >> 32);
9839
9840 if (m_micro_gcp.m_enabled)
9841 {
9842 jam();
9843 }
9844 else
9845 {
9846 ndbrequire(curr_hi != old_hi);
9847 }
9848
9849 if (curr_hi == old_hi)
9850 {
9851 jam();
9852 return;
9853 }
9854
9855 /**
9856 * Start a save
9857 */
9858 Uint32 saveGCI = old_hi;
9859 m_gcp_save.m_master.m_state = GcpSave::GCP_SAVE_REQ;
9860 m_gcp_save.m_master.m_new_gci = saveGCI;
9861
9862 #ifdef ERROR_INSERT
9863 if (ERROR_INSERTED(7188))
9864 {
9865 sendToRandomNodes("GCP_SAVE",
9866 signal, &c_GCP_SAVEREQ_Counter, &Dbdih::sendGCP_SAVEREQ);
9867 signal->theData[0] = 9999;
9868 sendSignalWithDelay(CMVMI_REF, GSN_NDB_TAMPER, signal, 1000, 1);
9869 return;
9870 }
9871 else if (ERROR_INSERTED(7216))
9872 {
9873 infoEvent("GCP_SAVE all/%u", c_error_insert_extra);
9874 NodeRecordPtr nodePtr;
9875 nodePtr.i = c_error_insert_extra;
9876 ptrAss(nodePtr, nodeRecord);
9877
9878 removeAlive(nodePtr);
9879 sendLoopMacro(GCP_SAVEREQ, sendGCP_SAVEREQ, RNIL);
9880 insertAlive(nodePtr);
9881 signal->theData[0] = 9999;
9882 sendSignalWithDelay(CMVMI_REF, GSN_NDB_TAMPER, signal, 1000, 1);
9883 c_GCP_SAVEREQ_Counter.setWaitingFor(c_error_insert_extra);
9884 return;
9885 }
9886 #endif
9887
9888 sendLoopMacro(GCP_SAVEREQ, sendGCP_SAVEREQ, RNIL);
9889 }
9890
9891 void
execSUB_GCP_COMPLETE_ACK(Signal * signal)9892 Dbdih::execSUB_GCP_COMPLETE_ACK(Signal* signal)
9893 {
9894 jamEntry();
9895 SubGcpCompleteAck ack = * CAST_CONSTPTR(SubGcpCompleteAck,
9896 signal->getDataPtr());
9897 Uint32 senderNodeId = refToNode(ack.rep.senderRef);
9898
9899 ndbrequire(m_micro_gcp.m_master.m_state == MicroGcp::M_GCP_COMPLETE);
9900 receiveLoopMacro(SUB_GCP_COMPLETE_REP, senderNodeId);
9901
9902 m_micro_gcp.m_master.m_state = MicroGcp::M_GCP_IDLE;
9903
9904 if (!ERROR_INSERTED(7190))
9905 {
9906 signal->theData[0] = DihContinueB::ZSTART_GCP;
9907 sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 10, 1);
9908 }
9909 }
9910
9911 void
execGCP_SAVEREQ(Signal * signal)9912 Dbdih::execGCP_SAVEREQ(Signal* signal)
9913 {
9914 jamEntry();
9915 GCPSaveReq * req = (GCPSaveReq*)&signal->theData[0];
9916
9917 if (m_gcp_save.m_state == GcpSave::GCP_SAVE_REQ)
9918 {
9919 jam();
9920 /**
9921 * This is master take over...
9922 * and SAVE_REQ is already running
9923 */
9924 ndbrequire(m_gcp_save.m_gci == req->gci);
9925 m_gcp_save.m_master_ref = req->dihBlockRef;
9926 return;
9927 }
9928
9929 if (m_gcp_save.m_gci == req->gci)
9930 {
9931 jam();
9932 /**
9933 * This is master take over...
9934 * and SAVE_REQ is complete...
9935 */
9936 m_gcp_save.m_master_ref = req->dihBlockRef;
9937
9938 GCPSaveReq save = (* req);
9939 GCPSaveConf * conf = (GCPSaveConf*)signal->getDataPtrSend();
9940 conf->dihPtr = save.dihPtr;
9941 conf->nodeId = getOwnNodeId();
9942 conf->gci = save.gci;
9943 sendSignal(m_gcp_save.m_master_ref, GSN_GCP_SAVECONF, signal,
9944 GCPSaveConf::SignalLength, JBA);
9945 return;
9946 }
9947
9948 ndbrequire(m_gcp_save.m_state == GcpSave::GCP_SAVE_IDLE);
9949 m_gcp_save.m_state = GcpSave::GCP_SAVE_REQ;
9950 m_gcp_save.m_master_ref = req->dihBlockRef;
9951 m_gcp_save.m_gci = req->gci;
9952
9953 req->dihBlockRef = reference();
9954 sendSignal(DBLQH_REF, GSN_GCP_SAVEREQ, signal, signal->getLength(), JBA);
9955 }
9956
execGCP_SAVECONF(Signal * signal)9957 void Dbdih::execGCP_SAVECONF(Signal* signal)
9958 {
9959 jamEntry();
9960 GCPSaveConf * saveConf = (GCPSaveConf*)&signal->theData[0];
9961
9962 if (refToBlock(signal->getSendersBlockRef()) == DBLQH)
9963 {
9964 jam();
9965
9966 ndbrequire(m_gcp_save.m_state == GcpSave::GCP_SAVE_REQ);
9967 m_gcp_save.m_state = GcpSave::GCP_SAVE_CONF;
9968
9969 sendSignal(m_gcp_save.m_master_ref,
9970 GSN_GCP_SAVECONF, signal, signal->getLength(), JBA);
9971 return;
9972 }
9973
9974 ndbrequire(saveConf->gci == m_gcp_save.m_master.m_new_gci);
9975 ndbrequire(saveConf->nodeId == saveConf->dihPtr);
9976 SYSFILE->lastCompletedGCI[saveConf->nodeId] = saveConf->gci;
9977 GCP_SAVEhandling(signal, saveConf->nodeId);
9978 }//Dbdih::execGCP_SAVECONF()
9979
execGCP_SAVEREF(Signal * signal)9980 void Dbdih::execGCP_SAVEREF(Signal* signal)
9981 {
9982 jamEntry();
9983 GCPSaveRef * const saveRef = (GCPSaveRef*)&signal->theData[0];
9984
9985 if (refToBlock(signal->getSendersBlockRef()) == DBLQH)
9986 {
9987 jam();
9988
9989 ndbrequire(m_gcp_save.m_state == GcpSave::GCP_SAVE_REQ);
9990 m_gcp_save.m_state = GcpSave::GCP_SAVE_CONF;
9991
9992 sendSignal(m_gcp_save.m_master_ref,
9993 GSN_GCP_SAVEREF, signal, signal->getLength(), JBA);
9994 return;
9995 }
9996
9997 ndbrequire(saveRef->gci == m_gcp_save.m_master.m_new_gci);
9998 ndbrequire(saveRef->nodeId == saveRef->dihPtr);
9999
10000 /**
10001 * Only allow reason not to save
10002 */
10003 ndbrequire(saveRef->errorCode == GCPSaveRef::NodeShutdownInProgress ||
10004 saveRef->errorCode == GCPSaveRef::FakedSignalDueToNodeFailure ||
10005 saveRef->errorCode == GCPSaveRef::NodeRestartInProgress);
10006 GCP_SAVEhandling(signal, saveRef->nodeId);
10007 }//Dbdih::execGCP_SAVEREF()
10008
GCP_SAVEhandling(Signal * signal,Uint32 nodeId)10009 void Dbdih::GCP_SAVEhandling(Signal* signal, Uint32 nodeId)
10010 {
10011 ndbrequire(m_gcp_save.m_master.m_state == GcpSave::GCP_SAVE_REQ);
10012 receiveLoopMacro(GCP_SAVEREQ, nodeId);
10013 /*-------------------------------------------------------------------------*/
10014 // All nodes have replied. We are ready to update the system file.
10015 /*-------------------------------------------------------------------------*/
10016
10017 CRASH_INSERTION(7003);
10018 /**------------------------------------------------------------------------
10019 * SET NEW RECOVERABLE GCI. ALSO RESET RESTART COUNTER TO ZERO.
10020 * THIS INDICATES THAT THE SYSTEM HAS BEEN RECOVERED AND SURVIVED AT
10021 * LEAST ONE GLOBAL CHECKPOINT PERIOD. WE WILL USE THIS PARAMETER TO
10022 * SET BACK THE RESTART GCI IF WE ENCOUNTER MORE THAN ONE UNSUCCESSFUL
10023 * RESTART.
10024 *------------------------------------------------------------------------*/
10025 SYSFILE->newestRestorableGCI = m_gcp_save.m_gci;
10026 if(Sysfile::getInitialStartOngoing(SYSFILE->systemRestartBits) &&
10027 getNodeState().startLevel == NodeState::SL_STARTED){
10028 jam();
10029 #if 0
10030 g_eventLogger->info("Dbdih: Clearing initial start ongoing");
10031 #endif
10032 Sysfile::clearInitialStartOngoing(SYSFILE->systemRestartBits);
10033 }
10034 copyGciLab(signal, CopyGCIReq::GLOBAL_CHECKPOINT);
10035
10036 m_gcp_save.m_master.m_state = GcpSave::GCP_SAVE_COPY_GCI;
10037
10038 }//Dbdih::GCP_SAVEhandling()
10039
10040 /*
10041 3.11 G L O B A L C H E C K P O I N T (N O T - M A S T E R)
10042 *************************************************************
10043 */
execGCP_PREPARE(Signal * signal)10044 void Dbdih::execGCP_PREPARE(Signal* signal)
10045 {
10046 jamEntry();
10047 CRASH_INSERTION(7005);
10048
10049 if (ERROR_INSERTED(7030))
10050 {
10051 cgckptflag = true;
10052 g_eventLogger->info("Delayed GCP_PREPARE 5s");
10053 sendSignalWithDelay(reference(), GSN_GCP_PREPARE, signal, 5000,
10054 signal->getLength());
10055 return;
10056 }
10057
10058 GCPPrepare* req = (GCPPrepare*)signal->getDataPtr();
10059 GCPPrepareConf * conf = (GCPPrepareConf*)signal->getDataPtrSend();
10060 Uint32 masterNodeId = req->nodeId;
10061 Uint32 gci_hi = req->gci_hi;
10062 Uint32 gci_lo = req->gci_lo;
10063 if (unlikely(signal->getLength() < GCPPrepare::SignalLength))
10064 {
10065 jam();
10066 gci_lo = 0;
10067 ndbassert(!ndb_check_micro_gcp(getNodeInfo(masterNodeId).m_version));
10068 }
10069 Uint64 gci = gci_lo | (Uint64(gci_hi) << 32);
10070
10071 BlockReference retRef = calcDihBlockRef(masterNodeId);
10072
10073 if (isMaster())
10074 {
10075 ndbrequire(m_micro_gcp.m_master.m_state == MicroGcp::M_GCP_PREPARE);
10076 }
10077
10078 if (m_micro_gcp.m_state == MicroGcp::M_GCP_PREPARE)
10079 {
10080 jam();
10081 /**
10082 * This must be master take over
10083 * Prepare is already complete
10084 */
10085 ndbrequire(m_micro_gcp.m_new_gci == gci);
10086 m_micro_gcp.m_master_ref = retRef;
10087 goto reply;
10088 }
10089
10090 if (m_micro_gcp.m_new_gci == gci)
10091 {
10092 jam();
10093 /**
10094 * This GCP has already been prepared...
10095 * Must be master takeover
10096 */
10097 m_micro_gcp.m_master_ref = retRef;
10098 goto reply;
10099 }
10100
10101 ndbrequire(m_micro_gcp.m_state == MicroGcp::M_GCP_IDLE);
10102
10103 m_micro_gcp.m_lock.write_lock();
10104 cgckptflag = true;
10105 m_micro_gcp.m_state = MicroGcp::M_GCP_PREPARE;
10106 m_micro_gcp.m_new_gci = gci;
10107 m_micro_gcp.m_master_ref = retRef;
10108 m_micro_gcp.m_lock.write_unlock();
10109
10110 if (ERROR_INSERTED(7031))
10111 {
10112 g_eventLogger->info("Crashing delayed in GCP_PREPARE 3s");
10113 signal->theData[0] = 9999;
10114 sendSignalWithDelay(CMVMI_REF, GSN_NDB_TAMPER, signal, 3000, 1);
10115 return;
10116 }
10117 #ifdef GCP_TIMER_HACK
10118 NdbTick_getMicroTimer(&globalData.gcp_timer_commit[0]);
10119 #endif
10120
10121 reply:
10122 /**
10123 * Send the new gci to Suma.
10124 *
10125 * To get correct signal order and avoid races, this signal is sent on the
10126 * same prio as the SUB_GCP_COMPLETE_REP signal sent to SUMA in
10127 * execSUB_GCP_COMPLETE_REP().
10128 */
10129 sendSignal(SUMA_REF, GSN_GCP_PREPARE, signal, signal->length(), JBB);
10130
10131 /* Send reply. */
10132 conf->nodeId = cownNodeId;
10133 conf->gci_hi = gci_hi;
10134 conf->gci_lo = gci_lo;
10135 sendSignal(retRef, GSN_GCP_PREPARECONF, signal,
10136 GCPPrepareConf::SignalLength, JBA);
10137 return;
10138 }
10139
execGCP_COMMIT(Signal * signal)10140 void Dbdih::execGCP_COMMIT(Signal* signal)
10141 {
10142 jamEntry();
10143 CRASH_INSERTION(7006);
10144
10145 GCPCommit * req = (GCPCommit*)signal->getDataPtr();
10146 Uint32 masterNodeId = req->nodeId;
10147 Uint32 gci_hi = req->gci_hi;
10148 Uint32 gci_lo = req->gci_lo;
10149
10150 if (unlikely(signal->getLength() < GCPCommit::SignalLength))
10151 {
10152 gci_lo = 0;
10153 ndbassert(!ndb_check_micro_gcp(getNodeInfo(masterNodeId).m_version));
10154 }
10155 Uint64 gci = gci_lo | (Uint64(gci_hi) << 32);
10156
10157 #ifdef ERROR_INSERT
10158 if (ERROR_INSERTED(7213))
10159 {
10160 ndbout_c("err 7213 killing %d", c_error_insert_extra);
10161 Uint32 save = signal->theData[0];
10162 signal->theData[0] = 5048;
10163 sendSignal(numberToRef(DBLQH, c_error_insert_extra),
10164 GSN_NDB_TAMPER, signal, 1, JBB);
10165 signal->theData[0] = save;
10166 CLEAR_ERROR_INSERT_VALUE;
10167
10168 signal->theData[0] = 9999;
10169 sendSignal(numberToRef(CMVMI, c_error_insert_extra),
10170 GSN_DUMP_STATE_ORD, signal, 1, JBB);
10171
10172 signal->theData[0] = save;
10173 CLEAR_ERROR_INSERT_VALUE;
10174
10175 return;
10176 }
10177 #endif
10178
10179 Uint32 masterRef = calcDihBlockRef(masterNodeId);
10180 ndbrequire(masterNodeId == cmasterNodeId);
10181 if (isMaster())
10182 {
10183 ndbrequire(m_micro_gcp.m_master.m_state == MicroGcp::M_GCP_COMMIT);
10184 }
10185
10186 if (m_micro_gcp.m_state == MicroGcp::M_GCP_COMMIT)
10187 {
10188 jam();
10189 /**
10190 * This must be master take over
10191 * Commit is already ongoing...
10192 */
10193 ndbrequire(m_micro_gcp.m_current_gci == gci);
10194 m_micro_gcp.m_master_ref = masterRef;
10195 return;
10196 }
10197
10198 if (m_micro_gcp.m_current_gci == gci)
10199 {
10200 jam();
10201 /**
10202 * This must be master take over
10203 * Commit has already completed
10204 */
10205 m_micro_gcp.m_master_ref = masterRef;
10206
10207 GCPNodeFinished* conf = (GCPNodeFinished*)signal->getDataPtrSend();
10208 conf->nodeId = cownNodeId;
10209 conf->gci_hi = (Uint32)(m_micro_gcp.m_old_gci >> 32);
10210 conf->failno = cfailurenr;
10211 conf->gci_lo = (Uint32)(m_micro_gcp.m_old_gci & 0xFFFFFFFF);
10212 sendSignal(masterRef, GSN_GCP_NODEFINISH, signal,
10213 GCPNodeFinished::SignalLength, JBB);
10214 return;
10215 }
10216
10217 ndbrequire(m_micro_gcp.m_new_gci == gci);
10218 ndbrequire(m_micro_gcp.m_state == MicroGcp::M_GCP_PREPARE);
10219 m_micro_gcp.m_state = MicroGcp::M_GCP_COMMIT;
10220 m_micro_gcp.m_master_ref = calcDihBlockRef(masterNodeId);
10221
10222 m_micro_gcp.m_lock.write_lock();
10223 m_micro_gcp.m_old_gci = m_micro_gcp.m_current_gci;
10224 m_micro_gcp.m_current_gci = gci;
10225 cgckptflag = false;
10226 m_micro_gcp.m_lock.write_unlock();
10227
10228 for (Uint32 i = 0; i < c_diverify_queue_cnt; i++)
10229 {
10230 jam();
10231 c_diverify_queue[i].m_empty_done = 0;
10232 emptyverificbuffer(signal, i, true);
10233 }
10234
10235 GCPNoMoreTrans* req2 = (GCPNoMoreTrans*)signal->getDataPtrSend();
10236 req2->senderRef = reference();
10237 req2->senderData = calcDihBlockRef(masterNodeId);
10238 req2->gci_hi = (Uint32)(m_micro_gcp.m_old_gci >> 32);
10239 req2->gci_lo = (Uint32)(m_micro_gcp.m_old_gci & 0xFFFFFFFF);
10240 sendSignal(clocaltcblockref, GSN_GCP_NOMORETRANS, signal,
10241 GCPNoMoreTrans::SignalLength, JBB);
10242 return;
10243 }//Dbdih::execGCP_COMMIT()
10244
execGCP_TCFINISHED(Signal * signal)10245 void Dbdih::execGCP_TCFINISHED(Signal* signal)
10246 {
10247 jamEntry();
10248 CRASH_INSERTION(7007);
10249 GCPTCFinished* conf = (GCPTCFinished*)signal->getDataPtr();
10250 Uint32 retRef = conf->senderData;
10251 Uint32 gci_hi = conf->gci_hi;
10252 Uint32 gci_lo = conf->gci_lo;
10253 Uint64 gci = gci_lo | (Uint64(gci_hi) << 32);
10254 ndbrequire(gci == m_micro_gcp.m_old_gci);
10255
10256 if (ERROR_INSERTED(7181) || ERROR_INSERTED(7182))
10257 {
10258 c_error_7181_ref = retRef; // Save ref
10259 ndbout_c("killing %d", refToNode(cmasterdihref));
10260 signal->theData[0] = 9999;
10261 sendSignal(numberToRef(CMVMI, refToNode(cmasterdihref)),
10262 GSN_NDB_TAMPER, signal, 1, JBB);
10263 return;
10264 }
10265
10266 #ifdef ERROR_INSERT
10267 if (ERROR_INSERTED(7214))
10268 {
10269 ndbout_c("err 7214 killing %d", c_error_insert_extra);
10270 Uint32 save = signal->theData[0];
10271 signal->theData[0] = 9999;
10272 sendSignal(numberToRef(CMVMI, c_error_insert_extra),
10273 GSN_NDB_TAMPER, signal, 1, JBB);
10274 signal->theData[0] = save;
10275 CLEAR_ERROR_INSERT_VALUE;
10276 }
10277 #endif
10278
10279 #ifdef GCP_TIMER_HACK
10280 NdbTick_getMicroTimer(&globalData.gcp_timer_commit[1]);
10281 #endif
10282
10283 ndbrequire(m_micro_gcp.m_state == MicroGcp::M_GCP_COMMIT);
10284
10285 /**
10286 * Make sure that each LQH gets scheduled, so that they don't get out of sync
10287 * wrt to SUB_GCP_COMPLETE_REP
10288 */
10289 Callback cb;
10290 cb.m_callbackData = 10;
10291 cb.m_callbackFunction = safe_cast(&Dbdih::execGCP_TCFINISHED_sync_conf);
10292 Uint32 path[] = { DBLQH, SUMA, 0 };
10293 synchronize_path(signal, path, cb);
10294 }//Dbdih::execGCP_TCFINISHED()
10295
10296 void
execGCP_TCFINISHED_sync_conf(Signal * signal,Uint32 cb,Uint32 err)10297 Dbdih::execGCP_TCFINISHED_sync_conf(Signal* signal, Uint32 cb, Uint32 err)
10298 {
10299 ndbrequire(m_micro_gcp.m_state == MicroGcp::M_GCP_COMMIT);
10300
10301 m_micro_gcp.m_state = MicroGcp::M_GCP_COMMITTED;
10302 Uint32 retRef = m_micro_gcp.m_master_ref;
10303
10304 GCPNodeFinished* conf2 = (GCPNodeFinished*)signal->getDataPtrSend();
10305 conf2->nodeId = cownNodeId;
10306 conf2->gci_hi = (Uint32)(m_micro_gcp.m_old_gci >> 32);
10307 conf2->failno = cfailurenr;
10308 conf2->gci_lo = (Uint32)(m_micro_gcp.m_old_gci & 0xFFFFFFFF);
10309 sendSignal(retRef, GSN_GCP_NODEFINISH, signal,
10310 GCPNodeFinished::SignalLength, JBB);
10311 }
10312
10313 void
execSUB_GCP_COMPLETE_REP(Signal * signal)10314 Dbdih::execSUB_GCP_COMPLETE_REP(Signal* signal)
10315 {
10316 jamEntry();
10317
10318 CRASH_INSERTION(7228);
10319 SubGcpCompleteRep rep = * (SubGcpCompleteRep*)signal->getDataPtr();
10320 if (isMaster())
10321 {
10322 ndbrequire(m_micro_gcp.m_master.m_state == MicroGcp::M_GCP_COMPLETE);
10323 }
10324
10325 Uint32 masterRef = rep.senderRef;
10326 if (m_micro_gcp.m_state == MicroGcp::M_GCP_IDLE)
10327 {
10328 jam();
10329 /**
10330 * This must be master take over
10331 * signal has already arrived
10332 */
10333 m_micro_gcp.m_master_ref = masterRef;
10334 goto reply;
10335 }
10336
10337 ndbrequire(m_micro_gcp.m_state == MicroGcp::M_GCP_COMMITTED);
10338 m_micro_gcp.m_state = MicroGcp::M_GCP_IDLE;
10339
10340 /**
10341 * To handle multiple LQH instances, this need to be passed though
10342 * each LQH...(so that no fire-trig-ord can arrive "too" late)
10343 */
10344 sendSignal(DBLQH_REF, GSN_SUB_GCP_COMPLETE_REP, signal,
10345 signal->length(), JBB);
10346 reply:
10347 Uint32 nodeId = refToNode(masterRef);
10348 if (!ndbd_dih_sub_gcp_complete_ack(getNodeInfo(nodeId).m_version))
10349 {
10350 jam();
10351 return;
10352 }
10353
10354 SubGcpCompleteAck* ack = CAST_PTR(SubGcpCompleteAck,
10355 signal->getDataPtrSend());
10356 ack->rep = rep;
10357 ack->rep.senderRef = reference();
10358 sendSignal(masterRef, GSN_SUB_GCP_COMPLETE_ACK,
10359 signal, SubGcpCompleteAck::SignalLength, JBA);
10360 }
10361
10362 /*****************************************************************************/
10363 //****** RECEIVING TAMPER REQUEST FROM NDBAPI ******
10364 /*****************************************************************************/
execDIHNDBTAMPER(Signal * signal)10365 void Dbdih::execDIHNDBTAMPER(Signal* signal)
10366 {
10367 jamEntry();
10368 Uint32 tcgcpblocked = signal->theData[0];
10369 /* ACTION TO BE TAKEN BY DIH */
10370 Uint32 tuserpointer = signal->theData[1];
10371 BlockReference tuserblockref = signal->theData[2];
10372 switch (tcgcpblocked) {
10373 case 1:
10374 jam();
10375 if (isMaster()) {
10376 jam();
10377 cgcpOrderBlocked = 1;
10378 } else {
10379 jam();
10380 /* TRANSFER THE REQUEST */
10381 /* TO MASTER*/
10382 signal->theData[0] = tcgcpblocked;
10383 signal->theData[1] = tuserpointer;
10384 signal->theData[2] = tuserblockref;
10385 sendSignal(cmasterdihref, GSN_DIHNDBTAMPER, signal, 3, JBB);
10386 }//if
10387 break;
10388 case 2:
10389 jam();
10390 if (isMaster()) {
10391 jam();
10392 cgcpOrderBlocked = 0;
10393 } else {
10394 jam();
10395 /* TRANSFER THE REQUEST */
10396 /* TO MASTER*/
10397 signal->theData[0] = tcgcpblocked;
10398 signal->theData[1] = tuserpointer;
10399 signal->theData[2] = tuserblockref;
10400 sendSignal(cmasterdihref, GSN_DIHNDBTAMPER, signal, 3, JBB);
10401 }//if
10402 break;
10403 case 3:
10404 ndbrequire(false);
10405 return;
10406 break;
10407 case 4:
10408 jam();
10409 signal->theData[0] = tuserpointer;
10410 signal->theData[1] = crestartGci;
10411 sendSignal(tuserblockref, GSN_DIHNDBTAMPER, signal, 2, JBB);
10412 break;
10413 #ifdef ERROR_INSERT
10414 case 5:
10415 jam();
10416 if (tuserpointer >= 30000 && tuserpointer < 40000) {
10417 jam();
10418 /*--------------------------------------------------------------------*/
10419 // Redirect errors to master DIH in the 30000-range.
10420 /*--------------------------------------------------------------------*/
10421 tuserblockref = cmasterdihref;
10422 tuserpointer -= 30000;
10423 signal->theData[0] = 5;
10424 signal->theData[1] = tuserpointer;
10425 signal->theData[2] = tuserblockref;
10426 sendSignal(tuserblockref, GSN_DIHNDBTAMPER, signal, 3, JBB);
10427 return;
10428 } else if (tuserpointer >= 40000 && tuserpointer < 50000) {
10429 NodeRecordPtr localNodeptr;
10430 Uint32 Tfound = 0;
10431 jam();
10432 /*--------------------------------------------------------------------*/
10433 // Redirect errors to non-master DIH in the 40000-range.
10434 /*--------------------------------------------------------------------*/
10435 tuserpointer -= 40000;
10436 for (localNodeptr.i = 1;
10437 localNodeptr.i < MAX_NDB_NODES;
10438 localNodeptr.i++) {
10439 jam();
10440 ptrAss(localNodeptr, nodeRecord);
10441 if ((localNodeptr.p->nodeStatus == NodeRecord::ALIVE) &&
10442 (localNodeptr.i != cmasterNodeId)) {
10443 jam();
10444 tuserblockref = calcDihBlockRef(localNodeptr.i);
10445 Tfound = 1;
10446 break;
10447 }//if
10448 }//for
10449 if (Tfound == 0) {
10450 jam();
10451 /*-------------------------------------------------------------------*/
10452 // Ignore since no non-master node existed.
10453 /*-------------------------------------------------------------------*/
10454 return;
10455 }//if
10456 signal->theData[0] = 5;
10457 signal->theData[1] = tuserpointer;
10458 signal->theData[2] = tuserblockref;
10459 sendSignal(tuserblockref, GSN_DIHNDBTAMPER, signal, 3, JBB);
10460 return;
10461 } else {
10462 jam();
10463 return;
10464 }//if
10465 break;
10466 #endif
10467 default:
10468 ndbrequire(false);
10469 break;
10470 }//switch
10471 return;
10472 }//Dbdih::execDIHNDBTAMPER()
10473
10474 /*****************************************************************************/
10475 /* ********** FILE HANDLING MODULE *************/
10476 /*****************************************************************************/
copyGciLab(Signal * signal,CopyGCIReq::CopyReason reason)10477 void Dbdih::copyGciLab(Signal* signal, CopyGCIReq::CopyReason reason)
10478 {
10479 if(c_copyGCIMaster.m_copyReason != CopyGCIReq::IDLE)
10480 {
10481 jam();
10482 /**
10483 * There can currently only be two waiting
10484 */
10485 for (Uint32 i = 0; i<CopyGCIMaster::WAIT_CNT; i++)
10486 {
10487 jam();
10488 if (c_copyGCIMaster.m_waiting[i] == CopyGCIReq::IDLE)
10489 {
10490 jam();
10491 c_copyGCIMaster.m_waiting[i] = reason;
10492 return;
10493 }
10494 }
10495
10496 /**
10497 * Code should *not* request more than WAIT_CNT copy-gci's
10498 * so this is an internal error
10499 */
10500 ndbrequire(false);
10501 return;
10502 }
10503 c_copyGCIMaster.m_copyReason = reason;
10504
10505 #ifdef ERROR_INSERT
10506 if (reason == CopyGCIReq::GLOBAL_CHECKPOINT && ERROR_INSERTED(7189))
10507 {
10508 sendToRandomNodes("COPY_GCI",
10509 signal, &c_COPY_GCIREQ_Counter, &Dbdih::sendCOPY_GCIREQ);
10510 signal->theData[0] = 9999;
10511 sendSignalWithDelay(CMVMI_REF, GSN_NDB_TAMPER, signal, 1000, 1);
10512 return;
10513 }
10514 #endif
10515
10516 if (reason == CopyGCIReq::RESTART_NR)
10517 {
10518 jam();
10519 if (c_nodeStartMaster.startNode != RNIL)
10520 {
10521 jam();
10522 c_COPY_GCIREQ_Counter.clearWaitingFor();
10523 c_COPY_GCIREQ_Counter.setWaitingFor(c_nodeStartMaster.startNode);
10524 sendCOPY_GCIREQ(signal, c_nodeStartMaster.startNode, RNIL);
10525 return;
10526 }
10527 else
10528 {
10529 jam();
10530 reason = c_copyGCIMaster.m_copyReason = c_copyGCIMaster.m_waiting[0];
10531 for (Uint32 i = 1; i<CopyGCIMaster::WAIT_CNT; i++)
10532 {
10533 jam();
10534 c_copyGCIMaster.m_waiting[i-1] = c_copyGCIMaster.m_waiting[i];
10535 }
10536 c_copyGCIMaster.m_waiting[CopyGCIMaster::WAIT_CNT-1] =
10537 CopyGCIReq::IDLE;
10538
10539 if (reason == CopyGCIReq::IDLE)
10540 {
10541 jam();
10542 return;
10543 }
10544 // fall-through
10545 }
10546 }
10547
10548 sendLoopMacro(COPY_GCIREQ, sendCOPY_GCIREQ, RNIL);
10549
10550 }//Dbdih::copyGciLab()
10551
10552 /* ------------------------------------------------------------------------- */
10553 /* COPY_GCICONF RESPONSE TO COPY_GCIREQ */
10554 /* ------------------------------------------------------------------------- */
/**
 * Handle COPY_GCICONF, the reply to a previously sent COPY_GCIREQ.
 *
 * theData[0] holds the id of the replying node.  Once receiveLoopMacro lets
 * execution continue (presumably only when every node has replied -- the
 * macro is defined outside this section, confirm there), the reason recorded
 * in c_copyGCIMaster.m_copyReason selects the follow-up action, and the next
 * queued reason (if any) is started through CONTINUEB(ZCOPY_GCI).
 *
 * @param signal  Incoming signal; its data area is reused for all outgoing
 *                signals, so the statement order below matters.
 */
void Dbdih::execCOPY_GCICONF(Signal* signal)
{
  jamEntry();
  NodeRecordPtr senderNodePtr;
  senderNodePtr.i = signal->theData[0];
  receiveLoopMacro(COPY_GCIREQ, senderNodePtr.i);

  // Remember why the copy was made and mark the master side idle before
  // dispatching, so the handlers below see an idle copy state.
  CopyGCIReq::CopyReason current = c_copyGCIMaster.m_copyReason;
  c_copyGCIMaster.m_copyReason = CopyGCIReq::IDLE;

  // 'ok' stays false only for the IDLE case, which makes the ndbrequire
  // after the switch fail: a CONF without an outstanding reason is an error.
  bool ok = false;
  switch(current){
  case CopyGCIReq::RESTART:{
    ok = true;
    jam();
    // Ask DICT to start, handing over the newest restorable GCI.
    DictStartReq * req = (DictStartReq*)&signal->theData[0];
    req->restartGci = SYSFILE->newestRestorableGCI;
    req->senderRef = reference();
    sendSignal(cdictblockref, GSN_DICTSTARTREQ,
               signal, DictStartReq::SignalLength, JBB);
    break;
  }
  case CopyGCIReq::LOCAL_CHECKPOINT:{
    ok = true;
    jam();
    startLcpRoundLab(signal);
    break;
  }
  case CopyGCIReq::GLOBAL_CHECKPOINT:
  {
    ok = true;
    jam();

    /************************************************************************/
    // Report the event that a global checkpoint has completed.
    /************************************************************************/
    signal->setTrace(0);
    signal->theData[0] = NDB_LE_GlobalCheckpointCompleted; //Event type
    signal->theData[1] = m_gcp_save.m_gci;
    sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);

    // The GCI that was just saved is now the newest restorable one.
    c_newest_restorable_gci = m_gcp_save.m_gci;
#ifdef ERROR_INSERT
    // Test hooks: only active when no LCP is ongoing and the restorable GCI
    // has reached the GCI at which the last LCP stopped.
    if ((ERROR_INSERTED(7222) || ERROR_INSERTED(7223)) &&
        !Sysfile::getLCPOngoing(SYSFILE->systemRestartBits) &&
        c_newest_restorable_gci >= c_lcpState.lcpStopGcp)
    {
      if (ERROR_INSERTED(7222))
      {
        // Send NDB_TAMPER(9999) to CMVMI on the other nodes in the
        // COPY_TABREQ counter, a delayed one to the local CMVMI, and
        // NDB_TAMPER(932) directly to QMGR; then stop processing here.
        sendLoopMacro(COPY_TABREQ, nullRoutine, 0);
        NodeReceiverGroup rg(CMVMI, c_COPY_TABREQ_Counter);

        rg.m_nodes.clear(getOwnNodeId());
        if (!rg.m_nodes.isclear())
        {
          signal->theData[0] = 9999;
          sendSignal(rg, GSN_NDB_TAMPER, signal, 1, JBA);
        }
        signal->theData[0] = 9999;
        sendSignalWithDelay(CMVMI_REF, GSN_NDB_TAMPER, signal, 1000, 1);

        signal->theData[0] = 932;
        EXECUTE_DIRECT(QMGR, GSN_NDB_TAMPER, signal, 1);

        return;
      }
      if (ERROR_INSERTED(7223))
      {
        // One-shot: clear the insert and send NDB_TAMPER(9999) to CMVMI on
        // the node given by c_error_insert_extra.
        CLEAR_ERROR_INSERT_VALUE;
        signal->theData[0] = 9999;
        sendSignal(numberToRef(CMVMI, c_error_insert_extra)
                   , GSN_NDB_TAMPER, signal, 1, JBA);
      }
    }
#endif

    if (m_micro_gcp.m_enabled == false)
    {
      jam();
      /**
       * Running old protocol
       */
      signal->theData[0] = DihContinueB::ZSTART_GCP;
      sendSignal(reference(), GSN_CONTINUEB, signal, 1, JBB);
    }
    m_gcp_save.m_master.m_state = GcpSave::GCP_SAVE_IDLE;

    CRASH_INSERTION(7004);
    // The saved GCI is passed as the high 32 bits of a 64-bit GCI value.
    emptyWaitGCPMasterQueue(signal,
                            Uint64(m_gcp_save.m_gci) << 32,
                            c_waitGCPMasterList);
    break;
  }
  case CopyGCIReq::INITIAL_START_COMPLETED:
    ok = true;
    jam();
    initialStartCompletedLab(signal);
    break;
  case CopyGCIReq::IDLE:
    ok = false;
    jam();
    break;
  case CopyGCIReq::RESTART_NR:
    ok = true;
    jam();
    startme_copygci_conf(signal);
    break;
  }
  ndbrequire(ok);


  /**
   * Pop queue: the head of m_waiting becomes the current reason, the
   * remaining entries move down one slot and the last slot is set to IDLE.
   */
  c_copyGCIMaster.m_copyReason = c_copyGCIMaster.m_waiting[0];
  for (Uint32 i = 1; i<CopyGCIMaster::WAIT_CNT; i++)
  {
    jam();
    c_copyGCIMaster.m_waiting[i-1] = c_copyGCIMaster.m_waiting[i];
  }
  c_copyGCIMaster.m_waiting[CopyGCIMaster::WAIT_CNT-1] = CopyGCIReq::IDLE;

  /**
   * If a reason was queued, start it via CONTINUEB(ZCOPY_GCI)
   */
  if(c_copyGCIMaster.m_copyReason != CopyGCIReq::IDLE)
  {
    jam();

    signal->theData[0] = DihContinueB::ZCOPY_GCI;
    signal->theData[1] = c_copyGCIMaster.m_copyReason;
    sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
  }
}//Dbdih::execCOPY_GCICONF()
10686
invalidateLcpInfoAfterSr(Signal * signal)10687 void Dbdih::invalidateLcpInfoAfterSr(Signal* signal)
10688 {
10689 NodeRecordPtr nodePtr;
10690 SYSFILE->latestLCP_ID--;
10691 Sysfile::clearLCPOngoing(SYSFILE->systemRestartBits);
10692 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
10693 jam();
10694 ptrAss(nodePtr, nodeRecord);
10695 if (!NdbNodeBitmask::get(SYSFILE->lcpActive, nodePtr.i)){
10696 jam();
10697 /* ------------------------------------------------------------------- */
10698 // The node was not active in the local checkpoint.
10699 // To avoid that we step the active status too fast to not
10700 // active we step back one step from Sysfile::NS_ActiveMissed_x.
10701 /* ------------------------------------------------------------------- */
10702 switch (nodePtr.p->activeStatus) {
10703 case Sysfile::NS_Active:
10704 nodePtr.p->activeStatus = Sysfile::NS_Active;
10705 break;
10706 case Sysfile::NS_ActiveMissed_1:
10707 jam();
10708 nodePtr.p->activeStatus = Sysfile::NS_Active;
10709 break;
10710 case Sysfile::NS_ActiveMissed_2:
10711 jam();
10712 nodePtr.p->activeStatus = Sysfile::NS_ActiveMissed_1;
10713 break;
10714 default:
10715 jam();
10716 break;
10717 }//switch
10718 }
10719 else
10720 {
10721 jam();
10722 ndbassert(nodePtr.p->activeStatus == Sysfile::NS_Active);
10723 }
10724 }//for
10725 setNodeRestartInfoBits(signal);
10726 }//Dbdih::invalidateLcpInfoAfterSr()
10727
10728 /* ------------------------------------------------------------------------- */
10729 /* THE NEXT STEP IS TO WRITE THE FILE. */
10730 /* ------------------------------------------------------------------------- */
openingCopyGciSkipInitLab(Signal * signal,FileRecordPtr filePtr)10731 void Dbdih::openingCopyGciSkipInitLab(Signal* signal, FileRecordPtr filePtr)
10732 {
10733 writeRestorableGci(signal, filePtr);
10734 filePtr.p->reqStatus = FileRecord::WRITING_COPY_GCI;
10735 return;
10736 }//Dbdih::openingCopyGciSkipInitLab()
10737
/**
 * Continue after one restart-info file has been written.
 *
 * If the file just written is crestartInfoFile[0], the second file
 * (crestartInfoFile[1]) is written next (opening it first if it is not
 * already open).  Otherwise both files are done: for a GLOBAL_CHECKPOINT
 * reason the GCP save state is reset and SUB_GCP_COMPLETE_REP is sent, and
 * finally COPY_GCICONF is returned to the requester.
 *
 * @param signal   Signal object reused for all outgoing signals.
 * @param filePtr  File record whose write has just completed.
 */
void Dbdih::writingCopyGciLab(Signal* signal, FileRecordPtr filePtr)
{
  /* ----------------------------------------------------------------------- */
  /*     WE HAVE NOW WRITTEN THIS FILE. WRITE ALSO NEXT FILE IF THIS IS NOT  */
  /*     ALREADY THE LAST.                                                   */
  /* ----------------------------------------------------------------------- */
  CRASH_INSERTION(7219);

  filePtr.p->reqStatus = FileRecord::IDLE;
  if (filePtr.i == crestartInfoFile[0]) {
    jam();
    // First file done: switch filePtr to the second file.
    filePtr.i = crestartInfoFile[1];
    ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
    if (filePtr.p->fileStatus == FileRecord::OPEN) {
      jam();
      openingCopyGciSkipInitLab(signal, filePtr);
      return;
    }//if
    // Not open yet: open it first.
    openFileRw(signal, filePtr);
    filePtr.p->reqStatus = FileRecord::OPENING_COPY_GCI;
    return;
  }//if
  /* ----------------------------------------------------------------------- */
  /*     WE HAVE COMPLETED WRITING BOTH FILES SUCCESSFULLY. NOW REPORT OUR   */
  /*     SUCCESS TO THE MASTER DIH. BUT FIRST WE NEED TO RESET A NUMBER OF   */
  /*     VARIABLES USED BY THE LOCAL CHECKPOINT PROCESS (ONLY IF TRIGGERED   */
  /*     BY LOCAL CHECKPOINT PROCESS).                                       */
  /* ----------------------------------------------------------------------- */
  CopyGCIReq::CopyReason reason = c_copyGCISlave.m_copyReason;

  if (reason == CopyGCIReq::GLOBAL_CHECKPOINT) {
    jam();
    m_gcp_save.m_state = GcpSave::GCP_SAVE_IDLE;

    // Tell LGMAN that the GCI is now on disk.
    SubGcpCompleteRep * const rep = (SubGcpCompleteRep*)signal->getDataPtr();
    rep->gci_hi = SYSFILE->newestRestorableGCI;
    rep->gci_lo = 0;
    rep->flags = SubGcpCompleteRep::ON_DISK;

    sendSignal(LGMAN_REF, GSN_SUB_GCP_COMPLETE_REP, signal,
               SubGcpCompleteRep::SignalLength, JBB);

    jamEntry();

    if (m_micro_gcp.m_enabled == false)
    {
      jam();
      // Without micro GCP the same report (signal data is unchanged) is also
      // sent to DBLQH and the micro GCP state returns to idle.
      sendSignal(DBLQH_REF, GSN_SUB_GCP_COMPLETE_REP, signal,
                 SubGcpCompleteRep::SignalLength, JBB);
      jamEntry();
      ndbrequire(m_micro_gcp.m_state == MicroGcp::M_GCP_COMMITTED);
      m_micro_gcp.m_state = MicroGcp::M_GCP_IDLE;

      CRASH_INSERTION(7190);
    }

#ifdef GCP_TIMER_HACK
    NdbTick_getMicroTimer(&globalData.gcp_timer_copygci[1]);

    // this is last timer point so we send local report here
    {
      const GlobalData& g = globalData;
      // Durations of the three GCP phases, in milliseconds.
      Uint32 ms_commit = NdbTick_getMicrosPassed(
          g.gcp_timer_commit[0], g.gcp_timer_commit[1]) / 1000;
      Uint32 ms_save = NdbTick_getMicrosPassed(
          g.gcp_timer_save[0], g.gcp_timer_save[1]) / 1000;
      Uint32 ms_copygci = NdbTick_getMicrosPassed(
          g.gcp_timer_copygci[0], g.gcp_timer_copygci[1]) / 1000;

      Uint32 ms_total = ms_commit + ms_save + ms_copygci;

      // random formula to report excessive duration:
      // use the configured limit if set, else 3000 ms per started second
      // of cgcpDelay
      bool report =
        g.gcp_timer_limit != 0 ?
          (ms_total > g.gcp_timer_limit) :
          (ms_total > 3000 * (1 + cgcpDelay / 1000));
      if (report)
        infoEvent("GCP %u ms: total:%u commit:%u save:%u copygci:%u",
            coldgcp, ms_total, ms_commit, ms_save, ms_copygci);
    }
#endif
  }

  jam();
  c_copyGCISlave.m_copyReason = CopyGCIReq::IDLE;

  if (reason == CopyGCIReq::GLOBAL_CHECKPOINT)
  {
    jam();
    // GCP saves are confirmed to the master that drove the GCP save.
    signal->theData[0] = c_copyGCISlave.m_senderData;
    sendSignal(m_gcp_save.m_master_ref, GSN_COPY_GCICONF, signal, 1, JBB);
  }
  else if (c_copyGCISlave.m_senderRef == cmasterdihref)
  {
    jam();
    /**
     * Only if same master: no confirmation is sent when the requester is no
     * longer the current master.
     */
    signal->theData[0] = c_copyGCISlave.m_senderData;
    sendSignal(c_copyGCISlave.m_senderRef, GSN_COPY_GCICONF, signal, 1, JBB);
  }
  return;
}//Dbdih::writingCopyGciLab()
10841
execSTART_LCP_REQ(Signal * signal)10842 void Dbdih::execSTART_LCP_REQ(Signal* signal)
10843 {
10844 jamEntry();
10845 StartLcpReq * req = (StartLcpReq*)signal->getDataPtr();
10846
10847 /**
10848 * Init m_local_lcp_state
10849 */
10850 m_local_lcp_state.init(req);
10851
10852 CRASH_INSERTION2(7021, isMaster());
10853 CRASH_INSERTION2(7022, !isMaster());
10854
10855 ndbrequire(c_lcpState.m_masterLcpDihRef == req->senderRef);
10856 c_lcpState.m_participatingDIH = req->participatingDIH;
10857 c_lcpState.m_participatingLQH = req->participatingLQH;
10858
10859 c_lcpState.m_LCP_COMPLETE_REP_Counter_LQH = req->participatingLQH;
10860 if(isMaster())
10861 {
10862 jam();
10863 c_lcpState.m_LCP_COMPLETE_REP_Counter_DIH = req->participatingDIH;
10864 }
10865 else
10866 {
10867 jam();
10868 c_lcpState.m_LCP_COMPLETE_REP_Counter_DIH.clearWaitingFor();
10869 }
10870
10871 c_lcpState.m_LCP_COMPLETE_REP_From_Master_Received = false;
10872
10873 c_lcpState.setLcpStatus(LCP_INIT_TABLES, __LINE__);
10874
10875 signal->theData[0] = DihContinueB::ZINIT_LCP;
10876 signal->theData[1] = c_lcpState.m_masterLcpDihRef;
10877 signal->theData[2] = 0;
10878 sendSignal(reference(), GSN_CONTINUEB, signal, 3, JBB);
10879 }
10880
10881 void
reset()10882 Dbdih::LocalLCPState::reset()
10883 {
10884 m_state = LS_INITIAL;
10885 m_keep_gci = RNIL;
10886 m_stop_gci = RNIL;
10887 }
10888
10889 void
init(const StartLcpReq * req)10890 Dbdih::LocalLCPState::init(const StartLcpReq * req)
10891 {
10892 m_state = LS_RUNNING;
10893 m_start_lcp_req = *req;
10894 m_keep_gci = ~(Uint32)0;
10895 m_stop_gci = 0;
10896 }
10897
10898 void
lcp_frag_rep(const LcpFragRep * rep)10899 Dbdih::LocalLCPState::lcp_frag_rep(const LcpFragRep * rep)
10900 {
10901 assert(m_state == LS_RUNNING);
10902 if (rep->maxGciCompleted < m_keep_gci)
10903 {
10904 m_keep_gci = rep->maxGciCompleted;
10905 }
10906
10907 if (rep->maxGciStarted > m_stop_gci)
10908 {
10909 m_stop_gci = rep->maxGciStarted;
10910 }
10911 }
10912
10913 void
lcp_complete_rep(Uint32 gci)10914 Dbdih::LocalLCPState::lcp_complete_rep(Uint32 gci)
10915 {
10916 assert(m_state == LS_RUNNING);
10917 m_state = LS_COMPLETE;
10918 if (gci > m_stop_gci)
10919 m_stop_gci = gci;
10920 }
10921
10922 bool
check_cut_log_tail(Uint32 gci) const10923 Dbdih::LocalLCPState::check_cut_log_tail(Uint32 gci) const
10924 {
10925 if (m_state == LS_COMPLETE)
10926 {
10927 if (gci >= m_stop_gci)
10928 return true;
10929 }
10930 return false;
10931 }
10932
/**
 * Prepare the table/fragment/replica records for a new LCP.
 *
 * Scans tables starting at tableId.  Inactive or non-ST_NORMAL tables are
 * marked TLS_COMPLETED and skipped.  The first eligible table found is
 * marked TLS_ACTIVE, its replicas on participating LQH nodes are flagged,
 * and the function reschedules itself through CONTINUEB(ZINIT_LCP) for the
 * following table.  When no table is left, START_LCP_CONF is sent to the
 * LCP master.
 *
 * @param signal     Signal object reused for outgoing signals.
 * @param senderRef  Reference of the DIH that requested the LCP start.
 * @param tableId    First table id to examine in this invocation.
 */
void Dbdih::initLcpLab(Signal* signal, Uint32 senderRef, Uint32 tableId)
{
  TabRecordPtr tabPtr;
  tabPtr.i = tableId;

  if(c_lcpState.m_masterLcpDihRef != senderRef){
    jam();
    /**
     * This is LCP master takeover
     */
#ifdef VM_TRACE
    g_eventLogger->info("initLcpLab aborted due to LCP master takeover - 1");
#endif
    c_lcpState.setLcpStatus(LCP_STATUS_IDLE, __LINE__);
    sendMASTER_LCPCONF(signal);
    return;
  }

  if(c_lcpState.m_masterLcpDihRef != cmasterdihref){
    jam();
    /**
     * Master take over but has not yet received MASTER_LCPREQ
     */
#ifdef VM_TRACE
    g_eventLogger->info("initLcpLab aborted due to LCP master takeover - 2");
#endif
    return;
  }

  //const Uint32 lcpId = SYSFILE->latestLCP_ID;

  for(; tabPtr.i < ctabFileSize; tabPtr.i++){

    ptrAss(tabPtr, tabRecord);

    if (tabPtr.p->tabStatus != TabRecord::TS_ACTIVE)
    {
      // Table not in use: nothing to checkpoint.
      jam();
      tabPtr.p->tabLcpStatus = TabRecord::TLS_COMPLETED;
      continue;
    }

    if (tabPtr.p->tabStorage != TabRecord::ST_NORMAL) {
      /**
       * Table is not logged
       */
      jam();
      tabPtr.p->tabLcpStatus = TabRecord::TLS_COMPLETED;
      continue;
    }

    if (tabPtr.p->tabCopyStatus != TabRecord::CS_IDLE) {
      /* ----------------------------------------------------------------- */
      // We protect the updates of table data structures by this variable.
      // The table is busy, so retry this same table after a short delay.
      /* ----------------------------------------------------------------- */
      jam();
      signal->theData[0] = DihContinueB::ZINIT_LCP;
      signal->theData[1] = senderRef;
      signal->theData[2] = tabPtr.i;
      sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 20, 3);
      return;
    }//if

    /**
     * Found a table
     */
    tabPtr.p->tabLcpStatus = TabRecord::TLS_ACTIVE;

    /**
     * For each fragment
     */
    for (Uint32 fragId = 0; fragId < tabPtr.p->totalfragments; fragId++) {
      jam();
      FragmentstorePtr fragPtr;
      getFragstore(tabPtr.p, fragId, fragPtr);

      /**
       * For each of replica record: count and flag the stored replicas
       * located on nodes whose LQH participates in this LCP
       */
      Uint32 replicaCount = 0;
      ReplicaRecordPtr replicaPtr;
      for(replicaPtr.i = fragPtr.p->storedReplicas; replicaPtr.i != RNIL;
          replicaPtr.i = replicaPtr.p->nextReplica) {
        jam();

        ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
        Uint32 nodeId = replicaPtr.p->procNode;
        if(c_lcpState.m_participatingLQH.get(nodeId)){
          jam();
          replicaCount++;
          replicaPtr.p->lcpOngoingFlag = true;
        }
      }

      fragPtr.p->noLcpReplicas = replicaCount;
    }//for

    // One table handled per invocation: continue with the next table id in
    // a new CONTINUEB signal.
    signal->theData[0] = DihContinueB::ZINIT_LCP;
    signal->theData[1] = senderRef;
    signal->theData[2] = tabPtr.i + 1;
    sendSignal(reference(), GSN_CONTINUEB, signal, 3, JBB);
    return;
  }

  /**
   * No more tables
   */
  jam();

  if (c_lcpState.m_masterLcpDihRef != reference()){
    jam();
    // Another node is the LCP master: this node goes active right away.
    ndbrequire(!isMaster());
    c_lcpState.setLcpStatus(LCP_STATUS_ACTIVE, __LINE__);
  } else {
    jam();
    ndbrequire(isMaster());
  }

  CRASH_INSERTION2(7023, isMaster());
  CRASH_INSERTION2(7024, !isMaster());

  jam();
  StartLcpConf * conf = (StartLcpConf*)signal->getDataPtrSend();
  conf->senderRef = reference();
  sendSignal(c_lcpState.m_masterLcpDihRef, GSN_START_LCP_CONF, signal,
             StartLcpConf::SignalLength, JBB);
  return;
}//Dbdih::initLcpLab()
11061
11062 /* ------------------------------------------------------------------------- */
11063 /* ERROR HANDLING FOR COPY RESTORABLE GCI FILE. */
11064 /* ------------------------------------------------------------------------- */
openingCopyGciErrorLab(Signal * signal,FileRecordPtr filePtr)11065 void Dbdih::openingCopyGciErrorLab(Signal* signal, FileRecordPtr filePtr)
11066 {
11067 createFileRw(signal, filePtr);
11068 /* ------------------------------------------------------------------------- */
11069 /* ERROR IN OPENING FILE. WE WILL TRY BY CREATING FILE INSTEAD. */
11070 /* ------------------------------------------------------------------------- */
11071 filePtr.p->reqStatus = FileRecord::CREATING_COPY_GCI;
11072 return;
11073 }//Dbdih::openingCopyGciErrorLab()
11074
11075 /* ------------------------------------------------------------------------- */
11076 /* ENTER DICTSTARTCONF WITH */
11077 /* TBLOCKREF */
11078 /* ------------------------------------------------------------------------- */
dictStartConfLab(Signal * signal)11079 void Dbdih::dictStartConfLab(Signal* signal)
11080 {
11081 /* ----------------------------------------------------------------------- */
11082 /* WE HAVE NOW RECEIVED ALL THE TABLES TO RESTART. */
11083 /* ----------------------------------------------------------------------- */
11084 signal->theData[0] = DihContinueB::ZSTART_FRAGMENT;
11085 signal->theData[1] = 0; /* START WITH TABLE 0 */
11086 signal->theData[2] = 0; /* AND FRAGMENT 0 */
11087 sendSignal(reference(), GSN_CONTINUEB, signal, 3, JBB);
11088 return;
11089 }//Dbdih::dictStartConfLab()
11090
11091
openingTableLab(Signal * signal,FileRecordPtr filePtr)11092 void Dbdih::openingTableLab(Signal* signal, FileRecordPtr filePtr)
11093 {
11094 /* ---------------------------------------------------------------------- */
11095 /* SUCCESSFULLY OPENED A FILE. READ THE FIRST PAGE OF THIS FILE. */
11096 /* ---------------------------------------------------------------------- */
11097 TabRecordPtr tabPtr;
11098 PageRecordPtr pagePtr;
11099
11100 tabPtr.i = filePtr.p->tabRef;
11101 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
11102 tabPtr.p->noPages = 1;
11103 allocpage(pagePtr);
11104 tabPtr.p->pageRef[0] = pagePtr.i;
11105 readTabfile(signal, tabPtr.p, filePtr);
11106 filePtr.p->reqStatus = FileRecord::READING_TABLE;
11107 return;
11108 }//Dbdih::openingTableLab()
11109
openingTableErrorLab(Signal * signal,FileRecordPtr filePtr)11110 void Dbdih::openingTableErrorLab(Signal* signal, FileRecordPtr filePtr)
11111 {
11112 TabRecordPtr tabPtr;
11113 tabPtr.i = filePtr.p->tabRef;
11114 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
11115 /* ---------------------------------------------------------------------- */
11116 /* WE FAILED IN OPENING A FILE. IF THE FIRST FILE THEN TRY WITH THE */
11117 /* DUPLICATE FILE, OTHERWISE WE REPORT AN ERROR IN THE SYSTEM RESTART. */
11118 /* ---------------------------------------------------------------------- */
11119 if (filePtr.i == tabPtr.p->tabFile[0])
11120 {
11121 filePtr.i = tabPtr.p->tabFile[1];
11122 ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
11123 openFileRw(signal, filePtr);
11124 filePtr.p->reqStatus = FileRecord::OPENING_TABLE;
11125 }
11126 else
11127 {
11128 char buf[256];
11129 BaseString::snprintf(buf, sizeof(buf),
11130 "Error opening DIH schema files for table: %d",
11131 tabPtr.i);
11132 progError(__LINE__, NDBD_EXIT_AFS_NO_SUCH_FILE, buf);
11133 }
11134 }//Dbdih::openingTableErrorLab()
11135
readingTableLab(Signal * signal,FileRecordPtr filePtr)11136 void Dbdih::readingTableLab(Signal* signal, FileRecordPtr filePtr)
11137 {
11138 TabRecordPtr tabPtr;
11139 PageRecordPtr pagePtr;
11140 /* ---------------------------------------------------------------------- */
11141 /* WE HAVE SUCCESSFULLY READ A NUMBER OF PAGES IN THE TABLE FILE. IF */
11142 /* MORE PAGES EXIST IN THE FILE THEN READ ALL PAGES IN THE FILE. */
11143 /* ---------------------------------------------------------------------- */
11144 filePtr.p->reqStatus = FileRecord::IDLE;
11145 tabPtr.i = filePtr.p->tabRef;
11146 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
11147 pagePtr.i = tabPtr.p->pageRef[0];
11148 ptrCheckGuard(pagePtr, cpageFileSize, pageRecord);
11149 Uint32 noOfStoredPages = pagePtr.p->word[33];
11150 if (tabPtr.p->noPages < noOfStoredPages) {
11151 jam();
11152 ndbrequire(noOfStoredPages <= NDB_ARRAY_SIZE(tabPtr.p->pageRef));
11153 for (Uint32 i = tabPtr.p->noPages; i < noOfStoredPages; i++) {
11154 jam();
11155 allocpage(pagePtr);
11156 tabPtr.p->pageRef[i] = pagePtr.i;
11157 }//for
11158 tabPtr.p->noPages = noOfStoredPages;
11159 readTabfile(signal, tabPtr.p, filePtr);
11160 filePtr.p->reqStatus = FileRecord::READING_TABLE;
11161 } else {
11162 ndbrequire(tabPtr.p->noPages == pagePtr.p->word[33]);
11163 ndbrequire(tabPtr.p->tabCopyStatus == TabRecord::CS_IDLE);
11164 jam();
11165 /* --------------------------------------------------------------------- */
11166 /* WE HAVE READ ALL PAGES. NOW READ FROM PAGES INTO TABLE AND FRAGMENT */
11167 /* DATA STRUCTURES. */
11168 /* --------------------------------------------------------------------- */
11169 tabPtr.p->tabCopyStatus = TabRecord::CS_SR_PHASE1_READ_PAGES;
11170 signal->theData[0] = DihContinueB::ZREAD_PAGES_INTO_TABLE;
11171 signal->theData[1] = tabPtr.i;
11172 sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
11173 return;
11174 }//if
11175 return;
11176 }//Dbdih::readingTableLab()
11177
readTableFromPagesLab(Signal * signal,TabRecordPtr tabPtr)11178 void Dbdih::readTableFromPagesLab(Signal* signal, TabRecordPtr tabPtr)
11179 {
11180 FileRecordPtr filePtr;
11181 filePtr.i = tabPtr.p->tabFile[0];
11182 ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
11183 /* ---------------------------------------------------------------------- */
11184 /* WE HAVE NOW COPIED TO OUR NODE. WE HAVE NOW COMPLETED RESTORING */
11185 /* THIS TABLE. CONTINUE WITH THE NEXT TABLE. */
11186 /* WE ALSO NEED TO CLOSE THE TABLE FILE. */
11187 /* ---------------------------------------------------------------------- */
11188 if (filePtr.p->fileStatus != FileRecord::OPEN) {
11189 jam();
11190 filePtr.i = tabPtr.p->tabFile[1];
11191 ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
11192 }//if
11193 closeFile(signal, filePtr);
11194 filePtr.p->reqStatus = FileRecord::CLOSING_TABLE_SR;
11195 return;
11196 }//Dbdih::readTableFromPagesLab()
11197
closingTableSrLab(Signal * signal,FileRecordPtr filePtr)11198 void Dbdih::closingTableSrLab(Signal* signal, FileRecordPtr filePtr)
11199 {
11200 /**
11201 * Update table/fragment info
11202 */
11203 TabRecordPtr tabPtr;
11204 tabPtr.i = filePtr.p->tabRef;
11205 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
11206 resetReplicaSr(tabPtr);
11207
11208 signal->theData[0] = DihContinueB::ZCOPY_TABLE;
11209 signal->theData[1] = filePtr.p->tabRef;
11210 sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
11211
11212 return;
11213 }//Dbdih::closingTableSrLab()
11214
/**
 * Handle DIH_GET_TABINFO_REQ: queue a request for the packed table
 * description of one table.
 *
 * A connect record is seized from the free list and pushed onto the head
 * of the table's connectrec list.  If the list was empty before, packing is
 * started through CONTINUEB(ZGET_TABINFO).  On any failure inside the
 * do/while(0) block, DIH_GET_TABINFO_REF with the error code is returned to
 * the sender.
 *
 * @param signal  Incoming DihGetTabInfoReq; reused for outgoing signals.
 */
void
Dbdih::execDIH_GET_TABINFO_REQ(Signal* signal)
{
  jamEntry();

  // Copy the request: the signal buffer is overwritten further down.
  DihGetTabInfoReq req = * (DihGetTabInfoReq*)signal->getDataPtr();

  Uint32 err = 0;
  do
  {
    TabRecordPtr tabPtr;
    tabPtr.i = req.tableId;
    ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);

    if (tabPtr.p->tabStatus != TabRecord::TS_ACTIVE)
    {
      jam();
      err = DihGetTabInfoRef::TableNotDefined;
      break;
    }

    if (cfirstconnect == RNIL)
    {
      // Free list of connect records is empty.
      jam();
      err = DihGetTabInfoRef::OutOfConnectionRecords;
      break;
    }

    if (tabPtr.p->connectrec != RNIL)
    {
      jam();

      // The table already has a connect record attached.  Sharing is only
      // possible if that record belongs to another GET_TABINFO request.
      ConnectRecordPtr connectPtr;
      connectPtr.i = tabPtr.p->connectrec;
      ptrCheckGuard(connectPtr, cconnectFileSize, connectRecord);

      if (connectPtr.p->connectState != ConnectRecord::GET_TABINFO)
      {
        jam();
        err = DihGetTabInfoRef::TableBusy;
        break;
      }
    }

    // Seize the first free connect record ...
    ConnectRecordPtr connectPtr;
    connectPtr.i = cfirstconnect;
    ptrCheckGuard(connectPtr, cconnectFileSize, connectRecord);
    cfirstconnect = connectPtr.p->nextPool;

    // ... and link it in front of the table's list of waiting requests
    // (nextPool doubles as the list link).
    connectPtr.p->nextPool = tabPtr.p->connectrec;
    tabPtr.p->connectrec = connectPtr.i;

    connectPtr.p->m_get_tabinfo.m_requestInfo = req.requestInfo;
    connectPtr.p->userpointer = req.senderData;
    connectPtr.p->userblockref = req.senderRef;
    connectPtr.p->connectState = ConnectRecord::GET_TABINFO;
    connectPtr.p->table = tabPtr.i;

    if (connectPtr.p->nextPool == RNIL)
    {
      jam();

      /**
       * we're the first...start packing...
       */
      signal->theData[0] = DihContinueB::ZGET_TABINFO;
      signal->theData[1] = tabPtr.i;
      sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
    }

    return;
  } while (0);

  // Error exit: report the failure to the requester.
  DihGetTabInfoRef * ref = (DihGetTabInfoRef*)signal->getDataPtrSend();
  ref->senderData = req.senderData;
  ref->senderRef = reference();
  ref->errorCode = err;
  sendSignal(req.senderRef, GSN_DIH_GET_TABINFO_REF, signal,
             DihGetTabInfoRef::SignalLength, JBB);
}
11295
11296 void
getTabInfo(Signal * signal)11297 Dbdih::getTabInfo(Signal* signal)
11298 {
11299 TabRecordPtr tabPtr;
11300 tabPtr.i = signal->theData[1];
11301 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
11302
11303 if (tabPtr.p->tabCopyStatus != TabRecord::CS_IDLE)
11304 {
11305 jam();
11306 signal->theData[0] = DihContinueB::ZGET_TABINFO;
11307 signal->theData[1] = tabPtr.i;
11308 sendSignalWithDelay(reference(), GSN_CONTINUEB,
11309 signal, 100, signal->length());
11310 return;
11311 }
11312
11313 tabPtr.p->tabCopyStatus = TabRecord::CS_GET_TABINFO;
11314
11315 signal->theData[0] = DihContinueB::ZPACK_TABLE_INTO_PAGES;
11316 signal->theData[1] = tabPtr.i;
11317 sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
11318 }
11319
11320 int
getTabInfo_copyTableToSection(SegmentedSectionPtr & ptr,CopyTableNode ctn)11321 Dbdih::getTabInfo_copyTableToSection(SegmentedSectionPtr & ptr,
11322 CopyTableNode ctn)
11323 {
11324 PageRecordPtr pagePtr;
11325 pagePtr.i = ctn.ctnTabPtr.p->pageRef[0];
11326 ptrCheckGuard(pagePtr, cpageFileSize, pageRecord);
11327
11328 while (ctn.noOfWords > 2048)
11329 {
11330 jam();
11331 ndbrequire(import(ptr, pagePtr.p->word, 2048));
11332 ctn.noOfWords -= 2048;
11333
11334 ctn.pageIndex++;
11335 pagePtr.i = ctn.ctnTabPtr.p->pageRef[ctn.pageIndex];
11336 ptrCheckGuard(pagePtr, cpageFileSize, pageRecord);
11337 }
11338
11339 ndbrequire(import(ptr, pagePtr.p->word, ctn.noOfWords));
11340 return 0;
11341 }
11342
11343 int
getTabInfo_copySectionToPages(TabRecordPtr tabPtr,SegmentedSectionPtr ptr)11344 Dbdih::getTabInfo_copySectionToPages(TabRecordPtr tabPtr,
11345 SegmentedSectionPtr ptr)
11346 {
11347 jam();
11348 Uint32 sz = ptr.sz;
11349 SectionReader reader(ptr, getSectionSegmentPool());
11350
11351 while (sz)
11352 {
11353 jam();
11354 PageRecordPtr pagePtr;
11355 allocpage(pagePtr);
11356 tabPtr.p->pageRef[tabPtr.p->noPages] = pagePtr.i;
11357 tabPtr.p->noPages++;
11358
11359 Uint32 len = sz > 2048 ? 2048 : sz;
11360 ndbrequire(reader.getWords(pagePtr.p->word, len));
11361 sz -= len;
11362 }
11363 return 0;
11364 }
11365
11366 void
getTabInfo_send(Signal * signal,TabRecordPtr tabPtr)11367 Dbdih::getTabInfo_send(Signal* signal,
11368 TabRecordPtr tabPtr)
11369 {
11370 ndbrequire(tabPtr.p->tabCopyStatus == TabRecord::CS_GET_TABINFO);
11371
11372 ConnectRecordPtr connectPtr;
11373 connectPtr.i = tabPtr.p->connectrec;
11374
11375 /**
11376 * Done
11377 */
11378 if (connectPtr.i == RNIL)
11379 {
11380 jam();
11381 tabPtr.p->tabCopyStatus = TabRecord::CS_IDLE;
11382 return;
11383 }
11384
11385 ptrCheckGuard(connectPtr, cconnectFileSize, connectRecord);
11386
11387 ndbrequire(connectPtr.p->connectState == ConnectRecord::GET_TABINFO);
11388 ndbrequire(connectPtr.p->table == tabPtr.i);
11389
11390 /**
11391 * Copy into segmented sections here...
11392 * NOTE: A GenericSectionIterator would be nice inside kernel too
11393 * or having a pack-method that writes directly into SegmentedSection
11394 */
11395 PageRecordPtr pagePtr;
11396 pagePtr.i = tabPtr.p->pageRef[0];
11397 ptrCheckGuard(pagePtr, cpageFileSize, pageRecord);
11398 Uint32 words = pagePtr.p->word[34];
11399
11400 CopyTableNode ctn;
11401 ctn.ctnTabPtr = tabPtr;
11402 ctn.pageIndex = 0;
11403 ctn.wordIndex = 0;
11404 ctn.noOfWords = words;
11405
11406 SegmentedSectionPtr ptr;
11407 ndbrequire(getTabInfo_copyTableToSection(ptr, ctn) == 0);
11408
11409 Callback cb = { safe_cast(&Dbdih::getTabInfo_sendComplete), connectPtr.i };
11410
11411 SectionHandle handle(this, signal);
11412 handle.m_ptr[0] = ptr;
11413 handle.m_cnt = 1;
11414
11415 DihGetTabInfoConf* conf = (DihGetTabInfoConf*)signal->getDataPtrSend();
11416 conf->senderData = connectPtr.p->userpointer;
11417 conf->senderRef = reference();
11418 sendFragmentedSignal(connectPtr.p->userblockref, GSN_DIH_GET_TABINFO_CONF, signal,
11419 DihGetTabInfoConf::SignalLength, JBB, &handle, cb);
11420 }
11421
11422 void
getTabInfo_sendComplete(Signal * signal,Uint32 senderData,Uint32 retVal)11423 Dbdih::getTabInfo_sendComplete(Signal * signal,
11424 Uint32 senderData,
11425 Uint32 retVal)
11426 {
11427 ndbrequire(retVal == 0);
11428
11429 ConnectRecordPtr connectPtr;
11430 connectPtr.i = senderData;
11431 ptrCheckGuard(connectPtr, cconnectFileSize, connectRecord);
11432
11433 ndbrequire(connectPtr.p->connectState == ConnectRecord::GET_TABINFO);
11434
11435 TabRecordPtr tabPtr;
11436 tabPtr.i = connectPtr.p->table;
11437 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
11438 tabPtr.p->connectrec = connectPtr.p->nextPool;
11439
11440 signal->theData[0] = DihContinueB::ZGET_TABINFO_SEND;
11441 signal->theData[1] = tabPtr.i;
11442 sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
11443
11444 release_connect(connectPtr);
11445 }
11446
/**
 * Rebuild the replica lists of every fragment of a table during system
 * restart.
 *
 * For each fragment all replicas are first moved to oldStoredReplicas
 * (prepareReplicas).  Each replica then has its unusable LCPs invalidated,
 * and replicas on alive, active nodes get a new redo interval starting at
 * newestRestorableGCI + 1 and are moved back to storedReplicas -- unless
 * the required REDO cannot be found, in which case the node is forced into
 * take-over.
 *
 * @param tabPtr  Table whose fragments are processed.
 */
void
Dbdih::resetReplicaSr(TabRecordPtr tabPtr){

  const Uint32 newestRestorableGCI = SYSFILE->newestRestorableGCI;

  for(Uint32 i = 0; i<tabPtr.p->totalfragments; i++)
  {
    FragmentstorePtr fragPtr;
    getFragstore(tabPtr.p, i, fragPtr);

    /**
     * During SR restart distributionKey from 0
     */
    fragPtr.p->distributionKey = 0;

    /**
     * 1) Start by moving all replicas into oldStoredReplicas
     */
    prepareReplicas(fragPtr);

    /**
     * 2) Move all "alive" replicas into storedReplicas
     *    + update noCrashedReplicas...
     */
    ReplicaRecordPtr replicaPtr;
    replicaPtr.i = fragPtr.p->oldStoredReplicas;
    while (replicaPtr.i != RNIL)
    {
      jam();
      ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);

      /**
       * invalidate LCP's not usable
       */
      resetReplica(replicaPtr);

      // Remember the successor now: the replica may be unlinked from the
      // old list further down.
      const Uint32 nextReplicaPtrI = replicaPtr.p->nextReplica;

      NodeRecordPtr nodePtr;
      nodePtr.i = replicaPtr.p->procNode;
      ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);

      const Uint32 noCrashedReplicas = replicaPtr.p->noCrashedReplicas;

      if (nodePtr.p->nodeStatus == NodeRecord::ALIVE)
      {
        jam();
        switch (nodePtr.p->activeStatus) {
        case Sysfile::NS_Active:
        case Sysfile::NS_ActiveMissed_1:
        case Sysfile::NS_ActiveMissed_2:{
          jam();
          /* --------------------------------------------------------------- */
          /* THE NODE IS ALIVE AND KICKING AND ACTIVE, LET'S USE IT.         */
          /* --------------------------------------------------------------- */
          arrGuardErr(noCrashedReplicas, MAX_CRASHED_REPLICAS, NDBD_EXIT_MAX_CRASHED_REPLICAS);

          // Create new crashed replica
          newCrashedReplica(replicaPtr);

          // Create a new redo-interval
          Uint32 nextCrashed = replicaPtr.p->noCrashedReplicas;
          replicaPtr.p->createGci[nextCrashed] = newestRestorableGCI + 1;
          replicaPtr.p->replicaLastGci[nextCrashed] = ZINIT_REPLICA_LAST_GCI;

          // merge
          mergeCrashedReplicas(replicaPtr);

          resetReplicaLcp(replicaPtr.p, newestRestorableGCI);

          /**
           * Make sure we can also find REDO for restoring replica...
           */
          {
            CreateReplicaRecord createReplica;
            ConstPtr<ReplicaRecord> constReplicaPtr;
            constReplicaPtr.i = replicaPtr.i;
            constReplicaPtr.p = replicaPtr.p;
            // Tables that are not ST_NORMAL skip the REDO check (the ||
            // short-circuits before setup_create_replica is called).
            if (tabPtr.p->tabStorage != TabRecord::ST_NORMAL ||
                setup_create_replica(fragPtr,
                                     &createReplica, constReplicaPtr))
            {
              jam();
              removeOldStoredReplica(fragPtr, replicaPtr);
              linkStoredReplica(fragPtr, replicaPtr);
            }
            else
            {
              jam();
              infoEvent("Forcing take-over of node %d due to unsufficient REDO"
                        " for table %d fragment: %d",
                        nodePtr.i, tabPtr.i, i);

              // Remove the node from the system-restart set and let it be
              // taken over instead.
              m_sr_nodes.clear(nodePtr.i);
              m_to_nodes.set(nodePtr.i);
              setNodeActiveStatus(nodePtr.i,
                                  Sysfile::NS_NotActive_NotTakenOver);
            }
          }
        }
        // No break above: control falls through into default, which only
        // records a jam() and leaves the switch.
        default:
          jam();
          /*empty*/;
          break;
        }
      }
      replicaPtr.i = nextReplicaPtrI;
    }//while
    updateNodeInfo(fragPtr);
  }
}
11558
11559 void
resetReplica(ReplicaRecordPtr readReplicaPtr)11560 Dbdih::resetReplica(ReplicaRecordPtr readReplicaPtr)
11561 {
11562 Uint32 i;
11563 /* ---------------------------------------------------------------------- */
11564 /* IF THE LAST COMPLETED LOCAL CHECKPOINT IS VALID AND LARGER THAN */
11565 /* THE LAST COMPLETED CHECKPOINT THEN WE WILL INVALIDATE THIS LOCAL */
11566 /* CHECKPOINT FOR THIS REPLICA. */
11567 /* ---------------------------------------------------------------------- */
11568 for (i = 0; i < MAX_LCP_STORED; i++)
11569 {
11570 jam();
11571 if (readReplicaPtr.p->lcpStatus[i] == ZVALID &&
11572 readReplicaPtr.p->lcpId[i] > SYSFILE->latestLCP_ID)
11573 {
11574 jam();
11575 readReplicaPtr.p->lcpStatus[i] = ZINVALID;
11576 }
11577 }
11578
11579 /* ---------------------------------------------------------------------- */
11580 /* WE ALSO HAVE TO INVALIDATE ANY LOCAL CHECKPOINTS THAT HAVE BEEN */
11581 /* INVALIDATED BY MOVING BACK THE RESTART GCI. */
11582 /* ---------------------------------------------------------------------- */
11583 Uint32 lastCompletedGCI = SYSFILE->newestRestorableGCI;
11584 for (i = 0; i < MAX_LCP_STORED; i++)
11585 {
11586 jam();
11587 if (readReplicaPtr.p->lcpStatus[i] == ZVALID &&
11588 readReplicaPtr.p->maxGciStarted[i] > lastCompletedGCI)
11589 {
11590 jam();
11591 readReplicaPtr.p->lcpStatus[i] = ZINVALID;
11592 }
11593 }
11594
11595 /* ---------------------------------------------------------------------- */
11596 /* WE WILL REMOVE ANY OCCURRENCES OF REPLICAS THAT HAVE CRASHED */
11597 /* THAT ARE NO LONGER VALID DUE TO MOVING RESTART GCI BACKWARDS. */
11598 /* ---------------------------------------------------------------------- */
11599 removeTooNewCrashedReplicas(readReplicaPtr, lastCompletedGCI);
11600
11601 /**
11602 * Don't remove crashed replicas here,
11603 * as 1) this will disable optimized NR
11604 * if oldestRestorableGCI > GCI needed for local LCP's
11605 * 2) This is anyway done during LCP, which will be run during SR
11606 */
11607 //removeOldCrashedReplicas(readReplicaPtr);
11608
11609 /* ---------------------------------------------------------------------- */
11610 /* FIND PROCESSOR RECORD */
11611 /* ---------------------------------------------------------------------- */
11612 }
11613
11614 void
resetReplicaLcp(ReplicaRecord * replicaP,Uint32 stopGci)11615 Dbdih::resetReplicaLcp(ReplicaRecord * replicaP, Uint32 stopGci){
11616
11617 Uint32 lcpNo = replicaP->nextLcp;
11618 const Uint32 startLcpNo = lcpNo;
11619 do {
11620 lcpNo = prevLcpNo(lcpNo);
11621 ndbrequire(lcpNo < MAX_LCP_STORED);
11622 if (replicaP->lcpStatus[lcpNo] == ZVALID)
11623 {
11624 if (replicaP->maxGciStarted[lcpNo] <= stopGci)
11625 {
11626 jam();
11627 /* ----------------------------------------------------------------- */
11628 /* WE HAVE FOUND A USEFUL LOCAL CHECKPOINT THAT CAN BE USED FOR */
11629 /* RESTARTING THIS FRAGMENT REPLICA. */
11630 /* ----------------------------------------------------------------- */
11631 return ;
11632 }//if
11633 }//if
11634
11635 /**
11636 * WE COULD NOT USE THIS LOCAL CHECKPOINT. IT WAS TOO
11637 * RECENT OR SIMPLY NOT A VALID CHECKPOINT.
11638 * WE SHOULD THUS REMOVE THIS LOCAL CHECKPOINT SINCE IT WILL NEVER
11639 * AGAIN BE USED. SET LCP_STATUS TO INVALID.
11640 */
11641 replicaP->nextLcp = lcpNo;
11642 replicaP->lcpId[lcpNo] = 0;
11643 replicaP->lcpStatus[lcpNo] = ZINVALID;
11644 } while (lcpNo != startLcpNo);
11645
11646 replicaP->nextLcp = 0;
11647 }
11648
readingTableErrorLab(Signal * signal,FileRecordPtr filePtr)11649 void Dbdih::readingTableErrorLab(Signal* signal, FileRecordPtr filePtr)
11650 {
11651 TabRecordPtr tabPtr;
11652 tabPtr.i = filePtr.p->tabRef;
11653 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
11654 /* ---------------------------------------------------------------------- */
11655 /* READING THIS FILE FAILED. CLOSE IT AFTER RELEASING ALL PAGES. */
11656 /* ---------------------------------------------------------------------- */
11657 ndbrequire(tabPtr.p->noPages <= NDB_ARRAY_SIZE(tabPtr.p->pageRef));
11658 for (Uint32 i = 0; i < tabPtr.p->noPages; i++) {
11659 jam();
11660 releasePage(tabPtr.p->pageRef[i]);
11661 }//for
11662 closeFile(signal, filePtr);
11663 filePtr.p->reqStatus = FileRecord::CLOSING_TABLE_CRASH;
11664 return;
11665 }//Dbdih::readingTableErrorLab()
11666
closingTableCrashLab(Signal * signal,FileRecordPtr filePtr)11667 void Dbdih::closingTableCrashLab(Signal* signal, FileRecordPtr filePtr)
11668 {
11669 TabRecordPtr tabPtr;
11670 /* ---------------------------------------------------------------------- */
11671 /* WE HAVE NOW CLOSED A FILE WHICH WE HAD A READ ERROR WITH. PROCEED */
11672 /* WITH NEXT FILE IF NOT THE LAST OTHERWISE REPORT ERROR. */
11673 /* ---------------------------------------------------------------------- */
11674 tabPtr.i = filePtr.p->tabRef;
11675 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
11676 ndbrequire(filePtr.i == tabPtr.p->tabFile[0]);
11677 filePtr.i = tabPtr.p->tabFile[1];
11678 ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
11679 openFileRw(signal, filePtr);
11680 filePtr.p->reqStatus = FileRecord::OPENING_TABLE;
11681 }//Dbdih::closingTableCrashLab()
11682
11683 /*****************************************************************************/
11684 /* ********** COPY TABLE MODULE *************/
11685 /*****************************************************************************/
execCOPY_TABREQ(Signal * signal)11686 void Dbdih::execCOPY_TABREQ(Signal* signal)
11687 {
11688 CRASH_INSERTION(7172);
11689
11690 TabRecordPtr tabPtr;
11691 PageRecordPtr pagePtr;
11692 jamEntry();
11693 BlockReference ref = signal->theData[0];
11694 Uint32 reqinfo = signal->theData[1];
11695 tabPtr.i = signal->theData[2];
11696 Uint32 schemaVersion = signal->theData[3];
11697 Uint32 noOfWords = signal->theData[4];
11698 ndbrequire(ref == cmasterdihref);
11699 ndbrequire(!isMaster());
11700 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
11701 if (reqinfo == 1) {
11702 jam();
11703 tabPtr.p->schemaVersion = schemaVersion;
11704 initTableFile(tabPtr);
11705 }//if
11706 ndbrequire(tabPtr.p->noPages < NDB_ARRAY_SIZE(tabPtr.p->pageRef));
11707 if (tabPtr.p->noOfWords == 0) {
11708 jam();
11709 allocpage(pagePtr);
11710 tabPtr.p->pageRef[tabPtr.p->noPages] = pagePtr.i;
11711 tabPtr.p->noPages++;
11712 } else {
11713 jam();
11714 pagePtr.i = tabPtr.p->pageRef[tabPtr.p->noPages - 1];
11715 ptrCheckGuard(pagePtr, cpageFileSize, pageRecord);
11716 }//if
11717 ndbrequire(tabPtr.p->noOfWords + 15 < 2048);
11718 ndbrequire(tabPtr.p->noOfWords < 2048);
11719 MEMCOPY_NO_WORDS(&pagePtr.p->word[tabPtr.p->noOfWords], &signal->theData[5], 16);
11720 tabPtr.p->noOfWords += 16;
11721 if (tabPtr.p->noOfWords == 2048) {
11722 jam();
11723 tabPtr.p->noOfWords = 0;
11724 }//if
11725 if (noOfWords > 16) {
11726 jam();
11727 return;
11728 }//if
11729 tabPtr.p->noOfWords = 0;
11730 ndbrequire(tabPtr.p->tabCopyStatus == TabRecord::CS_IDLE);
11731 tabPtr.p->tabCopyStatus = TabRecord::CS_COPY_TAB_REQ;
11732 signal->theData[0] = DihContinueB::ZREAD_PAGES_INTO_TABLE;
11733 signal->theData[1] = tabPtr.i;
11734 sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
11735 }//Dbdih::execCOPY_TABREQ()
11736
11737 void
copyTabReq_complete(Signal * signal,TabRecordPtr tabPtr)11738 Dbdih::copyTabReq_complete(Signal* signal, TabRecordPtr tabPtr){
11739 if (!isMaster()) {
11740 jam();
11741 //----------------------------------------------------------------------------
11742 // In this particular case we do not release table pages if we are master. The
11743 // reason is that the master could still be sending the table info to another
11744 // node.
11745 //----------------------------------------------------------------------------
11746 releaseTabPages(tabPtr.i);
11747 tabPtr.p->tabStatus = TabRecord::TS_ACTIVE;
11748 for (Uint32 fragId = 0; fragId < tabPtr.p->totalfragments; fragId++) {
11749 jam();
11750 FragmentstorePtr fragPtr;
11751 getFragstore(tabPtr.p, fragId, fragPtr);
11752 updateNodeInfo(fragPtr);
11753 }//for
11754 }//if
11755 signal->theData[0] = cownNodeId;
11756 signal->theData[1] = tabPtr.i;
11757 sendSignal(cmasterdihref, GSN_COPY_TABCONF, signal, 2, JBB);
11758 }
11759
11760 /*****************************************************************************/
11761 /* ****** READ FROM A NUMBER OF PAGES INTO THE TABLE DATA STRUCTURES ********/
11762 /*****************************************************************************/
readPagesIntoTableLab(Signal * signal,Uint32 tableId)11763 void Dbdih::readPagesIntoTableLab(Signal* signal, Uint32 tableId)
11764 {
11765 RWFragment rf;
11766 rf.wordIndex = 35;
11767 rf.pageIndex = 0;
11768 rf.rwfTabPtr.i = tableId;
11769 ptrCheckGuard(rf.rwfTabPtr, ctabFileSize, tabRecord);
11770 rf.rwfPageptr.i = rf.rwfTabPtr.p->pageRef[0];
11771 ptrCheckGuard(rf.rwfPageptr, cpageFileSize, pageRecord);
11772 rf.rwfTabPtr.p->totalfragments = readPageWord(&rf);
11773 rf.rwfTabPtr.p->noOfBackups = readPageWord(&rf);
11774 rf.rwfTabPtr.p->hashpointer = readPageWord(&rf);
11775 rf.rwfTabPtr.p->kvalue = readPageWord(&rf);
11776 rf.rwfTabPtr.p->mask = readPageWord(&rf);
11777 rf.rwfTabPtr.p->method = (TabRecord::Method)readPageWord(&rf);
11778 /* ------------- */
11779 /* Type of table */
11780 /* ------------- */
11781 rf.rwfTabPtr.p->tabStorage = (TabRecord::Storage)(readPageWord(&rf));
11782
11783 Uint32 noOfFrags = rf.rwfTabPtr.p->totalfragments;
11784 ndbrequire(noOfFrags > 0);
11785 ndbrequire((noOfFrags * (rf.rwfTabPtr.p->noOfBackups + 1)) <= cnoFreeReplicaRec);
11786 allocFragments(noOfFrags, rf.rwfTabPtr);
11787
11788 signal->theData[0] = DihContinueB::ZREAD_PAGES_INTO_FRAG;
11789 signal->theData[1] = rf.rwfTabPtr.i;
11790 signal->theData[2] = 0;
11791 signal->theData[3] = rf.pageIndex;
11792 signal->theData[4] = rf.wordIndex;
11793 sendSignal(reference(), GSN_CONTINUEB, signal, 5, JBB);
11794 return;
11795 }//Dbdih::readPagesIntoTableLab()
11796
readPagesIntoFragLab(Signal * signal,RWFragment * rf)11797 void Dbdih::readPagesIntoFragLab(Signal* signal, RWFragment* rf)
11798 {
11799 ndbrequire(rf->pageIndex < NDB_ARRAY_SIZE(rf->rwfTabPtr.p->pageRef));
11800 rf->rwfPageptr.i = rf->rwfTabPtr.p->pageRef[rf->pageIndex];
11801 ptrCheckGuard(rf->rwfPageptr, cpageFileSize, pageRecord);
11802 FragmentstorePtr fragPtr;
11803 getFragstore(rf->rwfTabPtr.p, rf->fragId, fragPtr);
11804 readFragment(rf, fragPtr);
11805 readReplicas(rf, fragPtr);
11806 rf->fragId++;
11807 if (rf->fragId == rf->rwfTabPtr.p->totalfragments) {
11808 jam();
11809 switch (rf->rwfTabPtr.p->tabCopyStatus) {
11810 case TabRecord::CS_SR_PHASE1_READ_PAGES:
11811 jam();
11812 releaseTabPages(rf->rwfTabPtr.i);
11813 rf->rwfTabPtr.p->tabCopyStatus = TabRecord::CS_IDLE;
11814 signal->theData[0] = DihContinueB::ZREAD_TABLE_FROM_PAGES;
11815 signal->theData[1] = rf->rwfTabPtr.i;
11816 sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
11817 return;
11818 break;
11819 case TabRecord::CS_COPY_TAB_REQ:
11820 jam();
11821 rf->rwfTabPtr.p->tabCopyStatus = TabRecord::CS_IDLE;
11822 if (getNodeState().getSystemRestartInProgress() &&
11823 rf->rwfTabPtr.p->tabStorage == TabRecord::ST_NORMAL)
11824 {
11825 /**
11826 * avoid overwriting own table-definition...
11827 * but this is not possible for no-logging tables
11828 */
11829 jam();
11830 copyTabReq_complete(signal, rf->rwfTabPtr);
11831 return;
11832 }
11833 rf->rwfTabPtr.p->tabCopyStatus = TabRecord::CS_IDLE;
11834 rf->rwfTabPtr.p->tabUpdateState = TabRecord::US_COPY_TAB_REQ;
11835 signal->theData[0] = DihContinueB::ZTABLE_UPDATE;
11836 signal->theData[1] = rf->rwfTabPtr.i;
11837 sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
11838 return;
11839 break;
11840 default:
11841 ndbrequire(false);
11842 return;
11843 break;
11844 }//switch
11845 } else {
11846 jam();
11847 signal->theData[0] = DihContinueB::ZREAD_PAGES_INTO_FRAG;
11848 signal->theData[1] = rf->rwfTabPtr.i;
11849 signal->theData[2] = rf->fragId;
11850 signal->theData[3] = rf->pageIndex;
11851 signal->theData[4] = rf->wordIndex;
11852 sendSignal(reference(), GSN_CONTINUEB, signal, 5, JBB);
11853 }//if
11854 return;
11855 }//Dbdih::readPagesIntoFragLab()
11856
11857 /*****************************************************************************/
11858 /***** WRITING FROM TABLE DATA STRUCTURES INTO A SET OF PAGES ******/
11859 // execCONTINUEB(ZPACK_TABLE_INTO_PAGES)
11860 /*****************************************************************************/
packTableIntoPagesLab(Signal * signal,Uint32 tableId)11861 void Dbdih::packTableIntoPagesLab(Signal* signal, Uint32 tableId)
11862 {
11863 RWFragment wf;
11864 TabRecordPtr tabPtr;
11865 allocpage(wf.rwfPageptr);
11866 tabPtr.i = tableId;
11867 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
11868 tabPtr.p->pageRef[0] = wf.rwfPageptr.i;
11869 tabPtr.p->noPages = 1;
11870 wf.wordIndex = 35;
11871 wf.pageIndex = 0;
11872 Uint32 totalfragments = tabPtr.p->totalfragments;
11873 if (tabPtr.p->connectrec != RNIL)
11874 {
11875 jam();
11876 Ptr<ConnectRecord> connectPtr;
11877 connectPtr.i = tabPtr.p->connectrec;
11878 ptrCheckGuard(connectPtr, cconnectFileSize, connectRecord);
11879 ndbrequire(connectPtr.p->table == tabPtr.i);
11880 if (connectPtr.p->connectState == ConnectRecord::ALTER_TABLE)
11881 {
11882 jam();
11883 totalfragments = connectPtr.p->m_alter.m_totalfragments;
11884 }
11885 }
11886
11887 writePageWord(&wf, totalfragments);
11888 writePageWord(&wf, tabPtr.p->noOfBackups);
11889 writePageWord(&wf, tabPtr.p->hashpointer);
11890 writePageWord(&wf, tabPtr.p->kvalue);
11891 writePageWord(&wf, tabPtr.p->mask);
11892 writePageWord(&wf, tabPtr.p->method);
11893 writePageWord(&wf, tabPtr.p->tabStorage);
11894
11895 signal->theData[0] = DihContinueB::ZPACK_FRAG_INTO_PAGES;
11896 signal->theData[1] = tabPtr.i;
11897 signal->theData[2] = 0;
11898 signal->theData[3] = wf.pageIndex;
11899 signal->theData[4] = wf.wordIndex;
11900 signal->theData[5] = totalfragments;
11901 sendSignal(reference(), GSN_CONTINUEB, signal, 6, JBB);
11902 }//Dbdih::packTableIntoPagesLab()
11903
11904 /*****************************************************************************/
11905 // execCONTINUEB(ZPACK_FRAG_INTO_PAGES)
11906 /*****************************************************************************/
packFragIntoPagesLab(Signal * signal,RWFragment * wf)11907 void Dbdih::packFragIntoPagesLab(Signal* signal, RWFragment* wf)
11908 {
11909 ndbrequire(wf->pageIndex < NDB_ARRAY_SIZE(wf->rwfTabPtr.p->pageRef));
11910 wf->rwfPageptr.i = wf->rwfTabPtr.p->pageRef[wf->pageIndex];
11911 ptrCheckGuard(wf->rwfPageptr, cpageFileSize, pageRecord);
11912 FragmentstorePtr fragPtr;
11913 getFragstore(wf->rwfTabPtr.p, wf->fragId, fragPtr);
11914 writeFragment(wf, fragPtr);
11915 writeReplicas(wf, fragPtr.p->storedReplicas);
11916 writeReplicas(wf, fragPtr.p->oldStoredReplicas);
11917 wf->fragId++;
11918 if (wf->fragId == wf->totalfragments) {
11919 jam();
11920 PageRecordPtr pagePtr;
11921 pagePtr.i = wf->rwfTabPtr.p->pageRef[0];
11922 ptrCheckGuard(pagePtr, cpageFileSize, pageRecord);
11923 pagePtr.p->word[33] = wf->rwfTabPtr.p->noPages;
11924 pagePtr.p->word[34] = ((wf->rwfTabPtr.p->noPages - 1) * 2048) + wf->wordIndex;
11925 switch (wf->rwfTabPtr.p->tabCopyStatus) {
11926 case TabRecord::CS_SR_PHASE2_READ_TABLE:
11927 /* -------------------------------------------------------------------*/
11928 // We are performing a system restart and we are now ready to copy the
11929 // table from this node (the master) to all other nodes.
11930 /* -------------------------------------------------------------------*/
11931 jam();
11932 wf->rwfTabPtr.p->tabCopyStatus = TabRecord::CS_IDLE;
11933 signal->theData[0] = DihContinueB::ZSR_PHASE2_READ_TABLE;
11934 signal->theData[1] = wf->rwfTabPtr.i;
11935 sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
11936 return;
11937 break;
11938 case TabRecord::CS_COPY_NODE_STATE:
11939 jam();
11940 tableCopyNodeLab(signal, wf->rwfTabPtr);
11941 return;
11942 break;
11943 case TabRecord::CS_LCP_READ_TABLE:
11944 jam();
11945 signal->theData[0] = DihContinueB::ZTABLE_UPDATE;
11946 signal->theData[1] = wf->rwfTabPtr.i;
11947 sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
11948 return;
11949 break;
11950 case TabRecord::CS_REMOVE_NODE:
11951 case TabRecord::CS_INVALIDATE_NODE_LCP:
11952 jam();
11953 signal->theData[0] = DihContinueB::ZTABLE_UPDATE;
11954 signal->theData[1] = wf->rwfTabPtr.i;
11955 sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
11956 return;
11957 break;
11958 case TabRecord::CS_ADD_TABLE_MASTER:
11959 jam();
11960 wf->rwfTabPtr.p->tabCopyStatus = TabRecord::CS_IDLE;
11961 signal->theData[0] = DihContinueB::ZADD_TABLE_MASTER_PAGES;
11962 signal->theData[1] = wf->rwfTabPtr.i;
11963 sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
11964 return;
11965 break;
11966 case TabRecord::CS_ADD_TABLE_SLAVE:
11967 jam();
11968 wf->rwfTabPtr.p->tabCopyStatus = TabRecord::CS_IDLE;
11969 signal->theData[0] = DihContinueB::ZADD_TABLE_SLAVE_PAGES;
11970 signal->theData[1] = wf->rwfTabPtr.i;
11971 sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
11972 return;
11973 case TabRecord::CS_COPY_TO_SAVE:
11974 signal->theData[0] = DihContinueB::ZTABLE_UPDATE;
11975 signal->theData[1] = wf->rwfTabPtr.i;
11976 sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
11977 return;
11978 case TabRecord::CS_GET_TABINFO:
11979 jam();
11980 signal->theData[0] = DihContinueB::ZGET_TABINFO_SEND;
11981 signal->theData[1] = wf->rwfTabPtr.i;
11982 sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
11983 return;
11984 default:
11985 ndbrequire(false);
11986 return;
11987 break;
11988 }//switch
11989 } else {
11990 jam();
11991 signal->theData[0] = DihContinueB::ZPACK_FRAG_INTO_PAGES;
11992 signal->theData[1] = wf->rwfTabPtr.i;
11993 signal->theData[2] = wf->fragId;
11994 signal->theData[3] = wf->pageIndex;
11995 signal->theData[4] = wf->wordIndex;
11996 signal->theData[5] = wf->totalfragments;
11997 sendSignal(reference(), GSN_CONTINUEB, signal, 6, JBB);
11998 }//if
11999 return;
12000 }//Dbdih::packFragIntoPagesLab()
12001
12002 /*****************************************************************************/
12003 /* ********** START FRAGMENT MODULE *************/
12004 /*****************************************************************************/
12005 void
dump_replica_info()12006 Dbdih::dump_replica_info()
12007 {
12008 TabRecordPtr tabPtr;
12009 FragmentstorePtr fragPtr;
12010
12011 for(tabPtr.i = 0; tabPtr.i < ctabFileSize; tabPtr.i++)
12012 {
12013 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
12014 if (tabPtr.p->tabStatus != TabRecord::TS_ACTIVE)
12015 continue;
12016
12017 for(Uint32 fid = 0; fid<tabPtr.p->totalfragments; fid++)
12018 {
12019 getFragstore(tabPtr.p, fid, fragPtr);
12020 ndbout_c("tab: %d frag: %d gci: %d\n",
12021 tabPtr.i, fid, SYSFILE->newestRestorableGCI);
12022
12023 dump_replica_info(fragPtr.p);
12024 }
12025 }
12026 }
12027
12028 void
dump_replica_info(const Fragmentstore * fragPtrP)12029 Dbdih::dump_replica_info(const Fragmentstore* fragPtrP)
12030 {
12031 ndbout_c(" -- storedReplicas: ");
12032 Uint32 i;
12033 ReplicaRecordPtr replicaPtr;
12034 replicaPtr.i = fragPtrP->storedReplicas;
12035 for(; replicaPtr.i != RNIL; replicaPtr.i = replicaPtr.p->nextReplica)
12036 {
12037 ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
12038 ndbout_c(" node: %d initialGci: %d nextLcp: %d noCrashedReplicas: %d",
12039 replicaPtr.p->procNode,
12040 replicaPtr.p->initialGci,
12041 replicaPtr.p->nextLcp,
12042 replicaPtr.p->noCrashedReplicas);
12043 for(i = 0; i<MAX_LCP_STORED; i++)
12044 {
12045 ndbout_c(" i: %d %s : lcpId: %d maxGci Completed: %d Started: %d",
12046 i,
12047 (replicaPtr.p->lcpStatus[i] == ZVALID ?"VALID":"INVALID"),
12048 replicaPtr.p->lcpId[i],
12049 replicaPtr.p->maxGciCompleted[i],
12050 replicaPtr.p->maxGciStarted[i]);
12051 }
12052
12053 for (i = 0; i < 8; i++)
12054 {
12055 ndbout_c(" crashed replica: %d replicaLastGci: %d createGci: %d",
12056 i,
12057 replicaPtr.p->replicaLastGci[i],
12058 replicaPtr.p->createGci[i]);
12059 }
12060 }
12061 ndbout_c(" -- oldStoredReplicas");
12062 replicaPtr.i = fragPtrP->oldStoredReplicas;
12063 for(; replicaPtr.i != RNIL; replicaPtr.i = replicaPtr.p->nextReplica)
12064 {
12065 ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
12066 ndbout_c(" node: %d initialGci: %d nextLcp: %d noCrashedReplicas: %d",
12067 replicaPtr.p->procNode,
12068 replicaPtr.p->initialGci,
12069 replicaPtr.p->nextLcp,
12070 replicaPtr.p->noCrashedReplicas);
12071 for(i = 0; i<MAX_LCP_STORED; i++)
12072 {
12073 ndbout_c(" i: %d %s : lcpId: %d maxGci Completed: %d Started: %d",
12074 i,
12075 (replicaPtr.p->lcpStatus[i] == ZVALID ?"VALID":"INVALID"),
12076 replicaPtr.p->lcpId[i],
12077 replicaPtr.p->maxGciCompleted[i],
12078 replicaPtr.p->maxGciStarted[i]);
12079 }
12080
12081 for (i = 0; i < 8; i++)
12082 {
12083 ndbout_c(" crashed replica: %d replicaLastGci: %d createGci: %d",
12084 i,
12085 replicaPtr.p->replicaLastGci[i],
12086 replicaPtr.p->createGci[i]);
12087 }
12088 }
12089 }
12090
startFragment(Signal * signal,Uint32 tableId,Uint32 fragId)12091 void Dbdih::startFragment(Signal* signal, Uint32 tableId, Uint32 fragId)
12092 {
12093 Uint32 TloopCount = 0;
12094 TabRecordPtr tabPtr;
12095 while (true) {
12096 if (TloopCount > 100) {
12097 jam();
12098 signal->theData[0] = DihContinueB::ZSTART_FRAGMENT;
12099 signal->theData[1] = tableId;
12100 signal->theData[2] = 0;
12101 sendSignal(reference(), GSN_CONTINUEB, signal, 3, JBB);
12102 return;
12103 }
12104
12105 if (tableId >= ctabFileSize) {
12106 jam();
12107 signal->theData[0] = DihContinueB::ZCOMPLETE_RESTART;
12108 sendSignal(reference(), GSN_CONTINUEB, signal, 1, JBB);
12109 return;
12110 }//if
12111
12112 tabPtr.i = tableId;
12113 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
12114 if (tabPtr.p->tabStatus != TabRecord::TS_ACTIVE){
12115 jam();
12116 TloopCount++;
12117 tableId++;
12118 fragId = 0;
12119 continue;
12120 }
12121
12122 if(tabPtr.p->tabStorage != TabRecord::ST_NORMAL){
12123 jam();
12124 TloopCount++;
12125 tableId++;
12126 fragId = 0;
12127 continue;
12128 }
12129
12130 jam();
12131 break;
12132 }//while
12133
12134 FragmentstorePtr fragPtr;
12135 getFragstore(tabPtr.p, fragId, fragPtr);
12136 /* ----------------------------------------------------------------------- */
12137 /* WE NEED TO RESET THE REPLICA DATA STRUCTURES. THIS MEANS THAT WE */
12138 /* MUST REMOVE REPLICAS THAT WAS NOT STARTED AT THE GCI TO RESTORE. WE */
12139 /* NEED TO PUT ALL STORED REPLICAS ON THE LIST OF OLD STORED REPLICAS */
12140 /* RESET THE NUMBER OF REPLICAS TO CREATE. */
12141 /* ----------------------------------------------------------------------- */
12142 cnoOfCreateReplicas = 0;
12143 /* ----------------------------------------------------------------------- */
12144 /* WE WILL NEVER START MORE THAN FOUR FRAGMENT REPLICAS WHATEVER THE */
12145 /* DESIRED REPLICATION IS. */
12146 /* ----------------------------------------------------------------------- */
12147 ndbrequire(tabPtr.p->noOfBackups < MAX_REPLICAS);
12148 /* ----------------------------------------------------------------------- */
12149 /* SEARCH FOR STORED REPLICAS THAT CAN BE USED TO RESTART THE SYSTEM. */
12150 /* ----------------------------------------------------------------------- */
12151 searchStoredReplicas(fragPtr);
12152
12153 if (cnoOfCreateReplicas == 0) {
12154 /* --------------------------------------------------------------------- */
12155 /* THERE WERE NO STORED REPLICAS AVAILABLE THAT CAN SERVE AS REPLICA TO*/
12156 /* RESTART THE SYSTEM FROM. IN A LATER RELEASE WE WILL ADD */
12157 /* FUNCTIONALITY TO CHECK IF THERE ARE ANY STANDBY NODES THAT COULD DO */
12158 /* THIS TASK INSTEAD IN THIS IMPLEMENTATION WE SIMPLY CRASH THE SYSTEM.*/
12159 /* THIS WILL DECREASE THE GCI TO RESTORE WHICH HOPEFULLY WILL MAKE IT */
12160 /* POSSIBLE TO RESTORE THE SYSTEM. */
12161 /* --------------------------------------------------------------------- */
12162 char buf[64];
12163 BaseString::snprintf(buf, sizeof(buf), "table: %d fragment: %d gci: %d",
12164 tableId, fragId, SYSFILE->newestRestorableGCI);
12165
12166 ndbout_c("%s", buf);
12167 dump_replica_info();
12168
12169 progError(__LINE__, NDBD_EXIT_NO_RESTORABLE_REPLICA, buf);
12170 ndbrequire(false);
12171 return;
12172 }//if
12173
12174 /* ----------------------------------------------------------------------- */
12175 /* WE HAVE CHANGED THE NODE TO BE PRIMARY REPLICA AND THE NODES TO BE */
12176 /* BACKUP NODES. WE MUST UPDATE THIS NODES DATA STRUCTURE SINCE WE */
12177 /* WILL NOT COPY THE TABLE DATA TO OURSELF. */
12178 /* ----------------------------------------------------------------------- */
12179 updateNodeInfo(fragPtr);
12180 /* ----------------------------------------------------------------------- */
12181 /* NOW WE HAVE COLLECTED ALL THE REPLICAS WE COULD GET. WE WILL NOW */
12182 /* RESTART THE FRAGMENT REPLICAS WE HAVE FOUND IRRESPECTIVE OF IF THERE*/
12183 /* ARE ENOUGH ACCORDING TO THE DESIRED REPLICATION. */
12184 /* ----------------------------------------------------------------------- */
12185 /* WE START BY SENDING ADD_FRAGREQ FOR THOSE REPLICAS THAT NEED IT. */
12186 /* ----------------------------------------------------------------------- */
12187 CreateReplicaRecordPtr createReplicaPtr;
12188 for (createReplicaPtr.i = 0;
12189 createReplicaPtr.i < cnoOfCreateReplicas;
12190 createReplicaPtr.i++) {
12191 jam();
12192 ptrCheckGuard(createReplicaPtr, 4, createReplicaRecord);
12193 }//for
12194
12195 sendStartFragreq(signal, tabPtr, fragId);
12196
12197 /**
12198 * Don't wait for START_FRAGCONF
12199 */
12200 fragId++;
12201 if (fragId >= tabPtr.p->totalfragments) {
12202 jam();
12203 tabPtr.i++;
12204 fragId = 0;
12205 }//if
12206 signal->theData[0] = DihContinueB::ZSTART_FRAGMENT;
12207 signal->theData[1] = tabPtr.i;
12208 signal->theData[2] = fragId;
12209 sendSignal(reference(), GSN_CONTINUEB, signal, 3, JBB);
12210
12211 return;
12212 }//Dbdih::startFragmentLab()
12213
12214
12215 /*****************************************************************************/
12216 /* ********** COMPLETE RESTART MODULE *************/
12217 /*****************************************************************************/
completeRestartLab(Signal * signal)12218 void Dbdih::completeRestartLab(Signal* signal)
12219 {
12220 sendLoopMacro(START_RECREQ, sendSTART_RECREQ, RNIL);
12221 }//completeRestartLab()
12222
12223 /* ------------------------------------------------------------------------- */
12224 // SYSTEM RESTART:
12225 /* A NODE HAS COMPLETED RESTORING ALL DATABASE FRAGMENTS. */
12226 // NODE RESTART:
12227 // THE STARTING NODE HAS PREPARED ITS LOG FILES TO ENABLE EXECUTION
12228 // OF TRANSACTIONS.
12229 // Precondition:
12230 // This signal must be received by the master node.
12231 /* ------------------------------------------------------------------------- */
execSTART_RECCONF(Signal * signal)12232 void Dbdih::execSTART_RECCONF(Signal* signal)
12233 {
12234 jamEntry();
12235 Uint32 senderNodeId = signal->theData[0];
12236 Uint32 senderData = signal->theData[1];
12237
12238 if (senderData != RNIL)
12239 {
12240 /**
12241 * This is node restart
12242 */
12243 Ptr<TakeOverRecord> takeOverPtr;
12244 c_takeOverPool.getPtr(takeOverPtr, senderData);
12245 sendStartTo(signal, takeOverPtr);
12246 return;
12247 }
12248
12249 /* --------------------------------------------------------------------- */
12250 // This was the system restart case. We set the state indicating that the
12251 // node has completed restoration of all fragments.
12252 /* --------------------------------------------------------------------- */
12253 receiveLoopMacro(START_RECREQ, senderNodeId);
12254
12255 /**
12256 * Remove each node that has to TO from LCP/LQH
12257 */
12258 Uint32 i = 0;
12259 while ((i = m_to_nodes.find(i + 1)) != NdbNodeBitmask::NotFound)
12260 {
12261 jam();
12262 NodeRecordPtr nodePtr;
12263 nodePtr.i = i;
12264 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
12265 nodePtr.p->copyCompleted = 0;
12266 }
12267
12268 if (m_to_nodes.get(getOwnNodeId()))
12269 {
12270 /**
12271 * We (master) needs take-over
12272 * run this directly to avoid strange confusion
12273 */
12274 jam();
12275 c_sr_wait_to = true;
12276 }
12277
12278 if (!m_to_nodes.isclear() && c_sr_wait_to)
12279 {
12280 jam();
12281
12282 StartCopyReq* req = (StartCopyReq*)signal->getDataPtrSend();
12283 req->senderRef = reference();
12284 req->senderData = getOwnNodeId();
12285 req->flags = 0; // Note dont wait for LCP
12286
12287 i = 0;
12288 while ((i = m_to_nodes.find(i + 1)) != NdbNodeBitmask::NotFound)
12289 {
12290 jam();
12291 req->startingNodeId = i;
12292 sendSignal(calcDihBlockRef(i), GSN_START_COPYREQ, signal,
12293 StartCopyReq::SignalLength, JBB);
12294 }
12295
12296 char buf[100];
12297 infoEvent("Starting take-over of %s", m_to_nodes.getText(buf));
12298 return;
12299 }
12300
12301 signal->theData[0] = reference();
12302 m_sr_nodes.copyto(NdbNodeBitmask::Size, signal->theData+1);
12303 sendSignal(cntrlblockref, GSN_NDB_STARTCONF, signal,
12304 1 + NdbNodeBitmask::Size, JBB);
12305 }//Dbdih::execSTART_RECCONF()
12306
copyNodeLab(Signal * signal,Uint32 tableId)12307 void Dbdih::copyNodeLab(Signal* signal, Uint32 tableId)
12308 {
12309 /* ----------------------------------------------------------------------- */
12310 // This code is executed by the master to assist a node restart in receiving
12311 // the data in the master.
12312 /* ----------------------------------------------------------------------- */
12313 Uint32 TloopCount = 0;
12314
12315 if (!c_nodeStartMaster.activeState) {
12316 jam();
12317 /* --------------------------------------------------------------------- */
12318 // Obviously the node crashed in the middle of its node restart. We will
12319 // stop this process simply by returning after resetting the wait indicator.
12320 /* ---------------------------------------------------------------------- */
12321 c_nodeStartMaster.wait = ZFALSE;
12322 return;
12323 }//if
12324 TabRecordPtr tabPtr;
12325 tabPtr.i = tableId;
12326 while (tabPtr.i < ctabFileSize) {
12327 ptrAss(tabPtr, tabRecord);
12328 if (tabPtr.p->tabStatus == TabRecord::TS_ACTIVE)
12329 {
12330 /* -------------------------------------------------------------------- */
12331 // The table is defined. We will start by packing the table into pages.
12332 // The tabCopyStatus indicates to the CONTINUEB(ZPACK_TABLE_INTO_PAGES)
12333 // who called it. After packing the table into page(s) it will be sent to
12334 // the starting node by COPY_TABREQ signals. After returning from the
12335 // starting node we will return to this subroutine and continue
12336 // with the next table.
12337 /* -------------------------------------------------------------------- */
12338 if (! (tabPtr.p->tabCopyStatus == TabRecord::CS_IDLE))
12339 {
12340 jam();
12341 signal->theData[0] = DihContinueB::ZCOPY_NODE;
12342 signal->theData[1] = tabPtr.i;
12343 sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 100, 2);
12344 return;
12345 }
12346 ndbrequire(tabPtr.p->tabCopyStatus == TabRecord::CS_IDLE);
12347 tabPtr.p->tabCopyStatus = TabRecord::CS_COPY_NODE_STATE;
12348 signal->theData[0] = DihContinueB::ZPACK_TABLE_INTO_PAGES;
12349 signal->theData[1] = tabPtr.i;
12350 sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
12351 return;
12352 } else {
12353 jam();
12354 if (TloopCount > 100) {
12355 /* ------------------------------------------------------------------ */
12356 // Introduce real-time break after looping through 100 not copied tables
12357 /* ----------------------------------------------------------------- */
12358 jam();
12359 signal->theData[0] = DihContinueB::ZCOPY_NODE;
12360 signal->theData[1] = tabPtr.i + 1;
12361 sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
12362 return;
12363 } else {
12364 jam();
12365 TloopCount++;
12366 tabPtr.i++;
12367 }//if
12368 }//if
12369 }//while
12370 dihCopyCompletedLab(signal);
12371 return;
12372 }//Dbdih::copyNodeLab()
12373
tableCopyNodeLab(Signal * signal,TabRecordPtr tabPtr)12374 void Dbdih::tableCopyNodeLab(Signal* signal, TabRecordPtr tabPtr)
12375 {
12376 /* ----------------------------------------------------------------------- */
12377 /* COPY PAGES READ TO STARTING NODE. */
12378 /* ----------------------------------------------------------------------- */
12379 if (!c_nodeStartMaster.activeState) {
12380 jam();
12381 releaseTabPages(tabPtr.i);
12382 c_nodeStartMaster.wait = ZFALSE;
12383 return;
12384 }//if
12385 NodeRecordPtr copyNodePtr;
12386 PageRecordPtr pagePtr;
12387 copyNodePtr.i = c_nodeStartMaster.startNode;
12388 ptrCheckGuard(copyNodePtr, MAX_NDB_NODES, nodeRecord);
12389
12390 copyNodePtr.p->activeTabptr = tabPtr.i;
12391 pagePtr.i = tabPtr.p->pageRef[0];
12392 ptrCheckGuard(pagePtr, cpageFileSize, pageRecord);
12393
12394 signal->theData[0] = DihContinueB::ZCOPY_TABLE_NODE;
12395 signal->theData[1] = tabPtr.i;
12396 signal->theData[2] = copyNodePtr.i;
12397 signal->theData[3] = 0;
12398 signal->theData[4] = 0;
12399 signal->theData[5] = pagePtr.p->word[34];
12400 sendSignal(reference(), GSN_CONTINUEB, signal, 6, JBB);
12401 }//Dbdih::tableCopyNodeLab()
12402
12403 /* ------------------------------------------------------------------------- */
12404 // execCONTINUEB(ZCOPY_TABLE)
12405 // This routine is used to copy the table descriptions from the master to
12406 // other nodes. It is used in the system restart to copy from master to all
12407 // starting nodes.
12408 /* ------------------------------------------------------------------------- */
copyTableLab(Signal * signal,Uint32 tableId)12409 void Dbdih::copyTableLab(Signal* signal, Uint32 tableId)
12410 {
12411 TabRecordPtr tabPtr;
12412 tabPtr.i = tableId;
12413 ptrAss(tabPtr, tabRecord);
12414
12415 ndbrequire(tabPtr.p->tabCopyStatus == TabRecord::CS_IDLE);
12416 tabPtr.p->tabCopyStatus = TabRecord::CS_SR_PHASE2_READ_TABLE;
12417 signal->theData[0] = DihContinueB::ZPACK_TABLE_INTO_PAGES;
12418 signal->theData[1] = tabPtr.i;
12419 sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
12420 return;
12421 }//Dbdih::copyTableLab()
12422
12423 /* ------------------------------------------------------------------------- */
12424 // execCONTINUEB(ZSR_PHASE2_READ_TABLE)
12425 /* ------------------------------------------------------------------------- */
srPhase2ReadTableLab(Signal * signal,TabRecordPtr tabPtr)12426 void Dbdih::srPhase2ReadTableLab(Signal* signal, TabRecordPtr tabPtr)
12427 {
12428 /* ----------------------------------------------------------------------- */
12429 // We set the sendCOPY_TABREQState to ZACTIVE for all nodes since it is a long
12430 // process to send off all table descriptions. Thus we ensure that we do
12431 // not encounter race conditions where one node is completed before the
12432 // sending process is completed. This could lead to that we start off the
12433 // system before we actually finished all copying of table descriptions
12434 // and could lead to strange errors.
12435 /* ----------------------------------------------------------------------- */
12436
12437 //sendLoopMacro(COPY_TABREQ, nullRoutine);
12438
12439 breakCopyTableLab(signal, tabPtr, cfirstAliveNode);
12440 return;
12441 }//Dbdih::srPhase2ReadTableLab()
12442
12443 /* ------------------------------------------------------------------------- */
12444 /* COPY PAGES READ TO ALL NODES. */
12445 /* ------------------------------------------------------------------------- */
breakCopyTableLab(Signal * signal,TabRecordPtr tabPtr,Uint32 nodeId)12446 void Dbdih::breakCopyTableLab(Signal* signal, TabRecordPtr tabPtr, Uint32 nodeId)
12447 {
12448 NodeRecordPtr nodePtr;
12449 nodePtr.i = nodeId;
12450 while (nodePtr.i != RNIL) {
12451 jam();
12452 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
12453 if (nodePtr.i == getOwnNodeId()){
12454 jam();
12455 /* ------------------------------------------------------------------- */
12456 /* NOT NECESSARY TO COPY TO MY OWN NODE. I ALREADY HAVE THE PAGES. */
12457 /* I DO HOWEVER NEED TO STORE THE TABLE DESCRIPTION ONTO DISK. */
12458 /* ------------------------------------------------------------------- */
12459 /* IF WE ARE MASTER WE ONLY NEED TO SAVE THE TABLE ON DISK. WE ALREADY */
12460 /* HAVE THE TABLE DESCRIPTION IN THE DATA STRUCTURES. */
12461 // AFTER COMPLETING THE WRITE TO DISK THE MASTER WILL ALSO SEND
12462 // COPY_TABCONF AS ALL THE OTHER NODES.
12463 /* ------------------------------------------------------------------- */
12464 c_COPY_TABREQ_Counter.setWaitingFor(nodePtr.i);
12465 tabPtr.p->tabUpdateState = TabRecord::US_COPY_TAB_REQ;
12466 signal->theData[0] = DihContinueB::ZTABLE_UPDATE;
12467 signal->theData[1] = tabPtr.i;
12468 sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
12469 nodePtr.i = nodePtr.p->nextNode;
12470 } else {
12471 PageRecordPtr pagePtr;
12472 /* -------------------------------------------------------------------- */
12473 // RATHER THAN SENDING ALL COPY_TABREQ IN PARALLEL WE WILL SERIALISE THIS
12474 // ACTIVITY AND WILL THUS CALL breakCopyTableLab AGAIN WHEN COMPLETED THE
12475 // SENDING OF COPY_TABREQ'S.
12476 /* -------------------------------------------------------------------- */
12477 jam();
12478 tabPtr.p->tabCopyStatus = TabRecord::CS_SR_PHASE3_COPY_TABLE;
12479 pagePtr.i = tabPtr.p->pageRef[0];
12480 ptrCheckGuard(pagePtr, cpageFileSize, pageRecord);
12481 signal->theData[0] = DihContinueB::ZCOPY_TABLE_NODE;
12482 signal->theData[1] = tabPtr.i;
12483 signal->theData[2] = nodePtr.i;
12484 signal->theData[3] = 0;
12485 signal->theData[4] = 0;
12486 signal->theData[5] = pagePtr.p->word[34];
12487 sendSignal(reference(), GSN_CONTINUEB, signal, 6, JBB);
12488 return;
12489 }//if
12490 }//while
12491 /* ----------------------------------------------------------------------- */
12492 /* WE HAVE NOW SENT THE TABLE PAGES TO ALL NODES. EXIT AND WAIT FOR ALL */
12493 /* REPLIES. */
12494 /* ----------------------------------------------------------------------- */
12495 return;
12496 }//Dbdih::breakCopyTableLab()
12497
12498 /* ------------------------------------------------------------------------- */
12499 // execCONTINUEB(ZCOPY_TABLE_NODE)
12500 /* ------------------------------------------------------------------------- */
copyTableNode(Signal * signal,CopyTableNode * ctn,NodeRecordPtr nodePtr)12501 void Dbdih::copyTableNode(Signal* signal,
12502 CopyTableNode* ctn, NodeRecordPtr nodePtr)
12503 {
12504 if (getNodeState().startLevel >= NodeState::SL_STARTED){
12505 /* --------------------------------------------------------------------- */
12506 // We are in the process of performing a node restart and are copying a
12507 // table description to a starting node. We will check that no nodes have
12508 // crashed in this process.
12509 /* --------------------------------------------------------------------- */
12510 if (!c_nodeStartMaster.activeState) {
12511 jam();
12512 /** ------------------------------------------------------------------
12513 * The starting node crashed. We will release table pages and stop this
12514 * copy process and allow new node restarts to start.
12515 * ------------------------------------------------------------------ */
12516 releaseTabPages(ctn->ctnTabPtr.i);
12517 c_nodeStartMaster.wait = ZFALSE;
12518 return;
12519 }//if
12520 }//if
12521 ndbrequire(ctn->pageIndex < NDB_ARRAY_SIZE(ctn->ctnTabPtr.p->pageRef));
12522 ctn->ctnPageptr.i = ctn->ctnTabPtr.p->pageRef[ctn->pageIndex];
12523 ptrCheckGuard(ctn->ctnPageptr, cpageFileSize, pageRecord);
12524 /**
12525 * If first page & firstWord reqinfo = 1 (first signal)
12526 */
12527 Uint32 reqinfo = (ctn->pageIndex == 0) && (ctn->wordIndex == 0);
12528 if(reqinfo == 1){
12529 c_COPY_TABREQ_Counter.setWaitingFor(nodePtr.i);
12530 }
12531
12532 for (Uint32 i = 0; i < 16; i++) {
12533 jam();
12534 sendCopyTable(signal, ctn, calcDihBlockRef(nodePtr.i), reqinfo);
12535 reqinfo = 0;
12536 if (ctn->noOfWords <= 16) {
12537 jam();
12538 switch (ctn->ctnTabPtr.p->tabCopyStatus) {
12539 case TabRecord::CS_SR_PHASE3_COPY_TABLE:
12540 /* ------------------------------------------------------------------ */
12541 // We have copied the table description to this node.
12542 // We will now proceed
12543 // with sending the table description to the next node in the node list.
12544 /* ------------------------------------------------------------------ */
12545 jam();
12546 ctn->ctnTabPtr.p->tabCopyStatus = TabRecord::CS_IDLE;
12547 breakCopyTableLab(signal, ctn->ctnTabPtr, nodePtr.p->nextNode);
12548 return;
12549 break;
12550 case TabRecord::CS_COPY_NODE_STATE:
12551 jam();
12552 ctn->ctnTabPtr.p->tabCopyStatus = TabRecord::CS_IDLE;
12553 return;
12554 break;
12555 default:
12556 ndbrequire(false);
12557 break;
12558 }//switch
12559 } else {
12560 jam();
12561 ctn->wordIndex += 16;
12562 if (ctn->wordIndex == 2048) {
12563 jam();
12564 ctn->wordIndex = 0;
12565 ctn->pageIndex++;
12566 ndbrequire(ctn->pageIndex < NDB_ARRAY_SIZE(ctn->ctnTabPtr.p->pageRef));
12567 ctn->ctnPageptr.i = ctn->ctnTabPtr.p->pageRef[ctn->pageIndex];
12568 ptrCheckGuard(ctn->ctnPageptr, cpageFileSize, pageRecord);
12569 }//if
12570 ctn->noOfWords -= 16;
12571 }//if
12572 }//for
12573 signal->theData[0] = DihContinueB::ZCOPY_TABLE_NODE;
12574 signal->theData[1] = ctn->ctnTabPtr.i;
12575 signal->theData[2] = nodePtr.i;
12576 signal->theData[3] = ctn->pageIndex;
12577 signal->theData[4] = ctn->wordIndex;
12578 signal->theData[5] = ctn->noOfWords;
12579 sendSignal(reference(), GSN_CONTINUEB, signal, 6, JBB);
12580 }//Dbdih::copyTableNodeLab()
12581
sendCopyTable(Signal * signal,CopyTableNode * ctn,BlockReference ref,Uint32 reqinfo)12582 void Dbdih::sendCopyTable(Signal* signal, CopyTableNode* ctn,
12583 BlockReference ref, Uint32 reqinfo)
12584 {
12585 signal->theData[0] = reference();
12586 signal->theData[1] = reqinfo;
12587 signal->theData[2] = ctn->ctnTabPtr.i;
12588 signal->theData[3] = ctn->ctnTabPtr.p->schemaVersion;
12589 signal->theData[4] = ctn->noOfWords;
12590 ndbrequire(ctn->wordIndex + 15 < 2048);
12591 MEMCOPY_NO_WORDS(&signal->theData[5], &ctn->ctnPageptr.p->word[ctn->wordIndex], 16);
12592 sendSignal(ref, GSN_COPY_TABREQ, signal, 21, JBB);
12593 }//Dbdih::sendCopyTable()
12594
execCOPY_TABCONF(Signal * signal)12595 void Dbdih::execCOPY_TABCONF(Signal* signal)
12596 {
12597 NodeRecordPtr nodePtr;
12598 jamEntry();
12599 nodePtr.i = signal->theData[0];
12600 Uint32 tableId = signal->theData[1];
12601 if (getNodeState().startLevel >= NodeState::SL_STARTED){
12602 /* --------------------------------------------------------------------- */
12603 // We are in the process of performing a node restart. Continue by copying
12604 // the next table to the starting node.
12605 /* --------------------------------------------------------------------- */
12606 jam();
12607 NodeRecordPtr nodePtr;
12608 nodePtr.i = signal->theData[0];
12609 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
12610 c_COPY_TABREQ_Counter.clearWaitingFor(nodePtr.i);
12611
12612 releaseTabPages(tableId);
12613 signal->theData[0] = DihContinueB::ZCOPY_NODE;
12614 signal->theData[1] = tableId + 1;
12615 sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
12616 return;
12617 } else {
12618 /* --------------------------------------------------------------------- */
12619 // We are in the process of performing a system restart. Check if all nodes
12620 // have saved the new table description to file and then continue with the
12621 // next table.
12622 /* --------------------------------------------------------------------- */
12623 receiveLoopMacro(COPY_TABREQ, nodePtr.i);
12624 /* --------------------------------------------------------------------- */
12625 /* WE HAVE NOW COPIED TO ALL NODES. WE HAVE NOW COMPLETED RESTORING */
12626 /* THIS TABLE. CONTINUE WITH THE NEXT TABLE. */
12627 /* WE NEED TO RELEASE THE PAGES IN THE TABLE IN THIS NODE HERE. */
12628 /* WE ALSO NEED TO CLOSE THE TABLE FILE. */
12629 /* --------------------------------------------------------------------- */
12630 releaseTabPages(tableId);
12631
12632 TabRecordPtr tabPtr;
12633 tabPtr.i = tableId;
12634 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
12635
12636 ConnectRecordPtr connectPtr;
12637 connectPtr.i = tabPtr.p->connectrec;
12638 ptrCheckGuard(connectPtr, cconnectFileSize, connectRecord);
12639
12640 connectPtr.p->m_alter.m_totalfragments = tabPtr.p->totalfragments;
12641 sendAddFragreq(signal, connectPtr, tabPtr, 0);
12642 return;
12643 }//if
12644 }//Dbdih::execCOPY_TABCONF()
12645
12646 /*
12647 3.13 L O C A L C H E C K P O I N T (M A S T E R)
12648 ****************************************************
12649 */
12650 /*****************************************************************************/
12651 /* ********** LOCAL-CHECK-POINT-HANDLING MODULE *************/
12652 /*****************************************************************************/
12653 /* ------------------------------------------------------------------------- */
12654 /* IT IS TIME TO CHECK IF IT IS TIME TO START A LOCAL CHECKPOINT. */
12655 /* WE WILL EITHER START AFTER 1 MILLION WORDS HAVE ARRIVED OR WE WILL */
12656 /* EXECUTE AFTER ABOUT 16 MINUTES HAVE PASSED BY. */
12657 /* ------------------------------------------------------------------------- */
checkTcCounterLab(Signal * signal)12658 void Dbdih::checkTcCounterLab(Signal* signal)
12659 {
12660 CRASH_INSERTION(7009);
12661 if (c_lcpState.lcpStatus != LCP_STATUS_IDLE) {
12662 g_eventLogger->error("lcpStatus = %u"
12663 "lcpStatusUpdatedPlace = %d",
12664 (Uint32) c_lcpState.lcpStatus,
12665 c_lcpState.lcpStatusUpdatedPlace);
12666 ndbrequire(false);
12667 return;
12668 }//if
12669 add_lcp_counter(&c_lcpState.ctimer, 32);
12670 if ((c_nodeStartMaster.blockLcp == true) ||
12671 (c_lcpState.lcpStopGcp >= c_newest_restorable_gci)) {
12672 jam();
12673 /* --------------------------------------------------------------------- */
12674 // No reason to start juggling the states and checking for start of LCP if
12675 // we are blocked to start an LCP anyway.
12676 // We also block LCP start if we have not completed one global checkpoints
12677 // before starting another local checkpoint.
12678 /* --------------------------------------------------------------------- */
12679 signal->theData[0] = DihContinueB::ZCHECK_TC_COUNTER;
12680 signal->theData[1] = __LINE__;
12681 sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 1 * 100, 2);
12682 return;
12683 }//if
12684 c_lcpState.setLcpStatus(LCP_TCGET, __LINE__);
12685
12686 c_lcpState.ctcCounter = c_lcpState.ctimer;
12687 sendLoopMacro(TCGETOPSIZEREQ, sendTCGETOPSIZEREQ, RNIL);
12688 }//Dbdih::checkTcCounterLab()
12689
checkLcpStart(Signal * signal,Uint32 lineNo)12690 void Dbdih::checkLcpStart(Signal* signal, Uint32 lineNo)
12691 {
12692 /* ----------------------------------------------------------------------- */
12693 // Verify that we are not attempting to start another instance of the LCP
12694 // when it is not alright to do so.
12695 /* ----------------------------------------------------------------------- */
12696 ndbrequire(c_lcpState.lcpStart == ZIDLE);
12697 c_lcpState.lcpStart = ZACTIVE;
12698 signal->theData[0] = DihContinueB::ZCHECK_TC_COUNTER;
12699 signal->theData[1] = lineNo;
12700 sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 1000, 2);
12701 }//Dbdih::checkLcpStart()
12702
12703 /* ------------------------------------------------------------------------- */
12704 /*TCGETOPSIZECONF HOW MUCH OPERATION SIZE HAVE BEEN EXECUTED BY TC */
12705 /* ------------------------------------------------------------------------- */
execTCGETOPSIZECONF(Signal * signal)12706 void Dbdih::execTCGETOPSIZECONF(Signal* signal)
12707 {
12708 jamEntry();
12709 Uint32 senderNodeId = signal->theData[0];
12710 add_lcp_counter(&c_lcpState.ctcCounter, signal->theData[1]);
12711
12712 receiveLoopMacro(TCGETOPSIZEREQ, senderNodeId);
12713
12714 ndbrequire(c_lcpState.lcpStatus == LCP_TCGET);
12715 ndbrequire(c_lcpState.lcpStart == ZACTIVE);
12716 /* ----------------------------------------------------------------------- */
12717 // We are not actively starting another LCP, still we receive this signal.
12718 // This is not ok.
12719 /* ---------------------------------------------------------------------- */
12720 /* ALL TC'S HAVE RESPONDED NOW. NOW WE WILL CHECK IF ENOUGH OPERATIONS */
12721 /* HAVE EXECUTED TO ENABLE US TO START A NEW LOCAL CHECKPOINT. */
12722 /* WHILE COPYING DICTIONARY AND DISTRIBUTION INFO TO A STARTING NODE */
12723 /* WE WILL ALSO NOT ALLOW THE LOCAL CHECKPOINT TO PROCEED. */
12724 /*----------------------------------------------------------------------- */
12725 if (c_lcpState.immediateLcpStart == false)
12726 {
12727 Uint64 cnt = Uint64(c_lcpState.ctcCounter);
12728 Uint64 limit = Uint64(1) << c_lcpState.clcpDelay;
12729 bool dostart = cnt >= limit;
12730 if (dostart == false || c_nodeStartMaster.blockLcp == true)
12731 {
12732 jam();
12733 c_lcpState.setLcpStatus(LCP_STATUS_IDLE, __LINE__);
12734
12735 signal->theData[0] = DihContinueB::ZCHECK_TC_COUNTER;
12736 signal->theData[1] = __LINE__;
12737 sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 1 * 100, 2);
12738 return;
12739 }//if
12740 }//if
12741 c_lcpState.lcpStart = ZIDLE;
12742 c_lcpState.immediateLcpStart = false;
12743 /* -----------------------------------------------------------------------
12744 * Now the initial lcp is started,
12745 * we can reset the delay to its orginal value
12746 * --------------------------------------------------------------------- */
12747 CRASH_INSERTION(7010);
12748 /* ----------------------------------------------------------------------- */
12749 /* IF MORE THAN 1 MILLION WORDS PASSED THROUGH THE TC'S THEN WE WILL */
12750 /* START A NEW LOCAL CHECKPOINT. CLEAR CTIMER. START CHECKPOINT */
12751 /* ACTIVITY BY CALCULATING THE KEEP GLOBAL CHECKPOINT. */
12752 // Also remember the current global checkpoint to ensure that we run at least
12753 // one global checkpoints between each local checkpoint that we start up.
12754 /* ----------------------------------------------------------------------- */
12755 c_lcpState.ctimer = 0;
12756 c_lcpState.keepGci = (Uint32)(m_micro_gcp.m_old_gci >> 32);
12757 c_lcpState.oldestRestorableGci = SYSFILE->oldestRestorableGCI;
12758
12759 CRASH_INSERTION(7014);
12760 c_lcpState.setLcpStatus(LCP_TC_CLOPSIZE, __LINE__);
12761 sendLoopMacro(TC_CLOPSIZEREQ, sendTC_CLOPSIZEREQ, RNIL);
12762 }
12763
execTC_CLOPSIZECONF(Signal * signal)12764 void Dbdih::execTC_CLOPSIZECONF(Signal* signal)
12765 {
12766 jamEntry();
12767 Uint32 senderNodeId = signal->theData[0];
12768 receiveLoopMacro(TC_CLOPSIZEREQ, senderNodeId);
12769
12770 ndbrequire(c_lcpState.lcpStatus == LCP_TC_CLOPSIZE);
12771
12772 /* ----------------------------------------------------------------------- */
12773 /* UPDATE THE NEW LATEST LOCAL CHECKPOINT ID. */
12774 /* ----------------------------------------------------------------------- */
12775 cnoOfActiveTables = 0;
12776 c_lcpState.setLcpStatus(LCP_CALCULATE_KEEP_GCI, __LINE__);
12777 ndbrequire(((int)c_lcpState.oldestRestorableGci) > 0);
12778
12779 if (ERROR_INSERTED(7011)) {
12780 signal->theData[0] = NDB_LE_LCPStoppedInCalcKeepGci;
12781 signal->theData[1] = 0;
12782 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
12783 return;
12784 }//if
12785
12786 Mutex mutex(signal, c_mutexMgr, c_fragmentInfoMutex_lcp);
12787 Callback c = { safe_cast(&Dbdih::lcpFragmentMutex_locked), 0 };
12788 ndbrequire(mutex.trylock(c, false));
12789 }
12790
12791 void
lcpFragmentMutex_locked(Signal * signal,Uint32 senderData,Uint32 retVal)12792 Dbdih::lcpFragmentMutex_locked(Signal* signal,
12793 Uint32 senderData,
12794 Uint32 retVal)
12795 {
12796 jamEntry();
12797
12798 if (retVal == UtilLockRef::LockAlreadyHeld)
12799 {
12800 jam();
12801 Mutex mutex(signal, c_mutexMgr, c_fragmentInfoMutex_lcp);
12802 mutex.release();
12803
12804 if (senderData == 0)
12805 {
12806 jam();
12807 infoEvent("Local checkpoint blocked waiting for node-restart");
12808 }
12809
12810 // 2* is as parameter is in seconds, and we sendSignalWithDelay 500ms
12811 if (senderData >= 2*c_lcpState.m_lcp_trylock_timeout)
12812 {
12813 jam();
12814 Callback c = { safe_cast(&Dbdih::lcpFragmentMutex_locked), 0 };
12815 ndbrequire(mutex.lock(c, false));
12816 return;
12817 }
12818 signal->theData[0] = DihContinueB::ZLCP_TRY_LOCK;
12819 signal->theData[1] = senderData + 1;
12820 sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 500, 2);
12821 return;
12822 }
12823
12824 ndbrequire(retVal == 0);
12825
12826 c_lcpState.m_start_time = c_current_time;
12827
12828 setLcpActiveStatusStart(signal);
12829
12830 c_lcpState.keepGci = m_micro_gcp.m_old_gci >> 32;
12831 c_lcpState.oldestRestorableGci = SYSFILE->oldestRestorableGCI;
12832
12833 signal->theData[0] = DihContinueB::ZCALCULATE_KEEP_GCI;
12834 signal->theData[1] = 0; /* TABLE ID = 0 */
12835 signal->theData[2] = 0; /* FRAGMENT ID = 0 */
12836 sendSignal(reference(), GSN_CONTINUEB, signal, 3, JBB);
12837 return;
12838 }//Dbdih::execTCGETOPSIZECONF()
12839
12840 /* ------------------------------------------------------------------------- */
12841 /* WE NEED TO CALCULATE THE OLDEST GLOBAL CHECKPOINT THAT WILL BE */
12842 /* COMPLETELY RESTORABLE AFTER EXECUTING THIS LOCAL CHECKPOINT. */
12843 /* ------------------------------------------------------------------------- */
calculateKeepGciLab(Signal * signal,Uint32 tableId,Uint32 fragId)12844 void Dbdih::calculateKeepGciLab(Signal* signal, Uint32 tableId, Uint32 fragId)
12845 {
12846 TabRecordPtr tabPtr;
12847 Uint32 TloopCount = 1;
12848 tabPtr.i = tableId;
12849 do {
12850 if (tabPtr.i >= ctabFileSize) {
12851 if (cnoOfActiveTables > 0) {
12852 jam();
12853 signal->theData[0] = DihContinueB::ZSTORE_NEW_LCP_ID;
12854 sendSignal(reference(), GSN_CONTINUEB, signal, 1, JBB);
12855 return;
12856 } else {
12857 jam();
12858 /* ------------------------------------------------------------------ */
12859 /* THERE ARE NO TABLES TO CHECKPOINT. WE STOP THE CHECKPOINT ALREADY */
12860 /* HERE TO AVOID STRANGE PROBLEMS LATER. */
12861 /* ------------------------------------------------------------------ */
12862 c_lcpState.setLcpStatus(LCP_STATUS_IDLE, __LINE__);
12863 checkLcpStart(signal, __LINE__);
12864 return;
12865 }//if
12866 }//if
12867 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
12868 if (tabPtr.p->tabStatus != TabRecord::TS_ACTIVE ||
12869 tabPtr.p->tabStorage != TabRecord::ST_NORMAL) {
12870 if (TloopCount > 100) {
12871 jam();
12872 signal->theData[0] = DihContinueB::ZCALCULATE_KEEP_GCI;
12873 signal->theData[1] = tabPtr.i + 1;
12874 signal->theData[2] = 0;
12875 sendSignal(reference(), GSN_CONTINUEB, signal, 3, JBB);
12876 return;
12877 } else {
12878 jam();
12879 TloopCount++;
12880 tabPtr.i++;
12881 }//if
12882 } else {
12883 jam();
12884 TloopCount = 0;
12885 }//if
12886 } while (TloopCount != 0);
12887 cnoOfActiveTables++;
12888 FragmentstorePtr fragPtr;
12889 getFragstore(tabPtr.p, fragId, fragPtr);
12890 checkKeepGci(tabPtr, fragId, fragPtr.p, fragPtr.p->storedReplicas);
12891 checkKeepGci(tabPtr, fragId, fragPtr.p, fragPtr.p->oldStoredReplicas);
12892 fragId++;
12893 if (fragId >= tabPtr.p->totalfragments) {
12894 jam();
12895 tabPtr.i++;
12896 fragId = 0;
12897 }//if
12898 signal->theData[0] = DihContinueB::ZCALCULATE_KEEP_GCI;
12899 signal->theData[1] = tabPtr.i;
12900 signal->theData[2] = fragId;
12901 sendSignal(reference(), GSN_CONTINUEB, signal, 3, JBB);
12902 return;
12903 }//Dbdih::calculateKeepGciLab()
12904
12905 /* ------------------------------------------------------------------------- */
12906 /* WE NEED TO STORE ON DISK THE FACT THAT WE ARE STARTING THIS LOCAL */
12907 /* CHECKPOINT ROUND. THIS WILL INVALIDATE ALL THE LOCAL CHECKPOINTS */
12908 /* THAT WILL EVENTUALLY BE OVERWRITTEN AS PART OF THIS LOCAL CHECKPOINT*/
12909 /* ------------------------------------------------------------------------- */
storeNewLcpIdLab(Signal * signal)12910 void Dbdih::storeNewLcpIdLab(Signal* signal)
12911 {
12912 signal->theData[0] = NDB_LE_LocalCheckpointStarted; //Event type
12913 signal->theData[1] = SYSFILE->latestLCP_ID + 1;
12914 signal->theData[2] = c_lcpState.keepGci;
12915 signal->theData[3] = c_lcpState.oldestRestorableGci;
12916 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);
12917
12918 /***************************************************************************/
12919 // Report the event that a local checkpoint has started.
12920 /***************************************************************************/
12921
12922 signal->setTrace(TestOrd::TraceLocalCheckpoint);
12923
12924 CRASH_INSERTION(7013);
12925 SYSFILE->keepGCI = c_lcpState.keepGci;
12926 //Uint32 lcpId = SYSFILE->latestLCP_ID;
12927 SYSFILE->latestLCP_ID++;
12928 SYSFILE->oldestRestorableGCI = c_lcpState.oldestRestorableGci;
12929
12930 const Uint32 oldestRestorableGCI = SYSFILE->oldestRestorableGCI;
12931 //const Uint32 newestRestorableGCI = SYSFILE->newestRestorableGCI;
12932 //ndbrequire(newestRestorableGCI >= oldestRestorableGCI);
12933
12934 Int32 val = oldestRestorableGCI;
12935 ndbrequire(val > 0);
12936
12937 /* ----------------------------------------------------------------------- */
12938 /* SET BIT INDICATING THAT LOCAL CHECKPOINT IS ONGOING. THIS IS CLEARED */
12939 /* AT THE END OF A LOCAL CHECKPOINT. */
12940 /* ----------------------------------------------------------------------- */
12941 SYSFILE->setLCPOngoing(SYSFILE->systemRestartBits);
12942 /* ---------------------------------------------------------------------- */
12943 /* CHECK IF ANY NODE MUST BE TAKEN OUT OF SERVICE AND REFILLED WITH */
12944 /* NEW FRESH DATA FROM AN ACTIVE NODE. */
12945 /* ---------------------------------------------------------------------- */
12946
12947 /**
12948 * This used be done in setLcpActiveStatusStart
12949 * but this function has been move "up" in the flow
12950 * to just before calcKeepGci
12951 */
12952 setNodeRestartInfoBits(signal);
12953
12954 c_lcpState.setLcpStatus(LCP_COPY_GCI, __LINE__);
12955 //#ifdef VM_TRACE
12956 // infoEvent("LocalCheckpoint %d started", SYSFILE->latestLCP_ID);
12957 // signal->theData[0] = 7012;
12958 // execDUMP_STATE_ORD(signal);
12959 //#endif
12960
12961 copyGciLab(signal, CopyGCIReq::LOCAL_CHECKPOINT);
12962 }//Dbdih::storeNewLcpIdLab()
12963
startLcpRoundLab(Signal * signal)12964 void Dbdih::startLcpRoundLab(Signal* signal) {
12965 jam();
12966
12967 CRASH_INSERTION(7218);
12968
12969 Mutex mutex(signal, c_mutexMgr, c_startLcpMutexHandle);
12970 Callback c = { safe_cast(&Dbdih::startLcpMutex_locked), 0 };
12971 ndbrequire(mutex.lock(c));
12972 }
12973
12974 void
startLcpMutex_locked(Signal * signal,Uint32 senderData,Uint32 retVal)12975 Dbdih::startLcpMutex_locked(Signal* signal, Uint32 senderData, Uint32 retVal){
12976 jamEntry();
12977 ndbrequire(retVal == 0);
12978
12979 StartLcpReq* req = (StartLcpReq*)signal->getDataPtrSend();
12980 req->senderRef = reference();
12981 req->lcpId = SYSFILE->latestLCP_ID;
12982 req->participatingLQH = c_lcpState.m_participatingLQH;
12983 req->participatingDIH = c_lcpState.m_participatingDIH;
12984 sendLoopMacro(START_LCP_REQ, sendSTART_LCP_REQ, RNIL);
12985 }
12986
12987 void
sendSTART_LCP_REQ(Signal * signal,Uint32 nodeId,Uint32 extra)12988 Dbdih::sendSTART_LCP_REQ(Signal* signal, Uint32 nodeId, Uint32 extra){
12989 BlockReference ref = calcDihBlockRef(nodeId);
12990 if (ERROR_INSERTED(7021) && nodeId == getOwnNodeId())
12991 {
12992 sendSignalWithDelay(ref, GSN_START_LCP_REQ, signal, 500,
12993 StartLcpReq::SignalLength);
12994 return;
12995 }
12996 else if (ERROR_INSERTED(7021) && ((rand() % 10) > 4))
12997 {
12998 infoEvent("Dont sent STARTLCPREQ to %u", nodeId);
12999 return;
13000 }
13001 sendSignal(ref, GSN_START_LCP_REQ, signal, StartLcpReq::SignalLength, JBB);
13002 }
13003
13004 void
execSTART_LCP_CONF(Signal * signal)13005 Dbdih::execSTART_LCP_CONF(Signal* signal){
13006 StartLcpConf * conf = (StartLcpConf*)signal->getDataPtr();
13007
13008 Uint32 nodeId = refToNode(conf->senderRef);
13009 receiveLoopMacro(START_LCP_REQ, nodeId);
13010
13011 Mutex mutex(signal, c_mutexMgr, c_startLcpMutexHandle);
13012 Callback c = { safe_cast(&Dbdih::startLcpMutex_unlocked), 0 };
13013 mutex.unlock(c);
13014 }
13015
13016 void
startLcpMutex_unlocked(Signal * signal,Uint32 data,Uint32 retVal)13017 Dbdih::startLcpMutex_unlocked(Signal* signal, Uint32 data, Uint32 retVal){
13018 jamEntry();
13019 ndbrequire(retVal == 0);
13020
13021 Mutex mutex(signal, c_mutexMgr, c_startLcpMutexHandle);
13022 mutex.release();
13023
13024 /* ----------------------------------------------------------------------- */
13025 /* NOW PROCEED BY STARTING THE LOCAL CHECKPOINT IN EACH LQH. */
13026 /* ----------------------------------------------------------------------- */
13027 c_lcpState.m_LAST_LCP_FRAG_ORD = c_lcpState.m_participatingLQH;
13028
13029 CRASH_INSERTION(7015);
13030 c_lcpState.setLcpStatus(LCP_START_LCP_ROUND, __LINE__);
13031 startLcpRoundLoopLab(signal, 0, 0);
13032 }
13033
13034 void
master_lcp_fragmentMutex_locked(Signal * signal,Uint32 failedNodePtrI,Uint32 retVal)13035 Dbdih::master_lcp_fragmentMutex_locked(Signal* signal,
13036 Uint32 failedNodePtrI, Uint32 retVal)
13037 {
13038 jamEntry();
13039 ndbrequire(retVal == 0);
13040
13041 signal->theData[0] = NDB_LE_LCP_TakeoverCompleted;
13042 signal->theData[1] = c_lcpMasterTakeOverState.state;
13043 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
13044
13045 signal->theData[0] = 7012;
13046 execDUMP_STATE_ORD(signal);
13047
13048 c_lcpMasterTakeOverState.set(LMTOS_IDLE, __LINE__);
13049
13050 checkLocalNodefailComplete(signal, failedNodePtrI, NF_LCP_TAKE_OVER);
13051
13052 startLcpRoundLoopLab(signal, 0, 0);
13053 }
13054
startLcpRoundLoopLab(Signal * signal,Uint32 startTableId,Uint32 startFragId)13055 void Dbdih::startLcpRoundLoopLab(Signal* signal,
13056 Uint32 startTableId, Uint32 startFragId)
13057 {
13058 NodeRecordPtr nodePtr;
13059 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
13060 ptrAss(nodePtr, nodeRecord);
13061 if (nodePtr.p->nodeStatus == NodeRecord::ALIVE) {
13062 ndbrequire(nodePtr.p->noOfStartedChkpt == 0);
13063 ndbrequire(nodePtr.p->noOfQueuedChkpt == 0);
13064 }//if
13065 }//if
13066 c_lcpState.currentFragment.tableId = startTableId;
13067 c_lcpState.currentFragment.fragmentId = startFragId;
13068 startNextChkpt(signal);
13069 }//Dbdih::startLcpRoundLoopLab()
13070
startNextChkpt(Signal * signal)13071 void Dbdih::startNextChkpt(Signal* signal)
13072 {
13073 Uint32 lcpId = SYSFILE->latestLCP_ID;
13074
13075 NdbNodeBitmask busyNodes;
13076 busyNodes.clear();
13077 const Uint32 lcpNodes = c_lcpState.m_participatingLQH.count();
13078
13079 bool save = true;
13080 LcpState::CurrentFragment curr = c_lcpState.currentFragment;
13081
13082 while (curr.tableId < ctabFileSize) {
13083 TabRecordPtr tabPtr;
13084 tabPtr.i = curr.tableId;
13085 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
13086 if ((tabPtr.p->tabStatus != TabRecord::TS_ACTIVE) ||
13087 (tabPtr.p->tabLcpStatus != TabRecord::TLS_ACTIVE)) {
13088 curr.tableId++;
13089 curr.fragmentId = 0;
13090 continue;
13091 }//if
13092
13093 FragmentstorePtr fragPtr;
13094 getFragstore(tabPtr.p, curr.fragmentId, fragPtr);
13095
13096 ReplicaRecordPtr replicaPtr;
13097 for(replicaPtr.i = fragPtr.p->storedReplicas;
13098 replicaPtr.i != RNIL ;
13099 replicaPtr.i = replicaPtr.p->nextReplica){
13100
13101 jam();
13102 ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
13103
13104 NodeRecordPtr nodePtr;
13105 nodePtr.i = replicaPtr.p->procNode;
13106 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
13107
13108 if (c_lcpState.m_participatingLQH.get(nodePtr.i))
13109 {
13110 if (replicaPtr.p->lcpOngoingFlag &&
13111 replicaPtr.p->lcpIdStarted < lcpId)
13112 {
13113 jam();
13114 //-------------------------------------------------------------------
13115 // We have found a replica on a node that performs local checkpoint
13116 // that is alive and that have not yet been started.
13117 //-------------------------------------------------------------------
13118
13119 if (nodePtr.p->noOfStartedChkpt < 2)
13120 {
13121 jam();
13122 /**
13123 * Send LCP_FRAG_ORD to LQH
13124 */
13125
13126 /**
13127 * Mark the replica so with lcpIdStarted == true
13128 */
13129 replicaPtr.p->lcpIdStarted = lcpId;
13130
13131 Uint32 i = nodePtr.p->noOfStartedChkpt;
13132 nodePtr.p->startedChkpt[i].tableId = tabPtr.i;
13133 nodePtr.p->startedChkpt[i].fragId = curr.fragmentId;
13134 nodePtr.p->startedChkpt[i].replicaPtr = replicaPtr.i;
13135 nodePtr.p->noOfStartedChkpt = i + 1;
13136
13137 sendLCP_FRAG_ORD(signal, nodePtr.p->startedChkpt[i]);
13138 }
13139 else if (nodePtr.p->noOfQueuedChkpt < 2)
13140 {
13141 jam();
13142 /**
13143 * Put LCP_FRAG_ORD "in queue"
13144 */
13145
13146 /**
13147 * Mark the replica so with lcpIdStarted == true
13148 */
13149 replicaPtr.p->lcpIdStarted = lcpId;
13150
13151 Uint32 i = nodePtr.p->noOfQueuedChkpt;
13152 nodePtr.p->queuedChkpt[i].tableId = tabPtr.i;
13153 nodePtr.p->queuedChkpt[i].fragId = curr.fragmentId;
13154 nodePtr.p->queuedChkpt[i].replicaPtr = replicaPtr.i;
13155 nodePtr.p->noOfQueuedChkpt = i + 1;
13156 }
13157 else
13158 {
13159 jam();
13160
13161 if(save)
13162 {
13163 /**
13164 * Stop increasing value on first that was "full"
13165 */
13166 c_lcpState.currentFragment = curr;
13167 save = false;
13168 }
13169
13170 busyNodes.set(nodePtr.i);
13171 if(busyNodes.count() == lcpNodes)
13172 {
13173 /**
13174 * There were no possibility to start the local checkpoint
13175 * and it was not possible to queue it up. In this case we
13176 * stop the start of local checkpoints until the nodes with a
13177 * backlog have performed more checkpoints. We will return and
13178 * will not continue the process of starting any more checkpoints.
13179 */
13180 return;
13181 }//if
13182 }//if
13183 }
13184 }//while
13185 }
13186 curr.fragmentId++;
13187 if (curr.fragmentId >= tabPtr.p->totalfragments) {
13188 jam();
13189 curr.fragmentId = 0;
13190 curr.tableId++;
13191 }//if
13192 }//while
13193
13194 sendLastLCP_FRAG_ORD(signal);
13195 }//Dbdih::startNextChkpt()
13196
sendLastLCP_FRAG_ORD(Signal * signal)13197 void Dbdih::sendLastLCP_FRAG_ORD(Signal* signal)
13198 {
13199 LcpFragOrd * const lcpFragOrd = (LcpFragOrd *)&signal->theData[0];
13200 lcpFragOrd->tableId = RNIL;
13201 lcpFragOrd->fragmentId = 0;
13202 lcpFragOrd->lcpId = SYSFILE->latestLCP_ID;
13203 lcpFragOrd->lcpNo = 0;
13204 lcpFragOrd->keepGci = c_lcpState.keepGci;
13205 lcpFragOrd->lastFragmentFlag = true;
13206
13207 NodeRecordPtr nodePtr;
13208 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
13209 jam();
13210 ptrAss(nodePtr, nodeRecord);
13211
13212 if(nodePtr.p->noOfQueuedChkpt == 0 &&
13213 nodePtr.p->noOfStartedChkpt == 0 &&
13214 c_lcpState.m_LAST_LCP_FRAG_ORD.isWaitingFor(nodePtr.i)){
13215 jam();
13216
13217 CRASH_INSERTION(7028);
13218
13219 /**
13220 * Nothing queued or started <=> Complete on that node
13221 *
13222 */
13223 c_lcpState.m_LAST_LCP_FRAG_ORD.clearWaitingFor(nodePtr.i);
13224 if(ERROR_INSERTED(7075)){
13225 continue;
13226 }
13227
13228 CRASH_INSERTION(7193);
13229 BlockReference ref = calcLqhBlockRef(nodePtr.i);
13230 sendSignal(ref, GSN_LCP_FRAG_ORD, signal,LcpFragOrd::SignalLength, JBB);
13231 }
13232 }
13233 if(ERROR_INSERTED(7075))
13234 {
13235 if(c_lcpState.m_LAST_LCP_FRAG_ORD.done())
13236 {
13237 CRASH_INSERTION(7075);
13238 }
13239 }
13240 }//Dbdih::sendLastLCP_FRAGORD()
13241
13242 /* ------------------------------------------------------------------------- */
13243 /* A FRAGMENT REPLICA HAS COMPLETED EXECUTING ITS LOCAL CHECKPOINT. */
13244 /* CHECK IF ALL REPLICAS IN THE TABLE HAVE COMPLETED. IF SO STORE THE */
13245 /* THE TABLE DISTRIBUTION ON DISK. ALSO SEND LCP_REPORT TO ALL OTHER */
13246 /* NODES SO THAT THEY CAN STORE THE TABLE ONTO DISK AS WELL. */
13247 /* ------------------------------------------------------------------------- */
execLCP_FRAG_REP(Signal * signal)13248 void Dbdih::execLCP_FRAG_REP(Signal* signal)
13249 {
13250 jamEntry();
13251
13252 LcpFragRep * const lcpReport = (LcpFragRep *)&signal->theData[0];
13253
13254 /**
13255 * Proxing LCP_FRAG_REP
13256 */
13257 const bool broadcast_req = lcpReport->nodeId == LcpFragRep::BROADCAST_REQ;
13258 if (broadcast_req)
13259 {
13260 jam();
13261 ndbrequire(refToNode(signal->getSendersBlockRef()) == getOwnNodeId());
13262
13263 /**
13264 * Set correct nodeId
13265 */
13266 lcpReport->nodeId = getOwnNodeId();
13267
13268 NodeReceiverGroup rg(DBDIH, c_lcpState.m_participatingDIH);
13269 rg.m_nodes.clear(getOwnNodeId());
13270 sendSignal(rg, GSN_LCP_FRAG_REP, signal, signal->getLength(), JBB);
13271
13272 /**
13273 * and continue processing
13274 */
13275 }
13276
13277 ndbrequire(c_lcpState.lcpStatus != LCP_STATUS_IDLE);
13278
13279 #if 0
13280 printLCP_FRAG_REP(stdout,
13281 signal->getDataPtr(),
13282 signal->length(), number());
13283 #endif
13284
13285 Uint32 nodeId = lcpReport->nodeId;
13286 Uint32 tableId = lcpReport->tableId;
13287 Uint32 fragId = lcpReport->fragId;
13288
13289 jamEntry();
13290
13291 if (ERROR_INSERTED(7178) && nodeId != getOwnNodeId())
13292 {
13293 jam();
13294 Uint32 owng =Sysfile::getNodeGroup(getOwnNodeId(), SYSFILE->nodeGroups);
13295 Uint32 nodeg = Sysfile::getNodeGroup(nodeId, SYSFILE->nodeGroups);
13296 if (owng == nodeg)
13297 {
13298 jam();
13299 ndbout_c("throwing away LCP_FRAG_REP from (and killing) %d", nodeId);
13300 SET_ERROR_INSERT_VALUE(7179);
13301 signal->theData[0] = 9999;
13302 sendSignal(numberToRef(CMVMI, nodeId),
13303 GSN_NDB_TAMPER, signal, 1, JBA);
13304 return;
13305 }
13306 }
13307
13308 if (ERROR_INSERTED(7179) && nodeId != getOwnNodeId())
13309 {
13310 jam();
13311 Uint32 owng =Sysfile::getNodeGroup(getOwnNodeId(), SYSFILE->nodeGroups);
13312 Uint32 nodeg = Sysfile::getNodeGroup(nodeId, SYSFILE->nodeGroups);
13313 if (owng == nodeg)
13314 {
13315 jam();
13316 ndbout_c("throwing away LCP_FRAG_REP from %d", nodeId);
13317 return;
13318 }
13319 }
13320
13321 CRASH_INSERTION2(7025, isMaster());
13322 CRASH_INSERTION2(7016, !isMaster());
13323 CRASH_INSERTION2(7191, (!isMaster() && tableId));
13324
13325 bool fromTimeQueue = (signal->senderBlockRef()==reference()&&!broadcast_req);
13326
13327 TabRecordPtr tabPtr;
13328 tabPtr.i = tableId;
13329 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
13330 if(tabPtr.p->tabCopyStatus != TabRecord::CS_IDLE) {
13331 jam();
13332 /*-----------------------------------------------------------------------*/
13333 // If the table is currently copied to disk we also
13334 // stop already here to avoid strange half-way updates
13335 // of the table data structures.
13336 /*-----------------------------------------------------------------------*/
13337 /*
13338 We need to send this signal without a delay since we have discovered
13339 that we have run out of space in the short time queue. This problem
13340 is very erunlikely to happen but it has and it results in a node crash.
13341 This should be considered a "quick fix" and not a permanent solution.
13342 A cleaner/better way would be to check the time queue if it is full or
13343 not before sending this signal.
13344 */
13345 sendSignal(reference(), GSN_LCP_FRAG_REP, signal, signal->length(), JBB);
13346 /* Kept here for reference
13347 sendSignalWithDelay(reference(), GSN_LCP_FRAG_REP,
13348 signal, 20, signal->length());
13349 */
13350
13351 if(!fromTimeQueue){
13352 c_lcpState.noOfLcpFragRepOutstanding++;
13353 }
13354
13355 return;
13356 }//if
13357
13358 if(fromTimeQueue)
13359 {
13360 jam();
13361 ndbrequire(c_lcpState.noOfLcpFragRepOutstanding > 0);
13362 c_lcpState.noOfLcpFragRepOutstanding--;
13363 }
13364
13365 bool tableDone = reportLcpCompletion(lcpReport);
13366
13367 Uint32 started = lcpReport->maxGciStarted;
13368 Uint32 completed = lcpReport->maxGciCompleted;
13369
13370 if (started > c_lcpState.lcpStopGcp)
13371 {
13372 jam();
13373 c_lcpState.lcpStopGcp = started;
13374 }
13375
13376 /**
13377 * Update m_local_lcp_state
13378 *
13379 * we could only look fragments that we have locally...
13380 * but for now we look at all fragments
13381 */
13382 m_local_lcp_state.lcp_frag_rep(lcpReport);
13383
13384 if (tableDone)
13385 {
13386 jam();
13387
13388 if (tabPtr.p->tabStatus == TabRecord::TS_IDLE ||
13389 tabPtr.p->tabStatus == TabRecord::TS_DROPPING)
13390 {
13391 jam();
13392 g_eventLogger->info("TS_DROPPING - Neglecting to save Table: %d Frag: %d - ",
13393 tableId, fragId);
13394 }
13395 else
13396 {
13397 jam();
13398 /**
13399 * Write table description to file
13400 */
13401 tabPtr.p->tabLcpStatus = TabRecord::TLS_WRITING_TO_FILE;
13402 tabPtr.p->tabCopyStatus = TabRecord::CS_LCP_READ_TABLE;
13403 tabPtr.p->tabUpdateState = TabRecord::US_LOCAL_CHECKPOINT;
13404 signal->theData[0] = DihContinueB::ZPACK_TABLE_INTO_PAGES;
13405 signal->theData[1] = tabPtr.i;
13406 sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
13407
13408 bool ret = checkLcpAllTablesDoneInLqh(__LINE__);
13409 if (ret && ERROR_INSERTED(7209))
13410 {
13411 jam();
13412
13413 signal->theData[0] = 9999;
13414 sendSignal(numberToRef(CMVMI, cmasterNodeId),
13415 GSN_NDB_TAMPER, signal, 1, JBB);
13416 }
13417 }
13418 }
13419
13420 #ifdef VM_TRACE
13421 /* --------------------------------------------------------------------- */
13422 // REPORT that local checkpoint have completed this fragment.
13423 /* --------------------------------------------------------------------- */
13424 signal->theData[0] = NDB_LE_LCPFragmentCompleted;
13425 signal->theData[1] = nodeId;
13426 signal->theData[2] = tableId;
13427 signal->theData[3] = fragId;
13428 signal->theData[4] = started;
13429 signal->theData[5] = completed;
13430 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 6, JBB);
13431 #endif
13432
13433 bool ok = false;
13434 switch(c_lcpMasterTakeOverState.state){
13435 case LMTOS_IDLE:
13436 ok = true;
13437 jam();
13438 /**
13439 * Fall through
13440 */
13441 break;
13442 case LMTOS_WAIT_EMPTY_LCP: // LCP Take over waiting for EMPTY_LCPCONF
13443 jam();
13444 return;
13445 case LMTOS_WAIT_LCP_FRAG_REP:
13446 jam();
13447 checkEmptyLcpComplete(signal);
13448 return;
13449 case LMTOS_INITIAL:
13450 case LMTOS_ALL_IDLE:
13451 case LMTOS_ALL_ACTIVE:
13452 case LMTOS_LCP_CONCLUDING:
13453 case LMTOS_COPY_ONGOING:
13454 ndbrequire(false);
13455 }
13456 ndbrequire(ok);
13457
13458 /* ----------------------------------------------------------------------- */
13459 // Check if there are more LCP's to start up.
13460 /* ----------------------------------------------------------------------- */
13461 if(isMaster())
13462 {
13463 jam();
13464
13465 /**
13466 * Remove from "running" array
13467 */
13468 NodeRecordPtr nodePtr;
13469 nodePtr.i = nodeId;
13470 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
13471
13472 const Uint32 outstanding = nodePtr.p->noOfStartedChkpt;
13473 ndbrequire(outstanding > 0);
13474 if(nodePtr.p->startedChkpt[0].tableId != tableId ||
13475 nodePtr.p->startedChkpt[0].fragId != fragId){
13476 jam();
13477 ndbrequire(outstanding > 1);
13478 ndbrequire(nodePtr.p->startedChkpt[1].tableId == tableId);
13479 ndbrequire(nodePtr.p->startedChkpt[1].fragId == fragId);
13480 } else {
13481 jam();
13482 nodePtr.p->startedChkpt[0] = nodePtr.p->startedChkpt[1];
13483 }
13484 nodePtr.p->noOfStartedChkpt--;
13485 checkStartMoreLcp(signal, nodeId);
13486 }
13487 }
13488
13489 bool
checkLcpAllTablesDoneInLqh(Uint32 line)13490 Dbdih::checkLcpAllTablesDoneInLqh(Uint32 line){
13491 TabRecordPtr tabPtr;
13492
13493 /**
13494 * Check if finished with all tables
13495 */
13496 for (tabPtr.i = 0; tabPtr.i < ctabFileSize; tabPtr.i++) {
13497 jam();
13498 ptrAss(tabPtr, tabRecord);
13499 if ((tabPtr.p->tabStatus == TabRecord::TS_ACTIVE) &&
13500 (tabPtr.p->tabLcpStatus == TabRecord::TLS_ACTIVE))
13501 {
13502 jam();
13503 /**
13504 * Nope, not finished with all tables
13505 */
13506 return false;
13507 }//if
13508 }//for
13509
13510 CRASH_INSERTION2(7026, isMaster());
13511 CRASH_INSERTION2(7017, !isMaster());
13512
13513 c_lcpState.setLcpStatus(LCP_TAB_COMPLETED, line);
13514
13515 if (ERROR_INSERTED(7194))
13516 {
13517 ndbout_c("CLEARING 7194");
13518 CLEAR_ERROR_INSERT_VALUE;
13519 }
13520
13521 return true;
13522 }
13523
findReplica(ReplicaRecordPtr & replicaPtr,Fragmentstore * fragPtrP,Uint32 nodeId,bool old)13524 void Dbdih::findReplica(ReplicaRecordPtr& replicaPtr,
13525 Fragmentstore* fragPtrP,
13526 Uint32 nodeId,
13527 bool old)
13528 {
13529 replicaPtr.i = old ? fragPtrP->oldStoredReplicas : fragPtrP->storedReplicas;
13530 while(replicaPtr.i != RNIL){
13531 ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
13532 if (replicaPtr.p->procNode == nodeId) {
13533 jam();
13534 return;
13535 } else {
13536 jam();
13537 replicaPtr.i = replicaPtr.p->nextReplica;
13538 }//if
13539 };
13540
13541 #ifdef VM_TRACE
13542 g_eventLogger->info("Fragment Replica(node=%d) not found", nodeId);
13543 replicaPtr.i = fragPtrP->oldStoredReplicas;
13544 while(replicaPtr.i != RNIL){
13545 ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
13546 if (replicaPtr.p->procNode == nodeId) {
13547 jam();
13548 break;
13549 } else {
13550 jam();
13551 replicaPtr.i = replicaPtr.p->nextReplica;
13552 }//if
13553 };
13554 if(replicaPtr.i != RNIL){
13555 g_eventLogger->info("...But was found in oldStoredReplicas");
13556 } else {
13557 g_eventLogger->info("...And wasn't found in oldStoredReplicas");
13558 }
13559 #endif
13560 ndbrequire(false);
13561 }//Dbdih::findReplica()
13562
13563
13564 int
handle_invalid_lcp_no(const LcpFragRep * rep,ReplicaRecordPtr replicaPtr)13565 Dbdih::handle_invalid_lcp_no(const LcpFragRep* rep,
13566 ReplicaRecordPtr replicaPtr)
13567 {
13568 ndbrequire(!isMaster());
13569 Uint32 lcpNo = rep->lcpNo;
13570 Uint32 lcpId = rep->lcpId;
13571
13572 if (!ndb_pnr(getNodeInfo(refToNode(cmasterdihref)).m_version))
13573 {
13574 }
13575 else
13576 {
13577 warningEvent("Detected previous node failure of %d during lcp",
13578 rep->nodeId);
13579 }
13580
13581 replicaPtr.p->nextLcp = lcpNo;
13582 replicaPtr.p->lcpId[lcpNo] = 0;
13583 replicaPtr.p->lcpStatus[lcpNo] = ZINVALID;
13584
13585 for (Uint32 i = lcpNo; i != lcpNo; i = nextLcpNo(i))
13586 {
13587 jam();
13588 if (replicaPtr.p->lcpStatus[i] == ZVALID &&
13589 replicaPtr.p->lcpId[i] >= lcpId)
13590 {
13591 ndbout_c("i: %d lcpId: %d", i, replicaPtr.p->lcpId[i]);
13592 ndbrequire(false);
13593 }
13594 }
13595
13596 return 0;
13597 }
13598
13599 /**
13600 * Return true if table is all fragment replicas have been checkpointed
13601 * to disk (in all LQHs)
13602 * false otherwise
13603 */
13604 bool
reportLcpCompletion(const LcpFragRep * lcpReport)13605 Dbdih::reportLcpCompletion(const LcpFragRep* lcpReport)
13606 {
13607 Uint32 lcpNo = lcpReport->lcpNo;
13608 Uint32 lcpId = lcpReport->lcpId;
13609 Uint32 maxGciStarted = lcpReport->maxGciStarted;
13610 Uint32 maxGciCompleted = lcpReport->maxGciCompleted;
13611 Uint32 tableId = lcpReport->tableId;
13612 Uint32 fragId = lcpReport->fragId;
13613 Uint32 nodeId = lcpReport->nodeId;
13614
13615 TabRecordPtr tabPtr;
13616 tabPtr.i = tableId;
13617 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
13618
13619 if (tabPtr.p->tabStatus == TabRecord::TS_DROPPING ||
13620 tabPtr.p->tabStatus == TabRecord::TS_IDLE)
13621 {
13622 jam();
13623 return true;
13624 }
13625
13626 FragmentstorePtr fragPtr;
13627 getFragstore(tabPtr.p, fragId, fragPtr);
13628
13629 ReplicaRecordPtr replicaPtr;
13630 findReplica(replicaPtr, fragPtr.p, nodeId);
13631
13632 ndbrequire(replicaPtr.p->lcpOngoingFlag == true);
13633 if(lcpNo != replicaPtr.p->nextLcp){
13634 if (handle_invalid_lcp_no(lcpReport, replicaPtr))
13635 {
13636 g_eventLogger->error("lcpNo = %d replicaPtr.p->nextLcp = %d",
13637 lcpNo, replicaPtr.p->nextLcp);
13638 ndbrequire(false);
13639 }
13640 }
13641 ndbrequire(lcpNo == replicaPtr.p->nextLcp);
13642 ndbrequire(lcpNo < MAX_LCP_STORED);
13643 ndbrequire(replicaPtr.p->lcpId[lcpNo] != lcpId);
13644
13645 replicaPtr.p->lcpIdStarted = lcpId;
13646 replicaPtr.p->lcpOngoingFlag = false;
13647
13648 removeOldCrashedReplicas(tableId, fragId, replicaPtr);
13649 replicaPtr.p->lcpId[lcpNo] = lcpId;
13650 replicaPtr.p->lcpStatus[lcpNo] = ZVALID;
13651 replicaPtr.p->maxGciStarted[lcpNo] = maxGciStarted;
13652 replicaPtr.p->maxGciCompleted[lcpNo] = maxGciCompleted;
13653 replicaPtr.p->nextLcp = nextLcpNo(replicaPtr.p->nextLcp);
13654 ndbrequire(fragPtr.p->noLcpReplicas > 0);
13655 fragPtr.p->noLcpReplicas --;
13656
13657 if(fragPtr.p->noLcpReplicas > 0){
13658 jam();
13659 return false;
13660 }
13661
13662 for (Uint32 fid = 0; fid < tabPtr.p->totalfragments; fid++) {
13663 jam();
13664 getFragstore(tabPtr.p, fid, fragPtr);
13665 if (fragPtr.p->noLcpReplicas > 0){
13666 jam();
13667 /* ----------------------------------------------------------------- */
13668 // Not all fragments in table have been checkpointed.
13669 /* ----------------------------------------------------------------- */
13670 if(0)
13671 g_eventLogger->info("reportLcpCompletion: fragment %d not ready", fid);
13672 return false;
13673 }//if
13674 }//for
13675 return true;
13676 }//Dbdih::reportLcpCompletion()
13677
checkStartMoreLcp(Signal * signal,Uint32 nodeId)13678 void Dbdih::checkStartMoreLcp(Signal* signal, Uint32 nodeId)
13679 {
13680 ndbrequire(isMaster());
13681
13682 NodeRecordPtr nodePtr;
13683 nodePtr.i = nodeId;
13684 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
13685
13686 ndbrequire(nodePtr.p->noOfStartedChkpt < 2);
13687
13688 if (nodePtr.p->noOfQueuedChkpt > 0) {
13689 jam();
13690 nodePtr.p->noOfQueuedChkpt--;
13691 Uint32 i = nodePtr.p->noOfStartedChkpt;
13692 nodePtr.p->startedChkpt[i] = nodePtr.p->queuedChkpt[0];
13693 nodePtr.p->queuedChkpt[0] = nodePtr.p->queuedChkpt[1];
13694 //-------------------------------------------------------------------
13695 // We can send a LCP_FRAGORD to the node ordering it to perform a
13696 // local checkpoint on this fragment replica.
13697 //-------------------------------------------------------------------
13698 nodePtr.p->noOfStartedChkpt = i + 1;
13699
13700 sendLCP_FRAG_ORD(signal, nodePtr.p->startedChkpt[i]);
13701 }
13702
13703 /* ----------------------------------------------------------------------- */
13704 // When there are no more outstanding LCP reports and there are no one queued
13705 // in at least one node, then we are ready to make sure all nodes have at
13706 // least two outstanding LCP requests per node and at least two queued for
13707 // sending.
13708 /* ----------------------------------------------------------------------- */
13709 startNextChkpt(signal);
13710 }//Dbdih::checkStartMoreLcp()
13711
13712 void
sendLCP_FRAG_ORD(Signal * signal,NodeRecord::FragmentCheckpointInfo info)13713 Dbdih::sendLCP_FRAG_ORD(Signal* signal,
13714 NodeRecord::FragmentCheckpointInfo info){
13715
13716 ReplicaRecordPtr replicaPtr;
13717 replicaPtr.i = info.replicaPtr;
13718 ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
13719
13720 // MT LQH goes via proxy for DD reasons
13721 BlockReference ref = calcLqhBlockRef(replicaPtr.p->procNode);
13722
13723 if (ERROR_INSERTED(7193) && replicaPtr.p->procNode == getOwnNodeId())
13724 {
13725 return;
13726 }
13727
13728 if (replicaPtr.p->nextLcp >= MAX_LCP_USED)
13729 {
13730 jam();
13731 infoEvent("Updating nextLcp from %u to %u tab: %u",
13732 replicaPtr.p->nextLcp, 0,
13733 info.tableId);
13734 replicaPtr.p->nextLcp = 0;
13735 }
13736
13737 Uint32 keepGci = c_lcpState.keepGci;
13738 if (keepGci > SYSFILE->lastCompletedGCI[replicaPtr.p->procNode])
13739 {
13740 jam();
13741 keepGci = SYSFILE->lastCompletedGCI[replicaPtr.p->procNode];
13742 }
13743
13744 LcpFragOrd * const lcpFragOrd = (LcpFragOrd *)&signal->theData[0];
13745 lcpFragOrd->tableId = info.tableId;
13746 lcpFragOrd->fragmentId = info.fragId;
13747 lcpFragOrd->lcpId = SYSFILE->latestLCP_ID;
13748 lcpFragOrd->lcpNo = replicaPtr.p->nextLcp;
13749 lcpFragOrd->keepGci = keepGci;
13750 lcpFragOrd->lastFragmentFlag = false;
13751 sendSignal(ref, GSN_LCP_FRAG_ORD, signal, LcpFragOrd::SignalLength, JBB);
13752 }
13753
checkLcpCompletedLab(Signal * signal)13754 void Dbdih::checkLcpCompletedLab(Signal* signal)
13755 {
13756 if(c_lcpState.lcpStatus < LCP_TAB_COMPLETED)
13757 {
13758 jam();
13759 return;
13760 }
13761
13762 TabRecordPtr tabPtr;
13763 for (tabPtr.i = 0; tabPtr.i < ctabFileSize; tabPtr.i++) {
13764 jam();
13765 ptrAss(tabPtr, tabRecord);
13766 if (tabPtr.p->tabLcpStatus != TabRecord::TLS_COMPLETED)
13767 {
13768 jam();
13769 return;
13770 }
13771 }
13772
13773 CRASH_INSERTION2(7027, isMaster());
13774 CRASH_INSERTION2(7018, !isMaster());
13775
13776 if(c_lcpState.lcpStatus == LCP_TAB_COMPLETED)
13777 {
13778 /**
13779 * We'r done
13780 */
13781
13782 if (ERROR_INSERTED(7209))
13783 {
13784 signal->theData[0] = DihContinueB::ZCHECK_LCP_COMPLETED;
13785 sendSignal(reference(), GSN_CONTINUEB, signal, 1, JBB);
13786 return;
13787 }
13788
13789 c_lcpState.setLcpStatus(LCP_TAB_SAVED, __LINE__);
13790 sendLCP_COMPLETE_REP(signal);
13791
13792 if (ERROR_INSERTED(7210))
13793 {
13794 CLEAR_ERROR_INSERT_VALUE;
13795 EmptyLcpReq* req = (EmptyLcpReq*)signal->getDataPtr();
13796 req->senderRef = reference();
13797 sendEMPTY_LCP_REQ(signal, getOwnNodeId(), 0);
13798 }
13799
13800 return;
13801 }
13802
13803 ndbrequire(c_lcpState.lcpStatus == LCP_TAB_SAVED);
13804 allNodesLcpCompletedLab(signal);
13805 return;
13806 }//Dbdih::checkLcpCompletedLab()
13807
13808 void
sendLCP_COMPLETE_REP(Signal * signal)13809 Dbdih::sendLCP_COMPLETE_REP(Signal* signal){
13810 jam();
13811
13812 /**
13813 * Quick and dirty fix for bug#36276 dont save
13814 * LCP_COMPLETE_REP to same node same LCP twice
13815 */
13816 bool alreadysent =
13817 c_lcpState.m_lastLCP_COMPLETE_REP_id == SYSFILE->latestLCP_ID &&
13818 c_lcpState.m_lastLCP_COMPLETE_REP_ref == c_lcpState.m_masterLcpDihRef;
13819
13820 if (!alreadysent)
13821 {
13822 LcpCompleteRep * rep = (LcpCompleteRep*)signal->getDataPtrSend();
13823 rep->nodeId = getOwnNodeId();
13824 rep->lcpId = SYSFILE->latestLCP_ID;
13825 rep->blockNo = DBDIH;
13826
13827 sendSignal(c_lcpState.m_masterLcpDihRef, GSN_LCP_COMPLETE_REP, signal,
13828 LcpCompleteRep::SignalLength, JBB);
13829
13830 c_lcpState.m_lastLCP_COMPLETE_REP_id = SYSFILE->latestLCP_ID;
13831 c_lcpState.m_lastLCP_COMPLETE_REP_ref = c_lcpState.m_masterLcpDihRef;
13832 }
13833
13834 /**
13835 * Say that an initial node restart does not need to be redone
13836 * once node has been part of first LCP
13837 */
13838 if (c_set_initial_start_flag &&
13839 c_lcpState.m_participatingLQH.get(getOwnNodeId()))
13840 {
13841 jam();
13842 c_set_initial_start_flag = FALSE;
13843 }
13844 }
13845
13846 /*-------------------------------------------------------------------------- */
13847 /* COMP_LCP_ROUND A LQH HAS COMPLETED A LOCAL CHECKPOINT */
13848 /*------------------------------------------------------------------------- */
execLCP_COMPLETE_REP(Signal * signal)13849 void Dbdih::execLCP_COMPLETE_REP(Signal* signal)
13850 {
13851 jamEntry();
13852
13853 CRASH_INSERTION(7191);
13854
13855 #if 0
13856 g_eventLogger->info("LCP_COMPLETE_REP");
13857 printLCP_COMPLETE_REP(stdout,
13858 signal->getDataPtr(),
13859 signal->length(), number());
13860 #endif
13861
13862 LcpCompleteRep * rep = (LcpCompleteRep*)signal->getDataPtr();
13863
13864 if (rep->nodeId == LcpFragRep::BROADCAST_REQ)
13865 {
13866 jam();
13867 ndbrequire(refToNode(signal->getSendersBlockRef()) == getOwnNodeId());
13868
13869 /**
13870 * Set correct nodeId
13871 */
13872 rep->nodeId = getOwnNodeId();
13873
13874 NodeReceiverGroup rg(DBDIH, c_lcpState.m_participatingDIH);
13875 rg.m_nodes.clear(getOwnNodeId());
13876 sendSignal(rg, GSN_LCP_COMPLETE_REP, signal, signal->getLength(), JBB);
13877
13878 /**
13879 * and continue processing
13880 */
13881 }
13882
13883 Uint32 lcpId = rep->lcpId;
13884 Uint32 nodeId = rep->nodeId;
13885 Uint32 blockNo = rep->blockNo;
13886
13887 if(c_lcpMasterTakeOverState.state > LMTOS_WAIT_LCP_FRAG_REP){
13888 jam();
13889 /**
13890 * Don't allow LCP_COMPLETE_REP to arrive during
13891 * LCP master take over
13892 */
13893 ndbrequire(isMaster());
13894 ndbrequire(blockNo == DBDIH);
13895 sendSignalWithDelay(reference(), GSN_LCP_COMPLETE_REP, signal, 100,
13896 signal->length());
13897 return;
13898 }
13899
13900 ndbrequire(c_lcpState.lcpStatus != LCP_STATUS_IDLE);
13901
13902 switch(blockNo){
13903 case DBLQH:
13904 jam();
13905 c_lcpState.m_LCP_COMPLETE_REP_Counter_LQH.clearWaitingFor(nodeId);
13906 ndbrequire(!c_lcpState.m_LAST_LCP_FRAG_ORD.isWaitingFor(nodeId));
13907 break;
13908 case DBDIH:
13909 jam();
13910 ndbrequire(isMaster());
13911 c_lcpState.m_LCP_COMPLETE_REP_Counter_DIH.clearWaitingFor(nodeId);
13912 break;
13913 case 0:
13914 jam();
13915 ndbrequire(!isMaster());
13916 ndbrequire(c_lcpState.m_LCP_COMPLETE_REP_From_Master_Received == false);
13917 c_lcpState.m_LCP_COMPLETE_REP_From_Master_Received = true;
13918 break;
13919 default:
13920 ndbrequire(false);
13921 }
13922 ndbrequire(lcpId == SYSFILE->latestLCP_ID);
13923
13924 allNodesLcpCompletedLab(signal);
13925 return;
13926 }
13927
allNodesLcpCompletedLab(Signal * signal)13928 void Dbdih::allNodesLcpCompletedLab(Signal* signal)
13929 {
13930 jam();
13931
13932 if (c_lcpState.lcpStatus != LCP_TAB_SAVED) {
13933 jam();
13934 /**
13935 * We have not sent LCP_COMPLETE_REP to master DIH yet
13936 */
13937 return;
13938 }//if
13939
13940 if (!c_lcpState.m_LCP_COMPLETE_REP_Counter_LQH.done()){
13941 jam();
13942 return;
13943 }
13944
13945 if (!c_lcpState.m_LCP_COMPLETE_REP_Counter_DIH.done()){
13946 jam();
13947 return;
13948 }
13949
13950 if (!isMaster() &&
13951 c_lcpState.m_LCP_COMPLETE_REP_From_Master_Received == false){
13952 jam();
13953 /**
13954 * Wait until master DIH has signaled lcp is complete
13955 */
13956 return;
13957 }
13958
13959 if(c_lcpMasterTakeOverState.state != LMTOS_IDLE){
13960 jam();
13961 #ifdef VM_TRACE
13962 g_eventLogger->info("Exiting from allNodesLcpCompletedLab");
13963 #endif
13964 return;
13965 }
13966
13967
13968 /*------------------------------------------------------------------------ */
13969 /* WE HAVE NOW COMPLETED A LOCAL CHECKPOINT. WE ARE NOW READY TO WAIT */
13970 /* FOR THE NEXT LOCAL CHECKPOINT. SEND WITHOUT TIME-OUT SINCE IT MIGHT */
13971 /* BE TIME TO START THE NEXT LOCAL CHECKPOINT IMMEDIATELY. */
13972 /* CLEAR BIT 3 OF SYSTEM RESTART BITS TO INDICATE THAT THERE IS NO */
13973 /* LOCAL CHECKPOINT ONGOING. THIS WILL BE WRITTEN AT SOME LATER TIME */
13974 /* DURING A GLOBAL CHECKPOINT. IT IS NOT NECESSARY TO WRITE IT */
13975 /* IMMEDIATELY. WE WILL ALSO CLEAR BIT 2 OF SYSTEM RESTART BITS IF ALL */
13976 /* CURRENTLY ACTIVE NODES COMPLETED THE LOCAL CHECKPOINT. */
13977 /*------------------------------------------------------------------------ */
13978 CRASH_INSERTION(7019);
13979 signal->setTrace(0);
13980
13981 c_lcpState.setLcpStatus(LCP_STATUS_IDLE, __LINE__);
13982
13983 /**
13984 * Update m_local_lcp_state
13985 */
13986 m_local_lcp_state.lcp_complete_rep(c_newest_restorable_gci);
13987
13988 if (isMaster())
13989 {
13990 /**
13991 * Check for any "completed" TO
13992 */
13993 TakeOverRecordPtr takeOverPtr;
13994 for (c_activeTakeOverList.first(takeOverPtr); !takeOverPtr.isNull();
13995 c_activeTakeOverList.next(takeOverPtr))
13996 {
13997 jam();
13998 Ptr<NodeRecord> nodePtr;
13999 nodePtr.i = takeOverPtr.p->toStartingNode;
14000 if (takeOverPtr.p->toMasterStatus == TakeOverRecord::TO_WAIT_LCP)
14001 {
14002 jam();
14003 if (c_lcpState.m_participatingLQH.get(nodePtr.i))
14004 {
14005 jam();
14006 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
14007 ndbrequire(nodePtr.p->copyCompleted == 2);
14008
14009 EndToConf * conf = (EndToConf *)signal->getDataPtrSend();
14010 conf->senderData = takeOverPtr.p->m_senderData;
14011 conf->sendingNodeId = cownNodeId;
14012 conf->startingNodeId = nodePtr.i;
14013 sendSignal(takeOverPtr.p->m_senderRef, GSN_END_TOCONF, signal,
14014 EndToConf::SignalLength, JBB);
14015
14016 releaseTakeOver(takeOverPtr);
14017 }
14018 }
14019 }
14020 }
14021
14022 Sysfile::clearLCPOngoing(SYSFILE->systemRestartBits);
14023 setLcpActiveStatusEnd(signal);
14024
14025 if(!isMaster()){
14026 jam();
14027 /**
14028 * We're not master, be content
14029 */
14030 return;
14031 }
14032
14033 // Send LCP_COMPLETE_REP to all other nodes
14034 // allowing them to set their lcpStatus to LCP_STATUS_IDLE
14035 LcpCompleteRep * rep = (LcpCompleteRep*)signal->getDataPtrSend();
14036 rep->nodeId = getOwnNodeId();
14037 rep->lcpId = SYSFILE->latestLCP_ID;
14038 rep->blockNo = 0; // 0 = Sent from master
14039
14040 NodeRecordPtr nodePtr;
14041 nodePtr.i = cfirstAliveNode;
14042 do {
14043 jam();
14044 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
14045 if (nodePtr.i != cownNodeId){
14046 BlockReference ref = calcDihBlockRef(nodePtr.i);
14047 sendSignal(ref, GSN_LCP_COMPLETE_REP, signal,
14048 LcpCompleteRep::SignalLength, JBB);
14049 }
14050 nodePtr.i = nodePtr.p->nextNode;
14051 } while (nodePtr.i != RNIL);
14052
14053
14054 jam();
14055 /***************************************************************************/
14056 // Report the event that a local checkpoint has completed.
14057 /***************************************************************************/
14058 signal->theData[0] = NDB_LE_LocalCheckpointCompleted; //Event type
14059 signal->theData[1] = SYSFILE->latestLCP_ID;
14060 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
14061
14062 if (c_newest_restorable_gci > c_lcpState.lcpStopGcp &&
14063 !(ERROR_INSERTED(7222) || ERROR_INSERTED(7223)))
14064 {
14065 jam();
14066 c_lcpState.lcpStopGcp = c_newest_restorable_gci;
14067 }
14068
14069 /**
14070 * Start checking for next LCP
14071 */
14072 checkLcpStart(signal, __LINE__);
14073
14074 Mutex mutex(signal, c_mutexMgr, c_fragmentInfoMutex_lcp);
14075 mutex.unlock();
14076
14077 c_lcpState.m_lcp_time = c_current_time - c_lcpState.m_start_time;
14078
14079 if (cwaitLcpSr == true) {
14080 jam();
14081 cwaitLcpSr = false;
14082 ndbsttorry10Lab(signal, __LINE__);
14083 return;
14084 }//if
14085
14086
14087 if (c_nodeStartMaster.blockLcp == true) {
14088 jam();
14089 lcpBlockedLab(signal, false, c_nodeStartMaster.startNode);
14090 return;
14091 }//if
14092 return;
14093 }//Dbdih::allNodesLcpCompletedLab()
14094
14095 /******************************************************************************/
14096 /* ********** TABLE UPDATE MODULE *************/
14097 /* ****************************************************************************/
14098 /* ------------------------------------------------------------------------- */
14099 /* THIS MODULE IS USED TO UPDATE THE TABLE DESCRIPTION. IT STARTS BY */
14100 /* CREATING THE FIRST TABLE FILE, THEN UPDATES THIS FILE AND CLOSES IT.*/
14101 /* AFTER THAT THE SAME HAPPENS WITH THE SECOND FILE. AFTER THAT THE */
14102 /* TABLE DISTRIBUTION HAS BEEN UPDATED. */
14103 /* */
14104 /* THE REASON FOR CREATING THE FILE AND NOT OPENING IT IS TO ENSURE */
14105 /* THAT WE DO NOT GET A MIX OF OLD AND NEW INFORMATION IN THE FILE IN */
14106 /* ERROR SITUATIONS. */
14107 /* ------------------------------------------------------------------------- */
tableUpdateLab(Signal * signal,TabRecordPtr tabPtr)14108 void Dbdih::tableUpdateLab(Signal* signal, TabRecordPtr tabPtr) {
14109 FileRecordPtr filePtr;
14110 if(tabPtr.p->tabStorage == TabRecord::ST_TEMPORARY) {
14111 // For temporary tables we do not write to disk. Mark both copies 0 and 1
14112 // as done, and go straight to the after-close code.
14113 filePtr.i = tabPtr.p->tabFile[1];
14114 ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
14115 tableCloseLab(signal, filePtr);
14116 return;
14117 }
14118 filePtr.i = tabPtr.p->tabFile[0];
14119 ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
14120 createFileRw(signal, filePtr);
14121 filePtr.p->reqStatus = FileRecord::TABLE_CREATE;
14122 return;
14123 }//Dbdih::tableUpdateLab()
14124
tableCreateLab(Signal * signal,FileRecordPtr filePtr)14125 void Dbdih::tableCreateLab(Signal* signal, FileRecordPtr filePtr)
14126 {
14127 TabRecordPtr tabPtr;
14128 tabPtr.i = filePtr.p->tabRef;
14129 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
14130 writeTabfile(signal, tabPtr.p, filePtr);
14131 filePtr.p->reqStatus = FileRecord::TABLE_WRITE;
14132 return;
14133 }//Dbdih::tableCreateLab()
14134
tableWriteLab(Signal * signal,FileRecordPtr filePtr)14135 void Dbdih::tableWriteLab(Signal* signal, FileRecordPtr filePtr)
14136 {
14137 closeFile(signal, filePtr);
14138 filePtr.p->reqStatus = FileRecord::TABLE_CLOSE;
14139 return;
14140 }//Dbdih::tableWriteLab()
14141
tableCloseLab(Signal * signal,FileRecordPtr filePtr)14142 void Dbdih::tableCloseLab(Signal* signal, FileRecordPtr filePtr)
14143 {
14144 TabRecordPtr tabPtr;
14145 tabPtr.i = filePtr.p->tabRef;
14146 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
14147 if (filePtr.i == tabPtr.p->tabFile[0]) {
14148 jam();
14149 filePtr.i = tabPtr.p->tabFile[1];
14150 ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
14151 createFileRw(signal, filePtr);
14152 filePtr.p->reqStatus = FileRecord::TABLE_CREATE;
14153 return;
14154 }//if
14155 switch (tabPtr.p->tabUpdateState) {
14156 case TabRecord::US_LOCAL_CHECKPOINT:
14157 jam();
14158 releaseTabPages(tabPtr.i);
14159 signal->theData[0] = DihContinueB::ZCHECK_LCP_COMPLETED;
14160 sendSignal(reference(), GSN_CONTINUEB, signal, 1, JBB);
14161
14162 tabPtr.p->tabCopyStatus = TabRecord::CS_IDLE;
14163 tabPtr.p->tabUpdateState = TabRecord::US_IDLE;
14164 tabPtr.p->tabLcpStatus = TabRecord::TLS_COMPLETED;
14165 return;
14166 break;
14167 case TabRecord::US_REMOVE_NODE:
14168 jam();
14169 releaseTabPages(tabPtr.i);
14170 tabPtr.p->tabCopyStatus = TabRecord::CS_IDLE;
14171 tabPtr.p->tabUpdateState = TabRecord::US_IDLE;
14172 if (tabPtr.p->tabLcpStatus == TabRecord::TLS_WRITING_TO_FILE) {
14173 jam();
14174 tabPtr.p->tabLcpStatus = TabRecord::TLS_COMPLETED;
14175 signal->theData[0] = DihContinueB::ZCHECK_LCP_COMPLETED;
14176 sendSignal(reference(), GSN_CONTINUEB, signal, 1, JBB);
14177 }//if
14178 signal->theData[0] = DihContinueB::ZREMOVE_NODE_FROM_TABLE;
14179 signal->theData[1] = tabPtr.p->tabRemoveNode;
14180 signal->theData[2] = tabPtr.i + 1;
14181 sendSignal(reference(), GSN_CONTINUEB, signal, 3, JBB);
14182 return;
14183 break;
14184 case TabRecord::US_INVALIDATE_NODE_LCP:
14185 jam();
14186 releaseTabPages(tabPtr.i);
14187 tabPtr.p->tabCopyStatus = TabRecord::CS_IDLE;
14188 tabPtr.p->tabUpdateState = TabRecord::US_IDLE;
14189
14190 signal->theData[0] = DihContinueB::ZINVALIDATE_NODE_LCP;
14191 signal->theData[1] = tabPtr.p->tabRemoveNode;
14192 signal->theData[2] = tabPtr.i + 1;
14193 if (ERROR_INSERTED(7204))
14194 {
14195 sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 2000, 3);
14196 }
14197 else
14198 {
14199 sendSignal(reference(), GSN_CONTINUEB, signal, 3, JBB);
14200 }
14201 return;
14202 case TabRecord::US_COPY_TAB_REQ:
14203 jam();
14204 tabPtr.p->tabUpdateState = TabRecord::US_IDLE;
14205 copyTabReq_complete(signal, tabPtr);
14206 return;
14207 break;
14208 case TabRecord::US_ADD_TABLE_MASTER:
14209 jam();
14210 releaseTabPages(tabPtr.i);
14211 tabPtr.p->tabUpdateState = TabRecord::US_IDLE;
14212 signal->theData[0] = DihContinueB::ZDIH_ADD_TABLE_MASTER;
14213 signal->theData[1] = tabPtr.i;
14214 sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
14215 return;
14216 break;
14217 case TabRecord::US_ADD_TABLE_SLAVE:
14218 jam();
14219 releaseTabPages(tabPtr.i);
14220 tabPtr.p->tabUpdateState = TabRecord::US_IDLE;
14221 signal->theData[0] = DihContinueB::ZDIH_ADD_TABLE_SLAVE;
14222 signal->theData[1] = tabPtr.i;
14223 sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
14224 return;
14225 break;
14226 case TabRecord::US_CALLBACK:
14227 {
14228 jam();
14229 releaseTabPages(tabPtr.i);
14230 tabPtr.p->tabCopyStatus = TabRecord::CS_IDLE;
14231 tabPtr.p->tabUpdateState = TabRecord::US_IDLE;
14232
14233 Ptr<ConnectRecord> connectPtr;
14234 connectPtr.i = tabPtr.p->connectrec;
14235 ptrCheckGuard(connectPtr, cconnectFileSize, connectRecord);
14236 execute(signal, connectPtr.p->m_callback, 0);
14237 return;
14238 }
14239 default:
14240 ndbrequire(false);
14241 return;
14242 break;
14243 }//switch
14244 }//Dbdih::tableCloseLab()
14245
checkGcpStopLab(Signal * signal)14246 void Dbdih::checkGcpStopLab(Signal* signal)
14247 {
14248 Uint32 cnt0 = ++m_gcp_monitor.m_gcp_save.m_counter;
14249 Uint32 cnt1 = ++m_gcp_monitor.m_micro_gcp.m_counter;
14250
14251 if (m_gcp_monitor.m_gcp_save.m_gci == m_gcp_save.m_gci)
14252 {
14253 jam();
14254 if (m_gcp_monitor.m_gcp_save.m_max_lag &&
14255 cnt0 == m_gcp_monitor.m_gcp_save.m_max_lag)
14256 {
14257 crashSystemAtGcpStop(signal, false);
14258 return;
14259 }
14260
14261 Uint32 threshold = 60; // seconds
14262 if (cnt0 && ((cnt0 % (threshold * 10)) == 0))
14263 {
14264 if (m_gcp_monitor.m_gcp_save.m_max_lag)
14265 {
14266 warningEvent("GCP Monitor: GCP_SAVE lag %u seconds"
14267 " (max lag: %us)",
14268 cnt0/10, m_gcp_monitor.m_gcp_save.m_max_lag/10);
14269 }
14270 else
14271 {
14272 warningEvent("GCP Monitor: GCP_SAVE lag %u seconds"
14273 " (no max lag)",
14274 cnt0/10);
14275 }
14276 }
14277 }
14278 else
14279 {
14280 jam();
14281 m_gcp_monitor.m_gcp_save.m_gci = m_gcp_save.m_gci;
14282 m_gcp_monitor.m_gcp_save.m_counter = 0;
14283 }
14284
14285 if (m_gcp_monitor.m_micro_gcp.m_gci == m_micro_gcp.m_current_gci)
14286 {
14287 jam();
14288 Uint32 cmp = m_micro_gcp.m_enabled ?
14289 m_gcp_monitor.m_micro_gcp.m_max_lag :
14290 m_gcp_monitor.m_gcp_save.m_max_lag;
14291
14292 if (cmp && cnt1 == cmp)
14293 {
14294 crashSystemAtGcpStop(signal, false);
14295 return;
14296 }
14297
14298 Uint32 threshold = 10; // seconds
14299 if (cnt1 && ((cnt0 % (threshold * 10)) == 0))
14300 {
14301 if (m_gcp_monitor.m_micro_gcp.m_max_lag)
14302 {
14303 warningEvent("GCP Monitor: GCP_COMMIT lag %u seconds"
14304 " (max lag: %u)",
14305 cnt1/10, m_gcp_monitor.m_micro_gcp.m_max_lag/10);
14306 }
14307 else
14308 {
14309 warningEvent("GCP Monitor: GCP_COMMIT lag %u seconds"
14310 " (no max lag)",
14311 cnt1/10);
14312 }
14313 }
14314 }
14315 else
14316 {
14317 jam();
14318 m_gcp_monitor.m_micro_gcp.m_counter = 0;
14319 m_gcp_monitor.m_micro_gcp.m_gci = m_micro_gcp.m_current_gci;
14320 }
14321
14322 signal->theData[0] = DihContinueB::ZCHECK_GCP_STOP;
14323 sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 100, 1);
14324 return;
14325 }//Dbdih::checkGcpStopLab()
14326
14327 void
dumpGcpStop()14328 Dbdih::dumpGcpStop()
14329 {
14330 ndbout_c("c_nodeStartMaster.blockGcp: %u %u",
14331 c_nodeStartMaster.blockGcp,
14332 c_nodeStartMaster.startNode);
14333 ndbout_c("m_gcp_save.m_counter: %u m_gcp_save.m_max_lag: %u",
14334 m_gcp_monitor.m_gcp_save.m_counter,
14335 m_gcp_monitor.m_gcp_save.m_max_lag);
14336 ndbout_c("m_micro_gcp.m_counter: %u m_micro_gcp.m_max_lag: %u",
14337 m_gcp_monitor.m_micro_gcp.m_counter,
14338 m_gcp_monitor.m_micro_gcp.m_max_lag);
14339
14340
14341 ndbout_c("m_gcp_save.m_state: %u", m_gcp_save.m_state);
14342 ndbout_c("m_gcp_save.m_master.m_state: %u", m_gcp_save.m_master.m_state);
14343 ndbout_c("m_micro_gcp.m_state: %u", m_micro_gcp.m_state);
14344 ndbout_c("m_micro_gcp.m_master.m_state: %u", m_micro_gcp.m_master.m_state);
14345
14346 ndbout_c("c_COPY_GCIREQ_Counter = %s", c_COPY_GCIREQ_Counter.getText());
14347 ndbout_c("c_COPY_TABREQ_Counter = %s", c_COPY_TABREQ_Counter.getText());
14348 ndbout_c("c_CREATE_FRAGREQ_Counter = %s", c_CREATE_FRAGREQ_Counter.getText());
14349 ndbout_c("c_DIH_SWITCH_REPLICA_REQ_Counter = %s",
14350 c_DIH_SWITCH_REPLICA_REQ_Counter.getText());
14351 ndbout_c("c_EMPTY_LCP_REQ_Counter = %s",c_EMPTY_LCP_REQ_Counter.getText());
14352 ndbout_c("c_GCP_COMMIT_Counter = %s", c_GCP_COMMIT_Counter.getText());
14353 ndbout_c("c_GCP_PREPARE_Counter = %s", c_GCP_PREPARE_Counter.getText());
14354 ndbout_c("c_GCP_SAVEREQ_Counter = %s", c_GCP_SAVEREQ_Counter.getText());
14355 ndbout_c("c_SUB_GCP_COMPLETE_REP_Counter = %s",
14356 c_SUB_GCP_COMPLETE_REP_Counter.getText());
14357 ndbout_c("c_INCL_NODEREQ_Counter = %s", c_INCL_NODEREQ_Counter.getText());
14358 ndbout_c("c_MASTER_GCPREQ_Counter = %s", c_MASTER_GCPREQ_Counter.getText());
14359 ndbout_c("c_MASTER_LCPREQ_Counter = %s", c_MASTER_LCPREQ_Counter.getText());
14360 ndbout_c("c_START_INFOREQ_Counter = %s", c_START_INFOREQ_Counter.getText());
14361 ndbout_c("c_START_RECREQ_Counter = %s", c_START_RECREQ_Counter.getText());
14362 ndbout_c("c_STOP_ME_REQ_Counter = %s", c_STOP_ME_REQ_Counter.getText());
14363 ndbout_c("c_TC_CLOPSIZEREQ_Counter = %s", c_TC_CLOPSIZEREQ_Counter.getText());
14364 ndbout_c("c_TCGETOPSIZEREQ_Counter = %s", c_TCGETOPSIZEREQ_Counter.getText());
14365
14366 ndbout_c("m_copyReason: %d m_waiting: %u %u",
14367 c_copyGCIMaster.m_copyReason,
14368 c_copyGCIMaster.m_waiting[0],
14369 c_copyGCIMaster.m_waiting[1]);
14370
14371 ndbout_c("c_copyGCISlave: sender{Data, Ref} %d %x reason: %d nextWord: %d",
14372 c_copyGCISlave.m_senderData,
14373 c_copyGCISlave.m_senderRef,
14374 c_copyGCISlave.m_copyReason,
14375 c_copyGCISlave.m_expectedNextWord);
14376 }
14377
/**
 * GCP stop detected.
 *
 * Dumps the GCP state (dumpGcpStop()), saves and resets both monitor
 * counters, and then decides who is to blame:
 *  - local == true: go directly to the local crash path (dolocal);
 *  - c_nodeStartMaster.blockGcp == 2: send SYSTEM_ERROR to NDBCNTR on the
 *    starting node and return;
 *  - the GCP_SAVE counter reached its max lag: depending on the master
 *    GCP_SAVE state, send DUMP_STATE_ORD / SYSTEM_ERROR to the nodes still
 *    present in the corresponding signal counter and return;
 *  - the micro-GCP counter reached its max lag: same, depending on the
 *    master micro-GCP state;
 *  - in every remaining case: dump the state of the two restart info files
 *    and raise SYSTEM_ERROR locally, followed by ndbrequire(false).
 *
 * @param signal  Signal object reused for all outgoing/direct signals.
 * @param local   When true, skip the diagnosis and crash this node.
 */
void Dbdih::crashSystemAtGcpStop(Signal* signal, bool local)
{
  dumpGcpStop();
  // Remember the counter values that triggered the call before resetting
  // them; the decisions below are taken on the saved copies.
  Uint32 save_counter = m_gcp_monitor.m_gcp_save.m_counter;
  Uint32 micro_counter = m_gcp_monitor.m_micro_gcp.m_counter;
  m_gcp_monitor.m_gcp_save.m_counter = 0;
  m_gcp_monitor.m_micro_gcp.m_counter = 0;

  if (local)
    goto dolocal;

  if (c_nodeStartMaster.blockGcp == 2)
  {
    jam();
    /**
     * Starting node...is delaying GCP too long...
     * kill it
     */
    SystemError * const sysErr = (SystemError*)&signal->theData[0];
    sysErr->errorCode = SystemError::GCPStopDetected;
    sysErr->errorRef = reference();
    sysErr->data[0] = m_gcp_save.m_master.m_state;
    sysErr->data[1] = cgcpOrderBlocked;
    sysErr->data[2] = m_micro_gcp.m_master.m_state;
    sendSignal(calcNdbCntrBlockRef(c_nodeStartMaster.startNode),
               GSN_SYSTEM_ERROR, signal, SystemError::SignalLength, JBA);
    return;
  }

  // GCP_SAVE lag reached its limit: act on the master GCP_SAVE state.
  if (save_counter == m_gcp_monitor.m_gcp_save.m_max_lag)
  {
    switch(m_gcp_save.m_master.m_state){
    case GcpSave::GCP_SAVE_IDLE:
    {
      /**
       * No switch for a long time...and we're idle...it is *our* fault
       */
      // NOTE: 'local' is not read again after this point; control simply
      // continues with the micro-GCP check below.
      local = true;
      break;
    }
    case GcpSave::GCP_SAVE_REQ:
    {
      jam();
      // Waiting for GCP_SAVE replies: send DUMP_STATE_ORD 2305 to DBLQH on
      // the nodes still present in the counter.
      NodeReceiverGroup rg(DBLQH, c_GCP_SAVEREQ_Counter);
      signal->theData[0] = 2305;
      sendSignal(rg, GSN_DUMP_STATE_ORD, signal, 1, JBB);

      warningEvent("Detected GCP stop(%d)...sending kill to %s",
                   m_gcp_save.m_master.m_state, c_GCP_SAVEREQ_Counter.getText());
      ndbout_c("Detected GCP stop(%d)...sending kill to %s",
               m_gcp_save.m_master.m_state, c_GCP_SAVEREQ_Counter.getText());
      // Being in this state with nobody left to wait for is inconsistent.
      ndbrequire(!c_GCP_SAVEREQ_Counter.done());
      return;
    }
    case GcpSave::GCP_SAVE_COPY_GCI:
    {
      /**
       * We're waiting for a COPY_GCICONF
       */
      warningEvent("Detected GCP stop(%d)...sending kill to %s",
                   m_gcp_save.m_master.m_state, c_COPY_GCIREQ_Counter.getText());
      ndbout_c("Detected GCP stop(%d)...sending kill to %s",
               m_gcp_save.m_master.m_state, c_COPY_GCIREQ_Counter.getText());

      {
        // Ask DBDIH on the outstanding nodes to dump state (7022)...
        NodeReceiverGroup rg(DBDIH, c_COPY_GCIREQ_Counter);
        signal->theData[0] = 7022;
        sendSignal(rg, GSN_DUMP_STATE_ORD, signal, 1, JBA);
      }

      {
        // ...then send SYSTEM_ERROR to NDBCNTR on the same nodes.
        NodeReceiverGroup rg(NDBCNTR, c_COPY_GCIREQ_Counter);
        SystemError * const sysErr = (SystemError*)&signal->theData[0];
        sysErr->errorCode = SystemError::GCPStopDetected;
        sysErr->errorRef = reference();
        sysErr->data[0] = m_gcp_save.m_master.m_state;
        sysErr->data[1] = cgcpOrderBlocked;
        sysErr->data[2] = m_micro_gcp.m_master.m_state;
        sendSignal(rg, GSN_SYSTEM_ERROR, signal,
                   SystemError::SignalLength, JBA);
      }
      ndbrequire(!c_COPY_GCIREQ_Counter.done());
      return;
    }
    case GcpSave::GCP_SAVE_CONF:
      /**
       * This *should* not happen (not a master state)
       */
      local = true;
      break;
    }
  }

  // Micro-GCP lag reached its limit: act on the master micro-GCP state.
  if (micro_counter == m_gcp_monitor.m_micro_gcp.m_max_lag)
  {
    switch(m_micro_gcp.m_master.m_state){
    case MicroGcp::M_GCP_IDLE:
    {
      /**
       * No switch for a long time...and we're idle...it is *our* fault
       */
      local = true;
      break;
    }
    case MicroGcp::M_GCP_PREPARE:
    {
      /**
       * We're waiting for a GCP PREPARE CONF
       */
      // NOTE(review): the messages below print m_micro_gcp.m_state while
      // the switch is on m_micro_gcp.m_master.m_state - confirm intended.
      warningEvent("Detected GCP stop(%d)...sending kill to %s",
                   m_micro_gcp.m_state, c_GCP_PREPARE_Counter.getText());
      ndbout_c("Detected GCP stop(%d)...sending kill to %s",
               m_micro_gcp.m_state, c_GCP_PREPARE_Counter.getText());

      {
        NodeReceiverGroup rg(DBDIH, c_GCP_PREPARE_Counter);
        signal->theData[0] = 7022;
        sendSignal(rg, GSN_DUMP_STATE_ORD, signal, 1, JBA);
      }

      {
        NodeReceiverGroup rg(NDBCNTR, c_GCP_PREPARE_Counter);
        SystemError * const sysErr = (SystemError*)&signal->theData[0];
        sysErr->errorCode = SystemError::GCPStopDetected;
        sysErr->errorRef = reference();
        sysErr->data[0] = m_gcp_save.m_master.m_state;
        sysErr->data[1] = cgcpOrderBlocked;
        sysErr->data[2] = m_micro_gcp.m_master.m_state;
        sendSignal(rg, GSN_SYSTEM_ERROR, signal,
                   SystemError::SignalLength, JBA);
      }
      ndbrequire(!c_GCP_PREPARE_Counter.done());
      return;
    }
    case MicroGcp::M_GCP_COMMIT:
    {
      // Waiting on the nodes still present in c_GCP_COMMIT_Counter.
      warningEvent("Detected GCP stop(%d)...sending kill to %s",
                   m_micro_gcp.m_state, c_GCP_COMMIT_Counter.getText());
      ndbout_c("Detected GCP stop(%d)...sending kill to %s",
               m_micro_gcp.m_state, c_GCP_COMMIT_Counter.getText());

      {
        NodeReceiverGroup rg(DBDIH, c_GCP_COMMIT_Counter);
        signal->theData[0] = 7022;
        sendSignal(rg, GSN_DUMP_STATE_ORD, signal, 1, JBA);
      }

      {
        NodeReceiverGroup rg(NDBCNTR, c_GCP_COMMIT_Counter);
        SystemError * const sysErr = (SystemError*)&signal->theData[0];
        sysErr->errorCode = SystemError::GCPStopDetected;
        sysErr->errorRef = reference();
        sysErr->data[0] = m_gcp_save.m_master.m_state;
        sysErr->data[1] = cgcpOrderBlocked;
        sysErr->data[2] = m_micro_gcp.m_master.m_state;
        sendSignal(rg, GSN_SYSTEM_ERROR, signal,
                   SystemError::SignalLength, JBA);
      }
      ndbrequire(!c_GCP_COMMIT_Counter.done());
      return;
    }
    case MicroGcp::M_GCP_COMMITTED:
      /**
       * This *should* not happen (not a master state)
       */
      local = true;
      break;
    case MicroGcp::M_GCP_COMPLETE:
      // Waiting on the nodes still present in
      // c_SUB_GCP_COMPLETE_REP_Counter. Unlike the other cases this one
      // reports with infoEvent() rather than warningEvent().
      infoEvent("Detected GCP stop(%d)...sending kill to %s",
                m_micro_gcp.m_state, c_SUB_GCP_COMPLETE_REP_Counter.getText());
      ndbout_c("Detected GCP stop(%d)...sending kill to %s",
               m_micro_gcp.m_state, c_SUB_GCP_COMPLETE_REP_Counter.getText());

      {
        NodeReceiverGroup rg(DBDIH, c_SUB_GCP_COMPLETE_REP_Counter);
        signal->theData[0] = 7022;
        sendSignal(rg, GSN_DUMP_STATE_ORD, signal, 1, JBA);
      }

      {
        NodeReceiverGroup rg(NDBCNTR, c_SUB_GCP_COMPLETE_REP_Counter);
        SystemError * const sysErr = (SystemError*)&signal->theData[0];
        sysErr->errorCode = SystemError::GCPStopDetected;
        sysErr->errorRef = reference();
        sysErr->data[0] = m_gcp_save.m_master.m_state;
        sysErr->data[1] = cgcpOrderBlocked;
        sysErr->data[2] = m_micro_gcp.m_master.m_state;
        sendSignal(rg, GSN_SYSTEM_ERROR, signal,
                   SystemError::SignalLength, JBA);
      }
      ndbrequire(!c_SUB_GCP_COMPLETE_REP_Counter.done());
      return;
    }
  }

  // Local crash path: reached via 'goto dolocal' or by falling through
  // when no remote node was singled out above.
dolocal:
  FileRecordPtr file0Ptr;
  file0Ptr.i = crestartInfoFile[0];
  ptrCheckGuard(file0Ptr, cfileFileSize, fileRecord);
  FileRecordPtr file1Ptr;
  file1Ptr.i = crestartInfoFile[1];
  ptrCheckGuard(file1Ptr, cfileFileSize, fileRecord);

  ndbout_c("file[0] status: %d type: %d reqStatus: %d file1: %d %d %d",
           file0Ptr.p->fileStatus, file0Ptr.p->fileType, file0Ptr.p->reqStatus,
           file1Ptr.p->fileStatus, file1Ptr.p->fileType, file1Ptr.p->reqStatus
           );

  // Ask NDBFS to dump its state (404) for each of the two restart info
  // files.
  signal->theData[0] = 404;
  signal->theData[1] = file0Ptr.p->fileRef;
  EXECUTE_DIRECT(NDBFS, GSN_DUMP_STATE_ORD, signal, 2);

  signal->theData[0] = 404;
  signal->theData[1] = file1Ptr.p->fileRef;
  EXECUTE_DIRECT(NDBFS, GSN_DUMP_STATE_ORD, signal, 2);

  jam();
  // Raise the system error in the local NDBCNTR; ndbrequire(false) follows
  // in case the direct call returns.
  SystemError * const sysErr = (SystemError*)&signal->theData[0];
  sysErr->errorCode = SystemError::GCPStopDetected;
  sysErr->errorRef = reference();
  sysErr->data[0] = m_gcp_save.m_master.m_state;
  sysErr->data[1] = cgcpOrderBlocked;
  sysErr->data[2] = m_micro_gcp.m_master.m_state;
  EXECUTE_DIRECT(NDBCNTR, GSN_SYSTEM_ERROR,
                 signal, SystemError::SignalLength);
  ndbrequire(false);
  return;
}//Dbdih::crashSystemAtGcpStop()
14610
14611 /*************************************************************************/
14612 /* */
14613 /* MODULE: ALLOCPAGE */
14614 /* DESCRIPTION: THE SUBROUTINE IS CALLED WITH POINTER TO PAGE */
14615 /* RECORD. A PAGE RECORD IS TAKEN FROM */
14616 /* THE FREE PAGE LIST */
14617 /*************************************************************************/
allocpage(PageRecordPtr & pagePtr)14618 void Dbdih::allocpage(PageRecordPtr& pagePtr)
14619 {
14620 ndbrequire(cfirstfreepage != RNIL);
14621 pagePtr.i = cfirstfreepage;
14622 ptrCheckGuard(pagePtr, cpageFileSize, pageRecord);
14623 cfirstfreepage = pagePtr.p->nextfreepage;
14624 pagePtr.p->nextfreepage = RNIL;
14625 }//Dbdih::allocpage()
14626
14627 /*************************************************************************/
14628 /* */
14629 /* MODULE: ALLOC_STORED_REPLICA */
14630 /* DESCRIPTION: THE SUBROUTINE IS CALLED TO GET A REPLICA RECORD, */
14631 /* TO INITIALISE IT AND TO LINK IT INTO THE FRAGMENT */
14632 /* STORE RECORD. USED FOR STORED REPLICAS. */
14633 /*************************************************************************/
allocStoredReplica(FragmentstorePtr fragPtr,ReplicaRecordPtr & newReplicaPtr,Uint32 nodeId)14634 void Dbdih::allocStoredReplica(FragmentstorePtr fragPtr,
14635 ReplicaRecordPtr& newReplicaPtr,
14636 Uint32 nodeId)
14637 {
14638 Uint32 i;
14639 ReplicaRecordPtr arrReplicaPtr;
14640 ReplicaRecordPtr arrPrevReplicaPtr;
14641
14642 seizeReplicaRec(newReplicaPtr);
14643 for (i = 0; i < MAX_LCP_STORED; i++) {
14644 newReplicaPtr.p->maxGciCompleted[i] = 0;
14645 newReplicaPtr.p->maxGciStarted[i] = 0;
14646 newReplicaPtr.p->lcpId[i] = 0;
14647 newReplicaPtr.p->lcpStatus[i] = ZINVALID;
14648 }//for
14649 newReplicaPtr.p->noCrashedReplicas = 0;
14650 newReplicaPtr.p->initialGci = (Uint32)(m_micro_gcp.m_current_gci >> 32);
14651 for (i = 0; i < MAX_CRASHED_REPLICAS; i++) {
14652 newReplicaPtr.p->replicaLastGci[i] = ZINIT_REPLICA_LAST_GCI;
14653 newReplicaPtr.p->createGci[i] = ZINIT_CREATE_GCI;
14654 }//for
14655 newReplicaPtr.p->createGci[0] = (Uint32)(m_micro_gcp.m_current_gci >> 32);
14656 newReplicaPtr.p->nextLcp = 0;
14657 newReplicaPtr.p->procNode = nodeId;
14658 newReplicaPtr.p->lcpOngoingFlag = false;
14659 newReplicaPtr.p->lcpIdStarted = 0;
14660
14661 arrPrevReplicaPtr.i = RNIL;
14662 arrReplicaPtr.i = fragPtr.p->storedReplicas;
14663 while (arrReplicaPtr.i != RNIL) {
14664 jam();
14665 ptrCheckGuard(arrReplicaPtr, creplicaFileSize, replicaRecord);
14666 arrPrevReplicaPtr = arrReplicaPtr;
14667 arrReplicaPtr.i = arrReplicaPtr.p->nextReplica;
14668 }//while
14669 if (arrPrevReplicaPtr.i == RNIL) {
14670 jam();
14671 fragPtr.p->storedReplicas = newReplicaPtr.i;
14672 } else {
14673 jam();
14674 arrPrevReplicaPtr.p->nextReplica = newReplicaPtr.i;
14675 }//if
14676 fragPtr.p->noStoredReplicas++;
14677 }//Dbdih::allocStoredReplica()
14678
14679 /*************************************************************************/
14680 /* CHECK IF THE NODE CRASH IS TO ESCALATE INTO A SYSTEM CRASH. WE COULD */
14681 /* DO THIS BECAUSE ALL REPLICAS OF SOME FRAGMENT ARE LOST. WE COULD ALSO */
14682 /* DO IT AFTER MANY NODE FAILURES THAT MAKE IT VERY DIFFICULT TO RESTORE */
14683 /* DATABASE AFTER A SYSTEM CRASH. IT MIGHT EVEN BE IMPOSSIBLE AND THIS */
14684 /* MUST BE AVOIDED EVEN MORE THAN AVOIDING SYSTEM CRASHES. */
14685 /*************************************************************************/
checkEscalation()14686 void Dbdih::checkEscalation()
14687 {
14688 Uint32 TnodeGroup[MAX_NDB_NODES];
14689 NodeRecordPtr nodePtr;
14690 Uint32 i;
14691 for (i = 0; i < cnoOfNodeGroups; i++) {
14692 TnodeGroup[i] = ZFALSE;
14693 }//for
14694 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
14695 jam();
14696 ptrAss(nodePtr, nodeRecord);
14697 if (nodePtr.p->nodeStatus == NodeRecord::ALIVE &&
14698 nodePtr.p->activeStatus == Sysfile::NS_Active){
14699 ndbrequire(nodePtr.p->nodeGroup < MAX_NDB_NODES);
14700 TnodeGroup[nodePtr.p->nodeGroup] = ZTRUE;
14701 }
14702 }
14703 for (i = 0; i < cnoOfNodeGroups; i++) {
14704 jam();
14705 if (TnodeGroup[c_node_groups[i]] == ZFALSE) {
14706 jam();
14707 progError(__LINE__, NDBD_EXIT_LOST_NODE_GROUP, "Lost node group");
14708 }//if
14709 }//for
14710 }//Dbdih::checkEscalation()
14711
14712 /*************************************************************************/
14713 /* */
14714 /* MODULE: CHECK_KEEP_GCI */
14715 /* DESCRIPTION: CHECK FOR MINIMUM GCI RESTORABLE WITH NEW LOCAL */
14716 /* CHECKPOINT. */
14717 /*************************************************************************/
checkKeepGci(TabRecordPtr tabPtr,Uint32 fragId,Fragmentstore *,Uint32 replicaStartIndex)14718 void Dbdih::checkKeepGci(TabRecordPtr tabPtr, Uint32 fragId, Fragmentstore*,
14719 Uint32 replicaStartIndex)
14720 {
14721 ReplicaRecordPtr ckgReplicaPtr;
14722 ckgReplicaPtr.i = replicaStartIndex;
14723 while (ckgReplicaPtr.i != RNIL) {
14724 jam();
14725 ptrCheckGuard(ckgReplicaPtr, creplicaFileSize, replicaRecord);
14726 if (c_lcpState.m_participatingLQH.get(ckgReplicaPtr.p->procNode))
14727 {
14728 Uint32 keepGci;
14729 Uint32 oldestRestorableGci;
14730 findMinGci(ckgReplicaPtr, keepGci, oldestRestorableGci);
14731 if (keepGci < c_lcpState.keepGci) {
14732 jam();
14733 /* ----------------------------------------------------------------- */
14734 /* WE MUST KEEP LOG RECORDS SO THAT WE CAN USE ALL LOCAL CHECKPOINTS */
14735 /* THAT ARE AVAILABLE. THUS WE NEED TO CALCULATE THE MINIMUM OVER ALL*/
14736 /* FRAGMENTS. */
14737 /* ----------------------------------------------------------------- */
14738 c_lcpState.keepGci = keepGci;
14739 }//if
14740 if (oldestRestorableGci > c_lcpState.oldestRestorableGci) {
14741 jam();
14742 c_lcpState.oldestRestorableGci = oldestRestorableGci;
14743 }//if
14744 }
14745 ckgReplicaPtr.i = ckgReplicaPtr.p->nextReplica;
14746 }//while
14747 }//Dbdih::checkKeepGci()
14748
closeFile(Signal * signal,FileRecordPtr filePtr)14749 void Dbdih::closeFile(Signal* signal, FileRecordPtr filePtr)
14750 {
14751 signal->theData[0] = filePtr.p->fileRef;
14752 signal->theData[1] = reference();
14753 signal->theData[2] = filePtr.i;
14754 signal->theData[3] = ZCLOSE_NO_DELETE;
14755 sendSignal(NDBFS_REF, GSN_FSCLOSEREQ, signal, 4, JBA);
14756 }//Dbdih::closeFile()
14757
closeFileDelete(Signal * signal,FileRecordPtr filePtr)14758 void Dbdih::closeFileDelete(Signal* signal, FileRecordPtr filePtr)
14759 {
14760 signal->theData[0] = filePtr.p->fileRef;
14761 signal->theData[1] = reference();
14762 signal->theData[2] = filePtr.i;
14763 signal->theData[3] = ZCLOSE_DELETE;
14764 sendSignal(NDBFS_REF, GSN_FSCLOSEREQ, signal, 4, JBA);
14765 }//Dbdih::closeFileDelete()
14766
createFileRw(Signal * signal,FileRecordPtr filePtr)14767 void Dbdih::createFileRw(Signal* signal, FileRecordPtr filePtr)
14768 {
14769 signal->theData[0] = reference();
14770 signal->theData[1] = filePtr.i;
14771 signal->theData[2] = filePtr.p->fileName[0];
14772 signal->theData[3] = filePtr.p->fileName[1];
14773 signal->theData[4] = filePtr.p->fileName[2];
14774 signal->theData[5] = filePtr.p->fileName[3];
14775 signal->theData[6] = ZCREATE_READ_WRITE;
14776 sendSignal(NDBFS_REF, GSN_FSOPENREQ, signal, 7, JBA);
14777 }//Dbdih::createFileRw()
14778
14779 void
emptyverificbuffer(Signal * signal,Uint32 q,bool aContinueB)14780 Dbdih::emptyverificbuffer(Signal* signal, Uint32 q, bool aContinueB)
14781 {
14782 if(unlikely(getBlockCommit() == true))
14783 {
14784 jam();
14785 return;
14786 }
14787
14788 if (!isEmpty(c_diverify_queue[q]))
14789 {
14790 jam();
14791
14792 ApiConnectRecord localApiConnect;
14793 dequeue(c_diverify_queue[q], localApiConnect);
14794 ndbrequire(localApiConnect.apiGci <= m_micro_gcp.m_current_gci);
14795 signal->theData[0] = localApiConnect.senderData;
14796 signal->theData[1] = (Uint32)(m_micro_gcp.m_current_gci >> 32);
14797 signal->theData[2] = (Uint32)(m_micro_gcp.m_current_gci & 0xFFFFFFFF);
14798 signal->theData[3] = 0;
14799 sendSignal(c_diverify_queue[q].m_ref, GSN_DIVERIFYCONF, signal, 4, JBB);
14800 }
14801 else if (aContinueB == true)
14802 {
14803 jam();
14804 /**
14805 * Make sure that we don't miss any pending transactions
14806 * (transactions that are added to list by other thread
14807 * while we execute this code)
14808 */
14809 Uint32 blocks[] = { DBTC, 0 };
14810 Callback c = { safe_cast(&Dbdih::emptyverificbuffer_check), q };
14811 synchronize_threads_for_blocks(signal, blocks, c);
14812 return;
14813 }
14814
14815 if (aContinueB == true)
14816 {
14817 jam();
14818 //-----------------------------------------------------------------------
14819 // This emptying happened as part of a take-out process by continueb signals
14820 // This ensures that we will empty the queue eventually. We will also empty
14821 // one item every time we insert one item to ensure that the list doesn't
14822 // grow when it is not blocked.
14823 //-----------------------------------------------------------------------
14824 signal->theData[0] = DihContinueB::ZEMPTY_VERIFY_QUEUE;
14825 signal->theData[1] = q;
14826 sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
14827 }//if
14828
14829 return;
14830 }//Dbdih::emptyverificbuffer()
14831
14832 void
emptyverificbuffer_check(Signal * signal,Uint32 q,Uint32 retVal)14833 Dbdih::emptyverificbuffer_check(Signal* signal, Uint32 q, Uint32 retVal)
14834 {
14835 ndbrequire(retVal == 0);
14836 if (!isEmpty(c_diverify_queue[q]))
14837 {
14838 jam();
14839 signal->theData[0] = DihContinueB::ZEMPTY_VERIFY_QUEUE;
14840 signal->theData[1] = q;
14841 sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
14842 }
14843 else
14844 {
14845 /**
14846 * Done with emptyverificbuffer
14847 */
14848 c_diverify_queue[q].m_empty_done = 1;
14849 }
14850 }
14851
14852 /*************************************************************************/
14853 /* FIND THE NODES FROM WHICH WE CAN EXECUTE THE LOG TO RESTORE THE */
14854 /* DATA NODE IN A SYSTEM RESTART. */
14855 /*************************************************************************/
findLogNodes(CreateReplicaRecord * createReplica,FragmentstorePtr fragPtr,Uint32 startGci,Uint32 stopGci)14856 bool Dbdih::findLogNodes(CreateReplicaRecord* createReplica,
14857 FragmentstorePtr fragPtr,
14858 Uint32 startGci,
14859 Uint32 stopGci)
14860 {
14861 ConstPtr<ReplicaRecord> flnReplicaPtr;
14862 flnReplicaPtr.i = createReplica->replicaRec;
14863 ptrCheckGuard(flnReplicaPtr, creplicaFileSize, replicaRecord);
14864 /* --------------------------------------------------------------------- */
14865 /* WE START BY CHECKING IF THE DATA NODE CAN HANDLE THE LOG ALL BY */
14866 /* ITSELF. THIS IS THE DESIRED BEHAVIOUR. IF THIS IS NOT POSSIBLE */
14867 /* THEN WE SEARCH FOR THE BEST POSSIBLE NODES AMONG THE NODES THAT */
14868 /* ARE PART OF THIS SYSTEM RESTART. */
14869 /* THIS CAN ONLY BE HANDLED BY THE LAST CRASHED REPLICA. */
14870 /* The condition is that the replica was created before or at the */
14871 /* time of the starting gci, in addition it must have been alive */
14872 /* at the time of the stopping gci. This is checked by two */
14873 /* conditions, the first checks replicaLastGci and the second */
14874 /* checks that it is also smaller than the last gci the node was */
14875 /* involved in. This is necessary to check since createGci is set */
14876 /* Last + 1 and sometimes startGci = stopGci + 1 and in that case */
14877 /* it could happen that replicaLastGci is set to -1 with CreateGci */
14878 /* set to LastGci + 1. */
14879 /* --------------------------------------------------------------------- */
14880 arrGuard(flnReplicaPtr.p->noCrashedReplicas, MAX_CRASHED_REPLICAS);
14881 const Uint32 noCrashed = flnReplicaPtr.p->noCrashedReplicas;
14882
14883 if (!(ERROR_INSERTED(7073) || ERROR_INSERTED(7074))&&
14884 (startGci >= flnReplicaPtr.p->createGci[noCrashed]) &&
14885 (stopGci <= flnReplicaPtr.p->replicaLastGci[noCrashed]) &&
14886 (stopGci <= SYSFILE->lastCompletedGCI[flnReplicaPtr.p->procNode])) {
14887 jam();
14888 /* --------------------------------------------------------------------- */
14889 /* WE FOUND ALL THE LOG RECORDS NEEDED IN THE DATA NODE. WE WILL */
14890 /* USE THOSE. */
14891 /* --------------------------------------------------------------------- */
14892 createReplica->noLogNodes = 1;
14893 createReplica->logStartGci[0] = startGci;
14894 createReplica->logStopGci[0] = stopGci;
14895 createReplica->logNodeId[0] = flnReplicaPtr.p->procNode;
14896 return true;
14897 }//if
14898 Uint32 logNode = 0;
14899 do {
14900 Uint32 fblStopGci;
14901 jam();
14902 if(!findBestLogNode(createReplica,
14903 fragPtr,
14904 startGci,
14905 stopGci,
14906 logNode,
14907 fblStopGci)){
14908 jam();
14909 return false;
14910 }
14911
14912 logNode++;
14913 if (fblStopGci >= stopGci) {
14914 jam();
14915 createReplica->noLogNodes = logNode;
14916 return true;
14917 }//if
14918 startGci = fblStopGci + 1;
14919 if (logNode >= MAX_LOG_EXEC)
14920 {
14921 jam();
14922 break;
14923 }//if
14924 } while (1);
14925 /* --------------------------------------------------------------------- */
14926 /* IT WAS NOT POSSIBLE TO RESTORE THE REPLICA. THIS CAN EITHER BE */
14927 /* BECAUSE OF LACKING NODES OR BECAUSE OF A REALLY SERIOUS PROBLEM.*/
14928 /* --------------------------------------------------------------------- */
14929 return false;
14930 }//Dbdih::findLogNodes()
14931
14932 /*************************************************************************/
14933 /* FIND THE BEST POSSIBLE LOG NODE TO EXECUTE THE LOG AS SPECIFIED */
14934 /* BY THE INPUT PARAMETERS. WE SCAN THROUGH ALL ALIVE REPLICAS. */
14935 /* THIS MEANS STORED, OLD_STORED */
14936 /*************************************************************************/
14937 bool
findBestLogNode(CreateReplicaRecord * createReplica,FragmentstorePtr fragPtr,Uint32 startGci,Uint32 stopGci,Uint32 logNode,Uint32 & fblStopGci)14938 Dbdih::findBestLogNode(CreateReplicaRecord* createReplica,
14939 FragmentstorePtr fragPtr,
14940 Uint32 startGci,
14941 Uint32 stopGci,
14942 Uint32 logNode,
14943 Uint32& fblStopGci)
14944 {
14945 ConstPtr<ReplicaRecord> fblFoundReplicaPtr;
14946 ConstPtr<ReplicaRecord> fblReplicaPtr;
14947 LINT_INIT(fblFoundReplicaPtr.p);
14948
14949 /* --------------------------------------------------------------------- */
14950 /* WE START WITH ZERO AS FOUND TO ENSURE THAT FIRST HIT WILL BE */
14951 /* BETTER. */
14952 /* --------------------------------------------------------------------- */
14953 fblStopGci = 0;
14954 fblReplicaPtr.i = fragPtr.p->storedReplicas;
14955 while (fblReplicaPtr.i != RNIL) {
14956 jam();
14957 ptrCheckGuard(fblReplicaPtr, creplicaFileSize, replicaRecord);
14958 if (m_sr_nodes.get(fblReplicaPtr.p->procNode))
14959 {
14960 jam();
14961 Uint32 fliStopGci = findLogInterval(fblReplicaPtr, startGci);
14962 if (fliStopGci > fblStopGci)
14963 {
14964 jam();
14965 fblStopGci = fliStopGci;
14966 fblFoundReplicaPtr = fblReplicaPtr;
14967 }//if
14968 }//if
14969 fblReplicaPtr.i = fblReplicaPtr.p->nextReplica;
14970 }//while
14971 fblReplicaPtr.i = fragPtr.p->oldStoredReplicas;
14972 while (fblReplicaPtr.i != RNIL) {
14973 jam();
14974 ptrCheckGuard(fblReplicaPtr, creplicaFileSize, replicaRecord);
14975 if (m_sr_nodes.get(fblReplicaPtr.p->procNode))
14976 {
14977 jam();
14978 Uint32 fliStopGci = findLogInterval(fblReplicaPtr, startGci);
14979 if (fliStopGci > fblStopGci)
14980 {
14981 jam();
14982 fblStopGci = fliStopGci;
14983 fblFoundReplicaPtr = fblReplicaPtr;
14984 }//if
14985 }//if
14986 fblReplicaPtr.i = fblReplicaPtr.p->nextReplica;
14987 }//while
14988 if (fblStopGci != 0) {
14989 jam();
14990 ndbrequire(logNode < MAX_LOG_EXEC);
14991 createReplica->logNodeId[logNode] = fblFoundReplicaPtr.p->procNode;
14992 createReplica->logStartGci[logNode] = startGci;
14993 if (fblStopGci >= stopGci) {
14994 jam();
14995 createReplica->logStopGci[logNode] = stopGci;
14996 } else {
14997 jam();
14998 createReplica->logStopGci[logNode] = fblStopGci;
14999 }//if
15000 }//if
15001
15002 return fblStopGci != 0;
15003 }//Dbdih::findBestLogNode()
15004
findLogInterval(ConstPtr<ReplicaRecord> replicaPtr,Uint32 startGci)15005 Uint32 Dbdih::findLogInterval(ConstPtr<ReplicaRecord> replicaPtr,
15006 Uint32 startGci)
15007 {
15008 ndbrequire(replicaPtr.p->noCrashedReplicas <= MAX_CRASHED_REPLICAS);
15009 Uint32 loopLimit = replicaPtr.p->noCrashedReplicas + 1;
15010 for (Uint32 i = 0; i < loopLimit; i++) {
15011 jam();
15012 if (replicaPtr.p->createGci[i] <= startGci) {
15013 if (replicaPtr.p->replicaLastGci[i] >= startGci) {
15014 jam();
15015 return replicaPtr.p->replicaLastGci[i];
15016 }//if
15017 }//if
15018 }//for
15019 return 0;
15020 }//Dbdih::findLogInterval()
15021
15022 /*************************************************************************/
15023 /* */
15024 /* MODULE: FIND THE MINIMUM GCI THAT THIS NODE HAS LOG RECORDS FOR.*/
15025 /*************************************************************************/
findMinGci(ReplicaRecordPtr fmgReplicaPtr,Uint32 & keepGci,Uint32 & oldestRestorableGci)15026 void Dbdih::findMinGci(ReplicaRecordPtr fmgReplicaPtr,
15027 Uint32& keepGci,
15028 Uint32& oldestRestorableGci)
15029 {
15030 keepGci = (Uint32)-1;
15031 oldestRestorableGci = 0;
15032
15033 Uint32 maxLcpId = 0; // LcpId of latest valid LCP
15034 Uint32 maxLcpNo = MAX_LCP_STORED; // Index of latest valid LCP
15035 for (Uint32 i = 0; i < MAX_LCP_STORED; i++)
15036 {
15037 jam();
15038 if (fmgReplicaPtr.p->lcpStatus[i] == ZVALID)
15039 {
15040 if ((fmgReplicaPtr.p->lcpId[i] + MAX_LCP_STORED) <= (SYSFILE->latestLCP_ID + 1))
15041 {
15042 jam();
15043 /*-----------------------------------------------------------------*/
15044 // We invalidate the checkpoint we are preparing to overwrite.
15045 // The LCP id is still the old lcp id,
15046 // this is the reason of comparing with lcpId + 1.
15047 /*-----------------------------------------------------------------*/
15048 fmgReplicaPtr.p->lcpStatus[i] = ZINVALID;
15049 }
15050 else if (fmgReplicaPtr.p->lcpId[i] > maxLcpId)
15051 {
15052 jam();
15053 maxLcpId = fmgReplicaPtr.p->lcpId[i];
15054 maxLcpNo = i;
15055 }
15056 }
15057 }
15058
15059 if (maxLcpNo < MAX_LCP_STORED)
15060 {
15061 /**
15062 * Only consider latest LCP (wrt to how to cut REDO)
15063 */
15064 jam();
15065 keepGci = fmgReplicaPtr.p->maxGciCompleted[maxLcpNo];
15066 oldestRestorableGci = fmgReplicaPtr.p->maxGciStarted[maxLcpNo];
15067 }
15068
15069 if (oldestRestorableGci == 0 && keepGci == Uint32(-1))
15070 {
15071 jam();
15072 if (fmgReplicaPtr.p->createGci[0] == fmgReplicaPtr.p->initialGci)
15073 {
15074 keepGci = fmgReplicaPtr.p->createGci[0];
15075 // XXX Jonas
15076 //oldestRestorableGci = fmgReplicaPtr.p->createGci[0];
15077 }
15078 }
15079 else
15080 {
15081 ndbassert(oldestRestorableGci <= c_newest_restorable_gci);
15082 }
15083 return;
15084 }//Dbdih::findMinGci()
15085
findStartGci(ConstPtr<ReplicaRecord> replicaPtr,Uint32 stopGci,Uint32 & startGci,Uint32 & lcpNo)15086 bool Dbdih::findStartGci(ConstPtr<ReplicaRecord> replicaPtr,
15087 Uint32 stopGci,
15088 Uint32& startGci,
15089 Uint32& lcpNo)
15090 {
15091 Uint32 cnt = 0;
15092 Uint32 tmp[MAX_LCP_STORED];
15093 for (Uint32 i = 0; i<MAX_LCP_STORED; i++)
15094 {
15095 jam();
15096 if (replicaPtr.p->lcpStatus[i] == ZVALID &&
15097 replicaPtr.p->maxGciStarted[i] <= stopGci)
15098 {
15099 /**
15100 * In order to use LCP
15101 * we must be able to run REDO atleast up until maxGciStarted
15102 * which is that highest GCI that
15103 */
15104 jam();
15105 tmp[cnt] = i;
15106 cnt++;
15107 }
15108 }
15109
15110 if (cnt)
15111 {
15112 jam();
15113 /**
15114 * We found atleast one...get the highest
15115 */
15116 lcpNo = tmp[0];
15117 Uint32 lcpId = replicaPtr.p->lcpId[lcpNo];
15118 for (Uint32 i = 1; i<cnt; i++)
15119 {
15120 jam();
15121 if (replicaPtr.p->lcpId[tmp[i]] > lcpId)
15122 {
15123 jam();
15124 lcpNo = tmp[i];
15125 lcpId = replicaPtr.p->lcpId[lcpNo];
15126 }
15127 }
15128 startGci = replicaPtr.p->maxGciCompleted[lcpNo] + 1;
15129 return true;
15130 }
15131
15132 /* --------------------------------------------------------------------- */
15133 /* NO VALID LOCAL CHECKPOINT WAS AVAILABLE. WE WILL ADD THE */
15134 /* FRAGMENT. THUS THE NEXT LCP MUST BE SET TO ZERO. */
15135 /* WE MUST EXECUTE THE LOG FROM THE INITIAL GLOBAL CHECKPOINT WHEN */
15136 /* THE TABLE WAS CREATED. */
15137 /* --------------------------------------------------------------------- */
15138 startGci = replicaPtr.p->initialGci;
15139 ndbrequire(replicaPtr.p->nextLcp == 0);
15140 return false;
15141 }//Dbdih::findStartGci()
15142
15143 static
15144 Uint32
count_db_nodes(ndb_mgm_configuration_iterator * iter)15145 count_db_nodes(ndb_mgm_configuration_iterator * iter)
15146 {
15147 Uint32 cnt = 0;
15148 for (ndb_mgm_first(iter); ndb_mgm_valid(iter); ndb_mgm_next(iter))
15149 {
15150 Uint32 nodeId = 0;
15151 Uint32 type = ~Uint32(0);
15152 if (ndb_mgm_get_int_parameter(iter, CFG_NODE_ID, &nodeId) == 0 &&
15153 ndb_mgm_get_int_parameter(iter,CFG_TYPE_OF_SECTION, &type) == 0 &&
15154 type == NodeInfo::DB)
15155 {
15156 cnt++;
15157 }
15158 }
15159 return cnt;
15160 }
15161
15162 /**
15163 * Compute max time it can take to "resolve" cascading node-failures
15164 * given hb-interval, arbit timeout and #db-nodes
15165 */
15166 static
15167 Uint32
compute_max_failure_time(const ndb_mgm_configuration_iterator * p,ndb_mgm_configuration_iterator * cluster)15168 compute_max_failure_time(const ndb_mgm_configuration_iterator * p,
15169 ndb_mgm_configuration_iterator * cluster)
15170 {
15171 Uint32 dbnodes = count_db_nodes(cluster);
15172
15173 Uint32 hbDBDB = 1500;
15174 Uint32 arbitTimeout = 1000;
15175 ndb_mgm_get_int_parameter(p, CFG_DB_HEARTBEAT_INTERVAL, &hbDBDB);
15176 ndb_mgm_get_int_parameter(p, CFG_DB_ARBIT_TIMEOUT, &arbitTimeout);
15177
15178 /*
15179 * Max time for 1 node failure is
15180 */
15181 Uint32 max_time_one_failure = arbitTimeout + 4 * hbDBDB;
15182
15183 /**
15184 * And worst case...this can be cascading failure with all but self
15185 */
15186 Uint32 max_time_total_failure = (dbnodes - 1) * max_time_one_failure;
15187
15188 return max_time_total_failure;
15189 }
15190
/**
 * Reset the block-wide DIH variables to their start values and then load
 * the settings this block reads from the node's own configuration section:
 * LCP interval, number of replicas, GCP / micro GCP intervals and the
 * maximum lag values of the GCP monitor.
 *
 * Calls progError() with NDBD_EXIT_INVALID_CONFIG when the configured
 * number of replicas exceeds MAX_REPLICAS.
 */
void Dbdih::initCommonData()
{
  /* ---- plain reset of block variables ---- */
  c_blockCommit = false;
  c_blockCommitNo = 0;
  cfailurenr = 1;
  cfirstAliveNode = RNIL;
  cfirstDeadNode = RNIL;
  cgckptflag = false;
  cgcpOrderBlocked = 0;

  c_lcpMasterTakeOverState.set(LMTOS_IDLE, __LINE__);

  c_lcpState.clcpDelay = 0;
  c_lcpState.lcpStart = ZIDLE;
  c_lcpState.lcpStopGcp = 0;
  c_lcpState.setLcpStatus(LCP_STATUS_IDLE, __LINE__);
  c_lcpState.currentFragment.tableId = 0;
  c_lcpState.currentFragment.fragmentId = 0;
  c_lcpState.noOfLcpFragRepOutstanding = 0;
  c_lcpState.keepGci = 0;
  c_lcpState.oldestRestorableGci = 0;
  c_lcpState.ctcCounter = 0;
  c_lcpState.ctimer = 0;
  c_lcpState.immediateLcpStart = false;
  c_lcpState.m_MASTER_LCPREQ_Received = false;
  c_lcpState.m_lastLCP_COMPLETE_REP_ref = 0;
  cmasterdihref = 0;
  cmasterNodeId = 0;
  cmasterState = MASTER_IDLE;
  cmasterTakeOverNode = 0;
  cnoOfActiveTables = 0;
  cnoOfNodeGroups = 0;
  c_nextNodeGroup = 0;
  cnoReplicas = 0;
  con_lineNodes = 0;
  creceivedfrag = 0;
  crestartGci = 0;
  crestartInfoFile[0] = RNIL;
  crestartInfoFile[1] = RNIL;
  cstartPhase = 0;
  cstarttype = (Uint32)-1;
  csystemnodes = 0;
  c_newest_restorable_gci = 0;
  cwaitLcpSr = false;
  c_nodeStartMaster.blockGcp = 0;

  nodeResetStart(0);
  // Set after nodeResetStart(); kept in this order as in the original.
  c_nodeStartMaster.wait = ZFALSE;

  memset(&sysfileData[0], 0, sizeof(sysfileData));

  /* ---- values taken from the configuration ---- */
  const ndb_mgm_configuration_iterator * p =
    m_ctx.m_config.getOwnConfigIterator();
  ndbrequire(p != 0);

  // Default 20 when CFG_DB_LCP_INTERVAL is not set; capped at 31.
  c_lcpState.clcpDelay = 20;
  ndb_mgm_get_int_parameter(p, CFG_DB_LCP_INTERVAL, &c_lcpState.clcpDelay);
  c_lcpState.clcpDelay = c_lcpState.clcpDelay > 31 ? 31 : c_lcpState.clcpDelay;

  //ndb_mgm_get_int_parameter(p, CFG_DB_MIN_HOT_SPARES, &cminHotSpareNodes);

  // Default 1 when CFG_DB_NO_REPLICAS is not set.
  cnoReplicas = 1;
  ndb_mgm_get_int_parameter(p, CFG_DB_NO_REPLICAS, &cnoReplicas);
  if (cnoReplicas > MAX_REPLICAS)
  {
    progError(__LINE__, NDBD_EXIT_INVALID_CONFIG,
              "Only up to four replicas are supported. Check NoOfReplicas.");
  }

  // Extra slack added to both monitor limits below.
  Uint32 max_failure_time = compute_max_failure_time
    (p, m_ctx.m_config.getClusterConfigIterator());

  // Both GCP state structs are zeroed before individual fields are set.
  bzero(&m_gcp_save, sizeof(m_gcp_save));
  bzero(&m_micro_gcp, sizeof(m_micro_gcp));
  {
    { // Set time-between global checkpoint
      // Default 2000, clamped to the range 10..60000.
      Uint32 tmp = 2000;
      ndb_mgm_get_int_parameter(p, CFG_DB_GCP_INTERVAL, &tmp);
      tmp = tmp > 60000 ? 60000 : (tmp < 10 ? 10 : tmp);
      m_gcp_save.m_master.m_time_between_gcp = tmp;
    }

    Uint32 tmp = 0;
    if (ndb_mgm_get_int_parameter(p, CFG_DB_MICRO_GCP_INTERVAL, &tmp) == 0 &&
        tmp)
    {
      /**
       * Set time-between epochs: never more than the GCP interval set
       * above and never less than 10. When the parameter is absent or 0
       * the field keeps the 0 written by bzero().
       */
      if (tmp > m_gcp_save.m_master.m_time_between_gcp)
        tmp = m_gcp_save.m_master.m_time_between_gcp;
      if (tmp < 10)
        tmp = 10;
      m_micro_gcp.m_master.m_time_between_gcp = tmp;
    }

    { // Set time-between global checkpoint timeout
      Uint32 tmp = 120000;     // No config, hard code 2 minutes
      tmp += max_failure_time; // plus the worst-case node failure time
      // NOTE(review): the sum is divided by 100 — presumably the monitor
      // counts in units of 100 of the configured time unit; confirm.
      m_gcp_monitor.m_gcp_save.m_max_lag =
        (m_gcp_save.m_master.m_time_between_gcp + tmp) / 100;
    }

    { // Set time-between epochs timeout
      // Default 4000 when CFG_DB_MICRO_GCP_TIMEOUT is not set.
      Uint32 tmp = 4000;
      ndb_mgm_get_int_parameter(p, CFG_DB_MICRO_GCP_TIMEOUT, &tmp);
      if (tmp != 0)
      {
        jam();
        tmp += max_failure_time;
        m_gcp_monitor.m_micro_gcp.m_max_lag =
          (m_micro_gcp.m_master.m_time_between_gcp + tmp) / 100;
      }
      else
      {
        jam();
        // A timeout of 0 zeroes BOTH max lag values, including the
        // GCP-save one computed just above.
        // NOTE(review): presumably a max lag of 0 disables the monitor —
        // confirm against the code that reads m_max_lag.
        m_gcp_monitor.m_gcp_save.m_max_lag = 0;
        m_gcp_monitor.m_micro_gcp.m_max_lag = 0;
      }
    }
  }
}//Dbdih::initCommonData()
15313
initFragstore(FragmentstorePtr fragPtr)15314 void Dbdih::initFragstore(FragmentstorePtr fragPtr)
15315 {
15316 fragPtr.p->storedReplicas = RNIL;
15317 fragPtr.p->oldStoredReplicas = RNIL;
15318
15319 fragPtr.p->noStoredReplicas = 0;
15320 fragPtr.p->noOldStoredReplicas = 0;
15321 fragPtr.p->fragReplicas = 0;
15322 fragPtr.p->preferredPrimary = 0;
15323
15324 for (Uint32 i = 0; i < MAX_REPLICAS; i++)
15325 fragPtr.p->activeNodes[i] = 0;
15326
15327 fragPtr.p->noLcpReplicas = 0;
15328 fragPtr.p->distributionKey = 0;
15329 }//Dbdih::initFragstore()
15330
15331 /*************************************************************************/
15332 /* */
15333 /* MODULE: INIT_RESTART_INFO */
15334 /* DESCRIPTION: INITIATE RESTART INFO VARIABLE AND VARIABLES FOR */
15335 /* GLOBAL CHECKPOINTS. */
15336 /*************************************************************************/
initRestartInfo(Signal * signal)15337 void Dbdih::initRestartInfo(Signal* signal)
15338 {
15339 Uint32 i;
15340 for (i = 0; i < MAX_NDB_NODES; i++) {
15341 SYSFILE->lastCompletedGCI[i] = 0;
15342 }//for
15343 NodeRecordPtr nodePtr;
15344 nodePtr.i = cfirstAliveNode;
15345 do {
15346 jam();
15347 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
15348 SYSFILE->lastCompletedGCI[nodePtr.i] = 1;
15349 /* FIRST GCP = 1 ALREADY SET BY LQH */
15350 nodePtr.i = nodePtr.p->nextNode;
15351 } while (nodePtr.i != RNIL);
15352
15353 Uint32 startGci = 1;
15354 #ifndef DBUG_OFF
15355 {
15356 char envBuf[256];
15357 const char* v = NdbEnv_GetEnv("NDB_START_GCI",
15358 envBuf,
15359 256);
15360 if (v && *v != 0)
15361 {
15362 startGci = strtoull(v, NULL, 0);
15363
15364 ndbout_c("DbDih : Using value of %u from NDB_START_GCI",
15365 startGci);
15366 }
15367 }
15368 #endif
15369
15370 m_micro_gcp.m_old_gci = Uint64(startGci) << 32;
15371 m_micro_gcp.m_current_gci = Uint64(startGci + 1) << 32;
15372 crestartGci = startGci;
15373 c_newest_restorable_gci = startGci;
15374
15375 SYSFILE->keepGCI = startGci;
15376 SYSFILE->oldestRestorableGCI = startGci;
15377 SYSFILE->newestRestorableGCI = startGci;
15378 SYSFILE->systemRestartBits = 0;
15379 for (i = 0; i < NdbNodeBitmask::Size; i++) {
15380 SYSFILE->lcpActive[0] = 0;
15381 }//for
15382 for (i = 0; i < Sysfile::TAKE_OVER_SIZE; i++) {
15383 SYSFILE->takeOver[i] = 0;
15384 }//for
15385 Sysfile::setInitialStartOngoing(SYSFILE->systemRestartBits);
15386 srand((unsigned int)time(0));
15387 globalData.m_restart_seq = SYSFILE->m_restart_seq = 0;
15388
15389 if (m_micro_gcp.m_enabled == false &&
15390 m_micro_gcp.m_master.m_time_between_gcp)
15391 {
15392 /**
15393 * Micro GCP is disabled...but configured...
15394 */
15395 jam();
15396 m_micro_gcp.m_enabled = true;
15397 UpgradeProtocolOrd * ord = (UpgradeProtocolOrd*)signal->getDataPtrSend();
15398 ord->type = UpgradeProtocolOrd::UPO_ENABLE_MICRO_GCP;
15399 EXECUTE_DIRECT(QMGR,GSN_UPGRADE_PROTOCOL_ORD,signal,signal->getLength());
15400 }
15401 }//Dbdih::initRestartInfo()
15402
15403 /*--------------------------------------------------------------------*/
15404 /* NODE GROUP BITS ARE INITIALISED BEFORE THIS. */
15405 /* NODE ACTIVE BITS ARE INITIALISED BEFORE THIS. */
15406 /*--------------------------------------------------------------------*/
15407 /*************************************************************************/
15408 /* */
15409 /* MODULE: INIT_RESTORABLE_GCI_FILES */
15410 /* DESCRIPTION: THE SUBROUTINE SETS UP THE FILES THAT REFERS TO THE*/
15411 /* FILES THAT KEEP THE VARIABLE CRESTART_INFO */
15412 /*************************************************************************/
initRestorableGciFiles()15413 void Dbdih::initRestorableGciFiles()
15414 {
15415 Uint32 tirgTmp;
15416 FileRecordPtr filePtr;
15417 seizeFile(filePtr);
15418 filePtr.p->tabRef = RNIL;
15419 filePtr.p->fileType = FileRecord::GCP_FILE;
15420 filePtr.p->reqStatus = FileRecord::IDLE;
15421 filePtr.p->fileStatus = FileRecord::CLOSED;
15422 crestartInfoFile[0] = filePtr.i;
15423 filePtr.p->fileName[0] = (Uint32)-1; /* T DIRECTORY NOT USED */
15424 filePtr.p->fileName[1] = (Uint32)-1; /* F DIRECTORY NOT USED */
15425 filePtr.p->fileName[2] = (Uint32)-1; /* S PART IGNORED */
15426 tirgTmp = 1; /* FILE NAME VERSION 1 */
15427 tirgTmp = (tirgTmp << 8) + 6; /* .SYSFILE */
15428 tirgTmp = (tirgTmp << 8) + 1; /* D1 DIRECTORY */
15429 tirgTmp = (tirgTmp << 8) + 0; /* P0 FILE NAME */
15430 filePtr.p->fileName[3] = tirgTmp;
15431 /* --------------------------------------------------------------------- */
15432 /* THE NAME BECOMES /D1/DBDICT/S0.SYSFILE */
15433 /* --------------------------------------------------------------------- */
15434 seizeFile(filePtr);
15435 filePtr.p->tabRef = RNIL;
15436 filePtr.p->fileType = FileRecord::GCP_FILE;
15437 filePtr.p->reqStatus = FileRecord::IDLE;
15438 filePtr.p->fileStatus = FileRecord::CLOSED;
15439 crestartInfoFile[1] = filePtr.i;
15440 filePtr.p->fileName[0] = (Uint32)-1; /* T DIRECTORY NOT USED */
15441 filePtr.p->fileName[1] = (Uint32)-1; /* F DIRECTORY NOT USED */
15442 filePtr.p->fileName[2] = (Uint32)-1; /* S PART IGNORED */
15443 tirgTmp = 1; /* FILE NAME VERSION 1 */
15444 tirgTmp = (tirgTmp << 8) + 6; /* .SYSFILE */
15445 tirgTmp = (tirgTmp << 8) + 2; /* D1 DIRECTORY */
15446 tirgTmp = (tirgTmp << 8) + 0; /* P0 FILE NAME */
15447 filePtr.p->fileName[3] = tirgTmp;
15448 /* --------------------------------------------------------------------- */
15449 /* THE NAME BECOMES /D2/DBDICT/P0.SYSFILE */
15450 /* --------------------------------------------------------------------- */
15451 }//Dbdih::initRestorableGciFiles()
15452
initTable(TabRecordPtr tabPtr)15453 void Dbdih::initTable(TabRecordPtr tabPtr)
15454 {
15455 new (tabPtr.p) TabRecord();
15456 tabPtr.p->noOfFragChunks = 0;
15457 tabPtr.p->method = TabRecord::NOTDEFINED;
15458 tabPtr.p->tabStatus = TabRecord::TS_IDLE;
15459 tabPtr.p->noOfWords = 0;
15460 tabPtr.p->noPages = 0;
15461 tabPtr.p->tabLcpStatus = TabRecord::TLS_COMPLETED;
15462 tabPtr.p->tabCopyStatus = TabRecord::CS_IDLE;
15463 tabPtr.p->tabUpdateState = TabRecord::US_IDLE;
15464 tabPtr.p->noOfBackups = 0;
15465 tabPtr.p->kvalue = 0;
15466 tabPtr.p->hashpointer = (Uint32)-1;
15467 tabPtr.p->mask = 0;
15468 tabPtr.p->tabStorage = TabRecord::ST_NORMAL;
15469 tabPtr.p->tabErrorCode = 0;
15470 tabPtr.p->schemaVersion = (Uint32)-1;
15471 tabPtr.p->tabRemoveNode = RNIL;
15472 tabPtr.p->totalfragments = (Uint32)-1;
15473 tabPtr.p->connectrec = RNIL;
15474 tabPtr.p->tabFile[0] = RNIL;
15475 tabPtr.p->tabFile[1] = RNIL;
15476 tabPtr.p->m_dropTab.tabUserRef = 0;
15477 tabPtr.p->m_dropTab.tabUserPtr = RNIL;
15478 Uint32 i;
15479 for (i = 0; i < NDB_ARRAY_SIZE(tabPtr.p->startFid); i++) {
15480 tabPtr.p->startFid[i] = RNIL;
15481 }//for
15482 for (i = 0; i < NDB_ARRAY_SIZE(tabPtr.p->pageRef); i++) {
15483 tabPtr.p->pageRef[i] = RNIL;
15484 }//for
15485 tabPtr.p->tableType = DictTabInfo::UndefTableType;
15486 tabPtr.p->schemaTransId = 0;
15487 }//Dbdih::initTable()
15488
15489 /*************************************************************************/
15490 /* */
15491 /* MODULE: INIT_TABLE_FILES */
15492 /* DESCRIPTION: THE SUBROUTINE SETS UP THE FILES THAT REFERS TO THE*/
15493 /* FILES THAT KEEP THE TABLE FRAGMENTATION DESCRIPTION. */
15494 /*************************************************************************/
initTableFile(TabRecordPtr tabPtr)15495 void Dbdih::initTableFile(TabRecordPtr tabPtr)
15496 {
15497 Uint32 titfTmp;
15498 FileRecordPtr filePtr;
15499 seizeFile(filePtr);
15500 filePtr.p->tabRef = tabPtr.i;
15501 filePtr.p->fileType = FileRecord::TABLE_FILE;
15502 filePtr.p->reqStatus = FileRecord::IDLE;
15503 filePtr.p->fileStatus = FileRecord::CLOSED;
15504 tabPtr.p->tabFile[0] = filePtr.i;
15505 filePtr.p->fileName[0] = (Uint32)-1; /* T DIRECTORY NOT USED */
15506 filePtr.p->fileName[1] = (Uint32)-1; /* F DIRECTORY NOT USED */
15507 filePtr.p->fileName[2] = tabPtr.i; /* Stid FILE NAME */
15508 titfTmp = 1; /* FILE NAME VERSION 1 */
15509 titfTmp = (titfTmp << 8) + 3; /* .FRAGLIST */
15510 titfTmp = (titfTmp << 8) + 1; /* D1 DIRECTORY */
15511 titfTmp = (titfTmp << 8) + 255; /* P PART IGNORED */
15512 filePtr.p->fileName[3] = titfTmp;
15513 /* --------------------------------------------------------------------- */
15514 /* THE NAME BECOMES /D1/DBDICT/Stid.FRAGLIST */
15515 /* --------------------------------------------------------------------- */
15516 seizeFile(filePtr);
15517 filePtr.p->tabRef = tabPtr.i;
15518 filePtr.p->fileType = FileRecord::TABLE_FILE;
15519 filePtr.p->reqStatus = FileRecord::IDLE;
15520 filePtr.p->fileStatus = FileRecord::CLOSED;
15521 tabPtr.p->tabFile[1] = filePtr.i;
15522 filePtr.p->fileName[0] = (Uint32)-1; /* T DIRECTORY NOT USED */
15523 filePtr.p->fileName[1] = (Uint32)-1; /* F DIRECTORY NOT USED */
15524 filePtr.p->fileName[2] = tabPtr.i; /* Stid FILE NAME */
15525 titfTmp = 1; /* FILE NAME VERSION 1 */
15526 titfTmp = (titfTmp << 8) + 3; /* .FRAGLIST */
15527 titfTmp = (titfTmp << 8) + 2; /* D2 DIRECTORY */
15528 titfTmp = (titfTmp << 8) + 255; /* P PART IGNORED */
15529 filePtr.p->fileName[3] = titfTmp;
15530 /* --------------------------------------------------------------------- */
15531 /* THE NAME BECOMES /D2/DBDICT/Stid.FRAGLIST */
15532 /* --------------------------------------------------------------------- */
15533 }//Dbdih::initTableFile()
15534
/**
 * Initialise the record pools of the block one step at a time.
 *
 * Each call performs the step selected by stepNo and then, for steps
 * 0..8, sends CONTINUEB (ZINITIALISE_RECORDS) to this block with
 * stepNo + 1 so that the next step runs in a new signal. Step 9 sends
 * READ_CONFIG_CONF to retRef and ends the sequence. Any other step
 * number is a fatal error.
 *
 * @param signal   signal object used for the outgoing signals
 * @param stepNo   step to execute (0..9)
 * @param retRef   block reference that receives READ_CONFIG_CONF
 * @param retData  value returned as senderData in READ_CONFIG_CONF
 */
void Dbdih::initialiseRecordsLab(Signal* signal,
                                 Uint32 stepNo, Uint32 retRef, Uint32 retData)
{
  switch (stepNo) {
  case 0:
    jam();
    initCommonData();
    break;
  case 1:{
    ApiConnectRecordPtr apiConnectptr;
    jam();
    // With a single verify queue it refers to the TC block of this node;
    // with several queues entry i is overwritten below with DBTC
    // instance i + 1.
    c_diverify_queue[0].m_ref = calcTcBlockRef(getOwnNodeId());
    for (Uint32 i = 0; i < c_diverify_queue_cnt; i++)
    {
      if (c_diverify_queue_cnt > 1)
      {
        c_diverify_queue[i].m_ref = numberToRef(DBTC, i + 1, 0);
      }
      /******** INITIALIZING API CONNECT RECORDS ********/
      for (apiConnectptr.i = 0;
           apiConnectptr.i < capiConnectFileSize; apiConnectptr.i++)
      {
        refresh_watch_dog();
        ptrAss(apiConnectptr, c_diverify_queue[i].apiConnectRecord);
        apiConnectptr.p->senderData = RNIL;
        apiConnectptr.p->apiGci = ~(Uint64)0;
      }//for
    }
    jam();
    break;
  }
  case 2:{
    ConnectRecordPtr connectPtr;
    jam();
    /****** CONNECT ******/
    // Chain all connect records into a free list through nextPool.
    for (connectPtr.i = 0; connectPtr.i < cconnectFileSize; connectPtr.i++) {
      refresh_watch_dog();
      ptrAss(connectPtr, connectRecord);
      connectPtr.p->userpointer = RNIL;
      connectPtr.p->userblockref = ZNIL;
      connectPtr.p->connectState = ConnectRecord::FREE;
      connectPtr.p->table = RNIL;
      connectPtr.p->nextPool = connectPtr.i + 1;
      bzero(connectPtr.p->nodes, sizeof(connectPtr.p->nodes));
    }//for
    // Terminate the list at the last record.
    connectPtr.i = cconnectFileSize - 1;
    ptrAss(connectPtr, connectRecord);
    connectPtr.p->nextPool = RNIL;
    cfirstconnect = 0;
    break;
  }
  case 3:
    {
      FileRecordPtr filePtr;
      jam();
      /******** INITIALIZING FILE RECORDS ********/
      // Chain all file records into a free list through nextFile.
      for (filePtr.i = 0; filePtr.i < cfileFileSize; filePtr.i++) {
        ptrAss(filePtr, fileRecord);
        filePtr.p->nextFile = filePtr.i + 1;
        filePtr.p->fileStatus = FileRecord::CLOSED;
        filePtr.p->reqStatus = FileRecord::IDLE;
      }//for
      // Terminate the list at the last record.
      filePtr.i = cfileFileSize - 1;
      ptrAss(filePtr, fileRecord);
      filePtr.p->nextFile = RNIL;
      cfirstfreeFile = 0;
      // Must follow the free-list set-up: it seizes two file records.
      initRestorableGciFiles();
      break;
    }
  case 4:
    jam();
    initialiseFragstore();
    break;
  case 5:
    {
      jam();
      /******* NODE GROUP RECORD ******/
      /******* NODE RECORD       ******/
      // Every node group starts empty and without an index in
      // c_node_groups (nodegroupIndex == RNIL).
      NodeGroupRecordPtr loopNGPtr;
      for (loopNGPtr.i = 0; loopNGPtr.i < MAX_NDB_NODES; loopNGPtr.i++) {
        ptrAss(loopNGPtr, nodeGroupRecord);
        loopNGPtr.p->nodesInGroup[0] = RNIL;
        loopNGPtr.p->nodesInGroup[1] = RNIL;
        loopNGPtr.p->nodesInGroup[2] = RNIL;
        loopNGPtr.p->nodesInGroup[3] = RNIL;
        loopNGPtr.p->nextReplicaNode = 0;
        loopNGPtr.p->nodeCount = 0;
        loopNGPtr.p->activeTakeOver = false;
        loopNGPtr.p->nodegroupIndex = RNIL;
        loopNGPtr.p->m_ref_count = 0;
        loopNGPtr.p->m_next_log_part = 0;
      }//for
      break;
    }
  case 6:
    {
      PageRecordPtr pagePtr;
      jam();
      /******* PAGE RECORD ******/
      // Chain all page records into a free list through nextfreepage.
      for (pagePtr.i = 0; pagePtr.i < cpageFileSize; pagePtr.i++) {
        refresh_watch_dog();
        ptrAss(pagePtr, pageRecord);
        pagePtr.p->nextfreepage = pagePtr.i + 1;
      }//for
      // Terminate the list at the last record.
      pagePtr.i = cpageFileSize - 1;
      ptrAss(pagePtr, pageRecord);
      pagePtr.p->nextfreepage = RNIL;
      cfirstfreepage = 0;
      break;
    }
  case 7:
    {
      ReplicaRecordPtr initReplicaPtr;
      jam();
      /******* REPLICA RECORD ******/
      // Chain all replica records into a free list through nextReplica.
      for (initReplicaPtr.i = 0; initReplicaPtr.i < creplicaFileSize;
           initReplicaPtr.i++) {
        refresh_watch_dog();
        ptrAss(initReplicaPtr, replicaRecord);
        initReplicaPtr.p->lcpIdStarted = 0;
        initReplicaPtr.p->lcpOngoingFlag = false;
        initReplicaPtr.p->nextReplica = initReplicaPtr.i + 1;
      }//for
      // Terminate the list at the last record; all records are free.
      initReplicaPtr.i = creplicaFileSize - 1;
      ptrAss(initReplicaPtr, replicaRecord);
      initReplicaPtr.p->nextReplica = RNIL;
      cnoFreeReplicaRec = creplicaFileSize;
      cfirstfreeReplica = 0;
      break;
    }
  case 8:
    {
      TabRecordPtr loopTabptr;
      jam();
      /********* TAB-DESCRIPTOR ********/
      for (loopTabptr.i = 0; loopTabptr.i < ctabFileSize; loopTabptr.i++) {
        ptrAss(loopTabptr, tabRecord);
        refresh_watch_dog();
        initTable(loopTabptr);
      }//for
      break;
    }
  case 9:
    {
      jam();
      // Last step: report completion to the requester. Returning here
      // skips the CONTINUEB below, which ends the step sequence.
      ReadConfigConf * conf = (ReadConfigConf*)signal->getDataPtrSend();
      conf->senderRef = reference();
      conf->senderData = retData;
      sendSignal(retRef, GSN_READ_CONFIG_CONF, signal,
                 ReadConfigConf::SignalLength, JBB);
      return;
      break;
    }
  default:
    ndbrequire(false);
    break;
  }//switch
  jam();
  /* ---------------------------------------------------------------------- */
  /* SEND REAL-TIME BREAK DURING INIT OF VARIABLES DURING SYSTEM RESTART.   */
  /* ---------------------------------------------------------------------- */
  signal->theData[0] = DihContinueB::ZINITIALISE_RECORDS;
  signal->theData[1] = stepNo + 1;
  signal->theData[2] = retRef;
  signal->theData[3] = retData;
  sendSignal(reference(), GSN_CONTINUEB, signal, 4, JBB);
}//Dbdih::initialiseRecordsLab()
15702
15703 /*************************************************************************/
15704 /* INSERT THE NODE INTO THE LINKED LIST OF NODES INVOLVED ALL */
15705 /* DISTRIBUTED PROTOCOLS (EXCEPT GCP PROTOCOL THAT USES THE DIH */
15706 /* LINKED LIST INSTEAD). */
15707 /*************************************************************************/
insertAlive(NodeRecordPtr newNodePtr)15708 void Dbdih::insertAlive(NodeRecordPtr newNodePtr)
15709 {
15710 NodeRecordPtr nodePtr;
15711
15712 nodePtr.i = cfirstAliveNode;
15713 if (nodePtr.i == RNIL) {
15714 jam();
15715 cfirstAliveNode = newNodePtr.i;
15716 } else {
15717 do {
15718 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
15719 if (nodePtr.p->nextNode == RNIL) {
15720 jam();
15721 nodePtr.p->nextNode = newNodePtr.i;
15722 break;
15723 } else {
15724 jam();
15725 nodePtr.i = nodePtr.p->nextNode;
15726 }//if
15727 } while (1);
15728 }//if
15729 newNodePtr.p->nextNode = RNIL;
15730 }//Dbdih::insertAlive()
15731
insertBackup(FragmentstorePtr fragPtr,Uint32 nodeId)15732 void Dbdih::insertBackup(FragmentstorePtr fragPtr, Uint32 nodeId)
15733 {
15734 for (Uint32 i = fragPtr.p->fragReplicas; i > 1; i--) {
15735 jam();
15736 ndbrequire(i < MAX_REPLICAS && i > 0);
15737 fragPtr.p->activeNodes[i] = fragPtr.p->activeNodes[i - 1];
15738 }//for
15739 fragPtr.p->activeNodes[1] = nodeId;
15740 fragPtr.p->fragReplicas++;
15741 }//Dbdih::insertBackup()
15742
insertDeadNode(NodeRecordPtr newNodePtr)15743 void Dbdih::insertDeadNode(NodeRecordPtr newNodePtr)
15744 {
15745 NodeRecordPtr nodePtr;
15746
15747 nodePtr.i = cfirstDeadNode;
15748 if (nodePtr.i == RNIL) {
15749 jam();
15750 cfirstDeadNode = newNodePtr.i;
15751 } else {
15752 do {
15753 jam();
15754 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
15755 if (nodePtr.p->nextNode == RNIL) {
15756 jam();
15757 nodePtr.p->nextNode = newNodePtr.i;
15758 break;
15759 } else {
15760 jam();
15761 nodePtr.i = nodePtr.p->nextNode;
15762 }//if
15763 } while (1);
15764 }//if
15765 newNodePtr.p->nextNode = RNIL;
15766 }//Dbdih::insertDeadNode()
15767
linkOldStoredReplica(FragmentstorePtr fragPtr,ReplicaRecordPtr replicatePtr)15768 void Dbdih::linkOldStoredReplica(FragmentstorePtr fragPtr,
15769 ReplicaRecordPtr replicatePtr)
15770 {
15771 ReplicaRecordPtr losReplicaPtr;
15772
15773 replicatePtr.p->nextReplica = RNIL;
15774 fragPtr.p->noOldStoredReplicas++;
15775 losReplicaPtr.i = fragPtr.p->oldStoredReplicas;
15776 if (losReplicaPtr.i == RNIL) {
15777 jam();
15778 fragPtr.p->oldStoredReplicas = replicatePtr.i;
15779 return;
15780 }//if
15781 ptrCheckGuard(losReplicaPtr, creplicaFileSize, replicaRecord);
15782 while (losReplicaPtr.p->nextReplica != RNIL) {
15783 jam();
15784 losReplicaPtr.i = losReplicaPtr.p->nextReplica;
15785 ptrCheckGuard(losReplicaPtr, creplicaFileSize, replicaRecord);
15786 }//if
15787 losReplicaPtr.p->nextReplica = replicatePtr.i;
15788 }//Dbdih::linkOldStoredReplica()
15789
linkStoredReplica(FragmentstorePtr fragPtr,ReplicaRecordPtr replicatePtr)15790 void Dbdih::linkStoredReplica(FragmentstorePtr fragPtr,
15791 ReplicaRecordPtr replicatePtr)
15792 {
15793 ReplicaRecordPtr lsrReplicaPtr;
15794
15795 fragPtr.p->noStoredReplicas++;
15796 replicatePtr.p->nextReplica = RNIL;
15797 lsrReplicaPtr.i = fragPtr.p->storedReplicas;
15798 if (fragPtr.p->storedReplicas == RNIL) {
15799 jam();
15800 fragPtr.p->storedReplicas = replicatePtr.i;
15801 return;
15802 }//if
15803 ptrCheckGuard(lsrReplicaPtr, creplicaFileSize, replicaRecord);
15804 while (lsrReplicaPtr.p->nextReplica != RNIL) {
15805 jam();
15806 lsrReplicaPtr.i = lsrReplicaPtr.p->nextReplica;
15807 ptrCheckGuard(lsrReplicaPtr, creplicaFileSize, replicaRecord);
15808 }//if
15809 lsrReplicaPtr.p->nextReplica = replicatePtr.i;
15810 }//Dbdih::linkStoredReplica()
15811
15812 /*************************************************************************/
15813 /* MAKE NODE GROUPS BASED ON THE LIST OF NODES RECEIVED FROM CNTR */
15814 /*************************************************************************/
15815 void
add_nodegroup(NodeGroupRecordPtr NGPtr)15816 Dbdih::add_nodegroup(NodeGroupRecordPtr NGPtr)
15817 {
15818 if (NGPtr.p->nodegroupIndex == RNIL)
15819 {
15820 jam();
15821 NGPtr.p->nodegroupIndex = cnoOfNodeGroups;
15822 c_node_groups[cnoOfNodeGroups++] = NGPtr.i;
15823 }
15824 }
15825
15826 void
inc_ng_refcount(Uint32 i)15827 Dbdih::inc_ng_refcount(Uint32 i)
15828 {
15829 NodeGroupRecordPtr NGPtr;
15830 NGPtr.i = i;
15831 ptrCheckGuard(NGPtr, MAX_NDB_NODES, nodeGroupRecord);
15832 NGPtr.p->m_ref_count++;
15833 }
15834
15835 void
dec_ng_refcount(Uint32 i)15836 Dbdih::dec_ng_refcount(Uint32 i)
15837 {
15838 NodeGroupRecordPtr NGPtr;
15839 NGPtr.i = i;
15840 ptrCheckGuard(NGPtr, MAX_NDB_NODES, nodeGroupRecord);
15841 ndbrequire(NGPtr.p->m_ref_count);
15842 NGPtr.p->m_ref_count--;
15843 }
15844
makeNodeGroups(Uint32 nodeArray[])15845 void Dbdih::makeNodeGroups(Uint32 nodeArray[])
15846 {
15847 NodeGroupRecordPtr NGPtr;
15848 NodeRecordPtr mngNodeptr;
15849 Uint32 j;
15850
15851 /**-----------------------------------------------------------------------
15852 * ASSIGN ALL ACTIVE NODES INTO NODE GROUPS. HOT SPARE NODES ARE ASSIGNED
15853 * TO NODE GROUP ZNIL
15854 *-----------------------------------------------------------------------*/
15855 cnoOfNodeGroups = 0;
15856 for (Uint32 i = 0; nodeArray[i] != RNIL; i++)
15857 {
15858 jam();
15859 mngNodeptr.i = nodeArray[i];
15860 ptrCheckGuard(mngNodeptr, MAX_NDB_NODES, nodeRecord);
15861 if (mngNodeptr.p->nodeGroup == NDB_NO_NODEGROUP)
15862 {
15863 jam();
15864 mngNodeptr.p->nodeGroup = ZNIL;
15865 ndbout_c("setting nodeGroup = ZNIL for node %u",
15866 mngNodeptr.i);
15867 }
15868 else if (mngNodeptr.p->nodeGroup != RNIL)
15869 {
15870 jam();
15871 NGPtr.i = mngNodeptr.p->nodeGroup;
15872 ptrCheckGuard(NGPtr, MAX_NDB_NODES, nodeGroupRecord);
15873 arrGuard(NGPtr.p->nodeCount, MAX_REPLICAS);
15874 NGPtr.p->nodesInGroup[NGPtr.p->nodeCount++] = mngNodeptr.i;
15875
15876 add_nodegroup(NGPtr);
15877 }
15878 }
15879 NGPtr.i = 0;
15880 for (; NGPtr.i < MAX_NDB_NODES; NGPtr.i++)
15881 {
15882 jam();
15883 ptrCheckGuard(NGPtr, MAX_NDB_NODES, nodeGroupRecord);
15884 if (NGPtr.p->nodeCount < cnoReplicas)
15885 break;
15886 }
15887
15888 for (Uint32 i = 0; nodeArray[i] != RNIL; i++)
15889 {
15890 jam();
15891 mngNodeptr.i = nodeArray[i];
15892 ptrCheckGuard(mngNodeptr, MAX_NDB_NODES, nodeRecord);
15893 if (mngNodeptr.p->nodeGroup == RNIL)
15894 {
15895 mngNodeptr.p->nodeGroup = NGPtr.i;
15896 NGPtr.p->nodesInGroup[NGPtr.p->nodeCount++] = mngNodeptr.i;
15897
15898 add_nodegroup(NGPtr);
15899
15900 if (NGPtr.p->nodeCount == cnoReplicas)
15901 {
15902 jam();
15903 for (; NGPtr.i < MAX_NDB_NODES; NGPtr.i++)
15904 {
15905 jam();
15906 ptrCheckGuard(NGPtr, MAX_NDB_NODES, nodeGroupRecord);
15907 if (NGPtr.p->nodeCount < cnoReplicas)
15908 break;
15909 }
15910 }
15911 }
15912 }
15913
15914 Uint32 maxNG = 0;
15915 for (Uint32 i = 0; i<cnoOfNodeGroups; i++)
15916 {
15917 jam();
15918 NGPtr.i = c_node_groups[i];
15919 ptrCheckGuard(NGPtr, MAX_NDB_NODES, nodeGroupRecord);
15920 if (NGPtr.p->nodeCount == 0)
15921 {
15922 jam();
15923 }
15924 else if (NGPtr.p->nodeCount != cnoReplicas)
15925 {
15926 ndbrequire(false);
15927 }
15928 else
15929 {
15930 if (NGPtr.i > maxNG)
15931 {
15932 maxNG = NGPtr.i;
15933 }
15934 }
15935 }
15936
15937 ndbrequire(csystemnodes < MAX_NDB_NODES);
15938
15939 /**
15940 * Init sysfile
15941 */
15942 for(Uint32 i = 0; i < MAX_NDB_NODES; i++)
15943 {
15944 jam();
15945 Sysfile::setNodeGroup(i, SYSFILE->nodeGroups, NO_NODE_GROUP_ID);
15946 Sysfile::setNodeStatus(i, SYSFILE->nodeStatus,Sysfile::NS_NotDefined);
15947 }
15948
15949 for (Uint32 i = 0; nodeArray[i] != RNIL; i++)
15950 {
15951 jam();
15952 Uint32 nodeId = mngNodeptr.i = nodeArray[i];
15953 ptrCheckGuard(mngNodeptr, MAX_NDB_NODES, nodeRecord);
15954
15955 if (mngNodeptr.p->nodeGroup != ZNIL)
15956 {
15957 jam();
15958 Sysfile::setNodeGroup(nodeId, SYSFILE->nodeGroups,
15959 mngNodeptr.p->nodeGroup);
15960
15961 if (mngNodeptr.p->nodeStatus == NodeRecord::ALIVE)
15962 {
15963 jam();
15964 mngNodeptr.p->activeStatus = Sysfile::NS_Active;
15965 }
15966 else
15967 {
15968 jam();
15969 mngNodeptr.p->activeStatus = Sysfile::NS_NotActive_NotTakenOver;
15970 }
15971 }
15972 else
15973 {
15974 jam();
15975 Sysfile::setNodeGroup(mngNodeptr.i, SYSFILE->nodeGroups,
15976 NO_NODE_GROUP_ID);
15977 mngNodeptr.p->activeStatus = Sysfile::NS_Configured;
15978 }
15979 Sysfile::setNodeStatus(nodeId, SYSFILE->nodeStatus,
15980 mngNodeptr.p->activeStatus);
15981 }
15982
15983 for (Uint32 i = 0; i<cnoOfNodeGroups; i++)
15984 {
15985 jam();
15986 bool alive = false;
15987 NodeGroupRecordPtr NGPtr;
15988 NGPtr.i = c_node_groups[i];
15989 ptrCheckGuard(NGPtr, MAX_NDB_NODES, nodeGroupRecord);
15990 for (j = 0; j<NGPtr.p->nodeCount; j++)
15991 {
15992 jam();
15993 mngNodeptr.i = NGPtr.p->nodesInGroup[j];
15994 ptrCheckGuard(mngNodeptr, MAX_NDB_NODES, nodeRecord);
15995 if (checkNodeAlive(NGPtr.p->nodesInGroup[j]))
15996 {
15997 alive = true;
15998 break;
15999 }
16000 }
16001
16002 if (!alive)
16003 {
16004 char buf[255];
16005 BaseString::snprintf
16006 (buf, sizeof(buf),
16007 "Illegal initial start, no alive node in nodegroup %u", i);
16008 progError(__LINE__,
16009 NDBD_EXIT_INSUFFICENT_NODES,
16010 buf);
16011 }
16012 }
16013 }//Dbdih::makeNodeGroups()
16014
16015 /**
16016 * On node failure QMGR asks DIH about node groups. This is
16017 * a direct signal (function call in same process). Input is
16018 * bitmask of surviving nodes. The routine is not concerned
16019 * about node count. Reply is one of:
16020 * 1) win - we can survive, and nobody else can
16021 * 2) lose - we cannot survive
16022 * 3) partition - we can survive but there could be others
16023 */
execCHECKNODEGROUPSREQ(Signal * signal)16024 void Dbdih::execCHECKNODEGROUPSREQ(Signal* signal)
16025 {
16026 jamEntry();
16027 CheckNodeGroups* sd = (CheckNodeGroups*)&signal->theData[0];
16028
16029 bool direct = (sd->requestType & CheckNodeGroups::Direct);
16030 bool ok = false;
16031 switch(sd->requestType & ~CheckNodeGroups::Direct){
16032 case CheckNodeGroups::ArbitCheck:{
16033 ok = true;
16034 jam();
16035 unsigned missall = 0;
16036 unsigned haveall = 0;
16037 for (Uint32 i = 0; i < cnoOfNodeGroups; i++) {
16038 jam();
16039 NodeGroupRecordPtr ngPtr;
16040 ngPtr.i = c_node_groups[i];
16041 ptrAss(ngPtr, nodeGroupRecord);
16042 Uint32 count = 0;
16043 for (Uint32 j = 0; j < ngPtr.p->nodeCount; j++) {
16044 jam();
16045 Uint32 nodeId = ngPtr.p->nodesInGroup[j];
16046 if (sd->mask.get(nodeId)) {
16047 jam();
16048 count++;
16049 }//if
16050 }//for
16051 if (count == 0) {
16052 jam();
16053 missall++;
16054 }//if
16055 if (count == ngPtr.p->nodeCount) {
16056 haveall++;
16057 }//if
16058 }//for
16059
16060 if (missall) {
16061 jam();
16062 sd->output = CheckNodeGroups::Lose;
16063 } else if (haveall) {
16064 jam();
16065 sd->output = CheckNodeGroups::Win;
16066 } else {
16067 jam();
16068 sd->output = CheckNodeGroups::Partitioning;
16069 }//if
16070 }
16071 break;
16072 case CheckNodeGroups::GetNodeGroup:{
16073 ok = true;
16074 Uint32 ng = Sysfile::getNodeGroup(getOwnNodeId(), SYSFILE->nodeGroups);
16075 if (ng == NO_NODE_GROUP_ID)
16076 ng = RNIL;
16077 sd->output = ng;
16078 break;
16079 }
16080 case CheckNodeGroups::GetNodeGroupMembers: {
16081 ok = true;
16082 Uint32 ng = Sysfile::getNodeGroup(sd->nodeId, SYSFILE->nodeGroups);
16083 if (ng == NO_NODE_GROUP_ID)
16084 ng = RNIL;
16085
16086 sd->output = ng;
16087 sd->mask.clear();
16088
16089 NodeGroupRecordPtr ngPtr;
16090 ngPtr.i = ng;
16091 if (ngPtr.i != RNIL)
16092 {
16093 jam();
16094 ptrAss(ngPtr, nodeGroupRecord);
16095 for (Uint32 j = 0; j < ngPtr.p->nodeCount; j++) {
16096 jam();
16097 sd->mask.set(ngPtr.p->nodesInGroup[j]);
16098 }
16099 }
16100 break;
16101 }
16102 case CheckNodeGroups::GetDefaultFragments:
16103 jam();
16104 ok = true;
16105 sd->output = (cnoOfNodeGroups + sd->extraNodeGroups)
16106 * c_fragments_per_node * cnoReplicas;
16107 break;
16108 }
16109 ndbrequire(ok);
16110
16111 if (!direct)
16112 sendSignal(sd->blockRef, GSN_CHECKNODEGROUPSCONF, signal,
16113 CheckNodeGroups::SignalLength, JBB);
16114 }//Dbdih::execCHECKNODEGROUPSREQ()
16115
16116 void
makePrnList(ReadNodesConf * readNodes,Uint32 nodeArray[])16117 Dbdih::makePrnList(ReadNodesConf * readNodes, Uint32 nodeArray[])
16118 {
16119 cfirstAliveNode = RNIL;
16120 ndbrequire(con_lineNodes > 0);
16121 ndbrequire(csystemnodes < MAX_NDB_NODES);
16122 for (Uint32 i = 0; i < csystemnodes; i++) {
16123 NodeRecordPtr nodePtr;
16124 jam();
16125 nodePtr.i = nodeArray[i];
16126 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
16127 new (nodePtr.p) NodeRecord();
16128 if (NdbNodeBitmask::get(readNodes->inactiveNodes, nodePtr.i) == false){
16129 jam();
16130 nodePtr.p->nodeStatus = NodeRecord::ALIVE;
16131 nodePtr.p->useInTransactions = true;
16132 nodePtr.p->copyCompleted = true;
16133 nodePtr.p->m_inclDihLcp = true;
16134 insertAlive(nodePtr);
16135 } else {
16136 jam();
16137 nodePtr.p->nodeStatus = NodeRecord::DEAD;
16138 insertDeadNode(nodePtr);
16139 }//if
16140 }//for
16141 }//Dbdih::makePrnList()
16142
16143 /*************************************************************************/
16144 /* A NEW CRASHED REPLICA IS ADDED BY A NODE FAILURE. */
16145 /*************************************************************************/
newCrashedReplica(ReplicaRecordPtr ncrReplicaPtr)16146 void Dbdih::newCrashedReplica(ReplicaRecordPtr ncrReplicaPtr)
16147 {
16148 /*----------------------------------------------------------------------*/
16149 /* SET THE REPLICA_LAST_GCI OF THE CRASHED REPLICA TO LAST GCI */
16150 /* EXECUTED BY THE FAILED NODE. */
16151 /*----------------------------------------------------------------------*/
16152 /* WE HAVE A NEW CRASHED REPLICA. INITIATE CREATE GCI TO INDICATE */
16153 /* THAT THE NEW REPLICA IS NOT STARTED YET AND REPLICA_LAST_GCI IS*/
16154 /* SET TO -1 TO INDICATE THAT IT IS NOT DEAD YET. */
16155 /*----------------------------------------------------------------------*/
16156 Uint32 nodeId = ncrReplicaPtr.p->procNode;
16157 Uint32 lastGCI = SYSFILE->lastCompletedGCI[nodeId];
16158 if (ncrReplicaPtr.p->noCrashedReplicas + 1 == MAX_CRASHED_REPLICAS)
16159 {
16160 jam();
16161 packCrashedReplicas(ncrReplicaPtr);
16162 }
16163
16164 Uint32 noCrashedReplicas = ncrReplicaPtr.p->noCrashedReplicas;
16165 arrGuardErr(ncrReplicaPtr.p->noCrashedReplicas + 1, MAX_CRASHED_REPLICAS,
16166 NDBD_EXIT_MAX_CRASHED_REPLICAS);
16167
16168 if (noCrashedReplicas > 0 &&
16169 ncrReplicaPtr.p->replicaLastGci[noCrashedReplicas - 1] == lastGCI)
16170 {
16171 jam();
16172 /**
16173 * Don't add another redo-interval, that already exist
16174 * instead initalize new
16175 */
16176 ncrReplicaPtr.p->createGci[ncrReplicaPtr.p->noCrashedReplicas] =
16177 ZINIT_CREATE_GCI;
16178 ncrReplicaPtr.p->replicaLastGci[ncrReplicaPtr.p->noCrashedReplicas] =
16179 ZINIT_REPLICA_LAST_GCI;
16180 }
16181 else if (ncrReplicaPtr.p->createGci[noCrashedReplicas] <= lastGCI)
16182 {
16183 jam();
16184 ncrReplicaPtr.p->replicaLastGci[ncrReplicaPtr.p->noCrashedReplicas] =
16185 lastGCI;
16186 ncrReplicaPtr.p->noCrashedReplicas = ncrReplicaPtr.p->noCrashedReplicas + 1;
16187 ncrReplicaPtr.p->createGci[ncrReplicaPtr.p->noCrashedReplicas] =
16188 ZINIT_CREATE_GCI;
16189 ncrReplicaPtr.p->replicaLastGci[ncrReplicaPtr.p->noCrashedReplicas] =
16190 ZINIT_REPLICA_LAST_GCI;
16191 }
16192 else
16193 {
16194 /**
16195 * This can happen if createGci is set
16196 * (during sendCreateFragReq(COMMIT_STORED))
16197 * but SYSFILE->lastCompletedGCI[nodeId] has not been updated
16198 * as node has not yet completed it's first LCP, causing it to return
16199 * GCP_SAVEREF (which makes SYSFILE->lastCompletedGCI[nodeId] be left
16200 * untouched)
16201 *
16202 * I.e crash during node-restart
16203 */
16204 ncrReplicaPtr.p->createGci[noCrashedReplicas] = ZINIT_CREATE_GCI;
16205 }
16206
16207 }//Dbdih::newCrashedReplica()
16208
16209 /*************************************************************************/
16210 /* AT NODE FAILURE DURING START OF A NEW NODE WE NEED TO RESET A */
16211 /* SET OF VARIABLES CONTROLLING THE START AND INDICATING ONGOING */
16212 /* START OF A NEW NODE. */
16213 /*************************************************************************/
nodeResetStart(Signal * signal)16214 void Dbdih::nodeResetStart(Signal *signal)
16215 {
16216 jam();
16217 Uint32 startGCP = c_nodeStartMaster.blockGcp;
16218
16219 c_nodeStartSlave.nodeId = 0;
16220 c_nodeStartMaster.startNode = RNIL;
16221 c_nodeStartMaster.failNr = cfailurenr;
16222 c_nodeStartMaster.activeState = false;
16223 c_nodeStartMaster.blockGcp = 0;
16224 c_nodeStartMaster.blockLcp = false;
16225 c_nodeStartMaster.m_outstandingGsn = 0;
16226
16227 if (startGCP == 2) // effective
16228 {
16229 jam();
16230 ndbrequire(isMaster());
16231 ndbrequire(m_micro_gcp.m_master.m_state == MicroGcp::M_GCP_IDLE);
16232 signal->theData[0] = DihContinueB::ZSTART_GCP;
16233 sendSignal(reference(), GSN_CONTINUEB, signal, 1, JBB);
16234 }
16235 }//Dbdih::nodeResetStart()
16236
openFileRw(Signal * signal,FileRecordPtr filePtr)16237 void Dbdih::openFileRw(Signal* signal, FileRecordPtr filePtr)
16238 {
16239 signal->theData[0] = reference();
16240 signal->theData[1] = filePtr.i;
16241 signal->theData[2] = filePtr.p->fileName[0];
16242 signal->theData[3] = filePtr.p->fileName[1];
16243 signal->theData[4] = filePtr.p->fileName[2];
16244 signal->theData[5] = filePtr.p->fileName[3];
16245 signal->theData[6] = FsOpenReq::OM_READWRITE;
16246 sendSignal(NDBFS_REF, GSN_FSOPENREQ, signal, 7, JBA);
16247 }//Dbdih::openFileRw()
16248
openFileRo(Signal * signal,FileRecordPtr filePtr)16249 void Dbdih::openFileRo(Signal* signal, FileRecordPtr filePtr)
16250 {
16251 signal->theData[0] = reference();
16252 signal->theData[1] = filePtr.i;
16253 signal->theData[2] = filePtr.p->fileName[0];
16254 signal->theData[3] = filePtr.p->fileName[1];
16255 signal->theData[4] = filePtr.p->fileName[2];
16256 signal->theData[5] = filePtr.p->fileName[3];
16257 signal->theData[6] = FsOpenReq::OM_READONLY;
16258 sendSignal(NDBFS_REF, GSN_FSOPENREQ, signal, 7, JBA);
16259 }//Dbdih::openFileRw()
16260
16261 /*************************************************************************/
16262 /* REMOVE A CRASHED REPLICA BY PACKING THE ARRAY OF CREATED GCI AND*/
16263 /* THE LAST GCI OF THE CRASHED REPLICA. */
16264 /*************************************************************************/
packCrashedReplicas(ReplicaRecordPtr replicaPtr)16265 void Dbdih::packCrashedReplicas(ReplicaRecordPtr replicaPtr)
16266 {
16267 ndbrequire(replicaPtr.p->noCrashedReplicas > 0);
16268 ndbrequire(replicaPtr.p->noCrashedReplicas <= MAX_CRASHED_REPLICAS);
16269 for (Uint32 i = 0; i < replicaPtr.p->noCrashedReplicas; i++) {
16270 jam();
16271 replicaPtr.p->createGci[i] = replicaPtr.p->createGci[i + 1];
16272 replicaPtr.p->replicaLastGci[i] = replicaPtr.p->replicaLastGci[i + 1];
16273 }//for
16274 replicaPtr.p->noCrashedReplicas--;
16275 replicaPtr.p->createGci[replicaPtr.p->noCrashedReplicas + 1] =
16276 ZINIT_CREATE_GCI;
16277 replicaPtr.p->replicaLastGci[replicaPtr.p->noCrashedReplicas + 1] =
16278 ZINIT_REPLICA_LAST_GCI;
16279 }//Dbdih::packCrashedReplicas()
16280
16281 void
mergeCrashedReplicas(ReplicaRecordPtr replicaPtr)16282 Dbdih::mergeCrashedReplicas(ReplicaRecordPtr replicaPtr)
16283 {
16284 /**
16285 * merge adjacent redo-intervals
16286 */
16287 for (Uint32 i = replicaPtr.p->noCrashedReplicas; i > 0; i--)
16288 {
16289 jam();
16290 if (replicaPtr.p->createGci[i] == 1 + replicaPtr.p->replicaLastGci[i-1])
16291 {
16292 jam();
16293 replicaPtr.p->replicaLastGci[i-1] = replicaPtr.p->replicaLastGci[i];
16294 replicaPtr.p->createGci[i] = ZINIT_CREATE_GCI;
16295 replicaPtr.p->replicaLastGci[i] = ZINIT_REPLICA_LAST_GCI;
16296 replicaPtr.p->noCrashedReplicas--;
16297 }
16298 else
16299 {
16300 jam();
16301 break;
16302 }
16303 }
16304 }
16305
prepareReplicas(FragmentstorePtr fragPtr)16306 void Dbdih::prepareReplicas(FragmentstorePtr fragPtr)
16307 {
16308 ReplicaRecordPtr prReplicaPtr;
16309 Uint32 prevReplica = RNIL;
16310
16311 /* --------------------------------------------------------------------- */
16312 /* BEGIN BY LINKING ALL REPLICA RECORDS ONTO THE OLD STORED REPLICA*/
16313 /* LIST. */
16314 /* AT A SYSTEM RESTART OBVIOUSLY ALL NODES ARE OLD. */
16315 /* --------------------------------------------------------------------- */
16316 prReplicaPtr.i = fragPtr.p->storedReplicas;
16317 while (prReplicaPtr.i != RNIL) {
16318 jam();
16319 prevReplica = prReplicaPtr.i;
16320 ptrCheckGuard(prReplicaPtr, creplicaFileSize, replicaRecord);
16321 prReplicaPtr.i = prReplicaPtr.p->nextReplica;
16322 }//while
16323 /* --------------------------------------------------------------------- */
16324 /* LIST OF STORED REPLICAS WILL BE EMPTY NOW. */
16325 /* --------------------------------------------------------------------- */
16326 if (prevReplica != RNIL) {
16327 prReplicaPtr.i = prevReplica;
16328 ptrCheckGuard(prReplicaPtr, creplicaFileSize, replicaRecord);
16329 prReplicaPtr.p->nextReplica = fragPtr.p->oldStoredReplicas;
16330 fragPtr.p->oldStoredReplicas = fragPtr.p->storedReplicas;
16331 fragPtr.p->storedReplicas = RNIL;
16332 fragPtr.p->noOldStoredReplicas += fragPtr.p->noStoredReplicas;
16333 fragPtr.p->noStoredReplicas = 0;
16334 }//if
16335 }//Dbdih::prepareReplicas()
16336
readFragment(RWFragment * rf,FragmentstorePtr fragPtr)16337 void Dbdih::readFragment(RWFragment* rf, FragmentstorePtr fragPtr)
16338 {
16339 Uint32 TreadFid = readPageWord(rf);
16340 fragPtr.p->preferredPrimary = readPageWord(rf);
16341 fragPtr.p->noStoredReplicas = readPageWord(rf);
16342 fragPtr.p->noOldStoredReplicas = readPageWord(rf);
16343 Uint32 TdistKey = readPageWord(rf);
16344
16345 ndbrequire(fragPtr.p->noStoredReplicas > 0);
16346 ndbrequire(TreadFid == rf->fragId);
16347 ndbrequire(TdistKey < 256);
16348 fragPtr.p->distributionKey = TdistKey;
16349
16350 fragPtr.p->m_log_part_id = readPageWord(rf);
16351 inc_ng_refcount(getNodeGroup(fragPtr.p->preferredPrimary));
16352 }//Dbdih::readFragment()
16353
readPageWord(RWFragment * rf)16354 Uint32 Dbdih::readPageWord(RWFragment* rf)
16355 {
16356 if (rf->wordIndex >= 2048) {
16357 jam();
16358 ndbrequire(rf->wordIndex == 2048);
16359 rf->pageIndex++;
16360 ndbrequire(rf->pageIndex < NDB_ARRAY_SIZE(rf->rwfTabPtr.p->pageRef));
16361 rf->rwfPageptr.i = rf->rwfTabPtr.p->pageRef[rf->pageIndex];
16362 ptrCheckGuard(rf->rwfPageptr, cpageFileSize, pageRecord);
16363 rf->wordIndex = 32;
16364 }//if
16365 Uint32 dataWord = rf->rwfPageptr.p->word[rf->wordIndex];
16366 rf->wordIndex++;
16367 return dataWord;
16368 }//Dbdih::readPageWord()
16369
readReplica(RWFragment * rf,ReplicaRecordPtr readReplicaPtr)16370 void Dbdih::readReplica(RWFragment* rf, ReplicaRecordPtr readReplicaPtr)
16371 {
16372 Uint32 i;
16373 readReplicaPtr.p->procNode = readPageWord(rf);
16374 readReplicaPtr.p->initialGci = readPageWord(rf);
16375 readReplicaPtr.p->noCrashedReplicas = readPageWord(rf);
16376 readReplicaPtr.p->nextLcp = readPageWord(rf);
16377
16378 for (i = 0; i < MAX_LCP_STORED; i++) {
16379 readReplicaPtr.p->maxGciCompleted[i] = readPageWord(rf);
16380 readReplicaPtr.p->maxGciStarted[i] = readPageWord(rf);
16381 readReplicaPtr.p->lcpId[i] = readPageWord(rf);
16382 readReplicaPtr.p->lcpStatus[i] = readPageWord(rf);
16383 }//for
16384 const Uint32 noCrashedReplicas = readReplicaPtr.p->noCrashedReplicas;
16385 ndbrequire(noCrashedReplicas < MAX_CRASHED_REPLICAS);
16386 for (i = 0; i < noCrashedReplicas; i++) {
16387 readReplicaPtr.p->createGci[i] = readPageWord(rf);
16388 readReplicaPtr.p->replicaLastGci[i] = readPageWord(rf);
16389 }//for
16390 for(i = noCrashedReplicas; i<MAX_CRASHED_REPLICAS; i++){
16391 readReplicaPtr.p->createGci[i] = readPageWord(rf);
16392 readReplicaPtr.p->replicaLastGci[i] = readPageWord(rf);
16393 }
16394 }//Dbdih::readReplica()
16395
readReplicas(RWFragment * rf,FragmentstorePtr fragPtr)16396 void Dbdih::readReplicas(RWFragment* rf, FragmentstorePtr fragPtr)
16397 {
16398 Uint32 i;
16399 ReplicaRecordPtr newReplicaPtr;
16400 Uint32 noStoredReplicas = fragPtr.p->noStoredReplicas;
16401 Uint32 noOldStoredReplicas = fragPtr.p->noOldStoredReplicas;
16402 /* ----------------------------------------------------------------------- */
16403 /* WE CLEAR THE NUMBER OF STORED REPLICAS SINCE IT WILL BE CALCULATED */
16404 /* BY THE LINKING SUBROUTINES. */
16405 /* ----------------------------------------------------------------------- */
16406 fragPtr.p->noStoredReplicas = 0;
16407 fragPtr.p->noOldStoredReplicas = 0;
16408 Uint32 replicaIndex = 0;
16409 ndbrequire(noStoredReplicas + noOldStoredReplicas <= MAX_REPLICAS);
16410 for (i = 0; i < noStoredReplicas; i++)
16411 {
16412 seizeReplicaRec(newReplicaPtr);
16413 readReplica(rf, newReplicaPtr);
16414 ndbrequire(replicaIndex < MAX_REPLICAS);
16415 fragPtr.p->activeNodes[replicaIndex] = newReplicaPtr.p->procNode;
16416 replicaIndex++;
16417 linkStoredReplica(fragPtr, newReplicaPtr);
16418 }//for
16419 fragPtr.p->fragReplicas = noStoredReplicas;
16420 for (i = 0; i < noOldStoredReplicas; i++) {
16421 jam();
16422 seizeReplicaRec(newReplicaPtr);
16423 readReplica(rf, newReplicaPtr);
16424 linkOldStoredReplica(fragPtr, newReplicaPtr);
16425 }//for
16426 }//Dbdih::readReplicas()
16427
readRestorableGci(Signal * signal,FileRecordPtr filePtr)16428 void Dbdih::readRestorableGci(Signal* signal, FileRecordPtr filePtr)
16429 {
16430 signal->theData[0] = filePtr.p->fileRef;
16431 signal->theData[1] = reference();
16432 signal->theData[2] = filePtr.i;
16433 signal->theData[3] = ZLIST_OF_PAIRS;
16434 signal->theData[4] = ZVAR_NO_CRESTART_INFO;
16435 signal->theData[5] = 1;
16436 signal->theData[6] = 0;
16437 signal->theData[7] = 0;
16438 sendSignal(NDBFS_REF, GSN_FSREADREQ, signal, 8, JBA);
16439 }//Dbdih::readRestorableGci()
16440
readTabfile(Signal * signal,TabRecord * tab,FileRecordPtr filePtr)16441 void Dbdih::readTabfile(Signal* signal, TabRecord* tab, FileRecordPtr filePtr)
16442 {
16443 signal->theData[0] = filePtr.p->fileRef;
16444 signal->theData[1] = reference();
16445 signal->theData[2] = filePtr.i;
16446 signal->theData[3] = ZLIST_OF_PAIRS;
16447 signal->theData[4] = ZVAR_NO_WORD;
16448 signal->theData[5] = tab->noPages;
16449 Uint32 section[2 * NDB_ARRAY_SIZE(tab->pageRef)];
16450 for (Uint32 i = 0; i < tab->noPages; i++)
16451 {
16452 section[(2 * i) + 0] = tab->pageRef[i];
16453 section[(2 * i) + 1] = i;
16454 }
16455 LinearSectionPtr ptr[3];
16456 ptr[0].p = section;
16457 ptr[0].sz = 2 * tab->noPages;
16458 sendSignal(NDBFS_REF, GSN_FSREADREQ, signal, 6, JBA, ptr, 1);
16459 }//Dbdih::readTabfile()
16460
releasePage(Uint32 pageIndex)16461 void Dbdih::releasePage(Uint32 pageIndex)
16462 {
16463 PageRecordPtr pagePtr;
16464 pagePtr.i = pageIndex;
16465 ptrCheckGuard(pagePtr, cpageFileSize, pageRecord);
16466 pagePtr.p->nextfreepage = cfirstfreepage;
16467 cfirstfreepage = pagePtr.i;
16468 }//Dbdih::releasePage()
16469
releaseTabPages(Uint32 tableId)16470 void Dbdih::releaseTabPages(Uint32 tableId)
16471 {
16472 TabRecordPtr tabPtr;
16473 tabPtr.i = tableId;
16474 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
16475 ndbrequire(tabPtr.p->noPages <= NDB_ARRAY_SIZE(tabPtr.p->pageRef));
16476 for (Uint32 i = 0; i < tabPtr.p->noPages; i++) {
16477 jam();
16478 releasePage(tabPtr.p->pageRef[i]);
16479 }//for
16480 tabPtr.p->noPages = 0;
16481 }//Dbdih::releaseTabPages()
16482
16483 /*************************************************************************/
16484 /* REMOVE NODE FROM SET OF ALIVE NODES. */
16485 /*************************************************************************/
removeAlive(NodeRecordPtr removeNodePtr)16486 void Dbdih::removeAlive(NodeRecordPtr removeNodePtr)
16487 {
16488 NodeRecordPtr nodePtr;
16489
16490 nodePtr.i = cfirstAliveNode;
16491 if (nodePtr.i == removeNodePtr.i) {
16492 jam();
16493 cfirstAliveNode = removeNodePtr.p->nextNode;
16494 return;
16495 }//if
16496 do {
16497 jam();
16498 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
16499 if (nodePtr.p->nextNode == removeNodePtr.i) {
16500 jam();
16501 nodePtr.p->nextNode = removeNodePtr.p->nextNode;
16502 break;
16503 } else {
16504 jam();
16505 nodePtr.i = nodePtr.p->nextNode;
16506 }//if
16507 } while (1);
16508 }//Dbdih::removeAlive()
16509
16510 /*************************************************************************/
16511 /* REMOVE NODE FROM SET OF DEAD NODES. */
16512 /*************************************************************************/
removeDeadNode(NodeRecordPtr removeNodePtr)16513 void Dbdih::removeDeadNode(NodeRecordPtr removeNodePtr)
16514 {
16515 NodeRecordPtr nodePtr;
16516
16517 nodePtr.i = cfirstDeadNode;
16518 if (nodePtr.i == removeNodePtr.i) {
16519 jam();
16520 cfirstDeadNode = removeNodePtr.p->nextNode;
16521 return;
16522 }//if
16523 do {
16524 jam();
16525 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
16526 if (nodePtr.p->nextNode == removeNodePtr.i) {
16527 jam();
16528 nodePtr.p->nextNode = removeNodePtr.p->nextNode;
16529 break;
16530 } else {
16531 jam();
16532 nodePtr.i = nodePtr.p->nextNode;
16533 }//if
16534 } while (1);
16535 }//Dbdih::removeDeadNode()
16536
16537 /*---------------------------------------------------------------*/
16538 /* REMOVE REPLICAS OF A FAILED NODE FROM LIST OF STORED */
16539 /* REPLICAS AND MOVE IT TO THE LIST OF OLD STORED REPLICAS.*/
16540 /* ALSO UPDATE THE CRASHED REPLICA INFORMATION. */
16541 /*---------------------------------------------------------------*/
removeNodeFromStored(Uint32 nodeId,FragmentstorePtr fragPtr,ReplicaRecordPtr replicatePtr,bool temporary)16542 void Dbdih::removeNodeFromStored(Uint32 nodeId,
16543 FragmentstorePtr fragPtr,
16544 ReplicaRecordPtr replicatePtr,
16545 bool temporary)
16546 {
16547 if (!temporary)
16548 {
16549 jam();
16550 newCrashedReplica(replicatePtr);
16551 }
16552 else
16553 {
16554 jam();
16555 }
16556 removeStoredReplica(fragPtr, replicatePtr);
16557 linkOldStoredReplica(fragPtr, replicatePtr);
16558 ndbrequire(fragPtr.p->storedReplicas != RNIL);
16559 }//Dbdih::removeNodeFromStored()
16560
16561 /*************************************************************************/
16562 /* REMOVE ANY OLD CRASHED REPLICAS THAT ARE NOT RESTORABLE ANY MORE*/
16563 /*************************************************************************/
removeOldCrashedReplicas(Uint32 tab,Uint32 frag,ReplicaRecordPtr rocReplicaPtr)16564 void Dbdih::removeOldCrashedReplicas(Uint32 tab, Uint32 frag,
16565 ReplicaRecordPtr rocReplicaPtr)
16566 {
16567 mergeCrashedReplicas(rocReplicaPtr);
16568 while (rocReplicaPtr.p->noCrashedReplicas > 0) {
16569 jam();
16570 /* --------------------------------------------------------------------- */
16571 /* ONLY IF THERE IS AT LEAST ONE REPLICA THEN CAN WE REMOVE ANY. */
16572 /* --------------------------------------------------------------------- */
16573 if (rocReplicaPtr.p->replicaLastGci[0] < SYSFILE->oldestRestorableGCI){
16574 jam();
16575 /* ------------------------------------------------------------------- */
16576 /* THIS CRASHED REPLICA HAS BECOME EXTINCT AND MUST BE REMOVED TO */
16577 /* GIVE SPACE FOR NEW CRASHED REPLICAS. */
16578 /* ------------------------------------------------------------------- */
16579 packCrashedReplicas(rocReplicaPtr);
16580 } else {
16581 break;
16582 }//if
16583 }//while
16584
16585 while (rocReplicaPtr.p->createGci[0] < SYSFILE->keepGCI)
16586 {
16587 jam();
16588 /* --------------------------------------------------------------------- */
16589 /* MOVE FORWARD THE CREATE GCI TO A GCI THAT CAN BE USED. WE HAVE */
16590 /* NO CERTAINTY IN FINDING ANY LOG RECORDS FROM OLDER GCI'S. */
16591 /* --------------------------------------------------------------------- */
16592 rocReplicaPtr.p->createGci[0] = SYSFILE->keepGCI;
16593
16594 if (rocReplicaPtr.p->noCrashedReplicas)
16595 {
16596 /**
16597 * a REDO interval while is from 78 to 14 is not usefull
16598 * but rather harmful, remove it...
16599 */
16600 if (rocReplicaPtr.p->createGci[0] > rocReplicaPtr.p->replicaLastGci[0])
16601 {
16602 jam();
16603 packCrashedReplicas(rocReplicaPtr);
16604 }
16605 }
16606 }
16607 }
16608
removeOldStoredReplica(FragmentstorePtr fragPtr,ReplicaRecordPtr replicatePtr)16609 void Dbdih::removeOldStoredReplica(FragmentstorePtr fragPtr,
16610 ReplicaRecordPtr replicatePtr)
16611 {
16612 ReplicaRecordPtr rosTmpReplicaPtr;
16613 ReplicaRecordPtr rosPrevReplicaPtr;
16614
16615 fragPtr.p->noOldStoredReplicas--;
16616 if (fragPtr.p->oldStoredReplicas == replicatePtr.i) {
16617 jam();
16618 fragPtr.p->oldStoredReplicas = replicatePtr.p->nextReplica;
16619 } else {
16620 rosPrevReplicaPtr.i = fragPtr.p->oldStoredReplicas;
16621 ptrCheckGuard(rosPrevReplicaPtr, creplicaFileSize, replicaRecord);
16622 rosTmpReplicaPtr.i = rosPrevReplicaPtr.p->nextReplica;
16623 while (rosTmpReplicaPtr.i != replicatePtr.i) {
16624 jam();
16625 rosPrevReplicaPtr.i = rosTmpReplicaPtr.i;
16626 ptrCheckGuard(rosPrevReplicaPtr, creplicaFileSize, replicaRecord);
16627 ptrCheckGuard(rosTmpReplicaPtr, creplicaFileSize, replicaRecord);
16628 rosTmpReplicaPtr.i = rosTmpReplicaPtr.p->nextReplica;
16629 }//if
16630 rosPrevReplicaPtr.p->nextReplica = replicatePtr.p->nextReplica;
16631 }//if
16632 }//Dbdih::removeOldStoredReplica()
16633
removeStoredReplica(FragmentstorePtr fragPtr,ReplicaRecordPtr replicatePtr)16634 void Dbdih::removeStoredReplica(FragmentstorePtr fragPtr,
16635 ReplicaRecordPtr replicatePtr)
16636 {
16637 ReplicaRecordPtr rsrTmpReplicaPtr;
16638 ReplicaRecordPtr rsrPrevReplicaPtr;
16639
16640 fragPtr.p->noStoredReplicas--;
16641 if (fragPtr.p->storedReplicas == replicatePtr.i) {
16642 jam();
16643 fragPtr.p->storedReplicas = replicatePtr.p->nextReplica;
16644 } else {
16645 jam();
16646 rsrPrevReplicaPtr.i = fragPtr.p->storedReplicas;
16647 rsrTmpReplicaPtr.i = fragPtr.p->storedReplicas;
16648 ptrCheckGuard(rsrTmpReplicaPtr, creplicaFileSize, replicaRecord);
16649 rsrTmpReplicaPtr.i = rsrTmpReplicaPtr.p->nextReplica;
16650 while (rsrTmpReplicaPtr.i != replicatePtr.i) {
16651 jam();
16652 rsrPrevReplicaPtr.i = rsrTmpReplicaPtr.i;
16653 ptrCheckGuard(rsrTmpReplicaPtr, creplicaFileSize, replicaRecord);
16654 rsrTmpReplicaPtr.i = rsrTmpReplicaPtr.p->nextReplica;
16655 }//while
16656 ptrCheckGuard(rsrPrevReplicaPtr, creplicaFileSize, replicaRecord);
16657 rsrPrevReplicaPtr.p->nextReplica = replicatePtr.p->nextReplica;
16658 }//if
16659 }//Dbdih::removeStoredReplica()
16660
16661 /*************************************************************************/
16662 /* REMOVE ALL TOO NEW CRASHED REPLICAS THAT IS IN THIS REPLICA. */
16663 /*************************************************************************/
removeTooNewCrashedReplicas(ReplicaRecordPtr rtnReplicaPtr,Uint32 lastCompletedGCI)16664 void Dbdih::removeTooNewCrashedReplicas(ReplicaRecordPtr rtnReplicaPtr, Uint32 lastCompletedGCI)
16665 {
16666 while (rtnReplicaPtr.p->noCrashedReplicas > 0) {
16667 jam();
16668 /* --------------------------------------------------------------------- */
16669 /* REMOVE ALL REPLICAS THAT ONLY LIVED IN A PERIOD THAT HAVE BEEN */
16670 /* REMOVED FROM THE RESTART INFORMATION SINCE THE RESTART FAILED */
16671 /* TOO MANY TIMES. */
16672 /* --------------------------------------------------------------------- */
16673 arrGuard(rtnReplicaPtr.p->noCrashedReplicas - 1, MAX_CRASHED_REPLICAS);
16674 if (rtnReplicaPtr.p->createGci[rtnReplicaPtr.p->noCrashedReplicas - 1] > lastCompletedGCI)
16675 {
16676 jam();
16677 rtnReplicaPtr.p->createGci[rtnReplicaPtr.p->noCrashedReplicas - 1] =
16678 ZINIT_CREATE_GCI;
16679 rtnReplicaPtr.p->replicaLastGci[rtnReplicaPtr.p->noCrashedReplicas - 1] =
16680 ZINIT_REPLICA_LAST_GCI;
16681 rtnReplicaPtr.p->noCrashedReplicas--;
16682 } else {
16683 break;
16684 }//if
16685 }//while
16686 }//Dbdih::removeTooNewCrashedReplicas()
16687
16688 /*************************************************************************/
16689 /* */
16690 /* MODULE: SEARCH FOR POSSIBLE REPLICAS THAT CAN HANDLE THE GLOBAL */
16691 /* CHECKPOINT WITHOUT NEEDING ANY EXTRA LOGGING FACILITIES.*/
16692 /* A MAXIMUM OF FOUR NODES IS RETRIEVED. */
16693 /*************************************************************************/
16694 bool
setup_create_replica(FragmentstorePtr fragPtr,CreateReplicaRecord * createReplicaPtrP,ConstPtr<ReplicaRecord> replicaPtr)16695 Dbdih::setup_create_replica(FragmentstorePtr fragPtr,
16696 CreateReplicaRecord* createReplicaPtrP,
16697 ConstPtr<ReplicaRecord> replicaPtr)
16698 {
16699 createReplicaPtrP->dataNodeId = replicaPtr.p->procNode;
16700 createReplicaPtrP->replicaRec = replicaPtr.i;
16701
16702 /* ----------------------------------------------------------------- */
16703 /* WE NEED TO SEARCH FOR A PROPER LOCAL CHECKPOINT TO USE FOR THE */
16704 /* SYSTEM RESTART. */
16705 /* ----------------------------------------------------------------- */
16706 Uint32 startGci;
16707 Uint32 startLcpNo;
16708 Uint32 stopGci = SYSFILE->newestRestorableGCI;
16709 bool result = findStartGci(replicaPtr,
16710 stopGci,
16711 startGci,
16712 startLcpNo);
16713 if (!result)
16714 {
16715 jam();
16716 /* --------------------------------------------------------------- */
16717 /* WE COULD NOT FIND ANY LOCAL CHECKPOINT. THE FRAGMENT THUS DO NOT*/
16718 /* CONTAIN ANY VALID LOCAL CHECKPOINT. IT DOES HOWEVER CONTAIN A */
16719 /* VALID FRAGMENT LOG. THUS BY FIRST CREATING THE FRAGMENT AND THEN*/
16720 /* EXECUTING THE FRAGMENT LOG WE CAN CREATE THE FRAGMENT AS */
16721 /* DESIRED. THIS SHOULD ONLY OCCUR AFTER CREATING A FRAGMENT. */
16722 /* */
16723 /* TO INDICATE THAT NO LOCAL CHECKPOINT IS TO BE USED WE SET THE */
16724 /* LOCAL CHECKPOINT TO ZNIL. */
16725 /* --------------------------------------------------------------- */
16726 createReplicaPtrP->lcpNo = ZNIL;
16727 }
16728 else
16729 {
16730 jam();
16731 /* --------------------------------------------------------------- */
16732 /* WE FOUND A PROPER LOCAL CHECKPOINT TO RESTART FROM. */
16733 /* SET LOCAL CHECKPOINT ID AND LOCAL CHECKPOINT NUMBER. */
16734 /* --------------------------------------------------------------- */
16735 createReplicaPtrP->lcpNo = startLcpNo;
16736 arrGuard(startLcpNo, MAX_LCP_STORED);
16737 createReplicaPtrP->createLcpId = replicaPtr.p->lcpId[startLcpNo];
16738 }//if
16739
16740
16741 /* ----------------------------------------------------------------- */
16742 /* WE HAVE EITHER FOUND A LOCAL CHECKPOINT OR WE ARE PLANNING TO */
16743 /* EXECUTE THE LOG FROM THE INITIAL CREATION OF THE TABLE. IN BOTH */
16744 /* CASES WE NEED TO FIND A SET OF LOGS THAT CAN EXECUTE SUCH THAT */
16745 /* WE RECOVER TO THE SYSTEM RESTART GLOBAL CHECKPOINT. */
16746 /* -_--------------------------------------------------------------- */
16747 return findLogNodes(createReplicaPtrP, fragPtr, startGci, stopGci);
16748 }
16749
searchStoredReplicas(FragmentstorePtr fragPtr)16750 void Dbdih::searchStoredReplicas(FragmentstorePtr fragPtr)
16751 {
16752 Uint32 nextReplicaPtrI;
16753 Ptr<ReplicaRecord> replicaPtr;
16754
16755 replicaPtr.i = fragPtr.p->storedReplicas;
16756 while (replicaPtr.i != RNIL) {
16757 jam();
16758 ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
16759 nextReplicaPtrI = replicaPtr.p->nextReplica;
16760 ConstPtr<ReplicaRecord> constReplicaPtr;
16761 constReplicaPtr.i = replicaPtr.i;
16762 constReplicaPtr.p = replicaPtr.p;
16763 NodeRecordPtr nodePtr;
16764 nodePtr.i = replicaPtr.p->procNode;
16765 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
16766 if (nodePtr.p->nodeStatus == NodeRecord::ALIVE) {
16767 jam();
16768 switch (nodePtr.p->activeStatus) {
16769 case Sysfile::NS_Active:
16770 case Sysfile::NS_ActiveMissed_1:
16771 case Sysfile::NS_ActiveMissed_2:{
16772 /* ----------------------------------------------------------------- */
16773 /* INITIALISE THE CREATE REPLICA STRUCTURE THAT IS USED FOR SENDING*/
16774 /* TO LQH START_FRAGREQ. */
16775 /* SET THE DATA NODE WHERE THE LOCAL CHECKPOINT IS FOUND. ALSO */
16776 /* SET A REFERENCE TO THE REPLICA POINTER OF THAT. */
16777 /* ----------------------------------------------------------------- */
16778 CreateReplicaRecordPtr createReplicaPtr;
16779 createReplicaPtr.i = cnoOfCreateReplicas;
16780 ptrCheckGuard(createReplicaPtr, 4, createReplicaRecord);
16781 cnoOfCreateReplicas++;
16782
16783 /**
16784 * Should have been checked in resetReplicaSr
16785 */
16786 ndbrequire(setup_create_replica(fragPtr,
16787 createReplicaPtr.p,
16788 constReplicaPtr));
16789 break;
16790 }
16791 default:
16792 jam();
16793 /*empty*/;
16794 break;
16795 }//switch
16796 }
16797 replicaPtr.i = nextReplicaPtrI;
16798 }//while
16799 }//Dbdih::searchStoredReplicas()
16800
16801 /*************************************************************************/
16802 /* */
16803 /* MODULE: SEIZE_FILE */
16804 /* DESCRIPTION: THE SUBROUTINE SEIZES A FILE RECORD FROM THE */
16805 /* FREE LIST. */
16806 /*************************************************************************/
seizeFile(FileRecordPtr & filePtr)16807 void Dbdih::seizeFile(FileRecordPtr& filePtr)
16808 {
16809 filePtr.i = cfirstfreeFile;
16810 ptrCheckGuard(filePtr, cfileFileSize, fileRecord);
16811 cfirstfreeFile = filePtr.p->nextFile;
16812 filePtr.p->nextFile = RNIL;
16813 }//Dbdih::seizeFile()
16814
16815 /*************************************************************************/
16816 /* SEND CREATE_FRAGREQ TO ALL NODES IN THE NDB CLUSTER. */
16817 /*************************************************************************/
16818 /*************************************************************************/
16819 /* */
16820 /* MODULE: FIND THE START GCI AND LOCAL CHECKPOINT TO USE. */
16821 /*************************************************************************/
sendStartFragreq(Signal * signal,TabRecordPtr tabPtr,Uint32 fragId)16822 void Dbdih::sendStartFragreq(Signal* signal,
16823 TabRecordPtr tabPtr, Uint32 fragId)
16824 {
16825 CreateReplicaRecordPtr replicaPtr;
16826 for (replicaPtr.i = 0; replicaPtr.i < cnoOfCreateReplicas; replicaPtr.i++) {
16827 jam();
16828 ptrAss(replicaPtr, createReplicaRecord);
16829
16830 BlockReference ref = numberToRef(DBLQH, replicaPtr.p->dataNodeId);
16831
16832 StartFragReq * const startFragReq = (StartFragReq *)&signal->theData[0];
16833 startFragReq->userPtr = replicaPtr.p->replicaRec;
16834 startFragReq->userRef = reference();
16835 startFragReq->lcpNo = replicaPtr.p->lcpNo;
16836 startFragReq->lcpId = replicaPtr.p->createLcpId;
16837 startFragReq->tableId = tabPtr.i;
16838 startFragReq->fragId = fragId;
16839 startFragReq->requestInfo = StartFragReq::SFR_RESTORE_LCP;
16840
16841 if(ERROR_INSERTED(7072) || ERROR_INSERTED(7074)){
16842 jam();
16843 const Uint32 noNodes = replicaPtr.p->noLogNodes;
16844 Uint32 start = replicaPtr.p->logStartGci[noNodes - 1];
16845 const Uint32 stop = replicaPtr.p->logStopGci[noNodes - 1];
16846
16847 for(Uint32 i = noNodes; i < MAX_LOG_EXEC && (stop - start) > 0; i++){
16848 replicaPtr.p->noLogNodes++;
16849 replicaPtr.p->logStopGci[i - 1] = start;
16850
16851 replicaPtr.p->logNodeId[i] = replicaPtr.p->logNodeId[i-1];
16852 replicaPtr.p->logStartGci[i] = start + 1;
16853 replicaPtr.p->logStopGci[i] = stop;
16854 start += 1;
16855 }
16856 }
16857
16858 startFragReq->noOfLogNodes = replicaPtr.p->noLogNodes;
16859
16860 for (Uint32 i = 0; i < MAX_LOG_EXEC ; i++) {
16861 startFragReq->lqhLogNode[i] = replicaPtr.p->logNodeId[i];
16862 startFragReq->startGci[i] = replicaPtr.p->logStartGci[i];
16863 startFragReq->lastGci[i] = replicaPtr.p->logStopGci[i];
16864 }//for
16865
16866 sendSignal(ref, GSN_START_FRAGREQ, signal,
16867 StartFragReq::SignalLength, JBB);
16868 }//for
16869 }//Dbdih::sendStartFragreq()
16870
16871 /*************************************************************************/
16872 /* SET LCP ACTIVE STATUS BEFORE STARTING A LOCAL CHECKPOINT. */
16873 /*************************************************************************/
setLcpActiveStatusStart(Signal * signal)16874 void Dbdih::setLcpActiveStatusStart(Signal* signal)
16875 {
16876 NodeRecordPtr nodePtr;
16877
16878 c_lcpState.m_participatingLQH.clear();
16879 c_lcpState.m_participatingDIH.clear();
16880
16881 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
16882 ptrAss(nodePtr, nodeRecord);
16883 #if 0
16884 if(nodePtr.p->nodeStatus != NodeRecord::NOT_IN_CLUSTER){
16885 infoEvent("Node %d nodeStatus=%d activeStatus=%d copyCompleted=%d lcp=%d",
16886 nodePtr.i,
16887 nodePtr.p->nodeStatus,
16888 nodePtr.p->activeStatus,
16889 nodePtr.p->copyCompleted,
16890 nodePtr.p->m_inclDihLcp);
16891 }
16892 #endif
16893 if(nodePtr.p->nodeStatus == NodeRecord::ALIVE)
16894 {
16895 jam();
16896 if (nodePtr.p->m_inclDihLcp)
16897 {
16898 jam();
16899 c_lcpState.m_participatingDIH.set(nodePtr.i);
16900 }
16901
16902 if (nodePtr.p->copyCompleted)
16903 {
16904 jam();
16905 c_lcpState.m_participatingLQH.set(nodePtr.i);
16906 }
16907 else if (nodePtr.p->activeStatus == Sysfile::NS_Configured)
16908 {
16909 jam();
16910 continue;
16911 }
16912 else
16913 {
16914 jam();
16915 nodePtr.p->activeStatus = Sysfile::NS_ActiveMissed_1;
16916 }
16917 }
16918 else if (nodePtr.p->activeStatus == Sysfile::NS_Configured)
16919 {
16920 jam();
16921 continue;
16922 }
16923 else if (nodePtr.p->activeStatus != Sysfile::NS_NotDefined)
16924 {
16925 jam();
16926 nodePtr.p->activeStatus = Sysfile::NS_ActiveMissed_1;
16927 }
16928 }
16929 }//Dbdih::setLcpActiveStatusStart()
16930
16931 /*************************************************************************/
16932 /* SET LCP ACTIVE STATUS AT THE END OF A LOCAL CHECKPOINT. */
16933 /*************************************************************************/
setLcpActiveStatusEnd(Signal * signal)16934 void Dbdih::setLcpActiveStatusEnd(Signal* signal)
16935 {
16936 NodeRecordPtr nodePtr;
16937
16938 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
16939 jam();
16940 ptrAss(nodePtr, nodeRecord);
16941 if (c_lcpState.m_participatingLQH.get(nodePtr.i))
16942 {
16943 jam();
16944 nodePtr.p->copyCompleted = 1;
16945 if (! (nodePtr.p->activeStatus == Sysfile::NS_Configured))
16946 {
16947 jam();
16948 nodePtr.p->activeStatus = Sysfile::NS_Active;
16949 }
16950 else
16951 {
16952 jam();
16953 // Do nothing
16954 }
16955 }
16956 else if (nodePtr.p->activeStatus == Sysfile::NS_Configured)
16957 {
16958 jam();
16959 continue;
16960 }
16961 else if (nodePtr.p->activeStatus != Sysfile::NS_NotDefined)
16962 {
16963 jam();
16964 nodePtr.p->activeStatus = Sysfile::NS_ActiveMissed_1;
16965 }
16966 }
16967
16968 c_lcpState.m_participatingDIH.clear();
16969 c_lcpState.m_participatingLQH.clear();
16970 if (isMaster()) {
16971 jam();
16972 setNodeRestartInfoBits(signal);
16973 }//if
16974 }//Dbdih::setLcpActiveStatusEnd()
16975
16976 /*************************************************************************/
16977 /* SET NODE ACTIVE STATUS AT SYSTEM RESTART AND WHEN UPDATED BY MASTER */
16978 /*************************************************************************/
setNodeActiveStatus()16979 void Dbdih::setNodeActiveStatus()
16980 {
16981 NodeRecordPtr snaNodeptr;
16982
16983 for (snaNodeptr.i = 1; snaNodeptr.i < MAX_NDB_NODES; snaNodeptr.i++)
16984 {
16985 ptrAss(snaNodeptr, nodeRecord);
16986 const Uint32 tsnaNodeBits = Sysfile::getNodeStatus(snaNodeptr.i,
16987 SYSFILE->nodeStatus);
16988 switch (tsnaNodeBits) {
16989 case Sysfile::NS_Active:
16990 jam();
16991 snaNodeptr.p->activeStatus = Sysfile::NS_Active;
16992 break;
16993 case Sysfile::NS_ActiveMissed_1:
16994 jam();
16995 snaNodeptr.p->activeStatus = Sysfile::NS_ActiveMissed_1;
16996 break;
16997 case Sysfile::NS_ActiveMissed_2:
16998 jam();
16999 snaNodeptr.p->activeStatus = Sysfile::NS_ActiveMissed_2;
17000 break;
17001 case Sysfile::NS_TakeOver:
17002 jam();
17003 snaNodeptr.p->activeStatus = Sysfile::NS_TakeOver;
17004 break;
17005 case Sysfile::NS_NotActive_NotTakenOver:
17006 jam();
17007 snaNodeptr.p->activeStatus = Sysfile::NS_NotActive_NotTakenOver;
17008 break;
17009 case Sysfile::NS_NotDefined:
17010 jam();
17011 snaNodeptr.p->activeStatus = Sysfile::NS_NotDefined;
17012 break;
17013 case Sysfile::NS_Configured:
17014 jam();
17015 snaNodeptr.p->activeStatus = Sysfile::NS_Configured;
17016 break;
17017 default:
17018 ndbrequire(false);
17019 break;
17020 }//switch
17021 }//for
17022 }//Dbdih::setNodeActiveStatus()
17023
17024 /***************************************************************************/
17025 /* SET THE NODE GROUP BASED ON THE RESTART INFORMATION OR AS SET BY MASTER */
17026 /***************************************************************************/
setNodeGroups()17027 void Dbdih::setNodeGroups()
17028 {
17029 NodeGroupRecordPtr NGPtr;
17030 NodeRecordPtr sngNodeptr;
17031 Uint32 Ti;
17032
17033 for (Ti = 0; Ti < cnoOfNodeGroups; Ti++) {
17034 NGPtr.i = c_node_groups[Ti];
17035 ptrAss(NGPtr, nodeGroupRecord);
17036 NGPtr.p->nodeCount = 0;
17037 NGPtr.p->nodegroupIndex = RNIL;
17038 }//for
17039 cnoOfNodeGroups = 0;
17040 for (sngNodeptr.i = 1; sngNodeptr.i < MAX_NDB_NODES; sngNodeptr.i++) {
17041 ptrAss(sngNodeptr, nodeRecord);
17042 Sysfile::ActiveStatus s =
17043 (Sysfile::ActiveStatus)Sysfile::getNodeStatus(sngNodeptr.i,
17044 SYSFILE->nodeStatus);
17045 switch (s){
17046 case Sysfile::NS_Active:
17047 case Sysfile::NS_ActiveMissed_1:
17048 case Sysfile::NS_ActiveMissed_2:
17049 case Sysfile::NS_NotActive_NotTakenOver:
17050 case Sysfile::NS_TakeOver:
17051 jam();
17052 sngNodeptr.p->nodeGroup = Sysfile::getNodeGroup(sngNodeptr.i,
17053 SYSFILE->nodeGroups);
17054 NGPtr.i = sngNodeptr.p->nodeGroup;
17055 ptrCheckGuard(NGPtr, MAX_NDB_NODES, nodeGroupRecord);
17056 NGPtr.p->nodesInGroup[NGPtr.p->nodeCount] = sngNodeptr.i;
17057 NGPtr.p->nodeCount++;
17058 add_nodegroup(NGPtr);
17059 break;
17060 case Sysfile::NS_NotDefined:
17061 case Sysfile::NS_Configured:
17062 jam();
17063 sngNodeptr.p->nodeGroup = ZNIL;
17064 break;
17065 default:
17066 ndbrequire(false);
17067 return;
17068 break;
17069 }//switch
17070 }//for
17071 }//Dbdih::setNodeGroups()
17072
17073 /*************************************************************************/
17074 /* SET THE RESTART INFO BITS BASED ON THE NODES ACTIVE STATUS. */
17075 /*************************************************************************/
setNodeRestartInfoBits(Signal * signal)17076 void Dbdih::setNodeRestartInfoBits(Signal * signal)
17077 {
17078 NodeRecordPtr nodePtr;
17079 Uint32 tsnrNodeGroup;
17080 Uint32 tsnrNodeActiveStatus;
17081 Uint32 i;
17082 for(i = 1; i < MAX_NDB_NODES; i++){
17083 Sysfile::setNodeStatus(i, SYSFILE->nodeStatus, Sysfile::NS_Active);
17084 }//for
17085 for(i = 1; i < Sysfile::NODE_GROUPS_SIZE; i++){
17086 SYSFILE->nodeGroups[i] = 0;
17087 }//for
17088 NdbNodeBitmask::clear(SYSFILE->lcpActive);
17089
17090 #ifdef ERROR_INSERT
17091 NdbNodeBitmask tmp;
17092 #endif
17093
17094 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
17095 ptrAss(nodePtr, nodeRecord);
17096 switch (nodePtr.p->activeStatus) {
17097 case Sysfile::NS_Active:
17098 jam();
17099 tsnrNodeActiveStatus = Sysfile::NS_Active;
17100 break;
17101 case Sysfile::NS_ActiveMissed_1:
17102 jam();
17103 tsnrNodeActiveStatus = Sysfile::NS_ActiveMissed_1;
17104 break;
17105 case Sysfile::NS_ActiveMissed_2:
17106 jam();
17107 tsnrNodeActiveStatus = Sysfile::NS_ActiveMissed_2;
17108 break;
17109 case Sysfile::NS_TakeOver:
17110 jam();
17111 tsnrNodeActiveStatus = Sysfile::NS_TakeOver;
17112 break;
17113 case Sysfile::NS_NotActive_NotTakenOver:
17114 jam();
17115 tsnrNodeActiveStatus = Sysfile::NS_NotActive_NotTakenOver;
17116 break;
17117 case Sysfile::NS_NotDefined:
17118 jam();
17119 tsnrNodeActiveStatus = Sysfile::NS_NotDefined;
17120 break;
17121 case Sysfile::NS_Configured:
17122 jam();
17123 tsnrNodeActiveStatus = Sysfile::NS_Configured;
17124 break;
17125 default:
17126 ndbrequire(false);
17127 tsnrNodeActiveStatus = Sysfile::NS_NotDefined; // remove warning
17128 break;
17129 }//switch
17130 Sysfile::setNodeStatus(nodePtr.i, SYSFILE->nodeStatus,
17131 tsnrNodeActiveStatus);
17132 if (nodePtr.p->nodeGroup == ZNIL) {
17133 jam();
17134 tsnrNodeGroup = NO_NODE_GROUP_ID;
17135 } else {
17136 jam();
17137 tsnrNodeGroup = nodePtr.p->nodeGroup;
17138 }//if
17139 Sysfile::setNodeGroup(nodePtr.i, SYSFILE->nodeGroups, tsnrNodeGroup);
17140 if (c_lcpState.m_participatingLQH.get(nodePtr.i))
17141 {
17142 jam();
17143 NdbNodeBitmask::set(SYSFILE->lcpActive, nodePtr.i);
17144 }//if
17145 #ifdef ERROR_INSERT
17146 else if (Sysfile::getLCPOngoing(SYSFILE->systemRestartBits))
17147 {
17148 jam();
17149 if (nodePtr.p->activeStatus == Sysfile::NS_Active)
17150 tmp.set(nodePtr.i);
17151 }
17152 #endif
17153 }//for
17154
17155 #ifdef ERROR_INSERT
17156 if (ERROR_INSERTED(7220) && !tmp.isclear())
17157 {
17158 jam();
17159
17160 NdbNodeBitmask all;
17161 nodePtr.i = cfirstAliveNode;
17162 do {
17163 jam();
17164 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
17165 all.set(nodePtr.i);
17166 nodePtr.i = nodePtr.p->nextNode;
17167 } while (nodePtr.i != RNIL);
17168
17169
17170 NodeReceiverGroup rg(DBDIH, all);
17171 signal->theData[0] = 7219;
17172 sendSignal(rg, GSN_NDB_TAMPER, signal, 1, JBA);
17173 }
17174 #endif
17175 }//Dbdih::setNodeRestartInfoBits()
17176
17177 /*************************************************************************/
17178 /* START THE GLOBAL CHECKPOINT PROTOCOL IN MASTER AT START-UP */
17179 /*************************************************************************/
startGcp(Signal * signal)17180 void Dbdih::startGcp(Signal* signal)
17181 {
17182 signal->theData[0] = DihContinueB::ZSTART_GCP;
17183 sendSignal(reference(), GSN_CONTINUEB, signal, 1, JBB);
17184
17185 startGcpMonitor(signal);
17186 }//Dbdih::startGcp()
17187
17188 void
startGcpMonitor(Signal * signal)17189 Dbdih::startGcpMonitor(Signal* signal)
17190 {
17191 jam();
17192 m_gcp_monitor.m_gcp_save.m_gci = m_gcp_save.m_gci;
17193 m_gcp_monitor.m_gcp_save.m_counter = 0;
17194 m_gcp_monitor.m_micro_gcp.m_gci = m_micro_gcp.m_current_gci;
17195 m_gcp_monitor.m_micro_gcp.m_counter = 0;
17196
17197 signal->theData[0] = DihContinueB::ZCHECK_GCP_STOP;
17198 sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 100, 1);
17199 }
17200
updateNodeInfo(FragmentstorePtr fragPtr)17201 void Dbdih::updateNodeInfo(FragmentstorePtr fragPtr)
17202 {
17203 ReplicaRecordPtr replicatePtr;
17204 Uint32 index = 0;
17205 replicatePtr.i = fragPtr.p->storedReplicas;
17206 do {
17207 jam();
17208 ptrCheckGuard(replicatePtr, creplicaFileSize, replicaRecord);
17209 ndbrequire(index < MAX_REPLICAS);
17210 fragPtr.p->activeNodes[index] = replicatePtr.p->procNode;
17211 index++;
17212 replicatePtr.i = replicatePtr.p->nextReplica;
17213 } while (replicatePtr.i != RNIL);
17214 fragPtr.p->fragReplicas = index;
17215
17216 /* ----------------------------------------------------------------------- */
17217 // We switch primary to the preferred primary if the preferred primary is
17218 // in the list.
17219 /* ----------------------------------------------------------------------- */
17220 const Uint32 prefPrim = fragPtr.p->preferredPrimary;
17221 for (Uint32 i = 1; i < index; i++) {
17222 jam();
17223 ndbrequire(i < MAX_REPLICAS);
17224 if (fragPtr.p->activeNodes[i] == prefPrim){
17225 jam();
17226 Uint32 switchNode = fragPtr.p->activeNodes[0];
17227 fragPtr.p->activeNodes[0] = prefPrim;
17228 fragPtr.p->activeNodes[i] = switchNode;
17229 break;
17230 }//if
17231 }//for
17232 }//Dbdih::updateNodeInfo()
17233
writeFragment(RWFragment * wf,FragmentstorePtr fragPtr)17234 void Dbdih::writeFragment(RWFragment* wf, FragmentstorePtr fragPtr)
17235 {
17236 writePageWord(wf, wf->fragId);
17237 writePageWord(wf, fragPtr.p->preferredPrimary);
17238 writePageWord(wf, fragPtr.p->noStoredReplicas);
17239 writePageWord(wf, fragPtr.p->noOldStoredReplicas);
17240 writePageWord(wf, fragPtr.p->distributionKey);
17241 writePageWord(wf, fragPtr.p->m_log_part_id);
17242 }//Dbdih::writeFragment()
17243
writePageWord(RWFragment * wf,Uint32 dataWord)17244 void Dbdih::writePageWord(RWFragment* wf, Uint32 dataWord)
17245 {
17246 if (wf->wordIndex >= 2048) {
17247 jam();
17248 ndbrequire(wf->wordIndex == 2048);
17249 allocpage(wf->rwfPageptr);
17250 wf->wordIndex = 32;
17251 wf->pageIndex++;
17252 ndbrequire(wf->pageIndex < NDB_ARRAY_SIZE(wf->rwfTabPtr.p->pageRef));
17253 wf->rwfTabPtr.p->pageRef[wf->pageIndex] = wf->rwfPageptr.i;
17254 wf->rwfTabPtr.p->noPages++;
17255 }//if
17256 wf->rwfPageptr.p->word[wf->wordIndex] = dataWord;
17257 wf->wordIndex++;
17258 }//Dbdih::writePageWord()
17259
writeReplicas(RWFragment * wf,Uint32 replicaStartIndex)17260 void Dbdih::writeReplicas(RWFragment* wf, Uint32 replicaStartIndex)
17261 {
17262 ReplicaRecordPtr wfReplicaPtr;
17263 wfReplicaPtr.i = replicaStartIndex;
17264 while (wfReplicaPtr.i != RNIL) {
17265 jam();
17266 ptrCheckGuard(wfReplicaPtr, creplicaFileSize, replicaRecord);
17267 writePageWord(wf, wfReplicaPtr.p->procNode);
17268 writePageWord(wf, wfReplicaPtr.p->initialGci);
17269 writePageWord(wf, wfReplicaPtr.p->noCrashedReplicas);
17270 writePageWord(wf, wfReplicaPtr.p->nextLcp);
17271 Uint32 i;
17272 for (i = 0; i < MAX_LCP_STORED; i++) {
17273 writePageWord(wf, wfReplicaPtr.p->maxGciCompleted[i]);
17274 writePageWord(wf, wfReplicaPtr.p->maxGciStarted[i]);
17275 writePageWord(wf, wfReplicaPtr.p->lcpId[i]);
17276 writePageWord(wf, wfReplicaPtr.p->lcpStatus[i]);
17277 }//if
17278 for (i = 0; i < MAX_CRASHED_REPLICAS; i++) {
17279 writePageWord(wf, wfReplicaPtr.p->createGci[i]);
17280 writePageWord(wf, wfReplicaPtr.p->replicaLastGci[i]);
17281 }//if
17282
17283 wfReplicaPtr.i = wfReplicaPtr.p->nextReplica;
17284 }//while
17285 }//Dbdih::writeReplicas()
17286
writeRestorableGci(Signal * signal,FileRecordPtr filePtr)17287 void Dbdih::writeRestorableGci(Signal* signal, FileRecordPtr filePtr)
17288 {
17289 for (Uint32 i = 0; i < Sysfile::SYSFILE_SIZE32; i++) {
17290 sysfileDataToFile[i] = sysfileData[i];
17291 }//for
17292 signal->theData[0] = filePtr.p->fileRef;
17293 signal->theData[1] = reference();
17294 signal->theData[2] = filePtr.i;
17295 signal->theData[3] = ZLIST_OF_PAIRS_SYNCH;
17296 signal->theData[4] = ZVAR_NO_CRESTART_INFO_TO_FILE;
17297 signal->theData[5] = 1; /* AMOUNT OF PAGES */
17298 signal->theData[6] = 0; /* MEMORY PAGE = 0 SINCE COMMON STORED VARIABLE */
17299 signal->theData[7] = 0;
17300
17301 if (ERROR_INSERTED(7224) && filePtr.i == crestartInfoFile[1])
17302 {
17303 jam();
17304 SET_ERROR_INSERT_VALUE(7225);
17305 sendSignalWithDelay(NDBFS_REF, GSN_FSWRITEREQ, signal, 500, 8);
17306
17307 signal->theData[0] = 9999;
17308 sendSignal(numberToRef(CMVMI, refToNode(cmasterdihref)),
17309 GSN_NDB_TAMPER, signal, 1, JBB);
17310 return;
17311 }
17312 sendSignal(NDBFS_REF, GSN_FSWRITEREQ, signal, 8, JBA);
17313 }//Dbdih::writeRestorableGci()
17314
writeTabfile(Signal * signal,TabRecord * tab,FileRecordPtr filePtr)17315 void Dbdih::writeTabfile(Signal* signal, TabRecord* tab, FileRecordPtr filePtr)
17316 {
17317 signal->theData[0] = filePtr.p->fileRef;
17318 signal->theData[1] = reference();
17319 signal->theData[2] = filePtr.i;
17320 signal->theData[3] = ZLIST_OF_PAIRS_SYNCH;
17321 signal->theData[4] = ZVAR_NO_WORD;
17322 signal->theData[5] = tab->noPages;
17323
17324 Uint32 section[2 * NDB_ARRAY_SIZE(tab->pageRef)];
17325 for (Uint32 i = 0; i < tab->noPages; i++)
17326 {
17327 section[(2 * i) + 0] = tab->pageRef[i];
17328 section[(2 * i) + 1] = i;
17329 }
17330 LinearSectionPtr ptr[3];
17331 ptr[0].p = section;
17332 ptr[0].sz = 2 * tab->noPages;
17333 sendSignal(NDBFS_REF, GSN_FSWRITEREQ, signal, 6, JBA, ptr, 1);
17334 }//Dbdih::writeTabfile()
17335
execDEBUG_SIG(Signal * signal)17336 void Dbdih::execDEBUG_SIG(Signal* signal)
17337 {
17338 signal = signal; //Avoid compiler warnings
17339 }//Dbdih::execDEBUG_SIG()
17340
17341 void
execDUMP_STATE_ORD(Signal * signal)17342 Dbdih::execDUMP_STATE_ORD(Signal* signal)
17343 {
17344 DumpStateOrd * const & dumpState = (DumpStateOrd *)&signal->theData[0];
17345 Uint32 arg = dumpState->args[0];
17346 if (arg == DumpStateOrd::DihDumpNodeRestartInfo) {
17347 infoEvent("c_nodeStartMaster.blockLcp = %d, c_nodeStartMaster.blockGcp = %d, c_nodeStartMaster.wait = %d",
17348 c_nodeStartMaster.blockLcp, c_nodeStartMaster.blockGcp, c_nodeStartMaster.wait);
17349 for (Uint32 i = 0; i < c_diverify_queue_cnt; i++)
17350 {
17351 infoEvent("[ %u : cfirstVerifyQueue = %u clastVerifyQueue = %u sz: %u]",
17352 i,
17353 c_diverify_queue[i].cfirstVerifyQueue,
17354 c_diverify_queue[i].clastVerifyQueue,
17355 capiConnectFileSize);
17356 }
17357 infoEvent("cgcpOrderBlocked = %d",
17358 cgcpOrderBlocked);
17359 }//if
17360 if (arg == DumpStateOrd::DihDumpNodeStatusInfo) {
17361 NodeRecordPtr localNodePtr;
17362 infoEvent("Printing nodeStatus of all nodes");
17363 for (localNodePtr.i = 1; localNodePtr.i < MAX_NDB_NODES; localNodePtr.i++) {
17364 ptrAss(localNodePtr, nodeRecord);
17365 if (localNodePtr.p->nodeStatus != NodeRecord::NOT_IN_CLUSTER) {
17366 infoEvent("Node = %d has status = %d",
17367 localNodePtr.i, localNodePtr.p->nodeStatus);
17368 }//if
17369 }//for
17370 }//if
17371
17372 if (arg == DumpStateOrd::DihPrintFragmentation)
17373 {
17374 infoEvent("Printing nodegroups --");
17375 for (Uint32 i = 0; i<cnoOfNodeGroups; i++)
17376 {
17377 NodeGroupRecordPtr NGPtr;
17378 NGPtr.i = c_node_groups[i];
17379 ptrCheckGuard(NGPtr, MAX_NDB_NODES, nodeGroupRecord);
17380
17381 infoEvent("NG %u(%u) ref: %u [ cnt: %u : %u %u %u %u ]",
17382 NGPtr.i, NGPtr.p->nodegroupIndex, NGPtr.p->m_ref_count,
17383 NGPtr.p->nodeCount,
17384 NGPtr.p->nodesInGroup[0], NGPtr.p->nodesInGroup[1], NGPtr.p->nodesInGroup[2], NGPtr.p->nodesInGroup[3]);
17385 }
17386
17387 infoEvent("Printing fragmentation of all tables --");
17388 for(Uint32 i = 0; i<ctabFileSize; i++){
17389 TabRecordPtr tabPtr;
17390 tabPtr.i = i;
17391 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
17392
17393 if(tabPtr.p->tabStatus != TabRecord::TS_ACTIVE)
17394 continue;
17395
17396 for(Uint32 j = 0; j < tabPtr.p->totalfragments; j++){
17397 FragmentstorePtr fragPtr;
17398 getFragstore(tabPtr.p, j, fragPtr);
17399
17400 Uint32 nodeOrder[MAX_REPLICAS];
17401 const Uint32 noOfReplicas = extractNodeInfo(fragPtr.p, nodeOrder);
17402 char buf[100];
17403 BaseString::snprintf(buf, sizeof(buf), " Table %d Fragment %d(%u) LP: %u - ", tabPtr.i, j, dihGetInstanceKey(fragPtr), fragPtr.p->m_log_part_id);
17404 for(Uint32 k = 0; k < noOfReplicas; k++){
17405 char tmp[100];
17406 BaseString::snprintf(tmp, sizeof(tmp), "%d ", nodeOrder[k]);
17407 strcat(buf, tmp);
17408 }
17409 infoEvent("%s", buf);
17410 }
17411 }
17412 }
17413
17414 if (signal->theData[0] == 7000) {
17415 infoEvent("ctimer = %d",
17416 c_lcpState.ctimer);
17417 infoEvent("cmasterState = %d", cmasterState);
17418 infoEvent("cmasterTakeOverNode = %d, ctcCounter = %d",
17419 cmasterTakeOverNode, c_lcpState.ctcCounter);
17420 }//if
17421 if (signal->theData[0] == 7001) {
17422 infoEvent("c_lcpState.keepGci = %d",
17423 c_lcpState.keepGci);
17424 infoEvent("c_lcpState.lcpStatus = %d, clcpStopGcp = %d",
17425 c_lcpState.lcpStatus,
17426 c_lcpState.lcpStopGcp);
17427 infoEvent("cimmediateLcpStart = %d",
17428 c_lcpState.immediateLcpStart);
17429 }//if
17430 if (signal->theData[0] == 7002) {
17431 infoEvent("cnoOfActiveTables = %d",
17432 cnoOfActiveTables);
17433 infoEvent("cdictblockref = %d, cfailurenr = %d",
17434 cdictblockref, cfailurenr);
17435 infoEvent("con_lineNodes = %d, reference() = %d, creceivedfrag = %d",
17436 con_lineNodes, reference(), creceivedfrag);
17437 }//if
17438 if (signal->theData[0] == 7003) {
17439 infoEvent("cfirstAliveNode = %d, cgckptflag = %d",
17440 cfirstAliveNode, cgckptflag);
17441 infoEvent("clocallqhblockref = %d, clocaltcblockref = %d, cgcpOrderBlocked = %d",
17442 clocallqhblockref, clocaltcblockref, cgcpOrderBlocked);
17443 infoEvent("cstarttype = %d, csystemnodes = %d",
17444 cstarttype, csystemnodes);
17445 }//if
17446 if (signal->theData[0] == 7004) {
17447 infoEvent("cmasterdihref = %d, cownNodeId = %d",
17448 cmasterdihref, cownNodeId);
17449 infoEvent("cndbStartReqBlockref = %d, cremainingfrags = %d",
17450 cndbStartReqBlockref, cremainingfrags);
17451 }//if
17452 if (signal->theData[0] == 7005) {
17453 infoEvent("crestartGci = %d",
17454 crestartGci);
17455 }//if
17456 if (signal->theData[0] == 7006) {
17457 infoEvent("clcpDelay = %d",
17458 c_lcpState.clcpDelay);
17459 infoEvent("cmasterNodeId = %d", cmasterNodeId);
17460 infoEvent("c_nodeStartMaster.startNode = %d, c_nodeStartMaster.wait = %d",
17461 c_nodeStartMaster.startNode, c_nodeStartMaster.wait);
17462 }//if
17463 if (signal->theData[0] == 7007) {
17464 infoEvent("c_nodeStartMaster.failNr = %d", c_nodeStartMaster.failNr);
17465 infoEvent("c_nodeStartMaster.startInfoErrorCode = %d",
17466 c_nodeStartMaster.startInfoErrorCode);
17467 infoEvent("c_nodeStartMaster.blockLcp = %d, c_nodeStartMaster.blockGcp = %d",
17468 c_nodeStartMaster.blockLcp, c_nodeStartMaster.blockGcp);
17469 }//if
17470 if (signal->theData[0] == 7008) {
17471 infoEvent("cfirstDeadNode = %d, cstartPhase = %d, cnoReplicas = %d",
17472 cfirstDeadNode, cstartPhase, cnoReplicas);
17473 infoEvent("cwaitLcpSr = %d",cwaitLcpSr);
17474 }//if
17475 if (signal->theData[0] == 7009) {
17476 infoEvent("ccalcOldestRestorableGci = %d, cnoOfNodeGroups = %d",
17477 c_lcpState.oldestRestorableGci, cnoOfNodeGroups);
17478 infoEvent("crestartGci = %d",
17479 crestartGci);
17480 }//if
17481 if (signal->theData[0] == 7010) {
17482 infoEvent("c_lcpState.lcpStatusUpdatedPlace = %d, cLcpStart = %d",
17483 c_lcpState.lcpStatusUpdatedPlace, c_lcpState.lcpStart);
17484 infoEvent("c_blockCommit = %d, c_blockCommitNo = %d",
17485 c_blockCommit, c_blockCommitNo);
17486 }//if
17487 if (signal->theData[0] == 7011){
17488 infoEvent("c_COPY_GCIREQ_Counter = %s",
17489 c_COPY_GCIREQ_Counter.getText());
17490 infoEvent("c_COPY_TABREQ_Counter = %s",
17491 c_COPY_TABREQ_Counter.getText());
17492 infoEvent("c_CREATE_FRAGREQ_Counter = %s",
17493 c_CREATE_FRAGREQ_Counter.getText());
17494 infoEvent("c_DIH_SWITCH_REPLICA_REQ_Counter = %s",
17495 c_DIH_SWITCH_REPLICA_REQ_Counter.getText());
17496 infoEvent("c_EMPTY_LCP_REQ_Counter = %s",c_EMPTY_LCP_REQ_Counter.getText());
17497 infoEvent("c_GCP_COMMIT_Counter = %s", c_GCP_COMMIT_Counter.getText());
17498 infoEvent("c_GCP_PREPARE_Counter = %s", c_GCP_PREPARE_Counter.getText());
17499 infoEvent("c_GCP_SAVEREQ_Counter = %s", c_GCP_SAVEREQ_Counter.getText());
17500 infoEvent("c_SUB_GCP_COMPLETE_REP_Counter = %s",
17501 c_SUB_GCP_COMPLETE_REP_Counter.getText());
17502 infoEvent("c_INCL_NODEREQ_Counter = %s", c_INCL_NODEREQ_Counter.getText());
17503 infoEvent("c_MASTER_GCPREQ_Counter = %s",
17504 c_MASTER_GCPREQ_Counter.getText());
17505 infoEvent("c_MASTER_LCPREQ_Counter = %s",
17506 c_MASTER_LCPREQ_Counter.getText());
17507 infoEvent("c_START_INFOREQ_Counter = %s",
17508 c_START_INFOREQ_Counter.getText());
17509 infoEvent("c_START_RECREQ_Counter = %s", c_START_RECREQ_Counter.getText());
17510 infoEvent("c_STOP_ME_REQ_Counter = %s", c_STOP_ME_REQ_Counter.getText());
17511 infoEvent("c_TC_CLOPSIZEREQ_Counter = %s",
17512 c_TC_CLOPSIZEREQ_Counter.getText());
17513 infoEvent("c_TCGETOPSIZEREQ_Counter = %s",
17514 c_TCGETOPSIZEREQ_Counter.getText());
17515 }
17516
17517 if(signal->theData[0] == 7012){
17518 char buf[8*_NDB_NODE_BITMASK_SIZE+1];
17519 infoEvent("ParticipatingDIH = %s", c_lcpState.m_participatingDIH.getText(buf));
17520 infoEvent("ParticipatingLQH = %s", c_lcpState.m_participatingLQH.getText(buf));
17521 infoEvent("m_LCP_COMPLETE_REP_Counter_DIH = %s",
17522 c_lcpState.m_LCP_COMPLETE_REP_Counter_DIH.getText());
17523 infoEvent("m_LCP_COMPLETE_REP_Counter_LQH = %s",
17524 c_lcpState.m_LCP_COMPLETE_REP_Counter_LQH.getText());
17525 infoEvent("m_LAST_LCP_FRAG_ORD = %s",
17526 c_lcpState.m_LAST_LCP_FRAG_ORD.getText());
17527 infoEvent("m_LCP_COMPLETE_REP_From_Master_Received = %d",
17528 c_lcpState.m_LCP_COMPLETE_REP_From_Master_Received);
17529
17530 NodeRecordPtr nodePtr;
17531 for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
17532 jam();
17533 ptrAss(nodePtr, nodeRecord);
17534 if(nodePtr.p->nodeStatus == NodeRecord::ALIVE){
17535 Uint32 i;
17536 for(i = 0; i<nodePtr.p->noOfStartedChkpt; i++){
17537 infoEvent("Node %d: started: table=%d fragment=%d replica=%d",
17538 nodePtr.i,
17539 nodePtr.p->startedChkpt[i].tableId,
17540 nodePtr.p->startedChkpt[i].fragId,
17541 nodePtr.p->startedChkpt[i].replicaPtr);
17542 }
17543
17544 for(i = 0; i<nodePtr.p->noOfQueuedChkpt; i++){
17545 infoEvent("Node %d: queued: table=%d fragment=%d replica=%d",
17546 nodePtr.i,
17547 nodePtr.p->queuedChkpt[i].tableId,
17548 nodePtr.p->queuedChkpt[i].fragId,
17549 nodePtr.p->queuedChkpt[i].replicaPtr);
17550 }
17551 }
17552 }
17553 }
17554
17555 if(arg == 7019 && signal->getLength() == 2 &&
17556 signal->theData[1] < MAX_NDB_NODES)
17557 {
17558 char buf2[8+1];
17559 NodeRecordPtr nodePtr;
17560 nodePtr.i = signal->theData[1];
17561 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
17562 infoEvent("NF Node %d tc: %d lqh: %d dih: %d dict: %d recNODE_FAILREP: %d",
17563 nodePtr.i,
17564 nodePtr.p->dbtcFailCompleted,
17565 nodePtr.p->dblqhFailCompleted,
17566 nodePtr.p->dbdihFailCompleted,
17567 nodePtr.p->dbdictFailCompleted,
17568 nodePtr.p->recNODE_FAILREP);
17569 infoEvent(" m_NF_COMPLETE_REP: %s m_nodefailSteps: %s",
17570 nodePtr.p->m_NF_COMPLETE_REP.getText(),
17571 nodePtr.p->m_nodefailSteps.getText(buf2));
17572 }
17573
17574 if(arg == 7020 && signal->getLength() > 3)
17575 {
17576 Uint32 gsn= signal->theData[1];
17577 Uint32 block= signal->theData[2];
17578 Uint32 length= signal->length() - 3;
17579 memmove(signal->theData, signal->theData+3, 4*length);
17580 sendSignal(numberToRef(block, getOwnNodeId()), gsn, signal, length, JBB);
17581
17582 warningEvent("-- SENDING CUSTOM SIGNAL --");
17583 char buf[100], buf2[100];
17584 buf2[0]= 0;
17585 for(Uint32 i = 0; i<length; i++)
17586 {
17587 BaseString::snprintf(buf, 100, "%s %.8x", buf2, signal->theData[i]);
17588 BaseString::snprintf(buf2, 100, "%s", buf);
17589 }
17590 warningEvent("gsn: %d block: %s, length: %d theData: %s",
17591 gsn, getBlockName(block, "UNKNOWN"), length, buf);
17592
17593 g_eventLogger->warning("-- SENDING CUSTOM SIGNAL --");
17594 g_eventLogger->warning("gsn: %d block: %s, length: %d theData: %s",
17595 gsn, getBlockName(block, "UNKNOWN"), length, buf);
17596 }
17597
17598 if(arg == DumpStateOrd::DihDumpLCPState){
17599 infoEvent("-- Node %d LCP STATE --", getOwnNodeId());
17600 infoEvent("lcpStatus = %d (update place = %d) ",
17601 c_lcpState.lcpStatus, c_lcpState.lcpStatusUpdatedPlace);
17602 infoEvent
17603 ("lcpStart = %d lcpStopGcp = %d keepGci = %d oldestRestorable = %d",
17604 c_lcpState.lcpStart, c_lcpState.lcpStopGcp,
17605 c_lcpState.keepGci, c_lcpState.oldestRestorableGci);
17606
17607 infoEvent
17608 ("immediateLcpStart = %d masterLcpNodeId = %d",
17609 c_lcpState.immediateLcpStart,
17610 refToNode(c_lcpState.m_masterLcpDihRef));
17611
17612 for (Uint32 i = 0; i<10; i++)
17613 {
17614 infoEvent("%u : status: %u place: %u", i,
17615 c_lcpState.m_saveState[i].m_status,
17616 c_lcpState.m_saveState[i].m_place);
17617 }
17618
17619 infoEvent("-- Node %d LCP STATE --", getOwnNodeId());
17620 }
17621
17622 if(arg == DumpStateOrd::DihDumpLCPMasterTakeOver){
17623 infoEvent("-- Node %d LCP MASTER TAKE OVER STATE --", getOwnNodeId());
17624 infoEvent
17625 ("c_lcpMasterTakeOverState.state = %d updatePlace = %d failedNodeId = %d",
17626 c_lcpMasterTakeOverState.state,
17627 c_lcpMasterTakeOverState.updatePlace,
17628 c_lcpMasterTakeOverState.failedNodeId);
17629
17630 infoEvent("c_lcpMasterTakeOverState.minTableId = %u minFragId = %u",
17631 c_lcpMasterTakeOverState.minTableId,
17632 c_lcpMasterTakeOverState.minFragId);
17633
17634 infoEvent("-- Node %d LCP MASTER TAKE OVER STATE --", getOwnNodeId());
17635 }
17636
17637 if (signal->theData[0] == 7015)
17638 {
17639 if (signal->getLength() == 1)
17640 {
17641 signal->theData[1] = 0;
17642 }
17643
17644 Uint32 tableId = signal->theData[1];
17645 if (tableId < ctabFileSize)
17646 {
17647 signal->theData[0] = 7021;
17648 execDUMP_STATE_ORD(signal);
17649 signal->theData[0] = 7015;
17650 signal->theData[1] = tableId + 1;
17651 sendSignal(reference(), GSN_DUMP_STATE_ORD, signal, 2, JBB);
17652 }
17653 }
17654
17655 if(arg == DumpStateOrd::EnableUndoDelayDataWrite){
17656 g_eventLogger->info("Dbdih:: delay write of datapages for table = %d",
17657 dumpState->args[1]);
17658 // Send this dump to ACC and TUP
17659 sendSignal(DBACC_REF, GSN_DUMP_STATE_ORD, signal, 2, JBB);
17660 sendSignal(DBTUP_REF, GSN_DUMP_STATE_ORD, signal, 2, JBB);
17661
17662 // Start immediate LCP
17663 add_lcp_counter(&c_lcpState.ctimer, (1 << 31));
17664 return;
17665 }
17666
17667 if (signal->theData[0] == DumpStateOrd::DihAllAllowNodeStart) {
17668 for (Uint32 i = 1; i < MAX_NDB_NODES; i++)
17669 setAllowNodeStart(i, true);
17670 return;
17671 }//if
17672 if (signal->theData[0] == DumpStateOrd::DihMinTimeBetweenLCP) {
17673 // Set time between LCP to min value
17674 if (signal->getLength() == 2)
17675 {
17676 Uint32 tmp;
17677 const ndb_mgm_configuration_iterator * p =
17678 m_ctx.m_config.getOwnConfigIterator();
17679 ndbrequire(p != 0);
17680 ndb_mgm_get_int_parameter(p, CFG_DB_LCP_INTERVAL, &tmp);
17681 g_eventLogger->info("Reset time between LCP to %u", tmp);
17682 c_lcpState.clcpDelay = tmp;
17683 }
17684 else
17685 {
17686 g_eventLogger->info("Set time between LCP to min value");
17687 c_lcpState.clcpDelay = 0; // TimeBetweenLocalCheckpoints.min
17688 }
17689 return;
17690 }
17691 if (signal->theData[0] == DumpStateOrd::DihMaxTimeBetweenLCP) {
17692 // Set time between LCP to max value
17693 g_eventLogger->info("Set time between LCP to max value");
17694 c_lcpState.clcpDelay = 31; // TimeBetweenLocalCheckpoints.max
17695 return;
17696 }
17697
17698 if(arg == 7098){
17699 if(signal->length() == 3){
17700 jam();
17701 infoEvent("startLcpRoundLoopLab(tabel=%d, fragment=%d)",
17702 signal->theData[1], signal->theData[2]);
17703 startLcpRoundLoopLab(signal, signal->theData[1], signal->theData[2]);
17704 return;
17705 } else {
17706 infoEvent("Invalid no of arguments to 7098 - startLcpRoundLoopLab -"
17707 " expected 2 (tableId, fragmentId)");
17708 }
17709 }
17710
17711 if (arg == DumpStateOrd::DihStartLcpImmediately)
17712 {
17713 jam();
17714 add_lcp_counter(&c_lcpState.ctimer, (1 << 31));
17715
17716 /**
17717 * If sent from local LQH, forward to master
17718 */
17719 if (cmasterNodeId != getOwnNodeId() &&
17720 refToMain(signal->getSendersBlockRef()) == DBLQH)
17721 {
17722 jam();
17723 sendSignal(cmasterdihref, GSN_DUMP_STATE_ORD, signal, 1, JBB);
17724 }
17725 return;
17726 }
17727
17728 if (arg == DumpStateOrd::DihSetTimeBetweenGcp)
17729 {
17730 Uint32 tmp = 0;
17731 if (signal->getLength() == 1)
17732 {
17733 const ndb_mgm_configuration_iterator * p =
17734 m_ctx.m_config.getOwnConfigIterator();
17735 ndbrequire(p != 0);
17736 ndb_mgm_get_int_parameter(p, CFG_DB_GCP_INTERVAL, &tmp);
17737 }
17738 else
17739 {
17740 tmp = signal->theData[1];
17741 }
17742 m_gcp_save.m_master.m_time_between_gcp = tmp;
17743 g_eventLogger->info("Setting time between gcp : %d", tmp);
17744 }
17745
17746 if (arg == 7021 && signal->getLength() == 2)
17747 {
17748 TabRecordPtr tabPtr;
17749 tabPtr.i = signal->theData[1];
17750 if (tabPtr.i >= ctabFileSize)
17751 return;
17752
17753 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
17754
17755 if(tabPtr.p->tabStatus != TabRecord::TS_ACTIVE)
17756 return;
17757
17758 infoEvent
17759 ("Table %d: TabCopyStatus: %d TabUpdateStatus: %d TabLcpStatus: %d",
17760 tabPtr.i,
17761 tabPtr.p->tabCopyStatus,
17762 tabPtr.p->tabUpdateState,
17763 tabPtr.p->tabLcpStatus);
17764
17765 FragmentstorePtr fragPtr;
17766 for (Uint32 fid = 0; fid < tabPtr.p->totalfragments; fid++) {
17767 jam();
17768 getFragstore(tabPtr.p, fid, fragPtr);
17769
17770 char buf[100], buf2[100];
17771 BaseString::snprintf(buf, sizeof(buf), " Fragment %d: noLcpReplicas==%d ",
17772 fid, fragPtr.p->noLcpReplicas);
17773
17774 Uint32 num=0;
17775 ReplicaRecordPtr replicaPtr;
17776 replicaPtr.i = fragPtr.p->storedReplicas;
17777 do {
17778 ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
17779 BaseString::snprintf(buf2, sizeof(buf2), "%s %d(on %d)=%d(%s)",
17780 buf, num,
17781 replicaPtr.p->procNode,
17782 replicaPtr.p->lcpIdStarted,
17783 replicaPtr.p->lcpOngoingFlag ? "Ongoing" : "Idle");
17784 BaseString::snprintf(buf, sizeof(buf), "%s", buf2);
17785
17786 num++;
17787 replicaPtr.i = replicaPtr.p->nextReplica;
17788 } while (replicaPtr.i != RNIL);
17789 infoEvent("%s", buf);
17790 }
17791 }
17792
17793 if (arg == 7022)
17794 {
17795 jam();
17796 crashSystemAtGcpStop(signal, true);
17797 }
17798
17799 if (arg == 7025)
17800 {
17801 jam();
17802 dumpGcpStop();
17803 return;
17804 }
17805
17806 #ifdef GCP_TIMER_HACK
17807 if (signal->theData[0] == 7901)
17808 globalData.gcp_timer_limit = signal->theData[1];
17809 #endif
17810 if (arg == 7023)
17811 {
17812 /**
17813 * Dump all active TakeOver
17814 */
17815 Ptr<TakeOverRecord> ptr;
17816 ptr.i = signal->theData[1];
17817 if (signal->getLength() == 1)
17818 {
17819 infoEvent("Starting dump all active take-over");
17820 c_activeTakeOverList.first(ptr);
17821 }
17822
17823 if (ptr.i == RNIL)
17824 {
17825 infoEvent("Dump all active take-over done");
17826 return;
17827 }
17828
17829 c_activeTakeOverList.getPtr(ptr);
17830 infoEvent("TakeOverPtr(%u) starting: %u flags: 0x%x ref: 0x%x, data: %u",
17831 ptr.i,
17832 ptr.p->toStartingNode,
17833 ptr.p->m_flags,
17834 ptr.p->m_senderRef,
17835 ptr.p->m_senderData);
17836 infoEvent("slaveState: %u masterState: %u",
17837 ptr.p->toSlaveStatus, ptr.p->toMasterStatus);
17838 infoEvent("restorableGci: %u startGci: %u tab: %u frag: %u src: %u max: %u",
17839 ptr.p->restorableGci, ptr.p->startGci,
17840 ptr.p->toCurrentTabref, ptr.p->toCurrentFragid,
17841 ptr.p->toCopyNode, ptr.p->maxPage);
17842
17843 c_activeTakeOverList.next(ptr);
17844 signal->theData[0] = arg;
17845 signal->theData[1] = ptr.i;
17846 }
17847
17848 if (arg == DumpStateOrd::SchemaResourceSnapshot)
17849 {
17850 RSS_OP_SNAPSHOT_SAVE(cremainingfrags);
17851 RSS_OP_SNAPSHOT_SAVE(cnoFreeReplicaRec);
17852
17853 {
17854 Uint32 cnghash = 0;
17855 NodeGroupRecordPtr NGPtr;
17856 for (Uint32 i = 0; i<cnoOfNodeGroups; i++)
17857 {
17858 NGPtr.i = c_node_groups[i];
17859 ptrCheckGuard(NGPtr, MAX_NDB_NODES, nodeGroupRecord);
17860 cnghash = (cnghash * 33) + NGPtr.p->m_ref_count;
17861 }
17862 RSS_OP_SNAPSHOT_SAVE(cnghash);
17863 }
17864 return;
17865 }
17866
17867 if (arg == DumpStateOrd::SchemaResourceCheckLeak)
17868 {
17869 RSS_OP_SNAPSHOT_CHECK(cremainingfrags);
17870 RSS_OP_SNAPSHOT_SAVE(cnoFreeReplicaRec);
17871
17872 {
17873 Uint32 cnghash = 0;
17874 NodeGroupRecordPtr NGPtr;
17875 for (Uint32 i = 0; i<cnoOfNodeGroups; i++)
17876 {
17877 NGPtr.i = c_node_groups[i];
17878 ptrCheckGuard(NGPtr, MAX_NDB_NODES, nodeGroupRecord);
17879 cnghash = (cnghash * 33) + NGPtr.p->m_ref_count;
17880 }
17881 RSS_OP_SNAPSHOT_CHECK(cnghash);
17882 }
17883 }
17884
17885 DECLARE_DUMP0(DBDIH, 7213, "Set error 7213 with extra arg")
17886 {
17887 SET_ERROR_INSERT_VALUE2(7213, signal->theData[1]);
17888 return;
17889 }
17890 DECLARE_DUMP0(DBDIH, 7214, "Set error 7214 with extra arg")
17891 {
17892 SET_ERROR_INSERT_VALUE2(7214, signal->theData[1]);
17893 return;
17894 }
17895
17896 DECLARE_DUMP0(DBDIH, 7216, "Set error 7216 with extra arg")
17897 {
17898 SET_ERROR_INSERT_VALUE2(7216, signal->theData[1]);
17899 return;
17900 }
17901 DECLARE_DUMP0(DBDIH, 6099, "Start microgcp")
17902 {
17903 if (isMaster())
17904 {
17905 jam();
17906 m_micro_gcp.m_master.m_start_time = 0;
17907 }
17908 else
17909 {
17910 jam();
17911 sendSignal(cmasterdihref, GSN_DUMP_STATE_ORD, signal, 1, JBB);
17912 }
17913 return;
17914 }
17915 DECLARE_DUMP0(DBDIH, 7999, "Set error code with extra arg")
17916 {
17917 SET_ERROR_INSERT_VALUE2(signal->theData[1],
17918 signal->theData[2]);
17919 }
17920 }//Dbdih::execDUMP_STATE_ORD()
17921
17922 void
execPREP_DROP_TAB_REQ(Signal * signal)17923 Dbdih::execPREP_DROP_TAB_REQ(Signal* signal){
17924 jamEntry();
17925
17926 PrepDropTabReq* req = (PrepDropTabReq*)signal->getDataPtr();
17927
17928 TabRecordPtr tabPtr;
17929 tabPtr.i = req->tableId;
17930 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
17931
17932 Uint32 senderRef = req->senderRef;
17933 Uint32 senderData = req->senderData;
17934
17935 PrepDropTabRef::ErrorCode err = PrepDropTabRef::OK;
17936 { /**
17937 * Check table state
17938 */
17939 bool ok = false;
17940 switch(tabPtr.p->tabStatus){
17941 case TabRecord::TS_IDLE:
17942 ok = true;
17943 jam();
17944 err = PrepDropTabRef::NoSuchTable;
17945 break;
17946 case TabRecord::TS_DROPPING:
17947 ok = true;
17948 jam();
17949 err = PrepDropTabRef::PrepDropInProgress;
17950 break;
17951 case TabRecord::TS_CREATING:
17952 jam();
17953 ok = true;
17954 break;
17955 case TabRecord::TS_ACTIVE:
17956 ok = true;
17957 jam();
17958 break;
17959 }
17960 ndbrequire(ok);
17961 }
17962
17963 if(err != PrepDropTabRef::OK)
17964 {
17965 jam();
17966 PrepDropTabRef* ref = (PrepDropTabRef*)signal->getDataPtrSend();
17967 ref->senderRef = reference();
17968 ref->senderData = senderData;
17969 ref->tableId = tabPtr.i;
17970 ref->errorCode = err;
17971 sendSignal(senderRef, GSN_PREP_DROP_TAB_REF, signal,
17972 PrepDropTabRef::SignalLength, JBB);
17973 return;
17974 }
17975
17976 tabPtr.p->tabStatus = TabRecord::TS_DROPPING;
17977 PrepDropTabConf* conf = (PrepDropTabConf*)signal->getDataPtrSend();
17978 conf->tableId = tabPtr.i;
17979 conf->senderRef = reference();
17980 conf->senderData = senderData;
17981 sendSignal(senderRef, GSN_PREP_DROP_TAB_CONF,
17982 signal, PrepDropTabConf::SignalLength, JBB);
17983 }
17984
17985 void
waitDropTabWritingToFile(Signal * signal,TabRecordPtr tabPtr)17986 Dbdih::waitDropTabWritingToFile(Signal* signal, TabRecordPtr tabPtr){
17987
17988 if (tabPtr.p->tabLcpStatus == TabRecord::TLS_WRITING_TO_FILE)
17989 {
17990 jam();
17991 signal->theData[0] = DihContinueB::WAIT_DROP_TAB_WRITING_TO_FILE;
17992 signal->theData[1] = tabPtr.i;
17993 sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 100, 2);
17994 return;
17995 }
17996
17997 ndbrequire(tabPtr.p->tabLcpStatus == TabRecord::TLS_COMPLETED);
17998 checkDropTabComplete(signal, tabPtr);
17999 }
18000
18001 void
checkDropTabComplete(Signal * signal,TabRecordPtr tabPtr)18002 Dbdih::checkDropTabComplete(Signal* signal, TabRecordPtr tabPtr)
18003 {
18004 startDeleteFile(signal, tabPtr);
18005 }
18006
18007 void
execNDB_TAMPER(Signal * signal)18008 Dbdih::execNDB_TAMPER(Signal* signal)
18009 {
18010 if ((ERROR_INSERTED(7011)) &&
18011 (signal->theData[0] == 7012)) {
18012 CLEAR_ERROR_INSERT_VALUE;
18013 calculateKeepGciLab(signal, 0, 0);
18014 return;
18015 }//if
18016 SET_ERROR_INSERT_VALUE(signal->theData[0]);
18017 return;
18018 }//Dbdih::execNDB_TAMPER()
18019
execBLOCK_COMMIT_ORD(Signal * signal)18020 void Dbdih::execBLOCK_COMMIT_ORD(Signal* signal){
18021 BlockCommitOrd* const block = (BlockCommitOrd *)&signal->theData[0];
18022
18023 jamEntry();
18024
18025 c_blockCommit = true;
18026 c_blockCommitNo = block->failNo;
18027 }
18028
execUNBLOCK_COMMIT_ORD(Signal * signal)18029 void Dbdih::execUNBLOCK_COMMIT_ORD(Signal* signal){
18030 UnblockCommitOrd* const unblock = (UnblockCommitOrd *)&signal->theData[0];
18031 (void)unblock;
18032
18033 jamEntry();
18034
18035 if(c_blockCommit == true)
18036 {
18037 jam();
18038
18039 c_blockCommit = false;
18040 for (Uint32 i = 0; i<c_diverify_queue_cnt; i++)
18041 {
18042 c_diverify_queue[i].m_empty_done = 0;
18043 emptyverificbuffer(signal, i, true);
18044 }
18045 }
18046 }
18047
execSTOP_PERM_REQ(Signal * signal)18048 void Dbdih::execSTOP_PERM_REQ(Signal* signal){
18049
18050 jamEntry();
18051
18052 StopPermReq* const req = (StopPermReq*)&signal->theData[0];
18053 StopPermRef* const ref = (StopPermRef*)&signal->theData[0];
18054
18055 const Uint32 senderData = req->senderData;
18056 const BlockReference senderRef = req->senderRef;
18057 const NodeId nodeId = refToNode(senderRef);
18058
18059 if (isMaster()) {
18060 /**
18061 * Master
18062 */
18063 jam();
18064 CRASH_INSERTION(7065);
18065 if (c_stopPermMaster.clientRef != 0) {
18066 jam();
18067
18068 ref->senderData = senderData;
18069 ref->errorCode = StopPermRef::NodeShutdownInProgress;
18070 sendSignal(senderRef, GSN_STOP_PERM_REF, signal,
18071 StopPermRef::SignalLength, JBB);
18072 return;
18073 }//if
18074
18075 if (c_nodeStartMaster.activeState) {
18076 jam();
18077 ref->senderData = senderData;
18078 ref->errorCode = StopPermRef::NodeStartInProgress;
18079 sendSignal(senderRef, GSN_STOP_PERM_REF, signal,
18080 StopPermRef::SignalLength, JBB);
18081 return;
18082 }//if
18083
18084 /**
18085 * Lock
18086 */
18087 c_nodeStartMaster.activeState = true;
18088 c_stopPermMaster.clientRef = senderRef;
18089
18090 c_stopPermMaster.clientData = senderData;
18091 c_stopPermMaster.returnValue = 0;
18092 c_switchReplicas.clear();
18093
18094 Mutex mutex(signal, c_mutexMgr, c_switchPrimaryMutexHandle);
18095 Callback c = { safe_cast(&Dbdih::switch_primary_stop_node), nodeId };
18096 ndbrequire(mutex.lock(c));
18097 } else {
18098 /**
18099 * Proxy part
18100 */
18101 jam();
18102 CRASH_INSERTION(7066);
18103 if(c_stopPermProxy.clientRef != 0){
18104 jam();
18105 ref->senderData = senderData;
18106 ref->errorCode = StopPermRef::NodeShutdownInProgress;
18107 sendSignal(senderRef, GSN_STOP_PERM_REF, signal, 2, JBB);
18108 return;
18109 }//if
18110
18111 c_stopPermProxy.clientRef = senderRef;
18112 c_stopPermProxy.masterRef = cmasterdihref;
18113 c_stopPermProxy.clientData = senderData;
18114
18115 req->senderRef = reference();
18116 req->senderData = senderData;
18117 sendSignal(cmasterdihref, GSN_STOP_PERM_REQ, signal,
18118 StopPermReq::SignalLength, JBB);
18119 }//if
18120 }//Dbdih::execSTOP_PERM_REQ()
18121
18122 void
switch_primary_stop_node(Signal * signal,Uint32 node_id,Uint32 ret_val)18123 Dbdih::switch_primary_stop_node(Signal* signal, Uint32 node_id, Uint32 ret_val)
18124 {
18125 ndbrequire(ret_val == 0);
18126 signal->theData[0] = DihContinueB::SwitchReplica;
18127 signal->theData[1] = node_id;
18128 signal->theData[2] = 0; // table id
18129 signal->theData[3] = 0; // fragment id
18130 sendSignal(reference(), GSN_CONTINUEB, signal, 4, JBB);
18131 }
18132
execSTOP_PERM_REF(Signal * signal)18133 void Dbdih::execSTOP_PERM_REF(Signal* signal)
18134 {
18135 jamEntry();
18136 ndbrequire(c_stopPermProxy.clientRef != 0);
18137 ndbrequire(c_stopPermProxy.masterRef == signal->senderBlockRef());
18138 sendSignal(c_stopPermProxy.clientRef, GSN_STOP_PERM_REF, signal, 2, JBB);
18139 c_stopPermProxy.clientRef = 0;
18140 }//Dbdih::execSTOP_PERM_REF()
18141
execSTOP_PERM_CONF(Signal * signal)18142 void Dbdih::execSTOP_PERM_CONF(Signal* signal)
18143 {
18144 jamEntry();
18145 ndbrequire(c_stopPermProxy.clientRef != 0);
18146 ndbrequire(c_stopPermProxy.masterRef == signal->senderBlockRef());
18147 sendSignal(c_stopPermProxy.clientRef, GSN_STOP_PERM_CONF, signal, 1, JBB);
18148 c_stopPermProxy.clientRef = 0;
18149 }//Dbdih::execSTOP_PERM_CONF()
18150
execDIH_SWITCH_REPLICA_REQ(Signal * signal)18151 void Dbdih::execDIH_SWITCH_REPLICA_REQ(Signal* signal)
18152 {
18153 jamEntry();
18154 DihSwitchReplicaReq* const req = (DihSwitchReplicaReq*)&signal->theData[0];
18155 const Uint32 tableId = req->tableId;
18156 const Uint32 fragNo = req->fragNo;
18157 const BlockReference senderRef = req->senderRef;
18158
18159 CRASH_INSERTION(7067);
18160 TabRecordPtr tabPtr;
18161 tabPtr.i = tableId;
18162 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
18163
18164 ndbrequire(tabPtr.p->tabStatus == TabRecord::TS_ACTIVE);
18165 if (tabPtr.p->tabCopyStatus != TabRecord::CS_IDLE) {
18166 jam();
18167 sendSignal(reference(), GSN_DIH_SWITCH_REPLICA_REQ, signal,
18168 DihSwitchReplicaReq::SignalLength, JBB);
18169 return;
18170 }//if
18171 FragmentstorePtr fragPtr;
18172 getFragstore(tabPtr.p, fragNo, fragPtr);
18173
18174 /**
18175 * Do funky stuff
18176 */
18177 Uint32 oldOrder[MAX_REPLICAS];
18178 const Uint32 noOfReplicas = extractNodeInfo(fragPtr.p, oldOrder);
18179
18180 if (noOfReplicas < req->noOfReplicas) {
18181 jam();
18182 //---------------------------------------------------------------------
18183 // A crash occurred in the middle of our switch handling.
18184 //---------------------------------------------------------------------
18185 DihSwitchReplicaRef* const ref = (DihSwitchReplicaRef*)&signal->theData[0];
18186 ref->senderNode = cownNodeId;
18187 ref->errorCode = StopPermRef::NF_CausedAbortOfStopProcedure;
18188 sendSignal(senderRef, GSN_DIH_SWITCH_REPLICA_REF, signal,
18189 DihSwitchReplicaRef::SignalLength, JBB);
18190 }//if
18191
18192 DIH_TAB_WRITE_LOCK(tabPtr.p);
18193 for (Uint32 i = 0; i < noOfReplicas; i++) {
18194 jam();
18195 ndbrequire(i < MAX_REPLICAS);
18196 fragPtr.p->activeNodes[i] = req->newNodeOrder[i];
18197 }//for
18198 DIH_TAB_WRITE_UNLOCK(tabPtr.p);
18199
18200 /**
18201 * Reply
18202 */
18203 DihSwitchReplicaConf* const conf = (DihSwitchReplicaConf*)&signal->theData[0];
18204 conf->senderNode = cownNodeId;
18205 sendSignal(senderRef, GSN_DIH_SWITCH_REPLICA_CONF, signal,
18206 DihSwitchReplicaConf::SignalLength, JBB);
18207 }//Dbdih::execDIH_SWITCH_REPLICA_REQ()
18208
execDIH_SWITCH_REPLICA_CONF(Signal * signal)18209 void Dbdih::execDIH_SWITCH_REPLICA_CONF(Signal* signal)
18210 {
18211 jamEntry();
18212 /**
18213 * Response to master
18214 */
18215 CRASH_INSERTION(7068);
18216 DihSwitchReplicaConf* const conf = (DihSwitchReplicaConf*)&signal->theData[0];
18217 switchReplicaReply(signal, conf->senderNode);
18218 }//Dbdih::execDIH_SWITCH_REPLICA_CONF()
18219
execDIH_SWITCH_REPLICA_REF(Signal * signal)18220 void Dbdih::execDIH_SWITCH_REPLICA_REF(Signal* signal)
18221 {
18222 jamEntry();
18223 DihSwitchReplicaRef* const ref = (DihSwitchReplicaRef*)&signal->theData[0];
18224 if(c_stopPermMaster.returnValue == 0){
18225 jam();
18226 c_stopPermMaster.returnValue = ref->errorCode;
18227 }//if
18228 switchReplicaReply(signal, ref->senderNode);
18229 }//Dbdih::execDIH_SWITCH_REPLICA_REF()
18230
switchReplicaReply(Signal * signal,NodeId nodeId)18231 void Dbdih::switchReplicaReply(Signal* signal,
18232 NodeId nodeId){
18233 jam();
18234 receiveLoopMacro(DIH_SWITCH_REPLICA_REQ, nodeId);
18235 //------------------------------------------------------
18236 // We have received all responses from the nodes. Thus
18237 // we have completed switching replica roles. Continue
18238 // with the next fragment.
18239 //------------------------------------------------------
18240 if(c_stopPermMaster.returnValue != 0){
18241 jam();
18242 c_switchReplicas.tableId = ctabFileSize + 1;
18243 }//if
18244 c_switchReplicas.fragNo++;
18245
18246 signal->theData[0] = DihContinueB::SwitchReplica;
18247 signal->theData[1] = c_switchReplicas.nodeId;
18248 signal->theData[2] = c_switchReplicas.tableId;
18249 signal->theData[3] = c_switchReplicas.fragNo;
18250 sendSignal(reference(), GSN_CONTINUEB, signal, 4, JBB);
18251 }//Dbdih::switchReplicaReply()
18252
18253 void
switchReplica(Signal * signal,Uint32 nodeId,Uint32 tableId,Uint32 fragNo)18254 Dbdih::switchReplica(Signal* signal,
18255 Uint32 nodeId,
18256 Uint32 tableId,
18257 Uint32 fragNo){
18258 jam();
18259 DihSwitchReplicaReq* const req = (DihSwitchReplicaReq*)&signal->theData[0];
18260
18261 const Uint32 RT_BREAK = 64;
18262
18263 for (Uint32 i = 0; i < RT_BREAK; i++) {
18264 jam();
18265 if (tableId >= ctabFileSize) {
18266 jam();
18267 StopPermConf* const conf = (StopPermConf*)&signal->theData[0];
18268 StopPermRef* const ref = (StopPermRef*)&signal->theData[0];
18269 /**
18270 * Finished with all tables
18271 */
18272 if(c_stopPermMaster.returnValue == 0) {
18273 jam();
18274 conf->senderData = c_stopPermMaster.clientData;
18275 sendSignal(c_stopPermMaster.clientRef, GSN_STOP_PERM_CONF,
18276 signal, 1, JBB);
18277 } else {
18278 jam();
18279 ref->senderData = c_stopPermMaster.clientData;
18280 ref->errorCode = c_stopPermMaster.returnValue;
18281 sendSignal(c_stopPermMaster.clientRef, GSN_STOP_PERM_REF, signal, 2,JBB);
18282 }//if
18283
18284 /**
18285 * UnLock
18286 */
18287 c_nodeStartMaster.activeState = false;
18288 c_stopPermMaster.clientRef = 0;
18289 c_stopPermMaster.clientData = 0;
18290 c_stopPermMaster.returnValue = 0;
18291 Mutex mutex(signal, c_mutexMgr, c_switchPrimaryMutexHandle);
18292 mutex.unlock(); // ignore result
18293 return;
18294 }//if
18295
18296 TabRecordPtr tabPtr;
18297 tabPtr.i = tableId;
18298 ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
18299
18300 if (tabPtr.p->tabStatus != TabRecord::TS_ACTIVE) {
18301 jam();
18302 tableId++;
18303 fragNo = 0;
18304 continue;
18305 }//if
18306 if (fragNo >= tabPtr.p->totalfragments) {
18307 jam();
18308 tableId++;
18309 fragNo = 0;
18310 continue;
18311 }//if
18312 FragmentstorePtr fragPtr;
18313 getFragstore(tabPtr.p, fragNo, fragPtr);
18314
18315 Uint32 oldOrder[MAX_REPLICAS];
18316 const Uint32 noOfReplicas = extractNodeInfo(fragPtr.p, oldOrder);
18317
18318 if(oldOrder[0] != nodeId) {
18319 jam();
18320 fragNo++;
18321 continue;
18322 }//if
18323 req->tableId = tableId;
18324 req->fragNo = fragNo;
18325 req->noOfReplicas = noOfReplicas;
18326 for (Uint32 i = 0; i < (noOfReplicas - 1); i++) {
18327 req->newNodeOrder[i] = oldOrder[i+1];
18328 }//for
18329 req->newNodeOrder[noOfReplicas-1] = nodeId;
18330 req->senderRef = reference();
18331
18332 /**
18333 * Initialize struct
18334 */
18335 c_switchReplicas.tableId = tableId;
18336 c_switchReplicas.fragNo = fragNo;
18337 c_switchReplicas.nodeId = nodeId;
18338
18339 sendLoopMacro(DIH_SWITCH_REPLICA_REQ, sendDIH_SWITCH_REPLICA_REQ, RNIL);
18340 return;
18341 }//for
18342
18343 signal->theData[0] = DihContinueB::SwitchReplica;
18344 signal->theData[1] = nodeId;
18345 signal->theData[2] = tableId;
18346 signal->theData[3] = fragNo;
18347 sendSignal(reference(), GSN_CONTINUEB, signal, 4, JBB);
18348 }//Dbdih::switchReplica()
18349
execSTOP_ME_REQ(Signal * signal)18350 void Dbdih::execSTOP_ME_REQ(Signal* signal)
18351 {
18352 jamEntry();
18353 StopMeReq* const req = (StopMeReq*)&signal->theData[0];
18354 const BlockReference senderRef = req->senderRef;
18355 const Uint32 senderData = req->senderData;
18356 const Uint32 nodeId = refToNode(senderRef);
18357 {
18358 /**
18359 * Set node dead (remove from operations)
18360 */
18361 NodeRecordPtr nodePtr;
18362 nodePtr.i = nodeId;
18363 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
18364 nodePtr.p->useInTransactions = false;
18365 }
18366 if (nodeId != getOwnNodeId()) {
18367 jam();
18368 StopMeConf * const stopMeConf = (StopMeConf *)&signal->theData[0];
18369 stopMeConf->senderData = senderData;
18370 stopMeConf->senderRef = reference();
18371 sendSignal(senderRef, GSN_STOP_ME_CONF, signal,
18372 StopMeConf::SignalLength, JBB);
18373 return;
18374 }//if
18375
18376 /**
18377 * Local signal
18378 */
18379 jam();
18380 ndbrequire(c_stopMe.clientRef == 0);
18381
18382 c_stopMe.clientData = senderData;
18383 c_stopMe.clientRef = senderRef;
18384
18385 req->senderData = senderData;
18386 req->senderRef = reference();
18387
18388 sendLoopMacro(STOP_ME_REQ, sendSTOP_ME_REQ, RNIL);
18389
18390 /**
18391 * Send conf to self
18392 */
18393 StopMeConf * const stopMeConf = (StopMeConf *)&signal->theData[0];
18394 stopMeConf->senderData = senderData;
18395 stopMeConf->senderRef = reference();
18396 sendSignal(reference(), GSN_STOP_ME_CONF, signal,
18397 StopMeConf::SignalLength, JBB);
18398 }//Dbdih::execSTOP_ME_REQ()
18399
execSTOP_ME_REF(Signal * signal)18400 void Dbdih::execSTOP_ME_REF(Signal* signal)
18401 {
18402 ndbrequire(false);
18403 }
18404
execSTOP_ME_CONF(Signal * signal)18405 void Dbdih::execSTOP_ME_CONF(Signal* signal)
18406 {
18407 jamEntry();
18408 StopMeConf * const stopMeConf = (StopMeConf *)&signal->theData[0];
18409
18410 const Uint32 senderRef = stopMeConf->senderRef;
18411 const Uint32 senderData = stopMeConf->senderData;
18412 const Uint32 nodeId = refToNode(senderRef);
18413
18414 ndbrequire(c_stopMe.clientRef != 0);
18415 ndbrequire(c_stopMe.clientData == senderData);
18416
18417 receiveLoopMacro(STOP_ME_REQ, nodeId);
18418 //---------------------------------------------------------
18419 // All STOP_ME_REQ have been received. We will send the
18420 // confirmation back to the requesting block.
18421 //---------------------------------------------------------
18422
18423 stopMeConf->senderRef = reference();
18424 stopMeConf->senderData = c_stopMe.clientData;
18425 sendSignal(c_stopMe.clientRef, GSN_STOP_ME_CONF, signal,
18426 StopMeConf::SignalLength, JBB);
18427 c_stopMe.clientRef = 0;
18428 }//Dbdih::execSTOP_ME_CONF()
18429
execWAIT_GCP_REQ(Signal * signal)18430 void Dbdih::execWAIT_GCP_REQ(Signal* signal)
18431 {
18432 jamEntry();
18433 WaitGCPReq* const req = (WaitGCPReq*)&signal->theData[0];
18434 WaitGCPRef* const ref = (WaitGCPRef*)&signal->theData[0];
18435 WaitGCPConf* const conf = (WaitGCPConf*)&signal->theData[0];
18436 const Uint32 senderData = req->senderData;
18437 const BlockReference senderRef = req->senderRef;
18438 const Uint32 requestType = req->requestType;
18439 Uint32 errorCode = 0;
18440
18441 if(requestType == WaitGCPReq::CurrentGCI)
18442 {
18443 jam();
18444 conf->senderData = senderData;
18445 conf->gci_hi = Uint32(m_micro_gcp.m_current_gci >> 32);
18446 conf->gci_lo = Uint32(m_micro_gcp.m_current_gci);
18447 conf->blockStatus = cgcpOrderBlocked;
18448 sendSignal(senderRef, GSN_WAIT_GCP_CONF, signal,
18449 WaitGCPConf::SignalLength, JBB);
18450 return;
18451 }//if
18452
18453 if(requestType == WaitGCPReq::RestartGCI)
18454 {
18455 jam();
18456 conf->senderData = senderData;
18457 conf->gci_hi = Uint32(crestartGci);
18458 conf->gci_lo = 0;
18459 conf->blockStatus = cgcpOrderBlocked;
18460 sendSignal(senderRef, GSN_WAIT_GCP_CONF, signal,
18461 WaitGCPConf::SignalLength, JBB);
18462 return;
18463 }//if
18464
18465 if (requestType == WaitGCPReq::BlockStartGcp)
18466 {
18467 jam();
18468 conf->senderData = senderData;
18469 conf->gci_hi = Uint32(m_micro_gcp.m_current_gci >> 32);
18470 conf->gci_lo = Uint32(m_micro_gcp.m_current_gci);
18471 conf->blockStatus = cgcpOrderBlocked;
18472 sendSignal(senderRef, GSN_WAIT_GCP_CONF, signal,
18473 WaitGCPConf::SignalLength, JBB);
18474 cgcpOrderBlocked = 1;
18475 return;
18476 }
18477
18478 if (requestType == WaitGCPReq::UnblockStartGcp)
18479 {
18480 jam();
18481 conf->senderData = senderData;
18482 conf->gci_hi = Uint32(m_micro_gcp.m_current_gci >> 32);
18483 conf->gci_lo = Uint32(m_micro_gcp.m_current_gci);
18484 conf->blockStatus = cgcpOrderBlocked;
18485 sendSignal(senderRef, GSN_WAIT_GCP_CONF, signal,
18486 WaitGCPConf::SignalLength, JBB);
18487 cgcpOrderBlocked = 0;
18488 return;
18489 }
18490
18491 if(isMaster())
18492 {
18493 /**
18494 * Master
18495 */
18496
18497 if (!isActiveMaster())
18498 {
18499 ndbassert(cmasterState == MASTER_TAKE_OVER_GCP);
18500 errorCode = WaitGCPRef::NF_MasterTakeOverInProgress;
18501 goto error;
18502 }
18503
18504 if((requestType == WaitGCPReq::CompleteIfRunning) &&
18505 (m_gcp_save.m_master.m_state == GcpSave::GCP_SAVE_IDLE))
18506 {
18507 jam();
18508 conf->senderData = senderData;
18509 conf->gci_hi = Uint32(m_micro_gcp.m_old_gci >> 32);
18510 conf->gci_lo = Uint32(m_micro_gcp.m_old_gci);
18511 conf->blockStatus = cgcpOrderBlocked;
18512 sendSignal(senderRef, GSN_WAIT_GCP_CONF, signal,
18513 WaitGCPConf::SignalLength, JBB);
18514 return;
18515 }//if
18516
18517 WaitGCPMasterPtr ptr;
18518 WaitGCPList * list = &c_waitGCPMasterList;
18519 if (requestType == WaitGCPReq::WaitEpoch)
18520 {
18521 jam();
18522 list = &c_waitEpochMasterList;
18523 }
18524
18525 if(list->seize(ptr) == false)
18526 {
18527 jam();
18528 errorCode = WaitGCPRef::NoWaitGCPRecords;
18529 goto error;
18530 return;
18531 }
18532
18533 ptr.p->clientRef = senderRef;
18534 ptr.p->clientData = senderData;
18535
18536 if((requestType == WaitGCPReq::CompleteForceStart) &&
18537 (m_gcp_save.m_master.m_state == GcpSave::GCP_SAVE_IDLE))
18538 {
18539 jam();
18540 m_micro_gcp.m_master.m_start_time = m_gcp_save.m_master.m_start_time = 0;
18541 }//if
18542 return;
18543 }
18544 else
18545 {
18546 /**
18547 * Proxy part
18548 */
18549 jam();
18550 WaitGCPProxyPtr ptr;
18551 if (c_waitGCPProxyList.seize(ptr) == false)
18552 {
18553 jam();
18554 errorCode = WaitGCPRef::NoWaitGCPRecords;
18555 goto error;
18556 }//if
18557 ptr.p->clientRef = senderRef;
18558 ptr.p->clientData = senderData;
18559 ptr.p->masterRef = cmasterdihref;
18560
18561 req->senderData = ptr.i;
18562 req->senderRef = reference();
18563 req->requestType = requestType;
18564
18565 sendSignal(cmasterdihref, GSN_WAIT_GCP_REQ, signal,
18566 WaitGCPReq::SignalLength, JBB);
18567 return;
18568 }//if
18569
18570 error:
18571 ref->senderData = senderData;
18572 ref->errorCode = errorCode;
18573 sendSignal(senderRef, GSN_WAIT_GCP_REF, signal,
18574 WaitGCPRef::SignalLength, JBB);
18575 }//Dbdih::execWAIT_GCP_REQ()
18576
execWAIT_GCP_REF(Signal * signal)18577 void Dbdih::execWAIT_GCP_REF(Signal* signal)
18578 {
18579 jamEntry();
18580 ndbrequire(!isMaster());
18581 WaitGCPRef* const ref = (WaitGCPRef*)&signal->theData[0];
18582
18583 const Uint32 proxyPtr = ref->senderData;
18584 const Uint32 errorCode = ref->errorCode;
18585
18586 WaitGCPProxyPtr ptr;
18587 ptr.i = proxyPtr;
18588 c_waitGCPProxyList.getPtr(ptr);
18589
18590 ref->senderData = ptr.p->clientData;
18591 ref->errorCode = errorCode;
18592 sendSignal(ptr.p->clientRef, GSN_WAIT_GCP_REF, signal,
18593 WaitGCPRef::SignalLength, JBB);
18594
18595 c_waitGCPProxyList.release(ptr);
18596 }//Dbdih::execWAIT_GCP_REF()
18597
execWAIT_GCP_CONF(Signal * signal)18598 void Dbdih::execWAIT_GCP_CONF(Signal* signal)
18599 {
18600 jamEntry();
18601 ndbrequire(!isMaster());
18602 WaitGCPConf* const conf = (WaitGCPConf*)&signal->theData[0];
18603 const Uint32 proxyPtr = conf->senderData;
18604 const Uint32 gci_hi = conf->gci_hi;
18605 const Uint32 gci_lo = conf->gci_lo;
18606 WaitGCPProxyPtr ptr;
18607
18608 ptr.i = proxyPtr;
18609 c_waitGCPProxyList.getPtr(ptr);
18610
18611 conf->senderData = ptr.p->clientData;
18612 conf->gci_hi = gci_hi;
18613 conf->gci_lo = gci_lo;
18614 conf->blockStatus = cgcpOrderBlocked;
18615 sendSignal(ptr.p->clientRef, GSN_WAIT_GCP_CONF, signal,
18616 WaitGCPConf::SignalLength, JBB);
18617
18618 c_waitGCPProxyList.release(ptr);
18619 }//Dbdih::execWAIT_GCP_CONF()
18620
checkWaitGCPProxy(Signal * signal,NodeId failedNodeId)18621 void Dbdih::checkWaitGCPProxy(Signal* signal, NodeId failedNodeId)
18622 {
18623 jam();
18624 WaitGCPRef* const ref = (WaitGCPRef*)&signal->theData[0];
18625 ref->errorCode = WaitGCPRef::NF_CausedAbortOfProcedure;
18626
18627 WaitGCPProxyPtr ptr;
18628 c_waitGCPProxyList.first(ptr);
18629 while(ptr.i != RNIL) {
18630 jam();
18631 const Uint32 i = ptr.i;
18632 const Uint32 clientData = ptr.p->clientData;
18633 const BlockReference clientRef = ptr.p->clientRef;
18634 const BlockReference masterRef = ptr.p->masterRef;
18635
18636 c_waitGCPProxyList.next(ptr);
18637 if(refToNode(masterRef) == failedNodeId) {
18638 jam();
18639 c_waitGCPProxyList.release(i);
18640 ref->senderData = clientData;
18641 sendSignal(clientRef, GSN_WAIT_GCP_REF, signal,
18642 WaitGCPRef::SignalLength, JBB);
18643 }//if
18644 }//while
18645 }//Dbdih::checkWaitGCPProxy()
18646
checkWaitGCPMaster(Signal * signal,NodeId failedNodeId)18647 void Dbdih::checkWaitGCPMaster(Signal* signal, NodeId failedNodeId)
18648 {
18649 jam();
18650 WaitGCPMasterPtr ptr;
18651 c_waitGCPMasterList.first(ptr);
18652
18653 while (ptr.i != RNIL) {
18654 jam();
18655 const Uint32 i = ptr.i;
18656 const NodeId nodeId = refToNode(ptr.p->clientRef);
18657
18658 c_waitGCPMasterList.next(ptr);
18659 if (nodeId == failedNodeId) {
18660 jam();
18661 c_waitGCPMasterList.release(i);
18662 }//if
18663 }//while
18664 }//Dbdih::checkWaitGCPMaster()
18665
emptyWaitGCPMasterQueue(Signal * signal,Uint64 gci,WaitGCPList & list)18666 void Dbdih::emptyWaitGCPMasterQueue(Signal* signal,
18667 Uint64 gci,
18668 WaitGCPList & list)
18669 {
18670 jam();
18671 WaitGCPConf* const conf = (WaitGCPConf*)&signal->theData[0];
18672 conf->gci_hi = Uint32(gci >> 32);
18673 conf->gci_lo = Uint32(gci);
18674
18675 WaitGCPMasterPtr ptr;
18676 list.first(ptr);
18677 while(ptr.i != RNIL) {
18678 jam();
18679 const Uint32 i = ptr.i;
18680 const Uint32 clientData = ptr.p->clientData;
18681 const BlockReference clientRef = ptr.p->clientRef;
18682
18683 c_waitGCPMasterList.next(ptr);
18684 conf->senderData = clientData;
18685 conf->blockStatus = cgcpOrderBlocked;
18686 sendSignal(clientRef, GSN_WAIT_GCP_CONF, signal,
18687 WaitGCPConf::SignalLength, JBB);
18688
18689 list.release(i);
18690 }//while
18691 }//Dbdih::emptyWaitGCPMasterQueue()
18692
setNodeStatus(Uint32 nodeId,NodeRecord::NodeStatus newStatus)18693 void Dbdih::setNodeStatus(Uint32 nodeId, NodeRecord::NodeStatus newStatus)
18694 {
18695 NodeRecordPtr nodePtr;
18696 nodePtr.i = nodeId;
18697 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
18698 nodePtr.p->nodeStatus = newStatus;
18699 }//Dbdih::setNodeStatus()
18700
getNodeStatus(Uint32 nodeId)18701 Dbdih::NodeRecord::NodeStatus Dbdih::getNodeStatus(Uint32 nodeId)
18702 {
18703 NodeRecordPtr nodePtr;
18704 nodePtr.i = nodeId;
18705 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
18706 return nodePtr.p->nodeStatus;
18707 }//Dbdih::getNodeStatus()
18708
18709 Sysfile::ActiveStatus
getNodeActiveStatus(Uint32 nodeId)18710 Dbdih::getNodeActiveStatus(Uint32 nodeId)
18711 {
18712 NodeRecordPtr nodePtr;
18713 nodePtr.i = nodeId;
18714 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
18715 return nodePtr.p->activeStatus;
18716 }//Dbdih::getNodeActiveStatus()
18717
18718
18719 void
setNodeActiveStatus(Uint32 nodeId,Sysfile::ActiveStatus newStatus)18720 Dbdih::setNodeActiveStatus(Uint32 nodeId, Sysfile::ActiveStatus newStatus)
18721 {
18722 NodeRecordPtr nodePtr;
18723 nodePtr.i = nodeId;
18724 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
18725 nodePtr.p->activeStatus = newStatus;
18726 }//Dbdih::setNodeActiveStatus()
18727
setAllowNodeStart(Uint32 nodeId,bool newState)18728 void Dbdih::setAllowNodeStart(Uint32 nodeId, bool newState)
18729 {
18730 NodeRecordPtr nodePtr;
18731 nodePtr.i = nodeId;
18732 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
18733 nodePtr.p->allowNodeStart = newState;
18734 }//Dbdih::setAllowNodeStart()
18735
getAllowNodeStart(Uint32 nodeId)18736 bool Dbdih::getAllowNodeStart(Uint32 nodeId)
18737 {
18738 NodeRecordPtr nodePtr;
18739 nodePtr.i = nodeId;
18740 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
18741 return nodePtr.p->allowNodeStart;
18742 }//Dbdih::getAllowNodeStart()
18743
18744 Uint32
getNodeGroup(Uint32 nodeId) const18745 Dbdih::getNodeGroup(Uint32 nodeId) const
18746 {
18747 NodeRecordPtr nodePtr;
18748 nodePtr.i = nodeId;
18749 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
18750 return nodePtr.p->nodeGroup;
18751 }
18752
checkNodeAlive(Uint32 nodeId)18753 bool Dbdih::checkNodeAlive(Uint32 nodeId)
18754 {
18755 NodeRecordPtr nodePtr;
18756 nodePtr.i = nodeId;
18757 ndbrequire(nodeId > 0);
18758 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
18759 if (nodePtr.p->nodeStatus != NodeRecord::ALIVE) {
18760 return false;
18761 } else {
18762 return true;
18763 }//if
18764 }//Dbdih::checkNodeAlive()
18765
isMaster()18766 bool Dbdih::isMaster()
18767 {
18768 return (reference() == cmasterdihref);
18769 }//Dbdih::isMaster()
18770
isActiveMaster()18771 bool Dbdih::isActiveMaster()
18772 {
18773 return ((reference() == cmasterdihref) && (cmasterState == MASTER_ACTIVE));
18774 }//Dbdih::isActiveMaster()
18775
NodeRecord()18776 Dbdih::NodeRecord::NodeRecord(){
18777 m_nodefailSteps.clear();
18778
18779 activeStatus = Sysfile::NS_NotDefined;
18780 recNODE_FAILREP = ZFALSE;
18781 dbtcFailCompleted = ZTRUE;
18782 dbdictFailCompleted = ZTRUE;
18783 dbdihFailCompleted = ZTRUE;
18784 dblqhFailCompleted = ZTRUE;
18785 noOfStartedChkpt = 0;
18786 noOfQueuedChkpt = 0;
18787 lcpStateAtTakeOver = (MasterLCPConf::State)255;
18788
18789 activeTabptr = RNIL;
18790 nodeStatus = NodeRecord::NOT_IN_CLUSTER;
18791 useInTransactions = false;
18792 copyCompleted = false;
18793 allowNodeStart = true;
18794 }
18795
18796 // DICT lock slave
18797
18798 void
sendDictLockReq(Signal * signal,Uint32 lockType,Callback c)18799 Dbdih::sendDictLockReq(Signal* signal, Uint32 lockType, Callback c)
18800 {
18801 DictLockReq* req = (DictLockReq*)&signal->theData[0];
18802 DictLockSlavePtr lockPtr;
18803
18804 c_dictLockSlavePool.seize(lockPtr);
18805 ndbrequire(lockPtr.i != RNIL);
18806
18807 req->userPtr = lockPtr.i;
18808 req->lockType = lockType;
18809 req->userRef = reference();
18810
18811 lockPtr.p->lockPtr = RNIL;
18812 lockPtr.p->lockType = lockType;
18813 lockPtr.p->locked = false;
18814 lockPtr.p->callback = c;
18815
18816 // handle rolling upgrade
18817 {
18818 Uint32 masterVersion = getNodeInfo(cmasterNodeId).m_version;
18819
18820 const unsigned int get_major = getMajor(masterVersion);
18821 const unsigned int get_minor = getMinor(masterVersion);
18822 const unsigned int get_build = getBuild(masterVersion);
18823 ndbrequire(get_major >= 4);
18824
18825 if (masterVersion < NDBD_DICT_LOCK_VERSION_5 ||
18826 (masterVersion < NDBD_DICT_LOCK_VERSION_5_1 &&
18827 get_major == 5 && get_minor == 1) ||
18828 ERROR_INSERTED(7176)) {
18829 jam();
18830
18831 infoEvent("DIH: detect upgrade: master node %u old version %u.%u.%u",
18832 (unsigned int)cmasterNodeId, get_major, get_minor, get_build);
18833
18834 DictLockConf* conf = (DictLockConf*)&signal->theData[0];
18835 conf->userPtr = lockPtr.i;
18836 conf->lockType = lockType;
18837 conf->lockPtr = ZNIL;
18838
18839 sendSignal(reference(), GSN_DICT_LOCK_CONF, signal,
18840 DictLockConf::SignalLength, JBB);
18841 return;
18842 }
18843 }
18844
18845 BlockReference dictMasterRef = calcDictBlockRef(cmasterNodeId);
18846 sendSignal(dictMasterRef, GSN_DICT_LOCK_REQ, signal,
18847 DictLockReq::SignalLength, JBB);
18848 }
18849
18850 void
execDICT_LOCK_CONF(Signal * signal)18851 Dbdih::execDICT_LOCK_CONF(Signal* signal)
18852 {
18853 jamEntry();
18854 recvDictLockConf(signal);
18855 }
18856
18857 void
execDICT_LOCK_REF(Signal * signal)18858 Dbdih::execDICT_LOCK_REF(Signal* signal)
18859 {
18860 jamEntry();
18861 ndbrequire(false);
18862 }
18863
18864 void
recvDictLockConf(Signal * signal)18865 Dbdih::recvDictLockConf(Signal* signal)
18866 {
18867 const DictLockConf* conf = (const DictLockConf*)&signal->theData[0];
18868
18869 DictLockSlavePtr lockPtr;
18870 c_dictLockSlavePool.getPtr(lockPtr, conf->userPtr);
18871
18872 lockPtr.p->lockPtr = conf->lockPtr;
18873 ndbrequire(lockPtr.p->lockType == conf->lockType);
18874 ndbrequire(lockPtr.p->locked == false);
18875 lockPtr.p->locked = true;
18876
18877 lockPtr.p->callback.m_callbackData = lockPtr.i;
18878 execute(signal, lockPtr.p->callback, 0);
18879 }
18880
18881 void
sendDictUnlockOrd(Signal * signal,Uint32 lockSlavePtrI)18882 Dbdih::sendDictUnlockOrd(Signal* signal, Uint32 lockSlavePtrI)
18883 {
18884 DictUnlockOrd* ord = (DictUnlockOrd*)&signal->theData[0];
18885
18886 DictLockSlavePtr lockPtr;
18887 c_dictLockSlavePool.getPtr(lockPtr, lockSlavePtrI);
18888
18889 ord->lockPtr = lockPtr.p->lockPtr;
18890 ord->lockType = lockPtr.p->lockType;
18891 ord->senderData = lockPtr.i;
18892 ord->senderRef = reference();
18893
18894 c_dictLockSlavePool.release(lockPtr);
18895
18896 // handle rolling upgrade
18897 {
18898 Uint32 masterVersion = getNodeInfo(cmasterNodeId).m_version;
18899
18900 const unsigned int get_major = getMajor(masterVersion);
18901 const unsigned int get_minor = getMinor(masterVersion);
18902 ndbrequire(get_major >= 4);
18903
18904 if (masterVersion < NDBD_DICT_LOCK_VERSION_5 ||
18905 (masterVersion < NDBD_DICT_LOCK_VERSION_5_1 &&
18906 get_major == 5 && get_minor == 1) ||
18907 ERROR_INSERTED(7176)) {
18908 return;
18909 }
18910 }
18911
18912 Uint32 len = DictUnlockOrd::SignalLength;
18913 if (unlikely(getNodeInfo(cmasterNodeId).m_version < NDB_MAKE_VERSION(6,3,0)))
18914 {
18915 jam();
18916 len = 2;
18917 }
18918
18919 BlockReference dictMasterRef = calcDictBlockRef(cmasterNodeId);
18920 sendSignal(dictMasterRef, GSN_DICT_UNLOCK_ORD, signal, len, JBB);
18921 }
18922
18923 #ifdef ERROR_INSERT
18924 void
sendToRandomNodes(const char * msg,Signal * signal,SignalCounter * counter,SendFunction fun,Uint32 extra,Uint32 block,Uint32 gsn,Uint32 len,JobBufferLevel level)18925 Dbdih::sendToRandomNodes(const char * msg,
18926 Signal* signal,
18927 SignalCounter* counter,
18928 SendFunction fun,
18929 Uint32 extra,
18930 Uint32 block,
18931 Uint32 gsn,
18932 Uint32 len,
18933 JobBufferLevel level)
18934 {
18935
18936 if (counter)
18937 counter->clearWaitingFor();
18938
18939 Vector<Uint32> nodes;
18940 NodeRecordPtr nodePtr;
18941 nodePtr.i = cfirstAliveNode;
18942 do {
18943 jam();
18944 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
18945 if (nodePtr.i != getOwnNodeId())
18946 {
18947 nodes.push_back(nodePtr.i);
18948 }
18949 nodePtr.i = nodePtr.p->nextNode;
18950 } while (nodePtr.i != RNIL);
18951
18952
18953 NdbNodeBitmask masked;
18954 Uint32 cnt = nodes.size();
18955 if (cnt <= 1)
18956 {
18957 goto do_send;
18958 }
18959
18960 {
18961 Uint32 remove = (rand() % cnt);
18962 if (remove == 0)
18963 remove = 1;
18964
18965 for (Uint32 i = 0; i<remove; i++)
18966 {
18967 Uint32 rand_node = rand() % nodes.size();
18968 masked.set(nodes[rand_node]);
18969 nodes.erase(rand_node);
18970 }
18971 }
18972
18973 do_send:
18974 char bufpos = 0;
18975 char buf[256];
18976
18977 nodePtr.i = cfirstAliveNode;
18978 do {
18979 jam();
18980 ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
18981 if (counter)
18982 counter->setWaitingFor(nodePtr.i);
18983 if (!masked.get(nodePtr.i))
18984 {
18985 if (fun)
18986 {
18987 (this->*fun)(signal, nodePtr.i, extra);
18988 }
18989 else
18990 {
18991 Uint32 ref = numberToRef(block, nodePtr.i);
18992 sendSignal(ref, gsn, signal, len, level);
18993 }
18994 BaseString::snprintf(buf+bufpos, sizeof(buf)-bufpos, "%u ", nodePtr.i);
18995 }
18996 else
18997 {
18998 BaseString::snprintf(buf+bufpos, sizeof(buf)-bufpos, "[%u] ", nodePtr.i);
18999 }
19000 bufpos = strlen(buf);
19001 nodePtr.i = nodePtr.p->nextNode;
19002 } while (nodePtr.i != RNIL);
19003 infoEvent("%s %s", msg, buf);
19004 }
19005
19006 #endif
19007
19008 // MT LQH
19009
19010 Uint32
dihGetInstanceKey(Uint32 tabId,Uint32 fragId)19011 Dbdih::dihGetInstanceKey(Uint32 tabId, Uint32 fragId)
19012 {
19013 TabRecordPtr tTabPtr;
19014 tTabPtr.i = tabId;
19015 ptrCheckGuard(tTabPtr, ctabFileSize, tabRecord);
19016 FragmentstorePtr tFragPtr;
19017 getFragstore(tTabPtr.p, fragId, tFragPtr);
19018 Uint32 instanceKey = dihGetInstanceKey(tFragPtr);
19019 return instanceKey;
19020 }
19021
19022 /**
19023 *
19024 */
19025 void
execCREATE_NODEGROUP_IMPL_REQ(Signal * signal)19026 Dbdih::execCREATE_NODEGROUP_IMPL_REQ(Signal* signal)
19027 {
19028 jamEntry();
19029 CreateNodegroupImplReq reqCopy = *(CreateNodegroupImplReq*)signal->getDataPtr();
19030 CreateNodegroupImplReq *req = &reqCopy;
19031
19032 Uint32 err = 0;
19033 Uint32 rt = req->requestType;
19034 Uint64 gci = 0;
19035 switch(rt){
19036 case CreateNodegroupImplReq::RT_ABORT:
19037 jam(); // do nothing
19038 break;
19039 case CreateNodegroupImplReq::RT_PARSE:
19040 case CreateNodegroupImplReq::RT_PREPARE:
19041 case CreateNodegroupImplReq::RT_COMMIT:
19042 {
19043 Uint32 cnt = 0;
19044 for (Uint32 i = 0; i<NDB_ARRAY_SIZE(req->nodes) && req->nodes[i] ; i++)
19045 {
19046 cnt++;
19047 if (getNodeActiveStatus(req->nodes[i]) != Sysfile::NS_Configured)
19048 {
19049 jam();
19050 err = CreateNodegroupRef::NodeAlreadyInNodegroup;
19051 goto error;
19052 }
19053 }
19054
19055 if (cnt != cnoReplicas)
19056 {
19057 jam();
19058 err = CreateNodegroupRef::InvalidNoOfNodesInNodegroup;
19059 goto error;
19060 }
19061
19062 Uint32 ng = req->nodegroupId;
19063 NdbNodeBitmask tmp;
19064 tmp.set();
19065 for (Uint32 i = 0; i<cnoOfNodeGroups; i++)
19066 {
19067 tmp.clear(c_node_groups[i]);
19068 }
19069
19070 if (ng == RNIL && rt == CreateNodegroupImplReq::RT_PARSE)
19071 {
19072 jam();
19073 ng = tmp.find(0);
19074 }
19075
19076 if (ng > MAX_NDB_NODES)
19077 {
19078 jam();
19079 err = CreateNodegroupRef::InvalidNodegroupId;
19080 goto error;
19081 }
19082
19083 if (tmp.get(ng) == false)
19084 {
19085 jam();
19086 err = CreateNodegroupRef::NodegroupInUse;
19087 goto error;
19088 }
19089
19090 if (rt == CreateNodegroupImplReq::RT_PARSE || rt == CreateNodegroupImplReq::RT_PREPARE)
19091 {
19092 /**
19093 * Check that atleast one of the nodes are alive
19094 */
19095 bool alive = false;
19096 for (Uint32 i = 0; i<cnoReplicas; i++)
19097 {
19098 jam();
19099 Uint32 nodeId = req->nodes[i];
19100 if (getNodeStatus(nodeId) == NodeRecord::ALIVE)
19101 {
19102 jam();
19103 alive = true;
19104 break;
19105 }
19106 }
19107
19108 jam();
19109 if (alive == false)
19110 {
19111 jam();
19112 err = CreateNodegroupRef::NoNodeAlive;
19113 goto error;
19114 }
19115 }
19116
19117 if (rt == CreateNodegroupImplReq::RT_PARSE)
19118 {
19119 jam();
19120 signal->theData[0] = 0;
19121 signal->theData[1] = ng;
19122 return;
19123 }
19124
19125 if (rt == CreateNodegroupImplReq::RT_PREPARE)
19126 {
19127 jam(); // do nothing
19128 break;
19129 }
19130
19131 ndbrequire(rt == CreateNodegroupImplReq::RT_COMMIT);
19132 for (Uint32 i = 0; i<cnoReplicas; i++)
19133 {
19134 Uint32 nodeId = req->nodes[i];
19135 Sysfile::setNodeGroup(nodeId, SYSFILE->nodeGroups, req->nodegroupId);
19136 if (getNodeStatus(nodeId) == NodeRecord::ALIVE)
19137 {
19138 jam();
19139 Sysfile::setNodeStatus(nodeId, SYSFILE->nodeStatus, Sysfile::NS_Active);
19140 }
19141 else
19142 {
19143 jam();
19144 Sysfile::setNodeStatus(nodeId, SYSFILE->nodeStatus, Sysfile::NS_ActiveMissed_1);
19145 }
19146 setNodeActiveStatus();
19147 setNodeGroups();
19148 }
19149 break;
19150 }
19151 case CreateNodegroupImplReq::RT_COMPLETE:
19152 jam();
19153 gci = m_micro_gcp.m_current_gci;
19154 break;
19155 }
19156
19157 {
19158 CreateNodegroupImplConf* conf = (CreateNodegroupImplConf*)signal->getDataPtrSend();
19159 conf->senderRef = reference();
19160 conf->senderData = req->senderData;
19161 conf->gci_hi = Uint32(gci >> 32);
19162 conf->gci_lo = Uint32(gci);
19163 sendSignal(req->senderRef, GSN_CREATE_NODEGROUP_IMPL_CONF, signal,
19164 CreateNodegroupImplConf::SignalLength, JBB);
19165 }
19166 return;
19167
19168 error:
19169 if (rt == CreateNodegroupImplReq::RT_PARSE)
19170 {
19171 jam();
19172 signal->theData[0] = err;
19173 return;
19174 }
19175
19176 if (rt == CreateNodegroupImplReq::RT_PREPARE)
19177 {
19178 jam();
19179 CreateNodegroupImplRef * ref = (CreateNodegroupImplRef*)signal->getDataPtrSend();
19180 ref->senderRef = reference();
19181 ref->senderData = req->senderData;
19182 ref->errorCode = err;
19183 sendSignal(req->senderRef, GSN_CREATE_NODEGROUP_IMPL_REF, signal,
19184 CreateNodegroupImplRef::SignalLength, JBB);
19185 return;
19186 }
19187
19188 jamLine(err);
19189 ndbrequire(false);
19190 }
19191
19192 /**
19193 *
19194 */
19195 void
execDROP_NODEGROUP_IMPL_REQ(Signal * signal)19196 Dbdih::execDROP_NODEGROUP_IMPL_REQ(Signal* signal)
19197 {
19198 jamEntry();
19199 DropNodegroupImplReq reqCopy = *(DropNodegroupImplReq*)signal->getDataPtr();
19200 DropNodegroupImplReq *req = &reqCopy;
19201
19202 NodeGroupRecordPtr NGPtr;
19203
19204 Uint32 err = 0;
19205 Uint32 rt = req->requestType;
19206 Uint64 gci = 0;
19207 switch(rt){
19208 case DropNodegroupImplReq::RT_ABORT:
19209 jam(); // do nothing
19210 break;
19211 case DropNodegroupImplReq::RT_PARSE:
19212 case DropNodegroupImplReq::RT_PREPARE:
19213 jam();
19214 NGPtr.i = req->nodegroupId;
19215 if (NGPtr.i >= MAX_NDB_NODES)
19216 {
19217 jam();
19218 err = DropNodegroupRef::NoSuchNodegroup;
19219 goto error;
19220 }
19221 ptrCheckGuard(NGPtr, MAX_NDB_NODES, nodeGroupRecord);
19222
19223 if (NGPtr.p->nodegroupIndex == RNIL)
19224 {
19225 jam();
19226 err = DropNodegroupRef::NoSuchNodegroup;
19227 goto error;
19228 }
19229
19230 if (NGPtr.p->m_ref_count)
19231 {
19232 jam();
19233 err = DropNodegroupRef::NodegroupInUse;
19234 goto error;
19235 }
19236 break;
19237 case DropNodegroupImplReq::RT_COMMIT:
19238 {
19239 jam();
19240 gci = m_micro_gcp.m_current_gci;
19241 break;
19242 }
19243 case DropNodegroupImplReq::RT_COMPLETE:
19244 {
19245 NGPtr.i = req->nodegroupId;
19246 ptrCheckGuard(NGPtr, MAX_NDB_NODES, nodeGroupRecord);
19247 for (Uint32 i = 0; i<NGPtr.p->nodeCount; i++)
19248 {
19249 jam();
19250 Uint32 nodeId = NGPtr.p->nodesInGroup[i];
19251 Sysfile::setNodeGroup(nodeId, SYSFILE->nodeGroups, NO_NODE_GROUP_ID);
19252 Sysfile::setNodeStatus(nodeId, SYSFILE->nodeStatus, Sysfile::NS_Configured);
19253 }
19254 setNodeActiveStatus();
19255 setNodeGroups();
19256 break;
19257 }
19258 }
19259
19260 {
19261 DropNodegroupImplConf* conf = (DropNodegroupImplConf*)signal->getDataPtrSend();
19262 conf->senderRef = reference();
19263 conf->senderData = req->senderData;
19264 conf->gci_hi = Uint32(gci >> 32);
19265 conf->gci_lo = Uint32(gci);
19266 sendSignal(req->senderRef, GSN_DROP_NODEGROUP_IMPL_CONF, signal,
19267 DropNodegroupImplConf::SignalLength, JBB);
19268 }
19269 return;
19270
19271 error:
19272 DropNodegroupImplRef * ref = (DropNodegroupImplRef*)signal->getDataPtrSend();
19273 ref->senderRef = reference();
19274 ref->senderData = req->senderData;
19275 ref->errorCode = err;
19276 sendSignal(req->senderRef, GSN_DROP_NODEGROUP_IMPL_REF, signal,
19277 DropNodegroupImplRef::SignalLength, JBB);
19278 }
19279